294 files changed, 12409 insertions, 11379 deletions
diff --git a/fs/9p/9p.h b/fs/9p/9p.h
deleted file mode 100644
index 94e2f92ab2e8..000000000000
--- a/fs/9p/9p.h
+++ /dev/null
@@ -1,375 +0,0 @@
-/*
- * linux/fs/9p/9p.h
- *
- * 9P protocol definitions.
- *
- *  Copyright (C) 2005 by Latchesar Ionkov <lucho@ionkov.net>
- *  Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
- *  Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License version 2
- *  as published by the Free Software Foundation.
- *
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with this program; if not, write to:
- *  Free Software Foundation
- *  51 Franklin Street, Fifth Floor
- *  Boston, MA  02111-1301  USA
- *
- */
-/* Message Types */
-enum {
-        TVERSION = 100,
-        RVERSION,
-        TAUTH = 102,
-        RAUTH,
-        TATTACH = 104,
-        RATTACH,
-        TERROR = 106,
-        RERROR,
-        TFLUSH = 108,
-        RFLUSH,
-        TWALK = 110,
-        RWALK,
-        TOPEN = 112,
-        ROPEN,
-        TCREATE = 114,
-        RCREATE,
-        TREAD = 116,
-        RREAD,
-        TWRITE = 118,
-        RWRITE,
-        TCLUNK = 120,
-        RCLUNK,
-        TREMOVE = 122,
-        RREMOVE,
-        TSTAT = 124,
-        RSTAT,
-        TWSTAT = 126,
-        RWSTAT,
-};
-/* modes */
-enum {
-        V9FS_OREAD = 0x00,
-        V9FS_OWRITE = 0x01,
-        V9FS_ORDWR = 0x02,
-        V9FS_OEXEC = 0x03,
-        V9FS_OEXCL = 0x04,
-        V9FS_OTRUNC = 0x10,
-        V9FS_OREXEC = 0x20,
-        V9FS_ORCLOSE = 0x40,
-        V9FS_OAPPEND = 0x80,
-};
-/* permissions */
-enum {
-        V9FS_DMDIR = 0x80000000,
-        V9FS_DMAPPEND = 0x40000000,
-        V9FS_DMEXCL = 0x20000000,
-        V9FS_DMMOUNT = 0x10000000,
-        V9FS_DMAUTH = 0x08000000,
-        V9FS_DMTMP = 0x04000000,
-        V9FS_DMSYMLINK = 0x02000000,
-        V9FS_DMLINK = 0x01000000,
-        /* 9P2000.u extensions */
-        V9FS_DMDEVICE = 0x00800000,
-        V9FS_DMNAMEDPIPE = 0x00200000,
-        V9FS_DMSOCKET = 0x00100000,
-        V9FS_DMSETUID = 0x00080000,
-        V9FS_DMSETGID = 0x00040000,
-};
-/* qid.types */
-enum {
-        V9FS_QTDIR = 0x80,
-        V9FS_QTAPPEND = 0x40,
-        V9FS_QTEXCL = 0x20,
-        V9FS_QTMOUNT = 0x10,
-        V9FS_QTAUTH = 0x08,
-        V9FS_QTTMP = 0x04,
-        V9FS_QTSYMLINK = 0x02,
-        V9FS_QTLINK = 0x01,
-        V9FS_QTFILE = 0x00,
-};
-#define V9FS_NOTAG      (u16)(~0)
-#define V9FS_NOFID      (u32)(~0)
-#define V9FS_MAXWELEM   16
-/* ample room for Twrite/Rread header (iounit) */
-#define V9FS_IOHDRSZ    24
-struct v9fs_str {
-        u16 len;
-        char *str;
-};
-/* qids are the unique ID for a file (like an inode */
-struct v9fs_qid {
-        u8 type;
-        u32 version;
-        u64 path;
-};
-/* Plan 9 file metadata (stat) structure */
-struct v9fs_stat {
-        u16 size;
-        u16 type;
-        u32 dev;
-        struct v9fs_qid qid;
-        u32 mode;
-        u32 atime;
-        u32 mtime;
-        u64 length;
-        struct v9fs_str name;
-        struct v9fs_str uid;
-        struct v9fs_str gid;
-        struct v9fs_str muid;
-        struct v9fs_str extension;      /* 9p2000.u extensions */
-        u32 n_uid;              /* 9p2000.u extensions */
-        u32 n_gid;              /* 9p2000.u extensions */
-        u32 n_muid;             /* 9p2000.u extensions */
-};
-/* file metadata (stat) structure used to create Twstat message
-   The is similar to v9fs_stat, but the strings don't point to
-   the same memory block and should be freed separately
-*/
-struct v9fs_wstat {
-        u16 size;
-        u16 type;
-        u32 dev;
-        struct v9fs_qid qid;
-        u32 mode;
-        u32 atime;
-        u32 mtime;
-        u64 length;
-        char *name;
-        char *uid;
-        char *gid;
-        char *muid;
-        char *extension;        /* 9p2000.u extensions */
-        u32 n_uid;              /* 9p2000.u extensions */
-        u32 n_gid;              /* 9p2000.u extensions */
-        u32 n_muid;             /* 9p2000.u extensions */
-};
-/* Structures for Protocol Operations */
-struct Tversion {
-        u32 msize;
-        struct v9fs_str version;
-};
-struct Rversion {
-        u32 msize;
-        struct v9fs_str version;
-};
-struct Tauth {
-        u32 afid;
-        struct v9fs_str uname;
-        struct v9fs_str aname;
-};
-struct Rauth {
-        struct v9fs_qid qid;
-};
-struct Rerror {
-        struct v9fs_str error;
-        u32 errno;              /* 9p2000.u extension */
-};
-struct Tflush {
-        u16 oldtag;
-};
-struct Rflush {
-};
-struct Tattach {
-        u32 fid;
-        u32 afid;
-        struct v9fs_str uname;
-        struct v9fs_str aname;
-};
-struct Rattach {
-        struct v9fs_qid qid;
-};
-struct Twalk {
-        u32 fid;
-        u32 newfid;
-        u16 nwname;
-        struct v9fs_str wnames[16];
-};
-struct Rwalk {
-        u16 nwqid;
-        struct v9fs_qid wqids[16];
-};
-struct Topen {
-        u32 fid;
-        u8 mode;
-};
-struct Ropen {
-        struct v9fs_qid qid;
-        u32 iounit;
-};
-struct Tcreate {
-        u32 fid;
-        struct v9fs_str name;
-        u32 perm;
-        u8 mode;
-        struct v9fs_str extension;
-};
-struct Rcreate {
-        struct v9fs_qid qid;
-        u32 iounit;
-};
-struct Tread {
-        u32 fid;
-        u64 offset;
-        u32 count;
-};
-struct Rread {
-        u32 count;
-        u8 *data;
-};
-struct Twrite {
-        u32 fid;
-        u64 offset;
-        u32 count;
-        u8 *data;
-};
-struct Rwrite {
-        u32 count;
-};
-struct Tclunk {
-        u32 fid;
-};
-struct Rclunk {
-};
-struct Tremove {
-        u32 fid;
-};
-struct Rremove {
-};
-struct Tstat {
-        u32 fid;
-};
-struct Rstat {
-        struct v9fs_stat stat;
-};
-struct Twstat {
-        u32 fid;
-        struct v9fs_stat stat;
-};
-struct Rwstat {
-};
-/*
-  * fcall is the primary packet structure
-  *
-  */
-struct v9fs_fcall {
-        u32 size;
-        u8 id;
-        u16 tag;
-        void *sdata;
-        union {
-                struct Tversion tversion;
-                struct Rversion rversion;
-                struct Tauth tauth;
-                struct Rauth rauth;
-                struct Rerror rerror;
-                struct Tflush tflush;
-                struct Rflush rflush;
-                struct Tattach tattach;
-                struct Rattach rattach;
-                struct Twalk twalk;
-                struct Rwalk rwalk;
-                struct Topen topen;
-                struct Ropen ropen;
-                struct Tcreate tcreate;
-                struct Rcreate rcreate;
-                struct Tread tread;
-                struct Rread rread;
-                struct Twrite twrite;
-                struct Rwrite rwrite;
-                struct Tclunk tclunk;
-                struct Rclunk rclunk;
-                struct Tremove tremove;
-                struct Rremove rremove;
-                struct Tstat tstat;
-                struct Rstat rstat;
-                struct Twstat twstat;
-                struct Rwstat rwstat;
-        } params;
-};
-#define PRINT_FCALL_ERROR(s, fcall) dprintk(DEBUG_ERROR, "%s: %.*s\n", s, \
-        fcall?fcall->params.rerror.error.len:0, \
-        fcall?fcall->params.rerror.error.str:"");
-int v9fs_t_version(struct v9fs_session_info *v9ses, u32 msize,
-                   char *version, struct v9fs_fcall **rcall);
-int v9fs_t_attach(struct v9fs_session_info *v9ses, char *uname, char *aname,
-                  u32 fid, u32 afid, struct v9fs_fcall **rcall);
-int v9fs_t_clunk(struct v9fs_session_info *v9ses, u32 fid);
-int v9fs_t_stat(struct v9fs_session_info *v9ses, u32 fid,
-                struct v9fs_fcall **rcall);
-int v9fs_t_wstat(struct v9fs_session_info *v9ses, u32 fid,
-                 struct v9fs_wstat *wstat, struct v9fs_fcall **rcall);
-int v9fs_t_walk(struct v9fs_session_info *v9ses, u32 fid, u32 newfid,
-                char *name, struct v9fs_fcall **rcall);
-int v9fs_t_open(struct v9fs_session_info *v9ses, u32 fid, u8 mode,
-                struct v9fs_fcall **rcall);
-int v9fs_t_remove(struct v9fs_session_info *v9ses, u32 fid,
-                  struct v9fs_fcall **rcall);
-int v9fs_t_create(struct v9fs_session_info *v9ses, u32 fid, char *name,
-        u32 perm, u8 mode, char *extension, struct v9fs_fcall **rcall);
-int v9fs_t_read(struct v9fs_session_info *v9ses, u32 fid,
-                u64 offset, u32 count, struct v9fs_fcall **rcall);
-int v9fs_t_write(struct v9fs_session_info *v9ses, u32 fid, u64 offset,
-                 u32 count, const char __user * data,
-                 struct v9fs_fcall **rcall);
-int v9fs_printfcall(char *, int, struct v9fs_fcall *, int);
diff --git a/fs/9p/Makefile b/fs/9p/Makefile
index 87897f84dfb6..bc7f0d1551e6 100644
--- a/fs/9p/Makefile
+++ b/fs/9p/Makefile
@@ -1,18 +1,12 @@
 obj-$(CONFIG_9P_FS) := 9p.o
 9p-objs := \
-        trans_fd.o \
-        mux.o \
-        fcall.o \
-        conv.o \
        vfs_super.o \
        vfs_inode.o \
        vfs_addr.o \
        vfs_file.o \
        vfs_dir.o \
        vfs_dentry.o \
-        error.o \
        v9fs.o \
        fid.o \
-        fcprint.o
diff --git a/fs/9p/conv.c b/fs/9p/conv.c
deleted file mode 100644
index a3ed571eee31..000000000000
--- a/fs/9p/conv.c
+++ /dev/null
@@ -1,845 +0,0 @@
-/*
- * linux/fs/9p/conv.c
- *
- * 9P protocol conversion functions
- *
- *  Copyright (C) 2004, 2005 by Latchesar Ionkov <lucho@ionkov.net>
- *  Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
- *  Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License version 2
- *  as published by the Free Software Foundation.
- *
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with this program; if not, write to:
- *  Free Software Foundation
- *  51 Franklin Street, Fifth Floor
- *  Boston, MA  02111-1301  USA
- *
- */
-#include <linux/module.h>
-#include <linux/errno.h>
-#include <linux/fs.h>
-#include <linux/sched.h>
-#include <linux/idr.h>
-#include <asm/uaccess.h>
-#include "debug.h"
-#include "v9fs.h"
-#include "9p.h"
-#include "conv.h"
-/*
- * Buffer to help with string parsing
- */
-struct cbuf {
-        unsigned char *sp;
-        unsigned char *p;
-        unsigned char *ep;
-};
-static inline void buf_init(struct cbuf *buf, void *data, int datalen)
-{
-        buf->sp = buf->p = data;
-        buf->ep = data + datalen;
-}
-static inline int buf_check_overflow(struct cbuf *buf)
-{
-        return buf->p > buf->ep;
-}
-static int buf_check_size(struct cbuf *buf, int len)
-{
-        if (buf->p + len > buf->ep) {
-                if (buf->p < buf->ep) {
-                        eprintk(KERN_ERR, "buffer overflow: want %d has %d\n",
-                                len, (int)(buf->ep - buf->p));
-                        dump_stack();
-                        buf->p = buf->ep + 1;
-                }
-                return 0;
-        }
-        return 1;
-}
-static void *buf_alloc(struct cbuf *buf, int len)
-{
-        void *ret = NULL;
-        if (buf_check_size(buf, len)) {
-                ret = buf->p;
-                buf->p += len;
-        }
-        return ret;
-}
-static void buf_put_int8(struct cbuf *buf, u8 val)
-{
-        if (buf_check_size(buf, 1)) {
-                buf->p[0] = val;
-                buf->p++;
-        }
-}
-static void buf_put_int16(struct cbuf *buf, u16 val)
-{
-        if (buf_check_size(buf, 2)) {
-                *(__le16 *) buf->p = cpu_to_le16(val);
-                buf->p += 2;
-        }
-}
-static void buf_put_int32(struct cbuf *buf, u32 val)
-{
-        if (buf_check_size(buf, 4)) {
-                *(__le32 *)buf->p = cpu_to_le32(val);
-                buf->p += 4;
-        }
-}
-static void buf_put_int64(struct cbuf *buf, u64 val)
-{
-        if (buf_check_size(buf, 8)) {
-                *(__le64 *)buf->p = cpu_to_le64(val);
-                buf->p += 8;
-        }
-}
-static char *buf_put_stringn(struct cbuf *buf, const char *s, u16 slen)
-{
-        char *ret;
-        ret = NULL;
-        if (buf_check_size(buf, slen + 2)) {
-                buf_put_int16(buf, slen);
-                ret = buf->p;
-                memcpy(buf->p, s, slen);
-                buf->p += slen;
-        }
-        return ret;
-}
-static inline void buf_put_string(struct cbuf *buf, const char *s)
-{
-        buf_put_stringn(buf, s, strlen(s));
-}
-static u8 buf_get_int8(struct cbuf *buf)
-{
-        u8 ret = 0;
-        if (buf_check_size(buf, 1)) {
-                ret = buf->p[0];
-                buf->p++;
-        }
-        return ret;
-}
-static u16 buf_get_int16(struct cbuf *buf)
-{
-        u16 ret = 0;
-        if (buf_check_size(buf, 2)) {
-                ret = le16_to_cpu(*(__le16 *)buf->p);
-                buf->p += 2;
-        }
-        return ret;
-}
-static u32 buf_get_int32(struct cbuf *buf)
-{
-        u32 ret = 0;
-        if (buf_check_size(buf, 4)) {
-                ret = le32_to_cpu(*(__le32 *)buf->p);
-                buf->p += 4;
-        }
-        return ret;
-}
-static u64 buf_get_int64(struct cbuf *buf)
-{
-        u64 ret = 0;
-        if (buf_check_size(buf, 8)) {
-                ret = le64_to_cpu(*(__le64 *)buf->p);
-                buf->p += 8;
-        }
-        return ret;
-}
-static void buf_get_str(struct cbuf *buf, struct v9fs_str *vstr)
-{
-        vstr->len = buf_get_int16(buf);
-        if (!buf_check_overflow(buf) && buf_check_size(buf, vstr->len)) {
-                vstr->str = buf->p;
-                buf->p += vstr->len;
-        } else {
-                vstr->len = 0;
-                vstr->str = NULL;
-        }
-}
-static void buf_get_qid(struct cbuf *bufp, struct v9fs_qid *qid)
-{
-        qid->type = buf_get_int8(bufp);
-        qid->version = buf_get_int32(bufp);
-        qid->path = buf_get_int64(bufp);
-}
-/**
- * v9fs_size_wstat - calculate the size of a variable length stat struct
- * @stat: metadata (stat) structure
- * @extended: non-zero if 9P2000.u
- *
- */
-static int v9fs_size_wstat(struct v9fs_wstat *wstat, int extended)
-{
-        int size = 0;
-        if (wstat == NULL) {
-                eprintk(KERN_ERR, "v9fs_size_stat: got a NULL stat pointer\n");
-                return 0;
-        }
-        size =                  /* 2 + *//* size[2] */
-            2 +                 /* type[2] */
-            4 +                 /* dev[4] */
-            1 +                 /* qid.type[1] */
-            4 +                 /* qid.vers[4] */
-            8 +                 /* qid.path[8] */
-            4 +                 /* mode[4] */
-            4 +                 /* atime[4] */
-            4 +                 /* mtime[4] */
-            8 +                 /* length[8] */
-            8;                  /* minimum sum of string lengths */
-        if (wstat->name)
-                size += strlen(wstat->name);
-        if (wstat->uid)
-                size += strlen(wstat->uid);
-        if (wstat->gid)
-                size += strlen(wstat->gid);
-        if (wstat->muid)
-                size += strlen(wstat->muid);
-        if (extended) {
-                size += 4 +     /* n_uid[4] */
-                    4 +         /* n_gid[4] */
-                    4 +         /* n_muid[4] */
-                    2;          /* string length of extension[4] */
-                if (wstat->extension)
-                        size += strlen(wstat->extension);
-        }
-        return size;
-}
-/**
- * buf_get_stat - safely decode a recieved metadata (stat) structure
- * @bufp: buffer to deserialize
- * @stat: metadata (stat) structure
- * @extended: non-zero if 9P2000.u
- *
- */
-static void
-buf_get_stat(struct cbuf *bufp, struct v9fs_stat *stat, int extended)
-{
-        stat->size = buf_get_int16(bufp);
-        stat->type = buf_get_int16(bufp);
-        stat->dev = buf_get_int32(bufp);
-        stat->qid.type = buf_get_int8(bufp);
-        stat->qid.version = buf_get_int32(bufp);
-        stat->qid.path = buf_get_int64(bufp);
-        stat->mode = buf_get_int32(bufp);
-        stat->atime = buf_get_int32(bufp);
-        stat->mtime = buf_get_int32(bufp);
-        stat->length = buf_get_int64(bufp);
-        buf_get_str(bufp, &stat->name);
-        buf_get_str(bufp, &stat->uid);
-        buf_get_str(bufp, &stat->gid);
-        buf_get_str(bufp, &stat->muid);
-        if (extended) {
-                buf_get_str(bufp, &stat->extension);
-                stat->n_uid = buf_get_int32(bufp);
-                stat->n_gid = buf_get_int32(bufp);
-                stat->n_muid = buf_get_int32(bufp);
-        }
-}
-/**
- * v9fs_deserialize_stat - decode a received metadata structure
- * @buf: buffer to deserialize
- * @buflen: length of received buffer
- * @stat: metadata structure to decode into
- * @extended: non-zero if 9P2000.u
- *
- * Note: stat will point to the buf region.
- */
-int
-v9fs_deserialize_stat(void *buf, u32 buflen, struct v9fs_stat *stat,
-                int extended)
-{
-        struct cbuf buffer;
-        struct cbuf *bufp = &buffer;
-        unsigned char *p;
-        buf_init(bufp, buf, buflen);
-        p = bufp->p;
-        buf_get_stat(bufp, stat, extended);
-        if (buf_check_overflow(bufp))
-                return 0;
-        else
-                return bufp->p - p;
-}
-/**
- * deserialize_fcall - unmarshal a response
- * @buf: recieved buffer
- * @buflen: length of received buffer
- * @rcall: fcall structure to populate
- * @rcalllen: length of fcall structure to populate
- * @extended: non-zero if 9P2000.u
- *
- */
-int
-v9fs_deserialize_fcall(void *buf, u32 buflen, struct v9fs_fcall *rcall,
-                       int extended)
-{
-        struct cbuf buffer;
-        struct cbuf *bufp = &buffer;
-        int i = 0;
-        buf_init(bufp, buf, buflen);
-        rcall->size = buf_get_int32(bufp);
-        rcall->id = buf_get_int8(bufp);
-        rcall->tag = buf_get_int16(bufp);
-        dprintk(DEBUG_CONV, "size %d id %d tag %d\n", rcall->size, rcall->id,
-                rcall->tag);
-        switch (rcall->id) {
-        default:
-                eprintk(KERN_ERR, "unknown message type: %d\n", rcall->id);
-                return -EPROTO;
-        case RVERSION:
-                rcall->params.rversion.msize = buf_get_int32(bufp);
-                buf_get_str(bufp, &rcall->params.rversion.version);
-                break;
-        case RFLUSH:
-                break;
-        case RATTACH:
-                rcall->params.rattach.qid.type = buf_get_int8(bufp);
-                rcall->params.rattach.qid.version = buf_get_int32(bufp);
-                rcall->params.rattach.qid.path = buf_get_int64(bufp);
-                break;
-        case RWALK:
-                rcall->params.rwalk.nwqid = buf_get_int16(bufp);
-                if (rcall->params.rwalk.nwqid > V9FS_MAXWELEM) {
-                        eprintk(KERN_ERR, "Rwalk with more than %d qids: %d\n",
-                                V9FS_MAXWELEM, rcall->params.rwalk.nwqid);
-                        return -EPROTO;
-                }
-                for (i = 0; i < rcall->params.rwalk.nwqid; i++)
-                        buf_get_qid(bufp, &rcall->params.rwalk.wqids[i]);
-                break;
-        case ROPEN:
-                buf_get_qid(bufp, &rcall->params.ropen.qid);
-                rcall->params.ropen.iounit = buf_get_int32(bufp);
-                break;
-        case RCREATE:
-                buf_get_qid(bufp, &rcall->params.rcreate.qid);
-                rcall->params.rcreate.iounit = buf_get_int32(bufp);
-                break;
-        case RREAD:
-                rcall->params.rread.count = buf_get_int32(bufp);
-                rcall->params.rread.data = bufp->p;
-                buf_check_size(bufp, rcall->params.rread.count);
-                break;
-        case RWRITE:
-                rcall->params.rwrite.count = buf_get_int32(bufp);
-                break;
-        case RCLUNK:
-                break;
-        case RREMOVE:
-                break;
-        case RSTAT:
-                buf_get_int16(bufp);
-                buf_get_stat(bufp, &rcall->params.rstat.stat, extended);
-                break;
-        case RWSTAT:
-                break;
-        case RERROR:
-                buf_get_str(bufp, &rcall->params.rerror.error);
-                if (extended)
-                        rcall->params.rerror.errno = buf_get_int16(bufp);
-                break;
-        }
-        if (buf_check_overflow(bufp)) {
-                dprintk(DEBUG_ERROR, "buffer overflow\n");
-                return -EIO;
-        }
-        return bufp->p - bufp->sp;
-}
-static inline void v9fs_put_int8(struct cbuf *bufp, u8 val, u8 * p)
-{
-        *p = val;
-        buf_put_int8(bufp, val);
-}
-static inline void v9fs_put_int16(struct cbuf *bufp, u16 val, u16 * p)
-{
-        *p = val;
-        buf_put_int16(bufp, val);
-}
-static inline void v9fs_put_int32(struct cbuf *bufp, u32 val, u32 * p)
-{
-        *p = val;
-        buf_put_int32(bufp, val);
-}
-static inline void v9fs_put_int64(struct cbuf *bufp, u64 val, u64 * p)
-{
-        *p = val;
-        buf_put_int64(bufp, val);
-}
-static void
-v9fs_put_str(struct cbuf *bufp, char *data, struct v9fs_str *str)
-{
-        int len;
-        char *s;
-        if (data)
-                len = strlen(data);
-        else
-                len = 0;
-        s = buf_put_stringn(bufp, data, len);
-        if (str) {
-                str->len = len;
-                str->str = s;
-        }
-}
-static int
-v9fs_put_user_data(struct cbuf *bufp, const char __user * data, int count,
-                   unsigned char **pdata)
-{
-        *pdata = buf_alloc(bufp, count);
-        return copy_from_user(*pdata, data, count);
-}
-static void
-v9fs_put_wstat(struct cbuf *bufp, struct v9fs_wstat *wstat,
-               struct v9fs_stat *stat, int statsz, int extended)
-{
-        v9fs_put_int16(bufp, statsz, &stat->size);
-        v9fs_put_int16(bufp, wstat->type, &stat->type);
-        v9fs_put_int32(bufp, wstat->dev, &stat->dev);
-        v9fs_put_int8(bufp, wstat->qid.type, &stat->qid.type);
-        v9fs_put_int32(bufp, wstat->qid.version, &stat->qid.version);
-        v9fs_put_int64(bufp, wstat->qid.path, &stat->qid.path);
-        v9fs_put_int32(bufp, wstat->mode, &stat->mode);
-        v9fs_put_int32(bufp, wstat->atime, &stat->atime);
-        v9fs_put_int32(bufp, wstat->mtime, &stat->mtime);
-        v9fs_put_int64(bufp, wstat->length, &stat->length);
-        v9fs_put_str(bufp, wstat->name, &stat->name);
-        v9fs_put_str(bufp, wstat->uid, &stat->uid);
-        v9fs_put_str(bufp, wstat->gid, &stat->gid);
-        v9fs_put_str(bufp, wstat->muid, &stat->muid);
-        if (extended) {
-                v9fs_put_str(bufp, wstat->extension, &stat->extension);
-                v9fs_put_int32(bufp, wstat->n_uid, &stat->n_uid);
-                v9fs_put_int32(bufp, wstat->n_gid, &stat->n_gid);
-                v9fs_put_int32(bufp, wstat->n_muid, &stat->n_muid);
-        }
-}
-static struct v9fs_fcall *
-v9fs_create_common(struct cbuf *bufp, u32 size, u8 id)
-{
-        struct v9fs_fcall *fc;
-        size += 4 + 1 + 2;      /* size[4] id[1] tag[2] */
-        fc = kmalloc(sizeof(struct v9fs_fcall) + size, GFP_KERNEL);
-        if (!fc)
-                return ERR_PTR(-ENOMEM);
-        fc->sdata = (char *)fc + sizeof(*fc);
-        buf_init(bufp, (char *)fc->sdata, size);
-        v9fs_put_int32(bufp, size, &fc->size);
-        v9fs_put_int8(bufp, id, &fc->id);
-        v9fs_put_int16(bufp, V9FS_NOTAG, &fc->tag);
-        return fc;
-}
-void v9fs_set_tag(struct v9fs_fcall *fc, u16 tag)
-{
-        fc->tag = tag;
-        *(__le16 *) (fc->sdata + 5) = cpu_to_le16(tag);
-}
-struct v9fs_fcall *v9fs_create_tversion(u32 msize, char *version)
-{
-        int size;
-        struct v9fs_fcall *fc;
-        struct cbuf buffer;
-        struct cbuf *bufp = &buffer;
-        size = 4 + 2 + strlen(version); /* msize[4] version[s] */
-        fc = v9fs_create_common(bufp, size, TVERSION);
-        if (IS_ERR(fc))
-                goto error;
-        v9fs_put_int32(bufp, msize, &fc->params.tversion.msize);
-        v9fs_put_str(bufp, version, &fc->params.tversion.version);
-        if (buf_check_overflow(bufp)) {
-                kfree(fc);
-                fc = ERR_PTR(-ENOMEM);
-        }
-      error:
-        return fc;
-}
-#if 0
-struct v9fs_fcall *v9fs_create_tauth(u32 afid, char *uname, char *aname)
-{
-        int size;
-        struct v9fs_fcall *fc;
-        struct cbuf buffer;
-        struct cbuf *bufp = &buffer;
-        size = 4 + 2 + strlen(uname) + 2 + strlen(aname);       /* afid[4] uname[s] aname[s] */
-        fc = v9fs_create_common(bufp, size, TAUTH);
-        if (IS_ERR(fc))
-                goto error;
-        v9fs_put_int32(bufp, afid, &fc->params.tauth.afid);
-        v9fs_put_str(bufp, uname, &fc->params.tauth.uname);
-        v9fs_put_str(bufp, aname, &fc->params.tauth.aname);
-        if (buf_check_overflow(bufp)) {
-                kfree(fc);
-                fc = ERR_PTR(-ENOMEM);
-        }
-      error:
-        return fc;
-}
-#endif  /*  0  */
-struct v9fs_fcall *
-v9fs_create_tattach(u32 fid, u32 afid, char *uname, char *aname)
-{
-        int size;
-        struct v9fs_fcall *fc;
-        struct cbuf buffer;
-        struct cbuf *bufp = &buffer;
-        size = 4 + 4 + 2 + strlen(uname) + 2 + strlen(aname);   /* fid[4] afid[4] uname[s] aname[s] */
-        fc = v9fs_create_common(bufp, size, TATTACH);
-        if (IS_ERR(fc))
-                goto error;
-        v9fs_put_int32(bufp, fid, &fc->params.tattach.fid);
-        v9fs_put_int32(bufp, afid, &fc->params.tattach.afid);
-        v9fs_put_str(bufp, uname, &fc->params.tattach.uname);
-        v9fs_put_str(bufp, aname, &fc->params.tattach.aname);
-      error:
-        return fc;
-}
-struct v9fs_fcall *v9fs_create_tflush(u16 oldtag)
-{
-        int size;
-        struct v9fs_fcall *fc;
-        struct cbuf buffer;
-        struct cbuf *bufp = &buffer;
-        size = 2;               /* oldtag[2] */
-        fc = v9fs_create_common(bufp, size, TFLUSH);
-        if (IS_ERR(fc))
-                goto error;
-        v9fs_put_int16(bufp, oldtag, &fc->params.tflush.oldtag);
-        if (buf_check_overflow(bufp)) {
-                kfree(fc);
-                fc = ERR_PTR(-ENOMEM);
-        }
-      error:
-        return fc;
-}
-struct v9fs_fcall *v9fs_create_twalk(u32 fid, u32 newfid, u16 nwname,
-                                     char **wnames)
-{
-        int i, size;
-        struct v9fs_fcall *fc;
-        struct cbuf buffer;
-        struct cbuf *bufp = &buffer;
-        if (nwname > V9FS_MAXWELEM) {
-                dprintk(DEBUG_ERROR, "nwname > %d\n", V9FS_MAXWELEM);
-                return NULL;
-        }
-        size = 4 + 4 + 2;       /* fid[4] newfid[4] nwname[2] ... */
-        for (i = 0; i < nwname; i++) {
-                size += 2 + strlen(wnames[i]);  /* wname[s] */
-        }
-        fc = v9fs_create_common(bufp, size, TWALK);
-        if (IS_ERR(fc))
-                goto error;
-        v9fs_put_int32(bufp, fid, &fc->params.twalk.fid);
-        v9fs_put_int32(bufp, newfid, &fc->params.twalk.newfid);
-        v9fs_put_int16(bufp, nwname, &fc->params.twalk.nwname);
-        for (i = 0; i < nwname; i++) {
-                v9fs_put_str(bufp, wnames[i], &fc->params.twalk.wnames[i]);
-        }
-        if (buf_check_overflow(bufp)) {
-                kfree(fc);
-                fc = ERR_PTR(-ENOMEM);
-        }
-      error:
-        return fc;
-}
-struct v9fs_fcall *v9fs_create_topen(u32 fid, u8 mode)
-{
-        int size;
-        struct v9fs_fcall *fc;
-        struct cbuf buffer;
-        struct cbuf *bufp = &buffer;
-        size = 4 + 1;           /* fid[4] mode[1] */
-        fc = v9fs_create_common(bufp, size, TOPEN);
-        if (IS_ERR(fc))
-                goto error;
-        v9fs_put_int32(bufp, fid, &fc->params.topen.fid);
-        v9fs_put_int8(bufp, mode, &fc->params.topen.mode);
-        if (buf_check_overflow(bufp)) {
-                kfree(fc);
-                fc = ERR_PTR(-ENOMEM);
-        }
-      error:
-        return fc;
-}
-struct v9fs_fcall *v9fs_create_tcreate(u32 fid, char *name, u32 perm, u8 mode,
-        char *extension, int extended)
-{
-        int size;
-        struct v9fs_fcall *fc;
-        struct cbuf buffer;
-        struct cbuf *bufp = &buffer;
-        size = 4 + 2 + strlen(name) + 4 + 1;    /* fid[4] name[s] perm[4] mode[1] */
-        if (extended) {
-                size += 2 +                     /* extension[s] */
-                    (extension == NULL ? 0 : strlen(extension));
-        }
-        fc = v9fs_create_common(bufp, size, TCREATE);
-        if (IS_ERR(fc))
-                goto error;
-        v9fs_put_int32(bufp, fid, &fc->params.tcreate.fid);
-        v9fs_put_str(bufp, name, &fc->params.tcreate.name);
-        v9fs_put_int32(bufp, perm, &fc->params.tcreate.perm);
-        v9fs_put_int8(bufp, mode, &fc->params.tcreate.mode);
-        if (extended)
-                v9fs_put_str(bufp, extension, &fc->params.tcreate.extension);
-        if (buf_check_overflow(bufp)) {
-                kfree(fc);
-                fc = ERR_PTR(-ENOMEM);
-        }
-      error:
-        return fc;
-}
-struct v9fs_fcall *v9fs_create_tread(u32 fid, u64 offset, u32 count)
-{
-        int size;
-        struct v9fs_fcall *fc;
-        struct cbuf buffer;
-        struct cbuf *bufp = &buffer;
-        size = 4 + 8 + 4;       /* fid[4] offset[8] count[4] */
-        fc = v9fs_create_common(bufp, size, TREAD);
-        if (IS_ERR(fc))
-                goto error;
-        v9fs_put_int32(bufp, fid, &fc->params.tread.fid);
-        v9fs_put_int64(bufp, offset, &fc->params.tread.offset);
-        v9fs_put_int32(bufp, count, &fc->params.tread.count);
-        if (buf_check_overflow(bufp)) {
-                kfree(fc);
-                fc = ERR_PTR(-ENOMEM);
-        }
-      error:
-        return fc;
-}
-struct v9fs_fcall *v9fs_create_twrite(u32 fid, u64 offset, u32 count,
-                                      const char __user * data)
-{
-        int size, err;
-        struct v9fs_fcall *fc;
-        struct cbuf buffer;
-        struct cbuf *bufp = &buffer;
-        size = 4 + 8 + 4 + count;       /* fid[4] offset[8] count[4] data[count] */
-        fc = v9fs_create_common(bufp, size, TWRITE);
-        if (IS_ERR(fc))
-                goto error;
-        v9fs_put_int32(bufp, fid, &fc->params.twrite.fid);
-        v9fs_put_int64(bufp, offset, &fc->params.twrite.offset);
-        v9fs_put_int32(bufp, count, &fc->params.twrite.count);
-        err = v9fs_put_user_data(bufp, data, count, &fc->params.twrite.data);
-        if (err) {
-                kfree(fc);
-                fc = ERR_PTR(err);
-        }
-        if (buf_check_overflow(bufp)) {
-                kfree(fc);
-                fc = ERR_PTR(-ENOMEM);
-        }
-      error:
-        return fc;
-}
-struct v9fs_fcall *v9fs_create_tclunk(u32 fid)
-{
-        int size;
-        struct v9fs_fcall *fc;
-        struct cbuf buffer;
-        struct cbuf *bufp = &buffer;
-        size = 4;               /* fid[4] */
-        fc = v9fs_create_common(bufp, size, TCLUNK);
-        if (IS_ERR(fc))
-                goto error;
-        v9fs_put_int32(bufp, fid, &fc->params.tclunk.fid);
-        if (buf_check_overflow(bufp)) {
-                kfree(fc);
-                fc = ERR_PTR(-ENOMEM);
-        }
-      error:
-        return fc;
-}
-struct v9fs_fcall *v9fs_create_tremove(u32 fid)
-{
-        int size;
-        struct v9fs_fcall *fc;
-        struct cbuf buffer;
-        struct cbuf *bufp = &buffer;
-        size = 4;               /* fid[4] */
-        fc = v9fs_create_common(bufp, size, TREMOVE);
-        if (IS_ERR(fc))
-                goto error;
-        v9fs_put_int32(bufp, fid, &fc->params.tremove.fid);
-        if (buf_check_overflow(bufp)) {
-                kfree(fc);
-                fc = ERR_PTR(-ENOMEM);
-        }
-      error:
-        return fc;
-}
-struct v9fs_fcall *v9fs_create_tstat(u32 fid)
-{
-        int size;
-        struct v9fs_fcall *fc;
-        struct cbuf buffer;
-        struct cbuf *bufp = &buffer;
-        size = 4;               /* fid[4] */
-        fc = v9fs_create_common(bufp, size, TSTAT);
-        if (IS_ERR(fc))
-                goto error;
-        v9fs_put_int32(bufp, fid, &fc->params.tstat.fid);
-        if (buf_check_overflow(bufp)) {
-                kfree(fc);
-                fc = ERR_PTR(-ENOMEM);
-        }
-      error:
-        return fc;
-}
-struct v9fs_fcall *v9fs_create_twstat(u32 fid, struct v9fs_wstat *wstat,
-                                      int extended)
-{
-        int size, statsz;
-        struct v9fs_fcall *fc;
-        struct cbuf buffer;
-        struct cbuf *bufp = &buffer;
-        statsz = v9fs_size_wstat(wstat, extended);
-        size = 4 + 2 + 2 + statsz;      /* fid[4] stat[n] */
-        fc = v9fs_create_common(bufp, size, TWSTAT);
-        if (IS_ERR(fc))
-                goto error;
-        v9fs_put_int32(bufp, fid, &fc->params.twstat.fid);
-        buf_put_int16(bufp, statsz + 2);
-        v9fs_put_wstat(bufp, wstat, &fc->params.twstat.stat, statsz, extended);
-        if (buf_check_overflow(bufp)) {
-                kfree(fc);
-                fc = ERR_PTR(-ENOMEM);
-        }
-      error:
-        return fc;
-}
diff --git a/fs/9p/conv.h b/fs/9p/conv.h
deleted file mode 100644
index dd5b6b1b610f..000000000000
--- a/fs/9p/conv.h
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- * linux/fs/9p/conv.h
- *
- * 9P protocol conversion definitions.
- *
- *  Copyright (C) 2005 by Latchesar Ionkov <lucho@ionkov.net>
- *  Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
- *  Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License version 2
- *  as published by the Free Software Foundation.
- *
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with this program; if not, write to:
- *  Free Software Foundation
- *  51 Franklin Street, Fifth Floor
- *  Boston, MA  02111-1301  USA
- *
- */
-int v9fs_deserialize_stat(void *buf, u32 buflen, struct v9fs_stat *stat,
-        int extended);
-int v9fs_deserialize_fcall(void *buf, u32 buflen, struct v9fs_fcall *rcall,
-        int extended);
-void v9fs_set_tag(struct v9fs_fcall *fc, u16 tag);
-struct v9fs_fcall *v9fs_create_tversion(u32 msize, char *version);
-struct v9fs_fcall *v9fs_create_tattach(u32 fid, u32 afid, char *uname,
-        char *aname);
-struct v9fs_fcall *v9fs_create_tflush(u16 oldtag);
-struct v9fs_fcall *v9fs_create_twalk(u32 fid, u32 newfid, u16 nwname,
-        char **wnames);
-struct v9fs_fcall *v9fs_create_topen(u32 fid, u8 mode);
-struct v9fs_fcall *v9fs_create_tcreate(u32 fid, char *name, u32 perm, u8 mode,
-        char *extension, int extended);
-struct v9fs_fcall *v9fs_create_tread(u32 fid, u64 offset, u32 count);
-struct v9fs_fcall *v9fs_create_twrite(u32 fid, u64 offset, u32 count,
-        const char __user *data);
-struct v9fs_fcall *v9fs_create_tclunk(u32 fid);
-struct v9fs_fcall *v9fs_create_tremove(u32 fid);
-struct v9fs_fcall *v9fs_create_tstat(u32 fid);
-struct v9fs_fcall *v9fs_create_twstat(u32 fid, struct v9fs_wstat *wstat,
-        int extended);
diff --git a/fs/9p/debug.h b/fs/9p/debug.h
deleted file mode 100644
index 4228c0bb3c32..000000000000
--- a/fs/9p/debug.h
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- *  linux/fs/9p/debug.h - V9FS Debug Definitions
- *
- *  Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
- *  Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License version 2
- *  as published by the Free Software Foundation.
- *
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with this program; if not, write to:
- *  Free Software Foundation
- *  51 Franklin Street, Fifth Floor
- *  Boston, MA  02111-1301  USA
- *
- */
-#define DEBUG_ERROR             (1<<0)
-#define DEBUG_CURRENT           (1<<1)
-#define DEBUG_9P                (1<<2)
-#define DEBUG_VFS               (1<<3)
-#define DEBUG_CONV              (1<<4)
-#define DEBUG_MUX               (1<<5)
-#define DEBUG_TRANS             (1<<6)
-#define DEBUG_SLABS             (1<<7)
-#define DEBUG_FCALL             (1<<8)
-#define DEBUG_DUMP_PKT          0
-extern int v9fs_debug_level;
-#define dprintk(level, format, arg...) \
-do {  \
-        if((v9fs_debug_level & level)==level) \
-                printk(KERN_NOTICE "-- %s (%d): " \
-                format , __FUNCTION__, current->pid , ## arg); \
-} while(0)
-#define eprintk(level, format, arg...) \
-do { \
-        printk(level "v9fs: %s (%d): " \
-                format , __FUNCTION__, current->pid , ## arg); \
-} while(0)
-#if DEBUG_DUMP_PKT
-static inline void dump_data(const unsigned char *data, unsigned int datalen)
-{
-        int i, n;
-        char buf[5*8];
-        n = 0;
-        i = 0;
-        while (i < datalen) {
-                n += snprintf(buf+n, sizeof(buf)-n, "%02x", data[i++]);
-                if (i%4 == 0)
-                        n += snprintf(buf+n, sizeof(buf)-n, " ");
-                if (i%16 == 0) {
-                        dprintk(DEBUG_ERROR, "%s\n", buf);
-                        n = 0;
-                }
-        }
-        dprintk(DEBUG_ERROR, "%s\n", buf);
-}
-#else                           /* DEBUG_DUMP_PKT */
-static inline void dump_data(const unsigned char *data, unsigned int datalen)
-{
-}
-#endif                          /* DEBUG_DUMP_PKT */
diff --git a/fs/9p/error.c b/fs/9p/error.c
deleted file mode 100644
index 0d7fa4e08812..000000000000
--- a/fs/9p/error.c
+++ /dev/null
@@ -1,93 +0,0 @@
-/*
- * linux/fs/9p/error.c
- *
- * Error string handling
- *
- * Plan 9 uses error strings, Unix uses error numbers.  These functions
- * try to help manage that and provide for dynamically adding error
- * mappings.
- *
- *  Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
- *  Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License version 2
- *  as published by the Free Software Foundation.
- *
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with this program; if not, write to:
- *  Free Software Foundation
- *  51 Franklin Street, Fifth Floor
- *  Boston, MA  02111-1301  USA
- *
- */
-#include <linux/module.h>
-#include <linux/list.h>
-#include <linux/jhash.h>
-#include "debug.h"
-#include "error.h"
-/**
- * v9fs_error_init - preload
- * @errstr: error string
- *
- */
-int v9fs_error_init(void)
-{
-        struct errormap *c;
-        int bucket;
-        /* initialize hash table */
-        for (bucket = 0; bucket < ERRHASHSZ; bucket++)
-                INIT_HLIST_HEAD(&hash_errmap[bucket]);
-        /* load initial error map into hash table */
-        for (c = errmap; c->name != NULL; c++) {
-                c->namelen = strlen(c->name);
-                bucket = jhash(c->name, c->namelen, 0) % ERRHASHSZ;
-                INIT_HLIST_NODE(&c->list);
-                hlist_add_head(&c->list, &hash_errmap[bucket]);
-        }
-        return 1;
-}
-/**
- * errstr2errno - convert error string to error number
- * @errstr: error string
- *
- */
-int v9fs_errstr2errno(char *errstr, int len)
-{
-        int errno = 0;
-        struct hlist_node *p = NULL;
-        struct errormap *c = NULL;
-        int bucket = jhash(errstr, len, 0) % ERRHASHSZ;
-        hlist_for_each_entry(c, p, &hash_errmap[bucket], list) {
-                if (c->namelen==len && !memcmp(c->name, errstr, len)) {
-                        errno = c->val;
-                        break;
-                }
-        }
-        if (errno == 0) {
-                /* TODO: if error isn't found, add it dynamically */
-                errstr[len] = 0;
-                printk(KERN_ERR "%s: errstr :%s: not found\n", __FUNCTION__,
-                       errstr);
-                errno = 1;
-        }
-        return -errno;
-}
diff --git a/fs/9p/error.h b/fs/9p/error.h
deleted file mode 100644
index 5f3ca522b316..000000000000
--- a/fs/9p/error.h
+++ /dev/null
@@ -1,177 +0,0 @@
-/*
- * linux/fs/9p/error.h
- *
- * Huge Nasty Error Table
- *
- * Plan 9 uses error strings, Unix uses error numbers.  This table tries to
- * match UNIX strings and Plan 9 strings to unix error numbers.  It is used
- * to preload the dynamic error table which can also track user-specific error
- * strings.
- *
- *  Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
- *  Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License version 2
- *  as published by the Free Software Foundation.
- *
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with this program; if not, write to:
- *  Free Software Foundation
- *  51 Franklin Street, Fifth Floor
- *  Boston, MA  02111-1301  USA
- *
- */
-#include <linux/errno.h>
-#include <asm/errno.h>
-struct errormap {
-        char *name;
-        int val;
-        int namelen;
-        struct hlist_node list;
-};
-#define ERRHASHSZ               32
-static struct hlist_head hash_errmap[ERRHASHSZ];
-/* FixMe - reduce to a reasonable size */
-static struct errormap errmap[] = {
-        {"Operation not permitted", EPERM},
-        {"wstat prohibited", EPERM},
-        {"No such file or directory", ENOENT},
-        {"directory entry not found", ENOENT},
-        {"file not found", ENOENT},
-        {"Interrupted system call", EINTR},
-        {"Input/output error", EIO},
-        {"No such device or address", ENXIO},
-        {"Argument list too long", E2BIG},
-        {"Bad file descriptor", EBADF},
-        {"Resource temporarily unavailable", EAGAIN},
-        {"Cannot allocate memory", ENOMEM},
-        {"Permission denied", EACCES},
-        {"Bad address", EFAULT},
-        {"Block device required", ENOTBLK},
-        {"Device or resource busy", EBUSY},
-        {"File exists", EEXIST},
-        {"Invalid cross-device link", EXDEV},
-        {"No such device", ENODEV},
-        {"Not a directory", ENOTDIR},
-        {"Is a directory", EISDIR},
-        {"Invalid argument", EINVAL},
-        {"Too many open files in system", ENFILE},
-        {"Too many open files", EMFILE},
-        {"Text file busy", ETXTBSY},
-        {"File too large", EFBIG},
-        {"No space left on device", ENOSPC},
-        {"Illegal seek", ESPIPE},
-        {"Read-only file system", EROFS},
-        {"Too many links", EMLINK},
-        {"Broken pipe", EPIPE},
-        {"Numerical argument out of domain", EDOM},
-        {"Numerical result out of range", ERANGE},
-        {"Resource deadlock avoided", EDEADLK},
-        {"File name too long", ENAMETOOLONG},
-        {"No locks available", ENOLCK},
-        {"Function not implemented", ENOSYS},
-        {"Directory not empty", ENOTEMPTY},
-        {"Too many levels of symbolic links", ELOOP},
-        {"No message of desired type", ENOMSG},
-        {"Identifier removed", EIDRM},
-        {"No data available", ENODATA},
-        {"Machine is not on the network", ENONET},
-        {"Package not installed", ENOPKG},
-        {"Object is remote", EREMOTE},
-        {"Link has been severed", ENOLINK},
-        {"Communication error on send", ECOMM},
-        {"Protocol error", EPROTO},
-        {"Bad message", EBADMSG},
-        {"File descriptor in bad state", EBADFD},
-        {"Streams pipe error", ESTRPIPE},
-        {"Too many users", EUSERS},
-        {"Socket operation on non-socket", ENOTSOCK},
-        {"Message too long", EMSGSIZE},
-        {"Protocol not available", ENOPROTOOPT},
-        {"Protocol not supported", EPROTONOSUPPORT},
-        {"Socket type not supported", ESOCKTNOSUPPORT},
-        {"Operation not supported", EOPNOTSUPP},
-        {"Protocol family not supported", EPFNOSUPPORT},
-        {"Network is down", ENETDOWN},
-        {"Network is unreachable", ENETUNREACH},
-        {"Network dropped connection on reset", ENETRESET},
-        {"Software caused connection abort", ECONNABORTED},
-        {"Connection reset by peer", ECONNRESET},
-        {"No buffer space available", ENOBUFS},
-        {"Transport endpoint is already connected", EISCONN},
-        {"Transport endpoint is not connected", ENOTCONN},
-        {"Cannot send after transport endpoint shutdown", ESHUTDOWN},
-        {"Connection timed out", ETIMEDOUT},
-        {"Connection refused", ECONNREFUSED},
-        {"Host is down", EHOSTDOWN},
-        {"No route to host", EHOSTUNREACH},
-        {"Operation already in progress", EALREADY},
-        {"Operation now in progress", EINPROGRESS},
-        {"Is a named type file", EISNAM},
-        {"Remote I/O error", EREMOTEIO},
-        {"Disk quota exceeded", EDQUOT},
-/* errors from fossil, vacfs, and u9fs */
-        {"fid unknown or out of range", EBADF},
-        {"permission denied", EACCES},
-        {"file does not exist", ENOENT},
-        {"authentication failed", ECONNREFUSED},
-        {"bad offset in directory read", ESPIPE},
-        {"bad use of fid", EBADF},
-        {"wstat can't convert between files and directories", EPERM},
-        {"directory is not empty", ENOTEMPTY},
-        {"file exists", EEXIST},
-        {"file already exists", EEXIST},
-        {"file or directory already exists", EEXIST},
-        {"fid already in use", EBADF},
-        {"file in use", ETXTBSY},
-        {"i/o error", EIO},
-        {"file already open for I/O", ETXTBSY},
-        {"illegal mode", EINVAL},
-        {"illegal name", ENAMETOOLONG},
-        {"not a directory", ENOTDIR},
-        {"not a member of proposed group", EPERM},
-        {"not owner", EACCES},
-        {"only owner can change group in wstat", EACCES},
-        {"read only file system", EROFS},
-        {"no access to special file", EPERM},
-        {"i/o count too large", EIO},
-        {"unknown group", EINVAL},
-        {"unknown user", EINVAL},
-        {"bogus wstat buffer", EPROTO},
-        {"exclusive use file already open", EAGAIN},
-        {"corrupted directory entry", EIO},
-        {"corrupted file entry", EIO},
-        {"corrupted block label", EIO},
-        {"corrupted meta data", EIO},
-        {"illegal offset", EINVAL},
-        {"illegal path element", ENOENT},
-        {"root of file system is corrupted", EIO},
-        {"corrupted super block", EIO},
-        {"protocol botch", EPROTO},
-        {"file system is full", ENOSPC},
-        {"file is in use", EAGAIN},
-        {"directory entry is not allocated", ENOENT},
-        {"file is read only", EROFS},
-        {"file has been removed", EIDRM},
-        {"only support truncation to zero length", EPERM},
-        {"cannot remove root", EPERM},
-        {"file too big", EFBIG},
-        {"venti i/o error", EIO},
-        /* these are not errors */
-        {"u9fs rhostsauth: no authentication required", 0},
-        {"u9fs authnone: no authentication required", 0},
-        {NULL, -1}
-};
-extern int v9fs_error_init(void);
diff --git a/fs/9p/fcall.c b/fs/9p/fcall.c
deleted file mode 100644
index dc336a67592f..000000000000
--- a/fs/9p/fcall.c
+++ /dev/null
@@ -1,427 +0,0 @@
-/*
- *  linux/fs/9p/fcall.c
- *
- *  This file contains functions to perform synchronous 9P calls
- *
- *  Copyright (C) 2004 by Latchesar Ionkov <lucho@ionkov.net>
- *  Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
- *  Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License version 2
- *  as published by the Free Software Foundation.
- *
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with this program; if not, write to:
- *  Free Software Foundation
- *  51 Franklin Street, Fifth Floor
- *  Boston, MA  02111-1301  USA
- *
- */
-#include <linux/module.h>
-#include <linux/errno.h>
-#include <linux/fs.h>
-#include <linux/sched.h>
-#include <linux/idr.h>
-#include "debug.h"
-#include "v9fs.h"
-#include "9p.h"
-#include "conv.h"
-#include "mux.h"
-/**
- * v9fs_t_version - negotiate protocol parameters with sever
- * @v9ses: 9P2000 session information
- * @msize: requested max size packet
- * @version: requested version.extension string
- * @fcall: pointer to response fcall pointer
- *
- */
-int
-v9fs_t_version(struct v9fs_session_info *v9ses, u32 msize,
-               char *version, struct v9fs_fcall **rcp)
-{
-        int ret;
-        struct v9fs_fcall *tc;
-        dprintk(DEBUG_9P, "msize: %d version: %s\n", msize, version);
-        tc = v9fs_create_tversion(msize, version);
-        if (!IS_ERR(tc)) {
-                ret = v9fs_mux_rpc(v9ses->mux, tc, rcp);
-                kfree(tc);
-        } else
-                ret = PTR_ERR(tc);
-        return ret;
-}
-/**
- * v9fs_t_attach - mount the server
- * @v9ses: 9P2000 session information
- * @uname: user name doing the attach
- * @aname: remote name being attached to
- * @fid: mount fid to attatch to root node
- * @afid: authentication fid (in this case result key)
- * @fcall: pointer to response fcall pointer
- *
- */
-int
-v9fs_t_attach(struct v9fs_session_info *v9ses, char *uname, char *aname,
-              u32 fid, u32 afid, struct v9fs_fcall **rcp)
-{
-        int ret;
-        struct v9fs_fcall* tc;
-        dprintk(DEBUG_9P, "uname '%s' aname '%s' fid %d afid %d\n", uname,
-                aname, fid, afid);
-        tc = v9fs_create_tattach(fid, afid, uname, aname);
-        if (!IS_ERR(tc)) {
-                ret = v9fs_mux_rpc(v9ses->mux, tc, rcp);
-                kfree(tc);
-        } else
-                ret = PTR_ERR(tc);
-        return ret;
-}
-static void v9fs_t_clunk_cb(void *a, struct v9fs_fcall *tc,
-        struct v9fs_fcall *rc, int err)
-{
-        int fid, id;
-        struct v9fs_session_info *v9ses;
-        id = 0;
-        fid = tc->params.tclunk.fid;
-        if (rc)
-                id = rc->id;
-        kfree(tc);
-        kfree(rc);
-        if (id == RCLUNK) {
-                v9ses = a;
-                v9fs_put_idpool(fid, &v9ses->fidpool);
-        }
-}
-/**
- * v9fs_t_clunk - release a fid (finish a transaction)
- * @v9ses: 9P2000 session information
- * @fid: fid to release
- * @fcall: pointer to response fcall pointer
- *
- */
-int
-v9fs_t_clunk(struct v9fs_session_info *v9ses, u32 fid)
-{
-        int ret;
-        struct v9fs_fcall *tc, *rc;
-        dprintk(DEBUG_9P, "fid %d\n", fid);
-        rc = NULL;
-        tc = v9fs_create_tclunk(fid);
-        if (!IS_ERR(tc))
-                ret = v9fs_mux_rpc(v9ses->mux, tc, &rc);
-        else
-                ret = PTR_ERR(tc);
-        if (ret)
-                dprintk(DEBUG_ERROR, "failed fid %d err %d\n", fid, ret);
-        v9fs_t_clunk_cb(v9ses, tc, rc, ret);
-        return ret;
-}
-#if 0
-/**
- * v9fs_v9fs_t_flush - flush a pending transaction
- * @v9ses: 9P2000 session information
- * @tag: tag to release
- *
- */
-int v9fs_t_flush(struct v9fs_session_info *v9ses, u16 oldtag)
-{
-        int ret;
-        struct v9fs_fcall *tc;
-        dprintk(DEBUG_9P, "oldtag %d\n", oldtag);
-        tc = v9fs_create_tflush(oldtag);
-        if (!IS_ERR(tc)) {
-                ret = v9fs_mux_rpc(v9ses->mux, tc, NULL);
-                kfree(tc);
-        } else
-                ret = PTR_ERR(tc);
-        return ret;
-}
-#endif
-/**
- * v9fs_t_stat - read a file's meta-data
- * @v9ses: 9P2000 session information
- * @fid: fid pointing to file or directory to get info about
- * @fcall: pointer to response fcall
- *
- */
-int
-v9fs_t_stat(struct v9fs_session_info *v9ses, u32 fid, struct v9fs_fcall **rcp)
-{
-        int ret;
-        struct v9fs_fcall *tc;
-        dprintk(DEBUG_9P, "fid %d\n", fid);
-        ret = -ENOMEM;
-        tc = v9fs_create_tstat(fid);
-        if (!IS_ERR(tc)) {
-                ret = v9fs_mux_rpc(v9ses->mux, tc, rcp);
-                kfree(tc);
-        } else
-                ret = PTR_ERR(tc);
-        return ret;
-}
-/**
- * v9fs_t_wstat - write a file's meta-data
- * @v9ses: 9P2000 session information
- * @fid: fid pointing to file or directory to write info about
- * @stat: metadata
- * @fcall: pointer to response fcall
- *
- */
-int
-v9fs_t_wstat(struct v9fs_session_info *v9ses, u32 fid,
-             struct v9fs_wstat *wstat, struct v9fs_fcall **rcp)
-{
-        int ret;
-        struct v9fs_fcall *tc;
-        dprintk(DEBUG_9P, "fid %d\n", fid);
-        tc = v9fs_create_twstat(fid, wstat, v9ses->extended);
-        if (!IS_ERR(tc)) {
-                ret = v9fs_mux_rpc(v9ses->mux, tc, rcp);
-                kfree(tc);
-        } else
-                ret = PTR_ERR(tc);
-        return ret;
-}
-/**
- * v9fs_t_walk - walk a fid to a new file or directory
- * @v9ses: 9P2000 session information
- * @fid: fid to walk
- * @newfid: new fid (for clone operations)
- * @name: path to walk fid to
- * @fcall: pointer to response fcall
- *
- */
-/* TODO: support multiple walk */
-int
-v9fs_t_walk(struct v9fs_session_info *v9ses, u32 fid, u32 newfid,
-            char *name, struct v9fs_fcall **rcp)
-{
-        int ret;
-        struct v9fs_fcall *tc;
-        int nwname;
-        dprintk(DEBUG_9P, "fid %d newfid %d wname '%s'\n", fid, newfid, name);
-        if (name)
-                nwname = 1;
-        else
-                nwname = 0;
-        tc = v9fs_create_twalk(fid, newfid, nwname, &name);
-        if (!IS_ERR(tc)) {
-                ret = v9fs_mux_rpc(v9ses->mux, tc, rcp);
-                kfree(tc);
-        } else
-                ret = PTR_ERR(tc);
-        return ret;
-}
-/**
- * v9fs_t_open - open a file
- *
- * @v9ses - 9P2000 session information
- * @fid - fid to open
- * @mode - mode to open file (R, RW, etc)
- * @fcall - pointer to response fcall
- *
- */
-int
-v9fs_t_open(struct v9fs_session_info *v9ses, u32 fid, u8 mode,
-            struct v9fs_fcall **rcp)
-{
-        int ret;
-        struct v9fs_fcall *tc;
-        dprintk(DEBUG_9P, "fid %d mode %d\n", fid, mode);
-        tc = v9fs_create_topen(fid, mode);
-        if (!IS_ERR(tc)) {
-                ret = v9fs_mux_rpc(v9ses->mux, tc, rcp);
-                kfree(tc);
-        } else
-                ret = PTR_ERR(tc);
-        return ret;
-}
-/**
- * v9fs_t_remove - remove a file or directory
- * @v9ses: 9P2000 session information
- * @fid: fid to remove
- * @fcall: pointer to response fcall
- *
- */
-int
-v9fs_t_remove(struct v9fs_session_info *v9ses, u32 fid,
-              struct v9fs_fcall **rcp)
-{
-        int ret;
-        struct v9fs_fcall *tc;
-        dprintk(DEBUG_9P, "fid %d\n", fid);
-        tc = v9fs_create_tremove(fid);
-        if (!IS_ERR(tc)) {
-                ret = v9fs_mux_rpc(v9ses->mux, tc, rcp);
-                kfree(tc);
-        } else
-                ret = PTR_ERR(tc);
-        return ret;
-}
-/**
- * v9fs_t_create - create a file or directory
- * @v9ses: 9P2000 session information
- * @fid: fid to create
- * @name: name of the file or directory to create
- * @perm: permissions to create with
- * @mode: mode to open file (R, RW, etc)
- * @fcall: pointer to response fcall
- *
- */
-int
-v9fs_t_create(struct v9fs_session_info *v9ses, u32 fid, char *name, u32 perm,
-        u8 mode, char *extension, struct v9fs_fcall **rcp)
-{
-        int ret;
-        struct v9fs_fcall *tc;
-        dprintk(DEBUG_9P, "fid %d name '%s' perm %x mode %d\n",
-                fid, name, perm, mode);
-        tc = v9fs_create_tcreate(fid, name, perm, mode, extension,
-                v9ses->extended);
-        if (!IS_ERR(tc)) {
-                ret = v9fs_mux_rpc(v9ses->mux, tc, rcp);
-                kfree(tc);
-        } else
-                ret = PTR_ERR(tc);
-        return ret;
-}
-/**
- * v9fs_t_read - read data
- * @v9ses: 9P2000 session information
- * @fid: fid to read from
- * @offset: offset to start read at
- * @count: how many bytes to read
- * @fcall: pointer to response fcall (with data)
- *
- */
-int
-v9fs_t_read(struct v9fs_session_info *v9ses, u32 fid, u64 offset,
-            u32 count, struct v9fs_fcall **rcp)
-{
-        int ret;
-        struct v9fs_fcall *tc, *rc;
-        dprintk(DEBUG_9P, "fid %d offset 0x%llux count 0x%x\n", fid,
-                (long long unsigned) offset, count);
-        tc = v9fs_create_tread(fid, offset, count);
-        if (!IS_ERR(tc)) {
-                ret = v9fs_mux_rpc(v9ses->mux, tc, &rc);
-                if (!ret)
-                        ret = rc->params.rread.count;
-                if (rcp)
-                        *rcp = rc;
-                else
-                        kfree(rc);
-                kfree(tc);
-        } else
-                ret = PTR_ERR(tc);
-        return ret;
-}
-/**
- * v9fs_t_write - write data
- * @v9ses: 9P2000 session information
- * @fid: fid to write to
- * @offset: offset to start write at
- * @count: how many bytes to write
- * @fcall: pointer to response fcall
- *
- */
-int
-v9fs_t_write(struct v9fs_session_info *v9ses, u32 fid, u64 offset, u32 count,
-        const char __user *data, struct v9fs_fcall **rcp)
-{
-        int ret;
-        struct v9fs_fcall *tc, *rc;
-        dprintk(DEBUG_9P, "fid %d offset 0x%llux count 0x%x\n", fid,
-                (long long unsigned) offset, count);
-        tc = v9fs_create_twrite(fid, offset, count, data);
-        if (!IS_ERR(tc)) {
-                ret = v9fs_mux_rpc(v9ses->mux, tc, &rc);
-                if (!ret)
-                        ret = rc->params.rwrite.count;
-                if (rcp)
-                        *rcp = rc;
-                else
-                        kfree(rc);
-                kfree(tc);
-        } else
-                ret = PTR_ERR(tc);
-        return ret;
-}
diff --git a/fs/9p/fcprint.c b/fs/9p/fcprint.c
deleted file mode 100644
index 34b96114a28d..000000000000
--- a/fs/9p/fcprint.c
+++ /dev/null
@@ -1,345 +0,0 @@
-/*
- *  linux/fs/9p/fcprint.c
- *
- *  Print 9P call.
- *
- *  Copyright (C) 2005 by Latchesar Ionkov <lucho@ionkov.net>
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License version 2
- *  as published by the Free Software Foundation.
- *
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with this program; if not, write to:
- *  Free Software Foundation
- *  51 Franklin Street, Fifth Floor
- *  Boston, MA  02111-1301  USA
- *
- */
-#include <linux/module.h>
-#include <linux/errno.h>
-#include <linux/fs.h>
-#include <linux/idr.h>
-#include "debug.h"
-#include "v9fs.h"
-#include "9p.h"
-#include "mux.h"
-static int
-v9fs_printqid(char *buf, int buflen, struct v9fs_qid *q)
-{
-        int n;
-        char b[10];
-        n = 0;
-        if (q->type & V9FS_QTDIR)
-                b[n++] = 'd';
-        if (q->type & V9FS_QTAPPEND)
-                b[n++] = 'a';
-        if (q->type & V9FS_QTAUTH)
-                b[n++] = 'A';
-        if (q->type & V9FS_QTEXCL)
-                b[n++] = 'l';
-        if (q->type & V9FS_QTTMP)
-                b[n++] = 't';
-        if (q->type & V9FS_QTSYMLINK)
-                b[n++] = 'L';
-        b[n] = '\0';
-        return scnprintf(buf, buflen, "(%.16llx %x %s)", (long long int) q->path,
-                q->version, b);
-}
-static int
-v9fs_printperm(char *buf, int buflen, int perm)
-{
-        int n;
-        char b[15];
-        n = 0;
-        if (perm & V9FS_DMDIR)
-                b[n++] = 'd';
-        if (perm & V9FS_DMAPPEND)
-                b[n++] = 'a';
-        if (perm & V9FS_DMAUTH)
-                b[n++] = 'A';
-        if (perm & V9FS_DMEXCL)
-                b[n++] = 'l';
-        if (perm & V9FS_DMTMP)
-                b[n++] = 't';
-        if (perm & V9FS_DMDEVICE)
-                b[n++] = 'D';
-        if (perm & V9FS_DMSOCKET)
-                b[n++] = 'S';
-        if (perm & V9FS_DMNAMEDPIPE)
-                b[n++] = 'P';
-        if (perm & V9FS_DMSYMLINK)
-                b[n++] = 'L';
-        b[n] = '\0';
-        return scnprintf(buf, buflen, "%s%03o", b, perm&077);
-}
-static int
-v9fs_printstat(char *buf, int buflen, struct v9fs_stat *st, int extended)
-{
-        int n;
-        n = scnprintf(buf, buflen, "'%.*s' '%.*s'", st->name.len,
-                st->name.str, st->uid.len, st->uid.str);
-        if (extended)
-                n += scnprintf(buf+n, buflen-n, "(%d)", st->n_uid);
-        n += scnprintf(buf+n, buflen-n, " '%.*s'", st->gid.len, st->gid.str);
-        if (extended)
-                n += scnprintf(buf+n, buflen-n, "(%d)", st->n_gid);
-        n += scnprintf(buf+n, buflen-n, " '%.*s'", st->muid.len, st->muid.str);
-        if (extended)
-                n += scnprintf(buf+n, buflen-n, "(%d)", st->n_muid);
-        n += scnprintf(buf+n, buflen-n, " q ");
-        n += v9fs_printqid(buf+n, buflen-n, &st->qid);
-        n += scnprintf(buf+n, buflen-n, " m ");
-        n += v9fs_printperm(buf+n, buflen-n, st->mode);
-        n += scnprintf(buf+n, buflen-n, " at %d mt %d l %lld",
-                st->atime, st->mtime, (long long int) st->length);
-        if (extended)
-                n += scnprintf(buf+n, buflen-n, " ext '%.*s'",
-                        st->extension.len, st->extension.str);
-        return n;
-}
-static int
-v9fs_dumpdata(char *buf, int buflen, u8 *data, int datalen)
-{
-        int i, n;
-        i = n = 0;
-        while (i < datalen) {
-                n += scnprintf(buf + n, buflen - n, "%02x", data[i]);
-                if (i%4 == 3)
-                        n += scnprintf(buf + n, buflen - n, " ");
-                if (i%32 == 31)
-                        n += scnprintf(buf + n, buflen - n, "\n");
-                i++;
-        }
-        n += scnprintf(buf + n, buflen - n, "\n");
-        return n;
-}
-static int
-v9fs_printdata(char *buf, int buflen, u8 *data, int datalen)
-{
-        return v9fs_dumpdata(buf, buflen, data, datalen<16?datalen:16);
-}
-int
-v9fs_printfcall(char *buf, int buflen, struct v9fs_fcall *fc, int extended)
-{
-        int i, ret, type, tag;
-        if (!fc)
-                return scnprintf(buf, buflen, "<NULL>");
-        type = fc->id;
-        tag = fc->tag;
-        ret = 0;
-        switch (type) {
-        case TVERSION:
-                ret += scnprintf(buf+ret, buflen-ret,
-                        "Tversion tag %u msize %u version '%.*s'", tag,
-                        fc->params.tversion.msize, fc->params.tversion.version.len,
-                        fc->params.tversion.version.str);
-                break;
-        case RVERSION:
-                ret += scnprintf(buf+ret, buflen-ret,
-                        "Rversion tag %u msize %u version '%.*s'", tag,
-                        fc->params.rversion.msize, fc->params.rversion.version.len,
-                        fc->params.rversion.version.str);
-                break;
-        case TAUTH:
-                ret += scnprintf(buf+ret, buflen-ret,
-                        "Tauth tag %u afid %d uname '%.*s' aname '%.*s'", tag,
-                        fc->params.tauth.afid, fc->params.tauth.uname.len,
-                        fc->params.tauth.uname.str, fc->params.tauth.aname.len,
-                        fc->params.tauth.aname.str);
-                break;
-        case RAUTH:
-                ret += scnprintf(buf+ret, buflen-ret, "Rauth tag %u qid ", tag);
-                v9fs_printqid(buf+ret, buflen-ret, &fc->params.rauth.qid);
-                break;
-        case TATTACH:
-                ret += scnprintf(buf+ret, buflen-ret,
-                        "Tattach tag %u fid %d afid %d uname '%.*s' aname '%.*s'",
-                        tag, fc->params.tattach.fid, fc->params.tattach.afid,
-                        fc->params.tattach.uname.len, fc->params.tattach.uname.str,
-                        fc->params.tattach.aname.len, fc->params.tattach.aname.str);
-                break;
-        case RATTACH:
-                ret += scnprintf(buf+ret, buflen-ret, "Rattach tag %u qid ", tag);
-                v9fs_printqid(buf+ret, buflen-ret, &fc->params.rattach.qid);
-                break;
-        case RERROR:
-                ret += scnprintf(buf+ret, buflen-ret, "Rerror tag %u ename '%.*s'",
-                        tag, fc->params.rerror.error.len,
-                        fc->params.rerror.error.str);
-                if (extended)
-                        ret += scnprintf(buf+ret, buflen-ret, " ecode %d\n",
-                                fc->params.rerror.errno);
-                break;
-        case TFLUSH:
-                ret += scnprintf(buf+ret, buflen-ret, "Tflush tag %u oldtag %u",
-                        tag, fc->params.tflush.oldtag);
-                break;
-        case RFLUSH:
-                ret += scnprintf(buf+ret, buflen-ret, "Rflush tag %u", tag);
-                break;
-        case TWALK:
-                ret += scnprintf(buf+ret, buflen-ret,
-                        "Twalk tag %u fid %d newfid %d nwname %d", tag,
-                        fc->params.twalk.fid, fc->params.twalk.newfid,
-                        fc->params.twalk.nwname);
-                for(i = 0; i < fc->params.twalk.nwname; i++)
-                        ret += scnprintf(buf+ret, buflen-ret," '%.*s'",
-                                fc->params.twalk.wnames[i].len,
-                                fc->params.twalk.wnames[i].str);
-                break;
-        case RWALK:
-                ret += scnprintf(buf+ret, buflen-ret, "Rwalk tag %u nwqid %d",
-                        tag, fc->params.rwalk.nwqid);
-                for(i = 0; i < fc->params.rwalk.nwqid; i++)
-                        ret += v9fs_printqid(buf+ret, buflen-ret,
-                                &fc->params.rwalk.wqids[i]);
-                break;
-        case TOPEN:
-                ret += scnprintf(buf+ret, buflen-ret,
-                        "Topen tag %u fid %d mode %d", tag,
-                        fc->params.topen.fid, fc->params.topen.mode);
-                break;
-        case ROPEN:
-                ret += scnprintf(buf+ret, buflen-ret, "Ropen tag %u", tag);
-                ret += v9fs_printqid(buf+ret, buflen-ret, &fc->params.ropen.qid);
-                ret += scnprintf(buf+ret, buflen-ret," iounit %d",
-                        fc->params.ropen.iounit);
-                break;
-        case TCREATE:
-                ret += scnprintf(buf+ret, buflen-ret,
-                        "Tcreate tag %u fid %d name '%.*s' perm ", tag,
-                        fc->params.tcreate.fid, fc->params.tcreate.name.len,
-                        fc->params.tcreate.name.str);
-                ret += v9fs_printperm(buf+ret, buflen-ret, fc->params.tcreate.perm);
-                ret += scnprintf(buf+ret, buflen-ret, " mode %d",
-                        fc->params.tcreate.mode);
-                break;
-        case RCREATE:
-                ret += scnprintf(buf+ret, buflen-ret, "Rcreate tag %u", tag);
-                ret += v9fs_printqid(buf+ret, buflen-ret, &fc->params.rcreate.qid);
-                ret += scnprintf(buf+ret, buflen-ret, " iounit %d",
-                        fc->params.rcreate.iounit);
-                break;
-        case TREAD:
-                ret += scnprintf(buf+ret, buflen-ret,
-                        "Tread tag %u fid %d offset %lld count %u", tag,
-                        fc->params.tread.fid,
-                        (long long int) fc->params.tread.offset,
-                        fc->params.tread.count);
-                break;
-        case RREAD:
-                ret += scnprintf(buf+ret, buflen-ret,
-                        "Rread tag %u count %u data ", tag,
-                        fc->params.rread.count);
-                ret += v9fs_printdata(buf+ret, buflen-ret, fc->params.rread.data,
-                        fc->params.rread.count);
-                break;
-        case TWRITE:
-                ret += scnprintf(buf+ret, buflen-ret,
-                        "Twrite tag %u fid %d offset %lld count %u data ",
-                        tag, fc->params.twrite.fid,
-                        (long long int) fc->params.twrite.offset,
-                        fc->params.twrite.count);
-                ret += v9fs_printdata(buf+ret, buflen-ret, fc->params.twrite.data,
-                        fc->params.twrite.count);
-                break;
-        case RWRITE:
-                ret += scnprintf(buf+ret, buflen-ret, "Rwrite tag %u count %u",
-                        tag, fc->params.rwrite.count);
-                break;
-        case TCLUNK:
-                ret += scnprintf(buf+ret, buflen-ret, "Tclunk tag %u fid %d",
-                        tag, fc->params.tclunk.fid);
-                break;
-        case RCLUNK:
-                ret += scnprintf(buf+ret, buflen-ret, "Rclunk tag %u", tag);
-                break;
-        case TREMOVE:
-                ret += scnprintf(buf+ret, buflen-ret, "Tremove tag %u fid %d",
-                        tag, fc->params.tremove.fid);
-                break;
-        case RREMOVE:
-                ret += scnprintf(buf+ret, buflen-ret, "Rremove tag %u", tag);
-                break;
-        case TSTAT:
-                ret += scnprintf(buf+ret, buflen-ret, "Tstat tag %u fid %d",
-                        tag, fc->params.tstat.fid);
-                break;
-        case RSTAT:
-                ret += scnprintf(buf+ret, buflen-ret, "Rstat tag %u ", tag);
-                ret += v9fs_printstat(buf+ret, buflen-ret, &fc->params.rstat.stat,
-                        extended);
-                break;
-        case TWSTAT:
-                ret += scnprintf(buf+ret, buflen-ret, "Twstat tag %u fid %d ",
-                        tag, fc->params.twstat.fid);
-                ret += v9fs_printstat(buf+ret, buflen-ret, &fc->params.twstat.stat,
-                        extended);
-                break;
-        case RWSTAT:
-                ret += scnprintf(buf+ret, buflen-ret, "Rwstat tag %u", tag);
-                break;
-        default:
-                ret += scnprintf(buf+ret, buflen-ret, "unknown type %d", type);
-                break;
-        }
-        return ret;
-}
diff --git a/fs/9p/fid.c b/fs/9p/fid.c
index 90419715c7e9..08fa320b7e6d 100644
--- a/fs/9p/fid.c
+++ b/fs/9p/fid.c
@@ -26,10 +26,10 @@
 #include <linux/sched.h>
 #include <linux/idr.h>
 #include <asm/semaphore.h>
+#include <net/9p/9p.h>
+#include <net/9p/client.h>
-#include "debug.h"
 #include "v9fs.h"
-#include "9p.h"
 #include "v9fs_vfs.h"
 #include "fid.h"
@@ -40,67 +40,29 @@
 *
 */
-int v9fs_fid_insert(struct v9fs_fid *fid, struct dentry *dentry)
+int v9fs_fid_add(struct dentry *dentry, struct p9_fid *fid)
 {
-        struct list_head *fid_list = (struct list_head *)dentry->d_fsdata;
+        struct v9fs_dentry *dent;
-        dprintk(DEBUG_9P, "fid %d (%p) dentry %s (%p)\n", fid->fid, fid,
-                dentry->d_iname, dentry);
-        if (dentry->d_fsdata == NULL) {
-                dentry->d_fsdata =
-                    kmalloc(sizeof(struct list_head), GFP_KERNEL);
-                if (dentry->d_fsdata == NULL) {
-                        dprintk(DEBUG_ERROR, "Out of memory\n");
-                        return -ENOMEM;
-                }
-                fid_list = (struct list_head *)dentry->d_fsdata;
-                INIT_LIST_HEAD(fid_list);       /* Initialize list head */
-        }
-        fid->uid = current->uid;
+        P9_DPRINTK(P9_DEBUG_VFS, "fid %d dentry %s\n",
-        list_add(&fid->list, fid_list);
+                                        fid->fid, dentry->d_iname);
-        return 0;
-}
-/**
+        dent = dentry->d_fsdata;
- * v9fs_fid_create - allocate a FID structure
+        if (!dent) {
- * @dentry - dentry to link newly created fid to
+                dent = kmalloc(sizeof(struct v9fs_dentry), GFP_KERNEL);
- *
+                if (!dent)
- */
+                        return -ENOMEM;
-struct v9fs_fid *v9fs_fid_create(struct v9fs_session_info *v9ses, int fid)
-{
-        struct v9fs_fid *new;
-        dprintk(DEBUG_9P, "fid create fid %d\n", fid);
+                spin_lock_init(&dent->lock);
-        new = kmalloc(sizeof(struct v9fs_fid), GFP_KERNEL);
+                INIT_LIST_HEAD(&dent->fidlist);
-        if (new == NULL) {
+                dentry->d_fsdata = dent;
-                dprintk(DEBUG_ERROR, "Out of Memory\n");
-                return ERR_PTR(-ENOMEM);
        }
-        new->fid = fid;
+        spin_lock(&dent->lock);
-        new->v9ses = v9ses;
+        list_add(&fid->dlist, &dent->fidlist);
-        new->fidopen = 0;
+        spin_unlock(&dent->lock);
-        new->fidclunked = 0;
-        new->iounit = 0;
-        new->rdir_pos = 0;
-        new->rdir_fcall = NULL;
-        init_MUTEX(&new->lock);
-        INIT_LIST_HEAD(&new->list);
-        return new;
-}
-/**
- * v9fs_fid_destroy - deallocate a FID structure
- * @fid: fid to destroy
- *
- */
-void v9fs_fid_destroy(struct v9fs_fid *fid)
+        return 0;
-{
-        list_del(&fid->list);
-        kfree(fid);
 }
 /**
@@ -114,30 +76,42 @@ void v9fs_fid_destroy(struct v9fs_fid *fid)
 *
 */
-struct v9fs_fid *v9fs_fid_lookup(struct dentry *dentry)
+struct p9_fid *v9fs_fid_lookup(struct dentry *dentry)
 {
-        struct list_head *fid_list = (struct list_head *)dentry->d_fsdata;
+        struct v9fs_dentry *dent;
-        struct v9fs_fid *return_fid = NULL;
+        struct p9_fid *fid;
-        dprintk(DEBUG_9P, " dentry: %s (%p)\n", dentry->d_iname, dentry);
+        P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_iname, dentry);
+        dent = dentry->d_fsdata;
-        if (fid_list)
+        if (dent)
-                return_fid = list_entry(fid_list->next, struct v9fs_fid, list);
+                fid = list_entry(dent->fidlist.next, struct p9_fid, dlist);
+        else
+                fid = ERR_PTR(-EBADF);
+        P9_DPRINTK(P9_DEBUG_VFS, " fid: %p\n", fid);
+        return fid;
+}
-        if (!return_fid) {
+struct p9_fid *v9fs_fid_lookup_remove(struct dentry *dentry)
-                dprintk(DEBUG_ERROR, "Couldn't find a fid in dentry\n");
+{
-                return_fid = ERR_PTR(-EBADF);
+        struct p9_fid *fid;
+        struct v9fs_dentry *dent;
+        dent = dentry->d_fsdata;
+        fid = v9fs_fid_lookup(dentry);
+        if (!IS_ERR(fid)) {
+                spin_lock(&dent->lock);
+                list_del(&fid->dlist);
+                spin_unlock(&dent->lock);
        }
-        if(down_interruptible(&return_fid->lock))
+        return fid;
-                return ERR_PTR(-EINTR);
-        return return_fid;
 }
 /**
 * v9fs_fid_clone - lookup the fid for a dentry, clone a private copy and
- *                      release it
+ *      release it
 * @dentry: dentry to look for fid in
 *
 * find a fid in the dentry and then clone to a new private fid
@@ -146,49 +120,15 @@ struct v9fs_fid *v9fs_fid_lookup(struct dentry *dentry)
 *
 */
-struct v9fs_fid *v9fs_fid_clone(struct dentry *dentry)
+struct p9_fid *v9fs_fid_clone(struct dentry *dentry)
 {
-        struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dentry->d_inode);
+        struct p9_fid *ofid, *fid;
-        struct v9fs_fid *base_fid, *new_fid = ERR_PTR(-EBADF);
-        struct v9fs_fcall *fcall = NULL;
-        int fid, err;
-        base_fid = v9fs_fid_lookup(dentry);
-        if(IS_ERR(base_fid))
-                return base_fid;
-        if(base_fid) {  /* clone fid */
-                fid = v9fs_get_idpool(&v9ses->fidpool);
-                if (fid < 0) {
-                        eprintk(KERN_WARNING, "newfid fails!\n");
-                        new_fid = ERR_PTR(-ENOSPC);
-                        goto Release_Fid;
-                }
-                err = v9fs_t_walk(v9ses, base_fid->fid, fid, NULL, &fcall);
-                if (err < 0) {
-                        dprintk(DEBUG_ERROR, "clone walk didn't work\n");
-                        v9fs_put_idpool(fid, &v9ses->fidpool);
-                        new_fid = ERR_PTR(err);
-                        goto Free_Fcall;
-                }
-                new_fid = v9fs_fid_create(v9ses, fid);
-                if (new_fid == NULL) {
-                        dprintk(DEBUG_ERROR, "out of memory\n");
-                        new_fid = ERR_PTR(-ENOMEM);
-                }
-Free_Fcall:
-                kfree(fcall);
-        }
-Release_Fid:
+        P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_iname, dentry);
-        up(&base_fid->lock);
+        ofid = v9fs_fid_lookup(dentry);
-        return new_fid;
+        if (IS_ERR(ofid))
-}
+                return ofid;
-void v9fs_fid_clunk(struct v9fs_session_info *v9ses, struct v9fs_fid *fid)
+        fid = p9_client_walk(ofid, 0, NULL, 1);
-{
+        return fid;
-        v9fs_t_clunk(v9ses, fid->fid);
-        v9fs_fid_destroy(fid);
 }
diff --git a/fs/9p/fid.h b/fs/9p/fid.h
index 48fc170c26c8..47a0ba742872 100644
--- a/fs/9p/fid.h
+++ b/fs/9p/fid.h
@@ -22,41 +22,12 @@
 #include <linux/list.h>
-#define FID_OP   0
+struct v9fs_dentry {
-#define FID_WALK 1
+        spinlock_t lock; /* protect fidlist */
-#define FID_CREATE 2
+        struct list_head fidlist;
-struct v9fs_fid {
-        struct list_head list;   /* list of fids associated with a dentry */
-        struct list_head active; /* XXX - debug */
-        struct semaphore lock;
-        u32 fid;
-        unsigned char fidopen;    /* set when fid is opened */
-        unsigned char fidclunked; /* set when fid has already been clunked */
-        struct v9fs_qid qid;
-        u32 iounit;
-        /* readdir stuff */
-        int rdir_fpos;
-        loff_t rdir_pos;
-        struct v9fs_fcall *rdir_fcall;
-        /* management stuff */
-        uid_t uid;              /* user associated with this fid */
-        /* private data */
-        struct file *filp;      /* backpointer to File struct for open files */
-        struct v9fs_session_info *v9ses;        /* session info for this FID */
 };
-struct v9fs_fid *v9fs_fid_lookup(struct dentry *dentry);
+struct p9_fid *v9fs_fid_lookup(struct dentry *dentry);
-struct v9fs_fid *v9fs_fid_get_created(struct dentry *);
+struct p9_fid *v9fs_fid_lookup_remove(struct dentry *dentry);
-void v9fs_fid_destroy(struct v9fs_fid *fid);
+struct p9_fid *v9fs_fid_clone(struct dentry *dentry);
-struct v9fs_fid *v9fs_fid_create(struct v9fs_session_info *, int fid);
+int v9fs_fid_add(struct dentry *dentry, struct p9_fid *fid);
-int v9fs_fid_insert(struct v9fs_fid *fid, struct dentry *dentry);
-struct v9fs_fid *v9fs_fid_clone(struct dentry *dentry);
-void v9fs_fid_clunk(struct v9fs_session_info *v9ses, struct v9fs_fid *fid);
diff --git a/fs/9p/mux.c b/fs/9p/mux.c
deleted file mode 100644
index c783874a9caf..000000000000
--- a/fs/9p/mux.c
+++ /dev/null
@@ -1,1033 +0,0 @@
-/*
- * linux/fs/9p/mux.c
- *
- * Protocol Multiplexer
- *
- *  Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
- *  Copyright (C) 2004-2005 by Latchesar Ionkov <lucho@ionkov.net>
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License version 2
- *  as published by the Free Software Foundation.
- *
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with this program; if not, write to:
- *  Free Software Foundation
- *  51 Franklin Street, Fifth Floor
- *  Boston, MA  02111-1301  USA
- *
- */
-#include <linux/module.h>
-#include <linux/errno.h>
-#include <linux/fs.h>
-#include <linux/poll.h>
-#include <linux/kthread.h>
-#include <linux/idr.h>
-#include <linux/mutex.h>
-#include "debug.h"
-#include "v9fs.h"
-#include "9p.h"
-#include "conv.h"
-#include "transport.h"
-#include "mux.h"
-#define ERREQFLUSH      1
-#define SCHED_TIMEOUT   10
-#define MAXPOLLWADDR    2
-enum {
-        Rworksched = 1,         /* read work scheduled or running */
-        Rpending = 2,           /* can read */
-        Wworksched = 4,         /* write work scheduled or running */
-        Wpending = 8,           /* can write */
-};
-enum {
-        None,
-        Flushing,
-        Flushed,
-};
-struct v9fs_mux_poll_task;
-struct v9fs_req {
-        spinlock_t lock;
-        int tag;
-        struct v9fs_fcall *tcall;
-        struct v9fs_fcall *rcall;
-        int err;
-        v9fs_mux_req_callback cb;
-        void *cba;
-        int flush;
-        struct list_head req_list;
-};
-struct v9fs_mux_data {
-        spinlock_t lock;
-        struct list_head mux_list;
-        struct v9fs_mux_poll_task *poll_task;
-        int msize;
-        unsigned char *extended;
-        struct v9fs_transport *trans;
-        struct v9fs_idpool tagpool;
-        int err;
-        wait_queue_head_t equeue;
-        struct list_head req_list;
-        struct list_head unsent_req_list;
-        struct v9fs_fcall *rcall;
-        int rpos;
-        char *rbuf;
-        int wpos;
-        int wsize;
-        char *wbuf;
-        wait_queue_t poll_wait[MAXPOLLWADDR];
-        wait_queue_head_t *poll_waddr[MAXPOLLWADDR];
-        poll_table pt;
-        struct work_struct rq;
-        struct work_struct wq;
-        unsigned long wsched;
-};
-struct v9fs_mux_poll_task {
-        struct task_struct *task;
-        struct list_head mux_list;
-        int muxnum;
-};
-struct v9fs_mux_rpc {
-        struct v9fs_mux_data *m;
-        int err;
-        struct v9fs_fcall *tcall;
-        struct v9fs_fcall *rcall;
-        wait_queue_head_t wqueue;
-};
-static int v9fs_poll_proc(void *);
-static void v9fs_read_work(struct work_struct *work);
-static void v9fs_write_work(struct work_struct *work);
-static void v9fs_pollwait(struct file *filp, wait_queue_head_t * wait_address,
-                          poll_table * p);
-static u16 v9fs_mux_get_tag(struct v9fs_mux_data *);
-static void v9fs_mux_put_tag(struct v9fs_mux_data *, u16);
-static DEFINE_MUTEX(v9fs_mux_task_lock);
-static struct workqueue_struct *v9fs_mux_wq;
-static int v9fs_mux_num;
-static int v9fs_mux_poll_task_num;
-static struct v9fs_mux_poll_task v9fs_mux_poll_tasks[100];
-int v9fs_mux_global_init(void)
-{
-        int i;
-        for (i = 0; i < ARRAY_SIZE(v9fs_mux_poll_tasks); i++)
-                v9fs_mux_poll_tasks[i].task = NULL;
-        v9fs_mux_wq = create_workqueue("v9fs");
-        if (!v9fs_mux_wq) {
-                printk(KERN_WARNING "v9fs: mux: creating workqueue failed\n");
-                return -ENOMEM;
-        }
-        return 0;
-}
-void v9fs_mux_global_exit(void)
-{
-        destroy_workqueue(v9fs_mux_wq);
-}
-/**
- * v9fs_mux_calc_poll_procs - calculates the number of polling procs
- * based on the number of mounted v9fs filesystems.
- *
- * The current implementation returns sqrt of the number of mounts.
- */
-static int v9fs_mux_calc_poll_procs(int muxnum)
-{
-        int n;
-        if (v9fs_mux_poll_task_num)
-                n = muxnum / v9fs_mux_poll_task_num +
-                    (muxnum % v9fs_mux_poll_task_num ? 1 : 0);
-        else
-                n = 1;
-        if (n > ARRAY_SIZE(v9fs_mux_poll_tasks))
-                n = ARRAY_SIZE(v9fs_mux_poll_tasks);
-        return n;
-}
-static int v9fs_mux_poll_start(struct v9fs_mux_data *m)
-{
-        int i, n;
-        struct v9fs_mux_poll_task *vpt, *vptlast;
-        struct task_struct *pproc;
-        dprintk(DEBUG_MUX, "mux %p muxnum %d procnum %d\n", m, v9fs_mux_num,
-                v9fs_mux_poll_task_num);
-        mutex_lock(&v9fs_mux_task_lock);
-        n = v9fs_mux_calc_poll_procs(v9fs_mux_num + 1);
-        if (n > v9fs_mux_poll_task_num) {
-                for (i = 0; i < ARRAY_SIZE(v9fs_mux_poll_tasks); i++) {
-                        if (v9fs_mux_poll_tasks[i].task == NULL) {
-                                vpt = &v9fs_mux_poll_tasks[i];
-                                dprintk(DEBUG_MUX, "create proc %p\n", vpt);
-                                pproc = kthread_create(v9fs_poll_proc, vpt,
-                                                   "v9fs-poll");
-                                if (!IS_ERR(pproc)) {
-                                        vpt->task = pproc;
-                                        INIT_LIST_HEAD(&vpt->mux_list);
-                                        vpt->muxnum = 0;
-                                        v9fs_mux_poll_task_num++;
-                                        wake_up_process(vpt->task);
-                                }
-                                break;
-                        }
-                }
-                if (i >= ARRAY_SIZE(v9fs_mux_poll_tasks))
-                        dprintk(DEBUG_ERROR, "warning: no free poll slots\n");
-        }
-        n = (v9fs_mux_num + 1) / v9fs_mux_poll_task_num +
-            ((v9fs_mux_num + 1) % v9fs_mux_poll_task_num ? 1 : 0);
-        vptlast = NULL;
-        for (i = 0; i < ARRAY_SIZE(v9fs_mux_poll_tasks); i++) {
-                vpt = &v9fs_mux_poll_tasks[i];
-                if (vpt->task != NULL) {
-                        vptlast = vpt;
-                        if (vpt->muxnum < n) {
-                                dprintk(DEBUG_MUX, "put in proc %d\n", i);
-                                list_add(&m->mux_list, &vpt->mux_list);
-                                vpt->muxnum++;
-                                m->poll_task = vpt;
-                                memset(&m->poll_waddr, 0, sizeof(m->poll_waddr));
-                                init_poll_funcptr(&m->pt, v9fs_pollwait);
-                                break;
-                        }
-                }
-        }
-        if (i >= ARRAY_SIZE(v9fs_mux_poll_tasks)) {
-                if (vptlast == NULL)
-                        return -ENOMEM;
-                dprintk(DEBUG_MUX, "put in proc %d\n", i);
-                list_add(&m->mux_list, &vptlast->mux_list);
-                vptlast->muxnum++;
-                m->poll_task = vptlast;
-                memset(&m->poll_waddr, 0, sizeof(m->poll_waddr));
-                init_poll_funcptr(&m->pt, v9fs_pollwait);
-        }
-        v9fs_mux_num++;
-        mutex_unlock(&v9fs_mux_task_lock);
-        return 0;
-}
-static void v9fs_mux_poll_stop(struct v9fs_mux_data *m)
-{
-        int i;
-        struct v9fs_mux_poll_task *vpt;
-        mutex_lock(&v9fs_mux_task_lock);
-        vpt = m->poll_task;
-        list_del(&m->mux_list);
-        for(i = 0; i < ARRAY_SIZE(m->poll_waddr); i++) {
-                if (m->poll_waddr[i] != NULL) {
-                        remove_wait_queue(m->poll_waddr[i], &m->poll_wait[i]);
-                        m->poll_waddr[i] = NULL;
-                }
-        }
-        vpt->muxnum--;
-        if (!vpt->muxnum) {
-                dprintk(DEBUG_MUX, "destroy proc %p\n", vpt);
-                kthread_stop(vpt->task);
-                vpt->task = NULL;
-                v9fs_mux_poll_task_num--;
-        }
-        v9fs_mux_num--;
-        mutex_unlock(&v9fs_mux_task_lock);
-}
-/**
- * v9fs_mux_init - allocate and initialize the per-session mux data
- * Creates the polling task if this is the first session.
- *
- * @trans - transport structure
- * @msize - maximum message size
- * @extended - pointer to the extended flag
- */
-struct v9fs_mux_data *v9fs_mux_init(struct v9fs_transport *trans, int msize,
-                                    unsigned char *extended)
-{
-        int i, n;
-        struct v9fs_mux_data *m, *mtmp;
-        dprintk(DEBUG_MUX, "transport %p msize %d\n", trans, msize);
-        m = kmalloc(sizeof(struct v9fs_mux_data), GFP_KERNEL);
-        if (!m)
-                return ERR_PTR(-ENOMEM);
-        spin_lock_init(&m->lock);
-        INIT_LIST_HEAD(&m->mux_list);
-        m->msize = msize;
-        m->extended = extended;
-        m->trans = trans;
-        idr_init(&m->tagpool.pool);
-        init_MUTEX(&m->tagpool.lock);
-        m->err = 0;
-        init_waitqueue_head(&m->equeue);
-        INIT_LIST_HEAD(&m->req_list);
-        INIT_LIST_HEAD(&m->unsent_req_list);
-        m->rcall = NULL;
-        m->rpos = 0;
-        m->rbuf = NULL;
-        m->wpos = m->wsize = 0;
-        m->wbuf = NULL;
-        INIT_WORK(&m->rq, v9fs_read_work);
-        INIT_WORK(&m->wq, v9fs_write_work);
-        m->wsched = 0;
-        memset(&m->poll_waddr, 0, sizeof(m->poll_waddr));
-        m->poll_task = NULL;
-        n = v9fs_mux_poll_start(m);
-        if (n)
-                return ERR_PTR(n);
-        n = trans->poll(trans, &m->pt);
-        if (n & POLLIN) {
-                dprintk(DEBUG_MUX, "mux %p can read\n", m);
-                set_bit(Rpending, &m->wsched);
-        }
-        if (n & POLLOUT) {
-                dprintk(DEBUG_MUX, "mux %p can write\n", m);
-                set_bit(Wpending, &m->wsched);
-        }
-        for(i = 0; i < ARRAY_SIZE(m->poll_waddr); i++) {
-                if (IS_ERR(m->poll_waddr[i])) {
-                        v9fs_mux_poll_stop(m);
-                        mtmp = (void *)m->poll_waddr;   /* the error code */
-                        kfree(m);
-                        m = mtmp;
-                        break;
-                }
-        }
-        return m;
-}
-/**
- * v9fs_mux_destroy - cancels all pending requests and frees mux resources
- */
-void v9fs_mux_destroy(struct v9fs_mux_data *m)
-{
-        dprintk(DEBUG_MUX, "mux %p prev %p next %p\n", m,
-                m->mux_list.prev, m->mux_list.next);
-        v9fs_mux_cancel(m, -ECONNRESET);
-        if (!list_empty(&m->req_list)) {
-                /* wait until all processes waiting on this session exit */
-                dprintk(DEBUG_MUX, "mux %p waiting for empty request queue\n",
-                        m);
-                wait_event_timeout(m->equeue, (list_empty(&m->req_list)), 5000);
-                dprintk(DEBUG_MUX, "mux %p request queue empty: %d\n", m,
-                        list_empty(&m->req_list));
-        }
-        v9fs_mux_poll_stop(m);
-        m->trans = NULL;
-        kfree(m);
-}
-/**
- * v9fs_pollwait - called by files poll operation to add v9fs-poll task
- *      to files wait queue
- */
-static void
-v9fs_pollwait(struct file *filp, wait_queue_head_t * wait_address,
-              poll_table * p)
-{
-        int i;
-        struct v9fs_mux_data *m;
-        m = container_of(p, struct v9fs_mux_data, pt);
-        for(i = 0; i < ARRAY_SIZE(m->poll_waddr); i++)
-                if (m->poll_waddr[i] == NULL)
-                        break;
-        if (i >= ARRAY_SIZE(m->poll_waddr)) {
-                dprintk(DEBUG_ERROR, "not enough wait_address slots\n");
-                return;
-        }
-        m->poll_waddr[i] = wait_address;
-        if (!wait_address) {
-                dprintk(DEBUG_ERROR, "no wait_address\n");
-                m->poll_waddr[i] = ERR_PTR(-EIO);
-                return;
-        }
-        init_waitqueue_entry(&m->poll_wait[i], m->poll_task->task);
-        add_wait_queue(wait_address, &m->poll_wait[i]);
-}
-/**
- * v9fs_poll_mux - polls a mux and schedules read or write works if necessary
- */
-static void v9fs_poll_mux(struct v9fs_mux_data *m)
-{
-        int n;
-        if (m->err < 0)
-                return;
-        n = m->trans->poll(m->trans, NULL);
-        if (n < 0 || n & (POLLERR | POLLHUP | POLLNVAL)) {
-                dprintk(DEBUG_MUX, "error mux %p err %d\n", m, n);
-                if (n >= 0)
-                        n = -ECONNRESET;
-                v9fs_mux_cancel(m, n);
-        }
-        if (n & POLLIN) {
-                set_bit(Rpending, &m->wsched);
-                dprintk(DEBUG_MUX, "mux %p can read\n", m);
-                if (!test_and_set_bit(Rworksched, &m->wsched)) {
-                        dprintk(DEBUG_MUX, "schedule read work mux %p\n", m);
-                        queue_work(v9fs_mux_wq, &m->rq);
-                }
-        }
-        if (n & POLLOUT) {
-                set_bit(Wpending, &m->wsched);
-                dprintk(DEBUG_MUX, "mux %p can write\n", m);
-                if ((m->wsize || !list_empty(&m->unsent_req_list))
-                    && !test_and_set_bit(Wworksched, &m->wsched)) {
-                        dprintk(DEBUG_MUX, "schedule write work mux %p\n", m);
-                        queue_work(v9fs_mux_wq, &m->wq);
-                }
-        }
-}
-/**
- * v9fs_poll_proc - polls all v9fs transports for new events and queues
- *      the appropriate work to the work queue
- */
-static int v9fs_poll_proc(void *a)
-{
-        struct v9fs_mux_data *m, *mtmp;
-        struct v9fs_mux_poll_task *vpt;
-        vpt = a;
-        dprintk(DEBUG_MUX, "start %p %p\n", current, vpt);
-        while (!kthread_should_stop()) {
-                set_current_state(TASK_INTERRUPTIBLE);
-                list_for_each_entry_safe(m, mtmp, &vpt->mux_list, mux_list) {
-                        v9fs_poll_mux(m);
-                }
-                dprintk(DEBUG_MUX, "sleeping...\n");
-                schedule_timeout(SCHED_TIMEOUT * HZ);
-        }
-        __set_current_state(TASK_RUNNING);
-        dprintk(DEBUG_MUX, "finish\n");
-        return 0;
-}
-/**
- * v9fs_write_work - called when a transport can send some data
- */
-static void v9fs_write_work(struct work_struct *work)
-{
-        int n, err;
-        struct v9fs_mux_data *m;
-        struct v9fs_req *req;
-        m = container_of(work, struct v9fs_mux_data, wq);
-        if (m->err < 0) {
-                clear_bit(Wworksched, &m->wsched);
-                return;
-        }
-        if (!m->wsize) {
-                if (list_empty(&m->unsent_req_list)) {
-                        clear_bit(Wworksched, &m->wsched);
-                        return;
-                }
-                spin_lock(&m->lock);
-again:
-                req = list_entry(m->unsent_req_list.next, struct v9fs_req,
-                               req_list);
-                list_move_tail(&req->req_list, &m->req_list);
-                if (req->err == ERREQFLUSH)
-                        goto again;
-                m->wbuf = req->tcall->sdata;
-                m->wsize = req->tcall->size;
-                m->wpos = 0;
-                dump_data(m->wbuf, m->wsize);
-                spin_unlock(&m->lock);
-        }
-        dprintk(DEBUG_MUX, "mux %p pos %d size %d\n", m, m->wpos, m->wsize);
-        clear_bit(Wpending, &m->wsched);
-        err = m->trans->write(m->trans, m->wbuf + m->wpos, m->wsize - m->wpos);
-        dprintk(DEBUG_MUX, "mux %p sent %d bytes\n", m, err);
-        if (err == -EAGAIN) {
-                clear_bit(Wworksched, &m->wsched);
-                return;
-        }
-        if (err <= 0)
-                goto error;
-        m->wpos += err;
-        if (m->wpos == m->wsize)
-                m->wpos = m->wsize = 0;
-        if (m->wsize == 0 && !list_empty(&m->unsent_req_list)) {
-                if (test_and_clear_bit(Wpending, &m->wsched))
-                        n = POLLOUT;
-                else
-                        n = m->trans->poll(m->trans, NULL);
-                if (n & POLLOUT) {
-                        dprintk(DEBUG_MUX, "schedule write work mux %p\n", m);
-                        queue_work(v9fs_mux_wq, &m->wq);
-                } else
-                        clear_bit(Wworksched, &m->wsched);
-        } else
-                clear_bit(Wworksched, &m->wsched);
-        return;
-      error:
-        v9fs_mux_cancel(m, err);
-        clear_bit(Wworksched, &m->wsched);
-}
-static void process_request(struct v9fs_mux_data *m, struct v9fs_req *req)
-{
-        int ecode;
-        struct v9fs_str *ename;
-        if (!req->err && req->rcall->id == RERROR) {
-                ecode = req->rcall->params.rerror.errno;
-                ename = &req->rcall->params.rerror.error;
-                dprintk(DEBUG_MUX, "Rerror %.*s\n", ename->len, ename->str);
-                if (*m->extended)
-                        req->err = -ecode;
-                if (!req->err) {
-                        req->err = v9fs_errstr2errno(ename->str, ename->len);
-                        if (!req->err) {        /* string match failed */
-                                PRINT_FCALL_ERROR("unknown error", req->rcall);
-                        }
-                        if (!req->err)
-                                req->err = -ESERVERFAULT;
-                }
-        } else if (req->tcall && req->rcall->id != req->tcall->id + 1) {
-                dprintk(DEBUG_ERROR, "fcall mismatch: expected %d, got %d\n",
-                        req->tcall->id + 1, req->rcall->id);
-                if (!req->err)
-                        req->err = -EIO;
-        }
-}
-/**
- * v9fs_read_work - called when there is some data to be read from a transport
- */
-static void v9fs_read_work(struct work_struct *work)
-{
-        int n, err;
-        struct v9fs_mux_data *m;
-        struct v9fs_req *req, *rptr, *rreq;
-        struct v9fs_fcall *rcall;
-        char *rbuf;
-        m = container_of(work, struct v9fs_mux_data, rq);
-        if (m->err < 0)
-                return;
-        rcall = NULL;
-        dprintk(DEBUG_MUX, "start mux %p pos %d\n", m, m->rpos);
-        if (!m->rcall) {
-                m->rcall =
-                    kmalloc(sizeof(struct v9fs_fcall) + m->msize, GFP_KERNEL);
-                if (!m->rcall) {
-                        err = -ENOMEM;
-                        goto error;
-                }
-                m->rbuf = (char *)m->rcall + sizeof(struct v9fs_fcall);
-                m->rpos = 0;
-        }
-        clear_bit(Rpending, &m->wsched);
-        err = m->trans->read(m->trans, m->rbuf + m->rpos, m->msize - m->rpos);
-        dprintk(DEBUG_MUX, "mux %p got %d bytes\n", m, err);
-        if (err == -EAGAIN) {
-                clear_bit(Rworksched, &m->wsched);
-                return;
-        }
-        if (err <= 0)
-                goto error;
-        m->rpos += err;
-        while (m->rpos > 4) {
-                n = le32_to_cpu(*(__le32 *) m->rbuf);
-                if (n >= m->msize) {
-                        dprintk(DEBUG_ERROR,
-                                "requested packet size too big: %d\n", n);
-                        err = -EIO;
-                        goto error;
-                }
-                if (m->rpos < n)
-                        break;
-                dump_data(m->rbuf, n);
-                err =
-                    v9fs_deserialize_fcall(m->rbuf, n, m->rcall, *m->extended);
-                if (err < 0) {
-                        goto error;
-                }
-                if ((v9fs_debug_level&DEBUG_FCALL) == DEBUG_FCALL) {
-                        char buf[150];
-                        v9fs_printfcall(buf, sizeof(buf), m->rcall,
-                                *m->extended);
-                        printk(KERN_NOTICE ">>> %p %s\n", m, buf);
-                }
-                rcall = m->rcall;
-                rbuf = m->rbuf;
-                if (m->rpos > n) {
-                        m->rcall = kmalloc(sizeof(struct v9fs_fcall) + m->msize,
-                                           GFP_KERNEL);
-                        if (!m->rcall) {
-                                err = -ENOMEM;
-                                goto error;
-                        }
-                        m->rbuf = (char *)m->rcall + sizeof(struct v9fs_fcall);
-                        memmove(m->rbuf, rbuf + n, m->rpos - n);
-                        m->rpos -= n;
-                } else {
-                        m->rcall = NULL;
-                        m->rbuf = NULL;
-                        m->rpos = 0;
-                }
-                dprintk(DEBUG_MUX, "mux %p fcall id %d tag %d\n", m, rcall->id,
-                        rcall->tag);
-                req = NULL;
-                spin_lock(&m->lock);
-                list_for_each_entry_safe(rreq, rptr, &m->req_list, req_list) {
-                        if (rreq->tag == rcall->tag) {
-                                req = rreq;
-                                if (req->flush != Flushing)
-                                        list_del(&req->req_list);
-                                break;
-                        }
-                }
-                spin_unlock(&m->lock);
-                if (req) {
-                        req->rcall = rcall;
-                        process_request(m, req);
-                        if (req->flush != Flushing) {
-                                if (req->cb)
-                                        (*req->cb) (req, req->cba);
-                                else
-                                        kfree(req->rcall);
-                                wake_up(&m->equeue);
-                        }
-                } else {
-                        if (err >= 0 && rcall->id != RFLUSH)
-                                dprintk(DEBUG_ERROR,
-                                        "unexpected response mux %p id %d tag %d\n",
-                                        m, rcall->id, rcall->tag);
-                        kfree(rcall);
-                }
-        }
-        if (!list_empty(&m->req_list)) {
-                if (test_and_clear_bit(Rpending, &m->wsched))
-                        n = POLLIN;
-                else
-                        n = m->trans->poll(m->trans, NULL);
-                if (n & POLLIN) {
-                        dprintk(DEBUG_MUX, "schedule read work mux %p\n", m);
-                        queue_work(v9fs_mux_wq, &m->rq);
-                } else
-                        clear_bit(Rworksched, &m->wsched);
-        } else
-                clear_bit(Rworksched, &m->wsched);
-        return;
-      error:
-        v9fs_mux_cancel(m, err);
-        clear_bit(Rworksched, &m->wsched);
-}
-/**
- * v9fs_send_request - send 9P request
- * The function can sleep until the request is scheduled for sending.
- * The function can be interrupted. Return from the function is not
- * a guarantee that the request is sent successfully. Can return errors
- * that can be retrieved by PTR_ERR macros.
- *
- * @m: mux data
- * @tc: request to be sent
- * @cb: callback function to call when response is received
- * @cba: parameter to pass to the callback function
- */
-static struct v9fs_req *v9fs_send_request(struct v9fs_mux_data *m,
-                                          struct v9fs_fcall *tc,
-                                          v9fs_mux_req_callback cb, void *cba)
-{
-        int n;
-        struct v9fs_req *req;
-        dprintk(DEBUG_MUX, "mux %p task %p tcall %p id %d\n", m, current,
-                tc, tc->id);
-        if (m->err < 0)
-                return ERR_PTR(m->err);
-        req = kmalloc(sizeof(struct v9fs_req), GFP_KERNEL);
-        if (!req)
-                return ERR_PTR(-ENOMEM);
-        if (tc->id == TVERSION)
-                n = V9FS_NOTAG;
-        else
-                n = v9fs_mux_get_tag(m);
-        if (n < 0)
-                return ERR_PTR(-ENOMEM);
-        v9fs_set_tag(tc, n);
-        if ((v9fs_debug_level&DEBUG_FCALL) == DEBUG_FCALL) {
-                char buf[150];
-                v9fs_printfcall(buf, sizeof(buf), tc, *m->extended);
-                printk(KERN_NOTICE "<<< %p %s\n", m, buf);
-        }
-        spin_lock_init(&req->lock);
-        req->tag = n;
-        req->tcall = tc;
-        req->rcall = NULL;
-        req->err = 0;
-        req->cb = cb;
-        req->cba = cba;
-        req->flush = None;
-        spin_lock(&m->lock);
-        list_add_tail(&req->req_list, &m->unsent_req_list);
-        spin_unlock(&m->lock);
-        if (test_and_clear_bit(Wpending, &m->wsched))
-                n = POLLOUT;
-        else
-                n = m->trans->poll(m->trans, NULL);
-        if (n & POLLOUT && !test_and_set_bit(Wworksched, &m->wsched))
-                queue_work(v9fs_mux_wq, &m->wq);
-        return req;
-}
-static void v9fs_mux_free_request(struct v9fs_mux_data *m, struct v9fs_req *req)
-{
-        v9fs_mux_put_tag(m, req->tag);
-        kfree(req);
-}
-static void v9fs_mux_flush_cb(struct v9fs_req *freq, void *a)
-{
-        v9fs_mux_req_callback cb;
-        int tag;
-        struct v9fs_mux_data *m;
-        struct v9fs_req *req, *rreq, *rptr;
-        m = a;
-        dprintk(DEBUG_MUX, "mux %p tc %p rc %p err %d oldtag %d\n", m,
-                freq->tcall, freq->rcall, freq->err,
-                freq->tcall->params.tflush.oldtag);
-        spin_lock(&m->lock);
-        cb = NULL;
-        tag = freq->tcall->params.tflush.oldtag;
-        req = NULL;
-        list_for_each_entry_safe(rreq, rptr, &m->req_list, req_list) {
-                if (rreq->tag == tag) {
-                        req = rreq;
-                        list_del(&req->req_list);
-                        break;
-                }
-        }
-        spin_unlock(&m->lock);
-        if (req) {
-                spin_lock(&req->lock);
-                req->flush = Flushed;
-                spin_unlock(&req->lock);
-                if (req->cb)
-                        (*req->cb) (req, req->cba);
-                else
-                        kfree(req->rcall);
-                wake_up(&m->equeue);
-        }
-        kfree(freq->tcall);
-        kfree(freq->rcall);
-        v9fs_mux_free_request(m, freq);
-}
-static int
-v9fs_mux_flush_request(struct v9fs_mux_data *m, struct v9fs_req *req)
-{
-        struct v9fs_fcall *fc;
-        struct v9fs_req *rreq, *rptr;
-        dprintk(DEBUG_MUX, "mux %p req %p tag %d\n", m, req, req->tag);
-        /* if a response was received for a request, do nothing */
-        spin_lock(&req->lock);
-        if (req->rcall || req->err) {
-                spin_unlock(&req->lock);
-                dprintk(DEBUG_MUX, "mux %p req %p response already received\n", m, req);
-                return 0;
-        }
-        req->flush = Flushing;
-        spin_unlock(&req->lock);
-        spin_lock(&m->lock);
-        /* if the request is not sent yet, just remove it from the list */
-        list_for_each_entry_safe(rreq, rptr, &m->unsent_req_list, req_list) {
-                if (rreq->tag == req->tag) {
-                        dprintk(DEBUG_MUX, "mux %p req %p request is not sent yet\n", m, req);
-                        list_del(&rreq->req_list);
-                        req->flush = Flushed;
-                        spin_unlock(&m->lock);
-                        if (req->cb)
-                                (*req->cb) (req, req->cba);
-                        return 0;
-                }
-        }
-        spin_unlock(&m->lock);
-        clear_thread_flag(TIF_SIGPENDING);
-        fc = v9fs_create_tflush(req->tag);
-        v9fs_send_request(m, fc, v9fs_mux_flush_cb, m);
-        return 1;
-}
-static void
-v9fs_mux_rpc_cb(struct v9fs_req *req, void *a)
-{
-        struct v9fs_mux_rpc *r;
-        dprintk(DEBUG_MUX, "req %p r %p\n", req, a);
-        r = a;
-        r->rcall = req->rcall;
-        r->err = req->err;
-        if (req->flush!=None && !req->err)
-                r->err = -ERESTARTSYS;
-        wake_up(&r->wqueue);
-}
-/**
- * v9fs_mux_rpc - sends 9P request and waits until a response is available.
- *      The function can be interrupted.
- * @m: mux data
- * @tc: request to be sent
- * @rc: pointer where a pointer to the response is stored
- */
-int
-v9fs_mux_rpc(struct v9fs_mux_data *m, struct v9fs_fcall *tc,
-             struct v9fs_fcall **rc)
-{
-        int err, sigpending;
-        unsigned long flags;
-        struct v9fs_req *req;
-        struct v9fs_mux_rpc r;
-        r.err = 0;
-        r.tcall = tc;
-        r.rcall = NULL;
-        r.m = m;
-        init_waitqueue_head(&r.wqueue);
-        if (rc)
-                *rc = NULL;
-        sigpending = 0;
-        if (signal_pending(current)) {
-                sigpending = 1;
-                clear_thread_flag(TIF_SIGPENDING);
-        }
-        req = v9fs_send_request(m, tc, v9fs_mux_rpc_cb, &r);
-        if (IS_ERR(req)) {
-                err = PTR_ERR(req);
-                dprintk(DEBUG_MUX, "error %d\n", err);
-                return err;
-        }
-        err = wait_event_interruptible(r.wqueue, r.rcall != NULL || r.err < 0);
-        if (r.err < 0)
-                err = r.err;
-        if (err == -ERESTARTSYS && m->trans->status == Connected && m->err == 0) {
-                if (v9fs_mux_flush_request(m, req)) {
-                        /* wait until we get response of the flush message */
-                        do {
-                                clear_thread_flag(TIF_SIGPENDING);
-                                err = wait_event_interruptible(r.wqueue,
-                                        r.rcall || r.err);
-                        } while (!r.rcall && !r.err && err==-ERESTARTSYS &&
-                                m->trans->status==Connected && !m->err);
-                        err = -ERESTARTSYS;
-                }
-                sigpending = 1;
-        }
-        if (sigpending) {
-                spin_lock_irqsave(&current->sighand->siglock, flags);
-                recalc_sigpending();
-                spin_unlock_irqrestore(&current->sighand->siglock, flags);
-        }
-        if (rc)
-                *rc = r.rcall;
-        else
-                kfree(r.rcall);
-        v9fs_mux_free_request(m, req);
-        if (err > 0)
-                err = -EIO;
-        return err;
-}
-#if 0
-/**
- * v9fs_mux_rpcnb - sends 9P request without waiting for response.
- * @m: mux data
- * @tc: request to be sent
- * @cb: callback function to be called when response arrives
- * @cba: value to pass to the callback function
- */
-int v9fs_mux_rpcnb(struct v9fs_mux_data *m, struct v9fs_fcall *tc,
-                   v9fs_mux_req_callback cb, void *a)
-{
-        int err;
-        struct v9fs_req *req;
-        req = v9fs_send_request(m, tc, cb, a);
-        if (IS_ERR(req)) {
-                err = PTR_ERR(req);
-                dprintk(DEBUG_MUX, "error %d\n", err);
-                return PTR_ERR(req);
-        }
-        dprintk(DEBUG_MUX, "mux %p tc %p tag %d\n", m, tc, req->tag);
-        return 0;
-}
-#endif  /*  0  */
-/**
- * v9fs_mux_cancel - cancel all pending requests with error
- * @m: mux data
- * @err: error code
- */
-void v9fs_mux_cancel(struct v9fs_mux_data *m, int err)
-{
-        struct v9fs_req *req, *rtmp;
-        LIST_HEAD(cancel_list);
-        dprintk(DEBUG_ERROR, "mux %p err %d\n", m, err);
-        m->err = err;
-        spin_lock(&m->lock);
-        list_for_each_entry_safe(req, rtmp, &m->req_list, req_list) {
-                list_move(&req->req_list, &cancel_list);
-        }
-        list_for_each_entry_safe(req, rtmp, &m->unsent_req_list, req_list) {
-                list_move(&req->req_list, &cancel_list);
-        }
-        spin_unlock(&m->lock);
-        list_for_each_entry_safe(req, rtmp, &cancel_list, req_list) {
-                list_del(&req->req_list);
-                if (!req->err)
-                        req->err = err;
-                if (req->cb)
-                        (*req->cb) (req, req->cba);
-                else
-                        kfree(req->rcall);
-        }
-        wake_up(&m->equeue);
-}
-static u16 v9fs_mux_get_tag(struct v9fs_mux_data *m)
-{
-        int tag;
-        tag = v9fs_get_idpool(&m->tagpool);
-        if (tag < 0)
-                return V9FS_NOTAG;
-        else
-                return (u16) tag;
-}
-static void v9fs_mux_put_tag(struct v9fs_mux_data *m, u16 tag)
-{
-        if (tag != V9FS_NOTAG && v9fs_check_idpool(tag, &m->tagpool))
-                v9fs_put_idpool(tag, &m->tagpool);
-}
diff --git a/fs/9p/mux.h b/fs/9p/mux.h
deleted file mode 100644
index fb10c50186a1..000000000000
--- a/fs/9p/mux.h
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
- * linux/fs/9p/mux.h
- *
- * Multiplexer Definitions
- *
- *  Copyright (C) 2005 by Latchesar Ionkov <lucho@ionkov.net>
- *  Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License version 2
- *  as published by the Free Software Foundation.
- *
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with this program; if not, write to:
- *  Free Software Foundation
- *  51 Franklin Street, Fifth Floor
- *  Boston, MA  02111-1301  USA
- *
- */
-struct v9fs_mux_data;
-struct v9fs_req;
-/**
- * v9fs_mux_req_callback - callback function that is called when the
- * response of a request is received. The callback is called from
- * a workqueue and shouldn't block.
- *
- * @a - the pointer that was specified when the request was send to be
- *      passed to the callback
- * @tc - request call
- * @rc - response call
- * @err - error code (non-zero if error occured)
- */
-typedef void (*v9fs_mux_req_callback)(struct v9fs_req *req, void *a);
-int v9fs_mux_global_init(void);
-void v9fs_mux_global_exit(void);
-struct v9fs_mux_data *v9fs_mux_init(struct v9fs_transport *trans, int msize,
-        unsigned char *extended);
-void v9fs_mux_destroy(struct v9fs_mux_data *);
-int v9fs_mux_send(struct v9fs_mux_data *m, struct v9fs_fcall *tc);
-struct v9fs_fcall *v9fs_mux_recv(struct v9fs_mux_data *m);
-int v9fs_mux_rpc(struct v9fs_mux_data *m, struct v9fs_fcall *tc, struct v9fs_fcall **rc);
-void v9fs_mux_flush(struct v9fs_mux_data *m, int sendflush);
-void v9fs_mux_cancel(struct v9fs_mux_data *m, int err);
-int v9fs_errstr2errno(char *errstr, int len);
diff --git a/fs/9p/trans_fd.c b/fs/9p/trans_fd.c
deleted file mode 100644
index 34d43355beb7..000000000000
--- a/fs/9p/trans_fd.c
+++ /dev/null
@@ -1,308 +0,0 @@
-/*
- * linux/fs/9p/trans_fd.c
- *
- * Fd transport layer.  Includes deprecated socket layer.
- *
- *  Copyright (C) 2006 by Russ Cox <rsc@swtch.com>
- *  Copyright (C) 2004-2005 by Latchesar Ionkov <lucho@ionkov.net>
- *  Copyright (C) 2004-2005 by Eric Van Hensbergen <ericvh@gmail.com>
- *  Copyright (C) 1997-2002 by Ron Minnich <rminnich@sarnoff.com>
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License version 2
- *  as published by the Free Software Foundation.
- *
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with this program; if not, write to:
- *  Free Software Foundation
- *  51 Franklin Street, Fifth Floor
- *  Boston, MA  02111-1301  USA
- *
- */
-#include <linux/in.h>
-#include <linux/module.h>
-#include <linux/net.h>
-#include <linux/ipv6.h>
-#include <linux/errno.h>
-#include <linux/kernel.h>
-#include <linux/un.h>
-#include <asm/uaccess.h>
-#include <linux/inet.h>
-#include <linux/idr.h>
-#include <linux/file.h>
-#include "debug.h"
-#include "v9fs.h"
-#include "transport.h"
-#define V9FS_PORT 564
-struct v9fs_trans_fd {
-        struct file *rd;
-        struct file *wr;
-};
-/**
- * v9fs_fd_read- read from a fd
- * @v9ses: session information
- * @v: buffer to receive data into
- * @len: size of receive buffer
- *
- */
-static int v9fs_fd_read(struct v9fs_transport *trans, void *v, int len)
-{
-        int ret;
-        struct v9fs_trans_fd *ts;
-        if (!trans || trans->status == Disconnected || !(ts = trans->priv))
-                return -EREMOTEIO;
-        if (!(ts->rd->f_flags & O_NONBLOCK))
-                dprintk(DEBUG_ERROR, "blocking read ...\n");
-        ret = kernel_read(ts->rd, ts->rd->f_pos, v, len);
-        if (ret <= 0 && ret != -ERESTARTSYS && ret != -EAGAIN)
-                trans->status = Disconnected;
-        return ret;
-}
-/**
- * v9fs_fd_write - write to a socket
- * @v9ses: session information
- * @v: buffer to send data from
- * @len: size of send buffer
- *
- */
-static int v9fs_fd_write(struct v9fs_transport *trans, void *v, int len)
-{
-        int ret;
-        mm_segment_t oldfs;
-        struct v9fs_trans_fd *ts;
-        if (!trans || trans->status == Disconnected || !(ts = trans->priv))
-                return -EREMOTEIO;
-        if (!(ts->wr->f_flags & O_NONBLOCK))
-                dprintk(DEBUG_ERROR, "blocking write ...\n");
-        oldfs = get_fs();
-        set_fs(get_ds());
-        /* The cast to a user pointer is valid due to the set_fs() */
-        ret = vfs_write(ts->wr, (void __user *)v, len, &ts->wr->f_pos);
-        set_fs(oldfs);
-        if (ret <= 0 && ret != -ERESTARTSYS && ret != -EAGAIN)
-                trans->status = Disconnected;
-        return ret;
-}
-static unsigned int
-v9fs_fd_poll(struct v9fs_transport *trans, struct poll_table_struct *pt)
-{
-        int ret, n;
-        struct v9fs_trans_fd *ts;
-        mm_segment_t oldfs;
-        if (!trans || trans->status != Connected || !(ts = trans->priv))
-                return -EREMOTEIO;
-        if (!ts->rd->f_op || !ts->rd->f_op->poll)
-                return -EIO;
-        if (!ts->wr->f_op || !ts->wr->f_op->poll)
-                return -EIO;
-        oldfs = get_fs();
-        set_fs(get_ds());
-        ret = ts->rd->f_op->poll(ts->rd, pt);
-        if (ret < 0)
-                goto end;
-        if (ts->rd != ts->wr) {
-                n = ts->wr->f_op->poll(ts->wr, pt);
-                if (n < 0) {
-                        ret = n;
-                        goto end;
-                }
-                ret = (ret & ~POLLOUT) | (n & ~POLLIN);
-        }
-      end:
-        set_fs(oldfs);
-        return ret;
-}
-static int v9fs_fd_open(struct v9fs_session_info *v9ses, int rfd, int wfd)
-{
-        struct v9fs_transport *trans = v9ses->transport;
-        struct v9fs_trans_fd *ts = kmalloc(sizeof(struct v9fs_trans_fd),
-                                           GFP_KERNEL);
-        if (!ts)
-                return -ENOMEM;
-        ts->rd = fget(rfd);
-        ts->wr = fget(wfd);
-        if (!ts->rd || !ts->wr) {
-                if (ts->rd)
-                        fput(ts->rd);
-                if (ts->wr)
-                        fput(ts->wr);
-                kfree(ts);
-                return -EIO;
-        }
-        trans->priv = ts;
-        trans->status = Connected;
-        return 0;
-}
-static int v9fs_fd_init(struct v9fs_session_info *v9ses, const char *addr,
-                        char *data)
-{
-        if (v9ses->rfdno == ~0 || v9ses->wfdno == ~0) {
-                printk(KERN_ERR "v9fs: Insufficient options for proto=fd\n");
-                return -ENOPROTOOPT;
-        }
-        return v9fs_fd_open(v9ses, v9ses->rfdno, v9ses->wfdno);
-}
-static int v9fs_socket_open(struct v9fs_session_info *v9ses,
-                            struct socket *csocket)
-{
-        int fd, ret;
-        csocket->sk->sk_allocation = GFP_NOIO;
-        if ((fd = sock_map_fd(csocket)) < 0) {
-                eprintk(KERN_ERR, "v9fs_socket_open: failed to map fd\n");
-                ret = fd;
-              release_csocket:
-                sock_release(csocket);
-                return ret;
-        }
-        if ((ret = v9fs_fd_open(v9ses, fd, fd)) < 0) {
-                sockfd_put(csocket);
-                eprintk(KERN_ERR, "v9fs_socket_open: failed to open fd\n");
-                goto release_csocket;
-        }
-        ((struct v9fs_trans_fd *)v9ses->transport->priv)->rd->f_flags |=
-            O_NONBLOCK;
-        return 0;
-}
-static int v9fs_tcp_init(struct v9fs_session_info *v9ses, const char *addr,
-                         char *data)
-{
-        int ret;
-        struct socket *csocket = NULL;
-        struct sockaddr_in sin_server;
-        sin_server.sin_family = AF_INET;
-        sin_server.sin_addr.s_addr = in_aton(addr);
-        sin_server.sin_port = htons(v9ses->port);
-        sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &csocket);
-        if (!csocket) {
-                eprintk(KERN_ERR, "v9fs_trans_tcp: problem creating socket\n");
-                return -1;
-        }
-        ret = csocket->ops->connect(csocket,
-                                    (struct sockaddr *)&sin_server,
-                                    sizeof(struct sockaddr_in), 0);
-        if (ret < 0) {
-                eprintk(KERN_ERR,
-                        "v9fs_trans_tcp: problem connecting socket to %s\n",
-                        addr);
-                return ret;
-        }
-        return v9fs_socket_open(v9ses, csocket);
-}
-static int
-v9fs_unix_init(struct v9fs_session_info *v9ses, const char *addr, char *data)
-{
-        int ret;
-        struct socket *csocket;
-        struct sockaddr_un sun_server;
-        if (strlen(addr) > UNIX_PATH_MAX) {
-                eprintk(KERN_ERR, "v9fs_trans_unix: address too long: %s\n",
-                        addr);
-                return -ENAMETOOLONG;
-        }
-        sun_server.sun_family = PF_UNIX;
-        strcpy(sun_server.sun_path, addr);
-        sock_create_kern(PF_UNIX, SOCK_STREAM, 0, &csocket);
-        ret = csocket->ops->connect(csocket, (struct sockaddr *)&sun_server,
-                        sizeof(struct sockaddr_un) - 1, 0);
-        if (ret < 0) {
-                eprintk(KERN_ERR,
-                        "v9fs_trans_unix: problem connecting socket: %s: %d\n",
-                        addr, ret);
-                return ret;
-        }
-        return v9fs_socket_open(v9ses, csocket);
-}
-/**
- * v9fs_sock_close - shutdown socket
- * @trans: private socket structure
- *
- */
-static void v9fs_fd_close(struct v9fs_transport *trans)
-{
-        struct v9fs_trans_fd *ts;
-        if (!trans)
-                return;
-        ts = xchg(&trans->priv, NULL);
-        if (!ts)
-                return;
-        trans->status = Disconnected;
-        if (ts->rd)
-                fput(ts->rd);
-        if (ts->wr)
-                fput(ts->wr);
-        kfree(ts);
-}
-struct v9fs_transport v9fs_trans_fd = {
-        .init = v9fs_fd_init,
-        .write = v9fs_fd_write,
-        .read = v9fs_fd_read,
-        .close = v9fs_fd_close,
-        .poll = v9fs_fd_poll,
-};
-struct v9fs_transport v9fs_trans_tcp = {
-        .init = v9fs_tcp_init,
-        .write = v9fs_fd_write,
-        .read = v9fs_fd_read,
-        .close = v9fs_fd_close,
-        .poll = v9fs_fd_poll,
-};
-struct v9fs_transport v9fs_trans_unix = {
-        .init = v9fs_unix_init,
-        .write = v9fs_fd_write,
-        .read = v9fs_fd_read,
-        .close = v9fs_fd_close,
-        .poll = v9fs_fd_poll,
-};
diff --git a/fs/9p/transport.h b/fs/9p/transport.h
deleted file mode 100644
index b38a4b8a41ce..000000000000
--- a/fs/9p/transport.h
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * linux/fs/9p/transport.h
- *
- * Transport Definition
- *
- *  Copyright (C) 2005 by Latchesar Ionkov <lucho@ionkov.net>
- *  Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License version 2
- *  as published by the Free Software Foundation.
- *
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with this program; if not, write to:
- *  Free Software Foundation
- *  51 Franklin Street, Fifth Floor
- *  Boston, MA  02111-1301  USA
- *
- */
-enum v9fs_transport_status {
-        Connected,
-        Disconnected,
-        Hung,
-};
-struct v9fs_transport {
-        enum v9fs_transport_status status;
-        void *priv;
-        int (*init) (struct v9fs_session_info *, const char *, char *);
-        int (*write) (struct v9fs_transport *, void *, int);
-        int (*read) (struct v9fs_transport *, void *, int);
-        void (*close) (struct v9fs_transport *);
-        unsigned int (*poll)(struct v9fs_transport *, struct poll_table_struct *);
-};
-extern struct v9fs_transport v9fs_trans_tcp;
-extern struct v9fs_transport v9fs_trans_unix;
-extern struct v9fs_transport v9fs_trans_fd;
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index 6ad6f192b6e4..45c35986d49f 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -29,16 +29,12 @@
 #include <linux/sched.h>
 #include <linux/parser.h>
 #include <linux/idr.h>
+#include <net/9p/9p.h>
-#include "debug.h"
+#include <net/9p/transport.h>
+#include <net/9p/conn.h>
+#include <net/9p/client.h>
 #include "v9fs.h"
-#include "9p.h"
 #include "v9fs_vfs.h"
-#include "transport.h"
-#include "mux.h"
-/* TODO: sysfs or debugfs interface */
-int v9fs_debug_level = 0;       /* feature-rific global debug level  */
 /*
  * Option Parsing (code inspired by NFS code)
@@ -47,12 +43,12 @@ int v9fs_debug_level = 0;	/* feature-rific global debug level  */
 enum {
        /* Options that take integer arguments */
-        Opt_port, Opt_msize, Opt_uid, Opt_gid, Opt_afid, Opt_debug,
+        Opt_debug, Opt_port, Opt_msize, Opt_uid, Opt_gid, Opt_afid,
        Opt_rfdno, Opt_wfdno,
        /* String options */
        Opt_uname, Opt_remotename,
        /* Options that take no arguments */
-        Opt_legacy, Opt_nodevmap, Opt_unix, Opt_tcp, Opt_fd,
+        Opt_legacy, Opt_nodevmap, Opt_unix, Opt_tcp, Opt_fd, Opt_pci,
        /* Cache options */
        Opt_cache_loose,
        /* Error token */
@@ -60,6 +56,7 @@ enum {
 };
 static match_table_t tokens = {
+        {Opt_debug, "debug=%x"},
        {Opt_port, "port=%u"},
        {Opt_msize, "msize=%u"},
        {Opt_uid, "uid=%u"},
@@ -67,12 +64,14 @@ static match_table_t tokens = {
        {Opt_afid, "afid=%u"},
        {Opt_rfdno, "rfdno=%u"},
        {Opt_wfdno, "wfdno=%u"},
-        {Opt_debug, "debug=%x"},
        {Opt_uname, "uname=%s"},
        {Opt_remotename, "aname=%s"},
        {Opt_unix, "proto=unix"},
        {Opt_tcp, "proto=tcp"},
        {Opt_fd, "proto=fd"},
+#ifdef CONFIG_PCI_9P
+        {Opt_pci, "proto=pci"},
+#endif
        {Opt_tcp, "tcp"},
        {Opt_unix, "unix"},
        {Opt_fd, "fd"},
@@ -83,6 +82,8 @@ static match_table_t tokens = {
        {Opt_err, NULL}
 };
+extern struct p9_transport *p9pci_trans_create(void);
 /*
 *  Parse option string.
 */
@@ -122,12 +123,16 @@ static void v9fs_parse_options(char *options, struct v9fs_session_info *v9ses)
                token = match_token(p, tokens, args);
                if (token < Opt_uname) {
                        if ((ret = match_int(&args[0], &option)) < 0) {
-                                dprintk(DEBUG_ERROR,
+                                P9_DPRINTK(P9_DEBUG_ERROR,
                                        "integer field, but no integer?\n");
                                continue;
                        }
                }
                switch (token) {
+                case Opt_debug:
+                        v9ses->debug = option;
+                        p9_debug_level = option;
+                        break;
                case Opt_port:
                        v9ses->port = option;
                        break;
@@ -149,15 +154,15 @@ static void v9fs_parse_options(char *options, struct v9fs_session_info *v9ses)
                case Opt_wfdno:
                        v9ses->wfdno = option;
                        break;
-                case Opt_debug:
-                        v9ses->debug = option;
-                        break;
                case Opt_tcp:
                        v9ses->proto = PROTO_TCP;
                        break;
                case Opt_unix:
                        v9ses->proto = PROTO_UNIX;
                        break;
+                case Opt_pci:
+                        v9ses->proto = PROTO_PCI;
+                        break;
                case Opt_fd:
                        v9ses->proto = PROTO_FD;
                        break;
@@ -183,82 +188,6 @@ static void v9fs_parse_options(char *options, struct v9fs_session_info *v9ses)
 }
 /**
- * v9fs_inode2v9ses - safely extract v9fs session info from super block
- * @inode: inode to extract information from
- *
- * Paranoid function to extract v9ses information from superblock,
- * if anything is missing it will report an error.
- *
- */
-struct v9fs_session_info *v9fs_inode2v9ses(struct inode *inode)
-{
-        return (inode->i_sb->s_fs_info);
-}
-/**
- * v9fs_get_idpool - allocate numeric id from pool
- * @p - pool to allocate from
- *
- * XXX - This seems to be an awful generic function, should it be in idr.c with
- *            the lock included in struct idr?
- */
-int v9fs_get_idpool(struct v9fs_idpool *p)
-{
-        int i = 0;
-        int error;
-retry:
-        if (idr_pre_get(&p->pool, GFP_KERNEL) == 0)
-                return 0;
-        if (down_interruptible(&p->lock) == -EINTR) {
-                eprintk(KERN_WARNING, "Interrupted while locking\n");
-                return -1;
-        }
-        /* no need to store exactly p, we just need something non-null */
-        error = idr_get_new(&p->pool, p, &i);
-        up(&p->lock);
-        if (error == -EAGAIN)
-                goto retry;
-        else if (error)
-                return -1;
-        return i;
-}
-/**
- * v9fs_put_idpool - release numeric id from pool
- * @p - pool to allocate from
- *
- * XXX - This seems to be an awful generic function, should it be in idr.c with
- *            the lock included in struct idr?
- */
-void v9fs_put_idpool(int id, struct v9fs_idpool *p)
-{
-        if (down_interruptible(&p->lock) == -EINTR) {
-                eprintk(KERN_WARNING, "Interrupted while locking\n");
-                return;
-        }
-        idr_remove(&p->pool, id);
-        up(&p->lock);
-}
-/**
- * v9fs_check_idpool - check if the specified id is available
- * @id - id to check
- * @p - pool
- */
-int v9fs_check_idpool(int id, struct v9fs_idpool *p)
-{
-        return idr_find(&p->pool, id) != NULL;
-}
-/**
 * v9fs_session_init - initialize session
 * @v9ses: session information structure
 * @dev_name: device being mounted
@@ -266,25 +195,21 @@ int v9fs_check_idpool(int id, struct v9fs_idpool *p)
 *
 */
-int
+struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses,
-v9fs_session_init(struct v9fs_session_info *v9ses,
                  const char *dev_name, char *data)
 {
-        struct v9fs_fcall *fcall = NULL;
-        struct v9fs_transport *trans_proto;
-        int n = 0;
-        int newfid = -1;
        int retval = -EINVAL;
-        struct v9fs_str *version;
+        struct p9_transport *trans;
+        struct p9_fid *fid;
        v9ses->name = __getname();
        if (!v9ses->name)
-                return -ENOMEM;
+                return ERR_PTR(-ENOMEM);
        v9ses->remotename = __getname();
        if (!v9ses->remotename) {
                __putname(v9ses->name);
-                return -ENOMEM;
+                return ERR_PTR(-ENOMEM);
        }
        strcpy(v9ses->name, V9FS_DEFUSER);
@@ -292,130 +217,60 @@ v9fs_session_init(struct v9fs_session_info *v9ses,
        v9fs_parse_options(data, v9ses);
-        /* set global debug level */
-        v9fs_debug_level = v9ses->debug;
-        /* id pools that are session-dependent: fids and tags */
-        idr_init(&v9ses->fidpool.pool);
-        init_MUTEX(&v9ses->fidpool.lock);
        switch (v9ses->proto) {
        case PROTO_TCP:
-                trans_proto = &v9fs_trans_tcp;
+                trans = p9_trans_create_tcp(dev_name, v9ses->port);
                break;
        case PROTO_UNIX:
-                trans_proto = &v9fs_trans_unix;
+                trans = p9_trans_create_unix(dev_name);
                *v9ses->remotename = 0;
                break;
        case PROTO_FD:
-                trans_proto = &v9fs_trans_fd;
+                trans = p9_trans_create_fd(v9ses->rfdno, v9ses->wfdno);
                *v9ses->remotename = 0;
                break;
+#ifdef CONFIG_PCI_9P
+        case PROTO_PCI:
+                trans = p9pci_trans_create();
+                *v9ses->remotename = 0;
+                break;
+#endif
        default:
                printk(KERN_ERR "v9fs: Bad mount protocol %d\n", v9ses->proto);
                retval = -ENOPROTOOPT;
-                goto SessCleanUp;
+                goto error;
        };
-        v9ses->transport = kmalloc(sizeof(*v9ses->transport), GFP_KERNEL);
+        if (IS_ERR(trans)) {
-        if (!v9ses->transport) {
+                retval = PTR_ERR(trans);
-                retval = -ENOMEM;
+                trans = NULL;
-                goto SessCleanUp;
+                goto error;
        }
-        memmove(v9ses->transport, trans_proto, sizeof(*v9ses->transport));
+        v9ses->clnt = p9_client_create(trans, v9ses->maxdata + P9_IOHDRSZ,
+                v9ses->extended);
-        if ((retval = v9ses->transport->init(v9ses, dev_name, data)) < 0) {
+        if (IS_ERR(v9ses->clnt)) {
-                eprintk(KERN_ERR, "problem initializing transport\n");
+                retval = PTR_ERR(v9ses->clnt);
-                goto SessCleanUp;
+                v9ses->clnt = NULL;
+                P9_DPRINTK(P9_DEBUG_ERROR, "problem initializing 9p client\n");
+                goto error;
        }
-        v9ses->inprogress = 0;
+        fid = p9_client_attach(v9ses->clnt, NULL, v9ses->name,
-        v9ses->shutdown = 0;
+                                                        v9ses->remotename);
-        v9ses->session_hung = 0;
+        if (IS_ERR(fid)) {
+                retval = PTR_ERR(fid);
-        v9ses->mux = v9fs_mux_init(v9ses->transport, v9ses->maxdata + V9FS_IOHDRSZ,
+                fid = NULL;
-                &v9ses->extended);
+                P9_DPRINTK(P9_DEBUG_ERROR, "cannot attach\n");
+                goto error;
-        if (IS_ERR(v9ses->mux)) {
-                retval = PTR_ERR(v9ses->mux);
-                v9ses->mux = NULL;
-                dprintk(DEBUG_ERROR, "problem initializing mux\n");
-                goto SessCleanUp;
        }
-        if (v9ses->afid == ~0) {
+        return fid;
-                if (v9ses->extended)
-                        retval =
-                            v9fs_t_version(v9ses, v9ses->maxdata, "9P2000.u",
-                                           &fcall);
-                else
-                        retval = v9fs_t_version(v9ses, v9ses->maxdata, "9P2000",
-                                                &fcall);
-                if (retval < 0) {
-                        dprintk(DEBUG_ERROR, "v9fs_t_version failed\n");
-                        goto FreeFcall;
-                }
-                version = &fcall->params.rversion.version;
-                if (version->len==8 && !memcmp(version->str, "9P2000.u", 8)) {
-                        dprintk(DEBUG_9P, "9P2000 UNIX extensions enabled\n");
-                        v9ses->extended = 1;
-                } else if (version->len==6 && !memcmp(version->str, "9P2000", 6)) {
-                        dprintk(DEBUG_9P, "9P2000 legacy mode enabled\n");
-                        v9ses->extended = 0;
-                } else {
-                        retval = -EREMOTEIO;
-                        goto FreeFcall;
-                }
-                n = fcall->params.rversion.msize;
+error:
-                kfree(fcall);
-                if (n < v9ses->maxdata)
-                        v9ses->maxdata = n;
-        }
-        newfid = v9fs_get_idpool(&v9ses->fidpool);
-        if (newfid < 0) {
-                eprintk(KERN_WARNING, "couldn't allocate FID\n");
-                retval = -ENOMEM;
-                goto SessCleanUp;
-        }
-        /* it is a little bit ugly, but we have to prevent newfid */
-        /* being the same as afid, so if it is, get a new fid     */
-        if (v9ses->afid != ~0 && newfid == v9ses->afid) {
-                newfid = v9fs_get_idpool(&v9ses->fidpool);
-                if (newfid < 0) {
-                        eprintk(KERN_WARNING, "couldn't allocate FID\n");
-                        retval = -ENOMEM;
-                        goto SessCleanUp;
-                }
-        }
-        if ((retval =
-             v9fs_t_attach(v9ses, v9ses->name, v9ses->remotename, newfid,
-                           v9ses->afid, NULL))
-            < 0) {
-                dprintk(DEBUG_ERROR, "cannot attach\n");
-                goto SessCleanUp;
-        }
-        if (v9ses->afid != ~0) {
-                dprintk(DEBUG_ERROR, "afid not equal to ~0\n");
-                if (v9fs_t_clunk(v9ses, v9ses->afid))
-                        dprintk(DEBUG_ERROR, "clunk failed\n");
-        }
-        return newfid;
-      FreeFcall:
-        kfree(fcall);
-      SessCleanUp:
        v9fs_session_close(v9ses);
-        return retval;
+        return ERR_PTR(retval);
 }
 /**
@@ -426,15 +281,9 @@ v9fs_session_init(struct v9fs_session_info *v9ses,
 void v9fs_session_close(struct v9fs_session_info *v9ses)
 {
-        if (v9ses->mux) {
+        if (v9ses->clnt) {
-                v9fs_mux_destroy(v9ses->mux);
+                p9_client_destroy(v9ses->clnt);
-                v9ses->mux = NULL;
+                v9ses->clnt = NULL;
-        }
-        if (v9ses->transport) {
-                v9ses->transport->close(v9ses->transport);
-                kfree(v9ses->transport);
-                v9ses->transport = NULL;
        }
        __putname(v9ses->name);
@@ -446,9 +295,8 @@ void v9fs_session_close(struct v9fs_session_info *v9ses)
 *      and cancel all pending requests.
 */
 void v9fs_session_cancel(struct v9fs_session_info *v9ses) {
-        dprintk(DEBUG_ERROR, "cancel session %p\n", v9ses);
+        P9_DPRINTK(P9_DEBUG_ERROR, "cancel session %p\n", v9ses);
-        v9ses->transport->status = Disconnected;
+        p9_client_disconnect(v9ses->clnt);
-        v9fs_mux_cancel(v9ses->mux, -EIO);
 }
 extern int v9fs_error_init(void);
@@ -460,24 +308,9 @@ extern int v9fs_error_init(void);
 static int __init init_v9fs(void)
 {
-        int ret;
-        v9fs_error_init();
        printk(KERN_INFO "Installing v9fs 9p2000 file system support\n");
-        ret = v9fs_mux_global_init();
+        return register_filesystem(&v9fs_fs_type);
-        if (ret) {
-                printk(KERN_WARNING "v9fs: starting mux failed\n");
-                return ret;
-        }
-        ret = register_filesystem(&v9fs_fs_type);
-        if (ret) {
-                printk(KERN_WARNING "v9fs: registering file system failed\n");
-                v9fs_mux_global_exit();
-        }
-        return ret;
 }
 /**
@@ -487,13 +320,13 @@ static int __init init_v9fs(void)
 static void __exit exit_v9fs(void)
 {
-        v9fs_mux_global_exit();
        unregister_filesystem(&v9fs_fs_type);
 }
 module_init(init_v9fs)
 module_exit(exit_v9fs)
+MODULE_AUTHOR("Latchesar Ionkov <lucho@ionkov.net>");
 MODULE_AUTHOR("Eric Van Hensbergen <ericvh@gmail.com>");
 MODULE_AUTHOR("Ron Minnich <rminnich@lanl.gov>");
 MODULE_LICENSE("GPL");
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h
index 820bf5ca35d8..abc4b1668ace 100644
--- a/fs/9p/v9fs.h
+++ b/fs/9p/v9fs.h
@@ -22,16 +22,6 @@
 */
 /*
-  * Idpool structure provides lock and id management
-  *
-  */
-struct v9fs_idpool {
-        struct semaphore lock;
-        struct idr pool;
-};
-/*
  * Session structure provides information for an opened session
  *
  */
@@ -54,15 +44,7 @@ struct v9fs_session_info {
        unsigned int uid;       /* default uid/muid for legacy support */
        unsigned int gid;       /* default gid for legacy support */
-        /* book keeping */
+        struct p9_client *clnt; /* 9p client */
-        struct v9fs_idpool fidpool;     /* The FID pool for file descriptors */
-        struct v9fs_transport *transport;
-        struct v9fs_mux_data *mux;
-        int inprogress;         /* session in progress => true */
-        int shutdown;           /* session shutting down. no more attaches. */
-        unsigned char session_hung;
        struct dentry *debugfs_dir;
 };
@@ -71,6 +53,7 @@ enum {
        PROTO_TCP,
        PROTO_UNIX,
        PROTO_FD,
+        PROTO_PCI,
 };
 /* possible values of ->cache */
@@ -82,12 +65,9 @@ enum {
 extern struct dentry *v9fs_debugfs_root;
-int v9fs_session_init(struct v9fs_session_info *, const char *, char *);
+struct p9_fid *v9fs_session_init(struct v9fs_session_info *, const char *,
-struct v9fs_session_info *v9fs_inode2v9ses(struct inode *);
+                                                                        char *);
 void v9fs_session_close(struct v9fs_session_info *v9ses);
-int v9fs_get_idpool(struct v9fs_idpool *p);
-void v9fs_put_idpool(int id, struct v9fs_idpool *p);
-int v9fs_check_idpool(int id, struct v9fs_idpool *p);
 void v9fs_session_cancel(struct v9fs_session_info *v9ses);
 #define V9FS_MAGIC 0x01021997
@@ -97,3 +77,7 @@ void v9fs_session_cancel(struct v9fs_session_info *v9ses);
 #define V9FS_DEFUSER    "nobody"
 #define V9FS_DEFANAME   ""
+static inline struct v9fs_session_info *v9fs_inode2v9ses(struct inode *inode)
+{
+        return (inode->i_sb->s_fs_info);
+}
diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h
index 6a82d39dc498..fd01d90cada5 100644
--- a/fs/9p/v9fs_vfs.h
+++ b/fs/9p/v9fs_vfs.h
@@ -45,10 +45,10 @@ extern struct dentry_operations v9fs_dentry_operations;
 extern struct dentry_operations v9fs_cached_dentry_operations;
 struct inode *v9fs_get_inode(struct super_block *sb, int mode);
-ino_t v9fs_qid2ino(struct v9fs_qid *qid);
+ino_t v9fs_qid2ino(struct p9_qid *qid);
-void v9fs_stat2inode(struct v9fs_stat *, struct inode *, struct super_block *);
+void v9fs_stat2inode(struct p9_stat *, struct inode *, struct super_block *);
 int v9fs_dir_release(struct inode *inode, struct file *filp);
 int v9fs_file_open(struct inode *inode, struct file *file);
-void v9fs_inode2stat(struct inode *inode, struct v9fs_stat *stat);
+void v9fs_inode2stat(struct inode *inode, struct p9_stat *stat);
 void v9fs_dentry_release(struct dentry *);
 int v9fs_uflags2omode(int uflags);
diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c
index 9ac4ffe9ac7d..6248f0e727a3 100644
--- a/fs/9p/vfs_addr.c
+++ b/fs/9p/vfs_addr.c
@@ -33,10 +33,10 @@
 #include <linux/pagemap.h>
 #include <linux/idr.h>
 #include <linux/sched.h>
+#include <net/9p/9p.h>
+#include <net/9p/client.h>
-#include "debug.h"
 #include "v9fs.h"
-#include "9p.h"
 #include "v9fs_vfs.h"
 #include "fid.h"
@@ -50,55 +50,26 @@
 static int v9fs_vfs_readpage(struct file *filp, struct page *page)
 {
-        char *buffer = NULL;
+        int retval;
-        int retval = -EIO;
+        loff_t offset;
-        loff_t offset = page_offset(page);
+        char *buffer;
-        int count = PAGE_CACHE_SIZE;
+        struct p9_fid *fid;
-        struct inode *inode = filp->f_path.dentry->d_inode;
-        struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode);
-        int rsize = v9ses->maxdata - V9FS_IOHDRSZ;
-        struct v9fs_fid *v9f = filp->private_data;
-        struct v9fs_fcall *fcall = NULL;
-        int fid = v9f->fid;
-        int total = 0;
-        int result = 0;
-        dprintk(DEBUG_VFS, "\n");
+        P9_DPRINTK(P9_DEBUG_VFS, "\n");
+        fid = filp->private_data;
        buffer = kmap(page);
-        do {
+        offset = page_offset(page);
-                if (count < rsize)
-                        rsize = count;
-                result = v9fs_t_read(v9ses, fid, offset, rsize, &fcall);
-                if (result < 0) {
-                        printk(KERN_ERR "v9fs_t_read returned %d\n",
-                               result);
-                        kfree(fcall);
-                        goto UnmapAndUnlock;
-                } else
-                        offset += result;
-                memcpy(buffer, fcall->params.rread.data, result);
-                count -= result;
-                buffer += result;
-                total += result;
-                kfree(fcall);
-                if (result < rsize)
+        retval = p9_client_readn(fid, buffer, offset, PAGE_CACHE_SIZE);
-                        break;
+        if (retval < 0)
-        } while (count);
+                goto done;
-        memset(buffer, 0, count);
+        memset(buffer + retval, 0, PAGE_CACHE_SIZE - retval);
        flush_dcache_page(page);
        SetPageUptodate(page);
        retval = 0;
-UnmapAndUnlock:
+done:
        kunmap(page);
        unlock_page(page);
        return retval;
diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c
index d93960429c09..f9534f18df0a 100644
--- a/fs/9p/vfs_dentry.c
+++ b/fs/9p/vfs_dentry.c
@@ -34,10 +34,10 @@
 #include <linux/namei.h>
 #include <linux/idr.h>
 #include <linux/sched.h>
+#include <net/9p/9p.h>
+#include <net/9p/client.h>
-#include "debug.h"
 #include "v9fs.h"
-#include "9p.h"
 #include "v9fs_vfs.h"
 #include "fid.h"
@@ -52,7 +52,7 @@
 static int v9fs_dentry_delete(struct dentry *dentry)
 {
-        dprintk(DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_iname, dentry);
+        P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_iname, dentry);
        return 1;
 }
@@ -69,7 +69,7 @@ static int v9fs_dentry_delete(struct dentry *dentry)
 static int v9fs_cached_dentry_delete(struct dentry *dentry)
 {
        struct inode *inode = dentry->d_inode;
-        dprintk(DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_iname, dentry);
+        P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_iname, dentry);
        if(!inode)
                return 1;
@@ -85,26 +85,19 @@ static int v9fs_cached_dentry_delete(struct dentry *dentry)
 void v9fs_dentry_release(struct dentry *dentry)
 {
-        int err;
+        struct v9fs_dentry *dent;
+        struct p9_fid *temp, *current_fid;
-        dprintk(DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_iname, dentry);
+        P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_iname, dentry);
-        if (dentry->d_fsdata != NULL) {
+        dent = dentry->d_fsdata;
-                struct list_head *fid_list = dentry->d_fsdata;
+        if (dent) {
-                struct v9fs_fid *temp = NULL;
+                list_for_each_entry_safe(current_fid, temp, &dent->fidlist,
-                struct v9fs_fid *current_fid = NULL;
+                                                                        dlist) {
+                        p9_client_clunk(current_fid);
-                list_for_each_entry_safe(current_fid, temp, fid_list, list) {
-                        err = v9fs_t_clunk(current_fid->v9ses, current_fid->fid);
-                        if (err < 0)
-                                dprintk(DEBUG_ERROR, "clunk failed: %d name %s\n",
-                                        err, dentry->d_iname);
-                        v9fs_fid_destroy(current_fid);
                }
-                kfree(dentry->d_fsdata);        /* free the list_head */
+                kfree(dent);
+                dentry->d_fsdata = NULL;
        }
 }
diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c
index 1dd86ee90bc5..0924d4477da3 100644
--- a/fs/9p/vfs_dir.c
+++ b/fs/9p/vfs_dir.c
@@ -32,11 +32,10 @@
 #include <linux/sched.h>
 #include <linux/inet.h>
 #include <linux/idr.h>
+#include <net/9p/9p.h>
+#include <net/9p/client.h>
-#include "debug.h"
 #include "v9fs.h"
-#include "9p.h"
-#include "conv.h"
 #include "v9fs_vfs.h"
 #include "fid.h"
@@ -46,14 +45,14 @@
 *
 */
-static inline int dt_type(struct v9fs_stat *mistat)
+static inline int dt_type(struct p9_stat *mistat)
 {
        unsigned long perm = mistat->mode;
        int rettype = DT_REG;
-        if (perm & V9FS_DMDIR)
+        if (perm & P9_DMDIR)
                rettype = DT_DIR;
-        if (perm & V9FS_DMSYMLINK)
+        if (perm & P9_DMSYMLINK)
                rettype = DT_LNK;
        return rettype;
@@ -69,106 +68,36 @@ static inline int dt_type(struct v9fs_stat *mistat)
 static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir)
 {
-        struct v9fs_fcall *fcall = NULL;
+        int over;
-        struct inode *inode = filp->f_path.dentry->d_inode;
+        struct p9_fid *fid;
-        struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode);
+        struct v9fs_session_info *v9ses;
-        struct v9fs_fid *file = filp->private_data;
+        struct inode *inode;
-        unsigned int i, n, s;
+        struct p9_stat *st;
-        int fid = -1;
-        int ret = 0;
+        P9_DPRINTK(P9_DEBUG_VFS, "name %s\n", filp->f_path.dentry->d_name.name);
-        struct v9fs_stat stat;
+        inode = filp->f_path.dentry->d_inode;
-        int over = 0;
+        v9ses = v9fs_inode2v9ses(inode);
+        fid = filp->private_data;
-        dprintk(DEBUG_VFS, "name %s\n", filp->f_path.dentry->d_name.name);
+        while ((st = p9_client_dirread(fid, filp->f_pos)) != NULL) {
+                if (IS_ERR(st))
-        fid = file->fid;
+                        return PTR_ERR(st);
-        if (file->rdir_fcall && (filp->f_pos != file->rdir_pos)) {
+                over = filldir(dirent, st->name.str, st->name.len, filp->f_pos,
-                kfree(file->rdir_fcall);
+                        v9fs_qid2ino(&st->qid), dt_type(st));
-                file->rdir_fcall = NULL;
-        }
+                if (over)
-        if (file->rdir_fcall) {
-                n = file->rdir_fcall->params.rread.count;
-                i = file->rdir_fpos;
-                while (i < n) {
-                        s = v9fs_deserialize_stat(
-                                file->rdir_fcall->params.rread.data + i,
-                                n - i, &stat, v9ses->extended);
-                        if (s == 0) {
-                                dprintk(DEBUG_ERROR,
-                                        "error while deserializing stat\n");
-                                ret = -EIO;
-                                goto FreeStructs;
-                        }
-                        over = filldir(dirent, stat.name.str, stat.name.len,
-                                    filp->f_pos, v9fs_qid2ino(&stat.qid),
-                                    dt_type(&stat));
-                        if (over) {
-                                file->rdir_fpos = i;
-                                file->rdir_pos = filp->f_pos;
-                                break;
-                        }
-                        i += s;
-                        filp->f_pos += s;
-                }
-                if (!over) {
-                        kfree(file->rdir_fcall);
-                        file->rdir_fcall = NULL;
-                }
-        }
-        while (!over) {
-                ret = v9fs_t_read(v9ses, fid, filp->f_pos,
-                        v9ses->maxdata-V9FS_IOHDRSZ, &fcall);
-                if (ret < 0) {
-                        dprintk(DEBUG_ERROR, "error while reading: %d: %p\n",
-                                ret, fcall);
-                        goto FreeStructs;
-                } else if (ret == 0)
                        break;
-                n = ret;
+                filp->f_pos += st->size;
-                i = 0;
+                kfree(st);
-                while (i < n) {
+                st = NULL;
-                        s = v9fs_deserialize_stat(fcall->params.rread.data + i,
-                                n - i, &stat, v9ses->extended);
-                        if (s == 0) {
-                                dprintk(DEBUG_ERROR,
-                                        "error while deserializing stat\n");
-                                return -EIO;
-                        }
-                        over = filldir(dirent, stat.name.str, stat.name.len,
-                                    filp->f_pos, v9fs_qid2ino(&stat.qid),
-                                    dt_type(&stat));
-                        if (over) {
-                                file->rdir_fcall = fcall;
-                                file->rdir_fpos = i;
-                                file->rdir_pos = filp->f_pos;
-                                fcall = NULL;
-                                break;
-                        }
-                        i += s;
-                        filp->f_pos += s;
-                }
-                kfree(fcall);
        }
-      FreeStructs:
+        kfree(st);
-        kfree(fcall);
+        return 0;
-        return ret;
 }
 /**
 * v9fs_dir_release - close a directory
 * @inode: inode of the directory
@@ -178,29 +107,13 @@ static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir)
 int v9fs_dir_release(struct inode *inode, struct file *filp)
 {
-        struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode);
+        struct p9_fid *fid;
-        struct v9fs_fid *fid = filp->private_data;
-        int fidnum = -1;
-        dprintk(DEBUG_VFS, "inode: %p filp: %p fid: %d\n", inode, filp,
-                fid->fid);
-        fidnum = fid->fid;
+        fid = filp->private_data;
+        P9_DPRINTK(P9_DEBUG_VFS,
+                        "inode: %p filp: %p fid: %d\n", inode, filp, fid->fid);
        filemap_write_and_wait(inode->i_mapping);
+        p9_client_clunk(fid);
-        if (fidnum >= 0) {
-                dprintk(DEBUG_VFS, "fidopen: %d v9f->fid: %d\n", fid->fidopen,
-                        fid->fid);
-                if (v9fs_t_clunk(v9ses, fidnum))
-                        dprintk(DEBUG_ERROR, "clunk failed\n");
-                kfree(fid->rdir_fcall);
-                kfree(fid);
-                filp->private_data = NULL;
-        }
        return 0;
 }
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index 6e7678e4852f..2a40c2946d0a 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -34,10 +34,10 @@
 #include <linux/list.h>
 #include <asm/uaccess.h>
 #include <linux/idr.h>
+#include <net/9p/9p.h>
+#include <net/9p/client.h>
-#include "debug.h"
 #include "v9fs.h"
-#include "9p.h"
 #include "v9fs_vfs.h"
 #include "fid.h"
@@ -52,48 +52,40 @@ static const struct file_operations v9fs_cached_file_operations;
 int v9fs_file_open(struct inode *inode, struct file *file)
 {
-        struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode);
-        struct v9fs_fid *vfid;
-        struct v9fs_fcall *fcall = NULL;
-        int omode;
        int err;
+        struct v9fs_session_info *v9ses;
+        struct p9_fid *fid;
+        int omode;
-        dprintk(DEBUG_VFS, "inode: %p file: %p \n", inode, file);
+        P9_DPRINTK(P9_DEBUG_VFS, "inode: %p file: %p \n", inode, file);
+        v9ses = v9fs_inode2v9ses(inode);
-        vfid = v9fs_fid_clone(file->f_path.dentry);
-        if (IS_ERR(vfid))
-                return PTR_ERR(vfid);
        omode = v9fs_uflags2omode(file->f_flags);
-        err = v9fs_t_open(v9ses, vfid->fid, omode, &fcall);
+        fid = file->private_data;
-        if (err < 0) {
+        if (!fid) {
-                PRINT_FCALL_ERROR("open failed", fcall);
+                fid = v9fs_fid_clone(file->f_path.dentry);
-                goto Clunk_Fid;
+                if (IS_ERR(fid))
+                        return PTR_ERR(fid);
+                err = p9_client_open(fid, omode);
+                if (err < 0) {
+                        p9_client_clunk(fid);
+                        return err;
+                }
+                if (omode & P9_OTRUNC) {
+                        inode->i_size = 0;
+                        inode->i_blocks = 0;
+                }
        }
-        file->private_data = vfid;
+        file->private_data = fid;
-        vfid->fidopen = 1;
+        if ((fid->qid.version) && (v9ses->cache)) {
-        vfid->fidclunked = 0;
+                P9_DPRINTK(P9_DEBUG_VFS, "cached");
-        vfid->iounit = fcall->params.ropen.iounit;
-        vfid->rdir_pos = 0;
-        vfid->rdir_fcall = NULL;
-        vfid->filp = file;
-        kfree(fcall);
-        if((vfid->qid.version) && (v9ses->cache)) {
-                dprintk(DEBUG_VFS, "cached");
                /* enable cached file options */
                if(file->f_op == &v9fs_file_operations)
                        file->f_op = &v9fs_cached_file_operations;
        }
        return 0;
-Clunk_Fid:
-        v9fs_fid_clunk(v9ses, vfid);
-        kfree(fcall);
-        return err;
 }
 /**
@@ -110,7 +102,7 @@ static int v9fs_file_lock(struct file *filp, int cmd, struct file_lock *fl)
        int res = 0;
        struct inode *inode = filp->f_path.dentry->d_inode;
-        dprintk(DEBUG_VFS, "filp: %p lock: %p\n", filp, fl);
+        P9_DPRINTK(P9_DEBUG_VFS, "filp: %p lock: %p\n", filp, fl);
        /* No mandatory locks */
        if ((inode->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID)
@@ -136,55 +128,16 @@ static ssize_t
 v9fs_file_read(struct file *filp, char __user * data, size_t count,
               loff_t * offset)
 {
-        struct inode *inode = filp->f_path.dentry->d_inode;
+        int ret;
-        struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode);
+        struct p9_fid *fid;
-        struct v9fs_fid *v9f = filp->private_data;
-        struct v9fs_fcall *fcall = NULL;
-        int fid = v9f->fid;
-        int rsize = 0;
-        int result = 0;
-        int total = 0;
-        int n;
-        dprintk(DEBUG_VFS, "\n");
-        rsize = v9ses->maxdata - V9FS_IOHDRSZ;
-        if (v9f->iounit != 0 && rsize > v9f->iounit)
-                rsize = v9f->iounit;
-        do {
-                if (count < rsize)
-                        rsize = count;
-                result = v9fs_t_read(v9ses, fid, *offset, rsize, &fcall);
+        P9_DPRINTK(P9_DEBUG_VFS, "\n");
+        fid = filp->private_data;
+        ret = p9_client_uread(fid, data, *offset, count);
+        if (ret > 0)
+                *offset += ret;
-                if (result < 0) {
+        return ret;
-                        printk(KERN_ERR "9P2000: v9fs_t_read returned %d\n",
-                               result);
-                        kfree(fcall);
-                        return total;
-                } else
-                        *offset += result;
-                n = copy_to_user(data, fcall->params.rread.data, result);
-                if (n) {
-                        dprintk(DEBUG_ERROR, "Problem copying to user %d\n", n);
-                        kfree(fcall);
-                        return -EFAULT;
-                }
-                count -= result;
-                data += result;
-                total += result;
-                kfree(fcall);
-                if (result < rsize)
-                        break;
-        } while (count);
-        return total;
 }
 /**
@@ -200,50 +153,25 @@ static ssize_t
 v9fs_file_write(struct file *filp, const char __user * data,
                size_t count, loff_t * offset)
 {
+        int ret;
+        struct p9_fid *fid;
        struct inode *inode = filp->f_path.dentry->d_inode;
-        struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode);
-        struct v9fs_fid *v9fid = filp->private_data;
-        struct v9fs_fcall *fcall;
-        int fid = v9fid->fid;
-        int result = -EIO;
-        int rsize = 0;
-        int total = 0;
-        dprintk(DEBUG_VFS, "data %p count %d offset %x\n", data, (int)count,
-                (int)*offset);
-        rsize = v9ses->maxdata - V9FS_IOHDRSZ;
-        if (v9fid->iounit != 0 && rsize > v9fid->iounit)
-                rsize = v9fid->iounit;
-        do {
-                if (count < rsize)
-                        rsize = count;
-                result = v9fs_t_write(v9ses, fid, *offset, rsize, data, &fcall);
+        P9_DPRINTK(P9_DEBUG_VFS, "data %p count %d offset %x\n", data,
-                if (result < 0) {
+                (int)count, (int)*offset);
-                        PRINT_FCALL_ERROR("error while writing", fcall);
-                        kfree(fcall);
-                        return result;
-                } else
-                        *offset += result;
-                kfree(fcall);
+        fid = filp->private_data;
-                fcall = NULL;
+        ret = p9_client_uwrite(fid, data, *offset, count);
+        if (ret > 0)
+                *offset += ret;
-                if (result != rsize) {
+        if (*offset > inode->i_size) {
-                        eprintk(KERN_ERR,
+                inode->i_size = *offset;
-                                "short write: v9fs_t_write returned %d\n",
+                inode->i_blocks = (inode->i_size + 512 - 1) >> 9;
-                                result);
+        }
-                        break;
-                }
-                count -= result;
-                data += result;
-                total += result;
-        } while (count);
        invalidate_inode_pages2(inode->i_mapping);
-        return total;
+        return ret;
 }
 static const struct file_operations v9fs_cached_file_operations = {
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index c76cd8fa3f6c..e5c45eed58a9 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -34,10 +34,10 @@
 #include <linux/namei.h>
 #include <linux/idr.h>
 #include <linux/sched.h>
+#include <net/9p/9p.h>
+#include <net/9p/client.h>
-#include "debug.h"
 #include "v9fs.h"
-#include "9p.h"
 #include "v9fs_vfs.h"
 #include "fid.h"
@@ -58,27 +58,27 @@ static int unixmode2p9mode(struct v9fs_session_info *v9ses, int mode)
        int res;
        res = mode & 0777;
        if (S_ISDIR(mode))
-                res |= V9FS_DMDIR;
+                res |= P9_DMDIR;
        if (v9ses->extended) {
                if (S_ISLNK(mode))
-                        res |= V9FS_DMSYMLINK;
+                        res |= P9_DMSYMLINK;
                if (v9ses->nodev == 0) {
                        if (S_ISSOCK(mode))
-                                res |= V9FS_DMSOCKET;
+                                res |= P9_DMSOCKET;
                        if (S_ISFIFO(mode))
-                                res |= V9FS_DMNAMEDPIPE;
+                                res |= P9_DMNAMEDPIPE;
                        if (S_ISBLK(mode))
-                                res |= V9FS_DMDEVICE;
+                                res |= P9_DMDEVICE;
                        if (S_ISCHR(mode))
-                                res |= V9FS_DMDEVICE;
+                                res |= P9_DMDEVICE;
                }
                if ((mode & S_ISUID) == S_ISUID)
-                        res |= V9FS_DMSETUID;
+                        res |= P9_DMSETUID;
                if ((mode & S_ISGID) == S_ISGID)
-                        res |= V9FS_DMSETGID;
+                        res |= P9_DMSETGID;
-                if ((mode & V9FS_DMLINK))
+                if ((mode & P9_DMLINK))
-                        res |= V9FS_DMLINK;
+                        res |= P9_DMLINK;
        }
        return res;
@@ -97,27 +97,27 @@ static int p9mode2unixmode(struct v9fs_session_info *v9ses, int mode)
        res = mode & 0777;
-        if ((mode & V9FS_DMDIR) == V9FS_DMDIR)
+        if ((mode & P9_DMDIR) == P9_DMDIR)
                res |= S_IFDIR;
-        else if ((mode & V9FS_DMSYMLINK) && (v9ses->extended))
+        else if ((mode & P9_DMSYMLINK) && (v9ses->extended))
                res |= S_IFLNK;
-        else if ((mode & V9FS_DMSOCKET) && (v9ses->extended)
+        else if ((mode & P9_DMSOCKET) && (v9ses->extended)
                 && (v9ses->nodev == 0))
                res |= S_IFSOCK;
-        else if ((mode & V9FS_DMNAMEDPIPE) && (v9ses->extended)
+        else if ((mode & P9_DMNAMEDPIPE) && (v9ses->extended)
                 && (v9ses->nodev == 0))
                res |= S_IFIFO;
-        else if ((mode & V9FS_DMDEVICE) && (v9ses->extended)
+        else if ((mode & P9_DMDEVICE) && (v9ses->extended)
                 && (v9ses->nodev == 0))
                res |= S_IFBLK;
        else
                res |= S_IFREG;
        if (v9ses->extended) {
-                if ((mode & V9FS_DMSETUID) == V9FS_DMSETUID)
+                if ((mode & P9_DMSETUID) == P9_DMSETUID)
                        res |= S_ISUID;
-                if ((mode & V9FS_DMSETGID) == V9FS_DMSETGID)
+                if ((mode & P9_DMSETGID) == P9_DMSETGID)
                        res |= S_ISGID;
        }
@@ -132,26 +132,26 @@ int v9fs_uflags2omode(int uflags)
        switch (uflags&3) {
        default:
        case O_RDONLY:
-                ret = V9FS_OREAD;
+                ret = P9_OREAD;
                break;
        case O_WRONLY:
-                ret = V9FS_OWRITE;
+                ret = P9_OWRITE;
                break;
        case O_RDWR:
-                ret = V9FS_ORDWR;
+                ret = P9_ORDWR;
                break;
        }
        if (uflags & O_EXCL)
-                ret |= V9FS_OEXCL;
+                ret |= P9_OEXCL;
        if (uflags & O_TRUNC)
-                ret |= V9FS_OTRUNC;
+                ret |= P9_OTRUNC;
        if (uflags & O_APPEND)
-                ret |= V9FS_OAPPEND;
+                ret |= P9_OAPPEND;
        return ret;
 }
@@ -164,7 +164,7 @@ int v9fs_uflags2omode(int uflags)
 */
 static void
-v9fs_blank_wstat(struct v9fs_wstat *wstat)
+v9fs_blank_wstat(struct p9_wstat *wstat)
 {
        wstat->type = ~0;
        wstat->dev = ~0;
@@ -197,7 +197,7 @@ struct inode *v9fs_get_inode(struct super_block *sb, int mode)
        struct inode *inode;
        struct v9fs_session_info *v9ses = sb->s_fs_info;
-        dprintk(DEBUG_VFS, "super block: %p mode: %o\n", sb, mode);
+        P9_DPRINTK(P9_DEBUG_VFS, "super block: %p mode: %o\n", sb, mode);
        inode = new_inode(sb);
        if (inode) {
@@ -215,7 +215,8 @@ struct inode *v9fs_get_inode(struct super_block *sb, int mode)
                case S_IFCHR:
                case S_IFSOCK:
                        if(!v9ses->extended) {
-                                dprintk(DEBUG_ERROR, "special files without extended mode\n");
+                                P9_DPRINTK(P9_DEBUG_ERROR,
+                                      "special files without extended mode\n");
                                return ERR_PTR(-EINVAL);
                        }
                        init_special_inode(inode, inode->i_mode,
@@ -227,7 +228,8 @@ struct inode *v9fs_get_inode(struct super_block *sb, int mode)
                        break;
                case S_IFLNK:
                        if(!v9ses->extended) {
-                                dprintk(DEBUG_ERROR, "extended modes used w/o 9P2000.u\n");
+                                P9_DPRINTK(P9_DEBUG_ERROR,
+                                        "extended modes used w/o 9P2000.u\n");
                                return ERR_PTR(-EINVAL);
                        }
                        inode->i_op = &v9fs_symlink_inode_operations;
@@ -241,71 +243,19 @@ struct inode *v9fs_get_inode(struct super_block *sb, int mode)
                        inode->i_fop = &v9fs_dir_operations;
                        break;
                default:
-                        dprintk(DEBUG_ERROR, "BAD mode 0x%x S_IFMT 0x%x\n",
+                        P9_DPRINTK(P9_DEBUG_ERROR,
+                                "BAD mode 0x%x S_IFMT 0x%x\n",
                                mode, mode & S_IFMT);
                        return ERR_PTR(-EINVAL);
                }
        } else {
-                eprintk(KERN_WARNING, "Problem allocating inode\n");
+                P9_EPRINTK(KERN_WARNING, "Problem allocating inode\n");
                return ERR_PTR(-ENOMEM);
        }
        return inode;
 }
-static int
+/*
-v9fs_create(struct v9fs_session_info *v9ses, u32 pfid, char *name, u32 perm,
-        u8 mode, char *extension, u32 *fidp, struct v9fs_qid *qid, u32 *iounit)
-{
-        int fid;
-        int err;
-        struct v9fs_fcall *fcall;
-        fid = v9fs_get_idpool(&v9ses->fidpool);
-        if (fid < 0) {
-                eprintk(KERN_WARNING, "no free fids available\n");
-                return -ENOSPC;
-        }
-        err = v9fs_t_walk(v9ses, pfid, fid, NULL, &fcall);
-        if (err < 0) {
-                PRINT_FCALL_ERROR("clone error", fcall);
-                if (fcall && fcall->id == RWALK)
-                        goto clunk_fid;
-                else
-                        goto put_fid;
-        }
-        kfree(fcall);
-        err = v9fs_t_create(v9ses, fid, name, perm, mode, extension, &fcall);
-        if (err < 0) {
-                PRINT_FCALL_ERROR("create fails", fcall);
-                goto clunk_fid;
-        }
-        if (iounit)
-                *iounit = fcall->params.rcreate.iounit;
-        if (qid)
-                *qid = fcall->params.rcreate.qid;
-        if (fidp)
-                *fidp = fid;
-        kfree(fcall);
-        return 0;
-clunk_fid:
-        v9fs_t_clunk(v9ses, fid);
-        fid = V9FS_NOFID;
-put_fid:
-        if (fid != V9FS_NOFID)
-                v9fs_put_idpool(fid, &v9ses->fidpool);
-        kfree(fcall);
-        return err;
-}
 static struct v9fs_fid*
 v9fs_clone_walk(struct v9fs_session_info *v9ses, u32 fid, struct dentry *dentry)
 {
@@ -355,23 +305,25 @@ error:
        kfree(fcall);
        return ERR_PTR(err);
 }
+*/
 static struct inode *
-v9fs_inode_from_fid(struct v9fs_session_info *v9ses, u32 fid,
+v9fs_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid,
        struct super_block *sb)
 {
        int err, umode;
        struct inode *ret;
-        struct v9fs_fcall *fcall;
+        struct p9_stat *st;
        ret = NULL;
-        err = v9fs_t_stat(v9ses, fid, &fcall);
+        st = p9_client_stat(fid);
-        if (err) {
+        if (IS_ERR(st)) {
-                PRINT_FCALL_ERROR("stat error", fcall);
+                err = PTR_ERR(st);
+                st = NULL;
                goto error;
        }
-        umode = p9mode2unixmode(v9ses, fcall->params.rstat.stat.mode);
+        umode = p9mode2unixmode(v9ses, st->mode);
        ret = v9fs_get_inode(sb, umode);
        if (IS_ERR(ret)) {
                err = PTR_ERR(ret);
@@ -379,12 +331,13 @@ v9fs_inode_from_fid(struct v9fs_session_info *v9ses, u32 fid,
                goto error;
        }
-        v9fs_stat2inode(&fcall->params.rstat.stat, ret, sb);
+        v9fs_stat2inode(st, ret, sb);
-        kfree(fcall);
+        ret->i_ino = v9fs_qid2ino(&st->qid);
+        kfree(st);
        return ret;
 error:
-        kfree(fcall);
+        kfree(st);
        if (ret)
                iput(ret);
@@ -401,43 +354,20 @@ error:
 static int v9fs_remove(struct inode *dir, struct dentry *file, int rmdir)
 {
-        struct v9fs_fcall *fcall = NULL;
+        struct inode *file_inode;
-        struct super_block *sb = NULL;
+        struct v9fs_session_info *v9ses;
-        struct v9fs_session_info *v9ses = NULL;
+        struct p9_fid *v9fid;
-        struct v9fs_fid *v9fid = NULL;
-        struct inode *file_inode = NULL;
-        int fid = -1;
-        int result = 0;
-        dprintk(DEBUG_VFS, "inode: %p dentry: %p rmdir: %d\n", dir, file,
+        P9_DPRINTK(P9_DEBUG_VFS, "inode: %p dentry: %p rmdir: %d\n", dir, file,
                rmdir);
        file_inode = file->d_inode;
-        sb = file_inode->i_sb;
        v9ses = v9fs_inode2v9ses(file_inode);
        v9fid = v9fs_fid_clone(file);
        if(IS_ERR(v9fid))
                return PTR_ERR(v9fid);
-        fid = v9fid->fid;
+        return p9_client_remove(v9fid);
-        if (fid < 0) {
-                dprintk(DEBUG_ERROR, "inode #%lu, no fid!\n",
-                        file_inode->i_ino);
-                return -EBADF;
-        }
-        result = v9fs_t_remove(v9ses, fid, &fcall);
-        if (result < 0) {
-                PRINT_FCALL_ERROR("remove fails", fcall);
-                goto Error;
-        }
-        v9fs_put_idpool(fid, &v9ses->fidpool);
-        v9fs_fid_destroy(v9fid);
-Error:
-        kfree(fcall);
-        return result;
 }
 static int
@@ -446,61 +376,59 @@ v9fs_open_created(struct inode *inode, struct file *file)
        return 0;
 }
 /**
- * v9fs_vfs_create - VFS hook to create files
+ * v9fs_create - Create a file
- * @inode: directory inode that is being deleted
+ * @dentry:  dentry that is being created
- * @dentry:  dentry that is being deleted
+ * @perm: create permissions
- * @mode: create permissions
+ * @mode: open mode
- * @nd: path information
 *
 */
+static struct p9_fid *
-static int
+v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir,
-v9fs_vfs_create(struct inode *dir, struct dentry *dentry, int mode,
+                struct dentry *dentry, char *extension, u32 perm, u8 mode)
-                struct nameidata *nd)
 {
        int err;
-        u32 fid, perm, iounit;
+        char *name;
-        int flags;
+        struct p9_fid *dfid, *ofid, *fid;
-        struct v9fs_session_info *v9ses;
-        struct v9fs_fid *dfid, *vfid, *ffid;
        struct inode *inode;
-        struct v9fs_qid qid;
-        struct file *filp;
-        inode = NULL;
+        err = 0;
-        vfid = NULL;
+        ofid = NULL;
-        v9ses = v9fs_inode2v9ses(dir);
+        fid = NULL;
+        name = (char *) dentry->d_name.name;
        dfid = v9fs_fid_clone(dentry->d_parent);
        if(IS_ERR(dfid)) {
                err = PTR_ERR(dfid);
+                dfid = NULL;
                goto error;
        }
-        perm = unixmode2p9mode(v9ses, mode);
+        /* clone a fid to use for creation */
-        if (nd && nd->flags & LOOKUP_OPEN)
+        ofid = p9_client_walk(dfid, 0, NULL, 1);
-                flags = nd->intent.open.flags - 1;
+        if (IS_ERR(ofid)) {
-        else
+                err = PTR_ERR(ofid);
-                flags = O_RDWR;
+                ofid = NULL;
+                goto error;
-        err = v9fs_create(v9ses, dfid->fid, (char *) dentry->d_name.name,
+        }
-                perm, v9fs_uflags2omode(flags), NULL, &fid, &qid, &iounit);
-        if (err)
+        err = p9_client_fcreate(ofid, name, perm, mode, extension);
-                goto clunk_dfid;
+        if (err < 0)
+                goto error;
-        vfid = v9fs_clone_walk(v9ses, dfid->fid, dentry);
+        /* now walk from the parent so we can get an unopened fid */
-        v9fs_fid_clunk(v9ses, dfid);
+        fid = p9_client_walk(dfid, 1, &name, 0);
-        if (IS_ERR(vfid)) {
+        if (IS_ERR(fid)) {
-                err = PTR_ERR(vfid);
+                err = PTR_ERR(fid);
-                vfid = NULL;
+                fid = NULL;
                goto error;
-        }
+        } else
+                dfid = NULL;
-        inode = v9fs_inode_from_fid(v9ses, vfid->fid, dir->i_sb);
+        /* instantiate inode and assign the unopened fid to the dentry */
+        inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb);
        if (IS_ERR(inode)) {
                err = PTR_ERR(inode);
-                inode = NULL;
                goto error;
        }
@@ -508,35 +436,78 @@ v9fs_vfs_create(struct inode *dir, struct dentry *dentry, int mode,
                dentry->d_op = &v9fs_cached_dentry_operations;
        else
                dentry->d_op = &v9fs_dentry_operations;
        d_instantiate(dentry, inode);
+        v9fs_fid_add(dentry, fid);
+        return ofid;
-        if (nd && nd->flags & LOOKUP_OPEN) {
+error:
-                ffid = v9fs_fid_create(v9ses, fid);
+        if (dfid)
-                if (!ffid)
+                p9_client_clunk(dfid);
-                        return -ENOMEM;
+        if (ofid)
+                p9_client_clunk(ofid);
+        if (fid)
+                p9_client_clunk(fid);
+        return ERR_PTR(err);
+}
+/**
+ * v9fs_vfs_create - VFS hook to create files
+ * @dir: directory inode in which the file is being created
+ * @dentry:  dentry that is being created
+ * @mode: create permissions
+ * @nd: path information
+ *
+ */
+static int
+v9fs_vfs_create(struct inode *dir, struct dentry *dentry, int mode,
+                struct nameidata *nd)
+{
+        int err;
+        u32 perm;
+        int flags;
+        struct v9fs_session_info *v9ses;
+        struct p9_fid *fid;
+        struct file *filp;
+        err = 0;
+        fid = NULL;
+        v9ses = v9fs_inode2v9ses(dir);
+        perm = unixmode2p9mode(v9ses, mode);
+        if (nd && nd->flags & LOOKUP_OPEN)
+                flags = nd->intent.open.flags - 1;
+        else
+                flags = O_RDWR;
+        fid = v9fs_create(v9ses, dir, dentry, NULL, perm,
+                                                v9fs_uflags2omode(flags));
+        if (IS_ERR(fid)) {
+                err = PTR_ERR(fid);
+                fid = NULL;
+                goto error;
+        }
+        /* if we are opening a file, assign the open fid to the file */
+        if (nd && nd->flags & LOOKUP_OPEN) {
                filp = lookup_instantiate_filp(nd, dentry, v9fs_open_created);
                if (IS_ERR(filp)) {
-                        v9fs_fid_destroy(ffid);
+                        err = PTR_ERR(filp);
-                        return PTR_ERR(filp);
+                        goto error;
                }
-                ffid->rdir_pos = 0;
+                filp->private_data = fid;
-                ffid->rdir_fcall = NULL;
+        } else
-                ffid->fidopen = 1;
+                p9_client_clunk(fid);
-                ffid->iounit = iounit;
-                ffid->filp = filp;
-                filp->private_data = ffid;
-        }
        return 0;
-clunk_dfid:
-        v9fs_fid_clunk(v9ses, dfid);
 error:
-        if (vfid)
+        if (fid)
-                v9fs_fid_destroy(vfid);
+                p9_client_clunk(fid);
        return err;
 }
@@ -552,57 +523,23 @@ error:
 static int v9fs_vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 {
        int err;
-        u32 fid, perm;
+        u32 perm;
        struct v9fs_session_info *v9ses;
-        struct v9fs_fid *dfid, *vfid;
+        struct p9_fid *fid;
-        struct inode *inode;
-        inode = NULL;
+        P9_DPRINTK(P9_DEBUG_VFS, "name %s\n", dentry->d_name.name);
-        vfid = NULL;
+        err = 0;
        v9ses = v9fs_inode2v9ses(dir);
-        dfid = v9fs_fid_clone(dentry->d_parent);
-        if(IS_ERR(dfid)) {
-                err = PTR_ERR(dfid);
-                goto error;
-        }
        perm = unixmode2p9mode(v9ses, mode | S_IFDIR);
+        fid = v9fs_create(v9ses, dir, dentry, NULL, perm, P9_OREAD);
-        err = v9fs_create(v9ses, dfid->fid, (char *) dentry->d_name.name,
+        if (IS_ERR(fid)) {
-                perm, V9FS_OREAD, NULL, &fid, NULL, NULL);
+                err = PTR_ERR(fid);
+                fid = NULL;
-        if (err) {
-                dprintk(DEBUG_ERROR, "create error %d\n", err);
-                goto clean_up_dfid;
        }
-        vfid = v9fs_clone_walk(v9ses, dfid->fid, dentry);
+        if (fid)
-        if (IS_ERR(vfid)) {
+                p9_client_clunk(fid);
-                err = PTR_ERR(vfid);
-                vfid = NULL;
-                goto clean_up_dfid;
-        }
-        v9fs_fid_clunk(v9ses, dfid);
-        inode = v9fs_inode_from_fid(v9ses, vfid->fid, dir->i_sb);
-        if (IS_ERR(inode)) {
-                err = PTR_ERR(inode);
-                inode = NULL;
-                v9fs_fid_destroy(vfid);
-                goto error;
-        }
-        if(v9ses->cache)
-                dentry->d_op = &v9fs_cached_dentry_operations;
-        else
-                dentry->d_op = &v9fs_dentry_operations;
-        d_instantiate(dentry, inode);
-        return 0;
-clean_up_dfid:
-        v9fs_fid_clunk(v9ses, dfid);
-error:
        return err;
 }
@@ -619,104 +556,54 @@ static struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
 {
        struct super_block *sb;
        struct v9fs_session_info *v9ses;
-        struct v9fs_fid *dirfid;
+        struct p9_fid *dfid, *fid;
-        struct v9fs_fid *fid;
        struct inode *inode;
-        struct v9fs_fcall *fcall = NULL;
+        char *name;
-        int dirfidnum = -1;
-        int newfid = -1;
        int result = 0;
-        dprintk(DEBUG_VFS, "dir: %p dentry: (%s) %p nameidata: %p\n",
+        P9_DPRINTK(P9_DEBUG_VFS, "dir: %p dentry: (%s) %p nameidata: %p\n",
                dir, dentry->d_name.name, dentry, nameidata);
        sb = dir->i_sb;
        v9ses = v9fs_inode2v9ses(dir);
-        dirfid = v9fs_fid_lookup(dentry->d_parent);
+        dfid = v9fs_fid_lookup(dentry->d_parent);
+        if (IS_ERR(dfid))
-        if(IS_ERR(dirfid))
+                return ERR_PTR(PTR_ERR(dfid));
-                return ERR_PTR(PTR_ERR(dirfid));
+        name = (char *) dentry->d_name.name;
-        dirfidnum = dirfid->fid;
+        fid = p9_client_walk(dfid, 1, &name, 1);
+        if (IS_ERR(fid)) {
-        newfid = v9fs_get_idpool(&v9ses->fidpool);
+                result = PTR_ERR(fid);
-        if (newfid < 0) {
-                eprintk(KERN_WARNING, "newfid fails!\n");
-                result = -ENOSPC;
-                goto Release_Dirfid;
-        }
-        result = v9fs_t_walk(v9ses, dirfidnum, newfid,
-                (char *)dentry->d_name.name, &fcall);
-        up(&dirfid->lock);
-        if (result < 0) {
-                if (fcall && fcall->id == RWALK)
-                        v9fs_t_clunk(v9ses, newfid);
-                else
-                        v9fs_put_idpool(newfid, &v9ses->fidpool);
                if (result == -ENOENT) {
                        d_add(dentry, NULL);
-                        dprintk(DEBUG_VFS,
-                                "Return negative dentry %p count %d\n",
-                                dentry, atomic_read(&dentry->d_count));
-                        kfree(fcall);
                        return NULL;
                }
-                dprintk(DEBUG_ERROR, "walk error:%d\n", result);
-                goto FreeFcall;
-        }
-        kfree(fcall);
-        result = v9fs_t_stat(v9ses, newfid, &fcall);
-        if (result < 0) {
-                dprintk(DEBUG_ERROR, "stat error\n");
-                goto FreeFcall;
-        }
-        inode = v9fs_get_inode(sb, p9mode2unixmode(v9ses,
-                fcall->params.rstat.stat.mode));
-        if (IS_ERR(inode) && (PTR_ERR(inode) == -ENOSPC)) {
+                return ERR_PTR(result);
-                eprintk(KERN_WARNING, "inode alloc failes, returns %ld\n",
-                        PTR_ERR(inode));
-                result = -ENOSPC;
-                goto FreeFcall;
        }
-        inode->i_ino = v9fs_qid2ino(&fcall->params.rstat.stat.qid);
+        inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb);
+        if (IS_ERR(inode)) {
-        fid = v9fs_fid_create(v9ses, newfid);
+                result = PTR_ERR(inode);
-        if (fid == NULL) {
+                inode = NULL;
-                dprintk(DEBUG_ERROR, "couldn't insert\n");
+                goto error;
-                result = -ENOMEM;
-                goto FreeFcall;
        }
-        result = v9fs_fid_insert(fid, dentry);
+        result = v9fs_fid_add(dentry, fid);
        if (result < 0)
-                goto FreeFcall;
+                goto error;
-        fid->qid = fcall->params.rstat.stat.qid;
-        v9fs_stat2inode(&fcall->params.rstat.stat, inode, inode->i_sb);
        if((fid->qid.version)&&(v9ses->cache))
                dentry->d_op = &v9fs_cached_dentry_operations;
        else
                dentry->d_op = &v9fs_dentry_operations;
        d_add(dentry, inode);
-        kfree(fcall);
        return NULL;
-Release_Dirfid:
+error:
-        up(&dirfid->lock);
+        if (fid)
+                p9_client_clunk(fid);
-FreeFcall:
-        kfree(fcall);
        return ERR_PTR(result);
 }
@@ -758,73 +645,54 @@ static int
 v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
                 struct inode *new_dir, struct dentry *new_dentry)
 {
-        struct inode *old_inode = old_dentry->d_inode;
+        struct inode *old_inode;
-        struct v9fs_session_info *v9ses = v9fs_inode2v9ses(old_inode);
+        struct v9fs_session_info *v9ses;
-        struct v9fs_fid *oldfid = v9fs_fid_lookup(old_dentry);
+        struct p9_fid *oldfid;
-        struct v9fs_fid *olddirfid;
+        struct p9_fid *olddirfid;
-        struct v9fs_fid *newdirfid;
+        struct p9_fid *newdirfid;
-        struct v9fs_wstat wstat;
+        struct p9_wstat wstat;
-        struct v9fs_fcall *fcall = NULL;
+        int retval;
-        int fid = -1;
-        int olddirfidnum = -1;
-        int newdirfidnum = -1;
-        int retval = 0;
-        dprintk(DEBUG_VFS, "\n");
+        P9_DPRINTK(P9_DEBUG_VFS, "\n");
+        retval = 0;
+        old_inode = old_dentry->d_inode;
+        v9ses = v9fs_inode2v9ses(old_inode);
+        oldfid = v9fs_fid_lookup(old_dentry);
         if(IS_ERR(oldfid))
                 return PTR_ERR(oldfid);
         olddirfid = v9fs_fid_clone(old_dentry->d_parent);
         if(IS_ERR(olddirfid)) {
                 retval = PTR_ERR(olddirfid);
-                goto Release_lock;
+                goto done;
         }
         newdirfid = v9fs_fid_clone(new_dentry->d_parent);
         if(IS_ERR(newdirfid)) {
                 retval = PTR_ERR(newdirfid);
-                goto Clunk_olddir;
+                goto clunk_olddir;
         }
         /* 9P can only handle file rename in the same directory */
         if (memcmp(&olddirfid->qid, &newdirfid->qid, sizeof(newdirfid->qid))) {
-                dprintk(DEBUG_ERROR, "old dir and new dir are different\n");
+                P9_DPRINTK(P9_DEBUG_ERROR,
+                                "old dir and new dir are different\n");
                 retval = -EXDEV;
-                goto Clunk_newdir;
+                goto clunk_newdir;
-        }
-        fid = oldfid->fid;
-        olddirfidnum = olddirfid->fid;
-        newdirfidnum = newdirfid->fid;
-        if (fid < 0) {
-                dprintk(DEBUG_ERROR, "no fid for old file #%lu\n",
-                        old_inode->i_ino);
-                retval = -EBADF;
-                goto Clunk_newdir;
         }
         v9fs_blank_wstat(&wstat);
         wstat.muid = v9ses->name;
         wstat.name = (char *) new_dentry->d_name.name;
+        retval = p9_client_wstat(oldfid, &wstat);
-        retval = v9fs_t_wstat(v9ses, fid, &wstat, &fcall);
+clunk_newdir:
+        p9_client_clunk(newdirfid);
-        if (retval < 0)
+clunk_olddir:
-                PRINT_FCALL_ERROR("wstat error", fcall);
+        p9_client_clunk(olddirfid);
-        kfree(fcall);
-Clunk_newdir:
-        v9fs_fid_clunk(v9ses, newdirfid);
-Clunk_olddir:
-        v9fs_fid_clunk(v9ses, olddirfid);
-Release_lock:
-        up(&oldfid->lock);
+done:
         return retval;
 }
@@ -840,28 +708,30 @@ static int
 v9fs_vfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
                 struct kstat *stat)
 {
-        struct v9fs_fcall *fcall = NULL;
+        int err;
-        struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dentry->d_inode);
+        struct v9fs_session_info *v9ses;
-        struct v9fs_fid *fid = v9fs_fid_clone(dentry);
+        struct p9_fid *fid;
-        int err = -EPERM;
+        struct p9_stat *st;
-        dprintk(DEBUG_VFS, "dentry: %p\n", dentry);
+        P9_DPRINTK(P9_DEBUG_VFS, "dentry: %p\n", dentry);
-        if(IS_ERR(fid))
+        err = -EPERM;
+        v9ses = v9fs_inode2v9ses(dentry->d_inode);
+        if (v9ses->cache == CACHE_LOOSE)
+                return simple_getattr(mnt, dentry, stat);
+        fid = v9fs_fid_lookup(dentry);
+        if (IS_ERR(fid))
                return PTR_ERR(fid);
-        err = v9fs_t_stat(v9ses, fid->fid, &fcall);
+        st = p9_client_stat(fid);
+        if (IS_ERR(st))
+                return PTR_ERR(st);
-        if (err < 0)
+        v9fs_stat2inode(st, dentry->d_inode, dentry->d_inode->i_sb);
-                dprintk(DEBUG_ERROR, "stat error\n");
-        else {
-                v9fs_stat2inode(&fcall->params.rstat.stat, dentry->d_inode,
-                                  dentry->d_inode->i_sb);
                generic_fillattr(dentry->d_inode, stat);
-        }
-        kfree(fcall);
+        kfree(st);
-        v9fs_fid_clunk(v9ses, fid);
+        return 0;
-        return err;
 }
 /**
@@ -873,13 +743,15 @@ v9fs_vfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
 static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr)
 {
-        struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dentry->d_inode);
+        int retval;
-        struct v9fs_fid *fid = v9fs_fid_clone(dentry);
+        struct v9fs_session_info *v9ses;
-        struct v9fs_fcall *fcall = NULL;
+        struct p9_fid *fid;
-        struct v9fs_wstat wstat;
+        struct p9_wstat wstat;
-        int res = -EPERM;
-        dprintk(DEBUG_VFS, "\n");
+        P9_DPRINTK(P9_DEBUG_VFS, "\n");
+        retval = -EPERM;
+        v9ses = v9fs_inode2v9ses(dentry->d_inode);
+        fid = v9fs_fid_lookup(dentry);
        if(IS_ERR(fid))
                return PTR_ERR(fid);
@@ -904,17 +776,11 @@ static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr)
                        wstat.n_gid = iattr->ia_gid;
        }
-        res = v9fs_t_wstat(v9ses, fid->fid, &wstat, &fcall);
+        retval = p9_client_wstat(fid, &wstat);
+        if (retval >= 0)
+                retval = inode_setattr(dentry->d_inode, iattr);
-        if (res < 0)
+        return retval;
-                PRINT_FCALL_ERROR("wstat error", fcall);
-        kfree(fcall);
-        if (res >= 0)
-                res = inode_setattr(dentry->d_inode, iattr);
-        v9fs_fid_clunk(v9ses, fid);
-        return res;
 }
 /**
@@ -926,7 +792,7 @@ static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr)
 */
 void
-v9fs_stat2inode(struct v9fs_stat *stat, struct inode *inode,
+v9fs_stat2inode(struct p9_stat *stat, struct inode *inode,
        struct super_block *sb)
 {
        int n;
@@ -967,8 +833,9 @@ v9fs_stat2inode(struct v9fs_stat *stat, struct inode *inode,
                case 'b':
                        break;
                default:
-                        dprintk(DEBUG_ERROR, "Unknown special type %c (%.*s)\n",
+                        P9_DPRINTK(P9_DEBUG_ERROR,
-                                type, stat->extension.len, stat->extension.str);
+                                "Unknown special type %c (%.*s)\n", type,
+                                stat->extension.len, stat->extension.str);
                };
                inode->i_rdev = MKDEV(major, minor);
        } else
@@ -976,8 +843,8 @@ v9fs_stat2inode(struct v9fs_stat *stat, struct inode *inode,
        inode->i_size = stat->length;
-        inode->i_blocks =
+        /* i_blocks is counted in 512-byte units, not filesystem blocks */
-            (inode->i_size + sb->s_blocksize - 1) >> sb->s_blocksize_bits;
+        inode->i_blocks = (inode->i_size + 512 - 1) >> 9;
 }
 /**
@@ -987,7 +854,7 @@ v9fs_stat2inode(struct v9fs_stat *stat, struct inode *inode,
 * BUG: potential for inode number collisions?
 */
-ino_t v9fs_qid2ino(struct v9fs_qid *qid)
+ino_t v9fs_qid2ino(struct p9_qid *qid)
 {
        u64 path = qid->path + 2;
        ino_t i = 0;
@@ -1010,56 +877,46 @@ ino_t v9fs_qid2ino(struct v9fs_qid *qid)
 static int v9fs_readlink(struct dentry *dentry, char *buffer, int buflen)
 {
-        int retval = -EPERM;
+        int retval;
-        struct v9fs_fcall *fcall = NULL;
+        struct v9fs_session_info *v9ses;
-        struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dentry->d_inode);
+        struct p9_fid *fid;
-        struct v9fs_fid *fid = v9fs_fid_clone(dentry);
+        struct p9_stat *st;
+        P9_DPRINTK(P9_DEBUG_VFS, " %s\n", dentry->d_name.name);
+        retval = -EPERM;
+        v9ses = v9fs_inode2v9ses(dentry->d_inode);
+        fid = v9fs_fid_lookup(dentry);
        if(IS_ERR(fid))
                return PTR_ERR(fid);
-        if (!v9ses->extended) {
+        if (!v9ses->extended)
-                retval = -EBADF;
+                return -EBADF;
-                dprintk(DEBUG_ERROR, "not extended\n");
-                goto ClunkFid;
-        }
-        dprintk(DEBUG_VFS, " %s\n", dentry->d_name.name);
-        retval = v9fs_t_stat(v9ses, fid->fid, &fcall);
-        if (retval < 0) {
-                dprintk(DEBUG_ERROR, "stat error\n");
-                goto FreeFcall;
-        }
-        if (!fcall) {
+        st = p9_client_stat(fid);
-                retval = -EIO;
+        if (IS_ERR(st))
-                goto ClunkFid;
+                return PTR_ERR(st);
-        }
-        if (!(fcall->params.rstat.stat.mode & V9FS_DMSYMLINK)) {
+        if (!(st->mode & P9_DMSYMLINK)) {
                retval = -EINVAL;
-                goto FreeFcall;
+                goto done;
        }
        /* copy extension buffer into buffer */
-        if (fcall->params.rstat.stat.extension.len < buflen)
+        if (st->extension.len < buflen)
-                buflen = fcall->params.rstat.stat.extension.len + 1;
+                buflen = st->extension.len + 1;
-        memmove(buffer, fcall->params.rstat.stat.extension.str, buflen - 1);
+        memmove(buffer, st->extension.str, buflen - 1);
        buffer[buflen-1] = 0;
-        dprintk(DEBUG_ERROR, "%s -> %.*s (%s)\n", dentry->d_name.name, fcall->params.rstat.stat.extension.len,
+        P9_DPRINTK(P9_DEBUG_VFS,
-                fcall->params.rstat.stat.extension.str, buffer);
+                "%s -> %.*s (%s)\n", dentry->d_name.name, st->extension.len,
-        retval = buflen;
+                st->extension.str, buffer);
-FreeFcall:
+        retval = buflen;
-        kfree(fcall);
-ClunkFid:
-        v9fs_fid_clunk(v9ses, fid);
+done:
+        kfree(st);
        return retval;
 }
@@ -1084,14 +941,14 @@ static int v9fs_vfs_readlink(struct dentry *dentry, char __user * buffer,
        if (buflen > PATH_MAX)
                buflen = PATH_MAX;
-        dprintk(DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_iname, dentry);
+        P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_iname, dentry);
        retval = v9fs_readlink(dentry, link, buflen);
        if (retval > 0) {
                if ((ret = copy_to_user(buffer, link, retval)) != 0) {
-                        dprintk(DEBUG_ERROR, "problem copying to user: %d\n",
+                        P9_DPRINTK(P9_DEBUG_ERROR,
-                                ret);
+                                        "problem copying to user: %d\n", ret);
                        retval = ret;
                }
        }
@@ -1112,7 +969,7 @@ static void *v9fs_vfs_follow_link(struct dentry *dentry, struct nameidata *nd)
        int len = 0;
        char *link = __getname();
-        dprintk(DEBUG_VFS, "%s n", dentry->d_name.name);
+        P9_DPRINTK(P9_DEBUG_VFS, "%s n", dentry->d_name.name);
        if (!link)
                link = ERR_PTR(-ENOMEM);
@@ -1141,7 +998,7 @@ static void v9fs_vfs_put_link(struct dentry *dentry, struct nameidata *nd, void
 {
        char *s = nd_get_link(nd);
-        dprintk(DEBUG_VFS, " %s %s\n", dentry->d_name.name, s);
+        P9_DPRINTK(P9_DEBUG_VFS, " %s %s\n", dentry->d_name.name, s);
        if (!IS_ERR(s))
                __putname(s);
 }
@@ -1149,66 +1006,24 @@ static void v9fs_vfs_put_link(struct dentry *dentry, struct nameidata *nd, void
 static int v9fs_vfs_mkspecial(struct inode *dir, struct dentry *dentry,
        int mode, const char *extension)
 {
-        int err;
+        u32 perm;
-        u32 fid, perm;
        struct v9fs_session_info *v9ses;
-        struct v9fs_fid *dfid, *vfid = NULL;
+        struct p9_fid *fid;
-        struct inode *inode = NULL;
        v9ses = v9fs_inode2v9ses(dir);
        if (!v9ses->extended) {
-                dprintk(DEBUG_ERROR, "not extended\n");
+                P9_DPRINTK(P9_DEBUG_ERROR, "not extended\n");
                return -EPERM;
        }
-        dfid = v9fs_fid_clone(dentry->d_parent);
-        if(IS_ERR(dfid)) {
-                err = PTR_ERR(dfid);
-                goto error;
-        }
        perm = unixmode2p9mode(v9ses, mode);
+        fid = v9fs_create(v9ses, dir, dentry, (char *) extension, perm,
+                                                                P9_OREAD);
+        if (IS_ERR(fid))
+                return PTR_ERR(fid);
-        err = v9fs_create(v9ses, dfid->fid, (char *) dentry->d_name.name,
+        p9_client_clunk(fid);
-                perm, V9FS_OREAD, (char *) extension, &fid, NULL, NULL);
-        if (err)
-                goto clunk_dfid;
-        err = v9fs_t_clunk(v9ses, fid);
-        if (err)
-                goto clunk_dfid;
-        vfid = v9fs_clone_walk(v9ses, dfid->fid, dentry);
-        if (IS_ERR(vfid)) {
-                err = PTR_ERR(vfid);
-                vfid = NULL;
-                goto clunk_dfid;
-        }
-        inode = v9fs_inode_from_fid(v9ses, vfid->fid, dir->i_sb);
-        if (IS_ERR(inode)) {
-                err = PTR_ERR(inode);
-                inode = NULL;
-                goto free_vfid;
-        }
-        if(v9ses->cache)
-                dentry->d_op = &v9fs_cached_dentry_operations;
-        else
-                dentry->d_op = &v9fs_dentry_operations;
-        d_instantiate(dentry, inode);
        return 0;
-free_vfid:
-        v9fs_fid_destroy(vfid);
-clunk_dfid:
-        v9fs_fid_clunk(v9ses, dfid);
-error:
-        return err;
 }
 /**
@@ -1224,8 +1039,8 @@ error:
 static int
 v9fs_vfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
 {
-        dprintk(DEBUG_VFS, " %lu,%s,%s\n", dir->i_ino, dentry->d_name.name,
+        P9_DPRINTK(P9_DEBUG_VFS, " %lu,%s,%s\n", dir->i_ino,
-                symname);
+                                        dentry->d_name.name, symname);
        return v9fs_vfs_mkspecial(dir, dentry, S_IFLNK, symname);
 }
@@ -1247,11 +1062,11 @@ v9fs_vfs_link(struct dentry *old_dentry, struct inode *dir,
              struct dentry *dentry)
 {
        int retval;
-        struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dir);
+        struct p9_fid *oldfid;
-        struct v9fs_fid *oldfid;
        char *name;
-        dprintk(DEBUG_VFS, " %lu,%s,%s\n", dir->i_ino, dentry->d_name.name,
+        P9_DPRINTK(P9_DEBUG_VFS,
+                " %lu,%s,%s\n", dir->i_ino, dentry->d_name.name,
                old_dentry->d_name.name);
        oldfid = v9fs_fid_clone(old_dentry);
@@ -1265,11 +1080,11 @@ v9fs_vfs_link(struct dentry *old_dentry, struct inode *dir,
        }
        sprintf(name, "%d\n", oldfid->fid);
-        retval = v9fs_vfs_mkspecial(dir, dentry, V9FS_DMLINK, name);
+        retval = v9fs_vfs_mkspecial(dir, dentry, P9_DMLINK, name);
        __putname(name);
 clunk_fid:
-        v9fs_fid_clunk(v9ses, oldfid);
+        p9_client_clunk(oldfid);
        return retval;
 }
@@ -1288,7 +1103,8 @@ v9fs_vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev)
        int retval;
        char *name;
-        dprintk(DEBUG_VFS, " %lu,%s mode: %x MAJOR: %u MINOR: %u\n", dir->i_ino,
+        P9_DPRINTK(P9_DEBUG_VFS,
+                " %lu,%s mode: %x MAJOR: %u MINOR: %u\n", dir->i_ino,
                dentry->d_name.name, mode, MAJOR(rdev), MINOR(rdev));
        if (!new_valid_dev(rdev))
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index 7bdf8b326841..ba904371218b 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -37,10 +37,10 @@
 #include <linux/mount.h>
 #include <linux/idr.h>
 #include <linux/sched.h>
+#include <net/9p/9p.h>
+#include <net/9p/client.h>
-#include "debug.h"
 #include "v9fs.h"
-#include "9p.h"
 #include "v9fs_vfs.h"
 #include "fid.h"
@@ -107,41 +107,48 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags,
                       struct vfsmount *mnt)
 {
        struct super_block *sb = NULL;
-        struct v9fs_fcall *fcall = NULL;
        struct inode *inode = NULL;
        struct dentry *root = NULL;
        struct v9fs_session_info *v9ses = NULL;
-        struct v9fs_fid *root_fid = NULL;
+        struct p9_stat *st = NULL;
        int mode = S_IRWXUGO | S_ISVTX;
        uid_t uid = current->fsuid;
        gid_t gid = current->fsgid;
-        int stat_result = 0;
+        struct p9_fid *fid;
-        int newfid = 0;
        int retval = 0;
-        dprintk(DEBUG_VFS, " \n");
+        P9_DPRINTK(P9_DEBUG_VFS, " \n");
        v9ses = kzalloc(sizeof(struct v9fs_session_info), GFP_KERNEL);
        if (!v9ses)
                return -ENOMEM;
-        if ((newfid = v9fs_session_init(v9ses, dev_name, data)) < 0) {
+        fid = v9fs_session_init(v9ses, dev_name, data);
-                dprintk(DEBUG_ERROR, "problem initiating session\n");
+        if (IS_ERR(fid)) {
-                retval = newfid;
+                retval = PTR_ERR(fid);
-                goto out_free_session;
+                fid = NULL;
+                kfree(v9ses);
+                v9ses = NULL;
+                goto error;
+        }
+        st = p9_client_stat(fid);
+        if (IS_ERR(st)) {
+                retval = PTR_ERR(st);
+                goto error;
        }
        sb = sget(fs_type, NULL, v9fs_set_super, v9ses);
        if (IS_ERR(sb)) {
                retval = PTR_ERR(sb);
-                goto out_close_session;
+                goto error;
        }
        v9fs_fill_super(sb, v9ses, flags);
        inode = v9fs_get_inode(sb, S_IFDIR | mode);
        if (IS_ERR(inode)) {
                retval = PTR_ERR(inode);
-                goto put_back_sb;
+                goto error;
        }
        inode->i_uid = uid;
@@ -150,54 +157,30 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags,
        root = d_alloc_root(inode);
        if (!root) {
                retval = -ENOMEM;
-                goto put_back_sb;
+                goto error;
        }
        sb->s_root = root;
+        root->d_inode->i_ino = v9fs_qid2ino(&st->qid);
+        v9fs_stat2inode(st, root->d_inode, sb);
+        v9fs_fid_add(root, fid);
-        stat_result = v9fs_t_stat(v9ses, newfid, &fcall);
+        return simple_set_mnt(mnt, sb);
-        if (stat_result < 0) {
-                dprintk(DEBUG_ERROR, "stat error\n");
-                v9fs_t_clunk(v9ses, newfid);
-        } else {
-                /* Setup the Root Inode */
-                root_fid = v9fs_fid_create(v9ses, newfid);
-                if (root_fid == NULL) {
-                        retval = -ENOMEM;
-                        goto put_back_sb;
-                }
-                retval = v9fs_fid_insert(root_fid, root);
-                if (retval < 0) {
-                        kfree(fcall);
-                        goto put_back_sb;
-                }
-                root_fid->qid = fcall->params.rstat.stat.qid;
-                root->d_inode->i_ino =
-                    v9fs_qid2ino(&fcall->params.rstat.stat.qid);
-                v9fs_stat2inode(&fcall->params.rstat.stat, root->d_inode, sb);
-        }
-        kfree(fcall);
+error:
+        if (fid)
+                p9_client_clunk(fid);
-        if (stat_result < 0) {
+        if (v9ses) {
-                retval = stat_result;
+                v9fs_session_close(v9ses);
-                goto put_back_sb;
+                kfree(v9ses);
        }
-        return simple_set_mnt(mnt, sb);
+        if (sb) {
+                up_write(&sb->s_umount);
-out_close_session:
+                deactivate_super(sb);
-        v9fs_session_close(v9ses);
+        }
-out_free_session:
-        kfree(v9ses);
-        return retval;
-put_back_sb:
-        /* deactivate_super calls v9fs_kill_super which will frees the rest */
-        up_write(&sb->s_umount);
-        deactivate_super(sb);
        return retval;
 }
@@ -211,7 +194,7 @@ static void v9fs_kill_super(struct super_block *s)
 {
        struct v9fs_session_info *v9ses = s->s_fs_info;
-        dprintk(DEBUG_VFS, " %p\n", s);
+        P9_DPRINTK(P9_DEBUG_VFS, " %p\n", s);
        v9fs_dentry_release(s->s_root); /* clunk root */
@@ -219,7 +202,7 @@ static void v9fs_kill_super(struct super_block *s)
        v9fs_session_close(v9ses);
        kfree(v9ses);
-        dprintk(DEBUG_VFS, "exiting kill_super\n");
+        P9_DPRINTK(P9_DEBUG_VFS, "exiting kill_super\n");
 }
 /**
@@ -234,7 +217,7 @@ static int v9fs_show_options(struct seq_file *m, struct vfsmount *mnt)
        struct v9fs_session_info *v9ses = mnt->mnt_sb->s_fs_info;
        if (v9ses->debug != 0)
-                seq_printf(m, ",debug=%u", v9ses->debug);
+                seq_printf(m, ",debug=%x", v9ses->debug);
        if (v9ses->port != V9FS_PORT)
                seq_printf(m, ",port=%u", v9ses->port);
        if (v9ses->maxdata != 9000)
diff --git a/fs/Kconfig b/fs/Kconfig
index 0fa0c1193e81..94b9d861bf9b 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -2048,7 +2048,7 @@ config AFS_DEBUG
 config 9P_FS
        tristate "Plan 9 Resource Sharing Support (9P2000) (Experimental)"
-        depends on INET && EXPERIMENTAL
+        depends on INET && NET_9P && EXPERIMENTAL
        help
          If you say Y here, you will get experimental support for
          Plan 9 resource sharing via the 9P2000 protocol.
diff --git a/fs/adfs/file.c b/fs/adfs/file.c
index f544a2855923..36e381c6a99a 100644
--- a/fs/adfs/file.c
+++ b/fs/adfs/file.c
@@ -33,7 +33,7 @@ const struct file_operations adfs_file_operations = {
        .fsync          = file_fsync,
        .write          = do_sync_write,
        .aio_write      = generic_file_aio_write,
-        .sendfile       = generic_file_sendfile,
+        .splice_read    = generic_file_splice_read,
 };
 const struct inode_operations adfs_file_inode_operations = {
diff --git a/fs/affs/file.c b/fs/affs/file.c
index c8796906f584..c314a35f0918 100644
--- a/fs/affs/file.c
+++ b/fs/affs/file.c
@@ -35,7 +35,7 @@ const struct file_operations affs_file_operations = {
        .open           = affs_file_open,
        .release        = affs_file_release,
        .fsync          = file_fsync,
-        .sendfile       = generic_file_sendfile,
+        .splice_read    = generic_file_splice_read,
 };
 const struct inode_operations affs_file_inode_operations = {
diff --git a/fs/afs/file.c b/fs/afs/file.c
index 9c0e721d9fc2..aede7eb66dd4 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -32,7 +32,7 @@ const struct file_operations afs_file_operations = {
        .aio_read       = generic_file_aio_read,
        .aio_write      = afs_file_write,
        .mmap           = generic_file_readonly_mmap,
-        .sendfile       = generic_file_sendfile,
+        .splice_read    = generic_file_splice_read,
        .fsync          = afs_fsync,
 };
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 2dac3ad2c44b..2c55dd94a1de 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -17,6 +17,8 @@
 #include <linux/rxrpc.h>
 #include <linux/key.h>
 #include <linux/workqueue.h>
+#include <linux/sched.h>
 #include "afs.h"
 #include "afs_vl.h"
diff --git a/fs/bad_inode.c b/fs/bad_inode.c
index 329ee473eede..521ff7caadbd 100644
--- a/fs/bad_inode.c
+++ b/fs/bad_inode.c
@@ -114,12 +114,6 @@ static int bad_file_lock(struct file *file, int cmd, struct file_lock *fl)
        return -EIO;
 }
-static ssize_t bad_file_sendfile(struct file *in_file, loff_t *ppos,
-                        size_t count, read_actor_t actor, void *target)
-{
-        return -EIO;
-}
 static ssize_t bad_file_sendpage(struct file *file, struct page *page,
                        int off, size_t len, loff_t *pos, int more)
 {
@@ -182,7 +176,6 @@ static const struct file_operations bad_file_ops =
        .aio_fsync      = bad_file_aio_fsync,
        .fasync         = bad_file_fasync,
        .lock           = bad_file_lock,
-        .sendfile       = bad_file_sendfile,
        .sendpage       = bad_file_sendpage,
        .get_unmapped_area = bad_file_get_unmapped_area,
        .check_flags    = bad_file_check_flags,
diff --git a/fs/bfs/file.c b/fs/bfs/file.c
index ef4d1fa04e65..24310e9ee05a 100644
--- a/fs/bfs/file.c
+++ b/fs/bfs/file.c
@@ -24,7 +24,7 @@ const struct file_operations bfs_file_operations = {
        .write          = do_sync_write,
        .aio_write      = generic_file_aio_write,
        .mmap           = generic_file_mmap,
-        .sendfile       = generic_file_sendfile,
+        .splice_read    = generic_file_splice_read,
 };
 static int bfs_move_block(unsigned long from, unsigned long to, struct super_block *sb)
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index fa8ea33ab0be..08e4414b8374 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1499,6 +1499,9 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file)
 #endif
        int thread_status_size = 0;
        elf_addr_t *auxv;
+#ifdef ELF_CORE_WRITE_EXTRA_NOTES
+        int extra_notes_size;
+#endif
        /*
         * We no longer stop all VM operations.
@@ -1628,7 +1631,8 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file)
                sz += thread_status_size;
 #ifdef ELF_CORE_WRITE_EXTRA_NOTES
-                sz += ELF_CORE_EXTRA_NOTES_SIZE;
+                extra_notes_size = ELF_CORE_EXTRA_NOTES_SIZE;
+                sz += extra_notes_size;
 #endif
                fill_elf_note_phdr(&phdr, sz, offset);
@@ -1674,6 +1678,7 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file)
 #ifdef ELF_CORE_WRITE_EXTRA_NOTES
        ELF_CORE_WRITE_EXTRA_NOTES;
+        foffset += extra_notes_size;
 #endif
        /* write out the thread status notes section */
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index 7b0265d7f3a8..861141b4f6d6 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -558,7 +558,7 @@ static int load_flat_file(struct linux_binprm * bprm,
                        if (!realdatastart)
                                realdatastart = (unsigned long) -ENOMEM;
                        printk("Unable to allocate RAM for process data, errno %d\n",
-                                        (int)-datapos);
+                                        (int)-realdatastart);
                        do_munmap(current->mm, textpos, text_len);
                        ret = realdatastart;
                        goto err;
diff --git a/fs/bio.c b/fs/bio.c
index 093345f00128..33e46340a766 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -1223,8 +1223,6 @@ EXPORT_SYMBOL(bio_hw_segments);
 EXPORT_SYMBOL(bio_add_page);
 EXPORT_SYMBOL(bio_add_pc_page);
 EXPORT_SYMBOL(bio_get_nr_vecs);
-EXPORT_SYMBOL(bio_map_user);
-EXPORT_SYMBOL(bio_unmap_user);
 EXPORT_SYMBOL(bio_map_kern);
 EXPORT_SYMBOL(bio_pair_release);
 EXPORT_SYMBOL(bio_split);
diff --git a/fs/block_dev.c b/fs/block_dev.c
index ea1480a16f51..b3e9bfa748cf 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1346,7 +1346,6 @@ const struct file_operations def_blk_fops = {
 #ifdef CONFIG_COMPAT
        .compat_ioctl   = compat_blkdev_ioctl,
 #endif
-        .sendfile       = generic_file_sendfile,
        .splice_read    = generic_file_splice_read,
        .splice_write   = generic_file_splice_write,
 };
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index 6017c465440e..07838b2ac1ce 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -7,16 +7,16 @@
 *
 *   This program is free software;  you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
- *   the Free Software Foundation; either version 2 of the License, or 
+ *   the Free Software Foundation; either version 2 of the License, or
 *   (at your option) any later version.
- * 
+ *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
 *   the GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
- *   along with this program;  if not, write to the Free Software 
+ *   along with this program;  if not, write to the Free Software
 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
 #include <linux/fs.h>
@@ -39,7 +39,7 @@ cifs_dump_mem(char *label, void *data, int length)
        char *charptr = data;
        char buf[10], line[80];
-        printk(KERN_DEBUG "%s: dump of %d bytes of data at 0x%p\n", 
+        printk(KERN_DEBUG "%s: dump of %d bytes of data at 0x%p\n",
                label, length, data);
        for (i = 0; i < length; i += 16) {
                line[0] = 0;
@@ -60,10 +60,10 @@ cifs_dump_mem(char *label, void *data, int length)
 #ifdef CONFIG_CIFS_DEBUG2
 void cifs_dump_detail(struct smb_hdr * smb)
 {
-        cERROR(1,("Cmd: %d Err: 0x%x Flags: 0x%x Flgs2: 0x%x Mid: %d Pid: %d",
+        cERROR(1, ("Cmd: %d Err: 0x%x Flags: 0x%x Flgs2: 0x%x Mid: %d Pid: %d",
                  smb->Command, smb->Status.CifsError,
                  smb->Flags, smb->Flags2, smb->Mid, smb->Pid));
-        cERROR(1,("smb buf %p len %d", smb, smbCalcSize_LE(smb)));
+        cERROR(1, ("smb buf %p len %d", smb, smbCalcSize_LE(smb)));
 }
@@ -72,36 +72,35 @@ void cifs_dump_mids(struct TCP_Server_Info * server)
        struct list_head *tmp;
        struct mid_q_entry * mid_entry;
-        if(server == NULL)
+        if (server == NULL)
                return;
-        cERROR(1,("Dump pending requests:"));
+        cERROR(1, ("Dump pending requests:"));
        spin_lock(&GlobalMid_Lock);
        list_for_each(tmp, &server->pending_mid_q) {
                mid_entry = list_entry(tmp, struct mid_q_entry, qhead);
-                if(mid_entry) {
+                if (mid_entry) {
-                        cERROR(1,("State: %d Cmd: %d Pid: %d Tsk: %p Mid %d",
+                        cERROR(1, ("State: %d Cmd: %d Pid: %d Tsk: %p Mid %d",
                                mid_entry->midState,
                                (int)mid_entry->command,
                                mid_entry->pid,
                                mid_entry->tsk,
                                mid_entry->mid));
 #ifdef CONFIG_CIFS_STATS2
-                        cERROR(1,("IsLarge: %d buf: %p time rcv: %ld now: %ld",
+                        cERROR(1, ("IsLarge: %d buf: %p time rcv: %ld now: %ld",
                                mid_entry->largeBuf,
                                mid_entry->resp_buf,
                                mid_entry->when_received,
                                jiffies));
 #endif /* STATS2 */
-                        cERROR(1,("IsMult: %d IsEnd: %d", mid_entry->multiRsp,
+                        cERROR(1, ("IsMult: %d IsEnd: %d", mid_entry->multiRsp,
                                  mid_entry->multiEnd));
-                        if(mid_entry->resp_buf) {
+                        if (mid_entry->resp_buf) {
                                cifs_dump_detail(mid_entry->resp_buf);
                                cifs_dump_mem("existing buf: ",
                                        mid_entry->resp_buf,
                                        62 /* fixme */);
                        }
-                        
                }
        }
        spin_unlock(&GlobalMid_Lock);
@@ -129,9 +128,10 @@ cifs_debug_data_read(char *buf, char **beginBuffer, off_t offset,
                    "Display Internal CIFS Data Structures for Debugging\n"
                    "---------------------------------------------------\n");
        buf += length;
-        length = sprintf(buf,"CIFS Version %s\n",CIFS_VERSION);
+        length = sprintf(buf, "CIFS Version %s\n", CIFS_VERSION);
        buf += length;
-        length = sprintf(buf,"Active VFS Requests: %d\n", GlobalTotalActiveXid);
+        length = sprintf(buf,
+                "Active VFS Requests: %d\n", GlobalTotalActiveXid);
        buf += length;
        length = sprintf(buf, "Servers:");
        buf += length;
@@ -141,7 +141,7 @@ cifs_debug_data_read(char *buf, char **beginBuffer, off_t offset,
        list_for_each(tmp, &GlobalSMBSessionList) {
                i++;
                ses = list_entry(tmp, struct cifsSesInfo, cifsSessionList);
-                if((ses->serverDomain == NULL) || (ses->serverOS == NULL) ||
+                if ((ses->serverDomain == NULL) || (ses->serverOS == NULL) ||
                   (ses->serverNOS == NULL)) {
                        buf += sprintf(buf, "\nentry for %s not fully "
                                        "displayed\n\t", ses->serverName);
@@ -149,15 +149,18 @@ cifs_debug_data_read(char *buf, char **beginBuffer, off_t offset,
                } else {
                        length =
                            sprintf(buf,
-                                    "\n%d) Name: %s  Domain: %s Mounts: %d OS: %s  \n\tNOS: %s\tCapability: 0x%x\n\tSMB session status: %d\t",
+                                    "\n%d) Name: %s  Domain: %s Mounts: %d OS:"
+                                    " %s  \n\tNOS: %s\tCapability: 0x%x\n\tSMB"
+                                    " session status: %d\t",
                                i, ses->serverName, ses->serverDomain,
                                atomic_read(&ses->inUse),
                                ses->serverOS, ses->serverNOS,
-                                ses->capabilities,ses->status);
+                                ses->capabilities, ses->status);
                        buf += length;
                }
-                if(ses->server) {
+                if (ses->server) {
-                        buf += sprintf(buf, "TCP status: %d\n\tLocal Users To Server: %d SecMode: 0x%x Req On Wire: %d",
+                        buf += sprintf(buf, "TCP status: %d\n\tLocal Users To "
+                                    "Server: %d SecMode: 0x%x Req On Wire: %d",
                                ses->server->tcpStatus,
                                atomic_read(&ses->server->socketUseCount),
                                ses->server->secMode,
@@ -165,7 +168,7 @@ cifs_debug_data_read(char *buf, char **beginBuffer, off_t offset,
 #ifdef CONFIG_CIFS_STATS2
                        buf += sprintf(buf, " In Send: %d In MaxReq Wait: %d",
-                                atomic_read(&ses->server->inSend), 
+                                atomic_read(&ses->server->inSend),
                                atomic_read(&ses->server->num_waiters));
 #endif
@@ -177,17 +180,19 @@ cifs_debug_data_read(char *buf, char **beginBuffer, off_t offset,
                                mid_entry = list_entry(tmp1, struct
                                        mid_q_entry,
                                        qhead);
-                                if(mid_entry) {
+                                if (mid_entry) {
-                                        length = sprintf(buf,"State: %d com: %d pid: %d tsk: %p mid %d\n",
+                                        length = sprintf(buf,
-                                                mid_entry->midState,
+                                                        "State: %d com: %d pid:"
-                                                (int)mid_entry->command,
+                                                        " %d tsk: %p mid %d\n",
-                                                mid_entry->pid,
+                                                        mid_entry->midState,
-                                                mid_entry->tsk,
+                                                        (int)mid_entry->command,
-                                                mid_entry->mid);
+                                                        mid_entry->pid,
+                                                        mid_entry->tsk,
+                                                        mid_entry->mid);
                                        buf += length;
                                }
                        }
-                        spin_unlock(&GlobalMid_Lock); 
+                        spin_unlock(&GlobalMid_Lock);
                }
        }
@@ -207,7 +212,8 @@ cifs_debug_data_read(char *buf, char **beginBuffer, off_t offset,
                dev_type = le32_to_cpu(tcon->fsDevInfo.DeviceType);
                length =
                    sprintf(buf,
-                            "\n%d) %s Uses: %d Type: %s DevInfo: 0x%x Attributes: 0x%x\nPathComponentMax: %d Status: %d",
+                            "\n%d) %s Uses: %d Type: %s DevInfo: 0x%x "
+                            "Attributes: 0x%x\nPathComponentMax: %d Status: %d",
                            i, tcon->treeName,
                            atomic_read(&tcon->useCount),
                            tcon->nativeFileSystem,
@@ -215,7 +221,7 @@ cifs_debug_data_read(char *buf, char **beginBuffer, off_t offset,
                            le32_to_cpu(tcon->fsAttrInfo.Attributes),
                            le32_to_cpu(tcon->fsAttrInfo.MaxPathNameComponentLength),
                            tcon->tidStatus);
-                buf += length;        
+                buf += length;
                if (dev_type == FILE_DEVICE_DISK)
                        length = sprintf(buf, " type: DISK ");
                else if (dev_type == FILE_DEVICE_CD_ROM)
@@ -224,7 +230,7 @@ cifs_debug_data_read(char *buf, char **beginBuffer, off_t offset,
                        length =
                            sprintf(buf, " type: %d ", dev_type);
                buf += length;
-                if(tcon->tidStatus == CifsNeedReconnect) {
+                if (tcon->tidStatus == CifsNeedReconnect) {
                        buf += sprintf(buf, "\tDISCONNECTED ");
                        length += 14;
                }
@@ -238,9 +244,9 @@ cifs_debug_data_read(char *buf, char **beginBuffer, off_t offset,
        /* Now calculate total size of returned data */
        length = buf - original_buf;
-        if(offset + count >= length)
+        if (offset + count >= length)
                *eof = 1;
-        if(length < offset) {
+        if (length < offset) {
                *eof = 1;
                return 0;
        } else {
@@ -256,18 +262,18 @@ cifs_debug_data_read(char *buf, char **beginBuffer, off_t offset,
 static int
 cifs_stats_write(struct file *file, const char __user *buffer,
-               unsigned long count, void *data)
+                 unsigned long count, void *data)
 {
-        char c;
+        char c;
-        int rc;
+        int rc;
        struct list_head *tmp;
        struct cifsTconInfo *tcon;
-        rc = get_user(c, buffer);
+        rc = get_user(c, buffer);
-        if (rc)
+        if (rc)
-                return rc;
+                return rc;
-        if (c == '1' || c == 'y' || c == 'Y' || c == '0') {
+        if (c == '1' || c == 'y' || c == 'Y' || c == '0') {
                read_lock(&GlobalSMBSeslock);
 #ifdef CONFIG_CIFS_STATS2
                atomic_set(&totBufAllocCount, 0);
@@ -297,14 +303,14 @@ cifs_stats_write(struct file *file, const char __user *buffer,
                read_unlock(&GlobalSMBSeslock);
        }
-        return count;
+        return count;
 }
 static int
 cifs_stats_read(char *buf, char **beginBuffer, off_t offset,
                  int count, int *eof, void *data)
 {
-        int item_length,i,length;
+        int item_length, i, length;
        struct list_head *tmp;
        struct cifsTconInfo *tcon;
@@ -314,44 +320,44 @@ cifs_stats_read(char *buf, char **beginBuffer, off_t offset,
                        "Resources in use\nCIFS Session: %d\n",
                        sesInfoAllocCount.counter);
        buf += length;
-        item_length = 
+        item_length =
-                sprintf(buf,"Share (unique mount targets): %d\n",
+                sprintf(buf, "Share (unique mount targets): %d\n",
                        tconInfoAllocCount.counter);
        length += item_length;
-        buf += item_length;      
+        buf += item_length;
-        item_length = 
+        item_length =
-                sprintf(buf,"SMB Request/Response Buffer: %d Pool size: %d\n",
+                sprintf(buf, "SMB Request/Response Buffer: %d Pool size: %d\n",
                        bufAllocCount.counter,
                        cifs_min_rcv + tcpSesAllocCount.counter);
        length += item_length;
        buf += item_length;
-        item_length = 
+        item_length =
-                sprintf(buf,"SMB Small Req/Resp Buffer: %d Pool size: %d\n",
+                sprintf(buf, "SMB Small Req/Resp Buffer: %d Pool size: %d\n",
-                        smBufAllocCount.counter,cifs_min_small);
+                        smBufAllocCount.counter, cifs_min_small);
        length += item_length;
        buf += item_length;
 #ifdef CONFIG_CIFS_STATS2
-        item_length = sprintf(buf, "Total Large %d Small %d Allocations\n",
+        item_length = sprintf(buf, "Total Large %d Small %d Allocations\n",
                                atomic_read(&totBufAllocCount),
-                                atomic_read(&totSmBufAllocCount));
+                                atomic_read(&totSmBufAllocCount));
        length += item_length;
        buf += item_length;
 #endif /* CONFIG_CIFS_STATS2 */
-        item_length = 
+        item_length =
-                sprintf(buf,"Operations (MIDs): %d\n",
+                sprintf(buf, "Operations (MIDs): %d\n",
                        midCount.counter);
        length += item_length;
        buf += item_length;
        item_length = sprintf(buf,
                "\n%d session %d share reconnects\n",
-                tcpSesReconnectCount.counter,tconInfoReconnectCount.counter);
+                tcpSesReconnectCount.counter, tconInfoReconnectCount.counter);
        length += item_length;
        buf += item_length;
        item_length = sprintf(buf,
                "Total vfs operations: %d maximum at one time: %d\n",
-                GlobalCurrentXid,GlobalMaxActiveXid);
+                GlobalCurrentXid, GlobalMaxActiveXid);
        length += item_length;
        buf += item_length;
@@ -360,10 +366,10 @@ cifs_stats_read(char *buf, char **beginBuffer, off_t offset,
        list_for_each(tmp, &GlobalTreeConnectionList) {
                i++;
                tcon = list_entry(tmp, struct cifsTconInfo, cifsConnectionList);
-                item_length = sprintf(buf,"\n%d) %s",i, tcon->treeName);
+                item_length = sprintf(buf, "\n%d) %s", i, tcon->treeName);
                buf += item_length;
                length += item_length;
-                if(tcon->tidStatus == CifsNeedReconnect) {
+                if (tcon->tidStatus == CifsNeedReconnect) {
                        buf += sprintf(buf, "\tDISCONNECTED ");
                        length += 14;
                }
@@ -380,15 +386,15 @@ cifs_stats_read(char *buf, char **beginBuffer, off_t offset,
                item_length = sprintf(buf, "\nWrites: %d Bytes: %lld",
                        atomic_read(&tcon->num_writes),
                        (long long)(tcon->bytes_written));
-                buf += item_length;
+                buf += item_length;
-                length += item_length;
+                length += item_length;
-                item_length = sprintf(buf, 
+                item_length = sprintf(buf,
                        "\nLocks: %d HardLinks: %d Symlinks: %d",
-                        atomic_read(&tcon->num_locks),
+                        atomic_read(&tcon->num_locks),
                        atomic_read(&tcon->num_hardlinks),
                        atomic_read(&tcon->num_symlinks));
-                buf += item_length;
+                buf += item_length;
-                length += item_length;
+                length += item_length;
                item_length = sprintf(buf, "\nOpens: %d Closes: %d Deletes: %d",
                        atomic_read(&tcon->num_opens),
@@ -415,12 +421,12 @@ cifs_stats_read(char *buf, char **beginBuffer, off_t offset,
        }
        read_unlock(&GlobalSMBSeslock);
-        buf += sprintf(buf,"\n");
+        buf += sprintf(buf, "\n");
        length++;
-        if(offset + count >= length)
+        if (offset + count >= length)
                *eof = 1;
-        if(length < offset) {
+        if (length < offset) {
                *eof = 1;
                return 0;
        } else {
@@ -428,7 +434,7 @@ cifs_stats_read(char *buf, char **beginBuffer, off_t offset,
        }
        if (length > count)
                length = count;
-                
        return length;
 }
 #endif
@@ -547,11 +553,11 @@ cifs_proc_clean(void)
        remove_proc_entry("MultiuserMount", proc_fs_cifs);
        remove_proc_entry("OplockEnabled", proc_fs_cifs);
 /*      remove_proc_entry("NTLMV2Enabled",proc_fs_cifs); */
-        remove_proc_entry("SecurityFlags",proc_fs_cifs);
+        remove_proc_entry("SecurityFlags", proc_fs_cifs);
-/*      remove_proc_entry("PacketSigningEnabled",proc_fs_cifs); */
+/*      remove_proc_entry("PacketSigningEnabled", proc_fs_cifs); */
-        remove_proc_entry("LinuxExtensionsEnabled",proc_fs_cifs);
+        remove_proc_entry("LinuxExtensionsEnabled", proc_fs_cifs);
-        remove_proc_entry("Experimental",proc_fs_cifs);
+        remove_proc_entry("Experimental", proc_fs_cifs);
-        remove_proc_entry("LookupCacheEnabled",proc_fs_cifs);
+        remove_proc_entry("LookupCacheEnabled", proc_fs_cifs);
        remove_proc_entry("cifs", proc_root_fs);
 }
@@ -590,7 +596,7 @@ cifsFYI_write(struct file *file, const char __user *buffer,
                cifsFYI = 0;
        else if (c == '1' || c == 'y' || c == 'Y')
                cifsFYI = 1;
-        else if((c > '1') && (c <= '9'))
+        else if ((c > '1') && (c <= '9'))
                cifsFYI = (int) (c - '0'); /* see cifs_debug.h for meanings */
        return count;
@@ -637,28 +643,28 @@ oplockEnabled_write(struct file *file, const char __user *buffer,
 static int
 experimEnabled_read(char *page, char **start, off_t off,
-                   int count, int *eof, void *data)
+                    int count, int *eof, void *data)
 {
-        int len;
+        int len;
-        len = sprintf(page, "%d\n", experimEnabled);
+        len = sprintf(page, "%d\n", experimEnabled);
-        len -= off;
+        len -= off;
-        *start = page + off;
+        *start = page + off;
-        if (len > count)
+        if (len > count)
-                len = count;
+                len = count;
-        else
+        else
-                *eof = 1;
+                *eof = 1;
-        if (len < 0)
+        if (len < 0)
-                len = 0;
+                len = 0;
-        return len;
+        return len;
 }
 static int
 experimEnabled_write(struct file *file, const char __user *buffer,
-                    unsigned long count, void *data)
+                     unsigned long count, void *data)
 {
        char c;
        int rc;
@@ -678,46 +684,46 @@ experimEnabled_write(struct file *file, const char __user *buffer,
 static int
 linuxExtensionsEnabled_read(char *page, char **start, off_t off,
-                   int count, int *eof, void *data)
+                            int count, int *eof, void *data)
 {
-        int len;
+        int len;
-        len = sprintf(page, "%d\n", linuxExtEnabled);
+        len = sprintf(page, "%d\n", linuxExtEnabled);
-        len -= off;
+        len -= off;
-        *start = page + off;
+        *start = page + off;
-        if (len > count)
+        if (len > count)
-                len = count;
+                len = count;
-        else
+        else
-                *eof = 1;
+                *eof = 1;
-        if (len < 0)
+        if (len < 0)
-                len = 0;
+                len = 0;
-        return len;
+        return len;
 }
 static int
 linuxExtensionsEnabled_write(struct file *file, const char __user *buffer,
-                    unsigned long count, void *data)
+                             unsigned long count, void *data)
 {
-        char c;
+        char c;
-        int rc;
+        int rc;
-        rc = get_user(c, buffer);
+        rc = get_user(c, buffer);
-        if (rc)
+        if (rc)
-                return rc;
+                return rc;
-        if (c == '0' || c == 'n' || c == 'N')
+        if (c == '0' || c == 'n' || c == 'N')
-                linuxExtEnabled = 0;
+                linuxExtEnabled = 0;
-        else if (c == '1' || c == 'y' || c == 'Y')
+        else if (c == '1' || c == 'y' || c == 'Y')
-                linuxExtEnabled = 1;
+                linuxExtEnabled = 1;
-        return count;
+        return count;
 }
 static int
 lookupFlag_read(char *page, char **start, off_t off,
-                   int count, int *eof, void *data)
+                int count, int *eof, void *data)
 {
        int len;
@@ -860,15 +866,15 @@ security_flags_write(struct file *file, const char __user *buffer,
        char flags_string[12];
        char c;
-        if((count < 1) || (count > 11))
+        if ((count < 1) || (count > 11))
                return -EINVAL;
        memset(flags_string, 0, 12);
-        if(copy_from_user(flags_string, buffer, count))
+        if (copy_from_user(flags_string, buffer, count))
                return -EFAULT;
-        if(count < 3) {
+        if (count < 3) {
                /* single char or single char followed by null */
                c = flags_string[0];
                if (c == '0' || c == 'n' || c == 'N')
@@ -881,15 +887,15 @@ security_flags_write(struct file *file, const char __user *buffer,
        flags = simple_strtoul(flags_string, NULL, 0);
-        cFYI(1,("sec flags 0x%x", flags));
+        cFYI(1, ("sec flags 0x%x", flags));
-        if(flags <= 0)  {
+        if (flags <= 0)  {
-                cERROR(1,("invalid security flags %s",flags_string));
+                cERROR(1, ("invalid security flags %s", flags_string));
                return -EINVAL;
        }
-        if(flags & ~CIFSSEC_MASK) {
+        if (flags & ~CIFSSEC_MASK) {
-                cERROR(1,("attempt to set unsupported security flags 0x%x",
+                cERROR(1, ("attempt to set unsupported security flags 0x%x",
                        flags & ~CIFSSEC_MASK));
                return -EINVAL;
        }
diff --git a/fs/cifs/cifs_unicode.c b/fs/cifs/cifs_unicode.c
index 793c4b95c164..701e9a9185f2 100644
--- a/fs/cifs/cifs_unicode.c
+++ b/fs/cifs/cifs_unicode.c
@@ -6,16 +6,16 @@
 *
 *   This program is free software;  you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
- *   the Free Software Foundation; either version 2 of the License, or 
+ *   the Free Software Foundation; either version 2 of the License, or
 *   (at your option) any later version.
- * 
+ *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
 *   the GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
- *   along with this program;  if not, write to the Free Software 
+ *   along with this program;  if not, write to the Free Software
 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
 #include <linux/fs.h>
@@ -32,7 +32,7 @@
 *
 */
 int
-cifs_strfromUCS_le(char *to, const __le16 * from,       
+cifs_strfromUCS_le(char *to, const __le16 * from,
                   int len, const struct nls_table *codepage)
 {
        int i;
@@ -66,7 +66,7 @@ cifs_strtoUCS(__le16 * to, const char *from, int len,
 {
        int charlen;
        int i;
-        wchar_t * wchar_to = (wchar_t *)to; /* needed to quiet sparse */  
+        wchar_t * wchar_to = (wchar_t *)to; /* needed to quiet sparse */
        for (i = 0; len && *from; i++, from += charlen, len -= charlen) {
@@ -79,7 +79,7 @@ cifs_strtoUCS(__le16 * to, const char *from, int len,
                        /* A question mark */
                        to[i] = cpu_to_le16(0x003f);
                        charlen = 1;
-                } else 
+                } else
                        to[i] = cpu_to_le16(wchar_to[i]);
        }
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index d38c69b591cf..8b0cbf4a4ad0 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -616,7 +616,7 @@ const struct file_operations cifs_file_ops = {
        .fsync = cifs_fsync,
        .flush = cifs_flush,
        .mmap  = cifs_file_mmap,
-        .sendfile = generic_file_sendfile,
+        .splice_read = generic_file_splice_read,
        .llseek = cifs_llseek,
 #ifdef CONFIG_CIFS_POSIX
        .ioctl  = cifs_ioctl,
@@ -637,7 +637,7 @@ const struct file_operations cifs_file_direct_ops = {
        .lock = cifs_lock,
        .fsync = cifs_fsync,
        .flush = cifs_flush,
-        .sendfile = generic_file_sendfile, /* BB removeme BB */
+        .splice_read = generic_file_splice_read,
 #ifdef CONFIG_CIFS_POSIX
        .ioctl  = cifs_ioctl,
 #endif /* CONFIG_CIFS_POSIX */
@@ -656,7 +656,7 @@ const struct file_operations cifs_file_nobrl_ops = {
        .fsync = cifs_fsync,
        .flush = cifs_flush,
        .mmap  = cifs_file_mmap,
-        .sendfile = generic_file_sendfile,
+        .splice_read = generic_file_splice_read,
        .llseek = cifs_llseek,
 #ifdef CONFIG_CIFS_POSIX
        .ioctl  = cifs_ioctl,
@@ -676,7 +676,7 @@ const struct file_operations cifs_file_direct_nobrl_ops = {
        .release = cifs_close,
        .fsync = cifs_fsync,
        .flush = cifs_flush,
-        .sendfile = generic_file_sendfile, /* BB removeme BB */
+        .splice_read = generic_file_splice_read,
 #ifdef CONFIG_CIFS_POSIX
        .ioctl  = cifs_ioctl,
 #endif /* CONFIG_CIFS_POSIX */
@@ -825,8 +825,8 @@ cifs_init_mids(void)
                                sizeof (struct oplock_q_entry), 0,
                                SLAB_HWCACHE_ALIGN, NULL, NULL);
        if (cifs_oplock_cachep == NULL) {
-                kmem_cache_destroy(cifs_mid_cachep);
                mempool_destroy(cifs_mid_poolp);
+                kmem_cache_destroy(cifs_mid_cachep);
                return -ENOMEM;
        }
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 14de58fa1437..57419a176688 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -433,8 +433,8 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
        cFYI(1,("secFlags 0x%x",secFlags));
        pSMB->hdr.Mid = GetNextMid(server);
-        pSMB->hdr.Flags2 |= SMBFLG2_UNICODE;
+        pSMB->hdr.Flags2 |= (SMBFLG2_UNICODE | SMBFLG2_ERR_STATUS);
-        if((secFlags & CIFSSEC_MUST_KRB5) == CIFSSEC_MUST_KRB5)
+        if ((secFlags & CIFSSEC_MUST_KRB5) == CIFSSEC_MUST_KRB5)
                pSMB->hdr.Flags2 |= SMBFLG2_EXT_SEC;
        
        count = 0;
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 216fb625843f..f4e92661b223 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -2069,8 +2069,15 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
                        srvTcp->tcpStatus = CifsExiting;
                        spin_unlock(&GlobalMid_Lock);
                        if (srvTcp->tsk) {
+                                struct task_struct *tsk;
+                                /* If we could verify that kthread_stop would
+                                   always wake up processes blocked in
+                                   tcp in recv_mesg then we could remove the
+                                   send_sig call */
                                send_sig(SIGKILL,srvTcp->tsk,1);
-                                kthread_stop(srvTcp->tsk);
+                                tsk = srvTcp->tsk;
+                                if(tsk)
+                                        kthread_stop(tsk);
                        }
                }
                 /* If find_unc succeeded then rc == 0 so we can not end */
@@ -2085,8 +2092,11 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
                                        /* if the socketUseCount is now zero */
                                        if ((temp_rc == -ESHUTDOWN) &&
                                           (pSesInfo->server) && (pSesInfo->server->tsk)) {
+                                                struct task_struct *tsk;
                                                send_sig(SIGKILL,pSesInfo->server->tsk,1);
-                                                kthread_stop(pSesInfo->server->tsk);
+                                                tsk = pSesInfo->server->tsk;
+                                                if (tsk)
+                                                        kthread_stop(tsk);
                                        }
                                } else
                                        cFYI(1, ("No session or bad tcon"));
@@ -3334,7 +3344,7 @@ cifs_umount(struct super_block *sb, struct cifs_sb_info *cifs_sb)
                                return 0;
                        } else if (rc == -ESHUTDOWN) {
                                cFYI(1,("Waking up socket by sending it signal"));
-                                if(cifsd_task) {
+                                if (cifsd_task) {
                                        send_sig(SIGKILL,cifsd_task,1);
                                        kthread_stop(cifsd_task);
                                }
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index e5210519ac4b..8e86aaceb68a 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -2,7 +2,7 @@
 *   fs/cifs/dir.c
 *
 *   vfs operations that deal with dentries
- * 
+ *
 *   Copyright (C) International Business Machines  Corp., 2002,2005
 *   Author(s): Steve French (sfrench@us.ibm.com)
 *
@@ -34,11 +34,12 @@
 static void
 renew_parental_timestamps(struct dentry *direntry)
 {
-        /* BB check if there is a way to get the kernel to do this or if we really need this */
+        /* BB check if there is a way to get the kernel to do this or if we
+           really need this */
        do {
                direntry->d_time = jiffies;
                direntry = direntry->d_parent;
-        } while (!IS_ROOT(direntry));   
+        } while (!IS_ROOT(direntry));
 }
 /* Note: caller must free return buffer */
@@ -51,7 +52,7 @@ build_path_from_dentry(struct dentry *direntry)
        char *full_path;
        char dirsep;
-        if(direntry == NULL)
+        if (direntry == NULL)
                return NULL;  /* not much we can do if dentry is freed and
                we need to reopen the file after it was closed implicitly
                when the server crashed */
@@ -59,18 +60,18 @@ build_path_from_dentry(struct dentry *direntry)
        dirsep = CIFS_DIR_SEP(CIFS_SB(direntry->d_sb));
        pplen = CIFS_SB(direntry->d_sb)->prepathlen;
 cifs_bp_rename_retry:
-        namelen = pplen; 
+        namelen = pplen;
        for (temp = direntry; !IS_ROOT(temp);) {
                namelen += (1 + temp->d_name.len);
                temp = temp->d_parent;
-                if(temp == NULL) {
+                if (temp == NULL) {
-                        cERROR(1,("corrupt dentry"));
+                        cERROR(1, ("corrupt dentry"));
                        return NULL;
                }
        }
        full_path = kmalloc(namelen+1, GFP_KERNEL);
-        if(full_path == NULL)
+        if (full_path == NULL)
                return full_path;
        full_path[namelen] = 0; /* trailing null */
        for (temp = direntry; !IS_ROOT(temp);) {
@@ -84,8 +85,8 @@ cifs_bp_rename_retry:
                        cFYI(0, ("name: %s", full_path + namelen));
                }
                temp = temp->d_parent;
-                if(temp == NULL) {
+                if (temp == NULL) {
-                        cERROR(1,("corrupt dentry"));
+                        cERROR(1, ("corrupt dentry"));
                        kfree(full_path);
                        return NULL;
                }
@@ -94,7 +95,7 @@ cifs_bp_rename_retry:
                cERROR(1,
                       ("did not end path lookup where expected namelen is %d",
                        namelen));
-                /* presumably this is only possible if racing with a rename 
+                /* presumably this is only possible if racing with a rename
                of one of the parent directories  (we can not lock the dentries
                above us to prevent this, but retrying should be harmless) */
                kfree(full_path);
@@ -106,7 +107,7 @@ cifs_bp_rename_retry:
           since the '\' is a valid posix character so we can not switch
           those safely to '/' if any are found in the middle of the prepath */
        /* BB test paths to Windows with '/' in the midst of prepath */
-        strncpy(full_path,CIFS_SB(direntry->d_sb)->prepath,pplen);
+        strncpy(full_path, CIFS_SB(direntry->d_sb)->prepath, pplen);
        return full_path;
 }
@@ -147,12 +148,12 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
        pTcon = cifs_sb->tcon;
        full_path = build_path_from_dentry(direntry);
-        if(full_path == NULL) {
+        if (full_path == NULL) {
                FreeXid(xid);
                return -ENOMEM;
        }
-        if(nd && (nd->flags & LOOKUP_OPEN)) {
+        if (nd && (nd->flags & LOOKUP_OPEN)) {
                int oflags = nd->intent.open.flags;
                desiredAccess = 0;
@@ -164,28 +165,29 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
                                write_only = TRUE;
                }
-                if((oflags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
+                if ((oflags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
                        disposition = FILE_CREATE;
-                else if((oflags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
+                else if ((oflags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
                        disposition = FILE_OVERWRITE_IF;
-                else if((oflags & O_CREAT) == O_CREAT)
+                else if ((oflags & O_CREAT) == O_CREAT)
                        disposition = FILE_OPEN_IF;
                else {
-                        cFYI(1,("Create flag not set in create function"));
+                        cFYI(1, ("Create flag not set in create function"));
                }
        }
-        /* BB add processing to set equivalent of mode - e.g. via CreateX with ACLs */
+        /* BB add processing to set equivalent of mode - e.g. via CreateX with
+           ACLs */
        if (oplockEnabled)
                oplock = REQ_OPLOCK;
-        buf = kmalloc(sizeof(FILE_ALL_INFO),GFP_KERNEL);
+        buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
-        if(buf == NULL) {
+        if (buf == NULL) {
                kfree(full_path);
                FreeXid(xid);
                return -ENOMEM;
        }
-        if (cifs_sb->tcon->ses->capabilities & CAP_NT_SMBS) 
+        if (cifs_sb->tcon->ses->capabilities & CAP_NT_SMBS)
                rc = CIFSSMBOpen(xid, pTcon, full_path, disposition,
                         desiredAccess, CREATE_NOT_DIR,
                         &fileHandle, &oplock, buf, cifs_sb->local_nls,
@@ -193,27 +195,28 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
        else
                rc = -EIO; /* no NT SMB support fall into legacy open below */
-        if(rc == -EIO) {
+        if (rc == -EIO) {
                /* old server, retry the open legacy style */
                rc = SMBLegacyOpen(xid, pTcon, full_path, disposition,
                        desiredAccess, CREATE_NOT_DIR,
                        &fileHandle, &oplock, buf, cifs_sb->local_nls,
                        cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
-        } 
+        }
        if (rc) {
                cFYI(1, ("cifs_create returned 0x%x", rc));
        } else {
                /* If Open reported that we actually created a file
                then we now have to set the mode if possible */
                if ((cifs_sb->tcon->ses->capabilities & CAP_UNIX) &&
-                        (oplock & CIFS_CREATE_ACTION))
+                        (oplock & CIFS_CREATE_ACTION)) {
-                        if(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) {
+                        mode &= ~current->fs->umask;
+                        if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) {
                                CIFSSMBUnixSetPerms(xid, pTcon, full_path, mode,
                                        (__u64)current->fsuid,
                                        (__u64)current->fsgid,
                                        0 /* dev */,
-                                        cifs_sb->local_nls, 
+                                        cifs_sb->local_nls,
-                                        cifs_sb->mnt_cifs_flags & 
+                                        cifs_sb->mnt_cifs_flags &
                                                CIFS_MOUNT_MAP_SPECIAL_CHR);
                        } else {
                                CIFSSMBUnixSetPerms(xid, pTcon, full_path, mode,
@@ -221,26 +224,28 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
                                        (__u64)-1,
                                        0 /* dev */,
                                        cifs_sb->local_nls,
-                                        cifs_sb->mnt_cifs_flags & 
+                                        cifs_sb->mnt_cifs_flags &
                                                CIFS_MOUNT_MAP_SPECIAL_CHR);
                        }
-                else {
+                } else {
-                        /* BB implement mode setting via Windows security descriptors */
+                        /* BB implement mode setting via Windows security
-                        /* eg CIFSSMBWinSetPerms(xid,pTcon,full_path,mode,-1,-1,local_nls);*/
+                           descriptors e.g. */
-                        /* could set r/o dos attribute if mode & 0222 == 0 */
+                        /* CIFSSMBWinSetPerms(xid,pTcon,path,mode,-1,-1,nls);*/
+                        /* Could set r/o dos attribute if mode & 0222 == 0 */
                }
        /* BB server might mask mode so we have to query for Unix case*/
                if (pTcon->ses->capabilities & CAP_UNIX)
                        rc = cifs_get_inode_info_unix(&newinode, full_path,
-                                                 inode->i_sb,xid);
+                                                 inode->i_sb, xid);
                else {
                        rc = cifs_get_inode_info(&newinode, full_path,
-                                                 buf, inode->i_sb,xid);
+                                                 buf, inode->i_sb, xid);
-                        if(newinode) {
+                        if (newinode) {
                                newinode->i_mode = mode;
-                                if((oplock & CIFS_CREATE_ACTION) &&
+                                if ((oplock & CIFS_CREATE_ACTION) &&
-                                  (cifs_sb->mnt_cifs_flags & 
+                                    (cifs_sb->mnt_cifs_flags &
                                     CIFS_MOUNT_SET_UID)) {
                                        newinode->i_uid = current->fsuid;
                                        newinode->i_gid = current->fsgid;
@@ -259,14 +264,14 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
                                direntry->d_op = &cifs_dentry_ops;
                        d_instantiate(direntry, newinode);
                }
-                if((nd->flags & LOOKUP_OPEN) == FALSE) {
+                if ((nd->flags & LOOKUP_OPEN) == FALSE) {
                        /* mknod case - do not leave file open */
                        CIFSSMBClose(xid, pTcon, fileHandle);
-                } else if(newinode) {
+                } else if (newinode) {
                        pCifsFile =
                           kzalloc(sizeof (struct cifsFileInfo), GFP_KERNEL);
-                        
-                        if(pCifsFile == NULL)
+                        if (pCifsFile == NULL)
                                goto cifs_create_out;
                        pCifsFile->netfid = fileHandle;
                        pCifsFile->pid = current->tgid;
@@ -276,33 +281,33 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
                        init_MUTEX(&pCifsFile->fh_sem);
                        mutex_init(&pCifsFile->lock_mutex);
                        INIT_LIST_HEAD(&pCifsFile->llist);
-                        atomic_set(&pCifsFile->wrtPending,0);
+                        atomic_set(&pCifsFile->wrtPending, 0);
-                        /* set the following in open now 
+                        /* set the following in open now
                                pCifsFile->pfile = file; */
                        write_lock(&GlobalSMBSeslock);
-                        list_add(&pCifsFile->tlist,&pTcon->openFileList);
+                        list_add(&pCifsFile->tlist, &pTcon->openFileList);
                        pCifsInode = CIFS_I(newinode);
-                        if(pCifsInode) {
+                        if (pCifsInode) {
                                /* if readable file instance put first in list*/
                                if (write_only == TRUE) {
-                                        list_add_tail(&pCifsFile->flist,
+                                        list_add_tail(&pCifsFile->flist,
                                                &pCifsInode->openFileList);
                                } else {
                                        list_add(&pCifsFile->flist,
                                                &pCifsInode->openFileList);
                                }
-                                if((oplock & 0xF) == OPLOCK_EXCLUSIVE) {
+                                if ((oplock & 0xF) == OPLOCK_EXCLUSIVE) {
                                        pCifsInode->clientCanCacheAll = TRUE;
                                        pCifsInode->clientCanCacheRead = TRUE;
-                                        cFYI(1,("Exclusive Oplock for inode %p",
+                                        cFYI(1, ("Exclusive Oplock inode %p",
                                                newinode));
-                                } else if((oplock & 0xF) == OPLOCK_READ)
+                                } else if ((oplock & 0xF) == OPLOCK_READ)
                                        pCifsInode->clientCanCacheRead = TRUE;
                        }
                        write_unlock(&GlobalSMBSeslock);
                }
-        } 
+        }
 cifs_create_out:
        kfree(buf);
        kfree(full_path);
@@ -310,8 +315,8 @@ cifs_create_out:
        return rc;
 }
-int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode, 
+int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode,
-                dev_t device_number) 
+                dev_t device_number)
 {
        int rc = -EPERM;
        int xid;
@@ -329,43 +334,45 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode,
        pTcon = cifs_sb->tcon;
        full_path = build_path_from_dentry(direntry);
-        if(full_path == NULL)
+        if (full_path == NULL)
                rc = -ENOMEM;
        else if (pTcon->ses->capabilities & CAP_UNIX) {
-                if(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) {
+                mode &= ~current->fs->umask;
+                if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) {
                        rc = CIFSSMBUnixSetPerms(xid, pTcon, full_path,
-                                mode,(__u64)current->fsuid,(__u64)current->fsgid,
+                                mode, (__u64)current->fsuid,
+                                (__u64)current->fsgid,
                                device_number, cifs_sb->local_nls,
-                                cifs_sb->mnt_cifs_flags & 
+                                cifs_sb->mnt_cifs_flags &
                                        CIFS_MOUNT_MAP_SPECIAL_CHR);
                } else {
                        rc = CIFSSMBUnixSetPerms(xid, pTcon,
                                full_path, mode, (__u64)-1, (__u64)-1,
                                device_number, cifs_sb->local_nls,
-                                cifs_sb->mnt_cifs_flags & 
+                                cifs_sb->mnt_cifs_flags &
                                        CIFS_MOUNT_MAP_SPECIAL_CHR);
                }
-                if(!rc) {
+                if (!rc) {
                        rc = cifs_get_inode_info_unix(&newinode, full_path,
-                                                inode->i_sb,xid);
+                                                inode->i_sb, xid);
                        if (pTcon->nocase)
                                direntry->d_op = &cifs_ci_dentry_ops;
                        else
                                direntry->d_op = &cifs_dentry_ops;
-                        if(rc == 0)
+                        if (rc == 0)
                                d_instantiate(direntry, newinode);
                }
        } else {
-                if(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) {
+                if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) {
                        int oplock = 0;
                        u16 fileHandle;
                        FILE_ALL_INFO * buf;
-                        cFYI(1,("sfu compat create special file"));
+                        cFYI(1, ("sfu compat create special file"));
-                        buf = kmalloc(sizeof(FILE_ALL_INFO),GFP_KERNEL);
+                        buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
-                        if(buf == NULL) {
+                        if (buf == NULL) {
                                kfree(full_path);
                                FreeXid(xid);
                                return -ENOMEM;
@@ -373,39 +380,38 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode,
                        rc = CIFSSMBOpen(xid, pTcon, full_path,
                                         FILE_CREATE, /* fail if exists */
-                                         GENERIC_WRITE /* BB would 
+                                         GENERIC_WRITE /* BB would
                                          WRITE_OWNER | WRITE_DAC be better? */,
                                         /* Create a file and set the
                                            file attribute to SYSTEM */
                                         CREATE_NOT_DIR | CREATE_OPTION_SPECIAL,
                                         &fileHandle, &oplock, buf,
                                         cifs_sb->local_nls,
-                                         cifs_sb->mnt_cifs_flags & 
+                                         cifs_sb->mnt_cifs_flags &
                                            CIFS_MOUNT_MAP_SPECIAL_CHR);
                        /* BB FIXME - add handling for backlevel servers
                           which need legacy open and check for all
-                           calls to SMBOpen for fallback to 
+                           calls to SMBOpen for fallback to SMBLegacyOpen */
-                           SMBLeagcyOpen */
+                        if (!rc) {
-                        if(!rc) {
                                /* BB Do not bother to decode buf since no
                                   local inode yet to put timestamps in,
                                   but we can reuse it safely */
                                int bytes_written;
                                struct win_dev *pdev;
                                pdev = (struct win_dev *)buf;
-                                if(S_ISCHR(mode)) {
+                                if (S_ISCHR(mode)) {
                                        memcpy(pdev->type, "IntxCHR", 8);
                                        pdev->major =
                                              cpu_to_le64(MAJOR(device_number));
-                                        pdev->minor = 
+                                        pdev->minor =
                                              cpu_to_le64(MINOR(device_number));
                                        rc = CIFSSMBWrite(xid, pTcon,
                                                fileHandle,
                                                sizeof(struct win_dev),
                                                0, &bytes_written, (char *)pdev,
                                                NULL, 0);
-                                } else if(S_ISBLK(mode)) {
+                                } else if (S_ISBLK(mode)) {
                                        memcpy(pdev->type, "IntxBLK", 8);
                                        pdev->major =
                                              cpu_to_le64(MAJOR(device_number));
@@ -432,7 +438,8 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode,
 struct dentry *
-cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, struct nameidata *nd)
+cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
+            struct nameidata *nd)
 {
        int xid;
        int rc = 0; /* to get around spurious gcc warning, set to zero here */
@@ -447,8 +454,6 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, struct name
             (" parent inode = 0x%p name is: %s and dentry = 0x%p",
              parent_dir_inode, direntry->d_name.name, direntry));
-        /* BB Add check of incoming data - e.g. frame not longer than maximum SMB - let server check the namelen BB */
        /* check whether path exists */
        cifs_sb = CIFS_SB(parent_dir_inode->i_sb);
@@ -472,7 +477,7 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, struct name
        deadlock in the cases (beginning of sys_rename itself)
        in which we already have the sb rename sem */
        full_path = build_path_from_dentry(direntry);
-        if(full_path == NULL) {
+        if (full_path == NULL) {
                FreeXid(xid);
                return ERR_PTR(-ENOMEM);
        }
@@ -487,10 +492,10 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, struct name
        if (pTcon->ses->capabilities & CAP_UNIX)
                rc = cifs_get_inode_info_unix(&newInode, full_path,
-                                              parent_dir_inode->i_sb,xid);
+                                              parent_dir_inode->i_sb, xid);
        else
                rc = cifs_get_inode_info(&newInode, full_path, NULL,
-                                         parent_dir_inode->i_sb,xid);
+                                         parent_dir_inode->i_sb, xid);
        if ((rc == 0) && (newInode != NULL)) {
                if (pTcon->nocase)
@@ -499,7 +504,7 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, struct name
                        direntry->d_op = &cifs_dentry_ops;
                d_add(direntry, newInode);
-                /* since paths are not looked up by component - the parent 
+                /* since paths are not looked up by component - the parent
                   directories are presumed to be good here */
                renew_parental_timestamps(direntry);
@@ -511,13 +516,13 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, struct name
                else
                        direntry->d_op = &cifs_dentry_ops;
                d_add(direntry, NULL);
-        /*      if it was once a directory (but how can we tell?) we could do  
+        /*      if it was once a directory (but how can we tell?) we could do
-                        shrink_dcache_parent(direntry); */
+                shrink_dcache_parent(direntry); */
        } else {
-                cERROR(1,("Error 0x%x on cifs_get_inode_info in lookup of %s",
+                cERROR(1, ("Error 0x%x on cifs_get_inode_info in lookup of %s",
-                           rc,full_path));
+                           rc, full_path));
-                /* BB special case check for Access Denied - watch security 
+                /* BB special case check for Access Denied - watch security
-                exposure of returning dir info implicitly via different rc 
+                exposure of returning dir info implicitly via different rc
                if file exists or not but no access BB */
        }
@@ -538,11 +543,11 @@ cifs_d_revalidate(struct dentry *direntry, struct nameidata *nd)
        } else {
                cFYI(1, ("neg dentry 0x%p name = %s",
                         direntry, direntry->d_name.name));
-                if(time_after(jiffies, direntry->d_time + HZ) || 
+                if (time_after(jiffies, direntry->d_time + HZ) ||
                        !lookupCacheEnabled) {
                        d_drop(direntry);
                        isValid = 0;
-                } 
+                }
        }
        return isValid;
@@ -559,8 +564,7 @@ cifs_d_revalidate(struct dentry *direntry, struct nameidata *nd)
 struct dentry_operations cifs_dentry_ops = {
        .d_revalidate = cifs_d_revalidate,
-/* d_delete:       cifs_d_delete,       *//* not needed except for debugging */
+/* d_delete:       cifs_d_delete,      */ /* not needed except for debugging */
-        /* no need for d_hash, d_compare, d_release, d_iput ... yet. BB confirm this BB */
 };
 static int cifs_ci_hash(struct dentry *dentry, struct qstr *q)
diff --git a/fs/cifs/fcntl.c b/fs/cifs/fcntl.c
index da12b482ebe5..8e375bb4b379 100644
--- a/fs/cifs/fcntl.c
+++ b/fs/cifs/fcntl.c
@@ -2,7 +2,7 @@
 *   fs/cifs/fcntl.c
 *
 *   vfs operations that deal with the file control API
- * 
+ *
 *   Copyright (C) International Business Machines  Corp., 2003,2004
 *   Author(s): Steve French (sfrench@us.ibm.com)
 *
@@ -35,35 +35,34 @@ static __u32 convert_to_cifs_notify_flags(unsigned long fcntl_notify_flags)
        /* No way on Linux VFS to ask to monitor xattr
        changes (and no stream support either */
-        if(fcntl_notify_flags & DN_ACCESS) {
+        if (fcntl_notify_flags & DN_ACCESS) {
                cifs_ntfy_flags |= FILE_NOTIFY_CHANGE_LAST_ACCESS;
        }
-        if(fcntl_notify_flags & DN_MODIFY) {
+        if (fcntl_notify_flags & DN_MODIFY) {
                /* What does this mean on directories? */
                cifs_ntfy_flags |= FILE_NOTIFY_CHANGE_LAST_WRITE |
                        FILE_NOTIFY_CHANGE_SIZE;
        }
-        if(fcntl_notify_flags & DN_CREATE) {
+        if (fcntl_notify_flags & DN_CREATE) {
-                cifs_ntfy_flags |= FILE_NOTIFY_CHANGE_CREATION | 
+                cifs_ntfy_flags |= FILE_NOTIFY_CHANGE_CREATION |
                        FILE_NOTIFY_CHANGE_LAST_WRITE;
        }
-        if(fcntl_notify_flags & DN_DELETE) {
+        if (fcntl_notify_flags & DN_DELETE) {
                cifs_ntfy_flags |= FILE_NOTIFY_CHANGE_LAST_WRITE;
        }
-        if(fcntl_notify_flags & DN_RENAME) {
+        if (fcntl_notify_flags & DN_RENAME) {
                /* BB review this - checking various server behaviors */
-                cifs_ntfy_flags |= FILE_NOTIFY_CHANGE_DIR_NAME | 
+                cifs_ntfy_flags |= FILE_NOTIFY_CHANGE_DIR_NAME |
                        FILE_NOTIFY_CHANGE_FILE_NAME;
        }
-        if(fcntl_notify_flags & DN_ATTRIB) {
+        if (fcntl_notify_flags & DN_ATTRIB) {
-                cifs_ntfy_flags |= FILE_NOTIFY_CHANGE_SECURITY | 
+                cifs_ntfy_flags |= FILE_NOTIFY_CHANGE_SECURITY |
                        FILE_NOTIFY_CHANGE_ATTRIBUTES;
        }
-/*      if(fcntl_notify_flags & DN_MULTISHOT) {
+/*      if (fcntl_notify_flags & DN_MULTISHOT) {
                cifs_ntfy_flags |= ;
        } */ /* BB fixme - not sure how to handle this with CIFS yet */
        return cifs_ntfy_flags;
 }
@@ -78,8 +77,7 @@ int cifs_dir_notify(struct file * file, unsigned long arg)
        __u32 filter = FILE_NOTIFY_CHANGE_NAME | FILE_NOTIFY_CHANGE_ATTRIBUTES;
        __u16 netfid;
+        if (experimEnabled == 0)
-        if(experimEnabled == 0)
                return 0;
        xid = GetXid();
@@ -88,21 +86,21 @@ int cifs_dir_notify(struct file * file, unsigned long arg)
        full_path = build_path_from_dentry(file->f_path.dentry);
-        if(full_path == NULL) {
+        if (full_path == NULL) {
                rc = -ENOMEM;
        } else {
-                cFYI(1,("dir notify on file %s Arg 0x%lx",full_path,arg));
+                cFYI(1, ("dir notify on file %s Arg 0x%lx", full_path, arg));
-                rc = CIFSSMBOpen(xid, pTcon, full_path, FILE_OPEN, 
+                rc = CIFSSMBOpen(xid, pTcon, full_path, FILE_OPEN,
                        GENERIC_READ | SYNCHRONIZE, 0 /* create options */,
-                        &netfid, &oplock,NULL, cifs_sb->local_nls,
+                        &netfid, &oplock, NULL, cifs_sb->local_nls,
                        cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
                /* BB fixme - add this handle to a notify handle list */
-                if(rc) {
+                if (rc) {
-                        cFYI(1,("Could not open directory for notify"));
+                        cFYI(1, ("Could not open directory for notify"));
                } else {
                        filter = convert_to_cifs_notify_flags(arg);
-                        if(filter != 0) {
+                        if (filter != 0) {
-                                rc = CIFSSMBNotify(xid, pTcon, 
+                                rc = CIFSSMBNotify(xid, pTcon,
                                        0 /* no subdirs */, netfid,
                                        filter, file, arg & DN_MULTISHOT,
                                        cifs_sb->local_nls);
@@ -113,10 +111,10 @@ int cifs_dir_notify(struct file * file, unsigned long arg)
                        it would close automatically but may be a way
                        to do it easily when inode freed or when
                        notify info is cleared/changed */
-                        cFYI(1,("notify rc %d",rc));
+                        cFYI(1, ("notify rc %d", rc));
                }
        }
-        
        FreeXid(xid);
        return rc;
 }
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 3e87dad3367c..f0ff12b3f398 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -986,7 +986,8 @@ mkdir_get_info:
                  * failed to get it from the server or was set bogus */ 
                if ((direntry->d_inode) && (direntry->d_inode->i_nlink < 2))
                                direntry->d_inode->i_nlink = 2; 
-                if (cifs_sb->tcon->ses->capabilities & CAP_UNIX)
+                if (cifs_sb->tcon->ses->capabilities & CAP_UNIX) {
+                        mode &= ~current->fs->umask;
                        if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) {
                                CIFSSMBUnixSetPerms(xid, pTcon, full_path,
                                                    mode,
@@ -1004,7 +1005,7 @@ mkdir_get_info:
                                                    cifs_sb->mnt_cifs_flags & 
                                                    CIFS_MOUNT_MAP_SPECIAL_CHR);
                        }
-                else {
+                } else {
                        /* BB to be implemented via Windows secrty descriptors
                           eg CIFSSMBWinSetPerms(xid, pTcon, full_path, mode,
                                                 -1, -1, local_nls); */
diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c
index e34c7db00f6f..a414f1775ae0 100644
--- a/fs/cifs/ioctl.c
+++ b/fs/cifs/ioctl.c
@@ -30,7 +30,7 @@
 #define CIFS_IOC_CHECKUMOUNT _IO(0xCF, 2)
-int cifs_ioctl (struct inode * inode, struct file * filep, 
+int cifs_ioctl (struct inode * inode, struct file * filep,
                unsigned int command, unsigned long arg)
 {
        int rc = -ENOTTY; /* strange error - but the precedent */
@@ -47,13 +47,13 @@ int cifs_ioctl (struct inode * inode, struct file * filep,
        xid = GetXid();
-        cFYI(1,("ioctl file %p  cmd %u  arg %lu",filep,command,arg));
+        cFYI(1, ("ioctl file %p  cmd %u  arg %lu", filep, command, arg));
        cifs_sb = CIFS_SB(inode->i_sb);
 #ifdef CONFIG_CIFS_POSIX
        tcon = cifs_sb->tcon;
-        if(tcon)
+        if (tcon)
                caps = le64_to_cpu(tcon->fsUnixInfo.Capability);
        else {
                rc = -EIO;
@@ -62,24 +62,24 @@ int cifs_ioctl (struct inode * inode, struct file * filep,
        }
 #endif /* CONFIG_CIFS_POSIX */
-        switch(command) {
+        switch (command) {
                case CIFS_IOC_CHECKUMOUNT:
-                        cFYI(1,("User unmount attempted"));
+                        cFYI(1, ("User unmount attempted"));
-                        if(cifs_sb->mnt_uid == current->uid)
+                        if (cifs_sb->mnt_uid == current->uid)
                                rc = 0;
                        else {
                                rc = -EACCES;
-                                cFYI(1,("uids do not match"));
+                                cFYI(1, ("uids do not match"));
                        }
                        break;
 #ifdef CONFIG_CIFS_POSIX
                case FS_IOC_GETFLAGS:
-                        if(CIFS_UNIX_EXTATTR_CAP & caps) {
+                        if (CIFS_UNIX_EXTATTR_CAP & caps) {
                                if (pSMBFile == NULL)
                                        break;
                                rc = CIFSGetExtAttr(xid, tcon, pSMBFile->netfid,
                                        &ExtAttrBits, &ExtAttrMask);
-                                if(rc == 0)
+                                if (rc == 0)
                                        rc = put_user(ExtAttrBits &
                                                FS_FL_USER_VISIBLE,
                                                (int __user *)arg);
@@ -87,8 +87,8 @@ int cifs_ioctl (struct inode * inode, struct file * filep,
                        break;
                case FS_IOC_SETFLAGS:
-                        if(CIFS_UNIX_EXTATTR_CAP & caps) {
+                        if (CIFS_UNIX_EXTATTR_CAP & caps) {
-                                if(get_user(ExtAttrBits,(int __user *)arg)) {
+                                if (get_user(ExtAttrBits, (int __user *)arg)) {
                                        rc = -EFAULT;
                                        break;
                                }
@@ -96,16 +96,15 @@ int cifs_ioctl (struct inode * inode, struct file * filep,
                                        break;
                                /* rc= CIFSGetExtAttr(xid,tcon,pSMBFile->netfid,
                                        extAttrBits, &ExtAttrMask);*/
-                                
                        }
-                        cFYI(1,("set flags not implemented yet"));
+                        cFYI(1, ("set flags not implemented yet"));
                        break;
 #endif /* CONFIG_CIFS_POSIX */
                default:
-                        cFYI(1,("unsupported ioctl"));
+                        cFYI(1, ("unsupported ioctl"));
                        break;
        }
        FreeXid(xid);
        return rc;
-} 
+}
diff --git a/fs/cifs/rfc1002pdu.h b/fs/cifs/rfc1002pdu.h
index aede606132aa..8b69fcceb597 100644
--- a/fs/cifs/rfc1002pdu.h
+++ b/fs/cifs/rfc1002pdu.h
@@ -18,7 +18,7 @@
 *
 *   You should have received a copy of the GNU Lesser General Public License
 *   along with this library; if not, write to the Free Software
- *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 
+ *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
 /* NB: unlike smb/cifs packets, the RFC1002 structures are big endian */
diff --git a/fs/coda/dir.c b/fs/coda/dir.c
index 9ddf5ed62162..898a86dde8f5 100644
--- a/fs/coda/dir.c
+++ b/fs/coda/dir.c
@@ -470,7 +470,7 @@ int coda_readdir(struct file *coda_file, void *dirent, filldir_t filldir)
                ret = -ENOENT;
                if (!IS_DEADDIR(host_inode)) {
-                        ret = host_file->f_op->readdir(host_file, filldir, dirent);
+                        ret = host_file->f_op->readdir(host_file, dirent, filldir);
                        file_accessed(host_file);
                }
        }
diff --git a/fs/coda/file.c b/fs/coda/file.c
index 5ef2b609ec7d..99dbe866816d 100644
--- a/fs/coda/file.c
+++ b/fs/coda/file.c
@@ -47,8 +47,9 @@ coda_file_read(struct file *coda_file, char __user *buf, size_t count, loff_t *p
 }
 static ssize_t
-coda_file_sendfile(struct file *coda_file, loff_t *ppos, size_t count,
+coda_file_splice_read(struct file *coda_file, loff_t *ppos,
-                   read_actor_t actor, void *target)
+                      struct pipe_inode_info *pipe, size_t count,
+                      unsigned int flags)
 {
        struct coda_file_info *cfi;
        struct file *host_file;
@@ -57,10 +58,10 @@ coda_file_sendfile(struct file *coda_file, loff_t *ppos, size_t count,
        BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC);
        host_file = cfi->cfi_container;
-        if (!host_file->f_op || !host_file->f_op->sendfile)
+        if (!host_file->f_op || !host_file->f_op->splice_read)
                return -EINVAL;
-        return host_file->f_op->sendfile(host_file, ppos, count, actor, target);
+        return host_file->f_op->splice_read(host_file, ppos, pipe, count,flags);
 }
 static ssize_t
@@ -295,6 +296,6 @@ const struct file_operations coda_file_operations = {
        .flush          = coda_flush,
        .release        = coda_release,
        .fsync          = coda_fsync,
-        .sendfile       = coda_file_sendfile,
+        .splice_read    = coda_file_splice_read,
 };
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index ec8896b264de..1d533a2ec3a6 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -368,6 +368,69 @@ void debugfs_remove(struct dentry *dentry)
 }
 EXPORT_SYMBOL_GPL(debugfs_remove);
+/**
+ * debugfs_rename - rename a file/directory in the debugfs filesystem
+ * @old_dir: a pointer to the parent dentry for the renamed object. This
+ *          should be a directory dentry.
+ * @old_dentry: dentry of an object to be renamed.
+ * @new_dir: a pointer to the parent dentry where the object should be
+ *          moved. This should be a directory dentry.
+ * @new_name: a pointer to a string containing the target name.
+ *
+ * This function renames a file/directory in debugfs.  The target must not
+ * exist for rename to succeed.
+ *
+ * This function will return a pointer to old_dentry (which is updated to
+ * reflect renaming) if it succeeds. If an error occurs, %NULL will be
+ * returned.
+ *
+ * If debugfs is not enabled in the kernel, the value -%ENODEV will be
+ * returned.
+ */
+struct dentry *debugfs_rename(struct dentry *old_dir, struct dentry *old_dentry,
+                struct dentry *new_dir, const char *new_name)
+{
+        int error;
+        struct dentry *dentry = NULL, *trap;
+        const char *old_name;
+        trap = lock_rename(new_dir, old_dir);
+        /* Source or destination directories don't exist? */
+        if (!old_dir->d_inode || !new_dir->d_inode)
+                goto exit;
+        /* Source does not exist, cyclic rename, or mountpoint? */
+        if (!old_dentry->d_inode || old_dentry == trap ||
+            d_mountpoint(old_dentry))
+                goto exit;
+        dentry = lookup_one_len(new_name, new_dir, strlen(new_name));
+        /* Lookup failed, cyclic rename or target exists? */
+        if (IS_ERR(dentry) || dentry == trap || dentry->d_inode)
+                goto exit;
+        old_name = fsnotify_oldname_init(old_dentry->d_name.name);
+        error = simple_rename(old_dir->d_inode, old_dentry, new_dir->d_inode,
+                dentry);
+        if (error) {
+                fsnotify_oldname_free(old_name);
+                goto exit;
+        }
+        d_move(old_dentry, dentry);
+        fsnotify_move(old_dir->d_inode, new_dir->d_inode, old_name,
+                old_dentry->d_name.name, S_ISDIR(old_dentry->d_inode->i_mode),
+                NULL, old_dentry->d_inode);
+        fsnotify_oldname_free(old_name);
+        unlock_rename(new_dir, old_dir);
+        dput(dentry);
+        return old_dentry;
+exit:
+        if (dentry && !IS_ERR(dentry))
+                dput(dentry);
+        unlock_rename(new_dir, old_dir);
+        return NULL;
+}
+EXPORT_SYMBOL_GPL(debugfs_rename);
 static decl_subsys(debug, NULL, NULL);
 static int __init debugfs_init(void)
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 8593f3dfd299..52bb2638f7ab 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -1106,7 +1106,7 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
        spin_lock_irqsave(&dio->bio_lock, flags);
        ret2 = --dio->refcount;
        spin_unlock_irqrestore(&dio->bio_lock, flags);
-        BUG_ON(!dio->is_async && ret2 != 0);
        if (ret2 == 0) {
                ret = dio_complete(dio, offset, ret);
                kfree(dio);
diff --git a/fs/dlm/Kconfig b/fs/dlm/Kconfig
index 69a94690e493..54bcc00ec8df 100644
--- a/fs/dlm/Kconfig
+++ b/fs/dlm/Kconfig
@@ -3,7 +3,7 @@ menu "Distributed Lock Manager"
 config DLM
        tristate "Distributed Lock Manager (DLM)"
-        depends on IPV6 || IPV6=n
+        depends on SYSFS && (IPV6 || IPV6=n)
        select CONFIGFS_FS
        select IP_SCTP
        help
diff --git a/fs/dlm/Makefile b/fs/dlm/Makefile
index 604cf7dc5f39..d248e60951ba 100644
--- a/fs/dlm/Makefile
+++ b/fs/dlm/Makefile
@@ -8,6 +8,7 @@ dlm-y :=			ast.o \
                                member.o \
                                memory.o \
                                midcomms.o \
+                                netlink.o \
                                lowcomms.o \
                                rcom.o \
                                recover.o \
diff --git a/fs/dlm/config.c b/fs/dlm/config.c
index 822abdcd1434..5069b2cb5a1f 100644
--- a/fs/dlm/config.c
+++ b/fs/dlm/config.c
@@ -90,6 +90,7 @@ struct cluster {
        unsigned int cl_scan_secs;
        unsigned int cl_log_debug;
        unsigned int cl_protocol;
+        unsigned int cl_timewarn_cs;
 };
 enum {
@@ -103,6 +104,7 @@ enum {
        CLUSTER_ATTR_SCAN_SECS,
        CLUSTER_ATTR_LOG_DEBUG,
        CLUSTER_ATTR_PROTOCOL,
+        CLUSTER_ATTR_TIMEWARN_CS,
 };
 struct cluster_attribute {
@@ -162,6 +164,7 @@ CLUSTER_ATTR(toss_secs, 1);
 CLUSTER_ATTR(scan_secs, 1);
 CLUSTER_ATTR(log_debug, 0);
 CLUSTER_ATTR(protocol, 0);
+CLUSTER_ATTR(timewarn_cs, 1);
 static struct configfs_attribute *cluster_attrs[] = {
        [CLUSTER_ATTR_TCP_PORT] = &cluster_attr_tcp_port.attr,
@@ -174,6 +177,7 @@ static struct configfs_attribute *cluster_attrs[] = {
        [CLUSTER_ATTR_SCAN_SECS] = &cluster_attr_scan_secs.attr,
        [CLUSTER_ATTR_LOG_DEBUG] = &cluster_attr_log_debug.attr,
        [CLUSTER_ATTR_PROTOCOL] = &cluster_attr_protocol.attr,
+        [CLUSTER_ATTR_TIMEWARN_CS] = &cluster_attr_timewarn_cs.attr,
        NULL,
 };
@@ -429,6 +433,8 @@ static struct config_group *make_cluster(struct config_group *g,
        cl->cl_toss_secs = dlm_config.ci_toss_secs;
        cl->cl_scan_secs = dlm_config.ci_scan_secs;
        cl->cl_log_debug = dlm_config.ci_log_debug;
+        cl->cl_protocol = dlm_config.ci_protocol;
+        cl->cl_timewarn_cs = dlm_config.ci_timewarn_cs;
        space_list = &sps->ss_group;
        comm_list = &cms->cs_group;
@@ -748,9 +754,16 @@ static ssize_t node_weight_write(struct node *nd, const char *buf, size_t len)
 static struct space *get_space(char *name)
 {
+        struct config_item *i;
        if (!space_list)
                return NULL;
-        return to_space(config_group_find_obj(space_list, name));
+        down(&space_list->cg_subsys->su_sem);
+        i = config_group_find_obj(space_list, name);
+        up(&space_list->cg_subsys->su_sem);
+        return to_space(i);
 }
 static void put_space(struct space *sp)
@@ -776,20 +789,20 @@ static struct comm *get_comm(int nodeid, struct sockaddr_storage *addr)
                        if (cm->nodeid != nodeid)
                                continue;
                        found = 1;
+                        config_item_get(i);
                        break;
                } else {
                        if (!cm->addr_count ||
                            memcmp(cm->addr[0], addr, sizeof(*addr)))
                                continue;
                        found = 1;
+                        config_item_get(i);
                        break;
                }
        }
        up(&clusters_root.subsys.su_sem);
-        if (found)
+        if (!found)
-                config_item_get(i);
-        else
                cm = NULL;
        return cm;
 }
@@ -909,6 +922,7 @@ int dlm_our_addr(struct sockaddr_storage *addr, int num)
 #define DEFAULT_SCAN_SECS          5
 #define DEFAULT_LOG_DEBUG          0
 #define DEFAULT_PROTOCOL           0
+#define DEFAULT_TIMEWARN_CS      500 /* 5 sec = 500 centiseconds */
 struct dlm_config_info dlm_config = {
        .ci_tcp_port = DEFAULT_TCP_PORT,
@@ -920,6 +934,7 @@ struct dlm_config_info dlm_config = {
        .ci_toss_secs = DEFAULT_TOSS_SECS,
        .ci_scan_secs = DEFAULT_SCAN_SECS,
        .ci_log_debug = DEFAULT_LOG_DEBUG,
-        .ci_protocol = DEFAULT_PROTOCOL
+        .ci_protocol = DEFAULT_PROTOCOL,
+        .ci_timewarn_cs = DEFAULT_TIMEWARN_CS
 };
diff --git a/fs/dlm/config.h b/fs/dlm/config.h
index 967cc3d72e5e..a3170fe22090 100644
--- a/fs/dlm/config.h
+++ b/fs/dlm/config.h
@@ -27,6 +27,7 @@ struct dlm_config_info {
        int ci_scan_secs;
        int ci_log_debug;
        int ci_protocol;
+        int ci_timewarn_cs;
 };
 extern struct dlm_config_info dlm_config;
diff --git a/fs/dlm/debug_fs.c b/fs/dlm/debug_fs.c
index 61ba670b9e02..12c3bfd5e660 100644
--- a/fs/dlm/debug_fs.c
+++ b/fs/dlm/debug_fs.c
@@ -17,6 +17,7 @@
 #include <linux/debugfs.h>
 #include "dlm_internal.h"
+#include "lock.h"
 #define DLM_DEBUG_BUF_LEN 4096
 static char debug_buf[DLM_DEBUG_BUF_LEN];
@@ -26,6 +27,8 @@ static struct dentry *dlm_root;
 struct rsb_iter {
        int entry;
+        int locks;
+        int header;
        struct dlm_ls *ls;
        struct list_head *next;
        struct dlm_rsb *rsb;
@@ -57,8 +60,8 @@ static char *print_lockmode(int mode)
        }
 }
-static void print_lock(struct seq_file *s, struct dlm_lkb *lkb,
+static void print_resource_lock(struct seq_file *s, struct dlm_lkb *lkb,
-                       struct dlm_rsb *res)
+                                struct dlm_rsb *res)
 {
        seq_printf(s, "%08x %s", lkb->lkb_id, print_lockmode(lkb->lkb_grmode));
@@ -85,6 +88,8 @@ static int print_resource(struct dlm_rsb *res, struct seq_file *s)
        struct dlm_lkb *lkb;
        int i, lvblen = res->res_ls->ls_lvblen, recover_list, root_list;
+        lock_rsb(res);
        seq_printf(s, "\nResource %p Name (len=%d) \"", res, res->res_length);
        for (i = 0; i < res->res_length; i++) {
                if (isprint(res->res_name[i]))
@@ -129,15 +134,15 @@ static int print_resource(struct dlm_rsb *res, struct seq_file *s)
        /* Print the locks attached to this resource */
        seq_printf(s, "Granted Queue\n");
        list_for_each_entry(lkb, &res->res_grantqueue, lkb_statequeue)
-                print_lock(s, lkb, res);
+                print_resource_lock(s, lkb, res);
        seq_printf(s, "Conversion Queue\n");
        list_for_each_entry(lkb, &res->res_convertqueue, lkb_statequeue)
-                print_lock(s, lkb, res);
+                print_resource_lock(s, lkb, res);
        seq_printf(s, "Waiting Queue\n");
        list_for_each_entry(lkb, &res->res_waitqueue, lkb_statequeue)
-                print_lock(s, lkb, res);
+                print_resource_lock(s, lkb, res);
        if (list_empty(&res->res_lookup))
                goto out;
@@ -151,6 +156,61 @@ static int print_resource(struct dlm_rsb *res, struct seq_file *s)
                seq_printf(s, "\n");
        }
 out:
+        unlock_rsb(res);
+        return 0;
+}
+static void print_lock(struct seq_file *s, struct dlm_lkb *lkb, struct dlm_rsb *r)
+{
+        struct dlm_user_args *ua;
+        unsigned int waiting = 0;
+        uint64_t xid = 0;
+        if (lkb->lkb_flags & DLM_IFL_USER) {
+                ua = (struct dlm_user_args *) lkb->lkb_astparam;
+                if (ua)
+                        xid = ua->xid;
+        }
+        if (lkb->lkb_timestamp)
+                waiting = jiffies_to_msecs(jiffies - lkb->lkb_timestamp);
+        /* id nodeid remid pid xid exflags flags sts grmode rqmode time_ms
+           r_nodeid r_len r_name */
+        seq_printf(s, "%x %d %x %u %llu %x %x %d %d %d %u %u %d \"%s\"\n",
+                   lkb->lkb_id,
+                   lkb->lkb_nodeid,
+                   lkb->lkb_remid,
+                   lkb->lkb_ownpid,
+                   (unsigned long long)xid,
+                   lkb->lkb_exflags,
+                   lkb->lkb_flags,
+                   lkb->lkb_status,
+                   lkb->lkb_grmode,
+                   lkb->lkb_rqmode,
+                   waiting,
+                   r->res_nodeid,
+                   r->res_length,
+                   r->res_name);
+}
+static int print_locks(struct dlm_rsb *r, struct seq_file *s)
+{
+        struct dlm_lkb *lkb;
+        lock_rsb(r);
+        list_for_each_entry(lkb, &r->res_grantqueue, lkb_statequeue)
+                print_lock(s, lkb, r);
+        list_for_each_entry(lkb, &r->res_convertqueue, lkb_statequeue)
+                print_lock(s, lkb, r);
+        list_for_each_entry(lkb, &r->res_waitqueue, lkb_statequeue)
+                print_lock(s, lkb, r);
+        unlock_rsb(r);
        return 0;
 }
@@ -166,6 +226,9 @@ static int rsb_iter_next(struct rsb_iter *ri)
                        read_lock(&ls->ls_rsbtbl[i].lock);
                        if (!list_empty(&ls->ls_rsbtbl[i].list)) {
                                ri->next = ls->ls_rsbtbl[i].list.next;
+                                ri->rsb = list_entry(ri->next, struct dlm_rsb,
+                                                        res_hashchain);
+                                dlm_hold_rsb(ri->rsb);
                                read_unlock(&ls->ls_rsbtbl[i].lock);
                                break;
                        }
@@ -176,6 +239,7 @@ static int rsb_iter_next(struct rsb_iter *ri)
                if (ri->entry >= ls->ls_rsbtbl_size)
                        return 1;
        } else {
+                struct dlm_rsb *old = ri->rsb;
                i = ri->entry;
                read_lock(&ls->ls_rsbtbl[i].lock);
                ri->next = ri->next->next;
@@ -184,11 +248,14 @@ static int rsb_iter_next(struct rsb_iter *ri)
                        ri->next = NULL;
                        ri->entry++;
                        read_unlock(&ls->ls_rsbtbl[i].lock);
+                        dlm_put_rsb(old);
                        goto top;
                }
+                ri->rsb = list_entry(ri->next, struct dlm_rsb, res_hashchain);
+                dlm_hold_rsb(ri->rsb);
                read_unlock(&ls->ls_rsbtbl[i].lock);
+                dlm_put_rsb(old);
        }
-        ri->rsb = list_entry(ri->next, struct dlm_rsb, res_hashchain);
        return 0;
 }
@@ -202,7 +269,7 @@ static struct rsb_iter *rsb_iter_init(struct dlm_ls *ls)
 {
        struct rsb_iter *ri;
-        ri = kmalloc(sizeof *ri, GFP_KERNEL);
+        ri = kzalloc(sizeof *ri, GFP_KERNEL);
        if (!ri)
                return NULL;
@@ -260,7 +327,17 @@ static int rsb_seq_show(struct seq_file *file, void *iter_ptr)
 {
        struct rsb_iter *ri = iter_ptr;
-        print_resource(ri->rsb, file);
+        if (ri->locks) {
+                if (ri->header) {
+                        seq_printf(file, "id nodeid remid pid xid exflags flags "
+                                         "sts grmode rqmode time_ms r_nodeid "
+                                         "r_len r_name\n");
+                        ri->header = 0;
+                }
+                print_locks(ri->rsb, file);
+        } else {
+                print_resource(ri->rsb, file);
+        }
        return 0;
 }
@@ -296,6 +373,83 @@ static const struct file_operations rsb_fops = {
 };
 /*
+ * Dump state in compact per-lock listing
+ */
+static struct rsb_iter *locks_iter_init(struct dlm_ls *ls, loff_t *pos)
+{
+        struct rsb_iter *ri;
+        ri = kzalloc(sizeof *ri, GFP_KERNEL);
+        if (!ri)
+                return NULL;
+        ri->ls = ls;
+        ri->entry = 0;
+        ri->next = NULL;
+        ri->locks = 1;
+        if (*pos == 0)
+                ri->header = 1;
+        if (rsb_iter_next(ri)) {
+                rsb_iter_free(ri);
+                return NULL;
+        }
+        return ri;
+}
+static void *locks_seq_start(struct seq_file *file, loff_t *pos)
+{
+        struct rsb_iter *ri;
+        loff_t n = *pos;
+        ri = locks_iter_init(file->private, pos);
+        if (!ri)
+                return NULL;
+        while (n--) {
+                if (rsb_iter_next(ri)) {
+                        rsb_iter_free(ri);
+                        return NULL;
+                }
+        }
+        return ri;
+}
+static struct seq_operations locks_seq_ops = {
+        .start = locks_seq_start,
+        .next  = rsb_seq_next,
+        .stop  = rsb_seq_stop,
+        .show  = rsb_seq_show,
+};
+static int locks_open(struct inode *inode, struct file *file)
+{
+        struct seq_file *seq;
+        int ret;
+        ret = seq_open(file, &locks_seq_ops);
+        if (ret)
+                return ret;
+        seq = file->private_data;
+        seq->private = inode->i_private;
+        return 0;
+}
+static const struct file_operations locks_fops = {
+        .owner   = THIS_MODULE,
+        .open    = locks_open,
+        .read    = seq_read,
+        .llseek  = seq_lseek,
+        .release = seq_release
+};
+/*
 * dump lkb's on the ls_waiters list
 */
@@ -362,6 +516,20 @@ int dlm_create_debug_file(struct dlm_ls *ls)
                return -ENOMEM;
        }
+        memset(name, 0, sizeof(name));
+        snprintf(name, DLM_LOCKSPACE_LEN+8, "%s_locks", ls->ls_name);
+        ls->ls_debug_locks_dentry = debugfs_create_file(name,
+                                                        S_IFREG | S_IRUGO,
+                                                        dlm_root,
+                                                        ls,
+                                                        &locks_fops);
+        if (!ls->ls_debug_locks_dentry) {
+                debugfs_remove(ls->ls_debug_waiters_dentry);
+                debugfs_remove(ls->ls_debug_rsb_dentry);
+                return -ENOMEM;
+        }
        return 0;
 }
@@ -371,6 +539,8 @@ void dlm_delete_debug_file(struct dlm_ls *ls)
                debugfs_remove(ls->ls_debug_rsb_dentry);
        if (ls->ls_debug_waiters_dentry)
                debugfs_remove(ls->ls_debug_waiters_dentry);
+        if (ls->ls_debug_locks_dentry)
+                debugfs_remove(ls->ls_debug_locks_dentry);
 }
 int dlm_register_debugfs(void)
diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h
index 30994d68f6a0..74901e981e10 100644
--- a/fs/dlm/dlm_internal.h
+++ b/fs/dlm/dlm_internal.h
@@ -151,6 +151,7 @@ struct dlm_args {
        void                    *bastaddr;
        int                     mode;
        struct dlm_lksb         *lksb;
+        unsigned long           timeout;
 };
@@ -213,6 +214,9 @@ struct dlm_args {
 #define DLM_IFL_OVERLAP_UNLOCK  0x00080000
 #define DLM_IFL_OVERLAP_CANCEL  0x00100000
 #define DLM_IFL_ENDOFLIFE       0x00200000
+#define DLM_IFL_WATCH_TIMEWARN  0x00400000
+#define DLM_IFL_TIMEOUT_CANCEL  0x00800000
+#define DLM_IFL_DEADLOCK_CANCEL 0x01000000
 #define DLM_IFL_USER            0x00000001
 #define DLM_IFL_ORPHAN          0x00000002
@@ -243,6 +247,9 @@ struct dlm_lkb {
        struct list_head        lkb_wait_reply; /* waiting for remote reply */
        struct list_head        lkb_astqueue;   /* need ast to be sent */
        struct list_head        lkb_ownqueue;   /* list of locks for a process */
+        struct list_head        lkb_time_list;
+        unsigned long           lkb_timestamp;
+        unsigned long           lkb_timeout_cs;
        char                    *lkb_lvbptr;
        struct dlm_lksb         *lkb_lksb;      /* caller's status block */
@@ -447,12 +454,16 @@ struct dlm_ls {
        struct mutex            ls_orphans_mutex;
        struct list_head        ls_orphans;
+        struct mutex            ls_timeout_mutex;
+        struct list_head        ls_timeout;
        struct list_head        ls_nodes;       /* current nodes in ls */
        struct list_head        ls_nodes_gone;  /* dead node list, recovery */
        int                     ls_num_nodes;   /* number of nodes in ls */
        int                     ls_low_nodeid;
        int                     ls_total_weight;
        int                     *ls_node_array;
+        gfp_t                   ls_allocation;
        struct dlm_rsb          ls_stub_rsb;    /* for returning errors */
        struct dlm_lkb          ls_stub_lkb;    /* for returning errors */
@@ -460,9 +471,12 @@ struct dlm_ls {
        struct dentry           *ls_debug_rsb_dentry; /* debugfs */
        struct dentry           *ls_debug_waiters_dentry; /* debugfs */
+        struct dentry           *ls_debug_locks_dentry; /* debugfs */
        wait_queue_head_t       ls_uevent_wait; /* user part of join/leave */
        int                     ls_uevent_result;
+        struct completion       ls_members_done;
+        int                     ls_members_result;
        struct miscdevice       ls_device;
@@ -472,6 +486,7 @@ struct dlm_ls {
        struct task_struct      *ls_recoverd_task;
        struct mutex            ls_recoverd_active;
        spinlock_t              ls_recover_lock;
+        unsigned long           ls_recover_begin; /* jiffies timestamp */
        uint32_t                ls_recover_status; /* DLM_RS_ */
        uint64_t                ls_recover_seq;
        struct dlm_recover      *ls_recover_args;
@@ -501,6 +516,7 @@ struct dlm_ls {
 #define LSFL_RCOM_READY         3
 #define LSFL_RCOM_WAIT          4
 #define LSFL_UEVENT_WAIT        5
+#define LSFL_TIMEWARN           6
 /* much of this is just saving user space pointers associated with the
   lock that we pass back to the user lib with an ast */
@@ -518,6 +534,7 @@ struct dlm_user_args {
        void __user             *castaddr;
        void __user             *bastparam;
        void __user             *bastaddr;
+        uint64_t                xid;
 };
 #define DLM_PROC_FLAGS_CLOSING 1
diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
index d8d6e729f96b..b455919c1998 100644
--- a/fs/dlm/lock.c
+++ b/fs/dlm/lock.c
@@ -82,10 +82,13 @@ static int send_bast(struct dlm_rsb *r, struct dlm_lkb *lkb, int mode);
 static int send_lookup(struct dlm_rsb *r, struct dlm_lkb *lkb);
 static int send_remove(struct dlm_rsb *r);
 static int _request_lock(struct dlm_rsb *r, struct dlm_lkb *lkb);
+static int _cancel_lock(struct dlm_rsb *r, struct dlm_lkb *lkb);
 static void __receive_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb,
                                    struct dlm_message *ms);
 static int receive_extralen(struct dlm_message *ms);
 static void do_purge(struct dlm_ls *ls, int nodeid, int pid);
+static void del_timeout(struct dlm_lkb *lkb);
+void dlm_timeout_warn(struct dlm_lkb *lkb);
 /*
 * Lock compatibilty matrix - thanks Steve
@@ -194,17 +197,17 @@ void dlm_dump_rsb(struct dlm_rsb *r)
 /* Threads cannot use the lockspace while it's being recovered */
-static inline void lock_recovery(struct dlm_ls *ls)
+static inline void dlm_lock_recovery(struct dlm_ls *ls)
 {
        down_read(&ls->ls_in_recovery);
 }
-static inline void unlock_recovery(struct dlm_ls *ls)
+void dlm_unlock_recovery(struct dlm_ls *ls)
 {
        up_read(&ls->ls_in_recovery);
 }
-static inline int lock_recovery_try(struct dlm_ls *ls)
+int dlm_lock_recovery_try(struct dlm_ls *ls)
 {
        return down_read_trylock(&ls->ls_in_recovery);
 }
@@ -286,8 +289,22 @@ static void queue_cast(struct dlm_rsb *r, struct dlm_lkb *lkb, int rv)
        if (is_master_copy(lkb))
                return;
+        del_timeout(lkb);
        DLM_ASSERT(lkb->lkb_lksb, dlm_print_lkb(lkb););
+        /* a cancel normally returns -DLM_ECANCEL; if the lkb is flagged as a
+           timeout cancel return -ETIMEDOUT, as a deadlock cancel -EDEADLK */
+        if (rv == -DLM_ECANCEL && (lkb->lkb_flags & DLM_IFL_TIMEOUT_CANCEL)) {
+                lkb->lkb_flags &= ~DLM_IFL_TIMEOUT_CANCEL;
+                rv = -ETIMEDOUT;
+        }
+        if (rv == -DLM_ECANCEL && (lkb->lkb_flags & DLM_IFL_DEADLOCK_CANCEL)) {
+                lkb->lkb_flags &= ~DLM_IFL_DEADLOCK_CANCEL;
+                rv = -EDEADLK;
+        }
        lkb->lkb_lksb->sb_status = rv;
        lkb->lkb_lksb->sb_flags = lkb->lkb_sbflags;
@@ -581,6 +598,7 @@ static int create_lkb(struct dlm_ls *ls, struct dlm_lkb **lkb_ret)
        kref_init(&lkb->lkb_ref);
        INIT_LIST_HEAD(&lkb->lkb_ownqueue);
        INIT_LIST_HEAD(&lkb->lkb_rsb_lookup);
+        INIT_LIST_HEAD(&lkb->lkb_time_list);
        get_random_bytes(&bucket, sizeof(bucket));
        bucket &= (ls->ls_lkbtbl_size - 1);
@@ -985,15 +1003,136 @@ void dlm_scan_rsbs(struct dlm_ls *ls)
 {
        int i;
-        if (dlm_locking_stopped(ls))
-                return;
        for (i = 0; i < ls->ls_rsbtbl_size; i++) {
                shrink_bucket(ls, i);
+                if (dlm_locking_stopped(ls))
+                        break;
                cond_resched();
        }
 }
+static void add_timeout(struct dlm_lkb *lkb)
+{
+        struct dlm_ls *ls = lkb->lkb_resource->res_ls;
+        if (is_master_copy(lkb)) {
+                lkb->lkb_timestamp = jiffies;
+                return;
+        }
+        if (test_bit(LSFL_TIMEWARN, &ls->ls_flags) &&
+            !(lkb->lkb_exflags & DLM_LKF_NODLCKWT)) {
+                lkb->lkb_flags |= DLM_IFL_WATCH_TIMEWARN;
+                goto add_it;
+        }
+        if (lkb->lkb_exflags & DLM_LKF_TIMEOUT)
+                goto add_it;
+        return;
+ add_it:
+        DLM_ASSERT(list_empty(&lkb->lkb_time_list), dlm_print_lkb(lkb););
+        mutex_lock(&ls->ls_timeout_mutex);
+        hold_lkb(lkb);
+        lkb->lkb_timestamp = jiffies;
+        list_add_tail(&lkb->lkb_time_list, &ls->ls_timeout);
+        mutex_unlock(&ls->ls_timeout_mutex);
+}
+static void del_timeout(struct dlm_lkb *lkb)
+{
+        struct dlm_ls *ls = lkb->lkb_resource->res_ls;
+        mutex_lock(&ls->ls_timeout_mutex);
+        if (!list_empty(&lkb->lkb_time_list)) {
+                list_del_init(&lkb->lkb_time_list);
+                unhold_lkb(lkb);
+        }
+        mutex_unlock(&ls->ls_timeout_mutex);
+}
+/* FIXME: is it safe to look at lkb_exflags, lkb_flags, lkb_timestamp, and
+   lkb_timeout_cs without lock_rsb?  Note: we can't lock timeout_mutex
+   and then lock rsb because of lock ordering in add_timeout.  We may need
+   to specify some special timeout-related bits in the lkb that are just to
+   be accessed under the timeout_mutex. */
+void dlm_scan_timeout(struct dlm_ls *ls)
+{
+        struct dlm_rsb *r;
+        struct dlm_lkb *lkb;
+        int do_cancel, do_warn;
+        for (;;) {
+                if (dlm_locking_stopped(ls))
+                        break;
+                do_cancel = 0;
+                do_warn = 0;
+                mutex_lock(&ls->ls_timeout_mutex);
+                list_for_each_entry(lkb, &ls->ls_timeout, lkb_time_list) {
+                        if ((lkb->lkb_exflags & DLM_LKF_TIMEOUT) &&
+                            time_after_eq(jiffies, lkb->lkb_timestamp +
+                                          lkb->lkb_timeout_cs * HZ/100))
+                                do_cancel = 1;
+                        if ((lkb->lkb_flags & DLM_IFL_WATCH_TIMEWARN) &&
+                            time_after_eq(jiffies, lkb->lkb_timestamp +
+                                           dlm_config.ci_timewarn_cs * HZ/100))
+                                do_warn = 1;
+                        if (!do_cancel && !do_warn)
+                                continue;
+                        hold_lkb(lkb);
+                        break;
+                }
+                mutex_unlock(&ls->ls_timeout_mutex);
+                if (!do_cancel && !do_warn)
+                        break;
+                r = lkb->lkb_resource;
+                hold_rsb(r);
+                lock_rsb(r);
+                if (do_warn) {
+                        /* clear flag so we only warn once */
+                        lkb->lkb_flags &= ~DLM_IFL_WATCH_TIMEWARN;
+                        if (!(lkb->lkb_exflags & DLM_LKF_TIMEOUT))
+                                del_timeout(lkb);
+                        dlm_timeout_warn(lkb);
+                }
+                if (do_cancel) {
+                        log_debug(ls, "timeout cancel %x node %d %s",
+                                  lkb->lkb_id, lkb->lkb_nodeid, r->res_name);
+                        lkb->lkb_flags &= ~DLM_IFL_WATCH_TIMEWARN;
+                        lkb->lkb_flags |= DLM_IFL_TIMEOUT_CANCEL;
+                        del_timeout(lkb);
+                        _cancel_lock(r, lkb);
+                }
+                unlock_rsb(r);
+                unhold_rsb(r);
+                dlm_put_lkb(lkb);
+        }
+}
+/* This is only called by dlm_recoverd, and we rely on dlm_ls_stop() stopping
+   dlm_recoverd before checking/setting ls_recover_begin. */
+void dlm_adjust_timeouts(struct dlm_ls *ls)
+{
+        struct dlm_lkb *lkb;
+        long adj = jiffies - ls->ls_recover_begin;
+        ls->ls_recover_begin = 0;
+        mutex_lock(&ls->ls_timeout_mutex);
+        list_for_each_entry(lkb, &ls->ls_timeout, lkb_time_list)
+                lkb->lkb_timestamp += adj;
+        mutex_unlock(&ls->ls_timeout_mutex);
+}
 /* lkb is master or local copy */
 static void set_lvb_lock(struct dlm_rsb *r, struct dlm_lkb *lkb)
@@ -1275,10 +1414,8 @@ static int queue_conflict(struct list_head *head, struct dlm_lkb *lkb)
 * queue for one resource.  The granted mode of each lock blocks the requested
 * mode of the other lock."
 *
- * Part 2: if the granted mode of lkb is preventing the first lkb in the
+ * Part 2: if the granted mode of lkb is preventing an earlier lkb in the
- * convert queue from being granted, then demote lkb (set grmode to NL).
+ * convert queue from being granted, then deadlk/demote lkb.
- * This second form requires that we check for conv-deadlk even when
- * now == 0 in _can_be_granted().
 *
 * Example:
 * Granted Queue: empty
@@ -1287,41 +1424,52 @@ static int queue_conflict(struct list_head *head, struct dlm_lkb *lkb)
 *
 * The first lock can't be granted because of the granted mode of the second
 * lock and the second lock can't be granted because it's not first in the
- * list.  We demote the granted mode of the second lock (the lkb passed to this
+ * list.  We either cancel lkb's conversion (PR->EX) and return EDEADLK, or we
- * function).
+ * demote the granted mode of lkb (from PR to NL) if it has the CONVDEADLK
+ * flag set and return DEMOTED in the lksb flags.
+ *
+ * Originally, this function detected conv-deadlk in a more limited scope:
+ * - if !modes_compat(lkb1, lkb2) && !modes_compat(lkb2, lkb1), or
+ * - if lkb1 was the first entry in the queue (not just earlier), and was
+ *   blocked by the granted mode of lkb2, and there was nothing on the
+ *   granted queue preventing lkb1 from being granted immediately, i.e.
+ *   lkb2 was the only thing preventing lkb1 from being granted.
+ *
+ * That second condition meant we'd only say there was conv-deadlk if
+ * resolving it (by demotion) would lead to the first lock on the convert
+ * queue being granted right away.  It allowed conversion deadlocks to exist
+ * between locks on the convert queue while they couldn't be granted anyway.
 *
- * After the resolution, the "grant pending" function needs to go back and try
+ * Now, we detect and take action on conversion deadlocks immediately when
- * to grant locks on the convert queue again since the first lock can now be
+ * they're created, even if they may not be immediately consequential.  If
- * granted.
+ * lkb1 exists anywhere in the convert queue and lkb2 comes in with a granted
+ * mode that would prevent lkb1's conversion from being granted, we do a
+ * deadlk/demote on lkb2 right away and don't let it onto the convert queue.
+ * I think this means that the lkb_is_ahead condition below should always
+ * be zero, i.e. there will never be conv-deadlk between two locks that are
+ * both already on the convert queue.
 */
-static int conversion_deadlock_detect(struct dlm_rsb *rsb, struct dlm_lkb *lkb)
+static int conversion_deadlock_detect(struct dlm_rsb *r, struct dlm_lkb *lkb2)
 {
-        struct dlm_lkb *this, *first = NULL, *self = NULL;
+        struct dlm_lkb *lkb1;
+        int lkb_is_ahead = 0;
-        list_for_each_entry(this, &rsb->res_convertqueue, lkb_statequeue) {
+        list_for_each_entry(lkb1, &r->res_convertqueue, lkb_statequeue) {
-                if (!first)
+                if (lkb1 == lkb2) {
-                        first = this;
+                        lkb_is_ahead = 1;
-                if (this == lkb) {
-                        self = lkb;
                        continue;
                }
-                if (!modes_compat(this, lkb) && !modes_compat(lkb, this))
+                if (!lkb_is_ahead) {
-                        return 1;
+                        if (!modes_compat(lkb2, lkb1))
-        }
+                                return 1;
+                } else {
-        /* if lkb is on the convert queue and is preventing the first
+                        if (!modes_compat(lkb2, lkb1) &&
-           from being granted, then there's deadlock and we demote lkb.
+                            !modes_compat(lkb1, lkb2))
-           multiple converting locks may need to do this before the first
+                                return 1;
-           converting lock can be granted. */
+                }
-        if (self && self != first) {
-                if (!modes_compat(lkb, first) &&
-                    !queue_conflict(&rsb->res_grantqueue, first))
-                        return 1;
        }
        return 0;
 }
@@ -1450,42 +1598,57 @@ static int _can_be_granted(struct dlm_rsb *r, struct dlm_lkb *lkb, int now)
        if (!now && !conv && list_empty(&r->res_convertqueue) &&
            first_in_list(lkb, &r->res_waitqueue))
                return 1;
 out:
-        /*
-         * The following, enabled by CONVDEADLK, departs from VMS.
-         */
-        if (conv && (lkb->lkb_exflags & DLM_LKF_CONVDEADLK) &&
-            conversion_deadlock_detect(r, lkb)) {
-                lkb->lkb_grmode = DLM_LOCK_NL;
-                lkb->lkb_sbflags |= DLM_SBF_DEMOTED;
-        }
        return 0;
 }
-/*
+static int can_be_granted(struct dlm_rsb *r, struct dlm_lkb *lkb, int now,
- * The ALTPR and ALTCW flags aren't traditional lock manager flags, but are a
+                          int *err)
- * simple way to provide a big optimization to applications that can use them.
- */
-static int can_be_granted(struct dlm_rsb *r, struct dlm_lkb *lkb, int now)
 {
-        uint32_t flags = lkb->lkb_exflags;
        int rv;
        int8_t alt = 0, rqmode = lkb->lkb_rqmode;
+        int8_t is_convert = (lkb->lkb_grmode != DLM_LOCK_IV);
+        if (err)
+                *err = 0;
        rv = _can_be_granted(r, lkb, now);
        if (rv)
                goto out;
-        if (lkb->lkb_sbflags & DLM_SBF_DEMOTED)
+        /*
+         * The CONVDEADLK flag is non-standard and tells the dlm to resolve
+         * conversion deadlocks by demoting grmode to NL, otherwise the dlm
+         * cancels one of the locks.
+         */
+        if (is_convert && can_be_queued(lkb) &&
+            conversion_deadlock_detect(r, lkb)) {
+                if (lkb->lkb_exflags & DLM_LKF_CONVDEADLK) {
+                        lkb->lkb_grmode = DLM_LOCK_NL;
+                        lkb->lkb_sbflags |= DLM_SBF_DEMOTED;
+                } else if (!(lkb->lkb_exflags & DLM_LKF_NODLCKWT)) {
+                        if (err)
+                                *err = -EDEADLK;
+                        else {
+                                log_print("can_be_granted deadlock %x now %d",
+                                          lkb->lkb_id, now);
+                                dlm_dump_rsb(r);
+                        }
+                }
                goto out;
+        }
-        if (rqmode != DLM_LOCK_PR && flags & DLM_LKF_ALTPR)
+        /*
+         * The ALTPR and ALTCW flags are non-standard and tell the dlm to try
+         * to grant a request in a mode other than the normal rqmode.  It's a
+         * simple way to provide a big optimization to applications that can
+         * use them.
+         */
+        if (rqmode != DLM_LOCK_PR && (lkb->lkb_exflags & DLM_LKF_ALTPR))
                alt = DLM_LOCK_PR;
-        else if (rqmode != DLM_LOCK_CW && flags & DLM_LKF_ALTCW)
+        else if (rqmode != DLM_LOCK_CW && (lkb->lkb_exflags & DLM_LKF_ALTCW))
                alt = DLM_LOCK_CW;
        if (alt) {
@@ -1500,10 +1663,20 @@ static int can_be_granted(struct dlm_rsb *r, struct dlm_lkb *lkb, int now)
        return rv;
 }
+/* FIXME: I don't think that can_be_granted() can/will demote or find deadlock
+   for locks pending on the convert list.  Once verified (watch for these
+   log_prints), we should be able to just call _can_be_granted() and not
+   bother with the demote/deadlk cases here (and there's no easy way to deal
+   with a deadlk here, we'd have to generate something like grant_lock with
+   the deadlk error.) */
+/* returns the highest requested mode of all blocked conversions */
 static int grant_pending_convert(struct dlm_rsb *r, int high)
 {
        struct dlm_lkb *lkb, *s;
        int hi, demoted, quit, grant_restart, demote_restart;
+        int deadlk;
        quit = 0;
 restart:
@@ -1513,14 +1686,29 @@ static int grant_pending_convert(struct dlm_rsb *r, int high)
        list_for_each_entry_safe(lkb, s, &r->res_convertqueue, lkb_statequeue) {
                demoted = is_demoted(lkb);
-                if (can_be_granted(r, lkb, 0)) {
+                deadlk = 0;
+                if (can_be_granted(r, lkb, 0, &deadlk)) {
                        grant_lock_pending(r, lkb);
                        grant_restart = 1;
-                } else {
+                        continue;
-                        hi = max_t(int, lkb->lkb_rqmode, hi);
-                        if (!demoted && is_demoted(lkb))
-                                demote_restart = 1;
                }
+                if (!demoted && is_demoted(lkb)) {
+                        log_print("WARN: pending demoted %x node %d %s",
+                                  lkb->lkb_id, lkb->lkb_nodeid, r->res_name);
+                        demote_restart = 1;
+                        continue;
+                }
+                if (deadlk) {
+                        log_print("WARN: pending deadlock %x node %d %s",
+                                  lkb->lkb_id, lkb->lkb_nodeid, r->res_name);
+                        dlm_dump_rsb(r);
+                        continue;
+                }
+                hi = max_t(int, lkb->lkb_rqmode, hi);
        }
        if (grant_restart)
@@ -1538,7 +1726,7 @@ static int grant_pending_wait(struct dlm_rsb *r, int high)
        struct dlm_lkb *lkb, *s;
        list_for_each_entry_safe(lkb, s, &r->res_waitqueue, lkb_statequeue) {
-                if (can_be_granted(r, lkb, 0))
+                if (can_be_granted(r, lkb, 0, NULL))
                        grant_lock_pending(r, lkb);
                else
                        high = max_t(int, lkb->lkb_rqmode, high);
@@ -1733,7 +1921,7 @@ static void confirm_master(struct dlm_rsb *r, int error)
 }
 static int set_lock_args(int mode, struct dlm_lksb *lksb, uint32_t flags,
-                         int namelen, uint32_t parent_lkid, void *ast,
+                         int namelen, unsigned long timeout_cs, void *ast,
                         void *astarg, void *bast, struct dlm_args *args)
 {
        int rv = -EINVAL;
@@ -1776,10 +1964,6 @@ static int set_lock_args(int mode, struct dlm_lksb *lksb, uint32_t flags,
        if (flags & DLM_LKF_VALBLK && !lksb->sb_lvbptr)
                goto out;
-        /* parent/child locks not yet supported */
-        if (parent_lkid)
-                goto out;
        if (flags & DLM_LKF_CONVERT && !lksb->sb_lkid)
                goto out;
@@ -1791,6 +1975,7 @@ static int set_lock_args(int mode, struct dlm_lksb *lksb, uint32_t flags,
        args->astaddr = ast;
        args->astparam = (long) astarg;
        args->bastaddr = bast;
+        args->timeout = timeout_cs;
        args->mode = mode;
        args->lksb = lksb;
        rv = 0;
@@ -1845,6 +2030,7 @@ static int validate_lock_args(struct dlm_ls *ls, struct dlm_lkb *lkb,
        lkb->lkb_lksb = args->lksb;
        lkb->lkb_lvbptr = args->lksb->sb_lvbptr;
        lkb->lkb_ownpid = (int) current->pid;
+        lkb->lkb_timeout_cs = args->timeout;
        rv = 0;
 out:
        return rv;
@@ -1903,6 +2089,9 @@ static int validate_unlock_args(struct dlm_lkb *lkb, struct dlm_args *args)
                if (is_overlap(lkb))
                        goto out;
+                /* don't let scand try to do a cancel */
+                del_timeout(lkb);
                if (lkb->lkb_flags & DLM_IFL_RESEND) {
                        lkb->lkb_flags |= DLM_IFL_OVERLAP_CANCEL;
                        rv = -EBUSY;
@@ -1934,6 +2123,9 @@ static int validate_unlock_args(struct dlm_lkb *lkb, struct dlm_args *args)
                if (is_overlap_unlock(lkb))
                        goto out;
+                /* don't let scand try to do a cancel */
+                del_timeout(lkb);
                if (lkb->lkb_flags & DLM_IFL_RESEND) {
                        lkb->lkb_flags |= DLM_IFL_OVERLAP_UNLOCK;
                        rv = -EBUSY;
@@ -1984,7 +2176,7 @@ static int do_request(struct dlm_rsb *r, struct dlm_lkb *lkb)
 {
        int error = 0;
-        if (can_be_granted(r, lkb, 1)) {
+        if (can_be_granted(r, lkb, 1, NULL)) {
                grant_lock(r, lkb);
                queue_cast(r, lkb, 0);
                goto out;
@@ -1994,6 +2186,7 @@ static int do_request(struct dlm_rsb *r, struct dlm_lkb *lkb)
                error = -EINPROGRESS;
                add_lkb(r, lkb, DLM_LKSTS_WAITING);
                send_blocking_asts(r, lkb);
+                add_timeout(lkb);
                goto out;
        }
@@ -2009,16 +2202,32 @@ static int do_request(struct dlm_rsb *r, struct dlm_lkb *lkb)
 static int do_convert(struct dlm_rsb *r, struct dlm_lkb *lkb)
 {
        int error = 0;
+        int deadlk = 0;
        /* changing an existing lock may allow others to be granted */
-        if (can_be_granted(r, lkb, 1)) {
+        if (can_be_granted(r, lkb, 1, &deadlk)) {
                grant_lock(r, lkb);
                queue_cast(r, lkb, 0);
                grant_pending_locks(r);
                goto out;
        }
+        /* can_be_granted() detected that this lock would block in a conversion
+           deadlock, so we leave it on the granted queue and return EDEADLK in
+           the ast for the convert. */
+        if (deadlk) {
+                /* it's left on the granted queue */
+                log_debug(r->res_ls, "deadlock %x node %d sts%d g%d r%d %s",
+                          lkb->lkb_id, lkb->lkb_nodeid, lkb->lkb_status,
+                          lkb->lkb_grmode, lkb->lkb_rqmode, r->res_name);
+                revert_lock(r, lkb);
+                queue_cast(r, lkb, -EDEADLK);
+                error = -EDEADLK;
+                goto out;
+        }
        /* is_demoted() means the can_be_granted() above set the grmode
           to NL, and left us on the granted queue.  This auto-demotion
           (due to CONVDEADLK) might mean other locks, and/or this lock, are
@@ -2041,6 +2250,7 @@ static int do_convert(struct dlm_rsb *r, struct dlm_lkb *lkb)
                del_lkb(r, lkb);
                add_lkb(r, lkb, DLM_LKSTS_CONVERT);
                send_blocking_asts(r, lkb);
+                add_timeout(lkb);
                goto out;
        }
@@ -2274,7 +2484,7 @@ int dlm_lock(dlm_lockspace_t *lockspace,
        if (!ls)
                return -EINVAL;
-        lock_recovery(ls);
+        dlm_lock_recovery(ls);
        if (convert)
                error = find_lkb(ls, lksb->sb_lkid, &lkb);
@@ -2284,7 +2494,7 @@ int dlm_lock(dlm_lockspace_t *lockspace,
        if (error)
                goto out;
-        error = set_lock_args(mode, lksb, flags, namelen, parent_lkid, ast,
+        error = set_lock_args(mode, lksb, flags, namelen, 0, ast,
                              astarg, bast, &args);
        if (error)
                goto out_put;
@@ -2299,10 +2509,10 @@ int dlm_lock(dlm_lockspace_t *lockspace,
 out_put:
        if (convert || error)
                __put_lkb(ls, lkb);
-        if (error == -EAGAIN)
+        if (error == -EAGAIN || error == -EDEADLK)
                error = 0;
 out:
-        unlock_recovery(ls);
+        dlm_unlock_recovery(ls);
        dlm_put_lockspace(ls);
        return error;
 }
@@ -2322,7 +2532,7 @@ int dlm_unlock(dlm_lockspace_t *lockspace,
        if (!ls)
                return -EINVAL;
-        lock_recovery(ls);
+        dlm_lock_recovery(ls);
        error = find_lkb(ls, lkid, &lkb);
        if (error)
@@ -2344,7 +2554,7 @@ int dlm_unlock(dlm_lockspace_t *lockspace,
 out_put:
        dlm_put_lkb(lkb);
 out:
-        unlock_recovery(ls);
+        dlm_unlock_recovery(ls);
        dlm_put_lockspace(ls);
        return error;
 }
@@ -2384,7 +2594,7 @@ static int _create_message(struct dlm_ls *ls, int mb_len,
           pass into lowcomms_commit and a message buffer (mb) that we
           write our data into */
-        mh = dlm_lowcomms_get_buffer(to_nodeid, mb_len, GFP_KERNEL, &mb);
+        mh = dlm_lowcomms_get_buffer(to_nodeid, mb_len, ls->ls_allocation, &mb);
        if (!mh)
                return -ENOBUFS;
@@ -3111,9 +3321,10 @@ static void receive_request_reply(struct dlm_ls *ls, struct dlm_message *ms)
                lkb->lkb_remid = ms->m_lkid;
                if (is_altmode(lkb))
                        munge_altmode(lkb, ms);
-                if (result)
+                if (result) {
                        add_lkb(r, lkb, DLM_LKSTS_WAITING);
-                else {
+                        add_timeout(lkb);
+                } else {
                        grant_lock_pc(r, lkb, ms);
                        queue_cast(r, lkb, 0);
                }
@@ -3172,6 +3383,12 @@ static void __receive_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb,
                queue_cast(r, lkb, -EAGAIN);
                break;
+        case -EDEADLK:
+                receive_flags_reply(lkb, ms);
+                revert_lock_pc(r, lkb);
+                queue_cast(r, lkb, -EDEADLK);
+                break;
        case -EINPROGRESS:
                /* convert was queued on remote master */
                receive_flags_reply(lkb, ms);
@@ -3179,6 +3396,7 @@ static void __receive_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb,
                        munge_demoted(lkb, ms);
                del_lkb(r, lkb);
                add_lkb(r, lkb, DLM_LKSTS_CONVERT);
+                add_timeout(lkb);
                break;
        case 0:
@@ -3298,8 +3516,7 @@ static void _receive_cancel_reply(struct dlm_lkb *lkb, struct dlm_message *ms)
        case -DLM_ECANCEL:
                receive_flags_reply(lkb, ms);
                revert_lock_pc(r, lkb);
-                if (ms->m_result)
+                queue_cast(r, lkb, -DLM_ECANCEL);
-                        queue_cast(r, lkb, -DLM_ECANCEL);
                break;
        case 0:
                break;
@@ -3424,7 +3641,7 @@ int dlm_receive_message(struct dlm_header *hd, int nodeid, int recovery)
                        }
                }
-                if (lock_recovery_try(ls))
+                if (dlm_lock_recovery_try(ls))
                        break;
                schedule();
        }
@@ -3503,7 +3720,7 @@ int dlm_receive_message(struct dlm_header *hd, int nodeid, int recovery)
                log_error(ls, "unknown message type %d", ms->m_type);
        }
-        unlock_recovery(ls);
+        dlm_unlock_recovery(ls);
 out:
        dlm_put_lockspace(ls);
        dlm_astd_wake();
@@ -4034,13 +4251,13 @@ int dlm_recover_process_copy(struct dlm_ls *ls, struct dlm_rcom *rc)
 int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua,
                     int mode, uint32_t flags, void *name, unsigned int namelen,
-                     uint32_t parent_lkid)
+                     unsigned long timeout_cs)
 {
        struct dlm_lkb *lkb;
        struct dlm_args args;
        int error;
-        lock_recovery(ls);
+        dlm_lock_recovery(ls);
        error = create_lkb(ls, &lkb);
        if (error) {
@@ -4062,7 +4279,7 @@ int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua,
           When DLM_IFL_USER is set, the dlm knows that this is a userspace
           lock and that lkb_astparam is the dlm_user_args structure. */
-        error = set_lock_args(mode, &ua->lksb, flags, namelen, parent_lkid,
+        error = set_lock_args(mode, &ua->lksb, flags, namelen, timeout_cs,
                              DLM_FAKE_USER_AST, ua, DLM_FAKE_USER_AST, &args);
        lkb->lkb_flags |= DLM_IFL_USER;
        ua->old_mode = DLM_LOCK_IV;
@@ -4094,19 +4311,20 @@ int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua,
        list_add_tail(&lkb->lkb_ownqueue, &ua->proc->locks);
        spin_unlock(&ua->proc->locks_spin);
 out:
-        unlock_recovery(ls);
+        dlm_unlock_recovery(ls);
        return error;
 }
 int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
-                     int mode, uint32_t flags, uint32_t lkid, char *lvb_in)
+                     int mode, uint32_t flags, uint32_t lkid, char *lvb_in,
+                     unsigned long timeout_cs)
 {
        struct dlm_lkb *lkb;
        struct dlm_args args;
        struct dlm_user_args *ua;
        int error;
-        lock_recovery(ls);
+        dlm_lock_recovery(ls);
        error = find_lkb(ls, lkid, &lkb);
        if (error)
@@ -4127,6 +4345,7 @@ int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
        if (lvb_in && ua->lksb.sb_lvbptr)
                memcpy(ua->lksb.sb_lvbptr, lvb_in, DLM_USER_LVB_LEN);
+        ua->xid = ua_tmp->xid;
        ua->castparam = ua_tmp->castparam;
        ua->castaddr = ua_tmp->castaddr;
        ua->bastparam = ua_tmp->bastparam;
@@ -4134,19 +4353,19 @@ int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
        ua->user_lksb = ua_tmp->user_lksb;
        ua->old_mode = lkb->lkb_grmode;
-        error = set_lock_args(mode, &ua->lksb, flags, 0, 0, DLM_FAKE_USER_AST,
+        error = set_lock_args(mode, &ua->lksb, flags, 0, timeout_cs,
-                              ua, DLM_FAKE_USER_AST, &args);
+                              DLM_FAKE_USER_AST, ua, DLM_FAKE_USER_AST, &args);
        if (error)
                goto out_put;
        error = convert_lock(ls, lkb, &args);
-        if (error == -EINPROGRESS || error == -EAGAIN)
+        if (error == -EINPROGRESS || error == -EAGAIN || error == -EDEADLK)
                error = 0;
 out_put:
        dlm_put_lkb(lkb);
 out:
-        unlock_recovery(ls);
+        dlm_unlock_recovery(ls);
        kfree(ua_tmp);
        return error;
 }
@@ -4159,7 +4378,7 @@ int dlm_user_unlock(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
        struct dlm_user_args *ua;
        int error;
-        lock_recovery(ls);
+        dlm_lock_recovery(ls);
        error = find_lkb(ls, lkid, &lkb);
        if (error)
@@ -4194,7 +4413,7 @@ int dlm_user_unlock(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
 out_put:
        dlm_put_lkb(lkb);
 out:
-        unlock_recovery(ls);
+        dlm_unlock_recovery(ls);
        kfree(ua_tmp);
        return error;
 }
@@ -4207,7 +4426,7 @@ int dlm_user_cancel(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
        struct dlm_user_args *ua;
        int error;
-        lock_recovery(ls);
+        dlm_lock_recovery(ls);
        error = find_lkb(ls, lkid, &lkb);
        if (error)
@@ -4231,11 +4450,59 @@ int dlm_user_cancel(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
 out_put:
        dlm_put_lkb(lkb);
 out:
-        unlock_recovery(ls);
+        dlm_unlock_recovery(ls);
        kfree(ua_tmp);
        return error;
 }
+int dlm_user_deadlock(struct dlm_ls *ls, uint32_t flags, uint32_t lkid)
+{       /* Cancel the lock identified by lkid, tagging its lkb with
+           DLM_IFL_DEADLOCK_CANCEL.  Returns 0 on success (-DLM_ECANCEL and
+           -EBUSY are folded into 0), otherwise the error from find_lkb(),
+           set_unlock_args(), validate_unlock_args() or _cancel_lock(). */
+        struct dlm_lkb *lkb;
+        struct dlm_args args;
+        struct dlm_user_args *ua;
+        struct dlm_rsb *r;
+        int error;
+        dlm_lock_recovery(ls);          /* released at out: on every path */
+        error = find_lkb(ls, lkid, &lkb);
+        if (error)
+                goto out;
+        ua = (struct dlm_user_args *)lkb->lkb_astparam; /* NOTE(review): only
+           a dlm_user_args for userspace locks (DLM_IFL_USER); presumably
+           lkid always names one here -- confirm against the caller */
+        error = set_unlock_args(flags, ua, &args);
+        if (error)
+                goto out_put;
+        /* same as cancel_lock(), but set DEADLOCK_CANCEL after lock_rsb:
+           take a reference on the rsb and lock it, validate the cancel,
+           flag the lkb, and only then call _cancel_lock() */
+        r = lkb->lkb_resource;
+        hold_rsb(r);
+        lock_rsb(r);
+        error = validate_unlock_args(lkb, &args);
+        if (error)
+                goto out_r;
+        lkb->lkb_flags |= DLM_IFL_DEADLOCK_CANCEL;
+        error = _cancel_lock(r, lkb);
+ out_r:
+        unlock_rsb(r);
+        put_rsb(r);
+        if (error == -DLM_ECANCEL)      /* not reported to the caller as a failure */
+                error = 0;
+        /* from validate_unlock_args(): e.g. a resend is pending and the
+           cancel was recorded as an overlap; not a failure for the caller */
+        if (error == -EBUSY)
+                error = 0;
+ out_put:
+        dlm_put_lkb(lkb);               /* drops the reference taken by find_lkb() -- TODO confirm */
+ out:
+        dlm_unlock_recovery(ls);
+        return error;
+}
 /* lkb's that are removed from the waiters list by revert are just left on the
   orphans list with the granted orphan locks, to be freed by purge */
@@ -4314,12 +4581,13 @@ void dlm_clear_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc)
 {
        struct dlm_lkb *lkb, *safe;
-        lock_recovery(ls);
+        dlm_lock_recovery(ls);
        while (1) {
                lkb = del_proc_lock(ls, proc);
                if (!lkb)
                        break;
+                del_timeout(lkb);
                if (lkb->lkb_exflags & DLM_LKF_PERSISTENT)
                        orphan_proc_lock(ls, lkb);
                else
@@ -4347,7 +4615,7 @@ void dlm_clear_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc)
        }
        mutex_unlock(&ls->ls_clear_proc_locks);
-        unlock_recovery(ls);
+        dlm_unlock_recovery(ls);
 }
 static void purge_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc)
@@ -4429,12 +4697,12 @@ int dlm_user_purge(struct dlm_ls *ls, struct dlm_user_proc *proc,
        if (nodeid != dlm_our_nodeid()) {
                error = send_purge(ls, nodeid, pid);
        } else {
-                lock_recovery(ls);
+                dlm_lock_recovery(ls);
                if (pid == current->pid)
                        purge_proc_locks(ls, proc);
                else
                        do_purge(ls, nodeid, pid);
-                unlock_recovery(ls);
+                dlm_unlock_recovery(ls);
        }
        return error;
 }
diff --git a/fs/dlm/lock.h b/fs/dlm/lock.h
index 64fc4ec40668..1720313c22df 100644
--- a/fs/dlm/lock.h
+++ b/fs/dlm/lock.h
@@ -1,7 +1,7 @@
 /******************************************************************************
 *******************************************************************************
 **
-**  Copyright (C) 2005 Red Hat, Inc.  All rights reserved.
+**  Copyright (C) 2005-2007 Red Hat, Inc.  All rights reserved.
 **
 **  This copyrighted material is made available to anyone wishing to use,
 **  modify, copy, or redistribute it subject to the terms and conditions
@@ -24,6 +24,10 @@ void dlm_put_rsb(struct dlm_rsb *r);
 void dlm_hold_rsb(struct dlm_rsb *r);
 int dlm_put_lkb(struct dlm_lkb *lkb);
 void dlm_scan_rsbs(struct dlm_ls *ls);
+int dlm_lock_recovery_try(struct dlm_ls *ls);
+void dlm_unlock_recovery(struct dlm_ls *ls);
+void dlm_scan_timeout(struct dlm_ls *ls);
+void dlm_adjust_timeouts(struct dlm_ls *ls);
 int dlm_purge_locks(struct dlm_ls *ls);
 void dlm_purge_mstcpy_locks(struct dlm_rsb *r);
@@ -34,15 +38,18 @@ int dlm_recover_master_copy(struct dlm_ls *ls, struct dlm_rcom *rc);
 int dlm_recover_process_copy(struct dlm_ls *ls, struct dlm_rcom *rc);
 int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua, int mode,
-        uint32_t flags, void *name, unsigned int namelen, uint32_t parent_lkid);
+        uint32_t flags, void *name, unsigned int namelen,
+        unsigned long timeout_cs);
 int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
-        int mode, uint32_t flags, uint32_t lkid, char *lvb_in);
+        int mode, uint32_t flags, uint32_t lkid, char *lvb_in,
+        unsigned long timeout_cs);
 int dlm_user_unlock(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
        uint32_t flags, uint32_t lkid, char *lvb_in);
 int dlm_user_cancel(struct dlm_ls *ls,  struct dlm_user_args *ua_tmp,
        uint32_t flags, uint32_t lkid);
 int dlm_user_purge(struct dlm_ls *ls, struct dlm_user_proc *proc,
        int nodeid, int pid);
+int dlm_user_deadlock(struct dlm_ls *ls, uint32_t flags, uint32_t lkid);
 void dlm_clear_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc);
 static inline int is_master(struct dlm_rsb *r)
diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c
index a677b2a5eed4..1dc72105ab12 100644
--- a/fs/dlm/lockspace.c
+++ b/fs/dlm/lockspace.c
@@ -197,13 +197,24 @@ static int do_uevent(struct dlm_ls *ls, int in)
        else
                kobject_uevent(&ls->ls_kobj, KOBJ_OFFLINE);
+        log_debug(ls, "%s the lockspace group...", in ? "joining" : "leaving");
+        /* dlm_controld will see the uevent, do the necessary group management
+           and then write to sysfs to wake us */
        error = wait_event_interruptible(ls->ls_uevent_wait,
                        test_and_clear_bit(LSFL_UEVENT_WAIT, &ls->ls_flags));
+        log_debug(ls, "group event done %d %d", error, ls->ls_uevent_result);
        if (error)
                goto out;
        error = ls->ls_uevent_result;
 out:
+        if (error)
+                log_error(ls, "group %s failed %d %d", in ? "join" : "leave",
+                          error, ls->ls_uevent_result);
        return error;
 }
@@ -234,8 +245,13 @@ static int dlm_scand(void *data)
        struct dlm_ls *ls;
        while (!kthread_should_stop()) {
-                list_for_each_entry(ls, &lslist, ls_list)
+                list_for_each_entry(ls, &lslist, ls_list) {
-                        dlm_scan_rsbs(ls);
+                        if (dlm_lock_recovery_try(ls)) {
+                                dlm_scan_rsbs(ls);
+                                dlm_scan_timeout(ls);
+                                dlm_unlock_recovery(ls);
+                        }
+                }
                schedule_timeout_interruptible(dlm_config.ci_scan_secs * HZ);
        }
        return 0;
@@ -395,6 +411,7 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
 {
        struct dlm_ls *ls;
        int i, size, error = -ENOMEM;
+        int do_unreg = 0;
        if (namelen > DLM_LOCKSPACE_LEN)
                return -EINVAL;
@@ -417,11 +434,22 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
                goto out;
        memcpy(ls->ls_name, name, namelen);
        ls->ls_namelen = namelen;
-        ls->ls_exflags = flags;
        ls->ls_lvblen = lvblen;
        ls->ls_count = 0;
        ls->ls_flags = 0;
+        if (flags & DLM_LSFL_TIMEWARN)
+                set_bit(LSFL_TIMEWARN, &ls->ls_flags);
+        if (flags & DLM_LSFL_FS)
+                ls->ls_allocation = GFP_NOFS;
+        else
+                ls->ls_allocation = GFP_KERNEL;
+        /* ls_exflags are forced to match among nodes, and we don't
+           need to require all nodes to have TIMEWARN or FS set */
+        ls->ls_exflags = (flags & ~(DLM_LSFL_TIMEWARN | DLM_LSFL_FS));
        size = dlm_config.ci_rsbtbl_size;
        ls->ls_rsbtbl_size = size;
@@ -461,6 +489,8 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
        mutex_init(&ls->ls_waiters_mutex);
        INIT_LIST_HEAD(&ls->ls_orphans);
        mutex_init(&ls->ls_orphans_mutex);
+        INIT_LIST_HEAD(&ls->ls_timeout);
+        mutex_init(&ls->ls_timeout_mutex);
        INIT_LIST_HEAD(&ls->ls_nodes);
        INIT_LIST_HEAD(&ls->ls_nodes_gone);
@@ -477,6 +507,8 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
        init_waitqueue_head(&ls->ls_uevent_wait);
        ls->ls_uevent_result = 0;
+        init_completion(&ls->ls_members_done);
+        ls->ls_members_result = -1;
        ls->ls_recoverd_task = NULL;
        mutex_init(&ls->ls_recoverd_active);
@@ -513,32 +545,49 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
        error = dlm_recoverd_start(ls);
        if (error) {
                log_error(ls, "can't start dlm_recoverd %d", error);
-                goto out_rcomfree;
+                goto out_delist;
        }
-        dlm_create_debug_file(ls);
        error = kobject_setup(ls);
        if (error)
-                goto out_del;
+                goto out_stop;
        error = kobject_register(&ls->ls_kobj);
        if (error)
-                goto out_del;
+                goto out_stop;
+        /* let kobject handle freeing of ls if there's an error */
+        do_unreg = 1;
+        /* This uevent triggers dlm_controld in userspace to add us to the
+           group of nodes that are members of this lockspace (managed by the
+           cluster infrastructure.)  Once it's done that, it tells us who the
+           current lockspace members are (via configfs) and then tells the
+           lockspace to start running (via sysfs) in dlm_ls_start(). */
        error = do_uevent(ls, 1);
        if (error)
-                goto out_unreg;
+                goto out_stop;
+        wait_for_completion(&ls->ls_members_done);
+        error = ls->ls_members_result;
+        if (error)
+                goto out_members;
+        dlm_create_debug_file(ls);
+        log_debug(ls, "join complete");
        *lockspace = ls;
        return 0;
- out_unreg:
+ out_members:
-        kobject_unregister(&ls->ls_kobj);
+        do_uevent(ls, 0);
- out_del:
+        dlm_clear_members(ls);
-        dlm_delete_debug_file(ls);
+        kfree(ls->ls_node_array);
+ out_stop:
        dlm_recoverd_stop(ls);
- out_rcomfree:
+ out_delist:
        spin_lock(&lslist_lock);
        list_del(&ls->ls_list);
        spin_unlock(&lslist_lock);
@@ -550,7 +599,10 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
 out_rsbfree:
        kfree(ls->ls_rsbtbl);
 out_lsfree:
-        kfree(ls);
+        if (do_unreg)
+                kobject_unregister(&ls->ls_kobj);
+        else
+                kfree(ls);
 out:
        module_put(THIS_MODULE);
        return error;
@@ -570,6 +622,8 @@ int dlm_new_lockspace(char *name, int namelen, void **lockspace,
        error = new_lockspace(name, namelen, lockspace, flags, lvblen);
        if (!error)
                ls_count++;
+        else if (!ls_count)
+                threads_stop();
 out:
        mutex_unlock(&ls_lock);
        return error;
@@ -696,7 +750,7 @@ static int release_lockspace(struct dlm_ls *ls, int force)
        dlm_clear_members_gone(ls);
        kfree(ls->ls_node_array);
        kobject_unregister(&ls->ls_kobj);
-        /* The ls structure will be freed when the kobject is done with */
+        /* The ls structure will be freed when the kobject is done with */
        mutex_lock(&ls_lock);
        ls_count--;
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 27970a58d29b..0553a6158dcb 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -260,7 +260,7 @@ static int nodeid_to_addr(int nodeid, struct sockaddr *retaddr)
 static void lowcomms_data_ready(struct sock *sk, int count_unused)
 {
        struct connection *con = sock2con(sk);
-        if (!test_and_set_bit(CF_READ_PENDING, &con->flags))
+        if (con && !test_and_set_bit(CF_READ_PENDING, &con->flags))
                queue_work(recv_workqueue, &con->rwork);
 }
@@ -268,7 +268,7 @@ static void lowcomms_write_space(struct sock *sk)
 {
        struct connection *con = sock2con(sk);
-        if (!test_and_set_bit(CF_WRITE_PENDING, &con->flags))
+        if (con && !test_and_set_bit(CF_WRITE_PENDING, &con->flags))
                queue_work(send_workqueue, &con->swork);
 }
@@ -720,11 +720,17 @@ static int tcp_accept_from_sock(struct connection *con)
                        INIT_WORK(&othercon->rwork, process_recv_sockets);
                        set_bit(CF_IS_OTHERCON, &othercon->flags);
                        newcon->othercon = othercon;
+                        othercon->sock = newsock;
+                        newsock->sk->sk_user_data = othercon;
+                        add_sock(newsock, othercon);
+                        addcon = othercon;
+                }
+                else {
+                        printk("Extra connection from node %d attempted\n", nodeid);
+                        result = -EAGAIN;
+                        mutex_unlock(&newcon->sock_mutex);
+                        goto accept_err;
                }
-                othercon->sock = newsock;
-                newsock->sk->sk_user_data = othercon;
-                add_sock(newsock, othercon);
-                addcon = othercon;
        }
        else {
                newsock->sk->sk_user_data = newcon;
@@ -1400,8 +1406,11 @@ void dlm_lowcomms_stop(void)
        down(&connections_lock);
        for (i = 0; i <= max_nodeid; i++) {
                con = __nodeid2con(i, 0);
-                if (con)
+                if (con) {
                        con->flags |= 0xFF;
+                        if (con->sock)
+                                con->sock->sk->sk_user_data = NULL;
+                }
        }
        up(&connections_lock);
diff --git a/fs/dlm/main.c b/fs/dlm/main.c
index 162fbae58fe5..eca2907f2386 100644
--- a/fs/dlm/main.c
+++ b/fs/dlm/main.c
@@ -2,7 +2,7 @@
 *******************************************************************************
 **
 **  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
-**  Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+**  Copyright (C) 2004-2007 Red Hat, Inc.  All rights reserved.
 **
 **  This copyrighted material is made available to anyone wishing to use,
 **  modify, copy, or redistribute it subject to the terms and conditions
@@ -25,6 +25,8 @@ void dlm_unregister_debugfs(void);
 static inline int dlm_register_debugfs(void) { return 0; }
 static inline void dlm_unregister_debugfs(void) { }
 #endif
+int dlm_netlink_init(void);
+void dlm_netlink_exit(void);
 static int __init init_dlm(void)
 {
@@ -50,10 +52,16 @@ static int __init init_dlm(void)
        if (error)
                goto out_debug;
+        error = dlm_netlink_init();
+        if (error)
+                goto out_user;
        printk("DLM (built %s %s) installed\n", __DATE__, __TIME__);
        return 0;
+ out_user:
+        dlm_user_exit();
 out_debug:
        dlm_unregister_debugfs();
 out_config:
@@ -68,6 +76,7 @@ static int __init init_dlm(void)
 static void __exit exit_dlm(void)
 {
+        dlm_netlink_exit();
        dlm_user_exit();
        dlm_config_exit();
        dlm_memory_exit();
diff --git a/fs/dlm/member.c b/fs/dlm/member.c
index 85e2897bd740..073599dced2a 100644
--- a/fs/dlm/member.c
+++ b/fs/dlm/member.c
@@ -1,7 +1,7 @@
 /******************************************************************************
 *******************************************************************************
 **
-**  Copyright (C) 2005 Red Hat, Inc.  All rights reserved.
+**  Copyright (C) 2005-2007 Red Hat, Inc.  All rights reserved.
 **
 **  This copyrighted material is made available to anyone wishing to use,
 **  modify, copy, or redistribute it subject to the terms and conditions
@@ -233,6 +233,12 @@ int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out)
        *neg_out = neg;
        error = ping_members(ls);
+        if (!error || error == -EPROTO) {
+                /* new_lockspace() may be waiting to know if the config
+                   is good or bad */
+                ls->ls_members_result = error;
+                complete(&ls->ls_members_done);
+        }
        if (error)
                goto out;
@@ -284,6 +290,9 @@ int dlm_ls_stop(struct dlm_ls *ls)
        dlm_recoverd_suspend(ls);
        ls->ls_recover_status = 0;
        dlm_recoverd_resume(ls);
+        if (!ls->ls_recover_begin)
+                ls->ls_recover_begin = jiffies;
        return 0;
 }
diff --git a/fs/dlm/netlink.c b/fs/dlm/netlink.c
new file mode 100644
index 000000000000..863b87d0dc71
--- /dev/null
+++ b/fs/dlm/netlink.c
@@ -0,0 +1,153 @@
+/*
+ * Copyright (C) 2007 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+#include <net/genetlink.h>
+#include <linux/dlm.h>
+#include <linux/dlm_netlink.h>
+#include "dlm_internal.h"
+static uint32_t dlm_nl_seqnum;         /* next sequence number for outgoing messages */
+static uint32_t listener_nlpid;        /* netlink pid of the last DLM_CMD_HELLO sender */
+static struct genl_family family = {   /* registered in dlm_netlink_init() */
+        .id             = GENL_ID_GENERATE,
+        .name           = DLM_GENL_NAME,
+        .version        = DLM_GENL_VERSION,
+};
+/*
+ * Allocate a generic netlink message sized for @size bytes of payload and
+ * write its headers for command @cmd.  On success the skb is handed back
+ * through @skbp and 0 is returned.  Returns -ENOMEM if the skb cannot be
+ * allocated, or -EINVAL (with the skb freed and @skbp untouched) if the
+ * headers cannot be added.
+ */
+static int prepare_data(u8 cmd, struct sk_buff **skbp, size_t size)
+{
+        struct sk_buff *msg = genlmsg_new(size, GFP_KERNEL);
+        if (msg == NULL)
+                return -ENOMEM;
+        /* every message that gets this far consumes one sequence number */
+        if (genlmsg_put(msg, 0, dlm_nl_seqnum++, &family, 0, cmd) == NULL) {
+                nlmsg_free(msg);
+                return -EINVAL;
+        }
+        *skbp = msg;
+        return 0;
+}
+/*
+ * Reserve a DLM_TYPE_LOCK attribute in @skb large enough for one
+ * struct dlm_lock_data.  Returns a pointer to the attribute payload, or
+ * NULL if the attribute could not be reserved.
+ */
+static struct dlm_lock_data *mk_data(struct sk_buff *skb)
+{
+        struct nlattr *attr = nla_reserve(skb, DLM_TYPE_LOCK,
+                                          sizeof(struct dlm_lock_data));
+        return attr ? nla_data(attr) : NULL;
+}
+/*
+ * Finalize the message in @skb and unicast it to the recorded listener
+ * (listener_nlpid).  If finalizing fails the skb is freed here and the
+ * negative result is returned; otherwise the result of genlmsg_unicast()
+ * is returned.
+ */
+static int send_data(struct sk_buff *skb)
+{
+        struct nlmsghdr *nlh = (struct nlmsghdr *)skb->data;
+        struct genlmsghdr *ghdr = nlmsg_data(nlh);
+        int rv = genlmsg_end(skb, genlmsg_data(ghdr));
+        if (rv >= 0)
+                return genlmsg_unicast(skb, listener_nlpid);
+        nlmsg_free(skb);
+        return rv;
+}
+/*
+ * Handler for the one registered command: remember the netlink pid of the
+ * sender so that later messages are unicast to it.  Always returns 0.
+ */
+static int user_cmd(struct sk_buff *skb, struct genl_info *info)
+{
+        uint32_t nlpid = info->snd_pid;
+        listener_nlpid = nlpid;
+        printk("user_cmd nlpid %u\n", nlpid);
+        return 0;
+}
+static struct genl_ops dlm_nl_ops = {  /* sole op registered by dlm_netlink_init() */
+        .cmd            = DLM_CMD_HELLO,
+        .doit           = user_cmd,     /* records the sender as the listener */
+};
+/*
+ * Register the dlm generic netlink family and its single operation.
+ * Returns 0 on success.  If the family cannot be registered its error is
+ * returned; if registering the operation fails (negative result) the
+ * family is unregistered again and that error is returned.
+ */
+int dlm_netlink_init(void)
+{
+        int rv = genl_register_family(&family);
+        if (rv != 0)
+                return rv;
+        rv = genl_register_ops(&family, &dlm_nl_ops);
+        if (rv >= 0)
+                return 0;
+        /* ops registration failed: undo the family registration */
+        genl_unregister_family(&family);
+        return rv;
+}
+void dlm_netlink_exit(void)
+{       /* undo dlm_netlink_init(): unregister the op, then the family */
+        genl_unregister_ops(&family, &dlm_nl_ops);
+        genl_unregister_family(&family);
+}
+/*
+ * Copy the fields of @lkb that are reported to userspace into @data.
+ * @data is zeroed first, so anything not set below (xid for locks that are
+ * not userspace locks, the lockspace/resource fields when the lkb has no
+ * rsb) is reported as zero.
+ */
+static void fill_data(struct dlm_lock_data *data, struct dlm_lkb *lkb)
+{
+        struct dlm_rsb *r = lkb->lkb_resource;
+        struct dlm_user_args *ua = NULL;
+        /* lkb_astparam only holds a dlm_user_args when DLM_IFL_USER is set;
+           for any other lock it is the caller's own ast argument and must
+           not be dereferenced as one */
+        if (lkb->lkb_flags & DLM_IFL_USER)
+                ua = (struct dlm_user_args *) lkb->lkb_astparam;
+        memset(data, 0, sizeof(struct dlm_lock_data));
+        data->version = DLM_LOCK_DATA_VERSION;
+        data->nodeid = lkb->lkb_nodeid;
+        data->ownpid = lkb->lkb_ownpid;
+        data->id = lkb->lkb_id;
+        data->remid = lkb->lkb_remid;
+        data->status = lkb->lkb_status;
+        data->grmode = lkb->lkb_grmode;
+        data->rqmode = lkb->lkb_rqmode;
+        data->timestamp = lkb->lkb_timestamp;
+        if (ua)
+                data->xid = ua->xid;
+        if (r) {
+                data->lockspace_id = r->res_ls->ls_global_id;
+                data->resource_namelen = r->res_length;
+                memcpy(data->resource_name, r->res_name, r->res_length);
+        }
+}
+/*
+ * Build a DLM_CMD_TIMEOUT message describing @lkb and send it to the
+ * registered listener.  Nothing is reported back to the caller: if the
+ * message cannot be built it is dropped, and the result of sending it is
+ * ignored.
+ */
+void dlm_timeout_warn(struct dlm_lkb *lkb)
+{
+        struct sk_buff *skb;
+        struct dlm_lock_data *payload;
+        /* room for the lock-data attribute plus one empty attribute; the
+           need for the empty one is an open question (why this?) */
+        size_t len = nla_total_size(sizeof(struct dlm_lock_data)) +
+                     nla_total_size(0);
+        if (prepare_data(DLM_CMD_TIMEOUT, &skb, len) < 0)
+                return;
+        payload = mk_data(skb);
+        if (payload) {
+                fill_data(payload, lkb);
+                send_data(skb);
+                return;
+        }
+        /* no room for the attribute: drop the message */
+        nlmsg_free(skb);
+}
diff --git a/fs/dlm/rcom.c b/fs/dlm/rcom.c
index 6bfbd6153809..e3a1527cbdbe 100644
--- a/fs/dlm/rcom.c
+++ b/fs/dlm/rcom.c
@@ -38,7 +38,7 @@ static int create_rcom(struct dlm_ls *ls, int to_nodeid, int type, int len,
        char *mb;
        int mb_len = sizeof(struct dlm_rcom) + len;
-        mh = dlm_lowcomms_get_buffer(to_nodeid, mb_len, GFP_KERNEL, &mb);
+        mh = dlm_lowcomms_get_buffer(to_nodeid, mb_len, ls->ls_allocation, &mb);
        if (!mh) {
                log_print("create_rcom to %d type %d len %d ENOBUFS",
                          to_nodeid, type, len);
@@ -90,7 +90,7 @@ static int check_config(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid)
                log_error(ls, "version mismatch: %x nodeid %d: %x",
                          DLM_HEADER_MAJOR | DLM_HEADER_MINOR, nodeid,
                          rc->rc_header.h_version);
-                return -EINVAL;
+                return -EPROTO;
        }
        if (rf->rf_lvblen != ls->ls_lvblen ||
@@ -98,7 +98,7 @@ static int check_config(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid)
                log_error(ls, "config mismatch: %d,%x nodeid %d: %d,%x",
                          ls->ls_lvblen, ls->ls_exflags,
                          nodeid, rf->rf_lvblen, rf->rf_lsflags);
-                return -EINVAL;
+                return -EPROTO;
        }
        return 0;
 }
diff --git a/fs/dlm/recoverd.c b/fs/dlm/recoverd.c
index 3cb636d60249..66575997861c 100644
--- a/fs/dlm/recoverd.c
+++ b/fs/dlm/recoverd.c
@@ -2,7 +2,7 @@
 *******************************************************************************
 **
 **  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
-**  Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+**  Copyright (C) 2004-2007 Red Hat, Inc.  All rights reserved.
 **
 **  This copyrighted material is made available to anyone wishing to use,
 **  modify, copy, or redistribute it subject to the terms and conditions
@@ -190,6 +190,8 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
        dlm_clear_members_gone(ls);
+        dlm_adjust_timeouts(ls);
        error = enable_locking(ls, rv->seq);
        if (error) {
                log_debug(ls, "enable_locking failed %d", error);
diff --git a/fs/dlm/user.c b/fs/dlm/user.c
index b0201ec325a7..6438941ab1f8 100644
--- a/fs/dlm/user.c
+++ b/fs/dlm/user.c
@@ -33,16 +33,17 @@ static const struct file_operations device_fops;
 struct dlm_lock_params32 {
        __u8 mode;
        __u8 namelen;
-        __u16 flags;
+        __u16 unused;
+        __u32 flags;
        __u32 lkid;
        __u32 parent;
+        __u64 xid;
+        __u64 timeout;
        __u32 castparam;
        __u32 castaddr;
        __u32 bastparam;
        __u32 bastaddr;
        __u32 lksb;
        char lvb[DLM_USER_LVB_LEN];
        char name[0];
 };
@@ -68,6 +69,7 @@ struct dlm_lksb32 {
 };
 struct dlm_lock_result32 {
+        __u32 version[3];
        __u32 length;
        __u32 user_astaddr;
        __u32 user_astparam;
@@ -102,6 +104,8 @@ static void compat_input(struct dlm_write_request *kb,
                kb->i.lock.flags = kb32->i.lock.flags;
                kb->i.lock.lkid = kb32->i.lock.lkid;
                kb->i.lock.parent = kb32->i.lock.parent;
+                kb->i.lock.xid = kb32->i.lock.xid;
+                kb->i.lock.timeout = kb32->i.lock.timeout;
                kb->i.lock.castparam = (void *)(long)kb32->i.lock.castparam;
                kb->i.lock.castaddr = (void *)(long)kb32->i.lock.castaddr;
                kb->i.lock.bastparam = (void *)(long)kb32->i.lock.bastparam;
@@ -115,6 +119,10 @@ static void compat_input(struct dlm_write_request *kb,
 static void compat_output(struct dlm_lock_result *res,
                          struct dlm_lock_result32 *res32)
 {
+        res32->version[0] = res->version[0];
+        res32->version[1] = res->version[1];
+        res32->version[2] = res->version[2];
        res32->user_astaddr = (__u32)(long)res->user_astaddr;
        res32->user_astparam = (__u32)(long)res->user_astparam;
        res32->user_lksb = (__u32)(long)res->user_lksb;
@@ -130,6 +138,36 @@ static void compat_output(struct dlm_lock_result *res,
 }
 #endif
+/* Figure out if this lock is at the end of its life and no longer
+   available for the application to use.  The lkb still exists until
+   the final ast is read.  A lock becomes EOL in three situations:
+     1. a noqueue request fails with EAGAIN
+     2. an unlock completes with EUNLOCK
+     3. a cancel of a waiting request completes with ECANCEL/EDEADLK/ETIMEDOUT
+   An EOL lock needs to be removed from the process's list of locks.
+   And we can't allow any new operation on an EOL lock.  This is
+   not related to the lifetime of the lkb struct which is managed
+   entirely by refcount. */
+static int lkb_is_endoflife(struct dlm_lkb *lkb, int sb_status, int type)
+{
+        switch (sb_status) {
+        case -DLM_EUNLOCK:
+                return 1;
+        case -DLM_ECANCEL:
+        case -ETIMEDOUT:
+        case -EDEADLK:
+                if (lkb->lkb_grmode == DLM_LOCK_IV)
+                        return 1;
+                break;
+        case -EAGAIN:
+                if (type == AST_COMP && lkb->lkb_grmode == DLM_LOCK_IV)
+                        return 1;
+                break;
+        }
+        return 0;
+}
 /* we could possibly check if the cancel of an orphan has resulted in the lkb
   being removed and then remove that lkb from the orphans list and free it */
@@ -176,25 +214,7 @@ void dlm_user_add_ast(struct dlm_lkb *lkb, int type)
                log_debug(ls, "ast overlap %x status %x %x",
                          lkb->lkb_id, ua->lksb.sb_status, lkb->lkb_flags);
-        /* Figure out if this lock is at the end of its life and no longer
+        eol = lkb_is_endoflife(lkb, ua->lksb.sb_status, type);
-           available for the application to use.  The lkb still exists until
-           the final ast is read.  A lock becomes EOL in three situations:
-             1. a noqueue request fails with EAGAIN
-             2. an unlock completes with EUNLOCK
-             3. a cancel of a waiting request completes with ECANCEL
-           An EOL lock needs to be removed from the process's list of locks.
-           And we can't allow any new operation on an EOL lock.  This is
-           not related to the lifetime of the lkb struct which is managed
-           entirely by refcount. */
-        if (type == AST_COMP &&
-            lkb->lkb_grmode == DLM_LOCK_IV &&
-            ua->lksb.sb_status == -EAGAIN)
-                eol = 1;
-        else if (ua->lksb.sb_status == -DLM_EUNLOCK ||
-            (ua->lksb.sb_status == -DLM_ECANCEL &&
-             lkb->lkb_grmode == DLM_LOCK_IV))
-                eol = 1;
        if (eol) {
                lkb->lkb_ast_type &= ~AST_BAST;
                lkb->lkb_flags |= DLM_IFL_ENDOFLIFE;
@@ -252,16 +272,18 @@ static int device_user_lock(struct dlm_user_proc *proc,
        ua->castaddr = params->castaddr;
        ua->bastparam = params->bastparam;
        ua->bastaddr = params->bastaddr;
+        ua->xid = params->xid;
        if (params->flags & DLM_LKF_CONVERT)
                error = dlm_user_convert(ls, ua,
                                         params->mode, params->flags,
-                                         params->lkid, params->lvb);
+                                         params->lkid, params->lvb,
+                                         (unsigned long) params->timeout);
        else {
                error = dlm_user_request(ls, ua,
                                         params->mode, params->flags,
                                         params->name, params->namelen,
-                                         params->parent);
+                                         (unsigned long) params->timeout);
                if (!error)
                        error = ua->lksb.sb_lkid;
        }
@@ -299,6 +321,22 @@ static int device_user_unlock(struct dlm_user_proc *proc,
        return error;
 }
+static int device_user_deadlock(struct dlm_user_proc *proc,
+                                struct dlm_lock_params *params)
+{
+        struct dlm_ls *ls;
+        int error;
+        ls = dlm_find_lockspace_local(proc->lockspace);
+        if (!ls)
+                return -ENOENT;
+        error = dlm_user_deadlock(ls, params->flags, params->lkid);
+        dlm_put_lockspace(ls);
+        return error;
+}
 static int create_misc_device(struct dlm_ls *ls, char *name)
 {
        int error, len;
@@ -348,7 +386,7 @@ static int device_create_lockspace(struct dlm_lspace_params *params)
                return -EPERM;
        error = dlm_new_lockspace(params->name, strlen(params->name),
-                                  &lockspace, 0, DLM_USER_LVB_LEN);
+                                  &lockspace, params->flags, DLM_USER_LVB_LEN);
        if (error)
                return error;
@@ -524,6 +562,14 @@ static ssize_t device_write(struct file *file, const char __user *buf,
                error = device_user_unlock(proc, &kbuf->i.lock);
                break;
+        case DLM_USER_DEADLOCK:
+                if (!proc) {
+                        log_print("no locking on control device");
+                        goto out_sig;
+                }
+                error = device_user_deadlock(proc, &kbuf->i.lock);
+                break;
        case DLM_USER_CREATE_LOCKSPACE:
                if (proc) {
                        log_print("create/remove only on control device");
@@ -641,6 +687,9 @@ static int copy_result_to_user(struct dlm_user_args *ua, int compat, int type,
        int struct_len;
        memset(&result, 0, sizeof(struct dlm_lock_result));
+        result.version[0] = DLM_DEVICE_VERSION_MAJOR;
+        result.version[1] = DLM_DEVICE_VERSION_MINOR;
+        result.version[2] = DLM_DEVICE_VERSION_PATCH;
        memcpy(&result.lksb, &ua->lksb, sizeof(struct dlm_lksb));
        result.user_lksb = ua->user_lksb;
@@ -699,6 +748,20 @@ static int copy_result_to_user(struct dlm_user_args *ua, int compat, int type,
        return error;
 }
+static int copy_version_to_user(char __user *buf, size_t count)
+{
+        struct dlm_device_version ver;
+        memset(&ver, 0, sizeof(struct dlm_device_version));
+        ver.version[0] = DLM_DEVICE_VERSION_MAJOR;
+        ver.version[1] = DLM_DEVICE_VERSION_MINOR;
+        ver.version[2] = DLM_DEVICE_VERSION_PATCH;
+        if (copy_to_user(buf, &ver, sizeof(struct dlm_device_version)))
+                return -EFAULT;
+        return sizeof(struct dlm_device_version);
+}
 /* a read returns a single ast described in a struct dlm_lock_result */
 static ssize_t device_read(struct file *file, char __user *buf, size_t count,
@@ -710,6 +773,16 @@ static ssize_t device_read(struct file *file, char __user *buf, size_t count,
        DECLARE_WAITQUEUE(wait, current);
        int error, type=0, bmode=0, removed = 0;
+        if (count == sizeof(struct dlm_device_version)) {
+                error = copy_version_to_user(buf, count);
+                return error;
+        }
+        if (!proc) {
+                log_print("non-version read from control device %zu", count);
+                return -EINVAL;
+        }
 #ifdef CONFIG_COMPAT
        if (count < sizeof(struct dlm_lock_result32))
 #else
@@ -747,11 +820,6 @@ static ssize_t device_read(struct file *file, char __user *buf, size_t count,
                }
        }
-        if (list_empty(&proc->asts)) {
-                spin_unlock(&proc->asts_spin);
-                return -EAGAIN;
-        }
        /* there may be both completion and blocking asts to return for
           the lkb, don't remove lkb from asts list unless no asts remain */
@@ -823,6 +891,7 @@ static const struct file_operations device_fops = {
 static const struct file_operations ctl_device_fops = {
        .open    = ctl_device_open,
        .release = ctl_device_close,
+        .read    = device_read,
        .write   = device_write,
        .owner   = THIS_MODULE,
 };
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index 403e3bad1455..1b9dd9a96f19 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -580,5 +580,7 @@ void
 ecryptfs_write_header_metadata(char *virt,
                               struct ecryptfs_crypt_stat *crypt_stat,
                               size_t *written);
+int ecryptfs_write_zeros(struct file *file, pgoff_t index, int start,
+                         int num_zeros);
 #endif /* #ifndef ECRYPTFS_KERNEL_H */
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index 59288d817078..94f456fe4d9b 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -338,16 +338,17 @@ static int ecryptfs_fasync(int fd, struct file *file, int flag)
        return rc;
 }
-static ssize_t ecryptfs_sendfile(struct file *file, loff_t * ppos,
+static ssize_t ecryptfs_splice_read(struct file *file, loff_t * ppos,
-                                 size_t count, read_actor_t actor, void *target)
+                                    struct pipe_inode_info *pipe, size_t count,
+                                    unsigned int flags)
 {
        struct file *lower_file = NULL;
        int rc = -EINVAL;
        lower_file = ecryptfs_file_to_lower(file);
-        if (lower_file->f_op && lower_file->f_op->sendfile)
+        if (lower_file->f_op && lower_file->f_op->splice_read)
-                rc = lower_file->f_op->sendfile(lower_file, ppos, count,
+                rc = lower_file->f_op->splice_read(lower_file, ppos, pipe,
-                                                actor, target);
+                                                count, flags);
        return rc;
 }
@@ -364,7 +365,7 @@ const struct file_operations ecryptfs_dir_fops = {
        .release = ecryptfs_release,
        .fsync = ecryptfs_fsync,
        .fasync = ecryptfs_fasync,
-        .sendfile = ecryptfs_sendfile,
+        .splice_read = ecryptfs_splice_read,
 };
 const struct file_operations ecryptfs_main_fops = {
@@ -381,7 +382,7 @@ const struct file_operations ecryptfs_main_fops = {
        .release = ecryptfs_release,
        .fsync = ecryptfs_fsync,
        .fasync = ecryptfs_fasync,
-        .sendfile = ecryptfs_sendfile,
+        .splice_read = ecryptfs_splice_read,
 };
 static int
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 1548be26b5e6..83e94fedd4e9 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -800,6 +800,25 @@ int ecryptfs_truncate(struct dentry *dentry, loff_t new_length)
                        goto out_fput;
                }
        } else { /* new_length < i_size_read(inode) */
+                pgoff_t index = 0;
+                int end_pos_in_page = -1;
+                if (new_length != 0) {
+                        index = ((new_length - 1) >> PAGE_CACHE_SHIFT);
+                        end_pos_in_page = ((new_length - 1) & ~PAGE_CACHE_MASK);
+                }
+                if (end_pos_in_page != (PAGE_CACHE_SIZE - 1)) {
+                        if ((rc = ecryptfs_write_zeros(&fake_ecryptfs_file,
+                                                       index,
+                                                       (end_pos_in_page + 1),
+                                                       ((PAGE_CACHE_SIZE - 1)
+                                                        - end_pos_in_page)))) {
+                                printk(KERN_ERR "Error attempting to zero out "
+                                       "the remainder of the end page on "
+                                       "reducing truncate; rc = [%d]\n", rc);
+                                goto out_fput;
+                        }
+                }
                vmtruncate(inode, new_length);
                rc = ecryptfs_write_inode_size_to_metadata(
                        lower_file, lower_dentry->d_inode, inode, dentry,
@@ -875,9 +894,54 @@ static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia)
        struct ecryptfs_crypt_stat *crypt_stat;
        crypt_stat = &ecryptfs_inode_to_private(dentry->d_inode)->crypt_stat;
-        lower_dentry = ecryptfs_dentry_to_lower(dentry);
+        if (!(crypt_stat->flags & ECRYPTFS_STRUCT_INITIALIZED))
+                ecryptfs_init_crypt_stat(crypt_stat);
        inode = dentry->d_inode;
        lower_inode = ecryptfs_inode_to_lower(inode);
+        lower_dentry = ecryptfs_dentry_to_lower(dentry);
+        mutex_lock(&crypt_stat->cs_mutex);
+        if (S_ISDIR(dentry->d_inode->i_mode))
+                crypt_stat->flags &= ~(ECRYPTFS_ENCRYPTED);
+        else if (!(crypt_stat->flags & ECRYPTFS_POLICY_APPLIED)
+                 || !(crypt_stat->flags & ECRYPTFS_KEY_VALID)) {
+                struct vfsmount *lower_mnt;
+                struct file *lower_file = NULL;
+                struct ecryptfs_mount_crypt_stat *mount_crypt_stat;
+                int lower_flags;
+                lower_mnt = ecryptfs_dentry_to_lower_mnt(dentry);
+                lower_flags = O_RDONLY;
+                if ((rc = ecryptfs_open_lower_file(&lower_file, lower_dentry,
+                                                   lower_mnt, lower_flags))) {
+                        printk(KERN_ERR
+                               "Error opening lower file; rc = [%d]\n", rc);
+                        mutex_unlock(&crypt_stat->cs_mutex);
+                        goto out;
+                }
+                mount_crypt_stat = &ecryptfs_superblock_to_private(
+                        dentry->d_sb)->mount_crypt_stat;
+                if ((rc = ecryptfs_read_metadata(dentry, lower_file))) {
+                        if (!(mount_crypt_stat->flags
+                              & ECRYPTFS_PLAINTEXT_PASSTHROUGH_ENABLED)) {
+                                rc = -EIO;
+                                printk(KERN_WARNING "Attempt to read file that "
+                                       "is not in a valid eCryptfs format, "
+                                       "and plaintext passthrough mode is not "
+                                       "enabled; returning -EIO\n");
+                                mutex_unlock(&crypt_stat->cs_mutex);
+                                fput(lower_file);
+                                goto out;
+                        }
+                        rc = 0;
+                        crypt_stat->flags &= ~(ECRYPTFS_ENCRYPTED);
+                        mutex_unlock(&crypt_stat->cs_mutex);
+                        fput(lower_file);
+                        goto out;
+                }
+                fput(lower_file);
+        }
+        mutex_unlock(&crypt_stat->cs_mutex);
        if (ia->ia_valid & ATTR_SIZE) {
                ecryptfs_printk(KERN_DEBUG,
                                "ia->ia_valid = [0x%x] ATTR_SIZE" " = [0x%x]\n",
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index 606128f5c927..02ca6f1e55d7 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -840,8 +840,6 @@ static int __init ecryptfs_init(void)
                goto out;
        }
        kobj_set_kset_s(&ecryptfs_subsys, fs_subsys);
-        sysfs_attr_version.attr.owner = THIS_MODULE;
-        sysfs_attr_version_str.attr.owner = THIS_MODULE;
        rc = do_sysfs_registration();
        if (rc) {
                printk(KERN_ERR "sysfs registration failed\n");
diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c
index 55cec98a84e7..7d5a43cb0d5c 100644
--- a/fs/ecryptfs/mmap.c
+++ b/fs/ecryptfs/mmap.c
@@ -56,9 +56,6 @@ static struct page *ecryptfs_get1page(struct file *file, int index)
        return read_mapping_page(mapping, index, (void *)file);
 }
-static
-int write_zeros(struct file *file, pgoff_t index, int start, int num_zeros);
 /**
 * ecryptfs_fill_zeros
 * @file: The ecryptfs file
@@ -101,10 +98,13 @@ int ecryptfs_fill_zeros(struct file *file, loff_t new_length)
        if (old_end_page_index == new_end_page_index) {
                /* Start and end are in the same page; we just need to
                 * set a portion of the existing page to zero's */
-                rc = write_zeros(file, index, (old_end_pos_in_page + 1),
+                rc = ecryptfs_write_zeros(file, index,
-                                 (new_end_pos_in_page - old_end_pos_in_page));
+                                          (old_end_pos_in_page + 1),
+                                          (new_end_pos_in_page
+                                           - old_end_pos_in_page));
                if (rc)
-                        ecryptfs_printk(KERN_ERR, "write_zeros(file=[%p], "
+                        ecryptfs_printk(KERN_ERR, "ecryptfs_write_zeros("
+                                        "file=[%p], "
                                        "index=[0x%.16x], "
                                        "old_end_pos_in_page=[d], "
                                        "(PAGE_CACHE_SIZE - new_end_pos_in_page"
@@ -117,10 +117,10 @@ int ecryptfs_fill_zeros(struct file *file, loff_t new_length)
                goto out;
        }
        /* Fill the remainder of the previous last page with zeros */
-        rc = write_zeros(file, index, (old_end_pos_in_page + 1),
+        rc = ecryptfs_write_zeros(file, index, (old_end_pos_in_page + 1),
                         ((PAGE_CACHE_SIZE - 1) - old_end_pos_in_page));
        if (rc) {
-                ecryptfs_printk(KERN_ERR, "write_zeros(file=[%p], "
+                ecryptfs_printk(KERN_ERR, "ecryptfs_write_zeros(file=[%p], "
                                "index=[0x%.16x], old_end_pos_in_page=[d], "
                                "(PAGE_CACHE_SIZE - old_end_pos_in_page)=[d]) "
                                "returned [%d]\n", file, index,
@@ -131,9 +131,10 @@ int ecryptfs_fill_zeros(struct file *file, loff_t new_length)
        index++;
        while (index < new_end_page_index) {
                /* Fill all intermediate pages with zeros */
-                rc = write_zeros(file, index, 0, PAGE_CACHE_SIZE);
+                rc = ecryptfs_write_zeros(file, index, 0, PAGE_CACHE_SIZE);
                if (rc) {
-                        ecryptfs_printk(KERN_ERR, "write_zeros(file=[%p], "
+                        ecryptfs_printk(KERN_ERR, "ecryptfs_write_zeros("
+                                        "file=[%p], "
                                        "index=[0x%.16x], "
                                        "old_end_pos_in_page=[d], "
                                        "(PAGE_CACHE_SIZE - new_end_pos_in_page"
@@ -149,9 +150,9 @@ int ecryptfs_fill_zeros(struct file *file, loff_t new_length)
        }
        /* Fill the portion at the beginning of the last new page with
         * zero's */
-        rc = write_zeros(file, index, 0, (new_end_pos_in_page + 1));
+        rc = ecryptfs_write_zeros(file, index, 0, (new_end_pos_in_page + 1));
        if (rc) {
-                ecryptfs_printk(KERN_ERR, "write_zeros(file="
+                ecryptfs_printk(KERN_ERR, "ecryptfs_write_zeros(file="
                                "[%p], index=[0x%.16x], 0, "
                                "new_end_pos_in_page=[%d]"
                                "returned [%d]\n", file, index,
@@ -400,7 +401,6 @@ out:
 static int ecryptfs_prepare_write(struct file *file, struct page *page,
                                  unsigned from, unsigned to)
 {
-        loff_t pos;
        int rc = 0;
        if (from == 0 && to == PAGE_CACHE_SIZE)
@@ -408,15 +408,22 @@ static int ecryptfs_prepare_write(struct file *file, struct page *page,
                                   up to date. */
        if (!PageUptodate(page))
                rc = ecryptfs_do_readpage(file, page, page->index);
-        pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
+        if (page->index != 0) {
-        if (pos > i_size_read(page->mapping->host)) {
+                loff_t end_of_prev_pg_pos =
-                rc = ecryptfs_truncate(file->f_path.dentry, pos);
+                        (((loff_t)page->index << PAGE_CACHE_SHIFT) - 1);
-                if (rc) {
-                        printk(KERN_ERR "Error on attempt to "
+                if (end_of_prev_pg_pos > i_size_read(page->mapping->host)) {
-                               "truncate to (higher) offset [%lld];"
+                        rc = ecryptfs_truncate(file->f_path.dentry,
-                               " rc = [%d]\n", pos, rc);
+                                               end_of_prev_pg_pos);
-                        goto out;
+                        if (rc) {
+                                printk(KERN_ERR "Error on attempt to "
+                                       "truncate to (higher) offset [%lld];"
+                                       " rc = [%d]\n", end_of_prev_pg_pos, rc);
+                                goto out;
+                        }
                }
+                if (end_of_prev_pg_pos + 1 > i_size_read(page->mapping->host))
+                        zero_user_page(page, 0, PAGE_CACHE_SIZE, KM_USER0);
        }
 out:
        return rc;
@@ -753,7 +760,7 @@ out:
 }
 /**
- * write_zeros
+ * ecryptfs_write_zeros
 * @file: The ecryptfs file
 * @index: The index in which we are writing
 * @start: The position after the last block of data
@@ -763,8 +770,8 @@ out:
 *
 * (start + num_zeros) must be less than or equal to PAGE_CACHE_SIZE
 */
-static
+int
-int write_zeros(struct file *file, pgoff_t index, int start, int num_zeros)
+ecryptfs_write_zeros(struct file *file, pgoff_t index, int start, int num_zeros)
 {
        int rc = 0;
        struct page *tmp_page;
diff --git a/fs/ext2/file.c b/fs/ext2/file.c
index 566d4e2d3852..04afeecaaef3 100644
--- a/fs/ext2/file.c
+++ b/fs/ext2/file.c
@@ -53,7 +53,6 @@ const struct file_operations ext2_file_operations = {
        .open           = generic_file_open,
        .release        = ext2_release_file,
        .fsync          = ext2_sync_file,
-        .sendfile       = generic_file_sendfile,
        .splice_read    = generic_file_splice_read,
        .splice_write   = generic_file_splice_write,
 };
@@ -71,7 +70,6 @@ const struct file_operations ext2_xip_file_operations = {
        .open           = generic_file_open,
        .release        = ext2_release_file,
        .fsync          = ext2_sync_file,
-        .sendfile       = xip_file_sendfile,
 };
 #endif
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 16337bff0272..5de5061eb331 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -1038,6 +1038,15 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data)
        sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
                ((sbi->s_mount_opt & EXT2_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0);
+        ext2_xip_verify_sb(sb); /* see if bdev supports xip, unset
+                                    EXT2_MOUNT_XIP if not */
+        if ((ext2_use_xip(sb)) && (sb->s_blocksize != PAGE_SIZE)) {
+                printk("XIP: Unsupported blocksize\n");
+                err = -EINVAL;
+                goto restore_opts;
+        }
        es = sbi->s_es;
        if (((sbi->s_mount_opt & EXT2_MOUNT_XIP) !=
            (old_mount_opt & EXT2_MOUNT_XIP)) &&
diff --git a/fs/ext3/file.c b/fs/ext3/file.c
index 1e6f13864536..acc4913d3019 100644
--- a/fs/ext3/file.c
+++ b/fs/ext3/file.c
@@ -120,7 +120,6 @@ const struct file_operations ext3_file_operations = {
        .open           = generic_file_open,
        .release        = ext3_release_file,
        .fsync          = ext3_sync_file,
-        .sendfile       = generic_file_sendfile,
        .splice_read    = generic_file_splice_read,
        .splice_write   = generic_file_splice_write,
 };
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index a6cb6171c3af..2a85ddee4740 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -2677,8 +2677,10 @@ void ext3_read_inode(struct inode * inode)
                 */
                ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize);
                if (EXT3_GOOD_OLD_INODE_SIZE + ei->i_extra_isize >
-                    EXT3_INODE_SIZE(inode->i_sb))
+                    EXT3_INODE_SIZE(inode->i_sb)) {
+                        brelse (bh);
                        goto bad_inode;
+                }
                if (ei->i_extra_isize == 0) {
                        /* The extra space is currently unused. Use it. */
                        ei->i_extra_isize = sizeof(struct ext3_inode) -
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 8a23483ca8d0..3b64bb16c727 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -30,15 +30,15 @@
 void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr,
                unsigned long *blockgrpp, ext4_grpblk_t *offsetp)
 {
-        struct ext4_super_block *es = EXT4_SB(sb)->s_es;
+        struct ext4_super_block *es = EXT4_SB(sb)->s_es;
        ext4_grpblk_t offset;
-        blocknr = blocknr - le32_to_cpu(es->s_first_data_block);
+        blocknr = blocknr - le32_to_cpu(es->s_first_data_block);
        offset = do_div(blocknr, EXT4_BLOCKS_PER_GROUP(sb));
        if (offsetp)
                *offsetp = offset;
        if (blockgrpp)
-                *blockgrpp = blocknr;
+                *blockgrpp = blocknr;
 }
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index a0f0c04e79b2..b9ce24129070 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -374,7 +374,7 @@ ext4_ext_binsearch_idx(struct inode *inode, struct ext4_ext_path *path, int bloc
                                       le32_to_cpu(ix[-1].ei_block));
                        }
                        BUG_ON(k && le32_to_cpu(ix->ei_block)
-                                           <= le32_to_cpu(ix[-1].ei_block));
+                                           <= le32_to_cpu(ix[-1].ei_block));
                        if (block < le32_to_cpu(ix->ei_block))
                                break;
                        chix = ix;
@@ -423,8 +423,8 @@ ext4_ext_binsearch(struct inode *inode, struct ext4_ext_path *path, int block)
        path->p_ext = l - 1;
        ext_debug("  -> %d:%llu:%d ",
-                        le32_to_cpu(path->p_ext->ee_block),
+                        le32_to_cpu(path->p_ext->ee_block),
-                        ext_pblock(path->p_ext),
+                        ext_pblock(path->p_ext),
                        le16_to_cpu(path->p_ext->ee_len));
 #ifdef CHECK_BINSEARCH
@@ -435,7 +435,7 @@ ext4_ext_binsearch(struct inode *inode, struct ext4_ext_path *path, int block)
                chex = ex = EXT_FIRST_EXTENT(eh);
                for (k = 0; k < le16_to_cpu(eh->eh_entries); k++, ex++) {
                        BUG_ON(k && le32_to_cpu(ex->ee_block)
-                                          <= le32_to_cpu(ex[-1].ee_block));
+                                          <= le32_to_cpu(ex[-1].ee_block));
                        if (block < le32_to_cpu(ex->ee_block))
                                break;
                        chex = ex;
@@ -577,7 +577,7 @@ static int ext4_ext_insert_index(handle_t *handle, struct inode *inode,
        curp->p_hdr->eh_entries = cpu_to_le16(le16_to_cpu(curp->p_hdr->eh_entries)+1);
        BUG_ON(le16_to_cpu(curp->p_hdr->eh_entries)
-                             > le16_to_cpu(curp->p_hdr->eh_max));
+                             > le16_to_cpu(curp->p_hdr->eh_max));
        BUG_ON(ix > EXT_LAST_INDEX(curp->p_hdr));
        err = ext4_ext_dirty(handle, inode, curp);
@@ -621,12 +621,12 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
                border = path[depth].p_ext[1].ee_block;
                ext_debug("leaf will be split."
                                " next leaf starts at %d\n",
-                                  le32_to_cpu(border));
+                                  le32_to_cpu(border));
        } else {
                border = newext->ee_block;
                ext_debug("leaf will be added."
                                " next leaf starts at %d\n",
-                                le32_to_cpu(border));
+                                le32_to_cpu(border));
        }
        /*
@@ -684,9 +684,9 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
        while (path[depth].p_ext <=
                        EXT_MAX_EXTENT(path[depth].p_hdr)) {
                ext_debug("move %d:%llu:%d in new leaf %llu\n",
-                                le32_to_cpu(path[depth].p_ext->ee_block),
+                                le32_to_cpu(path[depth].p_ext->ee_block),
-                                ext_pblock(path[depth].p_ext),
+                                ext_pblock(path[depth].p_ext),
-                                le16_to_cpu(path[depth].p_ext->ee_len),
+                                le16_to_cpu(path[depth].p_ext->ee_len),
                                newblock);
                /*memmove(ex++, path[depth].p_ext++,
                                sizeof(struct ext4_extent));
@@ -765,9 +765,9 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
                                EXT_LAST_INDEX(path[i].p_hdr));
                while (path[i].p_idx <= EXT_MAX_INDEX(path[i].p_hdr)) {
                        ext_debug("%d: move %d:%d in new index %llu\n", i,
-                                        le32_to_cpu(path[i].p_idx->ei_block),
+                                        le32_to_cpu(path[i].p_idx->ei_block),
-                                        idx_pblock(path[i].p_idx),
+                                        idx_pblock(path[i].p_idx),
-                                        newblock);
+                                        newblock);
                        /*memmove(++fidx, path[i].p_idx++,
                                        sizeof(struct ext4_extent_idx));
                        neh->eh_entries++;
@@ -1128,6 +1128,55 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1,
 }
 /*
+ * check if a portion of the "newext" extent overlaps with an
+ * existing extent.
+ *
+ * If there is an overlap discovered, it updates the length of the newext
+ * such that there will be no overlap, and then returns 1.
+ * If there is no overlap found, it returns 0.
+ *
+ * @inode:  inode whose tree depth (ext_depth()) selects which entry of
+ *          @path is examined
+ * @newext: candidate extent; only ee_block and ee_len are read, and
+ *          ee_len is rewritten in place whenever the extent is trimmed
+ * @path:   lookup path; path[depth].p_ext is the extent compared against.
+ *          When it is NULL the function returns 0 and leaves @newext alone.
+ */
+unsigned int ext4_ext_check_overlap(struct inode *inode,
+                                    struct ext4_extent *newext,
+                                    struct ext4_ext_path *path)
+{
+        unsigned long b1, b2;   /* b1: first block of newext; b2: first block of the extent it may run into */
+        unsigned int depth, len1;
+        unsigned int ret = 0;   /* becomes 1 as soon as newext->ee_len has been shortened */
+        b1 = le32_to_cpu(newext->ee_block);
+        len1 = le16_to_cpu(newext->ee_len);
+        depth = ext_depth(inode);
+        if (!path[depth].p_ext)
+                goto out;       /* path[depth] holds no extent: nothing to compare against */
+        b2 = le32_to_cpu(path[depth].p_ext->ee_block);
+        /*
+         * get the next allocated block if the extent in the path
+         * is before the requested block(s)
+         */
+        if (b2 < b1) {
+                b2 = ext4_ext_next_allocated_block(path);
+                if (b2 == EXT_MAX_BLOCK)
+                        goto out;       /* EXT_MAX_BLOCK is treated as "no following extent": report no overlap */
+        }
+        /*
+         * check for wrap through zero
+         * NOTE(review): b1 is an unsigned long holding a 32-bit block number,
+         * so b1 + len1 can only wrap where unsigned long is 32 bits wide;
+         * confirm whether 64-bit builds need an explicit bound here.
+         */
+        if (b1 + len1 < b1) {
+                len1 = EXT_MAX_BLOCK - b1;
+                newext->ee_len = cpu_to_le16(len1);
+                ret = 1;
+        }
+        /*
+         * check for overlap: newext would reach past b2, so shorten it
+         * to end right before b2 (this may override the length stored by
+         * the wrap check above)
+         */
+        if (b1 + len1 > b2) {
+                newext->ee_len = cpu_to_le16(b2 - b1);
+                ret = 1;
+        }
+out:
+        return ret;
+}
+/*
 * ext4_ext_insert_extent:
 * tries to merge requsted extent into the existing extent or
 * inserts requested extent as new one into the tree,
@@ -1212,12 +1261,12 @@ has_space:
        if (!nearex) {
                /* there is no extent in this leaf, create first one */
                ext_debug("first extent in the leaf: %d:%llu:%d\n",
-                                le32_to_cpu(newext->ee_block),
+                                le32_to_cpu(newext->ee_block),
-                                ext_pblock(newext),
+                                ext_pblock(newext),
-                                le16_to_cpu(newext->ee_len));
+                                le16_to_cpu(newext->ee_len));
                path[depth].p_ext = EXT_FIRST_EXTENT(eh);
        } else if (le32_to_cpu(newext->ee_block)
-                           > le32_to_cpu(nearex->ee_block)) {
+                           > le32_to_cpu(nearex->ee_block)) {
 /*              BUG_ON(newext->ee_block == nearex->ee_block); */
                if (nearex != EXT_LAST_EXTENT(eh)) {
                        len = EXT_MAX_EXTENT(eh) - nearex;
@@ -1225,9 +1274,9 @@ has_space:
                        len = len < 0 ? 0 : len;
                        ext_debug("insert %d:%llu:%d after: nearest 0x%p, "
                                        "move %d from 0x%p to 0x%p\n",
-                                        le32_to_cpu(newext->ee_block),
+                                        le32_to_cpu(newext->ee_block),
-                                        ext_pblock(newext),
+                                        ext_pblock(newext),
-                                        le16_to_cpu(newext->ee_len),
+                                        le16_to_cpu(newext->ee_len),
                                        nearex, len, nearex + 1, nearex + 2);
                        memmove(nearex + 2, nearex + 1, len);
                }
@@ -1358,9 +1407,9 @@ int ext4_ext_walk_space(struct inode *inode, unsigned long block,
                        cbex.ec_start = 0;
                        cbex.ec_type = EXT4_EXT_CACHE_GAP;
                } else {
-                        cbex.ec_block = le32_to_cpu(ex->ee_block);
+                        cbex.ec_block = le32_to_cpu(ex->ee_block);
-                        cbex.ec_len = le16_to_cpu(ex->ee_len);
+                        cbex.ec_len = le16_to_cpu(ex->ee_len);
-                        cbex.ec_start = ext_pblock(ex);
+                        cbex.ec_start = ext_pblock(ex);
                        cbex.ec_type = EXT4_EXT_CACHE_EXTENT;
                }
@@ -1431,16 +1480,16 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path,
                len = le32_to_cpu(ex->ee_block) - block;
                ext_debug("cache gap(before): %lu [%lu:%lu]",
                                (unsigned long) block,
-                                (unsigned long) le32_to_cpu(ex->ee_block),
+                                (unsigned long) le32_to_cpu(ex->ee_block),
-                                (unsigned long) le16_to_cpu(ex->ee_len));
+                                (unsigned long) le16_to_cpu(ex->ee_len));
        } else if (block >= le32_to_cpu(ex->ee_block)
-                            + le16_to_cpu(ex->ee_len)) {
+                            + le16_to_cpu(ex->ee_len)) {
-                lblock = le32_to_cpu(ex->ee_block)
+                lblock = le32_to_cpu(ex->ee_block)
-                         + le16_to_cpu(ex->ee_len);
+                         + le16_to_cpu(ex->ee_len);
                len = ext4_ext_next_allocated_block(path);
                ext_debug("cache gap(after): [%lu:%lu] %lu",
-                                (unsigned long) le32_to_cpu(ex->ee_block),
+                                (unsigned long) le32_to_cpu(ex->ee_block),
-                                (unsigned long) le16_to_cpu(ex->ee_len),
+                                (unsigned long) le16_to_cpu(ex->ee_len),
                                (unsigned long) block);
                BUG_ON(len == lblock);
                len = len - lblock;
@@ -1468,9 +1517,9 @@ ext4_ext_in_cache(struct inode *inode, unsigned long block,
        BUG_ON(cex->ec_type != EXT4_EXT_CACHE_GAP &&
                        cex->ec_type != EXT4_EXT_CACHE_EXTENT);
        if (block >= cex->ec_block && block < cex->ec_block + cex->ec_len) {
-                ex->ee_block = cpu_to_le32(cex->ec_block);
+                ex->ee_block = cpu_to_le32(cex->ec_block);
                ext4_ext_store_pblock(ex, cex->ec_start);
-                ex->ee_len = cpu_to_le16(cex->ec_len);
+                ex->ee_len = cpu_to_le16(cex->ec_len);
                ext_debug("%lu cached by %lu:%lu:%llu\n",
                                (unsigned long) block,
                                (unsigned long) cex->ec_block,
@@ -1956,9 +2005,9 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
                        /* we should allocate requested block */
                } else if (goal == EXT4_EXT_CACHE_EXTENT) {
                        /* block is already allocated */
-                        newblock = iblock
+                        newblock = iblock
-                                   - le32_to_cpu(newex.ee_block)
+                                   - le32_to_cpu(newex.ee_block)
-                                   + ext_pblock(&newex);
+                                   + ext_pblock(&newex);
                        /* number of remaining blocks in the extent */
                        allocated = le16_to_cpu(newex.ee_len) -
                                        (iblock - le32_to_cpu(newex.ee_block));
@@ -1987,7 +2036,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
        ex = path[depth].p_ext;
        if (ex) {
-                unsigned long ee_block = le32_to_cpu(ex->ee_block);
+                unsigned long ee_block = le32_to_cpu(ex->ee_block);
                ext4_fsblk_t ee_start = ext_pblock(ex);
                unsigned short ee_len  = le16_to_cpu(ex->ee_len);
@@ -2000,7 +2049,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
                if (ee_len > EXT_MAX_LEN)
                        goto out2;
                /* if found extent covers block, simply return it */
-                if (iblock >= ee_block && iblock < ee_block + ee_len) {
+                if (iblock >= ee_block && iblock < ee_block + ee_len) {
                        newblock = iblock - ee_block + ee_start;
                        /* number of remaining blocks in the extent */
                        allocated = ee_len - (iblock - ee_block);
@@ -2031,7 +2080,15 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
        /* allocate new block */
        goal = ext4_ext_find_goal(inode, path, iblock);
-        allocated = max_blocks;
+        /* Check if we can really insert (iblock)::(iblock+max_blocks) extent */
+        newex.ee_block = cpu_to_le32(iblock);
+        newex.ee_len = cpu_to_le16(max_blocks);
+        err = ext4_ext_check_overlap(inode, &newex, path);
+        if (err)
+                allocated = le16_to_cpu(newex.ee_len);
+        else
+                allocated = max_blocks;
        newblock = ext4_new_blocks(handle, inode, goal, &allocated, &err);
        if (!newblock)
                goto out2;
@@ -2039,12 +2096,15 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
                        goal, newblock, allocated);
        /* try to insert new extent into found leaf and return */
-        newex.ee_block = cpu_to_le32(iblock);
        ext4_ext_store_pblock(&newex, newblock);
        newex.ee_len = cpu_to_le16(allocated);
        err = ext4_ext_insert_extent(handle, inode, path, &newex);
-        if (err)
+        if (err) {
+                /* free data blocks we just allocated */
+                ext4_free_blocks(handle, inode, ext_pblock(&newex),
+                                        le16_to_cpu(newex.ee_len));
                goto out2;
+        }
        if (extend_disksize && inode->i_size > EXT4_I(inode)->i_disksize)
                EXT4_I(inode)->i_disksize = inode->i_size;
@@ -2157,11 +2217,3 @@ int ext4_ext_writepage_trans_blocks(struct inode *inode, int num)
        return needed;
 }
-EXPORT_SYMBOL(ext4_mark_inode_dirty);
-EXPORT_SYMBOL(ext4_ext_invalidate_cache);
-EXPORT_SYMBOL(ext4_ext_insert_extent);
-EXPORT_SYMBOL(ext4_ext_walk_space);
-EXPORT_SYMBOL(ext4_ext_find_goal);
-EXPORT_SYMBOL(ext4_ext_calc_credits_for_insert);
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 3c6c1fd2be90..d4c8186aed64 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -120,7 +120,6 @@ const struct file_operations ext4_file_operations = {
        .open           = generic_file_open,
        .release        = ext4_release_file,
        .fsync          = ext4_sync_file,
-        .sendfile       = generic_file_sendfile,
        .splice_read    = generic_file_splice_read,
        .splice_write   = generic_file_splice_write,
 };
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index b34182b6ee4d..8416fa28c422 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -255,8 +255,8 @@ static int verify_chain(Indirect *from, Indirect *to)
 *      @inode: inode in question (we are only interested in its superblock)
 *      @i_block: block number to be parsed
 *      @offsets: array to store the offsets in
- *      @boundary: set this non-zero if the referred-to block is likely to be
+ *      @boundary: set this non-zero if the referred-to block is likely to be
- *             followed (on disk) by an indirect block.
+ *             followed (on disk) by an indirect block.
 *
 *      To store the locations of file's data ext4 uses a data structure common
 *      for UNIX filesystems - tree of pointers anchored in the inode, with
@@ -2673,8 +2673,10 @@ void ext4_read_inode(struct inode * inode)
                 */
                ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize);
                if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize >
-                    EXT4_INODE_SIZE(inode->i_sb))
+                    EXT4_INODE_SIZE(inode->i_sb)) {
+                        brelse (bh);
                        goto bad_inode;
+                }
                if (ei->i_extra_isize == 0) {
                        /* The extra space is currently unused. Use it. */
                        ei->i_extra_isize = sizeof(struct ext4_inode) -
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 4ec57be5baf5..2811e5720ad0 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -46,7 +46,7 @@
 */
 #define NAMEI_RA_CHUNKS  2
 #define NAMEI_RA_BLOCKS  4
-#define NAMEI_RA_SIZE        (NAMEI_RA_CHUNKS * NAMEI_RA_BLOCKS)
+#define NAMEI_RA_SIZE        (NAMEI_RA_CHUNKS * NAMEI_RA_BLOCKS)
 #define NAMEI_RA_INDEX(c,b)  (((c) * NAMEI_RA_BLOCKS) + (b))
 static struct buffer_head *ext4_append(handle_t *handle,
@@ -241,7 +241,7 @@ static inline unsigned dx_node_limit (struct inode *dir)
 static void dx_show_index (char * label, struct dx_entry *entries)
 {
        int i, n = dx_get_count (entries);
-        printk("%s index ", label);
+        printk("%s index ", label);
        for (i = 0; i < n; i++) {
                printk("%x->%u ", i? dx_get_hash(entries + i) :
                                0, dx_get_block(entries + i));
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index cb9afdd0e26e..175b68c60968 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1985,7 +1985,7 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb,
        if (bd_claim(bdev, sb)) {
                printk(KERN_ERR
-                        "EXT4: failed to claim external journal device.\n");
+                        "EXT4: failed to claim external journal device.\n");
                blkdev_put(bdev);
                return NULL;
        }
diff --git a/fs/fat/file.c b/fs/fat/file.c
index 55d3c7461c5b..69a83b59dce8 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -134,7 +134,7 @@ const struct file_operations fat_file_operations = {
        .release        = fat_file_release,
        .ioctl          = fat_generic_ioctl,
        .fsync          = file_fsync,
-        .sendfile       = generic_file_sendfile,
+        .splice_read    = generic_file_splice_read,
 };
 static int fat_cont_expand(struct inode *inode, loff_t size)
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index adf7995232b8..f79de7c8cdfa 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -802,7 +802,7 @@ static const struct file_operations fuse_file_operations = {
        .release        = fuse_release,
        .fsync          = fuse_fsync,
        .lock           = fuse_file_lock,
-        .sendfile       = generic_file_sendfile,
+        .splice_read    = generic_file_splice_read,
 };
 static const struct file_operations fuse_direct_io_file_operations = {
@@ -814,7 +814,7 @@ static const struct file_operations fuse_direct_io_file_operations = {
        .release        = fuse_release,
        .fsync          = fuse_fsync,
        .lock           = fuse_file_lock,
-        /* no mmap and sendfile */
+        /* no mmap and splice_read */
 };
 static const struct address_space_operations fuse_file_aops  = {
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 9804c0cdcb42..cc5efc13496a 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -655,10 +655,9 @@ static int fuse_get_sb_blk(struct file_system_type *fs_type,
 static struct file_system_type fuseblk_fs_type = {
         .owner          = THIS_MODULE,
         .name           = "fuseblk",
-        .fs_flags       = FS_HAS_SUBTYPE,
         .get_sb         = fuse_get_sb_blk,
         .kill_sb        = kill_block_super,
-        .fs_flags       = FS_REQUIRES_DEV,
+        .fs_flags       = FS_REQUIRES_DEV | FS_HAS_SUBTYPE,     /* one initializer for both flags: with two .fs_flags entries only the last one takes effect, so FS_HAS_SUBTYPE was being dropped */
 };
 static inline int register_fuseblk(void)
diff --git a/fs/gfs2/Makefile b/fs/gfs2/Makefile
index e3f1ada643ac..04ad0caebedb 100644
--- a/fs/gfs2/Makefile
+++ b/fs/gfs2/Makefile
@@ -1,7 +1,7 @@
 obj-$(CONFIG_GFS2_FS) += gfs2.o
 gfs2-y := acl.o bmap.o daemon.o dir.o eaops.o eattr.o glock.o \
        glops.o inode.o lm.o log.o lops.o locking.o main.o meta_io.o \
-        mount.o ondisk.o ops_address.o ops_dentry.o ops_export.o ops_file.o \
+        mount.o ops_address.o ops_dentry.o ops_export.o ops_file.o \
        ops_fstype.o ops_inode.o ops_super.o ops_vm.o quota.o \
        recovery.o rgrp.o super.o sys.o trans.o util.o
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index c53a5d2d0590..cd805a66880d 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -718,7 +718,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
        for (x = 0; x < rlist.rl_rgrps; x++) {
                struct gfs2_rgrpd *rgd;
                rgd = rlist.rl_ghs[x].gh_gl->gl_object;
-                rg_blocks += rgd->rd_ri.ri_length;
+                rg_blocks += rgd->rd_length;
        }
        error = gfs2_glock_nq_m(rlist.rl_rgrps, rlist.rl_ghs);
@@ -772,7 +772,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
                        gfs2_free_data(ip, bstart, blen);
        }
-        ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC;
+        ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
        gfs2_dinode_out(ip, dibh->b_data);
@@ -824,7 +824,7 @@ static int do_grow(struct gfs2_inode *ip, u64 size)
                goto out_gunlock_q;
        error = gfs2_trans_begin(sdp,
-                        sdp->sd_max_height + al->al_rgd->rd_ri.ri_length +
+                        sdp->sd_max_height + al->al_rgd->rd_length +
                        RES_JDATA + RES_DINODE + RES_STATFS + RES_QUOTA, 0);
        if (error)
                goto out_ipres;
@@ -847,7 +847,7 @@ static int do_grow(struct gfs2_inode *ip, u64 size)
        }
        ip->i_di.di_size = size;
-        ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC;
+        ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
        error = gfs2_meta_inode_buffer(ip, &dibh);
        if (error)
@@ -885,7 +885,6 @@ static int gfs2_block_truncate_page(struct address_space *mapping)
        unsigned blocksize, iblock, length, pos;
        struct buffer_head *bh;
        struct page *page;
-        void *kaddr;
        int err;
        page = grab_cache_page(mapping, index);
@@ -928,15 +927,13 @@ static int gfs2_block_truncate_page(struct address_space *mapping)
                /* Uhhuh. Read error. Complain and punt. */
                if (!buffer_uptodate(bh))
                        goto unlock;
+                err = 0;
        }
        if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip))
                gfs2_trans_add_bh(ip->i_gl, bh, 0);
-        kaddr = kmap_atomic(page, KM_USER0);
+        zero_user_page(page, offset, length, KM_USER0);
-        memset(kaddr + offset, 0, length);
-        flush_dcache_page(page);
-        kunmap_atomic(kaddr, KM_USER0);
 unlock:
        unlock_page(page);
@@ -962,7 +959,7 @@ static int trunc_start(struct gfs2_inode *ip, u64 size)
        if (gfs2_is_stuffed(ip)) {
                ip->i_di.di_size = size;
-                ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC;
+                ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
                gfs2_trans_add_bh(ip->i_gl, dibh, 1);
                gfs2_dinode_out(ip, dibh->b_data);
                gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode) + size);
@@ -974,7 +971,7 @@ static int trunc_start(struct gfs2_inode *ip, u64 size)
                if (!error) {
                        ip->i_di.di_size = size;
-                        ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC;
+                        ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
                        ip->i_di.di_flags |= GFS2_DIF_TRUNC_IN_PROG;
                        gfs2_trans_add_bh(ip->i_gl, dibh, 1);
                        gfs2_dinode_out(ip, dibh->b_data);
@@ -1044,10 +1041,10 @@ static int trunc_end(struct gfs2_inode *ip)
                ip->i_di.di_height = 0;
                ip->i_di.di_goal_meta =
                        ip->i_di.di_goal_data =
-                        ip->i_num.no_addr;
+                        ip->i_no_addr;
                gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
        }
-        ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC;
+        ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
        ip->i_di.di_flags &= ~GFS2_DIF_TRUNC_IN_PROG;
        gfs2_trans_add_bh(ip->i_gl, dibh, 1);
diff --git a/fs/gfs2/daemon.c b/fs/gfs2/daemon.c
index 683cb5bda870..3548d9f31e0d 100644
--- a/fs/gfs2/daemon.c
+++ b/fs/gfs2/daemon.c
@@ -16,6 +16,7 @@
 #include <linux/delay.h>
 #include <linux/gfs2_ondisk.h>
 #include <linux/lm_interface.h>
+#include <linux/freezer.h>
 #include "gfs2.h"
 #include "incore.h"
@@ -49,6 +50,8 @@ int gfs2_scand(void *data)
        while (!kthread_should_stop()) {
                gfs2_scand_internal(sdp);
                t = gfs2_tune_get(sdp, gt_scand_secs) * HZ;
+                if (freezing(current))
+                        refrigerator();
                schedule_timeout_interruptible(t);
        }
@@ -74,6 +77,8 @@ int gfs2_glockd(void *data)
                wait_event_interruptible(sdp->sd_reclaim_wq,
                                         (atomic_read(&sdp->sd_reclaim_count) ||
                                         kthread_should_stop()));
+                if (freezing(current))
+                        refrigerator();
        }
        return 0;
@@ -93,6 +98,8 @@ int gfs2_recoverd(void *data)
        while (!kthread_should_stop()) {
                gfs2_check_journals(sdp);
                t = gfs2_tune_get(sdp,  gt_recoverd_secs) * HZ;
+                if (freezing(current))
+                        refrigerator();
                schedule_timeout_interruptible(t);
        }
@@ -141,6 +148,8 @@ int gfs2_logd(void *data)
                }
                t = gfs2_tune_get(sdp, gt_logd_secs) * HZ;
+                if (freezing(current))
+                        refrigerator();
                schedule_timeout_interruptible(t);
        }
@@ -191,6 +200,8 @@ int gfs2_quotad(void *data)
                gfs2_quota_scan(sdp);
                t = gfs2_tune_get(sdp, gt_quotad_secs) * HZ;
+                if (freezing(current))
+                        refrigerator();
                schedule_timeout_interruptible(t);
        }
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index a96fa07b3f3b..2beb2f401aa2 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -130,7 +130,7 @@ static int gfs2_dir_write_stuffed(struct gfs2_inode *ip, const char *buf,
        memcpy(dibh->b_data + offset + sizeof(struct gfs2_dinode), buf, size);
        if (ip->i_di.di_size < offset + size)
                ip->i_di.di_size = offset + size;
-        ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC;
+        ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
        gfs2_dinode_out(ip, dibh->b_data);
        brelse(dibh);
@@ -228,7 +228,7 @@ out:
        if (ip->i_di.di_size < offset + copied)
                ip->i_di.di_size = offset + copied;
-        ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC;
+        ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
        gfs2_trans_add_bh(ip->i_gl, dibh, 1);
        gfs2_dinode_out(ip, dibh->b_data);
@@ -1456,7 +1456,7 @@ int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque,
                if (dip->i_di.di_entries != g.offset) {
                        fs_warn(sdp, "Number of entries corrupt in dir %llu, "
                                "ip->i_di.di_entries (%u) != g.offset (%u)\n",
-                                (unsigned long long)dip->i_num.no_addr,
+                                (unsigned long long)dip->i_no_addr,
                                dip->i_di.di_entries,
                                g.offset);
                        error = -EIO;
@@ -1488,24 +1488,55 @@ out:
 * Returns: errno
 */
-int gfs2_dir_search(struct inode *dir, const struct qstr *name,
+struct inode *gfs2_dir_search(struct inode *dir, const struct qstr *name)
-                    struct gfs2_inum_host *inum, unsigned int *type)
 {
         struct buffer_head *bh;
         struct gfs2_dirent *dent;
+        struct inode *inode;    /* result of gfs2_inode_lookup(), handed back to the caller as-is */
+        dent = gfs2_dirent_search(dir, name, gfs2_dirent_find, &bh);
+        if (dent) {
+                if (IS_ERR(dent))
+                        return ERR_PTR(PTR_ERR(dent));  /* pass the search error on in pointer form; bh is not released on this path */
+                inode = gfs2_inode_lookup(dir->i_sb, /* type, block address and formal inode number are all taken from the dirent */
+                                be16_to_cpu(dent->de_type),
+                                be64_to_cpu(dent->de_inum.no_addr),
+                                be64_to_cpu(dent->de_inum.no_formal_ino));
+                brelse(bh);     /* dirent fields have been read; drop the buffer before returning */
+                return inode;
+        }
+        return ERR_PTR(-ENOENT);        /* search returned NULL: no entry with this name */
+}
+int gfs2_dir_check(struct inode *dir, const struct qstr *name,
+                   const struct gfs2_inode *ip)
+{
+        struct buffer_head *bh;
+        struct gfs2_dirent *dent;
+        int ret = -ENOENT;      /* stays -ENOENT when the name is missing or the entry names a different inode */
         dent = gfs2_dirent_search(dir, name, gfs2_dirent_find, &bh);
         if (dent) {
                 if (IS_ERR(dent))
                         return PTR_ERR(dent);
-                if (inum)
+                if (ip) {       /* ip == NULL: only the existence of the name is checked */
-                        gfs2_inum_in(inum, (char *)&dent->de_inum);
+                        if (be64_to_cpu(dent->de_inum.no_addr) != ip->i_no_addr)
-                if (type)
+                                goto out;
-                        *type = be16_to_cpu(dent->de_type);
+                        if (be64_to_cpu(dent->de_inum.no_formal_ino) !=
+                            ip->i_no_formal_ino)
+                                goto out;
+                        if (unlikely(IF2DT(ip->i_inode.i_mode) !=
+                            be16_to_cpu(dent->de_type))) {
+                                gfs2_consist_inode(GFS2_I(dir));
+                                ret = -EIO;     /* both inode numbers match but the recorded type does not */
+                                goto out;
+                        }
+                }
+                ret = 0;        /* entry exists and, when ip was given, refers to it */
+out:
                 brelse(bh);
-                return 0;
         }
-        return -ENOENT;
+        return ret;
 }
 static int dir_new_leaf(struct inode *inode, const struct qstr *name)
@@ -1565,7 +1596,7 @@ static int dir_new_leaf(struct inode *inode, const struct qstr *name)
 */
 int gfs2_dir_add(struct inode *inode, const struct qstr *name,
-                 const struct gfs2_inum_host *inum, unsigned type)
+                 const struct gfs2_inode *nip, unsigned type)
 {
        struct gfs2_inode *ip = GFS2_I(inode);
        struct buffer_head *bh;
@@ -1580,7 +1611,7 @@ int gfs2_dir_add(struct inode *inode, const struct qstr *name,
                        if (IS_ERR(dent))
                                return PTR_ERR(dent);
                        dent = gfs2_init_dirent(inode, dent, name, bh);
-                        gfs2_inum_out(inum, (char *)&dent->de_inum);
+                        gfs2_inum_out(nip, dent);
                        dent->de_type = cpu_to_be16(type);
                        if (ip->i_di.di_flags & GFS2_DIF_EXHASH) {
                                leaf = (struct gfs2_leaf *)bh->b_data;
@@ -1592,7 +1623,7 @@ int gfs2_dir_add(struct inode *inode, const struct qstr *name,
                                break;
                        gfs2_trans_add_bh(ip->i_gl, bh, 1);
                        ip->i_di.di_entries++;
-                        ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC;
+                        ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
                        gfs2_dinode_out(ip, bh->b_data);
                        brelse(bh);
                        error = 0;
@@ -1678,7 +1709,7 @@ int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *name)
                gfs2_consist_inode(dip);
        gfs2_trans_add_bh(dip->i_gl, bh, 1);
        dip->i_di.di_entries--;
-        dip->i_inode.i_mtime = dip->i_inode.i_ctime = CURRENT_TIME_SEC;
+        dip->i_inode.i_mtime = dip->i_inode.i_ctime = CURRENT_TIME;
        gfs2_dinode_out(dip, bh->b_data);
        brelse(bh);
        mark_inode_dirty(&dip->i_inode);
@@ -1700,7 +1731,7 @@ int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *name)
 */
 int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename,
-                   struct gfs2_inum_host *inum, unsigned int new_type)
+                   const struct gfs2_inode *nip, unsigned int new_type)
 {
        struct buffer_head *bh;
        struct gfs2_dirent *dent;
@@ -1715,7 +1746,7 @@ int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename,
                return PTR_ERR(dent);
        gfs2_trans_add_bh(dip->i_gl, bh, 1);
-        gfs2_inum_out(inum, (char *)&dent->de_inum);
+        gfs2_inum_out(nip, dent);
        dent->de_type = cpu_to_be16(new_type);
        if (dip->i_di.di_flags & GFS2_DIF_EXHASH) {
@@ -1726,7 +1757,7 @@ int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename,
                gfs2_trans_add_bh(dip->i_gl, bh, 1);
        }
-        dip->i_inode.i_mtime = dip->i_inode.i_ctime = CURRENT_TIME_SEC;
+        dip->i_inode.i_mtime = dip->i_inode.i_ctime = CURRENT_TIME;
        gfs2_dinode_out(dip, bh->b_data);
        brelse(bh);
        return 0;
@@ -1867,7 +1898,7 @@ static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len,
        for (x = 0; x < rlist.rl_rgrps; x++) {
                struct gfs2_rgrpd *rgd;
                rgd = rlist.rl_ghs[x].gh_gl->gl_object;
-                rg_blocks += rgd->rd_ri.ri_length;
+                rg_blocks += rgd->rd_length;
        }
        error = gfs2_glock_nq_m(rlist.rl_rgrps, rlist.rl_ghs);
diff --git a/fs/gfs2/dir.h b/fs/gfs2/dir.h
index 48fe89046bba..8a468cac9328 100644
--- a/fs/gfs2/dir.h
+++ b/fs/gfs2/dir.h
@@ -16,15 +16,16 @@ struct inode;
 struct gfs2_inode;
 struct gfs2_inum;
-int gfs2_dir_search(struct inode *dir, const struct qstr *filename,
+struct inode *gfs2_dir_search(struct inode *dir, const struct qstr *filename);
-                    struct gfs2_inum_host *inum, unsigned int *type);
+int gfs2_dir_check(struct inode *dir, const struct qstr *filename,
+                   const struct gfs2_inode *ip);
 int gfs2_dir_add(struct inode *inode, const struct qstr *filename,
-                 const struct gfs2_inum_host *inum, unsigned int type);
+                 const struct gfs2_inode *ip, unsigned int type);
 int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *filename);
 int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque,
                  filldir_t filldir);
 int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename,
-                   struct gfs2_inum_host *new_inum, unsigned int new_type);
+                   const struct gfs2_inode *nip, unsigned int new_type);
 int gfs2_dir_exhash_dealloc(struct gfs2_inode *dip);
diff --git a/fs/gfs2/eattr.c b/fs/gfs2/eattr.c
index 5b83ca6acab1..2a7435b5c4dc 100644
--- a/fs/gfs2/eattr.c
+++ b/fs/gfs2/eattr.c
@@ -254,7 +254,7 @@ static int ea_dealloc_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh,
        if (error)
                return error;
-        error = gfs2_trans_begin(sdp, rgd->rd_ri.ri_length + RES_DINODE +
+        error = gfs2_trans_begin(sdp, rgd->rd_length + RES_DINODE +
                                 RES_EATTR + RES_STATFS + RES_QUOTA, blks);
        if (error)
                goto out_gunlock;
@@ -300,7 +300,7 @@ static int ea_dealloc_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh,
        error = gfs2_meta_inode_buffer(ip, &dibh);
        if (!error) {
-                ip->i_inode.i_ctime = CURRENT_TIME_SEC;
+                ip->i_inode.i_ctime = CURRENT_TIME;
                gfs2_trans_add_bh(ip->i_gl, dibh, 1);
                gfs2_dinode_out(ip, dibh->b_data);
                brelse(dibh);
@@ -700,7 +700,7 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er,
                goto out_gunlock_q;
        error = gfs2_trans_begin(GFS2_SB(&ip->i_inode),
-                                 blks + al->al_rgd->rd_ri.ri_length +
+                                 blks + al->al_rgd->rd_length +
                                 RES_DINODE + RES_STATFS + RES_QUOTA, 0);
        if (error)
                goto out_ipres;
@@ -717,7 +717,7 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er,
                                            (er->er_mode & S_IFMT));
                        ip->i_inode.i_mode = er->er_mode;
                }
-                ip->i_inode.i_ctime = CURRENT_TIME_SEC;
+                ip->i_inode.i_ctime = CURRENT_TIME;
                gfs2_trans_add_bh(ip->i_gl, dibh, 1);
                gfs2_dinode_out(ip, dibh->b_data);
                brelse(dibh);
@@ -852,7 +852,7 @@ static int ea_set_simple_noalloc(struct gfs2_inode *ip, struct buffer_head *bh,
                        (ip->i_inode.i_mode & S_IFMT) == (er->er_mode & S_IFMT));
                ip->i_inode.i_mode = er->er_mode;
        }
-        ip->i_inode.i_ctime = CURRENT_TIME_SEC;
+        ip->i_inode.i_ctime = CURRENT_TIME;
        gfs2_trans_add_bh(ip->i_gl, dibh, 1);
        gfs2_dinode_out(ip, dibh->b_data);
        brelse(dibh);
@@ -1133,7 +1133,7 @@ static int ea_remove_stuffed(struct gfs2_inode *ip, struct gfs2_ea_location *el)
        error = gfs2_meta_inode_buffer(ip, &dibh);
        if (!error) {
-                ip->i_inode.i_ctime = CURRENT_TIME_SEC;
+                ip->i_inode.i_ctime = CURRENT_TIME;
                gfs2_trans_add_bh(ip->i_gl, dibh, 1);
                gfs2_dinode_out(ip, dibh->b_data);
                brelse(dibh);
@@ -1352,7 +1352,7 @@ static int ea_dealloc_indirect(struct gfs2_inode *ip)
        for (x = 0; x < rlist.rl_rgrps; x++) {
                struct gfs2_rgrpd *rgd;
                rgd = rlist.rl_ghs[x].gh_gl->gl_object;
-                rg_blocks += rgd->rd_ri.ri_length;
+                rg_blocks += rgd->rd_length;
        }
        error = gfs2_glock_nq_m(rlist.rl_rgrps, rlist.rl_ghs);
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 1815429a2978..3f0974e1afef 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -422,11 +422,11 @@ void gfs2_holder_uninit(struct gfs2_holder *gh)
 static void gfs2_holder_wake(struct gfs2_holder *gh)
 {
        clear_bit(HIF_WAIT, &gh->gh_iflags);
-        smp_mb();
+        smp_mb__after_clear_bit();
        wake_up_bit(&gh->gh_iflags, HIF_WAIT);
 }
-static int holder_wait(void *word)
+static int just_schedule(void *word)
 {
        schedule();
        return 0;
@@ -435,7 +435,20 @@ static int holder_wait(void *word)
 static void wait_on_holder(struct gfs2_holder *gh)
 {
        might_sleep();
-        wait_on_bit(&gh->gh_iflags, HIF_WAIT, holder_wait, TASK_UNINTERRUPTIBLE);
+        wait_on_bit(&gh->gh_iflags, HIF_WAIT, just_schedule, TASK_UNINTERRUPTIBLE);
+}
+static void gfs2_demote_wake(struct gfs2_glock *gl) /* clear GLF_DEMOTE on @gl and wake waiters on that bit */
+{
+        clear_bit(GLF_DEMOTE, &gl->gl_flags);
+        smp_mb__after_clear_bit(); /* barrier between clearing the bit and waking waiters */
+        wake_up_bit(&gl->gl_flags, GLF_DEMOTE); /* pairs with wait_on_bit() in wait_on_demote() */
+}
+static void wait_on_demote(struct gfs2_glock *gl) /* block until GLF_DEMOTE is no longer set on @gl */
+{
+        might_sleep();
+        wait_on_bit(&gl->gl_flags, GLF_DEMOTE, just_schedule, TASK_UNINTERRUPTIBLE); /* woken by gfs2_demote_wake() */
 }
 /**
@@ -528,7 +541,7 @@ static int rq_demote(struct gfs2_glock *gl)
        if (gl->gl_state == gl->gl_demote_state ||
            gl->gl_state == LM_ST_UNLOCKED) {
-                clear_bit(GLF_DEMOTE, &gl->gl_flags);
+                gfs2_demote_wake(gl);
                return 0;
        }
        set_bit(GLF_LOCK, &gl->gl_flags);
@@ -666,12 +679,22 @@ static void gfs2_glmutex_unlock(struct gfs2_glock *gl)
 * practise: LM_ST_SHARED and LM_ST_UNLOCKED
 */
-static void handle_callback(struct gfs2_glock *gl, unsigned int state)
+static void handle_callback(struct gfs2_glock *gl, unsigned int state, int remote)
 {
        spin_lock(&gl->gl_spin);
        if (test_and_set_bit(GLF_DEMOTE, &gl->gl_flags) == 0) {
                gl->gl_demote_state = state;
                gl->gl_demote_time = jiffies;
+                if (remote && gl->gl_ops->go_type == LM_TYPE_IOPEN &&
+                    gl->gl_object) {
+                        struct inode *inode = igrab(gl->gl_object);
+                        spin_unlock(&gl->gl_spin);
+                        if (inode) {
+                                d_prune_aliases(inode);
+                                iput(inode);
+                        }
+                        return;
+                }
        } else if (gl->gl_demote_state != LM_ST_UNLOCKED) {
                gl->gl_demote_state = state;
        }
@@ -740,7 +763,7 @@ static void xmote_bh(struct gfs2_glock *gl, unsigned int ret)
                if (ret & LM_OUT_CANCELED)
                        op_done = 0;
                else
-                        clear_bit(GLF_DEMOTE, &gl->gl_flags);
+                        gfs2_demote_wake(gl);
        } else {
                spin_lock(&gl->gl_spin);
                list_del_init(&gh->gh_list);
@@ -848,7 +871,7 @@ static void drop_bh(struct gfs2_glock *gl, unsigned int ret)
        gfs2_assert_warn(sdp, !ret);
        state_change(gl, LM_ST_UNLOCKED);
-        clear_bit(GLF_DEMOTE, &gl->gl_flags);
+        gfs2_demote_wake(gl);
        if (glops->go_inval)
                glops->go_inval(gl, DIO_METADATA);
@@ -1174,7 +1197,7 @@ void gfs2_glock_dq(struct gfs2_holder *gh)
        const struct gfs2_glock_operations *glops = gl->gl_ops;
        if (gh->gh_flags & GL_NOCACHE)
-                handle_callback(gl, LM_ST_UNLOCKED);
+                handle_callback(gl, LM_ST_UNLOCKED, 0);
        gfs2_glmutex_lock(gl);
@@ -1196,6 +1219,13 @@ void gfs2_glock_dq(struct gfs2_holder *gh)
        spin_unlock(&gl->gl_spin);
 }
+void gfs2_glock_dq_wait(struct gfs2_holder *gh) /* dequeue @gh, then wait for a pending demote of its glock */
+{
+        struct gfs2_glock *gl = gh->gh_gl; /* fetched before the dequeue, used for the wait afterwards */
+        gfs2_glock_dq(gh);
+        wait_on_demote(gl); /* returns once GLF_DEMOTE is clear on gl */
+}
 /**
 * gfs2_glock_dq_uninit - dequeue a holder from a glock and initialize it
 * @gh: the holder structure
@@ -1297,10 +1327,6 @@ static int nq_m_sync(unsigned int num_gh, struct gfs2_holder *ghs,
 * @num_gh: the number of structures
 * @ghs: an array of struct gfs2_holder structures
 *
- * Figure out how big an impact this function has.  Either:
- * 1) Replace this code with code that calls gfs2_glock_prefetch()
- * 2) Forget async stuff and just call nq_m_sync()
- * 3) Leave it like it is
 *
 * Returns: 0 on success (all glocks acquired),
 *          errno on failure (no glocks acquired)
@@ -1308,62 +1334,28 @@ static int nq_m_sync(unsigned int num_gh, struct gfs2_holder *ghs,
 int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs)
 {
-        int *e;
+        struct gfs2_holder *tmp[4];
-        unsigned int x;
+        struct gfs2_holder **pph = tmp;
-        int borked = 0, serious = 0;
        int error = 0;
-        if (!num_gh)
+        switch(num_gh) {
+        case 0:
                return 0;
+        case 1:
-        if (num_gh == 1) {
                ghs->gh_flags &= ~(LM_FLAG_TRY | GL_ASYNC);
                return gfs2_glock_nq(ghs);
-        }
+        default:
+                if (num_gh <= 4)
-        e = kcalloc(num_gh, sizeof(struct gfs2_holder *), GFP_KERNEL);
-        if (!e)
-                return -ENOMEM;
-        for (x = 0; x < num_gh; x++) {
-                ghs[x].gh_flags |= LM_FLAG_TRY | GL_ASYNC;
-                error = gfs2_glock_nq(&ghs[x]);
-                if (error) {
-                        borked = 1;
-                        serious = error;
-                        num_gh = x;
                        break;
-                }
+                pph = kmalloc(num_gh * sizeof(struct gfs2_holder *), GFP_NOFS);
-        }
+                if (!pph)
+                        return -ENOMEM;
-        for (x = 0; x < num_gh; x++) {
-                error = e[x] = glock_wait_internal(&ghs[x]);
-                if (error) {
-                        borked = 1;
-                        if (error != GLR_TRYFAILED && error != GLR_CANCELED)
-                                serious = error;
-                }
        }
-        if (!borked) {
+        error = nq_m_sync(num_gh, ghs, pph);
-                kfree(e);
-                return 0;
-        }
-        for (x = 0; x < num_gh; x++)
-                if (!e[x])
-                        gfs2_glock_dq(&ghs[x]);
-        if (serious)
-                error = serious;
-        else {
-                for (x = 0; x < num_gh; x++)
-                        gfs2_holder_reinit(ghs[x].gh_state, ghs[x].gh_flags,
-                                          &ghs[x]);
-                error = nq_m_sync(num_gh, ghs, (struct gfs2_holder **)e);
-        }
-        kfree(e);
+        if (pph != tmp)
+                kfree(pph);
        return error;
 }
@@ -1456,7 +1448,7 @@ static void blocking_cb(struct gfs2_sbd *sdp, struct lm_lockname *name,
        if (!gl)
                return;
-        handle_callback(gl, state);
+        handle_callback(gl, state, 1);
        spin_lock(&gl->gl_spin);
        run_queue(gl);
@@ -1596,7 +1588,7 @@ void gfs2_reclaim_glock(struct gfs2_sbd *sdp)
        if (gfs2_glmutex_trylock(gl)) {
                if (list_empty(&gl->gl_holders) &&
                    gl->gl_state != LM_ST_UNLOCKED && demote_ok(gl))
-                        handle_callback(gl, LM_ST_UNLOCKED);
+                        handle_callback(gl, LM_ST_UNLOCKED, 0);
                gfs2_glmutex_unlock(gl);
        }
@@ -1709,7 +1701,7 @@ static void clear_glock(struct gfs2_glock *gl)
        if (gfs2_glmutex_trylock(gl)) {
                if (list_empty(&gl->gl_holders) &&
                    gl->gl_state != LM_ST_UNLOCKED)
-                        handle_callback(gl, LM_ST_UNLOCKED);
+                        handle_callback(gl, LM_ST_UNLOCKED, 0);
                gfs2_glmutex_unlock(gl);
        }
 }
@@ -1823,7 +1815,8 @@ static int dump_inode(struct glock_iter *gi, struct gfs2_inode *ip)
        print_dbg(gi, "  Inode:\n");
        print_dbg(gi, "    num = %llu/%llu\n",
-                    ip->i_num.no_formal_ino, ip->i_num.no_addr);
+                  (unsigned long long)ip->i_no_formal_ino,
+                  (unsigned long long)ip->i_no_addr);
        print_dbg(gi, "    type = %u\n", IF2DT(ip->i_inode.i_mode));
        print_dbg(gi, "    i_flags =");
        for (x = 0; x < 32; x++)
@@ -1909,8 +1902,8 @@ static int dump_glock(struct glock_iter *gi, struct gfs2_glock *gl)
        }
        if (test_bit(GLF_DEMOTE, &gl->gl_flags)) {
                print_dbg(gi, "  Demotion req to state %u (%llu uS ago)\n",
-                          gl->gl_demote_state,
+                          gl->gl_demote_state, (unsigned long long)
-                          (u64)(jiffies - gl->gl_demote_time)*(1000000/HZ));
+                          (jiffies - gl->gl_demote_time)*(1000000/HZ));
        }
        if (gl->gl_ops == &gfs2_inode_glops && gl->gl_object) {
                if (!test_bit(GLF_LOCK, &gl->gl_flags) &&
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index b3e152db70c8..7721ca3fff9e 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -87,6 +87,7 @@ int gfs2_glock_nq(struct gfs2_holder *gh);
 int gfs2_glock_poll(struct gfs2_holder *gh);
 int gfs2_glock_wait(struct gfs2_holder *gh);
 void gfs2_glock_dq(struct gfs2_holder *gh);
+void gfs2_glock_dq_wait(struct gfs2_holder *gh);
 void gfs2_glock_dq_uninit(struct gfs2_holder *gh);
 int gfs2_glock_nq_num(struct gfs2_sbd *sdp,
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 7b82657a9910..777ca46010e8 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -156,9 +156,9 @@ static void inode_go_sync(struct gfs2_glock *gl)
                ip = NULL;
        if (test_bit(GLF_DIRTY, &gl->gl_flags)) {
-                gfs2_log_flush(gl->gl_sbd, gl);
                if (ip)
                        filemap_fdatawrite(ip->i_inode.i_mapping);
+                gfs2_log_flush(gl->gl_sbd, gl);
                gfs2_meta_sync(gl);
                if (ip) {
                        struct address_space *mapping = ip->i_inode.i_mapping;
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index d995441373ab..170ba93829c0 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -28,6 +28,14 @@ struct gfs2_sbd;
 typedef void (*gfs2_glop_bh_t) (struct gfs2_glock *gl, unsigned int ret);
+struct gfs2_log_header_host {
+        u64 lh_sequence;        /* Sequence number of this transaction */
+        u32 lh_flags;           /* GFS2_LOG_HEAD_... */
+        u32 lh_tail;            /* Block number of log tail */
+        u32 lh_blkno;
+        u32 lh_hash;
+};
 /*
 * Structure of operations that are associated with each
 * type of element in the log.
@@ -60,12 +68,23 @@ struct gfs2_bitmap {
        u32 bi_len;
 };
+struct gfs2_rgrp_host {
+        u32 rg_flags;
+        u32 rg_free;
+        u32 rg_dinodes;
+        u64 rg_igeneration;
+};
 struct gfs2_rgrpd {
        struct list_head rd_list;       /* Link with superblock */
        struct list_head rd_list_mru;
        struct list_head rd_recent;     /* Recently used rgrps */
        struct gfs2_glock *rd_gl;       /* Glock for this rgrp */
-        struct gfs2_rindex_host rd_ri;
+        u64 rd_addr;                    /* grp block disk address */
+        u64 rd_data0;                   /* first data location */
+        u32 rd_length;                  /* length of rgrp header in fs blocks */
+        u32 rd_data;                    /* num of data blocks in rgrp */
+        u32 rd_bitbytes;                /* number of bytes in data bitmaps */
        struct gfs2_rgrp_host rd_rg;
        u64 rd_rg_vn;
        struct gfs2_bitmap *rd_bits;
@@ -76,6 +95,8 @@ struct gfs2_rgrpd {
        u32 rd_last_alloc_data;
        u32 rd_last_alloc_meta;
        struct gfs2_sbd *rd_sbd;
+        unsigned long rd_flags;
+#define GFS2_RDF_CHECK        0x0001          /* Need to check for unlinked inodes */
 };
 enum gfs2_state_bits {
@@ -211,10 +232,24 @@ enum {
        GIF_SW_PAGED            = 3,
 };
+struct gfs2_dinode_host {
+        u64 di_size;            /* number of bytes in file */
+        u64 di_blocks;          /* number of blocks in file */
+        u64 di_goal_meta;       /* rgrp to alloc from next */
+        u64 di_goal_data;       /* data block goal */
+        u64 di_generation;      /* generation number for NFS */
+        u32 di_flags;           /* GFS2_DIF_... */
+        u16 di_height;          /* height of metadata */
+        /* These only apply to directories  */
+        u16 di_depth;           /* Number of bits in the table */
+        u32 di_entries;         /* The number of entries in the directory */
+        u64 di_eattr;           /* extended attribute block number */
+};
 struct gfs2_inode {
        struct inode i_inode;
-        struct gfs2_inum_host i_num;
+        u64 i_no_addr;
+        u64 i_no_formal_ino;
        unsigned long i_flags;          /* GIF_... */
        struct gfs2_dinode_host i_di; /* To be replaced by ref to block */
@@ -275,14 +310,6 @@ enum {
        QDF_LOCKED              = 2,
 };
-struct gfs2_quota_lvb {
-        __be32 qb_magic;
-        u32 __pad;
-        __be64 qb_limit;      /* Hard limit of # blocks to alloc */
-        __be64 qb_warn;       /* Warn user when alloc is above this # */
-        __be64 qb_value;       /* Current # blocks allocated */
-};
 struct gfs2_quota_data {
        struct list_head qd_list;
        unsigned int qd_count;
@@ -327,7 +354,9 @@ struct gfs2_trans {
        unsigned int tr_num_buf;
        unsigned int tr_num_buf_new;
+        unsigned int tr_num_databuf_new;
        unsigned int tr_num_buf_rm;
+        unsigned int tr_num_databuf_rm;
        struct list_head tr_list_buf;
        unsigned int tr_num_revoke;
@@ -354,6 +383,12 @@ struct gfs2_jdesc {
        unsigned int jd_blocks;
 };
+struct gfs2_statfs_change_host {
+        s64 sc_total;
+        s64 sc_free;
+        s64 sc_dinodes;
+};
 #define GFS2_GLOCKD_DEFAULT     1
 #define GFS2_GLOCKD_MAX         16
@@ -426,6 +461,28 @@ enum {
 #define GFS2_FSNAME_LEN         256
+struct gfs2_inum_host {
+        u64 no_formal_ino;
+        u64 no_addr;
+};
+struct gfs2_sb_host {
+        u32 sb_magic;
+        u32 sb_type;
+        u32 sb_format;
+        u32 sb_fs_format;
+        u32 sb_multihost_format;
+        u32 sb_bsize;
+        u32 sb_bsize_shift;
+        struct gfs2_inum_host sb_master_dir;
+        struct gfs2_inum_host sb_root_dir;
+        char sb_lockproto[GFS2_LOCKNAME_LEN];
+        char sb_locktable[GFS2_LOCKNAME_LEN];
+};
 struct gfs2_sbd {
        struct super_block *sd_vfs;
        struct super_block *sd_vfs_meta;
@@ -544,6 +601,7 @@ struct gfs2_sbd {
        unsigned int sd_log_blks_reserved;
        unsigned int sd_log_commited_buf;
+        unsigned int sd_log_commited_databuf;
        unsigned int sd_log_commited_revoke;
        unsigned int sd_log_num_gl;
@@ -552,7 +610,6 @@ struct gfs2_sbd {
        unsigned int sd_log_num_rg;
        unsigned int sd_log_num_databuf;
        unsigned int sd_log_num_jdata;
-        unsigned int sd_log_num_hdrs;
        struct list_head sd_log_le_gl;
        struct list_head sd_log_le_buf;
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index df0b8b3018b9..34f7bcdea1e9 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -38,12 +38,17 @@
 #include "trans.h"
 #include "util.h"
+struct gfs2_inum_range_host { /* CPU-endian form of struct gfs2_inum_range */
+        u64 ir_start; /* converted with be64_to_cpu()/cpu_to_be64() by gfs2_inum_range_in()/_out() */
+        u64 ir_length; /* converted the same way as ir_start */
+};
 static int iget_test(struct inode *inode, void *opaque)
 {
        struct gfs2_inode *ip = GFS2_I(inode);
-        struct gfs2_inum_host *inum = opaque;
+        u64 *no_addr = opaque;
-        if (ip->i_num.no_addr == inum->no_addr &&
+        if (ip->i_no_addr == *no_addr &&
            inode->i_private != NULL)
                return 1;
@@ -53,37 +58,70 @@ static int iget_test(struct inode *inode, void *opaque)
 static int iget_set(struct inode *inode, void *opaque)
 {
        struct gfs2_inode *ip = GFS2_I(inode);
-        struct gfs2_inum_host *inum = opaque;
+        u64 *no_addr = opaque;
-        ip->i_num = *inum;
+        inode->i_ino = (unsigned long)*no_addr;
-        inode->i_ino = inum->no_addr;
+        ip->i_no_addr = *no_addr;
        return 0;
 }
-struct inode *gfs2_ilookup(struct super_block *sb, struct gfs2_inum_host *inum)
+struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr) /* look up an inode on @sb by its no_addr via ilookup5() */
+{
+        unsigned long hash = (unsigned long)no_addr; /* hash value only; the cast can drop high bits where unsigned long is narrower than u64 */
+        return ilookup5(sb, hash, iget_test, &no_addr); /* iget_test() compares against the full u64 no_addr */
+}
+static struct inode *gfs2_iget(struct super_block *sb, u64 no_addr)
 {
-        return ilookup5(sb, (unsigned long)inum->no_addr,
+        unsigned long hash = (unsigned long)no_addr;
-                        iget_test, inum);
+        return iget5_locked(sb, hash, iget_test, iget_set, &no_addr);
 }
-static struct inode *gfs2_iget(struct super_block *sb, struct gfs2_inum_host *inum)
+/**
+ * The GFS2 lookup code fills in the VFS inode's contents from information
+ * obtained from the directory entry inside gfs2_inode_lookup(). This causes
+ * problems on the NFS code path, because its get_dentry routine does not have
+ * the relevant directory entry when gfs2_inode_lookup() is invoked. That part
+ * of gfs2_inode_lookup() is therefore split out into this helper.
+ *
+ * This also cleans up I_LOCK and I_NEW (via unlock_new_inode()).
+ **/
+void gfs2_set_iop(struct inode *inode)
 {
-        return iget5_locked(sb, (unsigned long)inum->no_addr,
+        umode_t mode = inode->i_mode;
-                     iget_test, iget_set, inum);
+        if (S_ISREG(mode)) {
+                inode->i_op = &gfs2_file_iops;
+                inode->i_fop = &gfs2_file_fops;
+                inode->i_mapping->a_ops = &gfs2_file_aops;
+        } else if (S_ISDIR(mode)) {
+                inode->i_op = &gfs2_dir_iops;
+                inode->i_fop = &gfs2_dir_fops;
+        } else if (S_ISLNK(mode)) {
+                inode->i_op = &gfs2_symlink_iops;
+        } else {
+                inode->i_op = &gfs2_dev_iops;
+        }
+        unlock_new_inode(inode);
 }
 /**
 * gfs2_inode_lookup - Lookup an inode
 * @sb: The super block
- * @inum: The inode number
+ * @no_addr: The inode number
 * @type: The type of the inode
 *
 * Returns: A VFS inode, or an error
 */
-struct inode *gfs2_inode_lookup(struct super_block *sb, struct gfs2_inum_host *inum, unsigned int type)
+struct inode *gfs2_inode_lookup(struct super_block *sb, 
+                                unsigned int type,
+                                u64 no_addr,
+                                u64 no_formal_ino)
 {
-        struct inode *inode = gfs2_iget(sb, inum);
+        struct inode *inode = gfs2_iget(sb, no_addr);
        struct gfs2_inode *ip = GFS2_I(inode);
        struct gfs2_glock *io_gl;
        int error;
@@ -93,29 +131,15 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, struct gfs2_inum_host *i
        if (inode->i_state & I_NEW) {
                struct gfs2_sbd *sdp = GFS2_SB(inode);
-                umode_t mode = DT2IF(type);
                inode->i_private = ip;
-                inode->i_mode = mode;
+                ip->i_no_formal_ino = no_formal_ino;
-                if (S_ISREG(mode)) {
-                        inode->i_op = &gfs2_file_iops;
-                        inode->i_fop = &gfs2_file_fops;
-                        inode->i_mapping->a_ops = &gfs2_file_aops;
-                } else if (S_ISDIR(mode)) {
-                        inode->i_op = &gfs2_dir_iops;
-                        inode->i_fop = &gfs2_dir_fops;
-                } else if (S_ISLNK(mode)) {
-                        inode->i_op = &gfs2_symlink_iops;
-                } else {
-                        inode->i_op = &gfs2_dev_iops;
-                }
-                error = gfs2_glock_get(sdp, inum->no_addr, &gfs2_inode_glops, CREATE, &ip->i_gl);
+                error = gfs2_glock_get(sdp, no_addr, &gfs2_inode_glops, CREATE, &ip->i_gl);
                if (unlikely(error))
                        goto fail;
                ip->i_gl->gl_object = ip;
-                error = gfs2_glock_get(sdp, inum->no_addr, &gfs2_iopen_glops, CREATE, &io_gl);
+                error = gfs2_glock_get(sdp, no_addr, &gfs2_iopen_glops, CREATE, &io_gl);
                if (unlikely(error))
                        goto fail_put;
@@ -123,12 +147,38 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, struct gfs2_inum_host *i
                error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT, &ip->i_iopen_gh);
                if (unlikely(error))
                        goto fail_iopen;
+                ip->i_iopen_gh.gh_gl->gl_object = ip;
                gfs2_glock_put(io_gl);
-                unlock_new_inode(inode);
+                if ((type == DT_UNKNOWN) && (no_formal_ino == 0))
+                        goto gfs2_nfsbypass;
+                inode->i_mode = DT2IF(type);
+                /*
+                 * We must read the inode in order to work out its type in
+                 * this case. Note that this doesn't happen often as we normally
+                 * know the type beforehand. This code path only occurs during
+                 * unlinked inode recovery (where it is safe to do this glock,
+                 * which is not true in the general case).
+                 */
+                if (type == DT_UNKNOWN) {
+                        struct gfs2_holder gh;
+                        error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
+                        if (unlikely(error))
+                                goto fail_glock;
+                        /* Inode is now uptodate */
+                        gfs2_glock_dq_uninit(&gh);
+                }
+                gfs2_set_iop(inode);
        }
+gfs2_nfsbypass:
        return inode;
+fail_glock:
+        gfs2_glock_dq(&ip->i_iopen_gh);
 fail_iopen:
        gfs2_glock_put(io_gl);
 fail_put:
@@ -144,14 +194,12 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
        struct gfs2_dinode_host *di = &ip->i_di;
        const struct gfs2_dinode *str = buf;
-        if (ip->i_num.no_addr != be64_to_cpu(str->di_num.no_addr)) {
+        if (ip->i_no_addr != be64_to_cpu(str->di_num.no_addr)) {
                if (gfs2_consist_inode(ip))
                        gfs2_dinode_print(ip);
                return -EIO;
        }
-        if (ip->i_num.no_formal_ino != be64_to_cpu(str->di_num.no_formal_ino))
+        ip->i_no_formal_ino = be64_to_cpu(str->di_num.no_formal_ino);
-                return -ESTALE;
        ip->i_inode.i_mode = be32_to_cpu(str->di_mode);
        ip->i_inode.i_rdev = 0;
        switch (ip->i_inode.i_mode & S_IFMT) {
@@ -175,11 +223,11 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
        di->di_blocks = be64_to_cpu(str->di_blocks);
        gfs2_set_inode_blocks(&ip->i_inode);
        ip->i_inode.i_atime.tv_sec = be64_to_cpu(str->di_atime);
-        ip->i_inode.i_atime.tv_nsec = 0;
+        ip->i_inode.i_atime.tv_nsec = be32_to_cpu(str->di_atime_nsec);
        ip->i_inode.i_mtime.tv_sec = be64_to_cpu(str->di_mtime);
-        ip->i_inode.i_mtime.tv_nsec = 0;
+        ip->i_inode.i_mtime.tv_nsec = be32_to_cpu(str->di_mtime_nsec);
        ip->i_inode.i_ctime.tv_sec = be64_to_cpu(str->di_ctime);
-        ip->i_inode.i_ctime.tv_nsec = 0;
+        ip->i_inode.i_ctime.tv_nsec = be32_to_cpu(str->di_ctime_nsec);
        di->di_goal_meta = be64_to_cpu(str->di_goal_meta);
        di->di_goal_data = be64_to_cpu(str->di_goal_data);
@@ -247,7 +295,7 @@ int gfs2_dinode_dealloc(struct gfs2_inode *ip)
        if (error)
                goto out_qs;
-        rgd = gfs2_blk2rgrpd(sdp, ip->i_num.no_addr);
+        rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr);
        if (!rgd) {
                gfs2_consist_inode(ip);
                error = -EIO;
@@ -314,7 +362,7 @@ int gfs2_change_nlink(struct gfs2_inode *ip, int diff)
        else
                drop_nlink(&ip->i_inode);
-        ip->i_inode.i_ctime = CURRENT_TIME_SEC;
+        ip->i_inode.i_ctime = CURRENT_TIME;
        gfs2_trans_add_bh(ip->i_gl, dibh, 1);
        gfs2_dinode_out(ip, dibh->b_data);
@@ -366,9 +414,7 @@ struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name,
        struct super_block *sb = dir->i_sb;
        struct gfs2_inode *dip = GFS2_I(dir);
        struct gfs2_holder d_gh;
-        struct gfs2_inum_host inum;
+        int error = 0;
-        unsigned int type;
-        int error;
        struct inode *inode = NULL;
        int unlock = 0;
@@ -395,12 +441,9 @@ struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name,
                        goto out;
        }
-        error = gfs2_dir_search(dir, name, &inum, &type);
+        inode = gfs2_dir_search(dir, name);
-        if (error)
+        if (IS_ERR(inode))
-                goto out;
+                error = PTR_ERR(inode);
-        inode = gfs2_inode_lookup(sb, &inum, type);
 out:
        if (unlock)
                gfs2_glock_dq_uninit(&d_gh);
@@ -409,6 +452,22 @@ out:
        return inode ? inode : ERR_PTR(error);
 }
+static void gfs2_inum_range_in(struct gfs2_inum_range_host *ir, const void *buf) /* decode the big-endian gfs2_inum_range at @buf into @ir */
+{
+        const struct gfs2_inum_range *str = buf; /* @buf is read as a struct gfs2_inum_range */
+        ir->ir_start = be64_to_cpu(str->ir_start);
+        ir->ir_length = be64_to_cpu(str->ir_length);
+}
+static void gfs2_inum_range_out(const struct gfs2_inum_range_host *ir, void *buf) /* encode @ir into @buf as a big-endian gfs2_inum_range */
+{
+        struct gfs2_inum_range *str = buf; /* @buf is written as a struct gfs2_inum_range */
+        str->ir_start = cpu_to_be64(ir->ir_start);
+        str->ir_length = cpu_to_be64(ir->ir_length);
+}
 static int pick_formal_ino_1(struct gfs2_sbd *sdp, u64 *formal_ino)
 {
        struct gfs2_inode *ip = GFS2_I(sdp->sd_ir_inode);
@@ -548,7 +607,7 @@ static int create_ok(struct gfs2_inode *dip, const struct qstr *name,
        if (!dip->i_inode.i_nlink)
                return -EPERM;
-        error = gfs2_dir_search(&dip->i_inode, name, NULL, NULL);
+        error = gfs2_dir_check(&dip->i_inode, name, NULL);
        switch (error) {
        case -ENOENT:
                error = 0;
@@ -588,8 +647,7 @@ static void munge_mode_uid_gid(struct gfs2_inode *dip, unsigned int *mode,
                *gid = current->fsgid;
 }
-static int alloc_dinode(struct gfs2_inode *dip, struct gfs2_inum_host *inum,
+static int alloc_dinode(struct gfs2_inode *dip, u64 *no_addr, u64 *generation)
-                        u64 *generation)
 {
        struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
        int error;
@@ -605,7 +663,7 @@ static int alloc_dinode(struct gfs2_inode *dip, struct gfs2_inum_host *inum,
        if (error)
                goto out_ipreserv;
-        inum->no_addr = gfs2_alloc_di(dip, generation);
+        *no_addr = gfs2_alloc_di(dip, generation);
        gfs2_trans_end(sdp);
@@ -635,6 +693,7 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
        struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
        struct gfs2_dinode *di;
        struct buffer_head *dibh;
+        struct timespec tv = CURRENT_TIME;
        dibh = gfs2_meta_new(gl, inum->no_addr);
        gfs2_trans_add_bh(gl, dibh, 1);
@@ -650,7 +709,7 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
        di->di_nlink = 0;
        di->di_size = 0;
        di->di_blocks = cpu_to_be64(1);
-        di->di_atime = di->di_mtime = di->di_ctime = cpu_to_be64(get_seconds());
+        di->di_atime = di->di_mtime = di->di_ctime = cpu_to_be64(tv.tv_sec);
        di->di_major = cpu_to_be32(MAJOR(dev));
        di->di_minor = cpu_to_be32(MINOR(dev));
        di->di_goal_meta = di->di_goal_data = cpu_to_be64(inum->no_addr);
@@ -680,6 +739,9 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
        di->di_entries = 0;
        memset(&di->__pad4, 0, sizeof(di->__pad4));
        di->di_eattr = 0;
+        di->di_atime_nsec = cpu_to_be32(tv.tv_nsec);
+        di->di_mtime_nsec = cpu_to_be32(tv.tv_nsec);
+        di->di_ctime_nsec = cpu_to_be32(tv.tv_nsec);
        memset(&di->di_reserved, 0, sizeof(di->di_reserved));
        brelse(dibh);
@@ -749,7 +811,7 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name,
                        goto fail_quota_locks;
                error = gfs2_trans_begin(sdp, sdp->sd_max_dirres +
-                                         al->al_rgd->rd_ri.ri_length +
+                                         al->al_rgd->rd_length +
                                         2 * RES_DINODE +
                                         RES_STATFS + RES_QUOTA, 0);
                if (error)
@@ -760,7 +822,7 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name,
                        goto fail_quota_locks;
        }
-        error = gfs2_dir_add(&dip->i_inode, name, &ip->i_num, IF2DT(ip->i_inode.i_mode));
+        error = gfs2_dir_add(&dip->i_inode, name, ip, IF2DT(ip->i_inode.i_mode));
        if (error)
                goto fail_end_trans;
@@ -840,11 +902,11 @@ static int gfs2_security_init(struct gfs2_inode *dip, struct gfs2_inode *ip)
 struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name,
                           unsigned int mode, dev_t dev)
 {
-        struct inode *inode;
+        struct inode *inode = NULL;
        struct gfs2_inode *dip = ghs->gh_gl->gl_object;
        struct inode *dir = &dip->i_inode;
        struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
-        struct gfs2_inum_host inum;
+        struct gfs2_inum_host inum = { .no_addr = 0, .no_formal_ino = 0 };
        int error;
        u64 generation;
@@ -864,7 +926,7 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name,
        if (error)
                goto fail_gunlock;
-        error = alloc_dinode(dip, &inum, &generation);
+        error = alloc_dinode(dip, &inum.no_addr, &generation);
        if (error)
                goto fail_gunlock;
@@ -877,34 +939,36 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name,
        if (error)
                goto fail_gunlock2;
-        inode = gfs2_inode_lookup(dir->i_sb, &inum, IF2DT(mode));
+        inode = gfs2_inode_lookup(dir->i_sb, IF2DT(mode),
+                                        inum.no_addr,
+                                        inum.no_formal_ino);
        if (IS_ERR(inode))
                goto fail_gunlock2;
        error = gfs2_inode_refresh(GFS2_I(inode));
        if (error)
-                goto fail_iput;
+                goto fail_gunlock2;
        error = gfs2_acl_create(dip, GFS2_I(inode));
        if (error)
-                goto fail_iput;
+                goto fail_gunlock2;
        error = gfs2_security_init(dip, GFS2_I(inode));
        if (error)
-                goto fail_iput;
+                goto fail_gunlock2;
        error = link_dinode(dip, name, GFS2_I(inode));
        if (error)
-                goto fail_iput;
+                goto fail_gunlock2;
        if (!inode)
                return ERR_PTR(-ENOMEM);
        return inode;
-fail_iput:
-        iput(inode);
 fail_gunlock2:
        gfs2_glock_dq_uninit(ghs + 1);
+        if (inode && !IS_ERR(inode)) /* lookup may have left an ERR_PTR */
+                iput(inode);
 fail_gunlock:
        gfs2_glock_dq(ghs);
 fail:
@@ -976,10 +1040,8 @@ int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name,
 */
 int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name,
-                   struct gfs2_inode *ip)
+                   const struct gfs2_inode *ip)
 {
-        struct gfs2_inum_host inum;
-        unsigned int type;
        int error;
        if (IS_IMMUTABLE(&ip->i_inode) || IS_APPEND(&ip->i_inode))
@@ -997,18 +1059,10 @@ int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name,
        if (error)
                return error;
-        error = gfs2_dir_search(&dip->i_inode, name, &inum, &type);
+        error = gfs2_dir_check(&dip->i_inode, name, ip);
        if (error)
                return error;
-        if (!gfs2_inum_equal(&inum, &ip->i_num))
-                return -ENOENT;
-        if (IF2DT(ip->i_inode.i_mode) != type) {
-                gfs2_consist_inode(dip);
-                return -EIO;
-        }
        return 0;
 }
@@ -1132,10 +1186,11 @@ int gfs2_glock_nq_atime(struct gfs2_holder *gh)
        struct gfs2_glock *gl = gh->gh_gl;
        struct gfs2_sbd *sdp = gl->gl_sbd;
        struct gfs2_inode *ip = gl->gl_object;
-        s64 curtime, quantum = gfs2_tune_get(sdp, gt_atime_quantum);
+        s64 quantum = gfs2_tune_get(sdp, gt_atime_quantum);
        unsigned int state;
        int flags;
        int error;
+        struct timespec tv = CURRENT_TIME;
        if (gfs2_assert_warn(sdp, gh->gh_flags & GL_ATIME) ||
            gfs2_assert_warn(sdp, !(gh->gh_flags & GL_ASYNC)) ||
@@ -1153,8 +1208,7 @@ int gfs2_glock_nq_atime(struct gfs2_holder *gh)
            (sdp->sd_vfs->s_flags & MS_RDONLY))
                return 0;
-        curtime = get_seconds();
+        if (tv.tv_sec - ip->i_inode.i_atime.tv_sec >= quantum) {
-        if (curtime - ip->i_inode.i_atime.tv_sec >= quantum) {
                gfs2_glock_dq(gh);
                gfs2_holder_reinit(LM_ST_EXCLUSIVE, gh->gh_flags & ~LM_FLAG_ANY,
                                   gh);
@@ -1165,8 +1219,8 @@ int gfs2_glock_nq_atime(struct gfs2_holder *gh)
                /* Verify that atime hasn't been updated while we were
                   trying to get exclusive lock. */
-                curtime = get_seconds();
+                tv = CURRENT_TIME;
-                if (curtime - ip->i_inode.i_atime.tv_sec >= quantum) {
+                if (tv.tv_sec - ip->i_inode.i_atime.tv_sec >= quantum) {
                        struct buffer_head *dibh;
                        struct gfs2_dinode *di;
@@ -1180,11 +1234,12 @@ int gfs2_glock_nq_atime(struct gfs2_holder *gh)
                        if (error)
                                goto fail_end_trans;
-                        ip->i_inode.i_atime.tv_sec = curtime;
+                        ip->i_inode.i_atime = tv;
                        gfs2_trans_add_bh(ip->i_gl, dibh, 1);
                        di = (struct gfs2_dinode *)dibh->b_data;
                        di->di_atime = cpu_to_be64(ip->i_inode.i_atime.tv_sec);
+                        di->di_atime_nsec = cpu_to_be32(ip->i_inode.i_atime.tv_nsec);
                        brelse(dibh);
                        gfs2_trans_end(sdp);
@@ -1252,3 +1307,66 @@ int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr)
        return error;
 }
+void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf)
+{
+        const struct gfs2_dinode_host *di = &ip->i_di;
+        struct gfs2_dinode *str = buf;
+        str->di_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
+        str->di_header.mh_type = cpu_to_be32(GFS2_METATYPE_DI);
+        str->di_header.__pad0 = 0;
+        str->di_header.mh_format = cpu_to_be32(GFS2_FORMAT_DI);
+        str->di_header.__pad1 = 0;
+        str->di_num.no_addr = cpu_to_be64(ip->i_no_addr);
+        str->di_num.no_formal_ino = cpu_to_be64(ip->i_no_formal_ino);
+        str->di_mode = cpu_to_be32(ip->i_inode.i_mode);
+        str->di_uid = cpu_to_be32(ip->i_inode.i_uid);
+        str->di_gid = cpu_to_be32(ip->i_inode.i_gid);
+        str->di_nlink = cpu_to_be32(ip->i_inode.i_nlink);
+        str->di_size = cpu_to_be64(di->di_size);
+        str->di_blocks = cpu_to_be64(di->di_blocks);
+        str->di_atime = cpu_to_be64(ip->i_inode.i_atime.tv_sec);
+        str->di_mtime = cpu_to_be64(ip->i_inode.i_mtime.tv_sec);
+        str->di_ctime = cpu_to_be64(ip->i_inode.i_ctime.tv_sec);
+        str->di_goal_meta = cpu_to_be64(di->di_goal_meta);
+        str->di_goal_data = cpu_to_be64(di->di_goal_data);
+        str->di_generation = cpu_to_be64(di->di_generation);
+        str->di_flags = cpu_to_be32(di->di_flags);
+        str->di_height = cpu_to_be16(di->di_height);
+        str->di_payload_format = cpu_to_be32(S_ISDIR(ip->i_inode.i_mode) &&
+                                             !(ip->i_di.di_flags & GFS2_DIF_EXHASH) ?
+                                             GFS2_FORMAT_DE : 0);
+        str->di_depth = cpu_to_be16(di->di_depth);
+        str->di_entries = cpu_to_be32(di->di_entries);
+        str->di_eattr = cpu_to_be64(di->di_eattr);
+        str->di_atime_nsec = cpu_to_be32(ip->i_inode.i_atime.tv_nsec);
+        str->di_mtime_nsec = cpu_to_be32(ip->i_inode.i_mtime.tv_nsec);
+        str->di_ctime_nsec = cpu_to_be32(ip->i_inode.i_ctime.tv_nsec);
+}
+void gfs2_dinode_print(const struct gfs2_inode *ip)
+{
+        const struct gfs2_dinode_host *di = &ip->i_di;
+        printk(KERN_INFO "  no_formal_ino = %llu\n",
+               (unsigned long long)ip->i_no_formal_ino);
+        printk(KERN_INFO "  no_addr = %llu\n",
+               (unsigned long long)ip->i_no_addr);
+        printk(KERN_INFO "  di_size = %llu\n", (unsigned long long)di->di_size);
+        printk(KERN_INFO "  di_blocks = %llu\n",
+               (unsigned long long)di->di_blocks);
+        printk(KERN_INFO "  di_goal_meta = %llu\n",
+               (unsigned long long)di->di_goal_meta);
+        printk(KERN_INFO "  di_goal_data = %llu\n",
+               (unsigned long long)di->di_goal_data);
+        printk(KERN_INFO "  di_flags = 0x%.8X\n", di->di_flags);
+        printk(KERN_INFO "  di_height = %u\n", di->di_height);
+        printk(KERN_INFO "  di_depth = %u\n", di->di_depth);
+        printk(KERN_INFO "  di_entries = %u\n", di->di_entries);
+        printk(KERN_INFO "  di_eattr = %llu\n",
+               (unsigned long long)di->di_eattr);
+}
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h
index b57f448b15bc..4517ac82c01c 100644
--- a/fs/gfs2/inode.h
+++ b/fs/gfs2/inode.h
@@ -10,17 +10,17 @@
 #ifndef __INODE_DOT_H__
 #define __INODE_DOT_H__
-static inline int gfs2_is_stuffed(struct gfs2_inode *ip)
+static inline int gfs2_is_stuffed(const struct gfs2_inode *ip)
 {
        return !ip->i_di.di_height;
 }
-static inline int gfs2_is_jdata(struct gfs2_inode *ip)
+static inline int gfs2_is_jdata(const struct gfs2_inode *ip)
 {
        return ip->i_di.di_flags & GFS2_DIF_JDATA;
 }
-static inline int gfs2_is_dir(struct gfs2_inode *ip)
+static inline int gfs2_is_dir(const struct gfs2_inode *ip)
 {
        return S_ISDIR(ip->i_inode.i_mode);
 }
@@ -32,9 +32,25 @@ static inline void gfs2_set_inode_blocks(struct inode *inode)
                (GFS2_SB(inode)->sd_sb.sb_bsize_shift - GFS2_BASIC_BLOCK_SHIFT);
 }
+static inline int gfs2_check_inum(const struct gfs2_inode *ip, u64 no_addr,
+                                  u64 no_formal_ino)
+{
+        return ip->i_no_addr == no_addr && ip->i_no_formal_ino == no_formal_ino;
+}
+static inline void gfs2_inum_out(const struct gfs2_inode *ip,
+                                 struct gfs2_dirent *dent)
+{
+        dent->de_inum.no_formal_ino = cpu_to_be64(ip->i_no_formal_ino);
+        dent->de_inum.no_addr = cpu_to_be64(ip->i_no_addr);
+}
 void gfs2_inode_attr_in(struct gfs2_inode *ip);
-struct inode *gfs2_inode_lookup(struct super_block *sb, struct gfs2_inum_host *inum, unsigned type);
+void gfs2_set_iop(struct inode *inode);
-struct inode *gfs2_ilookup(struct super_block *sb, struct gfs2_inum_host *inum);
+struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type, 
+                                u64 no_addr, u64 no_formal_ino);
+struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr);
 int gfs2_inode_refresh(struct gfs2_inode *ip);
@@ -47,12 +63,14 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name,
 int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name,
                struct gfs2_inode *ip);
 int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name,
-                   struct gfs2_inode *ip);
+                   const struct gfs2_inode *ip);
 int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to);
 int gfs2_readlinki(struct gfs2_inode *ip, char **buf, unsigned int *len);
 int gfs2_glock_nq_atime(struct gfs2_holder *gh);
 int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr);
 struct inode *gfs2_lookup_simple(struct inode *dip, const char *name);
+void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf);
+void gfs2_dinode_print(const struct gfs2_inode *ip);
 #endif /* __INODE_DOT_H__ */
diff --git a/fs/gfs2/locking/dlm/lock.c b/fs/gfs2/locking/dlm/lock.c
index c305255bfe8a..542a797ac89a 100644
--- a/fs/gfs2/locking/dlm/lock.c
+++ b/fs/gfs2/locking/dlm/lock.c
@@ -174,7 +174,6 @@ static int gdlm_create_lp(struct gdlm_ls *ls, struct lm_lockname *name,
        lp->cur = DLM_LOCK_IV;
        lp->lvb = NULL;
        lp->hold_null = NULL;
-        init_completion(&lp->ast_wait);
        INIT_LIST_HEAD(&lp->clist);
        INIT_LIST_HEAD(&lp->blist);
        INIT_LIST_HEAD(&lp->delay_list);
@@ -399,6 +398,12 @@ static void gdlm_del_lvb(struct gdlm_lock *lp)
        lp->lksb.sb_lvbptr = NULL;
 }
+static int gdlm_ast_wait(void *word)
+{
+        schedule();
+        return 0;
+}
 /* This can do a synchronous dlm request (requiring a lock_dlm thread to get
   the completion) because gfs won't call hold_lvb() during a callback (from
   the context of a lock_dlm thread). */
@@ -424,10 +429,10 @@ static int hold_null_lock(struct gdlm_lock *lp)
        lpn->lkf = DLM_LKF_VALBLK | DLM_LKF_EXPEDITE;
        set_bit(LFL_NOBAST, &lpn->flags);
        set_bit(LFL_INLOCK, &lpn->flags);
+        set_bit(LFL_AST_WAIT, &lpn->flags);
-        init_completion(&lpn->ast_wait);
        gdlm_do_lock(lpn);
-        wait_for_completion(&lpn->ast_wait);
+        wait_on_bit(&lpn->flags, LFL_AST_WAIT, gdlm_ast_wait, TASK_UNINTERRUPTIBLE);
        error = lpn->lksb.sb_status;
        if (error) {
                printk(KERN_INFO "lock_dlm: hold_null_lock dlm error %d\n",
diff --git a/fs/gfs2/locking/dlm/lock_dlm.h b/fs/gfs2/locking/dlm/lock_dlm.h
index d074c6e6f9bf..24d70f73b651 100644
--- a/fs/gfs2/locking/dlm/lock_dlm.h
+++ b/fs/gfs2/locking/dlm/lock_dlm.h
@@ -101,6 +101,7 @@ enum {
        LFL_NOBAST              = 10,
        LFL_HEADQUE             = 11,
        LFL_UNLOCK_DELETE       = 12,
+        LFL_AST_WAIT            = 13,
 };
 struct gdlm_lock {
@@ -117,7 +118,6 @@ struct gdlm_lock {
        unsigned long           flags;          /* lock_dlm flags LFL_ */
        int                     bast_mode;      /* protected by async_lock */
-        struct completion       ast_wait;
        struct list_head        clist;          /* complete */
        struct list_head        blist;          /* blocking */
diff --git a/fs/gfs2/locking/dlm/mount.c b/fs/gfs2/locking/dlm/mount.c
index 1d8faa3da8af..41c5b04caaba 100644
--- a/fs/gfs2/locking/dlm/mount.c
+++ b/fs/gfs2/locking/dlm/mount.c
@@ -147,7 +147,7 @@ static int gdlm_mount(char *table_name, char *host_data,
        error = dlm_new_lockspace(ls->fsname, strlen(ls->fsname),
                                  &ls->dlm_lockspace,
-                                  nodir ? DLM_LSFL_NODIR : 0,
+                                  DLM_LSFL_FS | (nodir ? DLM_LSFL_NODIR : 0),
                                  GDLM_LVB_SIZE);
        if (error) {
                log_error("dlm_new_lockspace error %d", error);
diff --git a/fs/gfs2/locking/dlm/plock.c b/fs/gfs2/locking/dlm/plock.c
index f82495e18c2d..fba1f1d87e4f 100644
--- a/fs/gfs2/locking/dlm/plock.c
+++ b/fs/gfs2/locking/dlm/plock.c
@@ -242,7 +242,7 @@ int gdlm_plock_get(void *lockspace, struct lm_lockname *name,
        op->info.number         = name->ln_number;
        op->info.start          = fl->fl_start;
        op->info.end            = fl->fl_end;
+        op->info.owner          = (__u64)(long) fl->fl_owner;
        send_op(op);
        wait_event(recv_wq, (op->done != 0));
@@ -254,16 +254,20 @@ int gdlm_plock_get(void *lockspace, struct lm_lockname *name,
        }
        spin_unlock(&ops_lock);
+        /* info.rv from userspace is 1 for conflict, 0 for no-conflict,
+           -ENOENT if there are no locks on the file */
        rv = op->info.rv;
        fl->fl_type = F_UNLCK;
        if (rv == -ENOENT)
                rv = 0;
-        else if (rv == 0 && op->info.pid != fl->fl_pid) {
+        else if (rv > 0) {
                fl->fl_type = (op->info.ex) ? F_WRLCK : F_RDLCK;
                fl->fl_pid = op->info.pid;
                fl->fl_start = op->info.start;
                fl->fl_end = op->info.end;
+                rv = 0;
        }
        kfree(op);
diff --git a/fs/gfs2/locking/dlm/thread.c b/fs/gfs2/locking/dlm/thread.c
index 9cf1f168eaf8..1aca51e45092 100644
--- a/fs/gfs2/locking/dlm/thread.c
+++ b/fs/gfs2/locking/dlm/thread.c
@@ -44,6 +44,13 @@ static void process_blocking(struct gdlm_lock *lp, int bast_mode)
        ls->fscb(ls->sdp, cb, &lp->lockname);
 }
+static void wake_up_ast(struct gdlm_lock *lp)
+{
+        clear_bit(LFL_AST_WAIT, &lp->flags);
+        smp_mb__after_clear_bit();
+        wake_up_bit(&lp->flags, LFL_AST_WAIT);
+}
 static void process_complete(struct gdlm_lock *lp)
 {
        struct gdlm_ls *ls = lp->ls;
@@ -136,7 +143,7 @@ static void process_complete(struct gdlm_lock *lp)
         */
        if (test_and_clear_bit(LFL_SYNC_LVB, &lp->flags)) {
-                complete(&lp->ast_wait);
+                wake_up_ast(lp);
                return;
        }
@@ -214,7 +221,7 @@ out:
        if (test_bit(LFL_INLOCK, &lp->flags)) {
                clear_bit(LFL_NOBLOCK, &lp->flags);
                lp->cur = lp->req;
-                complete(&lp->ast_wait);
+                wake_up_ast(lp);
                return;
        }
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index 291415ddfe51..f49a12e24086 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -83,6 +83,11 @@ static void gfs2_ail1_start_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
                        gfs2_assert(sdp, bd->bd_ail == ai);
+                        if (!bh){
+                                list_move(&bd->bd_ail_st_list, &ai->ai_ail2_list);
+                                continue;
+                        }
                        if (!buffer_busy(bh)) {
                                if (!buffer_uptodate(bh)) {
                                        gfs2_log_unlock(sdp);
@@ -125,6 +130,11 @@ static int gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai, int fl
                                         bd_ail_st_list) {
                bh = bd->bd_bh;
+                if (!bh){
+                        list_move(&bd->bd_ail_st_list, &ai->ai_ail2_list);
+                        continue;
+                }
                gfs2_assert(sdp, bd->bd_ail == ai);
                if (buffer_busy(bh)) {
@@ -262,8 +272,8 @@ static void ail2_empty(struct gfs2_sbd *sdp, unsigned int new_tail)
 * @sdp: The GFS2 superblock
 * @blks: The number of blocks to reserve
 *
- * Note that we never give out the last 6 blocks of the journal. Thats
+ * Note that we never give out the last few blocks of the journal. That's
- * due to the fact that there is are a small number of header blocks
+ * due to the fact that there is a small number of header blocks
 * associated with each log flush. The exact number can't be known until
 * flush time, so we ensure that we have just enough free blocks at all
 * times to avoid running out during a log flush.
@@ -274,6 +284,7 @@ static void ail2_empty(struct gfs2_sbd *sdp, unsigned int new_tail)
 int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks)
 {
        unsigned int try = 0;
+        unsigned reserved_blks = 6 * (4096 / sdp->sd_vfs->s_blocksize);
        if (gfs2_assert_warn(sdp, blks) ||
            gfs2_assert_warn(sdp, blks <= sdp->sd_jdesc->jd_blocks))
@@ -281,7 +292,7 @@ int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks)
        mutex_lock(&sdp->sd_log_reserve_mutex);
        gfs2_log_lock(sdp);
-        while(sdp->sd_log_blks_free <= (blks + 6)) {
+        while(sdp->sd_log_blks_free <= (blks + reserved_blks)) {
                gfs2_log_unlock(sdp);
                gfs2_ail1_empty(sdp, 0);
                gfs2_log_flush(sdp, NULL);
@@ -357,6 +368,58 @@ static inline unsigned int log_distance(struct gfs2_sbd *sdp, unsigned int newer
        return dist;
 }
+/**
+ * calc_reserved - Calculate the number of blocks to reserve when
+ *                 refunding a transaction's unused buffers.
+ * @sdp: The GFS2 superblock
+ *
+ * This is complex.  We need to reserve room for all our currently used
+ * metadata buffers (e.g. normal file I/O rewriting file time stamps) and 
+ * all our journaled data buffers for journaled files (e.g. files in the 
+ * meta_fs like rindex, or files for which chattr +j was done.)
+ * If we don't reserve enough space, gfs2_log_refund and gfs2_log_flush
+ * will count it as free space (sd_log_blks_free) and corruption will follow.
+ *
+ * We can have metadata bufs and jdata bufs in the same journal.  So each
+ * type gets its own log header, for which we need to reserve a block.
+ * In fact, each type has the potential for needing more than one header 
+ * in cases where we have more buffers than will fit on a journal page.
+ * Metadata journal entries take up half the space of journaled buffer entries.
+ * Thus, metadata entries have buf_limit (503) and journaled buffers have
+ * databuf_limit (251) before they cause a wrap around.
+ *
+ * Also, we need to reserve blocks for revoke journal entries and one for an
+ * overall header for the lot.
+ *
+ * Returns: the number of blocks reserved
+ */
+static unsigned int calc_reserved(struct gfs2_sbd *sdp)
+{
+        unsigned int reserved = 0;
+        unsigned int mbuf_limit, metabufhdrs_needed;
+        unsigned int dbuf_limit, databufhdrs_needed;
+        unsigned int revokes = 0;
+        mbuf_limit = buf_limit(sdp);
+        metabufhdrs_needed = (sdp->sd_log_commited_buf +
+                              (mbuf_limit - 1)) / mbuf_limit;
+        dbuf_limit = databuf_limit(sdp);
+        databufhdrs_needed = (sdp->sd_log_commited_databuf +
+                              (dbuf_limit - 1)) / dbuf_limit;
+        if (sdp->sd_log_commited_revoke)
+                revokes = gfs2_struct2blk(sdp, sdp->sd_log_commited_revoke,
+                                          sizeof(u64));
+        reserved = sdp->sd_log_commited_buf + metabufhdrs_needed +
+                sdp->sd_log_commited_databuf + databufhdrs_needed +
+                revokes;
+        /* One for the overall header */
+        if (reserved)
+                reserved++;
+        return reserved;
+}
 static unsigned int current_tail(struct gfs2_sbd *sdp)
 {
        struct gfs2_ail *ai;
@@ -447,14 +510,14 @@ struct buffer_head *gfs2_log_fake_buf(struct gfs2_sbd *sdp,
        return bh;
 }
-static void log_pull_tail(struct gfs2_sbd *sdp, unsigned int new_tail, int pull)
+static void log_pull_tail(struct gfs2_sbd *sdp, unsigned int new_tail)
 {
        unsigned int dist = log_distance(sdp, new_tail, sdp->sd_log_tail);
        ail2_empty(sdp, new_tail);
        gfs2_log_lock(sdp);
-        sdp->sd_log_blks_free += dist - (pull ? 1 : 0);
+        sdp->sd_log_blks_free += dist;
        gfs2_assert_withdraw(sdp, sdp->sd_log_blks_free <= sdp->sd_jdesc->jd_blocks);
        gfs2_log_unlock(sdp);
@@ -504,7 +567,7 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags, int pull)
        brelse(bh);
        if (sdp->sd_log_tail != tail)
-                log_pull_tail(sdp, tail, pull);
+                log_pull_tail(sdp, tail);
        else
                gfs2_assert_withdraw(sdp, !pull);
@@ -517,6 +580,7 @@ static void log_flush_commit(struct gfs2_sbd *sdp)
        struct list_head *head = &sdp->sd_log_flush_list;
        struct gfs2_log_buf *lb;
        struct buffer_head *bh;
+        int flushcount = 0;
        while (!list_empty(head)) {
                lb = list_entry(head->next, struct gfs2_log_buf, lb_list);
@@ -533,9 +597,20 @@ static void log_flush_commit(struct gfs2_sbd *sdp)
                } else
                        brelse(bh);
                kfree(lb);
+                flushcount++;
        }
-        log_write_header(sdp, 0, 0);
+        /* If nothing was journaled, the header is unplanned and unwanted. */
+        if (flushcount) {
+                log_write_header(sdp, 0, 0);
+        } else {
+                unsigned int tail;
+                tail = current_tail(sdp);
+                gfs2_ail1_empty(sdp, 0);
+                if (sdp->sd_log_tail != tail)
+                        log_pull_tail(sdp, tail);
+        }
 }
 /**
@@ -565,7 +640,10 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl)
        INIT_LIST_HEAD(&ai->ai_ail1_list);
        INIT_LIST_HEAD(&ai->ai_ail2_list);
-        gfs2_assert_withdraw(sdp, sdp->sd_log_num_buf == sdp->sd_log_commited_buf);
+        gfs2_assert_withdraw(sdp,
+                             sdp->sd_log_num_buf + sdp->sd_log_num_jdata ==
+                             sdp->sd_log_commited_buf +
+                             sdp->sd_log_commited_databuf);
        gfs2_assert_withdraw(sdp,
                        sdp->sd_log_num_revoke == sdp->sd_log_commited_revoke);
@@ -576,16 +654,19 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl)
        lops_before_commit(sdp);
        if (!list_empty(&sdp->sd_log_flush_list))
                log_flush_commit(sdp);
-        else if (sdp->sd_log_tail != current_tail(sdp) && !sdp->sd_log_idle)
+        else if (sdp->sd_log_tail != current_tail(sdp) && !sdp->sd_log_idle){
+                gfs2_log_lock(sdp);
+                sdp->sd_log_blks_free--; /* Adjust for unreserved buffer */
+                gfs2_log_unlock(sdp);
                log_write_header(sdp, 0, PULL);
+        }
        lops_after_commit(sdp, ai);
        gfs2_log_lock(sdp);
        sdp->sd_log_head = sdp->sd_log_flush_head;
-        sdp->sd_log_blks_free -= sdp->sd_log_num_hdrs;
        sdp->sd_log_blks_reserved = 0;
        sdp->sd_log_commited_buf = 0;
-        sdp->sd_log_num_hdrs = 0;
+        sdp->sd_log_commited_databuf = 0;
        sdp->sd_log_commited_revoke = 0;
        if (!list_empty(&ai->ai_ail1_list)) {
@@ -602,32 +683,26 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl)
 static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
 {
-        unsigned int reserved = 0;
+        unsigned int reserved;
        unsigned int old;
        gfs2_log_lock(sdp);
        sdp->sd_log_commited_buf += tr->tr_num_buf_new - tr->tr_num_buf_rm;
-        gfs2_assert_withdraw(sdp, ((int)sdp->sd_log_commited_buf) >= 0);
+        sdp->sd_log_commited_databuf += tr->tr_num_databuf_new -
+                tr->tr_num_databuf_rm;
+        gfs2_assert_withdraw(sdp, (((int)sdp->sd_log_commited_buf) >= 0) &&
+                             (((int)sdp->sd_log_commited_databuf) >= 0));
        sdp->sd_log_commited_revoke += tr->tr_num_revoke - tr->tr_num_revoke_rm;
        gfs2_assert_withdraw(sdp, ((int)sdp->sd_log_commited_revoke) >= 0);
+        reserved = calc_reserved(sdp);
-        if (sdp->sd_log_commited_buf)
-                reserved += sdp->sd_log_commited_buf;
-        if (sdp->sd_log_commited_revoke)
-                reserved += gfs2_struct2blk(sdp, sdp->sd_log_commited_revoke,
-                                            sizeof(u64));
-        if (reserved)
-                reserved++;
        old = sdp->sd_log_blks_free;
        sdp->sd_log_blks_free += tr->tr_reserved -
                                 (reserved - sdp->sd_log_blks_reserved);
        gfs2_assert_withdraw(sdp, sdp->sd_log_blks_free >= old);
-        gfs2_assert_withdraw(sdp,
+        gfs2_assert_withdraw(sdp, sdp->sd_log_blks_free <=
-                             sdp->sd_log_blks_free <= sdp->sd_jdesc->jd_blocks +
+                             sdp->sd_jdesc->jd_blocks);
-                             sdp->sd_log_num_hdrs);
        sdp->sd_log_blks_reserved = reserved;
@@ -673,13 +748,13 @@ void gfs2_log_shutdown(struct gfs2_sbd *sdp)
        gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke);
        gfs2_assert_withdraw(sdp, !sdp->sd_log_num_rg);
        gfs2_assert_withdraw(sdp, !sdp->sd_log_num_databuf);
-        gfs2_assert_withdraw(sdp, !sdp->sd_log_num_hdrs);
        gfs2_assert_withdraw(sdp, list_empty(&sdp->sd_ail1_list));
        sdp->sd_log_flush_head = sdp->sd_log_head;
        sdp->sd_log_flush_wrapped = 0;
-        log_write_header(sdp, GFS2_LOG_HEAD_UNMOUNT, 0);
+        log_write_header(sdp, GFS2_LOG_HEAD_UNMOUNT,
+                         (sdp->sd_log_tail == current_tail(sdp)) ? 0 : PULL);
        gfs2_assert_warn(sdp, sdp->sd_log_blks_free == sdp->sd_jdesc->jd_blocks);
        gfs2_assert_warn(sdp, sdp->sd_log_head == sdp->sd_log_tail);
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index f82d84d05d23..aff70f0698fd 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -17,6 +17,7 @@
 #include "gfs2.h"
 #include "incore.h"
+#include "inode.h"
 #include "glock.h"
 #include "log.h"
 #include "lops.h"
@@ -117,15 +118,13 @@ static void buf_lo_before_commit(struct gfs2_sbd *sdp)
        struct gfs2_log_descriptor *ld;
        struct gfs2_bufdata *bd1 = NULL, *bd2;
        unsigned int total = sdp->sd_log_num_buf;
-        unsigned int offset = sizeof(struct gfs2_log_descriptor);
+        unsigned int offset = BUF_OFFSET;
        unsigned int limit;
        unsigned int num;
        unsigned n;
        __be64 *ptr;
-        offset += sizeof(__be64) - 1;
+        limit = buf_limit(sdp);
-        offset &= ~(sizeof(__be64) - 1);
-        limit = (sdp->sd_sb.sb_bsize - offset)/sizeof(__be64);
        /* for 4k blocks, limit = 503 */
        bd1 = bd2 = list_prepare_entry(bd1, &sdp->sd_log_le_buf, bd_le.le_list);
@@ -134,7 +133,6 @@ static void buf_lo_before_commit(struct gfs2_sbd *sdp)
                if (total > limit)
                        num = limit;
                bh = gfs2_log_get_buf(sdp);
-                sdp->sd_log_num_hdrs++;
                ld = (struct gfs2_log_descriptor *)bh->b_data;
                ptr = (__be64 *)(bh->b_data + offset);
                ld->ld_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
@@ -469,25 +467,28 @@ static void databuf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
        struct gfs2_inode *ip = GFS2_I(mapping->host);
        gfs2_log_lock(sdp);
+        if (!list_empty(&bd->bd_list_tr)) {
+                gfs2_log_unlock(sdp);
+                return;
+        }
        tr->tr_touched = 1;
-        if (list_empty(&bd->bd_list_tr) &&
+        if (gfs2_is_jdata(ip)) {
-            (ip->i_di.di_flags & GFS2_DIF_JDATA)) {
                tr->tr_num_buf++;
                list_add(&bd->bd_list_tr, &tr->tr_list_buf);
-                gfs2_log_unlock(sdp);
-                gfs2_pin(sdp, bd->bd_bh);
-                tr->tr_num_buf_new++;
-        } else {
-                gfs2_log_unlock(sdp);
        }
+        gfs2_log_unlock(sdp);
+        if (!list_empty(&le->le_list))
+                return;
        gfs2_trans_add_gl(bd->bd_gl);
-        gfs2_log_lock(sdp);
+        if (gfs2_is_jdata(ip)) {
-        if (list_empty(&le->le_list)) {
+                sdp->sd_log_num_jdata++;
-                if (ip->i_di.di_flags & GFS2_DIF_JDATA)
+                gfs2_pin(sdp, bd->bd_bh);
-                        sdp->sd_log_num_jdata++;
+                tr->tr_num_databuf_new++;
-                sdp->sd_log_num_databuf++;
-                list_add(&le->le_list, &sdp->sd_log_le_databuf);
        }
+        sdp->sd_log_num_databuf++;
+        gfs2_log_lock(sdp);
+        list_add(&le->le_list, &sdp->sd_log_le_databuf);
        gfs2_log_unlock(sdp);
 }
@@ -520,7 +521,6 @@ static void databuf_lo_before_commit(struct gfs2_sbd *sdp)
        LIST_HEAD(started);
        struct gfs2_bufdata *bd1 = NULL, *bd2, *bdt;
        struct buffer_head *bh = NULL,*bh1 = NULL;
-        unsigned int offset = sizeof(struct gfs2_log_descriptor);
        struct gfs2_log_descriptor *ld;
        unsigned int limit;
        unsigned int total_dbuf = sdp->sd_log_num_databuf;
@@ -528,9 +528,7 @@ static void databuf_lo_before_commit(struct gfs2_sbd *sdp)
        unsigned int num, n;
        __be64 *ptr = NULL;
-        offset += 2*sizeof(__be64) - 1;
+        limit = databuf_limit(sdp);
-        offset &= ~(2*sizeof(__be64) - 1);
-        limit = (sdp->sd_sb.sb_bsize - offset)/sizeof(__be64);
        /*
         * Start writing ordered buffers, write journaled buffers
@@ -581,10 +579,10 @@ static void databuf_lo_before_commit(struct gfs2_sbd *sdp)
                                gfs2_log_unlock(sdp);
                                if (!bh) {
                                        bh = gfs2_log_get_buf(sdp);
-                                        sdp->sd_log_num_hdrs++;
                                        ld = (struct gfs2_log_descriptor *)
                                             bh->b_data;
-                                        ptr = (__be64 *)(bh->b_data + offset);
+                                        ptr = (__be64 *)(bh->b_data +
+                                                         DATABUF_OFFSET);
                                        ld->ld_header.mh_magic =
                                                cpu_to_be32(GFS2_MAGIC);
                                        ld->ld_header.mh_type =
@@ -605,7 +603,7 @@ static void databuf_lo_before_commit(struct gfs2_sbd *sdp)
                                if (unlikely(magic != 0))
                                        set_buffer_escaped(bh1);
                                gfs2_log_lock(sdp);
-                                if (n++ > num)
+                                if (++n >= num)
                                        break;
                        } else if (!bh1) {
                                total_dbuf--;
@@ -622,6 +620,7 @@ static void databuf_lo_before_commit(struct gfs2_sbd *sdp)
                }
                gfs2_log_unlock(sdp);
                if (bh) {
+                        set_buffer_mapped(bh);
                        set_buffer_dirty(bh);
                        ll_rw_block(WRITE, 1, &bh);
                        bh = NULL;
diff --git a/fs/gfs2/lops.h b/fs/gfs2/lops.h
index 965bc65c7c64..41a00df75587 100644
--- a/fs/gfs2/lops.h
+++ b/fs/gfs2/lops.h
@@ -13,6 +13,13 @@
 #include <linux/list.h>
 #include "incore.h"
+#define BUF_OFFSET \
+        ((sizeof(struct gfs2_log_descriptor) + sizeof(__be64) - 1) & \
+         ~(sizeof(__be64) - 1))
+#define DATABUF_OFFSET \
+        ((sizeof(struct gfs2_log_descriptor) + (2 * sizeof(__be64) - 1)) & \
+         ~(2 * sizeof(__be64) - 1))
 extern const struct gfs2_log_operations gfs2_glock_lops;
 extern const struct gfs2_log_operations gfs2_buf_lops;
 extern const struct gfs2_log_operations gfs2_revoke_lops;
@@ -21,6 +28,22 @@ extern const struct gfs2_log_operations gfs2_databuf_lops;
 extern const struct gfs2_log_operations *gfs2_log_ops[];
+static inline unsigned int buf_limit(struct gfs2_sbd *sdp)
+{
+        unsigned int limit;
+        limit = (sdp->sd_sb.sb_bsize - BUF_OFFSET) / sizeof(__be64);
+        return limit;
+}
+static inline unsigned int databuf_limit(struct gfs2_sbd *sdp)
+{
+        unsigned int limit;
+        limit = (sdp->sd_sb.sb_bsize - DATABUF_OFFSET) / (2 * sizeof(__be64));
+        return limit;
+}
 static inline void lops_init_le(struct gfs2_log_element *le,
                                const struct gfs2_log_operations *lops)
 {
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index e62d4f620c58..8da343b34ae7 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -387,12 +387,18 @@ void gfs2_meta_wipe(struct gfs2_inode *ip, u64 bstart, u32 blen)
                        if (test_clear_buffer_pinned(bh)) {
                                struct gfs2_trans *tr = current->journal_info;
+                                struct gfs2_inode *bh_ip =
+                                        GFS2_I(bh->b_page->mapping->host);
                                gfs2_log_lock(sdp);
                                list_del_init(&bd->bd_le.le_list);
                                gfs2_assert_warn(sdp, sdp->sd_log_num_buf);
                                sdp->sd_log_num_buf--;
                                gfs2_log_unlock(sdp);
-                                tr->tr_num_buf_rm++;
+                                if (bh_ip->i_inode.i_private != NULL)
+                                        tr->tr_num_databuf_rm++;
+                                else
+                                        tr->tr_num_buf_rm++;
                                brelse(bh);
                        }
                        if (bd) {
diff --git a/fs/gfs2/meta_io.h b/fs/gfs2/meta_io.h
index e037425bc042..527bf19d9690 100644
--- a/fs/gfs2/meta_io.h
+++ b/fs/gfs2/meta_io.h
@@ -63,7 +63,7 @@ int gfs2_meta_indirect_buffer(struct gfs2_inode *ip, int height, u64 num,
 static inline int gfs2_meta_inode_buffer(struct gfs2_inode *ip,
                                         struct buffer_head **bhp)
 {
-        return gfs2_meta_indirect_buffer(ip, 0, ip->i_num.no_addr, 0, bhp);
+        return gfs2_meta_indirect_buffer(ip, 0, ip->i_no_addr, 0, bhp);
 }
 struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen);
diff --git a/fs/gfs2/mount.c b/fs/gfs2/mount.c
index 4864659555d4..6f006a804db3 100644
--- a/fs/gfs2/mount.c
+++ b/fs/gfs2/mount.c
@@ -82,20 +82,19 @@ int gfs2_mount_args(struct gfs2_sbd *sdp, char *data_arg, int remount)
        char *options, *o, *v;
        int error = 0;
-        if (!remount) {
+        /*  If someone preloaded options, use those instead  */
-                /*  If someone preloaded options, use those instead  */
+        spin_lock(&gfs2_sys_margs_lock);
-                spin_lock(&gfs2_sys_margs_lock);
+        if (!remount && gfs2_sys_margs) {
-                if (gfs2_sys_margs) {
+                data = gfs2_sys_margs;
-                        data = gfs2_sys_margs;
+                gfs2_sys_margs = NULL;
-                        gfs2_sys_margs = NULL;
-                }
-                spin_unlock(&gfs2_sys_margs_lock);
-                /*  Set some defaults  */
-                args->ar_num_glockd = GFS2_GLOCKD_DEFAULT;
-                args->ar_quota = GFS2_QUOTA_DEFAULT;
-                args->ar_data = GFS2_DATA_DEFAULT;
        }
+        spin_unlock(&gfs2_sys_margs_lock);
+        /*  Set some defaults  */
+        memset(args, 0, sizeof(struct gfs2_args));
+        args->ar_num_glockd = GFS2_GLOCKD_DEFAULT;
+        args->ar_quota = GFS2_QUOTA_DEFAULT;
+        args->ar_data = GFS2_DATA_DEFAULT;
        /* Split the options into tokens with the "," character and
           process them */
diff --git a/fs/gfs2/ondisk.c b/fs/gfs2/ondisk.c
deleted file mode 100644
index d9ecfd23a49e..000000000000
--- a/fs/gfs2/ondisk.c
+++ /dev/null
@@ -1,251 +0,0 @@
-/*
- * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
- * Copyright (C) 2004-2006 Red Hat, Inc.  All rights reserved.
- *
- * This copyrighted material is made available to anyone wishing to use,
- * modify, copy, or redistribute it subject to the terms and conditions
- * of the GNU General Public License version 2.
- */
-#include <linux/slab.h>
-#include <linux/spinlock.h>
-#include <linux/completion.h>
-#include <linux/buffer_head.h>
-#include "gfs2.h"
-#include <linux/gfs2_ondisk.h>
-#include <linux/lm_interface.h>
-#include "incore.h"
-#define pv(struct, member, fmt) printk(KERN_INFO "  "#member" = "fmt"\n", \
-                                       struct->member);
-/*
- * gfs2_xxx_in - read in an xxx struct
- * first arg: the cpu-order structure
- * buf: the disk-order buffer
- *
- * gfs2_xxx_out - write out an xxx struct
- * first arg: the cpu-order structure
- * buf: the disk-order buffer
- *
- * gfs2_xxx_print - print out an xxx struct
- * first arg: the cpu-order structure
- */
-void gfs2_inum_in(struct gfs2_inum_host *no, const void *buf)
-{
-        const struct gfs2_inum *str = buf;
-        no->no_formal_ino = be64_to_cpu(str->no_formal_ino);
-        no->no_addr = be64_to_cpu(str->no_addr);
-}
-void gfs2_inum_out(const struct gfs2_inum_host *no, void *buf)
-{
-        struct gfs2_inum *str = buf;
-        str->no_formal_ino = cpu_to_be64(no->no_formal_ino);
-        str->no_addr = cpu_to_be64(no->no_addr);
-}
-static void gfs2_inum_print(const struct gfs2_inum_host *no)
-{
-        printk(KERN_INFO "  no_formal_ino = %llu\n", (unsigned long long)no->no_formal_ino);
-        printk(KERN_INFO "  no_addr = %llu\n", (unsigned long long)no->no_addr);
-}
-static void gfs2_meta_header_in(struct gfs2_meta_header_host *mh, const void *buf)
-{
-        const struct gfs2_meta_header *str = buf;
-        mh->mh_magic = be32_to_cpu(str->mh_magic);
-        mh->mh_type = be32_to_cpu(str->mh_type);
-        mh->mh_format = be32_to_cpu(str->mh_format);
-}
-void gfs2_sb_in(struct gfs2_sb_host *sb, const void *buf)
-{
-        const struct gfs2_sb *str = buf;
-        gfs2_meta_header_in(&sb->sb_header, buf);
-        sb->sb_fs_format = be32_to_cpu(str->sb_fs_format);
-        sb->sb_multihost_format = be32_to_cpu(str->sb_multihost_format);
-        sb->sb_bsize = be32_to_cpu(str->sb_bsize);
-        sb->sb_bsize_shift = be32_to_cpu(str->sb_bsize_shift);
-        gfs2_inum_in(&sb->sb_master_dir, (char *)&str->sb_master_dir);
-        gfs2_inum_in(&sb->sb_root_dir, (char *)&str->sb_root_dir);
-        memcpy(sb->sb_lockproto, str->sb_lockproto, GFS2_LOCKNAME_LEN);
-        memcpy(sb->sb_locktable, str->sb_locktable, GFS2_LOCKNAME_LEN);
-}
-void gfs2_rindex_in(struct gfs2_rindex_host *ri, const void *buf)
-{
-        const struct gfs2_rindex *str = buf;
-        ri->ri_addr = be64_to_cpu(str->ri_addr);
-        ri->ri_length = be32_to_cpu(str->ri_length);
-        ri->ri_data0 = be64_to_cpu(str->ri_data0);
-        ri->ri_data = be32_to_cpu(str->ri_data);
-        ri->ri_bitbytes = be32_to_cpu(str->ri_bitbytes);
-}
-void gfs2_rindex_print(const struct gfs2_rindex_host *ri)
-{
-        printk(KERN_INFO "  ri_addr = %llu\n", (unsigned long long)ri->ri_addr);
-        pv(ri, ri_length, "%u");
-        printk(KERN_INFO "  ri_data0 = %llu\n", (unsigned long long)ri->ri_data0);
-        pv(ri, ri_data, "%u");
-        pv(ri, ri_bitbytes, "%u");
-}
-void gfs2_rgrp_in(struct gfs2_rgrp_host *rg, const void *buf)
-{
-        const struct gfs2_rgrp *str = buf;
-        rg->rg_flags = be32_to_cpu(str->rg_flags);
-        rg->rg_free = be32_to_cpu(str->rg_free);
-        rg->rg_dinodes = be32_to_cpu(str->rg_dinodes);
-        rg->rg_igeneration = be64_to_cpu(str->rg_igeneration);
-}
-void gfs2_rgrp_out(const struct gfs2_rgrp_host *rg, void *buf)
-{
-        struct gfs2_rgrp *str = buf;
-        str->rg_flags = cpu_to_be32(rg->rg_flags);
-        str->rg_free = cpu_to_be32(rg->rg_free);
-        str->rg_dinodes = cpu_to_be32(rg->rg_dinodes);
-        str->__pad = cpu_to_be32(0);
-        str->rg_igeneration = cpu_to_be64(rg->rg_igeneration);
-        memset(&str->rg_reserved, 0, sizeof(str->rg_reserved));
-}
-void gfs2_quota_in(struct gfs2_quota_host *qu, const void *buf)
-{
-        const struct gfs2_quota *str = buf;
-        qu->qu_limit = be64_to_cpu(str->qu_limit);
-        qu->qu_warn = be64_to_cpu(str->qu_warn);
-        qu->qu_value = be64_to_cpu(str->qu_value);
-}
-void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf)
-{
-        const struct gfs2_dinode_host *di = &ip->i_di;
-        struct gfs2_dinode *str = buf;
-        str->di_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
-        str->di_header.mh_type = cpu_to_be32(GFS2_METATYPE_DI);
-        str->di_header.__pad0 = 0;
-        str->di_header.mh_format = cpu_to_be32(GFS2_FORMAT_DI);
-        str->di_header.__pad1 = 0;
-        gfs2_inum_out(&ip->i_num, &str->di_num);
-        str->di_mode = cpu_to_be32(ip->i_inode.i_mode);
-        str->di_uid = cpu_to_be32(ip->i_inode.i_uid);
-        str->di_gid = cpu_to_be32(ip->i_inode.i_gid);
-        str->di_nlink = cpu_to_be32(ip->i_inode.i_nlink);
-        str->di_size = cpu_to_be64(di->di_size);
-        str->di_blocks = cpu_to_be64(di->di_blocks);
-        str->di_atime = cpu_to_be64(ip->i_inode.i_atime.tv_sec);
-        str->di_mtime = cpu_to_be64(ip->i_inode.i_mtime.tv_sec);
-        str->di_ctime = cpu_to_be64(ip->i_inode.i_ctime.tv_sec);
-        str->di_goal_meta = cpu_to_be64(di->di_goal_meta);
-        str->di_goal_data = cpu_to_be64(di->di_goal_data);
-        str->di_generation = cpu_to_be64(di->di_generation);
-        str->di_flags = cpu_to_be32(di->di_flags);
-        str->di_height = cpu_to_be16(di->di_height);
-        str->di_payload_format = cpu_to_be32(S_ISDIR(ip->i_inode.i_mode) &&
-                                             !(ip->i_di.di_flags & GFS2_DIF_EXHASH) ?
-                                             GFS2_FORMAT_DE : 0);
-        str->di_depth = cpu_to_be16(di->di_depth);
-        str->di_entries = cpu_to_be32(di->di_entries);
-        str->di_eattr = cpu_to_be64(di->di_eattr);
-}
-void gfs2_dinode_print(const struct gfs2_inode *ip)
-{
-        const struct gfs2_dinode_host *di = &ip->i_di;
-        gfs2_inum_print(&ip->i_num);
-        printk(KERN_INFO "  di_size = %llu\n", (unsigned long long)di->di_size);
-        printk(KERN_INFO "  di_blocks = %llu\n", (unsigned long long)di->di_blocks);
-        printk(KERN_INFO "  di_goal_meta = %llu\n", (unsigned long long)di->di_goal_meta);
-        printk(KERN_INFO "  di_goal_data = %llu\n", (unsigned long long)di->di_goal_data);
-        pv(di, di_flags, "0x%.8X");
-        pv(di, di_height, "%u");
-        pv(di, di_depth, "%u");
-        pv(di, di_entries, "%u");
-        printk(KERN_INFO "  di_eattr = %llu\n", (unsigned long long)di->di_eattr);
-}
-void gfs2_log_header_in(struct gfs2_log_header_host *lh, const void *buf)
-{
-        const struct gfs2_log_header *str = buf;
-        gfs2_meta_header_in(&lh->lh_header, buf);
-        lh->lh_sequence = be64_to_cpu(str->lh_sequence);
-        lh->lh_flags = be32_to_cpu(str->lh_flags);
-        lh->lh_tail = be32_to_cpu(str->lh_tail);
-        lh->lh_blkno = be32_to_cpu(str->lh_blkno);
-        lh->lh_hash = be32_to_cpu(str->lh_hash);
-}
-void gfs2_inum_range_in(struct gfs2_inum_range_host *ir, const void *buf)
-{
-        const struct gfs2_inum_range *str = buf;
-        ir->ir_start = be64_to_cpu(str->ir_start);
-        ir->ir_length = be64_to_cpu(str->ir_length);
-}
-void gfs2_inum_range_out(const struct gfs2_inum_range_host *ir, void *buf)
-{
-        struct gfs2_inum_range *str = buf;
-        str->ir_start = cpu_to_be64(ir->ir_start);
-        str->ir_length = cpu_to_be64(ir->ir_length);
-}
-void gfs2_statfs_change_in(struct gfs2_statfs_change_host *sc, const void *buf)
-{
-        const struct gfs2_statfs_change *str = buf;
-        sc->sc_total = be64_to_cpu(str->sc_total);
-        sc->sc_free = be64_to_cpu(str->sc_free);
-        sc->sc_dinodes = be64_to_cpu(str->sc_dinodes);
-}
-void gfs2_statfs_change_out(const struct gfs2_statfs_change_host *sc, void *buf)
-{
-        struct gfs2_statfs_change *str = buf;
-        str->sc_total = cpu_to_be64(sc->sc_total);
-        str->sc_free = cpu_to_be64(sc->sc_free);
-        str->sc_dinodes = cpu_to_be64(sc->sc_dinodes);
-}
-void gfs2_quota_change_in(struct gfs2_quota_change_host *qc, const void *buf)
-{
-        const struct gfs2_quota_change *str = buf;
-        qc->qc_change = be64_to_cpu(str->qc_change);
-        qc->qc_flags = be32_to_cpu(str->qc_flags);
-        qc->qc_id = be32_to_cpu(str->qc_id);
-}
diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c
index 30c15622174f..26c888890c24 100644
--- a/fs/gfs2/ops_address.c
+++ b/fs/gfs2/ops_address.c
@@ -1,6 +1,6 @@
 /*
 * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
- * Copyright (C) 2004-2006 Red Hat, Inc.  All rights reserved.
+ * Copyright (C) 2004-2007 Red Hat, Inc.  All rights reserved.
 *
 * This copyrighted material is made available to anyone wishing to use,
 * modify, copy, or redistribute it subject to the terms and conditions
@@ -32,6 +32,7 @@
 #include "trans.h"
 #include "rgrp.h"
 #include "ops_file.h"
+#include "super.h"
 #include "util.h"
 #include "glops.h"
@@ -49,6 +50,8 @@ static void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page,
                end = start + bsize;
                if (end <= from || start >= to)
                        continue;
+                if (gfs2_is_jdata(ip))
+                        set_buffer_uptodate(bh);
                gfs2_trans_add_bh(ip->i_gl, bh, 0);
        }
 }
@@ -134,7 +137,9 @@ static int gfs2_writepage(struct page *page, struct writeback_control *wbc)
                return 0; /* don't care */
        }
-        if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip)) {
+        if ((sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip)) &&
+            PageChecked(page)) {
+                ClearPageChecked(page);
                error = gfs2_trans_begin(sdp, RES_DINODE + 1, 0);
                if (error)
                        goto out_ignore;
@@ -203,11 +208,7 @@ static int stuffed_readpage(struct gfs2_inode *ip, struct page *page)
         * so we need to supply one here. It doesn't happen often.
         */
        if (unlikely(page->index)) {
-                kaddr = kmap_atomic(page, KM_USER0);
+                zero_user_page(page, 0, PAGE_CACHE_SIZE, KM_USER0);
-                memset(kaddr, 0, PAGE_CACHE_SIZE);
-                kunmap_atomic(kaddr, KM_USER0);
-                flush_dcache_page(page);
-                SetPageUptodate(page);
                return 0;
        }
@@ -450,6 +451,31 @@ out_uninit:
 }
 /**
+ * adjust_fs_space - Adjusts the free space available due to gfs2_grow
+ * @inode: the rindex inode
+ */
+static void adjust_fs_space(struct inode *inode)
+{
+        struct gfs2_sbd *sdp = inode->i_sb->s_fs_info;
+        struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master;
+        struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local;
+        u64 fs_total, new_free;
+        /* Total up the file system space, according to the latest rindex. */
+        fs_total = gfs2_ri_total(sdp);
+        spin_lock(&sdp->sd_statfs_spin);
+        if (fs_total > (m_sc->sc_total + l_sc->sc_total))
+                new_free = fs_total - (m_sc->sc_total + l_sc->sc_total);
+        else
+                new_free = 0;
+        spin_unlock(&sdp->sd_statfs_spin);
+        fs_warn(sdp, "File system extended by %llu blocks.\n",
+                (unsigned long long)new_free);
+        gfs2_statfs_change(sdp, new_free, new_free, 0);
+}
+/**
 * gfs2_commit_write - Commit write to a file
 * @file: The file to write to
 * @page: The page containing the data
@@ -511,6 +537,9 @@ static int gfs2_commit_write(struct file *file, struct page *page,
                di->di_size = cpu_to_be64(inode->i_size);
        }
+        if (inode == sdp->sd_rindex)
+                adjust_fs_space(inode);
        brelse(dibh);
        gfs2_trans_end(sdp);
        if (al->al_requested) {
@@ -543,6 +572,23 @@ fail_nounlock:
 }
 /**
+ * gfs2_set_page_dirty - Page dirtying function
+ * @page: The page to dirty
+ *
+ * Returns: 1 if it dirtied the page, or 0 otherwise
+ */
+ 
+static int gfs2_set_page_dirty(struct page *page)
+{
+        struct gfs2_inode *ip = GFS2_I(page->mapping->host);
+        struct gfs2_sbd *sdp = GFS2_SB(page->mapping->host);
+        if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip))
+                SetPageChecked(page);
+        return __set_page_dirty_buffers(page);
+}
+/**
 * gfs2_bmap - Block map function
 * @mapping: Address space info
 * @lblock: The block to map
@@ -578,6 +624,8 @@ static void discard_buffer(struct gfs2_sbd *sdp, struct buffer_head *bh)
        if (bd) {
                bd->bd_bh = NULL;
                bh->b_private = NULL;
+                if (!bd->bd_ail && list_empty(&bd->bd_le.le_list))
+                        kmem_cache_free(gfs2_bufdata_cachep, bd);
        }
        gfs2_log_unlock(sdp);
@@ -598,6 +646,8 @@ static void gfs2_invalidatepage(struct page *page, unsigned long offset)
        unsigned int curr_off = 0;
        BUG_ON(!PageLocked(page));
+        if (offset == 0)
+                ClearPageChecked(page);
        if (!page_has_buffers(page))
                return;
@@ -728,8 +778,8 @@ static unsigned limit = 0;
                        return;
                fs_warn(sdp, "ip = %llu %llu\n",
-                        (unsigned long long)ip->i_num.no_formal_ino,
+                        (unsigned long long)ip->i_no_formal_ino,
-                        (unsigned long long)ip->i_num.no_addr);
+                        (unsigned long long)ip->i_no_addr);
                for (x = 0; x < GFS2_MAX_META_HEIGHT; x++)
                        fs_warn(sdp, "ip->i_cache[%u] = %s\n",
@@ -810,6 +860,7 @@ const struct address_space_operations gfs2_file_aops = {
        .sync_page = block_sync_page,
        .prepare_write = gfs2_prepare_write,
        .commit_write = gfs2_commit_write,
+        .set_page_dirty = gfs2_set_page_dirty,
        .bmap = gfs2_bmap,
        .invalidatepage = gfs2_invalidatepage,
        .releasepage = gfs2_releasepage,
diff --git a/fs/gfs2/ops_address.h b/fs/gfs2/ops_address.h
index 35aaee4aa7e1..fa1b5b3d28b9 100644
--- a/fs/gfs2/ops_address.h
+++ b/fs/gfs2/ops_address.h
@@ -1,6 +1,6 @@
 /*
 * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
- * Copyright (C) 2004-2006 Red Hat, Inc.  All rights reserved.
+ * Copyright (C) 2004-2007 Red Hat, Inc.  All rights reserved.
 *
 * This copyrighted material is made available to anyone wishing to use,
 * modify, copy, or redistribute it subject to the terms and conditions
diff --git a/fs/gfs2/ops_dentry.c b/fs/gfs2/ops_dentry.c
index a6fdc52f554a..793e334d098e 100644
--- a/fs/gfs2/ops_dentry.c
+++ b/fs/gfs2/ops_dentry.c
@@ -21,6 +21,7 @@
 #include "glock.h"
 #include "ops_dentry.h"
 #include "util.h"
+#include "inode.h"
 /**
 * gfs2_drevalidate - Check directory lookup consistency
@@ -40,14 +41,15 @@ static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd)
        struct gfs2_inode *dip = GFS2_I(parent->d_inode);
        struct inode *inode = dentry->d_inode;
        struct gfs2_holder d_gh;
-        struct gfs2_inode *ip;
+        struct gfs2_inode *ip = NULL;
-        struct gfs2_inum_host inum;
-        unsigned int type;
        int error;
        int had_lock=0;
-        if (inode && is_bad_inode(inode))
+        if (inode) {
-                goto invalid;
+                if (is_bad_inode(inode))
+                        goto invalid;
+                ip = GFS2_I(inode);
+        }
        if (sdp->sd_args.ar_localcaching)
                goto valid;
@@ -59,7 +61,7 @@ static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd)
                        goto fail;
        } 
-        error = gfs2_dir_search(parent->d_inode, &dentry->d_name, &inum, &type);
+        error = gfs2_dir_check(parent->d_inode, &dentry->d_name, ip);
        switch (error) {
        case 0:
                if (!inode)
@@ -73,16 +75,6 @@ static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd)
                goto fail_gunlock;
        }
-        ip = GFS2_I(inode);
-        if (!gfs2_inum_equal(&ip->i_num, &inum))
-                goto invalid_gunlock;
-        if (IF2DT(ip->i_inode.i_mode) != type) {
-                gfs2_consist_inode(dip);
-                goto fail_gunlock;
-        }
 valid_gunlock:
        if (!had_lock)
                gfs2_glock_dq_uninit(&d_gh);
diff --git a/fs/gfs2/ops_export.c b/fs/gfs2/ops_export.c
index aad918337a46..99ea5659bc2c 100644
--- a/fs/gfs2/ops_export.c
+++ b/fs/gfs2/ops_export.c
@@ -22,10 +22,14 @@
 #include "glops.h"
 #include "inode.h"
 #include "ops_dentry.h"
-#include "ops_export.h"
+#include "ops_fstype.h"
 #include "rgrp.h"
 #include "util.h"
+#define GFS2_SMALL_FH_SIZE 4
+#define GFS2_LARGE_FH_SIZE 8
+#define GFS2_OLD_FH_SIZE 10
 static struct dentry *gfs2_decode_fh(struct super_block *sb,
                                     __u32 *p,
                                     int fh_len,
@@ -35,31 +39,28 @@ static struct dentry *gfs2_decode_fh(struct super_block *sb,
                                     void *context)
 {
        __be32 *fh = (__force __be32 *)p;
-        struct gfs2_fh_obj fh_obj;
+        struct gfs2_inum_host inum, parent;
-        struct gfs2_inum_host *this, parent;
-        this            = &fh_obj.this;
-        fh_obj.imode    = DT_UNKNOWN;
        memset(&parent, 0, sizeof(struct gfs2_inum));
        switch (fh_len) {
        case GFS2_LARGE_FH_SIZE:
+        case GFS2_OLD_FH_SIZE:
                parent.no_formal_ino = ((u64)be32_to_cpu(fh[4])) << 32;
                parent.no_formal_ino |= be32_to_cpu(fh[5]);
                parent.no_addr = ((u64)be32_to_cpu(fh[6])) << 32;
                parent.no_addr |= be32_to_cpu(fh[7]);
-                fh_obj.imode = be32_to_cpu(fh[8]);
        case GFS2_SMALL_FH_SIZE:
-                this->no_formal_ino = ((u64)be32_to_cpu(fh[0])) << 32;
+                inum.no_formal_ino = ((u64)be32_to_cpu(fh[0])) << 32;
-                this->no_formal_ino |= be32_to_cpu(fh[1]);
+                inum.no_formal_ino |= be32_to_cpu(fh[1]);
-                this->no_addr = ((u64)be32_to_cpu(fh[2])) << 32;
+                inum.no_addr = ((u64)be32_to_cpu(fh[2])) << 32;
-                this->no_addr |= be32_to_cpu(fh[3]);
+                inum.no_addr |= be32_to_cpu(fh[3]);
                break;
        default:
                return NULL;
        }
-        return gfs2_export_ops.find_exported_dentry(sb, &fh_obj, &parent,
+        return gfs2_export_ops.find_exported_dentry(sb, &inum, &parent,
                                                    acceptable, context);
 }
@@ -75,10 +76,10 @@ static int gfs2_encode_fh(struct dentry *dentry, __u32 *p, int *len,
            (connectable && *len < GFS2_LARGE_FH_SIZE))
                return 255;
-        fh[0] = cpu_to_be32(ip->i_num.no_formal_ino >> 32);
+        fh[0] = cpu_to_be32(ip->i_no_formal_ino >> 32);
-        fh[1] = cpu_to_be32(ip->i_num.no_formal_ino & 0xFFFFFFFF);
+        fh[1] = cpu_to_be32(ip->i_no_formal_ino & 0xFFFFFFFF);
-        fh[2] = cpu_to_be32(ip->i_num.no_addr >> 32);
+        fh[2] = cpu_to_be32(ip->i_no_addr >> 32);
-        fh[3] = cpu_to_be32(ip->i_num.no_addr & 0xFFFFFFFF);
+        fh[3] = cpu_to_be32(ip->i_no_addr & 0xFFFFFFFF);
        *len = GFS2_SMALL_FH_SIZE;
        if (!connectable || inode == sb->s_root->d_inode)
@@ -90,13 +91,10 @@ static int gfs2_encode_fh(struct dentry *dentry, __u32 *p, int *len,
        igrab(inode);
        spin_unlock(&dentry->d_lock);
-        fh[4] = cpu_to_be32(ip->i_num.no_formal_ino >> 32);
+        fh[4] = cpu_to_be32(ip->i_no_formal_ino >> 32);
-        fh[5] = cpu_to_be32(ip->i_num.no_formal_ino & 0xFFFFFFFF);
+        fh[5] = cpu_to_be32(ip->i_no_formal_ino & 0xFFFFFFFF);
-        fh[6] = cpu_to_be32(ip->i_num.no_addr >> 32);
+        fh[6] = cpu_to_be32(ip->i_no_addr >> 32);
-        fh[7] = cpu_to_be32(ip->i_num.no_addr & 0xFFFFFFFF);
+        fh[7] = cpu_to_be32(ip->i_no_addr & 0xFFFFFFFF);
-        fh[8]  = cpu_to_be32(inode->i_mode);
-        fh[9]  = 0;     /* pad to double word */
        *len = GFS2_LARGE_FH_SIZE;
        iput(inode);
@@ -144,7 +142,8 @@ static int gfs2_get_name(struct dentry *parent, char *name,
        ip = GFS2_I(inode);
        *name = 0;
-        gnfd.inum = ip->i_num;
+        gnfd.inum.no_addr = ip->i_no_addr;
+        gnfd.inum.no_formal_ino = ip->i_no_formal_ino;
        gnfd.name = name;
        error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &gh);
@@ -192,8 +191,7 @@ static struct dentry *gfs2_get_parent(struct dentry *child)
 static struct dentry *gfs2_get_dentry(struct super_block *sb, void *inum_obj)
 {
        struct gfs2_sbd *sdp = sb->s_fs_info;
-        struct gfs2_fh_obj *fh_obj = (struct gfs2_fh_obj *)inum_obj;
+        struct gfs2_inum_host *inum = inum_obj;
-        struct gfs2_inum_host *inum = &fh_obj->this;
        struct gfs2_holder i_gh, ri_gh, rgd_gh;
        struct gfs2_rgrpd *rgd;
        struct inode *inode;
@@ -202,9 +200,9 @@ static struct dentry *gfs2_get_dentry(struct super_block *sb, void *inum_obj)
        /* System files? */
-        inode = gfs2_ilookup(sb, inum);
+        inode = gfs2_ilookup(sb, inum->no_addr);
        if (inode) {
-                if (GFS2_I(inode)->i_num.no_formal_ino != inum->no_formal_ino) {
+                if (GFS2_I(inode)->i_no_formal_ino != inum->no_formal_ino) {
                        iput(inode);
                        return ERR_PTR(-ESTALE);
                }
@@ -236,7 +234,9 @@ static struct dentry *gfs2_get_dentry(struct super_block *sb, void *inum_obj)
        gfs2_glock_dq_uninit(&rgd_gh);
        gfs2_glock_dq_uninit(&ri_gh);
-        inode = gfs2_inode_lookup(sb, inum, fh_obj->imode);
+        inode = gfs2_inode_lookup(sb, DT_UNKNOWN,
+                                        inum->no_addr,
+                                        0);
        if (!inode)
                goto fail;
        if (IS_ERR(inode)) {
@@ -250,6 +250,15 @@ static struct dentry *gfs2_get_dentry(struct super_block *sb, void *inum_obj)
                goto fail;
        }
+        /* Pick up the work we bypass in gfs2_inode_lookup */
+        if (inode->i_state & I_NEW) 
+                gfs2_set_iop(inode);
+        if (GFS2_I(inode)->i_no_formal_ino != inum->no_formal_ino) {
+                iput(inode);
+                goto fail;
+        }
        error = -EIO;
        if (GFS2_I(inode)->i_di.di_flags & GFS2_DIF_SYSTEM) {
                iput(inode);
diff --git a/fs/gfs2/ops_export.h b/fs/gfs2/ops_export.h
deleted file mode 100644
index f925a955b3b8..000000000000
--- a/fs/gfs2/ops_export.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
- * Copyright (C) 2004-2006 Red Hat, Inc.  All rights reserved.
- *
- * This copyrighted material is made available to anyone wishing to use,
- * modify, copy, or redistribute it subject to the terms and conditions
- * of the GNU General Public License version 2.
- */
-#ifndef __OPS_EXPORT_DOT_H__
-#define __OPS_EXPORT_DOT_H__
-#define GFS2_SMALL_FH_SIZE 4
-#define GFS2_LARGE_FH_SIZE 10
-extern struct export_operations gfs2_export_ops;
-struct gfs2_fh_obj {
-        struct gfs2_inum_host this;
-        __u32            imode;
-};
-#endif /* __OPS_EXPORT_DOT_H__ */
diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c
index 064df8804582..196d83266e34 100644
--- a/fs/gfs2/ops_file.c
+++ b/fs/gfs2/ops_file.c
@@ -502,7 +502,7 @@ static int gfs2_lock(struct file *file, int cmd, struct file_lock *fl)
        struct gfs2_inode *ip = GFS2_I(file->f_mapping->host);
        struct gfs2_sbd *sdp = GFS2_SB(file->f_mapping->host);
        struct lm_lockname name =
-                { .ln_number = ip->i_num.no_addr,
+                { .ln_number = ip->i_no_addr,
                  .ln_type = LM_TYPE_PLOCK };
        if (!(fl->fl_flags & FL_POSIX))
@@ -557,7 +557,7 @@ static int do_flock(struct file *file, int cmd, struct file_lock *fl)
                gfs2_glock_dq_uninit(fl_gh);
        } else {
                error = gfs2_glock_get(GFS2_SB(&ip->i_inode),
-                                      ip->i_num.no_addr, &gfs2_flock_glops,
+                                      ip->i_no_addr, &gfs2_flock_glops,
                                      CREATE, &gl);
                if (error)
                        goto out;
@@ -635,7 +635,6 @@ const struct file_operations gfs2_file_fops = {
        .release        = gfs2_close,
        .fsync          = gfs2_fsync,
        .lock           = gfs2_lock,
-        .sendfile       = generic_file_sendfile,
        .flock          = gfs2_flock,
        .splice_read    = generic_file_splice_read,
        .splice_write   = generic_file_splice_write,
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 2c5f8e7def0d..cf5aa5050548 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -27,7 +27,6 @@
 #include "inode.h"
 #include "lm.h"
 #include "mount.h"
-#include "ops_export.h"
 #include "ops_fstype.h"
 #include "ops_super.h"
 #include "recovery.h"
@@ -105,6 +104,7 @@ static void init_vfs(struct super_block *sb, unsigned noatime)
        sb->s_magic = GFS2_MAGIC;
        sb->s_op = &gfs2_super_ops;
        sb->s_export_op = &gfs2_export_ops;
+        sb->s_time_gran = 1;
        sb->s_maxbytes = MAX_LFS_FILESIZE;
        if (sb->s_flags & (MS_NOATIME | MS_NODIRATIME))
@@ -116,7 +116,6 @@ static void init_vfs(struct super_block *sb, unsigned noatime)
 static int init_names(struct gfs2_sbd *sdp, int silent)
 {
-        struct page *page;
        char *proto, *table;
        int error = 0;
@@ -126,14 +125,9 @@ static int init_names(struct gfs2_sbd *sdp, int silent)
        /*  Try to autodetect  */
        if (!proto[0] || !table[0]) {
-                struct gfs2_sb *sb;
+                error = gfs2_read_super(sdp, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift);
-                page = gfs2_read_super(sdp->sd_vfs, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift);
+                if (error)
-                if (!page)
+                        return error;
-                        return -ENOBUFS;
-                sb = kmap(page);
-                gfs2_sb_in(&sdp->sd_sb, sb);
-                kunmap(page);
-                __free_page(page);
                error = gfs2_check_sb(sdp, &sdp->sd_sb, silent);
                if (error)
@@ -151,6 +145,9 @@ static int init_names(struct gfs2_sbd *sdp, int silent)
        snprintf(sdp->sd_proto_name, GFS2_FSNAME_LEN, "%s", proto);
        snprintf(sdp->sd_table_name, GFS2_FSNAME_LEN, "%s", table);
+        while ((table = strchr(sdp->sd_table_name, '/')))
+                *table = '_';
 out:
        return error;
 }
@@ -236,17 +233,17 @@ fail:
        return error;
 }
-static struct inode *gfs2_lookup_root(struct super_block *sb,
+static inline struct inode *gfs2_lookup_root(struct super_block *sb,
-                                      struct gfs2_inum_host *inum)
+                                             u64 no_addr)
 {
-        return gfs2_inode_lookup(sb, inum, DT_DIR);
+        return gfs2_inode_lookup(sb, DT_DIR, no_addr, 0);
 }
 static int init_sb(struct gfs2_sbd *sdp, int silent, int undo)
 {
        struct super_block *sb = sdp->sd_vfs;
        struct gfs2_holder sb_gh;
-        struct gfs2_inum_host *inum;
+        u64 no_addr;
        struct inode *inode;
        int error = 0;
@@ -289,10 +286,10 @@ static int init_sb(struct gfs2_sbd *sdp, int silent, int undo)
        sb_set_blocksize(sb, sdp->sd_sb.sb_bsize);
        /* Get the root inode */
-        inum = &sdp->sd_sb.sb_root_dir;
+        no_addr = sdp->sd_sb.sb_root_dir.no_addr;
        if (sb->s_type == &gfs2meta_fs_type)
-                inum = &sdp->sd_sb.sb_master_dir;
+                no_addr = sdp->sd_sb.sb_master_dir.no_addr;
-        inode = gfs2_lookup_root(sb, inum);
+        inode = gfs2_lookup_root(sb, no_addr);
        if (IS_ERR(inode)) {
                error = PTR_ERR(inode);
                fs_err(sdp, "can't read in root inode: %d\n", error);
@@ -449,7 +446,7 @@ static int init_inodes(struct gfs2_sbd *sdp, int undo)
        if (undo)
                goto fail_qinode;
-        inode = gfs2_lookup_root(sdp->sd_vfs, &sdp->sd_sb.sb_master_dir);
+        inode = gfs2_lookup_root(sdp->sd_vfs, sdp->sd_sb.sb_master_dir.no_addr);
        if (IS_ERR(inode)) {
                error = PTR_ERR(inode);
                fs_err(sdp, "can't read in master directory: %d\n", error);
diff --git a/fs/gfs2/ops_fstype.h b/fs/gfs2/ops_fstype.h
index 7cc2c296271b..407029b3b2b3 100644
--- a/fs/gfs2/ops_fstype.h
+++ b/fs/gfs2/ops_fstype.h
@@ -14,5 +14,6 @@
 extern struct file_system_type gfs2_fs_type;
 extern struct file_system_type gfs2meta_fs_type;
+extern struct export_operations gfs2_export_ops;
 #endif /* __OPS_FSTYPE_DOT_H__ */
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index d85f6e05cb95..911c115b5c6c 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -157,7 +157,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
        if (error)
                goto out_gunlock;
-        error = gfs2_dir_search(dir, &dentry->d_name, NULL, NULL);
+        error = gfs2_dir_check(dir, &dentry->d_name, NULL);
        switch (error) {
        case -ENOENT:
                break;
@@ -206,7 +206,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
                        goto out_gunlock_q;
                error = gfs2_trans_begin(sdp, sdp->sd_max_dirres +
-                                         al->al_rgd->rd_ri.ri_length +
+                                         al->al_rgd->rd_length +
                                         2 * RES_DINODE + RES_STATFS +
                                         RES_QUOTA, 0);
                if (error)
@@ -217,8 +217,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
                        goto out_ipres;
        }
-        error = gfs2_dir_add(dir, &dentry->d_name, &ip->i_num,
+        error = gfs2_dir_add(dir, &dentry->d_name, ip, IF2DT(inode->i_mode));
-                             IF2DT(inode->i_mode));
        if (error)
                goto out_end_trans;
@@ -275,7 +274,7 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry)
        gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
        gfs2_holder_init(ip->i_gl,  LM_ST_EXCLUSIVE, 0, ghs + 1);
-        rgd = gfs2_blk2rgrpd(sdp, ip->i_num.no_addr);
+        rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr);
        gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2);
@@ -420,7 +419,7 @@ static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, int mode)
                dent = (struct gfs2_dirent *)((char*)dent + GFS2_DIRENT_SIZE(1));
                gfs2_qstr2dirent(&str, dibh->b_size - GFS2_DIRENT_SIZE(1) - sizeof(struct gfs2_dinode), dent);
-                gfs2_inum_out(&dip->i_num, &dent->de_inum);
+                gfs2_inum_out(dip, dent);
                dent->de_type = cpu_to_be16(DT_DIR);
                gfs2_dinode_out(ip, di);
@@ -472,7 +471,7 @@ static int gfs2_rmdir(struct inode *dir, struct dentry *dentry)
        gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
        gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1);
-        rgd = gfs2_blk2rgrpd(sdp, ip->i_num.no_addr);
+        rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr);
        gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2);
        error = gfs2_glock_nq_m(3, ghs);
@@ -614,7 +613,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
                 * this is the case of the target file already existing
                 * so we unlink before doing the rename
                 */
-                nrgd = gfs2_blk2rgrpd(sdp, nip->i_num.no_addr);
+                nrgd = gfs2_blk2rgrpd(sdp, nip->i_no_addr);
                if (nrgd)
                        gfs2_holder_init(nrgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh++);
        }
@@ -653,7 +652,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
                if (error)
                        goto out_gunlock;
-                error = gfs2_dir_search(ndir, &ndentry->d_name, NULL, NULL);
+                error = gfs2_dir_check(ndir, &ndentry->d_name, NULL);
                switch (error) {
                case -ENOENT:
                        error = 0;
@@ -712,7 +711,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
                        goto out_gunlock_q;
                error = gfs2_trans_begin(sdp, sdp->sd_max_dirres +
-                                         al->al_rgd->rd_ri.ri_length +
+                                         al->al_rgd->rd_length +
                                         4 * RES_DINODE + 4 * RES_LEAF +
                                         RES_STATFS + RES_QUOTA + 4, 0);
                if (error)
@@ -750,7 +749,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
                if (error)
                        goto out_end_trans;
-                error = gfs2_dir_mvino(ip, &name, &ndip->i_num, DT_DIR);
+                error = gfs2_dir_mvino(ip, &name, ndip, DT_DIR);
                if (error)
                        goto out_end_trans;
        } else {
@@ -758,7 +757,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
                error = gfs2_meta_inode_buffer(ip, &dibh);
                if (error)
                        goto out_end_trans;
-                ip->i_inode.i_ctime = CURRENT_TIME_SEC;
+                ip->i_inode.i_ctime = CURRENT_TIME;
                gfs2_trans_add_bh(ip->i_gl, dibh, 1);
                gfs2_dinode_out(ip, dibh->b_data);
                brelse(dibh);
@@ -768,8 +767,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
        if (error)
                goto out_end_trans;
-        error = gfs2_dir_add(ndir, &ndentry->d_name, &ip->i_num,
+        error = gfs2_dir_add(ndir, &ndentry->d_name, ip, IF2DT(ip->i_inode.i_mode));
-                             IF2DT(ip->i_inode.i_mode));
        if (error)
                goto out_end_trans;
@@ -905,8 +903,8 @@ static int setattr_size(struct inode *inode, struct iattr *attr)
        }
        error = gfs2_truncatei(ip, attr->ia_size);
-        if (error)
+        if (error && (inode->i_size != ip->i_di.di_size))
-                return error;
+                i_size_write(inode, ip->i_di.di_size);
        return error;
 }
diff --git a/fs/gfs2/ops_super.c b/fs/gfs2/ops_super.c
index 485ce3d49923..603d940f1159 100644
--- a/fs/gfs2/ops_super.c
+++ b/fs/gfs2/ops_super.c
@@ -326,8 +326,10 @@ static void gfs2_clear_inode(struct inode *inode)
                gfs2_glock_schedule_for_reclaim(ip->i_gl);
                gfs2_glock_put(ip->i_gl);
                ip->i_gl = NULL;
-                if (ip->i_iopen_gh.gh_gl)
+                if (ip->i_iopen_gh.gh_gl) {
+                        ip->i_iopen_gh.gh_gl->gl_object = NULL;
                        gfs2_glock_dq_uninit(&ip->i_iopen_gh);
+                }
        }
 }
@@ -422,13 +424,13 @@ static void gfs2_delete_inode(struct inode *inode)
        if (!inode->i_private)
                goto out;
-        error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB, &gh);
+        error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
        if (unlikely(error)) {
                gfs2_glock_dq_uninit(&ip->i_iopen_gh);
                goto out;
        }
-        gfs2_glock_dq(&ip->i_iopen_gh);
+        gfs2_glock_dq_wait(&ip->i_iopen_gh);
        gfs2_holder_reinit(LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB | GL_NOCACHE, &ip->i_iopen_gh);
        error = gfs2_glock_nq(&ip->i_iopen_gh);
        if (error)
diff --git a/fs/gfs2/ops_vm.c b/fs/gfs2/ops_vm.c
index aa0dbd2aac1b..404b7cc9f8c4 100644
--- a/fs/gfs2/ops_vm.c
+++ b/fs/gfs2/ops_vm.c
@@ -66,7 +66,7 @@ static int alloc_page_backing(struct gfs2_inode *ip, struct page *page)
        if (error)
                goto out_gunlock_q;
-        error = gfs2_trans_begin(sdp, al->al_rgd->rd_ri.ri_length +
+        error = gfs2_trans_begin(sdp, al->al_rgd->rd_length +
                                 ind_blocks + RES_DINODE +
                                 RES_STATFS + RES_QUOTA, 0);
        if (error)
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index c186857e48a8..6e546ee8f3d4 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -66,6 +66,18 @@
 #define QUOTA_USER 1
 #define QUOTA_GROUP 0
+/*
+ * Host-endian copy of the limit, warn and value fields of an on-disk
+ * struct gfs2_quota. Filled by gfs2_quota_in() and written back by
+ * gfs2_quota_out().
+ */
+struct gfs2_quota_host {
+        u64 qu_limit;
+        u64 qu_warn;
+        s64 qu_value; /* signed; gfs2_adjust_quota() adds its change to this */
+};
+/*
+ * Host-endian copy of the change, flags and id fields of an on-disk
+ * struct gfs2_quota_change. Filled by gfs2_quota_change_in().
+ */
+struct gfs2_quota_change_host {
+        u64 qc_change;
+        u32 qc_flags; /* GFS2_QCF_... */
+        u32 qc_id;
+};
 static u64 qd2offset(struct gfs2_quota_data *qd)
 {
        u64 offset;
@@ -561,6 +573,25 @@ static void do_qc(struct gfs2_quota_data *qd, s64 change)
        mutex_unlock(&sdp->sd_quota_mutex);
 }
+/**
+ * gfs2_quota_in - unpack an on-disk quota record
+ * @qu: host-endian structure to fill
+ * @buf: buffer holding a big-endian struct gfs2_quota
+ */
+static void gfs2_quota_in(struct gfs2_quota_host *qu, const void *buf)
+{
+        const struct gfs2_quota *disk = buf;
+        const u64 limit = be64_to_cpu(disk->qu_limit);
+        const u64 warn = be64_to_cpu(disk->qu_warn);
+        qu->qu_limit = limit;
+        qu->qu_warn = warn;
+        /* stored into the signed host field */
+        qu->qu_value = be64_to_cpu(disk->qu_value);
+}
+/**
+ * gfs2_quota_out - pack a quota record into its on-disk form
+ * @qu: host-endian values to write
+ * @buf: destination buffer, laid out as a big-endian struct gfs2_quota
+ */
+static void gfs2_quota_out(const struct gfs2_quota_host *qu, void *buf)
+{
+        struct gfs2_quota *disk = buf;
+        /* the reserved area is always written as zeroes */
+        memset(&disk->qu_reserved, 0, sizeof(disk->qu_reserved));
+        disk->qu_value = cpu_to_be64(qu->qu_value);
+        disk->qu_warn = cpu_to_be64(qu->qu_warn);
+        disk->qu_limit = cpu_to_be64(qu->qu_limit);
+}
 /**
 * gfs2_adjust_quota
 *
@@ -573,12 +604,13 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc,
        struct inode *inode = &ip->i_inode;
        struct address_space *mapping = inode->i_mapping;
        unsigned long index = loc >> PAGE_CACHE_SHIFT;
-        unsigned offset = loc & (PAGE_CACHE_SHIFT - 1);
+        unsigned offset = loc & (PAGE_CACHE_SIZE - 1);
        unsigned blocksize, iblock, pos;
        struct buffer_head *bh;
        struct page *page;
        void *kaddr;
-        __be64 *ptr;
+        char *ptr;
+        struct gfs2_quota_host qp;
        s64 value;
        int err = -EIO;
@@ -620,13 +652,17 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc,
        kaddr = kmap_atomic(page, KM_USER0);
        ptr = kaddr + offset;
-        value = (s64)be64_to_cpu(*ptr) + change;
+        gfs2_quota_in(&qp, ptr);
-        *ptr = cpu_to_be64(value);
+        qp.qu_value += change;
+        value = qp.qu_value;
+        gfs2_quota_out(&qp, ptr);
        flush_dcache_page(page);
        kunmap_atomic(kaddr, KM_USER0);
        err = 0;
        qd->qd_qb.qb_magic = cpu_to_be32(GFS2_MAGIC);
        qd->qd_qb.qb_value = cpu_to_be64(value);
+        ((struct gfs2_quota_lvb*)(qd->qd_gl->gl_lvb))->qb_magic = cpu_to_be32(GFS2_MAGIC);
+        ((struct gfs2_quota_lvb*)(qd->qd_gl->gl_lvb))->qb_value = cpu_to_be64(value);
 unlock:
        unlock_page(page);
        page_cache_release(page);
@@ -689,7 +725,7 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda)
                        goto out_alloc;
                error = gfs2_trans_begin(sdp,
-                                         al->al_rgd->rd_ri.ri_length +
+                                         al->al_rgd->rd_length +
                                         num_qd * data_blocks +
                                         nalloc * ind_blocks +
                                         RES_DINODE + num_qd +
@@ -709,7 +745,7 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda)
                offset = qd2offset(qd);
                error = gfs2_adjust_quota(ip, offset, qd->qd_change_sync,
                                          (struct gfs2_quota_data *)
-                                          qd->qd_gl->gl_lvb);
+                                          qd);
                if (error)
                        goto out_end_trans;
@@ -1050,6 +1086,15 @@ int gfs2_quota_refresh(struct gfs2_sbd *sdp, int user, u32 id)
        return error;
 }
+/**
+ * gfs2_quota_change_in - unpack an on-disk quota change record
+ * @qc: host-endian structure to fill
+ * @buf: buffer holding a big-endian struct gfs2_quota_change
+ */
+static void gfs2_quota_change_in(struct gfs2_quota_change_host *qc, const void *buf)
+{
+        const struct gfs2_quota_change *disk = buf;
+        qc->qc_id = be32_to_cpu(disk->qc_id);
+        qc->qc_flags = be32_to_cpu(disk->qc_flags);
+        qc->qc_change = be64_to_cpu(disk->qc_change);
+}
 int gfs2_quota_init(struct gfs2_sbd *sdp)
 {
        struct gfs2_inode *ip = GFS2_I(sdp->sd_qc_inode);
diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c
index 8bc182c7e2ef..5ada38c99a2c 100644
--- a/fs/gfs2/recovery.c
+++ b/fs/gfs2/recovery.c
@@ -116,6 +116,22 @@ void gfs2_revoke_clean(struct gfs2_sbd *sdp)
        }
 }
+/**
+ * gfs2_log_header_in - validate and unpack an on-disk log header
+ * @lh: host-endian structure to fill
+ * @buf: buffer holding a big-endian struct gfs2_log_header
+ *
+ * Returns: 0 if the metadata header carries the GFS2 magic and the log
+ *          header type (and @lh was filled), 1 otherwise (@lh untouched)
+ */
+static int gfs2_log_header_in(struct gfs2_log_header_host *lh, const void *buf)
+{
+        const struct gfs2_log_header *disk = buf;
+        /* compare in on-disk byte order, so no swap of the block is needed */
+        if (disk->lh_header.mh_magic != cpu_to_be32(GFS2_MAGIC))
+                return 1;
+        if (disk->lh_header.mh_type != cpu_to_be32(GFS2_METATYPE_LH))
+                return 1;
+        lh->lh_sequence = be64_to_cpu(disk->lh_sequence);
+        lh->lh_flags = be32_to_cpu(disk->lh_flags);
+        lh->lh_tail = be32_to_cpu(disk->lh_tail);
+        lh->lh_blkno = be32_to_cpu(disk->lh_blkno);
+        lh->lh_hash = be32_to_cpu(disk->lh_hash);
+        return 0;
+}
 /**
 * get_log_header - read the log header for a given segment
 * @jd: the journal
@@ -147,12 +163,10 @@ static int get_log_header(struct gfs2_jdesc *jd, unsigned int blk,
                                             sizeof(u32));
        hash = crc32_le(hash, (unsigned char const *)&nothing, sizeof(nothing));
        hash ^= (u32)~0;
-        gfs2_log_header_in(&lh, bh->b_data);
+        error = gfs2_log_header_in(&lh, bh->b_data);
        brelse(bh);
-        if (lh.lh_header.mh_magic != GFS2_MAGIC ||
+        if (error || lh.lh_blkno != blk || lh.lh_hash != hash)
-            lh.lh_header.mh_type != GFS2_METATYPE_LH ||
-            lh.lh_blkno != blk || lh.lh_hash != hash)
                return 1;
        *head = lh;
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 1727f5012efe..e4e040625153 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -1,6 +1,6 @@
 /*
 * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
- * Copyright (C) 2004-2006 Red Hat, Inc.  All rights reserved.
+ * Copyright (C) 2004-2007 Red Hat, Inc.  All rights reserved.
 *
 * This copyrighted material is made available to anyone wishing to use,
 * modify, copy, or redistribute it subject to the terms and conditions
@@ -28,6 +28,7 @@
 #include "ops_file.h"
 #include "util.h"
 #include "log.h"
+#include "inode.h"
 #define BFITNOENT ((u32)~0)
@@ -50,6 +51,9 @@ static const char valid_change[16] = {
                1, 0, 0, 0
 };
+/* Forward declaration: try_rgrp_unlink() calls this ahead of its definition. */
+static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal,
+                        unsigned char old_state, unsigned char new_state);
 /**
 * gfs2_setbit - Set a bit in the bitmaps
 * @buffer: the buffer that holds the bitmaps
@@ -204,7 +208,7 @@ void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd)
 {
        struct gfs2_sbd *sdp = rgd->rd_sbd;
        struct gfs2_bitmap *bi = NULL;
-        u32 length = rgd->rd_ri.ri_length;
+        u32 length = rgd->rd_length;
        u32 count[4], tmp;
        int buf, x;
@@ -227,7 +231,7 @@ void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd)
                return;
        }
-        tmp = rgd->rd_ri.ri_data -
+        tmp = rgd->rd_data -
                rgd->rd_rg.rg_free -
                rgd->rd_rg.rg_dinodes;
        if (count[1] + count[2] != tmp) {
@@ -253,10 +257,10 @@ void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd)
 }
-static inline int rgrp_contains_block(struct gfs2_rindex_host *ri, u64 block)
+static inline int rgrp_contains_block(struct gfs2_rgrpd *rgd, u64 block)
 {
-        u64 first = ri->ri_data0;
+        u64 first = rgd->rd_data0;
-        u64 last = first + ri->ri_data;
+        u64 last = first + rgd->rd_data;
        return first <= block && block < last;
 }
@@ -275,7 +279,7 @@ struct gfs2_rgrpd *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, u64 blk)
        spin_lock(&sdp->sd_rindex_spin);
        list_for_each_entry(rgd, &sdp->sd_rindex_mru_list, rd_list_mru) {
-                if (rgrp_contains_block(&rgd->rd_ri, blk)) {
+                if (rgrp_contains_block(rgd, blk)) {
                        list_move(&rgd->rd_list_mru, &sdp->sd_rindex_mru_list);
                        spin_unlock(&sdp->sd_rindex_spin);
                        return rgd;
@@ -354,6 +358,15 @@ void gfs2_clear_rgrpd(struct gfs2_sbd *sdp)
        mutex_unlock(&sdp->sd_rindex_mutex);
 }
+/**
+ * gfs2_rindex_print - log the rindex-derived fields of a resource group
+ * @rgd: the resource group descriptor to dump
+ *
+ * The labels keep the ri_ names even though the values are read from
+ * the rd_ fields of @rgd.
+ */
+static void gfs2_rindex_print(const struct gfs2_rgrpd *rgd)
+{
+        const unsigned long long addr = rgd->rd_addr;
+        const unsigned long long data0 = rgd->rd_data0;
+        printk(KERN_INFO "  ri_addr = %llu\n", addr);
+        printk(KERN_INFO "  ri_length = %u\n", rgd->rd_length);
+        printk(KERN_INFO "  ri_data0 = %llu\n", data0);
+        printk(KERN_INFO "  ri_data = %u\n", rgd->rd_data);
+        printk(KERN_INFO "  ri_bitbytes = %u\n", rgd->rd_bitbytes);
+}
 /**
 * gfs2_compute_bitstructs - Compute the bitmap sizes
 * @rgd: The resource group descriptor
@@ -367,7 +380,7 @@ static int compute_bitstructs(struct gfs2_rgrpd *rgd)
 {
        struct gfs2_sbd *sdp = rgd->rd_sbd;
        struct gfs2_bitmap *bi;
-        u32 length = rgd->rd_ri.ri_length; /* # blocks in hdr & bitmap */
+        u32 length = rgd->rd_length; /* # blocks in hdr & bitmap */
        u32 bytes_left, bytes;
        int x;
@@ -378,7 +391,7 @@ static int compute_bitstructs(struct gfs2_rgrpd *rgd)
        if (!rgd->rd_bits)
                return -ENOMEM;
-        bytes_left = rgd->rd_ri.ri_bitbytes;
+        bytes_left = rgd->rd_bitbytes;
        for (x = 0; x < length; x++) {
                bi = rgd->rd_bits + x;
@@ -399,14 +412,14 @@ static int compute_bitstructs(struct gfs2_rgrpd *rgd)
                } else if (x + 1 == length) {
                        bytes = bytes_left;
                        bi->bi_offset = sizeof(struct gfs2_meta_header);
-                        bi->bi_start = rgd->rd_ri.ri_bitbytes - bytes_left;
+                        bi->bi_start = rgd->rd_bitbytes - bytes_left;
                        bi->bi_len = bytes;
                /* other blocks */
                } else {
                        bytes = sdp->sd_sb.sb_bsize -
                                sizeof(struct gfs2_meta_header);
                        bi->bi_offset = sizeof(struct gfs2_meta_header);
-                        bi->bi_start = rgd->rd_ri.ri_bitbytes - bytes_left;
+                        bi->bi_start = rgd->rd_bitbytes - bytes_left;
                        bi->bi_len = bytes;
                }
@@ -418,9 +431,9 @@ static int compute_bitstructs(struct gfs2_rgrpd *rgd)
                return -EIO;
        }
        bi = rgd->rd_bits + (length - 1);
-        if ((bi->bi_start + bi->bi_len) * GFS2_NBBY != rgd->rd_ri.ri_data) {
+        if ((bi->bi_start + bi->bi_len) * GFS2_NBBY != rgd->rd_data) {
                if (gfs2_consist_rgrpd(rgd)) {
-                        gfs2_rindex_print(&rgd->rd_ri);
+                        gfs2_rindex_print(rgd);
                        fs_err(sdp, "start=%u len=%u offset=%u\n",
                               bi->bi_start, bi->bi_len, bi->bi_offset);
                }
@@ -431,9 +444,104 @@ static int compute_bitstructs(struct gfs2_rgrpd *rgd)
 }
 /**
- * gfs2_ri_update - Pull in a new resource index from the disk
+ * gfs2_ri_total - Total up the file system space, according to the rindex.
+ *
+ */
+u64 gfs2_ri_total(struct gfs2_sbd *sdp)
+{
+        /*
+         * @sdp: superblock whose rindex inode (sd_rindex) is scanned.
+         * Returns the sum of the ri_data fields of every complete
+         * struct gfs2_rindex entry that could be read; a failed or short
+         * read ends the scan and the partial sum is returned.
+         */
+        u64 total_data = 0;
+        struct inode *inode = sdp->sd_rindex;
+        struct gfs2_inode *ip = GFS2_I(inode);
+        char buf[sizeof(struct gfs2_rindex)];
+        struct file_ra_state ra_state;
+        int error, rgrps;
+        /* the whole scan runs with sd_rindex_mutex held */
+        mutex_lock(&sdp->sd_rindex_mutex);
+        file_ra_state_init(&ra_state, inode->i_mapping);
+        for (rgrps = 0;; rgrps++) {
+                loff_t pos = rgrps * sizeof(struct gfs2_rindex);
+                /*
+                 * Stop only when a whole entry no longer fits in the file.
+                 * Must be '>' rather than '>=': with '>=' the final complete
+                 * entry (ending exactly at di_size) was never counted. This
+                 * matches the bound used by gfs2_ri_update_special().
+                 */
+                if (pos + sizeof(struct gfs2_rindex) > ip->i_di.di_size)
+                        break;
+                /* 'error' carries the read's return value (bytes read) */
+                error = gfs2_internal_read(ip, &ra_state, buf, &pos,
+                                           sizeof(struct gfs2_rindex));
+                if (error != sizeof(struct gfs2_rindex))
+                        break;
+                /* ri_data is big-endian in the raw buffer */
+                total_data += be32_to_cpu(((struct gfs2_rindex *)buf)->ri_data);
+        }
+        mutex_unlock(&sdp->sd_rindex_mutex);
+        return total_data;
+}
+/**
+ * gfs2_rindex_in - unpack an on-disk rindex entry into a resource group
+ * @rgd: descriptor whose rd_addr, rd_length, rd_data0, rd_data and
+ *       rd_bitbytes fields are filled
+ * @buf: buffer holding a big-endian struct gfs2_rindex
+ */
+static void gfs2_rindex_in(struct gfs2_rgrpd *rgd, const void *buf)
+{
+        const struct gfs2_rindex *disk = buf;
+        /* 64-bit block addresses */
+        rgd->rd_addr = be64_to_cpu(disk->ri_addr);
+        rgd->rd_data0 = be64_to_cpu(disk->ri_data0);
+        /* 32-bit counts */
+        rgd->rd_length = be32_to_cpu(disk->ri_length);
+        rgd->rd_data = be32_to_cpu(disk->ri_data);
+        rgd->rd_bitbytes = be32_to_cpu(disk->ri_bitbytes);
+}
+/**
+ * read_rindex_entry - Pull in a new resource index entry from the disk
 * @gl: The glock covering the rindex inode
 *
+ * Returns: 0 on success, error code otherwise
+ */
+static int read_rindex_entry(struct gfs2_inode *ip,
+                             struct file_ra_state *ra_state)
+{
+        /*
+         * @ip: the rindex inode to read from
+         * @ra_state: readahead state handed through to gfs2_internal_read()
+         * Reads the entry at index sdp->sd_rgrps and, if a full entry was
+         * read, creates a resource group descriptor for it.
+         */
+        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
+        /* byte offset of the next entry: sd_rgrps entries precede it */
+        loff_t pos = sdp->sd_rgrps * sizeof(struct gfs2_rindex);
+        char buf[sizeof(struct gfs2_rindex)];
+        int error;
+        struct gfs2_rgrpd *rgd;
+        /* 'error' holds the read's return value, compared below to the entry size */
+        error = gfs2_internal_read(ip, ra_state, buf, &pos,
+                                   sizeof(struct gfs2_rindex));
+        /* nothing read: report success without creating a descriptor */
+        if (!error)
+                return 0;
+        /* a short positive read becomes -EIO; negative values pass through */
+        if (error != sizeof(struct gfs2_rindex)) {
+                if (error > 0)
+                        error = -EIO;
+                return error;
+        }
+        rgd = kzalloc(sizeof(struct gfs2_rgrpd), GFP_NOFS);
+        error = -ENOMEM;
+        if (!rgd)
+                return error;
+        mutex_init(&rgd->rd_mutex);
+        lops_init_le(&rgd->rd_le, &gfs2_rg_lops);
+        rgd->rd_sbd = sdp;
+        /*
+         * The descriptor goes onto the superblock lists before the fallible
+         * steps below, and is NOT freed here when they fail.
+         * NOTE(review): the callers in this file call clear_rgrpdi() on
+         * error, presumably to release listed descriptors - confirm.
+         */
+        list_add_tail(&rgd->rd_list, &sdp->sd_rindex_list);
+        list_add_tail(&rgd->rd_list_mru, &sdp->sd_rindex_mru_list);
+        gfs2_rindex_in(rgd, buf);
+        error = compute_bitstructs(rgd);
+        if (error)
+                return error;
+        error = gfs2_glock_get(sdp, rgd->rd_addr,
+                               &gfs2_rgrp_glops, CREATE, &rgd->rd_gl);
+        if (error)
+                return error;
+        rgd->rd_gl->gl_object = rgd;
+        /* NOTE(review): one behind the glock's gl_vn, presumably so the rgrp is treated as stale - confirm */
+        rgd->rd_rg_vn = rgd->rd_gl->gl_vn - 1;
+        /* get_local_rgrp() tests this flag before calling try_rgrp_unlink() */
+        rgd->rd_flags |= GFS2_RDF_CHECK;
+        /* error is 0 here */
+        return error;
+}
+/**
+ * gfs2_ri_update - Pull in a new resource index from the disk
+ * @ip: pointer to the rindex inode
+ *
 * Returns: 0 on successful update, error code otherwise
 */
@@ -441,13 +549,11 @@ static int gfs2_ri_update(struct gfs2_inode *ip)
 {
        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
        struct inode *inode = &ip->i_inode;
-        struct gfs2_rgrpd *rgd;
-        char buf[sizeof(struct gfs2_rindex)];
        struct file_ra_state ra_state;
-        u64 junk = ip->i_di.di_size;
+        u64 rgrp_count = ip->i_di.di_size;
        int error;
-        if (do_div(junk, sizeof(struct gfs2_rindex))) {
+        if (do_div(rgrp_count, sizeof(struct gfs2_rindex))) {
                gfs2_consist_inode(ip);
                return -EIO;
        }
@@ -455,50 +561,50 @@ static int gfs2_ri_update(struct gfs2_inode *ip)
        clear_rgrpdi(sdp);
        file_ra_state_init(&ra_state, inode->i_mapping);
-        for (sdp->sd_rgrps = 0;; sdp->sd_rgrps++) {
+        for (sdp->sd_rgrps = 0; sdp->sd_rgrps < rgrp_count; sdp->sd_rgrps++) {
-                loff_t pos = sdp->sd_rgrps * sizeof(struct gfs2_rindex);
+                error = read_rindex_entry(ip, &ra_state);
-                error = gfs2_internal_read(ip, &ra_state, buf, &pos,
+                if (error) {
-                                            sizeof(struct gfs2_rindex));
+                        clear_rgrpdi(sdp);
-                if (!error)
+                        return error;
-                        break;
-                if (error != sizeof(struct gfs2_rindex)) {
-                        if (error > 0)
-                                error = -EIO;
-                        goto fail;
                }
+        }
-                rgd = kzalloc(sizeof(struct gfs2_rgrpd), GFP_NOFS);
+        sdp->sd_rindex_vn = ip->i_gl->gl_vn;
-                error = -ENOMEM;
+        return 0;
-                if (!rgd)
+}
-                        goto fail;
-                mutex_init(&rgd->rd_mutex);
-                lops_init_le(&rgd->rd_le, &gfs2_rg_lops);
-                rgd->rd_sbd = sdp;
-                list_add_tail(&rgd->rd_list, &sdp->sd_rindex_list);
-                list_add_tail(&rgd->rd_list_mru, &sdp->sd_rindex_mru_list);
-                gfs2_rindex_in(&rgd->rd_ri, buf);
-                error = compute_bitstructs(rgd);
-                if (error)
-                        goto fail;
-                error = gfs2_glock_get(sdp, rgd->rd_ri.ri_addr,
+/**
-                                       &gfs2_rgrp_glops, CREATE, &rgd->rd_gl);
+ * gfs2_ri_update_special - Pull in a new resource index from the disk
-                if (error)
+ *
-                        goto fail;
+ * This is a special version that's safe to call from gfs2_inplace_reserve_i.
+ * In this case we know that we don't have any resource groups in memory yet.
+ *
+ * @ip: pointer to the rindex inode
+ *
+ * Returns: 0 on successful update, error code otherwise
+ */
+static int gfs2_ri_update_special(struct gfs2_inode *ip)
+{
+        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
+        struct inode *inode = &ip->i_inode;
+        struct file_ra_state ra_state;
+        int error;
-                rgd->rd_gl->gl_object = rgd;
+        file_ra_state_init(&ra_state, inode->i_mapping);
-                rgd->rd_rg_vn = rgd->rd_gl->gl_vn - 1;
+        for (sdp->sd_rgrps = 0;; sdp->sd_rgrps++) {
+                /* Ignore partials */
+                if ((sdp->sd_rgrps + 1) * sizeof(struct gfs2_rindex) >
+                    ip->i_di.di_size)
+                        break;
+                error = read_rindex_entry(ip, &ra_state);
+                if (error) {
+                        clear_rgrpdi(sdp);
+                        return error;
+                }
        }
        sdp->sd_rindex_vn = ip->i_gl->gl_vn;
        return 0;
-fail:
-        clear_rgrpdi(sdp);
-        return error;
 }
 /**
@@ -543,6 +649,28 @@ int gfs2_rindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ri_gh)
        return error;
 }
+/**
+ * gfs2_rgrp_in - unpack an on-disk resource group header
+ * @rg: host-endian structure to fill
+ * @buf: buffer holding a big-endian struct gfs2_rgrp
+ */
+static void gfs2_rgrp_in(struct gfs2_rgrp_host *rg, const void *buf)
+{
+        const struct gfs2_rgrp *disk = buf;
+        rg->rg_igeneration = be64_to_cpu(disk->rg_igeneration);
+        rg->rg_dinodes = be32_to_cpu(disk->rg_dinodes);
+        rg->rg_free = be32_to_cpu(disk->rg_free);
+        rg->rg_flags = be32_to_cpu(disk->rg_flags);
+}
+/**
+ * gfs2_rgrp_out - pack a resource group header into its on-disk form
+ * @rg: host-endian values to write
+ * @buf: destination buffer, laid out as a big-endian struct gfs2_rgrp
+ */
+static void gfs2_rgrp_out(const struct gfs2_rgrp_host *rg, void *buf)
+{
+        struct gfs2_rgrp *disk = buf;
+        /* padding and the reserved area are always written as zeroes */
+        disk->__pad = cpu_to_be32(0);
+        memset(&disk->rg_reserved, 0, sizeof(disk->rg_reserved));
+        disk->rg_flags = cpu_to_be32(rg->rg_flags);
+        disk->rg_free = cpu_to_be32(rg->rg_free);
+        disk->rg_dinodes = cpu_to_be32(rg->rg_dinodes);
+        disk->rg_igeneration = cpu_to_be64(rg->rg_igeneration);
+}
 /**
 * gfs2_rgrp_bh_get - Read in a RG's header and bitmaps
 * @rgd: the struct gfs2_rgrpd describing the RG to read in
@@ -557,7 +685,7 @@ int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd)
 {
        struct gfs2_sbd *sdp = rgd->rd_sbd;
        struct gfs2_glock *gl = rgd->rd_gl;
-        unsigned int length = rgd->rd_ri.ri_length;
+        unsigned int length = rgd->rd_length;
        struct gfs2_bitmap *bi;
        unsigned int x, y;
        int error;
@@ -575,7 +703,7 @@ int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd)
        for (x = 0; x < length; x++) {
                bi = rgd->rd_bits + x;
-                error = gfs2_meta_read(gl, rgd->rd_ri.ri_addr + x, 0, &bi->bi_bh);
+                error = gfs2_meta_read(gl, rgd->rd_addr + x, 0, &bi->bi_bh);
                if (error)
                        goto fail;
        }
@@ -637,7 +765,7 @@ void gfs2_rgrp_bh_hold(struct gfs2_rgrpd *rgd)
 void gfs2_rgrp_bh_put(struct gfs2_rgrpd *rgd)
 {
        struct gfs2_sbd *sdp = rgd->rd_sbd;
-        int x, length = rgd->rd_ri.ri_length;
+        int x, length = rgd->rd_length;
        spin_lock(&sdp->sd_rindex_spin);
        gfs2_assert_warn(rgd->rd_sbd, rgd->rd_bh_count);
@@ -660,7 +788,7 @@ void gfs2_rgrp_bh_put(struct gfs2_rgrpd *rgd)
 void gfs2_rgrp_repolish_clones(struct gfs2_rgrpd *rgd)
 {
        struct gfs2_sbd *sdp = rgd->rd_sbd;
-        unsigned int length = rgd->rd_ri.ri_length;
+        unsigned int length = rgd->rd_length;
        unsigned int x;
        for (x = 0; x < length; x++) {
@@ -722,6 +850,38 @@ static int try_rgrp_fit(struct gfs2_rgrpd *rgd, struct gfs2_alloc *al)
 }
 /**
+ * try_rgrp_unlink - Look for any unlinked, allocated, but unused inodes
+ * @rgd: The rgrp
+ *
+ * Returns: The inode, if one has been found
+ */
+static struct inode *try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked)
+{
+        /*
+         * @last_unlinked: in/out; highest block address already tried.
+         * Candidates at or below it are skipped, and it is advanced to
+         * each new candidate before the lookup.
+         * Returns the first candidate whose lookup succeeds, or NULL once
+         * the search finds nothing, in which case GFS2_RDF_CHECK is
+         * cleared on @rgd.
+         */
+        struct inode *inode;
+        u32 goal = 0;
+        u64 no_addr;
+        for(;;) {
+                goal = rgblk_search(rgd, goal, GFS2_BLKST_UNLINKED,
+                                    GFS2_BLKST_UNLINKED);
+                /*
+                 * A result of 0 is treated as "nothing found". Leave the
+                 * loop instead of returning, so the flag clearing below is
+                 * reachable; previously it was dead code and integer 0 was
+                 * returned from a pointer-returning function.
+                 */
+                if (goal == 0)
+                        break;
+                /* the search result is relative to the rgrp's first data block */
+                no_addr = goal + rgd->rd_data0;
+                /*
+                 * NOTE(review): goal is not advanced before retrying;
+                 * confirm rgblk_search() cannot return the same block
+                 * again, otherwise this loop would not terminate.
+                 */
+                if (no_addr <= *last_unlinked)
+                        continue;
+                *last_unlinked = no_addr;
+                inode = gfs2_inode_lookup(rgd->rd_sbd->sd_vfs, DT_UNKNOWN,
+                                        no_addr, -1);
+                if (!IS_ERR(inode))
+                        return inode;
+        }
+        /* nothing (more) to find: stop callers re-scanning this rgrp */
+        rgd->rd_flags &= ~GFS2_RDF_CHECK;
+        return NULL;
+}
+/**
 * recent_rgrp_first - get first RG from "recent" list
 * @sdp: The GFS2 superblock
 * @rglast: address of the rgrp used last
@@ -743,7 +903,7 @@ static struct gfs2_rgrpd *recent_rgrp_first(struct gfs2_sbd *sdp,
                goto first;
        list_for_each_entry(rgd, &sdp->sd_rindex_recent_list, rd_recent) {
-                if (rgd->rd_ri.ri_addr == rglast)
+                if (rgd->rd_addr == rglast)
                        goto out;
        }
@@ -882,8 +1042,9 @@ static void forward_rgrp_set(struct gfs2_sbd *sdp, struct gfs2_rgrpd *rgd)
 * Returns: errno
 */
-static int get_local_rgrp(struct gfs2_inode *ip)
+static struct inode *get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked)
 {
+        struct inode *inode = NULL;
        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
        struct gfs2_rgrpd *rgd, *begin = NULL;
        struct gfs2_alloc *al = &ip->i_alloc;
@@ -903,7 +1064,11 @@ static int get_local_rgrp(struct gfs2_inode *ip)
                case 0:
                        if (try_rgrp_fit(rgd, al))
                                goto out;
+                        if (rgd->rd_flags & GFS2_RDF_CHECK)
+                                inode = try_rgrp_unlink(rgd, last_unlinked);
                        gfs2_glock_dq_uninit(&al->al_rgd_gh);
+                        if (inode)
+                                return inode;
                        rgd = recent_rgrp_next(rgd, 1);
                        break;
@@ -912,7 +1077,7 @@ static int get_local_rgrp(struct gfs2_inode *ip)
                        break;
                default:
-                        return error;
+                        return ERR_PTR(error);
                }
        }
@@ -927,7 +1092,11 @@ static int get_local_rgrp(struct gfs2_inode *ip)
                case 0:
                        if (try_rgrp_fit(rgd, al))
                                goto out;
+                        if (rgd->rd_flags & GFS2_RDF_CHECK)
+                                inode = try_rgrp_unlink(rgd, last_unlinked);
                        gfs2_glock_dq_uninit(&al->al_rgd_gh);
+                        if (inode)
+                                return inode;
                        break;
                case GLR_TRYFAILED:
@@ -935,7 +1104,7 @@ static int get_local_rgrp(struct gfs2_inode *ip)
                        break;
                default:
-                        return error;
+                        return ERR_PTR(error);
                }
                rgd = gfs2_rgrpd_get_next(rgd);
@@ -944,7 +1113,7 @@ static int get_local_rgrp(struct gfs2_inode *ip)
                if (rgd == begin) {
                        if (++loops >= 3)
-                                return -ENOSPC;
+                                return ERR_PTR(-ENOSPC);
                        if (!skipped)
                                loops++;
                        flags = 0;
@@ -954,7 +1123,7 @@ static int get_local_rgrp(struct gfs2_inode *ip)
        }
 out:
-        ip->i_last_rg_alloc = rgd->rd_ri.ri_addr;
+        ip->i_last_rg_alloc = rgd->rd_addr;
        if (begin) {
                recent_rgrp_add(rgd);
@@ -964,7 +1133,7 @@ out:
                forward_rgrp_set(sdp, rgd);
        }
-        return 0;
+        return NULL;
 }
 /**
@@ -978,19 +1147,33 @@ int gfs2_inplace_reserve_i(struct gfs2_inode *ip, char *file, unsigned int line)
 {
        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
        struct gfs2_alloc *al = &ip->i_alloc;
-        int error;
+        struct inode *inode;
+        int error = 0;
+        u64 last_unlinked = 0;
        if (gfs2_assert_warn(sdp, al->al_requested))
                return -EINVAL;
-        error = gfs2_rindex_hold(sdp, &al->al_ri_gh);
+try_again:
+        /* We need to hold the rindex unless the inode we're using is
+           the rindex itself, in which case it's already held. */
+        if (ip != GFS2_I(sdp->sd_rindex))
+                error = gfs2_rindex_hold(sdp, &al->al_ri_gh);
+        else if (!sdp->sd_rgrps) /* We may not have the rindex read in, so: */
+                error = gfs2_ri_update_special(ip);
        if (error)
                return error;
-        error = get_local_rgrp(ip);
+        inode = get_local_rgrp(ip, &last_unlinked);
-        if (error) {
+        if (inode) {
-                gfs2_glock_dq_uninit(&al->al_ri_gh);
+                if (ip != GFS2_I(sdp->sd_rindex))
-                return error;
+                        gfs2_glock_dq_uninit(&al->al_ri_gh);
+                if (IS_ERR(inode))
+                        return PTR_ERR(inode);
+                iput(inode);
+                gfs2_log_flush(sdp, NULL);
+                goto try_again;
        }
        al->al_file = file;
@@ -1019,7 +1202,8 @@ void gfs2_inplace_release(struct gfs2_inode *ip)
        al->al_rgd = NULL;
        gfs2_glock_dq_uninit(&al->al_rgd_gh);
-        gfs2_glock_dq_uninit(&al->al_ri_gh);
+        if (ip != GFS2_I(sdp->sd_rindex))
+                gfs2_glock_dq_uninit(&al->al_ri_gh);
 }
 /**
@@ -1037,8 +1221,8 @@ unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block)
        unsigned int buf;
        unsigned char type;
-        length = rgd->rd_ri.ri_length;
+        length = rgd->rd_length;
-        rgrp_block = block - rgd->rd_ri.ri_data0;
+        rgrp_block = block - rgd->rd_data0;
        for (buf = 0; buf < length; buf++) {
                bi = rgd->rd_bits + buf;
@@ -1077,10 +1261,10 @@ unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block)
 */
 static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal,
-                             unsigned char old_state, unsigned char new_state)
+                        unsigned char old_state, unsigned char new_state)
 {
        struct gfs2_bitmap *bi = NULL;
-        u32 length = rgd->rd_ri.ri_length;
+        u32 length = rgd->rd_length;
        u32 blk = 0;
        unsigned int buf, x;
@@ -1118,17 +1302,18 @@ static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal,
                goal = 0;
        }
-        if (gfs2_assert_withdraw(rgd->rd_sbd, x <= length))
+        if (old_state != new_state) {
-                blk = 0;
+                gfs2_assert_withdraw(rgd->rd_sbd, blk != BFITNOENT);
-        gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1);
+                gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1);
-        gfs2_setbit(rgd, bi->bi_bh->b_data + bi->bi_offset,
+                gfs2_setbit(rgd, bi->bi_bh->b_data + bi->bi_offset,
-                    bi->bi_len, blk, new_state);
-        if (bi->bi_clone)
-                gfs2_setbit(rgd, bi->bi_clone + bi->bi_offset,
                            bi->bi_len, blk, new_state);
+                if (bi->bi_clone)
+                        gfs2_setbit(rgd, bi->bi_clone + bi->bi_offset,
+                                    bi->bi_len, blk, new_state);
+        }
-        return bi->bi_start * GFS2_NBBY + blk;
+        return (blk == BFITNOENT) ? 0 : (bi->bi_start * GFS2_NBBY) + blk;
 }
 /**
@@ -1156,9 +1341,9 @@ static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart,
                return NULL;
        }
-        length = rgd->rd_ri.ri_length;
+        length = rgd->rd_length;
-        rgrp_blk = bstart - rgd->rd_ri.ri_data0;
+        rgrp_blk = bstart - rgd->rd_data0;
        while (blen--) {
                for (buf = 0; buf < length; buf++) {
@@ -1202,15 +1387,15 @@ u64 gfs2_alloc_data(struct gfs2_inode *ip)
        u32 goal, blk;
        u64 block;
-        if (rgrp_contains_block(&rgd->rd_ri, ip->i_di.di_goal_data))
+        if (rgrp_contains_block(rgd, ip->i_di.di_goal_data))
-                goal = ip->i_di.di_goal_data - rgd->rd_ri.ri_data0;
+                goal = ip->i_di.di_goal_data - rgd->rd_data0;
        else
                goal = rgd->rd_last_alloc_data;
        blk = rgblk_search(rgd, goal, GFS2_BLKST_FREE, GFS2_BLKST_USED);
        rgd->rd_last_alloc_data = blk;
-        block = rgd->rd_ri.ri_data0 + blk;
+        block = rgd->rd_data0 + blk;
        ip->i_di.di_goal_data = block;
        gfs2_assert_withdraw(sdp, rgd->rd_rg.rg_free);
@@ -1246,15 +1431,15 @@ u64 gfs2_alloc_meta(struct gfs2_inode *ip)
        u32 goal, blk;
        u64 block;
-        if (rgrp_contains_block(&rgd->rd_ri, ip->i_di.di_goal_meta))
+        if (rgrp_contains_block(rgd, ip->i_di.di_goal_meta))
-                goal = ip->i_di.di_goal_meta - rgd->rd_ri.ri_data0;
+                goal = ip->i_di.di_goal_meta - rgd->rd_data0;
        else
                goal = rgd->rd_last_alloc_meta;
        blk = rgblk_search(rgd, goal, GFS2_BLKST_FREE, GFS2_BLKST_USED);
        rgd->rd_last_alloc_meta = blk;
-        block = rgd->rd_ri.ri_data0 + blk;
+        block = rgd->rd_data0 + blk;
        ip->i_di.di_goal_meta = block;
        gfs2_assert_withdraw(sdp, rgd->rd_rg.rg_free);
@@ -1296,7 +1481,7 @@ u64 gfs2_alloc_di(struct gfs2_inode *dip, u64 *generation)
        rgd->rd_last_alloc_meta = blk;
-        block = rgd->rd_ri.ri_data0 + blk;
+        block = rgd->rd_data0 + blk;
        gfs2_assert_withdraw(sdp, rgd->rd_rg.rg_free);
        rgd->rd_rg.rg_free--;
@@ -1379,7 +1564,7 @@ void gfs2_unlink_di(struct inode *inode)
        struct gfs2_inode *ip = GFS2_I(inode);
        struct gfs2_sbd *sdp = GFS2_SB(inode);
        struct gfs2_rgrpd *rgd;
-        u64 blkno = ip->i_num.no_addr;
+        u64 blkno = ip->i_no_addr;
        rgd = rgblk_free(sdp, blkno, 1, GFS2_BLKST_UNLINKED);
        if (!rgd)
@@ -1414,9 +1599,9 @@ static void gfs2_free_uninit_di(struct gfs2_rgrpd *rgd, u64 blkno)
 void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip)
 {
-        gfs2_free_uninit_di(rgd, ip->i_num.no_addr);
+        gfs2_free_uninit_di(rgd, ip->i_no_addr);
        gfs2_quota_change(ip, -1, ip->i_inode.i_uid, ip->i_inode.i_gid);
-        gfs2_meta_wipe(ip, ip->i_num.no_addr, 1);
+        gfs2_meta_wipe(ip, ip->i_no_addr, 1);
 }
 /**
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h
index b01e0cfc99b5..b4c6adfc6f2e 100644
--- a/fs/gfs2/rgrp.h
+++ b/fs/gfs2/rgrp.h
@@ -65,5 +65,6 @@ void gfs2_rlist_add(struct gfs2_sbd *sdp, struct gfs2_rgrp_list *rlist,
 void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state,
                      int flags);
 void gfs2_rlist_free(struct gfs2_rgrp_list *rlist);
+u64 gfs2_ri_total(struct gfs2_sbd *sdp);
 #endif /* __RGRP_DOT_H__ */
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 4fdda974dc83..f916b9740c75 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -95,8 +95,8 @@ int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb_host *sb, int silent)
 {
        unsigned int x;
-        if (sb->sb_header.mh_magic != GFS2_MAGIC ||
+        if (sb->sb_magic != GFS2_MAGIC ||
-            sb->sb_header.mh_type != GFS2_METATYPE_SB) {
+            sb->sb_type != GFS2_METATYPE_SB) {
                if (!silent)
                        printk(KERN_WARNING "GFS2: not a GFS2 filesystem\n");
                return -EINVAL;
@@ -174,10 +174,31 @@ static int end_bio_io_page(struct bio *bio, unsigned int bytes_done, int error)
        return 0;
 }
+/*
+ * gfs2_sb_in - Fill the in-core superblock from its on-disk image
+ * @sb: in-core superblock to fill in
+ * @buf: buffer holding the on-disk struct gfs2_sb (big-endian fields)
+ */
+static void gfs2_sb_in(struct gfs2_sb_host *sb, const void *buf)
+{
+        const struct gfs2_sb *disk = buf;
+
+        /* Lock protocol and lock table names are copied verbatim */
+        memcpy(sb->sb_lockproto, disk->sb_lockproto, GFS2_LOCKNAME_LEN);
+        memcpy(sb->sb_locktable, disk->sb_locktable, GFS2_LOCKNAME_LEN);
+
+        /* Metadata header */
+        sb->sb_magic = be32_to_cpu(disk->sb_header.mh_magic);
+        sb->sb_type = be32_to_cpu(disk->sb_header.mh_type);
+        sb->sb_format = be32_to_cpu(disk->sb_header.mh_format);
+
+        /* Format versions and block size */
+        sb->sb_fs_format = be32_to_cpu(disk->sb_fs_format);
+        sb->sb_multihost_format = be32_to_cpu(disk->sb_multihost_format);
+        sb->sb_bsize = be32_to_cpu(disk->sb_bsize);
+        sb->sb_bsize_shift = be32_to_cpu(disk->sb_bsize_shift);
+
+        /* Master and root directory inode numbers */
+        sb->sb_master_dir.no_formal_ino =
+                be64_to_cpu(disk->sb_master_dir.no_formal_ino);
+        sb->sb_master_dir.no_addr = be64_to_cpu(disk->sb_master_dir.no_addr);
+        sb->sb_root_dir.no_formal_ino =
+                be64_to_cpu(disk->sb_root_dir.no_formal_ino);
+        sb->sb_root_dir.no_addr = be64_to_cpu(disk->sb_root_dir.no_addr);
+}
 /**
 * gfs2_read_super - Read the gfs2 super block from disk
- * @sb: The VFS super block
+ * @sdp: The GFS2 super block
 * @sector: The location of the super block
+ * @error: The error code to return
 *
 * This uses the bio functions to read the super block from disk
 * because we want to be 100% sure that we never read cached data.
@@ -189,17 +210,19 @@ static int end_bio_io_page(struct bio *bio, unsigned int bytes_done, int error)
 * the master directory (contains pointers to journals etc) and the
 * root directory.
 *
- * Returns: A page containing the sb or NULL
+ * Returns: 0 on success or a negative errno (-ENOBUFS or -EIO)
 */
-struct page *gfs2_read_super(struct super_block *sb, sector_t sector)
+int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector)
 {
+        struct super_block *sb = sdp->sd_vfs;
+        struct gfs2_sb *p;
        struct page *page;
        struct bio *bio;
        page = alloc_page(GFP_KERNEL);
        if (unlikely(!page))
-                return NULL;
+                return -ENOBUFS;
        ClearPageUptodate(page);
        ClearPageDirty(page);
@@ -208,7 +231,7 @@ struct page *gfs2_read_super(struct super_block *sb, sector_t sector)
        bio = bio_alloc(GFP_KERNEL, 1);
        if (unlikely(!bio)) {
                __free_page(page);
-                return NULL;
+                return -ENOBUFS;
        }
        bio->bi_sector = sector * (sb->s_blocksize >> 9);
@@ -222,9 +245,13 @@ struct page *gfs2_read_super(struct super_block *sb, sector_t sector)
        bio_put(bio);
        if (!PageUptodate(page)) {
                __free_page(page);
-                return NULL;
+                return -EIO;
        }
-        return page;
+        p = kmap(page);
+        gfs2_sb_in(&sdp->sd_sb, p);
+        kunmap(page);
+        __free_page(page);
+        return 0;
 }
 /**
@@ -241,19 +268,13 @@ int gfs2_read_sb(struct gfs2_sbd *sdp, struct gfs2_glock *gl, int silent)
        u32 tmp_blocks;
        unsigned int x;
        int error;
-        struct page *page;
-        char *sb;
-        page = gfs2_read_super(sdp->sd_vfs, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift);
+        error = gfs2_read_super(sdp, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift);
-        if (!page) {
+        if (error) {
                if (!silent)
                        fs_err(sdp, "can't read superblock\n");
-                return -EIO;
+                return error;
        }
-        sb = kmap(page);
-        gfs2_sb_in(&sdp->sd_sb, sb);
-        kunmap(page);
-        __free_page(page);
        error = gfs2_check_sb(sdp, &sdp->sd_sb, silent);
        if (error)
@@ -360,7 +381,7 @@ int gfs2_jindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ji_gh)
                name.len = sprintf(buf, "journal%u", sdp->sd_journals);
                name.hash = gfs2_disk_hash(name.name, name.len);
-                error = gfs2_dir_search(sdp->sd_jindex, &name, NULL, NULL);
+                error = gfs2_dir_check(sdp->sd_jindex, &name, NULL);
                if (error == -ENOENT) {
                        error = 0;
                        break;
@@ -593,6 +614,24 @@ int gfs2_make_fs_ro(struct gfs2_sbd *sdp)
        return error;
 }
+/*
+ * gfs2_statfs_change_in - Read a statfs change record into host byte order
+ * @sc: in-core record to fill in
+ * @buf: buffer holding the on-disk struct gfs2_statfs_change (big-endian)
+ */
+static void gfs2_statfs_change_in(struct gfs2_statfs_change_host *sc, const void *buf)
+{
+        const struct gfs2_statfs_change *disk = buf;
+
+        sc->sc_dinodes = be64_to_cpu(disk->sc_dinodes);
+        sc->sc_free = be64_to_cpu(disk->sc_free);
+        sc->sc_total = be64_to_cpu(disk->sc_total);
+}
+/*
+ * gfs2_statfs_change_out - Write a statfs change record in on-disk byte order
+ * @sc: in-core record to convert
+ * @buf: buffer receiving the on-disk struct gfs2_statfs_change (big-endian)
+ */
+static void gfs2_statfs_change_out(const struct gfs2_statfs_change_host *sc, void *buf)
+{
+        struct gfs2_statfs_change *disk = buf;
+
+        disk->sc_dinodes = cpu_to_be64(sc->sc_dinodes);
+        disk->sc_free = cpu_to_be64(sc->sc_free);
+        disk->sc_total = cpu_to_be64(sc->sc_total);
+}
 int gfs2_statfs_init(struct gfs2_sbd *sdp)
 {
        struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
@@ -772,7 +811,7 @@ static int statfs_slow_fill(struct gfs2_rgrpd *rgd,
                            struct gfs2_statfs_change_host *sc)
 {
        gfs2_rgrp_verify(rgd);
-        sc->sc_total += rgd->rd_ri.ri_data;
+        sc->sc_total += rgd->rd_data;
        sc->sc_free += rgd->rd_rg.rg_free;
        sc->sc_dinodes += rgd->rd_rg.rg_dinodes;
        return 0;
diff --git a/fs/gfs2/super.h b/fs/gfs2/super.h
index e590b2df11dc..60a870e430be 100644
--- a/fs/gfs2/super.h
+++ b/fs/gfs2/super.h
@@ -16,7 +16,7 @@ void gfs2_tune_init(struct gfs2_tune *gt);
 int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb_host *sb, int silent);
 int gfs2_read_sb(struct gfs2_sbd *sdp, struct gfs2_glock *gl, int silent);
-struct page *gfs2_read_super(struct super_block *sb, sector_t sector);
+int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector);
 static inline unsigned int gfs2_jindex_size(struct gfs2_sbd *sdp)
 {
diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c
index 601eaa1b9ed6..424a0774eda8 100644
--- a/fs/gfs2/util.c
+++ b/fs/gfs2/util.c
@@ -115,8 +115,8 @@ int gfs2_consist_inode_i(struct gfs2_inode *ip, int cluster_wide,
                "GFS2: fsid=%s:   inode = %llu %llu\n"
                "GFS2: fsid=%s:   function = %s, file = %s, line = %u\n",
                sdp->sd_fsname,
-                sdp->sd_fsname, (unsigned long long)ip->i_num.no_formal_ino,
+                sdp->sd_fsname, (unsigned long long)ip->i_no_formal_ino,
-                (unsigned long long)ip->i_num.no_addr,
+                (unsigned long long)ip->i_no_addr,
                sdp->sd_fsname, function, file, line);
        return rv;
 }
@@ -137,7 +137,7 @@ int gfs2_consist_rgrpd_i(struct gfs2_rgrpd *rgd, int cluster_wide,
                "GFS2: fsid=%s:   RG = %llu\n"
                "GFS2: fsid=%s:   function = %s, file = %s, line = %u\n",
                sdp->sd_fsname,
-                sdp->sd_fsname, (unsigned long long)rgd->rd_ri.ri_addr,
+                sdp->sd_fsname, (unsigned long long)rgd->rd_addr,
                sdp->sd_fsname, function, file, line);
        return rv;
 }
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index 9a934db0bd8a..bc835f272a6e 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -607,7 +607,7 @@ static const struct file_operations hfs_file_operations = {
        .write          = do_sync_write,
        .aio_write      = generic_file_aio_write,
        .mmap           = generic_file_mmap,
-        .sendfile       = generic_file_sendfile,
+        .splice_read    = generic_file_splice_read,
        .fsync          = file_fsync,
        .open           = hfs_file_open,
        .release        = hfs_file_release,
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index 45dab5d6cc10..409ce5429c91 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -288,7 +288,7 @@ static const struct file_operations hfsplus_file_operations = {
        .write          = do_sync_write,
        .aio_write      = generic_file_aio_write,
        .mmap           = generic_file_mmap,
-        .sendfile       = generic_file_sendfile,
+        .splice_read    = generic_file_splice_read,
        .fsync          = file_fsync,
        .open           = hfsplus_file_open,
        .release        = hfsplus_file_release,
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index 8286491dbf31..c77862032e84 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -390,7 +390,7 @@ int hostfs_fsync(struct file *file, struct dentry *dentry, int datasync)
 static const struct file_operations hostfs_file_fops = {
        .llseek         = generic_file_llseek,
        .read           = do_sync_read,
-        .sendfile       = generic_file_sendfile,
+        .splice_read    = generic_file_splice_read,
        .aio_read       = generic_file_aio_read,
        .aio_write      = generic_file_aio_write,
        .write          = do_sync_write,
diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c
index b4eafc0f1e54..5b53e5c5d8df 100644
--- a/fs/hpfs/file.c
+++ b/fs/hpfs/file.c
@@ -129,7 +129,7 @@ const struct file_operations hpfs_file_ops =
        .mmap           = generic_file_mmap,
        .release        = hpfs_file_release,
        .fsync          = hpfs_file_fsync,
-        .sendfile       = generic_file_sendfile,
+        .splice_read    = generic_file_splice_read,
 };
 const struct inode_operations hpfs_file_iops =
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index aa083dd34e92..e6b46b3ac2fe 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -736,15 +736,13 @@ static int can_do_hugetlb_shm(void)
                        can_do_mlock());
 }
-struct file *hugetlb_zero_setup(size_t size)
+struct file *hugetlb_file_setup(const char *name, size_t size)
 {
        int error = -ENOMEM;
        struct file *file;
        struct inode *inode;
        struct dentry *dentry, *root;
        struct qstr quick_string;
-        char buf[16];
-        static atomic_t counter;
        if (!hugetlbfs_vfsmount)
                return ERR_PTR(-ENOENT);
@@ -756,8 +754,7 @@ struct file *hugetlb_zero_setup(size_t size)
                return ERR_PTR(-ENOMEM);
        root = hugetlbfs_vfsmount->mnt_root;
-        snprintf(buf, 16, "%u", atomic_inc_return(&counter));
+        quick_string.name = name;
-        quick_string.name = buf;
        quick_string.len = strlen(quick_string.name);
        quick_string.hash = 0;
        dentry = d_alloc(root, &quick_string);
diff --git a/fs/ioctl.c b/fs/ioctl.c
index 479c1038ed4a..8c90cbc903fa 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -12,6 +12,7 @@
 #include <linux/fs.h>
 #include <linux/security.h>
 #include <linux/module.h>
+#include <linux/kallsyms.h>
 #include <asm/uaccess.h>
 #include <asm/ioctls.h>
@@ -20,6 +21,7 @@ static long do_ioctl(struct file *filp, unsigned int cmd,
                unsigned long arg)
 {
        int error = -ENOTTY;
+        void *f;
        if (!filp->f_op)
                goto out;
@@ -29,10 +31,16 @@ static long do_ioctl(struct file *filp, unsigned int cmd,
                if (error == -ENOIOCTLCMD)
                        error = -EINVAL;
                goto out;
-        } else if (filp->f_op->ioctl) {
+        } else if ((f = filp->f_op->ioctl)) {
                lock_kernel();
-                error = filp->f_op->ioctl(filp->f_path.dentry->d_inode,
+                if (!filp->f_op->ioctl) {
-                                          filp, cmd, arg);
+                        printk("%s: ioctl %p disappeared\n", __FUNCTION__, f);
+                        print_symbol("symbol: %s\n", (unsigned long)f);
+                        dump_stack();
+                } else {
+                        error = filp->f_op->ioctl(filp->f_path.dentry->d_inode,
+                                                  filp, cmd, arg);
+                }
                unlock_kernel();
        }
diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c
index 99871279a1ed..c2530197be0c 100644
--- a/fs/jffs2/file.c
+++ b/fs/jffs2/file.c
@@ -47,7 +47,7 @@ const struct file_operations jffs2_file_operations =
        .ioctl =        jffs2_ioctl,
        .mmap =         generic_file_readonly_mmap,
        .fsync =        jffs2_fsync,
-        .sendfile =     generic_file_sendfile
+        .splice_read =  generic_file_splice_read,
 };
 /* jffs2_file_inode_operations */
diff --git a/fs/jffs2/readinode.c b/fs/jffs2/readinode.c
index 4884d5edfe65..7b363786c2d2 100644
--- a/fs/jffs2/readinode.c
+++ b/fs/jffs2/readinode.c
@@ -210,8 +210,7 @@ static void jffs2_kill_tn(struct jffs2_sb_info *c, struct jffs2_tmp_dnode_info *
 * offset, and the one with the smallest length will come first in the
 * ordering.
 *
- * Returns 0 if the node was inserted
+ * Returns 0 if the node was handled (including marking it obsolete)
- *         1 if the node is obsolete (because we can't mark it so yet)
 *         < 0 an if error occurred
 */
 static int jffs2_add_tn_to_tree(struct jffs2_sb_info *c,
@@ -229,9 +228,16 @@ static int jffs2_add_tn_to_tree(struct jffs2_sb_info *c,
           check anyway. */
        if (!tn->fn->size) {
                if (rii->mdata_tn) {
-                        /* We had a candidate mdata node already */
+                        if (rii->mdata_tn->version < tn->version) {
-                        dbg_readinode("kill old mdata with ver %d\n", rii->mdata_tn->version);
+                                /* We had a candidate mdata node already */
-                        jffs2_kill_tn(c, rii->mdata_tn);
+                                dbg_readinode("kill old mdata with ver %d\n", rii->mdata_tn->version);
+                                jffs2_kill_tn(c, rii->mdata_tn);
+                        } else {
+                                dbg_readinode("kill new mdata with ver %d (older than existing %d\n",
+                                              tn->version, rii->mdata_tn->version);
+                                jffs2_kill_tn(c, tn);
+                                return 0;
+                        }
                }
                rii->mdata_tn = tn;
                dbg_readinode("keep new mdata with ver %d\n", tn->version);
@@ -565,8 +571,7 @@ static struct jffs2_raw_node_ref *jffs2_first_valid_node(struct jffs2_raw_node_r
 * Helper function for jffs2_get_inode_nodes().
 * It is called every time an directory entry node is found.
 *
- * Returns: 0 on succes;
+ * Returns: 0 on success;
- *          1 if the node should be marked obsolete;
 *          negative error code on failure.
 */
 static inline int read_direntry(struct jffs2_sb_info *c, struct jffs2_raw_node_ref *ref,
@@ -673,8 +678,7 @@ static inline int read_direntry(struct jffs2_sb_info *c, struct jffs2_raw_node_r
 * Helper function for jffs2_get_inode_nodes().
 * It is called every time an inode node is found.
 *
- * Returns: 0 on success;
+ * Returns: 0 on success (possibly after marking a bad node obsolete);
- *          1 if the node should be marked obsolete;
 *          negative error code on failure.
 */
 static inline int read_dnode(struct jffs2_sb_info *c, struct jffs2_raw_node_ref *ref,
@@ -683,7 +687,7 @@ static inline int read_dnode(struct jffs2_sb_info *c, struct jffs2_raw_node_ref
 {
        struct jffs2_tmp_dnode_info *tn;
        uint32_t len, csize;
-        int ret = 1;
+        int ret = 0;
        uint32_t crc;
        /* Obsoleted. This cannot happen, surely? dwmw2 20020308 */
@@ -712,8 +716,9 @@ static inline int read_dnode(struct jffs2_sb_info *c, struct jffs2_raw_node_ref
                /* Sanity checks */
                if (unlikely(je32_to_cpu(rd->offset) > je32_to_cpu(rd->isize)) ||
                    unlikely(PAD(je32_to_cpu(rd->csize) + sizeof(*rd)) != PAD(je32_to_cpu(rd->totlen)))) {
-                                JFFS2_WARNING("inode node header CRC is corrupted at %#08x\n", ref_offset(ref));
+                        JFFS2_WARNING("inode node header CRC is corrupted at %#08x\n", ref_offset(ref));
-                                jffs2_dbg_dump_node(c, ref_offset(ref));
+                        jffs2_dbg_dump_node(c, ref_offset(ref));
+                        jffs2_mark_node_obsolete(c, ref);
                        goto free_out;
                }
@@ -768,6 +773,7 @@ static inline int read_dnode(struct jffs2_sb_info *c, struct jffs2_raw_node_ref
                        if (len >= csize && unlikely(tn->partial_crc != je32_to_cpu(rd->data_crc))) {
                                JFFS2_NOTICE("wrong data CRC in data node at 0x%08x: read %#08x, calculated %#08x.\n",
                                        ref_offset(ref), tn->partial_crc, je32_to_cpu(rd->data_crc));
+                                jffs2_mark_node_obsolete(c, ref);
                                goto free_out;
                        }
@@ -847,7 +853,6 @@ static inline int read_dnode(struct jffs2_sb_info *c, struct jffs2_raw_node_ref
 * It is called every time an unknown node is found.
 *
 * Returns: 0 on success;
- *          1 if the node should be marked obsolete;
 *          negative error code on failure.
 */
 static inline int read_unknown(struct jffs2_sb_info *c, struct jffs2_raw_node_ref *ref, struct jffs2_unknown_node *un)
@@ -1044,7 +1049,8 @@ static int jffs2_get_inode_nodes(struct jffs2_sb_info *c, struct jffs2_inode_inf
                case JFFS2_NODETYPE_DIRENT:
-                        if (JFFS2_MIN_NODE_HEADER < sizeof(struct jffs2_raw_dirent)) {
+                        if (JFFS2_MIN_NODE_HEADER < sizeof(struct jffs2_raw_dirent) &&
+                            len < sizeof(struct jffs2_raw_dirent)) {
                                err = read_more(c, ref, sizeof(struct jffs2_raw_dirent), &len, buf);
                                if (unlikely(err))
                                        goto free_out;
@@ -1058,7 +1064,8 @@ static int jffs2_get_inode_nodes(struct jffs2_sb_info *c, struct jffs2_inode_inf
                case JFFS2_NODETYPE_INODE:
-                        if (JFFS2_MIN_NODE_HEADER < sizeof(struct jffs2_raw_inode)) {
+                        if (JFFS2_MIN_NODE_HEADER < sizeof(struct jffs2_raw_inode) &&
+                            len < sizeof(struct jffs2_raw_inode)) {
                                err = read_more(c, ref, sizeof(struct jffs2_raw_inode), &len, buf);
                                if (unlikely(err))
                                        goto free_out;
@@ -1071,17 +1078,15 @@ static int jffs2_get_inode_nodes(struct jffs2_sb_info *c, struct jffs2_inode_inf
                        break;
                default:
-                        if (JFFS2_MIN_NODE_HEADER < sizeof(struct jffs2_unknown_node)) {
+                        if (JFFS2_MIN_NODE_HEADER < sizeof(struct jffs2_unknown_node) &&
+                            len < sizeof(struct jffs2_unknown_node)) {
                                err = read_more(c, ref, sizeof(struct jffs2_unknown_node), &len, buf);
                                if (unlikely(err))
                                        goto free_out;
                        }
                        err = read_unknown(c, ref, &node->u);
-                        if (err == 1) {
+                        if (unlikely(err))
-                                jffs2_mark_node_obsolete(c, ref);
-                                break;
-                        } else if (unlikely(err))
                                goto free_out;
                }
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c
index 6488af43bc9b..e220d3bd610d 100644
--- a/fs/jffs2/super.c
+++ b/fs/jffs2/super.c
@@ -19,7 +19,7 @@
 #include <linux/mount.h>
 #include <linux/jffs2.h>
 #include <linux/pagemap.h>
-#include <linux/mtd/mtd.h>
+#include <linux/mtd/super.h>
 #include <linux/ctype.h>
 #include <linux/namei.h>
 #include "compr.h"
@@ -75,69 +75,27 @@ static const struct super_operations jffs2_super_operations =
        .sync_fs =      jffs2_sync_fs,
 };
-static int jffs2_sb_compare(struct super_block *sb, void *data)
+/*
-{
+ * fill in the superblock
-        struct jffs2_sb_info *p = data;
+ */
-        struct jffs2_sb_info *c = JFFS2_SB_INFO(sb);
+static int jffs2_fill_super(struct super_block *sb, void *data, int silent)
-        /* The superblocks are considered to be equivalent if the underlying MTD
-           device is the same one */
-        if (c->mtd == p->mtd) {
-                D1(printk(KERN_DEBUG "jffs2_sb_compare: match on device %d (\"%s\")\n", p->mtd->index, p->mtd->name));
-                return 1;
-        } else {
-                D1(printk(KERN_DEBUG "jffs2_sb_compare: No match, device %d (\"%s\"), device %d (\"%s\")\n",
-                          c->mtd->index, c->mtd->name, p->mtd->index, p->mtd->name));
-                return 0;
-        }
-}
-static int jffs2_sb_set(struct super_block *sb, void *data)
-{
-        struct jffs2_sb_info *p = data;
-        /* For persistence of NFS exports etc. we use the same s_dev
-           each time we mount the device, don't just use an anonymous
-           device */
-        sb->s_fs_info = p;
-        p->os_priv = sb;
-        sb->s_dev = MKDEV(MTD_BLOCK_MAJOR, p->mtd->index);
-        return 0;
-}
-static int jffs2_get_sb_mtd(struct file_system_type *fs_type,
-                            int flags, const char *dev_name,
-                            void *data, struct mtd_info *mtd,
-                            struct vfsmount *mnt)
 {
-        struct super_block *sb;
        struct jffs2_sb_info *c;
-        int ret;
+        D1(printk(KERN_DEBUG "jffs2_get_sb_mtd():"
+                  " New superblock for device %d (\"%s\")\n",
+                  sb->s_mtd->index, sb->s_mtd->name));
        c = kzalloc(sizeof(*c), GFP_KERNEL);
        if (!c)
                return -ENOMEM;
-        c->mtd = mtd;
-        sb = sget(fs_type, jffs2_sb_compare, jffs2_sb_set, c);
-        if (IS_ERR(sb))
-                goto out_error;
-        if (sb->s_root) {
-                /* New mountpoint for JFFS2 which is already mounted */
-                D1(printk(KERN_DEBUG "jffs2_get_sb_mtd(): Device %d (\"%s\") is already mounted\n",
-                          mtd->index, mtd->name));
-                ret = simple_set_mnt(mnt, sb);
-                goto out_put;
-        }
-        D1(printk(KERN_DEBUG "jffs2_get_sb_mtd(): New superblock for device %d (\"%s\")\n",
+        c->mtd = sb->s_mtd;
-                  mtd->index, mtd->name));
+        c->os_priv = sb;
+        sb->s_fs_info = c;
-        /* Initialize JFFS2 superblock locks, the further initialization will be
+        /* Initialize JFFS2 superblock locks, the further initialization will
-         * done later */
+         * be done later */
        init_MUTEX(&c->alloc_sem);
        init_MUTEX(&c->erase_free_sem);
        init_waitqueue_head(&c->erase_wait);
@@ -146,133 +104,20 @@ static int jffs2_get_sb_mtd(struct file_system_type *fs_type,
        spin_lock_init(&c->inocache_lock);
        sb->s_op = &jffs2_super_operations;
-        sb->s_flags = flags | MS_NOATIME;
+        sb->s_flags = sb->s_flags | MS_NOATIME;
        sb->s_xattr = jffs2_xattr_handlers;
 #ifdef CONFIG_JFFS2_FS_POSIX_ACL
        sb->s_flags |= MS_POSIXACL;
 #endif
-        ret = jffs2_do_fill_super(sb, data, flags & MS_SILENT ? 1 : 0);
+        return jffs2_do_fill_super(sb, data, silent);
-        if (ret) {
-                /* Failure case... */
-                up_write(&sb->s_umount);
-                deactivate_super(sb);
-                return ret;
-        }
-        sb->s_flags |= MS_ACTIVE;
-        return simple_set_mnt(mnt, sb);
-out_error:
-        ret = PTR_ERR(sb);
- out_put:
-        kfree(c);
-        put_mtd_device(mtd);
-        return ret;
-}
-static int jffs2_get_sb_mtdnr(struct file_system_type *fs_type,
-                              int flags, const char *dev_name,
-                              void *data, int mtdnr,
-                              struct vfsmount *mnt)
-{
-        struct mtd_info *mtd;
-        mtd = get_mtd_device(NULL, mtdnr);
-        if (IS_ERR(mtd)) {
-                D1(printk(KERN_DEBUG "jffs2: MTD device #%u doesn't appear to exist\n", mtdnr));
-                return PTR_ERR(mtd);
-        }
-        return jffs2_get_sb_mtd(fs_type, flags, dev_name, data, mtd, mnt);
 }
 static int jffs2_get_sb(struct file_system_type *fs_type,
                        int flags, const char *dev_name,
                        void *data, struct vfsmount *mnt)
 {
-        int err;
+        return get_sb_mtd(fs_type, flags, dev_name, data, jffs2_fill_super,
-        struct nameidata nd;
+                          mnt);
-        int mtdnr;
-        if (!dev_name)
-                return -EINVAL;
-        D1(printk(KERN_DEBUG "jffs2_get_sb(): dev_name \"%s\"\n", dev_name));
-        /* The preferred way of mounting in future; especially when
-           CONFIG_BLK_DEV is implemented - we specify the underlying
-           MTD device by number or by name, so that we don't require
-           block device support to be present in the kernel. */
-        /* FIXME: How to do the root fs this way? */
-        if (dev_name[0] == 'm' && dev_name[1] == 't' && dev_name[2] == 'd') {
-                /* Probably mounting without the blkdev crap */
-                if (dev_name[3] == ':') {
-                        struct mtd_info *mtd;
-                        /* Mount by MTD device name */
-                        D1(printk(KERN_DEBUG "jffs2_get_sb(): mtd:%%s, name \"%s\"\n", dev_name+4));
-                        for (mtdnr = 0; mtdnr < MAX_MTD_DEVICES; mtdnr++) {
-                                mtd = get_mtd_device(NULL, mtdnr);
-                                if (!IS_ERR(mtd)) {
-                                        if (!strcmp(mtd->name, dev_name+4))
-                                                return jffs2_get_sb_mtd(fs_type, flags, dev_name, data, mtd, mnt);
-                                        put_mtd_device(mtd);
-                                }
-                        }
-                        printk(KERN_NOTICE "jffs2_get_sb(): MTD device with name \"%s\" not found.\n", dev_name+4);
-                } else if (isdigit(dev_name[3])) {
-                        /* Mount by MTD device number name */
-                        char *endptr;
-                        mtdnr = simple_strtoul(dev_name+3, &endptr, 0);
-                        if (!*endptr) {
-                                /* It was a valid number */
-                                D1(printk(KERN_DEBUG "jffs2_get_sb(): mtd%%d, mtdnr %d\n", mtdnr));
-                                return jffs2_get_sb_mtdnr(fs_type, flags, dev_name, data, mtdnr, mnt);
-                        }
-                }
-        }
-        /* Try the old way - the hack where we allowed users to mount
-           /dev/mtdblock$(n) but didn't actually _use_ the blkdev */
-        err = path_lookup(dev_name, LOOKUP_FOLLOW, &nd);
-        D1(printk(KERN_DEBUG "jffs2_get_sb(): path_lookup() returned %d, inode %p\n",
-                  err, nd.dentry->d_inode));
-        if (err)
-                return err;
-        err = -EINVAL;
-        if (!S_ISBLK(nd.dentry->d_inode->i_mode))
-                goto out;
-        if (nd.mnt->mnt_flags & MNT_NODEV) {
-                err = -EACCES;
-                goto out;
-        }
-        if (imajor(nd.dentry->d_inode) != MTD_BLOCK_MAJOR) {
-                if (!(flags & MS_SILENT))
-                        printk(KERN_NOTICE "Attempt to mount non-MTD device \"%s\" as JFFS2\n",
-                               dev_name);
-                goto out;
-        }
-        mtdnr = iminor(nd.dentry->d_inode);
-        path_release(&nd);
-        return jffs2_get_sb_mtdnr(fs_type, flags, dev_name, data, mtdnr, mnt);
-out:
-        path_release(&nd);
-        return err;
 }
 static void jffs2_put_super (struct super_block *sb)
@@ -307,8 +152,7 @@ static void jffs2_kill_sb(struct super_block *sb)
        struct jffs2_sb_info *c = JFFS2_SB_INFO(sb);
        if (!(sb->s_flags & MS_RDONLY))
                jffs2_stop_garbage_collect_thread(c);
-        generic_shutdown_super(sb);
+        kill_mtd_super(sb);
-        put_mtd_device(c->mtd);
        kfree(c);
 }
diff --git a/fs/jffs2/xattr.c b/fs/jffs2/xattr.c
index 78fc08893a6c..e48665984cb3 100644
--- a/fs/jffs2/xattr.c
+++ b/fs/jffs2/xattr.c
@@ -754,6 +754,10 @@ void jffs2_clear_xattr_subsystem(struct jffs2_sb_info *c)
                list_del(&xd->xindex);
                jffs2_free_xattr_datum(xd);
        }
+        list_for_each_entry_safe(xd, _xd, &c->xattr_unchecked, xindex) {
+                list_del(&xd->xindex);
+                jffs2_free_xattr_datum(xd);
+        }
 }
 #define XREF_TMPHASH_SIZE       (128)
@@ -825,7 +829,7 @@ void jffs2_build_xattr_subsystem(struct jffs2_sb_info *c)
                           ref->xd and ref->ic are not valid yet. */
                        xd = jffs2_find_xattr_datum(c, ref->xid);
                        ic = jffs2_get_ino_cache(c, ref->ino);
-                        if (!xd || !ic) {
+                        if (!xd || !ic || !ic->nlink) {
                                dbg_xattr("xref(ino=%u, xid=%u, xseqno=%u) is orphan.\n",
                                          ref->ino, ref->xid, ref->xseqno);
                                ref->xseqno |= XREF_DELETE_MARKER;
diff --git a/fs/jfs/endian24.h b/fs/jfs/endian24.h
index 79494c4f2b10..fa92f7f1d0d0 100644
--- a/fs/jfs/endian24.h
+++ b/fs/jfs/endian24.h
@@ -29,7 +29,7 @@
        __u32 __x = (x); \
        ((__u32)( \
                ((__x & (__u32)0x000000ffUL) << 16) | \
-                 (__x & (__u32)0x0000ff00UL)        | \
+                 (__x & (__u32)0x0000ff00UL)        | \
                ((__x & (__u32)0x00ff0000UL) >> 16) )); \
 })
diff --git a/fs/jfs/file.c b/fs/jfs/file.c
index f7f8eff19b7b..87eb93694af7 100644
--- a/fs/jfs/file.c
+++ b/fs/jfs/file.c
@@ -108,7 +108,6 @@ const struct file_operations jfs_file_operations = {
        .aio_read       = generic_file_aio_read,
        .aio_write      = generic_file_aio_write,
        .mmap           = generic_file_mmap,
-        .sendfile       = generic_file_sendfile,
        .splice_read    = generic_file_splice_read,
        .splice_write   = generic_file_splice_write,
        .fsync          = jfs_fsync,
diff --git a/fs/jfs/jfs_debug.c b/fs/jfs/jfs_debug.c
index 9c5d59632aac..887f5759e536 100644
--- a/fs/jfs/jfs_debug.c
+++ b/fs/jfs/jfs_debug.c
@@ -26,34 +26,6 @@
 #include "jfs_filsys.h"
 #include "jfs_debug.h"
-#ifdef CONFIG_JFS_DEBUG
-void dump_mem(char *label, void *data, int length)
-{
-        int i, j;
-        int *intptr = data;
-        char *charptr = data;
-        char buf[10], line[80];
-        printk("%s: dump of %d bytes of data at 0x%p\n\n", label, length,
-               data);
-        for (i = 0; i < length; i += 16) {
-                line[0] = 0;
-                for (j = 0; (j < 4) && (i + j * 4 < length); j++) {
-                        sprintf(buf, " %08x", intptr[i / 4 + j]);
-                        strcat(line, buf);
-                }
-                buf[0] = ' ';
-                buf[2] = 0;
-                for (j = 0; (j < 16) && (i + j < length); j++) {
-                        buf[1] =
-                            isprint(charptr[i + j]) ? charptr[i + j] : '.';
-                        strcat(line, buf);
-                }
-                printk("%s\n", line);
-        }
-}
-#endif
 #ifdef PROC_FS_JFS /* see jfs_debug.h */
 static struct proc_dir_entry *base;
diff --git a/fs/jfs/jfs_debug.h b/fs/jfs/jfs_debug.h
index 7378798f0b21..044c1e654cc0 100644
--- a/fs/jfs/jfs_debug.h
+++ b/fs/jfs/jfs_debug.h
@@ -62,7 +62,6 @@ extern void jfs_proc_clean(void);
 extern int jfsloglevel;
-extern void dump_mem(char *label, void *data, int length);
 extern int jfs_txanchor_read(char *, char **, off_t, int, int *, void *);
 /* information message: e.g., configuration, major event */
@@ -94,7 +93,6 @@ extern int jfs_txanchor_read(char *, char **, off_t, int, int *, void *);
 *      ---------
 */
 #else                           /* CONFIG_JFS_DEBUG */
-#define dump_mem(label,data,length) do {} while (0)
 #define ASSERT(p) do {} while (0)
 #define jfs_info(fmt, arg...) do {} while (0)
 #define jfs_debug(fmt, arg...) do {} while (0)
diff --git a/fs/jfs/jfs_dinode.h b/fs/jfs/jfs_dinode.h
index 40b20111383c..c387540d3425 100644
--- a/fs/jfs/jfs_dinode.h
+++ b/fs/jfs/jfs_dinode.h
@@ -19,23 +19,23 @@
 #define _H_JFS_DINODE
 /*
- *      jfs_dinode.h: on-disk inode manager
+ *      jfs_dinode.h: on-disk inode manager
 */
-#define INODESLOTSIZE           128
+#define INODESLOTSIZE           128
-#define L2INODESLOTSIZE         7
+#define L2INODESLOTSIZE         7
-#define log2INODESIZE           9       /* log2(bytes per dinode) */
+#define log2INODESIZE           9       /* log2(bytes per dinode) */
 /*
- *      on-disk inode : 512 bytes
+ *      on-disk inode : 512 bytes
 *
 * note: align 64-bit fields on 8-byte boundary.
 */
 struct dinode {
        /*
-         *      I. base area (128 bytes)
+         *      I. base area (128 bytes)
-         *      ------------------------
+         *      ------------------------
         *
         * define generic/POSIX attributes
         */
@@ -70,16 +70,16 @@ struct dinode {
        __le32 di_acltype;      /* 4: Type of ACL */
        /*
-         *      Extension Areas.
+         *      Extension Areas.
         *
-         *      Historically, the inode was partitioned into 4 128-byte areas,
+         *      Historically, the inode was partitioned into 4 128-byte areas,
-         *      the last 3 being defined as unions which could have multiple
+         *      the last 3 being defined as unions which could have multiple
-         *      uses.  The first 96 bytes had been completely unused until
+         *      uses.  The first 96 bytes had been completely unused until
-         *      an index table was added to the directory.  It is now more
+         *      an index table was added to the directory.  It is now more
-         *      useful to describe the last 3/4 of the inode as a single
+         *      useful to describe the last 3/4 of the inode as a single
-         *      union.  We would probably be better off redesigning the
+         *      union.  We would probably be better off redesigning the
-         *      entire structure from scratch, but we don't want to break
+         *      entire structure from scratch, but we don't want to break
-         *      commonality with OS/2's JFS at this time.
+         *      commonality with OS/2's JFS at this time.
         */
        union {
                struct {
@@ -95,7 +95,7 @@ struct dinode {
                } _dir;                                 /* (384) */
 #define di_dirtable     u._dir._table
 #define di_dtroot       u._dir._dtroot
-#define di_parent       di_dtroot.header.idotdot
+#define di_parent       di_dtroot.header.idotdot
 #define di_DASD         di_dtroot.header.DASD
                struct {
@@ -127,14 +127,14 @@ struct dinode {
 #define di_inlinedata   u._file._u2._special._u
 #define di_rdev         u._file._u2._special._u._rdev
 #define di_fastsymlink  u._file._u2._special._u._fastsymlink
-#define di_inlineea     u._file._u2._special._inlineea
+#define di_inlineea     u._file._u2._special._inlineea
        } u;
 };
 /* extended mode bits (on-disk inode di_mode) */
-#define IFJOURNAL       0x00010000      /* journalled file */
+#define IFJOURNAL       0x00010000      /* journalled file */
-#define ISPARSE         0x00020000      /* sparse file enabled */
+#define ISPARSE         0x00020000      /* sparse file enabled */
-#define INLINEEA        0x00040000      /* inline EA area free */
+#define INLINEEA        0x00040000      /* inline EA area free */
 #define ISWAPFILE       0x00800000      /* file open for pager swap space */
 /* more extended mode bits: attributes for OS/2 */
diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c
index f3b1ebb22280..e1985066b1c6 100644
--- a/fs/jfs/jfs_dmap.c
+++ b/fs/jfs/jfs_dmap.c
@@ -154,12 +154,12 @@ static const s8 budtab[256] = {
 *              the in-core descriptor is initialized from disk.
 *
 * PARAMETERS:
- *      ipbmap  -  pointer to in-core inode for the block map.
+ *      ipbmap  - pointer to in-core inode for the block map.
 *
 * RETURN VALUES:
- *      0       - success
+ *      0       - success
- *      -ENOMEM - insufficient memory
+ *      -ENOMEM - insufficient memory
- *      -EIO    - i/o error
+ *      -EIO    - i/o error
 */
 int dbMount(struct inode *ipbmap)
 {
@@ -232,11 +232,11 @@ int dbMount(struct inode *ipbmap)
 *              the memory for this descriptor is freed.
 *
 * PARAMETERS:
- *      ipbmap  -  pointer to in-core inode for the block map.
+ *      ipbmap  - pointer to in-core inode for the block map.
 *
 * RETURN VALUES:
- *      0       - success
+ *      0       - success
- *      -EIO    - i/o error
+ *      -EIO    - i/o error
 */
 int dbUnmount(struct inode *ipbmap, int mounterror)
 {
@@ -320,13 +320,13 @@ int dbSync(struct inode *ipbmap)
 *              at a time.
 *
 * PARAMETERS:
- *      ip      -  pointer to in-core inode;
+ *      ip      - pointer to in-core inode;
- *      blkno   -  starting block number to be freed.
+ *      blkno   - starting block number to be freed.
- *      nblocks -  number of blocks to be freed.
+ *      nblocks - number of blocks to be freed.
 *
 * RETURN VALUES:
- *      0       - success
+ *      0       - success
- *      -EIO    - i/o error
+ *      -EIO    - i/o error
 */
 int dbFree(struct inode *ip, s64 blkno, s64 nblocks)
 {
@@ -395,23 +395,23 @@ int dbFree(struct inode *ip, s64 blkno, s64 nblocks)
 /*
 * NAME:        dbUpdatePMap()
 *
- * FUNCTION:    update the allocation state (free or allocate) of the
+ * FUNCTION:    update the allocation state (free or allocate) of the
 *              specified block range in the persistent block allocation map.
 *
 *              the blocks will be updated in the persistent map one
 *              dmap at a time.
 *
 * PARAMETERS:
- *      ipbmap  -  pointer to in-core inode for the block map.
+ *      ipbmap  - pointer to in-core inode for the block map.
- *      free    -  'true' if block range is to be freed from the persistent
+ *      free    - 'true' if block range is to be freed from the persistent
- *                 map; 'false' if it is to   be allocated.
+ *                map; 'false' if it is to be allocated.
- *      blkno   -  starting block number of the range.
+ *      blkno   - starting block number of the range.
- *      nblocks -  number of contiguous blocks in the range.
+ *      nblocks - number of contiguous blocks in the range.
- *      tblk    -  transaction block;
+ *      tblk    - transaction block;
 *
 * RETURN VALUES:
- *      0       - success
+ *      0       - success
- *      -EIO    - i/o error
+ *      -EIO    - i/o error
 */
 int
 dbUpdatePMap(struct inode *ipbmap,
@@ -573,7 +573,7 @@ dbUpdatePMap(struct inode *ipbmap,
 /*
 * NAME:        dbNextAG()
 *
- * FUNCTION:    find the preferred allocation group for new allocations.
+ * FUNCTION:    find the preferred allocation group for new allocations.
 *
 *              Within the allocation groups, we maintain a preferred
 *              allocation group which consists of a group with at least
@@ -589,10 +589,10 @@ dbUpdatePMap(struct inode *ipbmap,
 *              empty ags around for large allocations.
 *
 * PARAMETERS:
- *      ipbmap  -  pointer to in-core inode for the block map.
+ *      ipbmap  - pointer to in-core inode for the block map.
 *
 * RETURN VALUES:
- *      the preferred allocation group number.
+ *      the preferred allocation group number.
 */
 int dbNextAG(struct inode *ipbmap)
 {
@@ -656,7 +656,7 @@ unlock:
 /*
 * NAME:        dbAlloc()
 *
- * FUNCTION:    attempt to allocate a specified number of contiguous free
+ * FUNCTION:    attempt to allocate a specified number of contiguous free
 *              blocks from the working allocation block map.
 *
 *              the block allocation policy uses hints and a multi-step
@@ -680,16 +680,16 @@ unlock:
 *              size or requests that specify no hint value.
 *
 * PARAMETERS:
- *      ip      -  pointer to in-core inode;
+ *      ip      - pointer to in-core inode;
- *      hint    - allocation hint.
+ *      hint    - allocation hint.
- *      nblocks - number of contiguous blocks in the range.
+ *      nblocks - number of contiguous blocks in the range.
- *      results - on successful return, set to the starting block number
+ *      results - on successful return, set to the starting block number
 *                of the newly allocated contiguous range.
 *
 * RETURN VALUES:
- *      0       - success
+ *      0       - success
- *      -ENOSPC - insufficient disk resources
+ *      -ENOSPC - insufficient disk resources
- *      -EIO    - i/o error
+ *      -EIO    - i/o error
 */
 int dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results)
 {
@@ -706,12 +706,6 @@ int dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results)
        /* assert that nblocks is valid */
        assert(nblocks > 0);
-#ifdef _STILL_TO_PORT
-        /* DASD limit check                                     F226941 */
-        if (OVER_LIMIT(ip, nblocks))
-                return -ENOSPC;
-#endif                          /* _STILL_TO_PORT */
        /* get the log2 number of blocks to be allocated.
         * if the number of blocks is not a log2 multiple,
         * it will be rounded up to the next log2 multiple.
@@ -720,7 +714,6 @@ int dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results)
        bmp = JFS_SBI(ip->i_sb)->bmap;
-//retry:        /* serialize w.r.t.extendfs() */
        mapSize = bmp->db_mapsize;
        /* the hint should be within the map */
@@ -879,17 +872,17 @@ int dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results)
 /*
 * NAME:        dbAllocExact()
 *
- * FUNCTION:    try to allocate the requested extent;
+ * FUNCTION:    try to allocate the requested extent;
 *
 * PARAMETERS:
- *      ip      - pointer to in-core inode;
+ *      ip      - pointer to in-core inode;
- *      blkno   - extent address;
+ *      blkno   - extent address;
- *      nblocks - extent length;
+ *      nblocks - extent length;
 *
 * RETURN VALUES:
- *      0       - success
+ *      0       - success
- *      -ENOSPC - insufficient disk resources
+ *      -ENOSPC - insufficient disk resources
- *      -EIO    - i/o error
+ *      -EIO    - i/o error
 */
 int dbAllocExact(struct inode *ip, s64 blkno, int nblocks)
 {
@@ -946,7 +939,7 @@ int dbAllocExact(struct inode *ip, s64 blkno, int nblocks)
 /*
 * NAME:        dbReAlloc()
 *
- * FUNCTION:    attempt to extend a current allocation by a specified
+ * FUNCTION:    attempt to extend a current allocation by a specified
 *              number of blocks.
 *
 *              this routine attempts to satisfy the allocation request
@@ -959,21 +952,21 @@ int dbAllocExact(struct inode *ip, s64 blkno, int nblocks)
 *              number of blocks required.
 *
 * PARAMETERS:
- *      ip          -  pointer to in-core inode requiring allocation.
+ *      ip          -  pointer to in-core inode requiring allocation.
- *      blkno       -  starting block of the current allocation.
+ *      blkno       -  starting block of the current allocation.
- *      nblocks     -  number of contiguous blocks within the current
+ *      nblocks     -  number of contiguous blocks within the current
 *                     allocation.
- *      addnblocks  -  number of blocks to add to the allocation.
+ *      addnblocks  -  number of blocks to add to the allocation.
- *      results -      on successful return, set to the starting block number
+ *      results -      on successful return, set to the starting block number
 *                     of the existing allocation if the existing allocation
 *                     was extended in place or to a newly allocated contiguous
 *                     range if the existing allocation could not be extended
 *                     in place.
 *
 * RETURN VALUES:
- *      0       - success
+ *      0       - success
- *      -ENOSPC - insufficient disk resources
+ *      -ENOSPC - insufficient disk resources
- *      -EIO    - i/o error
+ *      -EIO    - i/o error
 */
 int
 dbReAlloc(struct inode *ip,
@@ -1004,7 +997,7 @@ dbReAlloc(struct inode *ip,
 /*
 * NAME:        dbExtend()
 *
- * FUNCTION:    attempt to extend a current allocation by a specified
+ * FUNCTION:    attempt to extend a current allocation by a specified
 *              number of blocks.
 *
 *              this routine attempts to satisfy the allocation request
@@ -1013,16 +1006,16 @@ dbReAlloc(struct inode *ip,
 *              immediately following the current allocation.
 *
 * PARAMETERS:
- *      ip          -  pointer to in-core inode requiring allocation.
+ *      ip          -  pointer to in-core inode requiring allocation.
- *      blkno       -  starting block of the current allocation.
+ *      blkno       -  starting block of the current allocation.
- *      nblocks     -  number of contiguous blocks within the current
+ *      nblocks     -  number of contiguous blocks within the current
 *                     allocation.
- *      addnblocks  -  number of blocks to add to the allocation.
+ *      addnblocks  -  number of blocks to add to the allocation.
 *
 * RETURN VALUES:
- *      0       - success
+ *      0       - success
- *      -ENOSPC - insufficient disk resources
+ *      -ENOSPC - insufficient disk resources
- *      -EIO    - i/o error
+ *      -EIO    - i/o error
 */
 static int dbExtend(struct inode *ip, s64 blkno, s64 nblocks, s64 addnblocks)
 {
@@ -1109,19 +1102,19 @@ static int dbExtend(struct inode *ip, s64 blkno, s64 nblocks, s64 addnblocks)
 /*
 * NAME:        dbAllocNext()
 *
- * FUNCTION:    attempt to allocate the blocks of the specified block
+ * FUNCTION:    attempt to allocate the blocks of the specified block
 *              range within a dmap.
 *
 * PARAMETERS:
- *      bmp     -  pointer to bmap descriptor
+ *      bmp     -  pointer to bmap descriptor
- *      dp      -  pointer to dmap.
+ *      dp      -  pointer to dmap.
- *      blkno   -  starting block number of the range.
+ *      blkno   -  starting block number of the range.
- *      nblocks -  number of contiguous free blocks of the range.
+ *      nblocks -  number of contiguous free blocks of the range.
 *
 * RETURN VALUES:
- *      0       - success
+ *      0       - success
- *      -ENOSPC - insufficient disk resources
+ *      -ENOSPC - insufficient disk resources
- *      -EIO    - i/o error
+ *      -EIO    - i/o error
 *
 * serialization: IREAD_LOCK(ipbmap) held on entry/exit;
 */
@@ -1233,7 +1226,7 @@ static int dbAllocNext(struct bmap * bmp, struct dmap * dp, s64 blkno,
 /*
 * NAME:        dbAllocNear()
 *
- * FUNCTION:    attempt to allocate a number of contiguous free blocks near
+ * FUNCTION:    attempt to allocate a number of contiguous free blocks near
 *              a specified block (hint) within a dmap.
 *
 *              starting with the dmap leaf that covers the hint, we'll
@@ -1242,18 +1235,18 @@ static int dbAllocNext(struct bmap * bmp, struct dmap * dp, s64 blkno,
 *              the desired free space.
 *
 * PARAMETERS:
- *      bmp     -  pointer to bmap descriptor
+ *      bmp     -  pointer to bmap descriptor
- *      dp      -  pointer to dmap.
+ *      dp      -  pointer to dmap.
- *      blkno   -  block number to allocate near.
+ *      blkno   -  block number to allocate near.
- *      nblocks -  actual number of contiguous free blocks desired.
+ *      nblocks -  actual number of contiguous free blocks desired.
- *      l2nb    -  log2 number of contiguous free blocks desired.
+ *      l2nb    -  log2 number of contiguous free blocks desired.
- *      results -  on successful return, set to the starting block number
+ *      results -  on successful return, set to the starting block number
 *                 of the newly allocated range.
 *
 * RETURN VALUES:
- *      0       - success
+ *      0       - success
- *      -ENOSPC - insufficient disk resources
+ *      -ENOSPC - insufficient disk resources
- *      -EIO    - i/o error
+ *      -EIO    - i/o error
 *
 * serialization: IREAD_LOCK(ipbmap) held on entry/exit;
 */
@@ -1316,7 +1309,7 @@ dbAllocNear(struct bmap * bmp,
 /*
 * NAME:        dbAllocAG()
 *
- * FUNCTION:    attempt to allocate the specified number of contiguous
+ * FUNCTION:    attempt to allocate the specified number of contiguous
 *              free blocks within the specified allocation group.
 *
 *              unless the allocation group size is equal to the number
@@ -1353,17 +1346,17 @@ dbAllocNear(struct bmap * bmp,
 *              the allocation group.
 *
 * PARAMETERS:
- *      bmp     -  pointer to bmap descriptor
+ *      bmp     -  pointer to bmap descriptor
 *      agno    - allocation group number.
- *      nblocks -  actual number of contiguous free blocks desired.
+ *      nblocks -  actual number of contiguous free blocks desired.
- *      l2nb    -  log2 number of contiguous free blocks desired.
+ *      l2nb    -  log2 number of contiguous free blocks desired.
- *      results -  on successful return, set to the starting block number
+ *      results -  on successful return, set to the starting block number
 *                 of the newly allocated range.
 *
 * RETURN VALUES:
- *      0       - success
+ *      0       - success
- *      -ENOSPC - insufficient disk resources
+ *      -ENOSPC - insufficient disk resources
- *      -EIO    - i/o error
+ *      -EIO    - i/o error
 *
 * note: IWRITE_LOCK(ipmap) held on entry/exit;
 */
@@ -1546,7 +1539,7 @@ dbAllocAG(struct bmap * bmp, int agno, s64 nblocks, int l2nb, s64 * results)
 /*
 * NAME:        dbAllocAny()
 *
- * FUNCTION:    attempt to allocate the specified number of contiguous
+ * FUNCTION:    attempt to allocate the specified number of contiguous
 *              free blocks anywhere in the file system.
 *
 *              dbAllocAny() attempts to find the sufficient free space by
@@ -1556,16 +1549,16 @@ dbAllocAG(struct bmap * bmp, int agno, s64 nblocks, int l2nb, s64 * results)
 *              desired free space is allocated.
 *
 * PARAMETERS:
- *      bmp     -  pointer to bmap descriptor
+ *      bmp     -  pointer to bmap descriptor
- *      nblocks  -  actual number of contiguous free blocks desired.
+ *      nblocks  -  actual number of contiguous free blocks desired.
- *      l2nb     -  log2 number of contiguous free blocks desired.
+ *      l2nb     -  log2 number of contiguous free blocks desired.
- *      results -  on successful return, set to the starting block number
+ *      results -  on successful return, set to the starting block number
 *                 of the newly allocated range.
 *
 * RETURN VALUES:
- *      0       - success
+ *      0       - success
- *      -ENOSPC - insufficient disk resources
+ *      -ENOSPC - insufficient disk resources
- *      -EIO    - i/o error
+ *      -EIO    - i/o error
 *
 * serialization: IWRITE_LOCK(ipbmap) held on entry/exit;
 */
@@ -1598,9 +1591,9 @@ static int dbAllocAny(struct bmap * bmp, s64 nblocks, int l2nb, s64 * results)
 /*
 * NAME:        dbFindCtl()
 *
- * FUNCTION:    starting at a specified dmap control page level and block
+ * FUNCTION:    starting at a specified dmap control page level and block
 *              number, search down the dmap control levels for a range of
- *              contiguous free blocks large enough to satisfy an allocation
+ *              contiguous free blocks large enough to satisfy an allocation
 *              request for the specified number of free blocks.
 *
 *              if sufficient contiguous free blocks are found, this routine
@@ -1609,17 +1602,17 @@ static int dbAllocAny(struct bmap * bmp, s64 nblocks, int l2nb, s64 * results)
 *              is sufficient in size.
 *
 * PARAMETERS:
- *      bmp     -  pointer to bmap descriptor
+ *      bmp     -  pointer to bmap descriptor
- *      level   -  starting dmap control page level.
+ *      level   -  starting dmap control page level.
- *      l2nb    -  log2 number of contiguous free blocks desired.
+ *      l2nb    -  log2 number of contiguous free blocks desired.
- *      *blkno  -  on entry, starting block number for conducting the search.
+ *      *blkno  -  on entry, starting block number for conducting the search.
 *                 on successful return, the first block within a dmap page
 *                 that contains or starts a range of contiguous free blocks.
 *
 * RETURN VALUES:
- *      0       - success
+ *      0       - success
- *      -ENOSPC - insufficient disk resources
+ *      -ENOSPC - insufficient disk resources
- *      -EIO    - i/o error
+ *      -EIO    - i/o error
 *
 * serialization: IWRITE_LOCK(ipbmap) held on entry/exit;
 */
@@ -1699,7 +1692,7 @@ static int dbFindCtl(struct bmap * bmp, int l2nb, int level, s64 * blkno)
 /*
 * NAME:        dbAllocCtl()
 *
- * FUNCTION:    attempt to allocate a specified number of contiguous
+ * FUNCTION:    attempt to allocate a specified number of contiguous
 *              blocks starting within a specific dmap.
 *
 *              this routine is called by higher level routines that search
@@ -1726,18 +1719,18 @@ static int dbFindCtl(struct bmap * bmp, int l2nb, int level, s64 * blkno)
 *              first dmap (i.e. blkno).
 *
 * PARAMETERS:
- *      bmp     -  pointer to bmap descriptor
+ *      bmp     -  pointer to bmap descriptor
- *      nblocks  -  actual number of contiguous free blocks to allocate.
+ *      nblocks  -  actual number of contiguous free blocks to allocate.
- *      l2nb     -  log2 number of contiguous free blocks to allocate.
+ *      l2nb     -  log2 number of contiguous free blocks to allocate.
- *      blkno    -  starting block number of the dmap to start the allocation
+ *      blkno    -  starting block number of the dmap to start the allocation
 *                  from.
- *      results -  on successful return, set to the starting block number
+ *      results -  on successful return, set to the starting block number
 *                 of the newly allocated range.
 *
 * RETURN VALUES:
- *      0       - success
+ *      0       - success
- *      -ENOSPC - insufficient disk resources
+ *      -ENOSPC - insufficient disk resources
- *      -EIO    - i/o error
+ *      -EIO    - i/o error
 *
 * serialization: IWRITE_LOCK(ipbmap) held on entry/exit;
 */
@@ -1870,7 +1863,7 @@ dbAllocCtl(struct bmap * bmp, s64 nblocks, int l2nb, s64 blkno, s64 * results)
 /*
 * NAME:        dbAllocDmapLev()
 *
- * FUNCTION:    attempt to allocate a specified number of contiguous blocks
+ * FUNCTION:    attempt to allocate a specified number of contiguous blocks
 *              from a specified dmap.
 *
 *              this routine checks if the contiguous blocks are available.
@@ -1878,17 +1871,17 @@ dbAllocCtl(struct bmap * bmp, s64 nblocks, int l2nb, s64 blkno, s64 * results)
 *              returned.
 *
 * PARAMETERS:
- *      mp      -  pointer to bmap descriptor
+ *      mp      -  pointer to bmap descriptor
- *      dp      -  pointer to dmap to attempt to allocate blocks from.
+ *      dp      -  pointer to dmap to attempt to allocate blocks from.
- *      l2nb    -  log2 number of contiguous block desired.
+ *      l2nb    -  log2 number of contiguous block desired.
- *      nblocks -  actual number of contiguous block desired.
+ *      nblocks -  actual number of contiguous block desired.
- *      results -  on successful return, set to the starting block number
+ *      results -  on successful return, set to the starting block number
 *                 of the newly allocated range.
 *
 * RETURN VALUES:
- *      0       - success
+ *      0       - success
- *      -ENOSPC - insufficient disk resources
+ *      -ENOSPC - insufficient disk resources
- *      -EIO    - i/o error
+ *      -EIO    - i/o error
 *
 * serialization: IREAD_LOCK(ipbmap), e.g., from dbAlloc(), or
 *      IWRITE_LOCK(ipbmap), e.g., dbAllocCtl(), held on entry/exit;
@@ -1933,7 +1926,7 @@ dbAllocDmapLev(struct bmap * bmp,
 /*
 * NAME:        dbAllocDmap()
 *
- * FUNCTION:    adjust the disk allocation map to reflect the allocation
+ * FUNCTION:    adjust the disk allocation map to reflect the allocation
 *              of a specified block range within a dmap.
 *
 *              this routine allocates the specified blocks from the dmap
@@ -1946,14 +1939,14 @@ dbAllocDmapLev(struct bmap * bmp,
 *              covers this dmap.
 *
 * PARAMETERS:
- *      bmp     -  pointer to bmap descriptor
+ *      bmp     -  pointer to bmap descriptor
- *      dp      -  pointer to dmap to allocate the block range from.
+ *      dp      -  pointer to dmap to allocate the block range from.
- *      blkno   -  starting block number of the block to be allocated.
+ *      blkno   -  starting block number of the block to be allocated.
- *      nblocks -  number of blocks to be allocated.
+ *      nblocks -  number of blocks to be allocated.
 *
 * RETURN VALUES:
- *      0       - success
+ *      0       - success
- *      -EIO    - i/o error
+ *      -EIO    - i/o error
 *
 * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit;
 */
@@ -1989,7 +1982,7 @@ static int dbAllocDmap(struct bmap * bmp, struct dmap * dp, s64 blkno,
 /*
 * NAME:        dbFreeDmap()
 *
- * FUNCTION:    adjust the disk allocation map to reflect the allocation
+ * FUNCTION:    adjust the disk allocation map to reflect the allocation
 *              of a specified block range within a dmap.
 *
 *              this routine frees the specified blocks from the dmap through
@@ -1997,18 +1990,18 @@ static int dbAllocDmap(struct bmap * bmp, struct dmap * dp, s64 blkno,
 *              causes the maximum string of free blocks within the dmap to
 *              change (i.e. the value of the root of the dmap's dmtree), this
 *              routine will cause this change to be reflected up through the
- *              appropriate levels of the dmap control pages by a call to
+ *              appropriate levels of the dmap control pages by a call to
 *              dbAdjCtl() for the L0 dmap control page that covers this dmap.
 *
 * PARAMETERS:
- *      bmp     -  pointer to bmap descriptor
+ *      bmp     -  pointer to bmap descriptor
- *      dp      -  pointer to dmap to free the block range from.
+ *      dp      -  pointer to dmap to free the block range from.
- *      blkno   -  starting block number of the block to be freed.
+ *      blkno   -  starting block number of the block to be freed.
- *      nblocks -  number of blocks to be freed.
+ *      nblocks -  number of blocks to be freed.
 *
 * RETURN VALUES:
- *      0       - success
+ *      0       - success
- *      -EIO    - i/o error
+ *      -EIO    - i/o error
 *
 * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit;
 */
@@ -2055,7 +2048,7 @@ static int dbFreeDmap(struct bmap * bmp, struct dmap * dp, s64 blkno,
 /*
 * NAME:        dbAllocBits()
 *
- * FUNCTION:    allocate a specified block range from a dmap.
+ * FUNCTION:    allocate a specified block range from a dmap.
 *
 *              this routine updates the dmap to reflect the working
 *              state allocation of the specified block range. it directly
@@ -2065,10 +2058,10 @@ static int dbFreeDmap(struct bmap * bmp, struct dmap * dp, s64 blkno,
 *              dmap's dmtree, as a whole, to reflect the allocated range.
 *
 * PARAMETERS:
- *      bmp     -  pointer to bmap descriptor
+ *      bmp     -  pointer to bmap descriptor
- *      dp      -  pointer to dmap to allocate bits from.
+ *      dp      -  pointer to dmap to allocate bits from.
- *      blkno   -  starting block number of the bits to be allocated.
+ *      blkno   -  starting block number of the bits to be allocated.
- *      nblocks -  number of bits to be allocated.
+ *      nblocks -  number of bits to be allocated.
 *
 * RETURN VALUES: none
 *
@@ -2149,7 +2142,7 @@ static void dbAllocBits(struct bmap * bmp, struct dmap * dp, s64 blkno,
                         * the allocated words.
                         */
                        for (; nwords > 0; nwords -= nw) {
-                                if (leaf[word] < BUDMIN) {
+                                if (leaf[word] < BUDMIN) {
                                        jfs_error(bmp->db_ipbmap->i_sb,
                                                  "dbAllocBits: leaf page "
                                                  "corrupt");
@@ -2202,7 +2195,7 @@ static void dbAllocBits(struct bmap * bmp, struct dmap * dp, s64 blkno,
 /*
 * NAME:        dbFreeBits()
 *
- * FUNCTION:    free a specified block range from a dmap.
+ * FUNCTION:    free a specified block range from a dmap.
 *
 *              this routine updates the dmap to reflect the working
 *              state allocation of the specified block range. it directly
@@ -2212,10 +2205,10 @@ static void dbAllocBits(struct bmap * bmp, struct dmap * dp, s64 blkno,
 *              dmtree, as a whole, to reflect the deallocated range.
 *
 * PARAMETERS:
- *      bmp     -  pointer to bmap descriptor
+ *      bmp     -  pointer to bmap descriptor
- *      dp      -  pointer to dmap to free bits from.
+ *      dp      -  pointer to dmap to free bits from.
- *      blkno   -  starting block number of the bits to be freed.
+ *      blkno   -  starting block number of the bits to be freed.
- *      nblocks -  number of bits to be freed.
+ *      nblocks -  number of bits to be freed.
 *
 * RETURN VALUES: 0 for success
 *
@@ -2388,19 +2381,19 @@ static int dbFreeBits(struct bmap * bmp, struct dmap * dp, s64 blkno,
 *              the new root value and the next dmap control page level to
 *              be adjusted.
 * PARAMETERS:
- *      bmp     -  pointer to bmap descriptor
+ *      bmp     -  pointer to bmap descriptor
- *      blkno   -  the first block of a block range within a dmap.  it is
+ *      blkno   -  the first block of a block range within a dmap.  it is
 *                 the allocation or deallocation of this block range that
 *                 requires the dmap control page to be adjusted.
- *      newval  -  the new value of the lower level dmap or dmap control
+ *      newval  -  the new value of the lower level dmap or dmap control
 *                 page root.
- *      alloc   -  'true' if adjustment is due to an allocation.
+ *      alloc   -  'true' if adjustment is due to an allocation.
- *      level   -  current level of dmap control page (i.e. L0, L1, L2) to
+ *      level   -  current level of dmap control page (i.e. L0, L1, L2) to
 *                 be adjusted.
 *
 * RETURN VALUES:
- *      0       - success
+ *      0       - success
- *      -EIO    - i/o error
+ *      -EIO    - i/o error
 *
 * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit;
 */
@@ -2544,16 +2537,16 @@ dbAdjCtl(struct bmap * bmp, s64 blkno, int newval, int alloc, int level)
 /*
 * NAME:        dbSplit()
 *
- * FUNCTION:    update the leaf of a dmtree with a new value, splitting
+ * FUNCTION:    update the leaf of a dmtree with a new value, splitting
 *              the leaf from the binary buddy system of the dmtree's
 *              leaves, as required.
 *
 * PARAMETERS:
- *      tp      - pointer to the tree containing the leaf.
+ *      tp      - pointer to the tree containing the leaf.
- *      leafno  - the number of the leaf to be updated.
+ *      leafno  - the number of the leaf to be updated.
- *      splitsz - the size the binary buddy system starting at the leaf
+ *      splitsz - the size the binary buddy system starting at the leaf
 *                must be split to, specified as the log2 number of blocks.
- *      newval  - the new value for the leaf.
+ *      newval  - the new value for the leaf.
 *
 * RETURN VALUES: none
 *
@@ -2600,7 +2593,7 @@ static void dbSplit(dmtree_t * tp, int leafno, int splitsz, int newval)
 /*
 * NAME:        dbBackSplit()
 *
- * FUNCTION:    back split the binary buddy system of dmtree leaves
+ * FUNCTION:    back split the binary buddy system of dmtree leaves
 *              that hold a specified leaf until the specified leaf
 *              starts its own binary buddy system.
 *
@@ -2617,8 +2610,8 @@ static void dbSplit(dmtree_t * tp, int leafno, int splitsz, int newval)
 *              in which a previous join operation must be backed out.
 *
 * PARAMETERS:
- *      tp      - pointer to the tree containing the leaf.
+ *      tp      - pointer to the tree containing the leaf.
- *      leafno  - the number of the leaf to be updated.
+ *      leafno  - the number of the leaf to be updated.
 *
 * RETURN VALUES: none
 *
@@ -2692,14 +2685,14 @@ static int dbBackSplit(dmtree_t * tp, int leafno)
 /*
 * NAME:        dbJoin()
 *
- * FUNCTION:    update the leaf of a dmtree with a new value, joining
+ * FUNCTION:    update the leaf of a dmtree with a new value, joining
 *              the leaf with other leaves of the dmtree into a multi-leaf
 *              binary buddy system, as required.
 *
 * PARAMETERS:
- *      tp      - pointer to the tree containing the leaf.
+ *      tp      - pointer to the tree containing the leaf.
- *      leafno  - the number of the leaf to be updated.
+ *      leafno  - the number of the leaf to be updated.
- *      newval  - the new value for the leaf.
+ *      newval  - the new value for the leaf.
 *
 * RETURN VALUES: none
 */
@@ -2785,15 +2778,15 @@ static int dbJoin(dmtree_t * tp, int leafno, int newval)
 /*
 * NAME:        dbAdjTree()
 *
- * FUNCTION:    update a leaf of a dmtree with a new value, adjusting
+ * FUNCTION:    update a leaf of a dmtree with a new value, adjusting
 *              the dmtree, as required, to reflect the new leaf value.
 *              the combination of any buddies must already be done before
 *              this is called.
 *
 * PARAMETERS:
- *      tp      - pointer to the tree to be adjusted.
+ *      tp      - pointer to the tree to be adjusted.
- *      leafno  - the number of the leaf to be updated.
+ *      leafno  - the number of the leaf to be updated.
- *      newval  - the new value for the leaf.
+ *      newval  - the new value for the leaf.
 *
 * RETURN VALUES: none
 */
@@ -2852,7 +2845,7 @@ static void dbAdjTree(dmtree_t * tp, int leafno, int newval)
 /*
 * NAME:        dbFindLeaf()
 *
- * FUNCTION:    search a dmtree_t for sufficient free blocks, returning
+ * FUNCTION:    search a dmtree_t for sufficient free blocks, returning
 *              the index of a leaf describing the free blocks if
 *              sufficient free blocks are found.
 *
@@ -2861,15 +2854,15 @@ static void dbAdjTree(dmtree_t * tp, int leafno, int newval)
 *              free space.
 *
 * PARAMETERS:
- *      tp      - pointer to the tree to be searched.
+ *      tp      - pointer to the tree to be searched.
- *      l2nb    - log2 number of free blocks to search for.
+ *      l2nb    - log2 number of free blocks to search for.
 *      leafidx - return pointer to be set to the index of the leaf
 *                describing at least l2nb free blocks if sufficient
 *                free blocks are found.
 *
 * RETURN VALUES:
- *      0       - success
+ *      0       - success
- *      -ENOSPC - insufficient free blocks.
+ *      -ENOSPC - insufficient free blocks.
 */
 static int dbFindLeaf(dmtree_t * tp, int l2nb, int *leafidx)
 {
@@ -2916,18 +2909,18 @@ static int dbFindLeaf(dmtree_t * tp, int l2nb, int *leafidx)
 /*
 * NAME:        dbFindBits()
 *
- * FUNCTION:    find a specified number of binary buddy free bits within a
+ * FUNCTION:    find a specified number of binary buddy free bits within a
 *              dmap bitmap word value.
 *
 *              this routine searches the bitmap value for (1 << l2nb) free
 *              bits at (1 << l2nb) alignments within the value.
 *
 * PARAMETERS:
- *      word    -  dmap bitmap word value.
+ *      word    -  dmap bitmap word value.
- *      l2nb    -  number of free bits specified as a log2 number.
+ *      l2nb    -  number of free bits specified as a log2 number.
 *
 * RETURN VALUES:
- *      starting bit number of free bits.
+ *      starting bit number of free bits.
 */
 static int dbFindBits(u32 word, int l2nb)
 {
@@ -2963,14 +2956,14 @@ static int dbFindBits(u32 word, int l2nb)
 /*
 * NAME:        dbMaxBud(u8 *cp)
 *
- * FUNCTION:    determine the largest binary buddy string of free
+ * FUNCTION:    determine the largest binary buddy string of free
 *              bits within 32-bits of the map.
 *
 * PARAMETERS:
- *      cp      -  pointer to the 32-bit value.
+ *      cp      -  pointer to the 32-bit value.
 *
 * RETURN VALUES:
- *      largest binary buddy of free bits within a dmap word.
+ *      largest binary buddy of free bits within a dmap word.
 */
 static int dbMaxBud(u8 * cp)
 {
@@ -3000,14 +2993,14 @@ static int dbMaxBud(u8 * cp)
 /*
 * NAME:        cnttz(uint word)
 *
- * FUNCTION:    determine the number of trailing zeros within a 32-bit
+ * FUNCTION:    determine the number of trailing zeros within a 32-bit
 *              value.
 *
 * PARAMETERS:
- *      value   -  32-bit value to be examined.
+ *      value   -  32-bit value to be examined.
 *
 * RETURN VALUES:
- *      count of trailing zeros
+ *      count of trailing zeros
 */
 static int cnttz(u32 word)
 {
@@ -3025,14 +3018,14 @@ static int cnttz(u32 word)
 /*
 * NAME:        cntlz(u32 value)
 *
- * FUNCTION:    determine the number of leading zeros within a 32-bit
+ * FUNCTION:    determine the number of leading zeros within a 32-bit
 *              value.
 *
 * PARAMETERS:
- *      value   -  32-bit value to be examined.
+ *      value   -  32-bit value to be examined.
 *
 * RETURN VALUES:
- *      count of leading zeros
+ *      count of leading zeros
 */
 static int cntlz(u32 value)
 {
@@ -3050,14 +3043,14 @@ static int cntlz(u32 value)
 * NAME:        blkstol2(s64 nb)
 *
 * FUNCTION:    convert a block count to its log2 value. if the block
- *              count is not a l2 multiple, it is rounded up to the next
+ *              count is not a l2 multiple, it is rounded up to the next
 *              larger l2 multiple.
 *
 * PARAMETERS:
- *      nb      -  number of blocks
+ *      nb      -  number of blocks
 *
 * RETURN VALUES:
- *      log2 number of blocks
+ *      log2 number of blocks
 */
 static int blkstol2(s64 nb)
 {
@@ -3099,13 +3092,13 @@ static int blkstol2(s64 nb)
 *              at a time.
 *
 * PARAMETERS:
- *      ip      -  pointer to in-core inode;
+ *      ip      -  pointer to in-core inode;
- *      blkno   -  starting block number to be freed.
+ *      blkno   -  starting block number to be freed.
- *      nblocks -  number of blocks to be freed.
+ *      nblocks -  number of blocks to be freed.
 *
 * RETURN VALUES:
- *      0       - success
+ *      0       - success
- *      -EIO    - i/o error
+ *      -EIO    - i/o error
 */
 int dbAllocBottomUp(struct inode *ip, s64 blkno, s64 nblocks)
 {
@@ -3278,10 +3271,10 @@ static int dbAllocDmapBU(struct bmap * bmp, struct dmap * dp, s64 blkno,
 * L2
 *  |
 *   L1---------------------------------L1
- *    |                                  |
+ *    |                                  |
- *     L0---------L0---------L0           L0---------L0---------L0
+ *     L0---------L0---------L0           L0---------L0---------L0
- *      |          |          |            |          |          |
+ *      |          |          |            |          |          |
- *       d0,...,dn  d0,...,dn  d0,...,dn    d0,...,dn  d0,...,dn  d0,.,dm;
+ *       d0,...,dn  d0,...,dn  d0,...,dn    d0,...,dn  d0,...,dn  d0,.,dm;
 * L2L1L0d0,...,dnL0d0,...,dnL0d0,...,dnL1L0d0,...,dnL0d0,...,dnL0d0,..dm
 *
 * <---old---><----------------------------extend----------------------->
@@ -3307,7 +3300,7 @@ int dbExtendFS(struct inode *ipbmap, s64 blkno,	s64 nblocks)
                 (long long) blkno, (long long) nblocks, (long long) newsize);
        /*
-         *      initialize bmap control page.
+         *      initialize bmap control page.
         *
         * all the data in bmap control page should exclude
         * the mkfs hidden dmap page.
@@ -3330,7 +3323,7 @@ int dbExtendFS(struct inode *ipbmap, s64 blkno,	s64 nblocks)
        bmp->db_numag += ((u32) newsize % (u32) bmp->db_agsize) ? 1 : 0;
        /*
-         *      reconfigure db_agfree[]
+         *      reconfigure db_agfree[]
         * from old AG configuration to new AG configuration;
         *
         * coalesce contiguous k (newAGSize/oldAGSize) AGs;
@@ -3362,7 +3355,7 @@ int dbExtendFS(struct inode *ipbmap, s64 blkno,	s64 nblocks)
        bmp->db_maxag = bmp->db_maxag / k;
        /*
-         *      extend bmap
+         *      extend bmap
         *
         * update bit maps and corresponding level control pages;
         * global control page db_nfree, db_agfree[agno], db_maxfreebud;
@@ -3410,7 +3403,7 @@ int dbExtendFS(struct inode *ipbmap, s64 blkno,	s64 nblocks)
                        /* compute start L0 */
                        j = 0;
                        l1leaf = l1dcp->stree + CTLLEAFIND;
-                        p += nbperpage; /* 1st L0 of L1.k  */
+                        p += nbperpage; /* 1st L0 of L1.k */
                }
                /*
@@ -3548,7 +3541,7 @@ errout:
        return -EIO;
        /*
-         *      finalize bmap control page
+         *      finalize bmap control page
         */
 finalize:
@@ -3567,7 +3560,7 @@ void dbFinalizeBmap(struct inode *ipbmap)
        int i, n;
        /*
-         *      finalize bmap control page
+         *      finalize bmap control page
         */
 //finalize:
        /*
@@ -3953,8 +3946,8 @@ static int dbGetL2AGSize(s64 nblocks)
 * convert number of map pages to the zero origin top dmapctl level
 */
 #define BMAPPGTOLEV(npages)     \
-        (((npages) <= 3 + MAXL0PAGES) ? 0 \
+        (((npages) <= 3 + MAXL0PAGES) ? 0 : \
-       : ((npages) <= 2 + MAXL1PAGES) ? 1 : 2)
+         ((npages) <= 2 + MAXL1PAGES) ? 1 : 2)
 s64 dbMapFileSizeToMapSize(struct inode * ipbmap)
 {
@@ -3981,8 +3974,8 @@ s64 dbMapFileSizeToMapSize(struct inode * ipbmap)
                factor =
                    (i == 2) ? MAXL1PAGES : ((i == 1) ? MAXL0PAGES : 1);
                complete = (u32) npages / factor;
-                ndmaps += complete * ((i == 2) ? LPERCTL * LPERCTL
+                ndmaps += complete * ((i == 2) ? LPERCTL * LPERCTL :
-                                      : ((i == 1) ? LPERCTL : 1));
+                                      ((i == 1) ? LPERCTL : 1));
                /* pages in last/incomplete child */
                npages = (u32) npages % factor;
diff --git a/fs/jfs/jfs_dmap.h b/fs/jfs/jfs_dmap.h
index 45ea454c74bd..11e6d471b364 100644
--- a/fs/jfs/jfs_dmap.h
+++ b/fs/jfs/jfs_dmap.h
@@ -83,7 +83,7 @@ static __inline signed char TREEMAX(signed char *cp)
 *      - 1 is added to account for the control page of the map.
 */
 #define BLKTODMAP(b,s)    \
-        ((((b) >> 13) + ((b) >> 23) + ((b) >> 33) + 3 + 1) << (s))
+        ((((b) >> 13) + ((b) >> 23) + ((b) >> 33) + 3 + 1) << (s))
 /*
 * convert disk block number to the logical block number of the LEVEL 0
@@ -98,7 +98,7 @@ static __inline signed char TREEMAX(signed char *cp)
 *      - 1 is added to account for the control page of the map.
 */
 #define BLKTOL0(b,s)      \
-        (((((b) >> 23) << 10) + ((b) >> 23) + ((b) >> 33) + 2 + 1) << (s))
+        (((((b) >> 23) << 10) + ((b) >> 23) + ((b) >> 33) + 2 + 1) << (s))
 /*
 * convert disk block number to the logical block number of the LEVEL 1
@@ -120,7 +120,7 @@ static __inline signed char TREEMAX(signed char *cp)
 * at the specified level which describes the disk block.
 */
 #define BLKTOCTL(b,s,l)   \
-        (((l) == 2) ? 1 : ((l) == 1) ? BLKTOL1((b),(s)) : BLKTOL0((b),(s)))
+        (((l) == 2) ? 1 : ((l) == 1) ? BLKTOL1((b),(s)) : BLKTOL0((b),(s)))
 /*
 * convert aggregate map size to the zero origin dmapctl level of the
@@ -145,27 +145,27 @@ static __inline signed char TREEMAX(signed char *cp)
 * dmaptree must be consistent with dmapctl.
 */
 struct dmaptree {
-        __le32 nleafs;          /* 4: number of tree leafs      */
+        __le32 nleafs;          /* 4: number of tree leafs      */
-        __le32 l2nleafs;        /* 4: l2 number of tree leafs   */
+        __le32 l2nleafs;        /* 4: l2 number of tree leafs   */
-        __le32 leafidx;         /* 4: index of first tree leaf  */
+        __le32 leafidx;         /* 4: index of first tree leaf  */
-        __le32 height;          /* 4: height of the tree        */
+        __le32 height;          /* 4: height of the tree        */
        s8 budmin;              /* 1: min l2 tree leaf value to combine */
-        s8 stree[TREESIZE];     /* TREESIZE: tree               */
+        s8 stree[TREESIZE];     /* TREESIZE: tree               */
-        u8 pad[2];              /* 2: pad to word boundary      */
+        u8 pad[2];              /* 2: pad to word boundary      */
-};                              /* - 360 -                      */
+};                              /* - 360 -                      */
 /*
 *      dmap page per 8K blocks bitmap
 */
 struct dmap {
-        __le32 nblocks;         /* 4: num blks covered by this dmap     */
+        __le32 nblocks;         /* 4: num blks covered by this dmap     */
-        __le32 nfree;           /* 4: num of free blks in this dmap     */
+        __le32 nfree;           /* 4: num of free blks in this dmap     */
-        __le64 start;           /* 8: starting blkno for this dmap      */
+        __le64 start;           /* 8: starting blkno for this dmap      */
-        struct dmaptree tree;   /* 360: dmap tree                       */
+        struct dmaptree tree;   /* 360: dmap tree                       */
-        u8 pad[1672];           /* 1672: pad to 2048 bytes              */
+        u8 pad[1672];           /* 1672: pad to 2048 bytes              */
-        __le32 wmap[LPERDMAP];  /* 1024: bits of the working map        */
+        __le32 wmap[LPERDMAP];  /* 1024: bits of the working map        */
-        __le32 pmap[LPERDMAP];  /* 1024: bits of the persistent map     */
+        __le32 pmap[LPERDMAP];  /* 1024: bits of the persistent map     */
-};                              /* - 4096 -                             */
+};                              /* - 4096 -                             */
 /*
 *      disk map control page per level.
@@ -173,14 +173,14 @@ struct dmap {
 * dmapctl must be consistent with dmaptree.
 */
 struct dmapctl {
-        __le32 nleafs;          /* 4: number of tree leafs      */
+        __le32 nleafs;          /* 4: number of tree leafs      */
-        __le32 l2nleafs;        /* 4: l2 number of tree leafs   */
+        __le32 l2nleafs;        /* 4: l2 number of tree leafs   */
-        __le32 leafidx;         /* 4: index of the first tree leaf      */
+        __le32 leafidx;         /* 4: index of the first tree leaf      */
-        __le32 height;          /* 4: height of tree            */
+        __le32 height;          /* 4: height of tree            */
-        s8 budmin;              /* 1: minimum l2 tree leaf value        */
+        s8 budmin;              /* 1: minimum l2 tree leaf value        */
-        s8 stree[CTLTREESIZE];  /* CTLTREESIZE: dmapctl tree    */
+        s8 stree[CTLTREESIZE];  /* CTLTREESIZE: dmapctl tree    */
-        u8 pad[2714];           /* 2714: pad to 4096            */
+        u8 pad[2714];           /* 2714: pad to 4096            */
-};                              /* - 4096 -                     */
+};                              /* - 4096 -                     */
 /*
 *      common definition for dmaptree within dmap and dmapctl
@@ -202,41 +202,41 @@ typedef union dmtree {
 *      on-disk aggregate disk allocation map descriptor.
 */
 struct dbmap_disk {
-        __le64 dn_mapsize;      /* 8: number of blocks in aggregate     */
+        __le64 dn_mapsize;      /* 8: number of blocks in aggregate     */
-        __le64 dn_nfree;        /* 8: num free blks in aggregate map    */
+        __le64 dn_nfree;        /* 8: num free blks in aggregate map    */
-        __le32 dn_l2nbperpage;  /* 4: number of blks per page           */
+        __le32 dn_l2nbperpage;  /* 4: number of blks per page           */
-        __le32 dn_numag;        /* 4: total number of ags               */
+        __le32 dn_numag;        /* 4: total number of ags               */
-        __le32 dn_maxlevel;     /* 4: number of active ags              */
+        __le32 dn_maxlevel;     /* 4: number of active ags              */
-        __le32 dn_maxag;        /* 4: max active alloc group number     */
+        __le32 dn_maxag;        /* 4: max active alloc group number     */
-        __le32 dn_agpref;       /* 4: preferred alloc group (hint)      */
+        __le32 dn_agpref;       /* 4: preferred alloc group (hint)      */
-        __le32 dn_aglevel;      /* 4: dmapctl level holding the AG      */
+        __le32 dn_aglevel;      /* 4: dmapctl level holding the AG      */
-        __le32 dn_agheigth;     /* 4: height in dmapctl of the AG       */
+        __le32 dn_agheigth;     /* 4: height in dmapctl of the AG       */
-        __le32 dn_agwidth;      /* 4: width in dmapctl of the AG        */
+        __le32 dn_agwidth;      /* 4: width in dmapctl of the AG        */
-        __le32 dn_agstart;      /* 4: start tree index at AG height     */
+        __le32 dn_agstart;      /* 4: start tree index at AG height     */
-        __le32 dn_agl2size;     /* 4: l2 num of blks per alloc group    */
+        __le32 dn_agl2size;     /* 4: l2 num of blks per alloc group    */
-        __le64 dn_agfree[MAXAG];/* 8*MAXAG: per AG free count           */
+        __le64 dn_agfree[MAXAG];/* 8*MAXAG: per AG free count           */
-        __le64 dn_agsize;       /* 8: num of blks per alloc group       */
+        __le64 dn_agsize;       /* 8: num of blks per alloc group       */
-        s8 dn_maxfreebud;       /* 1: max free buddy system             */
+        s8 dn_maxfreebud;       /* 1: max free buddy system             */
-        u8 pad[3007];           /* 3007: pad to 4096                    */
+        u8 pad[3007];           /* 3007: pad to 4096                    */
-};                              /* - 4096 -                             */
+};                              /* - 4096 -                             */
 struct dbmap {
-        s64 dn_mapsize;         /* number of blocks in aggregate     */
+        s64 dn_mapsize;         /* number of blocks in aggregate        */
-        s64 dn_nfree;           /* num free blks in aggregate map    */
+        s64 dn_nfree;           /* num free blks in aggregate map       */
-        int dn_l2nbperpage;     /* number of blks per page           */
+        int dn_l2nbperpage;     /* number of blks per page              */
-        int dn_numag;           /* total number of ags               */
+        int dn_numag;           /* total number of ags                  */
-        int dn_maxlevel;        /* number of active ags              */
+        int dn_maxlevel;        /* number of active ags                 */
-        int dn_maxag;           /* max active alloc group number     */
+        int dn_maxag;           /* max active alloc group number        */
-        int dn_agpref;          /* preferred alloc group (hint)      */
+        int dn_agpref;          /* preferred alloc group (hint)         */
-        int dn_aglevel;         /* dmapctl level holding the AG      */
+        int dn_aglevel;         /* dmapctl level holding the AG         */
-        int dn_agheigth;        /* height in dmapctl of the AG       */
+        int dn_agheigth;        /* height in dmapctl of the AG          */
-        int dn_agwidth;         /* width in dmapctl of the AG        */
+        int dn_agwidth;         /* width in dmapctl of the AG           */
-        int dn_agstart;         /* start tree index at AG height     */
+        int dn_agstart;         /* start tree index at AG height        */
-        int dn_agl2size;        /* l2 num of blks per alloc group    */
+        int dn_agl2size;        /* l2 num of blks per alloc group       */
-        s64 dn_agfree[MAXAG];   /* per AG free count           */
+        s64 dn_agfree[MAXAG];   /* per AG free count                    */
-        s64 dn_agsize;          /* num of blks per alloc group       */
+        s64 dn_agsize;          /* num of blks per alloc group          */
-        signed char dn_maxfreebud;      /* max free buddy system             */
+        signed char dn_maxfreebud;      /* max free buddy system        */
-};                              /* - 4096 -                             */
+};                              /* - 4096 -                             */
 /*
 *      in-memory aggregate disk allocation map descriptor.
 */
diff --git a/fs/jfs/jfs_dtree.c b/fs/jfs/jfs_dtree.c
index 6d62f3222892..c14ba3cfa818 100644
--- a/fs/jfs/jfs_dtree.c
+++ b/fs/jfs/jfs_dtree.c
@@ -315,8 +315,8 @@ static inline void lock_index(tid_t tid, struct inode *ip, struct metapage * mp,
        lv = &llck->lv[llck->index];
        /*
-         *      Linelock slot size is twice the size of directory table
+         *      Linelock slot size is twice the size of directory table
-         *      slot size.  512 entries per page.
+         *      slot size.  512 entries per page.
         */
        lv->offset = ((index - 2) & 511) >> 1;
        lv->length = 1;
@@ -615,7 +615,7 @@ int dtSearch(struct inode *ip, struct component_name * key, ino_t * data,
        btstack->nsplit = 1;
        /*
-         *      search down tree from root:
+         *      search down tree from root:
         *
         * between two consecutive entries of <Ki, Pi> and <Kj, Pj> of
         * internal page, child page Pi contains entry with k, Ki <= K < Kj.
@@ -659,7 +659,7 @@ int dtSearch(struct inode *ip, struct component_name * key, ino_t * data,
                        }
                        if (cmp == 0) {
                                /*
-                                 *      search hit
+                                 *      search hit
                                 */
                                /* search hit - leaf page:
                                 * return the entry found
@@ -723,7 +723,7 @@ int dtSearch(struct inode *ip, struct component_name * key, ino_t * data,
                }
                /*
-                 *      search miss
+                 *      search miss
                 *
                 * base is the smallest index with key (Kj) greater than
                 * search key (K) and may be zero or (maxindex + 1) index.
@@ -834,7 +834,7 @@ int dtInsert(tid_t tid, struct inode *ip,
        struct lv *lv;
        /*
-         *      retrieve search result
+         *      retrieve search result
         *
         * dtSearch() returns (leaf page pinned, index at which to insert).
         * n.b. dtSearch() may return index of (maxindex + 1) of
@@ -843,7 +843,7 @@ int dtInsert(tid_t tid, struct inode *ip,
        DT_GETSEARCH(ip, btstack->top, bn, mp, p, index);
        /*
-         *      insert entry for new key
+         *      insert entry for new key
         */
        if (DO_INDEX(ip)) {
                if (JFS_IP(ip)->next_index == DIREND) {
@@ -860,9 +860,9 @@ int dtInsert(tid_t tid, struct inode *ip,
        data.leaf.ino = *fsn;
        /*
-         *      leaf page does not have enough room for new entry:
+         *      leaf page does not have enough room for new entry:
         *
-         *      extend/split the leaf page;
+         *      extend/split the leaf page;
         *
         * dtSplitUp() will insert the entry and unpin the leaf page.
         */
@@ -877,9 +877,9 @@ int dtInsert(tid_t tid, struct inode *ip,
        }
        /*
-         *      leaf page does have enough room for new entry:
+         *      leaf page does have enough room for new entry:
         *
-         *      insert the new data entry into the leaf page;
+         *      insert the new data entry into the leaf page;
         */
        BT_MARK_DIRTY(mp, ip);
        /*
@@ -967,13 +967,13 @@ static int dtSplitUp(tid_t tid,
        }
        /*
-         *      split leaf page
+         *      split leaf page
         *
         * The split routines insert the new entry, and
         * acquire txLock as appropriate.
         */
        /*
-         *      split root leaf page:
+         *      split root leaf page:
         */
        if (sp->header.flag & BT_ROOT) {
                /*
@@ -1012,7 +1012,7 @@ static int dtSplitUp(tid_t tid,
        }
        /*
-         *      extend first leaf page
+         *      extend first leaf page
         *
         * extend the 1st extent if less than buffer page size
         * (dtExtendPage() reurns leaf page unpinned)
@@ -1068,7 +1068,7 @@ static int dtSplitUp(tid_t tid,
        }
        /*
-         *      split leaf page <sp> into <sp> and a new right page <rp>.
+         *      split leaf page <sp> into <sp> and a new right page <rp>.
         *
         * return <rp> pinned and its extent descriptor <rpxd>
         */
@@ -1433,7 +1433,7 @@ static int dtSplitPage(tid_t tid, struct inode *ip, struct dtsplit * split,
        rp->header.freecnt = rp->header.maxslot - fsi;
        /*
-         *      sequential append at tail: append without split
+         *      sequential append at tail: append without split
         *
         * If splitting the last page on a level because of appending
         * a entry to it (skip is maxentry), it's likely that the access is
@@ -1467,7 +1467,7 @@ static int dtSplitPage(tid_t tid, struct inode *ip, struct dtsplit * split,
        }
        /*
-         *      non-sequential insert (at possibly middle page)
+         *      non-sequential insert (at possibly middle page)
         */
        /*
@@ -1508,7 +1508,7 @@ static int dtSplitPage(tid_t tid, struct inode *ip, struct dtsplit * split,
        left = 0;
        /*
-         *      compute fill factor for split pages
+         *      compute fill factor for split pages
         *
         * <nxt> traces the next entry to move to rp
         * <off> traces the next entry to stay in sp
@@ -1551,7 +1551,7 @@ static int dtSplitPage(tid_t tid, struct inode *ip, struct dtsplit * split,
        /* <nxt> poins to the 1st entry to move */
        /*
-         *      move entries to right page
+         *      move entries to right page
         *
         * dtMoveEntry() initializes rp and reserves entry for insertion
         *
@@ -1677,7 +1677,7 @@ static int dtExtendPage(tid_t tid,
                return (rc);
        /*
-         *      extend the extent
+         *      extend the extent
         */
        pxdlist = split->pxdlist;
        pxd = &pxdlist->pxd[pxdlist->npxd];
@@ -1722,7 +1722,7 @@ static int dtExtendPage(tid_t tid,
        }
        /*
-         *      extend the page
+         *      extend the page
         */
        sp->header.self = *pxd;
@@ -1739,9 +1739,6 @@ static int dtExtendPage(tid_t tid,
        /* update buffer extent descriptor of extended page */
        xlen = lengthPXD(pxd);
        xsize = xlen << JFS_SBI(sb)->l2bsize;
-#ifdef _STILL_TO_PORT
-        bmSetXD(smp, xaddr, xsize);
-#endif                          /*  _STILL_TO_PORT */
        /*
         * copy old stbl to new stbl at start of extended area
@@ -1836,7 +1833,7 @@ static int dtExtendPage(tid_t tid,
        }
        /*
-         *      update parent entry on the parent/root page
+         *      update parent entry on the parent/root page
         */
        /*
         * acquire a transaction lock on the parent/root page
@@ -1904,7 +1901,7 @@ static int dtSplitRoot(tid_t tid,
        sp = &JFS_IP(ip)->i_dtroot;
        /*
-         *      allocate/initialize a single (right) child page
+         *      allocate/initialize a single (right) child page
         *
         * N.B. at first split, a one (or two) block to fit new entry
         * is allocated; at subsequent split, a full page is allocated;
@@ -1943,7 +1940,7 @@ static int dtSplitRoot(tid_t tid,
        rp->header.prev = 0;
        /*
-         *      move in-line root page into new right page extent
+         *      move in-line root page into new right page extent
         */
        /* linelock header + copied entries + new stbl (1st slot) in new page */
        ASSERT(dtlck->index == 0);
@@ -2016,7 +2013,7 @@ static int dtSplitRoot(tid_t tid,
        dtInsertEntry(rp, split->index, split->key, split->data, &dtlck);
        /*
-         *      reset parent/root page
+         *      reset parent/root page
         *
         * set the 1st entry offset to 0, which force the left-most key
         * at any level of the tree to be less than any search key.
@@ -2102,7 +2099,7 @@ int dtDelete(tid_t tid,
        dtpage_t *np;
        /*
-         *      search for the entry to delete:
+         *      search for the entry to delete:
         *
         * dtSearch() returns (leaf page pinned, index at which to delete).
         */
@@ -2253,7 +2250,7 @@ static int dtDeleteUp(tid_t tid, struct inode *ip,
        int i;
        /*
-         *      keep the root leaf page which has become empty
+         *      keep the root leaf page which has become empty
         */
        if (BT_IS_ROOT(fmp)) {
                /*
@@ -2269,7 +2266,7 @@ static int dtDeleteUp(tid_t tid, struct inode *ip,
        }
        /*
-         *      free the non-root leaf page
+         *      free the non-root leaf page
         */
        /*
         * acquire a transaction lock on the page
@@ -2299,7 +2296,7 @@ static int dtDeleteUp(tid_t tid, struct inode *ip,
        discard_metapage(fmp);
        /*
-         *      propagate page deletion up the directory tree
+         *      propagate page deletion up the directory tree
         *
         * If the delete from the parent page makes it empty,
         * continue all the way up the tree.
@@ -2440,10 +2437,10 @@ static int dtDeleteUp(tid_t tid, struct inode *ip,
 #ifdef _NOTYET
 /*
- * NAME:        dtRelocate()
+ * NAME:        dtRelocate()
 *
- * FUNCTION:    relocate dtpage (internal or leaf) of directory;
+ * FUNCTION:    relocate dtpage (internal or leaf) of directory;
- *              This function is mainly used by defragfs utility.
+ *              This function is mainly used by defragfs utility.
 */
 int dtRelocate(tid_t tid, struct inode *ip, s64 lmxaddr, pxd_t * opxd,
               s64 nxaddr)
@@ -2471,8 +2468,8 @@ int dtRelocate(tid_t tid, struct inode *ip, s64 lmxaddr, pxd_t * opxd,
                   xlen);
        /*
-         *      1. get the internal parent dtpage covering
+         *      1. get the internal parent dtpage covering
-         *      router entry for the tartget page to be relocated;
+         *      router entry for the target page to be relocated;
         */
        rc = dtSearchNode(ip, lmxaddr, opxd, &btstack);
        if (rc)
@@ -2483,7 +2480,7 @@ int dtRelocate(tid_t tid, struct inode *ip, s64 lmxaddr, pxd_t * opxd,
        jfs_info("dtRelocate: parent router entry validated.");
        /*
-         *      2. relocate the target dtpage
+         *      2. relocate the target dtpage
         */
        /* read in the target page from src extent */
        DT_GETPAGE(ip, oxaddr, mp, PSIZE, p, rc);
@@ -2581,9 +2578,7 @@ int dtRelocate(tid_t tid, struct inode *ip, s64 lmxaddr, pxd_t * opxd,
        /* update the buffer extent descriptor of the dtpage */
        xsize = xlen << JFS_SBI(ip->i_sb)->l2bsize;
-#ifdef _STILL_TO_PORT
-        bmSetXD(mp, nxaddr, xsize);
-#endif /* _STILL_TO_PORT */
        /* unpin the relocated page */
        DT_PUTPAGE(mp);
        jfs_info("dtRelocate: target dtpage relocated.");
@@ -2594,7 +2589,7 @@ int dtRelocate(tid_t tid, struct inode *ip, s64 lmxaddr, pxd_t * opxd,
         */
        /*
-         *      3. acquire maplock for the source extent to be freed;
+         *      3. acquire maplock for the source extent to be freed;
         */
        /* for dtpage relocation, write a LOG_NOREDOPAGE record
         * for the source dtpage (logredo() will init NoRedoPage
@@ -2609,7 +2604,7 @@ int dtRelocate(tid_t tid, struct inode *ip, s64 lmxaddr, pxd_t * opxd,
        pxdlock->index = 1;
        /*
-         *      4. update the parent router entry for relocation;
+         *      4. update the parent router entry for relocation;
         *
         * acquire tlck for the parent entry covering the target dtpage;
         * write LOG_REDOPAGE to apply after image only;
@@ -2637,7 +2632,7 @@ int dtRelocate(tid_t tid, struct inode *ip, s64 lmxaddr, pxd_t * opxd,
 * NAME:        dtSearchNode()
 *
 * FUNCTION:    Search for an dtpage containing a specified address
- *              This function is mainly used by defragfs utility.
+ *              This function is mainly used by defragfs utility.
 *
 * NOTE:        Search result on stack, the found page is pinned at exit.
 *              The result page must be an internal dtpage.
@@ -2660,7 +2655,7 @@ static int dtSearchNode(struct inode *ip, s64 lmxaddr, pxd_t * kpxd,
        BT_CLR(btstack);        /* reset stack */
        /*
-         *      descend tree to the level with specified leftmost page
+         *      descend tree to the level with specified leftmost page
         *
         *  by convention, root bn = 0.
         */
@@ -2699,7 +2694,7 @@ static int dtSearchNode(struct inode *ip, s64 lmxaddr, pxd_t * kpxd,
        }
        /*
-         *      search each page at the current levevl
+         *      search each page at the current level
         */
      loop:
        stbl = DT_GETSTBL(p);
@@ -3044,9 +3039,9 @@ int jfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
        if (DO_INDEX(ip)) {
                /*
                 * persistent index is stored in directory entries.
-                 * Special cases:        0 = .
+                 * Special cases:        0 = .
-                 *                       1 = ..
+                 *                       1 = ..
-                 *                      -1 = End of directory
+                 *                      -1 = End of directory
                 */
                do_index = 1;
@@ -3128,10 +3123,10 @@ int jfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
                /*
                 * Legacy filesystem - OS/2 & Linux JFS < 0.3.6
                 *
-                 * pn = index = 0:      First entry "."
+                 * pn = index = 0:      First entry "."
-                 * pn = 0; index = 1:   Second entry ".."
+                 * pn = 0; index = 1:   Second entry ".."
-                 * pn > 0:              Real entries, pn=1 -> leftmost page
+                 * pn > 0:              Real entries, pn=1 -> leftmost page
-                 * pn = index = -1:     No more entries
+                 * pn = index = -1:     No more entries
                 */
                dtpos = filp->f_pos;
                if (dtpos == 0) {
@@ -3351,7 +3346,7 @@ static int dtReadFirst(struct inode *ip, struct btstack * btstack)
        BT_CLR(btstack);        /* reset stack */
        /*
-         *      descend leftmost path of the tree
+         *      descend leftmost path of the tree
         *
         * by convention, root bn = 0.
         */
@@ -4531,7 +4526,7 @@ int dtModify(tid_t tid, struct inode *ip,
        struct ldtentry *entry;
        /*
-         *      search for the entry to modify:
+         *      search for the entry to modify:
         *
         * dtSearch() returns (leaf page pinned, index at which to modify).
         */
diff --git a/fs/jfs/jfs_dtree.h b/fs/jfs/jfs_dtree.h
index af8513f78648..8561c6ecece0 100644
--- a/fs/jfs/jfs_dtree.h
+++ b/fs/jfs/jfs_dtree.h
@@ -35,7 +35,7 @@ typedef union {
 /*
- *      entry segment/slot
+ *      entry segment/slot
 *
 * an entry consists of type dependent head/only segment/slot and
 * additional segments/slots linked vi next field;
diff --git a/fs/jfs/jfs_extent.c b/fs/jfs/jfs_extent.c
index a35bdca6a805..7ae1e3281de9 100644
--- a/fs/jfs/jfs_extent.c
+++ b/fs/jfs/jfs_extent.c
@@ -34,8 +34,8 @@ static int extBrealloc(struct inode *, s64, s64, s64 *, s64 *);
 #endif
 static s64 extRoundDown(s64 nb);
-#define DPD(a)          (printk("(a): %d\n",(a)))
+#define DPD(a)          (printk("(a): %d\n",(a)))
-#define DPC(a)          (printk("(a): %c\n",(a)))
+#define DPC(a)          (printk("(a): %c\n",(a)))
 #define DPL1(a)                                 \
 {                                               \
        if ((a) >> 32)                          \
@@ -51,19 +51,19 @@ static s64 extRoundDown(s64 nb);
                printk("(a): %x\n",(a) << 32);  \
 }
-#define DPD1(a)         (printk("(a): %d  ",(a)))
+#define DPD1(a)         (printk("(a): %d  ",(a)))
-#define DPX(a)          (printk("(a): %08x\n",(a)))
+#define DPX(a)          (printk("(a): %08x\n",(a)))
-#define DPX1(a)         (printk("(a): %08x  ",(a)))
+#define DPX1(a)         (printk("(a): %08x  ",(a)))
-#define DPS(a)          (printk("%s\n",(a)))
+#define DPS(a)          (printk("%s\n",(a)))
-#define DPE(a)          (printk("\nENTERING: %s\n",(a)))
+#define DPE(a)          (printk("\nENTERING: %s\n",(a)))
-#define DPE1(a)          (printk("\nENTERING: %s",(a)))
+#define DPE1(a)         (printk("\nENTERING: %s",(a)))
-#define DPS1(a)         (printk("  %s  ",(a)))
+#define DPS1(a)         (printk("  %s  ",(a)))
 /*
 * NAME:        extAlloc()
 *
- * FUNCTION:    allocate an extent for a specified page range within a
+ * FUNCTION:    allocate an extent for a specified page range within a
 *              file.
 *
 * PARAMETERS:
@@ -78,9 +78,9 @@ static s64 extRoundDown(s64 nb);
 *                should be marked as allocated but not recorded.
 *
 * RETURN VALUES:
- *      0       - success
+ *      0       - success
- *      -EIO    - i/o error.
+ *      -EIO    - i/o error.
- *      -ENOSPC - insufficient disk resources.
+ *      -ENOSPC - insufficient disk resources.
 */
 int
 extAlloc(struct inode *ip, s64 xlen, s64 pno, xad_t * xp, bool abnr)
@@ -192,9 +192,9 @@ extAlloc(struct inode *ip, s64 xlen, s64 pno, xad_t * xp, bool abnr)
 #ifdef _NOTYET
 /*
- * NAME:        extRealloc()
+ * NAME:        extRealloc()
 *
- * FUNCTION:    extend the allocation of a file extent containing a
+ * FUNCTION:    extend the allocation of a file extent containing a
 *              partial back last page.
 *
 * PARAMETERS:
@@ -207,9 +207,9 @@ extAlloc(struct inode *ip, s64 xlen, s64 pno, xad_t * xp, bool abnr)
 *                should be marked as allocated but not recorded.
 *
 * RETURN VALUES:
- *      0       - success
+ *      0       - success
- *      -EIO    - i/o error.
+ *      -EIO    - i/o error.
- *      -ENOSPC - insufficient disk resources.
+ *      -ENOSPC - insufficient disk resources.
 */
 int extRealloc(struct inode *ip, s64 nxlen, xad_t * xp, bool abnr)
 {
@@ -345,9 +345,9 @@ exit:
 /*
- * NAME:        extHint()
+ * NAME:        extHint()
 *
- * FUNCTION:    produce an extent allocation hint for a file offset.
+ * FUNCTION:    produce an extent allocation hint for a file offset.
 *
 * PARAMETERS:
 *      ip      - the inode of the file.
@@ -356,8 +356,8 @@ exit:
 *                the hint.
 *
 * RETURN VALUES:
- *      0       - success
+ *      0       - success
- *      -EIO    - i/o error.
+ *      -EIO    - i/o error.
 */
 int extHint(struct inode *ip, s64 offset, xad_t * xp)
 {
@@ -387,7 +387,7 @@ int extHint(struct inode *ip, s64 offset, xad_t * xp)
        lxdl.nlxd = 1;
        lxdl.lxd = &lxd;
        LXDoffset(&lxd, prev)
-            LXDlength(&lxd, nbperpage);
+        LXDlength(&lxd, nbperpage);
        xadl.maxnxad = 1;
        xadl.nxad = 0;
@@ -397,11 +397,11 @@ int extHint(struct inode *ip, s64 offset, xad_t * xp)
        if ((rc = xtLookupList(ip, &lxdl, &xadl, 0)))
                return (rc);
-        /* check if not extent exists for the previous page.
+        /* check if no extent exists for the previous page.
         * this is possible for sparse files.
         */
        if (xadl.nxad == 0) {
-//              assert(ISSPARSE(ip));
+//              assert(ISSPARSE(ip));
                return (0);
        }
@@ -410,28 +410,28 @@ int extHint(struct inode *ip, s64 offset, xad_t * xp)
         */
        xp->flag &= XAD_NOTRECORDED;
-        if(xadl.nxad != 1 || lengthXAD(xp) != nbperpage) {
+        if(xadl.nxad != 1 || lengthXAD(xp) != nbperpage) {
                jfs_error(ip->i_sb, "extHint: corrupt xtree");
                return -EIO;
-        }
+        }
        return (0);
 }
 /*
- * NAME:        extRecord()
+ * NAME:        extRecord()
 *
- * FUNCTION:    change a page with a file from not recorded to recorded.
+ * FUNCTION:    change a page with a file from not recorded to recorded.
 *
 * PARAMETERS:
 *      ip      - inode of the file.
 *      cp      - cbuf of the file page.
 *
 * RETURN VALUES:
- *      0       - success
+ *      0       - success
- *      -EIO    - i/o error.
+ *      -EIO    - i/o error.
- *      -ENOSPC - insufficient disk resources.
+ *      -ENOSPC - insufficient disk resources.
 */
 int extRecord(struct inode *ip, xad_t * xp)
 {
@@ -451,9 +451,9 @@ int extRecord(struct inode *ip, xad_t * xp)
 #ifdef _NOTYET
 /*
- * NAME:        extFill()
+ * NAME:        extFill()
 *
- * FUNCTION:    allocate disk space for a file page that represents
+ * FUNCTION:    allocate disk space for a file page that represents
 *              a file hole.
 *
 * PARAMETERS:
@@ -461,16 +461,16 @@ int extRecord(struct inode *ip, xad_t * xp)
 *      cp      - cbuf of the file page represent the hole.
 *
 * RETURN VALUES:
- *      0       - success
+ *      0       - success
- *      -EIO    - i/o error.
+ *      -EIO    - i/o error.
- *      -ENOSPC - insufficient disk resources.
+ *      -ENOSPC - insufficient disk resources.
 */
 int extFill(struct inode *ip, xad_t * xp)
 {
        int rc, nbperpage = JFS_SBI(ip->i_sb)->nbperpage;
        s64 blkno = offsetXAD(xp) >> ip->i_blkbits;
-//      assert(ISSPARSE(ip));
+//      assert(ISSPARSE(ip));
        /* initialize the extent allocation hint */
        XADaddress(xp, 0);
@@ -489,7 +489,7 @@ int extFill(struct inode *ip, xad_t * xp)
 /*
 * NAME:        extBalloc()
 *
- * FUNCTION:    allocate disk blocks to form an extent.
+ * FUNCTION:    allocate disk blocks to form an extent.
 *
 *              initially, we will try to allocate disk blocks for the
 *              requested size (nblocks).  if this fails (nblocks
@@ -513,9 +513,9 @@ int extFill(struct inode *ip, xad_t * xp)
 *                 allocated block range.
 *
 * RETURN VALUES:
- *      0       - success
+ *      0       - success
- *      -EIO    - i/o error.
+ *      -EIO    - i/o error.
- *      -ENOSPC - insufficient disk resources.
+ *      -ENOSPC - insufficient disk resources.
 */
 static int
 extBalloc(struct inode *ip, s64 hint, s64 * nblocks, s64 * blkno)
@@ -580,7 +580,7 @@ extBalloc(struct inode *ip, s64 hint, s64 * nblocks, s64 * blkno)
 /*
 * NAME:        extBrealloc()
 *
- * FUNCTION:    attempt to extend an extent's allocation.
+ * FUNCTION:    attempt to extend an extent's allocation.
 *
 *              Initially, we will try to extend the extent's allocation
 *              in place.  If this fails, we'll try to move the extent
@@ -597,8 +597,8 @@ extBalloc(struct inode *ip, s64 hint, s64 * nblocks, s64 * blkno)
 *
 * PARAMETERS:
 *      ip       - the inode of the file.
- *      blkno    - starting block number of the extents current allocation.
+ *      blkno    - starting block number of the extent's current allocation.
- *      nblks    - number of blocks within the extents current allocation.
+ *      nblks    - number of blocks within the extent's current allocation.
 *      newnblks - pointer to a s64 value.  on entry, this value is the
 *                 the new desired extent size (number of blocks).  on
 *                 successful exit, this value is set to the extent's actual
@@ -606,9 +606,9 @@ extBalloc(struct inode *ip, s64 hint, s64 * nblocks, s64 * blkno)
 *      newblkno - the starting block number of the extents new allocation.
 *
 * RETURN VALUES:
- *      0       - success
+ *      0       - success
- *      -EIO    - i/o error.
+ *      -EIO    - i/o error.
- *      -ENOSPC - insufficient disk resources.
+ *      -ENOSPC - insufficient disk resources.
 */
 static int
 extBrealloc(struct inode *ip,
@@ -634,16 +634,16 @@ extBrealloc(struct inode *ip,
 /*
- * NAME:        extRoundDown()
+ * NAME:        extRoundDown()
 *
- * FUNCTION:    round down a specified number of blocks to the next
+ * FUNCTION:    round down a specified number of blocks to the next
 *              smallest power of 2 number.
 *
 * PARAMETERS:
 *      nb      - the inode of the file.
 *
 * RETURN VALUES:
- *      next smallest power of 2 number.
+ *      next smallest power of 2 number.
 */
 static s64 extRoundDown(s64 nb)
 {
diff --git a/fs/jfs/jfs_filsys.h b/fs/jfs/jfs_filsys.h
index 38f70ac03bec..b3f5463fbe52 100644
--- a/fs/jfs/jfs_filsys.h
+++ b/fs/jfs/jfs_filsys.h
@@ -34,9 +34,9 @@
 #define JFS_UNICODE     0x00000001      /* unicode name */
 /* mount time flags for error handling */
-#define JFS_ERR_REMOUNT_RO 0x00000002   /* remount read-only */
+#define JFS_ERR_REMOUNT_RO 0x00000002   /* remount read-only */
-#define JFS_ERR_CONTINUE   0x00000004   /* continue */
+#define JFS_ERR_CONTINUE   0x00000004   /* continue */
-#define JFS_ERR_PANIC      0x00000008   /* panic */
+#define JFS_ERR_PANIC      0x00000008   /* panic */
 /* Quota support */
 #define JFS_USRQUOTA    0x00000010
@@ -83,7 +83,6 @@
 /*      case-insensitive name/directory support */
 #define JFS_AIX         0x80000000      /* AIX support */
-/*      POSIX name/directory  support - Never implemented*/
 /*
 *      buffer cache configuration
@@ -113,10 +112,10 @@
 #define IDATASIZE       256     /* inode inline data size */
 #define IXATTRSIZE      128     /* inode inline extended attribute size */
-#define XTPAGE_SIZE     4096
+#define XTPAGE_SIZE     4096
-#define log2_PAGESIZE     12
+#define log2_PAGESIZE   12
-#define IAG_SIZE        4096
+#define IAG_SIZE        4096
 #define IAG_EXTENT_SIZE 4096
 #define INOSPERIAG      4096    /* number of disk inodes per iag */
 #define L2INOSPERIAG    12      /* l2 number of disk inodes per iag */
diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c
index c6530227cda6..3870ba8b9086 100644
--- a/fs/jfs/jfs_imap.c
+++ b/fs/jfs/jfs_imap.c
@@ -93,21 +93,21 @@ static int copy_from_dinode(struct dinode *, struct inode *);
 static void copy_to_dinode(struct dinode *, struct inode *);
 /*
- * NAME:        diMount()
+ * NAME:        diMount()
 *
- * FUNCTION:    initialize the incore inode map control structures for
+ * FUNCTION:    initialize the incore inode map control structures for
 *              a fileset or aggregate init time.
 *
- *              the inode map's control structure (dinomap) is
+ *              the inode map's control structure (dinomap) is
- *              brought in from disk and placed in virtual memory.
+ *              brought in from disk and placed in virtual memory.
 *
 * PARAMETERS:
- *      ipimap  - pointer to inode map inode for the aggregate or fileset.
+ *      ipimap  - pointer to inode map inode for the aggregate or fileset.
 *
 * RETURN VALUES:
- *      0       - success
+ *      0       - success
- *      -ENOMEM  - insufficient free virtual memory.
+ *      -ENOMEM - insufficient free virtual memory.
- *      -EIO    - i/o error.
+ *      -EIO    - i/o error.
 */
 int diMount(struct inode *ipimap)
 {
@@ -180,18 +180,18 @@ int diMount(struct inode *ipimap)
 /*
- * NAME:        diUnmount()
+ * NAME:        diUnmount()
 *
- * FUNCTION:    write to disk the incore inode map control structures for
+ * FUNCTION:    write to disk the incore inode map control structures for
 *              a fileset or aggregate at unmount time.
 *
 * PARAMETERS:
- *      ipimap  - pointer to inode map inode for the aggregate or fileset.
+ *      ipimap  - pointer to inode map inode for the aggregate or fileset.
 *
 * RETURN VALUES:
- *      0       - success
+ *      0       - success
- *      -ENOMEM  - insufficient free virtual memory.
+ *      -ENOMEM - insufficient free virtual memory.
- *      -EIO    - i/o error.
+ *      -EIO    - i/o error.
 */
 int diUnmount(struct inode *ipimap, int mounterror)
 {
@@ -274,9 +274,9 @@ int diSync(struct inode *ipimap)
 /*
- * NAME:        diRead()
+ * NAME:        diRead()
 *
- * FUNCTION:    initialize an incore inode from disk.
+ * FUNCTION:    initialize an incore inode from disk.
 *
 *              on entry, the specifed incore inode should itself
 *              specify the disk inode number corresponding to the
@@ -285,7 +285,7 @@ int diSync(struct inode *ipimap)
 *              this routine handles incore inode initialization for
 *              both "special" and "regular" inodes.  special inodes
 *              are those required early in the mount process and
- *              require special handling since much of the file system
+ *              require special handling since much of the file system
 *              is not yet initialized.  these "special" inodes are
 *              identified by a NULL inode map inode pointer and are
 *              actually initialized by a call to diReadSpecial().
@@ -298,12 +298,12 @@ int diSync(struct inode *ipimap)
 *              incore inode.
 *
 * PARAMETERS:
- *      ip  -  pointer to incore inode to be initialized from disk.
+ *      ip      -  pointer to incore inode to be initialized from disk.
 *
 * RETURN VALUES:
- *      0       - success
+ *      0       - success
- *      -EIO    - i/o error.
+ *      -EIO    - i/o error.
- *      -ENOMEM - insufficient memory
+ *      -ENOMEM - insufficient memory
 *
 */
 int diRead(struct inode *ip)
@@ -410,26 +410,26 @@ int diRead(struct inode *ip)
 /*
- * NAME:        diReadSpecial()
+ * NAME:        diReadSpecial()
 *
- * FUNCTION:    initialize a 'special' inode from disk.
+ * FUNCTION:    initialize a 'special' inode from disk.
 *
 *              this routines handles aggregate level inodes.  The
 *              inode cache cannot differentiate between the
 *              aggregate inodes and the filesystem inodes, so we
 *              handle these here.  We don't actually use the aggregate
- *              inode map, since these inodes are at a fixed location
+ *              inode map, since these inodes are at a fixed location
 *              and in some cases the aggregate inode map isn't initialized
 *              yet.
 *
 * PARAMETERS:
- *      sb - filesystem superblock
+ *      sb - filesystem superblock
 *      inum - aggregate inode number
 *      secondary - 1 if secondary aggregate inode table
 *
 * RETURN VALUES:
- *      new inode       - success
+ *      new inode       - success
- *      NULL            - i/o error.
+ *      NULL            - i/o error.
 */
 struct inode *diReadSpecial(struct super_block *sb, ino_t inum, int secondary)
 {
@@ -502,12 +502,12 @@ struct inode *diReadSpecial(struct super_block *sb, ino_t inum, int secondary)
 }
 /*
- * NAME:        diWriteSpecial()
+ * NAME:        diWriteSpecial()
 *
- * FUNCTION:    Write the special inode to disk
+ * FUNCTION:    Write the special inode to disk
 *
 * PARAMETERS:
- *      ip - special inode
+ *      ip - special inode
 *      secondary - 1 if secondary aggregate inode table
 *
 * RETURN VALUES: none
@@ -554,9 +554,9 @@ void diWriteSpecial(struct inode *ip, int secondary)
 }
 /*
- * NAME:        diFreeSpecial()
+ * NAME:        diFreeSpecial()
 *
- * FUNCTION:    Free allocated space for special inode
+ * FUNCTION:    Free allocated space for special inode
 */
 void diFreeSpecial(struct inode *ip)
 {
@@ -572,9 +572,9 @@ void diFreeSpecial(struct inode *ip)
 /*
- * NAME:        diWrite()
+ * NAME:        diWrite()
 *
- * FUNCTION:    write the on-disk inode portion of the in-memory inode
+ * FUNCTION:    write the on-disk inode portion of the in-memory inode
 *              to its corresponding on-disk inode.
 *
 *              on entry, the specifed incore inode should itself
@@ -589,11 +589,11 @@ void diFreeSpecial(struct inode *ip)
 *
 * PARAMETERS:
 *      tid -  transacation id
- *      ip  -  pointer to incore inode to be written to the inode extent.
+ *      ip  -  pointer to incore inode to be written to the inode extent.
 *
 * RETURN VALUES:
- *      0       - success
+ *      0       - success
- *      -EIO    - i/o error.
+ *      -EIO    - i/o error.
 */
 int diWrite(tid_t tid, struct inode *ip)
 {
@@ -730,7 +730,7 @@ int diWrite(tid_t tid, struct inode *ip)
        ilinelock = (struct linelock *) & tlck->lock;
        /*
-         *      regular file: 16 byte (XAD slot) granularity
+         *      regular file: 16 byte (XAD slot) granularity
         */
        if (type & tlckXTREE) {
                xtpage_t *p, *xp;
@@ -755,7 +755,7 @@ int diWrite(tid_t tid, struct inode *ip)
                                xad->flag &= ~(XAD_NEW | XAD_EXTENDED);
        }
        /*
-         *      directory: 32 byte (directory entry slot) granularity
+         *      directory: 32 byte (directory entry slot) granularity
         */
        else if (type & tlckDTREE) {
                dtpage_t *p, *xp;
@@ -800,9 +800,8 @@ int diWrite(tid_t tid, struct inode *ip)
        }
        /*
-         *      lock/copy inode base: 128 byte slot granularity
+         *      lock/copy inode base: 128 byte slot granularity
         */
-// baseDinode:
        lv = & dilinelock->lv[dilinelock->index];
        lv->offset = dioffset >> L2INODESLOTSIZE;
        copy_to_dinode(dp, ip);
@@ -813,17 +812,6 @@ int diWrite(tid_t tid, struct inode *ip)
                lv->length = 1;
        dilinelock->index++;
-#ifdef _JFS_FASTDASD
-        /*
-         * We aren't logging changes to the DASD used in directory inodes,
-         * but we need to write them to disk.  If we don't unmount cleanly,
-         * mount will recalculate the DASD used.
-         */
-        if (S_ISDIR(ip->i_mode)
-            && (ip->i_ipmnt->i_mntflag & JFS_DASD_ENABLED))
-                memcpy(&dp->di_DASD, &ip->i_DASD, sizeof(struct dasd));
-#endif                          /*  _JFS_FASTDASD */
        /* release the buffer holding the updated on-disk inode.
         * the buffer will be later written by commit processing.
         */
@@ -834,9 +822,9 @@ int diWrite(tid_t tid, struct inode *ip)
 /*
- * NAME:        diFree(ip)
+ * NAME:        diFree(ip)
 *
- * FUNCTION:    free a specified inode from the inode working map
+ * FUNCTION:    free a specified inode from the inode working map
 *              for a fileset or aggregate.
 *
 *              if the inode to be freed represents the first (only)
@@ -865,11 +853,11 @@ int diWrite(tid_t tid, struct inode *ip)
 *              any updates and are held until all updates are complete.
 *
 * PARAMETERS:
- *      ip      - inode to be freed.
+ *      ip      - inode to be freed.
 *
 * RETURN VALUES:
- *      0       - success
+ *      0       - success
- *      -EIO    - i/o error.
+ *      -EIO    - i/o error.
 */
 int diFree(struct inode *ip)
 {
@@ -902,7 +890,8 @@ int diFree(struct inode *ip)
         * the map.
         */
        if (iagno >= imap->im_nextiag) {
-                dump_mem("imap", imap, 32);
+                print_hex_dump(KERN_ERR, "imap: ", DUMP_PREFIX_ADDRESS, 16, 4,
+                               imap, 32, 0);
                jfs_error(ip->i_sb,
                          "diFree: inum = %d, iagno = %d, nextiag = %d",
                          (uint) inum, iagno, imap->im_nextiag);
@@ -964,8 +953,8 @@ int diFree(struct inode *ip)
                return -EIO;
        }
        /*
-         *      inode extent still has some inodes or below low water mark:
+         *      inode extent still has some inodes or below low water mark:
-         *      keep the inode extent;
+         *      keep the inode extent;
         */
        if (bitmap ||
            imap->im_agctl[agno].numfree < 96 ||
@@ -1047,12 +1036,12 @@ int diFree(struct inode *ip)
        /*
-         *      inode extent has become free and above low water mark:
+         *      inode extent has become free and above low water mark:
-         *      free the inode extent;
+         *      free the inode extent;
         */
        /*
-         *      prepare to update iag list(s) (careful update step 1)
+         *      prepare to update iag list(s) (careful update step 1)
         */
        amp = bmp = cmp = dmp = NULL;
        fwd = back = -1;
@@ -1152,7 +1141,7 @@ int diFree(struct inode *ip)
        invalidate_pxd_metapages(ip, freepxd);
        /*
-         *      update iag list(s) (careful update step 2)
+         *      update iag list(s) (careful update step 2)
         */
        /* add the iag to the ag extent free list if this is the
         * first free extent for the iag.
@@ -1338,20 +1327,20 @@ diInitInode(struct inode *ip, int iagno, int ino, int extno, struct iag * iagp)
 /*
- * NAME:        diAlloc(pip,dir,ip)
+ * NAME:        diAlloc(pip,dir,ip)
 *
- * FUNCTION:    allocate a disk inode from the inode working map
+ * FUNCTION:    allocate a disk inode from the inode working map
 *              for a fileset or aggregate.
 *
 * PARAMETERS:
- *      pip     - pointer to incore inode for the parent inode.
+ *      pip     - pointer to incore inode for the parent inode.
- *      dir     - 'true' if the new disk inode is for a directory.
+ *      dir     - 'true' if the new disk inode is for a directory.
- *      ip      - pointer to a new inode
+ *      ip      - pointer to a new inode
 *
 * RETURN VALUES:
- *      0       - success.
+ *      0       - success.
- *      -ENOSPC - insufficient disk resources.
+ *      -ENOSPC - insufficient disk resources.
- *      -EIO    - i/o error.
+ *      -EIO    - i/o error.
 */
 int diAlloc(struct inode *pip, bool dir, struct inode *ip)
 {
@@ -1433,7 +1422,7 @@ int diAlloc(struct inode *pip, bool dir, struct inode *ip)
        addext = (imap->im_agctl[agno].numfree < 32 && iagp->nfreeexts);
        /*
-         *      try to allocate from the IAG
+         *      try to allocate from the IAG
         */
        /* check if the inode may be allocated from the iag
         * (i.e. the inode has free inodes or new extent can be added).
@@ -1633,9 +1622,9 @@ int diAlloc(struct inode *pip, bool dir, struct inode *ip)
 /*
- * NAME:        diAllocAG(imap,agno,dir,ip)
+ * NAME:        diAllocAG(imap,agno,dir,ip)
 *
- * FUNCTION:    allocate a disk inode from the allocation group.
+ * FUNCTION:    allocate a disk inode from the allocation group.
 *
 *              this routine first determines if a new extent of free
 *              inodes should be added for the allocation group, with
@@ -1649,17 +1638,17 @@ int diAlloc(struct inode *pip, bool dir, struct inode *ip)
 * PRE CONDITION: Already have the AG lock for this AG.
 *
 * PARAMETERS:
- *      imap    - pointer to inode map control structure.
+ *      imap    - pointer to inode map control structure.
- *      agno    - allocation group to allocate from.
+ *      agno    - allocation group to allocate from.
- *      dir     - 'true' if the new disk inode is for a directory.
+ *      dir     - 'true' if the new disk inode is for a directory.
- *      ip      - pointer to the new inode to be filled in on successful return
+ *      ip      - pointer to the new inode to be filled in on successful return
 *                with the disk inode number allocated, its extent address
 *                and the start of the ag.
 *
 * RETURN VALUES:
- *      0       - success.
+ *      0       - success.
- *      -ENOSPC - insufficient disk resources.
+ *      -ENOSPC - insufficient disk resources.
- *      -EIO    - i/o error.
+ *      -EIO    - i/o error.
 */
 static int
 diAllocAG(struct inomap * imap, int agno, bool dir, struct inode *ip)
@@ -1709,9 +1698,9 @@ diAllocAG(struct inomap * imap, int agno, bool dir, struct inode *ip)
 /*
- * NAME:        diAllocAny(imap,agno,dir,iap)
+ * NAME:        diAllocAny(imap,agno,dir,iap)
 *
- * FUNCTION:    allocate a disk inode from any other allocation group.
+ * FUNCTION:    allocate a disk inode from any other allocation group.
 *
 *              this routine is called when an allocation attempt within
 *              the primary allocation group has failed. if attempts to
@@ -1719,17 +1708,17 @@ diAllocAG(struct inomap * imap, int agno, bool dir, struct inode *ip)
 *              specified primary group.
 *
 * PARAMETERS:
- *      imap    - pointer to inode map control structure.
+ *      imap    - pointer to inode map control structure.
- *      agno    - primary allocation group (to avoid).
+ *      agno    - primary allocation group (to avoid).
- *      dir     - 'true' if the new disk inode is for a directory.
+ *      dir     - 'true' if the new disk inode is for a directory.
- *      ip      - pointer to a new inode to be filled in on successful return
+ *      ip      - pointer to a new inode to be filled in on successful return
 *                with the disk inode number allocated, its extent address
 *                and the start of the ag.
 *
 * RETURN VALUES:
- *      0       - success.
+ *      0       - success.
- *      -ENOSPC - insufficient disk resources.
+ *      -ENOSPC - insufficient disk resources.
- *      -EIO    - i/o error.
+ *      -EIO    - i/o error.
 */
 static int
 diAllocAny(struct inomap * imap, int agno, bool dir, struct inode *ip)
@@ -1772,9 +1761,9 @@ diAllocAny(struct inomap * imap, int agno, bool dir, struct inode *ip)
 /*
- * NAME:        diAllocIno(imap,agno,ip)
+ * NAME:        diAllocIno(imap,agno,ip)
 *
- * FUNCTION:    allocate a disk inode from the allocation group's free
+ * FUNCTION:    allocate a disk inode from the allocation group's free
 *              inode list, returning an error if this free list is
 *              empty (i.e. no iags on the list).
 *
@@ -1785,16 +1774,16 @@ diAllocAny(struct inomap * imap, int agno, bool dir, struct inode *ip)
 * PRE CONDITION: Already have AG lock for this AG.
 *
 * PARAMETERS:
- *      imap    - pointer to inode map control structure.
+ *      imap    - pointer to inode map control structure.
- *      agno    - allocation group.
+ *      agno    - allocation group.
- *      ip      - pointer to new inode to be filled in on successful return
+ *      ip      - pointer to new inode to be filled in on successful return
 *                with the disk inode number allocated, its extent address
 *                and the start of the ag.
 *
 * RETURN VALUES:
- *      0       - success.
+ *      0       - success.
- *      -ENOSPC - insufficient disk resources.
+ *      -ENOSPC - insufficient disk resources.
- *      -EIO    - i/o error.
+ *      -EIO    - i/o error.
 */
 static int diAllocIno(struct inomap * imap, int agno, struct inode *ip)
 {
@@ -1890,7 +1879,7 @@ static int diAllocIno(struct inomap * imap, int agno, struct inode *ip)
 /*
- * NAME:        diAllocExt(imap,agno,ip)
+ * NAME:        diAllocExt(imap,agno,ip)
 *
 * FUNCTION:    add a new extent of free inodes to an iag, allocating
 *              an inode from this extent to satisfy the current allocation
@@ -1910,16 +1899,16 @@ static int diAllocIno(struct inomap * imap, int agno, struct inode *ip)
 *              for the purpose of satisfying this request.
 *
 * PARAMETERS:
- *      imap    - pointer to inode map control structure.
+ *      imap    - pointer to inode map control structure.
- *      agno    - allocation group number.
+ *      agno    - allocation group number.
- *      ip      - pointer to new inode to be filled in on successful return
+ *      ip      - pointer to new inode to be filled in on successful return
 *                with the disk inode number allocated, its extent address
 *                and the start of the ag.
 *
 * RETURN VALUES:
- *      0       - success.
+ *      0       - success.
- *      -ENOSPC - insufficient disk resources.
+ *      -ENOSPC - insufficient disk resources.
- *      -EIO    - i/o error.
+ *      -EIO    - i/o error.
 */
 static int diAllocExt(struct inomap * imap, int agno, struct inode *ip)
 {
@@ -2010,7 +1999,7 @@ static int diAllocExt(struct inomap * imap, int agno, struct inode *ip)
 /*
- * NAME:        diAllocBit(imap,iagp,ino)
+ * NAME:        diAllocBit(imap,iagp,ino)
 *
 * FUNCTION:    allocate a backed inode from an iag.
 *
@@ -2030,14 +2019,14 @@ static int diAllocExt(struct inomap * imap, int agno, struct inode *ip)
 *      this AG.  Must have read lock on imap inode.
 *
 * PARAMETERS:
- *      imap    - pointer to inode map control structure.
+ *      imap    - pointer to inode map control structure.
- *      iagp    - pointer to iag.
+ *      iagp    - pointer to iag.
- *      ino     - inode number to be allocated within the iag.
+ *      ino     - inode number to be allocated within the iag.
 *
 * RETURN VALUES:
- *      0       - success.
+ *      0       - success.
- *      -ENOSPC - insufficient disk resources.
+ *      -ENOSPC - insufficient disk resources.
- *      -EIO    - i/o error.
+ *      -EIO    - i/o error.
 */
 static int diAllocBit(struct inomap * imap, struct iag * iagp, int ino)
 {
@@ -2144,11 +2133,11 @@ static int diAllocBit(struct inomap * imap, struct iag * iagp, int ino)
 /*
- * NAME:        diNewExt(imap,iagp,extno)
+ * NAME:        diNewExt(imap,iagp,extno)
 *
- * FUNCTION:    initialize a new extent of inodes for an iag, allocating
+ * FUNCTION:    initialize a new extent of inodes for an iag, allocating
- *              the first inode of the extent for use for the current
+ *              the first inode of the extent for use for the current
- *              allocation request.
+ *              allocation request.
 *
 *              disk resources are allocated for the new extent of inodes
 *              and the inodes themselves are initialized to reflect their
@@ -2177,14 +2166,14 @@ static int diAllocBit(struct inomap * imap, struct iag * iagp, int ino)
 *      this AG.  Must have read lock on imap inode.
 *
 * PARAMETERS:
- *      imap    - pointer to inode map control structure.
+ *      imap    - pointer to inode map control structure.
- *      iagp    - pointer to iag.
+ *      iagp    - pointer to iag.
- *      extno   - extent number.
+ *      extno   - extent number.
 *
 * RETURN VALUES:
- *      0       - success.
+ *      0       - success.
- *      -ENOSPC - insufficient disk resources.
+ *      -ENOSPC - insufficient disk resources.
- *      -EIO    - i/o error.
+ *      -EIO    - i/o error.
 */
 static int diNewExt(struct inomap * imap, struct iag * iagp, int extno)
 {
@@ -2430,7 +2419,7 @@ static int diNewExt(struct inomap * imap, struct iag * iagp, int extno)
 /*
- * NAME:        diNewIAG(imap,iagnop,agno)
+ * NAME:        diNewIAG(imap,iagnop,agno)
 *
 * FUNCTION:    allocate a new iag for an allocation group.
 *
@@ -2443,16 +2432,16 @@ static int diNewExt(struct inomap * imap, struct iag * iagp, int extno)
 *              and returned to satisfy the request.
 *
 * PARAMETERS:
- *      imap    - pointer to inode map control structure.
+ *      imap    - pointer to inode map control structure.
- *      iagnop  - pointer to an iag number set with the number of the
+ *      iagnop  - pointer to an iag number set with the number of the
 *                newly allocated iag upon successful return.
- *      agno    - allocation group number.
+ *      agno    - allocation group number.
 *      bpp     - Buffer pointer to be filled in with new IAG's buffer
 *
 * RETURN VALUES:
- *      0       - success.
+ *      0       - success.
- *      -ENOSPC - insufficient disk resources.
+ *      -ENOSPC - insufficient disk resources.
- *      -EIO    - i/o error.
+ *      -EIO    - i/o error.
 *
 * serialization:
 *      AG lock held on entry/exit;
@@ -2461,7 +2450,7 @@ static int diNewExt(struct inomap * imap, struct iag * iagp, int extno)
 *
 * note: new iag transaction:
 * . synchronously write iag;
- * . write log of xtree and inode  of imap;
+ * . write log of xtree and inode of imap;
 * . commit;
 * . synchronous write of xtree (right to left, bottom to top);
 * . at start of logredo(): init in-memory imap with one additional iag page;
@@ -2481,9 +2470,6 @@ diNewIAG(struct inomap * imap, int *iagnop, int agno, struct metapage ** mpp)
        s64 xaddr = 0;
        s64 blkno;
        tid_t tid;
-#ifdef _STILL_TO_PORT
-        xad_t xad;
-#endif                          /*  _STILL_TO_PORT */
        struct inode *iplist[1];
        /* pick up pointers to the inode map and mount inodes */
@@ -2674,15 +2660,15 @@ diNewIAG(struct inomap * imap, int *iagnop, int agno, struct metapage ** mpp)
 }
 /*
- * NAME:        diIAGRead()
+ * NAME:        diIAGRead()
 *
- * FUNCTION:    get the buffer for the specified iag within a fileset
+ * FUNCTION:    get the buffer for the specified iag within a fileset
 *              or aggregate inode map.
 *
 * PARAMETERS:
- *      imap    - pointer to inode map control structure.
+ *      imap    - pointer to inode map control structure.
- *      iagno   - iag number.
+ *      iagno   - iag number.
- *      bpp     - point to buffer pointer to be filled in on successful
+ *      bpp     - point to buffer pointer to be filled in on successful
 *                exit.
 *
 * SERIALIZATION:
@@ -2691,8 +2677,8 @@ diNewIAG(struct inomap * imap, int *iagnop, int agno, struct metapage ** mpp)
 *       the read lock is unnecessary.)
 *
 * RETURN VALUES:
- *      0       - success.
+ *      0       - success.
- *      -EIO    - i/o error.
+ *      -EIO    - i/o error.
 */
 static int diIAGRead(struct inomap * imap, int iagno, struct metapage ** mpp)
 {
@@ -2712,17 +2698,17 @@ static int diIAGRead(struct inomap * imap, int iagno, struct metapage ** mpp)
 }
 /*
- * NAME:        diFindFree()
+ * NAME:        diFindFree()
 *
- * FUNCTION:    find the first free bit in a word starting at
+ * FUNCTION:    find the first free bit in a word starting at
 *              the specified bit position.
 *
 * PARAMETERS:
- *      word    - word to be examined.
+ *      word    - word to be examined.
- *      start   - starting bit position.
+ *      start   - starting bit position.
 *
 * RETURN VALUES:
- *      bit position of first free bit in the word or 32 if
+ *      bit position of first free bit in the word or 32 if
 *      no free bits were found.
 */
 static int diFindFree(u32 word, int start)
@@ -2897,7 +2883,7 @@ int diExtendFS(struct inode *ipimap, struct inode *ipbmap)
                   atomic_read(&imap->im_numfree));
        /*
-         *      reconstruct imap
+         *      reconstruct imap
         *
         * coalesce contiguous k (newAGSize/oldAGSize) AGs;
         * i.e., (AGi, ..., AGj) where i = k*n and j = k*(n+1) - 1 to AGn;
@@ -2913,7 +2899,7 @@ int diExtendFS(struct inode *ipimap, struct inode *ipbmap)
        }
        /*
-         *      process each iag page of the map.
+         *      process each iag page of the map.
         *
         * rebuild AG Free Inode List, AG Free Inode Extent List;
         */
@@ -2932,7 +2918,7 @@ int diExtendFS(struct inode *ipimap, struct inode *ipbmap)
                /* leave free iag in the free iag list */
                if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG)) {
-                        release_metapage(bp);
+                        release_metapage(bp);
                        continue;
                }
@@ -3063,13 +3049,13 @@ static void duplicateIXtree(struct super_block *sb, s64 blkno,
 }
 /*
- * NAME:        copy_from_dinode()
+ * NAME:        copy_from_dinode()
 *
- * FUNCTION:    Copies inode info from disk inode to in-memory inode
+ * FUNCTION:    Copies inode info from disk inode to in-memory inode
 *
 * RETURN VALUES:
- *      0       - success
+ *      0       - success
- *      -ENOMEM - insufficient memory
+ *      -ENOMEM - insufficient memory
 */
 static int copy_from_dinode(struct dinode * dip, struct inode *ip)
 {
@@ -3151,9 +3137,9 @@ static int copy_from_dinode(struct dinode * dip, struct inode *ip)
 }
 /*
- * NAME:        copy_to_dinode()
+ * NAME:        copy_to_dinode()
 *
- * FUNCTION:    Copies inode info from in-memory inode to disk inode
+ * FUNCTION:    Copies inode info from in-memory inode to disk inode
 */
 static void copy_to_dinode(struct dinode * dip, struct inode *ip)
 {
diff --git a/fs/jfs/jfs_imap.h b/fs/jfs/jfs_imap.h
index 4f9c346ed498..610a0e9d8941 100644
--- a/fs/jfs/jfs_imap.h
+++ b/fs/jfs/jfs_imap.h
@@ -24,17 +24,17 @@
 *      jfs_imap.h: disk inode manager
 */
-#define EXTSPERIAG      128     /* number of disk inode extent per iag  */
+#define EXTSPERIAG      128     /* number of disk inode extent per iag  */
-#define IMAPBLKNO       0       /* lblkno of dinomap within inode map   */
+#define IMAPBLKNO       0       /* lblkno of dinomap within inode map   */
-#define SMAPSZ          4       /* number of words per summary map      */
+#define SMAPSZ          4       /* number of words per summary map      */
 #define EXTSPERSUM      32      /* number of extents per summary map entry */
 #define L2EXTSPERSUM    5       /* l2 number of extents per summary map */
 #define PGSPERIEXT      4       /* number of 4K pages per dinode extent */
-#define MAXIAGS         ((1<<20)-1)     /* maximum number of iags       */
+#define MAXIAGS         ((1<<20)-1)     /* maximum number of iags       */
-#define MAXAG           128     /* maximum number of allocation groups  */
+#define MAXAG           128     /* maximum number of allocation groups  */
-#define AMAPSIZE      512       /* bytes in the IAG allocation maps */
+#define AMAPSIZE        512     /* bytes in the IAG allocation maps */
-#define SMAPSIZE      16        /* bytes in the IAG summary maps */
+#define SMAPSIZE        16      /* bytes in the IAG summary maps */
 /* convert inode number to iag number */
 #define INOTOIAG(ino)   ((ino) >> L2INOSPERIAG)
@@ -60,31 +60,31 @@
 *      inode allocation group page (per 4096 inodes of an AG)
 */
 struct iag {
-        __le64 agstart;         /* 8: starting block of ag              */
+        __le64 agstart;         /* 8: starting block of ag              */
-        __le32 iagnum;          /* 4: inode allocation group number     */
+        __le32 iagnum;          /* 4: inode allocation group number     */
-        __le32 inofreefwd;      /* 4: ag inode free list forward        */
+        __le32 inofreefwd;      /* 4: ag inode free list forward        */
-        __le32 inofreeback;     /* 4: ag inode free list back           */
+        __le32 inofreeback;     /* 4: ag inode free list back           */
-        __le32 extfreefwd;      /* 4: ag inode extent free list forward */
+        __le32 extfreefwd;      /* 4: ag inode extent free list forward */
-        __le32 extfreeback;     /* 4: ag inode extent free list back    */
+        __le32 extfreeback;     /* 4: ag inode extent free list back    */
-        __le32 iagfree;         /* 4: iag free list                     */
+        __le32 iagfree;         /* 4: iag free list                     */
        /* summary map: 1 bit per inode extent */
        __le32 inosmap[SMAPSZ]; /* 16: sum map of mapwords w/ free inodes;
-                                 *      note: this indicates free and backed
+                                 *      note: this indicates free and backed
-                                 *      inodes, if the extent is not backed the
+                                 *      inodes, if the extent is not backed the
-                                 *      value will be 1.  if the extent is
+                                 *      value will be 1.  if the extent is
-                                 *      backed but all inodes are being used the
+                                 *      backed but all inodes are being used the
-                                 *      value will be 1.  if the extent is
+                                 *      value will be 1.  if the extent is
-                                 *      backed but at least one of the inodes is
+                                 *      backed but at least one of the inodes is
-                                 *      free the value will be 0.
+                                 *      free the value will be 0.
                                 */
        __le32 extsmap[SMAPSZ]; /* 16: sum map of mapwords w/ free extents */
-        __le32 nfreeinos;               /* 4: number of free inodes             */
+        __le32 nfreeinos;       /* 4: number of free inodes             */
-        __le32 nfreeexts;               /* 4: number of free extents            */
+        __le32 nfreeexts;       /* 4: number of free extents            */
        /* (72) */
        u8 pad[1976];           /* 1976: pad to 2048 bytes */
        /* allocation bit map: 1 bit per inode (0 - free, 1 - allocated) */
-        __le32 wmap[EXTSPERIAG];        /* 512: working allocation map  */
+        __le32 wmap[EXTSPERIAG];        /* 512: working allocation map */
        __le32 pmap[EXTSPERIAG];        /* 512: persistent allocation map */
        pxd_t inoext[EXTSPERIAG];       /* 1024: inode extent addresses */
 };                              /* (4096) */
@@ -93,44 +93,44 @@ struct iag {
 *      per AG control information (in inode map control page)
 */
 struct iagctl_disk {
-        __le32 inofree;         /* 4: free inode list anchor            */
+        __le32 inofree;         /* 4: free inode list anchor            */
-        __le32 extfree;         /* 4: free extent list anchor           */
+        __le32 extfree;         /* 4: free extent list anchor           */
-        __le32 numinos;         /* 4: number of backed inodes           */
+        __le32 numinos;         /* 4: number of backed inodes           */
-        __le32 numfree;         /* 4: number of free inodes             */
+        __le32 numfree;         /* 4: number of free inodes             */
 };                              /* (16) */
 struct iagctl {
-        int inofree;            /* free inode list anchor            */
+        int inofree;            /* free inode list anchor               */
-        int extfree;            /* free extent list anchor           */
+        int extfree;            /* free extent list anchor              */
-        int numinos;            /* number of backed inodes           */
+        int numinos;            /* number of backed inodes              */
-        int numfree;            /* number of free inodes             */
+        int numfree;            /* number of free inodes                */
 };
 /*
 *      per fileset/aggregate inode map control page
 */
 struct dinomap_disk {
-        __le32 in_freeiag;      /* 4: free iag list anchor     */
+        __le32 in_freeiag;      /* 4: free iag list anchor      */
-        __le32 in_nextiag;      /* 4: next free iag number     */
+        __le32 in_nextiag;      /* 4: next free iag number      */
-        __le32 in_numinos;      /* 4: num of backed inodes */
+        __le32 in_numinos;      /* 4: num of backed inodes      */
        __le32 in_numfree;      /* 4: num of free backed inodes */
        __le32 in_nbperiext;    /* 4: num of blocks per inode extent */
-        __le32 in_l2nbperiext;  /* 4: l2 of in_nbperiext */
+        __le32 in_l2nbperiext;  /* 4: l2 of in_nbperiext        */
-        __le32 in_diskblock;    /* 4: for standalone test driver  */
+        __le32 in_diskblock;    /* 4: for standalone test driver */
-        __le32 in_maxag;        /* 4: for standalone test driver  */
+        __le32 in_maxag;        /* 4: for standalone test driver */
-        u8 pad[2016];           /* 2016: pad to 2048 */
+        u8 pad[2016];           /* 2016: pad to 2048            */
        struct iagctl_disk in_agctl[MAXAG]; /* 2048: AG control information */
 };                              /* (4096) */
 struct dinomap {
-        int in_freeiag;         /* free iag list anchor     */
+        int in_freeiag;         /* free iag list anchor         */
-        int in_nextiag;         /* next free iag number     */
+        int in_nextiag;         /* next free iag number         */
-        int in_numinos;         /* num of backed inodes */
+        int in_numinos;         /* num of backed inodes         */
-        int in_numfree;         /* num of free backed inodes */
+        int in_numfree;         /* num of free backed inodes    */
        int in_nbperiext;       /* num of blocks per inode extent */
-        int in_l2nbperiext;     /* l2 of in_nbperiext */
+        int in_l2nbperiext;     /* l2 of in_nbperiext           */
-        int in_diskblock;       /* for standalone test driver  */
+        int in_diskblock;       /* for standalone test driver   */
-        int in_maxag;           /* for standalone test driver  */
+        int in_maxag;           /* for standalone test driver   */
        struct iagctl in_agctl[MAXAG];  /* AG control information */
 };
@@ -139,9 +139,9 @@ struct dinomap {
 */
 struct inomap {
        struct dinomap im_imap;         /* 4096: inode allocation control */
-        struct inode *im_ipimap;        /* 4: ptr to inode for imap   */
+        struct inode *im_ipimap;        /* 4: ptr to inode for imap     */
-        struct mutex im_freelock;       /* 4: iag free list lock      */
+        struct mutex im_freelock;       /* 4: iag free list lock        */
-        struct mutex im_aglock[MAXAG];  /* 512: per AG locks          */
+        struct mutex im_aglock[MAXAG];  /* 512: per AG locks            */
        u32 *im_DBGdimap;
        atomic_t im_numinos;    /* num of backed inodes */
        atomic_t im_numfree;    /* num of free backed inodes */
diff --git a/fs/jfs/jfs_incore.h b/fs/jfs/jfs_incore.h
index 8f453eff3c83..cb8f30985ad1 100644
--- a/fs/jfs/jfs_incore.h
+++ b/fs/jfs/jfs_incore.h
@@ -40,7 +40,7 @@ struct jfs_inode_info {
        uint    mode2;          /* jfs-specific mode            */
        uint    saved_uid;      /* saved for uid mount option */
        uint    saved_gid;      /* saved for gid mount option */
-        pxd_t   ixpxd;          /* inode extent descriptor      */
+        pxd_t   ixpxd;          /* inode extent descriptor      */
        dxd_t   acl;            /* dxd describing acl   */
        dxd_t   ea;             /* dxd describing ea    */
        time_t  otime;          /* time created */
@@ -190,7 +190,7 @@ struct jfs_sb_info {
        uint            gengen;         /* inode generation generator*/
        uint            inostamp;       /* shows inode belongs to fileset*/
-        /* Formerly in ipbmap */
+        /* Formerly in ipbmap */
        struct bmap     *bmap;          /* incore bmap descriptor       */
        struct nls_table *nls_tab;      /* current codepage             */
        struct inode *direct_inode;     /* metadata inode */
diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c
index 44a2f33cb98d..de3e4a506dbc 100644
--- a/fs/jfs/jfs_logmgr.c
+++ b/fs/jfs/jfs_logmgr.c
@@ -244,7 +244,7 @@ int lmLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
                goto writeRecord;
        /*
-         *      initialize/update page/transaction recovery lsn
+         *      initialize/update page/transaction recovery lsn
         */
        lsn = log->lsn;
@@ -263,7 +263,7 @@ int lmLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
        }
        /*
-         *      initialize/update lsn of tblock of the page
+         *      initialize/update lsn of tblock of the page
         *
         * transaction inherits oldest lsn of pages associated
         * with allocation/deallocation of resources (their
@@ -307,7 +307,7 @@ int lmLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
        LOGSYNC_UNLOCK(log, flags);
        /*
-         *      write the log record
+         *      write the log record
         */
      writeRecord:
        lsn = lmWriteRecord(log, tblk, lrd, tlck);
@@ -372,7 +372,7 @@ lmWriteRecord(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
                goto moveLrd;
        /*
-         *      move log record data
+         *      move log record data
         */
        /* retrieve source meta-data page to log */
        if (tlck->flag & tlckPAGELOCK) {
@@ -465,7 +465,7 @@ lmWriteRecord(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
        }
        /*
-         *      move log record descriptor
+         *      move log record descriptor
         */
      moveLrd:
        lrd->length = cpu_to_le16(len);
@@ -574,7 +574,7 @@ static int lmNextPage(struct jfs_log * log)
        LOGGC_LOCK(log);
        /*
-         *      write or queue the full page at the tail of write queue
+         *      write or queue the full page at the tail of write queue
         */
        /* get the tail tblk on commit queue */
        if (list_empty(&log->cqueue))
@@ -625,7 +625,7 @@ static int lmNextPage(struct jfs_log * log)
        LOGGC_UNLOCK(log);
        /*
-         *      allocate/initialize next page
+         *      allocate/initialize next page
         */
        /* if log wraps, the first data page of log is 2
         * (0 never used, 1 is superblock).
@@ -953,7 +953,7 @@ static int lmLogSync(struct jfs_log * log, int hard_sync)
                }
        /*
-         *      forward syncpt
+         *      forward syncpt
         */
        /* if last sync is same as last syncpt,
         * invoke sync point forward processing to update sync.
@@ -989,7 +989,7 @@ static int lmLogSync(struct jfs_log * log, int hard_sync)
                lsn = log->lsn;
        /*
-         *      setup next syncpt trigger (SWAG)
+         *      setup next syncpt trigger (SWAG)
         */
        logsize = log->logsize;
@@ -1000,11 +1000,11 @@ static int lmLogSync(struct jfs_log * log, int hard_sync)
        if (more < 2 * LOGPSIZE) {
                jfs_warn("\n ... Log Wrap ... Log Wrap ... Log Wrap ...\n");
                /*
-                 *      log wrapping
+                 *      log wrapping
                 *
                 * option 1 - panic ? No.!
                 * option 2 - shutdown file systems
-                 *            associated with log ?
+                 *            associated with log ?
                 * option 3 - extend log ?
                 */
                /*
@@ -1062,7 +1062,7 @@ void jfs_syncpt(struct jfs_log *log, int hard_sync)
 /*
 * NAME:        lmLogOpen()
 *
- * FUNCTION:    open the log on first open;
+ * FUNCTION:    open the log on first open;
 *      insert filesystem in the active list of the log.
 *
 * PARAMETER:   ipmnt   - file system mount inode
@@ -1113,7 +1113,7 @@ int lmLogOpen(struct super_block *sb)
        init_waitqueue_head(&log->syncwait);
        /*
-         *      external log as separate logical volume
+         *      external log as separate logical volume
         *
         * file systems to log may have n-to-1 relationship;
         */
@@ -1155,7 +1155,7 @@ journal_found:
        return 0;
        /*
-         *      unwind on error
+         *      unwind on error
         */
      shutdown:         /* unwind lbmLogInit() */
        list_del(&log->journal_list);
@@ -1427,7 +1427,7 @@ int lmLogInit(struct jfs_log * log)
        return 0;
        /*
-         *      unwind on error
+         *      unwind on error
         */
      errout30:         /* release log page */
        log->wqueue = NULL;
@@ -1480,7 +1480,7 @@ int lmLogClose(struct super_block *sb)
        if (test_bit(log_INLINELOG, &log->flag)) {
                /*
-                 *      in-line log in host file system
+                 *      in-line log in host file system
                 */
                rc = lmLogShutdown(log);
                kfree(log);
@@ -1504,7 +1504,7 @@ int lmLogClose(struct super_block *sb)
                goto out;
        /*
-         *      external log as separate logical volume
+         *      external log as separate logical volume
         */
        list_del(&log->journal_list);
        bdev = log->bdev;
@@ -1622,20 +1622,26 @@ void jfs_flush_journal(struct jfs_log *log, int wait)
        if (!list_empty(&log->synclist)) {
                struct logsyncblk *lp;
+                printk(KERN_ERR "jfs_flush_journal: synclist not empty\n");
                list_for_each_entry(lp, &log->synclist, synclist) {
                        if (lp->xflag & COMMIT_PAGE) {
                                struct metapage *mp = (struct metapage *)lp;
-                                dump_mem("orphan metapage", lp,
+                                print_hex_dump(KERN_ERR, "metapage: ",
-                                         sizeof(struct metapage));
+                                               DUMP_PREFIX_ADDRESS, 16, 4,
-                                dump_mem("page", mp->page, sizeof(struct page));
+                                               mp, sizeof(struct metapage), 0);
-                        }
+                                print_hex_dump(KERN_ERR, "page: ",
-                        else
+                                               DUMP_PREFIX_ADDRESS, 16,
-                                dump_mem("orphan tblock", lp,
+                                               sizeof(long), mp->page,
-                                         sizeof(struct tblock));
+                                               sizeof(struct page), 0);
+                        } else
+                                print_hex_dump(KERN_ERR, "tblock:",
+                                               DUMP_PREFIX_ADDRESS, 16, 4,
+                                               lp, sizeof(struct tblock), 0);
                }
        }
+#else
+        WARN_ON(!list_empty(&log->synclist));
 #endif
-        //assert(list_empty(&log->synclist));
        clear_bit(log_FLUSH, &log->flag);
 }
@@ -1723,7 +1729,7 @@ int lmLogShutdown(struct jfs_log * log)
 *
 * PARAMETE:    log     - pointer to logs inode.
 *              fsdev   - kdev_t of filesystem.
- *              serial  - pointer to returned log serial number
+ *              serial  - pointer to returned log serial number
 *              activate - insert/remove device from active list.
 *
 * RETURN:      0       - success
@@ -1963,7 +1969,7 @@ static void lbmfree(struct lbuf * bp)
 * FUNCTION:    add a log buffer to the log redrive list
 *
 * PARAMETER:
- *     bp       - log buffer
+ *      bp      - log buffer
 *
 * NOTES:
 *      Takes log_redrive_lock.
@@ -2054,7 +2060,7 @@ static void lbmWrite(struct jfs_log * log, struct lbuf * bp, int flag,
        bp->l_flag = flag;
        /*
-         *      insert bp at tail of write queue associated with log
+         *      insert bp at tail of write queue associated with log
         *
         * (request is either for bp already/currently at head of queue
         * or new bp to be inserted at tail)
@@ -2117,7 +2123,7 @@ static void lbmDirectWrite(struct jfs_log * log, struct lbuf * bp, int flag)
            log->base + (bp->l_pn << (L2LOGPSIZE - log->l2bsize));
        /*
-         *      initiate pageout of the page
+         *      initiate pageout of the page
         */
        lbmStartIO(bp);
 }
@@ -2128,7 +2134,7 @@ static void lbmDirectWrite(struct jfs_log * log, struct lbuf * bp, int flag)
 *
 * FUNCTION:    Interface to DD strategy routine
 *
- * RETURN:      none
+ * RETURN:      none
 *
 * serialization: LCACHE_LOCK() is NOT held during log i/o;
 */
@@ -2222,7 +2228,7 @@ static int lbmIODone(struct bio *bio, unsigned int bytes_done, int error)
        bio_put(bio);
        /*
-         *      pagein completion
+         *      pagein completion
         */
        if (bp->l_flag & lbmREAD) {
                bp->l_flag &= ~lbmREAD;
@@ -2236,7 +2242,7 @@ static int lbmIODone(struct bio *bio, unsigned int bytes_done, int error)
        }
        /*
-         *      pageout completion
+         *      pageout completion
         *
         * the bp at the head of write queue has completed pageout.
         *
@@ -2302,7 +2308,7 @@ static int lbmIODone(struct bio *bio, unsigned int bytes_done, int error)
        }
        /*
-         *      synchronous pageout:
+         *      synchronous pageout:
         *
         * buffer has not necessarily been removed from write queue
         * (e.g., synchronous write of partial-page with COMMIT):
@@ -2316,7 +2322,7 @@ static int lbmIODone(struct bio *bio, unsigned int bytes_done, int error)
        }
        /*
-         *      Group Commit pageout:
+         *      Group Commit pageout:
         */
        else if (bp->l_flag & lbmGC) {
                LCACHE_UNLOCK(flags);
@@ -2324,7 +2330,7 @@ static int lbmIODone(struct bio *bio, unsigned int bytes_done, int error)
        }
        /*
-         *      asynchronous pageout:
+         *      asynchronous pageout:
         *
         * buffer must have been removed from write queue:
         * insert buffer at head of freelist where it can be recycled
@@ -2375,7 +2381,7 @@ int jfsIOWait(void *arg)
 * FUNCTION:    format file system log
 *
 * PARAMETERS:
- *      log     - volume log
+ *      log     - volume log
 *      logAddress - start address of log space in FS block
 *      logSize - length of log space in FS block;
 *
@@ -2407,16 +2413,16 @@ int lmLogFormat(struct jfs_log *log, s64 logAddress, int logSize)
        npages = logSize >> sbi->l2nbperpage;
        /*
-         *      log space:
+         *      log space:
         *
         * page 0 - reserved;
         * page 1 - log superblock;
         * page 2 - log data page: A SYNC log record is written
-         *          into this page at logform time;
+         *          into this page at logform time;
         * pages 3-N - log data page: set to empty log data pages;
         */
        /*
-         *      init log superblock: log page 1
+         *      init log superblock: log page 1
         */
        logsuper = (struct logsuper *) bp->l_ldata;
@@ -2436,7 +2442,7 @@ int lmLogFormat(struct jfs_log *log, s64 logAddress, int logSize)
                goto exit;
        /*
-         *      init pages 2 to npages-1 as log data pages:
+         *      init pages 2 to npages-1 as log data pages:
         *
         * log page sequence number (lpsn) initialization:
         *
@@ -2479,7 +2485,7 @@ int lmLogFormat(struct jfs_log *log, s64 logAddress, int logSize)
                goto exit;
        /*
-         *      initialize succeeding log pages: lpsn = 0, 1, ..., (N-2)
+         *      initialize succeeding log pages: lpsn = 0, 1, ..., (N-2)
         */
        for (lspn = 0; lspn < npages - 3; lspn++) {
                lp->h.page = lp->t.page = cpu_to_le32(lspn);
@@ -2495,7 +2501,7 @@ int lmLogFormat(struct jfs_log *log, s64 logAddress, int logSize)
        rc = 0;
 exit:
        /*
-         *      finalize log
+         *      finalize log
         */
        /* release the buffer */
        lbmFree(bp);
diff --git a/fs/jfs/jfs_logmgr.h b/fs/jfs/jfs_logmgr.h
index a53fb17ea219..1f85ef0ec045 100644
--- a/fs/jfs/jfs_logmgr.h
+++ b/fs/jfs/jfs_logmgr.h
@@ -144,7 +144,7 @@ struct logpage {
 *
 * (this comment should be rewritten !)
 * jfs uses only "after" log records (only a single writer is allowed
- * in a  page, pages are written to temporary paging space if
+ * in a page, pages are written to temporary paging space
 * if they must be written to disk before commit, and i/o is
 * scheduled for modified pages to their home location after
 * the log records containing the after values and the commit
@@ -153,7 +153,7 @@ struct logpage {
 *
 * a log record consists of a data area of variable length followed by
 * a descriptor of fixed size LOGRDSIZE bytes.
- * the  data area is rounded up to an integral number of 4-bytes and
+ * the data area is rounded up to an integral number of 4-bytes and
 * must be no longer than LOGPSIZE.
 * the descriptor is of size of multiple of 4-bytes and aligned on a
 * 4-byte boundary.
@@ -215,13 +215,13 @@ struct lrd {
        union {
                /*
-                 *      COMMIT: commit
+                 *      COMMIT: commit
                 *
                 * transaction commit: no type-dependent information;
                 */
                /*
-                 *      REDOPAGE: after-image
+                 *      REDOPAGE: after-image
                 *
                 * apply after-image;
                 *
@@ -236,7 +236,7 @@ struct lrd {
                } redopage;     /* (20) */
                /*
-                 *      NOREDOPAGE: the page is freed
+                 *      NOREDOPAGE: the page is freed
                 *
                 * do not apply after-image records which precede this record
                 * in the log with the same page block number to this page.
@@ -252,7 +252,7 @@ struct lrd {
                } noredopage;   /* (20) */
                /*
-                 *      UPDATEMAP: update block allocation map
+                 *      UPDATEMAP: update block allocation map
                 *
                 * either in-line PXD,
                 * or     out-of-line  XADLIST;
@@ -268,7 +268,7 @@ struct lrd {
                } updatemap;    /* (20) */
                /*
-                 *      NOREDOINOEXT: the inode extent is freed
+                 *      NOREDOINOEXT: the inode extent is freed
                 *
                 * do not apply after-image records which precede this
                 * record in the log with the any of the 4 page block
@@ -286,7 +286,7 @@ struct lrd {
                } noredoinoext; /* (20) */
                /*
-                 *      SYNCPT: log sync point
+                 *      SYNCPT: log sync point
                 *
                 * replay log upto syncpt address specified;
                 */
@@ -295,13 +295,13 @@ struct lrd {
                } syncpt;
                /*
-                 *      MOUNT: file system mount
+                 *      MOUNT: file system mount
                 *
                 * file system mount: no type-dependent information;
                 */
                /*
-                 *      ? FREEXTENT: free specified extent(s)
+                 *      ? FREEXTENT: free specified extent(s)
                 *
                 * free specified extent(s) from block allocation map
                 * N.B.: nextents should be length of data/sizeof(xad_t)
@@ -314,7 +314,7 @@ struct lrd {
                } freextent;
                /*
-                 *      ? NOREDOFILE: this file is freed
+                 *      ? NOREDOFILE: this file is freed
                 *
                 * do not apply records which precede this record in the log
                 * with the same inode number.
@@ -330,7 +330,7 @@ struct lrd {
                } noredofile;
                /*
-                 *      ? NEWPAGE:
+                 *      ? NEWPAGE:
                 *
                 * metadata type dependent
                 */
@@ -342,7 +342,7 @@ struct lrd {
                } newpage;
                /*
-                 *      ? DUMMY: filler
+                 *      ? DUMMY: filler
                 *
                 * no type-dependent information
                 */
diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c
index 43d4f69afbec..77c7f1129dde 100644
--- a/fs/jfs/jfs_metapage.c
+++ b/fs/jfs/jfs_metapage.c
@@ -472,7 +472,8 @@ add_failed:
        printk(KERN_ERR "JFS: bio_add_page failed unexpectedly\n");
        goto skip;
 dump_bio:
-        dump_mem("bio", bio, sizeof(*bio));
+        print_hex_dump(KERN_ERR, "JFS: dump of bio: ", DUMP_PREFIX_ADDRESS, 16,
+                       4, bio, sizeof(*bio), 0);
 skip:
        bio_put(bio);
        unlock_page(page);
diff --git a/fs/jfs/jfs_mount.c b/fs/jfs/jfs_mount.c
index 4dd479834897..644429acb8c0 100644
--- a/fs/jfs/jfs_mount.c
+++ b/fs/jfs/jfs_mount.c
@@ -80,7 +80,7 @@ static int logMOUNT(struct super_block *sb);
 */
 int jfs_mount(struct super_block *sb)
 {
-        int rc = 0;             /* Return code          */
+        int rc = 0;             /* Return code */
        struct jfs_sb_info *sbi = JFS_SBI(sb);
        struct inode *ipaimap = NULL;
        struct inode *ipaimap2 = NULL;
@@ -169,7 +169,7 @@ int jfs_mount(struct super_block *sb)
                sbi->ipaimap2 = NULL;
        /*
-         *      mount (the only/single) fileset
+         *      mount (the only/single) fileset
         */
        /*
         * open fileset inode allocation map (aka fileset inode)
@@ -195,7 +195,7 @@ int jfs_mount(struct super_block *sb)
        goto out;
        /*
-         *      unwind on error
+         *      unwind on error
         */
      errout41:         /* close fileset inode allocation map inode */
        diFreeSpecial(ipimap);
diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c
index 25430d0b0d59..7aa1f7004eaf 100644
--- a/fs/jfs/jfs_txnmgr.c
+++ b/fs/jfs/jfs_txnmgr.c
@@ -18,7 +18,7 @@
 */
 /*
- *      jfs_txnmgr.c: transaction manager
+ *      jfs_txnmgr.c: transaction manager
 *
 * notes:
 * transaction starts with txBegin() and ends with txCommit()
@@ -60,7 +60,7 @@
 #include "jfs_debug.h"
 /*
- *      transaction management structures
+ *      transaction management structures
 */
 static struct {
        int freetid;            /* index of a free tid structure */
@@ -103,19 +103,19 @@ module_param(nTxLock, int, 0);
 MODULE_PARM_DESC(nTxLock,
                 "Number of transaction locks (max:65536)");
-struct tblock *TxBlock;         /* transaction block table */
+struct tblock *TxBlock; /* transaction block table */
-static int TxLockLWM;           /* Low water mark for number of txLocks used */
+static int TxLockLWM;   /* Low water mark for number of txLocks used */
-static int TxLockHWM;           /* High water mark for number of txLocks used */
+static int TxLockHWM;   /* High water mark for number of txLocks used */
-static int TxLockVHWM;          /* Very High water mark */
+static int TxLockVHWM;  /* Very High water mark */
-struct tlock *TxLock;           /* transaction lock table */
+struct tlock *TxLock;   /* transaction lock table */
 /*
- *      transaction management lock
+ *      transaction management lock
 */
 static DEFINE_SPINLOCK(jfsTxnLock);
-#define TXN_LOCK()              spin_lock(&jfsTxnLock)
+#define TXN_LOCK()              spin_lock(&jfsTxnLock)
-#define TXN_UNLOCK()            spin_unlock(&jfsTxnLock)
+#define TXN_UNLOCK()            spin_unlock(&jfsTxnLock)
 #define LAZY_LOCK_INIT()        spin_lock_init(&TxAnchor.LazyLock);
 #define LAZY_LOCK(flags)        spin_lock_irqsave(&TxAnchor.LazyLock, flags)
@@ -148,7 +148,7 @@ static inline void TXN_SLEEP_DROP_LOCK(wait_queue_head_t * event)
 #define TXN_WAKEUP(event) wake_up_all(event)
 /*
- *      statistics
+ *      statistics
 */
 static struct {
        tid_t maxtid;           /* 4: biggest tid ever used */
@@ -181,8 +181,8 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 static void LogSyncRelease(struct metapage * mp);
 /*
- *              transaction block/lock management
+ *              transaction block/lock management
- *              ---------------------------------
+ *              ---------------------------------
 */
 /*
@@ -227,9 +227,9 @@ static void txLockFree(lid_t lid)
 }
 /*
- * NAME:        txInit()
+ * NAME:        txInit()
 *
- * FUNCTION:    initialize transaction management structures
+ * FUNCTION:    initialize transaction management structures
 *
 * RETURN:
 *
@@ -333,9 +333,9 @@ int txInit(void)
 }
 /*
- * NAME:        txExit()
+ * NAME:        txExit()
 *
- * FUNCTION:    clean up when module is unloaded
+ * FUNCTION:    clean up when module is unloaded
 */
 void txExit(void)
 {
@@ -346,12 +346,12 @@ void txExit(void)
 }
 /*
- * NAME:        txBegin()
+ * NAME:        txBegin()
 *
- * FUNCTION:    start a transaction.
+ * FUNCTION:    start a transaction.
 *
- * PARAMETER:   sb      - superblock
+ * PARAMETER:   sb      - superblock
- *              flag    - force for nested tx;
+ *              flag    - force for nested tx;
 *
 * RETURN:      tid     - transaction id
 *
@@ -447,13 +447,13 @@ tid_t txBegin(struct super_block *sb, int flag)
 }
 /*
- * NAME:        txBeginAnon()
+ * NAME:        txBeginAnon()
 *
- * FUNCTION:    start an anonymous transaction.
+ * FUNCTION:    start an anonymous transaction.
 *              Blocks if logsync or available tlocks are low to prevent
 *              anonymous tlocks from depleting supply.
 *
- * PARAMETER:   sb      - superblock
+ * PARAMETER:   sb      - superblock
 *
 * RETURN:      none
 */
@@ -489,11 +489,11 @@ void txBeginAnon(struct super_block *sb)
 }
 /*
- *      txEnd()
+ *      txEnd()
 *
 * function: free specified transaction block.
 *
- *      logsync barrier processing:
+ *      logsync barrier processing:
 *
 * serialization:
 */
@@ -577,13 +577,13 @@ wakeup:
 }
 /*
- *      txLock()
+ *      txLock()
 *
 * function: acquire a transaction lock on the specified <mp>
 *
 * parameter:
 *
- * return:      transaction lock id
+ * return:      transaction lock id
 *
 * serialization:
 */
@@ -829,12 +829,16 @@ struct tlock *txLock(tid_t tid, struct inode *ip, struct metapage * mp,
        /* Only locks on ipimap or ipaimap should reach here */
        /* assert(jfs_ip->fileset == AGGREGATE_I); */
        if (jfs_ip->fileset != AGGREGATE_I) {
-                jfs_err("txLock: trying to lock locked page!");
+                printk(KERN_ERR "txLock: trying to lock locked page!");
-                dump_mem("ip", ip, sizeof(struct inode));
+                print_hex_dump(KERN_ERR, "ip: ", DUMP_PREFIX_ADDRESS, 16, 4,
-                dump_mem("mp", mp, sizeof(struct metapage));
+                               ip, sizeof(*ip), 0);
-                dump_mem("Locker's tblk", tid_to_tblock(tid),
+                print_hex_dump(KERN_ERR, "mp: ", DUMP_PREFIX_ADDRESS, 16, 4,
-                         sizeof(struct tblock));
+                               mp, sizeof(*mp), 0);
-                dump_mem("Tlock", tlck, sizeof(struct tlock));
+                print_hex_dump(KERN_ERR, "Locker's tblock: ",
+                               DUMP_PREFIX_ADDRESS, 16, 4, tid_to_tblock(tid),
+                               sizeof(struct tblock), 0);
+                print_hex_dump(KERN_ERR, "Tlock: ", DUMP_PREFIX_ADDRESS, 16, 4,
+                               tlck, sizeof(*tlck), 0);
                BUG();
        }
        INCREMENT(stattx.waitlock);     /* statistics */
@@ -857,17 +861,17 @@ struct tlock *txLock(tid_t tid, struct inode *ip, struct metapage * mp,
 }
 /*
- * NAME:        txRelease()
+ * NAME:        txRelease()
 *
- * FUNCTION:    Release buffers associated with transaction locks, but don't
+ * FUNCTION:    Release buffers associated with transaction locks, but don't
 *              mark homeok yet.  The allows other transactions to modify
 *              buffers, but won't let them go to disk until commit record
 *              actually gets written.
 *
 * PARAMETER:
- *              tblk    -
+ *              tblk    -
 *
- * RETURN:      Errors from subroutines.
+ * RETURN:      Errors from subroutines.
 */
 static void txRelease(struct tblock * tblk)
 {
@@ -896,10 +900,10 @@ static void txRelease(struct tblock * tblk)
 }
 /*
- * NAME:        txUnlock()
+ * NAME:        txUnlock()
 *
- * FUNCTION:    Initiates pageout of pages modified by tid in journalled
+ * FUNCTION:    Initiates pageout of pages modified by tid in journalled
- *              objects and frees their lockwords.
+ *              objects and frees their lockwords.
 */
 static void txUnlock(struct tblock * tblk)
 {
@@ -983,10 +987,10 @@ static void txUnlock(struct tblock * tblk)
 }
 /*
- *      txMaplock()
+ *      txMaplock()
 *
 * function: allocate a transaction lock for freed page/entry;
- *      for freed page, maplock is used as xtlock/dtlock type;
+ *      for freed page, maplock is used as xtlock/dtlock type;
 */
 struct tlock *txMaplock(tid_t tid, struct inode *ip, int type)
 {
@@ -1057,7 +1061,7 @@ struct tlock *txMaplock(tid_t tid, struct inode *ip, int type)
 }
 /*
- *      txLinelock()
+ *      txLinelock()
 *
 * function: allocate a transaction lock for log vector list
 */
@@ -1092,39 +1096,39 @@ struct linelock *txLinelock(struct linelock * tlock)
 }
 /*
- *              transaction commit management
+ *              transaction commit management
- *              -----------------------------
+ *              -----------------------------
 */
 /*
- * NAME:        txCommit()
+ * NAME:        txCommit()
- *
+ *
- * FUNCTION:    commit the changes to the objects specified in
+ * FUNCTION:    commit the changes to the objects specified in
- *              clist.  For journalled segments only the
+ *              clist.  For journalled segments only the
- *              changes of the caller are committed, ie by tid.
+ *              changes of the caller are committed, ie by tid.
- *              for non-journalled segments the data are flushed to
+ *              for non-journalled segments the data are flushed to
- *              disk and then the change to the disk inode and indirect
+ *              disk and then the change to the disk inode and indirect
- *              blocks committed (so blocks newly allocated to the
+ *              blocks committed (so blocks newly allocated to the
- *              segment will be made a part of the segment atomically).
+ *              segment will be made a part of the segment atomically).
- *
+ *
- *              all of the segments specified in clist must be in
+ *              all of the segments specified in clist must be in
- *              one file system. no more than 6 segments are needed
+ *              one file system. no more than 6 segments are needed
- *              to handle all unix svcs.
+ *              to handle all unix svcs.
- *
+ *
- *              if the i_nlink field (i.e. disk inode link count)
+ *              if the i_nlink field (i.e. disk inode link count)
- *              is zero, and the type of inode is a regular file or
+ *              is zero, and the type of inode is a regular file or
- *              directory, or symbolic link , the inode is truncated
+ *              directory, or symbolic link , the inode is truncated
- *              to zero length. the truncation is committed but the
+ *              to zero length. the truncation is committed but the
- *              VM resources are unaffected until it is closed (see
+ *              VM resources are unaffected until it is closed (see
- *              iput and iclose).
+ *              iput and iclose).
 *
 * PARAMETER:
 *
 * RETURN:
 *
 * serialization:
- *              on entry the inode lock on each segment is assumed
+ *              on entry the inode lock on each segment is assumed
- *              to be held.
+ *              to be held.
 *
 * i/o error:
 */
@@ -1175,7 +1179,7 @@ int txCommit(tid_t tid,		/* transaction identifier */
        if ((flag & (COMMIT_FORCE | COMMIT_SYNC)) == 0)
                tblk->xflag |= COMMIT_LAZY;
        /*
-         *      prepare non-journaled objects for commit
+         *      prepare non-journaled objects for commit
         *
         * flush data pages of non-journaled file
         * to prevent the file getting non-initialized disk blocks
@@ -1186,7 +1190,7 @@ int txCommit(tid_t tid,		/* transaction identifier */
        cd.nip = nip;
        /*
-         *      acquire transaction lock on (on-disk) inodes
+         *      acquire transaction lock on (on-disk) inodes
         *
         * update on-disk inode from in-memory inode
         * acquiring transaction locks for AFTER records
@@ -1262,7 +1266,7 @@ int txCommit(tid_t tid,		/* transaction identifier */
        }
        /*
-         *      write log records from transaction locks
+         *      write log records from transaction locks
         *
         * txUpdateMap() resets XAD_NEW in XAD.
         */
@@ -1294,7 +1298,7 @@ int txCommit(tid_t tid,		/* transaction identifier */
                !test_cflag(COMMIT_Nolink, tblk->u.ip)));
        /*
-         *      write COMMIT log record
+         *      write COMMIT log record
         */
        lrd->type = cpu_to_le16(LOG_COMMIT);
        lrd->length = 0;
@@ -1303,7 +1307,7 @@ int txCommit(tid_t tid,		/* transaction identifier */
        lmGroupCommit(log, tblk);
        /*
-         *      - transaction is now committed -
+         *      - transaction is now committed -
         */
        /*
@@ -1314,11 +1318,11 @@ int txCommit(tid_t tid,		/* transaction identifier */
                txForce(tblk);
        /*
-         *      update allocation map.
+         *      update allocation map.
         *
         * update inode allocation map and inode:
         * free pager lock on memory object of inode if any.
-         * update  block allocation map.
+         * update block allocation map.
         *
         * txUpdateMap() resets XAD_NEW in XAD.
         */
@@ -1326,7 +1330,7 @@ int txCommit(tid_t tid,		/* transaction identifier */
                txUpdateMap(tblk);
        /*
-         *      free transaction locks and pageout/free pages
+         *      free transaction locks and pageout/free pages
         */
        txRelease(tblk);
@@ -1335,7 +1339,7 @@ int txCommit(tid_t tid,		/* transaction identifier */
        /*
-         *      reset in-memory object state
+         *      reset in-memory object state
         */
        for (k = 0; k < cd.nip; k++) {
                ip = cd.iplist[k];
@@ -1358,11 +1362,11 @@ int txCommit(tid_t tid,		/* transaction identifier */
 }
 /*
- * NAME:        txLog()
+ * NAME:        txLog()
 *
- * FUNCTION:    Writes AFTER log records for all lines modified
+ * FUNCTION:    Writes AFTER log records for all lines modified
- *              by tid for segments specified by inodes in comdata.
+ *              by tid for segments specified by inodes in comdata.
- *              Code assumes only WRITELOCKS are recorded in lockwords.
+ *              Code assumes only WRITELOCKS are recorded in lockwords.
 *
 * PARAMETERS:
 *
@@ -1421,12 +1425,12 @@ static int txLog(struct jfs_log * log, struct tblock * tblk, struct commit * cd)
 }
 /*
- *      diLog()
+ *      diLog()
 *
- * function:    log inode tlock and format maplock to update bmap;
+ * function:    log inode tlock and format maplock to update bmap;
 */
 static int diLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
-          struct tlock * tlck, struct commit * cd)
+                 struct tlock * tlck, struct commit * cd)
 {
        int rc = 0;
        struct metapage *mp;
@@ -1442,7 +1446,7 @@ static int diLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
        pxd = &lrd->log.redopage.pxd;
        /*
-         *      inode after image
+         *      inode after image
         */
        if (tlck->type & tlckENTRY) {
                /* log after-image for logredo(): */
@@ -1456,7 +1460,7 @@ static int diLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
                tlck->flag |= tlckWRITEPAGE;
        } else if (tlck->type & tlckFREE) {
                /*
-                 *      free inode extent
+                 *      free inode extent
                 *
                 * (pages of the freed inode extent have been invalidated and
                 * a maplock for free of the extent has been formatted at
@@ -1498,7 +1502,7 @@ static int diLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
                jfs_err("diLog: UFO type tlck:0x%p", tlck);
 #ifdef  _JFS_WIP
        /*
-         *      alloc/free external EA extent
+         *      alloc/free external EA extent
         *
         * a maplock for txUpdateMap() to update bPWMAP for alloc/free
         * of the extent has been formatted at txLock() time;
@@ -1534,9 +1538,9 @@ static int diLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 }
 /*
- *      dataLog()
+ *      dataLog()
 *
- * function:    log data tlock
+ * function:    log data tlock
 */
 static int dataLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
            struct tlock * tlck)
@@ -1580,9 +1584,9 @@ static int dataLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 }
 /*
- *      dtLog()
+ *      dtLog()
 *
- * function:    log dtree tlock and format maplock to update bmap;
+ * function:    log dtree tlock and format maplock to update bmap;
 */
 static void dtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
           struct tlock * tlck)
@@ -1603,10 +1607,10 @@ static void dtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
                lrd->log.redopage.type |= cpu_to_le16(LOG_BTROOT);
        /*
-         *      page extension via relocation: entry insertion;
+         *      page extension via relocation: entry insertion;
-         *      page extension in-place: entry insertion;
+         *      page extension in-place: entry insertion;
-         *      new right page from page split, reinitialized in-line
+         *      new right page from page split, reinitialized in-line
-         *      root from root page split: entry insertion;
+         *      root from root page split: entry insertion;
         */
        if (tlck->type & (tlckNEW | tlckEXTEND)) {
                /* log after-image of the new page for logredo():
@@ -1641,8 +1645,8 @@ static void dtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
        }
        /*
-         *      entry insertion/deletion,
+         *      entry insertion/deletion,
-         *      sibling page link update (old right page before split);
+         *      sibling page link update (old right page before split);
         */
        if (tlck->type & (tlckENTRY | tlckRELINK)) {
                /* log after-image for logredo(): */
@@ -1658,11 +1662,11 @@ static void dtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
        }
        /*
-         *      page deletion: page has been invalidated
+         *      page deletion: page has been invalidated
-         *      page relocation: source extent
+         *      page relocation: source extent
         *
-         *      a maplock for free of the page has been formatted
+         *      a maplock for free of the page has been formatted
-         *      at txLock() time);
+         *      at txLock() time);
         */
        if (tlck->type & (tlckFREE | tlckRELOCATE)) {
                /* log LOG_NOREDOPAGE of the deleted page for logredo()
@@ -1683,9 +1687,9 @@ static void dtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 }
 /*
- *      xtLog()
+ *      xtLog()
 *
- * function:    log xtree tlock and format maplock to update bmap;
+ * function:    log xtree tlock and format maplock to update bmap;
 */
 static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
           struct tlock * tlck)
@@ -1725,8 +1729,8 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
        xadlock = (struct xdlistlock *) maplock;
        /*
-         *      entry insertion/extension;
+         *      entry insertion/extension;
-         *      sibling page link update (old right page before split);
+         *      sibling page link update (old right page before split);
         */
        if (tlck->type & (tlckNEW | tlckGROW | tlckRELINK)) {
                /* log after-image for logredo():
@@ -1801,7 +1805,7 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
        }
        /*
-         *      page deletion: file deletion/truncation (ref. xtTruncate())
+         *      page deletion: file deletion/truncation (ref. xtTruncate())
         *
         * (page will be invalidated after log is written and bmap
         * is updated from the page);
@@ -1908,13 +1912,13 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
        }
        /*
-         *      page/entry truncation: file truncation (ref. xtTruncate())
+         *      page/entry truncation: file truncation (ref. xtTruncate())
         *
-         *     |----------+------+------+---------------|
+         *      |----------+------+------+---------------|
-         *                |      |      |
+         *                 |      |      |
-         *                |      |     hwm - hwm before truncation
+         *                 |      |     hwm - hwm before truncation
-         *                |     next - truncation point
+         *                 |     next - truncation point
-         *               lwm - lwm before truncation
+         *                lwm - lwm before truncation
         * header ?
         */
        if (tlck->type & tlckTRUNCATE) {
@@ -1937,7 +1941,7 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
                twm = xtlck->twm.offset;
                /*
-                 *      write log records
+                 *      write log records
                 */
                /* log after-image for logredo():
                 *
@@ -1997,7 +2001,7 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
                }
                /*
-                 *      format maplock(s) for txUpdateMap() to update bmap
+                 *      format maplock(s) for txUpdateMap() to update bmap
                 */
                maplock->index = 0;
@@ -2069,9 +2073,9 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 }
 /*
- *      mapLog()
+ *      mapLog()
 *
- * function:    log from maplock of freed data extents;
+ * function:    log from maplock of freed data extents;
 */
 static void mapLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
                   struct tlock * tlck)
@@ -2081,7 +2085,7 @@ static void mapLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
        pxd_t *pxd;
        /*
-         *      page relocation: free the source page extent
+         *      page relocation: free the source page extent
         *
         * a maplock for txUpdateMap() for free of the page
         * has been formatted at txLock() time saving the src
@@ -2155,10 +2159,10 @@ static void mapLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 }
 /*
- *      txEA()
+ *      txEA()
 *
- * function:    acquire maplock for EA/ACL extents or
+ * function:    acquire maplock for EA/ACL extents or
- *              set COMMIT_INLINE flag;
+ *              set COMMIT_INLINE flag;
 */
 void txEA(tid_t tid, struct inode *ip, dxd_t * oldea, dxd_t * newea)
 {
@@ -2207,10 +2211,10 @@ void txEA(tid_t tid, struct inode *ip, dxd_t * oldea, dxd_t * newea)
 }
 /*
- *      txForce()
+ *      txForce()
 *
 * function: synchronously write pages locked by transaction
- *              after txLog() but before txUpdateMap();
+ *           after txLog() but before txUpdateMap();
 */
 static void txForce(struct tblock * tblk)
 {
@@ -2273,10 +2277,10 @@ static void txForce(struct tblock * tblk)
 }
 /*
- *      txUpdateMap()
+ *      txUpdateMap()
 *
- * function:    update persistent allocation map (and working map
+ * function:    update persistent allocation map (and working map
- *              if appropriate);
+ *              if appropriate);
 *
 * parameter:
 */
@@ -2298,7 +2302,7 @@ static void txUpdateMap(struct tblock * tblk)
        /*
-         *      update block allocation map
+         *      update block allocation map
         *
         * update allocation state in pmap (and wmap) and
         * update lsn of the pmap page;
@@ -2382,7 +2386,7 @@ static void txUpdateMap(struct tblock * tblk)
                }
        }
        /*
-         *      update inode allocation map
+         *      update inode allocation map
         *
         * update allocation state in pmap and
         * update lsn of the pmap page;
@@ -2407,24 +2411,24 @@ static void txUpdateMap(struct tblock * tblk)
 }
 /*
- *      txAllocPMap()
+ *      txAllocPMap()
 *
 * function: allocate from persistent map;
 *
 * parameter:
- *      ipbmap  -
+ *      ipbmap  -
- *      malock -
+ *      malock  -
- *              xad list:
+ *              xad list:
- *              pxd:
+ *              pxd:
- *
+ *
- *      maptype -
+ *      maptype -
- *              allocate from persistent map;
+ *              allocate from persistent map;
- *              free from persistent map;
+ *              free from persistent map;
- *              (e.g., tmp file - free from working map at releae
+ *              (e.g., tmp file - free from working map at release
- *               of last reference);
+ *               of last reference);
- *              free from persistent and working map;
+ *              free from persistent and working map;
- *
+ *
- *      lsn     - log sequence number;
+ *      lsn     - log sequence number;
 */
 static void txAllocPMap(struct inode *ip, struct maplock * maplock,
                        struct tblock * tblk)
@@ -2478,9 +2482,9 @@ static void txAllocPMap(struct inode *ip, struct maplock * maplock,
 }
 /*
- *      txFreeMap()
+ *      txFreeMap()
 *
- * function:    free from persistent and/or working map;
+ * function:    free from persistent and/or working map;
 *
 * todo: optimization
 */
@@ -2579,9 +2583,9 @@ void txFreeMap(struct inode *ip,
 }
 /*
- *      txFreelock()
+ *      txFreelock()
 *
- * function:    remove tlock from inode anonymous locklist
+ * function:    remove tlock from inode anonymous locklist
 */
 void txFreelock(struct inode *ip)
 {
@@ -2619,7 +2623,7 @@ void txFreelock(struct inode *ip)
 }
 /*
- *      txAbort()
+ *      txAbort()
 *
 * function: abort tx before commit;
 *
@@ -2679,7 +2683,7 @@ void txAbort(tid_t tid, int dirty)
 }
 /*
- *      txLazyCommit(void)
+ *      txLazyCommit(void)
 *
 *      All transactions except those changing ipimap (COMMIT_FORCE) are
 *      processed by this routine.  This insures that the inode and block
@@ -2728,7 +2732,7 @@ static void txLazyCommit(struct tblock * tblk)
 }
 /*
- *      jfs_lazycommit(void)
+ *      jfs_lazycommit(void)
 *
 *      To be run as a kernel daemon.  If lbmIODone is called in an interrupt
 *      context, or where blocking is not wanted, this routine will process
@@ -2913,7 +2917,7 @@ void txResume(struct super_block *sb)
 }
 /*
- *      jfs_sync(void)
+ *      jfs_sync(void)
 *
 *      To be run as a kernel daemon.  This is awakened when tlocks run low.
 *      We write any inodes that have anonymous tlocks so they will become
diff --git a/fs/jfs/jfs_txnmgr.h b/fs/jfs/jfs_txnmgr.h
index 7863cf21afca..ab7288937019 100644
--- a/fs/jfs/jfs_txnmgr.h
+++ b/fs/jfs/jfs_txnmgr.h
@@ -94,7 +94,7 @@ extern struct tblock *TxBlock;	/* transaction block table */
 */
 struct tlock {
        lid_t next;             /* 2: index next lockword on tid locklist
-                                 *          next lockword on freelist
+                                 *          next lockword on freelist
                                 */
        tid_t tid;              /* 2: transaction id holding lock */
diff --git a/fs/jfs/jfs_types.h b/fs/jfs/jfs_types.h
index 09b252958687..649f9817accd 100644
--- a/fs/jfs/jfs_types.h
+++ b/fs/jfs/jfs_types.h
@@ -21,7 +21,7 @@
 /*
 *      jfs_types.h:
 *
- * basic type/utility  definitions
+ * basic type/utility definitions
 *
 * note: this header file must be the 1st include file
 * of JFS include list in all JFS .c file.
@@ -54,8 +54,8 @@ struct timestruc_t {
 */
 #define LEFTMOSTONE     0x80000000
-#define HIGHORDER       0x80000000u     /* high order bit on            */
+#define HIGHORDER       0x80000000u     /* high order bit on    */
-#define ONES            0xffffffffu     /* all bit on                   */
+#define ONES            0xffffffffu     /* all bit on           */
 /*
 *      logical xd (lxd)
@@ -148,7 +148,7 @@ typedef struct {
 #define sizeDXD(dxd)    le32_to_cpu((dxd)->size)
 /*
- *      directory entry argument
+ *      directory entry argument
 */
 struct component_name {
        int namlen;
@@ -160,14 +160,14 @@ struct component_name {
 *      DASD limit information - stored in directory inode
 */
 struct dasd {
-        u8 thresh;              /* Alert Threshold (in percent) */
+        u8 thresh;              /* Alert Threshold (in percent)         */
-        u8 delta;               /* Alert Threshold delta (in percent)   */
+        u8 delta;               /* Alert Threshold delta (in percent)   */
        u8 rsrvd1;
-        u8 limit_hi;            /* DASD limit (in logical blocks)       */
+        u8 limit_hi;            /* DASD limit (in logical blocks)       */
-        __le32 limit_lo;        /* DASD limit (in logical blocks)       */
+        __le32 limit_lo;        /* DASD limit (in logical blocks)       */
        u8 rsrvd2[3];
-        u8 used_hi;             /* DASD usage (in logical blocks)       */
+        u8 used_hi;             /* DASD usage (in logical blocks)       */
-        __le32 used_lo;         /* DASD usage (in logical blocks)       */
+        __le32 used_lo;         /* DASD usage (in logical blocks)       */
 };
 #define DASDLIMIT(dasdp) \
diff --git a/fs/jfs/jfs_umount.c b/fs/jfs/jfs_umount.c
index a386f48c73fc..7971f37534a3 100644
--- a/fs/jfs/jfs_umount.c
+++ b/fs/jfs/jfs_umount.c
@@ -60,7 +60,7 @@ int jfs_umount(struct super_block *sb)
        jfs_info("UnMount JFS: sb:0x%p", sb);
        /*
-         *      update superblock and close log
+         *      update superblock and close log
         *
         * if mounted read-write and log based recovery was enabled
         */
diff --git a/fs/jfs/jfs_xtree.c b/fs/jfs/jfs_xtree.c
index acc97c46d8a4..1543906a2e0d 100644
--- a/fs/jfs/jfs_xtree.c
+++ b/fs/jfs/jfs_xtree.c
@@ -16,7 +16,7 @@
 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
 /*
- *      jfs_xtree.c: extent allocation descriptor B+-tree manager
+ *      jfs_xtree.c: extent allocation descriptor B+-tree manager
 */
 #include <linux/fs.h>
@@ -32,30 +32,30 @@
 /*
 * xtree local flag
 */
-#define XT_INSERT       0x00000001
+#define XT_INSERT       0x00000001
 /*
- *       xtree key/entry comparison: extent offset
+ *      xtree key/entry comparison: extent offset
 *
 * return:
- *      -1: k < start of extent
+ *      -1: k < start of extent
- *       0: start_of_extent <= k <= end_of_extent
+ *       0: start_of_extent <= k <= end_of_extent
- *       1: k > end_of_extent
+ *       1: k > end_of_extent
 */
 #define XT_CMP(CMP, K, X, OFFSET64)\
 {\
-        OFFSET64 = offsetXAD(X);\
+        OFFSET64 = offsetXAD(X);\
-        (CMP) = ((K) >= OFFSET64 + lengthXAD(X)) ? 1 :\
+        (CMP) = ((K) >= OFFSET64 + lengthXAD(X)) ? 1 :\
-              ((K) < OFFSET64) ? -1 : 0;\
+                ((K) < OFFSET64) ? -1 : 0;\
 }
 /* write a xad entry */
 #define XT_PUTENTRY(XAD, FLAG, OFF, LEN, ADDR)\
 {\
-        (XAD)->flag = (FLAG);\
+        (XAD)->flag = (FLAG);\
-        XADoffset((XAD), (OFF));\
+        XADoffset((XAD), (OFF));\
-        XADlength((XAD), (LEN));\
+        XADlength((XAD), (LEN));\
-        XADaddress((XAD), (ADDR));\
+        XADaddress((XAD), (ADDR));\
 }
 #define XT_PAGE(IP, MP) BT_PAGE(IP, MP, xtpage_t, i_xtroot)
@@ -76,13 +76,13 @@
                        MP = NULL;\
                        RC = -EIO;\
                }\
-        }\
+        }\
 }
 /* for consistency */
 #define XT_PUTPAGE(MP) BT_PUTPAGE(MP)
-#define XT_GETSEARCH(IP, LEAF, BN, MP,  P, INDEX) \
+#define XT_GETSEARCH(IP, LEAF, BN, MP, P, INDEX) \
        BT_GETSEARCH(IP, LEAF, BN, MP, xtpage_t, P, INDEX, i_xtroot)
 /* xtree entry parameter descriptor */
 struct xtsplit {
@@ -97,7 +97,7 @@ struct xtsplit {
 /*
- *      statistics
+ *      statistics
 */
 #ifdef CONFIG_JFS_STATISTICS
 static struct {
@@ -136,7 +136,7 @@ static int xtRelink(tid_t tid, struct inode *ip, xtpage_t * fp);
 #endif                          /*  _STILL_TO_PORT */
 /*
- *      xtLookup()
+ *      xtLookup()
 *
 * function: map a single page into a physical extent;
 */
@@ -179,7 +179,7 @@ int xtLookup(struct inode *ip, s64 lstart,
        }
        /*
-         *      compute the physical extent covering logical extent
+         *      compute the physical extent covering logical extent
         *
         * N.B. search may have failed (e.g., hole in sparse file),
         * and returned the index of the next entry.
@@ -220,27 +220,27 @@ int xtLookup(struct inode *ip, s64 lstart,
 /*
- *      xtLookupList()
+ *      xtLookupList()
 *
 * function: map a single logical extent into a list of physical extent;
 *
 * parameter:
- *      struct inode    *ip,
+ *      struct inode    *ip,
- *      struct lxdlist  *lxdlist,       lxd list (in)
+ *      struct lxdlist  *lxdlist,       lxd list (in)
- *      struct xadlist  *xadlist,       xad list (in/out)
+ *      struct xadlist  *xadlist,       xad list (in/out)
- *      int             flag)
+ *      int             flag)
 *
 * coverage of lxd by xad under assumption of
 * . lxd's are ordered and disjoint.
 * . xad's are ordered and disjoint.
 *
 * return:
- *      0:      success
+ *      0:      success
 *
 * note: a page being written (even a single byte) is backed fully,
- *      except the last page which is only backed with blocks
+ *      except the last page which is only backed with blocks
- *      required to cover the last byte;
+ *      required to cover the last byte;
- *      the extent backing a page is fully contained within an xad;
+ *      the extent backing a page is fully contained within an xad;
 */
 int xtLookupList(struct inode *ip, struct lxdlist * lxdlist,
                 struct xadlist * xadlist, int flag)
@@ -284,7 +284,7 @@ int xtLookupList(struct inode *ip, struct lxdlist * lxdlist,
                return rc;
        /*
-         *      compute the physical extent covering logical extent
+         *      compute the physical extent covering logical extent
         *
         * N.B. search may have failed (e.g., hole in sparse file),
         * and returned the index of the next entry.
@@ -343,7 +343,7 @@ int xtLookupList(struct inode *ip, struct lxdlist * lxdlist,
                if (lstart >= size)
                        goto mapend;
-                /* compare with the current xad  */
+                /* compare with the current xad */
                goto compare1;
        }
        /* lxd is covered by xad */
@@ -430,7 +430,7 @@ int xtLookupList(struct inode *ip, struct lxdlist * lxdlist,
        /*
         * lxd is partially covered by xad
         */
-        else {                  /* (xend < lend)  */
+        else {                  /* (xend < lend) */
                /*
                 * get next xad
@@ -477,22 +477,22 @@ int xtLookupList(struct inode *ip, struct lxdlist * lxdlist,
 /*
- *      xtSearch()
+ *      xtSearch()
 *
- * function:    search for the xad entry covering specified offset.
+ * function:    search for the xad entry covering specified offset.
 *
 * parameters:
- *      ip      - file object;
+ *      ip      - file object;
- *      xoff    - extent offset;
+ *      xoff    - extent offset;
- *      nextp   - address of next extent (if any) for search miss
+ *      nextp   - address of next extent (if any) for search miss
- *      cmpp    - comparison result:
+ *      cmpp    - comparison result:
- *      btstack - traverse stack;
+ *      btstack - traverse stack;
- *      flag    - search process flag (XT_INSERT);
+ *      flag    - search process flag (XT_INSERT);
 *
 * returns:
- *      btstack contains (bn, index) of search path traversed to the entry.
+ *      btstack contains (bn, index) of search path traversed to the entry.
- *      *cmpp is set to result of comparison with the entry returned.
+ *      *cmpp is set to result of comparison with the entry returned.
- *      the page containing the entry is pinned at exit.
+ *      the page containing the entry is pinned at exit.
 */
 static int xtSearch(struct inode *ip, s64 xoff, s64 *nextp,
                    int *cmpp, struct btstack * btstack, int flag)
@@ -517,7 +517,7 @@ static int xtSearch(struct inode *ip, s64 xoff,	s64 *nextp,
        btstack->nsplit = 0;
        /*
-         *      search down tree from root:
+         *      search down tree from root:
         *
         * between two consecutive entries of <Ki, Pi> and <Kj, Pj> of
         * internal page, child page Pi contains entry with k, Ki <= K < Kj.
@@ -642,7 +642,7 @@ static int xtSearch(struct inode *ip, s64 xoff,	s64 *nextp,
                        XT_CMP(cmp, xoff, &p->xad[index], t64);
                        if (cmp == 0) {
                                /*
-                                 *      search hit
+                                 *      search hit
                                 */
                                /* search hit - leaf page:
                                 * return the entry found
@@ -692,7 +692,7 @@ static int xtSearch(struct inode *ip, s64 xoff,	s64 *nextp,
                }
                /*
-                 *      search miss
+                 *      search miss
                 *
                 * base is the smallest index with key (Kj) greater than
                 * search key (K) and may be zero or maxentry index.
@@ -773,22 +773,22 @@ static int xtSearch(struct inode *ip, s64 xoff,	s64 *nextp,
 }
 /*
- *      xtInsert()
+ *      xtInsert()
 *
 * function:
 *
 * parameter:
- *      tid     - transaction id;
+ *      tid     - transaction id;
- *      ip      - file object;
+ *      ip      - file object;
- *      xflag   - extent flag (XAD_NOTRECORDED):
+ *      xflag   - extent flag (XAD_NOTRECORDED):
- *      xoff    - extent offset;
+ *      xoff    - extent offset;
- *      xlen    - extent length;
+ *      xlen    - extent length;
- *      xaddrp  - extent address pointer (in/out):
+ *      xaddrp  - extent address pointer (in/out):
- *              if (*xaddrp)
+ *              if (*xaddrp)
- *                      caller allocated data extent at *xaddrp;
+ *                      caller allocated data extent at *xaddrp;
- *              else
+ *              else
- *                      allocate data extent and return its xaddr;
+ *                      allocate data extent and return its xaddr;
- *      flag    -
+ *      flag    -
 *
 * return:
 */
@@ -813,7 +813,7 @@ int xtInsert(tid_t tid,		/* transaction id */
        jfs_info("xtInsert: nxoff:0x%lx nxlen:0x%x", (ulong) xoff, xlen);
        /*
-         *      search for the entry location at which to insert:
+         *      search for the entry location at which to insert:
         *
         * xtFastSearch() and xtSearch() both returns (leaf page
         * pinned, index at which to insert).
@@ -853,13 +853,13 @@ int xtInsert(tid_t tid,		/* transaction id */
        }
        /*
-         *      insert entry for new extent
+         *      insert entry for new extent
         */
        xflag |= XAD_NEW;
        /*
-         *      if the leaf page is full, split the page and
+         *      if the leaf page is full, split the page and
-         *      propagate up the router entry for the new page from split
+         *      propagate up the router entry for the new page from split
         *
         * The xtSplitUp() will insert the entry and unpin the leaf page.
         */
@@ -886,7 +886,7 @@ int xtInsert(tid_t tid,		/* transaction id */
        }
        /*
-         *      insert the new entry into the leaf page
+         *      insert the new entry into the leaf page
         */
        /*
         * acquire a transaction lock on the leaf page;
@@ -930,16 +930,16 @@ int xtInsert(tid_t tid,		/* transaction id */
 /*
- *      xtSplitUp()
+ *      xtSplitUp()
 *
 * function:
- *      split full pages as propagating insertion up the tree
+ *      split full pages as propagating insertion up the tree
 *
 * parameter:
- *      tid     - transaction id;
+ *      tid     - transaction id;
- *      ip      - file object;
+ *      ip      - file object;
- *      split   - entry parameter descriptor;
+ *      split   - entry parameter descriptor;
- *      btstack - traverse stack from xtSearch()
+ *      btstack - traverse stack from xtSearch()
 *
 * return:
 */
@@ -1199,22 +1199,22 @@ xtSplitUp(tid_t tid,
 /*
- *      xtSplitPage()
+ *      xtSplitPage()
 *
 * function:
- *      split a full non-root page into
+ *      split a full non-root page into
- *      original/split/left page and new right page
+ *      original/split/left page and new right page
- *      i.e., the original/split page remains as left page.
+ *      i.e., the original/split page remains as left page.
 *
 * parameter:
- *      int             tid,
+ *      int             tid,
- *      struct inode    *ip,
+ *      struct inode    *ip,
- *      struct xtsplit  *split,
+ *      struct xtsplit  *split,
- *      struct metapage **rmpp,
+ *      struct metapage **rmpp,
- *      u64             *rbnp,
+ *      u64             *rbnp,
 *
 * return:
- *      Pointer to page in which to insert or NULL on error.
+ *      Pointer to page in which to insert or NULL on error.
 */
 static int
 xtSplitPage(tid_t tid, struct inode *ip,
@@ -1248,9 +1248,9 @@ xtSplitPage(tid_t tid, struct inode *ip,
        rbn = addressPXD(pxd);
        /* Allocate blocks to quota. */
-       if (DQUOT_ALLOC_BLOCK(ip, lengthPXD(pxd))) {
+        if (DQUOT_ALLOC_BLOCK(ip, lengthPXD(pxd))) {
-               rc = -EDQUOT;
+                rc = -EDQUOT;
-               goto clean_up;
+                goto clean_up;
        }
        quota_allocation += lengthPXD(pxd);
@@ -1304,7 +1304,7 @@ xtSplitPage(tid_t tid, struct inode *ip,
        skip = split->index;
        /*
-         *      sequential append at tail (after last entry of last page)
+         *      sequential append at tail (after last entry of last page)
         *
         * if splitting the last page on a level because of appending
         * a entry to it (skip is maxentry), it's likely that the access is
@@ -1342,7 +1342,7 @@ xtSplitPage(tid_t tid, struct inode *ip,
        }
        /*
-         *      non-sequential insert (at possibly middle page)
+         *      non-sequential insert (at possibly middle page)
         */
        /*
@@ -1465,25 +1465,24 @@ xtSplitPage(tid_t tid, struct inode *ip,
 /*
- *      xtSplitRoot()
+ *      xtSplitRoot()
 *
 * function:
- *      split the full root page into
+ *      split the full root page into original/root/split page and new
- *      original/root/split page and new right page
+ *      right page
- *      i.e., root remains fixed in tree anchor (inode) and
+ *      i.e., root remains fixed in tree anchor (inode) and the root is
- *      the root is copied to a single new right child page
+ *      copied to a single new right child page since root page <<
- *      since root page << non-root page, and
+ *      non-root page, and the split root page contains a single entry
- *      the split root page contains a single entry for the
+ *      for the new right child page.
- *      new right child page.
 *
 * parameter:
- *      int             tid,
+ *      int             tid,
- *      struct inode    *ip,
+ *      struct inode    *ip,
- *      struct xtsplit  *split,
+ *      struct xtsplit  *split,
- *      struct metapage **rmpp)
+ *      struct metapage **rmpp)
 *
 * return:
- *      Pointer to page in which to insert or NULL on error.
+ *      Pointer to page in which to insert or NULL on error.
 */
 static int
 xtSplitRoot(tid_t tid,
@@ -1505,7 +1504,7 @@ xtSplitRoot(tid_t tid,
        INCREMENT(xtStat.split);
        /*
-         *      allocate a single (right) child page
+         *      allocate a single (right) child page
         */
        pxdlist = split->pxdlist;
        pxd = &pxdlist->pxd[pxdlist->npxd];
@@ -1573,7 +1572,7 @@ xtSplitRoot(tid_t tid,
        }
        /*
-         *      reset the root
+         *      reset the root
         *
         * init root with the single entry for the new right page
         * set the 1st entry offset to 0, which force the left-most key
@@ -1610,7 +1609,7 @@ xtSplitRoot(tid_t tid,
 /*
- *      xtExtend()
+ *      xtExtend()
 *
 * function: extend in-place;
 *
@@ -1677,7 +1676,7 @@ int xtExtend(tid_t tid,		/* transaction id */
                goto extendOld;
        /*
-         *      extent overflow: insert entry for new extent
+         *      extent overflow: insert entry for new extent
         */
 //insertNew:
        xoff = offsetXAD(xad) + MAXXLEN;
@@ -1685,8 +1684,8 @@ int xtExtend(tid_t tid,		/* transaction id */
        nextindex = le16_to_cpu(p->header.nextindex);
        /*
-         *      if the leaf page is full, insert the new entry and
+         *      if the leaf page is full, insert the new entry and
-         *      propagate up the router entry for the new page from split
+         *      propagate up the router entry for the new page from split
         *
         * The xtSplitUp() will insert the entry and unpin the leaf page.
         */
@@ -1731,7 +1730,7 @@ int xtExtend(tid_t tid,		/* transaction id */
                }
        }
        /*
-         *      insert the new entry into the leaf page
+         *      insert the new entry into the leaf page
         */
        else {
                /* insert the new entry: mark the entry NEW */
@@ -1771,11 +1770,11 @@ int xtExtend(tid_t tid,		/* transaction id */
 #ifdef _NOTYET
 /*
- *      xtTailgate()
+ *      xtTailgate()
 *
 * function: split existing 'tail' extent
- *      (split offset >= start offset of tail extent), and
+ *      (split offset >= start offset of tail extent), and
- *      relocate and extend the split tail half;
+ *      relocate and extend the split tail half;
 *
 * note: existing extent may or may not have been committed.
 * caller is responsible for pager buffer cache update, and
@@ -1804,7 +1803,7 @@ int xtTailgate(tid_t tid,		/* transaction id */
 /*
 printf("xtTailgate: nxoff:0x%lx nxlen:0x%x nxaddr:0x%lx\n",
-        (ulong)xoff, xlen, (ulong)xaddr);
+        (ulong)xoff, xlen, (ulong)xaddr);
 */
        /* there must exist extent to be tailgated */
@@ -1842,18 +1841,18 @@ printf("xtTailgate: nxoff:0x%lx nxlen:0x%x nxaddr:0x%lx\n",
        xad = &p->xad[index];
 /*
 printf("xtTailgate: xoff:0x%lx xlen:0x%x xaddr:0x%lx\n",
-        (ulong)offsetXAD(xad), lengthXAD(xad), (ulong)addressXAD(xad));
+        (ulong)offsetXAD(xad), lengthXAD(xad), (ulong)addressXAD(xad));
 */
        if ((llen = xoff - offsetXAD(xad)) == 0)
                goto updateOld;
        /*
-         *      partially replace extent: insert entry for new extent
+         *      partially replace extent: insert entry for new extent
         */
 //insertNew:
        /*
-         *      if the leaf page is full, insert the new entry and
+         *      if the leaf page is full, insert the new entry and
-         *      propagate up the router entry for the new page from split
+         *      propagate up the router entry for the new page from split
         *
         * The xtSplitUp() will insert the entry and unpin the leaf page.
         */
@@ -1898,7 +1897,7 @@ printf("xtTailgate: xoff:0x%lx xlen:0x%x xaddr:0x%lx\n",
                }
        }
        /*
-         *      insert the new entry into the leaf page
+         *      insert the new entry into the leaf page
         */
        else {
                /* insert the new entry: mark the entry NEW */
@@ -1955,17 +1954,17 @@ printf("xtTailgate: xoff:0x%lx xlen:0x%x xaddr:0x%lx\n",
 #endif /* _NOTYET */
 /*
- *      xtUpdate()
+ *      xtUpdate()
 *
 * function: update XAD;
 *
- *      update extent for allocated_but_not_recorded or
+ *      update extent for allocated_but_not_recorded or
- *      compressed extent;
+ *      compressed extent;
 *
 * parameter:
- *      nxad    - new XAD;
+ *      nxad    - new XAD;
- *                logical extent of the specified XAD must be completely
+ *              logical extent of the specified XAD must be completely
- *                contained by an existing XAD;
+ *              contained by an existing XAD;
 */
 int xtUpdate(tid_t tid, struct inode *ip, xad_t * nxad)
 {                               /* new XAD */
@@ -2416,19 +2415,19 @@ printf("xtUpdate.updateLeft.split p:0x%p\n", p);
 /*
- *      xtAppend()
+ *      xtAppend()
 *
 * function: grow in append mode from contiguous region specified ;
 *
 * parameter:
- *      tid             - transaction id;
+ *      tid             - transaction id;
- *      ip              - file object;
+ *      ip              - file object;
- *      xflag           - extent flag:
+ *      xflag           - extent flag:
- *      xoff            - extent offset;
+ *      xoff            - extent offset;
- *      maxblocks       - max extent length;
+ *      maxblocks       - max extent length;
- *      xlen            - extent length (in/out);
+ *      xlen            - extent length (in/out);
- *      xaddrp          - extent address pointer (in/out):
+ *      xaddrp          - extent address pointer (in/out):
- *      flag            -
+ *      flag            -
 *
 * return:
 */
@@ -2460,7 +2459,7 @@ int xtAppend(tid_t tid,		/* transaction id */
                 (ulong) xoff, maxblocks, xlen, (ulong) xaddr);
        /*
-         *      search for the entry location at which to insert:
+         *      search for the entry location at which to insert:
         *
         * xtFastSearch() and xtSearch() both returns (leaf page
         * pinned, index at which to insert).
@@ -2482,13 +2481,13 @@ int xtAppend(tid_t tid,		/* transaction id */
                xlen = min(xlen, (int)(next - xoff));
 //insert:
        /*
-         *      insert entry for new extent
+         *      insert entry for new extent
         */
        xflag |= XAD_NEW;
        /*
-         *      if the leaf page is full, split the page and
+         *      if the leaf page is full, split the page and
-         *      propagate up the router entry for the new page from split
+         *      propagate up the router entry for the new page from split
         *
         * The xtSplitUp() will insert the entry and unpin the leaf page.
         */
@@ -2545,7 +2544,7 @@ int xtAppend(tid_t tid,		/* transaction id */
        return 0;
        /*
-         *      insert the new entry into the leaf page
+         *      insert the new entry into the leaf page
         */
      insertLeaf:
        /*
@@ -2589,17 +2588,17 @@ int xtAppend(tid_t tid,		/* transaction id */
 /* - TBD for defragmentation/reorganization -
 *
- *      xtDelete()
+ *      xtDelete()
 *
 * function:
- *      delete the entry with the specified key.
+ *      delete the entry with the specified key.
 *
- *      N.B.: whole extent of the entry is assumed to be deleted.
+ *      N.B.: whole extent of the entry is assumed to be deleted.
 *
 * parameter:
 *
 * return:
- *       ENOENT: if the entry is not found.
+ *      ENOENT: if the entry is not found.
 *
 * exception:
 */
@@ -2665,10 +2664,10 @@ int xtDelete(tid_t tid, struct inode *ip, s64 xoff, s32 xlen, int flag)
 /* - TBD for defragmentation/reorganization -
 *
- *      xtDeleteUp()
+ *      xtDeleteUp()
 *
 * function:
- *      free empty pages as propagating deletion up the tree
+ *      free empty pages as propagating deletion up the tree
 *
 * parameter:
 *
@@ -2815,15 +2814,15 @@ xtDeleteUp(tid_t tid, struct inode *ip,
 /*
- * NAME:        xtRelocate()
+ * NAME:        xtRelocate()
 *
- * FUNCTION:    relocate xtpage or data extent of regular file;
+ * FUNCTION:    relocate xtpage or data extent of regular file;
- *              This function is mainly used by defragfs utility.
+ *              This function is mainly used by defragfs utility.
 *
- * NOTE:        This routine does not have the logic to handle
+ * NOTE:        This routine does not have the logic to handle
- *              uncommitted allocated extent. The caller should call
+ *              uncommitted allocated extent. The caller should call
- *              txCommit() to commit all the allocation before call
+ *              txCommit() to commit all the allocation before call
- *              this routine.
+ *              this routine.
 */
 int
 xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad,  /* old XAD */
@@ -2865,8 +2864,8 @@ xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad,	/* old XAD */
                 xtype, (ulong) xoff, xlen, (ulong) oxaddr, (ulong) nxaddr);
        /*
-         *      1. get and validate the parent xtpage/xad entry
+         *      1. get and validate the parent xtpage/xad entry
-         *      covering the source extent to be relocated;
+         *      covering the source extent to be relocated;
         */
        if (xtype == DATAEXT) {
                /* search in leaf entry */
@@ -2910,7 +2909,7 @@ xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad,	/* old XAD */
        jfs_info("xtRelocate: parent xad entry validated.");
        /*
-         *      2. relocate the extent
+         *      2. relocate the extent
         */
        if (xtype == DATAEXT) {
                /* if the extent is allocated-but-not-recorded
@@ -2923,7 +2922,7 @@ xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad,	/* old XAD */
                        XT_PUTPAGE(pmp);
                /*
-                 *      cmRelocate()
+                 *      cmRelocate()
                 *
                 * copy target data pages to be relocated;
                 *
@@ -2945,8 +2944,8 @@ xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad,	/* old XAD */
                pno = offset >> CM_L2BSIZE;
                npages = (nbytes + (CM_BSIZE - 1)) >> CM_L2BSIZE;
 /*
-                npages = ((offset + nbytes - 1) >> CM_L2BSIZE) -
+                npages = ((offset + nbytes - 1) >> CM_L2BSIZE) -
-                         (offset >> CM_L2BSIZE) + 1;
+                          (offset >> CM_L2BSIZE) + 1;
 */
                sxaddr = oxaddr;
                dxaddr = nxaddr;
@@ -2981,7 +2980,7 @@ xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad,	/* old XAD */
                XT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index);
                jfs_info("xtRelocate: target data extent relocated.");
-        } else {                /* (xtype  == XTPAGE) */
+        } else {                /* (xtype == XTPAGE) */
                /*
                 * read in the target xtpage from the source extent;
@@ -3026,16 +3025,14 @@ xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad,	/* old XAD */
                 */
                if (lmp) {
                        BT_MARK_DIRTY(lmp, ip);
-                        tlck =
+                        tlck = txLock(tid, ip, lmp, tlckXTREE | tlckRELINK);
-                            txLock(tid, ip, lmp, tlckXTREE | tlckRELINK);
                        lp->header.next = cpu_to_le64(nxaddr);
                        XT_PUTPAGE(lmp);
                }
                if (rmp) {
                        BT_MARK_DIRTY(rmp, ip);
-                        tlck =
+                        tlck = txLock(tid, ip, rmp, tlckXTREE | tlckRELINK);
-                            txLock(tid, ip, rmp, tlckXTREE | tlckRELINK);
                        rp->header.prev = cpu_to_le64(nxaddr);
                        XT_PUTPAGE(rmp);
                }
@@ -3062,7 +3059,7 @@ xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad,	/* old XAD */
                 * scan may be skipped by commit() and logredo();
                 */
                BT_MARK_DIRTY(mp, ip);
-                /* tlckNEW init  xtlck->lwm.offset = XTENTRYSTART; */
+                /* tlckNEW init xtlck->lwm.offset = XTENTRYSTART; */
                tlck = txLock(tid, ip, mp, tlckXTREE | tlckNEW);
                xtlck = (struct xtlock *) & tlck->lock;
@@ -3084,7 +3081,7 @@ xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad,	/* old XAD */
        }
        /*
-         *      3. acquire maplock for the source extent to be freed;
+         *      3. acquire maplock for the source extent to be freed;
         *
         * acquire a maplock saving the src relocated extent address;
         * to free the extent at commit time;
@@ -3105,7 +3102,7 @@ xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad,	/* old XAD */
         *      is no buffer associated with this lock since the buffer
         *      has been redirected to the target location.
         */
-        else                    /* (xtype  == XTPAGE) */
+        else                    /* (xtype == XTPAGE) */
                tlck = txMaplock(tid, ip, tlckMAP | tlckRELOCATE);
        pxdlock = (struct pxd_lock *) & tlck->lock;
@@ -3115,7 +3112,7 @@ xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad,	/* old XAD */
        pxdlock->index = 1;
        /*
-         *      4. update the parent xad entry for relocation;
+         *      4. update the parent xad entry for relocation;
         *
         * acquire tlck for the parent entry with XAD_NEW as entry
         * update which will write LOG_REDOPAGE and update bmap for
@@ -3143,22 +3140,22 @@ xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad,	/* old XAD */
 /*
- *      xtSearchNode()
+ *      xtSearchNode()
 *
- * function:    search for the internal xad entry covering specified extent.
+ * function:    search for the internal xad entry covering specified extent.
- *              This function is mainly used by defragfs utility.
+ *              This function is mainly used by defragfs utility.
 *
 * parameters:
- *      ip      - file object;
+ *      ip      - file object;
- *      xad     - extent to find;
+ *      xad     - extent to find;
- *      cmpp    - comparison result:
+ *      cmpp    - comparison result:
- *      btstack - traverse stack;
+ *      btstack - traverse stack;
- *      flag    - search process flag;
+ *      flag    - search process flag;
 *
 * returns:
- *      btstack contains (bn, index) of search path traversed to the entry.
+ *      btstack contains (bn, index) of search path traversed to the entry.
- *      *cmpp is set to result of comparison with the entry returned.
+ *      *cmpp is set to result of comparison with the entry returned.
- *      the page containing the entry is pinned at exit.
+ *      the page containing the entry is pinned at exit.
 */
 static int xtSearchNode(struct inode *ip, xad_t * xad,  /* required XAD entry */
                        int *cmpp, struct btstack * btstack, int flag)
@@ -3181,7 +3178,7 @@ static int xtSearchNode(struct inode *ip, xad_t * xad,	/* required XAD entry */
        xaddr = addressXAD(xad);
        /*
-         *      search down tree from root:
+         *      search down tree from root:
         *
         * between two consecutive entries of <Ki, Pi> and <Kj, Pj> of
         * internal page, child page Pi contains entry with k, Ki <= K < Kj.
@@ -3217,7 +3214,7 @@ static int xtSearchNode(struct inode *ip, xad_t * xad,	/* required XAD entry */
                        XT_CMP(cmp, xoff, &p->xad[index], t64);
                        if (cmp == 0) {
                                /*
-                                 *      search hit
+                                 *      search hit
                                 *
                                 * verify for exact match;
                                 */
@@ -3245,7 +3242,7 @@ static int xtSearchNode(struct inode *ip, xad_t * xad,	/* required XAD entry */
                }
                /*
-                 *      search miss - non-leaf page:
+                 *      search miss - non-leaf page:
                 *
                 * base is the smallest index with key (Kj) greater than
                 * search key (K) and may be zero or maxentry index.
@@ -3268,15 +3265,15 @@ static int xtSearchNode(struct inode *ip, xad_t * xad,	/* required XAD entry */
 /*
- *      xtRelink()
+ *      xtRelink()
 *
 * function:
- *      link around a freed page.
+ *      link around a freed page.
 *
 * Parameter:
- *      int           tid,
+ *      int             tid,
- *      struct inode    *ip,
+ *      struct inode    *ip,
- *      xtpage_t        *p)
+ *      xtpage_t        *p)
 *
 * returns:
 */
@@ -3338,7 +3335,7 @@ static int xtRelink(tid_t tid, struct inode *ip, xtpage_t * p)
 /*
- *      xtInitRoot()
+ *      xtInitRoot()
 *
 * initialize file root (inline in inode)
 */
@@ -3385,42 +3382,42 @@ void xtInitRoot(tid_t tid, struct inode *ip)
 #define MAX_TRUNCATE_LEAVES 50
 /*
- *      xtTruncate()
+ *      xtTruncate()
 *
 * function:
- *      traverse for truncation logging backward bottom up;
+ *      traverse for truncation logging backward bottom up;
- *      terminate at the last extent entry at the current subtree
+ *      terminate at the last extent entry at the current subtree
- *      root page covering new down size.
+ *      root page covering new down size.
- *      truncation may occur within the last extent entry.
+ *      truncation may occur within the last extent entry.
 *
 * parameter:
- *      int           tid,
+ *      int             tid,
- *      struct inode    *ip,
+ *      struct inode    *ip,
- *      s64           newsize,
+ *      s64             newsize,
- *      int           type)   {PWMAP, PMAP, WMAP; DELETE, TRUNCATE}
+ *      int             type)   {PWMAP, PMAP, WMAP; DELETE, TRUNCATE}
 *
 * return:
 *
 * note:
- *      PWMAP:
+ *      PWMAP:
- *       1. truncate (non-COMMIT_NOLINK file)
+ *       1. truncate (non-COMMIT_NOLINK file)
- *          by jfs_truncate() or jfs_open(O_TRUNC):
+ *          by jfs_truncate() or jfs_open(O_TRUNC):
- *          xtree is updated;
+ *          xtree is updated;
 *       2. truncate index table of directory when last entry removed
- *       map update via tlock at commit time;
+ *      map update via tlock at commit time;
- *      PMAP:
+ *      PMAP:
 *       Call xtTruncate_pmap instead
- *      WMAP:
+ *      WMAP:
- *       1. remove (free zero link count) on last reference release
+ *       1. remove (free zero link count) on last reference release
- *          (pmap has been freed at commit zero link count);
+ *          (pmap has been freed at commit zero link count);
- *       2. truncate (COMMIT_NOLINK file, i.e., tmp file):
+ *       2. truncate (COMMIT_NOLINK file, i.e., tmp file):
- *          xtree is updated;
+ *          xtree is updated;
- *       map update directly at truncation time;
+ *       map update directly at truncation time;
 *
- *      if (DELETE)
+ *      if (DELETE)
- *              no LOG_NOREDOPAGE is required (NOREDOFILE is sufficient);
+ *              no LOG_NOREDOPAGE is required (NOREDOFILE is sufficient);
- *      else if (TRUNCATE)
+ *      else if (TRUNCATE)
- *              must write LOG_NOREDOPAGE for deleted index page;
+ *              must write LOG_NOREDOPAGE for deleted index page;
 *
 * pages may already have been tlocked by anonymous transactions
 * during file growth (i.e., write) before truncation;
@@ -3493,7 +3490,7 @@ s64 xtTruncate(tid_t tid, struct inode *ip, s64 newsize, int flag)
         * retained in the new sized file.
         * if type is PMAP, the data and index pages are NOT
         * freed, and the data and index blocks are NOT freed
-         * from  working map.
+         * from working map.
         * (this will allow continued access of data/index of
         * temporary file (zerolink count file truncated to zero-length)).
         */
@@ -3542,7 +3539,7 @@ s64 xtTruncate(tid_t tid, struct inode *ip, s64 newsize, int flag)
                goto getChild;
        /*
-         *      leaf page
+         *      leaf page
         */
        freed = 0;
@@ -3916,7 +3913,7 @@ s64 xtTruncate(tid_t tid, struct inode *ip, s64 newsize, int flag)
        }
        /*
-         *      internal page: go down to child page of current entry
+         *      internal page: go down to child page of current entry
         */
      getChild:
        /* save current parent entry for the child page */
@@ -3965,7 +3962,7 @@ s64 xtTruncate(tid_t tid, struct inode *ip, s64 newsize, int flag)
 /*
- *      xtTruncate_pmap()
+ *      xtTruncate_pmap()
 *
 * function:
 *      Perform truncate to zero length for deleted file, leaving the
@@ -3974,9 +3971,9 @@ s64 xtTruncate(tid_t tid, struct inode *ip, s64 newsize, int flag)
 *      is committed to disk.
 *
 * parameter:
- *      tid_t           tid,
+ *      tid_t           tid,
- *      struct inode    *ip,
+ *      struct inode    *ip,
- *      s64             committed_size)
+ *      s64             committed_size)
 *
 * return: new committed size
 *
@@ -4050,7 +4047,7 @@ s64 xtTruncate_pmap(tid_t tid, struct inode *ip, s64 committed_size)
        }
        /*
-         *      leaf page
+         *      leaf page
         */
        if (++locked_leaves > MAX_TRUNCATE_LEAVES) {
@@ -4062,7 +4059,7 @@ s64 xtTruncate_pmap(tid_t tid, struct inode *ip, s64 committed_size)
                xoff = offsetXAD(xad);
                xlen = lengthXAD(xad);
                XT_PUTPAGE(mp);
-                return  (xoff + xlen) << JFS_SBI(ip->i_sb)->l2bsize;
+                return (xoff + xlen) << JFS_SBI(ip->i_sb)->l2bsize;
        }
        tlck = txLock(tid, ip, mp, tlckXTREE);
        tlck->type = tlckXTREE | tlckFREE;
@@ -4099,8 +4096,7 @@ s64 xtTruncate_pmap(tid_t tid, struct inode *ip, s64 committed_size)
                 */
                tlck = txLock(tid, ip, mp, tlckXTREE);
                xtlck = (struct xtlock *) & tlck->lock;
-                xtlck->hwm.offset =
+                xtlck->hwm.offset = le16_to_cpu(p->header.nextindex) - 1;
-                    le16_to_cpu(p->header.nextindex) - 1;
                tlck->type = tlckXTREE | tlckFREE;
                XT_PUTPAGE(mp);
@@ -4118,7 +4114,7 @@ s64 xtTruncate_pmap(tid_t tid, struct inode *ip, s64 committed_size)
        else
                index--;
        /*
-         *      internal page: go down to child page of current entry
+         *      internal page: go down to child page of current entry
         */
      getChild:
        /* save current parent entry for the child page */
diff --git a/fs/jfs/jfs_xtree.h b/fs/jfs/jfs_xtree.h
index 164f6f2b1019..70815c8a3d6a 100644
--- a/fs/jfs/jfs_xtree.h
+++ b/fs/jfs/jfs_xtree.h
@@ -19,14 +19,14 @@
 #define _H_JFS_XTREE
 /*
- *      jfs_xtree.h: extent allocation descriptor B+-tree manager
+ *      jfs_xtree.h: extent allocation descriptor B+-tree manager
 */
 #include "jfs_btree.h"
 /*
- *      extent allocation descriptor (xad)
+ *      extent allocation descriptor (xad)
 */
 typedef struct xad {
        unsigned flag:8;        /* 1: flag */
@@ -38,30 +38,30 @@ typedef struct xad {
        __le32 addr2;           /* 4: address in unit of fsblksize */
 } xad_t;                        /* (16) */
-#define MAXXLEN         ((1 << 24) - 1)
+#define MAXXLEN         ((1 << 24) - 1)
-#define XTSLOTSIZE      16
+#define XTSLOTSIZE      16
-#define L2XTSLOTSIZE    4
+#define L2XTSLOTSIZE    4
 /* xad_t field construction */
 #define XADoffset(xad, offset64)\
 {\
-        (xad)->off1 = ((u64)offset64) >> 32;\
+        (xad)->off1 = ((u64)offset64) >> 32;\
-        (xad)->off2 = __cpu_to_le32((offset64) & 0xffffffff);\
+        (xad)->off2 = __cpu_to_le32((offset64) & 0xffffffff);\
 }
 #define XADaddress(xad, address64)\
 {\
-        (xad)->addr1 = ((u64)address64) >> 32;\
+        (xad)->addr1 = ((u64)address64) >> 32;\
-        (xad)->addr2 = __cpu_to_le32((address64) & 0xffffffff);\
+        (xad)->addr2 = __cpu_to_le32((address64) & 0xffffffff);\
 }
-#define XADlength(xad, length32)        (xad)->len = __cpu_to_le24(length32)
+#define XADlength(xad, length32)        (xad)->len = __cpu_to_le24(length32)
 /* xad_t field extraction */
 #define offsetXAD(xad)\
-        ( ((s64)((xad)->off1)) << 32 | __le32_to_cpu((xad)->off2))
+        ( ((s64)((xad)->off1)) << 32 | __le32_to_cpu((xad)->off2))
 #define addressXAD(xad)\
-        ( ((s64)((xad)->addr1)) << 32 | __le32_to_cpu((xad)->addr2))
+        ( ((s64)((xad)->addr1)) << 32 | __le32_to_cpu((xad)->addr2))
-#define lengthXAD(xad)  __le24_to_cpu((xad)->len)
+#define lengthXAD(xad)  __le24_to_cpu((xad)->len)
 /* xad list */
 struct xadlist {
@@ -71,22 +71,22 @@ struct xadlist {
 };
 /* xad_t flags */
-#define XAD_NEW         0x01    /* new */
+#define XAD_NEW         0x01    /* new */
-#define XAD_EXTENDED    0x02    /* extended */
+#define XAD_EXTENDED    0x02    /* extended */
-#define XAD_COMPRESSED  0x04    /* compressed with recorded length */
+#define XAD_COMPRESSED  0x04    /* compressed with recorded length */
 #define XAD_NOTRECORDED 0x08    /* allocated but not recorded */
-#define XAD_COW         0x10    /* copy-on-write */
+#define XAD_COW         0x10    /* copy-on-write */
 /* possible values for maxentry */
-#define XTROOTINITSLOT_DIR  6
+#define XTROOTINITSLOT_DIR 6
-#define XTROOTINITSLOT  10
+#define XTROOTINITSLOT  10
-#define XTROOTMAXSLOT   18
+#define XTROOTMAXSLOT   18
-#define XTPAGEMAXSLOT   256
+#define XTPAGEMAXSLOT   256
-#define XTENTRYSTART    2
+#define XTENTRYSTART    2
 /*
- *      xtree page:
+ *      xtree page:
 */
 typedef union {
        struct xtheader {
@@ -106,7 +106,7 @@ typedef union {
 } xtpage_t;
 /*
- *      external declaration
+ *      external declaration
 */
 extern int xtLookup(struct inode *ip, s64 lstart, s64 llen,
                    int *pflag, s64 * paddr, int *plen, int flag);
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index 41c204771262..25161c4121e4 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -328,7 +328,7 @@ static int jfs_mkdir(struct inode *dip, struct dentry *dentry, int mode)
 *              dentry  - child directory dentry
 *
 * RETURN:      -EINVAL - if name is . or ..
- *              -EINVAL  - if . or .. exist but are invalid.
+ *              -EINVAL - if . or .. exist but are invalid.
 *              errors from subroutines
 *
 * note:
@@ -517,7 +517,7 @@ static int jfs_unlink(struct inode *dip, struct dentry *dentry)
        inode_dec_link_count(ip);
        /*
-         *      commit zero link count object
+         *      commit zero link count object
         */
        if (ip->i_nlink == 0) {
                assert(!test_cflag(COMMIT_Nolink, ip));
@@ -596,7 +596,7 @@ static int jfs_unlink(struct inode *dip, struct dentry *dentry)
 /*
 * NAME:        commitZeroLink()
 *
- * FUNCTION:    for non-directory, called by jfs_remove(),
+ * FUNCTION:    for non-directory, called by jfs_remove(),
 *              truncate a regular file, directory or symbolic
 *              link to zero length. return 0 if type is not
 *              one of these.
@@ -676,7 +676,7 @@ static s64 commitZeroLink(tid_t tid, struct inode *ip)
 /*
 * NAME:        jfs_free_zero_link()
 *
- * FUNCTION:    for non-directory, called by iClose(),
+ * FUNCTION:    for non-directory, called by iClose(),
 *              free resources of a file from cache and WORKING map
 *              for a file previously committed with zero link count
 *              while associated with a pager object,
@@ -855,12 +855,12 @@ static int jfs_link(struct dentry *old_dentry,
 * NAME:        jfs_symlink(dip, dentry, name)
 *
 * FUNCTION:    creates a symbolic link to <symlink> by name <name>
- *                      in directory <dip>
+ *                      in directory <dip>
 *
- * PARAMETER:   dip         - parent directory vnode
+ * PARAMETER:   dip     - parent directory vnode
- *                      dentry  - dentry of symbolic link
+ *              dentry  - dentry of symbolic link
- *                      name    - the path name of the existing object
+ *              name    - the path name of the existing object
- *                                    that will be the source of the link
+ *                        that will be the source of the link
 *
 * RETURN:      errors from subroutines
 *
@@ -1052,9 +1052,9 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry,
 /*
- * NAME:        jfs_rename
+ * NAME:        jfs_rename
 *
- * FUNCTION:    rename a file or directory
+ * FUNCTION:    rename a file or directory
 */
 static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry,
               struct inode *new_dir, struct dentry *new_dentry)
@@ -1331,9 +1331,9 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 /*
- * NAME:        jfs_mknod
+ * NAME:        jfs_mknod
 *
- * FUNCTION:    Create a special file (device)
+ * FUNCTION:    Create a special file (device)
 */
 static int jfs_mknod(struct inode *dir, struct dentry *dentry,
                int mode, dev_t rdev)
diff --git a/fs/jfs/resize.c b/fs/jfs/resize.c
index 79d625f3f733..71984ee95346 100644
--- a/fs/jfs/resize.c
+++ b/fs/jfs/resize.c
@@ -29,17 +29,17 @@
 #include "jfs_txnmgr.h"
 #include "jfs_debug.h"
-#define BITSPERPAGE     (PSIZE << 3)
+#define BITSPERPAGE     (PSIZE << 3)
-#define L2MEGABYTE      20
+#define L2MEGABYTE      20
-#define MEGABYTE        (1 << L2MEGABYTE)
+#define MEGABYTE        (1 << L2MEGABYTE)
-#define MEGABYTE32     (MEGABYTE << 5)
+#define MEGABYTE32      (MEGABYTE << 5)
 /* convert block number to bmap file page number */
 #define BLKTODMAPN(b)\
-        (((b) >> 13) + ((b) >> 23) + ((b) >> 33) + 3 + 1)
+        (((b) >> 13) + ((b) >> 23) + ((b) >> 33) + 3 + 1)
 /*
- *      jfs_extendfs()
+ *      jfs_extendfs()
 *
 * function: extend file system;
 *
@@ -48,9 +48,9 @@
 *                                   workspace  space
 *
 * input:
- *      new LVSize: in LV blocks (required)
+ *      new LVSize: in LV blocks (required)
- *      new LogSize: in LV blocks (optional)
+ *      new LogSize: in LV blocks (optional)
- *      new FSSize: in LV blocks (optional)
+ *      new FSSize: in LV blocks (optional)
 *
 * new configuration:
 * 1. set new LogSize as specified or default from new LVSize;
@@ -125,8 +125,8 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize)
        }
        /*
-         *      reconfigure LV spaces
+         *      reconfigure LV spaces
-         *      ---------------------
+         *      ---------------------
         *
         * validate new size, or, if not specified, determine new size
         */
@@ -198,7 +198,7 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize)
                log_formatted = 1;
        }
        /*
-         *      quiesce file system
+         *      quiesce file system
         *
         * (prepare to move the inline log and to prevent map update)
         *
@@ -270,8 +270,8 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize)
        }
        /*
-         *      extend block allocation map
+         *      extend block allocation map
-         *      ---------------------------
+         *      ---------------------------
         *
         * extendfs() for new extension, retry after crash recovery;
         *
@@ -283,7 +283,7 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize)
         *  s_size: aggregate size in physical blocks;
         */
        /*
-         *      compute the new block allocation map configuration
+         *      compute the new block allocation map configuration
         *
         * map dinode:
         *  di_size: map file size in byte;
@@ -301,7 +301,7 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize)
        newNpages = BLKTODMAPN(t64) + 1;
        /*
-         *      extend map from current map (WITHOUT growing mapfile)
+         *      extend map from current map (WITHOUT growing mapfile)
         *
         * map new extension with unmapped part of the last partial
         * dmap page, if applicable, and extra page(s) allocated
@@ -341,8 +341,8 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize)
        XSize -= nblocks;
        /*
-         *      grow map file to cover remaining extension
+         *      grow map file to cover remaining extension
-         *      and/or one extra dmap page for next extendfs();
+         *      and/or one extra dmap page for next extendfs();
         *
         * allocate new map pages and its backing blocks, and
         * update map file xtree
@@ -422,8 +422,8 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize)
        dbFinalizeBmap(ipbmap);
        /*
-         *      update inode allocation map
+         *      update inode allocation map
-         *      ---------------------------
+         *      ---------------------------
         *
         * move iag lists from old to new iag;
         * agstart field is not updated for logredo() to reconstruct
@@ -442,8 +442,8 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize)
        }
        /*
-         *      finalize
+         *      finalize
-         *      --------
+         *      --------
         *
         * extension is committed when on-disk super block is
         * updated with new descriptors: logredo will recover
@@ -480,7 +480,7 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize)
        diFreeSpecial(ipbmap2);
        /*
-         *      update superblock
+         *      update superblock
         */
        if ((rc = readSuper(sb, &bh)))
                goto error_out;
@@ -530,7 +530,7 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize)
      resume:
        /*
-         *      resume file system transactions
+         *      resume file system transactions
         */
        txResume(sb);
diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c
index b753ba216450..b2375f0774b7 100644
--- a/fs/jfs/xattr.c
+++ b/fs/jfs/xattr.c
@@ -63,9 +63,9 @@
 *
 *   On-disk:
 *
- *     FEALISTs are stored on disk using blocks allocated by dbAlloc() and
+ *      FEALISTs are stored on disk using blocks allocated by dbAlloc() and
- *     written directly. An EA list may be in-lined in the inode if there is
+ *      written directly. An EA list may be in-lined in the inode if there is
- *     sufficient room available.
+ *      sufficient room available.
 */
 struct ea_buffer {
@@ -590,7 +590,8 @@ static int ea_get(struct inode *inode, struct ea_buffer *ea_buf, int min_size)
      size_check:
        if (EALIST_SIZE(ea_buf->xattr) != ea_size) {
                printk(KERN_ERR "ea_get: invalid extended attribute\n");
-                dump_mem("xattr", ea_buf->xattr, ea_size);
+                print_hex_dump(KERN_ERR, "", DUMP_PREFIX_ADDRESS, 16, 1,
+                                     ea_buf->xattr, ea_size, 1);
                ea_release(inode, ea_buf);
                rc = -EIO;
                goto clean_up;
diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index 96070bff93fc..572601e98dcd 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -44,9 +44,8 @@ static struct nsm_handle *	nsm_find(const struct sockaddr_in *sin,
 */
 static struct nlm_host *
 nlm_lookup_host(int server, const struct sockaddr_in *sin,
-                                        int proto, int version,
+                int proto, int version, const char *hostname,
-                                        const char *hostname,
+                int hostname_len, const struct sockaddr_in *ssin)
-                                        int hostname_len)
 {
        struct hlist_head *chain;
        struct hlist_node *pos;
@@ -54,7 +53,9 @@ nlm_lookup_host(int server, const struct sockaddr_in *sin,
        struct nsm_handle *nsm = NULL;
        int             hash;
-        dprintk("lockd: nlm_lookup_host(%u.%u.%u.%u, p=%d, v=%d, my role=%s, name=%.*s)\n",
+        dprintk("lockd: nlm_lookup_host("NIPQUAD_FMT"->"NIPQUAD_FMT
+                        ", p=%d, v=%d, my role=%s, name=%.*s)\n",
+                        NIPQUAD(ssin->sin_addr.s_addr),
                        NIPQUAD(sin->sin_addr.s_addr), proto, version,
                        server? "server" : "client",
                        hostname_len,
@@ -91,6 +92,8 @@ nlm_lookup_host(int server, const struct sockaddr_in *sin,
                        continue;
                if (host->h_server != server)
                        continue;
+                if (!nlm_cmp_addr(&host->h_saddr, ssin))
+                        continue;
                /* Move to head of hash chain. */
                hlist_del(&host->h_hash);
@@ -118,6 +121,7 @@ nlm_lookup_host(int server, const struct sockaddr_in *sin,
        host->h_name       = nsm->sm_name;
        host->h_addr       = *sin;
        host->h_addr.sin_port = 0;      /* ouch! */
+        host->h_saddr      = *ssin;
        host->h_version    = version;
        host->h_proto      = proto;
        host->h_rpcclnt    = NULL;
@@ -161,15 +165,9 @@ nlm_destroy_host(struct nlm_host *host)
         */
        nsm_unmonitor(host);
-        if ((clnt = host->h_rpcclnt) != NULL) {
+        clnt = host->h_rpcclnt;
-                if (atomic_read(&clnt->cl_users)) {
+        if (clnt != NULL)
-                        printk(KERN_WARNING
+                rpc_shutdown_client(clnt);
-                                "lockd: active RPC handle\n");
-                        clnt->cl_dead = 1;
-                } else {
-                        rpc_destroy_client(host->h_rpcclnt);
-                }
-        }
        kfree(host);
 }
@@ -180,8 +178,10 @@ struct nlm_host *
 nlmclnt_lookup_host(const struct sockaddr_in *sin, int proto, int version,
                        const char *hostname, int hostname_len)
 {
+        struct sockaddr_in ssin = {0};
        return nlm_lookup_host(0, sin, proto, version,
-                               hostname, hostname_len);
+                               hostname, hostname_len, &ssin);
 }
 /*
@@ -191,9 +191,12 @@ struct nlm_host *
 nlmsvc_lookup_host(struct svc_rqst *rqstp,
                        const char *hostname, int hostname_len)
 {
+        struct sockaddr_in ssin = {0};
+        ssin.sin_addr = rqstp->rq_daddr.addr;
        return nlm_lookup_host(1, svc_addr_in(rqstp),
                               rqstp->rq_prot, rqstp->rq_vers,
-                               hostname, hostname_len);
+                               hostname, hostname_len, &ssin);
 }
 /*
@@ -204,8 +207,9 @@ nlm_bind_host(struct nlm_host *host)
 {
        struct rpc_clnt *clnt;
-        dprintk("lockd: nlm_bind_host(%08x)\n",
+        dprintk("lockd: nlm_bind_host("NIPQUAD_FMT"->"NIPQUAD_FMT")\n",
-                        (unsigned)ntohl(host->h_addr.sin_addr.s_addr));
+                        NIPQUAD(host->h_saddr.sin_addr),
+                        NIPQUAD(host->h_addr.sin_addr));
        /* Lock host handle */
        mutex_lock(&host->h_mutex);
@@ -232,6 +236,7 @@ nlm_bind_host(struct nlm_host *host)
                        .protocol       = host->h_proto,
                        .address        = (struct sockaddr *)&host->h_addr,
                        .addrsize       = sizeof(host->h_addr),
+                        .saddress       = (struct sockaddr *)&host->h_saddr,
                        .timeout        = &timeparms,
                        .servername     = host->h_name,
                        .program        = &nlm_program,
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index 2102e2d0134d..3353ed8421a7 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -61,6 +61,7 @@ nsm_mon_unmon(struct nsm_handle *nsm, u32 proc, struct nsm_res *res)
                        status);
        else
                status = 0;
+        rpc_shutdown_client(clnt);
 out:
        return status;
 }
@@ -138,7 +139,6 @@ nsm_create(void)
                .program        = &nsm_program,
                .version        = SM_VERSION,
                .authflavor     = RPC_AUTH_NULL,
-                .flags          = (RPC_CLNT_CREATE_ONESHOT),
        };
        return rpc_create(&args);
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index 126b1bf02c0e..26809325469c 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -123,9 +123,6 @@ lockd(struct svc_rqst *rqstp)
        /* Process request with signals blocked, but allow SIGKILL.  */
        allow_signal(SIGKILL);
-        /* kick rpciod */
-        rpciod_up();
        dprintk("NFS locking service started (ver " LOCKD_VERSION ").\n");
        if (!nlm_timeout)
@@ -202,9 +199,6 @@ lockd(struct svc_rqst *rqstp)
        /* Exit the RPC thread */
        svc_exit_thread(rqstp);
-        /* release rpciod */
-        rpciod_down();
        /* Release module */
        unlock_kernel();
        module_put_and_exit(0);
diff --git a/fs/minix/file.c b/fs/minix/file.c
index f92baa1d7570..17765f697e50 100644
--- a/fs/minix/file.c
+++ b/fs/minix/file.c
@@ -23,7 +23,7 @@ const struct file_operations minix_file_operations = {
        .aio_write      = generic_file_aio_write,
        .mmap           = generic_file_mmap,
        .fsync          = minix_sync_file,
-        .sendfile       = generic_file_sendfile,
+        .splice_read    = generic_file_splice_read,
 };
 const struct inode_operations minix_file_inode_operations = {
diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile
index f4580b44eef4..b55cb236cf74 100644
--- a/fs/nfs/Makefile
+++ b/fs/nfs/Makefile
@@ -6,8 +6,8 @@ obj-$(CONFIG_NFS_FS) += nfs.o
 nfs-y                   := client.o dir.o file.o getroot.o inode.o super.o nfs2xdr.o \
                           pagelist.o proc.o read.o symlink.o unlink.o \
-                           write.o namespace.o
+                           write.o namespace.o mount_clnt.o
-nfs-$(CONFIG_ROOT_NFS)  += nfsroot.o mount_clnt.o      
+nfs-$(CONFIG_ROOT_NFS)  += nfsroot.o
 nfs-$(CONFIG_NFS_V3)    += nfs3proc.o nfs3xdr.o
 nfs-$(CONFIG_NFS_V3_ACL)        += nfs3acl.o
 nfs-$(CONFIG_NFS_V4)    += nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o \
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 881fa4900923..ccb455053ee4 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -102,19 +102,10 @@ static struct nfs_client *nfs_alloc_client(const char *hostname,
                                           int nfsversion)
 {
        struct nfs_client *clp;
-        int error;
        if ((clp = kzalloc(sizeof(*clp), GFP_KERNEL)) == NULL)
                goto error_0;
-        error = rpciod_up();
-        if (error < 0) {
-                dprintk("%s: couldn't start rpciod! Error = %d\n",
-                                __FUNCTION__, error);
-                goto error_1;
-        }
-        __set_bit(NFS_CS_RPCIOD, &clp->cl_res_state);
        if (nfsversion == 4) {
                if (nfs_callback_up() < 0)
                        goto error_2;
@@ -139,8 +130,6 @@ static struct nfs_client *nfs_alloc_client(const char *hostname,
 #ifdef CONFIG_NFS_V4
        init_rwsem(&clp->cl_sem);
        INIT_LIST_HEAD(&clp->cl_delegations);
-        INIT_LIST_HEAD(&clp->cl_state_owners);
-        INIT_LIST_HEAD(&clp->cl_unused);
        spin_lock_init(&clp->cl_lock);
        INIT_DELAYED_WORK(&clp->cl_renewd, nfs4_renew_state);
        rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS client");
@@ -154,9 +143,6 @@ error_3:
        if (__test_and_clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state))
                nfs_callback_down();
 error_2:
-        rpciod_down();
-        __clear_bit(NFS_CS_RPCIOD, &clp->cl_res_state);
-error_1:
        kfree(clp);
 error_0:
        return NULL;
@@ -167,16 +153,7 @@ static void nfs4_shutdown_client(struct nfs_client *clp)
 #ifdef CONFIG_NFS_V4
        if (__test_and_clear_bit(NFS_CS_RENEWD, &clp->cl_res_state))
                nfs4_kill_renewd(clp);
-        while (!list_empty(&clp->cl_unused)) {
+        BUG_ON(!RB_EMPTY_ROOT(&clp->cl_state_owners));
-                struct nfs4_state_owner *sp;
-                sp = list_entry(clp->cl_unused.next,
-                                struct nfs4_state_owner,
-                                so_list);
-                list_del(&sp->so_list);
-                kfree(sp);
-        }
-        BUG_ON(!list_empty(&clp->cl_state_owners));
        if (__test_and_clear_bit(NFS_CS_IDMAP, &clp->cl_res_state))
                nfs_idmap_delete(clp);
 #endif
@@ -198,9 +175,6 @@ static void nfs_free_client(struct nfs_client *clp)
        if (__test_and_clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state))
                nfs_callback_down();
-        if (__test_and_clear_bit(NFS_CS_RPCIOD, &clp->cl_res_state))
-                rpciod_down();
        kfree(clp->cl_hostname);
        kfree(clp);
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 7f37d1bea83f..20ac403469a0 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -27,6 +27,13 @@ static void nfs_free_delegation(struct nfs_delegation *delegation)
        kfree(delegation);
 }
+static void nfs_free_delegation_callback(struct rcu_head *head)
+{
+        struct nfs_delegation *delegation = container_of(head, struct nfs_delegation, rcu);
+        nfs_free_delegation(delegation);
+}
 static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_state *state)
 {
        struct inode *inode = state->inode;
@@ -57,7 +64,7 @@ out_err:
        return status;
 }
-static void nfs_delegation_claim_opens(struct inode *inode)
+static void nfs_delegation_claim_opens(struct inode *inode, const nfs4_stateid *stateid)
 {
        struct nfs_inode *nfsi = NFS_I(inode);
        struct nfs_open_context *ctx;
@@ -72,9 +79,11 @@ again:
                        continue;
                if (!test_bit(NFS_DELEGATED_STATE, &state->flags))
                        continue;
+                if (memcmp(state->stateid.data, stateid->data, sizeof(state->stateid.data)) != 0)
+                        continue;
                get_nfs_open_context(ctx);
                spin_unlock(&inode->i_lock);
-                err = nfs4_open_delegation_recall(ctx->dentry, state);
+                err = nfs4_open_delegation_recall(ctx, state, stateid);
                if (err >= 0)
                        err = nfs_delegation_claim_locks(ctx, state);
                put_nfs_open_context(ctx);
@@ -115,10 +124,6 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct
        struct nfs_delegation *delegation;
        int status = 0;
-        /* Ensure we first revalidate the attributes and page cache! */
-        if ((nfsi->cache_validity & (NFS_INO_REVAL_PAGECACHE|NFS_INO_INVALID_ATTR)))
-                __nfs_revalidate_inode(NFS_SERVER(inode), inode);
        delegation = kmalloc(sizeof(*delegation), GFP_KERNEL);
        if (delegation == NULL)
                return -ENOMEM;
@@ -131,10 +136,10 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct
        delegation->inode = inode;
        spin_lock(&clp->cl_lock);
-        if (nfsi->delegation == NULL) {
+        if (rcu_dereference(nfsi->delegation) == NULL) {
-                list_add(&delegation->super_list, &clp->cl_delegations);
+                list_add_rcu(&delegation->super_list, &clp->cl_delegations);
-                nfsi->delegation = delegation;
                nfsi->delegation_state = delegation->type;
+                rcu_assign_pointer(nfsi->delegation, delegation);
                delegation = NULL;
        } else {
                if (memcmp(&delegation->stateid, &nfsi->delegation->stateid,
@@ -145,6 +150,12 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct
                        status = -EIO;
                }
        }
+        /* Ensure we revalidate the attributes and page cache! */
+        spin_lock(&inode->i_lock);
+        nfsi->cache_validity |= NFS_INO_REVAL_FORCED;
+        spin_unlock(&inode->i_lock);
        spin_unlock(&clp->cl_lock);
        kfree(delegation);
        return status;
@@ -155,7 +166,7 @@ static int nfs_do_return_delegation(struct inode *inode, struct nfs_delegation *
        int res = 0;
        res = nfs4_proc_delegreturn(inode, delegation->cred, &delegation->stateid);
-        nfs_free_delegation(delegation);
+        call_rcu(&delegation->rcu, nfs_free_delegation_callback);
        return res;
 }
@@ -170,33 +181,55 @@ static void nfs_msync_inode(struct inode *inode)
 /*
 * Basic procedure for returning a delegation to the server
 */
-int __nfs_inode_return_delegation(struct inode *inode)
+static int __nfs_inode_return_delegation(struct inode *inode, struct nfs_delegation *delegation)
 {
        struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
        struct nfs_inode *nfsi = NFS_I(inode);
-        struct nfs_delegation *delegation;
-        int res = 0;
        nfs_msync_inode(inode);
        down_read(&clp->cl_sem);
        /* Guard against new delegated open calls */
        down_write(&nfsi->rwsem);
-        spin_lock(&clp->cl_lock);
+        nfs_delegation_claim_opens(inode, &delegation->stateid);
-        delegation = nfsi->delegation;
-        if (delegation != NULL) {
-                list_del_init(&delegation->super_list);
-                nfsi->delegation = NULL;
-                nfsi->delegation_state = 0;
-        }
-        spin_unlock(&clp->cl_lock);
-        nfs_delegation_claim_opens(inode);
        up_write(&nfsi->rwsem);
        up_read(&clp->cl_sem);
        nfs_msync_inode(inode);
-        if (delegation != NULL)
+        return nfs_do_return_delegation(inode, delegation);
-                res = nfs_do_return_delegation(inode, delegation);
+}
-        return res;
+static struct nfs_delegation *nfs_detach_delegation_locked(struct nfs_inode *nfsi, const nfs4_stateid *stateid)
+{
+        struct nfs_delegation *delegation = rcu_dereference(nfsi->delegation);
+        if (delegation == NULL)
+                goto nomatch;
+        if (stateid != NULL && memcmp(delegation->stateid.data, stateid->data,
+                                sizeof(delegation->stateid.data)) != 0)
+                goto nomatch;
+        list_del_rcu(&delegation->super_list);
+        nfsi->delegation_state = 0;
+        rcu_assign_pointer(nfsi->delegation, NULL);
+        return delegation;
+nomatch:
+        return NULL;
+}
+int nfs_inode_return_delegation(struct inode *inode)
+{
+        struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
+        struct nfs_inode *nfsi = NFS_I(inode);
+        struct nfs_delegation *delegation;
+        int err = 0;
+        if (rcu_dereference(nfsi->delegation) != NULL) {
+                spin_lock(&clp->cl_lock);
+                delegation = nfs_detach_delegation_locked(nfsi, NULL);
+                spin_unlock(&clp->cl_lock);
+                if (delegation != NULL)
+                        err = __nfs_inode_return_delegation(inode, delegation);
+        }
+        return err;
 }
 /*
@@ -211,19 +244,23 @@ void nfs_return_all_delegations(struct super_block *sb)
        if (clp == NULL)
                return;
 restart:
-        spin_lock(&clp->cl_lock);
+        rcu_read_lock();
-        list_for_each_entry(delegation, &clp->cl_delegations, super_list) {
+        list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) {
                if (delegation->inode->i_sb != sb)
                        continue;
                inode = igrab(delegation->inode);
                if (inode == NULL)
                        continue;
+                spin_lock(&clp->cl_lock);
+                delegation = nfs_detach_delegation_locked(NFS_I(inode), NULL);
                spin_unlock(&clp->cl_lock);
-                nfs_inode_return_delegation(inode);
+                rcu_read_unlock();
+                if (delegation != NULL)
+                        __nfs_inode_return_delegation(inode, delegation);
                iput(inode);
                goto restart;
        }
-        spin_unlock(&clp->cl_lock);
+        rcu_read_unlock();
 }
 static int nfs_do_expire_all_delegations(void *ptr)
@@ -234,22 +271,26 @@ static int nfs_do_expire_all_delegations(void *ptr)
        allow_signal(SIGKILL);
 restart:
-        spin_lock(&clp->cl_lock);
        if (test_bit(NFS4CLNT_STATE_RECOVER, &clp->cl_state) != 0)
                goto out;
        if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) == 0)
                goto out;
-        list_for_each_entry(delegation, &clp->cl_delegations, super_list) {
+        rcu_read_lock();
+        list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) {
                inode = igrab(delegation->inode);
                if (inode == NULL)
                        continue;
+                spin_lock(&clp->cl_lock);
+                delegation = nfs_detach_delegation_locked(NFS_I(inode), NULL);
                spin_unlock(&clp->cl_lock);
-                nfs_inode_return_delegation(inode);
+                rcu_read_unlock();
+                if (delegation)
+                        __nfs_inode_return_delegation(inode, delegation);
                iput(inode);
                goto restart;
        }
+        rcu_read_unlock();
 out:
-        spin_unlock(&clp->cl_lock);
        nfs_put_client(clp);
        module_put_and_exit(0);
 }
@@ -280,17 +321,21 @@ void nfs_handle_cb_pathdown(struct nfs_client *clp)
        if (clp == NULL)
                return;
 restart:
-        spin_lock(&clp->cl_lock);
+        rcu_read_lock();
-        list_for_each_entry(delegation, &clp->cl_delegations, super_list) {
+        list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) {
                inode = igrab(delegation->inode);
                if (inode == NULL)
                        continue;
+                spin_lock(&clp->cl_lock);
+                delegation = nfs_detach_delegation_locked(NFS_I(inode), NULL);
                spin_unlock(&clp->cl_lock);
-                nfs_inode_return_delegation(inode);
+                rcu_read_unlock();
+                if (delegation != NULL)
+                        __nfs_inode_return_delegation(inode, delegation);
                iput(inode);
                goto restart;
        }
-        spin_unlock(&clp->cl_lock);
+        rcu_read_unlock();
 }
 struct recall_threadargs {
@@ -316,21 +361,14 @@ static int recall_thread(void *data)
        down_read(&clp->cl_sem);
        down_write(&nfsi->rwsem);
        spin_lock(&clp->cl_lock);
-        delegation = nfsi->delegation;
+        delegation = nfs_detach_delegation_locked(nfsi, args->stateid);
-        if (delegation != NULL && memcmp(delegation->stateid.data,
+        if (delegation != NULL)
-                                args->stateid->data,
-                                sizeof(delegation->stateid.data)) == 0) {
-                list_del_init(&delegation->super_list);
-                nfsi->delegation = NULL;
-                nfsi->delegation_state = 0;
                args->result = 0;
-        } else {
+        else
-                delegation = NULL;
                args->result = -ENOENT;
-        }
        spin_unlock(&clp->cl_lock);
        complete(&args->started);
-        nfs_delegation_claim_opens(inode);
+        nfs_delegation_claim_opens(inode, args->stateid);
        up_write(&nfsi->rwsem);
        up_read(&clp->cl_sem);
        nfs_msync_inode(inode);
@@ -371,14 +409,14 @@ struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs
 {
        struct nfs_delegation *delegation;
        struct inode *res = NULL;
-        spin_lock(&clp->cl_lock);
+        rcu_read_lock();
-        list_for_each_entry(delegation, &clp->cl_delegations, super_list) {
+        list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) {
                if (nfs_compare_fh(fhandle, &NFS_I(delegation->inode)->fh) == 0) {
                        res = igrab(delegation->inode);
                        break;
                }
        }
-        spin_unlock(&clp->cl_lock);
+        rcu_read_unlock();
        return res;
 }
@@ -388,10 +426,10 @@ struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs
 void nfs_delegation_mark_reclaim(struct nfs_client *clp)
 {
        struct nfs_delegation *delegation;
-        spin_lock(&clp->cl_lock);
+        rcu_read_lock();
-        list_for_each_entry(delegation, &clp->cl_delegations, super_list)
+        list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list)
                delegation->flags |= NFS_DELEGATION_NEED_RECLAIM;
-        spin_unlock(&clp->cl_lock);
+        rcu_read_unlock();
 }
 /*
@@ -399,39 +437,35 @@ void nfs_delegation_mark_reclaim(struct nfs_client *clp)
 */
 void nfs_delegation_reap_unclaimed(struct nfs_client *clp)
 {
-        struct nfs_delegation *delegation, *n;
+        struct nfs_delegation *delegation;
-        LIST_HEAD(head);
+restart:
-        spin_lock(&clp->cl_lock);
+        rcu_read_lock();
-        list_for_each_entry_safe(delegation, n, &clp->cl_delegations, super_list) {
+        list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) {
                if ((delegation->flags & NFS_DELEGATION_NEED_RECLAIM) == 0)
                        continue;
-                list_move(&delegation->super_list, &head);
+                spin_lock(&clp->cl_lock);
-                NFS_I(delegation->inode)->delegation = NULL;
+                delegation = nfs_detach_delegation_locked(NFS_I(delegation->inode), NULL);
-                NFS_I(delegation->inode)->delegation_state = 0;
+                spin_unlock(&clp->cl_lock);
-        }
+                rcu_read_unlock();
-        spin_unlock(&clp->cl_lock);
+                if (delegation != NULL)
-        while(!list_empty(&head)) {
+                        call_rcu(&delegation->rcu, nfs_free_delegation_callback);
-                delegation = list_entry(head.next, struct nfs_delegation, super_list);
+                goto restart;
-                list_del(&delegation->super_list);
-                nfs_free_delegation(delegation);
        }
+        rcu_read_unlock();
 }
 int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode)
 {
-        struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
        struct nfs_inode *nfsi = NFS_I(inode);
        struct nfs_delegation *delegation;
-        int res = 0;
+        int ret = 0;
-        if (nfsi->delegation_state == 0)
+        rcu_read_lock();
-                return 0;
+        delegation = rcu_dereference(nfsi->delegation);
-        spin_lock(&clp->cl_lock);
-        delegation = nfsi->delegation;
        if (delegation != NULL) {
                memcpy(dst->data, delegation->stateid.data, sizeof(dst->data));
-                res = 1;
+                ret = 1;
        }
-        spin_unlock(&clp->cl_lock);
+        rcu_read_unlock();
-        return res;
+        return ret;
 }
diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h
index 2cfd4b24c7fe..5874ce7fdbae 100644
--- a/fs/nfs/delegation.h
+++ b/fs/nfs/delegation.h
@@ -22,11 +22,12 @@ struct nfs_delegation {
        long flags;
        loff_t maxsize;
        __u64 change_attr;
+        struct rcu_head rcu;
 };
 int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res);
 void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res);
-int __nfs_inode_return_delegation(struct inode *inode);
+int nfs_inode_return_delegation(struct inode *inode);
 int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid);
 struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs_fh *fhandle);
@@ -39,27 +40,24 @@ void nfs_delegation_reap_unclaimed(struct nfs_client *clp);
 /* NFSv4 delegation-related procedures */
 int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid);
-int nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state *state);
+int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid);
 int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl);
 int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode);
 static inline int nfs_have_delegation(struct inode *inode, int flags)
 {
+        struct nfs_delegation *delegation;
+        int ret = 0;
        flags &= FMODE_READ|FMODE_WRITE;
-        smp_rmb();
+        rcu_read_lock();
-        if ((NFS_I(inode)->delegation_state & flags) == flags)
+        delegation = rcu_dereference(NFS_I(inode)->delegation);
-                return 1;
+        if (delegation != NULL && (delegation->type & flags) == flags)
-        return 0;
+                ret = 1;
+        rcu_read_unlock();
+        return ret;
 }
-static inline int nfs_inode_return_delegation(struct inode *inode)
-{
-        int err = 0;
-        if (NFS_I(inode)->delegation != NULL)
-                err = __nfs_inode_return_delegation(inode);
-        return err;
-}
 #else
 static inline int nfs_have_delegation(struct inode *inode, int flags)
 {
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index c27258b5d3e1..322141f4ab48 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -897,14 +897,13 @@ int nfs_is_exclusive_create(struct inode *dir, struct nameidata *nd)
        return (nd->intent.open.flags & O_EXCL) != 0;
 }
-static inline int nfs_reval_fsid(struct vfsmount *mnt, struct inode *dir,
+static inline int nfs_reval_fsid(struct inode *dir, const struct nfs_fattr *fattr)
-                                 struct nfs_fh *fh, struct nfs_fattr *fattr)
 {
        struct nfs_server *server = NFS_SERVER(dir);
        if (!nfs_fsid_equal(&server->fsid, &fattr->fsid))
-                /* Revalidate fsid on root dir */
+                /* Revalidate fsid using the parent directory */
-                return __nfs_revalidate_inode(server, mnt->mnt_root->d_inode);
+                return __nfs_revalidate_inode(server, dir);
        return 0;
 }
@@ -946,7 +945,7 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru
                res = ERR_PTR(error);
                goto out_unlock;
        }
-        error = nfs_reval_fsid(nd->mnt, dir, &fhandle, &fattr);
+        error = nfs_reval_fsid(dir, &fattr);
        if (error < 0) {
                res = ERR_PTR(error);
                goto out_unlock;
@@ -1244,7 +1243,7 @@ static int nfs_create(struct inode *dir, struct dentry *dentry, int mode,
        attr.ia_mode = mode;
        attr.ia_valid = ATTR_MODE;
-        if (nd && (nd->flags & LOOKUP_CREATE))
+        if ((nd->flags & LOOKUP_CREATE) != 0)
                open_flags = nd->intent.open.flags;
        lock_kernel();
@@ -1535,7 +1534,7 @@ static int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *sym
        lock_kernel();
-        page = alloc_page(GFP_KERNEL);
+        page = alloc_page(GFP_HIGHUSER);
        if (!page) {
                unlock_kernel();
                return -ENOMEM;
@@ -1744,8 +1743,8 @@ int nfs_access_cache_shrinker(int nr_to_scan, gfp_t gfp_mask)
        struct nfs_inode *nfsi;
        struct nfs_access_entry *cache;
-        spin_lock(&nfs_access_lru_lock);
 restart:
+        spin_lock(&nfs_access_lru_lock);
        list_for_each_entry(nfsi, &nfs_access_lru_list, access_cache_inode_lru) {
                struct inode *inode;
@@ -1770,6 +1769,7 @@ remove_lru_entry:
                        clear_bit(NFS_INO_ACL_LRU_SET, &nfsi->flags);
                }
                spin_unlock(&inode->i_lock);
+                spin_unlock(&nfs_access_lru_lock);
                iput(inode);
                goto restart;
        }
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 00eee87510fe..a5c82b6f3b45 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -266,7 +266,7 @@ static const struct rpc_call_ops nfs_read_direct_ops = {
 static ssize_t nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned long user_addr, size_t count, loff_t pos)
 {
        struct nfs_open_context *ctx = dreq->ctx;
-        struct inode *inode = ctx->dentry->d_inode;
+        struct inode *inode = ctx->path.dentry->d_inode;
        size_t rsize = NFS_SERVER(inode)->rsize;
        unsigned int pgbase;
        int result;
@@ -295,9 +295,14 @@ static ssize_t nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned lo
                        break;
                }
                if ((unsigned)result < data->npages) {
-                        nfs_direct_release_pages(data->pagevec, result);
+                        bytes = result * PAGE_SIZE;
-                        nfs_readdata_release(data);
+                        if (bytes <= pgbase) {
-                        break;
+                                nfs_direct_release_pages(data->pagevec, result);
+                                nfs_readdata_release(data);
+                                break;
+                        }
+                        bytes -= pgbase;
+                        data->npages = result;
                }
                get_dreq(dreq);
@@ -601,7 +606,7 @@ static const struct rpc_call_ops nfs_write_direct_ops = {
 static ssize_t nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned long user_addr, size_t count, loff_t pos, int sync)
 {
        struct nfs_open_context *ctx = dreq->ctx;
-        struct inode *inode = ctx->dentry->d_inode;
+        struct inode *inode = ctx->path.dentry->d_inode;
        size_t wsize = NFS_SERVER(inode)->wsize;
        unsigned int pgbase;
        int result;
@@ -630,9 +635,14 @@ static ssize_t nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned l
                        break;
                }
                if ((unsigned)result < data->npages) {
-                        nfs_direct_release_pages(data->pagevec, result);
+                        bytes = result * PAGE_SIZE;
-                        nfs_writedata_release(data);
+                        if (bytes <= pgbase) {
-                        break;
+                                nfs_direct_release_pages(data->pagevec, result);
+                                nfs_writedata_release(data);
+                                break;
+                        }
+                        bytes -= pgbase;
+                        data->npages = result;
                }
                get_dreq(dreq);
@@ -763,10 +773,8 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov,
                (unsigned long) count, (long long) pos);
        if (nr_segs != 1)
-                return -EINVAL;
-        if (count < 0)
                goto out;
        retval = -EFAULT;
        if (!access_ok(VERIFY_WRITE, buf, count))
                goto out;
@@ -814,7 +822,7 @@ out:
 ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
                                unsigned long nr_segs, loff_t pos)
 {
-        ssize_t retval;
+        ssize_t retval = -EINVAL;
        struct file *file = iocb->ki_filp;
        struct address_space *mapping = file->f_mapping;
        /* XXX: temporary */
@@ -827,7 +835,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
                (unsigned long) count, (long long) pos);
        if (nr_segs != 1)
-                return -EINVAL;
+                goto out;
        retval = generic_write_checks(file, &pos, &count, 0);
        if (retval)
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 9eb8eb4e4a08..8689b736fdd9 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -41,7 +41,9 @@ static int nfs_file_open(struct inode *, struct file *);
 static int nfs_file_release(struct inode *, struct file *);
 static loff_t nfs_file_llseek(struct file *file, loff_t offset, int origin);
 static int  nfs_file_mmap(struct file *, struct vm_area_struct *);
-static ssize_t nfs_file_sendfile(struct file *, loff_t *, size_t, read_actor_t, void *);
+static ssize_t nfs_file_splice_read(struct file *filp, loff_t *ppos,
+                                        struct pipe_inode_info *pipe,
+                                        size_t count, unsigned int flags);
 static ssize_t nfs_file_read(struct kiocb *, const struct iovec *iov,
                                unsigned long nr_segs, loff_t pos);
 static ssize_t nfs_file_write(struct kiocb *, const struct iovec *iov,
@@ -65,7 +67,7 @@ const struct file_operations nfs_file_operations = {
        .fsync          = nfs_fsync,
        .lock           = nfs_lock,
        .flock          = nfs_flock,
-        .sendfile       = nfs_file_sendfile,
+        .splice_read    = nfs_file_splice_read,
        .check_flags    = nfs_check_flags,
 };
@@ -224,20 +226,21 @@ nfs_file_read(struct kiocb *iocb, const struct iovec *iov,
 }
 static ssize_t
-nfs_file_sendfile(struct file *filp, loff_t *ppos, size_t count,
+nfs_file_splice_read(struct file *filp, loff_t *ppos,
-                read_actor_t actor, void *target)
+                     struct pipe_inode_info *pipe, size_t count,
+                     unsigned int flags)
 {
        struct dentry *dentry = filp->f_path.dentry;
        struct inode *inode = dentry->d_inode;
        ssize_t res;
-        dfprintk(VFS, "nfs: sendfile(%s/%s, %lu@%Lu)\n",
+        dfprintk(VFS, "nfs: splice_read(%s/%s, %lu@%Lu)\n",
                dentry->d_parent->d_name.name, dentry->d_name.name,
                (unsigned long) count, (unsigned long long) *ppos);
        res = nfs_revalidate_mapping(inode, filp->f_mapping);
        if (!res)
-                res = generic_file_sendfile(filp, ppos, count, actor, target);
+                res = generic_file_splice_read(filp, ppos, pipe, count, flags);
        return res;
 }
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index bd9f5a836592..3d9fccf4ef93 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -461,14 +461,14 @@ static struct nfs_open_context *alloc_nfs_open_context(struct vfsmount *mnt, str
        ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
        if (ctx != NULL) {
-                atomic_set(&ctx->count, 1);
+                ctx->path.dentry = dget(dentry);
-                ctx->dentry = dget(dentry);
+                ctx->path.mnt = mntget(mnt);
-                ctx->vfsmnt = mntget(mnt);
                ctx->cred = get_rpccred(cred);
                ctx->state = NULL;
                ctx->lockowner = current->files;
                ctx->error = 0;
                ctx->dir_cookie = 0;
+                kref_init(&ctx->kref);
        }
        return ctx;
 }
@@ -476,27 +476,33 @@ static struct nfs_open_context *alloc_nfs_open_context(struct vfsmount *mnt, str
 struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx)
 {
        if (ctx != NULL)
-                atomic_inc(&ctx->count);
+                kref_get(&ctx->kref);
        return ctx;
 }
-void put_nfs_open_context(struct nfs_open_context *ctx)
+static void nfs_free_open_context(struct kref *kref)
 {
-        if (atomic_dec_and_test(&ctx->count)) {
+        struct nfs_open_context *ctx = container_of(kref,
-                if (!list_empty(&ctx->list)) {
+                        struct nfs_open_context, kref);
-                        struct inode *inode = ctx->dentry->d_inode;
-                        spin_lock(&inode->i_lock);
+        if (!list_empty(&ctx->list)) {
-                        list_del(&ctx->list);
+                struct inode *inode = ctx->path.dentry->d_inode;
-                        spin_unlock(&inode->i_lock);
+                spin_lock(&inode->i_lock);
-                }
+                list_del(&ctx->list);
-                if (ctx->state != NULL)
+                spin_unlock(&inode->i_lock);
-                        nfs4_close_state(ctx->state, ctx->mode);
-                if (ctx->cred != NULL)
-                        put_rpccred(ctx->cred);
-                dput(ctx->dentry);
-                mntput(ctx->vfsmnt);
-                kfree(ctx);
        }
+        if (ctx->state != NULL)
+                nfs4_close_state(&ctx->path, ctx->state, ctx->mode);
+        if (ctx->cred != NULL)
+                put_rpccred(ctx->cred);
+        dput(ctx->path.dentry);
+        mntput(ctx->path.mnt);
+        kfree(ctx);
+}
+void put_nfs_open_context(struct nfs_open_context *ctx)
+{
+        kref_put(&ctx->kref, nfs_free_open_context);
 }
 /*
@@ -961,8 +967,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
                goto out_changed;
        server = NFS_SERVER(inode);
-        /* Update the fsid if and only if this is the root directory */
+        /* Update the server fsid when a directory reports a different one */
-        if (inode == inode->i_sb->s_root->d_inode
+        if (S_ISDIR(inode->i_mode)
                        && !nfs_fsid_equal(&server->fsid, &fattr->fsid))
                server->fsid = fattr->fsid;
@@ -1066,8 +1072,10 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
                invalid &= ~NFS_INO_INVALID_DATA;
        if (data_stable)
                invalid &= ~(NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ATIME|NFS_INO_REVAL_PAGECACHE);
-        if (!nfs_have_delegation(inode, FMODE_READ))
+        if (!nfs_have_delegation(inode, FMODE_READ) ||
+                        (nfsi->cache_validity & NFS_INO_REVAL_FORCED))
                nfsi->cache_validity |= invalid;
+        nfsi->cache_validity &= ~NFS_INO_REVAL_FORCED;
        return 0;
 out_changed:
@@ -1103,27 +1111,10 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
 */
 void nfs4_clear_inode(struct inode *inode)
 {
-        struct nfs_inode *nfsi = NFS_I(inode);
        /* If we are holding a delegation, return it! */
        nfs_inode_return_delegation(inode);
        /* First call standard NFS clear_inode() code */
        nfs_clear_inode(inode);
-        /* Now clear out any remaining state */
-        while (!list_empty(&nfsi->open_states)) {
-                struct nfs4_state *state;
-                
-                state = list_entry(nfsi->open_states.next,
-                                struct nfs4_state,
-                                inode_states);
-                dprintk("%s(%s/%Ld): found unclaimed NFSv4 state %p\n",
-                                __FUNCTION__,
-                                inode->i_sb->s_id,
-                                (long long)NFS_FILEID(inode),
-                                state);
-                BUG_ON(atomic_read(&state->count) != 1);
-                nfs4_close_state(state, state->state);
-        }
 }
 #endif
@@ -1165,15 +1156,11 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag
        struct nfs_inode *nfsi = (struct nfs_inode *) foo;
        inode_init_once(&nfsi->vfs_inode);
-        spin_lock_init(&nfsi->req_lock);
-        INIT_LIST_HEAD(&nfsi->dirty);
-        INIT_LIST_HEAD(&nfsi->commit);
        INIT_LIST_HEAD(&nfsi->open_files);
        INIT_LIST_HEAD(&nfsi->access_cache_entry_lru);
        INIT_LIST_HEAD(&nfsi->access_cache_inode_lru);
        INIT_RADIX_TREE(&nfsi->nfs_page_tree, GFP_ATOMIC);
        atomic_set(&nfsi->data_updates, 0);
-        nfsi->ndirty = 0;
        nfsi->ncommit = 0;
        nfsi->npages = 0;
        nfs4_init_once(nfsi);
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index ad2b40db1e65..76cf55d57101 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -183,9 +183,9 @@ unsigned long nfs_block_bits(unsigned long bsize, unsigned char *nrbitsp)
 /*
 * Calculate the number of 512byte blocks used.
 */
-static inline unsigned long nfs_calc_block_size(u64 tsize)
+static inline blkcnt_t nfs_calc_block_size(u64 tsize)
 {
-        loff_t used = (tsize + 511) >> 9;
+        blkcnt_t used = (tsize + 511) >> 9;
        return (used > ULONG_MAX) ? ULONG_MAX : used;
 }
diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c
index ca5a266a3140..8afd9f7e7a97 100644
--- a/fs/nfs/mount_clnt.c
+++ b/fs/nfs/mount_clnt.c
@@ -1,7 +1,5 @@
 /*
- * linux/fs/nfs/mount_clnt.c
+ * In-kernel MOUNT protocol client
- *
- * MOUNT client to support NFSroot.
 *
 * Copyright (C) 1997, Olaf Kirch <okir@monad.swb.de>
 */
@@ -18,33 +16,31 @@
 #include <linux/nfs_fs.h>
 #ifdef RPC_DEBUG
-# define NFSDBG_FACILITY        NFSDBG_ROOT
+# define NFSDBG_FACILITY        NFSDBG_MOUNT
 #endif
-/*
-#define MOUNT_PROGRAM           100005
-#define MOUNT_VERSION           1
-#define MOUNT_MNT               1
-#define MOUNT_UMNT              3
- */
-static struct rpc_clnt *        mnt_create(char *, struct sockaddr_in *,
-                                                                int, int);
 static struct rpc_program       mnt_program;
 struct mnt_fhstatus {
-        unsigned int            status;
+        u32 status;
-        struct nfs_fh *         fh;
+        struct nfs_fh *fh;
 };
-/*
+/**
- * Obtain an NFS file handle for the given host and path
+ * nfs_mount - Obtain an NFS file handle for the given host and path
+ * @addr: pointer to server's address
+ * @len: size of server's address
+ * @hostname: name of server host, or NULL
+ * @path: pointer to string containing export path to mount
+ * @version: mount version to use for this request
+ * @protocol: transport protocol to use for this request
+ * @fh: pointer to location to place returned file handle
+ *
+ * Uses default timeout parameters specified by underlying transport.
 */
-int
+int nfs_mount(struct sockaddr *addr, size_t len, char *hostname, char *path,
-nfsroot_mount(struct sockaddr_in *addr, char *path, struct nfs_fh *fh,
+              int version, int protocol, struct nfs_fh *fh)
-                int version, int protocol)
 {
-        struct rpc_clnt         *mnt_clnt;
        struct mnt_fhstatus     result = {
                .fh             = fh
        };
@@ -52,16 +48,25 @@ nfsroot_mount(struct sockaddr_in *addr, char *path, struct nfs_fh *fh,
                .rpc_argp       = path,
                .rpc_resp       = &result,
        };
-        char                    hostname[32];
+        struct rpc_create_args args = {
+                .protocol       = protocol,
+                .address        = addr,
+                .addrsize       = len,
+                .servername     = hostname,
+                .program        = &mnt_program,
+                .version        = version,
+                .authflavor     = RPC_AUTH_UNIX,
+                .flags          = RPC_CLNT_CREATE_INTR,
+        };
+        struct rpc_clnt         *mnt_clnt;
        int                     status;
-        dprintk("NFS:      nfs_mount(%08x:%s)\n",
+        dprintk("NFS: sending MNT request for %s:%s\n",
-                        (unsigned)ntohl(addr->sin_addr.s_addr), path);
+                (hostname ? hostname : "server"), path);
-        sprintf(hostname, "%u.%u.%u.%u", NIPQUAD(addr->sin_addr.s_addr));
+        mnt_clnt = rpc_create(&args);
-        mnt_clnt = mnt_create(hostname, addr, version, protocol);
        if (IS_ERR(mnt_clnt))
-                return PTR_ERR(mnt_clnt);
+                goto out_clnt_err;
        if (version == NFS_MNT3_VERSION)
                msg.rpc_proc = &mnt_clnt->cl_procinfo[MOUNTPROC3_MNT];
@@ -69,33 +74,39 @@ nfsroot_mount(struct sockaddr_in *addr, char *path, struct nfs_fh *fh,
                msg.rpc_proc = &mnt_clnt->cl_procinfo[MNTPROC_MNT];
        status = rpc_call_sync(mnt_clnt, &msg, 0);
-        return status < 0? status : (result.status? -EACCES : 0);
+        rpc_shutdown_client(mnt_clnt);
-}
-static struct rpc_clnt *
+        if (status < 0)
-mnt_create(char *hostname, struct sockaddr_in *srvaddr, int version,
+                goto out_call_err;
-                int protocol)
+        if (result.status != 0)
-{
+                goto out_mnt_err;
-        struct rpc_create_args args = {
-                .protocol       = protocol,
+        dprintk("NFS: MNT request succeeded\n");
-                .address        = (struct sockaddr *)srvaddr,
+        status = 0;
-                .addrsize       = sizeof(*srvaddr),
-                .servername     = hostname,
+out:
-                .program        = &mnt_program,
+        return status;
-                .version        = version,
-                .authflavor     = RPC_AUTH_UNIX,
+out_clnt_err:
-                .flags          = (RPC_CLNT_CREATE_ONESHOT |
+        status = PTR_ERR(mnt_clnt);
-                                   RPC_CLNT_CREATE_INTR),
+        dprintk("NFS: failed to create RPC client, status=%d\n", status);
-        };
+        goto out;
+out_call_err:
+        dprintk("NFS: failed to start MNT request, status=%d\n", status);
+        goto out;
-        return rpc_create(&args);
+out_mnt_err:
+        dprintk("NFS: MNT server returned result %d\n", result.status);
+        status = -EACCES;
+        goto out;
 }
 /*
 * XDR encode/decode functions for MOUNT
 */
-static int
+static int xdr_encode_dirpath(struct rpc_rqst *req, __be32 *p,
-xdr_encode_dirpath(struct rpc_rqst *req, __be32 *p, const char *path)
+                              const char *path)
 {
        p = xdr_encode_string(p, path);
@@ -103,8 +114,8 @@ xdr_encode_dirpath(struct rpc_rqst *req, __be32 *p, const char *path)
        return 0;
 }
-static int
+static int xdr_decode_fhstatus(struct rpc_rqst *req, __be32 *p,
-xdr_decode_fhstatus(struct rpc_rqst *req, __be32 *p, struct mnt_fhstatus *res)
+                               struct mnt_fhstatus *res)
 {
        struct nfs_fh *fh = res->fh;
@@ -115,8 +126,8 @@ xdr_decode_fhstatus(struct rpc_rqst *req, __be32 *p, struct mnt_fhstatus *res)
        return 0;
 }
-static int
+static int xdr_decode_fhstatus3(struct rpc_rqst *req, __be32 *p,
-xdr_decode_fhstatus3(struct rpc_rqst *req, __be32 *p, struct mnt_fhstatus *res)
+                                struct mnt_fhstatus *res)
 {
        struct nfs_fh *fh = res->fh;
@@ -135,53 +146,53 @@ xdr_decode_fhstatus3(struct rpc_rqst *req, __be32 *p, struct mnt_fhstatus *res)
 #define MNT_fhstatus_sz         (1 + 8)
 #define MNT_fhstatus3_sz        (1 + 16)
-static struct rpc_procinfo      mnt_procedures[] = {
+static struct rpc_procinfo mnt_procedures[] = {
-[MNTPROC_MNT] = {
+        [MNTPROC_MNT] = {
-          .p_proc               = MNTPROC_MNT,
+                .p_proc         = MNTPROC_MNT,
-          .p_encode             = (kxdrproc_t) xdr_encode_dirpath,      
+                .p_encode       = (kxdrproc_t) xdr_encode_dirpath,
-          .p_decode             = (kxdrproc_t) xdr_decode_fhstatus,
+                .p_decode       = (kxdrproc_t) xdr_decode_fhstatus,
-          .p_arglen             = MNT_dirpath_sz,
+                .p_arglen       = MNT_dirpath_sz,
-          .p_replen             = MNT_fhstatus_sz,
+                .p_replen       = MNT_fhstatus_sz,
-          .p_statidx            = MNTPROC_MNT,
+                .p_statidx      = MNTPROC_MNT,
-          .p_name               = "MOUNT",
+                .p_name         = "MOUNT",
        },
 };
 static struct rpc_procinfo mnt3_procedures[] = {
-[MOUNTPROC3_MNT] = {
+        [MOUNTPROC3_MNT] = {
-          .p_proc               = MOUNTPROC3_MNT,
+                .p_proc         = MOUNTPROC3_MNT,
-          .p_encode             = (kxdrproc_t) xdr_encode_dirpath,
+                .p_encode       = (kxdrproc_t) xdr_encode_dirpath,
-          .p_decode             = (kxdrproc_t) xdr_decode_fhstatus3,
+                .p_decode       = (kxdrproc_t) xdr_decode_fhstatus3,
-          .p_arglen             = MNT_dirpath_sz,
+                .p_arglen       = MNT_dirpath_sz,
-          .p_replen             = MNT_fhstatus3_sz,
+                .p_replen       = MNT_fhstatus3_sz,
-          .p_statidx            = MOUNTPROC3_MNT,
+                .p_statidx      = MOUNTPROC3_MNT,
-          .p_name               = "MOUNT",
+                .p_name         = "MOUNT",
        },
 };
-static struct rpc_version       mnt_version1 = {
+static struct rpc_version mnt_version1 = {
-                .number         = 1,
+        .number         = 1,
-                .nrprocs        = 2,
+        .nrprocs        = 2,
-                .procs          = mnt_procedures
+        .procs          = mnt_procedures,
 };
-static struct rpc_version       mnt_version3 = {
+static struct rpc_version mnt_version3 = {
-                .number         = 3,
+        .number         = 3,
-                .nrprocs        = 2,
+        .nrprocs        = 2,
-                .procs          = mnt3_procedures
+        .procs          = mnt3_procedures,
 };
-static struct rpc_version *     mnt_version[] = {
+static struct rpc_version *mnt_version[] = {
        NULL,
        &mnt_version1,
        NULL,
        &mnt_version3,
 };
-static struct rpc_stat          mnt_stats;
+static struct rpc_stat mnt_stats;
-static struct rpc_program       mnt_program = {
+static struct rpc_program mnt_program = {
        .name           = "mount",
        .number         = NFS_MNT_PROGRAM,
        .nrvers         = ARRAY_SIZE(mnt_version),
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index cd3ca7b5d3db..7fcc78f2aa71 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -223,7 +223,7 @@ nfs_xdr_diropargs(struct rpc_rqst *req, __be32 *p, struct nfs_diropargs *args)
 static int
 nfs_xdr_readargs(struct rpc_rqst *req, __be32 *p, struct nfs_readargs *args)
 {
-        struct rpc_auth *auth = req->rq_task->tk_auth;
+        struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
        unsigned int replen;
        u32 offset = (u32)args->offset;
        u32 count = args->count;
@@ -380,7 +380,7 @@ static int
 nfs_xdr_readdirargs(struct rpc_rqst *req, __be32 *p, struct nfs_readdirargs *args)
 {
        struct rpc_task *task = req->rq_task;
-        struct rpc_auth *auth = task->tk_auth;
+        struct rpc_auth *auth = task->tk_msg.rpc_cred->cr_auth;
        unsigned int replen;
        u32 count = args->count;
@@ -541,7 +541,7 @@ nfs_xdr_diropres(struct rpc_rqst *req, __be32 *p, struct nfs_diropok *res)
 static int
 nfs_xdr_readlinkargs(struct rpc_rqst *req, __be32 *p, struct nfs_readlinkargs *args)
 {
-        struct rpc_auth *auth = req->rq_task->tk_auth;
+        struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
        unsigned int replen;
        p = xdr_encode_fhandle(p, args->fh);
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 45268d6def2e..814d886b6aa4 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -335,9 +335,7 @@ again:
                 * not sure this buys us anything (and I'd have
                 * to revamp the NFSv3 XDR code) */
                status = nfs3_proc_setattr(dentry, &fattr, sattr);
-                if (status == 0)
+                nfs_post_op_update_inode(dentry->d_inode, &fattr);
-                        nfs_setattr_update_inode(dentry->d_inode, sattr);
-                nfs_refresh_inode(dentry->d_inode, &fattr);
                dprintk("NFS reply setattr (post-create): %d\n", status);
        }
        if (status != 0)
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index b51df8eb9f01..b4647a22f349 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -319,7 +319,7 @@ nfs3_xdr_accessargs(struct rpc_rqst *req, __be32 *p, struct nfs3_accessargs *arg
 static int
 nfs3_xdr_readargs(struct rpc_rqst *req, __be32 *p, struct nfs_readargs *args)
 {
-        struct rpc_auth *auth = req->rq_task->tk_auth;
+        struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
        unsigned int replen;
        u32 count = args->count;
@@ -458,7 +458,7 @@ nfs3_xdr_linkargs(struct rpc_rqst *req, __be32 *p, struct nfs3_linkargs *args)
 static int
 nfs3_xdr_readdirargs(struct rpc_rqst *req, __be32 *p, struct nfs3_readdirargs *args)
 {
-        struct rpc_auth *auth = req->rq_task->tk_auth;
+        struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
        unsigned int replen;
        u32 count = args->count;
@@ -643,7 +643,7 @@ static int
 nfs3_xdr_getaclargs(struct rpc_rqst *req, __be32 *p,
                    struct nfs3_getaclargs *args)
 {
-        struct rpc_auth *auth = req->rq_task->tk_auth;
+        struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
        unsigned int replen;
        p = xdr_encode_fhandle(p, args->fh);
@@ -773,7 +773,7 @@ nfs3_xdr_accessres(struct rpc_rqst *req, __be32 *p, struct nfs3_accessres *res)
 static int
 nfs3_xdr_readlinkargs(struct rpc_rqst *req, __be32 *p, struct nfs3_readlinkargs *args)
 {
-        struct rpc_auth *auth = req->rq_task->tk_auth;
+        struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
        unsigned int replen;
        p = xdr_encode_fhandle(p, args->fh);
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index cf3a17eb5c09..6c028e734fe6 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -70,19 +70,26 @@ static inline void nfs_confirm_seqid(struct nfs_seqid_counter *seqid, int status
                seqid->flags |= NFS_SEQID_CONFIRMED;
 }
+struct nfs_unique_id {
+        struct rb_node rb_node;
+        __u64 id;
+};
 /*
 * NFS4 state_owners and lock_owners are simply labels for ordered
 * sequences of RPC calls. Their sole purpose is to provide once-only
 * semantics by allowing the server to identify replayed requests.
 */
 struct nfs4_state_owner {
-        spinlock_t           so_lock;
+        struct nfs_unique_id so_owner_id;
-        struct list_head     so_list;    /* per-clientid list of state_owners */
        struct nfs_client    *so_client;
-        u32                  so_id;      /* 32-bit identifier, unique */
+        struct nfs_server    *so_server;
-        atomic_t             so_count;
+        struct rb_node       so_client_node;
        struct rpc_cred      *so_cred;   /* Associated cred */
+        spinlock_t           so_lock;
+        atomic_t             so_count;
        struct list_head     so_states;
        struct list_head     so_delegations;
        struct nfs_seqid_counter so_seqid;
@@ -108,7 +115,7 @@ struct nfs4_lock_state {
 #define NFS_LOCK_INITIALIZED 1
        int                     ls_flags;
        struct nfs_seqid_counter        ls_seqid;
-        u32                     ls_id;
+        struct nfs_unique_id    ls_id;
        nfs4_stateid            ls_stateid;
        atomic_t                ls_count;
 };
@@ -116,7 +123,10 @@ struct nfs4_lock_state {
 /* bits for nfs4_state->flags */
 enum {
        LK_STATE_IN_USE,
-        NFS_DELEGATED_STATE,
+        NFS_DELEGATED_STATE,            /* Current stateid is delegation */
+        NFS_O_RDONLY_STATE,             /* OPEN stateid has read-only state */
+        NFS_O_WRONLY_STATE,             /* OPEN stateid has write-only state */
+        NFS_O_RDWR_STATE,               /* OPEN stateid has read/write state */
 };
 struct nfs4_state {
@@ -130,11 +140,14 @@ struct nfs4_state {
        unsigned long flags;            /* Do we hold any locks? */
        spinlock_t state_lock;          /* Protects the lock_states list */
-        nfs4_stateid stateid;
+        seqlock_t seqlock;              /* Protects the stateid/open_stateid */
+        nfs4_stateid stateid;           /* Current stateid: may be delegation */
+        nfs4_stateid open_stateid;      /* OPEN stateid */
-        unsigned int n_rdonly;
+        /* The following 3 fields are protected by owner->so_lock */
-        unsigned int n_wronly;
+        unsigned int n_rdonly;          /* Number of read-only references */
-        unsigned int n_rdwr;
+        unsigned int n_wronly;          /* Number of write-only references */
+        unsigned int n_rdwr;            /* Number of read/write references */
        int state;                      /* State on the server (R,W, or RW) */
        atomic_t count;
 };
@@ -165,7 +178,7 @@ extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struc
 extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct rpc_cred *);
 extern int nfs4_proc_async_renew(struct nfs_client *, struct rpc_cred *);
 extern int nfs4_proc_renew(struct nfs_client *, struct rpc_cred *);
-extern int nfs4_do_close(struct inode *inode, struct nfs4_state *state);
+extern int nfs4_do_close(struct path *path, struct nfs4_state *state);
 extern struct dentry *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *);
 extern int nfs4_open_revalidate(struct inode *, struct dentry *, int, struct nameidata *);
 extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle);
@@ -189,14 +202,13 @@ extern void nfs4_renew_state(struct work_struct *);
 /* nfs4state.c */
 struct rpc_cred *nfs4_get_renew_cred(struct nfs_client *clp);
-extern u32 nfs4_alloc_lockowner_id(struct nfs_client *);
 extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *);
 extern void nfs4_put_state_owner(struct nfs4_state_owner *);
 extern void nfs4_drop_state_owner(struct nfs4_state_owner *);
 extern struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state_owner *);
 extern void nfs4_put_open_state(struct nfs4_state *);
-extern void nfs4_close_state(struct nfs4_state *, mode_t);
+extern void nfs4_close_state(struct path *, struct nfs4_state *, mode_t);
 extern void nfs4_state_set_mode_locked(struct nfs4_state *, mode_t);
 extern void nfs4_schedule_state_recovery(struct nfs_client *);
 extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp);
@@ -222,7 +234,7 @@ extern struct svc_version nfs4_callback_version1;
 #else
-#define nfs4_close_state(a, b) do { } while (0)
+#define nfs4_close_state(a, b, c) do { } while (0)
 #endif /* CONFIG_NFS_V4 */
 #endif /* __LINUX_FS_NFS_NFS4_FS.H */
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 648e0ac0f90e..fee2da856c95 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -65,6 +65,7 @@ static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *)
 static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry);
 static int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct nfs4_exception *exception);
 static int nfs4_wait_clnt_recover(struct rpc_clnt *clnt, struct nfs_client *clp);
+static int _nfs4_do_access(struct inode *inode, struct rpc_cred *cred, int openflags);
 /* Prevent leaks of NFSv4 errors into userland */
 int nfs4_map_errors(int err)
@@ -214,27 +215,39 @@ static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo)
 }
 struct nfs4_opendata {
-        atomic_t count;
+        struct kref kref;
        struct nfs_openargs o_arg;
        struct nfs_openres o_res;
        struct nfs_open_confirmargs c_arg;
        struct nfs_open_confirmres c_res;
        struct nfs_fattr f_attr;
        struct nfs_fattr dir_attr;
-        struct dentry *dentry;
+        struct path path;
        struct dentry *dir;
        struct nfs4_state_owner *owner;
+        struct nfs4_state *state;
        struct iattr attrs;
        unsigned long timestamp;
+        unsigned int rpc_done : 1;
        int rpc_status;
        int cancelled;
 };
-static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry,
+static void nfs4_init_opendata_res(struct nfs4_opendata *p)
+{
+        p->o_res.f_attr = &p->f_attr;
+        p->o_res.dir_attr = &p->dir_attr;
+        p->o_res.server = p->o_arg.server;
+        nfs_fattr_init(&p->f_attr);
+        nfs_fattr_init(&p->dir_attr);
+}
+static struct nfs4_opendata *nfs4_opendata_alloc(struct path *path,
                struct nfs4_state_owner *sp, int flags,
                const struct iattr *attrs)
 {
-        struct dentry *parent = dget_parent(dentry);
+        struct dentry *parent = dget_parent(path->dentry);
        struct inode *dir = parent->d_inode;
        struct nfs_server *server = NFS_SERVER(dir);
        struct nfs4_opendata *p;
@@ -245,24 +258,19 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry,
        p->o_arg.seqid = nfs_alloc_seqid(&sp->so_seqid);
        if (p->o_arg.seqid == NULL)
                goto err_free;
-        atomic_set(&p->count, 1);
+        p->path.mnt = mntget(path->mnt);
-        p->dentry = dget(dentry);
+        p->path.dentry = dget(path->dentry);
        p->dir = parent;
        p->owner = sp;
        atomic_inc(&sp->so_count);
        p->o_arg.fh = NFS_FH(dir);
        p->o_arg.open_flags = flags,
        p->o_arg.clientid = server->nfs_client->cl_clientid;
-        p->o_arg.id = sp->so_id;
+        p->o_arg.id = sp->so_owner_id.id;
-        p->o_arg.name = &dentry->d_name;
+        p->o_arg.name = &p->path.dentry->d_name;
        p->o_arg.server = server;
        p->o_arg.bitmask = server->attr_bitmask;
        p->o_arg.claim = NFS4_OPEN_CLAIM_NULL;
-        p->o_res.f_attr = &p->f_attr;
-        p->o_res.dir_attr = &p->dir_attr;
-        p->o_res.server = server;
-        nfs_fattr_init(&p->f_attr);
-        nfs_fattr_init(&p->dir_attr);
        if (flags & O_EXCL) {
                u32 *s = (u32 *) p->o_arg.u.verifier.data;
                s[0] = jiffies;
@@ -274,6 +282,8 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry,
        p->c_arg.fh = &p->o_res.fh;
        p->c_arg.stateid = &p->o_res.stateid;
        p->c_arg.seqid = p->o_arg.seqid;
+        nfs4_init_opendata_res(p);
+        kref_init(&p->kref);
        return p;
 err_free:
        kfree(p);
@@ -282,27 +292,25 @@ err:
        return NULL;
 }
-static void nfs4_opendata_free(struct nfs4_opendata *p)
+static void nfs4_opendata_free(struct kref *kref)
 {
-        if (p != NULL && atomic_dec_and_test(&p->count)) {
+        struct nfs4_opendata *p = container_of(kref,
-                nfs_free_seqid(p->o_arg.seqid);
+                        struct nfs4_opendata, kref);
-                nfs4_put_state_owner(p->owner);
-                dput(p->dir);
+        nfs_free_seqid(p->o_arg.seqid);
-                dput(p->dentry);
+        if (p->state != NULL)
-                kfree(p);
+                nfs4_put_open_state(p->state);
-        }
+        nfs4_put_state_owner(p->owner);
+        dput(p->dir);
+        dput(p->path.dentry);
+        mntput(p->path.mnt);
+        kfree(p);
 }
-/* Helper for asynchronous RPC calls */
+static void nfs4_opendata_put(struct nfs4_opendata *p)
-static int nfs4_call_async(struct rpc_clnt *clnt,
-                const struct rpc_call_ops *tk_ops, void *calldata)
 {
-        struct rpc_task *task;
+        if (p != NULL)
+                kref_put(&p->kref, nfs4_opendata_free);
-        if (!(task = rpc_new_task(clnt, RPC_TASK_ASYNC, tk_ops, calldata)))
-                return -ENOMEM;
-        rpc_execute(task);
-        return 0;
 }
 static int nfs4_wait_for_completion_rpc_task(struct rpc_task *task)
@@ -316,7 +324,34 @@ static int nfs4_wait_for_completion_rpc_task(struct rpc_task *task)
        return ret;
 }
-static inline void update_open_stateflags(struct nfs4_state *state, mode_t open_flags)
+static int can_open_cached(struct nfs4_state *state, int mode)
+{
+        int ret = 0;
+        switch (mode & (FMODE_READ|FMODE_WRITE|O_EXCL)) {
+                case FMODE_READ:
+                        ret |= test_bit(NFS_O_RDONLY_STATE, &state->flags) != 0;
+                        ret |= test_bit(NFS_O_RDWR_STATE, &state->flags) != 0;
+                        break;
+                case FMODE_WRITE:
+                        ret |= test_bit(NFS_O_WRONLY_STATE, &state->flags) != 0;
+                        ret |= test_bit(NFS_O_RDWR_STATE, &state->flags) != 0;
+                        break;
+                case FMODE_READ|FMODE_WRITE:
+                        ret |= test_bit(NFS_O_RDWR_STATE, &state->flags) != 0;
+        }
+        return ret;
+}
+static int can_open_delegated(struct nfs_delegation *delegation, mode_t open_flags)
+{
+        if ((delegation->type & open_flags) != open_flags)
+                return 0;
+        if (delegation->flags & NFS_DELEGATION_NEED_RECLAIM)
+                return 0;
+        return 1;
+}
+static void update_open_stateflags(struct nfs4_state *state, mode_t open_flags)
 {
        switch (open_flags) {
                case FMODE_WRITE:
@@ -328,41 +363,176 @@ static inline void update_open_stateflags(struct nfs4_state *state, mode_t open_
                case FMODE_READ|FMODE_WRITE:
                        state->n_rdwr++;
        }
+        nfs4_state_set_mode_locked(state, state->state | open_flags);
 }
-static void update_open_stateid(struct nfs4_state *state, nfs4_stateid *stateid, int open_flags)
+static void nfs_set_open_stateid_locked(struct nfs4_state *state, nfs4_stateid *stateid, int open_flags) /* record a new open stateid; nfs_set_open_stateid() and update_open_stateid() call this with state->seqlock write-held */
 {
-        struct inode *inode = state->inode;
+        if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0) /* state->stateid is only overwritten while NFS_DELEGATED_STATE is clear */
+                memcpy(state->stateid.data, stateid->data, sizeof(state->stateid.data));
+        memcpy(state->open_stateid.data, stateid->data, sizeof(state->open_stateid.data)); /* the open_stateid copy is always refreshed */
+        switch (open_flags) { /* flag which open mode the new stateid covers */
+                case FMODE_READ:
+                        set_bit(NFS_O_RDONLY_STATE, &state->flags);
+                        break;
+                case FMODE_WRITE:
+                        set_bit(NFS_O_WRONLY_STATE, &state->flags);
+                        break;
+                case FMODE_READ|FMODE_WRITE:
+                        set_bit(NFS_O_RDWR_STATE, &state->flags); /* any other open_flags value sets no bit */
+        }
+}
+/*
+ * Install a new open stateid on state, holding state->seqlock for writing
+ * around the call to nfs_set_open_stateid_locked().
+ */
+static void nfs_set_open_stateid(struct nfs4_state *state, nfs4_stateid *stateid, int open_flags)
+{
+        seqlock_t *lock = &state->seqlock;
+        write_seqlock(lock);
+        nfs_set_open_stateid_locked(state, stateid, open_flags);
+        write_sequnlock(lock);
+}
+static void update_open_stateid(struct nfs4_state *state, nfs4_stateid *open_stateid, nfs4_stateid *deleg_stateid, int open_flags) /* install an open and/or delegation stateid (either pointer may be NULL), then update the open-mode accounting */
+{
         open_flags &= (FMODE_READ|FMODE_WRITE);
-        /* Protect against nfs4_find_state_byowner() */
+        /*
+         * Protect the call to nfs4_state_set_mode_locked and
+         * serialise the stateid update
+         */
+        write_seqlock(&state->seqlock);
+        if (deleg_stateid != NULL) { /* a delegation stateid replaces state->stateid and marks the state as delegated */
+                memcpy(state->stateid.data, deleg_stateid->data, sizeof(state->stateid.data));
+                set_bit(NFS_DELEGATED_STATE, &state->flags);
+        }
+        if (open_stateid != NULL) /* the open stateid is optional as well */
+                nfs_set_open_stateid_locked(state, open_stateid, open_flags);
+        write_sequnlock(&state->seqlock);
         spin_lock(&state->owner->so_lock);
-        spin_lock(&inode->i_lock);
-        memcpy(&state->stateid, stateid, sizeof(state->stateid));
         update_open_stateflags(state, open_flags);
-        nfs4_state_set_mode_locked(state, state->state | open_flags);
-        spin_unlock(&inode->i_lock);
         spin_unlock(&state->owner->so_lock);
 }
+/*
+ * If the inode holds a delegation that does not grant every access bit in
+ * open_flags, hand it back via nfs_inode_return_delegation().  Nothing is
+ * done when there is no delegation or when it already covers open_flags.
+ */
+static void nfs4_return_incompatible_delegation(struct inode *inode, mode_t open_flags)
+{
+        struct nfs_delegation *deleg;
+        int incompatible;
+        /* the delegation pointer is only examined inside the RCU read section */
+        rcu_read_lock();
+        deleg = rcu_dereference(NFS_I(inode)->delegation);
+        incompatible = deleg != NULL && (deleg->type & open_flags) != open_flags;
+        rcu_read_unlock();
+        if (incompatible)
+                nfs_inode_return_delegation(inode);
+}
+static struct nfs4_state *nfs4_try_open_cached(struct nfs4_opendata *opendata) /* try to satisfy the open from opendata->state's existing open modes or from a delegation; returns the state with state->count raised, or an ERR_PTR() */
+{
+        struct nfs4_state *state = opendata->state;
+        struct nfs_inode *nfsi = NFS_I(state->inode);
+        struct nfs_delegation *delegation;
+        int open_mode = opendata->o_arg.open_flags & (FMODE_READ|FMODE_WRITE|O_EXCL);
+        nfs4_stateid stateid;
+        int ret = -EAGAIN; /* result when neither the cached nor the delegated path applies */
+        rcu_read_lock();
+        delegation = rcu_dereference(nfsi->delegation);
+        for (;;) {
+                if (can_open_cached(state, open_mode)) { /* unlocked check first, repeated below under so_lock */
+                        spin_lock(&state->owner->so_lock);
+                        if (can_open_cached(state, open_mode)) {
+                                update_open_stateflags(state, open_mode);
+                                spin_unlock(&state->owner->so_lock);
+                                rcu_read_unlock();
+                                goto out_return_state;
+                        }
+                        spin_unlock(&state->owner->so_lock);
+                }
+                if (delegation == NULL)
+                        break;
+                if (!can_open_delegated(delegation, open_mode))
+                        break;
+                /* Save the delegation */
+                memcpy(stateid.data, delegation->stateid.data, sizeof(stateid.data)); /* local copy taken before the RCU read lock is dropped */
+                rcu_read_unlock();
+                lock_kernel();
+                ret = _nfs4_do_access(state->inode, state->owner->so_cred, open_mode);
+                unlock_kernel();
+                if (ret != 0) /* access check failed: return that error as is */
+                        goto out;
+                ret = -EAGAIN;
+                rcu_read_lock();
+                delegation = rcu_dereference(nfsi->delegation); /* re-read: the pointer may have changed while the lock was dropped */
+                /* If no delegation, try a cached open */
+                if (delegation == NULL)
+                        continue;
+                /* Is the delegation still valid? */
+                if (memcmp(stateid.data, delegation->stateid.data, sizeof(stateid.data)) != 0)
+                        continue;
+                rcu_read_unlock();
+                update_open_stateid(state, NULL, &stateid, open_mode); /* no open stateid here, only the saved delegation stateid */
+                goto out_return_state;
+        }
+        rcu_read_unlock();
+out:
+        return ERR_PTR(ret);
+out_return_state:
+        atomic_inc(&state->count); /* the returned state carries an extra count */
+        return state;
+}
 static struct nfs4_state *nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data)
 {
         struct inode *inode;
         struct nfs4_state *state = NULL;
+        struct nfs_delegation *delegation;
+        nfs4_stateid *deleg_stateid = NULL;
+        int ret;
-        if (!(data->f_attr.valid & NFS_ATTR_FATTR))
+        if (!data->rpc_done) { /* rpc_done is clear: return whatever nfs4_try_open_cached() yields (a state or an ERR_PTR) */
+                state = nfs4_try_open_cached(data);
                 goto out;
+        }
+        ret = -EAGAIN; /* error used when the reply carries no valid file attributes */
+        if (!(data->f_attr.valid & NFS_ATTR_FATTR))
+                goto err;
         inode = nfs_fhget(data->dir->d_sb, &data->o_res.fh, &data->f_attr);
+        ret = PTR_ERR(inode); /* only meaningful if the IS_ERR() test below fires */
         if (IS_ERR(inode))
-                goto out;
+                goto err;
+        ret = -ENOMEM; /* error used when no open state can be obtained */
         state = nfs4_get_open_state(inode, data->owner);
         if (state == NULL)
-                goto put_inode;
+                goto err_put_inode;
-        update_open_stateid(state, &data->o_res.stateid, data->o_arg.open_flags);
+        if (data->o_res.delegation_type != 0) { /* the open result includes a delegation */
-put_inode:
+                int delegation_flags = 0;
+                rcu_read_lock();
+                delegation = rcu_dereference(NFS_I(inode)->delegation);
+                if (delegation)
+                        delegation_flags = delegation->flags;
+                rcu_read_unlock();
+                if (!(delegation_flags & NFS_DELEGATION_NEED_RECLAIM)) /* set a new delegation unless the inode's current one is flagged for reclaim */
+                        nfs_inode_set_delegation(state->inode,
+                                        data->owner->so_cred,
+                                        &data->o_res);
+                else
+                        nfs_inode_reclaim_delegation(state->inode,
+                                        data->owner->so_cred,
+                                        &data->o_res);
+        }
+        rcu_read_lock();
+        delegation = rcu_dereference(NFS_I(inode)->delegation);
+        if (delegation != NULL)
+                deleg_stateid = &delegation->stateid; /* points into the RCU-protected delegation */
+        update_open_stateid(state, &data->o_res.stateid, deleg_stateid, data->o_arg.open_flags); /* called before rcu_read_unlock() because deleg_stateid points into the delegation */
+        rcu_read_unlock();
         iput(inode);
 out:
         return state;
+err_put_inode:
+        iput(inode);
+err:
+        return ERR_PTR(ret);
 }
 static struct nfs_open_context *nfs4_state_find_open_context(struct nfs4_state *state)
@@ -382,79 +552,66 @@ static struct nfs_open_context *nfs4_state_find_open_context(struct nfs4_state *
        return ERR_PTR(-ENOENT);
 }
-static int nfs4_open_recover_helper(struct nfs4_opendata *opendata, mode_t openflags, nfs4_stateid *stateid)
+static int nfs4_open_recover_helper(struct nfs4_opendata *opendata, mode_t openflags, struct nfs4_state **res) /* run one recovery open in mode openflags; on success store the resulting state in *res and return 0 */
 {
+        struct nfs4_state *newstate;
         int ret;
         opendata->o_arg.open_flags = openflags;
+        memset(&opendata->o_res, 0, sizeof(opendata->o_res)); /* opendata is reused across calls, so both result structs are zeroed first */
+        memset(&opendata->c_res, 0, sizeof(opendata->c_res));
+        nfs4_init_opendata_res(opendata);
         ret = _nfs4_proc_open(opendata);
         if (ret != 0)
                 return ret; 
-        memcpy(stateid->data, opendata->o_res.stateid.data,
+        newstate = nfs4_opendata_to_nfs4_state(opendata);
-                        sizeof(stateid->data));
+        if (IS_ERR(newstate))
+                return PTR_ERR(newstate);
+        nfs4_close_state(&opendata->path, newstate, openflags); /* NOTE(review): presumably undoes the open accounting taken by nfs4_opendata_to_nfs4_state(); confirm against nfs4_close_state() */
+        *res = newstate; /* the caller compares this pointer with the state being recovered */
         return 0;
 }
 static int nfs4_open_recover(struct nfs4_opendata *opendata, struct nfs4_state *state)
 {
-        nfs4_stateid stateid;
         struct nfs4_state *newstate;
-        int mode = 0;
-        int delegation = 0;
         int ret;
         /* memory barrier prior to reading state->n_* */
+        clear_bit(NFS_DELEGATED_STATE, &state->flags); /* the delegated flag is now cleared before the recovery opens rather than after them */
         smp_rmb();
         if (state->n_rdwr != 0) {
-                ret = nfs4_open_recover_helper(opendata, FMODE_READ|FMODE_WRITE, &stateid);
+                ret = nfs4_open_recover_helper(opendata, FMODE_READ|FMODE_WRITE, &newstate);
                 if (ret != 0)
                         return ret;
-                mode |= FMODE_READ|FMODE_WRITE;
+                if (newstate != state) /* the recovery open resolved to a different nfs4_state */
-                if (opendata->o_res.delegation_type != 0)
+                        return -ESTALE;
-                        delegation = opendata->o_res.delegation_type;
-                smp_rmb();
         }
         if (state->n_wronly != 0) {
-                ret = nfs4_open_recover_helper(opendata, FMODE_WRITE, &stateid);
+                ret = nfs4_open_recover_helper(opendata, FMODE_WRITE, &newstate);
                 if (ret != 0)
                         return ret;
-                mode |= FMODE_WRITE;
+                if (newstate != state) /* same identity check as for the read-write case */
-                if (opendata->o_res.delegation_type != 0)
+                        return -ESTALE;
-                        delegation = opendata->o_res.delegation_type;
-                smp_rmb();
         }
         if (state->n_rdonly != 0) {
-                ret = nfs4_open_recover_helper(opendata, FMODE_READ, &stateid);
+                ret = nfs4_open_recover_helper(opendata, FMODE_READ, &newstate);
                 if (ret != 0)
                         return ret;
-                mode |= FMODE_READ;
+                if (newstate != state) /* same identity check as for the read-write case */
+                        return -ESTALE;
         }
-        clear_bit(NFS_DELEGATED_STATE, &state->flags);
+        /*
-        if (mode == 0)
+         * We may have performed cached opens for all three recoveries.
-                return 0;
+         * Check if we need to update the current stateid.
-        if (opendata->o_res.delegation_type == 0)
+         */
-                opendata->o_res.delegation_type = delegation;
+        if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0 &&
-        opendata->o_arg.open_flags |= mode;
+            memcmp(state->stateid.data, state->open_stateid.data, sizeof(state->stateid.data)) != 0) { /* unlocked pre-check; the flag is tested again under the seqlock */
-        newstate = nfs4_opendata_to_nfs4_state(opendata);
+                write_seqlock(&state->seqlock);
-        if (newstate != NULL) {
+                if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0)
-                if (opendata->o_res.delegation_type != 0) {
+                        memcpy(state->stateid.data, state->open_stateid.data, sizeof(state->stateid.data)); /* make the current stateid match the open stateid */
-                        struct nfs_inode *nfsi = NFS_I(newstate->inode);
+                write_sequnlock(&state->seqlock);
-                        int delegation_flags = 0;
-                        if (nfsi->delegation)
-                                delegation_flags = nfsi->delegation->flags;
-                        if (!(delegation_flags & NFS_DELEGATION_NEED_RECLAIM))
-                                nfs_inode_set_delegation(newstate->inode,
-                                                opendata->owner->so_cred,
-                                                &opendata->o_res);
-                        else
-                                nfs_inode_reclaim_delegation(newstate->inode,
-                                                opendata->owner->so_cred,
-                                                &opendata->o_res);
-                }
-                nfs4_close_state(newstate, opendata->o_arg.open_flags);
         }
-        if (newstate != state)
-                return -ESTALE;
         return 0;
 }
@@ -462,41 +619,37 @@ static int nfs4_open_recover(struct nfs4_opendata *opendata, struct nfs4_state *
 * OPEN_RECLAIM:
 *      reclaim state on the server after a reboot.
 */
-static int _nfs4_do_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *state, struct dentry *dentry)
+static int _nfs4_do_open_reclaim(struct nfs_open_context *ctx, struct nfs4_state *state)
 {
-        struct nfs_delegation *delegation = NFS_I(state->inode)->delegation;
+        struct nfs_delegation *delegation;
         struct nfs4_opendata *opendata;
         int delegation_type = 0;
         int status;
-        if (delegation != NULL) {
+        /* The opendata is built from the open context's path and the state's owner. */
+        opendata = nfs4_opendata_alloc(&ctx->path, state->owner, 0, NULL);
-                if (!(delegation->flags & NFS_DELEGATION_NEED_RECLAIM)) {
-                        memcpy(&state->stateid, &delegation->stateid,
-                                        sizeof(state->stateid));
-                        set_bit(NFS_DELEGATED_STATE, &state->flags);
-                        return 0;
-                }
-                delegation_type = delegation->type;
-        }
-        opendata = nfs4_opendata_alloc(dentry, sp, 0, NULL);
         if (opendata == NULL)
                 return -ENOMEM;
         opendata->o_arg.claim = NFS4_OPEN_CLAIM_PREVIOUS;
         opendata->o_arg.fh = NFS_FH(state->inode);
         nfs_copy_fh(&opendata->o_res.fh, opendata->o_arg.fh);
+        /*
+         * Only a delegation flagged NFS_DELEGATION_NEED_RECLAIM is claimed
+         * again.  The value sent is the delegation's type, not its flags
+         * word; with no such delegation, delegation_type stays 0.
+         */
+        rcu_read_lock();
+        delegation = rcu_dereference(NFS_I(state->inode)->delegation);
+        if (delegation != NULL && (delegation->flags & NFS_DELEGATION_NEED_RECLAIM) != 0)
+                delegation_type = delegation->type;
+        rcu_read_unlock();
         opendata->o_arg.u.delegation_type = delegation_type;
         status = nfs4_open_recover(opendata, state);
-        nfs4_opendata_free(opendata);
+        nfs4_opendata_put(opendata);
         return status;
 }
-static int nfs4_do_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *state, struct dentry *dentry)
+static int nfs4_do_open_reclaim(struct nfs_open_context *ctx, struct nfs4_state *state)
 {
        struct nfs_server *server = NFS_SERVER(state->inode);
        struct nfs4_exception exception = { };
        int err;
        do {
-                err = _nfs4_do_open_reclaim(sp, state, dentry);
+                err = _nfs4_do_open_reclaim(ctx, state);
                if (err != -NFS4ERR_DELAY)
                        break;
                nfs4_handle_exception(server, err, &exception);
@@ -512,37 +665,35 @@ static int nfs4_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *sta
        ctx = nfs4_state_find_open_context(state);
        if (IS_ERR(ctx))
                return PTR_ERR(ctx);
-        ret = nfs4_do_open_reclaim(sp, state, ctx->dentry);
+        ret = nfs4_do_open_reclaim(ctx, state);
        put_nfs_open_context(ctx);
        return ret;
 }
-static int _nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state *state)
+static int _nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid) /* recover state's opens with a CLAIM_DELEGATE_CUR open that carries the caller-supplied delegation stateid */
 {
         struct nfs4_state_owner  *sp  = state->owner;
         struct nfs4_opendata *opendata;
         int ret;
-        if (!test_bit(NFS_DELEGATED_STATE, &state->flags))
+        opendata = nfs4_opendata_alloc(&ctx->path, sp, 0, NULL); /* the early return on a clear NFS_DELEGATED_STATE bit is gone; allocation now always happens */
-                return 0;
-        opendata = nfs4_opendata_alloc(dentry, sp, 0, NULL);
         if (opendata == NULL)
                 return -ENOMEM;
         opendata->o_arg.claim = NFS4_OPEN_CLAIM_DELEGATE_CUR;
-        memcpy(opendata->o_arg.u.delegation.data, state->stateid.data,
+        memcpy(opendata->o_arg.u.delegation.data, stateid->data, /* taken from the stateid argument instead of state->stateid */
                         sizeof(opendata->o_arg.u.delegation.data));
         ret = nfs4_open_recover(opendata, state);
-        nfs4_opendata_free(opendata);
+        nfs4_opendata_put(opendata); /* drop the reference obtained from nfs4_opendata_alloc() */
         return ret;
 }
-int nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state *state)
+int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid)
 {
        struct nfs4_exception exception = { };
-        struct nfs_server *server = NFS_SERVER(dentry->d_inode);
+        struct nfs_server *server = NFS_SERVER(state->inode);
        int err;
        do {
-                err = _nfs4_open_delegation_recall(dentry, state);
+                err = _nfs4_open_delegation_recall(ctx, state, stateid);
                switch (err) {
                        case 0:
                                return err;
@@ -582,9 +733,10 @@ static void nfs4_open_confirm_done(struct rpc_task *task, void *calldata)
                memcpy(data->o_res.stateid.data, data->c_res.stateid.data,
                                sizeof(data->o_res.stateid.data));
                renew_lease(data->o_res.server, data->timestamp);
+                data->rpc_done = 1;
        }
-        nfs_increment_open_seqid(data->rpc_status, data->c_arg.seqid);
        nfs_confirm_seqid(&data->owner->so_seqid, data->rpc_status);
+        nfs_increment_open_seqid(data->rpc_status, data->c_arg.seqid);
 }
 static void nfs4_open_confirm_release(void *calldata)
@@ -596,14 +748,14 @@ static void nfs4_open_confirm_release(void *calldata)
        if (data->cancelled == 0)
                goto out_free;
        /* In case of error, no cleanup! */
-        if (data->rpc_status != 0)
+        if (!data->rpc_done)
                goto out_free;
        nfs_confirm_seqid(&data->owner->so_seqid, 0);
        state = nfs4_opendata_to_nfs4_state(data);
-        if (state != NULL)
+        if (!IS_ERR(state))
-                nfs4_close_state(state, data->o_arg.open_flags);
+                nfs4_close_state(&data->path, state, data->o_arg.open_flags);
 out_free:
-        nfs4_opendata_free(data);
+        nfs4_opendata_put(data);
 }
 static const struct rpc_call_ops nfs4_open_confirm_ops = {
@@ -621,12 +773,9 @@ static int _nfs4_proc_open_confirm(struct nfs4_opendata *data)
        struct rpc_task *task;
        int status;
-        atomic_inc(&data->count);
+        kref_get(&data->kref);
-        /*
+        data->rpc_done = 0;
-         * If rpc_run_task() ends up calling ->rpc_release(), we
+        data->rpc_status = 0;
-         * want to ensure that it takes the 'error' code path.
-         */
-        data->rpc_status = -ENOMEM;
        task = rpc_run_task(server->client, RPC_TASK_ASYNC, &nfs4_open_confirm_ops, data);
        if (IS_ERR(task))
                return PTR_ERR(task);
@@ -653,13 +802,35 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata)
        
        if (nfs_wait_on_sequence(data->o_arg.seqid, task) != 0)
                return;
+        /*
+         * Check if we still need to send an OPEN call, or if we can use
+         * a delegation instead.
+         */
+        if (data->state != NULL) {
+                struct nfs_delegation *delegation;
+                if (can_open_cached(data->state, data->o_arg.open_flags & (FMODE_READ|FMODE_WRITE|O_EXCL)))
+                        goto out_no_action;
+                rcu_read_lock();
+                delegation = rcu_dereference(NFS_I(data->state->inode)->delegation);
+                if (delegation != NULL &&
+                   (delegation->flags & NFS_DELEGATION_NEED_RECLAIM) == 0) {
+                        rcu_read_unlock();
+                        goto out_no_action;
+                }
+                rcu_read_unlock();
+        }
        /* Update sequence id. */
-        data->o_arg.id = sp->so_id;
+        data->o_arg.id = sp->so_owner_id.id;
        data->o_arg.clientid = sp->so_client->cl_clientid;
        if (data->o_arg.claim == NFS4_OPEN_CLAIM_PREVIOUS)
                msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_NOATTR];
        data->timestamp = jiffies;
        rpc_call_setup(task, &msg, 0);
+        return;
+out_no_action:
+        task->tk_action = NULL;
 }
 static void nfs4_open_done(struct rpc_task *task, void *calldata)
@@ -683,8 +854,11 @@ static void nfs4_open_done(struct rpc_task *task, void *calldata)
                                data->rpc_status = -ENOTDIR;
                }
                renew_lease(data->o_res.server, data->timestamp);
+                if (!(data->o_res.rflags & NFS4_OPEN_RESULT_CONFIRM))
+                        nfs_confirm_seqid(&data->owner->so_seqid, 0);
        }
        nfs_increment_open_seqid(data->rpc_status, data->o_arg.seqid);
+        data->rpc_done = 1;
 }
 static void nfs4_open_release(void *calldata)
@@ -696,17 +870,17 @@ static void nfs4_open_release(void *calldata)
        if (data->cancelled == 0)
                goto out_free;
        /* In case of error, no cleanup! */
-        if (data->rpc_status != 0)
+        if (data->rpc_status != 0 || !data->rpc_done)
                goto out_free;
        /* In case we need an open_confirm, no cleanup! */
        if (data->o_res.rflags & NFS4_OPEN_RESULT_CONFIRM)
                goto out_free;
        nfs_confirm_seqid(&data->owner->so_seqid, 0);
        state = nfs4_opendata_to_nfs4_state(data);
-        if (state != NULL)
+        if (!IS_ERR(state))
-                nfs4_close_state(state, data->o_arg.open_flags);
+                nfs4_close_state(&data->path, state, data->o_arg.open_flags);
 out_free:
-        nfs4_opendata_free(data);
+        nfs4_opendata_put(data);
 }
 static const struct rpc_call_ops nfs4_open_ops = {
@@ -727,12 +901,10 @@ static int _nfs4_proc_open(struct nfs4_opendata *data)
        struct rpc_task *task;
        int status;
-        atomic_inc(&data->count);
+        kref_get(&data->kref);
-        /*
+        data->rpc_done = 0;
-         * If rpc_run_task() ends up calling ->rpc_release(), we
+        data->rpc_status = 0;
-         * want to ensure that it takes the 'error' code path.
+        data->cancelled = 0;
-         */
-        data->rpc_status = -ENOMEM;
        task = rpc_run_task(server->client, RPC_TASK_ASYNC, &nfs4_open_ops, data);
        if (IS_ERR(task))
                return PTR_ERR(task);
@@ -743,7 +915,7 @@ static int _nfs4_proc_open(struct nfs4_opendata *data)
        } else
                status = data->rpc_status;
        rpc_put_task(task);
-        if (status != 0)
+        if (status != 0 || !data->rpc_done)
                return status;
        if (o_arg->open_flags & O_CREAT) {
@@ -756,7 +928,6 @@ static int _nfs4_proc_open(struct nfs4_opendata *data)
                if (status != 0)
                        return status;
        }
-        nfs_confirm_seqid(&data->owner->so_seqid, 0);
        if (!(o_res->f_attr->valid & NFS_ATTR_FATTR))
                return server->nfs_client->rpc_ops->getattr(server, &o_res->fh, o_res->f_attr);
        return 0;
@@ -772,6 +943,8 @@ static int _nfs4_do_access(struct inode *inode, struct rpc_cred *cred, int openf
                mask |= MAY_READ;
        if (openflags & FMODE_WRITE)
                mask |= MAY_WRITE;
+        if (openflags & FMODE_EXEC)
+                mask |= MAY_EXEC;
        status = nfs_access_get_cached(inode, cred, &cache);
        if (status == 0)
                goto out;
@@ -811,43 +984,32 @@ static int nfs4_recover_expired_lease(struct nfs_server *server)
 *      reclaim state on the server after a network partition.
 *      Assumes caller holds the appropriate lock
 */
-static int _nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state, struct dentry *dentry)
+static int _nfs4_open_expired(struct nfs_open_context *ctx, struct nfs4_state *state) /* re-establish state's opens through nfs4_open_recover(); returns its result, or -ENOMEM if allocation fails */
 {
-        struct inode *inode = state->inode;
-        struct nfs_delegation *delegation = NFS_I(inode)->delegation;
         struct nfs4_opendata *opendata;
-        int openflags = state->state & (FMODE_READ|FMODE_WRITE);
         int ret;
-        if (delegation != NULL && !(delegation->flags & NFS_DELEGATION_NEED_RECLAIM)) {
+        opendata = nfs4_opendata_alloc(&ctx->path, state->owner, 0, NULL); /* flags are 0 here; nfs4_open_recover() chooses the open modes from state->n_* */
-                ret = _nfs4_do_access(inode, sp->so_cred, openflags);
-                if (ret < 0)
-                        return ret;
-                memcpy(&state->stateid, &delegation->stateid, sizeof(state->stateid));
-                set_bit(NFS_DELEGATED_STATE, &state->flags);
-                return 0;
-        }
-        opendata = nfs4_opendata_alloc(dentry, sp, openflags, NULL);
         if (opendata == NULL)
                 return -ENOMEM;
         ret = nfs4_open_recover(opendata, state);
         if (ret == -ESTALE) {
                 /* Invalidate the state owner so we don't ever use it again */
-                nfs4_drop_state_owner(sp);
+                nfs4_drop_state_owner(state->owner);
-                d_drop(dentry);
+                d_drop(ctx->path.dentry); /* the dentry now comes from the open context's path */
         }
-        nfs4_opendata_free(opendata);
+        nfs4_opendata_put(opendata); /* drop the reference obtained from nfs4_opendata_alloc() */
         return ret;
 }
-static inline int nfs4_do_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state, struct dentry *dentry)
+static inline int nfs4_do_open_expired(struct nfs_open_context *ctx, struct nfs4_state *state)
 {
-        struct nfs_server *server = NFS_SERVER(dentry->d_inode);
+        struct nfs_server *server = NFS_SERVER(state->inode);
        struct nfs4_exception exception = { };
        int err;
        do {
-                err = _nfs4_open_expired(sp, state, dentry);
+                err = _nfs4_open_expired(ctx, state);
                if (err == -NFS4ERR_DELAY)
                        nfs4_handle_exception(server, err, &exception);
        } while (exception.retry);
@@ -862,107 +1024,38 @@ static int nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *sta
        ctx = nfs4_state_find_open_context(state);
        if (IS_ERR(ctx))
                return PTR_ERR(ctx);
-        ret = nfs4_do_open_expired(sp, state, ctx->dentry);
+        ret = nfs4_do_open_expired(ctx, state);
        put_nfs_open_context(ctx);
        return ret;
 }
 /*
- * Returns a referenced nfs4_state if there is an open delegation on the file
+ * on an EXCLUSIVE create, the server should send back a bitmask with FATTR4-*
+ * fields corresponding to attributes that were used to store the verifier.
+ * Make sure we clobber those fields in the later setattr call
 */
-static int _nfs4_open_delegated(struct inode *inode, int flags, struct rpc_cred *cred, struct nfs4_state **res)
+static inline void nfs4_exclusive_attrset(struct nfs4_opendata *opendata, struct iattr *sattr) /* add ATTR_ATIME / ATTR_MTIME to sattr->ia_valid for the time attributes reported in the open result's attrset */
-{
-        struct nfs_delegation *delegation;
-        struct nfs_server *server = NFS_SERVER(inode);
-        struct nfs_client *clp = server->nfs_client;
-        struct nfs_inode *nfsi = NFS_I(inode);
-        struct nfs4_state_owner *sp = NULL;
-        struct nfs4_state *state = NULL;
-        int open_flags = flags & (FMODE_READ|FMODE_WRITE);
-        int err;
-        err = -ENOMEM;
-        if (!(sp = nfs4_get_state_owner(server, cred))) {
-                dprintk("%s: nfs4_get_state_owner failed!\n", __FUNCTION__);
-                return err;
-        }
-        err = nfs4_recover_expired_lease(server);
-        if (err != 0)
-                goto out_put_state_owner;
-        /* Protect against reboot recovery - NOTE ORDER! */
-        down_read(&clp->cl_sem);
-        /* Protect against delegation recall */
-        down_read(&nfsi->rwsem);
-        delegation = NFS_I(inode)->delegation;
-        err = -ENOENT;
-        if (delegation == NULL || (delegation->type & open_flags) != open_flags)
-                goto out_err;
-        err = -ENOMEM;
-        state = nfs4_get_open_state(inode, sp);
-        if (state == NULL)
-                goto out_err;
-        err = -ENOENT;
-        if ((state->state & open_flags) == open_flags) {
-                spin_lock(&inode->i_lock);
-                update_open_stateflags(state, open_flags);
-                spin_unlock(&inode->i_lock);
-                goto out_ok;
-        } else if (state->state != 0)
-                goto out_put_open_state;
-        lock_kernel();
-        err = _nfs4_do_access(inode, cred, open_flags);
-        unlock_kernel();
-        if (err != 0)
-                goto out_put_open_state;
-        set_bit(NFS_DELEGATED_STATE, &state->flags);
-        update_open_stateid(state, &delegation->stateid, open_flags);
-out_ok:
-        nfs4_put_state_owner(sp);
-        up_read(&nfsi->rwsem);
-        up_read(&clp->cl_sem);
-        *res = state;
-        return 0;
-out_put_open_state:
-        nfs4_put_open_state(state);
-out_err:
-        up_read(&nfsi->rwsem);
-        up_read(&clp->cl_sem);
-        if (err != -EACCES)
-                nfs_inode_return_delegation(inode);
-out_put_state_owner:
-        nfs4_put_state_owner(sp);
-        return err;
-}
-static struct nfs4_state *nfs4_open_delegated(struct inode *inode, int flags, struct rpc_cred *cred)
 {
-        struct nfs4_exception exception = { };
+        if ((opendata->o_res.attrset[1] & FATTR4_WORD1_TIME_ACCESS) &&
-        struct nfs4_state *res = ERR_PTR(-EIO);
+            !(sattr->ia_valid & ATTR_ATIME_SET)) /* skipped when the caller already requests an explicit atime */
-        int err;
+                sattr->ia_valid |= ATTR_ATIME;
-        do {
+        if ((opendata->o_res.attrset[1] & FATTR4_WORD1_TIME_MODIFY) &&
-                err = _nfs4_open_delegated(inode, flags, cred, &res);
+            !(sattr->ia_valid & ATTR_MTIME_SET)) /* skipped when the caller already requests an explicit mtime */
-                if (err == 0)
+                sattr->ia_valid |= ATTR_MTIME;
-                        break;
-                res = ERR_PTR(nfs4_handle_exception(NFS_SERVER(inode),
-                                        err, &exception));
-        } while (exception.retry);
-        return res;
 }
 /*
 * Returns a referenced nfs4_state
 */
-static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, struct iattr *sattr, struct rpc_cred *cred, struct nfs4_state **res)
+static int _nfs4_do_open(struct inode *dir, struct path *path, int flags, struct iattr *sattr, struct rpc_cred *cred, struct nfs4_state **res)
 {
        struct nfs4_state_owner  *sp;
        struct nfs4_state     *state = NULL;
        struct nfs_server       *server = NFS_SERVER(dir);
        struct nfs_client *clp = server->nfs_client;
        struct nfs4_opendata *opendata;
-        int                     status;
+        int status;
        /* Protect against reboot recovery conflicts */
        status = -ENOMEM;
@@ -973,29 +1066,35 @@ static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, st
        status = nfs4_recover_expired_lease(server);
        if (status != 0)
                goto err_put_state_owner;
+        if (path->dentry->d_inode != NULL)
+                nfs4_return_incompatible_delegation(path->dentry->d_inode, flags & (FMODE_READ|FMODE_WRITE));
        down_read(&clp->cl_sem);
        status = -ENOMEM;
-        opendata = nfs4_opendata_alloc(dentry, sp, flags, sattr);
+        opendata = nfs4_opendata_alloc(path, sp, flags, sattr);
        if (opendata == NULL)
                goto err_release_rwsem;
+        if (path->dentry->d_inode != NULL)
+                opendata->state = nfs4_get_open_state(path->dentry->d_inode, sp);
        status = _nfs4_proc_open(opendata);
        if (status != 0)
-                goto err_opendata_free;
+                goto err_opendata_put;
+        if (opendata->o_arg.open_flags & O_EXCL)
+                nfs4_exclusive_attrset(opendata, sattr);
-        status = -ENOMEM;
        state = nfs4_opendata_to_nfs4_state(opendata);
-        if (state == NULL)
+        status = PTR_ERR(state);
-                goto err_opendata_free;
+        if (IS_ERR(state))
-        if (opendata->o_res.delegation_type != 0)
+                goto err_opendata_put;
-                nfs_inode_set_delegation(state->inode, cred, &opendata->o_res);
+        nfs4_opendata_put(opendata);
-        nfs4_opendata_free(opendata);
        nfs4_put_state_owner(sp);
        up_read(&clp->cl_sem);
        *res = state;
        return 0;
-err_opendata_free:
+err_opendata_put:
-        nfs4_opendata_free(opendata);
+        nfs4_opendata_put(opendata);
 err_release_rwsem:
        up_read(&clp->cl_sem);
 err_put_state_owner:
@@ -1006,14 +1105,14 @@ out_err:
 }
-static struct nfs4_state *nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, struct iattr *sattr, struct rpc_cred *cred)
+static struct nfs4_state *nfs4_do_open(struct inode *dir, struct path *path, int flags, struct iattr *sattr, struct rpc_cred *cred)
 {
        struct nfs4_exception exception = { };
        struct nfs4_state *res;
        int status;
        do {
-                status = _nfs4_do_open(dir, dentry, flags, sattr, cred, &res);
+                status = _nfs4_do_open(dir, path, flags, sattr, cred, &res);
                if (status == 0)
                        break;
                /* NOTE: BAD_SEQID means the server and client disagree about the
@@ -1028,7 +1127,9 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir, struct dentry *dentry,
                 * the user though...
                 */
                if (status == -NFS4ERR_BAD_SEQID) {
-                        printk(KERN_WARNING "NFS: v4 server returned a bad sequence-id error!\n");
+                        printk(KERN_WARNING "NFS: v4 server %s "
+                                        " returned a bad sequence-id error!\n",
+                                        NFS_SERVER(dir)->nfs_client->cl_hostname);
                        exception.retry = 1;
                        continue;
                }
@@ -1042,6 +1143,11 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir, struct dentry *dentry,
                        exception.retry = 1;
                        continue;
                }
+                if (status == -EAGAIN) {
+                        /* We must have found a delegation */
+                        exception.retry = 1;
+                        continue;
+                }
                res = ERR_PTR(nfs4_handle_exception(NFS_SERVER(dir),
                                        status, &exception));
        } while (exception.retry);
@@ -1101,6 +1207,7 @@ static int nfs4_do_setattr(struct inode *inode, struct nfs_fattr *fattr,
 }
 struct nfs4_closedata {
+        struct path path;
        struct inode *inode;
        struct nfs4_state *state;
        struct nfs_closeargs arg;
@@ -1117,6 +1224,8 @@ static void nfs4_free_closedata(void *data)
        nfs4_put_open_state(calldata->state);
        nfs_free_seqid(calldata->arg.seqid);
        nfs4_put_state_owner(sp);
+        dput(calldata->path.dentry);
+        mntput(calldata->path.mnt);
        kfree(calldata);
 }
@@ -1134,8 +1243,7 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
        nfs_increment_open_seqid(task->tk_status, calldata->arg.seqid);
        switch (task->tk_status) {
                case 0:
-                        memcpy(&state->stateid, &calldata->res.stateid,
+                        nfs_set_open_stateid(state, &calldata->res.stateid, calldata->arg.open_flags);
-                                        sizeof(state->stateid));
                        renew_lease(server, calldata->timestamp);
                        break;
                case -NFS4ERR_STALE_STATEID:
@@ -1160,26 +1268,30 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
                .rpc_resp = &calldata->res,
                .rpc_cred = state->owner->so_cred,
        };
-        int mode = 0, old_mode;
+        int clear_rd, clear_wr, clear_rdwr;
+        int mode;
        if (nfs_wait_on_sequence(calldata->arg.seqid, task) != 0)
                return;
-        /* Recalculate the new open mode in case someone reopened the file
-         * while we were waiting in line to be scheduled.
+        mode = FMODE_READ|FMODE_WRITE;
-         */
+        clear_rd = clear_wr = clear_rdwr = 0;
        spin_lock(&state->owner->so_lock);
-        spin_lock(&calldata->inode->i_lock);
+        /* Calculate the change in open mode */
-        mode = old_mode = state->state;
        if (state->n_rdwr == 0) {
-                if (state->n_rdonly == 0)
+                if (state->n_rdonly == 0) {
                        mode &= ~FMODE_READ;
-                if (state->n_wronly == 0)
+                        clear_rd |= test_and_clear_bit(NFS_O_RDONLY_STATE, &state->flags);
+                        clear_rdwr |= test_and_clear_bit(NFS_O_RDWR_STATE, &state->flags);
+                }
+                if (state->n_wronly == 0) {
                        mode &= ~FMODE_WRITE;
+                        clear_wr |= test_and_clear_bit(NFS_O_WRONLY_STATE, &state->flags);
+                        clear_rdwr |= test_and_clear_bit(NFS_O_RDWR_STATE, &state->flags);
+                }
        }
-        nfs4_state_set_mode_locked(state, mode);
-        spin_unlock(&calldata->inode->i_lock);
        spin_unlock(&state->owner->so_lock);
-        if (mode == old_mode || test_bit(NFS_DELEGATED_STATE, &state->flags)) {
+        if (!clear_rd && !clear_wr && !clear_rdwr) {
                /* Note: exit _without_ calling nfs4_close_done */
                task->tk_action = NULL;
                return;
@@ -1209,19 +1321,21 @@ static const struct rpc_call_ops nfs4_close_ops = {
 *
 * NOTE: Caller must be holding the sp->so_owner semaphore!
 */
-int nfs4_do_close(struct inode *inode, struct nfs4_state *state) 
+int nfs4_do_close(struct path *path, struct nfs4_state *state)
 {
-        struct nfs_server *server = NFS_SERVER(inode);
+        struct nfs_server *server = NFS_SERVER(state->inode);
        struct nfs4_closedata *calldata;
+        struct nfs4_state_owner *sp = state->owner;
+        struct rpc_task *task;
        int status = -ENOMEM;
        calldata = kmalloc(sizeof(*calldata), GFP_KERNEL);
        if (calldata == NULL)
                goto out;
-        calldata->inode = inode;
+        calldata->inode = state->inode;
        calldata->state = state;
-        calldata->arg.fh = NFS_FH(inode);
+        calldata->arg.fh = NFS_FH(state->inode);
-        calldata->arg.stateid = &state->stateid;
+        calldata->arg.stateid = &state->open_stateid;
        /* Serialization for the sequence id */
        calldata->arg.seqid = nfs_alloc_seqid(&state->owner->so_seqid);
        if (calldata->arg.seqid == NULL)
@@ -1229,36 +1343,55 @@ int nfs4_do_close(struct inode *inode, struct nfs4_state *state)
        calldata->arg.bitmask = server->attr_bitmask;
        calldata->res.fattr = &calldata->fattr;
        calldata->res.server = server;
+        calldata->path.mnt = mntget(path->mnt);
+        calldata->path.dentry = dget(path->dentry);
-        status = nfs4_call_async(server->client, &nfs4_close_ops, calldata);
+        task = rpc_run_task(server->client, RPC_TASK_ASYNC, &nfs4_close_ops, calldata);
-        if (status == 0)
+        if (IS_ERR(task))
-                goto out;
+                return PTR_ERR(task);
+        rpc_put_task(task);
-        nfs_free_seqid(calldata->arg.seqid);
+        return 0;
 out_free_calldata:
        kfree(calldata);
 out:
+        nfs4_put_open_state(state);
+        nfs4_put_state_owner(sp);
        return status;
 }
+/*
+ * Attach an already-opened nfs4_state to the file being instantiated for
+ * the open intent in @nd.
+ *
+ * When the intent flags contain FMODE_EXEC, _nfs4_do_access() is consulted
+ * first using the state owner's credential. The file is then instantiated
+ * with lookup_instantiate_filp() and @state is stored in the
+ * nfs_open_context found in filp->private_data.
+ *
+ * Returns 0 on success. If the access check returns a negative value or
+ * instantiation fails, the open is undone with nfs4_close_state() and the
+ * corresponding error is returned.
+ */
-static int nfs4_intent_set_file(struct nameidata *nd, struct dentry *dentry, struct nfs4_state *state)
+static int nfs4_intent_set_file(struct nameidata *nd, struct path *path, struct nfs4_state *state)
 {
        struct file *filp;
+        int ret;
-        filp = lookup_instantiate_filp(nd, dentry, NULL);
+        /* If the open_intent is for execute, we have an extra check to make */
+        if (nd->intent.open.flags & FMODE_EXEC) {
+                ret = _nfs4_do_access(state->inode,
+                                state->owner->so_cred,
+                                nd->intent.open.flags);
+                if (ret < 0)
+                        goto out_close;
+        }
+        filp = lookup_instantiate_filp(nd, path->dentry, NULL);
        if (!IS_ERR(filp)) {
                struct nfs_open_context *ctx;
                ctx = (struct nfs_open_context *)filp->private_data;
                ctx->state = state;
                return 0;
        }
-        nfs4_close_state(state, nd->intent.open.flags);
+        ret = PTR_ERR(filp);
-        return PTR_ERR(filp);
+out_close:
+        nfs4_close_state(path, state, nd->intent.open.flags);
+        return ret;
 }
 struct dentry *
 nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
 {
+        struct path path = {
+                .mnt = nd->mnt,
+                .dentry = dentry,
+        };
        struct iattr attr;
        struct rpc_cred *cred;
        struct nfs4_state *state;
@@ -1277,7 +1410,7 @@ nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
        cred = rpcauth_lookupcred(NFS_CLIENT(dir)->cl_auth, 0);
        if (IS_ERR(cred))
                return (struct dentry *)cred;
-        state = nfs4_do_open(dir, dentry, nd->intent.open.flags, &attr, cred);
+        state = nfs4_do_open(dir, &path, nd->intent.open.flags, &attr, cred);
        put_rpccred(cred);
        if (IS_ERR(state)) {
                if (PTR_ERR(state) == -ENOENT)
@@ -1287,22 +1420,24 @@ nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
        res = d_add_unique(dentry, igrab(state->inode));
        if (res != NULL)
                dentry = res;
-        nfs4_intent_set_file(nd, dentry, state);
+        nfs4_intent_set_file(nd, &path, state);
        return res;
 }
 int
 nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags, struct nameidata *nd)
 {
+        struct path path = {
+                .mnt = nd->mnt,
+                .dentry = dentry,
+        };
        struct rpc_cred *cred;
        struct nfs4_state *state;
        cred = rpcauth_lookupcred(NFS_CLIENT(dir)->cl_auth, 0);
        if (IS_ERR(cred))
                return PTR_ERR(cred);
-        state = nfs4_open_delegated(dentry->d_inode, openflags, cred);
+        state = nfs4_do_open(dir, &path, openflags, NULL, cred);
-        if (IS_ERR(state))
-                state = nfs4_do_open(dir, dentry, openflags, NULL, cred);
        put_rpccred(cred);
        if (IS_ERR(state)) {
                switch (PTR_ERR(state)) {
@@ -1318,10 +1453,10 @@ nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags, st
                }
        }
        if (state->inode == dentry->d_inode) {
-                nfs4_intent_set_file(nd, dentry, state);
+                nfs4_intent_set_file(nd, &path, state);
                return 1;
        }
-        nfs4_close_state(state, openflags);
+        nfs4_close_state(&path, state, openflags);
 out_drop:
        d_drop(dentry);
        return 0;
@@ -1559,8 +1694,6 @@ static int _nfs4_proc_lookupfh(struct nfs_server *server, struct nfs_fh *dirfh,
        dprintk("NFS call  lookupfh %s\n", name->name);
        status = rpc_call_sync(server->client, &msg, 0);
        dprintk("NFS reply lookupfh: %d\n", status);
-        if (status == -NFS4ERR_MOVED)
-                status = -EREMOTE;
        return status;
 }
@@ -1571,10 +1704,13 @@ static int nfs4_proc_lookupfh(struct nfs_server *server, struct nfs_fh *dirfh,
        struct nfs4_exception exception = { };
        int err;
        do {
-                err = nfs4_handle_exception(server,
+                err = _nfs4_proc_lookupfh(server, dirfh, name, fhandle, fattr);
-                                _nfs4_proc_lookupfh(server, dirfh, name,
+                /* FIXME: !!!! */
-                                                    fhandle, fattr),
+                if (err == -NFS4ERR_MOVED) {
-                                &exception);
+                        err = -EREMOTE;
+                        break;
+                }
+                err = nfs4_handle_exception(server, err, &exception);
        } while (exception.retry);
        return err;
 }
@@ -1582,28 +1718,10 @@ static int nfs4_proc_lookupfh(struct nfs_server *server, struct nfs_fh *dirfh,
 static int _nfs4_proc_lookup(struct inode *dir, struct qstr *name,
                struct nfs_fh *fhandle, struct nfs_fattr *fattr)
 {
-        int                    status;
+        int status;
-        struct nfs_server *server = NFS_SERVER(dir);
-        struct nfs4_lookup_arg args = {
-                .bitmask = server->attr_bitmask,
-                .dir_fh = NFS_FH(dir),
-                .name = name,
-        };
-        struct nfs4_lookup_res res = {
-                .server = server,
-                .fattr = fattr,
-                .fh = fhandle,
-        };
-        struct rpc_message msg = {
-                .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOOKUP],
-                .rpc_argp = &args,
-                .rpc_resp = &res,
-        };
-        
-        nfs_fattr_init(fattr);
        
        dprintk("NFS call  lookup %s\n", name->name);
-        status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
+        status = _nfs4_proc_lookupfh(NFS_SERVER(dir), NFS_FH(dir), name, fhandle, fattr);
        if (status == -NFS4ERR_MOVED)
                status = nfs4_get_referral(dir, name, fattr, fhandle);
        dprintk("NFS reply lookup: %d\n", status);
@@ -1752,6 +1870,10 @@ static int
 nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
                 int flags, struct nameidata *nd)
 {
+        struct path path = {
+                .mnt = nd->mnt,
+                .dentry = dentry,
+        };
        struct nfs4_state *state;
        struct rpc_cred *cred;
        int status = 0;
@@ -1761,7 +1883,7 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
                status = PTR_ERR(cred);
                goto out;
        }
-        state = nfs4_do_open(dir, dentry, flags, sattr, cred);
+        state = nfs4_do_open(dir, &path, flags, sattr, cred);
        put_rpccred(cred);
        if (IS_ERR(state)) {
                status = PTR_ERR(state);
@@ -1773,11 +1895,12 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
                status = nfs4_do_setattr(state->inode, &fattr, sattr, state);
                if (status == 0)
                        nfs_setattr_update_inode(state->inode, sattr);
+                nfs_post_op_update_inode(state->inode, &fattr);
        }
-        if (status == 0 && nd != NULL && (nd->flags & LOOKUP_OPEN))
+        if (status == 0 && (nd->flags & LOOKUP_OPEN) != 0)
-                status = nfs4_intent_set_file(nd, dentry, state);
+                status = nfs4_intent_set_file(nd, &path, state);
        else
-                nfs4_close_state(state, flags);
+                nfs4_close_state(&path, state, flags);
 out:
        return status;
 }
@@ -3008,7 +3131,7 @@ static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock
        if (status != 0)
                goto out;
        lsp = request->fl_u.nfs4_fl.owner;
-        arg.lock_owner.id = lsp->ls_id; 
+        arg.lock_owner.id = lsp->ls_id.id;
        status = rpc_call_sync(server->client, &msg, 0);
        switch (status) {
                case 0:
@@ -3152,6 +3275,11 @@ static struct rpc_task *nfs4_do_unlck(struct file_lock *fl,
 {
        struct nfs4_unlockdata *data;
+        /* Ensure this is an unlock - when canceling a lock, the
+         * canceled lock is passed in, and it won't be an unlock.
+         */
+        fl->fl_type = F_UNLCK;
        data = nfs4_alloc_unlockdata(fl, ctx, lsp, seqid);
        if (data == NULL) {
                nfs_free_seqid(seqid);
@@ -3222,7 +3350,7 @@ static struct nfs4_lockdata *nfs4_alloc_lockdata(struct file_lock *fl,
                goto out_free;
        p->arg.lock_stateid = &lsp->ls_stateid;
        p->arg.lock_owner.clientid = server->nfs_client->cl_clientid;
-        p->arg.lock_owner.id = lsp->ls_id;
+        p->arg.lock_owner.id = lsp->ls_id.id;
        p->lsp = lsp;
        atomic_inc(&lsp->ls_count);
        p->ctx = get_nfs_open_context(ctx);
@@ -3285,7 +3413,7 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata)
                memcpy(data->lsp->ls_stateid.data, data->res.stateid.data,
                                        sizeof(data->lsp->ls_stateid.data));
                data->lsp->ls_flags |= NFS_LOCK_INITIALIZED;
-                renew_lease(NFS_SERVER(data->ctx->dentry->d_inode), data->timestamp);
+                renew_lease(NFS_SERVER(data->ctx->path.dentry->d_inode), data->timestamp);
        }
        nfs_increment_lock_seqid(data->rpc_status, data->arg.lock_seqid);
 out:
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 8ed79d5c54f9..e9662ba81d86 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -38,12 +38,14 @@
 * subsequent patch.
 */
+#include <linux/kernel.h>
 #include <linux/slab.h>
 #include <linux/smp_lock.h>
 #include <linux/nfs_fs.h>
 #include <linux/nfs_idmap.h>
 #include <linux/kthread.h>
 #include <linux/module.h>
+#include <linux/random.h>
 #include <linux/workqueue.h>
 #include <linux/bitops.h>
@@ -69,33 +71,14 @@ static int nfs4_init_client(struct nfs_client *clp, struct rpc_cred *cred)
        return status;
 }
-u32
-nfs4_alloc_lockowner_id(struct nfs_client *clp)
-{
-        return clp->cl_lockowner_id ++;
-}
-static struct nfs4_state_owner *
-nfs4_client_grab_unused(struct nfs_client *clp, struct rpc_cred *cred)
-{
-        struct nfs4_state_owner *sp = NULL;
-        if (!list_empty(&clp->cl_unused)) {
-                sp = list_entry(clp->cl_unused.next, struct nfs4_state_owner, so_list);
-                atomic_inc(&sp->so_count);
-                sp->so_cred = cred;
-                list_move(&sp->so_list, &clp->cl_state_owners);
-                clp->cl_nunused--;
-        }
-        return sp;
-}
 struct rpc_cred *nfs4_get_renew_cred(struct nfs_client *clp)
 {
        struct nfs4_state_owner *sp;
+        struct rb_node *pos;
        struct rpc_cred *cred = NULL;
-        list_for_each_entry(sp, &clp->cl_state_owners, so_list) {
+        for (pos = rb_first(&clp->cl_state_owners); pos != NULL; pos = rb_next(pos)) {
+                sp = rb_entry(pos, struct nfs4_state_owner, so_client_node);
                if (list_empty(&sp->so_states))
                        continue;
                cred = get_rpccred(sp->so_cred);
@@ -107,32 +90,146 @@ struct rpc_cred *nfs4_get_renew_cred(struct nfs_client *clp)
+/*
+ * Return the credential of the first state owner in clp->cl_state_owners
+ * (rb_first() order), obtained through get_rpccred(), or NULL when the
+ * tree holds no state owners.
+ */
 static struct rpc_cred *nfs4_get_setclientid_cred(struct nfs_client *clp)
 {
        struct nfs4_state_owner *sp;
+        struct rb_node *pos;
-        if (!list_empty(&clp->cl_state_owners)) {
+        pos = rb_first(&clp->cl_state_owners);
-                sp = list_entry(clp->cl_state_owners.next,
+        if (pos != NULL) {
-                                struct nfs4_state_owner, so_list);
+                sp = rb_entry(pos, struct nfs4_state_owner, so_client_node);
                return get_rpccred(sp->so_cred);
        }
        return NULL;
 }
+/*
+ * nfs_alloc_unique_id - choose an id that is not yet in @root and link @new
+ * @root: rbtree of struct nfs_unique_id entries, ordered by ->id
+ * @new: entry whose ->id is chosen here and which is inserted into @root
+ * @minval: lowest id value used when a candidate has to be reset
+ * @maxbits: number of low-order bits an id may use; 64 disables masking
+ *
+ * A random starting value is picked and the tree is searched for it. On a
+ * collision the candidate is incremented while walking the in-order
+ * successors until a gap shows up, then the search restarts for the new
+ * candidate. The function only returns once @new has been linked; there is
+ * no failure return.
+ *
+ * No locking is done here. The callers visible in this file hold
+ * clp->cl_lock around the call.
+ */
+static void nfs_alloc_unique_id(struct rb_root *root, struct nfs_unique_id *new,
+                __u64 minval, int maxbits)
+{
+        struct rb_node **p, *parent;
+        struct nfs_unique_id *pos;
+        __u64 mask = ~0ULL;
+        /* Shifting by the full width is not valid C, so only mask when maxbits < 64 */
+        if (maxbits < 64)
+                mask = (1ULL << maxbits) - 1ULL;
+        /* Ensure distribution is more or less flat */
+        get_random_bytes(&new->id, sizeof(new->id));
+        new->id &= mask;
+        /* Lift candidates that fell below minval */
+        if (new->id < minval)
+                new->id += minval;
+retry:
+        /* Descend from the root looking for new->id or for its insertion slot */
+        p = &root->rb_node;
+        parent = NULL;
+        while (*p != NULL) {
+                parent = *p;
+                pos = rb_entry(parent, struct nfs_unique_id, rb_node);
+                if (new->id < pos->id)
+                        p = &(*p)->rb_left;
+                else if (new->id > pos->id)
+                        p = &(*p)->rb_right;
+                else
+                        goto id_exists;
+        }
+        rb_link_node(&new->rb_node, parent, p);
+        rb_insert_color(&new->rb_node, root);
+        return;
+id_exists:
+        /* 'parent' is the node holding the colliding id; probe upwards from it */
+        for (;;) {
+                new->id++;
+                /* Wrapped below minval or left the permitted bit range: restart at minval */
+                if (new->id < minval || (new->id & mask) != new->id) {
+                        new->id = minval;
+                        break;
+                }
+                parent = rb_next(parent);
+                /* No larger id in the tree, so the candidate cannot collide */
+                if (parent == NULL)
+                        break;
+                pos = rb_entry(parent, struct nfs_unique_id, rb_node);
+                /* Candidate lies in the gap before the next stored id */
+                if (new->id < pos->id)
+                        break;
+        }
+        /* Search again to locate the link position for the new candidate */
+        goto retry;
+}
+/*
+ * Unlink @id from the rbtree @root so that its value is no longer found
+ * by the collision search in nfs_alloc_unique_id(). The memory holding
+ * @id is not freed here.
+ */
+static void nfs_free_unique_id(struct rb_root *root, struct nfs_unique_id *id)
+{
+        rb_erase(&id->rb_node, root);
+}
+/*
+ * Look up the state owner for the (@server, @cred) pair in the
+ * cl_state_owners rbtree of server->nfs_client. Entries are ordered by the
+ * so_server pointer first and by the so_cred pointer second.
+ *
+ * On a match the owner's so_count is incremented and the owner is
+ * returned; otherwise NULL is returned. No lock is taken here.
+ */
 static struct nfs4_state_owner *
-nfs4_find_state_owner(struct nfs_client *clp, struct rpc_cred *cred)
+nfs4_find_state_owner(struct nfs_server *server, struct rpc_cred *cred)
 {
+        struct nfs_client *clp = server->nfs_client;
+        struct rb_node **p = &clp->cl_state_owners.rb_node,
+                       *parent = NULL;
        struct nfs4_state_owner *sp, *res = NULL;
-        list_for_each_entry(sp, &clp->cl_state_owners, so_list) {
+        while (*p != NULL) {
-                if (sp->so_cred != cred)
+                parent = *p;
+                sp = rb_entry(parent, struct nfs4_state_owner, so_client_node);
+                if (server < sp->so_server) {
+                        p = &parent->rb_left;
                        continue;
-                atomic_inc(&sp->so_count);
+                }
-                /* Move to the head of the list */
+                if (server > sp->so_server) {
-                list_move(&sp->so_list, &clp->cl_state_owners);
+                        p = &parent->rb_right;
-                res = sp;
+                        continue;
-                break;
+                }
+                if (cred < sp->so_cred)
+                        p = &parent->rb_left;
+                else if (cred > sp->so_cred)
+                        p = &parent->rb_right;
+                else {
+                        atomic_inc(&sp->so_count);
+                        res = sp;
+                        break;
+                }
        }
        return res;
 }
+/*
+ * Insert @new into clp->cl_state_owners, keyed by the so_server pointer
+ * and then by the so_cred pointer.
+ *
+ * If an owner with the same server and credential is already present, its
+ * so_count is incremented and that existing owner is returned; @new is
+ * left unlinked and no id is allocated for it. Otherwise @new receives a
+ * unique so_owner_id from clp->cl_openowner_id, is linked into the tree,
+ * and is returned.
+ *
+ * No lock is taken here.
+ */
+static struct nfs4_state_owner *
+nfs4_insert_state_owner(struct nfs_client *clp, struct nfs4_state_owner *new)
+{
+        struct rb_node **p = &clp->cl_state_owners.rb_node,
+                       *parent = NULL;
+        struct nfs4_state_owner *sp;
+        while (*p != NULL) {
+                parent = *p;
+                sp = rb_entry(parent, struct nfs4_state_owner, so_client_node);
+                if (new->so_server < sp->so_server) {
+                        p = &parent->rb_left;
+                        continue;
+                }
+                if (new->so_server > sp->so_server) {
+                        p = &parent->rb_right;
+                        continue;
+                }
+                if (new->so_cred < sp->so_cred)
+                        p = &parent->rb_left;
+                else if (new->so_cred > sp->so_cred)
+                        p = &parent->rb_right;
+                else {
+                        /* Same server and cred already present: reuse it */
+                        atomic_inc(&sp->so_count);
+                        return sp;
+                }
+        }
+        /* Ids start at 1 and may use all 64 bits */
+        nfs_alloc_unique_id(&clp->cl_openowner_id, &new->so_owner_id, 1, 64);
+        rb_link_node(&new->so_client_node, parent, p);
+        rb_insert_color(&new->so_client_node, &clp->cl_state_owners);
+        return new;
+}
+/*
+ * Detach @sp from @clp: erase it from clp->cl_state_owners unless its
+ * so_client_node is already marked empty, then release its so_owner_id
+ * from clp->cl_openowner_id. @sp itself is not freed here.
+ */
+static void
+nfs4_remove_state_owner(struct nfs_client *clp, struct nfs4_state_owner *sp)
+{
+        /* The node may already be unlinked, e.g. after RB_CLEAR_NODE() */
+        if (!RB_EMPTY_NODE(&sp->so_client_node))
+                rb_erase(&sp->so_client_node, &clp->cl_state_owners);
+        nfs_free_unique_id(&clp->cl_openowner_id, &sp->so_owner_id);
+}
 /*
 * nfs4_alloc_state_owner(): this is called on the OPEN or CREATE path to
 * create a new state_owner.
@@ -160,10 +257,14 @@ nfs4_alloc_state_owner(void)
 void
 nfs4_drop_state_owner(struct nfs4_state_owner *sp)
 {
-        struct nfs_client *clp = sp->so_client;
+        if (!RB_EMPTY_NODE(&sp->so_client_node)) {
-        spin_lock(&clp->cl_lock);
+                struct nfs_client *clp = sp->so_client;
-        list_del_init(&sp->so_list);
-        spin_unlock(&clp->cl_lock);
+                spin_lock(&clp->cl_lock);
+                rb_erase(&sp->so_client_node, &clp->cl_state_owners);
+                /* Mark the node as unlinked so later RB_EMPTY_NODE() checks skip a second rb_erase() */
+                RB_CLEAR_NODE(&sp->so_client_node);
+                spin_unlock(&clp->cl_lock);
+        }
 }
 /*
@@ -175,26 +276,25 @@ struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, struct
        struct nfs_client *clp = server->nfs_client;
        struct nfs4_state_owner *sp, *new;
-        get_rpccred(cred);
-        new = nfs4_alloc_state_owner();
        spin_lock(&clp->cl_lock);
-        sp = nfs4_find_state_owner(clp, cred);
+        sp = nfs4_find_state_owner(server, cred);
-        if (sp == NULL)
-                sp = nfs4_client_grab_unused(clp, cred);
-        if (sp == NULL && new != NULL) {
-                list_add(&new->so_list, &clp->cl_state_owners);
-                new->so_client = clp;
-                new->so_id = nfs4_alloc_lockowner_id(clp);
-                new->so_cred = cred;
-                sp = new;
-                new = NULL;
-        }
        spin_unlock(&clp->cl_lock);
-        kfree(new);
        if (sp != NULL)
                return sp;
-        put_rpccred(cred);
+        new = nfs4_alloc_state_owner();
-        return NULL;
+        if (new == NULL)
+                return NULL;
+        new->so_client = clp;
+        new->so_server = server;
+        new->so_cred = cred;
+        spin_lock(&clp->cl_lock);
+        sp = nfs4_insert_state_owner(clp, new);
+        spin_unlock(&clp->cl_lock);
+        if (sp == new)
+                get_rpccred(cred);
+        else
+                kfree(new);
+        return sp;
 }
 /*
@@ -208,18 +308,7 @@ void nfs4_put_state_owner(struct nfs4_state_owner *sp)
        if (!atomic_dec_and_lock(&sp->so_count, &clp->cl_lock))
                return;
-        if (clp->cl_nunused >= OPENOWNER_POOL_SIZE)
+        nfs4_remove_state_owner(clp, sp);
-                goto out_free;
-        if (list_empty(&sp->so_list))
-                goto out_free;
-        list_move(&sp->so_list, &clp->cl_unused);
-        clp->cl_nunused++;
-        spin_unlock(&clp->cl_lock);
-        put_rpccred(cred);
-        cred = NULL;
-        return;
-out_free:
-        list_del(&sp->so_list);
        spin_unlock(&clp->cl_lock);
        put_rpccred(cred);
        kfree(sp);
@@ -236,6 +325,7 @@ nfs4_alloc_open_state(void)
        atomic_set(&state->count, 1);
        INIT_LIST_HEAD(&state->lock_states);
        spin_lock_init(&state->state_lock);
+        seqlock_init(&state->seqlock);
        return state;
 }
@@ -263,13 +353,10 @@ __nfs4_find_state_byowner(struct inode *inode, struct nfs4_state_owner *owner)
        struct nfs4_state *state;
        list_for_each_entry(state, &nfsi->open_states, inode_states) {
-                /* Is this in the process of being freed? */
+                if (state->owner != owner)
-                if (state->state == 0)
                        continue;
-                if (state->owner == owner) {
+                if (atomic_inc_not_zero(&state->count))
-                        atomic_inc(&state->count);
                        return state;
-                }
        }
        return NULL;
 }
@@ -341,16 +428,15 @@ void nfs4_put_open_state(struct nfs4_state *state)
 /*
 * Close the current file.
 */
-void nfs4_close_state(struct nfs4_state *state, mode_t mode)
+void nfs4_close_state(struct path *path, struct nfs4_state *state, mode_t mode)
 {
-        struct inode *inode = state->inode;
        struct nfs4_state_owner *owner = state->owner;
-        int oldstate, newstate = 0;
+        int call_close = 0;
+        int newstate;
        atomic_inc(&owner->so_count);
        /* Protect against nfs4_find_state() */
        spin_lock(&owner->so_lock);
-        spin_lock(&inode->i_lock);
        switch (mode & (FMODE_READ | FMODE_WRITE)) {
                case FMODE_READ:
                        state->n_rdonly--;
@@ -361,24 +447,29 @@ void nfs4_close_state(struct nfs4_state *state, mode_t mode)
                case FMODE_READ|FMODE_WRITE:
                        state->n_rdwr--;
        }
-        oldstate = newstate = state->state;
+        newstate = FMODE_READ|FMODE_WRITE;
        if (state->n_rdwr == 0) {
-                if (state->n_rdonly == 0)
+                if (state->n_rdonly == 0) {
                        newstate &= ~FMODE_READ;
-                if (state->n_wronly == 0)
+                        call_close |= test_bit(NFS_O_RDONLY_STATE, &state->flags);
+                        call_close |= test_bit(NFS_O_RDWR_STATE, &state->flags);
+                }
+                if (state->n_wronly == 0) {
                        newstate &= ~FMODE_WRITE;
+                        call_close |= test_bit(NFS_O_WRONLY_STATE, &state->flags);
+                        call_close |= test_bit(NFS_O_RDWR_STATE, &state->flags);
+                }
+                if (newstate == 0)
+                        clear_bit(NFS_DELEGATED_STATE, &state->flags);
        }
-        if (test_bit(NFS_DELEGATED_STATE, &state->flags)) {
+        nfs4_state_set_mode_locked(state, newstate);
-                nfs4_state_set_mode_locked(state, newstate);
-                oldstate = newstate;
-        }
-        spin_unlock(&inode->i_lock);
        spin_unlock(&owner->so_lock);
-        if (oldstate != newstate && nfs4_do_close(inode, state) == 0)
+        if (!call_close) {
-                return;
+                nfs4_put_open_state(state);
-        nfs4_put_open_state(state);
+                nfs4_put_state_owner(owner);
-        nfs4_put_state_owner(owner);
+        } else
+                nfs4_do_close(path, state);
 }
 /*
@@ -415,12 +506,22 @@ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, f
        atomic_set(&lsp->ls_count, 1);
        lsp->ls_owner = fl_owner;
        spin_lock(&clp->cl_lock);
-        lsp->ls_id = nfs4_alloc_lockowner_id(clp);
+        nfs_alloc_unique_id(&clp->cl_lockowner_id, &lsp->ls_id, 1, 64);
        spin_unlock(&clp->cl_lock);
        INIT_LIST_HEAD(&lsp->ls_locks);
        return lsp;
 }
+/*
+ * Release a lock state: remove its ls_id from the owning client's
+ * cl_lockowner_id tree while holding clp->cl_lock, then kfree() @lsp.
+ * The client is reached through lsp->ls_state->owner->so_client, so
+ * lsp->ls_state must be set when this is called.
+ */
+static void nfs4_free_lock_state(struct nfs4_lock_state *lsp)
+{
+        struct nfs_client *clp = lsp->ls_state->owner->so_client;
+        spin_lock(&clp->cl_lock);
+        nfs_free_unique_id(&clp->cl_lockowner_id, &lsp->ls_id);
+        spin_unlock(&clp->cl_lock);
+        kfree(lsp);
+}
 /*
 * Return a compatible lock_state. If no initialized lock_state structure
 * exists, return an uninitialized one.
@@ -450,7 +551,8 @@ static struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_
                        return NULL;
        }
        spin_unlock(&state->state_lock);
-        kfree(new);
+        if (new != NULL)
+                nfs4_free_lock_state(new);
        return lsp;
 }
@@ -471,7 +573,7 @@ void nfs4_put_lock_state(struct nfs4_lock_state *lsp)
        if (list_empty(&state->lock_states))
                clear_bit(LK_STATE_IN_USE, &state->flags);
        spin_unlock(&state->state_lock);
-        kfree(lsp);
+        nfs4_free_lock_state(lsp);
 }
 static void nfs4_fl_copy_lock(struct file_lock *dst, struct file_lock *src)
@@ -513,8 +615,12 @@ int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl)
 void nfs4_copy_stateid(nfs4_stateid *dst, struct nfs4_state *state, fl_owner_t fl_owner)
 {
        struct nfs4_lock_state *lsp;
+        int seq;
-        memcpy(dst, &state->stateid, sizeof(*dst));
+        do {
+                seq = read_seqbegin(&state->seqlock);
+                memcpy(dst, &state->stateid, sizeof(*dst));
+        } while (read_seqretry(&state->seqlock, seq));
        if (test_bit(LK_STATE_IN_USE, &state->flags) == 0)
                return;
@@ -557,12 +663,18 @@ void nfs_free_seqid(struct nfs_seqid *seqid)
 * failed with a seqid incrementing error -
 * see comments nfs_fs.h:seqid_mutating_error()
 */
-static inline void nfs_increment_seqid(int status, struct nfs_seqid *seqid)
+static void nfs_increment_seqid(int status, struct nfs_seqid *seqid)
 {
        switch (status) {
                case 0:
                        break;
                case -NFS4ERR_BAD_SEQID:
+                        if (seqid->sequence->flags & NFS_SEQID_CONFIRMED)
+                                return;
+                        printk(KERN_WARNING "NFS: v4 server returned a bad"
+                                        " sequence-id error on an"
+                                        " unconfirmed sequence %p!\n",
+                                        seqid->sequence); /* fall through */
                case -NFS4ERR_STALE_CLIENTID:
                case -NFS4ERR_STALE_STATEID:
                case -NFS4ERR_BAD_STATEID:
@@ -586,7 +698,7 @@ void nfs_increment_open_seqid(int status, struct nfs_seqid *seqid)
                                struct nfs4_state_owner, so_seqid);
                nfs4_drop_state_owner(sp);
        }
-        return nfs_increment_seqid(status, seqid);
+        nfs_increment_seqid(status, seqid);
 }
 /*
@@ -596,7 +708,7 @@ void nfs_increment_open_seqid(int status, struct nfs_seqid *seqid)
 */
 void nfs_increment_lock_seqid(int status, struct nfs_seqid *seqid)
 {
-        return nfs_increment_seqid(status, seqid);
+        nfs_increment_seqid(status, seqid);
 }
 int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task)
@@ -748,15 +860,21 @@ out_err:
 static void nfs4_state_mark_reclaim(struct nfs_client *clp)
 {
        struct nfs4_state_owner *sp;
+        struct rb_node *pos;
        struct nfs4_state *state;
        struct nfs4_lock_state *lock;
        /* Reset all sequence ids to zero */
-        list_for_each_entry(sp, &clp->cl_state_owners, so_list) {
+        for (pos = rb_first(&clp->cl_state_owners); pos != NULL; pos = rb_next(pos)) {
+                sp = rb_entry(pos, struct nfs4_state_owner, so_client_node);
                sp->so_seqid.counter = 0;
                sp->so_seqid.flags = 0;
                spin_lock(&sp->so_lock);
                list_for_each_entry(state, &sp->so_states, open_states) {
+                        clear_bit(NFS_DELEGATED_STATE, &state->flags);
+                        clear_bit(NFS_O_RDONLY_STATE, &state->flags);
+                        clear_bit(NFS_O_WRONLY_STATE, &state->flags);
+                        clear_bit(NFS_O_RDWR_STATE, &state->flags);
                        list_for_each_entry(lock, &state->lock_states, ls_locks) {
                                lock->ls_seqid.counter = 0;
                                lock->ls_seqid.flags = 0;
@@ -771,6 +889,7 @@ static int reclaimer(void *ptr)
 {
        struct nfs_client *clp = ptr;
        struct nfs4_state_owner *sp;
+        struct rb_node *pos;
        struct nfs4_state_recovery_ops *ops;
        struct rpc_cred *cred;
        int status = 0;
@@ -816,7 +935,8 @@ restart_loop:
        /* Mark all delegations for reclaim */
        nfs_delegation_mark_reclaim(clp);
        /* Note: list is protected by exclusive lock on cl->cl_sem */
-        list_for_each_entry(sp, &clp->cl_state_owners, so_list) {
+        for (pos = rb_first(&clp->cl_state_owners); pos != NULL; pos = rb_next(pos)) {
+                sp = rb_entry(pos, struct nfs4_state_owner, so_client_node);
                status = nfs4_reclaim_open_state(ops, sp);
                if (status < 0) {
                        if (status == -NFS4ERR_NO_GRACE) {
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 8003c91ccb9a..c08738441f73 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -68,9 +68,10 @@ static int nfs4_stat_to_errno(int);
 #endif
 /* lock,open owner id: 
- * we currently use size 1 (u32) out of (NFS4_OPAQUE_LIMIT  >> 2)
+ * we currently use size 4 (8-byte tag + u64 id) out of (NFS4_OPAQUE_LIMIT  >> 2)
 */
-#define owner_id_maxsz          (1 + 1)
+#define open_owner_id_maxsz     (1 + 4)
+#define lock_owner_id_maxsz     (1 + 4)
 #define compound_encode_hdr_maxsz       (3 + (NFS4_MAXTAGLEN >> 2))
 #define compound_decode_hdr_maxsz       (3 + (NFS4_MAXTAGLEN >> 2))
 #define op_encode_hdr_maxsz     (1)
@@ -87,9 +88,11 @@ static int nfs4_stat_to_errno(int);
 #define encode_getattr_maxsz    (op_encode_hdr_maxsz + nfs4_fattr_bitmap_maxsz)
 #define nfs4_name_maxsz         (1 + ((3 + NFS4_MAXNAMLEN) >> 2))
 #define nfs4_path_maxsz         (1 + ((3 + NFS4_MAXPATHLEN) >> 2))
+#define nfs4_owner_maxsz        (1 + XDR_QUADLEN(IDMAP_NAMESZ))
+#define nfs4_group_maxsz        (1 + XDR_QUADLEN(IDMAP_NAMESZ))
 /* This is based on getfattr, which uses the most attributes: */
 #define nfs4_fattr_value_maxsz  (1 + (1 + 2 + 2 + 4 + 2 + 1 + 1 + 2 + 2 + \
-                                3 + 3 + 3 + 2 * nfs4_name_maxsz))
+                                3 + 3 + 3 + nfs4_owner_maxsz + nfs4_group_maxsz))
 #define nfs4_fattr_maxsz        (nfs4_fattr_bitmap_maxsz + \
                                nfs4_fattr_value_maxsz)
 #define decode_getattr_maxsz    (op_decode_hdr_maxsz + nfs4_fattr_maxsz)
@@ -116,8 +119,27 @@ static int nfs4_stat_to_errno(int);
                                3 + (NFS4_VERIFIER_SIZE >> 2))
 #define decode_setclientid_confirm_maxsz \
                                (op_decode_hdr_maxsz)
-#define encode_lookup_maxsz     (op_encode_hdr_maxsz + \
+#define encode_lookup_maxsz     (op_encode_hdr_maxsz + nfs4_name_maxsz)
-                                1 + ((3 + NFS4_FHSIZE) >> 2))
+#define decode_lookup_maxsz     (op_decode_hdr_maxsz)
+#define encode_share_access_maxsz \
+                                (2)
+#define encode_createmode_maxsz (1 + nfs4_fattr_maxsz)
+#define encode_opentype_maxsz   (1 + encode_createmode_maxsz)
+#define encode_claim_null_maxsz (1 + nfs4_name_maxsz)
+#define encode_open_maxsz       (op_encode_hdr_maxsz + \
+                                2 + encode_share_access_maxsz + 2 + \
+                                open_owner_id_maxsz + \
+                                encode_opentype_maxsz + \
+                                encode_claim_null_maxsz)
+#define decode_ace_maxsz        (3 + nfs4_owner_maxsz)
+#define decode_delegation_maxsz (1 + XDR_QUADLEN(NFS4_STATEID_SIZE) + 1 + \
+                                decode_ace_maxsz)
+#define decode_change_info_maxsz        (5)
+#define decode_open_maxsz       (op_decode_hdr_maxsz + \
+                                XDR_QUADLEN(NFS4_STATEID_SIZE) + \
+                                decode_change_info_maxsz + 1 + \
+                                nfs4_fattr_bitmap_maxsz + \
+                                decode_delegation_maxsz)
 #define encode_remove_maxsz     (op_encode_hdr_maxsz + \
                                nfs4_name_maxsz)
 #define encode_rename_maxsz     (op_encode_hdr_maxsz + \
@@ -134,9 +156,15 @@ static int nfs4_stat_to_errno(int);
 #define encode_create_maxsz     (op_encode_hdr_maxsz + \
                                2 + nfs4_name_maxsz + \
                                nfs4_fattr_maxsz)
-#define decode_create_maxsz     (op_decode_hdr_maxsz + 8)
+#define decode_create_maxsz     (op_decode_hdr_maxsz + \
+                                decode_change_info_maxsz + \
+                                nfs4_fattr_bitmap_maxsz)
 #define encode_delegreturn_maxsz (op_encode_hdr_maxsz + 4)
 #define decode_delegreturn_maxsz (op_decode_hdr_maxsz)
+#define encode_fs_locations_maxsz \
+                                (encode_getattr_maxsz)
+#define decode_fs_locations_maxsz \
+                                (0)
 #define NFS4_enc_compound_sz    (1024)  /* XXX: large enough? */
 #define NFS4_dec_compound_sz    (1024)  /* XXX: large enough? */
 #define NFS4_enc_read_sz        (compound_encode_hdr_maxsz + \
@@ -174,16 +202,21 @@ static int nfs4_stat_to_errno(int);
                                op_decode_hdr_maxsz + 2 + \
                                decode_getattr_maxsz)
 #define NFS4_enc_open_sz        (compound_encode_hdr_maxsz + \
-                                encode_putfh_maxsz + \
+                                encode_putfh_maxsz + \
-                                op_encode_hdr_maxsz + \
+                                encode_savefh_maxsz + \
-                                13 + 3 + 2 + 64 + \
+                                encode_open_maxsz + \
-                                encode_getattr_maxsz + \
+                                encode_getfh_maxsz + \
-                                encode_getfh_maxsz)
+                                encode_getattr_maxsz + \
+                                encode_restorefh_maxsz + \
+                                encode_getattr_maxsz)
 #define NFS4_dec_open_sz        (compound_decode_hdr_maxsz + \
-                                decode_putfh_maxsz + \
+                                decode_putfh_maxsz + \
-                                op_decode_hdr_maxsz + 4 + 5 + 2 + 3 + \
+                                decode_savefh_maxsz + \
-                                decode_getattr_maxsz + \
+                                decode_open_maxsz + \
-                                decode_getfh_maxsz)
+                                decode_getfh_maxsz + \
+                                decode_getattr_maxsz + \
+                                decode_restorefh_maxsz + \
+                                decode_getattr_maxsz)
 #define NFS4_enc_open_confirm_sz      \
                                (compound_encode_hdr_maxsz + \
                                encode_putfh_maxsz + \
@@ -193,12 +226,12 @@ static int nfs4_stat_to_errno(int);
                                        op_decode_hdr_maxsz + 4)
 #define NFS4_enc_open_noattr_sz (compound_encode_hdr_maxsz + \
                                        encode_putfh_maxsz + \
-                                        op_encode_hdr_maxsz + \
+                                        encode_open_maxsz + \
-                                        11)
+                                        encode_getattr_maxsz)
 #define NFS4_dec_open_noattr_sz (compound_decode_hdr_maxsz + \
                                        decode_putfh_maxsz + \
-                                        op_decode_hdr_maxsz + \
+                                        decode_open_maxsz + \
-                                        4 + 5 + 2 + 3)
+                                        decode_getattr_maxsz)
 #define NFS4_enc_open_downgrade_sz \
                                (compound_encode_hdr_maxsz + \
                                encode_putfh_maxsz + \
@@ -256,19 +289,19 @@ static int nfs4_stat_to_errno(int);
                                op_encode_hdr_maxsz + \
                                1 + 1 + 2 + 2 + \
                                1 + 4 + 1 + 2 + \
-                                owner_id_maxsz)
+                                lock_owner_id_maxsz)
 #define NFS4_dec_lock_sz        (compound_decode_hdr_maxsz + \
                                decode_putfh_maxsz + \
                                decode_getattr_maxsz + \
                                op_decode_hdr_maxsz + \
                                2 + 2 + 1 + 2 + \
-                                owner_id_maxsz)
+                                lock_owner_id_maxsz)
 #define NFS4_enc_lockt_sz       (compound_encode_hdr_maxsz + \
                                encode_putfh_maxsz + \
                                encode_getattr_maxsz + \
                                op_encode_hdr_maxsz + \
                                1 + 2 + 2 + 2 + \
-                                owner_id_maxsz)
+                                lock_owner_id_maxsz)
 #define NFS4_dec_lockt_sz       (NFS4_dec_lock_sz)
 #define NFS4_enc_locku_sz       (compound_encode_hdr_maxsz + \
                                encode_putfh_maxsz + \
@@ -298,7 +331,7 @@ static int nfs4_stat_to_errno(int);
                                encode_getfh_maxsz)
 #define NFS4_dec_lookup_sz      (compound_decode_hdr_maxsz + \
                                decode_putfh_maxsz + \
-                                op_decode_hdr_maxsz + \
+                                decode_lookup_maxsz + \
                                decode_getattr_maxsz + \
                                decode_getfh_maxsz)
 #define NFS4_enc_lookup_root_sz (compound_encode_hdr_maxsz + \
@@ -417,12 +450,13 @@ static int nfs4_stat_to_errno(int);
 #define NFS4_enc_fs_locations_sz \
                                (compound_encode_hdr_maxsz + \
                                 encode_putfh_maxsz + \
-                                 encode_getattr_maxsz)
+                                 encode_lookup_maxsz + \
+                                 encode_fs_locations_maxsz)
 #define NFS4_dec_fs_locations_sz \
                                (compound_decode_hdr_maxsz + \
                                 decode_putfh_maxsz + \
-                                 op_decode_hdr_maxsz + \
+                                 decode_lookup_maxsz + \
-                                 nfs4_fattr_bitmap_maxsz)
+                                 decode_fs_locations_maxsz)
 static struct {
        unsigned int    mode;
@@ -793,13 +827,14 @@ static int encode_lock(struct xdr_stream *xdr, const struct nfs_lock_args *args)
        WRITE64(nfs4_lock_length(args->fl));
        WRITE32(args->new_lock_owner);
        if (args->new_lock_owner){
-                RESERVE_SPACE(4+NFS4_STATEID_SIZE+20);
+                RESERVE_SPACE(4+NFS4_STATEID_SIZE+32);
                WRITE32(args->open_seqid->sequence->counter);
                WRITEMEM(args->open_stateid->data, NFS4_STATEID_SIZE);
                WRITE32(args->lock_seqid->sequence->counter);
                WRITE64(args->lock_owner.clientid);
-                WRITE32(4);
+                WRITE32(16);
-                WRITE32(args->lock_owner.id);
+                WRITEMEM("lock id:", 8);
+                WRITE64(args->lock_owner.id);
        }
        else {
                RESERVE_SPACE(NFS4_STATEID_SIZE+4);
@@ -814,14 +849,15 @@ static int encode_lockt(struct xdr_stream *xdr, const struct nfs_lockt_args *arg
 {
        __be32 *p;
-        RESERVE_SPACE(40);
+        RESERVE_SPACE(52);
        WRITE32(OP_LOCKT);
        WRITE32(nfs4_lock_type(args->fl, 0));
        WRITE64(args->fl->fl_start);
        WRITE64(nfs4_lock_length(args->fl));
        WRITE64(args->lock_owner.clientid);
-        WRITE32(4);
+        WRITE32(16);
-        WRITE32(args->lock_owner.id);
+        WRITEMEM("lock id:", 8);
+        WRITE64(args->lock_owner.id);
        return 0;
 }
@@ -886,10 +922,11 @@ static inline void encode_openhdr(struct xdr_stream *xdr, const struct nfs_opena
        WRITE32(OP_OPEN);
        WRITE32(arg->seqid->sequence->counter);
        encode_share_access(xdr, arg->open_flags);
-        RESERVE_SPACE(16);
+        RESERVE_SPACE(28);
        WRITE64(arg->clientid);
-        WRITE32(4);
+        WRITE32(16);
-        WRITE32(arg->id);
+        WRITEMEM("open id:", 8);
+        WRITE64(arg->id);
 }
 static inline void encode_createmode(struct xdr_stream *xdr, const struct nfs_openargs *arg)
@@ -1071,7 +1108,7 @@ static int encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args)
 static int encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg *readdir, struct rpc_rqst *req)
 {
-        struct rpc_auth *auth = req->rq_task->tk_auth;
+        struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
        uint32_t attrs[2] = {
                FATTR4_WORD0_RDATTR_ERROR|FATTR4_WORD0_FILEID,
                FATTR4_WORD1_MOUNTED_ON_FILEID,
@@ -1117,7 +1154,7 @@ static int encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg
 static int encode_readlink(struct xdr_stream *xdr, const struct nfs4_readlink *readlink, struct rpc_rqst *req)
 {
-        struct rpc_auth *auth = req->rq_task->tk_auth;
+        struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
        unsigned int replen;
        __be32 *p;
@@ -1735,7 +1772,7 @@ out:
 */
 static int nfs4_xdr_enc_read(struct rpc_rqst *req, __be32 *p, struct nfs_readargs *args)
 {
-        struct rpc_auth *auth = req->rq_task->tk_auth;
+        struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
        struct xdr_stream xdr;
        struct compound_hdr hdr = {
                .nops = 2,
@@ -1795,7 +1832,7 @@ nfs4_xdr_enc_getacl(struct rpc_rqst *req, __be32 *p,
                struct nfs_getaclargs *args)
 {
        struct xdr_stream xdr;
-        struct rpc_auth *auth = req->rq_task->tk_auth;
+        struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
        struct compound_hdr hdr = {
                .nops   = 2,
        };
@@ -2030,7 +2067,7 @@ static int nfs4_xdr_enc_fs_locations(struct rpc_rqst *req, __be32 *p, struct nfs
        struct compound_hdr hdr = {
                .nops = 3,
        };
-        struct rpc_auth *auth = req->rq_task->tk_auth;
+        struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
        int replen;
        int status;
@@ -3269,7 +3306,7 @@ static int decode_delegation(struct xdr_stream *xdr, struct nfs_openres *res)
 static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res)
 {
        __be32 *p;
-        uint32_t bmlen;
+        uint32_t savewords, bmlen, i;
        int status;
        status = decode_op_hdr(xdr, OP_OPEN);
@@ -3287,7 +3324,12 @@ static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res)
                goto xdr_error;
        READ_BUF(bmlen << 2);
-        p += bmlen;
+        savewords = min_t(uint32_t, bmlen, NFS4_BITMAP_SIZE);
+        for (i = 0; i < savewords; ++i)
+                READ32(res->attrset[i]);
+        for (; i < NFS4_BITMAP_SIZE; i++)
+                res->attrset[i] = 0;
        return decode_delegation(xdr, res);
 xdr_error:
        dprintk("%s: Bitmap too large! Length = %u\n", __FUNCTION__, bmlen);
diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c
index 49d1008ce1d7..3490322d1145 100644
--- a/fs/nfs/nfsroot.c
+++ b/fs/nfs/nfsroot.c
@@ -428,7 +428,7 @@ static int __init root_nfs_getport(int program, int version, int proto)
        printk(KERN_NOTICE "Looking up port of RPC %d/%d on %u.%u.%u.%u\n",
                program, version, NIPQUAD(servaddr));
        set_sockaddr(&sin, servaddr, 0);
-        return rpcb_getport_external(&sin, program, version, proto);
+        return rpcb_getport_sync(&sin, program, version, proto);
 }
@@ -496,7 +496,8 @@ static int __init root_nfs_get_handle(void)
                                        NFS_MNT3_VERSION : NFS_MNT_VERSION;
        set_sockaddr(&sin, servaddr, htons(mount_port));
-        status = nfsroot_mount(&sin, nfs_path, &fh, version, protocol);
+        status = nfs_mount((struct sockaddr *) &sin, sizeof(sin), NULL,
+                           nfs_path, version, protocol, &fh);
        if (status < 0)
                printk(KERN_ERR "Root-NFS: Server returned error %d "
                                "while mounting %s\n", status, nfs_path);
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index c5bb51a29e80..f56dae5216f4 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -85,9 +85,8 @@ nfs_create_request(struct nfs_open_context *ctx, struct inode *inode,
        req->wb_offset  = offset;
        req->wb_pgbase  = offset;
        req->wb_bytes   = count;
-        atomic_set(&req->wb_count, 1);
        req->wb_context = get_nfs_open_context(ctx);
+        kref_init(&req->wb_kref);
        return req;
 }
@@ -109,30 +108,31 @@ void nfs_unlock_request(struct nfs_page *req)
 }
 /**
- * nfs_set_page_writeback_locked - Lock a request for writeback
+ * nfs_set_page_tag_locked - Tag a request as locked
 * @req:
 */
-int nfs_set_page_writeback_locked(struct nfs_page *req)
+static int nfs_set_page_tag_locked(struct nfs_page *req)
 {
-        struct nfs_inode *nfsi = NFS_I(req->wb_context->dentry->d_inode);
+        struct nfs_inode *nfsi = NFS_I(req->wb_context->path.dentry->d_inode);
        if (!nfs_lock_request(req))
                return 0;
-        radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_WRITEBACK);
+        radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED);
        return 1;
 }
 /**
- * nfs_clear_page_writeback - Unlock request and wake up sleepers
+ * nfs_clear_page_tag_locked - Clear request tag and wake up sleepers
 */
-void nfs_clear_page_writeback(struct nfs_page *req)
+void nfs_clear_page_tag_locked(struct nfs_page *req)
 {
-        struct nfs_inode *nfsi = NFS_I(req->wb_context->dentry->d_inode);
+        struct inode *inode = req->wb_context->path.dentry->d_inode;
+        struct nfs_inode *nfsi = NFS_I(inode);
        if (req->wb_page != NULL) {
-                spin_lock(&nfsi->req_lock);
+                spin_lock(&inode->i_lock);
-                radix_tree_tag_clear(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_WRITEBACK);
+                radix_tree_tag_clear(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED);
-                spin_unlock(&nfsi->req_lock);
+                spin_unlock(&inode->i_lock);
        }
        nfs_unlock_request(req);
 }
@@ -160,11 +160,9 @@ void nfs_clear_request(struct nfs_page *req)
 *
 * Note: Should never be called with the spinlock held!
 */
-void
+static void nfs_free_request(struct kref *kref)
-nfs_release_request(struct nfs_page *req)
 {
-        if (!atomic_dec_and_test(&req->wb_count))
+        struct nfs_page *req = container_of(kref, struct nfs_page, wb_kref);
-                return;
        /* Release struct file or cached credential */
        nfs_clear_request(req);
@@ -172,6 +170,11 @@ nfs_release_request(struct nfs_page *req)
        nfs_page_free(req);
 }
+void nfs_release_request(struct nfs_page *req)
+{
+        kref_put(&req->wb_kref, nfs_free_request);
+}
 static int nfs_wait_bit_interruptible(void *word)
 {
        int ret = 0;
@@ -193,7 +196,7 @@ static int nfs_wait_bit_interruptible(void *word)
 int
 nfs_wait_on_request(struct nfs_page *req)
 {
-        struct rpc_clnt *clnt = NFS_CLIENT(req->wb_context->dentry->d_inode);
+        struct rpc_clnt *clnt = NFS_CLIENT(req->wb_context->path.dentry->d_inode);
        sigset_t oldmask;
        int ret = 0;
@@ -379,20 +382,20 @@ void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *desc, pgoff_t index)
 /**
 * nfs_scan_list - Scan a list for matching requests
 * @nfsi: NFS inode
- * @head: One of the NFS inode request lists
 * @dst: Destination list
 * @idx_start: lower bound of page->index to scan
 * @npages: idx_start + npages sets the upper bound to scan.
+ * @tag: tag to scan for
 *
 * Moves elements from one of the inode request lists.
 * If the number of requests is set to 0, the entire address_space
 * starting at index idx_start, is scanned.
 * The requests are *not* checked to ensure that they form a contiguous set.
- * You must be holding the inode's req_lock when calling this function
+ * You must be holding the inode's i_lock when calling this function
 */
-int nfs_scan_list(struct nfs_inode *nfsi, struct list_head *head,
+int nfs_scan_list(struct nfs_inode *nfsi,
                struct list_head *dst, pgoff_t idx_start,
-                unsigned int npages)
+                unsigned int npages, int tag)
 {
        struct nfs_page *pgvec[NFS_SCAN_MAXENTRIES];
        struct nfs_page *req;
@@ -407,9 +410,9 @@ int nfs_scan_list(struct nfs_inode *nfsi, struct list_head *head,
                idx_end = idx_start + npages - 1;
        for (;;) {
-                found = radix_tree_gang_lookup(&nfsi->nfs_page_tree,
+                found = radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree,
                                (void **)&pgvec[0], idx_start,
-                                NFS_SCAN_MAXENTRIES);
+                                NFS_SCAN_MAXENTRIES, tag);
                if (found <= 0)
                        break;
                for (i = 0; i < found; i++) {
@@ -417,15 +420,18 @@ int nfs_scan_list(struct nfs_inode *nfsi, struct list_head *head,
                        if (req->wb_index > idx_end)
                                goto out;
                        idx_start = req->wb_index + 1;
-                        if (req->wb_list_head != head)
+                        if (nfs_set_page_tag_locked(req)) {
-                                continue;
-                        if (nfs_set_page_writeback_locked(req)) {
                                nfs_list_remove_request(req);
+                                radix_tree_tag_clear(&nfsi->nfs_page_tree,
+                                                req->wb_index, tag);
                                nfs_list_add_request(req, dst);
                                res++;
+                                if (res == INT_MAX)
+                                        goto out;
                        }
                }
+                /* for latency reduction */
+                cond_resched_lock(&nfsi->vfs_inode.i_lock);
        }
 out:
        return res;
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 7bd7cb95c034..6ae2e58ed05a 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -145,8 +145,8 @@ static void nfs_readpage_release(struct nfs_page *req)
        unlock_page(req->wb_page);
        dprintk("NFS: read done (%s/%Ld %d@%Ld)\n",
-                        req->wb_context->dentry->d_inode->i_sb->s_id,
+                        req->wb_context->path.dentry->d_inode->i_sb->s_id,
-                        (long long)NFS_FILEID(req->wb_context->dentry->d_inode),
+                        (long long)NFS_FILEID(req->wb_context->path.dentry->d_inode),
                        req->wb_bytes,
                        (long long)req_offset(req));
        nfs_clear_request(req);
@@ -164,7 +164,7 @@ static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
        int flags;
        data->req         = req;
-        data->inode       = inode = req->wb_context->dentry->d_inode;
+        data->inode       = inode = req->wb_context->path.dentry->d_inode;
        data->cred        = req->wb_context->cred;
        data->args.fh     = NFS_FH(inode);
@@ -483,17 +483,19 @@ int nfs_readpage(struct file *file, struct page *page)
         */
        error = nfs_wb_page(inode, page);
        if (error)
-                goto out_error;
+                goto out_unlock;
+        if (PageUptodate(page))
+                goto out_unlock;
        error = -ESTALE;
        if (NFS_STALE(inode))
-                goto out_error;
+                goto out_unlock;
        if (file == NULL) {
                error = -EBADF;
                ctx = nfs_find_open_context(inode, NULL, FMODE_READ);
                if (ctx == NULL)
-                        goto out_error;
+                        goto out_unlock;
        } else
                ctx = get_nfs_open_context((struct nfs_open_context *)
                                file->private_data);
@@ -502,8 +504,7 @@ int nfs_readpage(struct file *file, struct page *page)
        put_nfs_open_context(ctx);
        return error;
+out_unlock:
-out_error:
        unlock_page(page);
        return error;
 }
@@ -520,21 +521,32 @@ readpage_async_filler(void *data, struct page *page)
        struct inode *inode = page->mapping->host;
        struct nfs_page *new;
        unsigned int len;
+        int error;
+        error = nfs_wb_page(inode, page);
+        if (error)
+                goto out_unlock;
+        if (PageUptodate(page))
+                goto out_unlock;
-        nfs_wb_page(inode, page);
        len = nfs_page_length(page);
        if (len == 0)
                return nfs_return_empty_page(page);
        new = nfs_create_request(desc->ctx, inode, page, 0, len);
-        if (IS_ERR(new)) {
+        if (IS_ERR(new))
-                        SetPageError(page);
+                goto out_error;
-                        unlock_page(page);
-                        return PTR_ERR(new);
-        }
        if (len < PAGE_CACHE_SIZE)
                zero_user_page(page, len, PAGE_CACHE_SIZE - len, KM_USER0);
        nfs_pageio_add_request(desc->pgio, new);
        return 0;
+out_error:
+        error = PTR_ERR(new);
+        SetPageError(page);
+out_unlock:
+        unlock_page(page);
+        return error;
 }
 int nfs_readpages(struct file *filp, struct address_space *mapping,
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index ca20d3cc2609..a2b1af89ca1a 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -45,6 +45,7 @@
 #include <linux/inet.h>
 #include <linux/nfs_xdr.h>
 #include <linux/magic.h>
+#include <linux/parser.h>
 #include <asm/system.h>
 #include <asm/uaccess.h>
@@ -57,6 +58,167 @@
 #define NFSDBG_FACILITY         NFSDBG_VFS
+/*
+ * Intermediate form of a text mount option string.  It is filled in by
+ * nfs_parse_mount_options() and translated into a struct nfs_mount_data
+ * by nfs_validate_mount_data().
+ */
+struct nfs_parsed_mount_data {
+        int                     flags;          /* NFS_MOUNT_* bits */
+        int                     rsize, wsize;
+        int                     timeo, retrans;
+        int                     acregmin, acregmax,
+                                acdirmin, acdirmax;
+        int                     namlen;
+        unsigned int            bsize;
+        unsigned int            auth_flavor_len;
+        rpc_authflavor_t        auth_flavors[1];        /* set by "sec=" */
+        char                    *client_address;        /* match_strdup() copy of "clientaddr=" */
+        struct {
+                struct sockaddr_in      address;        /* "mounthost=" */
+                unsigned int            program;        /* "mountprog=" */
+                unsigned int            version;        /* "mountvers="; 0 lets nfs_try_mount() choose */
+                unsigned short          port;           /* "mountport=", host byte order */
+                int                     protocol;       /* IPPROTO_UDP or IPPROTO_TCP */
+        } mount_server;
+        struct {
+                struct sockaddr_in      address;        /* "addr=" and "port=" */
+                char                    *hostname;
+                char                    *export_path;
+                unsigned int            program;        /* "nfsprog=" */
+                int                     protocol;       /* IPPROTO_UDP or IPPROTO_TCP */
+        } nfs_server;
+};
+/*
+ * Token values for nfs_mount_option_tokens, grouped by the kind of
+ * argument the option takes.  nfs_parse_mount_options() switches on them.
+ */
+enum {
+        /* Mount options that take no arguments */
+        Opt_soft, Opt_hard,
+        Opt_intr, Opt_nointr,
+        Opt_posix, Opt_noposix,
+        Opt_cto, Opt_nocto,
+        Opt_ac, Opt_noac,
+        Opt_lock, Opt_nolock,
+        Opt_v2, Opt_v3,
+        Opt_udp, Opt_tcp,
+        Opt_acl, Opt_noacl,
+        Opt_rdirplus, Opt_nordirplus,
+        Opt_sharecache, Opt_nosharecache,
+        /* Mount options that take integer arguments */
+        Opt_port,
+        Opt_rsize, Opt_wsize, Opt_bsize,
+        Opt_timeo, Opt_retrans,
+        Opt_acregmin, Opt_acregmax,
+        Opt_acdirmin, Opt_acdirmax,
+        Opt_actimeo,
+        Opt_namelen,
+        Opt_mountport,
+        Opt_mountprog, Opt_mountvers,
+        Opt_nfsprog, Opt_nfsvers,
+        /* Mount options that take string arguments */
+        Opt_sec, Opt_proto, Opt_mountproto,
+        Opt_addr, Opt_mounthost, Opt_clientaddr,
+        /* Mount options that are ignored */
+        Opt_userspace, Opt_deprecated,
+        Opt_err         /* table terminator; reported for unmatched options */
+};
+/*
+ * Maps each mount option string to its token.  Entries with "=%u" carry
+ * an integer argument (read with match_int()) and entries with "=%s" a
+ * string argument (read with match_strdup()).  "bg", "fg" and "retry="
+ * map to Opt_userspace, which nfs_parse_mount_options() accepts and
+ * ignores.  "vers=" is an alias for "nfsvers=".
+ */
+static match_table_t nfs_mount_option_tokens = {
+        { Opt_userspace, "bg" },
+        { Opt_userspace, "fg" },
+        { Opt_soft, "soft" },
+        { Opt_hard, "hard" },
+        { Opt_intr, "intr" },
+        { Opt_nointr, "nointr" },
+        { Opt_posix, "posix" },
+        { Opt_noposix, "noposix" },
+        { Opt_cto, "cto" },
+        { Opt_nocto, "nocto" },
+        { Opt_ac, "ac" },
+        { Opt_noac, "noac" },
+        { Opt_lock, "lock" },
+        { Opt_nolock, "nolock" },
+        { Opt_v2, "v2" },
+        { Opt_v3, "v3" },
+        { Opt_udp, "udp" },
+        { Opt_tcp, "tcp" },
+        { Opt_acl, "acl" },
+        { Opt_noacl, "noacl" },
+        { Opt_rdirplus, "rdirplus" },
+        { Opt_nordirplus, "nordirplus" },
+        { Opt_sharecache, "sharecache" },
+        { Opt_nosharecache, "nosharecache" },
+        { Opt_port, "port=%u" },
+        { Opt_rsize, "rsize=%u" },
+        { Opt_wsize, "wsize=%u" },
+        { Opt_bsize, "bsize=%u" },
+        { Opt_timeo, "timeo=%u" },
+        { Opt_retrans, "retrans=%u" },
+        { Opt_acregmin, "acregmin=%u" },
+        { Opt_acregmax, "acregmax=%u" },
+        { Opt_acdirmin, "acdirmin=%u" },
+        { Opt_acdirmax, "acdirmax=%u" },
+        { Opt_actimeo, "actimeo=%u" },
+        { Opt_userspace, "retry=%u" },
+        { Opt_namelen, "namlen=%u" },
+        { Opt_mountport, "mountport=%u" },
+        { Opt_mountprog, "mountprog=%u" },
+        { Opt_mountvers, "mountvers=%u" },
+        { Opt_nfsprog, "nfsprog=%u" },
+        { Opt_nfsvers, "nfsvers=%u" },
+        { Opt_nfsvers, "vers=%u" },
+        { Opt_sec, "sec=%s" },
+        { Opt_proto, "proto=%s" },
+        { Opt_mountproto, "mountproto=%s" },
+        { Opt_addr, "addr=%s" },
+        { Opt_clientaddr, "clientaddr=%s" },
+        { Opt_mounthost, "mounthost=%s" },
+        { Opt_err, NULL }
+};
+/*
+ * Tokens for the value of the "proto=" and "mountproto=" options.  These
+ * are distinct from Opt_udp/Opt_tcp, which identify the bare "udp"/"tcp"
+ * options in nfs_mount_option_tokens and have different numeric values.
+ */
+enum {
+        Opt_xprt_udp, Opt_xprt_tcp,
+        Opt_xprt_err
+};
+static match_table_t nfs_xprt_protocol_tokens = {
+        { Opt_xprt_udp, "udp" },
+        { Opt_xprt_tcp, "tcp" },
+        { Opt_xprt_err, NULL }
+};
+/* Tokens for the value of the "sec=" option (see nfs_secflavor_tokens) */
+enum {
+        Opt_sec_none, Opt_sec_sys,
+        Opt_sec_krb5, Opt_sec_krb5i, Opt_sec_krb5p,
+        Opt_sec_lkey, Opt_sec_lkeyi, Opt_sec_lkeyp,
+        Opt_sec_spkm, Opt_sec_spkmi, Opt_sec_spkmp,
+        Opt_sec_err
+};
+/*
+ * Values accepted by the "sec=" mount option.  "null" is an alias for
+ * "none".  The spkm names use the same spelling that
+ * nfs_pseudoflavour_to_name() prints, so the flavours handled by the
+ * "sec=" parser are actually reachable.
+ */
+static match_table_t nfs_secflavor_tokens = {
+        { Opt_sec_none, "none" },
+        { Opt_sec_none, "null" },
+        { Opt_sec_sys, "sys" },
+        { Opt_sec_krb5, "krb5" },
+        { Opt_sec_krb5i, "krb5i" },
+        { Opt_sec_krb5p, "krb5p" },
+        { Opt_sec_lkey, "lkey" },
+        { Opt_sec_lkeyi, "lkeyi" },
+        { Opt_sec_lkeyp, "lkeyp" },
+        { Opt_sec_spkm, "spkm" },
+        { Opt_sec_spkmi, "spkmi" },
+        { Opt_sec_spkmp, "spkmp" },
+        { Opt_sec_err, NULL }
+};
 static void nfs_umount_begin(struct vfsmount *, int);
 static int  nfs_statfs(struct dentry *, struct kstatfs *);
 static int  nfs_show_options(struct seq_file *, struct vfsmount *);
@@ -263,11 +425,11 @@ static const char *nfs_pseudoflavour_to_name(rpc_authflavor_t flavour)
                { RPC_AUTH_GSS_SPKM, "spkm" },
                { RPC_AUTH_GSS_SPKMI, "spkmi" },
                { RPC_AUTH_GSS_SPKMP, "spkmp" },
-                { -1, "unknown" }
+                { UINT_MAX, "unknown" }
        };
        int i;
-        for (i=0; sec_flavours[i].flavour != -1; i++) {
+        for (i = 0; sec_flavours[i].flavour != UINT_MAX; i++) {
                if (sec_flavours[i].flavour == flavour)
                        break;
        }
@@ -291,6 +453,7 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
                { NFS_MOUNT_NONLM, ",nolock", "" },
                { NFS_MOUNT_NOACL, ",noacl", "" },
                { NFS_MOUNT_NORDIRPLUS, ",nordirplus", "" },
+                { NFS_MOUNT_UNSHARED, ",nosharecache", ""},
                { 0, NULL, NULL }
        };
        const struct proc_nfs_info *nfs_infop;
@@ -430,87 +593,641 @@ static int nfs_show_stats(struct seq_file *m, struct vfsmount *mnt)
 */
 static void nfs_umount_begin(struct vfsmount *vfsmnt, int flags)
 {
+        struct nfs_server *server = NFS_SB(vfsmnt->mnt_sb);
+        struct rpc_clnt *rpc;
        shrink_submounts(vfsmnt, &nfs_automount_list);
+        /* Outstanding RPC tasks are only killed for a forced unmount */
+        if (!(flags & MNT_FORCE))
+                return;
+        /* -EIO all pending I/O */
+        rpc = server->client_acl;
+        /* the client pointers are tested with IS_ERR(), not against NULL */
+        if (!IS_ERR(rpc))
+                rpc_killall_tasks(rpc);
+        rpc = server->client;
+        if (!IS_ERR(rpc))
+                rpc_killall_tasks(rpc);
 }
 /*
- * Validate the NFS2/NFS3 mount data
+ * Sanity-check a server address provided by the mount command
+ *
+ * Returns 1 when @addr is an AF_INET address other than INADDR_ANY,
+ * and 0 for INADDR_ANY or for any other address family.
- * - fills in the mount root filehandle
  */
-static int nfs_validate_mount_data(struct nfs_mount_data *data,
+static int nfs_verify_server_address(struct sockaddr *addr)
-                                   struct nfs_fh *mntfh)
 {
-        if (data == NULL) {
+        switch (addr->sa_family) {
-                dprintk("%s: missing data argument\n", __FUNCTION__);
+        case AF_INET: {
-                return -EINVAL;
+                struct sockaddr_in *sa = (struct sockaddr_in *) addr;
+                if (sa->sin_addr.s_addr != INADDR_ANY)
+                        return 1;
+                break;
+        }
        }
-        if (data->version <= 0 || data->version > NFS_MOUNT_VERSION) {
+        return 0;
-                dprintk("%s: bad mount version\n", __FUNCTION__);
+}
-                return -EINVAL;
+/*
+ * Error-check and convert a string of mount options from user space into
+ * a data structure
+ *
+ * @raw: comma-separated option string; it is split in place by strsep()
+ * @mnt: receives the parsed settings.  Flags are set and cleared
+ *       relative to the values already in @mnt, so the caller must
+ *       initialise it with its defaults first.
+ *
+ * Returns 1 on success (a NULL @raw also counts as success) and 0 if an
+ * option is unknown, malformed or out of range, or if memory for a
+ * string argument could not be allocated.  The string stored in
+ * mnt->client_address comes from match_strdup(); freeing it is left to
+ * the caller.
+ */
+static int nfs_parse_mount_options(char *raw,
+                                   struct nfs_parsed_mount_data *mnt)
+{
+        char *p, *string;
+        if (!raw) {
+                dfprintk(MOUNT, "NFS: mount options string was NULL.\n");
+                return 1;
        }
+        dfprintk(MOUNT, "NFS: nfs mount opts='%s'\n", raw);
-        switch (data->version) {
+        while ((p = strsep(&raw, ",")) != NULL) {
-                case 1:
+                substring_t args[MAX_OPT_ARGS];
-                        data->namlen = 0;
+                int option, token;
-                case 2:
-                        data->bsize  = 0;
+                if (!*p)
-                case 3:
+                        continue;
-                        if (data->flags & NFS_MOUNT_VER3) {
-                                dprintk("%s: mount structure version %d does not support NFSv3\n",
+                dfprintk(MOUNT, "NFS:   parsing nfs mount option '%s'\n", p);
-                                                __FUNCTION__,
-                                                data->version);
+                token = match_token(p, nfs_mount_option_tokens, args);
-                                return -EINVAL;
+                switch (token) {
+                case Opt_soft:
+                        mnt->flags |= NFS_MOUNT_SOFT;
+                        break;
+                case Opt_hard:
+                        mnt->flags &= ~NFS_MOUNT_SOFT;
+                        break;
+                case Opt_intr:
+                        mnt->flags |= NFS_MOUNT_INTR;
+                        break;
+                case Opt_nointr:
+                        mnt->flags &= ~NFS_MOUNT_INTR;
+                        break;
+                case Opt_posix:
+                        mnt->flags |= NFS_MOUNT_POSIX;
+                        break;
+                case Opt_noposix:
+                        mnt->flags &= ~NFS_MOUNT_POSIX;
+                        break;
+                case Opt_cto:
+                        mnt->flags &= ~NFS_MOUNT_NOCTO;
+                        break;
+                case Opt_nocto:
+                        mnt->flags |= NFS_MOUNT_NOCTO;
+                        break;
+                case Opt_ac:
+                        mnt->flags &= ~NFS_MOUNT_NOAC;
+                        break;
+                case Opt_noac:
+                        mnt->flags |= NFS_MOUNT_NOAC;
+                        break;
+                case Opt_lock:
+                        mnt->flags &= ~NFS_MOUNT_NONLM;
+                        break;
+                case Opt_nolock:
+                        mnt->flags |= NFS_MOUNT_NONLM;
+                        break;
+                case Opt_v2:
+                        mnt->flags &= ~NFS_MOUNT_VER3;
+                        break;
+                case Opt_v3:
+                        mnt->flags |= NFS_MOUNT_VER3;
+                        break;
+                case Opt_udp:
+                        mnt->flags &= ~NFS_MOUNT_TCP;
+                        mnt->nfs_server.protocol = IPPROTO_UDP;
+                        mnt->timeo = 7;
+                        mnt->retrans = 5;
+                        break;
+                case Opt_tcp:
+                        mnt->flags |= NFS_MOUNT_TCP;
+                        mnt->nfs_server.protocol = IPPROTO_TCP;
+                        mnt->timeo = 600;
+                        mnt->retrans = 2;
+                        break;
+                case Opt_acl:
+                        mnt->flags &= ~NFS_MOUNT_NOACL;
+                        break;
+                case Opt_noacl:
+                        mnt->flags |= NFS_MOUNT_NOACL;
+                        break;
+                case Opt_rdirplus:
+                        mnt->flags &= ~NFS_MOUNT_NORDIRPLUS;
+                        break;
+                case Opt_nordirplus:
+                        mnt->flags |= NFS_MOUNT_NORDIRPLUS;
+                        break;
+                case Opt_sharecache:
+                        mnt->flags &= ~NFS_MOUNT_UNSHARED;
+                        break;
+                case Opt_nosharecache:
+                        mnt->flags |= NFS_MOUNT_UNSHARED;
+                        break;
+                case Opt_port:
+                        if (match_int(args, &option))
+                                return 0;
+                        if (option < 0 || option > 65535)
+                                return 0;
+                        /* sin_port is 16 bits wide, in network byte order */
+                        mnt->nfs_server.address.sin_port = htons(option);
+                        break;
+                case Opt_rsize:
+                        if (match_int(args, &mnt->rsize))
+                                return 0;
+                        break;
+                case Opt_wsize:
+                        if (match_int(args, &mnt->wsize))
+                                return 0;
+                        break;
+                case Opt_bsize:
+                        if (match_int(args, &option))
+                                return 0;
+                        if (option < 0)
+                                return 0;
+                        mnt->bsize = option;
+                        break;
+                case Opt_timeo:
+                        if (match_int(args, &mnt->timeo))
+                                return 0;
+                        break;
+                case Opt_retrans:
+                        if (match_int(args, &mnt->retrans))
+                                return 0;
+                        break;
+                case Opt_acregmin:
+                        if (match_int(args, &mnt->acregmin))
+                                return 0;
+                        break;
+                case Opt_acregmax:
+                        if (match_int(args, &mnt->acregmax))
+                                return 0;
+                        break;
+                case Opt_acdirmin:
+                        if (match_int(args, &mnt->acdirmin))
+                                return 0;
+                        break;
+                case Opt_acdirmax:
+                        if (match_int(args, &mnt->acdirmax))
+                                return 0;
+                        break;
+                case Opt_actimeo:
+                        if (match_int(args, &option))
+                                return 0;
+                        if (option < 0)
+                                return 0;
+                        mnt->acregmin =
+                        mnt->acregmax =
+                        mnt->acdirmin =
+                        mnt->acdirmax = option;
+                        break;
+                case Opt_namelen:
+                        if (match_int(args, &mnt->namlen))
+                                return 0;
+                        break;
+                case Opt_mountport:
+                        if (match_int(args, &option))
+                                return 0;
+                        if (option < 0 || option > 65535)
+                                return 0;
+                        /* kept in host order; nfs_try_mount() applies htons() */
+                        mnt->mount_server.port = option;
+                        break;
+                case Opt_mountprog:
+                        if (match_int(args, &option))
+                                return 0;
+                        if (option < 0)
+                                return 0;
+                        mnt->mount_server.program = option;
+                        break;
+                case Opt_mountvers:
+                        if (match_int(args, &option))
+                                return 0;
+                        if (option < 0)
+                                return 0;
+                        mnt->mount_server.version = option;
+                        break;
+                case Opt_nfsprog:
+                        if (match_int(args, &option))
+                                return 0;
+                        if (option < 0)
+                                return 0;
+                        mnt->nfs_server.program = option;
+                        break;
+                case Opt_nfsvers:
+                        if (match_int(args, &option))
+                                return 0;
+                        switch (option) {
+                        case 2:
+                                mnt->flags &= ~NFS_MOUNT_VER3;
+                                break;
+                        case 3:
+                                mnt->flags |= NFS_MOUNT_VER3;
+                                break;
+                        default:
+                                goto out_unrec_vers;
                        }
-                        data->root.size = NFS2_FHSIZE;
+                        break;
-                        memcpy(data->root.data, data->old_root.data, NFS2_FHSIZE);
-                case 4:
+                case Opt_sec:
-                        if (data->flags & NFS_MOUNT_SECFLAVOUR) {
+                        string = match_strdup(args);
-                                dprintk("%s: mount structure version %d does not support strong security\n",
+                        if (string == NULL)
-                                                __FUNCTION__,
+                                goto out_nomem;
-                                                data->version);
+                        token = match_token(string, nfs_secflavor_tokens, args);
-                                return -EINVAL;
+                        kfree(string);
+                        /*
+                         * The flags setting is for v2/v3.  The flavor_len
+                         * setting is for v4.  v2/v3 also need to know the
+                         * difference between NULL and UNIX.
+                         */
+                        switch (token) {
+                        case Opt_sec_none:
+                                mnt->flags &= ~NFS_MOUNT_SECFLAVOUR;
+                                mnt->auth_flavor_len = 0;
+                                mnt->auth_flavors[0] = RPC_AUTH_NULL;
+                                break;
+                        case Opt_sec_sys:
+                                mnt->flags &= ~NFS_MOUNT_SECFLAVOUR;
+                                mnt->auth_flavor_len = 0;
+                                mnt->auth_flavors[0] = RPC_AUTH_UNIX;
+                                break;
+                        case Opt_sec_krb5:
+                                mnt->flags |= NFS_MOUNT_SECFLAVOUR;
+                                mnt->auth_flavor_len = 1;
+                                mnt->auth_flavors[0] = RPC_AUTH_GSS_KRB5;
+                                break;
+                        case Opt_sec_krb5i:
+                                mnt->flags |= NFS_MOUNT_SECFLAVOUR;
+                                mnt->auth_flavor_len = 1;
+                                mnt->auth_flavors[0] = RPC_AUTH_GSS_KRB5I;
+                                break;
+                        case Opt_sec_krb5p:
+                                mnt->flags |= NFS_MOUNT_SECFLAVOUR;
+                                mnt->auth_flavor_len = 1;
+                                mnt->auth_flavors[0] = RPC_AUTH_GSS_KRB5P;
+                                break;
+                        case Opt_sec_lkey:
+                                mnt->flags |= NFS_MOUNT_SECFLAVOUR;
+                                mnt->auth_flavor_len = 1;
+                                mnt->auth_flavors[0] = RPC_AUTH_GSS_LKEY;
+                                break;
+                        case Opt_sec_lkeyi:
+                                mnt->flags |= NFS_MOUNT_SECFLAVOUR;
+                                mnt->auth_flavor_len = 1;
+                                mnt->auth_flavors[0] = RPC_AUTH_GSS_LKEYI;
+                                break;
+                        case Opt_sec_lkeyp:
+                                mnt->flags |= NFS_MOUNT_SECFLAVOUR;
+                                mnt->auth_flavor_len = 1;
+                                mnt->auth_flavors[0] = RPC_AUTH_GSS_LKEYP;
+                                break;
+                        case Opt_sec_spkm:
+                                mnt->flags |= NFS_MOUNT_SECFLAVOUR;
+                                mnt->auth_flavor_len = 1;
+                                mnt->auth_flavors[0] = RPC_AUTH_GSS_SPKM;
+                                break;
+                        case Opt_sec_spkmi:
+                                mnt->flags |= NFS_MOUNT_SECFLAVOUR;
+                                mnt->auth_flavor_len = 1;
+                                mnt->auth_flavors[0] = RPC_AUTH_GSS_SPKMI;
+                                break;
+                        case Opt_sec_spkmp:
+                                mnt->flags |= NFS_MOUNT_SECFLAVOUR;
+                                mnt->auth_flavor_len = 1;
+                                mnt->auth_flavors[0] = RPC_AUTH_GSS_SPKMP;
+                                break;
+                        default:
+                                goto out_unrec_sec;
                        }
-                case 5:
+                        break;
-                        memset(data->context, 0, sizeof(data->context));
+                case Opt_proto:
-        }
+                        string = match_strdup(args);
+                        if (string == NULL)
+                                goto out_nomem;
+                        token = match_token(string,
+                                            nfs_xprt_protocol_tokens, args);
+                        kfree(string);
+                        /*
+                         * token comes from nfs_xprt_protocol_tokens, so it
+                         * must be compared with Opt_xprt_*, not with the
+                         * Opt_udp/Opt_tcp values of the main option table.
+                         */
+                        switch (token) {
+                        case Opt_xprt_udp:
+                                mnt->flags &= ~NFS_MOUNT_TCP;
+                                mnt->nfs_server.protocol = IPPROTO_UDP;
+                                mnt->timeo = 7;
+                                mnt->retrans = 5;
+                                break;
+                        case Opt_xprt_tcp:
+                                mnt->flags |= NFS_MOUNT_TCP;
+                                mnt->nfs_server.protocol = IPPROTO_TCP;
+                                mnt->timeo = 600;
+                                mnt->retrans = 2;
+                                break;
+                        default:
+                                goto out_unrec_xprt;
+                        }
+                        break;
+                case Opt_mountproto:
+                        string = match_strdup(args);
+                        if (string == NULL)
+                                goto out_nomem;
+                        token = match_token(string,
+                                            nfs_xprt_protocol_tokens, args);
+                        kfree(string);
+                        /* as for "proto=": compare with the Opt_xprt_* tokens */
+                        switch (token) {
+                        case Opt_xprt_udp:
+                                mnt->mount_server.protocol = IPPROTO_UDP;
+                                break;
+                        case Opt_xprt_tcp:
+                                mnt->mount_server.protocol = IPPROTO_TCP;
+                                break;
+                        default:
+                                goto out_unrec_xprt;
+                        }
+                        break;
+                case Opt_addr:
+                        string = match_strdup(args);
+                        if (string == NULL)
+                                goto out_nomem;
+                        mnt->nfs_server.address.sin_family = AF_INET;
+                        mnt->nfs_server.address.sin_addr.s_addr =
+                                                        in_aton(string);
+                        kfree(string);
+                        break;
+                case Opt_clientaddr:
+                        string = match_strdup(args);
+                        if (string == NULL)
+                                goto out_nomem;
+                        /* ownership of the copy passes to @mnt */
+                        mnt->client_address = string;
+                        break;
+                case Opt_mounthost:
+                        string = match_strdup(args);
+                        if (string == NULL)
+                                goto out_nomem;
+                        mnt->mount_server.address.sin_family = AF_INET;
+                        mnt->mount_server.address.sin_addr.s_addr =
+                                                        in_aton(string);
+                        kfree(string);
+                        break;
-        /* Set the pseudoflavor */
+                case Opt_userspace:
-        if (!(data->flags & NFS_MOUNT_SECFLAVOUR))
+                case Opt_deprecated:
-                data->pseudoflavor = RPC_AUTH_UNIX;
+                        break;
-#ifndef CONFIG_NFS_V3
+                default:
-        /* If NFSv3 is not compiled in, return -EPROTONOSUPPORT */
+                        goto out_unknown;
-        if (data->flags & NFS_MOUNT_VER3) {
+                }
-                dprintk("%s: NFSv3 not compiled into kernel\n", __FUNCTION__);
-                return -EPROTONOSUPPORT;
        }
-#endif /* CONFIG_NFS_V3 */
-        /* We now require that the mount process passes the remote address */
+        return 1;
-        if (data->addr.sin_addr.s_addr == INADDR_ANY) {
-                dprintk("%s: mount program didn't pass remote address!\n",
+out_nomem:
-                        __FUNCTION__);
+        printk(KERN_INFO "NFS: not enough memory to parse option\n");
-                return -EINVAL;
+        return 0;
+out_unrec_vers:
+        printk(KERN_INFO "NFS: unrecognized NFS version number\n");
+        return 0;
+out_unrec_xprt:
+        printk(KERN_INFO "NFS: unrecognized transport protocol\n");
+        return 0;
+out_unrec_sec:
+        printk(KERN_INFO "NFS: unrecognized security flavor\n");
+        return 0;
+out_unknown:
+        printk(KERN_INFO "NFS: unknown mount option: %s\n", p);
+        return 0;
+}
+/*
+ * Use the remote server's MOUNT service to request the NFS file handle
+ * corresponding to the provided path.
+ *
+ * @args:    parsed mount options.  mount_server.version is filled in here
+ *           when it is 0 (NFS_MNT3_VERSION if NFS_MOUNT_VER3 is set in
+ *           args->flags, NFS_MNT_VERSION otherwise).
+ * @root_fh: passed to nfs_mount() to receive the file handle
+ *
+ * Returns the status of nfs_mount(), or the negative status of
+ * rpcb_getport_sync() if the MOUNT port lookup failed.
+ */
+static int nfs_try_mount(struct nfs_parsed_mount_data *args,
+                         struct nfs_fh *root_fh)
+{
+        struct sockaddr_in sin;
+        int status;
+        if (args->mount_server.version == 0) {
+                if (args->flags & NFS_MOUNT_VER3)
+                        args->mount_server.version = NFS_MNT3_VERSION;
+                else
+                        args->mount_server.version = NFS_MNT_VERSION;
        }
-        /* Prepare the root filehandle */
+        /*
-        if (data->flags & NFS_MOUNT_VER3)
+         * Construct the mount server's address.
-                mntfh->size = data->root.size;
+         */
+        if (args->mount_server.address.sin_addr.s_addr != INADDR_ANY)
+                sin = args->mount_server.address;
        else
-                mntfh->size = NFS2_FHSIZE;
+                sin = args->nfs_server.address;
+        /* No "mountport=" given: look the port up with rpcb_getport_sync() */
+        if (args->mount_server.port == 0) {
+                status = rpcb_getport_sync(&sin,
+                                           args->mount_server.program,
+                                           args->mount_server.version,
+                                           args->mount_server.protocol);
+                if (status < 0)
+                        goto out_err;
+                sin.sin_port = htons(status);
+        } else
+                sin.sin_port = htons(args->mount_server.port);
+        /*
+         * Now ask the mount server to map our export path
+         * to a file handle.
+         */
+        status = nfs_mount((struct sockaddr *) &sin,
+                           sizeof(sin),
+                           args->nfs_server.hostname,
+                           args->nfs_server.export_path,
+                           args->mount_server.version,
+                           args->mount_server.protocol,
+                           root_fh);
+        if (status < 0)
+                goto out_err;
+        return status;
-        if (mntfh->size > sizeof(mntfh->data)) {
+out_err:
-                dprintk("%s: invalid root filehandle\n", __FUNCTION__);
+        dfprintk(MOUNT, "NFS: unable to contact server on host "
-                return -EINVAL;
+                 NIPQUAD_FMT "\n", NIPQUAD(sin.sin_addr.s_addr));
+        return status;
+}
+/*
+ * Validate the NFS2/NFS3 mount data
+ * - fills in the mount root filehandle
+ *
+ * For option strings, user space handles the following behaviors:
+ *
+ * + DNS: mapping server host name to IP address ("addr=" option)
+ *
+ * + failure mode: how to behave if a mount request can't be handled
+ *   immediately ("fg/bg" option)
+ *
+ * + retry: how often to retry a mount request ("retry=" option)
+ *
+ * + breaking back: trying proto=udp after proto=tcp, v2 after v3,
+ *   mountproto=tcp after mountproto=udp, and so on
+ *
+ * XXX: as far as I can tell, changing the NFS program number is not
+ *      supported in the NFS client.
+ *
+ * @options:  in/out.  *options is either a binary struct nfs_mount_data
+ *            (version 1-6) or, when the leading version field has any
+ *            other value, a text option string.  In the text case
+ *            *options is replaced by a kzalloc()ed struct nfs_mount_data.
+ * @mntfh:    receives the root file handle
+ * @dev_name: "hostname:/export/path"; only used for text options
+ *
+ * Returns 0 on success, or -EINVAL, -ENOMEM or -EPROTONOSUPPORT.
+ */
+static int nfs_validate_mount_data(struct nfs_mount_data **options,
+                                   struct nfs_fh *mntfh,
+                                   const char *dev_name)
+{
+        struct nfs_mount_data *data = *options;
+        if (data == NULL)
+                goto out_no_data;
+        /* Older structure versions are upgraded step by step */
+        switch (data->version) {
+        case 1:
+                data->namlen = 0;
+                /* fall through */
+        case 2:
+                data->bsize = 0;
+                /* fall through */
+        case 3:
+                if (data->flags & NFS_MOUNT_VER3)
+                        goto out_no_v3;
+                data->root.size = NFS2_FHSIZE;
+                memcpy(data->root.data, data->old_root.data, NFS2_FHSIZE);
+                /* fall through */
+        case 4:
+                if (data->flags & NFS_MOUNT_SECFLAVOUR)
+                        goto out_no_sec;
+                /* fall through */
+        case 5:
+                memset(data->context, 0, sizeof(data->context));
+                /* fall through */
+        case 6:
+                if (data->flags & NFS_MOUNT_VER3)
+                        mntfh->size = data->root.size;
+                else
+                        mntfh->size = NFS2_FHSIZE;
+                if (mntfh->size > sizeof(mntfh->data))
+                        goto out_invalid_fh;
+                memcpy(mntfh->data, data->root.data, mntfh->size);
+                if (mntfh->size < sizeof(mntfh->data))
+                        memset(mntfh->data + mntfh->size, 0,
+                               sizeof(mntfh->data) - mntfh->size);
+                break;
+        default: {
+                /* Anything else is treated as a text option string */
+                unsigned int len;
+                char *c;
+                int status;
+                struct nfs_parsed_mount_data args = {
+                        .flags          = (NFS_MOUNT_VER3 | NFS_MOUNT_TCP),
+                        .rsize          = NFS_MAX_FILE_IO_SIZE,
+                        .wsize          = NFS_MAX_FILE_IO_SIZE,
+                        .timeo          = 600,
+                        .retrans        = 2,
+                        .acregmin       = 3,
+                        .acregmax       = 60,
+                        .acdirmin       = 30,
+                        .acdirmax       = 60,
+                        .mount_server.protocol = IPPROTO_UDP,
+                        .mount_server.program = NFS_MNT_PROGRAM,
+                        .nfs_server.protocol = IPPROTO_TCP,
+                        .nfs_server.program = NFS_PROGRAM,
+                };
+                /*
+                 * Every exit from this block goes through out_free_args,
+                 * because the parser may have allocated
+                 * args.client_address, which is not used after this point.
+                 */
+                status = -EINVAL;
+                if (nfs_parse_mount_options((char *) *options, &args) == 0)
+                        goto out_free_args;
+                data = kzalloc(sizeof(*data), GFP_KERNEL);
+                if (data == NULL) {
+                        status = -ENOMEM;
+                        goto out_free_args;
+                }
+                /*
+                 * NB: after this point, caller will free "data"
+                 * if we return an error
+                 */
+                *options = data;
+                c = strchr(dev_name, ':');
+                if (c == NULL)
+                        goto out_free_args;
+                /*
+                 * The host name is everything before the ':'.  It must be
+                 * non-empty and leave room for the terminating NUL.
+                 */
+                len = c - dev_name;
+                if (len == 0 || len >= sizeof(data->hostname))
+                        goto out_free_args;
+                memcpy(data->hostname, dev_name, len);
+                data->hostname[len] = '\0';
+                args.nfs_server.hostname = data->hostname;
+                c++;
+                if (strlen(c) > NFS_MAXPATHLEN)
+                        goto out_free_args;
+                args.nfs_server.export_path = c;
+                if (nfs_try_mount(&args, mntfh) != 0)
+                        goto out_free_args;
+                /*
+                 * Translate to nfs_mount_data, which nfs_fill_super
+                 * can deal with.
+                 */
+                data->version           = 6;
+                data->flags             = args.flags;
+                data->rsize             = args.rsize;
+                data->wsize             = args.wsize;
+                data->timeo             = args.timeo;
+                data->retrans           = args.retrans;
+                data->acregmin          = args.acregmin;
+                data->acregmax          = args.acregmax;
+                data->acdirmin          = args.acdirmin;
+                data->acdirmax          = args.acdirmax;
+                data->addr              = args.nfs_server.address;
+                data->namlen            = args.namlen;
+                data->bsize             = args.bsize;
+                data->pseudoflavor      = args.auth_flavors[0];
+                status = 0;
+out_free_args:
+                kfree(args.client_address);
+                if (status != 0)
+                        return status;
+                break;
+                }
        }
-        memcpy(mntfh->data, data->root.data, mntfh->size);
+        if (!(data->flags & NFS_MOUNT_SECFLAVOUR))
-        if (mntfh->size < sizeof(mntfh->data))
+                data->pseudoflavor = RPC_AUTH_UNIX;
-                memset(mntfh->data + mntfh->size, 0,
-                       sizeof(mntfh->data) - mntfh->size);
+#ifndef CONFIG_NFS_V3
+        if (data->flags & NFS_MOUNT_VER3)
+                goto out_v3_not_compiled;
+#endif /* !CONFIG_NFS_V3 */
+        if (!nfs_verify_server_address((struct sockaddr *) &data->addr))
+                goto out_no_address;
        return 0;
+out_no_data:
+        dfprintk(MOUNT, "NFS: mount program didn't pass any mount data\n");
+        return -EINVAL;
+out_no_v3:
+        dfprintk(MOUNT, "NFS: nfs_mount_data version %d does not support v3\n",
+                 data->version);
+        return -EINVAL;
+out_no_sec:
+        dfprintk(MOUNT, "NFS: nfs_mount_data version supports only AUTH_SYS\n");
+        return -EINVAL;
+#ifndef CONFIG_NFS_V3
+out_v3_not_compiled:
+        dfprintk(MOUNT, "NFS: NFSv3 is not compiled into kernel\n");
+        return -EPROTONOSUPPORT;
+#endif /* !CONFIG_NFS_V3 */
+out_no_address:
+        dfprintk(MOUNT, "NFS: mount program didn't pass remote address\n");
+        return -EINVAL;
+out_invalid_fh:
+        dfprintk(MOUNT, "NFS: invalid root filehandle\n");
+        return -EINVAL;
 }
 /*
@@ -600,13 +1317,51 @@ static int nfs_compare_super(struct super_block *sb, void *data)
 {
        struct nfs_server *server = data, *old = NFS_SB(sb);
-        if (old->nfs_client != server->nfs_client)
+        if (memcmp(&old->nfs_client->cl_addr,
+                                &server->nfs_client->cl_addr,
+                                sizeof(old->nfs_client->cl_addr)) != 0)
+                return 0;
+        /* Note: NFS_MOUNT_UNSHARED == NFS4_MOUNT_UNSHARED */
+        if (old->flags & NFS_MOUNT_UNSHARED)
                return 0;
        if (memcmp(&old->fsid, &server->fsid, sizeof(old->fsid)) != 0)
                return 0;
        return 1;
 }
+#define NFS_MS_MASK (MS_RDONLY|MS_NOSUID|MS_NODEV|MS_NOEXEC|MS_SYNCHRONOUS) /* MS_* bits compared by nfs_compare_mount_options() */
+static int nfs_compare_mount_options(const struct super_block *s, const struct nfs_server *b, int flags)
+{ /* returns 0 when every setting compared below matches, -EBUSY on the first mismatch */
+        const struct nfs_server *a = s->s_fs_info; /* server record stored in superblock s */
+        const struct rpc_clnt *clnt_a = a->client;
+        const struct rpc_clnt *clnt_b = b->client;
+        if ((s->s_flags & NFS_MS_MASK) != (flags & NFS_MS_MASK)) /* only the NFS_MS_MASK bits of the mount flags are compared */
+                goto Ebusy;
+        if (a->nfs_client != b->nfs_client) /* pointer identity, not a field-by-field compare */
+                goto Ebusy;
+        if (a->flags != b->flags) /* all server flag bits must be equal */
+                goto Ebusy;
+        if (a->wsize != b->wsize)
+                goto Ebusy;
+        if (a->rsize != b->rsize)
+                goto Ebusy;
+        if (a->acregmin != b->acregmin)
+                goto Ebusy;
+        if (a->acregmax != b->acregmax)
+                goto Ebusy;
+        if (a->acdirmin != b->acdirmin)
+                goto Ebusy;
+        if (a->acdirmax != b->acdirmax)
+                goto Ebusy;
+        if (clnt_a->cl_auth->au_flavor != clnt_b->cl_auth->au_flavor) /* RPC auth flavour of the two clients */
+                goto Ebusy;
+        return 0;
+Ebusy: /* single exit for every mismatch */
+        return -EBUSY;
+}
 static int nfs_get_sb(struct file_system_type *fs_type,
        int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt)
 {
@@ -615,30 +1370,37 @@ static int nfs_get_sb(struct file_system_type *fs_type,
        struct nfs_fh mntfh;
        struct nfs_mount_data *data = raw_data;
        struct dentry *mntroot;
+        int (*compare_super)(struct super_block *, void *) = nfs_compare_super;
        int error;
        /* Validate the mount data */
-        error = nfs_validate_mount_data(data, &mntfh);
+        error = nfs_validate_mount_data(&data, &mntfh, dev_name);
        if (error < 0)
-                return error;
+                goto out;
        /* Get a volume representation */
        server = nfs_create_server(data, &mntfh);
        if (IS_ERR(server)) {
                error = PTR_ERR(server);
-                goto out_err_noserver;
+                goto out;
        }
+        if (server->flags & NFS_MOUNT_UNSHARED)
+                compare_super = NULL;
        /* Get a superblock - note that we may end up sharing one that already exists */
-        s = sget(fs_type, nfs_compare_super, nfs_set_super, server);
+        s = sget(fs_type, compare_super, nfs_set_super, server);
        if (IS_ERR(s)) {
                error = PTR_ERR(s);
                goto out_err_nosb;
        }
        if (s->s_fs_info != server) {
+                error = nfs_compare_mount_options(s, server, flags);
                nfs_free_server(server);
                server = NULL;
+                if (error < 0)
+                        goto error_splat_super;
        }
        if (!s->s_root) {
@@ -656,17 +1418,21 @@ static int nfs_get_sb(struct file_system_type *fs_type,
        s->s_flags |= MS_ACTIVE;
        mnt->mnt_sb = s;
        mnt->mnt_root = mntroot;
-        return 0;
+        error = 0;
+out:
+        if (data != raw_data)
+                kfree(data);
+        return error;
 out_err_nosb:
        nfs_free_server(server);
-out_err_noserver:
+        goto out;
-        return error;
 error_splat_super:
        up_write(&s->s_umount);
        deactivate_super(s);
-        return error;
+        goto out;
 }
 /*
@@ -691,6 +1457,7 @@ static int nfs_xdev_get_sb(struct file_system_type *fs_type, int flags,
        struct super_block *s;
        struct nfs_server *server;
        struct dentry *mntroot;
+        int (*compare_super)(struct super_block *, void *) = nfs_compare_super;
        int error;
        dprintk("--> nfs_xdev_get_sb()\n");
@@ -702,16 +1469,22 @@ static int nfs_xdev_get_sb(struct file_system_type *fs_type, int flags,
                goto out_err_noserver;
        }
+        if (server->flags & NFS_MOUNT_UNSHARED)
+                compare_super = NULL;
        /* Get a superblock - note that we may end up sharing one that already exists */
-        s = sget(&nfs_fs_type, nfs_compare_super, nfs_set_super, server);
+        s = sget(&nfs_fs_type, compare_super, nfs_set_super, server);
        if (IS_ERR(s)) {
                error = PTR_ERR(s);
                goto out_err_nosb;
        }
        if (s->s_fs_info != server) {
+                error = nfs_compare_mount_options(s, server, flags);
                nfs_free_server(server);
                server = NULL;
+                if (error < 0)
+                        goto error_splat_super;
        }
        if (!s->s_root) {
@@ -772,25 +1545,164 @@ static void nfs4_fill_super(struct super_block *sb)
        nfs_initialise_sb(sb);
 }
-static void *nfs_copy_user_string(char *dst, struct nfs_string *src, int maxlen)
+/*
+ * Validate NFSv4 mount options
+ *
+ * *options is either a version 1 struct nfs4_mount_data, or (for any
+ * other version value) a text option string that is parsed with
+ * nfs_parse_mount_options().  In the text case *options is replaced by
+ * a newly allocated version 1 structure, and the server hostname and
+ * export path are taken from dev_name ("hostname:mntpath").
+ *
+ * Returns 0 on success with *addr, *authflavour, *hostname, *mntpath
+ * and *ip_addr filled in, or a negative errno.  Strings already stored
+ * through the output pointers are not freed here on failure.
+ */
+static int nfs4_validate_mount_data(struct nfs4_mount_data **options,
+                                    const char *dev_name,
+                                    struct sockaddr_in *addr,
+                                    rpc_authflavor_t *authflavour,
+                                    char **hostname,
+                                    char **mntpath,
+                                    char **ip_addr)
 {
-        void *p = NULL;
+        struct nfs4_mount_data *data = *options;
+        char *c;
-        if (!src->len)
-                return ERR_PTR(-EINVAL);
+        if (data == NULL)
-        if (src->len < maxlen)
+                goto out_no_data;
-                maxlen = src->len;
-        if (dst == NULL) {
+        switch (data->version) {
-                p = dst = kmalloc(maxlen + 1, GFP_KERNEL);
+        case 1:
-                if (p == NULL)
+                if (data->host_addrlen != sizeof(*addr))
-                        return ERR_PTR(-ENOMEM);
+                        goto out_no_address;
-        }
+                if (copy_from_user(addr, data->host_addr, sizeof(*addr)))
-        if (copy_from_user(dst, src->data, maxlen)) {
+                        return -EFAULT;
-                kfree(p);
+                if (addr->sin_port == 0)
-                return ERR_PTR(-EFAULT);
+                        addr->sin_port = htons(NFS_PORT);
+                if (!nfs_verify_server_address((struct sockaddr *) addr))
+                        goto out_no_address;
+                switch (data->auth_flavourlen) {
+                case 0:
+                        *authflavour = RPC_AUTH_UNIX;
+                        break;
+                case 1:
+                        if (copy_from_user(authflavour, data->auth_flavours,
+                                           sizeof(*authflavour)))
+                                return -EFAULT;
+                        break;
+                default:
+                        goto out_inval_auth;
+                }
+                c = strndup_user(data->hostname.data, NFS4_MAXNAMLEN);
+                if (IS_ERR(c))
+                        return PTR_ERR(c);
+                *hostname = c;
+                c = strndup_user(data->mnt_path.data, NFS4_MAXPATHLEN);
+                if (IS_ERR(c))
+                        return PTR_ERR(c);
+                *mntpath = c;
+                dfprintk(MOUNT, "NFS: MNTPATH: '%s'\n", *mntpath);
+                c = strndup_user(data->client_addr.data, 16);
+                if (IS_ERR(c))
+                        return PTR_ERR(c);
+                *ip_addr = c;
+                break;
+        default: {
+                unsigned int len;
+                struct nfs_parsed_mount_data args = {
+                        .rsize          = NFS_MAX_FILE_IO_SIZE,
+                        .wsize          = NFS_MAX_FILE_IO_SIZE,
+                        .timeo          = 600,
+                        .retrans        = 2,
+                        .acregmin       = 3,
+                        .acregmax       = 60,
+                        .acdirmin       = 30,
+                        .acdirmax       = 60,
+                        .nfs_server.protocol = IPPROTO_TCP,
+                };
+                if (nfs_parse_mount_options((char *) *options, &args) == 0)
+                        return -EINVAL;
+                if (!nfs_verify_server_address((struct sockaddr *)
+                                                &args.nfs_server.address))
+                        return -EINVAL;
+                *addr = args.nfs_server.address;
+                switch (args.auth_flavor_len) {
+                case 0:
+                        *authflavour = RPC_AUTH_UNIX;
+                        break;
+                case 1:
+                        *authflavour = (rpc_authflavor_t) args.auth_flavors[0];
+                        break;
+                default:
+                        goto out_inval_auth;
+                }
+                /*
+                 * Translate to nfs4_mount_data, which nfs4_fill_super
+                 * can deal with.
+                 */
+                data = kzalloc(sizeof(*data), GFP_KERNEL);
+                if (data == NULL)
+                        return -ENOMEM;
+                *options = data;
+                data->version   = 1;
+                data->flags     = args.flags & NFS4_MOUNT_FLAGMASK;
+                data->rsize     = args.rsize;
+                data->wsize     = args.wsize;
+                data->timeo     = args.timeo;
+                data->retrans   = args.retrans;
+                data->acregmin  = args.acregmin;
+                data->acregmax  = args.acregmax;
+                data->acdirmin  = args.acdirmin;
+                data->acdirmax  = args.acdirmax;
+                data->proto     = args.nfs_server.protocol;
+                /*
+                 * Split "dev_name" into "hostname:mntpath".
+                 */
+                c = strchr(dev_name, ':');
+                if (c == NULL)
+                        return -EINVAL;
+                /* len is the hostname length, not counting the ':' */
+                len = c - dev_name;
+                if (len > NFS4_MAXNAMLEN)
+                        return -EINVAL;
+                /*
+                 * Allocate len + 1 zeroed bytes and copy exactly len, so
+                 * the whole hostname is kept and the terminating NUL comes
+                 * from kzalloc.  This also keeps an empty hostname
+                 * (dev_name starting with ':') from turning into a
+                 * zero-size allocation and a wrapped "len - 1" copy length.
+                 */
+                *hostname = kzalloc(len + 1, GFP_KERNEL);
+                if (*hostname == NULL)
+                        return -ENOMEM;
+                strncpy(*hostname, dev_name, len);
+                c++;                    /* step over the ':' */
+                len = strlen(c);
+                if (len > NFS4_MAXPATHLEN)
+                        return -EINVAL;
+                *mntpath = kzalloc(len + 1, GFP_KERNEL);
+                if (*mntpath == NULL)
+                        return -ENOMEM;
+                strncpy(*mntpath, c, len);
+                dprintk("MNTPATH: %s\n", *mntpath);
+                *ip_addr = args.client_address;
+                break;
+                }
         }
-        dst[maxlen] = '\0';
-        return dst;
+        return 0;
+out_no_data:
+        dfprintk(MOUNT, "NFS4: mount program didn't pass any mount data\n");
+        return -EINVAL;
+out_inval_auth:
+        /*
+         * NOTE(review): when reached from the text-option path, "data"
+         * still points at the option string, so the count printed below
+         * is not args.auth_flavor_len -- confirm and fix separately.
+         */
+        dfprintk(MOUNT, "NFS4: Invalid number of RPC auth flavours %d\n",
+                 data->auth_flavourlen);
+        return -EINVAL;
+out_no_address:
+        dfprintk(MOUNT, "NFS4: mount program didn't pass remote address\n");
+        return -EINVAL;
 }
 /*
@@ -806,81 +1718,29 @@ static int nfs4_get_sb(struct file_system_type *fs_type,
        rpc_authflavor_t authflavour;
        struct nfs_fh mntfh;
        struct dentry *mntroot;
-        char *mntpath = NULL, *hostname = NULL, ip_addr[16];
+        char *mntpath = NULL, *hostname = NULL, *ip_addr = NULL;
-        void *p;
+        int (*compare_super)(struct super_block *, void *) = nfs_compare_super;
        int error;
-        if (data == NULL) {
+        /* Validate the mount data */
-                dprintk("%s: missing data argument\n", __FUNCTION__);
+        error = nfs4_validate_mount_data(&data, dev_name, &addr, &authflavour,
-                return -EINVAL;
+                                         &hostname, &mntpath, &ip_addr);
-        }
+        if (error < 0)
-        if (data->version <= 0 || data->version > NFS4_MOUNT_VERSION) {
+                goto out;
-                dprintk("%s: bad mount version\n", __FUNCTION__);
-                return -EINVAL;
-        }
-        /* We now require that the mount process passes the remote address */
-        if (data->host_addrlen != sizeof(addr))
-                return -EINVAL;
-        if (copy_from_user(&addr, data->host_addr, sizeof(addr)))
-                return -EFAULT;
-        if (addr.sin_family != AF_INET ||
-            addr.sin_addr.s_addr == INADDR_ANY
-            ) {
-                dprintk("%s: mount program didn't pass remote IP address!\n",
-                                __FUNCTION__);
-                return -EINVAL;
-        }
-        /* RFC3530: The default port for NFS is 2049 */
-        if (addr.sin_port == 0)
-                addr.sin_port = htons(NFS_PORT);
-        /* Grab the authentication type */
-        authflavour = RPC_AUTH_UNIX;
-        if (data->auth_flavourlen != 0) {
-                if (data->auth_flavourlen != 1) {
-                        dprintk("%s: Invalid number of RPC auth flavours %d.\n",
-                                        __FUNCTION__, data->auth_flavourlen);
-                        error = -EINVAL;
-                        goto out_err_noserver;
-                }
-                if (copy_from_user(&authflavour, data->auth_flavours,
-                                   sizeof(authflavour))) {
-                        error = -EFAULT;
-                        goto out_err_noserver;
-                }
-        }
-        p = nfs_copy_user_string(NULL, &data->hostname, 256);
-        if (IS_ERR(p))
-                goto out_err;
-        hostname = p;
-        p = nfs_copy_user_string(NULL, &data->mnt_path, 1024);
-        if (IS_ERR(p))
-                goto out_err;
-        mntpath = p;
-        dprintk("MNTPATH: %s\n", mntpath);
-        p = nfs_copy_user_string(ip_addr, &data->client_addr,
-                                 sizeof(ip_addr) - 1);
-        if (IS_ERR(p))
-                goto out_err;
        /* Get a volume representation */
        server = nfs4_create_server(data, hostname, &addr, mntpath, ip_addr,
                                    authflavour, &mntfh);
        if (IS_ERR(server)) {
                error = PTR_ERR(server);
-                goto out_err_noserver;
+                goto out;
        }
+        if (server->flags & NFS4_MOUNT_UNSHARED)
+                compare_super = NULL;
        /* Get a superblock - note that we may end up sharing one that already exists */
-        s = sget(fs_type, nfs_compare_super, nfs_set_super, server);
+        s = sget(fs_type, compare_super, nfs_set_super, server);
        if (IS_ERR(s)) {
                error = PTR_ERR(s);
                goto out_free;
@@ -906,25 +1766,22 @@ static int nfs4_get_sb(struct file_system_type *fs_type,
        s->s_flags |= MS_ACTIVE;
        mnt->mnt_sb = s;
        mnt->mnt_root = mntroot;
+        error = 0;
+out:
+        kfree(ip_addr);
        kfree(mntpath);
        kfree(hostname);
-        return 0;
+        return error;
-out_err:
-        error = PTR_ERR(p);
-        goto out_err_noserver;
 out_free:
        nfs_free_server(server);
-out_err_noserver:
+        goto out;
-        kfree(mntpath);
-        kfree(hostname);
-        return error;
 error_splat_super:
        up_write(&s->s_umount);
        deactivate_super(s);
-        goto out_err_noserver;
+        goto out;
 }
 static void nfs4_kill_super(struct super_block *sb)
@@ -949,6 +1806,7 @@ static int nfs4_xdev_get_sb(struct file_system_type *fs_type, int flags,
        struct super_block *s;
        struct nfs_server *server;
        struct dentry *mntroot;
+        int (*compare_super)(struct super_block *, void *) = nfs_compare_super;
        int error;
        dprintk("--> nfs4_xdev_get_sb()\n");
@@ -960,8 +1818,11 @@ static int nfs4_xdev_get_sb(struct file_system_type *fs_type, int flags,
                goto out_err_noserver;
        }
+        if (server->flags & NFS4_MOUNT_UNSHARED)
+                compare_super = NULL;
        /* Get a superblock - note that we may end up sharing one that already exists */
-        s = sget(&nfs_fs_type, nfs_compare_super, nfs_set_super, server);
+        s = sget(&nfs_fs_type, compare_super, nfs_set_super, server);
        if (IS_ERR(s)) {
                error = PTR_ERR(s);
                goto out_err_nosb;
@@ -1016,6 +1877,7 @@ static int nfs4_referral_get_sb(struct file_system_type *fs_type, int flags,
        struct nfs_server *server;
        struct dentry *mntroot;
        struct nfs_fh mntfh;
+        int (*compare_super)(struct super_block *, void *) = nfs_compare_super;
        int error;
        dprintk("--> nfs4_referral_get_sb()\n");
@@ -1027,8 +1889,11 @@ static int nfs4_referral_get_sb(struct file_system_type *fs_type, int flags,
                goto out_err_noserver;
        }
+        if (server->flags & NFS4_MOUNT_UNSHARED)
+                compare_super = NULL;
        /* Get a superblock - note that we may end up sharing one that already exists */
-        s = sget(&nfs_fs_type, nfs_compare_super, nfs_set_super, server);
+        s = sget(&nfs_fs_type, compare_super, nfs_set_super, server);
        if (IS_ERR(s)) {
                error = PTR_ERR(s);
                goto out_err_nosb;
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index af344a158e01..73ac992ece85 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -117,19 +117,19 @@ static struct nfs_page *nfs_page_find_request_locked(struct page *page)
        if (PagePrivate(page)) {
                req = (struct nfs_page *)page_private(page);
                if (req != NULL)
-                        atomic_inc(&req->wb_count);
+                        kref_get(&req->wb_kref);
        }
        return req;
 }
 static struct nfs_page *nfs_page_find_request(struct page *page)
 {
+        struct inode *inode = page->mapping->host; /* inode that owns the page's mapping */
         struct nfs_page *req = NULL;
-        spinlock_t *req_lock = &NFS_I(page->mapping->host)->req_lock;
-        spin_lock(req_lock);
+        spin_lock(&inode->i_lock); /* the lookup now runs under inode->i_lock instead of the per-nfs_inode req_lock */
         req = nfs_page_find_request_locked(page);
-        spin_unlock(req_lock);
+        spin_unlock(&inode->i_lock); /* req (possibly NULL) is returned after the lock is dropped */
         return req;
 }
@@ -191,8 +191,6 @@ static int nfs_writepage_setup(struct nfs_open_context *ctx, struct page *page,
        }
        /* Update file length */
        nfs_grow_file(page, offset, count);
-        /* Set the PG_uptodate flag? */
-        nfs_mark_uptodate(page, offset, count);
        nfs_unlock_request(req);
        return 0;
 }
@@ -253,16 +251,16 @@ static void nfs_end_page_writeback(struct page *page)
 static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
                                struct page *page)
 {
+        struct inode *inode = page->mapping->host;
+        struct nfs_inode *nfsi = NFS_I(inode);
        struct nfs_page *req;
-        struct nfs_inode *nfsi = NFS_I(page->mapping->host);
-        spinlock_t *req_lock = &nfsi->req_lock;
        int ret;
-        spin_lock(req_lock);
+        spin_lock(&inode->i_lock);
        for(;;) {
                req = nfs_page_find_request_locked(page);
                if (req == NULL) {
-                        spin_unlock(req_lock);
+                        spin_unlock(&inode->i_lock);
                        return 1;
                }
                if (nfs_lock_request_dontget(req))
@@ -272,28 +270,28 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
                 *       succeed provided that someone hasn't already marked the
                 *       request as dirty (in which case we don't care).
                 */
-                spin_unlock(req_lock);
+                spin_unlock(&inode->i_lock);
                ret = nfs_wait_on_request(req);
                nfs_release_request(req);
                if (ret != 0)
                        return ret;
-                spin_lock(req_lock);
+                spin_lock(&inode->i_lock);
        }
        if (test_bit(PG_NEED_COMMIT, &req->wb_flags)) {
                /* This request is marked for commit */
-                spin_unlock(req_lock);
+                spin_unlock(&inode->i_lock);
                nfs_unlock_request(req);
                nfs_pageio_complete(pgio);
                return 1;
        }
        if (nfs_set_page_writeback(page) != 0) {
-                spin_unlock(req_lock);
+                spin_unlock(&inode->i_lock);
                BUG();
        }
        radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index,
-                        NFS_PAGE_TAG_WRITEBACK);
+                        NFS_PAGE_TAG_LOCKED);
        ret = test_bit(PG_NEED_FLUSH, &req->wb_flags);
-        spin_unlock(req_lock);
+        spin_unlock(&inode->i_lock);
        nfs_pageio_add_request(pgio, req);
        return ret;
 }
@@ -400,7 +398,7 @@ static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
        if (PageDirty(req->wb_page))
                set_bit(PG_NEED_FLUSH, &req->wb_flags);
        nfsi->npages++;
-        atomic_inc(&req->wb_count);
+        kref_get(&req->wb_kref);
        return 0;
 }
@@ -409,12 +407,12 @@ static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
 */
 static void nfs_inode_remove_request(struct nfs_page *req)
 {
-        struct inode *inode = req->wb_context->dentry->d_inode;
+        struct inode *inode = req->wb_context->path.dentry->d_inode;
        struct nfs_inode *nfsi = NFS_I(inode);
        BUG_ON (!NFS_WBACK_BUSY(req));
-        spin_lock(&nfsi->req_lock);
+        spin_lock(&inode->i_lock);
        set_page_private(req->wb_page, 0);
        ClearPagePrivate(req->wb_page);
        radix_tree_delete(&nfsi->nfs_page_tree, req->wb_index);
@@ -422,11 +420,11 @@ static void nfs_inode_remove_request(struct nfs_page *req)
                __set_page_dirty_nobuffers(req->wb_page);
        nfsi->npages--;
        if (!nfsi->npages) {
-                spin_unlock(&nfsi->req_lock);
+                spin_unlock(&inode->i_lock);
                nfs_end_data_update(inode);
                iput(inode);
        } else
-                spin_unlock(&nfsi->req_lock);
+                spin_unlock(&inode->i_lock);
        nfs_clear_request(req);
        nfs_release_request(req);
 }
@@ -457,14 +455,16 @@ nfs_dirty_request(struct nfs_page *req)
 static void
 nfs_mark_request_commit(struct nfs_page *req)
 {
-        struct inode *inode = req->wb_context->dentry->d_inode;
+        struct inode *inode = req->wb_context->path.dentry->d_inode;
        struct nfs_inode *nfsi = NFS_I(inode);
-        spin_lock(&nfsi->req_lock);
+        spin_lock(&inode->i_lock);
-        nfs_list_add_request(req, &nfsi->commit);
        nfsi->ncommit++;
        set_bit(PG_NEED_COMMIT, &(req)->wb_flags);
-        spin_unlock(&nfsi->req_lock);
+        radix_tree_tag_set(&nfsi->nfs_page_tree,
+                        req->wb_index,
+                        NFS_PAGE_TAG_COMMIT);
+        spin_unlock(&inode->i_lock);
        inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
        __mark_inode_dirty(inode, I_DIRTY_DATASYNC);
 }
@@ -526,18 +526,18 @@ static int nfs_wait_on_requests_locked(struct inode *inode, pgoff_t idx_start, u
                idx_end = idx_start + npages - 1;
        next = idx_start;
-        while (radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree, (void **)&req, next, 1, NFS_PAGE_TAG_WRITEBACK)) {
+        while (radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree, (void **)&req, next, 1, NFS_PAGE_TAG_LOCKED)) {
                if (req->wb_index > idx_end)
                        break;
                next = req->wb_index + 1;
                BUG_ON(!NFS_WBACK_BUSY(req));
-                atomic_inc(&req->wb_count);
+                kref_get(&req->wb_kref);
-                spin_unlock(&nfsi->req_lock);
+                spin_unlock(&inode->i_lock);
                error = nfs_wait_on_request(req);
                nfs_release_request(req);
-                spin_lock(&nfsi->req_lock);
+                spin_lock(&inode->i_lock);
                if (error < 0)
                        return error;
                res++;
@@ -577,10 +577,9 @@ nfs_scan_commit(struct inode *inode, struct list_head *dst, pgoff_t idx_start, u
        int res = 0;
        if (nfsi->ncommit != 0) {
-                res = nfs_scan_list(nfsi, &nfsi->commit, dst, idx_start, npages);
+                res = nfs_scan_list(nfsi, dst, idx_start, npages,
+                                NFS_PAGE_TAG_COMMIT);
                nfsi->ncommit -= res;
-                if ((nfsi->ncommit == 0) != list_empty(&nfsi->commit))
-                        printk(KERN_ERR "NFS: desynchronized value of nfs_i.ncommit.\n");
        }
        return res;
 }
@@ -603,7 +602,6 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx,
 {
        struct address_space *mapping = page->mapping;
        struct inode *inode = mapping->host;
-        struct nfs_inode *nfsi = NFS_I(inode);
        struct nfs_page         *req, *new = NULL;
        pgoff_t         rqend, end;
@@ -613,13 +611,13 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx,
                /* Loop over all inode entries and see if we find
                 * A request for the page we wish to update
                 */
-                spin_lock(&nfsi->req_lock);
+                spin_lock(&inode->i_lock);
                req = nfs_page_find_request_locked(page);
                if (req) {
                        if (!nfs_lock_request_dontget(req)) {
                                int error;
-                                spin_unlock(&nfsi->req_lock);
+                                spin_unlock(&inode->i_lock);
                                error = nfs_wait_on_request(req);
                                nfs_release_request(req);
                                if (error < 0) {
@@ -629,7 +627,7 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx,
                                }
                                continue;
                        }
-                        spin_unlock(&nfsi->req_lock);
+                        spin_unlock(&inode->i_lock);
                        if (new)
                                nfs_release_request(new);
                        break;
@@ -640,14 +638,14 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx,
                        nfs_lock_request_dontget(new);
                        error = nfs_inode_add_request(inode, new);
                        if (error) {
-                                spin_unlock(&nfsi->req_lock);
+                                spin_unlock(&inode->i_lock);
                                nfs_unlock_request(new);
                                return ERR_PTR(error);
                        }
-                        spin_unlock(&nfsi->req_lock);
+                        spin_unlock(&inode->i_lock);
                        return new;
                }
-                spin_unlock(&nfsi->req_lock);
+                spin_unlock(&inode->i_lock);
                new = nfs_create_request(ctx, inode, page, offset, bytes);
                if (IS_ERR(new))
@@ -751,12 +749,17 @@ int nfs_updatepage(struct file *file, struct page *page,
 static void nfs_writepage_release(struct nfs_page *req)
 {
-        if (PageError(req->wb_page) || !nfs_reschedule_unstable_write(req)) {
+        if (PageError(req->wb_page)) { /* error flag set on the page: end writeback and drop the request without marking it up to date */
+                nfs_end_page_writeback(req->wb_page);
+                nfs_inode_remove_request(req);
+        } else if (!nfs_reschedule_unstable_write(req)) { /* no error and the request was not rescheduled */
+                /* Set the PG_uptodate flag */
+                nfs_mark_uptodate(req->wb_page, req->wb_pgbase, req->wb_bytes);
                 nfs_end_page_writeback(req->wb_page);
                 nfs_inode_remove_request(req);
         } else
                 nfs_end_page_writeback(req->wb_page);
-        nfs_clear_page_writeback(req);
+        nfs_clear_page_tag_locked(req); /* runs on every path; replaces the nfs_clear_page_writeback() call */
 }
 static inline int flush_task_priority(int how)
@@ -786,7 +789,7 @@ static void nfs_write_rpcsetup(struct nfs_page *req,
         * NB: take care not to mess about with data->commit et al. */
        data->req = req;
-        data->inode = inode = req->wb_context->dentry->d_inode;
+        data->inode = inode = req->wb_context->path.dentry->d_inode;
        data->cred = req->wb_context->cred;
        data->args.fh     = NFS_FH(inode);
@@ -885,7 +888,7 @@ out_bad:
        }
        nfs_redirty_request(req);
        nfs_end_page_writeback(req->wb_page);
-        nfs_clear_page_writeback(req);
+        nfs_clear_page_tag_locked(req);
        return -ENOMEM;
 }
@@ -928,7 +931,7 @@ static int nfs_flush_one(struct inode *inode, struct list_head *head, unsigned i
                nfs_list_remove_request(req);
                nfs_redirty_request(req);
                nfs_end_page_writeback(req->wb_page);
-                nfs_clear_page_writeback(req);
+                nfs_clear_page_tag_locked(req);
        }
        return -ENOMEM;
 }
@@ -954,8 +957,8 @@ static void nfs_writeback_done_partial(struct rpc_task *task, void *calldata)
        struct page             *page = req->wb_page;
        dprintk("NFS: write (%s/%Ld %d@%Ld)",
-                req->wb_context->dentry->d_inode->i_sb->s_id,
+                req->wb_context->path.dentry->d_inode->i_sb->s_id,
-                (long long)NFS_FILEID(req->wb_context->dentry->d_inode),
+                (long long)NFS_FILEID(req->wb_context->path.dentry->d_inode),
                req->wb_bytes,
                (long long)req_offset(req));
@@ -970,9 +973,9 @@ static void nfs_writeback_done_partial(struct rpc_task *task, void *calldata)
        }
        if (nfs_write_need_commit(data)) {
-                spinlock_t *req_lock = &NFS_I(page->mapping->host)->req_lock;
+                struct inode *inode = page->mapping->host;
-                spin_lock(req_lock);
+                spin_lock(&inode->i_lock);
                if (test_bit(PG_NEED_RESCHED, &req->wb_flags)) {
                        /* Do nothing we need to resend the writes */
                } else if (!test_and_set_bit(PG_NEED_COMMIT, &req->wb_flags)) {
@@ -983,7 +986,7 @@ static void nfs_writeback_done_partial(struct rpc_task *task, void *calldata)
                        clear_bit(PG_NEED_COMMIT, &req->wb_flags);
                        dprintk(" server reboot detected\n");
                }
-                spin_unlock(req_lock);
+                spin_unlock(&inode->i_lock);
        } else
                dprintk(" OK\n");
@@ -1020,8 +1023,8 @@ static void nfs_writeback_done_full(struct rpc_task *task, void *calldata)
                page = req->wb_page;
                dprintk("NFS: write (%s/%Ld %d@%Ld)",
-                        req->wb_context->dentry->d_inode->i_sb->s_id,
+                        req->wb_context->path.dentry->d_inode->i_sb->s_id,
-                        (long long)NFS_FILEID(req->wb_context->dentry->d_inode),
+                        (long long)NFS_FILEID(req->wb_context->path.dentry->d_inode),
                        req->wb_bytes,
                        (long long)req_offset(req));
@@ -1039,12 +1042,14 @@ static void nfs_writeback_done_full(struct rpc_task *task, void *calldata)
                        dprintk(" marked for commit\n");
                        goto next;
                }
+                /* Set the PG_uptodate flag? */
+                nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes);
                dprintk(" OK\n");
 remove_request:
                nfs_end_page_writeback(page);
                nfs_inode_remove_request(req);
        next:
-                nfs_clear_page_writeback(req);
+                nfs_clear_page_tag_locked(req);
        }
 }
@@ -1157,7 +1162,7 @@ static void nfs_commit_rpcsetup(struct list_head *head,
        list_splice_init(head, &data->pages);
        first = nfs_list_entry(data->pages.next);
-        inode = first->wb_context->dentry->d_inode;
+        inode = first->wb_context->path.dentry->d_inode;
        data->inode       = inode;
        data->cred        = first->wb_context->cred;
@@ -1207,7 +1212,7 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how)
                nfs_list_remove_request(req);
                nfs_mark_request_commit(req);
                dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
-                nfs_clear_page_writeback(req);
+                nfs_clear_page_tag_locked(req);
        }
        return -ENOMEM;
 }
@@ -1234,8 +1239,8 @@ static void nfs_commit_done(struct rpc_task *task, void *calldata)
                dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
                dprintk("NFS: commit (%s/%Ld %d@%Ld)",
-                        req->wb_context->dentry->d_inode->i_sb->s_id,
+                        req->wb_context->path.dentry->d_inode->i_sb->s_id,
-                        (long long)NFS_FILEID(req->wb_context->dentry->d_inode),
+                        (long long)NFS_FILEID(req->wb_context->path.dentry->d_inode),
                        req->wb_bytes,
                        (long long)req_offset(req));
                if (task->tk_status < 0) {
@@ -1249,6 +1254,9 @@ static void nfs_commit_done(struct rpc_task *task, void *calldata)
                 * returned by the server against all stored verfs. */
                if (!memcmp(req->wb_verf.verifier, data->verf.verifier, sizeof(data->verf.verifier))) {
                        /* We have a match */
+                        /* Set the PG_uptodate flag */
+                        nfs_mark_uptodate(req->wb_page, req->wb_pgbase,
+                                        req->wb_bytes);
                        nfs_inode_remove_request(req);
                        dprintk(" OK\n");
                        goto next;
@@ -1257,7 +1265,7 @@ static void nfs_commit_done(struct rpc_task *task, void *calldata)
                dprintk(" mismatch\n");
                nfs_redirty_request(req);
        next:
-                nfs_clear_page_writeback(req);
+                nfs_clear_page_tag_locked(req);
        }
 }
@@ -1268,13 +1276,12 @@ static const struct rpc_call_ops nfs_commit_ops = {
 int nfs_commit_inode(struct inode *inode, int how)
 {
-        struct nfs_inode *nfsi = NFS_I(inode);
        LIST_HEAD(head);
        int res;
-        spin_lock(&nfsi->req_lock);
+        spin_lock(&inode->i_lock);
        res = nfs_scan_commit(inode, &head, 0, 0);
-        spin_unlock(&nfsi->req_lock);
+        spin_unlock(&inode->i_lock);
        if (res) {
                int error = nfs_commit_list(inode, &head, how);
                if (error < 0)
@@ -1292,7 +1299,6 @@ static inline int nfs_commit_list(struct inode *inode, struct list_head *head, i
 long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_control *wbc, int how)
 {
        struct inode *inode = mapping->host;
-        struct nfs_inode *nfsi = NFS_I(inode);
        pgoff_t idx_start, idx_end;
        unsigned int npages = 0;
        LIST_HEAD(head);
@@ -1314,7 +1320,7 @@ long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_contr
                }
        }
        how &= ~FLUSH_NOCOMMIT;
-        spin_lock(&nfsi->req_lock);
+        spin_lock(&inode->i_lock);
        do {
                ret = nfs_wait_on_requests_locked(inode, idx_start, npages);
                if (ret != 0)
@@ -1325,18 +1331,19 @@ long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_contr
                if (pages == 0)
                        break;
                if (how & FLUSH_INVALIDATE) {
-                        spin_unlock(&nfsi->req_lock);
+                        spin_unlock(&inode->i_lock);
                        nfs_cancel_commit_list(&head);
                        ret = pages;
-                        spin_lock(&nfsi->req_lock);
+                        spin_lock(&inode->i_lock);
                        continue;
                }
                pages += nfs_scan_commit(inode, &head, 0, 0);
-                spin_unlock(&nfsi->req_lock);
+                spin_unlock(&inode->i_lock);
                ret = nfs_commit_list(inode, &head, how);
-                spin_lock(&nfsi->req_lock);
+                spin_lock(&inode->i_lock);
        } while (ret >= 0);
-        spin_unlock(&nfsi->req_lock);
+        spin_unlock(&inode->i_lock);
        return ret;
 }
@@ -1430,7 +1437,6 @@ int nfs_set_page_dirty(struct page *page)
 {
        struct address_space *mapping = page->mapping;
        struct inode *inode;
-        spinlock_t *req_lock;
        struct nfs_page *req;
        int ret;
@@ -1439,18 +1445,17 @@ int nfs_set_page_dirty(struct page *page)
        inode = mapping->host;
        if (!inode)
                goto out_raced;
-        req_lock = &NFS_I(inode)->req_lock;
+        spin_lock(&inode->i_lock);
-        spin_lock(req_lock);
        req = nfs_page_find_request_locked(page);
        if (req != NULL) {
                /* Mark any existing write requests for flushing */
                ret = !test_and_set_bit(PG_NEED_FLUSH, &req->wb_flags);
-                spin_unlock(req_lock);
+                spin_unlock(&inode->i_lock);
                nfs_release_request(req);
                return ret;
        }
        ret = __set_page_dirty_nobuffers(page);
-        spin_unlock(req_lock);
+        spin_unlock(&inode->i_lock);
        return ret;
 out_raced:
        return !TestSetPageDirty(page);
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 864090edc28b..5443c52b57aa 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -394,7 +394,6 @@ nfsd4_probe_callback(struct nfs4_client *clp)
                .rpc_proc       = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL],
                .rpc_argp       = clp,
        };
-        char clientname[16];
        int status;
        if (atomic_read(&cb->cb_set))
@@ -417,11 +416,6 @@ nfsd4_probe_callback(struct nfs4_client *clp)
        memset(program->stats, 0, sizeof(cb->cb_stat));
        program->stats->program = program;
-        /* Just here to make some printk's more useful: */
-        snprintf(clientname, sizeof(clientname),
-                "%u.%u.%u.%u", NIPQUAD(addr.sin_addr));
-        args.servername = clientname;
        /* Create RPC client */
        cb->cb_client = rpc_create(&args);
        if (IS_ERR(cb->cb_client)) {
@@ -429,29 +423,23 @@ nfsd4_probe_callback(struct nfs4_client *clp)
                goto out_err;
        }
-        /* Kick rpciod, put the call on the wire. */
-        if (rpciod_up() != 0)
-                goto out_clnt;
        /* the task holds a reference to the nfs4_client struct */
        atomic_inc(&clp->cl_count);
        msg.rpc_cred = nfsd4_lookupcred(clp,0);
        if (IS_ERR(msg.rpc_cred))
-                goto out_rpciod;
+                goto out_release_clp;
        status = rpc_call_async(cb->cb_client, &msg, RPC_TASK_ASYNC, &nfs4_cb_null_ops, NULL);
        put_rpccred(msg.rpc_cred);
        if (status != 0) {
                dprintk("NFSD: asynchronous NFSPROC4_CB_NULL failed!\n");
-                goto out_rpciod;
+                goto out_release_clp;
        }
        return;
-out_rpciod:
+out_release_clp:
        atomic_dec(&clp->cl_count);
-        rpciod_down();
-out_clnt:
        rpc_shutdown_client(cb->cb_client);
 out_err:
        cb->cb_client = NULL;
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 3cc8ce422ab1..8c52913d7cb6 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -378,7 +378,6 @@ shutdown_callback_client(struct nfs4_client *clp)
        if (clnt) {
                clp->cl_callback.cb_client = NULL;
                rpc_shutdown_client(clnt);
-                rpciod_down();
        }
 }
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 7e6aa245b5d5..945b1cedde2b 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -23,7 +23,7 @@
 #include <linux/file.h>
 #include <linux/mount.h>
 #include <linux/major.h>
-#include <linux/ext2_fs.h>
+#include <linux/splice.h>
 #include <linux/proc_fs.h>
 #include <linux/stat.h>
 #include <linux/fcntl.h>
@@ -801,26 +801,32 @@ found:
 }
 /*
- * Grab and keep cached pages assosiated with a file in the svc_rqst
+ * Grab and keep cached pages associated with a file in the svc_rqst
- * so that they can be passed to the netowork sendmsg/sendpage routines
+ * so that they can be passed to the network sendmsg/sendpage routines
- * directrly. They will be released after the sending has completed.
+ * directly. They will be released after the sending has completed.
 */
 static int
-nfsd_read_actor(read_descriptor_t *desc, struct page *page, unsigned long offset , unsigned long size)
+nfsd_splice_actor(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
+                  struct splice_desc *sd)
 {
-        unsigned long count = desc->count;
+        struct svc_rqst *rqstp = sd->u.data;
-        struct svc_rqst *rqstp = desc->arg.data;
        struct page **pp = rqstp->rq_respages + rqstp->rq_resused;
+        struct page *page = buf->page;
+        size_t size;
+        int ret;
+        ret = buf->ops->confirm(pipe, buf);
+        if (unlikely(ret))
+                return ret;
-        if (size > count)
+        size = sd->len;
-                size = count;
        if (rqstp->rq_res.page_len == 0) {
                get_page(page);
                put_page(*pp);
                *pp = page;
                rqstp->rq_resused++;
-                rqstp->rq_res.page_base = offset;
+                rqstp->rq_res.page_base = buf->offset;
                rqstp->rq_res.page_len = size;
        } else if (page != pp[-1]) {
                get_page(page);
@@ -832,11 +838,15 @@ nfsd_read_actor(read_descriptor_t *desc, struct page *page, unsigned long offset
        } else
                rqstp->rq_res.page_len += size;
-        desc->count = count - size;
-        desc->written += size;
        return size;
 }
+static int nfsd_direct_splice_actor(struct pipe_inode_info *pipe,
+                                    struct splice_desc *sd)
+{
+        return __splice_from_pipe(pipe, sd, nfsd_splice_actor);
+}
 static __be32
 nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
              loff_t offset, struct kvec *vec, int vlen, unsigned long *count)
@@ -861,10 +871,16 @@ nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
        if (ra && ra->p_set)
                file->f_ra = ra->p_ra;
-        if (file->f_op->sendfile && rqstp->rq_sendfile_ok) {
+        if (file->f_op->splice_read && rqstp->rq_splice_ok) {
+                struct splice_desc sd = {
+                        .len            = 0,
+                        .total_len      = *count,
+                        .pos            = offset,
+                        .u.data         = rqstp,
+                };
                rqstp->rq_resused = 1;
-                host_err = file->f_op->sendfile(file, &offset, *count,
+                host_err = splice_direct_to_actor(file, &sd, nfsd_direct_splice_actor);
-                                                 nfsd_read_actor, rqstp);
        } else {
                oldfs = get_fs();
                set_fs(KERNEL_DS);
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index 7ed56390b582..ffcc504a1667 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -2276,7 +2276,7 @@ const struct file_operations ntfs_file_ops = {
                                                    mounted filesystem. */
        .mmap           = generic_file_mmap,     /* Mmap file. */
        .open           = ntfs_file_open,        /* Open file. */
-        .sendfile       = generic_file_sendfile, /* Zero-copy data send with
+        .splice_read    = generic_file_splice_read /* Zero-copy data send with
                                                    the data source being on
                                                    the ntfs partition.  We do
                                                    not need to care about the
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index 074791ce4ab2..b532a730cec2 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -140,7 +140,7 @@ static int ntfs_init_locked_inode(struct inode *vi, ntfs_attr *na)
                if (!ni->name)
                        return -ENOMEM;
                memcpy(ni->name, na->name, i);
-                ni->name[i] = 0;
+                ni->name[na->name_len] = 0;
        }
        return 0;
 }
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 0023b31e48a8..a480b09c79b9 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -798,6 +798,11 @@ int ocfs2_map_and_write_splice_data(struct inode *inode,
        }
        to = from + bytes;
+        BUG_ON(from > PAGE_CACHE_SIZE);
+        BUG_ON(to > PAGE_CACHE_SIZE);
+        BUG_ON(from < cluster_start);
+        BUG_ON(to > cluster_end);
        if (wc->w_this_page_new)
                ret = ocfs2_map_page_blocks(wc->w_this_page, p_blkno, inode,
                                            cluster_start, cluster_end, 1);
@@ -809,11 +814,6 @@ int ocfs2_map_and_write_splice_data(struct inode *inode,
                goto out;
        }
-        BUG_ON(from > PAGE_CACHE_SIZE);
-        BUG_ON(to > PAGE_CACHE_SIZE);
-        BUG_ON(from > osb->s_clustersize);
-        BUG_ON(to > osb->s_clustersize);
        src = buf->ops->map(sp->s_pipe, buf, 1);
        dst = kmap_atomic(wc->w_this_page, KM_USER1);
        memcpy(dst + from, src + src_from, bytes);
@@ -890,6 +890,11 @@ int ocfs2_map_and_write_user_data(struct inode *inode,
        to = from + bytes;
+        BUG_ON(from > PAGE_CACHE_SIZE);
+        BUG_ON(to > PAGE_CACHE_SIZE);
+        BUG_ON(from < cluster_start);
+        BUG_ON(to > cluster_end);
        if (wc->w_this_page_new)
                ret = ocfs2_map_page_blocks(wc->w_this_page, p_blkno, inode,
                                            cluster_start, cluster_end, 1);
@@ -901,11 +906,6 @@ int ocfs2_map_and_write_user_data(struct inode *inode,
                goto out;
        }
-        BUG_ON(from > PAGE_CACHE_SIZE);
-        BUG_ON(to > PAGE_CACHE_SIZE);
-        BUG_ON(from > osb->s_clustersize);
-        BUG_ON(to > osb->s_clustersize);
        dst = kmap(wc->w_this_page);
        memcpy(dst + from, bp->b_src_buf + src_from, bytes);
        kunmap(wc->w_this_page);
diff --git a/fs/ocfs2/cluster/masklog.c b/fs/ocfs2/cluster/masklog.c
index a93620ce4aca..e9e042b93dbf 100644
--- a/fs/ocfs2/cluster/masklog.c
+++ b/fs/ocfs2/cluster/masklog.c
@@ -74,7 +74,6 @@ struct mlog_attribute {
 #define define_mask(_name) {                    \
        .attr = {                               \
                .name = #_name,                 \
-                .owner = THIS_MODULE,           \
                .mode = S_IRUGO | S_IWUSR,      \
        },                                      \
        .mask = ML_##_name,                     \
@@ -144,8 +143,7 @@ static struct kobj_type mlog_ktype = {
 };
 static struct kset mlog_kset = {
-        .kobj  = {.name = "logmask"},
+        .kobj   = {.name = "logmask", .ktype = &mlog_ktype},
-        .ktype = &mlog_ktype
 };
 int mlog_sys_init(struct kset *o2cb_subsys)
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index ac6c96431bbc..4979b6675717 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -31,7 +31,7 @@
 #include <linux/pagemap.h>
 #include <linux/uio.h>
 #include <linux/sched.h>
-#include <linux/pipe_fs_i.h>
+#include <linux/splice.h>
 #include <linux/mount.h>
 #include <linux/writeback.h>
@@ -1583,7 +1583,7 @@ static int ocfs2_splice_write_actor(struct pipe_inode_info *pipe,
        ssize_t copied = 0;
        struct ocfs2_splice_write_priv sp;
-        ret = buf->ops->pin(pipe, buf);
+        ret = buf->ops->confirm(pipe, buf);
        if (ret)
                goto out;
@@ -1604,7 +1604,7 @@ static int ocfs2_splice_write_actor(struct pipe_inode_info *pipe,
                 * might enter ocfs2_buffered_write_cluster() more
                 * than once, so keep track of our progress here.
                 */
-                copied = ocfs2_buffered_write_cluster(sd->file,
+                copied = ocfs2_buffered_write_cluster(sd->u.file,
                                                      (loff_t)sd->pos + total,
                                                      count,
                                                      ocfs2_map_and_write_splice_data,
@@ -1636,9 +1636,14 @@ static ssize_t __ocfs2_file_splice_write(struct pipe_inode_info *pipe,
        int ret, err;
        struct address_space *mapping = out->f_mapping;
        struct inode *inode = mapping->host;
+        struct splice_desc sd = {
-        ret = __splice_from_pipe(pipe, out, ppos, len, flags,
+                .total_len = len,
-                                 ocfs2_splice_write_actor);
+                .flags = flags,
+                .pos = *ppos,
+                .u.file = out,
+        };
+        ret = __splice_from_pipe(pipe, &sd, ocfs2_splice_write_actor);
        if (ret > 0) {
                *ppos += ret;
@@ -1817,7 +1822,6 @@ const struct inode_operations ocfs2_special_file_iops = {
 const struct file_operations ocfs2_fops = {
        .read           = do_sync_read,
        .write          = do_sync_write,
-        .sendfile       = generic_file_sendfile,
        .mmap           = ocfs2_mmap,
        .fsync          = ocfs2_sync_file,
        .release        = ocfs2_file_release,
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index 9a3a058f3553..98e0b85a9bb2 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -397,7 +397,6 @@ void add_partition(struct gendisk *disk, int part, sector_t start, sector_t len,
                static struct attribute addpartattr = {
                        .name = "whole_disk",
                        .mode = S_IRUSR | S_IRGRP | S_IROTH,
-                        .owner = THIS_MODULE,
                };
                sysfs_create_file(&p->kobj, &addpartattr);
diff --git a/fs/partitions/ibm.c b/fs/partitions/ibm.c
index 9f7ad4244f63..1e064c4a4f86 100644
--- a/fs/partitions/ibm.c
+++ b/fs/partitions/ibm.c
@@ -45,7 +45,7 @@ ibm_partition(struct parsed_partitions *state, struct block_device *bdev)
 {
        int blocksize, offset, size,res;
        loff_t i_size;
-        dasd_information_t *info;
+        dasd_information2_t *info;
        struct hd_geometry *geo;
        char type[5] = {0,};
        char name[7] = {0,};
@@ -64,14 +64,17 @@ ibm_partition(struct parsed_partitions *state, struct block_device *bdev)
        if (i_size == 0)
                goto out_exit;
-        if ((info = kmalloc(sizeof(dasd_information_t), GFP_KERNEL)) == NULL)
+        info = kmalloc(sizeof(dasd_information2_t), GFP_KERNEL);
+        if (info == NULL)
                goto out_exit;
-        if ((geo = kmalloc(sizeof(struct hd_geometry), GFP_KERNEL)) == NULL)
+        geo = kmalloc(sizeof(struct hd_geometry), GFP_KERNEL);
+        if (geo == NULL)
                goto out_nogeo;
-        if ((label = kmalloc(sizeof(union label_t), GFP_KERNEL)) == NULL)
+        label = kmalloc(sizeof(union label_t), GFP_KERNEL);
+        if (label == NULL)
                goto out_nolab;
-        if (ioctl_by_bdev(bdev, BIODASDINFO, (unsigned long)info) != 0 ||
+        if (ioctl_by_bdev(bdev, BIODASDINFO2, (unsigned long)info) != 0 ||
            ioctl_by_bdev(bdev, HDIO_GETGEO, (unsigned long)geo) != 0)
                goto out_freeall;
@@ -96,84 +99,108 @@ ibm_partition(struct parsed_partitions *state, struct block_device *bdev)
        res = 1;
        /*
-         * Three different types: CMS1, VOL1 and LNX1/unlabeled
+         * Three different formats: LDL, CDL and unformatted disk
+         *
+         * identified by info->format
+         *
+         * unformatted disks we do not have to care about
         */
-        if (strncmp(type, "CMS1", 4) == 0) {
+        if (info->format == DASD_FORMAT_LDL) {
-                /*
+                if (strncmp(type, "CMS1", 4) == 0) {
-                 * VM style CMS1 labeled disk
+                        /*
-                 */
+                         * VM style CMS1 labeled disk
-                if (label->cms.disk_offset != 0) {
+                         */
-                        printk("CMS1/%8s(MDSK):", name);
+                        if (label->cms.disk_offset != 0) {
-                        /* disk is reserved minidisk */
+                                printk("CMS1/%8s(MDSK):", name);
-                        blocksize = label->cms.block_size;
+                                /* disk is reserved minidisk */
-                        offset = label->cms.disk_offset;
+                                blocksize = label->cms.block_size;
-                        size = (label->cms.block_count - 1) * (blocksize >> 9);
+                                offset = label->cms.disk_offset;
+                                size = (label->cms.block_count - 1)
+                                        * (blocksize >> 9);
+                        } else {
+                                printk("CMS1/%8s:", name);
+                                offset = (info->label_block + 1);
+                                size = i_size >> 9;
+                        }
                } else {
-                        printk("CMS1/%8s:", name);
+                        /*
+                         * Old style LNX1 or unlabeled disk
+                         */
+                        if (strncmp(type, "LNX1", 4) == 0)
+                                printk ("LNX1/%8s:", name);
+                        else
+                                printk("(nonl)");
                        offset = (info->label_block + 1);
                        size = i_size >> 9;
                }
                put_partition(state, 1, offset*(blocksize >> 9),
-                                 size-offset*(blocksize >> 9));
+                                      size-offset*(blocksize >> 9));
-        } else if ((strncmp(type, "VOL1", 4) == 0) &&
+        } else if (info->format == DASD_FORMAT_CDL) {
-                (!info->FBA_layout) && (!strcmp(info->type, "ECKD"))) {
                /*
-                 * New style VOL1 labeled disk
+                 * New style CDL formatted disk
                 */
                unsigned int blk;
                int counter;
-                printk("VOL1/%8s:", name);
-                /* get block number and read then go through format1 labels */
-                blk = cchhb2blk(&label->vol.vtoc, geo) + 1;
-                counter = 0;
-                while ((data = read_dev_sector(bdev, blk*(blocksize/512),
-                                               &sect)) != NULL) {
-                        struct vtoc_format1_label f1;
-                        memcpy(&f1, data, sizeof(struct vtoc_format1_label));
-                        put_dev_sector(sect);
-                        /* skip FMT4 / FMT5 / FMT7 labels */
-                        if (f1.DS1FMTID == _ascebc['4']
-                            || f1.DS1FMTID == _ascebc['5']
-                            || f1.DS1FMTID == _ascebc['7']) {
-                                blk++;
-                                continue;
-                        }
-                        /* only FMT1 valid at this point */
-                        if (f1.DS1FMTID != _ascebc['1'])
-                                break;
-                        /* OK, we got valid partition data */
-                        offset = cchh2blk(&f1.DS1EXT1.llimit, geo);
-                        size  = cchh2blk(&f1.DS1EXT1.ulimit, geo) -
-                                offset + geo->sectors;
-                        if (counter >= state->limit)
-                                break;
-                        put_partition(state, counter + 1,
-                                      offset * (blocksize >> 9),
-                                      size * (blocksize >> 9));
-                        counter++;
-                        blk++;
-                }
-                if (!data)
-                /* Are we not supposed to report this ? */
-                        goto out_readerr;
-        } else {
                /*
-                 * Old style LNX1 or unlabeled disk
+                 * check if VOL1 label is available
+                 * if not, something is wrong, skipping partition detection
                 */
-                if (strncmp(type, "LNX1", 4) == 0)
+                if (strncmp(type, "VOL1",  4) == 0) {
-                        printk ("LNX1/%8s:", name);
+                        printk("VOL1/%8s:", name);
-                else
+                        /*
-                        printk("(nonl)/%8s:", name);
+                         * get block number and read then go through format1
-                offset = (info->label_block + 1);
+                         * labels
-                size = i_size >> 9;
+                         */
-                put_partition(state, 1, offset*(blocksize >> 9),
+                        blk = cchhb2blk(&label->vol.vtoc, geo) + 1;
-                              size-offset*(blocksize >> 9));
+                        counter = 0;
+                        data = read_dev_sector(bdev, blk * (blocksize/512),
+                                               &sect);
+                        while (data != NULL) {
+                                struct vtoc_format1_label f1;
+                                memcpy(&f1, data,
+                                       sizeof(struct vtoc_format1_label));
+                                put_dev_sector(sect);
+                                /* skip FMT4 / FMT5 / FMT7 labels */
+                                if (f1.DS1FMTID == _ascebc['4']
+                                    || f1.DS1FMTID == _ascebc['5']
+                                    || f1.DS1FMTID == _ascebc['7']) {
+                                        blk++;
+                                        data = read_dev_sector(bdev, blk *
+                                                               (blocksize/512),
+                                                                &sect);
+                                        continue;
+                                }
+                                /* only FMT1 valid at this point */
+                                if (f1.DS1FMTID != _ascebc['1'])
+                                        break;
+                                /* OK, we got valid partition data */
+                                offset = cchh2blk(&f1.DS1EXT1.llimit, geo);
+                                size  = cchh2blk(&f1.DS1EXT1.ulimit, geo) -
+                                        offset + geo->sectors;
+                                if (counter >= state->limit)
+                                        break;
+                                put_partition(state, counter + 1,
+                                              offset * (blocksize >> 9),
+                                              size * (blocksize >> 9));
+                                counter++;
+                                blk++;
+                                data = read_dev_sector(bdev,
+                                                       blk * (blocksize/512),
+                                                       &sect);
+                        }
+                        if (!data)
+                                /* Are we not supposed to report this ? */
+                                goto out_readerr;
+                } else
+                        printk(KERN_WARNING "Warning, expected Label VOL1 not "
+                               "found, treating as CDL formated Disk");
        }
        printk("\n");
diff --git a/fs/pipe.c b/fs/pipe.c
index 3a89592bdf57..d007830d9c87 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -164,6 +164,20 @@ static void anon_pipe_buf_release(struct pipe_inode_info *pipe,
                page_cache_release(page);
 }
+/**
+ * generic_pipe_buf_map - virtually map a pipe buffer
+ * @pipe:       the pipe that the buffer belongs to
+ * @buf:        the buffer that should be mapped
+ * @atomic:     whether to use an atomic map
+ *
+ * Description:
+ *      This function returns a kernel virtual address mapping for the
+ *      passed in @pipe_buffer. If @atomic is set, an atomic map is provided
+ *      and the caller has to be careful not to fault before calling
+ *      the unmap function.
+ *
+ *      Note that this function occupies KM_USER0 if @atomic != 0.
+ */
 void *generic_pipe_buf_map(struct pipe_inode_info *pipe,
                           struct pipe_buffer *buf, int atomic)
 {
@@ -175,6 +189,15 @@ void *generic_pipe_buf_map(struct pipe_inode_info *pipe,
        return kmap(buf->page);
 }
+/**
+ * generic_pipe_buf_unmap - unmap a previously mapped pipe buffer
+ * @pipe:       the pipe that the buffer belongs to
+ * @buf:        the buffer that should be unmapped
+ * @map_data:   the data that the mapping function returned
+ *
+ * Description:
+ *      This function undoes the mapping that ->map() provided.
+ */
 void generic_pipe_buf_unmap(struct pipe_inode_info *pipe,
                            struct pipe_buffer *buf, void *map_data)
 {
@@ -185,11 +208,28 @@ void generic_pipe_buf_unmap(struct pipe_inode_info *pipe,
                kunmap(buf->page);
 }
+/**
+ * generic_pipe_buf_steal - attempt to take ownership of a @pipe_buffer
+ * @pipe:       the pipe that the buffer belongs to
+ * @buf:        the buffer to attempt to steal
+ *
+ * Description:
+ *      This function attempts to steal the @struct page attached to
+ *      @buf. If successful, this function returns 0 and returns with
+ *      the page locked. The caller may then reuse the page for whatever
+ *      he wishes, the typical use is insertion into a different file
+ *      page cache.
+ */
 int generic_pipe_buf_steal(struct pipe_inode_info *pipe,
                           struct pipe_buffer *buf)
 {
        struct page *page = buf->page;
+        /*
+         * A reference of one is golden, that means that the owner of this
+         * page is the only one holding a reference to it. lock the page
+         * and return OK.
+         */
        if (page_count(page) == 1) {
                lock_page(page);
                return 0;
@@ -198,12 +238,32 @@ int generic_pipe_buf_steal(struct pipe_inode_info *pipe,
        return 1;
 }
-void generic_pipe_buf_get(struct pipe_inode_info *info, struct pipe_buffer *buf)
+/**
+ * generic_pipe_buf_get - get a reference to a @struct pipe_buffer
+ * @pipe:       the pipe that the buffer belongs to
+ * @buf:        the buffer to get a reference to
+ *
+ * Description:
+ *      This function grabs an extra reference to @buf. It's used in
+ *      the tee() system call, when we duplicate the buffers in one
+ *      pipe into another.
+ */
+void generic_pipe_buf_get(struct pipe_inode_info *pipe, struct pipe_buffer *buf)
 {
        page_cache_get(buf->page);
 }
-int generic_pipe_buf_pin(struct pipe_inode_info *info, struct pipe_buffer *buf)
+/**
+ * generic_pipe_buf_confirm - verify contents of the pipe buffer
+ * @info:       the pipe that the buffer belongs to
+ * @buf:        the buffer to confirm
+ *
+ * Description:
+ *      This function does nothing, because the generic pipe code uses
+ *      pages that are always good when inserted into the pipe.
+ */
+int generic_pipe_buf_confirm(struct pipe_inode_info *info,
+                             struct pipe_buffer *buf)
 {
        return 0;
 }
@@ -212,7 +272,7 @@ static const struct pipe_buf_operations anon_pipe_buf_ops = {
        .can_merge = 1,
        .map = generic_pipe_buf_map,
        .unmap = generic_pipe_buf_unmap,
-        .pin = generic_pipe_buf_pin,
+        .confirm = generic_pipe_buf_confirm,
        .release = anon_pipe_buf_release,
        .steal = generic_pipe_buf_steal,
        .get = generic_pipe_buf_get,
@@ -252,7 +312,7 @@ pipe_read(struct kiocb *iocb, const struct iovec *_iov,
                        if (chars > total_len)
                                chars = total_len;
-                        error = ops->pin(pipe, buf);
+                        error = ops->confirm(pipe, buf);
                        if (error) {
                                if (!ret)
                                        error = ret;
@@ -373,7 +433,7 @@ pipe_write(struct kiocb *iocb, const struct iovec *_iov,
                        int error, atomic = 1;
                        void *addr;
-                        error = ops->pin(pipe, buf);
+                        error = ops->confirm(pipe, buf);
                        if (error)
                                goto out;
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 74f30e0c0381..98e78e2f18d6 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -165,7 +165,6 @@ static inline char * task_state(struct task_struct *p, char *buffer)
        rcu_read_lock();
        buffer += sprintf(buffer,
                "State:\t%s\n"
-                "SleepAVG:\t%lu%%\n"
                "Tgid:\t%d\n"
                "Pid:\t%d\n"
                "PPid:\t%d\n"
@@ -173,7 +172,6 @@ static inline char * task_state(struct task_struct *p, char *buffer)
                "Uid:\t%d\t%d\t%d\t%d\n"
                "Gid:\t%d\t%d\t%d\t%d\n",
                get_task_state(p),
-                (p->sleep_avg/1024)*100/(1020000000/1024),
                p->tgid, p->pid,
                pid_alive(p) ? rcu_dereference(p->real_parent)->tgid : 0,
                pid_alive(p) && p->ptrace ? rcu_dereference(p->parent)->pid : 0,
@@ -312,6 +310,41 @@ int proc_pid_status(struct task_struct *task, char * buffer)
        return buffer - orig;
 }
+static clock_t task_utime(struct task_struct *p)
+{
+        clock_t utime = cputime_to_clock_t(p->utime),
+                total = utime + cputime_to_clock_t(p->stime);
+        u64 temp;
+        /*
+         * Use CFS's precise accounting:
+         */
+        temp = (u64)nsec_to_clock_t(p->se.sum_exec_runtime);
+        if (total) {
+                temp *= utime;
+                do_div(temp, total);
+        }
+        utime = (clock_t)temp;
+        return utime;
+}
+static clock_t task_stime(struct task_struct *p)
+{
+        clock_t stime = cputime_to_clock_t(p->stime);
+        /*
+         * Use CFS's precise accounting. (we subtract utime from
+         * the total, to make sure the total observed by userspace
+         * grows monotonically - apps rely on that):
+         */
+        stime = nsec_to_clock_t(p->se.sum_exec_runtime) - task_utime(p);
+        return stime;
+}
 static int do_task_stat(struct task_struct *task, char * buffer, int whole)
 {
        unsigned long vsize, eip, esp, wchan = ~0UL;
@@ -326,7 +359,8 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole)
        unsigned long long start_time;
        unsigned long cmin_flt = 0, cmaj_flt = 0;
        unsigned long  min_flt = 0,  maj_flt = 0;
-        cputime_t cutime, cstime, utime, stime;
+        cputime_t cutime, cstime;
+        clock_t utime, stime;
        unsigned long rsslim = 0;
        char tcomm[sizeof(task->comm)];
        unsigned long flags;
@@ -344,7 +378,8 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole)
        sigemptyset(&sigign);
        sigemptyset(&sigcatch);
-        cutime = cstime = utime = stime = cputime_zero;
+        cutime = cstime = cputime_zero;
+        utime = stime = 0;
        rcu_read_lock();
        if (lock_task_sighand(task, &flags)) {
@@ -370,15 +405,15 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole)
                        do {
                                min_flt += t->min_flt;
                                maj_flt += t->maj_flt;
-                                utime = cputime_add(utime, t->utime);
+                                utime += task_utime(t);
-                                stime = cputime_add(stime, t->stime);
+                                stime += task_stime(t);
                                t = next_thread(t);
                        } while (t != task);
                        min_flt += sig->min_flt;
                        maj_flt += sig->maj_flt;
-                        utime = cputime_add(utime, sig->utime);
+                        utime += cputime_to_clock_t(sig->utime);
-                        stime = cputime_add(stime, sig->stime);
+                        stime += cputime_to_clock_t(sig->stime);
                }
                sid = signal_session(sig);
@@ -394,8 +429,8 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole)
        if (!whole) {
                min_flt = task->min_flt;
                maj_flt = task->maj_flt;
-                utime = task->utime;
+                utime = task_utime(task);
-                stime = task->stime;
+                stime = task_stime(task);
        }
        /* scale priority and nice values from timeslices to -20..20 */
@@ -426,8 +461,8 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole)
                cmin_flt,
                maj_flt,
                cmaj_flt,
-                cputime_to_clock_t(utime),
+                utime,
-                cputime_to_clock_t(stime),
+                stime,
                cputime_to_clock_t(cutime),
                cputime_to_clock_t(cstime),
                priority,
diff --git a/fs/proc/base.c b/fs/proc/base.c
index a5fa1fdafc4e..46ea5d56e1bb 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -296,7 +296,7 @@ static int proc_pid_wchan(struct task_struct *task, char *buffer)
 */
 static int proc_pid_schedstat(struct task_struct *task, char *buffer)
 {
-        return sprintf(buffer, "%lu %lu %lu\n",
+        return sprintf(buffer, "%llu %llu %lu\n",
                        task->sched_info.cpu_time,
                        task->sched_info.run_delay,
                        task->sched_info.pcnt);
@@ -929,6 +929,69 @@ static const struct file_operations proc_fault_inject_operations = {
 };
 #endif
+#ifdef CONFIG_SCHED_DEBUG
+/*
+ * Print out various scheduling related per-task fields:
+ */
+static int sched_show(struct seq_file *m, void *v)
+{
+        struct inode *inode = m->private;
+        struct task_struct *p;
+        WARN_ON(!inode);
+        p = get_proc_task(inode);
+        if (!p)
+                return -ESRCH;
+        proc_sched_show_task(p, m);
+        put_task_struct(p);
+        return 0;
+}
+static ssize_t
+sched_write(struct file *file, const char __user *buf,
+            size_t count, loff_t *offset)
+{
+        struct inode *inode = file->f_path.dentry->d_inode;
+        struct task_struct *p;
+        WARN_ON(!inode);
+        p = get_proc_task(inode);
+        if (!p)
+                return -ESRCH;
+        proc_sched_set_task(p);
+        put_task_struct(p);
+        return count;
+}
+static int sched_open(struct inode *inode, struct file *filp)
+{
+        int ret;
+        ret = single_open(filp, sched_show, NULL);
+        if (!ret) {
+                struct seq_file *m = filp->private_data;
+                m->private = inode;
+        }
+        return ret;
+}
+static const struct file_operations proc_pid_sched_operations = {
+        .open           = sched_open,
+        .read           = seq_read,
+        .write          = sched_write,
+        .llseek         = seq_lseek,
+        .release        = seq_release,
+};
+#endif
 static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd)
 {
        struct inode *inode = dentry->d_inode;
@@ -1963,6 +2026,9 @@ static const struct pid_entry tgid_base_stuff[] = {
        INF("environ",    S_IRUSR, pid_environ),
        INF("auxv",       S_IRUSR, pid_auxv),
        INF("status",     S_IRUGO, pid_status),
+#ifdef CONFIG_SCHED_DEBUG
+        REG("sched",      S_IRUGO|S_IWUSR, pid_sched),
+#endif
        INF("cmdline",    S_IRUGO, pid_cmdline),
        INF("stat",       S_IRUGO, tgid_stat),
        INF("statm",      S_IRUGO, pid_statm),
@@ -2247,6 +2313,9 @@ static const struct pid_entry tid_base_stuff[] = {
        INF("environ",   S_IRUSR, pid_environ),
        INF("auxv",      S_IRUSR, pid_auxv),
        INF("status",    S_IRUGO, pid_status),
+#ifdef CONFIG_SCHED_DEBUG
+        REG("sched",     S_IRUGO|S_IWUSR, pid_sched),
+#endif
        INF("cmdline",   S_IRUGO, pid_cmdline),
        INF("stat",      S_IRUGO, tid_stat),
        INF("statm",     S_IRUGO, pid_statm),
diff --git a/fs/qnx4/file.c b/fs/qnx4/file.c
index 44649981bbc8..867f42b02035 100644
--- a/fs/qnx4/file.c
+++ b/fs/qnx4/file.c
@@ -25,7 +25,7 @@ const struct file_operations qnx4_file_operations =
        .read           = do_sync_read,
        .aio_read       = generic_file_aio_read,
        .mmap           = generic_file_mmap,
-        .sendfile       = generic_file_sendfile,
+        .splice_read    = generic_file_splice_read,
 #ifdef CONFIG_QNX4FS_RW
        .write          = do_sync_write,
        .aio_write      = generic_file_aio_write,
diff --git a/fs/ramfs/file-mmu.c b/fs/ramfs/file-mmu.c
index 2f14774a124f..97bdc0b2f9d2 100644
--- a/fs/ramfs/file-mmu.c
+++ b/fs/ramfs/file-mmu.c
@@ -41,7 +41,7 @@ const struct file_operations ramfs_file_operations = {
        .aio_write      = generic_file_aio_write,
        .mmap           = generic_file_mmap,
        .fsync          = simple_sync_file,
-        .sendfile       = generic_file_sendfile,
+        .splice_read    = generic_file_splice_read,
        .llseek         = generic_file_llseek,
 };
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
index 9345a46ffb32..cad2b7ace630 100644
--- a/fs/ramfs/file-nommu.c
+++ b/fs/ramfs/file-nommu.c
@@ -42,7 +42,7 @@ const struct file_operations ramfs_file_operations = {
        .write                  = do_sync_write,
        .aio_write              = generic_file_aio_write,
        .fsync                  = simple_sync_file,
-        .sendfile               = generic_file_sendfile,
+        .splice_read            = generic_file_splice_read,
        .llseek                 = generic_file_llseek,
 };
@@ -195,6 +195,11 @@ static int ramfs_nommu_setattr(struct dentry *dentry, struct iattr *ia)
        unsigned int old_ia_valid = ia->ia_valid;
        int ret = 0;
+        /* POSIX UID/GID verification for setting inode attributes */
+        ret = inode_change_ok(inode, ia);
+        if (ret)
+                return ret;
        /* by providing our own setattr() method, we skip this quotaism */
        if ((old_ia_valid & ATTR_UID && ia->ia_uid != inode->i_uid) ||
            (old_ia_valid & ATTR_GID && ia->ia_gid != inode->i_gid))
diff --git a/fs/read_write.c b/fs/read_write.c
index 4d03008f015b..507ddff48a9a 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -15,6 +15,7 @@
 #include <linux/module.h>
 #include <linux/syscalls.h>
 #include <linux/pagemap.h>
+#include <linux/splice.h>
 #include "read_write.h"
 #include <asm/uaccess.h>
@@ -25,7 +26,7 @@ const struct file_operations generic_ro_fops = {
        .read           = do_sync_read,
        .aio_read       = generic_file_aio_read,
        .mmap           = generic_file_readonly_mmap,
-        .sendfile       = generic_file_sendfile,
+        .splice_read    = generic_file_splice_read,
 };
 EXPORT_SYMBOL(generic_ro_fops);
@@ -708,7 +709,7 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
        struct inode * in_inode, * out_inode;
        loff_t pos;
        ssize_t retval;
-        int fput_needed_in, fput_needed_out;
+        int fput_needed_in, fput_needed_out, fl;
        /*
         * Get input file, and verify that it is ok..
@@ -723,7 +724,7 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
        in_inode = in_file->f_path.dentry->d_inode;
        if (!in_inode)
                goto fput_in;
-        if (!in_file->f_op || !in_file->f_op->sendfile)
+        if (!in_file->f_op || !in_file->f_op->splice_read)
                goto fput_in;
        retval = -ESPIPE;
        if (!ppos)
@@ -776,7 +777,18 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
                count = max - pos;
        }
-        retval = in_file->f_op->sendfile(in_file, ppos, count, file_send_actor, out_file);
+        fl = 0;
+#if 0
+        /*
+         * We need to debate whether we can enable this or not. The
+         * man page documents EAGAIN return for the output at least,
+         * and the application is arguably buggy if it doesn't expect
+         * EAGAIN on a non-blocking file descriptor.
+         */
+        if (in_file->f_flags & O_NONBLOCK)
+                fl = SPLICE_F_NONBLOCK;
+#endif
+        retval = do_splice_direct(in_file, ppos, out_file, count, fl);
        if (retval > 0) {
                add_rchar(current, retval);
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index 9e451a68580f..30eebfb1b2d8 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -1531,7 +1531,6 @@ const struct file_operations reiserfs_file_operations = {
        .open = generic_file_open,
        .release = reiserfs_file_release,
        .fsync = reiserfs_sync_file,
-        .sendfile = generic_file_sendfile,
        .aio_read = generic_file_aio_read,
        .aio_write = generic_file_aio_write,
        .splice_read = generic_file_splice_read,
diff --git a/fs/seq_file.c b/fs/seq_file.c
index 0ac22af7afe5..49194a4e6b91 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -447,3 +447,37 @@ int seq_puts(struct seq_file *m, const char *s)
        return -1;
 }
 EXPORT_SYMBOL(seq_puts);
+struct list_head *seq_list_start(struct list_head *head, loff_t pos)
+{
+        struct list_head *lh;
+        list_for_each(lh, head)
+                if (pos-- == 0)
+                        return lh;
+        return NULL;
+}
+EXPORT_SYMBOL(seq_list_start);
+struct list_head *seq_list_start_head(struct list_head *head, loff_t pos)
+{
+        if (!pos)
+                return head;
+        return seq_list_start(head, pos - 1);
+}
+EXPORT_SYMBOL(seq_list_start_head);
+struct list_head *seq_list_next(void *v, struct list_head *head, loff_t *ppos)
+{
+        struct list_head *lh;
+        lh = ((struct list_head *)v)->next;
+        ++*ppos;
+        return lh == head ? NULL : lh;
+}
+EXPORT_SYMBOL(seq_list_next);
diff --git a/fs/signalfd.c b/fs/signalfd.c
index f1da89203a9a..3b07f26d984d 100644
--- a/fs/signalfd.c
+++ b/fs/signalfd.c
@@ -133,7 +133,8 @@ static unsigned int signalfd_poll(struct file *file, poll_table *wait)
         * the peer disconnects.
         */
        if (signalfd_lock(ctx, &lk)) {
-                if (next_signal(&lk.tsk->pending, &ctx->sigmask) > 0 ||
+                if ((lk.tsk == current &&
+                     next_signal(&lk.tsk->pending, &ctx->sigmask) > 0) ||
                    next_signal(&lk.tsk->signal->shared_pending,
                                &ctx->sigmask) > 0)
                        events |= POLLIN;
diff --git a/fs/smbfs/file.c b/fs/smbfs/file.c
index aea3f8aa54c0..c5d78a7e492b 100644
--- a/fs/smbfs/file.c
+++ b/fs/smbfs/file.c
@@ -262,8 +262,9 @@ out:
 }
 static ssize_t
-smb_file_sendfile(struct file *file, loff_t *ppos,
+smb_file_splice_read(struct file *file, loff_t *ppos,
-                  size_t count, read_actor_t actor, void *target)
+                     struct pipe_inode_info *pipe, size_t count,
+                     unsigned int flags)
 {
        struct dentry *dentry = file->f_path.dentry;
        ssize_t status;
@@ -277,7 +278,7 @@ smb_file_sendfile(struct file *file, loff_t *ppos,
                         DENTRY_PATH(dentry), status);
                goto out;
        }
-        status = generic_file_sendfile(file, ppos, count, actor, target);
+        status = generic_file_splice_read(file, ppos, pipe, count, flags);
 out:
        return status;
 }
@@ -416,7 +417,7 @@ const struct file_operations smb_file_operations =
        .open           = smb_file_open,
        .release        = smb_file_release,
        .fsync          = smb_fsync,
-        .sendfile       = smb_file_sendfile,
+        .splice_read    = smb_file_splice_read,
 };
 const struct inode_operations smb_file_inode_operations =
diff --git a/fs/splice.c b/fs/splice.c
index 12f28281d2b1..6c9828651e6f 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -20,7 +20,7 @@
 #include <linux/fs.h>
 #include <linux/file.h>
 #include <linux/pagemap.h>
-#include <linux/pipe_fs_i.h>
+#include <linux/splice.h>
 #include <linux/mm_inline.h>
 #include <linux/swap.h>
 #include <linux/writeback.h>
@@ -28,22 +28,7 @@
 #include <linux/module.h>
 #include <linux/syscalls.h>
 #include <linux/uio.h>
+#include <linux/security.h>
-struct partial_page {
-        unsigned int offset;
-        unsigned int len;
-};
-/*
- * Passed to splice_to_pipe
- */
-struct splice_pipe_desc {
-        struct page **pages;            /* page map */
-        struct partial_page *partial;   /* pages[] may not be contig */
-        int nr_pages;                   /* number of pages in map */
-        unsigned int flags;             /* splice flags */
-        const struct pipe_buf_operations *ops;/* ops associated with output pipe */
-};
 /*
 * Attempt to steal a page from a pipe buffer. This should perhaps go into
@@ -101,8 +86,12 @@ static void page_cache_pipe_buf_release(struct pipe_inode_info *pipe,
        buf->flags &= ~PIPE_BUF_FLAG_LRU;
 }
-static int page_cache_pipe_buf_pin(struct pipe_inode_info *pipe,
+/*
-                                   struct pipe_buffer *buf)
+ * Check whether the contents of buf are OK to access. Since the content
+ * is a page cache page, IO may be in flight.
+ */
+static int page_cache_pipe_buf_confirm(struct pipe_inode_info *pipe,
+                                       struct pipe_buffer *buf)
 {
        struct page *page = buf->page;
        int err;
@@ -143,7 +132,7 @@ static const struct pipe_buf_operations page_cache_pipe_buf_ops = {
        .can_merge = 0,
        .map = generic_pipe_buf_map,
        .unmap = generic_pipe_buf_unmap,
-        .pin = page_cache_pipe_buf_pin,
+        .confirm = page_cache_pipe_buf_confirm,
        .release = page_cache_pipe_buf_release,
        .steal = page_cache_pipe_buf_steal,
        .get = generic_pipe_buf_get,
@@ -163,19 +152,27 @@ static const struct pipe_buf_operations user_page_pipe_buf_ops = {
        .can_merge = 0,
        .map = generic_pipe_buf_map,
        .unmap = generic_pipe_buf_unmap,
-        .pin = generic_pipe_buf_pin,
+        .confirm = generic_pipe_buf_confirm,
        .release = page_cache_pipe_buf_release,
        .steal = user_page_pipe_buf_steal,
        .get = generic_pipe_buf_get,
 };
-/*
+/**
- * Pipe output worker. This sets up our pipe format with the page cache
+ * splice_to_pipe - fill passed data into a pipe
- * pipe buffer operations. Otherwise very similar to the regular pipe_writev().
+ * @pipe:       pipe to fill
+ * @spd:        data to fill
+ *
+ * Description:
+ *    @spd contains a map of pages and len/offset tuples, along with
+ *    the struct pipe_buf_operations associated with these pages. This
+ *    function will link that data to the pipe.
+ *
 */
-static ssize_t splice_to_pipe(struct pipe_inode_info *pipe,
+ssize_t splice_to_pipe(struct pipe_inode_info *pipe,
-                              struct splice_pipe_desc *spd)
+                       struct splice_pipe_desc *spd)
 {
+        unsigned int spd_pages = spd->nr_pages;
        int ret, do_wakeup, page_nr;
        ret = 0;
@@ -200,6 +197,7 @@ static ssize_t splice_to_pipe(struct pipe_inode_info *pipe,
                        buf->page = spd->pages[page_nr];
                        buf->offset = spd->partial[page_nr].offset;
                        buf->len = spd->partial[page_nr].len;
+                        buf->private = spd->partial[page_nr].private;
                        buf->ops = spd->ops;
                        if (spd->flags & SPLICE_F_GIFT)
                                buf->flags |= PIPE_BUF_FLAG_GIFT;
@@ -244,17 +242,18 @@ static ssize_t splice_to_pipe(struct pipe_inode_info *pipe,
                pipe->waiting_writers--;
        }
-        if (pipe->inode)
+        if (pipe->inode) {
                mutex_unlock(&pipe->inode->i_mutex);
-        if (do_wakeup) {
+                if (do_wakeup) {
-                smp_mb();
+                        smp_mb();
-                if (waitqueue_active(&pipe->wait))
+                        if (waitqueue_active(&pipe->wait))
-                        wake_up_interruptible(&pipe->wait);
+                                wake_up_interruptible(&pipe->wait);
-                kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
+                        kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
+                }
        }
-        while (page_nr < spd->nr_pages)
+        while (page_nr < spd_pages)
                page_cache_release(spd->pages[page_nr++]);
        return ret;
@@ -272,7 +271,6 @@ __generic_file_splice_read(struct file *in, loff_t *ppos,
        struct page *page;
        pgoff_t index, end_index;
        loff_t isize;
-        size_t total_len;
        int error, page_nr;
        struct splice_pipe_desc spd = {
                .pages = pages,
@@ -295,20 +293,15 @@ __generic_file_splice_read(struct file *in, loff_t *ppos,
        page_cache_readahead(mapping, &in->f_ra, in, index, nr_pages);
        /*
-         * Now fill in the holes:
-         */
-        error = 0;
-        total_len = 0;
-        /*
         * Lookup the (hopefully) full range of pages we need.
         */
        spd.nr_pages = find_get_pages_contig(mapping, index, nr_pages, pages);
        /*
         * If find_get_pages_contig() returned fewer pages than we needed,
-         * allocate the rest.
+         * allocate the rest and fill in the holes.
         */
+        error = 0;
        index += spd.nr_pages;
        while (spd.nr_pages < nr_pages) {
                /*
@@ -415,43 +408,47 @@ __generic_file_splice_read(struct file *in, loff_t *ppos,
                                break;
                        }
+                }
+fill_it:
+                /*
+                 * i_size must be checked after PageUptodate.
+                 */
+                isize = i_size_read(mapping->host);
+                end_index = (isize - 1) >> PAGE_CACHE_SHIFT;
+                if (unlikely(!isize || index > end_index))
+                        break;
+                /*
+                 * if this is the last page, see if we need to shrink
+                 * the length and stop
+                 */
+                if (end_index == index) {
+                        unsigned int plen;
                        /*
-                         * i_size must be checked after ->readpage().
+                         * max good bytes in this page
                         */
-                        isize = i_size_read(mapping->host);
+                        plen = ((isize - 1) & ~PAGE_CACHE_MASK) + 1;
-                        end_index = (isize - 1) >> PAGE_CACHE_SHIFT;
+                        if (plen <= loff)
-                        if (unlikely(!isize || index > end_index))
                                break;
                        /*
-                         * if this is the last page, see if we need to shrink
+                         * force quit after adding this page
-                         * the length and stop
                         */
-                        if (end_index == index) {
+                        this_len = min(this_len, plen - loff);
-                                loff = PAGE_CACHE_SIZE - (isize & ~PAGE_CACHE_MASK);
+                        len = this_len;
-                                if (total_len + loff > isize)
-                                        break;
-                                /*
-                                 * force quit after adding this page
-                                 */
-                                len = this_len;
-                                this_len = min(this_len, loff);
-                                loff = 0;
-                        }
                }
-fill_it:
                partial[page_nr].offset = loff;
                partial[page_nr].len = this_len;
                len -= this_len;
-                total_len += this_len;
                loff = 0;
                spd.nr_pages++;
                index++;
        }
        /*
-         * Release any pages at the end, if we quit early. 'i' is how far
+         * Release any pages at the end, if we quit early. 'page_nr' is how far
         * we got, 'nr_pages' is how many pages are in the map.
         */
        while (page_nr < nr_pages)
@@ -466,11 +463,16 @@ fill_it:
 /**
 * generic_file_splice_read - splice data from file to a pipe
 * @in:         file to splice from
+ * @ppos:       position in @in
 * @pipe:       pipe to splice to
 * @len:        number of bytes to splice
 * @flags:      splice modifier flags
 *
- * Will read pages from given file and fill them into a pipe.
+ * Description:
+ *    Will read pages from given file and fill them into a pipe. Can be
+ *    used as long as the address_space operations for the source implement
+ *    a readpage() hook.
+ *
 */
 ssize_t generic_file_splice_read(struct file *in, loff_t *ppos,
                                 struct pipe_inode_info *pipe, size_t len,
@@ -478,11 +480,19 @@ ssize_t generic_file_splice_read(struct file *in, loff_t *ppos,
 {
        ssize_t spliced;
        int ret;
+        loff_t isize, left;
+        isize = i_size_read(in->f_mapping->host);
+        if (unlikely(*ppos >= isize))
+                return 0;
+        left = isize - *ppos;
+        if (unlikely(left < len))
+                len = left;
        ret = 0;
        spliced = 0;
+        while (len && !spliced) {
-        while (len) {
                ret = __generic_file_splice_read(in, ppos, pipe, len, flags);
                if (ret < 0)
@@ -516,11 +526,11 @@ EXPORT_SYMBOL(generic_file_splice_read);
 static int pipe_to_sendpage(struct pipe_inode_info *pipe,
                            struct pipe_buffer *buf, struct splice_desc *sd)
 {
-        struct file *file = sd->file;
+        struct file *file = sd->u.file;
        loff_t pos = sd->pos;
        int ret, more;
-        ret = buf->ops->pin(pipe, buf);
+        ret = buf->ops->confirm(pipe, buf);
        if (!ret) {
                more = (sd->flags & SPLICE_F_MORE) || sd->len < sd->total_len;
@@ -554,7 +564,7 @@ static int pipe_to_sendpage(struct pipe_inode_info *pipe,
 static int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
                        struct splice_desc *sd)
 {
-        struct file *file = sd->file;
+        struct file *file = sd->u.file;
        struct address_space *mapping = file->f_mapping;
        unsigned int offset, this_len;
        struct page *page;
@@ -564,7 +574,7 @@ static int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
        /*
         * make sure the data in this buffer is uptodate
         */
-        ret = buf->ops->pin(pipe, buf);
+        ret = buf->ops->confirm(pipe, buf);
        if (unlikely(ret))
                return ret;
@@ -644,7 +654,6 @@ find_page:
         * accessed, we are now done!
         */
        mark_page_accessed(page);
-        balance_dirty_pages_ratelimited(mapping);
 out:
        page_cache_release(page);
        unlock_page(page);
@@ -652,36 +661,37 @@ out_ret:
        return ret;
 }
-/*
+/**
- * Pipe input worker. Most of this logic works like a regular pipe, the
+ * __splice_from_pipe - splice data from a pipe to given actor
- * key here is the 'actor' worker passed in that actually moves the data
+ * @pipe:       pipe to splice from
- * to the wanted destination. See pipe_to_file/pipe_to_sendpage above.
+ * @sd:         information to @actor
+ * @actor:      handler that splices the data
+ *
+ * Description:
+ *    This function does little more than loop over the pipe and call
+ *    @actor to do the actual moving of a single struct pipe_buffer to
+ *    the desired destination. See pipe_to_file, pipe_to_sendpage, or
+ *    pipe_to_user.
+ *
 */
-ssize_t __splice_from_pipe(struct pipe_inode_info *pipe,
+ssize_t __splice_from_pipe(struct pipe_inode_info *pipe, struct splice_desc *sd,
-                           struct file *out, loff_t *ppos, size_t len,
+                           splice_actor *actor)
-                           unsigned int flags, splice_actor *actor)
 {
        int ret, do_wakeup, err;
-        struct splice_desc sd;
        ret = 0;
        do_wakeup = 0;
-        sd.total_len = len;
-        sd.flags = flags;
-        sd.file = out;
-        sd.pos = *ppos;
        for (;;) {
                if (pipe->nrbufs) {
                        struct pipe_buffer *buf = pipe->bufs + pipe->curbuf;
                        const struct pipe_buf_operations *ops = buf->ops;
-                        sd.len = buf->len;
+                        sd->len = buf->len;
-                        if (sd.len > sd.total_len)
+                        if (sd->len > sd->total_len)
-                                sd.len = sd.total_len;
+                                sd->len = sd->total_len;
-                        err = actor(pipe, buf, &sd);
+                        err = actor(pipe, buf, sd);
                        if (err <= 0) {
                                if (!ret && err != -ENODATA)
                                        ret = err;
@@ -693,10 +703,10 @@ ssize_t __splice_from_pipe(struct pipe_inode_info *pipe,
                        buf->offset += err;
                        buf->len -= err;
-                        sd.len -= err;
+                        sd->len -= err;
-                        sd.pos += err;
+                        sd->pos += err;
-                        sd.total_len -= err;
+                        sd->total_len -= err;
-                        if (sd.len)
+                        if (sd->len)
                                continue;
                        if (!buf->len) {
@@ -708,7 +718,7 @@ ssize_t __splice_from_pipe(struct pipe_inode_info *pipe,
                                        do_wakeup = 1;
                        }
-                        if (!sd.total_len)
+                        if (!sd->total_len)
                                break;
                }
@@ -721,7 +731,7 @@ ssize_t __splice_from_pipe(struct pipe_inode_info *pipe,
                                break;
                }
-                if (flags & SPLICE_F_NONBLOCK) {
+                if (sd->flags & SPLICE_F_NONBLOCK) {
                        if (!ret)
                                ret = -EAGAIN;
                        break;
@@ -755,12 +765,32 @@ ssize_t __splice_from_pipe(struct pipe_inode_info *pipe,
 }
 EXPORT_SYMBOL(__splice_from_pipe);
+/**
+ * splice_from_pipe - splice data from a pipe to a file
+ * @pipe:       pipe to splice from
+ * @out:        file to splice to
+ * @ppos:       position in @out
+ * @len:        how many bytes to splice
+ * @flags:      splice modifier flags
+ * @actor:      handler that splices the data
+ *
+ * Description:
+ *    See __splice_from_pipe. This function locks the input and output inodes,
+ *    otherwise it's identical to __splice_from_pipe().
+ *
+ */
 ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out,
                         loff_t *ppos, size_t len, unsigned int flags,
                         splice_actor *actor)
 {
        ssize_t ret;
        struct inode *inode = out->f_mapping->host;
+        struct splice_desc sd = {
+                .total_len = len,
+                .flags = flags,
+                .pos = *ppos,
+                .u.file = out,
+        };
        /*
         * The actor worker might be calling ->prepare_write and
@@ -769,7 +799,7 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out,
         * pipe->inode, we have to order lock acquiry here.
         */
        inode_double_lock(inode, pipe->inode);
-        ret = __splice_from_pipe(pipe, out, ppos, len, flags, actor);
+        ret = __splice_from_pipe(pipe, &sd, actor);
        inode_double_unlock(inode, pipe->inode);
        return ret;
@@ -779,12 +809,14 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out,
 * generic_file_splice_write_nolock - generic_file_splice_write without mutexes
 * @pipe:       pipe info
 * @out:        file to write to
+ * @ppos:       position in @out
 * @len:        number of bytes to splice
 * @flags:      splice modifier flags
 *
- * Will either move or copy pages (determined by @flags options) from
+ * Description:
- * the given pipe inode to the given file. The caller is responsible
+ *    Will either move or copy pages (determined by @flags options) from
- * for acquiring i_mutex on both inodes.
+ *    the given pipe inode to the given file. The caller is responsible
+ *    for acquiring i_mutex on both inodes.
 *
 */
 ssize_t
@@ -793,6 +825,12 @@ generic_file_splice_write_nolock(struct pipe_inode_info *pipe, struct file *out,
 {
        struct address_space *mapping = out->f_mapping;
        struct inode *inode = mapping->host;
+        struct splice_desc sd = {
+                .total_len = len,
+                .flags = flags,
+                .pos = *ppos,
+                .u.file = out,
+        };
        ssize_t ret;
        int err;
@@ -800,9 +838,12 @@ generic_file_splice_write_nolock(struct pipe_inode_info *pipe, struct file *out,
        if (unlikely(err))
                return err;
-        ret = __splice_from_pipe(pipe, out, ppos, len, flags, pipe_to_file);
+        ret = __splice_from_pipe(pipe, &sd, pipe_to_file);
        if (ret > 0) {
+                unsigned long nr_pages;
                *ppos += ret;
+                nr_pages = (ret + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
                /*
                 * If file or inode is SYNC and we actually wrote some data,
@@ -815,6 +856,7 @@ generic_file_splice_write_nolock(struct pipe_inode_info *pipe, struct file *out,
                        if (err)
                                ret = err;
                }
+                balance_dirty_pages_ratelimited_nr(mapping, nr_pages);
        }
        return ret;
@@ -826,11 +868,13 @@ EXPORT_SYMBOL(generic_file_splice_write_nolock);
 * generic_file_splice_write - splice data from a pipe to a file
 * @pipe:       pipe info
 * @out:        file to write to
+ * @ppos:       position in @out
 * @len:        number of bytes to splice
 * @flags:      splice modifier flags
 *
- * Will either move or copy pages (determined by @flags options) from
+ * Description:
- * the given pipe inode to the given file.
+ *    Will either move or copy pages (determined by @flags options) from
+ *    the given pipe inode to the given file.
 *
 */
 ssize_t
@@ -853,7 +897,10 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
        ret = splice_from_pipe(pipe, out, ppos, len, flags, pipe_to_file);
        if (ret > 0) {
+                unsigned long nr_pages;
                *ppos += ret;
+                nr_pages = (ret + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
                /*
                 * If file or inode is SYNC and we actually wrote some data,
@@ -868,6 +915,7 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
                        if (err)
                                ret = err;
                }
+                balance_dirty_pages_ratelimited_nr(mapping, nr_pages);
        }
        return ret;
@@ -877,13 +925,15 @@ EXPORT_SYMBOL(generic_file_splice_write);
 /**
 * generic_splice_sendpage - splice data from a pipe to a socket
- * @inode:      pipe inode
+ * @pipe:       pipe to splice from
 * @out:        socket to write to
+ * @ppos:       position in @out
 * @len:        number of bytes to splice
 * @flags:      splice modifier flags
 *
- * Will send @len bytes from the pipe to a network socket. No data copying
+ * Description:
- * is involved.
+ *    Will send @len bytes from the pipe to a network socket. No data copying
+ *    is involved.
 *
 */
 ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe, struct file *out,
@@ -912,6 +962,10 @@ static long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
        if (unlikely(ret < 0))
                return ret;
+        ret = security_file_permission(out, MAY_WRITE);
+        if (unlikely(ret < 0))
+                return ret;
        return out->f_op->splice_write(pipe, out, ppos, len, flags);
 }
@@ -922,7 +976,6 @@ static long do_splice_to(struct file *in, loff_t *ppos,
                         struct pipe_inode_info *pipe, size_t len,
                         unsigned int flags)
 {
-        loff_t isize, left;
        int ret;
        if (unlikely(!in->f_op || !in->f_op->splice_read))
@@ -935,25 +988,34 @@ static long do_splice_to(struct file *in, loff_t *ppos,
        if (unlikely(ret < 0))
                return ret;
-        isize = i_size_read(in->f_mapping->host);
+        ret = security_file_permission(in, MAY_READ);
-        if (unlikely(*ppos >= isize))
+        if (unlikely(ret < 0))
-                return 0;
+                return ret;
-        
-        left = isize - *ppos;
-        if (unlikely(left < len))
-                len = left;
        return in->f_op->splice_read(in, ppos, pipe, len, flags);
 }
-long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
+/**
-                      size_t len, unsigned int flags)
+ * splice_direct_to_actor - splices data directly between two non-pipes
+ * @in:         file to splice from
+ * @sd:         actor information on where to splice to
+ * @actor:      handles the data splicing
+ *
+ * Description:
+ *    This is a special case helper to splice directly between two
+ *    points, without requiring an explicit pipe. Internally an allocated
+ *    pipe is cached in the process, and reused during the lifetime of
+ *    that process.
+ *
+ */
+ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd,
+                               splice_direct_actor *actor)
 {
        struct pipe_inode_info *pipe;
        long ret, bytes;
-        loff_t out_off;
        umode_t i_mode;
-        int i;
+        size_t len;
+        int i, flags;
        /*
         * We require the input being a regular file, as we don't want to
@@ -989,49 +1051,41 @@ long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
         */
        ret = 0;
        bytes = 0;
-        out_off = 0;
+        len = sd->total_len;
+        flags = sd->flags;
-        while (len) {
+        /*
-                size_t read_len, max_read_len;
+         * Don't block on output; we have to drain the direct pipe.
+         */
+        sd->flags &= ~SPLICE_F_NONBLOCK;
-                /*
+        while (len) {
-                 * Do at most PIPE_BUFFERS pages worth of transfer:
+                size_t read_len;
-                 */
-                max_read_len = min(len, (size_t)(PIPE_BUFFERS*PAGE_SIZE));
-                ret = do_splice_to(in, ppos, pipe, max_read_len, flags);
+                ret = do_splice_to(in, &sd->pos, pipe, len, flags);
-                if (unlikely(ret < 0))
+                if (unlikely(ret <= 0))
                        goto out_release;
                read_len = ret;
+                sd->total_len = read_len;
                /*
                 * NOTE: nonblocking mode only applies to the input. We
                 * must not do the output in nonblocking mode as then we
                 * could get stuck data in the internal pipe:
                 */
-                ret = do_splice_from(pipe, out, &out_off, read_len,
+                ret = actor(pipe, sd);
-                                     flags & ~SPLICE_F_NONBLOCK);
+                if (unlikely(ret <= 0))
-                if (unlikely(ret < 0))
                        goto out_release;
                bytes += ret;
                len -= ret;
-                /*
+                if (ret < read_len)
-                 * In nonblocking mode, if we got back a short read then
+                        goto out_release;
-                 * that was due to either an IO error or due to the
-                 * pagecache entry not being there. In the IO error case
-                 * the _next_ splice attempt will produce a clean IO error
-                 * return value (not a short read), so in both cases it's
-                 * correct to break out of the loop here:
-                 */
-                if ((flags & SPLICE_F_NONBLOCK) && (read_len < max_read_len))
-                        break;
        }
        pipe->nrbufs = pipe->curbuf = 0;
        return bytes;
 out_release:
@@ -1056,9 +1110,51 @@ out_release:
                return bytes;
        return ret;
 }
+EXPORT_SYMBOL(splice_direct_to_actor);
-EXPORT_SYMBOL(do_splice_direct);
+static int direct_splice_actor(struct pipe_inode_info *pipe,
+                               struct splice_desc *sd)
+{
+        struct file *file = sd->u.file;
+        return do_splice_from(pipe, file, &sd->pos, sd->total_len, sd->flags);
+}
+/**
+ * do_splice_direct - splices data directly between two files
+ * @in:         file to splice from
+ * @ppos:       input file offset
+ * @out:        file to splice to
+ * @len:        number of bytes to splice
+ * @flags:      splice modifier flags
+ *
+ * Description:
+ *    For use by do_sendfile(). splice can easily emulate sendfile, but
+ *    doing it in the application would incur an extra system call
+ *    (splice in + splice out, as compared to just sendfile()). So this helper
+ *    can splice directly through a process-private pipe.
+ *
+ */
+long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
+                      size_t len, unsigned int flags)
+{
+        struct splice_desc sd = {
+                .len            = len,
+                .total_len      = len,
+                .flags          = flags,
+                .pos            = *ppos,
+                .u.file         = out,
+        };
+        long ret;
+        ret = splice_direct_to_actor(in, &sd, direct_splice_actor);
+        if (ret > 0)
+                *ppos += ret;
+        return ret;
+}
 /*
 * After the inode slimming patch, i_pipe/i_bdev/i_cdev share the same
@@ -1240,28 +1336,131 @@ static int get_iovec_page_array(const struct iovec __user *iov,
        return error;
 }
+static int pipe_to_user(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
+                        struct splice_desc *sd)
+{
+        char *src;
+        int ret;
+        ret = buf->ops->confirm(pipe, buf);
+        if (unlikely(ret))
+                return ret;
+        /*
+         * See if we can use the atomic maps, by prefaulting in the
+         * pages and doing an atomic copy
+         */
+        if (!fault_in_pages_writeable(sd->u.userptr, sd->len)) {
+                src = buf->ops->map(pipe, buf, 1);
+                ret = __copy_to_user_inatomic(sd->u.userptr, src + buf->offset,
+                                                        sd->len);
+                buf->ops->unmap(pipe, buf, src);
+                if (!ret) {
+                        ret = sd->len;
+                        goto out;
+                }
+        }
+        /*
+         * No dice, use slow non-atomic map and copy
+         */
+        src = buf->ops->map(pipe, buf, 0);
+        ret = sd->len;
+        if (copy_to_user(sd->u.userptr, src + buf->offset, sd->len))
+                ret = -EFAULT;
+out:
+        if (ret > 0)
+                sd->u.userptr += ret;
+        buf->ops->unmap(pipe, buf, src);
+        return ret;
+}
+/*
+ * For lack of a better implementation, implement vmsplice() to userspace
+ * as a simple copy of the pipe's pages to the user iov.
+ */
+static long vmsplice_to_user(struct file *file, const struct iovec __user *iov,
+                             unsigned long nr_segs, unsigned int flags)
+{
+        struct pipe_inode_info *pipe;
+        struct splice_desc sd;
+        ssize_t size;
+        int error;
+        long ret;
+        pipe = pipe_info(file->f_path.dentry->d_inode);
+        if (!pipe)
+                return -EBADF;
+        if (pipe->inode)
+                mutex_lock(&pipe->inode->i_mutex);
+        error = ret = 0;
+        while (nr_segs) {
+                void __user *base;
+                size_t len;
+                /*
+                 * Get user address base and length for this iovec.
+                 */
+                error = get_user(base, &iov->iov_base);
+                if (unlikely(error))
+                        break;
+                error = get_user(len, &iov->iov_len);
+                if (unlikely(error))
+                        break;
+                /*
+                 * Sanity check this iovec. A zero-length read succeeds.
+                 */
+                if (unlikely(!len))
+                        break;
+                if (unlikely(!base)) {
+                        error = -EFAULT;
+                        break;
+                }
+                sd.len = 0;
+                sd.total_len = len;
+                sd.flags = flags;
+                sd.u.userptr = base;
+                sd.pos = 0;
+                size = __splice_from_pipe(pipe, &sd, pipe_to_user);
+                if (size < 0) {
+                        if (!ret)
+                                ret = size;
+                        break;
+                }
+                ret += size;
+                if (size < len)
+                        break;
+                nr_segs--;
+                iov++;
+        }
+        if (pipe->inode)
+                mutex_unlock(&pipe->inode->i_mutex);
+        if (!ret)
+                ret = error;
+        return ret;
+}
 /*
 * vmsplice splices a user address range into a pipe. It can be thought of
 * as splice-from-memory, where the regular splice is splice-from-file (or
 * to file). In both cases the output is a pipe, naturally.
- *
- * Note that vmsplice only supports splicing _from_ user memory to a pipe,
- * not the other way around. Splicing from user memory is a simple operation
- * that can be supported without any funky alignment restrictions or nasty
- * vm tricks. We simply map in the user memory and fill them into a pipe.
- * The reverse isn't quite as easy, though. There are two possible solutions
- * for that:
- *
- *      - memcpy() the data internally, at which point we might as well just
- *        do a regular read() on the buffer anyway.
- *      - Lots of nasty vm tricks, that are neither fast nor flexible (it
- *        has restriction limitations on both ends of the pipe).
- *
- * Alas, it isn't here.
- *
 */
-static long do_vmsplice(struct file *file, const struct iovec __user *iov,
+static long vmsplice_to_pipe(struct file *file, const struct iovec __user *iov,
-                        unsigned long nr_segs, unsigned int flags)
+                             unsigned long nr_segs, unsigned int flags)
 {
        struct pipe_inode_info *pipe;
        struct page *pages[PIPE_BUFFERS];
@@ -1276,10 +1475,6 @@ static long do_vmsplice(struct file *file, const struct iovec __user *iov,
        pipe = pipe_info(file->f_path.dentry->d_inode);
        if (!pipe)
                return -EBADF;
-        if (unlikely(nr_segs > UIO_MAXIOV))
-                return -EINVAL;
-        else if (unlikely(!nr_segs))
-                return 0;
        spd.nr_pages = get_iovec_page_array(iov, nr_segs, pages, partial,
                                            flags & SPLICE_F_GIFT);
@@ -1289,6 +1484,22 @@ static long do_vmsplice(struct file *file, const struct iovec __user *iov,
        return splice_to_pipe(pipe, &spd);
 }
+/*
+ * Note that vmsplice only really supports true splicing _from_ user memory
+ * to a pipe, not the other way around. Splicing from user memory is a simple
+ * operation that can be supported without any funky alignment restrictions
+ * or nasty vm tricks. We simply map in the user pages and fill them into
+ * a pipe. The reverse isn't quite as easy, though. There are two possible
+ * solutions for that:
+ *
+ *      - memcpy() the data internally, at which point we might as well just
+ *        do a regular read() on the buffer anyway.
+ *      - Lots of nasty vm tricks, that are neither fast nor flexible (it
+ *        has restrictions on both ends of the pipe).
+ *
+ * Currently we punt and implement it as a normal copy, see pipe_to_user().
+ *
+ */
 asmlinkage long sys_vmsplice(int fd, const struct iovec __user *iov,
                             unsigned long nr_segs, unsigned int flags)
 {
@@ -1296,11 +1507,18 @@ asmlinkage long sys_vmsplice(int fd, const struct iovec __user *iov,
        long error;
        int fput;
+        if (unlikely(nr_segs > UIO_MAXIOV))
+                return -EINVAL;
+        else if (unlikely(!nr_segs))
+                return 0;
        error = -EBADF;
        file = fget_light(fd, &fput);
        if (file) {
                if (file->f_mode & FMODE_WRITE)
-                        error = do_vmsplice(file, iov, nr_segs, flags);
+                        error = vmsplice_to_pipe(file, iov, nr_segs, flags);
+                else if (file->f_mode & FMODE_READ)
+                        error = vmsplice_to_user(file, iov, nr_segs, flags);
                fput_light(file, fput);
        }
diff --git a/fs/sync.c b/fs/sync.c
index 2f97576355b8..7cd005ea7639 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -236,6 +236,14 @@ out:
        return ret;
 }
+/* It would be nice if people remembered that not all the world's an i386
+   when they introduce new system calls */
+asmlinkage long sys_sync_file_range2(int fd, unsigned int flags,
+                                     loff_t offset, loff_t nbytes)
+{
+        return sys_sync_file_range(fd, offset, nbytes, flags);
+}
 /*
 * `endbyte' is inclusive
 */
diff --git a/fs/sysfs/bin.c b/fs/sysfs/bin.c
index d3b9f5f07db1..135353f8a296 100644
--- a/fs/sysfs/bin.c
+++ b/fs/sysfs/bin.c
@@ -20,29 +20,41 @@
 #include "sysfs.h"
+struct bin_buffer {
+        struct mutex    mutex;
+        void            *buffer;
+        int             mmapped;
+};
 static int
 fill_read(struct dentry *dentry, char *buffer, loff_t off, size_t count)
 {
-        struct bin_attribute * attr = to_bin_attr(dentry);
+        struct sysfs_dirent *attr_sd = dentry->d_fsdata;
-        struct kobject * kobj = to_kobj(dentry->d_parent);
+        struct bin_attribute *attr = attr_sd->s_elem.bin_attr.bin_attr;
+        struct kobject *kobj = attr_sd->s_parent->s_elem.dir.kobj;
+        int rc;
+        /* need attr_sd for attr, its parent for kobj */
+        if (!sysfs_get_active_two(attr_sd))
+                return -ENODEV;
-        if (!attr->read)
+        rc = -EIO;
-                return -EIO;
+        if (attr->read)
+                rc = attr->read(kobj, attr, buffer, off, count);
-        return attr->read(kobj, buffer, off, count);
+        sysfs_put_active_two(attr_sd);
+        return rc;
 }
 static ssize_t
-read(struct file * file, char __user * userbuf, size_t count, loff_t * off)
+read(struct file *file, char __user *userbuf, size_t bytes, loff_t *off)
 {
-        char *buffer = file->private_data;
+        struct bin_buffer *bb = file->private_data;
        struct dentry *dentry = file->f_path.dentry;
        int size = dentry->d_inode->i_size;
        loff_t offs = *off;
-        int ret;
+        int count = min_t(size_t, bytes, PAGE_SIZE);
-        if (count > PAGE_SIZE)
-                count = PAGE_SIZE;
        if (size) {
                if (offs > size)
@@ -51,43 +63,56 @@ read(struct file * file, char __user * userbuf, size_t count, loff_t * off)
                        count = size - offs;
        }
-        ret = fill_read(dentry, buffer, offs, count);
+        mutex_lock(&bb->mutex);
-        if (ret < 0) 
-                return ret;
+        count = fill_read(dentry, bb->buffer, offs, count);
-        count = ret;
+        if (count < 0)
+                goto out_unlock;
-        if (copy_to_user(userbuf, buffer, count))
+        if (copy_to_user(userbuf, bb->buffer, count)) {
-                return -EFAULT;
+                count = -EFAULT;
+                goto out_unlock;
+        }
-        pr_debug("offs = %lld, *off = %lld, count = %zd\n", offs, *off, count);
+        pr_debug("offs = %lld, *off = %lld, count = %d\n", offs, *off, count);
        *off = offs + count;
+ out_unlock:
+        mutex_unlock(&bb->mutex);
        return count;
 }
 static int
 flush_write(struct dentry *dentry, char *buffer, loff_t offset, size_t count)
 {
-        struct bin_attribute *attr = to_bin_attr(dentry);
+        struct sysfs_dirent *attr_sd = dentry->d_fsdata;
-        struct kobject *kobj = to_kobj(dentry->d_parent);
+        struct bin_attribute *attr = attr_sd->s_elem.bin_attr.bin_attr;
+        struct kobject *kobj = attr_sd->s_parent->s_elem.dir.kobj;
+        int rc;
+        /* need attr_sd for attr, its parent for kobj */
+        if (!sysfs_get_active_two(attr_sd))
+                return -ENODEV;
+        rc = -EIO;
+        if (attr->write)
+                rc = attr->write(kobj, attr, buffer, offset, count);
-        if (!attr->write)
+        sysfs_put_active_two(attr_sd);
-                return -EIO;
-        return attr->write(kobj, buffer, offset, count);
+        return rc;
 }
-static ssize_t write(struct file * file, const char __user * userbuf,
+static ssize_t write(struct file *file, const char __user *userbuf,
-                     size_t count, loff_t * off)
+                     size_t bytes, loff_t *off)
 {
-        char *buffer = file->private_data;
+        struct bin_buffer *bb = file->private_data;
        struct dentry *dentry = file->f_path.dentry;
        int size = dentry->d_inode->i_size;
        loff_t offs = *off;
+        int count = min_t(size_t, bytes, PAGE_SIZE);
-        if (count > PAGE_SIZE)
-                count = PAGE_SIZE;
        if (size) {
                if (offs > size)
                        return 0;
@@ -95,72 +120,100 @@ static ssize_t write(struct file * file, const char __user * userbuf,
                        count = size - offs;
        }
-        if (copy_from_user(buffer, userbuf, count))
+        mutex_lock(&bb->mutex);
-                return -EFAULT;
-        count = flush_write(dentry, buffer, offs, count);
+        if (copy_from_user(bb->buffer, userbuf, count)) {
+                count = -EFAULT;
+                goto out_unlock;
+        }
+        count = flush_write(dentry, bb->buffer, offs, count);
        if (count > 0)
                *off = offs + count;
+ out_unlock:
+        mutex_unlock(&bb->mutex);
        return count;
 }
 static int mmap(struct file *file, struct vm_area_struct *vma)
 {
-        struct dentry *dentry = file->f_path.dentry;
+        struct bin_buffer *bb = file->private_data;
-        struct bin_attribute *attr = to_bin_attr(dentry);
+        struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
-        struct kobject *kobj = to_kobj(dentry->d_parent);
+        struct bin_attribute *attr = attr_sd->s_elem.bin_attr.bin_attr;
+        struct kobject *kobj = attr_sd->s_parent->s_elem.dir.kobj;
+        int rc;
+        mutex_lock(&bb->mutex);
+        /* need attr_sd for attr, its parent for kobj */
+        if (!sysfs_get_active_two(attr_sd))
+                return -ENODEV;
-        if (!attr->mmap)
+        rc = -EINVAL;
-                return -EINVAL;
+        if (attr->mmap)
+                rc = attr->mmap(kobj, attr, vma);
-        return attr->mmap(kobj, attr, vma);
+        if (rc == 0 && !bb->mmapped)
+                bb->mmapped = 1;
+        else
+                sysfs_put_active_two(attr_sd);
+        mutex_unlock(&bb->mutex);
+        return rc;
 }
 static int open(struct inode * inode, struct file * file)
 {
-        struct kobject *kobj = sysfs_get_kobject(file->f_path.dentry->d_parent);
+        struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
-        struct bin_attribute * attr = to_bin_attr(file->f_path.dentry);
+        struct bin_attribute *attr = attr_sd->s_elem.bin_attr.bin_attr;
-        int error = -EINVAL;
+        struct bin_buffer *bb = NULL;
+        int error;
-        if (!kobj || !attr)
-                goto Done;
-        /* Grab the module reference for this attribute if we have one */
+        /* need attr_sd for attr */
-        error = -ENODEV;
+        if (!sysfs_get_active(attr_sd))
-        if (!try_module_get(attr->attr.owner)) 
+                return -ENODEV;
-                goto Done;
        error = -EACCES;
        if ((file->f_mode & FMODE_WRITE) && !(attr->write || attr->mmap))
-                goto Error;
+                goto err_out;
        if ((file->f_mode & FMODE_READ) && !(attr->read || attr->mmap))
-                goto Error;
+                goto err_out;
        error = -ENOMEM;
-        file->private_data = kmalloc(PAGE_SIZE, GFP_KERNEL);
+        bb = kzalloc(sizeof(*bb), GFP_KERNEL);
-        if (!file->private_data)
+        if (!bb)
-                goto Error;
+                goto err_out;
-        error = 0;
+        bb->buffer = kmalloc(PAGE_SIZE, GFP_KERNEL);
-    goto Done;
+        if (!bb->buffer)
+                goto err_out;
- Error:
-        module_put(attr->attr.owner);
+        mutex_init(&bb->mutex);
- Done:
+        file->private_data = bb;
-        if (error)
-                kobject_put(kobj);
+        /* open succeeded, put active reference and pin attr_sd */
+        sysfs_put_active(attr_sd);
+        sysfs_get(attr_sd);
+        return 0;
+ err_out:
+        sysfs_put_active(attr_sd);
+        kfree(bb);
        return error;
 }
 static int release(struct inode * inode, struct file * file)
 {
-        struct kobject * kobj = to_kobj(file->f_path.dentry->d_parent);
+        struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
-        struct bin_attribute * attr = to_bin_attr(file->f_path.dentry);
+        struct bin_buffer *bb = file->private_data;
-        u8 * buffer = file->private_data;
+        if (bb->mmapped)
-        kobject_put(kobj);
+                sysfs_put_active_two(attr_sd);
-        module_put(attr->attr.owner);
+        sysfs_put(attr_sd);
-        kfree(buffer);
+        kfree(bb->buffer);
+        kfree(bb);
        return 0;
 }
@@ -181,9 +234,9 @@ const struct file_operations bin_fops = {
 int sysfs_create_bin_file(struct kobject * kobj, struct bin_attribute * attr)
 {
-        BUG_ON(!kobj || !kobj->dentry || !attr);
+        BUG_ON(!kobj || !kobj->sd || !attr);
-        return sysfs_add_file(kobj->dentry, &attr->attr, SYSFS_KOBJ_BIN_ATTR);
+        return sysfs_add_file(kobj->sd, &attr->attr, SYSFS_KOBJ_BIN_ATTR);
 }
@@ -195,7 +248,7 @@ int sysfs_create_bin_file(struct kobject * kobj, struct bin_attribute * attr)
 void sysfs_remove_bin_file(struct kobject * kobj, struct bin_attribute * attr)
 {
-        if (sysfs_hash_and_remove(kobj->dentry, attr->attr.name) < 0) {
+        if (sysfs_hash_and_remove(kobj->sd, attr->attr.name) < 0) {
                printk(KERN_ERR "%s: "
                        "bad dentry or inode or no such file: \"%s\"\n",
                        __FUNCTION__, attr->attr.name);
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 85a668680f82..aee966c44aac 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -9,18 +9,346 @@
 #include <linux/module.h>
 #include <linux/kobject.h>
 #include <linux/namei.h>
+#include <linux/idr.h>
+#include <linux/completion.h>
 #include <asm/semaphore.h>
 #include "sysfs.h"
-DECLARE_RWSEM(sysfs_rename_sem);
+DEFINE_MUTEX(sysfs_mutex);
+spinlock_t sysfs_assoc_lock = SPIN_LOCK_UNLOCKED;
+static spinlock_t sysfs_ino_lock = SPIN_LOCK_UNLOCKED;
+static DEFINE_IDA(sysfs_ino_ida);
+/**
+ *      sysfs_link_sibling - link sysfs_dirent into sibling list
+ *      @sd: sysfs_dirent of interest
+ *
+ *      Link @sd into its sibling list which starts from
+ *      sd->s_parent->s_children.
+ *
+ *      Locking:
+ *      mutex_lock(sysfs_mutex)
+ */
+void sysfs_link_sibling(struct sysfs_dirent *sd)
+{
+        struct sysfs_dirent **head = &sd->s_parent->s_children;
+        /* @sd must not already sit on a sibling list */
+        BUG_ON(sd->s_sibling);
+        /* push @sd onto the front of the parent's singly-linked list */
+        sd->s_sibling = *head;
+        *head = sd;
+}
+/**
+ *      sysfs_unlink_sibling - unlink sysfs_dirent from sibling list
+ *      @sd: sysfs_dirent of interest
+ *
+ *      Unlink @sd from its sibling list which starts from
+ *      sd->s_parent->s_children.
+ *
+ *      Locking:
+ *      mutex_lock(sysfs_mutex)
+ */
+void sysfs_unlink_sibling(struct sysfs_dirent *sd)
+{
+        struct sysfs_dirent **link = &sd->s_parent->s_children;
+        /* advance over the link pointers until one refers to @sd */
+        while (*link && *link != sd)
+                link = &(*link)->s_sibling;
+        /* @sd is not on the list: nothing to unlink */
+        if (!*link)
+                return;
+        /* splice @sd out and mark it as no longer linked */
+        *link = sd->s_sibling;
+        sd->s_sibling = NULL;
+}
+/**
+ *      sysfs_get_dentry - get dentry for the given sysfs_dirent
+ *      @sd: sysfs_dirent of interest
+ *
+ *      Get dentry for @sd.  Dentry is looked up if currently not
+ *      present.  This function climbs sysfs_dirent tree till it
+ *      reaches a sysfs_dirent with valid dentry attached and descends
+ *      down from there looking up dentry for each step.
+ *
+ *      LOCKING:
+ *      Kernel thread context (may sleep)
+ *
+ *      RETURNS:
+ *      Pointer to found dentry on success, ERR_PTR() value on error.
+ */
+struct dentry *sysfs_get_dentry(struct sysfs_dirent *sd)
+{
+        struct sysfs_dirent *cur;
+        struct dentry *parent_dentry, *dentry;
+        int i, depth;
+        /* Find the first parent which has valid s_dentry and get the
+         * dentry.
+         */
+        mutex_lock(&sysfs_mutex);
+        /* restart0 is entered with sysfs_mutex held and no spinlock held */
+ restart0:
+        spin_lock(&sysfs_assoc_lock);
+        /* restart1 is entered with sysfs_mutex and sysfs_assoc_lock held */
+ restart1:
+        spin_lock(&dcache_lock);
+        dentry = NULL;
+        depth = 0;
+        cur = sd;
+        /* depth counts how many levels above @sd the usable dentry is */
+        while (!cur->s_dentry || !cur->s_dentry->d_inode) {
+                if (cur->s_flags & SYSFS_FLAG_REMOVED) {
+                        /* a removed node on the path: fail with -ENOENT;
+                         * depth = 0 turns the lookup loop below into a no-op
+                         */
+                        dentry = ERR_PTR(-ENOENT);
+                        depth = 0;
+                        break;
+                }
+                cur = cur->s_parent;
+                depth++;
+        }
+        if (!IS_ERR(dentry))
+                dentry = dget_locked(cur->s_dentry);
+        spin_unlock(&dcache_lock);
+        spin_unlock(&sysfs_assoc_lock);
+        /* from the found dentry, look up depth times */
+        while (depth--) {
+                /* find and get depth'th ancestor */
+                for (cur = sd, i = 0; cur && i < depth; i++)
+                        cur = cur->s_parent;
+                /* This can happen if tree structure was modified due
+                 * to move/rename.  Restart.
+                 */
+                if (i != depth) {
+                        dput(dentry);
+                        goto restart0;
+                }
+                /* hold @cur across the unlocked lookup below */
+                sysfs_get(cur);
+                mutex_unlock(&sysfs_mutex);
+                /* look it up */
+                parent_dentry = dentry;
+                dentry = lookup_one_len_kern(cur->s_name, parent_dentry,
+                                             strlen(cur->s_name));
+                dput(parent_dentry);
+                if (IS_ERR(dentry)) {
+                        /* sysfs_mutex was already released above */
+                        sysfs_put(cur);
+                        return dentry;
+                }
+                mutex_lock(&sysfs_mutex);
+                spin_lock(&sysfs_assoc_lock);
+                /* This, again, can happen if tree structure has
+                 * changed and we looked up the wrong thing.  Restart.
+                 */
+                if (cur->s_dentry != dentry) {
+                        dput(dentry);
+                        sysfs_put(cur);
+                        goto restart1;
+                }
+                spin_unlock(&sysfs_assoc_lock);
+                sysfs_put(cur);
+        }
+        mutex_unlock(&sysfs_mutex);
+        return dentry;
+}
+/**
+ *      sysfs_get_active - get an active reference to sysfs_dirent
+ *      @sd: sysfs_dirent to get an active reference to
+ *
+ *      Get an active reference of @sd.  This function is noop if @sd
+ *      is NULL.
+ *
+ *      RETURNS:
+ *      Pointer to @sd on success, NULL on failure.
+ */
+struct sysfs_dirent *sysfs_get_active(struct sysfs_dirent *sd)
+{
+        int cur, seen;
+        if (unlikely(!sd))
+                return NULL;
+        for (;;) {
+                cur = atomic_read(&sd->s_active);
+                /* a negative count means no new active references */
+                if (unlikely(cur < 0))
+                        break;
+                /* bump the count only if nobody changed it meanwhile */
+                seen = atomic_cmpxchg(&sd->s_active, cur, cur + 1);
+                if (likely(seen == cur))
+                        return sd;
+                if (seen < 0)
+                        break;
+                /* lost the race against another updater, try again */
+                cpu_relax();
+        }
+        return NULL;
+}
+/**
+ *      sysfs_put_active - put an active reference to sysfs_dirent
+ *      @sd: sysfs_dirent to put an active reference to
+ *
+ *      Put an active reference to @sd.  This function is noop if @sd
+ *      is NULL.
+ */
+void sysfs_put_active(struct sysfs_dirent *sd)
+{
+        struct completion *cmpl;
+        int v;
+        if (unlikely(!sd))
+                return;
+        v = atomic_dec_return(&sd->s_active);
+        /* Only the put that brings the count down to exactly the
+         * deactivation bias has to wake the waiter; every other put
+         * is done here.
+         */
+        if (likely(v != SD_DEACTIVATED_BIAS))
+                return;
+        /* atomic_dec_return() is a mb(), we'll always see the updated
+         * sd->s_sibling.
+         */
+        /* sysfs_deactivate() stores its on-stack completion in s_sibling */
+        cmpl = (void *)sd->s_sibling;
+        complete(cmpl);
+}
+/**
+ *      sysfs_get_active_two - get active references to sysfs_dirent and parent
+ *      @sd: sysfs_dirent of interest
+ *
+ *      Get active reference to @sd and its parent.  Parent's active
+ *      reference is grabbed first.  This function is noop if @sd is
+ *      NULL.
+ *
+ *      RETURNS:
+ *      Pointer to @sd on success, NULL on failure.
+ */
+struct sysfs_dirent *sysfs_get_active_two(struct sysfs_dirent *sd)
+{
+        /* NULL in, NULL out */
+        if (!sd)
+                return NULL;
+        /* parent first; a parent-less @sd skips this step */
+        if (sd->s_parent && unlikely(!sysfs_get_active(sd->s_parent)))
+                return NULL;
+        if (likely(sysfs_get_active(sd)))
+                return sd;
+        /* @sd itself refused: give the parent's reference back */
+        sysfs_put_active(sd->s_parent);
+        return NULL;
+}
+/**
+ *      sysfs_put_active_two - put active references to sysfs_dirent and parent
+ *      @sd: sysfs_dirent of interest
+ *
+ *      Put active references to @sd and its parent.  This function is
+ *      noop if @sd is NULL.
+ */
+void sysfs_put_active_two(struct sysfs_dirent *sd)
+{
+        /* nothing to release for a NULL @sd */
+        if (!sd)
+                return;
+        /* @sd first, then its parent: reverse of the acquisition order */
+        sysfs_put_active(sd);
+        sysfs_put_active(sd->s_parent);
+}
+/**
+ *      sysfs_deactivate - deactivate sysfs_dirent
+ *      @sd: sysfs_dirent to deactivate
+ *
+ *      Deny new active references and drain existing ones.
+ */
+static void sysfs_deactivate(struct sysfs_dirent *sd)
+{
+        DECLARE_COMPLETION_ONSTACK(wait);
+        int v;
+        /* @sd must be off any sibling list and already flagged removed */
+        BUG_ON(sd->s_sibling || !(sd->s_flags & SYSFS_FLAG_REMOVED));
+        /* s_sibling is unused now; borrow it to hand the completion
+         * to sysfs_put_active()
+         */
+        sd->s_sibling = (void *)&wait;
+        /* atomic_add_return() is a mb(), put_active() will always see
+         * the updated sd->s_sibling.
+         */
+        v = atomic_add_return(SD_DEACTIVATED_BIAS, &sd->s_active);
+        /* anything other than the bare bias means active references
+         * are still out; the last sysfs_put_active() completes @wait
+         */
+        if (v != SD_DEACTIVATED_BIAS)
+                wait_for_completion(&wait);
+        sd->s_sibling = NULL;
+}
+/*
+ * Allocate an inode number (2 or above) from sysfs_ino_ida.
+ *
+ * On success the number is stored in *@pino and 0 is returned.  On
+ * failure a negative errno is returned (-ENOMEM if the IDA could not
+ * be preloaded, otherwise whatever ida_get_new_above() reported) and
+ * *@pino is left untouched.
+ */
+static int sysfs_alloc_ino(ino_t *pino)
+{
+        int ino, rc;
+ retry:
+        spin_lock(&sysfs_ino_lock);
+        rc = ida_get_new_above(&sysfs_ino_ida, 2, &ino);
+        spin_unlock(&sysfs_ino_lock);
+        if (rc == -EAGAIN) {
+                /* IDA ran out of preallocated memory: refill and retry */
+                if (ida_pre_get(&sysfs_ino_ida, GFP_KERNEL))
+                        goto retry;
+                rc = -ENOMEM;
+        }
+        /* ino is only valid on success; never copy it out otherwise */
+        if (rc)
+                return rc;
+        *pino = ino;
+        return 0;
+}
+/* Return inode number @ino to sysfs_ino_ida, under sysfs_ino_lock. */
+static void sysfs_free_ino(ino_t ino)
+{
+        spin_lock(&sysfs_ino_lock);
+        ida_remove(&sysfs_ino_ida, ino);
+        spin_unlock(&sysfs_ino_lock);
+}
+/*
+ * Free @sd and then walk up the tree: each freed node drops one
+ * reference on its parent, and a parent whose count reaches zero is
+ * freed in turn.
+ */
+void release_sysfs_dirent(struct sysfs_dirent * sd)
+{
+        struct sysfs_dirent *next;
+        do {
+                /* Moving/renaming is always done while holding reference.
+                 * sd->s_parent won't change beneath us.
+                 */
+                next = sd->s_parent;
+                /* a symlink holds a reference on its target */
+                if (sysfs_type(sd) == SYSFS_KOBJ_LINK)
+                        sysfs_put(sd->s_elem.symlink.target_sd);
+                /* the name is owned by @sd only for SYSFS_COPY_NAME types */
+                if (sysfs_type(sd) & SYSFS_COPY_NAME)
+                        kfree(sd->s_name);
+                kfree(sd->s_iattr);
+                sysfs_free_ino(sd->s_ino);
+                kmem_cache_free(sysfs_dir_cachep, sd);
+                sd = next;
+        } while (sd && atomic_dec_and_test(&sd->s_count));
+}
 static void sysfs_d_iput(struct dentry * dentry, struct inode * inode)
 {
        struct sysfs_dirent * sd = dentry->d_fsdata;
        if (sd) {
-                BUG_ON(sd->s_dentry != dentry);
+                /* sd->s_dentry is protected with sysfs_assoc_lock.
-                sd->s_dentry = NULL;
+                 * This allows sysfs_drop_dentry() to dereference it.
+                 */
+                spin_lock(&sysfs_assoc_lock);
+                /* The dentry might have been deleted or another
+                 * lookup could have happened updating sd->s_dentry to
+                 * point to the new dentry.  Ignore if it isn't pointing
+                 * to this dentry.
+                 */
+                if (sd->s_dentry == dentry)
+                        sd->s_dentry = NULL;
+                spin_unlock(&sysfs_assoc_lock);
                sysfs_put(sd);
        }
        iput(inode);
@@ -30,245 +358,402 @@ static struct dentry_operations sysfs_dentry_ops = {
        .d_iput         = sysfs_d_iput,
 };
-/*
+/**
+ *      sysfs_new_dirent - allocate and initialize a sysfs_dirent
+ *      @name: name of the new entry
+ *      @mode: mode stored in s_mode
+ *      @type: type/flags stored in s_flags
+ *
+ *      @name is duplicated when @type has SYSFS_COPY_NAME set.  The
+ *      new sysfs_dirent gets an inode number, a reference count of 1
+ *      and no active references.  It is not linked to any parent.
+ *
+ *      RETURNS:
+ *      Pointer to the new sysfs_dirent, NULL if any allocation fails.
+ */
+struct sysfs_dirent *sysfs_new_dirent(const char *name, umode_t mode, int type)
- * Allocates a new sysfs_dirent and links it to the parent sysfs_dirent
- */
-static struct sysfs_dirent * __sysfs_new_dirent(void * element)
 {
-        struct sysfs_dirent * sd;
+        char *dup_name = NULL;
+        struct sysfs_dirent *sd;
+        if (type & SYSFS_COPY_NAME) {
+                name = dup_name = kstrdup(name, GFP_KERNEL);
+                if (!name)
+                        goto err_out1;
+        }
         sd = kmem_cache_zalloc(sysfs_dir_cachep, GFP_KERNEL);
         if (!sd)
-                return NULL;
+                goto err_out1;
+        if (sysfs_alloc_ino(&sd->s_ino))
+                goto err_out2;
         atomic_set(&sd->s_count, 1);
+        atomic_set(&sd->s_active, 0);
         atomic_set(&sd->s_event, 1);
-        INIT_LIST_HEAD(&sd->s_children);
-        INIT_LIST_HEAD(&sd->s_sibling);
+        sd->s_name = name;
-        sd->s_element = element;
+        sd->s_mode = mode;
+        sd->s_flags = type;
         return sd;
+        /* kmem_cache_free() must not be handed NULL, so only the path
+         * that actually allocated @sd goes through err_out2.
+         */
+ err_out2:
+        kmem_cache_free(sysfs_dir_cachep, sd);
+ err_out1:
+        kfree(dup_name);
+        return NULL;
 }
-static void __sysfs_list_dirent(struct sysfs_dirent *parent_sd,
+/**
-                              struct sysfs_dirent *sd)
+ *      sysfs_attach_dentry - associate sysfs_dirent with dentry
+ *      @sd: target sysfs_dirent
+ *      @dentry: dentry to associate
+ *
+ *      Associate @sd with @dentry.  This is protected by
+ *      sysfs_assoc_lock to avoid race with sysfs_d_iput().
+ *
+ *      LOCKING:
+ *      mutex_lock(sysfs_mutex)
+ */
+static void sysfs_attach_dentry(struct sysfs_dirent *sd, struct dentry *dentry)
 {
-        if (sd)
+        dentry->d_op = &sysfs_dentry_ops;
-                list_add(&sd->s_sibling, &parent_sd->s_children);
+        /* the dentry keeps a reference on @sd; sysfs_d_iput() drops it */
+        dentry->d_fsdata = sysfs_get(sd);
+        /* protect sd->s_dentry against sysfs_d_iput */
+        spin_lock(&sysfs_assoc_lock);
+        sd->s_dentry = dentry;
+        spin_unlock(&sysfs_assoc_lock);
+        d_rehash(dentry);
 }
-static struct sysfs_dirent * sysfs_new_dirent(struct sysfs_dirent *parent_sd,
+/*
+ * Inode match callback handed to ilookup5_nowait(): @arg is a
+ * sysfs_dirent, and @inode matches when its number equals sd->s_ino.
+ */
+static int sysfs_ilookup_test(struct inode *inode, void *arg)
-                                                void * element)
 {
-        struct sysfs_dirent *sd;
+        struct sysfs_dirent *sd = arg;
-        sd = __sysfs_new_dirent(element);
+        return inode->i_ino == sd->s_ino;
-        __sysfs_list_dirent(parent_sd, sd);
-        return sd;
 }
-/*
+/**
+ *      sysfs_addrm_start - prepare for sysfs_dirent add/remove
+ *      @acxt: pointer to sysfs_addrm_cxt to be used
+ *      @parent_sd: parent sysfs_dirent
 *
- * Return -EEXIST if there is already a sysfs element with the same name for
+ *      This function is called when the caller is about to add or
- * the same parent.
+ *      remove sysfs_dirent under @parent_sd.  This function acquires
+ *      sysfs_mutex, grabs inode for @parent_sd if available and locks
+ *      i_mutex of it.  @acxt is used to keep and pass context to
+ *      other addrm functions.
 *
- * called with parent inode's i_mutex held
+ *      LOCKING:
+ *      Kernel thread context (may sleep).  sysfs_mutex is locked on
+ *      return.  i_mutex of parent inode is locked on return if
+ *      available.
 */
-int sysfs_dirent_exist(struct sysfs_dirent *parent_sd,
+void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt,
-                          const unsigned char *new)
+                       struct sysfs_dirent *parent_sd)
 {
-        struct sysfs_dirent * sd;
+        struct inode *inode;
+        /* start from a clean context: no parent inode, empty removed
+         * list, zero add/remove count
+         */
-        list_for_each_entry(sd, &parent_sd->s_children, s_sibling) {
+        memset(acxt, 0, sizeof(*acxt));
-                if (sd->s_element) {
+        acxt->parent_sd = parent_sd;
-                        const unsigned char *existing = sysfs_get_name(sd);
-                        if (strcmp(existing, new))
-                                continue;
-                        else
-                                return -EEXIST;
-                }
-        }
-        return 0;
+        /* Lookup parent inode.  inode initialization and I_NEW
+         * clearing are protected by sysfs_mutex.  By grabbing it and
+         * looking up with _nowait variant, inode state can be
+         * determined reliably.
+         */
+        mutex_lock(&sysfs_mutex);
+        inode = ilookup5_nowait(sysfs_sb, parent_sd->s_ino, sysfs_ilookup_test,
+                                parent_sd);
+        if (inode && !(inode->i_state & I_NEW)) {
+                /* parent inode available */
+                /* the inode reference is kept in @acxt until
+                 * sysfs_addrm_finish() puts it
+                 */
+                acxt->parent_inode = inode;
+                /* sysfs_mutex is below i_mutex in lock hierarchy.
+                 * First, trylock i_mutex.  If fails, unlock
+                 * sysfs_mutex and lock them in order.
+                 */
+                if (!mutex_trylock(&inode->i_mutex)) {
+                        mutex_unlock(&sysfs_mutex);
+                        mutex_lock(&inode->i_mutex);
+                        mutex_lock(&sysfs_mutex);
+                }
+        } else
+                /* no inode, or one still being set up (I_NEW): proceed
+                 * without it; inode may be NULL here
+                 */
+                iput(inode);
 }
+/**
+ *      sysfs_add_one - add sysfs_dirent to parent
+ *      @acxt: addrm context to use
+ *      @sd: sysfs_dirent to be added
+ *
+ *      Get @acxt->parent_sd and set sd->s_parent to it and increment
+ *      nlink of parent inode if @sd is a directory.  @sd is NOT
+ *      linked into the children list of the parent.  The caller
+ *      should invoke sysfs_link_sibling() after this function
+ *      completes if @sd needs to be on the children list.
+ *
+ *      This function should be called between calls to
+ *      sysfs_addrm_start() and sysfs_addrm_finish() and should be
+ *      passed the same @acxt as passed to sysfs_addrm_start().
+ *
+ *      LOCKING:
+ *      Determined by sysfs_addrm_start().
+ */
+void sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
+{
+        struct inode *parent_inode = acxt->parent_inode;
+        /* @sd pins its parent for as long as it points at it */
+        sd->s_parent = sysfs_get(acxt->parent_sd);
+        /* a new subdirectory adds a link to the parent inode, if any */
+        if (parent_inode && sysfs_type(sd) == SYSFS_DIR)
+                inc_nlink(parent_inode);
+        /* remember that this add/remove session changed something */
+        acxt->cnt++;
+}
-static struct sysfs_dirent *
+/**
-__sysfs_make_dirent(struct dentry *dentry, void *element, mode_t mode, int type)
+ *      sysfs_remove_one - remove sysfs_dirent from parent
+ *      @acxt: addrm context to use
+ *      @sd: sysfs_dirent to be removed
+ *
+ *      Mark @sd removed and drop nlink of parent inode if @sd is a
+ *      directory.  @sd is NOT unlinked from the children list of the
+ *      parent.  The caller is responsible for removing @sd from the
+ *      children list before calling this function.
+ *
+ *      This function should be called between calls to
+ *      sysfs_addrm_start() and sysfs_addrm_finish() and should be
+ *      passed the same @acxt as passed to sysfs_addrm_start().
+ *
+ *      LOCKING:
+ *      Determined by sysfs_addrm_start().
+ */
+void sysfs_remove_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
 {
-        struct sysfs_dirent * sd;
+        /* @sd must already be off the children list and not yet removed */
+        BUG_ON(sd->s_sibling || (sd->s_flags & SYSFS_FLAG_REMOVED));
-        sd = __sysfs_new_dirent(element);
+        sd->s_flags |= SYSFS_FLAG_REMOVED;
-        if (!sd)
+        /* chain @sd onto acxt->removed through the now free s_sibling;
+         * sysfs_addrm_finish() walks this list
+         */
+        sd->s_sibling = acxt->removed;
-                goto out;
+        acxt->removed = sd;
-        sd->s_mode = mode;
+        if (sysfs_type(sd) == SYSFS_DIR && acxt->parent_inode)
-        sd->s_type = type;
+                drop_nlink(acxt->parent_inode);
-        sd->s_dentry = dentry;
-        if (dentry) {
-                dentry->d_fsdata = sysfs_get(sd);
-                dentry->d_op = &sysfs_dentry_ops;
-        }
-out:
+        acxt->cnt++;
-        return sd;
 }
-int sysfs_make_dirent(struct sysfs_dirent * parent_sd, struct dentry * dentry,
+/**
-                        void * element, umode_t mode, int type)
+ *      sysfs_drop_dentry - drop dentry for the specified sysfs_dirent
+ *      @sd: target sysfs_dirent
+ *
+ *      Drop dentry for @sd.  @sd must have been unlinked from its
+ *      parent on entry to this function such that it can't be looked
+ *      up anymore.
+ *
+ *      @sd->s_dentry which is protected with sysfs_assoc_lock points
+ *      to the currently associated dentry but we're not holding a
+ *      reference to it and racing with dput().  Grab dcache_lock and
+ *      verify dentry before dropping it.  If @sd->s_dentry is NULL or
+ *      dput() beats us, no need to bother.
+ */
+static void sysfs_drop_dentry(struct sysfs_dirent *sd)
 {
-        struct sysfs_dirent *sd;
+        struct dentry *dentry = NULL;
+        struct inode *inode;
+        /* We're not holding a reference to ->s_dentry dentry but the
+         * field will stay valid as long as sysfs_assoc_lock is held.
+         */
+        spin_lock(&sysfs_assoc_lock);
+        spin_lock(&dcache_lock);
+        /* drop dentry if it's there and dput() didn't kill it yet */
+        if (sd->s_dentry && sd->s_dentry->d_inode) {
+                dentry = dget_locked(sd->s_dentry);
+                spin_lock(&dentry->d_lock);
+                __d_drop(dentry);
+                spin_unlock(&dentry->d_lock);
+        }
-        sd = __sysfs_make_dirent(dentry, element, mode, type);
+        spin_unlock(&dcache_lock);
-        __sysfs_list_dirent(parent_sd, sd);
+        spin_unlock(&sysfs_assoc_lock);
-        return sd ? 0 : -ENOMEM;
+        /* dentries for shadowed inodes are pinned, unpin */
+        if (dentry && sysfs_is_shadowed_inode(dentry->d_inode))
+                dput(dentry);
+        /* releases the dget_locked() reference; dentry may be NULL here */
+        dput(dentry);
+        /* adjust nlink and update timestamp */
+        inode = ilookup(sysfs_sb, sd->s_ino);
+        if (inode) {
+                mutex_lock(&inode->i_mutex);
+                inode->i_ctime = CURRENT_TIME;
+                drop_nlink(inode);
+                /* NOTE(review): directories drop a second link,
+                 * presumably the one for their own "." entry - confirm
+                 */
+                if (sysfs_type(sd) == SYSFS_DIR)
+                        drop_nlink(inode);
+                mutex_unlock(&inode->i_mutex);
+                iput(inode);
+        }
 }
-static int init_dir(struct inode * inode)
+/**
+ *      sysfs_addrm_finish - finish up sysfs_dirent add/remove
+ *      @acxt: addrm context to finish up
+ *
+ *      Finish up sysfs_dirent add/remove.  Resources acquired by
+ *      sysfs_addrm_start() are released and removed sysfs_dirents are
+ *      cleaned up.  Timestamps on the parent inode are updated.
+ *
+ *      LOCKING:
+ *      All mutexes acquired by sysfs_addrm_start() are released.
+ *
+ *      RETURNS:
+ *      Number of added/removed sysfs_dirents since sysfs_addrm_start().
+ */
+int sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt)
 {
-        inode->i_op = &sysfs_dir_inode_operations;
+        /* release resources acquired by sysfs_addrm_start() */
-        inode->i_fop = &sysfs_dir_operations;
+        mutex_unlock(&sysfs_mutex);
+        if (acxt->parent_inode) {
+                struct inode *inode = acxt->parent_inode;
-        /* directory inodes start off with i_nlink == 2 (for "." entry) */
+                /* if added/removed, update timestamps on the parent */
-        inc_nlink(inode);
+                if (acxt->cnt)
-        return 0;
+                        inode->i_ctime = inode->i_mtime = CURRENT_TIME;
+                mutex_unlock(&inode->i_mutex);
+                /* drops the inode reference sysfs_addrm_start() kept */
+                iput(inode);
+        }
+        /* kill removed sysfs_dirents */
+        /* runs after sysfs_mutex and the parent's i_mutex were released */
+        while (acxt->removed) {
+                struct sysfs_dirent *sd = acxt->removed;
+                /* pop @sd; sysfs_deactivate() requires s_sibling == NULL */
+                acxt->removed = sd->s_sibling;
+                sd->s_sibling = NULL;
+                sysfs_drop_dentry(sd);
+                sysfs_deactivate(sd);
+                sysfs_put(sd);
+        }
+        return acxt->cnt;
 }
-static int init_file(struct inode * inode)
+/**
+ *      sysfs_find_dirent - find sysfs_dirent with the given name
+ *      @parent_sd: sysfs_dirent to search under
+ *      @name: name to look for
+ *
+ *      Look for sysfs_dirent with name @name under @parent_sd.
+ *
+ *      LOCKING:
+ *      mutex_lock(sysfs_mutex)
+ *
+ *      RETURNS:
+ *      Pointer to sysfs_dirent if found, NULL if not.
+ */
+struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd,
+                                       const unsigned char *name)
 {
-        inode->i_size = PAGE_SIZE;
+        struct sysfs_dirent *child = parent_sd->s_children;
-        inode->i_fop = &sysfs_file_operations;
-        return 0;
+        /* walk the sibling chain; entries without a type never match */
+        while (child) {
+                if (sysfs_type(child) && strcmp(child->s_name, name) == 0)
+                        break;
+                child = child->s_sibling;
+        }
+        /* NULL when the chain ended without a match */
+        return child;
 }
-static int init_symlink(struct inode * inode)
+/**
+ *      sysfs_get_dirent - find and get sysfs_dirent with the given name
+ *      @parent_sd: sysfs_dirent to search under
+ *      @name: name to look for
+ *
+ *      Look for sysfs_dirent with name @name under @parent_sd and get
+ *      it if found.
+ *
+ *      LOCKING:
+ *      Kernel thread context (may sleep).  Grabs sysfs_mutex.
+ *
+ *      RETURNS:
+ *      Pointer to sysfs_dirent if found, NULL if not.
+ */
+struct sysfs_dirent *sysfs_get_dirent(struct sysfs_dirent *parent_sd,
+                                      const unsigned char *name)
 {
-        inode->i_op = &sysfs_symlink_inode_operations;
+        struct sysfs_dirent *sd;
-        return 0;
+        /* search and take the reference under sysfs_mutex */
+        mutex_lock(&sysfs_mutex);
+        sd = sysfs_find_dirent(parent_sd, name);
+        /* NOTE(review): sd is NULL when nothing matched, so this relies
+         * on sysfs_get() accepting NULL - confirm in sysfs.h
+         */
+        sysfs_get(sd);
+        mutex_unlock(&sysfs_mutex);
+        return sd;
 }
-static int create_dir(struct kobject * k, struct dentry * p,
+/*
+ * Create a directory sysfs_dirent named @name for @kobj under
+ * @parent_sd.  On success returns 0 and stores the new sysfs_dirent in
+ * *@p_sd.  Returns -ENOMEM if allocation fails and -EEXIST if
+ * @parent_sd already has an entry called @name.
+ */
+static int create_dir(struct kobject *kobj, struct sysfs_dirent *parent_sd,
-                      const char * n, struct dentry ** d)
+                      const char *name, struct sysfs_dirent **p_sd)
 {
-        int error;
         umode_t mode = S_IFDIR| S_IRWXU | S_IRUGO | S_IXUGO;
+        struct sysfs_addrm_cxt acxt;
+        struct sysfs_dirent *sd;
-        mutex_lock(&p->d_inode->i_mutex);
+        /* allocate */
-        *d = lookup_one_len(n, p, strlen(n));
+        sd = sysfs_new_dirent(name, mode, SYSFS_DIR);
-        if (!IS_ERR(*d)) {
+        if (!sd)
-                if (sysfs_dirent_exist(p->d_fsdata, n))
+                return -ENOMEM;
-                        error = -EEXIST;
+        sd->s_elem.dir.kobj = kobj;
-                else
-                        error = sysfs_make_dirent(p->d_fsdata, *d, k, mode,
-                                                                SYSFS_DIR);
-                if (!error) {
-                        error = sysfs_create(*d, mode, init_dir);
-                        if (!error) {
-                                inc_nlink(p->d_inode);
-                                (*d)->d_op = &sysfs_dentry_ops;
-                                d_rehash(*d);
-                        }
-                }
-                if (error && (error != -EEXIST)) {
-                        struct sysfs_dirent *sd = (*d)->d_fsdata;
-                        if (sd) {
-                                list_del_init(&sd->s_sibling);
-                                sysfs_put(sd);
-                        }
-                        d_drop(*d);
-                }
-                dput(*d);
-        } else
-                error = PTR_ERR(*d);
-        mutex_unlock(&p->d_inode->i_mutex);
-        return error;
-}
+        /* link in */
+        sysfs_addrm_start(&acxt, parent_sd);
+        /* if the name is taken, @sd is not added and acxt.cnt stays 0 */
+        if (!sysfs_find_dirent(parent_sd, name)) {
+                sysfs_add_one(&acxt, sd);
+                sysfs_link_sibling(sd);
+        }
+        /* finish returns the add count: non-zero means @sd was linked */
+        if (sysfs_addrm_finish(&acxt)) {
+                *p_sd = sd;
+                return 0;
+        }
-int sysfs_create_subdir(struct kobject * k, const char * n, struct dentry ** d)
+        /* drop the reference obtained from sysfs_new_dirent() */
+        sysfs_put(sd);
+        return -EEXIST;
+}
+/*
+ * Create a subdirectory called @name below @kobj's own sysfs_dirent.
+ * The return value and *@p_sd are exactly what create_dir() produces.
+ */
+int sysfs_create_subdir(struct kobject *kobj, const char *name,
+                        struct sysfs_dirent **p_sd)
 {
-        return create_dir(k,k->dentry,n,d);
+        return create_dir(kobj, kobj->sd, name, p_sd);
 }
 /**
 *      sysfs_create_dir - create a directory for an object.
 *      @kobj:          object we're creating directory for. 
- *      @shadow_parent: parent parent object.
+ *      @shadow_parent_sd: sysfs_dirent of the parent to use instead of kobj->parent, may be NULL.
 */
+int sysfs_create_dir(struct kobject *kobj,
-int sysfs_create_dir(struct kobject * kobj, struct dentry *shadow_parent)
+                     struct sysfs_dirent *shadow_parent_sd)
 {
-        struct dentry * dentry = NULL;
+        struct sysfs_dirent *parent_sd, *sd;
-        struct dentry * parent;
         int error = 0;
         BUG_ON(!kobj);
+        /* parent selection, in order of preference: the explicit
+         * shadow parent, the parent kobject's sysfs_dirent, then the
+         * sysfs root; with none of them available fail with -EFAULT
+         */
-        if (shadow_parent)
+        if (shadow_parent_sd)
-                parent = shadow_parent;
+                parent_sd = shadow_parent_sd;
         else if (kobj->parent)
-                parent = kobj->parent->dentry;
+                parent_sd = kobj->parent->sd;
         else if (sysfs_mount && sysfs_mount->mnt_sb)
-                parent = sysfs_mount->mnt_sb->s_root;
+                parent_sd = sysfs_mount->mnt_sb->s_root->d_fsdata;
         else
                 return -EFAULT;
-        error = create_dir(kobj,parent,kobject_name(kobj),&dentry);
+        error = create_dir(kobj, parent_sd, kobject_name(kobj), &sd);
         if (!error)
-                kobj->dentry = dentry;
+                /* remember the new directory's sysfs_dirent in @kobj */
+                kobj->sd = sd;
         return error;
 }
-/* attaches attribute's sysfs_dirent to the dentry corresponding to the
+/*
+ * Compute the link count for directory @sd: 2 plus one for every
+ * child that is itself a directory.
+ */
+static int sysfs_count_nlink(struct sysfs_dirent *sd)
- * attribute file
- */
-static int sysfs_attach_attr(struct sysfs_dirent * sd, struct dentry * dentry)
 {
-        struct attribute * attr = NULL;
+        struct sysfs_dirent *cur = sd->s_children;
-        struct bin_attribute * bin_attr = NULL;
+        int links = 2;
-        int (* init) (struct inode *) = NULL;
-        int error = 0;
-        if (sd->s_type & SYSFS_KOBJ_BIN_ATTR) {
+        while (cur) {
-                bin_attr = sd->s_element;
+                if (sysfs_type(cur) == SYSFS_DIR)
-                attr = &bin_attr->attr;
+                        links++;
-        } else {
+                cur = cur->s_sibling;
+        }
+        return links;
-                attr = sd->s_element;
-                init = init_file;
-        }
-        dentry->d_fsdata = sysfs_get(sd);
-        sd->s_dentry = dentry;
-        error = sysfs_create(dentry, (attr->mode & S_IALLUGO) | S_IFREG, init);
-        if (error) {
-                sysfs_put(sd);
-                return error;
-        }
-        if (bin_attr) {
-                dentry->d_inode->i_size = bin_attr->size;
-                dentry->d_inode->i_fop = &bin_fops;
-        }
-        dentry->d_op = &sysfs_dentry_ops;
-        d_rehash(dentry);
-        return 0;
-}
-static int sysfs_attach_link(struct sysfs_dirent * sd, struct dentry * dentry)
-{
-        int err = 0;
-        dentry->d_fsdata = sysfs_get(sd);
-        sd->s_dentry = dentry;
-        err = sysfs_create(dentry, S_IFLNK|S_IRWXUGO, init_symlink);
-        if (!err) {
-                dentry->d_op = &sysfs_dentry_ops;
-                d_rehash(dentry);
-        } else
-                sysfs_put(sd);
-        return err;
 }
 static struct dentry * sysfs_lookup(struct inode *dir, struct dentry *dentry,
@@ -276,24 +761,60 @@ static struct dentry * sysfs_lookup(struct inode *dir, struct dentry *dentry,
 {
        struct sysfs_dirent * parent_sd = dentry->d_parent->d_fsdata;
        struct sysfs_dirent * sd;
-        int err = 0;
+        struct bin_attribute *bin_attr;
+        struct inode *inode;
+        int found = 0;
-        list_for_each_entry(sd, &parent_sd->s_children, s_sibling) {
+        for (sd = parent_sd->s_children; sd; sd = sd->s_sibling) {
-                if (sd->s_type & SYSFS_NOT_PINNED) {
+                if (sysfs_type(sd) &&
-                        const unsigned char * name = sysfs_get_name(sd);
+                    !strcmp(sd->s_name, dentry->d_name.name)) {
+                        found = 1;
+                        break;
+                }
+        }
+        /* no such entry */
+        if (!found)
+                return NULL;
-                        if (strcmp(name, dentry->d_name.name))
+        /* attach dentry and inode */
-                                continue;
+        inode = sysfs_get_inode(sd);
+        if (!inode)
+                return ERR_PTR(-ENOMEM);
-                        if (sd->s_type & SYSFS_KOBJ_LINK)
+        mutex_lock(&sysfs_mutex);
-                                err = sysfs_attach_link(sd, dentry);
-                        else
+        if (inode->i_state & I_NEW) {
-                                err = sysfs_attach_attr(sd, dentry);
+                /* initialize inode according to type */
+                switch (sysfs_type(sd)) {
+                case SYSFS_DIR:
+                        inode->i_op = &sysfs_dir_inode_operations;
+                        inode->i_fop = &sysfs_dir_operations;
+                        inode->i_nlink = sysfs_count_nlink(sd);
                        break;
+                case SYSFS_KOBJ_ATTR:
+                        inode->i_size = PAGE_SIZE;
+                        inode->i_fop = &sysfs_file_operations;
+                        break;
+                case SYSFS_KOBJ_BIN_ATTR:
+                        bin_attr = sd->s_elem.bin_attr.bin_attr;
+                        inode->i_size = bin_attr->size;
+                        inode->i_fop = &bin_fops;
+                        break;
+                case SYSFS_KOBJ_LINK:
+                        inode->i_op = &sysfs_symlink_inode_operations;
+                        break;
+                default:
+                        BUG();
                }
        }
-        return ERR_PTR(err);
+        sysfs_instantiate(dentry, inode);
+        sysfs_attach_dentry(sd, dentry);
+        mutex_unlock(&sysfs_mutex);
+        return NULL;
 }
 const struct inode_operations sysfs_dir_inode_operations = {
@@ -301,58 +822,46 @@ const struct inode_operations sysfs_dir_inode_operations = {
        .setattr        = sysfs_setattr,
 };
-static void remove_dir(struct dentry * d)
+static void remove_dir(struct sysfs_dirent *sd) /* remove directory dirent @sd from under sd->s_parent */
 {
-        struct dentry * parent = dget(d->d_parent);
+        struct sysfs_addrm_cxt acxt;    /* add/remove context used for this single removal */
-        struct sysfs_dirent * sd;
-        mutex_lock(&parent->d_inode->i_mutex);
-        d_delete(d);
-        sd = d->d_fsdata;
-        list_del_init(&sd->s_sibling);
-        sysfs_put(sd);
-        if (d->d_inode)
-                simple_rmdir(parent->d_inode,d);
-        pr_debug(" o %s removing done (%d)\n",d->d_name.name,
+        sysfs_addrm_start(&acxt, sd->s_parent); /* context is opened on the parent, not on @sd itself */
-                 atomic_read(&d->d_count));
+        sysfs_unlink_sibling(sd);       /* presumably takes @sd off the parent child chain - confirm in helper */
+        sysfs_remove_one(&acxt, sd);    /* NOTE(review): dentry and refcount handling now live in the addrm helpers (not shown here) */
-        mutex_unlock(&parent->d_inode->i_mutex);
+        sysfs_addrm_finish(&acxt);      /* closes the context opened by sysfs_addrm_start() above */
-        dput(parent);
 }
-void sysfs_remove_subdir(struct dentry * d)
+void sysfs_remove_subdir(struct sysfs_dirent *sd) /* non-static wrapper: removes subdirectory dirent @sd */
 {
-        remove_dir(d);
+        remove_dir(sd);         /* all work is delegated; @sd is dereferenced there, so it must not be NULL */
 }
-static void __sysfs_remove_dir(struct dentry *dentry)
+static void __sysfs_remove_dir(struct sysfs_dirent *dir_sd) /* remove the typed non-directory children of @dir_sd, then @dir_sd itself */
 {
-        struct sysfs_dirent * parent_sd;
+        struct sysfs_addrm_cxt acxt;
-        struct sysfs_dirent * sd, * tmp;
+        struct sysfs_dirent **pos;      /* address of the link that points at the child being examined */
-        dget(dentry);
+        if (!dir_sd)                    /* a NULL dirent is accepted and ignored */
-        if (!dentry)
                 return;
-        pr_debug("sysfs %s: removing dir\n",dentry->d_name.name);
+        pr_debug("sysfs %s: removing dir\n", dir_sd->s_name);
-        mutex_lock(&dentry->d_inode->i_mutex);
+        sysfs_addrm_start(&acxt, dir_sd);
-        parent_sd = dentry->d_fsdata;
+        pos = &dir_sd->s_children;      /* children form a NULL-terminated chain linked through s_sibling */
-        list_for_each_entry_safe(sd, tmp, &parent_sd->s_children, s_sibling) {
+        while (*pos) {
-                if (!sd->s_element || !(sd->s_type & SYSFS_NOT_PINNED))
+                struct sysfs_dirent *sd = *pos;
-                        continue;
-                list_del_init(&sd->s_sibling);
+                if (sysfs_type(sd) && sysfs_type(sd) != SYSFS_DIR) { /* skip type 0 nodes (sysfs_dir_open creates those) and subdirectories */
-                sysfs_drop_dentry(sd, dentry);
+                        *pos = sd->s_sibling;   /* splice @sd out; *pos now names its successor, so pos is not advanced */
-                sysfs_put(sd);
+                        sd->s_sibling = NULL;
+                        sysfs_remove_one(&acxt, sd);
+                } else
+                        pos = &(*pos)->s_sibling;       /* keep this child and step to the next link */
         }
-        mutex_unlock(&dentry->d_inode->i_mutex);
+        sysfs_addrm_finish(&acxt);
-        remove_dir(dentry);
+        remove_dir(dir_sd);             /* runs its own addrm start/finish pair, this time on the parent of @dir_sd */
-        /**
-         * Drop reference from dget() on entrance.
-         */
-        dput(dentry);
 }
 /**
@@ -366,102 +875,166 @@ static void __sysfs_remove_dir(struct dentry *dentry)
 void sysfs_remove_dir(struct kobject * kobj)
 {
-        __sysfs_remove_dir(kobj->dentry);
+        struct sysfs_dirent *sd = kobj->sd;     /* NOTE(review): read before sysfs_assoc_lock is taken - confirm callers serialize removal */
-        kobj->dentry = NULL;
+        spin_lock(&sysfs_assoc_lock);
+        kobj->sd = NULL;                /* break the kobject to dirent association under sysfs_assoc_lock */
+        spin_unlock(&sysfs_assoc_lock);
+        __sysfs_remove_dir(sd);         /* returns immediately when @sd is NULL */
 }
-int sysfs_rename_dir(struct kobject * kobj, struct dentry *new_parent,
+/**
+ *      sysfs_rename_dir - rename the directory of an object
+ *      @kobj:          object whose directory is renamed
+ *      @new_parent_sd: sysfs_dirent the directory is (re)linked under
+ *      @new_name:      new name for both the kobject and its sysfs_dirent
+ *
+ *      Returns 0 on success.  Fails with -EINVAL when the old and new
+ *      parent dentries do not share an inode or the dentry looked up for
+ *      @new_name is the current one, with -EEXIST when @new_name is
+ *      already taken, with -ENOMEM when the name cannot be duplicated,
+ *      or with the error reported by sysfs_get_dentry(),
+ *      lookup_one_len() or kobject_set_name().
+ *
+ *      Every exit funnels through out_dput, so each dentry pointer must
+ *      be either NULL or a real reference by then: a pointer holding an
+ *      ERR_PTR value is reset to NULL before jumping to the cleanup.
+ */
+int sysfs_rename_dir(struct kobject *kobj, struct sysfs_dirent *new_parent_sd,
                     const char *new_name)
 {
-        int error = 0;
+        struct sysfs_dirent *sd = kobj->sd;
-        struct dentry * new_dentry;
+        struct dentry *new_parent = NULL;
+        struct dentry *old_dentry = NULL, *new_dentry = NULL;
+        const char *dup_name = NULL;
+        int error;
-        if (!new_parent)
+        /* get dentries */
-                return -EFAULT;
+        old_dentry = sysfs_get_dentry(sd);
+        if (IS_ERR(old_dentry)) {
+                error = PTR_ERR(old_dentry);
+                old_dentry = NULL;      /* out_dput must never dput() an ERR_PTR */
+                goto out_dput;
+        }
+        new_parent = sysfs_get_dentry(new_parent_sd);
+        if (IS_ERR(new_parent)) {
+                error = PTR_ERR(new_parent);
+                new_parent = NULL;      /* as above: only NULL or a held reference reaches dput() */
+                goto out_dput;
+        }
-        down_write(&sysfs_rename_sem);
+        /* lock new_parent and get dentry for new name */
        mutex_lock(&new_parent->d_inode->i_mutex);
        new_dentry = lookup_one_len(new_name, new_parent, strlen(new_name));
-        if (!IS_ERR(new_dentry)) {
+        if (IS_ERR(new_dentry)) {
+                error = PTR_ERR(new_dentry);
+                new_dentry = NULL;
+                goto out_unlock;
-                /* By allowing two different directories with the
-                 * same d_parent we allow this routine to move
-                 * between different shadows of the same directory
-                 */
-                if (kobj->dentry->d_parent->d_inode != new_parent->d_inode)
-                        return -EINVAL;
-                else if (new_dentry->d_parent->d_inode != new_parent->d_inode)
-                        error = -EINVAL;
-                else if (new_dentry == kobj->dentry)
-                        error = -EINVAL;
-                else if (!new_dentry->d_inode) {
-                        error = kobject_set_name(kobj, "%s", new_name);
-                        if (!error) {
-                                struct sysfs_dirent *sd, *parent_sd;
-                                d_add(new_dentry, NULL);
-                                d_move(kobj->dentry, new_dentry);
-                                sd = kobj->dentry->d_fsdata;
-                                parent_sd = new_parent->d_fsdata;
-                                list_del_init(&sd->s_sibling);
-                                list_add(&sd->s_sibling, &parent_sd->s_children);
-                        }
-                        else
-                                d_drop(new_dentry);
-                } else
-                        error = -EEXIST;
-                dput(new_dentry);
        }
-        mutex_unlock(&new_parent->d_inode->i_mutex);
-        up_write(&sysfs_rename_sem);
+        /* By allowing two different directories with the same
+         * d_parent we allow this routine to move between different
+         * shadows of the same directory
+         */
+        error = -EINVAL;
+        if (old_dentry->d_parent->d_inode != new_parent->d_inode ||
+            new_dentry->d_parent->d_inode != new_parent->d_inode ||
+            old_dentry == new_dentry)
+                goto out_unlock;
+        error = -EEXIST;
+        if (new_dentry->d_inode)
+                goto out_unlock;
+        /* rename kobject and sysfs_dirent */
+        error = -ENOMEM;
+        new_name = dup_name = kstrdup(new_name, GFP_KERNEL);
+        if (!new_name)
+                goto out_drop;
+        error = kobject_set_name(kobj, "%s", new_name);
+        if (error)
+                goto out_drop;          /* dup_name still points at the copy, which out_dput frees */
+        dup_name = sd->s_name;          /* from here on the OLD name is what out_dput frees */
+        sd->s_name = new_name;
+        /* move under the new parent */
+        d_add(new_dentry, NULL);
+        d_move(sd->s_dentry, new_dentry);
+        mutex_lock(&sysfs_mutex);
+        sysfs_unlink_sibling(sd);
+        sysfs_get(new_parent_sd);
+        sysfs_put(sd->s_parent);
+        sd->s_parent = new_parent_sd;
+        sysfs_link_sibling(sd);
+        mutex_unlock(&sysfs_mutex);
+        error = 0;
+        goto out_unlock;
+ out_drop:
+        d_drop(new_dentry);
+ out_unlock:
+        mutex_unlock(&new_parent->d_inode->i_mutex);
+ out_dput:
+        kfree(dup_name);
+        dput(new_parent);
+        dput(old_dentry);
+        dput(new_dentry);
        return error;
 }
-int sysfs_move_dir(struct kobject *kobj, struct kobject *new_parent)
+/**
+ *      sysfs_move_dir - move the directory of an object under a new parent
+ *      @kobj:            object whose directory is moved
+ *      @new_parent_kobj: new parent object; NULL, or an object without a
+ *                        sysfs_dirent, selects the sysfs root
+ *
+ *      Returns 0 on success, or when the old and new parent dentries
+ *      already share an inode and there is nothing to move.  Otherwise
+ *      returns the error reported by sysfs_get_dentry() or
+ *      lookup_one_len().
+ *
+ *      Every exit funnels through out_dput, so each dentry pointer must
+ *      be either NULL or a reference that is still held by then: ERR_PTR
+ *      values are reset to NULL, and the reference on the looked-up
+ *      dentry is dropped exactly once, at out_dput.
+ */
+int sysfs_move_dir(struct kobject *kobj, struct kobject *new_parent_kobj)
 {
-        struct dentry *old_parent_dentry, *new_parent_dentry, *new_dentry;
+        struct sysfs_dirent *sd = kobj->sd;
-        struct sysfs_dirent *new_parent_sd, *sd;
+        struct sysfs_dirent *new_parent_sd;
+        struct dentry *old_parent, *new_parent = NULL;
+        struct dentry *old_dentry = NULL, *new_dentry = NULL;
        int error;
-        old_parent_dentry = kobj->parent ?
+        BUG_ON(!sd->s_parent);
-                kobj->parent->dentry : sysfs_mount->mnt_sb->s_root;
+        /* as before this change, a NULL parent object means the sysfs root */
+        new_parent_sd = (new_parent_kobj && new_parent_kobj->sd) ?
+                new_parent_kobj->sd : &sysfs_root;
-        new_parent_dentry = new_parent ?
-                new_parent->dentry : sysfs_mount->mnt_sb->s_root;
+        /* get dentries */
+        old_dentry = sysfs_get_dentry(sd);
+        if (IS_ERR(old_dentry)) {
+                error = PTR_ERR(old_dentry);
+                old_dentry = NULL;      /* out_dput must never dput() an ERR_PTR */
+                goto out_dput;
+        }
+        old_parent = sd->s_parent->s_dentry;
+        new_parent = sysfs_get_dentry(new_parent_sd);
+        if (IS_ERR(new_parent)) {
+                error = PTR_ERR(new_parent);
+                new_parent = NULL;      /* as above: only NULL or a held reference reaches dput() */
+                goto out_dput;
+        }
-        if (old_parent_dentry->d_inode == new_parent_dentry->d_inode)
+        if (old_parent->d_inode == new_parent->d_inode) {
-                return 0;       /* nothing to move */
+                error = 0;
+                goto out_dput;  /* nothing to move */
+        }
 again:
-        mutex_lock(&old_parent_dentry->d_inode->i_mutex);
+        mutex_lock(&old_parent->d_inode->i_mutex);
-        if (!mutex_trylock(&new_parent_dentry->d_inode->i_mutex)) {
+        if (!mutex_trylock(&new_parent->d_inode->i_mutex)) {
-                mutex_unlock(&old_parent_dentry->d_inode->i_mutex);
+                mutex_unlock(&old_parent->d_inode->i_mutex);
                goto again;
        }
-        new_parent_sd = new_parent_dentry->d_fsdata;
+        new_dentry = lookup_one_len(kobj->name, new_parent, strlen(kobj->name));
-        sd = kobj->dentry->d_fsdata;
-        new_dentry = lookup_one_len(kobj->name, new_parent_dentry,
-                                    strlen(kobj->name));
        if (IS_ERR(new_dentry)) {
                error = PTR_ERR(new_dentry);
+                new_dentry = NULL;
-                goto out;
+                goto out_unlock;
        } else
                error = 0;
        d_add(new_dentry, NULL);
-        d_move(kobj->dentry, new_dentry);
+        d_move(sd->s_dentry, new_dentry);
-        dput(new_dentry);
        /* Remove from old parent's list and insert into new parent's list. */
-        list_del_init(&sd->s_sibling);
+        mutex_lock(&sysfs_mutex);
-        list_add(&sd->s_sibling, &new_parent_sd->s_children);
-out:
+        sysfs_unlink_sibling(sd);
-        mutex_unlock(&new_parent_dentry->d_inode->i_mutex);
+        sysfs_get(new_parent_sd);
-        mutex_unlock(&old_parent_dentry->d_inode->i_mutex);
+        sysfs_put(sd->s_parent);
+        sd->s_parent = new_parent_sd;
+        sysfs_link_sibling(sd);
+        mutex_unlock(&sysfs_mutex);
+ out_unlock:
+        mutex_unlock(&new_parent->d_inode->i_mutex);
+        mutex_unlock(&old_parent->d_inode->i_mutex);
+ out_dput:
+        dput(new_parent);
+        dput(old_dentry);
+        dput(new_dentry);       /* the only put of the lookup_one_len() reference */
        return error;
 }
@@ -469,23 +1042,27 @@ static int sysfs_dir_open(struct inode *inode, struct file *file)
 {
        struct dentry * dentry = file->f_path.dentry;
        struct sysfs_dirent * parent_sd = dentry->d_fsdata;
+        struct sysfs_dirent * sd;       /* per-open node; the close and readdir paths call it the cursor */
-        mutex_lock(&dentry->d_inode->i_mutex);
+        sd = sysfs_new_dirent("_DIR_", 0, 0);   /* mode 0 and type 0: walkers testing sysfs_type() skip this node */
-        file->private_data = sysfs_new_dirent(parent_sd, NULL);
+        if (sd) {
-        mutex_unlock(&dentry->d_inode->i_mutex);
+                mutex_lock(&sysfs_mutex);       /* sibling chain is changed under sysfs_mutex, no longer under i_mutex */
+                sd->s_parent = sysfs_get(parent_sd);    /* presumably takes a reference on the directory dirent - confirm in sysfs_get */
-        return file->private_data ? 0 : -ENOMEM;
+                sysfs_link_sibling(sd);
+                mutex_unlock(&sysfs_mutex);
+        }
+        file->private_data = sd;        /* stays NULL when the allocation failed */
+        return sd ? 0 : -ENOMEM;
 }
 static int sysfs_dir_close(struct inode *inode, struct file *file)
 {
-        struct dentry * dentry = file->f_path.dentry;
        struct sysfs_dirent * cursor = file->private_data;
-        mutex_lock(&dentry->d_inode->i_mutex);
+        mutex_lock(&sysfs_mutex);
-        list_del_init(&cursor->s_sibling);
+        sysfs_unlink_sibling(cursor);
-        mutex_unlock(&dentry->d_inode->i_mutex);
+        mutex_unlock(&sysfs_mutex);
        release_sysfs_dirent(cursor);
@@ -503,54 +1080,65 @@ static int sysfs_readdir(struct file * filp, void * dirent, filldir_t filldir)
        struct dentry *dentry = filp->f_path.dentry;
        struct sysfs_dirent * parent_sd = dentry->d_fsdata;
        struct sysfs_dirent *cursor = filp->private_data;
-        struct list_head *p, *q = &cursor->s_sibling;
+        struct sysfs_dirent **pos;
        ino_t ino;
        int i = filp->f_pos;
        switch (i) {
                case 0:
-                        ino = dentry->d_inode->i_ino;
+                        ino = parent_sd->s_ino;
                        if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0)
                                break;
                        filp->f_pos++;
                        i++;
                        /* fallthrough */
                case 1:
-                        ino = parent_ino(dentry);
+                        if (parent_sd->s_parent)
+                                ino = parent_sd->s_parent->s_ino;
+                        else
+                                ino = parent_sd->s_ino;
                        if (filldir(dirent, "..", 2, i, ino, DT_DIR) < 0)
                                break;
                        filp->f_pos++;
                        i++;
                        /* fallthrough */
                default:
+                        mutex_lock(&sysfs_mutex);
+                        pos = &parent_sd->s_children;
+                        while (*pos != cursor)
+                                pos = &(*pos)->s_sibling;
+                        /* unlink cursor */
+                        *pos = cursor->s_sibling;
                        if (filp->f_pos == 2)
-                                list_move(q, &parent_sd->s_children);
+                                pos = &parent_sd->s_children;
-                        for (p=q->next; p!= &parent_sd->s_children; p=p->next) {
+                        for ( ; *pos; pos = &(*pos)->s_sibling) {
-                                struct sysfs_dirent *next;
+                                struct sysfs_dirent *next = *pos;
                                const char * name;
                                int len;
-                                next = list_entry(p, struct sysfs_dirent,
+                                if (!sysfs_type(next))
-                                                   s_sibling);
-                                if (!next->s_element)
                                        continue;
-                                name = sysfs_get_name(next);
+                                name = next->s_name;
                                len = strlen(name);
-                                if (next->s_dentry)
+                                ino = next->s_ino;
-                                        ino = next->s_dentry->d_inode->i_ino;
-                                else
-                                        ino = iunique(sysfs_sb, 2);
                                if (filldir(dirent, name, len, filp->f_pos, ino,
                                                 dt_type(next)) < 0)
-                                        return 0;
+                                        break;
-                                list_move(q, p);
-                                p = q;
                                filp->f_pos++;
                        }
+                        /* put cursor back in */
+                        cursor->s_sibling = *pos;
+                        *pos = cursor;
+                        mutex_unlock(&sysfs_mutex);
        }
        return 0;
 }
@@ -559,7 +1147,6 @@ static loff_t sysfs_dir_lseek(struct file * file, loff_t offset, int origin)
 {
        struct dentry * dentry = file->f_path.dentry;
-        mutex_lock(&dentry->d_inode->i_mutex);
        switch (origin) {
                case 1:
                        offset += file->f_pos;
@@ -567,31 +1154,35 @@ static loff_t sysfs_dir_lseek(struct file * file, loff_t offset, int origin)
                        if (offset >= 0)
                                break;
                default:
-                        mutex_unlock(&file->f_path.dentry->d_inode->i_mutex);
                        return -EINVAL;
        }
        if (offset != file->f_pos) {
+                mutex_lock(&sysfs_mutex);
                file->f_pos = offset;
                if (file->f_pos >= 2) {
                        struct sysfs_dirent *sd = dentry->d_fsdata;
                        struct sysfs_dirent *cursor = file->private_data;
-                        struct list_head *p;
+                        struct sysfs_dirent **pos;
                        loff_t n = file->f_pos - 2;
-                        list_del(&cursor->s_sibling);
+                        sysfs_unlink_sibling(cursor);
-                        p = sd->s_children.next;
-                        while (n && p != &sd->s_children) {
+                        pos = &sd->s_children;
-                                struct sysfs_dirent *next;
+                        while (n && *pos) {
-                                next = list_entry(p, struct sysfs_dirent,
+                                struct sysfs_dirent *next = *pos;
-                                                   s_sibling);
+                                if (sysfs_type(next))
-                                if (next->s_element)
                                        n--;
-                                p = p->next;
+                                pos = &(*pos)->s_sibling;
                        }
-                        list_add_tail(&cursor->s_sibling, p);
+                        cursor->s_sibling = *pos;
+                        *pos = cursor;
                }
+                mutex_unlock(&sysfs_mutex);
        }
-        mutex_unlock(&dentry->d_inode->i_mutex);
        return offset;
 }
@@ -604,12 +1195,20 @@ static loff_t sysfs_dir_lseek(struct file * file, loff_t offset, int origin)
 int sysfs_make_shadowed_dir(struct kobject *kobj,
        void * (*follow_link)(struct dentry *, struct nameidata *))
 {
+        struct dentry *dentry;
        struct inode *inode;
        struct inode_operations *i_op;
-        inode = kobj->dentry->d_inode;
+        /* get dentry for @kobj->sd, dentry of a shadowed dir is pinned */
-        if (inode->i_op != &sysfs_dir_inode_operations)
+        dentry = sysfs_get_dentry(kobj->sd);
+        if (IS_ERR(dentry))
+                return PTR_ERR(dentry);
+        inode = dentry->d_inode;
+        if (inode->i_op != &sysfs_dir_inode_operations) {
+                dput(dentry);
                return -EINVAL;
+        }
        i_op = kmalloc(sizeof(*i_op), GFP_KERNEL);
        if (!i_op)
@@ -634,54 +1233,72 @@ int sysfs_make_shadowed_dir(struct kobject *kobj,
 *      directory.
 */
-struct dentry *sysfs_create_shadow_dir(struct kobject *kobj)
+struct sysfs_dirent *sysfs_create_shadow_dir(struct kobject *kobj) /* returns the new shadow sysfs_dirent, or an ERR_PTR on failure */
 {
-        struct sysfs_dirent *sd;
+        struct sysfs_dirent *parent_sd = kobj->sd->s_parent;    /* dereferences kobj->sd unconditionally */
-        struct dentry *parent, *dir, *shadow;
+        struct dentry *dir, *parent, *shadow;
        struct inode *inode;
+        struct sysfs_dirent *sd;
+        struct sysfs_addrm_cxt acxt;
-        dir = kobj->dentry;
+        dir = sysfs_get_dentry(kobj->sd);       /* reference is dropped again at out_dput */
-        inode = dir->d_inode;
+        if (IS_ERR(dir)) {
+                sd = (void *)dir;       /* pass the ERR_PTR through the sysfs_dirent return type */
+                goto out;               /* skips out_dput: there is no dentry reference to drop */
+        }
        parent = dir->d_parent;
-        shadow = ERR_PTR(-EINVAL);
+        inode = dir->d_inode;
+        sd = ERR_PTR(-EINVAL);          /* result if the inode is not a shadowed one */
        if (!sysfs_is_shadowed_inode(inode))
-                goto out;
+                goto out_dput;
        shadow = d_alloc(parent, &dir->d_name);
        if (!shadow)
                goto nomem;
-        sd = __sysfs_make_dirent(shadow, kobj, inode->i_mode, SYSFS_DIR);
+        sd = sysfs_new_dirent("_SHADOW_", inode->i_mode, SYSFS_DIR);
        if (!sd)
                goto nomem;
+        sd->s_elem.dir.kobj = kobj;
+        sysfs_addrm_start(&acxt, parent_sd);
+        /* add but don't link into children list */
+        sysfs_add_one(&acxt, sd);
+        /* attach and instantiate dentry */
+        sysfs_attach_dentry(sd, shadow);
        d_instantiate(shadow, igrab(inode));
-        inc_nlink(inode);
+        inc_nlink(inode);       /* tj: synchronization? */
-        inc_nlink(parent->d_inode);
-        shadow->d_op = &sysfs_dentry_ops;
+        sysfs_addrm_finish(&acxt);
        dget(shadow);           /* Extra count - pin the dentry in core */
-out:
+        goto out_dput;
-        return shadow;
-nomem:
+ nomem:
        dput(shadow);
-        shadow = ERR_PTR(-ENOMEM);
+        sd = ERR_PTR(-ENOMEM);          /* falls through into out_dput to release dir */
-        goto out;
+ out_dput:
+        dput(dir);
+ out:
+        return sd;
 }
 /**
 *      sysfs_remove_shadow_dir - remove an object's directory.
- *      @shadow: dentry of shadow directory
+ *      @shadow_sd: sysfs_dirent of shadow directory (NULL is accepted and ignored)
 *
 *      The only thing special about this is that we remove any files in
 *      the directory before we remove the directory, and we've inlined
 *      what used to be sysfs_rmdir() below, instead of calling separately.
 */
-void sysfs_remove_shadow_dir(struct dentry *shadow)
+void sysfs_remove_shadow_dir(struct sysfs_dirent *shadow_sd)
 {
-        __sysfs_remove_dir(shadow);
+        __sysfs_remove_dir(shadow_sd);  /* shares the removal path used by sysfs_remove_dir() */
 }
 const struct file_operations sysfs_dir_operations = {
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index b502c7197ec0..cc497994b2a8 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -50,29 +50,15 @@ static struct sysfs_ops subsys_sysfs_ops = {
        .store  = subsys_attr_store,
 };
-/**
+struct sysfs_buffer {
- *      add_to_collection - add buffer to a collection
+        size_t                  count;
- *      @buffer:        buffer to be added
+        loff_t                  pos;
- *      @node:          inode of set to add to
+        char                    * page;
- */
+        struct sysfs_ops        * ops;  /* show/store methods picked in sysfs_open_file() */
+        struct semaphore        sem;    /* held via down()/up() around the read and write paths */
-static inline void
+        int                     needs_read_fill;        /* nonzero: the read path calls fill_read_buffer() before copying out */
-add_to_collection(struct sysfs_buffer *buffer, struct inode *node)
+        int                     event;  /* s_event value sampled by fill_read_buffer(); sysfs_poll() compares against it */
-{
+};      /* per-open state kept in file->private_data; page is the buffer handed to ops->show and ops->store */
-        struct sysfs_buffer_collection *set = node->i_private;
-        mutex_lock(&node->i_mutex);
-        list_add(&buffer->associates, &set->associates);
-        mutex_unlock(&node->i_mutex);
-}
-static inline void
-remove_from_collection(struct sysfs_buffer *buffer, struct inode *node)
-{
-        mutex_lock(&node->i_mutex);
-        list_del(&buffer->associates);
-        mutex_unlock(&node->i_mutex);
-}
 /**
 *      fill_read_buffer - allocate and fill buffer from object.
@@ -87,9 +73,8 @@ remove_from_collection(struct sysfs_buffer *buffer, struct inode *node)
 */
 static int fill_read_buffer(struct dentry * dentry, struct sysfs_buffer * buffer)
 {
-        struct sysfs_dirent * sd = dentry->d_fsdata;
+        struct sysfs_dirent *attr_sd = dentry->d_fsdata;
-        struct attribute * attr = to_attr(dentry);
+        struct kobject *kobj = attr_sd->s_parent->s_elem.dir.kobj;
-        struct kobject * kobj = to_kobj(dentry->d_parent);
        struct sysfs_ops * ops = buffer->ops;
        int ret = 0;
        ssize_t count;
@@ -99,8 +84,15 @@ static int fill_read_buffer(struct dentry * dentry, struct sysfs_buffer * buffer
        if (!buffer->page)
                return -ENOMEM;
-        buffer->event = atomic_read(&sd->s_event);
+        /* need attr_sd for attr and ops, its parent for kobj */
-        count = ops->show(kobj,attr,buffer->page);
+        if (!sysfs_get_active_two(attr_sd))
+                return -ENODEV;
+        buffer->event = atomic_read(&attr_sd->s_event);
+        count = ops->show(kobj, attr_sd->s_elem.attr.attr, buffer->page);
+        sysfs_put_active_two(attr_sd);
        BUG_ON(count > (ssize_t)PAGE_SIZE);
        if (count >= 0) {
                buffer->needs_read_fill = 0;
@@ -138,10 +130,7 @@ sysfs_read_file(struct file *file, char __user *buf, size_t count, loff_t *ppos)
        down(&buffer->sem);
        if (buffer->needs_read_fill) {
-                if (buffer->orphaned)
+                retval = fill_read_buffer(file->f_path.dentry,buffer);
-                        retval = -ENODEV;
-                else
-                        retval = fill_read_buffer(file->f_path.dentry,buffer);
                if (retval)
                        goto out;
        }
@@ -196,14 +185,23 @@ fill_write_buffer(struct sysfs_buffer * buffer, const char __user * buf, size_t
 *      passing the buffer that we acquired in fill_write_buffer().
 */
-static int 
+static int
 flush_write_buffer(struct dentry * dentry, struct sysfs_buffer * buffer, size_t count)
 {
-        struct attribute * attr = to_attr(dentry);
+        struct sysfs_dirent *attr_sd = dentry->d_fsdata;        /* dirent of the attribute file being written */
-        struct kobject * kobj = to_kobj(dentry->d_parent);
+        struct kobject *kobj = attr_sd->s_parent->s_elem.dir.kobj;      /* owning kobject is recorded on the parent directory dirent */
        struct sysfs_ops * ops = buffer->ops;
+        int rc;
+        /* need attr_sd for attr and ops, its parent for kobj */
+        if (!sysfs_get_active_two(attr_sd))
+                return -ENODEV;         /* presumably the node or its parent is going away - confirm in sysfs_get_active_two */
+        rc = ops->store(kobj, attr_sd->s_elem.attr.attr, buffer->page, count);
-        return ops->store(kobj,attr,buffer->page,count);
+        sysfs_put_active_two(attr_sd);  /* references are held only for the duration of the store call */
+        return rc;                      /* store result is passed back unchanged */
 }
@@ -231,37 +229,26 @@ sysfs_write_file(struct file *file, const char __user *buf, size_t count, loff_t
        ssize_t len;
        down(&buffer->sem);
-        if (buffer->orphaned) {
-                len = -ENODEV;
-                goto out;
-        }
        len = fill_write_buffer(buffer, buf, count);
        if (len > 0)
                len = flush_write_buffer(file->f_path.dentry, buffer, len);
        if (len > 0)
                *ppos += len;
-out:
        up(&buffer->sem);
        return len;
 }
 static int sysfs_open_file(struct inode *inode, struct file *file)
 {
-        struct kobject *kobj = sysfs_get_kobject(file->f_path.dentry->d_parent);
+        struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
-        struct attribute * attr = to_attr(file->f_path.dentry);
+        struct kobject *kobj = attr_sd->s_parent->s_elem.dir.kobj;
-        struct sysfs_buffer_collection *set;
        struct sysfs_buffer * buffer;
        struct sysfs_ops * ops = NULL;
-        int error = 0;
+        int error;
-        if (!kobj || !attr)
-                goto Einval;
-        /* Grab the module reference for this attribute if we have one */
+        /* need attr_sd for attr and ops, its parent for kobj */
-        if (!try_module_get(attr->owner)) {
+        if (!sysfs_get_active_two(attr_sd))
-                error = -ENODEV;
+                return -ENODEV;
-                goto Done;
-        }
        /* if the kobject has no ktype, then we assume that it is a subsystem
         * itself, and use ops for it.
@@ -273,33 +260,21 @@ static int sysfs_open_file(struct inode *inode, struct file *file)
        else
                ops = &subsys_sysfs_ops;
+        error = -EACCES;
        /* No sysfs operations, either from having no subsystem,
         * or the subsystem have no operations.
         */
        if (!ops)
-                goto Eaccess;
+                goto err_out;
-        /* make sure we have a collection to add our buffers to */
-        mutex_lock(&inode->i_mutex);
-        if (!(set = inode->i_private)) {
-                if (!(set = inode->i_private = kmalloc(sizeof(struct sysfs_buffer_collection), GFP_KERNEL))) {
-                        error = -ENOMEM;
-                        goto Done;
-                } else {
-                        INIT_LIST_HEAD(&set->associates);
-                }
-        }
-        mutex_unlock(&inode->i_mutex);
        /* File needs write support.
         * The inode's perms must say it's ok, 
         * and we must have a store method.
         */
        if (file->f_mode & FMODE_WRITE) {
                if (!(inode->i_mode & S_IWUGO) || !ops->store)
-                        goto Eaccess;
+                        goto err_out;
        }
        /* File needs read support.
@@ -308,48 +283,38 @@ static int sysfs_open_file(struct inode *inode, struct file *file)
         */
        if (file->f_mode & FMODE_READ) {
                if (!(inode->i_mode & S_IRUGO) || !ops->show)
-                        goto Eaccess;
+                        goto err_out;
        }
        /* No error? Great, allocate a buffer for the file, and store it
         * it in file->private_data for easy access.
         */
+        error = -ENOMEM;
        buffer = kzalloc(sizeof(struct sysfs_buffer), GFP_KERNEL);
-        if (buffer) {
+        if (!buffer)
-                INIT_LIST_HEAD(&buffer->associates);
+                goto err_out;
-                init_MUTEX(&buffer->sem);
-                buffer->needs_read_fill = 1;
+        init_MUTEX(&buffer->sem);
-                buffer->ops = ops;
+        buffer->needs_read_fill = 1;
-                add_to_collection(buffer, inode);
+        buffer->ops = ops;
-                file->private_data = buffer;
+        file->private_data = buffer;
-        } else
-                error = -ENOMEM;
+        /* open succeeded, put active references and pin attr_sd */
-        goto Done;
+        sysfs_put_active_two(attr_sd);
+        sysfs_get(attr_sd);
- Einval:
+        return 0;
-        error = -EINVAL;
-        goto Done;
+ err_out:
- Eaccess:
+        sysfs_put_active_two(attr_sd);
-        error = -EACCES;
-        module_put(attr->owner);
- Done:
-        if (error)
-                kobject_put(kobj);
        return error;
 }
 static int sysfs_release(struct inode * inode, struct file * filp)
 {
-        struct kobject * kobj = to_kobj(filp->f_path.dentry->d_parent);
+        struct sysfs_dirent *attr_sd = filp->f_path.dentry->d_fsdata;
-        struct attribute * attr = to_attr(filp->f_path.dentry);
+        struct sysfs_buffer *buffer = filp->private_data;
-        struct module * owner = attr->owner;
-        struct sysfs_buffer * buffer = filp->private_data;
-        if (buffer)
+        sysfs_put(attr_sd);
-                remove_from_collection(buffer, inode);
-        kobject_put(kobj);
-        /* After this point, attr should not be accessed. */
-        module_put(owner);
        if (buffer) {
                if (buffer->page)
@@ -376,57 +341,43 @@ static int sysfs_release(struct inode * inode, struct file * filp)
 static unsigned int sysfs_poll(struct file *filp, poll_table *wait)
 {
        struct sysfs_buffer * buffer = filp->private_data;
-        struct kobject * kobj = to_kobj(filp->f_path.dentry->d_parent);
+        struct sysfs_dirent *attr_sd = filp->f_path.dentry->d_fsdata;
-        struct sysfs_dirent * sd = filp->f_path.dentry->d_fsdata;
+        struct kobject *kobj = attr_sd->s_parent->s_elem.dir.kobj;      /* only dereferenced below while the active references are held */
-        int res = 0;
+        /* need parent for the kobj, grab both */
+        if (!sysfs_get_active_two(attr_sd))
+                goto trigger;           /* cannot activate: report an event so the next read refills and sees the failure */
        poll_wait(filp, &kobj->poll, wait);
-        if (buffer->event != atomic_read(&sd->s_event)) {
+        sysfs_put_active_two(attr_sd);
-                res = POLLERR|POLLPRI;
-                buffer->needs_read_fill = 1;
-        }
-        return res;
+        if (buffer->event != atomic_read(&attr_sd->s_event))    /* s_event moved since the buffer was last filled */
-}
+                goto trigger;
+        return 0;                       /* nothing new to report */
-static struct dentry *step_down(struct dentry *dir, const char * name)
+ trigger:
-{
+        buffer->needs_read_fill = 1;    /* force the next read to call ops->show again */
-        struct dentry * de;
+        return POLLERR|POLLPRI;
-        if (dir == NULL || dir->d_inode == NULL)
-                return NULL;
-        mutex_lock(&dir->d_inode->i_mutex);
-        de = lookup_one_len(name, dir, strlen(name));
-        mutex_unlock(&dir->d_inode->i_mutex);
-        dput(dir);
-        if (IS_ERR(de))
-                return NULL;
-        if (de->d_inode == NULL) {
-                dput(de);
-                return NULL;
-        }
-        return de;
 }
-void sysfs_notify(struct kobject * k, char *dir, char *attr)
+void sysfs_notify(struct kobject *k, char *dir, char *attr) /* bump the event counter of the selected node and wake pollers of @k */
 {
-        struct dentry *de = k->dentry;
+        struct sysfs_dirent *sd = k->sd;        /* start from the dirent of @k; may be NULL */
-        if (de)
-                dget(de);
+        mutex_lock(&sysfs_mutex);               /* held across both lookups and the counter update */
-        if (de && dir)
-                de = step_down(de, dir);
+        if (sd && dir)
-        if (de && attr)
+                sd = sysfs_find_dirent(sd, dir);        /* optional first step: descend into child @dir */
-                de = step_down(de, attr);
+        if (sd && attr)
-        if (de) {
+                sd = sysfs_find_dirent(sd, attr);       /* optional second step: descend to child @attr */
-                struct sysfs_dirent * sd = de->d_fsdata;
+        if (sd) {
-                if (sd)
+                atomic_inc(&sd->s_event);       /* sysfs_poll() compares this counter with the value sampled at fill time */
-                        atomic_inc(&sd->s_event);
                wake_up_interruptible(&k->poll);
-                dput(de);
        }
+        mutex_unlock(&sysfs_mutex);
 }
 EXPORT_SYMBOL_GPL(sysfs_notify);
@@ -440,19 +391,30 @@ const struct file_operations sysfs_file_operations = {
 };
-int sysfs_add_file(struct dentry * dir, const struct attribute * attr, int type)
+int sysfs_add_file(struct sysfs_dirent *dir_sd, const struct attribute *attr,
+                   int type)
 {
-        struct sysfs_dirent * parent_sd = dir->d_fsdata;
        umode_t mode = (attr->mode & S_IALLUGO) | S_IFREG;
-        int error = -EEXIST;
+        struct sysfs_addrm_cxt acxt;
+        struct sysfs_dirent *sd;
-        mutex_lock(&dir->d_inode->i_mutex);
+        sd = sysfs_new_dirent(attr->name, mode, type);
-        if (!sysfs_dirent_exist(parent_sd, attr->name))
+        if (!sd)
-                error = sysfs_make_dirent(parent_sd, NULL, (void *)attr,
+                return -ENOMEM;
-                                          mode, type);
+        sd->s_elem.attr.attr = (void *)attr;
-        mutex_unlock(&dir->d_inode->i_mutex);
-        return error;
+        sysfs_addrm_start(&acxt, dir_sd);
+        if (!sysfs_find_dirent(dir_sd, attr->name)) {
+                sysfs_add_one(&acxt, sd);
+                sysfs_link_sibling(sd);
+        }
+        if (sysfs_addrm_finish(&acxt))
+                return 0;
+        sysfs_put(sd);
+        return -EEXIST;
 }
@@ -464,9 +426,9 @@ int sysfs_add_file(struct dentry * dir, const struct attribute * attr, int type)
 int sysfs_create_file(struct kobject * kobj, const struct attribute * attr)
 {
-        BUG_ON(!kobj || !kobj->dentry || !attr);
+        BUG_ON(!kobj || !kobj->sd || !attr);
-        return sysfs_add_file(kobj->dentry, attr, SYSFS_KOBJ_ATTR);
+        return sysfs_add_file(kobj->sd, attr, SYSFS_KOBJ_ATTR);
 }
@@ -480,16 +442,16 @@ int sysfs_create_file(struct kobject * kobj, const struct attribute * attr)
 int sysfs_add_file_to_group(struct kobject *kobj,
                const struct attribute *attr, const char *group)
 {
-        struct dentry *dir;
+        struct sysfs_dirent *dir_sd;
        int error;
-        dir = lookup_one_len(group, kobj->dentry, strlen(group));
+        dir_sd = sysfs_get_dirent(kobj->sd, group);
-        if (IS_ERR(dir))
+        if (!dir_sd)
-                error = PTR_ERR(dir);
+                return -ENOENT;
-        else {
-                error = sysfs_add_file(dir, attr, SYSFS_KOBJ_ATTR);
+        error = sysfs_add_file(dir_sd, attr, SYSFS_KOBJ_ATTR);
-                dput(dir);
+        sysfs_put(dir_sd);
-        }
        return error;
 }
 EXPORT_SYMBOL_GPL(sysfs_add_file_to_group);
@@ -502,30 +464,31 @@ EXPORT_SYMBOL_GPL(sysfs_add_file_to_group);
 */
 int sysfs_update_file(struct kobject * kobj, const struct attribute * attr)
 {
-        struct dentry * dir = kobj->dentry;
+        struct sysfs_dirent *victim_sd = NULL;
-        struct dentry * victim;
+        struct dentry *victim = NULL;
-        int res = -ENOENT;
+        int rc;
-        mutex_lock(&dir->d_inode->i_mutex);
+        rc = -ENOENT;
-        victim = lookup_one_len(attr->name, dir, strlen(attr->name));
+        victim_sd = sysfs_get_dirent(kobj->sd, attr->name);
-        if (!IS_ERR(victim)) {
+        if (!victim_sd)
-                /* make sure dentry is really there */
+                goto out;
-                if (victim->d_inode && 
-                    (victim->d_parent->d_inode == dir->d_inode)) {
+        victim = sysfs_get_dentry(victim_sd);
-                        victim->d_inode->i_mtime = CURRENT_TIME;
+        if (IS_ERR(victim)) {
-                        fsnotify_modify(victim);
+                rc = PTR_ERR(victim);
-                        res = 0;
+                victim = NULL;
-                } else
+                goto out;
-                        d_drop(victim);
-                
-                /**
-                 * Drop the reference acquired from lookup_one_len() above.
-                 */
-                dput(victim);
        }
-        mutex_unlock(&dir->d_inode->i_mutex);
-        return res;
+        mutex_lock(&victim->d_inode->i_mutex);
+        victim->d_inode->i_mtime = CURRENT_TIME;
+        fsnotify_modify(victim);
+        mutex_unlock(&victim->d_inode->i_mutex);
+        rc = 0;
+ out:
+        dput(victim);
+        sysfs_put(victim_sd);
+        return rc;
 }
@@ -538,30 +501,34 @@ int sysfs_update_file(struct kobject * kobj, const struct attribute * attr)
 */
 int sysfs_chmod_file(struct kobject *kobj, struct attribute *attr, mode_t mode)
 {
-        struct dentry *dir = kobj->dentry;
+        struct sysfs_dirent *victim_sd = NULL;
-        struct dentry *victim;
+        struct dentry *victim = NULL;
        struct inode * inode;
        struct iattr newattrs;
-        int res = -ENOENT;
+        int rc;
-        mutex_lock(&dir->d_inode->i_mutex);
+        rc = -ENOENT;
-        victim = lookup_one_len(attr->name, dir, strlen(attr->name));
+        victim_sd = sysfs_get_dirent(kobj->sd, attr->name);
-        if (!IS_ERR(victim)) {
+        if (!victim_sd)
-                if (victim->d_inode &&
+                goto out;
-                    (victim->d_parent->d_inode == dir->d_inode)) {
-                        inode = victim->d_inode;
+        victim = sysfs_get_dentry(victim_sd);
-                        mutex_lock(&inode->i_mutex);
+        if (IS_ERR(victim)) {
-                        newattrs.ia_mode = (mode & S_IALLUGO) |
+                rc = PTR_ERR(victim);
-                                                (inode->i_mode & ~S_IALLUGO);
+                victim = NULL;
-                        newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
+                goto out;
-                        res = notify_change(victim, &newattrs);
-                        mutex_unlock(&inode->i_mutex);
-                }
-                dput(victim);
        }
-        mutex_unlock(&dir->d_inode->i_mutex);
-        return res;
+        inode = victim->d_inode;
+        mutex_lock(&inode->i_mutex);
+        newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO);
+        newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
+        rc = notify_change(victim, &newattrs);
+        mutex_unlock(&inode->i_mutex);
+ out:
+        dput(victim);
+        sysfs_put(victim_sd);
+        return rc;
 }
 EXPORT_SYMBOL_GPL(sysfs_chmod_file);
@@ -576,7 +543,7 @@ EXPORT_SYMBOL_GPL(sysfs_chmod_file);
 void sysfs_remove_file(struct kobject * kobj, const struct attribute * attr)
 {
-        sysfs_hash_and_remove(kobj->dentry, attr->name);
+        sysfs_hash_and_remove(kobj->sd, attr->name);
 }
@@ -589,12 +556,12 @@ void sysfs_remove_file(struct kobject * kobj, const struct attribute * attr)
 void sysfs_remove_file_from_group(struct kobject *kobj,
                const struct attribute *attr, const char *group)
 {
-        struct dentry *dir;
+        struct sysfs_dirent *dir_sd;
-        dir = lookup_one_len(group, kobj->dentry, strlen(group));
+        dir_sd = sysfs_get_dirent(kobj->sd, group);
-        if (!IS_ERR(dir)) {
+        if (dir_sd) {
-                sysfs_hash_and_remove(dir, attr->name);
+                sysfs_hash_and_remove(dir_sd, attr->name);
-                dput(dir);
+                sysfs_put(dir_sd);
        }
 }
 EXPORT_SYMBOL_GPL(sysfs_remove_file_from_group);
diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c
index 52eed2a7a5ef..f318b73c790c 100644
--- a/fs/sysfs/group.c
+++ b/fs/sysfs/group.c
@@ -18,26 +18,25 @@
 #include "sysfs.h"
-static void remove_files(struct dentry * dir, 
+static void remove_files(struct sysfs_dirent *dir_sd,
-                         const struct attribute_group * grp)
+                         const struct attribute_group *grp)
 {
        struct attribute *const* attr;
        for (attr = grp->attrs; *attr; attr++)
-                sysfs_hash_and_remove(dir,(*attr)->name);
+                sysfs_hash_and_remove(dir_sd, (*attr)->name);
 }
-static int create_files(struct dentry * dir,
+static int create_files(struct sysfs_dirent *dir_sd,
-                        const struct attribute_group * grp)
+                        const struct attribute_group *grp)
 {
        struct attribute *const* attr;
        int error = 0;
-        for (attr = grp->attrs; *attr && !error; attr++) {
+        for (attr = grp->attrs; *attr && !error; attr++)
-                error = sysfs_add_file(dir, *attr, SYSFS_KOBJ_ATTR);
+                error = sysfs_add_file(dir_sd, *attr, SYSFS_KOBJ_ATTR);
-        }
        if (error)
-                remove_files(dir,grp);
+                remove_files(dir_sd, grp);
        return error;
 }
@@ -45,44 +44,44 @@ static int create_files(struct dentry * dir,
 int sysfs_create_group(struct kobject * kobj, 
                       const struct attribute_group * grp)
 {
-        struct dentry * dir;
+        struct sysfs_dirent *sd;
        int error;
-        BUG_ON(!kobj || !kobj->dentry);
+        BUG_ON(!kobj || !kobj->sd);
        if (grp->name) {
-                error = sysfs_create_subdir(kobj,grp->name,&dir);
+                error = sysfs_create_subdir(kobj, grp->name, &sd);
                if (error)
                        return error;
        } else
-                dir = kobj->dentry;
+                sd = kobj->sd;
-        dir = dget(dir);
+        sysfs_get(sd);
-        if ((error = create_files(dir,grp))) {
+        error = create_files(sd, grp);
+        if (error) {
                if (grp->name)
-                        sysfs_remove_subdir(dir);
+                        sysfs_remove_subdir(sd);
        }
-        dput(dir);
+        sysfs_put(sd);
        return error;
 }
 void sysfs_remove_group(struct kobject * kobj, 
                        const struct attribute_group * grp)
 {
-        struct dentry * dir;
+        struct sysfs_dirent *dir_sd = kobj->sd;
+        struct sysfs_dirent *sd;
        if (grp->name) {
-                dir = lookup_one_len_kern(grp->name, kobj->dentry,
+                sd = sysfs_get_dirent(dir_sd, grp->name);
-                                strlen(grp->name));
+                BUG_ON(!sd);
-                BUG_ON(IS_ERR(dir));
+        } else
-        }
+                sd = sysfs_get(dir_sd);
-        else
-                dir = dget(kobj->dentry);
-        remove_files(dir,grp);
+        remove_files(sd, grp);
        if (grp->name)
-                sysfs_remove_subdir(dir);
+                sysfs_remove_subdir(sd);
-        /* release the ref. taken in this routine */
-        dput(dir);
+        sysfs_put(sd);
 }
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
index bdd30e74de6b..3756e152285a 100644
--- a/fs/sysfs/inode.c
+++ b/fs/sysfs/inode.c
@@ -133,170 +133,94 @@ static inline void set_inode_attr(struct inode * inode, struct iattr * iattr)
 */
 static struct lock_class_key sysfs_inode_imutex_key;
-struct inode * sysfs_new_inode(mode_t mode, struct sysfs_dirent * sd)
+void sysfs_init_inode(struct sysfs_dirent *sd, struct inode *inode)
 {
-        struct inode * inode = new_inode(sysfs_sb);
+        inode->i_blocks = 0;
-        if (inode) {
+        inode->i_mapping->a_ops = &sysfs_aops;
-                inode->i_blocks = 0;
+        inode->i_mapping->backing_dev_info = &sysfs_backing_dev_info;
-                inode->i_mapping->a_ops = &sysfs_aops;
+        inode->i_op = &sysfs_inode_operations;
-                inode->i_mapping->backing_dev_info = &sysfs_backing_dev_info;
+        inode->i_ino = sd->s_ino;
-                inode->i_op = &sysfs_inode_operations;
+        lockdep_set_class(&inode->i_mutex, &sysfs_inode_imutex_key);
-                lockdep_set_class(&inode->i_mutex, &sysfs_inode_imutex_key);
+        if (sd->s_iattr) {
-                if (sd->s_iattr) {
+                /* sysfs_dirent has non-default attributes
-                        /* sysfs_dirent has non-default attributes
+                 * get them for the new inode from persistent copy
-                         * get them for the new inode from persistent copy
+                 * in sysfs_dirent
-                         * in sysfs_dirent
+                 */
-                         */
+                set_inode_attr(inode, sd->s_iattr);
-                        set_inode_attr(inode, sd->s_iattr);
-                } else
-                        set_default_inode_attr(inode, mode);
-        }
-        return inode;
-}
-int sysfs_create(struct dentry * dentry, int mode, int (*init)(struct inode *))
-{
-        int error = 0;
-        struct inode * inode = NULL;
-        if (dentry) {
-                if (!dentry->d_inode) {
-                        struct sysfs_dirent * sd = dentry->d_fsdata;
-                        if ((inode = sysfs_new_inode(mode, sd))) {
-                                if (dentry->d_parent && dentry->d_parent->d_inode) {
-                                        struct inode *p_inode = dentry->d_parent->d_inode;
-                                        p_inode->i_mtime = p_inode->i_ctime = CURRENT_TIME;
-                                }
-                                goto Proceed;
-                        }
-                        else 
-                                error = -ENOMEM;
-                } else
-                        error = -EEXIST;
-        } else 
-                error = -ENOENT;
-        goto Done;
- Proceed:
-        if (init)
-                error = init(inode);
-        if (!error) {
-                d_instantiate(dentry, inode);
-                if (S_ISDIR(mode))
-                        dget(dentry);  /* pin only directory dentry in core */
        } else
-                iput(inode);
+                set_default_inode_attr(inode, sd->s_mode);
- Done:
-        return error;
 }
-/*
+/**
- * Get the name for corresponding element represented by the given sysfs_dirent
+ *      sysfs_get_inode - get inode for sysfs_dirent
+ *      @sd: sysfs_dirent to allocate inode for
+ *
+ *      Get inode for @sd.  If such inode doesn't exist, a new inode
+ *      is allocated and basics are initialized.  New inode is
+ *      returned locked.
+ *
+ *      LOCKING:
+ *      Kernel thread context (may sleep).
+ *
+ *      RETURNS:
+ *      Pointer to allocated inode on success, NULL on failure.
 */
-const unsigned char * sysfs_get_name(struct sysfs_dirent *sd)
+struct inode * sysfs_get_inode(struct sysfs_dirent *sd)
 {
-        struct attribute * attr;
+        struct inode *inode;
-        struct bin_attribute * bin_attr;
-        struct sysfs_symlink  * sl;
-        BUG_ON(!sd || !sd->s_element);
-        switch (sd->s_type) {
-                case SYSFS_DIR:
-                        /* Always have a dentry so use that */
-                        return sd->s_dentry->d_name.name;
-                case SYSFS_KOBJ_ATTR:
-                        attr = sd->s_element;
-                        return attr->name;
-                case SYSFS_KOBJ_BIN_ATTR:
+        inode = iget_locked(sysfs_sb, sd->s_ino);
-                        bin_attr = sd->s_element;
+        if (inode && (inode->i_state & I_NEW))
-                        return bin_attr->attr.name;
+                sysfs_init_inode(sd, inode);
-                case SYSFS_KOBJ_LINK:
+        return inode;
-                        sl = sd->s_element;
-                        return sl->link_name;
-        }
-        return NULL;
-}
-static inline void orphan_all_buffers(struct inode *node)
-{
-        struct sysfs_buffer_collection *set;
-        struct sysfs_buffer *buf;
-        mutex_lock_nested(&node->i_mutex, I_MUTEX_CHILD);
-        set = node->i_private;
-        if (set) {
-                list_for_each_entry(buf, &set->associates, associates) {
-                        down(&buf->sem);
-                        buf->orphaned = 1;
-                        up(&buf->sem);
-                }
-        }
-        mutex_unlock(&node->i_mutex);
 }
+/**
-/*
+ *      sysfs_instantiate - instantiate dentry
- * Unhashes the dentry corresponding to given sysfs_dirent
+ *      @dentry: dentry to be instantiated
- * Called with parent inode's i_mutex held.
+ *      @inode: inode to instantiate @dentry with
+ *
+ *      Unlock @inode if locked and instantiate @dentry with @inode.
+ *
+ *      LOCKING:
+ *      None.
 */
-void sysfs_drop_dentry(struct sysfs_dirent * sd, struct dentry * parent)
+void sysfs_instantiate(struct dentry *dentry, struct inode *inode)
 {
-        struct dentry * dentry = sd->s_dentry;
+        BUG_ON(!dentry || dentry->d_inode);
-        struct inode *inode;
-        if (dentry) {
+        if (inode->i_state & I_NEW)
-                spin_lock(&dcache_lock);
+                unlock_new_inode(inode);
-                spin_lock(&dentry->d_lock);
-                if (!(d_unhashed(dentry) && dentry->d_inode)) {
+        d_instantiate(dentry, inode);
-                        inode = dentry->d_inode;
-                        spin_lock(&inode->i_lock);
-                        __iget(inode);
-                        spin_unlock(&inode->i_lock);
-                        dget_locked(dentry);
-                        __d_drop(dentry);
-                        spin_unlock(&dentry->d_lock);
-                        spin_unlock(&dcache_lock);
-                        simple_unlink(parent->d_inode, dentry);
-                        orphan_all_buffers(inode);
-                        iput(inode);
-                } else {
-                        spin_unlock(&dentry->d_lock);
-                        spin_unlock(&dcache_lock);
-                }
-        }
 }
-int sysfs_hash_and_remove(struct dentry * dir, const char * name)
+int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name)
 {
-        struct sysfs_dirent * sd;
+        struct sysfs_addrm_cxt acxt;
-        struct sysfs_dirent * parent_sd;
+        struct sysfs_dirent **pos, *sd;
-        int found = 0;
-        if (!dir)
+        if (!dir_sd)
                return -ENOENT;
-        if (dir->d_inode == NULL)
+        sysfs_addrm_start(&acxt, dir_sd);
-                /* no inode means this hasn't been made visible yet */
-                return -ENOENT;
+        for (pos = &dir_sd->s_children; *pos; pos = &(*pos)->s_sibling) {
+                sd = *pos;
-        parent_sd = dir->d_fsdata;
+                if (!sysfs_type(sd))
-        mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT);
-        list_for_each_entry(sd, &parent_sd->s_children, s_sibling) {
-                if (!sd->s_element)
                        continue;
-                if (!strcmp(sysfs_get_name(sd), name)) {
+                if (!strcmp(sd->s_name, name)) {
-                        list_del_init(&sd->s_sibling);
+                        *pos = sd->s_sibling;
-                        sysfs_drop_dentry(sd, dir);
+                        sd->s_sibling = NULL;
-                        sysfs_put(sd);
+                        sysfs_remove_one(&acxt, sd);
-                        found = 1;
                        break;
                }
        }
-        mutex_unlock(&dir->d_inode->i_mutex);
-        return found ? 0 : -ENOENT;
+        if (sysfs_addrm_finish(&acxt))
+                return 0;
+        return -ENOENT;
 }
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
index 23a48a38e6af..402cc356203c 100644
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -19,27 +19,18 @@ struct vfsmount *sysfs_mount;
 struct super_block * sysfs_sb = NULL;
 struct kmem_cache *sysfs_dir_cachep;
-static void sysfs_clear_inode(struct inode *inode);
 static const struct super_operations sysfs_ops = {
        .statfs         = simple_statfs,
        .drop_inode     = sysfs_delete_inode,
-        .clear_inode    = sysfs_clear_inode,
 };
-static struct sysfs_dirent sysfs_root = {
+struct sysfs_dirent sysfs_root = {
-        .s_sibling      = LIST_HEAD_INIT(sysfs_root.s_sibling),
+        .s_count        = ATOMIC_INIT(1),
-        .s_children     = LIST_HEAD_INIT(sysfs_root.s_children),
+        .s_flags        = SYSFS_ROOT,
-        .s_element      = NULL,
+        .s_mode         = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO,
-        .s_type         = SYSFS_ROOT,
+        .s_ino          = 1,
-        .s_iattr        = NULL,
 };
-static void sysfs_clear_inode(struct inode *inode)
-{
-        kfree(inode->i_private);
-}
 static int sysfs_fill_super(struct super_block *sb, void *data, int silent)
 {
        struct inode *inode;
@@ -52,24 +43,26 @@ static int sysfs_fill_super(struct super_block *sb, void *data, int silent)
        sb->s_time_gran = 1;
        sysfs_sb = sb;
-        inode = sysfs_new_inode(S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO,
+        inode = new_inode(sysfs_sb);
-                                 &sysfs_root);
+        if (!inode) {
-        if (inode) {
-                inode->i_op = &sysfs_dir_inode_operations;
-                inode->i_fop = &sysfs_dir_operations;
-                /* directory inodes start off with i_nlink == 2 (for "." entry) */
-                inc_nlink(inode);
-        } else {
                pr_debug("sysfs: could not get root inode\n");
                return -ENOMEM;
        }
+        sysfs_init_inode(&sysfs_root, inode);
+        inode->i_op = &sysfs_dir_inode_operations;
+        inode->i_fop = &sysfs_dir_operations;
+        /* directory inodes start off with i_nlink == 2 (for "." entry) */
+        inc_nlink(inode);
        root = d_alloc_root(inode);
        if (!root) {
                pr_debug("%s: could not get root dentry!\n",__FUNCTION__);
                iput(inode);
                return -ENOMEM;
        }
+        sysfs_root.s_dentry = root;
        root->d_fsdata = &sysfs_root;
        sb->s_root = root;
        return 0;
diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c
index 7b9c5bfde920..2f86e0422290 100644
--- a/fs/sysfs/symlink.c
+++ b/fs/sysfs/symlink.c
@@ -11,71 +11,39 @@
 #include "sysfs.h"
-static int object_depth(struct kobject * kobj)
+static int object_depth(struct sysfs_dirent *sd)
 {
-        struct kobject * p = kobj;
        int depth = 0;
-        do { depth++; } while ((p = p->parent));
+        for (; sd->s_parent; sd = sd->s_parent)
+                depth++;
        return depth;
 }
-static int object_path_length(struct kobject * kobj)
+static int object_path_length(struct sysfs_dirent * sd)
 {
-        struct kobject * p = kobj;
        int length = 1;
-        do {
-                length += strlen(kobject_name(p)) + 1;
+        for (; sd->s_parent; sd = sd->s_parent)
-                p = p->parent;
+                length += strlen(sd->s_name) + 1;
-        } while (p);
        return length;
 }
-static void fill_object_path(struct kobject * kobj, char * buffer, int length)
+static void fill_object_path(struct sysfs_dirent *sd, char *buffer, int length)
 {
-        struct kobject * p;
        --length;
-        for (p = kobj; p; p = p->parent) {
+        for (; sd->s_parent; sd = sd->s_parent) {
-                int cur = strlen(kobject_name(p));
+                int cur = strlen(sd->s_name);
                /* back up enough to print this bus id with '/' */
                length -= cur;
-                strncpy(buffer + length,kobject_name(p),cur);
+                strncpy(buffer + length, sd->s_name, cur);
                *(buffer + --length) = '/';
        }
 }
-static int sysfs_add_link(struct dentry * parent, const char * name, struct kobject * target)
-{
-        struct sysfs_dirent * parent_sd = parent->d_fsdata;
-        struct sysfs_symlink * sl;
-        int error = 0;
-        error = -ENOMEM;
-        sl = kmalloc(sizeof(*sl), GFP_KERNEL);
-        if (!sl)
-                goto exit1;
-        sl->link_name = kmalloc(strlen(name) + 1, GFP_KERNEL);
-        if (!sl->link_name)
-                goto exit2;
-        strcpy(sl->link_name, name);
-        sl->target_kobj = kobject_get(target);
-        error = sysfs_make_dirent(parent_sd, NULL, sl, S_IFLNK|S_IRWXUGO,
-                                SYSFS_KOBJ_LINK);
-        if (!error)
-                return 0;
-        kobject_put(target);
-        kfree(sl->link_name);
-exit2:
-        kfree(sl);
-exit1:
-        return error;
-}
 /**
 *      sysfs_create_link - create symlink between two objects.
 *      @kobj:  object whose directory we're creating the link in.
@@ -84,24 +52,57 @@ exit1:
 */
 int sysfs_create_link(struct kobject * kobj, struct kobject * target, const char * name)
 {
-        struct dentry *dentry = NULL;
+        struct sysfs_dirent *parent_sd = NULL;
-        int error = -EEXIST;
+        struct sysfs_dirent *target_sd = NULL;
+        struct sysfs_dirent *sd = NULL;
+        struct sysfs_addrm_cxt acxt;
+        int error;
        BUG_ON(!name);
        if (!kobj) {
                if (sysfs_mount && sysfs_mount->mnt_sb)
-                        dentry = sysfs_mount->mnt_sb->s_root;
+                        parent_sd = sysfs_mount->mnt_sb->s_root->d_fsdata;
        } else
-                dentry = kobj->dentry;
+                parent_sd = kobj->sd;
+        error = -EFAULT;
+        if (!parent_sd)
+                goto out_put;
+        /* target->sd can go away beneath us but is protected with
+         * sysfs_assoc_lock.  Fetch target_sd from it.
+         */
+        spin_lock(&sysfs_assoc_lock);
+        if (target->sd)
+                target_sd = sysfs_get(target->sd);
+        spin_unlock(&sysfs_assoc_lock);
+        error = -ENOENT;
+        if (!target_sd)
+                goto out_put;
+        error = -ENOMEM;
+        sd = sysfs_new_dirent(name, S_IFLNK|S_IRWXUGO, SYSFS_KOBJ_LINK);
+        if (!sd)
+                goto out_put;
+        sd->s_elem.symlink.target_sd = target_sd;
-        if (!dentry)
+        sysfs_addrm_start(&acxt, parent_sd);
-                return -EFAULT;
-        mutex_lock(&dentry->d_inode->i_mutex);
+        if (!sysfs_find_dirent(parent_sd, name)) {
-        if (!sysfs_dirent_exist(dentry->d_fsdata, name))
+                sysfs_add_one(&acxt, sd);
-                error = sysfs_add_link(dentry, name, target);
+                sysfs_link_sibling(sd);
-        mutex_unlock(&dentry->d_inode->i_mutex);
+        }
+        if (sysfs_addrm_finish(&acxt))
+                return 0;
+        error = -EEXIST;
+        /* fall through */
+ out_put:
+        sysfs_put(target_sd);
+        sysfs_put(sd);
        return error;
 }
@@ -114,17 +115,17 @@ int sysfs_create_link(struct kobject * kobj, struct kobject * target, const char
 void sysfs_remove_link(struct kobject * kobj, const char * name)
 {
-        sysfs_hash_and_remove(kobj->dentry,name);
+        sysfs_hash_and_remove(kobj->sd, name);
 }
-static int sysfs_get_target_path(struct kobject * kobj, struct kobject * target,
+static int sysfs_get_target_path(struct sysfs_dirent * parent_sd,
-                                 char *path)
+                                 struct sysfs_dirent * target_sd, char *path)
 {
        char * s;
        int depth, size;
-        depth = object_depth(kobj);
+        depth = object_depth(parent_sd);
-        size = object_path_length(target) + depth * 3 - 1;
+        size = object_path_length(target_sd) + depth * 3 - 1;
        if (size > PATH_MAX)
                return -ENAMETOOLONG;
@@ -133,7 +134,7 @@ static int sysfs_get_target_path(struct kobject * kobj, struct kobject * target,
        for (s = path; depth--; s += 3)
                strcpy(s,"../");
-        fill_object_path(target, path, size);
+        fill_object_path(target_sd, path, size);
        pr_debug("%s: path = '%s'\n", __FUNCTION__, path);
        return 0;
@@ -141,27 +142,16 @@ static int sysfs_get_target_path(struct kobject * kobj, struct kobject * target,
 static int sysfs_getlink(struct dentry *dentry, char * path)
 {
-        struct kobject *kobj, *target_kobj;
+        struct sysfs_dirent *sd = dentry->d_fsdata;
-        int error = 0;
+        struct sysfs_dirent *parent_sd = sd->s_parent;
+        struct sysfs_dirent *target_sd = sd->s_elem.symlink.target_sd;
+        int error;
-        kobj = sysfs_get_kobject(dentry->d_parent);
+        mutex_lock(&sysfs_mutex);
-        if (!kobj)
+        error = sysfs_get_target_path(parent_sd, target_sd, path);
-                return -EINVAL;
+        mutex_unlock(&sysfs_mutex);
-        target_kobj = sysfs_get_kobject(dentry);
-        if (!target_kobj) {
-                kobject_put(kobj);
-                return -EINVAL;
-        }
-        down_read(&sysfs_rename_sem);
-        error = sysfs_get_target_path(kobj, target_kobj, path);
-        up_read(&sysfs_rename_sem);
-        
-        kobject_put(kobj);
-        kobject_put(target_kobj);
        return error;
 }
 static void *sysfs_follow_link(struct dentry *dentry, struct nameidata *nd)
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index a77c57e5a6d5..6a37f2386a8d 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -1,38 +1,101 @@
+struct sysfs_elem_dir {
+        struct kobject          * kobj;
+};
+struct sysfs_elem_symlink {
+        struct sysfs_dirent     * target_sd;
+};
+struct sysfs_elem_attr {
+        struct attribute        * attr;
+};
+struct sysfs_elem_bin_attr {
+        struct bin_attribute    * bin_attr;
+};
+/*
+ * As long as s_count reference is held, the sysfs_dirent itself is
+ * accessible.  Dereferencing s_elem or any other outer entity
+ * requires s_active reference.
+ */
 struct sysfs_dirent {
        atomic_t                s_count;
-        struct list_head        s_sibling;
+        atomic_t                s_active;
-        struct list_head        s_children;
+        struct sysfs_dirent     * s_parent;
-        void                    * s_element;
+        struct sysfs_dirent     * s_sibling;
-        int                     s_type;
+        struct sysfs_dirent     * s_children;
+        const char              * s_name;
+        union {
+                struct sysfs_elem_dir           dir;
+                struct sysfs_elem_symlink       symlink;
+                struct sysfs_elem_attr          attr;
+                struct sysfs_elem_bin_attr      bin_attr;
+        }                       s_elem;
+        unsigned int            s_flags;
        umode_t                 s_mode;
+        ino_t                   s_ino;
        struct dentry           * s_dentry;
        struct iattr            * s_iattr;
        atomic_t                s_event;
 };
+#define SD_DEACTIVATED_BIAS     INT_MIN
+struct sysfs_addrm_cxt {
+        struct sysfs_dirent     *parent_sd;
+        struct inode            *parent_inode;
+        struct sysfs_dirent     *removed;
+        int                     cnt;
+};
 extern struct vfsmount * sysfs_mount;
+extern struct sysfs_dirent sysfs_root;
 extern struct kmem_cache *sysfs_dir_cachep;
-extern void sysfs_delete_inode(struct inode *inode);
+extern struct dentry *sysfs_get_dentry(struct sysfs_dirent *sd);
-extern struct inode * sysfs_new_inode(mode_t mode, struct sysfs_dirent *);
+extern void sysfs_link_sibling(struct sysfs_dirent *sd);
-extern int sysfs_create(struct dentry *, int mode, int (*init)(struct inode *));
+extern void sysfs_unlink_sibling(struct sysfs_dirent *sd);
+extern struct sysfs_dirent *sysfs_get_active(struct sysfs_dirent *sd);
+extern void sysfs_put_active(struct sysfs_dirent *sd);
+extern struct sysfs_dirent *sysfs_get_active_two(struct sysfs_dirent *sd);
+extern void sysfs_put_active_two(struct sysfs_dirent *sd);
+extern void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt,
+                              struct sysfs_dirent *parent_sd);
+extern void sysfs_add_one(struct sysfs_addrm_cxt *acxt,
+                          struct sysfs_dirent *sd);
+extern void sysfs_remove_one(struct sysfs_addrm_cxt *acxt,
+                             struct sysfs_dirent *sd);
+extern int sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt);
-extern int sysfs_dirent_exist(struct sysfs_dirent *, const unsigned char *);
+extern void sysfs_delete_inode(struct inode *inode);
-extern int sysfs_make_dirent(struct sysfs_dirent *, struct dentry *, void *,
+extern void sysfs_init_inode(struct sysfs_dirent *sd, struct inode *inode);
-                                umode_t, int);
+extern struct inode * sysfs_get_inode(struct sysfs_dirent *sd);
+extern void sysfs_instantiate(struct dentry *dentry, struct inode *inode);
-extern int sysfs_add_file(struct dentry *, const struct attribute *, int);
-extern int sysfs_hash_and_remove(struct dentry * dir, const char * name);
+extern void release_sysfs_dirent(struct sysfs_dirent * sd);
+extern struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd,
+                                              const unsigned char *name);
+extern struct sysfs_dirent *sysfs_get_dirent(struct sysfs_dirent *parent_sd,
+                                             const unsigned char *name);
+extern struct sysfs_dirent *sysfs_new_dirent(const char *name, umode_t mode,
+                                             int type);
+extern int sysfs_add_file(struct sysfs_dirent *dir_sd,
+                          const struct attribute *attr, int type);
+extern int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name);
 extern struct sysfs_dirent *sysfs_find(struct sysfs_dirent *dir, const char * name);
-extern int sysfs_create_subdir(struct kobject *, const char *, struct dentry **);
+extern int sysfs_create_subdir(struct kobject *kobj, const char *name,
-extern void sysfs_remove_subdir(struct dentry *);
+                               struct sysfs_dirent **p_sd);
+extern void sysfs_remove_subdir(struct sysfs_dirent *sd);
-extern const unsigned char * sysfs_get_name(struct sysfs_dirent *sd);
-extern void sysfs_drop_dentry(struct sysfs_dirent *sd, struct dentry *parent);
 extern int sysfs_setattr(struct dentry *dentry, struct iattr *iattr);
-extern struct rw_semaphore sysfs_rename_sem;
+extern spinlock_t sysfs_assoc_lock;
+extern struct mutex sysfs_mutex;
 extern struct super_block * sysfs_sb;
 extern const struct file_operations sysfs_dir_operations;
 extern const struct file_operations sysfs_file_operations;
@@ -40,73 +103,9 @@ extern const struct file_operations bin_fops;
 extern const struct inode_operations sysfs_dir_inode_operations;
 extern const struct inode_operations sysfs_symlink_inode_operations;
-struct sysfs_symlink {
+static inline unsigned int sysfs_type(struct sysfs_dirent *sd)
-        char * link_name;
-        struct kobject * target_kobj;
-};
-struct sysfs_buffer {
-        struct list_head                associates;
-        size_t                          count;
-        loff_t                          pos;
-        char                            * page;
-        struct sysfs_ops                * ops;
-        struct semaphore                sem;
-        int                             orphaned;
-        int                             needs_read_fill;
-        int                             event;
-};
-struct sysfs_buffer_collection {
-        struct list_head        associates;
-};
-static inline struct kobject * to_kobj(struct dentry * dentry)
-{
-        struct sysfs_dirent * sd = dentry->d_fsdata;
-        return ((struct kobject *) sd->s_element);
-}
-static inline struct attribute * to_attr(struct dentry * dentry)
 {
-        struct sysfs_dirent * sd = dentry->d_fsdata;
+        return sd->s_flags & SYSFS_TYPE_MASK;
-        return ((struct attribute *) sd->s_element);
-}
-static inline struct bin_attribute * to_bin_attr(struct dentry * dentry)
-{
-        struct sysfs_dirent * sd = dentry->d_fsdata;
-        return ((struct bin_attribute *) sd->s_element);
-}
-static inline struct kobject *sysfs_get_kobject(struct dentry *dentry)
-{
-        struct kobject * kobj = NULL;
-        spin_lock(&dcache_lock);
-        if (!d_unhashed(dentry)) {
-                struct sysfs_dirent * sd = dentry->d_fsdata;
-                if (sd->s_type & SYSFS_KOBJ_LINK) {
-                        struct sysfs_symlink * sl = sd->s_element;
-                        kobj = kobject_get(sl->target_kobj);
-                } else
-                        kobj = kobject_get(sd->s_element);
-        }
-        spin_unlock(&dcache_lock);
-        return kobj;
-}
-static inline void release_sysfs_dirent(struct sysfs_dirent * sd)
-{
-        if (sd->s_type & SYSFS_KOBJ_LINK) {
-                struct sysfs_symlink * sl = sd->s_element;
-                kfree(sl->link_name);
-                kobject_put(sl->target_kobj);
-                kfree(sl);
-        }
-        kfree(sd->s_iattr);
-        kmem_cache_free(sysfs_dir_cachep, sd);
 }
 static inline struct sysfs_dirent * sysfs_get(struct sysfs_dirent * sd)
@@ -120,7 +119,7 @@ static inline struct sysfs_dirent * sysfs_get(struct sysfs_dirent * sd)
 static inline void sysfs_put(struct sysfs_dirent * sd)
 {
-        if (atomic_dec_and_test(&sd->s_count))
+        if (sd && atomic_dec_and_test(&sd->s_count))
                release_sysfs_dirent(sd);
 }
diff --git a/fs/sysv/file.c b/fs/sysv/file.c
index 0732ddb9020b..589be21d884e 100644
--- a/fs/sysv/file.c
+++ b/fs/sysv/file.c
@@ -27,7 +27,7 @@ const struct file_operations sysv_file_operations = {
        .aio_write      = generic_file_aio_write,
        .mmap           = generic_file_mmap,
        .fsync          = sysv_sync_file,
-        .sendfile       = generic_file_sendfile,
+        .splice_read    = generic_file_splice_read,
 };
 const struct inode_operations sysv_file_inode_operations = {
diff --git a/fs/udf/file.c b/fs/udf/file.c
index 51b5764685e7..df070bee8d4f 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -261,7 +261,7 @@ const struct file_operations udf_file_operations = {
        .aio_write              = udf_file_aio_write,
        .release                = udf_release_file,
        .fsync                  = udf_fsync_file,
-        .sendfile               = generic_file_sendfile,
+        .splice_read            = generic_file_splice_read,
 };
 const struct inode_operations udf_file_inode_operations = {
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index c8461551e108..bf7de0bdbab3 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -100,14 +100,23 @@ no_delete:
        clear_inode(inode);
 }
+/*
+ * If we are going to release the inode from memory, we discard preallocation
+ * and truncate the last inode extent to its proper length. We could use
+ * drop_inode(), but it is called under inode_lock and thus we cannot mark the
+ * inode dirty there. We use clear_inode() instead, but we have to make sure to
+ * write the inode ourselves as it is not written automatically.
+ */
 void udf_clear_inode(struct inode *inode)
 {
        if (!(inode->i_sb->s_flags & MS_RDONLY)) {
                lock_kernel();
+                /* Discard preallocation for directories, symlinks, etc. */
                udf_discard_prealloc(inode);
+                udf_truncate_tail_extent(inode);
                unlock_kernel();
+                write_inode_now(inode, 1);
        }
        kfree(UDF_I_DATA(inode));
        UDF_I_DATA(inode) = NULL;
 }
@@ -460,8 +469,8 @@ static struct buffer_head * inode_getblk(struct inode * inode, sector_t block,
        kernel_long_ad laarr[EXTENT_MERGE_SIZE];
        struct extent_position prev_epos, cur_epos, next_epos;
        int count = 0, startnum = 0, endnum = 0;
-        uint32_t elen = 0;
+        uint32_t elen = 0, tmpelen;
-        kernel_lb_addr eloc;
+        kernel_lb_addr eloc, tmpeloc;
        int c = 1;
        loff_t lbcount = 0, b_off = 0;
        uint32_t newblocknum, newblock;
@@ -520,8 +529,12 @@ static struct buffer_head * inode_getblk(struct inode * inode, sector_t block,
        b_off -= lbcount;
        offset = b_off >> inode->i_sb->s_blocksize_bits;
-        /* Move into indirect extent if we are at a pointer to it */
+        /*
-        udf_next_aext(inode, &prev_epos, &eloc, &elen, 0);
+         * Move prev_epos and cur_epos into indirect extent if we are at
+         * the pointer to it
+         */
+        udf_next_aext(inode, &prev_epos, &tmpeloc, &tmpelen, 0);
+        udf_next_aext(inode, &cur_epos, &tmpeloc, &tmpelen, 0);
        /* if the extent is allocated and recorded, return the block
       if the extent is not a multiple of the blocksize, round up */
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 3a743d854c17..6658afb41cc7 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -1351,7 +1351,7 @@ udf_load_partition(struct super_block *sb, kernel_lb_addr *fileset)
        for (i=0; i<UDF_SB_NUMPARTS(sb); i++)
        {
-                switch UDF_SB_PARTTYPE(sb, i)
+                switch (UDF_SB_PARTTYPE(sb, i))
                {
                        case UDF_VIRTUAL_MAP15:
                        case UDF_VIRTUAL_MAP20:
diff --git a/fs/udf/truncate.c b/fs/udf/truncate.c
index 77975ae291a5..60d277644248 100644
--- a/fs/udf/truncate.c
+++ b/fs/udf/truncate.c
@@ -61,7 +61,11 @@ static void extent_trunc(struct inode * inode, struct extent_position *epos,
        }
 }
-void udf_discard_prealloc(struct inode * inode)
+/*
+ * Truncate the last extent to match i_size. This function assumes
+ * that the preallocation extent has already been truncated.
+ */
+void udf_truncate_tail_extent(struct inode *inode)
 {
        struct extent_position epos = { NULL, 0, {0, 0}};
        kernel_lb_addr eloc;
@@ -71,7 +75,10 @@ void udf_discard_prealloc(struct inode * inode)
        int adsize;
        if (UDF_I_ALLOCTYPE(inode) == ICBTAG_FLAG_AD_IN_ICB ||
-                inode->i_size == UDF_I_LENEXTENTS(inode))
+            inode->i_size == UDF_I_LENEXTENTS(inode))
+                return;
+        /* Are we going to delete the file anyway? */
+        if (inode->i_nlink == 0)
                return;
        if (UDF_I_ALLOCTYPE(inode) == ICBTAG_FLAG_AD_SHORT)
@@ -79,36 +86,76 @@ void udf_discard_prealloc(struct inode * inode)
        else if (UDF_I_ALLOCTYPE(inode) == ICBTAG_FLAG_AD_LONG)
                adsize = sizeof(long_ad);
        else
-                adsize = 0;
+                BUG();
-        epos.block = UDF_I_LOCATION(inode);
        /* Find the last extent in the file */
        while ((netype = udf_next_aext(inode, &epos, &eloc, &elen, 1)) != -1)
        {
                etype = netype;
                lbcount += elen;
-                if (lbcount > inode->i_size && lbcount - elen < inode->i_size)
+                if (lbcount > inode->i_size) {
-                {
+                        if (lbcount - inode->i_size >= inode->i_sb->s_blocksize)
-                        WARN_ON(lbcount - inode->i_size >= inode->i_sb->s_blocksize);
+                                printk(KERN_WARNING
+                                       "udf_truncate_tail_extent(): Too long "
+                                       "extent after EOF in inode %u: i_size: "
+                                       "%Ld lbcount: %Ld extent %u+%u\n",
+                                       (unsigned)inode->i_ino,
+                                       (long long)inode->i_size,
+                                       (long long)lbcount,
+                                       (unsigned)eloc.logicalBlockNum,
+                                       (unsigned)elen);
                        nelen = elen - (lbcount - inode->i_size);
                        epos.offset -= adsize;
                        extent_trunc(inode, &epos, eloc, etype, elen, nelen);
                        epos.offset += adsize;
-                        lbcount = inode->i_size;
+                        if (udf_next_aext(inode, &epos, &eloc, &elen, 1) != -1)
+                                printk(KERN_ERR "udf_truncate_tail_extent(): "
+                                       "Extent after EOF in inode %u.\n",
+                                       (unsigned)inode->i_ino);
+                        break;
                }
        }
+        /* This inode entry is in-memory only and thus we don't have to mark
+         * the inode dirty */
+        UDF_I_LENEXTENTS(inode) = inode->i_size;
+        brelse(epos.bh);
+}
+void udf_discard_prealloc(struct inode *inode)
+{
+        struct extent_position epos = { NULL, 0, {0, 0}};
+        kernel_lb_addr eloc;
+        uint32_t elen;
+        uint64_t lbcount = 0;
+        int8_t etype = -1, netype;
+        int adsize;
+        if (UDF_I_ALLOCTYPE(inode) == ICBTAG_FLAG_AD_IN_ICB ||
+                inode->i_size == UDF_I_LENEXTENTS(inode))
+                return;
+        if (UDF_I_ALLOCTYPE(inode) == ICBTAG_FLAG_AD_SHORT)
+                adsize = sizeof(short_ad);
+        else if (UDF_I_ALLOCTYPE(inode) == ICBTAG_FLAG_AD_LONG)
+                adsize = sizeof(long_ad);
+        else
+                adsize = 0;
+        epos.block = UDF_I_LOCATION(inode);
+        /* Find the last extent in the file */
+        while ((netype = udf_next_aext(inode, &epos, &eloc, &elen, 1)) != -1) {
+                etype = netype;
+                lbcount += elen;
+        }
        if (etype == (EXT_NOT_RECORDED_ALLOCATED >> 30)) {
                epos.offset -= adsize;
                lbcount -= elen;
                extent_trunc(inode, &epos, eloc, etype, elen, 0);
-                if (!epos.bh)
+                if (!epos.bh) {
-                {
                        UDF_I_LENALLOC(inode) = epos.offset - udf_file_entry_alloc_offset(inode);
                        mark_inode_dirty(inode);
-                }
+                } else {
-                else
-                {
                        struct allocExtDesc *aed = (struct allocExtDesc *)(epos.bh->b_data);
                        aed->lengthAllocDescs = cpu_to_le32(epos.offset - sizeof(struct allocExtDesc));
                        if (!UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_STRICT) || UDF_SB_UDFREV(inode->i_sb) >= 0x0201)
@@ -118,9 +165,9 @@ void udf_discard_prealloc(struct inode * inode)
                        mark_buffer_dirty_inode(epos.bh, inode);
                }
        }
+        /* This inode entry is in-memory only and thus we don't have to mark
+         * the inode dirty */
        UDF_I_LENEXTENTS(inode) = lbcount;
-        WARN_ON(lbcount != inode->i_size);
        brelse(epos.bh);
 }
diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h
index 67ded289497c..f581f2f69c0f 100644
--- a/fs/udf/udfdecl.h
+++ b/fs/udf/udfdecl.h
@@ -146,6 +146,7 @@ extern void udf_free_inode(struct inode *);
 extern struct inode * udf_new_inode (struct inode *, int, int *);
 /* truncate.c */
+extern void udf_truncate_tail_extent(struct inode *);
 extern void udf_discard_prealloc(struct inode *);
 extern void udf_truncate_extents(struct inode *);
diff --git a/fs/ufs/file.c b/fs/ufs/file.c
index 1e096323bad4..6705d74c6d2d 100644
--- a/fs/ufs/file.c
+++ b/fs/ufs/file.c
@@ -60,5 +60,5 @@ const struct file_operations ufs_file_operations = {
        .mmap           = generic_file_mmap,
        .open           = generic_file_open,
        .fsync          = ufs_sync_file,
-        .sendfile       = generic_file_sendfile,
+        .splice_read    = generic_file_splice_read,
 };
diff --git a/fs/utimes.c b/fs/utimes.c
index 480f7c8c29da..b3c88952465f 100644
--- a/fs/utimes.c
+++ b/fs/utimes.c
@@ -106,9 +106,16 @@ long do_utimes(int dfd, char __user *filename, struct timespec *times, int flags
                if (IS_IMMUTABLE(inode))
                        goto dput_and_out;
-                if (current->fsuid != inode->i_uid &&
+                if (current->fsuid != inode->i_uid) {
-                    (error = vfs_permission(&nd, MAY_WRITE)) != 0)
+                        if (f) {
-                        goto dput_and_out;
+                                if (!(f->f_mode & FMODE_WRITE))
+                                        goto dput_and_out;
+                        } else {
+                                error = vfs_permission(&nd, MAY_WRITE);
+                                if (error)
+                                        goto dput_and_out;
+                        }
+                }
        }
        mutex_lock(&inode->i_mutex);
        error = notify_change(dentry, &newattrs);
diff --git a/fs/xfs/Makefile-linux-2.6 b/fs/xfs/Makefile-linux-2.6
index b49989bb89ad..e7a9a83f0087 100644
--- a/fs/xfs/Makefile-linux-2.6
+++ b/fs/xfs/Makefile-linux-2.6
@@ -64,6 +64,7 @@ xfs-y				+= xfs_alloc.o \
                                   xfs_dir2_sf.o \
                                   xfs_error.o \
                                   xfs_extfree_item.o \
+                                   xfs_filestream.o \
                                   xfs_fsops.o \
                                   xfs_ialloc.o \
                                   xfs_ialloc_btree.o \
@@ -77,6 +78,7 @@ xfs-y				+= xfs_alloc.o \
                                   xfs_log.o \
                                   xfs_log_recover.o \
                                   xfs_mount.o \
+                                   xfs_mru_cache.o \
                                   xfs_rename.o \
                                   xfs_trans.o \
                                   xfs_trans_ail.o \
diff --git a/fs/xfs/linux-2.6/kmem.h b/fs/xfs/linux-2.6/kmem.h
index 9ebabdf7829c..4b6470cf87f0 100644
--- a/fs/xfs/linux-2.6/kmem.h
+++ b/fs/xfs/linux-2.6/kmem.h
@@ -100,25 +100,6 @@ kmem_zone_destroy(kmem_zone_t *zone)
 extern void *kmem_zone_alloc(kmem_zone_t *, unsigned int __nocast);
 extern void *kmem_zone_zalloc(kmem_zone_t *, unsigned int __nocast);
-/*
- * Low memory cache shrinkers
- */
-typedef struct shrinker *kmem_shaker_t;
-typedef int (*kmem_shake_func_t)(int, gfp_t);
-static inline kmem_shaker_t
-kmem_shake_register(kmem_shake_func_t sfunc)
-{
-        return set_shrinker(DEFAULT_SEEKS, sfunc);
-}
-static inline void
-kmem_shake_deregister(kmem_shaker_t shrinker)
-{
-        remove_shrinker(shrinker);
-}
 static inline int
 kmem_shake_allow(gfp_t gfp_mask)
 {
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 7361861e3aac..fd4105d662e0 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -108,14 +108,19 @@ xfs_page_trace(
 /*
 * Schedule IO completion handling on a xfsdatad if this was
- * the final hold on this ioend.
+ * the final hold on this ioend. If we are asked to wait,
+ * flush the workqueue.
 */
 STATIC void
 xfs_finish_ioend(
-        xfs_ioend_t             *ioend)
+        xfs_ioend_t     *ioend,
+        int             wait)
 {
-        if (atomic_dec_and_test(&ioend->io_remaining))
+        if (atomic_dec_and_test(&ioend->io_remaining)) {
                queue_work(xfsdatad_workqueue, &ioend->io_work);
+                if (wait)
+                        flush_workqueue(xfsdatad_workqueue);
+        }
 }
 /*
@@ -156,6 +161,8 @@ xfs_setfilesize(
        xfs_fsize_t             bsize;
        ip = xfs_vtoi(ioend->io_vnode);
+        if (!ip)
+                return;
        ASSERT((ip->i_d.di_mode & S_IFMT) == S_IFREG);
        ASSERT(ioend->io_type != IOMAP_READ);
@@ -334,7 +341,7 @@ xfs_end_bio(
        bio->bi_end_io = NULL;
        bio_put(bio);
-        xfs_finish_ioend(ioend);
+        xfs_finish_ioend(ioend, 0);
        return 0;
 }
@@ -470,7 +477,7 @@ xfs_submit_ioend(
                }
                if (bio)
                        xfs_submit_ioend_bio(ioend, bio);
-                xfs_finish_ioend(ioend);
+                xfs_finish_ioend(ioend, 0);
        } while ((ioend = next) != NULL);
 }
@@ -1003,6 +1010,8 @@ xfs_page_state_convert(
                if (buffer_unwritten(bh) || buffer_delay(bh) ||
                    ((buffer_uptodate(bh) || PageUptodate(page)) &&
                     !buffer_mapped(bh) && (unmapped || startio))) {
+                        int new_ioend = 0;
                        /*
                         * Make sure we don't use a read-only iomap
                         */
@@ -1021,6 +1030,15 @@ xfs_page_state_convert(
                        }
                        if (!iomap_valid) {
+                                /*
+                                 * If we didn't have a valid mapping then we
+                                 * need to ensure that we put the new mapping
+                                 * in a new ioend structure. This needs to be
+                                 * done to ensure that the ioends correctly
+                                 * reflect the block mappings at io completion
+                                 * for unwritten extent conversion.
+                                 */
+                                new_ioend = 1;
                                if (type == IOMAP_NEW) {
                                        size = xfs_probe_cluster(inode,
                                                        page, bh, head, 0);
@@ -1040,7 +1058,7 @@ xfs_page_state_convert(
                                if (startio) {
                                        xfs_add_to_ioend(inode, bh, offset,
                                                        type, &ioend,
-                                                        !iomap_valid);
+                                                        new_ioend);
                                } else {
                                        set_buffer_dirty(bh);
                                        unlock_buffer(bh);
@@ -1416,6 +1434,13 @@ xfs_end_io_direct(
         * This is not necessary for synchronous direct I/O, but we do
         * it anyway to keep the code uniform and simpler.
         *
+         * Well, if only it were that simple. Because synchronous direct I/O
+         * requires extent conversion to occur *before* we return to userspace,
+         * we have to wait for extent conversion to complete. Look at the
+         * iocb that has been passed to us to determine if this is AIO or
+         * not. If it is synchronous, tell xfs_finish_ioend() to kick the
+         * workqueue and wait for it to complete.
+         *
         * The core direct I/O code might be changed to always call the
         * completion handler in the future, in which case all this can
         * go away.
@@ -1423,9 +1448,9 @@ xfs_end_io_direct(
        ioend->io_offset = offset;
        ioend->io_size = size;
        if (ioend->io_type == IOMAP_READ) {
-                xfs_finish_ioend(ioend);
+                xfs_finish_ioend(ioend, 0);
        } else if (private && size > 0) {
-                xfs_finish_ioend(ioend);
+                xfs_finish_ioend(ioend, is_sync_kiocb(iocb));
        } else {
                /*
                 * A direct I/O write ioend starts it's life in unwritten
@@ -1434,7 +1459,7 @@ xfs_end_io_direct(
                 * handler.
                 */
                INIT_WORK(&ioend->io_work, xfs_end_bio_written);
-                xfs_finish_ioend(ioend);
+                xfs_finish_ioend(ioend, 0);
        }
        /*
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index fe4f66a5af14..2df63622354e 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -35,7 +35,7 @@
 #include <linux/freezer.h>
 static kmem_zone_t *xfs_buf_zone;
-static kmem_shaker_t xfs_buf_shake;
+static struct shrinker *xfs_buf_shake;
 STATIC int xfsbufd(void *);
 STATIC int xfsbufd_wakeup(int, gfp_t);
 STATIC void xfs_buf_delwri_queue(xfs_buf_t *, int);
@@ -314,7 +314,7 @@ xfs_buf_free(
        ASSERT(list_empty(&bp->b_hash_list));
-        if (bp->b_flags & _XBF_PAGE_CACHE) {
+        if (bp->b_flags & (_XBF_PAGE_CACHE|_XBF_PAGES)) {
                uint            i;
                if ((bp->b_flags & XBF_MAPPED) && (bp->b_page_count > 1))
@@ -323,18 +323,11 @@ xfs_buf_free(
                for (i = 0; i < bp->b_page_count; i++) {
                        struct page     *page = bp->b_pages[i];
-                        ASSERT(!PagePrivate(page));
+                        if (bp->b_flags & _XBF_PAGE_CACHE)
+                                ASSERT(!PagePrivate(page));
                        page_cache_release(page);
                }
                _xfs_buf_free_pages(bp);
-        } else if (bp->b_flags & _XBF_KMEM_ALLOC) {
-                 /*
-                  * XXX(hch): bp->b_count_desired might be incorrect (see
-                  * xfs_buf_associate_memory for details), but fortunately
-                  * the Linux version of kmem_free ignores the len argument..
-                  */
-                kmem_free(bp->b_addr, bp->b_count_desired);
-                _xfs_buf_free_pages(bp);
        }
        xfs_buf_deallocate(bp);
@@ -764,43 +757,44 @@ xfs_buf_get_noaddr(
        size_t                  len,
        xfs_buftarg_t           *target)
 {
-        size_t                  malloc_len = len;
+        unsigned long           page_count = PAGE_ALIGN(len) >> PAGE_SHIFT;
+        int                     error, i;
        xfs_buf_t               *bp;
-        void                    *data;
-        int                     error;
        bp = xfs_buf_allocate(0);
        if (unlikely(bp == NULL))
                goto fail;
        _xfs_buf_initialize(bp, target, 0, len, 0);
- try_again:
+        error = _xfs_buf_get_pages(bp, page_count, 0);
-        data = kmem_alloc(malloc_len, KM_SLEEP | KM_MAYFAIL | KM_LARGE);
+        if (error)
-        if (unlikely(data == NULL))
                goto fail_free_buf;
-        /* check whether alignment matches.. */
+        for (i = 0; i < page_count; i++) {
-        if ((__psunsigned_t)data !=
+                bp->b_pages[i] = alloc_page(GFP_KERNEL);
-            ((__psunsigned_t)data & ~target->bt_smask)) {
+                if (!bp->b_pages[i])
-                /* .. else double the size and try again */
+                        goto fail_free_mem;
-                kmem_free(data, malloc_len);
-                malloc_len <<= 1;
-                goto try_again;
        }
+        bp->b_flags |= _XBF_PAGES;
-        error = xfs_buf_associate_memory(bp, data, len);
+        error = _xfs_buf_map_pages(bp, XBF_MAPPED);
-        if (error)
+        if (unlikely(error)) {
+                printk(KERN_WARNING "%s: failed to map pages\n",
+                                __FUNCTION__);
                goto fail_free_mem;
-        bp->b_flags |= _XBF_KMEM_ALLOC;
+        }
        xfs_buf_unlock(bp);
-        XB_TRACE(bp, "no_daddr", data);
+        XB_TRACE(bp, "no_daddr", len);
        return bp;
 fail_free_mem:
-        kmem_free(data, malloc_len);
+        while (--i >= 0)
+                __free_page(bp->b_pages[i]);
+        _xfs_buf_free_pages(bp);
 fail_free_buf:
-        xfs_buf_free(bp);
+        xfs_buf_deallocate(bp);
 fail:
        return NULL;
 }
@@ -1453,6 +1447,7 @@ xfs_free_buftarg(
        int                     external)
 {
        xfs_flush_buftarg(btp, 1);
+        xfs_blkdev_issue_flush(btp);
        if (external)
                xfs_blkdev_put(btp->bt_bdev);
        xfs_free_bufhash(btp);
@@ -1837,7 +1832,7 @@ xfs_buf_init(void)
        if (!xfsdatad_workqueue)
                goto out_destroy_xfslogd_workqueue;
-        xfs_buf_shake = kmem_shake_register(xfsbufd_wakeup);
+        xfs_buf_shake = set_shrinker(DEFAULT_SEEKS, xfsbufd_wakeup);
        if (!xfs_buf_shake)
                goto out_destroy_xfsdatad_workqueue;
@@ -1859,7 +1854,7 @@ xfs_buf_init(void)
 void
 xfs_buf_terminate(void)
 {
-        kmem_shake_deregister(xfs_buf_shake);
+        remove_shrinker(xfs_buf_shake);
        destroy_workqueue(xfsdatad_workqueue);
        destroy_workqueue(xfslogd_workqueue);
        kmem_zone_destroy(xfs_buf_zone);
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index b6241f6201a5..b5908a34b15d 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -63,7 +63,7 @@ typedef enum {
        /* flags used only internally */
        _XBF_PAGE_CACHE = (1 << 17),/* backed by pagecache                 */
-        _XBF_KMEM_ALLOC = (1 << 18),/* backed by kmem_alloc()              */
+        _XBF_PAGES = (1 << 18),     /* backed by refcounted pages          */
        _XBF_RUN_QUEUES = (1 << 19),/* run block device task queue         */
        _XBF_DELWRI_Q = (1 << 21),   /* buffer on delwri queue             */
 } xfs_buf_flags_t;
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index cb51dc961355..cbcd40c8c2a0 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -124,30 +124,6 @@ xfs_file_aio_write_invis(
 }
 STATIC ssize_t
-xfs_file_sendfile(
-        struct file             *filp,
-        loff_t                  *pos,
-        size_t                  count,
-        read_actor_t            actor,
-        void                    *target)
-{
-        return bhv_vop_sendfile(vn_from_inode(filp->f_path.dentry->d_inode),
-                                filp, pos, 0, count, actor, target, NULL);
-}
-STATIC ssize_t
-xfs_file_sendfile_invis(
-        struct file             *filp,
-        loff_t                  *pos,
-        size_t                  count,
-        read_actor_t            actor,
-        void                    *target)
-{
-        return bhv_vop_sendfile(vn_from_inode(filp->f_path.dentry->d_inode),
-                                filp, pos, IO_INVIS, count, actor, target, NULL);
-}
-STATIC ssize_t
 xfs_file_splice_read(
        struct file             *infilp,
        loff_t                  *ppos,
@@ -208,15 +184,6 @@ xfs_file_open(
 }
 STATIC int
-xfs_file_close(
-        struct file     *filp,
-        fl_owner_t      id)
-{
-        return -bhv_vop_close(vn_from_inode(filp->f_path.dentry->d_inode), 0,
-                                file_count(filp) > 1 ? L_FALSE : L_TRUE, NULL);
-}
-STATIC int
 xfs_file_release(
        struct inode    *inode,
        struct file     *filp)
@@ -452,7 +419,6 @@ const struct file_operations xfs_file_operations = {
        .write          = do_sync_write,
        .aio_read       = xfs_file_aio_read,
        .aio_write      = xfs_file_aio_write,
-        .sendfile       = xfs_file_sendfile,
        .splice_read    = xfs_file_splice_read,
        .splice_write   = xfs_file_splice_write,
        .unlocked_ioctl = xfs_file_ioctl,
@@ -461,7 +427,6 @@ const struct file_operations xfs_file_operations = {
 #endif
        .mmap           = xfs_file_mmap,
        .open           = xfs_file_open,
-        .flush          = xfs_file_close,
        .release        = xfs_file_release,
        .fsync          = xfs_file_fsync,
 #ifdef HAVE_FOP_OPEN_EXEC
@@ -475,7 +440,6 @@ const struct file_operations xfs_invis_file_operations = {
        .write          = do_sync_write,
        .aio_read       = xfs_file_aio_read_invis,
        .aio_write      = xfs_file_aio_write_invis,
-        .sendfile       = xfs_file_sendfile_invis,
        .splice_read    = xfs_file_splice_read_invis,
        .splice_write   = xfs_file_splice_write_invis,
        .unlocked_ioctl = xfs_file_ioctl_invis,
@@ -484,7 +448,6 @@ const struct file_operations xfs_invis_file_operations = {
 #endif
        .mmap           = xfs_file_mmap,
        .open           = xfs_file_open,
-        .flush          = xfs_file_close,
        .release        = xfs_file_release,
        .fsync          = xfs_file_fsync,
 };
diff --git a/fs/xfs/linux-2.6/xfs_globals.c b/fs/xfs/linux-2.6/xfs_globals.c
index ed3a5e1b4b67..bb72c3d4141f 100644
--- a/fs/xfs/linux-2.6/xfs_globals.c
+++ b/fs/xfs/linux-2.6/xfs_globals.c
@@ -46,6 +46,7 @@ xfs_param_t xfs_params = {
        .inherit_nosym  = {     0,              0,              1       },
        .rotorstep      = {     1,              1,              255     },
        .inherit_nodfrg = {     0,              1,              1       },
+        .fstrm_timer    = {     1,              50,             3600*100},
 };
 /*
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index ff5c41ff8d40..5917808abbd6 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -1019,7 +1019,7 @@ xfs_ioc_bulkstat(
        if (cmd == XFS_IOC_FSINUMBERS)
                error = xfs_inumbers(mp, &inlast, &count,
-                                                bulkreq.ubuffer);
+                                        bulkreq.ubuffer, xfs_inumbers_fmt);
        else if (cmd == XFS_IOC_FSBULKSTAT_SINGLE)
                error = xfs_bulkstat_single(mp, &inlast,
                                                bulkreq.ubuffer, &done);
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c
index b83cebc165f1..141cf15067c2 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl32.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl32.c
@@ -23,10 +23,25 @@
 #include <linux/fs.h>
 #include <asm/uaccess.h>
 #include "xfs.h"
-#include "xfs_types.h"
 #include "xfs_fs.h"
+#include "xfs_bit.h"
+#include "xfs_log.h"
+#include "xfs_inum.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_dir2.h"
+#include "xfs_dmapi.h"
+#include "xfs_mount.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_attr_sf.h"
+#include "xfs_dir2_sf.h"
 #include "xfs_vfs.h"
 #include "xfs_vnode.h"
+#include "xfs_dinode.h"
+#include "xfs_inode.h"
+#include "xfs_itable.h"
+#include "xfs_error.h"
 #include "xfs_dfrag.h"
 #define  _NATIVE_IOC(cmd, type) \
@@ -34,6 +49,7 @@
 #if defined(CONFIG_IA64) || defined(CONFIG_X86_64)
 #define BROKEN_X86_ALIGNMENT
+#define _PACKED __attribute__((packed))
 /* on ia32 l_start is on a 32-bit boundary */
 typedef struct xfs_flock64_32 {
        __s16           l_type;
@@ -75,35 +91,276 @@ xfs_ioctl32_flock(
        return (unsigned long)p;
 }
+typedef struct compat_xfs_fsop_geom_v1 {
+        __u32           blocksize;      /* filesystem (data) block size */
+        __u32           rtextsize;      /* realtime extent size         */
+        __u32           agblocks;       /* fsblocks in an AG            */
+        __u32           agcount;        /* number of allocation groups  */
+        __u32           logblocks;      /* fsblocks in the log          */
+        __u32           sectsize;       /* (data) sector size, bytes    */
+        __u32           inodesize;      /* inode size in bytes          */
+        __u32           imaxpct;        /* max allowed inode space(%)   */
+        __u64           datablocks;     /* fsblocks in data subvolume   */
+        __u64           rtblocks;       /* fsblocks in realtime subvol  */
+        __u64           rtextents;      /* rt extents in realtime subvol*/
+        __u64           logstart;       /* starting fsblock of the log  */
+        unsigned char   uuid[16];       /* unique id of the filesystem  */
+        __u32           sunit;          /* stripe unit, fsblocks        */
+        __u32           swidth;         /* stripe width, fsblocks       */
+        __s32           version;        /* structure version            */
+        __u32           flags;          /* superblock version flags     */
+        __u32           logsectsize;    /* log sector size, bytes       */
+        __u32           rtsectsize;     /* realtime sector size, bytes  */
+        __u32           dirblocksize;   /* directory block size, bytes  */
+} __attribute__((packed)) compat_xfs_fsop_geom_v1_t;
+#define XFS_IOC_FSGEOMETRY_V1_32  \
+        _IOR ('X', 100, struct compat_xfs_fsop_geom_v1)
+STATIC unsigned long xfs_ioctl32_geom_v1(unsigned long arg)
+{
+        compat_xfs_fsop_geom_v1_t __user *p32 = (void __user *)arg;
+        xfs_fsop_geom_v1_t __user *p = compat_alloc_user_space(sizeof(*p));
+        if (copy_in_user(p, p32, sizeof(*p32)))
+                return -EFAULT;
+        return (unsigned long)p;
+}
+typedef struct compat_xfs_inogrp {
+        __u64           xi_startino;    /* starting inode number        */
+        __s32           xi_alloccount;  /* # bits set in allocmask      */
+        __u64           xi_allocmask;   /* mask of allocated inodes     */
+} __attribute__((packed)) compat_xfs_inogrp_t;
+STATIC int xfs_inumbers_fmt_compat(
+        void __user *ubuffer,
+        const xfs_inogrp_t *buffer,
+        long count,
+        long *written)
+{
+        compat_xfs_inogrp_t *p32 = ubuffer;
+        long i;
+        for (i = 0; i < count; i++) {
+                if (put_user(buffer[i].xi_startino,   &p32[i].xi_startino) ||
+                    put_user(buffer[i].xi_alloccount, &p32[i].xi_alloccount) ||
+                    put_user(buffer[i].xi_allocmask,  &p32[i].xi_allocmask))
+                        return -EFAULT;
+        }
+        *written = count * sizeof(*p32);
+        return 0;
+}
 #else
-typedef struct xfs_fsop_bulkreq32 {
+#define xfs_inumbers_fmt_compat xfs_inumbers_fmt
+#define _PACKED
+#endif
+/* XFS_IOC_FSBULKSTAT and friends */
+typedef struct compat_xfs_bstime {
+        __s32           tv_sec;         /* seconds              */
+        __s32           tv_nsec;        /* and nanoseconds      */
+} compat_xfs_bstime_t;
+STATIC int xfs_bstime_store_compat(
+        compat_xfs_bstime_t __user *p32,
+        const xfs_bstime_t *p)
+{
+        __s32 sec32;
+        sec32 = p->tv_sec;
+        if (put_user(sec32, &p32->tv_sec) ||
+            put_user(p->tv_nsec, &p32->tv_nsec))
+                return -EFAULT;
+        return 0;
+}
+typedef struct compat_xfs_bstat {
+        __u64           bs_ino;         /* inode number                 */
+        __u16           bs_mode;        /* type and mode                */
+        __u16           bs_nlink;       /* number of links              */
+        __u32           bs_uid;         /* user id                      */
+        __u32           bs_gid;         /* group id                     */
+        __u32           bs_rdev;        /* device value                 */
+        __s32           bs_blksize;     /* block size                   */
+        __s64           bs_size;        /* file size                    */
+        compat_xfs_bstime_t bs_atime;   /* access time                  */
+        compat_xfs_bstime_t bs_mtime;   /* modify time                  */
+        compat_xfs_bstime_t bs_ctime;   /* inode change time            */
+        int64_t         bs_blocks;      /* number of blocks             */
+        __u32           bs_xflags;      /* extended flags               */
+        __s32           bs_extsize;     /* extent size                  */
+        __s32           bs_extents;     /* number of extents            */
+        __u32           bs_gen;         /* generation count             */
+        __u16           bs_projid;      /* project id                   */
+        unsigned char   bs_pad[14];     /* pad space, unused            */
+        __u32           bs_dmevmask;    /* DMIG event mask              */
+        __u16           bs_dmstate;     /* DMIG state info              */
+        __u16           bs_aextents;    /* attribute number of extents  */
+} _PACKED compat_xfs_bstat_t;
+STATIC int xfs_bulkstat_one_fmt_compat(
+        void                    __user *ubuffer,
+        const xfs_bstat_t       *buffer)
+{
+        compat_xfs_bstat_t __user *p32 = ubuffer;
+        if (put_user(buffer->bs_ino, &p32->bs_ino) ||
+            put_user(buffer->bs_mode, &p32->bs_mode) ||
+            put_user(buffer->bs_nlink, &p32->bs_nlink) ||
+            put_user(buffer->bs_uid, &p32->bs_uid) ||
+            put_user(buffer->bs_gid, &p32->bs_gid) ||
+            put_user(buffer->bs_rdev, &p32->bs_rdev) ||
+            put_user(buffer->bs_blksize, &p32->bs_blksize) ||
+            put_user(buffer->bs_size, &p32->bs_size) ||
+            xfs_bstime_store_compat(&p32->bs_atime, &buffer->bs_atime) ||
+            xfs_bstime_store_compat(&p32->bs_mtime, &buffer->bs_mtime) ||
+            xfs_bstime_store_compat(&p32->bs_ctime, &buffer->bs_ctime) ||
+            put_user(buffer->bs_blocks, &p32->bs_blocks) ||
+            put_user(buffer->bs_xflags, &p32->bs_xflags) ||
+            put_user(buffer->bs_extsize, &p32->bs_extsize) ||
+            put_user(buffer->bs_extents, &p32->bs_extents) ||
+            put_user(buffer->bs_gen, &p32->bs_gen) ||
+            put_user(buffer->bs_projid, &p32->bs_projid) ||
+            put_user(buffer->bs_dmevmask, &p32->bs_dmevmask) ||
+            put_user(buffer->bs_dmstate, &p32->bs_dmstate) ||
+            put_user(buffer->bs_aextents, &p32->bs_aextents))
+                return -EFAULT;
+        return sizeof(*p32);
+}
+typedef struct compat_xfs_fsop_bulkreq {
        compat_uptr_t   lastip;         /* last inode # pointer         */
        __s32           icount;         /* count of entries in buffer   */
        compat_uptr_t   ubuffer;        /* user buffer for inode desc.  */
-        __s32           ocount;         /* output count pointer         */
+        compat_uptr_t   ocount;         /* output count pointer         */
-} xfs_fsop_bulkreq32_t;
+} compat_xfs_fsop_bulkreq_t;
-STATIC unsigned long
+#define XFS_IOC_FSBULKSTAT_32 \
-xfs_ioctl32_bulkstat(
+        _IOWR('X', 101, struct compat_xfs_fsop_bulkreq)
-        unsigned long           arg)
+#define XFS_IOC_FSBULKSTAT_SINGLE_32 \
+        _IOWR('X', 102, struct compat_xfs_fsop_bulkreq)
+#define XFS_IOC_FSINUMBERS_32 \
+        _IOWR('X', 103, struct compat_xfs_fsop_bulkreq)
+/* copied from xfs_ioctl.c */
+STATIC int
+xfs_ioc_bulkstat_compat(
+        xfs_mount_t             *mp,
+        unsigned int            cmd,
+        void                    __user *arg)
 {
-        xfs_fsop_bulkreq32_t    __user *p32 = (void __user *)arg;
+        compat_xfs_fsop_bulkreq_t __user *p32 = (void __user *)arg;
-        xfs_fsop_bulkreq_t      __user *p = compat_alloc_user_space(sizeof(*p));
        u32                     addr;
+        xfs_fsop_bulkreq_t      bulkreq;
+        int                     count;  /* # of records returned */
+        xfs_ino_t               inlast; /* last inode number */
+        int                     done;
+        int                     error;
+        /* done = 1 if there are more stats to get and if bulkstat */
+        /* should be called again (unused here, but used in dmapi) */
+        if (!capable(CAP_SYS_ADMIN))
+                return -EPERM;
+        if (XFS_FORCED_SHUTDOWN(mp))
+                return -XFS_ERROR(EIO);
+        if (get_user(addr, &p32->lastip))
+                return -EFAULT;
+        bulkreq.lastip = compat_ptr(addr);
+        if (get_user(bulkreq.icount, &p32->icount) ||
+            get_user(addr, &p32->ubuffer))
+                return -EFAULT;
+        bulkreq.ubuffer = compat_ptr(addr);
+        if (get_user(addr, &p32->ocount))
+                return -EFAULT;
+        bulkreq.ocount = compat_ptr(addr);
+        if (copy_from_user(&inlast, bulkreq.lastip, sizeof(__s64)))
+                return -XFS_ERROR(EFAULT);
+        if ((count = bulkreq.icount) <= 0)
+                return -XFS_ERROR(EINVAL);
+        if (cmd == XFS_IOC_FSINUMBERS)
+                error = xfs_inumbers(mp, &inlast, &count,
+                                bulkreq.ubuffer, xfs_inumbers_fmt_compat);
+        else {
+                /* declare a var to get a warning in case the type changes */
+                bulkstat_one_fmt_pf formatter = xfs_bulkstat_one_fmt_compat;
+                error = xfs_bulkstat(mp, &inlast, &count,
+                        xfs_bulkstat_one, formatter,
+                        sizeof(compat_xfs_bstat_t), bulkreq.ubuffer,
+                        BULKSTAT_FG_QUICK, &done);
+        }
+        if (error)
+                return -error;
+        if (bulkreq.ocount != NULL) {
+                if (copy_to_user(bulkreq.lastip, &inlast,
+                                                sizeof(xfs_ino_t)))
+                        return -XFS_ERROR(EFAULT);
+                if (copy_to_user(bulkreq.ocount, &count, sizeof(count)))
+                        return -XFS_ERROR(EFAULT);
+        }
+        return 0;
+}
+typedef struct compat_xfs_fsop_handlereq {
+        __u32           fd;             /* fd for FD_TO_HANDLE          */
+        compat_uptr_t   path;           /* user pathname                */
+        __u32           oflags;         /* open flags                   */
+        compat_uptr_t   ihandle;        /* user supplied handle         */
+        __u32           ihandlen;       /* user supplied length         */
+        compat_uptr_t   ohandle;        /* user buffer for handle       */
+        compat_uptr_t   ohandlen;       /* user buffer length           */
+} compat_xfs_fsop_handlereq_t;
+#define XFS_IOC_PATH_TO_FSHANDLE_32 \
+        _IOWR('X', 104, struct compat_xfs_fsop_handlereq)
+#define XFS_IOC_PATH_TO_HANDLE_32 \
+        _IOWR('X', 105, struct compat_xfs_fsop_handlereq)
+#define XFS_IOC_FD_TO_HANDLE_32 \
+        _IOWR('X', 106, struct compat_xfs_fsop_handlereq)
+#define XFS_IOC_OPEN_BY_HANDLE_32 \
+        _IOWR('X', 107, struct compat_xfs_fsop_handlereq)
+#define XFS_IOC_READLINK_BY_HANDLE_32 \
+        _IOWR('X', 108, struct compat_xfs_fsop_handlereq)
+STATIC unsigned long xfs_ioctl32_fshandle(unsigned long arg)
+{
+        compat_xfs_fsop_handlereq_t __user *p32 = (void __user *)arg;
+        xfs_fsop_handlereq_t __user *p = compat_alloc_user_space(sizeof(*p));
+        u32 addr;
-        if (get_user(addr, &p32->lastip) ||
+        if (copy_in_user(&p->fd, &p32->fd, sizeof(__u32)) ||
-            put_user(compat_ptr(addr), &p->lastip) ||
+            get_user(addr, &p32->path) ||
-            copy_in_user(&p->icount, &p32->icount, sizeof(s32)) ||
+            put_user(compat_ptr(addr), &p->path) ||
-            get_user(addr, &p32->ubuffer) ||
+            copy_in_user(&p->oflags, &p32->oflags, sizeof(__u32)) ||
-            put_user(compat_ptr(addr), &p->ubuffer) ||
+            get_user(addr, &p32->ihandle) ||
-            get_user(addr, &p32->ocount) ||
+            put_user(compat_ptr(addr), &p->ihandle) ||
-            put_user(compat_ptr(addr), &p->ocount))
+            copy_in_user(&p->ihandlen, &p32->ihandlen, sizeof(__u32)) ||
+            get_user(addr, &p32->ohandle) ||
+            put_user(compat_ptr(addr), &p->ohandle) ||
+            get_user(addr, &p32->ohandlen) ||
+            put_user(compat_ptr(addr), &p->ohandlen))
                return -EFAULT;
        return (unsigned long)p;
 }
-#endif
 STATIC long
 xfs_compat_ioctl(
@@ -118,7 +375,6 @@ xfs_compat_ioctl(
        switch (cmd) {
        case XFS_IOC_DIOINFO:
-        case XFS_IOC_FSGEOMETRY_V1:
        case XFS_IOC_FSGEOMETRY:
        case XFS_IOC_GETVERSION:
        case XFS_IOC_GETXFLAGS:
@@ -131,12 +387,7 @@ xfs_compat_ioctl(
        case XFS_IOC_GETBMAPA:
        case XFS_IOC_GETBMAPX:
 /* not handled
-        case XFS_IOC_FD_TO_HANDLE:
-        case XFS_IOC_PATH_TO_HANDLE:
-        case XFS_IOC_PATH_TO_FSHANDLE:
-        case XFS_IOC_OPEN_BY_HANDLE:
        case XFS_IOC_FSSETDM_BY_HANDLE:
-        case XFS_IOC_READLINK_BY_HANDLE:
        case XFS_IOC_ATTRLIST_BY_HANDLE:
        case XFS_IOC_ATTRMULTI_BY_HANDLE:
 */
@@ -166,6 +417,10 @@ xfs_compat_ioctl(
                arg = xfs_ioctl32_flock(arg);
                cmd = _NATIVE_IOC(cmd, struct xfs_flock64);
                break;
+        case XFS_IOC_FSGEOMETRY_V1_32:
+                arg = xfs_ioctl32_geom_v1(arg);
+                cmd = _NATIVE_IOC(cmd, struct xfs_fsop_geom_v1);
+                break;
 #else /* These are handled fine if no alignment issues */
        case XFS_IOC_ALLOCSP:
@@ -176,18 +431,28 @@ xfs_compat_ioctl(
        case XFS_IOC_FREESP64:
        case XFS_IOC_RESVSP64:
        case XFS_IOC_UNRESVSP64:
+        case XFS_IOC_FSGEOMETRY_V1:
                break;
        /* xfs_bstat_t still has wrong u32 vs u64 alignment */
        case XFS_IOC_SWAPEXT:
                break;
-        case XFS_IOC_FSBULKSTAT_SINGLE:
-        case XFS_IOC_FSBULKSTAT:
-        case XFS_IOC_FSINUMBERS:
-                arg = xfs_ioctl32_bulkstat(arg);
-                break;
 #endif
+        case XFS_IOC_FSBULKSTAT_32:
+        case XFS_IOC_FSBULKSTAT_SINGLE_32:
+        case XFS_IOC_FSINUMBERS_32:
+                cmd = _NATIVE_IOC(cmd, struct xfs_fsop_bulkreq);
+                return xfs_ioc_bulkstat_compat(XFS_BHVTOI(VNHEAD(vp))->i_mount,
+                                cmd, (void*)arg);
+        case XFS_IOC_FD_TO_HANDLE_32:
+        case XFS_IOC_PATH_TO_HANDLE_32:
+        case XFS_IOC_PATH_TO_FSHANDLE_32:
+        case XFS_IOC_OPEN_BY_HANDLE_32:
+        case XFS_IOC_READLINK_BY_HANDLE_32:
+                arg = xfs_ioctl32_fshandle(arg);
+                cmd = _NATIVE_IOC(cmd, struct xfs_fsop_handlereq);
+                break;
        default:
                return -ENOIOCTLCMD;
        }
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index 715adad7dd4d..330c4ba9d404 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -101,7 +101,6 @@
 * Feature macros (disable/enable)
 */
 #undef  HAVE_REFCACHE   /* reference cache not needed for NFS in 2.6 */
-#define HAVE_SENDFILE   /* sendfile(2) exists in 2.6, but not in 2.4 */
 #define HAVE_SPLICE     /* a splice(2) exists in 2.6, but not in 2.4 */
 #ifdef CONFIG_SMP
 #define HAVE_PERCPU_SB  /* per cpu superblock counters are a 2.6 feature */
@@ -124,6 +123,7 @@
 #define xfs_inherit_nosymlinks  xfs_params.inherit_nosym.val
 #define xfs_rotorstep           xfs_params.rotorstep.val
 #define xfs_inherit_nodefrag    xfs_params.inherit_nodfrg.val
+#define xfs_fstrm_centisecs     xfs_params.fstrm_timer.val
 #define current_cpu()           (raw_smp_processor_id())
 #define current_pid()           (current->pid)
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c
index 86fb671a8bcc..765ec16a6e39 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.c
+++ b/fs/xfs/linux-2.6/xfs_lrw.c
@@ -159,7 +159,7 @@ xfs_iozero(
                if (status)
                        goto unlock;
-                memclear_highpage_flush(page, offset, bytes);
+                zero_user_page(page, offset, bytes, KM_USER0);
                status = mapping->a_ops->commit_write(NULL, page, offset,
                                                        offset + bytes);
@@ -287,50 +287,6 @@ xfs_read(
 }
 ssize_t
-xfs_sendfile(
-        bhv_desc_t              *bdp,
-        struct file             *filp,
-        loff_t                  *offset,
-        int                     ioflags,
-        size_t                  count,
-        read_actor_t            actor,
-        void                    *target,
-        cred_t                  *credp)
-{
-        xfs_inode_t             *ip = XFS_BHVTOI(bdp);
-        xfs_mount_t             *mp = ip->i_mount;
-        ssize_t                 ret;
-        XFS_STATS_INC(xs_read_calls);
-        if (XFS_FORCED_SHUTDOWN(mp))
-                return -EIO;
-        xfs_ilock(ip, XFS_IOLOCK_SHARED);
-        if (DM_EVENT_ENABLED(BHV_TO_VNODE(bdp)->v_vfsp, ip, DM_EVENT_READ) &&
-            (!(ioflags & IO_INVIS))) {
-                bhv_vrwlock_t locktype = VRWLOCK_READ;
-                int error;
-                error = XFS_SEND_DATA(mp, DM_EVENT_READ, BHV_TO_VNODE(bdp),
-                                      *offset, count,
-                                      FILP_DELAY_FLAG(filp), &locktype);
-                if (error) {
-                        xfs_iunlock(ip, XFS_IOLOCK_SHARED);
-                        return -error;
-                }
-        }
-        xfs_rw_enter_trace(XFS_SENDFILE_ENTER, &ip->i_iocore,
-                   (void *)(unsigned long)target, count, *offset, ioflags);
-        ret = generic_file_sendfile(filp, offset, count, actor, target);
-        if (ret > 0)
-                XFS_STATS_ADD(xs_read_bytes, ret);
-        xfs_iunlock(ip, XFS_IOLOCK_SHARED);
-        return ret;
-}
-ssize_t
 xfs_splice_read(
        bhv_desc_t              *bdp,
        struct file             *infilp,
diff --git a/fs/xfs/linux-2.6/xfs_lrw.h b/fs/xfs/linux-2.6/xfs_lrw.h
index 7ac51b1d2161..7c60a1eed88b 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.h
+++ b/fs/xfs/linux-2.6/xfs_lrw.h
@@ -90,9 +90,6 @@ extern ssize_t xfs_read(struct bhv_desc *, struct kiocb *,
 extern ssize_t xfs_write(struct bhv_desc *, struct kiocb *,
                                const struct iovec *, unsigned int,
                                loff_t *, int, struct cred *);
-extern ssize_t xfs_sendfile(struct bhv_desc *, struct file *,
-                                loff_t *, int, size_t, read_actor_t,
-                                void *, struct cred *);
 extern ssize_t xfs_splice_read(struct bhv_desc *, struct file *, loff_t *,
                                struct pipe_inode_info *, size_t, int, int,
                                struct cred *);
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index bf9a9d5909be..06894cf00b12 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -547,7 +547,8 @@ vfs_sync_worker(
        if (!(vfsp->vfs_flag & VFS_RDONLY))
                error = bhv_vfs_sync(vfsp, SYNC_FSDATA | SYNC_BDFLUSH | \
-                                        SYNC_ATTR | SYNC_REFCACHE, NULL);
+                                        SYNC_ATTR | SYNC_REFCACHE | SYNC_SUPER,
+                                        NULL);
        vfsp->vfs_sync_seq++;
        wake_up(&vfsp->vfs_wait_single_sync_task);
 }
@@ -663,7 +664,7 @@ xfs_fs_sync_super(
                 * occur here so don't bother flushing the buftarg (i.e
                 * SYNC_QUIESCE) because it'll just get dirty again.
                 */
-                flags = SYNC_FSDATA | SYNC_DELWRI | SYNC_WAIT | SYNC_IOWAIT;
+                flags = SYNC_DATA_QUIESCE;
        } else
                flags = SYNC_FSDATA | (wait ? SYNC_WAIT : 0);
diff --git a/fs/xfs/linux-2.6/xfs_sysctl.c b/fs/xfs/linux-2.6/xfs_sysctl.c
index cd6eaa44aa2b..bb997d75c05c 100644
--- a/fs/xfs/linux-2.6/xfs_sysctl.c
+++ b/fs/xfs/linux-2.6/xfs_sysctl.c
@@ -210,6 +210,17 @@ static ctl_table xfs_table[] = {
                .extra1         = &xfs_params.inherit_nodfrg.min,
                .extra2         = &xfs_params.inherit_nodfrg.max
        },
+        {
+                .ctl_name       = XFS_FILESTREAM_TIMER,
+                .procname       = "filestream_centisecs",
+                .data           = &xfs_params.fstrm_timer.val,
+                .maxlen         = sizeof(int),
+                .mode           = 0644,
+                .proc_handler   = &proc_dointvec_minmax,
+                .strategy       = &sysctl_intvec,
+                .extra1         = &xfs_params.fstrm_timer.min,
+                .extra2         = &xfs_params.fstrm_timer.max,
+        },
        /* please keep this the last entry */
 #ifdef CONFIG_PROC_FS
        {
diff --git a/fs/xfs/linux-2.6/xfs_sysctl.h b/fs/xfs/linux-2.6/xfs_sysctl.h
index a631fb8cc5ac..98b97e399d6f 100644
--- a/fs/xfs/linux-2.6/xfs_sysctl.h
+++ b/fs/xfs/linux-2.6/xfs_sysctl.h
@@ -47,6 +47,7 @@ typedef struct xfs_param {
        xfs_sysctl_val_t inherit_nosym; /* Inherit the "nosymlinks" flag. */
        xfs_sysctl_val_t rotorstep;     /* inode32 AG rotoring control knob */
        xfs_sysctl_val_t inherit_nodfrg;/* Inherit the "nodefrag" inode flag. */
+        xfs_sysctl_val_t fstrm_timer;   /* Filestream dir-AG assoc'n timeout. */
 } xfs_param_t;
 /*
@@ -86,6 +87,7 @@ enum {
        XFS_INHERIT_NOSYM = 19,
        XFS_ROTORSTEP = 20,
        XFS_INHERIT_NODFRG = 21,
+        XFS_FILESTREAM_TIMER = 22,
 };
 extern xfs_param_t      xfs_params;
diff --git a/fs/xfs/linux-2.6/xfs_vfs.h b/fs/xfs/linux-2.6/xfs_vfs.h
index e2c2ce98ab5b..dca3481aaafa 100644
--- a/fs/xfs/linux-2.6/xfs_vfs.h
+++ b/fs/xfs/linux-2.6/xfs_vfs.h
@@ -92,6 +92,21 @@ typedef enum {
 #define SYNC_REFCACHE           0x0040  /* prune some of the nfs ref cache */
 #define SYNC_REMOUNT            0x0080  /* remount readonly, no dummy LRs */
 #define SYNC_IOWAIT             0x0100  /* wait for all I/O to complete */
+#define SYNC_SUPER              0x0200  /* flush superblock to disk */
+/*
+ * When remounting a filesystem read-only or freezing the filesystem,
+ * we have two phases to execute. The first phase is syncing the data
+ * before we quiesce the filesystem, and the second is flushing all the
+ * inodes out after we've waited for all the transactions created by
+ * the first phase to complete. The second phase uses SYNC_INODE_QUIESCE
+ * to ensure that the inodes are written to their location on disk
+ * rather than just existing in transactions in the log. This means
+ * after a quiesce there is no log replay required to write the inodes
+ * to disk (this is the main difference between a sync and a quiesce).
+ */
+#define SYNC_DATA_QUIESCE       (SYNC_DELWRI|SYNC_FSDATA|SYNC_WAIT|SYNC_IOWAIT)
+#define SYNC_INODE_QUIESCE      (SYNC_REMOUNT|SYNC_ATTR|SYNC_WAIT)
 #define SHUTDOWN_META_IO_ERROR  0x0001  /* write attempt to metadata failed */
 #define SHUTDOWN_LOG_IO_ERROR   0x0002  /* write attempt to the log failed */
diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h
index d1b2d01843d1..5742d65f0785 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.h
+++ b/fs/xfs/linux-2.6/xfs_vnode.h
@@ -129,19 +129,13 @@ typedef enum bhv_vchange {
        VCHANGE_FLAGS_IOEXCL_COUNT      = 4
 } bhv_vchange_t;
-typedef enum { L_FALSE, L_TRUE } lastclose_t;
 typedef int     (*vop_open_t)(bhv_desc_t *, struct cred *);
-typedef int     (*vop_close_t)(bhv_desc_t *, int, lastclose_t, struct cred *);
 typedef ssize_t (*vop_read_t)(bhv_desc_t *, struct kiocb *,
                                const struct iovec *, unsigned int,
                                loff_t *, int, struct cred *);
 typedef ssize_t (*vop_write_t)(bhv_desc_t *, struct kiocb *,
                                const struct iovec *, unsigned int,
                                loff_t *, int, struct cred *);
-typedef ssize_t (*vop_sendfile_t)(bhv_desc_t *, struct file *,
-                                loff_t *, int, size_t, read_actor_t,
-                                void *, struct cred *);
 typedef ssize_t (*vop_splice_read_t)(bhv_desc_t *, struct file *, loff_t *,
                                struct pipe_inode_info *, size_t, int, int,
                                struct cred *);
@@ -203,10 +197,8 @@ typedef int	(*vop_iflush_t)(bhv_desc_t *, int);
 typedef struct bhv_vnodeops {
        bhv_position_t  vn_position;    /* position within behavior chain */
        vop_open_t              vop_open;
-        vop_close_t             vop_close;
        vop_read_t              vop_read;
        vop_write_t             vop_write;
-        vop_sendfile_t          vop_sendfile;
        vop_splice_read_t       vop_splice_read;
        vop_splice_write_t      vop_splice_write;
        vop_ioctl_t             vop_ioctl;
@@ -249,13 +241,10 @@ typedef struct bhv_vnodeops {
 #define VNHEAD(vp)      ((vp)->v_bh.bh_first)
 #define VOP(op, vp)     (*((bhv_vnodeops_t *)VNHEAD(vp)->bd_ops)->op)
 #define bhv_vop_open(vp, cr)            VOP(vop_open, vp)(VNHEAD(vp),cr)
-#define bhv_vop_close(vp, f,last,cr)    VOP(vop_close, vp)(VNHEAD(vp),f,last,cr)
 #define bhv_vop_read(vp,file,iov,segs,offset,ioflags,cr)                \
                VOP(vop_read, vp)(VNHEAD(vp),file,iov,segs,offset,ioflags,cr)
 #define bhv_vop_write(vp,file,iov,segs,offset,ioflags,cr)               \
                VOP(vop_write, vp)(VNHEAD(vp),file,iov,segs,offset,ioflags,cr)
-#define bhv_vop_sendfile(vp,f,off,ioflags,cnt,act,targ,cr)              \
-                VOP(vop_sendfile, vp)(VNHEAD(vp),f,off,ioflags,cnt,act,targ,cr)
 #define bhv_vop_splice_read(vp,f,o,pipe,cnt,fl,iofl,cr)                 \
                VOP(vop_splice_read, vp)(VNHEAD(vp),f,o,pipe,cnt,fl,iofl,cr)
 #define bhv_vop_splice_write(vp,f,o,pipe,cnt,fl,iofl,cr)                \
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index 3e4a8ad8a34c..7def4c699343 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -62,10 +62,9 @@ uint		ndquot;
 kmem_zone_t     *qm_dqzone;
 kmem_zone_t     *qm_dqtrxzone;
-static kmem_shaker_t    xfs_qm_shaker;
+static struct shrinker *xfs_qm_shaker;
 static cred_t   xfs_zerocr;
-static xfs_inode_t      xfs_zeroino;
 STATIC void     xfs_qm_list_init(xfs_dqlist_t *, char *, int);
 STATIC void     xfs_qm_list_destroy(xfs_dqlist_t *);
@@ -150,7 +149,7 @@ xfs_Gqm_init(void)
        } else
                xqm->qm_dqzone = qm_dqzone;
-        xfs_qm_shaker = kmem_shake_register(xfs_qm_shake);
+        xfs_qm_shaker = set_shrinker(DEFAULT_SEEKS, xfs_qm_shake);
        /*
         * The t_dqinfo portion of transactions.
@@ -182,7 +181,7 @@ xfs_qm_destroy(
        ASSERT(xqm != NULL);
        ASSERT(xqm->qm_nrefs == 0);
-        kmem_shake_deregister(xfs_qm_shaker);
+        remove_shrinker(xfs_qm_shaker);
        hsize = xqm->qm_dqhashmask + 1;
        for (i = 0; i < hsize; i++) {
                xfs_qm_list_destroy(&(xqm->qm_usr_dqhtable[i]));
@@ -1415,7 +1414,7 @@ xfs_qm_qino_alloc(
                return error;
        }
-        if ((error = xfs_dir_ialloc(&tp, &xfs_zeroino, S_IFREG, 1, 0,
+        if ((error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0,
                                   &xfs_zerocr, 0, 1, ip, &committed))) {
                xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES |
                                 XFS_TRANS_ABORT);
diff --git a/fs/xfs/xfs.h b/fs/xfs/xfs.h
index bf0a12040b13..b5a7d92c6843 100644
--- a/fs/xfs/xfs.h
+++ b/fs/xfs/xfs.h
@@ -38,6 +38,7 @@
 #define XFS_RW_TRACE 1
 #define XFS_BUF_TRACE 1
 #define XFS_VNODE_TRACE 1
+#define XFS_FILESTREAMS_TRACE 1
 #endif
 #include <linux-2.6/xfs_linux.h>
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h
index 9ece7f87ec5b..51c09c114a20 100644
--- a/fs/xfs/xfs_ag.h
+++ b/fs/xfs/xfs_ag.h
@@ -68,6 +68,7 @@ typedef struct xfs_agf {
        __be32          agf_flcount;    /* count of blocks in freelist */
        __be32          agf_freeblks;   /* total free blocks */
        __be32          agf_longest;    /* longest free space */
+        __be32          agf_btreeblks;  /* # of blocks held in AGF btrees */
 } xfs_agf_t;
 #define XFS_AGF_MAGICNUM        0x00000001
@@ -81,7 +82,8 @@ typedef struct xfs_agf {
 #define XFS_AGF_FLCOUNT         0x00000100
 #define XFS_AGF_FREEBLKS        0x00000200
 #define XFS_AGF_LONGEST         0x00000400
-#define XFS_AGF_NUM_BITS        11
+#define XFS_AGF_BTREEBLKS       0x00000800
+#define XFS_AGF_NUM_BITS        12
 #define XFS_AGF_ALL_BITS        ((1 << XFS_AGF_NUM_BITS) - 1)
 /* disk block (xfs_daddr_t) in the AG */
@@ -186,12 +188,15 @@ typedef struct xfs_perag
        __uint32_t      pagf_flcount;   /* count of blocks in freelist */
        xfs_extlen_t    pagf_freeblks;  /* total free blocks */
        xfs_extlen_t    pagf_longest;   /* longest free space */
+        __uint32_t      pagf_btreeblks; /* # of blocks held in AGF btrees */
        xfs_agino_t     pagi_freecount; /* number of free inodes */
+        xfs_agino_t     pagi_count;     /* number of allocated inodes */
+        int             pagb_count;     /* pagb slots in use */
 #ifdef __KERNEL__
        lock_t          pagb_lock;      /* lock for pagb_list */
 #endif
-        int             pagb_count;     /* pagb slots in use */
        xfs_perag_busy_t *pagb_list;    /* unstable blocks */
+        atomic_t        pagf_fstrms;    /* # of filestreams active in this AG */
 } xfs_perag_t;
 #define XFS_AG_MAXLEVELS(mp)            ((mp)->m_ag_maxlevels)
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index 8e9a40aa0cd3..012a649a19c3 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -55,17 +55,17 @@ xfs_alloc_search_busy(xfs_trans_t *tp,
 ktrace_t *xfs_alloc_trace_buf;
 #define TRACE_ALLOC(s,a)        \
-        xfs_alloc_trace_alloc(fname, s, a, __LINE__)
+        xfs_alloc_trace_alloc(__FUNCTION__, s, a, __LINE__)
 #define TRACE_FREE(s,a,b,x,f)   \
-        xfs_alloc_trace_free(fname, s, mp, a, b, x, f, __LINE__)
+        xfs_alloc_trace_free(__FUNCTION__, s, mp, a, b, x, f, __LINE__)
 #define TRACE_MODAGF(s,a,f)     \
-        xfs_alloc_trace_modagf(fname, s, mp, a, f, __LINE__)
+        xfs_alloc_trace_modagf(__FUNCTION__, s, mp, a, f, __LINE__)
-#define TRACE_BUSY(fname,s,ag,agb,l,sl,tp)      \
+#define TRACE_BUSY(__FUNCTION__,s,ag,agb,l,sl,tp)       \
-        xfs_alloc_trace_busy(fname, s, mp, ag, agb, l, sl, tp, XFS_ALLOC_KTRACE_BUSY, __LINE__)
+        xfs_alloc_trace_busy(__FUNCTION__, s, mp, ag, agb, l, sl, tp, XFS_ALLOC_KTRACE_BUSY, __LINE__)
-#define TRACE_UNBUSY(fname,s,ag,sl,tp)  \
+#define TRACE_UNBUSY(__FUNCTION__,s,ag,sl,tp)   \
-        xfs_alloc_trace_busy(fname, s, mp, ag, -1, -1, sl, tp, XFS_ALLOC_KTRACE_UNBUSY, __LINE__)
+        xfs_alloc_trace_busy(__FUNCTION__, s, mp, ag, -1, -1, sl, tp, XFS_ALLOC_KTRACE_UNBUSY, __LINE__)
-#define TRACE_BUSYSEARCH(fname,s,ag,agb,l,sl,tp)        \
+#define TRACE_BUSYSEARCH(__FUNCTION__,s,ag,agb,l,sl,tp) \
-        xfs_alloc_trace_busy(fname, s, mp, ag, agb, l, sl, tp, XFS_ALLOC_KTRACE_BUSYSEARCH, __LINE__)
+        xfs_alloc_trace_busy(__FUNCTION__, s, mp, ag, agb, l, sl, tp, XFS_ALLOC_KTRACE_BUSYSEARCH, __LINE__)
 #else
 #define TRACE_ALLOC(s,a)
 #define TRACE_FREE(s,a,b,x,f)
@@ -420,7 +420,7 @@ xfs_alloc_read_agfl(
 */
 STATIC void
 xfs_alloc_trace_alloc(
-        char            *name,          /* function tag string */
+        const char      *name,          /* function tag string */
        char            *str,           /* additional string */
        xfs_alloc_arg_t *args,          /* allocation argument structure */
        int             line)           /* source line number */
@@ -453,7 +453,7 @@ xfs_alloc_trace_alloc(
 */
 STATIC void
 xfs_alloc_trace_free(
-        char            *name,          /* function tag string */
+        const char      *name,          /* function tag string */
        char            *str,           /* additional string */
        xfs_mount_t     *mp,            /* file system mount point */
        xfs_agnumber_t  agno,           /* allocation group number */
@@ -479,7 +479,7 @@ xfs_alloc_trace_free(
 */
 STATIC void
 xfs_alloc_trace_modagf(
-        char            *name,          /* function tag string */
+        const char      *name,          /* function tag string */
        char            *str,           /* additional string */
        xfs_mount_t     *mp,            /* file system mount point */
        xfs_agf_t       *agf,           /* new agf value */
@@ -507,7 +507,7 @@ xfs_alloc_trace_modagf(
 STATIC void
 xfs_alloc_trace_busy(
-        char            *name,          /* function tag string */
+        const char      *name,          /* function tag string */
        char            *str,           /* additional string */
        xfs_mount_t     *mp,            /* file system mount point */
        xfs_agnumber_t  agno,           /* allocation group number */
@@ -549,9 +549,6 @@ xfs_alloc_ag_vextent(
        xfs_alloc_arg_t *args)  /* argument structure for allocation */
 {
        int             error=0;
-#ifdef XFS_ALLOC_TRACE
-        static char     fname[] = "xfs_alloc_ag_vextent";
-#endif
        ASSERT(args->minlen > 0);
        ASSERT(args->maxlen > 0);
@@ -635,9 +632,6 @@ xfs_alloc_ag_vextent_exact(
        xfs_agblock_t   fbno;   /* start block of found extent */
        xfs_agblock_t   fend;   /* end block of found extent */
        xfs_extlen_t    flen;   /* length of found extent */
-#ifdef XFS_ALLOC_TRACE
-        static char     fname[] = "xfs_alloc_ag_vextent_exact";
-#endif
        int             i;      /* success/failure of operation */
        xfs_agblock_t   maxend; /* end of maximal extent */
        xfs_agblock_t   minend; /* end of minimal extent */
@@ -737,9 +731,6 @@ xfs_alloc_ag_vextent_near(
        xfs_btree_cur_t *bno_cur_gt;    /* cursor for bno btree, right side */
        xfs_btree_cur_t *bno_cur_lt;    /* cursor for bno btree, left side */
        xfs_btree_cur_t *cnt_cur;       /* cursor for count btree */
-#ifdef XFS_ALLOC_TRACE
-        static char     fname[] = "xfs_alloc_ag_vextent_near";
-#endif
        xfs_agblock_t   gtbno;          /* start bno of right side entry */
        xfs_agblock_t   gtbnoa;         /* aligned ... */
        xfs_extlen_t    gtdiff;         /* difference to right side entry */
@@ -1270,9 +1261,6 @@ xfs_alloc_ag_vextent_size(
        int             error;          /* error result */
        xfs_agblock_t   fbno;           /* start of found freespace */
        xfs_extlen_t    flen;           /* length of found freespace */
-#ifdef XFS_ALLOC_TRACE
-        static char     fname[] = "xfs_alloc_ag_vextent_size";
-#endif
        int             i;              /* temp status variable */
        xfs_agblock_t   rbno;           /* returned block number */
        xfs_extlen_t    rlen;           /* length of returned extent */
@@ -1427,9 +1415,6 @@ xfs_alloc_ag_vextent_small(
        int             error;
        xfs_agblock_t   fbno;
        xfs_extlen_t    flen;
-#ifdef XFS_ALLOC_TRACE
-        static char     fname[] = "xfs_alloc_ag_vextent_small";
-#endif
        int             i;
        if ((error = xfs_alloc_decrement(ccur, 0, &i)))
@@ -1447,7 +1432,8 @@ xfs_alloc_ag_vextent_small(
        else if (args->minlen == 1 && args->alignment == 1 && !args->isfl &&
                 (be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_flcount)
                  > args->minleft)) {
-                if ((error = xfs_alloc_get_freelist(args->tp, args->agbp, &fbno)))
+                error = xfs_alloc_get_freelist(args->tp, args->agbp, &fbno, 0);
+                if (error)
                        goto error0;
                if (fbno != NULLAGBLOCK) {
                        if (args->userdata) {
@@ -1515,9 +1501,6 @@ xfs_free_ag_extent(
        xfs_btree_cur_t *bno_cur;       /* cursor for by-block btree */
        xfs_btree_cur_t *cnt_cur;       /* cursor for by-size btree */
        int             error;          /* error return value */
-#ifdef XFS_ALLOC_TRACE
-        static char     fname[] = "xfs_free_ag_extent";
-#endif
        xfs_agblock_t   gtbno;          /* start of right neighbor block */
        xfs_extlen_t    gtlen;          /* length of right neighbor block */
        int             haveleft;       /* have a left neighbor block */
@@ -1923,7 +1906,8 @@ xfs_alloc_fix_freelist(
        while (be32_to_cpu(agf->agf_flcount) > need) {
                xfs_buf_t       *bp;
-                if ((error = xfs_alloc_get_freelist(tp, agbp, &bno)))
+                error = xfs_alloc_get_freelist(tp, agbp, &bno, 0);
+                if (error)
                        return error;
                if ((error = xfs_free_ag_extent(tp, agbp, args->agno, bno, 1, 1)))
                        return error;
@@ -1973,8 +1957,9 @@ xfs_alloc_fix_freelist(
                 * Put each allocated block on the list.
                 */
                for (bno = targs.agbno; bno < targs.agbno + targs.len; bno++) {
-                        if ((error = xfs_alloc_put_freelist(tp, agbp, agflbp,
+                        error = xfs_alloc_put_freelist(tp, agbp,
-                                        bno)))
+                                                        agflbp, bno, 0);
+                        if (error)
                                return error;
                }
        }
@@ -1991,16 +1976,15 @@ int				/* error */
 xfs_alloc_get_freelist(
        xfs_trans_t     *tp,    /* transaction pointer */
        xfs_buf_t       *agbp,  /* buffer containing the agf structure */
-        xfs_agblock_t   *bnop)  /* block address retrieved from freelist */
+        xfs_agblock_t   *bnop,  /* block address retrieved from freelist */
+        int             btreeblk) /* nonzero: destination is an AGF btree */
 {
        xfs_agf_t       *agf;   /* a.g. freespace structure */
        xfs_agfl_t      *agfl;  /* a.g. freelist structure */
        xfs_buf_t       *agflbp;/* buffer for a.g. freelist structure */
        xfs_agblock_t   bno;    /* block number returned */
        int             error;
-#ifdef XFS_ALLOC_TRACE
+        int             logflags;       /* XFS_AGF_* fields to trace and log */
-        static char     fname[] = "xfs_alloc_get_freelist";
-#endif
        xfs_mount_t     *mp;    /* mount structure */
        xfs_perag_t     *pag;   /* per allocation group data */
@@ -2032,8 +2016,16 @@ xfs_alloc_get_freelist(
        be32_add(&agf->agf_flcount, -1);
        xfs_trans_agflist_delta(tp, -1);
        pag->pagf_flcount--;
-        TRACE_MODAGF(NULL, agf, XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT);
-        xfs_alloc_log_agf(tp, agbp, XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT);
+        logflags = XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT;
+        if (btreeblk) {
+                be32_add(&agf->agf_btreeblks, 1);
+                pag->pagf_btreeblks++;
+                logflags |= XFS_AGF_BTREEBLKS;
+        }
+        TRACE_MODAGF(NULL, agf, logflags);
+        xfs_alloc_log_agf(tp, agbp, logflags);
        *bnop = bno;
        /*
@@ -2071,6 +2063,7 @@ xfs_alloc_log_agf(
                offsetof(xfs_agf_t, agf_flcount),
                offsetof(xfs_agf_t, agf_freeblks),
                offsetof(xfs_agf_t, agf_longest),
+                offsetof(xfs_agf_t, agf_btreeblks),
                sizeof(xfs_agf_t)
        };
@@ -2106,15 +2099,14 @@ xfs_alloc_put_freelist(
        xfs_trans_t             *tp,    /* transaction pointer */
        xfs_buf_t               *agbp,  /* buffer for a.g. freelist header */
        xfs_buf_t               *agflbp,/* buffer for a.g. free block array */
-        xfs_agblock_t           bno)    /* block being freed */
+        xfs_agblock_t           bno,    /* block being freed */
+        int                     btreeblk) /* nonzero: block came from an AGF btree */
 {
        xfs_agf_t               *agf;   /* a.g. freespace structure */
        xfs_agfl_t              *agfl;  /* a.g. free block array */
        __be32                  *blockp;/* pointer to array entry */
        int                     error;
-#ifdef XFS_ALLOC_TRACE
+        int                     logflags;       /* XFS_AGF_* fields to trace and log */
-        static char             fname[] = "xfs_alloc_put_freelist";
-#endif
        xfs_mount_t             *mp;    /* mount structure */
        xfs_perag_t             *pag;   /* per allocation group data */
@@ -2132,11 +2124,22 @@ xfs_alloc_put_freelist(
        be32_add(&agf->agf_flcount, 1);
        xfs_trans_agflist_delta(tp, 1);
        pag->pagf_flcount++;
+        logflags = XFS_AGF_FLLAST | XFS_AGF_FLCOUNT;
+        if (btreeblk) {        /* block is coming back from an AGF btree */
+                be32_add(&agf->agf_btreeblks, -1);     /* big-endian AGF field */
+                pag->pagf_btreeblks--;                 /* per-AG copy kept in step */
+                logflags |= XFS_AGF_BTREEBLKS;
+        }
+        TRACE_MODAGF(NULL, agf, logflags);
+        xfs_alloc_log_agf(tp, agbp, logflags); /* NOTE(review): the AGF is traced and logged again below with the same flags and no AGF change in between -- confirm both are needed */
        ASSERT(be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp));
        blockp = &agfl->agfl_bno[be32_to_cpu(agf->agf_fllast)];
        *blockp = cpu_to_be32(bno);
-        TRACE_MODAGF(NULL, agf, XFS_AGF_FLLAST | XFS_AGF_FLCOUNT);
+        TRACE_MODAGF(NULL, agf, logflags);
-        xfs_alloc_log_agf(tp, agbp, XFS_AGF_FLLAST | XFS_AGF_FLCOUNT);
+        xfs_alloc_log_agf(tp, agbp, logflags);
        xfs_trans_log_buf(tp, agflbp,
                (int)((xfs_caddr_t)blockp - (xfs_caddr_t)agfl),
                (int)((xfs_caddr_t)blockp - (xfs_caddr_t)agfl +
@@ -2196,6 +2199,7 @@ xfs_alloc_read_agf(
        pag = &mp->m_perag[agno];
        if (!pag->pagf_init) {
                pag->pagf_freeblks = be32_to_cpu(agf->agf_freeblks);
+                pag->pagf_btreeblks = be32_to_cpu(agf->agf_btreeblks);
                pag->pagf_flcount = be32_to_cpu(agf->agf_flcount);
                pag->pagf_longest = be32_to_cpu(agf->agf_longest);
                pag->pagf_levels[XFS_BTNUM_BNOi] =
@@ -2235,9 +2239,6 @@ xfs_alloc_vextent(
        xfs_agblock_t   agsize; /* allocation group size */
        int             error;
        int             flags;  /* XFS_ALLOC_FLAG_... locking flags */
-#ifdef XFS_ALLOC_TRACE
-        static char     fname[] = "xfs_alloc_vextent";
-#endif
        xfs_extlen_t    minleft;/* minimum left value, temp copy */
        xfs_mount_t     *mp;    /* mount structure pointer */
        xfs_agnumber_t  sagno;  /* starting allocation group number */
diff --git a/fs/xfs/xfs_alloc.h b/fs/xfs/xfs_alloc.h
index 5a4256120ccc..5aec15d0651e 100644
--- a/fs/xfs/xfs_alloc.h
+++ b/fs/xfs/xfs_alloc.h
@@ -136,7 +136,8 @@ int				/* error */
 xfs_alloc_get_freelist(
        struct xfs_trans *tp,   /* transaction pointer */
        struct xfs_buf  *agbp,  /* buffer containing the agf structure */
-        xfs_agblock_t   *bnop); /* block address retrieved from freelist */
+        xfs_agblock_t   *bnop,  /* block address retrieved from freelist */
+        int             btreeblk); /* nonzero: destination is an AGF btree */
 /*
 * Log the given fields from the agf structure.
@@ -165,7 +166,8 @@ xfs_alloc_put_freelist(
        struct xfs_trans *tp,   /* transaction pointer */
        struct xfs_buf  *agbp,  /* buffer for a.g. freelist header */
        struct xfs_buf  *agflbp,/* buffer for a.g. free block array */
-        xfs_agblock_t   bno);   /* block being freed */
+        xfs_agblock_t   bno,    /* block being freed */
+        int             btreeblk); /* nonzero: owner was an AGF btree */
 /*
 * Read in the allocation group header (free/alloc section).
diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c
index 74cadf95d4e8..1603ce595853 100644
--- a/fs/xfs/xfs_alloc_btree.c
+++ b/fs/xfs/xfs_alloc_btree.c
@@ -226,8 +226,9 @@ xfs_alloc_delrec(
                        /*
                         * Put this buffer/block on the ag's freelist.
                         */
-                        if ((error = xfs_alloc_put_freelist(cur->bc_tp,
+                        error = xfs_alloc_put_freelist(cur->bc_tp,
-                                        cur->bc_private.a.agbp, NULL, bno)))
+                                        cur->bc_private.a.agbp, NULL, bno, 1);
+                        if (error)
                                return error;
                        /*
                         * Since blocks move to the free list without the
@@ -549,8 +550,9 @@ xfs_alloc_delrec(
        /*
         * Free the deleting block by putting it on the freelist.
         */
-        if ((error = xfs_alloc_put_freelist(cur->bc_tp, cur->bc_private.a.agbp,
+        error = xfs_alloc_put_freelist(cur->bc_tp,
-                        NULL, rbno)))
+                                         cur->bc_private.a.agbp, NULL, rbno, 1);
+        if (error)
                return error;
        /*
         * Since blocks move to the free list without the coordination
@@ -1320,8 +1322,9 @@ xfs_alloc_newroot(
        /*
         * Get a buffer from the freelist blocks, for the new root.
         */
-        if ((error = xfs_alloc_get_freelist(cur->bc_tp, cur->bc_private.a.agbp,
+        error = xfs_alloc_get_freelist(cur->bc_tp,
-                        &nbno)))
+                                        cur->bc_private.a.agbp, &nbno, 1);
+        if (error)
                return error;
        /*
         * None available, we fail.
@@ -1604,8 +1607,9 @@ xfs_alloc_split(
         * Allocate the new block from the freelist.
         * If we can't do it, we're toast.  Give up.
         */
-        if ((error = xfs_alloc_get_freelist(cur->bc_tp, cur->bc_private.a.agbp,
+        error = xfs_alloc_get_freelist(cur->bc_tp,
-                        &rbno)))
+                                         cur->bc_private.a.agbp, &rbno, 1);
+        if (error)
                return error;
        if (rbno == NULLAGBLOCK) {
                *stat = 0;
diff --git a/fs/xfs/xfs_bit.c b/fs/xfs/xfs_bit.c
index 1afe07f67e3b..fab0b6d5a41b 100644
--- a/fs/xfs/xfs_bit.c
+++ b/fs/xfs/xfs_bit.c
@@ -66,44 +66,6 @@ static const char xfs_highbit[256] = {
 #endif
 /*
- * Count of bits set in byte, 0..8.
- */
-static const char xfs_countbit[256] = {
-        0, 1, 1, 2, 1, 2, 2, 3,                 /* 00 .. 07 */
-        1, 2, 2, 3, 2, 3, 3, 4,                 /* 08 .. 0f */
-        1, 2, 2, 3, 2, 3, 3, 4,                 /* 10 .. 17 */
-        2, 3, 3, 4, 3, 4, 4, 5,                 /* 18 .. 1f */
-        1, 2, 2, 3, 2, 3, 3, 4,                 /* 20 .. 27 */
-        2, 3, 3, 4, 3, 4, 4, 5,                 /* 28 .. 2f */
-        2, 3, 3, 4, 3, 4, 4, 5,                 /* 30 .. 37 */
-        3, 4, 4, 5, 4, 5, 5, 6,                 /* 38 .. 3f */
-        1, 2, 2, 3, 2, 3, 3, 4,                 /* 40 .. 47 */
-        2, 3, 3, 4, 3, 4, 4, 5,                 /* 48 .. 4f */
-        2, 3, 3, 4, 3, 4, 4, 5,                 /* 50 .. 57 */
-        3, 4, 4, 5, 4, 5, 5, 6,                 /* 58 .. 5f */
-        2, 3, 3, 4, 3, 4, 4, 5,                 /* 60 .. 67 */
-        3, 4, 4, 5, 4, 5, 5, 6,                 /* 68 .. 6f */
-        3, 4, 4, 5, 4, 5, 5, 6,                 /* 70 .. 77 */
-        4, 5, 5, 6, 5, 6, 6, 7,                 /* 78 .. 7f */
-        1, 2, 2, 3, 2, 3, 3, 4,                 /* 80 .. 87 */
-        2, 3, 3, 4, 3, 4, 4, 5,                 /* 88 .. 8f */
-        2, 3, 3, 4, 3, 4, 4, 5,                 /* 90 .. 97 */
-        3, 4, 4, 5, 4, 5, 5, 6,                 /* 98 .. 9f */
-        2, 3, 3, 4, 3, 4, 4, 5,                 /* a0 .. a7 */
-        3, 4, 4, 5, 4, 5, 5, 6,                 /* a8 .. af */
-        3, 4, 4, 5, 4, 5, 5, 6,                 /* b0 .. b7 */
-        4, 5, 5, 6, 5, 6, 6, 7,                 /* b8 .. bf */
-        2, 3, 3, 4, 3, 4, 4, 5,                 /* c0 .. c7 */
-        3, 4, 4, 5, 4, 5, 5, 6,                 /* c8 .. cf */
-        3, 4, 4, 5, 4, 5, 5, 6,                 /* d0 .. d7 */
-        4, 5, 5, 6, 5, 6, 6, 7,                 /* d8 .. df */
-        3, 4, 4, 5, 4, 5, 5, 6,                 /* e0 .. e7 */
-        4, 5, 5, 6, 5, 6, 6, 7,                 /* e8 .. ef */
-        4, 5, 5, 6, 5, 6, 6, 7,                 /* f0 .. f7 */
-        5, 6, 6, 7, 6, 7, 7, 8,                 /* f8 .. ff */
-};
-/*
 * xfs_highbit32: get high bit set out of 32-bit argument, -1 if none set.
 */
 inline int
@@ -167,56 +129,21 @@ xfs_highbit64(
 /*
- * Count the number of bits set in the bitmap starting with bit
+ * Return whether the bitmap is empty, i.e. every word of it is zero.
- * start_bit.  Size is the size of the bitmap in words.
+ * Size is the number of words in the bitmap, which is padded to a word boundary.
- *
+ * Returns 1 for empty (including size == 0), 0 for non-empty.
- * Do the counting by mapping a byte value to the number of set
- * bits for that value using the xfs_countbit array, i.e.
- * xfs_countbit[0] == 0, xfs_countbit[1] == 1, xfs_countbit[2] == 1,
- * xfs_countbit[3] == 2, etc.
 */
 int
-xfs_count_bits(uint *map, uint size, uint start_bit)
+xfs_bitmap_empty(uint *map, uint size)
 {
-        register int    bits;
+        uint i;        /* word index into map */
-        register unsigned char  *bytep;
+        uint ret = 0;  /* OR of every word examined so far */
-        register unsigned char  *end_map;
-        int             byte_bit;
-        bits = 0;
-        end_map = (char*)(map + size);
-        bytep = (char*)(map + (start_bit & ~0x7));
-        byte_bit = start_bit & 0x7;
-        /*
-         * If the caller fell off the end of the map, return 0.
-         */
-        if (bytep >= end_map) {
-                return (0);
-        }
-        /*
-         * If start_bit is not byte aligned, then process the
-         * first byte separately.
-         */
-        if (byte_bit != 0) {
-                /*
-                 * Shift off the bits we don't want to look at,
-                 * before indexing into xfs_countbit.
-                 */
-                bits += xfs_countbit[(*bytep >> byte_bit)];
-                bytep++;
-        }
-        /*
+        for (i = 0; i < size; i++) {
-         * Count the bits in each byte until the end of the bitmap.
+                ret |= map[i];
-         */
-        while (bytep < end_map) {
-                bits += xfs_countbit[*bytep];
-                bytep++;
        }
-        return (bits);
+        return (ret == 0);     /* no bit set in any word => empty */
 }
 /*
diff --git a/fs/xfs/xfs_bit.h b/fs/xfs/xfs_bit.h
index 0bbe56817542..082641a9782c 100644
--- a/fs/xfs/xfs_bit.h
+++ b/fs/xfs/xfs_bit.h
@@ -55,8 +55,8 @@ extern int xfs_lowbit64(__uint64_t v);
 /* Get high bit set out of 64-bit argument, -1 if none set */
 extern int xfs_highbit64(__uint64_t);
-/* Count set bits in map starting with start_bit */
+/* Return whether bitmap is empty (1 == empty) */
-extern int xfs_count_bits(uint *map, uint size, uint start_bit);
+extern int xfs_bitmap_empty(uint *map, uint size);
 /* Count continuous one bits in map starting with start_bit */
 extern int xfs_contig_bits(uint *map, uint size, uint start_bit);
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index b1ea26e40aaf..94b5c5fe2681 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -52,6 +52,7 @@
 #include "xfs_quota.h"
 #include "xfs_trans_space.h"
 #include "xfs_buf_item.h"
+#include "xfs_filestream.h"
 #ifdef DEBUG
@@ -277,7 +278,7 @@ xfs_bmap_isaeof(
 STATIC void
 xfs_bmap_trace_addentry(
        int             opcode,         /* operation */
-        char            *fname,         /* function name */
+        const char      *fname,         /* function name */
        char            *desc,          /* operation description */
        xfs_inode_t     *ip,            /* incore inode pointer */
        xfs_extnum_t    idx,            /* index of entry(ies) */
@@ -291,7 +292,7 @@ xfs_bmap_trace_addentry(
 */
 STATIC void
 xfs_bmap_trace_delete(
-        char            *fname,         /* function name */
+        const char      *fname,         /* function name */
        char            *desc,          /* operation description */
        xfs_inode_t     *ip,            /* incore inode pointer */
        xfs_extnum_t    idx,            /* index of entry(entries) deleted */
@@ -304,7 +305,7 @@ xfs_bmap_trace_delete(
 */
 STATIC void
 xfs_bmap_trace_insert(
-        char            *fname,         /* function name */
+        const char      *fname,         /* function name */
        char            *desc,          /* operation description */
        xfs_inode_t     *ip,            /* incore inode pointer */
        xfs_extnum_t    idx,            /* index of entry(entries) inserted */
@@ -318,7 +319,7 @@ xfs_bmap_trace_insert(
 */
 STATIC void
 xfs_bmap_trace_post_update(
-        char            *fname,         /* function name */
+        const char      *fname,         /* function name */
        char            *desc,          /* operation description */
        xfs_inode_t     *ip,            /* incore inode pointer */
        xfs_extnum_t    idx,            /* index of entry updated */
@@ -329,17 +330,25 @@ xfs_bmap_trace_post_update(
 */
 STATIC void
 xfs_bmap_trace_pre_update(
-        char            *fname,         /* function name */
+        const char      *fname,         /* function name */
        char            *desc,          /* operation description */
        xfs_inode_t     *ip,            /* incore inode pointer */
        xfs_extnum_t    idx,            /* index of entry to be updated */
        int             whichfork);     /* data or attr fork */
+#define XFS_BMAP_TRACE_DELETE(d,ip,i,c,w)       \
+        xfs_bmap_trace_delete(__FUNCTION__,d,ip,i,c,w)
+#define XFS_BMAP_TRACE_INSERT(d,ip,i,c,r1,r2,w) \
+        xfs_bmap_trace_insert(__FUNCTION__,d,ip,i,c,r1,r2,w)
+#define XFS_BMAP_TRACE_POST_UPDATE(d,ip,i,w)    \
+        xfs_bmap_trace_post_update(__FUNCTION__,d,ip,i,w)
+#define XFS_BMAP_TRACE_PRE_UPDATE(d,ip,i,w)     \
+        xfs_bmap_trace_pre_update(__FUNCTION__,d,ip,i,w)
 #else
-#define xfs_bmap_trace_delete(f,d,ip,i,c,w)
+#define XFS_BMAP_TRACE_DELETE(d,ip,i,c,w)
-#define xfs_bmap_trace_insert(f,d,ip,i,c,r1,r2,w)
+#define XFS_BMAP_TRACE_INSERT(d,ip,i,c,r1,r2,w)
-#define xfs_bmap_trace_post_update(f,d,ip,i,w)
+#define XFS_BMAP_TRACE_POST_UPDATE(d,ip,i,w)
-#define xfs_bmap_trace_pre_update(f,d,ip,i,w)
+#define XFS_BMAP_TRACE_PRE_UPDATE(d,ip,i,w)
 #endif  /* XFS_BMAP_TRACE */
 /*
@@ -531,9 +540,6 @@ xfs_bmap_add_extent(
        xfs_filblks_t           da_new; /* new count del alloc blocks used */
        xfs_filblks_t           da_old; /* old count del alloc blocks used */
        int                     error;  /* error return value */
-#ifdef XFS_BMAP_TRACE
-        static char             fname[] = "xfs_bmap_add_extent";
-#endif
        xfs_ifork_t             *ifp;   /* inode fork ptr */
        int                     logflags; /* returned value */
        xfs_extnum_t            nextents; /* number of extents in file now */
@@ -551,8 +557,8 @@ xfs_bmap_add_extent(
         * already extents in the list.
         */
        if (nextents == 0) {
-                xfs_bmap_trace_insert(fname, "insert empty", ip, 0, 1, new,
+                XFS_BMAP_TRACE_INSERT("insert empty", ip, 0, 1, new, NULL,
-                        NULL, whichfork);
+                        whichfork);
                xfs_iext_insert(ifp, 0, 1, new);
                ASSERT(cur == NULL);
                ifp->if_lastex = 0;
@@ -710,9 +716,6 @@ xfs_bmap_add_extent_delay_real(
        int                     diff;   /* temp value */
        xfs_bmbt_rec_t          *ep;    /* extent entry for idx */
        int                     error;  /* error return value */
-#ifdef XFS_BMAP_TRACE
-        static char             fname[] = "xfs_bmap_add_extent_delay_real";
-#endif
        int                     i;      /* temp state */
        xfs_ifork_t             *ifp;   /* inode fork pointer */
        xfs_fileoff_t           new_endoff;     /* end offset of new entry */
@@ -808,15 +811,14 @@ xfs_bmap_add_extent_delay_real(
                 * Filling in all of a previously delayed allocation extent.
                 * The left and right neighbors are both contiguous with new.
                 */
-                xfs_bmap_trace_pre_update(fname, "LF|RF|LC|RC", ip, idx - 1,
+                XFS_BMAP_TRACE_PRE_UPDATE("LF|RF|LC|RC", ip, idx - 1,
                        XFS_DATA_FORK);
                xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1),
                        LEFT.br_blockcount + PREV.br_blockcount +
                        RIGHT.br_blockcount);
-                xfs_bmap_trace_post_update(fname, "LF|RF|LC|RC", ip, idx - 1,
+                XFS_BMAP_TRACE_POST_UPDATE("LF|RF|LC|RC", ip, idx - 1,
-                        XFS_DATA_FORK);
-                xfs_bmap_trace_delete(fname, "LF|RF|LC|RC", ip, idx, 2,
                        XFS_DATA_FORK);
+                XFS_BMAP_TRACE_DELETE("LF|RF|LC|RC", ip, idx, 2, XFS_DATA_FORK);
                xfs_iext_remove(ifp, idx, 2);
                ip->i_df.if_lastex = idx - 1;
                ip->i_d.di_nextents--;
@@ -855,15 +857,14 @@ xfs_bmap_add_extent_delay_real(
                 * Filling in all of a previously delayed allocation extent.
                 * The left neighbor is contiguous, the right is not.
                 */
-                xfs_bmap_trace_pre_update(fname, "LF|RF|LC", ip, idx - 1,
+                XFS_BMAP_TRACE_PRE_UPDATE("LF|RF|LC", ip, idx - 1,
                        XFS_DATA_FORK);
                xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1),
                        LEFT.br_blockcount + PREV.br_blockcount);
-                xfs_bmap_trace_post_update(fname, "LF|RF|LC", ip, idx - 1,
+                XFS_BMAP_TRACE_POST_UPDATE("LF|RF|LC", ip, idx - 1,
                        XFS_DATA_FORK);
                ip->i_df.if_lastex = idx - 1;
-                xfs_bmap_trace_delete(fname, "LF|RF|LC", ip, idx, 1,
+                XFS_BMAP_TRACE_DELETE("LF|RF|LC", ip, idx, 1, XFS_DATA_FORK);
-                        XFS_DATA_FORK);
                xfs_iext_remove(ifp, idx, 1);
                if (cur == NULL)
                        rval = XFS_ILOG_DEXT;
@@ -892,16 +893,13 @@ xfs_bmap_add_extent_delay_real(
                 * Filling in all of a previously delayed allocation extent.
                 * The right neighbor is contiguous, the left is not.
                 */
-                xfs_bmap_trace_pre_update(fname, "LF|RF|RC", ip, idx,
+                XFS_BMAP_TRACE_PRE_UPDATE("LF|RF|RC", ip, idx, XFS_DATA_FORK);
-                        XFS_DATA_FORK);
                xfs_bmbt_set_startblock(ep, new->br_startblock);
                xfs_bmbt_set_blockcount(ep,
                        PREV.br_blockcount + RIGHT.br_blockcount);
-                xfs_bmap_trace_post_update(fname, "LF|RF|RC", ip, idx,
+                XFS_BMAP_TRACE_POST_UPDATE("LF|RF|RC", ip, idx, XFS_DATA_FORK);
-                        XFS_DATA_FORK);
                ip->i_df.if_lastex = idx;
-                xfs_bmap_trace_delete(fname, "LF|RF|RC", ip, idx + 1, 1,
+                XFS_BMAP_TRACE_DELETE("LF|RF|RC", ip, idx + 1, 1, XFS_DATA_FORK);
-                        XFS_DATA_FORK);
                xfs_iext_remove(ifp, idx + 1, 1);
                if (cur == NULL)
                        rval = XFS_ILOG_DEXT;
@@ -931,11 +929,9 @@ xfs_bmap_add_extent_delay_real(
                 * Neither the left nor right neighbors are contiguous with
                 * the new one.
                 */
-                xfs_bmap_trace_pre_update(fname, "LF|RF", ip, idx,
+                XFS_BMAP_TRACE_PRE_UPDATE("LF|RF", ip, idx, XFS_DATA_FORK);
-                        XFS_DATA_FORK);
                xfs_bmbt_set_startblock(ep, new->br_startblock);
-                xfs_bmap_trace_post_update(fname, "LF|RF", ip, idx,
+                XFS_BMAP_TRACE_POST_UPDATE("LF|RF", ip, idx, XFS_DATA_FORK);
-                        XFS_DATA_FORK);
                ip->i_df.if_lastex = idx;
                ip->i_d.di_nextents++;
                if (cur == NULL)
@@ -963,17 +959,14 @@ xfs_bmap_add_extent_delay_real(
                 * Filling in the first part of a previous delayed allocation.
                 * The left neighbor is contiguous.
                 */
-                xfs_bmap_trace_pre_update(fname, "LF|LC", ip, idx - 1,
+                XFS_BMAP_TRACE_PRE_UPDATE("LF|LC", ip, idx - 1, XFS_DATA_FORK);
-                        XFS_DATA_FORK);
                xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1),
                        LEFT.br_blockcount + new->br_blockcount);
                xfs_bmbt_set_startoff(ep,
                        PREV.br_startoff + new->br_blockcount);
-                xfs_bmap_trace_post_update(fname, "LF|LC", ip, idx - 1,
+                XFS_BMAP_TRACE_POST_UPDATE("LF|LC", ip, idx - 1, XFS_DATA_FORK);
-                        XFS_DATA_FORK);
                temp = PREV.br_blockcount - new->br_blockcount;
-                xfs_bmap_trace_pre_update(fname, "LF|LC", ip, idx,
+                XFS_BMAP_TRACE_PRE_UPDATE("LF|LC", ip, idx, XFS_DATA_FORK);
-                        XFS_DATA_FORK);
                xfs_bmbt_set_blockcount(ep, temp);
                ip->i_df.if_lastex = idx - 1;
                if (cur == NULL)
@@ -995,8 +988,7 @@ xfs_bmap_add_extent_delay_real(
                temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
                        STARTBLOCKVAL(PREV.br_startblock));
                xfs_bmbt_set_startblock(ep, NULLSTARTBLOCK((int)temp));
-                xfs_bmap_trace_post_update(fname, "LF|LC", ip, idx,
+                XFS_BMAP_TRACE_POST_UPDATE("LF|LC", ip, idx, XFS_DATA_FORK);
-                        XFS_DATA_FORK);
                *dnew = temp;
                /* DELTA: The boundary between two in-core extents moved. */
                temp = LEFT.br_startoff;
@@ -1009,11 +1001,11 @@ xfs_bmap_add_extent_delay_real(
                 * Filling in the first part of a previous delayed allocation.
                 * The left neighbor is not contiguous.
                 */
-                xfs_bmap_trace_pre_update(fname, "LF", ip, idx, XFS_DATA_FORK);
+                XFS_BMAP_TRACE_PRE_UPDATE("LF", ip, idx, XFS_DATA_FORK);
                xfs_bmbt_set_startoff(ep, new_endoff);
                temp = PREV.br_blockcount - new->br_blockcount;
                xfs_bmbt_set_blockcount(ep, temp);
-                xfs_bmap_trace_insert(fname, "LF", ip, idx, 1, new, NULL,
+                XFS_BMAP_TRACE_INSERT("LF", ip, idx, 1, new, NULL,
                        XFS_DATA_FORK);
                xfs_iext_insert(ifp, idx, 1, new);
                ip->i_df.if_lastex = idx;
@@ -1046,8 +1038,7 @@ xfs_bmap_add_extent_delay_real(
                        (cur ? cur->bc_private.b.allocated : 0));
                ep = xfs_iext_get_ext(ifp, idx + 1);
                xfs_bmbt_set_startblock(ep, NULLSTARTBLOCK((int)temp));
-                xfs_bmap_trace_post_update(fname, "LF", ip, idx + 1,
+                XFS_BMAP_TRACE_POST_UPDATE("LF", ip, idx + 1, XFS_DATA_FORK);
-                        XFS_DATA_FORK);
                *dnew = temp;
                /* DELTA: One in-core extent is split in two. */
                temp = PREV.br_startoff;
@@ -1060,17 +1051,14 @@ xfs_bmap_add_extent_delay_real(
                 * The right neighbor is contiguous with the new allocation.
                 */
                temp = PREV.br_blockcount - new->br_blockcount;
-                xfs_bmap_trace_pre_update(fname, "RF|RC", ip, idx,
+                XFS_BMAP_TRACE_PRE_UPDATE("RF|RC", ip, idx, XFS_DATA_FORK);
-                        XFS_DATA_FORK);
+                XFS_BMAP_TRACE_PRE_UPDATE("RF|RC", ip, idx + 1, XFS_DATA_FORK);
-                xfs_bmap_trace_pre_update(fname, "RF|RC", ip, idx + 1,
-                        XFS_DATA_FORK);
                xfs_bmbt_set_blockcount(ep, temp);
                xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, idx + 1),
                        new->br_startoff, new->br_startblock,
                        new->br_blockcount + RIGHT.br_blockcount,
                        RIGHT.br_state);
-                xfs_bmap_trace_post_update(fname, "RF|RC", ip, idx + 1,
+                XFS_BMAP_TRACE_POST_UPDATE("RF|RC", ip, idx + 1, XFS_DATA_FORK);
-                        XFS_DATA_FORK);
                ip->i_df.if_lastex = idx + 1;
                if (cur == NULL)
                        rval = XFS_ILOG_DEXT;
@@ -1091,8 +1079,7 @@ xfs_bmap_add_extent_delay_real(
                temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
                        STARTBLOCKVAL(PREV.br_startblock));
                xfs_bmbt_set_startblock(ep, NULLSTARTBLOCK((int)temp));
-                xfs_bmap_trace_post_update(fname, "RF|RC", ip, idx,
+                XFS_BMAP_TRACE_POST_UPDATE("RF|RC", ip, idx, XFS_DATA_FORK);
-                        XFS_DATA_FORK);
                *dnew = temp;
                /* DELTA: The boundary between two in-core extents moved. */
                temp = PREV.br_startoff;
@@ -1106,10 +1093,10 @@ xfs_bmap_add_extent_delay_real(
                 * The right neighbor is not contiguous.
                 */
                temp = PREV.br_blockcount - new->br_blockcount;
-                xfs_bmap_trace_pre_update(fname, "RF", ip, idx, XFS_DATA_FORK);
+                XFS_BMAP_TRACE_PRE_UPDATE("RF", ip, idx, XFS_DATA_FORK);
                xfs_bmbt_set_blockcount(ep, temp);
-                xfs_bmap_trace_insert(fname, "RF", ip, idx + 1, 1,
+                XFS_BMAP_TRACE_INSERT("RF", ip, idx + 1, 1, new, NULL,
-                        new, NULL, XFS_DATA_FORK);
+                        XFS_DATA_FORK);
                xfs_iext_insert(ifp, idx + 1, 1, new);
                ip->i_df.if_lastex = idx + 1;
                ip->i_d.di_nextents++;
@@ -1141,7 +1128,7 @@ xfs_bmap_add_extent_delay_real(
                        (cur ? cur->bc_private.b.allocated : 0));
                ep = xfs_iext_get_ext(ifp, idx);
                xfs_bmbt_set_startblock(ep, NULLSTARTBLOCK((int)temp));
-                xfs_bmap_trace_post_update(fname, "RF", ip, idx, XFS_DATA_FORK);
+                XFS_BMAP_TRACE_POST_UPDATE("RF", ip, idx, XFS_DATA_FORK);
                *dnew = temp;
                /* DELTA: One in-core extent is split in two. */
                temp = PREV.br_startoff;
@@ -1155,7 +1142,7 @@ xfs_bmap_add_extent_delay_real(
                 * This case is avoided almost all the time.
                 */
                temp = new->br_startoff - PREV.br_startoff;
-                xfs_bmap_trace_pre_update(fname, "0", ip, idx, XFS_DATA_FORK);
+                XFS_BMAP_TRACE_PRE_UPDATE("0", ip, idx, XFS_DATA_FORK);
                xfs_bmbt_set_blockcount(ep, temp);
                r[0] = *new;
                r[1].br_state = PREV.br_state;
@@ -1163,7 +1150,7 @@ xfs_bmap_add_extent_delay_real(
                r[1].br_startoff = new_endoff;
                temp2 = PREV.br_startoff + PREV.br_blockcount - new_endoff;
                r[1].br_blockcount = temp2;
-                xfs_bmap_trace_insert(fname, "0", ip, idx + 1, 2, &r[0], &r[1],
+                XFS_BMAP_TRACE_INSERT("0", ip, idx + 1, 2, &r[0], &r[1],
                        XFS_DATA_FORK);
                xfs_iext_insert(ifp, idx + 1, 2, &r[0]);
                ip->i_df.if_lastex = idx + 1;
@@ -1222,13 +1209,11 @@ xfs_bmap_add_extent_delay_real(
                }
                ep = xfs_iext_get_ext(ifp, idx);
                xfs_bmbt_set_startblock(ep, NULLSTARTBLOCK((int)temp));
-                xfs_bmap_trace_post_update(fname, "0", ip, idx, XFS_DATA_FORK);
+                XFS_BMAP_TRACE_POST_UPDATE("0", ip, idx, XFS_DATA_FORK);
-                xfs_bmap_trace_pre_update(fname, "0", ip, idx + 2,
+                XFS_BMAP_TRACE_PRE_UPDATE("0", ip, idx + 2, XFS_DATA_FORK);
-                        XFS_DATA_FORK);
                xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, idx + 2),
                        NULLSTARTBLOCK((int)temp2));
-                xfs_bmap_trace_post_update(fname, "0", ip, idx + 2,
+                XFS_BMAP_TRACE_POST_UPDATE("0", ip, idx + 2, XFS_DATA_FORK);
-                        XFS_DATA_FORK);
                *dnew = temp + temp2;
                /* DELTA: One in-core extent is split in three. */
                temp = PREV.br_startoff;
@@ -1287,9 +1272,6 @@ xfs_bmap_add_extent_unwritten_real(
        xfs_btree_cur_t         *cur;   /* btree cursor */
        xfs_bmbt_rec_t          *ep;    /* extent entry for idx */
        int                     error;  /* error return value */
-#ifdef XFS_BMAP_TRACE
-        static char             fname[] = "xfs_bmap_add_extent_unwritten_real";
-#endif
        int                     i;      /* temp state */
        xfs_ifork_t             *ifp;   /* inode fork pointer */
        xfs_fileoff_t           new_endoff;     /* end offset of new entry */
@@ -1390,15 +1372,14 @@ xfs_bmap_add_extent_unwritten_real(
                 * Setting all of a previous oldext extent to newext.
                 * The left and right neighbors are both contiguous with new.
                 */
-                xfs_bmap_trace_pre_update(fname, "LF|RF|LC|RC", ip, idx - 1,
+                XFS_BMAP_TRACE_PRE_UPDATE("LF|RF|LC|RC", ip, idx - 1,
                        XFS_DATA_FORK);
                xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1),
                        LEFT.br_blockcount + PREV.br_blockcount +
                        RIGHT.br_blockcount);
-                xfs_bmap_trace_post_update(fname, "LF|RF|LC|RC", ip, idx - 1,
+                XFS_BMAP_TRACE_POST_UPDATE("LF|RF|LC|RC", ip, idx - 1,
-                        XFS_DATA_FORK);
-                xfs_bmap_trace_delete(fname, "LF|RF|LC|RC", ip, idx, 2,
                        XFS_DATA_FORK);
+                XFS_BMAP_TRACE_DELETE("LF|RF|LC|RC", ip, idx, 2, XFS_DATA_FORK);
                xfs_iext_remove(ifp, idx, 2);
                ip->i_df.if_lastex = idx - 1;
                ip->i_d.di_nextents -= 2;
@@ -1441,15 +1422,14 @@ xfs_bmap_add_extent_unwritten_real(
                 * Setting all of a previous oldext extent to newext.
                 * The left neighbor is contiguous, the right is not.
                 */
-                xfs_bmap_trace_pre_update(fname, "LF|RF|LC", ip, idx - 1,
+                XFS_BMAP_TRACE_PRE_UPDATE("LF|RF|LC", ip, idx - 1,
                        XFS_DATA_FORK);
                xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1),
                        LEFT.br_blockcount + PREV.br_blockcount);
-                xfs_bmap_trace_post_update(fname, "LF|RF|LC", ip, idx - 1,
+                XFS_BMAP_TRACE_POST_UPDATE("LF|RF|LC", ip, idx - 1,
                        XFS_DATA_FORK);
                ip->i_df.if_lastex = idx - 1;
-                xfs_bmap_trace_delete(fname, "LF|RF|LC", ip, idx, 1,
+                XFS_BMAP_TRACE_DELETE("LF|RF|LC", ip, idx, 1, XFS_DATA_FORK);
-                        XFS_DATA_FORK);
                xfs_iext_remove(ifp, idx, 1);
                ip->i_d.di_nextents--;
                if (cur == NULL)
@@ -1484,16 +1464,15 @@ xfs_bmap_add_extent_unwritten_real(
                 * Setting all of a previous oldext extent to newext.
                 * The right neighbor is contiguous, the left is not.
                 */
-                xfs_bmap_trace_pre_update(fname, "LF|RF|RC", ip, idx,
+                XFS_BMAP_TRACE_PRE_UPDATE("LF|RF|RC", ip, idx,
                        XFS_DATA_FORK);
                xfs_bmbt_set_blockcount(ep,
                        PREV.br_blockcount + RIGHT.br_blockcount);
                xfs_bmbt_set_state(ep, newext);
-                xfs_bmap_trace_post_update(fname, "LF|RF|RC", ip, idx,
+                XFS_BMAP_TRACE_POST_UPDATE("LF|RF|RC", ip, idx,
                        XFS_DATA_FORK);
                ip->i_df.if_lastex = idx;
-                xfs_bmap_trace_delete(fname, "LF|RF|RC", ip, idx + 1, 1,
+                XFS_BMAP_TRACE_DELETE("LF|RF|RC", ip, idx + 1, 1, XFS_DATA_FORK);
-                        XFS_DATA_FORK);
                xfs_iext_remove(ifp, idx + 1, 1);
                ip->i_d.di_nextents--;
                if (cur == NULL)
@@ -1529,10 +1508,10 @@ xfs_bmap_add_extent_unwritten_real(
                 * Neither the left nor right neighbors are contiguous with
                 * the new one.
                 */
-                xfs_bmap_trace_pre_update(fname, "LF|RF", ip, idx,
+                XFS_BMAP_TRACE_PRE_UPDATE("LF|RF", ip, idx,
                        XFS_DATA_FORK);
                xfs_bmbt_set_state(ep, newext);
-                xfs_bmap_trace_post_update(fname, "LF|RF", ip, idx,
+                XFS_BMAP_TRACE_POST_UPDATE("LF|RF", ip, idx,
                        XFS_DATA_FORK);
                ip->i_df.if_lastex = idx;
                if (cur == NULL)
@@ -1559,21 +1538,21 @@ xfs_bmap_add_extent_unwritten_real(
                 * Setting the first part of a previous oldext extent to newext.
                 * The left neighbor is contiguous.
                 */
-                xfs_bmap_trace_pre_update(fname, "LF|LC", ip, idx - 1,
+                XFS_BMAP_TRACE_PRE_UPDATE("LF|LC", ip, idx - 1,
                        XFS_DATA_FORK);
                xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1),
                        LEFT.br_blockcount + new->br_blockcount);
                xfs_bmbt_set_startoff(ep,
                        PREV.br_startoff + new->br_blockcount);
-                xfs_bmap_trace_post_update(fname, "LF|LC", ip, idx - 1,
+                XFS_BMAP_TRACE_POST_UPDATE("LF|LC", ip, idx - 1,
                        XFS_DATA_FORK);
-                xfs_bmap_trace_pre_update(fname, "LF|LC", ip, idx,
+                XFS_BMAP_TRACE_PRE_UPDATE("LF|LC", ip, idx,
                        XFS_DATA_FORK);
                xfs_bmbt_set_startblock(ep,
                        new->br_startblock + new->br_blockcount);
                xfs_bmbt_set_blockcount(ep,
                        PREV.br_blockcount - new->br_blockcount);
-                xfs_bmap_trace_post_update(fname, "LF|LC", ip, idx,
+                XFS_BMAP_TRACE_POST_UPDATE("LF|LC", ip, idx,
                        XFS_DATA_FORK);
                ip->i_df.if_lastex = idx - 1;
                if (cur == NULL)
@@ -1610,15 +1589,15 @@ xfs_bmap_add_extent_unwritten_real(
                 * Setting the first part of a previous oldext extent to newext.
                 * The left neighbor is not contiguous.
                 */
-                xfs_bmap_trace_pre_update(fname, "LF", ip, idx, XFS_DATA_FORK);
+                XFS_BMAP_TRACE_PRE_UPDATE("LF", ip, idx, XFS_DATA_FORK);
                ASSERT(ep && xfs_bmbt_get_state(ep) == oldext);
                xfs_bmbt_set_startoff(ep, new_endoff);
                xfs_bmbt_set_blockcount(ep,
                        PREV.br_blockcount - new->br_blockcount);
                xfs_bmbt_set_startblock(ep,
                        new->br_startblock + new->br_blockcount);
-                xfs_bmap_trace_post_update(fname, "LF", ip, idx, XFS_DATA_FORK);
+                XFS_BMAP_TRACE_POST_UPDATE("LF", ip, idx, XFS_DATA_FORK);
-                xfs_bmap_trace_insert(fname, "LF", ip, idx, 1, new, NULL,
+                XFS_BMAP_TRACE_INSERT("LF", ip, idx, 1, new, NULL,
                        XFS_DATA_FORK);
                xfs_iext_insert(ifp, idx, 1, new);
                ip->i_df.if_lastex = idx;
@@ -1653,18 +1632,18 @@ xfs_bmap_add_extent_unwritten_real(
                 * Setting the last part of a previous oldext extent to newext.
                 * The right neighbor is contiguous with the new allocation.
                 */
-                xfs_bmap_trace_pre_update(fname, "RF|RC", ip, idx,
+                XFS_BMAP_TRACE_PRE_UPDATE("RF|RC", ip, idx,
                        XFS_DATA_FORK);
-                xfs_bmap_trace_pre_update(fname, "RF|RC", ip, idx + 1,
+                XFS_BMAP_TRACE_PRE_UPDATE("RF|RC", ip, idx + 1,
                        XFS_DATA_FORK);
                xfs_bmbt_set_blockcount(ep,
                        PREV.br_blockcount - new->br_blockcount);
-                xfs_bmap_trace_post_update(fname, "RF|RC", ip, idx,
+                XFS_BMAP_TRACE_POST_UPDATE("RF|RC", ip, idx,
                        XFS_DATA_FORK);
                xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, idx + 1),
                        new->br_startoff, new->br_startblock,
                        new->br_blockcount + RIGHT.br_blockcount, newext);
-                xfs_bmap_trace_post_update(fname, "RF|RC", ip, idx + 1,
+                XFS_BMAP_TRACE_POST_UPDATE("RF|RC", ip, idx + 1,
                        XFS_DATA_FORK);
                ip->i_df.if_lastex = idx + 1;
                if (cur == NULL)
@@ -1700,12 +1679,12 @@ xfs_bmap_add_extent_unwritten_real(
                 * Setting the last part of a previous oldext extent to newext.
                 * The right neighbor is not contiguous.
                 */
-                xfs_bmap_trace_pre_update(fname, "RF", ip, idx, XFS_DATA_FORK);
+                XFS_BMAP_TRACE_PRE_UPDATE("RF", ip, idx, XFS_DATA_FORK);
                xfs_bmbt_set_blockcount(ep,
                        PREV.br_blockcount - new->br_blockcount);
-                xfs_bmap_trace_post_update(fname, "RF", ip, idx, XFS_DATA_FORK);
+                XFS_BMAP_TRACE_POST_UPDATE("RF", ip, idx, XFS_DATA_FORK);
-                xfs_bmap_trace_insert(fname, "RF", ip, idx + 1, 1,
+                XFS_BMAP_TRACE_INSERT("RF", ip, idx + 1, 1, new, NULL,
-                        new, NULL, XFS_DATA_FORK);
+                        XFS_DATA_FORK);
                xfs_iext_insert(ifp, idx + 1, 1, new);
                ip->i_df.if_lastex = idx + 1;
                ip->i_d.di_nextents++;
@@ -1744,17 +1723,17 @@ xfs_bmap_add_extent_unwritten_real(
                 * newext.  Contiguity is impossible here.
                 * One extent becomes three extents.
                 */
-                xfs_bmap_trace_pre_update(fname, "0", ip, idx, XFS_DATA_FORK);
+                XFS_BMAP_TRACE_PRE_UPDATE("0", ip, idx, XFS_DATA_FORK);
                xfs_bmbt_set_blockcount(ep,
                        new->br_startoff - PREV.br_startoff);
-                xfs_bmap_trace_post_update(fname, "0", ip, idx, XFS_DATA_FORK);
+                XFS_BMAP_TRACE_POST_UPDATE("0", ip, idx, XFS_DATA_FORK);
                r[0] = *new;
                r[1].br_startoff = new_endoff;
                r[1].br_blockcount =
                        PREV.br_startoff + PREV.br_blockcount - new_endoff;
                r[1].br_startblock = new->br_startblock + new->br_blockcount;
                r[1].br_state = oldext;
-                xfs_bmap_trace_insert(fname, "0", ip, idx + 1, 2, &r[0], &r[1],
+                XFS_BMAP_TRACE_INSERT("0", ip, idx + 1, 2, &r[0], &r[1],
                        XFS_DATA_FORK);
                xfs_iext_insert(ifp, idx + 1, 2, &r[0]);
                ip->i_df.if_lastex = idx + 1;
@@ -1845,9 +1824,6 @@ xfs_bmap_add_extent_hole_delay(
        int                     rsvd)           /* OK to allocate reserved blocks */
 {
        xfs_bmbt_rec_t          *ep;    /* extent record for idx */
-#ifdef XFS_BMAP_TRACE
-        static char             fname[] = "xfs_bmap_add_extent_hole_delay";
-#endif
        xfs_ifork_t             *ifp;   /* inode fork pointer */
        xfs_bmbt_irec_t         left;   /* left neighbor extent entry */
        xfs_filblks_t           newlen=0;       /* new indirect size */
@@ -1919,7 +1895,7 @@ xfs_bmap_add_extent_hole_delay(
                 */
                temp = left.br_blockcount + new->br_blockcount +
                        right.br_blockcount;
-                xfs_bmap_trace_pre_update(fname, "LC|RC", ip, idx - 1,
+                XFS_BMAP_TRACE_PRE_UPDATE("LC|RC", ip, idx - 1,
                        XFS_DATA_FORK);
                xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), temp);
                oldlen = STARTBLOCKVAL(left.br_startblock) +
@@ -1928,10 +1904,9 @@ xfs_bmap_add_extent_hole_delay(
                newlen = xfs_bmap_worst_indlen(ip, temp);
                xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, idx - 1),
                        NULLSTARTBLOCK((int)newlen));
-                xfs_bmap_trace_post_update(fname, "LC|RC", ip, idx - 1,
+                XFS_BMAP_TRACE_POST_UPDATE("LC|RC", ip, idx - 1,
-                        XFS_DATA_FORK);
-                xfs_bmap_trace_delete(fname, "LC|RC", ip, idx, 1,
                        XFS_DATA_FORK);
+                XFS_BMAP_TRACE_DELETE("LC|RC", ip, idx, 1, XFS_DATA_FORK);
                xfs_iext_remove(ifp, idx, 1);
                ip->i_df.if_lastex = idx - 1;
                /* DELTA: Two in-core extents were replaced by one. */
@@ -1946,7 +1921,7 @@ xfs_bmap_add_extent_hole_delay(
                 * Merge the new allocation with the left neighbor.
                 */
                temp = left.br_blockcount + new->br_blockcount;
-                xfs_bmap_trace_pre_update(fname, "LC", ip, idx - 1,
+                XFS_BMAP_TRACE_PRE_UPDATE("LC", ip, idx - 1,
                        XFS_DATA_FORK);
                xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), temp);
                oldlen = STARTBLOCKVAL(left.br_startblock) +
@@ -1954,7 +1929,7 @@ xfs_bmap_add_extent_hole_delay(
                newlen = xfs_bmap_worst_indlen(ip, temp);
                xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, idx - 1),
                        NULLSTARTBLOCK((int)newlen));
-                xfs_bmap_trace_post_update(fname, "LC", ip, idx - 1,
+                XFS_BMAP_TRACE_POST_UPDATE("LC", ip, idx - 1,
                        XFS_DATA_FORK);
                ip->i_df.if_lastex = idx - 1;
                /* DELTA: One in-core extent grew into a hole. */
@@ -1968,14 +1943,14 @@ xfs_bmap_add_extent_hole_delay(
                 * on the right.
                 * Merge the new allocation with the right neighbor.
                 */
-                xfs_bmap_trace_pre_update(fname, "RC", ip, idx, XFS_DATA_FORK);
+                XFS_BMAP_TRACE_PRE_UPDATE("RC", ip, idx, XFS_DATA_FORK);
                temp = new->br_blockcount + right.br_blockcount;
                oldlen = STARTBLOCKVAL(new->br_startblock) +
                        STARTBLOCKVAL(right.br_startblock);
                newlen = xfs_bmap_worst_indlen(ip, temp);
                xfs_bmbt_set_allf(ep, new->br_startoff,
                        NULLSTARTBLOCK((int)newlen), temp, right.br_state);
-                xfs_bmap_trace_post_update(fname, "RC", ip, idx, XFS_DATA_FORK);
+                XFS_BMAP_TRACE_POST_UPDATE("RC", ip, idx, XFS_DATA_FORK);
                ip->i_df.if_lastex = idx;
                /* DELTA: One in-core extent grew into a hole. */
                temp2 = temp;
@@ -1989,7 +1964,7 @@ xfs_bmap_add_extent_hole_delay(
                 * Insert a new entry.
                 */
                oldlen = newlen = 0;
-                xfs_bmap_trace_insert(fname, "0", ip, idx, 1, new, NULL,
+                XFS_BMAP_TRACE_INSERT("0", ip, idx, 1, new, NULL,
                        XFS_DATA_FORK);
                xfs_iext_insert(ifp, idx, 1, new);
                ip->i_df.if_lastex = idx;
@@ -2039,9 +2014,6 @@ xfs_bmap_add_extent_hole_real(
 {
        xfs_bmbt_rec_t          *ep;    /* pointer to extent entry ins. point */
        int                     error;  /* error return value */
-#ifdef XFS_BMAP_TRACE
-        static char             fname[] = "xfs_bmap_add_extent_hole_real";
-#endif
        int                     i;      /* temp state */
        xfs_ifork_t             *ifp;   /* inode fork pointer */
        xfs_bmbt_irec_t         left;   /* left neighbor extent entry */
@@ -2118,15 +2090,14 @@ xfs_bmap_add_extent_hole_real(
                 * left and on the right.
                 * Merge all three into a single extent record.
                 */
-                xfs_bmap_trace_pre_update(fname, "LC|RC", ip, idx - 1,
+                XFS_BMAP_TRACE_PRE_UPDATE("LC|RC", ip, idx - 1,
                        whichfork);
                xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1),
                        left.br_blockcount + new->br_blockcount +
                        right.br_blockcount);
-                xfs_bmap_trace_post_update(fname, "LC|RC", ip, idx - 1,
+                XFS_BMAP_TRACE_POST_UPDATE("LC|RC", ip, idx - 1,
                        whichfork);
-                xfs_bmap_trace_delete(fname, "LC|RC", ip,
+                XFS_BMAP_TRACE_DELETE("LC|RC", ip, idx, 1, whichfork);
-                        idx, 1, whichfork);
                xfs_iext_remove(ifp, idx, 1);
                ifp->if_lastex = idx - 1;
                XFS_IFORK_NEXT_SET(ip, whichfork,
@@ -2168,10 +2139,10 @@ xfs_bmap_add_extent_hole_real(
                 * on the left.
                 * Merge the new allocation with the left neighbor.
                 */
-                xfs_bmap_trace_pre_update(fname, "LC", ip, idx - 1, whichfork);
+                XFS_BMAP_TRACE_PRE_UPDATE("LC", ip, idx - 1, whichfork);
                xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1),
                        left.br_blockcount + new->br_blockcount);
-                xfs_bmap_trace_post_update(fname, "LC", ip, idx - 1, whichfork);
+                XFS_BMAP_TRACE_POST_UPDATE("LC", ip, idx - 1, whichfork);
                ifp->if_lastex = idx - 1;
                if (cur == NULL) {
                        rval = XFS_ILOG_FEXT(whichfork);
@@ -2202,11 +2173,11 @@ xfs_bmap_add_extent_hole_real(
                 * on the right.
                 * Merge the new allocation with the right neighbor.
                 */
-                xfs_bmap_trace_pre_update(fname, "RC", ip, idx, whichfork);
+                XFS_BMAP_TRACE_PRE_UPDATE("RC", ip, idx, whichfork);
                xfs_bmbt_set_allf(ep, new->br_startoff, new->br_startblock,
                        new->br_blockcount + right.br_blockcount,
                        right.br_state);
-                xfs_bmap_trace_post_update(fname, "RC", ip, idx, whichfork);
+                XFS_BMAP_TRACE_POST_UPDATE("RC", ip, idx, whichfork);
                ifp->if_lastex = idx;
                if (cur == NULL) {
                        rval = XFS_ILOG_FEXT(whichfork);
@@ -2237,8 +2208,7 @@ xfs_bmap_add_extent_hole_real(
                 * real allocation.
                 * Insert a new entry.
                 */
-                xfs_bmap_trace_insert(fname, "0", ip, idx, 1, new, NULL,
+                XFS_BMAP_TRACE_INSERT("0", ip, idx, 1, new, NULL, whichfork);
-                        whichfork);
                xfs_iext_insert(ifp, idx, 1, new);
                ifp->if_lastex = idx;
                XFS_IFORK_NEXT_SET(ip, whichfork,
@@ -2605,12 +2575,10 @@ xfs_bmap_rtalloc(
        xfs_extlen_t    prod = 0;       /* product factor for allocators */
        xfs_extlen_t    ralen = 0;      /* realtime allocation length */
        xfs_extlen_t    align;          /* minimum allocation alignment */
-        xfs_rtblock_t   rtx;            /* realtime extent number */
        xfs_rtblock_t   rtb;
        mp = ap->ip->i_mount;
-        align = ap->ip->i_d.di_extsize ?
+        align = xfs_get_extsz_hint(ap->ip);
-                ap->ip->i_d.di_extsize : mp->m_sb.sb_rextsize;
        prod = align / mp->m_sb.sb_rextsize;
        error = xfs_bmap_extsize_align(mp, ap->gotp, ap->prevp,
                                        align, 1, ap->eof, 0,
@@ -2644,6 +2612,8 @@ xfs_bmap_rtalloc(
         * pick an extent that will space things out in the rt area.
         */
        if (ap->eof && ap->off == 0) {
+                xfs_rtblock_t uninitialized_var(rtx); /* realtime extent no */
                error = xfs_rtpick_extent(mp, ap->tp, ralen, &rtx);
                if (error)
                        return error;
@@ -2715,9 +2685,7 @@ xfs_bmap_btalloc(
        int             error;
        mp = ap->ip->i_mount;
-        align = (ap->userdata && ap->ip->i_d.di_extsize &&
+        align = ap->userdata ? xfs_get_extsz_hint(ap->ip) : 0;
-                (ap->ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE)) ?
-                ap->ip->i_d.di_extsize : 0;
        if (unlikely(align)) {
                error = xfs_bmap_extsize_align(mp, ap->gotp, ap->prevp,
                                                align, 0, ap->eof, 0, ap->conv,
@@ -2727,9 +2695,15 @@ xfs_bmap_btalloc(
        }
        nullfb = ap->firstblock == NULLFSBLOCK;
        fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, ap->firstblock);
-        if (nullfb)
+        if (nullfb) {
-                ap->rval = XFS_INO_TO_FSB(mp, ap->ip->i_ino);
+                if (ap->userdata && xfs_inode_is_filestream(ap->ip)) {
-        else
+                        ag = xfs_filestream_lookup_ag(ap->ip);
+                        ag = (ag != NULLAGNUMBER) ? ag : 0;
+                        ap->rval = XFS_AGB_TO_FSB(mp, ag, 0);
+                } else {
+                        ap->rval = XFS_INO_TO_FSB(mp, ap->ip->i_ino);
+                }
+        } else
                ap->rval = ap->firstblock;
        xfs_bmap_adjacent(ap);
@@ -2753,13 +2727,22 @@ xfs_bmap_btalloc(
        args.firstblock = ap->firstblock;
        blen = 0;
        if (nullfb) {
-                args.type = XFS_ALLOCTYPE_START_BNO;
+                if (ap->userdata && xfs_inode_is_filestream(ap->ip))
+                        args.type = XFS_ALLOCTYPE_NEAR_BNO;
+                else
+                        args.type = XFS_ALLOCTYPE_START_BNO;
                args.total = ap->total;
                /*
-                 * Find the longest available space.
+                 * Search for an allocation group with a single extent
-                 * We're going to try for the whole allocation at once.
+                 * large enough for the request.
+                 *
+                 * If one isn't found, then adjust the minimum allocation
+                 * size to the largest space found.
                 */
                startag = ag = XFS_FSB_TO_AGNO(mp, args.fsbno);
+                if (startag == NULLAGNUMBER)
+                        startag = ag = 0;
                notinit = 0;
                down_read(&mp->m_peraglock);
                while (blen < ap->alen) {
@@ -2785,6 +2768,35 @@ xfs_bmap_btalloc(
                                        blen = longest;
                        } else
                                notinit = 1;
+                        if (xfs_inode_is_filestream(ap->ip)) {
+                                if (blen >= ap->alen)
+                                        break;
+                                if (ap->userdata) {
+                                        /*
+                                         * If startag is an invalid AG, we've
+                                         * come here once before and
+                                         * xfs_filestream_new_ag picked the
+                                         * best currently available.
+                                         *
+                                         * Don't continue looping, since we
+                                         * could loop forever.
+                                         */
+                                        if (startag == NULLAGNUMBER)
+                                                break;
+                                        error = xfs_filestream_new_ag(ap, &ag);
+                                        if (error) {
+                                                up_read(&mp->m_peraglock);
+                                                return error;
+                                        }
+                                        /* loop again to set 'blen'*/
+                                        startag = NULLAGNUMBER;
+                                        continue;
+                                }
+                        }
                        if (++ag == mp->m_sb.sb_agcount)
                                ag = 0;
                        if (ag == startag)
@@ -2809,17 +2821,27 @@ xfs_bmap_btalloc(
                 */
                else
                        args.minlen = ap->alen;
+                /*
+                 * set the failure fallback case to look in the selected
+                 * AG as the stream may have moved.
+                 */
+                if (xfs_inode_is_filestream(ap->ip))
+                        ap->rval = args.fsbno = XFS_AGB_TO_FSB(mp, ag, 0);
        } else if (ap->low) {
-                args.type = XFS_ALLOCTYPE_START_BNO;
+                if (xfs_inode_is_filestream(ap->ip))
+                        args.type = XFS_ALLOCTYPE_FIRST_AG;
+                else
+                        args.type = XFS_ALLOCTYPE_START_BNO;
                args.total = args.minlen = ap->minlen;
        } else {
                args.type = XFS_ALLOCTYPE_NEAR_BNO;
                args.total = ap->total;
                args.minlen = ap->minlen;
        }
-        if (unlikely(ap->userdata && ap->ip->i_d.di_extsize &&
+        /* apply extent size hints if obtained earlier */
-                    (ap->ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE))) {
+        if (unlikely(align)) {
-                args.prod = ap->ip->i_d.di_extsize;
+                args.prod = align;
                if ((args.mod = (xfs_extlen_t)do_mod(ap->off, args.prod)))
                        args.mod = (xfs_extlen_t)(args.prod - args.mod);
        } else if (mp->m_sb.sb_blocksize >= NBPP) {
@@ -3051,9 +3073,6 @@ xfs_bmap_del_extent(
        xfs_bmbt_rec_t          *ep;    /* current extent entry pointer */
        int                     error;  /* error return value */
        int                     flags;  /* inode logging flags */
-#ifdef XFS_BMAP_TRACE
-        static char             fname[] = "xfs_bmap_del_extent";
-#endif
        xfs_bmbt_irec_t         got;    /* current extent entry */
        xfs_fileoff_t           got_endoff;     /* first offset past got */
        int                     i;      /* temp state */
@@ -3147,7 +3166,7 @@ xfs_bmap_del_extent(
                /*
                 * Matches the whole extent.  Delete the entry.
                 */
-                xfs_bmap_trace_delete(fname, "3", ip, idx, 1, whichfork);
+                XFS_BMAP_TRACE_DELETE("3", ip, idx, 1, whichfork);
                xfs_iext_remove(ifp, idx, 1);
                ifp->if_lastex = idx;
                if (delay)
@@ -3168,7 +3187,7 @@ xfs_bmap_del_extent(
                /*
                 * Deleting the first part of the extent.
                 */
-                xfs_bmap_trace_pre_update(fname, "2", ip, idx, whichfork);
+                XFS_BMAP_TRACE_PRE_UPDATE("2", ip, idx, whichfork);
                xfs_bmbt_set_startoff(ep, del_endoff);
                temp = got.br_blockcount - del->br_blockcount;
                xfs_bmbt_set_blockcount(ep, temp);
@@ -3177,13 +3196,13 @@ xfs_bmap_del_extent(
                        temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
                                da_old);
                        xfs_bmbt_set_startblock(ep, NULLSTARTBLOCK((int)temp));
-                        xfs_bmap_trace_post_update(fname, "2", ip, idx,
+                        XFS_BMAP_TRACE_POST_UPDATE("2", ip, idx,
                                whichfork);
                        da_new = temp;
                        break;
                }
                xfs_bmbt_set_startblock(ep, del_endblock);
-                xfs_bmap_trace_post_update(fname, "2", ip, idx, whichfork);
+                XFS_BMAP_TRACE_POST_UPDATE("2", ip, idx, whichfork);
                if (!cur) {
                        flags |= XFS_ILOG_FEXT(whichfork);
                        break;
@@ -3199,19 +3218,19 @@ xfs_bmap_del_extent(
                 * Deleting the last part of the extent.
                 */
                temp = got.br_blockcount - del->br_blockcount;
-                xfs_bmap_trace_pre_update(fname, "1", ip, idx, whichfork);
+                XFS_BMAP_TRACE_PRE_UPDATE("1", ip, idx, whichfork);
                xfs_bmbt_set_blockcount(ep, temp);
                ifp->if_lastex = idx;
                if (delay) {
                        temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
                                da_old);
                        xfs_bmbt_set_startblock(ep, NULLSTARTBLOCK((int)temp));
-                        xfs_bmap_trace_post_update(fname, "1", ip, idx,
+                        XFS_BMAP_TRACE_POST_UPDATE("1", ip, idx,
                                whichfork);
                        da_new = temp;
                        break;
                }
-                xfs_bmap_trace_post_update(fname, "1", ip, idx, whichfork);
+                XFS_BMAP_TRACE_POST_UPDATE("1", ip, idx, whichfork);
                if (!cur) {
                        flags |= XFS_ILOG_FEXT(whichfork);
                        break;
@@ -3228,7 +3247,7 @@ xfs_bmap_del_extent(
                 * Deleting the middle of the extent.
                 */
                temp = del->br_startoff - got.br_startoff;
-                xfs_bmap_trace_pre_update(fname, "0", ip, idx, whichfork);
+                XFS_BMAP_TRACE_PRE_UPDATE("0", ip, idx, whichfork);
                xfs_bmbt_set_blockcount(ep, temp);
                new.br_startoff = del_endoff;
                temp2 = got_endoff - del_endoff;
@@ -3315,8 +3334,8 @@ xfs_bmap_del_extent(
                                }
                        }
                }
-                xfs_bmap_trace_post_update(fname, "0", ip, idx, whichfork);
+                XFS_BMAP_TRACE_POST_UPDATE("0", ip, idx, whichfork);
-                xfs_bmap_trace_insert(fname, "0", ip, idx + 1, 1, &new, NULL,
+                XFS_BMAP_TRACE_INSERT("0", ip, idx + 1, 1, &new, NULL,
                        whichfork);
                xfs_iext_insert(ifp, idx + 1, 1, &new);
                ifp->if_lastex = idx + 1;
@@ -3556,9 +3575,6 @@ xfs_bmap_local_to_extents(
 {
        int             error;          /* error return value */
        int             flags;          /* logging flags returned */
-#ifdef XFS_BMAP_TRACE
-        static char     fname[] = "xfs_bmap_local_to_extents";
-#endif
        xfs_ifork_t     *ifp;           /* inode fork pointer */
        /*
@@ -3613,7 +3629,7 @@ xfs_bmap_local_to_extents(
                xfs_iext_add(ifp, 0, 1);
                ep = xfs_iext_get_ext(ifp, 0);
                xfs_bmbt_set_allf(ep, 0, args.fsbno, 1, XFS_EXT_NORM);
-                xfs_bmap_trace_post_update(fname, "new", ip, 0, whichfork);
+                XFS_BMAP_TRACE_POST_UPDATE("new", ip, 0, whichfork);
                XFS_IFORK_NEXT_SET(ip, whichfork, 1);
                ip->i_d.di_nblocks = 1;
                XFS_TRANS_MOD_DQUOT_BYINO(args.mp, tp, ip,
@@ -3736,7 +3752,7 @@ ktrace_t	*xfs_bmap_trace_buf;
 STATIC void
 xfs_bmap_trace_addentry(
        int             opcode,         /* operation */
-        char            *fname,         /* function name */
+        const char      *fname,         /* function name */
        char            *desc,          /* operation description */
        xfs_inode_t     *ip,            /* incore inode pointer */
        xfs_extnum_t    idx,            /* index of entry(ies) */
@@ -3795,7 +3811,7 @@ xfs_bmap_trace_addentry(
 */
 STATIC void
 xfs_bmap_trace_delete(
-        char            *fname,         /* function name */
+        const char      *fname,         /* function name */
        char            *desc,          /* operation description */
        xfs_inode_t     *ip,            /* incore inode pointer */
        xfs_extnum_t    idx,            /* index of entry(entries) deleted */
@@ -3817,7 +3833,7 @@ xfs_bmap_trace_delete(
 */
 STATIC void
 xfs_bmap_trace_insert(
-        char            *fname,         /* function name */
+        const char      *fname,         /* function name */
        char            *desc,          /* operation description */
        xfs_inode_t     *ip,            /* incore inode pointer */
        xfs_extnum_t    idx,            /* index of entry(entries) inserted */
@@ -3846,7 +3862,7 @@ xfs_bmap_trace_insert(
 */
 STATIC void
 xfs_bmap_trace_post_update(
-        char            *fname,         /* function name */
+        const char      *fname,         /* function name */
        char            *desc,          /* operation description */
        xfs_inode_t     *ip,            /* incore inode pointer */
        xfs_extnum_t    idx,            /* index of entry updated */
@@ -3864,7 +3880,7 @@ xfs_bmap_trace_post_update(
 */
 STATIC void
 xfs_bmap_trace_pre_update(
-        char            *fname,         /* function name */
+        const char      *fname,         /* function name */
        char            *desc,          /* operation description */
        xfs_inode_t     *ip,            /* incore inode pointer */
        xfs_extnum_t    idx,            /* index of entry to be updated */
@@ -4481,9 +4497,6 @@ xfs_bmap_read_extents(
        xfs_buf_t               *bp;    /* buffer for "block" */
        int                     error;  /* error return value */
        xfs_exntfmt_t           exntf;  /* XFS_EXTFMT_NOSTATE, if checking */
-#ifdef XFS_BMAP_TRACE
-        static char             fname[] = "xfs_bmap_read_extents";
-#endif
        xfs_extnum_t            i, j;   /* index into the extents list */
        xfs_ifork_t             *ifp;   /* fork structure */
        int                     level;  /* btree level, for checking */
@@ -4600,7 +4613,7 @@ xfs_bmap_read_extents(
        }
        ASSERT(i == (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)));
        ASSERT(i == XFS_IFORK_NEXTENTS(ip, whichfork));
-        xfs_bmap_trace_exlist(fname, ip, i, whichfork);
+        XFS_BMAP_TRACE_EXLIST(ip, i, whichfork);
        return 0;
 error0:
        xfs_trans_brelse(tp, bp);
@@ -4613,7 +4626,7 @@ error0:
 */
 void
 xfs_bmap_trace_exlist(
-        char            *fname,         /* function name */
+        const char      *fname,         /* function name */
        xfs_inode_t     *ip,            /* incore inode pointer */
        xfs_extnum_t    cnt,            /* count of entries in the list */
        int             whichfork)      /* data or attr fork */
@@ -4628,7 +4641,7 @@ xfs_bmap_trace_exlist(
        for (idx = 0; idx < cnt; idx++) {
                ep = xfs_iext_get_ext(ifp, idx);
                xfs_bmbt_get_all(ep, &s);
-                xfs_bmap_trace_insert(fname, "exlist", ip, idx, 1, &s, NULL,
+                XFS_BMAP_TRACE_INSERT("exlist", ip, idx, 1, &s, NULL,
                        whichfork);
        }
 }
@@ -4868,12 +4881,7 @@ xfs_bmapi(
                                xfs_extlen_t    extsz;
                                /* Figure out the extent size, adjust alen */
-                                if (rt) {
+                                extsz = xfs_get_extsz_hint(ip);
-                                        if (!(extsz = ip->i_d.di_extsize))
-                                                extsz = mp->m_sb.sb_rextsize;
-                                } else {
-                                        extsz = ip->i_d.di_extsize;
-                                }
                                if (extsz) {
                                        error = xfs_bmap_extsize_align(mp,
                                                        &got, &prev, extsz,
@@ -5219,10 +5227,10 @@ xfs_bmapi(
                 * Else go on to the next record.
                 */
                ep = xfs_iext_get_ext(ifp, ++lastx);
-                if (lastx >= nextents) {
+                prev = got;
+                if (lastx >= nextents)
                        eof = 1;
-                        prev = got;
+                else
-                } else
                        xfs_bmbt_get_all(ep, &got);
        }
        ifp->if_lastex = lastx;
@@ -5813,8 +5821,7 @@ xfs_getbmap(
                   ip->i_d.di_format != XFS_DINODE_FMT_LOCAL)
                return XFS_ERROR(EINVAL);
        if (whichfork == XFS_DATA_FORK) {
-                if ((ip->i_d.di_extsize && (ip->i_d.di_flags &
+                if (xfs_get_extsz_hint(ip) ||
-                                (XFS_DIFLAG_REALTIME|XFS_DIFLAG_EXTSIZE))) ||
                    ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC|XFS_DIFLAG_APPEND)){
                        prealloced = 1;
                        fixlen = XFS_MAXIOFFSET(mp);
diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h
index 4f24c7e39b31..524b1c9d5246 100644
--- a/fs/xfs/xfs_bmap.h
+++ b/fs/xfs/xfs_bmap.h
@@ -144,12 +144,14 @@ extern ktrace_t	*xfs_bmap_trace_buf;
 */
 void
 xfs_bmap_trace_exlist(
-        char                    *fname,         /* function name */
+        const char              *fname,         /* function name */
        struct xfs_inode        *ip,            /* incore inode pointer */
        xfs_extnum_t            cnt,            /* count of entries in list */
        int                     whichfork);     /* data or attr fork */
+#define XFS_BMAP_TRACE_EXLIST(ip,c,w)   \
+        xfs_bmap_trace_exlist(__FUNCTION__,ip,c,w)
 #else
-#define xfs_bmap_trace_exlist(f,ip,c,w)
+#define XFS_BMAP_TRACE_EXLIST(ip,c,w)
 #endif
 /*
diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c
index 0bf192fea3eb..89b891f51cfb 100644
--- a/fs/xfs/xfs_bmap_btree.c
+++ b/fs/xfs/xfs_bmap_btree.c
@@ -76,7 +76,7 @@ static char	EXIT[] = "exit";
 */
 STATIC void
 xfs_bmbt_trace_enter(
-        char            *func,
+        const char      *func,
        xfs_btree_cur_t *cur,
        char            *s,
        int             type,
@@ -117,7 +117,7 @@ xfs_bmbt_trace_enter(
 */
 STATIC void
 xfs_bmbt_trace_argbi(
-        char            *func,
+        const char      *func,
        xfs_btree_cur_t *cur,
        xfs_buf_t       *b,
        int             i,
@@ -134,7 +134,7 @@ xfs_bmbt_trace_argbi(
 */
 STATIC void
 xfs_bmbt_trace_argbii(
-        char            *func,
+        const char      *func,
        xfs_btree_cur_t *cur,
        xfs_buf_t       *b,
        int             i0,
@@ -153,7 +153,7 @@ xfs_bmbt_trace_argbii(
 */
 STATIC void
 xfs_bmbt_trace_argfffi(
-        char                    *func,
+        const char              *func,
        xfs_btree_cur_t         *cur,
        xfs_dfiloff_t           o,
        xfs_dfsbno_t            b,
@@ -172,7 +172,7 @@ xfs_bmbt_trace_argfffi(
 */
 STATIC void
 xfs_bmbt_trace_argi(
-        char            *func,
+        const char      *func,
        xfs_btree_cur_t *cur,
        int             i,
        int             line)
@@ -188,7 +188,7 @@ xfs_bmbt_trace_argi(
 */
 STATIC void
 xfs_bmbt_trace_argifk(
-        char                    *func,
+        const char              *func,
        xfs_btree_cur_t         *cur,
        int                     i,
        xfs_fsblock_t           f,
@@ -206,7 +206,7 @@ xfs_bmbt_trace_argifk(
 */
 STATIC void
 xfs_bmbt_trace_argifr(
-        char                    *func,
+        const char              *func,
        xfs_btree_cur_t         *cur,
        int                     i,
        xfs_fsblock_t           f,
@@ -235,7 +235,7 @@ xfs_bmbt_trace_argifr(
 */
 STATIC void
 xfs_bmbt_trace_argik(
-        char                    *func,
+        const char              *func,
        xfs_btree_cur_t         *cur,
        int                     i,
        xfs_bmbt_key_t          *k,
@@ -255,7 +255,7 @@ xfs_bmbt_trace_argik(
 */
 STATIC void
 xfs_bmbt_trace_cursor(
-        char            *func,
+        const char      *func,
        xfs_btree_cur_t *cur,
        char            *s,
        int             line)
@@ -274,21 +274,21 @@ xfs_bmbt_trace_cursor(
 }
 #define XFS_BMBT_TRACE_ARGBI(c,b,i)     \
-        xfs_bmbt_trace_argbi(fname, c, b, i, __LINE__)
+        xfs_bmbt_trace_argbi(__FUNCTION__, c, b, i, __LINE__)
 #define XFS_BMBT_TRACE_ARGBII(c,b,i,j)  \
-        xfs_bmbt_trace_argbii(fname, c, b, i, j, __LINE__)
+        xfs_bmbt_trace_argbii(__FUNCTION__, c, b, i, j, __LINE__)
 #define XFS_BMBT_TRACE_ARGFFFI(c,o,b,i,j)       \
-        xfs_bmbt_trace_argfffi(fname, c, o, b, i, j, __LINE__)
+        xfs_bmbt_trace_argfffi(__FUNCTION__, c, o, b, i, j, __LINE__)
 #define XFS_BMBT_TRACE_ARGI(c,i)        \
-        xfs_bmbt_trace_argi(fname, c, i, __LINE__)
+        xfs_bmbt_trace_argi(__FUNCTION__, c, i, __LINE__)
 #define XFS_BMBT_TRACE_ARGIFK(c,i,f,s)  \
-        xfs_bmbt_trace_argifk(fname, c, i, f, s, __LINE__)
+        xfs_bmbt_trace_argifk(__FUNCTION__, c, i, f, s, __LINE__)
 #define XFS_BMBT_TRACE_ARGIFR(c,i,f,r)  \
-        xfs_bmbt_trace_argifr(fname, c, i, f, r, __LINE__)
+        xfs_bmbt_trace_argifr(__FUNCTION__, c, i, f, r, __LINE__)
 #define XFS_BMBT_TRACE_ARGIK(c,i,k)     \
-        xfs_bmbt_trace_argik(fname, c, i, k, __LINE__)
+        xfs_bmbt_trace_argik(__FUNCTION__, c, i, k, __LINE__)
 #define XFS_BMBT_TRACE_CURSOR(c,s)      \
-        xfs_bmbt_trace_cursor(fname, c, s, __LINE__)
+        xfs_bmbt_trace_cursor(__FUNCTION__, c, s, __LINE__)
 #else
 #define XFS_BMBT_TRACE_ARGBI(c,b,i)
 #define XFS_BMBT_TRACE_ARGBII(c,b,i,j)
@@ -318,9 +318,6 @@ xfs_bmbt_delrec(
        xfs_fsblock_t           bno;            /* fs-relative block number */
        xfs_buf_t               *bp;            /* buffer for block */
        int                     error;          /* error return value */
-#ifdef XFS_BMBT_TRACE
-        static char             fname[] = "xfs_bmbt_delrec";
-#endif
        int                     i;              /* loop counter */
        int                     j;              /* temp state */
        xfs_bmbt_key_t          key;            /* bmap btree key */
@@ -694,9 +691,6 @@ xfs_bmbt_insrec(
        xfs_bmbt_block_t        *block;         /* bmap btree block */
        xfs_buf_t               *bp;            /* buffer for block */
        int                     error;          /* error return value */
-#ifdef XFS_BMBT_TRACE
-        static char             fname[] = "xfs_bmbt_insrec";
-#endif
        int                     i;              /* loop index */
        xfs_bmbt_key_t          key;            /* bmap btree key */
        xfs_bmbt_key_t          *kp=NULL;       /* pointer to bmap btree key */
@@ -881,9 +875,6 @@ xfs_bmbt_killroot(
 #ifdef DEBUG
        int                     error;
 #endif
-#ifdef XFS_BMBT_TRACE
-        static char             fname[] = "xfs_bmbt_killroot";
-#endif
        int                     i;
        xfs_bmbt_key_t          *kp;
        xfs_inode_t             *ip;
@@ -973,9 +964,6 @@ xfs_bmbt_log_keys(
        int             kfirst,
        int             klast)
 {
-#ifdef XFS_BMBT_TRACE
-        static char     fname[] = "xfs_bmbt_log_keys";
-#endif
        xfs_trans_t     *tp;
        XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
@@ -1012,9 +1000,6 @@ xfs_bmbt_log_ptrs(
        int             pfirst,
        int             plast)
 {
-#ifdef XFS_BMBT_TRACE
-        static char     fname[] = "xfs_bmbt_log_ptrs";
-#endif
        xfs_trans_t     *tp;
        XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
@@ -1055,9 +1040,6 @@ xfs_bmbt_lookup(
        xfs_daddr_t             d;
        xfs_sfiloff_t           diff;
        int                     error;          /* error return value */
-#ifdef XFS_BMBT_TRACE
-        static char     fname[] = "xfs_bmbt_lookup";
-#endif
        xfs_fsblock_t           fsbno=0;
        int                     high;
        int                     i;
@@ -1195,9 +1177,6 @@ xfs_bmbt_lshift(
        int                     *stat)          /* success/failure */
 {
        int                     error;          /* error return value */
-#ifdef XFS_BMBT_TRACE
-        static char             fname[] = "xfs_bmbt_lshift";
-#endif
 #ifdef DEBUG
        int                     i;              /* loop counter */
 #endif
@@ -1331,9 +1310,6 @@ xfs_bmbt_rshift(
        int                     *stat)          /* success/failure */
 {
        int                     error;          /* error return value */
-#ifdef XFS_BMBT_TRACE
-        static char             fname[] = "xfs_bmbt_rshift";
-#endif
        int                     i;              /* loop counter */
        xfs_bmbt_key_t          key;            /* bmap btree key */
        xfs_buf_t               *lbp;           /* left buffer pointer */
@@ -1492,9 +1468,6 @@ xfs_bmbt_split(
 {
        xfs_alloc_arg_t         args;           /* block allocation args */
        int                     error;          /* error return value */
-#ifdef XFS_BMBT_TRACE
-        static char             fname[] = "xfs_bmbt_split";
-#endif
        int                     i;              /* loop counter */
        xfs_fsblock_t           lbno;           /* left sibling block number */
        xfs_buf_t               *lbp;           /* left buffer pointer */
@@ -1641,9 +1614,6 @@ xfs_bmbt_updkey(
 #ifdef DEBUG
        int                     error;
 #endif
-#ifdef XFS_BMBT_TRACE
-        static char             fname[] = "xfs_bmbt_updkey";
-#endif
        xfs_bmbt_key_t          *kp;
        int                     ptr;
@@ -1712,9 +1682,6 @@ xfs_bmbt_decrement(
        xfs_bmbt_block_t        *block;
        xfs_buf_t               *bp;
        int                     error;          /* error return value */
-#ifdef XFS_BMBT_TRACE
-        static char             fname[] = "xfs_bmbt_decrement";
-#endif
        xfs_fsblock_t           fsbno;
        int                     lev;
        xfs_mount_t             *mp;
@@ -1785,9 +1752,6 @@ xfs_bmbt_delete(
        int             *stat)          /* success/failure */
 {
        int             error;          /* error return value */
-#ifdef XFS_BMBT_TRACE
-        static char     fname[] = "xfs_bmbt_delete";
-#endif
        int             i;
        int             level;
@@ -2000,9 +1964,6 @@ xfs_bmbt_increment(
        xfs_bmbt_block_t        *block;
        xfs_buf_t               *bp;
        int                     error;          /* error return value */
-#ifdef XFS_BMBT_TRACE
-        static char             fname[] = "xfs_bmbt_increment";
-#endif
        xfs_fsblock_t           fsbno;
        int                     lev;
        xfs_mount_t             *mp;
@@ -2080,9 +2041,6 @@ xfs_bmbt_insert(
        int             *stat)          /* success/failure */
 {
        int             error;          /* error return value */
-#ifdef XFS_BMBT_TRACE
-        static char     fname[] = "xfs_bmbt_insert";
-#endif
        int             i;
        int             level;
        xfs_fsblock_t   nbno;
@@ -2142,9 +2100,6 @@ xfs_bmbt_log_block(
        int                     fields)
 {
        int                     first;
-#ifdef XFS_BMBT_TRACE
-        static char             fname[] = "xfs_bmbt_log_block";
-#endif
        int                     last;
        xfs_trans_t             *tp;
        static const short      offsets[] = {
@@ -2181,9 +2136,6 @@ xfs_bmbt_log_recs(
 {
        xfs_bmbt_block_t        *block;
        int                     first;
-#ifdef XFS_BMBT_TRACE
-        static char             fname[] = "xfs_bmbt_log_recs";
-#endif
        int                     last;
        xfs_bmbt_rec_t          *rp;
        xfs_trans_t             *tp;
@@ -2245,9 +2197,6 @@ xfs_bmbt_newroot(
        xfs_bmbt_key_t          *ckp;           /* child key pointer */
        xfs_bmbt_ptr_t          *cpp;           /* child ptr pointer */
        int                     error;          /* error return code */
-#ifdef XFS_BMBT_TRACE
-        static char             fname[] = "xfs_bmbt_newroot";
-#endif
 #ifdef DEBUG
        int                     i;              /* loop counter */
 #endif
@@ -2630,9 +2579,6 @@ xfs_bmbt_update(
        xfs_bmbt_block_t        *block;
        xfs_buf_t               *bp;
        int                     error;
-#ifdef XFS_BMBT_TRACE
-        static char             fname[] = "xfs_bmbt_update";
-#endif
        xfs_bmbt_key_t          key;
        int                     ptr;
        xfs_bmbt_rec_t          *rp;
diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h
index 4e27d55a1e73..6e40a0a198ff 100644
--- a/fs/xfs/xfs_btree.h
+++ b/fs/xfs/xfs_btree.h
@@ -444,30 +444,14 @@ xfs_btree_setbuf(
 /*
 * Min and max functions for extlen, agblock, fileoff, and filblks types.
 */
-#define XFS_EXTLEN_MIN(a,b)     \
+#define XFS_EXTLEN_MIN(a,b)     min_t(xfs_extlen_t, (a), (b))
-        ((xfs_extlen_t)(a) < (xfs_extlen_t)(b) ? \
+#define XFS_EXTLEN_MAX(a,b)     max_t(xfs_extlen_t, (a), (b))
-                (xfs_extlen_t)(a) : (xfs_extlen_t)(b))
+#define XFS_AGBLOCK_MIN(a,b)    min_t(xfs_agblock_t, (a), (b))
-#define XFS_EXTLEN_MAX(a,b)     \
+#define XFS_AGBLOCK_MAX(a,b)    max_t(xfs_agblock_t, (a), (b))
-        ((xfs_extlen_t)(a) > (xfs_extlen_t)(b) ? \
+#define XFS_FILEOFF_MIN(a,b)    min_t(xfs_fileoff_t, (a), (b))
-                (xfs_extlen_t)(a) : (xfs_extlen_t)(b))
+#define XFS_FILEOFF_MAX(a,b)    max_t(xfs_fileoff_t, (a), (b))
-#define XFS_AGBLOCK_MIN(a,b)    \
+#define XFS_FILBLKS_MIN(a,b)    min_t(xfs_filblks_t, (a), (b))
-        ((xfs_agblock_t)(a) < (xfs_agblock_t)(b) ? \
+#define XFS_FILBLKS_MAX(a,b)    max_t(xfs_filblks_t, (a), (b))
-                (xfs_agblock_t)(a) : (xfs_agblock_t)(b))
-#define XFS_AGBLOCK_MAX(a,b)    \
-        ((xfs_agblock_t)(a) > (xfs_agblock_t)(b) ? \
-                (xfs_agblock_t)(a) : (xfs_agblock_t)(b))
-#define XFS_FILEOFF_MIN(a,b)    \
-        ((xfs_fileoff_t)(a) < (xfs_fileoff_t)(b) ? \
-                (xfs_fileoff_t)(a) : (xfs_fileoff_t)(b))
-#define XFS_FILEOFF_MAX(a,b)    \
-        ((xfs_fileoff_t)(a) > (xfs_fileoff_t)(b) ? \
-                (xfs_fileoff_t)(a) : (xfs_fileoff_t)(b))
-#define XFS_FILBLKS_MIN(a,b)    \
-        ((xfs_filblks_t)(a) < (xfs_filblks_t)(b) ? \
-                (xfs_filblks_t)(a) : (xfs_filblks_t)(b))
-#define XFS_FILBLKS_MAX(a,b)    \
-        ((xfs_filblks_t)(a) > (xfs_filblks_t)(b) ? \
-                (xfs_filblks_t)(a) : (xfs_filblks_t)(b))
 #define XFS_FSB_SANITY_CHECK(mp,fsb)    \
        (XFS_FSB_TO_AGNO(mp, fsb) < mp->m_sb.sb_agcount && \
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 6c1bddc04e31..b0667cb27d66 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -580,8 +580,8 @@ xfs_buf_item_unlock(
         * If the buf item isn't tracking any data, free it.
         * Otherwise, if XFS_BLI_HOLD is set clear it.
         */
-        if (xfs_count_bits(bip->bli_format.blf_data_map,
+        if (xfs_bitmap_empty(bip->bli_format.blf_data_map,
-                              bip->bli_format.blf_map_size, 0) == 0) {
+                             bip->bli_format.blf_map_size)) {
                xfs_buf_item_relse(bp);
        } else if (hold) {
                bip->bli_flags &= ~XFS_BLI_HOLD;
diff --git a/fs/xfs/xfs_clnt.h b/fs/xfs/xfs_clnt.h
index 5b7eb81453be..f89196cb08d2 100644
--- a/fs/xfs/xfs_clnt.h
+++ b/fs/xfs/xfs_clnt.h
@@ -99,5 +99,7 @@ struct xfs_mount_args {
 */
 #define XFSMNT2_COMPAT_IOSIZE   0x00000001      /* don't report large preferred
                                                 * I/O size in stat(2) */
+#define XFSMNT2_FILESTREAMS     0x00000002      /* enable the filestreams
+                                                 * allocator */
 #endif  /* __XFS_CLNT_H__ */
diff --git a/fs/xfs/xfs_dinode.h b/fs/xfs/xfs_dinode.h
index b33826961c45..fefd0116bac9 100644
--- a/fs/xfs/xfs_dinode.h
+++ b/fs/xfs/xfs_dinode.h
@@ -257,6 +257,7 @@ typedef enum xfs_dinode_fmt
 #define XFS_DIFLAG_EXTSIZE_BIT      11  /* inode extent size allocator hint */
 #define XFS_DIFLAG_EXTSZINHERIT_BIT 12  /* inherit inode extent size */
 #define XFS_DIFLAG_NODEFRAG_BIT     13  /* do not reorganize/defragment */
+#define XFS_DIFLAG_FILESTREAM_BIT   14  /* use filestream allocator */
 #define XFS_DIFLAG_REALTIME      (1 << XFS_DIFLAG_REALTIME_BIT)
 #define XFS_DIFLAG_PREALLOC      (1 << XFS_DIFLAG_PREALLOC_BIT)
 #define XFS_DIFLAG_NEWRTBM       (1 << XFS_DIFLAG_NEWRTBM_BIT)
@@ -271,12 +272,13 @@ typedef enum xfs_dinode_fmt
 #define XFS_DIFLAG_EXTSIZE       (1 << XFS_DIFLAG_EXTSIZE_BIT)
 #define XFS_DIFLAG_EXTSZINHERIT  (1 << XFS_DIFLAG_EXTSZINHERIT_BIT)
 #define XFS_DIFLAG_NODEFRAG      (1 << XFS_DIFLAG_NODEFRAG_BIT)
+#define XFS_DIFLAG_FILESTREAM    (1 << XFS_DIFLAG_FILESTREAM_BIT)
 #define XFS_DIFLAG_ANY \
        (XFS_DIFLAG_REALTIME | XFS_DIFLAG_PREALLOC | XFS_DIFLAG_NEWRTBM | \
         XFS_DIFLAG_IMMUTABLE | XFS_DIFLAG_APPEND | XFS_DIFLAG_SYNC | \
         XFS_DIFLAG_NOATIME | XFS_DIFLAG_NODUMP | XFS_DIFLAG_RTINHERIT | \
         XFS_DIFLAG_PROJINHERIT | XFS_DIFLAG_NOSYMLINKS | XFS_DIFLAG_EXTSIZE | \
-         XFS_DIFLAG_EXTSZINHERIT | XFS_DIFLAG_NODEFRAG)
+         XFS_DIFLAG_EXTSZINHERIT | XFS_DIFLAG_NODEFRAG | XFS_DIFLAG_FILESTREAM)
 #endif  /* __XFS_DINODE_H__ */
diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/xfs_dir2.c
index 8e8e5279334a..29e091914df4 100644
--- a/fs/xfs/xfs_dir2.c
+++ b/fs/xfs/xfs_dir2.c
@@ -55,9 +55,9 @@ xfs_dir_mount(
               XFS_MAX_BLOCKSIZE);
        mp->m_dirblksize = 1 << (mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog);
        mp->m_dirblkfsbs = 1 << mp->m_sb.sb_dirblklog;
-        mp->m_dirdatablk = XFS_DIR2_DB_TO_DA(mp, XFS_DIR2_DATA_FIRSTDB(mp));
+        mp->m_dirdatablk = xfs_dir2_db_to_da(mp, XFS_DIR2_DATA_FIRSTDB(mp));
-        mp->m_dirleafblk = XFS_DIR2_DB_TO_DA(mp, XFS_DIR2_LEAF_FIRSTDB(mp));
+        mp->m_dirleafblk = xfs_dir2_db_to_da(mp, XFS_DIR2_LEAF_FIRSTDB(mp));
-        mp->m_dirfreeblk = XFS_DIR2_DB_TO_DA(mp, XFS_DIR2_FREE_FIRSTDB(mp));
+        mp->m_dirfreeblk = xfs_dir2_db_to_da(mp, XFS_DIR2_FREE_FIRSTDB(mp));
        mp->m_attr_node_ents =
                (mp->m_sb.sb_blocksize - (uint)sizeof(xfs_da_node_hdr_t)) /
                (uint)sizeof(xfs_da_node_entry_t);
@@ -554,7 +554,7 @@ xfs_dir2_grow_inode(
         */
        if (mapp != &map)
                kmem_free(mapp, sizeof(*mapp) * count);
-        *dbp = XFS_DIR2_DA_TO_DB(mp, (xfs_dablk_t)bno);
+        *dbp = xfs_dir2_da_to_db(mp, (xfs_dablk_t)bno);
        /*
         * Update file's size if this is the data space and it grew.
         */
@@ -706,7 +706,7 @@ xfs_dir2_shrink_inode(
        dp = args->dp;
        mp = dp->i_mount;
        tp = args->trans;
-        da = XFS_DIR2_DB_TO_DA(mp, db);
+        da = xfs_dir2_db_to_da(mp, db);
        /*
         * Unmap the fsblock(s).
         */
@@ -742,7 +742,7 @@ xfs_dir2_shrink_inode(
        /*
         * If the block isn't the last one in the directory, we're done.
         */
-        if (dp->i_d.di_size > XFS_DIR2_DB_OFF_TO_BYTE(mp, db + 1, 0))
+        if (dp->i_d.di_size > xfs_dir2_db_off_to_byte(mp, db + 1, 0))
                return 0;
        bno = da;
        if ((error = xfs_bmap_last_before(tp, dp, &bno, XFS_DATA_FORK))) {
diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c
index 3accc1dcd6c9..e4df1aaae2a2 100644
--- a/fs/xfs/xfs_dir2_block.c
+++ b/fs/xfs/xfs_dir2_block.c
@@ -115,13 +115,13 @@ xfs_dir2_block_addname(
                xfs_da_brelse(tp, bp);
                return XFS_ERROR(EFSCORRUPTED);
        }
-        len = XFS_DIR2_DATA_ENTSIZE(args->namelen);
+        len = xfs_dir2_data_entsize(args->namelen);
        /*
         * Set up pointers to parts of the block.
         */
        bf = block->hdr.bestfree;
-        btp = XFS_DIR2_BLOCK_TAIL_P(mp, block);
+        btp = xfs_dir2_block_tail_p(mp, block);
-        blp = XFS_DIR2_BLOCK_LEAF_P(btp);
+        blp = xfs_dir2_block_leaf_p(btp);
        /*
         * No stale entries?  Need space for entry and new leaf.
         */
@@ -396,7 +396,7 @@ xfs_dir2_block_addname(
         * Fill in the leaf entry.
         */
        blp[mid].hashval = cpu_to_be32(args->hashval);
-        blp[mid].address = cpu_to_be32(XFS_DIR2_BYTE_TO_DATAPTR(mp,
+        blp[mid].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp,
                                (char *)dep - (char *)block));
        xfs_dir2_block_log_leaf(tp, bp, lfloglow, lfloghigh);
        /*
@@ -411,7 +411,7 @@ xfs_dir2_block_addname(
        dep->inumber = cpu_to_be64(args->inumber);
        dep->namelen = args->namelen;
        memcpy(dep->name, args->name, args->namelen);
-        tagp = XFS_DIR2_DATA_ENTRY_TAG_P(dep);
+        tagp = xfs_dir2_data_entry_tag_p(dep);
        *tagp = cpu_to_be16((char *)dep - (char *)block);
        /*
         * Clean up the bestfree array and log the header, tail, and entry.
@@ -455,7 +455,7 @@ xfs_dir2_block_getdents(
        /*
         * If the block number in the offset is out of range, we're done.
         */
-        if (XFS_DIR2_DATAPTR_TO_DB(mp, uio->uio_offset) > mp->m_dirdatablk) {
+        if (xfs_dir2_dataptr_to_db(mp, uio->uio_offset) > mp->m_dirdatablk) {
                *eofp = 1;
                return 0;
        }
@@ -471,15 +471,15 @@ xfs_dir2_block_getdents(
         * Extract the byte offset we start at from the seek pointer.
         * We'll skip entries before this.
         */
-        wantoff = XFS_DIR2_DATAPTR_TO_OFF(mp, uio->uio_offset);
+        wantoff = xfs_dir2_dataptr_to_off(mp, uio->uio_offset);
        block = bp->data;
        xfs_dir2_data_check(dp, bp);
        /*
         * Set up values for the loop.
         */
-        btp = XFS_DIR2_BLOCK_TAIL_P(mp, block);
+        btp = xfs_dir2_block_tail_p(mp, block);
        ptr = (char *)block->u;
-        endptr = (char *)XFS_DIR2_BLOCK_LEAF_P(btp);
+        endptr = (char *)xfs_dir2_block_leaf_p(btp);
        p.dbp = dbp;
        p.put = put;
        p.uio = uio;
@@ -502,7 +502,7 @@ xfs_dir2_block_getdents(
                /*
                 * Bump pointer for the next iteration.
                 */
-                ptr += XFS_DIR2_DATA_ENTSIZE(dep->namelen);
+                ptr += xfs_dir2_data_entsize(dep->namelen);
                /*
                 * The entry is before the desired starting point, skip it.
                 */
@@ -513,7 +513,7 @@ xfs_dir2_block_getdents(
                 */
                p.namelen = dep->namelen;
-                p.cook = XFS_DIR2_DB_OFF_TO_DATAPTR(mp, mp->m_dirdatablk,
+                p.cook = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
                                                    ptr - (char *)block);
                p.ino = be64_to_cpu(dep->inumber);
 #if XFS_BIG_INUMS
@@ -531,7 +531,7 @@ xfs_dir2_block_getdents(
                 */
                if (!p.done) {
                        uio->uio_offset =
-                                XFS_DIR2_DB_OFF_TO_DATAPTR(mp, mp->m_dirdatablk,
+                                xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
                                        (char *)dep - (char *)block);
                        xfs_da_brelse(tp, bp);
                        return error;
@@ -545,7 +545,7 @@ xfs_dir2_block_getdents(
        *eofp = 1;
        uio->uio_offset =
-                XFS_DIR2_DB_OFF_TO_DATAPTR(mp, mp->m_dirdatablk + 1, 0);
+                xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk + 1, 0);
        xfs_da_brelse(tp, bp);
@@ -569,8 +569,8 @@ xfs_dir2_block_log_leaf(
        mp = tp->t_mountp;
        block = bp->data;
-        btp = XFS_DIR2_BLOCK_TAIL_P(mp, block);
+        btp = xfs_dir2_block_tail_p(mp, block);
-        blp = XFS_DIR2_BLOCK_LEAF_P(btp);
+        blp = xfs_dir2_block_leaf_p(btp);
        xfs_da_log_buf(tp, bp, (uint)((char *)&blp[first] - (char *)block),
                (uint)((char *)&blp[last + 1] - (char *)block - 1));
 }
@@ -589,7 +589,7 @@ xfs_dir2_block_log_tail(
        mp = tp->t_mountp;
        block = bp->data;
-        btp = XFS_DIR2_BLOCK_TAIL_P(mp, block);
+        btp = xfs_dir2_block_tail_p(mp, block);
        xfs_da_log_buf(tp, bp, (uint)((char *)btp - (char *)block),
                (uint)((char *)(btp + 1) - (char *)block - 1));
 }
@@ -623,13 +623,13 @@ xfs_dir2_block_lookup(
        mp = dp->i_mount;
        block = bp->data;
        xfs_dir2_data_check(dp, bp);
-        btp = XFS_DIR2_BLOCK_TAIL_P(mp, block);
+        btp = xfs_dir2_block_tail_p(mp, block);
-        blp = XFS_DIR2_BLOCK_LEAF_P(btp);
+        blp = xfs_dir2_block_leaf_p(btp);
        /*
         * Get the offset from the leaf entry, to point to the data.
         */
        dep = (xfs_dir2_data_entry_t *)
-              ((char *)block + XFS_DIR2_DATAPTR_TO_OFF(mp, be32_to_cpu(blp[ent].address)));
+              ((char *)block + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(blp[ent].address)));
        /*
         * Fill in inode number, release the block.
         */
@@ -675,8 +675,8 @@ xfs_dir2_block_lookup_int(
        ASSERT(bp != NULL);
        block = bp->data;
        xfs_dir2_data_check(dp, bp);
-        btp = XFS_DIR2_BLOCK_TAIL_P(mp, block);
+        btp = xfs_dir2_block_tail_p(mp, block);
-        blp = XFS_DIR2_BLOCK_LEAF_P(btp);
+        blp = xfs_dir2_block_leaf_p(btp);
        /*
         * Loop doing a binary search for our hash value.
         * Find our entry, ENOENT if it's not there.
@@ -713,7 +713,7 @@ xfs_dir2_block_lookup_int(
                 * Get pointer to the entry from the leaf.
                 */
                dep = (xfs_dir2_data_entry_t *)
-                        ((char *)block + XFS_DIR2_DATAPTR_TO_OFF(mp, addr));
+                        ((char *)block + xfs_dir2_dataptr_to_off(mp, addr));
                /*
                 * Compare, if it's right give back buffer & entry number.
                 */
@@ -768,20 +768,20 @@ xfs_dir2_block_removename(
        tp = args->trans;
        mp = dp->i_mount;
        block = bp->data;
-        btp = XFS_DIR2_BLOCK_TAIL_P(mp, block);
+        btp = xfs_dir2_block_tail_p(mp, block);
-        blp = XFS_DIR2_BLOCK_LEAF_P(btp);
+        blp = xfs_dir2_block_leaf_p(btp);
        /*
         * Point to the data entry using the leaf entry.
         */
        dep = (xfs_dir2_data_entry_t *)
-              ((char *)block + XFS_DIR2_DATAPTR_TO_OFF(mp, be32_to_cpu(blp[ent].address)));
+              ((char *)block + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(blp[ent].address)));
        /*
         * Mark the data entry's space free.
         */
        needlog = needscan = 0;
        xfs_dir2_data_make_free(tp, bp,
                (xfs_dir2_data_aoff_t)((char *)dep - (char *)block),
-                XFS_DIR2_DATA_ENTSIZE(dep->namelen), &needlog, &needscan);
+                xfs_dir2_data_entsize(dep->namelen), &needlog, &needscan);
        /*
         * Fix up the block tail.
         */
@@ -843,13 +843,13 @@ xfs_dir2_block_replace(
        dp = args->dp;
        mp = dp->i_mount;
        block = bp->data;
-        btp = XFS_DIR2_BLOCK_TAIL_P(mp, block);
+        btp = xfs_dir2_block_tail_p(mp, block);
-        blp = XFS_DIR2_BLOCK_LEAF_P(btp);
+        blp = xfs_dir2_block_leaf_p(btp);
        /*
         * Point to the data entry we need to change.
         */
        dep = (xfs_dir2_data_entry_t *)
-              ((char *)block + XFS_DIR2_DATAPTR_TO_OFF(mp, be32_to_cpu(blp[ent].address)));
+              ((char *)block + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(blp[ent].address)));
        ASSERT(be64_to_cpu(dep->inumber) != args->inumber);
        /*
         * Change the inode number to the new value.
@@ -912,7 +912,7 @@ xfs_dir2_leaf_to_block(
        mp = dp->i_mount;
        leaf = lbp->data;
        ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAF1_MAGIC);
-        ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf);
+        ltp = xfs_dir2_leaf_tail_p(mp, leaf);
        /*
         * If there are data blocks other than the first one, take this
         * opportunity to remove trailing empty data blocks that may have
@@ -920,7 +920,7 @@ xfs_dir2_leaf_to_block(
         * These will show up in the leaf bests table.
         */
        while (dp->i_d.di_size > mp->m_dirblksize) {
-                bestsp = XFS_DIR2_LEAF_BESTS_P(ltp);
+                bestsp = xfs_dir2_leaf_bests_p(ltp);
                if (be16_to_cpu(bestsp[be32_to_cpu(ltp->bestcount) - 1]) ==
                    mp->m_dirblksize - (uint)sizeof(block->hdr)) {
                        if ((error =
@@ -974,14 +974,14 @@ xfs_dir2_leaf_to_block(
        /*
         * Initialize the block tail.
         */
-        btp = XFS_DIR2_BLOCK_TAIL_P(mp, block);
+        btp = xfs_dir2_block_tail_p(mp, block);
        btp->count = cpu_to_be32(be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale));
        btp->stale = 0;
        xfs_dir2_block_log_tail(tp, dbp);
        /*
         * Initialize the block leaf area.  We compact out stale entries.
         */
-        lep = XFS_DIR2_BLOCK_LEAF_P(btp);
+        lep = xfs_dir2_block_leaf_p(btp);
        for (from = to = 0; from < be16_to_cpu(leaf->hdr.count); from++) {
                if (be32_to_cpu(leaf->ents[from].address) == XFS_DIR2_NULL_DATAPTR)
                        continue;
@@ -1067,7 +1067,7 @@ xfs_dir2_sf_to_block(
        ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
        ASSERT(dp->i_df.if_u1.if_data != NULL);
        sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
-        ASSERT(dp->i_d.di_size >= XFS_DIR2_SF_HDR_SIZE(sfp->hdr.i8count));
+        ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->hdr.i8count));
        /*
         * Copy the directory into the stack buffer.
         * Then pitch the incore inode data so we can make extents.
@@ -1119,10 +1119,10 @@ xfs_dir2_sf_to_block(
        /*
         * Fill in the tail.
         */
-        btp = XFS_DIR2_BLOCK_TAIL_P(mp, block);
+        btp = xfs_dir2_block_tail_p(mp, block);
        btp->count = cpu_to_be32(sfp->hdr.count + 2);   /* ., .. */
        btp->stale = 0;
-        blp = XFS_DIR2_BLOCK_LEAF_P(btp);
+        blp = xfs_dir2_block_leaf_p(btp);
        endoffset = (uint)((char *)blp - (char *)block);
        /*
         * Remove the freespace, we'll manage it.
@@ -1138,25 +1138,25 @@ xfs_dir2_sf_to_block(
        dep->inumber = cpu_to_be64(dp->i_ino);
        dep->namelen = 1;
        dep->name[0] = '.';
-        tagp = XFS_DIR2_DATA_ENTRY_TAG_P(dep);
+        tagp = xfs_dir2_data_entry_tag_p(dep);
        *tagp = cpu_to_be16((char *)dep - (char *)block);
        xfs_dir2_data_log_entry(tp, bp, dep);
        blp[0].hashval = cpu_to_be32(xfs_dir_hash_dot);
-        blp[0].address = cpu_to_be32(XFS_DIR2_BYTE_TO_DATAPTR(mp,
+        blp[0].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp,
                                (char *)dep - (char *)block));
        /*
         * Create entry for ..
         */
        dep = (xfs_dir2_data_entry_t *)
                ((char *)block + XFS_DIR2_DATA_DOTDOT_OFFSET);
-        dep->inumber = cpu_to_be64(XFS_DIR2_SF_GET_INUMBER(sfp, &sfp->hdr.parent));
+        dep->inumber = cpu_to_be64(xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent));
        dep->namelen = 2;
        dep->name[0] = dep->name[1] = '.';
-        tagp = XFS_DIR2_DATA_ENTRY_TAG_P(dep);
+        tagp = xfs_dir2_data_entry_tag_p(dep);
        *tagp = cpu_to_be16((char *)dep - (char *)block);
        xfs_dir2_data_log_entry(tp, bp, dep);
        blp[1].hashval = cpu_to_be32(xfs_dir_hash_dotdot);
-        blp[1].address = cpu_to_be32(XFS_DIR2_BYTE_TO_DATAPTR(mp,
+        blp[1].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp,
                                (char *)dep - (char *)block));
        offset = XFS_DIR2_DATA_FIRST_OFFSET;
        /*
@@ -1165,7 +1165,7 @@ xfs_dir2_sf_to_block(
        if ((i = 0) == sfp->hdr.count)
                sfep = NULL;
        else
-                sfep = XFS_DIR2_SF_FIRSTENTRY(sfp);
+                sfep = xfs_dir2_sf_firstentry(sfp);
        /*
         * Need to preserve the existing offset values in the sf directory.
         * Insert holes (unused entries) where necessary.
@@ -1177,7 +1177,7 @@ xfs_dir2_sf_to_block(
                if (sfep == NULL)
                        newoffset = endoffset;
                else
-                        newoffset = XFS_DIR2_SF_GET_OFFSET(sfep);
+                        newoffset = xfs_dir2_sf_get_offset(sfep);
                /*
                 * There should be a hole here, make one.
                 */
@@ -1186,7 +1186,7 @@ xfs_dir2_sf_to_block(
                              ((char *)block + offset);
                        dup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG);
                        dup->length = cpu_to_be16(newoffset - offset);
-                        *XFS_DIR2_DATA_UNUSED_TAG_P(dup) = cpu_to_be16(
+                        *xfs_dir2_data_unused_tag_p(dup) = cpu_to_be16(
                                ((char *)dup - (char *)block));
                        xfs_dir2_data_log_unused(tp, bp, dup);
                        (void)xfs_dir2_data_freeinsert((xfs_dir2_data_t *)block,
@@ -1198,22 +1198,22 @@ xfs_dir2_sf_to_block(
                 * Copy a real entry.
                 */
                dep = (xfs_dir2_data_entry_t *)((char *)block + newoffset);
-                dep->inumber = cpu_to_be64(XFS_DIR2_SF_GET_INUMBER(sfp,
+                dep->inumber = cpu_to_be64(xfs_dir2_sf_get_inumber(sfp,
-                                XFS_DIR2_SF_INUMBERP(sfep)));
+                                xfs_dir2_sf_inumberp(sfep)));
                dep->namelen = sfep->namelen;
                memcpy(dep->name, sfep->name, dep->namelen);
-                tagp = XFS_DIR2_DATA_ENTRY_TAG_P(dep);
+                tagp = xfs_dir2_data_entry_tag_p(dep);
                *tagp = cpu_to_be16((char *)dep - (char *)block);
                xfs_dir2_data_log_entry(tp, bp, dep);
                blp[2 + i].hashval = cpu_to_be32(xfs_da_hashname(
                                        (char *)sfep->name, sfep->namelen));
-                blp[2 + i].address = cpu_to_be32(XFS_DIR2_BYTE_TO_DATAPTR(mp,
+                blp[2 + i].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp,
                                                 (char *)dep - (char *)block));
                offset = (int)((char *)(tagp + 1) - (char *)block);
                if (++i == sfp->hdr.count)
                        sfep = NULL;
                else
-                        sfep = XFS_DIR2_SF_NEXTENTRY(sfp, sfep);
+                        sfep = xfs_dir2_sf_nextentry(sfp, sfep);
        }
        /* Done with the temporary buffer */
        kmem_free(buf, buf_len);
diff --git a/fs/xfs/xfs_dir2_block.h b/fs/xfs/xfs_dir2_block.h
index 6722effd0b20..e7c2606161e9 100644
--- a/fs/xfs/xfs_dir2_block.h
+++ b/fs/xfs/xfs_dir2_block.h
@@ -60,7 +60,6 @@ typedef struct xfs_dir2_block {
 /*
 * Pointer to the leaf header embedded in a data block (1-block format)
 */
-#define XFS_DIR2_BLOCK_TAIL_P(mp,block) xfs_dir2_block_tail_p(mp,block)
 static inline xfs_dir2_block_tail_t *
 xfs_dir2_block_tail_p(struct xfs_mount *mp, xfs_dir2_block_t *block)
 {
@@ -71,7 +70,6 @@ xfs_dir2_block_tail_p(struct xfs_mount *mp, xfs_dir2_block_t *block)
 /*
 * Pointer to the leaf entries embedded in a data block (1-block format)
 */
-#define XFS_DIR2_BLOCK_LEAF_P(btp)      xfs_dir2_block_leaf_p(btp)
 static inline struct xfs_dir2_leaf_entry *
 xfs_dir2_block_leaf_p(xfs_dir2_block_tail_t *btp)
 {
diff --git a/fs/xfs/xfs_dir2_data.c b/fs/xfs/xfs_dir2_data.c
index c211c37ef67c..7ebe295bd6d3 100644
--- a/fs/xfs/xfs_dir2_data.c
+++ b/fs/xfs/xfs_dir2_data.c
@@ -72,8 +72,8 @@ xfs_dir2_data_check(
        bf = d->hdr.bestfree;
        p = (char *)d->u;
        if (be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC) {
-                btp = XFS_DIR2_BLOCK_TAIL_P(mp, (xfs_dir2_block_t *)d);
+                btp = xfs_dir2_block_tail_p(mp, (xfs_dir2_block_t *)d);
-                lep = XFS_DIR2_BLOCK_LEAF_P(btp);
+                lep = xfs_dir2_block_leaf_p(btp);
                endp = (char *)lep;
        } else
                endp = (char *)d + mp->m_dirblksize;
@@ -107,7 +107,7 @@ xfs_dir2_data_check(
                 */
                if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
                        ASSERT(lastfree == 0);
-                        ASSERT(be16_to_cpu(*XFS_DIR2_DATA_UNUSED_TAG_P(dup)) ==
+                        ASSERT(be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)) ==
                               (char *)dup - (char *)d);
                        dfp = xfs_dir2_data_freefind(d, dup);
                        if (dfp) {
@@ -131,12 +131,12 @@ xfs_dir2_data_check(
                dep = (xfs_dir2_data_entry_t *)p;
                ASSERT(dep->namelen != 0);
                ASSERT(xfs_dir_ino_validate(mp, be64_to_cpu(dep->inumber)) == 0);
-                ASSERT(be16_to_cpu(*XFS_DIR2_DATA_ENTRY_TAG_P(dep)) ==
+                ASSERT(be16_to_cpu(*xfs_dir2_data_entry_tag_p(dep)) ==
                       (char *)dep - (char *)d);
                count++;
                lastfree = 0;
                if (be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC) {
-                        addr = XFS_DIR2_DB_OFF_TO_DATAPTR(mp, mp->m_dirdatablk,
+                        addr = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
                                (xfs_dir2_data_aoff_t)
                                ((char *)dep - (char *)d));
                        hash = xfs_da_hashname((char *)dep->name, dep->namelen);
@@ -147,7 +147,7 @@ xfs_dir2_data_check(
                        }
                        ASSERT(i < be32_to_cpu(btp->count));
                }
-                p += XFS_DIR2_DATA_ENTSIZE(dep->namelen);
+                p += xfs_dir2_data_entsize(dep->namelen);
        }
        /*
         * Need to have seen all the entries and all the bestfree slots.
@@ -346,8 +346,8 @@ xfs_dir2_data_freescan(
         */
        p = (char *)d->u;
        if (be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC) {
-                btp = XFS_DIR2_BLOCK_TAIL_P(mp, (xfs_dir2_block_t *)d);
+                btp = xfs_dir2_block_tail_p(mp, (xfs_dir2_block_t *)d);
-                endp = (char *)XFS_DIR2_BLOCK_LEAF_P(btp);
+                endp = (char *)xfs_dir2_block_leaf_p(btp);
        } else
                endp = (char *)d + mp->m_dirblksize;
        /*
@@ -360,7 +360,7 @@ xfs_dir2_data_freescan(
                 */
                if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
                        ASSERT((char *)dup - (char *)d ==
-                               be16_to_cpu(*XFS_DIR2_DATA_UNUSED_TAG_P(dup)));
+                               be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)));
                        xfs_dir2_data_freeinsert(d, dup, loghead);
                        p += be16_to_cpu(dup->length);
                }
@@ -370,8 +370,8 @@ xfs_dir2_data_freescan(
                else {
                        dep = (xfs_dir2_data_entry_t *)p;
                        ASSERT((char *)dep - (char *)d ==
-                               be16_to_cpu(*XFS_DIR2_DATA_ENTRY_TAG_P(dep)));
+                               be16_to_cpu(*xfs_dir2_data_entry_tag_p(dep)));
-                        p += XFS_DIR2_DATA_ENTSIZE(dep->namelen);
+                        p += xfs_dir2_data_entsize(dep->namelen);
                }
        }
 }
@@ -402,7 +402,7 @@ xfs_dir2_data_init(
        /*
         * Get the buffer set up for the block.
         */
-        error = xfs_da_get_buf(tp, dp, XFS_DIR2_DB_TO_DA(mp, blkno), -1, &bp,
+        error = xfs_da_get_buf(tp, dp, xfs_dir2_db_to_da(mp, blkno), -1, &bp,
                XFS_DATA_FORK);
        if (error) {
                return error;
@@ -427,7 +427,7 @@ xfs_dir2_data_init(
        t=mp->m_dirblksize - (uint)sizeof(d->hdr);
        d->hdr.bestfree[0].length = cpu_to_be16(t);
        dup->length = cpu_to_be16(t);
-        *XFS_DIR2_DATA_UNUSED_TAG_P(dup) = cpu_to_be16((char *)dup - (char *)d);
+        *xfs_dir2_data_unused_tag_p(dup) = cpu_to_be16((char *)dup - (char *)d);
        /*
         * Log it and return it.
         */
@@ -452,7 +452,7 @@ xfs_dir2_data_log_entry(
        ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC ||
               be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC);
        xfs_da_log_buf(tp, bp, (uint)((char *)dep - (char *)d),
-                (uint)((char *)(XFS_DIR2_DATA_ENTRY_TAG_P(dep) + 1) -
+                (uint)((char *)(xfs_dir2_data_entry_tag_p(dep) + 1) -
                       (char *)d - 1));
 }
@@ -497,8 +497,8 @@ xfs_dir2_data_log_unused(
         * Log the end (tag) of the unused entry.
         */
        xfs_da_log_buf(tp, bp,
-                (uint)((char *)XFS_DIR2_DATA_UNUSED_TAG_P(dup) - (char *)d),
+                (uint)((char *)xfs_dir2_data_unused_tag_p(dup) - (char *)d),
-                (uint)((char *)XFS_DIR2_DATA_UNUSED_TAG_P(dup) - (char *)d +
+                (uint)((char *)xfs_dir2_data_unused_tag_p(dup) - (char *)d +
                       sizeof(xfs_dir2_data_off_t) - 1));
 }
@@ -535,8 +535,8 @@ xfs_dir2_data_make_free(
                xfs_dir2_block_tail_t   *btp;   /* block tail */
                ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC);
-                btp = XFS_DIR2_BLOCK_TAIL_P(mp, (xfs_dir2_block_t *)d);
+                btp = xfs_dir2_block_tail_p(mp, (xfs_dir2_block_t *)d);
-                endptr = (char *)XFS_DIR2_BLOCK_LEAF_P(btp);
+                endptr = (char *)xfs_dir2_block_leaf_p(btp);
        }
        /*
         * If this isn't the start of the block, then back up to
@@ -587,7 +587,7 @@ xfs_dir2_data_make_free(
                 * Fix up the new big freespace.
                 */
                be16_add(&prevdup->length, len + be16_to_cpu(postdup->length));
-                *XFS_DIR2_DATA_UNUSED_TAG_P(prevdup) =
+                *xfs_dir2_data_unused_tag_p(prevdup) =
                        cpu_to_be16((char *)prevdup - (char *)d);
                xfs_dir2_data_log_unused(tp, bp, prevdup);
                if (!needscan) {
@@ -621,7 +621,7 @@ xfs_dir2_data_make_free(
        else if (prevdup) {
                dfp = xfs_dir2_data_freefind(d, prevdup);
                be16_add(&prevdup->length, len);
-                *XFS_DIR2_DATA_UNUSED_TAG_P(prevdup) =
+                *xfs_dir2_data_unused_tag_p(prevdup) =
                        cpu_to_be16((char *)prevdup - (char *)d);
                xfs_dir2_data_log_unused(tp, bp, prevdup);
                /*
@@ -649,7 +649,7 @@ xfs_dir2_data_make_free(
                newdup = (xfs_dir2_data_unused_t *)((char *)d + offset);
                newdup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG);
                newdup->length = cpu_to_be16(len + be16_to_cpu(postdup->length));
-                *XFS_DIR2_DATA_UNUSED_TAG_P(newdup) =
+                *xfs_dir2_data_unused_tag_p(newdup) =
                        cpu_to_be16((char *)newdup - (char *)d);
                xfs_dir2_data_log_unused(tp, bp, newdup);
                /*
@@ -676,7 +676,7 @@ xfs_dir2_data_make_free(
                newdup = (xfs_dir2_data_unused_t *)((char *)d + offset);
                newdup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG);
                newdup->length = cpu_to_be16(len);
-                *XFS_DIR2_DATA_UNUSED_TAG_P(newdup) =
+                *xfs_dir2_data_unused_tag_p(newdup) =
                        cpu_to_be16((char *)newdup - (char *)d);
                xfs_dir2_data_log_unused(tp, bp, newdup);
                (void)xfs_dir2_data_freeinsert(d, newdup, needlogp);
@@ -712,7 +712,7 @@ xfs_dir2_data_use_free(
        ASSERT(be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG);
        ASSERT(offset >= (char *)dup - (char *)d);
        ASSERT(offset + len <= (char *)dup + be16_to_cpu(dup->length) - (char *)d);
-        ASSERT((char *)dup - (char *)d == be16_to_cpu(*XFS_DIR2_DATA_UNUSED_TAG_P(dup)));
+        ASSERT((char *)dup - (char *)d == be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)));
        /*
         * Look up the entry in the bestfree table.
         */
@@ -745,7 +745,7 @@ xfs_dir2_data_use_free(
                newdup = (xfs_dir2_data_unused_t *)((char *)d + offset + len);
                newdup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG);
                newdup->length = cpu_to_be16(oldlen - len);
-                *XFS_DIR2_DATA_UNUSED_TAG_P(newdup) =
+                *xfs_dir2_data_unused_tag_p(newdup) =
                        cpu_to_be16((char *)newdup - (char *)d);
                xfs_dir2_data_log_unused(tp, bp, newdup);
                /*
@@ -772,7 +772,7 @@ xfs_dir2_data_use_free(
        else if (matchback) {
                newdup = dup;
                newdup->length = cpu_to_be16(((char *)d + offset) - (char *)newdup);
-                *XFS_DIR2_DATA_UNUSED_TAG_P(newdup) =
+                *xfs_dir2_data_unused_tag_p(newdup) =
                        cpu_to_be16((char *)newdup - (char *)d);
                xfs_dir2_data_log_unused(tp, bp, newdup);
                /*
@@ -799,13 +799,13 @@ xfs_dir2_data_use_free(
        else {
                newdup = dup;
                newdup->length = cpu_to_be16(((char *)d + offset) - (char *)newdup);
-                *XFS_DIR2_DATA_UNUSED_TAG_P(newdup) =
+                *xfs_dir2_data_unused_tag_p(newdup) =
                        cpu_to_be16((char *)newdup - (char *)d);
                xfs_dir2_data_log_unused(tp, bp, newdup);
                newdup2 = (xfs_dir2_data_unused_t *)((char *)d + offset + len);
                newdup2->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG);
                newdup2->length = cpu_to_be16(oldlen - len - be16_to_cpu(newdup->length));
-                *XFS_DIR2_DATA_UNUSED_TAG_P(newdup2) =
+                *xfs_dir2_data_unused_tag_p(newdup2) =
                        cpu_to_be16((char *)newdup2 - (char *)d);
                xfs_dir2_data_log_unused(tp, bp, newdup2);
                /*
diff --git a/fs/xfs/xfs_dir2_data.h b/fs/xfs/xfs_dir2_data.h
index c94c9099cfb1..b816e0252739 100644
--- a/fs/xfs/xfs_dir2_data.h
+++ b/fs/xfs/xfs_dir2_data.h
@@ -44,7 +44,7 @@ struct xfs_trans;
 #define XFS_DIR2_DATA_SPACE     0
 #define XFS_DIR2_DATA_OFFSET    (XFS_DIR2_DATA_SPACE * XFS_DIR2_SPACE_SIZE)
 #define XFS_DIR2_DATA_FIRSTDB(mp)       \
-        XFS_DIR2_BYTE_TO_DB(mp, XFS_DIR2_DATA_OFFSET)
+        xfs_dir2_byte_to_db(mp, XFS_DIR2_DATA_OFFSET)
 /*
 * Offsets of . and .. in data space (always block 0)
@@ -52,9 +52,9 @@ struct xfs_trans;
 #define XFS_DIR2_DATA_DOT_OFFSET        \
        ((xfs_dir2_data_aoff_t)sizeof(xfs_dir2_data_hdr_t))
 #define XFS_DIR2_DATA_DOTDOT_OFFSET     \
-        (XFS_DIR2_DATA_DOT_OFFSET + XFS_DIR2_DATA_ENTSIZE(1))
+        (XFS_DIR2_DATA_DOT_OFFSET + xfs_dir2_data_entsize(1))
 #define XFS_DIR2_DATA_FIRST_OFFSET              \
-        (XFS_DIR2_DATA_DOTDOT_OFFSET + XFS_DIR2_DATA_ENTSIZE(2))
+        (XFS_DIR2_DATA_DOTDOT_OFFSET + xfs_dir2_data_entsize(2))
 /*
 * Structures.
@@ -123,7 +123,6 @@ typedef struct xfs_dir2_data {
 /*
 * Size of a data entry.
 */
-#define XFS_DIR2_DATA_ENTSIZE(n)        xfs_dir2_data_entsize(n)
 static inline int xfs_dir2_data_entsize(int n)
 {
        return (int)roundup(offsetof(xfs_dir2_data_entry_t, name[0]) + (n) + \
@@ -133,19 +132,16 @@ static inline int xfs_dir2_data_entsize(int n)
 /*
 * Pointer to an entry's tag word.
 */
-#define XFS_DIR2_DATA_ENTRY_TAG_P(dep)  xfs_dir2_data_entry_tag_p(dep)
 static inline __be16 *
 xfs_dir2_data_entry_tag_p(xfs_dir2_data_entry_t *dep)
 {
        return (__be16 *)((char *)dep +
-                XFS_DIR2_DATA_ENTSIZE(dep->namelen) - sizeof(__be16));
+                xfs_dir2_data_entsize(dep->namelen) - sizeof(__be16));
 }
 /*
 * Pointer to a freespace's tag word.
 */
-#define XFS_DIR2_DATA_UNUSED_TAG_P(dup) \
-        xfs_dir2_data_unused_tag_p(dup)
 static inline __be16 *
 xfs_dir2_data_unused_tag_p(xfs_dir2_data_unused_t *dup)
 {
diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c
index db14ea71459f..1b73c9ad646a 100644
--- a/fs/xfs/xfs_dir2_leaf.c
+++ b/fs/xfs/xfs_dir2_leaf.c
@@ -92,7 +92,7 @@ xfs_dir2_block_to_leaf(
        if ((error = xfs_da_grow_inode(args, &blkno))) {
                return error;
        }
-        ldb = XFS_DIR2_DA_TO_DB(mp, blkno);
+        ldb = xfs_dir2_da_to_db(mp, blkno);
        ASSERT(ldb == XFS_DIR2_LEAF_FIRSTDB(mp));
        /*
         * Initialize the leaf block, get a buffer for it.
@@ -104,8 +104,8 @@ xfs_dir2_block_to_leaf(
        leaf = lbp->data;
        block = dbp->data;
        xfs_dir2_data_check(dp, dbp);
-        btp = XFS_DIR2_BLOCK_TAIL_P(mp, block);
+        btp = xfs_dir2_block_tail_p(mp, block);
-        blp = XFS_DIR2_BLOCK_LEAF_P(btp);
+        blp = xfs_dir2_block_leaf_p(btp);
        /*
         * Set the counts in the leaf header.
         */
@@ -137,9 +137,9 @@ xfs_dir2_block_to_leaf(
        /*
         * Set up leaf tail and bests table.
         */
-        ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf);
+        ltp = xfs_dir2_leaf_tail_p(mp, leaf);
        ltp->bestcount = cpu_to_be32(1);
-        bestsp = XFS_DIR2_LEAF_BESTS_P(ltp);
+        bestsp = xfs_dir2_leaf_bests_p(ltp);
        bestsp[0] =  block->hdr.bestfree[0].length;
        /*
         * Log the data header and leaf bests table.
@@ -209,9 +209,9 @@ xfs_dir2_leaf_addname(
         */
        index = xfs_dir2_leaf_search_hash(args, lbp);
        leaf = lbp->data;
-        ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf);
+        ltp = xfs_dir2_leaf_tail_p(mp, leaf);
-        bestsp = XFS_DIR2_LEAF_BESTS_P(ltp);
+        bestsp = xfs_dir2_leaf_bests_p(ltp);
-        length = XFS_DIR2_DATA_ENTSIZE(args->namelen);
+        length = xfs_dir2_data_entsize(args->namelen);
        /*
         * See if there are any entries with the same hash value
         * and space in their block for the new entry.
@@ -223,7 +223,7 @@ xfs_dir2_leaf_addname(
             index++, lep++) {
                if (be32_to_cpu(lep->address) == XFS_DIR2_NULL_DATAPTR)
                        continue;
-                i = XFS_DIR2_DATAPTR_TO_DB(mp, be32_to_cpu(lep->address));
+                i = xfs_dir2_dataptr_to_db(mp, be32_to_cpu(lep->address));
                ASSERT(i < be32_to_cpu(ltp->bestcount));
                ASSERT(be16_to_cpu(bestsp[i]) != NULLDATAOFF);
                if (be16_to_cpu(bestsp[i]) >= length) {
@@ -378,7 +378,7 @@ xfs_dir2_leaf_addname(
         */
        else {
                if ((error =
-                    xfs_da_read_buf(tp, dp, XFS_DIR2_DB_TO_DA(mp, use_block),
+                    xfs_da_read_buf(tp, dp, xfs_dir2_db_to_da(mp, use_block),
                            -1, &dbp, XFS_DATA_FORK))) {
                        xfs_da_brelse(tp, lbp);
                        return error;
@@ -407,7 +407,7 @@ xfs_dir2_leaf_addname(
        dep->inumber = cpu_to_be64(args->inumber);
        dep->namelen = args->namelen;
        memcpy(dep->name, args->name, dep->namelen);
-        tagp = XFS_DIR2_DATA_ENTRY_TAG_P(dep);
+        tagp = xfs_dir2_data_entry_tag_p(dep);
        *tagp = cpu_to_be16((char *)dep - (char *)data);
        /*
         * Need to scan fix up the bestfree table.
@@ -529,7 +529,7 @@ xfs_dir2_leaf_addname(
         * Fill in the new leaf entry.
         */
        lep->hashval = cpu_to_be32(args->hashval);
-        lep->address = cpu_to_be32(XFS_DIR2_DB_OFF_TO_DATAPTR(mp, use_block,
+        lep->address = cpu_to_be32(xfs_dir2_db_off_to_dataptr(mp, use_block,
                                be16_to_cpu(*tagp)));
        /*
         * Log the leaf fields and give up the buffers.
@@ -567,13 +567,13 @@ xfs_dir2_leaf_check(
         * Should factor in the size of the bests table as well.
         * We can deduce a value for that from di_size.
         */
-        ASSERT(be16_to_cpu(leaf->hdr.count) <= XFS_DIR2_MAX_LEAF_ENTS(mp));
+        ASSERT(be16_to_cpu(leaf->hdr.count) <= xfs_dir2_max_leaf_ents(mp));
-        ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf);
+        ltp = xfs_dir2_leaf_tail_p(mp, leaf);
        /*
         * Leaves and bests don't overlap.
         */
        ASSERT((char *)&leaf->ents[be16_to_cpu(leaf->hdr.count)] <=
-               (char *)XFS_DIR2_LEAF_BESTS_P(ltp));
+               (char *)xfs_dir2_leaf_bests_p(ltp));
        /*
         * Check hash value order, count stale entries.
         */
@@ -815,12 +815,12 @@ xfs_dir2_leaf_getdents(
         * Inside the loop we keep the main offset value as a byte offset
         * in the directory file.
         */
-        curoff = XFS_DIR2_DATAPTR_TO_BYTE(mp, uio->uio_offset);
+        curoff = xfs_dir2_dataptr_to_byte(mp, uio->uio_offset);
        /*
         * Force this conversion through db so we truncate the offset
         * down to get the start of the data block.
         */
-        map_off = XFS_DIR2_DB_TO_DA(mp, XFS_DIR2_BYTE_TO_DB(mp, curoff));
+        map_off = xfs_dir2_db_to_da(mp, xfs_dir2_byte_to_db(mp, curoff));
        /*
         * Loop over directory entries until we reach the end offset.
         * Get more blocks and readahead as necessary.
@@ -870,7 +870,7 @@ xfs_dir2_leaf_getdents(
                         */
                        if (1 + ra_want > map_blocks &&
                            map_off <
-                            XFS_DIR2_BYTE_TO_DA(mp, XFS_DIR2_LEAF_OFFSET)) {
+                            xfs_dir2_byte_to_da(mp, XFS_DIR2_LEAF_OFFSET)) {
                                /*
                                 * Get more bmaps, fill in after the ones
                                 * we already have in the table.
@@ -878,7 +878,7 @@ xfs_dir2_leaf_getdents(
                                nmap = map_size - map_valid;
                                error = xfs_bmapi(tp, dp,
                                        map_off,
-                                        XFS_DIR2_BYTE_TO_DA(mp,
+                                        xfs_dir2_byte_to_da(mp,
                                                XFS_DIR2_LEAF_OFFSET) - map_off,
                                        XFS_BMAPI_METADATA, NULL, 0,
                                        &map[map_valid], &nmap, NULL, NULL);
@@ -903,7 +903,7 @@ xfs_dir2_leaf_getdents(
                                        map[map_valid + nmap - 1].br_blockcount;
                                else
                                        map_off =
-                                                XFS_DIR2_BYTE_TO_DA(mp,
+                                                xfs_dir2_byte_to_da(mp,
                                                        XFS_DIR2_LEAF_OFFSET);
                                /*
                                 * Look for holes in the mapping, and
@@ -931,14 +931,14 @@ xfs_dir2_leaf_getdents(
                         * No valid mappings, so no more data blocks.
                         */
                        if (!map_valid) {
-                                curoff = XFS_DIR2_DA_TO_BYTE(mp, map_off);
+                                curoff = xfs_dir2_da_to_byte(mp, map_off);
                                break;
                        }
                        /*
                         * Read the directory block starting at the first
                         * mapping.
                         */
-                        curdb = XFS_DIR2_DA_TO_DB(mp, map->br_startoff);
+                        curdb = xfs_dir2_da_to_db(mp, map->br_startoff);
                        error = xfs_da_read_buf(tp, dp, map->br_startoff,
                                map->br_blockcount >= mp->m_dirblkfsbs ?
                                    XFS_FSB_TO_DADDR(mp, map->br_startblock) :
@@ -1014,7 +1014,7 @@ xfs_dir2_leaf_getdents(
                        /*
                         * Having done a read, we need to set a new offset.
                         */
-                        newoff = XFS_DIR2_DB_OFF_TO_BYTE(mp, curdb, 0);
+                        newoff = xfs_dir2_db_off_to_byte(mp, curdb, 0);
                        /*
                         * Start of the current block.
                         */
@@ -1024,7 +1024,7 @@ xfs_dir2_leaf_getdents(
                         * Make sure we're in the right block.
                         */
                        else if (curoff > newoff)
-                                ASSERT(XFS_DIR2_BYTE_TO_DB(mp, curoff) ==
+                                ASSERT(xfs_dir2_byte_to_db(mp, curoff) ==
                                       curdb);
                        data = bp->data;
                        xfs_dir2_data_check(dp, bp);
@@ -1032,7 +1032,7 @@ xfs_dir2_leaf_getdents(
                         * Find our position in the block.
                         */
                        ptr = (char *)&data->u;
-                        byteoff = XFS_DIR2_BYTE_TO_OFF(mp, curoff);
+                        byteoff = xfs_dir2_byte_to_off(mp, curoff);
                        /*
                         * Skip past the header.
                         */
@@ -1054,15 +1054,15 @@ xfs_dir2_leaf_getdents(
                                        }
                                        dep = (xfs_dir2_data_entry_t *)ptr;
                                        length =
-                                           XFS_DIR2_DATA_ENTSIZE(dep->namelen);
+                                           xfs_dir2_data_entsize(dep->namelen);
                                        ptr += length;
                                }
                                /*
                                 * Now set our real offset.
                                 */
                                curoff =
-                                        XFS_DIR2_DB_OFF_TO_BYTE(mp,
+                                        xfs_dir2_db_off_to_byte(mp,
-                                            XFS_DIR2_BYTE_TO_DB(mp, curoff),
+                                            xfs_dir2_byte_to_db(mp, curoff),
                                            (char *)ptr - (char *)data);
                                if (ptr >= (char *)data + mp->m_dirblksize) {
                                        continue;
@@ -1091,9 +1091,9 @@ xfs_dir2_leaf_getdents(
                p->namelen = dep->namelen;
-                length = XFS_DIR2_DATA_ENTSIZE(p->namelen);
+                length = xfs_dir2_data_entsize(p->namelen);
-                p->cook = XFS_DIR2_BYTE_TO_DATAPTR(mp, curoff + length);
+                p->cook = xfs_dir2_byte_to_dataptr(mp, curoff + length);
                p->ino = be64_to_cpu(dep->inumber);
 #if XFS_BIG_INUMS
@@ -1121,10 +1121,10 @@ xfs_dir2_leaf_getdents(
         * All done.  Set output offset value to current offset.
         */
        *eofp = eof;
-        if (curoff > XFS_DIR2_DATAPTR_TO_BYTE(mp, XFS_DIR2_MAX_DATAPTR))
+        if (curoff > xfs_dir2_dataptr_to_byte(mp, XFS_DIR2_MAX_DATAPTR))
                uio->uio_offset = XFS_DIR2_MAX_DATAPTR;
        else
-                uio->uio_offset = XFS_DIR2_BYTE_TO_DATAPTR(mp, curoff);
+                uio->uio_offset = xfs_dir2_byte_to_dataptr(mp, curoff);
        kmem_free(map, map_size * sizeof(*map));
        kmem_free(p, sizeof(*p));
        if (bp)
@@ -1159,7 +1159,7 @@ xfs_dir2_leaf_init(
        /*
         * Get the buffer for the block.
         */
-        error = xfs_da_get_buf(tp, dp, XFS_DIR2_DB_TO_DA(mp, bno), -1, &bp,
+        error = xfs_da_get_buf(tp, dp, xfs_dir2_db_to_da(mp, bno), -1, &bp,
                XFS_DATA_FORK);
        if (error) {
                return error;
@@ -1181,7 +1181,7 @@ xfs_dir2_leaf_init(
         * the block.
         */
        if (magic == XFS_DIR2_LEAF1_MAGIC) {
-                ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf);
+                ltp = xfs_dir2_leaf_tail_p(mp, leaf);
                ltp->bestcount = 0;
                xfs_dir2_leaf_log_tail(tp, bp);
        }
@@ -1206,9 +1206,9 @@ xfs_dir2_leaf_log_bests(
        leaf = bp->data;
        ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAF1_MAGIC);
-        ltp = XFS_DIR2_LEAF_TAIL_P(tp->t_mountp, leaf);
+        ltp = xfs_dir2_leaf_tail_p(tp->t_mountp, leaf);
-        firstb = XFS_DIR2_LEAF_BESTS_P(ltp) + first;
+        firstb = xfs_dir2_leaf_bests_p(ltp) + first;
-        lastb = XFS_DIR2_LEAF_BESTS_P(ltp) + last;
+        lastb = xfs_dir2_leaf_bests_p(ltp) + last;
        xfs_da_log_buf(tp, bp, (uint)((char *)firstb - (char *)leaf),
                (uint)((char *)lastb - (char *)leaf + sizeof(*lastb) - 1));
 }
@@ -1268,7 +1268,7 @@ xfs_dir2_leaf_log_tail(
        mp = tp->t_mountp;
        leaf = bp->data;
        ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAF1_MAGIC);
-        ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf);
+        ltp = xfs_dir2_leaf_tail_p(mp, leaf);
        xfs_da_log_buf(tp, bp, (uint)((char *)ltp - (char *)leaf),
                (uint)(mp->m_dirblksize - 1));
 }
@@ -1312,7 +1312,7 @@ xfs_dir2_leaf_lookup(
         */
        dep = (xfs_dir2_data_entry_t *)
              ((char *)dbp->data +
-               XFS_DIR2_DATAPTR_TO_OFF(dp->i_mount, be32_to_cpu(lep->address)));
+               xfs_dir2_dataptr_to_off(dp->i_mount, be32_to_cpu(lep->address)));
        /*
         * Return the found inode number.
         */
@@ -1381,7 +1381,7 @@ xfs_dir2_leaf_lookup_int(
                /*
                 * Get the new data block number.
                 */
-                newdb = XFS_DIR2_DATAPTR_TO_DB(mp, be32_to_cpu(lep->address));
+                newdb = xfs_dir2_dataptr_to_db(mp, be32_to_cpu(lep->address));
                /*
                 * If it's not the same as the old data block number,
                 * need to pitch the old one and read the new one.
@@ -1391,7 +1391,7 @@ xfs_dir2_leaf_lookup_int(
                                xfs_da_brelse(tp, dbp);
                        if ((error =
                            xfs_da_read_buf(tp, dp,
-                                    XFS_DIR2_DB_TO_DA(mp, newdb), -1, &dbp,
+                                    xfs_dir2_db_to_da(mp, newdb), -1, &dbp,
                                    XFS_DATA_FORK))) {
                                xfs_da_brelse(tp, lbp);
                                return error;
@@ -1404,7 +1404,7 @@ xfs_dir2_leaf_lookup_int(
                 */
                dep = (xfs_dir2_data_entry_t *)
                      ((char *)dbp->data +
-                       XFS_DIR2_DATAPTR_TO_OFF(mp, be32_to_cpu(lep->address)));
+                       xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address)));
                /*
                 * If it matches then return it.
                 */
@@ -1469,20 +1469,20 @@ xfs_dir2_leaf_removename(
         * Point to the leaf entry, use that to point to the data entry.
         */
        lep = &leaf->ents[index];
-        db = XFS_DIR2_DATAPTR_TO_DB(mp, be32_to_cpu(lep->address));
+        db = xfs_dir2_dataptr_to_db(mp, be32_to_cpu(lep->address));
        dep = (xfs_dir2_data_entry_t *)
-              ((char *)data + XFS_DIR2_DATAPTR_TO_OFF(mp, be32_to_cpu(lep->address)));
+              ((char *)data + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address)));
        needscan = needlog = 0;
        oldbest = be16_to_cpu(data->hdr.bestfree[0].length);
-        ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf);
+        ltp = xfs_dir2_leaf_tail_p(mp, leaf);
-        bestsp = XFS_DIR2_LEAF_BESTS_P(ltp);
+        bestsp = xfs_dir2_leaf_bests_p(ltp);
        ASSERT(be16_to_cpu(bestsp[db]) == oldbest);
        /*
         * Mark the former data entry unused.
         */
        xfs_dir2_data_make_free(tp, dbp,
                (xfs_dir2_data_aoff_t)((char *)dep - (char *)data),
-                XFS_DIR2_DATA_ENTSIZE(dep->namelen), &needlog, &needscan);
+                xfs_dir2_data_entsize(dep->namelen), &needlog, &needscan);
        /*
         * We just mark the leaf entry stale by putting a null in it.
         */
@@ -1602,7 +1602,7 @@ xfs_dir2_leaf_replace(
         */
        dep = (xfs_dir2_data_entry_t *)
              ((char *)dbp->data +
-               XFS_DIR2_DATAPTR_TO_OFF(dp->i_mount, be32_to_cpu(lep->address)));
+               xfs_dir2_dataptr_to_off(dp->i_mount, be32_to_cpu(lep->address)));
        ASSERT(args->inumber != be64_to_cpu(dep->inumber));
        /*
         * Put the new inode number in, log it.
@@ -1698,7 +1698,7 @@ xfs_dir2_leaf_trim_data(
        /*
         * Read the offending data block.  We need its buffer.
         */
-        if ((error = xfs_da_read_buf(tp, dp, XFS_DIR2_DB_TO_DA(mp, db), -1, &dbp,
+        if ((error = xfs_da_read_buf(tp, dp, xfs_dir2_db_to_da(mp, db), -1, &dbp,
                        XFS_DATA_FORK))) {
                return error;
        }
@@ -1712,7 +1712,7 @@ xfs_dir2_leaf_trim_data(
         */
        leaf = lbp->data;
-        ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf);
+        ltp = xfs_dir2_leaf_tail_p(mp, leaf);
        ASSERT(be16_to_cpu(data->hdr.bestfree[0].length) ==
               mp->m_dirblksize - (uint)sizeof(data->hdr));
        ASSERT(db == be32_to_cpu(ltp->bestcount) - 1);
@@ -1727,7 +1727,7 @@ xfs_dir2_leaf_trim_data(
        /*
         * Eliminate the last bests entry from the table.
         */
-        bestsp = XFS_DIR2_LEAF_BESTS_P(ltp);
+        bestsp = xfs_dir2_leaf_bests_p(ltp);
        be32_add(&ltp->bestcount, -1);
        memmove(&bestsp[1], &bestsp[0], be32_to_cpu(ltp->bestcount) * sizeof(*bestsp));
        xfs_dir2_leaf_log_tail(tp, lbp);
@@ -1838,12 +1838,12 @@ xfs_dir2_node_to_leaf(
        /*
         * Set up the leaf tail from the freespace block.
         */
-        ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf);
+        ltp = xfs_dir2_leaf_tail_p(mp, leaf);
        ltp->bestcount = free->hdr.nvalid;
        /*
         * Set up the leaf bests table.
         */
-        memcpy(XFS_DIR2_LEAF_BESTS_P(ltp), free->bests,
+        memcpy(xfs_dir2_leaf_bests_p(ltp), free->bests,
                be32_to_cpu(ltp->bestcount) * sizeof(leaf->bests[0]));
        xfs_dir2_leaf_log_bests(tp, lbp, 0, be32_to_cpu(ltp->bestcount) - 1);
        xfs_dir2_leaf_log_tail(tp, lbp);
diff --git a/fs/xfs/xfs_dir2_leaf.h b/fs/xfs/xfs_dir2_leaf.h
index f57ca1162412..70c97f3f815e 100644
--- a/fs/xfs/xfs_dir2_leaf.h
+++ b/fs/xfs/xfs_dir2_leaf.h
@@ -32,7 +32,7 @@ struct xfs_trans;
 #define XFS_DIR2_LEAF_SPACE     1
 #define XFS_DIR2_LEAF_OFFSET    (XFS_DIR2_LEAF_SPACE * XFS_DIR2_SPACE_SIZE)
 #define XFS_DIR2_LEAF_FIRSTDB(mp)       \
-        XFS_DIR2_BYTE_TO_DB(mp, XFS_DIR2_LEAF_OFFSET)
+        xfs_dir2_byte_to_db(mp, XFS_DIR2_LEAF_OFFSET)
 /*
 * Offset in data space of a data entry.
@@ -82,7 +82,6 @@ typedef struct xfs_dir2_leaf {
 * DB blocks here are logical directory block numbers, not filesystem blocks.
 */
-#define XFS_DIR2_MAX_LEAF_ENTS(mp)      xfs_dir2_max_leaf_ents(mp)
 static inline int xfs_dir2_max_leaf_ents(struct xfs_mount *mp)
 {
        return (int)(((mp)->m_dirblksize - (uint)sizeof(xfs_dir2_leaf_hdr_t)) /
@@ -92,7 +91,6 @@ static inline int xfs_dir2_max_leaf_ents(struct xfs_mount *mp)
 /*
 * Get address of the bestcount field in the single-leaf block.
 */
-#define XFS_DIR2_LEAF_TAIL_P(mp,lp)     xfs_dir2_leaf_tail_p(mp, lp)
 static inline xfs_dir2_leaf_tail_t *
 xfs_dir2_leaf_tail_p(struct xfs_mount *mp, xfs_dir2_leaf_t *lp)
 {
@@ -104,7 +102,6 @@ xfs_dir2_leaf_tail_p(struct xfs_mount *mp, xfs_dir2_leaf_t *lp)
 /*
 * Get address of the bests array in the single-leaf block.
 */
-#define XFS_DIR2_LEAF_BESTS_P(ltp)      xfs_dir2_leaf_bests_p(ltp)
 static inline __be16 *
 xfs_dir2_leaf_bests_p(xfs_dir2_leaf_tail_t *ltp)
 {
@@ -114,7 +111,6 @@ xfs_dir2_leaf_bests_p(xfs_dir2_leaf_tail_t *ltp)
 /*
 * Convert dataptr to byte in file space
 */
-#define XFS_DIR2_DATAPTR_TO_BYTE(mp,dp) xfs_dir2_dataptr_to_byte(mp, dp)
 static inline xfs_dir2_off_t
 xfs_dir2_dataptr_to_byte(struct xfs_mount *mp, xfs_dir2_dataptr_t dp)
 {
@@ -124,7 +120,6 @@ xfs_dir2_dataptr_to_byte(struct xfs_mount *mp, xfs_dir2_dataptr_t dp)
 /*
 * Convert byte in file space to dataptr.  It had better be aligned.
 */
-#define XFS_DIR2_BYTE_TO_DATAPTR(mp,by) xfs_dir2_byte_to_dataptr(mp,by)
 static inline xfs_dir2_dataptr_t
 xfs_dir2_byte_to_dataptr(struct xfs_mount *mp, xfs_dir2_off_t by)
 {
@@ -134,7 +129,6 @@ xfs_dir2_byte_to_dataptr(struct xfs_mount *mp, xfs_dir2_off_t by)
 /*
 * Convert byte in space to (DB) block
 */
-#define XFS_DIR2_BYTE_TO_DB(mp,by)      xfs_dir2_byte_to_db(mp, by)
 static inline xfs_dir2_db_t
 xfs_dir2_byte_to_db(struct xfs_mount *mp, xfs_dir2_off_t by)
 {
@@ -145,17 +139,15 @@ xfs_dir2_byte_to_db(struct xfs_mount *mp, xfs_dir2_off_t by)
 /*
 * Convert dataptr to a block number
 */
-#define XFS_DIR2_DATAPTR_TO_DB(mp,dp)   xfs_dir2_dataptr_to_db(mp, dp)
 static inline xfs_dir2_db_t
 xfs_dir2_dataptr_to_db(struct xfs_mount *mp, xfs_dir2_dataptr_t dp)
 {
-        return XFS_DIR2_BYTE_TO_DB(mp, XFS_DIR2_DATAPTR_TO_BYTE(mp, dp));
+        return xfs_dir2_byte_to_db(mp, xfs_dir2_dataptr_to_byte(mp, dp));
 }
 /*
 * Convert byte in space to offset in a block
 */
-#define XFS_DIR2_BYTE_TO_OFF(mp,by)     xfs_dir2_byte_to_off(mp, by)
 static inline xfs_dir2_data_aoff_t
 xfs_dir2_byte_to_off(struct xfs_mount *mp, xfs_dir2_off_t by)
 {
@@ -166,18 +158,15 @@ xfs_dir2_byte_to_off(struct xfs_mount *mp, xfs_dir2_off_t by)
 /*
 * Convert dataptr to a byte offset in a block
 */
-#define XFS_DIR2_DATAPTR_TO_OFF(mp,dp)  xfs_dir2_dataptr_to_off(mp, dp)
 static inline xfs_dir2_data_aoff_t
 xfs_dir2_dataptr_to_off(struct xfs_mount *mp, xfs_dir2_dataptr_t dp)
 {
-        return XFS_DIR2_BYTE_TO_OFF(mp, XFS_DIR2_DATAPTR_TO_BYTE(mp, dp));
+        return xfs_dir2_byte_to_off(mp, xfs_dir2_dataptr_to_byte(mp, dp));
 }
 /*
 * Convert block and offset to byte in space
 */
-#define XFS_DIR2_DB_OFF_TO_BYTE(mp,db,o)        \
-        xfs_dir2_db_off_to_byte(mp, db, o)
 static inline xfs_dir2_off_t
 xfs_dir2_db_off_to_byte(struct xfs_mount *mp, xfs_dir2_db_t db,
                        xfs_dir2_data_aoff_t o)
@@ -189,7 +178,6 @@ xfs_dir2_db_off_to_byte(struct xfs_mount *mp, xfs_dir2_db_t db,
 /*
 * Convert block (DB) to block (dablk)
 */
-#define XFS_DIR2_DB_TO_DA(mp,db)        xfs_dir2_db_to_da(mp, db)
 static inline xfs_dablk_t
 xfs_dir2_db_to_da(struct xfs_mount *mp, xfs_dir2_db_t db)
 {
@@ -199,29 +187,25 @@ xfs_dir2_db_to_da(struct xfs_mount *mp, xfs_dir2_db_t db)
 /*
 * Convert byte in space to (DA) block
 */
-#define XFS_DIR2_BYTE_TO_DA(mp,by)      xfs_dir2_byte_to_da(mp, by)
 static inline xfs_dablk_t
 xfs_dir2_byte_to_da(struct xfs_mount *mp, xfs_dir2_off_t by)
 {
-        return XFS_DIR2_DB_TO_DA(mp, XFS_DIR2_BYTE_TO_DB(mp, by));
+        return xfs_dir2_db_to_da(mp, xfs_dir2_byte_to_db(mp, by));
 }
 /*
 * Convert block and offset to dataptr
 */
-#define XFS_DIR2_DB_OFF_TO_DATAPTR(mp,db,o)     \
-        xfs_dir2_db_off_to_dataptr(mp, db, o)
 static inline xfs_dir2_dataptr_t
 xfs_dir2_db_off_to_dataptr(struct xfs_mount *mp, xfs_dir2_db_t db,
                           xfs_dir2_data_aoff_t o)
 {
-        return XFS_DIR2_BYTE_TO_DATAPTR(mp, XFS_DIR2_DB_OFF_TO_BYTE(mp, db, o));
+        return xfs_dir2_byte_to_dataptr(mp, xfs_dir2_db_off_to_byte(mp, db, o));
 }
 /*
 * Convert block (dablk) to block (DB)
 */
-#define XFS_DIR2_DA_TO_DB(mp,da)        xfs_dir2_da_to_db(mp, da)
 static inline xfs_dir2_db_t
 xfs_dir2_da_to_db(struct xfs_mount *mp, xfs_dablk_t da)
 {
@@ -231,11 +215,10 @@ xfs_dir2_da_to_db(struct xfs_mount *mp, xfs_dablk_t da)
 /*
 * Convert block (dablk) to byte offset in space
 */
-#define XFS_DIR2_DA_TO_BYTE(mp,da)      xfs_dir2_da_to_byte(mp, da)
 static inline xfs_dir2_off_t
 xfs_dir2_da_to_byte(struct xfs_mount *mp, xfs_dablk_t da)
 {
-        return XFS_DIR2_DB_OFF_TO_BYTE(mp, XFS_DIR2_DA_TO_DB(mp, da), 0);
+        return xfs_dir2_db_off_to_byte(mp, xfs_dir2_da_to_db(mp, da), 0);
 }
 /*
diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c
index d083c3819934..91c61d9632c8 100644
--- a/fs/xfs/xfs_dir2_node.c
+++ b/fs/xfs/xfs_dir2_node.c
@@ -136,14 +136,14 @@ xfs_dir2_leaf_to_node(
        /*
         * Get the buffer for the new freespace block.
         */
-        if ((error = xfs_da_get_buf(tp, dp, XFS_DIR2_DB_TO_DA(mp, fdb), -1, &fbp,
+        if ((error = xfs_da_get_buf(tp, dp, xfs_dir2_db_to_da(mp, fdb), -1, &fbp,
                        XFS_DATA_FORK))) {
                return error;
        }
        ASSERT(fbp != NULL);
        free = fbp->data;
        leaf = lbp->data;
-        ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf);
+        ltp = xfs_dir2_leaf_tail_p(mp, leaf);
        /*
         * Initialize the freespace block header.
         */
@@ -155,7 +155,7 @@ xfs_dir2_leaf_to_node(
         * Copy freespace entries from the leaf block to the new block.
         * Count active entries.
         */
-        for (i = n = 0, from = XFS_DIR2_LEAF_BESTS_P(ltp), to = free->bests;
+        for (i = n = 0, from = xfs_dir2_leaf_bests_p(ltp), to = free->bests;
             i < be32_to_cpu(ltp->bestcount); i++, from++, to++) {
                if ((off = be16_to_cpu(*from)) != NULLDATAOFF)
                        n++;
@@ -215,7 +215,7 @@ xfs_dir2_leafn_add(
         * a compact.
         */
-        if (be16_to_cpu(leaf->hdr.count) == XFS_DIR2_MAX_LEAF_ENTS(mp)) {
+        if (be16_to_cpu(leaf->hdr.count) == xfs_dir2_max_leaf_ents(mp)) {
                if (!leaf->hdr.stale)
                        return XFS_ERROR(ENOSPC);
                compact = be16_to_cpu(leaf->hdr.stale) > 1;
@@ -327,7 +327,7 @@ xfs_dir2_leafn_add(
         * Insert the new entry, log everything.
         */
        lep->hashval = cpu_to_be32(args->hashval);
-        lep->address = cpu_to_be32(XFS_DIR2_DB_OFF_TO_DATAPTR(mp,
+        lep->address = cpu_to_be32(xfs_dir2_db_off_to_dataptr(mp,
                                args->blkno, args->index));
        xfs_dir2_leaf_log_header(tp, bp);
        xfs_dir2_leaf_log_ents(tp, bp, lfloglow, lfloghigh);
@@ -352,7 +352,7 @@ xfs_dir2_leafn_check(
        leaf = bp->data;
        mp = dp->i_mount;
        ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC);
-        ASSERT(be16_to_cpu(leaf->hdr.count) <= XFS_DIR2_MAX_LEAF_ENTS(mp));
+        ASSERT(be16_to_cpu(leaf->hdr.count) <= xfs_dir2_max_leaf_ents(mp));
        for (i = stale = 0; i < be16_to_cpu(leaf->hdr.count); i++) {
                if (i + 1 < be16_to_cpu(leaf->hdr.count)) {
                        ASSERT(be32_to_cpu(leaf->ents[i].hashval) <=
@@ -440,7 +440,7 @@ xfs_dir2_leafn_lookup_int(
        if (args->addname) {
                curfdb = curbp ? state->extrablk.blkno : -1;
                curdb = -1;
-                length = XFS_DIR2_DATA_ENTSIZE(args->namelen);
+                length = xfs_dir2_data_entsize(args->namelen);
                if ((free = (curbp ? curbp->data : NULL)))
                        ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC);
        }
@@ -465,7 +465,7 @@ xfs_dir2_leafn_lookup_int(
                /*
                 * Pull the data block number from the entry.
                 */
-                newdb = XFS_DIR2_DATAPTR_TO_DB(mp, be32_to_cpu(lep->address));
+                newdb = xfs_dir2_dataptr_to_db(mp, be32_to_cpu(lep->address));
                /*
                 * For addname, we're looking for a place to put the new entry.
                 * We want to use a data block with an entry of equal
@@ -482,7 +482,7 @@ xfs_dir2_leafn_lookup_int(
                                 * Convert the data block to the free block
                                 * holding its freespace information.
                                 */
-                                newfdb = XFS_DIR2_DB_TO_FDB(mp, newdb);
+                                newfdb = xfs_dir2_db_to_fdb(mp, newdb);
                                /*
                                 * If it's not the one we have in hand,
                                 * read it in.
@@ -497,7 +497,7 @@ xfs_dir2_leafn_lookup_int(
                                         * Read the free block.
                                         */
                                        if ((error = xfs_da_read_buf(tp, dp,
-                                                        XFS_DIR2_DB_TO_DA(mp,
+                                                        xfs_dir2_db_to_da(mp,
                                                                newfdb),
                                                        -1, &curbp,
                                                        XFS_DATA_FORK))) {
@@ -517,7 +517,7 @@ xfs_dir2_leafn_lookup_int(
                                /*
                                 * Get the index for our entry.
                                 */
-                                fi = XFS_DIR2_DB_TO_FDINDEX(mp, curdb);
+                                fi = xfs_dir2_db_to_fdindex(mp, curdb);
                                /*
                                 * If it has room, return it.
                                 */
@@ -561,7 +561,7 @@ xfs_dir2_leafn_lookup_int(
                                 */
                                if ((error =
                                    xfs_da_read_buf(tp, dp,
-                                            XFS_DIR2_DB_TO_DA(mp, newdb), -1,
+                                            xfs_dir2_db_to_da(mp, newdb), -1,
                                            &curbp, XFS_DATA_FORK))) {
                                        return error;
                                }
@@ -573,7 +573,7 @@ xfs_dir2_leafn_lookup_int(
                         */
                        dep = (xfs_dir2_data_entry_t *)
                              ((char *)curbp->data +
-                               XFS_DIR2_DATAPTR_TO_OFF(mp, be32_to_cpu(lep->address)));
+                               xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address)));
                        /*
                         * Compare the entry, return it if it matches.
                         */
@@ -876,9 +876,9 @@ xfs_dir2_leafn_remove(
        /*
         * Extract the data block and offset from the entry.
         */
-        db = XFS_DIR2_DATAPTR_TO_DB(mp, be32_to_cpu(lep->address));
+        db = xfs_dir2_dataptr_to_db(mp, be32_to_cpu(lep->address));
        ASSERT(dblk->blkno == db);
-        off = XFS_DIR2_DATAPTR_TO_OFF(mp, be32_to_cpu(lep->address));
+        off = xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address));
        ASSERT(dblk->index == off);
        /*
         * Kill the leaf entry by marking it stale.
@@ -898,7 +898,7 @@ xfs_dir2_leafn_remove(
        longest = be16_to_cpu(data->hdr.bestfree[0].length);
        needlog = needscan = 0;
        xfs_dir2_data_make_free(tp, dbp, off,
-                XFS_DIR2_DATA_ENTSIZE(dep->namelen), &needlog, &needscan);
+                xfs_dir2_data_entsize(dep->namelen), &needlog, &needscan);
        /*
         * Rescan the data block freespaces for bestfree.
         * Log the data block header if needed.
@@ -924,8 +924,8 @@ xfs_dir2_leafn_remove(
                 * Convert the data block number to a free block,
                 * read in the free block.
                 */
-                fdb = XFS_DIR2_DB_TO_FDB(mp, db);
+                fdb = xfs_dir2_db_to_fdb(mp, db);
-                if ((error = xfs_da_read_buf(tp, dp, XFS_DIR2_DB_TO_DA(mp, fdb),
+                if ((error = xfs_da_read_buf(tp, dp, xfs_dir2_db_to_da(mp, fdb),
                                -1, &fbp, XFS_DATA_FORK))) {
                        return error;
                }
@@ -937,7 +937,7 @@ xfs_dir2_leafn_remove(
                /*
                 * Calculate which entry we need to fix.
                 */
-                findex = XFS_DIR2_DB_TO_FDINDEX(mp, db);
+                findex = xfs_dir2_db_to_fdindex(mp, db);
                longest = be16_to_cpu(data->hdr.bestfree[0].length);
                /*
                 * If the data block is now empty we can get rid of it
@@ -1073,7 +1073,7 @@ xfs_dir2_leafn_split(
        /*
         * Initialize the new leaf block.
         */
-        error = xfs_dir2_leaf_init(args, XFS_DIR2_DA_TO_DB(mp, blkno),
+        error = xfs_dir2_leaf_init(args, xfs_dir2_da_to_db(mp, blkno),
                &newblk->bp, XFS_DIR2_LEAFN_MAGIC);
        if (error) {
                return error;
@@ -1385,7 +1385,7 @@ xfs_dir2_node_addname_int(
        dp = args->dp;
        mp = dp->i_mount;
        tp = args->trans;
-        length = XFS_DIR2_DATA_ENTSIZE(args->namelen);
+        length = xfs_dir2_data_entsize(args->namelen);
        /*
         * If we came in with a freespace block that means that lookup
         * found an entry with our hash value.  This is the freespace
@@ -1438,7 +1438,7 @@ xfs_dir2_node_addname_int(
                if ((error = xfs_bmap_last_offset(tp, dp, &fo, XFS_DATA_FORK)))
                        return error;
-                lastfbno = XFS_DIR2_DA_TO_DB(mp, (xfs_dablk_t)fo);
+                lastfbno = xfs_dir2_da_to_db(mp, (xfs_dablk_t)fo);
                fbno = ifbno;
        }
        /*
@@ -1474,7 +1474,7 @@ xfs_dir2_node_addname_int(
                         * to avoid it.
                         */
                        if ((error = xfs_da_read_buf(tp, dp,
-                                        XFS_DIR2_DB_TO_DA(mp, fbno), -2, &fbp,
+                                        xfs_dir2_db_to_da(mp, fbno), -2, &fbp,
                                        XFS_DATA_FORK))) {
                                return error;
                        }
@@ -1550,9 +1550,9 @@ xfs_dir2_node_addname_int(
                 * Get the freespace block corresponding to the data block
                 * that was just allocated.
                 */
-                fbno = XFS_DIR2_DB_TO_FDB(mp, dbno);
+                fbno = xfs_dir2_db_to_fdb(mp, dbno);
                if (unlikely(error = xfs_da_read_buf(tp, dp,
-                                XFS_DIR2_DB_TO_DA(mp, fbno), -2, &fbp,
+                                xfs_dir2_db_to_da(mp, fbno), -2, &fbp,
                                XFS_DATA_FORK))) {
                        xfs_da_buf_done(dbp);
                        return error;
@@ -1567,14 +1567,14 @@ xfs_dir2_node_addname_int(
                                return error;
                        }
-                        if (unlikely(XFS_DIR2_DB_TO_FDB(mp, dbno) != fbno)) {
+                        if (unlikely(xfs_dir2_db_to_fdb(mp, dbno) != fbno)) {
                                cmn_err(CE_ALERT,
                                        "xfs_dir2_node_addname_int: dir ino "
                                        "%llu needed freesp block %lld for\n"
                                        "  data block %lld, got %lld\n"
                                        "  ifbno %llu lastfbno %d\n",
                                        (unsigned long long)dp->i_ino,
-                                        (long long)XFS_DIR2_DB_TO_FDB(mp, dbno),
+                                        (long long)xfs_dir2_db_to_fdb(mp, dbno),
                                        (long long)dbno, (long long)fbno,
                                        (unsigned long long)ifbno, lastfbno);
                                if (fblk) {
@@ -1598,7 +1598,7 @@ xfs_dir2_node_addname_int(
                         * Get a buffer for the new block.
                         */
                        if ((error = xfs_da_get_buf(tp, dp,
-                                                   XFS_DIR2_DB_TO_DA(mp, fbno),
+                                                   xfs_dir2_db_to_da(mp, fbno),
                                                   -1, &fbp, XFS_DATA_FORK))) {
                                return error;
                        }
@@ -1623,7 +1623,7 @@ xfs_dir2_node_addname_int(
                /*
                 * Set the freespace block index from the data block number.
                 */
-                findex = XFS_DIR2_DB_TO_FDINDEX(mp, dbno);
+                findex = xfs_dir2_db_to_fdindex(mp, dbno);
                /*
                 * If it's after the end of the current entries in the
                 * freespace block, extend that table.
@@ -1669,7 +1669,7 @@ xfs_dir2_node_addname_int(
                 * Read the data block in.
                 */
                if (unlikely(
-                    error = xfs_da_read_buf(tp, dp, XFS_DIR2_DB_TO_DA(mp, dbno),
+                    error = xfs_da_read_buf(tp, dp, xfs_dir2_db_to_da(mp, dbno),
                                -1, &dbp, XFS_DATA_FORK))) {
                        if ((fblk == NULL || fblk->bp == NULL) && fbp != NULL)
                                xfs_da_buf_done(fbp);
@@ -1698,7 +1698,7 @@ xfs_dir2_node_addname_int(
        dep->inumber = cpu_to_be64(args->inumber);
        dep->namelen = args->namelen;
        memcpy(dep->name, args->name, dep->namelen);
-        tagp = XFS_DIR2_DATA_ENTRY_TAG_P(dep);
+        tagp = xfs_dir2_data_entry_tag_p(dep);
        *tagp = cpu_to_be16((char *)dep - (char *)data);
        xfs_dir2_data_log_entry(tp, dbp, dep);
        /*
@@ -1904,7 +1904,7 @@ xfs_dir2_node_replace(
                ASSERT(be32_to_cpu(data->hdr.magic) == XFS_DIR2_DATA_MAGIC);
                dep = (xfs_dir2_data_entry_t *)
                      ((char *)data +
-                       XFS_DIR2_DATAPTR_TO_OFF(state->mp, be32_to_cpu(lep->address)));
+                       xfs_dir2_dataptr_to_off(state->mp, be32_to_cpu(lep->address)));
                ASSERT(inum != be64_to_cpu(dep->inumber));
                /*
                 * Fill in the new inode number and log the entry.
@@ -1980,7 +1980,7 @@ xfs_dir2_node_trim_free(
         * Blow the block away.
         */
        if ((error =
-            xfs_dir2_shrink_inode(args, XFS_DIR2_DA_TO_DB(mp, (xfs_dablk_t)fo),
+            xfs_dir2_shrink_inode(args, xfs_dir2_da_to_db(mp, (xfs_dablk_t)fo),
                    bp))) {
                /*
                 * Can't fail with ENOSPC since that only happens with no
diff --git a/fs/xfs/xfs_dir2_node.h b/fs/xfs/xfs_dir2_node.h
index c7c870ee7857..dde72db3d695 100644
--- a/fs/xfs/xfs_dir2_node.h
+++ b/fs/xfs/xfs_dir2_node.h
@@ -36,7 +36,7 @@ struct xfs_trans;
 #define XFS_DIR2_FREE_SPACE     2
 #define XFS_DIR2_FREE_OFFSET    (XFS_DIR2_FREE_SPACE * XFS_DIR2_SPACE_SIZE)
 #define XFS_DIR2_FREE_FIRSTDB(mp)       \
-        XFS_DIR2_BYTE_TO_DB(mp, XFS_DIR2_FREE_OFFSET)
+        xfs_dir2_byte_to_db(mp, XFS_DIR2_FREE_OFFSET)
 #define XFS_DIR2_FREE_MAGIC     0x58443246      /* XD2F */
@@ -60,7 +60,6 @@ typedef struct xfs_dir2_free {
 /*
 * Convert data space db to the corresponding free db.
 */
-#define XFS_DIR2_DB_TO_FDB(mp,db)       xfs_dir2_db_to_fdb(mp, db)
 static inline xfs_dir2_db_t
 xfs_dir2_db_to_fdb(struct xfs_mount *mp, xfs_dir2_db_t db)
 {
@@ -70,7 +69,6 @@ xfs_dir2_db_to_fdb(struct xfs_mount *mp, xfs_dir2_db_t db)
 /*
 * Convert data space db to the corresponding index in a free db.
 */
-#define XFS_DIR2_DB_TO_FDINDEX(mp,db)   xfs_dir2_db_to_fdindex(mp, db)
 static inline int
 xfs_dir2_db_to_fdindex(struct xfs_mount *mp, xfs_dir2_db_t db)
 {
diff --git a/fs/xfs/xfs_dir2_sf.c b/fs/xfs/xfs_dir2_sf.c
index 0cd77b17bf92..38fc4f22b76d 100644
--- a/fs/xfs/xfs_dir2_sf.c
+++ b/fs/xfs/xfs_dir2_sf.c
@@ -89,8 +89,8 @@ xfs_dir2_block_sfsize(
        mp = dp->i_mount;
        count = i8count = namelen = 0;
-        btp = XFS_DIR2_BLOCK_TAIL_P(mp, block);
+        btp = xfs_dir2_block_tail_p(mp, block);
-        blp = XFS_DIR2_BLOCK_LEAF_P(btp);
+        blp = xfs_dir2_block_leaf_p(btp);
        /*
         * Iterate over the block's data entries by using the leaf pointers.
@@ -102,7 +102,7 @@ xfs_dir2_block_sfsize(
                 * Calculate the pointer to the entry at hand.
                 */
                dep = (xfs_dir2_data_entry_t *)
-                      ((char *)block + XFS_DIR2_DATAPTR_TO_OFF(mp, addr));
+                      ((char *)block + xfs_dir2_dataptr_to_off(mp, addr));
                /*
                 * Detect . and .., so we can special-case them.
                 * . is not included in sf directories.
@@ -124,7 +124,7 @@ xfs_dir2_block_sfsize(
                /*
                 * Calculate the new size, see if we should give up yet.
                 */
-                size = XFS_DIR2_SF_HDR_SIZE(i8count) +          /* header */
+                size = xfs_dir2_sf_hdr_size(i8count) +          /* header */
                       count +                                  /* namelen */
                       count * (uint)sizeof(xfs_dir2_sf_off_t) + /* offset */
                       namelen +                                /* name */
@@ -139,7 +139,7 @@ xfs_dir2_block_sfsize(
         */
        sfhp->count = count;
        sfhp->i8count = i8count;
-        XFS_DIR2_SF_PUT_INUMBER((xfs_dir2_sf_t *)sfhp, &parent, &sfhp->parent);
+        xfs_dir2_sf_put_inumber((xfs_dir2_sf_t *)sfhp, &parent, &sfhp->parent);
        return size;
 }
@@ -199,15 +199,15 @@ xfs_dir2_block_to_sf(
         * Copy the header into the newly allocate local space.
         */
        sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
-        memcpy(sfp, sfhp, XFS_DIR2_SF_HDR_SIZE(sfhp->i8count));
+        memcpy(sfp, sfhp, xfs_dir2_sf_hdr_size(sfhp->i8count));
        dp->i_d.di_size = size;
        /*
         * Set up to loop over the block's entries.
         */
-        btp = XFS_DIR2_BLOCK_TAIL_P(mp, block);
+        btp = xfs_dir2_block_tail_p(mp, block);
        ptr = (char *)block->u;
-        endptr = (char *)XFS_DIR2_BLOCK_LEAF_P(btp);
+        endptr = (char *)xfs_dir2_block_leaf_p(btp);
-        sfep = XFS_DIR2_SF_FIRSTENTRY(sfp);
+        sfep = xfs_dir2_sf_firstentry(sfp);
        /*
         * Loop over the active and unused entries.
         * Stop when we reach the leaf/tail portion of the block.
@@ -233,22 +233,22 @@ xfs_dir2_block_to_sf(
                else if (dep->namelen == 2 &&
                         dep->name[0] == '.' && dep->name[1] == '.')
                        ASSERT(be64_to_cpu(dep->inumber) ==
-                               XFS_DIR2_SF_GET_INUMBER(sfp, &sfp->hdr.parent));
+                               xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent));
                /*
                 * Normal entry, copy it into shortform.
                 */
                else {
                        sfep->namelen = dep->namelen;
-                        XFS_DIR2_SF_PUT_OFFSET(sfep,
+                        xfs_dir2_sf_put_offset(sfep,
                                (xfs_dir2_data_aoff_t)
                                ((char *)dep - (char *)block));
                        memcpy(sfep->name, dep->name, dep->namelen);
                        temp = be64_to_cpu(dep->inumber);
-                        XFS_DIR2_SF_PUT_INUMBER(sfp, &temp,
+                        xfs_dir2_sf_put_inumber(sfp, &temp,
-                                XFS_DIR2_SF_INUMBERP(sfep));
+                                xfs_dir2_sf_inumberp(sfep));
-                        sfep = XFS_DIR2_SF_NEXTENTRY(sfp, sfep);
+                        sfep = xfs_dir2_sf_nextentry(sfp, sfep);
                }
-                ptr += XFS_DIR2_DATA_ENTSIZE(dep->namelen);
+                ptr += xfs_dir2_data_entsize(dep->namelen);
        }
        ASSERT((char *)sfep - (char *)sfp == size);
        xfs_dir2_sf_check(args);
@@ -294,11 +294,11 @@ xfs_dir2_sf_addname(
        ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
        ASSERT(dp->i_df.if_u1.if_data != NULL);
        sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
-        ASSERT(dp->i_d.di_size >= XFS_DIR2_SF_HDR_SIZE(sfp->hdr.i8count));
+        ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->hdr.i8count));
        /*
         * Compute entry (and change in) size.
         */
-        add_entsize = XFS_DIR2_SF_ENTSIZE_BYNAME(sfp, args->namelen);
+        add_entsize = xfs_dir2_sf_entsize_byname(sfp, args->namelen);
        incr_isize = add_entsize;
        objchange = 0;
 #if XFS_BIG_INUMS
@@ -392,7 +392,7 @@ xfs_dir2_sf_addname_easy(
        /*
         * Grow the in-inode space.
         */
-        xfs_idata_realloc(dp, XFS_DIR2_SF_ENTSIZE_BYNAME(sfp, args->namelen),
+        xfs_idata_realloc(dp, xfs_dir2_sf_entsize_byname(sfp, args->namelen),
                XFS_DATA_FORK);
        /*
         * Need to set up again due to realloc of the inode data.
@@ -403,10 +403,10 @@ xfs_dir2_sf_addname_easy(
         * Fill in the new entry.
         */
        sfep->namelen = args->namelen;
-        XFS_DIR2_SF_PUT_OFFSET(sfep, offset);
+        xfs_dir2_sf_put_offset(sfep, offset);
        memcpy(sfep->name, args->name, sfep->namelen);
-        XFS_DIR2_SF_PUT_INUMBER(sfp, &args->inumber,
+        xfs_dir2_sf_put_inumber(sfp, &args->inumber,
-                XFS_DIR2_SF_INUMBERP(sfep));
+                xfs_dir2_sf_inumberp(sfep));
        /*
         * Update the header and inode.
         */
@@ -463,14 +463,14 @@ xfs_dir2_sf_addname_hard(
         * If it's going to end up at the end then oldsfep will point there.
         */
        for (offset = XFS_DIR2_DATA_FIRST_OFFSET,
-              oldsfep = XFS_DIR2_SF_FIRSTENTRY(oldsfp),
+              oldsfep = xfs_dir2_sf_firstentry(oldsfp),
-              add_datasize = XFS_DIR2_DATA_ENTSIZE(args->namelen),
+              add_datasize = xfs_dir2_data_entsize(args->namelen),
              eof = (char *)oldsfep == &buf[old_isize];
             !eof;
-             offset = new_offset + XFS_DIR2_DATA_ENTSIZE(oldsfep->namelen),
+             offset = new_offset + xfs_dir2_data_entsize(oldsfep->namelen),
-              oldsfep = XFS_DIR2_SF_NEXTENTRY(oldsfp, oldsfep),
+              oldsfep = xfs_dir2_sf_nextentry(oldsfp, oldsfep),
              eof = (char *)oldsfep == &buf[old_isize]) {
-                new_offset = XFS_DIR2_SF_GET_OFFSET(oldsfep);
+                new_offset = xfs_dir2_sf_get_offset(oldsfep);
                if (offset + add_datasize <= new_offset)
                        break;
        }
@@ -495,10 +495,10 @@ xfs_dir2_sf_addname_hard(
         * Fill in the new entry, and update the header counts.
         */
        sfep->namelen = args->namelen;
-        XFS_DIR2_SF_PUT_OFFSET(sfep, offset);
+        xfs_dir2_sf_put_offset(sfep, offset);
        memcpy(sfep->name, args->name, sfep->namelen);
-        XFS_DIR2_SF_PUT_INUMBER(sfp, &args->inumber,
+        xfs_dir2_sf_put_inumber(sfp, &args->inumber,
-                XFS_DIR2_SF_INUMBERP(sfep));
+                xfs_dir2_sf_inumberp(sfep));
        sfp->hdr.count++;
 #if XFS_BIG_INUMS
        if (args->inumber > XFS_DIR2_MAX_SHORT_INUM && !objchange)
@@ -508,7 +508,7 @@ xfs_dir2_sf_addname_hard(
         * If there's more left to copy, do that.
         */
        if (!eof) {
-                sfep = XFS_DIR2_SF_NEXTENTRY(sfp, sfep);
+                sfep = xfs_dir2_sf_nextentry(sfp, sfep);
                memcpy(sfep, oldsfep, old_isize - nbytes);
        }
        kmem_free(buf, old_isize);
@@ -544,9 +544,9 @@ xfs_dir2_sf_addname_pick(
        mp = dp->i_mount;
        sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
-        size = XFS_DIR2_DATA_ENTSIZE(args->namelen);
+        size = xfs_dir2_data_entsize(args->namelen);
        offset = XFS_DIR2_DATA_FIRST_OFFSET;
-        sfep = XFS_DIR2_SF_FIRSTENTRY(sfp);
+        sfep = xfs_dir2_sf_firstentry(sfp);
        holefit = 0;
        /*
         * Loop over sf entries.
@@ -555,10 +555,10 @@ xfs_dir2_sf_addname_pick(
         */
        for (i = 0; i < sfp->hdr.count; i++) {
                if (!holefit)
-                        holefit = offset + size <= XFS_DIR2_SF_GET_OFFSET(sfep);
+                        holefit = offset + size <= xfs_dir2_sf_get_offset(sfep);
-                offset = XFS_DIR2_SF_GET_OFFSET(sfep) +
+                offset = xfs_dir2_sf_get_offset(sfep) +
-                         XFS_DIR2_DATA_ENTSIZE(sfep->namelen);
+                         xfs_dir2_data_entsize(sfep->namelen);
-                sfep = XFS_DIR2_SF_NEXTENTRY(sfp, sfep);
+                sfep = xfs_dir2_sf_nextentry(sfp, sfep);
        }
        /*
         * Calculate data bytes used excluding the new entry, if this
@@ -617,18 +617,18 @@ xfs_dir2_sf_check(
        sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
        offset = XFS_DIR2_DATA_FIRST_OFFSET;
-        ino = XFS_DIR2_SF_GET_INUMBER(sfp, &sfp->hdr.parent);
+        ino = xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent);
        i8count = ino > XFS_DIR2_MAX_SHORT_INUM;
-        for (i = 0, sfep = XFS_DIR2_SF_FIRSTENTRY(sfp);
+        for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp);
             i < sfp->hdr.count;
-             i++, sfep = XFS_DIR2_SF_NEXTENTRY(sfp, sfep)) {
+             i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) {
-                ASSERT(XFS_DIR2_SF_GET_OFFSET(sfep) >= offset);
+                ASSERT(xfs_dir2_sf_get_offset(sfep) >= offset);
-                ino = XFS_DIR2_SF_GET_INUMBER(sfp, XFS_DIR2_SF_INUMBERP(sfep));
+                ino = xfs_dir2_sf_get_inumber(sfp, xfs_dir2_sf_inumberp(sfep));
                i8count += ino > XFS_DIR2_MAX_SHORT_INUM;
                offset =
-                        XFS_DIR2_SF_GET_OFFSET(sfep) +
+                        xfs_dir2_sf_get_offset(sfep) +
-                        XFS_DIR2_DATA_ENTSIZE(sfep->namelen);
+                        xfs_dir2_data_entsize(sfep->namelen);
        }
        ASSERT(i8count == sfp->hdr.i8count);
        ASSERT(XFS_BIG_INUMS || i8count == 0);
@@ -671,7 +671,7 @@ xfs_dir2_sf_create(
        ASSERT(dp->i_df.if_flags & XFS_IFINLINE);
        ASSERT(dp->i_df.if_bytes == 0);
        i8count = pino > XFS_DIR2_MAX_SHORT_INUM;
-        size = XFS_DIR2_SF_HDR_SIZE(i8count);
+        size = xfs_dir2_sf_hdr_size(i8count);
        /*
         * Make a buffer for the data.
         */
@@ -684,7 +684,7 @@ xfs_dir2_sf_create(
        /*
         * Now can put in the inode number, since i8count is set.
         */
-        XFS_DIR2_SF_PUT_INUMBER(sfp, &pino, &sfp->hdr.parent);
+        xfs_dir2_sf_put_inumber(sfp, &pino, &sfp->hdr.parent);
        sfp->hdr.count = 0;
        dp->i_d.di_size = size;
        xfs_dir2_sf_check(args);
@@ -727,12 +727,12 @@ xfs_dir2_sf_getdents(
        sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
-        ASSERT(dp->i_d.di_size >= XFS_DIR2_SF_HDR_SIZE(sfp->hdr.i8count));
+        ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->hdr.i8count));
        /*
         * If the block number in the offset is out of range, we're done.
         */
-        if (XFS_DIR2_DATAPTR_TO_DB(mp, dir_offset) > mp->m_dirdatablk) {
+        if (xfs_dir2_dataptr_to_db(mp, dir_offset) > mp->m_dirdatablk) {
                *eofp = 1;
                return 0;
        }
@@ -747,9 +747,9 @@ xfs_dir2_sf_getdents(
         * Put . entry unless we're starting past it.
         */
        if (dir_offset <=
-                    XFS_DIR2_DB_OFF_TO_DATAPTR(mp, mp->m_dirdatablk,
+                    xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
                                               XFS_DIR2_DATA_DOT_OFFSET)) {
-                p.cook = XFS_DIR2_DB_OFF_TO_DATAPTR(mp, 0,
+                p.cook = xfs_dir2_db_off_to_dataptr(mp, 0,
                                                XFS_DIR2_DATA_DOTDOT_OFFSET);
                p.ino = dp->i_ino;
 #if XFS_BIG_INUMS
@@ -762,7 +762,7 @@ xfs_dir2_sf_getdents(
                if (!p.done) {
                        uio->uio_offset =
-                                XFS_DIR2_DB_OFF_TO_DATAPTR(mp, mp->m_dirdatablk,
+                                xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
                                                XFS_DIR2_DATA_DOT_OFFSET);
                        return error;
                }
@@ -772,11 +772,11 @@ xfs_dir2_sf_getdents(
         * Put .. entry unless we're starting past it.
         */
        if (dir_offset <=
-                    XFS_DIR2_DB_OFF_TO_DATAPTR(mp, mp->m_dirdatablk,
+                    xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
                                               XFS_DIR2_DATA_DOTDOT_OFFSET)) {
-                p.cook = XFS_DIR2_DB_OFF_TO_DATAPTR(mp, mp->m_dirdatablk,
+                p.cook = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
                                                XFS_DIR2_DATA_FIRST_OFFSET);
-                p.ino = XFS_DIR2_SF_GET_INUMBER(sfp, &sfp->hdr.parent);
+                p.ino = xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent);
 #if XFS_BIG_INUMS
                p.ino += mp->m_inoadd;
 #endif
@@ -787,7 +787,7 @@ xfs_dir2_sf_getdents(
                if (!p.done) {
                        uio->uio_offset =
-                                XFS_DIR2_DB_OFF_TO_DATAPTR(mp, mp->m_dirdatablk,
+                                xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
                                        XFS_DIR2_DATA_DOTDOT_OFFSET);
                        return error;
                }
@@ -796,23 +796,23 @@ xfs_dir2_sf_getdents(
        /*
         * Loop while there are more entries and put'ing works.
         */
-        for (i = 0, sfep = XFS_DIR2_SF_FIRSTENTRY(sfp);
+        for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp);
                     i < sfp->hdr.count;
-                             i++, sfep = XFS_DIR2_SF_NEXTENTRY(sfp, sfep)) {
+                             i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) {
-                off = XFS_DIR2_DB_OFF_TO_DATAPTR(mp, mp->m_dirdatablk,
+                off = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
-                                XFS_DIR2_SF_GET_OFFSET(sfep));
+                                xfs_dir2_sf_get_offset(sfep));
                if (dir_offset > off)
                        continue;
                p.namelen = sfep->namelen;
-                p.cook = XFS_DIR2_DB_OFF_TO_DATAPTR(mp, mp->m_dirdatablk,
+                p.cook = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
-                        XFS_DIR2_SF_GET_OFFSET(sfep) +
+                        xfs_dir2_sf_get_offset(sfep) +
-                        XFS_DIR2_DATA_ENTSIZE(p.namelen));
+                        xfs_dir2_data_entsize(p.namelen));
-                p.ino = XFS_DIR2_SF_GET_INUMBER(sfp, XFS_DIR2_SF_INUMBERP(sfep));
+                p.ino = xfs_dir2_sf_get_inumber(sfp, xfs_dir2_sf_inumberp(sfep));
 #if XFS_BIG_INUMS
                p.ino += mp->m_inoadd;
 #endif
@@ -832,7 +832,7 @@ xfs_dir2_sf_getdents(
        *eofp = 1;
        uio->uio_offset =
-                XFS_DIR2_DB_OFF_TO_DATAPTR(mp, mp->m_dirdatablk + 1, 0);
+                xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk + 1, 0);
        return 0;
 }
@@ -865,7 +865,7 @@ xfs_dir2_sf_lookup(
        ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
        ASSERT(dp->i_df.if_u1.if_data != NULL);
        sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
-        ASSERT(dp->i_d.di_size >= XFS_DIR2_SF_HDR_SIZE(sfp->hdr.i8count));
+        ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->hdr.i8count));
        /*
         * Special case for .
         */
@@ -878,21 +878,21 @@ xfs_dir2_sf_lookup(
         */
        if (args->namelen == 2 &&
            args->name[0] == '.' && args->name[1] == '.') {
-                args->inumber = XFS_DIR2_SF_GET_INUMBER(sfp, &sfp->hdr.parent);
+                args->inumber = xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent);
                return XFS_ERROR(EEXIST);
        }
        /*
         * Loop over all the entries trying to match ours.
         */
-        for (i = 0, sfep = XFS_DIR2_SF_FIRSTENTRY(sfp);
+        for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp);
             i < sfp->hdr.count;
-             i++, sfep = XFS_DIR2_SF_NEXTENTRY(sfp, sfep)) {
+             i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) {
                if (sfep->namelen == args->namelen &&
                    sfep->name[0] == args->name[0] &&
                    memcmp(args->name, sfep->name, args->namelen) == 0) {
                        args->inumber =
-                                XFS_DIR2_SF_GET_INUMBER(sfp,
+                                xfs_dir2_sf_get_inumber(sfp,
-                                        XFS_DIR2_SF_INUMBERP(sfep));
+                                        xfs_dir2_sf_inumberp(sfep));
                        return XFS_ERROR(EEXIST);
                }
        }
@@ -934,19 +934,19 @@ xfs_dir2_sf_removename(
        ASSERT(dp->i_df.if_bytes == oldsize);
        ASSERT(dp->i_df.if_u1.if_data != NULL);
        sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
-        ASSERT(oldsize >= XFS_DIR2_SF_HDR_SIZE(sfp->hdr.i8count));
+        ASSERT(oldsize >= xfs_dir2_sf_hdr_size(sfp->hdr.i8count));
        /*
         * Loop over the old directory entries.
         * Find the one we're deleting.
         */
-        for (i = 0, sfep = XFS_DIR2_SF_FIRSTENTRY(sfp);
+        for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp);
             i < sfp->hdr.count;
-             i++, sfep = XFS_DIR2_SF_NEXTENTRY(sfp, sfep)) {
+             i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) {
                if (sfep->namelen == args->namelen &&
                    sfep->name[0] == args->name[0] &&
                    memcmp(sfep->name, args->name, args->namelen) == 0) {
-                        ASSERT(XFS_DIR2_SF_GET_INUMBER(sfp,
+                        ASSERT(xfs_dir2_sf_get_inumber(sfp,
-                                        XFS_DIR2_SF_INUMBERP(sfep)) ==
+                                        xfs_dir2_sf_inumberp(sfep)) ==
                                args->inumber);
                        break;
                }
@@ -961,7 +961,7 @@ xfs_dir2_sf_removename(
         * Calculate sizes.
         */
        byteoff = (int)((char *)sfep - (char *)sfp);
-        entsize = XFS_DIR2_SF_ENTSIZE_BYNAME(sfp, args->namelen);
+        entsize = xfs_dir2_sf_entsize_byname(sfp, args->namelen);
        newsize = oldsize - entsize;
        /*
         * Copy the part if any after the removed entry, sliding it down.
@@ -1027,7 +1027,7 @@ xfs_dir2_sf_replace(
        ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
        ASSERT(dp->i_df.if_u1.if_data != NULL);
        sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
-        ASSERT(dp->i_d.di_size >= XFS_DIR2_SF_HDR_SIZE(sfp->hdr.i8count));
+        ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->hdr.i8count));
 #if XFS_BIG_INUMS
        /*
         * New inode number is large, and need to convert to 8-byte inodes.
@@ -1067,28 +1067,28 @@ xfs_dir2_sf_replace(
        if (args->namelen == 2 &&
            args->name[0] == '.' && args->name[1] == '.') {
 #if XFS_BIG_INUMS || defined(DEBUG)
-                ino = XFS_DIR2_SF_GET_INUMBER(sfp, &sfp->hdr.parent);
+                ino = xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent);
                ASSERT(args->inumber != ino);
 #endif
-                XFS_DIR2_SF_PUT_INUMBER(sfp, &args->inumber, &sfp->hdr.parent);
+                xfs_dir2_sf_put_inumber(sfp, &args->inumber, &sfp->hdr.parent);
        }
        /*
         * Normal entry, look for the name.
         */
        else {
-                for (i = 0, sfep = XFS_DIR2_SF_FIRSTENTRY(sfp);
+                for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp);
                     i < sfp->hdr.count;
-                     i++, sfep = XFS_DIR2_SF_NEXTENTRY(sfp, sfep)) {
+                     i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) {
                        if (sfep->namelen == args->namelen &&
                            sfep->name[0] == args->name[0] &&
                            memcmp(args->name, sfep->name, args->namelen) == 0) {
 #if XFS_BIG_INUMS || defined(DEBUG)
-                                ino = XFS_DIR2_SF_GET_INUMBER(sfp,
+                                ino = xfs_dir2_sf_get_inumber(sfp,
-                                        XFS_DIR2_SF_INUMBERP(sfep));
+                                        xfs_dir2_sf_inumberp(sfep));
                                ASSERT(args->inumber != ino);
 #endif
-                                XFS_DIR2_SF_PUT_INUMBER(sfp, &args->inumber,
+                                xfs_dir2_sf_put_inumber(sfp, &args->inumber,
-                                        XFS_DIR2_SF_INUMBERP(sfep));
+                                        xfs_dir2_sf_inumberp(sfep));
                                break;
                        }
                }
@@ -1189,22 +1189,22 @@ xfs_dir2_sf_toino4(
         */
        sfp->hdr.count = oldsfp->hdr.count;
        sfp->hdr.i8count = 0;
-        ino = XFS_DIR2_SF_GET_INUMBER(oldsfp, &oldsfp->hdr.parent);
+        ino = xfs_dir2_sf_get_inumber(oldsfp, &oldsfp->hdr.parent);
-        XFS_DIR2_SF_PUT_INUMBER(sfp, &ino, &sfp->hdr.parent);
+        xfs_dir2_sf_put_inumber(sfp, &ino, &sfp->hdr.parent);
        /*
         * Copy the entries field by field.
         */
-        for (i = 0, sfep = XFS_DIR2_SF_FIRSTENTRY(sfp),
+        for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp),
-                    oldsfep = XFS_DIR2_SF_FIRSTENTRY(oldsfp);
+                    oldsfep = xfs_dir2_sf_firstentry(oldsfp);
             i < sfp->hdr.count;
-             i++, sfep = XFS_DIR2_SF_NEXTENTRY(sfp, sfep),
+             i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep),
-                  oldsfep = XFS_DIR2_SF_NEXTENTRY(oldsfp, oldsfep)) {
+                  oldsfep = xfs_dir2_sf_nextentry(oldsfp, oldsfep)) {
                sfep->namelen = oldsfep->namelen;
                sfep->offset = oldsfep->offset;
                memcpy(sfep->name, oldsfep->name, sfep->namelen);
-                ino = XFS_DIR2_SF_GET_INUMBER(oldsfp,
+                ino = xfs_dir2_sf_get_inumber(oldsfp,
-                        XFS_DIR2_SF_INUMBERP(oldsfep));
+                        xfs_dir2_sf_inumberp(oldsfep));
-                XFS_DIR2_SF_PUT_INUMBER(sfp, &ino, XFS_DIR2_SF_INUMBERP(sfep));
+                xfs_dir2_sf_put_inumber(sfp, &ino, xfs_dir2_sf_inumberp(sfep));
        }
        /*
         * Clean up the inode.
@@ -1266,22 +1266,22 @@ xfs_dir2_sf_toino8(
         */
        sfp->hdr.count = oldsfp->hdr.count;
        sfp->hdr.i8count = 1;
-        ino = XFS_DIR2_SF_GET_INUMBER(oldsfp, &oldsfp->hdr.parent);
+        ino = xfs_dir2_sf_get_inumber(oldsfp, &oldsfp->hdr.parent);
-        XFS_DIR2_SF_PUT_INUMBER(sfp, &ino, &sfp->hdr.parent);
+        xfs_dir2_sf_put_inumber(sfp, &ino, &sfp->hdr.parent);
        /*
         * Copy the entries field by field.
         */
-        for (i = 0, sfep = XFS_DIR2_SF_FIRSTENTRY(sfp),
+        for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp),
-                    oldsfep = XFS_DIR2_SF_FIRSTENTRY(oldsfp);
+                    oldsfep = xfs_dir2_sf_firstentry(oldsfp);
             i < sfp->hdr.count;
-             i++, sfep = XFS_DIR2_SF_NEXTENTRY(sfp, sfep),
+             i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep),
-                  oldsfep = XFS_DIR2_SF_NEXTENTRY(oldsfp, oldsfep)) {
+                  oldsfep = xfs_dir2_sf_nextentry(oldsfp, oldsfep)) {
                sfep->namelen = oldsfep->namelen;
                sfep->offset = oldsfep->offset;
                memcpy(sfep->name, oldsfep->name, sfep->namelen);
-                ino = XFS_DIR2_SF_GET_INUMBER(oldsfp,
+                ino = xfs_dir2_sf_get_inumber(oldsfp,
-                        XFS_DIR2_SF_INUMBERP(oldsfep));
+                        xfs_dir2_sf_inumberp(oldsfep));
-                XFS_DIR2_SF_PUT_INUMBER(sfp, &ino, XFS_DIR2_SF_INUMBERP(sfep));
+                xfs_dir2_sf_put_inumber(sfp, &ino, xfs_dir2_sf_inumberp(sfep));
        }
        /*
         * Clean up the inode.
diff --git a/fs/xfs/xfs_dir2_sf.h b/fs/xfs/xfs_dir2_sf.h
index 42f015b70018..11e503209afa 100644
--- a/fs/xfs/xfs_dir2_sf.h
+++ b/fs/xfs/xfs_dir2_sf.h
@@ -90,7 +90,6 @@ typedef struct xfs_dir2_sf {
        xfs_dir2_sf_entry_t     list[1];        /* shortform entries */
 } xfs_dir2_sf_t;
-#define XFS_DIR2_SF_HDR_SIZE(i8count)   xfs_dir2_sf_hdr_size(i8count)
 static inline int xfs_dir2_sf_hdr_size(int i8count)
 {
        return ((uint)sizeof(xfs_dir2_sf_hdr_t) - \
@@ -98,14 +97,11 @@ static inline int xfs_dir2_sf_hdr_size(int i8count)
                ((uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t)));
 }
-#define XFS_DIR2_SF_INUMBERP(sfep)      xfs_dir2_sf_inumberp(sfep)
 static inline xfs_dir2_inou_t *xfs_dir2_sf_inumberp(xfs_dir2_sf_entry_t *sfep)
 {
        return (xfs_dir2_inou_t *)&(sfep)->name[(sfep)->namelen];
 }
-#define XFS_DIR2_SF_GET_INUMBER(sfp, from) \
-        xfs_dir2_sf_get_inumber(sfp, from)
 static inline xfs_intino_t
 xfs_dir2_sf_get_inumber(xfs_dir2_sf_t *sfp, xfs_dir2_inou_t *from)
 {
@@ -114,8 +110,6 @@ xfs_dir2_sf_get_inumber(xfs_dir2_sf_t *sfp, xfs_dir2_inou_t *from)
                (xfs_intino_t)XFS_GET_DIR_INO8((from)->i8));
 }
-#define XFS_DIR2_SF_PUT_INUMBER(sfp,from,to) \
-        xfs_dir2_sf_put_inumber(sfp,from,to)
 static inline void xfs_dir2_sf_put_inumber(xfs_dir2_sf_t *sfp, xfs_ino_t *from,
                                                xfs_dir2_inou_t *to)
 {
@@ -125,24 +119,18 @@ static inline void xfs_dir2_sf_put_inumber(xfs_dir2_sf_t *sfp, xfs_ino_t *from,
                XFS_PUT_DIR_INO8(*(from), (to)->i8);
 }
-#define XFS_DIR2_SF_GET_OFFSET(sfep)    \
-        xfs_dir2_sf_get_offset(sfep)
 static inline xfs_dir2_data_aoff_t
 xfs_dir2_sf_get_offset(xfs_dir2_sf_entry_t *sfep)
 {
        return INT_GET_UNALIGNED_16_BE(&(sfep)->offset.i);
 }
-#define XFS_DIR2_SF_PUT_OFFSET(sfep,off) \
-        xfs_dir2_sf_put_offset(sfep,off)
 static inline void
 xfs_dir2_sf_put_offset(xfs_dir2_sf_entry_t *sfep, xfs_dir2_data_aoff_t off)
 {
        INT_SET_UNALIGNED_16_BE(&(sfep)->offset.i, off);
 }
-#define XFS_DIR2_SF_ENTSIZE_BYNAME(sfp,len)     \
-        xfs_dir2_sf_entsize_byname(sfp,len)
 static inline int xfs_dir2_sf_entsize_byname(xfs_dir2_sf_t *sfp, int len)
 {
        return ((uint)sizeof(xfs_dir2_sf_entry_t) - 1 + (len) - \
@@ -150,8 +138,6 @@ static inline int xfs_dir2_sf_entsize_byname(xfs_dir2_sf_t *sfp, int len)
                ((uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t)));
 }
-#define XFS_DIR2_SF_ENTSIZE_BYENTRY(sfp,sfep)   \
-        xfs_dir2_sf_entsize_byentry(sfp,sfep)
 static inline int
 xfs_dir2_sf_entsize_byentry(xfs_dir2_sf_t *sfp, xfs_dir2_sf_entry_t *sfep)
 {
@@ -160,19 +146,17 @@ xfs_dir2_sf_entsize_byentry(xfs_dir2_sf_t *sfp, xfs_dir2_sf_entry_t *sfep)
                ((uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t)));
 }
-#define XFS_DIR2_SF_FIRSTENTRY(sfp)     xfs_dir2_sf_firstentry(sfp)
 static inline xfs_dir2_sf_entry_t *xfs_dir2_sf_firstentry(xfs_dir2_sf_t *sfp)
 {
        return ((xfs_dir2_sf_entry_t *) \
-                ((char *)(sfp) + XFS_DIR2_SF_HDR_SIZE(sfp->hdr.i8count)));
+                ((char *)(sfp) + xfs_dir2_sf_hdr_size(sfp->hdr.i8count)));
 }
-#define XFS_DIR2_SF_NEXTENTRY(sfp,sfep) xfs_dir2_sf_nextentry(sfp,sfep)
 static inline xfs_dir2_sf_entry_t *
 xfs_dir2_sf_nextentry(xfs_dir2_sf_t *sfp, xfs_dir2_sf_entry_t *sfep)
 {
        return ((xfs_dir2_sf_entry_t *) \
-                ((char *)(sfep) + XFS_DIR2_SF_ENTSIZE_BYENTRY(sfp,sfep)));
+                ((char *)(sfep) + xfs_dir2_sf_entsize_byentry(sfp,sfep)));
 }
 /*
diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c
new file mode 100644
index 000000000000..ce2278611bb7
--- /dev/null
+++ b/fs/xfs/xfs_filestream.c
@@ -0,0 +1,771 @@
+/*
+ * Copyright (c) 2006-2007 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_inum.h"
+#include "xfs_dir2.h"
+#include "xfs_dir2_sf.h"
+#include "xfs_attr_sf.h"
+#include "xfs_dinode.h"
+#include "xfs_inode.h"
+#include "xfs_ag.h"
+#include "xfs_dmapi.h"
+#include "xfs_log.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_mount.h"
+#include "xfs_bmap.h"
+#include "xfs_alloc.h"
+#include "xfs_utils.h"
+#include "xfs_mru_cache.h"
+#include "xfs_filestream.h"
+#ifdef XFS_FILESTREAMS_TRACE
+ktrace_t *xfs_filestreams_trace_buf;
+STATIC void                    /* record one event via ktrace_enter() */
+xfs_filestreams_trace(
+        xfs_mount_t     *mp,    /* mount point */
+        int             type,   /* type of trace */
+        const char      *func,  /* source function */
+        int             line,   /* source line number */
+        __psunsigned_t  arg0,   /* arg0..arg5: caller-supplied values, passed through */
+        __psunsigned_t  arg1,
+        __psunsigned_t  arg2,
+        __psunsigned_t  arg3,
+        __psunsigned_t  arg4,
+        __psunsigned_t  arg5)
+{
+        ktrace_enter(xfs_filestreams_trace_buf,
+                (void *)(__psint_t)(type | (line << 16)), /* type OR'd with line << 16 */
+                (void *)func,   /* cast drops the const qualifier */
+                (void *)(__psunsigned_t)current_pid(),
+                (void *)mp,
+                (void *)(__psunsigned_t)arg0,
+                (void *)(__psunsigned_t)arg1,
+                (void *)(__psunsigned_t)arg2,
+                (void *)(__psunsigned_t)arg3,
+                (void *)(__psunsigned_t)arg4,
+                (void *)(__psunsigned_t)arg5,
+                NULL, NULL, NULL, NULL, NULL, NULL); /* remaining arguments are NULL */
+}
+#define TRACE0(mp,t)                    TRACE6(mp,t,0,0,0,0,0,0) /* TRACEn: pad to six args with 0 */
+#define TRACE1(mp,t,a0)                 TRACE6(mp,t,a0,0,0,0,0,0)
+#define TRACE2(mp,t,a0,a1)              TRACE6(mp,t,a0,a1,0,0,0,0)
+#define TRACE3(mp,t,a0,a1,a2)           TRACE6(mp,t,a0,a1,a2,0,0,0)
+#define TRACE4(mp,t,a0,a1,a2,a3)        TRACE6(mp,t,a0,a1,a2,a3,0,0)
+#define TRACE5(mp,t,a0,a1,a2,a3,a4)     TRACE6(mp,t,a0,a1,a2,a3,a4,0)
+#define TRACE6(mp,t,a0,a1,a2,a3,a4,a5) /* args parenthesized so the cast covers the whole expression */ \
+        xfs_filestreams_trace(mp, t, __FUNCTION__, __LINE__, \
+                                (__psunsigned_t)(a0), (__psunsigned_t)(a1), \
+                                (__psunsigned_t)(a2), (__psunsigned_t)(a3), \
+                                (__psunsigned_t)(a4), (__psunsigned_t)(a5))
+#define TRACE_AG_SCAN(mp, ag, ag2) /* event macros are one expression; the caller adds the ';' */ \
+                TRACE2(mp, XFS_FSTRM_KTRACE_AGSCAN, ag, ag2)
+#define TRACE_AG_PICK1(mp, max_ag, maxfree) \
+                TRACE2(mp, XFS_FSTRM_KTRACE_AGPICK1, max_ag, maxfree)
+#define TRACE_AG_PICK2(mp, ag, ag2, cnt, free, scan, flag) \
+                TRACE6(mp, XFS_FSTRM_KTRACE_AGPICK2, ag, ag2, \
+                         cnt, free, scan, flag)
+#define TRACE_UPDATE(mp, ip, ag, cnt, ag2, cnt2) \
+                TRACE5(mp, XFS_FSTRM_KTRACE_UPDATE, ip, ag, cnt, ag2, cnt2)
+#define TRACE_FREE(mp, ip, pip, ag, cnt) \
+                TRACE4(mp, XFS_FSTRM_KTRACE_FREE, ip, pip, ag, cnt)
+#define TRACE_LOOKUP(mp, ip, pip, ag, cnt) \
+                TRACE4(mp, XFS_FSTRM_KTRACE_ITEM_LOOKUP, ip, pip, ag, cnt)
+#define TRACE_ASSOCIATE(mp, ip, pip, ag, cnt) \
+                TRACE4(mp, XFS_FSTRM_KTRACE_ASSOCIATE, ip, pip, ag, cnt)
+#define TRACE_MOVEAG(mp, ip, pip, oag, ocnt, nag, ncnt) \
+                TRACE6(mp, XFS_FSTRM_KTRACE_MOVEAG, ip, pip, oag, ocnt, nag, ncnt)
+#define TRACE_ORPHAN(mp, ip, ag) \
+                TRACE2(mp, XFS_FSTRM_KTRACE_ORPHAN, ip, ag)
+#else /* !XFS_FILESTREAMS_TRACE: every event macro expands to nothing */
+#define TRACE_AG_SCAN(mp, ag, ag2)
+#define TRACE_AG_PICK1(mp, max_ag, maxfree)
+#define TRACE_AG_PICK2(mp, ag, ag2, cnt, free, scan, flag)
+#define TRACE_UPDATE(mp, ip, ag, cnt, ag2, cnt2)
+#define TRACE_FREE(mp, ip, pip, ag, cnt)
+#define TRACE_LOOKUP(mp, ip, pip, ag, cnt)
+#define TRACE_ASSOCIATE(mp, ip, pip, ag, cnt)
+#define TRACE_MOVEAG(mp, ip, pip, oag, ocnt, nag, ncnt)
+#define TRACE_ORPHAN(mp, ip, ag)
+#endif
+static kmem_zone_t *item_zone;
+/*
+ * Structure for associating a file or a directory with an allocation group.
+ * The parent directory pointer is only needed for files, but since there will
+ * generally be vastly more files than directories in the cache, using the same
+ * data structure simplifies the code with very little memory overhead.
+ */
+typedef struct fstrm_item
+{
+        xfs_agnumber_t  ag;     /* AG currently in use for the file/directory. */
+        xfs_inode_t     *ip;    /* Inode of the file/directory itself (self-pointer). */
+        xfs_inode_t     *pip;   /* Parent directory inode pointer; only needed for files. */
+} fstrm_item_t;
+/*
+ * Scan the AGs starting at startag looking for an AG that isn't in use and has
+ * at least minlen blocks free.
+ *
+ * mp:      mount whose per-AG array is scanned.
+ * startag: AG the scan begins at; the scan wraps to AG 0 past sb_agcount.
+ * agp:     out parameter.  Reset to NULLAGNUMBER on entry and set to the
+ *          chosen AG on success.
+ * flags:   XFS_PICK_USERDATA and/or XFS_PICK_LOWSPACE.
+ * minlen:  if non-zero, the AG's longest free extent (reduced by whatever is
+ *          needed to top up its freelist) must be at least this long; if
+ *          zero, the AG must have at least 2% of sb_agblocks free instead.
+ *
+ * The scan makes a first pass that never sleeps initialising per-AG data, a
+ * second pass that may sleep, and a further pass with XFS_PICK_LOWSPACE forced
+ * on if the caller did not set it.  If that still finds nothing, the AG with
+ * the most free blocks is used even if it is already busy, and failing that
+ * AG 0 is used.
+ *
+ * Returns 0 with a filestream reference held on *agp, which the caller must
+ * drop with xfs_filestream_put_ag(), or the error from xfs_alloc_pagf_init(),
+ * in which case *agp is left as NULLAGNUMBER and no reference is held.
+ */
+static int
+_xfs_filestream_pick_ag(
+        xfs_mount_t     *mp,
+        xfs_agnumber_t  startag,
+        xfs_agnumber_t  *agp,
+        int             flags,
+        xfs_extlen_t    minlen)
+{
+        int             err, trylock, nscan;
+        xfs_extlen_t    delta, longest, need, free, minfree, maxfree = 0;
+        xfs_agnumber_t  ag, max_ag = NULLAGNUMBER;
+        struct xfs_perag *pag;
+        /* 2% of an AG's blocks must be free for it to be chosen. */
+        minfree = mp->m_sb.sb_agblocks / 50;
+        ag = startag;
+        *agp = NULLAGNUMBER;
+        /* For the first pass, don't sleep trying to init the per-AG. */
+        trylock = XFS_ALLOC_FLAG_TRYLOCK;
+        for (nscan = 0; 1; nscan++) {
+                TRACE_AG_SCAN(mp, ag, xfs_filestream_peek_ag(mp, ag));
+                pag = mp->m_perag + ag;
+                if (!pag->pagf_init) {
+                        err = xfs_alloc_pagf_init(mp, NULL, ag, trylock);
+                        if (err && !trylock)
+                                return err;
+                }
+                /* Might fail sometimes during the 1st pass with trylock set. */
+                if (!pag->pagf_init)
+                        goto next_ag;
+                /* Keep track of the AG with the most free blocks. */
+                if (pag->pagf_freeblks > maxfree) {
+                        maxfree = pag->pagf_freeblks;
+                        max_ag = ag;
+                }
+                /*
+                 * The AG reference count does two things: it enforces mutual
+                 * exclusion when examining the suitability of an AG in this
+                 * loop, and it guards against two filestreams being established
+                 * in the same AG as each other.
+                 */
+                if (xfs_filestream_get_ag(mp, ag) > 1) {
+                        xfs_filestream_put_ag(mp, ag);
+                        goto next_ag;
+                }
+                /*
+                 * Work out the longest extent that would remain usable once
+                 * the freelist has been topped up to its minimum length.
+                 */
+                need = XFS_MIN_FREELIST_PAG(pag, mp);
+                delta = need > pag->pagf_flcount ? need - pag->pagf_flcount : 0;
+                longest = (pag->pagf_longest > delta) ?
+                          (pag->pagf_longest - delta) :
+                          (pag->pagf_flcount > 0 || pag->pagf_longest > 0);
+                if (((minlen && longest >= minlen) ||
+                     (!minlen && pag->pagf_freeblks >= minfree)) &&
+                    (!pag->pagf_metadata || !(flags & XFS_PICK_USERDATA) ||
+                     (flags & XFS_PICK_LOWSPACE))) {
+                        /* Break out, retaining the reference on the AG. */
+                        free = pag->pagf_freeblks;
+                        *agp = ag;
+                        break;
+                }
+                /* Drop the reference on this AG, it's not usable. */
+                xfs_filestream_put_ag(mp, ag);
+next_ag:
+                /* Move to the next AG, wrapping to AG 0 if necessary. */
+                if (++ag >= mp->m_sb.sb_agcount)
+                        ag = 0;
+                /* If a full pass of the AGs hasn't been done yet, continue. */
+                if (ag != startag)
+                        continue;
+                /* Allow sleeping in xfs_alloc_pagf_init() on the 2nd pass. */
+                if (trylock != 0) {
+                        trylock = 0;
+                        continue;
+                }
+                /* Finally, if lowspace wasn't set, set it for the 3rd pass. */
+                if (!(flags & XFS_PICK_LOWSPACE)) {
+                        flags |= XFS_PICK_LOWSPACE;
+                        continue;
+                }
+                /*
+                 * Take the AG with the most free space, regardless of whether
+                 * it's already in use by another filestream.
+                 */
+                if (max_ag != NULLAGNUMBER) {
+                        xfs_filestream_get_ag(mp, max_ag);
+                        TRACE_AG_PICK1(mp, max_ag, maxfree);
+                        free = maxfree;
+                        *agp = max_ag;
+                        break;
+                }
+                /*
+                 * Nothing matched and no AG reported any free blocks, so fall
+                 * back to AG 0.  Take a reference on it exactly like every
+                 * other successful return does: the callers drop one reference
+                 * on whatever AG is handed back, so returning AG 0 without one
+                 * would drive its filestream count negative.
+                 */
+                TRACE_AG_PICK1(mp, max_ag, maxfree);
+                xfs_filestream_get_ag(mp, 0);
+                *agp = 0;
+                return 0;
+        }
+        TRACE_AG_PICK2(mp, startag, *agp, xfs_filestream_peek_ag(mp, *agp),
+                        free, nscan, flags);
+        return 0;
+}
+/*
+ * Set the allocation group number for a file or a directory, updating inode
+ * references and per-AG references as appropriate.  Must be called with the
+ * m_peraglock held in read mode.
+ *
+ * ip:  inode to (re)associate; the MRU cache is keyed on ip->i_ino.
+ * pip: parent directory when ip is a regular file, NULL when ip is itself a
+ *      directory.
+ * ag:  allocation group to record for ip.
+ *
+ * If ip already has a cache item it is updated in place and the AG and parent
+ * references are moved across; otherwise a new item is allocated and inserted
+ * and fresh references are taken on the AG, on ip and (if set) on pip.
+ *
+ * Returns 0 on success, ENOMEM (positive) if the item cannot be allocated, or
+ * the error returned by xfs_mru_cache_insert().
+ */
+static int
+_xfs_filestream_update_ag(
+        xfs_inode_t     *ip,
+        xfs_inode_t     *pip,
+        xfs_agnumber_t  ag)
+{
+        int             err = 0;
+        xfs_mount_t     *mp;
+        xfs_mru_cache_t *cache;
+        fstrm_item_t    *item;
+        xfs_agnumber_t  old_ag;
+        xfs_inode_t     *old_pip;
+        /*
+         * Either ip is a regular file and pip is a directory, or ip is a
+         * directory and pip is NULL.
+         */
+        ASSERT(ip && (((ip->i_d.di_mode & S_IFREG) && pip &&
+                       (pip->i_d.di_mode & S_IFDIR)) ||
+                      ((ip->i_d.di_mode & S_IFDIR) && !pip)));
+        mp = ip->i_mount;
+        cache = mp->m_filestream;
+        item = xfs_mru_cache_lookup(cache, ip->i_ino);
+        if (item) {
+                ASSERT(item->ip == ip);
+                /*
+                 * Swap in the new values and remember the old ones; the item
+                 * itself is not touched again after xfs_mru_cache_done().
+                 */
+                old_ag = item->ag;
+                item->ag = ag;
+                old_pip = item->pip;
+                item->pip = pip;
+                xfs_mru_cache_done(cache);
+                /*
+                 * If the AG has changed, drop the old ref and take a new one,
+                 * effectively transferring the reference from old to new AG.
+                 */
+                if (ag != old_ag) {
+                        xfs_filestream_put_ag(mp, old_ag);
+                        xfs_filestream_get_ag(mp, ag);
+                }
+                /*
+                 * If ip is a file and its pip has changed, drop the old ref and
+                 * take a new one.
+                 */
+                if (pip && pip != old_pip) {
+                        IRELE(old_pip);
+                        IHOLD(pip);
+                }
+                TRACE_UPDATE(mp, ip, old_ag, xfs_filestream_peek_ag(mp, old_ag),
+                                ag, xfs_filestream_peek_ag(mp, ag));
+                return 0;
+        }
+        /* No cached item for this inode yet: allocate and insert a fresh one. */
+        item = kmem_zone_zalloc(item_zone, KM_MAYFAIL);
+        if (!item)
+                return ENOMEM;
+        item->ag = ag;
+        item->ip = ip;
+        item->pip = pip;
+        err = xfs_mru_cache_insert(cache, ip->i_ino, item);
+        if (err) {
+                /*
+                 * No AG or inode references have been taken at this point, so
+                 * only the item's memory needs to be returned.
+                 */
+                kmem_zone_free(item_zone, item);
+                return err;
+        }
+        /* Take a reference on the AG. */
+        xfs_filestream_get_ag(mp, ag);
+        /*
+         * Take a reference on the inode itself regardless of whether it's a
+         * regular file or a directory.
+         */
+        IHOLD(ip);
+        /*
+         * In the case of a regular file, take a reference on the parent inode
+         * as well to ensure it remains in-core.
+         */
+        if (pip)
+                IHOLD(pip);
+        TRACE_UPDATE(mp, ip, ag, xfs_filestream_peek_ag(mp, ag),
+                        ag, xfs_filestream_peek_ag(mp, ag));
+        return 0;
+}
+/*
+ * xfs_fstrm_free_func(): callback for freeing cached stream items.  It undoes
+ * everything _xfs_filestream_update_ag() set up for the item: the AG
+ * reference, the inode reference(s) and the item's memory.
+ */
+void
+xfs_fstrm_free_func(
+        xfs_ino_t       ino,
+        fstrm_item_t    *item)
+{
+        xfs_inode_t     *ip = item->ip;
+        xfs_inode_t     *pip = item->pip;
+        xfs_mount_t     *mp = ip->i_mount;
+        int             ref;
+        ASSERT(ip->i_ino == ino);
+        xfs_iflags_clear(ip, XFS_IFILESTREAM);
+        /* Give back the AG reference taken when the item was added. */
+        ref = xfs_filestream_put_ag(mp, item->ag);
+        ASSERT(ref >= 0);
+        TRACE_FREE(mp, ip, pip, item->ag,
+                xfs_filestream_peek_ag(mp, item->ag));
+        /*
+         * Release the reference _xfs_filestream_update_ag() always takes on
+         * the inode itself, file or directory alike.  This can result in the
+         * inode being freed, so no inode locks may be held while filestream
+         * objects are being freed, otherwise we can deadlock here.
+         */
+        IRELE(ip);
+        /*
+         * A regular file's item also pins its parent directory in-core;
+         * release that reference as well.
+         */
+        if (pip)
+                IRELE(pip);
+        /* Last of all, return the item's memory to its zone. */
+        kmem_zone_free(item_zone, item);
+}
+/*
+ * xfs_filestream_init() is called at xfs initialisation time to set up the
+ * memory zone that will be used for filestream data structure allocation.
+ *
+ * Returns 0 on success or -ENOMEM if the item zone cannot be created.  The
+ * zone is checked before the optional trace buffer is allocated so that a
+ * failed initialisation does not leave a trace buffer behind.
+ */
+int
+xfs_filestream_init(void)
+{
+        item_zone = kmem_zone_init(sizeof(fstrm_item_t), "fstrm_item");
+        if (!item_zone)
+                return -ENOMEM;
+#ifdef XFS_FILESTREAMS_TRACE
+        xfs_filestreams_trace_buf = ktrace_alloc(XFS_FSTRM_KTRACE_SIZE, KM_SLEEP);
+#endif
+        return 0;
+}
+/*
+ * xfs_filestream_uninit() is called at xfs termination time to destroy the
+ * memory zone that was used for filestream data structure allocation.  When
+ * XFS_FILESTREAMS_TRACE is defined it also frees the trace buffer allocated
+ * by xfs_filestream_init().
+ */
+void
+xfs_filestream_uninit(void)
+{
+#ifdef XFS_FILESTREAMS_TRACE
+        ktrace_free(xfs_filestreams_trace_buf);
+#endif
+        kmem_zone_destroy(item_zone);
+}
+/*
+ * xfs_filestream_mount() runs when a file system is mounted with the
+ * filestream option and creates the MRU cache that tracks that file system's
+ * file streams, storing it in mp->m_filestream.
+ *
+ * Returns whatever xfs_mru_cache_create() returns.
+ */
+int
+xfs_filestream_mount(
+        xfs_mount_t     *mp)
+{
+        /*
+         * The filestream timer tunable is currently fixed within the range of
+         * one second to four minutes, with five seconds being the default.
+         * The group count is somewhat arbitrary, but honouring the tunable to
+         * within about 10 percent needs at least 10 groups.
+         */
+        unsigned int    grp_count = 10;
+        unsigned int    lifetime = xfs_fstrm_centisecs * 10;
+        return xfs_mru_cache_create(&mp->m_filestream, lifetime, grp_count,
+                             (xfs_mru_cache_free_func_t)xfs_fstrm_free_func);
+}
+/*
+ * xfs_filestream_unmount() runs when a file system that was mounted with the
+ * filestream option is unmounted.  It destroys the MRU cache created by
+ * xfs_filestream_mount(), draining the file stream tracking structures and
+ * freeing the memory that was allocated for them.
+ */
+void
+xfs_filestream_unmount(
+        xfs_mount_t     *mp)
+{
+        xfs_mru_cache_t *cache = mp->m_filestream;
+        xfs_mru_cache_destroy(cache);
+}
+/*
+ * Flush every outstanding filestream cache entry.  This is required before
+ * the mount point's m_perag array is reallocated, so that no entry is left
+ * referring to reference count addresses that have been freed.  The call to
+ * xfs_filestream_flush() must be made inside the block that holds the
+ * m_peraglock in write mode to do the reallocation.
+ */
+void
+xfs_filestream_flush(
+        xfs_mount_t     *mp)
+{
+        xfs_mru_cache_t *cache = mp->m_filestream;
+        /* This is a point in time flush, so the reaper is kept running. */
+        xfs_mru_cache_flush(cache, 1);
+}
+/*
+ * Return the AG of the filestream the file or directory belongs to, or
+ * NULLAGNUMBER otherwise.
+ *
+ * ip: inode to look up; the cache is keyed on ip->i_ino.
+ *
+ * NULLAGNUMBER is returned both when the inode has no cache item and when its
+ * mode has neither the S_IFREG nor the S_IFDIR bit set.
+ * NOTE(review): that mode check is a bit test rather than S_ISREG()/S_ISDIR(),
+ * so other file types whose mode shares one of those bits also pass it.
+ */
+xfs_agnumber_t
+xfs_filestream_lookup_ag(
+        xfs_inode_t     *ip)
+{
+        xfs_mru_cache_t *cache;
+        fstrm_item_t    *item;
+        xfs_inode_t     *pip;
+        xfs_agnumber_t  ag;
+        int             ref;
+        if (!(ip->i_d.di_mode & (S_IFREG | S_IFDIR))) {
+                ASSERT(0);
+                return NULLAGNUMBER;
+        }
+        cache = ip->i_mount->m_filestream;
+        item = xfs_mru_cache_lookup(cache, ip->i_ino);
+        if (!item) {
+                TRACE_LOOKUP(ip->i_mount, ip, NULL, NULLAGNUMBER, 0);
+                return NULLAGNUMBER;
+        }
+        ASSERT(ip == item->ip);
+        /*
+         * Copy out everything needed from the item before calling
+         * xfs_mru_cache_done(): the item is only protected by the cache
+         * between the lookup and the done call, so it must not be
+         * dereferenced afterwards, not even for tracing.
+         */
+        ag = item->ag;
+        pip = item->pip;
+        ref = xfs_filestream_peek_ag(ip->i_mount, ag);
+        xfs_mru_cache_done(cache);
+        TRACE_LOOKUP(ip->i_mount, ip, pip, ag, ref);
+        return ag;
+}
+/*
+ * xfs_filestream_associate() should only be called to associate a regular file
+ * with its parent directory.  Calling it with a child directory isn't
+ * appropriate because filestreams don't apply to entire directory hierarchies.
+ * Creating a file in a child directory of an existing filestream directory
+ * starts a new filestream with its own allocation group association.
+ *
+ * pip: parent directory inode.
+ * ip:  regular file inode to associate with pip's filestream.
+ *
+ * If pip is already cached its AG is reused for ip; otherwise a new AG is
+ * picked and both pip and ip are associated with it.
+ *
+ * Returns < 0 on error, 0 if successful association occurred, > 0 if
+ * we failed to get an association because of locking issues.
+ */
+int
+xfs_filestream_associate(
+        xfs_inode_t     *pip,
+        xfs_inode_t     *ip)
+{
+        xfs_mount_t     *mp;
+        xfs_mru_cache_t *cache;
+        fstrm_item_t    *item;
+        xfs_agnumber_t  ag, rotorstep, startag;
+        int             err = 0;
+        ASSERT(pip->i_d.di_mode & S_IFDIR);
+        ASSERT(ip->i_d.di_mode & S_IFREG);
+        /* The asserted conditions are also rejected explicitly with -EINVAL. */
+        if (!(pip->i_d.di_mode & S_IFDIR) || !(ip->i_d.di_mode & S_IFREG))
+                return -EINVAL;
+        mp = pip->i_mount;
+        cache = mp->m_filestream;
+        down_read(&mp->m_peraglock);
+        /*
+         * We have a problem, Houston.
+         *
+         * Taking the iolock here violates inode locking order - we already
+         * hold the ilock. Hence if we block getting this lock we may never
+         * wake. Unfortunately, that means if we can't get the lock, we're
+         * screwed in terms of getting a stream association - we can't spin
+         * waiting for the lock because someone else is waiting on the lock we
+         * hold and we cannot drop that as we are in a transaction here.
+         *
+         * Lucky for us, this inversion is rarely a problem because it's a
+         * directory inode that we are trying to lock here and that means the
+         * only place that matters is xfs_sync_inodes() and SYNC_DELWRI is
+         * used. i.e. freeze, remount-ro, quotasync or unmount.
+         *
+         * So, if we can't get the iolock without sleeping then just give up
+         */
+        if (!xfs_ilock_nowait(pip, XFS_IOLOCK_EXCL)) {
+                up_read(&mp->m_peraglock);
+                return 1;
+        }
+        /* If the parent directory is already in the cache, use its AG. */
+        item = xfs_mru_cache_lookup(cache, pip->i_ino);
+        if (item) {
+                ASSERT(item->ip == pip);
+                ag = item->ag;
+                xfs_mru_cache_done(cache);
+                TRACE_LOOKUP(mp, pip, pip, ag, xfs_filestream_peek_ag(mp, ag));
+                err = _xfs_filestream_update_ag(ip, pip, ag);
+                /*
+                 * _xfs_filestream_pick_ag() was not called on this path, so
+                 * there is no extra AG reference to drop: skip exit_did_pick.
+                 */
+                goto exit;
+        }
+        /*
+         * Set the starting AG using the rotor for inode32, otherwise
+         * use the directory inode's AG.
+         */
+        if (mp->m_flags & XFS_MOUNT_32BITINODES) {
+                rotorstep = xfs_rotorstep;
+                startag = (mp->m_agfrotor / rotorstep) % mp->m_sb.sb_agcount;
+                mp->m_agfrotor = (mp->m_agfrotor + 1) %
+                                 (mp->m_sb.sb_agcount * rotorstep);
+        } else
+                startag = XFS_INO_TO_AGNO(mp, pip->i_ino);
+        /* Pick a new AG for the parent inode starting at startag. */
+        err = _xfs_filestream_pick_ag(mp, startag, &ag, 0, 0);
+        if (err || ag == NULLAGNUMBER)
+                goto exit_did_pick;
+        /* Associate the parent inode with the AG. */
+        err = _xfs_filestream_update_ag(pip, NULL, ag);
+        if (err)
+                goto exit_did_pick;
+        /* Associate the file inode with the AG. */
+        err = _xfs_filestream_update_ag(ip, pip, ag);
+        if (err)
+                goto exit_did_pick;
+        TRACE_ASSOCIATE(mp, ip, pip, ag, xfs_filestream_peek_ag(mp, ag));
+exit_did_pick:
+        /*
+         * If _xfs_filestream_pick_ag() returned a valid AG, remove the
+         * reference it took on it, since the file and directory will have taken
+         * their own now if they were successfully cached.
+         */
+        if (ag != NULLAGNUMBER)
+                xfs_filestream_put_ag(mp, ag);
+exit:
+        xfs_iunlock(pip, XFS_IOLOCK_EXCL);
+        up_read(&mp->m_peraglock);
+        /*
+         * The helpers above report positive error codes (for example ENOMEM),
+         * so negate the value to match this function's "< 0 on error" contract.
+         */
+        return -err;
+}
+/*
+ * Pick a new allocation group for the current file and its file stream.  This
+ * function is called by xfs_bmap_filestreams() with the mount point's per-ag
+ * lock held.
+ *
+ * ap:  allocation arguments; ap->ip is the file, ap->alen the minimum extent
+ *      length wanted, and ap->userdata / ap->low select the pick flags.
+ * agp: out parameter for the chosen AG.  It is never left as NULLAGNUMBER on
+ *      return from the common exit path: AG 0 is substituted if nothing was
+ *      picked.
+ *
+ * Returns 0 on success, or the un-negated error from xfs_mru_cache_insert(),
+ * _xfs_filestream_pick_ag() or _xfs_filestream_update_ag().
+ */
+int
+xfs_filestream_new_ag(
+        xfs_bmalloca_t  *ap,
+        xfs_agnumber_t  *agp)
+{
+        int             flags, err;
+        xfs_inode_t     *ip, *pip = NULL;
+        xfs_mount_t     *mp;
+        xfs_mru_cache_t *cache;
+        xfs_extlen_t    minlen;
+        fstrm_item_t    *dir, *file;
+        xfs_agnumber_t  ag = NULLAGNUMBER;
+        ip = ap->ip;
+        mp = ip->i_mount;
+        cache = mp->m_filestream;
+        minlen = ap->alen;
+        *agp = NULLAGNUMBER;
+        /*
+         * Look for the file in the cache, removing it if it's found.  Doing
+         * this allows it to be held across the dir lookup that follows.
+         */
+        file = xfs_mru_cache_remove(cache, ip->i_ino);
+        if (file) {
+                ASSERT(ip == file->ip);
+                /* Save the file's parent inode and old AG number for later. */
+                pip = file->pip;
+                ag = file->ag;
+                /* Look for the file's directory in the cache. */
+                dir = xfs_mru_cache_lookup(cache, pip->i_ino);
+                if (dir) {
+                        ASSERT(pip == dir->ip);
+                        /*
+                         * If the directory has already moved on to a new AG,
+                         * use that AG as the new AG for the file. Don't
+                         * forget to twiddle the AG refcounts to match the
+                         * movement.
+                         */
+                        if (dir->ag != file->ag) {
+                                xfs_filestream_put_ag(mp, file->ag);
+                                xfs_filestream_get_ag(mp, dir->ag);
+                                *agp = file->ag = dir->ag;
+                        }
+                        xfs_mru_cache_done(cache);
+                }
+                /*
+                 * Put the file back in the cache.  If this fails, the free
+                 * function needs to be called to tidy up in the same way as if
+                 * the item had simply expired from the cache.
+                 */
+                err = xfs_mru_cache_insert(cache, ip->i_ino, file);
+                if (err) {
+                        xfs_fstrm_free_func(ip->i_ino, file);
+                        return err;
+                }
+                /*
+                 * If the file's AG was moved to the directory's new AG, there's
+                 * nothing more to be done.
+                 */
+                if (*agp != NULLAGNUMBER) {
+                        TRACE_MOVEAG(mp, ip, pip,
+                                        ag, xfs_filestream_peek_ag(mp, ag),
+                                        *agp, xfs_filestream_peek_ag(mp, *agp));
+                        return 0;
+                }
+        }
+        /*
+         * If the file's parent directory is known, take its iolock in exclusive
+         * mode to prevent two sibling files from racing each other to migrate
+         * themselves and their parent to different AGs.
+         */
+        if (pip)
+                xfs_ilock(pip, XFS_IOLOCK_EXCL);
+        /*
+         * A new AG needs to be found for the file.  If the file's parent
+         * directory is also known, it will be moved to the new AG as well to
+         * ensure that files created inside it in future use the new AG.
+         */
+        /* Start at AG 0 for an uncached file, else just past the old AG. */
+        ag = (ag == NULLAGNUMBER) ? 0 : (ag + 1) % mp->m_sb.sb_agcount;
+        flags = (ap->userdata ? XFS_PICK_USERDATA : 0) |
+                (ap->low ? XFS_PICK_LOWSPACE : 0);
+        err = _xfs_filestream_pick_ag(mp, ag, agp, flags, minlen);
+        if (err || *agp == NULLAGNUMBER)
+                goto exit;
+        /*
+         * If the file wasn't found in the file cache, then its parent directory
+         * inode isn't known.  For this to have happened, the file must either
+         * be pre-existing, or it was created long enough ago that its cache
+         * entry has expired.  This isn't the sort of usage that the filestreams
+         * allocator is trying to optimise, so there's no point trying to track
+         * its new AG somehow in the filestream data structures.
+         */
+        if (!pip) {
+                TRACE_ORPHAN(mp, ip, *agp);
+                goto exit;
+        }
+        /* Associate the parent inode with the AG. */
+        err = _xfs_filestream_update_ag(pip, NULL, *agp);
+        if (err)
+                goto exit;
+        /* Associate the file inode with the AG. */
+        err = _xfs_filestream_update_ag(ip, pip, *agp);
+        if (err)
+                goto exit;
+        TRACE_MOVEAG(mp, ip, pip, NULLAGNUMBER, 0,
+                        *agp, xfs_filestream_peek_ag(mp, *agp));
+exit:
+        /*
+         * If _xfs_filestream_pick_ag() returned a valid AG, remove the
+         * reference it took on it, since the file and directory will have taken
+         * their own now if they were successfully cached.
+         */
+        if (*agp != NULLAGNUMBER)
+                xfs_filestream_put_ag(mp, *agp);
+        else
+                /* Nothing was picked: hand back AG 0 instead of NULLAGNUMBER. */
+                *agp = 0;
+        if (pip)
+                xfs_iunlock(pip, XFS_IOLOCK_EXCL);
+        return err;
+}
+/*
+ * Drop the association between an inode and its filestream by deleting the
+ * inode's entry from the mount's filestream cache.  Typically this is done on
+ * last close of an unlinked file.
+ */
+void
+xfs_filestream_deassociate(
+        xfs_inode_t     *ip)
+{
+        xfs_mru_cache_delete(ip->i_mount->m_filestream, ip->i_ino);
+}
diff --git a/fs/xfs/xfs_filestream.h b/fs/xfs/xfs_filestream.h
new file mode 100644
index 000000000000..f655f7dc334c
--- /dev/null
+++ b/fs/xfs/xfs_filestream.h
@@ -0,0 +1,136 @@
+/*
+ * Copyright (c) 2006-2007 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef __XFS_FILESTREAM_H__
+#define __XFS_FILESTREAM_H__
+#ifdef __KERNEL__
+struct xfs_mount;
+struct xfs_inode;
+struct xfs_perag;
+struct xfs_bmalloca;
+#ifdef XFS_FILESTREAMS_TRACE
+#define XFS_FSTRM_KTRACE_INFO           1
+#define XFS_FSTRM_KTRACE_AGSCAN         2
+#define XFS_FSTRM_KTRACE_AGPICK1        3
+#define XFS_FSTRM_KTRACE_AGPICK2        4
+#define XFS_FSTRM_KTRACE_UPDATE         5
+#define XFS_FSTRM_KTRACE_FREE           6
+#define XFS_FSTRM_KTRACE_ITEM_LOOKUP    7
+#define XFS_FSTRM_KTRACE_ASSOCIATE      8
+#define XFS_FSTRM_KTRACE_MOVEAG         9
+#define XFS_FSTRM_KTRACE_ORPHAN         10
+#define XFS_FSTRM_KTRACE_SIZE   16384
+extern ktrace_t *xfs_filestreams_trace_buf;
+#endif
+/*
+ * Allocation group filestream associations are tracked with per-ag atomic
+ * counters.  These counters allow _xfs_filestream_pick_ag() to tell whether a
+ * particular AG already has active filestreams associated with it. The mount
+ * point's m_peraglock is used to protect these counters from per-ag array
+ * re-allocation during a growfs operation.  When xfs_growfs_data_private() is
+ * about to reallocate the array, it calls xfs_filestream_flush() with the
+ * m_peraglock held in write mode.
+ *
+ * Since xfs_mru_cache_flush() guarantees that all the free functions for all
+ * the cache elements have finished executing before it returns, it's safe for
+ * the free functions to use the atomic counters without m_peraglock protection.
+ * This allows the implementation of xfs_fstrm_free_func() to be agnostic about
+ * whether it was called with the m_peraglock held in read mode, write mode or
+ * not held at all.  The race condition this addresses is the following:
+ *
+ *  - The work queue scheduler fires and pulls a filestream directory cache
+ *    element off the LRU end of the cache for deletion, then gets pre-empted.
+ *  - A growfs operation grabs the m_peraglock in write mode, flushes all the
+ *    remaining items from the cache and reallocates the mount point's per-ag
+ *    array, resetting all the counters to zero.
+ *  - The work queue thread resumes and calls the free function for the element
+ *    it started cleaning up earlier.  In the process it decrements the
+ *    filestreams counter for an AG that now has no references.
+ *
+ * With a shrinkfs feature, the above scenario could panic the system.
+ *
+ * All other uses of the following macros should be protected by either the
+ * m_peraglock held in read mode, or the cache's internal locking exposed by the
+ * interval between a call to xfs_mru_cache_lookup() and a call to
+ * xfs_mru_cache_done().  In addition, the m_peraglock must be held in read mode
+ * when new elements are added to the cache.
+ *
+ * Combined, these locking rules ensure that no associations will ever exist in
+ * the cache that reference per-ag array elements that have since been
+ * reallocated.
+ */
+/* Read the filestream reference count of AG agno without changing it. */
+STATIC_INLINE int
+xfs_filestream_peek_ag(
+        xfs_mount_t     *mp,
+        xfs_agnumber_t  agno)
+{
+        struct xfs_perag        *pag = &mp->m_perag[agno];
+        return atomic_read(&pag->pagf_fstrms);
+}
+/* Take a filestream reference on AG agno; returns the count after the add. */
+STATIC_INLINE int
+xfs_filestream_get_ag(
+        xfs_mount_t     *mp,
+        xfs_agnumber_t  agno)
+{
+        struct xfs_perag        *pag = &mp->m_perag[agno];
+        return atomic_inc_return(&pag->pagf_fstrms);
+}
+/* Drop a filestream reference on AG agno; returns the count after the drop. */
+STATIC_INLINE int
+xfs_filestream_put_ag(
+        xfs_mount_t     *mp,
+        xfs_agnumber_t  agno)
+{
+        struct xfs_perag        *pag = &mp->m_perag[agno];
+        return atomic_dec_return(&pag->pagf_fstrms);
+}
+/*
+ * Allocation selection flags for _xfs_filestream_pick_ag().  With
+ * XFS_PICK_USERDATA set, AGs marked pagf_metadata are passed over unless
+ * XFS_PICK_LOWSPACE is set as well.
+ */
+typedef enum xfs_fstrm_alloc {
+        XFS_PICK_USERDATA = (1 << 0),
+        XFS_PICK_LOWSPACE = (1 << 1),
+} xfs_fstrm_alloc_t;
+/* prototypes for filestream.c */
+int xfs_filestream_init(void);
+void xfs_filestream_uninit(void);
+int xfs_filestream_mount(struct xfs_mount *mp);
+void xfs_filestream_unmount(struct xfs_mount *mp);
+void xfs_filestream_flush(struct xfs_mount *mp);
+xfs_agnumber_t xfs_filestream_lookup_ag(struct xfs_inode *ip);
+int xfs_filestream_associate(struct xfs_inode *dip, struct xfs_inode *ip);
+void xfs_filestream_deassociate(struct xfs_inode *ip);
+int xfs_filestream_new_ag(struct xfs_bmalloca *ap, xfs_agnumber_t *agp);
+/*
+ * Does the filestreams allocator apply to this inode?  It does when the mount
+ * has XFS_MOUNT_FILESTREAMS set, when the in-core inode carries
+ * XFS_IFILESTREAM, or when the on-disk flags carry XFS_DIFLAG_FILESTREAM.
+ * Returns 1 if so, 0 otherwise.
+ */
+STATIC_INLINE int
+xfs_inode_is_filestream(
+        struct xfs_inode        *ip)
+{
+        if (ip->i_mount->m_flags & XFS_MOUNT_FILESTREAMS)
+                return 1;
+        if (xfs_iflags_test(ip, XFS_IFILESTREAM))
+                return 1;
+        return (ip->i_d.di_flags & XFS_DIFLAG_FILESTREAM) != 0;
+}
+#endif /* __KERNEL__ */
+#endif /* __XFS_FILESTREAM_H__ */
diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h
index 1335449841cd..ec3c9c27e0de 100644
--- a/fs/xfs/xfs_fs.h
+++ b/fs/xfs/xfs_fs.h
@@ -66,6 +66,7 @@ struct fsxattr {
 #define XFS_XFLAG_EXTSIZE       0x00000800      /* extent size allocator hint */
 #define XFS_XFLAG_EXTSZINHERIT  0x00001000      /* inherit inode extent size */
 #define XFS_XFLAG_NODEFRAG      0x00002000      /* do not defragment */
+#define XFS_XFLAG_FILESTREAM    0x00004000      /* use filestream allocator */
 #define XFS_XFLAG_HASATTR       0x80000000      /* no DIFLAG for this   */
 /*
@@ -238,6 +239,7 @@ typedef struct xfs_fsop_resblks {
 #define XFS_FSOP_GEOM_FLAGS_LOGV2       0x0100  /* log format version 2 */
 #define XFS_FSOP_GEOM_FLAGS_SECTOR      0x0200  /* sector sizes >1BB    */
 #define XFS_FSOP_GEOM_FLAGS_ATTR2       0x0400  /* inline attributes rework */
+#define XFS_FSOP_GEOM_FLAGS_LAZYSB      0x4000  /* lazy superblock counters */
 /*
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index b599e6be9ec1..432e82347ed6 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -44,6 +44,7 @@
 #include "xfs_trans_space.h"
 #include "xfs_rtalloc.h"
 #include "xfs_rw.h"
+#include "xfs_filestream.h"
 /*
 * File system operations
@@ -94,6 +95,8 @@ xfs_fs_geometry(
                                XFS_FSOP_GEOM_FLAGS_DIRV2 : 0) |
                        (XFS_SB_VERSION_HASSECTOR(&mp->m_sb) ?
                                XFS_FSOP_GEOM_FLAGS_SECTOR : 0) |
+                        (xfs_sb_version_haslazysbcount(&mp->m_sb) ?
+                                XFS_FSOP_GEOM_FLAGS_LAZYSB : 0) |
                        (XFS_SB_VERSION_HASATTR2(&mp->m_sb) ?
                                XFS_FSOP_GEOM_FLAGS_ATTR2 : 0);
                geo->logsectsize = XFS_SB_VERSION_HASSECTOR(&mp->m_sb) ?
@@ -140,6 +143,8 @@ xfs_growfs_data_private(
        pct = in->imaxpct;
        if (nb < mp->m_sb.sb_dblocks || pct < 0 || pct > 100)
                return XFS_ERROR(EINVAL);
+        if ((error = xfs_sb_validate_fsb_count(&mp->m_sb, nb)))
+                return error;
        dpct = pct - mp->m_sb.sb_imax_pct;
        error = xfs_read_buf(mp, mp->m_ddev_targp,
                        XFS_FSB_TO_BB(mp, nb) - XFS_FSS_TO_BB(mp, 1),
@@ -161,6 +166,7 @@ xfs_growfs_data_private(
        new = nb - mp->m_sb.sb_dblocks;
        oagcount = mp->m_sb.sb_agcount;
        if (nagcount > oagcount) {
+                xfs_filestream_flush(mp);
                down_write(&mp->m_peraglock);
                mp->m_perag = kmem_realloc(mp->m_perag,
                        sizeof(xfs_perag_t) * nagcount,
@@ -173,6 +179,7 @@ xfs_growfs_data_private(
                up_write(&mp->m_peraglock);
        }
        tp = xfs_trans_alloc(mp, XFS_TRANS_GROWFS);
+        tp->t_flags |= XFS_TRANS_RESERVE;
        if ((error = xfs_trans_reserve(tp, XFS_GROWFS_SPACE_RES(mp),
                        XFS_GROWDATA_LOG_RES(mp), 0, 0, 0))) {
                xfs_trans_cancel(tp, 0);
@@ -328,6 +335,7 @@ xfs_growfs_data_private(
                be32_add(&agf->agf_length, new);
                ASSERT(be32_to_cpu(agf->agf_length) ==
                       be32_to_cpu(agi->agi_length));
+                xfs_alloc_log_agf(tp, bp, XFS_AGF_LENGTH);
                /*
                 * Free the new space.
                 */
@@ -494,8 +502,9 @@ xfs_reserve_blocks(
        unsigned long           s;
        /* If inval is null, report current values and return */
        if (inval == (__uint64_t *)NULL) {
+                if (!outval)
+                        return EINVAL;
                outval->resblks = mp->m_resblks;
                outval->resblks_avail = mp->m_resblks_avail;
                return 0;
@@ -558,8 +567,10 @@ retry:
                }
        }
 out:
-        outval->resblks = mp->m_resblks;
+        if (outval) {
-        outval->resblks_avail = mp->m_resblks_avail;
+                outval->resblks = mp->m_resblks;
+                outval->resblks_avail = mp->m_resblks_avail;
+        }
        XFS_SB_UNLOCK(mp, s);
        if (fdblks_delta) {
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index b5feb3e77116..f943368c9b93 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -123,6 +123,7 @@ xfs_ialloc_ag_alloc(
        int             blks_per_cluster;  /* fs blocks per inode cluster */
        xfs_btree_cur_t *cur;           /* inode btree cursor */
        xfs_daddr_t     d;              /* disk addr of buffer */
+        xfs_agnumber_t  agno;
        int             error;
        xfs_buf_t       *fbuf;          /* new free inodes' buffer */
        xfs_dinode_t    *free;          /* new free inode structure */
@@ -302,15 +303,15 @@ xfs_ialloc_ag_alloc(
        }
        be32_add(&agi->agi_count, newlen);
        be32_add(&agi->agi_freecount, newlen);
+        agno = be32_to_cpu(agi->agi_seqno);
        down_read(&args.mp->m_peraglock);
-        args.mp->m_perag[be32_to_cpu(agi->agi_seqno)].pagi_freecount += newlen;
+        args.mp->m_perag[agno].pagi_freecount += newlen;
        up_read(&args.mp->m_peraglock);
        agi->agi_newino = cpu_to_be32(newino);
        /*
         * Insert records describing the new inode chunk into the btree.
         */
-        cur = xfs_btree_init_cursor(args.mp, tp, agbp,
+        cur = xfs_btree_init_cursor(args.mp, tp, agbp, agno,
-                        be32_to_cpu(agi->agi_seqno),
                        XFS_BTNUM_INO, (xfs_inode_t *)0, 0);
        for (thisino = newino;
             thisino < newino + newlen;
@@ -1387,6 +1388,7 @@ xfs_ialloc_read_agi(
        pag = &mp->m_perag[agno];
        if (!pag->pagi_init) {
                pag->pagi_freecount = be32_to_cpu(agi->agi_freecount);
+                pag->pagi_count = be32_to_cpu(agi->agi_count);
                pag->pagi_init = 1;
        } else {
                /*
@@ -1410,3 +1412,23 @@ xfs_ialloc_read_agi(
        *bpp = bp;
        return 0;
 }
+/*
+ * Read in the agi to initialise the per-ag data in the mount structure.
+ *
+ * Calls xfs_ialloc_read_agi() for allocation group agno and, if that
+ * call handed back a buffer, releases it again straight away with
+ * xfs_trans_brelse(); the AGI buffer is never returned to the caller.
+ *
+ * Returns 0 on success, or the error from xfs_ialloc_read_agi().
+ */
+int
+xfs_ialloc_pagi_init(
+        xfs_mount_t     *mp,            /* file system mount structure */
+        xfs_trans_t     *tp,            /* transaction pointer */
+        xfs_agnumber_t  agno)           /* allocation group number */
+{
+        xfs_buf_t       *bp = NULL;
+        int             error;
+        error = xfs_ialloc_read_agi(mp, tp, agno, &bp);
+        if (error)
+                return error;
+        if (bp)
+                xfs_trans_brelse(tp, bp);
+        return 0;
+}
diff --git a/fs/xfs/xfs_ialloc.h b/fs/xfs/xfs_ialloc.h
index 7f5debe1acb6..97f4040931ca 100644
--- a/fs/xfs/xfs_ialloc.h
+++ b/fs/xfs/xfs_ialloc.h
@@ -149,6 +149,16 @@ xfs_ialloc_read_agi(
        xfs_agnumber_t  agno,           /* allocation group number */
        struct xfs_buf  **bpp);         /* allocation group hdr buf */
+/*
+ * Read in the allocation group header to initialise the per-ag data
+ * in the mount structure
+ */
+int
+xfs_ialloc_pagi_init(
+        struct xfs_mount *mp,           /* file system mount structure */
+        struct xfs_trans *tp,           /* transaction pointer */
+        xfs_agnumber_t  agno);          /* allocation group number */
 #endif  /* __KERNEL__ */
 #endif  /* __XFS_IALLOC_H__ */
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 3ca5d43b8345..cdc4c28926d0 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -48,7 +48,9 @@
 #include "xfs_dir2_trace.h"
 #include "xfs_quota.h"
 #include "xfs_acl.h"
+#include "xfs_filestream.h"
+#include <linux/log2.h>
 kmem_zone_t *xfs_ifork_zone;
 kmem_zone_t *xfs_inode_zone;
@@ -643,8 +645,7 @@ xfs_iformat_extents(
                        ep->l1 = INT_GET(get_unaligned((__uint64_t*)&dp->l1),
                                                                ARCH_CONVERT);
                }
-                xfs_bmap_trace_exlist("xfs_iformat_extents", ip, nex,
+                XFS_BMAP_TRACE_EXLIST(ip, nex, whichfork);
-                        whichfork);
                if (whichfork != XFS_DATA_FORK ||
                        XFS_EXTFMT_INODE(ip) == XFS_EXTFMT_NOSTATE)
                                if (unlikely(xfs_check_nostate_extents(
@@ -817,6 +818,8 @@ _xfs_dic2xflags(
                        flags |= XFS_XFLAG_EXTSZINHERIT;
                if (di_flags & XFS_DIFLAG_NODEFRAG)
                        flags |= XFS_XFLAG_NODEFRAG;
+                if (di_flags & XFS_DIFLAG_FILESTREAM)
+                        flags |= XFS_XFLAG_FILESTREAM;
        }
        return flags;
@@ -1074,6 +1077,11 @@ xfs_iread_extents(
 * also returns the [locked] bp pointing to the head of the freelist
 * as ialloc_context.  The caller should hold this buffer across
 * the commit and pass it back into this routine on the second call.
+ *
+ * If we are allocating quota inodes, we do not have a parent inode
+ * to attach to or associate with (i.e. pip == NULL) because they
+ * are not linked into the directory structure - they are attached
+ * directly to the superblock - and so have no parent.
 */
 int
 xfs_ialloc(
@@ -1099,7 +1107,7 @@ xfs_ialloc(
         * Call the space management code to pick
         * the on-disk inode to be allocated.
         */
-        error = xfs_dialloc(tp, pip->i_ino, mode, okalloc,
+        error = xfs_dialloc(tp, pip ? pip->i_ino : 0, mode, okalloc,
                            ialloc_context, call_again, &ino);
        if (error != 0) {
                return error;
@@ -1150,10 +1158,10 @@ xfs_ialloc(
        /*
         * Project ids won't be stored on disk if we are using a version 1 inode.
         */
-        if ( (prid != 0) && (ip->i_d.di_version == XFS_DINODE_VERSION_1))
+        if ((prid != 0) && (ip->i_d.di_version == XFS_DINODE_VERSION_1))
                xfs_bump_ino_vers2(tp, ip);
-        if (XFS_INHERIT_GID(pip, vp->v_vfsp)) {
+        if (pip && XFS_INHERIT_GID(pip, vp->v_vfsp)) {
                ip->i_d.di_gid = pip->i_d.di_gid;
                if ((pip->i_d.di_mode & S_ISGID) && (mode & S_IFMT) == S_IFDIR) {
                        ip->i_d.di_mode |= S_ISGID;
@@ -1195,8 +1203,16 @@ xfs_ialloc(
                flags |= XFS_ILOG_DEV;
                break;
        case S_IFREG:
+                if (pip && xfs_inode_is_filestream(pip)) {
+                        error = xfs_filestream_associate(pip, ip);
+                        if (error < 0)
+                                return -error;
+                        if (!error)
+                                xfs_iflags_set(ip, XFS_IFILESTREAM);
+                }
+                /* fall through */
        case S_IFDIR:
-                if (unlikely(pip->i_d.di_flags & XFS_DIFLAG_ANY)) {
+                if (pip && (pip->i_d.di_flags & XFS_DIFLAG_ANY)) {
                        uint    di_flags = 0;
                        if ((mode & S_IFMT) == S_IFDIR) {
@@ -1233,6 +1249,8 @@ xfs_ialloc(
                        if ((pip->i_d.di_flags & XFS_DIFLAG_NODEFRAG) &&
                            xfs_inherit_nodefrag)
                                di_flags |= XFS_DIFLAG_NODEFRAG;
+                        if (pip->i_d.di_flags & XFS_DIFLAG_FILESTREAM)
+                                di_flags |= XFS_DIFLAG_FILESTREAM;
                        ip->i_d.di_flags |= di_flags;
                }
                /* FALLTHROUGH */
@@ -2875,9 +2893,6 @@ xfs_iextents_copy(
        int                     copied;
        xfs_bmbt_rec_t          *dest_ep;
        xfs_bmbt_rec_t          *ep;
-#ifdef XFS_BMAP_TRACE
-        static char             fname[] = "xfs_iextents_copy";
-#endif
        int                     i;
        xfs_ifork_t             *ifp;
        int                     nrecs;
@@ -2888,7 +2903,7 @@ xfs_iextents_copy(
        ASSERT(ifp->if_bytes > 0);
        nrecs = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
-        xfs_bmap_trace_exlist(fname, ip, nrecs, whichfork);
+        XFS_BMAP_TRACE_EXLIST(ip, nrecs, whichfork);
        ASSERT(nrecs > 0);
        /*
@@ -4184,7 +4199,7 @@ xfs_iext_realloc_direct(
                        ifp->if_bytes = new_size;
                        return;
                }
-                if ((new_size & (new_size - 1)) != 0) {
+                if (!is_power_of_2(new_size)){
                        rnew_size = xfs_iroundup(new_size);
                }
                if (rnew_size != ifp->if_real_bytes) {
@@ -4207,7 +4222,7 @@ xfs_iext_realloc_direct(
         */
        else {
                new_size += ifp->if_bytes;
-                if ((new_size & (new_size - 1)) != 0) {
+                if (!is_power_of_2(new_size)) {
                        rnew_size = xfs_iroundup(new_size);
                }
                xfs_iext_inline_to_direct(ifp, rnew_size);
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index f75afecef8e7..012dfd4a958c 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -379,6 +379,7 @@ xfs_iflags_test(xfs_inode_t *ip, unsigned short flags)
 #define XFS_ISTALE      0x0010  /* inode has been staled */
 #define XFS_IRECLAIMABLE 0x0020 /* inode can be reclaimed */
 #define XFS_INEW        0x0040
+#define XFS_IFILESTREAM 0x0080  /* inode is in a filestream directory */
 /*
 * Flags for inode locking.
@@ -414,19 +415,22 @@ xfs_iflags_test(xfs_inode_t *ip, unsigned short flags)
 * gets a lockdep subclass of 1 and the second lock will have a lockdep
 * subclass of 0.
 *
- * XFS_I[O]LOCK_INUMORDER - for locking several inodes at the some time
+ * XFS_LOCK_INUMORDER - for locking several inodes at the same time
 * with xfs_lock_inodes().  This flag is used as the starting subclass
 * and each subsequent lock acquired will increment the subclass by one.
 * So the first lock acquired will have a lockdep subclass of 2, the
- * second lock will have a lockdep subclass of 3, and so on.
+ * second lock will have a lockdep subclass of 3, and so on. It is
+ * the responsibility of the class builder to shift this to the correct
+ * portion of the lock_mode lockdep mask.
 */
+#define XFS_LOCK_PARENT         1
+#define XFS_LOCK_INUMORDER      2
 #define XFS_IOLOCK_SHIFT        16
-#define XFS_IOLOCK_PARENT       (1 << XFS_IOLOCK_SHIFT)
+#define XFS_IOLOCK_PARENT       (XFS_LOCK_PARENT << XFS_IOLOCK_SHIFT)
-#define XFS_IOLOCK_INUMORDER    (2 << XFS_IOLOCK_SHIFT)
 #define XFS_ILOCK_SHIFT         24
-#define XFS_ILOCK_PARENT        (1 << XFS_ILOCK_SHIFT)
+#define XFS_ILOCK_PARENT        (XFS_LOCK_PARENT << XFS_ILOCK_SHIFT)
-#define XFS_ILOCK_INUMORDER     (2 << XFS_ILOCK_SHIFT)
 #define XFS_IOLOCK_DEP_MASK     0x00ff0000
 #define XFS_ILOCK_DEP_MASK      0xff000000
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 3f2b9f2a7b94..bf57b75acb90 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -451,19 +451,14 @@ xfs_iomap_write_direct(
                return XFS_ERROR(error);
        rt = XFS_IS_REALTIME_INODE(ip);
-        if (unlikely(rt)) {
+        extsz = xfs_get_extsz_hint(ip);
-                if (!(extsz = ip->i_d.di_extsize))
-                        extsz = mp->m_sb.sb_rextsize;
-        } else {
-                extsz = ip->i_d.di_extsize;
-        }
        isize = ip->i_size;
        if (io->io_new_size > isize)
                isize = io->io_new_size;
-        offset_fsb = XFS_B_TO_FSBT(mp, offset);
+        offset_fsb = XFS_B_TO_FSBT(mp, offset);
-        last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count)));
+        last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count)));
        if ((offset + count) > isize) {
                error = xfs_iomap_eof_align_last_fsb(mp, io, isize, extsz,
                                                        &last_fsb);
@@ -489,13 +484,13 @@ xfs_iomap_write_direct(
        if (unlikely(rt)) {
                resrtextents = qblocks = resaligned;
                resrtextents /= mp->m_sb.sb_rextsize;
-                resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
+                resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
-                quota_flag = XFS_QMOPT_RES_RTBLKS;
+                quota_flag = XFS_QMOPT_RES_RTBLKS;
-        } else {
+        } else {
-                resrtextents = 0;
+                resrtextents = 0;
                resblks = qblocks = XFS_DIOSTRAT_SPACE_RES(mp, resaligned);
-                quota_flag = XFS_QMOPT_RES_REGBLKS;
+                quota_flag = XFS_QMOPT_RES_REGBLKS;
-        }
+        }
        /*
         * Allocate and setup the transaction
@@ -666,13 +661,7 @@ xfs_iomap_write_delay(
        if (error)
                return XFS_ERROR(error);
-        if (XFS_IS_REALTIME_INODE(ip)) {
+        extsz = xfs_get_extsz_hint(ip);
-                if (!(extsz = ip->i_d.di_extsize))
-                        extsz = mp->m_sb.sb_rextsize;
-        } else {
-                extsz = ip->i_d.di_extsize;
-        }
        offset_fsb = XFS_B_TO_FSBT(mp, offset);
 retry:
@@ -788,18 +777,12 @@ xfs_iomap_write_allocate(
                nimaps = 0;
                while (nimaps == 0) {
                        tp = xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE);
+                        tp->t_flags |= XFS_TRANS_RESERVE;
                        nres = XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK);
                        error = xfs_trans_reserve(tp, nres,
                                        XFS_WRITE_LOG_RES(mp),
                                        0, XFS_TRANS_PERM_LOG_RES,
                                        XFS_WRITE_LOG_COUNT);
-                        if (error == ENOSPC) {
-                                error = xfs_trans_reserve(tp, 0,
-                                                XFS_WRITE_LOG_RES(mp),
-                                                0,
-                                                XFS_TRANS_PERM_LOG_RES,
-                                                XFS_WRITE_LOG_COUNT);
-                        }
                        if (error) {
                                xfs_trans_cancel(tp, 0);
                                return XFS_ERROR(error);
@@ -917,8 +900,8 @@ xfs_iomap_write_unwritten(
                 * from unwritten to real. Do allocations in a loop until
                 * we have covered the range passed in.
                 */
                tp = xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE);
+                tp->t_flags |= XFS_TRANS_RESERVE;
                error = xfs_trans_reserve(tp, resblks,
                                XFS_WRITE_LOG_RES(mp), 0,
                                XFS_TRANS_PERM_LOG_RES,
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index e725ddd3de5f..4c2454bcc714 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -202,6 +202,16 @@ xfs_bulkstat_one_dinode(
        return 0;
 }
+STATIC int                              /* bytes copied, or -EFAULT */
+xfs_bulkstat_one_fmt(
+        void                    __user *ubuffer,        /* buffer to write to */
+        const xfs_bstat_t       *buffer)                /* buffer to read from */
+{
+        if (copy_to_user(ubuffer, buffer, sizeof(*buffer)))
+                return -EFAULT;
+        return sizeof(*buffer);
+}
 /*
 * Return stat information for one inode.
 * Return 0 if ok, else errno.
@@ -221,6 +231,7 @@ xfs_bulkstat_one(
        xfs_bstat_t     *buf;           /* return buffer */
        int             error = 0;      /* error value */
        xfs_dinode_t    *dip;           /* dinode inode pointer */
+        bulkstat_one_fmt_pf formatter = private_data ? : xfs_bulkstat_one_fmt;
        dip = (xfs_dinode_t *)dibuff;
        *stat = BULKSTAT_RV_NOTHING;
@@ -243,14 +254,15 @@ xfs_bulkstat_one(
                xfs_bulkstat_one_dinode(mp, ino, dip, buf);
        }
-        if (copy_to_user(buffer, buf, sizeof(*buf)))  {
+        error = formatter(buffer, buf);
+        if (error < 0)  {
                error = EFAULT;
                goto out_free;
        }
        *stat = BULKSTAT_RV_DIDONE;
        if (ubused)
-                *ubused = sizeof(*buf);
+                *ubused = error;
 out_free:
        kmem_free(buf, sizeof(*buf));
@@ -748,6 +760,19 @@ xfs_bulkstat_single(
        return 0;
 }
+int                                     /* 0, or -EFAULT if the copy fails */
+xfs_inumbers_fmt(
+        void                    __user *ubuffer, /* buffer to write to */
+        const xfs_inogrp_t      *buffer,        /* buffer to read from */
+        long                    count,          /* # of elements to read */
+        long                    *written)       /* # of bytes written; set only on success */
+{
+        if (copy_to_user(ubuffer, buffer, count * sizeof(*buffer)))
+                return -EFAULT;
+        *written = count * sizeof(*buffer);
+        return 0;
+}
 /*
 * Return inode number table for the filesystem.
 */
@@ -756,7 +781,8 @@ xfs_inumbers(
        xfs_mount_t     *mp,            /* mount point for filesystem */
        xfs_ino_t       *lastino,       /* last inode returned */
        int             *count,         /* size of buffer/count returned */
-        xfs_inogrp_t    __user *ubuffer)/* buffer with inode descriptions */
+        void            __user *ubuffer,/* buffer with inode descriptions */
+        inumbers_fmt_pf formatter)
 {
        xfs_buf_t       *agbp;
        xfs_agino_t     agino;
@@ -835,12 +861,12 @@ xfs_inumbers(
                bufidx++;
                left--;
                if (bufidx == bcount) {
-                        if (copy_to_user(ubuffer, buffer,
+                        long written;
-                                        bufidx * sizeof(*buffer))) {
+                        if (formatter(ubuffer, buffer, bufidx, &written)) {
                                error = XFS_ERROR(EFAULT);
                                break;
                        }
-                        ubuffer += bufidx;
+                        ubuffer += written;
                        *count += bufidx;
                        bufidx = 0;
                }
@@ -862,8 +888,8 @@ xfs_inumbers(
        }
        if (!error) {
                if (bufidx) {
-                        if (copy_to_user(ubuffer, buffer,
+                        long written;
-                                        bufidx * sizeof(*buffer)))
+                        if (formatter(ubuffer, buffer, bufidx, &written))
                                error = XFS_ERROR(EFAULT);
                        else
                                *count += bufidx;
diff --git a/fs/xfs/xfs_itable.h b/fs/xfs/xfs_itable.h
index f25a28862a17..a1f18fce9b70 100644
--- a/fs/xfs/xfs_itable.h
+++ b/fs/xfs/xfs_itable.h
@@ -69,6 +69,10 @@ xfs_bulkstat_single(
        char                    __user *buffer,
        int                     *done);
+typedef int (*bulkstat_one_fmt_pf)(  /* used size in bytes or negative error */
+        void                    __user *ubuffer, /* buffer to write to */
+        const xfs_bstat_t       *buffer);        /* buffer to read from */
 int
 xfs_bulkstat_one(
        xfs_mount_t             *mp,
@@ -86,11 +90,25 @@ xfs_internal_inum(
        xfs_mount_t             *mp,
        xfs_ino_t               ino);
+typedef int (*inumbers_fmt_pf)(
+        void                    __user *ubuffer, /* buffer to write to */
+        const xfs_inogrp_t      *buffer,        /* buffer to read from */
+        long                    count,          /* # of elements to read */
+        long                    *written);      /* # of bytes written */
+int
+xfs_inumbers_fmt(
+        void                    __user *ubuffer, /* buffer to write to */
+        const xfs_inogrp_t      *buffer,        /* buffer to read from */
+        long                    count,          /* # of elements to read */
+        long                    *written);      /* # of bytes written */
 int                                     /* error status */
 xfs_inumbers(
        xfs_mount_t             *mp,    /* mount point for filesystem */
        xfs_ino_t               *last,  /* last inode returned */
        int                     *count, /* size of buffer/count returned */
-        xfs_inogrp_t            __user *buffer);/* buffer with inode info */
+        void                    __user *buffer, /* buffer with inode info */
+        inumbers_fmt_pf         formatter);
 #endif  /* __XFS_ITABLE_H__ */
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index c48bf61f17bd..9d4c4fbeb3ee 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -817,10 +817,8 @@ xfs_log_need_covered(xfs_mount_t *mp)
        SPLDECL(s);
        int             needed = 0, gen;
        xlog_t          *log = mp->m_log;
-        bhv_vfs_t       *vfsp = XFS_MTOVFS(mp);
-        if (vfs_test_for_freeze(vfsp) || XFS_FORCED_SHUTDOWN(mp) ||
+        if (!xfs_fs_writable(mp))
-            (vfsp->vfs_flag & VFS_RDONLY))
                return 0;
        s = LOG_LOCK(log);
@@ -967,14 +965,16 @@ xlog_iodone(xfs_buf_t *bp)
        } else if (iclog->ic_state & XLOG_STATE_IOERROR) {
                aborted = XFS_LI_ABORTED;
        }
+        /* log I/O is always issued ASYNC */
+        ASSERT(XFS_BUF_ISASYNC(bp));
        xlog_state_done_syncing(iclog, aborted);
-        if (!(XFS_BUF_ISASYNC(bp))) {
+        /*
-                /*
+         * do not reference the buffer (bp) here as we could race
-                 * Corresponding psema() will be done in bwrite().  If we don't
+         * with it being freed after writing the unmount record to the
-                 * vsema() here, panic.
+         * log.
-                 */
+         */
-                XFS_BUF_V_IODONESEMA(bp);
-        }
 }       /* xlog_iodone */
 /*
@@ -1199,11 +1199,18 @@ xlog_alloc_log(xfs_mount_t	*mp,
                *iclogp = (xlog_in_core_t *)
                          kmem_zalloc(sizeof(xlog_in_core_t), KM_SLEEP);
                iclog = *iclogp;
-                iclog->hic_data = (xlog_in_core_2_t *)
-                          kmem_zalloc(iclogsize, KM_SLEEP | KM_LARGE);
                iclog->ic_prev = prev_iclog;
                prev_iclog = iclog;
+                bp = xfs_buf_get_noaddr(log->l_iclog_size, mp->m_logdev_targp);
+                if (!XFS_BUF_CPSEMA(bp))
+                        ASSERT(0);
+                XFS_BUF_SET_IODONE_FUNC(bp, xlog_iodone);
+                XFS_BUF_SET_BDSTRAT_FUNC(bp, xlog_bdstrat_cb);
+                XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1);
+                iclog->ic_bp = bp;
+                iclog->hic_data = bp->b_addr;
                log->l_iclog_bak[i] = (xfs_caddr_t)&(iclog->ic_header);
                head = &iclog->ic_header;
@@ -1216,11 +1223,6 @@ xlog_alloc_log(xfs_mount_t	*mp,
                INT_SET(head->h_fmt, ARCH_CONVERT, XLOG_FMT);
                memcpy(&head->h_fs_uuid, &mp->m_sb.sb_uuid, sizeof(uuid_t));
-                bp = xfs_buf_get_empty(log->l_iclog_size, mp->m_logdev_targp);
-                XFS_BUF_SET_IODONE_FUNC(bp, xlog_iodone);
-                XFS_BUF_SET_BDSTRAT_FUNC(bp, xlog_bdstrat_cb);
-                XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1);
-                iclog->ic_bp = bp;
                iclog->ic_size = XFS_BUF_SIZE(bp) - log->l_iclog_hsize;
                iclog->ic_state = XLOG_STATE_ACTIVE;
@@ -1432,7 +1434,7 @@ xlog_sync(xlog_t		*log,
        } else {
                iclog->ic_bwritecnt = 1;
        }
-        XFS_BUF_SET_PTR(bp, (xfs_caddr_t) &(iclog->ic_header), count);
+        XFS_BUF_SET_COUNT(bp, count);
        XFS_BUF_SET_FSPRIVATE(bp, iclog);       /* save for later */
        XFS_BUF_ZEROFLAGS(bp);
        XFS_BUF_BUSY(bp);
@@ -1528,7 +1530,6 @@ xlog_dealloc_log(xlog_t *log)
                }
 #endif
                next_iclog = iclog->ic_next;
-                kmem_free(iclog->hic_data, log->l_iclog_size);
                kmem_free(iclog, sizeof(xlog_in_core_t));
                iclog = next_iclog;
        }
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 080fabf61c92..fddbb091a86f 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -927,6 +927,14 @@ xlog_find_tail(
                        ASSIGN_ANY_LSN_HOST(log->l_last_sync_lsn, log->l_curr_cycle,
                                        after_umount_blk);
                        *tail_blk = after_umount_blk;
+                        /*
+                         * Note that the unmount was clean. If the unmount
+                         * was not clean, we need to know this to rebuild the
+                         * superblock counters from the perag headers if we
+                         * have a filesystem using non-persistent counters.
+                         */
+                        log->l_mp->m_flags |= XFS_MOUNT_WAS_CLEAN;
                }
        }
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index a96bde6df96d..a66b39805176 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -202,6 +202,27 @@ xfs_mount_free(
        kmem_free(mp, sizeof(xfs_mount_t));
 }
+/*
+ * Check size of device based on the (data/realtime) block count.
+ * Note: this check is used by the growfs code as well as mount.
+ *
+ * Returns 0 if nblocks filesystem blocks fit within the limit chosen
+ * by XFS_BIG_BLKNOS (page cache index vs. sector count), or a positive
+ * E2BIG if they do not.
+ *
+ * NOTE(review): in the !XFS_BIG_BLKNOS branch the left shift is done on
+ * a 64-bit value and could wrap for a very large nblocks - confirm that
+ * callers cannot pass such a value.
+ */
+int
+xfs_sb_validate_fsb_count(
+        xfs_sb_t        *sbp,
+        __uint64_t      nblocks)
+{
+        ASSERT(PAGE_SHIFT >= sbp->sb_blocklog);
+        ASSERT(sbp->sb_blocklog >= BBSHIFT);
+#if XFS_BIG_BLKNOS     /* Limited by ULONG_MAX of page cache index */
+        if (nblocks >> (PAGE_CACHE_SHIFT - sbp->sb_blocklog) > ULONG_MAX)
+                return E2BIG;
+#else                  /* Limited by UINT_MAX of sectors */
+        if (nblocks << (sbp->sb_blocklog - BBSHIFT) > UINT_MAX)
+                return E2BIG;
+#endif
+        return 0;
+}
 /*
 * Check the validity of the SB found.
@@ -284,18 +305,8 @@ xfs_mount_validate_sb(
                return XFS_ERROR(EFSCORRUPTED);
        }
-        ASSERT(PAGE_SHIFT >= sbp->sb_blocklog);
+        if (xfs_sb_validate_fsb_count(sbp, sbp->sb_dblocks) ||
-        ASSERT(sbp->sb_blocklog >= BBSHIFT);
+            xfs_sb_validate_fsb_count(sbp, sbp->sb_rblocks)) {
-#if XFS_BIG_BLKNOS     /* Limited by ULONG_MAX of page cache index */
-        if (unlikely(
-            (sbp->sb_dblocks >> (PAGE_SHIFT - sbp->sb_blocklog)) > ULONG_MAX ||
-            (sbp->sb_rblocks >> (PAGE_SHIFT - sbp->sb_blocklog)) > ULONG_MAX)) {
-#else                  /* Limited by UINT_MAX of sectors */
-        if (unlikely(
-            (sbp->sb_dblocks << (sbp->sb_blocklog - BBSHIFT)) > UINT_MAX ||
-            (sbp->sb_rblocks << (sbp->sb_blocklog - BBSHIFT)) > UINT_MAX)) {
-#endif
                xfs_fs_mount_cmn_err(flags,
                        "file system too large to be mounted on this system.");
                return XFS_ERROR(E2BIG);
@@ -632,6 +643,64 @@ xfs_mount_common(xfs_mount_t *mp, xfs_sb_t *sbp)
                                        sbp->sb_inopblock);
        mp->m_ialloc_blks = mp->m_ialloc_inos >> sbp->sb_inopblog;
 }
+/*
+ * xfs_initialize_perag_data
+ *
+ * Read in each per-ag structure so we can count up the number of
+ * allocated inodes, free inodes and used filesystem blocks as this
+ * information is no longer persistent in the superblock. Once we have
+ * this information, write it into the in-core superblock structure.
+ *
+ * Walks allocation groups 0 .. agcount-1. Returns 0 on success, or the
+ * first error from xfs_alloc_pagf_init() or xfs_ialloc_pagi_init(); on
+ * error the in-core superblock counters are left unmodified.
+ */
+STATIC int
+xfs_initialize_perag_data(xfs_mount_t *mp, xfs_agnumber_t agcount)
+{
+        xfs_agnumber_t  index;
+        xfs_perag_t     *pag;
+        xfs_sb_t        *sbp = &mp->m_sb;
+        uint64_t        ifree = 0;      /* sum of pagi_freecount */
+        uint64_t        ialloc = 0;     /* sum of pagi_count */
+        uint64_t        bfree = 0;      /* sum of pagf_freeblks */
+        uint64_t        bfreelst = 0;   /* sum of pagf_flcount */
+        uint64_t        btree = 0;      /* sum of pagf_btreeblks */
+        int             error;
+        int             s;
+        for (index = 0; index < agcount; index++) {
+                /*
+                 * read the agf, then the agi. This gets us
+                 * all the information we need and populates the
+                 * per-ag structures for us.
+                 */
+                error = xfs_alloc_pagf_init(mp, NULL, index, 0);
+                if (error)
+                        return error;
+                error = xfs_ialloc_pagi_init(mp, NULL, index);
+                if (error)
+                        return error;
+                pag = &mp->m_perag[index];
+                ifree += pag->pagi_freecount;
+                ialloc += pag->pagi_count;
+                bfree += pag->pagf_freeblks;
+                bfreelst += pag->pagf_flcount;
+                btree += pag->pagf_btreeblks;
+        }
+        /*
+         * Overwrite incore superblock counters with just-read data
+         */
+        s = XFS_SB_LOCK(mp);
+        sbp->sb_ifree = ifree;
+        sbp->sb_icount = ialloc;
+        sbp->sb_fdblocks = bfree + bfreelst + btree;
+        XFS_SB_UNLOCK(mp, s);
+        /* Fixup the per-cpu counters as well. */
+        xfs_icsb_reinit_counters(mp);
+        return 0;
+}
 /*
 * xfs_mountfs
 *
@@ -656,7 +725,7 @@ xfs_mountfs(
        bhv_vnode_t     *rvp = NULL;
        int             readio_log, writeio_log;
        xfs_daddr_t     d;
-        __uint64_t      ret64;
+        __uint64_t      resblks;
        __int64_t       update_flags;
        uint            quotamount, quotaflags;
        int             agno;
@@ -773,6 +842,7 @@ xfs_mountfs(
         */
        if ((mfsi_flags & XFS_MFSI_SECOND) == 0 &&
            (mp->m_flags & XFS_MOUNT_NOUUID) == 0) {
+                __uint64_t      ret64;
                if (xfs_uuid_mount(mp)) {
                        error = XFS_ERROR(EINVAL);
                        goto error1;
@@ -976,6 +1046,34 @@ xfs_mountfs(
        }
        /*
+         * Now the log is mounted, we know if it was an unclean shutdown or
+         * not. If it was, the first phase of recovery has completed and we
+         * have consistent AG blocks on disk. We have not recovered EFIs yet,
+         * but they are recovered transactionally in the second recovery phase
+         * later.
+         *
+         * Hence we can safely re-initialise incore superblock counters from
+         * the per-ag data. These may not be correct if the filesystem was not
+         * cleanly unmounted, so we need to wait for recovery to finish before
+         * doing this.
+         *
+         * If the filesystem was cleanly unmounted, then we can trust the
+         * values in the superblock to be correct and we don't need to do
+         * anything here.
+         *
+         * If we are currently making the filesystem, the initialisation will
+         * fail as the perag data is in an undefined state.
+         */
+        if (xfs_sb_version_haslazysbcount(&mp->m_sb) &&
+            !XFS_LAST_UNMOUNT_WAS_CLEAN(mp) &&
+             !mp->m_sb.sb_inprogress) {
+                error = xfs_initialize_perag_data(mp, sbp->sb_agcount);
+                if (error) {
+                        goto error2;
+                }
+        }
+        /*
         * Get and sanity-check the root inode.
         * Save the pointer to it in the mount structure.
         */
@@ -1044,6 +1142,23 @@ xfs_mountfs(
        if ((error = XFS_QM_MOUNT(mp, quotamount, quotaflags, mfsi_flags)))
                goto error4;
+        /*
+         * Now we are mounted, reserve a small amount of unused space for
+         * privileged transactions. This is needed so that transaction
+         * space required for critical operations can dip into this pool
+         * when at ENOSPC. This is needed for operations like create with
+         * attr, unwritten extent conversion at ENOSPC, etc. Data allocations
+         * are not allowed to use this reserved space.
+         *
+         * We default to 5% or 1024 fsbs of space reserved, whichever is smaller.
+         * This may drive us straight to ENOSPC on mount, but that implies
+         * we were already there on the last unmount.
+         */
+        resblks = mp->m_sb.sb_dblocks;
+        do_div(resblks, 20);
+        resblks = min_t(__uint64_t, resblks, 1024);
+        xfs_reserve_blocks(mp, &resblks, NULL);
        return 0;
 error4:
@@ -1083,7 +1198,19 @@ xfs_unmountfs(xfs_mount_t *mp, struct cred *cr)
 #if defined(DEBUG) || defined(INDUCE_IO_ERROR)
        int64_t         fsid;
 #endif
+        __uint64_t      resblks;
+        /*
+         * We can potentially deadlock here if we have an inode cluster
+         * that was freed but still has its buffer pinned in memory because
+         * the transaction is still sitting in an iclog. The stale inodes
+         * on that buffer will have their flush locks held until the
+         * transaction hits the disk and the callbacks run. The inode
+         * flush takes the flush lock unconditionally and with nothing to
+         * push out the iclog we will never get that unlocked. Hence we
+         * need to force the log first.
+         */
+        xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE | XFS_LOG_SYNC);
        xfs_iflush_all(mp);
        XFS_QM_DQPURGEALL(mp, XFS_QMOPT_QUOTALL | XFS_QMOPT_UMOUNTING);
@@ -1100,10 +1227,26 @@ xfs_unmountfs(xfs_mount_t *mp, struct cred *cr)
                xfs_binval(mp->m_rtdev_targp);
        }
-        xfs_unmountfs_writesb(mp);
+        /*
+         * Unreserve any blocks we have so that when we unmount we don't account
+         * the reserved free space as used. This is really only necessary for
+         * lazy superblock counting because it trusts the incore superblock
+         * counters to be absolutely correct on clean unmount.
+         *
+         * We don't bother correcting this elsewhere for lazy superblock
+         * counting because on mount of an unclean filesystem we reconstruct the
+         * correct counter value and this is irrelevant.
+         *
+         * For non-lazy counter filesystems, this doesn't matter at all because
+         * we only ever apply deltas to the superblock and hence the incore
+         * value does not matter....
+         */
+        resblks = 0;
+        xfs_reserve_blocks(mp, &resblks, NULL);
+        xfs_log_sbcount(mp, 1);
+        xfs_unmountfs_writesb(mp);
        xfs_unmountfs_wait(mp);                 /* wait for async bufs */
        xfs_log_unmount(mp);                    /* Done! No more fs ops. */
        xfs_freesb(mp);
@@ -1150,6 +1293,62 @@ xfs_unmountfs_wait(xfs_mount_t *mp)
 }
 int
+xfs_fs_writable(xfs_mount_t *mp)
+{
+        bhv_vfs_t       *vfsp = XFS_MTOVFS(mp);
+        /*
+         * Returns 1 only if the filesystem is not being frozen, has not
+         * been forcibly shut down and is not mounted read-only.
+         */
+        if (vfs_test_for_freeze(vfsp))
+                return 0;
+        if (XFS_FORCED_SHUTDOWN(mp))
+                return 0;
+        if (vfsp->vfs_flag & VFS_RDONLY)
+                return 0;
+        return 1;
+}
+/*
+ * xfs_log_sbcount
+ *
+ * Called either periodically to keep the on disk superblock values
+ * roughly up to date or from unmount to make sure the values are
+ * correct on a clean unmount.
+ *
+ * Note this code can be called during the process of freezing, so
+ * we may need to use the transaction allocator which does not
+ * block when the transaction subsystem is in its frozen state.
+ *
+ * If "sync" is non-zero the transaction is marked synchronous before it
+ * is committed.
+ *
+ * Returns 0 if there was nothing to do (the filesystem is not writable or
+ * lazy superblock counters are not in use) or if the update was committed;
+ * otherwise returns the error from reserving or committing the transaction.
+ */
+int
+xfs_log_sbcount(
+        xfs_mount_t     *mp,
+        uint            sync)
+{
+        xfs_trans_t     *tp;
+        int             error;
+        if (!xfs_fs_writable(mp))
+                return 0;
+        xfs_icsb_sync_counters(mp);
+        /*
+         * we don't need to do this if we are updating the superblock
+         * counters on every modification.
+         */
+        if (!xfs_sb_version_haslazysbcount(&mp->m_sb))
+                return 0;
+        tp = _xfs_trans_alloc(mp, XFS_TRANS_SB_COUNT);
+        error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0,
+                                        XFS_DEFAULT_LOG_COUNT);
+        if (error) {
+                xfs_trans_cancel(tp, 0);
+                return error;
+        }
+        xfs_mod_sb(tp, XFS_SB_IFREE | XFS_SB_ICOUNT | XFS_SB_FDBLOCKS);
+        if (sync)
+                xfs_trans_set_sync(tp);
+        /* Report a failed commit instead of unconditionally claiming success. */
+        error = xfs_trans_commit(tp, 0);
+        return error;
+}
+int
 xfs_unmountfs_writesb(xfs_mount_t *mp)
 {
        xfs_buf_t       *sbp;
@@ -1160,16 +1359,15 @@ xfs_unmountfs_writesb(xfs_mount_t *mp)
         * skip superblock write if fs is read-only, or
         * if we are doing a forced umount.
         */
-        sbp = xfs_getsb(mp, 0);
        if (!(XFS_MTOVFS(mp)->vfs_flag & VFS_RDONLY ||
                XFS_FORCED_SHUTDOWN(mp))) {
-                xfs_icsb_sync_counters(mp);
+                sbp = xfs_getsb(mp, 0);
+                sb = XFS_BUF_TO_SBP(sbp);
                /*
                 * mark shared-readonly if desired
                 */
-                sb = XFS_BUF_TO_SBP(sbp);
                if (mp->m_mk_sharedro) {
                        if (!(sb->sb_flags & XFS_SBF_READONLY))
                                sb->sb_flags |= XFS_SBF_READONLY;
@@ -1178,6 +1376,7 @@ xfs_unmountfs_writesb(xfs_mount_t *mp)
                        xfs_fs_cmn_err(CE_NOTE, mp,
                                "Unmounting, marking shared read-only");
                }
                XFS_BUF_UNDONE(sbp);
                XFS_BUF_UNREAD(sbp);
                XFS_BUF_UNDELAYWRITE(sbp);
@@ -1192,8 +1391,8 @@ xfs_unmountfs_writesb(xfs_mount_t *mp)
                                          mp, sbp, XFS_BUF_ADDR(sbp));
                if (error && mp->m_mk_sharedro)
                        xfs_fs_cmn_err(CE_ALERT, mp, "Superblock write error detected while unmounting.  Filesystem may not be marked shared readonly");
+                xfs_buf_relse(sbp);
        }
-        xfs_buf_relse(sbp);
        return error;
 }
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 82304b94646d..76ad74758696 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -66,6 +66,7 @@ struct xfs_bmbt_irec;
 struct xfs_bmap_free;
 struct xfs_extdelta;
 struct xfs_swapext;
+struct xfs_mru_cache;
 extern struct bhv_vfsops xfs_vfsops;
 extern struct bhv_vnodeops xfs_vnodeops;
@@ -424,17 +425,18 @@ typedef struct xfs_mount {
        struct notifier_block   m_icsb_notifier; /* hotplug cpu notifier */
        struct mutex            m_icsb_mutex;   /* balancer sync lock */
 #endif
+        struct xfs_mru_cache    *m_filestream;  /* per-mount filestream data */
 } xfs_mount_t;
 /*
 * Flags for m_flags.
 */
-#define XFS_MOUNT_WSYNC         (1ULL << 0)     /* for nfs - all metadata ops
+#define XFS_MOUNT_WSYNC         (1ULL << 0)     /* for nfs - all metadata ops
                                                   must be synchronous except
                                                   for space allocations */
-#define XFS_MOUNT_INO64         (1ULL << 1)
+#define XFS_MOUNT_INO64         (1ULL << 1)
                             /* (1ULL << 2)     -- currently unused */
-                             /* (1ULL << 3)     -- currently unused */
+#define XFS_MOUNT_WAS_CLEAN     (1ULL << 3)
 #define XFS_MOUNT_FS_SHUTDOWN   (1ULL << 4)     /* atomic stop of all filesystem
                                                   operations, typically for
                                                   disk errors in metadata */
@@ -463,6 +465,8 @@ typedef struct xfs_mount {
                                                 * I/O size in stat() */
 #define XFS_MOUNT_NO_PERCPU_SB  (1ULL << 23)    /* don't use per-cpu superblock
                                                   counters */
+#define XFS_MOUNT_FILESTREAMS   (1ULL << 24)    /* enable the filestreams
+                                                   allocator */
 /*
@@ -511,6 +515,8 @@ xfs_preferred_iosize(xfs_mount_t *mp)
 #define XFS_MAXIOFFSET(mp)      ((mp)->m_maxioffset)
+#define XFS_LAST_UNMOUNT_WAS_CLEAN(mp)  \
+                                ((mp)->m_flags & XFS_MOUNT_WAS_CLEAN)
 #define XFS_FORCED_SHUTDOWN(mp) ((mp)->m_flags & XFS_MOUNT_FS_SHUTDOWN)
 #define xfs_force_shutdown(m,f) \
        bhv_vfs_force_shutdown((XFS_MTOVFS(m)), f, __FILE__, __LINE__)
@@ -602,6 +608,7 @@ typedef struct xfs_mod_sb {
 extern xfs_mount_t *xfs_mount_init(void);
 extern void     xfs_mod_sb(xfs_trans_t *, __int64_t);
+extern int      xfs_log_sbcount(xfs_mount_t *, uint);
 extern void     xfs_mount_free(xfs_mount_t *mp, int remove_bhv);
 extern int      xfs_mountfs(struct bhv_vfs *, xfs_mount_t *mp, int);
 extern void     xfs_mountfs_check_barriers(xfs_mount_t *mp);
@@ -618,12 +625,14 @@ extern int	xfs_mod_incore_sb_batch(xfs_mount_t *, xfs_mod_sb_t *,
 extern struct xfs_buf *xfs_getsb(xfs_mount_t *, int);
 extern int      xfs_readsb(xfs_mount_t *, int);
 extern void     xfs_freesb(xfs_mount_t *);
+extern int      xfs_fs_writable(xfs_mount_t *);
 extern void     xfs_do_force_shutdown(bhv_desc_t *, int, char *, int);
 extern int      xfs_syncsub(xfs_mount_t *, int, int *);
 extern int      xfs_sync_inodes(xfs_mount_t *, int, int *);
 extern xfs_agnumber_t   xfs_initialize_perag(struct bhv_vfs *, xfs_mount_t *,
                                                xfs_agnumber_t);
 extern void     xfs_xlatesb(void *, struct xfs_sb *, int, __int64_t);
+extern int      xfs_sb_validate_fsb_count(struct xfs_sb *, __uint64_t);
 extern struct xfs_dmops xfs_dmcore_stub;
 extern struct xfs_qmops xfs_qmcore_stub;
diff --git a/fs/xfs/xfs_mru_cache.c b/fs/xfs/xfs_mru_cache.c
new file mode 100644
index 000000000000..7deb9e3cbbd3
--- /dev/null
+++ b/fs/xfs/xfs_mru_cache.c
@@ -0,0 +1,608 @@
+/*
+ * Copyright (c) 2006-2007 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_mru_cache.h"
+/*
+ * The MRU Cache data structure consists of a data store, an array of lists and
+ * a lock to protect its internal state.  At initialisation time, the client
+ * supplies an element lifetime in milliseconds and a group count, as well as a
+ * function pointer to call when deleting elements.  A data structure for
+ * queueing up work in the form of timed callbacks is also included.
+ *
+ * The group count controls how many lists are created, and thereby how finely
+ * the elements are grouped in time.  When reaping occurs, all the elements in
+ * all the lists whose time has expired are deleted.
+ *
+ * To give an example of how this works in practice, consider a client that
+ * initialises an MRU Cache with a lifetime of ten seconds and a group count of
+ * five.  Five internal lists will be created, each representing a two second
+ * period in time.  When the first element is added, time zero for the data
+ * structure is initialised to the current time.
+ *
+ * All the elements added in the first two seconds are appended to the first
+ * list.  Elements added in the third second go into the second list, and so on.
+ * If an element is accessed at any point, it is removed from its list and
+ * inserted at the head of the current most-recently-used list.
+ *
+ * The reaper function will have nothing to do until at least twelve seconds
+ * have elapsed since the first element was added.  The reason for this is that
+ * if it were called at t=11s, there could be elements in the first list that
+ * have only been inactive for nine seconds, so it still does nothing.  If it is
+ * called anywhere between t=12 and t=14 seconds, it will delete all the
+ * elements that remain in the first list.  It's therefore possible for elements
+ * to remain in the data store even after they've been inactive for up to
+ * (t + t/g) seconds, where t is the inactive element lifetime and g is the
+ * number of groups.
+ *
+ * The above example assumes that the reaper function gets called at least once
+ * every (t/g) seconds.  If it is called less frequently, unused elements will
+ * accumulate in the reap list until the reaper function is eventually called.
+ * The current implementation uses work queue callbacks to carefully time the
+ * reaper function calls, so this should happen rarely, if at all.
+ *
+ * From a design perspective, the primary reason for the choice of a list array
+ * representing discrete time intervals is that it's only practical to reap
+ * expired elements in groups of some appreciable size.  This automatically
+ * introduces a granularity to element lifetimes, so there's no point storing an
+ * individual timeout with each element that specifies a more precise reap time.
+ * The bonus is a saving of sizeof(long) bytes of memory per element stored.
+ *
+ * The elements could have been stored in just one list, but an array of
+ * counters or pointers would need to be maintained to allow them to be divided
+ * up into discrete time groups.  More critically, the process of touching or
+ * removing an element would involve walking large portions of the entire list,
+ * which would have a detrimental effect on performance.  The additional memory
+ * requirement for the array of list heads is minimal.
+ *
+ * When an element is touched or deleted, it needs to be removed from its
+ * current list.  Doubly linked lists are used to make the list maintenance
+ * portion of these operations O(1).  Since reaper timing can be imprecise,
+ * inserts and lookups can occur when there are no free lists available.  When
+ * this happens, all the elements on the LRU list need to be migrated to the end
+ * of the reap list.  To keep the list maintenance portion of these operations
+ * O(1) also, list tails need to be accessible without walking the entire list.
+ * This is the reason why doubly linked list heads are used.
+ */
+/*
+ * An MRU Cache is a dynamic data structure that stores its elements in a way
+ * that allows efficient lookups, but also groups them into discrete time
+ * intervals based on insertion time.  This allows elements to be efficiently
+ * and automatically reaped after a fixed period of inactivity.
+ *
+ * When a client data pointer is stored in the MRU Cache it needs to be added to
+ * both the data store and to one of the lists.  It must also be possible to
+ * access each of these entries via the other, i.e. to:
+ *
+ *    a) Walk a list, removing the corresponding data store entry for each item.
+ *    b) Look up a data store entry, then access its list entry directly.
+ *
+ * To achieve both of these goals, each entry must contain both a list entry and
+ * a key, in addition to the user's data pointer.  Note that it's not a good
+ * idea to have the client embed one of these structures at the top of their own
+ * data structure, because inserting the same item more than once would most
+ * likely result in a loop in one of the lists.  That's a sure-fire recipe for
+ * an infinite loop in the code.
+ */
+typedef struct xfs_mru_cache_elem
+{
+        struct list_head list_node;     /* Links element into a group or reap list. */
+        unsigned long   key;            /* Index of this element in the radix tree. */
+        void            *value;         /* Client data pointer supplied on insert. */
+} xfs_mru_cache_elem_t;
+static kmem_zone_t              *xfs_mru_elem_zone;
+static struct workqueue_struct  *xfs_mru_reap_wq;
+/*
+ * When inserting, destroying or reaping, it's first necessary to update the
+ * lists relative to a particular time.  In the case of destroying, that time
+ * will be well in the future to ensure that all items are moved to the reap
+ * list.  In all other cases though, the time will be the current time.
+ *
+ * This function enters a loop, moving the contents of the LRU list to the reap
+ * list again and again until either a) the lists are all empty, or b) time zero
+ * has been advanced sufficiently to be within the immediate element lifetime.
+ *
+ * Case a) above is detected by counting how many groups are migrated and
+ * stopping when they've all been moved.  Case b) is detected by monitoring the
+ * time_zero field, which is updated as each group is migrated.
+ *
+ * The return value is the earliest time that more migration could be needed, or
+ * zero if there's no need to schedule more work because the lists are empty.
+ *
+ * "now" is a time in jiffies.  The callers in this file invoke this function
+ * with mru->lock held.
+ *
+ * NOTE(review): time_zero and the return value use 0 as the "empty" marker,
+ * and the loop condition below compares raw jiffies values with "<=" rather
+ * than the time_after() family, so behaviour around a jiffies wrap should be
+ * confirmed.
+ */
+STATIC unsigned long
+_xfs_mru_cache_migrate(
+        xfs_mru_cache_t *mru,
+        unsigned long   now)
+{
+        unsigned int    grp;
+        unsigned int    migrated = 0;
+        struct list_head *lru_list;
+        /* Nothing to do if the data store is empty. */
+        if (!mru->time_zero)
+                return 0;
+        /* While time zero is older than the time spanned by all the lists. */
+        while (mru->time_zero <= now - mru->grp_count * mru->grp_time) {
+                /*
+                 * If the LRU list isn't empty, migrate its elements to the tail
+                 * of the reap list.
+                 */
+                lru_list = mru->lists + mru->lru_grp;
+                if (!list_empty(lru_list))
+                        list_splice_init(lru_list, mru->reap_list.prev);
+                /*
+                 * Advance the LRU group number, freeing the old LRU list to
+                 * become the new MRU list; advance time zero accordingly.
+                 */
+                mru->lru_grp = (mru->lru_grp + 1) % mru->grp_count;
+                mru->time_zero += mru->grp_time;
+                /*
+                 * If reaping is so far behind that all the elements on all the
+                 * lists have been migrated to the reap list, it's now empty.
+                 */
+                if (++migrated == mru->grp_count) {
+                        mru->lru_grp = 0;
+                        mru->time_zero = 0;
+                        return 0;
+                }
+        }
+        /* Find the first non-empty list from the LRU end. */
+        for (grp = 0; grp < mru->grp_count; grp++) {
+                /* Check the grp'th list from the LRU end. */
+                lru_list = mru->lists + ((mru->lru_grp + grp) % mru->grp_count);
+                /*
+                 * The grp'th list becomes the LRU list after grp more group
+                 * times, and is migrated once time zero is a further
+                 * grp_count group times in the past.
+                 */
+                if (!list_empty(lru_list))
+                        return mru->time_zero +
+                               (mru->grp_count + grp) * mru->grp_time;
+        }
+        /* All the lists must be empty. */
+        mru->lru_grp = 0;
+        mru->time_zero = 0;
+        return 0;
+}
+/*
+ * When inserting or doing a lookup, an element needs to be inserted into the
+ * MRU list.  The lists must be migrated first to ensure that they're
+ * up-to-date, otherwise the new element could be given a shorter lifetime in
+ * the cache than it should.
+ *
+ * The callers in this file invoke this function with mru->lock held.
+ */
+STATIC void
+_xfs_mru_cache_list_insert(
+        xfs_mru_cache_t         *mru,
+        xfs_mru_cache_elem_t    *elem)
+{
+        unsigned int    grp = 0;
+        unsigned long   now = jiffies;
+        /*
+         * If the data store is empty, initialise time zero, leave grp set to
+         * zero and record when the reaper first needs to act.  Otherwise, set
+         * grp to the number of group times that have elapsed since time zero.
+         */
+        if (!_xfs_mru_cache_migrate(mru, now)) {
+                mru->time_zero = now;
+                /*
+                 * next_reap is compared against jiffies with time_after() by
+                 * the reaper, so it must be an absolute time, not a duration.
+                 */
+                if (!mru->next_reap)
+                        mru->next_reap = now + mru->grp_count * mru->grp_time;
+        } else {
+                grp = (now - mru->time_zero) / mru->grp_time;
+                grp = (mru->lru_grp + grp) % mru->grp_count;
+        }
+        /* Insert the element at the tail of the corresponding list. */
+        list_add_tail(&elem->list_node, mru->lists + grp);
+}
+/*
+ * When destroying or reaping, all the elements that were migrated to the reap
+ * list need to be deleted.  For each element this involves removing it from the
+ * data store, removing it from the reap list, calling the client's free
+ * function and deleting the element from the element zone.
+ *
+ * This function unlocks mru->lock before calling the client's free function
+ * and takes it again before returning, so it must be entered with the lock
+ * held.
+ */
+STATIC void
+_xfs_mru_cache_clear_reap_list(
+        xfs_mru_cache_t         *mru)
+{
+        xfs_mru_cache_elem_t    *elem, *next;
+        struct list_head        tmp;
+        INIT_LIST_HEAD(&tmp);
+        list_for_each_entry_safe(elem, next, &mru->reap_list, list_node) {
+                /* Remove the element from the data store. */
+                radix_tree_delete(&mru->store, elem->key);
+                /*
+                 * remove to temp list so it can be freed without
+                 * needing to hold the lock
+                 */
+                list_move(&elem->list_node, &tmp);
+        }
+        /* Drop the lock while the free function runs on the private list. */
+        mutex_spinunlock(&mru->lock, 0);
+        list_for_each_entry_safe(elem, next, &tmp, list_node) {
+                /* Remove the element from the temporary list. */
+                list_del_init(&elem->list_node);
+                /* Call the client's free function with the key and value pointer. */
+                mru->free_func(elem->key, elem->value);
+                /* Free the element structure. */
+                kmem_zone_free(xfs_mru_elem_zone, elem);
+        }
+        /* Re-take the lock so the caller sees it held on return. */
+        mutex_spinlock(&mru->lock);
+}
+/*
+ * We fire the reap timer every group expiry interval so
+ * we always have a reaper ready to run. This makes shutdown
+ * and flushing of the reaper easy to do. Hence we need to
+ * keep when the next reap must occur so we can determine
+ * at each interval whether there is anything we need to do.
+ *
+ * This is the work function for the delayed work embedded in the cache
+ * (mru->work); the cache is recovered from the work pointer.
+ */
+STATIC void
+_xfs_mru_cache_reap(
+        struct work_struct      *work)
+{
+        xfs_mru_cache_t         *mru = container_of(work, xfs_mru_cache_t, work.work);
+        unsigned long           now;
+        ASSERT(mru && mru->lists);
+        if (!mru || !mru->lists)
+                return;
+        mutex_spinlock(&mru->lock);
+        now = jiffies;
+        if (mru->reap_all ||
+            (mru->next_reap && time_after(now, mru->next_reap))) {
+                /*
+                 * For a full flush, pretend it is far enough in the future
+                 * that every list gets migrated to the reap list.
+                 */
+                if (mru->reap_all)
+                        now += mru->grp_count * mru->grp_time * 2;
+                mru->next_reap = _xfs_mru_cache_migrate(mru, now);
+                _xfs_mru_cache_clear_reap_list(mru);
+        }
+        /*
+         * the process that triggered the reap_all is responsible
+         * for restarting the periodic reap if it is required.
+         */
+        if (!mru->reap_all)
+                queue_delayed_work(xfs_mru_reap_wq, &mru->work, mru->grp_time);
+        mru->reap_all = 0;
+        mutex_spinunlock(&mru->lock, 0);
+}
+/*
+ * Set up the resources shared by all MRU caches: the zone that cache
+ * elements are allocated from and the single-threaded reaper workqueue.
+ * Returns 0 on success or ENOMEM if either could not be created.
+ */
+int
+xfs_mru_cache_init(void)
+{
+        xfs_mru_elem_zone = kmem_zone_init(sizeof(xfs_mru_cache_elem_t),
+                                         "xfs_mru_cache_elem");
+        if (!xfs_mru_elem_zone)
+                goto out;
+        xfs_mru_reap_wq = create_singlethread_workqueue("xfs_mru_cache");
+        if (!xfs_mru_reap_wq)
+                goto out_destroy_zone;
+        return 0;
+out_destroy_zone:
+        kmem_zone_destroy(xfs_mru_elem_zone);
+out:
+        return ENOMEM;
+}
+/*
+ * Release the resources set up by xfs_mru_cache_init(): the reaper
+ * workqueue is destroyed first, then the element zone.
+ */
+void
+xfs_mru_cache_uninit(void)
+{
+        destroy_workqueue(xfs_mru_reap_wq);
+        kmem_zone_destroy(xfs_mru_elem_zone);
+}
+/*
+ * Create a new MRU cache and store it through mrup.  The caller supplies the
+ * element lifetime in milliseconds, the number of groups the lifetime is
+ * divided into and the function used to free client data when an element is
+ * deleted.  Returns 0 on success, EINVAL for a bad argument (including a
+ * lifetime too short to give each group at least one jiffy) or ENOMEM.
+ * On failure *mrup is set to NULL whenever mrup itself is valid.
+ */
+int
+xfs_mru_cache_create(
+        xfs_mru_cache_t         **mrup,
+        unsigned int            lifetime_ms,
+        unsigned int            grp_count,
+        xfs_mru_cache_free_func_t free_func)
+{
+        xfs_mru_cache_t *cache;
+        unsigned int    ticks_per_grp;
+        unsigned int    i;
+        if (!mrup)
+                return EINVAL;
+        *mrup = NULL;
+        if (!grp_count || !lifetime_ms || !free_func)
+                return EINVAL;
+        ticks_per_grp = msecs_to_jiffies(lifetime_ms) / grp_count;
+        if (!ticks_per_grp)
+                return EINVAL;
+        cache = kmem_zalloc(sizeof(*cache), KM_SLEEP);
+        if (!cache)
+                return ENOMEM;
+        /* An extra list is needed to avoid reaping up to a grp_time early. */
+        cache->grp_count = grp_count + 1;
+        cache->lists = kmem_alloc(cache->grp_count * sizeof(*cache->lists),
+                                  KM_SLEEP);
+        if (!cache->lists) {
+                kmem_free(cache, sizeof(*cache));
+                return ENOMEM;
+        }
+        for (i = 0; i < cache->grp_count; i++)
+                INIT_LIST_HEAD(&cache->lists[i]);
+        /*
+         * Inserts happen under a spinlock after a GFP_KERNEL radix tree
+         * preload, so the tree itself is set up with GFP_ATOMIC.
+         */
+        INIT_RADIX_TREE(&cache->store, GFP_ATOMIC);
+        INIT_LIST_HEAD(&cache->reap_list);
+        spinlock_init(&cache->lock, "xfs_mru_cache");
+        INIT_DELAYED_WORK(&cache->work, _xfs_mru_cache_reap);
+        cache->grp_time  = ticks_per_grp;
+        cache->free_func = free_func;
+        /* Nothing to reap yet; kick off the periodic reaper event. */
+        cache->next_reap = 0;
+        cache->reap_all = 0;
+        queue_delayed_work(xfs_mru_reap_wq, &cache->work, cache->grp_time);
+        *mrup = cache;
+        return 0;
+}
+/*
+ * Call xfs_mru_cache_flush() to flush out all cached entries, calling their
+ * free functions as they're deleted.  When this function returns, the caller is
+ * guaranteed that all the free functions for all the elements have finished
+ * executing.
+ *
+ * While we are flushing, we stop the periodic reaper event from triggering.
+ * Normally, we want to restart this periodic event, but if we are shutting
+ * down the cache we do not want it restarted. hence the restart parameter
+ * where 0 = do not restart reaper and 1 = restart reaper.
+ */
+void
+xfs_mru_cache_flush(
+        xfs_mru_cache_t         *mru,
+        int                     restart)
+{
+        if (!mru || !mru->lists)
+                return;
+        /* Stop the self-rearming periodic reaper before forcing a full reap. */
+        cancel_rearming_delayed_workqueue(xfs_mru_reap_wq, &mru->work);
+        /* Ask the reaper to reap every list on its next run. */
+        mutex_spinlock(&mru->lock);
+        mru->reap_all = 1;
+        mutex_spinunlock(&mru->lock, 0);
+        /* Run the reaper now and wait for it to finish. */
+        queue_work(xfs_mru_reap_wq, &mru->work.work);
+        flush_workqueue(xfs_mru_reap_wq);
+        mutex_spinlock(&mru->lock);
+        /* The reaper clears reap_all itself; warn if that did not happen. */
+        WARN_ON_ONCE(mru->reap_all != 0);
+        mru->reap_all = 0;
+        if (restart)
+                queue_delayed_work(xfs_mru_reap_wq, &mru->work, mru->grp_time);
+        mutex_spinunlock(&mru->lock, 0);
+}
+/*
+ * Flush every element out of the cache (calling the client's free function
+ * for each) without restarting the reaper, then free the list array and the
+ * cache structure itself.  The mru pointer must not be used afterwards.
+ */
+void
+xfs_mru_cache_destroy(
+        xfs_mru_cache_t         *mru)
+{
+        if (!mru || !mru->lists)
+                return;
+        /* we don't want the reaper to restart here */
+        xfs_mru_cache_flush(mru, 0);
+        kmem_free(mru->lists, mru->grp_count * sizeof(*mru->lists));
+        kmem_free(mru, sizeof(*mru));
+}
+/*
+ * To insert an element, call xfs_mru_cache_insert() with the data store, the
+ * element's key and the client data pointer.  This function returns 0 on
+ * success, EINVAL if the cache is invalid, ENOMEM if memory for the data
+ * element couldn't be allocated, or the (positive) error reported by the
+ * radix tree insert, e.g. EEXIST if the key is already present.
+ *
+ * On failure nothing is added to the cache and the free function is not
+ * called for value.
+ */
+int
+xfs_mru_cache_insert(
+        xfs_mru_cache_t *mru,
+        unsigned long   key,
+        void            *value)
+{
+        xfs_mru_cache_elem_t *elem;
+        int             error;
+        ASSERT(mru && mru->lists);
+        if (!mru || !mru->lists)
+                return EINVAL;
+        elem = kmem_zone_zalloc(xfs_mru_elem_zone, KM_SLEEP);
+        if (!elem)
+                return ENOMEM;
+        if (radix_tree_preload(GFP_KERNEL)) {
+                kmem_zone_free(xfs_mru_elem_zone, elem);
+                return ENOMEM;
+        }
+        INIT_LIST_HEAD(&elem->list_node);
+        elem->key = key;
+        elem->value = value;
+        mutex_spinlock(&mru->lock);
+        /*
+         * radix_tree_insert() reports failure as a negative errno; this file
+         * returns positive error codes, so flip the sign.
+         */
+        error = -radix_tree_insert(&mru->store, key, elem);
+        radix_tree_preload_end();
+        /*
+         * Only put the element on a list if it made it into the data store,
+         * otherwise the reaper would later delete someone else's tree entry.
+         */
+        if (!error)
+                _xfs_mru_cache_list_insert(mru, elem);
+        mutex_spinunlock(&mru->lock, 0);
+        if (error)
+                kmem_zone_free(xfs_mru_elem_zone, elem);
+        return error;
+}
+/*
+ * Take the element with the given key out of the cache WITHOUT calling the
+ * client's free function.  Returns the client data pointer that was stored
+ * with the key, or NULL if the cache is invalid or the key was not found.
+ */
+void *
+xfs_mru_cache_remove(
+        xfs_mru_cache_t *mru,
+        unsigned long   key)
+{
+        xfs_mru_cache_elem_t *found;
+        void            *data;
+        ASSERT(mru && mru->lists);
+        if (!mru || !mru->lists)
+                return NULL;
+        mutex_spinlock(&mru->lock);
+        found = radix_tree_delete(&mru->store, key);
+        if (!found) {
+                mutex_spinunlock(&mru->lock, 0);
+                return NULL;
+        }
+        /* Unhook the element from its list while still holding the lock. */
+        data = found->value;
+        list_del(&found->list_node);
+        mutex_spinunlock(&mru->lock, 0);
+        /* The element wrapper is ours to free; the client data is not. */
+        kmem_zone_free(xfs_mru_elem_zone, found);
+        return data;
+}
+/*
+ * To remove an element and call the free function on its data, call
+ * xfs_mru_cache_delete() with the data store and the element's key.  Nothing
+ * happens if no data pointer is returned for the key.
+ */
+void
+xfs_mru_cache_delete(
+        xfs_mru_cache_t *mru,
+        unsigned long   key)
+{
+        void            *data;
+        data = xfs_mru_cache_remove(mru, key);
+        if (!data)
+                return;
+        mru->free_func(key, data);
+}
+/*
+ * Find the element stored under the given key and mark it as recently used
+ * by taking it off its current list and re-inserting it via
+ * _xfs_mru_cache_list_insert().
+ *
+ * On success the client data pointer is returned and the cache's spinlock is
+ * STILL HELD; the caller must release it with xfs_mru_cache_done() and must
+ * not call anything that might sleep before doing so.  Reference counting
+ * would lift that restriction, but most clients only get, set or test a
+ * member of the returned structure, so the extra per-element memory isn't
+ * warranted.
+ *
+ * If the cache is invalid or the key isn't found, NULL is returned with the
+ * spinlock NOT held; xfs_mru_cache_done() must not be called in that case.
+ */
+void *
+xfs_mru_cache_lookup(
+        xfs_mru_cache_t *mru,
+        unsigned long   key)
+{
+        xfs_mru_cache_elem_t *found;
+        ASSERT(mru && mru->lists);
+        if (!mru || !mru->lists)
+                return NULL;
+        mutex_spinlock(&mru->lock);
+        found = radix_tree_lookup(&mru->store, key);
+        if (!found) {
+                mutex_spinunlock(&mru->lock, 0);
+                return NULL;
+        }
+        /* Touch the element; the lock stays held for the caller. */
+        list_del(&found->list_node);
+        _xfs_mru_cache_list_insert(mru, found);
+        return found->value;
+}
+/*
+ * Find the element stored under the given key without changing its position
+ * in the internal lists.
+ *
+ * On success the client data pointer is returned with the cache's spinlock
+ * still held; release it with xfs_mru_cache_done().  If the cache is invalid
+ * or the key isn't found, NULL is returned and the spinlock is not held.
+ */
+void *
+xfs_mru_cache_peek(
+        xfs_mru_cache_t *mru,
+        unsigned long   key)
+{
+        xfs_mru_cache_elem_t *found;
+        ASSERT(mru && mru->lists);
+        if (!mru || !mru->lists)
+                return NULL;
+        mutex_spinlock(&mru->lock);
+        found = radix_tree_lookup(&mru->store, key);
+        if (found)
+                return found->value;
+        mutex_spinunlock(&mru->lock, 0);
+        return NULL;
+}
+/*
+ * To release the internal data structure spinlock after having performed an
+ * xfs_mru_cache_lookup() or an xfs_mru_cache_peek(), call xfs_mru_cache_done()
+ * with the data store pointer.
+ *
+ * Only call this when the lookup or peek returned a non-NULL pointer; those
+ * functions have already dropped the lock when they return NULL.
+ */
+void
+xfs_mru_cache_done(
+        xfs_mru_cache_t *mru)
+{
+        mutex_spinunlock(&mru->lock, 0);
+}
diff --git a/fs/xfs/xfs_mru_cache.h b/fs/xfs/xfs_mru_cache.h
new file mode 100644
index 000000000000..624fd10ee8e5
--- /dev/null
+++ b/fs/xfs/xfs_mru_cache.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2006-2007 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef __XFS_MRU_CACHE_H__
+#define __XFS_MRU_CACHE_H__
+/* Function pointer type for callback to free a client's data pointer. */
+typedef void (*xfs_mru_cache_free_func_t)(unsigned long, void*);
+typedef struct xfs_mru_cache
+{
+        struct radix_tree_root  store;     /* Core storage data structure.  */
+        struct list_head        *lists;    /* Array of lists, one per grp.  */
+        struct list_head        reap_list; /* Elements overdue for reaping. */
+        spinlock_t              lock;      /* Lock to protect this struct.  */
+        unsigned int            grp_count; /* Number of discrete groups.    */
+        unsigned int            grp_time;  /* Jiffies spanned by one group. */
+        unsigned int            lru_grp;   /* Group containing time zero.   */
+        unsigned long           time_zero; /* Start time (jiffies) of the LRU
+                                              group; 0 = lists are empty. */
+        unsigned long           next_reap; /* Time that the reaper should
+                                              next do something. */
+        unsigned int            reap_all;  /* if set, reap all lists */
+        xfs_mru_cache_free_func_t free_func; /* Function pointer for freeing. */
+        struct delayed_work     work;      /* Workqueue data for reaping.   */
+} xfs_mru_cache_t;
+int xfs_mru_cache_init(void);
+void xfs_mru_cache_uninit(void);
+int xfs_mru_cache_create(struct xfs_mru_cache **mrup, unsigned int lifetime_ms,
+                             unsigned int grp_count,
+                             xfs_mru_cache_free_func_t free_func);
+void xfs_mru_cache_flush(xfs_mru_cache_t *mru, int restart);
+void xfs_mru_cache_destroy(struct xfs_mru_cache *mru);
+int xfs_mru_cache_insert(struct xfs_mru_cache *mru, unsigned long key,
+                                void *value);
+void * xfs_mru_cache_remove(struct xfs_mru_cache *mru, unsigned long key);
+void xfs_mru_cache_delete(struct xfs_mru_cache *mru, unsigned long key);
+void *xfs_mru_cache_lookup(struct xfs_mru_cache *mru, unsigned long key);
+void *xfs_mru_cache_peek(struct xfs_mru_cache *mru, unsigned long key);
+void xfs_mru_cache_done(struct xfs_mru_cache *mru);
+#endif /* __XFS_MRU_CACHE_H__ */
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index b3a5f07bd073..47082c01872d 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -1882,11 +1882,13 @@ xfs_growfs_rt(
            (nrblocks = in->newblocks) <= sbp->sb_rblocks ||
            (sbp->sb_rblocks && (in->extsize != sbp->sb_rextsize)))
                return XFS_ERROR(EINVAL);
+        if ((error = xfs_sb_validate_fsb_count(sbp, nrblocks)))
+                return error;
        /*
         * Read in the last block of the device, make sure it exists.
         */
        error = xfs_read_buf(mp, mp->m_rtdev_targp,
-                        XFS_FSB_TO_BB(mp, in->newblocks - 1),
+                        XFS_FSB_TO_BB(mp, nrblocks - 1),
                        XFS_FSB_TO_BB(mp, 1), 0, &bp);
        if (error)
                return error;
diff --git a/fs/xfs/xfs_rw.h b/fs/xfs/xfs_rw.h
index 188b296ff50c..fcf28dbded7c 100644
--- a/fs/xfs/xfs_rw.h
+++ b/fs/xfs/xfs_rw.h
@@ -72,6 +72,34 @@ xfs_fsb_to_db_io(struct xfs_iocore *io, xfs_fsblock_t fsb)
 }
 /*
+ * Flags for xfs_free_eofblocks
+ */
+#define XFS_FREE_EOF_LOCK       (1<<0)
+#define XFS_FREE_EOF_NOLOCK     (1<<1)
+/*
+ * helper function to extract extent size hint from inode
+ */
+STATIC_INLINE xfs_extlen_t
+xfs_get_extsz_hint(
+        xfs_inode_t     *ip)
+{
+        xfs_extlen_t    extsz;
+        if (unlikely(ip->i_d.di_flags & XFS_DIFLAG_REALTIME)) {
+                extsz = (ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE)
+                                ? ip->i_d.di_extsize
+                                : ip->i_mount->m_sb.sb_rextsize;
+                ASSERT(extsz);
+        } else {
+                extsz = (ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE)
+                                ? ip->i_d.di_extsize : 0;
+        }
+        return extsz;
+}
+/*
 * Prototypes for functions in xfs_rw.c.
 */
 extern int xfs_write_clear_setuid(struct xfs_inode *ip);
@@ -91,10 +119,12 @@ extern void xfs_ioerror_alert(char *func, struct xfs_mount *mp,
 extern int xfs_rwlock(bhv_desc_t *bdp, bhv_vrwlock_t write_lock);
 extern void xfs_rwunlock(bhv_desc_t *bdp, bhv_vrwlock_t write_lock);
 extern int xfs_setattr(bhv_desc_t *, bhv_vattr_t *vap, int flags,
-                       cred_t *credp);
+                        cred_t *credp);
 extern int xfs_change_file_space(bhv_desc_t *bdp, int cmd, xfs_flock64_t *bf,
-                                 xfs_off_t offset, cred_t *credp, int flags);
+                        xfs_off_t offset, cred_t *credp, int flags);
 extern int xfs_set_dmattrs(bhv_desc_t *bdp, u_int evmask, u_int16_t state,
-                           cred_t *credp);
+                        cred_t *credp);
+extern int xfs_free_eofblocks(struct xfs_mount *mp, struct xfs_inode *ip,
+                        int flags);
 #endif /* __XFS_RW_H__ */
diff --git a/fs/xfs/xfs_sb.h b/fs/xfs/xfs_sb.h
index 467854b45c8f..ef42537a607a 100644
--- a/fs/xfs/xfs_sb.h
+++ b/fs/xfs/xfs_sb.h
@@ -74,12 +74,13 @@ struct xfs_mount;
 */
 #define XFS_SB_VERSION2_REALFBITS       0x00ffffff      /* Mask: features */
 #define XFS_SB_VERSION2_RESERVED1BIT    0x00000001
-#define XFS_SB_VERSION2_RESERVED2BIT    0x00000002
+#define XFS_SB_VERSION2_LAZYSBCOUNTBIT  0x00000002      /* Superblk counters */
 #define XFS_SB_VERSION2_RESERVED4BIT    0x00000004
 #define XFS_SB_VERSION2_ATTR2BIT        0x00000008      /* Inline attr rework */
 #define XFS_SB_VERSION2_OKREALFBITS     \
-        (XFS_SB_VERSION2_ATTR2BIT)
+        (XFS_SB_VERSION2_LAZYSBCOUNTBIT | \
+         XFS_SB_VERSION2_ATTR2BIT)
 #define XFS_SB_VERSION2_OKSASHFBITS     \
        (0)
 #define XFS_SB_VERSION2_OKREALBITS      \
@@ -181,6 +182,9 @@ typedef enum {
 #define XFS_SB_SHARED_VN        XFS_SB_MVAL(SHARED_VN)
 #define XFS_SB_UNIT             XFS_SB_MVAL(UNIT)
 #define XFS_SB_WIDTH            XFS_SB_MVAL(WIDTH)
+#define XFS_SB_ICOUNT           XFS_SB_MVAL(ICOUNT)
+#define XFS_SB_IFREE            XFS_SB_MVAL(IFREE)
+#define XFS_SB_FDBLOCKS         XFS_SB_MVAL(FDBLOCKS)
 #define XFS_SB_FEATURES2        XFS_SB_MVAL(FEATURES2)
 #define XFS_SB_NUM_BITS         ((int)XFS_SBS_FIELDCOUNT)
 #define XFS_SB_ALL_BITS         ((1LL << XFS_SB_NUM_BITS) - 1)
@@ -188,7 +192,7 @@ typedef enum {
        (XFS_SB_UUID | XFS_SB_ROOTINO | XFS_SB_RBMINO | XFS_SB_RSUMINO | \
         XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO | XFS_SB_GQUOTINO | \
         XFS_SB_QFLAGS | XFS_SB_SHARED_VN | XFS_SB_UNIT | XFS_SB_WIDTH | \
-         XFS_SB_FEATURES2)
+         XFS_SB_ICOUNT | XFS_SB_IFREE | XFS_SB_FDBLOCKS | XFS_SB_FEATURES2)
 /*
@@ -414,6 +418,12 @@ static inline int xfs_sb_version_hasmorebits(xfs_sb_t *sbp)
 *       ((sbp)->sb_features2 & XFS_SB_VERSION2_FUNBIT)
 */
+static inline int xfs_sb_version_haslazysbcount(xfs_sb_t *sbp)
+{
+        return (XFS_SB_VERSION_HASMOREBITS(sbp) &&      \
+                ((sbp)->sb_features2 & XFS_SB_VERSION2_LAZYSBCOUNTBIT));
+}
 #define XFS_SB_VERSION_HASATTR2(sbp)    xfs_sb_version_hasattr2(sbp)
 static inline int xfs_sb_version_hasattr2(xfs_sb_t *sbp)
 {
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index cc2d60951e21..356d6627f581 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -427,6 +427,14 @@ undo_blocks:
 *
 * Mark the transaction structure to indicate that the superblock
 * needs to be updated before committing.
+ *
+ * Because we may not be keeping track of allocated/free inodes and
+ * used filesystem blocks in the superblock, we do not mark the
+ * superblock dirty in this transaction if we modify these fields.
+ * We still need to update the transaction deltas so that they get
+ * applied to the incore superblock, but we don't want them to
+ * cause the superblock to get locked and logged if these are the
+ * only fields in the superblock that the transaction modifies.
 */
 void
 xfs_trans_mod_sb(
@@ -434,13 +442,19 @@ xfs_trans_mod_sb(
        uint            field,
        int64_t         delta)
 {
+        uint32_t        flags = (XFS_TRANS_DIRTY|XFS_TRANS_SB_DIRTY);
+        xfs_mount_t     *mp = tp->t_mountp;
        switch (field) {
        case XFS_TRANS_SB_ICOUNT:
                tp->t_icount_delta += delta;
+                if (xfs_sb_version_haslazysbcount(&mp->m_sb))
+                        flags &= ~XFS_TRANS_SB_DIRTY;
                break;
        case XFS_TRANS_SB_IFREE:
                tp->t_ifree_delta += delta;
+                if (xfs_sb_version_haslazysbcount(&mp->m_sb))
+                        flags &= ~XFS_TRANS_SB_DIRTY;
                break;
        case XFS_TRANS_SB_FDBLOCKS:
                /*
@@ -453,6 +467,8 @@ xfs_trans_mod_sb(
                        ASSERT(tp->t_blk_res_used <= tp->t_blk_res);
                }
                tp->t_fdblocks_delta += delta;
+                if (xfs_sb_version_haslazysbcount(&mp->m_sb))
+                        flags &= ~XFS_TRANS_SB_DIRTY;
                break;
        case XFS_TRANS_SB_RES_FDBLOCKS:
                /*
@@ -462,6 +478,8 @@ xfs_trans_mod_sb(
                 */
                ASSERT(delta < 0);
                tp->t_res_fdblocks_delta += delta;
+                if (xfs_sb_version_haslazysbcount(&mp->m_sb))
+                        flags &= ~XFS_TRANS_SB_DIRTY;
                break;
        case XFS_TRANS_SB_FREXTENTS:
                /*
@@ -515,7 +533,7 @@ xfs_trans_mod_sb(
                return;
        }
-        tp->t_flags |= (XFS_TRANS_SB_DIRTY | XFS_TRANS_DIRTY);
+        tp->t_flags |= flags;
 }
 /*
@@ -544,18 +562,23 @@ xfs_trans_apply_sb_deltas(
               (tp->t_ag_freeblks_delta + tp->t_ag_flist_delta +
                tp->t_ag_btree_delta));
-        if (tp->t_icount_delta != 0) {
+        /*
-                INT_MOD(sbp->sb_icount, ARCH_CONVERT, tp->t_icount_delta);
+         * Only update the superblock counters if we are logging them
-        }
+         */
-        if (tp->t_ifree_delta != 0) {
+        if (!xfs_sb_version_haslazysbcount(&(tp->t_mountp->m_sb))) {
-                INT_MOD(sbp->sb_ifree, ARCH_CONVERT, tp->t_ifree_delta);
+                if (tp->t_icount_delta != 0) {
-        }
+                        INT_MOD(sbp->sb_icount, ARCH_CONVERT, tp->t_icount_delta);
+                }
+                if (tp->t_ifree_delta != 0) {
+                        INT_MOD(sbp->sb_ifree, ARCH_CONVERT, tp->t_ifree_delta);
+                }
-        if (tp->t_fdblocks_delta != 0) {
+                if (tp->t_fdblocks_delta != 0) {
-                INT_MOD(sbp->sb_fdblocks, ARCH_CONVERT, tp->t_fdblocks_delta);
+                        INT_MOD(sbp->sb_fdblocks, ARCH_CONVERT, tp->t_fdblocks_delta);
-        }
+                }
-        if (tp->t_res_fdblocks_delta != 0) {
+                if (tp->t_res_fdblocks_delta != 0) {
-                INT_MOD(sbp->sb_fdblocks, ARCH_CONVERT, tp->t_res_fdblocks_delta);
+                        INT_MOD(sbp->sb_fdblocks, ARCH_CONVERT, tp->t_res_fdblocks_delta);
+                }
        }
        if (tp->t_frextents_delta != 0) {
@@ -615,11 +638,23 @@ xfs_trans_apply_sb_deltas(
 }
 /*
- * xfs_trans_unreserve_and_mod_sb() is called to release unused
+ * xfs_trans_unreserve_and_mod_sb() is called to release unused reservations
- * reservations and apply superblock counter changes to the in-core
+ * and apply superblock counter changes to the in-core superblock.  The
- * superblock.
+ * t_res_fdblocks_delta and t_res_frextents_delta fields are explicitly NOT
+ * applied to the in-core superblock.  The idea is that that has already been
+ * done.
 *
 * This is done efficiently with a single call to xfs_mod_incore_sb_batch().
+ * However, we have to ensure that we modify each superblock field only
+ * once because the application of the delta values may not be atomic. That can
+ * lead to ENOSPC races occurring if we have two separate modifications of the
+ * free space counter to put back the entire reservation and then take away
+ * what we used.
+ *
+ * If we are not logging superblock counters, then the inode allocated/free and
+ * used block counts are not updated in the on disk superblock. In this case,
+ * XFS_TRANS_SB_DIRTY will not be set when the transaction is updated but we
+ * still need to update the incore superblock with the changes.
 */
 STATIC void
 xfs_trans_unreserve_and_mod_sb(
@@ -627,40 +662,49 @@ xfs_trans_unreserve_and_mod_sb(
 {
        xfs_mod_sb_t    msb[14];        /* If you add cases, add entries */
        xfs_mod_sb_t    *msbp;
+        xfs_mount_t     *mp = tp->t_mountp;
        /* REFERENCED */
        int             error;
        int             rsvd;
+        int64_t         blkdelta = 0;
+        int64_t         rtxdelta = 0;
        msbp = msb;
        rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0;
-        /*
+        /* calculate free blocks delta */
-         * Release any reserved blocks.  Any that were allocated
+        if (tp->t_blk_res > 0)
-         * will be taken back again by fdblocks_delta below.
+                blkdelta = tp->t_blk_res;
-         */
-        if (tp->t_blk_res > 0) {
+        if ((tp->t_fdblocks_delta != 0) &&
+            (xfs_sb_version_haslazysbcount(&mp->m_sb) ||
+             (tp->t_flags & XFS_TRANS_SB_DIRTY)))
+                blkdelta += tp->t_fdblocks_delta;
+        if (blkdelta != 0) {
                msbp->msb_field = XFS_SBS_FDBLOCKS;
-                msbp->msb_delta = tp->t_blk_res;
+                msbp->msb_delta = blkdelta;
                msbp++;
        }
-        /*
+        /* calculate free realtime extents delta */
-         * Release any reserved real time extents .  Any that were
+        if (tp->t_rtx_res > 0)
-         * allocated will be taken back again by frextents_delta below.
+                rtxdelta = tp->t_rtx_res;
-         */
-        if (tp->t_rtx_res > 0) {
+        if ((tp->t_frextents_delta != 0) &&
+            (tp->t_flags & XFS_TRANS_SB_DIRTY))
+                rtxdelta += tp->t_frextents_delta;
+        if (rtxdelta != 0) {
                msbp->msb_field = XFS_SBS_FREXTENTS;
-                msbp->msb_delta = tp->t_rtx_res;
+                msbp->msb_delta = rtxdelta;
                msbp++;
        }
-        /*
+        /* apply remaining deltas */
-         * Apply any superblock modifications to the in-core version.
-         * The t_res_fdblocks_delta and t_res_frextents_delta fields are
+        if (xfs_sb_version_haslazysbcount(&mp->m_sb) ||
-         * explicitly NOT applied to the in-core superblock.
+             (tp->t_flags & XFS_TRANS_SB_DIRTY)) {
-         * The idea is that that has already been done.
-         */
-        if (tp->t_flags & XFS_TRANS_SB_DIRTY) {
                if (tp->t_icount_delta != 0) {
                        msbp->msb_field = XFS_SBS_ICOUNT;
                        msbp->msb_delta = tp->t_icount_delta;
@@ -671,16 +715,9 @@ xfs_trans_unreserve_and_mod_sb(
                        msbp->msb_delta = tp->t_ifree_delta;
                        msbp++;
                }
-                if (tp->t_fdblocks_delta != 0) {
+        }
-                        msbp->msb_field = XFS_SBS_FDBLOCKS;
-                        msbp->msb_delta = tp->t_fdblocks_delta;
+        if (tp->t_flags & XFS_TRANS_SB_DIRTY) {
-                        msbp++;
-                }
-                if (tp->t_frextents_delta != 0) {
-                        msbp->msb_field = XFS_SBS_FREXTENTS;
-                        msbp->msb_delta = tp->t_frextents_delta;
-                        msbp++;
-                }
                if (tp->t_dblocks_delta != 0) {
                        msbp->msb_field = XFS_SBS_DBLOCKS;
                        msbp->msb_delta = tp->t_dblocks_delta;
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index 7dfcc450366f..0e26e729023e 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -94,7 +94,8 @@ typedef struct xfs_trans_header {
 #define XFS_TRANS_GROWFSRT_ZERO         38
 #define XFS_TRANS_GROWFSRT_FREE         39
 #define XFS_TRANS_SWAPEXT               40
-#define XFS_TRANS_TYPE_MAX              40
+#define XFS_TRANS_SB_COUNT              41
+#define XFS_TRANS_TYPE_MAX              41
 /* new transaction types need to be reflected in xfs_logprint(8) */
diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c
index 65c561201cb8..11f5ea29a038 100644
--- a/fs/xfs/xfs_vfsops.c
+++ b/fs/xfs/xfs_vfsops.c
@@ -51,6 +51,8 @@
 #include "xfs_acl.h"
 #include "xfs_attr.h"
 #include "xfs_clnt.h"
+#include "xfs_mru_cache.h"
+#include "xfs_filestream.h"
 #include "xfs_fsops.h"
 STATIC int      xfs_sync(bhv_desc_t *, int, cred_t *);
@@ -81,6 +83,8 @@ xfs_init(void)
        xfs_dabuf_zone = kmem_zone_init(sizeof(xfs_dabuf_t), "xfs_dabuf");
        xfs_ifork_zone = kmem_zone_init(sizeof(xfs_ifork_t), "xfs_ifork");
        xfs_acl_zone_init(xfs_acl_zone, "xfs_acl");
+        xfs_mru_cache_init();
+        xfs_filestream_init();
        /*
         * The size of the zone allocated buf log item is the maximum
@@ -164,6 +168,8 @@ xfs_cleanup(void)
        xfs_cleanup_procfs();
        xfs_sysctl_unregister();
        xfs_refcache_destroy();
+        xfs_filestream_uninit();
+        xfs_mru_cache_uninit();
        xfs_acl_zone_destroy(xfs_acl_zone);
 #ifdef XFS_DIR2_TRACE
@@ -320,6 +326,9 @@ xfs_start_flags(
        else
                mp->m_flags &= ~XFS_MOUNT_BARRIER;
+        if (ap->flags2 & XFSMNT2_FILESTREAMS)
+                mp->m_flags |= XFS_MOUNT_FILESTREAMS;
        return 0;
 }
@@ -518,6 +527,9 @@ xfs_mount(
        if (mp->m_flags & XFS_MOUNT_BARRIER)
                xfs_mountfs_check_barriers(mp);
+        if ((error = xfs_filestream_mount(mp)))
+                goto error2;
        error = XFS_IOINIT(vfsp, args, flags);
        if (error)
                goto error2;
@@ -575,6 +587,13 @@ xfs_unmount(
         */
        xfs_refcache_purge_mp(mp);
+        /*
+         * Blow away any referenced inode in the filestreams cache.
+         * This can and will cause log traffic as inodes go inactive
+         * here.
+         */
+        xfs_filestream_unmount(mp);
        XFS_bflush(mp->m_ddev_targp);
        error = xfs_unmount_flush(mp, 0);
        if (error)
@@ -640,7 +659,7 @@ xfs_quiesce_fs(
         * we can write the unmount record.
         */
        do {
-                xfs_syncsub(mp, SYNC_REMOUNT|SYNC_ATTR|SYNC_WAIT, NULL);
+                xfs_syncsub(mp, SYNC_INODE_QUIESCE, NULL);
                pincount = xfs_flush_buftarg(mp->m_ddev_targp, 1);
                if (!pincount) {
                        delay(50);
@@ -651,6 +670,30 @@ xfs_quiesce_fs(
        return 0;
 }
+/*
+ * Second stage of a quiesce. The data is already synced, now we have to take
+ * care of the metadata. New transactions are already blocked, so we need to
+ * wait for any remaining transactions to drain out before proceeding.
+ */
+STATIC void
+xfs_attr_quiesce(
+        xfs_mount_t     *mp)
+{
+        /* wait for all modifications to complete */
+        while (atomic_read(&mp->m_active_trans) > 0)
+                delay(100);
+        /* flush inodes and push all remaining buffers out to disk */
+        xfs_quiesce_fs(mp);
+        ASSERT_ALWAYS(atomic_read(&mp->m_active_trans) == 0);
+        /* Push the superblock and write an unmount record */
+        xfs_log_sbcount(mp, 1);
+        xfs_log_unmount_write(mp);
+        xfs_unmountfs_writesb(mp);
+}
 STATIC int
 xfs_mntupdate(
        bhv_desc_t                      *bdp,
@@ -670,10 +713,9 @@ xfs_mntupdate(
                        mp->m_flags &= ~XFS_MOUNT_BARRIER;
                }
        } else if (!(vfsp->vfs_flag & VFS_RDONLY)) {    /* rw -> ro */
-                bhv_vfs_sync(vfsp, SYNC_FSDATA|SYNC_BDFLUSH|SYNC_ATTR, NULL);
+                xfs_filestream_flush(mp);
-                xfs_quiesce_fs(mp);
+                bhv_vfs_sync(vfsp, SYNC_DATA_QUIESCE, NULL);
-                xfs_log_unmount_write(mp);
+                xfs_attr_quiesce(mp);
-                xfs_unmountfs_writesb(mp);
                vfsp->vfs_flag |= VFS_RDONLY;
        }
        return 0;
@@ -887,6 +929,9 @@ xfs_sync(
 {
        xfs_mount_t     *mp = XFS_BHVTOM(bdp);
+        if (flags & SYNC_IOWAIT)
+                xfs_filestream_flush(mp);
        return xfs_syncsub(mp, flags, NULL);
 }
@@ -1128,58 +1173,41 @@ xfs_sync_inodes(
                 * in the inode list.
                 */
-                if ((flags & SYNC_CLOSE)  && (vp != NULL)) {
+                /*
-                        /*
+                 * If we have to flush data or wait for I/O completion
-                         * This is the shutdown case.  We just need to
+                 * we need to drop the ilock that we currently hold.
-                         * flush and invalidate all the pages associated
+                 * If we need to drop the lock, insert a marker if we
-                         * with the inode.  Drop the inode lock since
+                 * have not already done so.
-                         * we can't hold it across calls to the buffer
+                 */
-                         * cache.
+                if ((flags & (SYNC_CLOSE|SYNC_IOWAIT)) ||
-                         *
+                    ((flags & SYNC_DELWRI) && VN_DIRTY(vp))) {
-                         * We don't set the VREMAPPING bit in the vnode
+                        if (mount_locked) {
-                         * here, because we don't hold the vnode lock
+                                IPOINTER_INSERT(ip, mp);
-                         * exclusively.  It doesn't really matter, though,
-                         * because we only come here when we're shutting
-                         * down anyway.
-                         */
-                        xfs_iunlock(ip, XFS_ILOCK_SHARED);
-                        if (XFS_FORCED_SHUTDOWN(mp)) {
-                                bhv_vop_toss_pages(vp, 0, -1, FI_REMAPF);
-                        } else {
-                                error = bhv_vop_flushinval_pages(vp, 0, -1, FI_REMAPF);
                        }
+                        xfs_iunlock(ip, XFS_ILOCK_SHARED);
-                        xfs_ilock(ip, XFS_ILOCK_SHARED);
+                        if (flags & SYNC_CLOSE) {
+                                /* Shutdown case. Flush and invalidate. */
-                } else if ((flags & SYNC_DELWRI) && (vp != NULL)) {
+                                if (XFS_FORCED_SHUTDOWN(mp))
-                        if (VN_DIRTY(vp)) {
+                                        bhv_vop_toss_pages(vp, 0, -1, FI_REMAPF);
-                                /* We need to have dropped the lock here,
+                                else
-                                 * so insert a marker if we have not already
+                                        error = bhv_vop_flushinval_pages(vp, 0,
-                                 * done so.
+                                                                -1, FI_REMAPF);
-                                 */
+                        } else if ((flags & SYNC_DELWRI) && VN_DIRTY(vp)) {
-                                if (mount_locked) {
-                                        IPOINTER_INSERT(ip, mp);
-                                }
-                                /*
-                                 * Drop the inode lock since we can't hold it
-                                 * across calls to the buffer cache.
-                                 */
-                                xfs_iunlock(ip, XFS_ILOCK_SHARED);
                                error = bhv_vop_flush_pages(vp, (xfs_off_t)0,
                                                        -1, fflag, FI_NONE);
-                                xfs_ilock(ip, XFS_ILOCK_SHARED);
                        }
+                        /*
+                         * When freezing, we need to wait to ensure all I/O (including direct
+                         * I/O) is complete so that no further data modification can take
+                         * place after this point.
+                         */
+                        if (flags & SYNC_IOWAIT)
+                                vn_iowait(vp);
+                        xfs_ilock(ip, XFS_ILOCK_SHARED);
                }
-                /*
-                 * When freezing, we need to wait ensure all I/O (including direct
-                 * I/O) is complete to ensure no further data modification can take
-                 * place after this point
-                 */
-                if (flags & SYNC_IOWAIT)
-                        vn_iowait(vp);
                if (flags & SYNC_BDFLUSH) {
                        if ((flags & SYNC_ATTR) &&
@@ -1514,6 +1542,15 @@ xfs_syncsub(
        }
        /*
+         * If asked, update the disk superblock with incore counter values if we
+         * are using non-persistent counters so that they don't get too far out
+         * of sync if we crash or get a forced shutdown. We don't want to force
+         * this to disk, just get a transaction into the iclogs....
+         */
+        if (flags & SYNC_SUPER)
+                xfs_log_sbcount(mp, 0);
+        /*
         * Now check to see if the log needs a "dummy" transaction.
         */
@@ -1645,6 +1682,7 @@ xfs_vget(
                                         * in stat(). */
 #define MNTOPT_ATTR2    "attr2"         /* do use attr2 attribute format */
 #define MNTOPT_NOATTR2  "noattr2"       /* do not use attr2 attribute format */
+#define MNTOPT_FILESTREAM  "filestreams" /* use filestreams allocator */
 STATIC unsigned long
 suffix_strtoul(char *s, char **endp, unsigned int base)
@@ -1831,6 +1869,8 @@ xfs_parseargs(
                        args->flags |= XFSMNT_ATTR2;
                } else if (!strcmp(this_char, MNTOPT_NOATTR2)) {
                        args->flags &= ~XFSMNT_ATTR2;
+                } else if (!strcmp(this_char, MNTOPT_FILESTREAM)) {
+                        args->flags2 |= XFSMNT2_FILESTREAMS;
                } else if (!strcmp(this_char, "osyncisdsync")) {
                        /* no-op, this is now the default */
                        cmn_err(CE_WARN,
@@ -1959,9 +1999,9 @@ xfs_showargs(
 }
 /*
- * Second stage of a freeze. The data is already frozen, now we have to take
+ * Second stage of a freeze. The data is already frozen so we only
- * care of the metadata. New transactions are already blocked, so we need to
+ * need to take care of the metadata. Once that's done write a dummy
- * wait for any remaining transactions to drain out before proceding.
+ * record to dirty the log in case of a crash while frozen.
 */
 STATIC void
 xfs_freeze(
@@ -1969,18 +2009,7 @@ xfs_freeze(
 {
        xfs_mount_t     *mp = XFS_BHVTOM(bdp);
-        /* wait for all modifications to complete */
+        xfs_attr_quiesce(mp);
-        while (atomic_read(&mp->m_active_trans) > 0)
-                delay(100);
-        /* flush inodes and push all remaining buffers out to disk */
-        xfs_quiesce_fs(mp);
-        ASSERT_ALWAYS(atomic_read(&mp->m_active_trans) == 0);
-        /* Push the superblock and write an unmount record */
-        xfs_log_unmount_write(mp);
-        xfs_unmountfs_writesb(mp);
        xfs_fs_log_dummy(mp);
 }
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index de17aed578f0..79b522779aa4 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -51,6 +51,7 @@
 #include "xfs_refcache.h"
 #include "xfs_trans_space.h"
 #include "xfs_log_priv.h"
+#include "xfs_filestream.h"
 STATIC int
 xfs_open(
@@ -77,36 +78,6 @@ xfs_open(
        return 0;
 }
-STATIC int
-xfs_close(
-        bhv_desc_t      *bdp,
-        int             flags,
-        lastclose_t     lastclose,
-        cred_t          *credp)
-{
-        bhv_vnode_t     *vp = BHV_TO_VNODE(bdp);
-        xfs_inode_t     *ip = XFS_BHVTOI(bdp);
-        if (XFS_FORCED_SHUTDOWN(ip->i_mount))
-                return XFS_ERROR(EIO);
-        if (lastclose != L_TRUE || !VN_ISREG(vp))
-                return 0;
-        /*
-         * If we previously truncated this file and removed old data in
-         * the process, we want to initiate "early" writeout on the last
-         * close.  This is an attempt to combat the notorious NULL files
-         * problem which is particularly noticable from a truncate down,
-         * buffered (re-)write (delalloc), followed by a crash.  What we
-         * are effectively doing here is significantly reducing the time
-         * window where we'd otherwise be exposed to that problem.
-         */
-        if (VUNTRUNCATE(vp) && VN_DIRTY(vp) && ip->i_delayed_blks > 0)
-                return bhv_vop_flush_pages(vp, 0, -1, XFS_B_ASYNC, FI_NONE);
-        return 0;
-}
 /*
 * xfs_getattr
 */
@@ -183,9 +154,8 @@ xfs_getattr(
                         * realtime extent size or the realtime volume's
                         * extent size.
                         */
-                        vap->va_blocksize = ip->i_d.di_extsize ?
+                        vap->va_blocksize =
-                                (ip->i_d.di_extsize << mp->m_sb.sb_blocklog) :
+                                xfs_get_extsz_hint(ip) << mp->m_sb.sb_blocklog;
-                                (mp->m_sb.sb_rextsize << mp->m_sb.sb_blocklog);
                }
                break;
        }
@@ -814,6 +784,8 @@ xfs_setattr(
                                di_flags |= XFS_DIFLAG_PROJINHERIT;
                        if (vap->va_xflags & XFS_XFLAG_NODEFRAG)
                                di_flags |= XFS_DIFLAG_NODEFRAG;
+                        if (vap->va_xflags & XFS_XFLAG_FILESTREAM)
+                                di_flags |= XFS_DIFLAG_FILESTREAM;
                        if ((ip->i_d.di_mode & S_IFMT) == S_IFDIR) {
                                if (vap->va_xflags & XFS_XFLAG_RTINHERIT)
                                        di_flags |= XFS_DIFLAG_RTINHERIT;
@@ -1201,13 +1173,15 @@ xfs_fsync(
 }
 /*
- * This is called by xfs_inactive to free any blocks beyond eof,
+ * This is called by xfs_inactive to free any blocks beyond eof
- * when the link count isn't zero.
+ * when the link count isn't zero and by xfs_dm_punch_hole() when
+ * punching a hole to EOF.
 */
-STATIC int
+int
-xfs_inactive_free_eofblocks(
+xfs_free_eofblocks(
        xfs_mount_t     *mp,
-        xfs_inode_t     *ip)
+        xfs_inode_t     *ip,
+        int             flags)
 {
        xfs_trans_t     *tp;
        int             error;
@@ -1216,6 +1190,7 @@ xfs_inactive_free_eofblocks(
        xfs_filblks_t   map_len;
        int             nimaps;
        xfs_bmbt_irec_t imap;
+        int             use_iolock = (flags & XFS_FREE_EOF_LOCK);
        /*
         * Figure out if there are any blocks beyond the end
@@ -1256,11 +1231,14 @@ xfs_inactive_free_eofblocks(
                 * cache and we can't
                 * do that within a transaction.
                 */
-                xfs_ilock(ip, XFS_IOLOCK_EXCL);
+                if (use_iolock)
+                        xfs_ilock(ip, XFS_IOLOCK_EXCL);
                error = xfs_itruncate_start(ip, XFS_ITRUNC_DEFINITE,
                                    ip->i_size);
                if (error) {
-                        xfs_iunlock(ip, XFS_IOLOCK_EXCL);
+                        xfs_trans_cancel(tp, 0);
+                        if (use_iolock)
+                                xfs_iunlock(ip, XFS_IOLOCK_EXCL);
                        return error;
                }
@@ -1297,7 +1275,8 @@ xfs_inactive_free_eofblocks(
                        error = xfs_trans_commit(tp,
                                                XFS_TRANS_RELEASE_LOG_RES);
                }
-                xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
+                xfs_iunlock(ip, (use_iolock ? (XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL)
+                                            : XFS_ILOCK_EXCL));
        }
        return error;
 }
@@ -1560,6 +1539,31 @@ xfs_release(
        if (vp->v_vfsp->vfs_flag & VFS_RDONLY)
                return 0;
+        if (!XFS_FORCED_SHUTDOWN(mp)) {
+                /*
+                 * If we are using filestreams, and we have an unlinked
+                 * file that we are processing the last close on, then nothing
+                 * will be able to reopen and write to this file. Purge this
+                 * inode from the filestreams cache so that it doesn't delay
+                 * teardown of the inode.
+                 */
+                if ((ip->i_d.di_nlink == 0) && xfs_inode_is_filestream(ip))
+                        xfs_filestream_deassociate(ip);
+                /*
+                 * If we previously truncated this file and removed old data
+                 * in the process, we want to initiate "early" writeout on
+                 * the last close.  This is an attempt to combat the notorious
+                 * NULL files problem which is particularly noticeable from a
+                 * truncate down, buffered (re-)write (delalloc), followed by
+                 * a crash.  What we are effectively doing here is
+                 * significantly reducing the time window where we'd otherwise
+                 * be exposed to that problem.
+                 */
+                if (VUNTRUNCATE(vp) && VN_DIRTY(vp) && ip->i_delayed_blks > 0)
+                        bhv_vop_flush_pages(vp, 0, -1, XFS_B_ASYNC, FI_NONE);
+        }
 #ifdef HAVE_REFCACHE
        /* If we are in the NFS reference cache then don't do this now */
        if (ip->i_refcache)
@@ -1573,7 +1577,8 @@ xfs_release(
                     (ip->i_df.if_flags & XFS_IFEXTENTS))  &&
                    (!(ip->i_d.di_flags &
                                (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)))) {
-                        if ((error = xfs_inactive_free_eofblocks(mp, ip)))
+                        error = xfs_free_eofblocks(mp, ip, XFS_FREE_EOF_LOCK);
+                        if (error)
                                return error;
                        /* Update linux inode block count after free above */
                        vn_to_inode(vp)->i_blocks = XFS_FSB_TO_BB(mp,
@@ -1654,7 +1659,8 @@ xfs_inactive(
                     (!(ip->i_d.di_flags &
                                (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)) ||
                      (ip->i_delayed_blks != 0)))) {
-                        if ((error = xfs_inactive_free_eofblocks(mp, ip)))
+                        error = xfs_free_eofblocks(mp, ip, XFS_FREE_EOF_LOCK);
+                        if (error)
                                return VN_INACTIVE_CACHE;
                        /* Update linux inode block count after free above */
                        vn_to_inode(vp)->i_blocks = XFS_FSB_TO_BB(mp,
@@ -1680,6 +1686,7 @@ xfs_inactive(
                error = xfs_itruncate_start(ip, XFS_ITRUNC_DEFINITE, 0);
                if (error) {
+                        xfs_trans_cancel(tp, 0);
                        xfs_iunlock(ip, XFS_IOLOCK_EXCL);
                        return VN_INACTIVE_CACHE;
                }
@@ -2217,9 +2224,9 @@ static inline int
 xfs_lock_inumorder(int lock_mode, int subclass)
 {
        if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL))
-                lock_mode |= (subclass + XFS_IOLOCK_INUMORDER) << XFS_IOLOCK_SHIFT;
+                lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_IOLOCK_SHIFT;
        if (lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL))
-                lock_mode |= (subclass + XFS_ILOCK_INUMORDER) << XFS_ILOCK_SHIFT;
+                lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_ILOCK_SHIFT;
        return lock_mode;
 }
@@ -2546,6 +2553,15 @@ xfs_remove(
         */
        xfs_refcache_purge_ip(ip);
+        /*
+         * If we are using filestreams, kill the stream association.
+         * If the file is still open it may get a new one but that
+         * will get killed on last close in xfs_close() so we don't
+         * have to worry about that.
+         */
+        if (link_zero && xfs_inode_is_filestream(ip))
+                xfs_filestream_deassociate(ip);
        vn_trace_exit(XFS_ITOV(ip), __FUNCTION__, (inst_t *)__return_address);
        /*
@@ -4047,22 +4063,16 @@ xfs_alloc_file_space(
        if (XFS_FORCED_SHUTDOWN(mp))
                return XFS_ERROR(EIO);
-        rt = XFS_IS_REALTIME_INODE(ip);
-        if (unlikely(rt)) {
-                if (!(extsz = ip->i_d.di_extsize))
-                        extsz = mp->m_sb.sb_rextsize;
-        } else {
-                extsz = ip->i_d.di_extsize;
-        }
        if ((error = XFS_QM_DQATTACH(mp, ip, 0)))
                return error;
        if (len <= 0)
                return XFS_ERROR(EINVAL);
+        rt = XFS_IS_REALTIME_INODE(ip);
+        extsz = xfs_get_extsz_hint(ip);
        count = len;
-        error = 0;
        imapp = &imaps[0];
        nimaps = 1;
        bmapi_flag = XFS_BMAPI_WRITE | (alloc_type ? XFS_BMAPI_PREALLOC : 0);
@@ -4678,11 +4688,7 @@ xfs_change_file_space(
 bhv_vnodeops_t xfs_vnodeops = {
        BHV_IDENTITY_INIT(VN_BHV_XFS,VNODE_POSITION_XFS),
        .vop_open               = xfs_open,
-        .vop_close              = xfs_close,
        .vop_read               = xfs_read,
-#ifdef HAVE_SENDFILE
-        .vop_sendfile           = xfs_sendfile,
-#endif
 #ifdef HAVE_SPLICE
        .vop_splice_read        = xfs_splice_read,
        .vop_splice_write       = xfs_splice_write,