46 files changed, 7458 insertions, 6086 deletions
diff --git a/fs/Kconfig b/fs/Kconfig
index 3c4886b849f5..e33c08924572 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -2019,7 +2019,7 @@ config CODA_FS_OLD_API
 config AFS_FS
        tristate "Andrew File System support (AFS) (EXPERIMENTAL)"
        depends on INET && EXPERIMENTAL
-        select RXRPC
+        select AF_RXRPC
        help
          If you say Y here, you will get an experimental Andrew File System
          driver. It currently only supports unsecured read-only AFS access.
@@ -2028,8 +2028,15 @@ config AFS_FS
          If unsure, say N.
-config RXRPC
+config AFS_DEBUG
-        tristate
+        bool "AFS dynamic debugging"
+        depends on AFS_FS
+        help
+          Say Y here to make runtime controllable debugging messages appear.
+          See <file:Documentation/filesystems/afs.txt> for more information.
+          If unsure, say N.
 config 9P_FS
        tristate "Plan 9 Resource Sharing Support (9P2000) (Experimental)"
diff --git a/fs/afs/Makefile b/fs/afs/Makefile
index 4029c9da4b86..01545eb1d872 100644
--- a/fs/afs/Makefile
+++ b/fs/afs/Makefile
@@ -2,8 +2,6 @@
 # Makefile for Red Hat Linux AFS client.
 #
-#CFLAGS += -finstrument-functions
 kafs-objs := \
        callback.o \
        cell.o \
@@ -12,14 +10,15 @@ kafs-objs := \
        file.o \
        fsclient.o \
        inode.o \
-        kafsasyncd.o \
-        kafstimod.o \
        main.o \
        misc.o \
        mntpt.o \
        proc.o \
+        rxrpc.o \
+        security.o \
        server.o \
        super.o \
+        use-rtnetlink.o \
        vlclient.o \
        vlocation.o \
        vnode.o \
diff --git a/fs/afs/afs.h b/fs/afs/afs.h
new file mode 100644
index 000000000000..52d0752265b8
--- /dev/null
+++ b/fs/afs/afs.h
@@ -0,0 +1,146 @@
+/* AFS common types
+ *
+ * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#ifndef AFS_H
+#define AFS_H
+#include <linux/in.h>
+#define AFS_MAXCELLNAME 64              /* maximum length of a cell name */
+#define AFS_MAXVOLNAME  64              /* maximum length of a volume name */
+typedef unsigned                        afs_volid_t;
+typedef unsigned                        afs_vnodeid_t;
+typedef unsigned long long              afs_dataversion_t;
+typedef enum {
+        AFSVL_RWVOL,                    /* read/write volume */
+        AFSVL_ROVOL,                    /* read-only volume */
+        AFSVL_BACKVOL,                  /* backup volume */
+} __attribute__((packed)) afs_voltype_t;
+typedef enum {
+        AFS_FTYPE_INVALID       = 0,
+        AFS_FTYPE_FILE          = 1,
+        AFS_FTYPE_DIR           = 2,
+        AFS_FTYPE_SYMLINK       = 3,
+} afs_file_type_t;
+/*
+ * AFS file identifier
+ */
+struct afs_fid {
+        afs_volid_t     vid;            /* volume ID */
+        afs_vnodeid_t   vnode;          /* file index within volume */
+        unsigned        unique;         /* unique ID number (file index version) */
+};
+/*
+ * AFS callback notification
+ */
+typedef enum {
+        AFSCM_CB_UNTYPED        = 0,    /* no type set on CB break */
+        AFSCM_CB_EXCLUSIVE      = 1,    /* CB exclusive to CM [not implemented] */
+        AFSCM_CB_SHARED         = 2,    /* CB shared by other CM's */
+        AFSCM_CB_DROPPED        = 3,    /* CB promise cancelled by file server */
+} afs_callback_type_t;
+struct afs_callback {
+        struct afs_fid          fid;            /* file identifier */
+        unsigned                version;        /* callback version */
+        unsigned                expiry;         /* time at which expires */
+        afs_callback_type_t     type;           /* type of callback */
+};
+#define AFSCBMAX 50     /* maximum callbacks transferred per bulk op */
+/*
+ * AFS volume information
+ */
+struct afs_volume_info {
+        afs_volid_t             vid;            /* volume ID */
+        afs_voltype_t           type;           /* type of this volume */
+        afs_volid_t             type_vids[5];   /* volume ID's for possible types for this vol */
+        /* list of fileservers serving this volume */
+        size_t                  nservers;       /* number of entries used in servers[] */
+        struct {
+                struct in_addr  addr;           /* fileserver address */
+        } servers[8];
+};
+/*
+ * AFS security ACE access mask
+ */
+typedef u32 afs_access_t;
+#define AFS_ACE_READ            0x00000001U     /* - permission to read a file/dir */
+#define AFS_ACE_WRITE           0x00000002U     /* - permission to write/chmod a file */
+#define AFS_ACE_INSERT          0x00000004U     /* - permission to create dirent in a dir */
+#define AFS_ACE_LOOKUP          0x00000008U     /* - permission to lookup a file/dir in a dir */
+#define AFS_ACE_DELETE          0x00000010U     /* - permission to delete a dirent from a dir */
+#define AFS_ACE_LOCK            0x00000020U     /* - permission to lock a file */
+#define AFS_ACE_ADMINISTER      0x00000040U     /* - permission to change ACL */
+#define AFS_ACE_USER_A          0x01000000U     /* - 'A' user-defined permission */
+#define AFS_ACE_USER_B          0x02000000U     /* - 'B' user-defined permission */
+#define AFS_ACE_USER_C          0x04000000U     /* - 'C' user-defined permission */
+#define AFS_ACE_USER_D          0x08000000U     /* - 'D' user-defined permission */
+#define AFS_ACE_USER_E          0x10000000U     /* - 'E' user-defined permission */
+#define AFS_ACE_USER_F          0x20000000U     /* - 'F' user-defined permission */
+#define AFS_ACE_USER_G          0x40000000U     /* - 'G' user-defined permission */
+#define AFS_ACE_USER_H          0x80000000U     /* - 'H' user-defined permission */
+/*
+ * AFS file status information
+ */
+struct afs_file_status {
+        unsigned                if_version;     /* interface version */
+#define AFS_FSTATUS_VERSION     1
+        afs_file_type_t         type;           /* file type */
+        unsigned                nlink;          /* link count */
+        u64                     size;           /* file size */
+        afs_dataversion_t       data_version;   /* current data version */
+        u32                     author;         /* author ID */
+        u32                     owner;          /* owner ID */
+        u32                     group;          /* group ID */
+        afs_access_t            caller_access;  /* access rights for authenticated caller */
+        afs_access_t            anon_access;    /* access rights for unauthenticated caller */
+        umode_t                 mode;           /* UNIX mode */
+        struct afs_fid          parent;         /* parent dir ID for non-dirs only */
+        time_t                  mtime_client;   /* last time client changed data */
+        time_t                  mtime_server;   /* last time server changed data */
+};
+/*
+ * AFS file status change request
+ */
+struct afs_store_status {
+        u32                     mask;           /* which bits of the struct are set */
+        u32                     mtime_client;   /* last time client changed data */
+        u32                     owner;          /* owner ID */
+        u32                     group;          /* group ID */
+        umode_t                 mode;           /* UNIX mode */
+};
+#define AFS_SET_MTIME           0x01            /* set the mtime */
+#define AFS_SET_OWNER           0x02            /* set the owner ID */
+#define AFS_SET_GROUP           0x04            /* set the group ID (unsupported?) */
+#define AFS_SET_MODE            0x08            /* set the UNIX mode */
+#define AFS_SET_SEG_SIZE        0x10            /* set the segment size (unsupported) */
+/*
+ * AFS volume synchronisation information
+ */
+struct afs_volsync {
+        time_t                  creation;       /* volume creation time */
+};
+#endif /* AFS_H */
diff --git a/fs/afs/afs_cm.h b/fs/afs/afs_cm.h
new file mode 100644
index 000000000000..7b4d4fab4c80
--- /dev/null
+++ b/fs/afs/afs_cm.h
@@ -0,0 +1,32 @@
+/* AFS Cache Manager definitions
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#ifndef AFS_CM_H
+#define AFS_CM_H
+#define AFS_CM_PORT             7001    /* AFS cache manager port */
+#define CM_SERVICE              1       /* AFS Cache Manager Service ID */
+enum AFS_CM_Operations {
+        CBCallBack              = 204,  /* break callback promises */
+        CBInitCallBackState     = 205,  /* initialise callback state */
+        CBProbe                 = 206,  /* probe client */
+        CBGetLock               = 207,  /* get contents of CM lock table */
+        CBGetCE                 = 208,  /* get cache file description */
+        CBGetXStatsVersion      = 209,  /* get version of extended statistics */
+        CBGetXStats             = 210,  /* get contents of extended statistics data */
+        CBInitCallBackState3    = 213,  /* initialise callback state, version 3 */
+        CBGetCapabilities       = 65538, /* get client capabilities */
+};
+#define AFS_CAP_ERROR_TRANSLATION       0x1
+#endif /* AFS_CM_H */
diff --git a/fs/afs/afs_fs.h b/fs/afs/afs_fs.h
new file mode 100644
index 000000000000..89e0d1650a72
--- /dev/null
+++ b/fs/afs/afs_fs.h
@@ -0,0 +1,48 @@
+/* AFS File Service definitions
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#ifndef AFS_FS_H
+#define AFS_FS_H
+#define AFS_FS_PORT             7000    /* AFS file server port */
+#define FS_SERVICE              1       /* AFS File Service ID */
+enum AFS_FS_Operations {
+        FSFETCHDATA             = 130,  /* AFS Fetch file data */
+        FSFETCHSTATUS           = 132,  /* AFS Fetch file status */
+        FSREMOVEFILE            = 136,  /* AFS Remove a file */
+        FSCREATEFILE            = 137,  /* AFS Create a file */
+        FSRENAME                = 138,  /* AFS Rename or move a file or directory */
+        FSSYMLINK               = 139,  /* AFS Create a symbolic link */
+        FSLINK                  = 140,  /* AFS Create a hard link */
+        FSMAKEDIR               = 141,  /* AFS Create a directory */
+        FSREMOVEDIR             = 142,  /* AFS Remove a directory */
+        FSGIVEUPCALLBACKS       = 147,  /* AFS Discard callback promises */
+        FSGETVOLUMEINFO         = 148,  /* AFS Get root volume information */
+        FSGETROOTVOLUME         = 151,  /* AFS Get root volume name */
+        FSLOOKUP                = 161,  /* AFS lookup file in directory */
+};
+enum AFS_FS_Errors {
+        VSALVAGE        = 101,  /* volume needs salvaging */
+        VNOVNODE        = 102,  /* no such file/dir (vnode) */
+        VNOVOL          = 103,  /* no such volume or volume unavailable */
+        VVOLEXISTS      = 104,  /* volume name already exists */
+        VNOSERVICE      = 105,  /* volume not currently in service */
+        VOFFLINE        = 106,  /* volume is currently offline (more info available [VVL-spec]) */
+        VONLINE         = 107,  /* volume is already online */
+        VDISKFULL       = 108,  /* disk partition is full */
+        VOVERQUOTA      = 109,  /* volume's maximum quota exceeded */
+        VBUSY           = 110,  /* volume is temporarily unavailable */
+        VMOVED          = 111,  /* volume moved to new server - ask this FS where */
+};
+#endif /* AFS_FS_H */
diff --git a/fs/afs/vlclient.h b/fs/afs/afs_vl.h
index e3d601179c46..8bbefe009ed4 100644
--- a/fs/afs/vlclient.h
+++ b/fs/afs/afs_vl.h
@@ -1,6 +1,6 @@
-/* vlclient.h: Volume Location Service client interface
+/* AFS Volume Location Service client interface
 *
- * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
+ * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 *
 * This program is free software; you can redistribute it and/or
@@ -9,10 +9,19 @@
 * 2 of the License, or (at your option) any later version.
 */
-#ifndef _LINUX_AFS_VLCLIENT_H
+#ifndef AFS_VL_H
-#define _LINUX_AFS_VLCLIENT_H
+#define AFS_VL_H
-#include "types.h"
+#include "afs.h"
+#define AFS_VL_PORT             7003    /* volume location service port */
+#define VL_SERVICE              52      /* RxRPC service ID for the Volume Location service */
+enum AFSVL_Operations {
+        VLGETENTRYBYID          = 503,  /* AFS Get Cache Entry By ID operation ID */
+        VLGETENTRYBYNAME        = 504,  /* AFS Get Cache Entry By Name operation ID */
+        VLPROBE                 = 514,  /* AFS Probe Volume Location Service operation ID */
+};
 enum AFSVL_Errors {
        AFSVL_IDEXIST           = 363520,       /* Volume Id entry exists in vl database */
@@ -40,14 +49,16 @@ enum AFSVL_Errors {
        AFSVL_BADVOLOPER        = 363542,       /* Bad volume operation code */
        AFSVL_BADRELLOCKTYPE    = 363543,       /* Bad release lock type */
        AFSVL_RERELEASE         = 363544,       /* Status report: last release was aborted */
-        AFSVL_BADSERVERFLAG     = 363545,       /* Invalid replication site server �ag */
+        AFSVL_BADSERVERFLAG     = 363545,       /* Invalid replication site server flag */
        AFSVL_PERM              = 363546,       /* No permission access */
        AFSVL_NOMEM             = 363547,       /* malloc/realloc failed to alloc enough memory */
 };
-/* maps to "struct vldbentry" in vvl-spec.pdf */
+/*
+ * maps to "struct vldbentry" in vvl-spec.pdf
+ */
 struct afs_vldbentry {
-        char            name[65];               /* name of volume (including NUL char) */
+        char            name[65];               /* name of volume (with NUL char) */
        afs_voltype_t   type;                   /* volume type */
        unsigned        num_servers;            /* num servers that hold instances of this vol */
        unsigned        clone_id;               /* cloning ID */
@@ -68,26 +79,6 @@ struct afs_vldbentry {
 #define AFS_VLSF_RWVOL          0x0004  /* this server holds a R/W instance of the volume */
 #define AFS_VLSF_BACKVOL        0x0008  /* this server holds a backup instance of the volume */
        } servers[8];
 };
-/* look up a volume location database entry by name */
+#endif /* AFS_VL_H */
-extern int afs_rxvl_get_entry_by_name(struct afs_server *server,
-                                      const char *volname,
-                                      unsigned volnamesz,
-                                      struct afs_cache_vlocation *entry);
-/* look up a volume location database entry by ID */
-extern int afs_rxvl_get_entry_by_id(struct afs_server *server,
-                                    afs_volid_t volid,
-                                    afs_voltype_t voltype,
-                                    struct afs_cache_vlocation *entry);
-extern int afs_rxvl_get_entry_by_id_async(struct afs_async_op *op,
-                                          afs_volid_t volid,
-                                          afs_voltype_t voltype);
-extern int afs_rxvl_get_entry_by_id_async2(struct afs_async_op *op,
-                                           struct afs_cache_vlocation *entry);
-#endif /* _LINUX_AFS_VLCLIENT_H */
diff --git a/fs/afs/cache.c b/fs/afs/cache.c
new file mode 100644
index 000000000000..de0d7de69edc
--- /dev/null
+++ b/fs/afs/cache.c
@@ -0,0 +1,256 @@
+/* AFS caching stuff
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#ifdef AFS_CACHING_SUPPORT
+static cachefs_match_val_t afs_cell_cache_match(void *target,
+                                                const void *entry);
+static void afs_cell_cache_update(void *source, void *entry);
+struct cachefs_index_def afs_cache_cell_index_def = {
+        .name                   = "cell_ix",
+        .data_size              = sizeof(struct afs_cache_cell),
+        .keys[0]                = { CACHEFS_INDEX_KEYS_ASCIIZ, 64 },
+        .match                  = afs_cell_cache_match,
+        .update                 = afs_cell_cache_update,
+};
+#endif
+/*
+ * match a cell record obtained from the cache
+ * - target is used as the in-memory struct afs_cell and entry as the
+ *   struct afs_cache_cell being compared against it
+ * - returns CACHEFS_MATCH_SUCCESS if the two names compare equal over
+ *   sizeof(ccell->name) bytes, CACHEFS_MATCH_FAILED otherwise
+ * - only compiled when AFS_CACHING_SUPPORT is defined
+ */
+#ifdef AFS_CACHING_SUPPORT
+static cachefs_match_val_t afs_cell_cache_match(void *target,
+                                                const void *entry)
+{
+        const struct afs_cache_cell *ccell = entry;
+        struct afs_cell *cell = target;
+        _enter("{%s},{%s}", ccell->name, cell->name);
+        if (strncmp(ccell->name, cell->name, sizeof(ccell->name)) == 0) {
+                _leave(" = SUCCESS");
+                return CACHEFS_MATCH_SUCCESS;
+        }
+        _leave(" = FAILED");
+        return CACHEFS_MATCH_FAILED;
+}
+#endif
+/*
+ * update a cell record in the cache
+ * - source is used as the in-memory struct afs_cell and entry as the
+ *   struct afs_cache_cell to be filled in
+ * - copies cell->name into ccell->name and cell->vl_addrs into
+ *   ccell->vl_servers, the latter clamped to the smaller of the two arrays
+ * - NOTE(review): strncpy() does not NUL-terminate ccell->name if cell->name
+ *   fills the whole field - confirm names are always shorter than that
+ * - only compiled when AFS_CACHING_SUPPORT is defined
+ */
+#ifdef AFS_CACHING_SUPPORT
+static void afs_cell_cache_update(void *source, void *entry)
+{
+        struct afs_cache_cell *ccell = entry;
+        struct afs_cell *cell = source;
+        _enter("%p,%p", source, entry);
+        strncpy(ccell->name, cell->name, sizeof(ccell->name));
+        memcpy(ccell->vl_servers,
+               cell->vl_addrs,
+               min(sizeof(ccell->vl_servers), sizeof(cell->vl_addrs)));
+}
+#endif
+#ifdef AFS_CACHING_SUPPORT
+static cachefs_match_val_t afs_vlocation_cache_match(void *target,
+                                                     const void *entry);
+static void afs_vlocation_cache_update(void *source, void *entry);
+struct cachefs_index_def afs_vlocation_cache_index_def = {
+        .name           = "vldb",
+        .data_size      = sizeof(struct afs_cache_vlocation),
+        .keys[0]        = { CACHEFS_INDEX_KEYS_ASCIIZ, 64 },
+        .match          = afs_vlocation_cache_match,
+        .update         = afs_vlocation_cache_update,
+};
+#endif
+/*
+ * match a VLDB record stored in the cache
+ * - may also load target from entry
+ * - target is used as the in-memory struct afs_vlocation and entry as the
+ *   cached struct afs_cache_vlocation
+ * - names differ: CACHEFS_MATCH_FAILED
+ * - names match and the in-memory record is not yet valid, or both records
+ *   carry the same rtime: the cached record is copied over vlocation->vldb,
+ *   the vlocation is marked valid and CACHEFS_MATCH_SUCCESS is returned
+ * - names match but the records differ bytewise:
+ *   CACHEFS_MATCH_SUCCESS_DELETE if the vid fields differ, otherwise
+ *   CACHEFS_MATCH_SUCCESS_UPDATE
+ * - names match and the records are bytewise identical:
+ *   CACHEFS_MATCH_SUCCESS
+ * - only compiled when AFS_CACHING_SUPPORT is defined
+ */
+#ifdef AFS_CACHING_SUPPORT
+static cachefs_match_val_t afs_vlocation_cache_match(void *target,
+                                                     const void *entry)
+{
+        const struct afs_cache_vlocation *vldb = entry;
+        struct afs_vlocation *vlocation = target;
+        _enter("{%s},{%s}", vlocation->vldb.name, vldb->name);
+        if (strncmp(vlocation->vldb.name, vldb->name, sizeof(vldb->name)) == 0
+            ) {
+                if (!vlocation->valid ||
+                    vlocation->vldb.rtime == vldb->rtime
+                    ) {
+                        vlocation->vldb = *vldb;
+                        vlocation->valid = 1;
+                        _leave(" = SUCCESS [c->m]");
+                        return CACHEFS_MATCH_SUCCESS;
+                } else if (memcmp(&vlocation->vldb, vldb, sizeof(*vldb)) != 0) {
+                        /* delete if VIDs for this name differ */
+                        if (memcmp(&vlocation->vldb.vid,
+                                   &vldb->vid,
+                                   sizeof(vldb->vid)) != 0) {
+                                _leave(" = DELETE");
+                                return CACHEFS_MATCH_SUCCESS_DELETE;
+                        }
+                        _leave(" = UPDATE");
+                        return CACHEFS_MATCH_SUCCESS_UPDATE;
+                } else {
+                        _leave(" = SUCCESS");
+                        return CACHEFS_MATCH_SUCCESS;
+                }
+        }
+        _leave(" = FAILED");
+        return CACHEFS_MATCH_FAILED;
+}
+#endif
+/*
+ * update a VLDB record stored in the cache
+ * - source is used as the in-memory struct afs_vlocation and entry as the
+ *   cached struct afs_cache_vlocation
+ * - the whole of vlocation->vldb is copied over the cache record by
+ *   structure assignment
+ * - only compiled when AFS_CACHING_SUPPORT is defined
+ */
+#ifdef AFS_CACHING_SUPPORT
+static void afs_vlocation_cache_update(void *source, void *entry)
+{
+        struct afs_cache_vlocation *vldb = entry;
+        struct afs_vlocation *vlocation = source;
+        _enter("");
+        *vldb = vlocation->vldb;
+}
+#endif
+#ifdef AFS_CACHING_SUPPORT
+static cachefs_match_val_t afs_volume_cache_match(void *target,
+                                                  const void *entry);
+static void afs_volume_cache_update(void *source, void *entry);
+struct cachefs_index_def afs_volume_cache_index_def = {
+        .name           = "volume",
+        .data_size      = sizeof(struct afs_cache_vhash),
+        .keys[0]        = { CACHEFS_INDEX_KEYS_BIN, 1 },
+        .keys[1]        = { CACHEFS_INDEX_KEYS_BIN, 1 },
+        .match          = afs_volume_cache_match,
+        .update         = afs_volume_cache_update,
+};
+#endif
+/*
+ * match a volume hash record stored in the cache
+ * - target is used as the in-memory struct afs_volume and entry as the
+ *   cached struct afs_cache_vhash
+ * - returns CACHEFS_MATCH_SUCCESS if volume->type equals the cached vtype,
+ *   CACHEFS_MATCH_FAILED otherwise
+ * - only compiled when AFS_CACHING_SUPPORT is defined
+ */
+#ifdef AFS_CACHING_SUPPORT
+static cachefs_match_val_t afs_volume_cache_match(void *target,
+                                                  const void *entry)
+{
+        const struct afs_cache_vhash *vhash = entry;
+        struct afs_volume *volume = target;
+        _enter("{%u},{%u}", volume->type, vhash->vtype);
+        if (volume->type == vhash->vtype) {
+                _leave(" = SUCCESS");
+                return CACHEFS_MATCH_SUCCESS;
+        }
+        _leave(" = FAILED");
+        return CACHEFS_MATCH_FAILED;
+}
+#endif
+/*
+ * update a volume hash record stored in the cache
+ * - source is used as the in-memory struct afs_volume and entry as the
+ *   cached struct afs_cache_vhash
+ * - the only field written is vhash->vtype, taken from volume->type
+ * - only compiled when AFS_CACHING_SUPPORT is defined
+ */
+#ifdef AFS_CACHING_SUPPORT
+static void afs_volume_cache_update(void *source, void *entry)
+{
+        struct afs_cache_vhash *vhash = entry;
+        struct afs_volume *volume = source;
+        _enter("");
+        vhash->vtype = volume->type;
+}
+#endif
+#ifdef AFS_CACHING_SUPPORT
+static cachefs_match_val_t afs_vnode_cache_match(void *target,
+                                                 const void *entry);
+static void afs_vnode_cache_update(void *source, void *entry);
+struct cachefs_index_def afs_vnode_cache_index_def = {
+        .name           = "vnode",
+        .data_size      = sizeof(struct afs_cache_vnode),
+        .keys[0]        = { CACHEFS_INDEX_KEYS_BIN, 4 },
+        .match          = afs_vnode_cache_match,
+        .update         = afs_vnode_cache_update,
+};
+#endif
+/*
+ * match a vnode record stored in the cache
+ * - target is used as the in-memory struct afs_vnode and entry as the
+ *   cached struct afs_cache_vnode
+ * - vnode IDs differ: CACHEFS_MATCH_FAILED
+ * - vnode IDs match but the uniquifier or the data version differs:
+ *   CACHEFS_MATCH_SUCCESS_DELETE
+ * - everything matches: CACHEFS_MATCH_SUCCESS
+ * - NOTE(review): this reads vnode->status.version, but struct
+ *   afs_file_status in afs.h names its data version field data_version -
+ *   confirm this still builds if AFS_CACHING_SUPPORT is ever defined
+ * - only compiled when AFS_CACHING_SUPPORT is defined
+ */
+#ifdef AFS_CACHING_SUPPORT
+static cachefs_match_val_t afs_vnode_cache_match(void *target,
+                                                 const void *entry)
+{
+        const struct afs_cache_vnode *cvnode = entry;
+        struct afs_vnode *vnode = target;
+        _enter("{%x,%x,%Lx},{%x,%x,%Lx}",
+               vnode->fid.vnode,
+               vnode->fid.unique,
+               vnode->status.version,
+               cvnode->vnode_id,
+               cvnode->vnode_unique,
+               cvnode->data_version);
+        if (vnode->fid.vnode != cvnode->vnode_id) {
+                _leave(" = FAILED");
+                return CACHEFS_MATCH_FAILED;
+        }
+        if (vnode->fid.unique != cvnode->vnode_unique ||
+            vnode->status.version != cvnode->data_version) {
+                _leave(" = DELETE");
+                return CACHEFS_MATCH_SUCCESS_DELETE;
+        }
+        _leave(" = SUCCESS");
+        return CACHEFS_MATCH_SUCCESS;
+}
+#endif
+/*
+ * update a vnode record stored in the cache
+ * - source is used as the in-memory struct afs_vnode and entry as the
+ *   cached struct afs_cache_vnode
+ * - stores the vnode ID, the uniquifier and the data version in the record
+ * - NOTE(review): this reads vnode->status.version, but struct
+ *   afs_file_status in afs.h names its data version field data_version -
+ *   confirm this still builds if AFS_CACHING_SUPPORT is ever defined
+ * - only compiled when AFS_CACHING_SUPPORT is defined
+ */
+#ifdef AFS_CACHING_SUPPORT
+static void afs_vnode_cache_update(void *source, void *entry)
+{
+        struct afs_cache_vnode *cvnode = entry;
+        struct afs_vnode *vnode = source;
+        _enter("");
+        cvnode->vnode_id        = vnode->fid.vnode;
+        cvnode->vnode_unique    = vnode->fid.unique;
+        cvnode->data_version    = vnode->status.version;
+}
+#endif
diff --git a/fs/afs/cache.h b/fs/afs/cache.h
index 9eb7722b34d5..36a3642cf90e 100644
--- a/fs/afs/cache.h
+++ b/fs/afs/cache.h
@@ -1,4 +1,4 @@
-/* cache.h: AFS local cache management interface
+/* AFS local cache management interface
 *
 * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
@@ -9,8 +9,8 @@
 * 2 of the License, or (at your option) any later version.
 */
-#ifndef _LINUX_AFS_CACHE_H
+#ifndef AFS_CACHE_H
-#define _LINUX_AFS_CACHE_H
+#define AFS_CACHE_H
 #undef AFS_CACHING_SUPPORT
@@ -20,8 +20,4 @@
 #endif
 #include "types.h"
-#ifdef __KERNEL__
+#endif /* AFS_CACHE_H */
-#endif /* __KERNEL__ */
-#endif /* _LINUX_AFS_CACHE_H */
diff --git a/fs/afs/callback.c b/fs/afs/callback.c
index 9cb206e9d4be..639399f0ab6f 100644
--- a/fs/afs/callback.c
+++ b/fs/afs/callback.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2002 Red Hat, Inc. All rights reserved.
+ * Copyright (c) 2002, 2007 Red Hat, Inc. All rights reserved.
 *
 * This software may be freely redistributed under the terms of the
 * GNU General Public License.
@@ -16,85 +16,187 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/init.h>
-#include "server.h"
+#include <linux/circ_buf.h>
-#include "vnode.h"
 #include "internal.h"
-#include "cmservice.h"
-/*****************************************************************************/
+unsigned afs_vnode_update_timeout = 10;
+#define afs_breakring_space(server) \
+        CIRC_SPACE((server)->cb_break_head, (server)->cb_break_tail,    \
+                   ARRAY_SIZE((server)->cb_break))
+//static void afs_callback_updater(struct work_struct *);
+static struct workqueue_struct *afs_callback_update_worker;
 /*
 * allow the fileserver to request callback state (re-)initialisation
 */
-int SRXAFSCM_InitCallBackState(struct afs_server *server)
+void afs_init_callback_state(struct afs_server *server)
 {
-        struct list_head callbacks;
+        struct afs_vnode *vnode;
-        _enter("%p", server);
+        _enter("{%p}", server);
-        INIT_LIST_HEAD(&callbacks);
-        /* transfer the callback list from the server to a temp holding area */
        spin_lock(&server->cb_lock);
-        list_add(&callbacks, &server->cb_promises);
+        /* kill all the promises on record from this server */
-        list_del_init(&server->cb_promises);
+        while (!RB_EMPTY_ROOT(&server->cb_promises)) {
+                vnode = rb_entry(server->cb_promises.rb_node,
+                                 struct afs_vnode, cb_promise);
+                _debug("UNPROMISE { vid=%x vn=%u uq=%u}",
+                       vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique);
+                rb_erase(&vnode->cb_promise, &server->cb_promises);
+                vnode->cb_promised = false;
+        }
-        /* munch our way through the list, grabbing the inode, dropping all the
+        spin_unlock(&server->cb_lock);
-         * locks and regetting them in the right order
+        _leave("");
-         */
+}
-        while (!list_empty(&callbacks)) {
-                struct afs_vnode *vnode;
-                struct inode *inode;
-                vnode = list_entry(callbacks.next, struct afs_vnode, cb_link);
+/*
-                list_del_init(&vnode->cb_link);
+ * handle the data invalidation side of a callback being broken
+ */
+void afs_broken_callback_work(struct work_struct *work)
+{
+        struct afs_vnode *vnode =
+                container_of(work, struct afs_vnode, cb_broken_work);
-                /* try and grab the inode - may fail */
+        _enter("");
-                inode = igrab(AFS_VNODE_TO_I(vnode));
-                if (inode) {
-                        int release = 0;
-                        spin_unlock(&server->cb_lock);
+        if (test_bit(AFS_VNODE_DELETED, &vnode->flags))
-                        spin_lock(&vnode->lock);
+                return;
-                        if (vnode->cb_server == server) {
+        /* we're only interested in dealing with a broken callback on *this*
-                                vnode->cb_server = NULL;
+         * vnode and only if no-one else has dealt with it yet */
-                                afs_kafstimod_del_timer(&vnode->cb_timeout);
+        if (!mutex_trylock(&vnode->validate_lock))
-                                spin_lock(&afs_cb_hash_lock);
+                return; /* someone else is dealing with it */
-                                list_del_init(&vnode->cb_hash_link);
-                                spin_unlock(&afs_cb_hash_lock);
-                                release = 1;
-                        }
-                        spin_unlock(&vnode->lock);
+        if (test_bit(AFS_VNODE_CB_BROKEN, &vnode->flags)) {
+                if (S_ISDIR(vnode->vfs_inode.i_mode))
+                        afs_clear_permits(vnode);
-                        iput(inode);
+                if (afs_vnode_fetch_status(vnode, NULL, NULL) < 0)
-                        afs_put_server(server);
+                        goto out;
-                        spin_lock(&server->cb_lock);
+                if (test_bit(AFS_VNODE_DELETED, &vnode->flags))
+                        goto out;
+                /* if the vnode's data version number changed then its contents
+                 * are different */
+                if (test_and_clear_bit(AFS_VNODE_ZAP_DATA, &vnode->flags)) {
+                        _debug("zap data {%x:%u}",
+                               vnode->fid.vid, vnode->fid.vnode);
+                        invalidate_remote_inode(&vnode->vfs_inode);
                }
        }
-        spin_unlock(&server->cb_lock);
+out:
+        mutex_unlock(&vnode->validate_lock);
-        _leave(" = 0");
+        /* avoid the potential race whereby the mutex_trylock() in this
-        return 0;
+         * function happens again between the clear_bit() and the
-} /* end SRXAFSCM_InitCallBackState() */
+         * mutex_unlock() */
+        if (test_bit(AFS_VNODE_CB_BROKEN, &vnode->flags)) {
+                _debug("requeue");
+                queue_work(afs_callback_update_worker, &vnode->cb_broken_work);
+        }
+        _leave("");
+}
+/*
+ * actually break a callback
+ * - the vnode is always flagged AFS_VNODE_CB_BROKEN
+ * - if a promise is on record it is removed from the server's cb_promises
+ *   tree and cb_broken_work is queued on afs_callback_update_worker
+ * - cb_promised is tested once without any lock held and then again under
+ *   server->cb_lock before the tree is touched
+ * - locks are taken in the order vnode->lock then server->cb_lock
+ */
+static void afs_break_callback(struct afs_server *server,
+                               struct afs_vnode *vnode)
+{
+        _enter("");
+        set_bit(AFS_VNODE_CB_BROKEN, &vnode->flags);
+        if (vnode->cb_promised) {
+                spin_lock(&vnode->lock);
+                _debug("break callback");
+                spin_lock(&server->cb_lock);
+                if (vnode->cb_promised) {
+                        rb_erase(&vnode->cb_promise, &server->cb_promises);
+                        vnode->cb_promised = false;
+                }
+                spin_unlock(&server->cb_lock);
+                queue_work(afs_callback_update_worker, &vnode->cb_broken_work);
+                spin_unlock(&vnode->lock);
+        }
+}
+/*
+ * allow the fileserver to explicitly break one callback
+ * - happens when
+ *   - the backing file is changed
+ *   - a lock is released
+ * - the fid is looked up in the server's fs_vnodes tree (ordered by vid,
+ *   then vnode, then unique) with server->fs_lock held
+ * - a fid that isn't in the tree, or whose inode can't be pinned with
+ *   igrab(), is ignored
+ * - the inode ref is held across afs_break_callback(), which is called after
+ *   fs_lock has been dropped, and is released afterwards
+ */
+static void afs_break_one_callback(struct afs_server *server,
+                                   struct afs_fid *fid)
+{
+        struct afs_vnode *vnode;
+        struct rb_node *p;
+        _debug("find");
+        spin_lock(&server->fs_lock);
+        p = server->fs_vnodes.rb_node;
+        while (p) {
+                vnode = rb_entry(p, struct afs_vnode, server_rb);
+                if (fid->vid < vnode->fid.vid)
+                        p = p->rb_left;
+                else if (fid->vid > vnode->fid.vid)
+                        p = p->rb_right;
+                else if (fid->vnode < vnode->fid.vnode)
+                        p = p->rb_left;
+                else if (fid->vnode > vnode->fid.vnode)
+                        p = p->rb_right;
+                else if (fid->unique < vnode->fid.unique)
+                        p = p->rb_left;
+                else if (fid->unique > vnode->fid.unique)
+                        p = p->rb_right;
+                else
+                        goto found;
+        }
+        /* not found so we just ignore it (it may have moved to another
+         * server) */
+not_available:
+        _debug("not avail");
+        spin_unlock(&server->fs_lock);
+        _leave("");
+        return;
+found:
+        _debug("found");
+        ASSERTCMP(server, ==, vnode->server);
+        if (!igrab(AFS_VNODE_TO_I(vnode)))
+                goto not_available;
+        spin_unlock(&server->fs_lock);
+        afs_break_callback(server, vnode);
+        iput(&vnode->vfs_inode);
+        _leave("");
+}
-/*****************************************************************************/
 /*
 * allow the fileserver to break callback promises
 */
-int SRXAFSCM_CallBack(struct afs_server *server, size_t count,
+void afs_break_callbacks(struct afs_server *server, size_t count,
-                      struct afs_callback callbacks[])
+                         struct afs_callback callbacks[])
 {
-        _enter("%p,%u,", server, count);
+        _enter("%p,%zu,", server, count);
-        for (; count > 0; callbacks++, count--) {
+        ASSERT(server != NULL);
-                struct afs_vnode *vnode = NULL;
+        ASSERTCMP(count, <=, AFSCBMAX);
-                struct inode *inode = NULL;
-                int valid = 0;
+        for (; count > 0; callbacks++, count--) {
                _debug("- Fid { vl=%08x n=%u u=%u }  CB { v=%u x=%u t=%u }",
                       callbacks->fid.vid,
                       callbacks->fid.vnode,
@@ -103,67 +205,270 @@ int SRXAFSCM_CallBack(struct afs_server *server, size_t count,
                       callbacks->expiry,
                       callbacks->type
                       );
+                afs_break_one_callback(server, &callbacks->fid);
+        }
-                /* find the inode for this fid */
+        _leave("");
-                spin_lock(&afs_cb_hash_lock);
+        return;
+}
-                list_for_each_entry(vnode,
+/*
-                                    &afs_cb_hash(server, &callbacks->fid),
+ * record the callback for breaking
-                                    cb_hash_link) {
+ * - the caller must hold server->cb_lock
+ * - copies the vnode's fid and callback version/expiry/type into the
+ *   server->cb_break[] slot at cb_break_head, then advances the head, masking
+ *   it with the array size minus one (assumes a power-of-two array size -
+ *   TODO confirm)
+ * - NOTE(review): the smp_wmb() presumably orders the slot writes before the
+ *   head update for whoever consumes the ring - confirm against the reader
+ * - cb_break_n counts 1 .. AFSCBMAX - 1 queue cb_break_work with a delay of
+ *   HZ * 2; reaching AFSCBMAX calls afs_flush_callback_breaks() instead
+ * - finally erases the vnode's promise from server->cb_promises and clears
+ *   vnode->cb_promised
-                        if (memcmp(&vnode->fid, &callbacks->fid,
+ */
-                                   sizeof(struct afs_fid)) != 0)
+static void afs_do_give_up_callback(struct afs_server *server,
-                                continue;
+                                    struct afs_vnode *vnode)
+{
+        struct afs_callback *cb;
-                        /* right vnode, but is it same server? */
+        _enter("%p,%p", server, vnode);
-                        if (vnode->cb_server != server)
-                                break; /* no */
-                        /* try and nail the inode down */
+        cb = &server->cb_break[server->cb_break_head];
-                        inode = igrab(AFS_VNODE_TO_I(vnode));
+        cb->fid         = vnode->fid;
-                        break;
+        cb->version     = vnode->cb_version;
+        cb->expiry      = vnode->cb_expiry;
+        cb->type        = vnode->cb_type;
+        smp_wmb();
+        server->cb_break_head =
+                (server->cb_break_head + 1) &
+                (ARRAY_SIZE(server->cb_break) - 1);
+        /* defer the breaking of callbacks to try and collect as many as
+         * possible to ship in one operation */
+        switch (atomic_inc_return(&server->cb_break_n)) {
+        case 1 ... AFSCBMAX - 1:
+                queue_delayed_work(afs_callback_update_worker,
+                                   &server->cb_break_work, HZ * 2);
+                break;
+        case AFSCBMAX:
+                afs_flush_callback_breaks(server);
+                break;
+        default:
+                break;
+        }
+        ASSERT(server->cb_promises.rb_node != NULL);
+        rb_erase(&vnode->cb_promise, &server->cb_promises);
+        vnode->cb_promised = false;
+        _leave("");
+}
+/*
+ * drop the callback promise recorded for a vnode that has been deleted
+ * - does nothing if the vnode holds no promise
+ * - otherwise takes server->cb_lock, rechecks, and unhooks the vnode from
+ *   server->cb_promises; nothing is queued for the file server here
+ */
+void afs_discard_callback_on_delete(struct afs_vnode *vnode)
+{
+        struct afs_server *server = vnode->server;
+        _enter("%d", vnode->cb_promised);
+        if (vnode->cb_promised) {
+                ASSERT(server != NULL);
+                spin_lock(&server->cb_lock);
+                /* the promise may have gone away before we got the lock */
+                if (vnode->cb_promised) {
+                        ASSERT(server->cb_promises.rb_node != NULL);
+                        rb_erase(&vnode->cb_promise, &server->cb_promises);
+                        vnode->cb_promised = false;
+                }
+                spin_unlock(&server->cb_lock);
+                _leave("");
+                return;
+        }
+        _leave(" [not promised]");
+}
+/*
+ * give up the callback registered for a vnode on the file server when the
+ * inode is being cleared
+ * - returns at once if no callback promise is recorded on the vnode
+ * - otherwise takes server->cb_lock and, while the promise is still held and
+ *   afs_breakring_space() reports no room, sleeps uninterruptibly on
+ *   server->cb_break_waitq, dropping the lock around schedule()
+ * - hands the promise to afs_do_give_up_callback() if it is still held once
+ *   the wait is over
+ */
+void afs_give_up_callback(struct afs_vnode *vnode)
+{
+        struct afs_server *server = vnode->server;
+        DECLARE_WAITQUEUE(myself, current);
+        _enter("%d", vnode->cb_promised);
+        _debug("GIVE UP INODE %p", &vnode->vfs_inode);
+        if (!vnode->cb_promised) {
+                _leave(" [not promised]");
+                return;
+        }
+        ASSERT(server != NULL);
+        spin_lock(&server->cb_lock);
+        if (vnode->cb_promised && afs_breakring_space(server) == 0) {
+                add_wait_queue(&server->cb_break_waitq, &myself);
+                for (;;) {
+                        set_current_state(TASK_UNINTERRUPTIBLE);
+                        if (!vnode->cb_promised ||
+                            afs_breakring_space(server) != 0)
+                                break;
+                        spin_unlock(&server->cb_lock);
+                        schedule();
+                        spin_lock(&server->cb_lock);
                 }
+                remove_wait_queue(&server->cb_break_waitq, &myself);
+                __set_current_state(TASK_RUNNING);
+        }
+        /* of course, it's always possible for the server to break this vnode's
+         * callback first... */
+        if (vnode->cb_promised)
+                afs_do_give_up_callback(server, vnode);
+        spin_unlock(&server->cb_lock);
+        _leave("");
+}
+/*
+ * work item handler: send the deferred give-up-callbacks request
+ * - "work" is the work_struct embedded in a server's cb_break_work
+ */
+void afs_dispatch_give_up_callbacks(struct work_struct *work)
+{
+        struct afs_server *srv;
+        _enter("");
+        /* recover the server record that owns this delayed work item */
+        srv = container_of(work, struct afs_server, cb_break_work.work);
+        /* ask the fileserver to drop the promises it holds for us
+         * - the result is deliberately ignored: on ENOMEM or any other
+         *   failure we simply forget the callbacks and leave it to the
+         *   server to break them later
+         */
+        afs_fs_give_up_callbacks(srv, &afs_async_call);
+}
+/*
+ * flush the outstanding callback breaks on a server
+ * - cancels the server's delayed cb_break_work and queues it again on
+ *   afs_callback_update_worker with a delay of zero
+ */
+void afs_flush_callback_breaks(struct afs_server *server)
+{
+        cancel_delayed_work(&server->cb_break_work);
+        queue_delayed_work(afs_callback_update_worker,
+                           &server->cb_break_work, 0);
+}
-                spin_unlock(&afs_cb_hash_lock);
+#if 0
+/*
-                if (inode) {
+ * update a bunch of callbacks
+ * - NOTE(review): this function is compiled out by the surrounding #if 0; it
+ *   refers to "vl" and "vldb", which are not declared among its locals, and
+ *   it applies both rb-tree and list operations to server->cb_promises, so
+ *   it presumably needs rework before it can be enabled
-                        /* we've found the record for this vnode */
+ */
-                        spin_lock(&vnode->lock);
+static void afs_callback_updater(struct work_struct *work)
-                        if (vnode->cb_server == server) {
+{
-                                /* the callback _is_ on the calling server */
+        struct afs_server *server;
-                                vnode->cb_server = NULL;
+        struct afs_vnode *vnode, *xvnode;
-                                valid = 1;
+        time_t now;
+        long timeout;
-                                afs_kafstimod_del_timer(&vnode->cb_timeout);
+        int ret;
-                                vnode->flags |= AFS_VNODE_CHANGED;
+        server = container_of(work, struct afs_server, updater);
-                                spin_lock(&server->cb_lock);
-                                list_del_init(&vnode->cb_link);
+        _enter("");
-                                spin_unlock(&server->cb_lock);
+        now = get_seconds();
-                                spin_lock(&afs_cb_hash_lock);
-                                list_del_init(&vnode->cb_hash_link);
+        /* find the first vnode to update */
-                                spin_unlock(&afs_cb_hash_lock);
+        spin_lock(&server->cb_lock);
-                        }
+        for (;;) {
-                        spin_unlock(&vnode->lock);
+                if (RB_EMPTY_ROOT(&server->cb_promises)) {
+                        spin_unlock(&server->cb_lock);
-                        if (valid) {
+                        _leave(" [nothing]");
-                                invalidate_remote_inode(inode);
+                        return;
-                                afs_put_server(server);
-                        }
-                        iput(inode);
                 }
+                vnode = rb_entry(rb_first(&server->cb_promises),
+                                 struct afs_vnode, cb_promise);
+                if (atomic_read(&vnode->usage) > 0)
+                        break;
+                rb_erase(&vnode->cb_promise, &server->cb_promises);
+                vnode->cb_promised = false;
         }
-        _leave(" = 0");
+        timeout = vnode->update_at - now;
-        return 0;
+        if (timeout > 0) {
-} /* end SRXAFSCM_CallBack() */
+                queue_delayed_work(afs_vnode_update_worker,
+                                   &afs_vnode_update, timeout * HZ);
+                spin_unlock(&server->cb_lock);
+                _leave(" [nothing]");
+                return;
+        }
+        list_del_init(&vnode->update);
+        atomic_inc(&vnode->usage);
+        spin_unlock(&server->cb_lock);
+        /* we can now perform the update */
+        _debug("update %s", vnode->vldb.name);
+        vnode->state = AFS_VL_UPDATING;
+        vnode->upd_rej_cnt = 0;
+        vnode->upd_busy_cnt = 0;
+        ret = afs_vnode_update_record(vl, &vldb);
+        switch (ret) {
+        case 0:
+                afs_vnode_apply_update(vl, &vldb);
+                vnode->state = AFS_VL_UPDATING;
+                break;
+        case -ENOMEDIUM:
+                vnode->state = AFS_VL_VOLUME_DELETED;
+                break;
+        default:
+                vnode->state = AFS_VL_UNCERTAIN;
+                break;
+        }
+        /* and then reschedule */
+        _debug("reschedule");
+        vnode->update_at = get_seconds() + afs_vnode_update_timeout;
+        spin_lock(&server->cb_lock);
+        if (!list_empty(&server->cb_promises)) {
+                /* next update in 10 minutes, but wait at least 1 second more
+                 * than the newest record already queued so that we don't spam
+                 * the VL server suddenly with lots of requests
+                 */
+                xvnode = list_entry(server->cb_promises.prev,
+                                    struct afs_vnode, update);
+                if (vnode->update_at <= xvnode->update_at)
+                        vnode->update_at = xvnode->update_at + 1;
+                xvnode = list_entry(server->cb_promises.next,
+                                    struct afs_vnode, update);
+                timeout = xvnode->update_at - now;
+                if (timeout < 0)
+                        timeout = 0;
+        } else {
+                timeout = afs_vnode_update_timeout;
+        }
+        list_add_tail(&vnode->update, &server->cb_promises);
+        _debug("timeout %ld", timeout);
+        queue_delayed_work(afs_vnode_update_worker,
+                           &afs_vnode_update, timeout * HZ);
+        spin_unlock(&server->cb_lock);
+        afs_put_vnode(vl);
+}
+#endif
+/*
+ * set up the workqueue that services callback updates
+ * - returns 0 on success, or -ENOMEM if the workqueue could not be created
+ */
+int __init afs_callback_update_init(void)
+{
+        afs_callback_update_worker =
+                create_singlethread_workqueue("kafs_callbackd");
+        if (!afs_callback_update_worker)
+                return -ENOMEM;
+        return 0;
+}
-/*****************************************************************************/
 /*
- * allow the fileserver to see if the cache manager is still alive
+ * shut down the callback update process
+ * - destroys the afs_callback_update_worker workqueue
  */
-int SRXAFSCM_Probe(struct afs_server *server)
+void __exit afs_callback_update_kill(void)
 {
-        _debug("SRXAFSCM_Probe(%p)\n", server);
+        destroy_workqueue(afs_callback_update_worker);
-        return 0;
+}
-} /* end SRXAFSCM_Probe() */
diff --git a/fs/afs/cell.c b/fs/afs/cell.c
index 1fc578372759..9b1311a1df51 100644
--- a/fs/afs/cell.c
+++ b/fs/afs/cell.c
@@ -1,4 +1,4 @@
-/* cell.c: AFS cell and server record management
+/* AFS cell and server record management
 *
 * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
@@ -11,15 +11,9 @@
 #include <linux/module.h>
 #include <linux/slab.h>
-#include <rxrpc/peer.h>
+#include <linux/key.h>
-#include <rxrpc/connection.h>
+#include <linux/ctype.h>
-#include "volume.h"
+#include <keys/rxrpc-type.h>
-#include "cell.h"
-#include "server.h"
-#include "transport.h"
-#include "vlclient.h"
-#include "kafstimod.h"
-#include "super.h"
 #include "internal.h"
 DECLARE_RWSEM(afs_proc_cells_sem);
@@ -28,66 +22,47 @@ LIST_HEAD(afs_proc_cells);
 static struct list_head afs_cells = LIST_HEAD_INIT(afs_cells);
 static DEFINE_RWLOCK(afs_cells_lock);
 static DECLARE_RWSEM(afs_cells_sem); /* add/remove serialisation */
+static DECLARE_WAIT_QUEUE_HEAD(afs_cells_freeable_wq);
 static struct afs_cell *afs_cell_root;
-#ifdef AFS_CACHING_SUPPORT
-static cachefs_match_val_t afs_cell_cache_match(void *target,
-                                                const void *entry);
-static void afs_cell_cache_update(void *source, void *entry);
-struct cachefs_index_def afs_cache_cell_index_def = {
-        .name                   = "cell_ix",
-        .data_size              = sizeof(struct afs_cache_cell),
-        .keys[0]                = { CACHEFS_INDEX_KEYS_ASCIIZ, 64 },
-        .match                  = afs_cell_cache_match,
-        .update                 = afs_cell_cache_update,
-};
-#endif
-/*****************************************************************************/
 /*
- * create a cell record
+ * allocate a cell record and fill in its name, VL server address list and
- * - "name" is the name of the cell
+ * allocate an anonymous key
+ * - "name" must not be longer than AFS_MAXCELLNAME
+ * - "vllist" is split in place at each ':' into dotted-quad addresses
+ * - returns the new cell with a usage count of 1, or an ERR_PTR()-encoded
+ *   error (-ENAMETOOLONG, -ENOMEM, -EINVAL or whatever key allocation or
+ *   instantiation returned)
+ * - on failure the record and any key obtained are released again
- * - "vllist" is a colon separated list of IP addresses in "a.b.c.d" format
  */
-int afs_cell_create(const char *name, char *vllist, struct afs_cell **_cell)
+static struct afs_cell *afs_cell_alloc(const char *name, char *vllist)
 {
         struct afs_cell *cell;
-        char *next;
+        size_t namelen;
+        char keyname[4 + AFS_MAXCELLNAME + 1], *cp, *dp, *next;
         int ret;
-        _enter("%s", name);
+        _enter("%s,%s", name, vllist);
         BUG_ON(!name); /* TODO: want to look up "this cell" in the cache */
+        namelen = strlen(name);
+        if (namelen > AFS_MAXCELLNAME)
+                return ERR_PTR(-ENAMETOOLONG);
         /* allocate and initialise a cell record */
-        cell = kmalloc(sizeof(struct afs_cell) + strlen(name) + 1, GFP_KERNEL);
+        cell = kzalloc(sizeof(struct afs_cell) + namelen + 1, GFP_KERNEL);
         if (!cell) {
                 _leave(" = -ENOMEM");
-                return -ENOMEM;
+                return ERR_PTR(-ENOMEM);
         }
-        down_write(&afs_cells_sem);
+        memcpy(cell->name, name, namelen);
+        cell->name[namelen] = 0;
-        memset(cell, 0, sizeof(struct afs_cell));
-        atomic_set(&cell->usage, 0);
+        atomic_set(&cell->usage, 1);
         INIT_LIST_HEAD(&cell->link);
+        rwlock_init(&cell->servers_lock);
-        rwlock_init(&cell->sv_lock);
+        INIT_LIST_HEAD(&cell->servers);
-        INIT_LIST_HEAD(&cell->sv_list);
-        INIT_LIST_HEAD(&cell->sv_graveyard);
-        spin_lock_init(&cell->sv_gylock);
         init_rwsem(&cell->vl_sem);
         INIT_LIST_HEAD(&cell->vl_list);
-        INIT_LIST_HEAD(&cell->vl_graveyard);
+        spin_lock_init(&cell->vl_lock);
-        spin_lock_init(&cell->vl_gylock);
-        strcpy(cell->name,name);
         /* fill in the VL server list from the rest of the string */
-        ret = -EINVAL;
         do {
                 unsigned a, b, c, d;
@@ -96,20 +71,75 @@ int afs_cell_create(const char *name, char *vllist, struct afs_cell **_cell)
                         *next++ = 0;
                 if (sscanf(vllist, "%u.%u.%u.%u", &a, &b, &c, &d) != 4)
-                        goto badaddr;
+                        goto bad_address;
                 if (a > 255 || b > 255 || c > 255 || d > 255)
-                        goto badaddr;
+                        goto bad_address;
                 cell->vl_addrs[cell->vl_naddrs++].s_addr =
                         htonl((a << 24) | (b << 16) | (c << 8) | d);
-                if (cell->vl_naddrs >= AFS_CELL_MAX_ADDRS)
+        } while (cell->vl_naddrs < AFS_CELL_MAX_ADDRS && (vllist = next));
-                        break;
+        /* create a key to represent an anonymous user */
+        memcpy(keyname, "afs@", 4);
+        dp = keyname + 4;
+        cp = cell->name;
+        do {
+                *dp++ = toupper(*cp);
+        } while (*cp++);
+        cell->anonymous_key = key_alloc(&key_type_rxrpc, keyname, 0, 0, current,
+                                        KEY_POS_SEARCH, KEY_ALLOC_NOT_IN_QUOTA);
+        if (IS_ERR(cell->anonymous_key)) {
+                _debug("no key");
+                ret = PTR_ERR(cell->anonymous_key);
+                /* the shared error path calls key_put() on this field, so it
+                 * must not be left holding an ERR_PTR() value */
+                cell->anonymous_key = NULL;
+                goto error;
+        }
+        ret = key_instantiate_and_link(cell->anonymous_key, NULL, 0,
+                                       NULL, NULL);
+        if (ret < 0) {
+                _debug("instantiate failed");
+                goto error;
+        }
+        _debug("anon key %p{%x}",
+               cell->anonymous_key, key_serial(cell->anonymous_key));
+        _leave(" = %p", cell);
+        return cell;
+bad_address:
+        printk(KERN_ERR "kAFS: bad VL server IP address\n");
+        ret = -EINVAL;
+error:
+        key_put(cell->anonymous_key);
+        kfree(cell);
+        _leave(" = %d", ret);
+        return ERR_PTR(ret);
+}
+/*
+ * create a cell record
+ * - "name" is the name of the cell
+ * - "vllist" is a colon separated list of IP addresses in "a.b.c.d" format
+ * - returns the new cell, or an ERR_PTR()-encoded error; a failure from
+ *   afs_cell_alloc() is passed straight back to the caller
+ * - the proc setup and the list insertion are done with afs_cells_sem
+ *   write-locked
+ * - on a later failure the anonymous key is put and the record is freed
+ */
+struct afs_cell *afs_cell_create(const char *name, char *vllist)
+{
+        struct afs_cell *cell;
+        int ret;
+        _enter("%s,%s", name, vllist);
-        } while(vllist = next, vllist);
+        cell = afs_cell_alloc(name, vllist);
+        if (IS_ERR(cell)) {
+                _leave(" = %ld", PTR_ERR(cell));
+                return cell;
+        }
+        down_write(&afs_cells_sem);
-        /* add a proc dir for this cell */
+        /* add a proc directory for this cell */
         ret = afs_proc_cell_setup(cell);
         if (ret < 0)
                 goto error;
@@ -130,31 +160,28 @@ int afs_cell_create(const char *name, char *vllist, struct afs_cell **_cell)
         down_write(&afs_proc_cells_sem);
         list_add_tail(&cell->proc_link, &afs_proc_cells);
         up_write(&afs_proc_cells_sem);
-        *_cell = cell;
         up_write(&afs_cells_sem);
-        _leave(" = 0 (%p)", cell);
+        _leave(" = %p", cell);
-        return 0;
+        return cell;
- badaddr:
+error:
-        printk(KERN_ERR "kAFS: bad VL server IP address: '%s'\n", vllist);
- error:
         up_write(&afs_cells_sem);
+        key_put(cell->anonymous_key);
         kfree(cell);
         _leave(" = %d", ret);
-        return ret;
+        return ERR_PTR(ret);
-} /* end afs_cell_create() */
+}
-/*****************************************************************************/
 /*
- * initialise the cell database from module parameters
+ * set the root cell information
+ * - can be called with a module parameter string
+ * - can be called from a write to /proc/fs/afs/rootcell
+ * - "rootcell" is split in place at the first ':' into the cell name and the
+ *   VL server list; -EINVAL is returned if it contains no ':'
+ * - the new cell replaces afs_cell_root under afs_cells_lock and the previous
+ *   root is then handed to afs_put_cell()
+ * - NOTE(review): old_root is NULL the first time a root cell is installed -
+ *   confirm that afs_put_cell() accepts a NULL cell
  */
 int afs_cell_init(char *rootcell)
 {
         struct afs_cell *old_root, *new_root;
         char *cp;
-        int ret;
         _enter("");
@@ -162,82 +189,60 @@ int afs_cell_init(char *rootcell)
                 /* module is loaded with no parameters, or built statically.
                  * - in the future we might initialize cell DB here.
                  */
-                _leave(" = 0 (but no root)");
+                _leave(" = 0 [no root]");
                 return 0;
         }
         cp = strchr(rootcell, ':');
         if (!cp) {
                 printk(KERN_ERR "kAFS: no VL server IP addresses specified\n");
-                _leave(" = %d (no colon)", -EINVAL);
+                _leave(" = -EINVAL");
                 return -EINVAL;
         }
         /* allocate a cell record for the root cell */
         *cp++ = 0;
-        ret = afs_cell_create(rootcell, cp, &new_root);
+        new_root = afs_cell_create(rootcell, cp);
-        if (ret < 0) {
+        if (IS_ERR(new_root)) {
-                _leave(" = %d", ret);
+                _leave(" = %ld", PTR_ERR(new_root));
-                return ret;
+                return PTR_ERR(new_root);
         }
-        /* as afs_put_cell() takes locks by itself, we have to do
+        /* install the new cell */
-         * a little gymnastics to be race-free.
-         */
-        afs_get_cell(new_root);
         write_lock(&afs_cells_lock);
-        while (afs_cell_root) {
+        old_root = afs_cell_root;
-                old_root = afs_cell_root;
-                afs_cell_root = NULL;
-                write_unlock(&afs_cells_lock);
-                afs_put_cell(old_root);
-                write_lock(&afs_cells_lock);
-        }
         afs_cell_root = new_root;
         write_unlock(&afs_cells_lock);
+        afs_put_cell(old_root);
-        _leave(" = %d", ret);
+        _leave(" = 0");
-        return ret;
+        return 0;
+}
-} /* end afs_cell_init() */
-/*****************************************************************************/
 /*
 * lookup a cell record
 */
-int afs_cell_lookup(const char *name, unsigned namesz, struct afs_cell **_cell)
+struct afs_cell *afs_cell_lookup(const char *name, unsigned namesz)
 {
        struct afs_cell *cell;
-        int ret;
        _enter("\"%*.*s\",", namesz, namesz, name ? name : "");
-        *_cell = NULL;
+        down_read(&afs_cells_sem);
+        read_lock(&afs_cells_lock);
        if (name) {
                /* if the cell was named, look for it in the cell record list */
-                ret = -ENOENT;
-                cell = NULL;
-                read_lock(&afs_cells_lock);
                list_for_each_entry(cell, &afs_cells, link) {
                        if (strncmp(cell->name, name, namesz) == 0) {
                                afs_get_cell(cell);
                                goto found;
                        }
                }
-                cell = NULL;
+                cell = ERR_PTR(-ENOENT);
        found:
+                ;
-                read_unlock(&afs_cells_lock);
+        } else {
-                if (cell)
-                        ret = 0;
-        }
-        else {
-                read_lock(&afs_cells_lock);
                cell = afs_cell_root;
                if (!cell) {
                        /* this should not happen unless user tries to mount
@@ -246,44 +251,35 @@ int afs_cell_lookup(const char *name, unsigned namesz, struct afs_cell **_cell)
                         * ENOENT might be "more appropriate" but they happen
                         * for other reasons.
                         */
-                        ret = -EDESTADDRREQ;
+                        cell = ERR_PTR(-EDESTADDRREQ);
-                }
+                } else {
-                else {
                        afs_get_cell(cell);
-                        ret = 0;
                }
-                read_unlock(&afs_cells_lock);
        }
-        *_cell = cell;
+        read_unlock(&afs_cells_lock);
-        _leave(" = %d (%p)", ret, cell);
+        up_read(&afs_cells_sem);
-        return ret;
+        _leave(" = %p", cell);
+        return cell;
-} /* end afs_cell_lookup() */
+}
-/*****************************************************************************/
 /*
 * try and get a cell record
 */
-struct afs_cell *afs_get_cell_maybe(struct afs_cell **_cell)
+struct afs_cell *afs_get_cell_maybe(struct afs_cell *cell)
 {
-        struct afs_cell *cell;
        write_lock(&afs_cells_lock);
-        cell = *_cell;
        if (cell && !list_empty(&cell->link))
                afs_get_cell(cell);
        else
                cell = NULL;
        write_unlock(&afs_cells_lock);
        return cell;
-} /* end afs_get_cell_maybe() */
+}
-/*****************************************************************************/
 /*
 * destroy a cell record
 */
@@ -294,8 +290,7 @@ void afs_put_cell(struct afs_cell *cell)
        _enter("%p{%d,%s}", cell, atomic_read(&cell->usage), cell->name);
-        /* sanity check */
+        ASSERTCMP(atomic_read(&cell->usage), >, 0);
-        BUG_ON(atomic_read(&cell->usage) <= 0);
        /* to prevent a race, the decrement and the dequeue must be effectively
         * atomic */
@@ -307,36 +302,49 @@ void afs_put_cell(struct afs_cell *cell)
                return;
        }
+        ASSERT(list_empty(&cell->servers));
+        ASSERT(list_empty(&cell->vl_list));
        write_unlock(&afs_cells_lock);
-        BUG_ON(!list_empty(&cell->sv_list));
+        wake_up(&afs_cells_freeable_wq);
-        BUG_ON(!list_empty(&cell->sv_graveyard));
-        BUG_ON(!list_empty(&cell->vl_list));
-        BUG_ON(!list_empty(&cell->vl_graveyard));
        _leave(" [unused]");
-} /* end afs_put_cell() */
+}
-/*****************************************************************************/
 /*
  * destroy a cell record
+ * - must be called with the afs_cells_sem write-locked
+ * - cell->link should have been broken by the caller
+ * - sleeps uninterruptibly on afs_cells_freeable_wq until cell->usage has
+ *   dropped to zero, then removes the proc entry, puts the anonymous key and
+ *   frees the record
  */
 static void afs_cell_destroy(struct afs_cell *cell)
 {
         _enter("%p{%d,%s}", cell, atomic_read(&cell->usage), cell->name);
-        /* to prevent a race, the decrement and the dequeue must be effectively
+        ASSERTCMP(atomic_read(&cell->usage), >=, 0);
-         * atomic */
+        ASSERT(list_empty(&cell->link));
-        write_lock(&afs_cells_lock);
-        /* sanity check */
+        /* wait for everyone to stop using the cell */
-        BUG_ON(atomic_read(&cell->usage) != 0);
+        if (atomic_read(&cell->usage) > 0) {
+                DECLARE_WAITQUEUE(myself, current);
-        list_del_init(&cell->link);
+                _debug("wait for cell %s", cell->name);
+                set_current_state(TASK_UNINTERRUPTIBLE);
+                add_wait_queue(&afs_cells_freeable_wq, &myself);
-        write_unlock(&afs_cells_lock);
+                while (atomic_read(&cell->usage) > 0) {
+                        schedule();
+                        set_current_state(TASK_UNINTERRUPTIBLE);
+                }
-        down_write(&afs_cells_sem);
+                remove_wait_queue(&afs_cells_freeable_wq, &myself);
+                set_current_state(TASK_RUNNING);
+        }
+        _debug("cell dead");
+        ASSERTCMP(atomic_read(&cell->usage), ==, 0);
+        ASSERT(list_empty(&cell->servers));
+        ASSERT(list_empty(&cell->vl_list));
         afs_proc_cell_remove(cell);
@@ -348,104 +356,26 @@ static void afs_cell_destroy(struct afs_cell *cell)
         cachefs_relinquish_cookie(cell->cache, 0);
 #endif
-        up_write(&afs_cells_sem);
+        key_put(cell->anonymous_key);
-        BUG_ON(!list_empty(&cell->sv_list));
-        BUG_ON(!list_empty(&cell->sv_graveyard));
-        BUG_ON(!list_empty(&cell->vl_list));
-        BUG_ON(!list_empty(&cell->vl_graveyard));
-        /* finish cleaning up the cell */
         kfree(cell);
         _leave(" [destroyed]");
-} /* end afs_cell_destroy() */
+}
-/*****************************************************************************/
-/*
- * lookup the server record corresponding to an Rx RPC peer
- */
-int afs_server_find_by_peer(const struct rxrpc_peer *peer,
-                            struct afs_server **_server)
-{
-        struct afs_server *server;
-        struct afs_cell *cell;
-        _enter("%p{a=%08x},", peer, ntohl(peer->addr.s_addr));
-        /* search the cell list */
-        read_lock(&afs_cells_lock);
-        list_for_each_entry(cell, &afs_cells, link) {
-                _debug("? cell %s",cell->name);
-                write_lock(&cell->sv_lock);
-                /* check the active list */
-                list_for_each_entry(server, &cell->sv_list, link) {
-                        _debug("?? server %08x", ntohl(server->addr.s_addr));
-                        if (memcmp(&server->addr, &peer->addr,
-                                   sizeof(struct in_addr)) == 0)
-                                goto found_server;
-                }
-                /* check the inactive list */
-                spin_lock(&cell->sv_gylock);
-                list_for_each_entry(server, &cell->sv_graveyard, link) {
-                        _debug("?? dead server %08x",
-                               ntohl(server->addr.s_addr));
-                        if (memcmp(&server->addr, &peer->addr,
-                                   sizeof(struct in_addr)) == 0)
-                                goto found_dead_server;
-                }
-                spin_unlock(&cell->sv_gylock);
-                write_unlock(&cell->sv_lock);
-        }
-        read_unlock(&afs_cells_lock);
-        _leave(" = -ENOENT");
-        return -ENOENT;
-        /* we found it in the graveyard - resurrect it */
- found_dead_server:
-        list_move_tail(&server->link, &cell->sv_list);
-        afs_get_server(server);
-        afs_kafstimod_del_timer(&server->timeout);
-        spin_unlock(&cell->sv_gylock);
-        goto success;
-        /* we found it - increment its ref count and return it */
- found_server:
-        afs_get_server(server);
- success:
-        write_unlock(&cell->sv_lock);
-        read_unlock(&afs_cells_lock);
-        *_server = server;
-        _leave(" = 0 (s=%p c=%p)", server, cell);
-        return 0;
-} /* end afs_server_find_by_peer() */
-/*****************************************************************************/
 /*
 * purge in-memory cell database on module unload or afs_init() failure
 * - the timeout daemon is stopped before calling this
 */
 void afs_cell_purge(void)
 {
-        struct afs_vlocation *vlocation;
        struct afs_cell *cell;
        _enter("");
        afs_put_cell(afs_cell_root);
+        down_write(&afs_cells_sem);
        while (!list_empty(&afs_cells)) {
                cell = NULL;
@@ -464,104 +394,11 @@ void afs_cell_purge(void)
                        _debug("PURGING CELL %s (%d)",
                               cell->name, atomic_read(&cell->usage));
-                        BUG_ON(!list_empty(&cell->sv_list));
-                        BUG_ON(!list_empty(&cell->vl_list));
-                        /* purge the cell's VL graveyard list */
-                        _debug(" - clearing VL graveyard");
-                        spin_lock(&cell->vl_gylock);
-                        while (!list_empty(&cell->vl_graveyard)) {
-                                vlocation = list_entry(cell->vl_graveyard.next,
-                                                       struct afs_vlocation,
-                                                       link);
-                                list_del_init(&vlocation->link);
-                                afs_kafstimod_del_timer(&vlocation->timeout);
-                                spin_unlock(&cell->vl_gylock);
-                                afs_vlocation_do_timeout(vlocation);
-                                /* TODO: race if move to use krxtimod instead
-                                 * of kafstimod */
-                                spin_lock(&cell->vl_gylock);
-                        }
-                        spin_unlock(&cell->vl_gylock);
-                        /* purge the cell's server graveyard list */
-                        _debug(" - clearing server graveyard");
-                        spin_lock(&cell->sv_gylock);
-                        while (!list_empty(&cell->sv_graveyard)) {
-                                struct afs_server *server;
-                                server = list_entry(cell->sv_graveyard.next,
-                                                    struct afs_server, link);
-                                list_del_init(&server->link);
-                                afs_kafstimod_del_timer(&server->timeout);
-                                spin_unlock(&cell->sv_gylock);
-                                afs_server_do_timeout(server);
-                                spin_lock(&cell->sv_gylock);
-                        }
-                        spin_unlock(&cell->sv_gylock);
                        /* now the cell should be left with no references */
                        afs_cell_destroy(cell);
                }
        }
+        up_write(&afs_cells_sem);
        _leave("");
-} /* end afs_cell_purge() */
+}
-/*****************************************************************************/
-/*
- * match a cell record obtained from the cache
- */
-#ifdef AFS_CACHING_SUPPORT
-static cachefs_match_val_t afs_cell_cache_match(void *target,
-                                                const void *entry)
-{
-        const struct afs_cache_cell *ccell = entry;
-        struct afs_cell *cell = target;
-        _enter("{%s},{%s}", ccell->name, cell->name);
-        if (strncmp(ccell->name, cell->name, sizeof(ccell->name)) == 0) {
-                _leave(" = SUCCESS");
-                return CACHEFS_MATCH_SUCCESS;
-        }
-        _leave(" = FAILED");
-        return CACHEFS_MATCH_FAILED;
-} /* end afs_cell_cache_match() */
-#endif
-/*****************************************************************************/
-/*
- * update a cell record in the cache
- */
-#ifdef AFS_CACHING_SUPPORT
-static void afs_cell_cache_update(void *source, void *entry)
-{
-        struct afs_cache_cell *ccell = entry;
-        struct afs_cell *cell = source;
-        _enter("%p,%p", source, entry);
-        strncpy(ccell->name, cell->name, sizeof(ccell->name));
-        memcpy(ccell->vl_servers,
-               cell->vl_addrs,
-               min(sizeof(ccell->vl_servers), sizeof(cell->vl_addrs)));
-} /* end afs_cell_cache_update() */
-#endif
diff --git a/fs/afs/cell.h b/fs/afs/cell.h
deleted file mode 100644
index 48349108fb00..000000000000
--- a/fs/afs/cell.h
+++ /dev/null
@@ -1,78 +0,0 @@
-/* cell.h: AFS cell record
- *
- * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-#ifndef _LINUX_AFS_CELL_H
-#define _LINUX_AFS_CELL_H
-#include "types.h"
-#include "cache.h"
-#define AFS_CELL_MAX_ADDRS 15
-extern volatile int afs_cells_being_purged; /* T when cells are being purged by rmmod */
-/*****************************************************************************/
-/*
- * entry in the cached cell catalogue
- */
-struct afs_cache_cell
-{
-        char                    name[64];       /* cell name (padded with NULs) */
-        struct in_addr          vl_servers[15]; /* cached cell VL servers */
-};
-/*****************************************************************************/
-/*
- * AFS cell record
- */
-struct afs_cell
-{
-        atomic_t                usage;
-        struct list_head        link;           /* main cell list link */
-        struct list_head        proc_link;      /* /proc cell list link */
-        struct proc_dir_entry   *proc_dir;      /* /proc dir for this cell */
-#ifdef AFS_CACHING_SUPPORT
-        struct cachefs_cookie   *cache;         /* caching cookie */
-#endif
-        /* server record management */
-        rwlock_t                sv_lock;        /* active server list lock */
-        struct list_head        sv_list;        /* active server list */
-        struct list_head        sv_graveyard;   /* inactive server list */
-        spinlock_t              sv_gylock;      /* inactive server list lock */
-        /* volume location record management */
-        struct rw_semaphore     vl_sem;         /* volume management serialisation semaphore */
-        struct list_head        vl_list;        /* cell's active VL record list */
-        struct list_head        vl_graveyard;   /* cell's inactive VL record list */
-        spinlock_t              vl_gylock;      /* graveyard lock */
-        unsigned short          vl_naddrs;      /* number of VL servers in addr list */
-        unsigned short          vl_curr_svix;   /* current server index */
-        struct in_addr          vl_addrs[AFS_CELL_MAX_ADDRS];   /* cell VL server addresses */
-        char                    name[0];        /* cell name - must go last */
-};
-extern int afs_cell_init(char *rootcell);
-extern int afs_cell_create(const char *name, char *vllist, struct afs_cell **_cell);
-extern int afs_cell_lookup(const char *name, unsigned nmsize, struct afs_cell **_cell);
-#define afs_get_cell(C) do { atomic_inc(&(C)->usage); } while(0)
-extern struct afs_cell *afs_get_cell_maybe(struct afs_cell **_cell);
-extern void afs_put_cell(struct afs_cell *cell);
-extern void afs_cell_purge(void);
-#endif /* _LINUX_AFS_CELL_H */
diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c
index 3d097fddcb7a..6685f4cbccb3 100644
--- a/fs/afs/cmservice.c
+++ b/fs/afs/cmservice.c
@@ -1,4 +1,4 @@
-/* cmservice.c: AFS Cache Manager Service
+/* AFS Cache Manager Service
 *
 * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
@@ -12,641 +12,463 @@
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/sched.h>
-#include <linux/completion.h>
+#include <linux/ip.h>
-#include "server.h"
-#include "cell.h"
-#include "transport.h"
-#include <rxrpc/rxrpc.h>
-#include <rxrpc/transport.h>
-#include <rxrpc/connection.h>
-#include <rxrpc/call.h>
-#include "cmservice.h"
 #include "internal.h"
+#include "afs_cm.h"
-static unsigned afscm_usage;            /* AFS cache manager usage count */
+struct workqueue_struct *afs_cm_workqueue;
-static struct rw_semaphore afscm_sem;   /* AFS cache manager start/stop semaphore */
-static int afscm_new_call(struct rxrpc_call *call);
-static void afscm_attention(struct rxrpc_call *call);
-static void afscm_error(struct rxrpc_call *call);
-static void afscm_aemap(struct rxrpc_call *call);
-static void _SRXAFSCM_CallBack(struct rxrpc_call *call);
-static void _SRXAFSCM_InitCallBackState(struct rxrpc_call *call);
-static void _SRXAFSCM_Probe(struct rxrpc_call *call);
-typedef void (*_SRXAFSCM_xxxx_t)(struct rxrpc_call *call);
-static const struct rxrpc_operation AFSCM_ops[] = {
-        {
-                .id     = 204,
-                .asize  = RXRPC_APP_MARK_EOF,
-                .name   = "CallBack",
-                .user   = _SRXAFSCM_CallBack,
-        },
-        {
-                .id     = 205,
-                .asize  = RXRPC_APP_MARK_EOF,
-                .name   = "InitCallBackState",
-                .user   = _SRXAFSCM_InitCallBackState,
-        },
-        {
-                .id     = 206,
-                .asize  = RXRPC_APP_MARK_EOF,
-                .name   = "Probe",
-                .user   = _SRXAFSCM_Probe,
-        },
-#if 0
-        {
-                .id     = 207,
-                .asize  = RXRPC_APP_MARK_EOF,
-                .name   = "GetLock",
-                .user   = _SRXAFSCM_GetLock,
-        },
-        {
-                .id     = 208,
-                .asize  = RXRPC_APP_MARK_EOF,
-                .name   = "GetCE",
-                .user   = _SRXAFSCM_GetCE,
-        },
-        {
-                .id     = 209,
-                .asize  = RXRPC_APP_MARK_EOF,
-                .name   = "GetXStatsVersion",
-                .user   = _SRXAFSCM_GetXStatsVersion,
-        },
-        {
-                .id     = 210,
-                .asize  = RXRPC_APP_MARK_EOF,
-                .name   = "GetXStats",
-                .user   = _SRXAFSCM_GetXStats,
-        }
-#endif
-};
-static struct rxrpc_service AFSCM_service = {
+static int afs_deliver_cb_init_call_back_state(struct afs_call *,
-        .name           = "AFS/CM",
+                                               struct sk_buff *, bool);
-        .owner          = THIS_MODULE,
+static int afs_deliver_cb_init_call_back_state3(struct afs_call *,
-        .link           = LIST_HEAD_INIT(AFSCM_service.link),
+                                                struct sk_buff *, bool);
-        .new_call       = afscm_new_call,
+static int afs_deliver_cb_probe(struct afs_call *, struct sk_buff *, bool);
-        .service_id     = 1,
+static int afs_deliver_cb_callback(struct afs_call *, struct sk_buff *, bool);
-        .attn_func      = afscm_attention,
+static int afs_deliver_cb_get_capabilities(struct afs_call *, struct sk_buff *,
-        .error_func     = afscm_error,
+                                           bool);
-        .aemap_func     = afscm_aemap,
+static void afs_cm_destructor(struct afs_call *);
-        .ops_begin      = &AFSCM_ops[0],
-        .ops_end        = &AFSCM_ops[ARRAY_SIZE(AFSCM_ops)],
-};
-static DECLARE_COMPLETION(kafscmd_alive);
-static DECLARE_COMPLETION(kafscmd_dead);
-static DECLARE_WAIT_QUEUE_HEAD(kafscmd_sleepq);
-static LIST_HEAD(kafscmd_attention_list);
-static LIST_HEAD(afscm_calls);
-static DEFINE_SPINLOCK(afscm_calls_lock);
-static DEFINE_SPINLOCK(kafscmd_attention_lock);
-static int kafscmd_die;
-/*****************************************************************************/
 /*
- * AFS Cache Manager kernel thread
+ * CB.CallBack operation type
 */
-static int kafscmd(void *arg)
+static const struct afs_call_type afs_SRXCBCallBack = {
-{
+        .name           = "CB.CallBack",
-        DECLARE_WAITQUEUE(myself, current);
+        .deliver        = afs_deliver_cb_callback,
+        .abort_to_error = afs_abort_to_error,
-        struct rxrpc_call *call;
+        .destructor     = afs_cm_destructor,
-        _SRXAFSCM_xxxx_t func;
+};
-        int die;
-        printk(KERN_INFO "kAFS: Started kafscmd %d\n", current->pid);
-        daemonize("kafscmd");
-        complete(&kafscmd_alive);
-        /* loop around looking for things to attend to */
-        do {
-                if (list_empty(&kafscmd_attention_list)) {
-                        set_current_state(TASK_INTERRUPTIBLE);
-                        add_wait_queue(&kafscmd_sleepq, &myself);
-                        for (;;) {
-                                set_current_state(TASK_INTERRUPTIBLE);
-                                if (!list_empty(&kafscmd_attention_list) ||
-                                    signal_pending(current) ||
-                                    kafscmd_die)
-                                        break;
-                                schedule();
-                        }
-                        remove_wait_queue(&kafscmd_sleepq, &myself);
-                        set_current_state(TASK_RUNNING);
-                }
-                die = kafscmd_die;
-                /* dequeue the next call requiring attention */
-                call = NULL;
-                spin_lock(&kafscmd_attention_lock);
-                if (!list_empty(&kafscmd_attention_list)) {
-                        call = list_entry(kafscmd_attention_list.next,
-                                          struct rxrpc_call,
-                                          app_attn_link);
-                        list_del_init(&call->app_attn_link);
-                        die = 0;
-                }
-                spin_unlock(&kafscmd_attention_lock);
-                if (call) {
-                        /* act upon it */
-                        _debug("@@@ Begin Attend Call %p", call);
-                        func = call->app_user;
-                        if (func)
-                                func(call);
-                        rxrpc_put_call(call);
-                        _debug("@@@ End Attend Call %p", call);
-                }
-        } while(!die);
-        /* and that's all */
-        complete_and_exit(&kafscmd_dead, 0);
-} /* end kafscmd() */
-/*****************************************************************************/
 /*
- * handle a call coming in to the cache manager
+ * CB.InitCallBackState operation type
- * - if I want to keep the call, I must increment its usage count
- * - the return value will be negated and passed back in an abort packet if
- *   non-zero
- * - serialised by virtue of there only being one krxiod
 */
-static int afscm_new_call(struct rxrpc_call *call)
+static const struct afs_call_type afs_SRXCBInitCallBackState = {
-{
+        .name           = "CB.InitCallBackState",
-        _enter("%p{cid=%u u=%d}",
+        .deliver        = afs_deliver_cb_init_call_back_state,
-               call, ntohl(call->call_id), atomic_read(&call->usage));
+        .abort_to_error = afs_abort_to_error,
+        .destructor     = afs_cm_destructor,
-        rxrpc_get_call(call);
+};
-        /* add to my current call list */
-        spin_lock(&afscm_calls_lock);
-        list_add(&call->app_link,&afscm_calls);
-        spin_unlock(&afscm_calls_lock);
-        _leave(" = 0");
-        return 0;
-} /* end afscm_new_call() */
-/*****************************************************************************/
 /*
- * queue on the kafscmd queue for attention
+ * CB.InitCallBackState3 operation type
 */
-static void afscm_attention(struct rxrpc_call *call)
+static const struct afs_call_type afs_SRXCBInitCallBackState3 = {
-{
+        .name           = "CB.InitCallBackState3",
-        _enter("%p{cid=%u u=%d}",
+        .deliver        = afs_deliver_cb_init_call_back_state3,
-               call, ntohl(call->call_id), atomic_read(&call->usage));
+        .abort_to_error = afs_abort_to_error,
+        .destructor     = afs_cm_destructor,
-        spin_lock(&kafscmd_attention_lock);
+};
-        if (list_empty(&call->app_attn_link)) {
-                list_add_tail(&call->app_attn_link, &kafscmd_attention_list);
-                rxrpc_get_call(call);
-        }
-        spin_unlock(&kafscmd_attention_lock);
-        wake_up(&kafscmd_sleepq);
-        _leave(" {u=%d}", atomic_read(&call->usage));
-} /* end afscm_attention() */
-/*****************************************************************************/
 /*
- * handle my call being aborted
+ * CB.Probe operation type
- * - clean up, dequeue and put my ref to the call
 */
-static void afscm_error(struct rxrpc_call *call)
+static const struct afs_call_type afs_SRXCBProbe = {
-{
+        .name           = "CB.Probe",
-        int removed;
+        .deliver        = afs_deliver_cb_probe,
+        .abort_to_error = afs_abort_to_error,
-        _enter("%p{est=%s ac=%u er=%d}",
+        .destructor     = afs_cm_destructor,
-               call,
+};
-               rxrpc_call_error_states[call->app_err_state],
-               call->app_abort_code,
-               call->app_errno);
-        spin_lock(&kafscmd_attention_lock);
-        if (list_empty(&call->app_attn_link)) {
-                list_add_tail(&call->app_attn_link, &kafscmd_attention_list);
-                rxrpc_get_call(call);
-        }
-        spin_unlock(&kafscmd_attention_lock);
-        removed = 0;
-        spin_lock(&afscm_calls_lock);
-        if (!list_empty(&call->app_link)) {
-                list_del_init(&call->app_link);
-                removed = 1;
-        }
-        spin_unlock(&afscm_calls_lock);
-        if (removed)
-                rxrpc_put_call(call);
-        wake_up(&kafscmd_sleepq);
-        _leave("");
+/*
-} /* end afscm_error() */
+ * CB.GetCapabilities operation type
+ */
+static const struct afs_call_type afs_SRXCBGetCapabilites = {
+        .name           = "CB.GetCapabilities",
+        .deliver        = afs_deliver_cb_get_capabilities,
+        .abort_to_error = afs_abort_to_error,
+        .destructor     = afs_cm_destructor,
+};
-/*****************************************************************************/
 /*
- * map afs abort codes to/from Linux error codes
+ * route an incoming cache manager call
- * - called with call->lock held
+ * - returns true if the operation is supported, false if not
 */
-static void afscm_aemap(struct rxrpc_call *call)
+bool afs_cm_incoming_call(struct afs_call *call)
 {
-        switch (call->app_err_state) {
+        u32 operation_id = ntohl(call->operation_ID);
-        case RXRPC_ESTATE_LOCAL_ABORT:
-                call->app_abort_code = -call->app_errno;
+        _enter("{CB.OP %u}", operation_id);
-                break;
-        case RXRPC_ESTATE_PEER_ABORT:
+        switch (operation_id) {
-                call->app_errno = -ECONNABORTED;
+        case CBCallBack:
-                break;
+                call->type = &afs_SRXCBCallBack;
+                return true;
+        case CBInitCallBackState:
+                call->type = &afs_SRXCBInitCallBackState;
+                return true;
+        case CBInitCallBackState3:
+                call->type = &afs_SRXCBInitCallBackState3;
+                return true;
+        case CBProbe:
+                call->type = &afs_SRXCBProbe;
+                return true;
+        case CBGetCapabilities:
+                call->type = &afs_SRXCBGetCapabilites;
+                return true;
        default:
-                break;
+                return false;
        }
-} /* end afscm_aemap() */
+}
-/*****************************************************************************/
 /*
- * start the cache manager service if not already started
+ * clean up a cache manager call
 */
-int afscm_start(void)
+static void afs_cm_destructor(struct afs_call *call)
 {
-        int ret;
+        _enter("");
-        down_write(&afscm_sem);
-        if (!afscm_usage) {
-                ret = kernel_thread(kafscmd, NULL, 0);
-                if (ret < 0)
-                        goto out;
-                wait_for_completion(&kafscmd_alive);
-                ret = rxrpc_add_service(afs_transport, &AFSCM_service);
-                if (ret < 0)
-                        goto kill;
-                afs_kafstimod_add_timer(&afs_mntpt_expiry_timer,
-                                        afs_mntpt_expiry_timeout * HZ);
-        }
-        afscm_usage++;
-        up_write(&afscm_sem);
-        return 0;
- kill:
-        kafscmd_die = 1;
-        wake_up(&kafscmd_sleepq);
-        wait_for_completion(&kafscmd_dead);
- out:
-        up_write(&afscm_sem);
-        return ret;
-} /* end afscm_start() */
+        afs_put_server(call->server);
+        call->server = NULL;
+        kfree(call->buffer);
+        call->buffer = NULL;
+}
-/*****************************************************************************/
 /*
- * stop the cache manager service
+ * allow the fileserver to break callback promises
 */
-void afscm_stop(void)
+static void SRXAFSCB_CallBack(struct work_struct *work)
 {
-        struct rxrpc_call *call;
+        struct afs_call *call = container_of(work, struct afs_call, work);
-        down_write(&afscm_sem);
+        _enter("");
-        BUG_ON(afscm_usage == 0);
+        /* be sure to send the reply *before* attempting to spam the AFS server
-        afscm_usage--;
+         * with FSFetchStatus requests on the vnodes with broken callbacks lest
+         * the AFS server get into a vicious cycle of trying to break further
+         * callbacks because it hadn't received completion of the CBCallBack op
+         * yet */
+        afs_send_empty_reply(call);
-        if (afscm_usage == 0) {
+        afs_break_callbacks(call->server, call->count, call->request);
-                /* don't want more incoming calls */
+        _leave("");
-                rxrpc_del_service(afs_transport, &AFSCM_service);
+}
-                /* abort any calls I've still got open (the afscm_error() will
-                 * dequeue them) */
-                spin_lock(&afscm_calls_lock);
-                while (!list_empty(&afscm_calls)) {
-                        call = list_entry(afscm_calls.next,
-                                          struct rxrpc_call,
-                                          app_link);
-                        list_del_init(&call->app_link);
+/*
-                        rxrpc_get_call(call);
+ * deliver request data to a CB.CallBack call
-                        spin_unlock(&afscm_calls_lock);
+ */
+static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb,
+                                   bool last)
+{
+        struct afs_callback *cb;
+        struct afs_server *server;
+        struct in_addr addr;
+        __be32 *bp;
+        u32 tmp;
+        int ret, loop;
+        _enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
+        switch (call->unmarshall) {
+        case 0:
+                call->offset = 0;
+                call->unmarshall++;
+                /* extract the FID array and its count in two steps */
+        case 1:
+                _debug("extract FID count");
+                ret = afs_extract_data(call, skb, last, &call->tmp, 4);
+                switch (ret) {
+                case 0:         break;
+                case -EAGAIN:   return 0;
+                default:        return ret;
+                }
-                        rxrpc_call_abort(call, -ESRCH); /* abort, dequeue and
+                call->count = ntohl(call->tmp);
-                                                         * put */
+                _debug("FID count: %u", call->count);
+                if (call->count > AFSCBMAX)
+                        return -EBADMSG;
+                call->buffer = kmalloc(call->count * 3 * 4, GFP_KERNEL);
+                if (!call->buffer)
+                        return -ENOMEM;
+                call->offset = 0;
+                call->unmarshall++;
+        case 2:
+                _debug("extract FID array");
+                ret = afs_extract_data(call, skb, last, call->buffer,
+                                       call->count * 3 * 4);
+                switch (ret) {
+                case 0:         break;
+                case -EAGAIN:   return 0;
+                default:        return ret;
+                }
-                        _debug("nuking active call %08x.%d",
+                _debug("unmarshall FID array");
-                               ntohl(call->conn->conn_id),
+                call->request = kcalloc(call->count,
-                               ntohl(call->call_id));
+                                        sizeof(struct afs_callback),
-                        rxrpc_put_call(call);
+                                        GFP_KERNEL);
-                        rxrpc_put_call(call);
+                if (!call->request)
+                        return -ENOMEM;
+                cb = call->request;
+                bp = call->buffer;
+                for (loop = call->count; loop > 0; loop--, cb++) {
+                        cb->fid.vid     = ntohl(*bp++);
+                        cb->fid.vnode   = ntohl(*bp++);
+                        cb->fid.unique  = ntohl(*bp++);
+                        cb->type        = AFSCM_CB_UNTYPED;
+                }
-                        spin_lock(&afscm_calls_lock);
+                call->offset = 0;
+                call->unmarshall++;
+                /* extract the callback array and its count in two steps */
+        case 3:
+                _debug("extract CB count");
+                ret = afs_extract_data(call, skb, last, &call->tmp, 4);
+                switch (ret) {
+                case 0:         break;
+                case -EAGAIN:   return 0;
+                default:        return ret;
                }
-                spin_unlock(&afscm_calls_lock);
-                /* get rid of my daemon */
+                tmp = ntohl(call->tmp);
-                kafscmd_die = 1;
+                _debug("CB count: %u", tmp);
-                wake_up(&kafscmd_sleepq);
+                if (tmp != call->count && tmp != 0)
-                wait_for_completion(&kafscmd_dead);
+                        return -EBADMSG;
+                call->offset = 0;
+                call->unmarshall++;
+                if (tmp == 0)
+                        goto empty_cb_array;
+        case 4: /* stage wire data in call->buffer, not over the decoded FIDs in call->request */
+                _debug("extract CB array");
+                ret = afs_extract_data(call, skb, last, call->buffer,
+                                       call->count * 3 * 4);
+                switch (ret) {
+                case 0:         break;
+                case -EAGAIN:   return 0;
+                default:        return ret;
+                }
-                /* dispose of any calls waiting for attention */
+                _debug("unmarshall CB array");
-                spin_lock(&kafscmd_attention_lock);
+                cb = call->request;
-                while (!list_empty(&kafscmd_attention_list)) {
+                bp = call->buffer;
-                        call = list_entry(kafscmd_attention_list.next,
+                for (loop = call->count; loop > 0; loop--, cb++) {
-                                          struct rxrpc_call,
+                        cb->version     = ntohl(*bp++);
-                                          app_attn_link);
+                        cb->expiry      = ntohl(*bp++);
+                        cb->type        = ntohl(*bp++);
+                }
-                        list_del_init(&call->app_attn_link);
+        empty_cb_array:
-                        spin_unlock(&kafscmd_attention_lock);
+                call->offset = 0;
+                call->unmarshall++;
-                        rxrpc_put_call(call);
+        case 5:
+                _debug("trailer");
+                if (skb->len != 0)
+                        return -EBADMSG;
+                break;
+        }
-                        spin_lock(&kafscmd_attention_lock);
+        if (!last)
-                }
+                return 0;
-                spin_unlock(&kafscmd_attention_lock);
-                afs_kafstimod_del_timer(&afs_mntpt_expiry_timer);
+        call->state = AFS_CALL_REPLYING;
-        }
-        up_write(&afscm_sem);
+        /* we'll need the file server record as that tells us which set of
+         * vnodes to operate upon */
+        memcpy(&addr, &ip_hdr(skb)->saddr, 4);
+        server = afs_find_server(&addr);
+        if (!server)
+                return -ENOTCONN;
+        call->server = server;
-} /* end afscm_stop() */
+        INIT_WORK(&call->work, SRXAFSCB_CallBack);
+        schedule_work(&call->work);
+        return 0;
+}
-/*****************************************************************************/
 /*
- * handle the fileserver breaking a set of callbacks
+ * allow the fileserver to request callback state (re-)initialisation
 */
-static void _SRXAFSCM_CallBack(struct rxrpc_call *call)
+static void SRXAFSCB_InitCallBackState(struct work_struct *work)
 {
-        struct afs_server *server;
+        struct afs_call *call = container_of(work, struct afs_call, work);
-        size_t count, qty, tmp;
-        int ret = 0, removed;
-        _enter("%p{acs=%s}", call, rxrpc_call_states[call->app_call_state]);
-        server = afs_server_get_from_peer(call->conn->peer);
-        switch (call->app_call_state) {
-                /* we've received the last packet
-                 * - drain all the data from the call and send the reply
-                 */
-        case RXRPC_CSTATE_SRVR_GOT_ARGS:
-                ret = -EBADMSG;
-                qty = call->app_ready_qty;
-                if (qty < 8 || qty > 50 * (6 * 4) + 8)
-                        break;
-                {
-                        struct afs_callback *cb, *pcb;
-                        int loop;
-                        __be32 *fp, *bp;
-                        fp = rxrpc_call_alloc_scratch(call, qty);
-                        /* drag the entire argument block out to the scratch
-                         * space */
-                        ret = rxrpc_call_read_data(call, fp, qty, 0);
-                        if (ret < 0)
-                                break;
-                        /* and unmarshall the parameter block */
-                        ret = -EBADMSG;
-                        count = ntohl(*fp++);
-                        if (count>AFSCBMAX ||
-                            (count * (3 * 4) + 8 != qty &&
-                             count * (6 * 4) + 8 != qty))
-                                break;
-                        bp = fp + count*3;
-                        tmp = ntohl(*bp++);
-                        if (tmp > 0 && tmp != count)
-                                break;
-                        if (tmp == 0)
-                                bp = NULL;
-                        pcb = cb = rxrpc_call_alloc_scratch_s(
-                                call, struct afs_callback);
-                        for (loop = count - 1; loop >= 0; loop--) {
-                                pcb->fid.vid    = ntohl(*fp++);
-                                pcb->fid.vnode  = ntohl(*fp++);
-                                pcb->fid.unique = ntohl(*fp++);
-                                if (bp) {
-                                        pcb->version    = ntohl(*bp++);
-                                        pcb->expiry     = ntohl(*bp++);
-                                        pcb->type       = ntohl(*bp++);
-                                }
-                                else {
-                                        pcb->version    = 0;
-                                        pcb->expiry     = 0;
-                                        pcb->type       = AFSCM_CB_UNTYPED;
-                                }
-                                pcb++;
-                        }
-                        /* invoke the actual service routine */
-                        ret = SRXAFSCM_CallBack(server, count, cb);
-                        if (ret < 0)
-                                break;
-                }
-                /* send the reply */
+        _enter("{%p}", call->server);
-                ret = rxrpc_call_write_data(call, 0, NULL, RXRPC_LAST_PACKET,
-                                            GFP_KERNEL, 0, &count);
-                if (ret < 0)
-                        break;
-                break;
-                /* operation complete */
-        case RXRPC_CSTATE_COMPLETE:
-                call->app_user = NULL;
-                removed = 0;
-                spin_lock(&afscm_calls_lock);
-                if (!list_empty(&call->app_link)) {
-                        list_del_init(&call->app_link);
-                        removed = 1;
-                }
-                spin_unlock(&afscm_calls_lock);
-                if (removed)
+        afs_init_callback_state(call->server);
-                        rxrpc_put_call(call);
+        afs_send_empty_reply(call);
-                break;
+        _leave("");
+}
-                /* operation terminated on error */
+/*
-        case RXRPC_CSTATE_ERROR:
+ * deliver request data to a CB.InitCallBackState call
-                call->app_user = NULL;
+ */
-                break;
+static int afs_deliver_cb_init_call_back_state(struct afs_call *call,
+                                               struct sk_buff *skb,
+                                               bool last)
+{
+        struct afs_server *server;
+        struct in_addr addr;
-        default:
+        _enter(",{%u},%d", skb->len, last);
-                break;
-        }
-        if (ret < 0)
+        if (skb->len > 0)
-                rxrpc_call_abort(call, ret);
+                return -EBADMSG;
+        if (!last)
+                return 0;
-        afs_put_server(server);
+        /* no unmarshalling required */
+        call->state = AFS_CALL_REPLYING;
-        _leave(" = %d", ret);
+        /* we'll need the file server record as that tells us which set of
+         * vnodes to operate upon */
+        memcpy(&addr, &ip_hdr(skb)->saddr, 4);
+        server = afs_find_server(&addr);
+        if (!server)
+                return -ENOTCONN;
+        call->server = server;
-} /* end _SRXAFSCM_CallBack() */
+        INIT_WORK(&call->work, SRXAFSCB_InitCallBackState);
+        schedule_work(&call->work);
+        return 0;
+}
-/*****************************************************************************/
 /*
- * handle the fileserver asking us to initialise our callback state
+ * deliver request data to a CB.InitCallBackState3 call
+ * - nothing is done (0 is returned) until the packet flagged as last arrives
+ * - the call is then marked as replying and the server record is looked up
+ *   from the IPv4 source address of the packet; -ENOTCONN is returned if
+ *   afs_find_server() comes back empty-handed
+ * - otherwise the server is attached to the call and
+ *   SRXAFSCB_InitCallBackState() is scheduled through call->work
 */
-static void _SRXAFSCM_InitCallBackState(struct rxrpc_call *call)
+static int afs_deliver_cb_init_call_back_state3(struct afs_call *call,
+                                                struct sk_buff *skb,
+                                                bool last)
 {
        struct afs_server *server;
-        size_t count;
+        struct in_addr addr;
-        int ret = 0, removed;
-        _enter("%p{acs=%s}", call, rxrpc_call_states[call->app_call_state]);
+        _enter(",{%u},%d", skb->len, last);
-        server = afs_server_get_from_peer(call->conn->peer);
+        if (!last)
+                return 0;
-        switch (call->app_call_state) {
+        /* no unmarshalling required */
-                /* we've received the last packet - drain all the data from the
+        call->state = AFS_CALL_REPLYING;
-                 * call */
-        case RXRPC_CSTATE_SRVR_GOT_ARGS:
-                /* shouldn't be any args */
-                ret = -EBADMSG;
-                break;
-                /* send the reply when asked for it */
-        case RXRPC_CSTATE_SRVR_SND_REPLY:
-                /* invoke the actual service routine */
-                ret = SRXAFSCM_InitCallBackState(server);
-                if (ret < 0)
-                        break;
-                ret = rxrpc_call_write_data(call, 0, NULL, RXRPC_LAST_PACKET,
-                                            GFP_KERNEL, 0, &count);
-                if (ret < 0)
-                        break;
-                break;
-                /* operation complete */
+        /* we'll need the file server record as that tells us which set of
-        case RXRPC_CSTATE_COMPLETE:
+         * vnodes to operate upon */
-                call->app_user = NULL;
+        memcpy(&addr, &ip_hdr(skb)->saddr, 4);
-                removed = 0;
+        server = afs_find_server(&addr);
-                spin_lock(&afscm_calls_lock);
+        if (!server)
-                if (!list_empty(&call->app_link)) {
+                return -ENOTCONN;
-                        list_del_init(&call->app_link);
+        call->server = server;
-                        removed = 1;
-                }
-                spin_unlock(&afscm_calls_lock);
-                if (removed)
+        INIT_WORK(&call->work, SRXAFSCB_InitCallBackState);
-                        rxrpc_put_call(call);
+        schedule_work(&call->work);
-                break;
+        return 0;
+}
-                /* operation terminated on error */
-        case RXRPC_CSTATE_ERROR:
-                call->app_user = NULL;
-                break;
-        default:
-                break;
-        }
-        if (ret < 0)
-                rxrpc_call_abort(call, ret);
-        afs_put_server(server);
-        _leave(" = %d", ret);
+/*
+ * allow the fileserver to see if the cache manager is still alive
+ * - work item handler: recovers the afs_call that embeds the work_struct and
+ *   answers it with an empty reply
+ */
+static void SRXAFSCB_Probe(struct work_struct *work)
+{
+        struct afs_call *call = container_of(work, struct afs_call, work);
-} /* end _SRXAFSCM_InitCallBackState() */
+        _enter("");
+        afs_send_empty_reply(call);
+        _leave("");
+}
-/*****************************************************************************/
 /*
- * handle a probe from a fileserver
+ * deliver request data to a CB.Probe call
+ * - the request takes no arguments, so any packet carrying data is rejected
+ *   with -EBADMSG
+ * - 0 is returned without further action until the last packet is flagged;
+ *   the call is then marked as replying and SRXAFSCB_Probe() is scheduled
+ *   through call->work
 */
-static void _SRXAFSCM_Probe(struct rxrpc_call *call)
+static int afs_deliver_cb_probe(struct afs_call *call, struct sk_buff *skb,
+                                bool last)
 {
-        struct afs_server *server;
+        _enter(",{%u},%d", skb->len, last);
-        size_t count;
-        int ret = 0, removed;
-        _enter("%p{acs=%s}", call, rxrpc_call_states[call->app_call_state]);
-        server = afs_server_get_from_peer(call->conn->peer);
+        if (skb->len > 0)
+                return -EBADMSG;
+        if (!last)
+                return 0;
-        switch (call->app_call_state) {
+        /* no unmarshalling required */
-                /* we've received the last packet - drain all the data from the
+        call->state = AFS_CALL_REPLYING;
-                 * call */
-        case RXRPC_CSTATE_SRVR_GOT_ARGS:
-                /* shouldn't be any args */
-                ret = -EBADMSG;
-                break;
-                /* send the reply when asked for it */
+        INIT_WORK(&call->work, SRXAFSCB_Probe);
-        case RXRPC_CSTATE_SRVR_SND_REPLY:
+        schedule_work(&call->work);
-                /* invoke the actual service routine */
+        return 0;
-                ret = SRXAFSCM_Probe(server);
+}
-                if (ret < 0)
-                        break;
-                ret = rxrpc_call_write_data(call, 0, NULL, RXRPC_LAST_PACKET,
-                                            GFP_KERNEL, 0, &count);
-                if (ret < 0)
-                        break;
-                break;
-                /* operation complete */
+/*
-        case RXRPC_CSTATE_COMPLETE:
+ * allow the fileserver to ask about the cache manager's capabilities
-                call->app_user = NULL;
+ * - work item handler: the reply carries an InterfaceAddr block (interface
+ *   count, the cache manager's UUID spread over 11 words and up to 32 IPv4
+ *   address/netmask/MTU triplets) followed by a one-entry capabilities list
+ * - if the interface list can't be allocated or queried, the reply is still
+ *   sent but reports zero interfaces
+ * - the temporary interface list is freed once it has been copied out
+ */
-                removed = 0;
+static void SRXAFSCB_GetCapabilities(struct work_struct *work)
-                spin_lock(&afscm_calls_lock);
+{
-                if (!list_empty(&call->app_link)) {
+        struct afs_interface *ifs;
-                        list_del_init(&call->app_link);
+        struct afs_call *call = container_of(work, struct afs_call, work);
-                        removed = 1;
+        int loop, nifs;
+        struct {
+                struct /* InterfaceAddr */ {
+                        __be32 nifs;
+                        __be32 uuid[11];
+                        __be32 ifaddr[32];
+                        __be32 netmask[32];
+                        __be32 mtu[32];
+                } ia;
+                struct /* Capabilities */ {
+                        __be32 capcount;
+                        __be32 caps[1];
+                } cap;
+        } reply;
+        _enter("");
+        nifs = 0;
+        ifs = kcalloc(32, sizeof(*ifs), GFP_KERNEL);
+        if (ifs) {
+                nifs = afs_get_ipv4_interfaces(ifs, 32, false);
+                if (nifs < 0) {
+                        kfree(ifs);
+                        ifs = NULL;
+                        nifs = 0;
                }
-                spin_unlock(&afscm_calls_lock);
+        }
-                if (removed)
+        memset(&reply, 0, sizeof(reply));
-                        rxrpc_put_call(call);
+        reply.ia.nifs = htonl(nifs);
-                break;
+        reply.ia.uuid[0] = htonl(afs_uuid.time_low);
+        reply.ia.uuid[1] = htonl(afs_uuid.time_mid);
+        reply.ia.uuid[2] = htonl(afs_uuid.time_hi_and_version);
+        reply.ia.uuid[3] = htonl((s8) afs_uuid.clock_seq_hi_and_reserved);
+        reply.ia.uuid[4] = htonl((s8) afs_uuid.clock_seq_low);
+        for (loop = 0; loop < 6; loop++)
+                reply.ia.uuid[loop + 5] = htonl((s8) afs_uuid.node[loop]);
+        if (ifs) {
+                for (loop = 0; loop < nifs; loop++) {
+                        reply.ia.ifaddr[loop] = ifs[loop].address.s_addr;
+                        reply.ia.netmask[loop] = ifs[loop].netmask.s_addr;
+                        reply.ia.mtu[loop] = htonl(ifs[loop].mtu);
+                }
+                /* everything needed is now in the reply; without this the
+                 * list would be leaked on every successful call */
+                kfree(ifs);
+        }
-                /* operation terminated on error */
+        reply.cap.capcount = htonl(1);
-        case RXRPC_CSTATE_ERROR:
+        reply.cap.caps[0] = htonl(AFS_CAP_ERROR_TRANSLATION);
-                call->app_user = NULL;
+        afs_send_simple_reply(call, &reply, sizeof(reply));
-                break;
-        default:
+        _leave("");
-                break;
+}
-        }
-        if (ret < 0)
+/*
-                rxrpc_call_abort(call, ret);
+ * deliver request data to a CB.GetCapabilities call
+ * - the request takes no arguments, so any packet carrying data is rejected
+ *   with -EBADMSG
+ * - 0 is returned without further action until the last packet is flagged;
+ *   the call is then marked as replying and SRXAFSCB_GetCapabilities() is
+ *   scheduled through call->work
+ */
+static int afs_deliver_cb_get_capabilities(struct afs_call *call,
+                                           struct sk_buff *skb, bool last)
+{
+        _enter(",{%u},%d", skb->len, last);
-        afs_put_server(server);
+        if (skb->len > 0)
+                return -EBADMSG;
+        if (!last)
+                return 0;
-        _leave(" = %d", ret);
+        /* no unmarshalling required */
+        call->state = AFS_CALL_REPLYING;
-} /* end _SRXAFSCM_Probe() */
+        INIT_WORK(&call->work, SRXAFSCB_GetCapabilities);
+        schedule_work(&call->work);
+        return 0;
+}
diff --git a/fs/afs/cmservice.h b/fs/afs/cmservice.h
deleted file mode 100644
index af8d4d689cb2..000000000000
--- a/fs/afs/cmservice.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/* cmservice.h: AFS Cache Manager Service declarations
- *
- * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-#ifndef _LINUX_AFS_CMSERVICE_H
-#define _LINUX_AFS_CMSERVICE_H
-#include <rxrpc/transport.h>
-#include "types.h"
-/* cache manager start/stop */
-extern int afscm_start(void);
-extern void afscm_stop(void);
-/* cache manager server functions */
-extern int SRXAFSCM_InitCallBackState(struct afs_server *server);
-extern int SRXAFSCM_CallBack(struct afs_server *server,
-                             size_t count,
-                             struct afs_callback callbacks[]);
-extern int SRXAFSCM_Probe(struct afs_server *server);
-#endif /* _LINUX_AFS_CMSERVICE_H */
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index b6dc2ebe47a8..dac5b990c0cd 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -15,45 +15,53 @@
 #include <linux/slab.h>
 #include <linux/fs.h>
 #include <linux/pagemap.h>
-#include <linux/smp_lock.h>
+#include <linux/ctype.h>
-#include "vnode.h"
-#include "volume.h"
-#include <rxrpc/call.h>
-#include "super.h"
 #include "internal.h"
-static struct dentry *afs_dir_lookup(struct inode *dir, struct dentry *dentry,
+static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry,
-                                     struct nameidata *nd);
+                                 struct nameidata *nd);
 static int afs_dir_open(struct inode *inode, struct file *file);
-static int afs_dir_readdir(struct file *file, void *dirent, filldir_t filldir);
+static int afs_readdir(struct file *file, void *dirent, filldir_t filldir);
 static int afs_d_revalidate(struct dentry *dentry, struct nameidata *nd);
 static int afs_d_delete(struct dentry *dentry);
-static int afs_dir_lookup_filldir(void *_cookie, const char *name, int nlen,
+static void afs_d_release(struct dentry *dentry);
+static int afs_lookup_filldir(void *_cookie, const char *name, int nlen,
                                  loff_t fpos, u64 ino, unsigned dtype);
+static int afs_create(struct inode *dir, struct dentry *dentry, int mode,
+                      struct nameidata *nd);
+static int afs_mkdir(struct inode *dir, struct dentry *dentry, int mode);
+static int afs_rmdir(struct inode *dir, struct dentry *dentry);
+static int afs_unlink(struct inode *dir, struct dentry *dentry);
+static int afs_link(struct dentry *from, struct inode *dir,
+                    struct dentry *dentry);
+static int afs_symlink(struct inode *dir, struct dentry *dentry,
+                       const char *content);
+static int afs_rename(struct inode *old_dir, struct dentry *old_dentry,
+                      struct inode *new_dir, struct dentry *new_dentry);
 const struct file_operations afs_dir_file_operations = {
        .open           = afs_dir_open,
-        .readdir        = afs_dir_readdir,
+        .release        = afs_release,
+        .readdir        = afs_readdir,
 };
 const struct inode_operations afs_dir_inode_operations = {
-        .lookup         = afs_dir_lookup,
+        .create         = afs_create,
+        .lookup         = afs_lookup,
+        .link           = afs_link,
+        .unlink         = afs_unlink,
+        .symlink        = afs_symlink,
+        .mkdir          = afs_mkdir,
+        .rmdir          = afs_rmdir,
+        .rename         = afs_rename,
+        .permission     = afs_permission,
        .getattr        = afs_inode_getattr,
-#if 0 /* TODO */
-        .create         = afs_dir_create,
-        .link           = afs_dir_link,
-        .unlink         = afs_dir_unlink,
-        .symlink        = afs_dir_symlink,
-        .mkdir          = afs_dir_mkdir,
-        .rmdir          = afs_dir_rmdir,
-        .mknod          = afs_dir_mknod,
-        .rename         = afs_dir_rename,
-#endif
 };
 static struct dentry_operations afs_fs_dentry_operations = {
        .d_revalidate   = afs_d_revalidate,
        .d_delete       = afs_d_delete,
+        .d_release      = afs_d_release,
 };
 #define AFS_DIR_HASHTBL_SIZE    128
@@ -105,14 +113,13 @@ struct afs_dir_page {
        union afs_dir_block blocks[PAGE_SIZE / sizeof(union afs_dir_block)];
 };
-struct afs_dir_lookup_cookie {
+struct afs_lookup_cookie {
        struct afs_fid  fid;
        const char      *name;
        size_t          nlen;
        int             found;
 };
-/*****************************************************************************/
 /*
 * check that a directory page is valid
 */
@@ -128,9 +135,10 @@ static inline void afs_dir_check_page(struct inode *dir, struct page *page)
        if (qty == 0)
                goto error;
-        if (page->index==0 && qty!=ntohs(dbuf->blocks[0].pagehdr.npages)) {
+        if (page->index == 0 && qty != ntohs(dbuf->blocks[0].pagehdr.npages)) {
                printk("kAFS: %s(%lu): wrong number of dir blocks %d!=%hu\n",
-                       __FUNCTION__,dir->i_ino,qty,ntohs(dbuf->blocks[0].pagehdr.npages));
+                       __FUNCTION__, dir->i_ino, qty,
+                       ntohs(dbuf->blocks[0].pagehdr.npages));
                goto error;
        }
 #endif
@@ -157,13 +165,11 @@ static inline void afs_dir_check_page(struct inode *dir, struct page *page)
        SetPageChecked(page);
        return;
- error:
+error:
        SetPageChecked(page);
        SetPageError(page);
+}
-} /* end afs_dir_check_page() */
-/*****************************************************************************/
 /*
 * discard a page cached in the pagecache
 */
@@ -171,20 +177,22 @@ static inline void afs_dir_put_page(struct page *page)
 {
        kunmap(page);
        page_cache_release(page);
+}
-} /* end afs_dir_put_page() */
-/*****************************************************************************/
 /*
 * get a page into the pagecache
 */
-static struct page *afs_dir_get_page(struct inode *dir, unsigned long index)
+static struct page *afs_dir_get_page(struct inode *dir, unsigned long index,
+                                     struct key *key)
 {
        struct page *page;
+        struct file file = {
+                .private_data = key,
+        };
        _enter("{%lu},%lu", dir->i_ino, index);
-        page = read_mapping_page(dir->i_mapping, index, NULL);
+        page = read_mapping_page(dir->i_mapping, index, &file);
        if (!IS_ERR(page)) {
                wait_on_page_locked(page);
                kmap(page);
@@ -197,12 +205,12 @@ static struct page *afs_dir_get_page(struct inode *dir, unsigned long index)
        }
        return page;
- fail:
+fail:
        afs_dir_put_page(page);
+        _leave(" = -EIO");
        return ERR_PTR(-EIO);
-} /* end afs_dir_get_page() */
+}
-/*****************************************************************************/
 /*
 * open an AFS directory file
 */
@@ -213,15 +221,12 @@ static int afs_dir_open(struct inode *inode, struct file *file)
        BUILD_BUG_ON(sizeof(union afs_dir_block) != 2048);
        BUILD_BUG_ON(sizeof(union afs_dirent) != 32);
-        if (AFS_FS_I(inode)->flags & AFS_VNODE_DELETED)
+        if (test_bit(AFS_VNODE_DELETED, &AFS_FS_I(inode)->flags))
                return -ENOENT;
-        _leave(" = 0");
+        return afs_open(inode, file);
-        return 0;
+}
-} /* end afs_dir_open() */
-/*****************************************************************************/
 /*
 * deal with one block in an AFS directory
 */
@@ -250,7 +255,7 @@ static int afs_dir_iterate_block(unsigned *fpos,
                /* skip entries marked unused in the bitmap */
                if (!(block->pagehdr.bitmap[offset / 8] &
                      (1 << (offset % 8)))) {
-                        _debug("ENT[%Zu.%u]: unused\n",
+                        _debug("ENT[%Zu.%u]: unused",
                               blkoff / sizeof(union afs_dir_block), offset);
                        if (offset >= curr)
                                *fpos = blkoff +
@@ -264,7 +269,7 @@ static int afs_dir_iterate_block(unsigned *fpos,
                               sizeof(*block) -
                               offset * sizeof(union afs_dirent));
-                _debug("ENT[%Zu.%u]: %s %Zu \"%s\"\n",
+                _debug("ENT[%Zu.%u]: %s %Zu \"%s\"",
                       blkoff / sizeof(union afs_dir_block), offset,
                       (offset < curr ? "skip" : "fill"),
                       nlen, dire->u.name);
@@ -274,7 +279,7 @@ static int afs_dir_iterate_block(unsigned *fpos,
                        if (next >= AFS_DIRENT_PER_BLOCK) {
                                _debug("ENT[%Zu.%u]:"
                                       " %u travelled beyond end dir block"
-                                       " (len %u/%Zu)\n",
+                                       " (len %u/%Zu)",
                                       blkoff / sizeof(union afs_dir_block),
                                       offset, next, tmp, nlen);
                                return -EIO;
@@ -282,13 +287,13 @@ static int afs_dir_iterate_block(unsigned *fpos,
                        if (!(block->pagehdr.bitmap[next / 8] &
                              (1 << (next % 8)))) {
                                _debug("ENT[%Zu.%u]:"
-                                       " %u unmarked extension (len %u/%Zu)\n",
+                                       " %u unmarked extension (len %u/%Zu)",
                                       blkoff / sizeof(union afs_dir_block),
                                       offset, next, tmp, nlen);
                                return -EIO;
                        }
-                        _debug("ENT[%Zu.%u]: ext %u/%Zu\n",
+                        _debug("ENT[%Zu.%u]: ext %u/%Zu",
                               blkoff / sizeof(union afs_dir_block),
                               next, tmp, nlen);
                        next++;
@@ -304,7 +309,7 @@ static int afs_dir_iterate_block(unsigned *fpos,
                              nlen,
                              blkoff + offset * sizeof(union afs_dirent),
                              ntohl(dire->u.vnode),
-                              filldir == afs_dir_lookup_filldir ?
+                              filldir == afs_lookup_filldir ?
                              ntohl(dire->u.unique) : DT_UNKNOWN);
                if (ret < 0) {
                        _leave(" = 0 [full]");
@@ -316,16 +321,15 @@ static int afs_dir_iterate_block(unsigned *fpos,
        _leave(" = 1 [more]");
        return 1;
-} /* end afs_dir_iterate_block() */
+}
-/*****************************************************************************/
 /*
- * read an AFS directory
+ * iterate through the data blob that lists the contents of an AFS directory
 */
 static int afs_dir_iterate(struct inode *dir, unsigned *fpos, void *cookie,
-                           filldir_t filldir)
+                           filldir_t filldir, struct key *key)
 {
-        union afs_dir_block     *dblock;
+        union afs_dir_block *dblock;
        struct afs_dir_page *dbuf;
        struct page *page;
        unsigned blkoff, limit;
@@ -333,7 +337,7 @@ static int afs_dir_iterate(struct inode *dir, unsigned *fpos, void *cookie,
        _enter("{%lu},%u,,", dir->i_ino, *fpos);
-        if (AFS_FS_I(dir)->flags & AFS_VNODE_DELETED) {
+        if (test_bit(AFS_VNODE_DELETED, &AFS_FS_I(dir)->flags)) {
                _leave(" = -ESTALE");
                return -ESTALE;
        }
@@ -348,7 +352,7 @@ static int afs_dir_iterate(struct inode *dir, unsigned *fpos, void *cookie,
                blkoff = *fpos & ~(sizeof(union afs_dir_block) - 1);
                /* fetch the appropriate page from the directory */
-                page = afs_dir_get_page(dir, blkoff / PAGE_SIZE);
+                page = afs_dir_get_page(dir, blkoff / PAGE_SIZE, key);
                if (IS_ERR(page)) {
                        ret = PTR_ERR(page);
                        break;
@@ -377,43 +381,50 @@ static int afs_dir_iterate(struct inode *dir, unsigned *fpos, void *cookie,
                ret = 0;
        }
- out:
+out:
        _leave(" = %d", ret);
        return ret;
-} /* end afs_dir_iterate() */
+}
-/*****************************************************************************/
 /*
 * read an AFS directory
+ * - file->private_data must have been set before this is called (it is
+ *   asserted non-NULL) as it is handed to afs_dir_iterate() as the key
+ * - the file position is passed through an unsigned local and written back
+ *   afterwards, whatever afs_dir_iterate() returned
 */
-static int afs_dir_readdir(struct file *file, void *cookie, filldir_t filldir)
+static int afs_readdir(struct file *file, void *cookie, filldir_t filldir)
 {
        unsigned fpos;
        int ret;
-        _enter("{%Ld,{%lu}}", file->f_pos, file->f_path.dentry->d_inode->i_ino);
+        _enter("{%Ld,{%lu}}",
+               file->f_pos, file->f_path.dentry->d_inode->i_ino);
+        ASSERT(file->private_data != NULL);
        fpos = file->f_pos;
-        ret = afs_dir_iterate(file->f_path.dentry->d_inode, &fpos, cookie, filldir);
+        ret = afs_dir_iterate(file->f_path.dentry->d_inode, &fpos,
+                              cookie, filldir, file->private_data);
        file->f_pos = fpos;
        _leave(" = %d", ret);
        return ret;
-} /* end afs_dir_readdir() */
+}
-/*****************************************************************************/
 /*
 * search the directory for a name
 * - if afs_dir_iterate_block() spots this function, it'll pass the FID
 *   uniquifier through dtype
 */
-static int afs_dir_lookup_filldir(void *_cookie, const char *name, int nlen,
+static int afs_lookup_filldir(void *_cookie, const char *name, int nlen,
-                                  loff_t fpos, u64 ino, unsigned dtype)
+                              loff_t fpos, u64 ino, unsigned dtype)
 {
-        struct afs_dir_lookup_cookie *cookie = _cookie;
+        struct afs_lookup_cookie *cookie = _cookie;
-        _enter("{%s,%Zu},%s,%u,,%lu,%u",
+        _enter("{%s,%Zu},%s,%u,,%llu,%u",
-               cookie->name, cookie->nlen, name, nlen, ino, dtype);
+               cookie->name, cookie->nlen, name, nlen,
+               (unsigned long long) ino, dtype);
+        /* insanity checks first */
+        BUILD_BUG_ON(sizeof(union afs_dir_block) != 2048);
+        BUILD_BUG_ON(sizeof(union afs_dirent) != 32);
        if (cookie->nlen != nlen || memcmp(cookie->name, name, nlen) != 0) {
                _leave(" = 0 [no]");
@@ -426,216 +437,254 @@ static int afs_dir_lookup_filldir(void *_cookie, const char *name, int nlen,
        _leave(" = -1 [found]");
        return -1;
-} /* end afs_dir_lookup_filldir() */
+}
-/*****************************************************************************/
 /*
- * look up an entry in a directory
+ * do a lookup in a directory
+ * - just returns the FID the dentry name maps to if found
+ * - the volume ID of the FID is taken from the directory's superblock
+ * - returns 0 with *fid filled in, -ENOENT if the name isn't listed, or the
+ *   error from afs_dir_iterate() if the directory couldn't be searched
 */
-static struct dentry *afs_dir_lookup(struct inode *dir, struct dentry *dentry,
+static int afs_do_lookup(struct inode *dir, struct dentry *dentry,
-                                     struct nameidata *nd)
+                         struct afs_fid *fid, struct key *key)
 {
-        struct afs_dir_lookup_cookie cookie;
+        struct afs_lookup_cookie cookie;
        struct afs_super_info *as;
+        unsigned fpos;
+        int ret;
+        _enter("{%lu},%p{%s},", dir->i_ino, dentry, dentry->d_name.name);
+        as = dir->i_sb->s_fs_info;
+        /* search the directory */
+        cookie.name     = dentry->d_name.name;
+        cookie.nlen     = dentry->d_name.len;
+        cookie.fid.vid  = as->volume->vid;
+        cookie.found    = 0;
+        fpos = 0;
+        ret = afs_dir_iterate(dir, &fpos, &cookie, afs_lookup_filldir,
+                              key);
+        if (ret < 0) {
+                _leave(" = %d [iter]", ret);
+                return ret;
+        }
+        /* the iteration worked, but the name may simply not be there */
+        if (!cookie.found) {
+                _leave(" = -ENOENT [not found]");
+                return -ENOENT;
+        }
+        *fid = cookie.fid;
+        _leave(" = 0 { vn=%u u=%u }", fid->vnode, fid->unique);
+        return 0;
+}
+/*
+ * look up an entry in a directory
+ * - names longer than 255 characters are refused with -ENAMETOOLONG and a
+ *   directory flagged as deleted with -ESTALE
+ * - a key is requested for the directory's cell and the directory is
+ *   validated with it before being searched
+ * - a name that isn't found (-ENOENT) is added as a negative dentry and NULL
+ *   is returned
+ * - on success the directory's data version is recorded in dentry->d_fsdata,
+ *   the dentry is attached to the inode obtained from afs_iget() and NULL is
+ *   returned; any other failure is returned as an ERR_PTR()
+ * - the key is released on every path once it is no longer needed
+ */
+static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry,
+                                 struct nameidata *nd)
+{
        struct afs_vnode *vnode;
+        struct afs_fid fid;
        struct inode *inode;
-        unsigned fpos;
+        struct key *key;
        int ret;
-        _enter("{%lu},%p{%s}", dir->i_ino, dentry, dentry->d_name.name);
+        vnode = AFS_FS_I(dir);
-        /* insanity checks first */
+        _enter("{%x:%d},%p{%s},",
-        BUILD_BUG_ON(sizeof(union afs_dir_block) != 2048);
+               vnode->fid.vid, vnode->fid.vnode, dentry, dentry->d_name.name);
-        BUILD_BUG_ON(sizeof(union afs_dirent) != 32);
+        ASSERTCMP(dentry->d_inode, ==, NULL);
        if (dentry->d_name.len > 255) {
                _leave(" = -ENAMETOOLONG");
                return ERR_PTR(-ENAMETOOLONG);
        }
-        vnode = AFS_FS_I(dir);
+        if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) {
-        if (vnode->flags & AFS_VNODE_DELETED) {
                _leave(" = -ESTALE");
                return ERR_PTR(-ESTALE);
        }
-        as = dir->i_sb->s_fs_info;
+        key = afs_request_key(vnode->volume->cell);
+        if (IS_ERR(key)) {
-        /* search the directory */
+                _leave(" = %ld [key]", PTR_ERR(key));
-        cookie.name     = dentry->d_name.name;
+                return ERR_PTR(PTR_ERR(key));
-        cookie.nlen     = dentry->d_name.len;
+        }
-        cookie.fid.vid  = as->volume->vid;
-        cookie.found    = 0;
-        fpos = 0;
+        ret = afs_validate(vnode, key);
-        ret = afs_dir_iterate(dir, &fpos, &cookie, afs_dir_lookup_filldir);
        if (ret < 0) {
-                _leave(" = %d", ret);
+                key_put(key);
+                _leave(" = %d [val]", ret);
                return ERR_PTR(ret);
        }
-        ret = -ENOENT;
+        ret = afs_do_lookup(dir, dentry, &fid, key);
-        if (!cookie.found) {
+        if (ret < 0) {
-                _leave(" = %d", ret);
+                key_put(key);
+                if (ret == -ENOENT) {
+                        d_add(dentry, NULL);
+                        _leave(" = NULL [negative]");
+                        return NULL;
+                }
+                _leave(" = %d [do]", ret);
                return ERR_PTR(ret);
        }
+        dentry->d_fsdata = (void *)(unsigned long) vnode->status.data_version;
        /* instantiate the dentry */
-        ret = afs_iget(dir->i_sb, &cookie.fid, &inode);
+        inode = afs_iget(dir->i_sb, key, &fid, NULL, NULL);
-        if (ret < 0) {
+        key_put(key);
-                _leave(" = %d", ret);
+        if (IS_ERR(inode)) {
-                return ERR_PTR(ret);
+                _leave(" = %ld", PTR_ERR(inode));
+                return ERR_PTR(PTR_ERR(inode));
        }
        dentry->d_op = &afs_fs_dentry_operations;
-        dentry->d_fsdata = (void *) (unsigned long) vnode->status.version;
        d_add(dentry, inode);
        _leave(" = 0 { vn=%u u=%u } -> { ino=%lu v=%lu }",
-               cookie.fid.vnode,
+               fid.vnode,
-               cookie.fid.unique,
+               fid.unique,
               dentry->d_inode->i_ino,
               dentry->d_inode->i_version);
        return NULL;
-} /* end afs_dir_lookup() */
+}
-/*****************************************************************************/
 /*
 * check that a dentry lookup hit has found a valid entry
 * - NOTE! the hit can be a negative hit too, so we can't assume we have an
 *   inode
- * (derived from nfs_lookup_revalidate)
+ * - returns 1 if the dentry may still be used and 0 after unhashing it
+ * - if no key can be obtained, the checks carry on with a NULL key
+ * - a dentry whose d_fsdata matches the parent directory's data version is
+ *   accepted without searching the directory again
+ * - otherwise the name is looked up again and, for a positive dentry, the
+ *   vnode ID and uniquifier found are compared with those of its inode; a
+ *   negative dentry stays valid only while the name remains absent
+ * - a positive dentry that has submounts is kept (1 is returned) rather than
+ *   dropped, but its recorded directory version is left untouched
 */
 static int afs_d_revalidate(struct dentry *dentry, struct nameidata *nd)
 {
-        struct afs_dir_lookup_cookie cookie;
+        struct afs_vnode *vnode, *dir;
+        struct afs_fid fid;
        struct dentry *parent;
-        struct inode *inode, *dir;
+        struct key *key;
-        unsigned fpos;
+        void *dir_version;
        int ret;
-        _enter("{sb=%p n=%s},", dentry->d_sb, dentry->d_name.name);
+        vnode = AFS_FS_I(dentry->d_inode);
-        /* lock down the parent dentry so we can peer at it */
+        if (dentry->d_inode)
-        parent = dget_parent(dentry->d_parent);
+                _enter("{v={%x:%u} n=%s fl=%lx},",
+                       vnode->fid.vid, vnode->fid.vnode, dentry->d_name.name,
+                       vnode->flags);
+        else
+                _enter("{neg n=%s}", dentry->d_name.name);
-        dir = parent->d_inode;
+        key = afs_request_key(AFS_FS_S(dentry->d_sb)->volume->cell);
-        inode = dentry->d_inode;
+        if (IS_ERR(key))
+                key = NULL;
-        /* handle a negative dentry */
+        /* lock down the parent dentry so we can peer at it */
-        if (!inode)
+        parent = dget_parent(dentry);
+        if (!parent->d_inode)
                goto out_bad;
-        /* handle a bad inode */
+        dir = AFS_FS_I(parent->d_inode);
-        if (is_bad_inode(inode)) {
-                printk("kAFS: afs_d_revalidate: %s/%s has bad inode\n",
-                       dentry->d_parent->d_name.name, dentry->d_name.name);
-                goto out_bad;
-        }
-        /* force a full look up if the parent directory changed since last the
+        /* validate the parent directory */
-         * server was consulted
+        if (test_bit(AFS_VNODE_MODIFIED, &dir->flags))
-         * - otherwise this inode must still exist, even if the inode details
+                afs_validate(dir, key);
-         *   themselves have changed
-         */
-        if (AFS_FS_I(dir)->flags & AFS_VNODE_CHANGED)
-                afs_vnode_fetch_status(AFS_FS_I(dir));
-        if (AFS_FS_I(dir)->flags & AFS_VNODE_DELETED) {
+        if (test_bit(AFS_VNODE_DELETED, &dir->flags)) {
                _debug("%s: parent dir deleted", dentry->d_name.name);
                goto out_bad;
        }
-        if (AFS_FS_I(inode)->flags & AFS_VNODE_DELETED) {
+        dir_version = (void *) (unsigned long) dir->status.data_version;
-                _debug("%s: file already deleted", dentry->d_name.name);
+        if (dentry->d_fsdata == dir_version)
-                goto out_bad;
+                goto out_valid; /* the dir contents are unchanged */
-        }
-        if ((unsigned long) dentry->d_fsdata !=
-            (unsigned long) AFS_FS_I(dir)->status.version) {
-                _debug("%s: parent changed %lu -> %u",
-                       dentry->d_name.name,
-                       (unsigned long) dentry->d_fsdata,
-                       (unsigned) AFS_FS_I(dir)->status.version);
-                /* search the directory for this vnode */
+        _debug("dir modified");
-                cookie.name     = dentry->d_name.name;
-                cookie.nlen     = dentry->d_name.len;
-                cookie.fid.vid  = AFS_FS_I(inode)->volume->vid;
-                cookie.found    = 0;
-                fpos = 0;
+        /* search the directory for this vnode */
-                ret = afs_dir_iterate(dir, &fpos, &cookie,
+        ret = afs_do_lookup(&dir->vfs_inode, dentry, &fid, key);
-                                      afs_dir_lookup_filldir);
+        switch (ret) {
-                if (ret < 0) {
+        case 0:
-                        _debug("failed to iterate dir %s: %d",
+                /* the filename maps to something */
-                               parent->d_name.name, ret);
+                if (!dentry->d_inode)
+                        goto out_bad;
+                if (is_bad_inode(dentry->d_inode)) {
+                        printk("kAFS: afs_d_revalidate: %s/%s has bad inode\n",
+                               parent->d_name.name, dentry->d_name.name);
                        goto out_bad;
-                }
-                if (!cookie.found) {
-                        _debug("%s: dirent not found", dentry->d_name.name);
-                        goto not_found;
                }
                /* if the vnode ID has changed, then the dirent points to a
                 * different file */
-                if (cookie.fid.vnode != AFS_FS_I(inode)->fid.vnode) {
+                if (fid.vnode != vnode->fid.vnode) {
-                        _debug("%s: dirent changed", dentry->d_name.name);
+                        _debug("%s: dirent changed [%u != %u]",
+                               dentry->d_name.name, fid.vnode,
+                               vnode->fid.vnode);
                        goto not_found;
                }
                /* if the vnode ID uniqifier has changed, then the file has
-                 * been deleted */
+                 * been deleted and replaced, and the original vnode ID has
-                if (cookie.fid.unique != AFS_FS_I(inode)->fid.unique) {
+                 * been reused */
+                if (fid.unique != vnode->fid.unique) {
                        _debug("%s: file deleted (uq %u -> %u I:%lu)",
-                               dentry->d_name.name,
+                               dentry->d_name.name, fid.unique,
-                               cookie.fid.unique,
+                               vnode->fid.unique, dentry->d_inode->i_version);
-                               AFS_FS_I(inode)->fid.unique,
+                        spin_lock(&vnode->lock);
-                               inode->i_version);
+                        set_bit(AFS_VNODE_DELETED, &vnode->flags);
-                        spin_lock(&AFS_FS_I(inode)->lock);
+                        spin_unlock(&vnode->lock);
-                        AFS_FS_I(inode)->flags |= AFS_VNODE_DELETED;
+                        goto not_found;
-                        spin_unlock(&AFS_FS_I(inode)->lock);
-                        invalidate_remote_inode(inode);
-                        goto out_bad;
                }
+                goto out_valid;
+        case -ENOENT:
+                /* the filename is unknown */
+                _debug("%s: dirent not found", dentry->d_name.name);
+                if (dentry->d_inode)
+                        goto not_found;
+                goto out_valid;
-                dentry->d_fsdata =
+        default:
-                        (void *) (unsigned long) AFS_FS_I(dir)->status.version;
+                _debug("failed to iterate dir %s: %d",
+                       parent->d_name.name, ret);
+                goto out_bad;
        }
- out_valid:
+out_valid:
+        dentry->d_fsdata = dir_version;
+out_skip:
        dput(parent);
+        key_put(key);
        _leave(" = 1 [valid]");
        return 1;
        /* the dirent, if it exists, now points to a different vnode */
- not_found:
+not_found:
        spin_lock(&dentry->d_lock);
        dentry->d_flags |= DCACHE_NFSFS_RENAMED;
        spin_unlock(&dentry->d_lock);
- out_bad:
+out_bad:
-        if (inode) {
+        if (dentry->d_inode) {
                /* don't unhash if we have submounts */
                if (have_submounts(dentry))
-                        goto out_valid;
+                        goto out_skip;
        }
-        shrink_dcache_parent(dentry);
        _debug("dropping dentry %s/%s",
-               dentry->d_parent->d_name.name, dentry->d_name.name);
+               parent->d_name.name, dentry->d_name.name);
+        shrink_dcache_parent(dentry);
        d_drop(dentry);
        dput(parent);
+        key_put(key);
        _leave(" = 0 [bad]");
        return 0;
-} /* end afs_d_revalidate() */
+}
-/*****************************************************************************/
 /*
 * allow the VFS to enquire as to whether a dentry should be unhashed (mustn't
 * sleep)
@@ -649,15 +698,444 @@ static int afs_d_delete(struct dentry *dentry)
        if (dentry->d_flags & DCACHE_NFSFS_RENAMED)
                goto zap;
-        if (dentry->d_inode) {
+        if (dentry->d_inode &&
-                if (AFS_FS_I(dentry->d_inode)->flags & AFS_VNODE_DELETED)
+            test_bit(AFS_VNODE_DELETED, &AFS_FS_I(dentry->d_inode)->flags))
                        goto zap;
-        }
        _leave(" = 0 [keep]");
        return 0;
- zap:
+zap:
        _leave(" = 1 [zap]");
        return 1;
-} /* end afs_d_delete() */
+}
+/*
+ * handle dentry release
+ * - nothing is freed or torn down here; the function only emits an entry
+ *   trace naming the dentry that is being released
+ */
+static void afs_d_release(struct dentry *dentry)
+{
+        _enter("%s", dentry->d_name.name);
+}
+/*
+ * create a directory on an AFS filesystem
+ * - dir is the parent directory inode and dentry names the new directory
+ * - names longer than 255 bytes are rejected with -ENAMETOOLONG
+ * - returns 0 on success or a negative error code; on any failure the dentry
+ *   is dropped from the dcache
+ */
+static int afs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+{
+        struct afs_file_status status;
+        struct afs_callback cb;
+        struct afs_server *server;
+        struct afs_vnode *dvnode, *vnode;
+        struct afs_fid fid;
+        struct inode *inode;
+        struct key *key;
+        int ret;
+        dvnode = AFS_FS_I(dir);
+        _enter("{%x:%d},{%s},%o",
+               dvnode->fid.vid, dvnode->fid.vnode, dentry->d_name.name, mode);
+        ret = -ENAMETOOLONG;
+        if (dentry->d_name.len > 255)
+                goto error;
+        /* get a key for the cell that holds the parent directory */
+        key = afs_request_key(dvnode->volume->cell);
+        if (IS_ERR(key)) {
+                ret = PTR_ERR(key);
+                goto error;
+        }
+        /* ask for a directory; fid, status, cb and server are filled in by
+         * the create call */
+        mode |= S_IFDIR;
+        ret = afs_vnode_create(dvnode, key, dentry->d_name.name,
+                               mode, &fid, &status, &cb, &server);
+        if (ret < 0)
+                goto mkdir_error;
+        inode = afs_iget(dir->i_sb, key, &fid, &status, &cb);
+        if (IS_ERR(inode)) {
+                /* ENOMEM at a really inconvenient time - just abandon the new
+                 * directory on the server */
+                ret = PTR_ERR(inode);
+                goto iget_error;
+        }
+        /* apply the status report we've got for the new vnode */
+        vnode = AFS_FS_I(inode);
+        /* NOTE(review): update_cnt is bumped by hand here, presumably to
+         * balance a decrement in afs_vnode_finalise_status_update() -
+         * confirm against that function */
+        spin_lock(&vnode->lock);
+        vnode->update_cnt++;
+        spin_unlock(&vnode->lock);
+        afs_vnode_finalise_status_update(vnode, server);
+        afs_put_server(server);
+        d_instantiate(dentry, inode);
+        if (d_unhashed(dentry)) {
+                _debug("not hashed");
+                d_rehash(dentry);
+        }
+        key_put(key);
+        _leave(" = 0");
+        return 0;
+        /* unwind in reverse order of acquisition: server, then key */
+iget_error:
+        afs_put_server(server);
+mkdir_error:
+        key_put(key);
+error:
+        d_drop(dentry);
+        _leave(" = %d", ret);
+        return ret;
+}
+/*
+ * remove a directory from an AFS filesystem
+ * - dir is the parent directory inode and dentry names the victim
+ * - names longer than 255 bytes are rejected with -ENAMETOOLONG
+ * - returns 0 on success or a negative error code
+ */
+static int afs_rmdir(struct inode *dir, struct dentry *dentry)
+{
+        struct afs_vnode *dvnode, *vnode;
+        struct key *key;
+        int ret;
+        dvnode = AFS_FS_I(dir);
+        _enter("{%x:%d},{%s}",
+               dvnode->fid.vid, dvnode->fid.vnode, dentry->d_name.name);
+        ret = -ENAMETOOLONG;
+        if (dentry->d_name.len > 255)
+                goto error;
+        /* get a key for the cell that holds the parent directory */
+        key = afs_request_key(dvnode->volume->cell);
+        if (IS_ERR(key)) {
+                ret = PTR_ERR(key);
+                goto error;
+        }
+        /* NOTE(review): the final 'true' presumably selects directory
+         * removal (afs_unlink passes 'false') - confirm against
+         * afs_vnode_remove() */
+        ret = afs_vnode_remove(dvnode, key, dentry->d_name.name, true);
+        if (ret < 0)
+                goto rmdir_error;
+        if (dentry->d_inode) {
+                /* the removal succeeded, so mark the local inode as deleted:
+                 * zero its link count, flag the vnode and discard its
+                 * callback */
+                vnode = AFS_FS_I(dentry->d_inode);
+                clear_nlink(&vnode->vfs_inode);
+                set_bit(AFS_VNODE_DELETED, &vnode->flags);
+                afs_discard_callback_on_delete(vnode);
+        }
+        key_put(key);
+        _leave(" = 0");
+        return 0;
+rmdir_error:
+        key_put(key);
+error:
+        _leave(" = %d", ret);
+        return ret;
+}
+/*
+ * remove a file from an AFS filesystem
+ * - dir is the parent directory inode and dentry names the victim
+ * - names longer than 255 bytes are rejected with -ENAMETOOLONG
+ * - returns 0 on success or a negative error code
+ * - once the key has been obtained it is released on every exit path
+ */
+static int afs_unlink(struct inode *dir, struct dentry *dentry)
+{
+        struct afs_vnode *dvnode, *vnode;
+        struct key *key;
+        int ret;
+        dvnode = AFS_FS_I(dir);
+        _enter("{%x:%d},{%s}",
+               dvnode->fid.vid, dvnode->fid.vnode, dentry->d_name.name);
+        ret = -ENAMETOOLONG;
+        if (dentry->d_name.len > 255)
+                goto error;
+        /* get a key for the cell that holds the parent directory */
+        key = afs_request_key(dvnode->volume->cell);
+        if (IS_ERR(key)) {
+                ret = PTR_ERR(key);
+                goto error;
+        }
+        if (dentry->d_inode) {
+                vnode = AFS_FS_I(dentry->d_inode);
+                /* make sure we have a callback promise on the victim; the key
+                 * is already held at this point, so a failure must unwind
+                 * through the label that releases it */
+                ret = afs_validate(vnode, key);
+                if (ret < 0)
+                        goto remove_error;
+        }
+        ret = afs_vnode_remove(dvnode, key, dentry->d_name.name, false);
+        if (ret < 0)
+                goto remove_error;
+        if (dentry->d_inode) {
+                /* if the file wasn't deleted due to excess hard links, the
+                 * fileserver will break the callback promise on the file - if
+                 * it had one - before it returns to us, and if it was deleted,
+                 * it won't
+                 *
+                 * however, if we didn't have a callback promise outstanding,
+                 * or it was outstanding on a different server, then it won't
+                 * break it either...
+                 */
+                vnode = AFS_FS_I(dentry->d_inode);
+                if (test_bit(AFS_VNODE_DELETED, &vnode->flags))
+                        _debug("AFS_VNODE_DELETED");
+                if (test_bit(AFS_VNODE_CB_BROKEN, &vnode->flags))
+                        _debug("AFS_VNODE_CB_BROKEN");
+                /* force the callback to be treated as broken and revalidate;
+                 * the result is only logged and the unlink still reports
+                 * success */
+                set_bit(AFS_VNODE_CB_BROKEN, &vnode->flags);
+                ret = afs_validate(vnode, key);
+                _debug("nlink %d [val %d]", vnode->vfs_inode.i_nlink, ret);
+        }
+        key_put(key);
+        _leave(" = 0");
+        return 0;
+remove_error:
+        key_put(key);
+error:
+        _leave(" = %d", ret);
+        return ret;
+}
+/*
+ * create a regular file on an AFS filesystem
+ * - dir is the parent directory inode and dentry names the new file
+ * - nd is not used by this function
+ * - names longer than 255 bytes are rejected with -ENAMETOOLONG
+ * - returns 0 on success or a negative error code; on any failure the dentry
+ *   is dropped from the dcache
+ */
+static int afs_create(struct inode *dir, struct dentry *dentry, int mode,
+                      struct nameidata *nd)
+{
+        struct afs_file_status status;
+        struct afs_callback cb;
+        struct afs_server *server;
+        struct afs_vnode *dvnode, *vnode;
+        struct afs_fid fid;
+        struct inode *inode;
+        struct key *key;
+        int ret;
+        dvnode = AFS_FS_I(dir);
+        _enter("{%x:%d},{%s},%o,",
+               dvnode->fid.vid, dvnode->fid.vnode, dentry->d_name.name, mode);
+        ret = -ENAMETOOLONG;
+        if (dentry->d_name.len > 255)
+                goto error;
+        /* get a key for the cell that holds the parent directory */
+        key = afs_request_key(dvnode->volume->cell);
+        if (IS_ERR(key)) {
+                ret = PTR_ERR(key);
+                goto error;
+        }
+        /* ask for a regular file; fid, status, cb and server are filled in
+         * by the create call */
+        mode |= S_IFREG;
+        ret = afs_vnode_create(dvnode, key, dentry->d_name.name,
+                               mode, &fid, &status, &cb, &server);
+        if (ret < 0)
+                goto create_error;
+        inode = afs_iget(dir->i_sb, key, &fid, &status, &cb);
+        if (IS_ERR(inode)) {
+                /* ENOMEM at a really inconvenient time - just abandon the new
+                 * file on the server */
+                ret = PTR_ERR(inode);
+                goto iget_error;
+        }
+        /* apply the status report we've got for the new vnode */
+        vnode = AFS_FS_I(inode);
+        /* NOTE(review): update_cnt is bumped by hand here, presumably to
+         * balance a decrement in afs_vnode_finalise_status_update() -
+         * confirm against that function */
+        spin_lock(&vnode->lock);
+        vnode->update_cnt++;
+        spin_unlock(&vnode->lock);
+        afs_vnode_finalise_status_update(vnode, server);
+        afs_put_server(server);
+        d_instantiate(dentry, inode);
+        if (d_unhashed(dentry)) {
+                _debug("not hashed");
+                d_rehash(dentry);
+        }
+        key_put(key);
+        _leave(" = 0");
+        return 0;
+        /* unwind in reverse order of acquisition: server, then key */
+iget_error:
+        afs_put_server(server);
+create_error:
+        key_put(key);
+error:
+        d_drop(dentry);
+        _leave(" = %d", ret);
+        return ret;
+}
+/*
+ * create a hard link between files in an AFS filesystem
+ * - from is the existing dentry, dir the directory receiving the new name and
+ *   dentry the new name itself
+ * - names longer than 255 bytes are rejected with -ENAMETOOLONG
+ * - returns 0 on success or a negative error code; on any failure the new
+ *   dentry is dropped from the dcache
+ */
+static int afs_link(struct dentry *from, struct inode *dir,
+                    struct dentry *dentry)
+{
+        struct afs_vnode *dvnode, *vnode;
+        struct key *key;
+        int ret;
+        vnode = AFS_FS_I(from->d_inode);
+        dvnode = AFS_FS_I(dir);
+        _enter("{%x:%d},{%x:%d},{%s}",
+               vnode->fid.vid, vnode->fid.vnode,
+               dvnode->fid.vid, dvnode->fid.vnode,
+               dentry->d_name.name);
+        ret = -ENAMETOOLONG;
+        if (dentry->d_name.len > 255)
+                goto error;
+        /* get a key for the cell that holds the target directory */
+        key = afs_request_key(dvnode->volume->cell);
+        if (IS_ERR(key)) {
+                ret = PTR_ERR(key);
+                goto error;
+        }
+        ret = afs_vnode_link(dvnode, vnode, key, dentry->d_name.name);
+        if (ret < 0)
+                goto link_error;
+        /* bump the inode's usage count before attaching it to the new
+         * dentry */
+        atomic_inc(&vnode->vfs_inode.i_count);
+        d_instantiate(dentry, &vnode->vfs_inode);
+        key_put(key);
+        _leave(" = 0");
+        return 0;
+link_error:
+        key_put(key);
+error:
+        d_drop(dentry);
+        _leave(" = %d", ret);
+        return ret;
+}
+/*
+ * create a symlink in an AFS filesystem
+ * - dir is the parent directory inode, dentry names the new symlink and
+ *   content is the link target string
+ * - names longer than 255 bytes are rejected with -ENAMETOOLONG
+ * - content longer than 1023 bytes is rejected with -EINVAL
+ * - returns 0 on success or a negative error code; on any failure the dentry
+ *   is dropped from the dcache
+ */
+static int afs_symlink(struct inode *dir, struct dentry *dentry,
+                       const char *content)
+{
+        struct afs_file_status status;
+        struct afs_server *server;
+        struct afs_vnode *dvnode, *vnode;
+        struct afs_fid fid;
+        struct inode *inode;
+        struct key *key;
+        int ret;
+        dvnode = AFS_FS_I(dir);
+        _enter("{%x:%d},{%s},%s",
+               dvnode->fid.vid, dvnode->fid.vnode, dentry->d_name.name,
+               content);
+        ret = -ENAMETOOLONG;
+        if (dentry->d_name.len > 255)
+                goto error;
+        ret = -EINVAL;
+        if (strlen(content) > 1023)
+                goto error;
+        /* get a key for the cell that holds the parent directory */
+        key = afs_request_key(dvnode->volume->cell);
+        if (IS_ERR(key)) {
+                ret = PTR_ERR(key);
+                goto error;
+        }
+        ret = afs_vnode_symlink(dvnode, key, dentry->d_name.name, content,
+                                &fid, &status, &server);
+        if (ret < 0)
+                goto create_error;
+        /* unlike mkdir/create, no callback record is passed to afs_iget() */
+        inode = afs_iget(dir->i_sb, key, &fid, &status, NULL);
+        if (IS_ERR(inode)) {
+                /* ENOMEM at a really inconvenient time - just abandon the new
+                 * symlink on the server */
+                ret = PTR_ERR(inode);
+                goto iget_error;
+        }
+        /* apply the status report we've got for the new vnode */
+        vnode = AFS_FS_I(inode);
+        /* NOTE(review): update_cnt is bumped by hand here, presumably to
+         * balance a decrement in afs_vnode_finalise_status_update() -
+         * confirm against that function */
+        spin_lock(&vnode->lock);
+        vnode->update_cnt++;
+        spin_unlock(&vnode->lock);
+        afs_vnode_finalise_status_update(vnode, server);
+        afs_put_server(server);
+        d_instantiate(dentry, inode);
+        if (d_unhashed(dentry)) {
+                _debug("not hashed");
+                d_rehash(dentry);
+        }
+        key_put(key);
+        _leave(" = 0");
+        return 0;
+        /* unwind in reverse order of acquisition: server, then key */
+iget_error:
+        afs_put_server(server);
+create_error:
+        key_put(key);
+error:
+        d_drop(dentry);
+        _leave(" = %d", ret);
+        return ret;
+}
+/*
+ * rename a file in an AFS filesystem and/or move it between directories
+ * - old_dir/old_dentry identify the existing name, new_dir/new_dentry the
+ *   name it is to be given
+ * - only the new name's length is checked here; names longer than 255 bytes
+ *   are rejected with -ENAMETOOLONG
+ * - returns 0 on success or a negative error code; on any failure the new
+ *   dentry is dropped from the dcache
+ */
+static int afs_rename(struct inode *old_dir, struct dentry *old_dentry,
+                      struct inode *new_dir, struct dentry *new_dentry)
+{
+        struct afs_vnode *orig_dvnode, *new_dvnode, *vnode;
+        struct key *key;
+        int ret;
+        vnode = AFS_FS_I(old_dentry->d_inode);
+        orig_dvnode = AFS_FS_I(old_dir);
+        new_dvnode = AFS_FS_I(new_dir);
+        _enter("{%x:%d},{%x:%d},{%x:%d},{%s}",
+               orig_dvnode->fid.vid, orig_dvnode->fid.vnode,
+               vnode->fid.vid, vnode->fid.vnode,
+               new_dvnode->fid.vid, new_dvnode->fid.vnode,
+               new_dentry->d_name.name);
+        ret = -ENAMETOOLONG;
+        if (new_dentry->d_name.len > 255)
+                goto error;
+        /* the key is requested for the cell of the original directory */
+        key = afs_request_key(orig_dvnode->volume->cell);
+        if (IS_ERR(key)) {
+                ret = PTR_ERR(key);
+                goto error;
+        }
+        ret = afs_vnode_rename(orig_dvnode, new_dvnode, key,
+                               old_dentry->d_name.name,
+                               new_dentry->d_name.name);
+        if (ret < 0)
+                goto rename_error;
+        key_put(key);
+        _leave(" = 0");
+        return 0;
+rename_error:
+        key_put(key);
+error:
+        d_drop(new_dentry);
+        _leave(" = %d", ret);
+        return ret;
+}
diff --git a/fs/afs/errors.h b/fs/afs/errors.h
deleted file mode 100644
index 574d94ac8d05..000000000000
--- a/fs/afs/errors.h
+++ /dev/null
@@ -1,34 +0,0 @@
-/* errors.h: AFS abort/error codes
- *
- * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-#ifndef _LINUX_AFS_ERRORS_H
-#define _LINUX_AFS_ERRORS_H
-#include "types.h"
-/* file server abort codes */
-typedef enum {
-        VSALVAGE        = 101,  /* volume needs salvaging */
-        VNOVNODE        = 102,  /* no such file/dir (vnode) */
-        VNOVOL          = 103,  /* no such volume or volume unavailable */
-        VVOLEXISTS      = 104,  /* volume name already exists */
-        VNOSERVICE      = 105,  /* volume not currently in service */
-        VOFFLINE        = 106,  /* volume is currently offline (more info available [VVL-spec]) */
-        VONLINE         = 107,  /* volume is already online */
-        VDISKFULL       = 108,  /* disk partition is full */
-        VOVERQUOTA      = 109,  /* volume's maximum quota exceeded */
-        VBUSY           = 110,  /* volume is temporarily unavailable */
-        VMOVED          = 111,  /* volume moved to new server - ask this FS where */
-} afs_rxfs_abort_t;
-extern int afs_abort_to_error(int abortcode);
-#endif /* _LINUX_AFS_ERRORS_H */
diff --git a/fs/afs/file.c b/fs/afs/file.c
index b17634541f67..ae256498f4f7 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -1,6 +1,6 @@
-/* file.c: AFS filesystem file handling
+/* AFS filesystem file handling
 *
- * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
+ * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 *
 * This program is free software; you can redistribute it and/or
@@ -15,22 +15,25 @@
 #include <linux/slab.h>
 #include <linux/fs.h>
 #include <linux/pagemap.h>
-#include "volume.h"
-#include "vnode.h"
-#include <rxrpc/call.h>
 #include "internal.h"
-#if 0
-static int afs_file_open(struct inode *inode, struct file *file);
-static int afs_file_release(struct inode *inode, struct file *file);
-#endif
 static int afs_file_readpage(struct file *file, struct page *page);
 static void afs_file_invalidatepage(struct page *page, unsigned long offset);
 static int afs_file_releasepage(struct page *page, gfp_t gfp_flags);
+/*
+ * file operations for AFS files
+ * - open and release use the AFS handlers (afs_open/afs_release); every
+ *   other entry points at a generic VFS helper
+ * - no write operation is provided, and mmap uses the read-only generic
+ *   helper
+ */
+const struct file_operations afs_file_operations = {
+        .open           = afs_open,
+        .release        = afs_release,
+        .llseek         = generic_file_llseek,
+        .read           = do_sync_read,
+        .aio_read       = generic_file_aio_read,
+        .mmap           = generic_file_readonly_mmap,
+        .sendfile       = generic_file_sendfile,
+};
 const struct inode_operations afs_file_inode_operations = {
        .getattr        = afs_inode_getattr,
+        .permission     = afs_permission,
 };
 const struct address_space_operations afs_fs_aops = {
@@ -40,7 +43,48 @@ const struct address_space_operations afs_fs_aops = {
        .invalidatepage = afs_file_invalidatepage,
 };
-/*****************************************************************************/
+/*
+ * open an AFS file or directory and attach a key to it
+ * - requests a key for the vnode's cell and validates the vnode with it
+ * - on success the key is stored in file->private_data and 0 is returned;
+ *   afs_release() puts it again
+ * - on failure a negative error code is returned and no key reference is
+ *   left held
+ */
+int afs_open(struct inode *inode, struct file *file)
+{
+        struct afs_vnode *vnode = AFS_FS_I(inode);
+        struct key *key;
+        int ret;
+        _enter("{%x:%x},", vnode->fid.vid, vnode->fid.vnode);
+        key = afs_request_key(vnode->volume->cell);
+        if (IS_ERR(key)) {
+                _leave(" = %ld [key]", PTR_ERR(key));
+                return PTR_ERR(key);
+        }
+        ret = afs_validate(vnode, key);
+        if (ret < 0) {
+                /* the key was never attached to the file, so nothing else
+                 * will release it - drop it here */
+                key_put(key);
+                _leave(" = %d [val]", ret);
+                return ret;
+        }
+        file->private_data = key;
+        _leave(" = 0");
+        return 0;
+}
+/*
+ * release an AFS file or directory and discard its key
+ * - puts the key held in file->private_data (stored there by afs_open())
+ * - always returns 0
+ */
+int afs_release(struct inode *inode, struct file *file)
+{
+        struct afs_vnode *vnode = AFS_FS_I(inode);
+        _enter("{%x:%x},", vnode->fid.vid, vnode->fid.vnode);
+        key_put(file->private_data);
+        _leave(" = 0");
+        return 0;
+}
 /*
 * deal with notification that a page was read from the cache
 */
@@ -58,10 +102,9 @@ static void afs_file_readpage_read_complete(void *cookie_data,
                SetPageUptodate(page);
        unlock_page(page);
-} /* end afs_file_readpage_read_complete() */
+}
 #endif
-/*****************************************************************************/
 /*
 * deal with notification that a page was written to the cache
 */
@@ -74,41 +117,38 @@ static void afs_file_readpage_write_complete(void *cookie_data,
        _enter("%p,%p,%p,%d", cookie_data, page, data, error);
        unlock_page(page);
+}
-} /* end afs_file_readpage_write_complete() */
 #endif
-/*****************************************************************************/
 /*
 * AFS read page from file (or symlink)
 */
 static int afs_file_readpage(struct file *file, struct page *page)
 {
-        struct afs_rxfs_fetch_descriptor desc;
-#ifdef AFS_CACHING_SUPPORT
-        struct cachefs_page *pageio;
-#endif
        struct afs_vnode *vnode;
        struct inode *inode;
+        struct key *key;
+        size_t len;
+        off_t offset;
        int ret;
        inode = page->mapping->host;
-        _enter("{%lu},{%lu}", inode->i_ino, page->index);
+        ASSERT(file != NULL);
+        key = file->private_data;
+        ASSERT(key != NULL);
+        _enter("{%x},{%lu},{%lu}", key_serial(key), inode->i_ino, page->index);
        vnode = AFS_FS_I(inode);
        BUG_ON(!PageLocked(page));
        ret = -ESTALE;
-        if (vnode->flags & AFS_VNODE_DELETED)
+        if (test_bit(AFS_VNODE_DELETED, &vnode->flags))
                goto error;
 #ifdef AFS_CACHING_SUPPORT
-        ret = cachefs_page_get_private(page, &pageio, GFP_NOIO);
-        if (ret < 0)
-                goto error;
        /* is it cached? */
        ret = cachefs_read_or_alloc_page(vnode->cache,
                                         page,
@@ -132,26 +172,19 @@ static int afs_file_readpage(struct file *file, struct page *page)
        case -ENOBUFS:
        case -ENODATA:
        default:
-                desc.fid        = vnode->fid;
+                offset = page->index << PAGE_CACHE_SHIFT;
-                desc.offset     = page->index << PAGE_CACHE_SHIFT;
+                len = min_t(size_t, i_size_read(inode) - offset, PAGE_SIZE);
-                desc.size       = min((size_t) (inode->i_size - desc.offset),
-                                      (size_t) PAGE_SIZE);
-                desc.buffer     = kmap(page);
-                clear_page(desc.buffer);
                /* read the contents of the file from the server into the
                 * page */
-                ret = afs_vnode_fetch_data(vnode, &desc);
+                ret = afs_vnode_fetch_data(vnode, key, offset, len, page);
-                kunmap(page);
                if (ret < 0) {
-                        if (ret==-ENOENT) {
+                        if (ret == -ENOENT) {
                                _debug("got NOENT from server"
                                       " - marking file deleted and stale");
-                                vnode->flags |= AFS_VNODE_DELETED;
+                                set_bit(AFS_VNODE_DELETED, &vnode->flags);
                                ret = -ESTALE;
                        }
 #ifdef AFS_CACHING_SUPPORT
                        cachefs_uncache_page(vnode->cache, page);
 #endif
@@ -178,16 +211,13 @@ static int afs_file_readpage(struct file *file, struct page *page)
        _leave(" = 0");
        return 0;
- error:
+error:
        SetPageError(page);
        unlock_page(page);
        _leave(" = %d", ret);
        return ret;
+}
-} /* end afs_file_readpage() */
-/*****************************************************************************/
 /*
 * get a page cookie for the specified page
 */
@@ -202,10 +232,9 @@ int afs_cache_get_page_cookie(struct page *page,
        _leave(" = %d", ret);
        return ret;
-} /* end afs_cache_get_page_cookie() */
+}
 #endif
-/*****************************************************************************/
 /*
 * invalidate part or all of a page
 */
@@ -240,9 +269,8 @@ static void afs_file_invalidatepage(struct page *page, unsigned long offset)
        }
        _leave(" = %d", ret);
-} /* end afs_file_invalidatepage() */
+}
-/*****************************************************************************/
 /*
 * release a page and cleanup its private data
 */
@@ -267,4 +295,4 @@ static int afs_file_releasepage(struct page *page, gfp_t gfp_flags)
        _leave(" = 0");
        return 0;
-} /* end afs_file_releasepage() */
+}
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index 61bc371532ab..2393d2a08d79 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -1,6 +1,6 @@
-/* fsclient.c: AFS File Server client stubs
+/* AFS File Server client stubs
 *
- * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
+ * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 *
 * This program is free software; you can redistribute it and/or
@@ -11,827 +11,927 @@
 #include <linux/init.h>
 #include <linux/sched.h>
-#include <rxrpc/rxrpc.h>
+#include <linux/circ_buf.h>
-#include <rxrpc/transport.h>
-#include <rxrpc/connection.h>
-#include <rxrpc/call.h>
-#include "fsclient.h"
-#include "cmservice.h"
-#include "vnode.h"
-#include "server.h"
-#include "errors.h"
 #include "internal.h"
+#include "afs_fs.h"
-#define FSFETCHSTATUS           132     /* AFS Fetch file status */
-#define FSFETCHDATA             130     /* AFS Fetch file data */
-#define FSGIVEUPCALLBACKS       147     /* AFS Discard callback promises */
-#define FSGETVOLUMEINFO         148     /* AFS Get root volume information */
-#define FSGETROOTVOLUME         151     /* AFS Get root volume name */
-#define FSLOOKUP                161     /* AFS lookup file in directory */
-/*****************************************************************************/
 /*
- * map afs abort codes to/from Linux error codes
+ * decode an AFSFid block
- * - called with call->lock held
 */
-static void afs_rxfs_aemap(struct rxrpc_call *call)
+static void xdr_decode_AFSFid(const __be32 **_bp, struct afs_fid *fid)
 {
-        switch (call->app_err_state) {
+        const __be32 *bp = *_bp;
-        case RXRPC_ESTATE_LOCAL_ABORT:
-                call->app_abort_code = -call->app_errno;
+        fid->vid                = ntohl(*bp++);
-                break;
+        fid->vnode              = ntohl(*bp++);
-        case RXRPC_ESTATE_PEER_ABORT:
+        fid->unique             = ntohl(*bp++);
-                call->app_errno = afs_abort_to_error(call->app_abort_code);
+        *_bp = bp;
-                break;
+}
-        default:
-                break;
-        }
-} /* end afs_rxfs_aemap() */
-/*****************************************************************************/
 /*
- * get the root volume name from a fileserver
+ * decode an AFSFetchStatus block
- * - this operation doesn't seem to work correctly in OpenAFS server 1.2.2
 */
-#if 0
+static void xdr_decode_AFSFetchStatus(const __be32 **_bp,
-int afs_rxfs_get_root_volume(struct afs_server *server,
+                                      struct afs_file_status *status,
-                             char *buf, size_t *buflen)
+                                      struct afs_vnode *vnode)
 {
-        struct rxrpc_connection *conn;
+        const __be32 *bp = *_bp;
-        struct rxrpc_call *call;
+        umode_t mode;
-        struct kvec piov[2];
+        u64 data_version, size;
-        size_t sent;
+        u32 changed = 0; /* becomes non-zero if ctime-type changes seen */
-        int ret;
-        u32 param[1];
+#define EXTRACT(DST)                            \
+        do {                                    \
+                u32 x = ntohl(*bp++);           \
+                changed |= DST - x;             \
+                DST = x;                        \
+        } while (0)
+        status->if_version = ntohl(*bp++);
+        EXTRACT(status->type);
+        EXTRACT(status->nlink);
+        size = ntohl(*bp++);
+        data_version = ntohl(*bp++);
+        EXTRACT(status->author);
+        EXTRACT(status->owner);
+        EXTRACT(status->caller_access); /* call ticket dependent */
+        EXTRACT(status->anon_access);
+        EXTRACT(status->mode);
+        EXTRACT(status->parent.vnode);
+        EXTRACT(status->parent.unique);
+        bp++; /* seg size */
+        status->mtime_client = ntohl(*bp++);
+        status->mtime_server = ntohl(*bp++);
+        EXTRACT(status->group);
+        bp++; /* sync counter */
+        data_version |= (u64) ntohl(*bp++) << 32;
+        bp++; /* lock count */
+        size |= (u64) ntohl(*bp++) << 32;
+        bp++; /* spare 4 */
+        *_bp = bp;
+        if (size != status->size) {
+                status->size = size;
+                changed |= true;
+        }
+        status->mode &= S_IALLUGO;
+        _debug("vnode time %lx, %lx",
+               status->mtime_client, status->mtime_server);
+        if (vnode) {
+                status->parent.vid = vnode->fid.vid;
+                if (changed && !test_bit(AFS_VNODE_UNSET, &vnode->flags)) {
+                        _debug("vnode changed");
+                        i_size_write(&vnode->vfs_inode, size);
+                        vnode->vfs_inode.i_uid = status->owner;
+                        vnode->vfs_inode.i_gid = status->group;
+                        vnode->vfs_inode.i_version = vnode->fid.unique;
+                        vnode->vfs_inode.i_nlink = status->nlink;
+                        mode = vnode->vfs_inode.i_mode;
+                        mode &= ~S_IALLUGO;
+                        mode |= status->mode;
+                        barrier();
+                        vnode->vfs_inode.i_mode = mode;
+                }
-        DECLARE_WAITQUEUE(myself, current);
+                vnode->vfs_inode.i_ctime.tv_sec = status->mtime_server;
+                vnode->vfs_inode.i_mtime        = vnode->vfs_inode.i_ctime;
+                vnode->vfs_inode.i_atime        = vnode->vfs_inode.i_ctime;
+        }
-        kenter("%p,%p,%u",server, buf, *buflen);
+        if (status->data_version != data_version) {
+                status->data_version = data_version;
+                if (vnode && !test_bit(AFS_VNODE_UNSET, &vnode->flags)) {
+                        _debug("vnode modified %llx on {%x:%u}",
+                               (unsigned long long) data_version,
+                               vnode->fid.vid, vnode->fid.vnode);
+                        set_bit(AFS_VNODE_MODIFIED, &vnode->flags);
+                        set_bit(AFS_VNODE_ZAP_DATA, &vnode->flags);
+                }
+        }
+}
-        /* get hold of the fileserver connection */
+/*
-        ret = afs_server_get_fsconn(server, &conn);
+ * decode an AFSCallBack block
-        if (ret < 0)
+ */
-                goto out;
+static void xdr_decode_AFSCallBack(const __be32 **_bp, struct afs_vnode *vnode)
+{
+        const __be32 *bp = *_bp;
-        /* create a call through that connection */
+        vnode->cb_version       = ntohl(*bp++);
-        ret = rxrpc_create_call(conn, NULL, NULL, afs_rxfs_aemap, &call);
+        vnode->cb_expiry        = ntohl(*bp++);
-        if (ret < 0) {
+        vnode->cb_type          = ntohl(*bp++);
-                printk("kAFS: Unable to create call: %d\n", ret);
+        vnode->cb_expires       = vnode->cb_expiry + get_seconds();
-                goto out_put_conn;
+        *_bp = bp;
-        }
+}
-        call->app_opcode = FSGETROOTVOLUME;
-        /* we want to get event notifications from the call */
+static void xdr_decode_AFSCallBack_raw(const __be32 **_bp,
-        add_wait_queue(&call->waitq, &myself);
+                                       struct afs_callback *cb)
+{
+        const __be32 *bp = *_bp;
-        /* marshall the parameters */
+        cb->version     = ntohl(*bp++);
-        param[0] = htonl(FSGETROOTVOLUME);
+        cb->expiry      = ntohl(*bp++);
+        cb->type        = ntohl(*bp++);
-        piov[0].iov_len = sizeof(param);
+        *_bp = bp;
-        piov[0].iov_base = param;
+}
-        /* send the parameters to the server */
-        ret = rxrpc_call_write_data(call, 1, piov, RXRPC_LAST_PACKET, GFP_NOFS,
-                                    0, &sent);
-        if (ret < 0)
-                goto abort;
-        /* wait for the reply to completely arrive */
-        for (;;) {
-                set_current_state(TASK_INTERRUPTIBLE);
-                if (call->app_call_state != RXRPC_CSTATE_CLNT_RCV_REPLY ||
-                    signal_pending(current))
-                        break;
-                schedule();
-        }
-        set_current_state(TASK_RUNNING);
-        ret = -EINTR;
+/*
-        if (signal_pending(current))
+ * decode an AFSVolSync block
-                goto abort;
+ */
+static void xdr_decode_AFSVolSync(const __be32 **_bp,
+                                  struct afs_volsync *volsync)
+{
+        const __be32 *bp = *_bp;
-        switch (call->app_call_state) {
+        volsync->creation = ntohl(*bp++);
-        case RXRPC_CSTATE_ERROR:
+        bp++; /* spare2 */
-                ret = call->app_errno;
+        bp++; /* spare3 */
-                kdebug("Got Error: %d", ret);
+        bp++; /* spare4 */
-                goto out_unwait;
+        bp++; /* spare5 */
+        bp++; /* spare6 */
+        *_bp = bp;
+}
-        case RXRPC_CSTATE_CLNT_GOT_REPLY:
+/*
-                /* read the reply */
+ * deliver reply data to an FS.FetchStatus
-                kdebug("Got Reply: qty=%d", call->app_ready_qty);
+ */
+static int afs_deliver_fs_fetch_status(struct afs_call *call,
+                                       struct sk_buff *skb, bool last)
+{
+        struct afs_vnode *vnode = call->reply;
+        const __be32 *bp;
-                ret = -EBADMSG;
+        _enter(",,%u", last);
-                if (call->app_ready_qty <= 4)
-                        goto abort;
-                ret = rxrpc_call_read_data(call, NULL, call->app_ready_qty, 0);
+        afs_transfer_reply(call, skb);
-                if (ret < 0)
+        if (!last)
-                        goto abort;
+                return 0;
-#if 0
+        if (call->reply_size != call->reply_max)
-                /* unmarshall the reply */
+                return -EBADMSG;
-                bp = buffer;
-                for (loop = 0; loop < 65; loop++)
-                        entry->name[loop] = ntohl(*bp++);
-                entry->name[64] = 0;
-                entry->type = ntohl(*bp++);
+        /* unmarshall the reply once we've received all of it */
-                entry->num_servers = ntohl(*bp++);
+        bp = call->buffer;
+        xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode);
+        xdr_decode_AFSCallBack(&bp, vnode);
+        if (call->reply2)
+                xdr_decode_AFSVolSync(&bp, call->reply2);
-                for (loop = 0; loop < 8; loop++)
+        _leave(" = 0 [done]");
-                        entry->servers[loop].addr.s_addr = *bp++;
+        return 0;
+}
-                for (loop = 0; loop < 8; loop++)
+/*
-                        entry->servers[loop].partition = ntohl(*bp++);
+ * FS.FetchStatus operation type
+ */
+static const struct afs_call_type afs_RXFSFetchStatus = {
+        .name           = "FS.FetchStatus",
+        .deliver        = afs_deliver_fs_fetch_status,
+        .abort_to_error = afs_abort_to_error,
+        .destructor     = afs_flat_call_destructor,
+};
-                for (loop = 0; loop < 8; loop++)
+/*
-                        entry->servers[loop].flags = ntohl(*bp++);
+ * fetch the status information for a file
+ */
+int afs_fs_fetch_file_status(struct afs_server *server,
+                             struct key *key,
+                             struct afs_vnode *vnode,
+                             struct afs_volsync *volsync,
+                             const struct afs_wait_mode *wait_mode)
+{
+        struct afs_call *call;
+        __be32 *bp;
-                for (loop = 0; loop < 3; loop++)
+        _enter(",%x,{%x:%d},,",
-                        entry->volume_ids[loop] = ntohl(*bp++);
+               key_serial(key), vnode->fid.vid, vnode->fid.vnode);
-                entry->clone_id = ntohl(*bp++);
+        call = afs_alloc_flat_call(&afs_RXFSFetchStatus, 16, (21 + 3 + 6) * 4);
-                entry->flags = ntohl(*bp);
+        if (!call)
-#endif
+                return -ENOMEM;
-                /* success */
+        call->key = key;
-                ret = 0;
+        call->reply = vnode;
-                goto out_unwait;
+        call->reply2 = volsync;
+        call->service_id = FS_SERVICE;
+        call->port = htons(AFS_FS_PORT);
-        default:
+        /* marshall the parameters */
-                BUG();
+        bp = call->request;
-        }
+        bp[0] = htonl(FSFETCHSTATUS);
+        bp[1] = htonl(vnode->fid.vid);
+        bp[2] = htonl(vnode->fid.vnode);
+        bp[3] = htonl(vnode->fid.unique);
+        return afs_make_call(&server->addr, call, GFP_NOFS, wait_mode);
+}
- abort:
-        set_current_state(TASK_UNINTERRUPTIBLE);
-        rxrpc_call_abort(call, ret);
-        schedule();
- out_unwait:
-        set_current_state(TASK_RUNNING);
-        remove_wait_queue(&call->waitq, &myself);
-        rxrpc_put_call(call);
- out_put_conn:
-        afs_server_release_fsconn(server, conn);
- out:
-        kleave("");
-        return ret;
-} /* end afs_rxfs_get_root_volume() */
-#endif
-/*****************************************************************************/
 /*
- * get information about a volume
+ * deliver reply data to an FS.FetchData
 */
-#if 0
+static int afs_deliver_fs_fetch_data(struct afs_call *call,
-int afs_rxfs_get_volume_info(struct afs_server *server,
+                                     struct sk_buff *skb, bool last)
-                             const char *name,
-                             struct afs_volume_info *vinfo)
 {
-        struct rxrpc_connection *conn;
+        struct afs_vnode *vnode = call->reply;
-        struct rxrpc_call *call;
+        const __be32 *bp;
-        struct kvec piov[3];
+        struct page *page;
-        size_t sent;
+        void *buffer;
        int ret;
-        u32 param[2], *bp, zero;
-        DECLARE_WAITQUEUE(myself, current);
+        _enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
+        switch (call->unmarshall) {
+        case 0:
+                call->offset = 0;
+                call->unmarshall++;
+                /* extract the returned data length */
+        case 1:
+                _debug("extract data length");
+                ret = afs_extract_data(call, skb, last, &call->tmp, 4);
+                switch (ret) {
+                case 0:         break;
+                case -EAGAIN:   return 0;
+                default:        return ret;
+                }
-        _enter("%p,%s,%p", server, name, vinfo);
+                call->count = ntohl(call->tmp);
+                _debug("DATA length: %u", call->count);
+                if (call->count > PAGE_SIZE)
+                        return -EBADMSG;
+                call->offset = 0;
+                call->unmarshall++;
+                if (call->count < PAGE_SIZE) { /* zero the unfilled tail */
+                        buffer = kmap_atomic(call->reply3, KM_USER0);
+                        memset(buffer + call->count, 0,
+                               PAGE_SIZE - call->count);
+                        kunmap_atomic(buffer, KM_USER0);
+                }
-        /* get hold of the fileserver connection */
+                /* extract the returned data */
-        ret = afs_server_get_fsconn(server, &conn);
+        case 2:
-        if (ret < 0)
+                _debug("extract data");
-                goto out;
+                page = call->reply3;
+                buffer = kmap_atomic(page, KM_USER0);
+                ret = afs_extract_data(call, skb, last, buffer, call->count);
+                kunmap_atomic(buffer, KM_USER0);
+                switch (ret) {
+                case 0:         break;
+                case -EAGAIN:   return 0;
+                default:        return ret;
+                }
-        /* create a call through that connection */
+                call->offset = 0;
-        ret = rxrpc_create_call(conn, NULL, NULL, afs_rxfs_aemap, &call);
+                call->unmarshall++;
-        if (ret < 0) {
-                printk("kAFS: Unable to create call: %d\n", ret);
+                /* extract the metadata */
-                goto out_put_conn;
+        case 3:
-        }
+                ret = afs_extract_data(call, skb, last, call->buffer,
-        call->app_opcode = FSGETVOLUMEINFO;
+                                       (21 + 3 + 6) * 4);
+                switch (ret) {
+                case 0:         break;
+                case -EAGAIN:   return 0;
+                default:        return ret;
+                }
-        /* we want to get event notifications from the call */
+                bp = call->buffer;
-        add_wait_queue(&call->waitq, &myself);
+                xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode);
+                xdr_decode_AFSCallBack(&bp, vnode);
+                if (call->reply2)
+                        xdr_decode_AFSVolSync(&bp, call->reply2);
-        /* marshall the parameters */
+                call->offset = 0;
-        piov[1].iov_len = strlen(name);
+                call->unmarshall++;
-        piov[1].iov_base = (char *) name;
+        case 4:
-        zero = 0;
+                _debug("trailer");
-        piov[2].iov_len = (4 - (piov[1].iov_len & 3)) & 3;
+                if (skb->len != 0)
-        piov[2].iov_base = &zero;
+                        return -EBADMSG;
+                break;
-        param[0] = htonl(FSGETVOLUMEINFO);
-        param[1] = htonl(piov[1].iov_len);
-        piov[0].iov_len = sizeof(param);
-        piov[0].iov_base = param;
-        /* send the parameters to the server */
-        ret = rxrpc_call_write_data(call, 3, piov, RXRPC_LAST_PACKET, GFP_NOFS,
-                                    0, &sent);
-        if (ret < 0)
-                goto abort;
-        /* wait for the reply to completely arrive */
-        bp = rxrpc_call_alloc_scratch(call, 64);
-        ret = rxrpc_call_read_data(call, bp, 64,
-                                   RXRPC_CALL_READ_BLOCK |
-                                   RXRPC_CALL_READ_ALL);
-        if (ret < 0) {
-                if (ret == -ECONNABORTED) {
-                        ret = call->app_errno;
-                        goto out_unwait;
-                }
-                goto abort;
        }
-        /* unmarshall the reply */
+        if (!last)
-        vinfo->vid = ntohl(*bp++);
+                return 0;
-        vinfo->type = ntohl(*bp++);
+        _leave(" = 0 [done]");
-        vinfo->type_vids[0] = ntohl(*bp++);
+        return 0;
-        vinfo->type_vids[1] = ntohl(*bp++);
+}
-        vinfo->type_vids[2] = ntohl(*bp++);
-        vinfo->type_vids[3] = ntohl(*bp++);
-        vinfo->type_vids[4] = ntohl(*bp++);
-        vinfo->nservers = ntohl(*bp++);
-        vinfo->servers[0].addr.s_addr = *bp++;
-        vinfo->servers[1].addr.s_addr = *bp++;
-        vinfo->servers[2].addr.s_addr = *bp++;
-        vinfo->servers[3].addr.s_addr = *bp++;
-        vinfo->servers[4].addr.s_addr = *bp++;
-        vinfo->servers[5].addr.s_addr = *bp++;
-        vinfo->servers[6].addr.s_addr = *bp++;
-        vinfo->servers[7].addr.s_addr = *bp++;
-        ret = -EBADMSG;
-        if (vinfo->nservers > 8)
-                goto abort;
-        /* success */
-        ret = 0;
- out_unwait:
-        set_current_state(TASK_RUNNING);
-        remove_wait_queue(&call->waitq, &myself);
-        rxrpc_put_call(call);
- out_put_conn:
-        afs_server_release_fsconn(server, conn);
- out:
-        _leave("");
-        return ret;
- abort:
-        set_current_state(TASK_UNINTERRUPTIBLE);
-        rxrpc_call_abort(call, ret);
-        schedule();
-        goto out_unwait;
-} /* end afs_rxfs_get_volume_info() */
-#endif
-/*****************************************************************************/
 /*
- * fetch the status information for a file
+ * FS.FetchData operation type
+ */
+static const struct afs_call_type afs_RXFSFetchData = {
+        .name           = "FS.FetchData",
+        .deliver        = afs_deliver_fs_fetch_data,
+        .abort_to_error = afs_abort_to_error,
+        .destructor     = afs_flat_call_destructor,
+};
+/*
+ * fetch data from a file
 */
-int afs_rxfs_fetch_file_status(struct afs_server *server,
+int afs_fs_fetch_data(struct afs_server *server,
-                               struct afs_vnode *vnode,
+                      struct key *key,
-                               struct afs_volsync *volsync)
+                      struct afs_vnode *vnode,
+                      off_t offset, size_t length,
+                      struct page *buffer,
+                      const struct afs_wait_mode *wait_mode)
 {
-        struct afs_server_callslot callslot;
+        struct afs_call *call;
-        struct rxrpc_call *call;
-        struct kvec piov[1];
-        size_t sent;
-        int ret;
        __be32 *bp;
-        DECLARE_WAITQUEUE(myself, current);
+        _enter("");
-        _enter("%p,{%u,%u,%u}",
+        call = afs_alloc_flat_call(&afs_RXFSFetchData, 24, (21 + 3 + 6) * 4);
-               server, vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique);
+        if (!call)
+                return -ENOMEM;
-        /* get hold of the fileserver connection */
+        call->key = key;
-        ret = afs_server_request_callslot(server, &callslot);
+        call->reply = vnode;
-        if (ret < 0)
+        call->reply2 = NULL; /* volsync */
-                goto out;
+        call->reply3 = buffer;
+        call->service_id = FS_SERVICE;
-        /* create a call through that connection */
+        call->port = htons(AFS_FS_PORT);
-        ret = rxrpc_create_call(callslot.conn, NULL, NULL, afs_rxfs_aemap,
-                                &call);
-        if (ret < 0) {
-                printk("kAFS: Unable to create call: %d\n", ret);
-                goto out_put_conn;
-        }
-        call->app_opcode = FSFETCHSTATUS;
-        /* we want to get event notifications from the call */
-        add_wait_queue(&call->waitq, &myself);
        /* marshall the parameters */
-        bp = rxrpc_call_alloc_scratch(call, 16);
+        bp = call->request;
-        bp[0] = htonl(FSFETCHSTATUS);
+        bp[0] = htonl(FSFETCHDATA);
        bp[1] = htonl(vnode->fid.vid);
        bp[2] = htonl(vnode->fid.vnode);
        bp[3] = htonl(vnode->fid.unique);
+        bp[4] = htonl(offset);
+        bp[5] = htonl(length);
-        piov[0].iov_len = 16;
+        return afs_make_call(&server->addr, call, GFP_NOFS, wait_mode);
-        piov[0].iov_base = bp;
+}
-        /* send the parameters to the server */
-        ret = rxrpc_call_write_data(call, 1, piov, RXRPC_LAST_PACKET, GFP_NOFS,
-                                    0, &sent);
-        if (ret < 0)
-                goto abort;
-        /* wait for the reply to completely arrive */
-        bp = rxrpc_call_alloc_scratch(call, 120);
-        ret = rxrpc_call_read_data(call, bp, 120,
-                                   RXRPC_CALL_READ_BLOCK |
-                                   RXRPC_CALL_READ_ALL);
-        if (ret < 0) {
-                if (ret == -ECONNABORTED) {
-                        ret = call->app_errno;
-                        goto out_unwait;
-                }
-                goto abort;
-        }
-        /* unmarshall the reply */
+/*
-        vnode->status.if_version        = ntohl(*bp++);
+ * deliver reply data to an FS.GiveUpCallBacks
-        vnode->status.type              = ntohl(*bp++);
+ */
-        vnode->status.nlink             = ntohl(*bp++);
+static int afs_deliver_fs_give_up_callbacks(struct afs_call *call,
-        vnode->status.size              = ntohl(*bp++);
+                                            struct sk_buff *skb, bool last)
-        vnode->status.version           = ntohl(*bp++);
+{
-        vnode->status.author            = ntohl(*bp++);
+        _enter(",{%u},%d", skb->len, last);
-        vnode->status.owner             = ntohl(*bp++);
-        vnode->status.caller_access     = ntohl(*bp++);
-        vnode->status.anon_access       = ntohl(*bp++);
-        vnode->status.mode              = ntohl(*bp++);
-        vnode->status.parent.vid        = vnode->fid.vid;
-        vnode->status.parent.vnode      = ntohl(*bp++);
-        vnode->status.parent.unique     = ntohl(*bp++);
-        bp++; /* seg size */
-        vnode->status.mtime_client      = ntohl(*bp++);
-        vnode->status.mtime_server      = ntohl(*bp++);
-        bp++; /* group */
-        bp++; /* sync counter */
-        vnode->status.version |= ((unsigned long long) ntohl(*bp++)) << 32;
-        bp++; /* spare2 */
-        bp++; /* spare3 */
-        bp++; /* spare4 */
-        vnode->cb_version               = ntohl(*bp++);
+        if (skb->len > 0)
-        vnode->cb_expiry                = ntohl(*bp++);
+                return -EBADMSG; /* shouldn't be any reply data */
-        vnode->cb_type                  = ntohl(*bp++);
+        return 0;
+}
-        if (volsync) {
-                volsync->creation       = ntohl(*bp++);
-                bp++; /* spare2 */
-                bp++; /* spare3 */
-                bp++; /* spare4 */
-                bp++; /* spare5 */
-                bp++; /* spare6 */
-        }
-        /* success */
-        ret = 0;
- out_unwait:
-        set_current_state(TASK_RUNNING);
-        remove_wait_queue(&call->waitq, &myself);
-        rxrpc_put_call(call);
- out_put_conn:
-        afs_server_release_callslot(server, &callslot);
- out:
-        _leave("");
-        return ret;
- abort:
-        set_current_state(TASK_UNINTERRUPTIBLE);
-        rxrpc_call_abort(call, ret);
-        schedule();
-        goto out_unwait;
-} /* end afs_rxfs_fetch_file_status() */
-/*****************************************************************************/
 /*
- * fetch the contents of a file or directory
+ * FS.GiveUpCallBacks operation type
 */
-int afs_rxfs_fetch_file_data(struct afs_server *server,
+static const struct afs_call_type afs_RXFSGiveUpCallBacks = {
-                             struct afs_vnode *vnode,
+        .name           = "FS.GiveUpCallBacks",
-                             struct afs_rxfs_fetch_descriptor *desc,
+        .deliver        = afs_deliver_fs_give_up_callbacks,
-                             struct afs_volsync *volsync)
+        .abort_to_error = afs_abort_to_error,
+        .destructor     = afs_flat_call_destructor,
+};
+/*
+ * give up a set of callbacks
+ * - the callbacks are held in the server->cb_break ring
+ */
+int afs_fs_give_up_callbacks(struct afs_server *server,
+                             const struct afs_wait_mode *wait_mode)
 {
-        struct afs_server_callslot callslot;
+        struct afs_call *call;
-        struct rxrpc_call *call;
+        size_t ncallbacks;
-        struct kvec piov[1];
+        __be32 *bp, *tp;
-        size_t sent;
+        int loop;
-        int ret;
-        __be32 *bp;
-        DECLARE_WAITQUEUE(myself, current);
+        ncallbacks = CIRC_CNT(server->cb_break_head, server->cb_break_tail,
+                              ARRAY_SIZE(server->cb_break));
-        _enter("%p,{fid={%u,%u,%u},sz=%Zu,of=%lu}",
-               server,
+        _enter("{%zu},", ncallbacks);
-               desc->fid.vid,
-               desc->fid.vnode,
+        if (ncallbacks == 0)
-               desc->fid.unique,
+                return 0;
-               desc->size,
+        if (ncallbacks > AFSCBMAX)
-               desc->offset);
+                ncallbacks = AFSCBMAX;
-        /* get hold of the fileserver connection */
+        _debug("break %zu callbacks", ncallbacks);
-        ret = afs_server_request_callslot(server, &callslot);
-        if (ret < 0)
-                goto out;
-        /* create a call through that connection */
-        ret = rxrpc_create_call(callslot.conn, NULL, NULL, afs_rxfs_aemap, &call);
-        if (ret < 0) {
-                printk("kAFS: Unable to create call: %d\n", ret);
-                goto out_put_conn;
-        }
-        call->app_opcode = FSFETCHDATA;
-        /* we want to get event notifications from the call */
+        call = afs_alloc_flat_call(&afs_RXFSGiveUpCallBacks,
-        add_wait_queue(&call->waitq, &myself);
+                                   12 + ncallbacks * 6 * 4, 0);
+        if (!call)
+                return -ENOMEM;
+        call->service_id = FS_SERVICE;
+        call->port = htons(AFS_FS_PORT);
        /* marshall the parameters */
-        bp = rxrpc_call_alloc_scratch(call, 24);
+        bp = call->request;
-        bp[0] = htonl(FSFETCHDATA);
+        tp = bp + 2 + ncallbacks * 3;
-        bp[1] = htonl(desc->fid.vid);
+        *bp++ = htonl(FSGIVEUPCALLBACKS);
-        bp[2] = htonl(desc->fid.vnode);
+        *bp++ = htonl(ncallbacks);
-        bp[3] = htonl(desc->fid.unique);
+        *tp++ = htonl(ncallbacks);
-        bp[4] = htonl(desc->offset);
-        bp[5] = htonl(desc->size);
+        atomic_sub(ncallbacks, &server->cb_break_n);
+        for (loop = ncallbacks; loop > 0; loop--) {
-        piov[0].iov_len = 24;
+                struct afs_callback *cb =
-        piov[0].iov_base = bp;
+                        &server->cb_break[server->cb_break_tail];
-        /* send the parameters to the server */
+                *bp++ = htonl(cb->fid.vid);
-        ret = rxrpc_call_write_data(call, 1, piov, RXRPC_LAST_PACKET, GFP_NOFS,
+                *bp++ = htonl(cb->fid.vnode);
-                                    0, &sent);
+                *bp++ = htonl(cb->fid.unique);
-        if (ret < 0)
+                *tp++ = htonl(cb->version);
-                goto abort;
+                *tp++ = htonl(cb->expiry);
+                *tp++ = htonl(cb->type);
-        /* wait for the data count to arrive */
+                smp_mb();
-        ret = rxrpc_call_read_data(call, bp, 4, RXRPC_CALL_READ_BLOCK);
+                server->cb_break_tail =
-        if (ret < 0)
+                        (server->cb_break_tail + 1) &
-                goto read_failed;
+                        (ARRAY_SIZE(server->cb_break) - 1);
-        desc->actual = ntohl(bp[0]);
-        if (desc->actual != desc->size) {
-                ret = -EBADMSG;
-                goto abort;
        }
-        /* call the app to read the actual data */
+        ASSERT(ncallbacks > 0);
-        rxrpc_call_reset_scratch(call);
+        wake_up_nr(&server->cb_break_waitq, ncallbacks);
-        ret = rxrpc_call_read_data(call, desc->buffer, desc->actual,
-                                   RXRPC_CALL_READ_BLOCK);
-        if (ret < 0)
-                goto read_failed;
-        /* wait for the rest of the reply to completely arrive */
-        rxrpc_call_reset_scratch(call);
-        bp = rxrpc_call_alloc_scratch(call, 120);
-        ret = rxrpc_call_read_data(call, bp, 120,
-                                   RXRPC_CALL_READ_BLOCK |
-                                   RXRPC_CALL_READ_ALL);
-        if (ret < 0)
-                goto read_failed;
-        /* unmarshall the reply */
-        vnode->status.if_version        = ntohl(*bp++);
-        vnode->status.type              = ntohl(*bp++);
-        vnode->status.nlink             = ntohl(*bp++);
-        vnode->status.size              = ntohl(*bp++);
-        vnode->status.version           = ntohl(*bp++);
-        vnode->status.author            = ntohl(*bp++);
-        vnode->status.owner             = ntohl(*bp++);
-        vnode->status.caller_access     = ntohl(*bp++);
-        vnode->status.anon_access       = ntohl(*bp++);
-        vnode->status.mode              = ntohl(*bp++);
-        vnode->status.parent.vid        = desc->fid.vid;
-        vnode->status.parent.vnode      = ntohl(*bp++);
-        vnode->status.parent.unique     = ntohl(*bp++);
-        bp++; /* seg size */
-        vnode->status.mtime_client      = ntohl(*bp++);
-        vnode->status.mtime_server      = ntohl(*bp++);
-        bp++; /* group */
-        bp++; /* sync counter */
-        vnode->status.version |= ((unsigned long long) ntohl(*bp++)) << 32;
-        bp++; /* spare2 */
-        bp++; /* spare3 */
-        bp++; /* spare4 */
-        vnode->cb_version               = ntohl(*bp++);
+        return afs_make_call(&server->addr, call, GFP_NOFS, wait_mode);
-        vnode->cb_expiry                = ntohl(*bp++);
+}
-        vnode->cb_type                  = ntohl(*bp++);
-        if (volsync) {
-                volsync->creation       = ntohl(*bp++);
-                bp++; /* spare2 */
-                bp++; /* spare3 */
-                bp++; /* spare4 */
-                bp++; /* spare5 */
-                bp++; /* spare6 */
-        }
-        /* success */
+/*
-        ret = 0;
+ * deliver reply data to an FS.CreateFile or an FS.MakeDir
+ */
- out_unwait:
+static int afs_deliver_fs_create_vnode(struct afs_call *call,
-        set_current_state(TASK_RUNNING);
+                                       struct sk_buff *skb, bool last)
-        remove_wait_queue(&call->waitq,&myself);
+{
-        rxrpc_put_call(call);
+        struct afs_vnode *vnode = call->reply;
- out_put_conn:
+        const __be32 *bp;
-        afs_server_release_callslot(server, &callslot);
- out:
+        _enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
-        _leave(" = %d", ret);
-        return ret;
- read_failed:
-        if (ret == -ECONNABORTED) {
-                ret = call->app_errno;
-                goto out_unwait;
-        }
- abort:
+        afs_transfer_reply(call, skb);
-        set_current_state(TASK_UNINTERRUPTIBLE);
+        if (!last)
-        rxrpc_call_abort(call, ret);
+                return 0;
-        schedule();
-        goto out_unwait;
-} /* end afs_rxfs_fetch_file_data() */
+        if (call->reply_size != call->reply_max)
+                return -EBADMSG;
+        /* unmarshall the reply once we've received all of it */
+        bp = call->buffer;
+        xdr_decode_AFSFid(&bp, call->reply2);
+        xdr_decode_AFSFetchStatus(&bp, call->reply3, NULL);
+        xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode);
+        xdr_decode_AFSCallBack_raw(&bp, call->reply4);
+        /* xdr_decode_AFSVolSync(&bp, call->replyX); */
+        _leave(" = 0 [done]");
+        return 0;
+}
+/*
+ * FS.CreateFile and FS.MakeDir operation type
+ */
+static const struct afs_call_type afs_RXFSCreateXXXX = {
+        .name           = "FS.CreateXXXX",
+        .deliver        = afs_deliver_fs_create_vnode,
+        .abort_to_error = afs_abort_to_error,
+        .destructor     = afs_flat_call_destructor,
+};
-/*****************************************************************************/
 /*
- * ask the AFS fileserver to discard a callback request on a file
+ * create a file or make a directory
 */
-int afs_rxfs_give_up_callback(struct afs_server *server,
+int afs_fs_create(struct afs_server *server,
-                              struct afs_vnode *vnode)
+                  struct key *key,
+                  struct afs_vnode *vnode,
+                  const char *name,
+                  umode_t mode,
+                  struct afs_fid *newfid,
+                  struct afs_file_status *newstatus,
+                  struct afs_callback *newcb,
+                  const struct afs_wait_mode *wait_mode)
 {
-        struct afs_server_callslot callslot;
+        struct afs_call *call;
-        struct rxrpc_call *call;
+        size_t namesz, reqsz, padsz;
-        struct kvec piov[1];
-        size_t sent;
-        int ret;
        __be32 *bp;
-        DECLARE_WAITQUEUE(myself, current);
+        _enter("");
-        _enter("%p,{%u,%u,%u}",
+        namesz = strlen(name);
-               server, vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique);
+        padsz = (4 - (namesz & 3)) & 3;
+        reqsz = (5 * 4) + namesz + padsz + (6 * 4);
-        /* get hold of the fileserver connection */
+        call = afs_alloc_flat_call(&afs_RXFSCreateXXXX, reqsz,
-        ret = afs_server_request_callslot(server, &callslot);
+                                   (3 + 21 + 21 + 3 + 6) * 4);
-        if (ret < 0)
+        if (!call)
-                goto out;
+                return -ENOMEM;
-        /* create a call through that connection */
+        call->key = key;
-        ret = rxrpc_create_call(callslot.conn, NULL, NULL, afs_rxfs_aemap, &call);
+        call->reply = vnode;
-        if (ret < 0) {
+        call->reply2 = newfid;
-                printk("kAFS: Unable to create call: %d\n", ret);
+        call->reply3 = newstatus;
-                goto out_put_conn;
+        call->reply4 = newcb;
+        call->service_id = FS_SERVICE;
+        call->port = htons(AFS_FS_PORT);
+        /* marshall the parameters */
+        bp = call->request;
+        *bp++ = htonl(S_ISDIR(mode) ? FSMAKEDIR : FSCREATEFILE);
+        *bp++ = htonl(vnode->fid.vid);
+        *bp++ = htonl(vnode->fid.vnode);
+        *bp++ = htonl(vnode->fid.unique);
+        *bp++ = htonl(namesz);
+        memcpy(bp, name, namesz);
+        bp = (void *) bp + namesz;
+        if (padsz > 0) {
+                memset(bp, 0, padsz);
+                bp = (void *) bp + padsz;
        }
-        call->app_opcode = FSGIVEUPCALLBACKS;
+        *bp++ = htonl(AFS_SET_MODE);
+        *bp++ = 0; /* mtime */
+        *bp++ = 0; /* owner */
+        *bp++ = 0; /* group */
+        *bp++ = htonl(mode & S_IALLUGO); /* unix mode */
+        *bp++ = 0; /* segment size */
-        /* we want to get event notifications from the call */
+        return afs_make_call(&server->addr, call, GFP_NOFS, wait_mode);
-        add_wait_queue(&call->waitq, &myself);
+}
-        /* marshall the parameters */
+/*
-        bp = rxrpc_call_alloc_scratch(call, (1 + 4 + 4) * 4);
+ * deliver reply data to an FS.RemoveFile or FS.RemoveDir
+ */
+static int afs_deliver_fs_remove(struct afs_call *call,
+                                 struct sk_buff *skb, bool last)
+{
+        struct afs_vnode *vnode = call->reply;
+        const __be32 *bp;
-        piov[0].iov_len = (1 + 4 + 4) * 4;
+        _enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
-        piov[0].iov_base = bp;
-        *bp++ = htonl(FSGIVEUPCALLBACKS);
+        afs_transfer_reply(call, skb);
-        *bp++ = htonl(1);
+        if (!last)
+                return 0;
+        if (call->reply_size != call->reply_max)
+                return -EBADMSG;
+        /* unmarshall the reply once we've received all of it */
+        bp = call->buffer;
+        xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode);
+        /* xdr_decode_AFSVolSync(&bp, call->replyX); */
+        _leave(" = 0 [done]");
+        return 0;
+}
+/*
+ * FS.RemoveDir/FS.RemoveFile operation type
+ */
+static const struct afs_call_type afs_RXFSRemoveXXXX = {
+        .name           = "FS.RemoveXXXX",
+        .deliver        = afs_deliver_fs_remove,
+        .abort_to_error = afs_abort_to_error,
+        .destructor     = afs_flat_call_destructor,
+};
+/*
+ * remove a file or directory
+ */
+int afs_fs_remove(struct afs_server *server,
+                  struct key *key,
+                  struct afs_vnode *vnode,
+                  const char *name,
+                  bool isdir,
+                  const struct afs_wait_mode *wait_mode)
+{
+        struct afs_call *call;
+        size_t namesz, reqsz, padsz;
+        __be32 *bp;
+        _enter("");
+        namesz = strlen(name);
+        padsz = (4 - (namesz & 3)) & 3;
+        reqsz = (5 * 4) + namesz + padsz;
+        call = afs_alloc_flat_call(&afs_RXFSRemoveXXXX, reqsz, (21 + 6) * 4);
+        if (!call)
+                return -ENOMEM;
+        call->key = key;
+        call->reply = vnode;
+        call->service_id = FS_SERVICE;
+        call->port = htons(AFS_FS_PORT);
+        /* marshall the parameters */
+        bp = call->request;
+        *bp++ = htonl(isdir ? FSREMOVEDIR : FSREMOVEFILE);
        *bp++ = htonl(vnode->fid.vid);
        *bp++ = htonl(vnode->fid.vnode);
        *bp++ = htonl(vnode->fid.unique);
-        *bp++ = htonl(1);
+        *bp++ = htonl(namesz);
-        *bp++ = htonl(vnode->cb_version);
+        memcpy(bp, name, namesz);
-        *bp++ = htonl(vnode->cb_expiry);
+        bp = (void *) bp + namesz;
-        *bp++ = htonl(vnode->cb_type);
+        if (padsz > 0) {
+                memset(bp, 0, padsz);
-        /* send the parameters to the server */
+                bp = (void *) bp + padsz;
-        ret = rxrpc_call_write_data(call, 1, piov, RXRPC_LAST_PACKET, GFP_NOFS,
-                                    0, &sent);
-        if (ret < 0)
-                goto abort;
-        /* wait for the reply to completely arrive */
-        for (;;) {
-                set_current_state(TASK_INTERRUPTIBLE);
-                if (call->app_call_state != RXRPC_CSTATE_CLNT_RCV_REPLY ||
-                    signal_pending(current))
-                        break;
-                schedule();
        }
-        set_current_state(TASK_RUNNING);
-        ret = -EINTR;
+        return afs_make_call(&server->addr, call, GFP_NOFS, wait_mode);
-        if (signal_pending(current))
+}
-                goto abort;
-        switch (call->app_call_state) {
+/*
-        case RXRPC_CSTATE_ERROR:
+ * deliver reply data to an FS.Link
-                ret = call->app_errno;
+ */
-                goto out_unwait;
+static int afs_deliver_fs_link(struct afs_call *call,
+                               struct sk_buff *skb, bool last)
+{
+        struct afs_vnode *dvnode = call->reply, *vnode = call->reply2;
+        const __be32 *bp;
-        case RXRPC_CSTATE_CLNT_GOT_REPLY:
+        _enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
-                ret = 0;
-                goto out_unwait;
-        default:
+        afs_transfer_reply(call, skb);
-                BUG();
+        if (!last)
-        }
+                return 0;
+        if (call->reply_size != call->reply_max)
+                return -EBADMSG;
+        /* unmarshall the reply once we've received all of it */
+        bp = call->buffer;
+        xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode);
+        xdr_decode_AFSFetchStatus(&bp, &dvnode->status, dvnode);
+        /* xdr_decode_AFSVolSync(&bp, call->replyX); */
+        _leave(" = 0 [done]");
+        return 0;
+}
+/*
+ * FS.Link operation type
+ */
+static const struct afs_call_type afs_RXFSLink = {
+        .name           = "FS.Link",
+        .deliver        = afs_deliver_fs_link,
+        .abort_to_error = afs_abort_to_error,
+        .destructor     = afs_flat_call_destructor,
+};
- out_unwait:
-        set_current_state(TASK_RUNNING);
-        remove_wait_queue(&call->waitq, &myself);
-        rxrpc_put_call(call);
- out_put_conn:
-        afs_server_release_callslot(server, &callslot);
- out:
-        _leave("");
-        return ret;
- abort:
-        set_current_state(TASK_UNINTERRUPTIBLE);
-        rxrpc_call_abort(call, ret);
-        schedule();
-        goto out_unwait;
-} /* end afs_rxfs_give_up_callback() */
-/*****************************************************************************/
 /*
- * look a filename up in a directory
+ * make a hard link
- * - this operation doesn't seem to work correctly in OpenAFS server 1.2.2
 */
-#if 0
+int afs_fs_link(struct afs_server *server,
-int afs_rxfs_lookup(struct afs_server *server,
+                struct key *key,
-                    struct afs_vnode *dir,
+                struct afs_vnode *dvnode,
-                    const char *filename,
+                struct afs_vnode *vnode,
-                    struct afs_vnode *vnode,
+                const char *name,
-                    struct afs_volsync *volsync)
+                const struct afs_wait_mode *wait_mode)
 {
-        struct rxrpc_connection *conn;
+        struct afs_call *call;
-        struct rxrpc_call *call;
+        size_t namesz, reqsz, padsz;
-        struct kvec piov[3];
+        __be32 *bp;
-        size_t sent;
-        int ret;
-        u32 *bp, zero;
-        DECLARE_WAITQUEUE(myself, current);
+        _enter("");
-        kenter("%p,{%u,%u,%u},%s",
+        namesz = strlen(name);
-               server, fid->vid, fid->vnode, fid->unique, filename);
+        padsz = (4 - (namesz & 3)) & 3;
+        reqsz = (5 * 4) + namesz + padsz + (3 * 4);
-        /* get hold of the fileserver connection */
+        call = afs_alloc_flat_call(&afs_RXFSLink, reqsz, (21 + 21 + 6) * 4);
-        ret = afs_server_get_fsconn(server, &conn);
+        if (!call)
-        if (ret < 0)
+                return -ENOMEM;
-                goto out;
-        /* create a call through that connection */
+        call->key = key;
-        ret = rxrpc_create_call(conn, NULL, NULL, afs_rxfs_aemap, &call);
+        call->reply = dvnode;
-        if (ret < 0) {
+        call->reply2 = vnode;
-                printk("kAFS: Unable to create call: %d\n", ret);
+        call->service_id = FS_SERVICE;
-                goto out_put_conn;
+        call->port = htons(AFS_FS_PORT);
+        /* marshall the parameters */
+        bp = call->request;
+        *bp++ = htonl(FSLINK);
+        *bp++ = htonl(dvnode->fid.vid);
+        *bp++ = htonl(dvnode->fid.vnode);
+        *bp++ = htonl(dvnode->fid.unique);
+        *bp++ = htonl(namesz);
+        memcpy(bp, name, namesz);
+        bp = (void *) bp + namesz;
+        if (padsz > 0) {
+                memset(bp, 0, padsz);
+                bp = (void *) bp + padsz;
        }
-        call->app_opcode = FSLOOKUP;
+        *bp++ = htonl(vnode->fid.vid);
+        *bp++ = htonl(vnode->fid.vnode);
+        *bp++ = htonl(vnode->fid.unique);
+        return afs_make_call(&server->addr, call, GFP_NOFS, wait_mode);
+}
+/*
+ * deliver reply data to an FS.Symlink
+ */
+static int afs_deliver_fs_symlink(struct afs_call *call,
+                                  struct sk_buff *skb, bool last)
+{
+        struct afs_vnode *vnode = call->reply;
+        const __be32 *bp;
-        /* we want to get event notifications from the call */
+        _enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
-        add_wait_queue(&call->waitq,&myself);
+        afs_transfer_reply(call, skb);
+        if (!last)
+                return 0;
+        if (call->reply_size != call->reply_max)
+                return -EBADMSG;
+        /* unmarshall the reply once we've received all of it */
+        bp = call->buffer;
+        xdr_decode_AFSFid(&bp, call->reply2);
+        xdr_decode_AFSFetchStatus(&bp, call->reply3, NULL);
+        xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode);
+        /* xdr_decode_AFSVolSync(&bp, call->replyX); */
+        _leave(" = 0 [done]");
+        return 0;
+}
+/*
+ * FS.Symlink operation type
+ */
+static const struct afs_call_type afs_RXFSSymlink = {
+        .name           = "FS.Symlink",
+        .deliver        = afs_deliver_fs_symlink,
+        .abort_to_error = afs_abort_to_error,
+        .destructor     = afs_flat_call_destructor,
+};
+/*
+ * create a symbolic link
+ */
+int afs_fs_symlink(struct afs_server *server,
+                   struct key *key,
+                   struct afs_vnode *vnode,
+                   const char *name,
+                   const char *contents,
+                   struct afs_fid *newfid,
+                   struct afs_file_status *newstatus,
+                   const struct afs_wait_mode *wait_mode)
+{
+        struct afs_call *call;
+        size_t namesz, reqsz, padsz, c_namesz, c_padsz;
+        __be32 *bp;
+        _enter("");
+        namesz = strlen(name);
+        padsz = (4 - (namesz & 3)) & 3;
+        c_namesz = strlen(contents);
+        c_padsz = (4 - (c_namesz & 3)) & 3;
+        reqsz = (6 * 4) + namesz + padsz + c_namesz + c_padsz + (6 * 4);
+        call = afs_alloc_flat_call(&afs_RXFSSymlink, reqsz,
+                                   (3 + 21 + 21 + 6) * 4);
+        if (!call)
+                return -ENOMEM;
+        call->key = key;
+        call->reply = vnode;
+        call->reply2 = newfid;
+        call->reply3 = newstatus;
+        call->service_id = FS_SERVICE;
+        call->port = htons(AFS_FS_PORT);
        /* marshall the parameters */
-        bp = rxrpc_call_alloc_scratch(call, 20);
+        bp = call->request;
+        *bp++ = htonl(FSSYMLINK);
-        zero = 0;
+        *bp++ = htonl(vnode->fid.vid);
+        *bp++ = htonl(vnode->fid.vnode);
-        piov[0].iov_len = 20;
+        *bp++ = htonl(vnode->fid.unique);
-        piov[0].iov_base = bp;
+        *bp++ = htonl(namesz);
-        piov[1].iov_len = strlen(filename);
+        memcpy(bp, name, namesz);
-        piov[1].iov_base = (char *) filename;
+        bp = (void *) bp + namesz;
-        piov[2].iov_len = (4 - (piov[1].iov_len & 3)) & 3;
+        if (padsz > 0) {
-        piov[2].iov_base = &zero;
+                memset(bp, 0, padsz);
+                bp = (void *) bp + padsz;
-        *bp++ = htonl(FSLOOKUP);
-        *bp++ = htonl(dirfid->vid);
-        *bp++ = htonl(dirfid->vnode);
-        *bp++ = htonl(dirfid->unique);
-        *bp++ = htonl(piov[1].iov_len);
-        /* send the parameters to the server */
-        ret = rxrpc_call_write_data(call, 3, piov, RXRPC_LAST_PACKET, GFP_NOFS,
-                                    0, &sent);
-        if (ret < 0)
-                goto abort;
-        /* wait for the reply to completely arrive */
-        bp = rxrpc_call_alloc_scratch(call, 220);
-        ret = rxrpc_call_read_data(call, bp, 220,
-                                   RXRPC_CALL_READ_BLOCK |
-                                   RXRPC_CALL_READ_ALL);
-        if (ret < 0) {
-                if (ret == -ECONNABORTED) {
-                        ret = call->app_errno;
-                        goto out_unwait;
-                }
-                goto abort;
        }
+        *bp++ = htonl(c_namesz);
+        memcpy(bp, contents, c_namesz);
+        bp = (void *) bp + c_namesz;
+        if (c_padsz > 0) {
+                memset(bp, 0, c_padsz);
+                bp = (void *) bp + c_padsz;
+        }
+        *bp++ = htonl(AFS_SET_MODE);
+        *bp++ = 0; /* mtime */
+        *bp++ = 0; /* owner */
+        *bp++ = 0; /* group */
+        *bp++ = htonl(S_IRWXUGO); /* unix mode */
+        *bp++ = 0; /* segment size */
-        /* unmarshall the reply */
+        return afs_make_call(&server->addr, call, GFP_NOFS, wait_mode);
-        fid->vid                = ntohl(*bp++);
+}
-        fid->vnode              = ntohl(*bp++);
-        fid->unique             = ntohl(*bp++);
-        vnode->status.if_version        = ntohl(*bp++);
+/*
-        vnode->status.type              = ntohl(*bp++);
+ * deliver reply data to an FS.Rename
-        vnode->status.nlink             = ntohl(*bp++);
+ */
-        vnode->status.size              = ntohl(*bp++);
+static int afs_deliver_fs_rename(struct afs_call *call,
-        vnode->status.version           = ntohl(*bp++);
+                                  struct sk_buff *skb, bool last)
-        vnode->status.author            = ntohl(*bp++);
+{
-        vnode->status.owner             = ntohl(*bp++);
+        struct afs_vnode *orig_dvnode = call->reply, *new_dvnode = call->reply2;
-        vnode->status.caller_access     = ntohl(*bp++);
+        const __be32 *bp;
-        vnode->status.anon_access       = ntohl(*bp++);
-        vnode->status.mode              = ntohl(*bp++);
-        vnode->status.parent.vid        = dirfid->vid;
-        vnode->status.parent.vnode      = ntohl(*bp++);
-        vnode->status.parent.unique     = ntohl(*bp++);
-        bp++; /* seg size */
-        vnode->status.mtime_client      = ntohl(*bp++);
-        vnode->status.mtime_server      = ntohl(*bp++);
-        bp++; /* group */
-        bp++; /* sync counter */
-        vnode->status.version |= ((unsigned long long) ntohl(*bp++)) << 32;
-        bp++; /* spare2 */
-        bp++; /* spare3 */
-        bp++; /* spare4 */
-        dir->status.if_version          = ntohl(*bp++);
+        _enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
-        dir->status.type                = ntohl(*bp++);
-        dir->status.nlink               = ntohl(*bp++);
+        afs_transfer_reply(call, skb);
-        dir->status.size                = ntohl(*bp++);
+        if (!last)
-        dir->status.version             = ntohl(*bp++);
+                return 0;
-        dir->status.author              = ntohl(*bp++);
-        dir->status.owner               = ntohl(*bp++);
+        if (call->reply_size != call->reply_max)
-        dir->status.caller_access       = ntohl(*bp++);
+                return -EBADMSG;
-        dir->status.anon_access         = ntohl(*bp++);
-        dir->status.mode                = ntohl(*bp++);
+        /* unmarshall the reply once we've received all of it */
-        dir->status.parent.vid          = dirfid->vid;
+        bp = call->buffer;
-        dir->status.parent.vnode        = ntohl(*bp++);
+        xdr_decode_AFSFetchStatus(&bp, &orig_dvnode->status, orig_dvnode);
-        dir->status.parent.unique       = ntohl(*bp++);
+        if (new_dvnode != orig_dvnode)
-        bp++; /* seg size */
+                xdr_decode_AFSFetchStatus(&bp, &new_dvnode->status, new_dvnode);
-        dir->status.mtime_client        = ntohl(*bp++);
+        /* xdr_decode_AFSVolSync(&bp, call->replyX); */
-        dir->status.mtime_server        = ntohl(*bp++);
-        bp++; /* group */
+        _leave(" = 0 [done]");
-        bp++; /* sync counter */
+        return 0;
-        dir->status.version |= ((unsigned long long) ntohl(*bp++)) << 32;
+}
-        bp++; /* spare2 */
-        bp++; /* spare3 */
+/*
-        bp++; /* spare4 */
+ * FS.Rename operation type
+ */
+static const struct afs_call_type afs_RXFSRename = {
+        .name           = "FS.Rename",
+        .deliver        = afs_deliver_fs_rename,
+        .abort_to_error = afs_abort_to_error,
+        .destructor     = afs_flat_call_destructor,
+};
+/*
+ * rename a file or directory
+ */
+int afs_fs_rename(struct afs_server *server,
+                  struct key *key,
+                  struct afs_vnode *orig_dvnode,
+                  const char *orig_name,
+                  struct afs_vnode *new_dvnode,
+                  const char *new_name,
+                  const struct afs_wait_mode *wait_mode)
+{
+        struct afs_call *call;
+        size_t reqsz, o_namesz, o_padsz, n_namesz, n_padsz;
+        __be32 *bp;
+        _enter("");
+        o_namesz = strlen(orig_name);
+        o_padsz = (4 - (o_namesz & 3)) & 3;
+        n_namesz = strlen(new_name);
+        n_padsz = (4 - (n_namesz & 3)) & 3;
+        reqsz = (4 * 4) +
+                4 + o_namesz + o_padsz +
+                (3 * 4) +
+                4 + n_namesz + n_padsz;
+        call = afs_alloc_flat_call(&afs_RXFSRename, reqsz, (21 + 21 + 6) * 4);
+        if (!call)
+                return -ENOMEM;
+        call->key = key;
+        call->reply = orig_dvnode;
+        call->reply2 = new_dvnode;
+        call->service_id = FS_SERVICE;
+        call->port = htons(AFS_FS_PORT);
+        /* marshall the parameters */
+        bp = call->request;
+        *bp++ = htonl(FSRENAME);
+        *bp++ = htonl(orig_dvnode->fid.vid);
+        *bp++ = htonl(orig_dvnode->fid.vnode);
+        *bp++ = htonl(orig_dvnode->fid.unique);
+        *bp++ = htonl(o_namesz);
+        memcpy(bp, orig_name, o_namesz);
+        bp = (void *) bp + o_namesz;
+        if (o_padsz > 0) {
+                memset(bp, 0, o_padsz);
+                bp = (void *) bp + o_padsz;
+        }
-        callback->fid           = *fid;
+        *bp++ = htonl(new_dvnode->fid.vid);
-        callback->version       = ntohl(*bp++);
+        *bp++ = htonl(new_dvnode->fid.vnode);
-        callback->expiry        = ntohl(*bp++);
+        *bp++ = htonl(new_dvnode->fid.unique);
-        callback->type          = ntohl(*bp++);
+        *bp++ = htonl(n_namesz);
+        memcpy(bp, new_name, n_namesz);
-        if (volsync) {
+        bp = (void *) bp + n_namesz;
-                volsync->creation       = ntohl(*bp++);
+        if (n_padsz > 0) {
-                bp++; /* spare2 */
+                memset(bp, 0, n_padsz);
-                bp++; /* spare3 */
+                bp = (void *) bp + n_padsz;
-                bp++; /* spare4 */
-                bp++; /* spare5 */
-                bp++; /* spare6 */
        }
-        /* success */
+        return afs_make_call(&server->addr, call, GFP_NOFS, wait_mode);
-        ret = 0;
+}
- out_unwait:
-        set_current_state(TASK_RUNNING);
-        remove_wait_queue(&call->waitq, &myself);
-        rxrpc_put_call(call);
- out_put_conn:
-        afs_server_release_fsconn(server, conn);
- out:
-        kleave("");
-        return ret;
- abort:
-        set_current_state(TASK_UNINTERRUPTIBLE);
-        rxrpc_call_abort(call, ret);
-        schedule();
-        goto out_unwait;
-} /* end afs_rxfs_lookup() */
-#endif
diff --git a/fs/afs/fsclient.h b/fs/afs/fsclient.h
deleted file mode 100644
index 8ba3e749ee3c..000000000000
--- a/fs/afs/fsclient.h
+++ /dev/null
@@ -1,54 +0,0 @@
-/* fsclient.h: AFS File Server client stub declarations
- *
- * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-#ifndef _LINUX_AFS_FSCLIENT_H
-#define _LINUX_AFS_FSCLIENT_H
-#include "server.h"
-extern int afs_rxfs_get_volume_info(struct afs_server *server,
-                                    const char *name,
-                                    struct afs_volume_info *vinfo);
-extern int afs_rxfs_fetch_file_status(struct afs_server *server,
-                                      struct afs_vnode *vnode,
-                                      struct afs_volsync *volsync);
-struct afs_rxfs_fetch_descriptor {
-        struct afs_fid  fid;            /* file ID to fetch */
-        size_t          size;           /* total number of bytes to fetch */
-        off_t           offset;         /* offset in file to start from */
-        void            *buffer;        /* read buffer */
-        size_t          actual;         /* actual size sent back by server */
-};
-extern int afs_rxfs_fetch_file_data(struct afs_server *server,
-                                    struct afs_vnode *vnode,
-                                    struct afs_rxfs_fetch_descriptor *desc,
-                                    struct afs_volsync *volsync);
-extern int afs_rxfs_give_up_callback(struct afs_server *server,
-                                     struct afs_vnode *vnode);
-/* this doesn't appear to work in OpenAFS server */
-extern int afs_rxfs_lookup(struct afs_server *server,
-                           struct afs_vnode *dir,
-                           const char *filename,
-                           struct afs_vnode *vnode,
-                           struct afs_volsync *volsync);
-/* this is apparently mis-implemented in OpenAFS server */
-extern int afs_rxfs_get_root_volume(struct afs_server *server,
-                                    char *buf,
-                                    size_t *buflen);
-#endif /* _LINUX_AFS_FSCLIENT_H */
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index 9d9bca6c28b5..c184a4ee5995 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -19,9 +19,6 @@
 #include <linux/slab.h>
 #include <linux/fs.h>
 #include <linux/pagemap.h>
-#include "volume.h"
-#include "vnode.h"
-#include "super.h"
 #include "internal.h"
 struct afs_iget_data {
@@ -29,26 +26,25 @@ struct afs_iget_data {
        struct afs_volume       *volume;        /* volume on which resides */
 };
-/*****************************************************************************/
 /*
 * map the AFS file status to the inode member variables
 */
-static int afs_inode_map_status(struct afs_vnode *vnode)
+static int afs_inode_map_status(struct afs_vnode *vnode, struct key *key)
 {
        struct inode *inode = AFS_VNODE_TO_I(vnode);
-        _debug("FS: ft=%d lk=%d sz=%Zu ver=%Lu mod=%hu",
+        _debug("FS: ft=%d lk=%d sz=%llu ver=%Lu mod=%hu",
               vnode->status.type,
               vnode->status.nlink,
-               vnode->status.size,
+               (unsigned long long) vnode->status.size,
-               vnode->status.version,
+               vnode->status.data_version,
               vnode->status.mode);
        switch (vnode->status.type) {
        case AFS_FTYPE_FILE:
                inode->i_mode   = S_IFREG | vnode->status.mode;
                inode->i_op     = &afs_file_inode_operations;
-                inode->i_fop    = &generic_ro_fops;
+                inode->i_fop    = &afs_file_operations;
                break;
        case AFS_FTYPE_DIR:
                inode->i_mode   = S_IFDIR | vnode->status.mode;
@@ -77,9 +73,9 @@ static int afs_inode_map_status(struct afs_vnode *vnode)
        /* check to see whether a symbolic link is really a mountpoint */
        if (vnode->status.type == AFS_FTYPE_SYMLINK) {
-                afs_mntpt_check_symlink(vnode);
+                afs_mntpt_check_symlink(vnode, key);
-                if (vnode->flags & AFS_VNODE_MOUNTPOINT) {
+                if (test_bit(AFS_VNODE_MOUNTPOINT, &vnode->flags)) {
                        inode->i_mode   = S_IFDIR | vnode->status.mode;
                        inode->i_op     = &afs_mntpt_inode_operations;
                        inode->i_fop    = &afs_mntpt_file_operations;
@@ -87,30 +83,8 @@ static int afs_inode_map_status(struct afs_vnode *vnode)
        }
        return 0;
-} /* end afs_inode_map_status() */
+}
-/*****************************************************************************/
-/*
- * attempt to fetch the status of an inode, coelescing multiple simultaneous
- * fetches
- */
-static int afs_inode_fetch_status(struct inode *inode)
-{
-        struct afs_vnode *vnode;
-        int ret;
-        vnode = AFS_FS_I(inode);
-        ret = afs_vnode_fetch_status(vnode);
-        if (ret == 0)
-                ret = afs_inode_map_status(vnode);
-        return ret;
-} /* end afs_inode_fetch_status() */
-/*****************************************************************************/
 /*
 * iget5() comparator
 */
@@ -120,9 +94,8 @@ static int afs_iget5_test(struct inode *inode, void *opaque)
        return inode->i_ino == data->fid.vnode &&
                inode->i_version == data->fid.unique;
-} /* end afs_iget5_test() */
+}
-/*****************************************************************************/
 /*
 * iget5() inode initialiser
 */
@@ -137,14 +110,14 @@ static int afs_iget5_set(struct inode *inode, void *opaque)
        vnode->volume = data->volume;
        return 0;
-} /* end afs_iget5_set() */
+}
-/*****************************************************************************/
 /*
 * inode retrieval
 */
-inline int afs_iget(struct super_block *sb, struct afs_fid *fid,
+struct inode *afs_iget(struct super_block *sb, struct key *key,
-                    struct inode **_inode)
+                       struct afs_fid *fid, struct afs_file_status *status,
+                       struct afs_callback *cb)
 {
        struct afs_iget_data data = { .fid = *fid };
        struct afs_super_info *as;
@@ -161,20 +134,18 @@ inline int afs_iget(struct super_block *sb, struct afs_fid *fid,
                             &data);
        if (!inode) {
                _leave(" = -ENOMEM");
-                return -ENOMEM;
+                return ERR_PTR(-ENOMEM);
        }
+        _debug("GOT INODE %p { vl=%x vn=%x, u=%x }",
+               inode, fid->vid, fid->vnode, fid->unique);
        vnode = AFS_FS_I(inode);
        /* deal with an existing inode */
        if (!(inode->i_state & I_NEW)) {
-                ret = afs_vnode_fetch_status(vnode);
+                _leave(" = %p", inode);
-                if (ret==0)
+                return inode;
-                        *_inode = inode;
-                else
-                        iput(inode);
-                _leave(" = %d", ret);
-                return ret;
        }
 #ifdef AFS_CACHING_SUPPORT
@@ -186,100 +157,185 @@ inline int afs_iget(struct super_block *sb, struct afs_fid *fid,
                               &vnode->cache);
 #endif
-        /* okay... it's a new inode */
+        if (!status) {
-        inode->i_flags |= S_NOATIME;
+                /* it's a remotely extant inode */
-        vnode->flags |= AFS_VNODE_CHANGED;
+                set_bit(AFS_VNODE_CB_BROKEN, &vnode->flags);
-        ret = afs_inode_fetch_status(inode);
+                ret = afs_vnode_fetch_status(vnode, NULL, key);
-        if (ret<0)
+                if (ret < 0)
+                        goto bad_inode;
+        } else {
+                /* it's an inode we just created */
+                memcpy(&vnode->status, status, sizeof(vnode->status));
+                if (!cb) {
+                        /* it's a symlink we just created (the fileserver
+                         * didn't give us a callback) */
+                        vnode->cb_version = 0;
+                        vnode->cb_expiry = 0;
+                        vnode->cb_type = 0;
+                        vnode->cb_expires = get_seconds();
+                } else {
+                        vnode->cb_version = cb->version;
+                        vnode->cb_expiry = cb->expiry;
+                        vnode->cb_type = cb->type;
+                        vnode->cb_expires = vnode->cb_expiry + get_seconds();
+                }
+        }
+        ret = afs_inode_map_status(vnode, key);
+        if (ret < 0)
                goto bad_inode;
        /* success */
+        clear_bit(AFS_VNODE_UNSET, &vnode->flags);
+        inode->i_flags |= S_NOATIME;
        unlock_new_inode(inode);
+        _leave(" = %p [CB { v=%u t=%u }]", inode, vnode->cb_version, vnode->cb_type);
-        *_inode = inode;
+        return inode;
-        _leave(" = 0 [CB { v=%u x=%lu t=%u }]",
-               vnode->cb_version,
-               vnode->cb_timeout.timo_jif,
-               vnode->cb_type);
-        return 0;
        /* failure */
- bad_inode:
+bad_inode:
        make_bad_inode(inode);
        unlock_new_inode(inode);
        iput(inode);
        _leave(" = %d [bad]", ret);
+        return ERR_PTR(ret);
+}
+/*
+ * validate a vnode/inode
+ * - there are several things we need to check
+ *   - parent dir data changes (rm, rmdir, rename, mkdir, create, link,
+ *     symlink)
+ *   - parent dir metadata changed (security changes)
+ *   - dentry data changed (write, truncate)
+ *   - dentry metadata changed (security changes)
+ */
+int afs_validate(struct afs_vnode *vnode, struct key *key)
+{
+        int ret;
+        _enter("{v={%x:%u} fl=%lx},%x",
+               vnode->fid.vid, vnode->fid.vnode, vnode->flags,
+               key_serial(key));
+        if (vnode->cb_promised &&
+            !test_bit(AFS_VNODE_CB_BROKEN, &vnode->flags) &&
+            !test_bit(AFS_VNODE_MODIFIED, &vnode->flags) &&
+            !test_bit(AFS_VNODE_ZAP_DATA, &vnode->flags)) {
+                if (vnode->cb_expires < get_seconds() + 10) {
+                        _debug("callback expired");
+                        set_bit(AFS_VNODE_CB_BROKEN, &vnode->flags);
+                } else {
+                        goto valid;
+                }
+        }
+        if (test_bit(AFS_VNODE_DELETED, &vnode->flags))
+                goto valid;
+        mutex_lock(&vnode->validate_lock);
+        /* if the promise has expired, we need to check the server again to get
+         * a new promise - note that if the (parent) directory's metadata was
+         * changed then the security may be different and we may no longer have
+         * access */
+        if (!vnode->cb_promised ||
+            test_bit(AFS_VNODE_CB_BROKEN, &vnode->flags)) {
+                _debug("not promised");
+                ret = afs_vnode_fetch_status(vnode, NULL, key);
+                if (ret < 0)
+                        goto error_unlock;
+                _debug("new promise [fl=%lx]", vnode->flags);
+        }
+        if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) {
+                _debug("file already deleted");
+                ret = -ESTALE;
+                goto error_unlock;
+        }
+        /* if the vnode's data version number changed then its contents are
+         * different */
+        if (test_and_clear_bit(AFS_VNODE_ZAP_DATA, &vnode->flags)) {
+                _debug("zap data {%x:%d}", vnode->fid.vid, vnode->fid.vnode);
+                invalidate_remote_inode(&vnode->vfs_inode);
+        }
+        clear_bit(AFS_VNODE_MODIFIED, &vnode->flags);
+        mutex_unlock(&vnode->validate_lock);
+valid:
+        _leave(" = 0");
+        return 0;
+error_unlock:
+        mutex_unlock(&vnode->validate_lock);
+        _leave(" = %d", ret);
        return ret;
-} /* end afs_iget() */
+}
-/*****************************************************************************/
 /*
 * read the attributes of an inode
 */
 int afs_inode_getattr(struct vfsmount *mnt, struct dentry *dentry,
                      struct kstat *stat)
 {
-        struct afs_vnode *vnode;
        struct inode *inode;
-        int ret;
        inode = dentry->d_inode;
        _enter("{ ino=%lu v=%lu }", inode->i_ino, inode->i_version);
-        vnode = AFS_FS_I(inode);
-        ret = afs_inode_fetch_status(inode);
-        if (ret == -ENOENT) {
-                _leave(" = %d [%d %p]",
-                       ret, atomic_read(&dentry->d_count), dentry->d_inode);
-                return ret;
-        }
-        else if (ret < 0) {
-                make_bad_inode(inode);
-                _leave(" = %d", ret);
-                return ret;
-        }
-        /* transfer attributes from the inode structure to the stat
-         * structure */
        generic_fillattr(inode, stat);
-        _leave(" = 0 CB { v=%u x=%u t=%u }",
-               vnode->cb_version,
-               vnode->cb_expiry,
-               vnode->cb_type);
        return 0;
-} /* end afs_inode_getattr() */
+}
-/*****************************************************************************/
 /*
 * clear an AFS inode
 */
 void afs_clear_inode(struct inode *inode)
 {
+        struct afs_permits *permits;
        struct afs_vnode *vnode;
        vnode = AFS_FS_I(inode);
-        _enter("ino=%lu { vn=%08x v=%u x=%u t=%u }",
+        _enter("{%x:%d.%d} v=%u x=%u t=%u }",
-               inode->i_ino,
+               vnode->fid.vid,
               vnode->fid.vnode,
+               vnode->fid.unique,
               vnode->cb_version,
               vnode->cb_expiry,
-               vnode->cb_type
+               vnode->cb_type);
-               );
-        BUG_ON(inode->i_ino != vnode->fid.vnode);
+        _debug("CLEAR INODE %p", inode);
-        afs_vnode_give_up_callback(vnode);
+        ASSERTCMP(inode->i_ino, ==, vnode->fid.vnode);
+        afs_give_up_callback(vnode);
+        if (vnode->server) {
+                spin_lock(&vnode->server->fs_lock);
+                rb_erase(&vnode->server_rb, &vnode->server->fs_vnodes);
+                spin_unlock(&vnode->server->fs_lock);
+                afs_put_server(vnode->server);
+                vnode->server = NULL;
+        }
+        ASSERT(!vnode->cb_promised);
 #ifdef AFS_CACHING_SUPPORT
        cachefs_relinquish_cookie(vnode->cache, 0);
        vnode->cache = NULL;
 #endif
+        mutex_lock(&vnode->permits_lock);
+        permits = vnode->permits;
+        rcu_assign_pointer(vnode->permits, NULL);
+        mutex_unlock(&vnode->permits_lock);
+        if (permits)
+                call_rcu(&permits->rcu, afs_zap_permits);
        _leave("");
-} /* end afs_clear_inode() */
+}
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 5151d5da2c2f..6dd3197d1d8d 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -1,6 +1,6 @@
-/* internal.h: internal AFS stuff
+/* internal AFS stuff
 *
- * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
+ * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 *
 * This program is free software; you can redistribute it and/or
@@ -9,48 +9,391 @@
 * 2 of the License, or (at your option) any later version.
 */
-#ifndef AFS_INTERNAL_H
-#define AFS_INTERNAL_H
 #include <linux/compiler.h>
 #include <linux/kernel.h>
 #include <linux/fs.h>
 #include <linux/pagemap.h>
+#include <linux/skbuff.h>
+#include <linux/rxrpc.h>
+#include <linux/key.h>
+#include "afs.h"
+#include "afs_vl.h"
+#define AFS_CELL_MAX_ADDRS 15
+struct afs_call;
+typedef enum {
+        AFS_VL_NEW,                     /* new, uninitialised record */
+        AFS_VL_CREATING,                /* creating record */
+        AFS_VL_VALID,                   /* record is valid - TODO confirm (comment used to say "pending") */
+        AFS_VL_NO_VOLUME,               /* no such volume available */
+        AFS_VL_UPDATING,                /* update in progress */
+        AFS_VL_VOLUME_DELETED,          /* volume was deleted */
+        AFS_VL_UNCERTAIN,               /* uncertain state (update failed) */
+} __attribute__((packed)) afs_vlocation_state_t;
+struct afs_mount_params {
+        bool                    rwpath;         /* T if the parent should be considered R/W */
+        bool                    force;          /* T to force cell type */
+        afs_voltype_t           type;           /* type of volume requested */
+        int                     volnamesz;      /* size of volume name */
+        const char              *volname;       /* name of volume to mount */
+        struct afs_cell         *cell;          /* cell in which to find volume */
+        struct afs_volume       *volume;        /* volume record */
+        struct key              *key;           /* key to use for secure mounting */
+};
 /*
- * debug tracing
+ * definition of how to wait for the completion of an operation
 */
-#define kenter(FMT, a...)       printk("==> %s("FMT")\n",__FUNCTION__ , ## a)
+struct afs_wait_mode {
-#define kleave(FMT, a...)       printk("<== %s()"FMT"\n",__FUNCTION__ , ## a)
+        /* RxRPC received message notification */
-#define kdebug(FMT, a...)       printk(FMT"\n" , ## a)
+        void (*rx_wakeup)(struct afs_call *call);
-#define kproto(FMT, a...)       printk("### "FMT"\n" , ## a)
-#define knet(FMT, a...)         printk(FMT"\n" , ## a)
-#ifdef __KDEBUG
-#define _enter(FMT, a...)       kenter(FMT , ## a)
-#define _leave(FMT, a...)       kleave(FMT , ## a)
-#define _debug(FMT, a...)       kdebug(FMT , ## a)
-#define _proto(FMT, a...)       kproto(FMT , ## a)
-#define _net(FMT, a...)         knet(FMT , ## a)
-#else
-#define _enter(FMT, a...)       do { } while(0)
-#define _leave(FMT, a...)       do { } while(0)
-#define _debug(FMT, a...)       do { } while(0)
-#define _proto(FMT, a...)       do { } while(0)
-#define _net(FMT, a...)         do { } while(0)
-#endif
-static inline void afs_discard_my_signals(void)
+        /* synchronous call waiter and call dispatched notification */
-{
+        int (*wait)(struct afs_call *call);
-        while (signal_pending(current)) {
-                siginfo_t sinfo;
+        /* asynchronous call completion */
+        void (*async_complete)(void *reply, int error);
+};
+extern const struct afs_wait_mode afs_sync_call;
+extern const struct afs_wait_mode afs_async_call;
-                spin_lock_irq(&current->sighand->siglock);
+/*
-                dequeue_signal(current,&current->blocked, &sinfo);
+ * a record of an in-progress RxRPC call
-                spin_unlock_irq(&current->sighand->siglock);
+ */
-        }
+struct afs_call {
+        const struct afs_call_type *type;       /* type of call */
+        const struct afs_wait_mode *wait_mode;  /* completion wait mode */
+        wait_queue_head_t       waitq;          /* processes awaiting completion */
+        struct work_struct      async_work;     /* asynchronous work processor */
+        struct work_struct      work;           /* actual work processor */
+        struct sk_buff_head     rx_queue;       /* received packets */
+        struct rxrpc_call       *rxcall;        /* RxRPC call handle */
+        struct key              *key;           /* security for this call */
+        struct afs_server       *server;        /* server affected by incoming CM call */
+        void                    *request;       /* request data (first part) */
+        void                    *request2;      /* request data (second part) */
+        void                    *buffer;        /* reply receive buffer */
+        void                    *reply;         /* reply buffer (first part) */
+        void                    *reply2;        /* reply buffer (second part) */
+        void                    *reply3;        /* reply buffer (third part) */
+        void                    *reply4;        /* reply buffer (fourth part) */
+        enum {                                  /* call state */
+                AFS_CALL_REQUESTING,    /* request is being sent for outgoing call */
+                AFS_CALL_AWAIT_REPLY,   /* awaiting reply to outgoing call */
+                AFS_CALL_AWAIT_OP_ID,   /* awaiting op ID on incoming call */
+                AFS_CALL_AWAIT_REQUEST, /* awaiting request data on incoming call */
+                AFS_CALL_REPLYING,      /* replying to incoming call */
+                AFS_CALL_AWAIT_ACK,     /* awaiting final ACK of incoming call */
+                AFS_CALL_COMPLETE,      /* successfully completed */
+                AFS_CALL_BUSY,          /* server was busy */
+                AFS_CALL_ABORTED,       /* call was aborted */
+                AFS_CALL_ERROR,         /* call failed due to error */
+        }                       state;
+        int                     error;          /* error code */
+        unsigned                request_size;   /* size of request data */
+        unsigned                reply_max;      /* maximum size of reply */
+        unsigned                reply_size;     /* current size of reply */
+        unsigned short          offset;         /* offset into received data store */
+        unsigned char           unmarshall;     /* unmarshalling phase */
+        bool                    incoming;       /* T if incoming call */
+        u16                     service_id;     /* RxRPC service ID to call */
+        __be16                  port;           /* target UDP port */
+        __be32                  operation_ID;   /* operation ID for an incoming call */
+        u32                     count;          /* count for use in unmarshalling */
+        __be32                  tmp;            /* place to extract temporary data */
+};
+struct afs_call_type {
+        const char *name;
+        /* deliver request or reply data to a call
+         * - returning an error will cause the call to be aborted
+         */
+        int (*deliver)(struct afs_call *call, struct sk_buff *skb,
+                       bool last);
+        /* map an abort code to an error number */
+        int (*abort_to_error)(u32 abort_code);
+        /* clean up a call */
+        void (*destructor)(struct afs_call *call);
+};
+/*
+ * AFS superblock private data
+ * - there's one superblock per volume
+ */
+struct afs_super_info {
+        struct afs_volume       *volume;        /* volume record */
+        char                    rwparent;       /* T if parent is R/W AFS volume */
+};
+static inline struct afs_super_info *AFS_FS_S(struct super_block *sb)
+{
+        return sb->s_fs_info;
 }
+extern struct file_system_type afs_fs_type;
+/*
+ * entry in the cached cell catalogue
+ */
+struct afs_cache_cell {
+        char            name[AFS_MAXCELLNAME];  /* cell name (padded with NULs) */
+        struct in_addr  vl_servers[15];         /* cached cell VL servers */
+};
+/*
+ * AFS cell record
+ */
+struct afs_cell {
+        atomic_t                usage;
+        struct list_head        link;           /* main cell list link */
+        struct key              *anonymous_key; /* anonymous user key for this cell */
+        struct list_head        proc_link;      /* /proc cell list link */
+        struct proc_dir_entry   *proc_dir;      /* /proc dir for this cell */
+#ifdef AFS_CACHING_SUPPORT
+        struct cachefs_cookie   *cache;         /* caching cookie */
+#endif
+        /* server record management */
+        rwlock_t                servers_lock;   /* active server list lock */
+        struct list_head        servers;        /* active server list */
+        /* volume location record management */
+        struct rw_semaphore     vl_sem;         /* volume management serialisation semaphore */
+        struct list_head        vl_list;        /* cell's active VL record list */
+        spinlock_t              vl_lock;        /* vl_list lock */
+        unsigned short          vl_naddrs;      /* number of VL servers in addr list */
+        unsigned short          vl_curr_svix;   /* current server index */
+        struct in_addr          vl_addrs[AFS_CELL_MAX_ADDRS];   /* cell VL server addresses */
+        char                    name[0];        /* cell name - must go last */
+};
+/*
+ * entry in the cached volume location catalogue
+ */
+struct afs_cache_vlocation {
+        /* volume name (lowercase, padded with NULs) */
+        uint8_t                 name[AFS_MAXVOLNAME + 1];
+        uint8_t                 nservers;       /* number of entries used in servers[] */
+        uint8_t                 vidmask;        /* voltype mask for vid[] */
+        uint8_t                 srvtmask[8];    /* voltype masks for servers[] */
+#define AFS_VOL_VTM_RW  0x01 /* R/W version of the volume is available (on this server) */
+#define AFS_VOL_VTM_RO  0x02 /* R/O version of the volume is available (on this server) */
+#define AFS_VOL_VTM_BAK 0x04 /* backup version of the volume is available (on this server) */
+        afs_volid_t             vid[3];         /* volume IDs for R/W, R/O and Bak volumes */
+        struct in_addr          servers[8];     /* fileserver addresses */
+        time_t                  rtime;          /* last retrieval time */
+};
+/*
+ * volume -> vnode hash table entry
+ */
+struct afs_cache_vhash {
+        afs_voltype_t           vtype;          /* which volume variation */
+        uint8_t                 hash_bucket;    /* which hash bucket this represents */
+} __attribute__((packed));
+/*
+ * AFS volume location record
+ */
+struct afs_vlocation {
+        atomic_t                usage;
+        time_t                  time_of_death;  /* time at which put reduced usage to 0 */
+        struct list_head        link;           /* link in cell volume location list */
+        struct list_head        grave;          /* link in master graveyard list */
+        struct list_head        update;         /* link in master update list */
+        struct afs_cell         *cell;          /* cell to which volume belongs */
+#ifdef AFS_CACHING_SUPPORT
+        struct cachefs_cookie   *cache;         /* caching cookie */
+#endif
+        struct afs_cache_vlocation vldb;        /* volume information DB record */
+        struct afs_volume       *vols[3];       /* volume access record pointer (index by type) */
+        wait_queue_head_t       waitq;          /* status change waitqueue */
+        time_t                  update_at;      /* time at which record should be updated */
+        spinlock_t              lock;           /* access lock */
+        afs_vlocation_state_t   state;          /* volume location state */
+        unsigned short          upd_rej_cnt;    /* ENOMEDIUM count during update */
+        unsigned short          upd_busy_cnt;   /* EBUSY count during update */
+        bool                    valid;          /* T if valid */
+};
+/*
+ * AFS fileserver record
+ */
+struct afs_server {
+        atomic_t                usage;          /* usage count (incremented by afs_get_server) */
+        time_t                  time_of_death;  /* time at which put reduced usage to 0 */
+        struct in_addr          addr;           /* server address */
+        struct afs_cell         *cell;          /* cell in which server resides */
+        struct list_head        link;           /* link in cell's server list */
+        struct list_head        grave;          /* link in master graveyard list */
+        struct rb_node          master_rb;      /* link in master by-addr tree */
+        struct rw_semaphore     sem;            /* access lock */
+        /* file service access */
+        struct rb_root          fs_vnodes;      /* tree of vnodes backed by this server (ordered by FID) */
+        unsigned long           fs_act_jif;     /* time at which last activity occurred */
+        unsigned long           fs_dead_jif;    /* time at which no longer to be considered dead */
+        spinlock_t              fs_lock;        /* lock held while altering fs_vnodes */
+        int                     fs_state;       /* 0 or reason FS currently marked dead (-errno) */
+        /* callback promise management */
+        struct rb_root          cb_promises;    /* vnode expiration tree (ordered earliest first) */
+        struct delayed_work     cb_updater;     /* callback updater */
+        struct delayed_work     cb_break_work;  /* collected break dispatcher */
+        wait_queue_head_t       cb_break_waitq; /* space available in cb_break waitqueue */
+        spinlock_t              cb_lock;        /* access lock */
+        struct afs_callback     cb_break[64];   /* ring of callbacks awaiting breaking */
+        atomic_t                cb_break_n;     /* number of pending breaks */
+        u8                      cb_break_head;  /* head of callback breaking ring */
+        u8                      cb_break_tail;  /* tail of callback breaking ring */
+};
+/*
+ * AFS volume access record
+ */
+struct afs_volume {
+        atomic_t                usage;          /* usage count (incremented by afs_get_volume) */
+        struct afs_cell         *cell;          /* cell to which volume belongs (unrefd ptr) */
+        struct afs_vlocation    *vlocation;     /* volume location */
+#ifdef AFS_CACHING_SUPPORT
+        struct cachefs_cookie   *cache;         /* caching cookie */
+#endif
+        afs_volid_t             vid;            /* volume ID */
+        afs_voltype_t           type;           /* type of volume */
+        char                    type_force;     /* force volume type (suppress R/O -> R/W) */
+        unsigned short          nservers;       /* number of server slots filled */
+        unsigned short          rjservers;      /* number of servers discarded due to -ENOMEDIUM */
+        struct afs_server       *servers[8];    /* servers on which volume resides (ordered) */
+        struct rw_semaphore     server_sem;     /* lock for accessing current server */
+};
+/*
+ * vnode catalogue entry
+ */
+struct afs_cache_vnode {
+        afs_vnodeid_t           vnode_id;       /* vnode ID */
+        unsigned                vnode_unique;   /* vnode ID uniquifier */
+        afs_dataversion_t       data_version;   /* data version */
+};
+/*
+ * AFS inode private data
+ */
+struct afs_vnode {
+        struct inode            vfs_inode;      /* the VFS's inode record */
+        struct afs_volume       *volume;        /* volume on which vnode resides */
+        struct afs_server       *server;        /* server currently supplying this file */
+        struct afs_fid          fid;            /* the file identifier for this inode */
+        struct afs_file_status  status;         /* AFS status info for this file */
+#ifdef AFS_CACHING_SUPPORT
+        struct cachefs_cookie   *cache;         /* caching cookie */
+#endif
+        struct afs_permits      *permits;       /* cache of permits so far obtained */
+        struct mutex            permits_lock;   /* lock for altering permits list */
+        struct mutex            validate_lock;  /* lock for validating this vnode */
+        wait_queue_head_t       update_waitq;   /* status fetch waitqueue */
+        int                     update_cnt;     /* number of outstanding ops that will update the
+                                                 * status */
+        spinlock_t              lock;           /* waitqueue/flags lock */
+        unsigned long           flags;
+#define AFS_VNODE_CB_BROKEN     0               /* set if vnode's callback was broken */
+#define AFS_VNODE_UNSET         1               /* set if vnode attributes not yet set */
+#define AFS_VNODE_MODIFIED      2               /* set if vnode's data modified */
+#define AFS_VNODE_ZAP_DATA      3               /* set if vnode's data should be invalidated */
+#define AFS_VNODE_DELETED       4               /* set if vnode deleted on server */
+#define AFS_VNODE_MOUNTPOINT    5               /* set if vnode is a mountpoint symlink */
+        long                    acl_order;      /* ACL check count (callback break count) */
+        /* outstanding callback notification on this file */
+        struct rb_node          server_rb;      /* link in server->fs_vnodes */
+        struct rb_node          cb_promise;     /* link in server->cb_promises */
+        struct work_struct      cb_broken_work; /* work to be done on callback break */
+        time_t                  cb_expires;     /* time at which callback expires */
+        time_t                  cb_expires_at;  /* time used to order cb_promise */
+        unsigned                cb_version;     /* callback version */
+        unsigned                cb_expiry;      /* callback expiry time */
+        afs_callback_type_t     cb_type;        /* type of callback */
+        bool                    cb_promised;    /* true if promise still holds */
+};
+/*
+ * cached security record for one user's attempt to access a vnode
+ */
+struct afs_permit {
+        struct key              *key;           /* RxRPC ticket holding a security context */
+        afs_access_t            access_mask;    /* access mask for this key */
+};
+/*
+ * cache of security records from attempts to access a vnode
+ */
+struct afs_permits {
+        struct rcu_head         rcu;            /* RCU head for deferred disposal via call_rcu() */
+        int                     count;          /* number of records */
+        struct afs_permit       permits[0];     /* the permits so far examined */
+};
+/*
+ * record of one of a system's set of network interfaces
+ */
+struct afs_interface {
+        unsigned        index;          /* interface index */
+        struct in_addr  address;        /* IPv4 address bound to interface */
+        struct in_addr  netmask;        /* netmask applied to address */
+        unsigned        mtu;            /* MTU of interface */
+};
+/*
+ * UUID definition [internet draft]
+ * - the timestamp is a 60-bit value, split 32/16/12, and goes in 100ns
+ *   increments since midnight 15th October 1582
+ *   - add AFS_UUID_TO_UNIX_TIME to convert unix time in 100ns units to UUID
+ *     time
+ * - the clock sequence is a 14-bit counter to avoid duplicate times
+ */
+struct afs_uuid {
+        u32             time_low;                       /* low part of timestamp */
+        u16             time_mid;                       /* mid part of timestamp */
+        u16             time_hi_and_version;            /* high part of timestamp and version  */
+#define AFS_UUID_TO_UNIX_TIME   0x01b21dd213814000
+#define AFS_UUID_TIMEHI_MASK    0x0fff
+#define AFS_UUID_VERSION_TIME   0x1000  /* time-based UUID */
+#define AFS_UUID_VERSION_NAME   0x3000  /* name-based UUID */
+#define AFS_UUID_VERSION_RANDOM 0x4000  /* (pseudo-)random generated UUID */
+        u8              clock_seq_hi_and_reserved;      /* clock seq hi and variant */
+#define AFS_UUID_CLOCKHI_MASK   0x3f
+#define AFS_UUID_VARIANT_STD    0x80
+        u8              clock_seq_low;                  /* clock seq low */
+        u8              node[6];                        /* spatially unique node ID (MAC addr) */
+};
+/*****************************************************************************/
+/*
+ * callback.c
+ */
+extern void afs_init_callback_state(struct afs_server *);
+extern void afs_broken_callback_work(struct work_struct *);
+extern void afs_break_callbacks(struct afs_server *, size_t,
+                                struct afs_callback[]);
+extern void afs_discard_callback_on_delete(struct afs_vnode *);
+extern void afs_give_up_callback(struct afs_vnode *);
+extern void afs_dispatch_give_up_callbacks(struct work_struct *);
+extern void afs_flush_callback_breaks(struct afs_server *);
+extern int __init afs_callback_update_init(void);
+extern void __exit afs_callback_update_kill(void);
 /*
 * cell.c
 */
@@ -60,57 +403,156 @@ extern struct list_head afs_proc_cells;
 extern struct cachefs_index_def afs_cache_cell_index_def;
 #endif
+#define afs_get_cell(C) do { atomic_inc(&(C)->usage); } while(0)
+extern int afs_cell_init(char *);
+extern struct afs_cell *afs_cell_create(const char *, char *);
+extern struct afs_cell *afs_cell_lookup(const char *, unsigned);
+extern struct afs_cell *afs_grab_cell(struct afs_cell *);
+extern void afs_put_cell(struct afs_cell *);
+extern void afs_cell_purge(void);
+/*
+ * cmservice.c
+ */
+extern bool afs_cm_incoming_call(struct afs_call *);
 /*
 * dir.c
 */
 extern const struct inode_operations afs_dir_inode_operations;
 extern const struct file_operations afs_dir_file_operations;
+extern int afs_permission(struct inode *, int, struct nameidata *);
 /*
 * file.c
 */
 extern const struct address_space_operations afs_fs_aops;
 extern const struct inode_operations afs_file_inode_operations;
+extern const struct file_operations afs_file_operations;
+extern int afs_open(struct inode *, struct file *);
+extern int afs_release(struct inode *, struct file *);
 #ifdef AFS_CACHING_SUPPORT
-extern int afs_cache_get_page_cookie(struct page *page,
+extern int afs_cache_get_page_cookie(struct page *, struct cachefs_page **);
-                                     struct cachefs_page **_page_cookie);
 #endif
 /*
- * inode.c
+ * fsclient.c
 */
-extern int afs_iget(struct super_block *sb, struct afs_fid *fid,
+extern int afs_fs_fetch_file_status(struct afs_server *, struct key *,
-                    struct inode **_inode);
+                                    struct afs_vnode *, struct afs_volsync *,
-extern int afs_inode_getattr(struct vfsmount *mnt, struct dentry *dentry,
+                                    const struct afs_wait_mode *);
-                             struct kstat *stat);
+extern int afs_fs_give_up_callbacks(struct afs_server *,
-extern void afs_clear_inode(struct inode *inode);
+                                    const struct afs_wait_mode *);
+extern int afs_fs_fetch_data(struct afs_server *, struct key *,
+                             struct afs_vnode *, off_t, size_t, struct page *,
+                             const struct afs_wait_mode *);
+extern int afs_fs_create(struct afs_server *, struct key *,
+                         struct afs_vnode *, const char *, umode_t,
+                         struct afs_fid *, struct afs_file_status *,
+                         struct afs_callback *,
+                         const struct afs_wait_mode *);
+extern int afs_fs_remove(struct afs_server *, struct key *,
+                         struct afs_vnode *, const char *, bool,
+                         const struct afs_wait_mode *);
+extern int afs_fs_link(struct afs_server *, struct key *, struct afs_vnode *,
+                       struct afs_vnode *, const char *,
+                       const struct afs_wait_mode *);
+extern int afs_fs_symlink(struct afs_server *, struct key *,
+                          struct afs_vnode *, const char *, const char *,
+                          struct afs_fid *, struct afs_file_status *,
+                          const struct afs_wait_mode *);
+extern int afs_fs_rename(struct afs_server *, struct key *,
+                         struct afs_vnode *, const char *,
+                         struct afs_vnode *, const char *,
+                         const struct afs_wait_mode *);
 /*
- * key_afs.c
+ * inode.c
 */
-#ifdef CONFIG_KEYS
+extern struct inode *afs_iget(struct super_block *, struct key *,
-extern int afs_key_register(void);
+                              struct afs_fid *, struct afs_file_status *,
-extern void afs_key_unregister(void);
+                              struct afs_callback *);
-#endif
+extern int afs_validate(struct afs_vnode *, struct key *);
+extern int afs_inode_getattr(struct vfsmount *, struct dentry *,
+                             struct kstat *);
+extern void afs_zap_permits(struct rcu_head *);
+extern void afs_clear_inode(struct inode *);
 /*
 * main.c
 */
+extern struct afs_uuid afs_uuid;
 #ifdef AFS_CACHING_SUPPORT
 extern struct cachefs_netfs afs_cache_netfs;
 #endif
 /*
+ * misc.c
+ */
+extern int afs_abort_to_error(u32);
+/*
 * mntpt.c
 */
 extern const struct inode_operations afs_mntpt_inode_operations;
 extern const struct file_operations afs_mntpt_file_operations;
-extern struct afs_timer afs_mntpt_expiry_timer;
-extern struct afs_timer_ops afs_mntpt_expiry_timer_ops;
 extern unsigned long afs_mntpt_expiry_timeout;
-extern int afs_mntpt_check_symlink(struct afs_vnode *vnode);
+extern int afs_mntpt_check_symlink(struct afs_vnode *, struct key *);
+extern void afs_mntpt_kill_timer(void);
+extern void afs_umount_begin(struct vfsmount *, int);
+/*
+ * proc.c
+ */
+extern int afs_proc_init(void);
+extern void afs_proc_cleanup(void);
+extern int afs_proc_cell_setup(struct afs_cell *);
+extern void afs_proc_cell_remove(struct afs_cell *);
+/*
+ * rxrpc.c
+ */
+extern int afs_open_socket(void);
+extern void afs_close_socket(void);
+extern int afs_make_call(struct in_addr *, struct afs_call *, gfp_t,
+                         const struct afs_wait_mode *);
+extern struct afs_call *afs_alloc_flat_call(const struct afs_call_type *,
+                                            size_t, size_t);
+extern void afs_flat_call_destructor(struct afs_call *);
+extern void afs_transfer_reply(struct afs_call *, struct sk_buff *);
+extern void afs_send_empty_reply(struct afs_call *);
+extern void afs_send_simple_reply(struct afs_call *, const void *, size_t);
+extern int afs_extract_data(struct afs_call *, struct sk_buff *, bool, void *,
+                            size_t);
+/*
+ * security.c
+ */
+extern void afs_clear_permits(struct afs_vnode *);
+extern void afs_cache_permit(struct afs_vnode *, struct key *, long);
+extern struct key *afs_request_key(struct afs_cell *);
+extern int afs_permission(struct inode *, int, struct nameidata *);
+/*
+ * server.c
+ */
+extern spinlock_t afs_server_peer_lock;
+#define afs_get_server(S)                                       \
+do {                                                            \
+        _debug("GET SERVER %d", atomic_read(&(S)->usage));      \
+        atomic_inc(&(S)->usage);                                \
+} while(0)
+extern struct afs_server *afs_lookup_server(struct afs_cell *,
+                                            const struct in_addr *);
+extern struct afs_server *afs_find_server(const struct in_addr *);
+extern void afs_put_server(struct afs_server *);
+extern void __exit afs_purge_servers(void);
 /*
 * super.c
@@ -118,22 +560,211 @@ extern int afs_mntpt_check_symlink(struct afs_vnode *vnode);
 extern int afs_fs_init(void);
 extern void afs_fs_exit(void);
-#define AFS_CB_HASH_COUNT (PAGE_SIZE / sizeof(struct list_head))
+/*
+ * use-rtnetlink.c
+ */
+extern int afs_get_ipv4_interfaces(struct afs_interface *, size_t, bool);
+extern int afs_get_MAC_address(u8 [6]);
-extern struct list_head afs_cb_hash_tbl[];
+/*
-extern spinlock_t afs_cb_hash_lock;
+ * vlclient.c
+ */
+#ifdef AFS_CACHING_SUPPORT
+extern struct cachefs_index_def afs_vlocation_cache_index_def;
+#endif
-#define afs_cb_hash(SRV,FID) \
+extern int afs_vl_get_entry_by_name(struct in_addr *, struct key *,
-        afs_cb_hash_tbl[((unsigned long)(SRV) + \
+                                    const char *, struct afs_cache_vlocation *,
-                        (FID)->vid + (FID)->vnode + (FID)->unique) % \
+                                    const struct afs_wait_mode *);
-                        AFS_CB_HASH_COUNT]
+extern int afs_vl_get_entry_by_id(struct in_addr *, struct key *,
+                                  afs_volid_t, afs_voltype_t,
+                                  struct afs_cache_vlocation *,
+                                  const struct afs_wait_mode *);
 /*
- * proc.c
+ * vlocation.c
 */
-extern int afs_proc_init(void);
+#define afs_get_vlocation(V) do { atomic_inc(&(V)->usage); } while(0)
-extern void afs_proc_cleanup(void);
-extern int afs_proc_cell_setup(struct afs_cell *cell);
+extern int __init afs_vlocation_update_init(void);
-extern void afs_proc_cell_remove(struct afs_cell *cell);
+extern struct afs_vlocation *afs_vlocation_lookup(struct afs_cell *,
+                                                  struct key *,
+                                                  const char *, size_t);
+extern void afs_put_vlocation(struct afs_vlocation *);
+extern void __exit afs_vlocation_purge(void);
+/*
+ * vnode.c
+ */
+#ifdef AFS_CACHING_SUPPORT
+extern struct cachefs_index_def afs_vnode_cache_index_def;
+#endif
+extern struct afs_timer_ops afs_vnode_cb_timed_out_ops;
+static inline struct afs_vnode *AFS_FS_I(struct inode *inode)
+{
+        return container_of(inode, struct afs_vnode, vfs_inode);
+}
+static inline struct inode *AFS_VNODE_TO_I(struct afs_vnode *vnode)
+{
+        return &vnode->vfs_inode;
+}
+extern void afs_vnode_finalise_status_update(struct afs_vnode *,
+                                             struct afs_server *);
+extern int afs_vnode_fetch_status(struct afs_vnode *, struct afs_vnode *,
+                                  struct key *);
+extern int afs_vnode_fetch_data(struct afs_vnode *, struct key *,
+                                off_t, size_t, struct page *);
+extern int afs_vnode_create(struct afs_vnode *, struct key *, const char *,
+                            umode_t, struct afs_fid *, struct afs_file_status *,
+                            struct afs_callback *, struct afs_server **);
+extern int afs_vnode_remove(struct afs_vnode *, struct key *, const char *,
+                            bool);
+extern int afs_vnode_link(struct afs_vnode *, struct afs_vnode *, struct key *,
+                          const char *);
+extern int afs_vnode_symlink(struct afs_vnode *, struct key *, const char *,
+                             const char *, struct afs_fid *,
+                             struct afs_file_status *, struct afs_server **);
+extern int afs_vnode_rename(struct afs_vnode *, struct afs_vnode *,
+                            struct key *, const char *, const char *);
+/*
+ * volume.c
+ */
+#ifdef AFS_CACHING_SUPPORT
+extern struct cachefs_index_def afs_volume_cache_index_def;
+#endif
+#define afs_get_volume(V) do { atomic_inc(&(V)->usage); } while(0)
+extern void afs_put_volume(struct afs_volume *);
+extern struct afs_volume *afs_volume_lookup(struct afs_mount_params *);
+extern struct afs_server *afs_volume_pick_fileserver(struct afs_vnode *);
+extern int afs_volume_release_fileserver(struct afs_vnode *,
+                                         struct afs_server *, int);
+/*****************************************************************************/
+/*
+ * debug tracing
+ */
+extern unsigned afs_debug;
+#define dbgprintk(FMT,...) \
+        printk("[%x%-6.6s] "FMT"\n", smp_processor_id(), current->comm ,##__VA_ARGS__)
+/* empty printf-attributed stub: keeps format strings compiler-checked even when debugging is disabled */
+static inline __attribute__((format(printf,1,2)))
+void _dbprintk(const char *fmt, ...)
+{
+}
+#define kenter(FMT,...) dbgprintk("==> %s("FMT")",__FUNCTION__ ,##__VA_ARGS__)
+#define kleave(FMT,...) dbgprintk("<== %s()"FMT"",__FUNCTION__ ,##__VA_ARGS__)
+#define kdebug(FMT,...) dbgprintk("    "FMT ,##__VA_ARGS__)
+#if defined(__KDEBUG)
+#define _enter(FMT,...) kenter(FMT,##__VA_ARGS__)
+#define _leave(FMT,...) kleave(FMT,##__VA_ARGS__)
+#define _debug(FMT,...) kdebug(FMT,##__VA_ARGS__)
+#elif defined(CONFIG_AFS_DEBUG)
+#define AFS_DEBUG_KENTER        0x01
+#define AFS_DEBUG_KLEAVE        0x02
+#define AFS_DEBUG_KDEBUG        0x04
+#define _enter(FMT,...)                                 \
+do {                                                    \
+        if (unlikely(afs_debug & AFS_DEBUG_KENTER))     \
+                kenter(FMT,##__VA_ARGS__);              \
+} while (0)
+#define _leave(FMT,...)                                 \
+do {                                                    \
+        if (unlikely(afs_debug & AFS_DEBUG_KLEAVE))     \
+                kleave(FMT,##__VA_ARGS__);              \
+} while (0)
+#define _debug(FMT,...)                                 \
+do {                                                    \
+        if (unlikely(afs_debug & AFS_DEBUG_KDEBUG))     \
+                kdebug(FMT,##__VA_ARGS__);              \
+} while (0)
+#else
+#define _enter(FMT,...) _dbprintk("==> %s("FMT")",__FUNCTION__ ,##__VA_ARGS__)
+#define _leave(FMT,...) _dbprintk("<== %s()"FMT"",__FUNCTION__ ,##__VA_ARGS__)
+#define _debug(FMT,...) _dbprintk("    "FMT ,##__VA_ARGS__)
+#endif
+/*
+ * debug assertion checking
+ *
+ * The checking variants are currently always compiled in (the guard below is
+ * hard-wired to 1).  A failed check logs "AFS: Assertion failed" at KERN_ERR
+ * and then calls BUG().  The CMP forms additionally log both operands, cast
+ * to unsigned long, once in decimal and once in hex.  The IF forms only
+ * perform the check when condition C is true.
+ *
+ * Note that the CMP forms expand X and Y again for the log messages, so the
+ * operands are evaluated more than once on the failure path.
+ *
+ * The disabled variants in the other branch expand to empty statements and
+ * do not evaluate their arguments at all.
+ */
+#if 1 // defined(__KDEBUGALL)
+#define ASSERT(X)                                               \
+do {                                                            \
+        if (unlikely(!(X))) {                                   \
+                printk(KERN_ERR "\n");                          \
+                printk(KERN_ERR "AFS: Assertion failed\n");     \
+                BUG();                                          \
+        }                                                       \
+} while(0)
+#define ASSERTCMP(X, OP, Y)                                             \
+do {                                                                    \
+        if (unlikely(!((X) OP (Y)))) {                                  \
+                printk(KERN_ERR "\n");                                  \
+                printk(KERN_ERR "AFS: Assertion failed\n");             \
+                printk(KERN_ERR "%lu " #OP " %lu is false\n",           \
+                       (unsigned long)(X), (unsigned long)(Y));         \
+                printk(KERN_ERR "0x%lx " #OP " 0x%lx is false\n",       \
+                       (unsigned long)(X), (unsigned long)(Y));         \
+                BUG();                                                  \
+        }                                                               \
+} while(0)
+#define ASSERTIF(C, X)                                          \
+do {                                                            \
+        if (unlikely((C) && !(X))) {                            \
+                printk(KERN_ERR "\n");                          \
+                printk(KERN_ERR "AFS: Assertion failed\n");     \
+                BUG();                                          \
+        }                                                       \
+} while(0)
+#define ASSERTIFCMP(C, X, OP, Y)                                        \
+do {                                                                    \
+        if (unlikely((C) && !((X) OP (Y)))) {                           \
+                printk(KERN_ERR "\n");                                  \
+                printk(KERN_ERR "AFS: Assertion failed\n");             \
+                printk(KERN_ERR "%lu " #OP " %lu is false\n",           \
+                       (unsigned long)(X), (unsigned long)(Y));         \
+                printk(KERN_ERR "0x%lx " #OP " 0x%lx is false\n",       \
+                       (unsigned long)(X), (unsigned long)(Y));         \
+                BUG();                                                  \
+        }                                                               \
+} while(0)
+#else
+#define ASSERT(X)                               \
+do {                                            \
+} while(0)
+#define ASSERTCMP(X, OP, Y)                     \
+do {                                            \
+} while(0)
+#define ASSERTIF(C, X)                          \
+do {                                            \
+} while(0)
+#define ASSERTIFCMP(C, X, OP, Y)                \
+do {                                            \
+} while(0)
-#endif /* AFS_INTERNAL_H */
+#endif /* __KDEBUGALL */
diff --git a/fs/afs/kafsasyncd.c b/fs/afs/kafsasyncd.c
deleted file mode 100644
index 615df2407cb2..000000000000
--- a/fs/afs/kafsasyncd.c
+++ /dev/null
@@ -1,255 +0,0 @@
-/* kafsasyncd.c: AFS asynchronous operation daemon
- *
- * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
- *
- * The AFS async daemon is used to the following:
- * - probe "dead" servers to see whether they've come back to life yet.
- * - probe "live" servers that we haven't talked to for a while to see if they are better
- *   candidates for serving than what we're currently using
- * - poll volume location servers to keep up to date volume location lists
- */
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/sched.h>
-#include <linux/completion.h>
-#include <linux/freezer.h>
-#include "cell.h"
-#include "server.h"
-#include "volume.h"
-#include "kafsasyncd.h"
-#include "kafstimod.h"
-#include <rxrpc/call.h>
-#include <asm/errno.h>
-#include "internal.h"
-static DECLARE_COMPLETION(kafsasyncd_alive);
-static DECLARE_COMPLETION(kafsasyncd_dead);
-static DECLARE_WAIT_QUEUE_HEAD(kafsasyncd_sleepq);
-static struct task_struct *kafsasyncd_task;
-static int kafsasyncd_die;
-static int kafsasyncd(void *arg);
-static LIST_HEAD(kafsasyncd_async_attnq);
-static LIST_HEAD(kafsasyncd_async_busyq);
-static DEFINE_SPINLOCK(kafsasyncd_async_lock);
-static void kafsasyncd_null_call_attn_func(struct rxrpc_call *call)
-{
-}
-static void kafsasyncd_null_call_error_func(struct rxrpc_call *call)
-{
-}
-/*****************************************************************************/
-/*
- * start the async daemon
- */
-int afs_kafsasyncd_start(void)
-{
-        int ret;
-        ret = kernel_thread(kafsasyncd, NULL, 0);
-        if (ret < 0)
-                return ret;
-        wait_for_completion(&kafsasyncd_alive);
-        return ret;
-} /* end afs_kafsasyncd_start() */
-/*****************************************************************************/
-/*
- * stop the async daemon
- */
-void afs_kafsasyncd_stop(void)
-{
-        /* get rid of my daemon */
-        kafsasyncd_die = 1;
-        wake_up(&kafsasyncd_sleepq);
-        wait_for_completion(&kafsasyncd_dead);
-} /* end afs_kafsasyncd_stop() */
-/*****************************************************************************/
-/*
- * probing daemon
- */
-static int kafsasyncd(void *arg)
-{
-        struct afs_async_op *op;
-        int die;
-        DECLARE_WAITQUEUE(myself, current);
-        kafsasyncd_task = current;
-        printk("kAFS: Started kafsasyncd %d\n", current->pid);
-        daemonize("kafsasyncd");
-        complete(&kafsasyncd_alive);
-        /* loop around looking for things to attend to */
-        do {
-                set_current_state(TASK_INTERRUPTIBLE);
-                add_wait_queue(&kafsasyncd_sleepq, &myself);
-                for (;;) {
-                        if (!list_empty(&kafsasyncd_async_attnq) ||
-                            signal_pending(current) ||
-                            kafsasyncd_die)
-                                break;
-                        schedule();
-                        set_current_state(TASK_INTERRUPTIBLE);
-                }
-                remove_wait_queue(&kafsasyncd_sleepq, &myself);
-                set_current_state(TASK_RUNNING);
-                try_to_freeze();
-                /* discard pending signals */
-                afs_discard_my_signals();
-                die = kafsasyncd_die;
-                /* deal with the next asynchronous operation requiring
-                 * attention */
-                if (!list_empty(&kafsasyncd_async_attnq)) {
-                        struct afs_async_op *op;
-                        _debug("@@@ Begin Asynchronous Operation");
-                        op = NULL;
-                        spin_lock(&kafsasyncd_async_lock);
-                        if (!list_empty(&kafsasyncd_async_attnq)) {
-                                op = list_entry(kafsasyncd_async_attnq.next,
-                                                struct afs_async_op, link);
-                                list_move_tail(&op->link,
-                                              &kafsasyncd_async_busyq);
-                        }
-                        spin_unlock(&kafsasyncd_async_lock);
-                        _debug("@@@ Operation %p {%p}\n",
-                               op, op ? op->ops : NULL);
-                        if (op)
-                                op->ops->attend(op);
-                        _debug("@@@ End Asynchronous Operation");
-                }
-        } while(!die);
-        /* need to kill all outstanding asynchronous operations before
-         * exiting */
-        kafsasyncd_task = NULL;
-        spin_lock(&kafsasyncd_async_lock);
-        /* fold the busy and attention queues together */
-        list_splice_init(&kafsasyncd_async_busyq,
-                         &kafsasyncd_async_attnq);
-        /* dequeue kafsasyncd from all their wait queues */
-        list_for_each_entry(op, &kafsasyncd_async_attnq, link) {
-                op->call->app_attn_func = kafsasyncd_null_call_attn_func;
-                op->call->app_error_func = kafsasyncd_null_call_error_func;
-                remove_wait_queue(&op->call->waitq, &op->waiter);
-        }
-        spin_unlock(&kafsasyncd_async_lock);
-        /* abort all the operations */
-        while (!list_empty(&kafsasyncd_async_attnq)) {
-                op = list_entry(kafsasyncd_async_attnq.next, struct afs_async_op, link);
-                list_del_init(&op->link);
-                rxrpc_call_abort(op->call, -EIO);
-                rxrpc_put_call(op->call);
-                op->call = NULL;
-                op->ops->discard(op);
-        }
-        /* and that's all */
-        _leave("");
-        complete_and_exit(&kafsasyncd_dead, 0);
-} /* end kafsasyncd() */
-/*****************************************************************************/
-/*
- * begin an operation
- * - place operation on busy queue
- */
-void afs_kafsasyncd_begin_op(struct afs_async_op *op)
-{
-        _enter("");
-        spin_lock(&kafsasyncd_async_lock);
-        init_waitqueue_entry(&op->waiter, kafsasyncd_task);
-        add_wait_queue(&op->call->waitq, &op->waiter);
-        list_move_tail(&op->link, &kafsasyncd_async_busyq);
-        spin_unlock(&kafsasyncd_async_lock);
-        _leave("");
-} /* end afs_kafsasyncd_begin_op() */
-/*****************************************************************************/
-/*
- * request attention for an operation
- * - move to attention queue
- */
-void afs_kafsasyncd_attend_op(struct afs_async_op *op)
-{
-        _enter("");
-        spin_lock(&kafsasyncd_async_lock);
-        list_move_tail(&op->link, &kafsasyncd_async_attnq);
-        spin_unlock(&kafsasyncd_async_lock);
-        wake_up(&kafsasyncd_sleepq);
-        _leave("");
-} /* end afs_kafsasyncd_attend_op() */
-/*****************************************************************************/
-/*
- * terminate an operation
- * - remove from either queue
- */
-void afs_kafsasyncd_terminate_op(struct afs_async_op *op)
-{
-        _enter("");
-        spin_lock(&kafsasyncd_async_lock);
-        if (!list_empty(&op->link)) {
-                list_del_init(&op->link);
-                remove_wait_queue(&op->call->waitq, &op->waiter);
-        }
-        spin_unlock(&kafsasyncd_async_lock);
-        wake_up(&kafsasyncd_sleepq);
-        _leave("");
-} /* end afs_kafsasyncd_terminate_op() */
diff --git a/fs/afs/kafsasyncd.h b/fs/afs/kafsasyncd.h
deleted file mode 100644
index 791803f9a6fb..000000000000
--- a/fs/afs/kafsasyncd.h
+++ /dev/null
@@ -1,52 +0,0 @@
-/* kafsasyncd.h: AFS asynchronous operation daemon
- *
- * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-#ifndef _LINUX_AFS_KAFSASYNCD_H
-#define _LINUX_AFS_KAFSASYNCD_H
-#include "types.h"
-struct afs_async_op;
-struct afs_async_op_ops {
-        void (*attend)(struct afs_async_op *op);
-        void (*discard)(struct afs_async_op *op);
-};
-/*****************************************************************************/
-/*
- * asynchronous operation record
- */
-struct afs_async_op
-{
-        struct list_head                link;
-        struct afs_server               *server;        /* server being contacted */
-        struct rxrpc_call               *call;          /* RxRPC call performing op */
-        wait_queue_t                    waiter;         /* wait queue for kafsasyncd */
-        const struct afs_async_op_ops   *ops;           /* operations */
-};
-static inline void afs_async_op_init(struct afs_async_op *op,
-                                     const struct afs_async_op_ops *ops)
-{
-        INIT_LIST_HEAD(&op->link);
-        op->call = NULL;
-        op->ops = ops;
-}
-extern int afs_kafsasyncd_start(void);
-extern void afs_kafsasyncd_stop(void);
-extern void afs_kafsasyncd_begin_op(struct afs_async_op *op);
-extern void afs_kafsasyncd_attend_op(struct afs_async_op *op);
-extern void afs_kafsasyncd_terminate_op(struct afs_async_op *op);
-#endif /* _LINUX_AFS_KAFSASYNCD_H */
diff --git a/fs/afs/kafstimod.c b/fs/afs/kafstimod.c
deleted file mode 100644
index 694344e4d3c7..000000000000
--- a/fs/afs/kafstimod.c
+++ /dev/null
@@ -1,205 +0,0 @@
-/* kafstimod.c: AFS timeout daemon
- *
- * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/sched.h>
-#include <linux/completion.h>
-#include <linux/freezer.h>
-#include "cell.h"
-#include "volume.h"
-#include "kafstimod.h"
-#include <asm/errno.h>
-#include "internal.h"
-static DECLARE_COMPLETION(kafstimod_alive);
-static DECLARE_COMPLETION(kafstimod_dead);
-static DECLARE_WAIT_QUEUE_HEAD(kafstimod_sleepq);
-static int kafstimod_die;
-static LIST_HEAD(kafstimod_list);
-static DEFINE_SPINLOCK(kafstimod_lock);
-static int kafstimod(void *arg);
-/*****************************************************************************/
-/*
- * start the timeout daemon
- */
-int afs_kafstimod_start(void)
-{
-        int ret;
-        ret = kernel_thread(kafstimod, NULL, 0);
-        if (ret < 0)
-                return ret;
-        wait_for_completion(&kafstimod_alive);
-        return ret;
-} /* end afs_kafstimod_start() */
-/*****************************************************************************/
-/*
- * stop the timeout daemon
- */
-void afs_kafstimod_stop(void)
-{
-        /* get rid of my daemon */
-        kafstimod_die = 1;
-        wake_up(&kafstimod_sleepq);
-        wait_for_completion(&kafstimod_dead);
-} /* end afs_kafstimod_stop() */
-/*****************************************************************************/
-/*
- * timeout processing daemon
- */
-static int kafstimod(void *arg)
-{
-        struct afs_timer *timer;
-        DECLARE_WAITQUEUE(myself, current);
-        printk("kAFS: Started kafstimod %d\n", current->pid);
-        daemonize("kafstimod");
-        complete(&kafstimod_alive);
-        /* loop around looking for things to attend to */
- loop:
-        set_current_state(TASK_INTERRUPTIBLE);
-        add_wait_queue(&kafstimod_sleepq, &myself);
-        for (;;) {
-                unsigned long jif;
-                signed long timeout;
-                /* deal with the server being asked to die */
-                if (kafstimod_die) {
-                        remove_wait_queue(&kafstimod_sleepq, &myself);
-                        _leave("");
-                        complete_and_exit(&kafstimod_dead, 0);
-                }
-                try_to_freeze();
-                /* discard pending signals */
-                afs_discard_my_signals();
-                /* work out the time to elapse before the next event */
-                spin_lock(&kafstimod_lock);
-                if (list_empty(&kafstimod_list)) {
-                        timeout = MAX_SCHEDULE_TIMEOUT;
-                }
-                else {
-                        timer = list_entry(kafstimod_list.next,
-                                           struct afs_timer, link);
-                        timeout = timer->timo_jif;
-                        jif = jiffies;
-                        if (time_before_eq((unsigned long) timeout, jif))
-                                goto immediate;
-                        else {
-                                timeout = (long) timeout - (long) jiffies;
-                        }
-                }
-                spin_unlock(&kafstimod_lock);
-                schedule_timeout(timeout);
-                set_current_state(TASK_INTERRUPTIBLE);
-        }
-        /* the thing on the front of the queue needs processing
-         * - we come here with the lock held and timer pointing to the expired
-         *   entry
-         */
- immediate:
-        remove_wait_queue(&kafstimod_sleepq, &myself);
-        set_current_state(TASK_RUNNING);
-        _debug("@@@ Begin Timeout of %p", timer);
-        /* dequeue the timer */
-        list_del_init(&timer->link);
-        spin_unlock(&kafstimod_lock);
-        /* call the timeout function */
-        timer->ops->timed_out(timer);
-        _debug("@@@ End Timeout");
-        goto loop;
-} /* end kafstimod() */
-/*****************************************************************************/
-/*
- * (re-)queue a timer
- */
-void afs_kafstimod_add_timer(struct afs_timer *timer, unsigned long timeout)
-{
-        struct afs_timer *ptimer;
-        struct list_head *_p;
-        _enter("%p,%lu", timer, timeout);
-        spin_lock(&kafstimod_lock);
-        list_del(&timer->link);
-        /* the timer was deferred or reset - put it back in the queue at the
-         * right place */
-        timer->timo_jif = jiffies + timeout;
-        list_for_each(_p, &kafstimod_list) {
-                ptimer = list_entry(_p, struct afs_timer, link);
-                if (time_before(timer->timo_jif, ptimer->timo_jif))
-                        break;
-        }
-        list_add_tail(&timer->link, _p); /* insert before stopping point */
-        spin_unlock(&kafstimod_lock);
-        wake_up(&kafstimod_sleepq);
-        _leave("");
-} /* end afs_kafstimod_add_timer() */
-/*****************************************************************************/
-/*
- * dequeue a timer
- * - returns 0 if the timer was deleted or -ENOENT if it wasn't queued
- */
-int afs_kafstimod_del_timer(struct afs_timer *timer)
-{
-        int ret = 0;
-        _enter("%p", timer);
-        spin_lock(&kafstimod_lock);
-        if (list_empty(&timer->link))
-                ret = -ENOENT;
-        else
-                list_del_init(&timer->link);
-        spin_unlock(&kafstimod_lock);
-        wake_up(&kafstimod_sleepq);
-        _leave(" = %d", ret);
-        return ret;
-} /* end afs_kafstimod_del_timer() */
diff --git a/fs/afs/kafstimod.h b/fs/afs/kafstimod.h
deleted file mode 100644
index e312f1a61a7f..000000000000
--- a/fs/afs/kafstimod.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/* kafstimod.h: AFS timeout daemon
- *
- * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-#ifndef _LINUX_AFS_KAFSTIMOD_H
-#define _LINUX_AFS_KAFSTIMOD_H
-#include "types.h"
-struct afs_timer;
-struct afs_timer_ops {
-        /* called when the front of the timer queue has timed out */
-        void (*timed_out)(struct afs_timer *timer);
-};
-/*****************************************************************************/
-/*
- * AFS timer/timeout record
- */
-struct afs_timer
-{
-        struct list_head                link;           /* link in timer queue */
-        unsigned long                   timo_jif;       /* timeout time */
-        const struct afs_timer_ops      *ops;           /* timeout expiry function */
-};
-static inline void afs_timer_init(struct afs_timer *timer,
-                                  const struct afs_timer_ops *ops)
-{
-        INIT_LIST_HEAD(&timer->link);
-        timer->ops = ops;
-}
-extern int afs_kafstimod_start(void);
-extern void afs_kafstimod_stop(void);
-extern void afs_kafstimod_add_timer(struct afs_timer *timer,
-                                    unsigned long timeout);
-extern int afs_kafstimod_del_timer(struct afs_timer *timer);
-#endif /* _LINUX_AFS_KAFSTIMOD_H */
diff --git a/fs/afs/main.c b/fs/afs/main.c
index f2704ba53857..40c2704e7557 100644
--- a/fs/afs/main.c
+++ b/fs/afs/main.c
@@ -1,4 +1,4 @@
-/* main.c: AFS client file system
+/* AFS client file system
 *
 * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
@@ -13,43 +13,21 @@
 #include <linux/moduleparam.h>
 #include <linux/init.h>
 #include <linux/completion.h>
-#include <rxrpc/rxrpc.h>
-#include <rxrpc/transport.h>
-#include <rxrpc/call.h>
-#include <rxrpc/peer.h>
-#include "cache.h"
-#include "cell.h"
-#include "server.h"
-#include "fsclient.h"
-#include "cmservice.h"
-#include "kafstimod.h"
-#include "kafsasyncd.h"
 #include "internal.h"
-struct rxrpc_transport *afs_transport;
-static int afs_adding_peer(struct rxrpc_peer *peer);
-static void afs_discarding_peer(struct rxrpc_peer *peer);
 MODULE_DESCRIPTION("AFS Client File System");
 MODULE_AUTHOR("Red Hat, Inc.");
 MODULE_LICENSE("GPL");
+/* runtime debugging mask, exposed to userspace as the "debug" module
+ * parameter; the description must be keyed on that exposed name */
+unsigned afs_debug;
+module_param_named(debug, afs_debug, uint, S_IWUSR | S_IRUGO);
+MODULE_PARM_DESC(debug, "AFS debugging mask");
 static char *rootcell;
 module_param(rootcell, charp, 0);
 MODULE_PARM_DESC(rootcell, "root AFS cell name and VL server IP addr list");
-static struct rxrpc_peer_ops afs_peer_ops = {
-        .adding         = afs_adding_peer,
-        .discarding     = afs_discarding_peer,
-};
-struct list_head afs_cb_hash_tbl[AFS_CB_HASH_COUNT];
-DEFINE_SPINLOCK(afs_cb_hash_lock);
 #ifdef AFS_CACHING_SUPPORT
 static struct cachefs_netfs_operations afs_cache_ops = {
        .get_page_cookie        = afs_cache_get_page_cookie,
@@ -62,20 +40,63 @@ struct cachefs_netfs afs_cache_netfs = {
 };
 #endif
-/*****************************************************************************/
+struct afs_uuid afs_uuid;
+/*
+ * get a client UUID
+ *
+ * Fills in the global afs_uuid: the node field comes from
+ * afs_get_MAC_address(), the timestamp is the current time of day expressed
+ * in 100ns units plus AFS_UUID_TO_UNIX_TIME, and the clock sequence is taken
+ * from two random bytes.
+ *
+ * Returns 0 on success, or the negative error from afs_get_MAC_address().
+ */
+static int __init afs_get_client_UUID(void)
+{
+        struct timespec ts;
+        u64 uuidtime;
+        u16 clockseq;
+        int ret;
+        /* read the MAC address of one of the external interfaces and construct
+         * a UUID from it */
+        ret = afs_get_MAC_address(afs_uuid.node);
+        if (ret < 0)
+                return ret;
+        getnstimeofday(&ts);
+        uuidtime = (u64) ts.tv_sec * 1000 * 1000 * 10;
+        uuidtime += ts.tv_nsec / 100;
+        uuidtime += AFS_UUID_TO_UNIX_TIME;
+        afs_uuid.time_low = uuidtime;
+        afs_uuid.time_mid = uuidtime >> 32;
+        afs_uuid.time_hi_and_version = (uuidtime >> 48) & AFS_UUID_TIMEHI_MASK;
+        /* OR the version in so the masked high time bits are preserved */
+        afs_uuid.time_hi_and_version |= AFS_UUID_VERSION_TIME;
+        get_random_bytes(&clockseq, 2);
+        afs_uuid.clock_seq_low = clockseq;
+        afs_uuid.clock_seq_hi_and_reserved =
+                (clockseq >> 8) & AFS_UUID_CLOCKHI_MASK;
+        /* likewise OR the variant in rather than replacing the clock bits */
+        afs_uuid.clock_seq_hi_and_reserved |= AFS_UUID_VARIANT_STD;
+        _debug("AFS UUID: %08x-%04x-%04x-%02x%02x-%02x%02x%02x%02x%02x%02x",
+               afs_uuid.time_low,
+               afs_uuid.time_mid,
+               afs_uuid.time_hi_and_version,
+               afs_uuid.clock_seq_hi_and_reserved,
+               afs_uuid.clock_seq_low,
+               afs_uuid.node[0], afs_uuid.node[1], afs_uuid.node[2],
+               afs_uuid.node[3], afs_uuid.node[4], afs_uuid.node[5]);
+        return 0;
+}
 /*
 * initialise the AFS client FS module
 */
 static int __init afs_init(void)
 {
-        int loop, ret;
+        int ret;
        printk(KERN_INFO "kAFS: Red Hat AFS client v0.1 registering.\n");
-        /* initialise the callback hash table */
+        ret = afs_get_client_UUID();
-        spin_lock_init(&afs_cb_hash_lock);
+        if (ret < 0)
-        for (loop = AFS_CB_HASH_COUNT - 1; loop >= 0; loop--)
+                return ret;
-                INIT_LIST_HEAD(&afs_cb_hash_tbl[loop]);
        /* register the /proc stuff */
        ret = afs_proc_init();
@@ -87,70 +108,56 @@ static int __init afs_init(void)
        ret = cachefs_register_netfs(&afs_cache_netfs,
                                     &afs_cache_cell_index_def);
        if (ret < 0)
-                goto error;
-#endif
-#ifdef CONFIG_KEYS_TURNED_OFF
-        ret = afs_key_register();
-        if (ret < 0)
                goto error_cache;
 #endif
        /* initialise the cell DB */
        ret = afs_cell_init(rootcell);
        if (ret < 0)
-                goto error_keys;
+                goto error_cell_init;
-        /* start the timeout daemon */
+        /* initialise the VL update process */
-        ret = afs_kafstimod_start();
+        ret = afs_vlocation_update_init();
        if (ret < 0)
-                goto error_keys;
+                goto error_vl_update_init;
-        /* start the async operation daemon */
+        /* initialise the callback update process */
-        ret = afs_kafsasyncd_start();
+        ret = afs_callback_update_init();
+        /* bail out here, otherwise afs_open_socket() below would overwrite
+         * the error and initialisation would carry on regardless */
+        if (ret < 0)
+                goto error_callback_update_init;
-        if (ret < 0)
-                goto error_kafstimod;
        /* create the RxRPC transport */
-        ret = rxrpc_create_transport(7001, &afs_transport);
+        ret = afs_open_socket();
        if (ret < 0)
-                goto error_kafsasyncd;
+                goto error_open_socket;
-        afs_transport->peer_ops = &afs_peer_ops;
        /* register the filesystems */
        ret = afs_fs_init();
        if (ret < 0)
-                goto error_transport;
+                goto error_fs;
        return ret;
- error_transport:
+error_fs:
-        rxrpc_put_transport(afs_transport);
+        afs_close_socket();
- error_kafsasyncd:
+error_open_socket:
+error_callback_update_init:
-        afs_kafsasyncd_stop();
+error_vl_update_init:
- error_kafstimod:
+error_cell_init:
-        afs_kafstimod_stop();
- error_keys:
-#ifdef CONFIG_KEYS_TURNED_OFF
-        afs_key_unregister();
- error_cache:
-#endif
 #ifdef AFS_CACHING_SUPPORT
        cachefs_unregister_netfs(&afs_cache_netfs);
- error:
+error_cache:
 #endif
+        /* NOTE(review): all the error labels end up running the calls below;
+         * confirm afs_callback_update_kill() and afs_vlocation_purge() are
+         * safe when the matching init step never succeeded */
+        afs_callback_update_kill();
+        afs_vlocation_purge();
        afs_cell_purge();
        afs_proc_cleanup();
        printk(KERN_ERR "kAFS: failed to register: %d\n", ret);
        return ret;
-} /* end afs_init() */
+}
 /* XXX late_initcall is kludgy, but the only alternative seems to create
 * a transport upon the first mount, which is worse. Or is it?
 */
 late_initcall(afs_init);        /* must be called after net/ to create socket */
-/*****************************************************************************/
 /*
 * clean up on module removal
 */
@@ -159,127 +166,16 @@ static void __exit afs_exit(void)
        printk(KERN_INFO "kAFS: Red Hat AFS client v0.1 unregistering.\n");
        afs_fs_exit();
-        rxrpc_put_transport(afs_transport);
+        afs_close_socket();
-        afs_kafstimod_stop();
+        afs_purge_servers();
-        afs_kafsasyncd_stop();
+        afs_callback_update_kill();
+        afs_vlocation_purge();
+        flush_scheduled_work();
        afs_cell_purge();
-#ifdef CONFIG_KEYS_TURNED_OFF
-        afs_key_unregister();
-#endif
 #ifdef AFS_CACHING_SUPPORT
        cachefs_unregister_netfs(&afs_cache_netfs);
 #endif
        afs_proc_cleanup();
-} /* end afs_exit() */
-module_exit(afs_exit);
-/*****************************************************************************/
-/*
- * notification that new peer record is being added
- * - called from krxsecd
- * - return an error to induce an abort
- * - mustn't sleep (caller holds an rwlock)
- */
-static int afs_adding_peer(struct rxrpc_peer *peer)
-{
-        struct afs_server *server;
-        int ret;
-        _debug("kAFS: Adding new peer %08x\n", ntohl(peer->addr.s_addr));
-        /* determine which server the peer resides in (if any) */
-        ret = afs_server_find_by_peer(peer, &server);
-        if (ret < 0)
-                return ret; /* none that we recognise, so abort */
-        _debug("Server %p{u=%d}\n", server, atomic_read(&server->usage));
-        _debug("Cell %p{u=%d}\n",
-               server->cell, atomic_read(&server->cell->usage));
-        /* cross-point the structs under a global lock */
-        spin_lock(&afs_server_peer_lock);
-        peer->user = server;
-        server->peer = peer;
-        spin_unlock(&afs_server_peer_lock);
-        afs_put_server(server);
-        return 0;
-} /* end afs_adding_peer() */
-/*****************************************************************************/
-/*
- * notification that a peer record is being discarded
- * - called from krxiod or krxsecd
- */
-static void afs_discarding_peer(struct rxrpc_peer *peer)
-{
-        struct afs_server *server;
-        _enter("%p",peer);
-        _debug("Discarding peer %08x (rtt=%lu.%lumS)\n",
-               ntohl(peer->addr.s_addr),
-               (long) (peer->rtt / 1000),
-               (long) (peer->rtt % 1000));
-        /* uncross-point the structs under a global lock */
-        spin_lock(&afs_server_peer_lock);
-        server = peer->user;
-        if (server) {
-                peer->user = NULL;
-                server->peer = NULL;
-        }
-        spin_unlock(&afs_server_peer_lock);
-        _leave("");
-} /* end afs_discarding_peer() */
-/*****************************************************************************/
-/*
- * clear the dead space between task_struct and kernel stack
- * - called by supplying -finstrument-functions to gcc
- */
-#if 0
-void __cyg_profile_func_enter (void *this_fn, void *call_site)
-__attribute__((no_instrument_function));
-void __cyg_profile_func_enter (void *this_fn, void *call_site)
-{
-       asm volatile("  movl    %%esp,%%edi     \n"
-                    "  andl    %0,%%edi        \n"
-                    "  addl    %1,%%edi        \n"
-                    "  movl    %%esp,%%ecx     \n"
-                    "  subl    %%edi,%%ecx     \n"
-                    "  shrl    $2,%%ecx        \n"
-                    "  movl    $0xedededed,%%eax     \n"
-                    "  rep stosl               \n"
-                    :
-                    : "i"(~(THREAD_SIZE - 1)), "i"(sizeof(struct thread_info))
-                    : "eax", "ecx", "edi", "memory", "cc"
-                    );
 }
-void __cyg_profile_func_exit(void *this_fn, void *call_site)
+module_exit(afs_exit);
-__attribute__((no_instrument_function));
-void __cyg_profile_func_exit(void *this_fn, void *call_site)
-{
-       asm volatile("  movl    %%esp,%%edi     \n"
-                    "  andl    %0,%%edi        \n"
-                    "  addl    %1,%%edi        \n"
-                    "  movl    %%esp,%%ecx     \n"
-                    "  subl    %%edi,%%ecx     \n"
-                    "  shrl    $2,%%ecx        \n"
-                    "  movl    $0xdadadada,%%eax     \n"
-                    "  rep stosl               \n"
-                    :
-                    : "i"(~(THREAD_SIZE - 1)), "i"(sizeof(struct thread_info))
-                    : "eax", "ecx", "edi", "memory", "cc"
-                    );
-}
-#endif
diff --git a/fs/afs/misc.c b/fs/afs/misc.c
index e4fce66d76e0..cdb9792d8161 100644
--- a/fs/afs/misc.c
+++ b/fs/afs/misc.c
@@ -1,6 +1,6 @@
-/* misc.c: miscellaneous bits
+/* miscellaneous bits
 *
- * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
+ * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 *
 * This program is free software; you can redistribute it and/or
@@ -12,19 +12,20 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/errno.h>
-#include "errors.h"
 #include "internal.h"
+#include "afs_fs.h"
-/*****************************************************************************/
 /*
 * convert an AFS abort code to a Linux error number
 */
-int afs_abort_to_error(int abortcode)
+int afs_abort_to_error(u32 abort_code)
 {
-        switch (abortcode) {
+        switch (abort_code) {
+        case 13:                return -EACCES;
+        case 30:                return -EROFS;
        case VSALVAGE:          return -EIO;
        case VNOVNODE:          return -ENOENT;
-        case VNOVOL:            return -ENXIO;
+        case VNOVOL:            return -ENOMEDIUM;
        case VVOLEXISTS:        return -EEXIST;
        case VNOSERVICE:        return -EIO;
        case VOFFLINE:          return -ENOENT;
@@ -33,7 +34,24 @@ int afs_abort_to_error(int abortcode)
        case VOVERQUOTA:        return -EDQUOT;
        case VBUSY:             return -EBUSY;
        case VMOVED:            return -ENXIO;
-        default:                return -EIO;
+        case 0x2f6df0c:         return -EACCES;
+        case 0x2f6df0f:         return -EBUSY;
+        case 0x2f6df10:         return -EEXIST;
+        case 0x2f6df11:         return -EXDEV;
+        case 0x2f6df13:         return -ENOTDIR;
+        case 0x2f6df14:         return -EISDIR;
+        case 0x2f6df15:         return -EINVAL;
+        case 0x2f6df1a:         return -EFBIG;
+        case 0x2f6df1b:         return -ENOSPC;
+        case 0x2f6df1d:         return -EROFS;
+        case 0x2f6df1e:         return -EMLINK;
+        case 0x2f6df20:         return -EDOM;
+        case 0x2f6df21:         return -ERANGE;
+        case 0x2f6df22:         return -EDEADLK;
+        case 0x2f6df23:         return -ENAMETOOLONG;
+        case 0x2f6df24:         return -ENOLCK;
+        case 0x2f6df26:         return -ENOTEMPTY;
+        case 0x2f6df78:         return -EDQUOT;
+        default:                return -EREMOTEIO;
        }
+}
-} /* end afs_abort_to_error() */
diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c
index 68495f0de7b3..b905ae37f912 100644
--- a/fs/afs/mntpt.c
+++ b/fs/afs/mntpt.c
@@ -1,4 +1,4 @@
-/* mntpt.c: mountpoint management
+/* mountpoint management
 *
 * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
@@ -18,10 +18,6 @@
 #include <linux/mount.h>
 #include <linux/namei.h>
 #include <linux/mnt_namespace.h>
-#include "super.h"
-#include "cell.h"
-#include "volume.h"
-#include "vnode.h"
 #include "internal.h"
@@ -30,6 +26,7 @@ static struct dentry *afs_mntpt_lookup(struct inode *dir,
                                       struct nameidata *nd);
 static int afs_mntpt_open(struct inode *inode, struct file *file);
 static void *afs_mntpt_follow_link(struct dentry *dentry, struct nameidata *nd);
+static void afs_mntpt_expiry_timed_out(struct work_struct *work);
 const struct file_operations afs_mntpt_file_operations = {
        .open           = afs_mntpt_open,
@@ -43,24 +40,19 @@ const struct inode_operations afs_mntpt_inode_operations = {
 };
 static LIST_HEAD(afs_vfsmounts);
+static DECLARE_DELAYED_WORK(afs_mntpt_expiry_timer, afs_mntpt_expiry_timed_out);
-static void afs_mntpt_expiry_timed_out(struct afs_timer *timer);
+unsigned long afs_mntpt_expiry_timeout = 10 * 60; /* in seconds (scaled by HZ when scheduled) */
-struct afs_timer_ops afs_mntpt_expiry_timer_ops = {
-        .timed_out      = afs_mntpt_expiry_timed_out,
-};
-struct afs_timer afs_mntpt_expiry_timer;
-unsigned long afs_mntpt_expiry_timeout = 20;
-/*****************************************************************************/
 /*
 * check a symbolic link to see whether it actually encodes a mountpoint
 * - sets the AFS_VNODE_MOUNTPOINT flag on the vnode appropriately
 */
-int afs_mntpt_check_symlink(struct afs_vnode *vnode)
+int afs_mntpt_check_symlink(struct afs_vnode *vnode, struct key *key)
 {
+        struct file file = {
+                .private_data = key,
+        };
        struct page *page;
        size_t size;
        char *buf;
@@ -69,7 +61,7 @@ int afs_mntpt_check_symlink(struct afs_vnode *vnode)
        _enter("{%u,%u}", vnode->fid.vnode, vnode->fid.unique);
        /* read the contents of the symlink into the pagecache */
-        page = read_mapping_page(AFS_VNODE_TO_I(vnode)->i_mapping, 0, NULL);
+        page = read_mapping_page(AFS_VNODE_TO_I(vnode)->i_mapping, 0, &file);
        if (IS_ERR(page)) {
                ret = PTR_ERR(page);
                goto out;
@@ -85,7 +77,7 @@ int afs_mntpt_check_symlink(struct afs_vnode *vnode)
        /* examine the symlink's contents */
        size = vnode->status.size;
-        _debug("symlink to %*.*s", size, (int) size, buf);
+        _debug("symlink to %*.*s", (int) size, (int) size, buf);
        if (size > 2 &&
            (buf[0] == '%' || buf[0] == '#') &&
@@ -93,22 +85,20 @@ int afs_mntpt_check_symlink(struct afs_vnode *vnode)
            ) {
                _debug("symlink is a mountpoint");
                spin_lock(&vnode->lock);
-                vnode->flags |= AFS_VNODE_MOUNTPOINT;
+                set_bit(AFS_VNODE_MOUNTPOINT, &vnode->flags);
                spin_unlock(&vnode->lock);
        }
        ret = 0;
- out_free:
+out_free:
        kunmap(page);
        page_cache_release(page);
- out:
+out:
        _leave(" = %d", ret);
        return ret;
+}
-} /* end afs_mntpt_check_symlink() */
-/*****************************************************************************/
 /*
 * no valid lookup procedure on this sort of dir
 */
@@ -116,7 +106,7 @@ static struct dentry *afs_mntpt_lookup(struct inode *dir,
                                       struct dentry *dentry,
                                       struct nameidata *nd)
 {
-        kenter("%p,%p{%p{%s},%s}",
+        _enter("%p,%p{%p{%s},%s}",
               dir,
               dentry,
               dentry->d_parent,
@@ -125,15 +115,14 @@ static struct dentry *afs_mntpt_lookup(struct inode *dir,
               dentry->d_name.name);
        return ERR_PTR(-EREMOTE);
-} /* end afs_mntpt_lookup() */
+}
-/*****************************************************************************/
 /*
 * no valid open procedure on this sort of dir
 */
 static int afs_mntpt_open(struct inode *inode, struct file *file)
 {
-        kenter("%p,%p{%p{%s},%s}",
+        _enter("%p,%p{%p{%s},%s}",
               inode, file,
               file->f_path.dentry->d_parent,
               file->f_path.dentry->d_parent ?
@@ -142,9 +131,8 @@ static int afs_mntpt_open(struct inode *inode, struct file *file)
               file->f_path.dentry->d_name.name);
        return -EREMOTE;
-} /* end afs_mntpt_open() */
+}
-/*****************************************************************************/
 /*
 * create a vfsmount to be automounted
 */
@@ -157,7 +145,7 @@ static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt)
        char *buf, *devname = NULL, *options = NULL;
        int ret;
-        kenter("{%s}", mntpt->d_name.name);
+        _enter("{%s}", mntpt->d_name.name);
        BUG_ON(!mntpt->d_inode);
@@ -201,79 +189,108 @@ static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt)
                strcat(options, ",rwpath");
        /* try and do the mount */
-        kdebug("--- attempting mount %s -o %s ---", devname, options);
+        _debug("--- attempting mount %s -o %s ---", devname, options);
        mnt = vfs_kern_mount(&afs_fs_type, 0, devname, options);
-        kdebug("--- mount result %p ---", mnt);
+        _debug("--- mount result %p ---", mnt);
        free_page((unsigned long) devname);
        free_page((unsigned long) options);
-        kleave(" = %p", mnt);
+        _leave(" = %p", mnt);
        return mnt;
- error:
+error:
        if (page)
                page_cache_release(page);
        if (devname)
                free_page((unsigned long) devname);
        if (options)
                free_page((unsigned long) options);
-        kleave(" = %d", ret);
+        _leave(" = %d", ret);
        return ERR_PTR(ret);
-} /* end afs_mntpt_do_automount() */
+}
-/*****************************************************************************/
 /*
 * follow a link from a mountpoint directory, thus causing it to be mounted
 */
 static void *afs_mntpt_follow_link(struct dentry *dentry, struct nameidata *nd)
 {
        struct vfsmount *newmnt;
-        struct dentry *old_dentry;
        int err;
-        kenter("%p{%s},{%s:%p{%s}}",
+        _enter("%p{%s},{%s:%p{%s},}",
               dentry,
               dentry->d_name.name,
               nd->mnt->mnt_devname,
               dentry,
               nd->dentry->d_name.name);
-        newmnt = afs_mntpt_do_automount(dentry);
+        dput(nd->dentry);
+        nd->dentry = dget(dentry);
+        newmnt = afs_mntpt_do_automount(nd->dentry);
        if (IS_ERR(newmnt)) {
                path_release(nd);
                return (void *)newmnt;
        }
-        old_dentry = nd->dentry;
+        mntget(newmnt);
-        nd->dentry = dentry;
+        err = do_add_mount(newmnt, nd, MNT_SHRINKABLE, &afs_vfsmounts);
-        err = do_add_mount(newmnt, nd, 0, &afs_vfsmounts);
+        switch (err) {
-        nd->dentry = old_dentry;
+        case 0:
+                mntput(nd->mnt);
-        path_release(nd);
+                dput(nd->dentry);
-        if (!err) {
-                mntget(newmnt);
                nd->mnt = newmnt;
-                dget(newmnt->mnt_root);
+                nd->dentry = dget(newmnt->mnt_root);
-                nd->dentry = newmnt->mnt_root;
+                schedule_delayed_work(&afs_mntpt_expiry_timer,
+                                      afs_mntpt_expiry_timeout * HZ);
+                break;
+        case -EBUSY:
+                /* someone else made a mount here whilst we were busy */
+                while (d_mountpoint(nd->dentry) &&
+                       follow_down(&nd->mnt, &nd->dentry))
+                        ;
+                err = 0; /* fall through */
+        default:
+                mntput(newmnt);
+                break;
        }
-        kleave(" = %d", err);
+        _leave(" = %d", err);
        return ERR_PTR(err);
-} /* end afs_mntpt_follow_link() */
+}
-/*****************************************************************************/
 /*
 * handle mountpoint expiry timer going off
 */
-static void afs_mntpt_expiry_timed_out(struct afs_timer *timer)
+static void afs_mntpt_expiry_timed_out(struct work_struct *work)
 {
-        kenter("");
+        _enter("");
-        mark_mounts_for_expiry(&afs_vfsmounts);
+        if (!list_empty(&afs_vfsmounts)) {
+                mark_mounts_for_expiry(&afs_vfsmounts);
+                schedule_delayed_work(&afs_mntpt_expiry_timer,
+                                      afs_mntpt_expiry_timeout * HZ);
+        }
+        _leave("");
+}
-        afs_kafstimod_add_timer(&afs_mntpt_expiry_timer,
+/*
-                                afs_mntpt_expiry_timeout * HZ);
+ * kill the AFS mountpoint timer if it's still running
+ */
+void afs_mntpt_kill_timer(void)
+{
+        _enter("");
-        kleave("");
+        ASSERT(list_empty(&afs_vfsmounts));
-} /* end afs_mntpt_expiry_timed_out() */
+        cancel_delayed_work(&afs_mntpt_expiry_timer);
+        flush_scheduled_work();
+}
+/*
+ * begin unmount by attempting to remove all automounted mountpoints we added
+ */
+void afs_umount_begin(struct vfsmount *vfsmnt, int flags)
+{
+        shrink_submounts(vfsmnt, &afs_vfsmounts);
+}
diff --git a/fs/afs/mount.h b/fs/afs/mount.h
deleted file mode 100644
index 9d2f46ec549f..000000000000
--- a/fs/afs/mount.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/* mount.h: mount parameters
- *
- * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-#ifndef _LINUX_AFS_MOUNT_H
-#define _LINUX_AFS_MOUNT_H
-struct afs_mountdata {
-        const char              *volume;        /* name of volume */
-        const char              *cell;          /* name of cell containing volume */
-        const char              *cache;         /* name of cache block device */
-        size_t                  nservers;       /* number of server addresses listed */
-        uint32_t                servers[10];    /* IP addresses of servers in this cell */
-};
-#endif /* _LINUX_AFS_MOUNT_H */
diff --git a/fs/afs/proc.c b/fs/afs/proc.c
index ae6b85b1e484..d5601f617cdb 100644
--- a/fs/afs/proc.c
+++ b/fs/afs/proc.c
@@ -1,4 +1,4 @@
-/* proc.c: /proc interface for AFS
+/* /proc interface for AFS
 *
 * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
@@ -13,8 +13,6 @@
 #include <linux/module.h>
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
-#include "cell.h"
-#include "volume.h"
 #include <asm/uaccess.h>
 #include "internal.h"
@@ -130,7 +128,6 @@ static const struct file_operations afs_proc_cell_servers_fops = {
        .release        = afs_proc_cell_servers_release,
 };
-/*****************************************************************************/
 /*
 * initialise the /proc/fs/afs/ directory
 */
@@ -142,47 +139,43 @@ int afs_proc_init(void)
        proc_afs = proc_mkdir("fs/afs", NULL);
        if (!proc_afs)
-                goto error;
+                goto error_dir;
        proc_afs->owner = THIS_MODULE;
        p = create_proc_entry("cells", 0, proc_afs);
        if (!p)
-                goto error_proc;
+                goto error_cells;
        p->proc_fops = &afs_proc_cells_fops;
        p->owner = THIS_MODULE;
        p = create_proc_entry("rootcell", 0, proc_afs);
        if (!p)
-                goto error_cells;
+                goto error_rootcell;
        p->proc_fops = &afs_proc_rootcell_fops;
        p->owner = THIS_MODULE;
        _leave(" = 0");
        return 0;
- error_cells:
+error_rootcell:
        remove_proc_entry("cells", proc_afs);
- error_proc:
+error_cells:
        remove_proc_entry("fs/afs", NULL);
- error:
+error_dir:
        _leave(" = -ENOMEM");
        return -ENOMEM;
+}
-} /* end afs_proc_init() */
-/*****************************************************************************/
 /*
 * clean up the /proc/fs/afs/ directory
 */
 void afs_proc_cleanup(void)
 {
+        remove_proc_entry("rootcell", proc_afs);
        remove_proc_entry("cells", proc_afs);
        remove_proc_entry("fs/afs", NULL);
+}
-} /* end afs_proc_cleanup() */
-/*****************************************************************************/
 /*
 * open "/proc/fs/afs/cells" which provides a summary of extant cells
 */
@@ -199,9 +192,8 @@ static int afs_proc_cells_open(struct inode *inode, struct file *file)
        m->private = PDE(inode)->data;
        return 0;
-} /* end afs_proc_cells_open() */
+}
-/*****************************************************************************/
 /*
 * set up the iterator to start reading from the cells list and return the
 * first item
@@ -225,9 +217,8 @@ static void *afs_proc_cells_start(struct seq_file *m, loff_t *_pos)
                        break;
        return _p != &afs_proc_cells ? _p : NULL;
-} /* end afs_proc_cells_start() */
+}
-/*****************************************************************************/
 /*
 * move to next cell in cells list
 */
@@ -241,19 +232,16 @@ static void *afs_proc_cells_next(struct seq_file *p, void *v, loff_t *pos)
        _p = v == (void *) 1 ? afs_proc_cells.next : _p->next;
        return _p != &afs_proc_cells ? _p : NULL;
-} /* end afs_proc_cells_next() */
+}
-/*****************************************************************************/
 /*
 * clean up after reading from the cells list
 */
 static void afs_proc_cells_stop(struct seq_file *p, void *v)
 {
        up_read(&afs_proc_cells_sem);
+}
-} /* end afs_proc_cells_stop() */
-/*****************************************************************************/
 /*
 * display a header line followed by a load of cell lines
 */
@@ -261,19 +249,18 @@ static int afs_proc_cells_show(struct seq_file *m, void *v)
 {
        struct afs_cell *cell = list_entry(v, struct afs_cell, proc_link);
-        /* display header on line 1 */
        if (v == (void *) 1) {
+                /* display header on line 1 */
                seq_puts(m, "USE NAME\n");
                return 0;
        }
        /* display one cell per line on subsequent lines */
-        seq_printf(m, "%3d %s\n", atomic_read(&cell->usage), cell->name);
+        seq_printf(m, "%3d %s\n",
+                   atomic_read(&cell->usage), cell->name);
        return 0;
-} /* end afs_proc_cells_show() */
+}
-/*****************************************************************************/
 /*
 * handle writes to /proc/fs/afs/cells
 * - to add cells: echo "add <cellname> <IP>[:<IP>][:<IP>]"
@@ -326,30 +313,32 @@ static ssize_t afs_proc_cells_write(struct file *file, const char __user *buf,
        if (strcmp(kbuf, "add") == 0) {
                struct afs_cell *cell;
-                ret = afs_cell_create(name, args, &cell);
-                if (ret < 0)
+                cell = afs_cell_create(name, args);
+                if (IS_ERR(cell)) {
+                        ret = PTR_ERR(cell);
                        goto done;
+                }
+                afs_put_cell(cell);
                printk("kAFS: Added new cell '%s'\n", name);
-        }
+        } else {
-        else {
                goto inval;
        }
        ret = size;
- done:
+done:
        kfree(kbuf);
        _leave(" = %d", ret);
        return ret;
- inval:
+inval:
        ret = -EINVAL;
        printk("kAFS: Invalid Command on /proc/fs/afs/cells file\n");
        goto done;
-} /* end afs_proc_cells_write() */
+}
-/*****************************************************************************/
 /*
 * Stubs for /proc/fs/afs/rootcell
 */
@@ -369,7 +358,6 @@ static ssize_t afs_proc_rootcell_read(struct file *file, char __user *buf,
        return 0;
 }
-/*****************************************************************************/
 /*
 * handle writes to /proc/fs/afs/rootcell
 * - to initialize rootcell: echo "cell.name:192.168.231.14"
@@ -407,14 +395,13 @@ static ssize_t afs_proc_rootcell_write(struct file *file,
        if (ret >= 0)
                ret = size;     /* consume everything, always */
- infault:
+infault:
        kfree(kbuf);
- nomem:
+nomem:
        _leave(" = %d", ret);
        return ret;
-} /* end afs_proc_rootcell_write() */
+}
-/*****************************************************************************/
 /*
 * initialise /proc/fs/afs/<cell>/
 */
@@ -426,25 +413,25 @@ int afs_proc_cell_setup(struct afs_cell *cell)
        cell->proc_dir = proc_mkdir(cell->name, proc_afs);
        if (!cell->proc_dir)
-                return -ENOMEM;
+                goto error_dir;
        p = create_proc_entry("servers", 0, cell->proc_dir);
        if (!p)
-                goto error_proc;
+                goto error_servers;
        p->proc_fops = &afs_proc_cell_servers_fops;
        p->owner = THIS_MODULE;
        p->data = cell;
        p = create_proc_entry("vlservers", 0, cell->proc_dir);
        if (!p)
-                goto error_servers;
+                goto error_vlservers;
        p->proc_fops = &afs_proc_cell_vlservers_fops;
        p->owner = THIS_MODULE;
        p->data = cell;
        p = create_proc_entry("volumes", 0, cell->proc_dir);
        if (!p)
-                goto error_vlservers;
+                goto error_volumes;
        p->proc_fops = &afs_proc_cell_volumes_fops;
        p->owner = THIS_MODULE;
        p->data = cell;
@@ -452,17 +439,17 @@ int afs_proc_cell_setup(struct afs_cell *cell)
        _leave(" = 0");
        return 0;
- error_vlservers:
+error_volumes:
        remove_proc_entry("vlservers", cell->proc_dir);
- error_servers:
+error_vlservers:
        remove_proc_entry("servers", cell->proc_dir);
- error_proc:
+error_servers:
        remove_proc_entry(cell->name, proc_afs);
+error_dir:
        _leave(" = -ENOMEM");
        return -ENOMEM;
-} /* end afs_proc_cell_setup() */
+}
-/*****************************************************************************/
 /*
 * remove /proc/fs/afs/<cell>/
 */
@@ -476,9 +463,8 @@ void afs_proc_cell_remove(struct afs_cell *cell)
        remove_proc_entry(cell->name, proc_afs);
        _leave("");
-} /* end afs_proc_cell_remove() */
+}
-/*****************************************************************************/
 /*
 * open "/proc/fs/afs/<cell>/volumes" which provides a summary of extant cells
 */
@@ -488,7 +474,7 @@ static int afs_proc_cell_volumes_open(struct inode *inode, struct file *file)
        struct seq_file *m;
        int ret;
-        cell = afs_get_cell_maybe((struct afs_cell **) &PDE(inode)->data);
+        cell = PDE(inode)->data;
        if (!cell)
                return -ENOENT;
@@ -500,25 +486,16 @@ static int afs_proc_cell_volumes_open(struct inode *inode, struct file *file)
        m->private = cell;
        return 0;
-} /* end afs_proc_cell_volumes_open() */
+}
-/*****************************************************************************/
 /*
 * close the file and release the ref to the cell
 */
 static int afs_proc_cell_volumes_release(struct inode *inode, struct file *file)
 {
-        struct afs_cell *cell = PDE(inode)->data;
+        return seq_release(inode, file);
-        int ret;
+}
-        ret = seq_release(inode,file);
-        afs_put_cell(cell);
-        return ret;
-} /* end afs_proc_cell_volumes_release() */
-/*****************************************************************************/
 /*
 * set up the iterator to start reading from the cells list and return the
 * first item
@@ -545,9 +522,8 @@ static void *afs_proc_cell_volumes_start(struct seq_file *m, loff_t *_pos)
                        break;
        return _p != &cell->vl_list ? _p : NULL;
-} /* end afs_proc_cell_volumes_start() */
+}
-/*****************************************************************************/
 /*
 * move to next cell in cells list
 */
@@ -562,12 +538,11 @@ static void *afs_proc_cell_volumes_next(struct seq_file *p, void *v,
        (*_pos)++;
        _p = v;
-        _p = v == (void *) 1 ? cell->vl_list.next : _p->next;
+        _p = (v == (void *) 1) ? cell->vl_list.next : _p->next;
-        return _p != &cell->vl_list ? _p : NULL;
+        return (_p != &cell->vl_list) ? _p : NULL;
-} /* end afs_proc_cell_volumes_next() */
+}
-/*****************************************************************************/
 /*
 * clean up after reading from the cells list
 */
@@ -576,10 +551,18 @@ static void afs_proc_cell_volumes_stop(struct seq_file *p, void *v)
        struct afs_cell *cell = p->private;
        up_read(&cell->vl_sem);
+}
-} /* end afs_proc_cell_volumes_stop() */
+const char afs_vlocation_states[][4] = {
+        [AFS_VL_NEW]                    = "New",
+        [AFS_VL_CREATING]               = "Crt",
+        [AFS_VL_VALID]                  = "Val",
+        [AFS_VL_NO_VOLUME]              = "NoV",
+        [AFS_VL_UPDATING]               = "Upd",
+        [AFS_VL_VOLUME_DELETED]         = "Del",
+        [AFS_VL_UNCERTAIN]              = "Unc",
+};
-/*****************************************************************************/
 /*
 * display a header line followed by a load of volume lines
 */
@@ -590,23 +573,22 @@ static int afs_proc_cell_volumes_show(struct seq_file *m, void *v)
        /* display header on line 1 */
        if (v == (void *) 1) {
-                seq_puts(m, "USE VLID[0]  VLID[1]  VLID[2]  NAME\n");
+                seq_puts(m, "USE STT VLID[0]  VLID[1]  VLID[2]  NAME\n");
                return 0;
        }
        /* display one cell per line on subsequent lines */
-        seq_printf(m, "%3d %08x %08x %08x %s\n",
+        seq_printf(m, "%3d %s %08x %08x %08x %s\n",
                   atomic_read(&vlocation->usage),
+                   afs_vlocation_states[vlocation->state],
                   vlocation->vldb.vid[0],
                   vlocation->vldb.vid[1],
                   vlocation->vldb.vid[2],
-                   vlocation->vldb.name
+                   vlocation->vldb.name);
-                   );
        return 0;
-} /* end afs_proc_cell_volumes_show() */
+}
-/*****************************************************************************/
 /*
 * open "/proc/fs/afs/<cell>/vlservers" which provides a list of volume
 * location server
@@ -617,11 +599,11 @@ static int afs_proc_cell_vlservers_open(struct inode *inode, struct file *file)
        struct seq_file *m;
        int ret;
-        cell = afs_get_cell_maybe((struct afs_cell**)&PDE(inode)->data);
+        cell = PDE(inode)->data;
        if (!cell)
                return -ENOENT;
-        ret = seq_open(file,&afs_proc_cell_vlservers_ops);
+        ret = seq_open(file, &afs_proc_cell_vlservers_ops);
        if (ret<0)
                return ret;
@@ -629,26 +611,17 @@ static int afs_proc_cell_vlservers_open(struct inode *inode, struct file *file)
        m->private = cell;
        return 0;
-} /* end afs_proc_cell_vlservers_open() */
+}
-/*****************************************************************************/
 /*
 * close the file and release the ref to the cell
 */
 static int afs_proc_cell_vlservers_release(struct inode *inode,
                                           struct file *file)
 {
-        struct afs_cell *cell = PDE(inode)->data;
+        return seq_release(inode, file);
-        int ret;
+}
-        ret = seq_release(inode,file);
-        afs_put_cell(cell);
-        return ret;
-} /* end afs_proc_cell_vlservers_release() */
-/*****************************************************************************/
 /*
 * set up the iterator to start reading from the cells list and return the
 * first item
@@ -672,9 +645,8 @@ static void *afs_proc_cell_vlservers_start(struct seq_file *m, loff_t *_pos)
                return NULL;
        return &cell->vl_addrs[pos];
-} /* end afs_proc_cell_vlservers_start() */
+}
-/*****************************************************************************/
 /*
 * move to next cell in cells list
 */
@@ -692,9 +664,8 @@ static void *afs_proc_cell_vlservers_next(struct seq_file *p, void *v,
                return NULL;
        return &cell->vl_addrs[pos];
-} /* end afs_proc_cell_vlservers_next() */
+}
-/*****************************************************************************/
 /*
 * clean up after reading from the cells list
 */
@@ -703,10 +674,8 @@ static void afs_proc_cell_vlservers_stop(struct seq_file *p, void *v)
        struct afs_cell *cell = p->private;
        up_read(&cell->vl_sem);
+}
-} /* end afs_proc_cell_vlservers_stop() */
-/*****************************************************************************/
 /*
 * display a header line followed by a load of volume lines
 */
@@ -722,11 +691,9 @@ static int afs_proc_cell_vlservers_show(struct seq_file *m, void *v)
        /* display one cell per line on subsequent lines */
        seq_printf(m, "%u.%u.%u.%u\n", NIPQUAD(addr->s_addr));
        return 0;
-} /* end afs_proc_cell_vlservers_show() */
+}
-/*****************************************************************************/
 /*
 * open "/proc/fs/afs/<cell>/servers" which provides a summary of active
 * servers
@@ -737,7 +704,7 @@ static int afs_proc_cell_servers_open(struct inode *inode, struct file *file)
        struct seq_file *m;
        int ret;
-        cell = afs_get_cell_maybe((struct afs_cell **) &PDE(inode)->data);
+        cell = PDE(inode)->data;
        if (!cell)
                return -ENOENT;
@@ -747,34 +714,24 @@ static int afs_proc_cell_servers_open(struct inode *inode, struct file *file)
        m = file->private_data;
        m->private = cell;
        return 0;
-} /* end afs_proc_cell_servers_open() */
+}
-/*****************************************************************************/
 /*
 * close the file and release the ref to the cell
 */
 static int afs_proc_cell_servers_release(struct inode *inode,
                                         struct file *file)
 {
-        struct afs_cell *cell = PDE(inode)->data;
+        return seq_release(inode, file);
-        int ret;
+}
-        ret = seq_release(inode, file);
-        afs_put_cell(cell);
-        return ret;
-} /* end afs_proc_cell_servers_release() */
-/*****************************************************************************/
 /*
 * set up the iterator to start reading from the cells list and return the
 * first item
 */
 static void *afs_proc_cell_servers_start(struct seq_file *m, loff_t *_pos)
-        __acquires(m->private->sv_lock)
+        __acquires(m->private->servers_lock)
 {
        struct list_head *_p;
        struct afs_cell *cell = m->private;
@@ -783,7 +740,7 @@ static void *afs_proc_cell_servers_start(struct seq_file *m, loff_t *_pos)
        _enter("cell=%p pos=%Ld", cell, *_pos);
        /* lock the list against modification */
-        read_lock(&cell->sv_lock);
+        read_lock(&cell->servers_lock);
        /* allow for the header line */
        if (!pos)
@@ -791,14 +748,13 @@ static void *afs_proc_cell_servers_start(struct seq_file *m, loff_t *_pos)
        pos--;
        /* find the n'th element in the list */
-        list_for_each(_p, &cell->sv_list)
+        list_for_each(_p, &cell->servers)
                if (!pos--)
                        break;
-        return _p != &cell->sv_list ? _p : NULL;
+        return _p != &cell->servers ? _p : NULL;
-} /* end afs_proc_cell_servers_start() */
+}
-/*****************************************************************************/
 /*
 * move to next cell in cells list
 */
@@ -813,25 +769,22 @@ static void *afs_proc_cell_servers_next(struct seq_file *p, void *v,
        (*_pos)++;
        _p = v;
-        _p = v == (void *) 1 ? cell->sv_list.next : _p->next;
+        _p = v == (void *) 1 ? cell->servers.next : _p->next;
-        return _p != &cell->sv_list ? _p : NULL;
+        return _p != &cell->servers ? _p : NULL;
-} /* end afs_proc_cell_servers_next() */
+}
-/*****************************************************************************/
 /*
 * clean up after reading from the cells list
 */
 static void afs_proc_cell_servers_stop(struct seq_file *p, void *v)
-        __releases(p->private->sv_lock)
+        __releases(p->private->servers_lock)
 {
        struct afs_cell *cell = p->private;
-        read_unlock(&cell->sv_lock);
+        read_unlock(&cell->servers_lock);
+}
-} /* end afs_proc_cell_servers_stop() */
-/*****************************************************************************/
 /*
 * display a header line followed by a load of volume lines
 */
@@ -849,10 +802,7 @@ static int afs_proc_cell_servers_show(struct seq_file *m, void *v)
        /* display one cell per line on subsequent lines */
        sprintf(ipaddr, "%u.%u.%u.%u", NIPQUAD(server->addr));
        seq_printf(m, "%3d %-15.15s %5d\n",
-                   atomic_read(&server->usage),
+                   atomic_read(&server->usage), ipaddr, server->fs_state);
-                   ipaddr,
-                   server->fs_state
-                   );
        return 0;
-} /* end afs_proc_cell_servers_show() */
+}
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
new file mode 100644
index 000000000000..e7b047328a39
--- /dev/null
+++ b/fs/afs/rxrpc.c
@@ -0,0 +1,782 @@
+/* Maintain an RxRPC server socket to do AFS communications through
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <net/sock.h>
+#include <net/af_rxrpc.h>
+#include <rxrpc/packet.h>
+#include "internal.h"
+#include "afs_cm.h"
+static struct socket *afs_socket; /* my RxRPC socket */
+static struct workqueue_struct *afs_async_calls;
+static atomic_t afs_outstanding_calls;
+static atomic_t afs_outstanding_skbs;
+static void afs_wake_up_call_waiter(struct afs_call *);
+static int afs_wait_for_call_to_complete(struct afs_call *);
+static void afs_wake_up_async_call(struct afs_call *);
+static int afs_dont_wait_for_call_to_complete(struct afs_call *);
+static void afs_process_async_call(struct work_struct *);
+static void afs_rx_interceptor(struct sock *, unsigned long, struct sk_buff *);
+static int afs_deliver_cm_op_id(struct afs_call *, struct sk_buff *, bool);
+/* synchronous call management */
+const struct afs_wait_mode afs_sync_call = {
+        .rx_wakeup      = afs_wake_up_call_waiter,
+        .wait           = afs_wait_for_call_to_complete,
+};
+/* asynchronous call management */
+const struct afs_wait_mode afs_async_call = {
+        .rx_wakeup      = afs_wake_up_async_call,
+        .wait           = afs_dont_wait_for_call_to_complete,
+};
+/* asynchronous incoming call management */
+static const struct afs_wait_mode afs_async_incoming_call = {
+        .rx_wakeup      = afs_wake_up_async_call,
+};
+/* asynchronous incoming call initial processing */
+static const struct afs_call_type afs_RXCMxxxx = {
+        .name           = "CB.xxxx",
+        .deliver        = afs_deliver_cm_op_id,
+        .abort_to_error = afs_abort_to_error,
+};
+static void afs_collect_incoming_call(struct work_struct *);
+static struct sk_buff_head afs_incoming_calls;
+static DECLARE_WORK(afs_collect_incoming_call_work, afs_collect_incoming_call);
+/*
+ * open an RxRPC socket and bind it to be a server for callback notifications
+ * - the socket is left in blocking mode and non-blocking ops use MSG_DONTWAIT
+ */
+int afs_open_socket(void)
+{
+        struct sockaddr_rxrpc srx;
+        struct socket *socket;
+        int ret;
+        _enter("");
+        skb_queue_head_init(&afs_incoming_calls);
+        afs_async_calls = create_singlethread_workqueue("kafsd");
+        if (!afs_async_calls) {
+                _leave(" = -ENOMEM [wq]");
+                return -ENOMEM;
+        }
+        ret = sock_create_kern(AF_RXRPC, SOCK_DGRAM, PF_INET, &socket);
+        if (ret < 0) {
+                destroy_workqueue(afs_async_calls);
+                _leave(" = %d [socket]", ret);
+                return ret;
+        }
+        socket->sk->sk_allocation = GFP_NOFS;
+        /* bind the callback manager's address to make this a server socket */
+        srx.srx_family                  = AF_RXRPC;
+        srx.srx_service                 = CM_SERVICE;
+        srx.transport_type              = SOCK_DGRAM;
+        srx.transport_len               = sizeof(srx.transport.sin);
+        srx.transport.sin.sin_family    = AF_INET;
+        srx.transport.sin.sin_port      = htons(AFS_CM_PORT);
+        memset(&srx.transport.sin.sin_addr, 0,
+               sizeof(srx.transport.sin.sin_addr));
+        ret = kernel_bind(socket, (struct sockaddr *) &srx, sizeof(srx));
+        if (ret < 0) {
+                sock_release(socket);
+                _leave(" = %d [bind]", ret);
+                return ret;
+        }
+        rxrpc_kernel_intercept_rx_messages(socket, afs_rx_interceptor);
+        afs_socket = socket;
+        _leave(" = 0");
+        return 0;
+}
+/*
+ * close the RxRPC socket AFS was using
+ */
+void afs_close_socket(void)
+{
+        _enter("");
+        sock_release(afs_socket);
+        _debug("dework");
+        destroy_workqueue(afs_async_calls);
+        ASSERTCMP(atomic_read(&afs_outstanding_skbs), ==, 0);
+        ASSERTCMP(atomic_read(&afs_outstanding_calls), ==, 0);
+        _leave("");
+}
+/*
+ * note that the data in a socket buffer is now delivered and that the buffer
+ * should be freed
+ */
+static void afs_data_delivered(struct sk_buff *skb)
+{
+        if (!skb) {
+                _debug("DLVR NULL [%d]", atomic_read(&afs_outstanding_skbs));
+                dump_stack();
+        } else {
+                _debug("DLVR %p{%u} [%d]",
+                       skb, skb->mark, atomic_read(&afs_outstanding_skbs));
+                if (atomic_dec_return(&afs_outstanding_skbs) == -1)
+                        BUG();
+                rxrpc_kernel_data_delivered(skb);
+        }
+}
+/*
+ * free a socket buffer
+ */
+static void afs_free_skb(struct sk_buff *skb)
+{
+        if (!skb) {
+                _debug("FREE NULL [%d]", atomic_read(&afs_outstanding_skbs));
+                dump_stack();
+        } else {
+                _debug("FREE %p{%u} [%d]",
+                       skb, skb->mark, atomic_read(&afs_outstanding_skbs));
+                if (atomic_dec_return(&afs_outstanding_skbs) == -1)
+                        BUG();
+                rxrpc_kernel_free_skb(skb);
+        }
+}
+/*
+ * free a call
+ */
+static void afs_free_call(struct afs_call *call)
+{
+        _debug("DONE %p{%s} [%d]",
+               call, call->type->name, atomic_read(&afs_outstanding_calls));
+        if (atomic_dec_return(&afs_outstanding_calls) == -1)
+                BUG();
+        ASSERTCMP(call->rxcall, ==, NULL);
+        ASSERT(!work_pending(&call->async_work));
+        ASSERT(skb_queue_empty(&call->rx_queue));
+        ASSERT(call->type->name != NULL);
+        kfree(call->request);
+        kfree(call);
+}
+/*
+ * allocate a call with flat request and reply buffers
+ */
+struct afs_call *afs_alloc_flat_call(const struct afs_call_type *type,
+                                     size_t request_size, size_t reply_size)
+{
+        struct afs_call *call;
+        call = kzalloc(sizeof(*call), GFP_NOFS);
+        if (!call)
+                goto nomem_call;
+        _debug("CALL %p{%s} [%d]",
+               call, type->name, atomic_read(&afs_outstanding_calls));
+        atomic_inc(&afs_outstanding_calls);
+        call->type = type;
+        call->request_size = request_size;
+        call->reply_max = reply_size;
+        if (request_size) {
+                call->request = kmalloc(request_size, GFP_NOFS);
+                if (!call->request)
+                        goto nomem_free;
+        }
+        if (reply_size) {
+                call->buffer = kmalloc(reply_size, GFP_NOFS);
+                if (!call->buffer)
+                        goto nomem_free;
+        }
+        init_waitqueue_head(&call->waitq);
+        skb_queue_head_init(&call->rx_queue);
+        return call;
+nomem_free:
+        afs_free_call(call);
+nomem_call:
+        return NULL;
+}
+/*
+ * clean up a call with flat buffer
+ */
+void afs_flat_call_destructor(struct afs_call *call)
+{
+        _enter("");
+        kfree(call->request);
+        call->request = NULL;
+        kfree(call->buffer);
+        call->buffer = NULL;
+}
+/*
+ * initiate a call
+ */
+int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp,
+                  const struct afs_wait_mode *wait_mode)
+{
+        struct sockaddr_rxrpc srx;
+        struct rxrpc_call *rxcall;
+        struct msghdr msg;
+        struct kvec iov[1];
+        int ret;
+        _enter("%x,{%d},", addr->s_addr, ntohs(call->port));
+        ASSERT(call->type != NULL);
+        ASSERT(call->type->name != NULL);
+        _debug("MAKE %p{%s} [%d]",
+               call, call->type->name, atomic_read(&afs_outstanding_calls));
+        call->wait_mode = wait_mode;
+        INIT_WORK(&call->async_work, afs_process_async_call);
+        memset(&srx, 0, sizeof(srx));
+        srx.srx_family = AF_RXRPC;
+        srx.srx_service = call->service_id;
+        srx.transport_type = SOCK_DGRAM;
+        srx.transport_len = sizeof(srx.transport.sin);
+        srx.transport.sin.sin_family = AF_INET;
+        srx.transport.sin.sin_port = call->port;
+        memcpy(&srx.transport.sin.sin_addr, addr, 4);
+        /* create a call */
+        rxcall = rxrpc_kernel_begin_call(afs_socket, &srx, call->key,
+                                         (unsigned long) call, gfp);
+        call->key = NULL;
+        if (IS_ERR(rxcall)) {
+                ret = PTR_ERR(rxcall);
+                goto error_kill_call;
+        }
+        call->rxcall = rxcall;
+        /* send the request */
+        iov[0].iov_base = call->request;
+        iov[0].iov_len  = call->request_size;
+        msg.msg_name            = NULL;
+        msg.msg_namelen         = 0;
+        msg.msg_iov             = (struct iovec *) iov;
+        msg.msg_iovlen          = 1;
+        msg.msg_control         = NULL;
+        msg.msg_controllen      = 0;
+        msg.msg_flags           = 0;
+        /* have to change the state *before* sending the last packet as RxRPC
+         * might give us the reply before it returns from sending the
+         * request */
+        call->state = AFS_CALL_AWAIT_REPLY;
+        ret = rxrpc_kernel_send_data(rxcall, &msg, call->request_size);
+        if (ret < 0)
+                goto error_do_abort;
+        /* at this point, an async call may no longer exist as it may have
+         * already completed */
+        return wait_mode->wait(call);
+error_do_abort:
+        rxrpc_kernel_abort_call(rxcall, RX_USER_ABORT);
+        rxrpc_kernel_end_call(rxcall);
+        call->rxcall = NULL;
+error_kill_call:
+        call->type->destructor(call);
+        afs_free_call(call);
+        _leave(" = %d", ret);
+        return ret;
+}
+/*
+ * handles intercepted messages that were arriving in the socket's Rx queue
+ * - called with the socket receive queue lock held to ensure message ordering
+ * - called with softirqs disabled
+ */
+static void afs_rx_interceptor(struct sock *sk, unsigned long user_call_ID,
+                               struct sk_buff *skb)
+{
+        struct afs_call *call = (struct afs_call *) user_call_ID;
+        _enter("%p,,%u", call, skb->mark);
+        _debug("ICPT %p{%u} [%d]",
+               skb, skb->mark, atomic_read(&afs_outstanding_skbs));
+        ASSERTCMP(sk, ==, afs_socket->sk);
+        atomic_inc(&afs_outstanding_skbs);
+        if (!call) {
+                /* it's an incoming call for our callback service */
+                skb_queue_tail(&afs_incoming_calls, skb);
+                schedule_work(&afs_collect_incoming_call_work);
+        } else {
+                /* route the messages directly to the appropriate call */
+                skb_queue_tail(&call->rx_queue, skb);
+                call->wait_mode->rx_wakeup(call);
+        }
+        _leave("");
+}
+/*
+ * deliver messages to a call
+ */
+static void afs_deliver_to_call(struct afs_call *call)
+{
+        struct sk_buff *skb;
+        bool last;
+        u32 abort_code;
+        int ret;
+        _enter("");
+        while ((call->state == AFS_CALL_AWAIT_REPLY ||
+                call->state == AFS_CALL_AWAIT_OP_ID ||
+                call->state == AFS_CALL_AWAIT_REQUEST ||
+                call->state == AFS_CALL_AWAIT_ACK) &&
+               (skb = skb_dequeue(&call->rx_queue))) {
+                switch (skb->mark) {
+                case RXRPC_SKB_MARK_DATA:
+                        _debug("Rcv DATA");
+                        last = rxrpc_kernel_is_data_last(skb);
+                        ret = call->type->deliver(call, skb, last);
+                        switch (ret) {
+                        case 0:
+                                if (last &&
+                                    call->state == AFS_CALL_AWAIT_REPLY)
+                                        call->state = AFS_CALL_COMPLETE;
+                                break;
+                        case -ENOTCONN:
+                                abort_code = RX_CALL_DEAD;
+                                goto do_abort;
+                        case -ENOTSUPP:
+                                abort_code = RX_INVALID_OPERATION;
+                                goto do_abort;
+                        default:
+                                abort_code = RXGEN_CC_UNMARSHAL;
+                                if (call->state != AFS_CALL_AWAIT_REPLY)
+                                        abort_code = RXGEN_SS_UNMARSHAL;
+                        do_abort:
+                                rxrpc_kernel_abort_call(call->rxcall,
+                                                        abort_code);
+                                call->error = ret;
+                                call->state = AFS_CALL_ERROR;
+                                break;
+                        }
+                        afs_data_delivered(skb);
+                        skb = NULL;
+                        continue;
+                case RXRPC_SKB_MARK_FINAL_ACK:
+                        _debug("Rcv ACK");
+                        call->state = AFS_CALL_COMPLETE;
+                        break;
+                case RXRPC_SKB_MARK_BUSY:
+                        _debug("Rcv BUSY");
+                        call->error = -EBUSY;
+                        call->state = AFS_CALL_BUSY;
+                        break;
+                case RXRPC_SKB_MARK_REMOTE_ABORT:
+                        abort_code = rxrpc_kernel_get_abort_code(skb);
+                        call->error = call->type->abort_to_error(abort_code);
+                        call->state = AFS_CALL_ABORTED;
+                        _debug("Rcv ABORT %u -> %d", abort_code, call->error);
+                        break;
+                case RXRPC_SKB_MARK_NET_ERROR:
+                        call->error = -rxrpc_kernel_get_error_number(skb);
+                        call->state = AFS_CALL_ERROR;
+                        _debug("Rcv NET ERROR %d", call->error);
+                        break;
+                case RXRPC_SKB_MARK_LOCAL_ERROR:
+                        call->error = -rxrpc_kernel_get_error_number(skb);
+                        call->state = AFS_CALL_ERROR;
+                        _debug("Rcv LOCAL ERROR %d", call->error);
+                        break;
+                default:
+                        BUG();
+                        break;
+                }
+                afs_free_skb(skb);
+        }
+        /* make sure the queue is empty if the call is done with (we might have
+         * aborted the call early because of an unmarshalling error) */
+        if (call->state >= AFS_CALL_COMPLETE) {
+                while ((skb = skb_dequeue(&call->rx_queue)))
+                        afs_free_skb(skb);
+                if (call->incoming) {
+                        rxrpc_kernel_end_call(call->rxcall);
+                        call->rxcall = NULL;
+                        call->type->destructor(call);
+                        afs_free_call(call);
+                }
+        }
+        _leave("");
+}
+/*
+ * wait synchronously for a call to complete
+ */
+static int afs_wait_for_call_to_complete(struct afs_call *call)
+{
+        struct sk_buff *skb;
+        int ret;
+        DECLARE_WAITQUEUE(myself, current);
+        _enter("");
+        add_wait_queue(&call->waitq, &myself);
+        for (;;) {
+                set_current_state(TASK_INTERRUPTIBLE);
+                /* deliver any messages that are in the queue */
+                if (!skb_queue_empty(&call->rx_queue)) {
+                        __set_current_state(TASK_RUNNING);
+                        afs_deliver_to_call(call);
+                        continue;
+                }
+                ret = call->error;
+                if (call->state >= AFS_CALL_COMPLETE)
+                        break;
+                ret = -EINTR;
+                if (signal_pending(current))
+                        break;
+                schedule();
+        }
+        remove_wait_queue(&call->waitq, &myself);
+        __set_current_state(TASK_RUNNING);
+        /* kill the call */
+        if (call->state < AFS_CALL_COMPLETE) {
+                _debug("call incomplete");
+                rxrpc_kernel_abort_call(call->rxcall, RX_CALL_DEAD);
+                while ((skb = skb_dequeue(&call->rx_queue)))
+                        afs_free_skb(skb);
+        }
+        _debug("call complete");
+        rxrpc_kernel_end_call(call->rxcall);
+        call->rxcall = NULL;
+        call->type->destructor(call);
+        afs_free_call(call);
+        _leave(" = %d", ret);
+        return ret;
+}
+/*
+ * wake up a waiting call
+ */
+static void afs_wake_up_call_waiter(struct afs_call *call)
+{
+        wake_up(&call->waitq);
+}
+/*
+ * wake up an asynchronous call
+ */
+static void afs_wake_up_async_call(struct afs_call *call)
+{
+        _enter("");
+        queue_work(afs_async_calls, &call->async_work);
+}
+/*
+ * put a call into asynchronous mode
+ * - mustn't touch the call descriptor as the call may have completed by the
+ *   time we get here
+ */
+static int afs_dont_wait_for_call_to_complete(struct afs_call *call)
+{
+        _enter("");
+        return -EINPROGRESS;
+}
+/*
+ * delete an asynchronous call
+ */
+static void afs_delete_async_call(struct work_struct *work)
+{
+        struct afs_call *call =
+                container_of(work, struct afs_call, async_work);
+        _enter("");
+        afs_free_call(call);
+        _leave("");
+}
+/*
+ * perform processing on an asynchronous call
+ * - on a multiple-thread workqueue this work item may try to run on several
+ *   CPUs at the same time
+ */
+static void afs_process_async_call(struct work_struct *work)
+{
+        struct afs_call *call =
+                container_of(work, struct afs_call, async_work);
+        _enter("");
+        if (!skb_queue_empty(&call->rx_queue))
+                afs_deliver_to_call(call);
+        if (call->state >= AFS_CALL_COMPLETE && call->wait_mode) {
+                if (call->wait_mode->async_complete)
+                        call->wait_mode->async_complete(call->reply,
+                                                        call->error);
+                call->reply = NULL;
+                /* kill the call */
+                rxrpc_kernel_end_call(call->rxcall);
+                call->rxcall = NULL;
+                if (call->type->destructor)
+                        call->type->destructor(call);
+                /* we can't just delete the call because the work item may be
+                 * queued */
+                PREPARE_WORK(&call->async_work, afs_delete_async_call);
+                queue_work(afs_async_calls, &call->async_work);
+        }
+        _leave("");
+}
+/*
+ * empty a socket buffer into a flat reply buffer
+ */
+void afs_transfer_reply(struct afs_call *call, struct sk_buff *skb)
+{
+        size_t len = skb->len;
+        if (skb_copy_bits(skb, 0, call->buffer + call->reply_size, len) < 0)
+                BUG();
+        call->reply_size += len;
+}
+/*
+ * accept the backlog of incoming calls
+ */
+static void afs_collect_incoming_call(struct work_struct *work)
+{
+        struct rxrpc_call *rxcall;
+        struct afs_call *call = NULL;
+        struct sk_buff *skb;
+        while ((skb = skb_dequeue(&afs_incoming_calls))) {
+                _debug("new call");
+                /* don't need the notification */
+                afs_free_skb(skb);
+                if (!call) {
+                        call = kzalloc(sizeof(struct afs_call), GFP_KERNEL);
+                        if (!call) {
+                                rxrpc_kernel_reject_call(afs_socket);
+                                return;
+                        }
+                        INIT_WORK(&call->async_work, afs_process_async_call);
+                        call->wait_mode = &afs_async_incoming_call;
+                        call->type = &afs_RXCMxxxx;
+                        init_waitqueue_head(&call->waitq);
+                        skb_queue_head_init(&call->rx_queue);
+                        call->state = AFS_CALL_AWAIT_OP_ID;
+                        _debug("CALL %p{%s} [%d]",
+                               call, call->type->name,
+                               atomic_read(&afs_outstanding_calls));
+                        atomic_inc(&afs_outstanding_calls);
+                }
+                rxcall = rxrpc_kernel_accept_call(afs_socket,
+                                                  (unsigned long) call);
+                if (!IS_ERR(rxcall)) {
+                        call->rxcall = rxcall;
+                        call = NULL;
+                }
+        }
+        if (call)
+                afs_free_call(call);
+}
+/*
+ * grab the operation ID from an incoming cache manager call
+ */
+static int afs_deliver_cm_op_id(struct afs_call *call, struct sk_buff *skb,
+                                bool last)
+{
+        size_t len = skb->len;
+        void *oibuf = (void *) &call->operation_ID;
+        _enter("{%u},{%zu},%d", call->offset, len, last);
+        ASSERTCMP(call->offset, <, 4);
+        /* the operation ID forms the first four bytes of the request data */
+        len = min_t(size_t, len, 4 - call->offset);
+        if (skb_copy_bits(skb, 0, oibuf + call->offset, len) < 0)
+                BUG();
+        if (!pskb_pull(skb, len))
+                BUG();
+        call->offset += len;
+        if (call->offset < 4) {
+                if (last) {
+                        _leave(" = -EBADMSG [op ID short]");
+                        return -EBADMSG;
+                }
+                _leave(" = 0 [incomplete]");
+                return 0;
+        }
+        call->state = AFS_CALL_AWAIT_REQUEST;
+        /* ask the cache manager to route the call (it'll change the call type
+         * if successful) */
+        if (!afs_cm_incoming_call(call))
+                return -ENOTSUPP;
+        /* pass responsibility for the remainder of this message off to the
+         * cache manager op */
+        return call->type->deliver(call, skb, last);
+}
+/*
+ * send an empty reply
+ */
+void afs_send_empty_reply(struct afs_call *call)
+{
+        struct msghdr msg;
+        struct iovec iov[1];
+        _enter("");
+        iov[0].iov_base         = NULL;
+        iov[0].iov_len          = 0;
+        msg.msg_name            = NULL;
+        msg.msg_namelen         = 0;
+        msg.msg_iov             = iov;
+        msg.msg_iovlen          = 0;
+        msg.msg_control         = NULL;
+        msg.msg_controllen      = 0;
+        msg.msg_flags           = 0;
+        call->state = AFS_CALL_AWAIT_ACK;
+        switch (rxrpc_kernel_send_data(call->rxcall, &msg, 0)) {
+        case 0:
+                _leave(" [replied]");
+                return;
+        case -ENOMEM:
+                _debug("oom");
+                rxrpc_kernel_abort_call(call->rxcall, RX_USER_ABORT);
+        default:
+                rxrpc_kernel_end_call(call->rxcall);
+                call->rxcall = NULL;
+                call->type->destructor(call);
+                afs_free_call(call);
+                _leave(" [error]");
+                return;
+        }
+}
+/*
+ * send a simple reply
+ */
+void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len)
+{
+        struct msghdr msg;
+        struct iovec iov[1];
+        _enter("");
+        iov[0].iov_base         = (void *) buf;
+        iov[0].iov_len          = len;
+        msg.msg_name            = NULL;
+        msg.msg_namelen         = 0;
+        msg.msg_iov             = iov;
+        msg.msg_iovlen          = 1;
+        msg.msg_control         = NULL;
+        msg.msg_controllen      = 0;
+        msg.msg_flags           = 0;
+        call->state = AFS_CALL_AWAIT_ACK;
+        switch (rxrpc_kernel_send_data(call->rxcall, &msg, len)) {
+        case 0:
+                _leave(" [replied]");
+                return;
+        case -ENOMEM:
+                _debug("oom");
+                rxrpc_kernel_abort_call(call->rxcall, RX_USER_ABORT);
+        default:
+                rxrpc_kernel_end_call(call->rxcall);
+                call->rxcall = NULL;
+                call->type->destructor(call);
+                afs_free_call(call);
+                _leave(" [error]");
+                return;
+        }
+}
+/*
+ * extract a piece of data from the received data socket buffers
+ */
+int afs_extract_data(struct afs_call *call, struct sk_buff *skb,
+                     bool last, void *buf, size_t count)
+{
+        size_t len = skb->len;
+        _enter("{%u},{%zu},%d,,%zu", call->offset, len, last, count);
+        ASSERTCMP(call->offset, <, count);
+        len = min_t(size_t, len, count - call->offset);
+        if (skb_copy_bits(skb, 0, buf + call->offset, len) < 0 ||
+            !pskb_pull(skb, len))
+                BUG();
+        call->offset += len;
+        if (call->offset < count) {
+                if (last) {
+                        _leave(" = -EBADMSG [%d < %zu]", call->offset, count);
+                        return -EBADMSG;
+                }
+                _leave(" = -EAGAIN");
+                return -EAGAIN;
+        }
+        return 0;
+}
diff --git a/fs/afs/security.c b/fs/afs/security.c
new file mode 100644
index 000000000000..f9f424d80458
--- /dev/null
+++ b/fs/afs/security.c
@@ -0,0 +1,356 @@
+/* AFS security handling
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/ctype.h>
+#include <keys/rxrpc-type.h>
+#include "internal.h"
+/*
+ * get a key
+ */
+struct key *afs_request_key(struct afs_cell *cell)
+{
+        struct key *key;
+        _enter("{%x}", key_serial(cell->anonymous_key));
+        _debug("key %s", cell->anonymous_key->description);
+        key = request_key(&key_type_rxrpc, cell->anonymous_key->description,
+                          NULL);
+        if (IS_ERR(key)) {
+                if (PTR_ERR(key) != -ENOKEY) {
+                        _leave(" = %ld", PTR_ERR(key));
+                        return key;
+                }
+                /* act as anonymous user */
+                _leave(" = {%x} [anon]", key_serial(cell->anonymous_key));
+                return key_get(cell->anonymous_key);
+        } else {
+                /* act as authorised user */
+                _leave(" = {%x} [auth]", key_serial(key));
+                return key;
+        }
+}
+/*
+ * dispose of a permits list
+ */
+void afs_zap_permits(struct rcu_head *rcu)
+{
+        struct afs_permits *permits =
+                container_of(rcu, struct afs_permits, rcu);
+        int loop;
+        _enter("{%d}", permits->count);
+        for (loop = permits->count - 1; loop >= 0; loop--)
+                key_put(permits->permits[loop].key);
+        kfree(permits);
+}
+/*
+ * dispose of a permits list in which all the key pointers have been copied
+ */
+static void afs_dispose_of_permits(struct rcu_head *rcu)
+{
+        struct afs_permits *permits =
+                container_of(rcu, struct afs_permits, rcu);
+        _enter("{%d}", permits->count);
+        kfree(permits);
+}
+/*
+ * get the authorising vnode - this is the specified inode itself if it's a
+ * directory or it's the parent directory if the specified inode is a file or
+ * symlink
+ * - the caller must release the ref on the inode
+ */
+static struct afs_vnode *afs_get_auth_inode(struct afs_vnode *vnode,
+                                            struct key *key)
+{
+        struct afs_vnode *auth_vnode;
+        struct inode *auth_inode;
+        _enter("");
+        if (S_ISDIR(vnode->vfs_inode.i_mode)) {
+                auth_inode = igrab(&vnode->vfs_inode);
+                ASSERT(auth_inode != NULL);
+        } else {
+                auth_inode = afs_iget(vnode->vfs_inode.i_sb, key,
+                                      &vnode->status.parent, NULL, NULL);
+                if (IS_ERR(auth_inode))
+                        return ERR_PTR(PTR_ERR(auth_inode));
+        }
+        auth_vnode = AFS_FS_I(auth_inode);
+        _leave(" = {%x}", auth_vnode->fid.vnode);
+        return auth_vnode;
+}
+/*
+ * clear the permit cache on a directory vnode
+ */
+void afs_clear_permits(struct afs_vnode *vnode)
+{
+        struct afs_permits *permits;
+        _enter("{%x}", vnode->fid.vnode);
+        mutex_lock(&vnode->permits_lock);
+        permits = vnode->permits;
+        rcu_assign_pointer(vnode->permits, NULL);
+        mutex_unlock(&vnode->permits_lock);
+        if (permits)
+                call_rcu(&permits->rcu, afs_zap_permits);
+        _leave("");
+}
+/*
+ * add the result obtained for a vnode to its or its parent directory's cache
+ * for the key used to access it
+ */
+void afs_cache_permit(struct afs_vnode *vnode, struct key *key, long acl_order)
+{
+        struct afs_permits *permits, *xpermits;
+        struct afs_permit *permit;
+        struct afs_vnode *auth_vnode;
+        int count, loop;
+        _enter("{%x},%x,%lx", vnode->fid.vnode, key_serial(key), acl_order);
+        auth_vnode = afs_get_auth_inode(vnode, key);
+        if (IS_ERR(auth_vnode)) {
+                _leave(" [get error %ld]", PTR_ERR(auth_vnode));
+                return;
+        }
+        mutex_lock(&auth_vnode->permits_lock);
+        /* guard against a rename being detected whilst we waited for the
+         * lock */
+        if (memcmp(&auth_vnode->fid, &vnode->status.parent,
+                   sizeof(struct afs_fid)) != 0) {
+                _debug("renamed");
+                goto out_unlock;
+        }
+        /* have to be careful as the directory's callback may be broken between
+         * us receiving the status we're trying to cache and us getting the
+         * lock to update the cache for the status */
+        if (auth_vnode->acl_order - acl_order > 0) {
+                _debug("ACL changed?");
+                goto out_unlock;
+        }
+        /* always update the anonymous mask */
+        _debug("anon access %x", vnode->status.anon_access);
+        auth_vnode->status.anon_access = vnode->status.anon_access;
+        if (key == vnode->volume->cell->anonymous_key)
+                goto out_unlock;
+        xpermits = auth_vnode->permits;
+        count = 0;
+        if (xpermits) {
+                /* see if the permit is already in the list
+                 * - if it is then we just amend the list
+                 */
+                count = xpermits->count;
+                permit = xpermits->permits;
+                for (loop = count; loop > 0; loop--) {
+                        if (permit->key == key) {
+                                permit->access_mask =
+                                        vnode->status.caller_access;
+                                goto out_unlock;
+                        }
+                        permit++;
+                }
+        }
+        permits = kmalloc(sizeof(*permits) + sizeof(*permit) * (count + 1),
+                          GFP_NOFS);
+        if (!permits)
+                goto out_unlock;
+        memcpy(permits->permits, xpermits->permits,
+               count * sizeof(struct afs_permit));
+        _debug("key %x access %x",
+               key_serial(key), vnode->status.caller_access);
+        permits->permits[count].access_mask = vnode->status.caller_access;
+        permits->permits[count].key = key_get(key);
+        permits->count = count + 1;
+        rcu_assign_pointer(auth_vnode->permits, permits);
+        if (xpermits)
+                call_rcu(&xpermits->rcu, afs_dispose_of_permits);
+out_unlock:
+        mutex_unlock(&auth_vnode->permits_lock);
+        iput(&auth_vnode->vfs_inode);
+        _leave("");
+}
+/*
+ * check with the fileserver to see if the directory or parent directory is
+ * permitted to be accessed with this authorisation, and if so, what access it
+ * is granted
+ * - the vnode returned by afs_get_auth_inode() is consulted first: the
+ *   anonymous key uses its anon_access mask, any other key is looked up in
+ *   its RCU-protected permits list
+ * - only if nothing usable is cached is the status of the vnode refetched, in
+ *   which case the caller_access from that fetch is returned
+ * - returns 0 and sets *_access on success; on failure returns a negative
+ *   error code and sets *_access to 0
+ */
+static int afs_check_permit(struct afs_vnode *vnode, struct key *key,
+                            afs_access_t *_access)
+{
+        struct afs_permits *permits;
+        struct afs_permit *permit;
+        struct afs_vnode *auth_vnode;
+        bool valid;
+        int loop, ret;
+        _enter("");
+        auth_vnode = afs_get_auth_inode(vnode, key);
+        if (IS_ERR(auth_vnode)) {
+                *_access = 0;
+                _leave(" = %ld", PTR_ERR(auth_vnode));
+                return PTR_ERR(auth_vnode);
+        }
+        ASSERT(S_ISDIR(auth_vnode->vfs_inode.i_mode));
+        /* check the permits to see if we've got one yet */
+        if (key == auth_vnode->volume->cell->anonymous_key) {
+                _debug("anon");
+                *_access = auth_vnode->status.anon_access;
+                valid = true;
+        } else {
+                valid = false;
+                rcu_read_lock();
+                permits = rcu_dereference(auth_vnode->permits);
+                if (permits) {
+                        permit = permits->permits;
+                        for (loop = permits->count; loop > 0; loop--) {
+                                if (permit->key == key) {
+                                        _debug("found in cache");
+                                        *_access = permit->access_mask;
+                                        valid = true;
+                                        break;
+                                }
+                                permit++;
+                        }
+                }
+                rcu_read_unlock();
+        }
+        if (!valid) {
+                /* check the status on the file we're actually interested in
+                 * (the post-processing will cache the result on auth_vnode) */
+                _debug("no valid permit");
+                set_bit(AFS_VNODE_CB_BROKEN, &vnode->flags);
+                ret = afs_vnode_fetch_status(vnode, auth_vnode, key);
+                if (ret < 0) {
+                        iput(&auth_vnode->vfs_inode);
+                        *_access = 0;
+                        _leave(" = %d", ret);
+                        return ret;
+                }
+                /* only take the mask from the status record when it has just
+                 * been fetched with this key; a cached permit or the
+                 * anonymous mask found above must not be overwritten by
+                 * whatever the last fetch happened to leave behind */
+                *_access = vnode->status.caller_access;
+        }
+        iput(&auth_vnode->vfs_inode);
+        _leave(" = 0 [access %x]", *_access);
+        return 0;
+}
+/*
+ * check the permissions on an AFS file
+ * - AFS ACLs are attached to directories only, and a file is controlled by its
+ *   parent directory's ACL
+ * - requests a key for the vnode's cell, refetches the vnode status if there
+ *   is no callback promise, and then maps the requested MAY_* mask onto the
+ *   AFS_ACE_* bits obtained from afs_check_permit()
+ * - returns -EACCES if the ACL doesn't grant the access, a negative error if
+ *   the key, status or permit couldn't be obtained, or else whatever
+ *   generic_permission() returns
+ * - NOTE(review): both branches test the mask bits with else-if chains, so a
+ *   combined request (eg. MAY_READ | MAY_WRITE) only has its first matching
+ *   bit checked against the ACL, and a directory request with none of
+ *   MAY_EXEC, MAY_READ or MAY_WRITE set hits BUG() - confirm that callers can
+ *   never pass such masks
+ */
+int afs_permission(struct inode *inode, int mask, struct nameidata *nd)
+{
+        struct afs_vnode *vnode = AFS_FS_I(inode);
+        afs_access_t access;
+        struct key *key;
+        int ret;
+        _enter("{{%x:%x},%lx},%x,",
+               vnode->fid.vid, vnode->fid.vnode, vnode->flags, mask);
+        key = afs_request_key(vnode->volume->cell);
+        if (IS_ERR(key)) {
+                _leave(" = %ld [key]", PTR_ERR(key));
+                return PTR_ERR(key);
+        }
+        /* if the promise has expired, we need to check the server again */
+        if (!vnode->cb_promised) {
+                _debug("not promised");
+                ret = afs_vnode_fetch_status(vnode, NULL, key);
+                if (ret < 0)
+                        goto error;
+                _debug("new promise [fl=%lx]", vnode->flags);
+        }
+        /* check the permits to see if we've got one yet */
+        ret = afs_check_permit(vnode, key, &access);
+        if (ret < 0)
+                goto error;
+        /* interpret the access mask */
+        _debug("REQ %x ACC %x on %s",
+               mask, access, S_ISDIR(inode->i_mode) ? "dir" : "file");
+        if (S_ISDIR(inode->i_mode)) {
+                if (mask & MAY_EXEC) {
+                        if (!(access & AFS_ACE_LOOKUP))
+                                goto permission_denied;
+                } else if (mask & MAY_READ) {
+                        if (!(access & AFS_ACE_READ))
+                                goto permission_denied;
+                } else if (mask & MAY_WRITE) {
+                        if (!(access & (AFS_ACE_DELETE | /* rmdir, unlink, rename from */
+                                        AFS_ACE_INSERT | /* create, mkdir, symlink, rename to */
+                                        AFS_ACE_WRITE))) /* chmod */
+                                goto permission_denied;
+                } else {
+                        BUG();
+                }
+        } else {
+                if (!(access & AFS_ACE_LOOKUP))
+                        goto permission_denied;
+                if (mask & (MAY_EXEC | MAY_READ)) {
+                        if (!(access & AFS_ACE_READ))
+                                goto permission_denied;
+                } else if (mask & MAY_WRITE) {
+                        if (!(access & AFS_ACE_WRITE))
+                                goto permission_denied;
+                }
+        }
+        key_put(key);
+        ret = generic_permission(inode, mask, NULL);
+        _leave(" = %d", ret);
+        return ret;
+permission_denied:
+        ret = -EACCES;
+error:
+        key_put(key);
+        _leave(" = %d", ret);
+        return ret;
+}
diff --git a/fs/afs/server.c b/fs/afs/server.c
index 44aff81dc6a7..96bb23b476a2 100644
--- a/fs/afs/server.c
+++ b/fs/afs/server.c
@@ -1,6 +1,6 @@
-/* server.c: AFS server record management
+/* AFS server record management
 *
- * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
+ * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 *
 * This program is free software; you can redistribute it and/or
@@ -11,489 +11,314 @@
 #include <linux/sched.h>
 #include <linux/slab.h>
-#include <rxrpc/peer.h>
-#include <rxrpc/connection.h>
-#include "volume.h"
-#include "cell.h"
-#include "server.h"
-#include "transport.h"
-#include "vlclient.h"
-#include "kafstimod.h"
 #include "internal.h"
-DEFINE_SPINLOCK(afs_server_peer_lock);
+unsigned afs_server_timeout = 10;       /* server timeout in seconds */
-#define FS_SERVICE_ID           1       /* AFS Volume Location Service ID */
+static void afs_reap_server(struct work_struct *);
-#define VL_SERVICE_ID           52      /* AFS Volume Location Service ID */
-static void __afs_server_timeout(struct afs_timer *timer)
+/* tree of all the servers, indexed by IP address */
+static struct rb_root afs_servers = RB_ROOT;
+static DEFINE_RWLOCK(afs_servers_lock);
+/* LRU list of all the servers not currently in use */
+static LIST_HEAD(afs_server_graveyard);
+static DEFINE_SPINLOCK(afs_server_graveyard_lock);
+static DECLARE_DELAYED_WORK(afs_server_reaper, afs_reap_server);
+/*
+ * install a server record in the master tree
+ * - the tree (afs_servers) is ordered by server->addr.s_addr and is walked
+ *   and modified with afs_servers_lock held for writing
+ * - returns 0 on success or -EEXIST if a record with the same address is
+ *   already in the tree, in which case nothing is inserted
+ */
+static int afs_install_server(struct afs_server *server)
 {
-        struct afs_server *server =
+        struct afs_server *xserver;
-                list_entry(timer, struct afs_server, timeout);
+        struct rb_node **pp, *p;
+        int ret;
-        _debug("SERVER TIMEOUT [%p{u=%d}]",
+        _enter("%p", server);
-               server, atomic_read(&server->usage));
-        afs_server_do_timeout(server);
+        write_lock(&afs_servers_lock);
-}
+        ret = -EEXIST;
+        pp = &afs_servers.rb_node;
+        p = NULL;
+        while (*pp) {
+                p = *pp;
+                _debug("- consider %p", p);
+                xserver = rb_entry(p, struct afs_server, master_rb);
+                if (server->addr.s_addr < xserver->addr.s_addr)
+                        pp = &(*pp)->rb_left;
+                else if (server->addr.s_addr > xserver->addr.s_addr)
+                        pp = &(*pp)->rb_right;
+                else
+                        goto error;
+        }
-static const struct afs_timer_ops afs_server_timer_ops = {
+        rb_link_node(&server->master_rb, p, pp);
-        .timed_out      = __afs_server_timeout,
+        rb_insert_color(&server->master_rb, &afs_servers);
-};
+        ret = 0;
+error:
+        write_unlock(&afs_servers_lock);
+        return ret;
+}
-/*****************************************************************************/
 /*
- * lookup a server record in a cell
+ * allocate a new server record
+ * - the record is zero-filled, given a usage count of 1, pointed at the cell
+ *   (no ref is taken on the cell here) and has its lists, locks, trees and
+ *   callback-break work item initialised
+ * - returns the new record, or NULL if the allocation failed
- * - TODO: search the cell's server list
 */
-int afs_server_lookup(struct afs_cell *cell, const struct in_addr *addr,
+static struct afs_server *afs_alloc_server(struct afs_cell *cell,
-                      struct afs_server **_server)
+                                           const struct in_addr *addr)
 {
-        struct afs_server *server, *active, *zombie;
+        struct afs_server *server;
-        int loop;
-        _enter("%p,%08x,", cell, ntohl(addr->s_addr));
+        _enter("");
-        /* allocate and initialise a server record */
        server = kzalloc(sizeof(struct afs_server), GFP_KERNEL);
-        if (!server) {
+        if (server) {
-                _leave(" = -ENOMEM");
+                atomic_set(&server->usage, 1);
-                return -ENOMEM;
+                server->cell = cell;
+                INIT_LIST_HEAD(&server->link);
+                INIT_LIST_HEAD(&server->grave);
+                init_rwsem(&server->sem);
+                spin_lock_init(&server->fs_lock);
+                server->fs_vnodes = RB_ROOT;
+                server->cb_promises = RB_ROOT;
+                spin_lock_init(&server->cb_lock);
+                init_waitqueue_head(&server->cb_break_waitq);
+                INIT_DELAYED_WORK(&server->cb_break_work,
+                                  afs_dispatch_give_up_callbacks);
+                /* copies the whole address, s_addr included */
+                memcpy(&server->addr, addr, sizeof(struct in_addr));
        }
-        atomic_set(&server->usage, 1);
+        /* don't touch server->usage in the trace if the allocation failed */
+        if (!server) {
+                _leave(" = NULL [nomem]");
+                return NULL;
+        }
+        _leave(" = %p{%d}", server, atomic_read(&server->usage));
+        return server;
-        INIT_LIST_HEAD(&server->link);
+}
-        init_rwsem(&server->sem);
-        INIT_LIST_HEAD(&server->fs_callq);
-        spin_lock_init(&server->fs_lock);
-        INIT_LIST_HEAD(&server->cb_promises);
-        spin_lock_init(&server->cb_lock);
-        for (loop = 0; loop < AFS_SERVER_CONN_LIST_SIZE; loop++)
-                server->fs_conn_cnt[loop] = 4;
-        memcpy(&server->addr, addr, sizeof(struct in_addr));
+/*
-        server->addr.s_addr = addr->s_addr;
+ * get an FS-server record for a cell
+ * - the cell's server list is scanned under the read lock first; on a miss a
+ *   candidate record is allocated and the list is rescanned under the write
+ *   lock before the candidate is installed in the master tree and added to
+ *   the cell's list (taking a ref on the cell)
+ * - a record that was found rather than created has its usage count bumped
+ *   and, if it is on the graveyard list, is taken off it again
+ * - returns the server record, ERR_PTR(-ENOMEM) if a candidate could not be
+ *   allocated, or ERR_PTR(-EEXIST) if the address is already in the master
+ *   tree but not in this cell's list
+ * - NOTE(review): the list_empty() test on server->grave is made before
+ *   afs_server_graveyard_lock is taken - confirm this cannot race with the
+ *   reaper or with afs_put_server()
+ */
+struct afs_server *afs_lookup_server(struct afs_cell *cell,
+                                     const struct in_addr *addr)
+{
+        struct afs_server *server, *candidate;
-        afs_timer_init(&server->timeout, &afs_server_timer_ops);
+        _enter("%p,"NIPQUAD_FMT, cell, NIPQUAD(addr->s_addr));
-        /* add to the cell */
+        /* quick scan of the list to see if we already have the server */
-        write_lock(&cell->sv_lock);
+        read_lock(&cell->servers_lock);
-        /* check the active list */
+        list_for_each_entry(server, &cell->servers, link) {
-        list_for_each_entry(active, &cell->sv_list, link) {
+                if (server->addr.s_addr == addr->s_addr)
-                if (active->addr.s_addr == addr->s_addr)
+                        goto found_server_quickly;
-                        goto use_active_server;
        }
+        read_unlock(&cell->servers_lock);
-        /* check the inactive list */
+        candidate = afs_alloc_server(cell, addr);
-        spin_lock(&cell->sv_gylock);
+        if (!candidate) {
-        list_for_each_entry(zombie, &cell->sv_graveyard, link) {
+                _leave(" = -ENOMEM");
-                if (zombie->addr.s_addr == addr->s_addr)
+                return ERR_PTR(-ENOMEM);
-                        goto resurrect_server;
        }
-        spin_unlock(&cell->sv_gylock);
-        afs_get_cell(cell);
+        write_lock(&cell->servers_lock);
-        server->cell = cell;
-        list_add_tail(&server->link, &cell->sv_list);
-        write_unlock(&cell->sv_lock);
+        /* check the cell's server list again */
+        list_for_each_entry(server, &cell->servers, link) {
+                if (server->addr.s_addr == addr->s_addr)
+                        goto found_server;
+        }
-        *_server = server;
+        _debug("new");
-        _leave(" = 0 (%p)", server);
+        server = candidate;
-        return 0;
+        if (afs_install_server(server) < 0)
+                goto server_in_two_cells;
-        /* found a matching active server */
+        afs_get_cell(cell);
- use_active_server:
+        list_add_tail(&server->link, &cell->servers);
-        _debug("active server");
-        afs_get_server(active);
+        write_unlock(&cell->servers_lock);
-        write_unlock(&cell->sv_lock);
+        _leave(" = %p{%d}", server, atomic_read(&server->usage));
+        return server;
+        /* found a matching server quickly */
+found_server_quickly:
+        _debug("found quickly");
+        afs_get_server(server);
+        read_unlock(&cell->servers_lock);
+no_longer_unused:
+        if (!list_empty(&server->grave)) {
+                spin_lock(&afs_server_graveyard_lock);
+                list_del_init(&server->grave);
+                spin_unlock(&afs_server_graveyard_lock);
+        }
+        _leave(" = %p{%d}", server, atomic_read(&server->usage));
+        return server;
+        /* found a matching server on the second pass */
+found_server:
+        _debug("found");
+        afs_get_server(server);
+        write_unlock(&cell->servers_lock);
+        kfree(candidate);
+        goto no_longer_unused;
+        /* found a server that seems to be in two cells */
+server_in_two_cells:
+        write_unlock(&cell->servers_lock);
+        kfree(candidate);
+        printk(KERN_NOTICE "kAFS:"
+               " Server "NIPQUAD_FMT" appears to be in two cells\n",
+               NIPQUAD(*addr));
+        _leave(" = -EEXIST");
+        return ERR_PTR(-EEXIST);
+}
-        kfree(server);
+/*
+ * look up a server by its IP address
+ * - walks the master tree (afs_servers) with afs_servers_lock held for
+ *   reading, comparing s_addr values
+ * - returns the matching record with its usage count bumped by
+ *   afs_get_server(), or NULL if no record has that address
+ */
+struct afs_server *afs_find_server(const struct in_addr *_addr)
+{
+        struct afs_server *server = NULL;
+        struct rb_node *p;
+        struct in_addr addr = *_addr;
-        *_server = active;
+        _enter(NIPQUAD_FMT, NIPQUAD(addr.s_addr));
-        _leave(" = 0 (%p)", active);
-        return 0;
-        /* found a matching server in the graveyard, so resurrect it and
+        read_lock(&afs_servers_lock);
-         * dispose of the new record */
- resurrect_server:
-        _debug("resurrecting server");
-        list_move_tail(&zombie->link, &cell->sv_list);
+        p = afs_servers.rb_node;
-        afs_get_server(zombie);
+        while (p) {
-        afs_kafstimod_del_timer(&zombie->timeout);
+                server = rb_entry(p, struct afs_server, master_rb);
-        spin_unlock(&cell->sv_gylock);
-        write_unlock(&cell->sv_lock);
-        kfree(server);
+                _debug("- consider %p", p);
-        *_server = zombie;
+                if (addr.s_addr < server->addr.s_addr) {
-        _leave(" = 0 (%p)", zombie);
+                        p = p->rb_left;
-        return 0;
+                } else if (addr.s_addr > server->addr.s_addr) {
+                        p = p->rb_right;
+                } else {
+                        afs_get_server(server);
+                        goto found;
+                }
+        }
-} /* end afs_server_lookup() */
+        server = NULL;
+found:
+        read_unlock(&afs_servers_lock);
+        ASSERTIFCMP(server, server->addr.s_addr, ==, addr.s_addr);
+        _leave(" = %p", server);
+        return server;
+}
-/*****************************************************************************/
 /*
 * destroy a server record
 * - removes from the cell list
+ * - in the new code this only drops one usage count; a NULL pointer is
+ *   ignored
+ * - when the count reaches zero, afs_flush_callback_breaks() is called and
+ *   then, if the count is still zero under afs_server_graveyard_lock, the
+ *   record is moved to the tail of the graveyard, stamped with its time of
+ *   death and the reaper is scheduled to run afs_server_timeout seconds later
 */
 void afs_put_server(struct afs_server *server)
 {
-        struct afs_cell *cell;
        if (!server)
                return;
-        _enter("%p", server);
+        _enter("%p{%d}", server, atomic_read(&server->usage));
-        cell = server->cell;
-        /* sanity check */
+        _debug("PUT SERVER %d", atomic_read(&server->usage));
-        BUG_ON(atomic_read(&server->usage) <= 0);
-        /* to prevent a race, the decrement and the dequeue must be effectively
+        ASSERTCMP(atomic_read(&server->usage), >, 0);
-         * atomic */
-        write_lock(&cell->sv_lock);
        if (likely(!atomic_dec_and_test(&server->usage))) {
-                write_unlock(&cell->sv_lock);
                _leave("");
                return;
        }
-        spin_lock(&cell->sv_gylock);
+        afs_flush_callback_breaks(server);
-        list_move_tail(&server->link, &cell->sv_graveyard);
-        /* time out in 10 secs */
+        spin_lock(&afs_server_graveyard_lock);
-        afs_kafstimod_add_timer(&server->timeout, 10 * HZ);
+        if (atomic_read(&server->usage) == 0) {
+                list_move_tail(&server->grave, &afs_server_graveyard);
-        spin_unlock(&cell->sv_gylock);
+                server->time_of_death = get_seconds();
-        write_unlock(&cell->sv_lock);
+                schedule_delayed_work(&afs_server_reaper,
+                                      afs_server_timeout * HZ);
-        _leave(" [killed]");
+        }
-} /* end afs_put_server() */
+        spin_unlock(&afs_server_graveyard_lock);
+        _leave(" [dead]");
+}
-/*****************************************************************************/
 /*
- * timeout server record
+ * destroy a dead server
+ * - asserts that the record's vnode tree, callback promise tree and callback
+ *   break queue are all empty, then drops the record's ref on its cell and
+ *   frees the record
- * - removes from the cell's graveyard if the usage count is zero
 */
-void afs_server_do_timeout(struct afs_server *server)
+static void afs_destroy_server(struct afs_server *server)
 {
-        struct rxrpc_peer *peer;
-        struct afs_cell *cell;
-        int loop;
        _enter("%p", server);
-        cell = server->cell;
+        ASSERTCMP(server->fs_vnodes.rb_node, ==, NULL);
+        ASSERTCMP(server->cb_promises.rb_node, ==, NULL);
-        BUG_ON(atomic_read(&server->usage) < 0);
+        ASSERTCMP(server->cb_break_head, ==, server->cb_break_tail);
+        ASSERTCMP(atomic_read(&server->cb_break_n), ==, 0);
-        /* remove from graveyard if still dead */
-        spin_lock(&cell->vl_gylock);
-        if (atomic_read(&server->usage) == 0)
-                list_del_init(&server->link);
-        else
-                server = NULL;
-        spin_unlock(&cell->vl_gylock);
-        if (!server) {
-                _leave("");
-                return; /* resurrected */
-        }
-        /* we can now destroy it properly */
-        afs_put_cell(cell);
-        /* uncross-point the structs under a global lock */
-        spin_lock(&afs_server_peer_lock);
-        peer = server->peer;
-        if (peer) {
-                server->peer = NULL;
-                peer->user = NULL;
-        }
-        spin_unlock(&afs_server_peer_lock);
-        /* finish cleaning up the server */
-        for (loop = AFS_SERVER_CONN_LIST_SIZE - 1; loop >= 0; loop--)
-                if (server->fs_conn[loop])
-                        rxrpc_put_connection(server->fs_conn[loop]);
-        if (server->vlserver)
-                rxrpc_put_connection(server->vlserver);
+        afs_put_cell(server->cell);
        kfree(server);
+}
-        _leave(" [destroyed]");
-} /* end afs_server_do_timeout() */
-/*****************************************************************************/
 /*
- * get a callslot on a connection to the fileserver on the specified server
+ * reap dead server records
+ * - work function of the afs_server_reaper delayed work item
+ * - takes records off the head of the graveyard under
+ *   afs_server_graveyard_lock until it meets one that has not yet been dead
+ *   for afs_server_timeout seconds, at which point the work item is
+ *   rescheduled for that record's expiry and the scan stops
+ * - an expired record that has regained a usage count is just dropped from
+ *   the graveyard; one that is still unused is unlinked from its cell's list
+ *   and the master tree (both write-locked) and queued on a local list
+ * - the queued records are destroyed after the graveyard lock is released
 */
-int afs_server_request_callslot(struct afs_server *server,
+static void afs_reap_server(struct work_struct *work)
-                                struct afs_server_callslot *callslot)
 {
-        struct afs_server_callslot *pcallslot;
+        LIST_HEAD(corpses);
-        struct rxrpc_connection *conn;
+        struct afs_server *server;
-        int nconn, ret;
+        unsigned long delay, expiry;
+        time_t now;
-        _enter("%p,",server);
+        now = get_seconds();
-        INIT_LIST_HEAD(&callslot->link);
+        spin_lock(&afs_server_graveyard_lock);
-        callslot->task = current;
-        callslot->conn = NULL;
+        while (!list_empty(&afs_server_graveyard)) {
-        callslot->nconn = -1;
+                server = list_entry(afs_server_graveyard.next,
-        callslot->ready = 0;
+                                    struct afs_server, grave);
-        ret = 0;
+                /* the queue is ordered most dead first */
-        conn = NULL;
+                expiry = server->time_of_death + afs_server_timeout;
+                if (expiry > now) {
-        /* get hold of a callslot first */
+                        delay = (expiry - now) * HZ;
-        spin_lock(&server->fs_lock);
+                        if (!schedule_delayed_work(&afs_server_reaper, delay)) {
+                                cancel_delayed_work(&afs_server_reaper);
-        /* resurrect the server if it's death timeout has expired */
+                                schedule_delayed_work(&afs_server_reaper,
-        if (server->fs_state) {
+                                                      delay);
-                if (time_before(jiffies, server->fs_dead_jif)) {
+                        }
-                        ret = server->fs_state;
+                        break;
-                        spin_unlock(&server->fs_lock);
-                        _leave(" = %d [still dead]", ret);
-                        return ret;
                }
-                server->fs_state = 0;
+                write_lock(&server->cell->servers_lock);
-        }
+                write_lock(&afs_servers_lock);
+                if (atomic_read(&server->usage) > 0) {
-        /* try and find a connection that has spare callslots */
+                        list_del_init(&server->grave);
-        for (nconn = 0; nconn < AFS_SERVER_CONN_LIST_SIZE; nconn++) {
+                } else {
-                if (server->fs_conn_cnt[nconn] > 0) {
+                        list_move_tail(&server->grave, &corpses);
-                        server->fs_conn_cnt[nconn]--;
+                        list_del_init(&server->link);
-                        spin_unlock(&server->fs_lock);
+                        rb_erase(&server->master_rb, &afs_servers);
-                        callslot->nconn = nconn;
-                        goto obtained_slot;
                }
+                write_unlock(&afs_servers_lock);
+                write_unlock(&server->cell->servers_lock);
        }
-        /* none were available - wait interruptibly for one to become
+        spin_unlock(&afs_server_graveyard_lock);
-         * available */
-        set_current_state(TASK_INTERRUPTIBLE);
-        list_add_tail(&callslot->link, &server->fs_callq);
-        spin_unlock(&server->fs_lock);
-        while (!callslot->ready && !signal_pending(current)) {
-                schedule();
-                set_current_state(TASK_INTERRUPTIBLE);
-        }
-        set_current_state(TASK_RUNNING);
-        /* even if we were interrupted we may still be queued */
-        if (!callslot->ready) {
-                spin_lock(&server->fs_lock);
-                list_del_init(&callslot->link);
-                spin_unlock(&server->fs_lock);
-        }
-        nconn = callslot->nconn;
-        /* if interrupted, we must release any slot we also got before
+        /* now reap the corpses we've extracted */
-         * returning an error */
+        while (!list_empty(&corpses)) {
-        if (signal_pending(current)) {
+                server = list_entry(corpses.next, struct afs_server, grave);
-                ret = -EINTR;
+                list_del(&server->grave);
-                goto error_release;
+                afs_destroy_server(server);
        }
+}
-        /* if we were woken up with an error, then pass that error back to the
-         * called */
-        if (nconn < 0) {
-                _leave(" = %d", callslot->errno);
-                return callslot->errno;
-        }
-        /* were we given a connection directly? */
-        if (callslot->conn) {
-                /* yes - use it */
-                _leave(" = 0 (nc=%d)", nconn);
-                return 0;
-        }
-        /* got a callslot, but no connection */
- obtained_slot:
-        /* need to get hold of the RxRPC connection */
-        down_write(&server->sem);
-        /* quick check to see if there's an outstanding error */
-        ret = server->fs_state;
-        if (ret)
-                goto error_release_upw;
-        if (server->fs_conn[nconn]) {
-                /* reuse an existing connection */
-                rxrpc_get_connection(server->fs_conn[nconn]);
-                callslot->conn = server->fs_conn[nconn];
-        }
-        else {
-                /* create a new connection */
-                ret = rxrpc_create_connection(afs_transport,
-                                              htons(7000),
-                                              server->addr.s_addr,
-                                              FS_SERVICE_ID,
-                                              NULL,
-                                              &server->fs_conn[nconn]);
-                if (ret < 0)
-                        goto error_release_upw;
-                callslot->conn = server->fs_conn[0];
-                rxrpc_get_connection(callslot->conn);
-        }
-        up_write(&server->sem);
-        _leave(" = 0");
-        return 0;
-        /* handle an error occurring */
- error_release_upw:
-        up_write(&server->sem);
- error_release:
-        /* either release the callslot or pass it along to another deserving
-         * task */
-        spin_lock(&server->fs_lock);
-        if (nconn < 0) {
-                /* no callslot allocated */
-        }
-        else if (list_empty(&server->fs_callq)) {
-                /* no one waiting */
-                server->fs_conn_cnt[nconn]++;
-                spin_unlock(&server->fs_lock);
-        }
-        else {
-                /* someone's waiting - dequeue them and wake them up */
-                pcallslot = list_entry(server->fs_callq.next,
-                                       struct afs_server_callslot, link);
-                list_del_init(&pcallslot->link);
-                pcallslot->errno = server->fs_state;
-                if (!pcallslot->errno) {
-                        /* pass them out callslot details */
-                        callslot->conn = xchg(&pcallslot->conn,
-                                              callslot->conn);
-                        pcallslot->nconn = nconn;
-                        callslot->nconn = nconn = -1;
-                }
-                pcallslot->ready = 1;
-                wake_up_process(pcallslot->task);
-                spin_unlock(&server->fs_lock);
-        }
-        rxrpc_put_connection(callslot->conn);
-        callslot->conn = NULL;
-        _leave(" = %d", ret);
-        return ret;
-} /* end afs_server_request_callslot() */
-/*****************************************************************************/
-/*
- * release a callslot back to the server
- * - transfers the RxRPC connection to the next pending callslot if possible
- */
-void afs_server_release_callslot(struct afs_server *server,
-                                 struct afs_server_callslot *callslot)
-{
-        struct afs_server_callslot *pcallslot;
-        _enter("{ad=%08x,cnt=%u},{%d}",
-               ntohl(server->addr.s_addr),
-               server->fs_conn_cnt[callslot->nconn],
-               callslot->nconn);
-        BUG_ON(callslot->nconn < 0);
-        spin_lock(&server->fs_lock);
-        if (list_empty(&server->fs_callq)) {
-                /* no one waiting */
-                server->fs_conn_cnt[callslot->nconn]++;
-                spin_unlock(&server->fs_lock);
-        }
-        else {
-                /* someone's waiting - dequeue them and wake them up */
-                pcallslot = list_entry(server->fs_callq.next,
-                                       struct afs_server_callslot, link);
-                list_del_init(&pcallslot->link);
-                pcallslot->errno = server->fs_state;
-                if (!pcallslot->errno) {
-                        /* pass them out callslot details */
-                        callslot->conn = xchg(&pcallslot->conn, callslot->conn);
-                        pcallslot->nconn = callslot->nconn;
-                        callslot->nconn = -1;
-                }
-                pcallslot->ready = 1;
-                wake_up_process(pcallslot->task);
-                spin_unlock(&server->fs_lock);
-        }
-        rxrpc_put_connection(callslot->conn);
-        _leave("");
-} /* end afs_server_release_callslot() */
-/*****************************************************************************/
 /*
- * get a handle to a connection to the vlserver (volume location) on the
+ * discard all the server records for rmmod
+ * - sets afs_server_timeout to 0 and requeues the reaper to run with no delay
+ * - this function does not itself wait for the reaper to finish
- * specified server
 */
-int afs_server_get_vlconn(struct afs_server *server,
+void __exit afs_purge_servers(void)
-                          struct rxrpc_connection **_conn)
 {
-        struct rxrpc_connection *conn;
+        afs_server_timeout = 0;
-        int ret;
+        cancel_delayed_work(&afs_server_reaper);
+        schedule_delayed_work(&afs_server_reaper, 0);
-        _enter("%p,", server);
+}
-        ret = 0;
-        conn = NULL;
-        down_read(&server->sem);
-        if (server->vlserver) {
-                /* reuse an existing connection */
-                rxrpc_get_connection(server->vlserver);
-                conn = server->vlserver;
-                up_read(&server->sem);
-        }
-        else {
-                /* create a new connection */
-                up_read(&server->sem);
-                down_write(&server->sem);
-                if (!server->vlserver) {
-                        ret = rxrpc_create_connection(afs_transport,
-                                                      htons(7003),
-                                                      server->addr.s_addr,
-                                                      VL_SERVICE_ID,
-                                                      NULL,
-                                                      &server->vlserver);
-                }
-                if (ret == 0) {
-                        rxrpc_get_connection(server->vlserver);
-                        conn = server->vlserver;
-                }
-                up_write(&server->sem);
-        }
-        *_conn = conn;
-        _leave(" = %d", ret);
-        return ret;
-} /* end afs_server_get_vlconn() */
diff --git a/fs/afs/server.h b/fs/afs/server.h
deleted file mode 100644
index c3d24115578f..000000000000
--- a/fs/afs/server.h
+++ /dev/null
@@ -1,102 +0,0 @@
-/* server.h: AFS server record
- *
- * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-#ifndef _LINUX_AFS_SERVER_H
-#define _LINUX_AFS_SERVER_H
-#include "types.h"
-#include "kafstimod.h"
-#include <rxrpc/peer.h>
-#include <linux/rwsem.h>
-extern spinlock_t afs_server_peer_lock;
-/*****************************************************************************/
-/*
- * AFS server record
- */
-struct afs_server
-{
-        atomic_t                usage;
-        struct afs_cell         *cell;          /* cell in which server resides */
-        struct list_head        link;           /* link in cell's server list */
-        struct rw_semaphore     sem;            /* access lock */
-        struct afs_timer        timeout;        /* graveyard timeout */
-        struct in_addr          addr;           /* server address */
-        struct rxrpc_peer       *peer;          /* peer record for this server */
-        struct rxrpc_connection *vlserver;      /* connection to the volume location service */
-        /* file service access */
-#define AFS_SERVER_CONN_LIST_SIZE 2
-        struct rxrpc_connection *fs_conn[AFS_SERVER_CONN_LIST_SIZE]; /* FS connections */
-        unsigned                fs_conn_cnt[AFS_SERVER_CONN_LIST_SIZE]; /* per conn call count */
-        struct list_head        fs_callq;       /* queue of processes waiting to make a call */
-        spinlock_t              fs_lock;        /* access lock */
-        int                     fs_state;       /* 0 or reason FS currently marked dead (-errno) */
-        unsigned                fs_rtt;         /* FS round trip time */
-        unsigned long           fs_act_jif;     /* time at which last activity occurred */
-        unsigned long           fs_dead_jif;    /* time at which no longer to be considered dead */
-        /* callback promise management */
-        struct list_head        cb_promises;    /* as yet unbroken promises from this server */
-        spinlock_t              cb_lock;        /* access lock */
-};
-extern int afs_server_lookup(struct afs_cell *cell,
-                             const struct in_addr *addr,
-                             struct afs_server **_server);
-#define afs_get_server(S) do { atomic_inc(&(S)->usage); } while(0)
-extern void afs_put_server(struct afs_server *server);
-extern void afs_server_do_timeout(struct afs_server *server);
-extern int afs_server_find_by_peer(const struct rxrpc_peer *peer,
-                                   struct afs_server **_server);
-extern int afs_server_get_vlconn(struct afs_server *server,
-                                 struct rxrpc_connection **_conn);
-static inline
-struct afs_server *afs_server_get_from_peer(struct rxrpc_peer *peer)
-{
-        struct afs_server *server;
-        spin_lock(&afs_server_peer_lock);
-        server = peer->user;
-        if (server)
-                afs_get_server(server);
-        spin_unlock(&afs_server_peer_lock);
-        return server;
-}
-/*****************************************************************************/
-/*
- * AFS server callslot grant record
- */
-struct afs_server_callslot
-{
-        struct list_head        link;           /* link in server's list */
-        struct task_struct      *task;          /* process waiting to make call */
-        struct rxrpc_connection *conn;          /* connection to use (or NULL on error) */
-        short                   nconn;          /* connection slot number (-1 on error) */
-        char                    ready;          /* T when ready */
-        int                     errno;          /* error number if nconn==-1 */
-};
-extern int afs_server_request_callslot(struct afs_server *server,
-                                       struct afs_server_callslot *callslot);
-extern void afs_server_release_callslot(struct afs_server *server,
-                                        struct afs_server_callslot *callslot);
-#endif /* _LINUX_AFS_SERVER_H */
diff --git a/fs/afs/super.c b/fs/afs/super.c
index eb7e32349da3..cebd03c91f57 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -1,5 +1,6 @@
-/*
+/* AFS superblock handling
- * Copyright (c) 2002 Red Hat, Inc. All rights reserved.
+ *
+ * Copyright (c) 2002, 2007 Red Hat, Inc. All rights reserved.
 *
 * This software may be freely redistributed under the terms of the
 * GNU General Public License.
@@ -9,7 +10,7 @@
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 * Authors: David Howells <dhowells@redhat.com>
- *          David Woodhouse <dwmw2@cambridge.redhat.com>
+ *          David Woodhouse <dwmw2@redhat.com>
 *
 */
@@ -19,22 +20,10 @@
 #include <linux/slab.h>
 #include <linux/fs.h>
 #include <linux/pagemap.h>
-#include "vnode.h"
-#include "volume.h"
-#include "cell.h"
-#include "cmservice.h"
-#include "fsclient.h"
-#include "super.h"
 #include "internal.h"
 #define AFS_FS_MAGIC 0x6B414653 /* 'kAFS' */
-struct afs_mount_params {
-        int                     rwpath;
-        struct afs_cell         *default_cell;
-        struct afs_volume       *volume;
-};
 static void afs_i_init_once(void *foo, struct kmem_cache *cachep,
                            unsigned long flags);
@@ -62,13 +51,13 @@ static const struct super_operations afs_super_ops = {
        .drop_inode     = generic_delete_inode,
        .destroy_inode  = afs_destroy_inode,
        .clear_inode    = afs_clear_inode,
+        .umount_begin   = afs_umount_begin,
        .put_super      = afs_put_super,
 };
 static struct kmem_cache *afs_inode_cachep;
 static atomic_t afs_count_active_inodes;
-/*****************************************************************************/
 /*
 * initialise the filesystem
 */
@@ -78,8 +67,6 @@ int __init afs_fs_init(void)
        _enter("");
-        afs_timer_init(&afs_mntpt_expiry_timer, &afs_mntpt_expiry_timer_ops);
        /* create ourselves an inode cache */
        atomic_set(&afs_count_active_inodes, 0);
@@ -99,20 +86,22 @@ int __init afs_fs_init(void)
        ret = register_filesystem(&afs_fs_type);
        if (ret < 0) {
                kmem_cache_destroy(afs_inode_cachep);
-                kleave(" = %d", ret);
+                _leave(" = %d", ret);
                return ret;
        }
-        kleave(" = 0");
+        _leave(" = 0");
        return 0;
-} /* end afs_fs_init() */
+}
-/*****************************************************************************/
 /*
 * clean up the filesystem
 */
 void __exit afs_fs_exit(void)
 {
+        _enter("");
+        afs_mntpt_kill_timer();
        unregister_filesystem(&afs_fs_type);
        if (atomic_read(&afs_count_active_inodes) != 0) {
@@ -122,10 +111,9 @@ void __exit afs_fs_exit(void)
        }
        kmem_cache_destroy(afs_inode_cachep);
+        _leave("");
+}
-} /* end afs_fs_exit() */
-/*****************************************************************************/
 /*
 * check that an argument has a value
 */
@@ -136,9 +124,8 @@ static int want_arg(char **_value, const char *option)
                return 0;
        }
        return 1;
-} /* end want_arg() */
+}
-/*****************************************************************************/
 /*
 * check that there's no subsequent value
 */
@@ -150,18 +137,17 @@ static int want_no_value(char *const *_value, const char *option)
                return 0;
        }
        return 1;
-} /* end want_no_value() */
+}
-/*****************************************************************************/
 /*
 * parse the mount options
 * - this function has been shamelessly adapted from the ext3 fs which
 *   shamelessly adapted it from the msdos fs
 */
-static int afs_super_parse_options(struct afs_mount_params *params,
+static int afs_parse_options(struct afs_mount_params *params,
-                                   char *options,
+                             char *options, const char **devname)
-                                   const char **devname)
 {
+        struct afs_cell *cell;
        char *key, *value;
        int ret;
@@ -170,51 +156,135 @@ static int afs_super_parse_options(struct afs_mount_params *params,
        options[PAGE_SIZE - 1] = 0;
        ret = 0;
-        while ((key = strsep(&options, ",")) != 0)
+        while ((key = strsep(&options, ","))) {
-        {
                value = strchr(key, '=');
                if (value)
                        *value++ = 0;
-                printk("kAFS: KEY: %s, VAL:%s\n", key, value ?: "-");
+                _debug("kAFS: KEY: %s, VAL:%s", key, value ?: "-");
                if (strcmp(key, "rwpath") == 0) {
                        if (!want_no_value(&value, "rwpath"))
                                return -EINVAL;
                        params->rwpath = 1;
-                        continue;
+                } else if (strcmp(key, "vol") == 0) {
-                }
-                else if (strcmp(key, "vol") == 0) {
                        if (!want_arg(&value, "vol"))
                                return -EINVAL;
                        *devname = value;
-                        continue;
+                } else if (strcmp(key, "cell") == 0) {
-                }
-                else if (strcmp(key, "cell") == 0) {
                        if (!want_arg(&value, "cell"))
                                return -EINVAL;
-                        afs_put_cell(params->default_cell);
+                        cell = afs_cell_lookup(value, strlen(value));
-                        ret = afs_cell_lookup(value,
+                        if (IS_ERR(cell))
-                                              strlen(value),
+                                return PTR_ERR(cell);
-                                              &params->default_cell);
+                        afs_put_cell(params->cell);
-                        if (ret < 0)
+                        params->cell = cell;
-                                return -EINVAL;
+                } else {
-                        continue;
+                        printk("kAFS: Unknown mount option: '%s'\n",  key);
+                        ret = -EINVAL;
+                        goto error;
                }
-                printk("kAFS: Unknown mount option: '%s'\n",  key);
-                ret = -EINVAL;
-                goto error;
        }
        ret = 0;
+error:
- error:
        _leave(" = %d", ret);
        return ret;
-} /* end afs_super_parse_options() */
+}
+/*
+ * parse a device name to get cell name, volume name, volume type and R/W
+ * selector
+ * - this can be one of the following:
+ *      "%[cell:]volume[.]"             R/W volume
+ *      "#[cell:]volume[.]"             R/O or R/W volume (rwpath=0),
+ *                                       or R/W (rwpath=1) volume
+ *      "%[cell:]volume.readonly"       R/O volume
+ *      "#[cell:]volume.readonly"       R/O volume
+ *      "%[cell:]volume.backup"         Backup volume
+ *      "#[cell:]volume.backup"         Backup volume
+ * - on success 0 is returned and params->type, ->force, ->volname,
+ *   ->volnamesz and ->cell have been filled in
+ * - returns -EINVAL if the name is absent or malformed, or the error from
+ *   afs_cell_lookup() if the cell cannot be found
+ * - params->volname points into name rather than at a copy, and is not
+ *   NUL-terminated at volnamesz, so name must remain valid for as long as
+ *   params->volname is in use
+ */
+static int afs_parse_device_name(struct afs_mount_params *params,
+                                 const char *name)
+{
+        struct afs_cell *cell;
+        const char *cellname, *suffix;
+        int cellnamesz;
+        _enter(",%s", name);
+        if (!name) {
+                printk(KERN_ERR "kAFS: no volume name specified\n");
+                return -EINVAL;
+        }
+        /* need a type selector followed by at least one more character */
+        if ((name[0] != '%' && name[0] != '#') || !name[1]) {
+                printk(KERN_ERR "kAFS: unparsable volume name\n");
+                return -EINVAL;
+        }
+        /* determine the type of volume we're looking for */
+        params->type = AFSVL_ROVOL;
+        params->force = false;
+        if (params->rwpath || name[0] == '%') {
+                params->type = AFSVL_RWVOL;
+                params->force = true;
+        }
+        name++;
+        /* split the cell name out if there is one */
+        params->volname = strchr(name, ':');
+        if (params->volname) {
+                /* cellname is a length-delimited slice of name, not a
+                 * NUL-terminated string */
+                cellname = name;
+                cellnamesz = params->volname - name;
+                params->volname++;
+        } else {
+                params->volname = name;
+                cellname = NULL;
+                cellnamesz = 0;
+        }
+        /* the volume type is further affected by a possible suffix */
+        suffix = strrchr(params->volname, '.');
+        if (suffix) {
+                if (strcmp(suffix, ".readonly") == 0) {
+                        params->type = AFSVL_ROVOL;
+                        params->force = true;
+                } else if (strcmp(suffix, ".backup") == 0) {
+                        params->type = AFSVL_BACKVOL;
+                        params->force = true;
+                } else if (suffix[1] == 0) {
+                        /* bare trailing dot: excluded from the volume name
+                         * below, but the type selection is left alone */
+                } else {
+                        /* not a recognised suffix, so the dot is part of
+                         * the volume name itself */
+                        suffix = NULL;
+                }
+        }
+        params->volnamesz = suffix ?
+                suffix - params->volname : strlen(params->volname);
+        _debug("cell %*.*s [%p]",
+               cellnamesz, cellnamesz, cellname ?: "", params->cell);
+        /* lookup the cell record
+         * - an explicit cell in the device name replaces any cell that is
+         *   already set in params
+         * - with no explicit cell and none set, afs_cell_lookup() is handed
+         *   a NULL name (presumably selecting a default cell - confirm
+         *   against afs_cell_lookup())
+         */
+        if (cellname || !params->cell) {
+                cell = afs_cell_lookup(cellname, cellnamesz);
+                if (IS_ERR(cell)) {
+                        /* bound the print: cellname runs on into the volume
+                         * name, so a plain %s would print "cell:volume" */
+                        printk(KERN_ERR "kAFS: unable to lookup cell '%*.*s'\n",
+                               cellnamesz, cellnamesz, cellname ?: "");
+                        return PTR_ERR(cell);
+                }
+                afs_put_cell(params->cell);
+                params->cell = cell;
+        }
+        _debug("CELL:%s [%p] VOLUME:%*.*s SUFFIX:%s TYPE:%d%s",
+               params->cell->name, params->cell,
+               params->volnamesz, params->volnamesz, params->volname,
+               suffix ?: "-", params->type, params->force ? " FORCE" : "");
+        return 0;
+}
-/*****************************************************************************/
 /*
 * check a superblock to see if it's the one we're looking for
 */
@@ -224,13 +294,12 @@ static int afs_test_super(struct super_block *sb, void *data)
        struct afs_super_info *as = sb->s_fs_info;
        return as->volume == params->volume;
-} /* end afs_test_super() */
+}
-/*****************************************************************************/
 /*
 * fill in the superblock
 */
-static int afs_fill_super(struct super_block *sb, void *data, int silent)
+static int afs_fill_super(struct super_block *sb, void *data)
 {
        struct afs_mount_params *params = data;
        struct afs_super_info *as = NULL;
@@ -239,7 +308,7 @@ static int afs_fill_super(struct super_block *sb, void *data, int silent)
        struct inode *inode = NULL;
        int ret;
-        kenter("");
+        _enter("");
        /* allocate a superblock info record */
        as = kzalloc(sizeof(struct afs_super_info), GFP_KERNEL);
@@ -262,9 +331,9 @@ static int afs_fill_super(struct super_block *sb, void *data, int silent)
        fid.vid         = as->volume->vid;
        fid.vnode       = 1;
        fid.unique      = 1;
-        ret = afs_iget(sb, &fid, &inode);
+        inode = afs_iget(sb, params->key, &fid, NULL, NULL);
-        if (ret < 0)
+        if (IS_ERR(inode))
-                goto error;
+                goto error_inode;
        ret = -ENOMEM;
        root = d_alloc_root(inode);
@@ -273,21 +342,23 @@ static int afs_fill_super(struct super_block *sb, void *data, int silent)
        sb->s_root = root;
-        kleave(" = 0");
+        _leave(" = 0");
        return 0;
- error:
+error_inode:
+        ret = PTR_ERR(inode);
+        inode = NULL;
+error:
        iput(inode);
        afs_put_volume(as->volume);
        kfree(as);
        sb->s_fs_info = NULL;
-        kleave(" = %d", ret);
+        _leave(" = %d", ret);
        return ret;
-} /* end afs_fill_super() */
+}
-/*****************************************************************************/
 /*
 * get an AFS superblock
 * - TODO: don't use get_sb_nodev(), but rather call sget() directly
@@ -300,69 +371,80 @@ static int afs_get_sb(struct file_system_type *fs_type,
 {
        struct afs_mount_params params;
        struct super_block *sb;
+        struct afs_volume *vol;
+        struct key *key;
        int ret;
        _enter(",,%s,%p", dev_name, options);
        memset(&params, 0, sizeof(params));
-        /* start the cache manager */
+        /* parse the options and device name */
-        ret = afscm_start();
-        if (ret < 0) {
-                _leave(" = %d", ret);
-                return ret;
-        }
-        /* parse the options */
        if (options) {
-                ret = afs_super_parse_options(&params, options, &dev_name);
+                ret = afs_parse_options(&params, options, &dev_name);
                if (ret < 0)
                        goto error;
-                if (!dev_name) {
-                        printk("kAFS: no volume name specified\n");
-                        ret = -EINVAL;
-                        goto error;
-                }
        }
-        /* parse the device name */
-        ret = afs_volume_lookup(dev_name,
+        ret = afs_parse_device_name(&params, dev_name);
-                                params.default_cell,
-                                params.rwpath,
-                                &params.volume);
        if (ret < 0)
                goto error;
-        /* allocate a deviceless superblock */
+        /* try and do the mount securely */
-        sb = sget(fs_type, afs_test_super, set_anon_super, &params);
+        key = afs_request_key(params.cell);
-        if (IS_ERR(sb))
+        if (IS_ERR(key)) {
+                _leave(" = %ld [key]", PTR_ERR(key));
+                ret = PTR_ERR(key);
                goto error;
+        }
+        params.key = key;
-        sb->s_flags = flags;
+        /* look up the volume record */
+        vol = afs_volume_lookup(&params);
+        if (IS_ERR(vol)) {
+                ret = PTR_ERR(vol);
+                goto error;
+        }
+        params.volume = vol;
-        ret = afs_fill_super(sb, &params, flags & MS_SILENT ? 1 : 0);
+        /* allocate a deviceless superblock */
-        if (ret < 0) {
+        sb = sget(fs_type, afs_test_super, set_anon_super, &params);
-                up_write(&sb->s_umount);
+        if (IS_ERR(sb)) {
-                deactivate_super(sb);
+                ret = PTR_ERR(sb);
                goto error;
        }
-        sb->s_flags |= MS_ACTIVE;
-        simple_set_mnt(mnt, sb);
+        if (!sb->s_root) {
+                /* initial superblock/root creation */
+                _debug("create");
+                sb->s_flags = flags;
+                ret = afs_fill_super(sb, &params);
+                if (ret < 0) {
+                        up_write(&sb->s_umount);
+                        deactivate_super(sb);
+                        goto error;
+                }
+                sb->s_flags |= MS_ACTIVE;
+        } else {
+                _debug("reuse");
+                ASSERTCMP(sb->s_flags, &, MS_ACTIVE);
+        }
+        simple_set_mnt(mnt, sb);
        afs_put_volume(params.volume);
-        afs_put_cell(params.default_cell);
+        afs_put_cell(params.cell);
-        _leave(" = 0 [%p]", 0, sb);
+        _leave(" = 0 [%p]", sb);
        return 0;
- error:
+error:
        afs_put_volume(params.volume);
-        afs_put_cell(params.default_cell);
+        afs_put_cell(params.cell);
-        afscm_stop();
+        key_put(params.key);
        _leave(" = %d", ret);
        return ret;
-} /* end afs_get_sb() */
+}
-/*****************************************************************************/
 /*
 * finish the unmounting process on the superblock
 */
@@ -373,35 +455,30 @@ static void afs_put_super(struct super_block *sb)
        _enter("");
        afs_put_volume(as->volume);
-        afscm_stop();
        _leave("");
-} /* end afs_put_super() */
+}
-/*****************************************************************************/
 /*
 * initialise an inode cache slab element prior to any use
 */
 static void afs_i_init_once(void *_vnode, struct kmem_cache *cachep,
                            unsigned long flags)
 {
-        struct afs_vnode *vnode = (struct afs_vnode *) _vnode;
+        struct afs_vnode *vnode = _vnode;
        if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
            SLAB_CTOR_CONSTRUCTOR) {
                memset(vnode, 0, sizeof(*vnode));
                inode_init_once(&vnode->vfs_inode);
                init_waitqueue_head(&vnode->update_waitq);
+                mutex_init(&vnode->permits_lock);
+                mutex_init(&vnode->validate_lock);
                spin_lock_init(&vnode->lock);
-                INIT_LIST_HEAD(&vnode->cb_link);
+                INIT_WORK(&vnode->cb_broken_work, afs_broken_callback_work);
-                INIT_LIST_HEAD(&vnode->cb_hash_link);
-                afs_timer_init(&vnode->cb_timeout,
-                               &afs_vnode_cb_timed_out_ops);
        }
+}
-} /* end afs_i_init_once() */
-/*****************************************************************************/
 /*
 * allocate an AFS inode struct from our slab cache
 */
@@ -409,8 +486,7 @@ static struct inode *afs_alloc_inode(struct super_block *sb)
 {
        struct afs_vnode *vnode;
-        vnode = (struct afs_vnode *)
+        vnode = kmem_cache_alloc(afs_inode_cachep, GFP_KERNEL);
-                kmem_cache_alloc(afs_inode_cachep, GFP_KERNEL);
        if (!vnode)
                return NULL;
@@ -421,21 +497,25 @@ static struct inode *afs_alloc_inode(struct super_block *sb)
        vnode->volume           = NULL;
        vnode->update_cnt       = 0;
-        vnode->flags            = 0;
+        vnode->flags            = 1 << AFS_VNODE_UNSET;
+        vnode->cb_promised      = false;
        return &vnode->vfs_inode;
-} /* end afs_alloc_inode() */
+}
-/*****************************************************************************/
 /*
 * destroy an AFS inode struct
 */
 static void afs_destroy_inode(struct inode *inode)
 {
+        struct afs_vnode *vnode = AFS_FS_I(inode);
        _enter("{%lu}", inode->i_ino);
-        kmem_cache_free(afs_inode_cachep, AFS_FS_I(inode));
+        _debug("DESTROY INODE %p", inode);
-        atomic_dec(&afs_count_active_inodes);
+        ASSERTCMP(vnode->server, ==, NULL);
-} /* end afs_destroy_inode() */
+        kmem_cache_free(afs_inode_cachep, vnode);
+        atomic_dec(&afs_count_active_inodes);
+}
diff --git a/fs/afs/super.h b/fs/afs/super.h
deleted file mode 100644
index 32de8cc6fae8..000000000000
--- a/fs/afs/super.h
+++ /dev/null
@@ -1,45 +0,0 @@
-/* super.h: AFS filesystem internal private data
- *
- * Copyright (c) 2002 Red Hat, Inc. All rights reserved.
- *
- * This software may be freely redistributed under the terms of the
- * GNU General Public License.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- * Authors: David Woodhouse <dwmw2@cambridge.redhat.com>
- *          David Howells <dhowells@redhat.com>
- *
- */
-#ifndef _LINUX_AFS_SUPER_H
-#define _LINUX_AFS_SUPER_H
-#include <linux/fs.h>
-#include "server.h"
-#ifdef __KERNEL__
-/*****************************************************************************/
-/*
- * AFS superblock private data
- * - there's one superblock per volume
- */
-struct afs_super_info
-{
-        struct afs_volume       *volume;        /* volume record */
-        char                    rwparent;       /* T if parent is R/W AFS volume */
-};
-static inline struct afs_super_info *AFS_FS_S(struct super_block *sb)
-{
-        return sb->s_fs_info;
-}
-extern struct file_system_type afs_fs_type;
-#endif /* __KERNEL__ */
-#endif /* _LINUX_AFS_SUPER_H */
diff --git a/fs/afs/transport.h b/fs/afs/transport.h
deleted file mode 100644
index 7013ae6ccc8c..000000000000
--- a/fs/afs/transport.h
+++ /dev/null
@@ -1,21 +0,0 @@
-/* transport.h: AFS transport management
- *
- * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-#ifndef _LINUX_AFS_TRANSPORT_H
-#define _LINUX_AFS_TRANSPORT_H
-#include "types.h"
-#include <rxrpc/transport.h>
-/* the cache manager transport endpoint */
-extern struct rxrpc_transport *afs_transport;
-#endif /* _LINUX_AFS_TRANSPORT_H */
diff --git a/fs/afs/types.h b/fs/afs/types.h
deleted file mode 100644
index b1a2367c7587..000000000000
--- a/fs/afs/types.h
+++ /dev/null
@@ -1,125 +0,0 @@
-/* types.h: AFS types
- *
- * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-#ifndef _LINUX_AFS_TYPES_H
-#define _LINUX_AFS_TYPES_H
-#ifdef __KERNEL__
-#include <rxrpc/types.h>
-#endif /* __KERNEL__ */
-typedef unsigned                        afs_volid_t;
-typedef unsigned                        afs_vnodeid_t;
-typedef unsigned long long              afs_dataversion_t;
-typedef enum {
-        AFSVL_RWVOL,                    /* read/write volume */
-        AFSVL_ROVOL,                    /* read-only volume */
-        AFSVL_BACKVOL,                  /* backup volume */
-} __attribute__((packed)) afs_voltype_t;
-typedef enum {
-        AFS_FTYPE_INVALID       = 0,
-        AFS_FTYPE_FILE          = 1,
-        AFS_FTYPE_DIR           = 2,
-        AFS_FTYPE_SYMLINK       = 3,
-} afs_file_type_t;
-#ifdef __KERNEL__
-struct afs_cell;
-struct afs_vnode;
-/*****************************************************************************/
-/*
- * AFS file identifier
- */
-struct afs_fid
-{
-        afs_volid_t     vid;            /* volume ID */
-        afs_vnodeid_t   vnode;          /* file index within volume */
-        unsigned        unique;         /* unique ID number (file index version) */
-};
-/*****************************************************************************/
-/*
- * AFS callback notification
- */
-typedef enum {
-        AFSCM_CB_UNTYPED        = 0,    /* no type set on CB break */
-        AFSCM_CB_EXCLUSIVE      = 1,    /* CB exclusive to CM [not implemented] */
-        AFSCM_CB_SHARED         = 2,    /* CB shared by other CM's */
-        AFSCM_CB_DROPPED        = 3,    /* CB promise cancelled by file server */
-} afs_callback_type_t;
-struct afs_callback
-{
-        struct afs_server       *server;        /* server that made the promise */
-        struct afs_fid          fid;            /* file identifier */
-        unsigned                version;        /* callback version */
-        unsigned                expiry;         /* time at which expires */
-        afs_callback_type_t     type;           /* type of callback */
-};
-#define AFSCBMAX 50
-/*****************************************************************************/
-/*
- * AFS volume information
- */
-struct afs_volume_info
-{
-        afs_volid_t             vid;            /* volume ID */
-        afs_voltype_t           type;           /* type of this volume */
-        afs_volid_t             type_vids[5];   /* volume ID's for possible types for this vol */
-        
-        /* list of fileservers serving this volume */
-        size_t                  nservers;       /* number of entries used in servers[] */
-        struct {
-                struct in_addr  addr;           /* fileserver address */
-        } servers[8];
-};
-/*****************************************************************************/
-/*
- * AFS file status information
- */
-struct afs_file_status
-{
-        unsigned                if_version;     /* interface version */
-#define AFS_FSTATUS_VERSION     1
-        afs_file_type_t         type;           /* file type */
-        unsigned                nlink;          /* link count */
-        size_t                  size;           /* file size */
-        afs_dataversion_t       version;        /* current data version */
-        unsigned                author;         /* author ID */
-        unsigned                owner;          /* owner ID */
-        unsigned                caller_access;  /* access rights for authenticated caller */
-        unsigned                anon_access;    /* access rights for unauthenticated caller */
-        umode_t                 mode;           /* UNIX mode */
-        struct afs_fid          parent;         /* parent file ID */
-        time_t                  mtime_client;   /* last time client changed data */
-        time_t                  mtime_server;   /* last time server changed data */
-};
-/*****************************************************************************/
-/*
- * AFS volume synchronisation information
- */
-struct afs_volsync
-{
-        time_t                  creation;       /* volume creation time */
-};
-#endif /* __KERNEL__ */
-#endif /* _LINUX_AFS_TYPES_H */
diff --git a/fs/afs/use-rtnetlink.c b/fs/afs/use-rtnetlink.c
new file mode 100644
index 000000000000..82f0daa28970
--- /dev/null
+++ b/fs/afs/use-rtnetlink.c
@@ -0,0 +1,473 @@
+/* RTNETLINK client
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
+#include <linux/if_addr.h>
+#include <linux/if_arp.h>
+#include <linux/inetdevice.h>
+#include <net/netlink.h>
+#include "internal.h"
+/*
+ * state shared between an rtnetlink request, the reply reader and the
+ * per-message parsing routine
+ */
+struct afs_rtm_desc {
+        struct socket           *nlsock;        /* NETLINK_ROUTE socket requests are sent on */
+        struct afs_interface    *bufs;          /* caller's array of interface records */
+        u8                      *mac;           /* caller's buffer for a MAC address */
+        size_t                  nbufs;          /* number of records in bufs[] in use */
+        size_t                  maxbufs;        /* capacity of bufs[] */
+        void                    *data;          /* kmalloc'd buffer holding the last datagram read */
+        ssize_t                 datalen;        /* result of the last full read into data */
+        size_t                  datamax;        /* size of the last datagram peeked */
+        int                     msg_seq;        /* sequence number for the next request */
+        unsigned                mac_index;      /* index of the interface mac[] came from */
+        bool                    wantloopback;   /* true to keep loopback interfaces */
+        int (*parse)(struct afs_rtm_desc *, struct nlmsghdr *); /* called per message read */
+};
+/*
+ * digest one message from an RTM_GETADDR dump
+ * - fills in the next free record in desc->bufs with the interface index,
+ *   netmask and IPv4 address, leaving the MTU as 0 for a later pass
+ * - non-IPv4 entries, and entries arriving once the buffer is full, are
+ *   skipped
+ * - always returns 0
+ */
+static int afs_rtm_getaddr_parse(struct afs_rtm_desc *desc,
+                                 struct nlmsghdr *nlhdr)
+{
+        struct ifaddrmsg *ifa = (struct ifaddrmsg *) NLMSG_DATA(nlhdr);
+        struct afs_interface *slot;
+        struct rtattr *attr;
+        const char *label = "unknown";
+        size_t left;
+        _enter("{ix=%d,af=%d}", ifa->ifa_index, ifa->ifa_family);
+        if (ifa->ifa_family != AF_INET) {
+                _leave(" = 0 [family %d]", ifa->ifa_family);
+                return 0;
+        }
+        if (desc->nbufs >= desc->maxbufs) {
+                _leave(" = 0 [max %zu/%zu]", desc->nbufs, desc->maxbufs);
+                return 0;
+        }
+        /* claim the next record; it's only committed by the nbufs++ below */
+        slot = &desc->bufs[desc->nbufs];
+        slot->index = ifa->ifa_index;
+        slot->netmask.s_addr = inet_make_mask(ifa->ifa_prefixlen);
+        slot->mtu = 0;
+        /* the attributes follow the aligned ifaddrmsg header */
+        attr = NLMSG_DATA(nlhdr) + NLMSG_ALIGN(sizeof(struct ifaddrmsg));
+        left = NLMSG_PAYLOAD(nlhdr, sizeof(struct ifaddrmsg));
+        while (RTA_OK(attr, left)) {
+                if (attr->rta_type == IFA_ADDRESS)
+                        memcpy(&slot->address, RTA_DATA(attr), 4);
+                else if (attr->rta_type == IFA_LABEL)
+                        label = RTA_DATA(attr);
+                attr = RTA_NEXT(attr, left);
+        }
+        _debug("%s: "NIPQUAD_FMT"/"NIPQUAD_FMT,
+               label, NIPQUAD(slot->address), NIPQUAD(slot->netmask));
+        desc->nbufs++;
+        _leave(" = 0");
+        return 0;
+}
+/*
+ * digest one message from an RTM_GETLINK dump to pick up an MTU
+ * - the message is matched by interface index against the records already in
+ *   desc->bufs; unmatched interfaces are ignored
+ * - a loopback interface is left with its MTU untouched unless
+ *   desc->wantloopback is set
+ * - always returns 0
+ */
+static int afs_rtm_getlink_if_parse(struct afs_rtm_desc *desc,
+                                    struct nlmsghdr *nlhdr)
+{
+        struct ifinfomsg *ifi = (struct ifinfomsg *) NLMSG_DATA(nlhdr);
+        struct afs_interface *rec = NULL;
+        struct rtattr *attr;
+        const char *ifname = "unknown";
+        size_t left, i;
+        _enter("{ix=%d}", ifi->ifi_index);
+        /* find the first record made for this interface */
+        for (i = 0; i < desc->nbufs; i++) {
+                if (desc->bufs[i].index == ifi->ifi_index) {
+                        rec = &desc->bufs[i];
+                        break;
+                }
+        }
+        if (!rec) {
+                _leave(" = 0 [no match]");
+                return 0;
+        }
+        if (ifi->ifi_type == ARPHRD_LOOPBACK && !desc->wantloopback) {
+                _leave(" = 0 [loopback]");
+                return 0;
+        }
+        /* the attributes follow the aligned ifinfomsg header */
+        attr = NLMSG_DATA(nlhdr) + NLMSG_ALIGN(sizeof(struct ifinfomsg));
+        left = NLMSG_PAYLOAD(nlhdr, sizeof(struct ifinfomsg));
+        while (RTA_OK(attr, left)) {
+                if (attr->rta_type == IFLA_MTU)
+                        memcpy(&rec->mtu, RTA_DATA(attr), 4);
+                else if (attr->rta_type == IFLA_IFNAME)
+                        ifname = RTA_DATA(attr);
+                attr = RTA_NEXT(attr, left);
+        }
+        _debug("%s: "NIPQUAD_FMT"/"NIPQUAD_FMT" mtu %u",
+               ifname, NIPQUAD(rec->address), NIPQUAD(rec->netmask),
+               rec->mtu);
+        _leave(" = 0");
+        return 0;
+}
+/*
+ * digest one message from an RTM_GETLINK dump to pick up a MAC address
+ * - only a non-loopback interface whose index is below desc->mac_index is
+ *   considered, so across a whole dump the lowest-indexed such interface wins
+ * - at most 6 bytes of the link address are copied into desc->mac
+ * - always returns 0
+ */
+static int afs_rtm_getlink_mac_parse(struct afs_rtm_desc *desc,
+                                     struct nlmsghdr *nlhdr)
+{
+        struct ifinfomsg *ifi = (struct ifinfomsg *) NLMSG_DATA(nlhdr);
+        struct rtattr *attr;
+        const char *ifname = "unknown";
+        size_t left, maclen;
+        bool got_mac = false;
+        _enter("{ix=%d}", ifi->ifi_index);
+        if (ifi->ifi_index >= desc->mac_index) {
+                _leave(" = 0 [high]");
+                return 0;
+        }
+        if (ifi->ifi_type == ARPHRD_LOOPBACK) {
+                _leave(" = 0 [loopback]");
+                return 0;
+        }
+        /* the attributes follow the aligned ifinfomsg header */
+        attr = NLMSG_DATA(nlhdr) + NLMSG_ALIGN(sizeof(struct ifinfomsg));
+        left = NLMSG_PAYLOAD(nlhdr, sizeof(struct ifinfomsg));
+        while (RTA_OK(attr, left)) {
+                if (attr->rta_type == IFLA_ADDRESS) {
+                        maclen = RTA_PAYLOAD(attr);
+                        memcpy(desc->mac, RTA_DATA(attr),
+                               min_t(size_t, maclen, 6));
+                        desc->mac_index = ifi->ifi_index;
+                        got_mac = true;
+                } else if (attr->rta_type == IFLA_IFNAME) {
+                        ifname = RTA_DATA(attr);
+                }
+                attr = RTA_NEXT(attr, left);
+        }
+        if (got_mac)
+                _debug("%s: %02x:%02x:%02x:%02x:%02x:%02x",
+                       ifname,
+                       desc->mac[0], desc->mac[1], desc->mac[2],
+                       desc->mac[3], desc->mac[4], desc->mac[5]);
+        _leave(" = 0");
+        return 0;
+}
+/*
+ * read the rtnetlink response and pass to parsing routine
+ * - each datagram is peeked at first (MSG_PEEK | MSG_TRUNC) to find out how
+ *   big it is, desc->data is replaced if it is too small, and then the
+ *   datagram is read for real
+ * - each data message in the datagram is handed to desc->parse(); NLMSG_DONE
+ *   and NLMSG_ERROR carry no rtnetlink payload and are dealt with here
+ * - reading stops after NLMSG_DONE or after a reply that lacks NLM_F_MULTI
+ * - returns 0 on success, -EMSGSIZE for a runt packet, -ENOMEM if the buffer
+ *   can't be allocated, -EIO for a bad header or an NLMSG_ERROR, or whatever
+ *   error kernel_recvmsg() or the parser gave
+ * - desc->data is left for the caller to kfree()
+ */
+static int afs_read_rtm(struct afs_rtm_desc *desc)
+{
+        struct nlmsghdr *nlhdr, tmphdr;
+        struct msghdr msg;
+        struct kvec iov[1];
+        void *data;
+        bool last = false;
+        int len, ret, remain;
+        _enter("");
+        do {
+                /* first of all peek to see how big the packet is */
+                memset(&msg, 0, sizeof(msg));
+                iov[0].iov_base = &tmphdr;
+                iov[0].iov_len = sizeof(tmphdr);
+                len = kernel_recvmsg(desc->nlsock, &msg, iov, 1,
+                                     sizeof(tmphdr), MSG_PEEK | MSG_TRUNC);
+                if (len < 0) {
+                        _leave(" = %d [peek]", len);
+                        return len;
+                }
+                if (len == 0)
+                        continue;
+                if (len < sizeof(tmphdr) || len < NLMSG_PAYLOAD(&tmphdr, 0)) {
+                        _leave(" = -EMSGSIZE");
+                        return -EMSGSIZE;
+                }
+                if (desc->datamax < len) {
+                        kfree(desc->data);
+                        desc->data = NULL;
+                        data = kmalloc(len, GFP_KERNEL);
+                        if (!data) {
+                                /* don't leave a size behind for a buffer
+                                 * that no longer exists */
+                                desc->datamax = 0;
+                                _leave(" = -ENOMEM");
+                                return -ENOMEM;
+                        }
+                        desc->data = data;
+                }
+                desc->datamax = len;
+                /* read all the data from this packet */
+                iov[0].iov_base = desc->data;
+                iov[0].iov_len = desc->datamax;
+                desc->datalen = kernel_recvmsg(desc->nlsock, &msg, iov, 1,
+                                               desc->datamax, 0);
+                if (desc->datalen < 0) {
+                        /* datalen is an ssize_t, so it wants %zd */
+                        _leave(" = %zd [recv]", desc->datalen);
+                        return desc->datalen;
+                }
+                nlhdr = desc->data;
+                /* check if the header is valid */
+                if (!NLMSG_OK(nlhdr, desc->datalen) ||
+                    nlhdr->nlmsg_type == NLMSG_ERROR) {
+                        _leave(" = -EIO");
+                        return -EIO;
+                }
+                /* see if this is the last message */
+                if (nlhdr->nlmsg_type == NLMSG_DONE ||
+                    !(nlhdr->nlmsg_flags & NLM_F_MULTI))
+                        last = true;
+                /* parse the bits we got this time */
+                nlmsg_for_each_msg(nlhdr, desc->data, desc->datalen, remain) {
+                        /* the terminator is shorter than the ifaddrmsg or
+                         * ifinfomsg the parsers expect, so it must not be
+                         * passed to them
+                         */
+                        if (nlhdr->nlmsg_type == NLMSG_DONE) {
+                                last = true;
+                                break;
+                        }
+                        if (nlhdr->nlmsg_type == NLMSG_ERROR) {
+                                _leave(" = -EIO");
+                                return -EIO;
+                        }
+                        ret = desc->parse(desc, nlhdr);
+                        if (ret < 0) {
+                                _leave(" = %d [parse]", ret);
+                                return ret;
+                        }
+                }
+        } while (!last);
+        _leave(" = 0");
+        return 0;
+}
+/*
+ * send an RTM_GETADDR dump request so that the addresses and netmasks bound
+ * to the interfaces can be read back
+ * - uses up one sequence number from desc->msg_seq
+ * - returns whatever kernel_sendmsg() returns
+ */
+static int afs_rtm_getaddr(struct afs_rtm_desc *desc)
+{
+        struct {
+                struct nlmsghdr nl_msg __attribute__((aligned(NLMSG_ALIGNTO)));
+                struct ifaddrmsg addr_msg __attribute__((aligned(NLMSG_ALIGNTO)));
+        } request;
+        struct msghdr msg;
+        struct kvec iov[1];
+        int ret;
+        _enter("");
+        /* build the request: a netlink header plus an all-zero ifaddrmsg */
+        memset(&request, 0, sizeof(request));
+        request.nl_msg.nlmsg_pid = 0;
+        request.nl_msg.nlmsg_seq = desc->msg_seq++;
+        request.nl_msg.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
+        request.nl_msg.nlmsg_type = RTM_GETADDR;
+        request.nl_msg.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifaddrmsg));
+        /* and send it as a single segment */
+        iov[0].iov_len = sizeof(request);
+        iov[0].iov_base = &request;
+        memset(&msg, 0, sizeof(msg));
+        ret = kernel_sendmsg(desc->nlsock, &msg, iov, 1, sizeof(request));
+        _leave(" = %d", ret);
+        return ret;
+}
+/*
+ * send an RTM_GETLINK request so that the per-link information (such as the
+ * MTUs) can be read back
+ * - uses up one sequence number from desc->msg_seq
+ * - returns whatever kernel_sendmsg() returns
+ */
+static int afs_rtm_getlink(struct afs_rtm_desc *desc)
+{
+        struct {
+                struct nlmsghdr nl_msg __attribute__((aligned(NLMSG_ALIGNTO)));
+                struct ifinfomsg link_msg __attribute__((aligned(NLMSG_ALIGNTO)));
+        } request;
+        struct msghdr msg;
+        struct kvec iov[1];
+        int ret;
+        _enter("");
+        /* build the request: a netlink header plus an all-zero ifinfomsg */
+        memset(&request, 0, sizeof(request));
+        request.nl_msg.nlmsg_pid = 0;
+        request.nl_msg.nlmsg_seq = desc->msg_seq++;
+        request.nl_msg.nlmsg_flags = NLM_F_REQUEST | NLM_F_ROOT;
+        request.nl_msg.nlmsg_type = RTM_GETLINK;
+        request.nl_msg.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
+        /* and send it as a single segment */
+        iov[0].iov_len = sizeof(request);
+        iov[0].iov_base = &request;
+        memset(&msg, 0, sizeof(msg));
+        ret = kernel_sendmsg(desc->nlsock, &msg, iov, 1, sizeof(request));
+        _leave(" = %d", ret);
+        return ret;
+}
+/*
+ * discard the interface records that never had an MTU filled in
+ * - the surviving records are shuffled down to the front of desc->bufs in
+ *   their original order and desc->nbufs is cut to match
+ */
+static void afs_cull_interfaces(struct afs_rtm_desc *desc)
+{
+        struct afs_interface *bufs = desc->bufs;
+        size_t total = desc->nbufs;
+        int src, dst = 0;
+        _enter("{%zu}", total);
+        for (src = 0; src < total; src++) {
+                if (bufs[src].mtu == 0)
+                        continue;
+                /* only copy if a gap has opened up behind us */
+                if (src != dst) {
+                        ASSERTCMP(src, >, dst);
+                        bufs[dst] = bufs[src];
+                }
+                dst++;
+        }
+        desc->nbufs = dst;
+        _leave(" [%zu/%zu]", desc->nbufs, total);
+}
+/*
+ * build a list of this system's IPv4 interface addresses, netmasks and MTUs
+ * - bufs[] is filled from an RTM_GETADDR dump, MTUs are then added from an
+ *   RTM_GETLINK dump and any record left without an MTU is dropped
+ * - loopback interfaces are only kept if wantloopback is set
+ * - returns the number of records left in bufs[], or a negative error code
+ */
+int afs_get_ipv4_interfaces(struct afs_interface *bufs, size_t maxbufs,
+                            bool wantloopback)
+{
+        struct afs_rtm_desc desc;
+        int ret, i;
+        _enter("");
+        memset(&desc, 0, sizeof(desc));
+        desc.wantloopback = wantloopback;
+        desc.maxbufs = maxbufs;
+        desc.bufs = bufs;
+        ret = sock_create_kern(AF_NETLINK, SOCK_DGRAM, NETLINK_ROUTE,
+                               &desc.nlsock);
+        if (ret < 0) {
+                _leave(" = %d [sock]", ret);
+                return ret;
+        }
+        /* pass 1: RTM_GETADDR gives the addresses and netmasks */
+        desc.parse = afs_rtm_getaddr_parse;
+        ret = afs_rtm_getaddr(&desc);
+        if (ret >= 0)
+                ret = afs_read_rtm(&desc);
+        if (ret < 0)
+                goto out;
+        /* pass 2: RTM_GETLINK gives the MTUs */
+        desc.parse = afs_rtm_getlink_if_parse;
+        ret = afs_rtm_getlink(&desc);
+        if (ret >= 0)
+                ret = afs_read_rtm(&desc);
+        if (ret < 0)
+                goto out;
+        afs_cull_interfaces(&desc);
+        ret = desc.nbufs;
+        i = 0;
+        while (i < ret) {
+                _debug("[%d] "NIPQUAD_FMT"/"NIPQUAD_FMT" mtu %u",
+                       bufs[i].index,
+                       NIPQUAD(bufs[i].address),
+                       NIPQUAD(bufs[i].netmask),
+                       bufs[i].mtu);
+                i++;
+        }
+out:
+        /* the receive buffer and the socket go on success and failure alike */
+        kfree(desc.data);
+        sock_release(desc.nlsock);
+        _leave(" = %d", ret);
+        return ret;
+}
+/*
+ * get the MAC address of the lowest-indexed non-loopback interface
+ * - the buffer should be 6 bytes in size
+ * - returns 0 if an address was stored in mac[], -ENONET if no suitable
+ *   interface was reported, or a negative error code from the socket layer
+ *   or the reply reader
+ */
+int afs_get_MAC_address(u8 mac[6])
+{
+        struct afs_rtm_desc desc;
+        int ret;
+        _enter("");
+        memset(&desc, 0, sizeof(desc));
+        desc.mac = mac;
+        /* the parser only accepts indices below this, so UINT_MAX doubles as
+         * "nothing found yet" */
+        desc.mac_index = UINT_MAX;
+        ret = sock_create_kern(AF_NETLINK, SOCK_DGRAM, NETLINK_ROUTE,
+                               &desc.nlsock);
+        if (ret < 0) {
+                _leave(" = %d [sock]", ret);
+                return ret;
+        }
+        /* issue RTM_GETLINK */
+        desc.parse = afs_rtm_getlink_mac_parse;
+        ret = afs_rtm_getlink(&desc);
+        if (ret < 0)
+                goto error;
+        ret = afs_read_rtm(&desc);
+        if (ret < 0)
+                goto error;
+        if (desc.mac_index < UINT_MAX) {
+                /* got a MAC address */
+                _debug("[%d] %02x:%02x:%02x:%02x:%02x:%02x",
+                       desc.mac_index,
+                       mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]);
+        } else {
+                ret = -ENONET;
+        }
+error:
+        /* afs_read_rtm() allocates desc.data and leaves it for us to free;
+         * kfree(NULL) is fine if it never got that far */
+        kfree(desc.data);
+        sock_release(desc.nlsock);
+        _leave(" = %d", ret);
+        return ret;
+}
diff --git a/fs/afs/vlclient.c b/fs/afs/vlclient.c
index 7b0e3192ee39..36c1306e09e0 100644
--- a/fs/afs/vlclient.c
+++ b/fs/afs/vlclient.c
@@ -1,4 +1,4 @@
-/* vlclient.c: AFS Volume Location Service client
+/* AFS Volume Location Service client
 *
 * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
@@ -11,247 +11,76 @@
 #include <linux/init.h>
 #include <linux/sched.h>
-#include <rxrpc/rxrpc.h>
-#include <rxrpc/transport.h>
-#include <rxrpc/connection.h>
-#include <rxrpc/call.h>
-#include "server.h"
-#include "volume.h"
-#include "vlclient.h"
-#include "kafsasyncd.h"
-#include "kafstimod.h"
-#include "errors.h"
 #include "internal.h"
-#define VLGETENTRYBYID          503     /* AFS Get Cache Entry By ID operation ID */
-#define VLGETENTRYBYNAME        504     /* AFS Get Cache Entry By Name operation ID */
-#define VLPROBE                 514     /* AFS Probe Volume Location Service operation ID */
-static void afs_rxvl_get_entry_by_id_attn(struct rxrpc_call *call);
-static void afs_rxvl_get_entry_by_id_error(struct rxrpc_call *call);
-/*****************************************************************************/
 /*
- * map afs VL abort codes to/from Linux error codes
+ * map volume locator abort codes to error codes
- * - called with call->lock held
 */
-static void afs_rxvl_aemap(struct rxrpc_call *call)
+static int afs_vl_abort_to_error(u32 abort_code)
 {
-        int err;
+        _enter("%u", abort_code);
-        _enter("{%u,%u,%d}",
+        switch (abort_code) {
-               call->app_err_state, call->app_abort_code, call->app_errno);
+        case AFSVL_IDEXIST:             return -EEXIST;
+        case AFSVL_IO:                  return -EREMOTEIO;
-        switch (call->app_err_state) {
+        case AFSVL_NAMEEXIST:           return -EEXIST;
-        case RXRPC_ESTATE_LOCAL_ABORT:
+        case AFSVL_CREATEFAIL:          return -EREMOTEIO;
-                call->app_abort_code = -call->app_errno;
+        case AFSVL_NOENT:               return -ENOMEDIUM;
-                return;
+        case AFSVL_EMPTY:               return -ENOMEDIUM;
+        case AFSVL_ENTDELETED:          return -ENOMEDIUM;
-        case RXRPC_ESTATE_PEER_ABORT:
+        case AFSVL_BADNAME:             return -EINVAL;
-                switch (call->app_abort_code) {
+        case AFSVL_BADINDEX:            return -EINVAL;
-                case AFSVL_IDEXIST:             err = -EEXIST;          break;
+        case AFSVL_BADVOLTYPE:          return -EINVAL;
-                case AFSVL_IO:                  err = -EREMOTEIO;       break;
+        case AFSVL_BADSERVER:           return -EINVAL;
-                case AFSVL_NAMEEXIST:           err = -EEXIST;          break;
+        case AFSVL_BADPARTITION:        return -EINVAL;
-                case AFSVL_CREATEFAIL:          err = -EREMOTEIO;       break;
+        case AFSVL_REPSFULL:            return -EFBIG;
-                case AFSVL_NOENT:               err = -ENOMEDIUM;       break;
+        case AFSVL_NOREPSERVER:         return -ENOENT;
-                case AFSVL_EMPTY:               err = -ENOMEDIUM;       break;
+        case AFSVL_DUPREPSERVER:        return -EEXIST;
-                case AFSVL_ENTDELETED:          err = -ENOMEDIUM;       break;
+        case AFSVL_RWNOTFOUND:          return -ENOENT;
-                case AFSVL_BADNAME:             err = -EINVAL;          break;
+        case AFSVL_BADREFCOUNT:         return -EINVAL;
-                case AFSVL_BADINDEX:            err = -EINVAL;          break;
+        case AFSVL_SIZEEXCEEDED:        return -EINVAL;
-                case AFSVL_BADVOLTYPE:          err = -EINVAL;          break;
+        case AFSVL_BADENTRY:            return -EINVAL;
-                case AFSVL_BADSERVER:           err = -EINVAL;          break;
+        case AFSVL_BADVOLIDBUMP:        return -EINVAL;
-                case AFSVL_BADPARTITION:        err = -EINVAL;          break;
+        case AFSVL_IDALREADYHASHED:     return -EINVAL;
-                case AFSVL_REPSFULL:            err = -EFBIG;           break;
+        case AFSVL_ENTRYLOCKED:         return -EBUSY;
-                case AFSVL_NOREPSERVER:         err = -ENOENT;          break;
+        case AFSVL_BADVOLOPER:          return -EBADRQC;
-                case AFSVL_DUPREPSERVER:        err = -EEXIST;          break;
+        case AFSVL_BADRELLOCKTYPE:      return -EINVAL;
-                case AFSVL_RWNOTFOUND:          err = -ENOENT;          break;
+        case AFSVL_RERELEASE:           return -EREMOTEIO;
-                case AFSVL_BADREFCOUNT:         err = -EINVAL;          break;
+        case AFSVL_BADSERVERFLAG:       return -EINVAL;
-                case AFSVL_SIZEEXCEEDED:        err = -EINVAL;          break;
+        case AFSVL_PERM:                return -EACCES;
-                case AFSVL_BADENTRY:            err = -EINVAL;          break;
+        case AFSVL_NOMEM:               return -EREMOTEIO;
-                case AFSVL_BADVOLIDBUMP:        err = -EINVAL;          break;
-                case AFSVL_IDALREADYHASHED:     err = -EINVAL;          break;
-                case AFSVL_ENTRYLOCKED:         err = -EBUSY;           break;
-                case AFSVL_BADVOLOPER:          err = -EBADRQC;         break;
-                case AFSVL_BADRELLOCKTYPE:      err = -EINVAL;          break;
-                case AFSVL_RERELEASE:           err = -EREMOTEIO;       break;
-                case AFSVL_BADSERVERFLAG:       err = -EINVAL;          break;
-                case AFSVL_PERM:                err = -EACCES;          break;
-                case AFSVL_NOMEM:               err = -EREMOTEIO;       break;
-                default:
-                        err = afs_abort_to_error(call->app_abort_code);
-                        break;
-                }
-                call->app_errno = err;
-                return;
        default:
-                return;
+                return afs_abort_to_error(abort_code);
        }
-} /* end afs_rxvl_aemap() */
+}
-#if 0
-/*****************************************************************************/
 /*
- * probe a volume location server to see if it is still alive -- unused
+ * deliver reply data to a VL.GetEntryByXXX call
 */
-static int afs_rxvl_probe(struct afs_server *server, int alloc_flags)
+static int afs_deliver_vl_get_entry_by_xxx(struct afs_call *call,
+                                           struct sk_buff *skb, bool last)
 {
-        struct rxrpc_connection *conn;
+        struct afs_cache_vlocation *entry;
-        struct rxrpc_call *call;
+        __be32 *bp;
-        struct kvec piov[1];
+        u32 tmp;
-        size_t sent;
+        int loop;
-        int ret;
-        __be32 param[1];
-        DECLARE_WAITQUEUE(myself, current);
-        /* get hold of the vlserver connection */
-        ret = afs_server_get_vlconn(server, &conn);
-        if (ret < 0)
-                goto out;
-        /* create a call through that connection */
-        ret = rxrpc_create_call(conn, NULL, NULL, afs_rxvl_aemap, &call);
-        if (ret < 0) {
-                printk("kAFS: Unable to create call: %d\n", ret);
-                goto out_put_conn;
-        }
-        call->app_opcode = VLPROBE;
-        /* we want to get event notifications from the call */
-        add_wait_queue(&call->waitq, &myself);
-        /* marshall the parameters */
-        param[0] = htonl(VLPROBE);
-        piov[0].iov_len = sizeof(param);
-        piov[0].iov_base = param;
-        /* send the parameters to the server */
-        ret = rxrpc_call_write_data(call, 1, piov, RXRPC_LAST_PACKET,
-                                    alloc_flags, 0, &sent);
-        if (ret < 0)
-                goto abort;
-        /* wait for the reply to completely arrive */
-        for (;;) {
-                set_current_state(TASK_INTERRUPTIBLE);
-                if (call->app_call_state != RXRPC_CSTATE_CLNT_RCV_REPLY ||
-                    signal_pending(current))
-                        break;
-                schedule();
-        }
-        set_current_state(TASK_RUNNING);
-        ret = -EINTR;
-        if (signal_pending(current))
-                goto abort;
-        switch (call->app_call_state) {
-        case RXRPC_CSTATE_ERROR:
-                ret = call->app_errno;
-                goto out_unwait;
-        case RXRPC_CSTATE_CLNT_GOT_REPLY:
-                ret = 0;
-                goto out_unwait;
-        default:
-                BUG();
-        }
- abort:
-        set_current_state(TASK_UNINTERRUPTIBLE);
-        rxrpc_call_abort(call, ret);
-        schedule();
- out_unwait:
-        set_current_state(TASK_RUNNING);
-        remove_wait_queue(&call->waitq, &myself);
-        rxrpc_put_call(call);
- out_put_conn:
-        rxrpc_put_connection(conn);
- out:
-        return ret;
-} /* end afs_rxvl_probe() */
+        _enter(",,%u", last);
-#endif
-/*****************************************************************************/
+        afs_transfer_reply(call, skb);
-/*
+        if (!last)
- * look up a volume location database entry by name
+                return 0;
- */
-int afs_rxvl_get_entry_by_name(struct afs_server *server,
-                               const char *volname,
-                               unsigned volnamesz,
-                               struct afs_cache_vlocation *entry)
-{
-        DECLARE_WAITQUEUE(myself, current);
-        struct rxrpc_connection *conn;
-        struct rxrpc_call *call;
-        struct kvec piov[3];
-        unsigned tmp;
-        size_t sent;
-        int ret, loop;
-        __be32 *bp, param[2], zero;
-        _enter(",%*.*s,%u,", volnamesz, volnamesz, volname, volnamesz);
-        memset(entry, 0, sizeof(*entry));
-        /* get hold of the vlserver connection */
-        ret = afs_server_get_vlconn(server, &conn);
-        if (ret < 0)
-                goto out;
-        /* create a call through that connection */
-        ret = rxrpc_create_call(conn, NULL, NULL, afs_rxvl_aemap, &call);
-        if (ret < 0) {
-                printk("kAFS: Unable to create call: %d\n", ret);
-                goto out_put_conn;
-        }
-        call->app_opcode = VLGETENTRYBYNAME;
-        /* we want to get event notifications from the call */
+        if (call->reply_size != call->reply_max)
-        add_wait_queue(&call->waitq, &myself);
+                return -EBADMSG;
-        /* marshall the parameters */
+        /* unmarshall the reply once we've received all of it */
-        piov[1].iov_len = volnamesz;
+        entry = call->reply;
-        piov[1].iov_base = (char *) volname;
+        bp = call->buffer;
-        zero = 0;
-        piov[2].iov_len = (4 - (piov[1].iov_len & 3)) & 3;
-        piov[2].iov_base = &zero;
-        param[0] = htonl(VLGETENTRYBYNAME);
-        param[1] = htonl(piov[1].iov_len);
-        piov[0].iov_len = sizeof(param);
-        piov[0].iov_base = param;
-        /* send the parameters to the server */
-        ret = rxrpc_call_write_data(call, 3, piov, RXRPC_LAST_PACKET, GFP_NOFS,
-                                    0, &sent);
-        if (ret < 0)
-                goto abort;
-        /* wait for the reply to completely arrive */
-        bp = rxrpc_call_alloc_scratch(call, 384);
-        ret = rxrpc_call_read_data(call, bp, 384,
-                                   RXRPC_CALL_READ_BLOCK |
-                                   RXRPC_CALL_READ_ALL);
-        if (ret < 0) {
-                if (ret == -ECONNABORTED) {
-                        ret = call->app_errno;
-                        goto out_unwait;
-                }
-                goto abort;
-        }
-        /* unmarshall the reply */
        for (loop = 0; loop < 64; loop++)
                entry->name[loop] = ntohl(*bp++);
+        entry->name[loop] = 0;
        bp++; /* final NUL */
        bp++; /* type */
@@ -264,6 +93,7 @@ int afs_rxvl_get_entry_by_name(struct afs_server *server,
        for (loop = 0; loop < 8; loop++) {
                tmp = ntohl(*bp++);
+                entry->srvtmask[loop] = 0;
                if (tmp & AFS_VLSF_RWVOL)
                        entry->srvtmask[loop] |= AFS_VOL_VTM_RW;
                if (tmp & AFS_VLSF_ROVOL)
@@ -279,417 +109,110 @@ int afs_rxvl_get_entry_by_name(struct afs_server *server,
        bp++; /* clone ID */
        tmp = ntohl(*bp++); /* flags */
+        entry->vidmask = 0;
        if (tmp & AFS_VLF_RWEXISTS)
                entry->vidmask |= AFS_VOL_VTM_RW;
        if (tmp & AFS_VLF_ROEXISTS)
                entry->vidmask |= AFS_VOL_VTM_RO;
        if (tmp & AFS_VLF_BACKEXISTS)
                entry->vidmask |= AFS_VOL_VTM_BAK;
-        ret = -ENOMEDIUM;
        if (!entry->vidmask)
-                goto abort;
+                return -EBADMSG;
-        /* success */
-        entry->rtime = get_seconds();
-        ret = 0;
- out_unwait:
-        set_current_state(TASK_RUNNING);
-        remove_wait_queue(&call->waitq, &myself);
-        rxrpc_put_call(call);
- out_put_conn:
-        rxrpc_put_connection(conn);
- out:
-        _leave(" = %d", ret);
-        return ret;
- abort:
-        set_current_state(TASK_UNINTERRUPTIBLE);
-        rxrpc_call_abort(call, ret);
-        schedule();
-        goto out_unwait;
-} /* end afs_rxvl_get_entry_by_name() */
-/*****************************************************************************/
-/*
- * look up a volume location database entry by ID
- */
-int afs_rxvl_get_entry_by_id(struct afs_server *server,
-                             afs_volid_t volid,
-                             afs_voltype_t voltype,
-                             struct afs_cache_vlocation *entry)
-{
-        DECLARE_WAITQUEUE(myself, current);
-        struct rxrpc_connection *conn;
-        struct rxrpc_call *call;
-        struct kvec piov[1];
-        unsigned tmp;
-        size_t sent;
-        int ret, loop;
-        __be32 *bp, param[3];
-        _enter(",%x,%d,", volid, voltype);
-        memset(entry, 0, sizeof(*entry));
-        /* get hold of the vlserver connection */
-        ret = afs_server_get_vlconn(server, &conn);
-        if (ret < 0)
-                goto out;
-        /* create a call through that connection */
-        ret = rxrpc_create_call(conn, NULL, NULL, afs_rxvl_aemap, &call);
-        if (ret < 0) {
-                printk("kAFS: Unable to create call: %d\n", ret);
-                goto out_put_conn;
-        }
-        call->app_opcode = VLGETENTRYBYID;
-        /* we want to get event notifications from the call */
-        add_wait_queue(&call->waitq, &myself);
-        /* marshall the parameters */
-        param[0] = htonl(VLGETENTRYBYID);
-        param[1] = htonl(volid);
-        param[2] = htonl(voltype);
-        piov[0].iov_len = sizeof(param);
-        piov[0].iov_base = param;
-        /* send the parameters to the server */
-        ret = rxrpc_call_write_data(call, 1, piov, RXRPC_LAST_PACKET, GFP_NOFS,
-                                    0, &sent);
-        if (ret < 0)
-                goto abort;
-        /* wait for the reply to completely arrive */
-        bp = rxrpc_call_alloc_scratch(call, 384);
-        ret = rxrpc_call_read_data(call, bp, 384,
-                                   RXRPC_CALL_READ_BLOCK |
-                                   RXRPC_CALL_READ_ALL);
-        if (ret < 0) {
-                if (ret == -ECONNABORTED) {
-                        ret = call->app_errno;
-                        goto out_unwait;
-                }
-                goto abort;
-        }
-        /* unmarshall the reply */
-        for (loop = 0; loop < 64; loop++)
-                entry->name[loop] = ntohl(*bp++);
-        bp++; /* final NUL */
-        bp++; /* type */
+        _leave(" = 0 [done]");
-        entry->nservers = ntohl(*bp++);
+        return 0;
+}
-        for (loop = 0; loop < 8; loop++)
-                entry->servers[loop].s_addr = *bp++;
-        bp += 8; /* partition IDs */
-        for (loop = 0; loop < 8; loop++) {
-                tmp = ntohl(*bp++);
-                if (tmp & AFS_VLSF_RWVOL)
-                        entry->srvtmask[loop] |= AFS_VOL_VTM_RW;
-                if (tmp & AFS_VLSF_ROVOL)
-                        entry->srvtmask[loop] |= AFS_VOL_VTM_RO;
-                if (tmp & AFS_VLSF_BACKVOL)
-                        entry->srvtmask[loop] |= AFS_VOL_VTM_BAK;
-        }
-        entry->vid[0] = ntohl(*bp++);
-        entry->vid[1] = ntohl(*bp++);
-        entry->vid[2] = ntohl(*bp++);
-        bp++; /* clone ID */
-        tmp = ntohl(*bp++); /* flags */
-        if (tmp & AFS_VLF_RWEXISTS)
-                entry->vidmask |= AFS_VOL_VTM_RW;
-        if (tmp & AFS_VLF_ROEXISTS)
-                entry->vidmask |= AFS_VOL_VTM_RO;
-        if (tmp & AFS_VLF_BACKEXISTS)
-                entry->vidmask |= AFS_VOL_VTM_BAK;
-        ret = -ENOMEDIUM;
-        if (!entry->vidmask)
-                goto abort;
-#if 0 /* TODO: remove */
-        entry->nservers = 3;
-        entry->servers[0].s_addr = htonl(0xac101249);
-        entry->servers[1].s_addr = htonl(0xac101243);
-        entry->servers[2].s_addr = htonl(0xac10125b /*0xac10125b*/);
-        entry->srvtmask[0] = AFS_VOL_VTM_RO;
-        entry->srvtmask[1] = AFS_VOL_VTM_RO;
-        entry->srvtmask[2] = AFS_VOL_VTM_RO | AFS_VOL_VTM_RW;
-#endif
-        /* success */
-        entry->rtime = get_seconds();
-        ret = 0;
- out_unwait:
-        set_current_state(TASK_RUNNING);
-        remove_wait_queue(&call->waitq, &myself);
-        rxrpc_put_call(call);
- out_put_conn:
-        rxrpc_put_connection(conn);
- out:
-        _leave(" = %d", ret);
-        return ret;
- abort:
-        set_current_state(TASK_UNINTERRUPTIBLE);
-        rxrpc_call_abort(call, ret);
-        schedule();
-        goto out_unwait;
-} /* end afs_rxvl_get_entry_by_id() */
-/*****************************************************************************/
 /*
- * look up a volume location database entry by ID asynchronously
+ * VL.GetEntryByName operation type
 */
-int afs_rxvl_get_entry_by_id_async(struct afs_async_op *op,
+static const struct afs_call_type afs_RXVLGetEntryByName = {
-                                   afs_volid_t volid,
+        .name           = "VL.GetEntryByName",
-                                   afs_voltype_t voltype)
+        .deliver        = afs_deliver_vl_get_entry_by_xxx,
-{
+        .abort_to_error = afs_vl_abort_to_error,
-        struct rxrpc_connection *conn;
+        .destructor     = afs_flat_call_destructor,
-        struct rxrpc_call *call;
+};
-        struct kvec piov[1];
-        size_t sent;
-        int ret;
-        __be32 param[3];
-        _enter(",%x,%d,", volid, voltype);
-        /* get hold of the vlserver connection */
-        ret = afs_server_get_vlconn(op->server, &conn);
-        if (ret < 0) {
-                _leave(" = %d", ret);
-                return ret;
-        }
-        /* create a call through that connection */
-        ret = rxrpc_create_call(conn,
-                                afs_rxvl_get_entry_by_id_attn,
-                                afs_rxvl_get_entry_by_id_error,
-                                afs_rxvl_aemap,
-                                &op->call);
-        rxrpc_put_connection(conn);
-        if (ret < 0) {
-                printk("kAFS: Unable to create call: %d\n", ret);
-                _leave(" = %d", ret);
-                return ret;
-        }
-        op->call->app_opcode = VLGETENTRYBYID;
+/*
-        op->call->app_user = op;
+ * VL.GetEntryById operation type
+ */
-        call = op->call;
+static const struct afs_call_type afs_RXVLGetEntryById = {
-        rxrpc_get_call(call);
+        .name           = "VL.GetEntryById",
+        .deliver        = afs_deliver_vl_get_entry_by_xxx,
-        /* send event notifications from the call to kafsasyncd */
+        .abort_to_error = afs_vl_abort_to_error,
-        afs_kafsasyncd_begin_op(op);
+        .destructor     = afs_flat_call_destructor,
+};
-        /* marshall the parameters */
-        param[0] = htonl(VLGETENTRYBYID);
-        param[1] = htonl(volid);
-        param[2] = htonl(voltype);
-        piov[0].iov_len = sizeof(param);
-        piov[0].iov_base = param;
-        /* allocate result read buffer in scratch space */
-        call->app_scr_ptr = rxrpc_call_alloc_scratch(op->call, 384);
-        /* send the parameters to the server */
-        ret = rxrpc_call_write_data(call, 1, piov, RXRPC_LAST_PACKET, GFP_NOFS,
-                                    0, &sent);
-        if (ret < 0) {
-                rxrpc_call_abort(call, ret); /* handle from kafsasyncd */
-                ret = 0;
-                goto out;
-        }
-        /* wait for the reply to completely arrive */
-        ret = rxrpc_call_read_data(call, call->app_scr_ptr, 384, 0);
-        switch (ret) {
-        case 0:
-        case -EAGAIN:
-        case -ECONNABORTED:
-                ret = 0;
-                break;  /* all handled by kafsasyncd */
-        default:
-                rxrpc_call_abort(call, ret); /* make kafsasyncd handle it */
-                ret = 0;
-                break;
-        }
- out:
-        rxrpc_put_call(call);
-        _leave(" = %d", ret);
-        return ret;
-} /* end afs_rxvl_get_entry_by_id_async() */
-/*****************************************************************************/
 /*
- * attend to the asynchronous get VLDB entry by ID
+ * dispatch a get volume entry by name operation
 */
-int afs_rxvl_get_entry_by_id_async2(struct afs_async_op *op,
+int afs_vl_get_entry_by_name(struct in_addr *addr,
-                                    struct afs_cache_vlocation *entry)
+                             struct key *key,
+                             const char *volname,
+                             struct afs_cache_vlocation *entry,
+                             const struct afs_wait_mode *wait_mode)
 {
+        struct afs_call *call;
+        size_t volnamesz, reqsz, padsz;
        __be32 *bp;
-        __u32 tmp;
-        int loop, ret;
-        _enter("{op=%p cst=%u}", op, op->call->app_call_state);
-        memset(entry, 0, sizeof(*entry));
-        if (op->call->app_call_state == RXRPC_CSTATE_COMPLETE) {
-                /* operation finished */
-                afs_kafsasyncd_terminate_op(op);
-                bp = op->call->app_scr_ptr;
-                /* unmarshall the reply */
-                for (loop = 0; loop < 64; loop++)
-                        entry->name[loop] = ntohl(*bp++);
-                bp++; /* final NUL */
-                bp++; /* type */
-                entry->nservers = ntohl(*bp++);
-                for (loop = 0; loop < 8; loop++)
-                        entry->servers[loop].s_addr = *bp++;
-                bp += 8; /* partition IDs */
-                for (loop = 0; loop < 8; loop++) {
-                        tmp = ntohl(*bp++);
-                        if (tmp & AFS_VLSF_RWVOL)
-                                entry->srvtmask[loop] |= AFS_VOL_VTM_RW;
-                        if (tmp & AFS_VLSF_ROVOL)
-                                entry->srvtmask[loop] |= AFS_VOL_VTM_RO;
-                        if (tmp & AFS_VLSF_BACKVOL)
-                                entry->srvtmask[loop] |= AFS_VOL_VTM_BAK;
-                }
-                entry->vid[0] = ntohl(*bp++);
-                entry->vid[1] = ntohl(*bp++);
-                entry->vid[2] = ntohl(*bp++);
-                bp++; /* clone ID */
-                tmp = ntohl(*bp++); /* flags */
-                if (tmp & AFS_VLF_RWEXISTS)
-                        entry->vidmask |= AFS_VOL_VTM_RW;
-                if (tmp & AFS_VLF_ROEXISTS)
-                        entry->vidmask |= AFS_VOL_VTM_RO;
-                if (tmp & AFS_VLF_BACKEXISTS)
-                        entry->vidmask |= AFS_VOL_VTM_BAK;
-                ret = -ENOMEDIUM;
-                if (!entry->vidmask) {
-                        rxrpc_call_abort(op->call, ret);
-                        goto done;
-                }
-#if 0 /* TODO: remove */
-                entry->nservers = 3;
-                entry->servers[0].s_addr = htonl(0xac101249);
-                entry->servers[1].s_addr = htonl(0xac101243);
-                entry->servers[2].s_addr = htonl(0xac10125b /*0xac10125b*/);
-                entry->srvtmask[0] = AFS_VOL_VTM_RO;
-                entry->srvtmask[1] = AFS_VOL_VTM_RO;
-                entry->srvtmask[2] = AFS_VOL_VTM_RO | AFS_VOL_VTM_RW;
-#endif
-                /* success */
-                entry->rtime = get_seconds();
-                ret = 0;
-                goto done;
-        }
-        if (op->call->app_call_state == RXRPC_CSTATE_ERROR) {
+        _enter("");
-                /* operation error */
-                ret = op->call->app_errno;
-                goto done;
-        }
-        _leave(" = -EAGAIN");
+        volnamesz = strlen(volname);
-        return -EAGAIN;
+        padsz = (4 - (volnamesz & 3)) & 3;
+        reqsz = 8 + volnamesz + padsz;
- done:
+        call = afs_alloc_flat_call(&afs_RXVLGetEntryByName, reqsz, 384);
-        rxrpc_put_call(op->call);
+        if (!call)
-        op->call = NULL;
+                return -ENOMEM;
-        _leave(" = %d", ret);
-        return ret;
-} /* end afs_rxvl_get_entry_by_id_async2() */
-/*****************************************************************************/
+        call->key = key;
-/*
+        call->reply = entry;
- * handle attention events on an async get-entry-by-ID op
+        call->service_id = VL_SERVICE;
- * - called from krxiod
+        call->port = htons(AFS_VL_PORT);
- */
-static void afs_rxvl_get_entry_by_id_attn(struct rxrpc_call *call)
-{
-        struct afs_async_op *op = call->app_user;
-        _enter("{op=%p cst=%u}", op, call->app_call_state);
-        switch (call->app_call_state) {
-        case RXRPC_CSTATE_COMPLETE:
-                afs_kafsasyncd_attend_op(op);
-                break;
-        case RXRPC_CSTATE_CLNT_RCV_REPLY:
-                if (call->app_async_read)
-                        break;
-        case RXRPC_CSTATE_CLNT_GOT_REPLY:
-                if (call->app_read_count == 0)
-                        break;
-                printk("kAFS: Reply bigger than expected"
-                       " {cst=%u asyn=%d mark=%Zu rdy=%Zu pr=%u%s}",
-                       call->app_call_state,
-                       call->app_async_read,
-                       call->app_mark,
-                       call->app_ready_qty,
-                       call->pkt_rcv_count,
-                       call->app_last_rcv ? " last" : "");
-                rxrpc_call_abort(call, -EBADMSG);
-                break;
-        default:
-                BUG();
-        }
-        _leave("");
+        /* marshall the parameters */
+        bp = call->request;
+        *bp++ = htonl(VLGETENTRYBYNAME);
+        *bp++ = htonl(volnamesz);
+        memcpy(bp, volname, volnamesz);
+        if (padsz > 0)
+                memset((void *) bp + volnamesz, 0, padsz);
-} /* end afs_rxvl_get_entry_by_id_attn() */
+        /* initiate the call */
+        return afs_make_call(addr, call, GFP_KERNEL, wait_mode);
+}
-/*****************************************************************************/
 /*
- * handle error events on an async get-entry-by-ID op
+ * dispatch a get volume entry by ID operation
- * - called from krxiod
 */
-static void afs_rxvl_get_entry_by_id_error(struct rxrpc_call *call)
+int afs_vl_get_entry_by_id(struct in_addr *addr,
+                           struct key *key,
+                           afs_volid_t volid,
+                           afs_voltype_t voltype,
+                           struct afs_cache_vlocation *entry,
+                           const struct afs_wait_mode *wait_mode)
 {
-        struct afs_async_op *op = call->app_user;
+        struct afs_call *call;
+        __be32 *bp;
-        _enter("{op=%p cst=%u}", op, call->app_call_state);
+        _enter("");
-        afs_kafsasyncd_attend_op(op);
+        call = afs_alloc_flat_call(&afs_RXVLGetEntryById, 12, 384);
+        if (!call)
+                return -ENOMEM;
-        _leave("");
+        call->key = key;
+        call->reply = entry;
+        call->service_id = VL_SERVICE;
+        call->port = htons(AFS_VL_PORT);
-} /* end afs_rxvl_get_entry_by_id_error() */
+        /* marshall the parameters */
+        bp = call->request;
+        *bp++ = htonl(VLGETENTRYBYID);
+        *bp++ = htonl(volid);
+        *bp   = htonl(voltype);
+        /* initiate the call */
+        return afs_make_call(addr, call, GFP_KERNEL, wait_mode);
+}
diff --git a/fs/afs/vlocation.c b/fs/afs/vlocation.c
index 782ee7c600ca..74cce174882a 100644
--- a/fs/afs/vlocation.c
+++ b/fs/afs/vlocation.c
@@ -1,6 +1,6 @@
-/* vlocation.c: volume location management
+/* AFS volume location management
 *
- * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
+ * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 *
 * This program is free software; you can redistribute it and/or
@@ -12,131 +12,61 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/init.h>
-#include <linux/slab.h>
-#include <linux/fs.h>
-#include <linux/pagemap.h>
-#include "volume.h"
-#include "cell.h"
-#include "cmservice.h"
-#include "fsclient.h"
-#include "vlclient.h"
-#include "kafstimod.h"
-#include <rxrpc/connection.h>
 #include "internal.h"
-#define AFS_VLDB_TIMEOUT HZ*1000
+unsigned afs_vlocation_timeout = 10;    /* volume location timeout in seconds */
+unsigned afs_vlocation_update_timeout = 10 * 60;
-static void afs_vlocation_update_timer(struct afs_timer *timer);
+static void afs_vlocation_reaper(struct work_struct *);
-static void afs_vlocation_update_attend(struct afs_async_op *op);
+static void afs_vlocation_updater(struct work_struct *);
-static void afs_vlocation_update_discard(struct afs_async_op *op);
-static void __afs_put_vlocation(struct afs_vlocation *vlocation);
-static void __afs_vlocation_timeout(struct afs_timer *timer)
+static LIST_HEAD(afs_vlocation_updates);
-{
+static LIST_HEAD(afs_vlocation_graveyard);
-        struct afs_vlocation *vlocation =
+static DEFINE_SPINLOCK(afs_vlocation_updates_lock);
-                list_entry(timer, struct afs_vlocation, timeout);
+static DEFINE_SPINLOCK(afs_vlocation_graveyard_lock);
+static DECLARE_DELAYED_WORK(afs_vlocation_reap, afs_vlocation_reaper);
-        _debug("VL TIMEOUT [%s{u=%d}]",
+static DECLARE_DELAYED_WORK(afs_vlocation_update, afs_vlocation_updater);
-               vlocation->vldb.name, atomic_read(&vlocation->usage));
+static struct workqueue_struct *afs_vlocation_update_worker;
-        afs_vlocation_do_timeout(vlocation);
-}
-static const struct afs_timer_ops afs_vlocation_timer_ops = {
-        .timed_out      = __afs_vlocation_timeout,
-};
-static const struct afs_timer_ops afs_vlocation_update_timer_ops = {
-        .timed_out      = afs_vlocation_update_timer,
-};
-static const struct afs_async_op_ops afs_vlocation_update_op_ops = {
-        .attend         = afs_vlocation_update_attend,
-        .discard        = afs_vlocation_update_discard,
-};
-static LIST_HEAD(afs_vlocation_update_pendq);   /* queue of VLs awaiting update */
-static struct afs_vlocation *afs_vlocation_update;      /* VL currently being updated */
-static DEFINE_SPINLOCK(afs_vlocation_update_lock); /* lock guarding update queue */
-#ifdef AFS_CACHING_SUPPORT
-static cachefs_match_val_t afs_vlocation_cache_match(void *target,
-                                                     const void *entry);
-static void afs_vlocation_cache_update(void *source, void *entry);
-struct cachefs_index_def afs_vlocation_cache_index_def = {
-        .name           = "vldb",
-        .data_size      = sizeof(struct afs_cache_vlocation),
-        .keys[0]        = { CACHEFS_INDEX_KEYS_ASCIIZ, 64 },
-        .match          = afs_vlocation_cache_match,
-        .update         = afs_vlocation_cache_update,
-};
-#endif
-/*****************************************************************************/
 /*
 * iterate through the VL servers in a cell until one of them admits knowing
 * about the volume in question
- * - caller must have cell->vl_sem write-locked
 */
-static int afs_vlocation_access_vl_by_name(struct afs_vlocation *vlocation,
+static int afs_vlocation_access_vl_by_name(struct afs_vlocation *vl,
-                                           const char *name,
+                                           struct key *key,
-                                           unsigned namesz,
                                           struct afs_cache_vlocation *vldb)
 {
-        struct afs_server *server = NULL;
+        struct afs_cell *cell = vl->cell;
-        struct afs_cell *cell = vlocation->cell;
+        struct in_addr addr;
        int count, ret;
-        _enter("%s,%*.*s,%u", cell->name, namesz, namesz, name, namesz);
+        _enter("%s,%s", cell->name, vl->vldb.name);
+        down_write(&vl->cell->vl_sem);
        ret = -ENOMEDIUM;
        for (count = cell->vl_naddrs; count > 0; count--) {
-                _debug("CellServ[%hu]: %08x",
+                addr = cell->vl_addrs[cell->vl_curr_svix];
-                       cell->vl_curr_svix,
-                       cell->vl_addrs[cell->vl_curr_svix].s_addr);
+                _debug("CellServ[%hu]: %08x", cell->vl_curr_svix, addr.s_addr);
-                /* try and create a server */
-                ret = afs_server_lookup(cell,
-                                        &cell->vl_addrs[cell->vl_curr_svix],
-                                        &server);
-                switch (ret) {
-                case 0:
-                        break;
-                case -ENOMEM:
-                case -ENONET:
-                        goto out;
-                default:
-                        goto rotate;
-                }
                /* attempt to access the VL server */
-                ret = afs_rxvl_get_entry_by_name(server, name, namesz, vldb);
+                ret = afs_vl_get_entry_by_name(&addr, key, vl->vldb.name, vldb,
+                                               &afs_sync_call);
                switch (ret) {
                case 0:
-                        afs_put_server(server);
                        goto out;
                case -ENOMEM:
                case -ENONET:
                case -ENETUNREACH:
                case -EHOSTUNREACH:
                case -ECONNREFUSED:
-                        down_write(&server->sem);
-                        if (server->vlserver) {
-                                rxrpc_put_connection(server->vlserver);
-                                server->vlserver = NULL;
-                        }
-                        up_write(&server->sem);
-                        afs_put_server(server);
                        if (ret == -ENOMEM || ret == -ENONET)
                                goto out;
                        goto rotate;
                case -ENOMEDIUM:
-                        afs_put_server(server);
                        goto out;
                default:
-                        afs_put_server(server);
+                        ret = -EIO;
-                        ret = -ENOMEDIUM;
                        goto rotate;
                }
@@ -146,76 +76,66 @@ static int afs_vlocation_access_vl_by_name(struct afs_vlocation *vlocation,
                cell->vl_curr_svix %= cell->vl_naddrs;
        }
- out:
+out:
+        up_write(&vl->cell->vl_sem);
        _leave(" = %d", ret);
        return ret;
+}
-} /* end afs_vlocation_access_vl_by_name() */
-/*****************************************************************************/
 /*
 * iterate through the VL servers in a cell until one of them admits knowing
 * about the volume in question
- * - caller must have cell->vl_sem write-locked
 */
-static int afs_vlocation_access_vl_by_id(struct afs_vlocation *vlocation,
+static int afs_vlocation_access_vl_by_id(struct afs_vlocation *vl,
+                                         struct key *key,
                                         afs_volid_t volid,
                                         afs_voltype_t voltype,
                                         struct afs_cache_vlocation *vldb)
 {
-        struct afs_server *server = NULL;
+        struct afs_cell *cell = vl->cell;
-        struct afs_cell *cell = vlocation->cell;
+        struct in_addr addr;
        int count, ret;
        _enter("%s,%x,%d,", cell->name, volid, voltype);
+        down_write(&vl->cell->vl_sem);
        ret = -ENOMEDIUM;
        for (count = cell->vl_naddrs; count > 0; count--) {
-                _debug("CellServ[%hu]: %08x",
+                addr = cell->vl_addrs[cell->vl_curr_svix];
-                       cell->vl_curr_svix,
-                       cell->vl_addrs[cell->vl_curr_svix].s_addr);
+                _debug("CellServ[%hu]: %08x", cell->vl_curr_svix, addr.s_addr);
-                /* try and create a server */
-                ret = afs_server_lookup(cell,
-                                        &cell->vl_addrs[cell->vl_curr_svix],
-                                        &server);
-                switch (ret) {
-                case 0:
-                        break;
-                case -ENOMEM:
-                case -ENONET:
-                        goto out;
-                default:
-                        goto rotate;
-                }
                /* attempt to access the VL server */
-                ret = afs_rxvl_get_entry_by_id(server, volid, voltype, vldb);
+                ret = afs_vl_get_entry_by_id(&addr, key, volid, voltype, vldb,
+                                             &afs_sync_call);
                switch (ret) {
                case 0:
-                        afs_put_server(server);
                        goto out;
                case -ENOMEM:
                case -ENONET:
                case -ENETUNREACH:
                case -EHOSTUNREACH:
                case -ECONNREFUSED:
-                        down_write(&server->sem);
-                        if (server->vlserver) {
-                                rxrpc_put_connection(server->vlserver);
-                                server->vlserver = NULL;
-                        }
-                        up_write(&server->sem);
-                        afs_put_server(server);
                        if (ret == -ENOMEM || ret == -ENONET)
                                goto out;
                        goto rotate;
+                case -EBUSY:
+                        /* the server reported busy: retry the same server
+                         * (vl_curr_svix is left untouched) up to three times
+                         * before leaving the switch */
+                        vl->upd_busy_cnt++;
+                        if (vl->upd_busy_cnt <= 3) {
+                                /* second+ BUSY - sleep a little bit (one
+                                 * jiffy); the helper sets the task state
+                                 * itself and returns with the task runnable */
+                                if (vl->upd_busy_cnt > 1)
+                                        schedule_timeout_uninterruptible(1);
+                                /* "continue" runs the loop's count--, so give
+                                 * the slot back: a retry of the same server
+                                 * must not use up one of the per-server
+                                 * attempts, otherwise the remaining servers
+                                 * may never be tried.  This stays bounded
+                                 * because upd_busy_cnt keeps increasing until
+                                 * it exceeds 3 */
+                                count++;
+                                continue;
+                        }
+                        break;
                case -ENOMEDIUM:
-                        afs_put_server(server);
+                        vl->upd_rej_cnt++;
-                        goto out;
+                        goto rotate;
                default:
-                        afs_put_server(server);
+                        ret = -EIO;
-                        ret = -ENOMEDIUM;
                        goto rotate;
                }
@@ -223,729 +143,580 @@ static int afs_vlocation_access_vl_by_id(struct afs_vlocation *vlocation,
        rotate:
                cell->vl_curr_svix++;
                cell->vl_curr_svix %= cell->vl_naddrs;
+                vl->upd_busy_cnt = 0;
        }
- out:
+out:
+        if (ret < 0 && vl->upd_rej_cnt > 0) {
+                printk(KERN_NOTICE "kAFS:"
+                       " Active volume no longer valid '%s'\n",
+                       vl->vldb.name);
+                vl->valid = 0;
+                ret = -ENOMEDIUM;
+        }
+        up_write(&vl->cell->vl_sem);
        _leave(" = %d", ret);
        return ret;
+}
-} /* end afs_vlocation_access_vl_by_id() */
-/*****************************************************************************/
 /*
- * lookup volume location
+ * allocate a volume location record
+ * - returns a zeroed record in state AFS_VL_NEW with a usage count of 1, its
+ *   list heads, waitqueue and lock initialised and the first namesz bytes of
+ *   name copied into vldb.name; returns NULL if the allocation fails
+ * - namesz is not range-checked here, so the caller must make sure it fits
+ * - no ref is taken on the cell and the record is not added to any list; the
+ *   caller does both
- * - caller must have cell->vol_sem write-locked
- * - iterate through the VL servers in a cell until one of them admits knowing
- *   about the volume in question
- * - lookup in the local cache if not able to find on the VL server
- * - insert/update in the local cache if did get a VL response
 */
-int afs_vlocation_lookup(struct afs_cell *cell,
+static struct afs_vlocation *afs_vlocation_alloc(struct afs_cell *cell,
-                         const char *name,
+                                                 const char *name,
-                         unsigned namesz,
+                                                 size_t namesz)
-                         struct afs_vlocation **_vlocation)
 {
-        struct afs_cache_vlocation vldb;
+        struct afs_vlocation *vl;
-        struct afs_vlocation *vlocation;
-        afs_voltype_t voltype;
+        vl = kzalloc(sizeof(struct afs_vlocation), GFP_KERNEL);
-        afs_volid_t vid;
+        if (vl) {
-        int active = 0, ret;
+                vl->cell = cell;
+                vl->state = AFS_VL_NEW;
-        _enter("{%s},%*.*s,%u,", cell->name, namesz, namesz, name, namesz);
+                atomic_set(&vl->usage, 1);
+                INIT_LIST_HEAD(&vl->link);
-        if (namesz > sizeof(vlocation->vldb.name)) {
+                INIT_LIST_HEAD(&vl->grave);
-                _leave(" = -ENAMETOOLONG");
+                INIT_LIST_HEAD(&vl->update);
-                return -ENAMETOOLONG;
+                init_waitqueue_head(&vl->waitq);
-        }
+                spin_lock_init(&vl->lock);
+                memcpy(vl->vldb.name, name, namesz);
-        /* search the cell's active list first */
-        list_for_each_entry(vlocation, &cell->vl_list, link) {
-                if (namesz < sizeof(vlocation->vldb.name) &&
-                    vlocation->vldb.name[namesz] != '\0')
-                        continue;
-                if (memcmp(vlocation->vldb.name, name, namesz) == 0)
-                        goto found_in_memory;
-        }
-        /* search the cell's graveyard list second */
-        spin_lock(&cell->vl_gylock);
-        list_for_each_entry(vlocation, &cell->vl_graveyard, link) {
-                if (namesz < sizeof(vlocation->vldb.name) &&
-                    vlocation->vldb.name[namesz] != '\0')
-                        continue;
-                if (memcmp(vlocation->vldb.name, name, namesz) == 0)
-                        goto found_in_graveyard;
-        }
-        spin_unlock(&cell->vl_gylock);
-        /* not in the cell's in-memory lists - create a new record */
-        vlocation = kzalloc(sizeof(struct afs_vlocation), GFP_KERNEL);
-        if (!vlocation)
-                return -ENOMEM;
-        atomic_set(&vlocation->usage, 1);
-        INIT_LIST_HEAD(&vlocation->link);
-        rwlock_init(&vlocation->lock);
-        memcpy(vlocation->vldb.name, name, namesz);
-        afs_timer_init(&vlocation->timeout, &afs_vlocation_timer_ops);
-        afs_timer_init(&vlocation->upd_timer, &afs_vlocation_update_timer_ops);
-        afs_async_op_init(&vlocation->upd_op, &afs_vlocation_update_op_ops);
-        afs_get_cell(cell);
-        vlocation->cell = cell;
-        list_add_tail(&vlocation->link, &cell->vl_list);
-#ifdef AFS_CACHING_SUPPORT
-        /* we want to store it in the cache, plus it might already be
-         * encached */
-        cachefs_acquire_cookie(cell->cache,
-                               &afs_volume_cache_index_def,
-                               vlocation,
-                               &vlocation->cache);
-        if (vlocation->valid)
-                goto found_in_cache;
-#endif
-        /* try to look up an unknown volume in the cell VL databases by name */
-        ret = afs_vlocation_access_vl_by_name(vlocation, name, namesz, &vldb);
-        if (ret < 0) {
-                printk("kAFS: failed to locate '%*.*s' in cell '%s'\n",
-                       namesz, namesz, name, cell->name);
-                goto error;
        }
-        goto found_on_vlserver;
+        _leave(" = %p", vl);
+        return vl;
- found_in_graveyard:
+}
-        /* found in the graveyard - resurrect */
-        _debug("found in graveyard");
-        atomic_inc(&vlocation->usage);
-        list_move_tail(&vlocation->link, &cell->vl_list);
-        spin_unlock(&cell->vl_gylock);
-        afs_kafstimod_del_timer(&vlocation->timeout);
-        goto active;
- found_in_memory:
-        /* found in memory - check to see if it's active */
-        _debug("found in memory");
-        atomic_inc(&vlocation->usage);
- active:
+/*
-        active = 1;
+ * update record if we found it in the cache
+ * - picks the first volume ID advertised in vl->vldb.vidmask (RW, then RO,
+ *   then backup; BUG()s if none is set) and queries the VL servers by that ID
+ * - vldb is handed straight to afs_vlocation_access_vl_by_id() to receive the
+ *   fetched entry; this function does not itself copy anything into vl
+ * - returns 0 on success or the negative error from the by-ID lookup, having
+ *   logged the failure
+ */
+static int afs_vlocation_update_record(struct afs_vlocation *vl,
+                                       struct key *key,
+                                       struct afs_cache_vlocation *vldb)
+{
+        afs_voltype_t voltype;
+        afs_volid_t vid;
+        int ret;
-#ifdef AFS_CACHING_SUPPORT
- found_in_cache:
-#endif
        /* try to look up a cached volume in the cell VL databases by ID */
-        _debug("found in cache");
        _debug("Locally Cached: %s %02x { %08x(%x) %08x(%x) %08x(%x) }",
-               vlocation->vldb.name,
+               vl->vldb.name,
-               vlocation->vldb.vidmask,
+               vl->vldb.vidmask,
-               ntohl(vlocation->vldb.servers[0].s_addr),
+               ntohl(vl->vldb.servers[0].s_addr),
-               vlocation->vldb.srvtmask[0],
+               vl->vldb.srvtmask[0],
-               ntohl(vlocation->vldb.servers[1].s_addr),
+               ntohl(vl->vldb.servers[1].s_addr),
-               vlocation->vldb.srvtmask[1],
+               vl->vldb.srvtmask[1],
-               ntohl(vlocation->vldb.servers[2].s_addr),
+               ntohl(vl->vldb.servers[2].s_addr),
-               vlocation->vldb.srvtmask[2]
+               vl->vldb.srvtmask[2]);
-               );
        _debug("Vids: %08x %08x %08x",
-               vlocation->vldb.vid[0],
+               vl->vldb.vid[0],
-               vlocation->vldb.vid[1],
+               vl->vldb.vid[1],
-               vlocation->vldb.vid[2]);
+               vl->vldb.vid[2]);
-        if (vlocation->vldb.vidmask & AFS_VOL_VTM_RW) {
+        if (vl->vldb.vidmask & AFS_VOL_VTM_RW) {
-                vid = vlocation->vldb.vid[0];
+                vid = vl->vldb.vid[0];
                voltype = AFSVL_RWVOL;
-        }
+        } else if (vl->vldb.vidmask & AFS_VOL_VTM_RO) {
-        else if (vlocation->vldb.vidmask & AFS_VOL_VTM_RO) {
+                vid = vl->vldb.vid[1];
-                vid = vlocation->vldb.vid[1];
                voltype = AFSVL_ROVOL;
-        }
+        } else if (vl->vldb.vidmask & AFS_VOL_VTM_BAK) {
-        else if (vlocation->vldb.vidmask & AFS_VOL_VTM_BAK) {
+                vid = vl->vldb.vid[2];
-                vid = vlocation->vldb.vid[2];
                voltype = AFSVL_BACKVOL;
-        }
+        } else {
-        else {
                BUG();
                vid = 0;
                voltype = 0;
        }
-        ret = afs_vlocation_access_vl_by_id(vlocation, vid, voltype, &vldb);
+        /* contact the server to make sure the volume is still available
+         * - TODO: need to handle disconnected operation here
+         */
+        ret = afs_vlocation_access_vl_by_id(vl, key, vid, voltype, vldb);
        switch (ret) {
                /* net error */
        default:
-                printk("kAFS: failed to volume '%*.*s' (%x) up in '%s': %d\n",
+                printk(KERN_WARNING "kAFS:"
-                       namesz, namesz, name, vid, cell->name, ret);
+                       " failed to update volume '%s' (%x) up in '%s': %d\n",
-                goto error;
+                       vl->vldb.name, vid, vl->cell->name, ret);
+                _leave(" = %d", ret);
+                return ret;
                /* pulled from local cache into memory */
        case 0:
-                goto found_on_vlserver;
+                _leave(" = 0");
+                return 0;
                /* uh oh... looks like the volume got deleted */
        case -ENOMEDIUM:
-                printk("kAFS: volume '%*.*s' (%x) does not exist '%s'\n",
+                printk(KERN_ERR "kAFS:"
-                       namesz, namesz, name, vid, cell->name);
+                       " volume '%s' (%x) does not exist '%s'\n",
+                       vl->vldb.name, vid, vl->cell->name);
                /* TODO: make existing record unavailable */
-                goto error;
+                _leave(" = %d", ret);
+                return ret;
        }
+}
- found_on_vlserver:
+/*
-        _debug("Done VL Lookup: %*.*s %02x { %08x(%x) %08x(%x) %08x(%x) }",
+ * apply the update to a VL record
+ * - logs a notice if the name in the new entry differs from the name held in
+ *   the record, then overwrites vl->vldb wholesale with *vldb
+ * - with AFS_CACHING_SUPPORT, also updates the record's cache cookie
-               namesz, namesz, name,
+ */
-               vldb.vidmask,
+static void afs_vlocation_apply_update(struct afs_vlocation *vl,
-               ntohl(vldb.servers[0].s_addr), vldb.srvtmask[0],
+                                       struct afs_cache_vlocation *vldb)
-               ntohl(vldb.servers[1].s_addr), vldb.srvtmask[1],
+{
-               ntohl(vldb.servers[2].s_addr), vldb.srvtmask[2]
+        _debug("Done VL Lookup: %s %02x { %08x(%x) %08x(%x) %08x(%x) }",
-               );
+               vldb->name, vldb->vidmask,
+               ntohl(vldb->servers[0].s_addr), vldb->srvtmask[0],
-        _debug("Vids: %08x %08x %08x", vldb.vid[0], vldb.vid[1], vldb.vid[2]);
+               ntohl(vldb->servers[1].s_addr), vldb->srvtmask[1],
+               ntohl(vldb->servers[2].s_addr), vldb->srvtmask[2]);
-        if ((namesz < sizeof(vlocation->vldb.name) &&
+        _debug("Vids: %08x %08x %08x",
-             vlocation->vldb.name[namesz] != '\0') ||
+               vldb->vid[0], vldb->vid[1], vldb->vid[2]);
-            memcmp(vldb.name, name, namesz) != 0)
-                printk("kAFS: name of volume '%*.*s' changed to '%s' on server\n",
-                       namesz, namesz, name, vldb.name);
-        memcpy(&vlocation->vldb, &vldb, sizeof(vlocation->vldb));
+        if (strcmp(vldb->name, vl->vldb.name) != 0)
+                printk(KERN_NOTICE "kAFS:"
+                       " name of volume '%s' changed to '%s' on server\n",
+                       vl->vldb.name, vldb->name);
-        afs_kafstimod_add_timer(&vlocation->upd_timer, 10 * HZ);
+        vl->vldb = *vldb;
 #ifdef AFS_CACHING_SUPPORT
        /* update volume entry in local cache */
-        cachefs_update_cookie(vlocation->cache);
+        cachefs_update_cookie(vl->cache);
-#endif
-        *_vlocation = vlocation;
-        _leave(" = 0 (%p)",vlocation);
-        return 0;
- error:
-        if (vlocation) {
-                if (active) {
-                        __afs_put_vlocation(vlocation);
-                }
-                else {
-                        list_del(&vlocation->link);
-#ifdef AFS_CACHING_SUPPORT
-                        cachefs_relinquish_cookie(vlocation->cache, 0);
 #endif
-                        afs_put_cell(vlocation->cell);
+}
-                        kfree(vlocation);
-                }
-        }
-        _leave(" = %d", ret);
-        return ret;
-} /* end afs_vlocation_lookup() */
-/*****************************************************************************/
 /*
- * finish using a volume location record
+ * fill in a volume location record, consulting the cache and the VL server
- * - caller must have cell->vol_sem write-locked
+ * both
+ * - vl->valid must be clear on entry (asserted)
+ * - if the cache lookup marks the record valid, the VL servers are queried by
+ *   volume ID; otherwise they are queried by the name held in vl->vldb.name
+ * - returns 0 after applying the fetched entry to the record, or a negative
+ *   error code, in which case no update is applied
 */
-static void __afs_put_vlocation(struct afs_vlocation *vlocation)
+static int afs_vlocation_fill_in_record(struct afs_vlocation *vl,
+                                        struct key *key)
 {
-        struct afs_cell *cell;
+        struct afs_cache_vlocation vldb;
+        int ret;
-        if (!vlocation)
+        _enter("");
-                return;
-        _enter("%s", vlocation->vldb.name);
+        ASSERTCMP(vl->valid, ==, 0);
-        cell = vlocation->cell;
+        memset(&vldb, 0, sizeof(vldb));
-        /* sanity check */
+        /* see if we have an in-cache copy (will set vl->valid if there is) */
-        BUG_ON(atomic_read(&vlocation->usage) <= 0);
+#ifdef AFS_CACHING_SUPPORT
+        cachefs_acquire_cookie(vl->cell->cache,
+                               &afs_volume_cache_index_def,
+                               vl,
+                               &vl->cache);
+#endif
-        spin_lock(&cell->vl_gylock);
+        if (vl->valid) {
-        if (likely(!atomic_dec_and_test(&vlocation->usage))) {
+                /* try to update a known volume in the cell VL databases by
-                spin_unlock(&cell->vl_gylock);
+                 * ID as the name may have changed */
-                _leave("");
+                _debug("found in cache");
-                return;
+                ret = afs_vlocation_update_record(vl, key, &vldb);
+                if (ret < 0) {
+                        /* the failure has already been logged; don't apply
+                         * the still-zeroed vldb over the record */
+                        _leave(" = %d", ret);
+                        return ret;
+                }
+        } else {
+                /* try to look up an unknown volume in the cell VL databases by
+                 * name */
+                ret = afs_vlocation_access_vl_by_name(vl, key, &vldb);
+                if (ret < 0) {
+                        printk("kAFS: failed to locate '%s' in cell '%s'\n",
+                               vl->vldb.name, vl->cell->name);
+                        _leave(" = %d", ret);
+                        return ret;
+                }
        }
-        /* move to graveyard queue */
+        afs_vlocation_apply_update(vl, &vldb);
-        list_move_tail(&vlocation->link,&cell->vl_graveyard);
+        _leave(" = 0");
+        return 0;
-        /* remove from pending timeout queue (refcounted if actually being
+}
-         * updated) */
-        list_del_init(&vlocation->upd_op.link);
-        /* time out in 10 secs */
-        afs_kafstimod_del_timer(&vlocation->upd_timer);
-        afs_kafstimod_add_timer(&vlocation->timeout, 10 * HZ);
-        spin_unlock(&cell->vl_gylock);
-        _leave(" [killed]");
-} /* end __afs_put_vlocation() */
-/*****************************************************************************/
-/*
- * finish using a volume location record
- */
-void afs_put_vlocation(struct afs_vlocation *vlocation)
-{
-        if (vlocation) {
-                struct afs_cell *cell = vlocation->cell;
-                down_write(&cell->vl_sem);
-                __afs_put_vlocation(vlocation);
-                up_write(&cell->vl_sem);
-        }
-} /* end afs_put_vlocation() */
-/*****************************************************************************/
 /*
- * timeout vlocation record
+ * queue a vlocation record for updates
+ * - sets vl->update_at to afs_vlocation_update_timeout seconds from now, or to
+ *   one second after the record at the tail of the update list if it would
+ *   not otherwise fall after that record
+ * - if the update list was empty, the delayed update work item is queued too
+ * - the record is appended to afs_vlocation_updates, all under
+ *   afs_vlocation_updates_lock
- * - removes from the cell's graveyard if the usage count is zero
 */
-void afs_vlocation_do_timeout(struct afs_vlocation *vlocation)
+void afs_vlocation_queue_for_updates(struct afs_vlocation *vl)
 {
-        struct afs_cell *cell;
+        struct afs_vlocation *xvl;
-        _enter("%s", vlocation->vldb.name);
+        /* wait at least 10 minutes before updating... */
+        vl->update_at = get_seconds() + afs_vlocation_update_timeout;
-        cell = vlocation->cell;
+        spin_lock(&afs_vlocation_updates_lock);
-        BUG_ON(atomic_read(&vlocation->usage) < 0);
+        if (!list_empty(&afs_vlocation_updates)) {
+                /* ... but wait at least 1 second more than the newest record
-        /* remove from graveyard if still dead */
+                 * already queued so that we don't spam the VL server suddenly
-        spin_lock(&cell->vl_gylock);
+                 * with lots of requests
-        if (atomic_read(&vlocation->usage) == 0)
+                 */
-                list_del_init(&vlocation->link);
+                xvl = list_entry(afs_vlocation_updates.prev,
-        else
+                                 struct afs_vlocation, update);
-                vlocation = NULL;
+                if (vl->update_at <= xvl->update_at)
-        spin_unlock(&cell->vl_gylock);
+                        vl->update_at = xvl->update_at + 1;
+        } else {
-        if (!vlocation) {
+                queue_delayed_work(afs_vlocation_update_worker,
-                _leave("");
+                                   &afs_vlocation_update,
-                return; /* resurrected */
+                                   afs_vlocation_update_timeout * HZ);
        }
-        /* we can now destroy it properly */
+        list_add_tail(&vl->update, &afs_vlocation_updates);
-#ifdef AFS_CACHING_SUPPORT
+        spin_unlock(&afs_vlocation_updates_lock);
-        cachefs_relinquish_cookie(vlocation->cache, 0);
+}
-#endif
-        afs_put_cell(cell);
-        kfree(vlocation);
-        _leave(" [destroyed]");
-} /* end afs_vlocation_do_timeout() */
-/*****************************************************************************/
 /*
- * send an update operation to the currently selected server
+ * lookup volume location
+ * - iterate through the VL servers in a cell until one of them admits knowing
+ *   about the volume in question
+ * - lookup in the local cache if not able to find on the VL server
+ * - insert/update in the local cache if did get a VL response
+ * - returns the record with a ref held for the caller, or an ERR_PTR():
+ *   -ENAMETOOLONG if the name plus its NUL won't fit in vldb.name, -ENOMEM
+ *   if a new record can't be allocated, the error from an interrupted wait
+ *   for someone else's fill-in, or the error from filling in the record
 */
-static int afs_vlocation_update_begin(struct afs_vlocation *vlocation)
+struct afs_vlocation *afs_vlocation_lookup(struct afs_cell *cell,
+                                           struct key *key,
+                                           const char *name,
+                                           size_t namesz)
 {
-        afs_voltype_t voltype;
+        struct afs_vlocation *vl;
-        afs_volid_t vid;
        int ret;
-        _enter("%s{ufs=%u ucs=%u}",
+        _enter("{%s},{%x},%*.*s,%zu",
-               vlocation->vldb.name,
+               cell->name, key_serial(key),
-               vlocation->upd_first_svix,
+               (int) namesz, (int) namesz, name, namesz);
-               vlocation->upd_curr_svix);
-        /* try to look up a cached volume in the cell VL databases by ID */
+        /* the name must leave room for a terminating NUL: the list search
+         * below reads vl->vldb.name[namesz] and the stored name is later
+         * treated as a C string */
+        if (namesz >= sizeof(vl->vldb.name)) {
-        if (vlocation->vldb.vidmask & AFS_VOL_VTM_RW) {
+                _leave(" = -ENAMETOOLONG");
-                vid = vlocation->vldb.vid[0];
+                return ERR_PTR(-ENAMETOOLONG);
-                voltype = AFSVL_RWVOL;
-        }
-        else if (vlocation->vldb.vidmask & AFS_VOL_VTM_RO) {
-                vid = vlocation->vldb.vid[1];
-                voltype = AFSVL_ROVOL;
        }
-        else if (vlocation->vldb.vidmask & AFS_VOL_VTM_BAK) {
-                vid = vlocation->vldb.vid[2];
+        /* see if we have an in-memory copy first */
-                voltype = AFSVL_BACKVOL;
+        down_write(&cell->vl_sem);
+        spin_lock(&cell->vl_lock);
+        list_for_each_entry(vl, &cell->vl_list, link) {
+                if (vl->vldb.name[namesz] != '\0')
+                        continue;
+                if (memcmp(vl->vldb.name, name, namesz) == 0)
+                        goto found_in_memory;
        }
-        else {
+        spin_unlock(&cell->vl_lock);
-                BUG();
-                vid = 0;
+        /* not in the cell's in-memory lists - create a new record */
-                voltype = 0;
+        vl = afs_vlocation_alloc(cell, name, namesz);
+        if (!vl) {
+                up_write(&cell->vl_sem);
+                return ERR_PTR(-ENOMEM);
        }
-        /* contact the chosen server */
+        afs_get_cell(cell);
-        ret = afs_server_lookup(
-                vlocation->cell,
-                &vlocation->cell->vl_addrs[vlocation->upd_curr_svix],
-                &vlocation->upd_op.server);
-        switch (ret) {
+        list_add_tail(&vl->link, &cell->vl_list);
-        case 0:
+        vl->state = AFS_VL_CREATING;
-                break;
+        up_write(&cell->vl_sem);
-        case -ENOMEM:
-        case -ENONET:
-        default:
-                _leave(" = %d", ret);
-                return ret;
-        }
-        /* initiate the update operation */
+fill_in_record:
-        ret = afs_rxvl_get_entry_by_id_async(&vlocation->upd_op, vid, voltype);
+        ret = afs_vlocation_fill_in_record(vl, key);
-        if (ret < 0) {
+        if (ret < 0)
-                _leave(" = %d", ret);
+                goto error_abandon;
-                return ret;
+        spin_lock(&vl->lock);
+        vl->state = AFS_VL_VALID;
+        wake_up(&vl->waitq);
+        spin_unlock(&vl->lock);
+        /* schedule for regular updates */
+        afs_vlocation_queue_for_updates(vl);
+        goto success;
+found_in_memory:
+        /* found in memory */
+        _debug("found in memory");
+        atomic_inc(&vl->usage);
+        spin_unlock(&cell->vl_lock);
+        if (!list_empty(&vl->grave)) {
+                spin_lock(&afs_vlocation_graveyard_lock);
+                list_del_init(&vl->grave);
+                spin_unlock(&afs_vlocation_graveyard_lock);
        }
+        up_write(&cell->vl_sem);
+        /* see if it was an abandoned record that we might try filling in */
+        spin_lock(&vl->lock);
+        while (vl->state != AFS_VL_VALID) {
+                afs_vlocation_state_t state = vl->state;
+                _debug("invalid [state %d]", state);
+                if ((state == AFS_VL_NEW || state == AFS_VL_NO_VOLUME)) {
+                        vl->state = AFS_VL_CREATING;
+                        spin_unlock(&vl->lock);
+                        goto fill_in_record;
+                }
+                /* must now wait for creation or update by someone else to
+                 * complete */
+                _debug("wait");
+                spin_unlock(&vl->lock);
+                ret = wait_event_interruptible(
+                        vl->waitq,
+                        vl->state == AFS_VL_NEW ||
+                        vl->state == AFS_VL_VALID ||
+                        vl->state == AFS_VL_NO_VOLUME);
+                if (ret < 0)
+                        goto error;
+                spin_lock(&vl->lock);
+        }
+        spin_unlock(&vl->lock);
+success:
+        _leave(" = %p",vl);
+        return vl;
+error_abandon:
+        spin_lock(&vl->lock);
+        vl->state = AFS_VL_NEW;
+        wake_up(&vl->waitq);
+        spin_unlock(&vl->lock);
+error:
+        ASSERT(vl != NULL);
+        afs_put_vlocation(vl);
        _leave(" = %d", ret);
-        return ret;
+        return ERR_PTR(ret);
-} /* end afs_vlocation_update_begin() */
+}
-/*****************************************************************************/
 /*
- * abandon updating a VL record
+ * finish using a volume location record
+ * - a NULL pointer is ignored
+ * - dropping the last ref moves the record onto the graveyard list, stamps
+ *   its time of death, schedules the reaper and takes the record off the
+ *   update list; the record is not freed here
- * - does not restart the update timer
 */
-static void afs_vlocation_update_abandon(struct afs_vlocation *vlocation,
+void afs_put_vlocation(struct afs_vlocation *vl)
-                                         afs_vlocation_upd_t state,
-                                         int ret)
 {
-        _enter("%s,%u", vlocation->vldb.name, state);
+        if (!vl)
+                return;
-        if (ret < 0)
-                printk("kAFS: Abandoning VL update '%s': %d\n",
-                       vlocation->vldb.name, ret);
-        /* discard the server record */
-        afs_put_server(vlocation->upd_op.server);
-        vlocation->upd_op.server = NULL;
-        spin_lock(&afs_vlocation_update_lock);
+        _enter("%s", vl->vldb.name);
-        afs_vlocation_update = NULL;
-        vlocation->upd_state = state;
-        /* TODO: start updating next VL record on pending list */
+        ASSERTCMP(atomic_read(&vl->usage), >, 0);
-        spin_unlock(&afs_vlocation_update_lock);
+        if (likely(!atomic_dec_and_test(&vl->usage))) {
+                _leave("");
+                return;
+        }
-        _leave("");
+        spin_lock(&afs_vlocation_graveyard_lock);
-} /* end afs_vlocation_update_abandon() */
+        if (atomic_read(&vl->usage) == 0) {
+                _debug("buried");
+                list_move_tail(&vl->grave, &afs_vlocation_graveyard);
+                vl->time_of_death = get_seconds();
+                schedule_delayed_work(&afs_vlocation_reap,
+                                      afs_vlocation_timeout * HZ);
+                /* suspend updates on this record */
+                if (!list_empty(&vl->update)) {
+                        spin_lock(&afs_vlocation_updates_lock);
+                        list_del_init(&vl->update);
+                        spin_unlock(&afs_vlocation_updates_lock);
+                }
+        }
+        spin_unlock(&afs_vlocation_graveyard_lock);
+        _leave(" [killed?]");
+}
-/*****************************************************************************/
 /*
- * handle periodic update timeouts and busy retry timeouts
+ * destroy a dead volume location record
+ * - drops the ref on the record's cell and frees the record
+ * - with AFS_CACHING_SUPPORT, relinquishes the record's cache cookie first
- * - called from kafstimod
 */
-static void afs_vlocation_update_timer(struct afs_timer *timer)
+static void afs_vlocation_destroy(struct afs_vlocation *vl)
 {
-        struct afs_vlocation *vlocation =
+        _enter("%p", vl);
-                list_entry(timer, struct afs_vlocation, upd_timer);
-        int ret;
-        _enter("%s", vlocation->vldb.name);
+#ifdef AFS_CACHING_SUPPORT
+        cachefs_relinquish_cookie(vl->cache, 0);
+#endif
-        /* only update if not in the graveyard (defend against putting too) */
+        afs_put_cell(vl->cell);
-        spin_lock(&vlocation->cell->vl_gylock);
+        kfree(vl);
+}
-        if (!atomic_read(&vlocation->usage))
+/*
-                goto out_unlock1;
+ * reap dead volume location records
+ * - work item handler; walks the graveyard from its head and stops at the
+ *   first record whose afs_vlocation_timeout has not yet run out, rescheduling
+ *   itself for when it will
+ * - expired records that have regained a ref (usage > 0) are merely taken off
+ *   the graveyard; the rest are unlinked from their cell's list and destroyed
+ *   once the graveyard lock has been dropped
+ */
+static void afs_vlocation_reaper(struct work_struct *work)
+{
+        LIST_HEAD(corpses);
+        struct afs_vlocation *vl;
+        unsigned long delay, expiry;
+        time_t now;
-        spin_lock(&afs_vlocation_update_lock);
+        _enter("");
-        /* if we were woken up due to EBUSY sleep then restart immediately if
+        now = get_seconds();
-         * possible or else jump to front of pending queue */
+        spin_lock(&afs_vlocation_graveyard_lock);
-        if (vlocation->upd_state == AFS_VLUPD_BUSYSLEEP) {
-                if (afs_vlocation_update) {
+        while (!list_empty(&afs_vlocation_graveyard)) {
-                        list_add(&vlocation->upd_op.link,
+                vl = list_entry(afs_vlocation_graveyard.next,
-                                 &afs_vlocation_update_pendq);
+                                struct afs_vlocation, grave);
+                _debug("check %p", vl);
+                /* the queue is ordered most dead first */
+                expiry = vl->time_of_death + afs_vlocation_timeout;
+                if (expiry > now) {
+                        delay = (expiry - now) * HZ;
+                        _debug("delay %lu", delay);
+                        if (!schedule_delayed_work(&afs_vlocation_reap,
+                                                   delay)) {
+                                cancel_delayed_work(&afs_vlocation_reap);
+                                schedule_delayed_work(&afs_vlocation_reap,
+                                                      delay);
+                        }
+                        break;
                }
-                else {
-                        afs_get_vlocation(vlocation);
+                spin_lock(&vl->cell->vl_lock);
-                        afs_vlocation_update = vlocation;
+                if (atomic_read(&vl->usage) > 0) {
-                        vlocation->upd_state = AFS_VLUPD_INPROGRESS;
+                        _debug("no reap");
+                        list_del_init(&vl->grave);
+                } else {
+                        _debug("reap");
+                        list_move_tail(&vl->grave, &corpses);
+                        list_del_init(&vl->link);
                }
-                goto out_unlock2;
+                spin_unlock(&vl->cell->vl_lock);
        }
-        /* put on pending queue if there's already another update in progress */
+        spin_unlock(&afs_vlocation_graveyard_lock);
-        if (afs_vlocation_update) {
-                vlocation->upd_state = AFS_VLUPD_PENDING;
-                list_add_tail(&vlocation->upd_op.link,
-                              &afs_vlocation_update_pendq);
-                goto out_unlock2;
-        }
-        /* hold a ref on it while actually updating */
+        /* now reap the corpses we've extracted */
-        afs_get_vlocation(vlocation);
+        while (!list_empty(&corpses)) {
-        afs_vlocation_update = vlocation;
+                vl = list_entry(corpses.next, struct afs_vlocation, grave);
-        vlocation->upd_state = AFS_VLUPD_INPROGRESS;
+                list_del(&vl->grave);
+                afs_vlocation_destroy(vl);
-        spin_unlock(&afs_vlocation_update_lock);
-        spin_unlock(&vlocation->cell->vl_gylock);
-        /* okay... we can start the update */
-        _debug("BEGIN VL UPDATE [%s]", vlocation->vldb.name);
-        vlocation->upd_first_svix = vlocation->cell->vl_curr_svix;
-        vlocation->upd_curr_svix = vlocation->upd_first_svix;
-        vlocation->upd_rej_cnt = 0;
-        vlocation->upd_busy_cnt = 0;
-        ret = afs_vlocation_update_begin(vlocation);
-        if (ret < 0) {
-                afs_vlocation_update_abandon(vlocation, AFS_VLUPD_SLEEP, ret);
-                afs_kafstimod_add_timer(&vlocation->upd_timer,
-                                        AFS_VLDB_TIMEOUT);
-                afs_put_vlocation(vlocation);
        }
        _leave("");
-        return;
+}
- out_unlock2:
+/*
-        spin_unlock(&afs_vlocation_update_lock);
+ * initialise the VL update process
+ * - creates the single-threaded "kafs_vlupdated" workqueue
+ * - returns 0 on success or -ENOMEM if the workqueue could not be created
- out_unlock1:
+ */
-        spin_unlock(&vlocation->cell->vl_gylock);
+int __init afs_vlocation_update_init(void)
-        _leave("");
+{
-        return;
+        afs_vlocation_update_worker =
+                create_singlethread_workqueue("kafs_vlupdated");
+        return afs_vlocation_update_worker ? 0 : -ENOMEM;
+}
-} /* end afs_vlocation_update_timer() */
+/*
+ * discard all the volume location records for rmmod
+ * - zeroes afs_vlocation_timeout, detaches the update list head under the
+ *   updates lock, then requeues the updater and the reaper with no delay
+ * - the update workqueue is destroyed here
+ * - NOTE(review): nothing visible in this function waits for the reaper work
+ *   item to finish - confirm the caller flushes it before module unload
+ */
+void __exit afs_vlocation_purge(void)
+{
+        afs_vlocation_timeout = 0;
+        spin_lock(&afs_vlocation_updates_lock);
+        list_del_init(&afs_vlocation_updates);
+        spin_unlock(&afs_vlocation_updates_lock);
+        cancel_delayed_work(&afs_vlocation_update);
+        queue_delayed_work(afs_vlocation_update_worker,
+                           &afs_vlocation_update, 0);
+        destroy_workqueue(afs_vlocation_update_worker);
+        cancel_delayed_work(&afs_vlocation_reap);
+        schedule_delayed_work(&afs_vlocation_reap, 0);
+}
-/*****************************************************************************/
 /*
- * attend to an update operation upon which an event happened
+ * update a volume location
+ * - takes the record at the head of afs_vlocation_updates, unhooking any
+ *   records it meets on the way whose usage count is not positive
+ * - if that record is not yet due, the update work item is requeued for when
+ *   it will be and nothing else is done
+ * - otherwise the record is refreshed through afs_vlocation_update_record()
+ *   and its state set from the result: valid on success, volume deleted on
+ *   -ENOMEDIUM, uncertain on any other error
+ * - the record is then put back on the tail of the list, due at least one
+ *   second after the newest record already queued, and the work item is
+ *   requeued for whichever record is now at the head
- * - called in kafsasyncd context
 */
-static void afs_vlocation_update_attend(struct afs_async_op *op)
+static void afs_vlocation_updater(struct work_struct *work)
 {
        struct afs_cache_vlocation vldb;
-        struct afs_vlocation *vlocation =
+        struct afs_vlocation *vl, *xvl;
-                list_entry(op, struct afs_vlocation, upd_op);
+        time_t now;
-        unsigned tmp;
+        long timeout;
        int ret;
-        _enter("%s", vlocation->vldb.name);
+        _enter("");
-        ret = afs_rxvl_get_entry_by_id_async2(op, &vldb);
-        switch (ret) {
-        case -EAGAIN:
-                _leave(" [unfinished]");
-                return;
-        case 0:
-                _debug("END VL UPDATE: %d\n", ret);
-                vlocation->valid = 1;
-                _debug("Done VL Lookup: %02x { %08x(%x) %08x(%x) %08x(%x) }",
-                       vldb.vidmask,
-                       ntohl(vldb.servers[0].s_addr), vldb.srvtmask[0],
-                       ntohl(vldb.servers[1].s_addr), vldb.srvtmask[1],
-                       ntohl(vldb.servers[2].s_addr), vldb.srvtmask[2]
-                       );
-                _debug("Vids: %08x %08x %08x",
-                       vldb.vid[0], vldb.vid[1], vldb.vid[2]);
-                afs_vlocation_update_abandon(vlocation, AFS_VLUPD_SLEEP, 0);
-                down_write(&vlocation->cell->vl_sem);
-                /* actually update the cache */
-                if (strncmp(vldb.name, vlocation->vldb.name,
-                            sizeof(vlocation->vldb.name)) != 0)
-                        printk("kAFS: name of volume '%s'"
-                               " changed to '%s' on server\n",
-                               vlocation->vldb.name, vldb.name);
-                memcpy(&vlocation->vldb, &vldb, sizeof(vlocation->vldb));
-#if 0
-                /* TODO update volume entry in local cache */
-#endif
-                up_write(&vlocation->cell->vl_sem);
-                if (ret < 0)
-                        printk("kAFS: failed to update local cache: %d\n", ret);
-                afs_kafstimod_add_timer(&vlocation->upd_timer,
-                                        AFS_VLDB_TIMEOUT);
-                afs_put_vlocation(vlocation);
-                _leave(" [found]");
-                return;
-        case -ENOMEDIUM:
-                vlocation->upd_rej_cnt++;
-                goto try_next;
-                /* the server is locked - retry in a very short while */
-        case -EBUSY:
-                vlocation->upd_busy_cnt++;
-                if (vlocation->upd_busy_cnt > 3)
-                        goto try_next; /* too many retries */
-                afs_vlocation_update_abandon(vlocation,
-                                             AFS_VLUPD_BUSYSLEEP, 0);
-                afs_kafstimod_add_timer(&vlocation->upd_timer, HZ / 2);
-                afs_put_vlocation(vlocation);
-                _leave(" [busy]");
-                return;
-        case -ENETUNREACH:
-        case -EHOSTUNREACH:
-        case -ECONNREFUSED:
-        case -EREMOTEIO:
-                /* record bad vlserver info in the cell too
-                 * - TODO: use down_write_trylock() if available
-                 */
-                if (vlocation->upd_curr_svix == vlocation->cell->vl_curr_svix)
-                        vlocation->cell->vl_curr_svix =
-                                vlocation->cell->vl_curr_svix %
-                                vlocation->cell->vl_naddrs;
-        case -EBADRQC:
-        case -EINVAL:
-        case -EACCES:
-        case -EBADMSG:
-                goto try_next;
-        default:
-                goto abandon;
-        }
-        /* try contacting the next server */
- try_next:
-        vlocation->upd_busy_cnt = 0;
-        /* discard the server record */
-        afs_put_server(vlocation->upd_op.server);
-        vlocation->upd_op.server = NULL;
-        tmp = vlocation->cell->vl_naddrs;
+        now = get_seconds();
-        if (tmp == 0)
-                goto abandon;
-        vlocation->upd_curr_svix++;
+        /* find a record to update */
-        if (vlocation->upd_curr_svix >= tmp)
+        spin_lock(&afs_vlocation_updates_lock);
-                vlocation->upd_curr_svix = 0;
+        for (;;) {
-        if (vlocation->upd_first_svix >= tmp)
+                if (list_empty(&afs_vlocation_updates)) {
-                vlocation->upd_first_svix = tmp - 1;
+                        spin_unlock(&afs_vlocation_updates_lock);
+                        _leave(" [nothing]");
+                        return;
+                }
-        /* move to the next server */
+                vl = list_entry(afs_vlocation_updates.next,
-        if (vlocation->upd_curr_svix != vlocation->upd_first_svix) {
+                                struct afs_vlocation, update);
-                afs_vlocation_update_begin(vlocation);
+                if (atomic_read(&vl->usage) > 0)
-                _leave(" [next]");
+                        break;
-                return;
+                list_del_init(&vl->update);
        }
-        /* run out of servers to try - was the volume rejected? */
+        timeout = vl->update_at - now;
-        if (vlocation->upd_rej_cnt > 0) {
+        if (timeout > 0) {
-                printk("kAFS: Active volume no longer valid '%s'\n",
+                queue_delayed_work(afs_vlocation_update_worker,
-                       vlocation->vldb.name);
+                                   &afs_vlocation_update, timeout * HZ);
-                vlocation->valid = 0;
+                spin_unlock(&afs_vlocation_updates_lock);
-                afs_vlocation_update_abandon(vlocation, AFS_VLUPD_SLEEP, 0);
+                _leave(" [nothing]");
-                afs_kafstimod_add_timer(&vlocation->upd_timer,
-                                        AFS_VLDB_TIMEOUT);
-                afs_put_vlocation(vlocation);
-                _leave(" [invalidated]");
                return;
        }
-        /* abandon the update */
+        list_del_init(&vl->update);
- abandon:
+        atomic_inc(&vl->usage);
-        afs_vlocation_update_abandon(vlocation, AFS_VLUPD_SLEEP, ret);
+        spin_unlock(&afs_vlocation_updates_lock);
-        afs_kafstimod_add_timer(&vlocation->upd_timer, HZ * 10);
-        afs_put_vlocation(vlocation);
-        _leave(" [abandoned]");
-} /* end afs_vlocation_update_attend() */
-/*****************************************************************************/
-/*
- * deal with an update operation being discarded
- * - called in kafsasyncd context when it's dying due to rmmod
- * - the call has already been aborted and put()'d
- */
-static void afs_vlocation_update_discard(struct afs_async_op *op)
-{
-        struct afs_vlocation *vlocation =
-                list_entry(op, struct afs_vlocation, upd_op);
-        _enter("%s", vlocation->vldb.name);
+        /* we can now perform the update */
+        _debug("update %s", vl->vldb.name);
+        vl->state = AFS_VL_UPDATING;
+        vl->upd_rej_cnt = 0;
+        vl->upd_busy_cnt = 0;
-        afs_put_server(op->server);
+        ret = afs_vlocation_update_record(vl, NULL, &vldb);
-        op->server = NULL;
+        spin_lock(&vl->lock);
+        switch (ret) {
+        case 0:
+                afs_vlocation_apply_update(vl, &vldb);
+                vl->state = AFS_VL_VALID;
+                wake_up(&vl->waitq);
+                break;
+        case -ENOMEDIUM:
+                vl->state = AFS_VL_VOLUME_DELETED;
+                break;
+        default:
+                vl->state = AFS_VL_UNCERTAIN;
+                break;
+        }
+        spin_unlock(&vl->lock);
-        afs_put_vlocation(vlocation);
+        /* and then reschedule */
+        _debug("reschedule");
+        /* resample the clock: the update above was performed without the list
+         * lock held and may have taken a while, and the requeue delay worked
+         * out below is relative to the current time */
+        now = get_seconds();
+        vl->update_at = now + afs_vlocation_update_timeout;
-        _leave("");
+        spin_lock(&afs_vlocation_updates_lock);
-} /* end afs_vlocation_update_discard() */
-/*****************************************************************************/
+        if (!list_empty(&afs_vlocation_updates)) {
-/*
+                /* next update in 10 minutes, but wait at least 1 second more
- * match a VLDB record stored in the cache
+                 * than the newest record already queued so that we don't spam
- * - may also load target from entry
+                 * the VL server suddenly with lots of requests
- */
+                 */
-#ifdef AFS_CACHING_SUPPORT
+                xvl = list_entry(afs_vlocation_updates.prev,
-static cachefs_match_val_t afs_vlocation_cache_match(void *target,
+                                 struct afs_vlocation, update);
-                                                     const void *entry)
+                if (vl->update_at <= xvl->update_at)
-{
+                        vl->update_at = xvl->update_at + 1;
-        const struct afs_cache_vlocation *vldb = entry;
+                xvl = list_entry(afs_vlocation_updates.next,
-        struct afs_vlocation *vlocation = target;
+                                 struct afs_vlocation, update);
+                timeout = xvl->update_at - now;
-        _enter("{%s},{%s}", vlocation->vldb.name, vldb->name);
+                if (timeout < 0)
+                        timeout = 0;
-        if (strncmp(vlocation->vldb.name, vldb->name, sizeof(vldb->name)) == 0
+        } else {
-            ) {
+                timeout = afs_vlocation_update_timeout;
-                if (!vlocation->valid ||
-                    vlocation->vldb.rtime == vldb->rtime
-                    ) {
-                        vlocation->vldb = *vldb;
-                        vlocation->valid = 1;
-                        _leave(" = SUCCESS [c->m]");
-                        return CACHEFS_MATCH_SUCCESS;
-                }
-                /* need to update cache if cached info differs */
-                else if (memcmp(&vlocation->vldb, vldb, sizeof(*vldb)) != 0) {
-                        /* delete if VIDs for this name differ */
-                        if (memcmp(&vlocation->vldb.vid,
-                                   &vldb->vid,
-                                   sizeof(vldb->vid)) != 0) {
-                                _leave(" = DELETE");
-                                return CACHEFS_MATCH_SUCCESS_DELETE;
-                        }
-                        _leave(" = UPDATE");
-                        return CACHEFS_MATCH_SUCCESS_UPDATE;
-                }
-                else {
-                        _leave(" = SUCCESS");
-                        return CACHEFS_MATCH_SUCCESS;
-                }
        }
-        _leave(" = FAILED");
+        ASSERT(list_empty(&vl->update));
-        return CACHEFS_MATCH_FAILED;
-} /* end afs_vlocation_cache_match() */
-#endif
-/*****************************************************************************/
-/*
- * update a VLDB record stored in the cache
- */
-#ifdef AFS_CACHING_SUPPORT
-static void afs_vlocation_cache_update(void *source, void *entry)
-{
-        struct afs_cache_vlocation *vldb = entry;
-        struct afs_vlocation *vlocation = source;
-        _enter("");
+        list_add_tail(&vl->update, &afs_vlocation_updates);
-        *vldb = vlocation->vldb;
-} /* end afs_vlocation_cache_update() */
+        _debug("timeout %ld", timeout);
-#endif
+        queue_delayed_work(afs_vlocation_update_worker,
+                           &afs_vlocation_update, timeout * HZ);
+        spin_unlock(&afs_vlocation_updates_lock);
+        afs_put_vlocation(vl);
+}
diff --git a/fs/afs/vnode.c b/fs/afs/vnode.c
index cf62da5d7825..a1904ab8426a 100644
--- a/fs/afs/vnode.c
+++ b/fs/afs/vnode.c
@@ -1,6 +1,6 @@
-/* vnode.c: AFS vnode management
+/* AFS vnode management
 *
- * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
+ * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 *
 * This program is free software; you can redistribute it and/or
@@ -14,142 +14,237 @@
 #include <linux/init.h>
 #include <linux/slab.h>
 #include <linux/fs.h>
-#include <linux/pagemap.h>
-#include "volume.h"
-#include "cell.h"
-#include "cmservice.h"
-#include "fsclient.h"
-#include "vlclient.h"
-#include "vnode.h"
 #include "internal.h"
-static void afs_vnode_cb_timed_out(struct afs_timer *timer);
+#if 0
+/*
+ * dump one subtree of a callback-promise tree, left subtree first, then the
+ * node itself, then the right subtree, checking each node's parent pointer
+ * - depth sets the width of the indent printed in front of the node and lr
+ *   is the marker character printed just before the node's address
+ * - returns true if any node in the subtree has a parent pointer that differs
+ *   from the node it was reached from
+ */
+static noinline bool dump_tree_aux(struct rb_node *node, struct rb_node *parent,
+                                   int depth, char lr)
+{
+        struct afs_vnode *vnode;
+        bool bad = false;
+        if (!node)
+                return false;
+        if (node->rb_left)
+                bad = dump_tree_aux(node->rb_left, node, depth + 2, '/');
+        vnode = rb_entry(node, struct afs_vnode, cb_promise);
+        _debug("%c %*.*s%c%p {%d}",
+               rb_is_red(node) ? 'R' : 'B',
+               depth, depth, "", lr,
+               vnode, vnode->cb_expires_at);
+        if (rb_parent(node) != parent) {
+                printk("BAD: %p != %p\n", rb_parent(node), parent);
+                bad = true;
+        }
-struct afs_timer_ops afs_vnode_cb_timed_out_ops = {
+        if (node->rb_right)
-        .timed_out      = afs_vnode_cb_timed_out,
+                bad |= dump_tree_aux(node->rb_right, node, depth + 2, '\\');
-};
-#ifdef AFS_CACHING_SUPPORT
+        return bad;
-static cachefs_match_val_t afs_vnode_cache_match(void *target,
+}
-                                                 const void *entry);
-static void afs_vnode_cache_update(void *source, void *entry);
-struct cachefs_index_def afs_vnode_cache_index_def = {
+/*
+ * dump the whole of a server's callback-promise tree under the given name
+ * and BUG() if the walk reports a parent-pointer mismatch
+ */
+static noinline void dump_tree(const char *name, struct afs_server *server)
-        .name           = "vnode",
+{
-        .data_size      = sizeof(struct afs_cache_vnode),
+        _enter("%s", name);
-        .keys[0]        = { CACHEFS_INDEX_KEYS_BIN, 4 },
+        if (dump_tree_aux(server->cb_promises.rb_node, NULL, 0, '-'))
-        .match          = afs_vnode_cache_match,
+                BUG();
-        .update         = afs_vnode_cache_update,
+}
-};
 #endif
-/*****************************************************************************/
 /*
- * handle a callback timing out
+ * insert a vnode into the backing server's vnode tree
+ * - if the vnode is currently attached to a server, it is first removed from
+ *   that server's fs_vnodes tree
+ * - a ref is taken on the new server before vnode->server is switched over,
+ *   and only then is the ref on the old server dropped
+ * - the tree is ordered by fid.vid, then fid.vnode, then fid.unique; finding
+ *   an entry with an identical FID already present is treated as a bug
- * TODO: retain a ref to vnode struct for an outstanding callback timeout
 */
-static void afs_vnode_cb_timed_out(struct afs_timer *timer)
+static void afs_install_vnode(struct afs_vnode *vnode,
+                              struct afs_server *server)
 {
-        struct afs_server *oldserver;
+        struct afs_server *old_server = vnode->server;
-        struct afs_vnode *vnode;
+        struct afs_vnode *xvnode;
+        struct rb_node *parent, **p;
-        vnode = list_entry(timer, struct afs_vnode, cb_timeout);
+        _enter("%p,%p", vnode, server);
-        _enter("%p", vnode);
+        if (old_server) {
+                spin_lock(&old_server->fs_lock);
+                rb_erase(&vnode->server_rb, &old_server->fs_vnodes);
+                spin_unlock(&old_server->fs_lock);
+        }
-        /* set the changed flag in the vnode and release the server */
+        afs_get_server(server);
-        spin_lock(&vnode->lock);
+        vnode->server = server;
+        afs_put_server(old_server);
+        /* insert into the server's vnode tree in FID order */
+        spin_lock(&server->fs_lock);
+        parent = NULL;
+        p = &server->fs_vnodes.rb_node;
+        while (*p) {
+                parent = *p;
+                xvnode = rb_entry(parent, struct afs_vnode, server_rb);
+                if (vnode->fid.vid < xvnode->fid.vid)
+                        p = &(*p)->rb_left;
+                else if (vnode->fid.vid > xvnode->fid.vid)
+                        p = &(*p)->rb_right;
+                else if (vnode->fid.vnode < xvnode->fid.vnode)
+                        p = &(*p)->rb_left;
+                else if (vnode->fid.vnode > xvnode->fid.vnode)
+                        p = &(*p)->rb_right;
+                else if (vnode->fid.unique < xvnode->fid.unique)
+                        p = &(*p)->rb_left;
+                else if (vnode->fid.unique > xvnode->fid.unique)
+                        p = &(*p)->rb_right;
+                else
+                        BUG(); /* can't happen unless afs_iget() malfunctions */
+        }
+        rb_link_node(&vnode->server_rb, parent, p);
+        rb_insert_color(&vnode->server_rb, &server->fs_vnodes);
-        oldserver = xchg(&vnode->cb_server, NULL);
+        spin_unlock(&server->fs_lock);
-        if (oldserver) {
+        _leave("");
-                vnode->flags |= AFS_VNODE_CHANGED;
+}
-                spin_lock(&afs_cb_hash_lock);
+/*
-                list_del_init(&vnode->cb_hash_link);
+ * insert a vnode into the promising server's update/expiration tree
-                spin_unlock(&afs_cb_hash_lock);
+ * - caller must hold vnode->lock
+ * - does nothing if a promise is already recorded against the same server
+ *   with an unchanged expiry time
+ * - otherwise any existing promise is removed from the old server's
+ *   cb_promises tree, the vnode is moved to the new server if that differs,
+ *   cb_expires is copied to cb_expires_at and the vnode is inserted into the
+ *   new server's cb_promises tree keyed on cb_expires_at
+ * - server must not be NULL
+ */
+static void afs_vnode_note_promise(struct afs_vnode *vnode,
+                                   struct afs_server *server)
+{
+        struct afs_server *old_server;
+        struct afs_vnode *xvnode;
+        struct rb_node *parent, **p;
-                spin_lock(&oldserver->cb_lock);
+        _enter("%p,%p", vnode, server);
-                list_del_init(&vnode->cb_link);
-                spin_unlock(&oldserver->cb_lock);
+        ASSERT(server != NULL);
+        old_server = vnode->server;
+        if (vnode->cb_promised) {
+                if (server == old_server &&
+                    vnode->cb_expires == vnode->cb_expires_at) {
+                        _leave(" [no change]");
+                        return;
+                }
+                /* cb_promised is tested again once the server's cb_lock is
+                 * held, in case it was cleared before the lock was taken */
+                spin_lock(&old_server->cb_lock);
+                if (vnode->cb_promised) {
+                        _debug("delete");
+                        rb_erase(&vnode->cb_promise, &old_server->cb_promises);
+                        vnode->cb_promised = false;
+                }
+                spin_unlock(&old_server->cb_lock);
        }
-        spin_unlock(&vnode->lock);
+        if (vnode->server != server)
+                afs_install_vnode(vnode, server);
+        vnode->cb_expires_at = vnode->cb_expires;
+        _debug("PROMISE on %p {%lu}",
+               vnode, (unsigned long) vnode->cb_expires_at);
+        /* abuse an RB-tree to hold the expiration order (we may have multiple
+         * items with the same expiration time) */
+        spin_lock(&server->cb_lock);
+        parent = NULL;
+        p = &server->cb_promises.rb_node;
+        while (*p) {
+                parent = *p;
+                xvnode = rb_entry(parent, struct afs_vnode, cb_promise);
+                if (vnode->cb_expires_at < xvnode->cb_expires_at)
+                        p = &(*p)->rb_left;
+                else
+                        p = &(*p)->rb_right;
+        }
-        afs_put_server(oldserver);
+        rb_link_node(&vnode->cb_promise, parent, p);
+        rb_insert_color(&vnode->cb_promise, &server->cb_promises);
+        vnode->cb_promised = true;
+        spin_unlock(&server->cb_lock);
        _leave("");
-} /* end afs_vnode_cb_timed_out() */
+}
-/*****************************************************************************/
 /*
- * finish off updating the recorded status of a file
+ * handle remote file deletion by discarding the callback promise
+ * - called by afs_vnode_status_update_failed() with vnode->lock held
+ * - always sets AFS_VNODE_DELETED on the vnode
+ * - if the vnode is attached to a server, any callback promise is removed
+ *   from that server's cb_promises tree, the vnode is removed from the
+ *   server's fs_vnodes tree and the vnode's ref on the server is dropped
+ * - a vnode with no server (never installed on one, or already detached by an
+ *   earlier call) only gets the flag set
+ */
+static void afs_vnode_deleted_remotely(struct afs_vnode *vnode)
+{
+        struct afs_server *server;
+        set_bit(AFS_VNODE_DELETED, &vnode->flags);
+        server = vnode->server;
+        if (!server) {
+                /* not in any server's trees, so there is nothing to take out
+                 * of them; a promise cannot be recorded without a server */
+                ASSERT(!vnode->cb_promised);
+                return;
+        }
+        if (vnode->cb_promised) {
+                /* re-test once the server's cb_lock is held */
+                spin_lock(&server->cb_lock);
+                if (vnode->cb_promised) {
+                        rb_erase(&vnode->cb_promise, &server->cb_promises);
+                        vnode->cb_promised = false;
+                }
+                spin_unlock(&server->cb_lock);
+        }
+        spin_lock(&server->fs_lock);
+        rb_erase(&vnode->server_rb, &server->fs_vnodes);
+        spin_unlock(&server->fs_lock);
+        vnode->server = NULL;
+        afs_put_server(server);
+}
+/*
+ * finish off updating the recorded status of a file after a successful
+ * operation completion
- * - starts callback expiry timer
- * - adds to server's callback list
+ * - clears the callback-broken flag and records the callback promise against
+ *   the server through afs_vnode_note_promise()
+ * - drops this operation from vnode->update_cnt and wakes everything waiting
+ *   on vnode->update_waitq
+ * - takes vnode->lock itself, so the caller must not be holding it
+ * - does not consume the caller's ref on the server
 */
-static void afs_vnode_finalise_status_update(struct afs_vnode *vnode,
+void afs_vnode_finalise_status_update(struct afs_vnode *vnode,
-                                             struct afs_server *server,
+                                      struct afs_server *server)
-                                             int ret)
 {
-        struct afs_server *oldserver = NULL;
-        _enter("%p,%p,%d", vnode, server, ret);
+        _enter("%p,%p", vnode, server);
        spin_lock(&vnode->lock);
+        clear_bit(AFS_VNODE_CB_BROKEN, &vnode->flags);
+        afs_vnode_note_promise(vnode, server);
+        vnode->update_cnt--;
+        ASSERTCMP(vnode->update_cnt, >=, 0);
+        spin_unlock(&vnode->lock);
+        wake_up_all(&vnode->update_waitq);
+        _leave("");
+}
-        vnode->flags &= ~AFS_VNODE_CHANGED;
+/*
+ * finish off updating the recorded status of a file after an operation failed
+ * - ret is the error the operation failed with; -ENOENT is taken to mean the
+ *   file was deleted on the server and the vnode is marked accordingly
+ * - the callback-broken flag is cleared whatever the error was
+ * - drops this operation from vnode->update_cnt and wakes everything waiting
+ *   on vnode->update_waitq
+ * - takes vnode->lock itself, so the caller must not be holding it
+ */
+static void afs_vnode_status_update_failed(struct afs_vnode *vnode, int ret)
+{
+        _enter("%p,%d", vnode, ret);
-        if (ret == 0) {
+        spin_lock(&vnode->lock);
-                /* adjust the callback timeout appropriately */
-                afs_kafstimod_add_timer(&vnode->cb_timeout,
-                                        vnode->cb_expiry * HZ);
-                spin_lock(&afs_cb_hash_lock);
-                list_move_tail(&vnode->cb_hash_link,
-                              &afs_cb_hash(server, &vnode->fid));
-                spin_unlock(&afs_cb_hash_lock);
-                /* swap ref to old callback server with that for new callback
-                 * server */
-                oldserver = xchg(&vnode->cb_server, server);
-                if (oldserver != server) {
-                        if (oldserver) {
-                                spin_lock(&oldserver->cb_lock);
-                                list_del_init(&vnode->cb_link);
-                                spin_unlock(&oldserver->cb_lock);
-                        }
-                        afs_get_server(server);
+        clear_bit(AFS_VNODE_CB_BROKEN, &vnode->flags);
-                        spin_lock(&server->cb_lock);
-                        list_add_tail(&vnode->cb_link, &server->cb_promises);
-                        spin_unlock(&server->cb_lock);
-                }
-                else {
-                        /* same server */
-                        oldserver = NULL;
-                }
-        }
-        else if (ret == -ENOENT) {
-                /* the file was deleted - clear the callback timeout */
-                oldserver = xchg(&vnode->cb_server, NULL);
-                afs_kafstimod_del_timer(&vnode->cb_timeout);
+        if (ret == -ENOENT) {
+                /* the file was deleted on the server */
                _debug("got NOENT from server - marking file deleted");
-                vnode->flags |= AFS_VNODE_DELETED;
+                afs_vnode_deleted_remotely(vnode);
        }
        vnode->update_cnt--;
+        ASSERTCMP(vnode->update_cnt, >=, 0);
        spin_unlock(&vnode->lock);
        wake_up_all(&vnode->update_waitq);
-        afs_put_server(oldserver);
        _leave("");
+}
-} /* end afs_vnode_finalise_status_update() */
-/*****************************************************************************/
 /*
 * fetch file status from the volume
 * - don't issue a fetch if:
@@ -157,9 +252,11 @@ static void afs_vnode_finalise_status_update(struct afs_vnode *vnode,
 *   - there are any outstanding ops that will fetch the status
 * - TODO implement local caching
 */
-int afs_vnode_fetch_status(struct afs_vnode *vnode)
+int afs_vnode_fetch_status(struct afs_vnode *vnode,
+                           struct afs_vnode *auth_vnode, struct key *key)
 {
        struct afs_server *server;
+        unsigned long acl_order;
        int ret;
        DECLARE_WAITQUEUE(myself, current);
@@ -168,38 +265,49 @@ int afs_vnode_fetch_status(struct afs_vnode *vnode)
               vnode->volume->vlocation->vldb.name,
               vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique);
-        if (!(vnode->flags & AFS_VNODE_CHANGED) && vnode->cb_server) {
+        if (!test_bit(AFS_VNODE_CB_BROKEN, &vnode->flags) &&
+            vnode->cb_promised) {
                _leave(" [unchanged]");
                return 0;
        }
-        if (vnode->flags & AFS_VNODE_DELETED) {
+        if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) {
                _leave(" [deleted]");
                return -ENOENT;
        }
+        acl_order = 0;
+        if (auth_vnode)
+                acl_order = auth_vnode->acl_order;
        spin_lock(&vnode->lock);
-        if (!(vnode->flags & AFS_VNODE_CHANGED)) {
+        if (!test_bit(AFS_VNODE_CB_BROKEN, &vnode->flags) &&
+            vnode->cb_promised) {
                spin_unlock(&vnode->lock);
                _leave(" [unchanged]");
                return 0;
        }
+        ASSERTCMP(vnode->update_cnt, >=, 0);
        if (vnode->update_cnt > 0) {
                /* someone else started a fetch */
+                _debug("wait on fetch %d", vnode->update_cnt);
                set_current_state(TASK_UNINTERRUPTIBLE);
+                ASSERT(myself.func != NULL);
                add_wait_queue(&vnode->update_waitq, &myself);
                /* wait for the status to be updated */
                for (;;) {
-                        if (!(vnode->flags & AFS_VNODE_CHANGED))
+                        if (!test_bit(AFS_VNODE_CB_BROKEN, &vnode->flags))
                                break;
-                        if (vnode->flags & AFS_VNODE_DELETED)
+                        if (test_bit(AFS_VNODE_DELETED, &vnode->flags))
                                break;
-                        /* it got updated and invalidated all before we saw
+                        /* check to see if it got updated and invalidated all
-                         * it */
+                         * before we saw it */
                        if (vnode->update_cnt == 0) {
                                remove_wait_queue(&vnode->update_waitq,
                                                  &myself);
@@ -219,10 +327,11 @@ int afs_vnode_fetch_status(struct afs_vnode *vnode)
                spin_unlock(&vnode->lock);
                set_current_state(TASK_RUNNING);
-                return vnode->flags & AFS_VNODE_DELETED ? -ENOENT : 0;
+                return test_bit(AFS_VNODE_DELETED, &vnode->flags) ?
+                        -ENOENT : 0;
        }
- get_anyway:
+get_anyway:
        /* okay... we're going to have to initiate the op */
        vnode->update_cnt++;
@@ -232,39 +341,60 @@ int afs_vnode_fetch_status(struct afs_vnode *vnode)
         * vnode */
        do {
                /* pick a server to query */
-                ret = afs_volume_pick_fileserver(vnode->volume, &server);
+                server = afs_volume_pick_fileserver(vnode);
-                if (ret<0)
+                if (IS_ERR(server))
-                        return ret;
+                        goto no_server;
-                _debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr));
+                _debug("USING SERVER: %p{%08x}",
+                       server, ntohl(server->addr.s_addr));
-                ret = afs_rxfs_fetch_file_status(server, vnode, NULL);
+                ret = afs_fs_fetch_file_status(server, key, vnode, NULL,
+                                               &afs_sync_call);
-        } while (!afs_volume_release_fileserver(vnode->volume, server, ret));
+        } while (!afs_volume_release_fileserver(vnode, server, ret));
        /* adjust the flags */
-        afs_vnode_finalise_status_update(vnode, server, ret);
+        if (ret == 0) {
+                _debug("adjust");
+                if (auth_vnode)
+                        afs_cache_permit(vnode, key, acl_order);
+                afs_vnode_finalise_status_update(vnode, server);
+                afs_put_server(server);
+        } else {
+                _debug("failed [%d]", ret);
+                afs_vnode_status_update_failed(vnode, ret);
+        }
-        _leave(" = %d", ret);
+        ASSERTCMP(vnode->update_cnt, >=, 0);
+        _leave(" = %d [cnt %d]", ret, vnode->update_cnt);
        return ret;
-} /* end afs_vnode_fetch_status() */
-/*****************************************************************************/
+no_server:
+        spin_lock(&vnode->lock);
+        vnode->update_cnt--;
+        ASSERTCMP(vnode->update_cnt, >=, 0);
+        spin_unlock(&vnode->lock);
+        _leave(" = %ld [cnt %d]", PTR_ERR(server), vnode->update_cnt);
+        return PTR_ERR(server);
+}
 /*
 * fetch file data from the volume
- * - TODO implement caching and server failover
+ * - TODO implement caching
 */
-int afs_vnode_fetch_data(struct afs_vnode *vnode,
+int afs_vnode_fetch_data(struct afs_vnode *vnode, struct key *key,
-                         struct afs_rxfs_fetch_descriptor *desc)
+                         off_t offset, size_t length, struct page *page)
 {
        struct afs_server *server;
        int ret;
-        _enter("%s,{%u,%u,%u}",
+        _enter("%s{%u,%u,%u},%x,,,",
               vnode->volume->vlocation->vldb.name,
               vnode->fid.vid,
               vnode->fid.vnode,
-               vnode->fid.unique);
+               vnode->fid.unique,
+               key_serial(key));
        /* this op will fetch the status */
        spin_lock(&vnode->lock);
@@ -275,120 +405,351 @@ int afs_vnode_fetch_data(struct afs_vnode *vnode,
         * vnode */
        do {
                /* pick a server to query */
-                ret = afs_volume_pick_fileserver(vnode->volume, &server);
+                server = afs_volume_pick_fileserver(vnode);
-                if (ret < 0)
+                if (IS_ERR(server))
-                        return ret;
+                        goto no_server;
                _debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr));
-                ret = afs_rxfs_fetch_file_data(server, vnode, desc, NULL);
+                ret = afs_fs_fetch_data(server, key, vnode, offset, length,
+                                        page, &afs_sync_call);
-        } while (!afs_volume_release_fileserver(vnode->volume, server, ret));
+        } while (!afs_volume_release_fileserver(vnode, server, ret));
        /* adjust the flags */
-        afs_vnode_finalise_status_update(vnode, server, ret);
+        if (ret == 0) {
+                afs_vnode_finalise_status_update(vnode, server);
+                afs_put_server(server);
+        } else {
+                afs_vnode_status_update_failed(vnode, ret);
+        }
        _leave(" = %d", ret);
        return ret;
-} /* end afs_vnode_fetch_data() */
+no_server:
+        spin_lock(&vnode->lock);
+        vnode->update_cnt--;
+        ASSERTCMP(vnode->update_cnt, >=, 0);
+        spin_unlock(&vnode->lock);
+        return PTR_ERR(server);
+}
-/*****************************************************************************/
 /*
- * break any outstanding callback on a vnode
+ * make a file or a directory
- * - only relevent to server that issued it
 */
-int afs_vnode_give_up_callback(struct afs_vnode *vnode)
+int afs_vnode_create(struct afs_vnode *vnode, struct key *key,
+                     const char *name, umode_t mode, struct afs_fid *newfid,
+                     struct afs_file_status *newstatus,
+                     struct afs_callback *newcb, struct afs_server **_server)
 {
        struct afs_server *server;
        int ret;
-        _enter("%s,{%u,%u,%u}",
+        _enter("%s{%u,%u,%u},%x,%s,,",
               vnode->volume->vlocation->vldb.name,
               vnode->fid.vid,
               vnode->fid.vnode,
-               vnode->fid.unique);
+               vnode->fid.unique,
+               key_serial(key),
-        spin_lock(&afs_cb_hash_lock);
+               name);
-        list_del_init(&vnode->cb_hash_link);
-        spin_unlock(&afs_cb_hash_lock);
-        /* set the changed flag in the vnode and release the server */
+        /* this op will fetch the status on the directory we're creating in */
        spin_lock(&vnode->lock);
+        vnode->update_cnt++;
+        spin_unlock(&vnode->lock);
-        afs_kafstimod_del_timer(&vnode->cb_timeout);
+        do {
+                /* pick a server to query */
+                server = afs_volume_pick_fileserver(vnode);
+                if (IS_ERR(server))
+                        goto no_server;
+                _debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr));
-        server = xchg(&vnode->cb_server, NULL);
+                ret = afs_fs_create(server, key, vnode, name, mode, newfid,
-        if (server) {
+                                    newstatus, newcb, &afs_sync_call);
-                vnode->flags |= AFS_VNODE_CHANGED;
-                spin_lock(&server->cb_lock);
+        } while (!afs_volume_release_fileserver(vnode, server, ret));
-                list_del_init(&vnode->cb_link);
-                spin_unlock(&server->cb_lock);
+        /* adjust the flags */
+        if (ret == 0) {
+                afs_vnode_finalise_status_update(vnode, server);
+                *_server = server;
+        } else {
+                afs_vnode_status_update_failed(vnode, ret);
+                *_server = NULL;
        }
+        _leave(" = %d [cnt %d]", ret, vnode->update_cnt);
+        return ret;
+no_server:
+        spin_lock(&vnode->lock);
+        vnode->update_cnt--;
+        ASSERTCMP(vnode->update_cnt, >=, 0);
        spin_unlock(&vnode->lock);
+        _leave(" = %ld [cnt %d]", PTR_ERR(server), vnode->update_cnt);
+        return PTR_ERR(server);
+}
-        ret = 0;
+/*
-        if (server) {
+ * remove a file or directory
-                ret = afs_rxfs_give_up_callback(server, vnode);
+ */
+int afs_vnode_remove(struct afs_vnode *vnode, struct key *key, const char *name,
+                     bool isdir)
+{
+        struct afs_server *server;
+        int ret;
+        _enter("%s{%u,%u,%u},%x,%s",
+               vnode->volume->vlocation->vldb.name,
+               vnode->fid.vid,
+               vnode->fid.vnode,
+               vnode->fid.unique,
+               key_serial(key),
+               name);
+        /* this op will fetch the status on the directory we're removing from */
+        spin_lock(&vnode->lock);
+        vnode->update_cnt++;
+        spin_unlock(&vnode->lock);
+        do {
+                /* pick a server to query */
+                server = afs_volume_pick_fileserver(vnode);
+                if (IS_ERR(server))
+                        goto no_server;
+                _debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr));
+                ret = afs_fs_remove(server, key, vnode, name, isdir,
+                                    &afs_sync_call);
+        } while (!afs_volume_release_fileserver(vnode, server, ret));
+        /* adjust the flags */
+        if (ret == 0) {
+                afs_vnode_finalise_status_update(vnode, server);
                afs_put_server(server);
+        } else {
+                afs_vnode_status_update_failed(vnode, ret);
        }
-        _leave(" = %d", ret);
+        _leave(" = %d [cnt %d]", ret, vnode->update_cnt);
        return ret;
-} /* end afs_vnode_give_up_callback() */
-/*****************************************************************************/
+no_server:
+        spin_lock(&vnode->lock);
+        vnode->update_cnt--;
+        ASSERTCMP(vnode->update_cnt, >=, 0);
+        spin_unlock(&vnode->lock);
+        _leave(" = %ld [cnt %d]", PTR_ERR(server), vnode->update_cnt);
+        return PTR_ERR(server);
+}
 /*
- * match a vnode record stored in the cache
+ * create a hard link
 */
-#ifdef AFS_CACHING_SUPPORT
+extern int afs_vnode_link(struct afs_vnode *dvnode, struct afs_vnode *vnode,
-static cachefs_match_val_t afs_vnode_cache_match(void *target,
+                          struct key *key, const char *name)
-                                                 const void *entry)
 {
-        const struct afs_cache_vnode *cvnode = entry;
+        struct afs_server *server;
-        struct afs_vnode *vnode = target;
+        int ret;
-        _enter("{%x,%x,%Lx},{%x,%x,%Lx}",
+        _enter("%s{%u,%u,%u},%s{%u,%u,%u},%x,%s",
+               dvnode->volume->vlocation->vldb.name,
+               dvnode->fid.vid,
+               dvnode->fid.vnode,
+               dvnode->fid.unique,
+               vnode->volume->vlocation->vldb.name,
+               vnode->fid.vid,
               vnode->fid.vnode,
               vnode->fid.unique,
-               vnode->status.version,
+               key_serial(key),
-               cvnode->vnode_id,
+               name);
-               cvnode->vnode_unique,
-               cvnode->data_version);
+        /* this op will fetch the status on both the vnode and the directory */
+        spin_lock(&vnode->lock);
-        if (vnode->fid.vnode != cvnode->vnode_id) {
+        vnode->update_cnt++;
-                _leave(" = FAILED");
+        spin_unlock(&vnode->lock);
-                return CACHEFS_MATCH_FAILED;
+        spin_lock(&dvnode->lock);
+        dvnode->update_cnt++;
+        spin_unlock(&dvnode->lock);
+        do {
+                /* pick a server to query */
+                server = afs_volume_pick_fileserver(dvnode);
+                if (IS_ERR(server))
+                        goto no_server;
+                _debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr));
+                ret = afs_fs_link(server, key, dvnode, vnode, name,
+                                  &afs_sync_call);
+        } while (!afs_volume_release_fileserver(dvnode, server, ret));
+        /* adjust the flags */
+        if (ret == 0) {
+                afs_vnode_finalise_status_update(vnode, server);
+                afs_vnode_finalise_status_update(dvnode, server);
+                afs_put_server(server);
+        } else {
+                afs_vnode_status_update_failed(vnode, ret);
+                afs_vnode_status_update_failed(dvnode, ret);
        }
-        if (vnode->fid.unique != cvnode->vnode_unique ||
+        _leave(" = %d [cnt %d]", ret, vnode->update_cnt);
-            vnode->status.version != cvnode->data_version) {
+        return ret;
-                _leave(" = DELETE");
-                return CACHEFS_MATCH_SUCCESS_DELETE;
+no_server:
+        spin_lock(&vnode->lock);
+        vnode->update_cnt--;
+        ASSERTCMP(vnode->update_cnt, >=, 0);
+        spin_unlock(&vnode->lock);
+        spin_lock(&dvnode->lock);
+        dvnode->update_cnt--;
+        ASSERTCMP(dvnode->update_cnt, >=, 0);
+        spin_unlock(&dvnode->lock);
+        _leave(" = %ld [cnt %d]", PTR_ERR(server), vnode->update_cnt);
+        return PTR_ERR(server);
+}
+/*
+ * create a symbolic link
+ */
+int afs_vnode_symlink(struct afs_vnode *vnode, struct key *key,
+                      const char *name, const char *content,
+                      struct afs_fid *newfid,
+                      struct afs_file_status *newstatus,
+                      struct afs_server **_server)
+{
+        struct afs_server *server;
+        int ret;
+        _enter("%s{%u,%u,%u},%x,%s,%s,,,",
+               vnode->volume->vlocation->vldb.name,
+               vnode->fid.vid,
+               vnode->fid.vnode,
+               vnode->fid.unique,
+               key_serial(key),
+               name, content);
+        /* this op will fetch the status on the directory we're creating in */
+        spin_lock(&vnode->lock);
+        vnode->update_cnt++;
+        spin_unlock(&vnode->lock);
+        do {
+                /* pick a server to query */
+                server = afs_volume_pick_fileserver(vnode);
+                if (IS_ERR(server))
+                        goto no_server;
+                _debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr));
+                ret = afs_fs_symlink(server, key, vnode, name, content,
+                                     newfid, newstatus, &afs_sync_call);
+        } while (!afs_volume_release_fileserver(vnode, server, ret));
+        /* adjust the flags */
+        if (ret == 0) {
+                afs_vnode_finalise_status_update(vnode, server);
+                *_server = server;
+        } else {
+                afs_vnode_status_update_failed(vnode, ret);
+                *_server = NULL;
        }
-        _leave(" = SUCCESS");
+        _leave(" = %d [cnt %d]", ret, vnode->update_cnt);
-        return CACHEFS_MATCH_SUCCESS;
+        return ret;
-} /* end afs_vnode_cache_match() */
-#endif
+no_server:
+        spin_lock(&vnode->lock);
+        vnode->update_cnt--;
+        ASSERTCMP(vnode->update_cnt, >=, 0);
+        spin_unlock(&vnode->lock);
+        _leave(" = %ld [cnt %d]", PTR_ERR(server), vnode->update_cnt);
+        return PTR_ERR(server);
+}
-/*****************************************************************************/
 /*
- * update a vnode record stored in the cache
+ * rename a file
 */
-#ifdef AFS_CACHING_SUPPORT
+int afs_vnode_rename(struct afs_vnode *orig_dvnode,
-static void afs_vnode_cache_update(void *source, void *entry)
+                     struct afs_vnode *new_dvnode,
+                     struct key *key,
+                     const char *orig_name,
+                     const char *new_name)
 {
-        struct afs_cache_vnode *cvnode = entry;
+        struct afs_server *server;
-        struct afs_vnode *vnode = source;
+        int ret;
-        _enter("");
+        _enter("%s{%u,%u,%u},%s{%u,%u,%u},%x,%s,%s",
+               orig_dvnode->volume->vlocation->vldb.name,
+               orig_dvnode->fid.vid,
+               orig_dvnode->fid.vnode,
+               orig_dvnode->fid.unique,
+               new_dvnode->volume->vlocation->vldb.name,
+               new_dvnode->fid.vid,
+               new_dvnode->fid.vnode,
+               new_dvnode->fid.unique,
+               key_serial(key),
+               orig_name,
+               new_name);
+        /* this op will fetch the status on both the directories we're dealing
+         * with */
+        spin_lock(&orig_dvnode->lock);
+        orig_dvnode->update_cnt++;
+        spin_unlock(&orig_dvnode->lock);
+        if (new_dvnode != orig_dvnode) {
+                spin_lock(&new_dvnode->lock);
+                new_dvnode->update_cnt++;
+                spin_unlock(&new_dvnode->lock);
+        }
-        cvnode->vnode_id        = vnode->fid.vnode;
+        do {
-        cvnode->vnode_unique    = vnode->fid.unique;
+                /* pick a server to query */
-        cvnode->data_version    = vnode->status.version;
+                server = afs_volume_pick_fileserver(orig_dvnode);
+                if (IS_ERR(server))
+                        goto no_server;
-} /* end afs_vnode_cache_update() */
+                _debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr));
-#endif
+                ret = afs_fs_rename(server, key, orig_dvnode, orig_name,
+                                    new_dvnode, new_name, &afs_sync_call);
+        } while (!afs_volume_release_fileserver(orig_dvnode, server, ret));
+        /* adjust the flags */
+        if (ret == 0) {
+                afs_vnode_finalise_status_update(orig_dvnode, server);
+                if (new_dvnode != orig_dvnode)
+                        afs_vnode_finalise_status_update(new_dvnode, server);
+                afs_put_server(server);
+        } else {
+                afs_vnode_status_update_failed(orig_dvnode, ret);
+                if (new_dvnode != orig_dvnode)
+                        afs_vnode_status_update_failed(new_dvnode, ret);
+        }
+        _leave(" = %d [cnt %d]", ret, orig_dvnode->update_cnt);
+        return ret;
+no_server:
+        spin_lock(&orig_dvnode->lock);
+        orig_dvnode->update_cnt--;
+        ASSERTCMP(orig_dvnode->update_cnt, >=, 0);
+        spin_unlock(&orig_dvnode->lock);
+        if (new_dvnode != orig_dvnode) {
+                spin_lock(&new_dvnode->lock);
+                new_dvnode->update_cnt--;
+                ASSERTCMP(new_dvnode->update_cnt, >=, 0);
+                spin_unlock(&new_dvnode->lock);
+        }
+        _leave(" = %ld [cnt %d]", PTR_ERR(server), orig_dvnode->update_cnt);
+        return PTR_ERR(server);
+}
diff --git a/fs/afs/vnode.h b/fs/afs/vnode.h
deleted file mode 100644
index b86a97102e8b..000000000000
--- a/fs/afs/vnode.h
+++ /dev/null
@@ -1,94 +0,0 @@
-/* vnode.h: AFS vnode record
- *
- * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-#ifndef _LINUX_AFS_VNODE_H
-#define _LINUX_AFS_VNODE_H
-#include <linux/fs.h>
-#include "server.h"
-#include "kafstimod.h"
-#include "cache.h"
-#ifdef __KERNEL__
-struct afs_rxfs_fetch_descriptor;
-/*****************************************************************************/
-/*
- * vnode catalogue entry
- */
-struct afs_cache_vnode
-{
-        afs_vnodeid_t           vnode_id;       /* vnode ID */
-        unsigned                vnode_unique;   /* vnode ID uniquifier */
-        afs_dataversion_t       data_version;   /* data version */
-};
-#ifdef AFS_CACHING_SUPPORT
-extern struct cachefs_index_def afs_vnode_cache_index_def;
-#endif
-/*****************************************************************************/
-/*
- * AFS inode private data
- */
-struct afs_vnode
-{
-        struct inode            vfs_inode;      /* the VFS's inode record */
-        struct afs_volume       *volume;        /* volume on which vnode resides */
-        struct afs_fid          fid;            /* the file identifier for this inode */
-        struct afs_file_status  status;         /* AFS status info for this file */
-#ifdef AFS_CACHING_SUPPORT
-        struct cachefs_cookie   *cache;         /* caching cookie */
-#endif
-        wait_queue_head_t       update_waitq;   /* status fetch waitqueue */
-        unsigned                update_cnt;     /* number of outstanding ops that will update the
-                                                 * status */
-        spinlock_t              lock;           /* waitqueue/flags lock */
-        unsigned                flags;
-#define AFS_VNODE_CHANGED       0x00000001      /* set if vnode reported changed by callback */
-#define AFS_VNODE_DELETED       0x00000002      /* set if vnode deleted on server */
-#define AFS_VNODE_MOUNTPOINT    0x00000004      /* set if vnode is a mountpoint symlink */
-        /* outstanding callback notification on this file */
-        struct afs_server       *cb_server;     /* server that made the current promise */
-        struct list_head        cb_link;        /* link in server's promises list */
-        struct list_head        cb_hash_link;   /* link in master callback hash */
-        struct afs_timer        cb_timeout;     /* timeout on promise */
-        unsigned                cb_version;     /* callback version */
-        unsigned                cb_expiry;      /* callback expiry time */
-        afs_callback_type_t     cb_type;        /* type of callback */
-};
-static inline struct afs_vnode *AFS_FS_I(struct inode *inode)
-{
-        return container_of(inode,struct afs_vnode,vfs_inode);
-}
-static inline struct inode *AFS_VNODE_TO_I(struct afs_vnode *vnode)
-{
-        return &vnode->vfs_inode;
-}
-extern int afs_vnode_fetch_status(struct afs_vnode *vnode);
-extern int afs_vnode_fetch_data(struct afs_vnode *vnode,
-                                struct afs_rxfs_fetch_descriptor *desc);
-extern int afs_vnode_give_up_callback(struct afs_vnode *vnode);
-extern struct afs_timer_ops afs_vnode_cb_timed_out_ops;
-#endif /* __KERNEL__ */
-#endif /* _LINUX_AFS_VNODE_H */
diff --git a/fs/afs/volume.c b/fs/afs/volume.c
index 768c6dbd323a..dd160cada45d 100644
--- a/fs/afs/volume.c
+++ b/fs/afs/volume.c
@@ -1,6 +1,6 @@
-/* volume.c: AFS volume management
+/* AFS volume management
 *
- * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
+ * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 *
 * This program is free software; you can redistribute it and/or
@@ -15,35 +15,10 @@
 #include <linux/slab.h>
 #include <linux/fs.h>
 #include <linux/pagemap.h>
-#include "volume.h"
-#include "vnode.h"
-#include "cell.h"
-#include "cache.h"
-#include "cmservice.h"
-#include "fsclient.h"
-#include "vlclient.h"
 #include "internal.h"
-#ifdef __KDEBUG
 static const char *afs_voltypes[] = { "R/W", "R/O", "BAK" };
-#endif
-#ifdef AFS_CACHING_SUPPORT
-static cachefs_match_val_t afs_volume_cache_match(void *target,
-                                                  const void *entry);
-static void afs_volume_cache_update(void *source, void *entry);
-struct cachefs_index_def afs_volume_cache_index_def = {
-        .name           = "volume",
-        .data_size      = sizeof(struct afs_cache_vhash),
-        .keys[0]        = { CACHEFS_INDEX_KEYS_BIN, 1 },
-        .keys[1]        = { CACHEFS_INDEX_KEYS_BIN, 1 },
-        .match          = afs_volume_cache_match,
-        .update         = afs_volume_cache_update,
-};
-#endif
-/*****************************************************************************/
 /*
 * lookup a volume by name
 * - this can be one of the following:
@@ -66,118 +41,52 @@ struct cachefs_index_def afs_volume_cache_index_def = {
 * - Rule 3: If parent volume is R/W, then only mount R/W volume unless
 *           explicitly told otherwise
 */
-int afs_volume_lookup(const char *name, struct afs_cell *cell, int rwpath,
+struct afs_volume *afs_volume_lookup(struct afs_mount_params *params)
-                      struct afs_volume **_volume)
 {
        struct afs_vlocation *vlocation = NULL;
        struct afs_volume *volume = NULL;
-        afs_voltype_t type;
+        struct afs_server *server = NULL;
-        const char *cellname, *volname, *suffix;
        char srvtmask;
-        int force, ret, loop, cellnamesz, volnamesz;
+        int ret, loop;
-        _enter("%s,,%d,", name, rwpath);
-        if (!name || (name[0] != '%' && name[0] != '#') || !name[1]) {
-                printk("kAFS: unparsable volume name\n");
-                return -EINVAL;
-        }
-        /* determine the type of volume we're looking for */
-        force = 0;
-        type = AFSVL_ROVOL;
-        if (rwpath || name[0] == '%') {
-                type = AFSVL_RWVOL;
-                force = 1;
-        }
-        suffix = strrchr(name, '.');
-        if (suffix) {
-                if (strcmp(suffix, ".readonly") == 0) {
-                        type = AFSVL_ROVOL;
-                        force = 1;
-                }
-                else if (strcmp(suffix, ".backup") == 0) {
-                        type = AFSVL_BACKVOL;
-                        force = 1;
-                }
-                else if (suffix[1] == 0) {
-                }
-                else {
-                        suffix = NULL;
-                }
-        }
-        /* split the cell and volume names */
+        _enter("{%*.*s,%d}",
-        name++;
+               params->volnamesz, params->volnamesz, params->volname, params->rwpath);
-        volname = strchr(name, ':');
-        if (volname) {
-                cellname = name;
-                cellnamesz = volname - name;
-                volname++;
-        }
-        else {
-                volname = name;
-                cellname = NULL;
-                cellnamesz = 0;
-        }
-        volnamesz = suffix ? suffix - volname : strlen(volname);
-        _debug("CELL:%*.*s [%p] VOLUME:%*.*s SUFFIX:%s TYPE:%d%s",
-               cellnamesz, cellnamesz, cellname ?: "", cell,
-               volnamesz, volnamesz, volname, suffix ?: "-",
-               type,
-               force ? " FORCE" : "");
-        /* lookup the cell record */
-        if (cellname || !cell) {
-                ret = afs_cell_lookup(cellname, cellnamesz, &cell);
-                if (ret<0) {
-                        printk("kAFS: unable to lookup cell '%s'\n",
-                               cellname ?: "");
-                        goto error;
-                }
-        }
-        else {
-                afs_get_cell(cell);
-        }
        /* lookup the volume location record */
-        ret = afs_vlocation_lookup(cell, volname, volnamesz, &vlocation);
+        vlocation = afs_vlocation_lookup(params->cell, params->key,
-        if (ret < 0)
+                                         params->volname, params->volnamesz);
+        if (IS_ERR(vlocation)) {
+                ret = PTR_ERR(vlocation);
+                vlocation = NULL;
                goto error;
+        }
        /* make the final decision on the type we want */
        ret = -ENOMEDIUM;
-        if (force && !(vlocation->vldb.vidmask & (1 << type)))
+        if (params->force && !(vlocation->vldb.vidmask & (1 << params->type)))
                goto error;
        srvtmask = 0;
        for (loop = 0; loop < vlocation->vldb.nservers; loop++)
                srvtmask |= vlocation->vldb.srvtmask[loop];
-        if (force) {
+        if (params->force) {
-                if (!(srvtmask & (1 << type)))
+                if (!(srvtmask & (1 << params->type)))
                        goto error;
-        }
+        } else if (srvtmask & AFS_VOL_VTM_RO) {
-        else if (srvtmask & AFS_VOL_VTM_RO) {
+                params->type = AFSVL_ROVOL;
-                type = AFSVL_ROVOL;
+        } else if (srvtmask & AFS_VOL_VTM_RW) {
-        }
+                params->type = AFSVL_RWVOL;
-        else if (srvtmask & AFS_VOL_VTM_RW) {
+        } else {
-                type = AFSVL_RWVOL;
-        }
-        else {
                goto error;
        }
-        down_write(&cell->vl_sem);
+        down_write(&params->cell->vl_sem);
        /* is the volume already active? */
-        if (vlocation->vols[type]) {
+        if (vlocation->vols[params->type]) {
                /* yes - re-use it */
-                volume = vlocation->vols[type];
+                volume = vlocation->vols[params->type];
                afs_get_volume(volume);
                goto success;
        }
@@ -191,23 +100,24 @@ int afs_volume_lookup(const char *name, struct afs_cell *cell, int rwpath,
                goto error_up;
        atomic_set(&volume->usage, 1);
-        volume->type            = type;
+        volume->type            = params->type;
-        volume->type_force      = force;
+        volume->type_force      = params->force;
-        volume->cell            = cell;
+        volume->cell            = params->cell;
-        volume->vid             = vlocation->vldb.vid[type];
+        volume->vid             = vlocation->vldb.vid[params->type];
        init_rwsem(&volume->server_sem);
        /* look up all the applicable server records */
        for (loop = 0; loop < 8; loop++) {
                if (vlocation->vldb.srvtmask[loop] & (1 << volume->type)) {
-                        ret = afs_server_lookup(
+                        server = afs_lookup_server(
-                                volume->cell,
+                               volume->cell, &vlocation->vldb.servers[loop]);
-                                &vlocation->vldb.servers[loop],
+                        if (IS_ERR(server)) {
-                                &volume->servers[volume->nservers]);
+                                ret = PTR_ERR(server);
-                        if (ret < 0)
                                goto error_discard;
+                        }
+                        volume->servers[volume->nservers] = server;
                        volume->nservers++;
                }
        }
@@ -223,35 +133,34 @@ int afs_volume_lookup(const char *name, struct afs_cell *cell, int rwpath,
        afs_get_vlocation(vlocation);
        volume->vlocation = vlocation;
-        vlocation->vols[type] = volume;
+        vlocation->vols[volume->type] = volume;
- success:
+success:
        _debug("kAFS selected %s volume %08x",
               afs_voltypes[volume->type], volume->vid);
-        *_volume = volume;
+        up_write(&params->cell->vl_sem);
-        ret = 0;
+        afs_put_vlocation(vlocation);
+        _leave(" = %p", volume);
+        return volume;
        /* clean up */
- error_up:
+error_up:
-        up_write(&cell->vl_sem);
+        up_write(&params->cell->vl_sem);
- error:
+error:
        afs_put_vlocation(vlocation);
-        afs_put_cell(cell);
+        _leave(" = %d", ret);
+        return ERR_PTR(ret);
-        _leave(" = %d (%p)", ret, volume);
-        return ret;
- error_discard:
+error_discard:
-        up_write(&cell->vl_sem);
+        up_write(&params->cell->vl_sem);
        for (loop = volume->nservers - 1; loop >= 0; loop--)
                afs_put_server(volume->servers[loop]);
        kfree(volume);
        goto error;
-} /* end afs_volume_lookup() */
+}
-/*****************************************************************************/
 /*
 * destroy a volume record
 */
@@ -265,10 +174,9 @@ void afs_put_volume(struct afs_volume *volume)
        _enter("%p", volume);
-        vlocation = volume->vlocation;
+        ASSERTCMP(atomic_read(&volume->usage), >, 0);
-        /* sanity check */
+        vlocation = volume->vlocation;
-        BUG_ON(atomic_read(&volume->usage) <= 0);
        /* to prevent a race, the decrement and the dequeue must be effectively
         * atomic */
@@ -296,21 +204,27 @@ void afs_put_volume(struct afs_volume *volume)
        kfree(volume);
        _leave(" [destroyed]");
-} /* end afs_put_volume() */
+}
-/*****************************************************************************/
 /*
 * pick a server to use to try accessing this volume
 * - returns with an elevated usage count on the server chosen
 */
-int afs_volume_pick_fileserver(struct afs_volume *volume,
+struct afs_server *afs_volume_pick_fileserver(struct afs_vnode *vnode)
-                               struct afs_server **_server)
 {
+        struct afs_volume *volume = vnode->volume;
        struct afs_server *server;
        int ret, state, loop;
        _enter("%s", volume->vlocation->vldb.name);
+        /* stick with the server we're already using if we can */
+        if (vnode->server && vnode->server->fs_state == 0) {
+                afs_get_server(vnode->server);
+                _leave(" = %p [current]", vnode->server);
+                return vnode->server;
+        }
        down_read(&volume->server_sem);
        /* handle the no-server case */
@@ -318,7 +232,7 @@ int afs_volume_pick_fileserver(struct afs_volume *volume,
                ret = volume->rjservers ? -ENOMEDIUM : -ESTALE;
                up_read(&volume->server_sem);
                _leave(" = %d [no servers]", ret);
-                return ret;
+                return ERR_PTR(ret);
        }
        /* basically, just search the list for the first live server and use
@@ -328,15 +242,16 @@ int afs_volume_pick_fileserver(struct afs_volume *volume,
                server = volume->servers[loop];
                state = server->fs_state;
+                _debug("consider %d [%d]", loop, state);
                switch (state) {
                        /* found an apparently healthy server */
                case 0:
                        afs_get_server(server);
                        up_read(&volume->server_sem);
-                        *_server = server;
+                        _leave(" = %p (picked %08x)",
-                        _leave(" = 0 (picked %08x)",
+                               server, ntohl(server->addr.s_addr));
-                               ntohl(server->addr.s_addr));
+                        return server;
-                        return 0;
                case -ENETUNREACH:
                        if (ret == 0)
@@ -372,20 +287,21 @@ int afs_volume_pick_fileserver(struct afs_volume *volume,
         */
        up_read(&volume->server_sem);
        _leave(" = %d", ret);
-        return ret;
+        return ERR_PTR(ret);
-} /* end afs_volume_pick_fileserver() */
+}
-/*****************************************************************************/
 /*
 * release a server after use
 * - releases the ref on the server struct that was acquired by picking
 * - records result of using a particular server to access a volume
 * - return 0 to try again, 1 if okay or to issue error
+ * - on success (result == 0) the ref is kept; the caller must release it
 */
-int afs_volume_release_fileserver(struct afs_volume *volume,
+int afs_volume_release_fileserver(struct afs_vnode *vnode,
                                  struct afs_server *server,
                                  int result)
 {
+        struct afs_volume *volume = vnode->volume;
        unsigned loop;
        _enter("%s,%08x,%d",
@@ -396,14 +312,16 @@ int afs_volume_release_fileserver(struct afs_volume *volume,
                /* success */
        case 0:
                server->fs_act_jif = jiffies;
-                break;
+                server->fs_state = 0;
+                _leave("");
+                return 1;
                /* the fileserver denied all knowledge of the volume */
        case -ENOMEDIUM:
                server->fs_act_jif = jiffies;
                down_write(&volume->server_sem);
-                /* first, find where the server is in the active list (if it
+                /* firstly, find where the server is in the active list (if it
                 * is) */
                for (loop = 0; loop < volume->nservers; loop++)
                        if (volume->servers[loop] == server)
@@ -441,6 +359,7 @@ int afs_volume_release_fileserver(struct afs_volume *volume,
        case -ENETUNREACH:
        case -EHOSTUNREACH:
        case -ECONNREFUSED:
+        case -ETIME:
        case -ETIMEDOUT:
        case -EREMOTEIO:
                /* mark the server as dead
@@ -460,60 +379,17 @@ int afs_volume_release_fileserver(struct afs_volume *volume,
                server->fs_act_jif = jiffies;
        case -ENOMEM:
        case -ENONET:
-                break;
+                /* tell the caller to accept the result */
+                afs_put_server(server);
+                _leave(" [local failure]");
+                return 1;
        }
-        /* tell the caller to accept the result */
-        afs_put_server(server);
-        _leave("");
-        return 1;
        /* tell the caller to loop around and try the next server */
- try_next_server_upw:
+try_next_server_upw:
        up_write(&volume->server_sem);
- try_next_server:
+try_next_server:
        afs_put_server(server);
        _leave(" [try next server]");
        return 0;
+}
-} /* end afs_volume_release_fileserver() */
-/*****************************************************************************/
-/*
- * match a volume hash record stored in the cache
- */
-#ifdef AFS_CACHING_SUPPORT
-static cachefs_match_val_t afs_volume_cache_match(void *target,
-                                                  const void *entry)
-{
-        const struct afs_cache_vhash *vhash = entry;
-        struct afs_volume *volume = target;
-        _enter("{%u},{%u}", volume->type, vhash->vtype);
-        if (volume->type == vhash->vtype) {
-                _leave(" = SUCCESS");
-                return CACHEFS_MATCH_SUCCESS;
-        }
-        _leave(" = FAILED");
-        return CACHEFS_MATCH_FAILED;
-} /* end afs_volume_cache_match() */
-#endif
-/*****************************************************************************/
-/*
- * update a volume hash record stored in the cache
- */
-#ifdef AFS_CACHING_SUPPORT
-static void afs_volume_cache_update(void *source, void *entry)
-{
-        struct afs_cache_vhash *vhash = entry;
-        struct afs_volume *volume = source;
-        _enter("");
-        vhash->vtype = volume->type;
-} /* end afs_volume_cache_update() */
-#endif
diff --git a/fs/afs/volume.h b/fs/afs/volume.h
deleted file mode 100644
index bfdcf19ba3f3..000000000000
--- a/fs/afs/volume.h
+++ /dev/null
@@ -1,140 +0,0 @@
-/* volume.h: AFS volume management
- *
- * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-#ifndef _LINUX_AFS_VOLUME_H
-#define _LINUX_AFS_VOLUME_H
-#include "types.h"
-#include "fsclient.h"
-#include "kafstimod.h"
-#include "kafsasyncd.h"
-#include "cache.h"
-typedef enum {
-        AFS_VLUPD_SLEEP,                /* sleeping waiting for update timer to fire */
-        AFS_VLUPD_PENDING,              /* on pending queue */
-        AFS_VLUPD_INPROGRESS,           /* op in progress */
-        AFS_VLUPD_BUSYSLEEP,            /* sleeping because server returned EBUSY */
-        
-} __attribute__((packed)) afs_vlocation_upd_t;
-/*****************************************************************************/
-/*
- * entry in the cached volume location catalogue
- */
-struct afs_cache_vlocation
-{
-        uint8_t                 name[64];       /* volume name (lowercase, padded with NULs) */
-        uint8_t                 nservers;       /* number of entries used in servers[] */
-        uint8_t                 vidmask;        /* voltype mask for vid[] */
-        uint8_t                 srvtmask[8];    /* voltype masks for servers[] */
-#define AFS_VOL_VTM_RW  0x01 /* R/W version of the volume is available (on this server) */
-#define AFS_VOL_VTM_RO  0x02 /* R/O version of the volume is available (on this server) */
-#define AFS_VOL_VTM_BAK 0x04 /* backup version of the volume is available (on this server) */
-        afs_volid_t             vid[3];         /* volume IDs for R/W, R/O and Bak volumes */
-        struct in_addr          servers[8];     /* fileserver addresses */
-        time_t                  rtime;          /* last retrieval time */
-};
-#ifdef AFS_CACHING_SUPPORT
-extern struct cachefs_index_def afs_vlocation_cache_index_def;
-#endif
-/*****************************************************************************/
-/*
- * volume -> vnode hash table entry
- */
-struct afs_cache_vhash
-{
-        afs_voltype_t           vtype;          /* which volume variation */
-        uint8_t                 hash_bucket;    /* which hash bucket this represents */
-} __attribute__((packed));
-#ifdef AFS_CACHING_SUPPORT
-extern struct cachefs_index_def afs_volume_cache_index_def;
-#endif
-/*****************************************************************************/
-/*
- * AFS volume location record
- */
-struct afs_vlocation
-{
-        atomic_t                usage;
-        struct list_head        link;           /* link in cell volume location list */
-        struct afs_timer        timeout;        /* decaching timer */
-        struct afs_cell         *cell;          /* cell to which volume belongs */
-#ifdef AFS_CACHING_SUPPORT
-        struct cachefs_cookie   *cache;         /* caching cookie */
-#endif
-        struct afs_cache_vlocation vldb;        /* volume information DB record */
-        struct afs_volume       *vols[3];       /* volume access record pointer (index by type) */
-        rwlock_t                lock;           /* access lock */
-        unsigned long           read_jif;       /* time at which last read from vlserver */
-        struct afs_timer        upd_timer;      /* update timer */
-        struct afs_async_op     upd_op;         /* update operation */
-        afs_vlocation_upd_t     upd_state;      /* update state */
-        unsigned short          upd_first_svix; /* first server index during update */
-        unsigned short          upd_curr_svix;  /* current server index during update */
-        unsigned short          upd_rej_cnt;    /* ENOMEDIUM count during update */
-        unsigned short          upd_busy_cnt;   /* EBUSY count during update */
-        unsigned short          valid;          /* T if valid */
-};
-extern int afs_vlocation_lookup(struct afs_cell *cell,
-                                const char *name,
-                                unsigned namesz,
-                                struct afs_vlocation **_vlocation);
-#define afs_get_vlocation(V) do { atomic_inc(&(V)->usage); } while(0)
-extern void afs_put_vlocation(struct afs_vlocation *vlocation);
-extern void afs_vlocation_do_timeout(struct afs_vlocation *vlocation);
-/*****************************************************************************/
-/*
- * AFS volume access record
- */
-struct afs_volume
-{
-        atomic_t                usage;
-        struct afs_cell         *cell;          /* cell to which belongs (unrefd ptr) */
-        struct afs_vlocation    *vlocation;     /* volume location */
-#ifdef AFS_CACHING_SUPPORT
-        struct cachefs_cookie   *cache;         /* caching cookie */
-#endif
-        afs_volid_t             vid;            /* volume ID */
-        afs_voltype_t           type;           /* type of volume */
-        char                    type_force;     /* force volume type (suppress R/O -> R/W) */
-        unsigned short          nservers;       /* number of server slots filled */
-        unsigned short          rjservers;      /* number of servers discarded due to -ENOMEDIUM */
-        struct afs_server       *servers[8];    /* servers on which volume resides (ordered) */
-        struct rw_semaphore     server_sem;     /* lock for accessing current server */
-};
-extern int afs_volume_lookup(const char *name,
-                             struct afs_cell *cell,
-                             int rwpath,
-                             struct afs_volume **_volume);
-#define afs_get_volume(V) do { atomic_inc(&(V)->usage); } while(0)
-extern void afs_put_volume(struct afs_volume *volume);
-extern int afs_volume_pick_fileserver(struct afs_volume *volume,
-                                      struct afs_server **_server);
-extern int afs_volume_release_fileserver(struct afs_volume *volume,
-                                         struct afs_server *server,
-                                         int result);
-#endif /* _LINUX_AFS_VOLUME_H */
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 8b1c5d8bf4ef..c68b055fa26e 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -266,6 +266,23 @@ static int do_siocgstamp(unsigned int fd, unsigned int cmd, unsigned long arg)
        return err;
 }
+static int do_siocgstampns(unsigned int fd, unsigned int cmd, unsigned long arg)
+{       /* compat handler registered for SIOCGSTAMPNS */
+        struct compat_timespec __user *up = compat_ptr(arg);    /* caller's compat-layout destination */
+        struct timespec kts;            /* native-layout result filled in by sys_ioctl() */
+        mm_segment_t old_fs = get_fs(); /* saved so it can be restored below */
+        int err;
+        set_fs(KERNEL_DS);              /* so the kernel address &kts can be handed to sys_ioctl() */
+        err = sys_ioctl(fd, cmd, (unsigned long)&kts);
+        set_fs(old_fs);
+        if (!err) {                     /* only copy out when the native ioctl succeeded */
+                err = put_user(kts.tv_sec, &up->tv_sec);
+                err |= __put_user(kts.tv_nsec, &up->tv_nsec);   /* failures of either store are OR-ed into err */
+        }
+        return err;                     /* sys_ioctl() error, else the combined copy-out status */
+}
 struct ifmap32 {
        compat_ulong_t mem_start;
        compat_ulong_t mem_end;
@@ -2437,6 +2454,7 @@ HANDLE_IOCTL(SIOCBRDELIF, dev_ifsioc)
 /* Note SIOCRTMSG is no longer, so this is safe and * the user would have seen just an -EINVAL anyways. */
 HANDLE_IOCTL(SIOCRTMSG, ret_einval)
 HANDLE_IOCTL(SIOCGSTAMP, do_siocgstamp)
+HANDLE_IOCTL(SIOCGSTAMPNS, do_siocgstampns)
 #endif
 #ifdef CONFIG_BLOCK
 HANDLE_IOCTL(HDIO_GETGEO, hdio_getgeo)
diff --git a/fs/ecryptfs/netlink.c b/fs/ecryptfs/netlink.c
index e3aa2253c850..fe9186312d7c 100644
--- a/fs/ecryptfs/netlink.c
+++ b/fs/ecryptfs/netlink.c
@@ -97,7 +97,7 @@ out:
 */
 static int ecryptfs_process_nl_response(struct sk_buff *skb)
 {
-        struct nlmsghdr *nlh = (struct nlmsghdr*)skb->data;
+        struct nlmsghdr *nlh = nlmsg_hdr(skb);
        struct ecryptfs_message *msg = NLMSG_DATA(nlh);
        int rc;
@@ -181,7 +181,7 @@ receive:
                                "rc = [%d]\n", rc);
                return;
        }
-        nlh = (struct nlmsghdr *)skb->data;
+        nlh = nlmsg_hdr(skb);
        if (!NLMSG_OK(nlh, skb->len)) {
                ecryptfs_printk(KERN_ERR, "Received corrupt netlink "
                                "message\n");
@@ -229,7 +229,7 @@ int ecryptfs_init_netlink(void)
        ecryptfs_nl_sock = netlink_kernel_create(NETLINK_ECRYPTFS, 0,
                                                 ecryptfs_receive_nl_message,
-                                                 THIS_MODULE);
+                                                 NULL, THIS_MODULE);
        if (!ecryptfs_nl_sock) {
                rc = -EIO;
                ecryptfs_printk(KERN_ERR, "Failed to create netlink socket\n");