Merge commit 'upstream/master'

author: Haavard Skinnemoen <haavard.skinnemoen@atmel.com> 2008-07-27 07:54:08 -0400
committer: Haavard Skinnemoen <haavard.skinnemoen@atmel.com> 2008-07-27 07:54:08 -0400
commit: eda3d8f5604860aae1bb9996bb5efc4213778369 (patch)
tree: 9d3887d2665bcc5f5abf200758794545c7b2c69b /fs
parent: 87a9f704658a40940e740b1d73d861667e9164d3 (diff)
parent: 8be1a6d6c77ab4532e4476fdb8177030ef48b52c (diff)
249 files changed, 6820 insertions, 4290 deletions
diff --git a/fs/Kconfig b/fs/Kconfig
index 37db79a2ff95..d3873583360b 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -902,65 +902,7 @@ endif # BLOCK
 menu "Pseudo filesystems"
-config PROC_FS
+source "fs/proc/Kconfig"
-        bool "/proc file system support" if EMBEDDED
-        default y
-        help
-          This is a virtual file system providing information about the status
-          of the system. "Virtual" means that it doesn't take up any space on
-          your hard disk: the files are created on the fly by the kernel when
-          you try to access them. Also, you cannot read the files with older
-          version of the program less: you need to use more or cat.
-          It's totally cool; for example, "cat /proc/interrupts" gives
-          information about what the different IRQs are used for at the moment
-          (there is a small number of Interrupt ReQuest lines in your computer
-          that are used by the attached devices to gain the CPU's attention --
-          often a source of trouble if two devices are mistakenly configured
-          to use the same IRQ). The program procinfo to display some
-          information about your system gathered from the /proc file system.
-          Before you can use the /proc file system, it has to be mounted,
-          meaning it has to be given a location in the directory hierarchy.
-          That location should be /proc. A command such as "mount -t proc proc
-          /proc" or the equivalent line in /etc/fstab does the job.
-          The /proc file system is explained in the file
-          <file:Documentation/filesystems/proc.txt> and on the proc(5) manpage
-          ("man 5 proc").
-          This option will enlarge your kernel by about 67 KB. Several
-          programs depend on this, so everyone should say Y here.
-config PROC_KCORE
-        bool "/proc/kcore support" if !ARM
-        depends on PROC_FS && MMU
-config PROC_VMCORE
-        bool "/proc/vmcore support (EXPERIMENTAL)"
-        depends on PROC_FS && CRASH_DUMP
-        default y
-        help
-        Exports the dump image of crashed kernel in ELF format.
-config PROC_SYSCTL
-        bool "Sysctl support (/proc/sys)" if EMBEDDED
-        depends on PROC_FS
-        select SYSCTL
-        default y
-        ---help---
-          The sysctl interface provides a means of dynamically changing
-          certain kernel parameters and variables on the fly without requiring
-          a recompile of the kernel or reboot of the system.  The primary
-          interface is through /proc/sys.  If you say Y here a tree of
-          modifiable sysctl entries will be generated beneath the
-          /proc/sys directory. They are explained in the files
-          in <file:Documentation/sysctl/>.  Note that enabling this
-          option will enlarge the kernel by at least 8 KB.
-          As it is generally a good thing, you should say Y here unless
-          building a kernel for install/rescue disks or your system is very
-          limited in memory.
 config SYSFS
        bool "sysfs file system support" if EMBEDDED
@@ -1441,6 +1383,19 @@ config MINIX_FS
          partition (the one containing the directory /) cannot be compiled as
          a module.
+config OMFS_FS
+        tristate "SonicBlue Optimized MPEG File System support"
+        depends on BLOCK
+        select CRC_ITU_T
+        help
+          This is the proprietary file system used by the Rio Karma music
+          player and ReplayTV DVR.  Despite the name, this filesystem is not
+          more efficient than a standard FS for MPEG files, in fact likely
+          the opposite is true.  Say Y if you have either of these devices
+          and wish to mount its disk.
+          To compile this file system support as a module, choose M here: the
+          module will be called omfs.  If unsure, say N.
 config HPFS_FS
        tristate "OS/2 HPFS file system support"
@@ -2093,20 +2048,6 @@ config CODA_FS
          To compile the coda client support as a module, choose M here: the
          module will be called coda.
-config CODA_FS_OLD_API
-        bool "Use 96-bit Coda file identifiers"
-        depends on CODA_FS
-        help
-          A new kernel-userspace API had to be introduced for Coda v6.0
-          to support larger 128-bit file identifiers as needed by the
-          new realms implementation.
-          However this new API is not backward compatible with older
-          clients. If you really need to run the old Coda userspace
-          cache manager then say Y.
-          For most cases you probably want to say N.
 config AFS_FS
        tristate "Andrew File System support (AFS) (EXPERIMENTAL)"
        depends on INET && EXPERIMENTAL
diff --git a/fs/Makefile b/fs/Makefile
index 3b2178b4bb66..a1482a5eff15 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -111,6 +111,7 @@ obj-$(CONFIG_ADFS_FS)		+= adfs/
 obj-$(CONFIG_FUSE_FS)           += fuse/
 obj-$(CONFIG_UDF_FS)            += udf/
 obj-$(CONFIG_SUN_OPENPROMFS)    += openpromfs/
+obj-$(CONFIG_OMFS_FS)           += omfs/
 obj-$(CONFIG_JFS_FS)            += jfs/
 obj-$(CONFIG_XFS_FS)            += xfs/
 obj-$(CONFIG_9P_FS)             += 9p/
diff --git a/fs/adfs/super.c b/fs/adfs/super.c
index 9e421eeb672b..26f3b43726bb 100644
--- a/fs/adfs/super.c
+++ b/fs/adfs/super.c
@@ -249,7 +249,7 @@ static void adfs_destroy_inode(struct inode *inode)
        kmem_cache_free(adfs_inode_cachep, ADFS_I(inode));
 }
-static void init_once(struct kmem_cache *cachep, void *foo)
+static void init_once(void *foo)
 {
        struct adfs_inode_info *ei = (struct adfs_inode_info *) foo;
diff --git a/fs/affs/affs.h b/fs/affs/affs.h
index 223b1917093e..e9ec915f7553 100644
--- a/fs/affs/affs.h
+++ b/fs/affs/affs.h
@@ -2,6 +2,7 @@
 #include <linux/fs.h>
 #include <linux/buffer_head.h>
 #include <linux/amigaffs.h>
+#include <linux/mutex.h>
 /* AmigaOS allows file names with up to 30 characters length.
 * Names longer than that will be silently truncated. If you
@@ -98,7 +99,7 @@ struct affs_sb_info {
        gid_t s_gid;                    /* gid to override */
        umode_t s_mode;                 /* mode to override */
        struct buffer_head *s_root_bh;  /* Cached root block. */
-        struct semaphore s_bmlock;      /* Protects bitmap access. */
+        struct mutex s_bmlock;          /* Protects bitmap access. */
        struct affs_bm_info *s_bitmap;  /* Bitmap infos. */
        u32 s_bmap_count;               /* # of bitmap blocks. */
        u32 s_bmap_bits;                /* # of bits in one bitmap blocks */
diff --git a/fs/affs/bitmap.c b/fs/affs/bitmap.c
index c4a5ad09ddf2..dc5ef14bdc1c 100644
--- a/fs/affs/bitmap.c
+++ b/fs/affs/bitmap.c
@@ -45,14 +45,14 @@ affs_count_free_blocks(struct super_block *sb)
        if (sb->s_flags & MS_RDONLY)
                return 0;
-        down(&AFFS_SB(sb)->s_bmlock);
+        mutex_lock(&AFFS_SB(sb)->s_bmlock);
        bm = AFFS_SB(sb)->s_bitmap;
        free = 0;
        for (i = AFFS_SB(sb)->s_bmap_count; i > 0; bm++, i--)
                free += bm->bm_free;
-        up(&AFFS_SB(sb)->s_bmlock);
+        mutex_unlock(&AFFS_SB(sb)->s_bmlock);
        return free;
 }
@@ -76,7 +76,7 @@ affs_free_block(struct super_block *sb, u32 block)
        bit     = blk % sbi->s_bmap_bits;
        bm      = &sbi->s_bitmap[bmap];
-        down(&sbi->s_bmlock);
+        mutex_lock(&sbi->s_bmlock);
        bh = sbi->s_bmap_bh;
        if (sbi->s_last_bmap != bmap) {
@@ -105,19 +105,19 @@ affs_free_block(struct super_block *sb, u32 block)
        sb->s_dirt = 1;
        bm->bm_free++;
-        up(&sbi->s_bmlock);
+        mutex_unlock(&sbi->s_bmlock);
        return;
 err_free:
        affs_warning(sb,"affs_free_block","Trying to free block %u which is already free", block);
-        up(&sbi->s_bmlock);
+        mutex_unlock(&sbi->s_bmlock);
        return;
 err_bh_read:
        affs_error(sb,"affs_free_block","Cannot read bitmap block %u", bm->bm_key);
        sbi->s_bmap_bh = NULL;
        sbi->s_last_bmap = ~0;
-        up(&sbi->s_bmlock);
+        mutex_unlock(&sbi->s_bmlock);
        return;
 err_range:
@@ -168,7 +168,7 @@ affs_alloc_block(struct inode *inode, u32 goal)
        bmap = blk / sbi->s_bmap_bits;
        bm = &sbi->s_bitmap[bmap];
-        down(&sbi->s_bmlock);
+        mutex_lock(&sbi->s_bmlock);
        if (bm->bm_free)
                goto find_bmap_bit;
@@ -249,7 +249,7 @@ find_bit:
        mark_buffer_dirty(bh);
        sb->s_dirt = 1;
-        up(&sbi->s_bmlock);
+        mutex_unlock(&sbi->s_bmlock);
        pr_debug("%d\n", blk);
        return blk;
@@ -259,7 +259,7 @@ err_bh_read:
        sbi->s_bmap_bh = NULL;
        sbi->s_last_bmap = ~0;
 err_full:
-        up(&sbi->s_bmlock);
+        mutex_unlock(&sbi->s_bmlock);
        pr_debug("failed\n");
        return 0;
 }
diff --git a/fs/affs/file.c b/fs/affs/file.c
index 6eac7bdeec94..1377b1240b6e 100644
--- a/fs/affs/file.c
+++ b/fs/affs/file.c
@@ -46,8 +46,6 @@ const struct inode_operations affs_file_inode_operations = {
 static int
 affs_file_open(struct inode *inode, struct file *filp)
 {
-        if (atomic_read(&filp->f_count) != 1)
-                return 0;
        pr_debug("AFFS: open(%lu,%d)\n",
                 inode->i_ino, atomic_read(&AFFS_I(inode)->i_opencnt));
        atomic_inc(&AFFS_I(inode)->i_opencnt);
@@ -57,8 +55,6 @@ affs_file_open(struct inode *inode, struct file *filp)
 static int
 affs_file_release(struct inode *inode, struct file *filp)
 {
-        if (atomic_read(&filp->f_count) != 0)
-                return 0;
        pr_debug("AFFS: release(%lu, %d)\n",
                 inode->i_ino, atomic_read(&AFFS_I(inode)->i_opencnt));
diff --git a/fs/affs/super.c b/fs/affs/super.c
index d214837d5e42..3a89094f93d0 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -90,7 +90,7 @@ static void affs_destroy_inode(struct inode *inode)
        kmem_cache_free(affs_inode_cachep, AFFS_I(inode));
 }
-static void init_once(struct kmem_cache *cachep, void *foo)
+static void init_once(void *foo)
 {
        struct affs_inode_info *ei = (struct affs_inode_info *) foo;
@@ -290,7 +290,7 @@ static int affs_fill_super(struct super_block *sb, void *data, int silent)
        if (!sbi)
                return -ENOMEM;
        sb->s_fs_info = sbi;
-        init_MUTEX(&sbi->s_bmlock);
+        mutex_init(&sbi->s_bmlock);
        if (!parse_options(data,&uid,&gid,&i,&reserved,&root_block,
                                &blocksize,&sbi->s_prefix,
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 7102824ba847..3cb6920ff30b 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -469,8 +469,6 @@ extern bool afs_cm_incoming_call(struct afs_call *);
 extern const struct inode_operations afs_dir_inode_operations;
 extern const struct file_operations afs_dir_file_operations;
-extern int afs_permission(struct inode *, int, struct nameidata *);
 /*
 * file.c
 */
@@ -605,7 +603,7 @@ extern void afs_clear_permits(struct afs_vnode *);
 extern void afs_cache_permit(struct afs_vnode *, struct key *, long);
 extern void afs_zap_permits(struct rcu_head *);
 extern struct key *afs_request_key(struct afs_cell *);
-extern int afs_permission(struct inode *, int, struct nameidata *);
+extern int afs_permission(struct inode *, int);
 /*
 * server.c
diff --git a/fs/afs/security.c b/fs/afs/security.c
index 3bcbeceba1bb..3ef504370034 100644
--- a/fs/afs/security.c
+++ b/fs/afs/security.c
@@ -284,7 +284,7 @@ static int afs_check_permit(struct afs_vnode *vnode, struct key *key,
 * - AFS ACLs are attached to directories only, and a file is controlled by its
 *   parent directory's ACL
 */
-int afs_permission(struct inode *inode, int mask, struct nameidata *nd)
+int afs_permission(struct inode *inode, int mask)
 {
        struct afs_vnode *vnode = AFS_FS_I(inode);
        afs_access_t uninitialized_var(access);
diff --git a/fs/afs/super.c b/fs/afs/super.c
index 7e3faeef6818..250d8c4d66e4 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -27,7 +27,7 @@
 #define AFS_FS_MAGIC 0x6B414653 /* 'kAFS' */
-static void afs_i_init_once(struct kmem_cache *cachep, void *foo);
+static void afs_i_init_once(void *foo);
 static int afs_get_sb(struct file_system_type *fs_type,
                      int flags, const char *dev_name,
                      void *data, struct vfsmount *mnt);
@@ -449,7 +449,7 @@ static void afs_put_super(struct super_block *sb)
 /*
 * initialise an inode cache slab element prior to any use
 */
-static void afs_i_init_once(struct kmem_cache *cachep, void *_vnode)
+static void afs_i_init_once(void *_vnode)
 {
        struct afs_vnode *vnode = _vnode;
diff --git a/fs/aio.c b/fs/aio.c
index 0fb3117ddd93..f658441d5666 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -512,8 +512,8 @@ static void aio_fput_routine(struct work_struct *data)
 */
 static int __aio_put_req(struct kioctx *ctx, struct kiocb *req)
 {
-        dprintk(KERN_DEBUG "aio_put(%p): f_count=%d\n",
+        dprintk(KERN_DEBUG "aio_put(%p): f_count=%ld\n",
-                req, atomic_read(&req->ki_filp->f_count));
+                req, atomic_long_read(&req->ki_filp->f_count));
        assert_spin_locked(&ctx->ctx_lock);
@@ -528,7 +528,7 @@ static int __aio_put_req(struct kioctx *ctx, struct kiocb *req)
        /* Must be done under the lock to serialise against cancellation.
         * Call this aio_fput as it duplicates fput via the fput_work.
         */
-        if (unlikely(atomic_dec_and_test(&req->ki_filp->f_count))) {
+        if (unlikely(atomic_long_dec_and_test(&req->ki_filp->f_count))) {
                get_ioctx(ctx);
                spin_lock(&fput_lock);
                list_add(&req->ki_list, &fput_head);
@@ -586,7 +586,6 @@ static void use_mm(struct mm_struct *mm)
        struct task_struct *tsk = current;
        task_lock(tsk);
-        tsk->flags |= PF_BORROWED_MM;
        active_mm = tsk->active_mm;
        atomic_inc(&mm->mm_count);
        tsk->mm = mm;
@@ -610,7 +609,6 @@ static void unuse_mm(struct mm_struct *mm)
        struct task_struct *tsk = current;
        task_lock(tsk);
-        tsk->flags &= ~PF_BORROWED_MM;
        tsk->mm = NULL;
        /* active_mm is still 'mm' */
        enter_lazy_tlb(mm, tsk);
diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index 977ef208c051..3662dd44896b 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -58,8 +58,9 @@ static struct dentry_operations anon_inodefs_dentry_operations = {
 *                    of the file
 *
 * @name:    [in]    name of the "class" of the new file
- * @fops     [in]    file operations for the new file
+ * @fops:    [in]    file operations for the new file
- * @priv     [in]    private data for the new file (will be file's private_data)
+ * @priv:    [in]    private data for the new file (will be file's private_data)
+ * @flags:   [in]    flags
 *
 * Creates a new file by hooking it on a single inode. This is useful for files
 * that do not need to have a full-fledged inode in order to operate correctly.
@@ -68,7 +69,7 @@ static struct dentry_operations anon_inodefs_dentry_operations = {
 * setup.  Returns new descriptor or -error.
 */
 int anon_inode_getfd(const char *name, const struct file_operations *fops,
-                     void *priv)
+                     void *priv, int flags)
 {
        struct qstr this;
        struct dentry *dentry;
@@ -78,7 +79,7 @@ int anon_inode_getfd(const char *name, const struct file_operations *fops,
        if (IS_ERR(anon_inode_inode))
                return -ENODEV;
-        error = get_unused_fd();
+        error = get_unused_fd_flags(flags);
        if (error < 0)
                return error;
        fd = error;
@@ -115,7 +116,7 @@ int anon_inode_getfd(const char *name, const struct file_operations *fops,
        file->f_mapping = anon_inode_inode->i_mapping;
        file->f_pos = 0;
-        file->f_flags = O_RDWR;
+        file->f_flags = O_RDWR | (flags & O_NONBLOCK);
        file->f_version = 0;
        file->private_data = priv;
diff --git a/fs/attr.c b/fs/attr.c
index 966b73e25f82..26c71ba1eed4 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -51,7 +51,7 @@ int inode_change_ok(struct inode *inode, struct iattr *attr)
        }
        /* Check for setting the inode time. */
-        if (ia_valid & (ATTR_MTIME_SET | ATTR_ATIME_SET)) {
+        if (ia_valid & (ATTR_MTIME_SET | ATTR_ATIME_SET | ATTR_TIMES_SET)) {
                if (!is_owner_or_cap(inode))
                        goto error;
        }
@@ -108,6 +108,11 @@ int notify_change(struct dentry * dentry, struct iattr * attr)
        struct timespec now;
        unsigned int ia_valid = attr->ia_valid;
+        if (ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID | ATTR_TIMES_SET)) {
+                if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
+                        return -EPERM;
+        }
        now = current_fs_time(inode->i_sb);
        attr->ia_ctime = now;
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index c3d352d7fa93..69a2f5c92319 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -52,7 +52,10 @@ struct autofs_info {
        int             flags;
-        struct list_head rehash;
+        struct completion expire_complete;
+        struct list_head active;
+        struct list_head expiring;
        struct autofs_sb_info *sbi;
        unsigned long last_used;
@@ -68,15 +71,14 @@ struct autofs_info {
 };
 #define AUTOFS_INF_EXPIRING     (1<<0) /* dentry is in the process of expiring */
+#define AUTOFS_INF_MOUNTPOINT   (1<<1) /* mountpoint status for direct expire */
 struct autofs_wait_queue {
        wait_queue_head_t queue;
        struct autofs_wait_queue *next;
        autofs_wqt_t wait_queue_token;
        /* We use the following to see what we are waiting for */
-        unsigned int hash;
+        struct qstr name;
-        unsigned int len;
-        char *name;
        u32 dev;
        u64 ino;
        uid_t uid;
@@ -85,7 +87,7 @@ struct autofs_wait_queue {
        pid_t tgid;
        /* This is for status reporting upon return */
        int status;
-        atomic_t wait_ctr;
+        unsigned int wait_ctr;
 };
 #define AUTOFS_SBI_MAGIC 0x6d4a556d
@@ -112,8 +114,9 @@ struct autofs_sb_info {
        struct mutex wq_mutex;
        spinlock_t fs_lock;
        struct autofs_wait_queue *queues; /* Wait queue pointer */
-        spinlock_t rehash_lock;
+        spinlock_t lookup_lock;
-        struct list_head rehash_list;
+        struct list_head active_list;
+        struct list_head expiring_list;
 };
 static inline struct autofs_sb_info *autofs4_sbi(struct super_block *sb)
@@ -138,18 +141,14 @@ static inline int autofs4_oz_mode(struct autofs_sb_info *sbi) {
 static inline int autofs4_ispending(struct dentry *dentry)
 {
        struct autofs_info *inf = autofs4_dentry_ino(dentry);
-        int pending = 0;
        if (dentry->d_flags & DCACHE_AUTOFS_PENDING)
                return 1;
-        if (inf) {
+        if (inf->flags & AUTOFS_INF_EXPIRING)
-                spin_lock(&inf->sbi->fs_lock);
+                return 1;
-                pending = inf->flags & AUTOFS_INF_EXPIRING;
-                spin_unlock(&inf->sbi->fs_lock);
-        }
-        return pending;
+        return 0;
 }
 static inline void autofs4_copy_atime(struct file *src, struct file *dst)
@@ -164,6 +163,7 @@ void autofs4_free_ino(struct autofs_info *);
 /* Expiration */
 int is_autofs4_dentry(struct dentry *);
+int autofs4_expire_wait(struct dentry *dentry);
 int autofs4_expire_run(struct super_block *, struct vfsmount *,
                        struct autofs_sb_info *,
                        struct autofs_packet_expire __user *);
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index 894fee54d4d8..cdabb796ff01 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -259,13 +259,15 @@ static struct dentry *autofs4_expire_direct(struct super_block *sb,
        now = jiffies;
        timeout = sbi->exp_timeout;
-        /* Lock the tree as we must expire as a whole */
        spin_lock(&sbi->fs_lock);
        if (!autofs4_direct_busy(mnt, root, timeout, do_now)) {
                struct autofs_info *ino = autofs4_dentry_ino(root);
+                if (d_mountpoint(root)) {
-                /* Set this flag early to catch sys_chdir and the like */
+                        ino->flags |= AUTOFS_INF_MOUNTPOINT;
+                        root->d_mounted--;
+                }
                ino->flags |= AUTOFS_INF_EXPIRING;
+                init_completion(&ino->expire_complete);
                spin_unlock(&sbi->fs_lock);
                return root;
        }
@@ -292,6 +294,8 @@ static struct dentry *autofs4_expire_indirect(struct super_block *sb,
        struct list_head *next;
        int do_now = how & AUTOFS_EXP_IMMEDIATE;
        int exp_leaves = how & AUTOFS_EXP_LEAVES;
+        struct autofs_info *ino;
+        unsigned int ino_count;
        if (!root)
                return NULL;
@@ -316,6 +320,9 @@ static struct dentry *autofs4_expire_indirect(struct super_block *sb,
                dentry = dget(dentry);
                spin_unlock(&dcache_lock);
+                spin_lock(&sbi->fs_lock);
+                ino = autofs4_dentry_ino(dentry);
                /*
                 * Case 1: (i) indirect mount or top level pseudo direct mount
                 *         (autofs-4.1).
@@ -326,6 +333,11 @@ static struct dentry *autofs4_expire_indirect(struct super_block *sb,
                        DPRINTK("checking mountpoint %p %.*s",
                                dentry, (int)dentry->d_name.len, dentry->d_name.name);
+                        /* Path walk currently on this dentry? */
+                        ino_count = atomic_read(&ino->count) + 2;
+                        if (atomic_read(&dentry->d_count) > ino_count)
+                                goto next;
                        /* Can we umount this guy */
                        if (autofs4_mount_busy(mnt, dentry))
                                goto next;
@@ -343,23 +355,25 @@ static struct dentry *autofs4_expire_indirect(struct super_block *sb,
                /* Case 2: tree mount, expire iff entire tree is not busy */
                if (!exp_leaves) {
-                        /* Lock the tree as we must expire as a whole */
+                        /* Path walk currently on this dentry? */
-                        spin_lock(&sbi->fs_lock);
+                        ino_count = atomic_read(&ino->count) + 1;
-                        if (!autofs4_tree_busy(mnt, dentry, timeout, do_now)) {
+                        if (atomic_read(&dentry->d_count) > ino_count)
-                                struct autofs_info *inf = autofs4_dentry_ino(dentry);
+                                goto next;
-                                /* Set this flag early to catch sys_chdir and the like */
+                        if (!autofs4_tree_busy(mnt, dentry, timeout, do_now)) {
-                                inf->flags |= AUTOFS_INF_EXPIRING;
-                                spin_unlock(&sbi->fs_lock);
                                expired = dentry;
                                goto found;
                        }
-                        spin_unlock(&sbi->fs_lock);
                /*
                 * Case 3: pseudo direct mount, expire individual leaves
                 *         (autofs-4.1).
                 */
                } else {
+                        /* Path walk currently on this dentry? */
+                        ino_count = atomic_read(&ino->count) + 1;
+                        if (atomic_read(&dentry->d_count) > ino_count)
+                                goto next;
                        expired = autofs4_check_leaves(mnt, dentry, timeout, do_now);
                        if (expired) {
                                dput(dentry);
@@ -367,6 +381,7 @@ static struct dentry *autofs4_expire_indirect(struct super_block *sb,
                        }
                }
 next:
+                spin_unlock(&sbi->fs_lock);
                dput(dentry);
                spin_lock(&dcache_lock);
                next = next->next;
@@ -377,12 +392,45 @@ next:
 found:
        DPRINTK("returning %p %.*s",
                expired, (int)expired->d_name.len, expired->d_name.name);
+        ino = autofs4_dentry_ino(expired);
+        ino->flags |= AUTOFS_INF_EXPIRING;
+        init_completion(&ino->expire_complete);
+        spin_unlock(&sbi->fs_lock);
        spin_lock(&dcache_lock);
        list_move(&expired->d_parent->d_subdirs, &expired->d_u.d_child);
        spin_unlock(&dcache_lock);
        return expired;
 }
+int autofs4_expire_wait(struct dentry *dentry)
+{
+        struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
+        struct autofs_info *ino = autofs4_dentry_ino(dentry);
+        int status;
+        /*
+         * Block on any pending expire.
+         *
+         * AUTOFS_INF_EXPIRING is tested in ino->flags while holding
+         * sbi->fs_lock. When it is set, this function calls
+         * autofs4_wait() with NFY_NONE and then waits on
+         * ino->expire_complete before returning.
+         *
+         * Returns 0 when the flag was not set, -EAGAIN when the dentry
+         * is unhashed after the wait, otherwise the status returned by
+         * autofs4_wait().
+         */
+        spin_lock(&sbi->fs_lock);
+        if (ino->flags & AUTOFS_INF_EXPIRING) {
+                spin_unlock(&sbi->fs_lock); /* lock is released before the waits below */
+                DPRINTK("waiting for expire %p name=%.*s",
+                         dentry, dentry->d_name.len, dentry->d_name.name);
+                status = autofs4_wait(sbi, dentry, NFY_NONE);
+                wait_for_completion(&ino->expire_complete);
+                DPRINTK("expire done status=%d", status);
+                if (d_unhashed(dentry)) /* NOTE(review): presumably the expire removed the dentry, so the caller should retry -- confirm */
+                        return -EAGAIN;
+                return status;
+        }
+        spin_unlock(&sbi->fs_lock); /* no expire flagged: nothing to wait for */
+        return 0;
+}
 /* Perform an expiry operation */
 int autofs4_expire_run(struct super_block *sb,
                      struct vfsmount *mnt,
@@ -390,7 +438,9 @@ int autofs4_expire_run(struct super_block *sb,
                      struct autofs_packet_expire __user *pkt_p)
 {
        struct autofs_packet_expire pkt;
+        struct autofs_info *ino;
        struct dentry *dentry;
+        int ret = 0;
        memset(&pkt,0,sizeof pkt);
@@ -406,9 +456,15 @@ int autofs4_expire_run(struct super_block *sb,
        dput(dentry);
        if ( copy_to_user(pkt_p, &pkt, sizeof(struct autofs_packet_expire)) )
-                return -EFAULT;
+                ret = -EFAULT;
-        return 0;
+        spin_lock(&sbi->fs_lock);
+        ino = autofs4_dentry_ino(dentry);
+        ino->flags &= ~AUTOFS_INF_EXPIRING;
+        complete_all(&ino->expire_complete);
+        spin_unlock(&sbi->fs_lock);
+        return ret;
 }
 /* Call repeatedly until it returns -EAGAIN, meaning there's nothing
@@ -433,9 +489,16 @@ int autofs4_expire_multi(struct super_block *sb, struct vfsmount *mnt,
                /* This is synchronous because it makes the daemon a
                   little easier */
-                ino->flags |= AUTOFS_INF_EXPIRING;
                ret = autofs4_wait(sbi, dentry, NFY_EXPIRE);
+                spin_lock(&sbi->fs_lock);
+                if (ino->flags & AUTOFS_INF_MOUNTPOINT) {
+                        sb->s_root->d_mounted++;
+                        ino->flags &= ~AUTOFS_INF_MOUNTPOINT;
+                }
                ino->flags &= ~AUTOFS_INF_EXPIRING;
+                complete_all(&ino->expire_complete);
+                spin_unlock(&sbi->fs_lock);
                dput(dentry);
        }
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index 2fdcf5e1d236..7bb3e5ba0537 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -24,8 +24,10 @@
 static void ino_lnkfree(struct autofs_info *ino)
 {
-        kfree(ino->u.symlink);
+        if (ino->u.symlink) {
-        ino->u.symlink = NULL;
+                kfree(ino->u.symlink);
+                ino->u.symlink = NULL;
+        }
 }
 struct autofs_info *autofs4_init_ino(struct autofs_info *ino,
@@ -41,16 +43,18 @@ struct autofs_info *autofs4_init_ino(struct autofs_info *ino,
        if (ino == NULL)
                return NULL;
-        ino->flags = 0;
+        if (!reinit) {
-        ino->mode = mode;
+                ino->flags = 0;
-        ino->inode = NULL;
+                ino->inode = NULL;
-        ino->dentry = NULL;
+                ino->dentry = NULL;
-        ino->size = 0;
+                ino->size = 0;
+                INIT_LIST_HEAD(&ino->active);
-        INIT_LIST_HEAD(&ino->rehash);
+                INIT_LIST_HEAD(&ino->expiring);
+                atomic_set(&ino->count, 0);
+        }
+        ino->mode = mode;
        ino->last_used = jiffies;
-        atomic_set(&ino->count, 0);
        ino->sbi = sbi;
@@ -159,8 +163,8 @@ void autofs4_kill_sb(struct super_block *sb)
        if (!sbi)
                goto out_kill_sb;
-        if (!sbi->catatonic)
+        /* Free wait queues, close pipe */
-                autofs4_catatonic_mode(sbi); /* Free wait queues, close pipe */
+        autofs4_catatonic_mode(sbi);
        /* Clean up and release dangling references */
        autofs4_force_release(sbi);
@@ -338,8 +342,9 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
        mutex_init(&sbi->wq_mutex);
        spin_lock_init(&sbi->fs_lock);
        sbi->queues = NULL;
-        spin_lock_init(&sbi->rehash_lock);
+        spin_lock_init(&sbi->lookup_lock);
-        INIT_LIST_HEAD(&sbi->rehash_list);
+        INIT_LIST_HEAD(&sbi->active_list);
+        INIT_LIST_HEAD(&sbi->expiring_list);
        s->s_blocksize = 1024;
        s->s_blocksize_bits = 10;
        s->s_magic = AUTOFS_SUPER_MAGIC;
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index edf5b6bddb52..bcfb2dc0a61b 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -25,25 +25,25 @@ static int autofs4_dir_rmdir(struct inode *,struct dentry *);
 static int autofs4_dir_mkdir(struct inode *,struct dentry *,int);
 static int autofs4_root_ioctl(struct inode *, struct file *,unsigned int,unsigned long);
 static int autofs4_dir_open(struct inode *inode, struct file *file);
-static int autofs4_dir_close(struct inode *inode, struct file *file);
-static int autofs4_dir_readdir(struct file * filp, void * dirent, filldir_t filldir);
-static int autofs4_root_readdir(struct file * filp, void * dirent, filldir_t filldir);
 static struct dentry *autofs4_lookup(struct inode *,struct dentry *, struct nameidata *);
 static void *autofs4_follow_link(struct dentry *, struct nameidata *);
+#define TRIGGER_FLAGS   (LOOKUP_CONTINUE | LOOKUP_DIRECTORY)
+#define TRIGGER_INTENTS (LOOKUP_OPEN | LOOKUP_CREATE)
 const struct file_operations autofs4_root_operations = {
        .open           = dcache_dir_open,
        .release        = dcache_dir_close,
        .read           = generic_read_dir,
-        .readdir        = autofs4_root_readdir,
+        .readdir        = dcache_readdir,
        .ioctl          = autofs4_root_ioctl,
 };
 const struct file_operations autofs4_dir_operations = {
        .open           = autofs4_dir_open,
-        .release        = autofs4_dir_close,
+        .release        = dcache_dir_close,
        .read           = generic_read_dir,
-        .readdir        = autofs4_dir_readdir,
+        .readdir        = dcache_readdir,
 };
 const struct inode_operations autofs4_indirect_root_inode_operations = {
@@ -70,42 +70,10 @@ const struct inode_operations autofs4_dir_inode_operations = {
        .rmdir          = autofs4_dir_rmdir,
 };
-static int autofs4_root_readdir(struct file *file, void *dirent,
-                                filldir_t filldir)
-{
-        struct autofs_sb_info *sbi = autofs4_sbi(file->f_path.dentry->d_sb);
-        int oz_mode = autofs4_oz_mode(sbi);
-        DPRINTK("called, filp->f_pos = %lld", file->f_pos);
-        /*
-         * Don't set reghost flag if:
-         * 1) f_pos is larger than zero -- we've already been here.
-         * 2) we haven't even enabled reghosting in the 1st place.
-         * 3) this is the daemon doing a readdir
-         */
-        if (oz_mode && file->f_pos == 0 && sbi->reghost_enabled)
-                sbi->needs_reghost = 1;
-        DPRINTK("needs_reghost = %d", sbi->needs_reghost);
-        return dcache_readdir(file, dirent, filldir);
-}
 static int autofs4_dir_open(struct inode *inode, struct file *file)
 {
        struct dentry *dentry = file->f_path.dentry;
-        struct vfsmount *mnt = file->f_path.mnt;
        struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
-        struct dentry *cursor;
-        int status;
-        status = dcache_dir_open(inode, file);
-        if (status)
-                goto out;
-        cursor = file->private_data;
-        cursor->d_fsdata = NULL;
        DPRINTK("file=%p dentry=%p %.*s",
                file, dentry, dentry->d_name.len, dentry->d_name.name);
@@ -113,159 +81,32 @@ static int autofs4_dir_open(struct inode *inode, struct file *file)
        if (autofs4_oz_mode(sbi))
                goto out;
-        if (autofs4_ispending(dentry)) {
+        /*
-                DPRINTK("dentry busy");
+         * An empty directory in an autofs file system is always a
-                dcache_dir_close(inode, file);
+         * mount point. The daemon must have failed to mount this
-                status = -EBUSY;
+         * during lookup so it doesn't exist. This can happen, for
-                goto out;
+         * example, if user space returns an incorrect status for a
-        }
+         * mount request. Otherwise we're doing a readdir on the
+         * autofs file system so just let the libfs routines handle
-        status = -ENOENT;
+         * it.
-        if (!d_mountpoint(dentry) && dentry->d_op && dentry->d_op->d_revalidate) {
+         */
-                struct nameidata nd;
+        spin_lock(&dcache_lock);
-                int empty, ret;
+        if (!d_mountpoint(dentry) && __simple_empty(dentry)) {
-                /* In case there are stale directory dentrys from a failed mount */
-                spin_lock(&dcache_lock);
-                empty = list_empty(&dentry->d_subdirs);
                spin_unlock(&dcache_lock);
+                return -ENOENT;
-                if (!empty)
-                        d_invalidate(dentry);
-                nd.flags = LOOKUP_DIRECTORY;
-                ret = (dentry->d_op->d_revalidate)(dentry, &nd);
-                if (ret <= 0) {
-                        if (ret < 0)
-                                status = ret;
-                        dcache_dir_close(inode, file);
-                        goto out;
-                }
        }
+        spin_unlock(&dcache_lock);
-        if (d_mountpoint(dentry)) {
-                struct file *fp = NULL;
-                struct path fp_path = { .dentry = dentry, .mnt = mnt };
-                path_get(&fp_path);
-                if (!autofs4_follow_mount(&fp_path.mnt, &fp_path.dentry)) {
-                        path_put(&fp_path);
-                        dcache_dir_close(inode, file);
-                        goto out;
-                }
-                fp = dentry_open(fp_path.dentry, fp_path.mnt, file->f_flags);
-                status = PTR_ERR(fp);
-                if (IS_ERR(fp)) {
-                        dcache_dir_close(inode, file);
-                        goto out;
-                }
-                cursor->d_fsdata = fp;
-        }
-        return 0;
-out:
-        return status;
-}
-static int autofs4_dir_close(struct inode *inode, struct file *file)
-{
-        struct dentry *dentry = file->f_path.dentry;
-        struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
-        struct dentry *cursor = file->private_data;
-        int status = 0;
-        DPRINTK("file=%p dentry=%p %.*s",
-                file, dentry, dentry->d_name.len, dentry->d_name.name);
-        if (autofs4_oz_mode(sbi))
-                goto out;
-        if (autofs4_ispending(dentry)) {
-                DPRINTK("dentry busy");
-                status = -EBUSY;
-                goto out;
-        }
-        if (d_mountpoint(dentry)) {
-                struct file *fp = cursor->d_fsdata;
-                if (!fp) {
-                        status = -ENOENT;
-                        goto out;
-                }
-                filp_close(fp, current->files);
-        }
-out:
-        dcache_dir_close(inode, file);
-        return status;
-}
-static int autofs4_dir_readdir(struct file *file, void *dirent, filldir_t filldir)
-{
-        struct dentry *dentry = file->f_path.dentry;
-        struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
-        struct dentry *cursor = file->private_data;
-        int status;
-        DPRINTK("file=%p dentry=%p %.*s",
-                file, dentry, dentry->d_name.len, dentry->d_name.name);
-        if (autofs4_oz_mode(sbi))
-                goto out;
-        if (autofs4_ispending(dentry)) {
-                DPRINTK("dentry busy");
-                return -EBUSY;
-        }
-        if (d_mountpoint(dentry)) {
-                struct file *fp = cursor->d_fsdata;
-                if (!fp)
-                        return -ENOENT;
-                if (!fp->f_op || !fp->f_op->readdir)
-                        goto out;
-                status = vfs_readdir(fp, filldir, dirent);
-                file->f_pos = fp->f_pos;
-                if (status)
-                        autofs4_copy_atime(file, fp);
-                return status;
-        }
 out:
-        return dcache_readdir(file, dirent, filldir);
+        return dcache_dir_open(inode, file);
 }
 static int try_to_fill_dentry(struct dentry *dentry, int flags)
 {
        struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
        struct autofs_info *ino = autofs4_dentry_ino(dentry);
-        struct dentry *new;
        int status;
-        /* Block on any pending expiry here; invalidate the dentry
-           when expiration is done to trigger mount request with a new
-           dentry */
-        if (ino && (ino->flags & AUTOFS_INF_EXPIRING)) {
-                DPRINTK("waiting for expire %p name=%.*s",
-                         dentry, dentry->d_name.len, dentry->d_name.name);
-                status = autofs4_wait(sbi, dentry, NFY_NONE);
-                DPRINTK("expire done status=%d", status);
-                /*
-                 * If the directory still exists the mount request must
-                 * continue otherwise it can't be followed at the right
-                 * time during the walk.
-                 */
-                status = d_invalidate(dentry);
-                if (status != -EBUSY)
-                        return -EAGAIN;
-        }
        DPRINTK("dentry=%p %.*s ino=%p",
                 dentry, dentry->d_name.len, dentry->d_name.name, dentry->d_inode);
@@ -292,7 +133,8 @@ static int try_to_fill_dentry(struct dentry *dentry, int flags)
                        return status;
                }
        /* Trigger mount for path component or follow link */
-        } else if (flags & (LOOKUP_CONTINUE | LOOKUP_DIRECTORY) ||
+        } else if (dentry->d_flags & DCACHE_AUTOFS_PENDING ||
+                        flags & (TRIGGER_FLAGS | TRIGGER_INTENTS) ||
                        current->link_count) {
                DPRINTK("waiting for mount name=%.*s",
                        dentry->d_name.len, dentry->d_name.name);
@@ -320,26 +162,6 @@ static int try_to_fill_dentry(struct dentry *dentry, int flags)
        dentry->d_flags &= ~DCACHE_AUTOFS_PENDING;
        spin_unlock(&dentry->d_lock);
-        /*
-         * The dentry that is passed in from lookup may not be the one
-         * we end up using, as mkdir can create a new one.  If this
-         * happens, and another process tries the lookup at the same time,
-         * it will set the PENDING flag on this new dentry, but add itself
-         * to our waitq.  Then, if after the lookup succeeds, the first
-         * process that requested the mount performs another lookup of the
-         * same directory, it will show up as still pending!  So, we need
-         * to redo the lookup here and clear pending on that dentry.
-         */
-        if (d_unhashed(dentry)) {
-                new = d_lookup(dentry->d_parent, &dentry->d_name);
-                if (new) {
-                        spin_lock(&new->d_lock);
-                        new->d_flags &= ~DCACHE_AUTOFS_PENDING;
-                        spin_unlock(&new->d_lock);
-                        dput(new);
-                }
-        }
        return 0;
 }
@@ -355,51 +177,63 @@ static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd)
        DPRINTK("dentry=%p %.*s oz_mode=%d nd->flags=%d",
                dentry, dentry->d_name.len, dentry->d_name.name, oz_mode,
                nd->flags);
+        /*
-        /* If it's our master or we shouldn't trigger a mount we're done */
+         * For an expire of a covered direct or offset mount we need
-        lookup_type = nd->flags & (LOOKUP_CONTINUE | LOOKUP_DIRECTORY);
+         * to break out of follow_down() at the autofs mount trigger
-        if (oz_mode || !lookup_type)
+         * (d_mounted--), so we can see the expiring flag, and manage
+         * the blocking and following here until the expire is completed.
+         */
+        if (oz_mode) {
+                spin_lock(&sbi->fs_lock);
+                if (ino->flags & AUTOFS_INF_EXPIRING) {
+                        spin_unlock(&sbi->fs_lock);
+                        /* Follow down to our covering mount. */
+                        if (!follow_down(&nd->path.mnt, &nd->path.dentry))
+                                goto done;
+                        goto follow;
+                }
+                spin_unlock(&sbi->fs_lock);
                goto done;
+        }
-        /* If an expire request is pending wait for it. */
+        /* If an expire request is pending everyone must wait. */
-        if (ino && (ino->flags & AUTOFS_INF_EXPIRING)) {
+        autofs4_expire_wait(dentry);
-                DPRINTK("waiting for active request %p name=%.*s",
-                        dentry, dentry->d_name.len, dentry->d_name.name);
-                status = autofs4_wait(sbi, dentry, NFY_NONE);
-                DPRINTK("request done status=%d", status);
+        /* We trigger a mount for almost all flags */
-        }
+        lookup_type = nd->flags & (TRIGGER_FLAGS | TRIGGER_INTENTS);
+        if (!(lookup_type || dentry->d_flags & DCACHE_AUTOFS_PENDING))
+                goto follow;
        /*
-         * If the dentry contains directories then it is an
+         * If the dentry contains directories then it is an autofs
-         * autofs multi-mount with no root mount offset. So
+         * multi-mount with no root mount offset. So don't try to
-         * don't try to mount it again.
+         * mount it again.
         */
        spin_lock(&dcache_lock);
-        if (!d_mountpoint(dentry) && __simple_empty(dentry)) {
+        if (dentry->d_flags & DCACHE_AUTOFS_PENDING ||
+            (!d_mountpoint(dentry) && __simple_empty(dentry))) {
                spin_unlock(&dcache_lock);
                status = try_to_fill_dentry(dentry, 0);
                if (status)
                        goto out_error;
-                /*
+                goto follow;
-                 * The mount succeeded but if there is no root mount
-                 * it must be an autofs multi-mount with no root offset
-                 * so we don't need to follow the mount.
-                 */
-                if (d_mountpoint(dentry)) {
-                        if (!autofs4_follow_mount(&nd->path.mnt,
-                                                  &nd->path.dentry)) {
-                                status = -ENOENT;
-                                goto out_error;
-                        }
-                }
-                goto done;
        }
        spin_unlock(&dcache_lock);
+follow:
+        /*
+         * If there is no root mount it must be an autofs
+         * multi-mount with no root offset so we don't need
+         * to follow it.
+         */
+        if (d_mountpoint(dentry)) {
+                if (!autofs4_follow_mount(&nd->path.mnt,
+                                          &nd->path.dentry)) {
+                        status = -ENOENT;
+                        goto out_error;
+                }
+        }
 done:
        return NULL;
@@ -424,12 +258,23 @@ static int autofs4_revalidate(struct dentry *dentry, struct nameidata *nd)
        int status = 1;
        /* Pending dentry */
+        spin_lock(&sbi->fs_lock);
        if (autofs4_ispending(dentry)) {
                /* The daemon never causes a mount to trigger */
+                spin_unlock(&sbi->fs_lock);
                if (oz_mode)
                        return 1;
                /*
+                 * If the directory has gone away due to an expire
+                 * we have been called as ->d_revalidate() and so
+                 * we need to return false and proceed to ->lookup().
+                 */
+                if (autofs4_expire_wait(dentry) == -EAGAIN)
+                        return 0;
+                /*
                 * A zero status is success otherwise we have a
                 * negative error code.
                 */
@@ -437,17 +282,9 @@ static int autofs4_revalidate(struct dentry *dentry, struct nameidata *nd)
                if (status == 0)
                        return 1;
-                /*
-                 * A status of EAGAIN here means that the dentry has gone
-                 * away while waiting for an expire to complete. If we are
-                 * racing with expire lookup will wait for it so this must
-                 * be a revalidate and we need to send it to lookup.
-                 */
-                if (status == -EAGAIN)
-                        return 0;
                return status;
        }
+        spin_unlock(&sbi->fs_lock);
        /* Negative dentry.. invalidate if "old" */
        if (dentry->d_inode == NULL)
@@ -461,6 +298,7 @@ static int autofs4_revalidate(struct dentry *dentry, struct nameidata *nd)
                DPRINTK("dentry=%p %.*s, emptydir",
                         dentry, dentry->d_name.len, dentry->d_name.name);
                spin_unlock(&dcache_lock);
                /* The daemon never causes a mount to trigger */
                if (oz_mode)
                        return 1;
@@ -493,10 +331,12 @@ void autofs4_dentry_release(struct dentry *de)
                struct autofs_sb_info *sbi = autofs4_sbi(de->d_sb);
                if (sbi) {
-                        spin_lock(&sbi->rehash_lock);
+                        spin_lock(&sbi->lookup_lock);
-                        if (!list_empty(&inf->rehash))
+                        if (!list_empty(&inf->active))
-                                list_del(&inf->rehash);
+                                list_del(&inf->active);
-                        spin_unlock(&sbi->rehash_lock);
+                        if (!list_empty(&inf->expiring))
+                                list_del(&inf->expiring);
+                        spin_unlock(&sbi->lookup_lock);
                }
                inf->dentry = NULL;
@@ -518,7 +358,7 @@ static struct dentry_operations autofs4_dentry_operations = {
        .d_release      = autofs4_dentry_release,
 };
-static struct dentry *autofs4_lookup_unhashed(struct autofs_sb_info *sbi, struct dentry *parent, struct qstr *name)
+static struct dentry *autofs4_lookup_active(struct autofs_sb_info *sbi, struct dentry *parent, struct qstr *name)
 {
        unsigned int len = name->len;
        unsigned int hash = name->hash;
@@ -526,14 +366,66 @@ static struct dentry *autofs4_lookup_unhashed(struct autofs_sb_info *sbi, struct
        struct list_head *p, *head;
        spin_lock(&dcache_lock);
-        spin_lock(&sbi->rehash_lock);
+        spin_lock(&sbi->lookup_lock);
-        head = &sbi->rehash_list;
+        head = &sbi->active_list;
        list_for_each(p, head) {
                struct autofs_info *ino;
                struct dentry *dentry;
                struct qstr *qstr;
-                ino = list_entry(p, struct autofs_info, rehash);
+                ino = list_entry(p, struct autofs_info, active);
+                dentry = ino->dentry;
+                spin_lock(&dentry->d_lock);
+                /* Already gone? */
+                if (atomic_read(&dentry->d_count) == 0)
+                        goto next;
+                qstr = &dentry->d_name;
+                if (dentry->d_name.hash != hash)
+                        goto next;
+                if (dentry->d_parent != parent)
+                        goto next;
+                if (qstr->len != len)
+                        goto next;
+                if (memcmp(qstr->name, str, len))
+                        goto next;
+                if (d_unhashed(dentry)) {
+                        dget(dentry);
+                        spin_unlock(&dentry->d_lock);
+                        spin_unlock(&sbi->lookup_lock);
+                        spin_unlock(&dcache_lock);
+                        return dentry;
+                }
+next:
+                spin_unlock(&dentry->d_lock);
+        }
+        spin_unlock(&sbi->lookup_lock);
+        spin_unlock(&dcache_lock);
+        return NULL;
+}
+static struct dentry *autofs4_lookup_expiring(struct autofs_sb_info *sbi, struct dentry *parent, struct qstr *name)
+{
+        unsigned int len = name->len;
+        unsigned int hash = name->hash;
+        const unsigned char *str = name->name;
+        struct list_head *p, *head;
+        spin_lock(&dcache_lock);
+        spin_lock(&sbi->lookup_lock);
+        head = &sbi->expiring_list;
+        list_for_each(p, head) {
+                struct autofs_info *ino;
+                struct dentry *dentry;
+                struct qstr *qstr;
+                ino = list_entry(p, struct autofs_info, expiring);
                dentry = ino->dentry;
                spin_lock(&dentry->d_lock);
@@ -555,33 +447,16 @@ static struct dentry *autofs4_lookup_unhashed(struct autofs_sb_info *sbi, struct
                        goto next;
                if (d_unhashed(dentry)) {
-                        struct inode *inode = dentry->d_inode;
-                        ino = autofs4_dentry_ino(dentry);
-                        list_del_init(&ino->rehash);
                        dget(dentry);
-                        /*
-                         * Make the rehashed dentry negative so the VFS
-                         * behaves as it should.
-                         */
-                        if (inode) {
-                                dentry->d_inode = NULL;
-                                list_del_init(&dentry->d_alias);
-                                spin_unlock(&dentry->d_lock);
-                                spin_unlock(&sbi->rehash_lock);
-                                spin_unlock(&dcache_lock);
-                                iput(inode);
-                                return dentry;
-                        }
                        spin_unlock(&dentry->d_lock);
-                        spin_unlock(&sbi->rehash_lock);
+                        spin_unlock(&sbi->lookup_lock);
                        spin_unlock(&dcache_lock);
                        return dentry;
                }
 next:
                spin_unlock(&dentry->d_lock);
        }
-        spin_unlock(&sbi->rehash_lock);
+        spin_unlock(&sbi->lookup_lock);
        spin_unlock(&dcache_lock);
        return NULL;
@@ -591,7 +466,8 @@ next:
 static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
 {
        struct autofs_sb_info *sbi;
-        struct dentry *unhashed;
+        struct autofs_info *ino;
+        struct dentry *expiring, *unhashed;
        int oz_mode;
        DPRINTK("name = %.*s",
@@ -607,8 +483,26 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s
        DPRINTK("pid = %u, pgrp = %u, catatonic = %d, oz_mode = %d",
                 current->pid, task_pgrp_nr(current), sbi->catatonic, oz_mode);
-        unhashed = autofs4_lookup_unhashed(sbi, dentry->d_parent, &dentry->d_name);
+        expiring = autofs4_lookup_expiring(sbi, dentry->d_parent, &dentry->d_name);
-        if (!unhashed) {
+        if (expiring) {
+                /*
+                 * If we are racing with expire the request might not
+                 * be quite complete but the directory has been removed
+                 * so it must have been successful, so just wait for it.
+                 */
+                ino = autofs4_dentry_ino(expiring);
+                autofs4_expire_wait(expiring);
+                spin_lock(&sbi->lookup_lock);
+                if (!list_empty(&ino->expiring))
+                        list_del_init(&ino->expiring);
+                spin_unlock(&sbi->lookup_lock);
+                dput(expiring);
+        }
+        unhashed = autofs4_lookup_active(sbi, dentry->d_parent, &dentry->d_name);
+        if (unhashed)
+                dentry = unhashed;
+        else {
                /*
                 * Mark the dentry incomplete but don't hash it. We do this
                 * to serialize our inode creation operations (symlink and
@@ -622,39 +516,34 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s
                 */
                dentry->d_op = &autofs4_root_dentry_operations;
-                dentry->d_fsdata = NULL;
-                d_instantiate(dentry, NULL);
-        } else {
-                struct autofs_info *ino = autofs4_dentry_ino(unhashed);
-                DPRINTK("rehash %p with %p", dentry, unhashed);
                /*
-                 * If we are racing with expire the request might not
+                 * And we need to ensure that the same dentry is used for
-                 * be quite complete but the directory has been removed
+                 * all following lookup calls until it is hashed so that
-                 * so it must have been successful, so just wait for it.
+                 * the dentry flags are persistent throughout the request.
-                 * We need to ensure the AUTOFS_INF_EXPIRING flag is clear
-                 * before continuing as revalidate may fail when calling
-                 * try_to_fill_dentry (returning EAGAIN) if we don't.
                 */
-                while (ino && (ino->flags & AUTOFS_INF_EXPIRING)) {
+                ino = autofs4_init_ino(NULL, sbi, 0555);
-                        DPRINTK("wait for incomplete expire %p name=%.*s",
+                if (!ino)
-                                unhashed, unhashed->d_name.len,
+                        return ERR_PTR(-ENOMEM);
-                                unhashed->d_name.name);
-                        autofs4_wait(sbi, unhashed, NFY_NONE);
+                dentry->d_fsdata = ino;
-                        DPRINTK("request completed");
+                ino->dentry = dentry;
-                }
-                dentry = unhashed;
+                spin_lock(&sbi->lookup_lock);
+                list_add(&ino->active, &sbi->active_list);
+                spin_unlock(&sbi->lookup_lock);
+                d_instantiate(dentry, NULL);
        }
        if (!oz_mode) {
                spin_lock(&dentry->d_lock);
                dentry->d_flags |= DCACHE_AUTOFS_PENDING;
                spin_unlock(&dentry->d_lock);
-        }
+                if (dentry->d_op && dentry->d_op->d_revalidate) {
+                        mutex_unlock(&dir->i_mutex);
-        if (dentry->d_op && dentry->d_op->d_revalidate) {
+                        (dentry->d_op->d_revalidate)(dentry, nd);
-                mutex_unlock(&dir->i_mutex);
+                        mutex_lock(&dir->i_mutex);
-                (dentry->d_op->d_revalidate)(dentry, nd);
+                }
-                mutex_lock(&dir->i_mutex);
        }
        /*
@@ -673,9 +562,11 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s
                            return ERR_PTR(-ERESTARTNOINTR);
                        }
                }
-                spin_lock(&dentry->d_lock);
+                if (!oz_mode) {
-                dentry->d_flags &= ~DCACHE_AUTOFS_PENDING;
+                        spin_lock(&dentry->d_lock);
-                spin_unlock(&dentry->d_lock);
+                        dentry->d_flags &= ~DCACHE_AUTOFS_PENDING;
+                        spin_unlock(&dentry->d_lock);
+                }
        }
        /*
@@ -706,7 +597,7 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s
        }
        if (unhashed)
-                return dentry;
+                return unhashed;
        return NULL;
 }
@@ -728,20 +619,31 @@ static int autofs4_dir_symlink(struct inode *dir,
                return -EACCES;
        ino = autofs4_init_ino(ino, sbi, S_IFLNK | 0555);
-        if (ino == NULL)
+        if (!ino)
-                return -ENOSPC;
+                return -ENOMEM;
-        ino->size = strlen(symname);
+        spin_lock(&sbi->lookup_lock);
-        ino->u.symlink = cp = kmalloc(ino->size + 1, GFP_KERNEL);
+        if (!list_empty(&ino->active))
+                list_del_init(&ino->active);
+        spin_unlock(&sbi->lookup_lock);
-        if (cp == NULL) {
+        ino->size = strlen(symname);
-                kfree(ino);
+        cp = kmalloc(ino->size + 1, GFP_KERNEL);
-                return -ENOSPC;
+        if (!cp) {
+                if (!dentry->d_fsdata)
+                        kfree(ino);
+                return -ENOMEM;
        }
        strcpy(cp, symname);
        inode = autofs4_get_inode(dir->i_sb, ino);
+        if (!inode) {
+                kfree(cp);
+                if (!dentry->d_fsdata)
+                        kfree(ino);
+                return -ENOMEM;
+        }
        d_add(dentry, inode);
        if (dir == dir->i_sb->s_root->d_inode)
@@ -757,6 +659,7 @@ static int autofs4_dir_symlink(struct inode *dir,
                atomic_inc(&p_ino->count);
        ino->inode = inode;
+        ino->u.symlink = cp;
        dir->i_mtime = CURRENT_TIME;
        return 0;
@@ -769,9 +672,8 @@ static int autofs4_dir_symlink(struct inode *dir,
 * that the file no longer exists. However, doing that means that the
 * VFS layer can turn the dentry into a negative dentry.  We don't want
 * this, because the unlink is probably the result of an expire.
- * We simply d_drop it and add it to a rehash candidates list in the
+ * We simply d_drop it and add it to an expiring list in the super block,
- * super block, which allows the dentry lookup to reuse it retaining
+ * which allows the dentry lookup to check for an incomplete expire.
- * the flags, such as expire in progress, in case we're racing with expire.
 *
 * If a process is blocked on the dentry waiting for the expire to finish,
 * it will invalidate the dentry and try to mount with a new one.
@@ -801,9 +703,10 @@ static int autofs4_dir_unlink(struct inode *dir, struct dentry *dentry)
        dir->i_mtime = CURRENT_TIME;
        spin_lock(&dcache_lock);
-        spin_lock(&sbi->rehash_lock);
+        spin_lock(&sbi->lookup_lock);
-        list_add(&ino->rehash, &sbi->rehash_list);
+        if (list_empty(&ino->expiring))
-        spin_unlock(&sbi->rehash_lock);
+                list_add(&ino->expiring, &sbi->expiring_list);
+        spin_unlock(&sbi->lookup_lock);
        spin_lock(&dentry->d_lock);
        __d_drop(dentry);
        spin_unlock(&dentry->d_lock);
@@ -829,9 +732,10 @@ static int autofs4_dir_rmdir(struct inode *dir, struct dentry *dentry)
                spin_unlock(&dcache_lock);
                return -ENOTEMPTY;
        }
-        spin_lock(&sbi->rehash_lock);
+        spin_lock(&sbi->lookup_lock);
-        list_add(&ino->rehash, &sbi->rehash_list);
+        if (list_empty(&ino->expiring))
-        spin_unlock(&sbi->rehash_lock);
+                list_add(&ino->expiring, &sbi->expiring_list);
+        spin_unlock(&sbi->lookup_lock);
        spin_lock(&dentry->d_lock);
        __d_drop(dentry);
        spin_unlock(&dentry->d_lock);
@@ -866,10 +770,20 @@ static int autofs4_dir_mkdir(struct inode *dir, struct dentry *dentry, int mode)
                dentry, dentry->d_name.len, dentry->d_name.name);
        ino = autofs4_init_ino(ino, sbi, S_IFDIR | 0555);
-        if (ino == NULL)
+        if (!ino)
-                return -ENOSPC;
+                return -ENOMEM;
+        spin_lock(&sbi->lookup_lock);
+        if (!list_empty(&ino->active))
+                list_del_init(&ino->active);
+        spin_unlock(&sbi->lookup_lock);
        inode = autofs4_get_inode(dir->i_sb, ino);
+        if (!inode) {
+                if (!dentry->d_fsdata)
+                        kfree(ino);
+                return -ENOMEM;
+        }
        d_add(dentry, inode);
        if (dir == dir->i_sb->s_root->d_inode)
@@ -922,44 +836,6 @@ static inline int autofs4_get_protosubver(struct autofs_sb_info *sbi, int __user
 }
 /*
- * Tells the daemon whether we need to reghost or not. Also, clears
- * the reghost_needed flag.
- */
-static inline int autofs4_ask_reghost(struct autofs_sb_info *sbi, int __user *p)
-{
-        int status;
-        DPRINTK("returning %d", sbi->needs_reghost);
-        status = put_user(sbi->needs_reghost, p);
-        if (status)
-                return status;
-        sbi->needs_reghost = 0;
-        return 0;
-}
-/*
- * Enable / Disable reghosting ioctl() operation
- */
-static inline int autofs4_toggle_reghost(struct autofs_sb_info *sbi, int __user *p)
-{
-        int status;
-        int val;
-        status = get_user(val, p);
-        DPRINTK("reghost = %d", val);
-        if (status)
-                return status;
-        /* turn on/off reghosting, with the val */
-        sbi->reghost_enabled = val;
-        return 0;
-}
-/*
 * Tells the daemon whether it can umount the autofs mount.
 */
 static inline int autofs4_ask_umount(struct vfsmount *mnt, int __user *p)
@@ -1023,11 +899,6 @@ static int autofs4_root_ioctl(struct inode *inode, struct file *filp,
        case AUTOFS_IOC_SETTIMEOUT:
                return autofs4_get_set_timeout(sbi, p);
-        case AUTOFS_IOC_TOGGLEREGHOST:
-                return autofs4_toggle_reghost(sbi, p);
-        case AUTOFS_IOC_ASKREGHOST:
-                return autofs4_ask_reghost(sbi, p);
        case AUTOFS_IOC_ASKUMOUNT:
                return autofs4_ask_umount(filp->f_path.mnt, p);
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index 75e5955c3f6d..35216d18d8b5 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -28,6 +28,12 @@ void autofs4_catatonic_mode(struct autofs_sb_info *sbi)
 {
        struct autofs_wait_queue *wq, *nwq;
+        mutex_lock(&sbi->wq_mutex);
+        if (sbi->catatonic) {
+                mutex_unlock(&sbi->wq_mutex);
+                return;
+        }
        DPRINTK("entering catatonic mode");
        sbi->catatonic = 1;
@@ -36,13 +42,18 @@ void autofs4_catatonic_mode(struct autofs_sb_info *sbi)
        while (wq) {
                nwq = wq->next;
                wq->status = -ENOENT; /* Magic is gone - report failure */
-                kfree(wq->name);
+                if (wq->name.name) {
-                wq->name = NULL;
+                        kfree(wq->name.name);
+                        wq->name.name = NULL;
+                }
+                wq->wait_ctr--;
                wake_up_interruptible(&wq->queue);
                wq = nwq;
        }
        fput(sbi->pipe);        /* Close the pipe */
        sbi->pipe = NULL;
+        sbi->pipefd = -1;
+        mutex_unlock(&sbi->wq_mutex);
 }
 static int autofs4_write(struct file *file, const void *addr, int bytes)
@@ -89,10 +100,11 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
                union autofs_packet_union v4_pkt;
                union autofs_v5_packet_union v5_pkt;
        } pkt;
+        struct file *pipe = NULL;
        size_t pktsz;
        DPRINTK("wait id = 0x%08lx, name = %.*s, type=%d",
-                wq->wait_queue_token, wq->len, wq->name, type);
+                wq->wait_queue_token, wq->name.len, wq->name.name, type);
        memset(&pkt,0,sizeof pkt); /* For security reasons */
@@ -107,9 +119,9 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
                pktsz = sizeof(*mp);
                mp->wait_queue_token = wq->wait_queue_token;
-                mp->len = wq->len;
+                mp->len = wq->name.len;
-                memcpy(mp->name, wq->name, wq->len);
+                memcpy(mp->name, wq->name.name, wq->name.len);
-                mp->name[wq->len] = '\0';
+                mp->name[wq->name.len] = '\0';
                break;
        }
        case autofs_ptype_expire_multi:
@@ -119,9 +131,9 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
                pktsz = sizeof(*ep);
                ep->wait_queue_token = wq->wait_queue_token;
-                ep->len = wq->len;
+                ep->len = wq->name.len;
-                memcpy(ep->name, wq->name, wq->len);
+                memcpy(ep->name, wq->name.name, wq->name.len);
-                ep->name[wq->len] = '\0';
+                ep->name[wq->name.len] = '\0';
                break;
        }
        /*
@@ -138,9 +150,9 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
                pktsz = sizeof(*packet);
                packet->wait_queue_token = wq->wait_queue_token;
-                packet->len = wq->len;
+                packet->len = wq->name.len;
-                memcpy(packet->name, wq->name, wq->len);
+                memcpy(packet->name, wq->name.name, wq->name.len);
-                packet->name[wq->len] = '\0';
+                packet->name[wq->name.len] = '\0';
                packet->dev = wq->dev;
                packet->ino = wq->ino;
                packet->uid = wq->uid;
@@ -154,8 +166,19 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
                return;
        }
-        if (autofs4_write(sbi->pipe, &pkt, pktsz))
+        /* Check if we have become catatonic */
-                autofs4_catatonic_mode(sbi);
+        mutex_lock(&sbi->wq_mutex);
+        if (!sbi->catatonic) {
+                pipe = sbi->pipe;
+                get_file(pipe);
+        }
+        mutex_unlock(&sbi->wq_mutex);
+        if (pipe) {
+                if (autofs4_write(pipe, &pkt, pktsz))
+                        autofs4_catatonic_mode(sbi);
+                fput(pipe);
+        }
 }
 static int autofs4_getpath(struct autofs_sb_info *sbi,
@@ -191,58 +214,55 @@ static int autofs4_getpath(struct autofs_sb_info *sbi,
 }
 static struct autofs_wait_queue *
-autofs4_find_wait(struct autofs_sb_info *sbi,
+autofs4_find_wait(struct autofs_sb_info *sbi, struct qstr *qstr)
-                  char *name, unsigned int hash, unsigned int len)
 {
        struct autofs_wait_queue *wq;
        for (wq = sbi->queues; wq; wq = wq->next) {
-                if (wq->hash == hash &&
+                if (wq->name.hash == qstr->hash &&
-                    wq->len == len &&
+                    wq->name.len == qstr->len &&
-                    wq->name && !memcmp(wq->name, name, len))
+                    wq->name.name &&
+                         !memcmp(wq->name.name, qstr->name, qstr->len))
                        break;
        }
        return wq;
 }
-int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
+/*
-                enum autofs_notify notify)
+ * Check if we have a valid request.
+ * Returns
+ * 1 if the request should continue.
+ *   In this case we can return an autofs_wait_queue entry if one is
+ *   found or NULL to idicate a new wait needs to be created.
+ * 0 or a negative errno if the request shouldn't continue.
+ */
+static int validate_request(struct autofs_wait_queue **wait,
+                            struct autofs_sb_info *sbi,
+                            struct qstr *qstr,
+                            struct dentry*dentry, enum autofs_notify notify)
 {
-        struct autofs_info *ino;
        struct autofs_wait_queue *wq;
-        char *name;
+        struct autofs_info *ino;
-        unsigned int len = 0;
-        unsigned int hash = 0;
-        int status, type;
-        /* In catatonic mode, we don't wait for nobody */
-        if (sbi->catatonic)
-                return -ENOENT;
-        
-        name = kmalloc(NAME_MAX + 1, GFP_KERNEL);
-        if (!name)
-                return -ENOMEM;
-        /* If this is a direct mount request create a dummy name */
+        /* Wait in progress, continue; */
-        if (IS_ROOT(dentry) && (sbi->type & AUTOFS_TYPE_DIRECT))
+        wq = autofs4_find_wait(sbi, qstr);
-                len = sprintf(name, "%p", dentry);
+        if (wq) {
-        else {
+                *wait = wq;
-                len = autofs4_getpath(sbi, dentry, &name);
+                return 1;
-                if (!len) {
-                        kfree(name);
-                        return -ENOENT;
-                }
        }
-        hash = full_name_hash(name, len);
-        if (mutex_lock_interruptible(&sbi->wq_mutex)) {
+        *wait = NULL;
-                kfree(name);
-                return -EINTR;
-        }
-        wq = autofs4_find_wait(sbi, name, hash, len);
+        /* If we don't yet have any info this is a new request */
        ino = autofs4_dentry_ino(dentry);
-        if (!wq && ino && notify == NFY_NONE) {
+        if (!ino)
+                return 1;
+        /*
+         * If we've been asked to wait on an existing expire (NFY_NONE)
+         * but there is no wait in the queue ...
+         */
+        if (notify == NFY_NONE) {
                /*
                 * Either we've betean the pending expire to post it's
                 * wait or it finished while we waited on the mutex.
@@ -253,13 +273,14 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
                while (ino->flags & AUTOFS_INF_EXPIRING) {
                        mutex_unlock(&sbi->wq_mutex);
                        schedule_timeout_interruptible(HZ/10);
-                        if (mutex_lock_interruptible(&sbi->wq_mutex)) {
+                        if (mutex_lock_interruptible(&sbi->wq_mutex))
-                                kfree(name);
                                return -EINTR;
+                        wq = autofs4_find_wait(sbi, qstr);
+                        if (wq) {
+                                *wait = wq;
+                                return 1;
                        }
-                        wq = autofs4_find_wait(sbi, name, hash, len);
-                        if (wq)
-                                break;
                }
                /*
@@ -267,18 +288,96 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
                 * cases where we wait on NFY_NONE neither depend on the
                 * return status of the wait.
                 */
-                if (!wq) {
+                return 0;
+        }
+        /*
+         * If we've been asked to trigger a mount and the request
+         * completed while we waited on the mutex ...
+         */
+        if (notify == NFY_MOUNT) {
+                /*
+                 * If the dentry isn't hashed just go ahead and try the
+                 * mount again with a new wait (not much else we can do).
+                */
+                if (!d_unhashed(dentry)) {
+                        /*
+                         * But if the dentry is hashed, that means that we
+                         * got here through the revalidate path.  Thus, we
+                         * need to check if the dentry has been mounted
+                         * while we waited on the wq_mutex. If it has,
+                         * simply return success.
+                         */
+                        if (d_mountpoint(dentry))
+                                return 0;
+                }
+        }
+        return 1;
+}
+int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
+                enum autofs_notify notify)
+{
+        struct autofs_wait_queue *wq;
+        struct qstr qstr;
+        char *name;
+        int status, ret, type;
+        /* In catatonic mode, we don't wait for nobody */
+        if (sbi->catatonic)
+                return -ENOENT;
+        if (!dentry->d_inode) {
+                /*
+                 * A wait for a negative dentry is invalid for certain
+                 * cases. A direct or offset mount "always" has its mount
+                 * point directory created and so the request dentry must
+                 * be positive or the map key doesn't exist. The situation
+                 * is very similar for indirect mounts except only dentrys
+                 * in the root of the autofs file system may be negative.
+                 */
+                if (sbi->type & (AUTOFS_TYPE_DIRECT|AUTOFS_TYPE_OFFSET))
+                        return -ENOENT;
+                else if (!IS_ROOT(dentry->d_parent))
+                        return -ENOENT;
+        }
+        name = kmalloc(NAME_MAX + 1, GFP_KERNEL);
+        if (!name)
+                return -ENOMEM;
+        /* If this is a direct mount request create a dummy name */
+        if (IS_ROOT(dentry) && (sbi->type & AUTOFS_TYPE_DIRECT))
+                qstr.len = sprintf(name, "%p", dentry);
+        else {
+                qstr.len = autofs4_getpath(sbi, dentry, &name);
+                if (!qstr.len) {
                        kfree(name);
-                        mutex_unlock(&sbi->wq_mutex);
+                        return -ENOENT;
-                        return 0;
                }
        }
+        qstr.name = name;
+        qstr.hash = full_name_hash(name, qstr.len);
+        if (mutex_lock_interruptible(&sbi->wq_mutex)) {
+                kfree(qstr.name);
+                return -EINTR;
+        }
+        ret = validate_request(&wq, sbi, &qstr, dentry, notify);
+        if (ret <= 0) {
+                if (ret == 0)
+                        mutex_unlock(&sbi->wq_mutex);
+                kfree(qstr.name);
+                return ret;
+        }
        if (!wq) {
                /* Create a new wait queue */
                wq = kmalloc(sizeof(struct autofs_wait_queue),GFP_KERNEL);
                if (!wq) {
-                        kfree(name);
+                        kfree(qstr.name);
                        mutex_unlock(&sbi->wq_mutex);
                        return -ENOMEM;
                }
@@ -289,9 +388,7 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
                wq->next = sbi->queues;
                sbi->queues = wq;
                init_waitqueue_head(&wq->queue);
-                wq->hash = hash;
+                memcpy(&wq->name, &qstr, sizeof(struct qstr));
-                wq->name = name;
-                wq->len = len;
                wq->dev = autofs4_get_dev(sbi);
                wq->ino = autofs4_get_ino(sbi);
                wq->uid = current->uid;
@@ -299,7 +396,7 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
                wq->pid = current->pid;
                wq->tgid = current->tgid;
                wq->status = -EINTR; /* Status return if interrupted */
-                atomic_set(&wq->wait_ctr, 2);
+                wq->wait_ctr = 2;
                mutex_unlock(&sbi->wq_mutex);
                if (sbi->version < 5) {
@@ -319,28 +416,25 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
                }
                DPRINTK("new wait id = 0x%08lx, name = %.*s, nfy=%d\n",
-                        (unsigned long) wq->wait_queue_token, wq->len, wq->name, notify);
+                        (unsigned long) wq->wait_queue_token, wq->name.len,
+                        wq->name.name, notify);
                /* autofs4_notify_daemon() may block */
                autofs4_notify_daemon(sbi, wq, type);
        } else {
-                atomic_inc(&wq->wait_ctr);
+                wq->wait_ctr++;
                mutex_unlock(&sbi->wq_mutex);
-                kfree(name);
+                kfree(qstr.name);
                DPRINTK("existing wait id = 0x%08lx, name = %.*s, nfy=%d",
-                        (unsigned long) wq->wait_queue_token, wq->len, wq->name, notify);
+                        (unsigned long) wq->wait_queue_token, wq->name.len,
-        }
+                        wq->name.name, notify);
-        /* wq->name is NULL if and only if the lock is already released */
-        if (sbi->catatonic) {
-                /* We might have slept, so check again for catatonic mode */
-                wq->status = -ENOENT;
-                kfree(wq->name);
-                wq->name = NULL;
        }
-        if (wq->name) {
+        /*
+         * wq->name.name is NULL iff the lock is already released
+         * or the mount has been made catatonic.
+         */
+        if (wq->name.name) {
                /* Block all but "shutdown" signals while waiting */
                sigset_t oldset;
                unsigned long irqflags;
@@ -351,7 +445,7 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
                recalc_sigpending();
                spin_unlock_irqrestore(&current->sighand->siglock, irqflags);
-                wait_event_interruptible(wq->queue, wq->name == NULL);
+                wait_event_interruptible(wq->queue, wq->name.name == NULL);
                spin_lock_irqsave(&current->sighand->siglock, irqflags);
                current->blocked = oldset;
@@ -364,8 +458,10 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
        status = wq->status;
        /* Are we the last process to need status? */
-        if (atomic_dec_and_test(&wq->wait_ctr))
+        mutex_lock(&sbi->wq_mutex);
+        if (!--wq->wait_ctr)
                kfree(wq);
+        mutex_unlock(&sbi->wq_mutex);
        return status;
 }
@@ -387,16 +483,13 @@ int autofs4_wait_release(struct autofs_sb_info *sbi, autofs_wqt_t wait_queue_tok
        }
        *wql = wq->next;        /* Unlink from chain */
-        mutex_unlock(&sbi->wq_mutex);
+        kfree(wq->name.name);
-        kfree(wq->name);
+        wq->name.name = NULL;   /* Do not wait on this queue */
-        wq->name = NULL;        /* Do not wait on this queue */
        wq->status = status;
+        wake_up_interruptible(&wq->queue);
-        if (atomic_dec_and_test(&wq->wait_ctr)) /* Is anyone still waiting for this guy? */
+        if (!--wq->wait_ctr)
                kfree(wq);
-        else
+        mutex_unlock(&sbi->wq_mutex);
-                wake_up_interruptible(&wq->queue);
        return 0;
 }
diff --git a/fs/bad_inode.c b/fs/bad_inode.c
index f1c2ea8342f5..5f1538c03b1b 100644
--- a/fs/bad_inode.c
+++ b/fs/bad_inode.c
@@ -243,8 +243,7 @@ static int bad_inode_readlink(struct dentry *dentry, char __user *buffer,
        return -EIO;
 }
-static int bad_inode_permission(struct inode *inode, int mask,
+static int bad_inode_permission(struct inode *inode, int mask)
-                        struct nameidata *nd)
 {
        return -EIO;
 }
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index e8717de3bab3..02c6e62b72f8 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -289,7 +289,7 @@ befs_destroy_inode(struct inode *inode)
        kmem_cache_free(befs_inode_cachep, BEFS_I(inode));
 }
-static void init_once(struct kmem_cache *cachep, void *foo)
+static void init_once(void *foo)
 {
        struct befs_inode_info *bi = (struct befs_inode_info *) foo;
diff --git a/fs/bfs/bfs.h b/fs/bfs/bfs.h
index 70f5d3a8eede..7109e451abf7 100644
--- a/fs/bfs/bfs.h
+++ b/fs/bfs/bfs.h
@@ -16,8 +16,9 @@ struct bfs_sb_info {
        unsigned long si_freei;
        unsigned long si_lf_eblk;
        unsigned long si_lasti;
-        unsigned long * si_imap;
+        unsigned long *si_imap;
-        struct buffer_head * si_sbh;            /* buffer header w/superblock */
+        struct buffer_head *si_sbh;             /* buffer header w/superblock */
+        struct mutex bfs_lock;
 };
 /*
diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c
index 034950cb3cbe..87ee5ccee348 100644
--- a/fs/bfs/dir.c
+++ b/fs/bfs/dir.c
@@ -32,16 +32,17 @@ static int bfs_readdir(struct file *f, void *dirent, filldir_t filldir)
        struct inode *dir = f->f_path.dentry->d_inode;
        struct buffer_head *bh;
        struct bfs_dirent *de;
+        struct bfs_sb_info *info = BFS_SB(dir->i_sb);
        unsigned int offset;
        int block;
-        lock_kernel();
+        mutex_lock(&info->bfs_lock);
        if (f->f_pos & (BFS_DIRENT_SIZE - 1)) {
                printf("Bad f_pos=%08lx for %s:%08lx\n",
                                        (unsigned long)f->f_pos,
                                        dir->i_sb->s_id, dir->i_ino);
-                unlock_kernel();
+                mutex_unlock(&info->bfs_lock);
                return -EBADF;
        }
@@ -61,7 +62,7 @@ static int bfs_readdir(struct file *f, void *dirent, filldir_t filldir)
                                                le16_to_cpu(de->ino),
                                                DT_UNKNOWN) < 0) {
                                        brelse(bh);
-                                        unlock_kernel();
+                                        mutex_unlock(&info->bfs_lock);
                                        return 0;
                                }
                        }
@@ -71,7 +72,7 @@ static int bfs_readdir(struct file *f, void *dirent, filldir_t filldir)
                brelse(bh);
        }
-        unlock_kernel();
+        mutex_unlock(&info->bfs_lock);
        return 0;       
 }
@@ -95,10 +96,10 @@ static int bfs_create(struct inode *dir, struct dentry *dentry, int mode,
        inode = new_inode(s);
        if (!inode)
                return -ENOSPC;
-        lock_kernel();
+        mutex_lock(&info->bfs_lock);
        ino = find_first_zero_bit(info->si_imap, info->si_lasti);
        if (ino > info->si_lasti) {
-                unlock_kernel();
+                mutex_unlock(&info->bfs_lock);
                iput(inode);
                return -ENOSPC;
        }
@@ -125,10 +126,10 @@ static int bfs_create(struct inode *dir, struct dentry *dentry, int mode,
        if (err) {
                inode_dec_link_count(inode);
                iput(inode);
-                unlock_kernel();
+                mutex_unlock(&info->bfs_lock);
                return err;
        }
-        unlock_kernel();
+        mutex_unlock(&info->bfs_lock);
        d_instantiate(dentry, inode);
        return 0;
 }
@@ -139,22 +140,23 @@ static struct dentry *bfs_lookup(struct inode *dir, struct dentry *dentry,
        struct inode *inode = NULL;
        struct buffer_head *bh;
        struct bfs_dirent *de;
+        struct bfs_sb_info *info = BFS_SB(dir->i_sb);
        if (dentry->d_name.len > BFS_NAMELEN)
                return ERR_PTR(-ENAMETOOLONG);
-        lock_kernel();
+        mutex_lock(&info->bfs_lock);
        bh = bfs_find_entry(dir, dentry->d_name.name, dentry->d_name.len, &de);
        if (bh) {
                unsigned long ino = (unsigned long)le16_to_cpu(de->ino);
                brelse(bh);
                inode = bfs_iget(dir->i_sb, ino);
                if (IS_ERR(inode)) {
-                        unlock_kernel();
+                        mutex_unlock(&info->bfs_lock);
                        return ERR_CAST(inode);
                }
        }
-        unlock_kernel();
+        mutex_unlock(&info->bfs_lock);
        d_add(dentry, inode);
        return NULL;
 }
@@ -163,13 +165,14 @@ static int bfs_link(struct dentry *old, struct inode *dir,
                                                struct dentry *new)
 {
        struct inode *inode = old->d_inode;
+        struct bfs_sb_info *info = BFS_SB(inode->i_sb);
        int err;
-        lock_kernel();
+        mutex_lock(&info->bfs_lock);
        err = bfs_add_entry(dir, new->d_name.name, new->d_name.len,
                                                        inode->i_ino);
        if (err) {
-                unlock_kernel();
+                mutex_unlock(&info->bfs_lock);
                return err;
        }
        inc_nlink(inode);
@@ -177,19 +180,19 @@ static int bfs_link(struct dentry *old, struct inode *dir,
        mark_inode_dirty(inode);
        atomic_inc(&inode->i_count);
        d_instantiate(new, inode);
-        unlock_kernel();
+        mutex_unlock(&info->bfs_lock);
        return 0;
 }
 static int bfs_unlink(struct inode *dir, struct dentry *dentry)
 {
        int error = -ENOENT;
-        struct inode *inode;
+        struct inode *inode = dentry->d_inode;
        struct buffer_head *bh;
        struct bfs_dirent *de;
+        struct bfs_sb_info *info = BFS_SB(inode->i_sb);
-        inode = dentry->d_inode;
+        mutex_lock(&info->bfs_lock);
-        lock_kernel();
        bh = bfs_find_entry(dir, dentry->d_name.name, dentry->d_name.len, &de);
        if (!bh || (le16_to_cpu(de->ino) != inode->i_ino))
                goto out_brelse;
@@ -210,7 +213,7 @@ static int bfs_unlink(struct inode *dir, struct dentry *dentry)
 out_brelse:
        brelse(bh);
-        unlock_kernel();
+        mutex_unlock(&info->bfs_lock);
        return error;
 }
@@ -220,6 +223,7 @@ static int bfs_rename(struct inode *old_dir, struct dentry *old_dentry,
        struct inode *old_inode, *new_inode;
        struct buffer_head *old_bh, *new_bh;
        struct bfs_dirent *old_de, *new_de;
+        struct bfs_sb_info *info;
        int error = -ENOENT;
        old_bh = new_bh = NULL;
@@ -227,7 +231,9 @@ static int bfs_rename(struct inode *old_dir, struct dentry *old_dentry,
        if (S_ISDIR(old_inode->i_mode))
                return -EINVAL;
-        lock_kernel();
+        info = BFS_SB(old_inode->i_sb);
+        mutex_lock(&info->bfs_lock);
        old_bh = bfs_find_entry(old_dir, 
                                old_dentry->d_name.name, 
                                old_dentry->d_name.len, &old_de);
@@ -264,7 +270,7 @@ static int bfs_rename(struct inode *old_dir, struct dentry *old_dentry,
        error = 0;
 end_rename:
-        unlock_kernel();
+        mutex_unlock(&info->bfs_lock);
        brelse(old_bh);
        brelse(new_bh);
        return error;
diff --git a/fs/bfs/file.c b/fs/bfs/file.c
index b11e63e8fbcd..6a021265f018 100644
--- a/fs/bfs/file.c
+++ b/fs/bfs/file.c
@@ -99,7 +99,7 @@ static int bfs_get_block(struct inode *inode, sector_t block,
                return -ENOSPC;
        /* The rest has to be protected against itself. */
-        lock_kernel();
+        mutex_lock(&info->bfs_lock);
        /*
         * If the last data block for this file is the last allocated
@@ -151,7 +151,7 @@ static int bfs_get_block(struct inode *inode, sector_t block,
        mark_buffer_dirty(sbh);
        map_bh(bh_result, sb, phys);
 out:
-        unlock_kernel();
+        mutex_unlock(&info->bfs_lock);
        return err;
 }
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index 8db623838b50..0ed57b5ee012 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -104,6 +104,7 @@ static int bfs_write_inode(struct inode *inode, int unused)
        struct bfs_inode *di;
        struct buffer_head *bh;
        int block, off;
+        struct bfs_sb_info *info = BFS_SB(inode->i_sb);
        dprintf("ino=%08x\n", ino);
@@ -112,13 +113,13 @@ static int bfs_write_inode(struct inode *inode, int unused)
                return -EIO;
        }
-        lock_kernel();
+        mutex_lock(&info->bfs_lock);
        block = (ino - BFS_ROOT_INO) / BFS_INODES_PER_BLOCK + 1;
        bh = sb_bread(inode->i_sb, block);
        if (!bh) {
                printf("Unable to read inode %s:%08x\n",
                                inode->i_sb->s_id, ino);
-                unlock_kernel();
+                mutex_unlock(&info->bfs_lock);
                return -EIO;
        }
@@ -145,7 +146,7 @@ static int bfs_write_inode(struct inode *inode, int unused)
        mark_buffer_dirty(bh);
        brelse(bh);
-        unlock_kernel();
+        mutex_unlock(&info->bfs_lock);
        return 0;
 }
@@ -170,7 +171,7 @@ static void bfs_delete_inode(struct inode *inode)
        
        inode->i_size = 0;
        inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC;
-        lock_kernel();
+        mutex_lock(&info->bfs_lock);
        mark_inode_dirty(inode);
        block = (ino - BFS_ROOT_INO) / BFS_INODES_PER_BLOCK + 1;
@@ -178,7 +179,7 @@ static void bfs_delete_inode(struct inode *inode)
        if (!bh) {
                printf("Unable to read inode %s:%08lx\n",
                                        inode->i_sb->s_id, ino);
-                unlock_kernel();
+                mutex_unlock(&info->bfs_lock);
                return;
        }
        off = (ino - BFS_ROOT_INO) % BFS_INODES_PER_BLOCK;
@@ -204,14 +205,16 @@ static void bfs_delete_inode(struct inode *inode)
                info->si_lf_eblk = bi->i_sblock - 1;
                mark_buffer_dirty(info->si_sbh);
        }
-        unlock_kernel();
+        mutex_unlock(&info->bfs_lock);
        clear_inode(inode);
 }
 static void bfs_put_super(struct super_block *s)
 {
        struct bfs_sb_info *info = BFS_SB(s);
        brelse(info->si_sbh);
+        mutex_destroy(&info->bfs_lock);
        kfree(info->si_imap);
        kfree(info);
        s->s_fs_info = NULL;
@@ -236,11 +239,13 @@ static int bfs_statfs(struct dentry *dentry, struct kstatfs *buf)
 static void bfs_write_super(struct super_block *s)
 {
-        lock_kernel();
+        struct bfs_sb_info *info = BFS_SB(s);
+        mutex_lock(&info->bfs_lock);
        if (!(s->s_flags & MS_RDONLY))
-                mark_buffer_dirty(BFS_SB(s)->si_sbh);
+                mark_buffer_dirty(info->si_sbh);
        s->s_dirt = 0;
-        unlock_kernel();
+        mutex_unlock(&info->bfs_lock);
 }
 static struct kmem_cache *bfs_inode_cachep;
@@ -259,7 +264,7 @@ static void bfs_destroy_inode(struct inode *inode)
        kmem_cache_free(bfs_inode_cachep, BFS_I(inode));
 }
-static void init_once(struct kmem_cache *cachep, void *foo)
+static void init_once(void *foo)
 {
        struct bfs_inode_info *bi = foo;
@@ -380,7 +385,7 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
                struct bfs_inode *di;
                int block = (i - BFS_ROOT_INO) / BFS_INODES_PER_BLOCK + 1;
                int off = (i - BFS_ROOT_INO) % BFS_INODES_PER_BLOCK;
-                unsigned long sblock, eblock;
+                unsigned long eblock;
                if (!off) {
                        brelse(bh);
@@ -399,7 +404,6 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
                set_bit(i, info->si_imap);
                info->si_freeb -= BFS_FILEBLOCKS(di);
-                sblock =  le32_to_cpu(di->i_sblock);
                eblock =  le32_to_cpu(di->i_eblock);
                if (eblock > info->si_lf_eblk)
                        info->si_lf_eblk = eblock;
@@ -410,6 +414,7 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
                s->s_dirt = 1;
        } 
        dump_imap("read_super", s);
+        mutex_init(&info->bfs_lock);
        return 0;
 out:
diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c
index ba4cddb92f1d..204cfd1d7676 100644
--- a/fs/binfmt_aout.c
+++ b/fs/binfmt_aout.c
@@ -444,12 +444,6 @@ beyond_if:
        regs->gp = ex.a_gpvalue;
 #endif
        start_thread(regs, ex.a_entry, current->mm->start_stack);
-        if (unlikely(current->ptrace & PT_PTRACED)) {
-                if (current->ptrace & PT_TRACE_EXEC)
-                        ptrace_notify ((PTRACE_EVENT_EXEC << 8) | SIGTRAP);
-                else
-                        send_sig(SIGTRAP, current, 0);
-        }
        return 0;
 }
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index d48ff5f370f4..655ed8d30a86 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -131,6 +131,15 @@ static int padzero(unsigned long elf_bss)
 #define STACK_ALLOC(sp, len) ({ sp -= len ; sp; })
 #endif
+#ifndef ELF_BASE_PLATFORM
+/*
+ * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
+ * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
+ * will be copied to the user stack in the same manner as AT_PLATFORM.
+ */
+#define ELF_BASE_PLATFORM NULL
+#endif
 static int
 create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
                unsigned long load_addr, unsigned long interp_load_addr)
@@ -142,7 +151,9 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
        elf_addr_t __user *envp;
        elf_addr_t __user *sp;
        elf_addr_t __user *u_platform;
+        elf_addr_t __user *u_base_platform;
        const char *k_platform = ELF_PLATFORM;
+        const char *k_base_platform = ELF_BASE_PLATFORM;
        int items;
        elf_addr_t *elf_info;
        int ei_index = 0;
@@ -172,6 +183,19 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
                        return -EFAULT;
        }
+        /*
+         * If this architecture has a "base" platform capability
+         * string, copy it to userspace.
+         */
+        u_base_platform = NULL;
+        if (k_base_platform) {
+                size_t len = strlen(k_base_platform) + 1;
+                u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
+                if (__copy_to_user(u_base_platform, k_base_platform, len))
+                        return -EFAULT;
+        }
        /* Create the ELF interpreter info */
        elf_info = (elf_addr_t *)current->mm->saved_auxv;
        /* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
@@ -204,10 +228,15 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
        NEW_AUX_ENT(AT_GID, tsk->gid);
        NEW_AUX_ENT(AT_EGID, tsk->egid);
        NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
+        NEW_AUX_ENT(AT_EXECFN, bprm->exec);
        if (k_platform) {
                NEW_AUX_ENT(AT_PLATFORM,
                            (elf_addr_t)(unsigned long)u_platform);
        }
+        if (k_base_platform) {
+                NEW_AUX_ENT(AT_BASE_PLATFORM,
+                            (elf_addr_t)(unsigned long)u_base_platform);
+        }
        if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
                NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
        }
@@ -974,12 +1003,6 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 #endif
        start_thread(regs, elf_entry, bprm->p);
-        if (unlikely(current->ptrace & PT_PTRACED)) {
-                if (current->ptrace & PT_TRACE_EXEC)
-                        ptrace_notify ((PTRACE_EVENT_EXEC << 8) | SIGTRAP);
-                else
-                        send_sig(SIGTRAP, current, 0);
-        }
        retval = 0;
 out:
        kfree(loc);
@@ -1477,7 +1500,7 @@ static int fill_note_info(struct elfhdr *elf, int phdrs,
        const struct user_regset_view *view = task_user_regset_view(dump_task);
        struct elf_thread_core_info *t;
        struct elf_prpsinfo *psinfo;
-        struct task_struct *g, *p;
+        struct core_thread *ct;
        unsigned int i;
        info->size = 0;
@@ -1516,31 +1539,26 @@ static int fill_note_info(struct elfhdr *elf, int phdrs,
        /*
         * Allocate a structure for each thread.
         */
-        rcu_read_lock();
+        for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
-        do_each_thread(g, p)
+                t = kzalloc(offsetof(struct elf_thread_core_info,
-                if (p->mm == dump_task->mm) {
+                                     notes[info->thread_notes]),
-                        t = kzalloc(offsetof(struct elf_thread_core_info,
+                            GFP_KERNEL);
-                                             notes[info->thread_notes]),
+                if (unlikely(!t))
-                                    GFP_ATOMIC);
+                        return 0;
-                        if (unlikely(!t)) {
-                                rcu_read_unlock();
+                t->task = ct->task;
-                                return 0;
+                if (ct->task == dump_task || !info->thread) {
-                        }
+                        t->next = info->thread;
-                        t->task = p;
+                        info->thread = t;
-                        if (p == dump_task || !info->thread) {
+                } else {
-                                t->next = info->thread;
+                        /*
-                                info->thread = t;
+                         * Make sure to keep the original task at
-                        } else {
+                         * the head of the list.
-                                /*
+                         */
-                                 * Make sure to keep the original task at
+                        t->next = info->thread->next;
-                                 * the head of the list.
+                        info->thread->next = t;
-                                 */
-                                t->next = info->thread->next;
-                                info->thread->next = t;
-                        }
                }
-        while_each_thread(g, p);
+        }
-        rcu_read_unlock();
        /*
         * Now fill in each thread's information.
@@ -1687,7 +1705,6 @@ static int fill_note_info(struct elfhdr *elf, int phdrs,
 {
 #define NUM_NOTES       6
        struct list_head *t;
-        struct task_struct *g, *p;
        info->notes = NULL;
        info->prstatus = NULL;
@@ -1719,20 +1736,19 @@ static int fill_note_info(struct elfhdr *elf, int phdrs,
        info->thread_status_size = 0;
        if (signr) {
+                struct core_thread *ct;
                struct elf_thread_status *ets;
-                rcu_read_lock();
-                do_each_thread(g, p)
+                for (ct = current->mm->core_state->dumper.next;
-                        if (current->mm == p->mm && current != p) {
+                                                ct; ct = ct->next) {
-                                ets = kzalloc(sizeof(*ets), GFP_ATOMIC);
+                        ets = kzalloc(sizeof(*ets), GFP_KERNEL);
-                                if (!ets) {
+                        if (!ets)
-                                        rcu_read_unlock();
+                                return 0;
-                                        return 0;
-                                }
+                        ets->thread = ct->task;
-                                ets->thread = p;
+                        list_add(&ets->list, &info->thread_list);
-                                list_add(&ets->list, &info->thread_list);
+                }
-                        }
-                while_each_thread(g, p);
-                rcu_read_unlock();
                list_for_each(t, &info->thread_list) {
                        int sz;
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index d051a32e6270..fdeadab2f18b 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -433,13 +433,6 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm,
        entryaddr = interp_params.entry_addr ?: exec_params.entry_addr;
        start_thread(regs, entryaddr, current->mm->start_stack);
-        if (unlikely(current->ptrace & PT_PTRACED)) {
-                if (current->ptrace & PT_TRACE_EXEC)
-                        ptrace_notify((PTRACE_EVENT_EXEC << 8) | SIGTRAP);
-                else
-                        send_sig(SIGTRAP, current, 0);
-        }
        retval = 0;
 error:
@@ -1573,7 +1566,6 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs,
        struct memelfnote *notes = NULL;
        struct elf_prstatus *prstatus = NULL;   /* NT_PRSTATUS */
        struct elf_prpsinfo *psinfo = NULL;     /* NT_PRPSINFO */
-        struct task_struct *g, *p;
        LIST_HEAD(thread_list);
        struct list_head *t;
        elf_fpregset_t *fpu = NULL;
@@ -1622,20 +1614,19 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs,
 #endif
        if (signr) {
+                struct core_thread *ct;
                struct elf_thread_status *tmp;
-                rcu_read_lock();
-                do_each_thread(g,p)
+                for (ct = current->mm->core_state->dumper.next;
-                        if (current->mm == p->mm && current != p) {
+                                                ct; ct = ct->next) {
-                                tmp = kzalloc(sizeof(*tmp), GFP_ATOMIC);
+                        tmp = kzalloc(sizeof(*tmp), GFP_KERNEL);
-                                if (!tmp) {
+                        if (!tmp)
-                                        rcu_read_unlock();
+                                goto cleanup;
-                                        goto cleanup;
-                                }
+                        tmp->thread = ct->task;
-                                tmp->thread = p;
+                        list_add(&tmp->list, &thread_list);
-                                list_add(&tmp->list, &thread_list);
+                }
-                        }
-                while_each_thread(g,p);
-                rcu_read_unlock();
                list_for_each(t, &thread_list) {
                        struct elf_thread_status *tmp;
                        int sz;
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index 2cb1acda3a82..56372ecf1690 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -920,9 +920,6 @@ static int load_flat_binary(struct linux_binprm * bprm, struct pt_regs * regs)
        
        start_thread(regs, start_addr, current->mm->start_stack);
-        if (current->ptrace & PT_PTRACED)
-                send_sig(SIGTRAP, current, 0);
        return 0;
 }
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index 7191306367c5..756205314c24 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -27,6 +27,7 @@
 #include <linux/namei.h>
 #include <linux/mount.h>
 #include <linux/syscalls.h>
+#include <linux/fs.h>
 #include <asm/uaccess.h>
@@ -535,31 +536,16 @@ static ssize_t
 bm_entry_read(struct file * file, char __user * buf, size_t nbytes, loff_t *ppos)
 {
        Node *e = file->f_path.dentry->d_inode->i_private;
-        loff_t pos = *ppos;
        ssize_t res;
        char *page;
-        int len;
        if (!(page = (char*) __get_free_page(GFP_KERNEL)))
                return -ENOMEM;
        entry_status(e, page);
-        len = strlen(page);
-        res = -EINVAL;
+        res = simple_read_from_buffer(buf, nbytes, ppos, page, strlen(page));
-        if (pos < 0)
-                goto out;
-        res = 0;
-        if (pos >= len)
-                goto out;
-        if (len < pos + nbytes)
-                nbytes = len - pos;
-        res = -EFAULT;
-        if (copy_to_user(buf, page + pos, nbytes))
-                goto out;
-        *ppos = pos + nbytes;
-        res = nbytes;
-out:
        free_page((unsigned long) page);
        return res;
 }
diff --git a/fs/binfmt_som.c b/fs/binfmt_som.c
index fdc36bfd6a7b..68be580ba289 100644
--- a/fs/binfmt_som.c
+++ b/fs/binfmt_som.c
@@ -274,8 +274,6 @@ load_som_binary(struct linux_binprm * bprm, struct pt_regs * regs)
        map_hpux_gateway_page(current,current->mm);
        start_thread_som(regs, som_entry, bprm->p);
-        if (current->ptrace & PT_PTRACED)
-                send_sig(SIGTRAP, current, 0);
        return 0;
        /* error cleanup */
diff --git a/fs/bio.c b/fs/bio.c
index 88322b066acb..25f1af0d81e5 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -721,12 +721,8 @@ static struct bio *__bio_map_user_iov(struct request_queue *q,
                const int local_nr_pages = end - start;
                const int page_limit = cur_page + local_nr_pages;
                
-                down_read(&current->mm->mmap_sem);
+                ret = get_user_pages_fast(uaddr, local_nr_pages,
-                ret = get_user_pages(current, current->mm, uaddr,
+                                write_to_vm, &pages[cur_page]);
-                                     local_nr_pages,
-                                     write_to_vm, 0, &pages[cur_page], NULL);
-                up_read(&current->mm->mmap_sem);
                if (ret < local_nr_pages) {
                        ret = -EFAULT;
                        goto out_unmap;
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 10d8a0aa871a..dcf37cada369 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -271,7 +271,7 @@ static void bdev_destroy_inode(struct inode *inode)
        kmem_cache_free(bdev_cachep, bdi);
 }
-static void init_once(struct kmem_cache * cachep, void *foo)
+static void init_once(void *foo)
 {
        struct bdev_inode *ei = (struct bdev_inode *) foo;
        struct block_device *bdev = &ei->bdev;
diff --git a/fs/buffer.c b/fs/buffer.c
index d48caee12e2a..f95805019639 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -706,7 +706,7 @@ static int __set_page_dirty(struct page *page,
        if (TestSetPageDirty(page))
                return 0;
-        write_lock_irq(&mapping->tree_lock);
+        spin_lock_irq(&mapping->tree_lock);
        if (page->mapping) {    /* Race with truncate? */
                WARN_ON_ONCE(warn && !PageUptodate(page));
@@ -719,7 +719,7 @@ static int __set_page_dirty(struct page *page,
                radix_tree_tag_set(&mapping->page_tree,
                                page_index(page), PAGECACHE_TAG_DIRTY);
        }
-        write_unlock_irq(&mapping->tree_lock);
+        spin_unlock_irq(&mapping->tree_lock);
        __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
        return 1;
@@ -1214,8 +1214,7 @@ void __brelse(struct buffer_head * buf)
                put_bh(buf);
                return;
        }
-        printk(KERN_ERR "VFS: brelse: Trying to free free buffer\n");
+        WARN(1, KERN_ERR "VFS: brelse: Trying to free free buffer\n");
-        WARN_ON(1);
 }
 /*
@@ -3272,7 +3271,7 @@ int bh_submit_read(struct buffer_head *bh)
 EXPORT_SYMBOL(bh_submit_read);
 static void
-init_buffer_head(struct kmem_cache *cachep, void *data)
+init_buffer_head(void *data)
 {
        struct buffer_head *bh = data;
diff --git a/fs/cifs/asn1.c b/fs/cifs/asn1.c
index f58e41d3ba48..6bb440b257b0 100644
--- a/fs/cifs/asn1.c
+++ b/fs/cifs/asn1.c
@@ -400,7 +400,7 @@ asn1_oid_decode(struct asn1_ctx *ctx,
        size = eoc - ctx->pointer + 1;
        /* first subid actually encodes first two subids */
-        if (size < 2 || size > ULONG_MAX/sizeof(unsigned long))
+        if (size < 2 || size > UINT_MAX/sizeof(unsigned long))
                return 0;
        *oid = kmalloc(size * sizeof(unsigned long), GFP_ATOMIC);
@@ -494,7 +494,7 @@ decode_negTokenInit(unsigned char *security_blob, int length,
                /*      remember to free obj->oid */
                rc = asn1_header_decode(&ctx, &end, &cls, &con, &tag);
                if (rc) {
-                        if ((tag == ASN1_OJI) && (cls == ASN1_PRI)) {
+                        if ((tag == ASN1_OJI) && (con == ASN1_PRI)) {
                                rc = asn1_oid_decode(&ctx, end, &oid, &oidlen);
                                if (rc) {
                                        rc = compare_oid(oid, oidlen,
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index cc950f69e51e..688a2d42153f 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -107,9 +107,7 @@ void cifs_dump_mids(struct TCP_Server_Info *server)
 #endif /* CONFIG_CIFS_DEBUG2 */
 #ifdef CONFIG_PROC_FS
-static int
+static int cifs_debug_data_proc_show(struct seq_file *m, void *v)
-cifs_debug_data_read(char *buf, char **beginBuffer, off_t offset,
-                     int count, int *eof, void *data)
 {
        struct list_head *tmp;
        struct list_head *tmp1;
@@ -117,23 +115,13 @@ cifs_debug_data_read(char *buf, char **beginBuffer, off_t offset,
        struct cifsSesInfo *ses;
        struct cifsTconInfo *tcon;
        int i;
-        int length = 0;
-        char *original_buf = buf;
-        *beginBuffer = buf + offset;
+        seq_puts(m,
-        length =
-            sprintf(buf,
                    "Display Internal CIFS Data Structures for Debugging\n"
                    "---------------------------------------------------\n");
-        buf += length;
+        seq_printf(m, "CIFS Version %s\n", CIFS_VERSION);
-        length = sprintf(buf, "CIFS Version %s\n", CIFS_VERSION);
+        seq_printf(m, "Active VFS Requests: %d\n", GlobalTotalActiveXid);
-        buf += length;
+        seq_printf(m, "Servers:");
-        length = sprintf(buf,
-                "Active VFS Requests: %d\n", GlobalTotalActiveXid);
-        buf += length;
-        length = sprintf(buf, "Servers:");
-        buf += length;
        i = 0;
        read_lock(&GlobalSMBSeslock);
@@ -142,11 +130,10 @@ cifs_debug_data_read(char *buf, char **beginBuffer, off_t offset,
                ses = list_entry(tmp, struct cifsSesInfo, cifsSessionList);
                if ((ses->serverDomain == NULL) || (ses->serverOS == NULL) ||
                   (ses->serverNOS == NULL)) {
-                        buf += sprintf(buf, "\nentry for %s not fully "
+                        seq_printf(m, "\nentry for %s not fully "
                                        "displayed\n\t", ses->serverName);
                } else {
-                        length =
+                        seq_printf(m,
-                            sprintf(buf,
                                    "\n%d) Name: %s  Domain: %s Mounts: %d OS:"
                                    " %s  \n\tNOS: %s\tCapability: 0x%x\n\tSMB"
                                    " session status: %d\t",
@@ -154,10 +141,9 @@ cifs_debug_data_read(char *buf, char **beginBuffer, off_t offset,
                                atomic_read(&ses->inUse),
                                ses->serverOS, ses->serverNOS,
                                ses->capabilities, ses->status);
-                        buf += length;
                }
                if (ses->server) {
-                        buf += sprintf(buf, "TCP status: %d\n\tLocal Users To "
+                        seq_printf(m, "TCP status: %d\n\tLocal Users To "
                                    "Server: %d SecMode: 0x%x Req On Wire: %d",
                                ses->server->tcpStatus,
                                atomic_read(&ses->server->socketUseCount),
@@ -165,13 +151,12 @@ cifs_debug_data_read(char *buf, char **beginBuffer, off_t offset,
                                atomic_read(&ses->server->inFlight));
 #ifdef CONFIG_CIFS_STATS2
-                        buf += sprintf(buf, " In Send: %d In MaxReq Wait: %d",
+                        seq_printf(m, " In Send: %d In MaxReq Wait: %d",
                                atomic_read(&ses->server->inSend),
                                atomic_read(&ses->server->num_waiters));
 #endif
-                        length = sprintf(buf, "\nMIDs:\n");
+                        seq_puts(m, "\nMIDs:\n");
-                        buf += length;
                        spin_lock(&GlobalMid_Lock);
                        list_for_each(tmp1, &ses->server->pending_mid_q) {
@@ -179,7 +164,7 @@ cifs_debug_data_read(char *buf, char **beginBuffer, off_t offset,
                                        mid_q_entry,
                                        qhead);
                                if (mid_entry) {
-                                        length = sprintf(buf,
+                                        seq_printf(m,
                                                        "State: %d com: %d pid:"
                                                        " %d tsk: %p mid %d\n",
                                                        mid_entry->midState,
@@ -187,7 +172,6 @@ cifs_debug_data_read(char *buf, char **beginBuffer, off_t offset,
                                                        mid_entry->pid,
                                                        mid_entry->tsk,
                                                        mid_entry->mid);
-                                        buf += length;
                                }
                        }
                        spin_unlock(&GlobalMid_Lock);
@@ -195,11 +179,9 @@ cifs_debug_data_read(char *buf, char **beginBuffer, off_t offset,
        }
        read_unlock(&GlobalSMBSeslock);
-        sprintf(buf, "\n");
+        seq_putc(m, '\n');
-        buf++;
-        length = sprintf(buf, "Shares:");
+        seq_puts(m, "Shares:");
-        buf += length;
        i = 0;
        read_lock(&GlobalSMBSeslock);
@@ -208,62 +190,52 @@ cifs_debug_data_read(char *buf, char **beginBuffer, off_t offset,
                i++;
                tcon = list_entry(tmp, struct cifsTconInfo, cifsConnectionList);
                dev_type = le32_to_cpu(tcon->fsDevInfo.DeviceType);
-                length = sprintf(buf, "\n%d) %s Uses: %d ", i,
+                seq_printf(m, "\n%d) %s Uses: %d ", i,
                                 tcon->treeName, atomic_read(&tcon->useCount));
-                buf += length;
                if (tcon->nativeFileSystem) {
-                        length = sprintf(buf, "Type: %s ",
+                        seq_printf(m, "Type: %s ",
                                         tcon->nativeFileSystem);
-                        buf += length;
                }
-                length = sprintf(buf, "DevInfo: 0x%x Attributes: 0x%x"
+                seq_printf(m, "DevInfo: 0x%x Attributes: 0x%x"
                                 "\nPathComponentMax: %d Status: %d",
                            le32_to_cpu(tcon->fsDevInfo.DeviceCharacteristics),
                            le32_to_cpu(tcon->fsAttrInfo.Attributes),
                            le32_to_cpu(tcon->fsAttrInfo.MaxPathNameComponentLength),
                            tcon->tidStatus);
-                buf += length;
                if (dev_type == FILE_DEVICE_DISK)
-                        length = sprintf(buf, " type: DISK ");
+                        seq_puts(m, " type: DISK ");
                else if (dev_type == FILE_DEVICE_CD_ROM)
-                        length = sprintf(buf, " type: CDROM ");
+                        seq_puts(m, " type: CDROM ");
                else
-                        length =
+                        seq_printf(m, " type: %d ", dev_type);
-                            sprintf(buf, " type: %d ", dev_type);
-                buf += length;
+                if (tcon->tidStatus == CifsNeedReconnect)
-                if (tcon->tidStatus == CifsNeedReconnect) {
+                        seq_puts(m, "\tDISCONNECTED ");
-                        buf += sprintf(buf, "\tDISCONNECTED ");
-                        length += 14;
-                }
        }
        read_unlock(&GlobalSMBSeslock);
-        length = sprintf(buf, "\n");
+        seq_putc(m, '\n');
-        buf += length;
        /* BB add code to dump additional info such as TCP session info now */
-        /* Now calculate total size of returned data */
+        return 0;
-        length = buf - original_buf;
+}
-        if (offset + count >= length)
-                *eof = 1;
-        if (length < offset) {
-                *eof = 1;
-                return 0;
-        } else {
-                length = length - offset;
-        }
-        if (length > count)
-                length = count;
-        return length;
+static int cifs_debug_data_proc_open(struct inode *inode, struct file *file)
+{
+        return single_open(file, cifs_debug_data_proc_show, NULL);
 }
-#ifdef CONFIG_CIFS_STATS
+static const struct file_operations cifs_debug_data_proc_fops = {
+        .owner          = THIS_MODULE,
+        .open           = cifs_debug_data_proc_open,
+        .read           = seq_read,
+        .llseek         = seq_lseek,
+        .release        = single_release,
+};
-static int
+#ifdef CONFIG_CIFS_STATS
-cifs_stats_write(struct file *file, const char __user *buffer,
+static ssize_t cifs_stats_proc_write(struct file *file,
-                 unsigned long count, void *data)
+                const char __user *buffer, size_t count, loff_t *ppos)
 {
        char c;
        int rc;
@@ -307,236 +279,132 @@ cifs_stats_write(struct file *file, const char __user *buffer,
        return count;
 }
-static int
+static int cifs_stats_proc_show(struct seq_file *m, void *v)
-cifs_stats_read(char *buf, char **beginBuffer, off_t offset,
-                  int count, int *eof, void *data)
 {
-        int item_length, i, length;
+        int i;
        struct list_head *tmp;
        struct cifsTconInfo *tcon;
-        *beginBuffer = buf + offset;
+        seq_printf(m,
-        length = sprintf(buf,
                        "Resources in use\nCIFS Session: %d\n",
                        sesInfoAllocCount.counter);
-        buf += length;
+        seq_printf(m, "Share (unique mount targets): %d\n",
-        item_length =
-                sprintf(buf, "Share (unique mount targets): %d\n",
                        tconInfoAllocCount.counter);
-        length += item_length;
+        seq_printf(m, "SMB Request/Response Buffer: %d Pool size: %d\n",
-        buf += item_length;
-        item_length =
-                sprintf(buf, "SMB Request/Response Buffer: %d Pool size: %d\n",
                        bufAllocCount.counter,
                        cifs_min_rcv + tcpSesAllocCount.counter);
-        length += item_length;
+        seq_printf(m, "SMB Small Req/Resp Buffer: %d Pool size: %d\n",
-        buf += item_length;
-        item_length =
-                sprintf(buf, "SMB Small Req/Resp Buffer: %d Pool size: %d\n",
                        smBufAllocCount.counter, cifs_min_small);
-        length += item_length;
-        buf += item_length;
 #ifdef CONFIG_CIFS_STATS2
-        item_length = sprintf(buf, "Total Large %d Small %d Allocations\n",
+        seq_printf(m, "Total Large %d Small %d Allocations\n",
                                atomic_read(&totBufAllocCount),
                                atomic_read(&totSmBufAllocCount));
-        length += item_length;
-        buf += item_length;
 #endif /* CONFIG_CIFS_STATS2 */
-        item_length =
+        seq_printf(m, "Operations (MIDs): %d\n", midCount.counter);
-                sprintf(buf, "Operations (MIDs): %d\n",
+        seq_printf(m,
-                        midCount.counter);
-        length += item_length;
-        buf += item_length;
-        item_length = sprintf(buf,
                "\n%d session %d share reconnects\n",
                tcpSesReconnectCount.counter, tconInfoReconnectCount.counter);
-        length += item_length;
-        buf += item_length;
-        item_length = sprintf(buf,
+        seq_printf(m,
                "Total vfs operations: %d maximum at one time: %d\n",
                GlobalCurrentXid, GlobalMaxActiveXid);
-        length += item_length;
-        buf += item_length;
        i = 0;
        read_lock(&GlobalSMBSeslock);
        list_for_each(tmp, &GlobalTreeConnectionList) {
                i++;
                tcon = list_entry(tmp, struct cifsTconInfo, cifsConnectionList);
-                item_length = sprintf(buf, "\n%d) %s", i, tcon->treeName);
+                seq_printf(m, "\n%d) %s", i, tcon->treeName);
-                buf += item_length;
+                if (tcon->tidStatus == CifsNeedReconnect)
-                length += item_length;
+                        seq_puts(m, "\tDISCONNECTED ");
-                if (tcon->tidStatus == CifsNeedReconnect) {
+                seq_printf(m, "\nSMBs: %d Oplock Breaks: %d",
-                        buf += sprintf(buf, "\tDISCONNECTED ");
-                        length += 14;
-                }
-                item_length = sprintf(buf, "\nSMBs: %d Oplock Breaks: %d",
                        atomic_read(&tcon->num_smbs_sent),
                        atomic_read(&tcon->num_oplock_brks));
-                buf += item_length;
+                seq_printf(m, "\nReads:  %d Bytes: %lld",
-                length += item_length;
-                item_length = sprintf(buf, "\nReads:  %d Bytes: %lld",
                        atomic_read(&tcon->num_reads),
                        (long long)(tcon->bytes_read));
-                buf += item_length;
+                seq_printf(m, "\nWrites: %d Bytes: %lld",
-                length += item_length;
-                item_length = sprintf(buf, "\nWrites: %d Bytes: %lld",
                        atomic_read(&tcon->num_writes),
                        (long long)(tcon->bytes_written));
-                buf += item_length;
+                seq_printf(m,
-                length += item_length;
-                item_length = sprintf(buf,
                        "\nLocks: %d HardLinks: %d Symlinks: %d",
                        atomic_read(&tcon->num_locks),
                        atomic_read(&tcon->num_hardlinks),
                        atomic_read(&tcon->num_symlinks));
-                buf += item_length;
-                length += item_length;
-                item_length = sprintf(buf, "\nOpens: %d Closes: %d Deletes: %d",
+                seq_printf(m, "\nOpens: %d Closes: %d Deletes: %d",
                        atomic_read(&tcon->num_opens),
                        atomic_read(&tcon->num_closes),
                        atomic_read(&tcon->num_deletes));
-                buf += item_length;
+                seq_printf(m, "\nMkdirs: %d Rmdirs: %d",
-                length += item_length;
-                item_length = sprintf(buf, "\nMkdirs: %d Rmdirs: %d",
                        atomic_read(&tcon->num_mkdirs),
                        atomic_read(&tcon->num_rmdirs));
-                buf += item_length;
+                seq_printf(m, "\nRenames: %d T2 Renames %d",
-                length += item_length;
-                item_length = sprintf(buf, "\nRenames: %d T2 Renames %d",
                        atomic_read(&tcon->num_renames),
                        atomic_read(&tcon->num_t2renames));
-                buf += item_length;
+                seq_printf(m, "\nFindFirst: %d FNext %d FClose %d",
-                length += item_length;
-                item_length = sprintf(buf, "\nFindFirst: %d FNext %d FClose %d",
                        atomic_read(&tcon->num_ffirst),
                        atomic_read(&tcon->num_fnext),
                        atomic_read(&tcon->num_fclose));
-                buf += item_length;
-                length += item_length;
        }
        read_unlock(&GlobalSMBSeslock);
-        buf += sprintf(buf, "\n");
+        seq_putc(m, '\n');
-        length++;
+        return 0;
+}
-        if (offset + count >= length)
-                *eof = 1;
-        if (length < offset) {
-                *eof = 1;
-                return 0;
-        } else {
-                length = length - offset;
-        }
-        if (length > count)
-                length = count;
-        return length;
+static int cifs_stats_proc_open(struct inode *inode, struct file *file)
+{
+        return single_open(file, cifs_stats_proc_show, NULL);
 }
+static const struct file_operations cifs_stats_proc_fops = {
+        .owner          = THIS_MODULE,
+        .open           = cifs_stats_proc_open,
+        .read           = seq_read,
+        .llseek         = seq_lseek,
+        .release        = single_release,
+        .write          = cifs_stats_proc_write,
+};
 #endif /* STATS */
 static struct proc_dir_entry *proc_fs_cifs;
-read_proc_t cifs_txanchor_read;
+static const struct file_operations cifsFYI_proc_fops;
-static read_proc_t cifsFYI_read;
+static const struct file_operations cifs_oplock_proc_fops;
-static write_proc_t cifsFYI_write;
+static const struct file_operations cifs_lookup_cache_proc_fops;
-static read_proc_t oplockEnabled_read;
+static const struct file_operations traceSMB_proc_fops;
-static write_proc_t oplockEnabled_write;
+static const struct file_operations cifs_multiuser_mount_proc_fops;
-static read_proc_t lookupFlag_read;
+static const struct file_operations cifs_security_flags_proc_fops;
-static write_proc_t lookupFlag_write;
+static const struct file_operations cifs_experimental_proc_fops;
-static read_proc_t traceSMB_read;
+static const struct file_operations cifs_linux_ext_proc_fops;
-static write_proc_t traceSMB_write;
-static read_proc_t multiuser_mount_read;
-static write_proc_t multiuser_mount_write;
-static read_proc_t security_flags_read;
-static write_proc_t security_flags_write;
-/* static read_proc_t ntlmv2_enabled_read;
-static write_proc_t ntlmv2_enabled_write;
-static read_proc_t packet_signing_enabled_read;
-static write_proc_t packet_signing_enabled_write;*/
-static read_proc_t experimEnabled_read;
-static write_proc_t experimEnabled_write;
-static read_proc_t linuxExtensionsEnabled_read;
-static write_proc_t linuxExtensionsEnabled_write;
 void
 cifs_proc_init(void)
 {
-        struct proc_dir_entry *pde;
        proc_fs_cifs = proc_mkdir("fs/cifs", NULL);
        if (proc_fs_cifs == NULL)
                return;
        proc_fs_cifs->owner = THIS_MODULE;
-        create_proc_read_entry("DebugData", 0, proc_fs_cifs,
+        proc_create("DebugData", 0, proc_fs_cifs, &cifs_debug_data_proc_fops);
-                                cifs_debug_data_read, NULL);
 #ifdef CONFIG_CIFS_STATS
-        pde = create_proc_read_entry("Stats", 0, proc_fs_cifs,
+        proc_create("Stats", 0, proc_fs_cifs, &cifs_stats_proc_fops);
-                                cifs_stats_read, NULL);
-        if (pde)
-                pde->write_proc = cifs_stats_write;
 #endif /* STATS */
-        pde = create_proc_read_entry("cifsFYI", 0, proc_fs_cifs,
+        proc_create("cifsFYI", 0, proc_fs_cifs, &cifsFYI_proc_fops);
-                                cifsFYI_read, NULL);
+        proc_create("traceSMB", 0, proc_fs_cifs, &traceSMB_proc_fops);
-        if (pde)
+        proc_create("OplockEnabled", 0, proc_fs_cifs, &cifs_oplock_proc_fops);
-                pde->write_proc = cifsFYI_write;
+        proc_create("Experimental", 0, proc_fs_cifs,
+                    &cifs_experimental_proc_fops);
-        pde =
+        proc_create("LinuxExtensionsEnabled", 0, proc_fs_cifs,
-            create_proc_read_entry("traceSMB", 0, proc_fs_cifs,
+                    &cifs_linux_ext_proc_fops);
-                                traceSMB_read, NULL);
+        proc_create("MultiuserMount", 0, proc_fs_cifs,
-        if (pde)
+                    &cifs_multiuser_mount_proc_fops);
-                pde->write_proc = traceSMB_write;
+        proc_create("SecurityFlags", 0, proc_fs_cifs,
+                    &cifs_security_flags_proc_fops);
-        pde = create_proc_read_entry("OplockEnabled", 0, proc_fs_cifs,
+        proc_create("LookupCacheEnabled", 0, proc_fs_cifs,
-                                oplockEnabled_read, NULL);
+                    &cifs_lookup_cache_proc_fops);
-        if (pde)
-                pde->write_proc = oplockEnabled_write;
-        pde = create_proc_read_entry("Experimental", 0, proc_fs_cifs,
-                                experimEnabled_read, NULL);
-        if (pde)
-                pde->write_proc = experimEnabled_write;
-        pde = create_proc_read_entry("LinuxExtensionsEnabled", 0, proc_fs_cifs,
-                                linuxExtensionsEnabled_read, NULL);
-        if (pde)
-                pde->write_proc = linuxExtensionsEnabled_write;
-        pde =
-            create_proc_read_entry("MultiuserMount", 0, proc_fs_cifs,
-                                multiuser_mount_read, NULL);
-        if (pde)
-                pde->write_proc = multiuser_mount_write;
-        pde =
-            create_proc_read_entry("SecurityFlags", 0, proc_fs_cifs,
-                                security_flags_read, NULL);
-        if (pde)
-                pde->write_proc = security_flags_write;
-        pde =
-        create_proc_read_entry("LookupCacheEnabled", 0, proc_fs_cifs,
-                                lookupFlag_read, NULL);
-        if (pde)
-                pde->write_proc = lookupFlag_write;
-/*      pde =
-            create_proc_read_entry("NTLMV2Enabled", 0, proc_fs_cifs,
-                                ntlmv2_enabled_read, NULL);
-        if (pde)
-                pde->write_proc = ntlmv2_enabled_write;
-        pde =
-            create_proc_read_entry("PacketSigningEnabled", 0, proc_fs_cifs,
-                                packet_signing_enabled_read, NULL);
-        if (pde)
-                pde->write_proc = packet_signing_enabled_write;*/
 }
 void
@@ -553,39 +421,26 @@ cifs_proc_clean(void)
 #endif
        remove_proc_entry("MultiuserMount", proc_fs_cifs);
        remove_proc_entry("OplockEnabled", proc_fs_cifs);
-/*      remove_proc_entry("NTLMV2Enabled",proc_fs_cifs); */
        remove_proc_entry("SecurityFlags", proc_fs_cifs);
-/*      remove_proc_entry("PacketSigningEnabled", proc_fs_cifs); */
        remove_proc_entry("LinuxExtensionsEnabled", proc_fs_cifs);
        remove_proc_entry("Experimental", proc_fs_cifs);
        remove_proc_entry("LookupCacheEnabled", proc_fs_cifs);
        remove_proc_entry("fs/cifs", NULL);
 }
-static int
+static int cifsFYI_proc_show(struct seq_file *m, void *v)
-cifsFYI_read(char *page, char **start, off_t off, int count,
-             int *eof, void *data)
 {
-        int len;
+        seq_printf(m, "%d\n", cifsFYI);
+        return 0;
-        len = sprintf(page, "%d\n", cifsFYI);
+}
-        len -= off;
-        *start = page + off;
-        if (len > count)
-                len = count;
-        else
-                *eof = 1;
-        if (len < 0)
-                len = 0;
-        return len;
+static int cifsFYI_proc_open(struct inode *inode, struct file *file)
+{
+        return single_open(file, cifsFYI_proc_show, NULL);
 }
-static int
-cifsFYI_write(struct file *file, const char __user *buffer,
+static ssize_t cifsFYI_proc_write(struct file *file, const char __user *buffer,
-              unsigned long count, void *data)
+                size_t count, loff_t *ppos)
 {
        char c;
        int rc;
@@ -603,30 +458,28 @@ cifsFYI_write(struct file *file, const char __user *buffer,
        return count;
 }
-static int
+static const struct file_operations cifsFYI_proc_fops = {
-oplockEnabled_read(char *page, char **start, off_t off,
+        .owner          = THIS_MODULE,
-                   int count, int *eof, void *data)
+        .open           = cifsFYI_proc_open,
-{
+        .read           = seq_read,
-        int len;
+        .llseek         = seq_lseek,
+        .release        = single_release,
-        len = sprintf(page, "%d\n", oplockEnabled);
+        .write          = cifsFYI_proc_write,
+};
-        len -= off;
-        *start = page + off;
-        if (len > count)
-                len = count;
-        else
-                *eof = 1;
-        if (len < 0)
+static int cifs_oplock_proc_show(struct seq_file *m, void *v)
-                len = 0;
+{
+        seq_printf(m, "%d\n", oplockEnabled);
+        return 0;
+}
-        return len;
+static int cifs_oplock_proc_open(struct inode *inode, struct file *file)
+{
+        return single_open(file, cifs_oplock_proc_show, NULL);
 }
-static int
-oplockEnabled_write(struct file *file, const char __user *buffer,
+static ssize_t cifs_oplock_proc_write(struct file *file,
-                    unsigned long count, void *data)
+                const char __user *buffer, size_t count, loff_t *ppos)
 {
        char c;
        int rc;
@@ -642,30 +495,28 @@ oplockEnabled_write(struct file *file, const char __user *buffer,
        return count;
 }
-static int
+static const struct file_operations cifs_oplock_proc_fops = {
-experimEnabled_read(char *page, char **start, off_t off,
+        .owner          = THIS_MODULE,
-                    int count, int *eof, void *data)
+        .open           = cifs_oplock_proc_open,
-{
+        .read           = seq_read,
-        int len;
+        .llseek         = seq_lseek,
+        .release        = single_release,
-        len = sprintf(page, "%d\n", experimEnabled);
+        .write          = cifs_oplock_proc_write,
+};
-        len -= off;
-        *start = page + off;
-        if (len > count)
+static int cifs_experimental_proc_show(struct seq_file *m, void *v)
-                len = count;
+{
-        else
+        seq_printf(m, "%d\n", experimEnabled);
-                *eof = 1;
+        return 0;
+}
-        if (len < 0)
-                len = 0;
-        return len;
+static int cifs_experimental_proc_open(struct inode *inode, struct file *file)
+{
+        return single_open(file, cifs_experimental_proc_show, NULL);
 }
-static int
-experimEnabled_write(struct file *file, const char __user *buffer,
+static ssize_t cifs_experimental_proc_write(struct file *file,
-                     unsigned long count, void *data)
+                const char __user *buffer, size_t count, loff_t *ppos)
 {
        char c;
        int rc;
@@ -683,29 +534,28 @@ experimEnabled_write(struct file *file, const char __user *buffer,
        return count;
 }
-static int
+static const struct file_operations cifs_experimental_proc_fops = {
-linuxExtensionsEnabled_read(char *page, char **start, off_t off,
+        .owner          = THIS_MODULE,
-                            int count, int *eof, void *data)
+        .open           = cifs_experimental_proc_open,
-{
+        .read           = seq_read,
-        int len;
+        .llseek         = seq_lseek,
+        .release        = single_release,
-        len = sprintf(page, "%d\n", linuxExtEnabled);
+        .write          = cifs_experimental_proc_write,
-        len -= off;
+};
-        *start = page + off;
-        if (len > count)
+static int cifs_linux_ext_proc_show(struct seq_file *m, void *v)
-                len = count;
+{
-        else
+        seq_printf(m, "%d\n", linuxExtEnabled);
-                *eof = 1;
+        return 0;
+}
-        if (len < 0)
-                len = 0;
-        return len;
+static int cifs_linux_ext_proc_open(struct inode *inode, struct file *file)
+{
+        return single_open(file, cifs_linux_ext_proc_show, NULL);
 }
-static int
-linuxExtensionsEnabled_write(struct file *file, const char __user *buffer,
+static ssize_t cifs_linux_ext_proc_write(struct file *file,
-                             unsigned long count, void *data)
+                const char __user *buffer, size_t count, loff_t *ppos)
 {
        char c;
        int rc;
@@ -721,31 +571,28 @@ linuxExtensionsEnabled_write(struct file *file, const char __user *buffer,
        return count;
 }
+static const struct file_operations cifs_linux_ext_proc_fops = {
+        .owner          = THIS_MODULE,
+        .open           = cifs_linux_ext_proc_open,
+        .read           = seq_read,
+        .llseek         = seq_lseek,
+        .release        = single_release,
+        .write          = cifs_linux_ext_proc_write,
+};
-static int
+static int cifs_lookup_cache_proc_show(struct seq_file *m, void *v)
-lookupFlag_read(char *page, char **start, off_t off,
-                int count, int *eof, void *data)
 {
-        int len;
+        seq_printf(m, "%d\n", lookupCacheEnabled);
+        return 0;
-        len = sprintf(page, "%d\n", lookupCacheEnabled);
+}
-        len -= off;
-        *start = page + off;
-        if (len > count)
-                len = count;
-        else
-                *eof = 1;
-        if (len < 0)
-                len = 0;
-        return len;
+static int cifs_lookup_cache_proc_open(struct inode *inode, struct file *file)
+{
+        return single_open(file, cifs_lookup_cache_proc_show, NULL);
 }
-static int
-lookupFlag_write(struct file *file, const char __user *buffer,
+static ssize_t cifs_lookup_cache_proc_write(struct file *file,
-                    unsigned long count, void *data)
+                const char __user *buffer, size_t count, loff_t *ppos)
 {
        char c;
        int rc;
@@ -760,30 +607,29 @@ lookupFlag_write(struct file *file, const char __user *buffer,
        return count;
 }
-static int
-traceSMB_read(char *page, char **start, off_t off, int count,
-              int *eof, void *data)
-{
-        int len;
-        len = sprintf(page, "%d\n", traceSMB);
-        len -= off;
-        *start = page + off;
-        if (len > count)
+static const struct file_operations cifs_lookup_cache_proc_fops = {
-                len = count;
+        .owner          = THIS_MODULE,
-        else
+        .open           = cifs_lookup_cache_proc_open,
-                *eof = 1;
+        .read           = seq_read,
+        .llseek         = seq_lseek,
+        .release        = single_release,
+        .write          = cifs_lookup_cache_proc_write,
+};
-        if (len < 0)
+static int traceSMB_proc_show(struct seq_file *m, void *v)
-                len = 0;
+{
+        seq_printf(m, "%d\n", traceSMB);
+        return 0;
+}
-        return len;
+static int traceSMB_proc_open(struct inode *inode, struct file *file)
+{
+        return single_open(file, traceSMB_proc_show, NULL);
 }
-static int
-traceSMB_write(struct file *file, const char __user *buffer,
+static ssize_t traceSMB_proc_write(struct file *file, const char __user *buffer,
-               unsigned long count, void *data)
+                size_t count, loff_t *ppos)
 {
        char c;
        int rc;
@@ -799,30 +645,28 @@ traceSMB_write(struct file *file, const char __user *buffer,
        return count;
 }
-static int
+static const struct file_operations traceSMB_proc_fops = {
-multiuser_mount_read(char *page, char **start, off_t off,
+        .owner          = THIS_MODULE,
-                     int count, int *eof, void *data)
+        .open           = traceSMB_proc_open,
-{
+        .read           = seq_read,
-        int len;
+        .llseek         = seq_lseek,
+        .release        = single_release,
-        len = sprintf(page, "%d\n", multiuser_mount);
+        .write          = traceSMB_proc_write,
+};
-        len -= off;
-        *start = page + off;
-        if (len > count)
+static int cifs_multiuser_mount_proc_show(struct seq_file *m, void *v)
-                len = count;
+{
-        else
+        seq_printf(m, "%d\n", multiuser_mount);
-                *eof = 1;
+        return 0;
+}
-        if (len < 0)
-                len = 0;
-        return len;
+static int cifs_multiuser_mount_proc_open(struct inode *inode, struct file *fh)
+{
+        return single_open(fh, cifs_multiuser_mount_proc_show, NULL);
 }
-static int
-multiuser_mount_write(struct file *file, const char __user *buffer,
+static ssize_t cifs_multiuser_mount_proc_write(struct file *file,
-                      unsigned long count, void *data)
+                const char __user *buffer, size_t count, loff_t *ppos)
 {
        char c;
        int rc;
@@ -838,30 +682,28 @@ multiuser_mount_write(struct file *file, const char __user *buffer,
        return count;
 }
-static int
+static const struct file_operations cifs_multiuser_mount_proc_fops = {
-security_flags_read(char *page, char **start, off_t off,
+        .owner          = THIS_MODULE,
-                       int count, int *eof, void *data)
+        .open           = cifs_multiuser_mount_proc_open,
-{
+        .read           = seq_read,
-        int len;
+        .llseek         = seq_lseek,
+        .release        = single_release,
-        len = sprintf(page, "0x%x\n", extended_security);
+        .write          = cifs_multiuser_mount_proc_write,
+};
-        len -= off;
-        *start = page + off;
-        if (len > count)
+static int cifs_security_flags_proc_show(struct seq_file *m, void *v)
-                len = count;
+{
-        else
+        seq_printf(m, "0x%x\n", extended_security);
-                *eof = 1;
+        return 0;
+}
-        if (len < 0)
-                len = 0;
-        return len;
+static int cifs_security_flags_proc_open(struct inode *inode, struct file *file)
+{
+        return single_open(file, cifs_security_flags_proc_show, NULL);
 }
-static int
-security_flags_write(struct file *file, const char __user *buffer,
+static ssize_t cifs_security_flags_proc_write(struct file *file,
-                        unsigned long count, void *data)
+                const char __user *buffer, size_t count, loff_t *ppos)
 {
        unsigned int flags;
        char flags_string[12];
@@ -917,6 +759,15 @@ security_flags_write(struct file *file, const char __user *buffer,
        /* BB should we turn on MAY flags for other MUST options? */
        return count;
 }
+static const struct file_operations cifs_security_flags_proc_fops = {
+        .owner          = THIS_MODULE,
+        .open           = cifs_security_flags_proc_open,
+        .read           = seq_read,
+        .llseek         = seq_lseek,
+        .release        = single_release,
+        .write          = cifs_security_flags_proc_write,
+};
 #else
 inline void cifs_proc_init(void)
 {
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c
index 0e9fc2ba90ee..57ecdc83c26f 100644
--- a/fs/cifs/cifsacl.c
+++ b/fs/cifs/cifsacl.c
@@ -56,7 +56,7 @@ int match_sid(struct cifs_sid *ctsid)
        struct cifs_sid *cwsid;
        if (!ctsid)
-                return (-1);
+                return -1;
        for (i = 0; i < NUM_WK_SIDS; ++i) {
                cwsid = &(wksidarr[i].cifssid);
@@ -87,11 +87,11 @@ int match_sid(struct cifs_sid *ctsid)
                }
                cFYI(1, ("matching sid: %s\n", wksidarr[i].sidname));
-                return (0); /* sids compare/match */
+                return 0; /* sids compare/match */
        }
        cFYI(1, ("No matching sid"));
-        return (-1);
+        return -1;
 }
 /* if the two SIDs (roughly equivalent to a UUID for a user or group) are
@@ -102,16 +102,16 @@ int compare_sids(const struct cifs_sid *ctsid, const struct cifs_sid *cwsid)
        int num_subauth, num_sat, num_saw;
        if ((!ctsid) || (!cwsid))
-                return (0);
+                return 0;
        /* compare the revision */
        if (ctsid->revision != cwsid->revision)
-                return (0);
+                return 0;
        /* compare all of the six auth values */
        for (i = 0; i < 6; ++i) {
                if (ctsid->authority[i] != cwsid->authority[i])
-                        return (0);
+                        return 0;
        }
        /* compare all of the subauth values if any */
@@ -121,11 +121,11 @@ int compare_sids(const struct cifs_sid *ctsid, const struct cifs_sid *cwsid)
        if (num_subauth) {
                for (i = 0; i < num_subauth; ++i) {
                        if (ctsid->sub_auth[i] != cwsid->sub_auth[i])
-                                return (0);
+                                return 0;
                }
        }
-        return (1); /* sids compare/match */
+        return 1; /* sids compare/match */
 }
@@ -169,8 +169,7 @@ static void copy_sec_desc(const struct cifs_ntsd *pntsd,
        for (i = 0; i < 6; i++)
                ngroup_sid_ptr->authority[i] = group_sid_ptr->authority[i];
        for (i = 0; i < 5; i++)
-                ngroup_sid_ptr->sub_auth[i] =
+                ngroup_sid_ptr->sub_auth[i] = group_sid_ptr->sub_auth[i];
-                                cpu_to_le32(group_sid_ptr->sub_auth[i]);
        return;
 }
@@ -285,7 +284,7 @@ static __u16 fill_ace_for_sid(struct cifs_ace *pntace,
        size = 1 + 1 + 2 + 4 + 1 + 1 + 6 + (psid->num_subauth * 4);
        pntace->size = cpu_to_le16(size);
-        return (size);
+        return size;
 }
@@ -426,7 +425,7 @@ static int set_chmod_dacl(struct cifs_acl *pndacl, struct cifs_sid *pownersid,
        pndacl->size = cpu_to_le16(size + sizeof(struct cifs_acl));
        pndacl->num_aces = cpu_to_le32(3);
-        return (0);
+        return 0;
 }
@@ -510,7 +509,7 @@ static int parse_sec_desc(struct cifs_ntsd *pntsd, int acl_len,
                        sizeof(struct cifs_sid)); */
-        return (0);
+        return 0;
 }
@@ -527,7 +526,7 @@ static int build_sec_desc(struct cifs_ntsd *pntsd, struct cifs_ntsd *pnntsd,
        struct cifs_acl *ndacl_ptr = NULL; /* no need for SACL ptr */
        if ((inode == NULL) || (pntsd == NULL) || (pnntsd == NULL))
-                return (-EIO);
+                return -EIO;
        owner_sid_ptr = (struct cifs_sid *)((char *)pntsd +
                                le32_to_cpu(pntsd->osidoffset));
@@ -550,7 +549,7 @@ static int build_sec_desc(struct cifs_ntsd *pntsd, struct cifs_ntsd *pnntsd,
        /* copy security descriptor control portion and owner and group sid */
        copy_sec_desc(pntsd, pnntsd, sidsoffset);
-        return (rc);
+        return rc;
 }
@@ -629,11 +628,11 @@ static int set_cifs_acl(struct cifs_ntsd *pnntsd, __u32 acllen,
        cFYI(DBG2, ("set ACL for %s from mode 0x%x", path, inode->i_mode));
        if (!inode)
-                return (rc);
+                return rc;
        sb = inode->i_sb;
        if (sb == NULL)
-                return (rc);
+                return rc;
        cifs_sb = CIFS_SB(sb);
        xid = GetXid();
@@ -652,7 +651,7 @@ static int set_cifs_acl(struct cifs_ntsd *pnntsd, __u32 acllen,
                if (rc != 0) {
                        cERROR(1, ("Unable to open file to set ACL"));
                        FreeXid(xid);
-                        return (rc);
+                        return rc;
                }
        }
@@ -665,7 +664,7 @@ static int set_cifs_acl(struct cifs_ntsd *pnntsd, __u32 acllen,
        FreeXid(xid);
-        return (rc);
+        return rc;
 }
 /* Translate the CIFS ACL (simlar to NTFS ACL) for a file into mode bits */
@@ -715,7 +714,7 @@ int mode_to_acl(struct inode *inode, const char *path, __u64 nmode)
                if (!pnntsd) {
                        cERROR(1, ("Unable to allocate security descriptor"));
                        kfree(pntsd);
-                        return (-ENOMEM);
+                        return -ENOMEM;
                }
                rc = build_sec_desc(pntsd, pnntsd, inode, nmode);
@@ -732,6 +731,6 @@ int mode_to_acl(struct inode *inode, const char *path, __u64 nmode)
                kfree(pntsd);
        }
-        return (rc);
+        return rc;
 }
 #endif /* CONFIG_CIFS_EXPERIMENTAL */
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index 4ff8939c6cc7..83fd40dc1ef0 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -310,9 +310,8 @@ void calc_lanman_hash(struct cifsSesInfo *ses, char *lnm_session_key)
        utf8 and other multibyte codepages each need their own strupper
        function since a byte at a time will ont work. */
-        for (i = 0; i < CIFS_ENCPWD_SIZE; i++) {
+        for (i = 0; i < CIFS_ENCPWD_SIZE; i++)
                password_with_pad[i] = toupper(password_with_pad[i]);
-        }
        SMBencrypt(password_with_pad, ses->server->cryptKey, lnm_session_key);
        /* clear password before we return/free memory */
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 22857c639df5..1ec7076f7b24 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -267,7 +267,7 @@ cifs_statfs(struct dentry *dentry, struct kstatfs *buf)
        return 0;
 }
-static int cifs_permission(struct inode *inode, int mask, struct nameidata *nd)
+static int cifs_permission(struct inode *inode, int mask)
 {
        struct cifs_sb_info *cifs_sb;
@@ -766,7 +766,7 @@ const struct file_operations cifs_dir_ops = {
 };
 static void
-cifs_init_once(struct kmem_cache *cachep, void *inode)
+cifs_init_once(void *inode)
 {
        struct cifsInodeInfo *cifsi = inode;
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 9cfcf326ead3..7e1cf262effe 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -27,7 +27,7 @@
 #define MAX_SES_INFO 2
 #define MAX_TCON_INFO 4
-#define MAX_TREE_SIZE 2 + MAX_SERVER_SIZE + 1 + MAX_SHARE_SIZE + 1
+#define MAX_TREE_SIZE (2 + MAX_SERVER_SIZE + 1 + MAX_SHARE_SIZE + 1)
 #define MAX_SERVER_SIZE 15
 #define MAX_SHARE_SIZE  64      /* used to be 20, this should still be enough */
 #define MAX_USERNAME_SIZE 32    /* 32 is to allow for 15 char names + null
@@ -537,8 +537,8 @@ require use of the stronger protocol */
 #endif /* WEAK_PW_HASH */
 #define   CIFSSEC_MUST_SEAL     0x40040 /* not supported yet */
-#define   CIFSSEC_DEF  CIFSSEC_MAY_SIGN | CIFSSEC_MAY_NTLM | CIFSSEC_MAY_NTLMV2
+#define   CIFSSEC_DEF (CIFSSEC_MAY_SIGN | CIFSSEC_MAY_NTLM | CIFSSEC_MAY_NTLMV2)
-#define   CIFSSEC_MAX  CIFSSEC_MUST_SIGN | CIFSSEC_MUST_NTLMV2
+#define   CIFSSEC_MAX (CIFSSEC_MUST_SIGN | CIFSSEC_MUST_NTLMV2)
 #define   CIFSSEC_AUTH_MASK (CIFSSEC_MAY_NTLM | CIFSSEC_MAY_NTLMV2 | CIFSSEC_MAY_LANMAN | CIFSSEC_MAY_PLNTXT | CIFSSEC_MAY_KRB5)
 /*
 *****************************************************************
diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h
index 0f327c224da3..409abce12732 100644
--- a/fs/cifs/cifspdu.h
+++ b/fs/cifs/cifspdu.h
@@ -31,7 +31,7 @@
 #else
 #define CIFS_PROT   0
 #endif
-#define POSIX_PROT  CIFS_PROT+1
+#define POSIX_PROT  (CIFS_PROT+1)
 #define BAD_PROT 0xFFFF
 /* SMB command codes */
@@ -341,7 +341,7 @@
 #define CREATE_COMPLETE_IF_OPLK 0x00000100      /* should be zero */
 #define CREATE_NO_EA_KNOWLEDGE  0x00000200
 #define CREATE_EIGHT_DOT_THREE  0x00000400      /* doc says this is obsolete
-                                                 "open for recovery" flag - should
+                                                 "open for recovery" flag should
                                                 be zero in any case */
 #define CREATE_OPEN_FOR_RECOVERY 0x00000400
 #define CREATE_RANDOM_ACCESS    0x00000800
@@ -414,8 +414,8 @@ struct smb_hdr {
        __u8 WordCount;
 } __attribute__((packed));
 /* given a pointer to an smb_hdr retrieve the value of byte count */
-#define BCC(smb_var) ( *(__u16 *)((char *)smb_var + sizeof(struct smb_hdr) + (2 * smb_var->WordCount)))
+#define BCC(smb_var) (*(__u16 *)((char *)smb_var + sizeof(struct smb_hdr) + (2 * smb_var->WordCount)))
-#define BCC_LE(smb_var) ( *(__le16 *)((char *)smb_var + sizeof(struct smb_hdr) + (2 * smb_var->WordCount)))
+#define BCC_LE(smb_var) (*(__le16 *)((char *)smb_var + sizeof(struct smb_hdr) + (2 * smb_var->WordCount)))
 /* given a pointer to an smb_hdr retrieve the pointer to the byte area */
 #define pByteArea(smb_var) ((unsigned char *)smb_var + sizeof(struct smb_hdr) + (2 * smb_var->WordCount) + 2)
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 4511b708f0f3..c621ffa2ca90 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -686,11 +686,10 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
                                                 SecurityBlob,
                                                 count - 16,
                                                 &server->secType);
-                        if (rc == 1) {
+                        if (rc == 1)
                                rc = 0;
-                        } else {
+                        else
                                rc = -EINVAL;
-                        }
                }
        } else
                server->capabilities &= ~CAP_EXTENDED_SECURITY;
@@ -3914,7 +3913,10 @@ parse_DFS_referrals(TRANSACTION2_GET_DFS_REFER_RSP *pSMBr,
        bool is_unicode;
        struct dfs_referral_level_3 *ref;
-        is_unicode = pSMBr->hdr.Flags2 & SMBFLG2_UNICODE;
+        if (pSMBr->hdr.Flags2 & SMBFLG2_UNICODE)
+                is_unicode = true;
+        else
+                is_unicode = false;
        *num_of_nodes = le16_to_cpu(pSMBr->NumberOfReferrals);
        if (*num_of_nodes < 1) {
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index e8fa46c7cff2..b51d5777cde6 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -455,7 +455,7 @@ incomplete_rcv:
                /* Note that FC 1001 length is big endian on the wire,
                but we convert it here so it is always manipulated
                as host byte order */
-                pdu_length = ntohl(smb_buffer->smb_buf_length);
+                pdu_length = be32_to_cpu((__force __be32)smb_buffer->smb_buf_length);
                smb_buffer->smb_buf_length = pdu_length;
                cFYI(1, ("rfc1002 length 0x%x", pdu_length+4));
@@ -1461,6 +1461,39 @@ get_dfs_path(int xid, struct cifsSesInfo *pSesInfo, const char *old_path,
        return rc;
 }
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+static struct lock_class_key cifs_key[2];
+static struct lock_class_key cifs_slock_key[2];
+static inline void
+cifs_reclassify_socket4(struct socket *sock)
+{
+        struct sock *sk = sock->sk;
+        BUG_ON(sock_owned_by_user(sk));
+        sock_lock_init_class_and_name(sk, "slock-AF_INET-CIFS",
+                &cifs_slock_key[0], "sk_lock-AF_INET-CIFS", &cifs_key[0]);
+}
+static inline void
+cifs_reclassify_socket6(struct socket *sock)
+{
+        struct sock *sk = sock->sk;
+        BUG_ON(sock_owned_by_user(sk));
+        sock_lock_init_class_and_name(sk, "slock-AF_INET6-CIFS",
+                &cifs_slock_key[1], "sk_lock-AF_INET6-CIFS", &cifs_key[1]);
+}
+#else
+static inline void
+cifs_reclassify_socket4(struct socket *sock)
+{
+}
+static inline void
+cifs_reclassify_socket6(struct socket *sock)
+{
+}
+#endif
 /* See RFC1001 section 14 on representation of Netbios names */
 static void rfc1002mangle(char *target, char *source, unsigned int length)
 {
@@ -1495,6 +1528,7 @@ ipv4_connect(struct sockaddr_in *psin_server, struct socket **csocket,
                /* BB other socket options to set KEEPALIVE, NODELAY? */
                        cFYI(1, ("Socket created"));
                        (*csocket)->sk->sk_allocation = GFP_NOFS;
+                        cifs_reclassify_socket4(*csocket);
                }
        }
@@ -1627,6 +1661,7 @@ ipv6_connect(struct sockaddr_in6 *psin_server, struct socket **csocket)
                /* BB other socket options to set KEEPALIVE, NODELAY? */
                         cFYI(1, ("ipv6 Socket created"));
                        (*csocket)->sk->sk_allocation = GFP_NOFS;
+                        cifs_reclassify_socket6(*csocket);
                }
        }
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 2e904bd111c8..46e54d39461d 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -1413,6 +1413,82 @@ out_busy:
        return -ETXTBSY;
 }
+static int
+cifs_set_file_size(struct inode *inode, struct iattr *attrs,
+                   int xid, char *full_path)
+{
+        int rc;
+        struct cifsFileInfo *open_file;
+        struct cifsInodeInfo *cifsInode = CIFS_I(inode);
+        struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
+        struct cifsTconInfo *pTcon = cifs_sb->tcon;
+        /*
+         * To avoid spurious oplock breaks from server, in the case of
+         * inodes that we already have open, avoid doing path based
+         * setting of file size if we can do it by handle.
+         * This keeps our caching token (oplock) and avoids timeouts
+         * when the local oplock break takes longer to flush
+         * writebehind data than the SMB timeout for the SetPathInfo
+         * request would allow
+         */
+        open_file = find_writable_file(cifsInode);
+        if (open_file) {
+                __u16 nfid = open_file->netfid;
+                __u32 npid = open_file->pid;
+                rc = CIFSSMBSetFileSize(xid, pTcon, attrs->ia_size, nfid,
+                                        npid, false);
+                atomic_dec(&open_file->wrtPending);
+                cFYI(1, ("SetFSize for attrs rc = %d", rc));
+                if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
+                        unsigned int bytes_written;
+                        rc = CIFSSMBWrite(xid, pTcon, nfid, 0, attrs->ia_size,
+                                          &bytes_written, NULL, NULL, 1);
+                        cFYI(1, ("Wrt seteof rc %d", rc));
+                }
+        } else
+                rc = -EINVAL;
+        if (rc != 0) {
+                /* Set file size by pathname rather than by handle
+                   either because no valid, writeable file handle for
+                   it was found or because there was an error setting
+                   it by handle */
+                rc = CIFSSMBSetEOF(xid, pTcon, full_path, attrs->ia_size,
+                                   false, cifs_sb->local_nls,
+                                   cifs_sb->mnt_cifs_flags &
+                                        CIFS_MOUNT_MAP_SPECIAL_CHR);
+                cFYI(1, ("SetEOF by path (setattrs) rc = %d", rc));
+                if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
+                        __u16 netfid;
+                        int oplock = 0;
+                        rc = SMBLegacyOpen(xid, pTcon, full_path,
+                                FILE_OPEN, GENERIC_WRITE,
+                                CREATE_NOT_DIR, &netfid, &oplock, NULL,
+                                cifs_sb->local_nls,
+                                cifs_sb->mnt_cifs_flags &
+                                        CIFS_MOUNT_MAP_SPECIAL_CHR);
+                        if (rc == 0) {
+                                unsigned int bytes_written;
+                                rc = CIFSSMBWrite(xid, pTcon, netfid, 0,
+                                                  attrs->ia_size,
+                                                  &bytes_written, NULL,
+                                                  NULL, 1);
+                                cFYI(1, ("wrt seteof rc %d", rc));
+                                CIFSSMBClose(xid, pTcon, netfid);
+                        }
+                }
+        }
+        if (rc == 0) {
+                rc = cifs_vmtruncate(inode, attrs->ia_size);
+                cifs_truncate_page(inode->i_mapping, inode->i_size);
+        }
+        return rc;
+}
 int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
 {
        int xid;
@@ -1420,7 +1496,6 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
        struct cifsTconInfo *pTcon;
        char *full_path = NULL;
        int rc = -EACCES;
-        struct cifsFileInfo *open_file = NULL;
        FILE_BASIC_INFO time_buf;
        bool set_time = false;
        bool set_dosattr = false;
@@ -1472,78 +1547,8 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
        }
        if (attrs->ia_valid & ATTR_SIZE) {
-                /* To avoid spurious oplock breaks from server, in the case of
+                rc = cifs_set_file_size(inode, attrs, xid, full_path);
-                   inodes that we already have open, avoid doing path based
+                if (rc != 0)
-                   setting of file size if we can do it by handle.
-                   This keeps our caching token (oplock) and avoids timeouts
-                   when the local oplock break takes longer to flush
-                   writebehind data than the SMB timeout for the SetPathInfo
-                   request would allow */
-                open_file = find_writable_file(cifsInode);
-                if (open_file) {
-                        __u16 nfid = open_file->netfid;
-                        __u32 npid = open_file->pid;
-                        rc = CIFSSMBSetFileSize(xid, pTcon, attrs->ia_size,
-                                                nfid, npid, false);
-                        atomic_dec(&open_file->wrtPending);
-                        cFYI(1, ("SetFSize for attrs rc = %d", rc));
-                        if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
-                                unsigned int bytes_written;
-                                rc = CIFSSMBWrite(xid, pTcon,
-                                                  nfid, 0, attrs->ia_size,
-                                                  &bytes_written, NULL, NULL,
-                                                  1 /* 45 seconds */);
-                                cFYI(1, ("Wrt seteof rc %d", rc));
-                        }
-                } else
-                        rc = -EINVAL;
-                if (rc != 0) {
-                        /* Set file size by pathname rather than by handle
-                           either because no valid, writeable file handle for
-                           it was found or because there was an error setting
-                           it by handle */
-                        rc = CIFSSMBSetEOF(xid, pTcon, full_path,
-                                           attrs->ia_size, false,
-                                           cifs_sb->local_nls,
-                                           cifs_sb->mnt_cifs_flags &
-                                                CIFS_MOUNT_MAP_SPECIAL_CHR);
-                        cFYI(1, ("SetEOF by path (setattrs) rc = %d", rc));
-                        if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
-                                __u16 netfid;
-                                int oplock = 0;
-                                rc = SMBLegacyOpen(xid, pTcon, full_path,
-                                        FILE_OPEN, GENERIC_WRITE,
-                                        CREATE_NOT_DIR, &netfid, &oplock,
-                                        NULL, cifs_sb->local_nls,
-                                        cifs_sb->mnt_cifs_flags &
-                                                CIFS_MOUNT_MAP_SPECIAL_CHR);
-                                if (rc == 0) {
-                                        unsigned int bytes_written;
-                                        rc = CIFSSMBWrite(xid, pTcon,
-                                                        netfid, 0,
-                                                        attrs->ia_size,
-                                                        &bytes_written, NULL,
-                                                        NULL, 1 /* 45 sec */);
-                                        cFYI(1, ("wrt seteof rc %d", rc));
-                                        CIFSSMBClose(xid, pTcon, netfid);
-                                }
-                        }
-                }
-                /* Server is ok setting allocation size implicitly - no need
-                   to call:
-                CIFSSMBSetEOF(xid, pTcon, full_path, attrs->ia_size, true,
-                         cifs_sb->local_nls);
-                   */
-                if (rc == 0) {
-                        rc = cifs_vmtruncate(inode, attrs->ia_size);
-                        cifs_truncate_page(inode->i_mapping, inode->i_size);
-                } else
                        goto cifs_setattr_exit;
        }
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index 83f306954883..5f40ed3473f5 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -690,6 +690,7 @@ static int find_cifs_entry(const int xid, struct cifsTconInfo *pTcon,
                        else
                                cifs_buf_release(cifsFile->srch_inf.
                                                ntwrk_buf_start);
+                        cifsFile->srch_inf.ntwrk_buf_start = NULL;
                }
                rc = initiate_cifs_search(xid, file);
                if (rc) {
diff --git a/fs/coda/coda_linux.c b/fs/coda/coda_linux.c
index e1c854890f94..bf4a3fd3c8e3 100644
--- a/fs/coda/coda_linux.c
+++ b/fs/coda/coda_linux.c
@@ -28,11 +28,9 @@ int coda_fake_statfs;
 char * coda_f2s(struct CodaFid *f)
 {
        static char s[60];
-#ifdef CONFIG_CODA_FS_OLD_API
-        sprintf(s, "(%08x.%08x.%08x)", f->opaque[0], f->opaque[1], f->opaque[2]);
-#else
        sprintf(s, "(%08x.%08x.%08x.%08x)", f->opaque[0], f->opaque[1], f->opaque[2], f->opaque[3]);
-#endif
        return s;
 }
diff --git a/fs/coda/dir.c b/fs/coda/dir.c
index 3d2580e00a3e..c5916228243c 100644
--- a/fs/coda/dir.c
+++ b/fs/coda/dir.c
@@ -137,9 +137,11 @@ exit:
 }
-int coda_permission(struct inode *inode, int mask, struct nameidata *nd)
+int coda_permission(struct inode *inode, int mask)
 {
        int error = 0;
+        mask &= MAY_READ | MAY_WRITE | MAY_EXEC;
 
        if (!mask)
                return 0; 
diff --git a/fs/coda/inode.c b/fs/coda/inode.c
index 2f58dfc70083..830f51abb971 100644
--- a/fs/coda/inode.c
+++ b/fs/coda/inode.c
@@ -58,7 +58,7 @@ static void coda_destroy_inode(struct inode *inode)
        kmem_cache_free(coda_inode_cachep, ITOC(inode));
 }
-static void init_once(struct kmem_cache * cachep, void *foo)
+static void init_once(void *foo)
 {
        struct coda_inode_info *ei = (struct coda_inode_info *) foo;
diff --git a/fs/coda/pioctl.c b/fs/coda/pioctl.c
index c21a1f552a63..c51365422aa8 100644
--- a/fs/coda/pioctl.c
+++ b/fs/coda/pioctl.c
@@ -24,8 +24,7 @@
 #include <linux/coda_psdev.h>
 /* pioctl ops */
-static int coda_ioctl_permission(struct inode *inode, int mask,
+static int coda_ioctl_permission(struct inode *inode, int mask);
-                                 struct nameidata *nd);
 static int coda_pioctl(struct inode * inode, struct file * filp, 
                       unsigned int cmd, unsigned long user_data);
@@ -42,8 +41,7 @@ const struct file_operations coda_ioctl_operations = {
 };
 /* the coda pioctl inode ops */
-static int coda_ioctl_permission(struct inode *inode, int mask,
+static int coda_ioctl_permission(struct inode *inode, int mask)
-                                 struct nameidata *nd)
 {
        return 0;
 }
@@ -51,7 +49,7 @@ static int coda_ioctl_permission(struct inode *inode, int mask,
 static int coda_pioctl(struct inode * inode, struct file * filp, 
                       unsigned int cmd, unsigned long user_data)
 {
-        struct nameidata nd;
+        struct path path;
        int error;
        struct PioctlData data;
        struct inode *target_inode = NULL;
@@ -66,21 +64,21 @@ static int coda_pioctl(struct inode * inode, struct file * filp,
         * Look up the pathname. Note that the pathname is in 
         * user memory, and namei takes care of this
         */
-        if ( data.follow ) {
+        if (data.follow) {
-                error = user_path_walk(data.path, &nd);
+                error = user_path(data.path, &path);
        } else {
-                error = user_path_walk_link(data.path, &nd);
+                error = user_lpath(data.path, &path);
        }
                
        if ( error ) {
                return error;
        } else {
-                target_inode = nd.path.dentry->d_inode;
+                target_inode = path.dentry->d_inode;
        }
        
        /* return if it is not a Coda inode */
        if ( target_inode->i_sb != inode->i_sb ) {
-                path_put(&nd.path);
+                path_put(&path);
                return  -EINVAL;
        }
@@ -89,7 +87,7 @@ static int coda_pioctl(struct inode * inode, struct file * filp,
        error = venus_pioctl(inode->i_sb, &(cnp->c_fid), cmd, &data);
-        path_put(&nd.path);
+        path_put(&path);
        return error;
 }
diff --git a/fs/coda/psdev.c b/fs/coda/psdev.c
index e3eb3556622b..0d9b80ec689c 100644
--- a/fs/coda/psdev.c
+++ b/fs/coda/psdev.c
@@ -362,8 +362,9 @@ static int init_coda_psdev(void)
                goto out_chrdev;
        }               
        for (i = 0; i < MAX_CODADEVS; i++)
-                device_create(coda_psdev_class, NULL,
+                device_create_drvdata(coda_psdev_class, NULL,
-                              MKDEV(CODA_PSDEV_MAJOR,i), "cfs%d", i);
+                                      MKDEV(CODA_PSDEV_MAJOR, i),
+                                      NULL, "cfs%d", i);
        coda_sysctl_init();
        goto out;
@@ -377,11 +378,7 @@ MODULE_AUTHOR("Jan Harkes, Peter J. Braam");
 MODULE_DESCRIPTION("Coda Distributed File System VFS interface");
 MODULE_ALIAS_CHARDEV_MAJOR(CODA_PSDEV_MAJOR);
 MODULE_LICENSE("GPL");
-#ifdef CONFIG_CODA_FS_OLD_API
-MODULE_VERSION("5.3.21");
-#else
 MODULE_VERSION("6.6");
-#endif
 static int __init init_coda(void)
 {
diff --git a/fs/coda/upcall.c b/fs/coda/upcall.c
index 359e531094dd..ce432bca95d1 100644
--- a/fs/coda/upcall.c
+++ b/fs/coda/upcall.c
@@ -52,12 +52,8 @@ static void *alloc_upcall(int opcode, int size)
        inp->ih.opcode = opcode;
        inp->ih.pid = current->pid;
        inp->ih.pgid = task_pgrp_nr(current);
-#ifdef CONFIG_CODA_FS_OLD_API
-        memset(&inp->ih.cred, 0, sizeof(struct coda_cred));
-        inp->ih.cred.cr_fsuid = current->fsuid;
-#else
        inp->ih.uid = current->fsuid;
-#endif
        return (void*)inp;
 }
@@ -166,20 +162,11 @@ int venus_close(struct super_block *sb, struct CodaFid *fid, int flags,
        union inputArgs *inp;
        union outputArgs *outp;
        int insize, outsize, error;
-#ifdef CONFIG_CODA_FS_OLD_API
-        struct coda_cred cred = { 0, };
-        cred.cr_fsuid = uid;
-#endif
        
        insize = SIZE(release);
        UPARG(CODA_CLOSE);
        
-#ifdef CONFIG_CODA_FS_OLD_API
-        memcpy(&(inp->ih.cred), &cred, sizeof(cred));
-#else
        inp->ih.uid = uid;
-#endif
-        
        inp->coda_close.VFid = *fid;
        inp->coda_close.flags = flags;
diff --git a/fs/compat.c b/fs/compat.c
index ed43e17a5dc6..c9d1472e65c5 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -197,8 +197,8 @@ static int put_compat_statfs(struct compat_statfs __user *ubuf, struct kstatfs *
 {
        
        if (sizeof ubuf->f_blocks == 4) {
-                if ((kbuf->f_blocks | kbuf->f_bfree | kbuf->f_bavail) &
+                if ((kbuf->f_blocks | kbuf->f_bfree | kbuf->f_bavail |
-                    0xffffffff00000000ULL)
+                     kbuf->f_bsize | kbuf->f_frsize) & 0xffffffff00000000ULL)
                        return -EOVERFLOW;
                /* f_files and f_ffree may be -1; it's okay
                 * to stuff that into 32 bits */
@@ -234,18 +234,18 @@ static int put_compat_statfs(struct compat_statfs __user *ubuf, struct kstatfs *
 * The following statfs calls are copies of code from fs/open.c and
 * should be checked against those from time to time
 */
-asmlinkage long compat_sys_statfs(const char __user *path, struct compat_statfs __user *buf)
+asmlinkage long compat_sys_statfs(const char __user *pathname, struct compat_statfs __user *buf)
 {
-        struct nameidata nd;
+        struct path path;
        int error;
-        error = user_path_walk(path, &nd);
+        error = user_path(pathname, &path);
        if (!error) {
                struct kstatfs tmp;
-                error = vfs_statfs(nd.path.dentry, &tmp);
+                error = vfs_statfs(path.dentry, &tmp);
                if (!error)
                        error = put_compat_statfs(buf, &tmp);
-                path_put(&nd.path);
+                path_put(&path);
        }
        return error;
 }
@@ -271,8 +271,8 @@ out:
 static int put_compat_statfs64(struct compat_statfs64 __user *ubuf, struct kstatfs *kbuf)
 {
        if (sizeof ubuf->f_blocks == 4) {
-                if ((kbuf->f_blocks | kbuf->f_bfree | kbuf->f_bavail) &
+                if ((kbuf->f_blocks | kbuf->f_bfree | kbuf->f_bavail |
-                    0xffffffff00000000ULL)
+                     kbuf->f_bsize | kbuf->f_frsize) & 0xffffffff00000000ULL)
                        return -EOVERFLOW;
                /* f_files and f_ffree may be -1; it's okay
                 * to stuff that into 32 bits */
@@ -299,21 +299,21 @@ static int put_compat_statfs64(struct compat_statfs64 __user *ubuf, struct kstat
        return 0;
 }
-asmlinkage long compat_sys_statfs64(const char __user *path, compat_size_t sz, struct compat_statfs64 __user *buf)
+asmlinkage long compat_sys_statfs64(const char __user *pathname, compat_size_t sz, struct compat_statfs64 __user *buf)
 {
-        struct nameidata nd;
+        struct path path;
        int error;
        if (sz != sizeof(*buf))
                return -EINVAL;
-        error = user_path_walk(path, &nd);
+        error = user_path(pathname, &path);
        if (!error) {
                struct kstatfs tmp;
-                error = vfs_statfs(nd.path.dentry, &tmp);
+                error = vfs_statfs(path.dentry, &tmp);
                if (!error)
                        error = put_compat_statfs64(buf, &tmp);
-                path_put(&nd.path);
+                path_put(&path);
        }
        return error;
 }
@@ -2131,9 +2131,9 @@ asmlinkage long compat_sys_epoll_pwait(int epfd,
 #ifdef CONFIG_SIGNALFD
-asmlinkage long compat_sys_signalfd(int ufd,
+asmlinkage long compat_sys_signalfd4(int ufd,
-                                    const compat_sigset_t __user *sigmask,
+                                     const compat_sigset_t __user *sigmask,
-                                    compat_size_t sigsetsize)
+                                     compat_size_t sigsetsize, int flags)
 {
        compat_sigset_t ss32;
        sigset_t tmp;
@@ -2148,9 +2148,15 @@ asmlinkage long compat_sys_signalfd(int ufd,
        if (copy_to_user(ksigmask, &tmp, sizeof(sigset_t)))
                return -EFAULT;
-        return sys_signalfd(ufd, ksigmask, sizeof(sigset_t));
+        return sys_signalfd4(ufd, ksigmask, sizeof(sigset_t), flags);
 }
+asmlinkage long compat_sys_signalfd(int ufd,
+                                    const compat_sigset_t __user *sigmask,
+                                    compat_size_t sigsetsize)
+{
+        return compat_sys_signalfd4(ufd, sigmask, sigsetsize, 0);
+}
 #endif /* CONFIG_SIGNALFD */
 #ifdef CONFIG_TIMERFD
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index c54eaab71a19..5235c67e7594 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -25,7 +25,6 @@
 #include <linux/slab.h>
 #include <linux/raid/md.h>
 #include <linux/kd.h>
-#include <linux/dirent.h>
 #include <linux/route.h>
 #include <linux/in6.h>
 #include <linux/ipv6_route.h>
@@ -58,7 +57,6 @@
 #include <linux/syscalls.h>
 #include <linux/i2c.h>
 #include <linux/i2c-dev.h>
-#include <linux/wireless.h>
 #include <linux/atalk.h>
 #include <linux/loop.h>
@@ -1759,64 +1757,6 @@ static int do_i2c_smbus_ioctl(unsigned int fd, unsigned int cmd, unsigned long a
        return sys_ioctl(fd, cmd, (unsigned long)tdata);
 }
-struct compat_iw_point {
-        compat_caddr_t pointer;
-        __u16 length;
-        __u16 flags;
-};
-static int do_wireless_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg)
-{
-        struct iwreq __user *iwr;
-        struct iwreq __user *iwr_u;
-        struct iw_point __user *iwp;
-        struct compat_iw_point __user *iwp_u;
-        compat_caddr_t pointer_u;
-        void __user *pointer;
-        __u16 length, flags;
-        int ret;
-        iwr_u = compat_ptr(arg);
-        iwp_u = (struct compat_iw_point __user *) &iwr_u->u.data;
-        iwr = compat_alloc_user_space(sizeof(*iwr));
-        if (iwr == NULL)
-                return -ENOMEM;
-        iwp = &iwr->u.data;
-        if (!access_ok(VERIFY_WRITE, iwr, sizeof(*iwr)))
-                return -EFAULT;
-        if (__copy_in_user(&iwr->ifr_ifrn.ifrn_name[0],
-                           &iwr_u->ifr_ifrn.ifrn_name[0],
-                           sizeof(iwr->ifr_ifrn.ifrn_name)))
-                return -EFAULT;
-        if (__get_user(pointer_u, &iwp_u->pointer) ||
-            __get_user(length, &iwp_u->length) ||
-            __get_user(flags, &iwp_u->flags))
-                return -EFAULT;
-        if (__put_user(compat_ptr(pointer_u), &iwp->pointer) ||
-            __put_user(length, &iwp->length) ||
-            __put_user(flags, &iwp->flags))
-                return -EFAULT;
-        ret = sys_ioctl(fd, cmd, (unsigned long) iwr);
-        if (__get_user(pointer, &iwp->pointer) ||
-            __get_user(length, &iwp->length) ||
-            __get_user(flags, &iwp->flags))
-                return -EFAULT;
-        if (__put_user(ptr_to_compat(pointer), &iwp_u->pointer) ||
-            __put_user(length, &iwp_u->length) ||
-            __put_user(flags, &iwp_u->flags))
-                return -EFAULT;
-        return ret;
-}
 /* Since old style bridge ioctl's endup using SIOCDEVPRIVATE
 * for some operations; this forces use of the newer bridge-utils that
 * use compatiable ioctls
@@ -2356,8 +2296,6 @@ COMPATIBLE_IOCTL(AUTOFS_IOC_PROTOVER)
 COMPATIBLE_IOCTL(AUTOFS_IOC_EXPIRE)
 COMPATIBLE_IOCTL(AUTOFS_IOC_EXPIRE_MULTI)
 COMPATIBLE_IOCTL(AUTOFS_IOC_PROTOSUBVER)
-COMPATIBLE_IOCTL(AUTOFS_IOC_ASKREGHOST)
-COMPATIBLE_IOCTL(AUTOFS_IOC_TOGGLEREGHOST)
 COMPATIBLE_IOCTL(AUTOFS_IOC_ASKUMOUNT)
 /* Raw devices */
 COMPATIBLE_IOCTL(RAW_SETBIND)
@@ -2405,6 +2343,7 @@ COMPATIBLE_IOCTL(HCIGETDEVLIST)
 COMPATIBLE_IOCTL(HCIGETDEVINFO)
 COMPATIBLE_IOCTL(HCIGETCONNLIST)
 COMPATIBLE_IOCTL(HCIGETCONNINFO)
+COMPATIBLE_IOCTL(HCIGETAUTHINFO)
 COMPATIBLE_IOCTL(HCISETRAW)
 COMPATIBLE_IOCTL(HCISETSCAN)
 COMPATIBLE_IOCTL(HCISETAUTH)
@@ -2501,36 +2440,6 @@ COMPATIBLE_IOCTL(I2C_TENBIT)
 COMPATIBLE_IOCTL(I2C_PEC)
 COMPATIBLE_IOCTL(I2C_RETRIES)
 COMPATIBLE_IOCTL(I2C_TIMEOUT)
-/* wireless */
-COMPATIBLE_IOCTL(SIOCSIWCOMMIT)
-COMPATIBLE_IOCTL(SIOCGIWNAME)
-COMPATIBLE_IOCTL(SIOCSIWNWID)
-COMPATIBLE_IOCTL(SIOCGIWNWID)
-COMPATIBLE_IOCTL(SIOCSIWFREQ)
-COMPATIBLE_IOCTL(SIOCGIWFREQ)
-COMPATIBLE_IOCTL(SIOCSIWMODE)
-COMPATIBLE_IOCTL(SIOCGIWMODE)
-COMPATIBLE_IOCTL(SIOCSIWSENS)
-COMPATIBLE_IOCTL(SIOCGIWSENS)
-COMPATIBLE_IOCTL(SIOCSIWRANGE)
-COMPATIBLE_IOCTL(SIOCSIWPRIV)
-COMPATIBLE_IOCTL(SIOCSIWSTATS)
-COMPATIBLE_IOCTL(SIOCSIWAP)
-COMPATIBLE_IOCTL(SIOCGIWAP)
-COMPATIBLE_IOCTL(SIOCSIWRATE)
-COMPATIBLE_IOCTL(SIOCGIWRATE)
-COMPATIBLE_IOCTL(SIOCSIWRTS)
-COMPATIBLE_IOCTL(SIOCGIWRTS)
-COMPATIBLE_IOCTL(SIOCSIWFRAG)
-COMPATIBLE_IOCTL(SIOCGIWFRAG)
-COMPATIBLE_IOCTL(SIOCSIWTXPOW)
-COMPATIBLE_IOCTL(SIOCGIWTXPOW)
-COMPATIBLE_IOCTL(SIOCSIWRETRY)
-COMPATIBLE_IOCTL(SIOCGIWRETRY)
-COMPATIBLE_IOCTL(SIOCSIWPOWER)
-COMPATIBLE_IOCTL(SIOCGIWPOWER)
-COMPATIBLE_IOCTL(SIOCSIWAUTH)
-COMPATIBLE_IOCTL(SIOCGIWAUTH)
 /* hiddev */
 COMPATIBLE_IOCTL(HIDIOCGVERSION)
 COMPATIBLE_IOCTL(HIDIOCAPPLICATION)
@@ -2761,29 +2670,7 @@ COMPATIBLE_IOCTL(USBDEVFS_IOCTL32)
 HANDLE_IOCTL(I2C_FUNCS, w_long)
 HANDLE_IOCTL(I2C_RDWR, do_i2c_rdwr_ioctl)
 HANDLE_IOCTL(I2C_SMBUS, do_i2c_smbus_ioctl)
-/* wireless */
+/* bridge */
-HANDLE_IOCTL(SIOCGIWRANGE, do_wireless_ioctl)
-HANDLE_IOCTL(SIOCGIWPRIV, do_wireless_ioctl)
-HANDLE_IOCTL(SIOCGIWSTATS, do_wireless_ioctl)
-HANDLE_IOCTL(SIOCSIWSPY, do_wireless_ioctl)
-HANDLE_IOCTL(SIOCGIWSPY, do_wireless_ioctl)
-HANDLE_IOCTL(SIOCSIWTHRSPY, do_wireless_ioctl)
-HANDLE_IOCTL(SIOCGIWTHRSPY, do_wireless_ioctl)
-HANDLE_IOCTL(SIOCSIWMLME, do_wireless_ioctl)
-HANDLE_IOCTL(SIOCGIWAPLIST, do_wireless_ioctl)
-HANDLE_IOCTL(SIOCSIWSCAN, do_wireless_ioctl)
-HANDLE_IOCTL(SIOCGIWSCAN, do_wireless_ioctl)
-HANDLE_IOCTL(SIOCSIWESSID, do_wireless_ioctl)
-HANDLE_IOCTL(SIOCGIWESSID, do_wireless_ioctl)
-HANDLE_IOCTL(SIOCSIWNICKN, do_wireless_ioctl)
-HANDLE_IOCTL(SIOCGIWNICKN, do_wireless_ioctl)
-HANDLE_IOCTL(SIOCSIWENCODE, do_wireless_ioctl)
-HANDLE_IOCTL(SIOCGIWENCODE, do_wireless_ioctl)
-HANDLE_IOCTL(SIOCSIWGENIE, do_wireless_ioctl)
-HANDLE_IOCTL(SIOCGIWGENIE, do_wireless_ioctl)
-HANDLE_IOCTL(SIOCSIWENCODEEXT, do_wireless_ioctl)
-HANDLE_IOCTL(SIOCGIWENCODEEXT, do_wireless_ioctl)
-HANDLE_IOCTL(SIOCSIWPMKSA, do_wireless_ioctl)
 HANDLE_IOCTL(SIOCSIFBR, old_bridge_ioctl)
 HANDLE_IOCTL(SIOCGIFBR, old_bridge_ioctl)
 /* Not implemented in the native kernel */
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index 0e64312a084c..179589be063a 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -1027,9 +1027,10 @@ EXPORT_SYMBOL(configfs_undepend_item);
 static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 {
-        int ret, module_got = 0;
+        int ret = 0;
-        struct config_group *group;
+        int module_got = 0;
-        struct config_item *item;
+        struct config_group *group = NULL;
+        struct config_item *item = NULL;
        struct config_item *parent_item;
        struct configfs_subsystem *subsys;
        struct configfs_dirent *sd;
@@ -1070,25 +1071,30 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
        snprintf(name, dentry->d_name.len + 1, "%s", dentry->d_name.name);
        mutex_lock(&subsys->su_mutex);
-        group = NULL;
-        item = NULL;
        if (type->ct_group_ops->make_group) {
-                ret = type->ct_group_ops->make_group(to_config_group(parent_item), name, &group);
+                group = type->ct_group_ops->make_group(to_config_group(parent_item), name);
-                if (!ret) {
+                if (!group)
+                        group = ERR_PTR(-ENOMEM);
+                if (!IS_ERR(group)) {
                        link_group(to_config_group(parent_item), group);
                        item = &group->cg_item;
-                }
+                } else
+                        ret = PTR_ERR(group);
        } else {
-                ret = type->ct_group_ops->make_item(to_config_group(parent_item), name, &item);
+                item = type->ct_group_ops->make_item(to_config_group(parent_item), name);
-                if (!ret)
+                if (!item)
+                        item = ERR_PTR(-ENOMEM);
+                if (!IS_ERR(item))
                        link_obj(parent_item, item);
+                else
+                        ret = PTR_ERR(item);
        }
        mutex_unlock(&subsys->su_mutex);
        kfree(name);
        if (ret) {
                /*
-                 * If ret != 0, then link_obj() was never called.
+                 * On error (ret != 0), link_obj() was never called.
                 * There are no extra references to clean up.
                 */
                goto out_put;
diff --git a/fs/dcache.c b/fs/dcache.c
index 6068c25b393c..f2584d22cb45 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -61,7 +61,6 @@ static struct kmem_cache *dentry_cache __read_mostly;
 static unsigned int d_hash_mask __read_mostly;
 static unsigned int d_hash_shift __read_mostly;
 static struct hlist_head *dentry_hashtable __read_mostly;
-static LIST_HEAD(dentry_unused);
 /* Statistics gathering. */
 struct dentry_stat_t dentry_stat = {
@@ -96,14 +95,6 @@ static void d_free(struct dentry *dentry)
                call_rcu(&dentry->d_u.d_rcu, d_callback);
 }
-static void dentry_lru_remove(struct dentry *dentry)
-{
-        if (!list_empty(&dentry->d_lru)) {
-                list_del_init(&dentry->d_lru);
-                dentry_stat.nr_unused--;
-        }
-}
 /*
 * Release the dentry's inode, using the filesystem
 * d_iput() operation if defined.
@@ -130,6 +121,41 @@ static void dentry_iput(struct dentry * dentry)
        }
 }
+/*
+ * dentry_lru_(add|add_tail|del|del_init) must be called with dcache_lock held.
+ */
+static void dentry_lru_add(struct dentry *dentry)
+{
+        list_add(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
+        dentry->d_sb->s_nr_dentry_unused++;
+        dentry_stat.nr_unused++;
+}
+static void dentry_lru_add_tail(struct dentry *dentry)
+{
+        list_add_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
+        dentry->d_sb->s_nr_dentry_unused++;
+        dentry_stat.nr_unused++;
+}
+static void dentry_lru_del(struct dentry *dentry)
+{
+        if (!list_empty(&dentry->d_lru)) {
+                list_del(&dentry->d_lru);
+                dentry->d_sb->s_nr_dentry_unused--;
+                dentry_stat.nr_unused--;
+        }
+}
+static void dentry_lru_del_init(struct dentry *dentry)
+{
+        if (likely(!list_empty(&dentry->d_lru))) {
+                list_del_init(&dentry->d_lru);
+                dentry->d_sb->s_nr_dentry_unused--;
+                dentry_stat.nr_unused--;
+        }
+}
 /**
 * d_kill - kill dentry and return parent
 * @dentry: dentry to kill
@@ -212,8 +238,7 @@ repeat:
                goto kill_it;
        if (list_empty(&dentry->d_lru)) {
                dentry->d_flags |= DCACHE_REFERENCED;
-                list_add(&dentry->d_lru, &dentry_unused);
+                dentry_lru_add(dentry);
-                dentry_stat.nr_unused++;
        }
        spin_unlock(&dentry->d_lock);
        spin_unlock(&dcache_lock);
@@ -222,7 +247,8 @@ repeat:
 unhash_it:
        __d_drop(dentry);
 kill_it:
-        dentry_lru_remove(dentry);
+        /* if dentry was on the d_lru list delete it from there */
+        dentry_lru_del(dentry);
        dentry = d_kill(dentry);
        if (dentry)
                goto repeat;
@@ -290,7 +316,7 @@ int d_invalidate(struct dentry * dentry)
 static inline struct dentry * __dget_locked(struct dentry *dentry)
 {
        atomic_inc(&dentry->d_count);
-        dentry_lru_remove(dentry);
+        dentry_lru_del_init(dentry);
        return dentry;
 }
@@ -406,133 +432,168 @@ static void prune_one_dentry(struct dentry * dentry)
                if (dentry->d_op && dentry->d_op->d_delete)
                        dentry->d_op->d_delete(dentry);
-                dentry_lru_remove(dentry);
+                dentry_lru_del_init(dentry);
                __d_drop(dentry);
                dentry = d_kill(dentry);
                spin_lock(&dcache_lock);
        }
 }
-/**
+/*
- * prune_dcache - shrink the dcache
+ * Shrink the dentry LRU on a given superblock.
- * @count: number of entries to try and free
+ * @sb   : superblock to shrink dentry LRU.
- * @sb: if given, ignore dentries for other superblocks
+ * @count: If count is NULL, we prune all dentries on superblock.
- *         which are being unmounted.
+ * @flags: If flags is non-zero, we need to do special processing based on
- *
+ * which flags are set. This means we don't need to maintain multiple
- * Shrink the dcache. This is done when we need
+ * similar copies of this loop.
- * more memory, or simply when we need to unmount
- * something (at which point we need to unuse
- * all dentries).
- *
- * This function may fail to free any resources if
- * all the dentries are in use.
 */
- 
+static void __shrink_dcache_sb(struct super_block *sb, int *count, int flags)
-static void prune_dcache(int count, struct super_block *sb)
 {
-        spin_lock(&dcache_lock);
+        LIST_HEAD(referenced);
-        for (; count ; count--) {
+        LIST_HEAD(tmp);
-                struct dentry *dentry;
+        struct dentry *dentry;
-                struct list_head *tmp;
+        int cnt = 0;
-                struct rw_semaphore *s_umount;
-                cond_resched_lock(&dcache_lock);
-                tmp = dentry_unused.prev;
+        BUG_ON(!sb);
-                if (sb) {
+        BUG_ON((flags & DCACHE_REFERENCED) && count == NULL);
-                        /* Try to find a dentry for this sb, but don't try
+        spin_lock(&dcache_lock);
-                         * too hard, if they aren't near the tail they will
+        if (count != NULL)
-                         * be moved down again soon
+                /* called from prune_dcache() and shrink_dcache_parent() */
+                cnt = *count;
+restart:
+        if (count == NULL)
+                list_splice_init(&sb->s_dentry_lru, &tmp);
+        else {
+                while (!list_empty(&sb->s_dentry_lru)) {
+                        dentry = list_entry(sb->s_dentry_lru.prev,
+                                        struct dentry, d_lru);
+                        BUG_ON(dentry->d_sb != sb);
+                        spin_lock(&dentry->d_lock);
+                        /*
+                         * If we are honouring the DCACHE_REFERENCED flag and
+                         * the dentry has this flag set, don't free it. Clear
+                         * the flag and put it back on the LRU.
                         */
-                        int skip = count;
+                        if ((flags & DCACHE_REFERENCED)
-                        while (skip && tmp != &dentry_unused &&
+                                && (dentry->d_flags & DCACHE_REFERENCED)) {
-                            list_entry(tmp, struct dentry, d_lru)->d_sb != sb) {
+                                dentry->d_flags &= ~DCACHE_REFERENCED;
-                                skip--;
+                                list_move_tail(&dentry->d_lru, &referenced);
-                                tmp = tmp->prev;
+                                spin_unlock(&dentry->d_lock);
+                        } else {
+                                list_move_tail(&dentry->d_lru, &tmp);
+                                spin_unlock(&dentry->d_lock);
+                                cnt--;
+                                if (!cnt)
+                                        break;
                        }
+                        cond_resched_lock(&dcache_lock);
                }
-                if (tmp == &dentry_unused)
+        }
-                        break;
+        while (!list_empty(&tmp)) {
-                list_del_init(tmp);
+                dentry = list_entry(tmp.prev, struct dentry, d_lru);
-                prefetch(dentry_unused.prev);
+                dentry_lru_del_init(dentry);
-                dentry_stat.nr_unused--;
+                spin_lock(&dentry->d_lock);
-                dentry = list_entry(tmp, struct dentry, d_lru);
-                spin_lock(&dentry->d_lock);
                /*
                 * We found an inuse dentry which was not removed from
-                 * dentry_unused because of laziness during lookup.  Do not free
+                 * the LRU because of laziness during lookup.  Do not free
-                 * it - just keep it off the dentry_unused list.
+                 * it - just keep it off the LRU list.
                 */
-                if (atomic_read(&dentry->d_count)) {
+                if (atomic_read(&dentry->d_count)) {
-                        spin_unlock(&dentry->d_lock);
+                        spin_unlock(&dentry->d_lock);
                        continue;
                }
-                /* If the dentry was recently referenced, don't free it. */
+                prune_one_dentry(dentry);
-                if (dentry->d_flags & DCACHE_REFERENCED) {
+                /* dentry->d_lock was dropped in prune_one_dentry() */
-                        dentry->d_flags &= ~DCACHE_REFERENCED;
+                cond_resched_lock(&dcache_lock);
-                        list_add(&dentry->d_lru, &dentry_unused);
+        }
-                        dentry_stat.nr_unused++;
+        if (count == NULL && !list_empty(&sb->s_dentry_lru))
-                        spin_unlock(&dentry->d_lock);
+                goto restart;
+        if (count != NULL)
+                *count = cnt;
+        if (!list_empty(&referenced))
+                list_splice(&referenced, &sb->s_dentry_lru);
+        spin_unlock(&dcache_lock);
+}
+/**
+ * prune_dcache - shrink the dcache
+ * @count: number of entries to try to free
+ *
+ * Shrink the dcache. This is done when we need more memory, or simply when we
+ * need to unmount something (at which point we need to unuse all dentries).
+ *
+ * This function may fail to free any resources if all the dentries are in use.
+ */
+static void prune_dcache(int count)
+{
+        struct super_block *sb;
+        int w_count;
+        int unused = dentry_stat.nr_unused;
+        int prune_ratio;
+        int pruned;
+        if (unused == 0 || count == 0)
+                return;
+        spin_lock(&dcache_lock);
+restart:
+        if (count >= unused)
+                prune_ratio = 1;
+        else
+                prune_ratio = unused / count;
+        spin_lock(&sb_lock);
+        list_for_each_entry(sb, &super_blocks, s_list) {
+                if (sb->s_nr_dentry_unused == 0)
                        continue;
-                }
+                sb->s_count++;
-                /*
+                /* Now, we reclaim unused dentries with fairness.
-                 * If the dentry is not DCACHED_REFERENCED, it is time
+                 * We reclaim the same percentage from each superblock.
-                 * to remove it from the dcache, provided the super block is
+                 * We calculate number of dentries to scan on this sb
-                 * NULL (which means we are trying to reclaim memory)
+                 * as follows, but the implementation is arranged to avoid
-                 * or this dentry belongs to the same super block that
+                 * overflows:
-                 * we want to shrink.
+                 * number of dentries to scan on this sb =
-                 */
+                 * count * (number of dentries on this sb /
-                /*
+                 * number of dentries in the machine)
-                 * If this dentry is for "my" filesystem, then I can prune it
-                 * without taking the s_umount lock (I already hold it).
                 */
-                if (sb && dentry->d_sb == sb) {
+                spin_unlock(&sb_lock);
-                        prune_one_dentry(dentry);
+                if (prune_ratio != 1)
-                        continue;
+                        w_count = (sb->s_nr_dentry_unused / prune_ratio) + 1;
-                }
+                else
+                        w_count = sb->s_nr_dentry_unused;
+                pruned = w_count;
                /*
-                 * ...otherwise we need to be sure this filesystem isn't being
+                 * We need to be sure this filesystem isn't being unmounted,
-                 * unmounted, otherwise we could race with
+                 * otherwise we could race with generic_shutdown_super(), and
-                 * generic_shutdown_super(), and end up holding a reference to
+                 * end up holding a reference to an inode while the filesystem
-                 * an inode while the filesystem is unmounted.
+                 * is unmounted.  So we try to get s_umount, and make sure
-                 * So we try to get s_umount, and make sure s_root isn't NULL.
+                 * s_root isn't NULL.
-                 * (Take a local copy of s_umount to avoid a use-after-free of
-                 * `dentry').
                 */
-                s_umount = &dentry->d_sb->s_umount;
+                if (down_read_trylock(&sb->s_umount)) {
-                if (down_read_trylock(s_umount)) {
+                        if ((sb->s_root != NULL) &&
-                        if (dentry->d_sb->s_root != NULL) {
+                            (!list_empty(&sb->s_dentry_lru))) {
-                                prune_one_dentry(dentry);
+                                spin_unlock(&dcache_lock);
-                                up_read(s_umount);
+                                __shrink_dcache_sb(sb, &w_count,
-                                continue;
+                                                DCACHE_REFERENCED);
+                                pruned -= w_count;
+                                spin_lock(&dcache_lock);
                        }
-                        up_read(s_umount);
+                        up_read(&sb->s_umount);
                }
-                spin_unlock(&dentry->d_lock);
+                spin_lock(&sb_lock);
+                count -= pruned;
                /*
-                 * Insert dentry at the head of the list as inserting at the
+                 * restart only when sb is no longer on the list and
-                 * tail leads to a cycle.
+                 * we have more work to do.
                 */
-                list_add(&dentry->d_lru, &dentry_unused);
+                if (__put_super_and_need_restart(sb) && count > 0) {
-                dentry_stat.nr_unused++;
+                        spin_unlock(&sb_lock);
+                        goto restart;
+                }
        }
+        spin_unlock(&sb_lock);
        spin_unlock(&dcache_lock);
 }
-/*
- * Shrink the dcache for the specified super block.
- * This allows us to unmount a device without disturbing
- * the dcache for the other devices.
- *
- * This implementation makes just two traversals of the
- * unused list.  On the first pass we move the selected
- * dentries to the most recent end, and on the second
- * pass we free them.  The second pass must restart after
- * each dput(), but since the target dentries are all at
- * the end, it's really just a single traversal.
- */
 /**
 * shrink_dcache_sb - shrink dcache for a superblock
 * @sb: superblock
@@ -541,44 +602,9 @@ static void prune_dcache(int count, struct super_block *sb)
 * is used to free the dcache before unmounting a file
 * system
 */
 void shrink_dcache_sb(struct super_block * sb)
 {
-        struct list_head *tmp, *next;
+        __shrink_dcache_sb(sb, NULL, 0);
-        struct dentry *dentry;
-        /*
-         * Pass one ... move the dentries for the specified
-         * superblock to the most recent end of the unused list.
-         */
-        spin_lock(&dcache_lock);
-        list_for_each_prev_safe(tmp, next, &dentry_unused) {
-                dentry = list_entry(tmp, struct dentry, d_lru);
-                if (dentry->d_sb != sb)
-                        continue;
-                list_move_tail(tmp, &dentry_unused);
-        }
-        /*
-         * Pass two ... free the dentries for this superblock.
-         */
-repeat:
-        list_for_each_prev_safe(tmp, next, &dentry_unused) {
-                dentry = list_entry(tmp, struct dentry, d_lru);
-                if (dentry->d_sb != sb)
-                        continue;
-                dentry_stat.nr_unused--;
-                list_del_init(tmp);
-                spin_lock(&dentry->d_lock);
-                if (atomic_read(&dentry->d_count)) {
-                        spin_unlock(&dentry->d_lock);
-                        continue;
-                }
-                prune_one_dentry(dentry);
-                cond_resched_lock(&dcache_lock);
-                goto repeat;
-        }
-        spin_unlock(&dcache_lock);
 }
 /*
@@ -595,7 +621,7 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
        /* detach this root from the system */
        spin_lock(&dcache_lock);
-        dentry_lru_remove(dentry);
+        dentry_lru_del_init(dentry);
        __d_drop(dentry);
        spin_unlock(&dcache_lock);
@@ -609,7 +635,7 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
                        spin_lock(&dcache_lock);
                        list_for_each_entry(loop, &dentry->d_subdirs,
                                            d_u.d_child) {
-                                dentry_lru_remove(loop);
+                                dentry_lru_del_init(loop);
                                __d_drop(loop);
                                cond_resched_lock(&dcache_lock);
                        }
@@ -791,14 +817,13 @@ resume:
                struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
                next = tmp->next;
-                dentry_lru_remove(dentry);
+                dentry_lru_del_init(dentry);
                /* 
                 * move only zero ref count dentries to the end 
                 * of the unused list for prune_dcache
                 */
                if (!atomic_read(&dentry->d_count)) {
-                        list_add_tail(&dentry->d_lru, &dentry_unused);
+                        dentry_lru_add_tail(dentry);
-                        dentry_stat.nr_unused++;
                        found++;
                }
@@ -840,10 +865,11 @@ out:
 
 void shrink_dcache_parent(struct dentry * parent)
 {
+        struct super_block *sb = parent->d_sb;
        int found;
        while ((found = select_parent(parent)) != 0)
-                prune_dcache(found, parent->d_sb);
+                __shrink_dcache_sb(sb, &found, 0);
 }
 /*
@@ -863,7 +889,7 @@ static int shrink_dcache_memory(int nr, gfp_t gfp_mask)
        if (nr) {
                if (!(gfp_mask & __GFP_FS))
                        return -1;
-                prune_dcache(nr, NULL);
+                prune_dcache(nr);
        }
        return (dentry_stat.nr_unused / 100) * sysctl_vfs_cache_pressure;
 }
@@ -1215,7 +1241,7 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
 * rcu_read_lock() and rcu_read_unlock() are used to disable preemption while
 * lookup is going on.
 *
- * dentry_unused list is not updated even if lookup finds the required dentry
+ * The dentry unused LRU is not updated even if lookup finds the required dentry
 * in there. It is updated in places such as prune_dcache, shrink_dcache_sb,
 * select_parent and __dget_locked. This laziness saves lookup from dcache_lock
 * acquisition.
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index e9602d85c11d..08e28c9bb416 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -309,6 +309,31 @@ struct dentry *debugfs_create_symlink(const char *name, struct dentry *parent,
 }
 EXPORT_SYMBOL_GPL(debugfs_create_symlink);
+static void __debugfs_remove(struct dentry *dentry, struct dentry *parent)
+{
+        int ret = 0;
+        if (debugfs_positive(dentry)) {
+                if (dentry->d_inode) {
+                        dget(dentry);
+                        switch (dentry->d_inode->i_mode & S_IFMT) {
+                        case S_IFDIR:
+                                ret = simple_rmdir(parent->d_inode, dentry);
+                                break;
+                        case S_IFLNK:
+                                kfree(dentry->d_inode->i_private);
+                                /* fall through */
+                        default:
+                                simple_unlink(parent->d_inode, dentry);
+                                break;
+                        }
+                        if (!ret)
+                                d_delete(dentry);
+                        dput(dentry);
+                }
+        }
+}
 /**
 * debugfs_remove - removes a file or directory from the debugfs filesystem
 * @dentry: a pointer to a the dentry of the file or directory to be
@@ -325,7 +350,6 @@ EXPORT_SYMBOL_GPL(debugfs_create_symlink);
 void debugfs_remove(struct dentry *dentry)
 {
        struct dentry *parent;
-        int ret = 0;
        
        if (!dentry)
                return;
@@ -335,29 +359,83 @@ void debugfs_remove(struct dentry *dentry)
                return;
        mutex_lock(&parent->d_inode->i_mutex);
-        if (debugfs_positive(dentry)) {
+        __debugfs_remove(dentry, parent);
-                if (dentry->d_inode) {
+        mutex_unlock(&parent->d_inode->i_mutex);
-                        dget(dentry);
+        simple_release_fs(&debugfs_mount, &debugfs_mount_count);
-                        switch (dentry->d_inode->i_mode & S_IFMT) {
+}
-                        case S_IFDIR:
+EXPORT_SYMBOL_GPL(debugfs_remove);
-                                ret = simple_rmdir(parent->d_inode, dentry);
-                                break;
+/**
-                        case S_IFLNK:
+ * debugfs_remove_recursive - recursively removes a directory
-                                kfree(dentry->d_inode->i_private);
+ * @dentry: a pointer to a the dentry of the directory to be removed.
-                                /* fall through */
+ *
-                        default:
+ * This function recursively removes a directory tree in debugfs that
-                                simple_unlink(parent->d_inode, dentry);
+ * was previously created with a call to another debugfs function
+ * (like debugfs_create_file() or variants thereof.)
+ *
+ * This function is required to be called in order for the file to be
+ * removed, no automatic cleanup of files will happen when a module is
+ * removed, you are responsible here.
+ */
+void debugfs_remove_recursive(struct dentry *dentry)
+{
+        struct dentry *child;
+        struct dentry *parent;
+        if (!dentry)
+                return;
+        parent = dentry->d_parent;
+        if (!parent || !parent->d_inode)
+                return;
+        parent = dentry;
+        mutex_lock(&parent->d_inode->i_mutex);
+        while (1) {
+                /*
+                 * When all dentries under "parent" has been removed,
+                 * walk up the tree until we reach our starting point.
+                 */
+                if (list_empty(&parent->d_subdirs)) {
+                        mutex_unlock(&parent->d_inode->i_mutex);
+                        if (parent == dentry)
                                break;
-                        }
+                        parent = parent->d_parent;
-                        if (!ret)
+                        mutex_lock(&parent->d_inode->i_mutex);
-                                d_delete(dentry);
+                }
-                        dput(dentry);
+                child = list_entry(parent->d_subdirs.next, struct dentry,
+                                d_u.d_child);
+                /*
+                 * If "child" isn't empty, walk down the tree and
+                 * remove all its descendants first.
+                 */
+                if (!list_empty(&child->d_subdirs)) {
+                        mutex_unlock(&parent->d_inode->i_mutex);
+                        parent = child;
+                        mutex_lock(&parent->d_inode->i_mutex);
+                        continue;
                }
+                __debugfs_remove(child, parent);
+                if (parent->d_subdirs.next == &child->d_u.d_child) {
+                        /*
+                         * Avoid infinite loop if we fail to remove
+                         * one dentry.
+                         */
+                        mutex_unlock(&parent->d_inode->i_mutex);
+                        break;
+                }
+                simple_release_fs(&debugfs_mount, &debugfs_mount_count);
        }
+        parent = dentry->d_parent;
+        mutex_lock(&parent->d_inode->i_mutex);
+        __debugfs_remove(dentry, parent);
        mutex_unlock(&parent->d_inode->i_mutex);
        simple_release_fs(&debugfs_mount, &debugfs_mount_count);
 }
-EXPORT_SYMBOL_GPL(debugfs_remove);
+EXPORT_SYMBOL_GPL(debugfs_remove_recursive);
 /**
 * debugfs_rename - rename a file/directory in the debugfs filesystem
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 9e81addbd6ea..9606ee848fd8 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -150,17 +150,11 @@ static int dio_refill_pages(struct dio *dio)
        int nr_pages;
        nr_pages = min(dio->total_pages - dio->curr_page, DIO_PAGES);
-        down_read(&current->mm->mmap_sem);
+        ret = get_user_pages_fast(
-        ret = get_user_pages(
-                current,                        /* Task for fault acounting */
-                current->mm,                    /* whose pages? */
                dio->curr_user_address,         /* Where from? */
                nr_pages,                       /* How many pages? */
                dio->rw == READ,                /* Write to memory? */
-                0,                              /* force (?) */
+                &dio->pages[0]);                /* Put results here */
-                &dio->pages[0],
-                NULL);                          /* vmas */
-        up_read(&current->mm->mmap_sem);
        if (ret < 0 && dio->blocks_available && (dio->rw & WRITE)) {
                struct page *page = ZERO_PAGE(0);
diff --git a/fs/dlm/config.c b/fs/dlm/config.c
index 492d8caaaf25..c4e7d721bd8d 100644
--- a/fs/dlm/config.c
+++ b/fs/dlm/config.c
@@ -41,20 +41,16 @@ struct comm;
 struct nodes;
 struct node;
-static int make_cluster(struct config_group *, const char *,
+static struct config_group *make_cluster(struct config_group *, const char *);
-                        struct config_group **);
 static void drop_cluster(struct config_group *, struct config_item *);
 static void release_cluster(struct config_item *);
-static int make_space(struct config_group *, const char *,
+static struct config_group *make_space(struct config_group *, const char *);
-                      struct config_group **);
 static void drop_space(struct config_group *, struct config_item *);
 static void release_space(struct config_item *);
-static int make_comm(struct config_group *, const char *,
+static struct config_item *make_comm(struct config_group *, const char *);
-                     struct config_item **);
 static void drop_comm(struct config_group *, struct config_item *);
 static void release_comm(struct config_item *);
-static int make_node(struct config_group *, const char *,
+static struct config_item *make_node(struct config_group *, const char *);
-                     struct config_item **);
 static void drop_node(struct config_group *, struct config_item *);
 static void release_node(struct config_item *);
@@ -396,8 +392,8 @@ static struct node *to_node(struct config_item *i)
        return i ? container_of(i, struct node, item) : NULL;
 }
-static int make_cluster(struct config_group *g, const char *name,
+static struct config_group *make_cluster(struct config_group *g,
-                        struct config_group **new_g)
+                                         const char *name)
 {
        struct cluster *cl = NULL;
        struct spaces *sps = NULL;
@@ -435,15 +431,14 @@ static int make_cluster(struct config_group *g, const char *name,
        space_list = &sps->ss_group;
        comm_list = &cms->cs_group;
-        *new_g = &cl->group;
+        return &cl->group;
-        return 0;
 fail:
        kfree(cl);
        kfree(gps);
        kfree(sps);
        kfree(cms);
-        return -ENOMEM;
+        return ERR_PTR(-ENOMEM);
 }
 static void drop_cluster(struct config_group *g, struct config_item *i)
@@ -471,8 +466,7 @@ static void release_cluster(struct config_item *i)
        kfree(cl);
 }
-static int make_space(struct config_group *g, const char *name,
+static struct config_group *make_space(struct config_group *g, const char *name)
-                      struct config_group **new_g)
 {
        struct space *sp = NULL;
        struct nodes *nds = NULL;
@@ -495,14 +489,13 @@ static int make_space(struct config_group *g, const char *name,
        INIT_LIST_HEAD(&sp->members);
        mutex_init(&sp->members_lock);
        sp->members_count = 0;
-        *new_g = &sp->group;
+        return &sp->group;
-        return 0;
 fail:
        kfree(sp);
        kfree(gps);
        kfree(nds);
-        return -ENOMEM;
+        return ERR_PTR(-ENOMEM);
 }
 static void drop_space(struct config_group *g, struct config_item *i)
@@ -529,21 +522,19 @@ static void release_space(struct config_item *i)
        kfree(sp);
 }
-static int make_comm(struct config_group *g, const char *name,
+static struct config_item *make_comm(struct config_group *g, const char *name)
-                     struct config_item **new_i)
 {
        struct comm *cm;
        cm = kzalloc(sizeof(struct comm), GFP_KERNEL);
        if (!cm)
-                return -ENOMEM;
+                return ERR_PTR(-ENOMEM);
        config_item_init_type_name(&cm->item, name, &comm_type);
        cm->nodeid = -1;
        cm->local = 0;
        cm->addr_count = 0;
-        *new_i = &cm->item;
+        return &cm->item;
-        return 0;
 }
 static void drop_comm(struct config_group *g, struct config_item *i)
@@ -563,15 +554,14 @@ static void release_comm(struct config_item *i)
        kfree(cm);
 }
-static int make_node(struct config_group *g, const char *name,
+static struct config_item *make_node(struct config_group *g, const char *name)
-                     struct config_item **new_i)
 {
        struct space *sp = to_space(g->cg_item.ci_parent);
        struct node *nd;
        nd = kzalloc(sizeof(struct node), GFP_KERNEL);
        if (!nd)
-                return -ENOMEM;
+                return ERR_PTR(-ENOMEM);
        config_item_init_type_name(&nd->item, name, &node_type);
        nd->nodeid = -1;
@@ -583,8 +573,7 @@ static int make_node(struct config_group *g, const char *name,
        sp->members_count++;
        mutex_unlock(&sp->members_lock);
-        *new_i = &nd->item;
+        return &nd->item;
-        return 0;
 }
 static void drop_node(struct config_group *g, struct config_item *i)
diff --git a/fs/dlm/plock.c b/fs/dlm/plock.c
index 78878c5781ca..eba87ff3177b 100644
--- a/fs/dlm/plock.c
+++ b/fs/dlm/plock.c
@@ -116,7 +116,7 @@ int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file,
        if (xop->callback == NULL)
                wait_event(recv_wq, (op->done != 0));
        else {
-                rv = -EINPROGRESS;
+                rv = FILE_LOCK_DEFERRED;
                goto out;
        }
diff --git a/fs/dquot.c b/fs/dquot.c
index 5ac77da19959..1346eebe74ce 100644
--- a/fs/dquot.c
+++ b/fs/dquot.c
@@ -562,6 +562,8 @@ static struct shrinker dqcache_shrinker = {
 */
 static void dqput(struct dquot *dquot)
 {
+        int ret;
        if (!dquot)
                return;
 #ifdef __DQUOT_PARANOIA
@@ -594,7 +596,19 @@ we_slept:
        if (test_bit(DQ_ACTIVE_B, &dquot->dq_flags) && dquot_dirty(dquot)) {
                spin_unlock(&dq_list_lock);
                /* Commit dquot before releasing */
-                dquot->dq_sb->dq_op->write_dquot(dquot);
+                ret = dquot->dq_sb->dq_op->write_dquot(dquot);
+                if (ret < 0) {
+                        printk(KERN_ERR "VFS: cannot write quota structure on "
+                                "device %s (error %d). Quota may get out of "
+                                "sync!\n", dquot->dq_sb->s_id, ret);
+                        /*
+                         * We clear dirty bit anyway, so that we avoid
+                         * infinite loop here
+                         */
+                        spin_lock(&dq_list_lock);
+                        clear_dquot_dirty(dquot);
+                        spin_unlock(&dq_list_lock);
+                }
                goto we_slept;
        }
        /* Clear flag in case dquot was inactive (something bad happened) */
@@ -875,7 +889,10 @@ static void print_warning(struct dquot *dquot, const int warntype)
        char *msg = NULL;
        struct tty_struct *tty;
-        if (!need_print_warning(dquot))
+        if (warntype == QUOTA_NL_IHARDBELOW ||
+            warntype == QUOTA_NL_ISOFTBELOW ||
+            warntype == QUOTA_NL_BHARDBELOW ||
+            warntype == QUOTA_NL_BSOFTBELOW || !need_print_warning(dquot))
                return;
        mutex_lock(&tty_mutex);
@@ -1083,6 +1100,35 @@ static int check_bdq(struct dquot *dquot, qsize_t space, int prealloc, char *war
        return QUOTA_OK;
 }
+static int info_idq_free(struct dquot *dquot, ulong inodes)
+{
+        if (test_bit(DQ_FAKE_B, &dquot->dq_flags) ||
+            dquot->dq_dqb.dqb_curinodes <= dquot->dq_dqb.dqb_isoftlimit)
+                return QUOTA_NL_NOWARN;
+        if (dquot->dq_dqb.dqb_curinodes - inodes <= dquot->dq_dqb.dqb_isoftlimit)
+                return QUOTA_NL_ISOFTBELOW;
+        if (dquot->dq_dqb.dqb_curinodes >= dquot->dq_dqb.dqb_ihardlimit &&
+            dquot->dq_dqb.dqb_curinodes - inodes < dquot->dq_dqb.dqb_ihardlimit)
+                return QUOTA_NL_IHARDBELOW;
+        return QUOTA_NL_NOWARN;
+}
+static int info_bdq_free(struct dquot *dquot, qsize_t space)
+{
+        if (test_bit(DQ_FAKE_B, &dquot->dq_flags) ||
+            toqb(dquot->dq_dqb.dqb_curspace) <= dquot->dq_dqb.dqb_bsoftlimit)
+                return QUOTA_NL_NOWARN;
+        if (toqb(dquot->dq_dqb.dqb_curspace - space) <=
+            dquot->dq_dqb.dqb_bsoftlimit)
+                return QUOTA_NL_BSOFTBELOW;
+        if (toqb(dquot->dq_dqb.dqb_curspace) >= dquot->dq_dqb.dqb_bhardlimit &&
+            toqb(dquot->dq_dqb.dqb_curspace - space) <
+                                                dquot->dq_dqb.dqb_bhardlimit)
+                return QUOTA_NL_BHARDBELOW;
+        return QUOTA_NL_NOWARN;
+}
 /*
 *      Initialize quota pointers in inode
 *      Transaction must be started at entry
@@ -1139,6 +1185,28 @@ int dquot_drop(struct inode *inode)
        return 0;
 }
+/* Wrapper to remove references to quota structures from inode */
+void vfs_dq_drop(struct inode *inode)
+{
+        /* Here we can get arbitrary inode from clear_inode() so we have
+         * to be careful. OTOH we don't need locking as quota operations
+         * are allowed to change only at mount time */
+        if (!IS_NOQUOTA(inode) && inode->i_sb && inode->i_sb->dq_op
+            && inode->i_sb->dq_op->drop) {
+                int cnt;
+                /* Test before calling to rule out calls from proc and such
+                 * where we are not allowed to block. Note that this is
+                 * actually reliable test even without the lock - the caller
+                 * must assure that nobody can come after the DQUOT_DROP and
+                 * add quota pointers back anyway */
+                for (cnt = 0; cnt < MAXQUOTAS; cnt++)
+                        if (inode->i_dquot[cnt] != NODQUOT)
+                                break;
+                if (cnt < MAXQUOTAS)
+                        inode->i_sb->dq_op->drop(inode);
+        }
+}
 /*
 * Following four functions update i_blocks+i_bytes fields and
 * quota information (together with appropriate checks)
@@ -1248,6 +1316,7 @@ warn_put_all:
 int dquot_free_space(struct inode *inode, qsize_t number)
 {
        unsigned int cnt;
+        char warntype[MAXQUOTAS];
        /* First test before acquiring mutex - solves deadlocks when we
         * re-enter the quota code and are already holding the mutex */
@@ -1256,6 +1325,7 @@ out_sub:
                inode_sub_bytes(inode, number);
                return QUOTA_OK;
        }
        down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
        /* Now recheck reliably when holding dqptr_sem */
        if (IS_NOQUOTA(inode)) {
@@ -1266,6 +1336,7 @@ out_sub:
        for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
                if (inode->i_dquot[cnt] == NODQUOT)
                        continue;
+                warntype[cnt] = info_bdq_free(inode->i_dquot[cnt], number);
                dquot_decr_space(inode->i_dquot[cnt], number);
        }
        inode_sub_bytes(inode, number);
@@ -1274,6 +1345,7 @@ out_sub:
        for (cnt = 0; cnt < MAXQUOTAS; cnt++)
                if (inode->i_dquot[cnt])
                        mark_dquot_dirty(inode->i_dquot[cnt]);
+        flush_warnings(inode->i_dquot, warntype);
        up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
        return QUOTA_OK;
 }
@@ -1284,11 +1356,13 @@ out_sub:
 int dquot_free_inode(const struct inode *inode, unsigned long number)
 {
        unsigned int cnt;
+        char warntype[MAXQUOTAS];
        /* First test before acquiring mutex - solves deadlocks when we
         * re-enter the quota code and are already holding the mutex */
        if (IS_NOQUOTA(inode))
                return QUOTA_OK;
        down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
        /* Now recheck reliably when holding dqptr_sem */
        if (IS_NOQUOTA(inode)) {
@@ -1299,6 +1373,7 @@ int dquot_free_inode(const struct inode *inode, unsigned long number)
        for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
                if (inode->i_dquot[cnt] == NODQUOT)
                        continue;
+                warntype[cnt] = info_idq_free(inode->i_dquot[cnt], number);
                dquot_decr_inodes(inode->i_dquot[cnt], number);
        }
        spin_unlock(&dq_data_lock);
@@ -1306,6 +1381,7 @@ int dquot_free_inode(const struct inode *inode, unsigned long number)
        for (cnt = 0; cnt < MAXQUOTAS; cnt++)
                if (inode->i_dquot[cnt])
                        mark_dquot_dirty(inode->i_dquot[cnt]);
+        flush_warnings(inode->i_dquot, warntype);
        up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
        return QUOTA_OK;
 }
@@ -1323,7 +1399,8 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr)
        struct dquot *transfer_to[MAXQUOTAS];
        int cnt, ret = NO_QUOTA, chuid = (iattr->ia_valid & ATTR_UID) && inode->i_uid != iattr->ia_uid,
            chgid = (iattr->ia_valid & ATTR_GID) && inode->i_gid != iattr->ia_gid;
-        char warntype[MAXQUOTAS];
+        char warntype_to[MAXQUOTAS];
+        char warntype_from_inodes[MAXQUOTAS], warntype_from_space[MAXQUOTAS];
        /* First test before acquiring mutex - solves deadlocks when we
         * re-enter the quota code and are already holding the mutex */
@@ -1332,7 +1409,7 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr)
        /* Clear the arrays */
        for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
                transfer_to[cnt] = transfer_from[cnt] = NODQUOT;
-                warntype[cnt] = QUOTA_NL_NOWARN;
+                warntype_to[cnt] = QUOTA_NL_NOWARN;
        }
        down_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
        /* Now recheck reliably when holding dqptr_sem */
@@ -1364,8 +1441,9 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr)
                if (transfer_to[cnt] == NODQUOT)
                        continue;
                transfer_from[cnt] = inode->i_dquot[cnt];
-                if (check_idq(transfer_to[cnt], 1, warntype+cnt) == NO_QUOTA ||
+                if (check_idq(transfer_to[cnt], 1, warntype_to + cnt) ==
-                    check_bdq(transfer_to[cnt], space, 0, warntype+cnt) == NO_QUOTA)
+                    NO_QUOTA || check_bdq(transfer_to[cnt], space, 0,
+                    warntype_to + cnt) == NO_QUOTA)
                        goto warn_put_all;
        }
@@ -1381,6 +1459,10 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr)
                /* Due to IO error we might not have transfer_from[] structure */
                if (transfer_from[cnt]) {
+                        warntype_from_inodes[cnt] =
+                                info_idq_free(transfer_from[cnt], 1);
+                        warntype_from_space[cnt] =
+                                info_bdq_free(transfer_from[cnt], space);
                        dquot_decr_inodes(transfer_from[cnt], 1);
                        dquot_decr_space(transfer_from[cnt], space);
                }
@@ -1400,7 +1482,9 @@ warn_put_all:
                if (transfer_to[cnt])
                        mark_dquot_dirty(transfer_to[cnt]);
        }
-        flush_warnings(transfer_to, warntype);
+        flush_warnings(transfer_to, warntype_to);
+        flush_warnings(transfer_from, warntype_from_inodes);
+        flush_warnings(transfer_from, warntype_from_space);
        
        for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
                if (ret == QUOTA_OK && transfer_from[cnt] != NODQUOT)
@@ -1412,6 +1496,18 @@ warn_put_all:
        return ret;
 }
+/* Wrapper for transferring ownership of an inode */
+int vfs_dq_transfer(struct inode *inode, struct iattr *iattr)
+{
+        if (sb_any_quota_enabled(inode->i_sb) && !IS_NOQUOTA(inode)) {
+                vfs_dq_init(inode);
+                if (inode->i_sb->dq_op->transfer(inode, iattr) == NO_QUOTA)
+                        return 1;
+        }
+        return 0;
+}
 /*
 * Write info of quota file to disk
 */
@@ -1752,6 +1848,22 @@ out:
        return error;
 }
+/* Wrapper to turn on quotas when remounting rw */
+int vfs_dq_quota_on_remount(struct super_block *sb)
+{
+        int cnt;
+        int ret = 0, err;
+        if (!sb->s_qcop || !sb->s_qcop->quota_on)
+                return -ENOSYS;
+        for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
+                err = sb->s_qcop->quota_on(sb, cnt, 0, NULL, 1);
+                if (err < 0 && !ret)
+                        ret = err;
+        }
+        return ret;
+}
 /* Generic routine for getting common part of quota structure */
 static void do_get_dqblk(struct dquot *dquot, struct if_dqblk *di)
 {
@@ -2087,8 +2199,11 @@ EXPORT_SYMBOL(dquot_release);
 EXPORT_SYMBOL(dquot_mark_dquot_dirty);
 EXPORT_SYMBOL(dquot_initialize);
 EXPORT_SYMBOL(dquot_drop);
+EXPORT_SYMBOL(vfs_dq_drop);
 EXPORT_SYMBOL(dquot_alloc_space);
 EXPORT_SYMBOL(dquot_alloc_inode);
 EXPORT_SYMBOL(dquot_free_space);
 EXPORT_SYMBOL(dquot_free_inode);
 EXPORT_SYMBOL(dquot_transfer);
+EXPORT_SYMBOL(vfs_dq_transfer);
+EXPORT_SYMBOL(vfs_dq_quota_on_remount);
diff --git a/fs/ecryptfs/Makefile b/fs/ecryptfs/Makefile
index 1e34a7fd4884..b4755a85996e 100644
--- a/fs/ecryptfs/Makefile
+++ b/fs/ecryptfs/Makefile
@@ -4,4 +4,4 @@
 obj-$(CONFIG_ECRYPT_FS) += ecryptfs.o
-ecryptfs-objs := dentry.o file.o inode.o main.o super.o mmap.o read_write.o crypto.o keystore.o messaging.o netlink.o miscdev.o debug.o
+ecryptfs-objs := dentry.o file.o inode.o main.o super.o mmap.o read_write.o crypto.o keystore.o messaging.o netlink.o miscdev.o kthread.o debug.o
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index e2832bc7869a..7b99917ffadc 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -33,6 +33,7 @@
 #include <linux/crypto.h>
 #include <linux/file.h>
 #include <linux/scatterlist.h>
+#include <asm/unaligned.h>
 #include "ecryptfs_kernel.h"
 static int
@@ -1032,10 +1033,8 @@ static int contains_ecryptfs_marker(char *data)
 {
        u32 m_1, m_2;
-        memcpy(&m_1, data, 4);
+        m_1 = get_unaligned_be32(data);
-        m_1 = be32_to_cpu(m_1);
+        m_2 = get_unaligned_be32(data + 4);
-        memcpy(&m_2, (data + 4), 4);
-        m_2 = be32_to_cpu(m_2);
        if ((m_1 ^ MAGIC_ECRYPTFS_MARKER) == m_2)
                return 1;
        ecryptfs_printk(KERN_DEBUG, "m_1 = [0x%.8x]; m_2 = [0x%.8x]; "
@@ -1073,8 +1072,7 @@ static int ecryptfs_process_flags(struct ecryptfs_crypt_stat *crypt_stat,
        int i;
        u32 flags;
-        memcpy(&flags, page_virt, 4);
+        flags = get_unaligned_be32(page_virt);
-        flags = be32_to_cpu(flags);
        for (i = 0; i < ((sizeof(ecryptfs_flag_map)
                          / sizeof(struct ecryptfs_flag_map_elem))); i++)
                if (flags & ecryptfs_flag_map[i].file_flag) {
@@ -1100,11 +1098,9 @@ static void write_ecryptfs_marker(char *page_virt, size_t *written)
        get_random_bytes(&m_1, (MAGIC_ECRYPTFS_MARKER_SIZE_BYTES / 2));
        m_2 = (m_1 ^ MAGIC_ECRYPTFS_MARKER);
-        m_1 = cpu_to_be32(m_1);
+        put_unaligned_be32(m_1, page_virt);
-        memcpy(page_virt, &m_1, (MAGIC_ECRYPTFS_MARKER_SIZE_BYTES / 2));
+        page_virt += (MAGIC_ECRYPTFS_MARKER_SIZE_BYTES / 2);
-        m_2 = cpu_to_be32(m_2);
+        put_unaligned_be32(m_2, page_virt);
-        memcpy(page_virt + (MAGIC_ECRYPTFS_MARKER_SIZE_BYTES / 2), &m_2,
-               (MAGIC_ECRYPTFS_MARKER_SIZE_BYTES / 2));
        (*written) = MAGIC_ECRYPTFS_MARKER_SIZE_BYTES;
 }
@@ -1121,8 +1117,7 @@ write_ecryptfs_flags(char *page_virt, struct ecryptfs_crypt_stat *crypt_stat,
                        flags |= ecryptfs_flag_map[i].file_flag;
        /* Version is in top 8 bits of the 32-bit flag vector */
        flags |= ((((u8)crypt_stat->file_version) << 24) & 0xFF000000);
-        flags = cpu_to_be32(flags);
+        put_unaligned_be32(flags, page_virt);
-        memcpy(page_virt, &flags, 4);
        (*written) = 4;
 }
@@ -1238,11 +1233,9 @@ ecryptfs_write_header_metadata(char *virt,
        num_header_extents_at_front =
                (u16)(crypt_stat->num_header_bytes_at_front
                      / crypt_stat->extent_size);
-        header_extent_size = cpu_to_be32(header_extent_size);
+        put_unaligned_be32(header_extent_size, virt);
-        memcpy(virt, &header_extent_size, 4);
        virt += 4;
-        num_header_extents_at_front = cpu_to_be16(num_header_extents_at_front);
+        put_unaligned_be16(num_header_extents_at_front, virt);
-        memcpy(virt, &num_header_extents_at_front, 2);
        (*written) = 6;
 }
@@ -1410,15 +1403,13 @@ static int parse_header_metadata(struct ecryptfs_crypt_stat *crypt_stat,
        u32 header_extent_size;
        u16 num_header_extents_at_front;
-        memcpy(&header_extent_size, virt, sizeof(u32));
+        header_extent_size = get_unaligned_be32(virt);
-        header_extent_size = be32_to_cpu(header_extent_size);
+        virt += sizeof(__be32);
-        virt += sizeof(u32);
+        num_header_extents_at_front = get_unaligned_be16(virt);
-        memcpy(&num_header_extents_at_front, virt, sizeof(u16));
-        num_header_extents_at_front = be16_to_cpu(num_header_extents_at_front);
        crypt_stat->num_header_bytes_at_front =
                (((size_t)num_header_extents_at_front
                  * (size_t)header_extent_size));
-        (*bytes_read) = (sizeof(u32) + sizeof(u16));
+        (*bytes_read) = (sizeof(__be32) + sizeof(__be16));
        if ((validate_header_size == ECRYPTFS_VALIDATE_HEADER_SIZE)
            && (crypt_stat->num_header_bytes_at_front
                < ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE)) {
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index c15c25745e05..b73fb752c5f8 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -559,10 +559,25 @@ extern struct kmem_cache *ecryptfs_key_record_cache;
 extern struct kmem_cache *ecryptfs_key_sig_cache;
 extern struct kmem_cache *ecryptfs_global_auth_tok_cache;
 extern struct kmem_cache *ecryptfs_key_tfm_cache;
+extern struct kmem_cache *ecryptfs_open_req_cache;
+/*
+ * One request asking the eCryptfs kernel thread to open a lower file.
+ * Requests are queued and waited on by ecryptfs_privileged_open().
+ */
+struct ecryptfs_open_req {
+/* Set by the kernel thread after it has attempted the open */
+#define ECRYPTFS_REQ_PROCESSED 0x00000001
+/* Checked by the waiter, which then fails the request with -EIO */
+#define ECRYPTFS_REQ_DROPPED   0x00000002
+/* Set on still-queued requests when the kernel thread is torn down */
+#define ECRYPTFS_REQ_ZOMBIE    0x00000004
+        u32 flags;
+        /* Where the kernel thread stores the dentry_open() result */
+        struct file **lower_file;
+        /* Lower dentry and vfsmount handed to dentry_open() */
+        struct dentry *lower_dentry;
+        struct vfsmount *lower_mnt;
+        /* The requester sleeps here until flags becomes non-zero */
+        wait_queue_head_t wait;
+        /* Held while flags is examined or updated */
+        struct mutex mux;
+        /* Link in ecryptfs_kthread_ctl.req_list */
+        struct list_head kthread_ctl_list;
+};
+#define ECRYPTFS_INTERPOSE_FLAG_D_ADD                 0x00000001
 int ecryptfs_interpose(struct dentry *hidden_dentry,
                       struct dentry *this_dentry, struct super_block *sb,
-                       int flag);
+                       u32 flags);
 int ecryptfs_fill_zeros(struct file *file, loff_t new_length);
 int ecryptfs_decode_filename(struct ecryptfs_crypt_stat *crypt_stat,
                             const char *name, int length,
@@ -690,5 +705,11 @@ void ecryptfs_msg_ctx_alloc_to_free(struct ecryptfs_msg_ctx *msg_ctx);
 int
 ecryptfs_spawn_daemon(struct ecryptfs_daemon **daemon, uid_t euid,
                      struct user_namespace *user_ns, struct pid *pid);
+int ecryptfs_init_kthread(void);
+void ecryptfs_destroy_kthread(void);
+int ecryptfs_privileged_open(struct file **lower_file,
+                             struct dentry *lower_dentry,
+                             struct vfsmount *lower_mnt);
+int ecryptfs_init_persistent_file(struct dentry *ecryptfs_dentry);
 #endif /* #ifndef ECRYPTFS_KERNEL_H */
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index 24749bf0668f..9244d653743e 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -192,6 +192,23 @@ static int ecryptfs_open(struct inode *inode, struct file *file)
                                      | ECRYPTFS_ENCRYPTED);
        }
        mutex_unlock(&crypt_stat->cs_mutex);
+        /* The persistent lower file is opened lazily, so make sure it
+         * exists before it is dereferenced below. */
+        if (!ecryptfs_inode_to_private(inode)->lower_file) {
+                rc = ecryptfs_init_persistent_file(ecryptfs_dentry);
+                if (rc) {
+                        printk(KERN_ERR "%s: Error attempting to initialize "
+                               "the persistent file for the dentry with name "
+                               "[%s]; rc = [%d]\n", __func__,
+                               ecryptfs_dentry->d_name.name, rc);
+                        goto out;
+                }
+        }
+        /* O_RDONLY is 0, so it cannot be tested as a bit; the access
+         * mode has to be compared through O_ACCMODE. */
+        if (((ecryptfs_inode_to_private(inode)->lower_file->f_flags
+              & O_ACCMODE) == O_RDONLY)
+            && ((file->f_flags & O_ACCMODE) != O_RDONLY)) {
+                rc = -EPERM;
+                printk(KERN_WARNING "%s: Lower persistent file is RO; eCryptfs "
+                       "file must hence be opened RO\n", __func__);
+                goto out;
+        }
        ecryptfs_set_file_lower(
                file, ecryptfs_inode_to_private(inode)->lower_file);
        if (S_ISDIR(ecryptfs_dentry->d_inode->i_mode)) {
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index c92cc1c00aae..89209f00f9c7 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -31,6 +31,7 @@
 #include <linux/mount.h>
 #include <linux/crypto.h>
 #include <linux/fs_stack.h>
+#include <asm/unaligned.h>
 #include "ecryptfs_kernel.h"
 static struct dentry *lock_parent(struct dentry *dentry)
@@ -188,6 +189,16 @@ static int ecryptfs_initialize_file(struct dentry *ecryptfs_dentry)
                                "context; rc = [%d]\n", rc);
                goto out;
        }
+        if (!ecryptfs_inode_to_private(ecryptfs_dentry->d_inode)->lower_file) {
+                rc = ecryptfs_init_persistent_file(ecryptfs_dentry);
+                if (rc) {
+                        printk(KERN_ERR "%s: Error attempting to initialize "
+                               "the persistent file for the dentry with name "
+                               "[%s]; rc = [%d]\n", __func__,
+                               ecryptfs_dentry->d_name.name, rc);
+                        goto out;
+                }
+        }
        rc = ecryptfs_write_metadata(ecryptfs_dentry);
        if (rc) {
                printk(KERN_ERR "Error writing headers; rc = [%d]\n", rc);
@@ -307,10 +318,11 @@ static struct dentry *ecryptfs_lookup(struct inode *dir, struct dentry *dentry,
                d_add(dentry, NULL);
                goto out;
        }
-        rc = ecryptfs_interpose(lower_dentry, dentry, dir->i_sb, 1);
+        rc = ecryptfs_interpose(lower_dentry, dentry, dir->i_sb,
+                                ECRYPTFS_INTERPOSE_FLAG_D_ADD);
        if (rc) {
                ecryptfs_printk(KERN_ERR, "Error interposing\n");
-                goto out_dput;
+                goto out;
        }
        if (S_ISDIR(lower_inode->i_mode)) {
                ecryptfs_printk(KERN_DEBUG, "Is a directory; returning\n");
@@ -336,11 +348,21 @@ static struct dentry *ecryptfs_lookup(struct inode *dir, struct dentry *dentry,
                rc = -ENOMEM;
                ecryptfs_printk(KERN_ERR,
                                "Cannot ecryptfs_kmalloc a page\n");
-                goto out_dput;
+                goto out;
        }
        crypt_stat = &ecryptfs_inode_to_private(dentry->d_inode)->crypt_stat;
        if (!(crypt_stat->flags & ECRYPTFS_POLICY_APPLIED))
                ecryptfs_set_default_sizes(crypt_stat);
+        if (!ecryptfs_inode_to_private(dentry->d_inode)->lower_file) {
+                rc = ecryptfs_init_persistent_file(dentry);
+                if (rc) {
+                        printk(KERN_ERR "%s: Error attempting to initialize "
+                               "the persistent file for the dentry with name "
+                               "[%s]; rc = [%d]\n", __func__,
+                               dentry->d_name.name, rc);
+                        goto out;
+                }
+        }
        rc = ecryptfs_read_and_validate_header_region(page_virt,
                                                      dentry->d_inode);
        if (rc) {
@@ -364,8 +386,7 @@ static struct dentry *ecryptfs_lookup(struct inode *dir, struct dentry *dentry,
                else
                        file_size = i_size_read(lower_dentry->d_inode);
        } else {
-                memcpy(&file_size, page_virt, sizeof(file_size));
+                file_size = get_unaligned_be64(page_virt);
-                file_size = be64_to_cpu(file_size);
        }
        i_size_write(dentry->d_inode, (loff_t)file_size);
        kmem_cache_free(ecryptfs_header_cache_2, page_virt);
@@ -444,7 +465,6 @@ static int ecryptfs_symlink(struct inode *dir, struct dentry *dentry,
        int rc;
        struct dentry *lower_dentry;
        struct dentry *lower_dir_dentry;
-        umode_t mode;
        char *encoded_symname;
        int encoded_symlen;
        struct ecryptfs_crypt_stat *crypt_stat = NULL;
@@ -452,7 +472,6 @@ static int ecryptfs_symlink(struct inode *dir, struct dentry *dentry,
        lower_dentry = ecryptfs_dentry_to_lower(dentry);
        dget(lower_dentry);
        lower_dir_dentry = lock_parent(lower_dentry);
-        mode = S_IALLUGO;
        encoded_symlen = ecryptfs_encode_filename(crypt_stat, symname,
                                                  strlen(symname),
                                                  &encoded_symname);
@@ -461,7 +480,7 @@ static int ecryptfs_symlink(struct inode *dir, struct dentry *dentry,
                goto out_lock;
        }
        rc = vfs_symlink(lower_dir_dentry->d_inode, lower_dentry,
-                         encoded_symname, mode);
+                         encoded_symname);
        kfree(encoded_symname);
        if (rc || !lower_dentry->d_inode)
                goto out_lock;
@@ -809,22 +828,9 @@ out:
 }
 static int
-ecryptfs_permission(struct inode *inode, int mask, struct nameidata *nd)
+ecryptfs_permission(struct inode *inode, int mask)
 {
-        int rc;
+        /* The access check is delegated to the lower inode. */
+        return inode_permission(ecryptfs_inode_to_lower(inode), mask);
-        if (nd) {
-                struct vfsmount *vfsmnt_save = nd->path.mnt;
-                struct dentry *dentry_save = nd->path.dentry;
-                nd->path.mnt = ecryptfs_dentry_to_lower_mnt(nd->path.dentry);
-                nd->path.dentry = ecryptfs_dentry_to_lower(nd->path.dentry);
-                rc = permission(ecryptfs_inode_to_lower(inode), mask, nd);
-                nd->path.mnt = vfsmnt_save;
-                nd->path.dentry = dentry_save;
-        } else
-                rc = permission(ecryptfs_inode_to_lower(inode), mask, NULL);
-        return rc;
 }
 /**
diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c
index e82b457180be..f5b76a331b9c 100644
--- a/fs/ecryptfs/keystore.c
+++ b/fs/ecryptfs/keystore.c
@@ -44,15 +44,15 @@ static int process_request_key_err(long err_code)
        int rc = 0;
        switch (err_code) {
-        case ENOKEY:
+        case -ENOKEY:
                ecryptfs_printk(KERN_WARNING, "No key\n");
                rc = -ENOENT;
                break;
-        case EKEYEXPIRED:
+        case -EKEYEXPIRED:
                ecryptfs_printk(KERN_WARNING, "Key expired\n");
                rc = -ETIME;
                break;
-        case EKEYREVOKED:
+        case -EKEYREVOKED:
                ecryptfs_printk(KERN_WARNING, "Key revoked\n");
                rc = -EINVAL;
                break;
@@ -963,8 +963,7 @@ int ecryptfs_keyring_auth_tok_for_sig(struct key **auth_tok_key,
        if (!(*auth_tok_key) || IS_ERR(*auth_tok_key)) {
                printk(KERN_ERR "Could not find key with description: [%s]\n",
                       sig);
-                process_request_key_err(PTR_ERR(*auth_tok_key));
+                rc = process_request_key_err(PTR_ERR(*auth_tok_key));
-                rc = -EINVAL;
                goto out;
        }
        (*auth_tok) = ecryptfs_get_key_payload_data(*auth_tok_key);
diff --git a/fs/ecryptfs/kthread.c b/fs/ecryptfs/kthread.c
new file mode 100644
index 000000000000..c440c6b58b2d
--- /dev/null
+++ b/fs/ecryptfs/kthread.c
@@ -0,0 +1,203 @@
+/**
+ * eCryptfs: Linux filesystem encryption layer
+ *
+ * Copyright (C) 2008 International Business Machines Corp.
+ *   Author(s): Michael A. Halcrow <mahalcro@us.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+ * 02111-1307, USA.
+ */
+#include <linux/kthread.h>
+#include <linux/freezer.h>
+#include <linux/wait.h>
+#include <linux/mount.h>
+#include "ecryptfs_kernel.h"
+struct kmem_cache *ecryptfs_open_req_cache;
+/* State shared between the eCryptfs kernel thread and its requesters. */
+static struct ecryptfs_kthread_ctl {
+/* Set at teardown; the thread exits and new requests are refused */
+#define ECRYPTFS_KTHREAD_ZOMBIE 0x00000001
+        u32 flags;
+        /* Held while flags or req_list is accessed */
+        struct mutex mux;
+        /* Queue of struct ecryptfs_open_req (via kthread_ctl_list) */
+        struct list_head req_list;
+        /* The kernel thread sleeps here until work arrives or it must stop */
+        wait_queue_head_t wait;
+} ecryptfs_kthread_ctl;
+static struct task_struct *ecryptfs_kthread;
+/**
+ * ecryptfs_threadfn
+ * @ignored: ignored
+ *
+ * Main loop of the eCryptfs kernel thread. It sleeps until a request
+ * is queued or it is asked to stop, then opens the lower file of every
+ * queued request with (O_RDWR | O_LARGEFILE) and wakes the requester.
+ *
+ * Always returns zero; the loop ends once the control structure has
+ * been flagged ECRYPTFS_KTHREAD_ZOMBIE.
+ */
+static int ecryptfs_threadfn(void *ignored)
+{
+        struct ecryptfs_kthread_ctl *ctl = &ecryptfs_kthread_ctl;
+        set_freezable();
+        for (;;) {
+                struct ecryptfs_open_req *pending;
+                wait_event_freezable(
+                        ctl->wait,
+                        (!list_empty(&ctl->req_list)
+                         || kthread_should_stop()));
+                mutex_lock(&ctl->mux);
+                /* Teardown in progress: leave without touching the queue */
+                if (ctl->flags & ECRYPTFS_KTHREAD_ZOMBIE) {
+                        mutex_unlock(&ctl->mux);
+                        break;
+                }
+                /* Drain the queue, oldest request first */
+                while (!list_empty(&ctl->req_list)) {
+                        pending = list_first_entry(&ctl->req_list,
+                                                   struct ecryptfs_open_req,
+                                                   kthread_ctl_list);
+                        mutex_lock(&pending->mux);
+                        list_del(&pending->kthread_ctl_list);
+                        if (!(pending->flags & ECRYPTFS_REQ_ZOMBIE)) {
+                                /* References taken for dentry_open() */
+                                dget(pending->lower_dentry);
+                                mntget(pending->lower_mnt);
+                                (*pending->lower_file) = dentry_open(
+                                        pending->lower_dentry,
+                                        pending->lower_mnt,
+                                        (O_RDWR | O_LARGEFILE));
+                                pending->flags |= ECRYPTFS_REQ_PROCESSED;
+                        }
+                        wake_up(&pending->wait);
+                        mutex_unlock(&pending->mux);
+                }
+                mutex_unlock(&ctl->mux);
+        }
+        return 0;
+}
+/**
+ * ecryptfs_init_kthread
+ *
+ * Initializes the shared request-queue state and starts the
+ * "ecryptfs-kthread" kernel thread.
+ *
+ * Returns zero on success; the error carried by the failed
+ * kthread_run() otherwise
+ */
+int ecryptfs_init_kthread(void)
+{
+        int rc;
+        mutex_init(&ecryptfs_kthread_ctl.mux);
+        init_waitqueue_head(&ecryptfs_kthread_ctl.wait);
+        INIT_LIST_HEAD(&ecryptfs_kthread_ctl.req_list);
+        ecryptfs_kthread = kthread_run(&ecryptfs_threadfn, NULL,
+                                       "ecryptfs-kthread");
+        if (!IS_ERR(ecryptfs_kthread))
+                return 0;
+        rc = PTR_ERR(ecryptfs_kthread);
+        printk(KERN_ERR "%s: Failed to create kernel thread; rc = [%d]"
+               "\n", __func__, rc);
+        return rc;
+}
+/**
+ * ecryptfs_destroy_kthread
+ *
+ * Flags the control structure and every still-queued request as
+ * zombie, wakes the waiters of those requests, and stops the kernel
+ * thread.
+ */
+void ecryptfs_destroy_kthread(void)
+{
+        struct ecryptfs_open_req *req;
+        mutex_lock(&ecryptfs_kthread_ctl.mux);
+        /* From here on ecryptfs_privileged_open() refuses new requests */
+        ecryptfs_kthread_ctl.flags |= ECRYPTFS_KTHREAD_ZOMBIE;
+        /* Requests still queued will not be serviced; their waiters see
+         * ECRYPTFS_REQ_ZOMBIE and give up with -EIO */
+        list_for_each_entry(req, &ecryptfs_kthread_ctl.req_list,
+                            kthread_ctl_list) {
+                mutex_lock(&req->mux);
+                req->flags |= ECRYPTFS_REQ_ZOMBIE;
+                wake_up(&req->wait);
+                mutex_unlock(&req->mux);
+        }
+        mutex_unlock(&ecryptfs_kthread_ctl.mux);
+        kthread_stop(ecryptfs_kthread);
+        wake_up(&ecryptfs_kthread_ctl.wait);
+}
+/**
+ * ecryptfs_privileged_open
+ * @lower_file: Result of dentry_open by root on lower dentry
+ * @lower_dentry: Lower dentry for file to open
+ * @lower_mnt: Lower vfsmount for file to open
+ *
+ * This function gets a r/w file opened against the lower dentry. The
+ * open is first attempted directly; if that fails, the request is
+ * queued for the eCryptfs kernel thread. If the thread's r/w open
+ * fails as well, a read-only open is tried as a last resort.
+ *
+ * Returns zero on success (including a successful read-only
+ * fallback); non-zero otherwise. When both the r/w and the read-only
+ * opens fail, *@lower_file is set to NULL; on the other failure paths
+ * it may still hold the ERR_PTR from the initial attempt.
+ */
+int ecryptfs_privileged_open(struct file **lower_file,
+                             struct dentry *lower_dentry,
+                             struct vfsmount *lower_mnt)
+{
+        struct ecryptfs_open_req *req;
+        int rc = 0;
+        /* Corresponding dput() and mntput() are done when the
+         * persistent file is fput() when the eCryptfs inode is
+         * destroyed. */
+        dget(lower_dentry);
+        mntget(lower_mnt);
+        (*lower_file) = dentry_open(lower_dentry, lower_mnt,
+                                    (O_RDWR | O_LARGEFILE));
+        if (!IS_ERR(*lower_file))
+                goto out;
+        /* The direct open failed; hand the open to the kernel thread. */
+        req = kmem_cache_alloc(ecryptfs_open_req_cache, GFP_KERNEL);
+        if (!req) {
+                rc = -ENOMEM;
+                goto out;
+        }
+        mutex_init(&req->mux);
+        req->lower_file = lower_file;
+        req->lower_dentry = lower_dentry;
+        req->lower_mnt = lower_mnt;
+        init_waitqueue_head(&req->wait);
+        req->flags = 0;
+        mutex_lock(&ecryptfs_kthread_ctl.mux);
+        if (ecryptfs_kthread_ctl.flags & ECRYPTFS_KTHREAD_ZOMBIE) {
+                rc = -EIO;
+                mutex_unlock(&ecryptfs_kthread_ctl.mux);
+                printk(KERN_ERR "%s: We are in the middle of shutting down; "
+                       "aborting privileged request to open lower file\n",
+                        __func__);
+                goto out_free;
+        }
+        list_add_tail(&req->kthread_ctl_list, &ecryptfs_kthread_ctl.req_list);
+        mutex_unlock(&ecryptfs_kthread_ctl.mux);
+        wake_up(&ecryptfs_kthread_ctl.wait);
+        /* Any flag bit means the request was processed or abandoned. */
+        wait_event(req->wait, (req->flags != 0));
+        mutex_lock(&req->mux);
+        BUG_ON(req->flags == 0);
+        if (req->flags & ECRYPTFS_REQ_DROPPED
+            || req->flags & ECRYPTFS_REQ_ZOMBIE) {
+                rc = -EIO;
+                printk(KERN_WARNING "%s: Privileged open request dropped\n",
+                       __func__);
+                goto out_unlock;
+        }
+        if (IS_ERR(*req->lower_file)) {
+                /* The kernel thread's r/w open failed too; fall back to
+                 * a read-only open. rc is left at zero unless this
+                 * fallback fails as well, so that a successful RO open
+                 * is reported as success. */
+                dget(lower_dentry);
+                mntget(lower_mnt);
+                (*lower_file) = dentry_open(lower_dentry, lower_mnt,
+                                            (O_RDONLY | O_LARGEFILE));
+                if (IS_ERR(*lower_file)) {
+                        rc = PTR_ERR(*lower_file);
+                        (*lower_file) = NULL;
+                        printk(KERN_WARNING "%s: Error attempting privileged "
+                               "open of lower file with either RW or RO "
+                               "perms; rc = [%d]. Giving up.\n",
+                               __func__, rc);
+                }
+        }
+out_unlock:
+        mutex_unlock(&req->mux);
+out_free:
+        kmem_cache_free(ecryptfs_open_req_cache, req);
+out:
+        return rc;
+}
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index d603631601eb..448dfd597b5f 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -117,7 +117,7 @@ void __ecryptfs_printk(const char *fmt, ...)
 *
 * Returns zero on success; non-zero otherwise
 */
-static int ecryptfs_init_persistent_file(struct dentry *ecryptfs_dentry)
+int ecryptfs_init_persistent_file(struct dentry *ecryptfs_dentry)
 {
        struct ecryptfs_inode_info *inode_info =
                ecryptfs_inode_to_private(ecryptfs_dentry->d_inode);
@@ -130,26 +130,12 @@ static int ecryptfs_init_persistent_file(struct dentry *ecryptfs_dentry)
                        ecryptfs_dentry_to_lower_mnt(ecryptfs_dentry);
                lower_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry);
-                /* Corresponding dput() and mntput() are done when the
+                rc = ecryptfs_privileged_open(&inode_info->lower_file,
-                 * persistent file is fput() when the eCryptfs inode
+                                                     lower_dentry, lower_mnt);
-                 * is destroyed. */
+                if (rc || IS_ERR(inode_info->lower_file)) {
-                dget(lower_dentry);
-                mntget(lower_mnt);
-                inode_info->lower_file = dentry_open(lower_dentry,
-                                                     lower_mnt,
-                                                     (O_RDWR | O_LARGEFILE));
-                if (IS_ERR(inode_info->lower_file)) {
-                        dget(lower_dentry);
-                        mntget(lower_mnt);
-                        inode_info->lower_file = dentry_open(lower_dentry,
-                                                             lower_mnt,
-                                                             (O_RDONLY
-                                                              | O_LARGEFILE));
-                }
-                if (IS_ERR(inode_info->lower_file)) {
                        printk(KERN_ERR "Error opening lower persistent file "
-                               "for lower_dentry [0x%p] and lower_mnt [0x%p]\n",
+                               "for lower_dentry [0x%p] and lower_mnt [0x%p]; "
-                               lower_dentry, lower_mnt);
+                               "rc = [%d]\n", lower_dentry, lower_mnt, rc);
                        rc = PTR_ERR(inode_info->lower_file);
                        inode_info->lower_file = NULL;
                }
@@ -163,14 +149,14 @@ static int ecryptfs_init_persistent_file(struct dentry *ecryptfs_dentry)
 * @lower_dentry: Existing dentry in the lower filesystem
 * @dentry: ecryptfs' dentry
 * @sb: ecryptfs's super_block
- * @flag: If set to true, then d_add is called, else d_instantiate is called
+ * @flags: flags to govern behavior of interpose procedure
 *
 * Interposes upper and lower dentries.
 *
 * Returns zero on success; non-zero otherwise
 */
 int ecryptfs_interpose(struct dentry *lower_dentry, struct dentry *dentry,
-                       struct super_block *sb, int flag)
+                       struct super_block *sb, u32 flags)
 {
        struct inode *lower_inode;
        struct inode *inode;
@@ -207,7 +193,7 @@ int ecryptfs_interpose(struct dentry *lower_dentry, struct dentry *dentry,
                init_special_inode(inode, lower_inode->i_mode,
                                   lower_inode->i_rdev);
        dentry->d_op = &ecryptfs_dops;
-        if (flag)
+        if (flags & ECRYPTFS_INTERPOSE_FLAG_D_ADD)
                d_add(dentry, inode);
        else
                d_instantiate(dentry, inode);
@@ -215,13 +201,6 @@ int ecryptfs_interpose(struct dentry *lower_dentry, struct dentry *dentry,
        /* This size will be overwritten for real files w/ headers and
         * other metadata */
        fsstack_copy_inode_size(inode, lower_inode);
-        rc = ecryptfs_init_persistent_file(dentry);
-        if (rc) {
-                printk(KERN_ERR "%s: Error attempting to initialize the "
-                       "persistent file for the dentry with name [%s]; "
-                       "rc = [%d]\n", __func__, dentry->d_name.name, rc);
-                goto out;
-        }
 out:
        return rc;
 }
@@ -262,10 +241,11 @@ static int ecryptfs_init_global_auth_toks(
                               "session keyring for sig specified in mount "
                               "option: [%s]\n", global_auth_tok->sig);
                        global_auth_tok->flags |= ECRYPTFS_AUTH_TOK_INVALID;
-                        rc = 0;
+                        goto out;
                } else
                        global_auth_tok->flags &= ~ECRYPTFS_AUTH_TOK_INVALID;
        }
+out:
        return rc;
 }
@@ -314,7 +294,6 @@ static int ecryptfs_parse_options(struct super_block *sb, char *options)
        char *cipher_name_dst;
        char *cipher_name_src;
        char *cipher_key_bytes_src;
-        int cipher_name_len;
        if (!options) {
                rc = -EINVAL;
@@ -395,17 +374,12 @@ static int ecryptfs_parse_options(struct super_block *sb, char *options)
                goto out;
        }
        if (!cipher_name_set) {
-                cipher_name_len = strlen(ECRYPTFS_DEFAULT_CIPHER);
+                int cipher_name_len = strlen(ECRYPTFS_DEFAULT_CIPHER);
-                if (unlikely(cipher_name_len
-                             >= ECRYPTFS_MAX_CIPHER_NAME_SIZE)) {
+                BUG_ON(cipher_name_len >= ECRYPTFS_MAX_CIPHER_NAME_SIZE);
-                        rc = -EINVAL;
-                        BUG();
+                strcpy(mount_crypt_stat->global_default_cipher_name,
-                        goto out;
+                       ECRYPTFS_DEFAULT_CIPHER);
-                }
-                memcpy(mount_crypt_stat->global_default_cipher_name,
-                       ECRYPTFS_DEFAULT_CIPHER, cipher_name_len);
-                mount_crypt_stat->global_default_cipher_name[cipher_name_len]
-                    = '\0';
        }
        if (!cipher_key_bytes_set) {
                mount_crypt_stat->global_default_cipher_key_size = 0;
@@ -430,7 +404,6 @@ static int ecryptfs_parse_options(struct super_block *sb, char *options)
                printk(KERN_WARNING "One or more global auth toks could not "
                       "properly register; rc = [%d]\n", rc);
        }
-        rc = 0;
 out:
        return rc;
 }
@@ -605,7 +578,7 @@ static struct file_system_type ecryptfs_fs_type = {
 * Initializes the ecryptfs_inode_info_cache when it is created
 */
 static void
-inode_info_init_once(struct kmem_cache *cachep, void *vptr)
+inode_info_init_once(void *vptr)
 {
        struct ecryptfs_inode_info *ei = (struct ecryptfs_inode_info *)vptr;
@@ -616,7 +589,7 @@ static struct ecryptfs_cache_info {
        struct kmem_cache **cache;
        const char *name;
        size_t size;
-        void (*ctor)(struct kmem_cache *cache, void *obj);
+        void (*ctor)(void *obj);
 } ecryptfs_cache_infos[] = {
        {
                .cache = &ecryptfs_auth_tok_list_item_cache,
@@ -679,6 +652,11 @@ static struct ecryptfs_cache_info {
                .name = "ecryptfs_key_tfm_cache",
                .size = sizeof(struct ecryptfs_key_tfm),
        },
+        {
+                .cache = &ecryptfs_open_req_cache,
+                .name = "ecryptfs_open_req_cache",
+                .size = sizeof(struct ecryptfs_open_req),
+        },
 };
 static void ecryptfs_free_kmem_caches(void)
@@ -795,11 +773,17 @@ static int __init ecryptfs_init(void)
                printk(KERN_ERR "sysfs registration failed\n");
                goto out_unregister_filesystem;
        }
+        rc = ecryptfs_init_kthread();
+        if (rc) {
+                printk(KERN_ERR "%s: kthread initialization failed; "
+                       "rc = [%d]\n", __func__, rc);
+                goto out_do_sysfs_unregistration;
+        }
        rc = ecryptfs_init_messaging(ecryptfs_transport);
        if (rc) {
-                ecryptfs_printk(KERN_ERR, "Failure occured while attempting to "
+                printk(KERN_ERR "Failure occured while attempting to "
                                "initialize the eCryptfs netlink socket\n");
-                goto out_do_sysfs_unregistration;
+                goto out_destroy_kthread;
        }
        rc = ecryptfs_init_crypto();
        if (rc) {
@@ -814,6 +798,8 @@ static int __init ecryptfs_init(void)
        goto out;
 out_release_messaging:
        ecryptfs_release_messaging(ecryptfs_transport);
+out_destroy_kthread:
+        ecryptfs_destroy_kthread();
 out_do_sysfs_unregistration:
        do_sysfs_unregistration();
 out_unregister_filesystem:
@@ -833,6 +819,7 @@ static void __exit ecryptfs_exit(void)
                printk(KERN_ERR "Failure whilst attempting to destroy crypto; "
                       "rc = [%d]\n", rc);
        ecryptfs_release_messaging(ecryptfs_transport);
+        ecryptfs_destroy_kthread();
        do_sysfs_unregistration();
        unregister_filesystem(&ecryptfs_fs_type);
        ecryptfs_free_kmem_caches();
diff --git a/fs/ecryptfs/miscdev.c b/fs/ecryptfs/miscdev.c
index 09a4522f65e6..b484792a0996 100644
--- a/fs/ecryptfs/miscdev.c
+++ b/fs/ecryptfs/miscdev.c
@@ -358,46 +358,6 @@ out_unlock_daemon:
 }
 /**
- * ecryptfs_miscdev_helo
- * @euid: effective user id of miscdevess sending helo packet
- * @user_ns: The namespace in which @euid applies
- * @pid: miscdevess id of miscdevess sending helo packet
- *
- * Returns zero on success; non-zero otherwise
- */
-static int ecryptfs_miscdev_helo(uid_t euid, struct user_namespace *user_ns,
-                                 struct pid *pid)
-{
-        int rc;
-        rc = ecryptfs_process_helo(ECRYPTFS_TRANSPORT_MISCDEV, euid, user_ns,
-                                   pid);
-        if (rc)
-                printk(KERN_WARNING "Error processing HELO; rc = [%d]\n", rc);
-        return rc;
-}
-/**
- * ecryptfs_miscdev_quit
- * @euid: effective user id of miscdevess sending quit packet
- * @user_ns: The namespace in which @euid applies
- * @pid: miscdevess id of miscdevess sending quit packet
- *
- * Returns zero on success; non-zero otherwise
- */
-static int ecryptfs_miscdev_quit(uid_t euid, struct user_namespace *user_ns,
-                                 struct pid *pid)
-{
-        int rc;
-        rc = ecryptfs_process_quit(euid, user_ns, pid);
-        if (rc)
-                printk(KERN_WARNING
-                       "Error processing QUIT message; rc = [%d]\n", rc);
-        return rc;
-}
-/**
 * ecryptfs_miscdev_response - miscdevess response to message previously sent to daemon
 * @data: Bytes comprising struct ecryptfs_message
 * @data_size: sizeof(struct ecryptfs_message) + data len
@@ -512,26 +472,7 @@ ecryptfs_miscdev_write(struct file *file, const char __user *buf,
                               __func__, rc);
                break;
        case ECRYPTFS_MSG_HELO:
-                rc = ecryptfs_miscdev_helo(current->euid,
-                                           current->nsproxy->user_ns,
-                                           task_pid(current));
-                if (rc) {
-                        printk(KERN_ERR "%s: Error attempting to process "
-                               "helo from pid [0x%p]; rc = [%d]\n", __func__,
-                               task_pid(current), rc);
-                        goto out_free;
-                }
-                break;
        case ECRYPTFS_MSG_QUIT:
-                rc = ecryptfs_miscdev_quit(current->euid,
-                                           current->nsproxy->user_ns,
-                                           task_pid(current));
-                if (rc) {
-                        printk(KERN_ERR "%s: Error attempting to process "
-                               "quit from pid [0x%p]; rc = [%d]\n", __func__,
-                               task_pid(current), rc);
-                        goto out_free;
-                }
                break;
        default:
                ecryptfs_printk(KERN_WARNING, "Dropping miscdev "
diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c
index 2b6fe1e6e8ba..245c2dc02d5c 100644
--- a/fs/ecryptfs/mmap.c
+++ b/fs/ecryptfs/mmap.c
@@ -32,6 +32,7 @@
 #include <linux/file.h>
 #include <linux/crypto.h>
 #include <linux/scatterlist.h>
+#include <asm/unaligned.h>
 #include "ecryptfs_kernel.h"
 /**
@@ -372,7 +373,6 @@ out:
 */
 static int ecryptfs_write_inode_size_to_header(struct inode *ecryptfs_inode)
 {
-        u64 file_size;
        char *file_size_virt;
        int rc;
@@ -381,9 +381,7 @@ static int ecryptfs_write_inode_size_to_header(struct inode *ecryptfs_inode)
                rc = -ENOMEM;
                goto out;
        }
-        file_size = (u64)i_size_read(ecryptfs_inode);
+        put_unaligned_be64(i_size_read(ecryptfs_inode), file_size_virt);
-        file_size = cpu_to_be64(file_size);
-        memcpy(file_size_virt, &file_size, sizeof(u64));
        rc = ecryptfs_write_lower(ecryptfs_inode, file_size_virt, 0,
                                  sizeof(u64));
        kfree(file_size_virt);
@@ -403,7 +401,6 @@ static int ecryptfs_write_inode_size_to_xattr(struct inode *ecryptfs_inode)
        struct dentry *lower_dentry =
                ecryptfs_inode_to_private(ecryptfs_inode)->lower_file->f_dentry;
        struct inode *lower_inode = lower_dentry->d_inode;
-        u64 file_size;
        int rc;
        if (!lower_inode->i_op->getxattr || !lower_inode->i_op->setxattr) {
@@ -424,9 +421,7 @@ static int ecryptfs_write_inode_size_to_xattr(struct inode *ecryptfs_inode)
                                           xattr_virt, PAGE_CACHE_SIZE);
        if (size < 0)
                size = 8;
-        file_size = (u64)i_size_read(ecryptfs_inode);
+        put_unaligned_be64(i_size_read(ecryptfs_inode), xattr_virt);
-        file_size = cpu_to_be64(file_size);
-        memcpy(xattr_virt, &file_size, sizeof(u64));
        rc = lower_inode->i_op->setxattr(lower_dentry, ECRYPTFS_XATTR_NAME,
                                         xattr_virt, size, 0);
        mutex_unlock(&lower_inode->i_mutex);
diff --git a/fs/efs/super.c b/fs/efs/super.c
index d733531b55e2..567b134fa1f1 100644
--- a/fs/efs/super.c
+++ b/fs/efs/super.c
@@ -70,7 +70,7 @@ static void efs_destroy_inode(struct inode *inode)
        kmem_cache_free(efs_inode_cachep, INODE_INFO(inode));
 }
-static void init_once(struct kmem_cache *cachep, void *foo)
+static void init_once(void *foo)
 {
        struct efs_inode_info *ei = (struct efs_inode_info *) foo;
diff --git a/fs/eventfd.c b/fs/eventfd.c
index 343942deeec1..08bf558d0408 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -198,11 +198,18 @@ struct file *eventfd_fget(int fd)
        return file;
 }
-asmlinkage long sys_eventfd(unsigned int count)
+asmlinkage long sys_eventfd2(unsigned int count, int flags)
 {
        int fd;
        struct eventfd_ctx *ctx;
+        /* Check the EFD_* constants for consistency.  */
+        BUILD_BUG_ON(EFD_CLOEXEC != O_CLOEXEC);
+        BUILD_BUG_ON(EFD_NONBLOCK != O_NONBLOCK);
+        if (flags & ~(EFD_CLOEXEC | EFD_NONBLOCK))
+                return -EINVAL;
        ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
        if (!ctx)
                return -ENOMEM;
@@ -214,9 +221,15 @@ asmlinkage long sys_eventfd(unsigned int count)
         * When we call this, the initialization must be complete, since
         * anon_inode_getfd() will install the fd.
         */
-        fd = anon_inode_getfd("[eventfd]", &eventfd_fops, ctx);
+        fd = anon_inode_getfd("[eventfd]", &eventfd_fops, ctx,
+                              flags & (O_CLOEXEC | O_NONBLOCK));
        if (fd < 0)
                kfree(ctx);
        return fd;
 }
+asmlinkage long sys_eventfd(unsigned int count)
+{
+        return sys_eventfd2(count, 0);
+}
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 990c01d2d66b..0c87474f7917 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1046,20 +1046,25 @@ retry:
 * RB tree. With the current implementation, the "size" parameter is ignored
 * (besides sanity checks).
 */
-asmlinkage long sys_epoll_create(int size)
+asmlinkage long sys_epoll_create1(int flags)
 {
        int error, fd = -1;
        struct eventpoll *ep;
+        /* Check the EPOLL_* constant for consistency.  */
+        BUILD_BUG_ON(EPOLL_CLOEXEC != O_CLOEXEC);
+        if (flags & ~EPOLL_CLOEXEC)
+                return -EINVAL;
        DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d)\n",
-                     current, size));
+                     current, flags));
        /*
-         * Sanity check on the size parameter, and create the internal data
+         * Create the internal data structure ( "struct eventpoll" ).
-         * structure ( "struct eventpoll" ).
         */
-        error = -EINVAL;
+        error = ep_alloc(&ep);
-        if (size <= 0 || (error = ep_alloc(&ep)) < 0) {
+        if (error < 0) {
                fd = error;
                goto error_return;
        }
@@ -1068,17 +1073,26 @@ asmlinkage long sys_epoll_create(int size)
         * Creates all the items needed to setup an eventpoll file. That is,
         * a file structure and a free file descriptor.
         */
-        fd = anon_inode_getfd("[eventpoll]", &eventpoll_fops, ep);
+        fd = anon_inode_getfd("[eventpoll]", &eventpoll_fops, ep,
+                              flags & O_CLOEXEC);
        if (fd < 0)
                ep_free(ep);
 error_return:
        DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d) = %d\n",
-                     current, size, fd));
+                     current, flags, fd));
        return fd;
 }
+asmlinkage long sys_epoll_create(int size)
+{
+        if (size < 0)
+                return -EINVAL;
+        return sys_epoll_create1(0);
+}
 /*
 * The following function implements the controller interface for
 * the eventpoll file that enables the insertion/removal/change of
diff --git a/fs/exec.c b/fs/exec.c
index fd9234379e8d..9696bbf0f0b1 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -25,32 +25,30 @@
 #include <linux/slab.h>
 #include <linux/file.h>
 #include <linux/fdtable.h>
-#include <linux/mman.h>
+#include <linux/mm.h>
 #include <linux/stat.h>
 #include <linux/fcntl.h>
 #include <linux/smp_lock.h>
+#include <linux/swap.h>
 #include <linux/string.h>
 #include <linux/init.h>
-#include <linux/pagemap.h>
 #include <linux/highmem.h>
 #include <linux/spinlock.h>
 #include <linux/key.h>
 #include <linux/personality.h>
 #include <linux/binfmts.h>
-#include <linux/swap.h>
 #include <linux/utsname.h>
 #include <linux/pid_namespace.h>
 #include <linux/module.h>
 #include <linux/namei.h>
 #include <linux/proc_fs.h>
-#include <linux/ptrace.h>
 #include <linux/mount.h>
 #include <linux/security.h>
 #include <linux/syscalls.h>
-#include <linux/rmap.h>
 #include <linux/tsacct_kern.h>
 #include <linux/cn_proc.h>
 #include <linux/audit.h>
+#include <linux/tracehook.h>
 #include <asm/uaccess.h>
 #include <asm/mmu_context.h>
@@ -108,11 +106,17 @@ static inline void put_binfmt(struct linux_binfmt * fmt)
 */
 asmlinkage long sys_uselib(const char __user * library)
 {
-        struct file * file;
+        struct file *file;
        struct nameidata nd;
-        int error;
+        char *tmp = getname(library);
+        int error = PTR_ERR(tmp);
-        error = __user_path_lookup_open(library, LOOKUP_FOLLOW, &nd, FMODE_READ|FMODE_EXEC);
+        if (!IS_ERR(tmp)) {
+                error = path_lookup_open(AT_FDCWD, tmp,
+                                         LOOKUP_FOLLOW, &nd,
+                                         FMODE_READ|FMODE_EXEC);
+                putname(tmp);
+        }
        if (error)
                goto out;
@@ -120,7 +124,11 @@ asmlinkage long sys_uselib(const char __user * library)
        if (!S_ISREG(nd.path.dentry->d_inode->i_mode))
                goto exit;
-        error = vfs_permission(&nd, MAY_READ | MAY_EXEC);
+        error = -EACCES;
+        if (nd.path.mnt->mnt_flags & MNT_NOEXEC)
+                goto exit;
+        error = vfs_permission(&nd, MAY_READ | MAY_EXEC | MAY_OPEN);
        if (error)
                goto exit;
@@ -541,7 +549,7 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
                /*
                 * when the old and new regions overlap clear from new_end.
                 */
-                free_pgd_range(&tlb, new_end, old_end, new_end,
+                free_pgd_range(tlb, new_end, old_end, new_end,
                        vma->vm_next ? vma->vm_next->vm_start : 0);
        } else {
                /*
@@ -550,7 +558,7 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
                 * have constraints on va-space that make this illegal (IA64) -
                 * for the others its just a little faster.
                 */
-                free_pgd_range(&tlb, old_start, old_end, new_end,
+                free_pgd_range(tlb, old_start, old_end, new_end,
                        vma->vm_next ? vma->vm_next->vm_start : 0);
        }
        tlb_finish_mmu(tlb, new_end, old_end);
@@ -658,38 +666,43 @@ EXPORT_SYMBOL(setup_arg_pages);
 struct file *open_exec(const char *name)
 {
        struct nameidata nd;
-        int err;
        struct file *file;
+        int err;
-        err = path_lookup_open(AT_FDCWD, name, LOOKUP_FOLLOW, &nd, FMODE_READ|FMODE_EXEC);
+        err = path_lookup_open(AT_FDCWD, name, LOOKUP_FOLLOW, &nd,
-        file = ERR_PTR(err);
+                                FMODE_READ|FMODE_EXEC);
+        if (err)
-        if (!err) {
+                goto out;
-                struct inode *inode = nd.path.dentry->d_inode;
-                file = ERR_PTR(-EACCES);
+        err = -EACCES;
-                if (S_ISREG(inode->i_mode)) {
+        if (!S_ISREG(nd.path.dentry->d_inode->i_mode))
-                        int err = vfs_permission(&nd, MAY_EXEC);
+                goto out_path_put;
-                        file = ERR_PTR(err);
-                        if (!err) {
+        if (nd.path.mnt->mnt_flags & MNT_NOEXEC)
-                                file = nameidata_to_filp(&nd,
+                goto out_path_put;
-                                                        O_RDONLY|O_LARGEFILE);
-                                if (!IS_ERR(file)) {
+        err = vfs_permission(&nd, MAY_EXEC | MAY_OPEN);
-                                        err = deny_write_access(file);
+        if (err)
-                                        if (err) {
+                goto out_path_put;
-                                                fput(file);
-                                                file = ERR_PTR(err);
+        file = nameidata_to_filp(&nd, O_RDONLY|O_LARGEFILE);
-                                        }
+        if (IS_ERR(file))
-                                }
+                return file;
-out:
-                                return file;
+        err = deny_write_access(file);
-                        }
+        if (err) {
-                }
+                fput(file);
-                release_open_intent(&nd);
+                goto out;
-                path_put(&nd.path);
        }
-        goto out;
-}
+        return file;
+ out_path_put:
+        release_open_intent(&nd);
+        path_put(&nd.path);
+ out:
+        return ERR_PTR(err);
+}
 EXPORT_SYMBOL(open_exec);
 int kernel_read(struct file *file, unsigned long offset,
@@ -724,12 +737,10 @@ static int exec_mmap(struct mm_struct *mm)
                 * Make sure that if there is a core dump in progress
                 * for the old mm, we get out and die instead of going
                 * through with the exec.  We must hold mmap_sem around
-                 * checking core_waiters and changing tsk->mm.  The
+                 * checking core_state and changing tsk->mm.
-                 * core-inducing thread will increment core_waiters for
-                 * each thread whose ->mm == old_mm.
                 */
                down_read(&old_mm->mmap_sem);
-                if (unlikely(old_mm->core_waiters)) {
+                if (unlikely(old_mm->core_state)) {
                        up_read(&old_mm->mmap_sem);
                        return -EINTR;
                }
@@ -1075,13 +1086,8 @@ EXPORT_SYMBOL(prepare_binprm);
 static int unsafe_exec(struct task_struct *p)
 {
-        int unsafe = 0;
+        int unsafe = tracehook_unsafe_exec(p);
-        if (p->ptrace & PT_PTRACED) {
-                if (p->ptrace & PT_PTRACE_CAP)
-                        unsafe |= LSM_UNSAFE_PTRACE_CAP;
-                else
-                        unsafe |= LSM_UNSAFE_PTRACE;
-        }
        if (atomic_read(&p->fs->count) > 1 ||
            atomic_read(&p->files->count) > 1 ||
            atomic_read(&p->sighand->count) > 1)
@@ -1218,6 +1224,7 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs)
                        read_unlock(&binfmt_lock);
                        retval = fn(bprm, regs);
                        if (retval >= 0) {
+                                tracehook_report_exec(fmt, bprm, regs);
                                put_binfmt(fmt);
                                allow_write_access(bprm->file);
                                if (bprm->file)
@@ -1328,6 +1335,7 @@ int do_execve(char * filename,
        if (retval < 0)
                goto out;
+        current->flags &= ~PF_KTHREAD;
        retval = search_binary_handler(bprm,regs);
        if (retval >= 0) {
                /* execve success */
@@ -1382,17 +1390,14 @@ EXPORT_SYMBOL(set_binfmt);
 * name into corename, which must have space for at least
 * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator.
 */
-static int format_corename(char *corename, const char *pattern, long signr)
+static int format_corename(char *corename, int nr_threads, long signr)
 {
-        const char *pat_ptr = pattern;
+        const char *pat_ptr = core_pattern;
+        int ispipe = (*pat_ptr == '|');
        char *out_ptr = corename;
        char *const out_end = corename + CORENAME_MAX_SIZE;
        int rc;
        int pid_in_pattern = 0;
-        int ispipe = 0;
-        if (*pattern == '|')
-                ispipe = 1;
        /* Repeat as long as we have more pattern to process and more output
           space */
@@ -1493,7 +1498,7 @@ static int format_corename(char *corename, const char *pattern, long signr)
         * and core_uses_pid is set, then .%pid will be appended to
         * the filename. Do not do this for piped commands. */
        if (!ispipe && !pid_in_pattern
-            && (core_uses_pid || atomic_read(&current->mm->mm_users) != 1)) {
+            && (core_uses_pid || nr_threads)) {
                rc = snprintf(out_ptr, out_end - out_ptr,
                              ".%d", task_tgid_vnr(current));
                if (rc > out_end - out_ptr)
@@ -1505,9 +1510,10 @@ out:
        return ispipe;
 }
-static void zap_process(struct task_struct *start)
+static int zap_process(struct task_struct *start)
 {
        struct task_struct *t;
+        int nr = 0;
        start->signal->flags = SIGNAL_GROUP_EXIT;
        start->signal->group_stop_count = 0;
@@ -1515,72 +1521,99 @@ static void zap_process(struct task_struct *start)
        t = start;
        do {
                if (t != current && t->mm) {
-                        t->mm->core_waiters++;
                        sigaddset(&t->pending.signal, SIGKILL);
                        signal_wake_up(t, 1);
+                        nr++;
                }
-        } while ((t = next_thread(t)) != start);
+        } while_each_thread(start, t);
+        return nr;
 }
 static inline int zap_threads(struct task_struct *tsk, struct mm_struct *mm,
-                                int exit_code)
+                                struct core_state *core_state, int exit_code)
 {
        struct task_struct *g, *p;
        unsigned long flags;
-        int err = -EAGAIN;
+        int nr = -EAGAIN;
        spin_lock_irq(&tsk->sighand->siglock);
        if (!signal_group_exit(tsk->signal)) {
+                mm->core_state = core_state;
                tsk->signal->group_exit_code = exit_code;
-                zap_process(tsk);
+                nr = zap_process(tsk);
-                err = 0;
        }
        spin_unlock_irq(&tsk->sighand->siglock);
-        if (err)
+        if (unlikely(nr < 0))
-                return err;
+                return nr;
-        if (atomic_read(&mm->mm_users) == mm->core_waiters + 1)
+        if (atomic_read(&mm->mm_users) == nr + 1)
                goto done;
+        /*
+         * We should find and kill all tasks which use this mm, and we should
+         * count them correctly into ->nr_threads. We don't take tasklist
+         * lock, but this is safe wrt:
+         *
+         * fork:
+         *      None of sub-threads can fork after zap_process(leader). All
+         *      processes which were created before this point should be
+         *      visible to zap_threads() because copy_process() adds the new
+         *      process to the tail of init_task.tasks list, and lock/unlock
+         *      of ->siglock provides a memory barrier.
+         *
+         * do_exit:
+         *      The caller holds mm->mmap_sem. This means that the task which
+         *      uses this mm can't pass exit_mm(), so it can't exit or clear
+         *      its ->mm.
+         *
+         * de_thread:
+         *      It does list_replace_rcu(&leader->tasks, &current->tasks),
+         *      we must see either old or new leader, this does not matter.
+         *      However, it can change p->sighand, so lock_task_sighand(p)
+         *      must be used. Since p->mm != NULL and we hold ->mmap_sem
+         *      it can't fail.
+         *
+         *      Note also that "g" can be the old leader with ->mm == NULL
+         *      and already unhashed and thus removed from ->thread_group.
+         *      This is OK, __unhash_process()->list_del_rcu() does not
+         *      clear the ->next pointer, we will find the new leader via
+         *      next_thread().
+         */
        rcu_read_lock();
        for_each_process(g) {
                if (g == tsk->group_leader)
                        continue;
+                if (g->flags & PF_KTHREAD)
+                        continue;
                p = g;
                do {
                        if (p->mm) {
-                                if (p->mm == mm) {
+                                if (unlikely(p->mm == mm)) {
-                                        /*
-                                         * p->sighand can't disappear, but
-                                         * may be changed by de_thread()
-                                         */
                                        lock_task_sighand(p, &flags);
-                                        zap_process(p);
+                                        nr += zap_process(p);
                                        unlock_task_sighand(p, &flags);
                                }
                                break;
                        }
-                } while ((p = next_thread(p)) != g);
+                } while_each_thread(g, p);
        }
        rcu_read_unlock();
 done:
-        return mm->core_waiters;
+        atomic_set(&core_state->nr_threads, nr);
+        return nr;
 }
-static int coredump_wait(int exit_code)
+static int coredump_wait(int exit_code, struct core_state *core_state)
 {
        struct task_struct *tsk = current;
        struct mm_struct *mm = tsk->mm;
-        struct completion startup_done;
        struct completion *vfork_done;
        int core_waiters;
-        init_completion(&mm->core_done);
+        init_completion(&core_state->startup);
-        init_completion(&startup_done);
+        core_state->dumper.task = tsk;
-        mm->core_startup_done = &startup_done;
+        core_state->dumper.next = NULL;
+        core_waiters = zap_threads(tsk, mm, core_state, exit_code);
-        core_waiters = zap_threads(tsk, mm, exit_code);
        up_write(&mm->mmap_sem);
        if (unlikely(core_waiters < 0))
@@ -1597,12 +1630,32 @@ static int coredump_wait(int exit_code)
        }
        if (core_waiters)
-                wait_for_completion(&startup_done);
+                wait_for_completion(&core_state->startup);
 fail:
-        BUG_ON(mm->core_waiters);
        return core_waiters;
 }
+static void coredump_finish(struct mm_struct *mm)
+{
+        struct core_thread *curr, *next;
+        struct task_struct *task;
+        next = mm->core_state->dumper.next;
+        while ((curr = next) != NULL) {
+                next = curr->next;
+                task = curr->task;
+                /*
+                 * see exit_mm(), curr->task must not see
+                 * ->task == NULL before we read ->next.
+                 */
+                smp_mb();
+                curr->task = NULL;
+                wake_up_process(task);
+        }
+        mm->core_state = NULL;
+}
 /*
 * set_dumpable converts traditional three-value dumpable to two flags and
 * stores them into mm->flags.  It modifies lower two bits of mm->flags, but
@@ -1654,6 +1707,7 @@ int get_dumpable(struct mm_struct *mm)
 int do_coredump(long signr, int exit_code, struct pt_regs * regs)
 {
+        struct core_state core_state;
        char corename[CORENAME_MAX_SIZE + 1];
        struct mm_struct *mm = current->mm;
        struct linux_binfmt * binfmt;
@@ -1677,7 +1731,7 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
        /*
         * If another thread got here first, or we are not dumpable, bail out.
         */
-        if (mm->core_waiters || !get_dumpable(mm)) {
+        if (mm->core_state || !get_dumpable(mm)) {
                up_write(&mm->mmap_sem);
                goto fail;
        }
@@ -1692,7 +1746,7 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
                current->fsuid = 0;     /* Dump root private */
        }
-        retval = coredump_wait(exit_code);
+        retval = coredump_wait(exit_code, &core_state);
        if (retval < 0)
                goto fail;
@@ -1707,7 +1761,7 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
         * uses lock_kernel()
         */
        lock_kernel();
-        ispipe = format_corename(corename, core_pattern, signr);
+        ispipe = format_corename(corename, retval, signr);
        unlock_kernel();
        /*
         * Don't bother to check the RLIMIT_CORE value if core_pattern points
@@ -1786,7 +1840,7 @@ fail_unlock:
                argv_free(helper_argv);
        current->fsuid = fsuid;
-        complete_all(&mm->core_done);
+        coredump_finish(mm);
 fail:
        return retval;
 }
diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c
index e58669e1b87c..ae8c4f850b27 100644
--- a/fs/ext2/acl.c
+++ b/fs/ext2/acl.c
@@ -294,7 +294,7 @@ ext2_check_acl(struct inode *inode, int mask)
 }
 int
-ext2_permission(struct inode *inode, int mask, struct nameidata *nd)
+ext2_permission(struct inode *inode, int mask)
 {
        return generic_permission(inode, mask, ext2_check_acl);
 }
diff --git a/fs/ext2/acl.h b/fs/ext2/acl.h
index 0bde85bafe38..b42cf578554b 100644
--- a/fs/ext2/acl.h
+++ b/fs/ext2/acl.h
@@ -58,7 +58,7 @@ static inline int ext2_acl_count(size_t size)
 #define EXT2_ACL_NOT_CACHED ((void *)-1)
 /* acl.c */
-extern int ext2_permission (struct inode *, int, struct nameidata *);
+extern int ext2_permission (struct inode *, int);
 extern int ext2_acl_chmod (struct inode *);
 extern int ext2_init_acl (struct inode *, struct inode *);
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index ef50cbc792db..fd88c7b43e66 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -31,6 +31,7 @@
 #include <linux/seq_file.h>
 #include <linux/mount.h>
 #include <linux/log2.h>
+#include <linux/quotaops.h>
 #include <asm/uaccess.h>
 #include "ext2.h"
 #include "xattr.h"
@@ -158,7 +159,7 @@ static void ext2_destroy_inode(struct inode *inode)
        kmem_cache_free(ext2_inode_cachep, EXT2_I(inode));
 }
-static void init_once(struct kmem_cache * cachep, void *foo)
+static void init_once(void *foo)
 {
        struct ext2_inode_info *ei = (struct ext2_inode_info *) foo;
diff --git a/fs/ext2/xattr_security.c b/fs/ext2/xattr_security.c
index eaa23d2d5213..70c0dbdcdcb7 100644
--- a/fs/ext2/xattr_security.c
+++ b/fs/ext2/xattr_security.c
@@ -14,7 +14,7 @@ static size_t
 ext2_xattr_security_list(struct inode *inode, char *list, size_t list_size,
                         const char *name, size_t name_len)
 {
-        const int prefix_len = sizeof(XATTR_SECURITY_PREFIX)-1;
+        const int prefix_len = XATTR_SECURITY_PREFIX_LEN;
        const size_t total_len = prefix_len + name_len + 1;
        if (list && total_len <= list_size) {
diff --git a/fs/ext2/xattr_trusted.c b/fs/ext2/xattr_trusted.c
index 83ee149f353d..e8219f8eae9f 100644
--- a/fs/ext2/xattr_trusted.c
+++ b/fs/ext2/xattr_trusted.c
@@ -12,13 +12,11 @@
 #include <linux/ext2_fs.h>
 #include "xattr.h"
-#define XATTR_TRUSTED_PREFIX "trusted."
 static size_t
 ext2_xattr_trusted_list(struct inode *inode, char *list, size_t list_size,
                        const char *name, size_t name_len)
 {
-        const int prefix_len = sizeof(XATTR_TRUSTED_PREFIX)-1;
+        const int prefix_len = XATTR_TRUSTED_PREFIX_LEN;
        const size_t total_len = prefix_len + name_len + 1;
        if (!capable(CAP_SYS_ADMIN))
diff --git a/fs/ext2/xattr_user.c b/fs/ext2/xattr_user.c
index f383e7c3a7b5..92495d28c62f 100644
--- a/fs/ext2/xattr_user.c
+++ b/fs/ext2/xattr_user.c
@@ -11,13 +11,11 @@
 #include "ext2.h"
 #include "xattr.h"
-#define XATTR_USER_PREFIX "user."
 static size_t
 ext2_xattr_user_list(struct inode *inode, char *list, size_t list_size,
                     const char *name, size_t name_len)
 {
-        const size_t prefix_len = sizeof(XATTR_USER_PREFIX)-1;
+        const size_t prefix_len = XATTR_USER_PREFIX_LEN;
        const size_t total_len = prefix_len + name_len + 1;
        if (!test_opt(inode->i_sb, XATTR_USER))
diff --git a/fs/ext3/acl.c b/fs/ext3/acl.c
index a754d1848173..b60bb241880c 100644
--- a/fs/ext3/acl.c
+++ b/fs/ext3/acl.c
@@ -299,7 +299,7 @@ ext3_check_acl(struct inode *inode, int mask)
 }
 int
-ext3_permission(struct inode *inode, int mask, struct nameidata *nd)
+ext3_permission(struct inode *inode, int mask)
 {
        return generic_permission(inode, mask, ext3_check_acl);
 }
diff --git a/fs/ext3/acl.h b/fs/ext3/acl.h
index 0d1e6279cbfd..42da16b8cac0 100644
--- a/fs/ext3/acl.h
+++ b/fs/ext3/acl.h
@@ -58,7 +58,7 @@ static inline int ext3_acl_count(size_t size)
 #define EXT3_ACL_NOT_CACHED ((void *)-1)
 /* acl.c */
-extern int ext3_permission (struct inode *, int, struct nameidata *);
+extern int ext3_permission (struct inode *, int);
 extern int ext3_acl_chmod (struct inode *);
 extern int ext3_init_acl (handle_t *, struct inode *, struct inode *);
diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c
index 8ca3bfd72427..2eea96ec78ed 100644
--- a/fs/ext3/dir.c
+++ b/fs/ext3/dir.c
@@ -272,7 +272,7 @@ static void free_rb_tree_fname(struct rb_root *root)
        while (n) {
                /* Do the node's children first */
-                if ((n)->rb_left) {
+                if (n->rb_left) {
                        n = n->rb_left;
                        continue;
                }
@@ -301,24 +301,18 @@ static void free_rb_tree_fname(struct rb_root *root)
                        parent->rb_right = NULL;
                n = parent;
        }
-        root->rb_node = NULL;
 }
-static struct dir_private_info *create_dir_info(loff_t pos)
+static struct dir_private_info *ext3_htree_create_dir_info(loff_t pos)
 {
        struct dir_private_info *p;
-        p = kmalloc(sizeof(struct dir_private_info), GFP_KERNEL);
+        p = kzalloc(sizeof(struct dir_private_info), GFP_KERNEL);
        if (!p)
                return NULL;
-        p->root.rb_node = NULL;
-        p->curr_node = NULL;
-        p->extra_fname = NULL;
-        p->last_pos = 0;
        p->curr_hash = pos2maj_hash(pos);
        p->curr_minor_hash = pos2min_hash(pos);
-        p->next_hash = 0;
        return p;
 }
@@ -433,7 +427,7 @@ static int ext3_dx_readdir(struct file * filp,
        int     ret;
        if (!info) {
-                info = create_dir_info(filp->f_pos);
+                info = ext3_htree_create_dir_info(filp->f_pos);
                if (!info)
                        return -ENOMEM;
                filp->private_data = info;
diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c
index 77126821b2e9..47b678d73e7a 100644
--- a/fs/ext3/ialloc.c
+++ b/fs/ext3/ialloc.c
@@ -669,6 +669,14 @@ struct inode *ext3_orphan_get(struct super_block *sb, unsigned long ino)
        if (IS_ERR(inode))
                goto iget_failed;
+        /*
+         * If the orphan has i_nlink > 0 then it must be possible to
+         * truncate it; otherwise it won't be removed from the orphan list
+         * during processing and an infinite loop will result.
+         */
+        if (inode->i_nlink && !ext3_can_truncate(inode))
+                goto bad_orphan;
        if (NEXT_ORPHAN(inode) > max_ino)
                goto bad_orphan;
        brelse(bitmap_bh);
@@ -690,6 +698,7 @@ bad_orphan:
                printk(KERN_NOTICE "NEXT_ORPHAN(inode)=%u\n",
                       NEXT_ORPHAN(inode));
                printk(KERN_NOTICE "max_ino=%lu\n", max_ino);
+                printk(KERN_NOTICE "i_nlink=%u\n", inode->i_nlink);
                /* Avoid freeing blocks if we got a bad deleted inode */
                if (inode->i_nlink == 0)
                        inode->i_blocks = 0;
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 6ae4ecf3ce40..3bf07d70b914 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -2127,7 +2127,21 @@ static void ext3_free_data(handle_t *handle, struct inode *inode,
        if (this_bh) {
                BUFFER_TRACE(this_bh, "call ext3_journal_dirty_metadata");
-                ext3_journal_dirty_metadata(handle, this_bh);
+                /*
+                 * The buffer head should have an attached journal head at this
+                 * point. However, if the data is corrupted and an indirect
+                 * block pointed to itself, it would have been detached when
+                 * the block was cleared. Check for this instead of OOPSing.
+                 */
+                if (bh2jh(this_bh))
+                        ext3_journal_dirty_metadata(handle, this_bh);
+                else
+                        ext3_error(inode->i_sb, "ext3_free_data",
+                                   "circular indirect block detected, "
+                                   "inode=%lu, block=%llu",
+                                   inode->i_ino,
+                                   (unsigned long long)this_bh->b_blocknr);
        }
 }
@@ -2253,6 +2267,19 @@ static void ext3_free_branches(handle_t *handle, struct inode *inode,
        }
 }
+int ext3_can_truncate(struct inode *inode)
+{
+        if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
+                return 0;
+        if (S_ISREG(inode->i_mode))
+                return 1;
+        if (S_ISDIR(inode->i_mode))
+                return 1;
+        if (S_ISLNK(inode->i_mode))
+                return !ext3_inode_is_fast_symlink(inode);
+        return 0;
+}
 /*
 * ext3_truncate()
 *
@@ -2297,12 +2324,7 @@ void ext3_truncate(struct inode *inode)
        unsigned blocksize = inode->i_sb->s_blocksize;
        struct page *page;
-        if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
+        if (!ext3_can_truncate(inode))
-            S_ISLNK(inode->i_mode)))
-                return;
-        if (ext3_inode_is_fast_symlink(inode))
-                return;
-        if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
                return;
        /*
@@ -2513,6 +2535,16 @@ static int __ext3_get_inode_loc(struct inode *inode,
        }
        if (!buffer_uptodate(bh)) {
                lock_buffer(bh);
+                /*
+                 * If the buffer has the write error flag, we have failed
+                 * to write out another inode in the same block.  In this
+                 * case, we must not re-read the block: the read could
+                 * succeed and bring back the old (stale) inode data.
+                 */
+                if (buffer_write_io_error(bh) && !buffer_uptodate(bh))
+                        set_buffer_uptodate(bh);
                if (buffer_uptodate(bh)) {
                        /* someone brought it uptodate while we waited */
                        unlock_buffer(bh);
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index 0b8cf80154f1..de13e919cd81 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -240,13 +240,13 @@ static inline unsigned dx_root_limit (struct inode *dir, unsigned infosize)
 {
        unsigned entry_space = dir->i_sb->s_blocksize - EXT3_DIR_REC_LEN(1) -
                EXT3_DIR_REC_LEN(2) - infosize;
-        return 0? 20: entry_space / sizeof(struct dx_entry);
+        return entry_space / sizeof(struct dx_entry);
 }
 static inline unsigned dx_node_limit (struct inode *dir)
 {
        unsigned entry_space = dir->i_sb->s_blocksize - EXT3_DIR_REC_LEN(0);
-        return 0? 22: entry_space / sizeof(struct dx_entry);
+        return entry_space / sizeof(struct dx_entry);
 }
 /*
@@ -991,19 +991,21 @@ static struct buffer_head * ext3_dx_find_entry(struct dentry *dentry,
                de = (struct ext3_dir_entry_2 *) bh->b_data;
                top = (struct ext3_dir_entry_2 *) ((char *) de + sb->s_blocksize -
                                       EXT3_DIR_REC_LEN(0));
-                for (; de < top; de = ext3_next_entry(de))
+                for (; de < top; de = ext3_next_entry(de)) {
-                if (ext3_match (namelen, name, de)) {
+                        int off = (block << EXT3_BLOCK_SIZE_BITS(sb))
-                        if (!ext3_check_dir_entry("ext3_find_entry",
+                                  + ((char *) de - bh->b_data);
-                                                  dir, de, bh,
-                                  (block<<EXT3_BLOCK_SIZE_BITS(sb))
+                        if (!ext3_check_dir_entry(__func__, dir, de, bh, off)) {
-                                          +((char *)de - bh->b_data))) {
+                                brelse(bh);
-                                brelse (bh);
                                *err = ERR_BAD_DX_DIR;
                                goto errout;
                        }
-                        *res_dir = de;
-                        dx_release (frames);
+                        if (ext3_match(namelen, name, de)) {
-                        return bh;
+                                *res_dir = de;
+                                dx_release(frames);
+                                return bh;
+                        }
                }
                brelse (bh);
                /* Check to see if we should continue to search */
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 2845425077e8..8ddced384674 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -472,7 +472,7 @@ static void ext3_destroy_inode(struct inode *inode)
        kmem_cache_free(ext3_inode_cachep, EXT3_I(inode));
 }
-static void init_once(struct kmem_cache * cachep, void *foo)
+static void init_once(void *foo)
 {
        struct ext3_inode_info *ei = (struct ext3_inode_info *) foo;
@@ -842,7 +842,7 @@ static int parse_options (char *options, struct super_block *sb,
        int data_opt = 0;
        int option;
 #ifdef CONFIG_QUOTA
-        int qtype;
+        int qtype, qfmt;
        char *qname;
 #endif
@@ -1018,9 +1018,11 @@ static int parse_options (char *options, struct super_block *sb,
                case Opt_grpjquota:
                        qtype = GRPQUOTA;
 set_qf_name:
-                        if (sb_any_quota_enabled(sb)) {
+                        if ((sb_any_quota_enabled(sb) ||
+                             sb_any_quota_suspended(sb)) &&
+                            !sbi->s_qf_names[qtype]) {
                                printk(KERN_ERR
-                                        "EXT3-fs: Cannot change journalled "
+                                        "EXT3-fs: Cannot change journaled "
                                        "quota options when quota turned on.\n");
                                return 0;
                        }
@@ -1056,9 +1058,11 @@ set_qf_name:
                case Opt_offgrpjquota:
                        qtype = GRPQUOTA;
 clear_qf_name:
-                        if (sb_any_quota_enabled(sb)) {
+                        if ((sb_any_quota_enabled(sb) ||
+                             sb_any_quota_suspended(sb)) &&
+                            sbi->s_qf_names[qtype]) {
                                printk(KERN_ERR "EXT3-fs: Cannot change "
-                                        "journalled quota options when "
+                                        "journaled quota options when "
                                        "quota turned on.\n");
                                return 0;
                        }
@@ -1069,10 +1073,20 @@ clear_qf_name:
                        sbi->s_qf_names[qtype] = NULL;
                        break;
                case Opt_jqfmt_vfsold:
-                        sbi->s_jquota_fmt = QFMT_VFS_OLD;
+                        qfmt = QFMT_VFS_OLD;
-                        break;
+                        goto set_qf_format;
                case Opt_jqfmt_vfsv0:
-                        sbi->s_jquota_fmt = QFMT_VFS_V0;
+                        qfmt = QFMT_VFS_V0;
+set_qf_format:
+                        if ((sb_any_quota_enabled(sb) ||
+                             sb_any_quota_suspended(sb)) &&
+                            sbi->s_jquota_fmt != qfmt) {
+                                printk(KERN_ERR "EXT3-fs: Cannot change "
+                                        "journaled quota options when "
+                                        "quota turned on.\n");
+                                return 0;
+                        }
+                        sbi->s_jquota_fmt = qfmt;
                        break;
                case Opt_quota:
                case Opt_usrquota:
@@ -1084,7 +1098,8 @@ clear_qf_name:
                        set_opt(sbi->s_mount_opt, GRPQUOTA);
                        break;
                case Opt_noquota:
-                        if (sb_any_quota_enabled(sb)) {
+                        if (sb_any_quota_enabled(sb) ||
+                            sb_any_quota_suspended(sb)) {
                                printk(KERN_ERR "EXT3-fs: Cannot change quota "
                                        "options when quota turned on.\n");
                                return 0;
@@ -1169,14 +1184,14 @@ clear_qf_name:
                }
                if (!sbi->s_jquota_fmt) {
-                        printk(KERN_ERR "EXT3-fs: journalled quota format "
+                        printk(KERN_ERR "EXT3-fs: journaled quota format "
                                        "not specified.\n");
                        return 0;
                }
        } else {
                if (sbi->s_jquota_fmt) {
-                        printk(KERN_ERR "EXT3-fs: journalled quota format "
+                        printk(KERN_ERR "EXT3-fs: journaled quota format "
-                                        "specified with no journalling "
+                                        "specified with no journaling "
                                        "enabled.\n");
                        return 0;
                }
@@ -1370,7 +1385,7 @@ static void ext3_orphan_cleanup (struct super_block * sb,
                        int ret = ext3_quota_on_mount(sb, i);
                        if (ret < 0)
                                printk(KERN_ERR
-                                        "EXT3-fs: Cannot turn on journalled "
+                                        "EXT3-fs: Cannot turn on journaled "
                                        "quota: error %d\n", ret);
                }
        }
@@ -2712,7 +2727,7 @@ static int ext3_release_dquot(struct dquot *dquot)
 static int ext3_mark_dquot_dirty(struct dquot *dquot)
 {
-        /* Are we journalling quotas? */
+        /* Are we journaling quotas? */
        if (EXT3_SB(dquot->dq_sb)->s_qf_names[USRQUOTA] ||
            EXT3_SB(dquot->dq_sb)->s_qf_names[GRPQUOTA]) {
                dquot_mark_dquot_dirty(dquot);
@@ -2759,23 +2774,42 @@ static int ext3_quota_on(struct super_block *sb, int type, int format_id,
        if (!test_opt(sb, QUOTA))
                return -EINVAL;
-        /* Not journalling quota or remount? */
+        /* When remounting, no checks are needed and in fact, path is NULL */
-        if ((!EXT3_SB(sb)->s_qf_names[USRQUOTA] &&
+        if (remount)
-            !EXT3_SB(sb)->s_qf_names[GRPQUOTA]) || remount)
                return vfs_quota_on(sb, type, format_id, path, remount);
        err = path_lookup(path, LOOKUP_FOLLOW, &nd);
        if (err)
                return err;
        /* Quotafile not on the same filesystem? */
        if (nd.path.mnt->mnt_sb != sb) {
                path_put(&nd.path);
                return -EXDEV;
        }
-        /* Quotafile not in fs root? */
+        /* Journaling quota? */
-        if (nd.path.dentry->d_parent->d_inode != sb->s_root->d_inode)
+        if (EXT3_SB(sb)->s_qf_names[type]) {
-                printk(KERN_WARNING
+                /* Quotafile not of fs root? */
-                        "EXT3-fs: Quota file not on filesystem root. "
+                if (nd.path.dentry->d_parent->d_inode != sb->s_root->d_inode)
-                        "Journalled quota will not work.\n");
+                        printk(KERN_WARNING
+                                "EXT3-fs: Quota file not on filesystem root. "
+                                "Journaled quota will not work.\n");
+        }
+        /*
+         * When we journal data on quota file, we have to flush journal to see
+         * all updates to the file when we bypass pagecache...
+         */
+        if (ext3_should_journal_data(nd.path.dentry->d_inode)) {
+                /*
+                 * We don't need to lock updates but journal_flush() could
+                 * otherwise be livelocked...
+                 */
+                journal_lock_updates(EXT3_SB(sb)->s_journal);
+                journal_flush(EXT3_SB(sb)->s_journal);
+                journal_unlock_updates(EXT3_SB(sb)->s_journal);
+        }
        path_put(&nd.path);
        return vfs_quota_on(sb, type, format_id, path, remount);
 }
diff --git a/fs/ext3/xattr_security.c b/fs/ext3/xattr_security.c
index 821efaf2b94e..37b81097bdf2 100644
--- a/fs/ext3/xattr_security.c
+++ b/fs/ext3/xattr_security.c
@@ -15,7 +15,7 @@ static size_t
 ext3_xattr_security_list(struct inode *inode, char *list, size_t list_size,
                         const char *name, size_t name_len)
 {
-        const size_t prefix_len = sizeof(XATTR_SECURITY_PREFIX)-1;
+        const size_t prefix_len = XATTR_SECURITY_PREFIX_LEN;
        const size_t total_len = prefix_len + name_len + 1;
diff --git a/fs/ext3/xattr_trusted.c b/fs/ext3/xattr_trusted.c
index 0327497a55ce..c7c41a410c4b 100644
--- a/fs/ext3/xattr_trusted.c
+++ b/fs/ext3/xattr_trusted.c
@@ -13,13 +13,11 @@
 #include <linux/ext3_fs.h>
 #include "xattr.h"
-#define XATTR_TRUSTED_PREFIX "trusted."
 static size_t
 ext3_xattr_trusted_list(struct inode *inode, char *list, size_t list_size,
                        const char *name, size_t name_len)
 {
-        const size_t prefix_len = sizeof(XATTR_TRUSTED_PREFIX)-1;
+        const size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN;
        const size_t total_len = prefix_len + name_len + 1;
        if (!capable(CAP_SYS_ADMIN))
diff --git a/fs/ext3/xattr_user.c b/fs/ext3/xattr_user.c
index 1abd8f92c440..430fe63b31b3 100644
--- a/fs/ext3/xattr_user.c
+++ b/fs/ext3/xattr_user.c
@@ -12,13 +12,11 @@
 #include <linux/ext3_fs.h>
 #include "xattr.h"
-#define XATTR_USER_PREFIX "user."
 static size_t
 ext3_xattr_user_list(struct inode *inode, char *list, size_t list_size,
                     const char *name, size_t name_len)
 {
-        const size_t prefix_len = sizeof(XATTR_USER_PREFIX)-1;
+        const size_t prefix_len = XATTR_USER_PREFIX_LEN;
        const size_t total_len = prefix_len + name_len + 1;
        if (!test_opt(inode->i_sb, XATTR_USER))
diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c
index 3c8dab880d91..c7d04e165446 100644
--- a/fs/ext4/acl.c
+++ b/fs/ext4/acl.c
@@ -299,7 +299,7 @@ ext4_check_acl(struct inode *inode, int mask)
 }
 int
-ext4_permission(struct inode *inode, int mask, struct nameidata *nd)
+ext4_permission(struct inode *inode, int mask)
 {
        return generic_permission(inode, mask, ext4_check_acl);
 }
diff --git a/fs/ext4/acl.h b/fs/ext4/acl.h
index 26a5c1abf147..cd2b855a07d6 100644
--- a/fs/ext4/acl.h
+++ b/fs/ext4/acl.h
@@ -58,7 +58,7 @@ static inline int ext4_acl_count(size_t size)
 #define EXT4_ACL_NOT_CACHED ((void *)-1)
 /* acl.c */
-extern int ext4_permission (struct inode *, int, struct nameidata *);
+extern int ext4_permission (struct inode *, int);
 extern int ext4_acl_chmod (struct inode *);
 extern int ext4_init_acl (handle_t *, struct inode *, struct inode *);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 1cb371dcd609..b5479b1dff14 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -595,7 +595,7 @@ static void ext4_destroy_inode(struct inode *inode)
        kmem_cache_free(ext4_inode_cachep, EXT4_I(inode));
 }
-static void init_once(struct kmem_cache *cachep, void *foo)
+static void init_once(void *foo)
 {
        struct ext4_inode_info *ei = (struct ext4_inode_info *) foo;
diff --git a/fs/fat/cache.c b/fs/fat/cache.c
index 3a9ecac8d61f..3222f51c41cf 100644
--- a/fs/fat/cache.c
+++ b/fs/fat/cache.c
@@ -36,7 +36,7 @@ static inline int fat_max_cache(struct inode *inode)
 static struct kmem_cache *fat_cache_cachep;
-static void init_once(struct kmem_cache *cachep, void *foo)
+static void init_once(void *foo)
 {
        struct fat_cache *cache = (struct fat_cache *)foo;
diff --git a/fs/fat/dir.c b/fs/fat/dir.c
index 34541d06e626..cd4a0162e10d 100644
--- a/fs/fat/dir.c
+++ b/fs/fat/dir.c
@@ -17,7 +17,6 @@
 #include <linux/slab.h>
 #include <linux/time.h>
 #include <linux/msdos_fs.h>
-#include <linux/dirent.h>
 #include <linux/smp_lock.h>
 #include <linux/buffer_head.h>
 #include <linux/compat.h>
@@ -124,10 +123,11 @@ static inline int fat_get_entry(struct inode *dir, loff_t *pos,
 * but ignore that right now.
 * Ahem... Stack smashing in ring 0 isn't fun. Fixed.
 */
-static int uni16_to_x8(unsigned char *ascii, wchar_t *uni, int len,
+static int uni16_to_x8(unsigned char *ascii, const wchar_t *uni, int len,
                       int uni_xlate, struct nls_table *nls)
 {
-        wchar_t *ip, ec;
+        const wchar_t *ip;
+        wchar_t ec;
        unsigned char *op, nc;
        int charlen;
        int k;
@@ -167,6 +167,16 @@ static int uni16_to_x8(unsigned char *ascii, wchar_t *uni, int len,
        return (op - ascii);
 }
+static inline int fat_uni_to_x8(struct msdos_sb_info *sbi, const wchar_t *uni,
+                                unsigned char *buf, int size)
+{
+        if (sbi->options.utf8)
+                return utf8_wcstombs(buf, uni, size);
+        else
+                return uni16_to_x8(buf, uni, size, sbi->options.unicode_xlate,
+                                   sbi->nls_io);
+}
 static inline int
 fat_short2uni(struct nls_table *t, unsigned char *c, int clen, wchar_t *uni)
 {
@@ -227,6 +237,19 @@ fat_shortname2uni(struct nls_table *nls, unsigned char *buf, int buf_size,
        return len;
 }
+static inline int fat_name_match(struct msdos_sb_info *sbi,
+                                 const unsigned char *a, int a_len,
+                                 const unsigned char *b, int b_len)
+{
+        if (a_len != b_len)
+                return 0;
+        if (sbi->options.name_check != 's')
+                return !nls_strnicmp(sbi->nls_io, a, b, a_len);
+        else
+                return !memcmp(a, b, a_len);
+}
 enum { PARSE_INVALID = 1, PARSE_NOT_LONGNAME, PARSE_EOF, };
 /**
@@ -302,6 +325,19 @@ parse_long:
 }
 /*
+ * Maximum buffer size of short name.
+ * [(MSDOS_NAME + '.') * max one char + nul]
+ * For msdos style, ['.' (hidden) + MSDOS_NAME + '.' + nul]
+ */
+#define FAT_MAX_SHORT_SIZE      ((MSDOS_NAME + 1) * NLS_MAX_CHARSET_SIZE + 1)
+/*
+ * Maximum buffer size of unicode chars from slots.
+ * [(max longname slots * 13 (size in a slot) + nul) * sizeof(wchar_t)]
+ */
+#define FAT_MAX_UNI_CHARS       ((MSDOS_SLOTS - 1) * 13 + 1)
+#define FAT_MAX_UNI_SIZE        (FAT_MAX_UNI_CHARS * sizeof(wchar_t))
+/*
 * Return values: negative -> error, 0 -> not found, positive -> found,
 * value is the total amount of slots, including the shortname entry.
 */
@@ -312,29 +348,20 @@ int fat_search_long(struct inode *inode, const unsigned char *name,
        struct msdos_sb_info *sbi = MSDOS_SB(sb);
        struct buffer_head *bh = NULL;
        struct msdos_dir_entry *de;
-        struct nls_table *nls_io = sbi->nls_io;
        struct nls_table *nls_disk = sbi->nls_disk;
-        wchar_t bufuname[14];
        unsigned char nr_slots;
-        int xlate_len;
+        wchar_t bufuname[14];
        wchar_t *unicode = NULL;
        unsigned char work[MSDOS_NAME];
-        unsigned char *bufname = NULL;
+        unsigned char bufname[FAT_MAX_SHORT_SIZE];
-        int uni_xlate = sbi->options.unicode_xlate;
-        int utf8 = sbi->options.utf8;
-        int anycase = (sbi->options.name_check != 's');
        unsigned short opt_shortname = sbi->options.shortname;
        loff_t cpos = 0;
-        int chl, i, j, last_u, err;
+        int chl, i, j, last_u, err, len;
-        bufname = __getname();
-        if (!bufname)
-                return -ENOMEM;
        err = -ENOENT;
-        while(1) {
+        while (1) {
                if (fat_get_entry(inode, &cpos, &bh, &de) == -1)
-                        goto EODir;
+                        goto end_of_dir;
 parse_record:
                nr_slots = 0;
                if (de->name[0] == DELETED_FLAG)
@@ -353,7 +380,7 @@ parse_record:
                        else if (status == PARSE_NOT_LONGNAME)
                                goto parse_record;
                        else if (status == PARSE_EOF)
-                                goto EODir;
+                                goto end_of_dir;
                }
                memcpy(work, de->name, sizeof(de->name));
@@ -394,30 +421,24 @@ parse_record:
                if (!last_u)
                        continue;
+                /* Compare shortname */
                bufuname[last_u] = 0x0000;
-                xlate_len = utf8
+                len = fat_uni_to_x8(sbi, bufuname, bufname, sizeof(bufname));
-                        ?utf8_wcstombs(bufname, bufuname, PATH_MAX)
+                if (fat_name_match(sbi, name, name_len, bufname, len))
-                        :uni16_to_x8(bufname, bufuname, PATH_MAX, uni_xlate, nls_io);
+                        goto found;
-                if (xlate_len == name_len)
-                        if ((!anycase && !memcmp(name, bufname, xlate_len)) ||
-                            (anycase && !nls_strnicmp(nls_io, name, bufname,
-                                                                xlate_len)))
-                                goto Found;
                if (nr_slots) {
-                        xlate_len = utf8
+                        void *longname = unicode + FAT_MAX_UNI_CHARS;
-                                ?utf8_wcstombs(bufname, unicode, PATH_MAX)
+                        int size = PATH_MAX - FAT_MAX_UNI_SIZE;
-                                :uni16_to_x8(bufname, unicode, PATH_MAX, uni_xlate, nls_io);
-                        if (xlate_len != name_len)
+                        /* Compare longname */
-                                continue;
+                        len = fat_uni_to_x8(sbi, unicode, longname, size);
-                        if ((!anycase && !memcmp(name, bufname, xlate_len)) ||
+                        if (fat_name_match(sbi, name, name_len, longname, len))
-                            (anycase && !nls_strnicmp(nls_io, name, bufname,
+                                goto found;
-                                                                xlate_len)))
-                                goto Found;
                }
        }
-Found:
+found:
        nr_slots++;     /* include the de */
        sinfo->slot_off = cpos - nr_slots * sizeof(*de);
        sinfo->nr_slots = nr_slots;
@@ -425,9 +446,7 @@ Found:
        sinfo->bh = bh;
        sinfo->i_pos = fat_make_i_pos(sb, sinfo->bh, sinfo->de);
        err = 0;
-EODir:
+end_of_dir:
-        if (bufname)
-                __putname(bufname);
        if (unicode)
                __putname(unicode);
@@ -453,23 +472,20 @@ static int __fat_readdir(struct inode *inode, struct file *filp, void *dirent,
        struct msdos_sb_info *sbi = MSDOS_SB(sb);
        struct buffer_head *bh;
        struct msdos_dir_entry *de;
-        struct nls_table *nls_io = sbi->nls_io;
        struct nls_table *nls_disk = sbi->nls_disk;
-        unsigned char long_slots;
+        unsigned char nr_slots;
-        const char *fill_name;
-        int fill_len;
        wchar_t bufuname[14];
        wchar_t *unicode = NULL;
-        unsigned char c, work[MSDOS_NAME], bufname[56], *ptname = bufname;
+        unsigned char c, work[MSDOS_NAME];
-        unsigned long lpos, dummy, *furrfu = &lpos;
+        unsigned char bufname[FAT_MAX_SHORT_SIZE], *ptname = bufname;
-        int uni_xlate = sbi->options.unicode_xlate;
+        unsigned short opt_shortname = sbi->options.shortname;
        int isvfat = sbi->options.isvfat;
-        int utf8 = sbi->options.utf8;
        int nocase = sbi->options.nocase;
-        unsigned short opt_shortname = sbi->options.shortname;
+        const char *fill_name = NULL;
        unsigned long inum;
-        int chi, chl, i, i2, j, last, last_u, dotoffset = 0;
+        unsigned long lpos, dummy, *furrfu = &lpos;
        loff_t cpos;
+        int chi, chl, i, i2, j, last, last_u, dotoffset = 0, fill_len = 0;
        int ret = 0;
        lock_super(sb);
@@ -489,43 +505,58 @@ static int __fat_readdir(struct inode *inode, struct file *filp, void *dirent,
                        cpos = 0;
                }
        }
-        if (cpos & (sizeof(struct msdos_dir_entry)-1)) {
+        if (cpos & (sizeof(struct msdos_dir_entry) - 1)) {
                ret = -ENOENT;
                goto out;
        }
        bh = NULL;
-GetNew:
+get_new:
        if (fat_get_entry(inode, &cpos, &bh, &de) == -1)
-                goto EODir;
+                goto end_of_dir;
 parse_record:
-        long_slots = 0;
+        nr_slots = 0;
-        /* Check for long filename entry */
+        /*
-        if (isvfat) {
+         * Check for long filename entry, but if short_only, we don't
+         * need to parse long filename.
+         */
+        if (isvfat && !short_only) {
                if (de->name[0] == DELETED_FLAG)
-                        goto RecEnd;
+                        goto record_end;
                if (de->attr != ATTR_EXT && (de->attr & ATTR_VOLUME))
-                        goto RecEnd;
+                        goto record_end;
                if (de->attr != ATTR_EXT && IS_FREE(de->name))
-                        goto RecEnd;
+                        goto record_end;
        } else {
                if ((de->attr & ATTR_VOLUME) || IS_FREE(de->name))
-                        goto RecEnd;
+                        goto record_end;
        }
        if (isvfat && de->attr == ATTR_EXT) {
                int status = fat_parse_long(inode, &cpos, &bh, &de,
-                                            &unicode, &long_slots);
+                                            &unicode, &nr_slots);
                if (status < 0) {
                        filp->f_pos = cpos;
                        ret = status;
                        goto out;
                } else if (status == PARSE_INVALID)
-                        goto RecEnd;
+                        goto record_end;
                else if (status == PARSE_NOT_LONGNAME)
                        goto parse_record;
                else if (status == PARSE_EOF)
-                        goto EODir;
+                        goto end_of_dir;
+                if (nr_slots) {
+                        void *longname = unicode + FAT_MAX_UNI_CHARS;
+                        int size = PATH_MAX - FAT_MAX_UNI_SIZE;
+                        int len = fat_uni_to_x8(sbi, unicode, longname, size);
+                        fill_name = longname;
+                        fill_len = len;
+                        /* !both && !short_only, so we don't need shortname. */
+                        if (!both)
+                                goto start_filldir;
+                }
        }
        if (sbi->options.dotsOK) {
@@ -587,12 +618,32 @@ parse_record:
                }
        }
        if (!last)
-                goto RecEnd;
+                goto record_end;
        i = last + dotoffset;
        j = last_u;
-        lpos = cpos - (long_slots+1)*sizeof(struct msdos_dir_entry);
+        if (isvfat) {
+                bufuname[j] = 0x0000;
+                i = fat_uni_to_x8(sbi, bufuname, bufname, sizeof(bufname));
+        }
+        if (nr_slots) {
+                /* hack for fat_ioctl_filldir() */
+                struct fat_ioctl_filldir_callback *p = dirent;
+                p->longname = fill_name;
+                p->long_len = fill_len;
+                p->shortname = bufname;
+                p->short_len = i;
+                fill_name = NULL;
+                fill_len = 0;
+        } else {
+                fill_name = bufname;
+                fill_len = i;
+        }
+start_filldir:
+        lpos = cpos - (nr_slots + 1) * sizeof(struct msdos_dir_entry);
        if (!memcmp(de->name, MSDOS_DOT, MSDOS_NAME))
                inum = inode->i_ino;
        else if (!memcmp(de->name, MSDOS_DOTDOT, MSDOS_NAME)) {
@@ -607,49 +658,17 @@ parse_record:
                        inum = iunique(sb, MSDOS_ROOT_INO);
        }
-        if (isvfat) {
-                bufuname[j] = 0x0000;
-                i = utf8 ? utf8_wcstombs(bufname, bufuname, sizeof(bufname))
-                         : uni16_to_x8(bufname, bufuname, sizeof(bufname), uni_xlate, nls_io);
-        }
-        fill_name = bufname;
-        fill_len = i;
-        if (!short_only && long_slots) {
-                /* convert the unicode long name. 261 is maximum size
-                 * of unicode buffer. (13 * slots + nul) */
-                void *longname = unicode + 261;
-                int buf_size = PATH_MAX - (261 * sizeof(unicode[0]));
-                int long_len = utf8
-                        ? utf8_wcstombs(longname, unicode, buf_size)
-                        : uni16_to_x8(longname, unicode, buf_size, uni_xlate, nls_io);
-                if (!both) {
-                        fill_name = longname;
-                        fill_len = long_len;
-                } else {
-                        /* hack for fat_ioctl_filldir() */
-                        struct fat_ioctl_filldir_callback *p = dirent;
-                        p->longname = longname;
-                        p->long_len = long_len;
-                        p->shortname = bufname;
-                        p->short_len = i;
-                        fill_name = NULL;
-                        fill_len = 0;
-                }
-        }
        if (filldir(dirent, fill_name, fill_len, *furrfu, inum,
                    (de->attr & ATTR_DIR) ? DT_DIR : DT_REG) < 0)
-                goto FillFailed;
+                goto fill_failed;
-RecEnd:
+record_end:
        furrfu = &lpos;
        filp->f_pos = cpos;
-        goto GetNew;
+        goto get_new;
-EODir:
+end_of_dir:
        filp->f_pos = cpos;
-FillFailed:
+fill_failed:
        brelse(bh);
        if (unicode)
                __putname(unicode);
@@ -715,7 +734,7 @@ efault:									   \
        return -EFAULT;                                                    \
 }
-FAT_IOCTL_FILLDIR_FUNC(fat_ioctl_filldir, dirent)
+FAT_IOCTL_FILLDIR_FUNC(fat_ioctl_filldir, __fat_dirent)
 static int fat_ioctl_readdir(struct inode *inode, struct file *filp,
                             void __user *dirent, filldir_t filldir,
@@ -741,7 +760,7 @@ static int fat_ioctl_readdir(struct inode *inode, struct file *filp,
 static int fat_dir_ioctl(struct inode *inode, struct file *filp,
                         unsigned int cmd, unsigned long arg)
 {
-        struct dirent __user *d1 = (struct dirent __user *)arg;
+        struct __fat_dirent __user *d1 = (struct __fat_dirent __user *)arg;
        int short_only, both;
        switch (cmd) {
@@ -757,7 +776,7 @@ static int fat_dir_ioctl(struct inode *inode, struct file *filp,
                return fat_generic_ioctl(inode, filp, cmd, arg);
        }
-        if (!access_ok(VERIFY_WRITE, d1, sizeof(struct dirent[2])))
+        if (!access_ok(VERIFY_WRITE, d1, sizeof(struct __fat_dirent[2])))
                return -EFAULT;
        /*
         * Yes, we don't need this put_user() absolutely. However old
@@ -1082,7 +1101,7 @@ int fat_alloc_new_dir(struct inode *dir, struct timespec *ts)
                goto error_free;
        }
-        fat_date_unix2dos(ts->tv_sec, &time, &date);
+        fat_date_unix2dos(ts->tv_sec, &time, &date, sbi->options.tz_utc);
        de = (struct msdos_dir_entry *)bhs[0]->b_data;
        /* filling the new directory slots ("." and ".." entries) */
diff --git a/fs/fat/file.c b/fs/fat/file.c
index c672df4036e9..8707a8cfa02c 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -15,6 +15,8 @@
 #include <linux/writeback.h>
 #include <linux/backing-dev.h>
 #include <linux/blkdev.h>
+#include <linux/fsnotify.h>
+#include <linux/security.h>
 int fat_generic_ioctl(struct inode *inode, struct file *filp,
                      unsigned int cmd, unsigned long arg)
@@ -64,6 +66,7 @@ int fat_generic_ioctl(struct inode *inode, struct file *filp,
                /* Equivalent to a chmod() */
                ia.ia_valid = ATTR_MODE | ATTR_CTIME;
+                ia.ia_ctime = current_fs_time(inode->i_sb);
                if (is_dir) {
                        ia.ia_mode = MSDOS_MKMODE(attr,
                                S_IRWXUGO & ~sbi->options.fs_dmask)
@@ -90,11 +93,21 @@ int fat_generic_ioctl(struct inode *inode, struct file *filp,
                        }
                }
+                /*
+                 * The security check is questionable...  We single
+                 * out the RO attribute for checking by the security
+                 * module, just because it maps to a file mode.
+                 */
+                err = security_inode_setattr(filp->f_path.dentry, &ia);
+                if (err)
+                        goto up;
                /* This MUST be done before doing anything irreversible... */
-                err = notify_change(filp->f_path.dentry, &ia);
+                err = fat_setattr(filp->f_path.dentry, &ia);
                if (err)
                        goto up;
+                fsnotify_change(filp->f_path.dentry, ia.ia_valid);
                if (sbi->options.sys_immutable) {
                        if (attr & ATTR_SYS)
                                inode->i_flags |= S_IMMUTABLE;
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 46a4508ffd2e..6d266d793e2c 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -382,17 +382,20 @@ static int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de)
        inode->i_blocks = ((inode->i_size + (sbi->cluster_size - 1))
                           & ~((loff_t)sbi->cluster_size - 1)) >> 9;
        inode->i_mtime.tv_sec =
-                date_dos2unix(le16_to_cpu(de->time), le16_to_cpu(de->date));
+                date_dos2unix(le16_to_cpu(de->time), le16_to_cpu(de->date),
+                              sbi->options.tz_utc);
        inode->i_mtime.tv_nsec = 0;
        if (sbi->options.isvfat) {
                int secs = de->ctime_cs / 100;
                int csecs = de->ctime_cs % 100;
                inode->i_ctime.tv_sec  =
                        date_dos2unix(le16_to_cpu(de->ctime),
-                                      le16_to_cpu(de->cdate)) + secs;
+                                      le16_to_cpu(de->cdate),
+                                      sbi->options.tz_utc) + secs;
                inode->i_ctime.tv_nsec = csecs * 10000000;
                inode->i_atime.tv_sec =
-                        date_dos2unix(0, le16_to_cpu(de->adate));
+                        date_dos2unix(0, le16_to_cpu(de->adate),
+                                      sbi->options.tz_utc);
                inode->i_atime.tv_nsec = 0;
        } else
                inode->i_ctime = inode->i_atime = inode->i_mtime;
@@ -495,7 +498,7 @@ static void fat_destroy_inode(struct inode *inode)
        kmem_cache_free(fat_inode_cachep, MSDOS_I(inode));
 }
-static void init_once(struct kmem_cache *cachep, void *foo)
+static void init_once(void *foo)
 {
        struct msdos_inode_info *ei = (struct msdos_inode_info *)foo;
@@ -591,11 +594,14 @@ retry:
        raw_entry->attr = fat_attr(inode);
        raw_entry->start = cpu_to_le16(MSDOS_I(inode)->i_logstart);
        raw_entry->starthi = cpu_to_le16(MSDOS_I(inode)->i_logstart >> 16);
-        fat_date_unix2dos(inode->i_mtime.tv_sec, &raw_entry->time, &raw_entry->date);
+        fat_date_unix2dos(inode->i_mtime.tv_sec, &raw_entry->time,
+                          &raw_entry->date, sbi->options.tz_utc);
        if (sbi->options.isvfat) {
                __le16 atime;
-                fat_date_unix2dos(inode->i_ctime.tv_sec,&raw_entry->ctime,&raw_entry->cdate);
+                fat_date_unix2dos(inode->i_ctime.tv_sec, &raw_entry->ctime,
-                fat_date_unix2dos(inode->i_atime.tv_sec,&atime,&raw_entry->adate);
+                                  &raw_entry->cdate, sbi->options.tz_utc);
+                fat_date_unix2dos(inode->i_atime.tv_sec, &atime,
+                                  &raw_entry->adate, sbi->options.tz_utc);
                raw_entry->ctime_cs = (inode->i_ctime.tv_sec & 1) * 100 +
                        inode->i_ctime.tv_nsec / 10000000;
        }
@@ -836,6 +842,8 @@ static int fat_show_options(struct seq_file *m, struct vfsmount *mnt)
        }
        if (sbi->options.flush)
                seq_puts(m, ",flush");
+        if (opts->tz_utc)
+                seq_puts(m, ",tz=UTC");
        return 0;
 }
@@ -848,7 +856,7 @@ enum {
        Opt_charset, Opt_shortname_lower, Opt_shortname_win95,
        Opt_shortname_winnt, Opt_shortname_mixed, Opt_utf8_no, Opt_utf8_yes,
        Opt_uni_xl_no, Opt_uni_xl_yes, Opt_nonumtail_no, Opt_nonumtail_yes,
-        Opt_obsolate, Opt_flush, Opt_err,
+        Opt_obsolate, Opt_flush, Opt_tz_utc, Opt_err,
 };
 static match_table_t fat_tokens = {
@@ -883,6 +891,7 @@ static match_table_t fat_tokens = {
        {Opt_obsolate, "cvf_options=%100s"},
        {Opt_obsolate, "posix"},
        {Opt_flush, "flush"},
+        {Opt_tz_utc, "tz=UTC"},
        {Opt_err, NULL},
 };
 static match_table_t msdos_tokens = {
@@ -947,10 +956,11 @@ static int parse_options(char *options, int is_vfat, int silent, int *debug,
        opts->utf8 = opts->unicode_xlate = 0;
        opts->numtail = 1;
        opts->usefree = opts->nocase = 0;
+        opts->tz_utc = 0;
        *debug = 0;
        if (!options)
-                return 0;
+                goto out;
        while ((p = strsep(&options, ",")) != NULL) {
                int token;
@@ -1036,6 +1046,9 @@ static int parse_options(char *options, int is_vfat, int silent, int *debug,
                case Opt_flush:
                        opts->flush = 1;
                        break;
+                case Opt_tz_utc:
+                        opts->tz_utc = 1;
+                        break;
                /* msdos specific */
                case Opt_dots:
@@ -1104,10 +1117,13 @@ static int parse_options(char *options, int is_vfat, int silent, int *debug,
                        return -EINVAL;
                }
        }
+out:
        /* UTF-8 doesn't provide FAT semantics */
        if (!strcmp(opts->iocharset, "utf8")) {
                printk(KERN_ERR "FAT: utf8 is not a recommended IO charset"
-                       " for FAT filesystems, filesystem will be case sensitive!\n");
+                       " for FAT filesystems, filesystem will be "
+                       "case sensitive!\n");
        }
        /* If user doesn't specify allow_utime, it's initialized from dmask. */
diff --git a/fs/fat/misc.c b/fs/fat/misc.c
index 61f23511eacf..79fb98ad36d4 100644
--- a/fs/fat/misc.c
+++ b/fs/fat/misc.c
@@ -142,7 +142,7 @@ static int day_n[] = {
 };
 /* Convert a MS-DOS time/date pair to a UNIX date (seconds since 1 1 70). */
-int date_dos2unix(unsigned short time, unsigned short date)
+int date_dos2unix(unsigned short time, unsigned short date, int tz_utc)
 {
        int month, year, secs;
@@ -156,16 +156,18 @@ int date_dos2unix(unsigned short time, unsigned short date)
            ((date & 31)-1+day_n[month]+(year/4)+year*365-((year & 3) == 0 &&
            month < 2 ? 1 : 0)+3653);
                        /* days since 1.1.70 plus 80's leap day */
-        secs += sys_tz.tz_minuteswest*60;
+        if (!tz_utc)
+                secs += sys_tz.tz_minuteswest*60;
        return secs;
 }
 /* Convert linear UNIX date to a MS-DOS time/date pair. */
-void fat_date_unix2dos(int unix_date, __le16 *time, __le16 *date)
+void fat_date_unix2dos(int unix_date, __le16 *time, __le16 *date, int tz_utc)
 {
        int day, year, nl_day, month;
-        unix_date -= sys_tz.tz_minuteswest*60;
+        if (!tz_utc)
+                unix_date -= sys_tz.tz_minuteswest*60;
        /* Jan 1 GMT 00:00:00 1980. But what about another time zone? */
        if (unix_date < 315532800)
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 330a7d782591..61d625136813 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -64,11 +64,6 @@ static int locate_fd(unsigned int orig_start, int cloexec)
        struct fdtable *fdt;
        spin_lock(&files->file_lock);
-        error = -EINVAL;
-        if (orig_start >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur)
-                goto out;
 repeat:
        fdt = files_fdtable(files);
        /*
@@ -83,10 +78,6 @@ repeat:
        if (start < fdt->max_fds)
                newfd = find_next_zero_bit(fdt->open_fds->fds_bits,
                                           fdt->max_fds, start);
-        
-        error = -EMFILE;
-        if (newfd >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur)
-                goto out;
        error = expand_files(files, newfd);
        if (error < 0)
@@ -125,27 +116,30 @@ static int dupfd(struct file *file, unsigned int start, int cloexec)
        return fd;
 }
-asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd)
+asmlinkage long sys_dup3(unsigned int oldfd, unsigned int newfd, int flags)
 {
        int err = -EBADF;
        struct file * file, *tofree;
        struct files_struct * files = current->files;
        struct fdtable *fdt;
+        if ((flags & ~O_CLOEXEC) != 0)
+                return -EINVAL;
+        if (unlikely(oldfd == newfd))
+                return -EINVAL;
        spin_lock(&files->file_lock);
        if (!(file = fcheck(oldfd)))
                goto out_unlock;
-        err = newfd;
-        if (newfd == oldfd)
-                goto out_unlock;
-        err = -EBADF;
-        if (newfd >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur)
-                goto out_unlock;
        get_file(file);                 /* We are now finished with oldfd */
        err = expand_files(files, newfd);
-        if (err < 0)
+        if (unlikely(err < 0)) {
+                if (err == -EMFILE)
+                        err = -EBADF;
                goto out_fput;
+        }
        /* To avoid races with open() and dup(), we will mark the fd as
         * in-use in the open-file bitmap throughout the entire dup2()
@@ -163,7 +157,10 @@ asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd)
        rcu_assign_pointer(fdt->fd[newfd], file);
        FD_SET(newfd, fdt->open_fds);
-        FD_CLR(newfd, fdt->close_on_exec);
+        if (flags & O_CLOEXEC)
+                FD_SET(newfd, fdt->close_on_exec);
+        else
+                FD_CLR(newfd, fdt->close_on_exec);
        spin_unlock(&files->file_lock);
        if (tofree)
@@ -181,6 +178,19 @@ out_fput:
        goto out;
 }
+/*
+ * dup2(): make newfd refer to the same open file as oldfd.
+ *
+ * When oldfd == newfd nothing is duplicated: the call only checks that
+ * oldfd is open and returns it, or -EBADF if it is not.  Every other
+ * case is handed to sys_dup3() with no flags.
+ *
+ * The corner-case result is kept in a signed long.  Storing -EBADF in
+ * the unsigned int parameter and returning that would zero-extend on
+ * 64-bit, handing userspace a large positive number instead of an
+ * error code.
+ */
+asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd)
+{
+        if (unlikely(newfd == oldfd)) { /* corner case */
+                struct files_struct *files = current->files;
+                long retval = oldfd;
+                rcu_read_lock();
+                if (!fcheck_files(files, oldfd))
+                        retval = -EBADF;
+                rcu_read_unlock();
+                return retval;
+        }
+        return sys_dup3(oldfd, newfd, 0);
+}
 asmlinkage long sys_dup(unsigned int fildes)
 {
        int ret = -EBADF;
@@ -310,6 +320,8 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
        switch (cmd) {
        case F_DUPFD:
        case F_DUPFD_CLOEXEC:
+                if (arg >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur)
+                        break;
                get_file(filp);
                err = dupfd(filp, arg, cmd == F_DUPFD_CLOEXEC);
                break;
diff --git a/fs/fifo.c b/fs/fifo.c
index 9785e36f81e7..987bf9411495 100644
--- a/fs/fifo.c
+++ b/fs/fifo.c
@@ -57,7 +57,7 @@ static int fifo_open(struct inode *inode, struct file *filp)
         *  POSIX.1 says that O_NONBLOCK means return with the FIFO
         *  opened, even when there is no process writing the FIFO.
         */
-                filp->f_op = &read_fifo_fops;
+                filp->f_op = &read_pipefifo_fops;
                pipe->r_counter++;
                if (pipe->readers++ == 0)
                        wake_up_partner(inode);
@@ -86,7 +86,7 @@ static int fifo_open(struct inode *inode, struct file *filp)
                if ((filp->f_flags & O_NONBLOCK) && !pipe->readers)
                        goto err;
-                filp->f_op = &write_fifo_fops;
+                filp->f_op = &write_pipefifo_fops;
                pipe->w_counter++;
                if (!pipe->writers++)
                        wake_up_partner(inode);
@@ -105,7 +105,7 @@ static int fifo_open(struct inode *inode, struct file *filp)
         *  This implementation will NEVER block on a O_RDWR open, since
         *  the process can at least talk to itself.
         */
-                filp->f_op = &rdwr_fifo_fops;
+                filp->f_op = &rdwr_pipefifo_fops;
                pipe->readers++;
                pipe->writers++;
@@ -151,5 +151,5 @@ err_nocleanup:
 * depending on the access mode of the file...
 */
 const struct file_operations def_fifo_fops = {
-        .open           = fifo_open,    /* will set read or write pipe_fops */
+        .open           = fifo_open,    /* will set read_ or write_pipefifo_fops */
 };
diff --git a/fs/file.c b/fs/file.c
index 7b3887e054d0..d8773b19fe47 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -250,9 +250,18 @@ int expand_files(struct files_struct *files, int nr)
        struct fdtable *fdt;
        fdt = files_fdtable(files);
+        /*
+         * N.B. For clone tasks sharing a files structure, this test
+         * will limit the total number of files that can be opened.
+         */
+        if (nr >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur)
+                return -EMFILE;
        /* Do we need to expand? */
        if (nr < fdt->max_fds)
                return 0;
        /* Can we expand? */
        if (nr >= sysctl_nr_open)
                return -EMFILE;
diff --git a/fs/file_table.c b/fs/file_table.c
index 83084225b4c3..f45a4493f9e7 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -120,7 +120,7 @@ struct file *get_empty_filp(void)
        tsk = current;
        INIT_LIST_HEAD(&f->f_u.fu_list);
-        atomic_set(&f->f_count, 1);
+        atomic_long_set(&f->f_count, 1);
        rwlock_init(&f->f_owner.lock);
        f->f_uid = tsk->fsuid;
        f->f_gid = tsk->fsgid;
@@ -219,7 +219,7 @@ EXPORT_SYMBOL(init_file);
 void fput(struct file *file)
 {
-        if (atomic_dec_and_test(&file->f_count))
+        if (atomic_long_dec_and_test(&file->f_count))
                __fput(file);
 }
@@ -294,7 +294,7 @@ struct file *fget(unsigned int fd)
        rcu_read_lock();
        file = fcheck_files(files, fd);
        if (file) {
-                if (!atomic_inc_not_zero(&file->f_count)) {
+                if (!atomic_long_inc_not_zero(&file->f_count)) {
                        /* File object ref couldn't be taken */
                        rcu_read_unlock();
                        return NULL;
@@ -326,7 +326,7 @@ struct file *fget_light(unsigned int fd, int *fput_needed)
                rcu_read_lock();
                file = fcheck_files(files, fd);
                if (file) {
-                        if (atomic_inc_not_zero(&file->f_count))
+                        if (atomic_long_inc_not_zero(&file->f_count))
                                *fput_needed = 1;
                        else
                                /* Didn't get the reference, someone's freed */
@@ -341,7 +341,7 @@ struct file *fget_light(unsigned int fd, int *fput_needed)
 void put_filp(struct file *file)
 {
-        if (atomic_dec_and_test(&file->f_count)) {
+        if (atomic_long_dec_and_test(&file->f_count)) {
                security_file_free(file);
                file_kill(file);
                file_free(file);
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 2060bf06b906..fd03330cadeb 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -97,7 +97,7 @@ void fuse_invalidate_attr(struct inode *inode)
 * timeout is unknown (unlink, rmdir, rename and in some cases
 * lookup)
 */
-static void fuse_invalidate_entry_cache(struct dentry *entry)
+void fuse_invalidate_entry_cache(struct dentry *entry)
 {
        fuse_dentry_settime(entry, 0);
 }
@@ -112,18 +112,16 @@ static void fuse_invalidate_entry(struct dentry *entry)
        fuse_invalidate_entry_cache(entry);
 }
-static void fuse_lookup_init(struct fuse_req *req, struct inode *dir,
+static void fuse_lookup_init(struct fuse_conn *fc, struct fuse_req *req,
-                             struct dentry *entry,
+                             u64 nodeid, struct qstr *name,
                             struct fuse_entry_out *outarg)
 {
-        struct fuse_conn *fc = get_fuse_conn(dir);
        memset(outarg, 0, sizeof(struct fuse_entry_out));
        req->in.h.opcode = FUSE_LOOKUP;
-        req->in.h.nodeid = get_node_id(dir);
+        req->in.h.nodeid = nodeid;
        req->in.numargs = 1;
-        req->in.args[0].size = entry->d_name.len + 1;
+        req->in.args[0].size = name->len + 1;
-        req->in.args[0].value = entry->d_name.name;
+        req->in.args[0].value = name->name;
        req->out.numargs = 1;
        if (fc->minor < 9)
                req->out.args[0].size = FUSE_COMPAT_ENTRY_OUT_SIZE;
@@ -189,7 +187,8 @@ static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd)
                attr_version = fuse_get_attr_version(fc);
                parent = dget_parent(entry);
-                fuse_lookup_init(req, parent->d_inode, entry, &outarg);
+                fuse_lookup_init(fc, req, get_node_id(parent->d_inode),
+                                 &entry->d_name, &outarg);
                request_send(fc, req);
                dput(parent);
                err = req->out.h.error;
@@ -225,7 +224,7 @@ static int invalid_nodeid(u64 nodeid)
        return !nodeid || nodeid == FUSE_ROOT_ID;
 }
-static struct dentry_operations fuse_dentry_operations = {
+struct dentry_operations fuse_dentry_operations = {
        .d_revalidate   = fuse_dentry_revalidate,
 };
@@ -239,85 +238,127 @@ int fuse_valid_type(int m)
 * Add a directory inode to a dentry, ensuring that no other dentry
 * refers to this inode.  Called with fc->inst_mutex.
 */
-static int fuse_d_add_directory(struct dentry *entry, struct inode *inode)
+static struct dentry *fuse_d_add_directory(struct dentry *entry,
+                                           struct inode *inode)
 {
        struct dentry *alias = d_find_alias(inode);
-        if (alias) {
+        if (alias && !(alias->d_flags & DCACHE_DISCONNECTED)) {
                /* This tries to shrink the subtree below alias */
                fuse_invalidate_entry(alias);
                dput(alias);
                if (!list_empty(&inode->i_dentry))
-                        return -EBUSY;
+                        return ERR_PTR(-EBUSY);
+        } else {
+                dput(alias);
        }
-        d_add(entry, inode);
+        return d_splice_alias(inode, entry);
-        return 0;
 }
-static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
+int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct qstr *name,
-                                  struct nameidata *nd)
+                     struct fuse_entry_out *outarg, struct inode **inode)
 {
-        int err;
+        struct fuse_conn *fc = get_fuse_conn_super(sb);
-        struct fuse_entry_out outarg;
-        struct inode *inode = NULL;
-        struct fuse_conn *fc = get_fuse_conn(dir);
        struct fuse_req *req;
        struct fuse_req *forget_req;
        u64 attr_version;
+        int err;
-        if (entry->d_name.len > FUSE_NAME_MAX)
+        *inode = NULL;
-                return ERR_PTR(-ENAMETOOLONG);
+        err = -ENAMETOOLONG;
+        if (name->len > FUSE_NAME_MAX)
+                goto out;
        req = fuse_get_req(fc);
+        err = PTR_ERR(req);
        if (IS_ERR(req))
-                return ERR_CAST(req);
+                goto out;
        forget_req = fuse_get_req(fc);
+        err = PTR_ERR(forget_req);
        if (IS_ERR(forget_req)) {
                fuse_put_request(fc, req);
-                return ERR_CAST(forget_req);
+                goto out;
        }
        attr_version = fuse_get_attr_version(fc);
-        fuse_lookup_init(req, dir, entry, &outarg);
+        fuse_lookup_init(fc, req, nodeid, name, outarg);
        request_send(fc, req);
        err = req->out.h.error;
        fuse_put_request(fc, req);
        /* Zero nodeid is same as -ENOENT, but with valid timeout */
-        if (!err && outarg.nodeid &&
+        if (err || !outarg->nodeid)
-            (invalid_nodeid(outarg.nodeid) ||
+                goto out_put_forget;
-             !fuse_valid_type(outarg.attr.mode)))
-                err = -EIO;
+        err = -EIO;
-        if (!err && outarg.nodeid) {
+        if (!outarg->nodeid)
-                inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation,
+                goto out_put_forget;
-                                  &outarg.attr, entry_attr_timeout(&outarg),
+        if (!fuse_valid_type(outarg->attr.mode))
-                                  attr_version);
+                goto out_put_forget;
-                if (!inode) {
-                        fuse_send_forget(fc, forget_req, outarg.nodeid, 1);
+        *inode = fuse_iget(sb, outarg->nodeid, outarg->generation,
-                        return ERR_PTR(-ENOMEM);
+                           &outarg->attr, entry_attr_timeout(outarg),
-                }
+                           attr_version);
+        err = -ENOMEM;
+        if (!*inode) {
+                fuse_send_forget(fc, forget_req, outarg->nodeid, 1);
+                goto out;
        }
+        err = 0;
+ out_put_forget:
        fuse_put_request(fc, forget_req);
-        if (err && err != -ENOENT)
+ out:
-                return ERR_PTR(err);
+        return err;
+}
+static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
+                                  struct nameidata *nd)
+{
+        int err;
+        struct fuse_entry_out outarg;
+        struct inode *inode;
+        struct dentry *newent;
+        struct fuse_conn *fc = get_fuse_conn(dir);
+        bool outarg_valid = true;
+        err = fuse_lookup_name(dir->i_sb, get_node_id(dir), &entry->d_name,
+                               &outarg, &inode);
+        if (err == -ENOENT) {
+                outarg_valid = false;
+                err = 0;
+        }
+        if (err)
+                goto out_err;
+        err = -EIO;
+        if (inode && get_node_id(inode) == FUSE_ROOT_ID)
+                goto out_iput;
        if (inode && S_ISDIR(inode->i_mode)) {
                mutex_lock(&fc->inst_mutex);
-                err = fuse_d_add_directory(entry, inode);
+                newent = fuse_d_add_directory(entry, inode);
                mutex_unlock(&fc->inst_mutex);
-                if (err) {
+                err = PTR_ERR(newent);
-                        iput(inode);
+                if (IS_ERR(newent))
-                        return ERR_PTR(err);
+                        goto out_iput;
-                }
+        } else {
-        } else
+                newent = d_splice_alias(inode, entry);
-                d_add(entry, inode);
+        }
+        entry = newent ? newent : entry;
        entry->d_op = &fuse_dentry_operations;
-        if (!err)
+        if (outarg_valid)
                fuse_change_entry_timeout(entry, &outarg);
        else
                fuse_invalidate_entry_cache(entry);
-        return NULL;
+        return newent;
+ out_iput:
+        iput(inode);
+ out_err:
+        return ERR_PTR(err);
 }
 /*
@@ -857,7 +898,7 @@ static int fuse_access(struct inode *inode, int mask)
                return PTR_ERR(req);
        memset(&inarg, 0, sizeof(inarg));
-        inarg.mask = mask;
+        inarg.mask = mask & (MAY_READ | MAY_WRITE | MAY_EXEC);
        req->in.h.opcode = FUSE_ACCESS;
        req->in.h.nodeid = get_node_id(inode);
        req->in.numargs = 1;
@@ -886,7 +927,7 @@ static int fuse_access(struct inode *inode, int mask)
 * access request is sent.  Execute permission is still checked
 * locally based on file mode.
 */
-static int fuse_permission(struct inode *inode, int mask, struct nameidata *nd)
+static int fuse_permission(struct inode *inode, int mask)
 {
        struct fuse_conn *fc = get_fuse_conn(inode);
        bool refreshed = false;
@@ -921,7 +962,7 @@ static int fuse_permission(struct inode *inode, int mask, struct nameidata *nd)
                   exist.  So if permissions are revoked this won't be
                   noticed immediately, only after the attribute
                   timeout has expired */
-        } else if (nd && (nd->flags & (LOOKUP_ACCESS | LOOKUP_CHDIR))) {
+        } else if (mask & MAY_ACCESS) {
                err = fuse_access(inode, mask);
        } else if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode)) {
                if (!(inode->i_mode & S_IXUGO)) {
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 8092f0d9fd1f..2bada6bbc317 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -893,7 +893,7 @@ static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
        if (count == 0)
                goto out;
-        err = remove_suid(file->f_path.dentry);
+        err = file_remove_suid(file);
        if (err)
                goto out;
@@ -1341,6 +1341,11 @@ static int fuse_setlk(struct file *file, struct file_lock *fl, int flock)
        pid_t pid = fl->fl_type != F_UNLCK ? current->tgid : 0;
        int err;
+        if (fl->fl_lmops && fl->fl_lmops->fl_grant) {
+                /* NLM needs asynchronous locks, which we don't support yet */
+                return -ENOLCK;
+        }
        /* Unlock on close is handled by the flush method */
        if (fl->fl_flags & FL_CLOSE)
                return 0;
@@ -1365,7 +1370,9 @@ static int fuse_file_lock(struct file *file, int cmd, struct file_lock *fl)
        struct fuse_conn *fc = get_fuse_conn(inode);
        int err;
-        if (cmd == F_GETLK) {
+        if (cmd == F_CANCELLK) {
+                err = 0;
+        } else if (cmd == F_GETLK) {
                if (fc->no_lock) {
                        posix_test_lock(file, fl);
                        err = 0;
@@ -1373,7 +1380,7 @@ static int fuse_file_lock(struct file *file, int cmd, struct file_lock *fl)
                        err = fuse_getlk(file, fl);
        } else {
                if (fc->no_lock)
-                        err = posix_lock_file_wait(file, fl);
+                        err = posix_lock_file(file, fl, NULL);
                else
                        err = fuse_setlk(file, fl, 0);
        }
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index bae948657c4f..3a876076bdd1 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -363,6 +363,9 @@ struct fuse_conn {
        /** Do not send separate SETATTR request before open(O_TRUNC)  */
        unsigned atomic_o_trunc : 1;
+        /** Filesystem supports NFS exporting.  Only set in INIT */
+        unsigned export_support : 1;
        /*
         * The following bitfields are only for optimization purposes
         * and hence races in setting them will not cause malfunction
@@ -464,6 +467,8 @@ static inline u64 get_node_id(struct inode *inode)
 /** Device operations */
 extern const struct file_operations fuse_dev_operations;
+extern struct dentry_operations fuse_dentry_operations;
 /**
 * Get a filled in inode
 */
@@ -471,6 +476,9 @@ struct inode *fuse_iget(struct super_block *sb, u64 nodeid,
                        int generation, struct fuse_attr *attr,
                        u64 attr_valid, u64 attr_version);
+int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct qstr *name,
+                     struct fuse_entry_out *outarg, struct inode **inode);
 /**
 * Send FORGET command
 */
@@ -604,6 +612,8 @@ void fuse_abort_conn(struct fuse_conn *fc);
 */
 void fuse_invalidate_attr(struct inode *inode);
+void fuse_invalidate_entry_cache(struct dentry *entry);
 /**
 * Acquire reference to fuse_conn
 */
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 3141690558c8..d2249f174e20 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -18,6 +18,7 @@
 #include <linux/statfs.h>
 #include <linux/random.h>
 #include <linux/sched.h>
+#include <linux/exportfs.h>
 MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>");
 MODULE_DESCRIPTION("Filesystem in Userspace");
@@ -552,6 +553,174 @@ static struct inode *get_root_inode(struct super_block *sb, unsigned mode)
        return fuse_iget(sb, 1, 0, &attr, 0, 0);
 }
+struct fuse_inode_handle /* one decoded file-handle entry: node ID plus generation */
+{
+        u64 nodeid;             /* assembled from two 32-bit handle words by the fh_to_* helpers */
+        u32 generation;         /* checked against inode->i_generation in fuse_get_dentry() */
+};
+static struct dentry *fuse_get_dentry(struct super_block *sb,
+                                      struct fuse_inode_handle *handle)
+{       /*
+         * Map a decoded file handle to a dentry.  The inode is taken from
+         * ilookup5() when that finds one; otherwise, provided the
+         * connection has export_support set, it is fetched with a lookup
+         * of "." on the handle's node ID.  Returns the dentry obtained
+         * from d_alloc_anon(), or an ERR_PTR carrying -ESTALE, -EIO,
+         * -ENOMEM or an error passed up from fuse_lookup_name().
+         */
+        struct fuse_conn *fc = get_fuse_conn_super(sb);
+        struct inode *inode;
+        struct dentry *entry;
+        int err = -ESTALE;      /* result of the early exits below */
+        if (handle->nodeid == 0)        /* a zero node ID is rejected outright */
+                goto out_err;
+        inode = ilookup5(sb, handle->nodeid, fuse_inode_eq, &handle->nodeid);
+        if (!inode) {
+                struct fuse_entry_out outarg;
+                struct qstr name;
+                if (!fc->export_support)
+                        goto out_err;
+                name.len = 1;
+                name.name = ".";
+                err = fuse_lookup_name(sb, handle->nodeid, &name, &outarg,
+                                       &inode);
+                if (err && err != -ENOENT)      /* other errors go up unchanged */
+                        goto out_err;
+                if (err || !inode) {            /* -ENOENT or no inode: stale handle */
+                        err = -ESTALE;
+                        goto out_err;
+                }
+                err = -EIO;
+                if (get_node_id(inode) != handle->nodeid) /* "." must yield the node asked for */
+                        goto out_iput;
+        }
+        err = -ESTALE;
+        if (inode->i_generation != handle->generation) /* generation mismatch: stale handle */
+                goto out_iput;
+        entry = d_alloc_anon(inode);
+        err = -ENOMEM;
+        if (!entry)
+                goto out_iput;
+        if (get_node_id(inode) != FUSE_ROOT_ID) { /* the root inode's dentry is left without these ops */
+                entry->d_op = &fuse_dentry_operations;
+                fuse_invalidate_entry_cache(entry); /* sets the entry's time to 0 */
+        }
+        return entry;
+ out_iput:      /* failure after an inode reference was obtained */
+        iput(inode);
+ out_err:
+        return ERR_PTR(err);
+}
+static int fuse_encode_fh(struct dentry *dentry, u32 *fh, int *max_len,
+                           int connectable)
+{       /*
+         * Write a file handle for dentry's inode into fh[] as three u32
+         * words: node ID high half, node ID low half, generation.  When
+         * connectable is set and the inode is not a directory, the same
+         * three words for the parent follow, giving six words in total.
+         * On success *max_len is set to the number of words written and
+         * the return value is 0x81 (inode only) or 0x82 (inode + parent).
+         * If *max_len is too small nothing is written and 255 is returned.
+         */
+        struct inode *inode = dentry->d_inode;
+        bool encode_parent = connectable && !S_ISDIR(inode->i_mode);
+        int len = encode_parent ? 6 : 3;        /* handle length in u32 words */
+        u64 nodeid;
+        u32 generation;
+        if (*max_len < len)
+                return  255;    /* NOTE(review): presumably the exportfs "cannot encode" type; confirm */
+        nodeid = get_fuse_inode(inode)->nodeid;
+        generation = inode->i_generation;
+        fh[0] = (u32)(nodeid >> 32);
+        fh[1] = (u32)(nodeid & 0xffffffff);
+        fh[2] = generation;
+        if (encode_parent) {
+                struct inode *parent;
+                spin_lock(&dentry->d_lock);     /* d_parent is read under d_lock */
+                parent = dentry->d_parent->d_inode;
+                nodeid = get_fuse_inode(parent)->nodeid;
+                generation = parent->i_generation;
+                spin_unlock(&dentry->d_lock);
+                fh[3] = (u32)(nodeid >> 32);
+                fh[4] = (u32)(nodeid & 0xffffffff);
+                fh[5] = generation;
+        }
+        *max_len = len;
+        return encode_parent ? 0x82 : 0x81;
+}
+static struct dentry *fuse_fh_to_dentry(struct super_block *sb,
+                struct fid *fid, int fh_len, int fh_type)
+{       /*
+         * Decode the inode part of a file handle (words 0..2) and resolve
+         * it with fuse_get_dentry().  Handles whose type is neither 0x81
+         * nor 0x82, or that are shorter than three words, yield NULL.
+         */
+        struct fuse_inode_handle handle;
+        if ((fh_type != 0x81 && fh_type != 0x82) || fh_len < 3)
+                return NULL;
+        handle.nodeid = (u64) fid->raw[0] << 32;        /* high half of the node ID */
+        handle.nodeid |= (u64) fid->raw[1];             /* low half of the node ID */
+        handle.generation = fid->raw[2];
+        return fuse_get_dentry(sb, &handle);
+}
+static struct dentry *fuse_fh_to_parent(struct super_block *sb,
+                struct fid *fid, int fh_len, int fh_type)
+{       /*
+         * Decode the parent part of a file handle (words 3..5) and resolve
+         * it with fuse_get_dentry().  Only type 0x82 handles of at least
+         * six words are accepted; anything else yields NULL.
+         */
+        struct fuse_inode_handle parent;
+        if (fh_type != 0x82 || fh_len < 6)
+                return NULL;
+        parent.nodeid = (u64) fid->raw[3] << 32;        /* high half of the parent's node ID */
+        parent.nodeid |= (u64) fid->raw[4];             /* low half of the parent's node ID */
+        parent.generation = fid->raw[5];
+        return fuse_get_dentry(sb, &parent);
+}
+static struct dentry *fuse_get_parent(struct dentry *child)
+{       /*
+         * Find the parent of child by looking up ".." on the child's node
+         * ID and wrapping the resulting inode with d_alloc_anon().
+         * Returns the parent dentry or an ERR_PTR: -ESTALE when the
+         * connection lacks export_support or the lookup finds nothing,
+         * -ENOMEM when no dentry could be allocated, or an error passed
+         * up from fuse_lookup_name().
+         */
+        struct inode *child_inode = child->d_inode;
+        struct fuse_conn *fc = get_fuse_conn(child_inode);
+        struct inode *inode;
+        struct dentry *parent;
+        struct fuse_entry_out outarg;
+        struct qstr name;
+        int err;
+        if (!fc->export_support)
+                return ERR_PTR(-ESTALE);
+        name.len = 2;
+        name.name = "..";
+        err = fuse_lookup_name(child_inode->i_sb, get_node_id(child_inode),
+                               &name, &outarg, &inode);
+        if (err && err != -ENOENT)      /* other errors go up unchanged */
+                return ERR_PTR(err);
+        if (err || !inode)              /* -ENOENT or no inode: stale */
+                return ERR_PTR(-ESTALE);
+        parent = d_alloc_anon(inode);
+        if (!parent) {
+                iput(inode);            /* release the inode the lookup returned */
+                return ERR_PTR(-ENOMEM);
+        }
+        if (get_node_id(inode) != FUSE_ROOT_ID) { /* the root inode's dentry is left without these ops */
+                parent->d_op = &fuse_dentry_operations;
+                fuse_invalidate_entry_cache(parent); /* sets the entry's time to 0 */
+        }
+        return parent;
+}
+static const struct export_operations fuse_export_operations = { /* installed as sb->s_export_op in fuse_fill_super() */
+        .fh_to_dentry   = fuse_fh_to_dentry,    /* handle words 0..2 -> dentry */
+        .fh_to_parent   = fuse_fh_to_parent,    /* handle words 3..5 -> parent dentry */
+        .encode_fh      = fuse_encode_fh,       /* dentry -> 3- or 6-word handle */
+        .get_parent     = fuse_get_parent,      /* parent via a lookup of ".." */
+};
 static const struct super_operations fuse_super_operations = {
        .alloc_inode    = fuse_alloc_inode,
        .destroy_inode  = fuse_destroy_inode,
@@ -581,6 +750,11 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
                                fc->no_lock = 1;
                        if (arg->flags & FUSE_ATOMIC_O_TRUNC)
                                fc->atomic_o_trunc = 1;
+                        if (arg->minor >= 9) {
+                                /* LOOKUP has dependency on proto version */
+                                if (arg->flags & FUSE_EXPORT_SUPPORT)
+                                        fc->export_support = 1;
+                        }
                        if (arg->flags & FUSE_BIG_WRITES)
                                fc->big_writes = 1;
                } else {
@@ -607,7 +781,7 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req)
        arg->minor = FUSE_KERNEL_MINOR_VERSION;
        arg->max_readahead = fc->bdi.ra_pages * PAGE_CACHE_SIZE;
        arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC |
-                FUSE_BIG_WRITES;
+                FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES;
        req->in.h.opcode = FUSE_INIT;
        req->in.numargs = 1;
        req->in.args[0].size = sizeof(*arg);
@@ -652,6 +826,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
        sb->s_magic = FUSE_SUPER_MAGIC;
        sb->s_op = &fuse_super_operations;
        sb->s_maxbytes = MAX_LFS_FILESIZE;
+        sb->s_export_op = &fuse_export_operations;
        file = fget(d.fd);
        if (!file)
@@ -781,7 +956,7 @@ static inline void unregister_fuseblk(void)
 }
 #endif
-static void fuse_inode_init_once(struct kmem_cache *cachep, void *foo)
+static void fuse_inode_init_once(void *foo)
 {
        struct inode * inode = foo;
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 6da0ab355b8a..8b0806a32948 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -448,7 +448,7 @@ struct inode *gfs2_lookup_simple(struct inode *dip, const char *name)
        struct qstr qstr;
        struct inode *inode;
        gfs2_str2qstr(&qstr, name);
-        inode = gfs2_lookupi(dip, &qstr, 1, NULL);
+        inode = gfs2_lookupi(dip, &qstr, 1);
        /* gfs2_lookupi has inconsistent callers: vfs
         * related routines expect NULL for no entry found,
         * gfs2_lookup_simple callers expect ENOENT
@@ -477,7 +477,7 @@ struct inode *gfs2_lookup_simple(struct inode *dip, const char *name)
 */
 struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name,
-                           int is_root, struct nameidata *nd)
+                           int is_root)
 {
        struct super_block *sb = dir->i_sb;
        struct gfs2_inode *dip = GFS2_I(dir);
@@ -1173,7 +1173,7 @@ int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to)
                        break;
                }
-                tmp = gfs2_lookupi(dir, &dotdot, 1, NULL);
+                tmp = gfs2_lookupi(dir, &dotdot, 1);
                if (IS_ERR(tmp)) {
                        error = PTR_ERR(tmp);
                        break;
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h
index 6074c2506f75..58f9607d6a86 100644
--- a/fs/gfs2/inode.h
+++ b/fs/gfs2/inode.h
@@ -83,7 +83,7 @@ int gfs2_inode_refresh(struct gfs2_inode *ip);
 int gfs2_dinode_dealloc(struct gfs2_inode *inode);
 int gfs2_change_nlink(struct gfs2_inode *ip, int diff);
 struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name,
-                           int is_root, struct nameidata *nd);
+                           int is_root);
 struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name,
                           unsigned int mode, dev_t dev);
 int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name,
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index bcc668d0fadd..bb2cc303ac29 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -24,7 +24,7 @@
 #include "util.h"
 #include "glock.h"
-static void gfs2_init_inode_once(struct kmem_cache *cachep, void *foo)
+static void gfs2_init_inode_once(void *foo)
 {
        struct gfs2_inode *ip = foo;
@@ -33,7 +33,7 @@ static void gfs2_init_inode_once(struct kmem_cache *cachep, void *foo)
        ip->i_alloc = NULL;
 }
-static void gfs2_init_glock_once(struct kmem_cache *cachep, void *foo)
+static void gfs2_init_glock_once(void *foo)
 {
        struct gfs2_glock *gl = foo;
diff --git a/fs/gfs2/ops_export.c b/fs/gfs2/ops_export.c
index 990d9f4bc463..9cda8536530c 100644
--- a/fs/gfs2/ops_export.c
+++ b/fs/gfs2/ops_export.c
@@ -134,7 +134,7 @@ static struct dentry *gfs2_get_parent(struct dentry *child)
        struct dentry *dentry;
        gfs2_str2qstr(&dotdot, "..");
-        inode = gfs2_lookupi(child->d_inode, &dotdot, 1, NULL);
+        inode = gfs2_lookupi(child->d_inode, &dotdot, 1);
        if (!inode)
                return ERR_PTR(-ENOENT);
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index 1e252dfc5294..e2c62f73a778 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -74,7 +74,7 @@ static int gfs2_create(struct inode *dir, struct dentry *dentry,
                        return PTR_ERR(inode);
                }
-                inode = gfs2_lookupi(dir, &dentry->d_name, 0, nd);
+                inode = gfs2_lookupi(dir, &dentry->d_name, 0);
                if (inode) {
                        if (!IS_ERR(inode)) {
                                gfs2_holder_uninit(ghs);
@@ -109,7 +109,7 @@ static struct dentry *gfs2_lookup(struct inode *dir, struct dentry *dentry,
        dentry->d_op = &gfs2_dops;
-        inode = gfs2_lookupi(dir, &dentry->d_name, 0, nd);
+        inode = gfs2_lookupi(dir, &dentry->d_name, 0);
        if (inode && IS_ERR(inode))
                return ERR_CAST(inode);
@@ -915,12 +915,6 @@ int gfs2_permission(struct inode *inode, int mask)
        return error;
 }
-static int gfs2_iop_permission(struct inode *inode, int mask,
-                               struct nameidata *nd)
-{
-        return gfs2_permission(inode, mask);
-}
 static int setattr_size(struct inode *inode, struct iattr *attr)
 {
        struct gfs2_inode *ip = GFS2_I(inode);
@@ -1150,7 +1144,7 @@ static int gfs2_removexattr(struct dentry *dentry, const char *name)
 }
 const struct inode_operations gfs2_file_iops = {
-        .permission = gfs2_iop_permission,
+        .permission = gfs2_permission,
        .setattr = gfs2_setattr,
        .getattr = gfs2_getattr,
        .setxattr = gfs2_setxattr,
@@ -1169,7 +1163,7 @@ const struct inode_operations gfs2_dir_iops = {
        .rmdir = gfs2_rmdir,
        .mknod = gfs2_mknod,
        .rename = gfs2_rename,
-        .permission = gfs2_iop_permission,
+        .permission = gfs2_permission,
        .setattr = gfs2_setattr,
        .getattr = gfs2_getattr,
        .setxattr = gfs2_setxattr,
@@ -1181,7 +1175,7 @@ const struct inode_operations gfs2_dir_iops = {
 const struct inode_operations gfs2_symlink_iops = {
        .readlink = gfs2_readlink,
        .follow_link = gfs2_follow_link,
-        .permission = gfs2_iop_permission,
+        .permission = gfs2_permission,
        .setattr = gfs2_setattr,
        .getattr = gfs2_getattr,
        .setxattr = gfs2_setxattr,
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 63a8a902d9db..ca831991cbc2 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -389,7 +389,7 @@ int gfs2_jindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ji_gh)
                        break;
                INIT_LIST_HEAD(&jd->extent_list);
-                jd->jd_inode = gfs2_lookupi(sdp->sd_jindex, &name, 1, NULL);
+                jd->jd_inode = gfs2_lookupi(sdp->sd_jindex, &name, 1);
                if (!jd->jd_inode || IS_ERR(jd->jd_inode)) {
                        if (!jd->jd_inode)
                                error = -ENOENT;
diff --git a/fs/hfs/bitmap.c b/fs/hfs/bitmap.c
index 24e75798ddf0..c6e97366e8ac 100644
--- a/fs/hfs/bitmap.c
+++ b/fs/hfs/bitmap.c
@@ -145,7 +145,7 @@ u32 hfs_vbm_search_free(struct super_block *sb, u32 goal, u32 *num_bits)
        if (!*num_bits)
                return 0;
-        down(&HFS_SB(sb)->bitmap_lock);
+        mutex_lock(&HFS_SB(sb)->bitmap_lock);
        bitmap = HFS_SB(sb)->bitmap;
        pos = hfs_find_set_zero_bits(bitmap, HFS_SB(sb)->fs_ablocks, goal, num_bits);
@@ -162,7 +162,7 @@ u32 hfs_vbm_search_free(struct super_block *sb, u32 goal, u32 *num_bits)
        HFS_SB(sb)->free_ablocks -= *num_bits;
        hfs_bitmap_dirty(sb);
 out:
-        up(&HFS_SB(sb)->bitmap_lock);
+        mutex_unlock(&HFS_SB(sb)->bitmap_lock);
        return pos;
 }
@@ -205,7 +205,7 @@ int hfs_clear_vbm_bits(struct super_block *sb, u16 start, u16 count)
        if ((start + count) > HFS_SB(sb)->fs_ablocks)
                return -2;
-        down(&HFS_SB(sb)->bitmap_lock);
+        mutex_lock(&HFS_SB(sb)->bitmap_lock);
        /* bitmap is always on a 32-bit boundary */
        curr = HFS_SB(sb)->bitmap + (start / 32);
        len = count;
@@ -236,7 +236,7 @@ int hfs_clear_vbm_bits(struct super_block *sb, u16 start, u16 count)
        }
 out:
        HFS_SB(sb)->free_ablocks += len;
-        up(&HFS_SB(sb)->bitmap_lock);
+        mutex_unlock(&HFS_SB(sb)->bitmap_lock);
        hfs_bitmap_dirty(sb);
        return 0;
diff --git a/fs/hfs/btree.c b/fs/hfs/btree.c
index f6621a785202..9b9d6395bad3 100644
--- a/fs/hfs/btree.c
+++ b/fs/hfs/btree.c
@@ -40,7 +40,7 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id, btree_keycmp ke
        {
        struct hfs_mdb *mdb = HFS_SB(sb)->mdb;
        HFS_I(tree->inode)->flags = 0;
-        init_MUTEX(&HFS_I(tree->inode)->extents_lock);
+        mutex_init(&HFS_I(tree->inode)->extents_lock);
        switch (id) {
        case HFS_EXT_CNID:
                hfs_inode_read_fork(tree->inode, mdb->drXTExtRec, mdb->drXTFlSize,
diff --git a/fs/hfs/extent.c b/fs/hfs/extent.c
index c176f67ba0a5..2c16316d2917 100644
--- a/fs/hfs/extent.c
+++ b/fs/hfs/extent.c
@@ -343,16 +343,16 @@ int hfs_get_block(struct inode *inode, sector_t block,
                goto done;
        }
-        down(&HFS_I(inode)->extents_lock);
+        mutex_lock(&HFS_I(inode)->extents_lock);
        res = hfs_ext_read_extent(inode, ablock);
        if (!res)
                dblock = hfs_ext_find_block(HFS_I(inode)->cached_extents,
                                            ablock - HFS_I(inode)->cached_start);
        else {
-                up(&HFS_I(inode)->extents_lock);
+                mutex_unlock(&HFS_I(inode)->extents_lock);
                return -EIO;
        }
-        up(&HFS_I(inode)->extents_lock);
+        mutex_unlock(&HFS_I(inode)->extents_lock);
 done:
        map_bh(bh_result, sb, HFS_SB(sb)->fs_start +
@@ -375,7 +375,7 @@ int hfs_extend_file(struct inode *inode)
        u32 start, len, goal;
        int res;
-        down(&HFS_I(inode)->extents_lock);
+        mutex_lock(&HFS_I(inode)->extents_lock);
        if (HFS_I(inode)->alloc_blocks == HFS_I(inode)->first_blocks)
                goal = hfs_ext_lastblock(HFS_I(inode)->first_extents);
        else {
@@ -425,7 +425,7 @@ int hfs_extend_file(struct inode *inode)
                        goto insert_extent;
        }
 out:
-        up(&HFS_I(inode)->extents_lock);
+        mutex_unlock(&HFS_I(inode)->extents_lock);
        if (!res) {
                HFS_I(inode)->alloc_blocks += len;
                mark_inode_dirty(inode);
@@ -487,7 +487,7 @@ void hfs_file_truncate(struct inode *inode)
        if (blk_cnt == alloc_cnt)
                goto out;
-        down(&HFS_I(inode)->extents_lock);
+        mutex_lock(&HFS_I(inode)->extents_lock);
        hfs_find_init(HFS_SB(sb)->ext_tree, &fd);
        while (1) {
                if (alloc_cnt == HFS_I(inode)->first_blocks) {
@@ -514,7 +514,7 @@ void hfs_file_truncate(struct inode *inode)
                hfs_brec_remove(&fd);
        }
        hfs_find_exit(&fd);
-        up(&HFS_I(inode)->extents_lock);
+        mutex_unlock(&HFS_I(inode)->extents_lock);
        HFS_I(inode)->alloc_blocks = blk_cnt;
 out:
diff --git a/fs/hfs/hfs_fs.h b/fs/hfs/hfs_fs.h
index 147374b6f675..9955232fdf8c 100644
--- a/fs/hfs/hfs_fs.h
+++ b/fs/hfs/hfs_fs.h
@@ -11,6 +11,7 @@
 #include <linux/slab.h>
 #include <linux/types.h>
+#include <linux/mutex.h>
 #include <linux/buffer_head.h>
 #include <linux/fs.h>
@@ -53,7 +54,7 @@ struct hfs_inode_info {
        struct list_head open_dir_list;
        struct inode *rsrc_inode;
-        struct semaphore extents_lock;
+        struct mutex extents_lock;
        u16 alloc_blocks, clump_blocks;
        sector_t fs_blocks;
@@ -139,7 +140,7 @@ struct hfs_sb_info {
        struct nls_table *nls_io, *nls_disk;
-        struct semaphore bitmap_lock;
+        struct mutex bitmap_lock;
        unsigned long flags;
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index 97f8446c4ff4..7e19835efa2e 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -150,7 +150,7 @@ struct inode *hfs_new_inode(struct inode *dir, struct qstr *name, int mode)
        if (!inode)
                return NULL;
-        init_MUTEX(&HFS_I(inode)->extents_lock);
+        mutex_init(&HFS_I(inode)->extents_lock);
        INIT_LIST_HEAD(&HFS_I(inode)->open_dir_list);
        hfs_cat_build_key(sb, (btree_key *)&HFS_I(inode)->cat_key, dir->i_ino, name);
        inode->i_ino = HFS_SB(sb)->next_id++;
@@ -281,7 +281,7 @@ static int hfs_read_inode(struct inode *inode, void *data)
        HFS_I(inode)->flags = 0;
        HFS_I(inode)->rsrc_inode = NULL;
-        init_MUTEX(&HFS_I(inode)->extents_lock);
+        mutex_init(&HFS_I(inode)->extents_lock);
        INIT_LIST_HEAD(&HFS_I(inode)->open_dir_list);
        /* Initialize the inode */
@@ -511,8 +511,7 @@ void hfs_clear_inode(struct inode *inode)
        }
 }
-static int hfs_permission(struct inode *inode, int mask,
+static int hfs_permission(struct inode *inode, int mask)
-                          struct nameidata *nd)
 {
        if (S_ISREG(inode->i_mode) && mask & MAY_EXEC)
                return 0;
@@ -523,8 +522,6 @@ static int hfs_file_open(struct inode *inode, struct file *file)
 {
        if (HFS_IS_RSRC(inode))
                inode = HFS_I(inode)->rsrc_inode;
-        if (atomic_read(&file->f_count) != 1)
-                return 0;
        atomic_inc(&HFS_I(inode)->opencnt);
        return 0;
 }
@@ -535,8 +532,6 @@ static int hfs_file_release(struct inode *inode, struct file *file)
        if (HFS_IS_RSRC(inode))
                inode = HFS_I(inode)->rsrc_inode;
-        if (atomic_read(&file->f_count) != 0)
-                return 0;
        if (atomic_dec_and_test(&HFS_I(inode)->opencnt)) {
                mutex_lock(&inode->i_mutex);
                hfs_file_truncate(inode);
diff --git a/fs/hfs/super.c b/fs/hfs/super.c
index 8cf67974adf6..4abb1047c689 100644
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -372,7 +372,7 @@ static int hfs_fill_super(struct super_block *sb, void *data, int silent)
        sb->s_op = &hfs_super_operations;
        sb->s_flags |= MS_NODIRATIME;
-        init_MUTEX(&sbi->bitmap_lock);
+        mutex_init(&sbi->bitmap_lock);
        res = hfs_mdb_get(sb);
        if (res) {
@@ -432,7 +432,7 @@ static struct file_system_type hfs_fs_type = {
        .fs_flags       = FS_REQUIRES_DEV,
 };
-static void hfs_init_once(struct kmem_cache *cachep, void *p)
+static void hfs_init_once(void *p)
 {
        struct hfs_inode_info *i = p;
diff --git a/fs/hfsplus/extents.c b/fs/hfsplus/extents.c
index 12e899cd7886..fec8f61227ff 100644
--- a/fs/hfsplus/extents.c
+++ b/fs/hfsplus/extents.c
@@ -199,16 +199,16 @@ int hfsplus_get_block(struct inode *inode, sector_t iblock,
                goto done;
        }
-        down(&HFSPLUS_I(inode).extents_lock);
+        mutex_lock(&HFSPLUS_I(inode).extents_lock);
        res = hfsplus_ext_read_extent(inode, ablock);
        if (!res) {
                dblock = hfsplus_ext_find_block(HFSPLUS_I(inode).cached_extents, ablock -
                                             HFSPLUS_I(inode).cached_start);
        } else {
-                up(&HFSPLUS_I(inode).extents_lock);
+                mutex_unlock(&HFSPLUS_I(inode).extents_lock);
                return -EIO;
        }
-        up(&HFSPLUS_I(inode).extents_lock);
+        mutex_unlock(&HFSPLUS_I(inode).extents_lock);
 done:
        dprint(DBG_EXTENT, "get_block(%lu): %llu - %u\n", inode->i_ino, (long long)iblock, dblock);
@@ -355,7 +355,7 @@ int hfsplus_file_extend(struct inode *inode)
                return -ENOSPC;
        }
-        down(&HFSPLUS_I(inode).extents_lock);
+        mutex_lock(&HFSPLUS_I(inode).extents_lock);
        if (HFSPLUS_I(inode).alloc_blocks == HFSPLUS_I(inode).first_blocks)
                goal = hfsplus_ext_lastblock(HFSPLUS_I(inode).first_extents);
        else {
@@ -408,7 +408,7 @@ int hfsplus_file_extend(struct inode *inode)
                        goto insert_extent;
        }
 out:
-        up(&HFSPLUS_I(inode).extents_lock);
+        mutex_unlock(&HFSPLUS_I(inode).extents_lock);
        if (!res) {
                HFSPLUS_I(inode).alloc_blocks += len;
                mark_inode_dirty(inode);
@@ -465,7 +465,7 @@ void hfsplus_file_truncate(struct inode *inode)
        if (blk_cnt == alloc_cnt)
                goto out;
-        down(&HFSPLUS_I(inode).extents_lock);
+        mutex_lock(&HFSPLUS_I(inode).extents_lock);
        hfs_find_init(HFSPLUS_SB(sb).ext_tree, &fd);
        while (1) {
                if (alloc_cnt == HFSPLUS_I(inode).first_blocks) {
@@ -492,7 +492,7 @@ void hfsplus_file_truncate(struct inode *inode)
                hfs_brec_remove(&fd);
        }
        hfs_find_exit(&fd);
-        up(&HFSPLUS_I(inode).extents_lock);
+        mutex_unlock(&HFSPLUS_I(inode).extents_lock);
        HFSPLUS_I(inode).alloc_blocks = blk_cnt;
 out:
diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h
index 9e59537b43d5..f027a905225f 100644
--- a/fs/hfsplus/hfsplus_fs.h
+++ b/fs/hfsplus/hfsplus_fs.h
@@ -11,6 +11,7 @@
 #define _LINUX_HFSPLUS_FS_H
 #include <linux/fs.h>
+#include <linux/mutex.h>
 #include <linux/buffer_head.h>
 #include "hfsplus_raw.h"
@@ -154,7 +155,7 @@ struct hfsplus_sb_info {
 struct hfsplus_inode_info {
-        struct semaphore extents_lock;
+        struct mutex extents_lock;
        u32 clump_blocks, alloc_blocks;
        sector_t fs_blocks;
        /* Allocation extents from catalog record or volume header */
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index 67e1c8b467c4..b085d64a2b67 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -163,7 +163,7 @@ static struct dentry *hfsplus_file_lookup(struct inode *dir, struct dentry *dent
        inode->i_ino = dir->i_ino;
        INIT_LIST_HEAD(&HFSPLUS_I(inode).open_dir_list);
-        init_MUTEX(&HFSPLUS_I(inode).extents_lock);
+        mutex_init(&HFSPLUS_I(inode).extents_lock);
        HFSPLUS_I(inode).flags = HFSPLUS_FLG_RSRC;
        hfs_find_init(HFSPLUS_SB(sb).cat_tree, &fd);
@@ -238,7 +238,7 @@ static void hfsplus_set_perms(struct inode *inode, struct hfsplus_perm *perms)
        perms->dev = cpu_to_be32(HFSPLUS_I(inode).dev);
 }
-static int hfsplus_permission(struct inode *inode, int mask, struct nameidata *nd)
+static int hfsplus_permission(struct inode *inode, int mask)
 {
        /* MAY_EXEC is also used for lookup, if no x bit is set allow lookup,
         * open_exec has the same test, so it's still not executable, if a x bit
@@ -254,8 +254,6 @@ static int hfsplus_file_open(struct inode *inode, struct file *file)
 {
        if (HFSPLUS_IS_RSRC(inode))
                inode = HFSPLUS_I(inode).rsrc_inode;
-        if (atomic_read(&file->f_count) != 1)
-                return 0;
        atomic_inc(&HFSPLUS_I(inode).opencnt);
        return 0;
 }
@@ -266,8 +264,6 @@ static int hfsplus_file_release(struct inode *inode, struct file *file)
        if (HFSPLUS_IS_RSRC(inode))
                inode = HFSPLUS_I(inode).rsrc_inode;
-        if (atomic_read(&file->f_count) != 0)
-                return 0;
        if (atomic_dec_and_test(&HFSPLUS_I(inode).opencnt)) {
                mutex_lock(&inode->i_mutex);
                hfsplus_file_truncate(inode);
@@ -316,7 +312,7 @@ struct inode *hfsplus_new_inode(struct super_block *sb, int mode)
        inode->i_nlink = 1;
        inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC;
        INIT_LIST_HEAD(&HFSPLUS_I(inode).open_dir_list);
-        init_MUTEX(&HFSPLUS_I(inode).extents_lock);
+        mutex_init(&HFSPLUS_I(inode).extents_lock);
        atomic_set(&HFSPLUS_I(inode).opencnt, 0);
        HFSPLUS_I(inode).flags = 0;
        memset(HFSPLUS_I(inode).first_extents, 0, sizeof(hfsplus_extent_rec));
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index ce97a54518d8..e834e578c93f 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -34,7 +34,7 @@ struct inode *hfsplus_iget(struct super_block *sb, unsigned long ino)
                return inode;
        INIT_LIST_HEAD(&HFSPLUS_I(inode).open_dir_list);
-        init_MUTEX(&HFSPLUS_I(inode).extents_lock);
+        mutex_init(&HFSPLUS_I(inode).extents_lock);
        HFSPLUS_I(inode).flags = 0;
        HFSPLUS_I(inode).rsrc_inode = NULL;
        atomic_set(&HFSPLUS_I(inode).opencnt, 0);
@@ -485,7 +485,7 @@ static struct file_system_type hfsplus_fs_type = {
        .fs_flags       = FS_REQUIRES_DEV,
 };
-static void hfsplus_init_once(struct kmem_cache *cachep, void *p)
+static void hfsplus_init_once(void *p)
 {
        struct hfsplus_inode_info *i = p;
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index 5222345ddccf..d6ecabf4d231 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -822,7 +822,7 @@ int hostfs_rename(struct inode *from_ino, struct dentry *from,
        return err;
 }
-int hostfs_permission(struct inode *ino, int desired, struct nameidata *nd)
+int hostfs_permission(struct inode *ino, int desired)
 {
        char *name;
        int r = 0, w = 0, x = 0, err;
diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c
index d256559b4104..d9c59a775449 100644
--- a/fs/hpfs/namei.c
+++ b/fs/hpfs/namei.c
@@ -415,7 +415,7 @@ again:
                d_drop(dentry);
                spin_lock(&dentry->d_lock);
                if (atomic_read(&dentry->d_count) > 1 ||
-                    permission(inode, MAY_WRITE, NULL) ||
+                    generic_permission(inode, MAY_WRITE, NULL) ||
                    !S_ISREG(inode->i_mode) ||
                    get_write_access(inode)) {
                        spin_unlock(&dentry->d_lock);
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c
index f63a699ec659..b8ae9c90ada0 100644
--- a/fs/hpfs/super.c
+++ b/fs/hpfs/super.c
@@ -173,7 +173,7 @@ static void hpfs_destroy_inode(struct inode *inode)
        kmem_cache_free(hpfs_inode_cachep, hpfs_i(inode));
 }
-static void init_once(struct kmem_cache *cachep, void *foo)
+static void init_once(void *foo)
 {
        struct hpfs_inode_info *ei = (struct hpfs_inode_info *) foo;
diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c
index 65077aa90f0a..2b3d1828db99 100644
--- a/fs/hppfs/hppfs.c
+++ b/fs/hppfs/hppfs.c
@@ -655,20 +655,13 @@ static void *hppfs_follow_link(struct dentry *dentry, struct nameidata *nd)
        return proc_dentry->d_inode->i_op->follow_link(proc_dentry, nd);
 }
-int hppfs_permission(struct inode *inode, int mask, struct nameidata *nd)
-{
-        return generic_permission(inode, mask, NULL);
-}
 static const struct inode_operations hppfs_dir_iops = {
        .lookup         = hppfs_lookup,
-        .permission     = hppfs_permission,
 };
 static const struct inode_operations hppfs_link_iops = {
        .readlink       = hppfs_readlink,
        .follow_link    = hppfs_follow_link,
-        .permission     = hppfs_permission,
 };
 static struct inode *get_inode(struct super_block *sb, struct dentry *dentry)
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index aeabf80f81a5..3f58923fb39b 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -53,6 +53,7 @@ int sysctl_hugetlb_shm_group;
 enum {
        Opt_size, Opt_nr_inodes,
        Opt_mode, Opt_uid, Opt_gid,
+        Opt_pagesize,
        Opt_err,
 };
@@ -62,6 +63,7 @@ static match_table_t tokens = {
        {Opt_mode,      "mode=%o"},
        {Opt_uid,       "uid=%u"},
        {Opt_gid,       "gid=%u"},
+        {Opt_pagesize,  "pagesize=%s"},
        {Opt_err,       NULL},
 };
@@ -80,6 +82,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
        struct inode *inode = file->f_path.dentry->d_inode;
        loff_t len, vma_len;
        int ret;
+        struct hstate *h = hstate_file(file);
        /*
         * vma address alignment (but not the pgoff alignment) has
@@ -92,7 +95,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
        vma->vm_flags |= VM_HUGETLB | VM_RESERVED;
        vma->vm_ops = &hugetlb_vm_ops;
-        if (vma->vm_pgoff & ~(HPAGE_MASK >> PAGE_SHIFT))
+        if (vma->vm_pgoff & ~(huge_page_mask(h) >> PAGE_SHIFT))
                return -EINVAL;
        vma_len = (loff_t)(vma->vm_end - vma->vm_start);
@@ -103,9 +106,9 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
        ret = -ENOMEM;
        len = vma_len + ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
-        if (vma->vm_flags & VM_MAYSHARE &&
+        if (hugetlb_reserve_pages(inode,
-            hugetlb_reserve_pages(inode, vma->vm_pgoff >> (HPAGE_SHIFT-PAGE_SHIFT),
+                                vma->vm_pgoff >> huge_page_order(h),
-                                  len >> HPAGE_SHIFT))
+                                len >> huge_page_shift(h), vma))
                goto out;
        ret = 0;
@@ -130,20 +133,21 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
        struct mm_struct *mm = current->mm;
        struct vm_area_struct *vma;
        unsigned long start_addr;
+        struct hstate *h = hstate_file(file);
-        if (len & ~HPAGE_MASK)
+        if (len & ~huge_page_mask(h))
                return -EINVAL;
        if (len > TASK_SIZE)
                return -ENOMEM;
        if (flags & MAP_FIXED) {
-                if (prepare_hugepage_range(addr, len))
+                if (prepare_hugepage_range(file, addr, len))
                        return -EINVAL;
                return addr;
        }
        if (addr) {
-                addr = ALIGN(addr, HPAGE_SIZE);
+                addr = ALIGN(addr, huge_page_size(h));
                vma = find_vma(mm, addr);
                if (TASK_SIZE - len >= addr &&
                    (!vma || addr + len <= vma->vm_start))
@@ -156,7 +160,7 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
                start_addr = TASK_UNMAPPED_BASE;
 full_search:
-        addr = ALIGN(start_addr, HPAGE_SIZE);
+        addr = ALIGN(start_addr, huge_page_size(h));
        for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
                /* At this point:  (!vma || addr < vma->vm_end). */
@@ -174,7 +178,7 @@ full_search:
                if (!vma || addr + len <= vma->vm_start)
                        return addr;
-                addr = ALIGN(vma->vm_end, HPAGE_SIZE);
+                addr = ALIGN(vma->vm_end, huge_page_size(h));
        }
 }
 #endif
@@ -225,10 +229,11 @@ hugetlbfs_read_actor(struct page *page, unsigned long offset,
 static ssize_t hugetlbfs_read(struct file *filp, char __user *buf,
                              size_t len, loff_t *ppos)
 {
+        struct hstate *h = hstate_file(filp);
        struct address_space *mapping = filp->f_mapping;
        struct inode *inode = mapping->host;
-        unsigned long index = *ppos >> HPAGE_SHIFT;
+        unsigned long index = *ppos >> huge_page_shift(h);
-        unsigned long offset = *ppos & ~HPAGE_MASK;
+        unsigned long offset = *ppos & ~huge_page_mask(h);
        unsigned long end_index;
        loff_t isize;
        ssize_t retval = 0;
@@ -243,17 +248,17 @@ static ssize_t hugetlbfs_read(struct file *filp, char __user *buf,
        if (!isize)
                goto out;
-        end_index = (isize - 1) >> HPAGE_SHIFT;
+        end_index = (isize - 1) >> huge_page_shift(h);
        for (;;) {
                struct page *page;
-                int nr, ret;
+                unsigned long nr, ret;
                /* nr is the maximum number of bytes to copy from this page */
-                nr = HPAGE_SIZE;
+                nr = huge_page_size(h);
                if (index >= end_index) {
                        if (index > end_index)
                                goto out;
-                        nr = ((isize - 1) & ~HPAGE_MASK) + 1;
+                        nr = ((isize - 1) & ~huge_page_mask(h)) + 1;
                        if (nr <= offset) {
                                goto out;
                        }
@@ -287,8 +292,8 @@ static ssize_t hugetlbfs_read(struct file *filp, char __user *buf,
                offset += ret;
                retval += ret;
                len -= ret;
-                index += offset >> HPAGE_SHIFT;
+                index += offset >> huge_page_shift(h);
-                offset &= ~HPAGE_MASK;
+                offset &= ~huge_page_mask(h);
                if (page)
                        page_cache_release(page);
@@ -298,7 +303,7 @@ static ssize_t hugetlbfs_read(struct file *filp, char __user *buf,
                        break;
        }
 out:
-        *ppos = ((loff_t)index << HPAGE_SHIFT) + offset;
+        *ppos = ((loff_t)index << huge_page_shift(h)) + offset;
        mutex_unlock(&inode->i_mutex);
        return retval;
 }
@@ -339,8 +344,9 @@ static void truncate_huge_page(struct page *page)
 static void truncate_hugepages(struct inode *inode, loff_t lstart)
 {
+        struct hstate *h = hstate_inode(inode);
        struct address_space *mapping = &inode->i_data;
-        const pgoff_t start = lstart >> HPAGE_SHIFT;
+        const pgoff_t start = lstart >> huge_page_shift(h);
        struct pagevec pvec;
        pgoff_t next;
        int i, freed = 0;
@@ -441,7 +447,7 @@ hugetlb_vmtruncate_list(struct prio_tree_root *root, pgoff_t pgoff)
                        v_offset = 0;
                __unmap_hugepage_range(vma,
-                                vma->vm_start + v_offset, vma->vm_end);
+                                vma->vm_start + v_offset, vma->vm_end, NULL);
        }
 }
@@ -449,8 +455,9 @@ static int hugetlb_vmtruncate(struct inode *inode, loff_t offset)
 {
        pgoff_t pgoff;
        struct address_space *mapping = inode->i_mapping;
+        struct hstate *h = hstate_inode(inode);
-        BUG_ON(offset & ~HPAGE_MASK);
+        BUG_ON(offset & ~huge_page_mask(h));
        pgoff = offset >> PAGE_SHIFT;
        i_size_write(inode, offset);
@@ -465,6 +472,7 @@ static int hugetlb_vmtruncate(struct inode *inode, loff_t offset)
 static int hugetlbfs_setattr(struct dentry *dentry, struct iattr *attr)
 {
        struct inode *inode = dentry->d_inode;
+        struct hstate *h = hstate_inode(inode);
        int error;
        unsigned int ia_valid = attr->ia_valid;
@@ -476,7 +484,7 @@ static int hugetlbfs_setattr(struct dentry *dentry, struct iattr *attr)
        if (ia_valid & ATTR_SIZE) {
                error = -EINVAL;
-                if (!(attr->ia_size & ~HPAGE_MASK))
+                if (!(attr->ia_size & ~huge_page_mask(h)))
                        error = hugetlb_vmtruncate(inode, attr->ia_size);
                if (error)
                        goto out;
@@ -610,9 +618,10 @@ static int hugetlbfs_set_page_dirty(struct page *page)
 static int hugetlbfs_statfs(struct dentry *dentry, struct kstatfs *buf)
 {
        struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(dentry->d_sb);
+        struct hstate *h = hstate_inode(dentry->d_inode);
        buf->f_type = HUGETLBFS_MAGIC;
-        buf->f_bsize = HPAGE_SIZE;
+        buf->f_bsize = huge_page_size(h);
        if (sbinfo) {
                spin_lock(&sbinfo->stat_lock);
                /* If no limits set, just report 0 for max/free/used
@@ -696,7 +705,7 @@ static const struct address_space_operations hugetlbfs_aops = {
 };
-static void init_once(struct kmem_cache *cachep, void *foo)
+static void init_once(void *foo)
 {
        struct hugetlbfs_inode_info *ei = (struct hugetlbfs_inode_info *)foo;
@@ -743,6 +752,8 @@ hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig)
        char *p, *rest;
        substring_t args[MAX_OPT_ARGS];
        int option;
+        unsigned long long size = 0;
+        enum { NO_SIZE, SIZE_STD, SIZE_PERCENT } setsize = NO_SIZE;
        if (!options)
                return 0;
@@ -773,17 +784,13 @@ hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig)
                        break;
                case Opt_size: {
-                        unsigned long long size;
                        /* memparse() will accept a K/M/G without a digit */
                        if (!isdigit(*args[0].from))
                                goto bad_val;
                        size = memparse(args[0].from, &rest);
-                        if (*rest == '%') {
+                        setsize = SIZE_STD;
-                                size <<= HPAGE_SHIFT;
+                        if (*rest == '%')
-                                size *= max_huge_pages;
+                                setsize = SIZE_PERCENT;
-                                do_div(size, 100);
-                        }
-                        pconfig->nr_blocks = (size >> HPAGE_SHIFT);
                        break;
                }
@@ -794,6 +801,19 @@ hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig)
                        pconfig->nr_inodes = memparse(args[0].from, &rest);
                        break;
+                case Opt_pagesize: {
+                        unsigned long ps;
+                        ps = memparse(args[0].from, &rest);
+                        pconfig->hstate = size_to_hstate(ps);
+                        if (!pconfig->hstate) {
+                                printk(KERN_ERR
+                                "hugetlbfs: Unsupported page size %lu MB\n",
+                                        ps >> 20);
+                                return -EINVAL;
+                        }
+                        break;
+                }
                default:
                        printk(KERN_ERR "hugetlbfs: Bad mount option: \"%s\"\n",
                                 p);
@@ -801,6 +821,18 @@ hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig)
                        break;
                }
        }
+        /* Do size after hstate is set up */
+        if (setsize > NO_SIZE) {
+                struct hstate *h = pconfig->hstate;
+                if (setsize == SIZE_PERCENT) {
+                        size <<= huge_page_shift(h);
+                        size *= h->max_huge_pages;
+                        do_div(size, 100);
+                }
+                pconfig->nr_blocks = (size >> huge_page_shift(h));
+        }
        return 0;
 bad_val:
@@ -825,6 +857,7 @@ hugetlbfs_fill_super(struct super_block *sb, void *data, int silent)
        config.uid = current->fsuid;
        config.gid = current->fsgid;
        config.mode = 0755;
+        config.hstate = &default_hstate;
        ret = hugetlbfs_parse_options(data, &config);
        if (ret)
                return ret;
@@ -833,14 +866,15 @@ hugetlbfs_fill_super(struct super_block *sb, void *data, int silent)
        if (!sbinfo)
                return -ENOMEM;
        sb->s_fs_info = sbinfo;
+        sbinfo->hstate = config.hstate;
        spin_lock_init(&sbinfo->stat_lock);
        sbinfo->max_blocks = config.nr_blocks;
        sbinfo->free_blocks = config.nr_blocks;
        sbinfo->max_inodes = config.nr_inodes;
        sbinfo->free_inodes = config.nr_inodes;
        sb->s_maxbytes = MAX_LFS_FILESIZE;
-        sb->s_blocksize = HPAGE_SIZE;
+        sb->s_blocksize = huge_page_size(config.hstate);
-        sb->s_blocksize_bits = HPAGE_SHIFT;
+        sb->s_blocksize_bits = huge_page_shift(config.hstate);
        sb->s_magic = HUGETLBFS_MAGIC;
        sb->s_op = &hugetlbfs_ops;
        sb->s_time_gran = 1;
@@ -942,7 +976,8 @@ struct file *hugetlb_file_setup(const char *name, size_t size)
                goto out_dentry;
        error = -ENOMEM;
-        if (hugetlb_reserve_pages(inode, 0, size >> HPAGE_SHIFT))
+        if (hugetlb_reserve_pages(inode, 0,
+                        size >> huge_page_shift(hstate_inode(inode)), NULL))
                goto out_inode;
        d_instantiate(dentry, inode);
diff --git a/fs/inode.c b/fs/inode.c
index c36d9480335c..b6726f644530 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -209,7 +209,7 @@ void inode_init_once(struct inode *inode)
        INIT_LIST_HEAD(&inode->i_dentry);
        INIT_LIST_HEAD(&inode->i_devices);
        INIT_RADIX_TREE(&inode->i_data.page_tree, GFP_ATOMIC);
-        rwlock_init(&inode->i_data.tree_lock);
+        spin_lock_init(&inode->i_data.tree_lock);
        spin_lock_init(&inode->i_data.i_mmap_lock);
        INIT_LIST_HEAD(&inode->i_data.private_list);
        spin_lock_init(&inode->i_data.private_lock);
@@ -224,7 +224,7 @@ void inode_init_once(struct inode *inode)
 EXPORT_SYMBOL(inode_init_once);
-static void init_once(struct kmem_cache * cachep, void *foo)
+static void init_once(void *foo)
 {
        struct inode * inode = (struct inode *) foo;
diff --git a/fs/inotify_user.c b/fs/inotify_user.c
index 6676c06bb7c1..60249429a253 100644
--- a/fs/inotify_user.c
+++ b/fs/inotify_user.c
@@ -354,20 +354,20 @@ static void inotify_dev_event_dequeue(struct inotify_device *dev)
 }
 /*
- * find_inode - resolve a user-given path to a specific inode and return a nd
+ * find_inode - resolve a user-given path to a specific inode
 */
-static int find_inode(const char __user *dirname, struct nameidata *nd,
+static int find_inode(const char __user *dirname, struct path *path,
                      unsigned flags)
 {
        int error;
-        error = __user_walk(dirname, flags, nd);
+        error = user_path_at(AT_FDCWD, dirname, flags, path);
        if (error)
                return error;
        /* you can only watch an inode if you have read permissions on it */
-        error = vfs_permission(nd, MAY_READ);
+        error = inode_permission(path->dentry->d_inode, MAY_READ);
        if (error)
-                path_put(&nd->path);
+                path_put(path);
        return error;
 }
@@ -566,7 +566,7 @@ static const struct inotify_operations inotify_user_ops = {
        .destroy_watch  = free_inotify_user_watch,
 };
-asmlinkage long sys_inotify_init(void)
+asmlinkage long sys_inotify_init1(int flags)
 {
        struct inotify_device *dev;
        struct inotify_handle *ih;
@@ -574,7 +574,14 @@ asmlinkage long sys_inotify_init(void)
        struct file *filp;
        int fd, ret;
-        fd = get_unused_fd();
+        /* Check the IN_* constants for consistency.  */
+        BUILD_BUG_ON(IN_CLOEXEC != O_CLOEXEC);
+        BUILD_BUG_ON(IN_NONBLOCK != O_NONBLOCK);
+        if (flags & ~(IN_CLOEXEC | IN_NONBLOCK))
+                return -EINVAL;
+        fd = get_unused_fd_flags(flags & O_CLOEXEC);
        if (fd < 0)
                return fd;
@@ -610,7 +617,7 @@ asmlinkage long sys_inotify_init(void)
        filp->f_path.dentry = dget(inotify_mnt->mnt_root);
        filp->f_mapping = filp->f_path.dentry->d_inode->i_mapping;
        filp->f_mode = FMODE_READ;
-        filp->f_flags = O_RDONLY;
+        filp->f_flags = O_RDONLY | (flags & O_NONBLOCK);
        filp->private_data = dev;
        INIT_LIST_HEAD(&dev->events);
@@ -638,11 +645,16 @@ out_put_fd:
        return ret;
 }
-asmlinkage long sys_inotify_add_watch(int fd, const char __user *path, u32 mask)
+asmlinkage long sys_inotify_init(void)
+{
+        return sys_inotify_init1(0);
+}
+asmlinkage long sys_inotify_add_watch(int fd, const char __user *pathname, u32 mask)
 {
        struct inode *inode;
        struct inotify_device *dev;
-        struct nameidata nd;
+        struct path path;
        struct file *filp;
        int ret, fput_needed;
        unsigned flags = 0;
@@ -662,12 +674,12 @@ asmlinkage long sys_inotify_add_watch(int fd, const char __user *path, u32 mask)
        if (mask & IN_ONLYDIR)
                flags |= LOOKUP_DIRECTORY;
-        ret = find_inode(path, &nd, flags);
+        ret = find_inode(pathname, &path, flags);
        if (unlikely(ret))
                goto fput_and_out;
-        /* inode held in place by reference to nd; dev by fget on fd */
+        /* inode held in place by reference to path; dev by fget on fd */
-        inode = nd.path.dentry->d_inode;
+        inode = path.dentry->d_inode;
        dev = filp->private_data;
        mutex_lock(&dev->up_mutex);
@@ -676,7 +688,7 @@ asmlinkage long sys_inotify_add_watch(int fd, const char __user *path, u32 mask)
                ret = create_watch(dev, inode, mask);
        mutex_unlock(&dev->up_mutex);
-        path_put(&nd.path);
+        path_put(&path);
 fput_and_out:
        fput_light(filp, fput_needed);
        return ret;
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index 044a254d526b..26948a6033b6 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -73,7 +73,7 @@ static void isofs_destroy_inode(struct inode *inode)
        kmem_cache_free(isofs_inode_cachep, ISOFS_I(inode));
 }
-static void init_once(struct kmem_cache *cachep, void *foo)
+static void init_once(void *foo)
 {
        struct iso_inode_info *ei = foo;
diff --git a/fs/isofs/rock.c b/fs/isofs/rock.c
index 6bd48f0a7047..c2fb2dd0131f 100644
--- a/fs/isofs/rock.c
+++ b/fs/isofs/rock.c
@@ -209,6 +209,11 @@ repeat:
        while (rs.len > 2) { /* There may be one byte for padding somewhere */
                rr = (struct rock_ridge *)rs.chr;
+                /*
+                 * Ignore rock ridge info if rr->len is out of range, but
+                 * don't return -EIO because that would make the file
+                 * invisible.
+                 */
                if (rr->len < 3)
                        goto out;       /* Something got screwed up here */
                sig = isonum_721(rs.chr);
@@ -216,8 +221,12 @@ repeat:
                        goto eio;
                rs.chr += rr->len;
                rs.len -= rr->len;
+                /*
+                 * As above, just ignore the rock ridge info if rr->len
+                 * is bogus.
+                 */
                if (rs.len < 0)
-                        goto eio;       /* corrupted isofs */
+                        goto out;       /* Something got screwed up here */
                switch (sig) {
                case SIG('R', 'R'):
@@ -307,6 +316,11 @@ parse_rock_ridge_inode_internal(struct iso_directory_record *de,
 repeat:
        while (rs.len > 2) { /* There may be one byte for padding somewhere */
                rr = (struct rock_ridge *)rs.chr;
+                /*
+                 * Ignore rock ridge info if rr->len is out of range, but
+                 * don't return -EIO because that would make the file
+                 * invisible.
+                 */
                if (rr->len < 3)
                        goto out;       /* Something got screwed up here */
                sig = isonum_721(rs.chr);
@@ -314,8 +328,12 @@ repeat:
                        goto eio;
                rs.chr += rr->len;
                rs.len -= rr->len;
+                /*
+                 * As above, just ignore the rock ridge info if rr->len
+                 * is bogus.
+                 */
                if (rs.len < 0)
-                        goto eio;       /* corrupted isofs */
+                        goto out;       /* Something got screwed up here */
                switch (sig) {
 #ifndef CONFIG_ZISOFS           /* No flag for SF or ZF */
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index 5a8ca61498ca..2eccbfaa1d48 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -36,7 +36,7 @@ static void journal_end_buffer_io_sync(struct buffer_head *bh, int uptodate)
 /*
 * When an ext3-ordered file is truncated, it is possible that many pages are
- * not sucessfully freed, because they are attached to a committing transaction.
+ * not successfully freed, because they are attached to a committing transaction.
 * After the transaction commits, these pages are left on the LRU, with no
 * ->mapping, and with attached buffers.  These pages are trivially reclaimable
 * by the VM, but their apparent absence upsets the VM accounting, and it makes
@@ -45,8 +45,8 @@ static void journal_end_buffer_io_sync(struct buffer_head *bh, int uptodate)
 * So here, we have a buffer which has just come off the forget list.  Look to
 * see if we can strip all buffers from the backing page.
 *
- * Called under lock_journal(), and possibly under journal_datalist_lock.  The
+ * Called under journal->j_list_lock.  The caller provided us with a ref
- * caller provided us with a ref against the buffer, and we drop that here.
+ * against the buffer, and we drop that here.
 */
 static void release_buffer_page(struct buffer_head *bh)
 {
@@ -78,6 +78,19 @@ nope:
 }
 /*
+ * Decrement reference counter for data buffer. If it has been marked
+ * 'BH_Freed', release it and the page to which it belongs if possible.
+ */
+static void release_data_buffer(struct buffer_head *bh)
+{
+        if (buffer_freed(bh)) {
+                clear_buffer_freed(bh);
+                release_buffer_page(bh);
+        } else
+                put_bh(bh);
+}
+/*
 * Try to acquire jbd_lock_bh_state() against the buffer, when j_list_lock is
 * held.  For ranking reasons we must trylock.  If we lose, schedule away and
 * return 0.  j_list_lock is dropped in this case.
@@ -172,7 +185,7 @@ static void journal_do_submit_data(struct buffer_head **wbuf, int bufs)
 /*
 *  Submit all the data buffers to disk
 */
-static void journal_submit_data_buffers(journal_t *journal,
+static int journal_submit_data_buffers(journal_t *journal,
                                transaction_t *commit_transaction)
 {
        struct journal_head *jh;
@@ -180,6 +193,7 @@ static void journal_submit_data_buffers(journal_t *journal,
        int locked;
        int bufs = 0;
        struct buffer_head **wbuf = journal->j_wbuf;
+        int err = 0;
        /*
         * Whenever we unlock the journal and sleep, things can get added
@@ -231,7 +245,7 @@ write_out_data:
                        if (locked)
                                unlock_buffer(bh);
                        BUFFER_TRACE(bh, "already cleaned up");
-                        put_bh(bh);
+                        release_data_buffer(bh);
                        continue;
                }
                if (locked && test_clear_buffer_dirty(bh)) {
@@ -253,15 +267,17 @@ write_out_data:
                        put_bh(bh);
                } else {
                        BUFFER_TRACE(bh, "writeout complete: unfile");
+                        if (unlikely(!buffer_uptodate(bh)))
+                                err = -EIO;
                        __journal_unfile_buffer(jh);
                        jbd_unlock_bh_state(bh);
                        if (locked)
                                unlock_buffer(bh);
                        journal_remove_journal_head(bh);
-                        /* Once for our safety reference, once for
+                        /* One for our safety reference, other for
                         * journal_remove_journal_head() */
                        put_bh(bh);
-                        put_bh(bh);
+                        release_data_buffer(bh);
                }
                if (need_resched() || spin_needbreak(&journal->j_list_lock)) {
@@ -271,6 +287,8 @@ write_out_data:
        }
        spin_unlock(&journal->j_list_lock);
        journal_do_submit_data(wbuf, bufs);
+        return err;
 }
 /*
@@ -410,8 +428,7 @@ void journal_commit_transaction(journal_t *journal)
         * Now start flushing things to disk, in the order they appear
         * on the transaction lists.  Data blocks go first.
         */
-        err = 0;
+        err = journal_submit_data_buffers(journal, commit_transaction);
-        journal_submit_data_buffers(journal, commit_transaction);
        /*
         * Wait for all previously submitted IO to complete.
@@ -426,10 +443,21 @@ void journal_commit_transaction(journal_t *journal)
                if (buffer_locked(bh)) {
                        spin_unlock(&journal->j_list_lock);
                        wait_on_buffer(bh);
-                        if (unlikely(!buffer_uptodate(bh)))
-                                err = -EIO;
                        spin_lock(&journal->j_list_lock);
                }
+                if (unlikely(!buffer_uptodate(bh))) {
+                        if (TestSetPageLocked(bh->b_page)) {
+                                spin_unlock(&journal->j_list_lock);
+                                lock_page(bh->b_page);
+                                spin_lock(&journal->j_list_lock);
+                        }
+                        if (bh->b_page->mapping)
+                                set_bit(AS_EIO, &bh->b_page->mapping->flags);
+                        unlock_page(bh->b_page);
+                        SetPageError(bh->b_page);
+                        err = -EIO;
+                }
                if (!inverted_lock(journal, bh)) {
                        put_bh(bh);
                        spin_lock(&journal->j_list_lock);
@@ -443,17 +471,21 @@ void journal_commit_transaction(journal_t *journal)
                } else {
                        jbd_unlock_bh_state(bh);
                }
-                put_bh(bh);
+                release_data_buffer(bh);
                cond_resched_lock(&journal->j_list_lock);
        }
        spin_unlock(&journal->j_list_lock);
-        if (err)
+        if (err) {
-                journal_abort(journal, err);
+                char b[BDEVNAME_SIZE];
-        journal_write_revoke_records(journal, commit_transaction);
+                printk(KERN_WARNING
+                        "JBD: Detected IO errors while flushing file data "
+                        "on %s\n", bdevname(journal->j_fs_dev, b));
+                err = 0;
+        }
-        jbd_debug(3, "JBD: commit phase 2\n");
+        journal_write_revoke_records(journal, commit_transaction);
        /*
         * If we found any dirty or locked buffers, then we should have
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index b99c3b3654c4..aa7143a8349b 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -68,7 +68,6 @@ EXPORT_SYMBOL(journal_set_features);
 EXPORT_SYMBOL(journal_create);
 EXPORT_SYMBOL(journal_load);
 EXPORT_SYMBOL(journal_destroy);
-EXPORT_SYMBOL(journal_update_superblock);
 EXPORT_SYMBOL(journal_abort);
 EXPORT_SYMBOL(journal_errno);
 EXPORT_SYMBOL(journal_ack_err);
@@ -1636,9 +1635,10 @@ static int journal_init_journal_head_cache(void)
 static void journal_destroy_journal_head_cache(void)
 {
-        J_ASSERT(journal_head_cache != NULL);
+        if (journal_head_cache) {
-        kmem_cache_destroy(journal_head_cache);
+                kmem_cache_destroy(journal_head_cache);
-        journal_head_cache = NULL;
+                journal_head_cache = NULL;
+        }
 }
 /*
diff --git a/fs/jbd/revoke.c b/fs/jbd/revoke.c
index 1bb43e987f4b..c7bd649bbbdc 100644
--- a/fs/jbd/revoke.c
+++ b/fs/jbd/revoke.c
@@ -166,138 +166,123 @@ static struct jbd_revoke_record_s *find_revoke_record(journal_t *journal,
        return NULL;
 }
+void journal_destroy_revoke_caches(void)
+{
+        if (revoke_record_cache) {
+                kmem_cache_destroy(revoke_record_cache);
+                revoke_record_cache = NULL;
+        }
+        if (revoke_table_cache) {
+                kmem_cache_destroy(revoke_table_cache);
+                revoke_table_cache = NULL;
+        }
+}
 int __init journal_init_revoke_caches(void)
 {
+        J_ASSERT(!revoke_record_cache);
+        J_ASSERT(!revoke_table_cache);
        revoke_record_cache = kmem_cache_create("revoke_record",
                                           sizeof(struct jbd_revoke_record_s),
                                           0,
                                           SLAB_HWCACHE_ALIGN|SLAB_TEMPORARY,
                                           NULL);
        if (!revoke_record_cache)
-                return -ENOMEM;
+                goto record_cache_failure;
        revoke_table_cache = kmem_cache_create("revoke_table",
                                           sizeof(struct jbd_revoke_table_s),
                                           0, SLAB_TEMPORARY, NULL);
-        if (!revoke_table_cache) {
+        if (!revoke_table_cache)
-                kmem_cache_destroy(revoke_record_cache);
+                goto table_cache_failure;
-                revoke_record_cache = NULL;
-                return -ENOMEM;
-        }
        return 0;
-}
-void journal_destroy_revoke_caches(void)
+table_cache_failure:
-{
+        journal_destroy_revoke_caches();
-        kmem_cache_destroy(revoke_record_cache);
+record_cache_failure:
-        revoke_record_cache = NULL;
+        return -ENOMEM;
-        kmem_cache_destroy(revoke_table_cache);
-        revoke_table_cache = NULL;
 }
-/* Initialise the revoke table for a given journal to a given size. */
+static struct jbd_revoke_table_s *journal_init_revoke_table(int hash_size)
-int journal_init_revoke(journal_t *journal, int hash_size)
 {
-        int shift, tmp;
+        int shift = 0;
+        int tmp = hash_size;
+        struct jbd_revoke_table_s *table;
-        J_ASSERT (journal->j_revoke_table[0] == NULL);
+        table = kmem_cache_alloc(revoke_table_cache, GFP_KERNEL);
+        if (!table)
+                goto out;
-        shift = 0;
-        tmp = hash_size;
        while((tmp >>= 1UL) != 0UL)
                shift++;
-        journal->j_revoke_table[0] = kmem_cache_alloc(revoke_table_cache, GFP_KERNEL);
+        table->hash_size = hash_size;
-        if (!journal->j_revoke_table[0])
+        table->hash_shift = shift;
-                return -ENOMEM;
+        table->hash_table =
-        journal->j_revoke = journal->j_revoke_table[0];
-        /* Check that the hash_size is a power of two */
-        J_ASSERT(is_power_of_2(hash_size));
-        journal->j_revoke->hash_size = hash_size;
-        journal->j_revoke->hash_shift = shift;
-        journal->j_revoke->hash_table =
                kmalloc(hash_size * sizeof(struct list_head), GFP_KERNEL);
-        if (!journal->j_revoke->hash_table) {
+        if (!table->hash_table) {
-                kmem_cache_free(revoke_table_cache, journal->j_revoke_table[0]);
+                kmem_cache_free(revoke_table_cache, table);
-                journal->j_revoke = NULL;
+                table = NULL;
-                return -ENOMEM;
+                goto out;
        }
        for (tmp = 0; tmp < hash_size; tmp++)
-                INIT_LIST_HEAD(&journal->j_revoke->hash_table[tmp]);
+                INIT_LIST_HEAD(&table->hash_table[tmp]);
-        journal->j_revoke_table[1] = kmem_cache_alloc(revoke_table_cache, GFP_KERNEL);
+out:
-        if (!journal->j_revoke_table[1]) {
+        return table;
-                kfree(journal->j_revoke_table[0]->hash_table);
+}
-                kmem_cache_free(revoke_table_cache, journal->j_revoke_table[0]);
-                return -ENOMEM;
+static void journal_destroy_revoke_table(struct jbd_revoke_table_s *table)
+{
+        int i;
+        struct list_head *hash_list;
+        for (i = 0; i < table->hash_size; i++) {
+                hash_list = &table->hash_table[i];
+                J_ASSERT(list_empty(hash_list));
        }
-        journal->j_revoke = journal->j_revoke_table[1];
+        kfree(table->hash_table);
+        kmem_cache_free(revoke_table_cache, table);
+}
-        /* Check that the hash_size is a power of two */
+/* Initialise the revoke table for a given journal to a given size. */
+int journal_init_revoke(journal_t *journal, int hash_size)
+{
+        J_ASSERT(journal->j_revoke_table[0] == NULL);
        J_ASSERT(is_power_of_2(hash_size));
-        journal->j_revoke->hash_size = hash_size;
+        journal->j_revoke_table[0] = journal_init_revoke_table(hash_size);
+        if (!journal->j_revoke_table[0])
+                goto fail0;
-        journal->j_revoke->hash_shift = shift;
+        journal->j_revoke_table[1] = journal_init_revoke_table(hash_size);
+        if (!journal->j_revoke_table[1])
+                goto fail1;
-        journal->j_revoke->hash_table =
+        journal->j_revoke = journal->j_revoke_table[1];
-                kmalloc(hash_size * sizeof(struct list_head), GFP_KERNEL);
-        if (!journal->j_revoke->hash_table) {
-                kfree(journal->j_revoke_table[0]->hash_table);
-                kmem_cache_free(revoke_table_cache, journal->j_revoke_table[0]);
-                kmem_cache_free(revoke_table_cache, journal->j_revoke_table[1]);
-                journal->j_revoke = NULL;
-                return -ENOMEM;
-        }
-        for (tmp = 0; tmp < hash_size; tmp++)
-                INIT_LIST_HEAD(&journal->j_revoke->hash_table[tmp]);
        spin_lock_init(&journal->j_revoke_lock);
        return 0;
-}
-/* Destoy a journal's revoke table.  The table must already be empty! */
+fail1:
+        journal_destroy_revoke_table(journal->j_revoke_table[0]);
+fail0:
+        return -ENOMEM;
+}
+/* Destroy a journal's revoke table.  The table must already be empty! */
 void journal_destroy_revoke(journal_t *journal)
 {
-        struct jbd_revoke_table_s *table;
-        struct list_head *hash_list;
-        int i;
-        table = journal->j_revoke_table[0];
-        if (!table)
-                return;
-        for (i=0; i<table->hash_size; i++) {
-                hash_list = &table->hash_table[i];
-                J_ASSERT (list_empty(hash_list));
-        }
-        kfree(table->hash_table);
-        kmem_cache_free(revoke_table_cache, table);
-        journal->j_revoke = NULL;
-        table = journal->j_revoke_table[1];
-        if (!table)
-                return;
-        for (i=0; i<table->hash_size; i++) {
-                hash_list = &table->hash_table[i];
-                J_ASSERT (list_empty(hash_list));
-        }
-        kfree(table->hash_table);
-        kmem_cache_free(revoke_table_cache, table);
        journal->j_revoke = NULL;
+        if (journal->j_revoke_table[0])
+                journal_destroy_revoke_table(journal->j_revoke_table[0]);
+        if (journal->j_revoke_table[1])
+                journal_destroy_revoke_table(journal->j_revoke_table[1]);
 }
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index 67ff2024c23c..8dee32007500 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -1648,12 +1648,42 @@ out:
        return;
 }
+/*
+ * journal_try_to_free_buffers() could race with journal_commit_transaction()
+ * The latter might still hold a count on buffers when inspecting
+ * them on t_syncdata_list or t_locked_list.
+ *
+ * journal_try_to_free_buffers() will call this function to
+ * wait for the current transaction to finish syncing data buffers, before
+ * trying to free that buffer.
+ *
+ * Takes journal->j_state_lock itself, so call without that lock held.
+ */
+static void journal_wait_for_transaction_sync_data(journal_t *journal)
+{
+        transaction_t *transaction = NULL;
+        tid_t tid;
+        spin_lock(&journal->j_state_lock);
+        transaction = journal->j_committing_transaction;
+        if (!transaction) {
+                spin_unlock(&journal->j_state_lock);
+                return;
+        }
+        tid = transaction->t_tid;
+        spin_unlock(&journal->j_state_lock);
+        log_wait_commit(journal, tid);
+}
 /**
 * int journal_try_to_free_buffers() - try to free page buffers.
 * @journal: journal for operation
 * @page: to try and free
- * @unused_gfp_mask: unused
+ * @gfp_mask: we use the mask to detect how hard should we try to release
+ * buffers. If __GFP_WAIT and __GFP_FS is set, we wait for commit code to
+ * release the buffers.
 *
 *
 * For all the buffers on this page,
@@ -1682,9 +1712,11 @@ out:
 * journal_try_to_free_buffer() is changing its state.  But that
 * cannot happen because we never reallocate freed data as metadata
 * while the data is part of a transaction.  Yes?
+ *
+ * Return 0 on failure, 1 on success
 */
 int journal_try_to_free_buffers(journal_t *journal,
-                                struct page *page, gfp_t unused_gfp_mask)
+                                struct page *page, gfp_t gfp_mask)
 {
        struct buffer_head *head;
        struct buffer_head *bh;
@@ -1713,7 +1745,28 @@ int journal_try_to_free_buffers(journal_t *journal,
                if (buffer_jbd(bh))
                        goto busy;
        } while ((bh = bh->b_this_page) != head);
        ret = try_to_free_buffers(page);
+        /*
+         * There are a number of places where journal_try_to_free_buffers()
+         * could race with journal_commit_transaction(), the latter still
+         * holds the reference to the buffers to free while processing them.
+         * try_to_free_buffers() failed to free those buffers. Some of the
+         * callers of releasepage() request page buffers to be dropped, otherwise
+         * treat the fail-to-free as errors (such as generic_file_direct_IO())
+         *
+         * So, if the caller of try_to_release_page() wants the synchronous
+         * behaviour(i.e make sure buffers are dropped upon return),
+         * let's wait for the current transaction to finish flush of
+         * dirty data buffers, then try to free those buffers again,
+         * with the journal locked.
+         */
+        if (ret == 0 && (gfp_mask & __GFP_WAIT) && (gfp_mask & __GFP_FS)) {
+                journal_wait_for_transaction_sync_data(journal);
+                ret = try_to_free_buffers(page);
+        }
 busy:
        return ret;
 }
diff --git a/fs/jffs2/acl.c b/fs/jffs2/acl.c
index 4c80404a9aba..d98713777a1b 100644
--- a/fs/jffs2/acl.c
+++ b/fs/jffs2/acl.c
@@ -314,7 +314,7 @@ static int jffs2_check_acl(struct inode *inode, int mask)
        return -EAGAIN;
 }
-int jffs2_permission(struct inode *inode, int mask, struct nameidata *nd)
+int jffs2_permission(struct inode *inode, int mask)
 {
        return generic_permission(inode, mask, jffs2_check_acl);
 }
diff --git a/fs/jffs2/acl.h b/fs/jffs2/acl.h
index 0bb7f003fd80..8ca058aed384 100644
--- a/fs/jffs2/acl.h
+++ b/fs/jffs2/acl.h
@@ -28,7 +28,7 @@ struct jffs2_acl_header {
 #define JFFS2_ACL_NOT_CACHED ((void *)-1)
-extern int jffs2_permission(struct inode *, int, struct nameidata *);
+extern int jffs2_permission(struct inode *, int);
 extern int jffs2_acl_chmod(struct inode *);
 extern int jffs2_init_acl_pre(struct inode *, struct inode *, int *);
 extern int jffs2_init_acl_post(struct inode *);
diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c
index c0c141f6fde1..cd219ef55254 100644
--- a/fs/jffs2/dir.c
+++ b/fs/jffs2/dir.c
@@ -38,7 +38,7 @@ const struct file_operations jffs2_dir_operations =
 {
        .read =         generic_read_dir,
        .readdir =      jffs2_readdir,
-        .ioctl =        jffs2_ioctl,
+        .unlocked_ioctl=jffs2_ioctl,
        .fsync =        jffs2_fsync
 };
diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c
index 5e920343b2c5..5a98aa87c853 100644
--- a/fs/jffs2/file.c
+++ b/fs/jffs2/file.c
@@ -46,7 +46,7 @@ const struct file_operations jffs2_file_operations =
        .aio_read =     generic_file_aio_read,
        .write =        do_sync_write,
        .aio_write =    generic_file_aio_write,
-        .ioctl =        jffs2_ioctl,
+        .unlocked_ioctl=jffs2_ioctl,
        .mmap =         generic_file_readonly_mmap,
        .fsync =        jffs2_fsync,
        .splice_read =  generic_file_splice_read,
diff --git a/fs/jffs2/ioctl.c b/fs/jffs2/ioctl.c
index e2177210f621..9d41f43e47bb 100644
--- a/fs/jffs2/ioctl.c
+++ b/fs/jffs2/ioctl.c
@@ -12,8 +12,7 @@
 #include <linux/fs.h>
 #include "nodelist.h"
-int jffs2_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
+long jffs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
-                unsigned long arg)
 {
        /* Later, this will provide for lsattr.jffs2 and chattr.jffs2, which
           will include compression support etc. */
diff --git a/fs/jffs2/os-linux.h b/fs/jffs2/os-linux.h
index 2cc866cf134f..5e194a5c8e29 100644
--- a/fs/jffs2/os-linux.h
+++ b/fs/jffs2/os-linux.h
@@ -167,7 +167,7 @@ int jffs2_fsync(struct file *, struct dentry *, int);
 int jffs2_do_readpage_unlock (struct inode *inode, struct page *pg);
 /* ioctl.c */
-int jffs2_ioctl(struct inode *, struct file *, unsigned int, unsigned long);
+long jffs2_ioctl(struct file *, unsigned int, unsigned long);
 /* symlink.c */
 extern const struct inode_operations jffs2_symlink_inode_operations;
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c
index 7da69eae49e4..efd401257ed9 100644
--- a/fs/jffs2/super.c
+++ b/fs/jffs2/super.c
@@ -44,7 +44,7 @@ static void jffs2_destroy_inode(struct inode *inode)
        kmem_cache_free(jffs2_inode_cachep, JFFS2_INODE_INFO(inode));
 }
-static void jffs2_i_init_once(struct kmem_cache *cachep, void *foo)
+static void jffs2_i_init_once(void *foo)
 {
        struct jffs2_inode_info *f = foo;
diff --git a/fs/jfs/acl.c b/fs/jfs/acl.c
index 4d84bdc88299..d3e5c33665de 100644
--- a/fs/jfs/acl.c
+++ b/fs/jfs/acl.c
@@ -140,7 +140,7 @@ static int jfs_check_acl(struct inode *inode, int mask)
        return -EAGAIN;
 }
-int jfs_permission(struct inode *inode, int mask, struct nameidata *nd)
+int jfs_permission(struct inode *inode, int mask)
 {
        return generic_permission(inode, mask, jfs_check_acl);
 }
diff --git a/fs/jfs/jfs_acl.h b/fs/jfs/jfs_acl.h
index 455fa4292045..88475f10a389 100644
--- a/fs/jfs/jfs_acl.h
+++ b/fs/jfs/jfs_acl.h
@@ -20,7 +20,7 @@
 #ifdef CONFIG_JFS_POSIX_ACL
-int jfs_permission(struct inode *, int, struct nameidata *);
+int jfs_permission(struct inode *, int);
 int jfs_init_acl(tid_t, struct inode *, struct inode *);
 int jfs_setattr(struct dentry *, struct iattr *);
diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c
index 854ff0ec574f..c350057087dd 100644
--- a/fs/jfs/jfs_metapage.c
+++ b/fs/jfs/jfs_metapage.c
@@ -182,7 +182,7 @@ static inline void remove_metapage(struct page *page, struct metapage *mp)
 #endif
-static void init_once(struct kmem_cache *cachep, void *foo)
+static void init_once(void *foo)
 {
        struct metapage *mp = (struct metapage *)foo;
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 0288e6d7936a..3630718be395 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -22,6 +22,7 @@
 #include <linux/parser.h>
 #include <linux/completion.h>
 #include <linux/vfs.h>
+#include <linux/quotaops.h>
 #include <linux/mount.h>
 #include <linux/moduleparam.h>
 #include <linux/kthread.h>
@@ -759,7 +760,7 @@ static struct file_system_type jfs_fs_type = {
        .fs_flags       = FS_REQUIRES_DEV,
 };
-static void init_once(struct kmem_cache *cachep, void *foo)
+static void init_once(void *foo)
 {
        struct jfs_inode_info *jfs_ip = (struct jfs_inode_info *) foo;
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index 1f6dc518505c..31668b690e03 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -582,7 +582,15 @@ again:
        }
        if (status < 0)
                goto out_unlock;
-        status = nlm_stat_to_errno(resp->status);
+        /*
+         * EAGAIN doesn't make sense for sleeping locks, and in some
+         * cases NLM_LCK_DENIED is returned for a permanent error.  So
+         * turn it into an ENOLCK.
+         */
+        if (resp->status == nlm_lck_denied && (fl_flags & FL_SLEEP))
+                status = -ENOLCK;
+        else
+                status = nlm_stat_to_errno(resp->status);
 out_unblock:
        nlmclnt_finish_block(block);
 out:
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index 2169af4d5455..5bd9bf0fa9df 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -50,7 +50,7 @@ EXPORT_SYMBOL(nlmsvc_ops);
 static DEFINE_MUTEX(nlmsvc_mutex);
 static unsigned int             nlmsvc_users;
 static struct task_struct       *nlmsvc_task;
-static struct svc_serv          *nlmsvc_serv;
+static struct svc_rqst          *nlmsvc_rqst;
 int                             nlmsvc_grace_period;
 unsigned long                   nlmsvc_timeout;
@@ -194,20 +194,11 @@ lockd(void *vrqstp)
                svc_process(rqstp);
        }
        flush_signals(current);
        if (nlmsvc_ops)
                nlmsvc_invalidate_all();
        nlm_shutdown_hosts();
        unlock_kernel();
-        nlmsvc_task = NULL;
-        nlmsvc_serv = NULL;
-        /* Exit the RPC thread */
-        svc_exit_thread(rqstp);
        return 0;
 }
@@ -254,16 +245,15 @@ int
 lockd_up(int proto) /* Maybe add a 'family' option when IPv6 is supported ?? */
 {
        struct svc_serv *serv;
-        struct svc_rqst *rqstp;
        int             error = 0;
        mutex_lock(&nlmsvc_mutex);
        /*
         * Check whether we're already up and running.
         */
-        if (nlmsvc_serv) {
+        if (nlmsvc_rqst) {
                if (proto)
-                        error = make_socks(nlmsvc_serv, proto);
+                        error = make_socks(nlmsvc_rqst->rq_server, proto);
                goto out;
        }
@@ -288,9 +278,10 @@ lockd_up(int proto) /* Maybe add a 'family' option when IPv6 is supported ?? */
        /*
         * Create the kernel thread and wait for it to start.
         */
-        rqstp = svc_prepare_thread(serv, &serv->sv_pools[0]);
+        nlmsvc_rqst = svc_prepare_thread(serv, &serv->sv_pools[0]);
-        if (IS_ERR(rqstp)) {
+        if (IS_ERR(nlmsvc_rqst)) {
-                error = PTR_ERR(rqstp);
+                error = PTR_ERR(nlmsvc_rqst);
+                nlmsvc_rqst = NULL;
                printk(KERN_WARNING
                        "lockd_up: svc_rqst allocation failed, error=%d\n",
                        error);
@@ -298,16 +289,15 @@ lockd_up(int proto) /* Maybe add a 'family' option when IPv6 is supported ?? */
        }
        svc_sock_update_bufs(serv);
-        nlmsvc_serv = rqstp->rq_server;
-        nlmsvc_task = kthread_run(lockd, rqstp, serv->sv_name);
+        nlmsvc_task = kthread_run(lockd, nlmsvc_rqst, serv->sv_name);
        if (IS_ERR(nlmsvc_task)) {
                error = PTR_ERR(nlmsvc_task);
+                svc_exit_thread(nlmsvc_rqst);
                nlmsvc_task = NULL;
-                nlmsvc_serv = NULL;
+                nlmsvc_rqst = NULL;
                printk(KERN_WARNING
                        "lockd_up: kthread_run failed, error=%d\n", error);
-                svc_exit_thread(rqstp);
                goto destroy_and_out;
        }
@@ -346,6 +336,9 @@ lockd_down(void)
                BUG();
        }
        kthread_stop(nlmsvc_task);
+        svc_exit_thread(nlmsvc_rqst);
+        nlmsvc_task = NULL;
+        nlmsvc_rqst = NULL;
 out:
        mutex_unlock(&nlmsvc_mutex);
 }
diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index 2e27176ff42f..399444639337 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c
@@ -58,8 +58,7 @@ nlm4svc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp,
        return 0;
 no_locks:
-        if (host)
+        nlm_release_host(host);
-                nlm_release_host(host);
        if (error)
                return error;   
        return nlm_lck_denied_nolocks;
@@ -100,7 +99,7 @@ nlm4svc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp,
                return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success;
        /* Now check for conflicting locks */
-        resp->status = nlmsvc_testlock(rqstp, file, &argp->lock, &resp->lock, &resp->cookie);
+        resp->status = nlmsvc_testlock(rqstp, file, host, &argp->lock, &resp->lock, &resp->cookie);
        if (resp->status == nlm_drop_reply)
                rc = rpc_drop_reply;
        else
@@ -146,7 +145,7 @@ nlm4svc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp,
 #endif
        /* Now try to lock the file */
-        resp->status = nlmsvc_lock(rqstp, file, &argp->lock,
+        resp->status = nlmsvc_lock(rqstp, file, host, &argp->lock,
                                        argp->block, &argp->cookie);
        if (resp->status == nlm_drop_reply)
                rc = rpc_drop_reply;
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index 56a08ab9a4cb..cf0d5c2c318d 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -129,9 +129,9 @@ nlmsvc_lookup_block(struct nlm_file *file, struct nlm_lock *lock)
 static inline int nlm_cookie_match(struct nlm_cookie *a, struct nlm_cookie *b)
 {
-        if(a->len != b->len)
+        if (a->len != b->len)
                return 0;
-        if(memcmp(a->data,b->data,a->len))
+        if (memcmp(a->data, b->data, a->len))
                return 0;
        return 1;
 }
@@ -180,6 +180,7 @@ nlmsvc_create_block(struct svc_rqst *rqstp, struct nlm_host *host,
        struct nlm_block        *block;
        struct nlm_rqst         *call = NULL;
+        nlm_get_host(host);
        call = nlm_alloc_call(host);
        if (call == NULL)
                return NULL;
@@ -358,10 +359,10 @@ nlmsvc_defer_lock_rqst(struct svc_rqst *rqstp, struct nlm_block *block)
 */
 __be32
 nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
-                        struct nlm_lock *lock, int wait, struct nlm_cookie *cookie)
+            struct nlm_host *host, struct nlm_lock *lock, int wait,
+            struct nlm_cookie *cookie)
 {
        struct nlm_block        *block = NULL;
-        struct nlm_host         *host;
        int                     error;
        __be32                  ret;
@@ -373,11 +374,6 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
                                (long long)lock->fl.fl_end,
                                wait);
-        /* Create host handle for callback */
-        host = nlmsvc_lookup_host(rqstp, lock->caller, lock->len);
-        if (host == NULL)
-                return nlm_lck_denied_nolocks;
        /* Lock file against concurrent access */
        mutex_lock(&file->f_mutex);
        /* Get existing block (in case client is busy-waiting)
@@ -385,8 +381,7 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
         */
        block = nlmsvc_lookup_block(file, lock);
        if (block == NULL) {
-                block = nlmsvc_create_block(rqstp, nlm_get_host(host), file,
+                block = nlmsvc_create_block(rqstp, host, file, lock, cookie);
-                                lock, cookie);
                ret = nlm_lck_denied_nolocks;
                if (block == NULL)
                        goto out;
@@ -417,14 +412,14 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
        lock->fl.fl_flags &= ~FL_SLEEP;
        dprintk("lockd: vfs_lock_file returned %d\n", error);
-        switch(error) {
+        switch (error) {
                case 0:
                        ret = nlm_granted;
                        goto out;
                case -EAGAIN:
                        ret = nlm_lck_denied;
-                        break;
+                        goto out;
-                case -EINPROGRESS:
+                case FILE_LOCK_DEFERRED:
                        if (wait)
                                break;
                        /* Filesystem lock operation is in progress
@@ -439,10 +434,6 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
                        goto out;
        }
-        ret = nlm_lck_denied;
-        if (!wait)
-                goto out;
        ret = nlm_lck_blocked;
        /* Append to list of blocked */
@@ -450,7 +441,6 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
 out:
        mutex_unlock(&file->f_mutex);
        nlmsvc_release_block(block);
-        nlm_release_host(host);
        dprintk("lockd: nlmsvc_lock returned %u\n", ret);
        return ret;
 }
@@ -460,8 +450,8 @@ out:
 */
 __be32
 nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file,
-                struct nlm_lock *lock, struct nlm_lock *conflock,
+                struct nlm_host *host, struct nlm_lock *lock,
-                struct nlm_cookie *cookie)
+                struct nlm_lock *conflock, struct nlm_cookie *cookie)
 {
        struct nlm_block        *block = NULL;
        int                     error;
@@ -479,16 +469,9 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file,
        if (block == NULL) {
                struct file_lock *conf = kzalloc(sizeof(*conf), GFP_KERNEL);
-                struct nlm_host *host;
                if (conf == NULL)
                        return nlm_granted;
-                /* Create host handle for callback */
-                host = nlmsvc_lookup_host(rqstp, lock->caller, lock->len);
-                if (host == NULL) {
-                        kfree(conf);
-                        return nlm_lck_denied_nolocks;
-                }
                block = nlmsvc_create_block(rqstp, host, file, lock, cookie);
                if (block == NULL) {
                        kfree(conf);
@@ -520,7 +503,7 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file,
        }
        error = vfs_test_lock(file->f_file, &lock->fl);
-        if (error == -EINPROGRESS) {
+        if (error == FILE_LOCK_DEFERRED) {
                ret = nlmsvc_defer_lock_rqst(rqstp, block);
                goto out;
        }
@@ -744,8 +727,7 @@ nlmsvc_grant_blocked(struct nlm_block *block)
        switch (error) {
        case 0:
                break;
-        case -EAGAIN:
+        case FILE_LOCK_DEFERRED:
-        case -EINPROGRESS:
                dprintk("lockd: lock still blocked error %d\n", error);
                nlmsvc_insert_block(block, NLM_NEVER);
                nlmsvc_release_block(block);
@@ -897,7 +879,7 @@ nlmsvc_retry_blocked(void)
                if (block->b_when == NLM_NEVER)
                        break;
-                if (time_after(block->b_when,jiffies)) {
+                if (time_after(block->b_when, jiffies)) {
                        timeout = block->b_when - jiffies;
                        break;
                }
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index ce6952b50a75..76019d2ff72d 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -87,8 +87,7 @@ nlmsvc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp,
        return 0;
 no_locks:
-        if (host)
+        nlm_release_host(host);
-                nlm_release_host(host);
        if (error)
                return error;
        return nlm_lck_denied_nolocks;
@@ -129,7 +128,7 @@ nlmsvc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp,
                return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success;
        /* Now check for conflicting locks */
-        resp->status = cast_status(nlmsvc_testlock(rqstp, file, &argp->lock, &resp->lock, &resp->cookie));
+        resp->status = cast_status(nlmsvc_testlock(rqstp, file, host, &argp->lock, &resp->lock, &resp->cookie));
        if (resp->status == nlm_drop_reply)
                rc = rpc_drop_reply;
        else
@@ -176,7 +175,7 @@ nlmsvc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp,
 #endif
        /* Now try to lock the file */
-        resp->status = cast_status(nlmsvc_lock(rqstp, file, &argp->lock,
+        resp->status = cast_status(nlmsvc_lock(rqstp, file, host, &argp->lock,
                                               argp->block, &argp->cookie));
        if (resp->status == nlm_drop_reply)
                rc = rpc_drop_reply;
diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c
index d1c48b539df8..198b4e55b373 100644
--- a/fs/lockd/svcsubs.c
+++ b/fs/lockd/svcsubs.c
@@ -373,13 +373,16 @@ nlmsvc_free_host_resources(struct nlm_host *host)
        }
 }
-/*
+/**
- * Remove all locks held for clients
+ * nlmsvc_invalidate_all - remove all locks held for clients
+ *
+ * Release all locks held by NFS clients.
+ *
 */
 void
 nlmsvc_invalidate_all(void)
 {
-        /* Release all locks held by NFS clients.
+        /*
         * Previously, the code would call
         * nlmsvc_free_host_resources for each client in
         * turn, which is about as inefficient as it gets.
@@ -396,6 +399,12 @@ nlmsvc_match_sb(void *datap, struct nlm_file *file)
        return sb == file->f_file->f_path.mnt->mnt_sb;
 }
+/**
+ * nlmsvc_unlock_all_by_sb - release locks held on this file system
+ * @sb: super block
+ *
+ * Release all locks held by clients accessing this file system.
+ */
 int
 nlmsvc_unlock_all_by_sb(struct super_block *sb)
 {
@@ -409,17 +418,22 @@ EXPORT_SYMBOL_GPL(nlmsvc_unlock_all_by_sb);
 static int
 nlmsvc_match_ip(void *datap, struct nlm_host *host)
 {
-        __be32 *server_addr = datap;
+        return nlm_cmp_addr(&host->h_saddr, datap);
-        return host->h_saddr.sin_addr.s_addr == *server_addr;
 }
+/**
+ * nlmsvc_unlock_all_by_ip - release local locks by IP address
+ * @server_addr: server's IP address as seen by clients
+ *
+ * Release all locks held by clients accessing this host
+ * via the passed in IP address.
+ */
 int
-nlmsvc_unlock_all_by_ip(__be32 server_addr)
+nlmsvc_unlock_all_by_ip(struct sockaddr *server_addr)
 {
        int ret;
-        ret = nlm_traverse_files(&server_addr, nlmsvc_match_ip, NULL);
-        return ret ? -EIO : 0;
+        ret = nlm_traverse_files(server_addr, nlmsvc_match_ip, NULL);
+        return ret ? -EIO : 0;
 }
 EXPORT_SYMBOL_GPL(nlmsvc_unlock_all_by_ip);
diff --git a/fs/locks.c b/fs/locks.c
index dce8c747371c..5eb259e3cd38 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -201,7 +201,7 @@ EXPORT_SYMBOL(locks_init_lock);
 * Initialises the fields of the file lock which are invariant for
 * free file_locks.
 */
-static void init_once(struct kmem_cache *cache, void *foo)
+static void init_once(void *foo)
 {
        struct file_lock *lock = (struct file_lock *) foo;
@@ -779,8 +779,10 @@ find_conflict:
                if (!flock_locks_conflict(request, fl))
                        continue;
                error = -EAGAIN;
-                if (request->fl_flags & FL_SLEEP)
+                if (!(request->fl_flags & FL_SLEEP))
-                        locks_insert_block(fl, request);
+                        goto out;
+                error = FILE_LOCK_DEFERRED;
+                locks_insert_block(fl, request);
                goto out;
        }
        if (request->fl_flags & FL_ACCESS)
@@ -836,7 +838,7 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str
                        error = -EDEADLK;
                        if (posix_locks_deadlock(request, fl))
                                goto out;
-                        error = -EAGAIN;
+                        error = FILE_LOCK_DEFERRED;
                        locks_insert_block(fl, request);
                        goto out;
                }
@@ -1035,7 +1037,7 @@ int posix_lock_file_wait(struct file *filp, struct file_lock *fl)
        might_sleep ();
        for (;;) {
                error = posix_lock_file(filp, fl, NULL);
-                if ((error != -EAGAIN) || !(fl->fl_flags & FL_SLEEP))
+                if (error != FILE_LOCK_DEFERRED)
                        break;
                error = wait_event_interruptible(fl->fl_wait, !fl->fl_next);
                if (!error)
@@ -1107,9 +1109,7 @@ int locks_mandatory_area(int read_write, struct inode *inode,
        for (;;) {
                error = __posix_lock_file(inode, &fl, NULL);
-                if (error != -EAGAIN)
+                if (error != FILE_LOCK_DEFERRED)
-                        break;
-                if (!(fl.fl_flags & FL_SLEEP))
                        break;
                error = wait_event_interruptible(fl.fl_wait, !fl.fl_next);
                if (!error) {
@@ -1531,7 +1531,7 @@ int flock_lock_file_wait(struct file *filp, struct file_lock *fl)
        might_sleep();
        for (;;) {
                error = flock_lock_file(filp, fl);
-                if ((error != -EAGAIN) || !(fl->fl_flags & FL_SLEEP))
+                if (error != FILE_LOCK_DEFERRED)
                        break;
                error = wait_event_interruptible(fl->fl_wait, !fl->fl_next);
                if (!error)
@@ -1716,17 +1716,17 @@ out:
 * fl_grant is set. Callers expecting ->lock() to return asynchronously
 * will only use F_SETLK, not F_SETLKW; they will set FL_SLEEP if (and only if)
 * the request is for a blocking lock. When ->lock() does return asynchronously,
- * it must return -EINPROGRESS, and call ->fl_grant() when the lock
+ * it must return FILE_LOCK_DEFERRED, and call ->fl_grant() when the lock
 * request completes.
 * If the request is for non-blocking lock the file system should return
- * -EINPROGRESS then try to get the lock and call the callback routine with
+ * FILE_LOCK_DEFERRED then try to get the lock and call the callback routine
- * the result. If the request timed out the callback routine will return a
+ * with the result. If the request timed out the callback routine will return a
 * nonzero return code and the file system should release the lock. The file
 * system is also responsible to keep a corresponding posix lock when it
 * grants a lock so the VFS can find out which locks are locally held and do
 * the correct lock cleanup when required.
 * The underlying filesystem must not drop the kernel lock or call
- * ->fl_grant() before returning to the caller with a -EINPROGRESS
+ * ->fl_grant() before returning to the caller with a FILE_LOCK_DEFERRED
 * return code.
 */
 int vfs_lock_file(struct file *filp, unsigned int cmd, struct file_lock *fl, struct file_lock *conf)
@@ -1738,6 +1738,30 @@ int vfs_lock_file(struct file *filp, unsigned int cmd, struct file_lock *fl, str
 }
 EXPORT_SYMBOL_GPL(vfs_lock_file);
+static int do_lock_file_wait(struct file *filp, unsigned int cmd,
+                             struct file_lock *fl)
+{
+        int error;
+        error = security_file_lock(filp, fl->fl_type);
+        if (error)
+                return error;
+        for (;;) {
+                error = vfs_lock_file(filp, cmd, fl, NULL);
+                if (error != FILE_LOCK_DEFERRED)
+                        break;
+                error = wait_event_interruptible(fl->fl_wait, !fl->fl_next);
+                if (!error)
+                        continue;
+                locks_delete_block(fl);
+                break;
+        }
+        return error;
+}
 /* Apply the lock described by l to an open file descriptor.
 * This implements both the F_SETLK and F_SETLKW commands of fcntl().
 */
@@ -1795,26 +1819,7 @@ again:
                goto out;
        }
-        error = security_file_lock(filp, file_lock->fl_type);
+        error = do_lock_file_wait(filp, cmd, file_lock);
-        if (error)
-                goto out;
-        if (filp->f_op && filp->f_op->lock != NULL)
-                error = filp->f_op->lock(filp, cmd, file_lock);
-        else {
-                for (;;) {
-                        error = posix_lock_file(filp, file_lock, NULL);
-                        if (error != -EAGAIN || cmd == F_SETLK)
-                                break;
-                        error = wait_event_interruptible(file_lock->fl_wait,
-                                        !file_lock->fl_next);
-                        if (!error)
-                                continue;
-                        locks_delete_block(file_lock);
-                        break;
-                }
-        }
        /*
         * Attempt to detect a close/fcntl race and recover by
@@ -1932,26 +1937,7 @@ again:
                goto out;
        }
-        error = security_file_lock(filp, file_lock->fl_type);
+        error = do_lock_file_wait(filp, cmd, file_lock);
-        if (error)
-                goto out;
-        if (filp->f_op && filp->f_op->lock != NULL)
-                error = filp->f_op->lock(filp, cmd, file_lock);
-        else {
-                for (;;) {
-                        error = posix_lock_file(filp, file_lock, NULL);
-                        if (error != -EAGAIN || cmd == F_SETLK64)
-                                break;
-                        error = wait_event_interruptible(file_lock->fl_wait,
-                                        !file_lock->fl_next);
-                        if (!error)
-                                continue;
-                        locks_delete_block(file_lock);
-                        break;
-                }
-        }
        /*
         * Attempt to detect a close/fcntl race and recover by
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index 84f6242ba6fc..d1d1eb84679d 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -68,7 +68,7 @@ static void minix_destroy_inode(struct inode *inode)
        kmem_cache_free(minix_inode_cachep, minix_i(inode));
 }
-static void init_once(struct kmem_cache * cachep, void *foo)
+static void init_once(void *foo)
 {
        struct minix_inode_info *ei = (struct minix_inode_info *) foo;
@@ -256,9 +256,6 @@ static int minix_fill_super(struct super_block *s, void *data, int silent)
        if (!s->s_root)
                goto out_iput;
-        if (!NO_TRUNCATE)
-                s->s_root->d_op = &minix_dentry_operations;
        if (!(s->s_flags & MS_RDONLY)) {
                if (sbi->s_version != MINIX_V3) /* s_state is now out from V3 sb */
                        ms->s_state &= ~MINIX_VALID_FS;
diff --git a/fs/minix/minix.h b/fs/minix/minix.h
index 326edfe96108..e6a0b193bea4 100644
--- a/fs/minix/minix.h
+++ b/fs/minix/minix.h
@@ -2,11 +2,6 @@
 #include <linux/pagemap.h>
 #include <linux/minix_fs.h>
-/*
- * change the define below to 0 if you want names > info->s_namelen chars to be
- * truncated. Else they will be disallowed (ENAMETOOLONG).
- */
-#define NO_TRUNCATE 1
 #define INODE_VERSION(inode)    minix_sb(inode->i_sb)->s_version
 #define MINIX_V1                0x0001          /* original minix fs */
 #define MINIX_V2                0x0002          /* minix V2 fs */
@@ -83,7 +78,6 @@ extern const struct inode_operations minix_file_inode_operations;
 extern const struct inode_operations minix_dir_inode_operations;
 extern const struct file_operations minix_file_operations;
 extern const struct file_operations minix_dir_operations;
-extern struct dentry_operations minix_dentry_operations;
 static inline struct minix_sb_info *minix_sb(struct super_block *sb)
 {
diff --git a/fs/minix/namei.c b/fs/minix/namei.c
index 102241bc9c79..32b131cd6121 100644
--- a/fs/minix/namei.c
+++ b/fs/minix/namei.c
@@ -18,30 +18,6 @@ static int add_nondir(struct dentry *dentry, struct inode *inode)
        return err;
 }
-static int minix_hash(struct dentry *dentry, struct qstr *qstr)
-{
-        unsigned long hash;
-        int i;
-        const unsigned char *name;
-        i = minix_sb(dentry->d_inode->i_sb)->s_namelen;
-        if (i >= qstr->len)
-                return 0;
-        /* Truncate the name in place, avoids having to define a compare
-           function. */
-        qstr->len = i;
-        name = qstr->name;
-        hash = init_name_hash();
-        while (i--)
-                hash = partial_name_hash(*name++, hash);
-        qstr->hash = end_name_hash(hash);
-        return 0;
-}
-struct dentry_operations minix_dentry_operations = {
-        .d_hash         = minix_hash,
-};
 static struct dentry *minix_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd)
 {
        struct inode * inode = NULL;
diff --git a/fs/msdos/namei.c b/fs/msdos/namei.c
index 1f7f2956412a..e844b9809d27 100644
--- a/fs/msdos/namei.c
+++ b/fs/msdos/namei.c
@@ -14,12 +14,7 @@
 /* Characters that are undesirable in an MS-DOS file name */
 static unsigned char bad_chars[] = "*?<>|\"";
-static unsigned char bad_if_strict_pc[] = "+=,; ";
+static unsigned char bad_if_strict[] = "+=,; ";
-/* GEMDOS is less restrictive */
-static unsigned char bad_if_strict_atari[] = " ";
-#define bad_if_strict(opts) \
-        ((opts)->atari ? bad_if_strict_atari : bad_if_strict_pc)
 /***** Formats an MS-DOS file name. Rejects invalid names. */
 static int msdos_format_name(const unsigned char *name, int len,
@@ -40,21 +35,20 @@ static int msdos_format_name(const unsigned char *name, int len,
                        /* Get rid of dot - test for it elsewhere */
                        name++;
                        len--;
-                } else if (!opts->atari)
+                } else
                        return -EINVAL;
        }
        /*
-         * disallow names that _really_ start with a dot for MS-DOS,
+         * disallow names that _really_ start with a dot
-         * GEMDOS does not care
         */
-        space = !opts->atari;
+        space = 1;
        c = 0;
        for (walk = res; len && walk - res < 8; walk++) {
                c = *name++;
                len--;
                if (opts->name_check != 'r' && strchr(bad_chars, c))
                        return -EINVAL;
-                if (opts->name_check == 's' && strchr(bad_if_strict(opts), c))
+                if (opts->name_check == 's' && strchr(bad_if_strict, c))
                        return -EINVAL;
                if (c >= 'A' && c <= 'Z' && opts->name_check == 's')
                        return -EINVAL;
@@ -94,7 +88,7 @@ static int msdos_format_name(const unsigned char *name, int len,
                        if (opts->name_check != 'r' && strchr(bad_chars, c))
                                return -EINVAL;
                        if (opts->name_check == 's' &&
-                            strchr(bad_if_strict(opts), c))
+                            strchr(bad_if_strict, c))
                                return -EINVAL;
                        if (c < ' ' || c == ':' || c == '\\')
                                return -EINVAL;
@@ -243,6 +237,7 @@ static int msdos_add_entry(struct inode *dir, const unsigned char *name,
                           int is_dir, int is_hid, int cluster,
                           struct timespec *ts, struct fat_slot_info *sinfo)
 {
+        struct msdos_sb_info *sbi = MSDOS_SB(dir->i_sb);
        struct msdos_dir_entry de;
        __le16 time, date;
        int err;
@@ -252,7 +247,7 @@ static int msdos_add_entry(struct inode *dir, const unsigned char *name,
        if (is_hid)
                de.attr |= ATTR_HIDDEN;
        de.lcase = 0;
-        fat_date_unix2dos(ts->tv_sec, &time, &date);
+        fat_date_unix2dos(ts->tv_sec, &time, &date, sbi->options.tz_utc);
        de.cdate = de.adate = 0;
        de.ctime = 0;
        de.ctime_cs = 0;
diff --git a/fs/namei.c b/fs/namei.c
index 01e67dddcc3d..a7b0a0b80128 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -31,7 +31,6 @@
 #include <linux/file.h>
 #include <linux/fcntl.h>
 #include <linux/device_cgroup.h>
-#include <asm/namei.h>
 #include <asm/uaccess.h>
 #define ACC_MODE(x) ("\000\004\002\006"[(x)&O_ACCMODE])
@@ -185,6 +184,8 @@ int generic_permission(struct inode *inode, int mask,
 {
        umode_t                 mode = inode->i_mode;
+        mask &= MAY_READ | MAY_WRITE | MAY_EXEC;
        if (current->fsuid == inode->i_uid)
                mode >>= 6;
        else {
@@ -203,7 +204,7 @@ int generic_permission(struct inode *inode, int mask,
        /*
         * If the DACs are ok we don't need any capability check.
         */
-        if (((mode & mask & (MAY_READ|MAY_WRITE|MAY_EXEC)) == mask))
+        if ((mask & ~mode) == 0)
                return 0;
 check_capabilities:
@@ -226,13 +227,9 @@ int generic_permission(struct inode *inode, int mask,
        return -EACCES;
 }
-int permission(struct inode *inode, int mask, struct nameidata *nd)
+int inode_permission(struct inode *inode, int mask)
 {
-        int retval, submask;
+        int retval;
-        struct vfsmount *mnt = NULL;
-        if (nd)
-                mnt = nd->path.mnt;
        if (mask & MAY_WRITE) {
                umode_t mode = inode->i_mode;
@@ -251,19 +248,9 @@ int permission(struct inode *inode, int mask, struct nameidata *nd)
                        return -EACCES;
        }
-        if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode)) {
-                /*
-                 * MAY_EXEC on regular files is denied if the fs is mounted
-                 * with the "noexec" flag.
-                 */
-                if (mnt && (mnt->mnt_flags & MNT_NOEXEC))
-                        return -EACCES;
-        }
        /* Ordinary permission routines do not understand MAY_APPEND. */
-        submask = mask & ~MAY_APPEND;
        if (inode->i_op && inode->i_op->permission) {
-                retval = inode->i_op->permission(inode, submask, nd);
+                retval = inode->i_op->permission(inode, mask);
                if (!retval) {
                        /*
                         * Exec permission on a regular file is denied if none
@@ -277,7 +264,7 @@ int permission(struct inode *inode, int mask, struct nameidata *nd)
                                return -EACCES;
                }
        } else {
-                retval = generic_permission(inode, submask, NULL);
+                retval = generic_permission(inode, mask, NULL);
        }
        if (retval)
                return retval;
@@ -286,7 +273,8 @@ int permission(struct inode *inode, int mask, struct nameidata *nd)
        if (retval)
                return retval;
-        return security_inode_permission(inode, mask, nd);
+        return security_inode_permission(inode,
+                        mask & (MAY_READ|MAY_WRITE|MAY_EXEC));
 }
 /**
@@ -301,7 +289,7 @@ int permission(struct inode *inode, int mask, struct nameidata *nd)
 */
 int vfs_permission(struct nameidata *nd, int mask)
 {
-        return permission(nd->path.dentry->d_inode, mask, nd);
+        return inode_permission(nd->path.dentry->d_inode, mask);
 }
 /**
@@ -318,7 +306,7 @@ int vfs_permission(struct nameidata *nd, int mask)
 */
 int file_permission(struct file *file, int mask)
 {
-        return permission(file->f_path.dentry->d_inode, mask, NULL);
+        return inode_permission(file->f_path.dentry->d_inode, mask);
 }
 /*
@@ -459,8 +447,7 @@ static struct dentry * cached_lookup(struct dentry * parent, struct qstr * name,
 * short-cut DAC fails, then call permission() to do more
 * complete permission check.
 */
-static int exec_permission_lite(struct inode *inode,
+static int exec_permission_lite(struct inode *inode)
-                                       struct nameidata *nd)
 {
        umode_t mode = inode->i_mode;
@@ -486,7 +473,7 @@ static int exec_permission_lite(struct inode *inode,
        return -EACCES;
 ok:
-        return security_inode_permission(inode, MAY_EXEC, nd);
+        return security_inode_permission(inode, MAY_EXEC);
 }
 /*
@@ -519,7 +506,14 @@ static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, s
         */
        result = d_lookup(parent, name);
        if (!result) {
-                struct dentry * dentry = d_alloc(parent, name);
+                struct dentry *dentry;
+                /* Don't create child dentry for a dead directory. */
+                result = ERR_PTR(-ENOENT);
+                if (IS_DEADDIR(dir))
+                        goto out_unlock;
+                dentry = d_alloc(parent, name);
                result = ERR_PTR(-ENOMEM);
                if (dentry) {
                        result = dir->i_op->lookup(dir, dentry, nd);
@@ -528,6 +522,7 @@ static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, s
                        else
                                result = dentry;
                }
+out_unlock:
                mutex_unlock(&dir->i_mutex);
                return result;
        }
@@ -545,27 +540,16 @@ static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, s
        return result;
 }
-static int __emul_lookup_dentry(const char *, struct nameidata *);
 /* SMP-safe */
-static __always_inline int
+static __always_inline void
 walk_init_root(const char *name, struct nameidata *nd)
 {
        struct fs_struct *fs = current->fs;
        read_lock(&fs->lock);
-        if (fs->altroot.dentry && !(nd->flags & LOOKUP_NOALT)) {
-                nd->path = fs->altroot;
-                path_get(&fs->altroot);
-                read_unlock(&fs->lock);
-                if (__emul_lookup_dentry(name,nd))
-                        return 0;
-                read_lock(&fs->lock);
-        }
        nd->path = fs->root;
        path_get(&fs->root);
        read_unlock(&fs->lock);
-        return 1;
 }
 /*
@@ -606,12 +590,9 @@ static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *l
        if (*link == '/') {
                path_put(&nd->path);
-                if (!walk_init_root(link, nd))
+                walk_init_root(link, nd);
-                        /* weird __emul_prefix() stuff did it */
-                        goto out;
        }
        res = link_path_walk(link, nd);
-out:
        if (nd->depth || res || nd->last_type!=LAST_NORM)
                return res;
        /*
@@ -889,7 +870,7 @@ static int __link_path_walk(const char *name, struct nameidata *nd)
                unsigned int c;
                nd->flags |= LOOKUP_CONTINUE;
-                err = exec_permission_lite(inode, nd);
+                err = exec_permission_lite(inode);
                if (err == -EAGAIN)
                        err = vfs_permission(nd, MAY_EXEC);
                if (err)
@@ -1060,67 +1041,6 @@ static int path_walk(const char *name, struct nameidata *nd)
        return link_path_walk(name, nd);
 }
-/* 
- * SMP-safe: Returns 1 and nd will have valid dentry and mnt, if
- * everything is done. Returns 0 and drops input nd, if lookup failed;
- */
-static int __emul_lookup_dentry(const char *name, struct nameidata *nd)
-{
-        if (path_walk(name, nd))
-                return 0;               /* something went wrong... */
-        if (!nd->path.dentry->d_inode ||
-            S_ISDIR(nd->path.dentry->d_inode->i_mode)) {
-                struct path old_path = nd->path;
-                struct qstr last = nd->last;
-                int last_type = nd->last_type;
-                struct fs_struct *fs = current->fs;
-                /*
-                 * NAME was not found in alternate root or it's a directory.
-                 * Try to find it in the normal root:
-                 */
-                nd->last_type = LAST_ROOT;
-                read_lock(&fs->lock);
-                nd->path = fs->root;
-                path_get(&fs->root);
-                read_unlock(&fs->lock);
-                if (path_walk(name, nd) == 0) {
-                        if (nd->path.dentry->d_inode) {
-                                path_put(&old_path);
-                                return 1;
-                        }
-                        path_put(&nd->path);
-                }
-                nd->path = old_path;
-                nd->last = last;
-                nd->last_type = last_type;
-        }
-        return 1;
-}
-void set_fs_altroot(void)
-{
-        char *emul = __emul_prefix();
-        struct nameidata nd;
-        struct path path = {}, old_path;
-        int err;
-        struct fs_struct *fs = current->fs;
-        if (!emul)
-                goto set_it;
-        err = path_lookup(emul, LOOKUP_FOLLOW|LOOKUP_DIRECTORY|LOOKUP_NOALT, &nd);
-        if (!err)
-                path = nd.path;
-set_it:
-        write_lock(&fs->lock);
-        old_path = fs->altroot;
-        fs->altroot = path;
-        write_unlock(&fs->lock);
-        if (old_path.dentry)
-                path_put(&old_path);
-}
 /* Returns 0 and nd will be valid on success; Retuns error, otherwise. */
 static int do_path_lookup(int dfd, const char *name,
                                unsigned int flags, struct nameidata *nd)
@@ -1136,14 +1056,6 @@ static int do_path_lookup(int dfd, const char *name,
        if (*name=='/') {
                read_lock(&fs->lock);
-                if (fs->altroot.dentry && !(nd->flags & LOOKUP_NOALT)) {
-                        nd->path = fs->altroot;
-                        path_get(&fs->altroot);
-                        read_unlock(&fs->lock);
-                        if (__emul_lookup_dentry(name,nd))
-                                goto out; /* found in altroot */
-                        read_lock(&fs->lock);
-                }
                nd->path = fs->root;
                path_get(&fs->root);
                read_unlock(&fs->lock);
@@ -1177,7 +1089,6 @@ static int do_path_lookup(int dfd, const char *name,
        }
        retval = path_walk(name, nd);
-out:
        if (unlikely(!retval && !audit_dummy_context() && nd->path.dentry &&
                                nd->path.dentry->d_inode))
                audit_inode(name, nd->path.dentry);
@@ -1282,19 +1193,6 @@ static int path_lookup_create(int dfd, const char *name,
                        nd, open_flags, create_mode);
 }
-int __user_path_lookup_open(const char __user *name, unsigned int lookup_flags,
-                struct nameidata *nd, int open_flags)
-{
-        char *tmp = getname(name);
-        int err = PTR_ERR(tmp);
-        if (!IS_ERR(tmp)) {
-                err = __path_lookup_intent_open(AT_FDCWD, tmp, lookup_flags, nd, open_flags, 0);
-                putname(tmp);
-        }
-        return err;
-}
 static struct dentry *__lookup_hash(struct qstr *name,
                struct dentry *base, struct nameidata *nd)
 {
@@ -1317,7 +1215,14 @@ static struct dentry *__lookup_hash(struct qstr *name,
        dentry = cached_lookup(base, name, nd);
        if (!dentry) {
-                struct dentry *new = d_alloc(base, name);
+                struct dentry *new;
+                /* Don't create child dentry for a dead directory. */
+                dentry = ERR_PTR(-ENOENT);
+                if (IS_DEADDIR(inode))
+                        goto out;
+                new = d_alloc(base, name);
                dentry = ERR_PTR(-ENOMEM);
                if (!new)
                        goto out;
@@ -1340,7 +1245,7 @@ static struct dentry *lookup_hash(struct nameidata *nd)
 {
        int err;
-        err = permission(nd->path.dentry->d_inode, MAY_EXEC, nd);
+        err = inode_permission(nd->path.dentry->d_inode, MAY_EXEC);
        if (err)
                return ERR_PTR(err);
        return __lookup_hash(&nd->last, nd->path.dentry, nd);
@@ -1388,7 +1293,7 @@ struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
        if (err)
                return ERR_PTR(err);
-        err = permission(base->d_inode, MAY_EXEC, NULL);
+        err = inode_permission(base->d_inode, MAY_EXEC);
        if (err)
                return ERR_PTR(err);
        return __lookup_hash(&this, base, NULL);
@@ -1416,22 +1321,40 @@ struct dentry *lookup_one_noperm(const char *name, struct dentry *base)
        return __lookup_hash(&this, base, NULL);
 }
-int __user_walk_fd(int dfd, const char __user *name, unsigned flags,
+int user_path_at(int dfd, const char __user *name, unsigned flags,
-                            struct nameidata *nd)
+                 struct path *path)
 {
+        struct nameidata nd;
        char *tmp = getname(name);
        int err = PTR_ERR(tmp);
        if (!IS_ERR(tmp)) {
-                err = do_path_lookup(dfd, tmp, flags, nd);
+                BUG_ON(flags & LOOKUP_PARENT);
+                err = do_path_lookup(dfd, tmp, flags, &nd);
                putname(tmp);
+                if (!err)
+                        *path = nd.path;
        }
        return err;
 }
-int __user_walk(const char __user *name, unsigned flags, struct nameidata *nd)
+static int user_path_parent(int dfd, const char __user *path,
+                        struct nameidata *nd, char **name)
 {
-        return __user_walk_fd(AT_FDCWD, name, flags, nd);
+        char *s = getname(path);
+        int error;
+        if (IS_ERR(s))
+                return PTR_ERR(s);
+        error = do_path_lookup(dfd, s, LOOKUP_PARENT, nd);
+        if (error)
+                putname(s);
+        else
+                *name = s;
+        return error;
 }
 /*
@@ -1478,7 +1401,7 @@ static int may_delete(struct inode *dir,struct dentry *victim,int isdir)
        BUG_ON(victim->d_parent->d_inode != dir);
        audit_inode_child(victim->d_name.name, victim, dir);
-        error = permission(dir,MAY_WRITE | MAY_EXEC, NULL);
+        error = inode_permission(dir, MAY_WRITE | MAY_EXEC);
        if (error)
                return error;
        if (IS_APPEND(dir))
@@ -1515,7 +1438,7 @@ static inline int may_create(struct inode *dir, struct dentry *child,
                return -EEXIST;
        if (IS_DEADDIR(dir))
                return -ENOENT;
-        return permission(dir,MAY_WRITE | MAY_EXEC, nd);
+        return inode_permission(dir, MAY_WRITE | MAY_EXEC);
 }
 /* 
@@ -1755,7 +1678,7 @@ struct file *do_filp_open(int dfd, const char *pathname,
        int will_write;
        int flag = open_to_namei_flags(open_flag);
-        acc_mode = ACC_MODE(flag);
+        acc_mode = MAY_OPEN | ACC_MODE(flag);
        /* O_TRUNC implies we need access checks for write permissions */
        if (flag & O_TRUNC)
@@ -2071,20 +1994,18 @@ static int may_mknod(mode_t mode)
 asmlinkage long sys_mknodat(int dfd, const char __user *filename, int mode,
                                unsigned dev)
 {
-        int error = 0;
+        int error;
-        char * tmp;
+        char *tmp;
-        struct dentry * dentry;
+        struct dentry *dentry;
        struct nameidata nd;
        if (S_ISDIR(mode))
                return -EPERM;
-        tmp = getname(filename);
-        if (IS_ERR(tmp))
-                return PTR_ERR(tmp);
-        error = do_path_lookup(dfd, tmp, LOOKUP_PARENT, &nd);
+        error = user_path_parent(dfd, filename, &nd, &tmp);
        if (error)
-                goto out;
+                return error;
        dentry = lookup_create(&nd, 0);
        if (IS_ERR(dentry)) {
                error = PTR_ERR(dentry);
@@ -2116,7 +2037,6 @@ out_dput:
 out_unlock:
        mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
        path_put(&nd.path);
-out:
        putname(tmp);
        return error;
@@ -2156,14 +2076,10 @@ asmlinkage long sys_mkdirat(int dfd, const char __user *pathname, int mode)
        struct dentry *dentry;
        struct nameidata nd;
-        tmp = getname(pathname);
+        error = user_path_parent(dfd, pathname, &nd, &tmp);
-        error = PTR_ERR(tmp);
+        if (error)
-        if (IS_ERR(tmp))
                goto out_err;
-        error = do_path_lookup(dfd, tmp, LOOKUP_PARENT, &nd);
-        if (error)
-                goto out;
        dentry = lookup_create(&nd, 1);
        error = PTR_ERR(dentry);
        if (IS_ERR(dentry))
@@ -2181,7 +2097,6 @@ out_dput:
 out_unlock:
        mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
        path_put(&nd.path);
-out:
        putname(tmp);
 out_err:
        return error;
@@ -2259,13 +2174,9 @@ static long do_rmdir(int dfd, const char __user *pathname)
        struct dentry *dentry;
        struct nameidata nd;
-        name = getname(pathname);
+        error = user_path_parent(dfd, pathname, &nd, &name);
-        if(IS_ERR(name))
-                return PTR_ERR(name);
-        error = do_path_lookup(dfd, name, LOOKUP_PARENT, &nd);
        if (error)
-                goto exit;
+                return error;
        switch(nd.last_type) {
                case LAST_DOTDOT:
@@ -2294,7 +2205,6 @@ exit2:
        mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
 exit1:
        path_put(&nd.path);
-exit:
        putname(name);
        return error;
 }
@@ -2343,19 +2253,16 @@ int vfs_unlink(struct inode *dir, struct dentry *dentry)
 */
 static long do_unlinkat(int dfd, const char __user *pathname)
 {
-        int error = 0;
+        int error;
-        char * name;
+        char *name;
        struct dentry *dentry;
        struct nameidata nd;
        struct inode *inode = NULL;
-        name = getname(pathname);
+        error = user_path_parent(dfd, pathname, &nd, &name);
-        if(IS_ERR(name))
-                return PTR_ERR(name);
-        error = do_path_lookup(dfd, name, LOOKUP_PARENT, &nd);
        if (error)
-                goto exit;
+                return error;
        error = -EISDIR;
        if (nd.last_type != LAST_NORM)
                goto exit1;
@@ -2382,7 +2289,6 @@ static long do_unlinkat(int dfd, const char __user *pathname)
                iput(inode);    /* truncate the inode here */
 exit1:
        path_put(&nd.path);
-exit:
        putname(name);
        return error;
@@ -2408,7 +2314,7 @@ asmlinkage long sys_unlink(const char __user *pathname)
        return do_unlinkat(AT_FDCWD, pathname);
 }
-int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname, int mode)
+int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname)
 {
        int error = may_create(dir, dentry, NULL);
@@ -2432,23 +2338,20 @@ int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname, i
 asmlinkage long sys_symlinkat(const char __user *oldname,
                              int newdfd, const char __user *newname)
 {
-        int error = 0;
+        int error;
-        char * from;
+        char *from;
-        char * to;
+        char *to;
        struct dentry *dentry;
        struct nameidata nd;
        from = getname(oldname);
-        if(IS_ERR(from))
+        if (IS_ERR(from))
                return PTR_ERR(from);
-        to = getname(newname);
-        error = PTR_ERR(to);
-        if (IS_ERR(to))
-                goto out_putname;
-        error = do_path_lookup(newdfd, to, LOOKUP_PARENT, &nd);
+        error = user_path_parent(newdfd, newname, &nd, &to);
        if (error)
-                goto out;
+                goto out_putname;
        dentry = lookup_create(&nd, 0);
        error = PTR_ERR(dentry);
        if (IS_ERR(dentry))
@@ -2457,14 +2360,13 @@ asmlinkage long sys_symlinkat(const char __user *oldname,
        error = mnt_want_write(nd.path.mnt);
        if (error)
                goto out_dput;
-        error = vfs_symlink(nd.path.dentry->d_inode, dentry, from, S_IALLUGO);
+        error = vfs_symlink(nd.path.dentry->d_inode, dentry, from);
        mnt_drop_write(nd.path.mnt);
 out_dput:
        dput(dentry);
 out_unlock:
        mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
        path_put(&nd.path);
-out:
        putname(to);
 out_putname:
        putname(from);
@@ -2498,19 +2400,19 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de
                return -EPERM;
        if (!dir->i_op || !dir->i_op->link)
                return -EPERM;
-        if (S_ISDIR(old_dentry->d_inode->i_mode))
+        if (S_ISDIR(inode->i_mode))
                return -EPERM;
        error = security_inode_link(old_dentry, dir, new_dentry);
        if (error)
                return error;
-        mutex_lock(&old_dentry->d_inode->i_mutex);
+        mutex_lock(&inode->i_mutex);
        DQUOT_INIT(dir);
        error = dir->i_op->link(old_dentry, dir, new_dentry);
-        mutex_unlock(&old_dentry->d_inode->i_mutex);
+        mutex_unlock(&inode->i_mutex);
        if (!error)
-                fsnotify_link(dir, old_dentry->d_inode, new_dentry);
+                fsnotify_link(dir, inode, new_dentry);
        return error;
 }
@@ -2528,27 +2430,25 @@ asmlinkage long sys_linkat(int olddfd, const char __user *oldname,
                           int flags)
 {
        struct dentry *new_dentry;
-        struct nameidata nd, old_nd;
+        struct nameidata nd;
+        struct path old_path;
        int error;
-        char * to;
+        char *to;
        if ((flags & ~AT_SYMLINK_FOLLOW) != 0)
                return -EINVAL;
-        to = getname(newname);
+        error = user_path_at(olddfd, oldname,
-        if (IS_ERR(to))
+                             flags & AT_SYMLINK_FOLLOW ? LOOKUP_FOLLOW : 0,
-                return PTR_ERR(to);
+                             &old_path);
-        error = __user_walk_fd(olddfd, oldname,
-                               flags & AT_SYMLINK_FOLLOW ? LOOKUP_FOLLOW : 0,
-                               &old_nd);
        if (error)
-                goto exit;
+                return error;
-        error = do_path_lookup(newdfd, to, LOOKUP_PARENT, &nd);
+        error = user_path_parent(newdfd, newname, &nd, &to);
        if (error)
                goto out;
        error = -EXDEV;
-        if (old_nd.path.mnt != nd.path.mnt)
+        if (old_path.mnt != nd.path.mnt)
                goto out_release;
        new_dentry = lookup_create(&nd, 0);
        error = PTR_ERR(new_dentry);
@@ -2557,7 +2457,7 @@ asmlinkage long sys_linkat(int olddfd, const char __user *oldname,
        error = mnt_want_write(nd.path.mnt);
        if (error)
                goto out_dput;
-        error = vfs_link(old_nd.path.dentry, nd.path.dentry->d_inode, new_dentry);
+        error = vfs_link(old_path.dentry, nd.path.dentry->d_inode, new_dentry);
        mnt_drop_write(nd.path.mnt);
 out_dput:
        dput(new_dentry);
@@ -2565,10 +2465,9 @@ out_unlock:
        mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
 out_release:
        path_put(&nd.path);
-out:
-        path_put(&old_nd.path);
-exit:
        putname(to);
+out:
+        path_put(&old_path);
        return error;
 }
@@ -2621,7 +2520,7 @@ static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
         * we'll need to flip '..'.
         */
        if (new_dir != old_dir) {
-                error = permission(old_dentry->d_inode, MAY_WRITE, NULL);
+                error = inode_permission(old_dentry->d_inode, MAY_WRITE);
                if (error)
                        return error;
        }
@@ -2724,20 +2623,22 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
        return error;
 }
-static int do_rename(int olddfd, const char *oldname,
+asmlinkage long sys_renameat(int olddfd, const char __user *oldname,
-                        int newdfd, const char *newname)
+                             int newdfd, const char __user *newname)
 {
-        int error = 0;
+        struct dentry *old_dir, *new_dir;
-        struct dentry * old_dir, * new_dir;
+        struct dentry *old_dentry, *new_dentry;
-        struct dentry * old_dentry, *new_dentry;
+        struct dentry *trap;
-        struct dentry * trap;
        struct nameidata oldnd, newnd;
+        char *from;
+        char *to;
+        int error;
-        error = do_path_lookup(olddfd, oldname, LOOKUP_PARENT, &oldnd);
+        error = user_path_parent(olddfd, oldname, &oldnd, &from);
        if (error)
                goto exit;
-        error = do_path_lookup(newdfd, newname, LOOKUP_PARENT, &newnd);
+        error = user_path_parent(newdfd, newname, &newnd, &to);
        if (error)
                goto exit1;
@@ -2799,29 +2700,11 @@ exit3:
        unlock_rename(new_dir, old_dir);
 exit2:
        path_put(&newnd.path);
+        putname(to);
 exit1:
        path_put(&oldnd.path);
-exit:
-        return error;
-}
-asmlinkage long sys_renameat(int olddfd, const char __user *oldname,
-                             int newdfd, const char __user *newname)
-{
-        int error;
-        char * from;
-        char * to;
-        from = getname(oldname);
-        if(IS_ERR(from))
-                return PTR_ERR(from);
-        to = getname(newname);
-        error = PTR_ERR(to);
-        if (!IS_ERR(to)) {
-                error = do_rename(olddfd, from, newdfd, to);
-                putname(to);
-        }
        putname(from);
+exit:
        return error;
 }
@@ -2959,8 +2842,7 @@ const struct inode_operations page_symlink_inode_operations = {
        .put_link       = page_put_link,
 };
-EXPORT_SYMBOL(__user_walk);
+EXPORT_SYMBOL(user_path_at);
-EXPORT_SYMBOL(__user_walk_fd);
 EXPORT_SYMBOL(follow_down);
 EXPORT_SYMBOL(follow_up);
 EXPORT_SYMBOL(get_write_access); /* binfmt_aout */
@@ -2975,7 +2857,7 @@ EXPORT_SYMBOL(page_symlink);
 EXPORT_SYMBOL(page_symlink_inode_operations);
 EXPORT_SYMBOL(path_lookup);
 EXPORT_SYMBOL(vfs_path_lookup);
-EXPORT_SYMBOL(permission);
+EXPORT_SYMBOL(inode_permission);
 EXPORT_SYMBOL(vfs_permission);
 EXPORT_SYMBOL(file_permission);
 EXPORT_SYMBOL(unlock_rename);
diff --git a/fs/namespace.c b/fs/namespace.c
index 4f6f7635b59c..411728c0c8bb 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -112,9 +112,13 @@ struct vfsmount *alloc_vfsmnt(const char *name)
                int err;
                err = mnt_alloc_id(mnt);
-                if (err) {
+                if (err)
-                        kmem_cache_free(mnt_cache, mnt);
+                        goto out_free_cache;
-                        return NULL;
+                if (name) {
+                        mnt->mnt_devname = kstrdup(name, GFP_KERNEL);
+                        if (!mnt->mnt_devname)
+                                goto out_free_id;
                }
                atomic_set(&mnt->mnt_count, 1);
@@ -127,16 +131,14 @@ struct vfsmount *alloc_vfsmnt(const char *name)
                INIT_LIST_HEAD(&mnt->mnt_slave_list);
                INIT_LIST_HEAD(&mnt->mnt_slave);
                atomic_set(&mnt->__mnt_writers, 0);
-                if (name) {
-                        int size = strlen(name) + 1;
-                        char *newname = kmalloc(size, GFP_KERNEL);
-                        if (newname) {
-                                memcpy(newname, name, size);
-                                mnt->mnt_devname = newname;
-                        }
-                }
        }
        return mnt;
+out_free_id:
+        mnt_free_id(mnt);
+out_free_cache:
+        kmem_cache_free(mnt_cache, mnt);
+        return NULL;
 }
 /*
@@ -309,10 +311,9 @@ static void handle_write_count_underflow(struct vfsmount *mnt)
         */
        if ((atomic_read(&mnt->__mnt_writers) < 0) &&
            !(mnt->mnt_flags & MNT_IMBALANCED_WRITE_COUNT)) {
-                printk(KERN_DEBUG "leak detected on mount(%p) writers "
+                WARN(1, KERN_DEBUG "leak detected on mount(%p) writers "
                                "count: %d\n",
                        mnt, atomic_read(&mnt->__mnt_writers));
-                WARN_ON(1);
                /* use the flag to keep the dmesg spam down */
                mnt->mnt_flags |= MNT_IMBALANCED_WRITE_COUNT;
        }
@@ -1129,27 +1130,27 @@ static int do_umount(struct vfsmount *mnt, int flags)
 asmlinkage long sys_umount(char __user * name, int flags)
 {
-        struct nameidata nd;
+        struct path path;
        int retval;
-        retval = __user_walk(name, LOOKUP_FOLLOW, &nd);
+        retval = user_path(name, &path);
        if (retval)
                goto out;
        retval = -EINVAL;
-        if (nd.path.dentry != nd.path.mnt->mnt_root)
+        if (path.dentry != path.mnt->mnt_root)
                goto dput_and_out;
-        if (!check_mnt(nd.path.mnt))
+        if (!check_mnt(path.mnt))
                goto dput_and_out;
        retval = -EPERM;
        if (!capable(CAP_SYS_ADMIN))
                goto dput_and_out;
-        retval = do_umount(nd.path.mnt, flags);
+        retval = do_umount(path.mnt, flags);
 dput_and_out:
        /* we mustn't call path_put() as that would clear mnt_expiry_mark */
-        dput(nd.path.dentry);
+        dput(path.dentry);
-        mntput_no_expire(nd.path.mnt);
+        mntput_no_expire(path.mnt);
 out:
        return retval;
 }
@@ -1973,7 +1974,7 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
                struct fs_struct *fs)
 {
        struct mnt_namespace *new_ns;
-        struct vfsmount *rootmnt = NULL, *pwdmnt = NULL, *altrootmnt = NULL;
+        struct vfsmount *rootmnt = NULL, *pwdmnt = NULL;
        struct vfsmount *p, *q;
        new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL);
@@ -2016,10 +2017,6 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
                                pwdmnt = p;
                                fs->pwd.mnt = mntget(q);
                        }
-                        if (p == fs->altroot.mnt) {
-                                altrootmnt = p;
-                                fs->altroot.mnt = mntget(q);
-                        }
                }
                p = next_mnt(p, mnt_ns->root);
                q = next_mnt(q, new_ns->root);
@@ -2030,8 +2027,6 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
                mntput(rootmnt);
        if (pwdmnt)
                mntput(pwdmnt);
-        if (altrootmnt)
-                mntput(altrootmnt);
        return new_ns;
 }
@@ -2184,28 +2179,26 @@ asmlinkage long sys_pivot_root(const char __user * new_root,
                               const char __user * put_old)
 {
        struct vfsmount *tmp;
-        struct nameidata new_nd, old_nd;
+        struct path new, old, parent_path, root_parent, root;
-        struct path parent_path, root_parent, root;
        int error;
        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;
-        error = __user_walk(new_root, LOOKUP_FOLLOW | LOOKUP_DIRECTORY,
+        error = user_path_dir(new_root, &new);
-                            &new_nd);
        if (error)
                goto out0;
        error = -EINVAL;
-        if (!check_mnt(new_nd.path.mnt))
+        if (!check_mnt(new.mnt))
                goto out1;
-        error = __user_walk(put_old, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &old_nd);
+        error = user_path_dir(put_old, &old);
        if (error)
                goto out1;
-        error = security_sb_pivotroot(&old_nd.path, &new_nd.path);
+        error = security_sb_pivotroot(&old, &new);
        if (error) {
-                path_put(&old_nd.path);
+                path_put(&old);
                goto out1;
        }
@@ -2214,69 +2207,69 @@ asmlinkage long sys_pivot_root(const char __user * new_root,
        path_get(&current->fs->root);
        read_unlock(&current->fs->lock);
        down_write(&namespace_sem);
-        mutex_lock(&old_nd.path.dentry->d_inode->i_mutex);
+        mutex_lock(&old.dentry->d_inode->i_mutex);
        error = -EINVAL;
-        if (IS_MNT_SHARED(old_nd.path.mnt) ||
+        if (IS_MNT_SHARED(old.mnt) ||
-                IS_MNT_SHARED(new_nd.path.mnt->mnt_parent) ||
+                IS_MNT_SHARED(new.mnt->mnt_parent) ||
                IS_MNT_SHARED(root.mnt->mnt_parent))
                goto out2;
        if (!check_mnt(root.mnt))
                goto out2;
        error = -ENOENT;
-        if (IS_DEADDIR(new_nd.path.dentry->d_inode))
+        if (IS_DEADDIR(new.dentry->d_inode))
                goto out2;
-        if (d_unhashed(new_nd.path.dentry) && !IS_ROOT(new_nd.path.dentry))
+        if (d_unhashed(new.dentry) && !IS_ROOT(new.dentry))
                goto out2;
-        if (d_unhashed(old_nd.path.dentry) && !IS_ROOT(old_nd.path.dentry))
+        if (d_unhashed(old.dentry) && !IS_ROOT(old.dentry))
                goto out2;
        error = -EBUSY;
-        if (new_nd.path.mnt == root.mnt ||
+        if (new.mnt == root.mnt ||
-            old_nd.path.mnt == root.mnt)
+            old.mnt == root.mnt)
                goto out2; /* loop, on the same file system  */
        error = -EINVAL;
        if (root.mnt->mnt_root != root.dentry)
                goto out2; /* not a mountpoint */
        if (root.mnt->mnt_parent == root.mnt)
                goto out2; /* not attached */
-        if (new_nd.path.mnt->mnt_root != new_nd.path.dentry)
+        if (new.mnt->mnt_root != new.dentry)
                goto out2; /* not a mountpoint */
-        if (new_nd.path.mnt->mnt_parent == new_nd.path.mnt)
+        if (new.mnt->mnt_parent == new.mnt)
                goto out2; /* not attached */
        /* make sure we can reach put_old from new_root */
-        tmp = old_nd.path.mnt;
+        tmp = old.mnt;
        spin_lock(&vfsmount_lock);
-        if (tmp != new_nd.path.mnt) {
+        if (tmp != new.mnt) {
                for (;;) {
                        if (tmp->mnt_parent == tmp)
                                goto out3; /* already mounted on put_old */
-                        if (tmp->mnt_parent == new_nd.path.mnt)
+                        if (tmp->mnt_parent == new.mnt)
                                break;
                        tmp = tmp->mnt_parent;
                }
-                if (!is_subdir(tmp->mnt_mountpoint, new_nd.path.dentry))
+                if (!is_subdir(tmp->mnt_mountpoint, new.dentry))
                        goto out3;
-        } else if (!is_subdir(old_nd.path.dentry, new_nd.path.dentry))
+        } else if (!is_subdir(old.dentry, new.dentry))
                goto out3;
-        detach_mnt(new_nd.path.mnt, &parent_path);
+        detach_mnt(new.mnt, &parent_path);
        detach_mnt(root.mnt, &root_parent);
        /* mount old root on put_old */
-        attach_mnt(root.mnt, &old_nd.path);
+        attach_mnt(root.mnt, &old);
        /* mount new_root on / */
-        attach_mnt(new_nd.path.mnt, &root_parent);
+        attach_mnt(new.mnt, &root_parent);
        touch_mnt_namespace(current->nsproxy->mnt_ns);
        spin_unlock(&vfsmount_lock);
-        chroot_fs_refs(&root, &new_nd.path);
+        chroot_fs_refs(&root, &new);
-        security_sb_post_pivotroot(&root, &new_nd.path);
+        security_sb_post_pivotroot(&root, &new);
        error = 0;
        path_put(&root_parent);
        path_put(&parent_path);
 out2:
-        mutex_unlock(&old_nd.path.dentry->d_inode->i_mutex);
+        mutex_unlock(&old.dentry->d_inode->i_mutex);
        up_write(&namespace_sem);
        path_put(&root);
-        path_put(&old_nd.path);
+        path_put(&old);
 out1:
-        path_put(&new_nd.path);
+        path_put(&new);
 out0:
        return error;
 out3:
diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c
index 011ef0b6d2d4..07e9715b8658 100644
--- a/fs/ncpfs/dir.c
+++ b/fs/ncpfs/dir.c
@@ -266,7 +266,7 @@ leave_me:;
 static int
-__ncp_lookup_validate(struct dentry * dentry, struct nameidata *nd)
+__ncp_lookup_validate(struct dentry *dentry)
 {
        struct ncp_server *server;
        struct dentry *parent;
@@ -340,7 +340,7 @@ ncp_lookup_validate(struct dentry * dentry, struct nameidata *nd)
 {
        int res;
        lock_kernel();
-        res = __ncp_lookup_validate(dentry, nd);
+        res = __ncp_lookup_validate(dentry);
        unlock_kernel();
        return res;
 }
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index 2e5ab1204dec..d642f0e5b365 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -64,7 +64,7 @@ static void ncp_destroy_inode(struct inode *inode)
        kmem_cache_free(ncp_inode_cachep, NCP_FINFO(inode));
 }
-static void init_once(struct kmem_cache *cachep, void *foo)
+static void init_once(void *foo)
 {
        struct ncp_inode_info *ei = (struct ncp_inode_info *) foo;
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 28a238dab23a..74f92b717f78 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1884,7 +1884,7 @@ static int nfs_do_access(struct inode *inode, struct rpc_cred *cred, int mask)
                return status;
        nfs_access_add_cache(inode, &cache);
 out:
-        if ((cache.mask & mask) == mask)
+        if ((mask & ~cache.mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) == 0)
                return 0;
        return -EACCES;
 }
@@ -1907,17 +1907,17 @@ int nfs_may_open(struct inode *inode, struct rpc_cred *cred, int openflags)
        return nfs_do_access(inode, cred, nfs_open_permission_mask(openflags));
 }
-int nfs_permission(struct inode *inode, int mask, struct nameidata *nd)
+int nfs_permission(struct inode *inode, int mask)
 {
        struct rpc_cred *cred;
        int res = 0;
        nfs_inc_stats(inode, NFSIOS_VFSACCESS);
-        if (mask == 0)
+        if ((mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) == 0)
                goto out;
        /* Is this sys_access() ? */
-        if (nd != NULL && (nd->flags & LOOKUP_ACCESS))
+        if (mask & MAY_ACCESS)
                goto force_lookup;
        switch (inode->i_mode & S_IFMT) {
@@ -1926,8 +1926,7 @@ int nfs_permission(struct inode *inode, int mask, struct nameidata *nd)
                case S_IFREG:
                        /* NFSv4 has atomic_open... */
                        if (nfs_server_capable(inode, NFS_CAP_ATOMIC_OPEN)
-                                        && nd != NULL
+                                        && (mask & MAY_OPEN))
-                                        && (nd->flags & LOOKUP_OPEN))
                                goto out;
                        break;
                case S_IFDIR:
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index df23f987da6b..52daefa2f521 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1242,7 +1242,7 @@ static inline void nfs4_init_once(struct nfs_inode *nfsi)
 #endif
 }
-static void init_once(struct kmem_cache * cachep, void *foo)
+static void init_once(void *foo)
 {
        struct nfs_inode *nfsi = (struct nfs_inode *) foo;
diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c
index 46763d1cd397..8478fc25daee 100644
--- a/fs/nfs/nfsroot.c
+++ b/fs/nfs/nfsroot.c
@@ -127,7 +127,7 @@ enum {
        Opt_err
 };
-static match_table_t __initdata tokens = {
+static match_table_t __initconst tokens = {
        {Opt_port, "port=%u"},
        {Opt_rsize, "rsize=%u"},
        {Opt_wsize, "wsize=%u"},
diff --git a/fs/nfsd/lockd.c b/fs/nfsd/lockd.c
index 9e4a568a5013..15c6faeec77c 100644
--- a/fs/nfsd/lockd.c
+++ b/fs/nfsd/lockd.c
@@ -19,6 +19,13 @@
 #define NFSDDBG_FACILITY                NFSDDBG_LOCKD
+#ifdef CONFIG_LOCKD_V4
+#define nlm_stale_fh    nlm4_stale_fh
+#define nlm_failed      nlm4_failed
+#else
+#define nlm_stale_fh    nlm_lck_denied_nolocks
+#define nlm_failed      nlm_lck_denied_nolocks
+#endif
 /*
 * Note: we hold the dentry use count while the file is open.
 */
@@ -35,7 +42,7 @@ nlm_fopen(struct svc_rqst *rqstp, struct nfs_fh *f, struct file **filp)
        fh.fh_export = NULL;
        exp_readlock();
-        nfserr = nfsd_open(rqstp, &fh, S_IFREG, MAY_LOCK, filp);
+        nfserr = nfsd_open(rqstp, &fh, S_IFREG, NFSD_MAY_LOCK, filp);
        fh_put(&fh);
        rqstp->rq_client = NULL;
        exp_readunlock();
@@ -47,12 +54,10 @@ nlm_fopen(struct svc_rqst *rqstp, struct nfs_fh *f, struct file **filp)
                return 0;
        case nfserr_dropit:
                return nlm_drop_reply;
-#ifdef CONFIG_LOCKD_V4
        case nfserr_stale:
-                return nlm4_stale_fh;
+                return nlm_stale_fh;
-#endif
        default:
-                return nlm_lck_denied;
+                return nlm_failed;
        }
 }
diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c
index 1c3b7654e966..4e3219e84116 100644
--- a/fs/nfsd/nfs2acl.c
+++ b/fs/nfsd/nfs2acl.c
@@ -40,7 +40,8 @@ static __be32 nfsacld_proc_getacl(struct svc_rqst * rqstp,
        dprintk("nfsd: GETACL(2acl)   %s\n", SVCFH_fmt(&argp->fh));
        fh = fh_copy(&resp->fh, &argp->fh);
-        if ((nfserr = fh_verify(rqstp, &resp->fh, 0, MAY_NOP)))
+        nfserr = fh_verify(rqstp, &resp->fh, 0, NFSD_MAY_NOP);
+        if (nfserr)
                RETURN_STATUS(nfserr);
        if (argp->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT))
@@ -107,7 +108,7 @@ static __be32 nfsacld_proc_setacl(struct svc_rqst * rqstp,
        dprintk("nfsd: SETACL(2acl)   %s\n", SVCFH_fmt(&argp->fh));
        fh = fh_copy(&resp->fh, &argp->fh);
-        nfserr = fh_verify(rqstp, &resp->fh, 0, MAY_SATTR);
+        nfserr = fh_verify(rqstp, &resp->fh, 0, NFSD_MAY_SATTR);
        if (!nfserr) {
                nfserr = nfserrno( nfsd_set_posix_acl(
@@ -134,7 +135,7 @@ static __be32 nfsacld_proc_getattr(struct svc_rqst * rqstp,
        dprintk("nfsd: GETATTR  %s\n", SVCFH_fmt(&argp->fh));
        fh_copy(&resp->fh, &argp->fh);
-        return fh_verify(rqstp, &resp->fh, 0, MAY_NOP);
+        return fh_verify(rqstp, &resp->fh, 0, NFSD_MAY_NOP);
 }
 /*
diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c
index b647f2f872dc..9981dbb377a3 100644
--- a/fs/nfsd/nfs3acl.c
+++ b/fs/nfsd/nfs3acl.c
@@ -36,7 +36,8 @@ static __be32 nfsd3_proc_getacl(struct svc_rqst * rqstp,
        __be32 nfserr = 0;
        fh = fh_copy(&resp->fh, &argp->fh);
-        if ((nfserr = fh_verify(rqstp, &resp->fh, 0, MAY_NOP)))
+        nfserr = fh_verify(rqstp, &resp->fh, 0, NFSD_MAY_NOP);
+        if (nfserr)
                RETURN_STATUS(nfserr);
        if (argp->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT))
@@ -101,7 +102,7 @@ static __be32 nfsd3_proc_setacl(struct svc_rqst * rqstp,
        __be32 nfserr = 0;
        fh = fh_copy(&resp->fh, &argp->fh);
-        nfserr = fh_verify(rqstp, &resp->fh, 0, MAY_SATTR);
+        nfserr = fh_verify(rqstp, &resp->fh, 0, NFSD_MAY_SATTR);
        if (!nfserr) {
                nfserr = nfserrno( nfsd_set_posix_acl(
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index c721a1e6e9dd..4d617ea28cfc 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -63,7 +63,7 @@ nfsd3_proc_getattr(struct svc_rqst *rqstp, struct nfsd_fhandle  *argp,
                SVCFH_fmt(&argp->fh));
        fh_copy(&resp->fh, &argp->fh);
-        nfserr = fh_verify(rqstp, &resp->fh, 0, MAY_NOP);
+        nfserr = fh_verify(rqstp, &resp->fh, 0, NFSD_MAY_NOP);
        if (nfserr)
                RETURN_STATUS(nfserr);
@@ -242,7 +242,7 @@ nfsd3_proc_create(struct svc_rqst *rqstp, struct nfsd3_createargs *argp,
        attr   = &argp->attrs;
        /* Get the directory inode */
-        nfserr = fh_verify(rqstp, dirfhp, S_IFDIR, MAY_CREATE);
+        nfserr = fh_verify(rqstp, dirfhp, S_IFDIR, NFSD_MAY_CREATE);
        if (nfserr)
                RETURN_STATUS(nfserr);
@@ -558,7 +558,7 @@ nfsd3_proc_fsinfo(struct svc_rqst * rqstp, struct nfsd_fhandle    *argp,
        resp->f_maxfilesize = ~(u32) 0;
        resp->f_properties = NFS3_FSF_DEFAULT;
-        nfserr = fh_verify(rqstp, &argp->fh, 0, MAY_NOP);
+        nfserr = fh_verify(rqstp, &argp->fh, 0, NFSD_MAY_NOP);
        /* Check special features of the file system. May request
         * different read/write sizes for file systems known to have
@@ -597,7 +597,7 @@ nfsd3_proc_pathconf(struct svc_rqst * rqstp, struct nfsd_fhandle      *argp,
        resp->p_case_insensitive = 0;
        resp->p_case_preserving = 1;
-        nfserr = fh_verify(rqstp, &argp->fh, 0, MAY_NOP);
+        nfserr = fh_verify(rqstp, &argp->fh, 0, NFSD_MAY_NOP);
        if (nfserr == 0) {
                struct super_block *sb = argp->fh.fh_dentry->d_inode->i_sb;
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index c309c881bd4e..eef1629806f5 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -71,11 +71,11 @@ do_open_permission(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfs
                return nfserr_inval;
        if (open->op_share_access & NFS4_SHARE_ACCESS_READ)
-                accmode |= MAY_READ;
+                accmode |= NFSD_MAY_READ;
        if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE)
-                accmode |= (MAY_WRITE | MAY_TRUNC);
+                accmode |= (NFSD_MAY_WRITE | NFSD_MAY_TRUNC);
        if (open->op_share_deny & NFS4_SHARE_DENY_WRITE)
-                accmode |= MAY_WRITE;
+                accmode |= NFSD_MAY_WRITE;
        status = fh_verify(rqstp, current_fh, S_IFREG, accmode);
@@ -126,7 +126,8 @@ do_open_lookup(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_o
                        &resfh.fh_handle.fh_base, resfh.fh_handle.fh_size);
        if (!created)
-                status = do_open_permission(rqstp, current_fh, open, MAY_NOP);
+                status = do_open_permission(rqstp, current_fh, open,
+                                            NFSD_MAY_NOP);
 out:
        fh_put(&resfh);
@@ -157,7 +158,8 @@ do_open_fhandle(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_
        open->op_truncate = (open->op_iattr.ia_valid & ATTR_SIZE) &&
                (open->op_iattr.ia_size == 0);
-        status = do_open_permission(rqstp, current_fh, open, MAY_OWNER_OVERRIDE);
+        status = do_open_permission(rqstp, current_fh, open,
+                                    NFSD_MAY_OWNER_OVERRIDE);
        return status;
 }
@@ -186,7 +188,7 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
                cstate->current_fh.fh_handle.fh_size = rp->rp_openfh_len;
                memcpy(&cstate->current_fh.fh_handle.fh_base, rp->rp_openfh,
                                rp->rp_openfh_len);
-                status = fh_verify(rqstp, &cstate->current_fh, 0, MAY_NOP);
+                status = fh_verify(rqstp, &cstate->current_fh, 0, NFSD_MAY_NOP);
                if (status)
                        dprintk("nfsd4_open: replay failed"
                                " restoring previous filehandle\n");
@@ -285,7 +287,7 @@ nfsd4_putfh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
        cstate->current_fh.fh_handle.fh_size = putfh->pf_fhlen;
        memcpy(&cstate->current_fh.fh_handle.fh_base, putfh->pf_fhval,
               putfh->pf_fhlen);
-        return fh_verify(rqstp, &cstate->current_fh, 0, MAY_NOP);
+        return fh_verify(rqstp, &cstate->current_fh, 0, NFSD_MAY_NOP);
 }
 static __be32
@@ -363,7 +365,8 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
        fh_init(&resfh, NFS4_FHSIZE);
-        status = fh_verify(rqstp, &cstate->current_fh, S_IFDIR, MAY_CREATE);
+        status = fh_verify(rqstp, &cstate->current_fh, S_IFDIR,
+                           NFSD_MAY_CREATE);
        if (status == nfserr_symlink)
                status = nfserr_notdir;
        if (status)
@@ -445,7 +448,7 @@ nfsd4_getattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 {
        __be32 status;
-        status = fh_verify(rqstp, &cstate->current_fh, 0, MAY_NOP);
+        status = fh_verify(rqstp, &cstate->current_fh, 0, NFSD_MAY_NOP);
        if (status)
                return status;
@@ -730,7 +733,7 @@ _nfsd4_verify(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
        int count;
        __be32 status;
-        status = fh_verify(rqstp, &cstate->current_fh, 0, MAY_NOP);
+        status = fh_verify(rqstp, &cstate->current_fh, 0, NFSD_MAY_NOP);
        if (status)
                return status;
@@ -843,10 +846,13 @@ struct nfsd4_operation {
 #define ALLOWED_WITHOUT_FH 1
 /* GETATTR and ops not listed as returning NFS4ERR_MOVED: */
 #define ALLOWED_ON_ABSENT_FS 2
+        char *op_name;
 };
 static struct nfsd4_operation nfsd4_ops[];
+static inline char *nfsd4_op_name(unsigned opnum);
 /*
 * COMPOUND call.
 */
@@ -888,7 +894,9 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
        while (!status && resp->opcnt < args->opcnt) {
                op = &args->ops[resp->opcnt++];
-                dprintk("nfsv4 compound op #%d: %d\n", resp->opcnt, op->opnum);
+                dprintk("nfsv4 compound op #%d/%d: %d (%s)\n",
+                        resp->opcnt, args->opcnt, op->opnum,
+                        nfsd4_op_name(op->opnum));
                /*
                 * The XDR decode routines may have pre-set op->status;
@@ -952,126 +960,170 @@ encode_op:
 out:
        nfsd4_release_compoundargs(args);
        cstate_free(cstate);
+        dprintk("nfsv4 compound returned %d\n", ntohl(status));
        return status;
 }
 static struct nfsd4_operation nfsd4_ops[OP_RELEASE_LOCKOWNER+1] = {
        [OP_ACCESS] = {
                .op_func = (nfsd4op_func)nfsd4_access,
+                .op_name = "OP_ACCESS",
        },
        [OP_CLOSE] = {
                .op_func = (nfsd4op_func)nfsd4_close,
+                .op_name = "OP_CLOSE",
        },
        [OP_COMMIT] = {
                .op_func = (nfsd4op_func)nfsd4_commit,
+                .op_name = "OP_COMMIT",
        },
        [OP_CREATE] = {
                .op_func = (nfsd4op_func)nfsd4_create,
+                .op_name = "OP_CREATE",
        },
        [OP_DELEGRETURN] = {
                .op_func = (nfsd4op_func)nfsd4_delegreturn,
+                .op_name = "OP_DELEGRETURN",
        },
        [OP_GETATTR] = {
                .op_func = (nfsd4op_func)nfsd4_getattr,
                .op_flags = ALLOWED_ON_ABSENT_FS,
+                .op_name = "OP_GETATTR",
        },
        [OP_GETFH] = {
                .op_func = (nfsd4op_func)nfsd4_getfh,
+                .op_name = "OP_GETFH",
        },
        [OP_LINK] = {
                .op_func = (nfsd4op_func)nfsd4_link,
+                .op_name = "OP_LINK",
        },
        [OP_LOCK] = {
                .op_func = (nfsd4op_func)nfsd4_lock,
+                .op_name = "OP_LOCK",
        },
        [OP_LOCKT] = {
                .op_func = (nfsd4op_func)nfsd4_lockt,
+                .op_name = "OP_LOCKT",
        },
        [OP_LOCKU] = {
                .op_func = (nfsd4op_func)nfsd4_locku,
+                .op_name = "OP_LOCKU",
        },
        [OP_LOOKUP] = {
                .op_func = (nfsd4op_func)nfsd4_lookup,
+                .op_name = "OP_LOOKUP",
        },
        [OP_LOOKUPP] = {
                .op_func = (nfsd4op_func)nfsd4_lookupp,
+                .op_name = "OP_LOOKUPP",
        },
        [OP_NVERIFY] = {
                .op_func = (nfsd4op_func)nfsd4_nverify,
+                .op_name = "OP_NVERIFY",
        },
        [OP_OPEN] = {
                .op_func = (nfsd4op_func)nfsd4_open,
+                .op_name = "OP_OPEN",
        },
        [OP_OPEN_CONFIRM] = {
                .op_func = (nfsd4op_func)nfsd4_open_confirm,
+                .op_name = "OP_OPEN_CONFIRM",
        },
        [OP_OPEN_DOWNGRADE] = {
                .op_func = (nfsd4op_func)nfsd4_open_downgrade,
+                .op_name = "OP_OPEN_DOWNGRADE",
        },
        [OP_PUTFH] = {
                .op_func = (nfsd4op_func)nfsd4_putfh,
                .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS,
+                .op_name = "OP_PUTFH",
        },
        [OP_PUTPUBFH] = {
-                /* unsupported; just for future reference: */
+                /* unsupported, just for future reference: */
                .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS,
+                .op_name = "OP_PUTPUBFH",
        },
        [OP_PUTROOTFH] = {
                .op_func = (nfsd4op_func)nfsd4_putrootfh,
                .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS,
+                .op_name = "OP_PUTROOTFH",
        },
        [OP_READ] = {
                .op_func = (nfsd4op_func)nfsd4_read,
+                .op_name = "OP_READ",
        },
        [OP_READDIR] = {
                .op_func = (nfsd4op_func)nfsd4_readdir,
+                .op_name = "OP_READDIR",
        },
        [OP_READLINK] = {
                .op_func = (nfsd4op_func)nfsd4_readlink,
+                .op_name = "OP_READLINK",
        },
        [OP_REMOVE] = {
                .op_func = (nfsd4op_func)nfsd4_remove,
+                .op_name = "OP_REMOVE",
        },
        [OP_RENAME] = {
+                .op_name = "OP_RENAME",
                .op_func = (nfsd4op_func)nfsd4_rename,
        },
        [OP_RENEW] = {
                .op_func = (nfsd4op_func)nfsd4_renew,
                .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS,
+                .op_name = "OP_RENEW",
        },
        [OP_RESTOREFH] = {
                .op_func = (nfsd4op_func)nfsd4_restorefh,
                .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS,
+                .op_name = "OP_RESTOREFH",
        },
        [OP_SAVEFH] = {
                .op_func = (nfsd4op_func)nfsd4_savefh,
+                .op_name = "OP_SAVEFH",
        },
        [OP_SECINFO] = {
                .op_func = (nfsd4op_func)nfsd4_secinfo,
+                .op_name = "OP_SECINFO",
        },
        [OP_SETATTR] = {
                .op_func = (nfsd4op_func)nfsd4_setattr,
+                .op_name = "OP_SETATTR",
        },
        [OP_SETCLIENTID] = {
                .op_func = (nfsd4op_func)nfsd4_setclientid,
                .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS,
+                .op_name = "OP_SETCLIENTID",
        },
        [OP_SETCLIENTID_CONFIRM] = {
                .op_func = (nfsd4op_func)nfsd4_setclientid_confirm,
                .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS,
+                .op_name = "OP_SETCLIENTID_CONFIRM",
        },
        [OP_VERIFY] = {
                .op_func = (nfsd4op_func)nfsd4_verify,
+                .op_name = "OP_VERIFY",
        },
        [OP_WRITE] = {
                .op_func = (nfsd4op_func)nfsd4_write,
+                .op_name = "OP_WRITE",
        },
        [OP_RELEASE_LOCKOWNER] = {
                .op_func = (nfsd4op_func)nfsd4_release_lockowner,
                .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS,
+                .op_name = "OP_RELEASE_LOCKOWNER",
        },
 };
+static inline char *
+nfsd4_op_name(unsigned opnum)
+{
+        if (opnum < ARRAY_SIZE(nfsd4_ops))
+                return nfsd4_ops[opnum].op_name;
+        return "unknown_operation";
+}
 #define nfs4svc_decode_voidargs         NULL
 #define nfs4svc_release_void            NULL
 #define nfsd4_voidres                   nfsd4_voidargs
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 8799b8708188..1578d7a2667e 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1173,6 +1173,24 @@ static inline int deny_valid(u32 x)
        return x <= NFS4_SHARE_DENY_BOTH;
 }
+/*
+ * We store the NONE, READ, WRITE, and BOTH bits separately in the
+ * st_{access,deny}_bmap field of the stateid, in order to track not
+ * only what share bits are currently in force, but also what
+ * combinations of share bits previous opens have used.  This allows us
+ * to enforce the recommendation of RFC 3530 section 14.2.19 that the server
+ * return an error if the client attempts to downgrade to a combination
+ * of share bits not explicable by closing some of its previous opens.
+ *
+ * XXX: This enforcement is actually incomplete, since we don't keep
+ * track of access/deny bit combinations; so, e.g., we allow:
+ *
+ *      OPEN allow read, deny write
+ *      OPEN allow both, deny none
+ *      DOWNGRADE allow read, deny none
+ *
+ * which we should reject.
+ */
 static void
 set_access(unsigned int *access, unsigned long bmap) {
        int i;
@@ -1570,6 +1588,10 @@ nfs4_upgrade_open(struct svc_rqst *rqstp, struct svc_fh *cur_fh, struct nfs4_sta
                int err = get_write_access(inode);
                if (err)
                        return nfserrno(err);
+                err = mnt_want_write(cur_fh->fh_export->ex_path.mnt);
+                if (err)
+                        return nfserrno(err);
+                file_take_write(filp);
        }
        status = nfsd4_truncate(rqstp, cur_fh, open);
        if (status) {
@@ -1579,8 +1601,8 @@ nfs4_upgrade_open(struct svc_rqst *rqstp, struct svc_fh *cur_fh, struct nfs4_sta
        }
        /* remember the open */
        filp->f_mode |= open->op_share_access;
-        set_bit(open->op_share_access, &stp->st_access_bmap);
+        __set_bit(open->op_share_access, &stp->st_access_bmap);
-        set_bit(open->op_share_deny, &stp->st_deny_bmap);
+        __set_bit(open->op_share_deny, &stp->st_deny_bmap);
        return nfs_ok;
 }
@@ -1722,9 +1744,9 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
                /* Stateid was not found, this is a new OPEN */
                int flags = 0;
                if (open->op_share_access & NFS4_SHARE_ACCESS_READ)
-                        flags |= MAY_READ;
+                        flags |= NFSD_MAY_READ;
                if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE)
-                        flags |= MAY_WRITE;
+                        flags |= NFSD_MAY_WRITE;
                status = nfs4_new_open(rqstp, &stp, dp, current_fh, flags);
                if (status)
                        goto out;
@@ -2610,7 +2632,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
                 return nfserr_inval;
        if ((status = fh_verify(rqstp, &cstate->current_fh,
-                                S_IFREG, MAY_LOCK))) {
+                                S_IFREG, NFSD_MAY_LOCK))) {
                dprintk("NFSD: nfsd4_lock: permission denied!\n");
                return status;
        }
@@ -3249,12 +3271,14 @@ nfs4_state_shutdown(void)
        nfs4_unlock_state();
 }
+/*
+ * user_recovery_dirname is protected by the nfsd_mutex since it's only
+ * accessed when nfsd is starting.
+ */
 static void
 nfs4_set_recdir(char *recdir)
 {
-        nfs4_lock_state();
        strcpy(user_recovery_dirname, recdir);
-        nfs4_unlock_state();
 }
 /*
@@ -3278,6 +3302,12 @@ nfs4_reset_recoverydir(char *recdir)
        return status;
 }
+char *
+nfs4_recoverydir(void)
+{
+        return user_recovery_dirname;
+}
 /*
 * Called when leasetime is changed.
 *
@@ -3286,11 +3316,12 @@ nfs4_reset_recoverydir(char *recdir)
 * we start to register any changes in lease time.  If the administrator
 * really wants to change the lease time *now*, they can go ahead and bring
 * nfsd down and then back up again after changing the lease time.
+ *
+ * user_lease_time is protected by nfsd_mutex since it's only really accessed
+ * when nfsd is starting.
 */
 void
 nfs4_reset_lease(time_t leasetime)
 {
-        lock_kernel();
        user_lease_time = leasetime;
-        unlock_kernel();
 }
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index c513bbdf2d36..14ba4d9b2859 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -986,10 +986,74 @@ nfsd4_decode_release_lockowner(struct nfsd4_compoundargs *argp, struct nfsd4_rel
 }
 static __be32
+nfsd4_decode_noop(struct nfsd4_compoundargs *argp, void *p)
+{
+        return nfs_ok;
+}
+static __be32
+nfsd4_decode_notsupp(struct nfsd4_compoundargs *argp, void *p)
+{
+        return nfserr_opnotsupp;
+}
+typedef __be32(*nfsd4_dec)(struct nfsd4_compoundargs *argp, void *);
+static nfsd4_dec nfsd4_dec_ops[] = {
+        [OP_ACCESS]             = (nfsd4_dec)nfsd4_decode_access,
+        [OP_CLOSE]              = (nfsd4_dec)nfsd4_decode_close,
+        [OP_COMMIT]             = (nfsd4_dec)nfsd4_decode_commit,
+        [OP_CREATE]             = (nfsd4_dec)nfsd4_decode_create,
+        [OP_DELEGPURGE]         = (nfsd4_dec)nfsd4_decode_notsupp,
+        [OP_DELEGRETURN]        = (nfsd4_dec)nfsd4_decode_delegreturn,
+        [OP_GETATTR]            = (nfsd4_dec)nfsd4_decode_getattr,
+        [OP_GETFH]              = (nfsd4_dec)nfsd4_decode_noop,
+        [OP_LINK]               = (nfsd4_dec)nfsd4_decode_link,
+        [OP_LOCK]               = (nfsd4_dec)nfsd4_decode_lock,
+        [OP_LOCKT]              = (nfsd4_dec)nfsd4_decode_lockt,
+        [OP_LOCKU]              = (nfsd4_dec)nfsd4_decode_locku,
+        [OP_LOOKUP]             = (nfsd4_dec)nfsd4_decode_lookup,
+        [OP_LOOKUPP]            = (nfsd4_dec)nfsd4_decode_noop,
+        [OP_NVERIFY]            = (nfsd4_dec)nfsd4_decode_verify,
+        [OP_OPEN]               = (nfsd4_dec)nfsd4_decode_open,
+        [OP_OPENATTR]           = (nfsd4_dec)nfsd4_decode_notsupp,
+        [OP_OPEN_CONFIRM]       = (nfsd4_dec)nfsd4_decode_open_confirm,
+        [OP_OPEN_DOWNGRADE]     = (nfsd4_dec)nfsd4_decode_open_downgrade,
+        [OP_PUTFH]              = (nfsd4_dec)nfsd4_decode_putfh,
+        [OP_PUTPUBFH]           = (nfsd4_dec)nfsd4_decode_notsupp,
+        [OP_PUTROOTFH]          = (nfsd4_dec)nfsd4_decode_noop,
+        [OP_READ]               = (nfsd4_dec)nfsd4_decode_read,
+        [OP_READDIR]            = (nfsd4_dec)nfsd4_decode_readdir,
+        [OP_READLINK]           = (nfsd4_dec)nfsd4_decode_noop,
+        [OP_REMOVE]             = (nfsd4_dec)nfsd4_decode_remove,
+        [OP_RENAME]             = (nfsd4_dec)nfsd4_decode_rename,
+        [OP_RENEW]              = (nfsd4_dec)nfsd4_decode_renew,
+        [OP_RESTOREFH]          = (nfsd4_dec)nfsd4_decode_noop,
+        [OP_SAVEFH]             = (nfsd4_dec)nfsd4_decode_noop,
+        [OP_SECINFO]            = (nfsd4_dec)nfsd4_decode_secinfo,
+        [OP_SETATTR]            = (nfsd4_dec)nfsd4_decode_setattr,
+        [OP_SETCLIENTID]        = (nfsd4_dec)nfsd4_decode_setclientid,
+        [OP_SETCLIENTID_CONFIRM] = (nfsd4_dec)nfsd4_decode_setclientid_confirm,
+        [OP_VERIFY]             = (nfsd4_dec)nfsd4_decode_verify,
+        [OP_WRITE]              = (nfsd4_dec)nfsd4_decode_write,
+        [OP_RELEASE_LOCKOWNER]  = (nfsd4_dec)nfsd4_decode_release_lockowner,
+};
+struct nfsd4_minorversion_ops {
+        nfsd4_dec *decoders;
+        int nops;
+};
+static struct nfsd4_minorversion_ops nfsd4_minorversion[] = {
+        [0] = { nfsd4_dec_ops, ARRAY_SIZE(nfsd4_dec_ops) },
+};
+static __be32
 nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
 {
        DECODE_HEAD;
        struct nfsd4_op *op;
+        struct nfsd4_minorversion_ops *ops;
        int i;
        /*
@@ -1019,6 +1083,10 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
                }
        }
+        if (argp->minorversion >= ARRAY_SIZE(nfsd4_minorversion))
+                argp->opcnt = 0;
+        ops = &nfsd4_minorversion[argp->minorversion];
        for (i = 0; i < argp->opcnt; i++) {
                op = &argp->ops[i];
                op->replay = NULL;
@@ -1056,120 +1124,11 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
                }
                op->opnum = ntohl(*argp->p++);
-                switch (op->opnum) {
+                if (op->opnum >= OP_ACCESS && op->opnum < ops->nops)
-                case 2: /* Reserved operation */
+                        op->status = ops->decoders[op->opnum](argp, &op->u);
-                        op->opnum = OP_ILLEGAL;
+                else {
-                        if (argp->minorversion == 0)
-                                op->status = nfserr_op_illegal;
-                        else
-                                op->status = nfserr_minor_vers_mismatch;
-                        break;
-                case OP_ACCESS:
-                        op->status = nfsd4_decode_access(argp, &op->u.access);
-                        break;
-                case OP_CLOSE:
-                        op->status = nfsd4_decode_close(argp, &op->u.close);
-                        break;
-                case OP_COMMIT:
-                        op->status = nfsd4_decode_commit(argp, &op->u.commit);
-                        break;
-                case OP_CREATE:
-                        op->status = nfsd4_decode_create(argp, &op->u.create);
-                        break;
-                case OP_DELEGRETURN:
-                        op->status = nfsd4_decode_delegreturn(argp, &op->u.delegreturn);
-                        break;
-                case OP_GETATTR:
-                        op->status = nfsd4_decode_getattr(argp, &op->u.getattr);
-                        break;
-                case OP_GETFH:
-                        op->status = nfs_ok;
-                        break;
-                case OP_LINK:
-                        op->status = nfsd4_decode_link(argp, &op->u.link);
-                        break;
-                case OP_LOCK:
-                        op->status = nfsd4_decode_lock(argp, &op->u.lock);
-                        break;
-                case OP_LOCKT:
-                        op->status = nfsd4_decode_lockt(argp, &op->u.lockt);
-                        break;
-                case OP_LOCKU:
-                        op->status = nfsd4_decode_locku(argp, &op->u.locku);
-                        break;
-                case OP_LOOKUP:
-                        op->status = nfsd4_decode_lookup(argp, &op->u.lookup);
-                        break;
-                case OP_LOOKUPP:
-                        op->status = nfs_ok;
-                        break;
-                case OP_NVERIFY:
-                        op->status = nfsd4_decode_verify(argp, &op->u.nverify);
-                        break;
-                case OP_OPEN:
-                        op->status = nfsd4_decode_open(argp, &op->u.open);
-                        break;
-                case OP_OPEN_CONFIRM:
-                        op->status = nfsd4_decode_open_confirm(argp, &op->u.open_confirm);
-                        break;
-                case OP_OPEN_DOWNGRADE:
-                        op->status = nfsd4_decode_open_downgrade(argp, &op->u.open_downgrade);
-                        break;
-                case OP_PUTFH:
-                        op->status = nfsd4_decode_putfh(argp, &op->u.putfh);
-                        break;
-                case OP_PUTROOTFH:
-                        op->status = nfs_ok;
-                        break;
-                case OP_READ:
-                        op->status = nfsd4_decode_read(argp, &op->u.read);
-                        break;
-                case OP_READDIR:
-                        op->status = nfsd4_decode_readdir(argp, &op->u.readdir);
-                        break;
-                case OP_READLINK:
-                        op->status = nfs_ok;
-                        break;
-                case OP_REMOVE:
-                        op->status = nfsd4_decode_remove(argp, &op->u.remove);
-                        break;
-                case OP_RENAME:
-                        op->status = nfsd4_decode_rename(argp, &op->u.rename);
-                        break;
-                case OP_RESTOREFH:
-                        op->status = nfs_ok;
-                        break;
-                case OP_RENEW:
-                        op->status = nfsd4_decode_renew(argp, &op->u.renew);
-                        break;
-                case OP_SAVEFH:
-                        op->status = nfs_ok;
-                        break;
-                case OP_SECINFO:
-                        op->status = nfsd4_decode_secinfo(argp, &op->u.secinfo);
-                        break;
-                case OP_SETATTR:
-                        op->status = nfsd4_decode_setattr(argp, &op->u.setattr);
-                        break;
-                case OP_SETCLIENTID:
-                        op->status = nfsd4_decode_setclientid(argp, &op->u.setclientid);
-                        break;
-                case OP_SETCLIENTID_CONFIRM:
-                        op->status = nfsd4_decode_setclientid_confirm(argp, &op->u.setclientid_confirm);
-                        break;
-                case OP_VERIFY:
-                        op->status = nfsd4_decode_verify(argp, &op->u.verify);
-                        break;
-                case OP_WRITE:
-                        op->status = nfsd4_decode_write(argp, &op->u.write);
-                        break;
-                case OP_RELEASE_LOCKOWNER:
-                        op->status = nfsd4_decode_release_lockowner(argp, &op->u.release_lockowner);
-                        break;
-                default:
                        op->opnum = OP_ILLEGAL;
                        op->status = nfserr_op_illegal;
-                        break;
                }
                if (op->status) {
@@ -1201,11 +1160,11 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
        *p++ = htonl((u32)((n) >> 32));                         \
        *p++ = htonl((u32)(n));                                 \
 } while (0)
-#define WRITEMEM(ptr,nbytes)     do {                           \
+#define WRITEMEM(ptr,nbytes)     do { if (nbytes > 0) {         \
        *(p + XDR_QUADLEN(nbytes) -1) = 0;                      \
        memcpy(p, ptr, nbytes);                                 \
        p += XDR_QUADLEN(nbytes);                               \
-} while (0)
+}} while (0)
 #define WRITECINFO(c)           do {                            \
        *p++ = htonl(c.atomic);                                 \
        *p++ = htonl(c.before_ctime_sec);                               \
@@ -1991,7 +1950,7 @@ fail:
        return -EINVAL;
 }
-static void
+static __be32
 nfsd4_encode_access(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_access *access)
 {
        ENCODE_HEAD;
@@ -2002,9 +1961,10 @@ nfsd4_encode_access(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_
                WRITE32(access->ac_resp_access);
                ADJUST_ARGS();
        }
+        return nfserr;
 }
-static void
+static __be32
 nfsd4_encode_close(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_close *close)
 {
        ENCODE_SEQID_OP_HEAD;
@@ -2016,10 +1976,11 @@ nfsd4_encode_close(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_c
                ADJUST_ARGS();
        }
        ENCODE_SEQID_OP_TAIL(close->cl_stateowner);
+        return nfserr;
 }
-static void
+static __be32
 nfsd4_encode_commit(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_commit *commit)
 {
        ENCODE_HEAD;
@@ -2029,9 +1990,10 @@ nfsd4_encode_commit(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_
                WRITEMEM(commit->co_verf.data, 8);
                ADJUST_ARGS();
        }
+        return nfserr;
 }
-static void
+static __be32
 nfsd4_encode_create(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_create *create)
 {
        ENCODE_HEAD;
@@ -2044,6 +2006,7 @@ nfsd4_encode_create(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_
                WRITE32(create->cr_bmval[1]);
                ADJUST_ARGS();
        }
+        return nfserr;
 }
 static __be32
@@ -2064,9 +2027,10 @@ nfsd4_encode_getattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4
        return nfserr;
 }
-static void
+static __be32
-nfsd4_encode_getfh(struct nfsd4_compoundres *resp, __be32 nfserr, struct svc_fh *fhp)
+nfsd4_encode_getfh(struct nfsd4_compoundres *resp, __be32 nfserr, struct svc_fh **fhpp)
 {
+        struct svc_fh *fhp = *fhpp;
        unsigned int len;
        ENCODE_HEAD;
@@ -2077,6 +2041,7 @@ nfsd4_encode_getfh(struct nfsd4_compoundres *resp, __be32 nfserr, struct svc_fh
                WRITEMEM(&fhp->fh_handle.fh_base, len);
                ADJUST_ARGS();
        }
+        return nfserr;
 }
 /*
@@ -2104,7 +2069,7 @@ nfsd4_encode_lock_denied(struct nfsd4_compoundres *resp, struct nfsd4_lock_denie
        ADJUST_ARGS();
 }
-static void
+static __be32
 nfsd4_encode_lock(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_lock *lock)
 {
        ENCODE_SEQID_OP_HEAD;
@@ -2118,16 +2083,18 @@ nfsd4_encode_lock(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_lo
                nfsd4_encode_lock_denied(resp, &lock->lk_denied);
        ENCODE_SEQID_OP_TAIL(lock->lk_replay_owner);
+        return nfserr;
 }
-static void
+static __be32
 nfsd4_encode_lockt(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_lockt *lockt)
 {
        if (nfserr == nfserr_denied)
                nfsd4_encode_lock_denied(resp, &lockt->lt_denied);
+        return nfserr;
 }
-static void
+static __be32
 nfsd4_encode_locku(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_locku *locku)
 {
        ENCODE_SEQID_OP_HEAD;
@@ -2140,10 +2107,11 @@ nfsd4_encode_locku(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_l
        }
                                        
        ENCODE_SEQID_OP_TAIL(locku->lu_stateowner);
+        return nfserr;
 }
-static void
+static __be32
 nfsd4_encode_link(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_link *link)
 {
        ENCODE_HEAD;
@@ -2153,10 +2121,11 @@ nfsd4_encode_link(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_li
                WRITECINFO(link->li_cinfo);
                ADJUST_ARGS();
        }
+        return nfserr;
 }
-static void
+static __be32
 nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open *open)
 {
        ENCODE_SEQID_OP_HEAD;
@@ -2219,9 +2188,10 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_op
        /* XXX save filehandle here */
 out:
        ENCODE_SEQID_OP_TAIL(open->op_stateowner);
+        return nfserr;
 }
-static void
+static __be32
 nfsd4_encode_open_confirm(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open_confirm *oc)
 {
        ENCODE_SEQID_OP_HEAD;
@@ -2234,9 +2204,10 @@ nfsd4_encode_open_confirm(struct nfsd4_compoundres *resp, __be32 nfserr, struct
        }
        ENCODE_SEQID_OP_TAIL(oc->oc_stateowner);
+        return nfserr;
 }
-static void
+static __be32
 nfsd4_encode_open_downgrade(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open_downgrade *od)
 {
        ENCODE_SEQID_OP_HEAD;
@@ -2249,6 +2220,7 @@ nfsd4_encode_open_downgrade(struct nfsd4_compoundres *resp, __be32 nfserr, struc
        }
        ENCODE_SEQID_OP_TAIL(od->od_stateowner);
+        return nfserr;
 }
 static __be32
@@ -2443,7 +2415,7 @@ err_no_verf:
        return nfserr;
 }
-static void
+static __be32
 nfsd4_encode_remove(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_remove *remove)
 {
        ENCODE_HEAD;
@@ -2453,9 +2425,10 @@ nfsd4_encode_remove(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_
                WRITECINFO(remove->rm_cinfo);
                ADJUST_ARGS();
        }
+        return nfserr;
 }
-static void
+static __be32
 nfsd4_encode_rename(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_rename *rename)
 {
        ENCODE_HEAD;
@@ -2466,9 +2439,10 @@ nfsd4_encode_rename(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_
                WRITECINFO(rename->rn_tinfo);
                ADJUST_ARGS();
        }
+        return nfserr;
 }
-static void
+static __be32
 nfsd4_encode_secinfo(struct nfsd4_compoundres *resp, __be32 nfserr,
                     struct nfsd4_secinfo *secinfo)
 {
@@ -2532,13 +2506,14 @@ nfsd4_encode_secinfo(struct nfsd4_compoundres *resp, __be32 nfserr,
 out:
        if (exp)
                exp_put(exp);
+        return nfserr;
 }
 /*
 * The SETATTR encode routine is special -- it always encodes a bitmap,
 * regardless of the error status.
 */
-static void
+static __be32
 nfsd4_encode_setattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_setattr *setattr)
 {
        ENCODE_HEAD;
@@ -2555,9 +2530,10 @@ nfsd4_encode_setattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4
                WRITE32(setattr->sa_bmval[1]);
        }
        ADJUST_ARGS();
+        return nfserr;
 }
-static void
+static __be32
 nfsd4_encode_setclientid(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_setclientid *scd)
 {
        ENCODE_HEAD;
@@ -2574,9 +2550,10 @@ nfsd4_encode_setclientid(struct nfsd4_compoundres *resp, __be32 nfserr, struct n
                WRITE32(0);
                ADJUST_ARGS();
        }
+        return nfserr;
 }
-static void
+static __be32
 nfsd4_encode_write(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_write *write)
 {
        ENCODE_HEAD;
@@ -2588,8 +2565,56 @@ nfsd4_encode_write(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_w
                WRITEMEM(write->wr_verifier.data, 8);
                ADJUST_ARGS();
        }
+        return nfserr;
 }
+static __be32
+nfsd4_encode_noop(struct nfsd4_compoundres *resp, __be32 nfserr, void *p)
+{
+        return nfserr;
+}
+typedef __be32(* nfsd4_enc)(struct nfsd4_compoundres *, __be32, void *);
+static nfsd4_enc nfsd4_enc_ops[] = {
+        [OP_ACCESS]             = (nfsd4_enc)nfsd4_encode_access,
+        [OP_CLOSE]              = (nfsd4_enc)nfsd4_encode_close,
+        [OP_COMMIT]             = (nfsd4_enc)nfsd4_encode_commit,
+        [OP_CREATE]             = (nfsd4_enc)nfsd4_encode_create,
+        [OP_DELEGPURGE]         = (nfsd4_enc)nfsd4_encode_noop,
+        [OP_DELEGRETURN]        = (nfsd4_enc)nfsd4_encode_noop,
+        [OP_GETATTR]            = (nfsd4_enc)nfsd4_encode_getattr,
+        [OP_GETFH]              = (nfsd4_enc)nfsd4_encode_getfh,
+        [OP_LINK]               = (nfsd4_enc)nfsd4_encode_link,
+        [OP_LOCK]               = (nfsd4_enc)nfsd4_encode_lock,
+        [OP_LOCKT]              = (nfsd4_enc)nfsd4_encode_lockt,
+        [OP_LOCKU]              = (nfsd4_enc)nfsd4_encode_locku,
+        [OP_LOOKUP]             = (nfsd4_enc)nfsd4_encode_noop,
+        [OP_LOOKUPP]            = (nfsd4_enc)nfsd4_encode_noop,
+        [OP_NVERIFY]            = (nfsd4_enc)nfsd4_encode_noop,
+        [OP_OPEN]               = (nfsd4_enc)nfsd4_encode_open,
+        [OP_OPEN_CONFIRM]       = (nfsd4_enc)nfsd4_encode_open_confirm,
+        [OP_OPEN_DOWNGRADE]     = (nfsd4_enc)nfsd4_encode_open_downgrade,
+        [OP_PUTFH]              = (nfsd4_enc)nfsd4_encode_noop,
+        [OP_PUTPUBFH]           = (nfsd4_enc)nfsd4_encode_noop,
+        [OP_PUTROOTFH]          = (nfsd4_enc)nfsd4_encode_noop,
+        [OP_READ]               = (nfsd4_enc)nfsd4_encode_read,
+        [OP_READDIR]            = (nfsd4_enc)nfsd4_encode_readdir,
+        [OP_READLINK]           = (nfsd4_enc)nfsd4_encode_readlink,
+        [OP_REMOVE]             = (nfsd4_enc)nfsd4_encode_remove,
+        [OP_RENAME]             = (nfsd4_enc)nfsd4_encode_rename,
+        [OP_RENEW]              = (nfsd4_enc)nfsd4_encode_noop,
+        [OP_RESTOREFH]          = (nfsd4_enc)nfsd4_encode_noop,
+        [OP_SAVEFH]             = (nfsd4_enc)nfsd4_encode_noop,
+        [OP_SECINFO]            = (nfsd4_enc)nfsd4_encode_secinfo,
+        [OP_SETATTR]            = (nfsd4_enc)nfsd4_encode_setattr,
+        [OP_SETCLIENTID]        = (nfsd4_enc)nfsd4_encode_setclientid,
+        [OP_SETCLIENTID_CONFIRM] = (nfsd4_enc)nfsd4_encode_noop,
+        [OP_VERIFY]             = (nfsd4_enc)nfsd4_encode_noop,
+        [OP_WRITE]              = (nfsd4_enc)nfsd4_encode_write,
+        [OP_RELEASE_LOCKOWNER]  = (nfsd4_enc)nfsd4_encode_noop,
+};
 void
 nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op)
 {
@@ -2601,101 +2626,12 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op)
        statp = p++;    /* to be backfilled at the end */
        ADJUST_ARGS();
-        switch (op->opnum) {
+        if (op->opnum == OP_ILLEGAL)
-        case OP_ACCESS:
+                goto status;
-                nfsd4_encode_access(resp, op->status, &op->u.access);
+        BUG_ON(op->opnum < 0 || op->opnum >= ARRAY_SIZE(nfsd4_enc_ops) ||
-                break;
+               !nfsd4_enc_ops[op->opnum]);
-        case OP_CLOSE:
+        op->status = nfsd4_enc_ops[op->opnum](resp, op->status, &op->u);
-                nfsd4_encode_close(resp, op->status, &op->u.close);
+status:
-                break;
-        case OP_COMMIT:
-                nfsd4_encode_commit(resp, op->status, &op->u.commit);
-                break;
-        case OP_CREATE:
-                nfsd4_encode_create(resp, op->status, &op->u.create);
-                break;
-        case OP_DELEGRETURN:
-                break;
-        case OP_GETATTR:
-                op->status = nfsd4_encode_getattr(resp, op->status, &op->u.getattr);
-                break;
-        case OP_GETFH:
-                nfsd4_encode_getfh(resp, op->status, op->u.getfh);
-                break;
-        case OP_LINK:
-                nfsd4_encode_link(resp, op->status, &op->u.link);
-                break;
-        case OP_LOCK:
-                nfsd4_encode_lock(resp, op->status, &op->u.lock);
-                break;
-        case OP_LOCKT:
-                nfsd4_encode_lockt(resp, op->status, &op->u.lockt);
-                break;
-        case OP_LOCKU:
-                nfsd4_encode_locku(resp, op->status, &op->u.locku);
-                break;
-        case OP_LOOKUP:
-                break;
-        case OP_LOOKUPP:
-                break;
-        case OP_NVERIFY:
-                break;
-        case OP_OPEN:
-                nfsd4_encode_open(resp, op->status, &op->u.open);
-                break;
-        case OP_OPEN_CONFIRM:
-                nfsd4_encode_open_confirm(resp, op->status, &op->u.open_confirm);
-                break;
-        case OP_OPEN_DOWNGRADE:
-                nfsd4_encode_open_downgrade(resp, op->status, &op->u.open_downgrade);
-                break;
-        case OP_PUTFH:
-                break;
-        case OP_PUTROOTFH:
-                break;
-        case OP_READ:
-                op->status = nfsd4_encode_read(resp, op->status, &op->u.read);
-                break;
-        case OP_READDIR:
-                op->status = nfsd4_encode_readdir(resp, op->status, &op->u.readdir);
-                break;
-        case OP_READLINK:
-                op->status = nfsd4_encode_readlink(resp, op->status, &op->u.readlink);
-                break;
-        case OP_REMOVE:
-                nfsd4_encode_remove(resp, op->status, &op->u.remove);
-                break;
-        case OP_RENAME:
-                nfsd4_encode_rename(resp, op->status, &op->u.rename);
-                break;
-        case OP_RENEW:
-                break;
-        case OP_RESTOREFH:
-                break;
-        case OP_SAVEFH:
-                break;
-        case OP_SECINFO:
-                nfsd4_encode_secinfo(resp, op->status, &op->u.secinfo);
-                break;
-        case OP_SETATTR:
-                nfsd4_encode_setattr(resp, op->status, &op->u.setattr);
-                break;
-        case OP_SETCLIENTID:
-                nfsd4_encode_setclientid(resp, op->status, &op->u.setclientid);
-                break;
-        case OP_SETCLIENTID_CONFIRM:
-                break;
-        case OP_VERIFY:
-                break;
-        case OP_WRITE:
-                nfsd4_encode_write(resp, op->status, &op->u.write);
-                break;
-        case OP_RELEASE_LOCKOWNER:
-                break;
-        default:
-                break;
-        }
        /*
         * Note: We write the status directly, instead of using WRITE32(),
         * since it is already in network byte order.
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 5ac00c4fee91..c53e65f8f3a2 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -12,6 +12,7 @@
 #include <linux/time.h>
 #include <linux/errno.h>
 #include <linux/fs.h>
+#include <linux/namei.h>
 #include <linux/fcntl.h>
 #include <linux/net.h>
 #include <linux/in.h>
@@ -310,9 +311,12 @@ static ssize_t write_getfd(struct file *file, char *buf, size_t size)
 static ssize_t failover_unlock_ip(struct file *file, char *buf, size_t size)
 {
-        __be32 server_ip;
+        struct sockaddr_in sin = {
-        char *fo_path, c;
+                .sin_family     = AF_INET,
+        };
        int b1, b2, b3, b4;
+        char c;
+        char *fo_path;
        /* sanity check */
        if (size == 0)
@@ -326,11 +330,13 @@ static ssize_t failover_unlock_ip(struct file *file, char *buf, size_t size)
                return -EINVAL;
        /* get ipv4 address */
-        if (sscanf(fo_path, "%u.%u.%u.%u%c", &b1, &b2, &b3, &b4, &c) != 4)
+        if (sscanf(fo_path, NIPQUAD_FMT "%c", &b1, &b2, &b3, &b4, &c) != 4)
                return -EINVAL;
-        server_ip = htonl((((((b1<<8)|b2)<<8)|b3)<<8)|b4);
+        if (b1 > 255 || b2 > 255 || b3 > 255 || b4 > 255)
+                return -EINVAL;
+        sin.sin_addr.s_addr = htonl((b1 << 24) | (b2 << 16) | (b3 << 8) | b4);
-        return nlmsvc_unlock_all_by_ip(server_ip);
+        return nlmsvc_unlock_all_by_ip((struct sockaddr *)&sin);
 }
 static ssize_t failover_unlock_fs(struct file *file, char *buf, size_t size)
@@ -450,22 +456,26 @@ static ssize_t write_pool_threads(struct file *file, char *buf, size_t size)
        int i;
        int rv;
        int len;
-        int npools = nfsd_nrpools();
+        int npools;
        int *nthreads;
+        mutex_lock(&nfsd_mutex);
+        npools = nfsd_nrpools();
        if (npools == 0) {
                /*
                 * NFS is shut down.  The admin can start it by
                 * writing to the threads file but NOT the pool_threads
                 * file, sorry.  Report zero threads.
                 */
+                mutex_unlock(&nfsd_mutex);
                strcpy(buf, "0\n");
                return strlen(buf);
        }
        nthreads = kcalloc(npools, sizeof(int), GFP_KERNEL);
+        rv = -ENOMEM;
        if (nthreads == NULL)
-                return -ENOMEM;
+                goto out_free;
        if (size > 0) {
                for (i = 0; i < npools; i++) {
@@ -496,14 +506,16 @@ static ssize_t write_pool_threads(struct file *file, char *buf, size_t size)
                mesg += len;
        }
+        mutex_unlock(&nfsd_mutex);
        return (mesg-buf);
 out_free:
        kfree(nthreads);
+        mutex_unlock(&nfsd_mutex);
        return rv;
 }
-static ssize_t write_versions(struct file *file, char *buf, size_t size)
+static ssize_t __write_versions(struct file *file, char *buf, size_t size)
 {
        /*
         * Format:
@@ -566,14 +578,23 @@ static ssize_t write_versions(struct file *file, char *buf, size_t size)
        return len;
 }
-static ssize_t write_ports(struct file *file, char *buf, size_t size)
+static ssize_t write_versions(struct file *file, char *buf, size_t size)
+{
+        ssize_t rv;
+        mutex_lock(&nfsd_mutex);
+        rv = __write_versions(file, buf, size);
+        mutex_unlock(&nfsd_mutex);
+        return rv;
+}
+static ssize_t __write_ports(struct file *file, char *buf, size_t size)
 {
        if (size == 0) {
                int len = 0;
-                lock_kernel();
                if (nfsd_serv)
                        len = svc_xprt_names(nfsd_serv, buf, 0);
-                unlock_kernel();
                return len;
        }
        /* Either a single 'fd' number is written, in which
@@ -603,9 +624,7 @@ static ssize_t write_ports(struct file *file, char *buf, size_t size)
                        /* Decrease the count, but don't shutdown the
                         * the service
                         */
-                        lock_kernel();
                        nfsd_serv->sv_nrthreads--;
-                        unlock_kernel();
                }
                return err < 0 ? err : 0;
        }
@@ -614,10 +633,8 @@ static ssize_t write_ports(struct file *file, char *buf, size_t size)
                int len = 0;
                if (!toclose)
                        return -ENOMEM;
-                lock_kernel();
                if (nfsd_serv)
                        len = svc_sock_names(buf, nfsd_serv, toclose);
-                unlock_kernel();
                if (len >= 0)
                        lockd_down();
                kfree(toclose);
@@ -655,7 +672,6 @@ static ssize_t write_ports(struct file *file, char *buf, size_t size)
                if (sscanf(&buf[1], "%15s %4d", transport, &port) == 2) {
                        if (port == 0)
                                return -EINVAL;
-                        lock_kernel();
                        if (nfsd_serv) {
                                xprt = svc_find_xprt(nfsd_serv, transport,
                                                     AF_UNSPEC, port);
@@ -666,13 +682,23 @@ static ssize_t write_ports(struct file *file, char *buf, size_t size)
                                } else
                                        err = -ENOTCONN;
                        }
-                        unlock_kernel();
                        return err < 0 ? err : 0;
                }
        }
        return -EINVAL;
 }
+static ssize_t write_ports(struct file *file, char *buf, size_t size)
+{
+        ssize_t rv;
+        mutex_lock(&nfsd_mutex);
+        rv = __write_ports(file, buf, size);
+        mutex_unlock(&nfsd_mutex);
+        return rv;
+}
 int nfsd_max_blksize;
 static ssize_t write_maxblksize(struct file *file, char *buf, size_t size)
@@ -691,13 +717,13 @@ static ssize_t write_maxblksize(struct file *file, char *buf, size_t size)
                if (bsize > NFSSVC_MAXBLKSIZE)
                        bsize = NFSSVC_MAXBLKSIZE;
                bsize &= ~(1024-1);
-                lock_kernel();
+                mutex_lock(&nfsd_mutex);
                if (nfsd_serv && nfsd_serv->sv_nrthreads) {
-                        unlock_kernel();
+                        mutex_unlock(&nfsd_mutex);
                        return -EBUSY;
                }
                nfsd_max_blksize = bsize;
-                unlock_kernel();
+                mutex_unlock(&nfsd_mutex);
        }
        return sprintf(buf, "%d\n", nfsd_max_blksize);
 }
@@ -705,16 +731,17 @@ static ssize_t write_maxblksize(struct file *file, char *buf, size_t size)
 #ifdef CONFIG_NFSD_V4
 extern time_t nfs4_leasetime(void);
-static ssize_t write_leasetime(struct file *file, char *buf, size_t size)
+static ssize_t __write_leasetime(struct file *file, char *buf, size_t size)
 {
        /* if size > 10 seconds, call
         * nfs4_reset_lease() then write out the new lease (seconds) as reply
         */
        char *mesg = buf;
-        int rv;
+        int rv, lease;
        if (size > 0) {
-                int lease;
+                if (nfsd_serv)
+                        return -EBUSY;
                rv = get_int(&mesg, &lease);
                if (rv)
                        return rv;
@@ -726,24 +753,52 @@ static ssize_t write_leasetime(struct file *file, char *buf, size_t size)
        return strlen(buf);
 }
-static ssize_t write_recoverydir(struct file *file, char *buf, size_t size)
+static ssize_t write_leasetime(struct file *file, char *buf, size_t size)
+{
+        ssize_t rv;
+        mutex_lock(&nfsd_mutex);
+        rv = __write_leasetime(file, buf, size);
+        mutex_unlock(&nfsd_mutex);
+        return rv;
+}
+extern char *nfs4_recoverydir(void);
+static ssize_t __write_recoverydir(struct file *file, char *buf, size_t size)
 {
        char *mesg = buf;
        char *recdir;
        int len, status;
-        if (size == 0 || size > PATH_MAX || buf[size-1] != '\n')
+        if (size > 0) {
-                return -EINVAL;
+                if (nfsd_serv)
-        buf[size-1] = 0;
+                        return -EBUSY;
+                if (size > PATH_MAX || buf[size-1] != '\n')
+                        return -EINVAL;
+                buf[size-1] = 0;
-        recdir = mesg;
+                recdir = mesg;
-        len = qword_get(&mesg, recdir, size);
+                len = qword_get(&mesg, recdir, size);
-        if (len <= 0)
+                if (len <= 0)
-                return -EINVAL;
+                        return -EINVAL;
-        status = nfs4_reset_recoverydir(recdir);
+                status = nfs4_reset_recoverydir(recdir);
+        }
+        sprintf(buf, "%s\n", nfs4_recoverydir());
        return strlen(buf);
 }
+static ssize_t write_recoverydir(struct file *file, char *buf, size_t size)
+{
+        ssize_t rv;
+        mutex_lock(&nfsd_mutex);
+        rv = __write_recoverydir(file, buf, size);
+        mutex_unlock(&nfsd_mutex);
+        return rv;
+}
 #endif
 /*----------------------------------------------------------------------------*/
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index 100ae5641162..ea37c96f0445 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -51,7 +51,7 @@ static int nfsd_acceptable(void *expv, struct dentry *dentry)
                /* make sure parents give x permission to user */
                int err;
                parent = dget_parent(tdentry);
-                err = permission(parent->d_inode, MAY_EXEC, NULL);
+                err = inode_permission(parent->d_inode, MAY_EXEC);
                if (err < 0) {
                        dput(parent);
                        break;
@@ -176,9 +176,24 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp)
        if (IS_ERR(exp))
                return nfserrno(PTR_ERR(exp));
-        error = nfsd_setuser_and_check_port(rqstp, exp);
+        if (exp->ex_flags & NFSEXP_NOSUBTREECHECK) {
-        if (error)
+                /* Elevate privileges so that the lack of 'r' or 'x'
-                goto out;
+                 * permission on some parent directory will
+                 * not stop exportfs_decode_fh from being able
+                 * to reconnect a directory into the dentry cache.
+                 * The same problem can affect "SUBTREECHECK" exports,
+                 * but as nfsd_acceptable depends on correct
+                 * access control settings being in effect, we cannot
+                 * fix that case easily.
+                 */
+                current->cap_effective =
+                        cap_raise_nfsd_set(current->cap_effective,
+                                           current->cap_permitted);
+        } else {
+                error = nfsd_setuser_and_check_port(rqstp, exp);
+                if (error)
+                        goto out;
+        }
        /*
         * Look up the dentry using the NFS file handle.
@@ -215,6 +230,14 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp)
                goto out;
        }
+        if (exp->ex_flags & NFSEXP_NOSUBTREECHECK) {
+                error = nfsd_setuser_and_check_port(rqstp, exp);
+                if (error) {
+                        dput(dentry);
+                        goto out;
+                }
+        }
        if (S_ISDIR(dentry->d_inode->i_mode) &&
                        (dentry->d_flags & DCACHE_DISCONNECTED)) {
                printk("nfsd: find_fh_dentry returned a DISCONNECTED directory: %s/%s\n",
@@ -279,7 +302,7 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access)
        if (error)
                goto out;
-        if (!(access & MAY_LOCK)) {
+        if (!(access & NFSD_MAY_LOCK)) {
                /*
                 * pseudoflavor restrictions are not enforced on NLM,
                 * which clients virtually always use auth_sys for,
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index 6cfc96a12483..0766f95d236a 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -65,7 +65,7 @@ nfsd_proc_getattr(struct svc_rqst *rqstp, struct nfsd_fhandle  *argp,
        dprintk("nfsd: GETATTR  %s\n", SVCFH_fmt(&argp->fh));
        fh_copy(&resp->fh, &argp->fh);
-        nfserr = fh_verify(rqstp, &resp->fh, 0, MAY_NOP);
+        nfserr = fh_verify(rqstp, &resp->fh, 0, NFSD_MAY_NOP);
        return nfsd_return_attrs(nfserr, resp);
 }
@@ -215,11 +215,11 @@ nfsd_proc_create(struct svc_rqst *rqstp, struct nfsd_createargs *argp,
                SVCFH_fmt(dirfhp), argp->len, argp->name);
        /* First verify the parent file handle */
-        nfserr = fh_verify(rqstp, dirfhp, S_IFDIR, MAY_EXEC);
+        nfserr = fh_verify(rqstp, dirfhp, S_IFDIR, NFSD_MAY_EXEC);
        if (nfserr)
                goto done; /* must fh_put dirfhp even on error */
-        /* Check for MAY_WRITE in nfsd_create if necessary */
+        /* Check for NFSD_MAY_WRITE in nfsd_create if necessary */
        nfserr = nfserr_acces;
        if (!argp->len)
@@ -281,7 +281,7 @@ nfsd_proc_create(struct svc_rqst *rqstp, struct nfsd_createargs *argp,
                                        nfserr = nfsd_permission(rqstp,
                                                                 newfhp->fh_export,
                                                                 newfhp->fh_dentry,
-                                                                 MAY_WRITE|MAY_LOCAL_ACCESS);
+                                                                 NFSD_MAY_WRITE|NFSD_MAY_LOCAL_ACCESS);
                                        if (nfserr && nfserr != nfserr_rofs)
                                                goto out_unlock;
                                }
@@ -614,6 +614,7 @@ nfserrno (int errno)
 #endif
                { nfserr_stale, -ESTALE },
                { nfserr_jukebox, -ETIMEDOUT },
+                { nfserr_jukebox, -ERESTARTSYS },
                { nfserr_dropit, -EAGAIN },
                { nfserr_dropit, -ENOMEM },
                { nfserr_badname, -ESRCH },
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 941041f4b136..80292ff5e924 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -21,6 +21,7 @@
 #include <linux/smp_lock.h>
 #include <linux/freezer.h>
 #include <linux/fs_struct.h>
+#include <linux/kthread.h>
 #include <linux/sunrpc/types.h>
 #include <linux/sunrpc/stats.h>
@@ -36,28 +37,38 @@
 #define NFSDDBG_FACILITY        NFSDDBG_SVC
-/* these signals will be delivered to an nfsd thread 
- * when handling a request
- */
-#define ALLOWED_SIGS    (sigmask(SIGKILL))
-/* these signals will be delivered to an nfsd thread
- * when not handling a request. i.e. when waiting
- */
-#define SHUTDOWN_SIGS   (sigmask(SIGKILL) | sigmask(SIGHUP) | sigmask(SIGINT) | sigmask(SIGQUIT))
-/* if the last thread dies with SIGHUP, then the exports table is
- * left unchanged ( like 2.4-{0-9} ).  Any other signal will clear
- * the exports table (like 2.2).
- */
-#define SIG_NOCLEAN     SIGHUP
 extern struct svc_program       nfsd_program;
-static void                     nfsd(struct svc_rqst *rqstp);
+static int                      nfsd(void *vrqstp);
 struct timeval                  nfssvc_boot;
-       struct svc_serv          *nfsd_serv;
 static atomic_t                 nfsd_busy;
 static unsigned long            nfsd_last_call;
 static DEFINE_SPINLOCK(nfsd_call_lock);
+/*
+ * nfsd_mutex protects nfsd_serv -- both the pointer itself and the members
+ * of the svc_serv struct. In particular, ->sv_nrthreads but also to some
+ * extent ->sv_temp_socks and ->sv_permsocks. It also protects nfsdstats.th_cnt.
+ *
+ * If (outside the lock) nfsd_serv is non-NULL, then it must point to a
+ * properly initialised 'struct svc_serv' with ->sv_nrthreads > 0. That number
+ * of nfsd threads must exist and each must be listed in ->sp_all_threads in
+ * each entry of ->sv_pools[].
+ *
+ * Transitions of the thread count between zero and non-zero are of particular
+ * interest since the svc_serv needs to be created and initialized at that
+ * point, or freed.
+ *
+ * Finally, the nfsd_mutex also protects some of the global variables that are
+ * accessed when nfsd starts and that are settable via the write_* routines in
+ * nfsctl.c. In particular:
+ *
+ *      user_recovery_dirname
+ *      user_lease_time
+ *      nfsd_versions
+ */
+DEFINE_MUTEX(nfsd_mutex);
+struct svc_serv                 *nfsd_serv;
 #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
 static struct svc_stat  nfsd_acl_svcstats;
 static struct svc_version *     nfsd_acl_version[] = {
@@ -145,13 +156,14 @@ int nfsd_vers(int vers, enum vers_op change)
 int nfsd_nrthreads(void)
 {
-        if (nfsd_serv == NULL)
+        int rv = 0;
-                return 0;
+        mutex_lock(&nfsd_mutex);
-        else
+        if (nfsd_serv)
-                return nfsd_serv->sv_nrthreads;
+                rv = nfsd_serv->sv_nrthreads;
+        mutex_unlock(&nfsd_mutex);
+        return rv;
 }
-static int killsig;     /* signal that was used to kill last nfsd */
 static void nfsd_last_thread(struct svc_serv *serv)
 {
        /* When last nfsd thread exits we need to do some clean-up */
@@ -162,11 +174,9 @@ static void nfsd_last_thread(struct svc_serv *serv)
        nfsd_racache_shutdown();
        nfs4_state_shutdown();
-        printk(KERN_WARNING "nfsd: last server has exited\n");
+        printk(KERN_WARNING "nfsd: last server has exited, flushing export "
-        if (killsig != SIG_NOCLEAN) {
+                            "cache\n");
-                printk(KERN_WARNING "nfsd: unexporting all filesystems\n");
+        nfsd_export_flush();
-                nfsd_export_flush();
-        }
 }
 void nfsd_reset_versions(void)
@@ -190,13 +200,14 @@ void nfsd_reset_versions(void)
        }
 }
 int nfsd_create_serv(void)
 {
        int err = 0;
-        lock_kernel();
+        WARN_ON(!mutex_is_locked(&nfsd_mutex));
        if (nfsd_serv) {
                svc_get(nfsd_serv);
-                unlock_kernel();
                return 0;
        }
        if (nfsd_max_blksize == 0) {
@@ -217,13 +228,11 @@ int nfsd_create_serv(void)
        }
        atomic_set(&nfsd_busy, 0);
-        nfsd_serv = svc_create_pooled(&nfsd_program,
+        nfsd_serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize,
-                                      nfsd_max_blksize,
+                                      nfsd_last_thread, nfsd, THIS_MODULE);
-                                      nfsd_last_thread,
-                                      nfsd, SIG_NOCLEAN, THIS_MODULE);
        if (nfsd_serv == NULL)
                err = -ENOMEM;
-        unlock_kernel();
        do_gettimeofday(&nfssvc_boot);          /* record boot time */
        return err;
 }
@@ -282,6 +291,8 @@ int nfsd_set_nrthreads(int n, int *nthreads)
        int tot = 0;
        int err = 0;
+        WARN_ON(!mutex_is_locked(&nfsd_mutex));
        if (nfsd_serv == NULL || n <= 0)
                return 0;
@@ -316,7 +327,6 @@ int nfsd_set_nrthreads(int n, int *nthreads)
                nthreads[0] = 1;
        /* apply the new numbers */
-        lock_kernel();
        svc_get(nfsd_serv);
        for (i = 0; i < n; i++) {
                err = svc_set_num_threads(nfsd_serv, &nfsd_serv->sv_pools[i],
@@ -325,7 +335,6 @@ int nfsd_set_nrthreads(int n, int *nthreads)
                        break;
        }
        svc_destroy(nfsd_serv);
-        unlock_kernel();
        return err;
 }
@@ -334,8 +343,8 @@ int
 nfsd_svc(unsigned short port, int nrservs)
 {
        int     error;
-        
-        lock_kernel();
+        mutex_lock(&nfsd_mutex);
        dprintk("nfsd: creating service\n");
        error = -EINVAL;
        if (nrservs <= 0)
@@ -363,7 +372,7 @@ nfsd_svc(unsigned short port, int nrservs)
 failure:
        svc_destroy(nfsd_serv);         /* Release server */
 out:
-        unlock_kernel();
+        mutex_unlock(&nfsd_mutex);
        return error;
 }
@@ -391,18 +400,17 @@ update_thread_usage(int busy_threads)
 /*
 * This is the NFS server kernel thread
 */
-static void
+static int
-nfsd(struct svc_rqst *rqstp)
+nfsd(void *vrqstp)
 {
+        struct svc_rqst *rqstp = (struct svc_rqst *) vrqstp;
        struct fs_struct *fsp;
-        int             err;
+        int err, preverr = 0;
-        sigset_t shutdown_mask, allowed_mask;
        /* Lock module and set up kernel thread */
-        lock_kernel();
+        mutex_lock(&nfsd_mutex);
-        daemonize("nfsd");
-        /* After daemonize() this kernel thread shares current->fs
+        /* At this point, the thread shares current->fs
         * with the init process. We need to create files with a
         * umask of 0 instead of init's umask. */
        fsp = copy_fs_struct(current->fs);
@@ -414,14 +422,17 @@ nfsd(struct svc_rqst *rqstp)
        current->fs = fsp;
        current->fs->umask = 0;
-        siginitsetinv(&shutdown_mask, SHUTDOWN_SIGS);
+        /*
-        siginitsetinv(&allowed_mask, ALLOWED_SIGS);
+         * thread is spawned with all signals set to SIG_IGN, re-enable
+         * the ones that will bring down the thread
+         */
+        allow_signal(SIGKILL);
+        allow_signal(SIGHUP);
+        allow_signal(SIGINT);
+        allow_signal(SIGQUIT);
        nfsdstats.th_cnt++;
+        mutex_unlock(&nfsd_mutex);
-        rqstp->rq_task = current;
-        unlock_kernel();
        /*
         * We want less throttling in balance_dirty_pages() so that nfs to
@@ -435,26 +446,30 @@ nfsd(struct svc_rqst *rqstp)
         * The main request loop
         */
        for (;;) {
-                /* Block all but the shutdown signals */
-                sigprocmask(SIG_SETMASK, &shutdown_mask, NULL);
                /*
                 * Find a socket with data available and call its
                 * recvfrom routine.
                 */
                while ((err = svc_recv(rqstp, 60*60*HZ)) == -EAGAIN)
                        ;
-                if (err < 0)
+                if (err == -EINTR)
                        break;
+                else if (err < 0) {
+                        if (err != preverr) {
+                                printk(KERN_WARNING "%s: unexpected error "
+                                        "from svc_recv (%d)\n", __func__, -err);
+                                preverr = err;
+                        }
+                        schedule_timeout_uninterruptible(HZ);
+                        continue;
+                }
                update_thread_usage(atomic_read(&nfsd_busy));
                atomic_inc(&nfsd_busy);
                /* Lock the export hash tables for reading. */
                exp_readlock();
-                /* Process request with signals blocked.  */
-                sigprocmask(SIG_SETMASK, &allowed_mask, NULL);
                svc_process(rqstp);
                /* Unlock export hash tables */
@@ -463,22 +478,10 @@ nfsd(struct svc_rqst *rqstp)
                atomic_dec(&nfsd_busy);
        }
-        if (err != -EINTR) {
-                printk(KERN_WARNING "nfsd: terminating on error %d\n", -err);
-        } else {
-                unsigned int    signo;
-                for (signo = 1; signo <= _NSIG; signo++)
-                        if (sigismember(&current->pending.signal, signo) &&
-                            !sigismember(&current->blocked, signo))
-                                break;
-                killsig = signo;
-        }
        /* Clear signals before calling svc_exit_thread() */
        flush_signals(current);
-        lock_kernel();
+        mutex_lock(&nfsd_mutex);
        nfsdstats.th_cnt --;
 out:
@@ -486,8 +489,9 @@ out:
        svc_exit_thread(rqstp);
        /* Release module */
-        unlock_kernel();
+        mutex_unlock(&nfsd_mutex);
        module_put_and_exit(0);
+        return 0;
 }
 static __be32 map_new_errors(u32 vers, __be32 nfserr)
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index a3a291f771f4..18060bed5267 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -144,7 +144,7 @@ nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp,
        dprintk("nfsd: nfsd_lookup(fh %s, %.*s)\n", SVCFH_fmt(fhp), len,name);
        /* Obtain dentry and export. */
-        err = fh_verify(rqstp, fhp, S_IFDIR, MAY_EXEC);
+        err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_EXEC);
        if (err)
                return err;
@@ -262,14 +262,14 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
 {
        struct dentry   *dentry;
        struct inode    *inode;
-        int             accmode = MAY_SATTR;
+        int             accmode = NFSD_MAY_SATTR;
        int             ftype = 0;
        __be32          err;
        int             host_err;
        int             size_change = 0;
        if (iap->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_SIZE))
-                accmode |= MAY_WRITE|MAY_OWNER_OVERRIDE;
+                accmode |= NFSD_MAY_WRITE|NFSD_MAY_OWNER_OVERRIDE;
        if (iap->ia_valid & ATTR_SIZE)
                ftype = S_IFREG;
@@ -331,7 +331,8 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
         */
        if (iap->ia_valid & ATTR_SIZE) {
                if (iap->ia_size < inode->i_size) {
-                        err = nfsd_permission(rqstp, fhp->fh_export, dentry, MAY_TRUNC|MAY_OWNER_OVERRIDE);
+                        err = nfsd_permission(rqstp, fhp->fh_export, dentry,
+                                        NFSD_MAY_TRUNC|NFSD_MAY_OWNER_OVERRIDE);
                        if (err)
                                goto out;
                }
@@ -462,7 +463,7 @@ nfsd4_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp,
        unsigned int flags = 0;
        /* Get inode */
-        error = fh_verify(rqstp, fhp, 0 /* S_IFREG */, MAY_SATTR);
+        error = fh_verify(rqstp, fhp, 0 /* S_IFREG */, NFSD_MAY_SATTR);
        if (error)
                return error;
@@ -563,20 +564,20 @@ struct accessmap {
        int             how;
 };
 static struct accessmap nfs3_regaccess[] = {
-    {   NFS3_ACCESS_READ,       MAY_READ                        },
+    {   NFS3_ACCESS_READ,       NFSD_MAY_READ                   },
-    {   NFS3_ACCESS_EXECUTE,    MAY_EXEC                        },
+    {   NFS3_ACCESS_EXECUTE,    NFSD_MAY_EXEC                   },
-    {   NFS3_ACCESS_MODIFY,     MAY_WRITE|MAY_TRUNC             },
+    {   NFS3_ACCESS_MODIFY,     NFSD_MAY_WRITE|NFSD_MAY_TRUNC   },
-    {   NFS3_ACCESS_EXTEND,     MAY_WRITE                       },
+    {   NFS3_ACCESS_EXTEND,     NFSD_MAY_WRITE                  },
    {   0,                      0                               }
 };
 static struct accessmap nfs3_diraccess[] = {
-    {   NFS3_ACCESS_READ,       MAY_READ                        },
+    {   NFS3_ACCESS_READ,       NFSD_MAY_READ                   },
-    {   NFS3_ACCESS_LOOKUP,     MAY_EXEC                        },
+    {   NFS3_ACCESS_LOOKUP,     NFSD_MAY_EXEC                   },
-    {   NFS3_ACCESS_MODIFY,     MAY_EXEC|MAY_WRITE|MAY_TRUNC    },
+    {   NFS3_ACCESS_MODIFY,     NFSD_MAY_EXEC|NFSD_MAY_WRITE|NFSD_MAY_TRUNC},
-    {   NFS3_ACCESS_EXTEND,     MAY_EXEC|MAY_WRITE              },
+    {   NFS3_ACCESS_EXTEND,     NFSD_MAY_EXEC|NFSD_MAY_WRITE    },
-    {   NFS3_ACCESS_DELETE,     MAY_REMOVE                      },
+    {   NFS3_ACCESS_DELETE,     NFSD_MAY_REMOVE                 },
    {   0,                      0                               }
 };
@@ -589,10 +590,10 @@ static struct accessmap	nfs3_anyaccess[] = {
         * mainly at mode bits, and we make sure to ignore read-only
         * filesystem checks
         */
-    {   NFS3_ACCESS_READ,       MAY_READ                        },
+    {   NFS3_ACCESS_READ,       NFSD_MAY_READ                   },
-    {   NFS3_ACCESS_EXECUTE,    MAY_EXEC                        },
+    {   NFS3_ACCESS_EXECUTE,    NFSD_MAY_EXEC                   },
-    {   NFS3_ACCESS_MODIFY,     MAY_WRITE|MAY_LOCAL_ACCESS      },
+    {   NFS3_ACCESS_MODIFY,     NFSD_MAY_WRITE|NFSD_MAY_LOCAL_ACCESS    },
-    {   NFS3_ACCESS_EXTEND,     MAY_WRITE|MAY_LOCAL_ACCESS      },
+    {   NFS3_ACCESS_EXTEND,     NFSD_MAY_WRITE|NFSD_MAY_LOCAL_ACCESS    },
    {   0,                      0                               }
 };
@@ -606,7 +607,7 @@ nfsd_access(struct svc_rqst *rqstp, struct svc_fh *fhp, u32 *access, u32 *suppor
        u32                     query, result = 0, sresult = 0;
        __be32                  error;
-        error = fh_verify(rqstp, fhp, 0, MAY_NOP);
+        error = fh_verify(rqstp, fhp, 0, NFSD_MAY_NOP);
        if (error)
                goto out;
@@ -678,7 +679,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
         * and (hopefully) checked permission - so allow OWNER_OVERRIDE
         * in case a chmod has now revoked permission.
         */
-        err = fh_verify(rqstp, fhp, type, access | MAY_OWNER_OVERRIDE);
+        err = fh_verify(rqstp, fhp, type, access | NFSD_MAY_OWNER_OVERRIDE);
        if (err)
                goto out;
@@ -689,7 +690,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
         * or any access when mandatory locking enabled
         */
        err = nfserr_perm;
-        if (IS_APPEND(inode) && (access & MAY_WRITE))
+        if (IS_APPEND(inode) && (access & NFSD_MAY_WRITE))
                goto out;
        /*
         * We must ignore files (but only files) which might have mandatory
@@ -706,14 +707,14 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
         * Check to see if there are any leases on this file.
         * This may block while leases are broken.
         */
-        host_err = break_lease(inode, O_NONBLOCK | ((access & MAY_WRITE) ? FMODE_WRITE : 0));
+        host_err = break_lease(inode, O_NONBLOCK | ((access & NFSD_MAY_WRITE) ? FMODE_WRITE : 0));
        if (host_err == -EWOULDBLOCK)
                host_err = -ETIMEDOUT;
        if (host_err) /* NOMEM or WOULDBLOCK */
                goto out_nfserr;
-        if (access & MAY_WRITE) {
+        if (access & NFSD_MAY_WRITE) {
-                if (access & MAY_READ)
+                if (access & NFSD_MAY_READ)
                        flags = O_RDWR|O_LARGEFILE;
                else
                        flags = O_WRONLY|O_LARGEFILE;
@@ -1069,12 +1070,12 @@ nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
        if (file) {
                err = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry,
-                                MAY_READ|MAY_OWNER_OVERRIDE);
+                                NFSD_MAY_READ|NFSD_MAY_OWNER_OVERRIDE);
                if (err)
                        goto out;
                err = nfsd_vfs_read(rqstp, fhp, file, offset, vec, vlen, count);
        } else {
-                err = nfsd_open(rqstp, fhp, S_IFREG, MAY_READ, &file);
+                err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_READ, &file);
                if (err)
                        goto out;
                err = nfsd_vfs_read(rqstp, fhp, file, offset, vec, vlen, count);
@@ -1098,13 +1099,13 @@ nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
        if (file) {
                err = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry,
-                                MAY_WRITE|MAY_OWNER_OVERRIDE);
+                                NFSD_MAY_WRITE|NFSD_MAY_OWNER_OVERRIDE);
                if (err)
                        goto out;
                err = nfsd_vfs_write(rqstp, fhp, file, offset, vec, vlen, cnt,
                                stablep);
        } else {
-                err = nfsd_open(rqstp, fhp, S_IFREG, MAY_WRITE, &file);
+                err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_WRITE, &file);
                if (err)
                        goto out;
@@ -1136,7 +1137,8 @@ nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp,
        if ((u64)count > ~(u64)offset)
                return nfserr_inval;
-        if ((err = nfsd_open(rqstp, fhp, S_IFREG, MAY_WRITE, &file)) != 0)
+        err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_WRITE, &file);
+        if (err)
                return err;
        if (EX_ISSYNC(fhp->fh_export)) {
                if (file->f_op && file->f_op->fsync) {
@@ -1197,7 +1199,7 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
        if (isdotent(fname, flen))
                goto out;
-        err = fh_verify(rqstp, fhp, S_IFDIR, MAY_CREATE);
+        err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_CREATE);
        if (err)
                goto out;
@@ -1248,36 +1250,34 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
                iap->ia_mode = 0;
        iap->ia_mode = (iap->ia_mode & S_IALLUGO) | type;
+        err = nfserr_inval;
+        if (!S_ISREG(type) && !S_ISDIR(type) && !special_file(type)) {
+                printk(KERN_WARNING "nfsd: bad file type %o in nfsd_create\n",
+                       type);
+                goto out;
+        }
+        host_err = mnt_want_write(fhp->fh_export->ex_path.mnt);
+        if (host_err)
+                goto out_nfserr;
        /*
         * Get the dir op function pointer.
         */
        err = 0;
        switch (type) {
        case S_IFREG:
-                host_err = mnt_want_write(fhp->fh_export->ex_path.mnt);
-                if (host_err)
-                        goto out_nfserr;
                host_err = vfs_create(dirp, dchild, iap->ia_mode, NULL);
                break;
        case S_IFDIR:
-                host_err = mnt_want_write(fhp->fh_export->ex_path.mnt);
-                if (host_err)
-                        goto out_nfserr;
                host_err = vfs_mkdir(dirp, dchild, iap->ia_mode);
                break;
        case S_IFCHR:
        case S_IFBLK:
        case S_IFIFO:
        case S_IFSOCK:
-                host_err = mnt_want_write(fhp->fh_export->ex_path.mnt);
-                if (host_err)
-                        goto out_nfserr;
                host_err = vfs_mknod(dirp, dchild, iap->ia_mode, rdev);
                break;
-        default:
-                printk("nfsd: bad file type %o in nfsd_create\n", type);
-                host_err = -EINVAL;
-                goto out_nfserr;
        }
        if (host_err < 0) {
                mnt_drop_write(fhp->fh_export->ex_path.mnt);
@@ -1289,7 +1289,6 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
                write_inode_now(dchild->d_inode, 1);
        }
        err2 = nfsd_create_setattr(rqstp, resfhp, iap);
        if (err2)
                err = err2;
@@ -1334,7 +1333,7 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp,
                goto out;
        if (!(iap->ia_valid & ATTR_MODE))
                iap->ia_mode = 0;
-        err = fh_verify(rqstp, fhp, S_IFDIR, MAY_CREATE);
+        err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_CREATE);
        if (err)
                goto out;
@@ -1471,7 +1470,7 @@ nfsd_readlink(struct svc_rqst *rqstp, struct svc_fh *fhp, char *buf, int *lenp)
        __be32          err;
        int             host_err;
-        err = fh_verify(rqstp, fhp, S_IFLNK, MAY_NOP);
+        err = fh_verify(rqstp, fhp, S_IFLNK, NFSD_MAY_NOP);
        if (err)
                goto out;
@@ -1517,7 +1516,6 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp,
        struct dentry   *dentry, *dnew;
        __be32          err, cerr;
        int             host_err;
-        umode_t         mode;
        err = nfserr_noent;
        if (!flen || !plen)
@@ -1526,7 +1524,7 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp,
        if (isdotent(fname, flen))
                goto out;
-        err = fh_verify(rqstp, fhp, S_IFDIR, MAY_CREATE);
+        err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_CREATE);
        if (err)
                goto out;
        fh_lock(fhp);
@@ -1536,11 +1534,6 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp,
        if (IS_ERR(dnew))
                goto out_nfserr;
-        mode = S_IALLUGO;
-        /* Only the MODE ATTRibute is even vaguely meaningful */
-        if (iap && (iap->ia_valid & ATTR_MODE))
-                mode = iap->ia_mode & S_IALLUGO;
        host_err = mnt_want_write(fhp->fh_export->ex_path.mnt);
        if (host_err)
                goto out_nfserr;
@@ -1552,11 +1545,11 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp,
                else {
                        strncpy(path_alloced, path, plen);
                        path_alloced[plen] = 0;
-                        host_err = vfs_symlink(dentry->d_inode, dnew, path_alloced, mode);
+                        host_err = vfs_symlink(dentry->d_inode, dnew, path_alloced);
                        kfree(path_alloced);
                }
        } else
-                host_err = vfs_symlink(dentry->d_inode, dnew, path, mode);
+                host_err = vfs_symlink(dentry->d_inode, dnew, path);
        if (!host_err) {
                if (EX_ISSYNC(fhp->fh_export))
@@ -1591,10 +1584,10 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,
        __be32          err;
        int             host_err;
-        err = fh_verify(rqstp, ffhp, S_IFDIR, MAY_CREATE);
+        err = fh_verify(rqstp, ffhp, S_IFDIR, NFSD_MAY_CREATE);
        if (err)
                goto out;
-        err = fh_verify(rqstp, tfhp, -S_IFDIR, MAY_NOP);
+        err = fh_verify(rqstp, tfhp, -S_IFDIR, NFSD_MAY_NOP);
        if (err)
                goto out;
@@ -1661,10 +1654,10 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
        __be32          err;
        int             host_err;
-        err = fh_verify(rqstp, ffhp, S_IFDIR, MAY_REMOVE);
+        err = fh_verify(rqstp, ffhp, S_IFDIR, NFSD_MAY_REMOVE);
        if (err)
                goto out;
-        err = fh_verify(rqstp, tfhp, S_IFDIR, MAY_CREATE);
+        err = fh_verify(rqstp, tfhp, S_IFDIR, NFSD_MAY_CREATE);
        if (err)
                goto out;
@@ -1768,7 +1761,7 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
        err = nfserr_acces;
        if (!flen || isdotent(fname, flen))
                goto out;
-        err = fh_verify(rqstp, fhp, S_IFDIR, MAY_REMOVE);
+        err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_REMOVE);
        if (err)
                goto out;
@@ -1834,7 +1827,7 @@ nfsd_readdir(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t *offsetp,
        struct file     *file;
        loff_t          offset = *offsetp;
-        err = nfsd_open(rqstp, fhp, S_IFDIR, MAY_READ, &file);
+        err = nfsd_open(rqstp, fhp, S_IFDIR, NFSD_MAY_READ, &file);
        if (err)
                goto out;
@@ -1875,7 +1868,7 @@ out:
 __be32
 nfsd_statfs(struct svc_rqst *rqstp, struct svc_fh *fhp, struct kstatfs *stat)
 {
-        __be32 err = fh_verify(rqstp, fhp, 0, MAY_NOP);
+        __be32 err = fh_verify(rqstp, fhp, 0, NFSD_MAY_NOP);
        if (!err && vfs_statfs(fhp->fh_dentry,stat))
                err = nfserr_io;
        return err;
@@ -1896,18 +1889,18 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp,
        struct inode    *inode = dentry->d_inode;
        int             err;
-        if (acc == MAY_NOP)
+        if (acc == NFSD_MAY_NOP)
                return 0;
 #if 0
        dprintk("nfsd: permission 0x%x%s%s%s%s%s%s%s mode 0%o%s%s%s\n",
                acc,
-                (acc & MAY_READ)?       " read"  : "",
+                (acc & NFSD_MAY_READ)?  " read"  : "",
-                (acc & MAY_WRITE)?      " write" : "",
+                (acc & NFSD_MAY_WRITE)? " write" : "",
-                (acc & MAY_EXEC)?       " exec"  : "",
+                (acc & NFSD_MAY_EXEC)?  " exec"  : "",
-                (acc & MAY_SATTR)?      " sattr" : "",
+                (acc & NFSD_MAY_SATTR)? " sattr" : "",
-                (acc & MAY_TRUNC)?      " trunc" : "",
+                (acc & NFSD_MAY_TRUNC)? " trunc" : "",
-                (acc & MAY_LOCK)?       " lock"  : "",
+                (acc & NFSD_MAY_LOCK)?  " lock"  : "",
-                (acc & MAY_OWNER_OVERRIDE)? " owneroverride" : "",
+                (acc & NFSD_MAY_OWNER_OVERRIDE)? " owneroverride" : "",
                inode->i_mode,
                IS_IMMUTABLE(inode)?    " immut" : "",
                IS_APPEND(inode)?       " append" : "",
@@ -1920,18 +1913,18 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp,
         * system.  But if it is IRIX doing check on write-access for a 
         * device special file, we ignore rofs.
         */
-        if (!(acc & MAY_LOCAL_ACCESS))
+        if (!(acc & NFSD_MAY_LOCAL_ACCESS))
-                if (acc & (MAY_WRITE | MAY_SATTR | MAY_TRUNC)) {
+                if (acc & (NFSD_MAY_WRITE | NFSD_MAY_SATTR | NFSD_MAY_TRUNC)) {
                        if (exp_rdonly(rqstp, exp) ||
                            __mnt_is_readonly(exp->ex_path.mnt))
                                return nfserr_rofs;
-                        if (/* (acc & MAY_WRITE) && */ IS_IMMUTABLE(inode))
+                        if (/* (acc & NFSD_MAY_WRITE) && */ IS_IMMUTABLE(inode))
                                return nfserr_perm;
                }
-        if ((acc & MAY_TRUNC) && IS_APPEND(inode))
+        if ((acc & NFSD_MAY_TRUNC) && IS_APPEND(inode))
                return nfserr_perm;
-        if (acc & MAY_LOCK) {
+        if (acc & NFSD_MAY_LOCK) {
                /* If we cannot rely on authentication in NLM requests,
                 * just allow locks, otherwise require read permission, or
                 * ownership
@@ -1939,7 +1932,7 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp,
                if (exp->ex_flags & NFSEXP_NOAUTHNLM)
                        return 0;
                else
-                        acc = MAY_READ | MAY_OWNER_OVERRIDE;
+                        acc = NFSD_MAY_READ | NFSD_MAY_OWNER_OVERRIDE;
        }
        /*
         * The file owner always gets access permission for accesses that
@@ -1955,16 +1948,17 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp,
         * We must trust the client to do permission checking - using "ACCESS"
         * with NFSv3.
         */
-        if ((acc & MAY_OWNER_OVERRIDE) &&
+        if ((acc & NFSD_MAY_OWNER_OVERRIDE) &&
            inode->i_uid == current->fsuid)
                return 0;
-        err = permission(inode, acc & (MAY_READ|MAY_WRITE|MAY_EXEC), NULL);
+        /* This assumes NFSD_MAY_{READ,WRITE,EXEC} == MAY_{READ,WRITE,EXEC} */
+        err = inode_permission(inode, acc & (MAY_READ|MAY_WRITE|MAY_EXEC));
        /* Allow read access to binaries even when mode 111 */
        if (err == -EACCES && S_ISREG(inode->i_mode) &&
-            acc == (MAY_READ | MAY_OWNER_OVERRIDE))
+            acc == (NFSD_MAY_READ | NFSD_MAY_OWNER_OVERRIDE))
-                err = permission(inode, MAY_EXEC, NULL);
+                err = inode_permission(inode, MAY_EXEC);
        return err? nfserrno(err) : 0;
 }
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index 3c5550cd11d6..d020866d4232 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -2118,7 +2118,7 @@ static ssize_t ntfs_file_aio_write_nolock(struct kiocb *iocb,
                goto out;
        if (!count)
                goto out;
-        err = remove_suid(file->f_path.dentry);
+        err = file_remove_suid(file);
        if (err)
                goto out;
        file_update_time(file);
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index 3e76f3b216bc..4a46743b5077 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -3080,7 +3080,7 @@ struct kmem_cache *ntfs_inode_cache;
 struct kmem_cache *ntfs_big_inode_cache;
 /* Init once constructor for the inode slab cache. */
-static void ntfs_big_inode_init_once(struct kmem_cache *cachep, void *foo)
+static void ntfs_big_inode_init_once(void *foo)
 {
        ntfs_inode *ni = (ntfs_inode *)foo;
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index 443d108211ab..7dce1612553e 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -1489,31 +1489,22 @@ static struct o2hb_heartbeat_group *to_o2hb_heartbeat_group(struct config_group
                : NULL;
 }
-static int o2hb_heartbeat_group_make_item(struct config_group *group,
+static struct config_item *o2hb_heartbeat_group_make_item(struct config_group *group,
-                                          const char *name,
+                                                          const char *name)
-                                          struct config_item **new_item)
 {
        struct o2hb_region *reg = NULL;
-        int ret = 0;
        reg = kzalloc(sizeof(struct o2hb_region), GFP_KERNEL);
-        if (reg == NULL) {
+        if (reg == NULL)
-                ret = -ENOMEM;
+                return ERR_PTR(-ENOMEM);
-                goto out;
-        }
        config_item_init_type_name(&reg->hr_item, name, &o2hb_region_type);
-        *new_item = &reg->hr_item;
        spin_lock(&o2hb_live_lock);
        list_add_tail(&reg->hr_all_item, &o2hb_all_regions);
        spin_unlock(&o2hb_live_lock);
-out:
-        if (ret)
-                kfree(reg);
-        return ret;
+        return &reg->hr_item;
 }
 static void o2hb_heartbeat_group_drop_item(struct config_group *group,
diff --git a/fs/ocfs2/cluster/nodemanager.c b/fs/ocfs2/cluster/nodemanager.c
index b364b7052e46..816a3f61330c 100644
--- a/fs/ocfs2/cluster/nodemanager.c
+++ b/fs/ocfs2/cluster/nodemanager.c
@@ -644,35 +644,23 @@ out:
        return ret;
 }
-static int o2nm_node_group_make_item(struct config_group *group,
+static struct config_item *o2nm_node_group_make_item(struct config_group *group,
-                                     const char *name,
+                                                     const char *name)
-                                     struct config_item **new_item)
 {
        struct o2nm_node *node = NULL;
-        int ret = 0;
-        if (strlen(name) > O2NM_MAX_NAME_LEN) {
+        if (strlen(name) > O2NM_MAX_NAME_LEN)
-                ret = -ENAMETOOLONG;
+                return ERR_PTR(-ENAMETOOLONG);
-                goto out;
-        }
        node = kzalloc(sizeof(struct o2nm_node), GFP_KERNEL);
-        if (node == NULL) {
+        if (node == NULL)
-                ret = -ENOMEM;
+                return ERR_PTR(-ENOMEM);
-                goto out;
-        }
        strcpy(node->nd_name, name); /* use item.ci_namebuf instead? */
        config_item_init_type_name(&node->nd_item, name, &o2nm_node_type);
        spin_lock_init(&node->nd_lock);
-        *new_item = &node->nd_item;
+        return &node->nd_item;
-out:
-        if (ret)
-                kfree(node);
-        return ret;
 }
 static void o2nm_node_group_drop_item(struct config_group *group,
@@ -756,31 +744,25 @@ static struct o2nm_cluster_group *to_o2nm_cluster_group(struct config_group *gro
 }
 #endif
-static int o2nm_cluster_group_make_group(struct config_group *group,
+static struct config_group *o2nm_cluster_group_make_group(struct config_group *group,
-                                         const char *name,
+                                                          const char *name)
-                                         struct config_group **new_group)
 {
        struct o2nm_cluster *cluster = NULL;
        struct o2nm_node_group *ns = NULL;
-        struct config_group *o2hb_group = NULL;
+        struct config_group *o2hb_group = NULL, *ret = NULL;
        void *defs = NULL;
-        int ret = 0;
        /* this runs under the parent dir's i_mutex; there can be only
         * one caller in here at a time */
-        if (o2nm_single_cluster) {
+        if (o2nm_single_cluster)
-                ret = -ENOSPC;
+                return ERR_PTR(-ENOSPC);
-                goto out;
-        }
        cluster = kzalloc(sizeof(struct o2nm_cluster), GFP_KERNEL);
        ns = kzalloc(sizeof(struct o2nm_node_group), GFP_KERNEL);
        defs = kcalloc(3, sizeof(struct config_group *), GFP_KERNEL);
        o2hb_group = o2hb_alloc_hb_set();
-        if (cluster == NULL || ns == NULL || o2hb_group == NULL || defs == NULL) {
+        if (cluster == NULL || ns == NULL || o2hb_group == NULL || defs == NULL)
-                ret = -ENOMEM;
                goto out;
-        }
        config_group_init_type_name(&cluster->cl_group, name,
                                    &o2nm_cluster_type);
@@ -797,15 +779,16 @@ static int o2nm_cluster_group_make_group(struct config_group *group,
        cluster->cl_idle_timeout_ms    = O2NET_IDLE_TIMEOUT_MS_DEFAULT;
        cluster->cl_keepalive_delay_ms = O2NET_KEEPALIVE_DELAY_MS_DEFAULT;
-        *new_group = &cluster->cl_group;
+        ret = &cluster->cl_group;
        o2nm_single_cluster = cluster;
 out:
-        if (ret) {
+        if (ret == NULL) {
                kfree(cluster);
                kfree(ns);
                o2hb_free_hb_set(o2hb_group);
                kfree(defs);
+                ret = ERR_PTR(-ENOMEM);
        }
        return ret;
diff --git a/fs/ocfs2/dlm/dlmfs.c b/fs/ocfs2/dlm/dlmfs.c
index e48aba698b77..533a789c3ef8 100644
--- a/fs/ocfs2/dlm/dlmfs.c
+++ b/fs/ocfs2/dlm/dlmfs.c
@@ -267,8 +267,7 @@ static ssize_t dlmfs_file_write(struct file *filp,
        return writelen;
 }
-static void dlmfs_init_once(struct kmem_cache *cachep,
+static void dlmfs_init_once(void *foo)
-                            void *foo)
 {
        struct dlmfs_inode_private *ip =
                (struct dlmfs_inode_private *) foo;
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index e8514e8b6ce8..be2dd95d3a1d 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1176,7 +1176,7 @@ bail:
        return err;
 }
-int ocfs2_permission(struct inode *inode, int mask, struct nameidata *nd)
+int ocfs2_permission(struct inode *inode, int mask)
 {
        int ret;
diff --git a/fs/ocfs2/file.h b/fs/ocfs2/file.h
index 048ddcaf5c80..1e27b4d017ea 100644
--- a/fs/ocfs2/file.h
+++ b/fs/ocfs2/file.h
@@ -62,8 +62,7 @@ int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_dinode *di,
 int ocfs2_setattr(struct dentry *dentry, struct iattr *attr);
 int ocfs2_getattr(struct vfsmount *mnt, struct dentry *dentry,
                  struct kstat *stat);
-int ocfs2_permission(struct inode *inode, int mask,
+int ocfs2_permission(struct inode *inode, int mask);
-                     struct nameidata *nd);
 int ocfs2_should_update_atime(struct inode *inode,
                              struct vfsmount *vfsmnt);
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index ccecfe5094fa..2560b33889aa 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1118,7 +1118,7 @@ bail:
        return status;
 }
-static void ocfs2_inode_init_once(struct kmem_cache *cachep, void *data)
+static void ocfs2_inode_init_once(void *data)
 {
        struct ocfs2_inode_info *oi = data;
diff --git a/fs/omfs/Makefile b/fs/omfs/Makefile
new file mode 100644
index 000000000000..8b82b63f1129
--- /dev/null
+++ b/fs/omfs/Makefile
@@ -0,0 +1,4 @@
+obj-$(CONFIG_OMFS_FS) += omfs.o
+omfs-y := bitmap.o dir.o file.o inode.o
diff --git a/fs/omfs/bitmap.c b/fs/omfs/bitmap.c
new file mode 100644
index 000000000000..dc75f22be3f2
--- /dev/null
+++ b/fs/omfs/bitmap.c
@@ -0,0 +1,192 @@
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/buffer_head.h>
+#include <asm/div64.h>
+#include "omfs.h"
+/*
+ * Returns the number of clear bits summed over every in-memory bitmap
+ * block (sbi->s_imap[0 .. s_imap_size - 1]), each treated as holding
+ * 8 * sb->s_blocksize bits.
+ */
+unsigned long omfs_count_free(struct super_block *sb)
+{
+        struct omfs_sb_info *sbi = OMFS_SB(sb);
+        int nbits = sb->s_blocksize * 8;
+        unsigned long free_bits = 0;
+        unsigned int map = 0;
+        while (map < sbi->s_imap_size) {
+                free_bits += nbits - bitmap_weight(sbi->s_imap[map], nbits);
+                map++;
+        }
+        return free_bits;
+}
+/*
+ *  Measures the run of clear bits that begins at 'bit' in the first
+ *  bitmap of addr[], following it into the next bitmaps (addrlen of
+ *  them in total, nbits bits each) when it reaches the end of one.
+ *  The result is capped at max.
+ *  Called with bitmap lock.
+ */
+static int count_run(unsigned long **addr, int nbits,
+                int addrlen, int bit, int max)
+{
+        int total = 0;
+        while (addrlen > 0) {
+                int next_set = find_next_bit(*addr, nbits, bit);
+                total += next_set - bit;
+                /* a set bit ended the run, or we already have enough */
+                if (next_set < nbits || total > max)
+                        break;
+                /* run reached the end of this bitmap: go on with the next */
+                bit = 0;
+                addr++;
+                addrlen--;
+        }
+        return min(total, max);
+}
+/*
+ * Sets or clears the run of count bits starting with bit.
+ * Called with bitmap lock.
+ *
+ * The run starts in bitmap block 'map' and may continue into the
+ * following bitmap blocks, each holding nbits bits.  Every bit is
+ * changed twice: in the in-memory copy (sbi->s_imap[]) and in the
+ * bitmap block read through sb_bread(), which is then marked dirty.
+ * Returns 0 on success or -ENOMEM when a bitmap block cannot be read.
+ */
+static int set_run(struct super_block *sb, int map,
+                int nbits, int bit, int count, int set)
+{
+        int i;
+        int err;
+        struct buffer_head *bh;
+        struct omfs_sb_info *sbi = OMFS_SB(sb);
+        err = -ENOMEM;
+        bh = sb_bread(sb, clus_to_blk(sbi, sbi->s_bitmap_ino) + map);
+        if (!bh)
+                goto out;
+        for (i = 0; i < count; i++, bit++) {
+                if (bit >= nbits) {
+                        /* walked off this bitmap block: flush it, load the next */
+                        bit = 0;
+                        map++;
+                        mark_buffer_dirty(bh);
+                        brelse(bh);
+                        bh = sb_bread(sb,
+                                clus_to_blk(sbi, sbi->s_bitmap_ino) + map);
+                        if (!bh)
+                                goto out;
+                }
+                if (set) {
+                        set_bit(bit, sbi->s_imap[map]);
+                        set_bit(bit, (long *) bh->b_data);
+                } else {
+                        clear_bit(bit, sbi->s_imap[map]);
+                        clear_bit(bit, (long *) bh->b_data);
+                }
+        }
+        mark_buffer_dirty(bh);
+        brelse(bh);
+        err = 0;
+out:
+        return err;
+}
+/*
+ * Tries to allocate exactly one block.  Returns true if successful.
+ *
+ * Returns 0 when the block lies beyond the bitmap, is already in use,
+ * or its on-disk bitmap block cannot be read; in the last case the
+ * in-memory bit is released again so both copies keep agreeing.
+ */
+int omfs_allocate_block(struct super_block *sb, u64 block)
+{
+        struct buffer_head *bh;
+        struct omfs_sb_info *sbi = OMFS_SB(sb);
+        int bits_per_entry = 8 * sb->s_blocksize;
+        int map, bit;
+        int ret = 0;
+        u64 tmp;
+        /* split the block number into bitmap index and bit within it */
+        tmp = block;
+        bit = do_div(tmp, bits_per_entry);
+        map = tmp;
+        mutex_lock(&sbi->s_bitmap_lock);
+        if (map >= sbi->s_imap_size || test_and_set_bit(bit, sbi->s_imap[map]))
+                goto out;
+        if (sbi->s_bitmap_ino > 0) {
+                bh = sb_bread(sb, clus_to_blk(sbi, sbi->s_bitmap_ino) + map);
+                if (!bh) {
+                        /* we report failure, so do not keep the bit claimed */
+                        clear_bit(bit, sbi->s_imap[map]);
+                        goto out;
+                }
+                set_bit(bit, (long *) bh->b_data);
+                mark_buffer_dirty(bh);
+                brelse(bh);
+        }
+        ret = 1;
+out:
+        mutex_unlock(&sbi->s_bitmap_lock);
+        return ret;
+}
+/*
+ *  Tries to allocate a set of blocks.  The request size depends on the
+ *  type: for inodes, we must allocate sbi->s_mirrors blocks, and for file
+ *  blocks, we try to allocate sbi->s_clustersize, but can always get away
+ *  with just one block.
+ *
+ *  Scans the in-memory bitmaps for the first run of at least min_request
+ *  clear bits; the run is capped at max_request.  When one is found its
+ *  first block number is stored in *return_block, its length in
+ *  *return_size, and the result of marking it used with set_run() is
+ *  returned.  Returns -ENOSPC when no run is long enough.
+ */
+int omfs_allocate_range(struct super_block *sb,
+                        int min_request,
+                        int max_request,
+                        u64 *return_block,
+                        int *return_size)
+{
+        struct omfs_sb_info *sbi = OMFS_SB(sb);
+        int bits_per_entry = 8 * sb->s_blocksize;
+        int ret = 0;
+        int i, run, bit;
+        mutex_lock(&sbi->s_bitmap_lock);
+        for (i = 0; i < sbi->s_imap_size; i++) {
+                bit = 0;
+                while (bit < bits_per_entry) {
+                        bit = find_next_zero_bit(sbi->s_imap[i], bits_per_entry,
+                                bit);
+                        if (bit == bits_per_entry)
+                                break;
+                        /* the run may continue into the following bitmaps */
+                        run = count_run(&sbi->s_imap[i], bits_per_entry,
+                                sbi->s_imap_size-i, bit, max_request);
+                        if (run >= min_request)
+                                goto found;
+                        /* too short: resume the search just past this run */
+                        bit += run;
+                }
+        }
+        ret = -ENOSPC;
+        goto out;
+found:
+        *return_block = i * bits_per_entry + bit;
+        *return_size = run;
+        ret = set_run(sb, i, bits_per_entry, bit, run, 1);
+out:
+        mutex_unlock(&sbi->s_bitmap_lock);
+        return ret;
+}
+/*
+ * Marks count blocks, beginning at the given block, as free again.
+ * A start block beyond the bitmap is ignored and reported as success;
+ * otherwise the result of set_run() is returned.
+ */
+int omfs_clear_range(struct super_block *sb, u64 block, int count)
+{
+        struct omfs_sb_info *sbi = OMFS_SB(sb);
+        int bits_per_entry = 8 * sb->s_blocksize;
+        u64 quotient = block;
+        int first_bit, map, err;
+        /* quotient ends up as the bitmap index, the result is the bit in it */
+        first_bit = do_div(quotient, bits_per_entry);
+        map = quotient;
+        if (map >= sbi->s_imap_size)
+                return 0;
+        mutex_lock(&sbi->s_bitmap_lock);
+        err = set_run(sb, map, bits_per_entry, first_bit, count, 0);
+        mutex_unlock(&sbi->s_bitmap_lock);
+        return err;
+}
diff --git a/fs/omfs/dir.c b/fs/omfs/dir.c
new file mode 100644
index 000000000000..05a5bc31e4bd
--- /dev/null
+++ b/fs/omfs/dir.c
@@ -0,0 +1,504 @@
+/*
+ * OMFS (as used by RIO Karma) directory operations.
+ * Copyright (C) 2005 Bob Copeland <me@bobcopeland.com>
+ * Released under GPL v2.
+ */
+#include <linux/fs.h>
+#include <linux/ctype.h>
+#include <linux/buffer_head.h>
+#include "omfs.h"
+/*
+ * Hashes the first namelen characters of name, ignoring case: each
+ * lower-cased character is shifted left by its position modulo 24 and
+ * XORed into the hash, which is finally reduced modulo mod.
+ */
+static int omfs_hash(const char *name, int namelen, int mod)
+{
+        int hash = 0;
+        int pos = 0;
+        while (pos < namelen) {
+                hash ^= tolower(name[pos]) << (pos % 24);
+                pos++;
+        }
+        return hash % mod;
+}
+/*
+ * Reads the directory's own block and reports, through *ofs, the byte
+ * offset inside it of the hash bucket that name belongs to.  Buckets
+ * are 8 bytes wide and start at OMFS_DIR_START.  Returns the buffer
+ * head from sb_bread(), which is NULL when the read fails.
+ */
+static struct buffer_head *omfs_get_bucket(struct inode *dir,
+                const char *name, int namelen, int *ofs)
+{
+        int block = clus_to_blk(OMFS_SB(dir->i_sb), dir->i_ino);
+        int nbuckets = (dir->i_size - OMFS_DIR_START)/8;
+        *ofs = OMFS_DIR_START + omfs_hash(name, namelen, nbuckets) * 8;
+        return sb_bread(dir->i_sb, block);
+}
+/*
+ * Walks one hash chain, starting at block, looking for the entry whose
+ * name is exactly the namelen bytes at name.  On a match the entry's
+ * buffer head is returned (the caller must brelse() it) and *prev_block
+ * holds the block of the preceding chain entry, or ~0 when the match is
+ * the chain head.  Returns ERR_PTR(-EIO) when a block cannot be read and
+ * ERR_PTR(-ENOENT) when the chain ends or a bad block is met.
+ */
+static struct buffer_head *omfs_scan_list(struct inode *dir, u64 block,
+                                const char *name, int namelen,
+                                u64 *prev_block)
+{
+        struct buffer_head *bh;
+        struct omfs_inode *oi;
+        int err = -ENOENT;
+        *prev_block = ~0;
+        while (block != ~0) {
+                bh = sb_bread(dir->i_sb,
+                        clus_to_blk(OMFS_SB(dir->i_sb), block));
+                if (!bh) {
+                        err = -EIO;
+                        goto err;
+                }
+                oi = (struct omfs_inode *) bh->b_data;
+                if (omfs_is_bad(OMFS_SB(dir->i_sb), &oi->i_head, block)) {
+                        brelse(bh);
+                        goto err;
+                }
+                /*
+                 * strncmp() alone only proves that name is a prefix of the
+                 * stored name; the stored name must also end right there,
+                 * unless it fills the whole field.
+                 */
+                if (strncmp(oi->i_name, name, namelen) == 0 &&
+                    (namelen >= OMFS_NAMELEN || oi->i_name[namelen] == '\0'))
+                        return bh;
+                *prev_block = block;
+                block = be64_to_cpu(oi->i_sibling);
+                brelse(bh);
+        }
+err:
+        return ERR_PTR(err);
+}
+/*
+ * Looks name up in dir: fetches the head of the matching hash bucket
+ * and hands the chain to omfs_scan_list().  Returns that function's
+ * result, or ERR_PTR(-EIO) when the directory block cannot be read.
+ */
+static struct buffer_head *omfs_find_entry(struct inode *dir,
+                                           const char *name, int namelen)
+{
+        struct buffer_head *bucket_bh;
+        u64 chain_head, unused_prev;
+        int ofs;
+        bucket_bh = omfs_get_bucket(dir, name, namelen, &ofs);
+        if (bucket_bh == NULL)
+                return ERR_PTR(-EIO);
+        chain_head = be64_to_cpu(*((__be64 *) &bucket_bh->b_data[ofs]));
+        brelse(bucket_bh);
+        return omfs_scan_list(dir, chain_head, name, namelen, &unused_prev);
+}
+/*
+ * Initialises the on-disk block of a freshly created inode.  The inode
+ * header area is zeroed; a directory then gets every hash bucket set to
+ * all-ones, anything else gets an empty extent table.  The self pointer
+ * is set to the inode number and the sibling link to ~0.
+ * Returns 0 on success or -ENOMEM when the block cannot be read.
+ */
+int omfs_make_empty(struct inode *inode, struct super_block *sb)
+{
+        struct omfs_sb_info *sbi = OMFS_SB(sb);
+        int block = clus_to_blk(sbi, inode->i_ino);
+        struct buffer_head *bh;
+        struct omfs_inode *oi;
+        bh = sb_bread(sb, block);
+        if (!bh)
+                return -ENOMEM;
+        memset(bh->b_data, 0, sizeof(struct omfs_inode));
+        /* S_ISDIR() compares the whole type field, not just one bit of it */
+        if (S_ISDIR(inode->i_mode)) {
+                memset(&bh->b_data[OMFS_DIR_START], 0xff,
+                        sbi->s_sys_blocksize - OMFS_DIR_START);
+        } else
+                omfs_make_empty_table(bh, OMFS_EXTENT_START);
+        oi = (struct omfs_inode *) bh->b_data;
+        oi->i_head.h_self = cpu_to_be64(inode->i_ino);
+        oi->i_sibling = ~0ULL;
+        mark_buffer_dirty(bh);
+        brelse(bh);
+        return 0;
+}
+/*
+ * Enters inode into the directory of dentry's parent under dentry's
+ * name.  The inode becomes the new head of its hash bucket chain and its
+ * on-disk name, sibling and parent fields are written.  Returns 0 on
+ * success or -ENOMEM when either block cannot be read.
+ * NOTE(review): if the second sb_bread() fails, the bucket head has
+ * already been rewritten and is not restored.
+ */
+static int omfs_add_link(struct dentry *dentry, struct inode *inode)
+{
+        struct inode *dir = dentry->d_parent->d_inode;
+        const char *name = dentry->d_name.name;
+        int namelen = dentry->d_name.len;
+        struct omfs_inode *oi;
+        struct buffer_head *bh;
+        u64 block;
+        __be64 *entry;
+        int ofs;
+        /* just prepend to head of queue in proper bucket */
+        bh = omfs_get_bucket(dir, name, namelen, &ofs);
+        if (!bh)
+                goto out;
+        entry = (__be64 *) &bh->b_data[ofs];
+        /* remember the old chain head; it becomes our sibling below */
+        block = be64_to_cpu(*entry);
+        *entry = cpu_to_be64(inode->i_ino);
+        mark_buffer_dirty(bh);
+        brelse(bh);
+        /* now set the sibling and parent pointers on the new inode */
+        bh = sb_bread(dir->i_sb, clus_to_blk(OMFS_SB(dir->i_sb), inode->i_ino));
+        if (!bh)
+                goto out;
+        oi = (struct omfs_inode *) bh->b_data;
+        /* store the name padded with NULs up to OMFS_NAMELEN */
+        memcpy(oi->i_name, name, namelen);
+        memset(oi->i_name + namelen, 0, OMFS_NAMELEN - namelen);
+        oi->i_sibling = cpu_to_be64(block);
+        oi->i_parent = cpu_to_be64(dir->i_ino);
+        mark_buffer_dirty(bh);
+        brelse(bh);
+        dir->i_ctime = CURRENT_TIME_SEC;
+        /* mark affected inodes dirty to rebuild checksums */
+        mark_inode_dirty(dir);
+        mark_inode_dirty(inode);
+        return 0;
+out:
+        return -ENOMEM;
+}
+/*
+ * Unhooks dentry's name from the hash chain of its parent directory.
+ * The link that pointed at the entry (the bucket slot, or the previous
+ * entry's sibling field) is redirected to the entry's own sibling.
+ * Returns 0 on success, -ENOMEM when a block cannot be read, or the
+ * error reported by omfs_scan_list().
+ */
+static int omfs_delete_entry(struct dentry *dentry)
+{
+        struct inode *dir = dentry->d_parent->d_inode;
+        struct inode *dirty;
+        const char *name = dentry->d_name.name;
+        int namelen = dentry->d_name.len;
+        struct omfs_inode *oi;
+        struct buffer_head *bh, *bh2;
+        __be64 *entry, next;
+        u64 block, prev;
+        int ofs;
+        int err = -ENOMEM;
+        /* delete the proper node in the bucket's linked list */
+        bh = omfs_get_bucket(dir, name, namelen, &ofs);
+        if (!bh)
+                goto out;
+        entry = (__be64 *) &bh->b_data[ofs];
+        block = be64_to_cpu(*entry);
+        bh2 = omfs_scan_list(dir, block, name, namelen, &prev);
+        if (IS_ERR(bh2)) {
+                err = PTR_ERR(bh2);
+                goto out_free_bh;
+        }
+        oi = (struct omfs_inode *) bh2->b_data;
+        /* kept in on-disk byte order: it is copied straight into *entry */
+        next = oi->i_sibling;
+        brelse(bh2);
+        if (prev != ~0) {
+                /* found in middle of list, get list ptr */
+                brelse(bh);
+                bh = sb_bread(dir->i_sb,
+                        clus_to_blk(OMFS_SB(dir->i_sb), prev));
+                if (!bh)
+                        goto out;
+                oi = (struct omfs_inode *) bh->b_data;
+                entry = &oi->i_sibling;
+        }
+        *entry = next;
+        mark_buffer_dirty(bh);
+        if (prev != ~0) {
+                /* the previous entry's block changed, so dirty its inode too */
+                dirty = omfs_iget(dir->i_sb, prev);
+                if (!IS_ERR(dirty)) {
+                        mark_inode_dirty(dirty);
+                        iput(dirty);
+                }
+        }
+        err = 0;
+out_free_bh:
+        brelse(bh);
+out:
+        return err;
+}
+/*
+ * Returns non-zero when every hash bucket of the directory is unused
+ * (all-ones), i.e. the directory has no entries.  Returns 0 when an
+ * entry exists or the directory block cannot be read.
+ */
+static int omfs_dir_is_empty(struct inode *inode)
+{
+        int nbuckets = (inode->i_size - OMFS_DIR_START) / 8;
+        struct buffer_head *bh;
+        u64 *ptr;
+        int i;
+        bh = sb_bread(inode->i_sb, clus_to_blk(OMFS_SB(inode->i_sb),
+                        inode->i_ino));
+        if (!bh)
+                return 0;
+        ptr = (u64 *) &bh->b_data[OMFS_DIR_START];
+        for (i = 0; i < nbuckets; i++, ptr++)
+                if (*ptr != ~0)
+                        break;
+        brelse(bh);
+        /*
+         * Decide from the loop index: the buffer is released by now, and
+         * after a full scan ptr points one slot past the bucket table.
+         */
+        return i == nbuckets;
+}
+/*
+ * Removes dentry's name from dir.  When the entry was deleted, the
+ * target inode loses one link and dir is marked dirty.  Returns 0 or
+ * the error from omfs_delete_entry().
+ */
+static int omfs_unlink(struct inode *dir, struct dentry *dentry)
+{
+        struct inode *inode = dentry->d_inode;
+        int ret = omfs_delete_entry(dentry);
+        if (ret == 0) {
+                inode_dec_link_count(inode);
+                mark_inode_dirty(dir);
+        }
+        return ret;
+}
+/*
+ * Removes the directory behind dentry, provided omfs_dir_is_empty()
+ * accepts it; otherwise returns -ENOTEMPTY.  After a successful unlink
+ * the link count is dropped a second time.
+ */
+static int omfs_rmdir(struct inode *dir, struct dentry *dentry)
+{
+        struct inode *inode = dentry->d_inode;
+        int err;
+        if (!omfs_dir_is_empty(inode))
+                return -ENOTEMPTY;
+        err = omfs_unlink(dir, dentry);
+        if (err)
+                return err;
+        inode_dec_link_count(inode);
+        return 0;
+}
+/*
+ * Creates a new inode of the given mode in dir, initialises its block,
+ * links it under dentry's name and instantiates the dentry.  On any
+ * failure after the inode was obtained, the inode is released with
+ * iput() and the error is returned.
+ */
+static int omfs_add_node(struct inode *dir, struct dentry *dentry, int mode)
+{
+        struct inode *inode = omfs_new_inode(dir, mode);
+        int err;
+        if (IS_ERR(inode))
+                return PTR_ERR(inode);
+        err = omfs_make_empty(inode, dir->i_sb);
+        if (!err)
+                err = omfs_add_link(dentry, inode);
+        if (err) {
+                iput(inode);
+                return err;
+        }
+        d_instantiate(dentry, inode);
+        return 0;
+}
+/* mkdir hook: creates the node through omfs_add_node() with S_IFDIR set. */
+static int omfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+{
+        return omfs_add_node(dir, dentry, mode | S_IFDIR);
+}
+/*
+ * create hook: creates the node through omfs_add_node() with S_IFREG
+ * set.  The nameidata argument is not used.
+ */
+static int omfs_create(struct inode *dir, struct dentry *dentry, int mode,
+                struct nameidata *nd)
+{
+        return omfs_add_node(dir, dentry, mode | S_IFREG);
+}
+/*
+ * lookup hook.  Names longer than OMFS_NAMELEN are rejected with
+ * -ENAMETOOLONG.  When the name is found its inode is fetched and
+ * attached to the dentry; when omfs_find_entry() fails for any reason
+ * the dentry is added without an inode.  Returns NULL in both cases,
+ * or the error from omfs_iget().
+ */
+static struct dentry *omfs_lookup(struct inode *dir, struct dentry *dentry,
+                                  struct nameidata *nd)
+{
+        struct buffer_head *bh;
+        struct omfs_inode *oi;
+        struct inode *inode;
+        ino_t ino;
+        if (dentry->d_name.len > OMFS_NAMELEN)
+                return ERR_PTR(-ENAMETOOLONG);
+        bh = omfs_find_entry(dir, dentry->d_name.name, dentry->d_name.len);
+        if (IS_ERR(bh)) {
+                d_add(dentry, NULL);
+                return NULL;
+        }
+        oi = (struct omfs_inode *)bh->b_data;
+        ino = be64_to_cpu(oi->i_head.h_self);
+        brelse(bh);
+        inode = omfs_iget(dir->i_sb, ino);
+        if (IS_ERR(inode))
+                return ERR_CAST(inode);
+        d_add(dentry, inode);
+        return NULL;
+}
+/*
+ * Sanity check of a block header: its self pointer must equal the block
+ * it was read from and lie within [s_root_ino, s_num_blocks].  Returns
+ * 0 when the header passes, otherwise logs a warning and returns 1.
+ */
+int omfs_is_bad(struct omfs_sb_info *sbi, struct omfs_header *header,
+        u64 fsblock)
+{
+        u64 ino = be64_to_cpu(header->h_self);
+        if (ino == fsblock && ino >= sbi->s_root_ino &&
+            ino <= sbi->s_num_blocks)
+                return 0;
+        printk(KERN_WARNING "omfs: bad hash chain detected\n");
+        return 1;
+}
+/*
+ * Emits the entries of one hash bucket chain through filldir.  fsblock
+ * is the first block of the chain (~0 for an unused bucket) and hindex
+ * is the number of leading entries to skip because an earlier call
+ * already returned them.  filp->f_pos grows by one per entry emitted.
+ * Returns 0, or the non-zero filldir result that stopped the walk.  An
+ * unreadable or bad block ends the chain without reporting an error.
+ */
+static int omfs_fill_chain(struct file *filp, void *dirent, filldir_t filldir,
+                u64 fsblock, int hindex)
+{
+        struct inode *dir = filp->f_dentry->d_inode;
+        struct buffer_head *bh;
+        struct omfs_inode *oi;
+        u64 self;
+        int res = 0;
+        unsigned char d_type;
+        /* follow chain in this bucket */
+        while (fsblock != ~0) {
+                bh = sb_bread(dir->i_sb, clus_to_blk(OMFS_SB(dir->i_sb),
+                                fsblock));
+                if (!bh)
+                        goto out;
+                oi = (struct omfs_inode *) bh->b_data;
+                if (omfs_is_bad(OMFS_SB(dir->i_sb), &oi->i_head, fsblock)) {
+                        brelse(bh);
+                        goto out;
+                }
+                self = fsblock;
+                fsblock = be64_to_cpu(oi->i_sibling);
+                /* skip visited nodes */
+                if (hindex) {
+                        hindex--;
+                        brelse(bh);
+                        continue;
+                }
+                d_type = (oi->i_type == OMFS_DIR) ? DT_DIR : DT_REG;
+                res = filldir(dirent, oi->i_name, strnlen(oi->i_name,
+                        OMFS_NAMELEN), filp->f_pos, self, d_type);
+                brelse(bh);
+                /*
+                 * Stop at the first entry the caller refuses.  Going on
+                 * could get a later, shorter name accepted, which would
+                 * advance f_pos past the refused entry and lose it.
+                 */
+                if (res != 0)
+                        break;
+                filp->f_pos++;
+        }
+out:
+        return res;
+}
+/*
+ * rename hook.  An existing target is unlinked first (refused with
+ * -ENOTEMPTY when a directory is moved onto a target that
+ * omfs_dir_is_empty() rejects).  The old name is then removed and the
+ * inode linked under the new name.  Returns 0 or a negative error.
+ * NOTE(review): if omfs_add_link() fails after the old name was
+ * removed, the old name is not put back.
+ */
+static int omfs_rename(struct inode *old_dir, struct dentry *old_dentry,
+                struct inode *new_dir, struct dentry *new_dentry)
+{
+        struct inode *new_inode = new_dentry->d_inode;
+        struct inode *old_inode = old_dentry->d_inode;
+        struct buffer_head *bh;
+        int is_dir;
+        int err;
+        is_dir = S_ISDIR(old_inode->i_mode);
+        if (new_inode) {
+                /* overwriting existing file/dir */
+                err = -ENOTEMPTY;
+                if (is_dir && !omfs_dir_is_empty(new_inode))
+                        goto out;
+                err = -ENOENT;
+                /* make sure the target name really is on disk before unlinking */
+                bh = omfs_find_entry(new_dir, new_dentry->d_name.name,
+                        new_dentry->d_name.len);
+                if (IS_ERR(bh))
+                        goto out;
+                brelse(bh);
+                err = omfs_unlink(new_dir, new_dentry);
+                if (err)
+                        goto out;
+        }
+        /* since omfs locates files by name, we need to unlink _before_
+         * adding the new link or we won't find the old one */
+        /* extra link balances the decrement done inside omfs_unlink() */
+        inode_inc_link_count(old_inode);
+        err = omfs_unlink(old_dir, old_dentry);
+        if (err) {
+                inode_dec_link_count(old_inode);
+                goto out;
+        }
+        err = omfs_add_link(new_dentry, old_inode);
+        if (err)
+                goto out;
+        old_inode->i_ctime = CURRENT_TIME_SEC;
+out:
+        return err;
+}
+/*
+ * readdir hook.  Positions 0 and 1 emit "." and ".."; afterwards f_pos
+ * jumps to 1 << 20 and encodes where to resume in the hash table (see
+ * the comment on the bit layout below).  Each bucket chain is emitted
+ * through omfs_fill_chain().  Returns 0, except -EINVAL when the
+ * directory block cannot be read.
+ */
+static int omfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
+{
+        struct inode *dir = filp->f_dentry->d_inode;
+        struct buffer_head *bh;
+        loff_t offset, res;
+        unsigned int hchain, hindex;
+        int nbuckets;
+        u64 fsblock;
+        int ret = -EINVAL;
+        /* positions that do not fit in 32 bits yield nothing */
+        if (filp->f_pos >> 32)
+                goto success;
+        switch ((unsigned long) filp->f_pos) {
+        case 0:
+                if (filldir(dirent, ".", 1, 0, dir->i_ino, DT_DIR) < 0)
+                        goto success;
+                filp->f_pos++;
+                /* fall through */
+        case 1:
+                if (filldir(dirent, "..", 2, 1,
+                    parent_ino(filp->f_dentry), DT_DIR) < 0)
+                        goto success;
+                filp->f_pos = 1 << 20;
+                /* fall through */
+        }
+        nbuckets = (dir->i_size - OMFS_DIR_START) / 8;
+        /* high 12 bits store bucket + 1 and low 20 bits store hash index */
+        hchain = (filp->f_pos >> 20) - 1;
+        hindex = filp->f_pos & 0xfffff;
+        bh = sb_bread(dir->i_sb, clus_to_blk(OMFS_SB(dir->i_sb), dir->i_ino));
+        if (!bh)
+                goto out;
+        offset = OMFS_DIR_START + hchain * 8;
+        for (; hchain < nbuckets; hchain++, offset += 8) {
+                fsblock = be64_to_cpu(*((__be64 *) &bh->b_data[offset]));
+                res = omfs_fill_chain(filp, dirent, filldir, fsblock, hindex);
+                /* only the first chain visited resumes part-way through */
+                hindex = 0;
+                if (res < 0)
+                        break;
+                /* chain done: point f_pos at the start of the next bucket */
+                filp->f_pos = (hchain+2) << 20;
+        }
+        brelse(bh);
+success:
+        ret = 0;
+out:
+        return ret;
+}
+/* Inode operations table wiring the directory hooks defined in this file. */
+struct inode_operations omfs_dir_inops = {
+        .lookup = omfs_lookup,
+        .mkdir = omfs_mkdir,
+        .rename = omfs_rename,
+        .create = omfs_create,
+        .unlink = omfs_unlink,
+        .rmdir = omfs_rmdir,
+};
+/* File operations table for directories: generic read plus omfs_readdir. */
+struct file_operations omfs_dir_operations = {
+        .read = generic_read_dir,
+        .readdir = omfs_readdir,
+};
diff --git a/fs/omfs/file.c b/fs/omfs/file.c
new file mode 100644
index 000000000000..66e01fae4384
--- /dev/null
+++ b/fs/omfs/file.c
@@ -0,0 +1,346 @@
+/*
+ * OMFS (as used by RIO Karma) file operations.
+ * Copyright (C) 2005 Bob Copeland <me@bobcopeland.com>
+ * Released under GPL v2.
+ */
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/buffer_head.h>
+#include <linux/mpage.h>
+#include "omfs.h"
+/*
+ * fsync hook.  Syncs the mapping's buffers and, when the inode is dirty
+ * (for datasync: dirty in a way that matters to datasync), also writes
+ * the inode with omfs_sync_inode().  In the early-exit cases the result
+ * of sync_mapping_buffers() is returned as is; otherwise any failure is
+ * reported as -EIO.
+ */
+static int omfs_sync_file(struct file *file, struct dentry *dentry,
+                int datasync)
+{
+        struct inode *inode = dentry->d_inode;
+        int err = sync_mapping_buffers(inode->i_mapping);
+        if (!(inode->i_state & I_DIRTY) ||
+            (datasync && !(inode->i_state & I_DIRTY_DATASYNC)))
+                return err;
+        err |= omfs_sync_inode(inode);
+        return err ? -EIO : 0;
+}
+/*
+ * Writes an empty extent table at the given byte offset of bh: no next
+ * table (~0), an extent count of 1 covering just the terminator entry,
+ * a fill value of 0x22, and a terminator whose cluster and block fields
+ * are all-ones.  The caller is responsible for dirtying the buffer.
+ */
+void omfs_make_empty_table(struct buffer_head *bh, int offset)
+{
+        struct omfs_extent *oe = (struct omfs_extent *) &bh->b_data[offset];
+        oe->e_next = ~0ULL;
+        oe->e_extent_count = cpu_to_be32(1);
+        oe->e_fill = cpu_to_be32(0x22);
+        oe->e_entry.e_cluster = ~0ULL;
+        oe->e_entry.e_blocks = ~0ULL;
+}
+/*
+ * Releases all blocks of a file that has been truncated to size 0.
+ * Walks the inode's extent table and every continuation table, clears
+ * the bitmap bits of each extent, resets each table to empty and frees
+ * the continuation blocks themselves.  Returns 0 on success and -EIO
+ * when i_size is not 0, a block cannot be read, or a table header fails
+ * omfs_is_bad().
+ */
+int omfs_shrink_inode(struct inode *inode)
+{
+        struct omfs_sb_info *sbi = OMFS_SB(inode->i_sb);
+        struct omfs_extent *oe;
+        struct omfs_extent_entry *entry;
+        struct buffer_head *bh;
+        u64 next, last;
+        u32 extent_count;
+        int ret;
+        /* traverse extent table, freeing each entry that is greater
+         * than inode->i_size;
+         */
+        next = inode->i_ino;
+        /* only support truncate -> 0 for now */
+        ret = -EIO;
+        if (inode->i_size != 0)
+                goto out;
+        bh = sb_bread(inode->i_sb, clus_to_blk(sbi, next));
+        if (!bh)
+                goto out;
+        oe = (struct omfs_extent *)(&bh->b_data[OMFS_EXTENT_START]);
+        for (;;) {
+                if (omfs_is_bad(sbi, (struct omfs_header *) bh->b_data, next)) {
+                        brelse(bh);
+                        goto out;
+                }
+                extent_count = be32_to_cpu(oe->e_extent_count);
+                /* 'last' is the table being processed, 'next' its successor */
+                last = next;
+                next = be64_to_cpu(oe->e_next);
+                entry = &oe->e_entry;
+                /* ignore last entry as it is the terminator */
+                for (; extent_count > 1; extent_count--) {
+                        u64 start, count;
+                        start = be64_to_cpu(entry->e_cluster);
+                        count = be64_to_cpu(entry->e_blocks);
+                        omfs_clear_range(inode->i_sb, start, (int) count);
+                        entry++;
+                }
+                omfs_make_empty_table(bh, (char *) oe - bh->b_data);
+                mark_buffer_dirty(bh);
+                brelse(bh);
+                /* continuation blocks are freed; the inode's own block stays */
+                if (last != inode->i_ino)
+                        omfs_clear_range(inode->i_sb, last, sbi->s_mirrors);
+                if (next == ~0)
+                        break;
+                bh = sb_bread(inode->i_sb, clus_to_blk(sbi, next));
+                if (!bh)
+                        goto out;
+                /* in a continuation block the table sits at OMFS_EXTENT_CONT */
+                oe = (struct omfs_extent *) (&bh->b_data[OMFS_EXTENT_CONT]);
+        }
+        ret = 0;
+out:
+        return ret;
+}
+/*
+ * truncate hook: releases the file's blocks and marks the inode dirty.
+ * The result of omfs_shrink_inode() is not checked here.
+ */
+static void omfs_truncate(struct inode *inode)
+{
+        omfs_shrink_inode(inode);
+        mark_inode_dirty(inode);
+}
+/*
+ * Add new blocks to the current extent, or create new entries/continuations
+ * as necessary.
+ *
+ * oe is the extent table to extend.  On success 0 is returned and
+ * *ret_block holds the first newly allocated block.  Returns -EIO when
+ * the table has no terminator or no room for another entry, or the
+ * error from omfs_allocate_range(); *ret_block is left untouched then.
+ */
+static int omfs_grow_extent(struct inode *inode, struct omfs_extent *oe,
+                        u64 *ret_block)
+{
+        struct omfs_extent_entry *terminator;
+        struct omfs_extent_entry *entry = &oe->e_entry;
+        struct omfs_sb_info *sbi = OMFS_SB(inode->i_sb);
+        u32 extent_count = be32_to_cpu(oe->e_extent_count);
+        u64 new_block = 0;
+        u32 max_count;
+        int new_count;
+        int ret = 0;
+        /* reached the end of the extent table with no blocks mapped.
+         * there are three possibilities for adding: grow last extent,
+         * add a new extent to the current extent table, and add a
+         * continuation inode.  in last two cases need an allocator for
+         * sbi->s_cluster_size
+         */
+        /* TODO: handle holes */
+        /* should always have a terminator */
+        if (extent_count < 1)
+                return -EIO;
+        /* trivially grow current extent, if next block is not taken */
+        terminator = entry + extent_count - 1;
+        if (extent_count > 1) {
+                entry = terminator-1;
+                new_block = be64_to_cpu(entry->e_cluster) +
+                        be64_to_cpu(entry->e_blocks);
+                if (omfs_allocate_block(inode->i_sb, new_block)) {
+                        entry->e_blocks =
+                                cpu_to_be64(be64_to_cpu(entry->e_blocks) + 1);
+                        /*
+                         * the terminator's e_blocks is kept bit-inverted:
+                         * un-invert, add the new block, invert again
+                         */
+                        terminator->e_blocks = ~(cpu_to_be64(
+                                be64_to_cpu(~terminator->e_blocks) + 1));
+                        goto out;
+                }
+        }
+        max_count = (sbi->s_sys_blocksize - OMFS_EXTENT_START -
+                sizeof(struct omfs_extent)) /
+                sizeof(struct omfs_extent_entry) + 1;
+        /* TODO: add a continuation block here */
+        if (be32_to_cpu(oe->e_extent_count) > max_count-1)
+                return -EIO;
+        /* try to allocate a new cluster */
+        ret = omfs_allocate_range(inode->i_sb, 1, sbi->s_clustersize,
+                &new_block, &new_count);
+        if (ret)
+                goto out_fail;
+        /* copy terminator down an entry */
+        entry = terminator;
+        terminator++;
+        memcpy(terminator, entry, sizeof(struct omfs_extent_entry));
+        /* the old terminator slot now describes the new extent */
+        entry->e_cluster = cpu_to_be64(new_block);
+        entry->e_blocks = cpu_to_be64((u64) new_count);
+        terminator->e_blocks = ~(cpu_to_be64(
+                be64_to_cpu(~terminator->e_blocks) + (u64) new_count));
+        /* write in new entry */
+        oe->e_extent_count = cpu_to_be32(1 + be32_to_cpu(oe->e_extent_count));
+out:
+        *ret_block = new_block;
+out_fail:
+        return ret;
+}
+/*
+ * Searches the count entries at ent (the last one being the terminator,
+ * which is not examined) for the extent containing file block 'block'.
+ * When found, returns the matching device block and stores in *left how
+ * many blocks remain in that extent from there on.
+ * If block not found, return 0.
+ */
+static sector_t find_block(struct inode *inode, struct omfs_extent_entry *ent,
+                        sector_t block, int count, int *left)
+{
+        sector_t searched = 0;
+        /* stop before the final entry: it is the terminator */
+        while (count > 1) {
+                int numblocks = clus_to_blk(OMFS_SB(inode->i_sb),
+                        be64_to_cpu(ent->e_blocks));
+                if (block >= searched && block < searched + numblocks) {
+                        /* position of the wanted block inside this extent */
+                        sector_t within = block - searched;
+                        *left = numblocks - within;
+                        return clus_to_blk(OMFS_SB(inode->i_sb),
+                                be64_to_cpu(ent->e_cluster)) + within;
+                }
+                searched += numblocks;
+                ent++;
+                count--;
+        }
+        return 0;
+}
+/*
+ * get_block callback.  Walks the inode's extent table and its
+ * continuation tables looking for file block 'block'.  When found,
+ * bh_result is mapped to it and b_size is limited to what remains of
+ * the extent.  When not found and create is set, the last table visited
+ * is grown with omfs_grow_extent() and the new block is mapped.
+ * Returns 0 on success; -EIO on a read failure, a bad table header, or
+ * an unmapped block without create; otherwise the omfs_grow_extent()
+ * error.
+ */
+static int omfs_get_block(struct inode *inode, sector_t block,
+                          struct buffer_head *bh_result, int create)
+{
+        struct buffer_head *bh;
+        sector_t next, offset;
+        int ret;
+        u64 new_block;
+        int extent_count;
+        struct omfs_extent *oe;
+        struct omfs_extent_entry *entry;
+        struct omfs_sb_info *sbi = OMFS_SB(inode->i_sb);
+        int max_blocks = bh_result->b_size >> inode->i_blkbits;
+        int remain;
+        ret = -EIO;
+        bh = sb_bread(inode->i_sb, clus_to_blk(sbi, inode->i_ino));
+        if (!bh)
+                goto out;
+        oe = (struct omfs_extent *)(&bh->b_data[OMFS_EXTENT_START]);
+        next = inode->i_ino;
+        for (;;) {
+                if (omfs_is_bad(sbi, (struct omfs_header *) bh->b_data, next))
+                        goto out_brelse;
+                extent_count = be32_to_cpu(oe->e_extent_count);
+                next = be64_to_cpu(oe->e_next);
+                entry = &oe->e_entry;
+                /* find_block() returns 0 when this table does not map block */
+                offset = find_block(inode, entry, block, extent_count, &remain);
+                if (offset > 0) {
+                        ret = 0;
+                        map_bh(bh_result, inode->i_sb, offset);
+                        if (remain > max_blocks)
+                                remain = max_blocks;
+                        bh_result->b_size = (remain << inode->i_blkbits);
+                        goto out_brelse;
+                }
+                if (next == ~0)
+                        break;
+                brelse(bh);
+                bh = sb_bread(inode->i_sb, clus_to_blk(sbi, next));
+                if (!bh)
+                        goto out;
+                oe = (struct omfs_extent *) (&bh->b_data[OMFS_EXTENT_CONT]);
+        }
+        /* not mapped anywhere; bh and oe still refer to the last table */
+        if (create) {
+                ret = omfs_grow_extent(inode, oe, &new_block);
+                if (ret == 0) {
+                        mark_buffer_dirty(bh);
+                        mark_inode_dirty(inode);
+                        map_bh(bh_result, inode->i_sb,
+                                        clus_to_blk(sbi, new_block));
+                }
+        }
+out_brelse:
+        brelse(bh);
+out:
+        return ret;
+}
+/* readpage hook: block_read_full_page() with omfs_get_block as mapper. */
+static int omfs_readpage(struct file *file, struct page *page)
+{
+        return block_read_full_page(page, omfs_get_block);
+}
+/* readpages hook: mpage_readpages() with omfs_get_block as mapper. */
+static int omfs_readpages(struct file *file, struct address_space *mapping,
+                struct list_head *pages, unsigned nr_pages)
+{
+        return mpage_readpages(mapping, pages, nr_pages, omfs_get_block);
+}
+/* writepage hook: block_write_full_page() with omfs_get_block as mapper. */
+static int omfs_writepage(struct page *page, struct writeback_control *wbc)
+{
+        return block_write_full_page(page, omfs_get_block, wbc);
+}
+/* writepages hook: mpage_writepages() with omfs_get_block as mapper. */
+static int
+omfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
+{
+        return mpage_writepages(mapping, wbc, omfs_get_block);
+}
+/*
+ * write_begin hook: clears *pagep and delegates to block_write_begin()
+ * with omfs_get_block as mapper.
+ */
+static int omfs_write_begin(struct file *file, struct address_space *mapping,
+                        loff_t pos, unsigned len, unsigned flags,
+                        struct page **pagep, void **fsdata)
+{
+        *pagep = NULL;
+        return block_write_begin(file, mapping, pos, len, flags,
+                                pagep, fsdata, omfs_get_block);
+}
+/* bmap hook: generic_block_bmap() with omfs_get_block as mapper. */
+static sector_t omfs_bmap(struct address_space *mapping, sector_t block)
+{
+        return generic_block_bmap(mapping, block, omfs_get_block);
+}
+/*
+ * File operations table for regular files: generic helpers throughout,
+ * except fsync, which uses omfs_sync_file.
+ */
+struct file_operations omfs_file_operations = {
+        .llseek = generic_file_llseek,
+        .read = do_sync_read,
+        .write = do_sync_write,
+        .aio_read = generic_file_aio_read,
+        .aio_write = generic_file_aio_write,
+        .mmap = generic_file_mmap,
+        .fsync = omfs_sync_file,
+        .splice_read = generic_file_splice_read,
+};
+/* Inode operations table for regular files: only truncate is provided. */
+struct inode_operations omfs_file_inops = {
+        .truncate = omfs_truncate
+};
+/*
+ * Address space operations table: the omfs_* hooks above, plus
+ * block_sync_page and generic_write_end.
+ */
+struct address_space_operations omfs_aops = {
+        .readpage = omfs_readpage,
+        .readpages = omfs_readpages,
+        .writepage = omfs_writepage,
+        .writepages = omfs_writepages,
+        .sync_page = block_sync_page,
+        .write_begin = omfs_write_begin,
+        .write_end = generic_write_end,
+        .bmap = omfs_bmap,
+};
diff --git a/fs/omfs/inode.c b/fs/omfs/inode.c
new file mode 100644
index 000000000000..d865f5535436
--- /dev/null
+++ b/fs/omfs/inode.c
@@ -0,0 +1,553 @@
+/*
+ * Optimized MPEG FS - inode and super operations.
+ * Copyright (C) 2006 Bob Copeland <me@bobcopeland.com>
+ * Released under GPL v2.
+ */
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/fs.h>
+#include <linux/vfs.h>
+#include <linux/parser.h>
+#include <linux/buffer_head.h>
+#include <linux/vmalloc.h>
+#include <linux/crc-itu-t.h>
+#include "omfs.h"
+MODULE_AUTHOR("Bob Copeland <me@bobcopeland.com>");
+MODULE_DESCRIPTION("OMFS (ReplayTV/Karma) Filesystem for Linux");
+MODULE_LICENSE("GPL");
+/*
+ * Create a new in-core inode on the same super block as @dir.
+ *
+ * Exactly s_mirrors blocks are requested from the allocator and the
+ * returned block number becomes the inode number.  Only S_IFDIR and
+ * S_IFREG modes receive operation tables and an initial size.
+ *
+ * Returns the inode (hashed and marked dirty), ERR_PTR(-ENOMEM) when no
+ * inode could be obtained, or ERR_PTR() of the allocator's error code.
+ */
+struct inode *omfs_new_inode(struct inode *dir, int mode)
+{
+        struct super_block *sb = dir->i_sb;
+        struct omfs_sb_info *sbi = OMFS_SB(sb);
+        struct inode *inode;
+        u64 ino_block;
+        int granted;
+        int err;
+        inode = new_inode(sb);
+        if (!inode)
+                return ERR_PTR(-ENOMEM);
+        err = omfs_allocate_range(sb, sbi->s_mirrors, sbi->s_mirrors,
+                        &ino_block, &granted);
+        if (err) {
+                /* nothing was allocated: discard the half-built inode */
+                make_bad_inode(inode);
+                iput(inode);
+                return ERR_PTR(err);
+        }
+        inode->i_ino = ino_block;
+        inode->i_mode = mode;
+        inode->i_uid = current->fsuid;
+        inode->i_gid = current->fsgid;
+        inode->i_blocks = 0;
+        inode->i_mapping->a_ops = &omfs_aops;
+        inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+        if (S_ISDIR(mode)) {
+                inode->i_op = &omfs_dir_inops;
+                inode->i_fop = &omfs_dir_operations;
+                inode->i_size = sbi->s_sys_blocksize;
+                inc_nlink(inode);
+        } else if (S_ISREG(mode)) {
+                inode->i_op = &omfs_file_inops;
+                inode->i_fop = &omfs_file_operations;
+                inode->i_size = 0;
+        }
+        insert_inode_hash(inode);
+        mark_inode_dirty(inode);
+        return inode;
+}
+/*
+ * Recompute both header checksums of an on-disk inode image in place.
+ * The CRC is taken over h_body_size bytes following the header; the XOR
+ * check byte then covers the first OMFS_XOR_COUNT bytes of the header,
+ * which include the freshly stored CRC.  The caller owns the buffer head
+ * backing @oi and is responsible for marking it dirty.
+ */
+static void omfs_update_checksums(struct omfs_inode *oi)
+{
+        unsigned char *raw = (unsigned char *) oi;
+        int body_len = be32_to_cpu(oi->i_head.h_body_size);
+        int check = 0;
+        int i;
+        u16 crc;
+        crc = crc_itu_t(0, raw + sizeof(struct omfs_header), body_len);
+        oi->i_head.h_crc = cpu_to_be16(crc);
+        for (i = 0; i < OMFS_XOR_COUNT; i++)
+                check ^= raw[i];
+        oi->i_head.h_check_xor = check;
+}
+/*
+ * Write the in-core inode back to its on-disk block and to every mirror.
+ *
+ * The current on-disk block is re-read first so that fields maintained
+ * elsewhere (sibling pointers etc.) are kept, then the header, type, size
+ * and ctime are refreshed and the checksums recomputed.  When @wait is
+ * non-zero each buffer is written synchronously.
+ *
+ * Returns 0 on success, -EIO if a block could not be read, the inode is
+ * neither a directory nor a regular file, or a synchronous write failed.
+ */
+static int omfs_write_inode(struct inode *inode, int wait)
+{
+        struct omfs_inode *oi;
+        struct omfs_sb_info *sbi = OMFS_SB(inode->i_sb);
+        struct buffer_head *bh, *bh2;
+        sector_t block;         /* full width: clus_to_blk() returns sector_t */
+        u64 ctime;
+        int i;
+        int ret = -EIO;
+        int sync_failed = 0;
+        /* get current inode since we may have written sibling ptrs etc. */
+        block = clus_to_blk(sbi, inode->i_ino);
+        bh = sb_bread(inode->i_sb, block);
+        if (!bh)
+                goto out;
+        oi = (struct omfs_inode *) bh->b_data;
+        oi->i_head.h_self = cpu_to_be64(inode->i_ino);
+        if (S_ISDIR(inode->i_mode))
+                oi->i_type = OMFS_DIR;
+        else if (S_ISREG(inode->i_mode))
+                oi->i_type = OMFS_FILE;
+        else {
+                printk(KERN_WARNING "omfs: unknown file type: %d\n",
+                        inode->i_mode);
+                goto out_brelse;
+        }
+        oi->i_head.h_body_size = cpu_to_be32(sbi->s_sys_blocksize -
+                sizeof(struct omfs_header));
+        oi->i_head.h_version = 1;
+        oi->i_head.h_type = OMFS_INODE_NORMAL;
+        oi->i_head.h_magic = OMFS_IMAGIC;
+        oi->i_size = cpu_to_be64(inode->i_size);
+        /*
+         * i_ctime is stored in milliseconds: scale the seconds up and
+         * round the nanoseconds up to whole milliseconds (1000000 ns).
+         */
+        ctime = inode->i_ctime.tv_sec * 1000LL +
+                ((inode->i_ctime.tv_nsec + 999999) / 1000000);
+        oi->i_ctime = cpu_to_be64(ctime);
+        omfs_update_checksums(oi);
+        mark_buffer_dirty(bh);
+        if (wait) {
+                sync_dirty_buffer(bh);
+                if (buffer_req(bh) && !buffer_uptodate(bh))
+                        sync_failed = 1;
+        }
+        /* if mirroring writes, copy to next fsblock */
+        for (i = 1; i < sbi->s_mirrors; i++) {
+                bh2 = sb_bread(inode->i_sb, block + i *
+                        (sbi->s_blocksize / sbi->s_sys_blocksize));
+                if (!bh2)
+                        goto out_brelse;
+                memcpy(bh2->b_data, bh->b_data, bh->b_size);
+                mark_buffer_dirty(bh2);
+                if (wait) {
+                        sync_dirty_buffer(bh2);
+                        if (buffer_req(bh2) && !buffer_uptodate(bh2))
+                                sync_failed = 1;
+                }
+                brelse(bh2);
+        }
+        ret = (sync_failed) ? -EIO : 0;
+out_brelse:
+        brelse(bh);
+out:
+        return ret;
+}
+/* Synchronously write @inode to disk: omfs_write_inode() with wait set. */
+int omfs_sync_inode(struct inode *inode)
+{
+        const int wait = 1;
+        return omfs_write_inode(inode, wait);
+}
+/*
+ * Final teardown of an inode whose entry has been deleted: drop its page
+ * cache, release the data of a regular file by shrinking it to size 0,
+ * then clear the bits in the bitmaps for the two blocks starting at the
+ * inode number.
+ */
+static void omfs_delete_inode(struct inode *inode)
+{
+        struct super_block *sb = inode->i_sb;
+        truncate_inode_pages(&inode->i_data, 0);
+        if (S_ISREG(inode->i_mode)) {
+                inode->i_size = 0;
+                omfs_shrink_inode(inode);
+        }
+        omfs_clear_range(sb, inode->i_ino, 2);
+        clear_inode(inode);
+}
+/*
+ * Look up inode @ino on @sb, reading it from disk if it is not cached.
+ *
+ * The on-disk block is accepted only if its h_self field matches @ino.
+ * Ownership comes from the mount options (s_uid/s_gid) and permissions
+ * from s_dmask/s_fmask; all three timestamps are taken from the single
+ * on-disk ctime, which is stored in milliseconds.
+ *
+ * Returns the inode, ERR_PTR(-ENOMEM) if none could be obtained, or
+ * ERR_PTR(-EIO) if the block is unreadable or fails the self check.
+ */
+struct inode *omfs_iget(struct super_block *sb, ino_t ino)
+{
+        struct omfs_sb_info *sbi = OMFS_SB(sb);
+        struct omfs_inode *oi;
+        struct buffer_head *bh;
+        sector_t block;         /* full width: clus_to_blk() returns sector_t */
+        u64 ctime;
+        unsigned long nsecs;
+        struct inode *inode;
+        inode = iget_locked(sb, ino);
+        if (!inode)
+                return ERR_PTR(-ENOMEM);
+        if (!(inode->i_state & I_NEW))
+                return inode;
+        block = clus_to_blk(sbi, ino);
+        bh = sb_bread(inode->i_sb, block);
+        if (!bh)
+                goto iget_failed;
+        oi = (struct omfs_inode *)bh->b_data;
+        /* check self */
+        if (ino != be64_to_cpu(oi->i_head.h_self))
+                goto fail_bh;
+        inode->i_uid = sbi->s_uid;
+        inode->i_gid = sbi->s_gid;
+        ctime = be64_to_cpu(oi->i_ctime);
+        /*
+         * do_div() leaves whole seconds in ctime and returns the leftover
+         * milliseconds; one millisecond is 1000000 nanoseconds.
+         */
+        nsecs = do_div(ctime, 1000) * 1000000L;
+        inode->i_atime.tv_sec = ctime;
+        inode->i_mtime.tv_sec = ctime;
+        inode->i_ctime.tv_sec = ctime;
+        inode->i_atime.tv_nsec = nsecs;
+        inode->i_mtime.tv_nsec = nsecs;
+        inode->i_ctime.tv_nsec = nsecs;
+        inode->i_mapping->a_ops = &omfs_aops;
+        switch (oi->i_type) {
+        case OMFS_DIR:
+                inode->i_mode = S_IFDIR | (S_IRWXUGO & ~sbi->s_dmask);
+                inode->i_op = &omfs_dir_inops;
+                inode->i_fop = &omfs_dir_operations;
+                inode->i_size = be32_to_cpu(oi->i_head.h_body_size) +
+                        sizeof(struct omfs_header);
+                inc_nlink(inode);
+                break;
+        case OMFS_FILE:
+                inode->i_mode = S_IFREG | (S_IRWXUGO & ~sbi->s_fmask);
+                /* same tables omfs_new_inode() gives a regular file */
+                inode->i_op = &omfs_file_inops;
+                inode->i_fop = &omfs_file_operations;
+                inode->i_size = be64_to_cpu(oi->i_size);
+                break;
+        }
+        brelse(bh);
+        unlock_new_inode(inode);
+        return inode;
+fail_bh:
+        brelse(bh);
+iget_failed:
+        iget_failed(inode);
+        return ERR_PTR(-EIO);
+}
+/*
+ * Release the per-mount state.  omfs_get_imap() allocates one chunk per
+ * s_imap slot in addition to the pointer array, so each chunk is freed
+ * before the array itself.  When no bitmap was loaded s_imap is NULL and
+ * s_imap_size is 0, which makes the loop a no-op.
+ */
+static void omfs_put_super(struct super_block *sb)
+{
+        struct omfs_sb_info *sbi = OMFS_SB(sb);
+        int i;
+        for (i = 0; i < sbi->s_imap_size; i++)
+                kfree(sbi->s_imap[i]);
+        kfree(sbi->s_imap);
+        kfree(sbi);
+        sb->s_fs_info = NULL;
+}
+/*
+ * Fill @buf with filesystem statistics.  Inodes and data share one
+ * allocation map, so the file counts mirror the block counts: totals come
+ * from s_num_blocks and all three "free" figures from omfs_count_free().
+ * Always returns 0.
+ */
+static int omfs_statfs(struct dentry *dentry, struct kstatfs *buf)
+{
+        struct super_block *sb = dentry->d_sb;
+        struct omfs_sb_info *sbi = OMFS_SB(sb);
+        unsigned long nr_free;
+        buf->f_type = OMFS_MAGIC;
+        buf->f_bsize = sbi->s_blocksize;
+        buf->f_blocks = sbi->s_num_blocks;
+        buf->f_files = sbi->s_num_blocks;
+        buf->f_namelen = OMFS_NAMELEN;
+        nr_free = omfs_count_free(sb);
+        buf->f_ffree = nr_free;
+        buf->f_bavail = nr_free;
+        buf->f_bfree = nr_free;
+        return 0;
+}
+static struct super_operations omfs_sops = {   /* installed as sb->s_op by omfs_fill_super() */
+        .write_inode    = omfs_write_inode,     /* also used by omfs_sync_inode() with wait=1 */
+        .delete_inode   = omfs_delete_inode,
+        .put_super      = omfs_put_super,
+        .statfs         = omfs_statfs,
+        .show_options   = generic_show_options, /* pairs with save_mount_options() in omfs_fill_super() */
+};
+/*
+ * For Rio Karma, there is an on-disk free bitmap whose location is
+ * stored in the root block.  For ReplayTV, there is no such free bitmap
+ * so we have to walk the tree.  Both inodes and file data are allocated
+ * from the same map.  This array can be big (300k) so we allocate
+ * in units of the blocksize.
+ *
+ * What this function does: when s_bitmap_ino is all ones nothing is read
+ * and 0 is returned with s_imap left untouched.  Otherwise the bitmap
+ * (one bit per s_num_blocks block) is copied, one sb->s_blocksize chunk
+ * per s_imap slot, from consecutive blocks starting at the scaled
+ * s_bitmap_ino.  Bytes of the last chunk that lie beyond the bitmap are
+ * filled with 0xff.
+ *
+ * Returns 0 on success.  Returns -ENOMEM if an allocation fails or a
+ * bitmap block cannot be read; in that case everything allocated here is
+ * freed, s_imap is NULL and s_imap_size is 0.
+ */
+static int omfs_get_imap(struct super_block *sb)
+{
+        int bitmap_size;        /* bitmap length in bytes */
+        int array_size;         /* number of blocksize chunks needed */
+        int count;
+        struct omfs_sb_info *sbi = OMFS_SB(sb);
+        struct buffer_head *bh;
+        unsigned long **ptr;
+        sector_t block;
+        bitmap_size = DIV_ROUND_UP(sbi->s_num_blocks, 8);
+        array_size = DIV_ROUND_UP(bitmap_size, sb->s_blocksize);
+        if (sbi->s_bitmap_ino == ~0ULL)
+                goto out;
+        sbi->s_imap_size = array_size;
+        sbi->s_imap = kzalloc(array_size * sizeof(unsigned long *), GFP_KERNEL);
+        if (!sbi->s_imap)
+                goto nomem;
+        block = clus_to_blk(sbi, sbi->s_bitmap_ino);
+        ptr = sbi->s_imap;
+        for (count = bitmap_size; count > 0; count -= sb->s_blocksize) {        /* count = bitmap bytes still to load */
+                bh = sb_bread(sb, block++);
+                if (!bh)
+                        goto nomem_free;
+                *ptr = kmalloc(sb->s_blocksize, GFP_KERNEL);
+                if (!*ptr) {
+                        brelse(bh);
+                        goto nomem_free;
+                }
+                memcpy(*ptr, bh->b_data, sb->s_blocksize);
+                if (count < sb->s_blocksize)    /* final, partly used chunk */
+                        memset((void *)*ptr + count, 0xff,
+                                sb->s_blocksize - count);
+                brelse(bh);
+                ptr++;
+        }
+out:
+        return 0;
+nomem_free:
+        for (count = 0; count < array_size; count++)
+                kfree(sbi->s_imap[count]);      /* unfilled slots are NULL from kzalloc */
+        kfree(sbi->s_imap);
+nomem:
+        sbi->s_imap = NULL;
+        sbi->s_imap_size = 0;
+        return -ENOMEM;
+}
+/* Mount option identifiers; Opt_err marks the end of the token table. */
+enum {
+        Opt_uid, Opt_gid, Opt_umask, Opt_dmask, Opt_fmask, Opt_err
+};
+/*
+ * Patterns recognised by parse_options().  match_token() walks the table
+ * until a pattern matches and stops only at an entry whose pattern is
+ * NULL, so the table must end with such a sentinel; without it an
+ * unrecognised option makes the walk run past the end of the array.
+ */
+static match_table_t tokens = {
+        {Opt_uid, "uid=%u"},
+        {Opt_gid, "gid=%u"},
+        {Opt_umask, "umask=%o"},
+        {Opt_dmask, "dmask=%o"},
+        {Opt_fmask, "fmask=%o"},
+        {Opt_err, NULL},
+};
+/*
+ * Parse the comma-separated mount option string into @sbi.
+ *
+ * Recognised options are uid=, gid=, umask= (sets both masks), dmask= and
+ * fmask=.  Empty items are skipped and @options is modified in place by
+ * strsep().  Returns 1 on success (including a NULL string) and 0 on an
+ * unknown option or an unparsable value.
+ */
+static int parse_options(char *options, struct omfs_sb_info *sbi)
+{
+        substring_t args[MAX_OPT_ARGS];
+        char *opt;
+        int value;
+        if (!options)
+                return 1;
+        for (opt = strsep(&options, ","); opt != NULL;
+             opt = strsep(&options, ",")) {
+                if (*opt == '\0')
+                        continue;
+                switch (match_token(opt, tokens, args)) {
+                case Opt_uid:
+                        if (match_int(&args[0], &value))
+                                return 0;
+                        sbi->s_uid = value;
+                        break;
+                case Opt_gid:
+                        if (match_int(&args[0], &value))
+                                return 0;
+                        sbi->s_gid = value;
+                        break;
+                case Opt_umask:
+                        if (match_octal(&args[0], &value))
+                                return 0;
+                        sbi->s_dmask = value;
+                        sbi->s_fmask = value;
+                        break;
+                case Opt_dmask:
+                        if (match_octal(&args[0], &value))
+                                return 0;
+                        sbi->s_dmask = value;
+                        break;
+                case Opt_fmask:
+                        if (match_octal(&args[0], &value))
+                                return 0;
+                        sbi->s_fmask = value;
+                        break;
+                default:
+                        return 0;
+                }
+        }
+        return 1;
+}
+/*
+ * Set up the super block for a mount.
+ *
+ * Reads the OMFS super block from block 0 (at a 512-byte block size),
+ * validates the magic and the block sizes, switches the device to the
+ * system block size, reads the root block, loads the free bitmap and
+ * instantiates the root directory.
+ *
+ * Returns 0 on success.  On failure returns a negative errno (-ENOMEM,
+ * -EINVAL, or the error from omfs_get_imap()/omfs_iget()) and releases
+ * everything allocated here, leaving sb->s_fs_info NULL.
+ */
+static int omfs_fill_super(struct super_block *sb, void *data, int silent)
+{
+        struct buffer_head *bh, *bh2;
+        struct omfs_super_block *omfs_sb;
+        struct omfs_root_block *omfs_rb;
+        struct omfs_sb_info *sbi;
+        struct inode *root;
+        sector_t start;
+        int ret = -EINVAL;
+        int i;
+        save_mount_options(sb, (char *) data);
+        sbi = kzalloc(sizeof(struct omfs_sb_info), GFP_KERNEL);
+        if (!sbi)
+                return -ENOMEM;
+        sb->s_fs_info = sbi;
+        sbi->s_uid = current->uid;
+        sbi->s_gid = current->gid;
+        sbi->s_dmask = sbi->s_fmask = current->fs->umask;
+        if (!parse_options((char *) data, sbi))
+                goto end;
+        sb->s_maxbytes = 0xffffffff;
+        sb_set_blocksize(sb, 0x200);
+        bh = sb_bread(sb, 0);
+        if (!bh)
+                goto end;
+        omfs_sb = (struct omfs_super_block *)bh->b_data;
+        if (omfs_sb->s_magic != cpu_to_be32(OMFS_MAGIC)) {
+                if (!silent)
+                        printk(KERN_ERR "omfs: Invalid superblock (%x)\n",
+                                   omfs_sb->s_magic);
+                goto out_brelse_bh;
+        }
+        sb->s_magic = OMFS_MAGIC;
+        sbi->s_num_blocks = be64_to_cpu(omfs_sb->s_num_blocks);
+        sbi->s_blocksize = be32_to_cpu(omfs_sb->s_blocksize);
+        sbi->s_mirrors = be32_to_cpu(omfs_sb->s_mirrors);
+        sbi->s_root_ino = be64_to_cpu(omfs_sb->s_root_block);
+        sbi->s_sys_blocksize = be32_to_cpu(omfs_sb->s_sys_blocksize);
+        mutex_init(&sbi->s_bitmap_lock);
+        if (sbi->s_sys_blocksize > PAGE_SIZE) {
+                printk(KERN_ERR "omfs: sysblock size (%d) is out of range\n",
+                        sbi->s_sys_blocksize);
+                goto out_brelse_bh;
+        }
+        if (sbi->s_blocksize < sbi->s_sys_blocksize ||
+            sbi->s_blocksize > OMFS_MAX_BLOCK_SIZE) {
+                printk(KERN_ERR "omfs: block size (%d) is out of range\n",
+                        sbi->s_blocksize);
+                goto out_brelse_bh;
+        }
+        /*
+         * Use sys_blocksize as the fs block since it is smaller than a
+         * page while the fs blocksize can be larger.
+         */
+        sb_set_blocksize(sb, sbi->s_sys_blocksize);
+        /*
+         * ...and the difference goes into a shift.  sys_blocksize is always
+         * a power of two factor of blocksize.
+         */
+        sbi->s_block_shift = get_bitmask_order(sbi->s_blocksize) -
+                get_bitmask_order(sbi->s_sys_blocksize);
+        start = clus_to_blk(sbi, be64_to_cpu(omfs_sb->s_root_block));
+        bh2 = sb_bread(sb, start);
+        if (!bh2)
+                goto out_brelse_bh;
+        omfs_rb = (struct omfs_root_block *)bh2->b_data;
+        sbi->s_bitmap_ino = be64_to_cpu(omfs_rb->r_bitmap);
+        sbi->s_clustersize = be32_to_cpu(omfs_rb->r_clustersize);
+        if (sbi->s_num_blocks != be64_to_cpu(omfs_rb->r_num_blocks)) {
+                printk(KERN_ERR "omfs: block count discrepancy between "
+                        "super and root blocks (%llx, %llx)\n",
+                        sbi->s_num_blocks, be64_to_cpu(omfs_rb->r_num_blocks));
+                goto out_brelse_bh2;
+        }
+        ret = omfs_get_imap(sb);
+        if (ret)
+                goto out_brelse_bh2;
+        sb->s_op = &omfs_sops;
+        root = omfs_iget(sb, be64_to_cpu(omfs_rb->r_root_dir));
+        if (IS_ERR(root)) {
+                ret = PTR_ERR(root);
+                goto out_brelse_bh2;
+        }
+        sb->s_root = d_alloc_root(root);
+        if (!sb->s_root) {
+                iput(root);
+                /* ret is 0 here from omfs_get_imap(); report the failure */
+                ret = -ENOMEM;
+                goto out_brelse_bh2;
+        }
+        printk(KERN_DEBUG "omfs: Mounted volume %s\n", omfs_rb->r_name);
+        ret = 0;
+out_brelse_bh2:
+        brelse(bh2);
+out_brelse_bh:
+        brelse(bh);
+end:
+        if (ret) {
+                /*
+                 * s_root is never set on a failed mount, so ->put_super is
+                 * not expected to run: free the bitmap chunks (none if it
+                 * was not loaded), the pointer array and sbi here.
+                 */
+                for (i = 0; i < sbi->s_imap_size; i++)
+                        kfree(sbi->s_imap[i]);
+                kfree(sbi->s_imap);
+                kfree(sbi);
+                sb->s_fs_info = NULL;
+        }
+        return ret;
+}
+/*
+ * get_sb callback for the "omfs" filesystem type: delegate to
+ * get_sb_bdev() with omfs_fill_super as the fill callback.
+ */
+static int omfs_get_sb(struct file_system_type *fs_type,
+                        int flags, const char *dev_name,
+                        void *data, struct vfsmount *m)
+{
+        int err;
+        err = get_sb_bdev(fs_type, flags, dev_name, data, omfs_fill_super, m);
+        return err;
+}
+static struct file_system_type omfs_fs_type = {        /* registered in init_omfs_fs(), removed in exit_omfs_fs() */
+        .owner = THIS_MODULE,
+        .name = "omfs",
+        .get_sb = omfs_get_sb,          /* get_sb_bdev() with omfs_fill_super */
+        .kill_sb = kill_block_super,
+        .fs_flags = FS_REQUIRES_DEV,
+};
+/* Module init: register the "omfs" filesystem type. */
+static int __init init_omfs_fs(void)
+{
+        int err;
+        err = register_filesystem(&omfs_fs_type);
+        return err;
+}
+/* Module exit: unregister the "omfs" filesystem type. */
+static void __exit exit_omfs_fs(void)
+{
+        struct file_system_type *fs = &omfs_fs_type;
+        unregister_filesystem(fs);
+}
+module_init(init_omfs_fs);
+module_exit(exit_omfs_fs);
diff --git a/fs/omfs/omfs.h b/fs/omfs/omfs.h
new file mode 100644
index 000000000000..2bc0f0670406
--- /dev/null
+++ b/fs/omfs/omfs.h
@@ -0,0 +1,67 @@
+#ifndef _OMFS_H
+#define _OMFS_H
+#include <linux/module.h>
+#include <linux/fs.h>
+#include "omfs_fs.h"
+/* In-memory structures: per-mount state, reached through sb->s_fs_info (see OMFS_SB()) */
+struct omfs_sb_info {
+        u64 s_num_blocks;       /* total FS blocks, from the on-disk super block */
+        u64 s_bitmap_ino;       /* block # of the free bitmap (root block r_bitmap); all ones = none loaded */
+        u64 s_root_ino;         /* block # of the root block (super block s_root_block) */
+        u32 s_blocksize;        /* FS block size in bytes */
+        u32 s_mirrors;          /* # of mirrors of system blocks */
+        u32 s_sys_blocksize;    /* size of non-data blocks; used as the sb block size */
+        u32 s_clustersize;      /* from root block r_clustersize */
+        int s_block_shift;      /* shift turning a cluster # into a sys-block #, see clus_to_blk() */
+        unsigned long **s_imap; /* free bitmap in block-sized chunks, or NULL */
+        int s_imap_size;        /* number of chunks in s_imap */
+        struct mutex s_bitmap_lock;
+        int s_uid;              /* owner reported for all inodes read from disk (uid= option) */
+        int s_gid;              /* group reported for all inodes read from disk (gid= option) */
+        int s_dmask;            /* permission bits removed from directories */
+        int s_fmask;            /* permission bits removed from regular files */
+};
+/*
+ * Scale a cluster (FS block) number to a block number in units of the
+ * super block's block size by shifting left by s_block_shift.
+ */
+static inline sector_t clus_to_blk(struct omfs_sb_info *sbi, sector_t block)
+{
+        sector_t scaled = block << sbi->s_block_shift;
+        return scaled;
+}
+/* Fetch the OMFS private data hung off a VFS super block. */
+static inline struct omfs_sb_info *OMFS_SB(struct super_block *sb)
+{
+        struct omfs_sb_info *sbi = sb->s_fs_info;
+        return sbi;
+}
+/* bitmap.c */
+extern unsigned long omfs_count_free(struct super_block *sb);
+extern int omfs_allocate_block(struct super_block *sb, u64 block);
+extern int omfs_allocate_range(struct super_block *sb, int min_request,
+                        int max_request, u64 *return_block, int *return_size);
+extern int omfs_clear_range(struct super_block *sb, u64 block, int count);
+/* dir.c */
+extern struct file_operations omfs_dir_operations;
+extern struct inode_operations omfs_dir_inops;
+extern int omfs_make_empty(struct inode *inode, struct super_block *sb);
+extern int omfs_is_bad(struct omfs_sb_info *sbi, struct omfs_header *header,
+                        u64 fsblock);
+/* file.c */
+extern struct file_operations omfs_file_operations;
+extern struct inode_operations omfs_file_inops;
+extern struct address_space_operations omfs_aops;
+extern void omfs_make_empty_table(struct buffer_head *bh, int offset);
+extern int omfs_shrink_inode(struct inode *inode);
+/* inode.c */
+extern struct inode *omfs_iget(struct super_block *sb, ino_t inode);
+extern struct inode *omfs_new_inode(struct inode *dir, int mode);
+extern int omfs_reserve_block(struct super_block *sb, sector_t block);
+extern int omfs_find_empty_block(struct super_block *sb, int mode, ino_t *ino);
+extern int omfs_sync_inode(struct inode *inode);
+#endif
diff --git a/fs/omfs/omfs_fs.h b/fs/omfs/omfs_fs.h
new file mode 100644
index 000000000000..12cca245d6e8
--- /dev/null
+++ b/fs/omfs/omfs_fs.h
@@ -0,0 +1,80 @@
+#ifndef _OMFS_FS_H
+#define _OMFS_FS_H
+/* OMFS On-disk structures; all multi-byte fields are big-endian */
+#define OMFS_MAGIC 0xC2993D87  /* expected omfs_super_block.s_magic */
+#define OMFS_IMAGIC 0xD2       /* omfs_header.h_magic */
+#define OMFS_DIR 'D'   /* omfs_inode.i_type: directory */
+#define OMFS_FILE 'F'  /* omfs_inode.i_type: regular file */
+#define OMFS_INODE_NORMAL 'e'  /* omfs_header.h_type written for every inode */
+#define OMFS_INODE_CONTINUATION 'c'
+#define OMFS_INODE_SYSTEM 's'
+#define OMFS_NAMELEN 256       /* size of the i_name and r_name fields */
+#define OMFS_DIR_START 0x1b8
+#define OMFS_EXTENT_START 0x1d0
+#define OMFS_EXTENT_CONT 0x40
+#define OMFS_XOR_COUNT 19      /* # of leading header bytes folded into h_check_xor */
+#define OMFS_MAX_BLOCK_SIZE 8192       /* largest s_blocksize accepted at mount */
+struct omfs_super_block {       /* read from block 0 by omfs_fill_super() */
+        char s_fill1[256];
+        __be64 s_root_block;            /* block number of omfs_root_block */
+        __be64 s_num_blocks;            /* total number of FS blocks; must equal r_num_blocks */
+        __be32 s_magic;                 /* OMFS_MAGIC */
+        __be32 s_blocksize;             /* size of a block */
+        __be32 s_mirrors;               /* # of mirrors of system blocks */
+        __be32 s_sys_blocksize;         /* size of non-data blocks */
+};
+struct omfs_header {            /* common header at the start of root block and inodes */
+        __be64 h_self;                  /* FS block where this is located */
+        __be32 h_body_size;             /* size of useful data after header */
+        __be16 h_crc;                   /* crc-itu-t of the body_size bytes after the header */
+        char h_fill1[2];
+        u8 h_version;                   /* version, always 1 */
+        char h_type;                    /* OMFS_INODE_X */
+        u8 h_magic;                     /* OMFS_IMAGIC */
+        u8 h_check_xor;                 /* XOR of the OMFS_XOR_COUNT header bytes before this */
+        __be32 h_fill2;
+};
+struct omfs_root_block {        /* found at the block named by s_root_block */
+        struct omfs_header r_head;      /* header */
+        __be64 r_fill1;
+        __be64 r_num_blocks;            /* total number of FS blocks; checked against s_num_blocks */
+        __be64 r_root_dir;              /* block # of root directory */
+        __be64 r_bitmap;                /* block # of free space bitmap; all ones if there is none */
+        __be32 r_blocksize;             /* size of a block */
+        __be32 r_clustersize;           /* size allocated for data blocks */
+        __be64 r_mirrors;               /* # of mirrors of system blocks */
+        char r_name[OMFS_NAMELEN];      /* partition label */
+};
+struct omfs_inode {             /* on-disk inode for a directory or regular file */
+        struct omfs_header i_head;      /* header */
+        __be64 i_parent;                /* parent containing this inode */
+        __be64 i_sibling;               /* next inode in hash bucket */
+        __be64 i_ctime;                 /* ctime, in milliseconds; the only timestamp stored */
+        char i_fill1[35];
+        char i_type;                    /* OMFS_[DIR,FILE] */
+        __be32 i_fill2;
+        char i_fill3[64];
+        char i_name[OMFS_NAMELEN];      /* filename */
+        __be64 i_size;                  /* size of file, in bytes */
+};
+struct omfs_extent_entry {      /* one entry of an omfs_extent table */
+        __be64 e_cluster;               /* start location of a set of blocks */
+        __be64 e_blocks;                /* number of blocks after e_cluster */
+};
+struct omfs_extent {            /* header of an extent table */
+        __be64 e_next;                  /* next extent table location */
+        __be32 e_extent_count;          /* total # extents in this table */
+        __be32 e_fill;
+        struct omfs_extent_entry e_entry;       /* start of extent entries; NOTE(review): presumably e_extent_count entries follow - confirm against file.c */
+};
+#endif
diff --git a/fs/open.c b/fs/open.c
index a99ad09c3197..52647be277a2 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -64,7 +64,8 @@ static int vfs_statfs_native(struct dentry *dentry, struct statfs *buf)
                memcpy(buf, &st, sizeof(st));
        else {
                if (sizeof buf->f_blocks == 4) {
-                        if ((st.f_blocks | st.f_bfree | st.f_bavail) &
+                        if ((st.f_blocks | st.f_bfree | st.f_bavail |
+                             st.f_bsize | st.f_frsize) &
                            0xffffffff00000000ULL)
                                return -EOVERFLOW;
                        /*
@@ -121,37 +122,37 @@ static int vfs_statfs64(struct dentry *dentry, struct statfs64 *buf)
        return 0;
 }
-asmlinkage long sys_statfs(const char __user * path, struct statfs __user * buf)
+asmlinkage long sys_statfs(const char __user *pathname, struct statfs __user * buf)
 {
-        struct nameidata nd;
+        struct path path;
        int error;
-        error = user_path_walk(path, &nd);
+        error = user_path(pathname, &path);
        if (!error) {
                struct statfs tmp;
-                error = vfs_statfs_native(nd.path.dentry, &tmp);
+                error = vfs_statfs_native(path.dentry, &tmp);
                if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
                        error = -EFAULT;
-                path_put(&nd.path);
+                path_put(&path);
        }
        return error;
 }
-asmlinkage long sys_statfs64(const char __user *path, size_t sz, struct statfs64 __user *buf)
+asmlinkage long sys_statfs64(const char __user *pathname, size_t sz, struct statfs64 __user *buf)
 {
-        struct nameidata nd;
+        struct path path;
        long error;
        if (sz != sizeof(*buf))
                return -EINVAL;
-        error = user_path_walk(path, &nd);
+        error = user_path(pathname, &path);
        if (!error) {
                struct statfs64 tmp;
-                error = vfs_statfs64(nd.path.dentry, &tmp);
+                error = vfs_statfs64(path.dentry, &tmp);
                if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
                        error = -EFAULT;
-                path_put(&nd.path);
+                path_put(&path);
        }
        return error;
 }
@@ -222,20 +223,20 @@ int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs,
        return err;
 }
-static long do_sys_truncate(const char __user * path, loff_t length)
+static long do_sys_truncate(const char __user *pathname, loff_t length)
 {
-        struct nameidata nd;
+        struct path path;
-        struct inode * inode;
+        struct inode *inode;
        int error;
        error = -EINVAL;
        if (length < 0) /* sorry, but loff_t says... */
                goto out;
-        error = user_path_walk(path, &nd);
+        error = user_path(pathname, &path);
        if (error)
                goto out;
-        inode = nd.path.dentry->d_inode;
+        inode = path.dentry->d_inode;
        /* For directories it's -EISDIR, for other non-regulars - -EINVAL */
        error = -EISDIR;
@@ -246,16 +247,16 @@ static long do_sys_truncate(const char __user * path, loff_t length)
        if (!S_ISREG(inode->i_mode))
                goto dput_and_out;
-        error = mnt_want_write(nd.path.mnt);
+        error = mnt_want_write(path.mnt);
        if (error)
                goto dput_and_out;
-        error = vfs_permission(&nd, MAY_WRITE);
+        error = inode_permission(inode, MAY_WRITE);
        if (error)
                goto mnt_drop_write_and_out;
        error = -EPERM;
-        if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
+        if (IS_APPEND(inode))
                goto mnt_drop_write_and_out;
        error = get_write_access(inode);
@@ -273,15 +274,15 @@ static long do_sys_truncate(const char __user * path, loff_t length)
        error = locks_verify_truncate(inode, NULL, length);
        if (!error) {
                DQUOT_INIT(inode);
-                error = do_truncate(nd.path.dentry, length, 0, NULL);
+                error = do_truncate(path.dentry, length, 0, NULL);
        }
 put_write_and_out:
        put_write_access(inode);
 mnt_drop_write_and_out:
-        mnt_drop_write(nd.path.mnt);
+        mnt_drop_write(path.mnt);
 dput_and_out:
-        path_put(&nd.path);
+        path_put(&path);
 out:
        return error;
 }
@@ -424,7 +425,8 @@ out:
 */
 asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode)
 {
-        struct nameidata nd;
+        struct path path;
+        struct inode *inode;
        int old_fsuid, old_fsgid;
        kernel_cap_t uninitialized_var(old_cap);  /* !SECURE_NO_SETUID_FIXUP */
        int res;
@@ -447,7 +449,7 @@ asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode)
                 * FIXME: There is a race here against sys_capset.  The
                 * capabilities can change yet we will restore the old
                 * value below.  We should hold task_capabilities_lock,
-                 * but we cannot because user_path_walk can sleep.
+                 * but we cannot because user_path_at can sleep.
                 */
 #endif /* ndef CONFIG_SECURITY_FILE_CAPABILITIES */
                if (current->uid)
@@ -456,14 +458,25 @@ asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode)
                        old_cap = cap_set_effective(current->cap_permitted);
        }
-        res = __user_walk_fd(dfd, filename, LOOKUP_FOLLOW|LOOKUP_ACCESS, &nd);
+        res = user_path_at(dfd, filename, LOOKUP_FOLLOW, &path);
        if (res)
                goto out;
-        res = vfs_permission(&nd, mode);
+        inode = path.dentry->d_inode;
+        if ((mode & MAY_EXEC) && S_ISREG(inode->i_mode)) {
+                /*
+                 * MAY_EXEC on regular files is denied if the fs is mounted
+                 * with the "noexec" flag.
+                 */
+                res = -EACCES;
+                if (path.mnt->mnt_flags & MNT_NOEXEC)
+                        goto out_path_release;
+        }
+        res = inode_permission(inode, mode | MAY_ACCESS);
        /* SuS v2 requires we report a read only fs too */
-        if(res || !(mode & S_IWOTH) ||
+        if (res || !(mode & S_IWOTH) || special_file(inode->i_mode))
-           special_file(nd.path.dentry->d_inode->i_mode))
                goto out_path_release;
        /*
         * This is a rare case where using __mnt_is_readonly()
@@ -475,11 +488,11 @@ asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode)
         * inherently racy and know that the fs may change
         * state before we even see this result.
         */
-        if (__mnt_is_readonly(nd.path.mnt))
+        if (__mnt_is_readonly(path.mnt))
                res = -EROFS;
 out_path_release:
-        path_put(&nd.path);
+        path_put(&path);
 out:
        current->fsuid = old_fsuid;
        current->fsgid = old_fsgid;
@@ -497,22 +510,21 @@ asmlinkage long sys_access(const char __user *filename, int mode)
 asmlinkage long sys_chdir(const char __user * filename)
 {
-        struct nameidata nd;
+        struct path path;
        int error;
-        error = __user_walk(filename,
+        error = user_path_dir(filename, &path);
-                            LOOKUP_FOLLOW|LOOKUP_DIRECTORY|LOOKUP_CHDIR, &nd);
        if (error)
                goto out;
-        error = vfs_permission(&nd, MAY_EXEC);
+        error = inode_permission(path.dentry->d_inode, MAY_EXEC | MAY_ACCESS);
        if (error)
                goto dput_and_out;
-        set_fs_pwd(current->fs, &nd.path);
+        set_fs_pwd(current->fs, &path);
 dput_and_out:
-        path_put(&nd.path);
+        path_put(&path);
 out:
        return error;
 }
@@ -534,7 +546,7 @@ asmlinkage long sys_fchdir(unsigned int fd)
        if (!S_ISDIR(inode->i_mode))
                goto out_putf;
-        error = file_permission(file, MAY_EXEC);
+        error = inode_permission(inode, MAY_EXEC | MAY_ACCESS);
        if (!error)
                set_fs_pwd(current->fs, &file->f_path);
 out_putf:
@@ -545,14 +557,14 @@ out:
 asmlinkage long sys_chroot(const char __user * filename)
 {
-        struct nameidata nd;
+        struct path path;
        int error;
-        error = __user_walk(filename, LOOKUP_FOLLOW | LOOKUP_DIRECTORY | LOOKUP_NOALT, &nd);
+        error = user_path_dir(filename, &path);
        if (error)
                goto out;
-        error = vfs_permission(&nd, MAY_EXEC);
+        error = inode_permission(path.dentry->d_inode, MAY_EXEC | MAY_ACCESS);
        if (error)
                goto dput_and_out;
@@ -560,11 +572,10 @@ asmlinkage long sys_chroot(const char __user * filename)
        if (!capable(CAP_SYS_CHROOT))
                goto dput_and_out;
-        set_fs_root(current->fs, &nd.path);
+        set_fs_root(current->fs, &path);
-        set_fs_altroot();
        error = 0;
 dput_and_out:
-        path_put(&nd.path);
+        path_put(&path);
 out:
        return error;
 }
@@ -589,9 +600,6 @@ asmlinkage long sys_fchmod(unsigned int fd, mode_t mode)
        err = mnt_want_write(file->f_path.mnt);
        if (err)
                goto out_putf;
-        err = -EPERM;
-        if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
-                goto out_drop_write;
        mutex_lock(&inode->i_mutex);
        if (mode == (mode_t) -1)
                mode = inode->i_mode;
@@ -599,8 +607,6 @@ asmlinkage long sys_fchmod(unsigned int fd, mode_t mode)
        newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
        err = notify_change(dentry, &newattrs);
        mutex_unlock(&inode->i_mutex);
-out_drop_write:
        mnt_drop_write(file->f_path.mnt);
 out_putf:
        fput(file);
@@ -611,36 +617,29 @@ out:
 asmlinkage long sys_fchmodat(int dfd, const char __user *filename,
                             mode_t mode)
 {
-        struct nameidata nd;
+        struct path path;
-        struct inode * inode;
+        struct inode *inode;
        int error;
        struct iattr newattrs;
-        error = __user_walk_fd(dfd, filename, LOOKUP_FOLLOW, &nd);
+        error = user_path_at(dfd, filename, LOOKUP_FOLLOW, &path);
        if (error)
                goto out;
-        inode = nd.path.dentry->d_inode;
+        inode = path.dentry->d_inode;
-        error = mnt_want_write(nd.path.mnt);
+        error = mnt_want_write(path.mnt);
        if (error)
                goto dput_and_out;
-        error = -EPERM;
-        if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
-                goto out_drop_write;
        mutex_lock(&inode->i_mutex);
        if (mode == (mode_t) -1)
                mode = inode->i_mode;
        newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO);
        newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
-        error = notify_change(nd.path.dentry, &newattrs);
+        error = notify_change(path.dentry, &newattrs);
        mutex_unlock(&inode->i_mutex);
+        mnt_drop_write(path.mnt);
-out_drop_write:
-        mnt_drop_write(nd.path.mnt);
 dput_and_out:
-        path_put(&nd.path);
+        path_put(&path);
 out:
        return error;
 }
@@ -652,18 +651,10 @@ asmlinkage long sys_chmod(const char __user *filename, mode_t mode)
 static int chown_common(struct dentry * dentry, uid_t user, gid_t group)
 {
-        struct inode * inode;
+        struct inode *inode = dentry->d_inode;
        int error;
        struct iattr newattrs;
-        error = -ENOENT;
-        if (!(inode = dentry->d_inode)) {
-                printk(KERN_ERR "chown_common: NULL inode\n");
-                goto out;
-        }
-        error = -EPERM;
-        if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
-                goto out;
        newattrs.ia_valid =  ATTR_CTIME;
        if (user != (uid_t) -1) {
                newattrs.ia_valid |= ATTR_UID;
@@ -679,25 +670,25 @@ static int chown_common(struct dentry * dentry, uid_t user, gid_t group)
        mutex_lock(&inode->i_mutex);
        error = notify_change(dentry, &newattrs);
        mutex_unlock(&inode->i_mutex);
-out:
        return error;
 }
 asmlinkage long sys_chown(const char __user * filename, uid_t user, gid_t group)
 {
-        struct nameidata nd;
+        struct path path;
        int error;
-        error = user_path_walk(filename, &nd);
+        error = user_path(filename, &path);
        if (error)
                goto out;
-        error = mnt_want_write(nd.path.mnt);
+        error = mnt_want_write(path.mnt);
        if (error)
                goto out_release;
-        error = chown_common(nd.path.dentry, user, group);
+        error = chown_common(path.dentry, user, group);
-        mnt_drop_write(nd.path.mnt);
+        mnt_drop_write(path.mnt);
 out_release:
-        path_put(&nd.path);
+        path_put(&path);
 out:
        return error;
 }
@@ -705,7 +696,7 @@ out:
 asmlinkage long sys_fchownat(int dfd, const char __user *filename, uid_t user,
                             gid_t group, int flag)
 {
-        struct nameidata nd;
+        struct path path;
        int error = -EINVAL;
        int follow;
@@ -713,35 +704,35 @@ asmlinkage long sys_fchownat(int dfd, const char __user *filename, uid_t user,
                goto out;
        follow = (flag & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW;
-        error = __user_walk_fd(dfd, filename, follow, &nd);
+        error = user_path_at(dfd, filename, follow, &path);
        if (error)
                goto out;
-        error = mnt_want_write(nd.path.mnt);
+        error = mnt_want_write(path.mnt);
        if (error)
                goto out_release;
-        error = chown_common(nd.path.dentry, user, group);
+        error = chown_common(path.dentry, user, group);
-        mnt_drop_write(nd.path.mnt);
+        mnt_drop_write(path.mnt);
 out_release:
-        path_put(&nd.path);
+        path_put(&path);
 out:
        return error;
 }
 asmlinkage long sys_lchown(const char __user * filename, uid_t user, gid_t group)
 {
-        struct nameidata nd;
+        struct path path;
        int error;
-        error = user_path_walk_link(filename, &nd);
+        error = user_lpath(filename, &path);
        if (error)
                goto out;
-        error = mnt_want_write(nd.path.mnt);
+        error = mnt_want_write(path.mnt);
        if (error)
                goto out_release;
-        error = chown_common(nd.path.dentry, user, group);
+        error = chown_common(path.dentry, user, group);
-        mnt_drop_write(nd.path.mnt);
+        mnt_drop_write(path.mnt);
 out_release:
-        path_put(&nd.path);
+        path_put(&path);
 out:
        return error;
 }
@@ -981,7 +972,6 @@ int get_unused_fd_flags(int flags)
        int fd, error;
        struct fdtable *fdt;
-        error = -EMFILE;
        spin_lock(&files->file_lock);
 repeat:
@@ -989,13 +979,6 @@ repeat:
        fd = find_next_zero_bit(fdt->open_fds->fds_bits, fdt->max_fds,
                                files->next_fd);
-        /*
-         * N.B. For clone tasks sharing a files structure, this test
-         * will limit the total number of files that can be opened.
-         */
-        if (fd >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur)
-                goto out;
        /* Do we need to expand the fd array or fd set?  */
        error = expand_files(files, fd);
        if (error < 0)
@@ -1006,7 +989,6 @@ repeat:
                 * If we needed to expand the fs array we
                 * might have blocked - try again.
                 */
-                error = -EMFILE;
                goto repeat;
        }
diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c
index d17b4fd204e1..9f5b054f06b9 100644
--- a/fs/openpromfs/inode.c
+++ b/fs/openpromfs/inode.c
@@ -430,7 +430,7 @@ static struct file_system_type openprom_fs_type = {
        .kill_sb        = kill_anon_super,
 };
-static void op_inode_init_once(struct kmem_cache * cachep, void *data)
+static void op_inode_init_once(void *data)
 {
        struct op_inode_info *oi = (struct op_inode_info *) data;
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index 6149e4b58c88..7d6b34e201db 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -344,18 +344,18 @@ static ssize_t whole_disk_show(struct device *dev,
 static DEVICE_ATTR(whole_disk, S_IRUSR | S_IRGRP | S_IROTH,
                   whole_disk_show, NULL);
-void add_partition(struct gendisk *disk, int part, sector_t start, sector_t len, int flags)
+int add_partition(struct gendisk *disk, int part, sector_t start, sector_t len, int flags)
 {
        struct hd_struct *p;
        int err;
        p = kzalloc(sizeof(*p), GFP_KERNEL);
        if (!p)
-                return;
+                return -ENOMEM;
        if (!init_part_stats(p)) {
-                kfree(p);
+                err = -ENOMEM;
-                return;
+                goto out0;
        }
        p->start_sect = start;
        p->nr_sects = len;
@@ -378,15 +378,31 @@ void add_partition(struct gendisk *disk, int part, sector_t start, sector_t len,
        /* delay uevent until 'holders' subdir is created */
        p->dev.uevent_suppress = 1;
-        device_add(&p->dev);
+        err = device_add(&p->dev);
+        if (err)
+                goto out1;
        partition_sysfs_add_subdir(p);
        p->dev.uevent_suppress = 0;
-        if (flags & ADDPART_FLAG_WHOLEDISK)
+        if (flags & ADDPART_FLAG_WHOLEDISK) {
                err = device_create_file(&p->dev, &dev_attr_whole_disk);
+                if (err)
+                        goto out2;
+        }
        /* suppress uevent if the disk supresses it */
        if (!disk->dev.uevent_suppress)
                kobject_uevent(&p->dev.kobj, KOBJ_ADD);
+        return 0;
+out2:
+        device_del(&p->dev);
+out1:
+        put_device(&p->dev);
+        free_part_stats(p);
+out0:
+        kfree(p);
+        return err;
 }
 /* Not exported, helper to add_disk(). */
@@ -401,7 +417,7 @@ void register_disk(struct gendisk *disk)
        disk->dev.parent = disk->driverfs_dev;
        disk->dev.devt = MKDEV(disk->major, disk->first_minor);
-        strlcpy(disk->dev.bus_id, disk->disk_name, KOBJ_NAME_LEN);
+        strlcpy(disk->dev.bus_id, disk->disk_name, BUS_ID_SIZE);
        /* ewww... some of these buggers have / in the name... */
        s = strchr(disk->dev.bus_id, '/');
        if (s)
@@ -483,10 +499,16 @@ int rescan_partitions(struct gendisk *disk, struct block_device *bdev)
                if (!size)
                        continue;
                if (from + size > get_capacity(disk)) {
-                        printk(" %s: p%d exceeds device capacity\n",
+                        printk(KERN_ERR " %s: p%d exceeds device capacity\n",
                                disk->disk_name, p);
+                        continue;
+                }
+                res = add_partition(disk, p, from, size, state->parts[p].flags);
+                if (res) {
+                        printk(KERN_ERR " %s: p%d could not be added: %d\n",
+                                disk->disk_name, p, -res);
+                        continue;
                }
-                add_partition(disk, p, from, size, state->parts[p].flags);
 #ifdef CONFIG_BLK_DEV_MD
                if (state->parts[p].flags & ADDPART_FLAG_RAID)
                        md_autodetect_dev(bdev->bd_dev+p);
diff --git a/fs/partitions/efi.c b/fs/partitions/efi.c
index e7b07006bc41..038a6022152f 100644
--- a/fs/partitions/efi.c
+++ b/fs/partitions/efi.c
@@ -95,13 +95,6 @@
 #include "check.h"
 #include "efi.h"
-#undef EFI_DEBUG
-#ifdef EFI_DEBUG
-#define Dprintk(x...) printk(KERN_DEBUG x)
-#else
-#define Dprintk(x...)
-#endif
 /* This allows a kernel command line option 'gpt' to override
 * the test for invalid PMBR.  Not __initdata because reloading
 * the partition tables happens after init too.
@@ -305,10 +298,10 @@ is_gpt_valid(struct block_device *bdev, u64 lba,
        /* Check the GUID Partition Table signature */
        if (le64_to_cpu((*gpt)->signature) != GPT_HEADER_SIGNATURE) {
-                Dprintk("GUID Partition Table Header signature is wrong:"
+                pr_debug("GUID Partition Table Header signature is wrong:"
-                        "%lld != %lld\n",
+                         "%lld != %lld\n",
-                        (unsigned long long)le64_to_cpu((*gpt)->signature),
+                         (unsigned long long)le64_to_cpu((*gpt)->signature),
-                        (unsigned long long)GPT_HEADER_SIGNATURE);
+                         (unsigned long long)GPT_HEADER_SIGNATURE);
                goto fail;
        }
@@ -318,9 +311,8 @@ is_gpt_valid(struct block_device *bdev, u64 lba,
        crc = efi_crc32((const unsigned char *) (*gpt), le32_to_cpu((*gpt)->header_size));
        if (crc != origcrc) {
-                Dprintk
+                pr_debug("GUID Partition Table Header CRC is wrong: %x != %x\n",
-                    ("GUID Partition Table Header CRC is wrong: %x != %x\n",
+                         crc, origcrc);
-                     crc, origcrc);
                goto fail;
        }
        (*gpt)->header_crc32 = cpu_to_le32(origcrc);
@@ -328,9 +320,9 @@ is_gpt_valid(struct block_device *bdev, u64 lba,
        /* Check that the my_lba entry points to the LBA that contains
         * the GUID Partition Table */
        if (le64_to_cpu((*gpt)->my_lba) != lba) {
-                Dprintk("GPT my_lba incorrect: %lld != %lld\n",
+                pr_debug("GPT my_lba incorrect: %lld != %lld\n",
-                        (unsigned long long)le64_to_cpu((*gpt)->my_lba),
+                         (unsigned long long)le64_to_cpu((*gpt)->my_lba),
-                        (unsigned long long)lba);
+                         (unsigned long long)lba);
                goto fail;
        }
@@ -339,15 +331,15 @@ is_gpt_valid(struct block_device *bdev, u64 lba,
         */
        lastlba = last_lba(bdev);
        if (le64_to_cpu((*gpt)->first_usable_lba) > lastlba) {
-                Dprintk("GPT: first_usable_lba incorrect: %lld > %lld\n",
+                pr_debug("GPT: first_usable_lba incorrect: %lld > %lld\n",
-                        (unsigned long long)le64_to_cpu((*gpt)->first_usable_lba),
+                         (unsigned long long)le64_to_cpu((*gpt)->first_usable_lba),
-                        (unsigned long long)lastlba);
+                         (unsigned long long)lastlba);
                goto fail;
        }
        if (le64_to_cpu((*gpt)->last_usable_lba) > lastlba) {
-                Dprintk("GPT: last_usable_lba incorrect: %lld > %lld\n",
+                pr_debug("GPT: last_usable_lba incorrect: %lld > %lld\n",
-                        (unsigned long long)le64_to_cpu((*gpt)->last_usable_lba),
+                         (unsigned long long)le64_to_cpu((*gpt)->last_usable_lba),
-                        (unsigned long long)lastlba);
+                         (unsigned long long)lastlba);
                goto fail;
        }
@@ -360,7 +352,7 @@ is_gpt_valid(struct block_device *bdev, u64 lba,
                        le32_to_cpu((*gpt)->sizeof_partition_entry));
        if (crc != le32_to_cpu((*gpt)->partition_entry_array_crc32)) {
-                Dprintk("GUID Partitition Entry Array CRC check failed.\n");
+                pr_debug("GUID Partitition Entry Array CRC check failed.\n");
                goto fail_ptes;
        }
@@ -616,7 +608,7 @@ efi_partition(struct parsed_partitions *state, struct block_device *bdev)
                return 0;
        }
-        Dprintk("GUID Partition Table is valid!  Yea!\n");
+        pr_debug("GUID Partition Table is valid!  Yea!\n");
        for (i = 0; i < le32_to_cpu(gpt->num_partition_entries) && i < state->limit-1; i++) {
                if (!is_pte_valid(&ptes[i], last_lba(bdev)))
diff --git a/fs/partitions/ldm.c b/fs/partitions/ldm.c
index 0fdda2e8a4cc..8652fb99e962 100644
--- a/fs/partitions/ldm.c
+++ b/fs/partitions/ldm.c
@@ -133,17 +133,17 @@ static bool ldm_parse_privhead(const u8 *data, struct privhead *ph)
        bool is_vista = false;
        BUG_ON(!data || !ph);
-        if (MAGIC_PRIVHEAD != BE64(data)) {
+        if (MAGIC_PRIVHEAD != get_unaligned_be64(data)) {
                ldm_error("Cannot find PRIVHEAD structure. LDM database is"
                        " corrupt. Aborting.");
                return false;
        }
-        ph->ver_major = BE16(data + 0x000C);
+        ph->ver_major = get_unaligned_be16(data + 0x000C);
-        ph->ver_minor = BE16(data + 0x000E);
+        ph->ver_minor = get_unaligned_be16(data + 0x000E);
-        ph->logical_disk_start = BE64(data + 0x011B);
+        ph->logical_disk_start = get_unaligned_be64(data + 0x011B);
-        ph->logical_disk_size = BE64(data + 0x0123);
+        ph->logical_disk_size = get_unaligned_be64(data + 0x0123);
-        ph->config_start = BE64(data + 0x012B);
+        ph->config_start = get_unaligned_be64(data + 0x012B);
-        ph->config_size = BE64(data + 0x0133);
+        ph->config_size = get_unaligned_be64(data + 0x0133);
        /* Version 2.11 is Win2k/XP and version 2.12 is Vista. */
        if (ph->ver_major == 2 && ph->ver_minor == 12)
                is_vista = true;
@@ -191,14 +191,14 @@ static bool ldm_parse_tocblock (const u8 *data, struct tocblock *toc)
 {
        BUG_ON (!data || !toc);
-        if (MAGIC_TOCBLOCK != BE64 (data)) {
+        if (MAGIC_TOCBLOCK != get_unaligned_be64(data)) {
                ldm_crit ("Cannot find TOCBLOCK, database may be corrupt.");
                return false;
        }
        strncpy (toc->bitmap1_name, data + 0x24, sizeof (toc->bitmap1_name));
        toc->bitmap1_name[sizeof (toc->bitmap1_name) - 1] = 0;
-        toc->bitmap1_start = BE64 (data + 0x2E);
+        toc->bitmap1_start = get_unaligned_be64(data + 0x2E);
-        toc->bitmap1_size  = BE64 (data + 0x36);
+        toc->bitmap1_size  = get_unaligned_be64(data + 0x36);
        if (strncmp (toc->bitmap1_name, TOC_BITMAP1,
                        sizeof (toc->bitmap1_name)) != 0) {
@@ -208,8 +208,8 @@ static bool ldm_parse_tocblock (const u8 *data, struct tocblock *toc)
        }
        strncpy (toc->bitmap2_name, data + 0x46, sizeof (toc->bitmap2_name));
        toc->bitmap2_name[sizeof (toc->bitmap2_name) - 1] = 0;
-        toc->bitmap2_start = BE64 (data + 0x50);
+        toc->bitmap2_start = get_unaligned_be64(data + 0x50);
-        toc->bitmap2_size  = BE64 (data + 0x58);
+        toc->bitmap2_size  = get_unaligned_be64(data + 0x58);
        if (strncmp (toc->bitmap2_name, TOC_BITMAP2,
                        sizeof (toc->bitmap2_name)) != 0) {
                ldm_crit ("TOCBLOCK's second bitmap is '%s', should be '%s'.",
@@ -237,22 +237,22 @@ static bool ldm_parse_vmdb (const u8 *data, struct vmdb *vm)
 {
        BUG_ON (!data || !vm);
-        if (MAGIC_VMDB != BE32 (data)) {
+        if (MAGIC_VMDB != get_unaligned_be32(data)) {
                ldm_crit ("Cannot find the VMDB, database may be corrupt.");
                return false;
        }
-        vm->ver_major = BE16 (data + 0x12);
+        vm->ver_major = get_unaligned_be16(data + 0x12);
-        vm->ver_minor = BE16 (data + 0x14);
+        vm->ver_minor = get_unaligned_be16(data + 0x14);
        if ((vm->ver_major != 4) || (vm->ver_minor != 10)) {
                ldm_error ("Expected VMDB version %d.%d, got %d.%d. "
                        "Aborting.", 4, 10, vm->ver_major, vm->ver_minor);
                return false;
        }
-        vm->vblk_size     = BE32 (data + 0x08);
+        vm->vblk_size     = get_unaligned_be32(data + 0x08);
-        vm->vblk_offset   = BE32 (data + 0x0C);
+        vm->vblk_offset   = get_unaligned_be32(data + 0x0C);
-        vm->last_vblk_seq = BE32 (data + 0x04);
+        vm->last_vblk_seq = get_unaligned_be32(data + 0x04);
        ldm_debug ("Parsed VMDB successfully.");
        return true;
@@ -507,7 +507,7 @@ static bool ldm_validate_vmdb (struct block_device *bdev, unsigned long base,
                goto out;                               /* Already logged */
        /* Are there uncommitted transactions? */
-        if (BE16(data + 0x10) != 0x01) {
+        if (get_unaligned_be16(data + 0x10) != 0x01) {
                ldm_crit ("Database is not in a consistent state.  Aborting.");
                goto out;
        }
@@ -802,7 +802,7 @@ static bool ldm_parse_cmp3 (const u8 *buffer, int buflen, struct vblk *vb)
                return false;
        len += VBLK_SIZE_CMP3;
-        if (len != BE32 (buffer + 0x14))
+        if (len != get_unaligned_be32(buffer + 0x14))
                return false;
        comp = &vb->vblk.comp;
@@ -851,7 +851,7 @@ static int ldm_parse_dgr3 (const u8 *buffer, int buflen, struct vblk *vb)
                return false;
        len += VBLK_SIZE_DGR3;
-        if (len != BE32 (buffer + 0x14))
+        if (len != get_unaligned_be32(buffer + 0x14))
                return false;
        dgrp = &vb->vblk.dgrp;
@@ -895,7 +895,7 @@ static bool ldm_parse_dgr4 (const u8 *buffer, int buflen, struct vblk *vb)
                return false;
        len += VBLK_SIZE_DGR4;
-        if (len != BE32 (buffer + 0x14))
+        if (len != get_unaligned_be32(buffer + 0x14))
                return false;
        dgrp = &vb->vblk.dgrp;
@@ -931,7 +931,7 @@ static bool ldm_parse_dsk3 (const u8 *buffer, int buflen, struct vblk *vb)
                return false;
        len += VBLK_SIZE_DSK3;
-        if (len != BE32 (buffer + 0x14))
+        if (len != get_unaligned_be32(buffer + 0x14))
                return false;
        disk = &vb->vblk.disk;
@@ -968,7 +968,7 @@ static bool ldm_parse_dsk4 (const u8 *buffer, int buflen, struct vblk *vb)
                return false;
        len += VBLK_SIZE_DSK4;
-        if (len != BE32 (buffer + 0x14))
+        if (len != get_unaligned_be32(buffer + 0x14))
                return false;
        disk = &vb->vblk.disk;
@@ -1034,14 +1034,14 @@ static bool ldm_parse_prt3(const u8 *buffer, int buflen, struct vblk *vb)
                return false;
        }
        len += VBLK_SIZE_PRT3;
-        if (len > BE32(buffer + 0x14)) {
+        if (len > get_unaligned_be32(buffer + 0x14)) {
                ldm_error("len %d > BE32(buffer + 0x14) %d", len,
-                                BE32(buffer + 0x14));
+                                get_unaligned_be32(buffer + 0x14));
                return false;
        }
        part = &vb->vblk.part;
-        part->start = BE64(buffer + 0x24 + r_name);
+        part->start = get_unaligned_be64(buffer + 0x24 + r_name);
-        part->volume_offset = BE64(buffer + 0x2C + r_name);
+        part->volume_offset = get_unaligned_be64(buffer + 0x2C + r_name);
        part->size = ldm_get_vnum(buffer + 0x34 + r_name);
        part->parent_id = ldm_get_vnum(buffer + 0x34 + r_size);
        part->disk_id = ldm_get_vnum(buffer + 0x34 + r_parent);
@@ -1139,9 +1139,9 @@ static bool ldm_parse_vol5(const u8 *buffer, int buflen, struct vblk *vb)
                return false;
        }
        len += VBLK_SIZE_VOL5;
-        if (len > BE32(buffer + 0x14)) {
+        if (len > get_unaligned_be32(buffer + 0x14)) {
                ldm_error("len %d > BE32(buffer + 0x14) %d", len,
-                                BE32(buffer + 0x14));
+                                get_unaligned_be32(buffer + 0x14));
                return false;
        }
        volu = &vb->vblk.volu;
@@ -1294,9 +1294,9 @@ static bool ldm_frag_add (const u8 *data, int size, struct list_head *frags)
        BUG_ON (!data || !frags);
-        group = BE32 (data + 0x08);
+        group = get_unaligned_be32(data + 0x08);
-        rec   = BE16 (data + 0x0C);
+        rec   = get_unaligned_be16(data + 0x0C);
-        num   = BE16 (data + 0x0E);
+        num   = get_unaligned_be16(data + 0x0E);
        if ((num < 1) || (num > 4)) {
                ldm_error ("A VBLK claims to have %d parts.", num);
                return false;
@@ -1425,12 +1425,12 @@ static bool ldm_get_vblks (struct block_device *bdev, unsigned long base,
                }
                for (v = 0; v < perbuf; v++, data+=size) {  /* For each vblk */
-                        if (MAGIC_VBLK != BE32 (data)) {
+                        if (MAGIC_VBLK != get_unaligned_be32(data)) {
                                ldm_error ("Expected to find a VBLK.");
                                goto out;
                        }
-                        recs = BE16 (data + 0x0E);      /* Number of records */
+                        recs = get_unaligned_be16(data + 0x0E); /* Number of records */
                        if (recs == 1) {
                                if (!ldm_ldmdb_add (data, size, ldb))
                                        goto out;       /* Already logged */
diff --git a/fs/partitions/ldm.h b/fs/partitions/ldm.h
index 80f63b5fdd9f..30e08e809c1d 100644
--- a/fs/partitions/ldm.h
+++ b/fs/partitions/ldm.h
@@ -98,11 +98,6 @@ struct parsed_partitions;
 #define TOC_BITMAP1             "config"        /* Names of the two defined */
 #define TOC_BITMAP2             "log"           /* bitmaps in the TOCBLOCK. */
-/* Most numbers we deal with are big-endian and won't be aligned. */
-#define BE16(x)                 ((u16)be16_to_cpu(get_unaligned((__be16*)(x))))
-#define BE32(x)                 ((u32)be32_to_cpu(get_unaligned((__be32*)(x))))
-#define BE64(x)                 ((u64)be64_to_cpu(get_unaligned((__be64*)(x))))
 /* Borrowed from msdos.c */
 #define SYS_IND(p)              (get_unaligned(&(p)->sys_ind))
diff --git a/fs/pipe.c b/fs/pipe.c
index 700f4e0d9572..fcba6542b8d0 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -777,45 +777,10 @@ pipe_rdwr_open(struct inode *inode, struct file *filp)
 /*
 * The file_operations structs are not static because they
 * are also used in linux/fs/fifo.c to do operations on FIFOs.
+ *
+ * Pipes reuse fifos' file_operations structs.
 */
-const struct file_operations read_fifo_fops = {
+const struct file_operations read_pipefifo_fops = {
-        .llseek         = no_llseek,
-        .read           = do_sync_read,
-        .aio_read       = pipe_read,
-        .write          = bad_pipe_w,
-        .poll           = pipe_poll,
-        .unlocked_ioctl = pipe_ioctl,
-        .open           = pipe_read_open,
-        .release        = pipe_read_release,
-        .fasync         = pipe_read_fasync,
-};
-const struct file_operations write_fifo_fops = {
-        .llseek         = no_llseek,
-        .read           = bad_pipe_r,
-        .write          = do_sync_write,
-        .aio_write      = pipe_write,
-        .poll           = pipe_poll,
-        .unlocked_ioctl = pipe_ioctl,
-        .open           = pipe_write_open,
-        .release        = pipe_write_release,
-        .fasync         = pipe_write_fasync,
-};
-const struct file_operations rdwr_fifo_fops = {
-        .llseek         = no_llseek,
-        .read           = do_sync_read,
-        .aio_read       = pipe_read,
-        .write          = do_sync_write,
-        .aio_write      = pipe_write,
-        .poll           = pipe_poll,
-        .unlocked_ioctl = pipe_ioctl,
-        .open           = pipe_rdwr_open,
-        .release        = pipe_rdwr_release,
-        .fasync         = pipe_rdwr_fasync,
-};
-static const struct file_operations read_pipe_fops = {
        .llseek         = no_llseek,
        .read           = do_sync_read,
        .aio_read       = pipe_read,
@@ -827,7 +792,7 @@ static const struct file_operations read_pipe_fops = {
        .fasync         = pipe_read_fasync,
 };
-static const struct file_operations write_pipe_fops = {
+const struct file_operations write_pipefifo_fops = {
        .llseek         = no_llseek,
        .read           = bad_pipe_r,
        .write          = do_sync_write,
@@ -839,7 +804,7 @@ static const struct file_operations write_pipe_fops = {
        .fasync         = pipe_write_fasync,
 };
-static const struct file_operations rdwr_pipe_fops = {
+const struct file_operations rdwr_pipefifo_fops = {
        .llseek         = no_llseek,
        .read           = do_sync_read,
        .aio_read       = pipe_read,
@@ -927,7 +892,7 @@ static struct inode * get_pipe_inode(void)
        inode->i_pipe = pipe;
        pipe->readers = pipe->writers = 1;
-        inode->i_fop = &rdwr_pipe_fops;
+        inode->i_fop = &rdwr_pipefifo_fops;
        /*
         * Mark the inode dirty from the very beginning,
@@ -950,7 +915,7 @@ fail_inode:
        return NULL;
 }
-struct file *create_write_pipe(void)
+struct file *create_write_pipe(int flags)
 {
        int err;
        struct inode *inode;
@@ -978,12 +943,12 @@ struct file *create_write_pipe(void)
        d_instantiate(dentry, inode);
        err = -ENFILE;
-        f = alloc_file(pipe_mnt, dentry, FMODE_WRITE, &write_pipe_fops);
+        f = alloc_file(pipe_mnt, dentry, FMODE_WRITE, &write_pipefifo_fops);
        if (!f)
                goto err_dentry;
        f->f_mapping = inode->i_mapping;
-        f->f_flags = O_WRONLY;
+        f->f_flags = O_WRONLY | (flags & O_NONBLOCK);
        f->f_version = 0;
        return f;
@@ -1007,7 +972,7 @@ void free_write_pipe(struct file *f)
        put_filp(f);
 }
-struct file *create_read_pipe(struct file *wrf)
+struct file *create_read_pipe(struct file *wrf, int flags)
 {
        struct file *f = get_empty_filp();
        if (!f)
@@ -1019,34 +984,37 @@ struct file *create_read_pipe(struct file *wrf)
        f->f_mapping = wrf->f_path.dentry->d_inode->i_mapping;
        f->f_pos = 0;
-        f->f_flags = O_RDONLY;
+        f->f_flags = O_RDONLY | (flags & O_NONBLOCK);
-        f->f_op = &read_pipe_fops;
+        f->f_op = &read_pipefifo_fops;
        f->f_mode = FMODE_READ;
        f->f_version = 0;
        return f;
 }
-int do_pipe(int *fd)
+int do_pipe_flags(int *fd, int flags)
 {
        struct file *fw, *fr;
        int error;
        int fdw, fdr;
-        fw = create_write_pipe();
+        if (flags & ~(O_CLOEXEC | O_NONBLOCK))
+                return -EINVAL;
+        fw = create_write_pipe(flags);
        if (IS_ERR(fw))
                return PTR_ERR(fw);
-        fr = create_read_pipe(fw);
+        fr = create_read_pipe(fw, flags);
        error = PTR_ERR(fr);
        if (IS_ERR(fr))
                goto err_write_pipe;
-        error = get_unused_fd();
+        error = get_unused_fd_flags(flags);
        if (error < 0)
                goto err_read_pipe;
        fdr = error;
-        error = get_unused_fd();
+        error = get_unused_fd_flags(flags);
        if (error < 0)
                goto err_fdr;
        fdw = error;
@@ -1074,16 +1042,21 @@ int do_pipe(int *fd)
        return error;
 }
+int do_pipe(int *fd)
+{
+        return do_pipe_flags(fd, 0);
+}
 /*
 * sys_pipe() is the normal C calling standard for creating
 * a pipe. It's not the way Unix traditionally does this, though.
 */
-asmlinkage long __weak sys_pipe(int __user *fildes)
+asmlinkage long __weak sys_pipe2(int __user *fildes, int flags)
 {
        int fd[2];
        int error;
-        error = do_pipe(fd);
+        error = do_pipe_flags(fd, flags);
        if (!error) {
                if (copy_to_user(fildes, fd, sizeof(fd))) {
                        sys_close(fd[0]);
@@ -1094,6 +1067,11 @@ asmlinkage long __weak sys_pipe(int __user *fildes)
        return error;
 }
+asmlinkage long __weak sys_pipe(int __user *fildes)
+{
+        return sys_pipe2(fildes, 0);
+}
 /*
 * pipefs should _never_ be mounted by userland - too much of security hassle,
 * no real gain from having the whole whorehouse mounted. So we don't need
diff --git a/fs/proc/Kconfig b/fs/proc/Kconfig
new file mode 100644
index 000000000000..73cd7a418f06
--- /dev/null
+++ b/fs/proc/Kconfig
@@ -0,0 +1,59 @@
+config PROC_FS
+        bool "/proc file system support" if EMBEDDED
+        default y
+        help
+          This is a virtual file system providing information about the status
+          of the system. "Virtual" means that it doesn't take up any space on
+          your hard disk: the files are created on the fly by the kernel when
+          you try to access them. Also, you cannot read the files with older
+          versions of the program less: you need to use more or cat.
+          It's totally cool; for example, "cat /proc/interrupts" gives
+          information about what the different IRQs are used for at the moment
+          (there is a small number of Interrupt ReQuest lines in your computer
+          that are used by the attached devices to gain the CPU's attention --
+          often a source of trouble if two devices are mistakenly configured
+          to use the same IRQ). The program procinfo displays some
+          information about your system gathered from the /proc file system.
+          Before you can use the /proc file system, it has to be mounted,
+          meaning it has to be given a location in the directory hierarchy.
+          That location should be /proc. A command such as "mount -t proc proc
+          /proc" or the equivalent line in /etc/fstab does the job.
+          The /proc file system is explained in the file
+          <file:Documentation/filesystems/proc.txt> and on the proc(5) manpage
+          ("man 5 proc").
+          This option will enlarge your kernel by about 67 KB. Several
+          programs depend on this, so everyone should say Y here.
+config PROC_KCORE
+        bool "/proc/kcore support" if !ARM
+        depends on PROC_FS && MMU
+config PROC_VMCORE
+        bool "/proc/vmcore support (EXPERIMENTAL)"
+        depends on PROC_FS && CRASH_DUMP
+        default y
+        help
+        Exports the dump image of a crashed kernel in ELF format.
+config PROC_SYSCTL
+        bool "Sysctl support (/proc/sys)" if EMBEDDED
+        depends on PROC_FS
+        select SYSCTL
+        default y
+        ---help---
+          The sysctl interface provides a means of dynamically changing
+          certain kernel parameters and variables on the fly without requiring
+          a recompile of the kernel or reboot of the system.  The primary
+          interface is through /proc/sys.  If you say Y here a tree of
+          modifiable sysctl entries will be generated beneath the
+          /proc/sys directory. They are explained in the files
+          in <file:Documentation/sysctl/>.  Note that enabling this
+          option will enlarge the kernel by at least 8 KB.
+          As it is generally a good thing, you should say Y here unless
+          building a kernel for install/rescue disks or your system is very
+          limited in memory.
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 797d775e0354..0d6eb33597c6 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -80,6 +80,7 @@
 #include <linux/delayacct.h>
 #include <linux/seq_file.h>
 #include <linux/pid_namespace.h>
+#include <linux/tracehook.h>
 #include <asm/pgtable.h>
 #include <asm/processor.h>
@@ -168,8 +169,12 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
        rcu_read_lock();
        ppid = pid_alive(p) ?
                task_tgid_nr_ns(rcu_dereference(p->real_parent), ns) : 0;
-        tpid = pid_alive(p) && p->ptrace ?
+        tpid = 0;
-                task_pid_nr_ns(rcu_dereference(p->parent), ns) : 0;
+        if (pid_alive(p)) {
+                struct task_struct *tracer = tracehook_tracer_task(p);
+                if (tracer)
+                        tpid = task_pid_nr_ns(tracer, ns);
+        }
        seq_printf(m,
                "State:\t%s\n"
                "Tgid:\t%d\n"
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 58c3e6a8e15e..e74308bdabd3 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -69,6 +69,7 @@
 #include <linux/mount.h>
 #include <linux/security.h>
 #include <linux/ptrace.h>
+#include <linux/tracehook.h>
 #include <linux/cgroup.h>
 #include <linux/cpuset.h>
 #include <linux/audit.h>
@@ -231,10 +232,14 @@ static int check_mem_permission(struct task_struct *task)
         * If current is actively ptrace'ing, and would also be
         * permitted to freshly attach with ptrace now, permit it.
         */
-        if (task->parent == current && (task->ptrace & PT_PTRACED) &&
+        if (task_is_stopped_or_traced(task)) {
-            task_is_stopped_or_traced(task) &&
+                int match;
-            ptrace_may_access(task, PTRACE_MODE_ATTACH))
+                rcu_read_lock();
-                return 0;
+                match = (tracehook_tracer_task(task) == current);
+                rcu_read_unlock();
+                if (match && ptrace_may_access(task, PTRACE_MODE_ATTACH))
+                        return 0;
+        }
        /*
         * Noone else is allowed.
@@ -504,6 +509,26 @@ static int proc_pid_limits(struct task_struct *task, char *buffer)
        return count;
 }
+#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
+/*
+ * Format the syscall state of @task into @buffer.
+ *
+ * Three output shapes are produced:
+ *   "running"                     - task_current_syscall() returned non-zero
+ *   "<nr> <sp> <pc>"              - nr is negative
+ *   "<nr> <arg0>..<arg5> <sp> <pc>" - otherwise
+ * nr is printed in decimal, everything else in hex with a 0x prefix.
+ *
+ * Returns whatever sprintf() returns for the line that was written.
+ */
+static int proc_pid_syscall(struct task_struct *task, char *buffer)
+{
+        long nr;
+        unsigned long args[6], sp, pc;
+        /* Ask for all six argument slots; the 6 matches the size of args[]. */
+        if (task_current_syscall(task, &nr, args, 6, &sp, &pc))
+                return sprintf(buffer, "running\n");
+        /*
+         * NOTE(review): a negative nr presumably means the task is stopped
+         * outside of a syscall, so args[] is not printed - confirm against
+         * task_current_syscall().
+         */
+        if (nr < 0)
+                return sprintf(buffer, "%ld 0x%lx 0x%lx\n", nr, sp, pc);
+        return sprintf(buffer,
+                       "%ld 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx\n",
+                       nr,
+                       args[0], args[1], args[2], args[3], args[4], args[5],
+                       sp, pc);
+}
+#endif /* CONFIG_HAVE_ARCH_TRACEHOOK */
 /************************************************************************/
 /*                       Here the fs part begins                        */
 /************************************************************************/
@@ -1834,8 +1859,7 @@ static const struct file_operations proc_fd_operations = {
 * /proc/pid/fd needs a special permission handler so that a process can still
 * access /proc/self/fd after it has executed a setuid().
 */
-static int proc_fd_permission(struct inode *inode, int mask,
+static int proc_fd_permission(struct inode *inode, int mask)
-                                struct nameidata *nd)
 {
        int rv;
@@ -2376,29 +2400,70 @@ static int proc_base_fill_cache(struct file *filp, void *dirent,
 }
 #ifdef CONFIG_TASK_IO_ACCOUNTING
-static int proc_pid_io_accounting(struct task_struct *task, char *buffer)
+static int do_io_accounting(struct task_struct *task, char *buffer, int whole)
-{
+{
+        u64 rchar, wchar, syscr, syscw;
+        struct task_io_accounting ioac;
+        rchar = task->rchar;
+        wchar = task->wchar;
+        syscr = task->syscr;
+        syscw = task->syscw;
+        memcpy(&ioac, &task->ioac, sizeof(ioac));
+        if (whole) {
+                unsigned long flags;
+                if (lock_task_sighand(task, &flags)) {
+                        struct signal_struct *sig = task->signal;
+                        struct task_struct *t = task;
+                        rchar += sig->rchar;
+                        wchar += sig->wchar;
+                        syscr += sig->syscr;
+                        syscw += sig->syscw;
+                        ioac.read_bytes += sig->ioac.read_bytes;
+                        ioac.write_bytes += sig->ioac.write_bytes;
+                        ioac.cancelled_write_bytes +=
+                                        sig->ioac.cancelled_write_bytes;
+                        while_each_thread(task, t) {
+                                rchar += t->rchar;
+                                wchar += t->wchar;
+                                syscr += t->syscr;
+                                syscw += t->syscw;
+                                ioac.read_bytes += t->ioac.read_bytes;
+                                ioac.write_bytes += t->ioac.write_bytes;
+                                ioac.cancelled_write_bytes +=
+                                        t->ioac.cancelled_write_bytes;
+                        }
+                        unlock_task_sighand(task, &flags);
+                }
+        }
        return sprintf(buffer,
-#ifdef CONFIG_TASK_XACCT
                        "rchar: %llu\n"
                        "wchar: %llu\n"
                        "syscr: %llu\n"
                        "syscw: %llu\n"
-#endif
                        "read_bytes: %llu\n"
                        "write_bytes: %llu\n"
                        "cancelled_write_bytes: %llu\n",
-#ifdef CONFIG_TASK_XACCT
+                        rchar, wchar, syscr, syscw,
-                        (unsigned long long)task->rchar,
+                        ioac.read_bytes, ioac.write_bytes,
-                        (unsigned long long)task->wchar,
+                        ioac.cancelled_write_bytes);
-                        (unsigned long long)task->syscr,
+}
-                        (unsigned long long)task->syscw,
-#endif
+static int proc_tid_io_accounting(struct task_struct *task, char *buffer)
-                        (unsigned long long)task->ioac.read_bytes,
+{
-                        (unsigned long long)task->ioac.write_bytes,
+        return do_io_accounting(task, buffer, 0);
-                        (unsigned long long)task->ioac.cancelled_write_bytes);
 }
-#endif
+/*
+ * I/O accounting for the "io" entry of tgid_base_stuff[]: calls
+ * do_io_accounting() with whole == 1, so the counters of task->signal
+ * and of the other threads walked by while_each_thread() are added to
+ * those of @task before the result is formatted into @buffer.
+ */
+static int proc_tgid_io_accounting(struct task_struct *task, char *buffer)
+{
+        return do_io_accounting(task, buffer, 1);
+}
+#endif /* CONFIG_TASK_IO_ACCOUNTING */
 /*
 * Thread groups
@@ -2420,6 +2485,9 @@ static const struct pid_entry tgid_base_stuff[] = {
 #ifdef CONFIG_SCHED_DEBUG
        REG("sched",      S_IRUGO|S_IWUSR, pid_sched),
 #endif
+#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
+        INF("syscall",    S_IRUSR, pid_syscall),
+#endif
        INF("cmdline",    S_IRUGO, pid_cmdline),
        ONE("stat",       S_IRUGO, tgid_stat),
        ONE("statm",      S_IRUGO, pid_statm),
@@ -2470,7 +2538,7 @@ static const struct pid_entry tgid_base_stuff[] = {
        REG("coredump_filter", S_IRUGO|S_IWUSR, coredump_filter),
 #endif
 #ifdef CONFIG_TASK_IO_ACCOUNTING
-        INF("io",       S_IRUGO, pid_io_accounting),
+        INF("io",       S_IRUGO, tgid_io_accounting),
 #endif
 };
@@ -2752,6 +2820,9 @@ static const struct pid_entry tid_base_stuff[] = {
 #ifdef CONFIG_SCHED_DEBUG
        REG("sched",     S_IRUGO|S_IWUSR, pid_sched),
 #endif
+#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
+        INF("syscall",   S_IRUSR, pid_syscall),
+#endif
        INF("cmdline",   S_IRUGO, pid_cmdline),
        ONE("stat",      S_IRUGO, tid_stat),
        ONE("statm",     S_IRUGO, pid_statm),
@@ -2797,6 +2868,9 @@ static const struct pid_entry tid_base_stuff[] = {
 #ifdef CONFIG_FAULT_INJECTION
        REG("make-it-fail", S_IRUGO|S_IWUSR, fault_inject),
 #endif
+#ifdef CONFIG_TASK_IO_ACCOUNTING
+        INF("io",       S_IRUGO, tid_io_accounting),
+#endif
 };
 static int proc_tid_base_readdir(struct file * filp,
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 43e54e86cefd..cb4096cc3fb7 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -597,6 +597,7 @@ static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent,
        ent->pde_users = 0;
        spin_lock_init(&ent->pde_unload_lock);
        ent->pde_unload_completion = NULL;
+        INIT_LIST_HEAD(&ent->pde_openers);
 out:
        return ent;
 }
@@ -789,15 +790,25 @@ void remove_proc_entry(const char *name, struct proc_dir_entry *parent)
        spin_unlock(&de->pde_unload_lock);
 continue_removing:
+        spin_lock(&de->pde_unload_lock);
+        while (!list_empty(&de->pde_openers)) {
+                struct pde_opener *pdeo;
+                pdeo = list_first_entry(&de->pde_openers, struct pde_opener, lh);
+                list_del(&pdeo->lh);
+                spin_unlock(&de->pde_unload_lock);
+                pdeo->release(pdeo->inode, pdeo->file);
+                kfree(pdeo);
+                spin_lock(&de->pde_unload_lock);
+        }
+        spin_unlock(&de->pde_unload_lock);
        if (S_ISDIR(de->mode))
                parent->nlink--;
        de->nlink = 0;
-        if (de->subdir) {
+        WARN(de->subdir, KERN_WARNING "%s: removing non-empty directory "
-                printk(KERN_WARNING "%s: removing non-empty directory "
                        "'%s/%s', leaking at least '%s'\n", __func__,
                        de->parent->name, de->name, de->subdir->name);
-                WARN_ON(1);
-        }
        if (atomic_dec_and_test(&de->count))
                free_proc_entry(de);
 }
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index b08d10017911..8bb03f056c28 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -17,6 +17,7 @@
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/smp_lock.h>
+#include <linux/sysctl.h>
 #include <asm/system.h>
 #include <asm/uaccess.h>
@@ -65,6 +66,8 @@ static void proc_delete_inode(struct inode *inode)
                        module_put(de->owner);
                de_put(de);
        }
+        if (PROC_I(inode)->sysctl)
+                sysctl_head_put(PROC_I(inode)->sysctl);
        clear_inode(inode);
 }
@@ -84,6 +87,8 @@ static struct inode *proc_alloc_inode(struct super_block *sb)
        ei->fd = 0;
        ei->op.proc_get_link = NULL;
        ei->pde = NULL;
+        ei->sysctl = NULL;
+        ei->sysctl_entry = NULL;
        inode = &ei->vfs_inode;
        inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
        return inode;
@@ -94,7 +99,7 @@ static void proc_destroy_inode(struct inode *inode)
        kmem_cache_free(proc_inode_cachep, PROC_I(inode));
 }
-static void init_once(struct kmem_cache * cachep, void *foo)
+static void init_once(void *foo)
 {
        struct proc_inode *ei = (struct proc_inode *) foo;
@@ -111,27 +116,25 @@ int __init proc_init_inodecache(void)
        return 0;
 }
-static int proc_remount(struct super_block *sb, int *flags, char *data)
-{
-        *flags |= MS_NODIRATIME;
-        return 0;
-}
 static const struct super_operations proc_sops = {
        .alloc_inode    = proc_alloc_inode,
        .destroy_inode  = proc_destroy_inode,
        .drop_inode     = generic_delete_inode,
        .delete_inode   = proc_delete_inode,
        .statfs         = simple_statfs,
-        .remount_fs     = proc_remount,
 };
-static void pde_users_dec(struct proc_dir_entry *pde)
+static void __pde_users_dec(struct proc_dir_entry *pde)
 {
-        spin_lock(&pde->pde_unload_lock);
        pde->pde_users--;
        if (pde->pde_unload_completion && pde->pde_users == 0)
                complete(pde->pde_unload_completion);
+}
+static void pde_users_dec(struct proc_dir_entry *pde)
+{
+        spin_lock(&pde->pde_unload_lock);
+        __pde_users_dec(pde);
        spin_unlock(&pde->pde_unload_lock);
 }
@@ -318,36 +321,97 @@ static int proc_reg_open(struct inode *inode, struct file *file)
        struct proc_dir_entry *pde = PDE(inode);
        int rv = 0;
        int (*open)(struct inode *, struct file *);
+        int (*release)(struct inode *, struct file *);
+        struct pde_opener *pdeo;
+        /*
+         * What for, you ask? Well, we can have an open, rmmod,
+         * remove_proc_entry sequence. ->release won't be called because
+         * ->proc_fops will be cleared. Depending on the complexity of
+         * ->release, the consequences vary.
+         *
+         * We can't wait until the close is done for real, that can deadlock:
+         * rmmod foo </proc/foo . So, we're going to do ->release by hand in
+         * remove_proc_entry(). For this, save the opener's inode, file and
+         * ->release for later.
+         */
+        pdeo = kmalloc(sizeof(struct pde_opener), GFP_KERNEL);
+        if (!pdeo)
+                return -ENOMEM;
        spin_lock(&pde->pde_unload_lock);
        if (!pde->proc_fops) {
                spin_unlock(&pde->pde_unload_lock);
+                kfree(pdeo);
                return rv;
        }
        pde->pde_users++;
        open = pde->proc_fops->open;
+        release = pde->proc_fops->release;
        spin_unlock(&pde->pde_unload_lock);
        if (open)
                rv = open(inode, file);
-        pde_users_dec(pde);
+        spin_lock(&pde->pde_unload_lock);
+        if (rv == 0 && release) {
+                /* To know what to release. */
+                pdeo->inode = inode;
+                pdeo->file = file;
+                /* Strictly for "too late" ->release in proc_reg_release(). */
+                pdeo->release = release;
+                list_add(&pdeo->lh, &pde->pde_openers);
+        } else
+                kfree(pdeo);
+        __pde_users_dec(pde);
+        spin_unlock(&pde->pde_unload_lock);
        return rv;
 }
+/*
+ * Walk pde->pde_openers and return the entry recorded for this
+ * (inode, file) pair, or NULL if there is none. Both pointers must match.
+ *
+ * The list is not locked here; proc_reg_release() calls this with
+ * pde->pde_unload_lock held.
+ */
+static struct pde_opener *find_pde_opener(struct proc_dir_entry *pde,
+                                        struct inode *inode, struct file *file)
+{
+        struct pde_opener *pdeo;
+        list_for_each_entry(pdeo, &pde->pde_openers, lh) {
+                if (pdeo->inode == inode && pdeo->file == file)
+                        return pdeo;
+        }
+        return NULL;
+}
 static int proc_reg_release(struct inode *inode, struct file *file)
 {
        struct proc_dir_entry *pde = PDE(inode);
        int rv = 0;
        int (*release)(struct inode *, struct file *);
+        struct pde_opener *pdeo;
        spin_lock(&pde->pde_unload_lock);
+        pdeo = find_pde_opener(pde, inode, file);
        if (!pde->proc_fops) {
-                spin_unlock(&pde->pde_unload_lock);
+                /*
+                 * Can't simply exit: __fput() will think that everything is OK
+                 * and move on to freeing struct file. remove_proc_entry() would
+                 * then find the stale opener on the list and try to do
+                 * non-trivial things with the freed struct file. Therefore,
+                 * remove the opener from the list.
+                 *
+                 * Once the opener is off the list nobody else will ->release
+                 * it, so call the saved ->release here.
+                 */
+                if (pdeo) {
+                        list_del(&pdeo->lh);
+                        spin_unlock(&pde->pde_unload_lock);
+                        rv = pdeo->release(inode, file);
+                        kfree(pdeo);
+                } else
+                        spin_unlock(&pde->pde_unload_lock);
                return rv;
        }
        pde->pde_users++;
        release = pde->proc_fops->release;
+        if (pdeo) {
+                list_del(&pdeo->lh);
+                kfree(pdeo);
+        }
        spin_unlock(&pde->pde_unload_lock);
        if (release)
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 28cbca805905..442202314d53 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -63,6 +63,7 @@ extern const struct file_operations proc_smaps_operations;
 extern const struct file_operations proc_clear_refs_operations;
 extern const struct file_operations proc_pagemap_operations;
 extern const struct file_operations proc_net_operations;
+extern const struct file_operations proc_kmsg_operations;
 extern const struct inode_operations proc_net_inode_operations;
 void free_proc_entry(struct proc_dir_entry *de);
@@ -88,3 +89,10 @@ struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *ino,
                struct dentry *dentry);
 int proc_readdir_de(struct proc_dir_entry *de, struct file *filp, void *dirent,
                filldir_t filldir);
+/*
+ * One record per successful open of a /proc entry whose file_operations
+ * have a ->release method. Records are linked on proc_dir_entry::pde_openers
+ * so that remove_proc_entry() can call ->release for files that are still
+ * open when the entry goes away.
+ */
+struct pde_opener {
+        /* inode and file that were passed to ->open; handed back to ->release */
+        struct inode *inode;
+        struct file *file;
+        /* ->release copied from proc_fops at open time */
+        int (*release)(struct inode *, struct file *);
+        /* link in proc_dir_entry::pde_openers */
+        struct list_head lh;
+};
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index e78c81fcf547..c2370c76fb71 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -23,6 +23,10 @@
 #define CORE_STR "CORE"
+#ifndef ELF_CORE_EFLAGS
+#define ELF_CORE_EFLAGS 0
+#endif
 static int open_kcore(struct inode * inode, struct file * filp)
 {
        return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;
@@ -164,11 +168,7 @@ static void elf_kcore_store_hdr(char *bufp, int nphdr, int dataoff)
        elf->e_entry    = 0;
        elf->e_phoff    = sizeof(struct elfhdr);
        elf->e_shoff    = 0;
-#if defined(CONFIG_H8300)
+        elf->e_flags    = ELF_CORE_EFLAGS;
-        elf->e_flags    = ELF_FLAGS;
-#else
-        elf->e_flags    = 0;
-#endif
        elf->e_ehsize   = sizeof(struct elfhdr);
        elf->e_phentsize= sizeof(struct elf_phdr);
        elf->e_phnum    = nphdr;
diff --git a/fs/proc/kmsg.c b/fs/proc/kmsg.c
index ff3b90b56e9d..9fd5df3f40ce 100644
--- a/fs/proc/kmsg.c
+++ b/fs/proc/kmsg.c
@@ -15,6 +15,8 @@
 #include <asm/uaccess.h>
 #include <asm/io.h>
+#include "internal.h"
 extern wait_queue_head_t log_wait;
 extern int do_syslog(int type, char __user *bug, int count);
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index c652d469dc08..ded969862960 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -232,7 +232,6 @@ static int meminfo_read_proc(char *page, char **start, off_t off,
 #undef K
 }
-extern const struct seq_operations fragmentation_op;
 static int fragmentation_open(struct inode *inode, struct file *file)
 {
        (void)inode;
@@ -246,7 +245,6 @@ static const struct file_operations fragmentation_file_operations = {
        .release        = seq_release,
 };
-extern const struct seq_operations pagetypeinfo_op;
 static int pagetypeinfo_open(struct inode *inode, struct file *file)
 {
        return seq_open(file, &pagetypeinfo_op);
@@ -259,7 +257,6 @@ static const struct file_operations pagetypeinfo_file_ops = {
        .release        = seq_release,
 };
-extern const struct seq_operations zoneinfo_op;
 static int zoneinfo_open(struct inode *inode, struct file *file)
 {
        return seq_open(file, &zoneinfo_op);
@@ -356,7 +353,6 @@ static const struct file_operations proc_devinfo_operations = {
        .release        = seq_release,
 };
-extern const struct seq_operations vmstat_op;
 static int vmstat_open(struct inode *inode, struct file *file)
 {
        return seq_open(file, &vmstat_op);
@@ -468,14 +464,25 @@ static const struct file_operations proc_slabstats_operations = {
 #ifdef CONFIG_MMU
 static int vmalloc_open(struct inode *inode, struct file *file)
 {
-        return seq_open(file, &vmalloc_op);
+        unsigned int *ptr = NULL;
+        int ret;
+        if (NUMA_BUILD)
+                ptr = kmalloc(nr_node_ids * sizeof(unsigned int), GFP_KERNEL);
+        ret = seq_open(file, &vmalloc_op);
+        if (!ret) {
+                struct seq_file *m = file->private_data;
+                m->private = ptr;
+        } else
+                kfree(ptr);
+        return ret;
 }
 static const struct file_operations proc_vmalloc_operations = {
        .open           = vmalloc_open,
        .read           = seq_read,
        .llseek         = seq_lseek,
-        .release        = seq_release,
+        .release        = seq_release_private,
 };
 #endif
diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
index 83f357b30d71..7bc296f424ae 100644
--- a/fs/proc/proc_net.c
+++ b/fs/proc/proc_net.c
@@ -27,6 +27,11 @@
 #include "internal.h"
+/*
+ * Look up the struct net behind a /proc inode via PDE_NET(PDE(inode)) and
+ * pass it through maybe_get_net(). May return NULL; single_open_net()
+ * treats that as -ENXIO.
+ *
+ * NOTE(review): presumably a non-NULL result carries a reference that the
+ * caller has to drop with put_net(), as single_open_net() does on its error
+ * path - confirm against maybe_get_net().
+ */
+static struct net *get_proc_net(const struct inode *inode)
+{
+        return maybe_get_net(PDE_NET(PDE(inode)));
+}
 int seq_open_net(struct inode *ino, struct file *f,
                 const struct seq_operations *ops, int size)
 {
@@ -51,6 +56,30 @@ int seq_open_net(struct inode *ino, struct file *f,
 }
 EXPORT_SYMBOL_GPL(seq_open_net);
+/*
+ * single_open() variant for per-net /proc files: the struct net returned by
+ * get_proc_net(inode) is handed to single_open() as its data argument.
+ *
+ * Returns 0 on success, -ENXIO if get_proc_net() returns NULL, or the
+ * negative error from single_open(). In the latter case the net obtained
+ * here is released with put_net() before returning.
+ */
+int single_open_net(struct inode *inode, struct file *file,
+                int (*show)(struct seq_file *, void *))
+{
+        int err;
+        struct net *net;
+        /* Error code used if no net can be obtained for this inode. */
+        err = -ENXIO;
+        net = get_proc_net(inode);
+        if (net == NULL)
+                goto err_net;
+        err = single_open(file, show, net);
+        if (err < 0)
+                goto err_open;
+        return 0;
+err_open:
+        /* single_open() failed: undo get_proc_net(). */
+        put_net(net);
+err_net:
+        return err;
+}
+EXPORT_SYMBOL_GPL(single_open_net);
 int seq_release_net(struct inode *ino, struct file *f)
 {
        struct seq_file *seq;
@@ -63,6 +92,14 @@ int seq_release_net(struct inode *ino, struct file *f)
 }
 EXPORT_SYMBOL_GPL(seq_release_net);
+/*
+ * Release counterpart of single_open_net(): calls put_net() on
+ * seq->private and then returns the result of single_release().
+ *
+ * NOTE(review): this relies on seq->private still being the struct net
+ * that single_open_net() passed to single_open() - confirm that
+ * single_open() stores its data argument there.
+ */
+int single_release_net(struct inode *ino, struct file *f)
+{
+        struct seq_file *seq = f->private_data;
+        put_net(seq->private);
+        return single_release(ino, f);
+}
+EXPORT_SYMBOL_GPL(single_release_net);
 static struct net *get_proc_task_net(struct inode *dir)
 {
        struct task_struct *task;
@@ -153,12 +190,6 @@ void proc_net_remove(struct net *net, const char *name)
 }
 EXPORT_SYMBOL_GPL(proc_net_remove);
-struct net *get_proc_net(const struct inode *inode)
-{
-        return maybe_get_net(PDE_NET(PDE(inode)));
-}
-EXPORT_SYMBOL_GPL(get_proc_net);
 static __net_init int proc_net_ns_init(struct net *net)
 {
        struct proc_dir_entry *netd, *net_statd;
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 5acc001d49f6..f9a8b892718f 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -10,149 +10,110 @@
 static struct dentry_operations proc_sys_dentry_operations;
 static const struct file_operations proc_sys_file_operations;
 static const struct inode_operations proc_sys_inode_operations;
+static const struct file_operations proc_sys_dir_file_operations;
+static const struct inode_operations proc_sys_dir_operations;
-static void proc_sys_refresh_inode(struct inode *inode, struct ctl_table *table)
+static struct inode *proc_sys_make_inode(struct super_block *sb,
-{
+                struct ctl_table_header *head, struct ctl_table *table)
-        /* Refresh the cached information bits in the inode */
-        if (table) {
-                inode->i_uid = 0;
-                inode->i_gid = 0;
-                inode->i_mode = table->mode;
-                if (table->proc_handler) {
-                        inode->i_mode |= S_IFREG;
-                        inode->i_nlink = 1;
-                } else {
-                        inode->i_mode |= S_IFDIR;
-                        inode->i_nlink = 0;     /* It is too hard to figure out */
-                }
-        }
-}
-static struct inode *proc_sys_make_inode(struct inode *dir, struct ctl_table *table)
 {
        struct inode *inode;
-        struct proc_inode *dir_ei, *ei;
+        struct proc_inode *ei;
-        int depth;
-        inode = new_inode(dir->i_sb);
+        inode = new_inode(sb);
        if (!inode)
                goto out;
-        /* A directory is always one deeper than it's parent */
+        sysctl_head_get(head);
-        dir_ei = PROC_I(dir);
-        depth = dir_ei->fd + 1;
        ei = PROC_I(inode);
-        ei->fd = depth;
+        ei->sysctl = head;
+        ei->sysctl_entry = table;
        inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
-        inode->i_op = &proc_sys_inode_operations;
-        inode->i_fop = &proc_sys_file_operations;
        inode->i_flags |= S_PRIVATE; /* tell selinux to ignore this inode */
-        proc_sys_refresh_inode(inode, table);
+        inode->i_mode = table->mode;
+        if (!table->child) {
+                inode->i_mode |= S_IFREG;
+                inode->i_op = &proc_sys_inode_operations;
+                inode->i_fop = &proc_sys_file_operations;
+        } else {
+                inode->i_mode |= S_IFDIR;
+                inode->i_nlink = 0;
+                inode->i_op = &proc_sys_dir_operations;
+                inode->i_fop = &proc_sys_dir_file_operations;
+        }
 out:
        return inode;
 }
-static struct dentry *proc_sys_ancestor(struct dentry *dentry, int depth)
+static struct ctl_table *find_in_table(struct ctl_table *p, struct qstr *name)
-{
-        for (;;) {
-                struct proc_inode *ei;
-                ei = PROC_I(dentry->d_inode);
-                if (ei->fd == depth)
-                        break; /* found */
-                dentry = dentry->d_parent;
-        }
-        return dentry;
-}
-static struct ctl_table *proc_sys_lookup_table_one(struct ctl_table *table,
-                                                        struct qstr *name)
 {
        int len;
-        for ( ; table->ctl_name || table->procname; table++) {
+        for ( ; p->ctl_name || p->procname; p++) {
-                if (!table->procname)
+                if (!p->procname)
                        continue;
-                len = strlen(table->procname);
+                len = strlen(p->procname);
                if (len != name->len)
                        continue;
-                if (memcmp(table->procname, name->name, len) != 0)
+                if (memcmp(p->procname, name->name, len) != 0)
                        continue;
                /* I have a match */
-                return table;
+                return p;
        }
        return NULL;
 }
-static struct ctl_table *proc_sys_lookup_table(struct dentry *dentry,
+struct ctl_table_header *grab_header(struct inode *inode)
-                                                struct ctl_table *table)
 {
-        struct dentry *ancestor;
+        if (PROC_I(inode)->sysctl)
-        struct proc_inode *ei;
+                return sysctl_head_grab(PROC_I(inode)->sysctl);
-        int depth, i;
+        else
+                return sysctl_head_next(NULL);
+}
-        ei = PROC_I(dentry->d_inode);
+static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry,
-        depth = ei->fd;
+                                        struct nameidata *nd)
+{
+        struct ctl_table_header *head = grab_header(dir);
+        struct ctl_table *table = PROC_I(dir)->sysctl_entry;
+        struct ctl_table_header *h = NULL;
+        struct qstr *name = &dentry->d_name;
+        struct ctl_table *p;
+        struct inode *inode;
+        struct dentry *err = ERR_PTR(-ENOENT);
-        if (depth == 0)
+        if (IS_ERR(head))
-                return table;
+                return ERR_CAST(head);
-        for (i = 1; table && (i <= depth); i++) {
+        if (table && !table->child) {
-                ancestor = proc_sys_ancestor(dentry, i);
+                WARN_ON(1);
-                table = proc_sys_lookup_table_one(table, &ancestor->d_name);
+                goto out;
-                if (table)
-                        table = table->child;
        }
-        return table;
-}
-static struct ctl_table *proc_sys_lookup_entry(struct dentry *dparent,
-                                                struct qstr *name,
-                                                struct ctl_table *table)
-{
-        table = proc_sys_lookup_table(dparent, table);
-        if (table)
-                table = proc_sys_lookup_table_one(table, name);
-        return table;
-}
-static struct ctl_table *do_proc_sys_lookup(struct dentry *parent,
+        table = table ? table->child : head->ctl_table;
-                                                struct qstr *name,
-                                                struct ctl_table_header **ptr)
-{
-        struct ctl_table_header *head;
-        struct ctl_table *table = NULL;
-        for (head = sysctl_head_next(NULL); head;
+        p = find_in_table(table, name);
-                        head = sysctl_head_next(head)) {
+        if (!p) {
-                table = proc_sys_lookup_entry(parent, name, head->ctl_table);
+                for (h = sysctl_head_next(NULL); h; h = sysctl_head_next(h)) {
-                if (table)
+                        if (h->attached_to != table)
-                        break;
+                                continue;
+                        p = find_in_table(h->attached_by, name);
+                        if (p)
+                                break;
+                }
        }
-        *ptr = head;
-        return table;
-}
-static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry,
-                                        struct nameidata *nd)
-{
-        struct ctl_table_header *head;
-        struct inode *inode;
-        struct dentry *err;
-        struct ctl_table *table;
-        err = ERR_PTR(-ENOENT);
+        if (!p)
-        table = do_proc_sys_lookup(dentry->d_parent, &dentry->d_name, &head);
-        if (!table)
                goto out;
        err = ERR_PTR(-ENOMEM);
-        inode = proc_sys_make_inode(dir, table);
+        inode = proc_sys_make_inode(dir->i_sb, h ? h : head, p);
+        if (h)
+                sysctl_head_finish(h);
        if (!inode)
                goto out;
@@ -168,22 +129,14 @@ out:
 static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf,
                size_t count, loff_t *ppos, int write)
 {
-        struct dentry *dentry = filp->f_dentry;
+        struct inode *inode = filp->f_path.dentry->d_inode;
-        struct ctl_table_header *head;
+        struct ctl_table_header *head = grab_header(inode);
-        struct ctl_table *table;
+        struct ctl_table *table = PROC_I(inode)->sysctl_entry;
        ssize_t error;
        size_t res;
-        table = do_proc_sys_lookup(dentry->d_parent, &dentry->d_name, &head);
+        if (IS_ERR(head))
-        /* Has the sysctl entry disappeared on us? */
+                return PTR_ERR(head);
-        error = -ENOENT;
-        if (!table)
-                goto out;
-        /* Has the sysctl entry been replaced by a directory? */
-        error = -EISDIR;
-        if (!table->proc_handler)
-                goto out;
        /*
         * At this point we know that the sysctl was not unregistered
@@ -193,6 +146,11 @@ static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf,
        if (sysctl_perm(head->root, table, write ? MAY_WRITE : MAY_READ))
                goto out;
+        /* if that can happen at all, it should be -EINVAL, not -EISDIR */
+        error = -EINVAL;
+        if (!table->proc_handler)
+                goto out;
        /* careful: calling conventions are nasty here */
        res = count;
        error = table->proc_handler(table, write, filp, buf, &res, ppos);
@@ -218,82 +176,86 @@ static ssize_t proc_sys_write(struct file *filp, const char __user *buf,
 static int proc_sys_fill_cache(struct file *filp, void *dirent,
-                                filldir_t filldir, struct ctl_table *table)
+                                filldir_t filldir,
+                                struct ctl_table_header *head,
+                                struct ctl_table *table)
 {
-        struct ctl_table_header *head;
-        struct ctl_table *child_table = NULL;
        struct dentry *child, *dir = filp->f_path.dentry;
        struct inode *inode;
        struct qstr qname;
        ino_t ino = 0;
        unsigned type = DT_UNKNOWN;
-        int ret;
        qname.name = table->procname;
        qname.len  = strlen(table->procname);
        qname.hash = full_name_hash(qname.name, qname.len);
-        /* Suppress duplicates.
-         * Only fill a directory entry if it is the value that
-         * an ordinary lookup of that name returns.  Hide all
-         * others.
-         *
-         * If we ever cache this translation in the dcache
-         * I should do a dcache lookup first.  But for now
-         * it is just simpler not to.
-         */
-        ret = 0;
-        child_table = do_proc_sys_lookup(dir, &qname, &head);
-        sysctl_head_finish(head);
-        if (child_table != table)
-                return 0;
        child = d_lookup(dir, &qname);
        if (!child) {
-                struct dentry *new;
+                child = d_alloc(dir, &qname);
-                new = d_alloc(dir, &qname);
+                if (child) {
-                if (new) {
+                        inode = proc_sys_make_inode(dir->d_sb, head, table);
-                        inode = proc_sys_make_inode(dir->d_inode, table);
+                        if (!inode) {
-                        if (!inode)
+                                dput(child);
-                                child = ERR_PTR(-ENOMEM);
+                                return -ENOMEM;
-                        else {
+                        } else {
-                                new->d_op = &proc_sys_dentry_operations;
+                                child->d_op = &proc_sys_dentry_operations;
-                                d_add(new, inode);
+                                d_add(child, inode);
                        }
-                        if (child)
+                } else {
-                                dput(new);
+                        return -ENOMEM;
-                        else
-                                child = new;
                }
        }
-        if (!child || IS_ERR(child) || !child->d_inode)
-                goto end_instantiate;
        inode = child->d_inode;
-        if (inode) {
+        ino  = inode->i_ino;
-                ino  = inode->i_ino;
+        type = inode->i_mode >> 12;
-                type = inode->i_mode >> 12;
-        }
        dput(child);
-end_instantiate:
+        return !!filldir(dirent, qname.name, qname.len, filp->f_pos, ino, type);
-        if (!ino)
+}
-                ino= find_inode_number(dir, &qname);
-        if (!ino)
+static int scan(struct ctl_table_header *head, ctl_table *table,
-                ino = 1;
+                unsigned long *pos, struct file *file,
-        return filldir(dirent, qname.name, qname.len, filp->f_pos, ino, type);
+                void *dirent, filldir_t filldir)
+{
+        for (; table->ctl_name || table->procname; table++, (*pos)++) {
+                int res;
+                /* Can't do anything without a proc name */
+                if (!table->procname)
+                        continue;
+                if (*pos < file->f_pos)
+                        continue;
+                res = proc_sys_fill_cache(file, dirent, filldir, head, table);
+                if (res)
+                        return res;
+                file->f_pos = *pos + 1;
+        }
+        return 0;
 }
 static int proc_sys_readdir(struct file *filp, void *dirent, filldir_t filldir)
 {
-        struct dentry *dentry = filp->f_dentry;
+        struct dentry *dentry = filp->f_path.dentry;
        struct inode *inode = dentry->d_inode;
-        struct ctl_table_header *head = NULL;
+        struct ctl_table_header *head = grab_header(inode);
-        struct ctl_table *table;
+        struct ctl_table *table = PROC_I(inode)->sysctl_entry;
+        struct ctl_table_header *h = NULL;
        unsigned long pos;
-        int ret;
+        int ret = -EINVAL;
+        if (IS_ERR(head))
+                return PTR_ERR(head);
-        ret = -ENOTDIR;
+        if (table && !table->child) {
-        if (!S_ISDIR(inode->i_mode))
+                WARN_ON(1);
                goto out;
+        }
+        table = table ? table->child : head->ctl_table;
        ret = 0;
        /* Avoid a switch here: arm builds fail with missing __cmpdi2 */
@@ -311,30 +273,17 @@ static int proc_sys_readdir(struct file *filp, void *dirent, filldir_t filldir)
        }
        pos = 2;
-        /* - Find each instance of the directory
+        ret = scan(head, table, &pos, filp, dirent, filldir);
-         * - Read all entries in each instance
+        if (ret)
-         * - Before returning an entry to user space lookup the entry
+                goto out;
-         *   by name and if I find a different entry don't return
-         *   this one because it means it is a buried dup.
-         * For sysctl this should only happen for directory entries.
-         */
-        for (head = sysctl_head_next(NULL); head; head = sysctl_head_next(head)) {
-                table = proc_sys_lookup_table(dentry, head->ctl_table);
-                if (!table)
+        for (h = sysctl_head_next(NULL); h; h = sysctl_head_next(h)) {
+                if (h->attached_to != table)
                        continue;
+                ret = scan(h, h->attached_by, &pos, filp, dirent, filldir);
-                for (; table->ctl_name || table->procname; table++, pos++) {
+                if (ret) {
-                        /* Can't do anything without a proc name */
+                        sysctl_head_finish(h);
-                        if (!table->procname)
+                        break;
-                                continue;
-                        if (pos < filp->f_pos)
-                                continue;
-                        if (proc_sys_fill_cache(filp, dirent, filldir, table) < 0)
-                                goto out;
-                        filp->f_pos = pos + 1;
                }
        }
        ret = 1;
@@ -343,53 +292,24 @@ out:
        return ret;
 }
-static int proc_sys_permission(struct inode *inode, int mask, struct nameidata *nd)
+static int proc_sys_permission(struct inode *inode, int mask)
 {
        /*
         * sysctl entries that are not writeable,
         * are _NOT_ writeable, capabilities or not.
         */
-        struct ctl_table_header *head;
+        struct ctl_table_header *head = grab_header(inode);
-        struct ctl_table *table;
+        struct ctl_table *table = PROC_I(inode)->sysctl_entry;
-        struct dentry *dentry;
-        int mode;
-        int depth;
        int error;
-        head = NULL;
+        if (IS_ERR(head))
-        depth = PROC_I(inode)->fd;
+                return PTR_ERR(head);
-        /* First check the cached permissions, in case we don't have
-         * enough information to lookup the sysctl table entry.
-         */
-        error = -EACCES;
-        mode = inode->i_mode;
-        if (current->euid == 0)
-                mode >>= 6;
-        else if (in_group_p(0))
-                mode >>= 3;
-        if ((mode & mask & (MAY_READ|MAY_WRITE|MAY_EXEC)) == mask)
-                error = 0;
-        /* If we can't get a sysctl table entry the permission
-         * checks on the cached mode will have to be enough.
-         */
-        if (!nd || !depth)
-                goto out;
-        dentry = nd->path.dentry;
+        if (!table) /* global root - r-xr-xr-x */
-        table = do_proc_sys_lookup(dentry->d_parent, &dentry->d_name, &head);
+                error = mask & MAY_WRITE ? -EACCES : 0;
+        else /* Use the permissions on the sysctl table entry */
+                error = sysctl_perm(head->root, table, mask);
-        /* If the entry does not exist deny permission */
-        error = -EACCES;
-        if (!table)
-                goto out;
-        /* Use the permissions on the sysctl table entry */
-        error = sysctl_perm(head->root, table, mask);
-out:
        sysctl_head_finish(head);
        return error;
 }
@@ -409,33 +329,70 @@ static int proc_sys_setattr(struct dentry *dentry, struct iattr *attr)
        return error;
 }
-/* I'm lazy and don't distinguish between files and directories,
+static int proc_sys_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
- * until access time.
+{
- */
+        struct inode *inode = dentry->d_inode;
+        struct ctl_table_header *head = grab_header(inode);
+        struct ctl_table *table = PROC_I(inode)->sysctl_entry;
+        if (IS_ERR(head))
+                return PTR_ERR(head);
+        generic_fillattr(inode, stat);
+        if (table)
+                stat->mode = (stat->mode & S_IFMT) | table->mode;
+        sysctl_head_finish(head);
+        return 0;
+}
 static const struct file_operations proc_sys_file_operations = {
        .read           = proc_sys_read,
        .write          = proc_sys_write,
+};
+static const struct file_operations proc_sys_dir_file_operations = {
        .readdir        = proc_sys_readdir,
 };
 static const struct inode_operations proc_sys_inode_operations = {
+        .permission     = proc_sys_permission,
+        .setattr        = proc_sys_setattr,
+        .getattr        = proc_sys_getattr,
+};
+static const struct inode_operations proc_sys_dir_operations = {
        .lookup         = proc_sys_lookup,
        .permission     = proc_sys_permission,
        .setattr        = proc_sys_setattr,
+        .getattr        = proc_sys_getattr,
 };
 static int proc_sys_revalidate(struct dentry *dentry, struct nameidata *nd)
 {
-        struct ctl_table_header *head;
+        return !PROC_I(dentry->d_inode)->sysctl->unregistering;
-        struct ctl_table *table;
+}
-        table = do_proc_sys_lookup(dentry->d_parent, &dentry->d_name, &head);
-        proc_sys_refresh_inode(dentry->d_inode, table);
+static int proc_sys_delete(struct dentry *dentry)
-        sysctl_head_finish(head);
+{
-        return !!table;
+        return !!PROC_I(dentry->d_inode)->sysctl->unregistering;
+}
+static int proc_sys_compare(struct dentry *dir, struct qstr *qstr,
+                            struct qstr *name)
+{
+        struct dentry *dentry = container_of(qstr, struct dentry, d_name);
+        if (qstr->len != name->len)
+                return 1;
+        if (memcmp(qstr->name, name->name, name->len))
+                return 1;
+        return !sysctl_is_seen(PROC_I(dentry->d_inode)->sysctl);
 }
 static struct dentry_operations proc_sys_dentry_operations = {
        .d_revalidate   = proc_sys_revalidate,
+        .d_delete       = proc_sys_delete,
+        .d_compare      = proc_sys_compare,
 };
 static struct proc_dir_entry *proc_sys_root;
@@ -443,8 +400,8 @@ static struct proc_dir_entry *proc_sys_root;
 int proc_sys_init(void)
 {
        proc_sys_root = proc_mkdir("sys", NULL);
-        proc_sys_root->proc_iops = &proc_sys_inode_operations;
+        proc_sys_root->proc_iops = &proc_sys_dir_operations;
-        proc_sys_root->proc_fops = &proc_sys_file_operations;
+        proc_sys_root->proc_fops = &proc_sys_dir_file_operations;
        proc_sys_root->nlink = 0;
        return 0;
 }
diff --git a/fs/proc/proc_tty.c b/fs/proc/proc_tty.c
index 21f490f5d65c..d153946d6d15 100644
--- a/fs/proc/proc_tty.c
+++ b/fs/proc/proc_tty.c
@@ -136,54 +136,6 @@ static const struct file_operations proc_tty_drivers_operations = {
        .release        = seq_release,
 };
-static void * tty_ldiscs_seq_start(struct seq_file *m, loff_t *pos)
-{
-        return (*pos < NR_LDISCS) ? pos : NULL;
-}
-static void * tty_ldiscs_seq_next(struct seq_file *m, void *v, loff_t *pos)
-{
-        (*pos)++;
-        return (*pos < NR_LDISCS) ? pos : NULL;
-}
-static void tty_ldiscs_seq_stop(struct seq_file *m, void *v)
-{
-}
-static int tty_ldiscs_seq_show(struct seq_file *m, void *v)
-{
-        int i = *(loff_t *)v;
-        struct tty_ldisc *ld;
-        
-        ld = tty_ldisc_get(i);
-        if (ld == NULL)
-                return 0;
-        seq_printf(m, "%-10s %2d\n", ld->name ? ld->name : "???", i);
-        tty_ldisc_put(i);
-        return 0;
-}
-static const struct seq_operations tty_ldiscs_seq_ops = {
-        .start  = tty_ldiscs_seq_start,
-        .next   = tty_ldiscs_seq_next,
-        .stop   = tty_ldiscs_seq_stop,
-        .show   = tty_ldiscs_seq_show,
-};
-static int proc_tty_ldiscs_open(struct inode *inode, struct file *file)
-{
-        return seq_open(file, &tty_ldiscs_seq_ops);
-}
-static const struct file_operations tty_ldiscs_proc_fops = {
-        .owner          = THIS_MODULE,
-        .open           = proc_tty_ldiscs_open,
-        .read           = seq_read,
-        .llseek         = seq_lseek,
-        .release        = seq_release,
-};
 /*
 * This function is called by tty_register_driver() to handle
 * registering the driver's /proc handler into /proc/tty/driver/<foo>
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 164bd9f9ede3..7546a918f790 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -636,7 +636,7 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
        struct pagemapread pm;
        int pagecount;
        int ret = -ESRCH;
-        struct mm_walk pagemap_walk;
+        struct mm_walk pagemap_walk = {};
        unsigned long src;
        unsigned long svpfn;
        unsigned long start_vaddr;
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c
index b31ab78052b3..2aad1044b84c 100644
--- a/fs/qnx4/inode.c
+++ b/fs/qnx4/inode.c
@@ -553,7 +553,7 @@ static void qnx4_destroy_inode(struct inode *inode)
        kmem_cache_free(qnx4_inode_cachep, qnx4_i(inode));
 }
-static void init_once(struct kmem_cache *cachep, void *foo)
+static void init_once(void *foo)
 {
        struct qnx4_inode_info *ei = (struct qnx4_inode_info *) foo;
diff --git a/fs/quota.c b/fs/quota.c
index db1cc9f3c7aa..7f4386ebc23a 100644
--- a/fs/quota.c
+++ b/fs/quota.c
@@ -186,7 +186,7 @@ static void quota_sync_sb(struct super_block *sb, int type)
 void sync_dquots(struct super_block *sb, int type)
 {
-        int cnt, dirty;
+        int cnt;
        if (sb) {
                if (sb->s_qcop->quota_sync)
@@ -198,11 +198,17 @@ void sync_dquots(struct super_block *sb, int type)
 restart:
        list_for_each_entry(sb, &super_blocks, s_list) {
                /* This test just improves performance so it needn't be reliable... */
-                for (cnt = 0, dirty = 0; cnt < MAXQUOTAS; cnt++)
+                for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
-                        if ((type == cnt || type == -1) && sb_has_quota_enabled(sb, cnt)
+                        if (type != -1 && type != cnt)
-                            && info_any_dirty(&sb_dqopt(sb)->info[cnt]))
+                                continue;
-                                dirty = 1;
+                        if (!sb_has_quota_enabled(sb, cnt))
-                if (!dirty)
+                                continue;
+                        if (!info_dirty(&sb_dqopt(sb)->info[cnt]) &&
+                            list_empty(&sb_dqopt(sb)->info[cnt].dqi_dirty_list))
+                                continue;
+                        break;
+                }
+                if (cnt == MAXQUOTAS)
                        continue;
                sb->s_count++;
                spin_unlock(&sb_lock);
diff --git a/fs/quota_v1.c b/fs/quota_v1.c
index a6cf9269105c..5ae15b13eeb0 100644
--- a/fs/quota_v1.c
+++ b/fs/quota_v1.c
@@ -1,6 +1,7 @@
 #include <linux/errno.h>
 #include <linux/fs.h>
 #include <linux/quota.h>
+#include <linux/quotaops.h>
 #include <linux/dqblk_v1.h>
 #include <linux/quotaio_v1.h>
 #include <linux/kernel.h>
diff --git a/fs/quota_v2.c b/fs/quota_v2.c
index 234ada903633..b53827dc02d9 100644
--- a/fs/quota_v2.c
+++ b/fs/quota_v2.c
@@ -11,6 +11,7 @@
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/slab.h>
+#include <linux/quotaops.h>
 #include <asm/byteorder.h>
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index e396b2fa4743..c8f60ee183b5 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -34,15 +34,10 @@
 **                      from within kupdate, it will ignore the immediate flag
 */
-#include <asm/uaccess.h>
-#include <asm/system.h>
 #include <linux/time.h>
 #include <linux/semaphore.h>
 #include <linux/vmalloc.h>
 #include <linux/reiserfs_fs.h>
 #include <linux/kernel.h>
 #include <linux/errno.h>
 #include <linux/fcntl.h>
@@ -54,6 +49,9 @@
 #include <linux/writeback.h>
 #include <linux/blkdev.h>
 #include <linux/backing-dev.h>
+#include <linux/uaccess.h>
+#include <asm/system.h>
 /* gets a struct reiserfs_journal_list * from a list head */
 #define JOURNAL_LIST_ENTRY(h) (list_entry((h), struct reiserfs_journal_list, \
@@ -558,13 +556,13 @@ static inline void insert_journal_hash(struct reiserfs_journal_cnode **table,
 static inline void lock_journal(struct super_block *p_s_sb)
 {
        PROC_INFO_INC(p_s_sb, journal.lock_journal);
-        down(&SB_JOURNAL(p_s_sb)->j_lock);
+        mutex_lock(&SB_JOURNAL(p_s_sb)->j_mutex);
 }
 /* unlock the current transaction */
 static inline void unlock_journal(struct super_block *p_s_sb)
 {
-        up(&SB_JOURNAL(p_s_sb)->j_lock);
+        mutex_unlock(&SB_JOURNAL(p_s_sb)->j_mutex);
 }
 static inline void get_journal_list(struct reiserfs_journal_list *jl)
@@ -1045,9 +1043,9 @@ static int flush_commit_list(struct super_block *s,
        }
        /* make sure nobody is trying to flush this one at the same time */
-        down(&jl->j_commit_lock);
+        mutex_lock(&jl->j_commit_mutex);
        if (!journal_list_still_alive(s, trans_id)) {
-                up(&jl->j_commit_lock);
+                mutex_unlock(&jl->j_commit_mutex);
                goto put_jl;
        }
        BUG_ON(jl->j_trans_id == 0);
@@ -1057,7 +1055,7 @@ static int flush_commit_list(struct super_block *s,
                if (flushall) {
                        atomic_set(&(jl->j_older_commits_done), 1);
                }
-                up(&jl->j_commit_lock);
+                mutex_unlock(&jl->j_commit_mutex);
                goto put_jl;
        }
@@ -1181,7 +1179,7 @@ static int flush_commit_list(struct super_block *s,
        if (flushall) {
                atomic_set(&(jl->j_older_commits_done), 1);
        }
-        up(&jl->j_commit_lock);
+        mutex_unlock(&jl->j_commit_mutex);
      put_jl:
        put_journal_list(s, jl);
@@ -1411,8 +1409,8 @@ static int flush_journal_list(struct super_block *s,
        /* if flushall == 0, the lock is already held */
        if (flushall) {
-                down(&journal->j_flush_sem);
+                mutex_lock(&journal->j_flush_mutex);
-        } else if (!down_trylock(&journal->j_flush_sem)) {
+        } else if (mutex_trylock(&journal->j_flush_mutex)) {
                BUG();
        }
@@ -1642,7 +1640,7 @@ static int flush_journal_list(struct super_block *s,
        jl->j_state = 0;
        put_journal_list(s, jl);
        if (flushall)
-                up(&journal->j_flush_sem);
+                mutex_unlock(&journal->j_flush_mutex);
        put_fs_excl();
        return err;
 }
@@ -1772,12 +1770,12 @@ static int kupdate_transactions(struct super_block *s,
        struct reiserfs_journal *journal = SB_JOURNAL(s);
        chunk.nr = 0;
-        down(&journal->j_flush_sem);
+        mutex_lock(&journal->j_flush_mutex);
        if (!journal_list_still_alive(s, orig_trans_id)) {
                goto done;
        }
-        /* we've got j_flush_sem held, nobody is going to delete any
+        /* we've got j_flush_mutex held, nobody is going to delete any
         * of these lists out from underneath us
         */
        while ((num_trans && transactions_flushed < num_trans) ||
@@ -1812,7 +1810,7 @@ static int kupdate_transactions(struct super_block *s,
        }
      done:
-        up(&journal->j_flush_sem);
+        mutex_unlock(&journal->j_flush_mutex);
        return ret;
 }
@@ -2556,7 +2554,7 @@ static struct reiserfs_journal_list *alloc_journal_list(struct super_block *s)
        INIT_LIST_HEAD(&jl->j_working_list);
        INIT_LIST_HEAD(&jl->j_tail_bh_list);
        INIT_LIST_HEAD(&jl->j_bh_list);
-        sema_init(&jl->j_commit_lock, 1);
+        mutex_init(&jl->j_commit_mutex);
        SB_JOURNAL(s)->j_num_lists++;
        get_journal_list(jl);
        return jl;
@@ -2837,8 +2835,8 @@ int journal_init(struct super_block *p_s_sb, const char *j_dev_name,
        journal->j_last = NULL;
        journal->j_first = NULL;
        init_waitqueue_head(&(journal->j_join_wait));
-        sema_init(&journal->j_lock, 1);
+        mutex_init(&journal->j_mutex);
-        sema_init(&journal->j_flush_sem, 1);
+        mutex_init(&journal->j_flush_mutex);
        journal->j_trans_id = 10;
        journal->j_mount_id = 10;
@@ -4030,7 +4028,7 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
         * the new transaction is fully setup, and we've already flushed the
         * ordered bh list
         */
-        down(&jl->j_commit_lock);
+        mutex_lock(&jl->j_commit_mutex);
        /* save the transaction id in case we need to commit it later */
        commit_trans_id = jl->j_trans_id;
@@ -4196,7 +4194,7 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
                lock_kernel();
        }
        BUG_ON(!list_empty(&jl->j_tail_bh_list));
-        up(&jl->j_commit_lock);
+        mutex_unlock(&jl->j_commit_mutex);
        /* honor the flush wishes from the caller, simple commits can
         ** be done outside the journal lock, they are done below
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 1d40f2bd1970..879e54d35c2d 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -22,6 +22,7 @@
 #include <linux/blkdev.h>
 #include <linux/buffer_head.h>
 #include <linux/exportfs.h>
+#include <linux/quotaops.h>
 #include <linux/vfs.h>
 #include <linux/mnt_namespace.h>
 #include <linux/mount.h>
@@ -182,7 +183,7 @@ static int finish_unfinished(struct super_block *s)
                        int ret = reiserfs_quota_on_mount(s, i);
                        if (ret < 0)
                                reiserfs_warning(s,
-                                                 "reiserfs: cannot turn on journalled quota: error %d",
+                                                 "reiserfs: cannot turn on journaled quota: error %d",
                                                 ret);
                }
        }
@@ -520,7 +521,7 @@ static void reiserfs_destroy_inode(struct inode *inode)
        kmem_cache_free(reiserfs_inode_cachep, REISERFS_I(inode));
 }
-static void init_once(struct kmem_cache * cachep, void *foo)
+static void init_once(void *foo)
 {
        struct reiserfs_inode_info *ei = (struct reiserfs_inode_info *)foo;
@@ -876,7 +877,9 @@ static int reiserfs_parse_options(struct super_block *s, char *options,	/* strin
                                     mount options were selected. */
                                  unsigned long *blocks,        /* strtol-ed from NNN of resize=NNN */
                                  char **jdev_name,
-                                  unsigned int *commit_max_age)
+                                  unsigned int *commit_max_age,
+                                  char **qf_names,
+                                  unsigned int *qfmt)
 {
        int c;
        char *arg = NULL;
@@ -992,9 +995,11 @@ static int reiserfs_parse_options(struct super_block *s, char *options,	/* strin
                if (c == 'u' || c == 'g') {
                        int qtype = c == 'u' ? USRQUOTA : GRPQUOTA;
-                        if (sb_any_quota_enabled(s)) {
+                        if ((sb_any_quota_enabled(s) ||
+                             sb_any_quota_suspended(s)) &&
+                            (!*arg != !REISERFS_SB(s)->s_qf_names[qtype])) {
                                reiserfs_warning(s,
-                                                 "reiserfs_parse_options: cannot change journalled quota options when quota turned on.");
+                                                 "reiserfs_parse_options: cannot change journaled quota options when quota turned on.");
                                return 0;
                        }
                        if (*arg) {     /* Some filename specified? */
@@ -1011,46 +1016,54 @@ static int reiserfs_parse_options(struct super_block *s, char *options,	/* strin
                                                         "reiserfs_parse_options: quotafile must be on filesystem root.");
                                        return 0;
                                }
-                                REISERFS_SB(s)->s_qf_names[qtype] =
+                                qf_names[qtype] =
                                    kmalloc(strlen(arg) + 1, GFP_KERNEL);
-                                if (!REISERFS_SB(s)->s_qf_names[qtype]) {
+                                if (!qf_names[qtype]) {
                                        reiserfs_warning(s,
                                                         "reiserfs_parse_options: not enough memory for storing quotafile name.");
                                        return 0;
                                }
-                                strcpy(REISERFS_SB(s)->s_qf_names[qtype], arg);
+                                strcpy(qf_names[qtype], arg);
                                *mount_options |= 1 << REISERFS_QUOTA;
                        } else {
-                                kfree(REISERFS_SB(s)->s_qf_names[qtype]);
+                                if (qf_names[qtype] !=
-                                REISERFS_SB(s)->s_qf_names[qtype] = NULL;
+                                    REISERFS_SB(s)->s_qf_names[qtype])
+                                        kfree(qf_names[qtype]);
+                                qf_names[qtype] = NULL;
                        }
                }
                if (c == 'f') {
                        if (!strcmp(arg, "vfsold"))
-                                REISERFS_SB(s)->s_jquota_fmt = QFMT_VFS_OLD;
+                                *qfmt = QFMT_VFS_OLD;
                        else if (!strcmp(arg, "vfsv0"))
-                                REISERFS_SB(s)->s_jquota_fmt = QFMT_VFS_V0;
+                                *qfmt = QFMT_VFS_V0;
                        else {
                                reiserfs_warning(s,
                                                 "reiserfs_parse_options: unknown quota format specified.");
                                return 0;
                        }
+                        if ((sb_any_quota_enabled(s) ||
+                             sb_any_quota_suspended(s)) &&
+                            *qfmt != REISERFS_SB(s)->s_jquota_fmt) {
+                                reiserfs_warning(s,
+                                                 "reiserfs_parse_options: cannot change journaled quota options when quota turned on.");
+                                return 0;
+                        }
                }
 #else
                if (c == 'u' || c == 'g' || c == 'f') {
                        reiserfs_warning(s,
-                                         "reiserfs_parse_options: journalled quota options not supported.");
+                                         "reiserfs_parse_options: journaled quota options not supported.");
                        return 0;
                }
 #endif
        }
 #ifdef CONFIG_QUOTA
-        if (!REISERFS_SB(s)->s_jquota_fmt
+        if (!REISERFS_SB(s)->s_jquota_fmt && !*qfmt
-            && (REISERFS_SB(s)->s_qf_names[USRQUOTA]
+            && (qf_names[USRQUOTA] || qf_names[GRPQUOTA])) {
-                || REISERFS_SB(s)->s_qf_names[GRPQUOTA])) {
                reiserfs_warning(s,
-                                 "reiserfs_parse_options: journalled quota format not specified.");
+                                 "reiserfs_parse_options: journaled quota format not specified.");
                return 0;
        }
        /* This checking is not precise wrt the quota type but for our purposes it is sufficient */
@@ -1130,6 +1143,21 @@ static void handle_attrs(struct super_block *s)
        }
 }
+#ifdef CONFIG_QUOTA
+static void handle_quota_files(struct super_block *s, char **qf_names,
+                               unsigned int *qfmt)
+{
+        int i;
+        for (i = 0; i < MAXQUOTAS; i++) {
+                if (qf_names[i] != REISERFS_SB(s)->s_qf_names[i])
+                        kfree(REISERFS_SB(s)->s_qf_names[i]);
+                REISERFS_SB(s)->s_qf_names[i] = qf_names[i];
+        }
+        REISERFS_SB(s)->s_jquota_fmt = *qfmt;
+}
+#endif
 static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
 {
        struct reiserfs_super_block *rs;
@@ -1141,23 +1169,30 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
        struct reiserfs_journal *journal = SB_JOURNAL(s);
        char *new_opts = kstrdup(arg, GFP_KERNEL);
        int err;
+        char *qf_names[MAXQUOTAS];
+        unsigned int qfmt = 0;
 #ifdef CONFIG_QUOTA
        int i;
+        memcpy(qf_names, REISERFS_SB(s)->s_qf_names, sizeof(qf_names));
 #endif
        rs = SB_DISK_SUPER_BLOCK(s);
        if (!reiserfs_parse_options
-            (s, arg, &mount_options, &blocks, NULL, &commit_max_age)) {
+            (s, arg, &mount_options, &blocks, NULL, &commit_max_age,
+            qf_names, &qfmt)) {
 #ifdef CONFIG_QUOTA
-                for (i = 0; i < MAXQUOTAS; i++) {
+                for (i = 0; i < MAXQUOTAS; i++)
-                        kfree(REISERFS_SB(s)->s_qf_names[i]);
+                        if (qf_names[i] != REISERFS_SB(s)->s_qf_names[i])
-                        REISERFS_SB(s)->s_qf_names[i] = NULL;
+                                kfree(qf_names[i]);
-                }
 #endif
                err = -EINVAL;
                goto out_err;
        }
+#ifdef CONFIG_QUOTA
+        handle_quota_files(s, qf_names, &qfmt);
+#endif
        handle_attrs(s);
@@ -1570,6 +1605,8 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
        char *jdev_name;
        struct reiserfs_sb_info *sbi;
        int errval = -EINVAL;
+        char *qf_names[MAXQUOTAS] = {};
+        unsigned int qfmt = 0;
        save_mount_options(s, data);
@@ -1597,9 +1634,12 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
        jdev_name = NULL;
        if (reiserfs_parse_options
            (s, (char *)data, &(sbi->s_mount_opt), &blocks, &jdev_name,
-             &commit_max_age) == 0) {
+             &commit_max_age, qf_names, &qfmt) == 0) {
                goto error;
        }
+#ifdef CONFIG_QUOTA
+        handle_quota_files(s, qf_names, &qfmt);
+#endif
        if (blocks) {
                SWARN(silent, s, "jmacd-7: reiserfs_fill_super: resize option "
@@ -1819,7 +1859,7 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
        return (0);
-      error:
+error:
        if (jinit_done) {       /* kill the commit thread, free journal ram */
                journal_release_error(NULL, s);
        }
@@ -1830,10 +1870,8 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
 #ifdef CONFIG_QUOTA
        {
                int j;
-                for (j = 0; j < MAXQUOTAS; j++) {
+                for (j = 0; j < MAXQUOTAS; j++)
-                        kfree(sbi->s_qf_names[j]);
+                        kfree(qf_names[j]);
-                        sbi->s_qf_names[j] = NULL;
-                }
        }
 #endif
        kfree(sbi);
@@ -1980,7 +2018,7 @@ static int reiserfs_release_dquot(struct dquot *dquot)
 static int reiserfs_mark_dquot_dirty(struct dquot *dquot)
 {
-        /* Are we journalling quotas? */
+        /* Are we journaling quotas? */
        if (REISERFS_SB(dquot->dq_sb)->s_qf_names[USRQUOTA] ||
            REISERFS_SB(dquot->dq_sb)->s_qf_names[GRPQUOTA]) {
                dquot_mark_dquot_dirty(dquot);
@@ -2026,6 +2064,7 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id,
        int err;
        struct nameidata nd;
        struct inode *inode;
+        struct reiserfs_transaction_handle th;
        if (!(REISERFS_SB(sb)->s_mount_opt & (1 << REISERFS_QUOTA)))
                return -EINVAL;
@@ -2053,17 +2092,28 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id,
                }
                mark_inode_dirty(inode);
        }
-        /* Not journalling quota? No more tests needed... */
+        /* Journaling quota? */
-        if (!REISERFS_SB(sb)->s_qf_names[USRQUOTA] &&
+        if (REISERFS_SB(sb)->s_qf_names[type]) {
-            !REISERFS_SB(sb)->s_qf_names[GRPQUOTA]) {
+                /* Quota file not in the fs root? */
-                path_put(&nd.path);
+                if (nd.path.dentry->d_parent->d_inode != sb->s_root->d_inode)
-                return vfs_quota_on(sb, type, format_id, path, 0);
+                        reiserfs_warning(sb,
-        }
-        /* Quotafile not of fs root? */
-        if (nd.path.dentry->d_parent->d_inode != sb->s_root->d_inode)
-                reiserfs_warning(sb,
                                 "reiserfs: Quota file not on filesystem root. "
                                 "Journalled quota will not work.");
+        }
+        /*
+         * When we journal data on quota file, we have to flush journal to see
+         * all updates to the file when we bypass pagecache...
+         */
+        if (reiserfs_file_data_log(inode)) {
+                /* Just start temporary transaction and finish it */
+                err = journal_begin(&th, sb, 1);
+                if (err)
+                        return err;
+                err = journal_end_sync(&th, sb, 1);
+                if (err)
+                        return err;
+        }
        path_put(&nd.path);
        return vfs_quota_on(sb, type, format_id, path, 0);
 }
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index d7c4935c1034..bb3cb5b7cdb2 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -1250,7 +1250,7 @@ static int reiserfs_check_acl(struct inode *inode, int mask)
        return error;
 }
-int reiserfs_permission(struct inode *inode, int mask, struct nameidata *nd)
+int reiserfs_permission(struct inode *inode, int mask)
 {
        /*
         * We don't do permission checks on the internal objects.
diff --git a/fs/reiserfs/xattr_security.c b/fs/reiserfs/xattr_security.c
index 5e90a95ad60b..056008db1377 100644
--- a/fs/reiserfs/xattr_security.c
+++ b/fs/reiserfs/xattr_security.c
@@ -6,8 +6,6 @@
 #include <linux/reiserfs_xattr.h>
 #include <asm/uaccess.h>
-#define XATTR_SECURITY_PREFIX "security."
 static int
 security_get(struct inode *inode, const char *name, void *buffer, size_t size)
 {
diff --git a/fs/reiserfs/xattr_trusted.c b/fs/reiserfs/xattr_trusted.c
index 024a938ca60f..60abe2bb1f98 100644
--- a/fs/reiserfs/xattr_trusted.c
+++ b/fs/reiserfs/xattr_trusted.c
@@ -7,8 +7,6 @@
 #include <linux/reiserfs_xattr.h>
 #include <asm/uaccess.h>
-#define XATTR_TRUSTED_PREFIX "trusted."
 static int
 trusted_get(struct inode *inode, const char *name, void *buffer, size_t size)
 {
diff --git a/fs/reiserfs/xattr_user.c b/fs/reiserfs/xattr_user.c
index 073f39364b11..1384efcb938e 100644
--- a/fs/reiserfs/xattr_user.c
+++ b/fs/reiserfs/xattr_user.c
@@ -10,8 +10,6 @@
 # include <linux/reiserfs_acl.h>
 #endif
-#define XATTR_USER_PREFIX "user."
 static int
 user_get(struct inode *inode, const char *name, void *buffer, size_t size)
 {
diff --git a/fs/romfs/inode.c b/fs/romfs/inode.c
index 3f13d491c7c7..8e51a2aaa977 100644
--- a/fs/romfs/inode.c
+++ b/fs/romfs/inode.c
@@ -577,7 +577,7 @@ static void romfs_destroy_inode(struct inode *inode)
        kmem_cache_free(romfs_inode_cachep, ROMFS_I(inode));
 }
-static void init_once(struct kmem_cache *cachep, void *foo)
+static void init_once(void *foo)
 {
        struct romfs_inode_info *ei = foo;
diff --git a/fs/signalfd.c b/fs/signalfd.c
index 619725644c75..9c39bc7f8431 100644
--- a/fs/signalfd.c
+++ b/fs/signalfd.c
@@ -205,11 +205,19 @@ static const struct file_operations signalfd_fops = {
        .read           = signalfd_read,
 };
-asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemask)
+asmlinkage long sys_signalfd4(int ufd, sigset_t __user *user_mask,
+                              size_t sizemask, int flags)
 {
        sigset_t sigmask;
        struct signalfd_ctx *ctx;
+        /* Check the SFD_* constants for consistency.  */
+        BUILD_BUG_ON(SFD_CLOEXEC != O_CLOEXEC);
+        BUILD_BUG_ON(SFD_NONBLOCK != O_NONBLOCK);
+        if (flags & ~(SFD_CLOEXEC | SFD_NONBLOCK))
+                return -EINVAL;
        if (sizemask != sizeof(sigset_t) ||
            copy_from_user(&sigmask, user_mask, sizeof(sigmask)))
                return -EINVAL;
@@ -227,7 +235,8 @@ asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemas
                 * When we call this, the initialization must be complete, since
                 * anon_inode_getfd() will install the fd.
                 */
-                ufd = anon_inode_getfd("[signalfd]", &signalfd_fops, ctx);
+                ufd = anon_inode_getfd("[signalfd]", &signalfd_fops, ctx,
+                                       flags & (O_CLOEXEC | O_NONBLOCK));
                if (ufd < 0)
                        kfree(ctx);
        } else {
@@ -249,3 +258,9 @@ asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemas
        return ufd;
 }
+asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask,
+                             size_t sizemask)
+{
+        return sys_signalfd4(ufd, user_mask, sizemask, 0);
+}
diff --git a/fs/smbfs/cache.c b/fs/smbfs/cache.c
index 8182f0542a21..8c177eb7e344 100644
--- a/fs/smbfs/cache.c
+++ b/fs/smbfs/cache.c
@@ -13,7 +13,6 @@
 #include <linux/errno.h>
 #include <linux/kernel.h>
 #include <linux/mm.h>
-#include <linux/dirent.h>
 #include <linux/smb_fs.h>
 #include <linux/pagemap.h>
 #include <linux/net.h>
diff --git a/fs/smbfs/file.c b/fs/smbfs/file.c
index 2294783320cb..e4f8d51a5553 100644
--- a/fs/smbfs/file.c
+++ b/fs/smbfs/file.c
@@ -408,7 +408,7 @@ smb_file_release(struct inode *inode, struct file * file)
 * privileges, so we need our own check for this.
 */
 static int
-smb_file_permission(struct inode *inode, int mask, struct nameidata *nd)
+smb_file_permission(struct inode *inode, int mask)
 {
        int mode = inode->i_mode;
        int error = 0;
@@ -417,7 +417,7 @@ smb_file_permission(struct inode *inode, int mask, struct nameidata *nd)
        /* Look at user permissions */
        mode >>= 6;
-        if ((mode & 7 & mask) != mask)
+        if (mask & ~mode & (MAY_READ | MAY_WRITE | MAY_EXEC))
                error = -EACCES;
        return error;
 }
diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c
index 376ef3ee6ed7..3528f40ffb0f 100644
--- a/fs/smbfs/inode.c
+++ b/fs/smbfs/inode.c
@@ -67,7 +67,7 @@ static void smb_destroy_inode(struct inode *inode)
        kmem_cache_free(smb_inode_cachep, SMB_I(inode));
 }
-static void init_once(struct kmem_cache *cachep, void *foo)
+static void init_once(void *foo)
 {
        struct smb_inode_info *ei = (struct smb_inode_info *) foo;
diff --git a/fs/smbfs/proc.c b/fs/smbfs/proc.c
index d517a27b7f4b..ee536e8a649a 100644
--- a/fs/smbfs/proc.c
+++ b/fs/smbfs/proc.c
@@ -16,7 +16,6 @@
 #include <linux/stat.h>
 #include <linux/fcntl.h>
 #include <linux/dcache.h>
-#include <linux/dirent.h>
 #include <linux/nls.h>
 #include <linux/smp_lock.h>
 #include <linux/net.h>
diff --git a/fs/splice.c b/fs/splice.c
index 399442179d89..b30311ba8af6 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -772,7 +772,7 @@ generic_file_splice_write_nolock(struct pipe_inode_info *pipe, struct file *out,
        ssize_t ret;
        int err;
-        err = remove_suid(out->f_path.dentry);
+        err = file_remove_suid(out);
        if (unlikely(err))
                return err;
@@ -830,7 +830,7 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
        ssize_t ret;
        inode_double_lock(inode, pipe->inode);
-        ret = remove_suid(out->f_path.dentry);
+        ret = file_remove_suid(out);
        if (likely(!ret))
                ret = __splice_from_pipe(pipe, &sd, pipe_to_file);
        inode_double_unlock(inode, pipe->inode);
@@ -1161,36 +1161,6 @@ static long do_splice(struct file *in, loff_t __user *off_in,
 }
 /*
- * Do a copy-from-user while holding the mmap_semaphore for reading, in a
- * manner safe from deadlocking with simultaneous mmap() (grabbing mmap_sem
- * for writing) and page faulting on the user memory pointed to by src.
- * This assumes that we will very rarely hit the partial != 0 path, or this
- * will not be a win.
- */
-static int copy_from_user_mmap_sem(void *dst, const void __user *src, size_t n)
-{
-        int partial;
-        if (!access_ok(VERIFY_READ, src, n))
-                return -EFAULT;
-        pagefault_disable();
-        partial = __copy_from_user_inatomic(dst, src, n);
-        pagefault_enable();
-        /*
-         * Didn't copy everything, drop the mmap_sem and do a faulting copy
-         */
-        if (unlikely(partial)) {
-                up_read(&current->mm->mmap_sem);
-                partial = copy_from_user(dst, src, n);
-                down_read(&current->mm->mmap_sem);
-        }
-        return partial;
-}
-/*
 * Map an iov into an array of pages and offset/length tupples. With the
 * partial_page structure, we can map several non-contiguous ranges into
 * our ones pages[] map instead of splitting that operation into pieces.
@@ -1203,8 +1173,6 @@ static int get_iovec_page_array(const struct iovec __user *iov,
 {
        int buffers = 0, error = 0;
-        down_read(&current->mm->mmap_sem);
        while (nr_vecs) {
                unsigned long off, npages;
                struct iovec entry;
@@ -1213,7 +1181,7 @@ static int get_iovec_page_array(const struct iovec __user *iov,
                int i;
                error = -EFAULT;
-                if (copy_from_user_mmap_sem(&entry, iov, sizeof(entry)))
+                if (copy_from_user(&entry, iov, sizeof(entry)))
                        break;
                base = entry.iov_base;
@@ -1247,9 +1215,8 @@ static int get_iovec_page_array(const struct iovec __user *iov,
                if (npages > PIPE_BUFFERS - buffers)
                        npages = PIPE_BUFFERS - buffers;
-                error = get_user_pages(current, current->mm,
+                error = get_user_pages_fast((unsigned long)base, npages,
-                                       (unsigned long) base, npages, 0, 0,
+                                        0, &pages[buffers]);
-                                       &pages[buffers], NULL);
                if (unlikely(error <= 0))
                        break;
@@ -1288,8 +1255,6 @@ static int get_iovec_page_array(const struct iovec __user *iov,
                iov++;
        }
-        up_read(&current->mm->mmap_sem);
        if (buffers)
                return buffers;
diff --git a/fs/stat.c b/fs/stat.c
index 9cf41f719d50..7c46fbeb8b76 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -57,13 +57,13 @@ EXPORT_SYMBOL(vfs_getattr);
 int vfs_stat_fd(int dfd, char __user *name, struct kstat *stat)
 {
-        struct nameidata nd;
+        struct path path;
        int error;
-        error = __user_walk_fd(dfd, name, LOOKUP_FOLLOW, &nd);
+        error = user_path_at(dfd, name, LOOKUP_FOLLOW, &path);
        if (!error) {
-                error = vfs_getattr(nd.path.mnt, nd.path.dentry, stat);
+                error = vfs_getattr(path.mnt, path.dentry, stat);
-                path_put(&nd.path);
+                path_put(&path);
        }
        return error;
 }
@@ -77,13 +77,13 @@ EXPORT_SYMBOL(vfs_stat);
 int vfs_lstat_fd(int dfd, char __user *name, struct kstat *stat)
 {
-        struct nameidata nd;
+        struct path path;
        int error;
-        error = __user_walk_fd(dfd, name, 0, &nd);
+        error = user_path_at(dfd, name, 0, &path);
        if (!error) {
-                error = vfs_getattr(nd.path.mnt, nd.path.dentry, stat);
+                error = vfs_getattr(path.mnt, path.dentry, stat);
-                path_put(&nd.path);
+                path_put(&path);
        }
        return error;
 }
@@ -291,29 +291,29 @@ asmlinkage long sys_newfstat(unsigned int fd, struct stat __user *statbuf)
        return error;
 }
-asmlinkage long sys_readlinkat(int dfd, const char __user *path,
+asmlinkage long sys_readlinkat(int dfd, const char __user *pathname,
                                char __user *buf, int bufsiz)
 {
-        struct nameidata nd;
+        struct path path;
        int error;
        if (bufsiz <= 0)
                return -EINVAL;
-        error = __user_walk_fd(dfd, path, 0, &nd);
+        error = user_path_at(dfd, pathname, 0, &path);
        if (!error) {
-                struct inode *inode = nd.path.dentry->d_inode;
+                struct inode *inode = path.dentry->d_inode;
                error = -EINVAL;
                if (inode->i_op && inode->i_op->readlink) {
-                        error = security_inode_readlink(nd.path.dentry);
+                        error = security_inode_readlink(path.dentry);
                        if (!error) {
-                                touch_atime(nd.path.mnt, nd.path.dentry);
+                                touch_atime(path.mnt, path.dentry);
-                                error = inode->i_op->readlink(nd.path.dentry,
+                                error = inode->i_op->readlink(path.dentry,
                                                              buf, bufsiz);
                        }
                }
-                path_put(&nd.path);
+                path_put(&path);
        }
        return error;
 }
diff --git a/fs/super.c b/fs/super.c
index 453877c5697b..e931ae9511fe 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -70,6 +70,7 @@ static struct super_block *alloc_super(struct file_system_type *type)
                INIT_LIST_HEAD(&s->s_instances);
                INIT_HLIST_HEAD(&s->s_anon);
                INIT_LIST_HEAD(&s->s_inodes);
+                INIT_LIST_HEAD(&s->s_dentry_lru);
                init_rwsem(&s->s_umount);
                mutex_init(&s->s_lock);
                lockdep_set_class(&s->s_umount, &type->s_umount_key);
diff --git a/fs/sync.c b/fs/sync.c
index 228e17b5e9ee..2967562d416f 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -139,7 +139,8 @@ asmlinkage long sys_fdatasync(unsigned int fd)
 * before performing the write.
 *
 * SYNC_FILE_RANGE_WRITE: initiate writeout of all those dirty pages in the
- * range which are not presently under writeback.
+ * range which are not presently under writeback. Note that this may block for
+ * significant periods due to exhaustion of disk request structures.
 *
 * SYNC_FILE_RANGE_WAIT_AFTER: wait upon writeout of all pages in the range
 * after performing the write.
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 8c0e4b92574f..aedaeba82ae5 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -398,7 +398,7 @@ void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt,
 }
 /**
- *      sysfs_add_one - add sysfs_dirent to parent
+ *      __sysfs_add_one - add sysfs_dirent to parent without warning
 *      @acxt: addrm context to use
 *      @sd: sysfs_dirent to be added
 *
@@ -417,7 +417,7 @@ void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt,
 *      0 on success, -EEXIST if entry with the given name already
 *      exists.
 */
-int sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
+int __sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
 {
        if (sysfs_find_dirent(acxt->parent_sd, sd->s_name))
                return -EEXIST;
@@ -435,6 +435,36 @@ int sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
 }
 /**
+ *      sysfs_add_one - add sysfs_dirent to parent
+ *      @acxt: addrm context to use
+ *      @sd: sysfs_dirent to be added
+ *
+ *      Get @acxt->parent_sd and set sd->s_parent to it and increment
+ *      nlink of parent inode if @sd is a directory and link into the
+ *      children list of the parent.
+ *
+ *      This function should be called between calls to
+ *      sysfs_addrm_start() and sysfs_addrm_finish() and should be
+ *      passed the same @acxt as passed to sysfs_addrm_start().
+ *
+ *      LOCKING:
+ *      Determined by sysfs_addrm_start().
+ *
+ *      RETURNS:
+ *      0 on success, -EEXIST if entry with the given name already
+ *      exists.
+ */
+int sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
+{
+        int ret;
+        ret = __sysfs_add_one(acxt, sd);
+        WARN(ret == -EEXIST, KERN_WARNING "sysfs: duplicate filename '%s' "
+                       "can not be created\n", sd->s_name);
+        return ret;
+}
+/**
 *      sysfs_remove_one - remove sysfs_dirent from parent
 *      @acxt: addrm context to use
 *      @sd: sysfs_dirent to be removed
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index e7735f643cd1..c9e4e5091da1 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -14,6 +14,7 @@
 #include <linux/kobject.h>
 #include <linux/kallsyms.h>
 #include <linux/slab.h>
+#include <linux/fsnotify.h>
 #include <linux/namei.h>
 #include <linux/poll.h>
 #include <linux/list.h>
@@ -336,9 +337,8 @@ static int sysfs_open_file(struct inode *inode, struct file *file)
        if (kobj->ktype && kobj->ktype->sysfs_ops)
                ops = kobj->ktype->sysfs_ops;
        else {
-                printk(KERN_ERR "missing sysfs attribute operations for "
+                WARN(1, KERN_ERR "missing sysfs attribute operations for "
                       "kobject: %s\n", kobject_name(kobj));
-                WARN_ON(1);
                goto err_out;
        }
@@ -585,9 +585,11 @@ int sysfs_chmod_file(struct kobject *kobj, struct attribute *attr, mode_t mode)
        newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO);
        newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
-        rc = notify_change(victim, &newattrs);
+        newattrs.ia_ctime = current_fs_time(inode->i_sb);
+        rc = sysfs_setattr(victim, &newattrs);
        if (rc == 0) {
+                fsnotify_change(victim, newattrs.ia_valid);
                mutex_lock(&sysfs_mutex);
                victim_sd->s_mode = newattrs.ia_mode;
                mutex_unlock(&sysfs_mutex);
diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c
index eeba38417b1d..fe611949a7f7 100644
--- a/fs/sysfs/group.c
+++ b/fs/sysfs/group.c
@@ -134,9 +134,8 @@ void sysfs_remove_group(struct kobject * kobj,
        if (grp->name) {
                sd = sysfs_get_dirent(dir_sd, grp->name);
                if (!sd) {
-                        printk(KERN_WARNING "sysfs group %p not found for "
+                        WARN(!sd, KERN_WARNING "sysfs group %p not found for "
                                "kobject '%s'\n", grp, kobject_name(kobj));
-                        WARN_ON(!sd);
                        return;
                }
        } else
diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c
index 817f5966edca..a3ba217fbe74 100644
--- a/fs/sysfs/symlink.c
+++ b/fs/sysfs/symlink.c
@@ -19,13 +19,8 @@
 #include "sysfs.h"
-/**
+static int sysfs_do_create_link(struct kobject *kobj, struct kobject *target,
- *      sysfs_create_link - create symlink between two objects.
+                                const char *name, int warn)
- *      @kobj:  object whose directory we're creating the link in.
- *      @target:        object we're pointing to.
- *      @name:          name of the symlink.
- */
-int sysfs_create_link(struct kobject * kobj, struct kobject * target, const char * name)
 {
        struct sysfs_dirent *parent_sd = NULL;
        struct sysfs_dirent *target_sd = NULL;
@@ -65,7 +60,10 @@ int sysfs_create_link(struct kobject * kobj, struct kobject * target, const char
        target_sd = NULL;       /* reference is now owned by the symlink */
        sysfs_addrm_start(&acxt, parent_sd);
-        error = sysfs_add_one(&acxt, sd);
+        if (warn)
+                error = sysfs_add_one(&acxt, sd);
+        else
+                error = __sysfs_add_one(&acxt, sd);
        sysfs_addrm_finish(&acxt);
        if (error)
@@ -80,6 +78,33 @@ int sysfs_create_link(struct kobject * kobj, struct kobject * target, const char
 }
 /**
+ *      sysfs_create_link - create symlink between two objects.
+ *      @kobj:  object whose directory we're creating the link in.
+ *      @target:        object we're pointing to.
+ *      @name:          name of the symlink.
+ */
+int sysfs_create_link(struct kobject *kobj, struct kobject *target,
+                      const char *name)
+{
+        return sysfs_do_create_link(kobj, target, name, 1);
+}
+/**
+ *      sysfs_create_link_nowarn - create symlink between two objects.
+ *      @kobj:  object whose directory we're creating the link in.
+ *      @target:        object we're pointing to.
+ *      @name:          name of the symlink.
+ *
+ *      This function does the same as sysfs_create_link(), but it
+ *      doesn't warn if the link already exists.
+ */
+int sysfs_create_link_nowarn(struct kobject *kobj, struct kobject *target,
+                             const char *name)
+{
+        return sysfs_do_create_link(kobj, target, name, 0);
+}
+/**
 *      sysfs_remove_link - remove symlink in object's directory.
 *      @kobj:  object we're acting for.
 *      @name:  name of the symlink to remove.
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index ce4e15f8aaeb..a5db496f71c7 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -107,6 +107,7 @@ struct sysfs_dirent *sysfs_get_active_two(struct sysfs_dirent *sd);
 void sysfs_put_active_two(struct sysfs_dirent *sd);
 void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt,
                       struct sysfs_dirent *parent_sd);
+int __sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd);
 int sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd);
 void sysfs_remove_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd);
 void sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt);
diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c
index c5d60de0658f..df0d435baa48 100644
--- a/fs/sysv/inode.c
+++ b/fs/sysv/inode.c
@@ -326,7 +326,7 @@ static void sysv_destroy_inode(struct inode *inode)
        kmem_cache_free(sysv_inode_cachep, SYSV_I(inode));
 }
-static void init_once(struct kmem_cache *cachep, void *p)
+static void init_once(void *p)
 {
        struct sysv_inode_info *si = (struct sysv_inode_info *)p;
diff --git a/fs/timerfd.c b/fs/timerfd.c
index d87d354ec424..c502c60e4f54 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -184,7 +184,11 @@ asmlinkage long sys_timerfd_create(int clockid, int flags)
        int ufd;
        struct timerfd_ctx *ctx;
-        if (flags)
+        /* Check the TFD_* constants for consistency.  */
+        BUILD_BUG_ON(TFD_CLOEXEC != O_CLOEXEC);
+        BUILD_BUG_ON(TFD_NONBLOCK != O_NONBLOCK);
+        if (flags & ~(TFD_CLOEXEC | TFD_NONBLOCK))
                return -EINVAL;
        if (clockid != CLOCK_MONOTONIC &&
            clockid != CLOCK_REALTIME)
@@ -198,7 +202,8 @@ asmlinkage long sys_timerfd_create(int clockid, int flags)
        ctx->clockid = clockid;
        hrtimer_init(&ctx->tmr, clockid, HRTIMER_MODE_ABS);
-        ufd = anon_inode_getfd("[timerfd]", &timerfd_fops, ctx);
+        ufd = anon_inode_getfd("[timerfd]", &timerfd_fops, ctx,
+                               flags & (O_CLOEXEC | O_NONBLOCK));
        if (ufd < 0)
                kfree(ctx);
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 005a3b854d96..8565e586e533 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -53,6 +53,7 @@
 #include "ubifs.h"
 #include <linux/mount.h>
+#include <linux/namei.h>
 static int read_block(struct inode *inode, void *addr, unsigned int block,
                      struct ubifs_data_node *dn)
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 00eb9c68ad03..ca1e2d4e03cc 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -1841,7 +1841,7 @@ static struct file_system_type ubifs_fs_type = {
 /*
 * Inode slab cache constructor.
 */
-static void inode_slab_ctor(struct kmem_cache *cachep, void *obj)
+static void inode_slab_ctor(void *obj)
 {
        struct ubifs_inode *ui = obj;
        inode_init_once(&ui->vfs_inode);
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 44cc702f96cc..5698bbf83bbf 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -148,7 +148,7 @@ static void udf_destroy_inode(struct inode *inode)
        kmem_cache_free(udf_inode_cachep, UDF_I(inode));
 }
-static void init_once(struct kmem_cache *cachep, void *foo)
+static void init_once(void *foo)
 {
        struct udf_inode_info *ei = (struct udf_inode_info *)foo;
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index 85b22b5977fa..3e30e40aa24d 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -76,6 +76,7 @@
 #include <linux/errno.h>
 #include <linux/fs.h>
+#include <linux/quotaops.h>
 #include <linux/slab.h>
 #include <linux/time.h>
 #include <linux/stat.h>
@@ -1232,7 +1233,7 @@ static int ufs_show_options(struct seq_file *seq, struct vfsmount *vfs)
 {
        struct ufs_sb_info *sbi = UFS_SB(vfs->mnt_sb);
        unsigned mval = sbi->s_mount_opt & UFS_MOUNT_UFSTYPE;
-        struct match_token *tp = tokens;
+        const struct match_token *tp = tokens;
        while (tp->token != Opt_onerror_panic && tp->token != mval)
                ++tp;
@@ -1301,7 +1302,7 @@ static void ufs_destroy_inode(struct inode *inode)
        kmem_cache_free(ufs_inode_cachep, UFS_I(inode));
 }
-static void init_once(struct kmem_cache * cachep, void *foo)
+static void init_once(void *foo)
 {
        struct ufs_inode_info *ei = (struct ufs_inode_info *) foo;
diff --git a/fs/utimes.c b/fs/utimes.c
index b6b664e7145e..6929e3e91d05 100644
--- a/fs/utimes.c
+++ b/fs/utimes.c
@@ -48,66 +48,22 @@ static bool nsec_valid(long nsec)
        return nsec >= 0 && nsec <= 999999999;
 }
-/* If times==NULL, set access and modification to current time,
+static int utimes_common(struct path *path, struct timespec *times)
- * must be owner or have write permission.
- * Else, update from *times, must be owner or super user.
- */
-long do_utimes(int dfd, char __user *filename, struct timespec *times, int flags)
 {
        int error;
-        struct nameidata nd;
-        struct dentry *dentry;
-        struct inode *inode;
        struct iattr newattrs;
-        struct file *f = NULL;
+        struct inode *inode = path->dentry->d_inode;
-        struct vfsmount *mnt;
-        error = -EINVAL;
-        if (times && (!nsec_valid(times[0].tv_nsec) ||
-                      !nsec_valid(times[1].tv_nsec))) {
-                goto out;
-        }
-        if (flags & ~AT_SYMLINK_NOFOLLOW)
-                goto out;
-        if (filename == NULL && dfd != AT_FDCWD) {
-                error = -EINVAL;
-                if (flags & AT_SYMLINK_NOFOLLOW)
-                        goto out;
-                error = -EBADF;
+        error = mnt_want_write(path->mnt);
-                f = fget(dfd);
-                if (!f)
-                        goto out;
-                dentry = f->f_path.dentry;
-                mnt = f->f_path.mnt;
-        } else {
-                error = __user_walk_fd(dfd, filename, (flags & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW, &nd);
-                if (error)
-                        goto out;
-                dentry = nd.path.dentry;
-                mnt = nd.path.mnt;
-        }
-        inode = dentry->d_inode;
-        error = mnt_want_write(mnt);
        if (error)
-                goto dput_and_out;
+                goto out;
        if (times && times[0].tv_nsec == UTIME_NOW &&
                     times[1].tv_nsec == UTIME_NOW)
                times = NULL;
-        /* In most cases, the checks are done in inode_change_ok() */
        newattrs.ia_valid = ATTR_CTIME | ATTR_MTIME | ATTR_ATIME;
        if (times) {
-                error = -EPERM;
-                if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
-                        goto mnt_drop_write_and_out;
                if (times[0].tv_nsec == UTIME_OMIT)
                        newattrs.ia_valid &= ~ATTR_ATIME;
                else if (times[0].tv_nsec != UTIME_NOW) {
@@ -123,21 +79,13 @@ long do_utimes(int dfd, char __user *filename, struct timespec *times, int flags
                        newattrs.ia_mtime.tv_nsec = times[1].tv_nsec;
                        newattrs.ia_valid |= ATTR_MTIME_SET;
                }
                /*
-                 * For the UTIME_OMIT/UTIME_NOW and UTIME_NOW/UTIME_OMIT
+                 * Tell inode_change_ok() that this is an explicit time
-                 * cases, we need to make an extra check that is not done by
+                 * update, even if neither ATTR_ATIME_SET nor ATTR_MTIME_SET
-                 * inode_change_ok().
+                 * were used.
                 */
-                if (((times[0].tv_nsec == UTIME_NOW &&
+                newattrs.ia_valid |= ATTR_TIMES_SET;
-                            times[1].tv_nsec == UTIME_OMIT)
-                     ||
-                     (times[0].tv_nsec == UTIME_OMIT &&
-                            times[1].tv_nsec == UTIME_NOW))
-                    && !is_owner_or_cap(inode))
-                        goto mnt_drop_write_and_out;
        } else {
                /*
                 * If times is NULL (or both times are UTIME_NOW),
                 * then we need to check permissions, because
@@ -148,21 +96,76 @@ long do_utimes(int dfd, char __user *filename, struct timespec *times, int flags
                        goto mnt_drop_write_and_out;
                if (!is_owner_or_cap(inode)) {
-                        error = permission(inode, MAY_WRITE, NULL);
+                        error = inode_permission(inode, MAY_WRITE);
                        if (error)
                                goto mnt_drop_write_and_out;
                }
        }
        mutex_lock(&inode->i_mutex);
-        error = notify_change(dentry, &newattrs);
+        error = notify_change(path->dentry, &newattrs);
        mutex_unlock(&inode->i_mutex);
 mnt_drop_write_and_out:
-        mnt_drop_write(mnt);
+        mnt_drop_write(path->mnt);
-dput_and_out:
+out:
-        if (f)
+        return error;
-                fput(f);
+}
-        else
-                path_put(&nd.path);
+/*
+ * do_utimes - change times on filename or file descriptor
+ * @dfd: open file descriptor, -1 or AT_FDCWD
+ * @filename: path name or NULL
+ * @times: new times or NULL
+ * @flags: zero or more flags (only AT_SYMLINK_NOFOLLOW for the moment)
+ *
+ * If filename is NULL and dfd refers to an open file, then operate on
+ * the file.  Otherwise look up filename, possibly using dfd as a
+ * starting point.
+ *
+ * If times==NULL, set access and modification to current time,
+ * must be owner or have write permission.
+ * Else, update from *times, must be owner or super user.
+ */
+long do_utimes(int dfd, char __user *filename, struct timespec *times, int flags)
+{
+        int error = -EINVAL;
+        if (times && (!nsec_valid(times[0].tv_nsec) ||
+                      !nsec_valid(times[1].tv_nsec))) {
+                goto out;
+        }
+        if (flags & ~AT_SYMLINK_NOFOLLOW)
+                goto out;
+        if (filename == NULL && dfd != AT_FDCWD) {
+                struct file *file;
+                if (flags & AT_SYMLINK_NOFOLLOW)
+                        goto out;
+                file = fget(dfd);
+                error = -EBADF;
+                if (!file)
+                        goto out;
+                error = utimes_common(&file->f_path, times);
+                fput(file);
+        } else {
+                struct path path;
+                int lookup_flags = 0;
+                if (!(flags & AT_SYMLINK_NOFOLLOW))
+                        lookup_flags |= LOOKUP_FOLLOW;
+                error = user_path_at(dfd, filename, lookup_flags, &path);
+                if (error)
+                        goto out;
+                error = utimes_common(&path, times);
+                path_put(&path);
+        }
 out:
        return error;
 }
diff --git a/fs/vfat/namei.c b/fs/vfat/namei.c
index b546ba69be82..155c10b4adbd 100644
--- a/fs/vfat/namei.c
+++ b/fs/vfat/namei.c
@@ -621,7 +621,7 @@ shortname:
        memcpy(de->name, msdos_name, MSDOS_NAME);
        de->attr = is_dir ? ATTR_DIR : ATTR_ARCH;
        de->lcase = lcase;
-        fat_date_unix2dos(ts->tv_sec, &time, &date);
+        fat_date_unix2dos(ts->tv_sec, &time, &date, sbi->options.tz_utc);
        de->time = de->ctime = time;
        de->date = de->cdate = de->adate = date;
        de->ctime_cs = 0;
diff --git a/fs/xattr.c b/fs/xattr.c
index 4706a8b1f495..468377e66531 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -63,7 +63,7 @@ xattr_permission(struct inode *inode, const char *name, int mask)
                        return -EPERM;
        }
-        return permission(inode, mask, NULL);
+        return inode_permission(inode, mask);
 }
 int
@@ -252,40 +252,40 @@ setxattr(struct dentry *d, const char __user *name, const void __user *value,
 }
 asmlinkage long
-sys_setxattr(const char __user *path, const char __user *name,
+sys_setxattr(const char __user *pathname, const char __user *name,
             const void __user *value, size_t size, int flags)
 {
-        struct nameidata nd;
+        struct path path;
        int error;
-        error = user_path_walk(path, &nd);
+        error = user_path(pathname, &path);
        if (error)
                return error;
-        error = mnt_want_write(nd.path.mnt);
+        error = mnt_want_write(path.mnt);
        if (!error) {
-                error = setxattr(nd.path.dentry, name, value, size, flags);
+                error = setxattr(path.dentry, name, value, size, flags);
-                mnt_drop_write(nd.path.mnt);
+                mnt_drop_write(path.mnt);
        }
-        path_put(&nd.path);
+        path_put(&path);
        return error;
 }
 asmlinkage long
-sys_lsetxattr(const char __user *path, const char __user *name,
+sys_lsetxattr(const char __user *pathname, const char __user *name,
              const void __user *value, size_t size, int flags)
 {
-        struct nameidata nd;
+        struct path path;
        int error;
-        error = user_path_walk_link(path, &nd);
+        error = user_lpath(pathname, &path);
        if (error)
                return error;
-        error = mnt_want_write(nd.path.mnt);
+        error = mnt_want_write(path.mnt);
        if (!error) {
-                error = setxattr(nd.path.dentry, name, value, size, flags);
+                error = setxattr(path.dentry, name, value, size, flags);
-                mnt_drop_write(nd.path.mnt);
+                mnt_drop_write(path.mnt);
        }
-        path_put(&nd.path);
+        path_put(&path);
        return error;
 }
@@ -350,32 +350,32 @@ getxattr(struct dentry *d, const char __user *name, void __user *value,
 }
 asmlinkage ssize_t
-sys_getxattr(const char __user *path, const char __user *name,
+sys_getxattr(const char __user *pathname, const char __user *name,
             void __user *value, size_t size)
 {
-        struct nameidata nd;
+        struct path path;
        ssize_t error;
-        error = user_path_walk(path, &nd);
+        error = user_path(pathname, &path);
        if (error)
                return error;
-        error = getxattr(nd.path.dentry, name, value, size);
+        error = getxattr(path.dentry, name, value, size);
-        path_put(&nd.path);
+        path_put(&path);
        return error;
 }
 asmlinkage ssize_t
-sys_lgetxattr(const char __user *path, const char __user *name, void __user *value,
+sys_lgetxattr(const char __user *pathname, const char __user *name, void __user *value,
              size_t size)
 {
-        struct nameidata nd;
+        struct path path;
        ssize_t error;
-        error = user_path_walk_link(path, &nd);
+        error = user_lpath(pathname, &path);
        if (error)
                return error;
-        error = getxattr(nd.path.dentry, name, value, size);
+        error = getxattr(path.dentry, name, value, size);
-        path_put(&nd.path);
+        path_put(&path);
        return error;
 }
@@ -425,30 +425,30 @@ listxattr(struct dentry *d, char __user *list, size_t size)
 }
 asmlinkage ssize_t
-sys_listxattr(const char __user *path, char __user *list, size_t size)
+sys_listxattr(const char __user *pathname, char __user *list, size_t size)
 {
-        struct nameidata nd;
+        struct path path;
        ssize_t error;
-        error = user_path_walk(path, &nd);
+        error = user_path(pathname, &path);
        if (error)
                return error;
-        error = listxattr(nd.path.dentry, list, size);
+        error = listxattr(path.dentry, list, size);
-        path_put(&nd.path);
+        path_put(&path);
        return error;
 }
 asmlinkage ssize_t
-sys_llistxattr(const char __user *path, char __user *list, size_t size)
+sys_llistxattr(const char __user *pathname, char __user *list, size_t size)
 {
-        struct nameidata nd;
+        struct path path;
        ssize_t error;
-        error = user_path_walk_link(path, &nd);
+        error = user_lpath(pathname, &path);
        if (error)
                return error;
-        error = listxattr(nd.path.dentry, list, size);
+        error = listxattr(path.dentry, list, size);
-        path_put(&nd.path);
+        path_put(&path);
        return error;
 }
@@ -486,38 +486,38 @@ removexattr(struct dentry *d, const char __user *name)
 }
 asmlinkage long
-sys_removexattr(const char __user *path, const char __user *name)
+sys_removexattr(const char __user *pathname, const char __user *name)
 {
-        struct nameidata nd;
+        struct path path;
        int error;
-        error = user_path_walk(path, &nd);
+        error = user_path(pathname, &path);
        if (error)
                return error;
-        error = mnt_want_write(nd.path.mnt);
+        error = mnt_want_write(path.mnt);
        if (!error) {
-                error = removexattr(nd.path.dentry, name);
+                error = removexattr(path.dentry, name);
-                mnt_drop_write(nd.path.mnt);
+                mnt_drop_write(path.mnt);
        }
-        path_put(&nd.path);
+        path_put(&path);
        return error;
 }
 asmlinkage long
-sys_lremovexattr(const char __user *path, const char __user *name)
+sys_lremovexattr(const char __user *pathname, const char __user *name)
 {
-        struct nameidata nd;
+        struct path path;
        int error;
-        error = user_path_walk_link(path, &nd);
+        error = user_lpath(pathname, &path);
        if (error)
                return error;
-        error = mnt_want_write(nd.path.mnt);
+        error = mnt_want_write(path.mnt);
        if (!error) {
-                error = removexattr(nd.path.dentry, name);
+                error = removexattr(path.dentry, name);
-                mnt_drop_write(nd.path.mnt);
+                mnt_drop_write(path.mnt);
        }
-        path_put(&nd.path);
+        path_put(&path);
        return error;
 }
diff --git a/fs/xfs/linux-2.6/kmem.h b/fs/xfs/linux-2.6/kmem.h
index 5e9564902976..a20683cf74dd 100644
--- a/fs/xfs/linux-2.6/kmem.h
+++ b/fs/xfs/linux-2.6/kmem.h
@@ -79,7 +79,7 @@ kmem_zone_init(int size, char *zone_name)
 static inline kmem_zone_t *
 kmem_zone_init_flags(int size, char *zone_name, unsigned long flags,
-                     void (*construct)(kmem_zone_t *, void *))
+                     void (*construct)(void *))
 {
        return kmem_cache_create(zone_name, size, 0, flags, construct);
 }
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index a42ba9d71156..01939ba2d8de 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -84,17 +84,15 @@ xfs_find_handle(
        switch (cmd) {
        case XFS_IOC_PATH_TO_FSHANDLE:
        case XFS_IOC_PATH_TO_HANDLE: {
-                struct nameidata        nd;
+                struct path path;
-                int                     error;
+                int error = user_lpath((const char __user *)hreq.path, &path);
-                error = user_path_walk_link((const char __user *)hreq.path, &nd);
                if (error)
                        return error;
-                ASSERT(nd.path.dentry);
+                ASSERT(path.dentry);
-                ASSERT(nd.path.dentry->d_inode);
+                ASSERT(path.dentry->d_inode);
-                inode = igrab(nd.path.dentry->d_inode);
+                inode = igrab(path.dentry->d_inode);
-                path_put(&nd.path);
+                path_put(&path);
                break;
        }
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 2bf287ef5489..5fc61c824bb9 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -589,8 +589,7 @@ xfs_check_acl(
 STATIC int
 xfs_vn_permission(
        struct inode            *inode,
-        int                     mask,
+        int                     mask)
-        struct nameidata        *nd)
 {
        return generic_permission(inode, mask, xfs_check_acl);
 }
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c
index 5e3b57516ec7..82333b3e118e 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.c
+++ b/fs/xfs/linux-2.6/xfs_lrw.c
@@ -711,7 +711,7 @@ start:
             !capable(CAP_FSETID)) {
                error = xfs_write_clear_setuid(xip);
                if (likely(!error))
-                        error = -remove_suid(file->f_path.dentry);
+                        error = -file_remove_suid(file);
                if (unlikely(error)) {
                        goto out_unlock_internal;
                }
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 742b2c7852c1..943381284e2e 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -843,7 +843,6 @@ xfs_fs_destroy_inode(
 STATIC void
 xfs_fs_inode_init_once(
-        kmem_zone_t             *zonep,
        void                    *vnode)
 {
        inode_init_once(vn_to_inode((bhv_vnode_t *)vnode));
author	Haavard Skinnemoen <haavard.skinnemoen@atmel.com>	2008-07-27 07:54:08 -0400
committer	Haavard Skinnemoen <haavard.skinnemoen@atmel.com>	2008-07-27 07:54:08 -0400
commit	eda3d8f5604860aae1bb9996bb5efc4213778369 (patch)
tree	9d3887d2665bcc5f5abf200758794545c7b2c69b /fs
parent	87a9f704658a40940e740b1d73d861667e9164d3 (diff)
parent	8be1a6d6c77ab4532e4476fdb8177030ef48b52c (diff)