Diffstat (limited to 'fs')
-rw-r--r--  fs/9p/vfs_super.c | 12
-rw-r--r--  fs/affs/super.c | 3
-rw-r--r--  fs/afs/file.c | 2
-rw-r--r--  fs/afs/super.c | 7
-rw-r--r--  fs/autofs/dirhash.c | 34
-rw-r--r--  fs/autofs4/dev-ioctl.c | 12
-rw-r--r--  fs/autofs4/expire.c | 4
-rw-r--r--  fs/autofs4/waitq.c | 22
-rw-r--r--  fs/binfmt_elf_fdpic.c | 4
-rw-r--r--  fs/binfmt_flat.c | 46
-rw-r--r--  fs/bio.c | 127
-rw-r--r--  fs/btrfs/Makefile | 19
-rw-r--r--  fs/btrfs/acl.c | 18
-rw-r--r--  fs/btrfs/async-thread.c | 60
-rw-r--r--  fs/btrfs/async-thread.h | 2
-rw-r--r--  fs/btrfs/ctree.c | 56
-rw-r--r--  fs/btrfs/ctree.h | 6
-rw-r--r--  fs/btrfs/disk-io.c | 104
-rw-r--r--  fs/btrfs/extent-tree.c | 102
-rw-r--r--  fs/btrfs/extent_io.c | 167
-rw-r--r--  fs/btrfs/extent_map.c | 17
-rw-r--r--  fs/btrfs/file.c | 95
-rw-r--r--  fs/btrfs/free-space-cache.c | 15
-rw-r--r--  fs/btrfs/inode-map.c | 2
-rw-r--r--  fs/btrfs/inode.c | 185
-rw-r--r--  fs/btrfs/ioctl.c | 62
-rw-r--r--  fs/btrfs/ordered-data.c | 2
-rw-r--r--  fs/btrfs/super.c | 56
-rw-r--r--  fs/btrfs/transaction.c | 6
-rw-r--r--  fs/btrfs/tree-log.c | 2
-rw-r--r--  fs/btrfs/volumes.c | 160
-rw-r--r--  fs/btrfs/volumes.h | 16
-rw-r--r--  fs/buffer.c | 57
-rw-r--r--  fs/cachefiles/internal.h | 18
-rw-r--r--  fs/cifs/CHANGES | 16
-rw-r--r--  fs/cifs/README | 10
-rw-r--r--  fs/cifs/cifs_dfs_ref.c | 32
-rw-r--r--  fs/cifs/cifs_spnego.c | 2
-rw-r--r--  fs/cifs/cifs_unicode.c | 198
-rw-r--r--  fs/cifs/cifs_unicode.h | 23
-rw-r--r--  fs/cifs/cifsfs.c | 54
-rw-r--r--  fs/cifs/cifsfs.h | 2
-rw-r--r--  fs/cifs/cifsglob.h | 19
-rw-r--r--  fs/cifs/cifspdu.h | 8
-rw-r--r--  fs/cifs/cifsproto.h | 5
-rw-r--r--  fs/cifs/cifssmb.c | 221
-rw-r--r--  fs/cifs/connect.c | 1357
-rw-r--r--  fs/cifs/dir.c | 169
-rw-r--r--  fs/cifs/dns_resolve.c | 2
-rw-r--r--  fs/cifs/file.c | 161
-rw-r--r--  fs/cifs/inode.c | 98
-rw-r--r--  fs/cifs/link.c | 162
-rw-r--r--  fs/cifs/misc.c | 71
-rw-r--r--  fs/cifs/netmisc.c | 2
-rw-r--r--  fs/cifs/nterr.h | 9
-rw-r--r--  fs/cifs/ntlmssp.h | 68
-rw-r--r--  fs/cifs/readdir.c | 78
-rw-r--r--  fs/cifs/sess.c | 373
-rw-r--r--  fs/cifs/smberr.h | 1
-rw-r--r--  fs/compat.c | 48
-rw-r--r--  fs/compat_ioctl.c | 7
-rw-r--r--  fs/configfs/symlink.c | 2
-rw-r--r--  fs/dcache.c | 3
-rw-r--r--  fs/devpts/inode.c | 23
-rw-r--r--  fs/ecryptfs/crypto.c | 21
-rw-r--r--  fs/ecryptfs/ecryptfs_kernel.h | 1
-rw-r--r--  fs/ecryptfs/inode.c | 37
-rw-r--r--  fs/ecryptfs/main.c | 19
-rw-r--r--  fs/ecryptfs/messaging.c | 82
-rw-r--r--  fs/ecryptfs/miscdev.c | 43
-rw-r--r--  fs/ecryptfs/mmap.c | 11
-rw-r--r--  fs/ecryptfs/read_write.c | 32
-rw-r--r--  fs/ecryptfs/super.c | 7
-rw-r--r--  fs/eventpoll.c | 2
-rw-r--r--  fs/exec.c | 111
-rw-r--r--  fs/ext2/super.c | 4
-rw-r--r--  fs/ext4/extents.c | 37
-rw-r--r--  fs/ext4/ialloc.c | 6
-rw-r--r--  fs/ext4/inode.c | 50
-rw-r--r--  fs/fat/Kconfig | 3
-rw-r--r--  fs/fcntl.c | 6
-rw-r--r--  fs/filesystems.c | 2
-rw-r--r--  fs/fscache/internal.h | 18
-rw-r--r--  fs/fuse/inode.c | 4
-rw-r--r--  fs/gfs2/glock.c | 1
-rw-r--r--  fs/gfs2/glops.c | 6
-rw-r--r--  fs/gfs2/ops_file.c | 4
-rw-r--r--  fs/gfs2/ops_fstype.c | 8
-rw-r--r--  fs/gfs2/rgrp.c | 13
-rw-r--r--  fs/hpfs/super.c | 3
-rw-r--r--  fs/hugetlbfs/inode.c | 14
-rw-r--r--  fs/inode.c | 113
-rw-r--r--  fs/ioctl.c | 75
-rw-r--r--  fs/jbd/commit.c | 8
-rw-r--r--  fs/jbd/revoke.c | 20
-rw-r--r--  fs/jbd2/commit.c | 3
-rw-r--r--  fs/jbd2/revoke.c | 21
-rw-r--r--  fs/jffs2/erase.c | 7
-rw-r--r--  fs/libfs.c | 3
-rw-r--r--  fs/lockd/svc.c | 15
-rw-r--r--  fs/namei.c | 15
-rw-r--r--  fs/namespace.c | 30
-rw-r--r--  fs/ncpfs/ioctl.c | 21
-rw-r--r--  fs/nfs/dir.c | 3
-rw-r--r--  fs/nfs/file.c | 6
-rw-r--r--  fs/nfs/nfs3xdr.c | 3
-rw-r--r--  fs/nfs/nfs4proc.c | 9
-rw-r--r--  fs/nfs/nfsroot.c | 2
-rw-r--r--  fs/nfs/super.c | 22
-rw-r--r--  fs/nfsd/nfs4recover.c | 48
-rw-r--r--  fs/nfsd/nfs4state.c | 1
-rw-r--r--  fs/nfsd/nfs4xdr.c | 16
-rw-r--r--  fs/nfsd/vfs.c | 40
-rw-r--r--  fs/nilfs2/cpfile.c | 6
-rw-r--r--  fs/nilfs2/ioctl.c | 281
-rw-r--r--  fs/nilfs2/mdt.c | 15
-rw-r--r--  fs/nilfs2/nilfs.h | 3
-rw-r--r--  fs/nilfs2/page.c | 3
-rw-r--r--  fs/nilfs2/recovery.c | 6
-rw-r--r--  fs/nilfs2/segment.c | 5
-rw-r--r--  fs/nilfs2/segment.h | 3
-rw-r--r--  fs/notify/inotify/inotify_user.c | 2
-rw-r--r--  fs/ocfs2/dcache.c | 15
-rw-r--r--  fs/ocfs2/dir.c | 4
-rw-r--r--  fs/ocfs2/export.c | 9
-rw-r--r--  fs/ocfs2/journal.h | 5
-rw-r--r--  fs/ocfs2/namei.c | 4
-rw-r--r--  fs/ocfs2/suballoc.c | 21
-rw-r--r--  fs/ocfs2/symlink.c | 77
-rw-r--r--  fs/open.c | 2
-rw-r--r--  fs/proc/array.c | 13
-rw-r--r--  fs/proc/base.c | 11
-rw-r--r--  fs/proc/meminfo.c | 2
-rw-r--r--  fs/proc/root.c | 3
-rw-r--r--  fs/proc/stat.c | 5
-rw-r--r--  fs/proc/task_mmu.c | 4
-rw-r--r--  fs/quota/Makefile | 9
-rw-r--r--  fs/reiserfs/dir.c | 24
-rw-r--r--  fs/reiserfs/namei.c | 17
-rw-r--r--  fs/reiserfs/super.c | 11
-rw-r--r--  fs/reiserfs/xattr.c | 260
-rw-r--r--  fs/reiserfs/xattr_security.c | 12
-rw-r--r--  fs/romfs/internal.h | 4
-rw-r--r--  fs/romfs/storage.c | 68
-rw-r--r--  fs/romfs/super.c | 7
-rw-r--r--  fs/splice.c | 3
-rw-r--r--  fs/squashfs/Makefile | 1
-rw-r--r--  fs/squashfs/cache.c | 1
-rw-r--r--  fs/squashfs/super.c | 10
-rw-r--r--  fs/stat.c | 137
-rw-r--r--  fs/super.c | 48
-rw-r--r--  fs/sysfs/bin.c | 13
-rw-r--r--  fs/sysfs/file.c | 16
-rw-r--r--  fs/ubifs/super.c | 3
-rw-r--r--  fs/ufs/dir.c | 2
-rw-r--r--  fs/ufs/file.c | 2
-rw-r--r--  fs/ufs/ufs.h | 2
-rw-r--r--  fs/xattr.c | 10
-rw-r--r--  fs/xfs/linux-2.6/kmem.h | 2
-rw-r--r--  fs/xfs/linux-2.6/xfs_ioctl.c | 23
-rw-r--r--  fs/xfs/linux-2.6/xfs_ioctl32.c | 12
-rw-r--r--  fs/xfs/xfs_bmap.c | 192
-rw-r--r--  fs/xfs/xfs_dfrag.c | 8
-rw-r--r--  fs/xfs/xfs_fsops.c | 2
-rw-r--r--  fs/xfs/xfs_inode.c | 2
-rw-r--r--  fs/xfs/xfs_mount.c | 3
166 files changed, 3391 insertions, 3762 deletions
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index 5f8ab8adb5f..ab5547ff29a 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -37,6 +37,7 @@
37#include <linux/mount.h> 37#include <linux/mount.h>
38#include <linux/idr.h> 38#include <linux/idr.h>
39#include <linux/sched.h> 39#include <linux/sched.h>
40#include <linux/smp_lock.h>
40#include <net/9p/9p.h> 41#include <net/9p/9p.h>
41#include <net/9p/client.h> 42#include <net/9p/client.h>
42 43
@@ -155,6 +156,7 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags,
155 156
156 root = d_alloc_root(inode); 157 root = d_alloc_root(inode);
157 if (!root) { 158 if (!root) {
159 iput(inode);
158 retval = -ENOMEM; 160 retval = -ENOMEM;
159 goto release_sb; 161 goto release_sb;
160 } 162 }
@@ -173,10 +175,7 @@ P9_DPRINTK(P9_DEBUG_VFS, " simple set mount, return 0\n");
173 return 0; 175 return 0;
174 176
175release_sb: 177release_sb:
176 if (sb) { 178 deactivate_locked_super(sb);
177 up_write(&sb->s_umount);
178 deactivate_super(sb);
179 }
180 179
181free_stat: 180free_stat:
182 kfree(st); 181 kfree(st);
@@ -230,9 +229,12 @@ static int v9fs_show_options(struct seq_file *m, struct vfsmount *mnt)
230static void 229static void
231v9fs_umount_begin(struct super_block *sb) 230v9fs_umount_begin(struct super_block *sb)
232{ 231{
233 struct v9fs_session_info *v9ses = sb->s_fs_info; 232 struct v9fs_session_info *v9ses;
234 233
234 lock_kernel();
235 v9ses = sb->s_fs_info;
235 v9fs_session_cancel(v9ses); 236 v9fs_session_cancel(v9ses);
237 unlock_kernel();
236} 238}
237 239
238static const struct super_operations v9fs_super_ops = { 240static const struct super_operations v9fs_super_ops = {
diff --git a/fs/affs/super.c b/fs/affs/super.c
index 5ce695e707f..63f5183f263 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -507,8 +507,7 @@ affs_remount(struct super_block *sb, int *flags, char *data)
507 kfree(new_opts); 507 kfree(new_opts);
508 return -EINVAL; 508 return -EINVAL;
509 } 509 }
510 kfree(sb->s_options); 510 replace_mount_options(sb, new_opts);
511 sb->s_options = new_opts;
512 511
513 sbi->s_flags = mount_flags; 512 sbi->s_flags = mount_flags;
514 sbi->s_mode = mode; 513 sbi->s_mode = mode;
diff --git a/fs/afs/file.c b/fs/afs/file.c
index 7a1d942ef68..0149dab365e 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -102,6 +102,7 @@ int afs_release(struct inode *inode, struct file *file)
102 return 0; 102 return 0;
103} 103}
104 104
105#ifdef CONFIG_AFS_FSCACHE
105/* 106/*
106 * deal with notification that a page was read from the cache 107 * deal with notification that a page was read from the cache
107 */ 108 */
@@ -117,6 +118,7 @@ static void afs_file_readpage_read_complete(struct page *page,
117 SetPageUptodate(page); 118 SetPageUptodate(page);
118 unlock_page(page); 119 unlock_page(page);
119} 120}
121#endif
120 122
121/* 123/*
122 * AFS read page from file, directory or symlink 124 * AFS read page from file, directory or symlink
diff --git a/fs/afs/super.c b/fs/afs/super.c
index aee239a048c..76828e5f8a3 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -405,21 +405,20 @@ static int afs_get_sb(struct file_system_type *fs_type,
405 sb->s_flags = flags; 405 sb->s_flags = flags;
406 ret = afs_fill_super(sb, &params); 406 ret = afs_fill_super(sb, &params);
407 if (ret < 0) { 407 if (ret < 0) {
408 up_write(&sb->s_umount); 408 deactivate_locked_super(sb);
409 deactivate_super(sb);
410 goto error; 409 goto error;
411 } 410 }
412 sb->s_options = new_opts; 411 save_mount_options(sb, new_opts);
413 sb->s_flags |= MS_ACTIVE; 412 sb->s_flags |= MS_ACTIVE;
414 } else { 413 } else {
415 _debug("reuse"); 414 _debug("reuse");
416 kfree(new_opts);
417 ASSERTCMP(sb->s_flags, &, MS_ACTIVE); 415 ASSERTCMP(sb->s_flags, &, MS_ACTIVE);
418 } 416 }
419 417
420 simple_set_mnt(mnt, sb); 418 simple_set_mnt(mnt, sb);
421 afs_put_volume(params.volume); 419 afs_put_volume(params.volume);
422 afs_put_cell(params.cell); 420 afs_put_cell(params.cell);
421 kfree(new_opts);
423 _leave(" = 0 [%p]", sb); 422 _leave(" = 0 [%p]", sb);
424 return 0; 423 return 0;
425 424
diff --git a/fs/autofs/dirhash.c b/fs/autofs/dirhash.c
index bf8c8af9800..4eb4d8dfb2f 100644
--- a/fs/autofs/dirhash.c
+++ b/fs/autofs/dirhash.c
@@ -39,10 +39,12 @@ struct autofs_dir_ent *autofs_expire(struct super_block *sb,
39{ 39{
40 struct autofs_dirhash *dh = &sbi->dirhash; 40 struct autofs_dirhash *dh = &sbi->dirhash;
41 struct autofs_dir_ent *ent; 41 struct autofs_dir_ent *ent;
42 struct dentry *dentry;
43 unsigned long timeout = sbi->exp_timeout; 42 unsigned long timeout = sbi->exp_timeout;
44 43
45 while (1) { 44 while (1) {
45 struct path path;
46 int umount_ok;
47
46 if ( list_empty(&dh->expiry_head) || sbi->catatonic ) 48 if ( list_empty(&dh->expiry_head) || sbi->catatonic )
47 return NULL; /* No entries */ 49 return NULL; /* No entries */
48 /* We keep the list sorted by last_usage and want old stuff */ 50 /* We keep the list sorted by last_usage and want old stuff */
@@ -57,17 +59,17 @@ struct autofs_dir_ent *autofs_expire(struct super_block *sb,
57 return ent; /* Symlinks are always expirable */ 59 return ent; /* Symlinks are always expirable */
58 60
59 /* Get the dentry for the autofs subdirectory */ 61 /* Get the dentry for the autofs subdirectory */
60 dentry = ent->dentry; 62 path.dentry = ent->dentry;
61 63
62 if ( !dentry ) { 64 if (!path.dentry) {
63 /* Should only happen in catatonic mode */ 65 /* Should only happen in catatonic mode */
64 printk("autofs: dentry == NULL but inode range is directory, entry %s\n", ent->name); 66 printk("autofs: dentry == NULL but inode range is directory, entry %s\n", ent->name);
65 autofs_delete_usage(ent); 67 autofs_delete_usage(ent);
66 continue; 68 continue;
67 } 69 }
68 70
69 if ( !dentry->d_inode ) { 71 if (!path.dentry->d_inode) {
70 dput(dentry); 72 dput(path.dentry);
71 printk("autofs: negative dentry on expiry queue: %s\n", 73 printk("autofs: negative dentry on expiry queue: %s\n",
72 ent->name); 74 ent->name);
73 autofs_delete_usage(ent); 75 autofs_delete_usage(ent);
@@ -76,29 +78,29 @@ struct autofs_dir_ent *autofs_expire(struct super_block *sb,
76 78
77 /* Make sure entry is mounted and unused; note that dentry will 79 /* Make sure entry is mounted and unused; note that dentry will
78 point to the mounted-on-top root. */ 80 point to the mounted-on-top root. */
79 if (!S_ISDIR(dentry->d_inode->i_mode)||!d_mountpoint(dentry)) { 81 if (!S_ISDIR(path.dentry->d_inode->i_mode) ||
82 !d_mountpoint(path.dentry)) {
80 DPRINTK(("autofs: not expirable (not a mounted directory): %s\n", ent->name)); 83 DPRINTK(("autofs: not expirable (not a mounted directory): %s\n", ent->name));
81 continue; 84 continue;
82 } 85 }
83 mntget(mnt); 86 path.mnt = mnt;
84 dget(dentry); 87 path_get(&path);
85 if (!follow_down(&mnt, &dentry)) { 88 if (!follow_down(&path.mnt, &path.dentry)) {
86 dput(dentry); 89 path_put(&path);
87 mntput(mnt);
88 DPRINTK(("autofs: not expirable (not a mounted directory): %s\n", ent->name)); 90 DPRINTK(("autofs: not expirable (not a mounted directory): %s\n", ent->name));
89 continue; 91 continue;
90 } 92 }
91 while (d_mountpoint(dentry) && follow_down(&mnt, &dentry)) 93 while (d_mountpoint(path.dentry) &&
94 follow_down(&path.mnt, &path.dentry))
92 ; 95 ;
93 dput(dentry); 96 umount_ok = may_umount(path.mnt);
97 path_put(&path);
94 98
95 if ( may_umount(mnt) ) { 99 if (umount_ok) {
96 mntput(mnt);
97 DPRINTK(("autofs: signaling expire on %s\n", ent->name)); 100 DPRINTK(("autofs: signaling expire on %s\n", ent->name));
98 return ent; /* Expirable! */ 101 return ent; /* Expirable! */
99 } 102 }
100 DPRINTK(("autofs: didn't expire due to may_umount: %s\n", ent->name)); 103 DPRINTK(("autofs: didn't expire due to may_umount: %s\n", ent->name));
101 mntput(mnt);
102 } 104 }
103 return NULL; /* No expirable entries */ 105 return NULL; /* No expirable entries */
104} 106}
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c
index 9e5ae8a4f5c..84168c0dcc2 100644
--- a/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs4/dev-ioctl.c
@@ -54,11 +54,10 @@ static int check_name(const char *name)
54 * Check a string doesn't overrun the chunk of 54 * Check a string doesn't overrun the chunk of
55 * memory we copied from user land. 55 * memory we copied from user land.
56 */ 56 */
57static int invalid_str(char *str, void *end) 57static int invalid_str(char *str, size_t size)
58{ 58{
59 while ((void *) str <= end) 59 if (memchr(str, 0, size))
60 if (!*str++) 60 return 0;
61 return 0;
62 return -EINVAL; 61 return -EINVAL;
63} 62}
64 63
@@ -138,8 +137,7 @@ static int validate_dev_ioctl(int cmd, struct autofs_dev_ioctl *param)
138 } 137 }
139 138
140 if (param->size > sizeof(*param)) { 139 if (param->size > sizeof(*param)) {
141 err = invalid_str(param->path, 140 err = invalid_str(param->path, param->size - sizeof(*param));
142 (void *) ((size_t) param + param->size));
143 if (err) { 141 if (err) {
144 AUTOFS_WARN( 142 AUTOFS_WARN(
145 "path string terminator missing for cmd(0x%08x)", 143 "path string terminator missing for cmd(0x%08x)",
@@ -488,7 +486,7 @@ static int autofs_dev_ioctl_requester(struct file *fp,
488 } 486 }
489 487
490 path = param->path; 488 path = param->path;
491 devid = sbi->sb->s_dev; 489 devid = new_encode_dev(sbi->sb->s_dev);
492 490
493 param->requester.uid = param->requester.gid = -1; 491 param->requester.uid = param->requester.gid = -1;
494 492
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index 75f7ddacf7d..3077d8f1652 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -70,8 +70,10 @@ static int autofs4_mount_busy(struct vfsmount *mnt, struct dentry *dentry)
70 * Otherwise it's an offset mount and we need to check 70 * Otherwise it's an offset mount and we need to check
71 * if we can umount its mount, if there is one. 71 * if we can umount its mount, if there is one.
72 */ 72 */
73 if (!d_mountpoint(dentry)) 73 if (!d_mountpoint(dentry)) {
74 status = 0;
74 goto done; 75 goto done;
76 }
75 } 77 }
76 78
77 /* Update the expiry counter if fs is busy */ 79 /* Update the expiry counter if fs is busy */
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index eeb24684590..2341375386f 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -297,20 +297,14 @@ static int validate_request(struct autofs_wait_queue **wait,
297 */ 297 */
298 if (notify == NFY_MOUNT) { 298 if (notify == NFY_MOUNT) {
299 /* 299 /*
300 * If the dentry isn't hashed just go ahead and try the 300 * If the dentry was successfully mounted while we slept
301 * mount again with a new wait (not much else we can do). 301 * on the wait queue mutex we can return success. If it
302 */ 302 * isn't mounted (doesn't have submounts for the case of
303 if (!d_unhashed(dentry)) { 303 * a multi-mount with no mount at it's base) we can
304 /* 304 * continue on and create a new request.
305 * But if the dentry is hashed, that means that we 305 */
306 * got here through the revalidate path. Thus, we 306 if (have_submounts(dentry))
307 * need to check if the dentry has been mounted 307 return 0;
308 * while we waited on the wq_mutex. If it has,
309 * simply return success.
310 */
311 if (d_mountpoint(dentry))
312 return 0;
313 }
314 } 308 }
315 309
316 return 1; 310 return 1;
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 70cfc4b84ae..fdb66faa24f 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -1388,7 +1388,7 @@ static void fill_prstatus(struct elf_prstatus *prstatus,
1388 prstatus->pr_sigpend = p->pending.signal.sig[0]; 1388 prstatus->pr_sigpend = p->pending.signal.sig[0];
1389 prstatus->pr_sighold = p->blocked.sig[0]; 1389 prstatus->pr_sighold = p->blocked.sig[0];
1390 prstatus->pr_pid = task_pid_vnr(p); 1390 prstatus->pr_pid = task_pid_vnr(p);
1391 prstatus->pr_ppid = task_pid_vnr(p->parent); 1391 prstatus->pr_ppid = task_pid_vnr(p->real_parent);
1392 prstatus->pr_pgrp = task_pgrp_vnr(p); 1392 prstatus->pr_pgrp = task_pgrp_vnr(p);
1393 prstatus->pr_sid = task_session_vnr(p); 1393 prstatus->pr_sid = task_session_vnr(p);
1394 if (thread_group_leader(p)) { 1394 if (thread_group_leader(p)) {
@@ -1433,7 +1433,7 @@ static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1433 psinfo->pr_psargs[len] = 0; 1433 psinfo->pr_psargs[len] = 0;
1434 1434
1435 psinfo->pr_pid = task_pid_vnr(p); 1435 psinfo->pr_pid = task_pid_vnr(p);
1436 psinfo->pr_ppid = task_pid_vnr(p->parent); 1436 psinfo->pr_ppid = task_pid_vnr(p->real_parent);
1437 psinfo->pr_pgrp = task_pgrp_vnr(p); 1437 psinfo->pr_pgrp = task_pgrp_vnr(p);
1438 psinfo->pr_sid = task_session_vnr(p); 1438 psinfo->pr_sid = task_session_vnr(p);
1439 1439
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index 5cebf0b3779..697f6b5f131 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -41,6 +41,7 @@
41#include <asm/uaccess.h> 41#include <asm/uaccess.h>
42#include <asm/unaligned.h> 42#include <asm/unaligned.h>
43#include <asm/cacheflush.h> 43#include <asm/cacheflush.h>
44#include <asm/page.h>
44 45
45/****************************************************************************/ 46/****************************************************************************/
46 47
@@ -54,6 +55,18 @@
54#define DBG_FLT(a...) 55#define DBG_FLT(a...)
55#endif 56#endif
56 57
58/*
59 * User data (stack, data section and bss) needs to be aligned
60 * for the same reasons as SLAB memory is, and to the same amount.
61 * Avoid duplicating architecture specific code by using the same
62 * macro as with SLAB allocation:
63 */
64#ifdef ARCH_SLAB_MINALIGN
65#define FLAT_DATA_ALIGN (ARCH_SLAB_MINALIGN)
66#else
67#define FLAT_DATA_ALIGN (sizeof(void *))
68#endif
69
57#define RELOC_FAILED 0xff00ff01 /* Relocation incorrect somewhere */ 70#define RELOC_FAILED 0xff00ff01 /* Relocation incorrect somewhere */
58#define UNLOADED_LIB 0x7ff000ff /* Placeholder for unused library */ 71#define UNLOADED_LIB 0x7ff000ff /* Placeholder for unused library */
59 72
@@ -114,20 +127,18 @@ static unsigned long create_flat_tables(
114 int envc = bprm->envc; 127 int envc = bprm->envc;
115 char uninitialized_var(dummy); 128 char uninitialized_var(dummy);
116 129
117 sp = (unsigned long *) ((-(unsigned long)sizeof(char *))&(unsigned long) p); 130 sp = (unsigned long *)p;
131 sp -= (envc + argc + 2) + 1 + (flat_argvp_envp_on_stack() ? 2 : 0);
132 sp = (unsigned long *) ((unsigned long)sp & -FLAT_DATA_ALIGN);
133 argv = sp + 1 + (flat_argvp_envp_on_stack() ? 2 : 0);
134 envp = argv + (argc + 1);
118 135
119 sp -= envc+1;
120 envp = sp;
121 sp -= argc+1;
122 argv = sp;
123
124 flat_stack_align(sp);
125 if (flat_argvp_envp_on_stack()) { 136 if (flat_argvp_envp_on_stack()) {
126 --sp; put_user((unsigned long) envp, sp); 137 put_user((unsigned long) envp, sp + 2);
127 --sp; put_user((unsigned long) argv, sp); 138 put_user((unsigned long) argv, sp + 1);
128 } 139 }
129 140
130 put_user(argc,--sp); 141 put_user(argc, sp);
131 current->mm->arg_start = (unsigned long) p; 142 current->mm->arg_start = (unsigned long) p;
132 while (argc-->0) { 143 while (argc-->0) {
133 put_user((unsigned long) p, argv++); 144 put_user((unsigned long) p, argv++);
@@ -558,7 +569,9 @@ static int load_flat_file(struct linux_binprm * bprm,
558 ret = realdatastart; 569 ret = realdatastart;
559 goto err; 570 goto err;
560 } 571 }
561 datapos = realdatastart + MAX_SHARED_LIBS * sizeof(unsigned long); 572 datapos = ALIGN(realdatastart +
573 MAX_SHARED_LIBS * sizeof(unsigned long),
574 FLAT_DATA_ALIGN);
562 575
563 DBG_FLT("BINFMT_FLAT: Allocated data+bss+stack (%d bytes): %x\n", 576 DBG_FLT("BINFMT_FLAT: Allocated data+bss+stack (%d bytes): %x\n",
564 (int)(data_len + bss_len + stack_len), (int)datapos); 577 (int)(data_len + bss_len + stack_len), (int)datapos);
@@ -604,9 +617,12 @@ static int load_flat_file(struct linux_binprm * bprm,
604 } 617 }
605 618
606 realdatastart = textpos + ntohl(hdr->data_start); 619 realdatastart = textpos + ntohl(hdr->data_start);
607 datapos = realdatastart + MAX_SHARED_LIBS * sizeof(unsigned long); 620 datapos = ALIGN(realdatastart +
608 reloc = (unsigned long *) (textpos + ntohl(hdr->reloc_start) + 621 MAX_SHARED_LIBS * sizeof(unsigned long),
609 MAX_SHARED_LIBS * sizeof(unsigned long)); 622 FLAT_DATA_ALIGN);
623
624 reloc = (unsigned long *)
625 (datapos + (ntohl(hdr->reloc_start) - text_len));
610 memp = textpos; 626 memp = textpos;
611 memp_size = len; 627 memp_size = len;
612#ifdef CONFIG_BINFMT_ZFLAT 628#ifdef CONFIG_BINFMT_ZFLAT
@@ -854,7 +870,7 @@ static int load_flat_binary(struct linux_binprm * bprm, struct pt_regs * regs)
854 stack_len = TOP_OF_ARGS - bprm->p; /* the strings */ 870 stack_len = TOP_OF_ARGS - bprm->p; /* the strings */
855 stack_len += (bprm->argc + 1) * sizeof(char *); /* the argv array */ 871 stack_len += (bprm->argc + 1) * sizeof(char *); /* the argv array */
856 stack_len += (bprm->envc + 1) * sizeof(char *); /* the envp array */ 872 stack_len += (bprm->envc + 1) * sizeof(char *); /* the envp array */
857 873 stack_len += FLAT_DATA_ALIGN - 1; /* reserve for upcoming alignment */
858 874
859 res = load_flat_file(bprm, &libinfo, 0, &stack_len); 875 res = load_flat_file(bprm, &libinfo, 0, &stack_len);
860 if (res > (unsigned long)-4096) 876 if (res > (unsigned long)-4096)
diff --git a/fs/bio.c b/fs/bio.c
index cd42bb882f3..98711647ece 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -175,14 +175,6 @@ struct bio_vec *bvec_alloc_bs(gfp_t gfp_mask, int nr, unsigned long *idx,
175 struct bio_vec *bvl; 175 struct bio_vec *bvl;
176 176
177 /* 177 /*
178 * If 'bs' is given, lookup the pool and do the mempool alloc.
179 * If not, this is a bio_kmalloc() allocation and just do a
180 * kzalloc() for the exact number of vecs right away.
181 */
182 if (!bs)
183 bvl = kmalloc(nr * sizeof(struct bio_vec), gfp_mask);
184
185 /*
186 * see comment near bvec_array define! 178 * see comment near bvec_array define!
187 */ 179 */
188 switch (nr) { 180 switch (nr) {
@@ -260,21 +252,6 @@ void bio_free(struct bio *bio, struct bio_set *bs)
260 mempool_free(p, bs->bio_pool); 252 mempool_free(p, bs->bio_pool);
261} 253}
262 254
263/*
264 * default destructor for a bio allocated with bio_alloc_bioset()
265 */
266static void bio_fs_destructor(struct bio *bio)
267{
268 bio_free(bio, fs_bio_set);
269}
270
271static void bio_kmalloc_destructor(struct bio *bio)
272{
273 if (bio_has_allocated_vec(bio))
274 kfree(bio->bi_io_vec);
275 kfree(bio);
276}
277
278void bio_init(struct bio *bio) 255void bio_init(struct bio *bio)
279{ 256{
280 memset(bio, 0, sizeof(*bio)); 257 memset(bio, 0, sizeof(*bio));
@@ -301,21 +278,15 @@ void bio_init(struct bio *bio)
301 **/ 278 **/
302struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) 279struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
303{ 280{
281 unsigned long idx = BIO_POOL_NONE;
304 struct bio_vec *bvl = NULL; 282 struct bio_vec *bvl = NULL;
305 struct bio *bio = NULL; 283 struct bio *bio;
306 unsigned long idx = 0; 284 void *p;
307 void *p = NULL; 285
308 286 p = mempool_alloc(bs->bio_pool, gfp_mask);
309 if (bs) { 287 if (unlikely(!p))
310 p = mempool_alloc(bs->bio_pool, gfp_mask); 288 return NULL;
311 if (!p) 289 bio = p + bs->front_pad;
312 goto err;
313 bio = p + bs->front_pad;
314 } else {
315 bio = kmalloc(sizeof(*bio), gfp_mask);
316 if (!bio)
317 goto err;
318 }
319 290
320 bio_init(bio); 291 bio_init(bio);
321 292
@@ -332,22 +303,50 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
332 303
333 nr_iovecs = bvec_nr_vecs(idx); 304 nr_iovecs = bvec_nr_vecs(idx);
334 } 305 }
306out_set:
335 bio->bi_flags |= idx << BIO_POOL_OFFSET; 307 bio->bi_flags |= idx << BIO_POOL_OFFSET;
336 bio->bi_max_vecs = nr_iovecs; 308 bio->bi_max_vecs = nr_iovecs;
337out_set:
338 bio->bi_io_vec = bvl; 309 bio->bi_io_vec = bvl;
339
340 return bio; 310 return bio;
341 311
342err_free: 312err_free:
343 if (bs) 313 mempool_free(p, bs->bio_pool);
344 mempool_free(p, bs->bio_pool);
345 else
346 kfree(bio);
347err:
348 return NULL; 314 return NULL;
349} 315}
350 316
317static void bio_fs_destructor(struct bio *bio)
318{
319 bio_free(bio, fs_bio_set);
320}
321
322/**
323 * bio_alloc - allocate a new bio, memory pool backed
324 * @gfp_mask: allocation mask to use
325 * @nr_iovecs: number of iovecs
326 *
327 * Allocate a new bio with @nr_iovecs bvecs. If @gfp_mask
328 * contains __GFP_WAIT, the allocation is guaranteed to succeed.
329 *
330 * RETURNS:
331 * Pointer to new bio on success, NULL on failure.
332 */
333struct bio *bio_alloc(gfp_t gfp_mask, int nr_iovecs)
334{
335 struct bio *bio = bio_alloc_bioset(gfp_mask, nr_iovecs, fs_bio_set);
336
337 if (bio)
338 bio->bi_destructor = bio_fs_destructor;
339
340 return bio;
341}
342
343static void bio_kmalloc_destructor(struct bio *bio)
344{
345 if (bio_integrity(bio))
346 bio_integrity_free(bio);
347 kfree(bio);
348}
349
351/** 350/**
352 * bio_alloc - allocate a bio for I/O 351 * bio_alloc - allocate a bio for I/O
353 * @gfp_mask: the GFP_ mask given to the slab allocator 352 * @gfp_mask: the GFP_ mask given to the slab allocator
@@ -366,29 +365,20 @@ err:
366 * do so can cause livelocks under memory pressure. 365 * do so can cause livelocks under memory pressure.
367 * 366 *
368 **/ 367 **/
369struct bio *bio_alloc(gfp_t gfp_mask, int nr_iovecs)
370{
371 struct bio *bio = bio_alloc_bioset(gfp_mask, nr_iovecs, fs_bio_set);
372
373 if (bio)
374 bio->bi_destructor = bio_fs_destructor;
375
376 return bio;
377}
378
379/*
380 * Like bio_alloc(), but doesn't use a mempool backing. This means that
381 * it CAN fail, but while bio_alloc() can only be used for allocations
382 * that have a short (finite) life span, bio_kmalloc() should be used
383 * for more permanent bio allocations (like allocating some bio's for
384 * initalization or setup purposes).
385 */
386struct bio *bio_kmalloc(gfp_t gfp_mask, int nr_iovecs) 368struct bio *bio_kmalloc(gfp_t gfp_mask, int nr_iovecs)
387{ 369{
388 struct bio *bio = bio_alloc_bioset(gfp_mask, nr_iovecs, NULL); 370 struct bio *bio;
389 371
390 if (bio) 372 bio = kmalloc(sizeof(struct bio) + nr_iovecs * sizeof(struct bio_vec),
391 bio->bi_destructor = bio_kmalloc_destructor; 373 gfp_mask);
374 if (unlikely(!bio))
375 return NULL;
376
377 bio_init(bio);
378 bio->bi_flags |= BIO_POOL_NONE << BIO_POOL_OFFSET;
379 bio->bi_max_vecs = nr_iovecs;
380 bio->bi_io_vec = bio->bi_inline_vecs;
381 bio->bi_destructor = bio_kmalloc_destructor;
392 382
393 return bio; 383 return bio;
394} 384}
@@ -827,12 +817,15 @@ struct bio *bio_copy_user_iov(struct request_queue *q,
827 len += iov[i].iov_len; 817 len += iov[i].iov_len;
828 } 818 }
829 819
820 if (offset)
821 nr_pages++;
822
830 bmd = bio_alloc_map_data(nr_pages, iov_count, gfp_mask); 823 bmd = bio_alloc_map_data(nr_pages, iov_count, gfp_mask);
831 if (!bmd) 824 if (!bmd)
832 return ERR_PTR(-ENOMEM); 825 return ERR_PTR(-ENOMEM);
833 826
834 ret = -ENOMEM; 827 ret = -ENOMEM;
835 bio = bio_alloc(gfp_mask, nr_pages); 828 bio = bio_kmalloc(gfp_mask, nr_pages);
836 if (!bio) 829 if (!bio)
837 goto out_bmd; 830 goto out_bmd;
838 831
@@ -956,7 +949,7 @@ static struct bio *__bio_map_user_iov(struct request_queue *q,
956 if (!nr_pages) 949 if (!nr_pages)
957 return ERR_PTR(-EINVAL); 950 return ERR_PTR(-EINVAL);
958 951
959 bio = bio_alloc(gfp_mask, nr_pages); 952 bio = bio_kmalloc(gfp_mask, nr_pages);
960 if (!bio) 953 if (!bio)
961 return ERR_PTR(-ENOMEM); 954 return ERR_PTR(-ENOMEM);
962 955
@@ -1140,7 +1133,7 @@ static struct bio *__bio_map_kern(struct request_queue *q, void *data,
1140 int offset, i; 1133 int offset, i;
1141 struct bio *bio; 1134 struct bio *bio;
1142 1135
1143 bio = bio_alloc(gfp_mask, nr_pages); 1136 bio = bio_kmalloc(gfp_mask, nr_pages);
1144 if (!bio) 1137 if (!bio)
1145 return ERR_PTR(-ENOMEM); 1138 return ERR_PTR(-ENOMEM);
1146 1139
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile
index 9adf5e4f7e9..94212844a9b 100644
--- a/fs/btrfs/Makefile
+++ b/fs/btrfs/Makefile
@@ -1,25 +1,10 @@
1ifneq ($(KERNELRELEASE),)
2# kbuild part of makefile
3 1
4obj-$(CONFIG_BTRFS_FS) := btrfs.o 2obj-$(CONFIG_BTRFS_FS) := btrfs.o
5btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ 3
4btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
6 file-item.o inode-item.o inode-map.o disk-io.o \ 5 file-item.o inode-item.o inode-map.o disk-io.o \
7 transaction.o inode.o file.o tree-defrag.o \ 6 transaction.o inode.o file.o tree-defrag.o \
8 extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \ 7 extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \
9 extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \ 8 extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \
10 ref-cache.o export.o tree-log.o acl.o free-space-cache.o zlib.o \ 9 ref-cache.o export.o tree-log.o acl.o free-space-cache.o zlib.o \
11 compression.o delayed-ref.o 10 compression.o delayed-ref.o
12else
13
14# Normal Makefile
15
16KERNELDIR := /lib/modules/`uname -r`/build
17all:
18 $(MAKE) -C $(KERNELDIR) M=`pwd` CONFIG_BTRFS_FS=m modules
19
20modules_install:
21 $(MAKE) -C $(KERNELDIR) M=`pwd` modules_install
22clean:
23 $(MAKE) -C $(KERNELDIR) M=`pwd` clean
24
25endif
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index 7fdd184a528..cbba000dccb 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -60,15 +60,20 @@ static struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
60 return ERR_PTR(-EINVAL); 60 return ERR_PTR(-EINVAL);
61 } 61 }
62 62
63 /* Handle the cached NULL acl case without locking */
64 acl = ACCESS_ONCE(*p_acl);
65 if (!acl)
66 return acl;
67
63 spin_lock(&inode->i_lock); 68 spin_lock(&inode->i_lock);
64 if (*p_acl != BTRFS_ACL_NOT_CACHED) 69 acl = *p_acl;
65 acl = posix_acl_dup(*p_acl); 70 if (acl != BTRFS_ACL_NOT_CACHED)
71 acl = posix_acl_dup(acl);
66 spin_unlock(&inode->i_lock); 72 spin_unlock(&inode->i_lock);
67 73
68 if (acl) 74 if (acl != BTRFS_ACL_NOT_CACHED)
69 return acl; 75 return acl;
70 76
71
72 size = __btrfs_getxattr(inode, name, "", 0); 77 size = __btrfs_getxattr(inode, name, "", 0);
73 if (size > 0) { 78 if (size > 0) {
74 value = kzalloc(size, GFP_NOFS); 79 value = kzalloc(size, GFP_NOFS);
@@ -80,9 +85,12 @@ static struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
80 btrfs_update_cached_acl(inode, p_acl, acl); 85 btrfs_update_cached_acl(inode, p_acl, acl);
81 } 86 }
82 kfree(value); 87 kfree(value);
83 } else if (size == -ENOENT) { 88 } else if (size == -ENOENT || size == -ENODATA || size == 0) {
89 /* FIXME, who returns -ENOENT? I think nobody */
84 acl = NULL; 90 acl = NULL;
85 btrfs_update_cached_acl(inode, p_acl, acl); 91 btrfs_update_cached_acl(inode, p_acl, acl);
92 } else {
93 acl = ERR_PTR(-EIO);
86 } 94 }
87 95
88 return acl; 96 return acl;
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c
index 51bfdfc8fcd..502c3d61de6 100644
--- a/fs/btrfs/async-thread.c
+++ b/fs/btrfs/async-thread.c
@@ -25,6 +25,7 @@
25#define WORK_QUEUED_BIT 0 25#define WORK_QUEUED_BIT 0
26#define WORK_DONE_BIT 1 26#define WORK_DONE_BIT 1
27#define WORK_ORDER_DONE_BIT 2 27#define WORK_ORDER_DONE_BIT 2
28#define WORK_HIGH_PRIO_BIT 3
28 29
29/* 30/*
30 * container for the kthread task pointer and the list of pending work 31 * container for the kthread task pointer and the list of pending work
@@ -36,6 +37,7 @@ struct btrfs_worker_thread {
36 37
37 /* list of struct btrfs_work that are waiting for service */ 38 /* list of struct btrfs_work that are waiting for service */
38 struct list_head pending; 39 struct list_head pending;
40 struct list_head prio_pending;
39 41
40 /* list of worker threads from struct btrfs_workers */ 42 /* list of worker threads from struct btrfs_workers */
41 struct list_head worker_list; 43 struct list_head worker_list;
@@ -103,10 +105,16 @@ static noinline int run_ordered_completions(struct btrfs_workers *workers,
103 105
104 spin_lock_irqsave(&workers->lock, flags); 106 spin_lock_irqsave(&workers->lock, flags);
105 107
106 while (!list_empty(&workers->order_list)) { 108 while (1) {
107 work = list_entry(workers->order_list.next, 109 if (!list_empty(&workers->prio_order_list)) {
108 struct btrfs_work, order_list); 110 work = list_entry(workers->prio_order_list.next,
109 111 struct btrfs_work, order_list);
112 } else if (!list_empty(&workers->order_list)) {
113 work = list_entry(workers->order_list.next,
114 struct btrfs_work, order_list);
115 } else {
116 break;
117 }
110 if (!test_bit(WORK_DONE_BIT, &work->flags)) 118 if (!test_bit(WORK_DONE_BIT, &work->flags))
111 break; 119 break;
112 120
@@ -143,8 +151,14 @@ static int worker_loop(void *arg)
143 do { 151 do {
144 spin_lock_irq(&worker->lock); 152 spin_lock_irq(&worker->lock);
145again_locked: 153again_locked:
146 while (!list_empty(&worker->pending)) { 154 while (1) {
147 cur = worker->pending.next; 155 if (!list_empty(&worker->prio_pending))
156 cur = worker->prio_pending.next;
157 else if (!list_empty(&worker->pending))
158 cur = worker->pending.next;
159 else
160 break;
161
148 work = list_entry(cur, struct btrfs_work, list); 162 work = list_entry(cur, struct btrfs_work, list);
149 list_del(&work->list); 163 list_del(&work->list);
150 clear_bit(WORK_QUEUED_BIT, &work->flags); 164 clear_bit(WORK_QUEUED_BIT, &work->flags);
@@ -163,7 +177,6 @@ again_locked:
163 177
164 spin_lock_irq(&worker->lock); 178 spin_lock_irq(&worker->lock);
165 check_idle_worker(worker); 179 check_idle_worker(worker);
166
167 } 180 }
168 if (freezing(current)) { 181 if (freezing(current)) {
169 worker->working = 0; 182 worker->working = 0;
@@ -178,7 +191,8 @@ again_locked:
178 * jump_in? 191 * jump_in?
179 */ 192 */
180 smp_mb(); 193 smp_mb();
181 if (!list_empty(&worker->pending)) 194 if (!list_empty(&worker->pending) ||
195 !list_empty(&worker->prio_pending))
182 continue; 196 continue;
183 197
184 /* 198 /*
@@ -191,7 +205,8 @@ again_locked:
191 */ 205 */
192 schedule_timeout(1); 206 schedule_timeout(1);
193 smp_mb(); 207 smp_mb();
194 if (!list_empty(&worker->pending)) 208 if (!list_empty(&worker->pending) ||
209 !list_empty(&worker->prio_pending))
195 continue; 210 continue;
196 211
197 if (kthread_should_stop()) 212 if (kthread_should_stop())
@@ -200,7 +215,8 @@ again_locked:
200 /* still no more work?, sleep for real */ 215 /* still no more work?, sleep for real */
201 spin_lock_irq(&worker->lock); 216 spin_lock_irq(&worker->lock);
202 set_current_state(TASK_INTERRUPTIBLE); 217 set_current_state(TASK_INTERRUPTIBLE);
203 if (!list_empty(&worker->pending)) 218 if (!list_empty(&worker->pending) ||
219 !list_empty(&worker->prio_pending))
204 goto again_locked; 220 goto again_locked;
205 221
206 /* 222 /*
@@ -248,6 +264,7 @@ void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max)
248 INIT_LIST_HEAD(&workers->worker_list); 264 INIT_LIST_HEAD(&workers->worker_list);
249 INIT_LIST_HEAD(&workers->idle_list); 265 INIT_LIST_HEAD(&workers->idle_list);
250 INIT_LIST_HEAD(&workers->order_list); 266 INIT_LIST_HEAD(&workers->order_list);
267 INIT_LIST_HEAD(&workers->prio_order_list);
251 spin_lock_init(&workers->lock); 268 spin_lock_init(&workers->lock);
252 workers->max_workers = max; 269 workers->max_workers = max;
253 workers->idle_thresh = 32; 270 workers->idle_thresh = 32;
@@ -273,6 +290,7 @@ int btrfs_start_workers(struct btrfs_workers *workers, int num_workers)
273 } 290 }
274 291
275 INIT_LIST_HEAD(&worker->pending); 292 INIT_LIST_HEAD(&worker->pending);
293 INIT_LIST_HEAD(&worker->prio_pending);
276 INIT_LIST_HEAD(&worker->worker_list); 294 INIT_LIST_HEAD(&worker->worker_list);
277 spin_lock_init(&worker->lock); 295 spin_lock_init(&worker->lock);
278 atomic_set(&worker->num_pending, 0); 296 atomic_set(&worker->num_pending, 0);
@@ -396,7 +414,10 @@ int btrfs_requeue_work(struct btrfs_work *work)
396 goto out; 414 goto out;
397 415
398 spin_lock_irqsave(&worker->lock, flags); 416 spin_lock_irqsave(&worker->lock, flags);
399 list_add_tail(&work->list, &worker->pending); 417 if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags))
418 list_add_tail(&work->list, &worker->prio_pending);
419 else
420 list_add_tail(&work->list, &worker->pending);
400 atomic_inc(&worker->num_pending); 421 atomic_inc(&worker->num_pending);
401 422
402 /* by definition we're busy, take ourselves off the idle 423 /* by definition we're busy, take ourselves off the idle
@@ -422,6 +443,11 @@ out:
422 return 0; 443 return 0;
423} 444}
424 445
446void btrfs_set_work_high_prio(struct btrfs_work *work)
447{
448 set_bit(WORK_HIGH_PRIO_BIT, &work->flags);
449}
450
425/* 451/*
426 * places a struct btrfs_work into the pending queue of one of the kthreads 452 * places a struct btrfs_work into the pending queue of one of the kthreads
427 */ 453 */
@@ -438,7 +464,12 @@ int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work)
438 worker = find_worker(workers); 464 worker = find_worker(workers);
439 if (workers->ordered) { 465 if (workers->ordered) {
440 spin_lock_irqsave(&workers->lock, flags); 466 spin_lock_irqsave(&workers->lock, flags);
441 list_add_tail(&work->order_list, &workers->order_list); 467 if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags)) {
468 list_add_tail(&work->order_list,
469 &workers->prio_order_list);
470 } else {
471 list_add_tail(&work->order_list, &workers->order_list);
472 }
442 spin_unlock_irqrestore(&workers->lock, flags); 473 spin_unlock_irqrestore(&workers->lock, flags);
443 } else { 474 } else {
444 INIT_LIST_HEAD(&work->order_list); 475 INIT_LIST_HEAD(&work->order_list);
@@ -446,7 +477,10 @@ int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work)
446 477
447 spin_lock_irqsave(&worker->lock, flags); 478 spin_lock_irqsave(&worker->lock, flags);
448 479
449 list_add_tail(&work->list, &worker->pending); 480 if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags))
481 list_add_tail(&work->list, &worker->prio_pending);
482 else
483 list_add_tail(&work->list, &worker->pending);
450 atomic_inc(&worker->num_pending); 484 atomic_inc(&worker->num_pending);
451 check_busy_worker(worker); 485 check_busy_worker(worker);
452 486
diff --git a/fs/btrfs/async-thread.h b/fs/btrfs/async-thread.h
index 31be4ed8b63..1b511c109db 100644
--- a/fs/btrfs/async-thread.h
+++ b/fs/btrfs/async-thread.h
@@ -85,6 +85,7 @@ struct btrfs_workers {
85 * of work items waiting for completion 85 * of work items waiting for completion
86 */ 86 */
87 struct list_head order_list; 87 struct list_head order_list;
88 struct list_head prio_order_list;
88 89
89 /* lock for finding the next worker thread to queue on */ 90 /* lock for finding the next worker thread to queue on */
90 spinlock_t lock; 91 spinlock_t lock;
@@ -98,4 +99,5 @@ int btrfs_start_workers(struct btrfs_workers *workers, int num_workers);
98int btrfs_stop_workers(struct btrfs_workers *workers); 99int btrfs_stop_workers(struct btrfs_workers *workers);
99void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max); 100void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max);
100int btrfs_requeue_work(struct btrfs_work *work); 101int btrfs_requeue_work(struct btrfs_work *work);
102void btrfs_set_work_high_prio(struct btrfs_work *work);
101#endif 103#endif
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index e5b2533b691..fedf8b9f03a 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -1325,12 +1325,12 @@ static noinline int reada_for_balance(struct btrfs_root *root,
1325 int ret = 0; 1325 int ret = 0;
1326 int blocksize; 1326 int blocksize;
1327 1327
1328 parent = path->nodes[level - 1]; 1328 parent = path->nodes[level + 1];
1329 if (!parent) 1329 if (!parent)
1330 return 0; 1330 return 0;
1331 1331
1332 nritems = btrfs_header_nritems(parent); 1332 nritems = btrfs_header_nritems(parent);
1333 slot = path->slots[level]; 1333 slot = path->slots[level + 1];
1334 blocksize = btrfs_level_size(root, level); 1334 blocksize = btrfs_level_size(root, level);
1335 1335
1336 if (slot > 0) { 1336 if (slot > 0) {
@@ -1341,7 +1341,7 @@ static noinline int reada_for_balance(struct btrfs_root *root,
1341 block1 = 0; 1341 block1 = 0;
1342 free_extent_buffer(eb); 1342 free_extent_buffer(eb);
1343 } 1343 }
1344 if (slot < nritems) { 1344 if (slot + 1 < nritems) {
1345 block2 = btrfs_node_blockptr(parent, slot + 1); 1345 block2 = btrfs_node_blockptr(parent, slot + 1);
1346 gen = btrfs_node_ptr_generation(parent, slot + 1); 1346 gen = btrfs_node_ptr_generation(parent, slot + 1);
1347 eb = btrfs_find_tree_block(root, block2, blocksize); 1347 eb = btrfs_find_tree_block(root, block2, blocksize);
@@ -1351,7 +1351,11 @@ static noinline int reada_for_balance(struct btrfs_root *root,
1351 } 1351 }
1352 if (block1 || block2) { 1352 if (block1 || block2) {
1353 ret = -EAGAIN; 1353 ret = -EAGAIN;
1354
1355 /* release the whole path */
1354 btrfs_release_path(root, path); 1356 btrfs_release_path(root, path);
1357
1358 /* read the blocks */
1355 if (block1) 1359 if (block1)
1356 readahead_tree_block(root, block1, blocksize, 0); 1360 readahead_tree_block(root, block1, blocksize, 0);
1357 if (block2) 1361 if (block2)
@@ -1361,7 +1365,7 @@ static noinline int reada_for_balance(struct btrfs_root *root,
1361 eb = read_tree_block(root, block1, blocksize, 0); 1365 eb = read_tree_block(root, block1, blocksize, 0);
1362 free_extent_buffer(eb); 1366 free_extent_buffer(eb);
1363 } 1367 }
1364 if (block1) { 1368 if (block2) {
1365 eb = read_tree_block(root, block2, blocksize, 0); 1369 eb = read_tree_block(root, block2, blocksize, 0);
1366 free_extent_buffer(eb); 1370 free_extent_buffer(eb);
1367 } 1371 }
@@ -1465,6 +1469,7 @@ read_block_for_search(struct btrfs_trans_handle *trans,
1465 u32 blocksize; 1469 u32 blocksize;
1466 struct extent_buffer *b = *eb_ret; 1470 struct extent_buffer *b = *eb_ret;
1467 struct extent_buffer *tmp; 1471 struct extent_buffer *tmp;
1472 int ret;
1468 1473
1469 blocknr = btrfs_node_blockptr(b, slot); 1474 blocknr = btrfs_node_blockptr(b, slot);
1470 gen = btrfs_node_ptr_generation(b, slot); 1475 gen = btrfs_node_ptr_generation(b, slot);
@@ -1472,6 +1477,10 @@ read_block_for_search(struct btrfs_trans_handle *trans,
1472 1477
1473 tmp = btrfs_find_tree_block(root, blocknr, blocksize); 1478 tmp = btrfs_find_tree_block(root, blocknr, blocksize);
1474 if (tmp && btrfs_buffer_uptodate(tmp, gen)) { 1479 if (tmp && btrfs_buffer_uptodate(tmp, gen)) {
1480 /*
1481 * we found an up to date block without sleeping, return
1482 * right away
1483 */
1475 *eb_ret = tmp; 1484 *eb_ret = tmp;
1476 return 0; 1485 return 0;
1477 } 1486 }
@@ -1479,18 +1488,34 @@ read_block_for_search(struct btrfs_trans_handle *trans,
1479 /* 1488 /*
1480 * reduce lock contention at high levels 1489 * reduce lock contention at high levels
1481 * of the btree by dropping locks before 1490 * of the btree by dropping locks before
1482 * we read. 1491 * we read. Don't release the lock on the current
1492 * level because we need to walk this node to figure
1493 * out which blocks to read.
1483 */ 1494 */
1484 btrfs_release_path(NULL, p); 1495 btrfs_unlock_up_safe(p, level + 1);
1496 btrfs_set_path_blocking(p);
1497
1485 if (tmp) 1498 if (tmp)
1486 free_extent_buffer(tmp); 1499 free_extent_buffer(tmp);
1487 if (p->reada) 1500 if (p->reada)
1488 reada_for_search(root, p, level, slot, key->objectid); 1501 reada_for_search(root, p, level, slot, key->objectid);
1489 1502
1503 btrfs_release_path(NULL, p);
1504
1505 ret = -EAGAIN;
1490 tmp = read_tree_block(root, blocknr, blocksize, gen); 1506 tmp = read_tree_block(root, blocknr, blocksize, gen);
1491 if (tmp) 1507 if (tmp) {
1508 /*
1509 * If the read above didn't mark this buffer up to date,
1510 * it will never end up being up to date. Set ret to EIO now
1511 * and give up so that our caller doesn't loop forever
1512 * on our EAGAINs.
1513 */
1514 if (!btrfs_buffer_uptodate(tmp, 0))
1515 ret = -EIO;
1492 free_extent_buffer(tmp); 1516 free_extent_buffer(tmp);
1493 return -EAGAIN; 1517 }
1518 return ret;
1494} 1519}
1495 1520
1496/* 1521/*
@@ -1689,6 +1714,9 @@ cow_done:
1689 if (ret == -EAGAIN) 1714 if (ret == -EAGAIN)
1690 goto again; 1715 goto again;
1691 1716
1717 if (ret == -EIO)
1718 goto done;
1719
1692 if (!p->skip_locking) { 1720 if (!p->skip_locking) {
1693 int lret; 1721 int lret;
1694 1722
@@ -1731,6 +1759,8 @@ done:
1731 */ 1759 */
1732 if (!p->leave_spinning) 1760 if (!p->leave_spinning)
1733 btrfs_set_path_blocking(p); 1761 btrfs_set_path_blocking(p);
1762 if (ret < 0)
1763 btrfs_release_path(root, p);
1734 return ret; 1764 return ret;
1735} 1765}
1736 1766
@@ -4205,6 +4235,11 @@ again:
4205 if (ret == -EAGAIN) 4235 if (ret == -EAGAIN)
4206 goto again; 4236 goto again;
4207 4237
4238 if (ret < 0) {
4239 btrfs_release_path(root, path);
4240 goto done;
4241 }
4242
4208 if (!path->skip_locking) { 4243 if (!path->skip_locking) {
4209 ret = btrfs_try_spin_lock(next); 4244 ret = btrfs_try_spin_lock(next);
4210 if (!ret) { 4245 if (!ret) {
@@ -4239,6 +4274,11 @@ again:
4239 if (ret == -EAGAIN) 4274 if (ret == -EAGAIN)
4240 goto again; 4275 goto again;
4241 4276
4277 if (ret < 0) {
4278 btrfs_release_path(root, path);
4279 goto done;
4280 }
4281
4242 if (!path->skip_locking) { 4282 if (!path->skip_locking) {
4243 btrfs_assert_tree_locked(path->nodes[level]); 4283 btrfs_assert_tree_locked(path->nodes[level]);
4244 ret = btrfs_try_spin_lock(next); 4284 ret = btrfs_try_spin_lock(next);
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index ad96495dedc..4414a5d9983 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -881,6 +881,9 @@ struct btrfs_fs_info {
881 u64 metadata_alloc_profile; 881 u64 metadata_alloc_profile;
882 u64 system_alloc_profile; 882 u64 system_alloc_profile;
883 883
884 unsigned data_chunk_allocations;
885 unsigned metadata_ratio;
886
884 void *bdev_holder; 887 void *bdev_holder;
885}; 888};
886 889
@@ -2174,7 +2177,8 @@ int btrfs_check_file(struct btrfs_root *root, struct inode *inode);
2174extern struct file_operations btrfs_file_operations; 2177extern struct file_operations btrfs_file_operations;
2175int btrfs_drop_extents(struct btrfs_trans_handle *trans, 2178int btrfs_drop_extents(struct btrfs_trans_handle *trans,
2176 struct btrfs_root *root, struct inode *inode, 2179 struct btrfs_root *root, struct inode *inode,
2177 u64 start, u64 end, u64 inline_limit, u64 *hint_block); 2180 u64 start, u64 end, u64 locked_end,
2181 u64 inline_limit, u64 *hint_block);
2178int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, 2182int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
2179 struct btrfs_root *root, 2183 struct btrfs_root *root,
2180 struct inode *inode, u64 start, u64 end); 2184 struct inode *inode, u64 start, u64 end);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 92caa8035f3..4b0ea0b80c2 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -232,10 +232,14 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf,
232 memcpy(&found, result, csum_size); 232 memcpy(&found, result, csum_size);
233 233
234 read_extent_buffer(buf, &val, 0, csum_size); 234 read_extent_buffer(buf, &val, 0, csum_size);
235 printk(KERN_INFO "btrfs: %s checksum verify failed " 235 if (printk_ratelimit()) {
236 "on %llu wanted %X found %X level %d\n", 236 printk(KERN_INFO "btrfs: %s checksum verify "
237 root->fs_info->sb->s_id, 237 "failed on %llu wanted %X found %X "
238 buf->start, val, found, btrfs_header_level(buf)); 238 "level %d\n",
239 root->fs_info->sb->s_id,
240 (unsigned long long)buf->start, val, found,
241 btrfs_header_level(buf));
242 }
239 if (result != (char *)&inline_result) 243 if (result != (char *)&inline_result)
240 kfree(result); 244 kfree(result);
241 return 1; 245 return 1;
@@ -268,10 +272,13 @@ static int verify_parent_transid(struct extent_io_tree *io_tree,
268 ret = 0; 272 ret = 0;
269 goto out; 273 goto out;
270 } 274 }
271 printk("parent transid verify failed on %llu wanted %llu found %llu\n", 275 if (printk_ratelimit()) {
272 (unsigned long long)eb->start, 276 printk("parent transid verify failed on %llu wanted %llu "
273 (unsigned long long)parent_transid, 277 "found %llu\n",
274 (unsigned long long)btrfs_header_generation(eb)); 278 (unsigned long long)eb->start,
279 (unsigned long long)parent_transid,
280 (unsigned long long)btrfs_header_generation(eb));
281 }
275 ret = 1; 282 ret = 1;
276 clear_extent_buffer_uptodate(io_tree, eb); 283 clear_extent_buffer_uptodate(io_tree, eb);
277out: 284out:
@@ -415,9 +422,12 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
415 422
416 found_start = btrfs_header_bytenr(eb); 423 found_start = btrfs_header_bytenr(eb);
417 if (found_start != start) { 424 if (found_start != start) {
418 printk(KERN_INFO "btrfs bad tree block start %llu %llu\n", 425 if (printk_ratelimit()) {
419 (unsigned long long)found_start, 426 printk(KERN_INFO "btrfs bad tree block start "
420 (unsigned long long)eb->start); 427 "%llu %llu\n",
428 (unsigned long long)found_start,
429 (unsigned long long)eb->start);
430 }
421 ret = -EIO; 431 ret = -EIO;
422 goto err; 432 goto err;
423 } 433 }
@@ -429,8 +439,10 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
429 goto err; 439 goto err;
430 } 440 }
431 if (check_tree_block_fsid(root, eb)) { 441 if (check_tree_block_fsid(root, eb)) {
432 printk(KERN_INFO "btrfs bad fsid on block %llu\n", 442 if (printk_ratelimit()) {
433 (unsigned long long)eb->start); 443 printk(KERN_INFO "btrfs bad fsid on block %llu\n",
444 (unsigned long long)eb->start);
445 }
434 ret = -EIO; 446 ret = -EIO;
435 goto err; 447 goto err;
436 } 448 }
@@ -579,19 +591,12 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
579 async->bio_flags = bio_flags; 591 async->bio_flags = bio_flags;
580 592
581 atomic_inc(&fs_info->nr_async_submits); 593 atomic_inc(&fs_info->nr_async_submits);
594
595 if (rw & (1 << BIO_RW_SYNCIO))
596 btrfs_set_work_high_prio(&async->work);
597
582 btrfs_queue_worker(&fs_info->workers, &async->work); 598 btrfs_queue_worker(&fs_info->workers, &async->work);
583#if 0
584 int limit = btrfs_async_submit_limit(fs_info);
585 if (atomic_read(&fs_info->nr_async_submits) > limit) {
586 wait_event_timeout(fs_info->async_submit_wait,
587 (atomic_read(&fs_info->nr_async_submits) < limit),
588 HZ/10);
589 599
590 wait_event_timeout(fs_info->async_submit_wait,
591 (atomic_read(&fs_info->nr_async_bios) < limit),
592 HZ/10);
593 }
594#endif
595 while (atomic_read(&fs_info->async_submit_draining) && 600 while (atomic_read(&fs_info->async_submit_draining) &&
596 atomic_read(&fs_info->nr_async_submits)) { 601 atomic_read(&fs_info->nr_async_submits)) {
597 wait_event(fs_info->async_submit_wait, 602 wait_event(fs_info->async_submit_wait,
@@ -656,6 +661,7 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
656 return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, 661 return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio,
657 mirror_num, 0); 662 mirror_num, 0);
658 } 663 }
664
659 /* 665 /*
660 * kthread helpers are used to submit writes so that checksumming 666 * kthread helpers are used to submit writes so that checksumming
661 * can happen in parallel across all CPUs 667 * can happen in parallel across all CPUs
@@ -765,27 +771,6 @@ static void btree_invalidatepage(struct page *page, unsigned long offset)
765 } 771 }
766} 772}
767 773
768#if 0
769static int btree_writepage(struct page *page, struct writeback_control *wbc)
770{
771 struct buffer_head *bh;
772 struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
773 struct buffer_head *head;
774 if (!page_has_buffers(page)) {
775 create_empty_buffers(page, root->fs_info->sb->s_blocksize,
776 (1 << BH_Dirty)|(1 << BH_Uptodate));
777 }
778 head = page_buffers(page);
779 bh = head;
780 do {
781 if (buffer_dirty(bh))
782 csum_tree_block(root, bh, 0);
783 bh = bh->b_this_page;
784 } while (bh != head);
785 return block_write_full_page(page, btree_get_block, wbc);
786}
787#endif
788
789static struct address_space_operations btree_aops = { 774static struct address_space_operations btree_aops = {
790 .readpage = btree_readpage, 775 .readpage = btree_readpage,
791 .writepage = btree_writepage, 776 .writepage = btree_writepage,
@@ -863,8 +848,6 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr,
863 848
864 if (ret == 0) 849 if (ret == 0)
865 set_bit(EXTENT_BUFFER_UPTODATE, &buf->bflags); 850 set_bit(EXTENT_BUFFER_UPTODATE, &buf->bflags);
866 else
867 WARN_ON(1);
868 return buf; 851 return buf;
869 852
870} 853}
@@ -1273,11 +1256,7 @@ static int btrfs_congested_fn(void *congested_data, int bdi_bits)
1273 int ret = 0; 1256 int ret = 0;
1274 struct btrfs_device *device; 1257 struct btrfs_device *device;
1275 struct backing_dev_info *bdi; 1258 struct backing_dev_info *bdi;
1276#if 0 1259
1277 if ((bdi_bits & (1 << BDI_write_congested)) &&
1278 btrfs_congested_async(info, 0))
1279 return 1;
1280#endif
1281 list_for_each_entry(device, &info->fs_devices->devices, dev_list) { 1260 list_for_each_entry(device, &info->fs_devices->devices, dev_list) {
1282 if (!device->bdev) 1261 if (!device->bdev)
1283 continue; 1262 continue;
@@ -1599,6 +1578,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1599 fs_info->btree_inode = new_inode(sb); 1578 fs_info->btree_inode = new_inode(sb);
1600 fs_info->btree_inode->i_ino = 1; 1579 fs_info->btree_inode->i_ino = 1;
1601 fs_info->btree_inode->i_nlink = 1; 1580 fs_info->btree_inode->i_nlink = 1;
1581 fs_info->metadata_ratio = 8;
1602 1582
1603 fs_info->thread_pool_size = min_t(unsigned long, 1583 fs_info->thread_pool_size = min_t(unsigned long,
1604 num_online_cpus() + 2, 8); 1584 num_online_cpus() + 2, 8);
@@ -1689,7 +1669,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1689 if (features) { 1669 if (features) {
1690 printk(KERN_ERR "BTRFS: couldn't mount because of " 1670 printk(KERN_ERR "BTRFS: couldn't mount because of "
1691 "unsupported optional features (%Lx).\n", 1671 "unsupported optional features (%Lx).\n",
1692 features); 1672 (unsigned long long)features);
1693 err = -EINVAL; 1673 err = -EINVAL;
1694 goto fail_iput; 1674 goto fail_iput;
1695 } 1675 }
@@ -1699,7 +1679,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1699 if (!(sb->s_flags & MS_RDONLY) && features) { 1679 if (!(sb->s_flags & MS_RDONLY) && features) {
1700 printk(KERN_ERR "BTRFS: couldn't mount RDWR because of " 1680 printk(KERN_ERR "BTRFS: couldn't mount RDWR because of "
1701 "unsupported option features (%Lx).\n", 1681 "unsupported option features (%Lx).\n",
1702 features); 1682 (unsigned long long)features);
1703 err = -EINVAL; 1683 err = -EINVAL;
1704 goto fail_iput; 1684 goto fail_iput;
1705 } 1685 }
@@ -2095,10 +2075,10 @@ static int write_dev_supers(struct btrfs_device *device,
2095 device->barriers = 0; 2075 device->barriers = 0;
2096 get_bh(bh); 2076 get_bh(bh);
2097 lock_buffer(bh); 2077 lock_buffer(bh);
2098 ret = submit_bh(WRITE, bh); 2078 ret = submit_bh(WRITE_SYNC, bh);
2099 } 2079 }
2100 } else { 2080 } else {
2101 ret = submit_bh(WRITE, bh); 2081 ret = submit_bh(WRITE_SYNC, bh);
2102 } 2082 }
2103 2083
2104 if (!ret && wait) { 2084 if (!ret && wait) {
@@ -2291,7 +2271,7 @@ int close_ctree(struct btrfs_root *root)
2291 2271
2292 if (fs_info->delalloc_bytes) { 2272 if (fs_info->delalloc_bytes) {
2293 printk(KERN_INFO "btrfs: at unmount delalloc count %llu\n", 2273 printk(KERN_INFO "btrfs: at unmount delalloc count %llu\n",
2294 fs_info->delalloc_bytes); 2274 (unsigned long long)fs_info->delalloc_bytes);
2295 } 2275 }
2296 if (fs_info->total_ref_cache_size) { 2276 if (fs_info->total_ref_cache_size) {
2297 printk(KERN_INFO "btrfs: at umount reference cache size %llu\n", 2277 printk(KERN_INFO "btrfs: at umount reference cache size %llu\n",
@@ -2328,16 +2308,6 @@ int close_ctree(struct btrfs_root *root)
2328 btrfs_stop_workers(&fs_info->endio_write_workers); 2308 btrfs_stop_workers(&fs_info->endio_write_workers);
2329 btrfs_stop_workers(&fs_info->submit_workers); 2309 btrfs_stop_workers(&fs_info->submit_workers);
2330 2310
2331#if 0
2332 while (!list_empty(&fs_info->hashers)) {
2333 struct btrfs_hasher *hasher;
2334 hasher = list_entry(fs_info->hashers.next, struct btrfs_hasher,
2335 hashers);
2336 list_del(&hasher->hashers);
2337 crypto_free_hash(&fs_info->hash_tfm);
2338 kfree(hasher);
2339 }
2340#endif
2341 btrfs_close_devices(fs_info->fs_devices); 2311 btrfs_close_devices(fs_info->fs_devices);
2342 btrfs_mapping_tree_free(&fs_info->mapping_tree); 2312 btrfs_mapping_tree_free(&fs_info->mapping_tree);
2343 2313
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 178df4c67de..35af9335506 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -312,7 +312,7 @@ btrfs_lookup_first_block_group(struct btrfs_fs_info *info, u64 bytenr)
312} 312}
313 313
314/* 314/*
315 * return the block group that contains teh given bytenr 315 * return the block group that contains the given bytenr
316 */ 316 */
317struct btrfs_block_group_cache *btrfs_lookup_block_group( 317struct btrfs_block_group_cache *btrfs_lookup_block_group(
318 struct btrfs_fs_info *info, 318 struct btrfs_fs_info *info,
@@ -1844,10 +1844,14 @@ again:
1844 printk(KERN_ERR "no space left, need %llu, %llu delalloc bytes" 1844 printk(KERN_ERR "no space left, need %llu, %llu delalloc bytes"
1845 ", %llu bytes_used, %llu bytes_reserved, " 1845 ", %llu bytes_used, %llu bytes_reserved, "
1846 "%llu bytes_pinned, %llu bytes_readonly, %llu may use" 1846 "%llu bytes_pinned, %llu bytes_readonly, %llu may use"
1847 "%llu total\n", bytes, data_sinfo->bytes_delalloc, 1847 "%llu total\n", (unsigned long long)bytes,
1848 data_sinfo->bytes_used, data_sinfo->bytes_reserved, 1848 (unsigned long long)data_sinfo->bytes_delalloc,
1849 data_sinfo->bytes_pinned, data_sinfo->bytes_readonly, 1849 (unsigned long long)data_sinfo->bytes_used,
1850 data_sinfo->bytes_may_use, data_sinfo->total_bytes); 1850 (unsigned long long)data_sinfo->bytes_reserved,
1851 (unsigned long long)data_sinfo->bytes_pinned,
1852 (unsigned long long)data_sinfo->bytes_readonly,
1853 (unsigned long long)data_sinfo->bytes_may_use,
1854 (unsigned long long)data_sinfo->total_bytes);
1851 return -ENOSPC; 1855 return -ENOSPC;
1852 } 1856 }
1853 data_sinfo->bytes_may_use += bytes; 1857 data_sinfo->bytes_may_use += bytes;
@@ -1918,15 +1922,29 @@ void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode,
1918 spin_unlock(&info->lock); 1922 spin_unlock(&info->lock);
1919} 1923}
1920 1924
1925static void force_metadata_allocation(struct btrfs_fs_info *info)
1926{
1927 struct list_head *head = &info->space_info;
1928 struct btrfs_space_info *found;
1929
1930 rcu_read_lock();
1931 list_for_each_entry_rcu(found, head, list) {
1932 if (found->flags & BTRFS_BLOCK_GROUP_METADATA)
1933 found->force_alloc = 1;
1934 }
1935 rcu_read_unlock();
1936}
1937
1921static int do_chunk_alloc(struct btrfs_trans_handle *trans, 1938static int do_chunk_alloc(struct btrfs_trans_handle *trans,
1922 struct btrfs_root *extent_root, u64 alloc_bytes, 1939 struct btrfs_root *extent_root, u64 alloc_bytes,
1923 u64 flags, int force) 1940 u64 flags, int force)
1924{ 1941{
1925 struct btrfs_space_info *space_info; 1942 struct btrfs_space_info *space_info;
1943 struct btrfs_fs_info *fs_info = extent_root->fs_info;
1926 u64 thresh; 1944 u64 thresh;
1927 int ret = 0; 1945 int ret = 0;
1928 1946
1929 mutex_lock(&extent_root->fs_info->chunk_mutex); 1947 mutex_lock(&fs_info->chunk_mutex);
1930 1948
1931 flags = btrfs_reduce_alloc_profile(extent_root, flags); 1949 flags = btrfs_reduce_alloc_profile(extent_root, flags);
1932 1950
@@ -1958,6 +1976,18 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
1958 } 1976 }
1959 spin_unlock(&space_info->lock); 1977 spin_unlock(&space_info->lock);
1960 1978
1979 /*
1980 * if we're doing a data chunk, go ahead and make sure that
1981 * we keep a reasonable number of metadata chunks allocated in the
1982 * FS as well.
1983 */
1984 if (flags & BTRFS_BLOCK_GROUP_DATA) {
1985 fs_info->data_chunk_allocations++;
1986 if (!(fs_info->data_chunk_allocations %
1987 fs_info->metadata_ratio))
1988 force_metadata_allocation(fs_info);
1989 }
1990
1961 ret = btrfs_alloc_chunk(trans, extent_root, flags); 1991 ret = btrfs_alloc_chunk(trans, extent_root, flags);
1962 if (ret) 1992 if (ret)
1963 space_info->full = 1; 1993 space_info->full = 1;
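The hunks above wire the new metadata_ratio knob into chunk allocation: every data chunk allocation bumps a counter, and once the counter hits a multiple of the ratio, force_alloc is set on every metadata space_info so a metadata chunk gets allocated as well. A minimal userspace sketch of that bookkeeping (the struct and field names here are illustrative stand-ins, not the btrfs ones):

#include <stdio.h>

/* toy stand-in for btrfs_space_info: only the force_alloc flag matters here */
struct space_info {
	int is_metadata;
	int force_alloc;
};

/* analogue of force_metadata_allocation(): mark every metadata space_info */
static void force_metadata(struct space_info *infos, int n)
{
	for (int i = 0; i < n; i++)
		if (infos[i].is_metadata)
			infos[i].force_alloc = 1;
}

int main(void)
{
	struct space_info infos[2] = { { .is_metadata = 1 }, { .is_metadata = 0 } };
	unsigned long data_chunk_allocations = 0;
	unsigned int metadata_ratio = 8;	/* default set in open_ctree */

	/* simulate 20 data chunk allocations */
	for (int i = 1; i <= 20; i++) {
		data_chunk_allocations++;
		if (!(data_chunk_allocations % metadata_ratio)) {
			force_metadata(infos, 2);
			printf("data chunk %d: forcing a metadata chunk\n", i);
		}
	}
	return 0;
}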
@@ -2592,7 +2622,18 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
2592 search_start); 2622 search_start);
2593 if (block_group && block_group_bits(block_group, data)) { 2623 if (block_group && block_group_bits(block_group, data)) {
2594 down_read(&space_info->groups_sem); 2624 down_read(&space_info->groups_sem);
2595 goto have_block_group; 2625 if (list_empty(&block_group->list) ||
2626 block_group->ro) {
2627 /*
2628 * someone is removing this block group,
2629 * we can't jump into the have_block_group
2630 * target because our list pointers are not
2631 * valid
2632 */
2633 btrfs_put_block_group(block_group);
2634 up_read(&space_info->groups_sem);
2635 } else
2636 goto have_block_group;
2596 } else if (block_group) { 2637 } else if (block_group) {
2597 btrfs_put_block_group(block_group); 2638 btrfs_put_block_group(block_group);
2598 } 2639 }
@@ -2626,6 +2667,13 @@ have_block_group:
2626 * people trying to start a new cluster 2667 * people trying to start a new cluster
2627 */ 2668 */
2628 spin_lock(&last_ptr->refill_lock); 2669 spin_lock(&last_ptr->refill_lock);
2670 if (last_ptr->block_group &&
2671 (last_ptr->block_group->ro ||
2672 !block_group_bits(last_ptr->block_group, data))) {
2673 offset = 0;
2674 goto refill_cluster;
2675 }
2676
2629 offset = btrfs_alloc_from_cluster(block_group, last_ptr, 2677 offset = btrfs_alloc_from_cluster(block_group, last_ptr,
2630 num_bytes, search_start); 2678 num_bytes, search_start);
2631 if (offset) { 2679 if (offset) {
@@ -2651,10 +2699,17 @@ have_block_group:
2651 2699
2652 last_ptr_loop = 1; 2700 last_ptr_loop = 1;
2653 search_start = block_group->key.objectid; 2701 search_start = block_group->key.objectid;
2702 /*
2703 * we know this block group is properly
2704 * in the list because
 2705                                 * btrfs_remove_block_group drops the
2706 * cluster before it removes the block
2707 * group from the list
2708 */
2654 goto have_block_group; 2709 goto have_block_group;
2655 } 2710 }
2656 spin_unlock(&last_ptr->lock); 2711 spin_unlock(&last_ptr->lock);
2657 2712refill_cluster:
2658 /* 2713 /*
2659 * this cluster didn't work out, free it and 2714 * this cluster didn't work out, free it and
2660 * start over 2715 * start over
@@ -2798,9 +2853,12 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes)
2798 info->bytes_pinned - info->bytes_reserved), 2853 info->bytes_pinned - info->bytes_reserved),
2799 (info->full) ? "" : "not "); 2854 (info->full) ? "" : "not ");
2800 printk(KERN_INFO "space_info total=%llu, pinned=%llu, delalloc=%llu," 2855 printk(KERN_INFO "space_info total=%llu, pinned=%llu, delalloc=%llu,"
2801 " may_use=%llu, used=%llu\n", info->total_bytes, 2856 " may_use=%llu, used=%llu\n",
2802 info->bytes_pinned, info->bytes_delalloc, info->bytes_may_use, 2857 (unsigned long long)info->total_bytes,
2803 info->bytes_used); 2858 (unsigned long long)info->bytes_pinned,
2859 (unsigned long long)info->bytes_delalloc,
2860 (unsigned long long)info->bytes_may_use,
2861 (unsigned long long)info->bytes_used);
2804 2862
2805 down_read(&info->groups_sem); 2863 down_read(&info->groups_sem);
2806 list_for_each_entry(cache, &info->block_groups, list) { 2864 list_for_each_entry(cache, &info->block_groups, list) {
@@ -5935,6 +5993,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
5935{ 5993{
5936 struct btrfs_path *path; 5994 struct btrfs_path *path;
5937 struct btrfs_block_group_cache *block_group; 5995 struct btrfs_block_group_cache *block_group;
5996 struct btrfs_free_cluster *cluster;
5938 struct btrfs_key key; 5997 struct btrfs_key key;
5939 int ret; 5998 int ret;
5940 5999
@@ -5946,6 +6005,21 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
5946 6005
5947 memcpy(&key, &block_group->key, sizeof(key)); 6006 memcpy(&key, &block_group->key, sizeof(key));
5948 6007
6008 /* make sure this block group isn't part of an allocation cluster */
6009 cluster = &root->fs_info->data_alloc_cluster;
6010 spin_lock(&cluster->refill_lock);
6011 btrfs_return_cluster_to_free_space(block_group, cluster);
6012 spin_unlock(&cluster->refill_lock);
6013
6014 /*
6015 * make sure this block group isn't part of a metadata
6016 * allocation cluster
6017 */
6018 cluster = &root->fs_info->meta_alloc_cluster;
6019 spin_lock(&cluster->refill_lock);
6020 btrfs_return_cluster_to_free_space(block_group, cluster);
6021 spin_unlock(&cluster->refill_lock);
6022
5949 path = btrfs_alloc_path(); 6023 path = btrfs_alloc_path();
5950 BUG_ON(!path); 6024 BUG_ON(!path);
5951 6025
@@ -5955,7 +6029,11 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
5955 spin_unlock(&root->fs_info->block_group_cache_lock); 6029 spin_unlock(&root->fs_info->block_group_cache_lock);
5956 btrfs_remove_free_space_cache(block_group); 6030 btrfs_remove_free_space_cache(block_group);
5957 down_write(&block_group->space_info->groups_sem); 6031 down_write(&block_group->space_info->groups_sem);
5958 list_del(&block_group->list); 6032 /*
6033 * we must use list_del_init so people can check to see if they
6034 * are still on the list after taking the semaphore
6035 */
6036 list_del_init(&block_group->list);
5959 up_write(&block_group->space_info->groups_sem); 6037 up_write(&block_group->space_info->groups_sem);
5960 6038
5961 spin_lock(&block_group->space_info->lock); 6039 spin_lock(&block_group->space_info->lock);
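The btrfs_remove_block_group() comment spells out why list_del_init() replaces list_del(): find_free_extent now tests list_empty(&block_group->list) after taking groups_sem, and that test is only meaningful if removal re-initializes the node. A minimal standalone rendition of the circular-list behaviour involved (this is a simplified analogue, not the <linux/list.h> implementation):

#include <stdio.h>

struct list_head { struct list_head *next, *prev; };

static void INIT_LIST_HEAD(struct list_head *h) { h->next = h; h->prev = h; }

static void list_add(struct list_head *entry, struct list_head *head)
{
	entry->next = head->next; entry->prev = head;
	head->next->prev = entry; head->next = entry;
}

/* plain list_del leaves the node still pointing at its old neighbours */
static void list_del(struct list_head *e)
{
	e->prev->next = e->next; e->next->prev = e->prev;
}

/* list_del_init re-points the node at itself, so list_empty(node) becomes true */
static void list_del_init(struct list_head *e) { list_del(e); INIT_LIST_HEAD(e); }

static int list_empty(const struct list_head *h) { return h->next == h; }

int main(void)
{
	struct list_head groups, bg;

	INIT_LIST_HEAD(&groups);
	list_add(&bg, &groups);

	list_del_init(&bg);
	/* a later reader can now safely ask "was this block group removed?" */
	printf("still linked? %s\n", list_empty(&bg) ? "no" : "yes");
	return 0;
}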
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index eb2bee8b7fb..fe9eb990e44 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -17,12 +17,6 @@
17#include "ctree.h" 17#include "ctree.h"
18#include "btrfs_inode.h" 18#include "btrfs_inode.h"
19 19
20/* temporary define until extent_map moves out of btrfs */
21struct kmem_cache *btrfs_cache_create(const char *name, size_t size,
22 unsigned long extra_flags,
23 void (*ctor)(void *, struct kmem_cache *,
24 unsigned long));
25
26static struct kmem_cache *extent_state_cache; 20static struct kmem_cache *extent_state_cache;
27static struct kmem_cache *extent_buffer_cache; 21static struct kmem_cache *extent_buffer_cache;
28 22
@@ -50,20 +44,23 @@ struct extent_page_data {
50 /* tells writepage not to lock the state bits for this range 44 /* tells writepage not to lock the state bits for this range
51 * it still does the unlocking 45 * it still does the unlocking
52 */ 46 */
53 int extent_locked; 47 unsigned int extent_locked:1;
48
49 /* tells the submit_bio code to use a WRITE_SYNC */
50 unsigned int sync_io:1;
54}; 51};
55 52
56int __init extent_io_init(void) 53int __init extent_io_init(void)
57{ 54{
58 extent_state_cache = btrfs_cache_create("extent_state", 55 extent_state_cache = kmem_cache_create("extent_state",
59 sizeof(struct extent_state), 0, 56 sizeof(struct extent_state), 0,
60 NULL); 57 SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
61 if (!extent_state_cache) 58 if (!extent_state_cache)
62 return -ENOMEM; 59 return -ENOMEM;
63 60
64 extent_buffer_cache = btrfs_cache_create("extent_buffers", 61 extent_buffer_cache = kmem_cache_create("extent_buffers",
65 sizeof(struct extent_buffer), 0, 62 sizeof(struct extent_buffer), 0,
66 NULL); 63 SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
67 if (!extent_buffer_cache) 64 if (!extent_buffer_cache)
68 goto free_state_cache; 65 goto free_state_cache;
69 return 0; 66 return 0;
@@ -1404,69 +1401,6 @@ out:
1404 return total_bytes; 1401 return total_bytes;
1405} 1402}
1406 1403
1407#if 0
1408/*
1409 * helper function to lock both pages and extents in the tree.
1410 * pages must be locked first.
1411 */
1412static int lock_range(struct extent_io_tree *tree, u64 start, u64 end)
1413{
1414 unsigned long index = start >> PAGE_CACHE_SHIFT;
1415 unsigned long end_index = end >> PAGE_CACHE_SHIFT;
1416 struct page *page;
1417 int err;
1418
1419 while (index <= end_index) {
1420 page = grab_cache_page(tree->mapping, index);
1421 if (!page) {
1422 err = -ENOMEM;
1423 goto failed;
1424 }
1425 if (IS_ERR(page)) {
1426 err = PTR_ERR(page);
1427 goto failed;
1428 }
1429 index++;
1430 }
1431 lock_extent(tree, start, end, GFP_NOFS);
1432 return 0;
1433
1434failed:
1435 /*
1436 * we failed above in getting the page at 'index', so we undo here
1437 * up to but not including the page at 'index'
1438 */
1439 end_index = index;
1440 index = start >> PAGE_CACHE_SHIFT;
1441 while (index < end_index) {
1442 page = find_get_page(tree->mapping, index);
1443 unlock_page(page);
1444 page_cache_release(page);
1445 index++;
1446 }
1447 return err;
1448}
1449
1450/*
1451 * helper function to unlock both pages and extents in the tree.
1452 */
1453static int unlock_range(struct extent_io_tree *tree, u64 start, u64 end)
1454{
1455 unsigned long index = start >> PAGE_CACHE_SHIFT;
1456 unsigned long end_index = end >> PAGE_CACHE_SHIFT;
1457 struct page *page;
1458
1459 while (index <= end_index) {
1460 page = find_get_page(tree->mapping, index);
1461 unlock_page(page);
1462 page_cache_release(page);
1463 index++;
1464 }
1465 unlock_extent(tree, start, end, GFP_NOFS);
1466 return 0;
1467}
1468#endif
1469
1470/* 1404/*
1471 * set the private field for a given byte offset in the tree. If there isn't 1405 * set the private field for a given byte offset in the tree. If there isn't
1472 * an extent_state there already, this does nothing. 1406 * an extent_state there already, this does nothing.
@@ -2101,6 +2035,16 @@ int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
2101 return ret; 2035 return ret;
2102} 2036}
2103 2037
2038static noinline void update_nr_written(struct page *page,
2039 struct writeback_control *wbc,
2040 unsigned long nr_written)
2041{
2042 wbc->nr_to_write -= nr_written;
2043 if (wbc->range_cyclic || (wbc->nr_to_write > 0 &&
2044 wbc->range_start == 0 && wbc->range_end == LLONG_MAX))
2045 page->mapping->writeback_index = page->index + nr_written;
2046}
2047
2104/* 2048/*
2105 * the writepage semantics are similar to regular writepage. extent 2049 * the writepage semantics are similar to regular writepage. extent
2106 * records are inserted to lock ranges in the tree, and as dirty areas 2050 * records are inserted to lock ranges in the tree, and as dirty areas
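update_nr_written() centralizes the writeback accounting that used to sit at the end of __extent_writepage: it charges the written pages against wbc->nr_to_write and, for sweeping writeback (range_cyclic or whole-file), advances the mapping's writeback_index so the next pass resumes after this page. A simplified, self-contained version of that arithmetic (the toy structs merely mirror the relevant writeback_control and page fields):

#include <stdio.h>
#include <limits.h>

struct toy_wbc {
	long nr_to_write;
	int range_cyclic;
	long long range_start, range_end;
};

struct toy_mapping { unsigned long writeback_index; };
struct toy_page { unsigned long index; struct toy_mapping *mapping; };

static void update_nr_written(struct toy_page *page, struct toy_wbc *wbc,
			      unsigned long nr_written)
{
	wbc->nr_to_write -= nr_written;
	/* only sweeping writeback should move the resume point */
	if (wbc->range_cyclic || (wbc->nr_to_write > 0 &&
	    wbc->range_start == 0 && wbc->range_end == LLONG_MAX))
		page->mapping->writeback_index = page->index + nr_written;
}

int main(void)
{
	struct toy_mapping map = { .writeback_index = 0 };
	struct toy_page page = { .index = 42, .mapping = &map };
	struct toy_wbc wbc = { .nr_to_write = 16, .range_cyclic = 1 };

	update_nr_written(&page, &wbc, 3);
	printf("nr_to_write=%ld writeback_index=%lu\n",
	       wbc.nr_to_write, map.writeback_index);
	return 0;
}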
@@ -2136,8 +2080,14 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
2136 u64 delalloc_end; 2080 u64 delalloc_end;
2137 int page_started; 2081 int page_started;
2138 int compressed; 2082 int compressed;
2083 int write_flags;
2139 unsigned long nr_written = 0; 2084 unsigned long nr_written = 0;
2140 2085
2086 if (wbc->sync_mode == WB_SYNC_ALL)
2087 write_flags = WRITE_SYNC_PLUG;
2088 else
2089 write_flags = WRITE;
2090
2141 WARN_ON(!PageLocked(page)); 2091 WARN_ON(!PageLocked(page));
2142 pg_offset = i_size & (PAGE_CACHE_SIZE - 1); 2092 pg_offset = i_size & (PAGE_CACHE_SIZE - 1);
2143 if (page->index > end_index || 2093 if (page->index > end_index ||
@@ -2164,6 +2114,12 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
2164 delalloc_end = 0; 2114 delalloc_end = 0;
2165 page_started = 0; 2115 page_started = 0;
2166 if (!epd->extent_locked) { 2116 if (!epd->extent_locked) {
2117 /*
2118 * make sure the wbc mapping index is at least updated
2119 * to this page.
2120 */
2121 update_nr_written(page, wbc, 0);
2122
2167 while (delalloc_end < page_end) { 2123 while (delalloc_end < page_end) {
2168 nr_delalloc = find_lock_delalloc_range(inode, tree, 2124 nr_delalloc = find_lock_delalloc_range(inode, tree,
2169 page, 2125 page,
@@ -2185,7 +2141,13 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
2185 */ 2141 */
2186 if (page_started) { 2142 if (page_started) {
2187 ret = 0; 2143 ret = 0;
2188 goto update_nr_written; 2144 /*
2145 * we've unlocked the page, so we can't update
2146 * the mapping's writeback index, just update
2147 * nr_to_write.
2148 */
2149 wbc->nr_to_write -= nr_written;
2150 goto done_unlocked;
2189 } 2151 }
2190 } 2152 }
2191 lock_extent(tree, start, page_end, GFP_NOFS); 2153 lock_extent(tree, start, page_end, GFP_NOFS);
@@ -2198,13 +2160,18 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
2198 if (ret == -EAGAIN) { 2160 if (ret == -EAGAIN) {
2199 unlock_extent(tree, start, page_end, GFP_NOFS); 2161 unlock_extent(tree, start, page_end, GFP_NOFS);
2200 redirty_page_for_writepage(wbc, page); 2162 redirty_page_for_writepage(wbc, page);
2163 update_nr_written(page, wbc, nr_written);
2201 unlock_page(page); 2164 unlock_page(page);
2202 ret = 0; 2165 ret = 0;
2203 goto update_nr_written; 2166 goto done_unlocked;
2204 } 2167 }
2205 } 2168 }
2206 2169
2207 nr_written++; 2170 /*
2171 * we don't want to touch the inode after unlocking the page,
2172 * so we update the mapping writeback index now
2173 */
2174 update_nr_written(page, wbc, nr_written + 1);
2208 2175
2209 end = page_end; 2176 end = page_end;
2210 if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) 2177 if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0))
@@ -2314,9 +2281,9 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
2314 (unsigned long long)end); 2281 (unsigned long long)end);
2315 } 2282 }
2316 2283
2317 ret = submit_extent_page(WRITE, tree, page, sector, 2284 ret = submit_extent_page(write_flags, tree, page,
2318 iosize, pg_offset, bdev, 2285 sector, iosize, pg_offset,
2319 &epd->bio, max_nr, 2286 bdev, &epd->bio, max_nr,
2320 end_bio_extent_writepage, 2287 end_bio_extent_writepage,
2321 0, 0, 0); 2288 0, 0, 0);
2322 if (ret) 2289 if (ret)
@@ -2336,11 +2303,8 @@ done:
2336 unlock_extent(tree, unlock_start, page_end, GFP_NOFS); 2303 unlock_extent(tree, unlock_start, page_end, GFP_NOFS);
2337 unlock_page(page); 2304 unlock_page(page);
2338 2305
2339update_nr_written: 2306done_unlocked:
2340 wbc->nr_to_write -= nr_written; 2307
2341 if (wbc->range_cyclic || (wbc->nr_to_write > 0 &&
2342 wbc->range_start == 0 && wbc->range_end == LLONG_MAX))
2343 page->mapping->writeback_index = page->index + nr_written;
2344 return 0; 2308 return 0;
2345} 2309}
2346 2310
@@ -2460,15 +2424,23 @@ retry:
2460 return ret; 2424 return ret;
2461} 2425}
2462 2426
2463static noinline void flush_write_bio(void *data) 2427static void flush_epd_write_bio(struct extent_page_data *epd)
2464{ 2428{
2465 struct extent_page_data *epd = data;
2466 if (epd->bio) { 2429 if (epd->bio) {
2467 submit_one_bio(WRITE, epd->bio, 0, 0); 2430 if (epd->sync_io)
2431 submit_one_bio(WRITE_SYNC, epd->bio, 0, 0);
2432 else
2433 submit_one_bio(WRITE, epd->bio, 0, 0);
2468 epd->bio = NULL; 2434 epd->bio = NULL;
2469 } 2435 }
2470} 2436}
2471 2437
2438static noinline void flush_write_bio(void *data)
2439{
2440 struct extent_page_data *epd = data;
2441 flush_epd_write_bio(epd);
2442}
2443
2472int extent_write_full_page(struct extent_io_tree *tree, struct page *page, 2444int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
2473 get_extent_t *get_extent, 2445 get_extent_t *get_extent,
2474 struct writeback_control *wbc) 2446 struct writeback_control *wbc)
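extent_page_data now records whether the caller asked for synchronous writeback (the sync_io:1 bit), and flush_epd_write_bio() consults it to submit the pending bio with WRITE_SYNC instead of WRITE. The shape of that decision, sketched in standalone C with stubbed-out submission (the flag values and submit function below are placeholders, not the block-layer interfaces):

#include <stdio.h>

enum { WRITE_PLAIN = 0, WRITE_SYNCHRONOUS = 1 };	/* placeholders for WRITE / WRITE_SYNC */

struct toy_bio { int nr_pages; };

struct extent_page_data_like {
	struct toy_bio *bio;
	unsigned int extent_locked:1;	/* caller already holds the extent range locked */
	unsigned int sync_io:1;		/* submit with the synchronous flag */
};

static void submit_one_bio(int rw, struct toy_bio *bio)
{
	printf("submitting %d pages, %s\n", bio->nr_pages,
	       rw == WRITE_SYNCHRONOUS ? "WRITE_SYNC" : "WRITE");
}

static void flush_epd_write_bio(struct extent_page_data_like *epd)
{
	if (epd->bio) {
		submit_one_bio(epd->sync_io ? WRITE_SYNCHRONOUS : WRITE_PLAIN, epd->bio);
		epd->bio = NULL;	/* the bio is consumed by submission */
	}
}

int main(void)
{
	struct toy_bio bio = { .nr_pages = 8 };
	struct extent_page_data_like epd = { .bio = &bio, .sync_io = 1 };

	flush_epd_write_bio(&epd);	/* WB_SYNC_ALL callers get the sync flag */
	flush_epd_write_bio(&epd);	/* second call is a no-op: bio already gone */
	return 0;
}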
@@ -2480,23 +2452,22 @@ int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
2480 .tree = tree, 2452 .tree = tree,
2481 .get_extent = get_extent, 2453 .get_extent = get_extent,
2482 .extent_locked = 0, 2454 .extent_locked = 0,
2455 .sync_io = wbc->sync_mode == WB_SYNC_ALL,
2483 }; 2456 };
2484 struct writeback_control wbc_writepages = { 2457 struct writeback_control wbc_writepages = {
2485 .bdi = wbc->bdi, 2458 .bdi = wbc->bdi,
2486 .sync_mode = WB_SYNC_NONE, 2459 .sync_mode = wbc->sync_mode,
2487 .older_than_this = NULL, 2460 .older_than_this = NULL,
2488 .nr_to_write = 64, 2461 .nr_to_write = 64,
2489 .range_start = page_offset(page) + PAGE_CACHE_SIZE, 2462 .range_start = page_offset(page) + PAGE_CACHE_SIZE,
2490 .range_end = (loff_t)-1, 2463 .range_end = (loff_t)-1,
2491 }; 2464 };
2492 2465
2493
2494 ret = __extent_writepage(page, wbc, &epd); 2466 ret = __extent_writepage(page, wbc, &epd);
2495 2467
2496 extent_write_cache_pages(tree, mapping, &wbc_writepages, 2468 extent_write_cache_pages(tree, mapping, &wbc_writepages,
2497 __extent_writepage, &epd, flush_write_bio); 2469 __extent_writepage, &epd, flush_write_bio);
2498 if (epd.bio) 2470 flush_epd_write_bio(&epd);
2499 submit_one_bio(WRITE, epd.bio, 0, 0);
2500 return ret; 2471 return ret;
2501} 2472}
2502 2473
@@ -2515,6 +2486,7 @@ int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode,
2515 .tree = tree, 2486 .tree = tree,
2516 .get_extent = get_extent, 2487 .get_extent = get_extent,
2517 .extent_locked = 1, 2488 .extent_locked = 1,
2489 .sync_io = mode == WB_SYNC_ALL,
2518 }; 2490 };
2519 struct writeback_control wbc_writepages = { 2491 struct writeback_control wbc_writepages = {
2520 .bdi = inode->i_mapping->backing_dev_info, 2492 .bdi = inode->i_mapping->backing_dev_info,
@@ -2540,8 +2512,7 @@ int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode,
2540 start += PAGE_CACHE_SIZE; 2512 start += PAGE_CACHE_SIZE;
2541 } 2513 }
2542 2514
2543 if (epd.bio) 2515 flush_epd_write_bio(&epd);
2544 submit_one_bio(WRITE, epd.bio, 0, 0);
2545 return ret; 2516 return ret;
2546} 2517}
2547 2518
@@ -2556,13 +2527,13 @@ int extent_writepages(struct extent_io_tree *tree,
2556 .tree = tree, 2527 .tree = tree,
2557 .get_extent = get_extent, 2528 .get_extent = get_extent,
2558 .extent_locked = 0, 2529 .extent_locked = 0,
2530 .sync_io = wbc->sync_mode == WB_SYNC_ALL,
2559 }; 2531 };
2560 2532
2561 ret = extent_write_cache_pages(tree, mapping, wbc, 2533 ret = extent_write_cache_pages(tree, mapping, wbc,
2562 __extent_writepage, &epd, 2534 __extent_writepage, &epd,
2563 flush_write_bio); 2535 flush_write_bio);
2564 if (epd.bio) 2536 flush_epd_write_bio(&epd);
2565 submit_one_bio(WRITE, epd.bio, 0, 0);
2566 return ret; 2537 return ret;
2567} 2538}
2568 2539
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index b187917b36f..30c9365861e 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -6,19 +6,14 @@
6#include <linux/hardirq.h> 6#include <linux/hardirq.h>
7#include "extent_map.h" 7#include "extent_map.h"
8 8
9/* temporary define until extent_map moves out of btrfs */
10struct kmem_cache *btrfs_cache_create(const char *name, size_t size,
11 unsigned long extra_flags,
12 void (*ctor)(void *, struct kmem_cache *,
13 unsigned long));
14 9
15static struct kmem_cache *extent_map_cache; 10static struct kmem_cache *extent_map_cache;
16 11
17int __init extent_map_init(void) 12int __init extent_map_init(void)
18{ 13{
19 extent_map_cache = btrfs_cache_create("extent_map", 14 extent_map_cache = kmem_cache_create("extent_map",
20 sizeof(struct extent_map), 0, 15 sizeof(struct extent_map), 0,
21 NULL); 16 SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
22 if (!extent_map_cache) 17 if (!extent_map_cache)
23 return -ENOMEM; 18 return -ENOMEM;
24 return 0; 19 return 0;
@@ -43,7 +38,6 @@ void extent_map_tree_init(struct extent_map_tree *tree, gfp_t mask)
43 tree->map.rb_node = NULL; 38 tree->map.rb_node = NULL;
44 spin_lock_init(&tree->lock); 39 spin_lock_init(&tree->lock);
45} 40}
46EXPORT_SYMBOL(extent_map_tree_init);
47 41
48/** 42/**
49 * alloc_extent_map - allocate new extent map structure 43 * alloc_extent_map - allocate new extent map structure
@@ -64,7 +58,6 @@ struct extent_map *alloc_extent_map(gfp_t mask)
64 atomic_set(&em->refs, 1); 58 atomic_set(&em->refs, 1);
65 return em; 59 return em;
66} 60}
67EXPORT_SYMBOL(alloc_extent_map);
68 61
69/** 62/**
70 * free_extent_map - drop reference count of an extent_map 63 * free_extent_map - drop reference count of an extent_map
@@ -83,7 +76,6 @@ void free_extent_map(struct extent_map *em)
83 kmem_cache_free(extent_map_cache, em); 76 kmem_cache_free(extent_map_cache, em);
84 } 77 }
85} 78}
86EXPORT_SYMBOL(free_extent_map);
87 79
88static struct rb_node *tree_insert(struct rb_root *root, u64 offset, 80static struct rb_node *tree_insert(struct rb_root *root, u64 offset,
89 struct rb_node *node) 81 struct rb_node *node)
@@ -264,7 +256,6 @@ int add_extent_mapping(struct extent_map_tree *tree,
264out: 256out:
265 return ret; 257 return ret;
266} 258}
267EXPORT_SYMBOL(add_extent_mapping);
268 259
269/* simple helper to do math around the end of an extent, handling wrap */ 260/* simple helper to do math around the end of an extent, handling wrap */
270static u64 range_end(u64 start, u64 len) 261static u64 range_end(u64 start, u64 len)
@@ -326,7 +317,6 @@ found:
326out: 317out:
327 return em; 318 return em;
328} 319}
329EXPORT_SYMBOL(lookup_extent_mapping);
330 320
331/** 321/**
332 * remove_extent_mapping - removes an extent_map from the extent tree 322 * remove_extent_mapping - removes an extent_map from the extent tree
@@ -346,4 +336,3 @@ int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em)
346 em->in_tree = 0; 336 em->in_tree = 0;
347 return ret; 337 return ret;
348} 338}
349EXPORT_SYMBOL(remove_extent_mapping);
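With btrfs_cache_create() gone, the extent_map and extent_io caches are created with kmem_cache_create() directly, passing SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD explicitly, and the EXPORT_SYMBOL markers are dropped because the helpers are only called from inside the btrfs module. A minimal out-of-tree module sketch of that pattern, under the assumption of a hypothetical demo_item structure (this builds only against kernel headers and is not btrfs code):

/* sketch of the kmem_cache_create() pattern used above */
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/types.h>

struct demo_item {
	u64 start;
	u64 len;
};

static struct kmem_cache *demo_cache;

static int __init demo_init(void)
{
	demo_cache = kmem_cache_create("demo_item_cache",
			sizeof(struct demo_item), 0,
			SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
	if (!demo_cache)
		return -ENOMEM;
	return 0;
}

static void __exit demo_exit(void)
{
	kmem_cache_destroy(demo_cache);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");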
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 9c9fb46ccd0..1d51dc38bb4 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -272,83 +272,6 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
272 return 0; 272 return 0;
273} 273}
274 274
275int btrfs_check_file(struct btrfs_root *root, struct inode *inode)
276{
277 return 0;
278#if 0
279 struct btrfs_path *path;
280 struct btrfs_key found_key;
281 struct extent_buffer *leaf;
282 struct btrfs_file_extent_item *extent;
283 u64 last_offset = 0;
284 int nritems;
285 int slot;
286 int found_type;
287 int ret;
288 int err = 0;
289 u64 extent_end = 0;
290
291 path = btrfs_alloc_path();
292 ret = btrfs_lookup_file_extent(NULL, root, path, inode->i_ino,
293 last_offset, 0);
294 while (1) {
295 nritems = btrfs_header_nritems(path->nodes[0]);
296 if (path->slots[0] >= nritems) {
297 ret = btrfs_next_leaf(root, path);
298 if (ret)
299 goto out;
300 nritems = btrfs_header_nritems(path->nodes[0]);
301 }
302 slot = path->slots[0];
303 leaf = path->nodes[0];
304 btrfs_item_key_to_cpu(leaf, &found_key, slot);
305 if (found_key.objectid != inode->i_ino)
306 break;
307 if (found_key.type != BTRFS_EXTENT_DATA_KEY)
308 goto out;
309
310 if (found_key.offset < last_offset) {
311 WARN_ON(1);
312 btrfs_print_leaf(root, leaf);
313 printk(KERN_ERR "inode %lu found offset %llu "
314 "expected %llu\n", inode->i_ino,
315 (unsigned long long)found_key.offset,
316 (unsigned long long)last_offset);
317 err = 1;
318 goto out;
319 }
320 extent = btrfs_item_ptr(leaf, slot,
321 struct btrfs_file_extent_item);
322 found_type = btrfs_file_extent_type(leaf, extent);
323 if (found_type == BTRFS_FILE_EXTENT_REG) {
324 extent_end = found_key.offset +
325 btrfs_file_extent_num_bytes(leaf, extent);
326 } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
327 struct btrfs_item *item;
328 item = btrfs_item_nr(leaf, slot);
329 extent_end = found_key.offset +
330 btrfs_file_extent_inline_len(leaf, extent);
331 extent_end = (extent_end + root->sectorsize - 1) &
332 ~((u64)root->sectorsize - 1);
333 }
334 last_offset = extent_end;
335 path->slots[0]++;
336 }
337 if (0 && last_offset < inode->i_size) {
338 WARN_ON(1);
339 btrfs_print_leaf(root, leaf);
340 printk(KERN_ERR "inode %lu found offset %llu size %llu\n",
341 inode->i_ino, (unsigned long long)last_offset,
342 (unsigned long long)inode->i_size);
343 err = 1;
344
345 }
346out:
347 btrfs_free_path(path);
348 return err;
349#endif
350}
351
352/* 275/*
353 * this is very complex, but the basic idea is to drop all extents 276 * this is very complex, but the basic idea is to drop all extents
354 * in the range start - end. hint_block is filled in with a block number 277 * in the range start - end. hint_block is filled in with a block number
@@ -363,15 +286,16 @@ out:
363 */ 286 */
364noinline int btrfs_drop_extents(struct btrfs_trans_handle *trans, 287noinline int btrfs_drop_extents(struct btrfs_trans_handle *trans,
365 struct btrfs_root *root, struct inode *inode, 288 struct btrfs_root *root, struct inode *inode,
366 u64 start, u64 end, u64 inline_limit, u64 *hint_byte) 289 u64 start, u64 end, u64 locked_end,
290 u64 inline_limit, u64 *hint_byte)
367{ 291{
368 u64 extent_end = 0; 292 u64 extent_end = 0;
369 u64 locked_end = end;
370 u64 search_start = start; 293 u64 search_start = start;
371 u64 leaf_start; 294 u64 leaf_start;
372 u64 ram_bytes = 0; 295 u64 ram_bytes = 0;
373 u64 orig_parent = 0; 296 u64 orig_parent = 0;
374 u64 disk_bytenr = 0; 297 u64 disk_bytenr = 0;
298 u64 orig_locked_end = locked_end;
375 u8 compression; 299 u8 compression;
376 u8 encryption; 300 u8 encryption;
377 u16 other_encoding = 0; 301 u16 other_encoding = 0;
@@ -684,11 +608,10 @@ next_slot:
684 } 608 }
685out: 609out:
686 btrfs_free_path(path); 610 btrfs_free_path(path);
687 if (locked_end > end) { 611 if (locked_end > orig_locked_end) {
688 unlock_extent(&BTRFS_I(inode)->io_tree, end, locked_end - 1, 612 unlock_extent(&BTRFS_I(inode)->io_tree, orig_locked_end,
689 GFP_NOFS); 613 locked_end - 1, GFP_NOFS);
690 } 614 }
691 btrfs_check_file(root, inode);
692 return ret; 615 return ret;
693} 616}
694 617
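btrfs_drop_extents() now takes the end of the locked range from its caller instead of assuming it equals end, and on exit it only unlocks whatever it extended beyond the caller's original lock. That range bookkeeping in isolation (names below are illustrative, and the byte offsets are arbitrary):

#include <stdio.h>

/* stand-in for unlock_extent(): just report the range being released */
static void unlock_range(unsigned long long start, unsigned long long end)
{
	printf("unlock [%llu, %llu]\n", start, end);
}

/*
 * analogue of the cleanup at the end of btrfs_drop_extents(): the caller
 * already holds up to orig_locked_end, so only release what this function
 * extended on its own.
 */
static void release_extension(unsigned long long orig_locked_end,
			      unsigned long long locked_end)
{
	if (locked_end > orig_locked_end)
		unlock_range(orig_locked_end, locked_end - 1);
}

int main(void)
{
	release_extension(4096, 8192);	/* extended: unlock [4096, 8191] */
	release_extension(4096, 4096);	/* nothing extended: no unlock */
	return 0;
}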
@@ -830,7 +753,7 @@ again:
830 753
831 ret = btrfs_del_items(trans, root, path, del_slot, del_nr); 754 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
832 BUG_ON(ret); 755 BUG_ON(ret);
833 goto done; 756 goto release;
834 } else if (split == start) { 757 } else if (split == start) {
835 if (locked_end < extent_end) { 758 if (locked_end < extent_end) {
836 ret = try_lock_extent(&BTRFS_I(inode)->io_tree, 759 ret = try_lock_extent(&BTRFS_I(inode)->io_tree,
@@ -926,6 +849,8 @@ again:
926 } 849 }
927done: 850done:
928 btrfs_mark_buffer_dirty(leaf); 851 btrfs_mark_buffer_dirty(leaf);
852
853release:
929 btrfs_release_path(root, path); 854 btrfs_release_path(root, path);
930 if (split_end && split == start) { 855 if (split_end && split == start) {
931 split = end; 856 split = end;
@@ -1131,7 +1056,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
1131 if (will_write) { 1056 if (will_write) {
1132 btrfs_fdatawrite_range(inode->i_mapping, pos, 1057 btrfs_fdatawrite_range(inode->i_mapping, pos,
1133 pos + write_bytes - 1, 1058 pos + write_bytes - 1,
1134 WB_SYNC_NONE); 1059 WB_SYNC_ALL);
1135 } else { 1060 } else {
1136 balance_dirty_pages_ratelimited_nr(inode->i_mapping, 1061 balance_dirty_pages_ratelimited_nr(inode->i_mapping,
1137 num_pages); 1062 num_pages);
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 768b9523662..0bc93657b46 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -332,13 +332,17 @@ int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
332 printk(KERN_ERR "couldn't find space %llu to free\n", 332 printk(KERN_ERR "couldn't find space %llu to free\n",
333 (unsigned long long)offset); 333 (unsigned long long)offset);
334 printk(KERN_ERR "cached is %d, offset %llu bytes %llu\n", 334 printk(KERN_ERR "cached is %d, offset %llu bytes %llu\n",
335 block_group->cached, block_group->key.objectid, 335 block_group->cached,
336 block_group->key.offset); 336 (unsigned long long)block_group->key.objectid,
337 (unsigned long long)block_group->key.offset);
337 btrfs_dump_free_space(block_group, bytes); 338 btrfs_dump_free_space(block_group, bytes);
338 } else if (info) { 339 } else if (info) {
339 printk(KERN_ERR "hmm, found offset=%llu bytes=%llu, " 340 printk(KERN_ERR "hmm, found offset=%llu bytes=%llu, "
340 "but wanted offset=%llu bytes=%llu\n", 341 "but wanted offset=%llu bytes=%llu\n",
341 info->offset, info->bytes, offset, bytes); 342 (unsigned long long)info->offset,
343 (unsigned long long)info->bytes,
344 (unsigned long long)offset,
345 (unsigned long long)bytes);
342 } 346 }
343 WARN_ON(1); 347 WARN_ON(1);
344 } 348 }
@@ -357,8 +361,9 @@ void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
357 info = rb_entry(n, struct btrfs_free_space, offset_index); 361 info = rb_entry(n, struct btrfs_free_space, offset_index);
358 if (info->bytes >= bytes) 362 if (info->bytes >= bytes)
359 count++; 363 count++;
360 printk(KERN_ERR "entry offset %llu, bytes %llu\n", info->offset, 364 printk(KERN_ERR "entry offset %llu, bytes %llu\n",
361 info->bytes); 365 (unsigned long long)info->offset,
366 (unsigned long long)info->bytes);
362 } 367 }
363 printk(KERN_INFO "%d blocks of free space at or bigger than bytes is" 368 printk(KERN_INFO "%d blocks of free space at or bigger than bytes is"
364 "\n", count); 369 "\n", count);
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c
index cc7334d833c..9abbced1123 100644
--- a/fs/btrfs/inode-map.c
+++ b/fs/btrfs/inode-map.c
@@ -79,7 +79,7 @@ int btrfs_find_free_objectid(struct btrfs_trans_handle *trans,
79 } 79 }
80 path = btrfs_alloc_path(); 80 path = btrfs_alloc_path();
81 BUG_ON(!path); 81 BUG_ON(!path);
82 search_start = max(search_start, BTRFS_FIRST_FREE_OBJECTID); 82 search_start = max(search_start, (u64)BTRFS_FIRST_FREE_OBJECTID);
83 search_key.objectid = search_start; 83 search_key.objectid = search_start;
84 search_key.type = 0; 84 search_key.type = 0;
85 search_key.offset = 0; 85 search_key.offset = 0;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index a0d1dd492a5..1c8b0190d03 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -70,7 +70,6 @@ static struct extent_io_ops btrfs_extent_io_ops;
70static struct kmem_cache *btrfs_inode_cachep; 70static struct kmem_cache *btrfs_inode_cachep;
71struct kmem_cache *btrfs_trans_handle_cachep; 71struct kmem_cache *btrfs_trans_handle_cachep;
72struct kmem_cache *btrfs_transaction_cachep; 72struct kmem_cache *btrfs_transaction_cachep;
73struct kmem_cache *btrfs_bit_radix_cachep;
74struct kmem_cache *btrfs_path_cachep; 73struct kmem_cache *btrfs_path_cachep;
75 74
76#define S_SHIFT 12 75#define S_SHIFT 12
@@ -234,7 +233,7 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans,
234 } 233 }
235 234
236 ret = btrfs_drop_extents(trans, root, inode, start, 235 ret = btrfs_drop_extents(trans, root, inode, start,
237 aligned_end, start, &hint_byte); 236 aligned_end, aligned_end, start, &hint_byte);
238 BUG_ON(ret); 237 BUG_ON(ret);
239 238
240 if (isize > actual_end) 239 if (isize > actual_end)
@@ -1439,6 +1438,7 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
1439 struct inode *inode, u64 file_pos, 1438 struct inode *inode, u64 file_pos,
1440 u64 disk_bytenr, u64 disk_num_bytes, 1439 u64 disk_bytenr, u64 disk_num_bytes,
1441 u64 num_bytes, u64 ram_bytes, 1440 u64 num_bytes, u64 ram_bytes,
1441 u64 locked_end,
1442 u8 compression, u8 encryption, 1442 u8 compression, u8 encryption,
1443 u16 other_encoding, int extent_type) 1443 u16 other_encoding, int extent_type)
1444{ 1444{
@@ -1455,7 +1455,8 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
1455 1455
1456 path->leave_spinning = 1; 1456 path->leave_spinning = 1;
1457 ret = btrfs_drop_extents(trans, root, inode, file_pos, 1457 ret = btrfs_drop_extents(trans, root, inode, file_pos,
1458 file_pos + num_bytes, file_pos, &hint); 1458 file_pos + num_bytes, locked_end,
1459 file_pos, &hint);
1459 BUG_ON(ret); 1460 BUG_ON(ret);
1460 1461
1461 ins.objectid = inode->i_ino; 1462 ins.objectid = inode->i_ino;
@@ -1590,6 +1591,8 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1590 ordered_extent->disk_len, 1591 ordered_extent->disk_len,
1591 ordered_extent->len, 1592 ordered_extent->len,
1592 ordered_extent->len, 1593 ordered_extent->len,
1594 ordered_extent->file_offset +
1595 ordered_extent->len,
1593 compressed, 0, 0, 1596 compressed, 0, 0,
1594 BTRFS_FILE_EXTENT_REG); 1597 BTRFS_FILE_EXTENT_REG);
1595 BUG_ON(ret); 1598 BUG_ON(ret);
@@ -1819,10 +1822,12 @@ good:
1819 return 0; 1822 return 0;
1820 1823
1821zeroit: 1824zeroit:
1822 printk(KERN_INFO "btrfs csum failed ino %lu off %llu csum %u " 1825 if (printk_ratelimit()) {
1823 "private %llu\n", page->mapping->host->i_ino, 1826 printk(KERN_INFO "btrfs csum failed ino %lu off %llu csum %u "
1824 (unsigned long long)start, csum, 1827 "private %llu\n", page->mapping->host->i_ino,
1825 (unsigned long long)private); 1828 (unsigned long long)start, csum,
1829 (unsigned long long)private);
1830 }
1826 memset(kaddr + offset, 1, end - start + 1); 1831 memset(kaddr + offset, 1, end - start + 1);
1827 flush_dcache_page(page); 1832 flush_dcache_page(page);
1828 kunmap_atomic(kaddr, KM_USER0); 1833 kunmap_atomic(kaddr, KM_USER0);
@@ -2011,6 +2016,57 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
2011} 2016}
2012 2017
2013/* 2018/*
2019 * very simple check to peek ahead in the leaf looking for xattrs. If we
2020 * don't find any xattrs, we know there can't be any acls.
2021 *
2022 * slot is the slot the inode is in, objectid is the objectid of the inode
2023 */
2024static noinline int acls_after_inode_item(struct extent_buffer *leaf,
2025 int slot, u64 objectid)
2026{
2027 u32 nritems = btrfs_header_nritems(leaf);
2028 struct btrfs_key found_key;
2029 int scanned = 0;
2030
2031 slot++;
2032 while (slot < nritems) {
2033 btrfs_item_key_to_cpu(leaf, &found_key, slot);
2034
2035 /* we found a different objectid, there must not be acls */
2036 if (found_key.objectid != objectid)
2037 return 0;
2038
2039 /* we found an xattr, assume we've got an acl */
2040 if (found_key.type == BTRFS_XATTR_ITEM_KEY)
2041 return 1;
2042
2043 /*
2044 * we found a key greater than an xattr key, there can't
2045 * be any acls later on
2046 */
2047 if (found_key.type > BTRFS_XATTR_ITEM_KEY)
2048 return 0;
2049
2050 slot++;
2051 scanned++;
2052
2053 /*
2054 * it goes inode, inode backrefs, xattrs, extents,
2055 * so if there are a ton of hard links to an inode there can
2056 * be a lot of backrefs. Don't waste time searching too hard,
2057 * this is just an optimization
2058 */
2059 if (scanned >= 8)
2060 break;
2061 }
2062 /* we hit the end of the leaf before we found an xattr or
2063 * something larger than an xattr. We have to assume the inode
2064 * has acls
2065 */
2066 return 1;
2067}
2068
2069/*
2014 * read an inode from the btree into the in-memory inode 2070 * read an inode from the btree into the in-memory inode
2015 */ 2071 */
2016void btrfs_read_locked_inode(struct inode *inode) 2072void btrfs_read_locked_inode(struct inode *inode)
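acls_after_inode_item() peeks at the next few keys after the inode item: items for one objectid are sorted by key type, so if an XATTR_ITEM key shows up before any larger type, the inode may have ACLs; if another objectid or a bigger type shows up first, it certainly has none, and the scan gives up after eight slots to avoid wading through long backref runs. A standalone rendition of that scan over a sorted key array (the key-type constants here are arbitrary placeholders, only their relative order matters):

#include <stdio.h>
#include <stdint.h>

enum { INODE_ITEM = 1, INODE_REF = 12, XATTR_ITEM = 24, EXTENT_DATA = 108 };

struct key { uint64_t objectid; uint8_t type; };

/* return 0 when we can prove there are no ACLs, 1 when there might be */
static int acls_possible(const struct key *leaf, int nritems, int slot,
			 uint64_t objectid)
{
	int scanned = 0;

	for (slot++; slot < nritems; slot++) {
		if (leaf[slot].objectid != objectid)
			return 0;		/* next inode: no xattrs for ours */
		if (leaf[slot].type == XATTR_ITEM)
			return 1;		/* found an xattr: ACLs possible */
		if (leaf[slot].type > XATTR_ITEM)
			return 0;		/* past where xattrs would sort */
		if (++scanned >= 8)
			break;			/* don't scan huge backref runs */
	}
	return 1;	/* ran off the leaf (or gave up): assume ACLs */
}

int main(void)
{
	struct key leaf[] = {
		{ 257, INODE_ITEM }, { 257, INODE_REF },
		{ 257, EXTENT_DATA }, { 258, INODE_ITEM },
	};

	printf("acls possible: %d\n", acls_possible(leaf, 4, 0, 257));
	return 0;
}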
@@ -2021,6 +2077,7 @@ void btrfs_read_locked_inode(struct inode *inode)
2021 struct btrfs_timespec *tspec; 2077 struct btrfs_timespec *tspec;
2022 struct btrfs_root *root = BTRFS_I(inode)->root; 2078 struct btrfs_root *root = BTRFS_I(inode)->root;
2023 struct btrfs_key location; 2079 struct btrfs_key location;
2080 int maybe_acls;
2024 u64 alloc_group_block; 2081 u64 alloc_group_block;
2025 u32 rdev; 2082 u32 rdev;
2026 int ret; 2083 int ret;
@@ -2067,6 +2124,16 @@ void btrfs_read_locked_inode(struct inode *inode)
2067 2124
2068 alloc_group_block = btrfs_inode_block_group(leaf, inode_item); 2125 alloc_group_block = btrfs_inode_block_group(leaf, inode_item);
2069 2126
2127 /*
2128 * try to precache a NULL acl entry for files that don't have
2129 * any xattrs or acls
2130 */
2131 maybe_acls = acls_after_inode_item(leaf, path->slots[0], inode->i_ino);
2132 if (!maybe_acls) {
2133 BTRFS_I(inode)->i_acl = NULL;
2134 BTRFS_I(inode)->i_default_acl = NULL;
2135 }
2136
2070 BTRFS_I(inode)->block_group = btrfs_find_block_group(root, 0, 2137 BTRFS_I(inode)->block_group = btrfs_find_block_group(root, 0,
2071 alloc_group_block, 0); 2138 alloc_group_block, 0);
2072 btrfs_free_path(path); 2139 btrfs_free_path(path);
@@ -2877,6 +2944,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t size)
2877 err = btrfs_drop_extents(trans, root, inode, 2944 err = btrfs_drop_extents(trans, root, inode,
2878 cur_offset, 2945 cur_offset,
2879 cur_offset + hole_size, 2946 cur_offset + hole_size,
2947 block_end,
2880 cur_offset, &hint_byte); 2948 cur_offset, &hint_byte);
2881 if (err) 2949 if (err)
2882 break; 2950 break;
@@ -3041,8 +3109,8 @@ static noinline void init_btrfs_i(struct inode *inode)
3041{ 3109{
3042 struct btrfs_inode *bi = BTRFS_I(inode); 3110 struct btrfs_inode *bi = BTRFS_I(inode);
3043 3111
3044 bi->i_acl = NULL; 3112 bi->i_acl = BTRFS_ACL_NOT_CACHED;
3045 bi->i_default_acl = NULL; 3113 bi->i_default_acl = BTRFS_ACL_NOT_CACHED;
3046 3114
3047 bi->generation = 0; 3115 bi->generation = 0;
3048 bi->sequence = 0; 3116 bi->sequence = 0;
@@ -3054,6 +3122,7 @@ static noinline void init_btrfs_i(struct inode *inode)
3054 bi->flags = 0; 3122 bi->flags = 0;
3055 bi->index_cnt = (u64)-1; 3123 bi->index_cnt = (u64)-1;
3056 bi->last_unlink_trans = 0; 3124 bi->last_unlink_trans = 0;
3125 bi->ordered_data_close = 0;
3057 extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS); 3126 extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS);
3058 extent_io_tree_init(&BTRFS_I(inode)->io_tree, 3127 extent_io_tree_init(&BTRFS_I(inode)->io_tree,
3059 inode->i_mapping, GFP_NOFS); 3128 inode->i_mapping, GFP_NOFS);
@@ -4227,7 +4296,6 @@ out:
4227 } 4296 }
4228 if (err) { 4297 if (err) {
4229 free_extent_map(em); 4298 free_extent_map(em);
4230 WARN_ON(1);
4231 return ERR_PTR(err); 4299 return ERR_PTR(err);
4232 } 4300 }
4233 return em; 4301 return em;
@@ -4634,47 +4702,36 @@ void btrfs_destroy_cachep(void)
4634 kmem_cache_destroy(btrfs_trans_handle_cachep); 4702 kmem_cache_destroy(btrfs_trans_handle_cachep);
4635 if (btrfs_transaction_cachep) 4703 if (btrfs_transaction_cachep)
4636 kmem_cache_destroy(btrfs_transaction_cachep); 4704 kmem_cache_destroy(btrfs_transaction_cachep);
4637 if (btrfs_bit_radix_cachep)
4638 kmem_cache_destroy(btrfs_bit_radix_cachep);
4639 if (btrfs_path_cachep) 4705 if (btrfs_path_cachep)
4640 kmem_cache_destroy(btrfs_path_cachep); 4706 kmem_cache_destroy(btrfs_path_cachep);
4641} 4707}
4642 4708
4643struct kmem_cache *btrfs_cache_create(const char *name, size_t size,
4644 unsigned long extra_flags,
4645 void (*ctor)(void *))
4646{
4647 return kmem_cache_create(name, size, 0, (SLAB_RECLAIM_ACCOUNT |
4648 SLAB_MEM_SPREAD | extra_flags), ctor);
4649}
4650
4651int btrfs_init_cachep(void) 4709int btrfs_init_cachep(void)
4652{ 4710{
4653 btrfs_inode_cachep = btrfs_cache_create("btrfs_inode_cache", 4711 btrfs_inode_cachep = kmem_cache_create("btrfs_inode_cache",
4654 sizeof(struct btrfs_inode), 4712 sizeof(struct btrfs_inode), 0,
4655 0, init_once); 4713 SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, init_once);
4656 if (!btrfs_inode_cachep) 4714 if (!btrfs_inode_cachep)
4657 goto fail; 4715 goto fail;
4658 btrfs_trans_handle_cachep = 4716
4659 btrfs_cache_create("btrfs_trans_handle_cache", 4717 btrfs_trans_handle_cachep = kmem_cache_create("btrfs_trans_handle_cache",
4660 sizeof(struct btrfs_trans_handle), 4718 sizeof(struct btrfs_trans_handle), 0,
4661 0, NULL); 4719 SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
4662 if (!btrfs_trans_handle_cachep) 4720 if (!btrfs_trans_handle_cachep)
4663 goto fail; 4721 goto fail;
4664 btrfs_transaction_cachep = btrfs_cache_create("btrfs_transaction_cache", 4722
4665 sizeof(struct btrfs_transaction), 4723 btrfs_transaction_cachep = kmem_cache_create("btrfs_transaction_cache",
4666 0, NULL); 4724 sizeof(struct btrfs_transaction), 0,
4725 SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
4667 if (!btrfs_transaction_cachep) 4726 if (!btrfs_transaction_cachep)
4668 goto fail; 4727 goto fail;
4669 btrfs_path_cachep = btrfs_cache_create("btrfs_path_cache", 4728
4670 sizeof(struct btrfs_path), 4729 btrfs_path_cachep = kmem_cache_create("btrfs_path_cache",
4671 0, NULL); 4730 sizeof(struct btrfs_path), 0,
4731 SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
4672 if (!btrfs_path_cachep) 4732 if (!btrfs_path_cachep)
4673 goto fail; 4733 goto fail;
4674 btrfs_bit_radix_cachep = btrfs_cache_create("btrfs_radix", 256, 4734
4675 SLAB_DESTROY_BY_RCU, NULL);
4676 if (!btrfs_bit_radix_cachep)
4677 goto fail;
4678 return 0; 4735 return 0;
4679fail: 4736fail:
4680 btrfs_destroy_cachep(); 4737 btrfs_destroy_cachep();
@@ -4970,10 +5027,10 @@ out_fail:
4970 return err; 5027 return err;
4971} 5028}
4972 5029
4973static int prealloc_file_range(struct inode *inode, u64 start, u64 end, 5030static int prealloc_file_range(struct btrfs_trans_handle *trans,
4974 u64 alloc_hint, int mode) 5031 struct inode *inode, u64 start, u64 end,
5032 u64 locked_end, u64 alloc_hint, int mode)
4975{ 5033{
4976 struct btrfs_trans_handle *trans;
4977 struct btrfs_root *root = BTRFS_I(inode)->root; 5034 struct btrfs_root *root = BTRFS_I(inode)->root;
4978 struct btrfs_key ins; 5035 struct btrfs_key ins;
4979 u64 alloc_size; 5036 u64 alloc_size;
@@ -4981,10 +5038,6 @@ static int prealloc_file_range(struct inode *inode, u64 start, u64 end,
4981 u64 num_bytes = end - start; 5038 u64 num_bytes = end - start;
4982 int ret = 0; 5039 int ret = 0;
4983 5040
4984 trans = btrfs_join_transaction(root, 1);
4985 BUG_ON(!trans);
4986 btrfs_set_trans_block_group(trans, inode);
4987
4988 while (num_bytes > 0) { 5041 while (num_bytes > 0) {
4989 alloc_size = min(num_bytes, root->fs_info->max_extent); 5042 alloc_size = min(num_bytes, root->fs_info->max_extent);
4990 ret = btrfs_reserve_extent(trans, root, alloc_size, 5043 ret = btrfs_reserve_extent(trans, root, alloc_size,
@@ -4997,7 +5050,8 @@ static int prealloc_file_range(struct inode *inode, u64 start, u64 end,
4997 ret = insert_reserved_file_extent(trans, inode, 5050 ret = insert_reserved_file_extent(trans, inode,
4998 cur_offset, ins.objectid, 5051 cur_offset, ins.objectid,
4999 ins.offset, ins.offset, 5052 ins.offset, ins.offset,
5000 ins.offset, 0, 0, 0, 5053 ins.offset, locked_end,
5054 0, 0, 0,
5001 BTRFS_FILE_EXTENT_PREALLOC); 5055 BTRFS_FILE_EXTENT_PREALLOC);
5002 BUG_ON(ret); 5056 BUG_ON(ret);
5003 num_bytes -= ins.offset; 5057 num_bytes -= ins.offset;
@@ -5015,7 +5069,6 @@ out:
5015 BUG_ON(ret); 5069 BUG_ON(ret);
5016 } 5070 }
5017 5071
5018 btrfs_end_transaction(trans, root);
5019 return ret; 5072 return ret;
5020} 5073}
5021 5074
@@ -5027,13 +5080,21 @@ static long btrfs_fallocate(struct inode *inode, int mode,
5027 u64 alloc_start; 5080 u64 alloc_start;
5028 u64 alloc_end; 5081 u64 alloc_end;
5029 u64 alloc_hint = 0; 5082 u64 alloc_hint = 0;
5083 u64 locked_end;
5030 u64 mask = BTRFS_I(inode)->root->sectorsize - 1; 5084 u64 mask = BTRFS_I(inode)->root->sectorsize - 1;
5031 struct extent_map *em; 5085 struct extent_map *em;
5086 struct btrfs_trans_handle *trans;
5032 int ret; 5087 int ret;
5033 5088
5034 alloc_start = offset & ~mask; 5089 alloc_start = offset & ~mask;
5035 alloc_end = (offset + len + mask) & ~mask; 5090 alloc_end = (offset + len + mask) & ~mask;
5036 5091
5092 /*
5093 * wait for ordered IO before we have any locks. We'll loop again
5094 * below with the locks held.
5095 */
5096 btrfs_wait_ordered_range(inode, alloc_start, alloc_end - alloc_start);
5097
5037 mutex_lock(&inode->i_mutex); 5098 mutex_lock(&inode->i_mutex);
5038 if (alloc_start > inode->i_size) { 5099 if (alloc_start > inode->i_size) {
5039 ret = btrfs_cont_expand(inode, alloc_start); 5100 ret = btrfs_cont_expand(inode, alloc_start);
@@ -5041,10 +5102,21 @@ static long btrfs_fallocate(struct inode *inode, int mode,
5041 goto out; 5102 goto out;
5042 } 5103 }
5043 5104
5105 locked_end = alloc_end - 1;
5044 while (1) { 5106 while (1) {
5045 struct btrfs_ordered_extent *ordered; 5107 struct btrfs_ordered_extent *ordered;
5046 lock_extent(&BTRFS_I(inode)->io_tree, alloc_start, 5108
5047 alloc_end - 1, GFP_NOFS); 5109 trans = btrfs_start_transaction(BTRFS_I(inode)->root, 1);
5110 if (!trans) {
5111 ret = -EIO;
5112 goto out;
5113 }
5114
5115 /* the extent lock is ordered inside the running
5116 * transaction
5117 */
5118 lock_extent(&BTRFS_I(inode)->io_tree, alloc_start, locked_end,
5119 GFP_NOFS);
5048 ordered = btrfs_lookup_first_ordered_extent(inode, 5120 ordered = btrfs_lookup_first_ordered_extent(inode,
5049 alloc_end - 1); 5121 alloc_end - 1);
5050 if (ordered && 5122 if (ordered &&
@@ -5052,7 +5124,13 @@ static long btrfs_fallocate(struct inode *inode, int mode,
5052 ordered->file_offset < alloc_end) { 5124 ordered->file_offset < alloc_end) {
5053 btrfs_put_ordered_extent(ordered); 5125 btrfs_put_ordered_extent(ordered);
5054 unlock_extent(&BTRFS_I(inode)->io_tree, 5126 unlock_extent(&BTRFS_I(inode)->io_tree,
5055 alloc_start, alloc_end - 1, GFP_NOFS); 5127 alloc_start, locked_end, GFP_NOFS);
5128 btrfs_end_transaction(trans, BTRFS_I(inode)->root);
5129
5130 /*
5131 * we can't wait on the range with the transaction
5132 * running or with the extent lock held
5133 */
5056 btrfs_wait_ordered_range(inode, alloc_start, 5134 btrfs_wait_ordered_range(inode, alloc_start,
5057 alloc_end - alloc_start); 5135 alloc_end - alloc_start);
5058 } else { 5136 } else {
@@ -5070,8 +5148,9 @@ static long btrfs_fallocate(struct inode *inode, int mode,
5070 last_byte = min(extent_map_end(em), alloc_end); 5148 last_byte = min(extent_map_end(em), alloc_end);
5071 last_byte = (last_byte + mask) & ~mask; 5149 last_byte = (last_byte + mask) & ~mask;
5072 if (em->block_start == EXTENT_MAP_HOLE) { 5150 if (em->block_start == EXTENT_MAP_HOLE) {
5073 ret = prealloc_file_range(inode, cur_offset, 5151 ret = prealloc_file_range(trans, inode, cur_offset,
5074 last_byte, alloc_hint, mode); 5152 last_byte, locked_end + 1,
5153 alloc_hint, mode);
5075 if (ret < 0) { 5154 if (ret < 0) {
5076 free_extent_map(em); 5155 free_extent_map(em);
5077 break; 5156 break;
@@ -5087,8 +5166,10 @@ static long btrfs_fallocate(struct inode *inode, int mode,
5087 break; 5166 break;
5088 } 5167 }
5089 } 5168 }
5090 unlock_extent(&BTRFS_I(inode)->io_tree, alloc_start, alloc_end - 1, 5169 unlock_extent(&BTRFS_I(inode)->io_tree, alloc_start, locked_end,
5091 GFP_NOFS); 5170 GFP_NOFS);
5171
5172 btrfs_end_transaction(trans, BTRFS_I(inode)->root);
5092out: 5173out:
5093 mutex_unlock(&inode->i_mutex); 5174 mutex_unlock(&inode->i_mutex);
5094 return ret; 5175 return ret;
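The fallocate changes above impose a strict ordering: wait for ordered IO once with nothing held, then in each pass start the transaction first, take the extent lock inside it, and if an ordered extent still overlaps, drop both the lock and the transaction before waiting and retrying. A schematic of that retry discipline with every resource stubbed out (nothing below is the btrfs API, it only illustrates the acquire/release order):

#include <stdio.h>

static void start_transaction(void)   { printf("start transaction\n"); }
static void end_transaction(void)     { printf("end transaction\n"); }
static void lock_extent_range(void)   { printf("lock extent range\n"); }
static void unlock_extent_range(void) { printf("unlock extent range\n"); }
static void wait_for_ordered_io(void) { printf("wait for ordered IO (nothing held)\n"); }

/* pretend the first check still sees an overlapping ordered extent */
static int ordered_extent_pending(void)
{
	static int pending = 1;
	return pending--;
}

int main(void)
{
	wait_for_ordered_io();			/* cheap first pass, no locks yet */

	for (;;) {
		start_transaction();		/* transaction before the extent lock */
		lock_extent_range();
		if (ordered_extent_pending()) {
			/* can't wait while holding either resource */
			unlock_extent_range();
			end_transaction();
			wait_for_ordered_io();
			continue;
		}
		printf("preallocate extents\n");
		unlock_extent_range();
		end_transaction();
		break;
	}
	return 0;
}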
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 7594bec1be1..2624b53ea78 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -437,10 +437,6 @@ out_unlock:
437 return 0; 437 return 0;
438} 438}
439 439
440/*
441 * Called inside transaction, so use GFP_NOFS
442 */
443
444static int btrfs_ioctl_resize(struct btrfs_root *root, void __user *arg) 440static int btrfs_ioctl_resize(struct btrfs_root *root, void __user *arg)
445{ 441{
446 u64 new_size; 442 u64 new_size;
@@ -461,15 +457,9 @@ static int btrfs_ioctl_resize(struct btrfs_root *root, void __user *arg)
461 if (!capable(CAP_SYS_ADMIN)) 457 if (!capable(CAP_SYS_ADMIN))
462 return -EPERM; 458 return -EPERM;
463 459
464 vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS); 460 vol_args = memdup_user(arg, sizeof(*vol_args));
465 461 if (IS_ERR(vol_args))
466 if (!vol_args) 462 return PTR_ERR(vol_args);
467 return -ENOMEM;
468
469 if (copy_from_user(vol_args, arg, sizeof(*vol_args))) {
470 ret = -EFAULT;
471 goto out;
472 }
473 463
474 vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; 464 vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
475 namelen = strlen(vol_args->name); 465 namelen = strlen(vol_args->name);
@@ -483,11 +473,13 @@ static int btrfs_ioctl_resize(struct btrfs_root *root, void __user *arg)
483 *devstr = '\0'; 473 *devstr = '\0';
484 devstr = vol_args->name; 474 devstr = vol_args->name;
485 devid = simple_strtoull(devstr, &end, 10); 475 devid = simple_strtoull(devstr, &end, 10);
486 printk(KERN_INFO "resizing devid %llu\n", devid); 476 printk(KERN_INFO "resizing devid %llu\n",
477 (unsigned long long)devid);
487 } 478 }
488 device = btrfs_find_device(root, devid, NULL, NULL); 479 device = btrfs_find_device(root, devid, NULL, NULL);
489 if (!device) { 480 if (!device) {
490 printk(KERN_INFO "resizer unable to find device %llu\n", devid); 481 printk(KERN_INFO "resizer unable to find device %llu\n",
482 (unsigned long long)devid);
491 ret = -EINVAL; 483 ret = -EINVAL;
492 goto out_unlock; 484 goto out_unlock;
493 } 485 }
@@ -545,7 +537,6 @@ static int btrfs_ioctl_resize(struct btrfs_root *root, void __user *arg)
545 537
546out_unlock: 538out_unlock:
547 mutex_unlock(&root->fs_info->volume_mutex); 539 mutex_unlock(&root->fs_info->volume_mutex);
548out:
549 kfree(vol_args); 540 kfree(vol_args);
550 return ret; 541 return ret;
551} 542}
@@ -565,15 +556,9 @@ static noinline int btrfs_ioctl_snap_create(struct file *file,
565 if (root->fs_info->sb->s_flags & MS_RDONLY) 556 if (root->fs_info->sb->s_flags & MS_RDONLY)
566 return -EROFS; 557 return -EROFS;
567 558
568 vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS); 559 vol_args = memdup_user(arg, sizeof(*vol_args));
569 560 if (IS_ERR(vol_args))
570 if (!vol_args) 561 return PTR_ERR(vol_args);
571 return -ENOMEM;
572
573 if (copy_from_user(vol_args, arg, sizeof(*vol_args))) {
574 ret = -EFAULT;
575 goto out;
576 }
577 562
578 vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; 563 vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
579 namelen = strlen(vol_args->name); 564 namelen = strlen(vol_args->name);
@@ -675,19 +660,13 @@ static long btrfs_ioctl_add_dev(struct btrfs_root *root, void __user *arg)
675 if (!capable(CAP_SYS_ADMIN)) 660 if (!capable(CAP_SYS_ADMIN))
676 return -EPERM; 661 return -EPERM;
677 662
678 vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS); 663 vol_args = memdup_user(arg, sizeof(*vol_args));
679 664 if (IS_ERR(vol_args))
680 if (!vol_args) 665 return PTR_ERR(vol_args);
681 return -ENOMEM;
682 666
683 if (copy_from_user(vol_args, arg, sizeof(*vol_args))) {
684 ret = -EFAULT;
685 goto out;
686 }
687 vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; 667 vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
688 ret = btrfs_init_new_device(root, vol_args->name); 668 ret = btrfs_init_new_device(root, vol_args->name);
689 669
690out:
691 kfree(vol_args); 670 kfree(vol_args);
692 return ret; 671 return ret;
693} 672}
@@ -703,19 +682,13 @@ static long btrfs_ioctl_rm_dev(struct btrfs_root *root, void __user *arg)
703 if (root->fs_info->sb->s_flags & MS_RDONLY) 682 if (root->fs_info->sb->s_flags & MS_RDONLY)
704 return -EROFS; 683 return -EROFS;
705 684
706 vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS); 685 vol_args = memdup_user(arg, sizeof(*vol_args));
707 686 if (IS_ERR(vol_args))
708 if (!vol_args) 687 return PTR_ERR(vol_args);
709 return -ENOMEM;
710 688
711 if (copy_from_user(vol_args, arg, sizeof(*vol_args))) {
712 ret = -EFAULT;
713 goto out;
714 }
715 vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; 689 vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
716 ret = btrfs_rm_device(root, vol_args->name); 690 ret = btrfs_rm_device(root, vol_args->name);
717 691
718out:
719 kfree(vol_args); 692 kfree(vol_args);
720 return ret; 693 return ret;
721} 694}
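The ioctl entry points above replace the open-coded kmalloc + copy_from_user pair with memdup_user(), which allocates, copies, and returns an ERR_PTR on failure in one call, so the extra "out:" label disappears. The resulting shape of such a handler, sketched with a hypothetical argument struct (only memdup_user/IS_ERR/PTR_ERR/kfree are real kernel interfaces here; this is a kernel-context fragment, not a standalone program):

#include <linux/slab.h>
#include <linux/string.h>
#include <linux/err.h>
#include <linux/uaccess.h>

struct demo_args {			/* hypothetical ioctl argument block */
	char name[64];
};

static long demo_ioctl(void __user *arg)
{
	struct demo_args *args;
	long ret;

	args = memdup_user(arg, sizeof(*args));	/* kmalloc + copy_from_user in one step */
	if (IS_ERR(args))
		return PTR_ERR(args);		/* -ENOMEM or -EFAULT */

	args->name[sizeof(args->name) - 1] = '\0';
	ret = 0;				/* ... act on args->name ... */

	kfree(args);
	return ret;
}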
@@ -830,7 +803,8 @@ static long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
830 BUG_ON(!trans); 803 BUG_ON(!trans);
831 804
832 /* punch hole in destination first */ 805 /* punch hole in destination first */
833 btrfs_drop_extents(trans, root, inode, off, off+len, 0, &hint_byte); 806 btrfs_drop_extents(trans, root, inode, off, off + len,
807 off + len, 0, &hint_byte);
834 808
835 /* clone data */ 809 /* clone data */
836 key.objectid = src->i_ino; 810 key.objectid = src->i_ino;
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 53c87b197d7..d6f0806c682 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -489,7 +489,7 @@ again:
489 /* start IO across the range first to instantiate any delalloc 489 /* start IO across the range first to instantiate any delalloc
490 * extents 490 * extents
491 */ 491 */
492 btrfs_fdatawrite_range(inode->i_mapping, start, orig_end, WB_SYNC_NONE); 492 btrfs_fdatawrite_range(inode->i_mapping, start, orig_end, WB_SYNC_ALL);
493 493
494 /* The compression code will leave pages locked but return from 494 /* The compression code will leave pages locked but return from
495 * writepage without setting the page writeback. Starting again 495 * writepage without setting the page writeback. Starting again
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 9744af9d71e..2ff7cd2db25 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -68,7 +68,7 @@ enum {
68 Opt_degraded, Opt_subvol, Opt_device, Opt_nodatasum, Opt_nodatacow, 68 Opt_degraded, Opt_subvol, Opt_device, Opt_nodatasum, Opt_nodatacow,
69 Opt_max_extent, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, 69 Opt_max_extent, Opt_max_inline, Opt_alloc_start, Opt_nobarrier,
70 Opt_ssd, Opt_thread_pool, Opt_noacl, Opt_compress, Opt_notreelog, 70 Opt_ssd, Opt_thread_pool, Opt_noacl, Opt_compress, Opt_notreelog,
71 Opt_flushoncommit, Opt_err, 71 Opt_ratio, Opt_flushoncommit, Opt_err,
72}; 72};
73 73
74static match_table_t tokens = { 74static match_table_t tokens = {
@@ -87,6 +87,7 @@ static match_table_t tokens = {
87 {Opt_noacl, "noacl"}, 87 {Opt_noacl, "noacl"},
88 {Opt_notreelog, "notreelog"}, 88 {Opt_notreelog, "notreelog"},
89 {Opt_flushoncommit, "flushoncommit"}, 89 {Opt_flushoncommit, "flushoncommit"},
90 {Opt_ratio, "metadata_ratio=%d"},
90 {Opt_err, NULL}, 91 {Opt_err, NULL},
91}; 92};
92 93
@@ -195,7 +196,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
195 info->max_extent = max_t(u64, 196 info->max_extent = max_t(u64,
196 info->max_extent, root->sectorsize); 197 info->max_extent, root->sectorsize);
197 printk(KERN_INFO "btrfs: max_extent at %llu\n", 198 printk(KERN_INFO "btrfs: max_extent at %llu\n",
198 info->max_extent); 199 (unsigned long long)info->max_extent);
199 } 200 }
200 break; 201 break;
201 case Opt_max_inline: 202 case Opt_max_inline:
@@ -210,7 +211,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
210 root->sectorsize); 211 root->sectorsize);
211 } 212 }
212 printk(KERN_INFO "btrfs: max_inline at %llu\n", 213 printk(KERN_INFO "btrfs: max_inline at %llu\n",
213 info->max_inline); 214 (unsigned long long)info->max_inline);
214 } 215 }
215 break; 216 break;
216 case Opt_alloc_start: 217 case Opt_alloc_start:
@@ -220,7 +221,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
220 kfree(num); 221 kfree(num);
221 printk(KERN_INFO 222 printk(KERN_INFO
222 "btrfs: allocations start at %llu\n", 223 "btrfs: allocations start at %llu\n",
223 info->alloc_start); 224 (unsigned long long)info->alloc_start);
224 } 225 }
225 break; 226 break;
226 case Opt_noacl: 227 case Opt_noacl:
@@ -234,6 +235,15 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
234 printk(KERN_INFO "btrfs: turning on flush-on-commit\n"); 235 printk(KERN_INFO "btrfs: turning on flush-on-commit\n");
235 btrfs_set_opt(info->mount_opt, FLUSHONCOMMIT); 236 btrfs_set_opt(info->mount_opt, FLUSHONCOMMIT);
236 break; 237 break;
238 case Opt_ratio:
239 intarg = 0;
240 match_int(&args[0], &intarg);
241 if (intarg) {
242 info->metadata_ratio = intarg;
243 printk(KERN_INFO "btrfs: metadata ratio %d\n",
244 info->metadata_ratio);
245 }
246 break;
237 default: 247 default:
238 break; 248 break;
239 } 249 }
@@ -410,11 +420,14 @@ static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
410 if (btrfs_test_opt(root, NOBARRIER)) 420 if (btrfs_test_opt(root, NOBARRIER))
411 seq_puts(seq, ",nobarrier"); 421 seq_puts(seq, ",nobarrier");
412 if (info->max_extent != (u64)-1) 422 if (info->max_extent != (u64)-1)
413 seq_printf(seq, ",max_extent=%llu", info->max_extent); 423 seq_printf(seq, ",max_extent=%llu",
424 (unsigned long long)info->max_extent);
414 if (info->max_inline != 8192 * 1024) 425 if (info->max_inline != 8192 * 1024)
415 seq_printf(seq, ",max_inline=%llu", info->max_inline); 426 seq_printf(seq, ",max_inline=%llu",
427 (unsigned long long)info->max_inline);
416 if (info->alloc_start != 0) 428 if (info->alloc_start != 0)
417 seq_printf(seq, ",alloc_start=%llu", info->alloc_start); 429 seq_printf(seq, ",alloc_start=%llu",
430 (unsigned long long)info->alloc_start);
418 if (info->thread_pool_size != min_t(unsigned long, 431 if (info->thread_pool_size != min_t(unsigned long,
419 num_online_cpus() + 2, 8)) 432 num_online_cpus() + 2, 8))
420 seq_printf(seq, ",thread_pool=%d", info->thread_pool_size); 433 seq_printf(seq, ",thread_pool=%d", info->thread_pool_size);
@@ -423,9 +436,9 @@ static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
423 if (btrfs_test_opt(root, SSD)) 436 if (btrfs_test_opt(root, SSD))
424 seq_puts(seq, ",ssd"); 437 seq_puts(seq, ",ssd");
425 if (btrfs_test_opt(root, NOTREELOG)) 438 if (btrfs_test_opt(root, NOTREELOG))
426 seq_puts(seq, ",no-treelog"); 439 seq_puts(seq, ",notreelog");
427 if (btrfs_test_opt(root, FLUSHONCOMMIT)) 440 if (btrfs_test_opt(root, FLUSHONCOMMIT))
428 seq_puts(seq, ",flush-on-commit"); 441 seq_puts(seq, ",flushoncommit");
429 if (!(root->fs_info->sb->s_flags & MS_POSIXACL)) 442 if (!(root->fs_info->sb->s_flags & MS_POSIXACL))
430 seq_puts(seq, ",noacl"); 443 seq_puts(seq, ",noacl");
431 return 0; 444 return 0;
@@ -489,8 +502,7 @@ static int btrfs_get_sb(struct file_system_type *fs_type, int flags,
489 502
490 if (s->s_root) { 503 if (s->s_root) {
491 if ((flags ^ s->s_flags) & MS_RDONLY) { 504 if ((flags ^ s->s_flags) & MS_RDONLY) {
492 up_write(&s->s_umount); 505 deactivate_locked_super(s);
493 deactivate_super(s);
494 error = -EBUSY; 506 error = -EBUSY;
495 goto error_close_devices; 507 goto error_close_devices;
496 } 508 }
@@ -504,8 +516,7 @@ static int btrfs_get_sb(struct file_system_type *fs_type, int flags,
504 error = btrfs_fill_super(s, fs_devices, data, 516 error = btrfs_fill_super(s, fs_devices, data,
505 flags & MS_SILENT ? 1 : 0); 517 flags & MS_SILENT ? 1 : 0);
506 if (error) { 518 if (error) {
507 up_write(&s->s_umount); 519 deactivate_locked_super(s);
508 deactivate_super(s);
509 goto error_free_subvol_name; 520 goto error_free_subvol_name;
510 } 521 }
511 522
@@ -522,15 +533,13 @@ static int btrfs_get_sb(struct file_system_type *fs_type, int flags,
522 mutex_unlock(&s->s_root->d_inode->i_mutex); 533 mutex_unlock(&s->s_root->d_inode->i_mutex);
523 534
524 if (IS_ERR(root)) { 535 if (IS_ERR(root)) {
525 up_write(&s->s_umount); 536 deactivate_locked_super(s);
526 deactivate_super(s);
527 error = PTR_ERR(root); 537 error = PTR_ERR(root);
528 goto error_free_subvol_name; 538 goto error_free_subvol_name;
529 } 539 }
530 if (!root->d_inode) { 540 if (!root->d_inode) {
531 dput(root); 541 dput(root);
532 up_write(&s->s_umount); 542 deactivate_locked_super(s);
533 deactivate_super(s);
534 error = -ENXIO; 543 error = -ENXIO;
535 goto error_free_subvol_name; 544 goto error_free_subvol_name;
536 } 545 }
@@ -635,14 +644,9 @@ static long btrfs_control_ioctl(struct file *file, unsigned int cmd,
635 if (!capable(CAP_SYS_ADMIN)) 644 if (!capable(CAP_SYS_ADMIN))
636 return -EPERM; 645 return -EPERM;
637 646
638 vol = kmalloc(sizeof(*vol), GFP_KERNEL); 647 vol = memdup_user((void __user *)arg, sizeof(*vol));
639 if (!vol) 648 if (IS_ERR(vol))
640 return -ENOMEM; 649 return PTR_ERR(vol);
641
642 if (copy_from_user(vol, (void __user *)arg, sizeof(*vol))) {
643 ret = -EFAULT;
644 goto out;
645 }
646 650
647 switch (cmd) { 651 switch (cmd) {
648 case BTRFS_IOC_SCAN_DEV: 652 case BTRFS_IOC_SCAN_DEV:
@@ -650,7 +654,7 @@ static long btrfs_control_ioctl(struct file *file, unsigned int cmd,
650 &btrfs_fs_type, &fs_devices); 654 &btrfs_fs_type, &fs_devices);
651 break; 655 break;
652 } 656 }
653out: 657
654 kfree(vol); 658 kfree(vol);
655 return ret; 659 return ret;
656} 660}
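
The new metadata_ratio option follows the existing match_table_t pattern: the token carries a %d argument that btrfs_parse_options() extracts with match_int() and stores in fs_info. A condensed sketch of that flow, assuming <linux/parser.h> (only the new case is shown; the real token table and switch are much larger):

static match_table_t example_tokens = {
	{Opt_ratio, "metadata_ratio=%d"},
	{Opt_err, NULL},
};

static void parse_metadata_ratio(struct btrfs_fs_info *info, char *p)
{
	substring_t args[MAX_OPT_ARGS];
	int intarg = 0;

	if (match_token(p, example_tokens, args) != Opt_ratio)
		return;

	match_int(&args[0], &intarg);
	if (intarg) {
		info->metadata_ratio = intarg;
		printk(KERN_INFO "btrfs: metadata ratio %d\n",
		       info->metadata_ratio);
	}
}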
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 2869b3361eb..01b143605ec 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -687,7 +687,13 @@ static noinline int wait_transaction_pre_flush(struct btrfs_fs_info *info)
687 prepare_to_wait(&info->transaction_wait, &wait, 687 prepare_to_wait(&info->transaction_wait, &wait,
688 TASK_UNINTERRUPTIBLE); 688 TASK_UNINTERRUPTIBLE);
689 mutex_unlock(&info->trans_mutex); 689 mutex_unlock(&info->trans_mutex);
690
691 atomic_dec(&info->throttles);
692 wake_up(&info->transaction_throttle);
693
690 schedule(); 694 schedule();
695
696 atomic_inc(&info->throttles);
691 mutex_lock(&info->trans_mutex); 697 mutex_lock(&info->trans_mutex);
692 finish_wait(&info->transaction_wait, &wait); 698 finish_wait(&info->transaction_wait, &wait);
693 } 699 }
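
The transaction.c hunk releases the caller's throttle reference while it sleeps, so a committing transaction waiting on info->transaction_throttle cannot stall behind a thread that is itself waiting for the flush to finish. Roughly, the wait loop now has this shape (a sketch; transaction_still_flushing() stands in for the real loop condition and is not a btrfs helper):

/* Sketch of the sleep step, entered with info->trans_mutex held. */
static void wait_pre_flush_sketch(struct btrfs_fs_info *info)
{
	DEFINE_WAIT(wait);

	while (transaction_still_flushing(info)) {
		prepare_to_wait(&info->transaction_wait, &wait,
				TASK_UNINTERRUPTIBLE);
		mutex_unlock(&info->trans_mutex);

		/* stop counting as a throttled user while asleep */
		atomic_dec(&info->throttles);
		wake_up(&info->transaction_throttle);

		schedule();

		/* take the reference back before rechecking the condition */
		atomic_inc(&info->throttles);
		mutex_lock(&info->trans_mutex);
		finish_wait(&info->transaction_wait, &wait);
	}
}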
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 25f20ea11f2..db5e212e844 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -536,7 +536,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
536 saved_nbytes = inode_get_bytes(inode); 536 saved_nbytes = inode_get_bytes(inode);
537 /* drop any overlapping extents */ 537 /* drop any overlapping extents */
538 ret = btrfs_drop_extents(trans, root, inode, 538 ret = btrfs_drop_extents(trans, root, inode,
539 start, extent_end, start, &alloc_hint); 539 start, extent_end, extent_end, start, &alloc_hint);
540 BUG_ON(ret); 540 BUG_ON(ret);
541 541
542 if (found_type == BTRFS_FILE_EXTENT_REG || 542 if (found_type == BTRFS_FILE_EXTENT_REG ||
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index e0913e46972..a6d35b0054c 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -125,6 +125,20 @@ static noinline struct btrfs_fs_devices *find_fsid(u8 *fsid)
125 return NULL; 125 return NULL;
126} 126}
127 127
128static void requeue_list(struct btrfs_pending_bios *pending_bios,
129 struct bio *head, struct bio *tail)
130{
131
132 struct bio *old_head;
133
134 old_head = pending_bios->head;
135 pending_bios->head = head;
136 if (pending_bios->tail)
137 tail->bi_next = old_head;
138 else
139 pending_bios->tail = tail;
140}
141
128/* 142/*
129 * we try to collect pending bios for a device so we don't get a large 143 * we try to collect pending bios for a device so we don't get a large
130 * number of procs sending bios down to the same device. This greatly 144 * number of procs sending bios down to the same device. This greatly
@@ -141,10 +155,12 @@ static noinline int run_scheduled_bios(struct btrfs_device *device)
141 struct bio *pending; 155 struct bio *pending;
142 struct backing_dev_info *bdi; 156 struct backing_dev_info *bdi;
143 struct btrfs_fs_info *fs_info; 157 struct btrfs_fs_info *fs_info;
158 struct btrfs_pending_bios *pending_bios;
144 struct bio *tail; 159 struct bio *tail;
145 struct bio *cur; 160 struct bio *cur;
146 int again = 0; 161 int again = 0;
147 unsigned long num_run = 0; 162 unsigned long num_run;
163 unsigned long num_sync_run;
148 unsigned long limit; 164 unsigned long limit;
149 unsigned long last_waited = 0; 165 unsigned long last_waited = 0;
150 166
@@ -153,20 +169,30 @@ static noinline int run_scheduled_bios(struct btrfs_device *device)
153 limit = btrfs_async_submit_limit(fs_info); 169 limit = btrfs_async_submit_limit(fs_info);
154 limit = limit * 2 / 3; 170 limit = limit * 2 / 3;
155 171
172 /* we want to make sure that every time we switch from the sync
173 * list to the normal list, we unplug
174 */
175 num_sync_run = 0;
176
156loop: 177loop:
157 spin_lock(&device->io_lock); 178 spin_lock(&device->io_lock);
179 num_run = 0;
158 180
159loop_lock: 181loop_lock:
182
160 /* take all the bios off the list at once and process them 183 /* take all the bios off the list at once and process them
161 * later on (without the lock held). But, remember the 184 * later on (without the lock held). But, remember the
162 * tail and other pointers so the bios can be properly reinserted 185 * tail and other pointers so the bios can be properly reinserted
163 * into the list if we hit congestion 186 * into the list if we hit congestion
164 */ 187 */
165 pending = device->pending_bios; 188 if (device->pending_sync_bios.head)
166 tail = device->pending_bio_tail; 189 pending_bios = &device->pending_sync_bios;
190 else
191 pending_bios = &device->pending_bios;
192
193 pending = pending_bios->head;
194 tail = pending_bios->tail;
167 WARN_ON(pending && !tail); 195 WARN_ON(pending && !tail);
168 device->pending_bios = NULL;
169 device->pending_bio_tail = NULL;
170 196
171 /* 197 /*
172 * if pending was null this time around, no bios need processing 198 * if pending was null this time around, no bios need processing
@@ -176,16 +202,41 @@ loop_lock:
176 * device->running_pending is used to synchronize with the 202 * device->running_pending is used to synchronize with the
177 * schedule_bio code. 203 * schedule_bio code.
178 */ 204 */
179 if (pending) { 205 if (device->pending_sync_bios.head == NULL &&
180 again = 1; 206 device->pending_bios.head == NULL) {
181 device->running_pending = 1;
182 } else {
183 again = 0; 207 again = 0;
184 device->running_pending = 0; 208 device->running_pending = 0;
209 } else {
210 again = 1;
211 device->running_pending = 1;
185 } 212 }
213
214 pending_bios->head = NULL;
215 pending_bios->tail = NULL;
216
186 spin_unlock(&device->io_lock); 217 spin_unlock(&device->io_lock);
187 218
219 /*
220 * if we're doing the regular priority list, make sure we unplug
221 * for any high prio bios we've sent down
222 */
223 if (pending_bios == &device->pending_bios && num_sync_run > 0) {
224 num_sync_run = 0;
225 blk_run_backing_dev(bdi, NULL);
226 }
227
188 while (pending) { 228 while (pending) {
229
230 rmb();
231 if (pending_bios != &device->pending_sync_bios &&
232 device->pending_sync_bios.head &&
233 num_run > 16) {
234 cond_resched();
235 spin_lock(&device->io_lock);
236 requeue_list(pending_bios, pending, tail);
237 goto loop_lock;
238 }
239
189 cur = pending; 240 cur = pending;
190 pending = pending->bi_next; 241 pending = pending->bi_next;
191 cur->bi_next = NULL; 242 cur->bi_next = NULL;
@@ -196,10 +247,18 @@ loop_lock:
196 wake_up(&fs_info->async_submit_wait); 247 wake_up(&fs_info->async_submit_wait);
197 248
198 BUG_ON(atomic_read(&cur->bi_cnt) == 0); 249 BUG_ON(atomic_read(&cur->bi_cnt) == 0);
199 bio_get(cur);
200 submit_bio(cur->bi_rw, cur); 250 submit_bio(cur->bi_rw, cur);
201 bio_put(cur);
202 num_run++; 251 num_run++;
252 if (bio_sync(cur))
253 num_sync_run++;
254
255 if (need_resched()) {
256 if (num_sync_run) {
257 blk_run_backing_dev(bdi, NULL);
258 num_sync_run = 0;
259 }
260 cond_resched();
261 }
203 262
204 /* 263 /*
205 * we made progress, there is more work to do and the bdi 264 * we made progress, there is more work to do and the bdi
@@ -208,7 +267,6 @@ loop_lock:
208 */ 267 */
209 if (pending && bdi_write_congested(bdi) && num_run > 16 && 268 if (pending && bdi_write_congested(bdi) && num_run > 16 &&
210 fs_info->fs_devices->open_devices > 1) { 269 fs_info->fs_devices->open_devices > 1) {
211 struct bio *old_head;
212 struct io_context *ioc; 270 struct io_context *ioc;
213 271
214 ioc = current->io_context; 272 ioc = current->io_context;
@@ -233,17 +291,17 @@ loop_lock:
233 * against it before looping 291 * against it before looping
234 */ 292 */
235 last_waited = ioc->last_waited; 293 last_waited = ioc->last_waited;
294 if (need_resched()) {
295 if (num_sync_run) {
296 blk_run_backing_dev(bdi, NULL);
297 num_sync_run = 0;
298 }
299 cond_resched();
300 }
236 continue; 301 continue;
237 } 302 }
238 spin_lock(&device->io_lock); 303 spin_lock(&device->io_lock);
239 304 requeue_list(pending_bios, pending, tail);
240 old_head = device->pending_bios;
241 device->pending_bios = pending;
242 if (device->pending_bio_tail)
243 tail->bi_next = old_head;
244 else
245 device->pending_bio_tail = tail;
246
247 device->running_pending = 1; 305 device->running_pending = 1;
248 306
249 spin_unlock(&device->io_lock); 307 spin_unlock(&device->io_lock);
@@ -251,11 +309,18 @@ loop_lock:
251 goto done; 309 goto done;
252 } 310 }
253 } 311 }
312
313 if (num_sync_run) {
314 num_sync_run = 0;
315 blk_run_backing_dev(bdi, NULL);
316 }
317
318 cond_resched();
254 if (again) 319 if (again)
255 goto loop; 320 goto loop;
256 321
257 spin_lock(&device->io_lock); 322 spin_lock(&device->io_lock);
258 if (device->pending_bios) 323 if (device->pending_bios.head || device->pending_sync_bios.head)
259 goto loop_lock; 324 goto loop_lock;
260 spin_unlock(&device->io_lock); 325 spin_unlock(&device->io_lock);
261 326
@@ -1375,6 +1440,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
1375 device->io_align = root->sectorsize; 1440 device->io_align = root->sectorsize;
1376 device->sector_size = root->sectorsize; 1441 device->sector_size = root->sectorsize;
1377 device->total_bytes = i_size_read(bdev->bd_inode); 1442 device->total_bytes = i_size_read(bdev->bd_inode);
1443 device->disk_total_bytes = device->total_bytes;
1378 device->dev_root = root->fs_info->dev_root; 1444 device->dev_root = root->fs_info->dev_root;
1379 device->bdev = bdev; 1445 device->bdev = bdev;
1380 device->in_fs_metadata = 1; 1446 device->in_fs_metadata = 1;
@@ -1478,7 +1544,7 @@ static noinline int btrfs_update_device(struct btrfs_trans_handle *trans,
1478 btrfs_set_device_io_align(leaf, dev_item, device->io_align); 1544 btrfs_set_device_io_align(leaf, dev_item, device->io_align);
1479 btrfs_set_device_io_width(leaf, dev_item, device->io_width); 1545 btrfs_set_device_io_width(leaf, dev_item, device->io_width);
1480 btrfs_set_device_sector_size(leaf, dev_item, device->sector_size); 1546 btrfs_set_device_sector_size(leaf, dev_item, device->sector_size);
1481 btrfs_set_device_total_bytes(leaf, dev_item, device->total_bytes); 1547 btrfs_set_device_total_bytes(leaf, dev_item, device->disk_total_bytes);
1482 btrfs_set_device_bytes_used(leaf, dev_item, device->bytes_used); 1548 btrfs_set_device_bytes_used(leaf, dev_item, device->bytes_used);
1483 btrfs_mark_buffer_dirty(leaf); 1549 btrfs_mark_buffer_dirty(leaf);
1484 1550
@@ -1875,14 +1941,6 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
1875 device->total_bytes = new_size; 1941 device->total_bytes = new_size;
1876 if (device->writeable) 1942 if (device->writeable)
1877 device->fs_devices->total_rw_bytes -= diff; 1943 device->fs_devices->total_rw_bytes -= diff;
1878 ret = btrfs_update_device(trans, device);
1879 if (ret) {
1880 unlock_chunks(root);
1881 btrfs_end_transaction(trans, root);
1882 goto done;
1883 }
1884 WARN_ON(diff > old_total);
1885 btrfs_set_super_total_bytes(super_copy, old_total - diff);
1886 unlock_chunks(root); 1944 unlock_chunks(root);
1887 btrfs_end_transaction(trans, root); 1945 btrfs_end_transaction(trans, root);
1888 1946
@@ -1914,7 +1972,7 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
1914 length = btrfs_dev_extent_length(l, dev_extent); 1972 length = btrfs_dev_extent_length(l, dev_extent);
1915 1973
1916 if (key.offset + length <= new_size) 1974 if (key.offset + length <= new_size)
1917 goto done; 1975 break;
1918 1976
1919 chunk_tree = btrfs_dev_extent_chunk_tree(l, dev_extent); 1977 chunk_tree = btrfs_dev_extent_chunk_tree(l, dev_extent);
1920 chunk_objectid = btrfs_dev_extent_chunk_objectid(l, dev_extent); 1978 chunk_objectid = btrfs_dev_extent_chunk_objectid(l, dev_extent);
@@ -1927,6 +1985,26 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
1927 goto done; 1985 goto done;
1928 } 1986 }
1929 1987
1988 /* Shrinking succeeded, else we would be at "done". */
1989 trans = btrfs_start_transaction(root, 1);
1990 if (!trans) {
1991 ret = -ENOMEM;
1992 goto done;
1993 }
1994 lock_chunks(root);
1995
1996 device->disk_total_bytes = new_size;
1997 /* Now btrfs_update_device() will change the on-disk size. */
1998 ret = btrfs_update_device(trans, device);
1999 if (ret) {
2000 unlock_chunks(root);
2001 btrfs_end_transaction(trans, root);
2002 goto done;
2003 }
2004 WARN_ON(diff > old_total);
2005 btrfs_set_super_total_bytes(super_copy, old_total - diff);
2006 unlock_chunks(root);
2007 btrfs_end_transaction(trans, root);
1930done: 2008done:
1931 btrfs_free_path(path); 2009 btrfs_free_path(path);
1932 return ret; 2010 return ret;
@@ -2497,7 +2575,7 @@ again:
2497 max_errors = 1; 2575 max_errors = 1;
2498 } 2576 }
2499 } 2577 }
2500 if (multi_ret && rw == WRITE && 2578 if (multi_ret && (rw & (1 << BIO_RW)) &&
2501 stripes_allocated < stripes_required) { 2579 stripes_allocated < stripes_required) {
2502 stripes_allocated = map->num_stripes; 2580 stripes_allocated = map->num_stripes;
2503 free_extent_map(em); 2581 free_extent_map(em);
@@ -2762,6 +2840,7 @@ static noinline int schedule_bio(struct btrfs_root *root,
2762 int rw, struct bio *bio) 2840 int rw, struct bio *bio)
2763{ 2841{
2764 int should_queue = 1; 2842 int should_queue = 1;
2843 struct btrfs_pending_bios *pending_bios;
2765 2844
2766 /* don't bother with additional async steps for reads, right now */ 2845 /* don't bother with additional async steps for reads, right now */
2767 if (!(rw & (1 << BIO_RW))) { 2846 if (!(rw & (1 << BIO_RW))) {
@@ -2783,13 +2862,17 @@ static noinline int schedule_bio(struct btrfs_root *root,
2783 bio->bi_rw |= rw; 2862 bio->bi_rw |= rw;
2784 2863
2785 spin_lock(&device->io_lock); 2864 spin_lock(&device->io_lock);
2865 if (bio_sync(bio))
2866 pending_bios = &device->pending_sync_bios;
2867 else
2868 pending_bios = &device->pending_bios;
2786 2869
2787 if (device->pending_bio_tail) 2870 if (pending_bios->tail)
2788 device->pending_bio_tail->bi_next = bio; 2871 pending_bios->tail->bi_next = bio;
2789 2872
2790 device->pending_bio_tail = bio; 2873 pending_bios->tail = bio;
2791 if (!device->pending_bios) 2874 if (!pending_bios->head)
2792 device->pending_bios = bio; 2875 pending_bios->head = bio;
2793 if (device->running_pending) 2876 if (device->running_pending)
2794 should_queue = 0; 2877 should_queue = 0;
2795 2878
@@ -3006,7 +3089,8 @@ static int fill_device_from_item(struct extent_buffer *leaf,
3006 unsigned long ptr; 3089 unsigned long ptr;
3007 3090
3008 device->devid = btrfs_device_id(leaf, dev_item); 3091 device->devid = btrfs_device_id(leaf, dev_item);
3009 device->total_bytes = btrfs_device_total_bytes(leaf, dev_item); 3092 device->disk_total_bytes = btrfs_device_total_bytes(leaf, dev_item);
3093 device->total_bytes = device->disk_total_bytes;
3010 device->bytes_used = btrfs_device_bytes_used(leaf, dev_item); 3094 device->bytes_used = btrfs_device_bytes_used(leaf, dev_item);
3011 device->type = btrfs_device_type(leaf, dev_item); 3095 device->type = btrfs_device_type(leaf, dev_item);
3012 device->io_align = btrfs_device_io_align(leaf, dev_item); 3096 device->io_align = btrfs_device_io_align(leaf, dev_item);
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 2185de72ff7..5c3ff6d02fd 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -23,13 +23,22 @@
23#include "async-thread.h" 23#include "async-thread.h"
24 24
25struct buffer_head; 25struct buffer_head;
26struct btrfs_pending_bios {
27 struct bio *head;
28 struct bio *tail;
29};
30
26struct btrfs_device { 31struct btrfs_device {
27 struct list_head dev_list; 32 struct list_head dev_list;
28 struct list_head dev_alloc_list; 33 struct list_head dev_alloc_list;
29 struct btrfs_fs_devices *fs_devices; 34 struct btrfs_fs_devices *fs_devices;
30 struct btrfs_root *dev_root; 35 struct btrfs_root *dev_root;
31 struct bio *pending_bios; 36
32 struct bio *pending_bio_tail; 37 /* regular prio bios */
38 struct btrfs_pending_bios pending_bios;
39 /* WRITE_SYNC bios */
40 struct btrfs_pending_bios pending_sync_bios;
41
33 int running_pending; 42 int running_pending;
34 u64 generation; 43 u64 generation;
35 44
@@ -52,6 +61,9 @@ struct btrfs_device {
52 /* size of the device */ 61 /* size of the device */
53 u64 total_bytes; 62 u64 total_bytes;
54 63
64 /* size of the disk */
65 u64 disk_total_bytes;
66
55 /* bytes used */ 67 /* bytes used */
56 u64 bytes_used; 68 u64 bytes_used;
57 69
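
Together, the volumes.c and volumes.h changes split each device's queue of pending bios in two: WRITE_SYNC bios go on pending_sync_bios and are drained ahead of the regular list, with an unplug whenever the worker switches back. A condensed sketch of the enqueue side of schedule_bio() after the split (the function name here is illustrative):

static void queue_pending_bio(struct btrfs_device *device, struct bio *bio)
{
	struct btrfs_pending_bios *pending_bios;

	spin_lock(&device->io_lock);

	/* synchronous writes get their own list so they are run first */
	if (bio_sync(bio))
		pending_bios = &device->pending_sync_bios;
	else
		pending_bios = &device->pending_bios;

	if (pending_bios->tail)
		pending_bios->tail->bi_next = bio;
	pending_bios->tail = bio;
	if (!pending_bios->head)
		pending_bios->head = bio;

	spin_unlock(&device->io_lock);
}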
diff --git a/fs/buffer.c b/fs/buffer.c
index ff8bb1f2333..49106127a4a 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -360,7 +360,7 @@ still_busy:
360 * Completion handler for block_write_full_page() - pages which are unlocked 360 * Completion handler for block_write_full_page() - pages which are unlocked
361 * during I/O, and which have PageWriteback cleared upon I/O completion. 361 * during I/O, and which have PageWriteback cleared upon I/O completion.
362 */ 362 */
363static void end_buffer_async_write(struct buffer_head *bh, int uptodate) 363void end_buffer_async_write(struct buffer_head *bh, int uptodate)
364{ 364{
365 char b[BDEVNAME_SIZE]; 365 char b[BDEVNAME_SIZE];
366 unsigned long flags; 366 unsigned long flags;
@@ -438,11 +438,17 @@ static void mark_buffer_async_read(struct buffer_head *bh)
438 set_buffer_async_read(bh); 438 set_buffer_async_read(bh);
439} 439}
440 440
441void mark_buffer_async_write(struct buffer_head *bh) 441void mark_buffer_async_write_endio(struct buffer_head *bh,
442 bh_end_io_t *handler)
442{ 443{
443 bh->b_end_io = end_buffer_async_write; 444 bh->b_end_io = handler;
444 set_buffer_async_write(bh); 445 set_buffer_async_write(bh);
445} 446}
447
448void mark_buffer_async_write(struct buffer_head *bh)
449{
450 mark_buffer_async_write_endio(bh, end_buffer_async_write);
451}
446EXPORT_SYMBOL(mark_buffer_async_write); 452EXPORT_SYMBOL(mark_buffer_async_write);
447 453
448 454
@@ -1615,7 +1621,8 @@ EXPORT_SYMBOL(unmap_underlying_metadata);
1615 * unplugging the device queue. 1621 * unplugging the device queue.
1616 */ 1622 */
1617static int __block_write_full_page(struct inode *inode, struct page *page, 1623static int __block_write_full_page(struct inode *inode, struct page *page,
1618 get_block_t *get_block, struct writeback_control *wbc) 1624 get_block_t *get_block, struct writeback_control *wbc,
1625 bh_end_io_t *handler)
1619{ 1626{
1620 int err; 1627 int err;
1621 sector_t block; 1628 sector_t block;
@@ -1700,7 +1707,7 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
1700 continue; 1707 continue;
1701 } 1708 }
1702 if (test_clear_buffer_dirty(bh)) { 1709 if (test_clear_buffer_dirty(bh)) {
1703 mark_buffer_async_write(bh); 1710 mark_buffer_async_write_endio(bh, handler);
1704 } else { 1711 } else {
1705 unlock_buffer(bh); 1712 unlock_buffer(bh);
1706 } 1713 }
@@ -1753,7 +1760,7 @@ recover:
1753 if (buffer_mapped(bh) && buffer_dirty(bh) && 1760 if (buffer_mapped(bh) && buffer_dirty(bh) &&
1754 !buffer_delay(bh)) { 1761 !buffer_delay(bh)) {
1755 lock_buffer(bh); 1762 lock_buffer(bh);
1756 mark_buffer_async_write(bh); 1763 mark_buffer_async_write_endio(bh, handler);
1757 } else { 1764 } else {
1758 /* 1765 /*
1759 * The buffer may have been set dirty during 1766 * The buffer may have been set dirty during
@@ -2390,7 +2397,8 @@ block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
2390 if ((page->mapping != inode->i_mapping) || 2397 if ((page->mapping != inode->i_mapping) ||
2391 (page_offset(page) > size)) { 2398 (page_offset(page) > size)) {
2392 /* page got truncated out from underneath us */ 2399 /* page got truncated out from underneath us */
2393 goto out_unlock; 2400 unlock_page(page);
2401 goto out;
2394 } 2402 }
2395 2403
2396 /* page is wholly or partially inside EOF */ 2404 /* page is wholly or partially inside EOF */
@@ -2404,14 +2412,15 @@ block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
2404 ret = block_commit_write(page, 0, end); 2412 ret = block_commit_write(page, 0, end);
2405 2413
2406 if (unlikely(ret)) { 2414 if (unlikely(ret)) {
2415 unlock_page(page);
2407 if (ret == -ENOMEM) 2416 if (ret == -ENOMEM)
2408 ret = VM_FAULT_OOM; 2417 ret = VM_FAULT_OOM;
2409 else /* -ENOSPC, -EIO, etc */ 2418 else /* -ENOSPC, -EIO, etc */
2410 ret = VM_FAULT_SIGBUS; 2419 ret = VM_FAULT_SIGBUS;
2411 } 2420 } else
2421 ret = VM_FAULT_LOCKED;
2412 2422
2413out_unlock: 2423out:
2414 unlock_page(page);
2415 return ret; 2424 return ret;
2416} 2425}
2417 2426
@@ -2679,7 +2688,8 @@ int nobh_writepage(struct page *page, get_block_t *get_block,
2679out: 2688out:
2680 ret = mpage_writepage(page, get_block, wbc); 2689 ret = mpage_writepage(page, get_block, wbc);
2681 if (ret == -EAGAIN) 2690 if (ret == -EAGAIN)
2682 ret = __block_write_full_page(inode, page, get_block, wbc); 2691 ret = __block_write_full_page(inode, page, get_block, wbc,
2692 end_buffer_async_write);
2683 return ret; 2693 return ret;
2684} 2694}
2685EXPORT_SYMBOL(nobh_writepage); 2695EXPORT_SYMBOL(nobh_writepage);
@@ -2726,6 +2736,8 @@ has_buffers:
2726 pos += blocksize; 2736 pos += blocksize;
2727 } 2737 }
2728 2738
2739 map_bh.b_size = blocksize;
2740 map_bh.b_state = 0;
2729 err = get_block(inode, iblock, &map_bh, 0); 2741 err = get_block(inode, iblock, &map_bh, 0);
2730 if (err) 2742 if (err)
2731 goto unlock; 2743 goto unlock;
@@ -2837,9 +2849,10 @@ out:
2837 2849
2838/* 2850/*
2839 * The generic ->writepage function for buffer-backed address_spaces 2851 * The generic ->writepage function for buffer-backed address_spaces
2852 * this form passes in the end_io handler used to finish the IO.
2840 */ 2853 */
2841int block_write_full_page(struct page *page, get_block_t *get_block, 2854int block_write_full_page_endio(struct page *page, get_block_t *get_block,
2842 struct writeback_control *wbc) 2855 struct writeback_control *wbc, bh_end_io_t *handler)
2843{ 2856{
2844 struct inode * const inode = page->mapping->host; 2857 struct inode * const inode = page->mapping->host;
2845 loff_t i_size = i_size_read(inode); 2858 loff_t i_size = i_size_read(inode);
@@ -2848,7 +2861,8 @@ int block_write_full_page(struct page *page, get_block_t *get_block,
2848 2861
2849 /* Is the page fully inside i_size? */ 2862 /* Is the page fully inside i_size? */
2850 if (page->index < end_index) 2863 if (page->index < end_index)
2851 return __block_write_full_page(inode, page, get_block, wbc); 2864 return __block_write_full_page(inode, page, get_block, wbc,
2865 handler);
2852 2866
2853 /* Is the page fully outside i_size? (truncate in progress) */ 2867 /* Is the page fully outside i_size? (truncate in progress) */
2854 offset = i_size & (PAGE_CACHE_SIZE-1); 2868 offset = i_size & (PAGE_CACHE_SIZE-1);
@@ -2871,9 +2885,20 @@ int block_write_full_page(struct page *page, get_block_t *get_block,
2871 * writes to that region are not written out to the file." 2885 * writes to that region are not written out to the file."
2872 */ 2886 */
2873 zero_user_segment(page, offset, PAGE_CACHE_SIZE); 2887 zero_user_segment(page, offset, PAGE_CACHE_SIZE);
2874 return __block_write_full_page(inode, page, get_block, wbc); 2888 return __block_write_full_page(inode, page, get_block, wbc, handler);
2875} 2889}
2876 2890
2891/*
2892 * The generic ->writepage function for buffer-backed address_spaces
2893 */
2894int block_write_full_page(struct page *page, get_block_t *get_block,
2895 struct writeback_control *wbc)
2896{
2897 return block_write_full_page_endio(page, get_block, wbc,
2898 end_buffer_async_write);
2899}
2900
2901
2877sector_t generic_block_bmap(struct address_space *mapping, sector_t block, 2902sector_t generic_block_bmap(struct address_space *mapping, sector_t block,
2878 get_block_t *get_block) 2903 get_block_t *get_block)
2879{ 2904{
@@ -3342,9 +3367,11 @@ EXPORT_SYMBOL(block_read_full_page);
3342EXPORT_SYMBOL(block_sync_page); 3367EXPORT_SYMBOL(block_sync_page);
3343EXPORT_SYMBOL(block_truncate_page); 3368EXPORT_SYMBOL(block_truncate_page);
3344EXPORT_SYMBOL(block_write_full_page); 3369EXPORT_SYMBOL(block_write_full_page);
3370EXPORT_SYMBOL(block_write_full_page_endio);
3345EXPORT_SYMBOL(cont_write_begin); 3371EXPORT_SYMBOL(cont_write_begin);
3346EXPORT_SYMBOL(end_buffer_read_sync); 3372EXPORT_SYMBOL(end_buffer_read_sync);
3347EXPORT_SYMBOL(end_buffer_write_sync); 3373EXPORT_SYMBOL(end_buffer_write_sync);
3374EXPORT_SYMBOL(end_buffer_async_write);
3348EXPORT_SYMBOL(file_fsync); 3375EXPORT_SYMBOL(file_fsync);
3349EXPORT_SYMBOL(generic_block_bmap); 3376EXPORT_SYMBOL(generic_block_bmap);
3350EXPORT_SYMBOL(generic_cont_expand_simple); 3377EXPORT_SYMBOL(generic_cont_expand_simple);
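
block_write_full_page_endio() and mark_buffer_async_write_endio() let a filesystem reuse the generic buffer writeback path while supplying its own per-buffer completion handler, with end_buffer_async_write() remaining the default. A hedged sketch of how a ->writepage implementation might use the new exports (my_get_block and the bookkeeping hook are illustrative, not part of this patch):

static int my_get_block(struct inode *inode, sector_t iblock,
			struct buffer_head *bh_result, int create);

/* Custom completion: do filesystem bookkeeping, then the generic work. */
static void my_end_buffer_write(struct buffer_head *bh, int uptodate)
{
	/* e.g. update per-inode unwritten-extent state here */
	end_buffer_async_write(bh, uptodate);
}

static int my_writepage(struct page *page, struct writeback_control *wbc)
{
	return block_write_full_page_endio(page, my_get_block, wbc,
					   my_end_buffer_write);
}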
diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h
index 19218e1463d..f7c255f9c62 100644
--- a/fs/cachefiles/internal.h
+++ b/fs/cachefiles/internal.h
@@ -122,13 +122,13 @@ static inline void cachefiles_state_changed(struct cachefiles_cache *cache)
122} 122}
123 123
124/* 124/*
125 * cf-bind.c 125 * bind.c
126 */ 126 */
127extern int cachefiles_daemon_bind(struct cachefiles_cache *cache, char *args); 127extern int cachefiles_daemon_bind(struct cachefiles_cache *cache, char *args);
128extern void cachefiles_daemon_unbind(struct cachefiles_cache *cache); 128extern void cachefiles_daemon_unbind(struct cachefiles_cache *cache);
129 129
130/* 130/*
131 * cf-daemon.c 131 * daemon.c
132 */ 132 */
133extern const struct file_operations cachefiles_daemon_fops; 133extern const struct file_operations cachefiles_daemon_fops;
134 134
@@ -136,17 +136,17 @@ extern int cachefiles_has_space(struct cachefiles_cache *cache,
136 unsigned fnr, unsigned bnr); 136 unsigned fnr, unsigned bnr);
137 137
138/* 138/*
139 * cf-interface.c 139 * interface.c
140 */ 140 */
141extern const struct fscache_cache_ops cachefiles_cache_ops; 141extern const struct fscache_cache_ops cachefiles_cache_ops;
142 142
143/* 143/*
144 * cf-key.c 144 * key.c
145 */ 145 */
146extern char *cachefiles_cook_key(const u8 *raw, int keylen, uint8_t type); 146extern char *cachefiles_cook_key(const u8 *raw, int keylen, uint8_t type);
147 147
148/* 148/*
149 * cf-namei.c 149 * namei.c
150 */ 150 */
151extern int cachefiles_delete_object(struct cachefiles_cache *cache, 151extern int cachefiles_delete_object(struct cachefiles_cache *cache,
152 struct cachefiles_object *object); 152 struct cachefiles_object *object);
@@ -165,7 +165,7 @@ extern int cachefiles_check_in_use(struct cachefiles_cache *cache,
165 struct dentry *dir, char *filename); 165 struct dentry *dir, char *filename);
166 166
167/* 167/*
168 * cf-proc.c 168 * proc.c
169 */ 169 */
170#ifdef CONFIG_CACHEFILES_HISTOGRAM 170#ifdef CONFIG_CACHEFILES_HISTOGRAM
171extern atomic_t cachefiles_lookup_histogram[HZ]; 171extern atomic_t cachefiles_lookup_histogram[HZ];
@@ -190,7 +190,7 @@ void cachefiles_hist(atomic_t histogram[], unsigned long start_jif)
190#endif 190#endif
191 191
192/* 192/*
193 * cf-rdwr.c 193 * rdwr.c
194 */ 194 */
195extern int cachefiles_read_or_alloc_page(struct fscache_retrieval *, 195extern int cachefiles_read_or_alloc_page(struct fscache_retrieval *,
196 struct page *, gfp_t); 196 struct page *, gfp_t);
@@ -205,7 +205,7 @@ extern int cachefiles_write_page(struct fscache_storage *, struct page *);
205extern void cachefiles_uncache_page(struct fscache_object *, struct page *); 205extern void cachefiles_uncache_page(struct fscache_object *, struct page *);
206 206
207/* 207/*
208 * cf-security.c 208 * security.c
209 */ 209 */
210extern int cachefiles_get_security_ID(struct cachefiles_cache *cache); 210extern int cachefiles_get_security_ID(struct cachefiles_cache *cache);
211extern int cachefiles_determine_cache_security(struct cachefiles_cache *cache, 211extern int cachefiles_determine_cache_security(struct cachefiles_cache *cache,
@@ -225,7 +225,7 @@ static inline void cachefiles_end_secure(struct cachefiles_cache *cache,
225} 225}
226 226
227/* 227/*
228 * cf-xattr.c 228 * xattr.c
229 */ 229 */
230extern int cachefiles_check_object_type(struct cachefiles_object *object); 230extern int cachefiles_check_object_type(struct cachefiles_object *object);
231extern int cachefiles_set_object_xattr(struct cachefiles_object *object, 231extern int cachefiles_set_object_xattr(struct cachefiles_object *object,
diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES
index 65984006192..f20c4069c22 100644
--- a/fs/cifs/CHANGES
+++ b/fs/cifs/CHANGES
@@ -1,3 +1,16 @@
1Version 1.58
2------------
3Guard against buffer overruns in various UCS-2 to UTF-8 string conversions
4when the UTF-8 string is composed of unusually long (more than 4 byte) converted
5characters. Add support for mounting root of a share which redirects immediately
6to DFS target. Convert string conversion functions from Unicode to more
7accurately mark string length before allocating memory (which may help the
8rare cases where a UTF-8 string is much larger than the UCS2 string that
9we converted from). Fix endianness of the vcnum field used during
10session setup to distinguish multiple mounts to same server from different
11userids. Raw NTLMSSP fixed (it requires /proc/fs/cifs/experimental
12flag to be set to 2, and mount must enable krb5 to turn on extended security).
13
1Version 1.57 14Version 1.57
2------------ 15------------
3Improve support for multiple security contexts to the same server. We 16Improve support for multiple security contexts to the same server. We
@@ -15,7 +28,8 @@ Posix file open support added (turned off after one attempt if server
15fails to support it properly, as with Samba server versions prior to 3.3.2) 28fails to support it properly, as with Samba server versions prior to 3.3.2)
16Fix "redzone overwritten" bug in cifs_put_tcon (CIFSTcon may allocate too 29Fix "redzone overwritten" bug in cifs_put_tcon (CIFSTcon may allocate too
17little memory for the "nativeFileSystem" field returned by the server 30little memory for the "nativeFileSystem" field returned by the server
18during mount). 31during mount). Endian convert inode numbers if necessary (makes it easier
32to compare inode numbers on network files from big endian systems).
19 33
20Version 1.56 34Version 1.56
21------------ 35------------
diff --git a/fs/cifs/README b/fs/cifs/README
index 07434181623..db208ddb989 100644
--- a/fs/cifs/README
+++ b/fs/cifs/README
@@ -651,7 +651,15 @@ Experimental When set to 1 used to enable certain experimental
651 signing turned on in case buffer was modified 651 signing turned on in case buffer was modified
652 just before it was sent, also this flag will 652 just before it was sent, also this flag will
653 be used to use the new experimental directory change 653 be used to use the new experimental directory change
654 notification code). 654 notification code). When set to 2 enables
655 an additional experimental feature, "raw ntlmssp"
656 session establishment support (which allows
657 specifying "sec=ntlmssp" on mount). The Linux cifs
658 module will use ntlmv2 authentication encapsulated
659 in "raw ntlmssp" (not using SPNEGO) when
660 "sec=ntlmssp" is specified on mount.
661 This support also requires building cifs with
662 the CONFIG_CIFS_EXPERIMENTAL configuration flag.
655 663
656These experimental features and tracing can be enabled by changing flags in 664These experimental features and tracing can be enabled by changing flags in
657/proc/fs/cifs (after the cifs module has been installed or built into the 665/proc/fs/cifs (after the cifs module has been installed or built into the
diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c
index 5fdbf8a1447..83d62759c7c 100644
--- a/fs/cifs/cifs_dfs_ref.c
+++ b/fs/cifs/cifs_dfs_ref.c
@@ -340,28 +340,24 @@ cifs_dfs_follow_mountpoint(struct dentry *dentry, struct nameidata *nd)
340 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); 340 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
341 341
342 for (i = 0; i < num_referrals; i++) { 342 for (i = 0; i < num_referrals; i++) {
343 int len;
343 dump_referral(referrals+i); 344 dump_referral(referrals+i);
344 /* connect to a storage node */ 345 /* connect to a node */
345 if (referrals[i].flags & DFSREF_STORAGE_SERVER) { 346 len = strlen(referrals[i].node_name);
346 int len; 347 if (len < 2) {
347 len = strlen(referrals[i].node_name); 348 cERROR(1, ("%s: Net Address path too short: %s",
348 if (len < 2) {
349 cERROR(1, ("%s: Net Address path too short: %s",
350 __func__, referrals[i].node_name)); 349 __func__, referrals[i].node_name));
351 rc = -EINVAL; 350 rc = -EINVAL;
352 goto out_err; 351 goto out_err;
353 } 352 }
354 mnt = cifs_dfs_do_refmount(nd->path.mnt, 353 mnt = cifs_dfs_do_refmount(nd->path.mnt,
355 nd->path.dentry, 354 nd->path.dentry, referrals + i);
356 referrals + i); 355 cFYI(1, ("%s: cifs_dfs_do_refmount:%s , mnt:%p", __func__,
357 cFYI(1, ("%s: cifs_dfs_do_refmount:%s , mnt:%p",
358 __func__,
359 referrals[i].node_name, mnt)); 356 referrals[i].node_name, mnt));
360 357
361 /* complete mount procedure if we acquired submount */ 358 /* complete mount procedure if we acquired submount */
362 if (!IS_ERR(mnt)) 359 if (!IS_ERR(mnt))
363 break; 360 break;
364 }
365 } 361 }
366 362
367 /* we need it cause for() above could exit without valid submount */ 363 /* we need it cause for() above could exit without valid submount */
diff --git a/fs/cifs/cifs_spnego.c b/fs/cifs/cifs_spnego.c
index 3fd3a9df043..67bf93a40d2 100644
--- a/fs/cifs/cifs_spnego.c
+++ b/fs/cifs/cifs_spnego.c
@@ -41,7 +41,7 @@ cifs_spnego_key_instantiate(struct key *key, const void *data, size_t datalen)
41 41
42 /* attach the data */ 42 /* attach the data */
43 memcpy(payload, data, datalen); 43 memcpy(payload, data, datalen);
44 rcu_assign_pointer(key->payload.data, payload); 44 key->payload.data = payload;
45 ret = 0; 45 ret = 0;
46 46
47error: 47error:
diff --git a/fs/cifs/cifs_unicode.c b/fs/cifs/cifs_unicode.c
index 7d75272a6b3..60e3c4253de 100644
--- a/fs/cifs/cifs_unicode.c
+++ b/fs/cifs/cifs_unicode.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * fs/cifs/cifs_unicode.c 2 * fs/cifs/cifs_unicode.c
3 * 3 *
4 * Copyright (c) International Business Machines Corp., 2000,2005 4 * Copyright (c) International Business Machines Corp., 2000,2009
5 * Modified by Steve French (sfrench@us.ibm.com) 5 * Modified by Steve French (sfrench@us.ibm.com)
6 * 6 *
7 * This program is free software; you can redistribute it and/or modify 7 * This program is free software; you can redistribute it and/or modify
@@ -26,31 +26,157 @@
26#include "cifs_debug.h" 26#include "cifs_debug.h"
27 27
28/* 28/*
29 * NAME: cifs_strfromUCS() 29 * cifs_ucs2_bytes - how long will a string be after conversion?
30 * 30 * @ucs - pointer to input string
31 * FUNCTION: Convert little-endian unicode string to character string 31 * @maxbytes - don't go past this many bytes of input string
32 * @codepage - destination codepage
32 * 33 *
34 * Walk a ucs2le string and return the number of bytes that the string will
35 * be after being converted to the given charset, not including any null
36 * termination required. Don't walk past maxbytes in the source buffer.
33 */ 37 */
34int 38int
35cifs_strfromUCS_le(char *to, const __le16 *from, 39cifs_ucs2_bytes(const __le16 *from, int maxbytes,
36 int len, const struct nls_table *codepage) 40 const struct nls_table *codepage)
37{ 41{
38 int i; 42 int i;
39 int outlen = 0; 43 int charlen, outlen = 0;
44 int maxwords = maxbytes / 2;
45 char tmp[NLS_MAX_CHARSET_SIZE];
40 46
41 for (i = 0; (i < len) && from[i]; i++) { 47 for (i = 0; from[i] && i < maxwords; i++) {
42 int charlen; 48 charlen = codepage->uni2char(le16_to_cpu(from[i]), tmp,
43 /* 2.4.0 kernel or greater */ 49 NLS_MAX_CHARSET_SIZE);
44 charlen = 50 if (charlen > 0)
45 codepage->uni2char(le16_to_cpu(from[i]), &to[outlen],
46 NLS_MAX_CHARSET_SIZE);
47 if (charlen > 0) {
48 outlen += charlen; 51 outlen += charlen;
49 } else { 52 else
50 to[outlen++] = '?'; 53 outlen++;
54 }
55
56 return outlen;
57}
58
59/*
60 * cifs_mapchar - convert a little-endian char to proper char in codepage
61 * @target - where converted character should be copied
62 * @src_char - 2 byte little-endian source character
63 * @cp - codepage to which character should be converted
64 * @mapchar - should character be mapped according to mapchars mount option?
65 *
66 * This function handles the conversion of a single character. It is the
67 * responsibility of the caller to ensure that the target buffer is large
68 * enough to hold the result of the conversion (at least NLS_MAX_CHARSET_SIZE).
69 */
70static int
71cifs_mapchar(char *target, const __le16 src_char, const struct nls_table *cp,
72 bool mapchar)
73{
74 int len = 1;
75
76 if (!mapchar)
77 goto cp_convert;
78
79 /*
80 * BB: Cannot handle remapping UNI_SLASH until all the calls to
81 * build_path_from_dentry are modified, as they use slash as
82 * separator.
83 */
84 switch (le16_to_cpu(src_char)) {
85 case UNI_COLON:
86 *target = ':';
87 break;
88 case UNI_ASTERIK:
89 *target = '*';
90 break;
91 case UNI_QUESTION:
92 *target = '?';
93 break;
94 case UNI_PIPE:
95 *target = '|';
96 break;
97 case UNI_GRTRTHAN:
98 *target = '>';
99 break;
100 case UNI_LESSTHAN:
101 *target = '<';
102 break;
103 default:
104 goto cp_convert;
105 }
106
107out:
108 return len;
109
110cp_convert:
111 len = cp->uni2char(le16_to_cpu(src_char), target,
112 NLS_MAX_CHARSET_SIZE);
113 if (len <= 0) {
114 *target = '?';
115 len = 1;
116 }
117 goto out;
118}
119
120/*
121 * cifs_from_ucs2 - convert utf16le string to local charset
122 * @to - destination buffer
123 * @from - source buffer
124 * @tolen - destination buffer size (in bytes)
125 * @fromlen - source buffer size (in bytes)
126 * @codepage - codepage to which characters should be converted
127 * @mapchar - should characters be remapped according to the mapchars option?
128 *
129 * Convert a little-endian ucs2le string (as sent by the server) to a string
130 * in the provided codepage. The tolen and fromlen parameters are to ensure
131 * that the code doesn't walk off of the end of the buffer (which is always
132 * a danger if the alignment of the source buffer is off). The destination
133 * string is always properly null terminated and fits in the destination
134 * buffer. Returns the length of the destination string in bytes (including
135 * null terminator).
136 *
137 * Note that some windows versions actually send multiword UTF-16 characters
138 * instead of straight UCS-2. The linux nls routines however aren't able to
139 * deal with those characters properly. In the event that we get some of
140 * those characters, they won't be translated properly.
141 */
142int
143cifs_from_ucs2(char *to, const __le16 *from, int tolen, int fromlen,
144 const struct nls_table *codepage, bool mapchar)
145{
146 int i, charlen, safelen;
147 int outlen = 0;
148 int nullsize = nls_nullsize(codepage);
149 int fromwords = fromlen / 2;
150 char tmp[NLS_MAX_CHARSET_SIZE];
151
152 /*
153 * because the chars can be of varying widths, we need to take care
154 * not to overflow the destination buffer when we get close to the
155 * end of it. Until we get to this offset, we don't need to check
156 * for overflow however.
157 */
158 safelen = tolen - (NLS_MAX_CHARSET_SIZE + nullsize);
159
160 for (i = 0; i < fromwords && from[i]; i++) {
161 /*
162 * check to see if converting this character might make the
163 * conversion bleed into the null terminator
164 */
165 if (outlen >= safelen) {
166 charlen = cifs_mapchar(tmp, from[i], codepage, mapchar);
167 if ((outlen + charlen) > (tolen - nullsize))
168 break;
51 } 169 }
170
171 /* put converted char into 'to' buffer */
172 charlen = cifs_mapchar(&to[outlen], from[i], codepage, mapchar);
173 outlen += charlen;
52 } 174 }
53 to[outlen] = 0; 175
176 /* properly null-terminate string */
177 for (i = 0; i < nullsize; i++)
178 to[outlen++] = 0;
179
54 return outlen; 180 return outlen;
55} 181}
56 182
@@ -88,3 +214,41 @@ cifs_strtoUCS(__le16 *to, const char *from, int len,
88 return i; 214 return i;
89} 215}
90 216
217/*
218 * cifs_strndup_from_ucs - copy a string from wire format to the local codepage
219 * @src - source string
220 * @maxlen - don't walk past this many bytes in the source string
221 * @is_unicode - is this a unicode string?
222 * @codepage - destination codepage
223 *
224 * Take a string given by the server, convert it to the local codepage and
225 * put it in a new buffer. Returns a pointer to the new string or NULL on
226 * error.
227 */
228char *
229cifs_strndup_from_ucs(const char *src, const int maxlen, const bool is_unicode,
230 const struct nls_table *codepage)
231{
232 int len;
233 char *dst;
234
235 if (is_unicode) {
236 len = cifs_ucs2_bytes((__le16 *) src, maxlen, codepage);
237 len += nls_nullsize(codepage);
238 dst = kmalloc(len, GFP_KERNEL);
239 if (!dst)
240 return NULL;
241 cifs_from_ucs2(dst, (__le16 *) src, len, maxlen, codepage,
242 false);
243 } else {
244 len = strnlen(src, maxlen);
245 len++;
246 dst = kmalloc(len, GFP_KERNEL);
247 if (!dst)
248 return NULL;
249 strlcpy(dst, src, len);
250 }
251
252 return dst;
253}
254
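
The new helpers replace the fixed-size cifs_strfromUCS_le() conversions with a measure-then-convert sequence: cifs_ucs2_bytes() reports how many bytes the output needs, cifs_from_ucs2() stops before it can overrun the destination, and cifs_strndup_from_ucs() wraps both behind one allocation. A hedged usage sketch (wire_buf and wire_bytes are illustrative names for a server-supplied string field):

static char *copy_server_string(const char *wire_buf, int wire_bytes,
				const struct nls_table *cp)
{
	int len;
	char *out;

	/* size the output first, then convert into an exact-fit buffer */
	len = cifs_ucs2_bytes((const __le16 *)wire_buf, wire_bytes, cp);
	len += nls_nullsize(cp);
	out = kmalloc(len, GFP_KERNEL);
	if (!out)
		return NULL;
	cifs_from_ucs2(out, (const __le16 *)wire_buf, len, wire_bytes,
		       cp, false);
	return out;
}

/* Equivalent one-call form for unicode-capable sessions. */
static char *copy_server_string_short(const char *wire_buf, int wire_bytes,
				      const struct nls_table *cp)
{
	return cifs_strndup_from_ucs(wire_buf, wire_bytes, true, cp);
}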
diff --git a/fs/cifs/cifs_unicode.h b/fs/cifs/cifs_unicode.h
index 14eb9a2395d..650638275a6 100644
--- a/fs/cifs/cifs_unicode.h
+++ b/fs/cifs/cifs_unicode.h
@@ -5,7 +5,7 @@
5 * Convert a unicode character to upper or lower case using 5 * Convert a unicode character to upper or lower case using
6 * compressed tables. 6 * compressed tables.
7 * 7 *
8 * Copyright (c) International Business Machines Corp., 2000,2007 8 * Copyright (c) International Business Machines Corp., 2000,2009
9 * 9 *
10 * This program is free software; you can redistribute it and/or modify 10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by 11 * it under the terms of the GNU General Public License as published by
@@ -37,6 +37,19 @@
37 37
38#define UNIUPR_NOLOWER /* Example to not expand lower case tables */ 38#define UNIUPR_NOLOWER /* Example to not expand lower case tables */
39 39
40/*
41 * Windows maps these to the user defined 16 bit Unicode range since they are
42 * reserved symbols (along with \ and /), otherwise illegal to store
43 * in filenames in NTFS
44 */
45#define UNI_ASTERIK (__u16) ('*' + 0xF000)
46#define UNI_QUESTION (__u16) ('?' + 0xF000)
47#define UNI_COLON (__u16) (':' + 0xF000)
48#define UNI_GRTRTHAN (__u16) ('>' + 0xF000)
49#define UNI_LESSTHAN (__u16) ('<' + 0xF000)
50#define UNI_PIPE (__u16) ('|' + 0xF000)
51#define UNI_SLASH (__u16) ('\\' + 0xF000)
52
40/* Just define what we want from uniupr.h. We don't want to define the tables 53/* Just define what we want from uniupr.h. We don't want to define the tables
41 * in each source file. 54 * in each source file.
42 */ 55 */
@@ -59,8 +72,14 @@ extern struct UniCaseRange UniLowerRange[];
59#endif /* UNIUPR_NOLOWER */ 72#endif /* UNIUPR_NOLOWER */
60 73
61#ifdef __KERNEL__ 74#ifdef __KERNEL__
62int cifs_strfromUCS_le(char *, const __le16 *, int, const struct nls_table *); 75int cifs_from_ucs2(char *to, const __le16 *from, int tolen, int fromlen,
76 const struct nls_table *codepage, bool mapchar);
77int cifs_ucs2_bytes(const __le16 *from, int maxbytes,
78 const struct nls_table *codepage);
63int cifs_strtoUCS(__le16 *, const char *, int, const struct nls_table *); 79int cifs_strtoUCS(__le16 *, const char *, int, const struct nls_table *);
80char *cifs_strndup_from_ucs(const char *src, const int maxlen,
81 const bool is_unicode,
82 const struct nls_table *codepage);
64#endif 83#endif
65 84
66/* 85/*
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 38491fd3871..5e6d35804d7 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -35,6 +35,7 @@
35#include <linux/delay.h> 35#include <linux/delay.h>
36#include <linux/kthread.h> 36#include <linux/kthread.h>
37#include <linux/freezer.h> 37#include <linux/freezer.h>
38#include <linux/smp_lock.h>
38#include "cifsfs.h" 39#include "cifsfs.h"
39#include "cifspdu.h" 40#include "cifspdu.h"
40#define DECLARE_GLOBALS_HERE 41#define DECLARE_GLOBALS_HERE
@@ -66,9 +67,6 @@ unsigned int sign_CIFS_PDUs = 1;
66extern struct task_struct *oplockThread; /* remove sparse warning */ 67extern struct task_struct *oplockThread; /* remove sparse warning */
67struct task_struct *oplockThread = NULL; 68struct task_struct *oplockThread = NULL;
68/* extern struct task_struct * dnotifyThread; remove sparse warning */ 69/* extern struct task_struct * dnotifyThread; remove sparse warning */
69#ifdef CONFIG_CIFS_EXPERIMENTAL
70static struct task_struct *dnotifyThread = NULL;
71#endif
72static const struct super_operations cifs_super_ops; 70static const struct super_operations cifs_super_ops;
73unsigned int CIFSMaxBufSize = CIFS_MAX_MSGSIZE; 71unsigned int CIFSMaxBufSize = CIFS_MAX_MSGSIZE;
74module_param(CIFSMaxBufSize, int, 0); 72module_param(CIFSMaxBufSize, int, 0);
@@ -316,6 +314,7 @@ cifs_alloc_inode(struct super_block *sb)
316 cifs_inode->clientCanCacheAll = false; 314 cifs_inode->clientCanCacheAll = false;
317 cifs_inode->delete_pending = false; 315 cifs_inode->delete_pending = false;
318 cifs_inode->vfs_inode.i_blkbits = 14; /* 2**14 = CIFS_MAX_MSGSIZE */ 316 cifs_inode->vfs_inode.i_blkbits = 14; /* 2**14 = CIFS_MAX_MSGSIZE */
317 cifs_inode->server_eof = 0;
319 318
320 /* Can not set i_flags here - they get immediately overwritten 319 /* Can not set i_flags here - they get immediately overwritten
321 to zero by the VFS */ 320 to zero by the VFS */
@@ -532,6 +531,7 @@ static void cifs_umount_begin(struct super_block *sb)
532 if (tcon == NULL) 531 if (tcon == NULL)
533 return; 532 return;
534 533
534 lock_kernel();
535 read_lock(&cifs_tcp_ses_lock); 535 read_lock(&cifs_tcp_ses_lock);
536 if (tcon->tc_count == 1) 536 if (tcon->tc_count == 1)
537 tcon->tidStatus = CifsExiting; 537 tcon->tidStatus = CifsExiting;
@@ -550,6 +550,7 @@ static void cifs_umount_begin(struct super_block *sb)
550 } 550 }
551/* BB FIXME - finish add checks for tidStatus BB */ 551/* BB FIXME - finish add checks for tidStatus BB */
552 552
553 unlock_kernel();
553 return; 554 return;
554} 555}
555 556
@@ -601,8 +602,7 @@ cifs_get_sb(struct file_system_type *fs_type,
601 602
602 rc = cifs_read_super(sb, data, dev_name, flags & MS_SILENT ? 1 : 0); 603 rc = cifs_read_super(sb, data, dev_name, flags & MS_SILENT ? 1 : 0);
603 if (rc) { 604 if (rc) {
604 up_write(&sb->s_umount); 605 deactivate_locked_super(sb);
605 deactivate_super(sb);
606 return rc; 606 return rc;
607 } 607 }
608 sb->s_flags |= MS_ACTIVE; 608 sb->s_flags |= MS_ACTIVE;
@@ -1040,34 +1040,6 @@ static int cifs_oplock_thread(void *dummyarg)
1040 return 0; 1040 return 0;
1041} 1041}
1042 1042
1043#ifdef CONFIG_CIFS_EXPERIMENTAL
1044static int cifs_dnotify_thread(void *dummyarg)
1045{
1046 struct list_head *tmp;
1047 struct TCP_Server_Info *server;
1048
1049 do {
1050 if (try_to_freeze())
1051 continue;
1052 set_current_state(TASK_INTERRUPTIBLE);
1053 schedule_timeout(15*HZ);
1054 /* check if any stuck requests that need
1055 to be woken up and wakeq so the
1056 thread can wake up and error out */
1057 read_lock(&cifs_tcp_ses_lock);
1058 list_for_each(tmp, &cifs_tcp_ses_list) {
1059 server = list_entry(tmp, struct TCP_Server_Info,
1060 tcp_ses_list);
1061 if (atomic_read(&server->inFlight))
1062 wake_up_all(&server->response_q);
1063 }
1064 read_unlock(&cifs_tcp_ses_lock);
1065 } while (!kthread_should_stop());
1066
1067 return 0;
1068}
1069#endif
1070
1071static int __init 1043static int __init
1072init_cifs(void) 1044init_cifs(void)
1073{ 1045{
@@ -1144,21 +1116,8 @@ init_cifs(void)
1144 goto out_unregister_dfs_key_type; 1116 goto out_unregister_dfs_key_type;
1145 } 1117 }
1146 1118
1147#ifdef CONFIG_CIFS_EXPERIMENTAL
1148 dnotifyThread = kthread_run(cifs_dnotify_thread, NULL, "cifsdnotifyd");
1149 if (IS_ERR(dnotifyThread)) {
1150 rc = PTR_ERR(dnotifyThread);
1151 cERROR(1, ("error %d create dnotify thread", rc));
1152 goto out_stop_oplock_thread;
1153 }
1154#endif
1155
1156 return 0; 1119 return 0;
1157 1120
1158#ifdef CONFIG_CIFS_EXPERIMENTAL
1159 out_stop_oplock_thread:
1160#endif
1161 kthread_stop(oplockThread);
1162 out_unregister_dfs_key_type: 1121 out_unregister_dfs_key_type:
1163#ifdef CONFIG_CIFS_DFS_UPCALL 1122#ifdef CONFIG_CIFS_DFS_UPCALL
1164 unregister_key_type(&key_type_dns_resolver); 1123 unregister_key_type(&key_type_dns_resolver);
@@ -1196,9 +1155,6 @@ exit_cifs(void)
1196 cifs_destroy_inodecache(); 1155 cifs_destroy_inodecache();
1197 cifs_destroy_mids(); 1156 cifs_destroy_mids();
1198 cifs_destroy_request_bufs(); 1157 cifs_destroy_request_bufs();
1199#ifdef CONFIG_CIFS_EXPERIMENTAL
1200 kthread_stop(dnotifyThread);
1201#endif
1202 kthread_stop(oplockThread); 1158 kthread_stop(oplockThread);
1203} 1159}
1204 1160
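
cifs_get_sb() now uses the same error-path helper as the btrfs changes earlier in this series: deactivate_locked_super() drops s_umount and the active superblock reference in one call, replacing the open-coded up_write()/deactivate_super() pair. A sketch of the resulting error-path shape (example_fill_super is an illustrative stand-in):

static int activate_super_sketch(struct super_block *sb, void *data, int silent)
{
	int rc;

	rc = example_fill_super(sb, data, silent);
	if (rc) {
		/* unlocks s_umount and releases the active reference */
		deactivate_locked_super(sb);
		return rc;
	}
	sb->s_flags |= MS_ACTIVE;
	return 0;
}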
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 77e190dc288..051b71cfdea 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -100,5 +100,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
100extern const struct export_operations cifs_export_ops; 100extern const struct export_operations cifs_export_ops;
101#endif /* EXPERIMENTAL */ 101#endif /* EXPERIMENTAL */
102 102
103#define CIFS_VERSION "1.57" 103#define CIFS_VERSION "1.58"
104#endif /* _CIFSFS_H */ 104#endif /* _CIFSFS_H */
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 9fbf4dff5da..a61ab772c6f 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -82,8 +82,8 @@ enum securityEnum {
82 LANMAN, /* Legacy LANMAN auth */ 82 LANMAN, /* Legacy LANMAN auth */
83 NTLM, /* Legacy NTLM012 auth with NTLM hash */ 83 NTLM, /* Legacy NTLM012 auth with NTLM hash */
84 NTLMv2, /* Legacy NTLM auth with NTLMv2 hash */ 84 NTLMv2, /* Legacy NTLM auth with NTLMv2 hash */
85 RawNTLMSSP, /* NTLMSSP without SPNEGO */ 85 RawNTLMSSP, /* NTLMSSP without SPNEGO, NTLMv2 hash */
86 NTLMSSP, /* NTLMSSP via SPNEGO */ 86 NTLMSSP, /* NTLMSSP via SPNEGO, NTLMv2 hash */
87 Kerberos, /* Kerberos via SPNEGO */ 87 Kerberos, /* Kerberos via SPNEGO */
88 MSKerberos, /* MS Kerberos via SPNEGO */ 88 MSKerberos, /* MS Kerberos via SPNEGO */
89}; 89};
@@ -350,7 +350,7 @@ struct cifsFileInfo {
350 bool invalidHandle:1; /* file closed via session abend */ 350 bool invalidHandle:1; /* file closed via session abend */
351 bool messageMode:1; /* for pipes: message vs byte mode */ 351 bool messageMode:1; /* for pipes: message vs byte mode */
352 atomic_t wrtPending; /* handle in use - defer close */ 352 atomic_t wrtPending; /* handle in use - defer close */
353 struct semaphore fh_sem; /* prevents reopen race after dead ses*/ 353 struct mutex fh_mutex; /* prevents reopen race after dead ses*/
354 struct cifs_search_info srch_inf; 354 struct cifs_search_info srch_inf;
355}; 355};
356 356
@@ -370,6 +370,7 @@ struct cifsInodeInfo {
370 bool clientCanCacheAll:1; /* read and writebehind oplock */ 370 bool clientCanCacheAll:1; /* read and writebehind oplock */
371 bool oplockPending:1; 371 bool oplockPending:1;
372 bool delete_pending:1; /* DELETE_ON_CLOSE is set */ 372 bool delete_pending:1; /* DELETE_ON_CLOSE is set */
373 u64 server_eof; /* current file size on server */
373 struct inode vfs_inode; 374 struct inode vfs_inode;
374}; 375};
375 376
@@ -530,6 +531,7 @@ static inline void free_dfs_info_array(struct dfs_info3_param *param,
530#define CIFSSEC_MAY_PLNTXT 0 531#define CIFSSEC_MAY_PLNTXT 0
531#endif /* weak passwords */ 532#endif /* weak passwords */
532#define CIFSSEC_MAY_SEAL 0x00040 /* not supported yet */ 533#define CIFSSEC_MAY_SEAL 0x00040 /* not supported yet */
534#define CIFSSEC_MAY_NTLMSSP 0x00080 /* raw ntlmssp with ntlmv2 */
533 535
534#define CIFSSEC_MUST_SIGN 0x01001 536#define CIFSSEC_MUST_SIGN 0x01001
535/* note that only one of the following can be set so the 537/* note that only one of the following can be set so the
@@ -542,22 +544,23 @@ require use of the stronger protocol */
542#define CIFSSEC_MUST_LANMAN 0x10010 544#define CIFSSEC_MUST_LANMAN 0x10010
543#define CIFSSEC_MUST_PLNTXT 0x20020 545#define CIFSSEC_MUST_PLNTXT 0x20020
544#ifdef CONFIG_CIFS_UPCALL 546#ifdef CONFIG_CIFS_UPCALL
545#define CIFSSEC_MASK 0x3F03F /* allows weak security but also krb5 */ 547#define CIFSSEC_MASK 0xAF0AF /* allows weak security but also krb5 */
546#else 548#else
547#define CIFSSEC_MASK 0x37037 /* current flags supported if weak */ 549#define CIFSSEC_MASK 0xA70A7 /* current flags supported if weak */
548#endif /* UPCALL */ 550#endif /* UPCALL */
549#else /* do not allow weak pw hash */ 551#else /* do not allow weak pw hash */
550#ifdef CONFIG_CIFS_UPCALL 552#ifdef CONFIG_CIFS_UPCALL
551#define CIFSSEC_MASK 0x0F00F /* flags supported if no weak allowed */ 553#define CIFSSEC_MASK 0x8F08F /* flags supported if no weak allowed */
552#else 554#else
553#define CIFSSEC_MASK 0x07007 /* flags supported if no weak allowed */ 555#define CIFSSEC_MASK 0x87087 /* flags supported if no weak allowed */
554#endif /* UPCALL */ 556#endif /* UPCALL */
555#endif /* WEAK_PW_HASH */ 557#endif /* WEAK_PW_HASH */
556#define CIFSSEC_MUST_SEAL 0x40040 /* not supported yet */ 558#define CIFSSEC_MUST_SEAL 0x40040 /* not supported yet */
559#define CIFSSEC_MUST_NTLMSSP 0x80080 /* raw ntlmssp with ntlmv2 */
557 560
558#define CIFSSEC_DEF (CIFSSEC_MAY_SIGN | CIFSSEC_MAY_NTLM | CIFSSEC_MAY_NTLMV2) 561#define CIFSSEC_DEF (CIFSSEC_MAY_SIGN | CIFSSEC_MAY_NTLM | CIFSSEC_MAY_NTLMV2)
559#define CIFSSEC_MAX (CIFSSEC_MUST_SIGN | CIFSSEC_MUST_NTLMV2) 562#define CIFSSEC_MAX (CIFSSEC_MUST_SIGN | CIFSSEC_MUST_NTLMV2)
560#define CIFSSEC_AUTH_MASK (CIFSSEC_MAY_NTLM | CIFSSEC_MAY_NTLMV2 | CIFSSEC_MAY_LANMAN | CIFSSEC_MAY_PLNTXT | CIFSSEC_MAY_KRB5) 563#define CIFSSEC_AUTH_MASK (CIFSSEC_MAY_NTLM | CIFSSEC_MAY_NTLMV2 | CIFSSEC_MAY_LANMAN | CIFSSEC_MAY_PLNTXT | CIFSSEC_MAY_KRB5 | CIFSSEC_MAY_NTLMSSP)
561/* 564/*
562 ***************************************************************** 565 *****************************************************************
563 * All constants go here 566 * All constants go here
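Every CIFSSEC mechanism pairs a "may use" bit with a "must use" bit twelve positions higher, so CIFSSEC_MUST_x == CIFSSEC_MAY_x | (CIFSSEC_MAY_x << 12); the new raw-NTLMSSP pair (0x00080/0x80080) follows the same rule, and the widened CIFSSEC_MASK values simply admit the two extra bits. A self-contained check of the pairing, using only flag values visible in this header (illustrative, not kernel code):

#include <assert.h>
#include <stdio.h>

#define MAY_SIGN	0x00001		/* implied by CIFSSEC_MUST_SIGN 0x01001 */
#define MAY_LANMAN	0x00010
#define MAY_PLNTXT	0x00020
#define MAY_SEAL	0x00040
#define MAY_NTLMSSP	0x00080		/* new in this patch */
#define MUST(may)	((may) | ((may) << 12))

int main(void)
{
	assert(MUST(MAY_SIGN)    == 0x01001);
	assert(MUST(MAY_LANMAN)  == 0x10010);
	assert(MUST(MAY_PLNTXT)  == 0x20020);
	assert(MUST(MAY_SEAL)    == 0x40040);
	assert(MUST(MAY_NTLMSSP) == 0x80080);	/* CIFSSEC_MUST_NTLMSSP */
	printf("MAY/MUST pairing holds\n");
	return 0;
}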
diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h
index b370489c8da..a785f69dbc9 100644
--- a/fs/cifs/cifspdu.h
+++ b/fs/cifs/cifspdu.h
@@ -2163,7 +2163,7 @@ typedef struct {
2163 __le32 Type; 2163 __le32 Type;
2164 __le64 DevMajor; 2164 __le64 DevMajor;
2165 __le64 DevMinor; 2165 __le64 DevMinor;
2166 __u64 UniqueId; 2166 __le64 UniqueId;
2167 __le64 Permissions; 2167 __le64 Permissions;
2168 __le64 Nlinks; 2168 __le64 Nlinks;
2169} __attribute__((packed)) FILE_UNIX_BASIC_INFO; /* level 0x200 QPathInfo */ 2169} __attribute__((packed)) FILE_UNIX_BASIC_INFO; /* level 0x200 QPathInfo */
@@ -2308,7 +2308,7 @@ struct unlink_psx_rq { /* level 0x20a SetPathInfo */
2308} __attribute__((packed)); 2308} __attribute__((packed));
2309 2309
2310struct file_internal_info { 2310struct file_internal_info {
2311 __u64 UniqueId; /* inode number */ 2311 __le64 UniqueId; /* inode number */
2312} __attribute__((packed)); /* level 0x3ee */ 2312} __attribute__((packed)); /* level 0x3ee */
2313 2313
2314struct file_mode_info { 2314struct file_mode_info {
@@ -2338,7 +2338,7 @@ typedef struct {
2338 __le32 Type; 2338 __le32 Type;
2339 __le64 DevMajor; 2339 __le64 DevMajor;
2340 __le64 DevMinor; 2340 __le64 DevMinor;
2341 __u64 UniqueId; 2341 __le64 UniqueId;
2342 __le64 Permissions; 2342 __le64 Permissions;
2343 __le64 Nlinks; 2343 __le64 Nlinks;
2344 char FileName[1]; 2344 char FileName[1];
@@ -2386,7 +2386,7 @@ typedef struct {
2386 __le32 FileNameLength; 2386 __le32 FileNameLength;
2387 __le32 EaSize; /* EA size */ 2387 __le32 EaSize; /* EA size */
2388 __le32 Reserved; 2388 __le32 Reserved;
2389 __u64 UniqueId; /* inode num - le since Samba puts ino in low 32 bit*/ 2389 __le64 UniqueId; /* inode num - le since Samba puts ino in low 32 bit*/
2390 char FileName[1]; 2390 char FileName[1];
2391} __attribute__((packed)) SEARCH_ID_FULL_DIR_INFO; /* level 0x105 FF rsp data */ 2391} __attribute__((packed)) SEARCH_ID_FULL_DIR_INFO; /* level 0x105 FF rsp data */
2392 2392
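Retyping UniqueId from __u64 to __le64 records that the field crosses the wire in SMB (little-endian) byte order, so sparse can flag any access that skips the byte swap; readers now go through le64_to_cpu(), as the GetInodeNumber hunk further down shows. A userspace model of that conversion (the kernel helper is le64_to_cpu(); this only illustrates the byte order):

#include <stdint.h>
#include <stdio.h>

static uint64_t le64_to_host(const unsigned char b[8])
{
	uint64_t v = 0;
	int i;

	for (i = 7; i >= 0; i--)
		v = (v << 8) | b[i];	/* byte 0 is the least significant */
	return v;
}

int main(void)
{
	unsigned char wire[8] = { 0x39, 0x30, 0, 0, 0, 0, 0, 0 };

	printf("UniqueId = %llu\n", (unsigned long long)le64_to_host(wire));
	return 0;			/* prints 12345 on any host endianness */
}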
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 4167716d32f..fae083930ee 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -260,8 +260,7 @@ extern int CIFSUnixCreateSymLink(const int xid,
260 const struct nls_table *nls_codepage); 260 const struct nls_table *nls_codepage);
261extern int CIFSSMBUnixQuerySymLink(const int xid, 261extern int CIFSSMBUnixQuerySymLink(const int xid,
262 struct cifsTconInfo *tcon, 262 struct cifsTconInfo *tcon,
263 const unsigned char *searchName, 263 const unsigned char *searchName, char **syminfo,
264 char *syminfo, const int buflen,
265 const struct nls_table *nls_codepage); 264 const struct nls_table *nls_codepage);
266extern int CIFSSMBQueryReparseLinkInfo(const int xid, 265extern int CIFSSMBQueryReparseLinkInfo(const int xid,
267 struct cifsTconInfo *tcon, 266 struct cifsTconInfo *tcon,
@@ -307,8 +306,6 @@ extern int CIFSGetSrvInodeNumber(const int xid, struct cifsTconInfo *tcon,
307 const unsigned char *searchName, __u64 *inode_number, 306 const unsigned char *searchName, __u64 *inode_number,
308 const struct nls_table *nls_codepage, 307 const struct nls_table *nls_codepage,
309 int remap_special_chars); 308 int remap_special_chars);
310extern int cifs_convertUCSpath(char *target, const __le16 *source, int maxlen,
311 const struct nls_table *codepage);
312extern int cifsConvertToUCS(__le16 *target, const char *source, int maxlen, 309extern int cifsConvertToUCS(__le16 *target, const char *source, int maxlen,
313 const struct nls_table *cp, int mapChars); 310 const struct nls_table *cp, int mapChars);
314 311
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index bc09c998631..d06260251c3 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * fs/cifs/cifssmb.c 2 * fs/cifs/cifssmb.c
3 * 3 *
4 * Copyright (C) International Business Machines Corp., 2002,2008 4 * Copyright (C) International Business Machines Corp., 2002,2009
5 * Author(s): Steve French (sfrench@us.ibm.com) 5 * Author(s): Steve French (sfrench@us.ibm.com)
6 * 6 *
7 * Contains the routines for constructing the SMB PDUs themselves 7 * Contains the routines for constructing the SMB PDUs themselves
@@ -81,41 +81,6 @@ static struct {
81#endif /* CONFIG_CIFS_WEAK_PW_HASH */ 81#endif /* CONFIG_CIFS_WEAK_PW_HASH */
82#endif /* CIFS_POSIX */ 82#endif /* CIFS_POSIX */
83 83
84/* Allocates buffer into dst and copies smb string from src to it.
85 * caller is responsible for freeing dst if function returned 0.
86 * returns:
87 * on success - 0
88 * on failure - errno
89 */
90static int
91cifs_strncpy_to_host(char **dst, const char *src, const int maxlen,
92 const bool is_unicode, const struct nls_table *nls_codepage)
93{
94 int plen;
95
96 if (is_unicode) {
97 plen = UniStrnlen((wchar_t *)src, maxlen);
98 *dst = kmalloc(plen + 2, GFP_KERNEL);
99 if (!*dst)
100 goto cifs_strncpy_to_host_ErrExit;
101 cifs_strfromUCS_le(*dst, (__le16 *)src, plen, nls_codepage);
102 } else {
103 plen = strnlen(src, maxlen);
104 *dst = kmalloc(plen + 2, GFP_KERNEL);
105 if (!*dst)
106 goto cifs_strncpy_to_host_ErrExit;
107 strncpy(*dst, src, plen);
108 }
109 (*dst)[plen] = 0;
110 (*dst)[plen+1] = 0; /* harmless for ASCII case, needed for Unicode */
111 return 0;
112
113cifs_strncpy_to_host_ErrExit:
114 cERROR(1, ("Failed to allocate buffer for string\n"));
115 return -ENOMEM;
116}
117
118
119/* Mark as invalid, all open files on tree connections since they 84/* Mark as invalid, all open files on tree connections since they
120 were closed when session to server was lost */ 85 were closed when session to server was lost */
121static void mark_open_files_invalid(struct cifsTconInfo *pTcon) 86static void mark_open_files_invalid(struct cifsTconInfo *pTcon)
@@ -484,6 +449,14 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
484 cFYI(1, ("Kerberos only mechanism, enable extended security")); 449 cFYI(1, ("Kerberos only mechanism, enable extended security"));
485 pSMB->hdr.Flags2 |= SMBFLG2_EXT_SEC; 450 pSMB->hdr.Flags2 |= SMBFLG2_EXT_SEC;
486 } 451 }
452#ifdef CONFIG_CIFS_EXPERIMENTAL
453 else if ((secFlags & CIFSSEC_MUST_NTLMSSP) == CIFSSEC_MUST_NTLMSSP)
454 pSMB->hdr.Flags2 |= SMBFLG2_EXT_SEC;
455 else if ((secFlags & CIFSSEC_AUTH_MASK) == CIFSSEC_MAY_NTLMSSP) {
456 cFYI(1, ("NTLMSSP only mechanism, enable extended security"));
457 pSMB->hdr.Flags2 |= SMBFLG2_EXT_SEC;
458 }
459#endif
487 460
488 count = 0; 461 count = 0;
489 for (i = 0; i < CIFS_NUM_PROT; i++) { 462 for (i = 0; i < CIFS_NUM_PROT; i++) {
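The negotiate-request hunk above sets SMBFLG2_EXT_SEC when the configured security flags either demand NTLMSSP or leave it as the only permitted auth mechanism, mirroring the existing Kerberos-only branch. A compact sketch of that decision, with the mask values passed in rather than hard-coded (illustrative only):

#include <stdbool.h>

static bool want_ext_sec_for_ntlmssp(unsigned int secFlags,
				     unsigned int must_ntlmssp,
				     unsigned int may_ntlmssp,
				     unsigned int auth_mask)
{
	if ((secFlags & must_ntlmssp) == must_ntlmssp)
		return true;	/* NTLMSSP explicitly required */
	if ((secFlags & auth_mask) == may_ntlmssp)
		return true;	/* NTLMSSP is the only auth method allowed */
	return false;
}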
@@ -620,6 +593,8 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
620 server->secType = NTLMv2; 593 server->secType = NTLMv2;
621 else if (secFlags & CIFSSEC_MAY_KRB5) 594 else if (secFlags & CIFSSEC_MAY_KRB5)
622 server->secType = Kerberos; 595 server->secType = Kerberos;
596 else if (secFlags & CIFSSEC_MAY_NTLMSSP)
597 server->secType = NTLMSSP;
623 else if (secFlags & CIFSSEC_MAY_LANMAN) 598 else if (secFlags & CIFSSEC_MAY_LANMAN)
624 server->secType = LANMAN; 599 server->secType = LANMAN;
625/* #ifdef CONFIG_CIFS_EXPERIMENTAL 600/* #ifdef CONFIG_CIFS_EXPERIMENTAL
@@ -1626,6 +1601,8 @@ CIFSSMBWrite2(const int xid, struct cifsTconInfo *tcon,
1626 int smb_hdr_len; 1601 int smb_hdr_len;
1627 int resp_buf_type = 0; 1602 int resp_buf_type = 0;
1628 1603
1604 *nbytes = 0;
1605
1629 cFYI(1, ("write2 at %lld %d bytes", (long long)offset, count)); 1606 cFYI(1, ("write2 at %lld %d bytes", (long long)offset, count));
1630 1607
1631 if (tcon->ses->capabilities & CAP_LARGE_FILES) { 1608 if (tcon->ses->capabilities & CAP_LARGE_FILES) {
@@ -1682,11 +1659,9 @@ CIFSSMBWrite2(const int xid, struct cifsTconInfo *tcon,
1682 cifs_stats_inc(&tcon->num_writes); 1659 cifs_stats_inc(&tcon->num_writes);
1683 if (rc) { 1660 if (rc) {
1684 cFYI(1, ("Send error Write2 = %d", rc)); 1661 cFYI(1, ("Send error Write2 = %d", rc));
1685 *nbytes = 0;
1686 } else if (resp_buf_type == 0) { 1662 } else if (resp_buf_type == 0) {
1687 /* presumably this can not happen, but best to be safe */ 1663 /* presumably this can not happen, but best to be safe */
1688 rc = -EIO; 1664 rc = -EIO;
1689 *nbytes = 0;
1690 } else { 1665 } else {
1691 WRITE_RSP *pSMBr = (WRITE_RSP *)iov[0].iov_base; 1666 WRITE_RSP *pSMBr = (WRITE_RSP *)iov[0].iov_base;
1692 *nbytes = le16_to_cpu(pSMBr->CountHigh); 1667 *nbytes = le16_to_cpu(pSMBr->CountHigh);
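Initializing *nbytes once at the top of CIFSSMBWrite2() means every error and short-circuit path now reports zero bytes written without each branch having to clear it. The same out-parameter idiom in miniature (illustrative):

static int do_write(const char *buf, unsigned int count, unsigned int *nbytes)
{
	*nbytes = 0;		/* every failure path now reports 0 */
	if (!buf || count == 0)
		return -1;	/* error: *nbytes is already correct */
	/* ... send the data ... */
	*nbytes = count;	/* success: report what was actually written */
	return 0;
}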
@@ -2417,8 +2392,7 @@ winCreateHardLinkRetry:
2417 2392
2418int 2393int
2419CIFSSMBUnixQuerySymLink(const int xid, struct cifsTconInfo *tcon, 2394CIFSSMBUnixQuerySymLink(const int xid, struct cifsTconInfo *tcon,
2420 const unsigned char *searchName, 2395 const unsigned char *searchName, char **symlinkinfo,
2421 char *symlinkinfo, const int buflen,
2422 const struct nls_table *nls_codepage) 2396 const struct nls_table *nls_codepage)
2423{ 2397{
2424/* SMB_QUERY_FILE_UNIX_LINK */ 2398/* SMB_QUERY_FILE_UNIX_LINK */
@@ -2428,6 +2402,7 @@ CIFSSMBUnixQuerySymLink(const int xid, struct cifsTconInfo *tcon,
2428 int bytes_returned; 2402 int bytes_returned;
2429 int name_len; 2403 int name_len;
2430 __u16 params, byte_count; 2404 __u16 params, byte_count;
2405 char *data_start;
2431 2406
2432 cFYI(1, ("In QPathSymLinkInfo (Unix) for path %s", searchName)); 2407 cFYI(1, ("In QPathSymLinkInfo (Unix) for path %s", searchName));
2433 2408
@@ -2482,30 +2457,26 @@ querySymLinkRetry:
2482 /* decode response */ 2457 /* decode response */
2483 2458
2484 rc = validate_t2((struct smb_t2_rsp *)pSMBr); 2459 rc = validate_t2((struct smb_t2_rsp *)pSMBr);
2485 if (rc || (pSMBr->ByteCount < 2))
2486 /* BB also check enough total bytes returned */ 2460 /* BB also check enough total bytes returned */
2487 rc = -EIO; /* bad smb */ 2461 if (rc || (pSMBr->ByteCount < 2))
2462 rc = -EIO;
2488 else { 2463 else {
2489 __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset); 2464 bool is_unicode;
2490 __u16 count = le16_to_cpu(pSMBr->t2.DataCount); 2465 u16 count = le16_to_cpu(pSMBr->t2.DataCount);
2466
2467 data_start = ((char *) &pSMBr->hdr.Protocol) +
2468 le16_to_cpu(pSMBr->t2.DataOffset);
2469
2470 if (pSMBr->hdr.Flags2 & SMBFLG2_UNICODE)
2471 is_unicode = true;
2472 else
2473 is_unicode = false;
2491 2474
2492 if (pSMBr->hdr.Flags2 & SMBFLG2_UNICODE) {
2493 name_len = UniStrnlen((wchar_t *) ((char *)
2494 &pSMBr->hdr.Protocol + data_offset),
2495 min_t(const int, buflen, count) / 2);
2496 /* BB FIXME investigate remapping reserved chars here */ 2475 /* BB FIXME investigate remapping reserved chars here */
2497 cifs_strfromUCS_le(symlinkinfo, 2476 *symlinkinfo = cifs_strndup_from_ucs(data_start, count,
2498 (__le16 *) ((char *)&pSMBr->hdr.Protocol 2477 is_unicode, nls_codepage);
2499 + data_offset), 2478 if (!*symlinkinfo)
2500 name_len, nls_codepage); 2479 rc = -ENOMEM;
2501 } else {
2502 strncpy(symlinkinfo,
2503 (char *) &pSMBr->hdr.Protocol +
2504 data_offset,
2505 min_t(const int, buflen, count));
2506 }
2507 symlinkinfo[buflen] = 0;
2508 /* just in case so calling code does not go off the end of buffer */
2509 } 2480 }
2510 } 2481 }
2511 cifs_buf_release(pSMB); 2482 cifs_buf_release(pSMB);
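Rather than copying into a caller-supplied fixed buffer and truncating at buflen, the symlink query now has a helper allocate a right-sized, NUL-terminated copy of the returned target and hands it back through **symlinkinfo (the caller frees it, and a failed allocation becomes -ENOMEM). A userspace sketch of that bounded allocate-and-copy for the ASCII case (not the kernel's cifs_strndup_from_ucs, which also performs the Unicode-to-codepage conversion):

#include <stdlib.h>
#include <string.h>

static char *strndup_bounded(const char *src, size_t maxlen)
{
	size_t len = strnlen(src, maxlen);	/* never read past the SMB data */
	char *dst = malloc(len + 1);

	if (!dst)
		return NULL;			/* caller maps this to -ENOMEM */
	memcpy(dst, src, len);
	dst[len] = '\0';			/* always NUL-terminated */
	return dst;
}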
@@ -2603,7 +2574,6 @@ validate_ntransact(char *buf, char **ppparm, char **ppdata,
2603 *pparmlen = parm_count; 2574 *pparmlen = parm_count;
2604 return 0; 2575 return 0;
2605} 2576}
2606#endif /* CIFS_EXPERIMENTAL */
2607 2577
2608int 2578int
2609CIFSSMBQueryReparseLinkInfo(const int xid, struct cifsTconInfo *tcon, 2579CIFSSMBQueryReparseLinkInfo(const int xid, struct cifsTconInfo *tcon,
@@ -2613,7 +2583,6 @@ CIFSSMBQueryReparseLinkInfo(const int xid, struct cifsTconInfo *tcon,
2613{ 2583{
2614 int rc = 0; 2584 int rc = 0;
2615 int bytes_returned; 2585 int bytes_returned;
2616 int name_len;
2617 struct smb_com_transaction_ioctl_req *pSMB; 2586 struct smb_com_transaction_ioctl_req *pSMB;
2618 struct smb_com_transaction_ioctl_rsp *pSMBr; 2587 struct smb_com_transaction_ioctl_rsp *pSMBr;
2619 2588
@@ -2650,59 +2619,55 @@ CIFSSMBQueryReparseLinkInfo(const int xid, struct cifsTconInfo *tcon,
2650 } else { /* decode response */ 2619 } else { /* decode response */
2651 __u32 data_offset = le32_to_cpu(pSMBr->DataOffset); 2620 __u32 data_offset = le32_to_cpu(pSMBr->DataOffset);
2652 __u32 data_count = le32_to_cpu(pSMBr->DataCount); 2621 __u32 data_count = le32_to_cpu(pSMBr->DataCount);
2653 if ((pSMBr->ByteCount < 2) || (data_offset > 512)) 2622 if ((pSMBr->ByteCount < 2) || (data_offset > 512)) {
2654 /* BB also check enough total bytes returned */ 2623 /* BB also check enough total bytes returned */
2655 rc = -EIO; /* bad smb */ 2624 rc = -EIO; /* bad smb */
2656 else { 2625 goto qreparse_out;
2657 if (data_count && (data_count < 2048)) { 2626 }
2658 char *end_of_smb = 2 /* sizeof byte count */ + 2627 if (data_count && (data_count < 2048)) {
2659 pSMBr->ByteCount + 2628 char *end_of_smb = 2 /* sizeof byte count */ +
2660 (char *)&pSMBr->ByteCount; 2629 pSMBr->ByteCount + (char *)&pSMBr->ByteCount;
2661 2630
2662 struct reparse_data *reparse_buf = 2631 struct reparse_data *reparse_buf =
2663 (struct reparse_data *) 2632 (struct reparse_data *)
2664 ((char *)&pSMBr->hdr.Protocol 2633 ((char *)&pSMBr->hdr.Protocol
2665 + data_offset); 2634 + data_offset);
2666 if ((char *)reparse_buf >= end_of_smb) { 2635 if ((char *)reparse_buf >= end_of_smb) {
2667 rc = -EIO; 2636 rc = -EIO;
2668 goto qreparse_out; 2637 goto qreparse_out;
2669 } 2638 }
2670 if ((reparse_buf->LinkNamesBuf + 2639 if ((reparse_buf->LinkNamesBuf +
2671 reparse_buf->TargetNameOffset + 2640 reparse_buf->TargetNameOffset +
2672 reparse_buf->TargetNameLen) > 2641 reparse_buf->TargetNameLen) > end_of_smb) {
2673 end_of_smb) { 2642 cFYI(1, ("reparse buf beyond SMB"));
2674 cFYI(1, ("reparse buf beyond SMB")); 2643 rc = -EIO;
2675 rc = -EIO; 2644 goto qreparse_out;
2676 goto qreparse_out; 2645 }
2677 }
2678 2646
2679 if (pSMBr->hdr.Flags2 & SMBFLG2_UNICODE) { 2647 if (pSMBr->hdr.Flags2 & SMBFLG2_UNICODE) {
2680 name_len = UniStrnlen((wchar_t *) 2648 cifs_from_ucs2(symlinkinfo, (__le16 *)
2681 (reparse_buf->LinkNamesBuf + 2649 (reparse_buf->LinkNamesBuf +
2682 reparse_buf->TargetNameOffset), 2650 reparse_buf->TargetNameOffset),
2683 min(buflen/2, 2651 buflen,
2684 reparse_buf->TargetNameLen / 2)); 2652 reparse_buf->TargetNameLen,
2685 cifs_strfromUCS_le(symlinkinfo, 2653 nls_codepage, 0);
2686 (__le16 *) (reparse_buf->LinkNamesBuf + 2654 } else { /* ASCII names */
2687 reparse_buf->TargetNameOffset), 2655 strncpy(symlinkinfo,
2688 name_len, nls_codepage); 2656 reparse_buf->LinkNamesBuf +
2689 } else { /* ASCII names */ 2657 reparse_buf->TargetNameOffset,
2690 strncpy(symlinkinfo, 2658 min_t(const int, buflen,
2691 reparse_buf->LinkNamesBuf + 2659 reparse_buf->TargetNameLen));
2692 reparse_buf->TargetNameOffset,
2693 min_t(const int, buflen,
2694 reparse_buf->TargetNameLen));
2695 }
2696 } else {
2697 rc = -EIO;
2698 cFYI(1, ("Invalid return data count on "
2699 "get reparse info ioctl"));
2700 } 2660 }
2701 symlinkinfo[buflen] = 0; /* just in case so the caller 2661 } else {
2702 does not go off the end of the buffer */ 2662 rc = -EIO;
2703 cFYI(1, ("readlink result - %s", symlinkinfo)); 2663 cFYI(1, ("Invalid return data count on "
2664 "get reparse info ioctl"));
2704 } 2665 }
2666 symlinkinfo[buflen] = 0; /* just in case so the caller
2667 does not go off the end of the buffer */
2668 cFYI(1, ("readlink result - %s", symlinkinfo));
2705 } 2669 }
2670
2706qreparse_out: 2671qreparse_out:
2707 cifs_buf_release(pSMB); 2672 cifs_buf_release(pSMB);
2708 2673
@@ -2711,6 +2676,7 @@ qreparse_out:
2711 2676
2712 return rc; 2677 return rc;
2713} 2678}
2679#endif /* CIFS_EXPERIMENTAL */
2714 2680
2715#ifdef CONFIG_CIFS_POSIX 2681#ifdef CONFIG_CIFS_POSIX
2716 2682
@@ -3918,7 +3884,7 @@ GetInodeNumberRetry:
3918 } 3884 }
3919 pfinfo = (struct file_internal_info *) 3885 pfinfo = (struct file_internal_info *)
3920 (data_offset + (char *) &pSMBr->hdr.Protocol); 3886 (data_offset + (char *) &pSMBr->hdr.Protocol);
3921 *inode_number = pfinfo->UniqueId; 3887 *inode_number = le64_to_cpu(pfinfo->UniqueId);
3922 } 3888 }
3923 } 3889 }
3924GetInodeNumOut: 3890GetInodeNumOut:
@@ -3928,27 +3894,6 @@ GetInodeNumOut:
3928 return rc; 3894 return rc;
3929} 3895}
3930 3896
3931/* computes length of UCS string converted to host codepage
3932 * @src: UCS string
3933 * @maxlen: length of the input string in UCS characters
3934 * (not in bytes)
3935 *
3936 * return: size of input string in host codepage
3937 */
3938static int hostlen_fromUCS(const __le16 *src, const int maxlen,
3939 const struct nls_table *nls_codepage) {
3940 int i;
3941 int hostlen = 0;
3942 char to[4];
3943 int charlen;
3944 for (i = 0; (i < maxlen) && src[i]; ++i) {
3945 charlen = nls_codepage->uni2char(le16_to_cpu(src[i]),
3946 to, NLS_MAX_CHARSET_SIZE);
3947 hostlen += charlen > 0 ? charlen : 1;
3948 }
3949 return hostlen;
3950}
3951
 3952/* parses DFS referral V3 structure 3897
3953 * caller is responsible for freeing target_nodes 3898 * caller is responsible for freeing target_nodes
3954 * returns: 3899 * returns:
@@ -3994,7 +3939,7 @@ parse_DFS_referrals(TRANSACTION2_GET_DFS_REFER_RSP *pSMBr,
3994 3939
3995 cFYI(1, ("num_referrals: %d dfs flags: 0x%x ... \n", 3940 cFYI(1, ("num_referrals: %d dfs flags: 0x%x ... \n",
3996 *num_of_nodes, 3941 *num_of_nodes,
3997 le16_to_cpu(pSMBr->DFSFlags))); 3942 le32_to_cpu(pSMBr->DFSFlags)));
3998 3943
3999 *target_nodes = kzalloc(sizeof(struct dfs_info3_param) * 3944 *target_nodes = kzalloc(sizeof(struct dfs_info3_param) *
4000 *num_of_nodes, GFP_KERNEL); 3945 *num_of_nodes, GFP_KERNEL);
@@ -4010,14 +3955,14 @@ parse_DFS_referrals(TRANSACTION2_GET_DFS_REFER_RSP *pSMBr,
4010 int max_len; 3955 int max_len;
4011 struct dfs_info3_param *node = (*target_nodes)+i; 3956 struct dfs_info3_param *node = (*target_nodes)+i;
4012 3957
4013 node->flags = le16_to_cpu(pSMBr->DFSFlags); 3958 node->flags = le32_to_cpu(pSMBr->DFSFlags);
4014 if (is_unicode) { 3959 if (is_unicode) {
4015 __le16 *tmp = kmalloc(strlen(searchName)*2 + 2, 3960 __le16 *tmp = kmalloc(strlen(searchName)*2 + 2,
4016 GFP_KERNEL); 3961 GFP_KERNEL);
4017 cifsConvertToUCS((__le16 *) tmp, searchName, 3962 cifsConvertToUCS((__le16 *) tmp, searchName,
4018 PATH_MAX, nls_codepage, remap); 3963 PATH_MAX, nls_codepage, remap);
4019 node->path_consumed = hostlen_fromUCS(tmp, 3964 node->path_consumed = cifs_ucs2_bytes(tmp,
4020 le16_to_cpu(pSMBr->PathConsumed)/2, 3965 le16_to_cpu(pSMBr->PathConsumed),
4021 nls_codepage); 3966 nls_codepage);
4022 kfree(tmp); 3967 kfree(tmp);
4023 } else 3968 } else
@@ -4029,20 +3974,20 @@ parse_DFS_referrals(TRANSACTION2_GET_DFS_REFER_RSP *pSMBr,
4029 /* copy DfsPath */ 3974 /* copy DfsPath */
4030 temp = (char *)ref + le16_to_cpu(ref->DfsPathOffset); 3975 temp = (char *)ref + le16_to_cpu(ref->DfsPathOffset);
4031 max_len = data_end - temp; 3976 max_len = data_end - temp;
4032 rc = cifs_strncpy_to_host(&(node->path_name), temp, 3977 node->path_name = cifs_strndup_from_ucs(temp, max_len,
4033 max_len, is_unicode, nls_codepage); 3978 is_unicode, nls_codepage);
4034 if (rc) 3979 if (!node->path_name) {
3980 rc = -ENOMEM;
4035 goto parse_DFS_referrals_exit; 3981 goto parse_DFS_referrals_exit;
3982 }
4036 3983
4037 /* copy link target UNC */ 3984 /* copy link target UNC */
4038 temp = (char *)ref + le16_to_cpu(ref->NetworkAddressOffset); 3985 temp = (char *)ref + le16_to_cpu(ref->NetworkAddressOffset);
4039 max_len = data_end - temp; 3986 max_len = data_end - temp;
4040 rc = cifs_strncpy_to_host(&(node->node_name), temp, 3987 node->node_name = cifs_strndup_from_ucs(temp, max_len,
4041 max_len, is_unicode, nls_codepage); 3988 is_unicode, nls_codepage);
4042 if (rc) 3989 if (!node->node_name)
4043 goto parse_DFS_referrals_exit; 3990 rc = -ENOMEM;
4044
4045 ref += le16_to_cpu(ref->Size);
4046 } 3991 }
4047 3992
4048parse_DFS_referrals_exit: 3993parse_DFS_referrals_exit:
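PathConsumed comes back from the server as a byte count of the UTF-16 path it matched; converting the locally rebuilt UCS path through the codepage (cifs_ucs2_bytes() in the new code) yields the equivalent length in host-codepage bytes, which is what node->path_consumed stores. A rough userspace model of that counting, assuming a single-byte codepage (the kernel asks the nls table how many bytes each character needs):

#include <stddef.h>
#include <stdint.h>

static size_t host_bytes_from_ucs2(const uint16_t *ucs, size_t max_bytes)
{
	size_t i, hostlen = 0;

	for (i = 0; i < max_bytes / 2 && ucs[i]; i++)
		hostlen += 1;	/* real code: nls->uni2char() width, >= 1 */
	return hostlen;
}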
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 0de3b5615a2..4aa81a507b7 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * fs/cifs/connect.c 2 * fs/cifs/connect.c
3 * 3 *
4 * Copyright (C) International Business Machines Corp., 2002,2008 4 * Copyright (C) International Business Machines Corp., 2002,2009
5 * Author(s): Steve French (sfrench@us.ibm.com) 5 * Author(s): Steve French (sfrench@us.ibm.com)
6 * 6 *
7 * This library is free software; you can redistribute it and/or modify 7 * This library is free software; you can redistribute it and/or modify
@@ -32,6 +32,7 @@
32#include <linux/kthread.h> 32#include <linux/kthread.h>
33#include <linux/pagevec.h> 33#include <linux/pagevec.h>
34#include <linux/freezer.h> 34#include <linux/freezer.h>
35#include <linux/namei.h>
35#include <asm/uaccess.h> 36#include <asm/uaccess.h>
36#include <asm/processor.h> 37#include <asm/processor.h>
37#include <net/ipv6.h> 38#include <net/ipv6.h>
@@ -978,6 +979,13 @@ cifs_parse_mount_options(char *options, const char *devname,
978 return 1; 979 return 1;
979 } else if (strnicmp(value, "krb5", 4) == 0) { 980 } else if (strnicmp(value, "krb5", 4) == 0) {
980 vol->secFlg |= CIFSSEC_MAY_KRB5; 981 vol->secFlg |= CIFSSEC_MAY_KRB5;
982#ifdef CONFIG_CIFS_EXPERIMENTAL
983 } else if (strnicmp(value, "ntlmsspi", 8) == 0) {
984 vol->secFlg |= CIFSSEC_MAY_NTLMSSP |
985 CIFSSEC_MUST_SIGN;
986 } else if (strnicmp(value, "ntlmssp", 7) == 0) {
987 vol->secFlg |= CIFSSEC_MAY_NTLMSSP;
988#endif
981 } else if (strnicmp(value, "ntlmv2i", 7) == 0) { 989 } else if (strnicmp(value, "ntlmv2i", 7) == 0) {
982 vol->secFlg |= CIFSSEC_MAY_NTLMV2 | 990 vol->secFlg |= CIFSSEC_MAY_NTLMV2 |
983 CIFSSEC_MUST_SIGN; 991 CIFSSEC_MUST_SIGN;
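With CONFIG_CIFS_EXPERIMENTAL enabled, the option parser now accepts two more values for sec=: sec=ntlmssp selects raw NTLMSSP, and sec=ntlmsspi additionally forces packet signing (CIFSSEC_MUST_SIGN). A hypothetical invocation (server, share, and user are placeholders):

	mount -t cifs //server/share /mnt/cifs -o user=alice,sec=ntlmssp
	mount -t cifs //server/share /mnt/cifs -o user=alice,sec=ntlmsspi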
@@ -2214,9 +2222,58 @@ is_path_accessible(int xid, struct cifsTconInfo *tcon,
2214 return rc; 2222 return rc;
2215} 2223}
2216 2224
2225static void
2226cleanup_volume_info(struct smb_vol **pvolume_info)
2227{
2228 struct smb_vol *volume_info;
2229
 2230 if (!pvolume_info || !*pvolume_info)
2231 return;
2232
2233 volume_info = *pvolume_info;
2234 kzfree(volume_info->password);
2235 kfree(volume_info->UNC);
2236 kfree(volume_info->prepath);
2237 kfree(volume_info);
2238 *pvolume_info = NULL;
2239 return;
2240}
2241
2242#ifdef CONFIG_CIFS_DFS_UPCALL
 2243/* build_unc_path_to_root returns full path to root when
 2244 * we do not have an existing connection (tcon) */
2245static char *
2246build_unc_path_to_root(const struct smb_vol *volume_info,
2247 const struct cifs_sb_info *cifs_sb)
2248{
2249 char *full_path;
2250
2251 int unc_len = strnlen(volume_info->UNC, MAX_TREE_SIZE + 1);
2252 full_path = kmalloc(unc_len + cifs_sb->prepathlen + 1, GFP_KERNEL);
2253 if (full_path == NULL)
2254 return ERR_PTR(-ENOMEM);
2255
2256 strncpy(full_path, volume_info->UNC, unc_len);
2257 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) {
2258 int i;
2259 for (i = 0; i < unc_len; i++) {
2260 if (full_path[i] == '\\')
2261 full_path[i] = '/';
2262 }
2263 }
2264
2265 if (cifs_sb->prepathlen)
2266 strncpy(full_path + unc_len, cifs_sb->prepath,
2267 cifs_sb->prepathlen);
2268
2269 full_path[unc_len + cifs_sb->prepathlen] = 0; /* add trailing null */
2270 return full_path;
2271}
2272#endif
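build_unc_path_to_root() simply glues the volume's UNC onto the superblock prepath so that the DFS referral lookup further down has a full path even before any tcon exists; only the UNC portion has its backslashes rewritten when POSIX paths are in use. Hypothetical inputs and outputs:

/* Hypothetical example for build_unc_path_to_root():
 *   UNC                   = \\srv\dfsroot
 *   prepath               = \sub           (prepathlen == 4)
 *   result (default)      = \\srv\dfsroot\sub
 *   result (POSIX paths)  = //srv/dfsroot\sub   (only the UNC part is converted)
 */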
2273
2217int 2274int
2218cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, 2275cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
2219 char *mount_data, const char *devname) 2276 char *mount_data_global, const char *devname)
2220{ 2277{
2221 int rc = 0; 2278 int rc = 0;
2222 int xid; 2279 int xid;
@@ -2225,6 +2282,14 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
2225 struct cifsTconInfo *tcon = NULL; 2282 struct cifsTconInfo *tcon = NULL;
2226 struct TCP_Server_Info *srvTcp = NULL; 2283 struct TCP_Server_Info *srvTcp = NULL;
2227 char *full_path; 2284 char *full_path;
2285 char *mount_data = mount_data_global;
2286#ifdef CONFIG_CIFS_DFS_UPCALL
2287 struct dfs_info3_param *referrals = NULL;
2288 unsigned int num_referrals = 0;
2289 int referral_walks_count = 0;
2290try_mount_again:
2291#endif
2292 full_path = NULL;
2228 2293
2229 xid = GetXid(); 2294 xid = GetXid();
2230 2295
@@ -2371,11 +2436,9 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
2371 } 2436 }
2372 } 2437 }
2373 2438
2374 /* check for null share name ie connect to dfs root */
2375 if ((strchr(volume_info->UNC + 3, '\\') == NULL) 2439 if ((strchr(volume_info->UNC + 3, '\\') == NULL)
2376 && (strchr(volume_info->UNC + 3, '/') == NULL)) { 2440 && (strchr(volume_info->UNC + 3, '/') == NULL)) {
2377 /* rc = connect_to_dfs_path(...) */ 2441 cERROR(1, ("Missing share name"));
2378 cFYI(1, ("DFS root not supported"));
2379 rc = -ENODEV; 2442 rc = -ENODEV;
2380 goto mount_fail_check; 2443 goto mount_fail_check;
2381 } else { 2444 } else {
@@ -2392,7 +2455,7 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
2392 } 2455 }
2393 } 2456 }
2394 if (rc) 2457 if (rc)
2395 goto mount_fail_check; 2458 goto remote_path_check;
2396 tcon->seal = volume_info->seal; 2459 tcon->seal = volume_info->seal;
2397 write_lock(&cifs_tcp_ses_lock); 2460 write_lock(&cifs_tcp_ses_lock);
2398 list_add(&tcon->tcon_list, &pSesInfo->tcon_list); 2461 list_add(&tcon->tcon_list, &pSesInfo->tcon_list);
@@ -2417,19 +2480,9 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
2417 /* BB FIXME fix time_gran to be larger for LANMAN sessions */ 2480 /* BB FIXME fix time_gran to be larger for LANMAN sessions */
2418 sb->s_time_gran = 100; 2481 sb->s_time_gran = 100;
2419 2482
2420mount_fail_check: 2483 if (rc)
2421 /* on error free sesinfo and tcon struct if needed */ 2484 goto remote_path_check;
2422 if (rc) { 2485
2423 /* If find_unc succeeded then rc == 0 so we can not end */
2424 /* up accidently freeing someone elses tcon struct */
2425 if (tcon)
2426 cifs_put_tcon(tcon);
2427 else if (pSesInfo)
2428 cifs_put_smb_ses(pSesInfo);
2429 else
2430 cifs_put_tcp_session(srvTcp);
2431 goto out;
2432 }
2433 cifs_sb->tcon = tcon; 2486 cifs_sb->tcon = tcon;
2434 2487
2435 /* do not care if following two calls succeed - informational */ 2488 /* do not care if following two calls succeed - informational */
@@ -2461,7 +2514,9 @@ mount_fail_check:
2461 cifs_sb->rsize = min(cifs_sb->rsize, 2514 cifs_sb->rsize = min(cifs_sb->rsize,
2462 (tcon->ses->server->maxBuf - MAX_CIFS_HDR_SIZE)); 2515 (tcon->ses->server->maxBuf - MAX_CIFS_HDR_SIZE));
2463 2516
2464 if (!rc && cifs_sb->prepathlen) { 2517remote_path_check:
2518 /* check if a whole path (including prepath) is not remote */
2519 if (!rc && cifs_sb->prepathlen && tcon) {
2465 /* build_path_to_root works only when we have a valid tcon */ 2520 /* build_path_to_root works only when we have a valid tcon */
2466 full_path = cifs_build_path_to_root(cifs_sb); 2521 full_path = cifs_build_path_to_root(cifs_sb);
2467 if (full_path == NULL) { 2522 if (full_path == NULL) {
@@ -2469,1079 +2524,91 @@ mount_fail_check:
2469 goto mount_fail_check; 2524 goto mount_fail_check;
2470 } 2525 }
2471 rc = is_path_accessible(xid, tcon, cifs_sb, full_path); 2526 rc = is_path_accessible(xid, tcon, cifs_sb, full_path);
2472 if (rc) { 2527 if (rc != -EREMOTE) {
2473 cERROR(1, ("Path %s in not accessible: %d",
2474 full_path, rc));
2475 kfree(full_path); 2528 kfree(full_path);
2476 goto mount_fail_check; 2529 goto mount_fail_check;
2477 } 2530 }
2478 kfree(full_path); 2531 kfree(full_path);
2479 } 2532 }
2480 2533
2481 /* volume_info->password is freed above when existing session found 2534 /* get referral if needed */
2482 (in which case it is not needed anymore) but when new sesion is created 2535 if (rc == -EREMOTE) {
2483 the password ptr is put in the new session structure (in which case the 2536#ifdef CONFIG_CIFS_DFS_UPCALL
2484 password will be freed at unmount time) */ 2537 if (referral_walks_count > MAX_NESTED_LINKS) {
2485out: 2538 /*
2486 /* zero out password before freeing */ 2539 * BB: when we implement proper loop detection,
2487 if (volume_info) { 2540 * we will remove this check. But now we need it
2488 if (volume_info->password != NULL) { 2541 * to prevent an indefinite loop if 'DFS tree' is
2489 memset(volume_info->password, 0, 2542 * misconfigured (i.e. has loops).
2490 strlen(volume_info->password)); 2543 */
2491 kfree(volume_info->password); 2544 rc = -ELOOP;
2492 } 2545 goto mount_fail_check;
2493 kfree(volume_info->UNC);
2494 kfree(volume_info->prepath);
2495 kfree(volume_info);
2496 }
2497 FreeXid(xid);
2498 return rc;
2499}
2500
2501static int
2502CIFSSessSetup(unsigned int xid, struct cifsSesInfo *ses,
2503 char session_key[CIFS_SESS_KEY_SIZE],
2504 const struct nls_table *nls_codepage)
2505{
2506 struct smb_hdr *smb_buffer;
2507 struct smb_hdr *smb_buffer_response;
2508 SESSION_SETUP_ANDX *pSMB;
2509 SESSION_SETUP_ANDX *pSMBr;
2510 char *bcc_ptr;
2511 char *user;
2512 char *domain;
2513 int rc = 0;
2514 int remaining_words = 0;
2515 int bytes_returned = 0;
2516 int len;
2517 __u32 capabilities;
2518 __u16 count;
2519
2520 cFYI(1, ("In sesssetup"));
2521 if (ses == NULL)
2522 return -EINVAL;
2523 user = ses->userName;
2524 domain = ses->domainName;
2525 smb_buffer = cifs_buf_get();
2526
2527 if (smb_buffer == NULL)
2528 return -ENOMEM;
2529
2530 smb_buffer_response = smb_buffer;
2531 pSMBr = pSMB = (SESSION_SETUP_ANDX *) smb_buffer;
2532
2533 /* send SMBsessionSetup here */
2534 header_assemble(smb_buffer, SMB_COM_SESSION_SETUP_ANDX,
2535 NULL /* no tCon exists yet */ , 13 /* wct */ );
2536
2537 smb_buffer->Mid = GetNextMid(ses->server);
2538 pSMB->req_no_secext.AndXCommand = 0xFF;
2539 pSMB->req_no_secext.MaxBufferSize = cpu_to_le16(ses->server->maxBuf);
2540 pSMB->req_no_secext.MaxMpxCount = cpu_to_le16(ses->server->maxReq);
2541
2542 if (ses->server->secMode &
2543 (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))
2544 smb_buffer->Flags2 |= SMBFLG2_SECURITY_SIGNATURE;
2545
2546 capabilities = CAP_LARGE_FILES | CAP_NT_SMBS | CAP_LEVEL_II_OPLOCKS |
2547 CAP_LARGE_WRITE_X | CAP_LARGE_READ_X;
2548 if (ses->capabilities & CAP_UNICODE) {
2549 smb_buffer->Flags2 |= SMBFLG2_UNICODE;
2550 capabilities |= CAP_UNICODE;
2551 }
2552 if (ses->capabilities & CAP_STATUS32) {
2553 smb_buffer->Flags2 |= SMBFLG2_ERR_STATUS;
2554 capabilities |= CAP_STATUS32;
2555 }
2556 if (ses->capabilities & CAP_DFS) {
2557 smb_buffer->Flags2 |= SMBFLG2_DFS;
2558 capabilities |= CAP_DFS;
2559 }
2560 pSMB->req_no_secext.Capabilities = cpu_to_le32(capabilities);
2561
2562 pSMB->req_no_secext.CaseInsensitivePasswordLength =
2563 cpu_to_le16(CIFS_SESS_KEY_SIZE);
2564
2565 pSMB->req_no_secext.CaseSensitivePasswordLength =
2566 cpu_to_le16(CIFS_SESS_KEY_SIZE);
2567 bcc_ptr = pByteArea(smb_buffer);
2568 memcpy(bcc_ptr, (char *) session_key, CIFS_SESS_KEY_SIZE);
2569 bcc_ptr += CIFS_SESS_KEY_SIZE;
2570 memcpy(bcc_ptr, (char *) session_key, CIFS_SESS_KEY_SIZE);
2571 bcc_ptr += CIFS_SESS_KEY_SIZE;
2572
2573 if (ses->capabilities & CAP_UNICODE) {
2574 if ((long) bcc_ptr % 2) { /* must be word aligned for Unicode */
2575 *bcc_ptr = 0;
2576 bcc_ptr++;
2577 }
2578 if (user == NULL)
2579 bytes_returned = 0; /* skip null user */
2580 else
2581 bytes_returned =
2582 cifs_strtoUCS((__le16 *) bcc_ptr, user, 100,
2583 nls_codepage);
2584 /* convert number of 16 bit words to bytes */
2585 bcc_ptr += 2 * bytes_returned;
2586 bcc_ptr += 2; /* trailing null */
2587 if (domain == NULL)
2588 bytes_returned =
2589 cifs_strtoUCS((__le16 *) bcc_ptr,
2590 "CIFS_LINUX_DOM", 32, nls_codepage);
2591 else
2592 bytes_returned =
2593 cifs_strtoUCS((__le16 *) bcc_ptr, domain, 64,
2594 nls_codepage);
2595 bcc_ptr += 2 * bytes_returned;
2596 bcc_ptr += 2;
2597 bytes_returned =
2598 cifs_strtoUCS((__le16 *) bcc_ptr, "Linux version ",
2599 32, nls_codepage);
2600 bcc_ptr += 2 * bytes_returned;
2601 bytes_returned =
2602 cifs_strtoUCS((__le16 *) bcc_ptr, utsname()->release,
2603 32, nls_codepage);
2604 bcc_ptr += 2 * bytes_returned;
2605 bcc_ptr += 2;
2606 bytes_returned =
2607 cifs_strtoUCS((__le16 *) bcc_ptr, CIFS_NETWORK_OPSYS,
2608 64, nls_codepage);
2609 bcc_ptr += 2 * bytes_returned;
2610 bcc_ptr += 2;
2611 } else {
2612 if (user != NULL) {
2613 strncpy(bcc_ptr, user, 200);
2614 bcc_ptr += strnlen(user, 200);
2615 }
2616 *bcc_ptr = 0;
2617 bcc_ptr++;
2618 if (domain == NULL) {
2619 strcpy(bcc_ptr, "CIFS_LINUX_DOM");
2620 bcc_ptr += strlen("CIFS_LINUX_DOM") + 1;
2621 } else {
2622 strncpy(bcc_ptr, domain, 64);
2623 bcc_ptr += strnlen(domain, 64);
2624 *bcc_ptr = 0;
2625 bcc_ptr++;
2626 }
2627 strcpy(bcc_ptr, "Linux version ");
2628 bcc_ptr += strlen("Linux version ");
2629 strcpy(bcc_ptr, utsname()->release);
2630 bcc_ptr += strlen(utsname()->release) + 1;
2631 strcpy(bcc_ptr, CIFS_NETWORK_OPSYS);
2632 bcc_ptr += strlen(CIFS_NETWORK_OPSYS) + 1;
2633 }
2634 count = (long) bcc_ptr - (long) pByteArea(smb_buffer);
2635 smb_buffer->smb_buf_length += count;
2636 pSMB->req_no_secext.ByteCount = cpu_to_le16(count);
2637
2638 rc = SendReceive(xid, ses, smb_buffer, smb_buffer_response,
2639 &bytes_returned, CIFS_LONG_OP);
2640 if (rc) {
2641/* rc = map_smb_to_linux_error(smb_buffer_response); now done in SendReceive */
2642 } else if ((smb_buffer_response->WordCount == 3)
2643 || (smb_buffer_response->WordCount == 4)) {
2644 __u16 action = le16_to_cpu(pSMBr->resp.Action);
2645 __u16 blob_len = le16_to_cpu(pSMBr->resp.SecurityBlobLength);
2646 if (action & GUEST_LOGIN)
2647 cFYI(1, ("Guest login")); /* BB mark SesInfo struct? */
2648 ses->Suid = smb_buffer_response->Uid; /* UID left in wire format
2649 (little endian) */
2650 cFYI(1, ("UID = %d ", ses->Suid));
2651 /* response can have either 3 or 4 word count - Samba sends 3 */
2652 bcc_ptr = pByteArea(smb_buffer_response);
2653 if ((pSMBr->resp.hdr.WordCount == 3)
2654 || ((pSMBr->resp.hdr.WordCount == 4)
2655 && (blob_len < pSMBr->resp.ByteCount))) {
2656 if (pSMBr->resp.hdr.WordCount == 4)
2657 bcc_ptr += blob_len;
2658
2659 if (smb_buffer->Flags2 & SMBFLG2_UNICODE) {
2660 if ((long) (bcc_ptr) % 2) {
2661 remaining_words =
2662 (BCC(smb_buffer_response) - 1) / 2;
2663 /* Unicode strings must be word
2664 aligned */
2665 bcc_ptr++;
2666 } else {
2667 remaining_words =
2668 BCC(smb_buffer_response) / 2;
2669 }
2670 len =
2671 UniStrnlen((wchar_t *) bcc_ptr,
2672 remaining_words - 1);
2673/* We look for obvious messed up bcc or strings in response so we do not go off
2674 the end since (at least) WIN2K and Windows XP have a major bug in not null
2675 terminating last Unicode string in response */
2676 if (ses->serverOS)
2677 kfree(ses->serverOS);
2678 ses->serverOS = kzalloc(2 * (len + 1),
2679 GFP_KERNEL);
2680 if (ses->serverOS == NULL)
2681 goto sesssetup_nomem;
2682 cifs_strfromUCS_le(ses->serverOS,
2683 (__le16 *)bcc_ptr,
2684 len, nls_codepage);
2685 bcc_ptr += 2 * (len + 1);
2686 remaining_words -= len + 1;
2687 ses->serverOS[2 * len] = 0;
2688 ses->serverOS[1 + (2 * len)] = 0;
2689 if (remaining_words > 0) {
2690 len = UniStrnlen((wchar_t *)bcc_ptr,
2691 remaining_words-1);
2692 kfree(ses->serverNOS);
2693 ses->serverNOS = kzalloc(2 * (len + 1),
2694 GFP_KERNEL);
2695 if (ses->serverNOS == NULL)
2696 goto sesssetup_nomem;
2697 cifs_strfromUCS_le(ses->serverNOS,
2698 (__le16 *)bcc_ptr,
2699 len, nls_codepage);
2700 bcc_ptr += 2 * (len + 1);
2701 ses->serverNOS[2 * len] = 0;
2702 ses->serverNOS[1 + (2 * len)] = 0;
2703 if (strncmp(ses->serverNOS,
2704 "NT LAN Manager 4", 16) == 0) {
2705 cFYI(1, ("NT4 server"));
2706 ses->flags |= CIFS_SES_NT4;
2707 }
2708 remaining_words -= len + 1;
2709 if (remaining_words > 0) {
2710 len = UniStrnlen((wchar_t *) bcc_ptr, remaining_words);
2711 /* last string is not always null terminated
2712 (for e.g. for Windows XP & 2000) */
2713 if (ses->serverDomain)
2714 kfree(ses->serverDomain);
2715 ses->serverDomain =
2716 kzalloc(2*(len+1),
2717 GFP_KERNEL);
2718 if (ses->serverDomain == NULL)
2719 goto sesssetup_nomem;
2720 cifs_strfromUCS_le(ses->serverDomain,
2721 (__le16 *)bcc_ptr,
2722 len, nls_codepage);
2723 bcc_ptr += 2 * (len + 1);
2724 ses->serverDomain[2*len] = 0;
2725 ses->serverDomain[1+(2*len)] = 0;
2726 } else { /* else no more room so create
2727 dummy domain string */
2728 if (ses->serverDomain)
2729 kfree(ses->serverDomain);
2730 ses->serverDomain =
2731 kzalloc(2, GFP_KERNEL);
2732 }
2733 } else { /* no room so create dummy domain
2734 and NOS string */
2735
2736 /* if these kcallocs fail not much we
2737 can do, but better to not fail the
2738 sesssetup itself */
2739 kfree(ses->serverDomain);
2740 ses->serverDomain =
2741 kzalloc(2, GFP_KERNEL);
2742 kfree(ses->serverNOS);
2743 ses->serverNOS =
2744 kzalloc(2, GFP_KERNEL);
2745 }
2746 } else { /* ASCII */
2747 len = strnlen(bcc_ptr, 1024);
2748 if (((long) bcc_ptr + len) - (long)
2749 pByteArea(smb_buffer_response)
2750 <= BCC(smb_buffer_response)) {
2751 kfree(ses->serverOS);
2752 ses->serverOS = kzalloc(len + 1,
2753 GFP_KERNEL);
2754 if (ses->serverOS == NULL)
2755 goto sesssetup_nomem;
2756 strncpy(ses->serverOS, bcc_ptr, len);
2757
2758 bcc_ptr += len;
2759 /* null terminate the string */
2760 bcc_ptr[0] = 0;
2761 bcc_ptr++;
2762
2763 len = strnlen(bcc_ptr, 1024);
2764 kfree(ses->serverNOS);
2765 ses->serverNOS = kzalloc(len + 1,
2766 GFP_KERNEL);
2767 if (ses->serverNOS == NULL)
2768 goto sesssetup_nomem;
2769 strncpy(ses->serverNOS, bcc_ptr, len);
2770 bcc_ptr += len;
2771 bcc_ptr[0] = 0;
2772 bcc_ptr++;
2773
2774 len = strnlen(bcc_ptr, 1024);
2775 if (ses->serverDomain)
2776 kfree(ses->serverDomain);
2777 ses->serverDomain = kzalloc(len + 1,
2778 GFP_KERNEL);
2779 if (ses->serverDomain == NULL)
2780 goto sesssetup_nomem;
2781 strncpy(ses->serverDomain, bcc_ptr,
2782 len);
2783 bcc_ptr += len;
2784 bcc_ptr[0] = 0;
2785 bcc_ptr++;
2786 } else
2787 cFYI(1,
2788 ("Variable field of length %d "
2789 "extends beyond end of smb ",
2790 len));
2791 }
2792 } else {
2793 cERROR(1, ("Security Blob Length extends beyond "
2794 "end of SMB"));
2795 } 2546 }
2796 } else { 2547 /* convert forward to back slashes in prepath here if needed */
2797 cERROR(1, ("Invalid Word count %d: ", 2548 if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) == 0)
2798 smb_buffer_response->WordCount)); 2549 convert_delimiter(cifs_sb->prepath,
2799 rc = -EIO; 2550 CIFS_DIR_SEP(cifs_sb));
2800 } 2551 full_path = build_unc_path_to_root(volume_info, cifs_sb);
2801sesssetup_nomem: /* do not return an error on nomem for the info strings, 2552 if (IS_ERR(full_path)) {
2802 since that could make reconnection harder, and 2553 rc = PTR_ERR(full_path);
2803 reconnection might be needed to free memory */ 2554 goto mount_fail_check;
2804 cifs_buf_release(smb_buffer);
2805
2806 return rc;
2807}
2808
2809static int
2810CIFSNTLMSSPNegotiateSessSetup(unsigned int xid,
2811 struct cifsSesInfo *ses, bool *pNTLMv2_flag,
2812 const struct nls_table *nls_codepage)
2813{
2814 struct smb_hdr *smb_buffer;
2815 struct smb_hdr *smb_buffer_response;
2816 SESSION_SETUP_ANDX *pSMB;
2817 SESSION_SETUP_ANDX *pSMBr;
2818 char *bcc_ptr;
2819 char *domain;
2820 int rc = 0;
2821 int remaining_words = 0;
2822 int bytes_returned = 0;
2823 int len;
2824 int SecurityBlobLength = sizeof(NEGOTIATE_MESSAGE);
2825 PNEGOTIATE_MESSAGE SecurityBlob;
2826 PCHALLENGE_MESSAGE SecurityBlob2;
2827 __u32 negotiate_flags, capabilities;
2828 __u16 count;
2829
2830 cFYI(1, ("In NTLMSSP sesssetup (negotiate)"));
2831 if (ses == NULL)
2832 return -EINVAL;
2833 domain = ses->domainName;
2834 *pNTLMv2_flag = false;
2835 smb_buffer = cifs_buf_get();
2836 if (smb_buffer == NULL) {
2837 return -ENOMEM;
2838 }
2839 smb_buffer_response = smb_buffer;
2840 pSMB = (SESSION_SETUP_ANDX *) smb_buffer;
2841 pSMBr = (SESSION_SETUP_ANDX *) smb_buffer_response;
2842
2843 /* send SMBsessionSetup here */
2844 header_assemble(smb_buffer, SMB_COM_SESSION_SETUP_ANDX,
2845 NULL /* no tCon exists yet */ , 12 /* wct */ );
2846
2847 smb_buffer->Mid = GetNextMid(ses->server);
2848 pSMB->req.hdr.Flags2 |= SMBFLG2_EXT_SEC;
2849 pSMB->req.hdr.Flags |= (SMBFLG_CASELESS | SMBFLG_CANONICAL_PATH_FORMAT);
2850
2851 pSMB->req.AndXCommand = 0xFF;
2852 pSMB->req.MaxBufferSize = cpu_to_le16(ses->server->maxBuf);
2853 pSMB->req.MaxMpxCount = cpu_to_le16(ses->server->maxReq);
2854
2855 if (ses->server->secMode & (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))
2856 smb_buffer->Flags2 |= SMBFLG2_SECURITY_SIGNATURE;
2857
2858 capabilities = CAP_LARGE_FILES | CAP_NT_SMBS | CAP_LEVEL_II_OPLOCKS |
2859 CAP_EXTENDED_SECURITY;
2860 if (ses->capabilities & CAP_UNICODE) {
2861 smb_buffer->Flags2 |= SMBFLG2_UNICODE;
2862 capabilities |= CAP_UNICODE;
2863 }
2864 if (ses->capabilities & CAP_STATUS32) {
2865 smb_buffer->Flags2 |= SMBFLG2_ERR_STATUS;
2866 capabilities |= CAP_STATUS32;
2867 }
2868 if (ses->capabilities & CAP_DFS) {
2869 smb_buffer->Flags2 |= SMBFLG2_DFS;
2870 capabilities |= CAP_DFS;
2871 }
2872 pSMB->req.Capabilities = cpu_to_le32(capabilities);
2873
2874 bcc_ptr = (char *) &pSMB->req.SecurityBlob;
2875 SecurityBlob = (PNEGOTIATE_MESSAGE) bcc_ptr;
2876 strncpy(SecurityBlob->Signature, NTLMSSP_SIGNATURE, 8);
2877 SecurityBlob->MessageType = NtLmNegotiate;
2878 negotiate_flags =
2879 NTLMSSP_NEGOTIATE_UNICODE | NTLMSSP_NEGOTIATE_OEM |
2880 NTLMSSP_REQUEST_TARGET | NTLMSSP_NEGOTIATE_NTLM |
2881 NTLMSSP_NEGOTIATE_56 |
2882 /* NTLMSSP_NEGOTIATE_ALWAYS_SIGN | */ NTLMSSP_NEGOTIATE_128;
2883 if (sign_CIFS_PDUs)
2884 negotiate_flags |= NTLMSSP_NEGOTIATE_SIGN;
2885/* if (ntlmv2_support)
2886 negotiate_flags |= NTLMSSP_NEGOTIATE_NTLMV2;*/
2887 /* setup pointers to domain name and workstation name */
2888 bcc_ptr += SecurityBlobLength;
2889
2890 SecurityBlob->WorkstationName.Buffer = 0;
2891 SecurityBlob->WorkstationName.Length = 0;
2892 SecurityBlob->WorkstationName.MaximumLength = 0;
2893
2894 /* Domain not sent on first Sesssetup in NTLMSSP, instead it is sent
2895 along with username on auth request (ie the response to challenge) */
2896 SecurityBlob->DomainName.Buffer = 0;
2897 SecurityBlob->DomainName.Length = 0;
2898 SecurityBlob->DomainName.MaximumLength = 0;
2899 if (ses->capabilities & CAP_UNICODE) {
2900 if ((long) bcc_ptr % 2) {
2901 *bcc_ptr = 0;
2902 bcc_ptr++;
2903 } 2555 }
2904 2556
2905 bytes_returned = 2557 cFYI(1, ("Getting referral for: %s", full_path));
2906 cifs_strtoUCS((__le16 *) bcc_ptr, "Linux version ", 2558 rc = get_dfs_path(xid, pSesInfo , full_path + 1,
2907 32, nls_codepage); 2559 cifs_sb->local_nls, &num_referrals, &referrals,
2908 bcc_ptr += 2 * bytes_returned; 2560 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
2909 bytes_returned = 2561 if (!rc && num_referrals > 0) {
2910 cifs_strtoUCS((__le16 *) bcc_ptr, utsname()->release, 32, 2562 char *fake_devname = NULL;
2911 nls_codepage); 2563
2912 bcc_ptr += 2 * bytes_returned; 2564 if (mount_data != mount_data_global)
2913 bcc_ptr += 2; /* null terminate Linux version */ 2565 kfree(mount_data);
2914 bytes_returned = 2566 mount_data = cifs_compose_mount_options(
2915 cifs_strtoUCS((__le16 *) bcc_ptr, CIFS_NETWORK_OPSYS, 2567 cifs_sb->mountdata, full_path + 1,
2916 64, nls_codepage); 2568 referrals, &fake_devname);
2917 bcc_ptr += 2 * bytes_returned; 2569 kfree(fake_devname);
2918 *(bcc_ptr + 1) = 0; 2570 free_dfs_info_array(referrals, num_referrals);
2919 *(bcc_ptr + 2) = 0; 2571
2920 bcc_ptr += 2; /* null terminate network opsys string */ 2572 if (tcon)
2921 *(bcc_ptr + 1) = 0; 2573 cifs_put_tcon(tcon);
2922 *(bcc_ptr + 2) = 0; 2574 else if (pSesInfo)
2923 bcc_ptr += 2; /* null domain */ 2575 cifs_put_smb_ses(pSesInfo);
2924 } else { /* ASCII */ 2576
2925 strcpy(bcc_ptr, "Linux version "); 2577 cleanup_volume_info(&volume_info);
2926 bcc_ptr += strlen("Linux version "); 2578 FreeXid(xid);
2927 strcpy(bcc_ptr, utsname()->release); 2579 kfree(full_path);
2928 bcc_ptr += strlen(utsname()->release) + 1; 2580 referral_walks_count++;
2929 strcpy(bcc_ptr, CIFS_NETWORK_OPSYS); 2581 goto try_mount_again;
2930 bcc_ptr += strlen(CIFS_NETWORK_OPSYS) + 1;
2931 bcc_ptr++; /* empty domain field */
2932 *bcc_ptr = 0;
2933 }
2934 SecurityBlob->NegotiateFlags = cpu_to_le32(negotiate_flags);
2935 pSMB->req.SecurityBlobLength = cpu_to_le16(SecurityBlobLength);
2936 count = (long) bcc_ptr - (long) pByteArea(smb_buffer);
2937 smb_buffer->smb_buf_length += count;
2938 pSMB->req.ByteCount = cpu_to_le16(count);
2939
2940 rc = SendReceive(xid, ses, smb_buffer, smb_buffer_response,
2941 &bytes_returned, CIFS_LONG_OP);
2942
2943 if (smb_buffer_response->Status.CifsError ==
2944 cpu_to_le32(NT_STATUS_MORE_PROCESSING_REQUIRED))
2945 rc = 0;
2946
2947 if (rc) {
2948/* rc = map_smb_to_linux_error(smb_buffer_response); *//* done in SendReceive now */
2949 } else if ((smb_buffer_response->WordCount == 3)
2950 || (smb_buffer_response->WordCount == 4)) {
2951 __u16 action = le16_to_cpu(pSMBr->resp.Action);
2952 __u16 blob_len = le16_to_cpu(pSMBr->resp.SecurityBlobLength);
2953
2954 if (action & GUEST_LOGIN)
2955 cFYI(1, ("Guest login"));
2956 /* Do we want to set anything in SesInfo struct when guest login? */
2957
2958 bcc_ptr = pByteArea(smb_buffer_response);
2959 /* response can have either 3 or 4 word count - Samba sends 3 */
2960
2961 SecurityBlob2 = (PCHALLENGE_MESSAGE) bcc_ptr;
2962 if (SecurityBlob2->MessageType != NtLmChallenge) {
2963 cFYI(1, ("Unexpected NTLMSSP message type received %d",
2964 SecurityBlob2->MessageType));
2965 } else if (ses) {
2966 ses->Suid = smb_buffer_response->Uid; /* UID left in le format */
2967 cFYI(1, ("UID = %d", ses->Suid));
2968 if ((pSMBr->resp.hdr.WordCount == 3)
2969 || ((pSMBr->resp.hdr.WordCount == 4)
2970 && (blob_len <
2971 pSMBr->resp.ByteCount))) {
2972
2973 if (pSMBr->resp.hdr.WordCount == 4) {
2974 bcc_ptr += blob_len;
2975 cFYI(1, ("Security Blob Length %d",
2976 blob_len));
2977 }
2978
2979 cFYI(1, ("NTLMSSP Challenge rcvd"));
2980
2981 memcpy(ses->server->cryptKey,
2982 SecurityBlob2->Challenge,
2983 CIFS_CRYPTO_KEY_SIZE);
2984 if (SecurityBlob2->NegotiateFlags &
2985 cpu_to_le32(NTLMSSP_NEGOTIATE_NTLMV2))
2986 *pNTLMv2_flag = true;
2987
2988 if ((SecurityBlob2->NegotiateFlags &
2989 cpu_to_le32(NTLMSSP_NEGOTIATE_ALWAYS_SIGN))
2990 || (sign_CIFS_PDUs > 1))
2991 ses->server->secMode |=
2992 SECMODE_SIGN_REQUIRED;
2993 if ((SecurityBlob2->NegotiateFlags &
2994 cpu_to_le32(NTLMSSP_NEGOTIATE_SIGN)) && (sign_CIFS_PDUs))
2995 ses->server->secMode |=
2996 SECMODE_SIGN_ENABLED;
2997
2998 if (smb_buffer->Flags2 & SMBFLG2_UNICODE) {
2999 if ((long) (bcc_ptr) % 2) {
3000 remaining_words =
3001 (BCC(smb_buffer_response)
3002 - 1) / 2;
3003 /* Must word align unicode strings */
3004 bcc_ptr++;
3005 } else {
3006 remaining_words =
3007 BCC
3008 (smb_buffer_response) / 2;
3009 }
3010 len =
3011 UniStrnlen((wchar_t *) bcc_ptr,
3012 remaining_words - 1);
3013/* We look for obvious messed up bcc or strings in response so we do not go off
3014 the end since (at least) WIN2K and Windows XP have a major bug in not null
3015 terminating last Unicode string in response */
3016 if (ses->serverOS)
3017 kfree(ses->serverOS);
3018 ses->serverOS =
3019 kzalloc(2 * (len + 1), GFP_KERNEL);
3020 cifs_strfromUCS_le(ses->serverOS,
3021 (__le16 *)
3022 bcc_ptr, len,
3023 nls_codepage);
3024 bcc_ptr += 2 * (len + 1);
3025 remaining_words -= len + 1;
3026 ses->serverOS[2 * len] = 0;
3027 ses->serverOS[1 + (2 * len)] = 0;
3028 if (remaining_words > 0) {
3029 len = UniStrnlen((wchar_t *)
3030 bcc_ptr,
3031 remaining_words
3032 - 1);
3033 kfree(ses->serverNOS);
3034 ses->serverNOS =
3035 kzalloc(2 * (len + 1),
3036 GFP_KERNEL);
3037 cifs_strfromUCS_le(ses->
3038 serverNOS,
3039 (__le16 *)
3040 bcc_ptr,
3041 len,
3042 nls_codepage);
3043 bcc_ptr += 2 * (len + 1);
3044 ses->serverNOS[2 * len] = 0;
3045 ses->serverNOS[1 +
3046 (2 * len)] = 0;
3047 remaining_words -= len + 1;
3048 if (remaining_words > 0) {
3049 len = UniStrnlen((wchar_t *) bcc_ptr, remaining_words);
3050 /* last string not always null terminated
3051 (for e.g. for Windows XP & 2000) */
3052 kfree(ses->serverDomain);
3053 ses->serverDomain =
3054 kzalloc(2 *
3055 (len +
3056 1),
3057 GFP_KERNEL);
3058 cifs_strfromUCS_le
3059 (ses->serverDomain,
3060 (__le16 *)bcc_ptr,
3061 len, nls_codepage);
3062 bcc_ptr +=
3063 2 * (len + 1);
3064 ses->serverDomain[2*len]
3065 = 0;
3066 ses->serverDomain
3067 [1 + (2 * len)]
3068 = 0;
3069 } /* else no more room so create dummy domain string */
3070 else {
3071 kfree(ses->serverDomain);
3072 ses->serverDomain =
3073 kzalloc(2,
3074 GFP_KERNEL);
3075 }
3076 } else { /* no room so create dummy domain and NOS string */
3077 kfree(ses->serverDomain);
3078 ses->serverDomain =
3079 kzalloc(2, GFP_KERNEL);
3080 kfree(ses->serverNOS);
3081 ses->serverNOS =
3082 kzalloc(2, GFP_KERNEL);
3083 }
3084 } else { /* ASCII */
3085 len = strnlen(bcc_ptr, 1024);
3086 if (((long) bcc_ptr + len) - (long)
3087 pByteArea(smb_buffer_response)
3088 <= BCC(smb_buffer_response)) {
3089 if (ses->serverOS)
3090 kfree(ses->serverOS);
3091 ses->serverOS =
3092 kzalloc(len + 1,
3093 GFP_KERNEL);
3094 strncpy(ses->serverOS,
3095 bcc_ptr, len);
3096
3097 bcc_ptr += len;
3098 bcc_ptr[0] = 0; /* null terminate string */
3099 bcc_ptr++;
3100
3101 len = strnlen(bcc_ptr, 1024);
3102 kfree(ses->serverNOS);
3103 ses->serverNOS =
3104 kzalloc(len + 1,
3105 GFP_KERNEL);
3106 strncpy(ses->serverNOS, bcc_ptr, len);
3107 bcc_ptr += len;
3108 bcc_ptr[0] = 0;
3109 bcc_ptr++;
3110
3111 len = strnlen(bcc_ptr, 1024);
3112 kfree(ses->serverDomain);
3113 ses->serverDomain =
3114 kzalloc(len + 1,
3115 GFP_KERNEL);
3116 strncpy(ses->serverDomain,
3117 bcc_ptr, len);
3118 bcc_ptr += len;
3119 bcc_ptr[0] = 0;
3120 bcc_ptr++;
3121 } else
3122 cFYI(1,
3123 ("field of length %d "
3124 "extends beyond end of smb",
3125 len));
3126 }
3127 } else {
3128 cERROR(1, ("Security Blob Length extends beyond"
3129 " end of SMB"));
3130 }
3131 } else {
3132 cERROR(1, ("No session structure passed in."));
3133 } 2582 }
3134 } else { 2583#else /* No DFS support, return error on mount */
3135 cERROR(1, ("Invalid Word count %d:", 2584 rc = -EOPNOTSUPP;
3136 smb_buffer_response->WordCount)); 2585#endif
3137 rc = -EIO;
3138 }
3139
3140 cifs_buf_release(smb_buffer);
3141
3142 return rc;
3143}
3144static int
3145CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses,
3146 char *ntlm_session_key, bool ntlmv2_flag,
3147 const struct nls_table *nls_codepage)
3148{
3149 struct smb_hdr *smb_buffer;
3150 struct smb_hdr *smb_buffer_response;
3151 SESSION_SETUP_ANDX *pSMB;
3152 SESSION_SETUP_ANDX *pSMBr;
3153 char *bcc_ptr;
3154 char *user;
3155 char *domain;
3156 int rc = 0;
3157 int remaining_words = 0;
3158 int bytes_returned = 0;
3159 int len;
3160 int SecurityBlobLength = sizeof(AUTHENTICATE_MESSAGE);
3161 PAUTHENTICATE_MESSAGE SecurityBlob;
3162 __u32 negotiate_flags, capabilities;
3163 __u16 count;
3164
3165 cFYI(1, ("In NTLMSSPSessSetup (Authenticate)"));
3166 if (ses == NULL)
3167 return -EINVAL;
3168 user = ses->userName;
3169 domain = ses->domainName;
3170 smb_buffer = cifs_buf_get();
3171 if (smb_buffer == NULL) {
3172 return -ENOMEM;
3173 }
3174 smb_buffer_response = smb_buffer;
3175 pSMB = (SESSION_SETUP_ANDX *)smb_buffer;
3176 pSMBr = (SESSION_SETUP_ANDX *)smb_buffer_response;
3177
3178 /* send SMBsessionSetup here */
3179 header_assemble(smb_buffer, SMB_COM_SESSION_SETUP_ANDX,
3180 NULL /* no tCon exists yet */ , 12 /* wct */ );
3181
3182 smb_buffer->Mid = GetNextMid(ses->server);
3183 pSMB->req.hdr.Flags |= (SMBFLG_CASELESS | SMBFLG_CANONICAL_PATH_FORMAT);
3184 pSMB->req.hdr.Flags2 |= SMBFLG2_EXT_SEC;
3185 pSMB->req.AndXCommand = 0xFF;
3186 pSMB->req.MaxBufferSize = cpu_to_le16(ses->server->maxBuf);
3187 pSMB->req.MaxMpxCount = cpu_to_le16(ses->server->maxReq);
3188
3189 pSMB->req.hdr.Uid = ses->Suid;
3190
3191 if (ses->server->secMode & (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))
3192 smb_buffer->Flags2 |= SMBFLG2_SECURITY_SIGNATURE;
3193
3194 capabilities = CAP_LARGE_FILES | CAP_NT_SMBS | CAP_LEVEL_II_OPLOCKS |
3195 CAP_EXTENDED_SECURITY;
3196 if (ses->capabilities & CAP_UNICODE) {
3197 smb_buffer->Flags2 |= SMBFLG2_UNICODE;
3198 capabilities |= CAP_UNICODE;
3199 }
3200 if (ses->capabilities & CAP_STATUS32) {
3201 smb_buffer->Flags2 |= SMBFLG2_ERR_STATUS;
3202 capabilities |= CAP_STATUS32;
3203 } 2586 }
3204 if (ses->capabilities & CAP_DFS) {
3205 smb_buffer->Flags2 |= SMBFLG2_DFS;
3206 capabilities |= CAP_DFS;
3207 }
3208 pSMB->req.Capabilities = cpu_to_le32(capabilities);
3209
3210 bcc_ptr = (char *)&pSMB->req.SecurityBlob;
3211 SecurityBlob = (PAUTHENTICATE_MESSAGE)bcc_ptr;
3212 strncpy(SecurityBlob->Signature, NTLMSSP_SIGNATURE, 8);
3213 SecurityBlob->MessageType = NtLmAuthenticate;
3214 bcc_ptr += SecurityBlobLength;
3215 negotiate_flags = NTLMSSP_NEGOTIATE_UNICODE | NTLMSSP_REQUEST_TARGET |
3216 NTLMSSP_NEGOTIATE_NTLM | NTLMSSP_NEGOTIATE_TARGET_INFO |
3217 0x80000000 | NTLMSSP_NEGOTIATE_128;
3218 if (sign_CIFS_PDUs)
3219 negotiate_flags |= /* NTLMSSP_NEGOTIATE_ALWAYS_SIGN |*/ NTLMSSP_NEGOTIATE_SIGN;
3220 if (ntlmv2_flag)
3221 negotiate_flags |= NTLMSSP_NEGOTIATE_NTLMV2;
3222
3223/* setup pointers to domain name and workstation name */
3224
3225 SecurityBlob->WorkstationName.Buffer = 0;
3226 SecurityBlob->WorkstationName.Length = 0;
3227 SecurityBlob->WorkstationName.MaximumLength = 0;
3228 SecurityBlob->SessionKey.Length = 0;
3229 SecurityBlob->SessionKey.MaximumLength = 0;
3230 SecurityBlob->SessionKey.Buffer = 0;
3231
3232 SecurityBlob->LmChallengeResponse.Length = 0;
3233 SecurityBlob->LmChallengeResponse.MaximumLength = 0;
3234 SecurityBlob->LmChallengeResponse.Buffer = 0;
3235
3236 SecurityBlob->NtChallengeResponse.Length =
3237 cpu_to_le16(CIFS_SESS_KEY_SIZE);
3238 SecurityBlob->NtChallengeResponse.MaximumLength =
3239 cpu_to_le16(CIFS_SESS_KEY_SIZE);
3240 memcpy(bcc_ptr, ntlm_session_key, CIFS_SESS_KEY_SIZE);
3241 SecurityBlob->NtChallengeResponse.Buffer =
3242 cpu_to_le32(SecurityBlobLength);
3243 SecurityBlobLength += CIFS_SESS_KEY_SIZE;
3244 bcc_ptr += CIFS_SESS_KEY_SIZE;
3245
3246 	if (ses->capabilities & CAP_UNICODE) {
3247 		if (domain == NULL) {
3248 SecurityBlob->DomainName.Buffer = 0;
3249 SecurityBlob->DomainName.Length = 0;
3250 SecurityBlob->DomainName.MaximumLength = 0;
3251 } else {
3252 __u16 ln = cifs_strtoUCS((__le16 *) bcc_ptr, domain, 64,
3253 nls_codepage);
3254 ln *= 2;
3255 SecurityBlob->DomainName.MaximumLength =
3256 cpu_to_le16(ln);
3257 SecurityBlob->DomainName.Buffer =
3258 cpu_to_le32(SecurityBlobLength);
3259 bcc_ptr += ln;
3260 SecurityBlobLength += ln;
3261 SecurityBlob->DomainName.Length = cpu_to_le16(ln);
3262 }
3263 if (user == NULL) {
3264 SecurityBlob->UserName.Buffer = 0;
3265 SecurityBlob->UserName.Length = 0;
3266 SecurityBlob->UserName.MaximumLength = 0;
3267 } else {
3268 __u16 ln = cifs_strtoUCS((__le16 *) bcc_ptr, user, 64,
3269 nls_codepage);
3270 ln *= 2;
3271 SecurityBlob->UserName.MaximumLength =
3272 cpu_to_le16(ln);
3273 SecurityBlob->UserName.Buffer =
3274 cpu_to_le32(SecurityBlobLength);
3275 bcc_ptr += ln;
3276 SecurityBlobLength += ln;
3277 SecurityBlob->UserName.Length = cpu_to_le16(ln);
3278 }
3279
3280 /* SecurityBlob->WorkstationName.Length =
3281 cifs_strtoUCS((__le16 *) bcc_ptr, "AMACHINE",64, nls_codepage);
3282 SecurityBlob->WorkstationName.Length *= 2;
3283 SecurityBlob->WorkstationName.MaximumLength =
3284 cpu_to_le16(SecurityBlob->WorkstationName.Length);
3285 SecurityBlob->WorkstationName.Buffer =
3286 cpu_to_le32(SecurityBlobLength);
3287 bcc_ptr += SecurityBlob->WorkstationName.Length;
3288 SecurityBlobLength += SecurityBlob->WorkstationName.Length;
3289 SecurityBlob->WorkstationName.Length =
3290 cpu_to_le16(SecurityBlob->WorkstationName.Length); */
3291
3292 if ((long) bcc_ptr % 2) {
3293 *bcc_ptr = 0;
3294 bcc_ptr++;
3295 }
3296 bytes_returned =
3297 cifs_strtoUCS((__le16 *) bcc_ptr, "Linux version ",
3298 32, nls_codepage);
3299 bcc_ptr += 2 * bytes_returned;
3300 bytes_returned =
3301 cifs_strtoUCS((__le16 *) bcc_ptr, utsname()->release, 32,
3302 nls_codepage);
3303 bcc_ptr += 2 * bytes_returned;
3304 bcc_ptr += 2; /* null term version string */
3305 bytes_returned =
3306 cifs_strtoUCS((__le16 *) bcc_ptr, CIFS_NETWORK_OPSYS,
3307 64, nls_codepage);
3308 bcc_ptr += 2 * bytes_returned;
3309 *(bcc_ptr + 1) = 0;
3310 *(bcc_ptr + 2) = 0;
3311 bcc_ptr += 2; /* null terminate network opsys string */
3312 *(bcc_ptr + 1) = 0;
3313 *(bcc_ptr + 2) = 0;
3314 bcc_ptr += 2; /* null domain */
3315 } else { /* ASCII */
3316 if (domain == NULL) {
3317 SecurityBlob->DomainName.Buffer = 0;
3318 SecurityBlob->DomainName.Length = 0;
3319 SecurityBlob->DomainName.MaximumLength = 0;
3320 } else {
3321 __u16 ln;
3322 negotiate_flags |= NTLMSSP_NEGOTIATE_DOMAIN_SUPPLIED;
3323 strncpy(bcc_ptr, domain, 63);
3324 ln = strnlen(domain, 64);
3325 SecurityBlob->DomainName.MaximumLength =
3326 cpu_to_le16(ln);
3327 SecurityBlob->DomainName.Buffer =
3328 cpu_to_le32(SecurityBlobLength);
3329 bcc_ptr += ln;
3330 SecurityBlobLength += ln;
3331 SecurityBlob->DomainName.Length = cpu_to_le16(ln);
3332 }
3333 if (user == NULL) {
3334 SecurityBlob->UserName.Buffer = 0;
3335 SecurityBlob->UserName.Length = 0;
3336 SecurityBlob->UserName.MaximumLength = 0;
3337 } else {
3338 __u16 ln;
3339 strncpy(bcc_ptr, user, 63);
3340 ln = strnlen(user, 64);
3341 SecurityBlob->UserName.MaximumLength = cpu_to_le16(ln);
3342 SecurityBlob->UserName.Buffer =
3343 cpu_to_le32(SecurityBlobLength);
3344 bcc_ptr += ln;
3345 SecurityBlobLength += ln;
3346 SecurityBlob->UserName.Length = cpu_to_le16(ln);
3347 }
3348 /* BB fill in our workstation name if known BB */
3349
3350 strcpy(bcc_ptr, "Linux version ");
3351 bcc_ptr += strlen("Linux version ");
3352 strcpy(bcc_ptr, utsname()->release);
3353 bcc_ptr += strlen(utsname()->release) + 1;
3354 strcpy(bcc_ptr, CIFS_NETWORK_OPSYS);
3355 bcc_ptr += strlen(CIFS_NETWORK_OPSYS) + 1;
3356 bcc_ptr++; /* null domain */
3357 *bcc_ptr = 0;
3358 }
3359 SecurityBlob->NegotiateFlags = cpu_to_le32(negotiate_flags);
3360 pSMB->req.SecurityBlobLength = cpu_to_le16(SecurityBlobLength);
3361 count = (long) bcc_ptr - (long) pByteArea(smb_buffer);
3362 smb_buffer->smb_buf_length += count;
3363 pSMB->req.ByteCount = cpu_to_le16(count);
3364
3365 rc = SendReceive(xid, ses, smb_buffer, smb_buffer_response,
3366 &bytes_returned, CIFS_LONG_OP);
3367 	if (rc) {
3368/* rc = map_smb_to_linux_error(smb_buffer_response) done in SendReceive now */
3369 	} else if ((smb_buffer_response->WordCount == 3) ||
3370 		   (smb_buffer_response->WordCount == 4)) {
3371 		__u16 action = le16_to_cpu(pSMBr->resp.Action);
3372 		__u16 blob_len = le16_to_cpu(pSMBr->resp.SecurityBlobLength);
3373 		if (action & GUEST_LOGIN)
3374 			cFYI(1, ("Guest login")); /* BB Should we set anything
3375 							 in SesInfo struct ? */
3376/*		if (SecurityBlob2->MessageType != NtLm??) {
3377 			cFYI("Unexpected message type on auth response is %d"));
3378 		} */
3379
3380 if (ses) {
3381 cFYI(1,
3382 ("Check challenge UID %d vs auth response UID %d",
3383 ses->Suid, smb_buffer_response->Uid));
3384 /* UID left in wire format */
3385 ses->Suid = smb_buffer_response->Uid;
3386 bcc_ptr = pByteArea(smb_buffer_response);
3387 /* response can have either 3 or 4 word count - Samba sends 3 */
3388 if ((pSMBr->resp.hdr.WordCount == 3)
3389 || ((pSMBr->resp.hdr.WordCount == 4)
3390 && (blob_len <
3391 pSMBr->resp.ByteCount))) {
3392 if (pSMBr->resp.hdr.WordCount == 4) {
3393 bcc_ptr +=
3394 blob_len;
3395 cFYI(1,
3396 ("Security Blob Length %d ",
3397 blob_len));
3398 }
3399
3400 cFYI(1,
3401 ("NTLMSSP response to Authenticate "));
3402
3403 if (smb_buffer->Flags2 & SMBFLG2_UNICODE) {
3404 if ((long) (bcc_ptr) % 2) {
3405 remaining_words =
3406 (BCC(smb_buffer_response)
3407 - 1) / 2;
3408 bcc_ptr++; /* Unicode strings must be word aligned */
3409 } else {
3410 remaining_words = BCC(smb_buffer_response) / 2;
3411 }
3412 len = UniStrnlen((wchar_t *) bcc_ptr,
3413 remaining_words - 1);
3414/* We look for obvious messed up bcc or strings in response so we do not go off
3415 the end since (at least) WIN2K and Windows XP have a major bug in not null
3416 terminating last Unicode string in response */
3417 if (ses->serverOS)
3418 kfree(ses->serverOS);
3419 ses->serverOS =
3420 kzalloc(2 * (len + 1), GFP_KERNEL);
3421 cifs_strfromUCS_le(ses->serverOS,
3422 (__le16 *)
3423 bcc_ptr, len,
3424 nls_codepage);
3425 bcc_ptr += 2 * (len + 1);
3426 remaining_words -= len + 1;
3427 ses->serverOS[2 * len] = 0;
3428 ses->serverOS[1 + (2 * len)] = 0;
3429 if (remaining_words > 0) {
3430 len = UniStrnlen((wchar_t *)
3431 bcc_ptr,
3432 remaining_words
3433 - 1);
3434 kfree(ses->serverNOS);
3435 ses->serverNOS =
3436 kzalloc(2 * (len + 1),
3437 GFP_KERNEL);
3438 cifs_strfromUCS_le(ses->
3439 serverNOS,
3440 (__le16 *)
3441 bcc_ptr,
3442 len,
3443 nls_codepage);
3444 bcc_ptr += 2 * (len + 1);
3445 ses->serverNOS[2 * len] = 0;
3446 ses->serverNOS[1+(2*len)] = 0;
3447 remaining_words -= len + 1;
3448 if (remaining_words > 0) {
3449 len = UniStrnlen((wchar_t *) bcc_ptr, remaining_words);
3450 /* last string not always null terminated (e.g. for Windows XP & 2000) */
3451 if (ses->serverDomain)
3452 kfree(ses->serverDomain);
3453 ses->serverDomain =
3454 kzalloc(2 *
3455 (len +
3456 1),
3457 GFP_KERNEL);
3458 cifs_strfromUCS_le
3459 (ses->
3460 serverDomain,
3461 (__le16 *)
3462 bcc_ptr, len,
3463 nls_codepage);
3464 bcc_ptr +=
3465 2 * (len + 1);
3466 ses->
3467 serverDomain[2
3468 * len]
3469 = 0;
3470 ses->
3471 serverDomain[1
3472 +
3473 (2
3474 *
3475 len)]
3476 = 0;
3477 } /* else no more room so create dummy domain string */
3478 else {
3479 if (ses->serverDomain)
3480 kfree(ses->serverDomain);
3481 ses->serverDomain = kzalloc(2,GFP_KERNEL);
3482 }
3483 } else { /* no room so create dummy domain and NOS string */
3484 if (ses->serverDomain)
3485 kfree(ses->serverDomain);
3486 ses->serverDomain = kzalloc(2, GFP_KERNEL);
3487 kfree(ses->serverNOS);
3488 ses->serverNOS = kzalloc(2, GFP_KERNEL);
3489 }
3490 } else { /* ASCII */
3491 len = strnlen(bcc_ptr, 1024);
3492 if (((long) bcc_ptr + len) -
3493 (long) pByteArea(smb_buffer_response)
3494 <= BCC(smb_buffer_response)) {
3495 if (ses->serverOS)
3496 kfree(ses->serverOS);
3497 ses->serverOS = kzalloc(len + 1, GFP_KERNEL);
3498 strncpy(ses->serverOS,bcc_ptr, len);
3499
3500 bcc_ptr += len;
3501 bcc_ptr[0] = 0; /* null terminate the string */
3502 bcc_ptr++;
3503
3504 len = strnlen(bcc_ptr, 1024);
3505 kfree(ses->serverNOS);
3506 ses->serverNOS = kzalloc(len+1,
3507 GFP_KERNEL);
3508 strncpy(ses->serverNOS,
3509 bcc_ptr, len);
3510 bcc_ptr += len;
3511 bcc_ptr[0] = 0;
3512 bcc_ptr++;
3513
3514 len = strnlen(bcc_ptr, 1024);
3515 if (ses->serverDomain)
3516 kfree(ses->serverDomain);
3517 ses->serverDomain =
3518 kzalloc(len+1,
3519 GFP_KERNEL);
3520 strncpy(ses->serverDomain,
3521 bcc_ptr, len);
3522 bcc_ptr += len;
3523 bcc_ptr[0] = 0;
3524 bcc_ptr++;
3525 } else
3526 cFYI(1, ("field of length %d "
3527 "extends beyond end of smb ",
3528 len));
3529 }
3530 } else {
3531 cERROR(1, ("Security Blob extends beyond end "
3532 "of SMB"));
3533 }
3534 } else {
3535 cERROR(1, ("No session structure passed in."));
3536 }
3537 } else {
3538 cERROR(1, ("Invalid Word count %d: ",
3539 smb_buffer_response->WordCount));
3540 rc = -EIO;
3541 	}
3542
3543 	cifs_buf_release(smb_buffer);
3544
3545 	return rc;
3546 }
2586 	}
2587
2588mount_fail_check:
2589 	/* on error free sesinfo and tcon struct if needed */
2590 	if (rc) {
2591 		if (mount_data != mount_data_global)
2592 			kfree(mount_data);
2593 		/* If find_unc succeeded then rc == 0 so we can not end */
2594 		/* up accidently freeing someone elses tcon struct */
2595 		if (tcon)
2596 			cifs_put_tcon(tcon);
2597 		else if (pSesInfo)
2598 			cifs_put_smb_ses(pSesInfo);
2599 		else
2600 			cifs_put_tcp_session(srvTcp);
2601 		goto out;
2602 	}
2603
2604 	/* volume_info->password is freed above when existing session found
2605 	   (in which case it is not needed anymore) but when new sesion is created
2606 	   the password ptr is put in the new session structure (in which case the
2607 	   password will be freed at unmount time) */
2608out:
2609 	/* zero out password before freeing */
2610 	cleanup_volume_info(&volume_info);
2611 	FreeXid(xid);
2612 	return rc;
2613 }
3547 2614
@@ -3556,7 +2623,7 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses,
3556 TCONX_RSP *pSMBr; 2623 TCONX_RSP *pSMBr;
3557 unsigned char *bcc_ptr; 2624 unsigned char *bcc_ptr;
3558 int rc = 0; 2625 int rc = 0;
3559 int length; 2626 int length, bytes_left;
3560 __u16 count; 2627 __u16 count;
3561 2628
3562 if (ses == NULL) 2629 if (ses == NULL)
@@ -3644,14 +2711,22 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses,
3644 rc = SendReceive(xid, ses, smb_buffer, smb_buffer_response, &length, 2711 rc = SendReceive(xid, ses, smb_buffer, smb_buffer_response, &length,
3645 CIFS_STD_OP); 2712 CIFS_STD_OP);
3646 2713
3647 /* if (rc) rc = map_smb_to_linux_error(smb_buffer_response); */
3648 /* above now done in SendReceive */ 2714 /* above now done in SendReceive */
3649 if ((rc == 0) && (tcon != NULL)) { 2715 if ((rc == 0) && (tcon != NULL)) {
2716 bool is_unicode;
2717
3650 tcon->tidStatus = CifsGood; 2718 tcon->tidStatus = CifsGood;
3651 tcon->need_reconnect = false; 2719 tcon->need_reconnect = false;
3652 tcon->tid = smb_buffer_response->Tid; 2720 tcon->tid = smb_buffer_response->Tid;
3653 bcc_ptr = pByteArea(smb_buffer_response); 2721 bcc_ptr = pByteArea(smb_buffer_response);
3654 length = strnlen(bcc_ptr, BCC(smb_buffer_response) - 2); 2722 bytes_left = BCC(smb_buffer_response);
2723 length = strnlen(bcc_ptr, bytes_left - 2);
2724 if (smb_buffer->Flags2 & SMBFLG2_UNICODE)
2725 is_unicode = true;
2726 else
2727 is_unicode = false;
2728
2729
3655 /* skip service field (NB: this field is always ASCII) */ 2730 /* skip service field (NB: this field is always ASCII) */
3656 if (length == 3) { 2731 if (length == 3) {
3657 if ((bcc_ptr[0] == 'I') && (bcc_ptr[1] == 'P') && 2732 if ((bcc_ptr[0] == 'I') && (bcc_ptr[1] == 'P') &&
@@ -3666,40 +2741,16 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses,
3666 } 2741 }
3667 } 2742 }
3668 bcc_ptr += length + 1; 2743 bcc_ptr += length + 1;
2744 bytes_left -= (length + 1);
3669 strncpy(tcon->treeName, tree, MAX_TREE_SIZE); 2745 strncpy(tcon->treeName, tree, MAX_TREE_SIZE);
2746
2747 		/* mostly informational -- no need to fail on error here */
2748 		tcon->nativeFileSystem = cifs_strndup_from_ucs(bcc_ptr,
2749 						      bytes_left, is_unicode,
2750 						      nls_codepage);
2751
2752 		cFYI(1, ("nativeFileSystem=%s", tcon->nativeFileSystem));
2753
3670 		if (smb_buffer->Flags2 & SMBFLG2_UNICODE) {
3671 			length = UniStrnlen((wchar_t *) bcc_ptr, 512);
3672 			if ((bcc_ptr + (2 * length)) -
3673 			     pByteArea(smb_buffer_response) <=
3674 			    BCC(smb_buffer_response)) {
3675 				kfree(tcon->nativeFileSystem);
3676 				tcon->nativeFileSystem =
3677 				    kzalloc(2*(length + 1), GFP_KERNEL);
3678 if (tcon->nativeFileSystem)
3679 cifs_strfromUCS_le(
3680 tcon->nativeFileSystem,
3681 (__le16 *) bcc_ptr,
3682 length, nls_codepage);
3683 bcc_ptr += 2 * length;
3684 bcc_ptr[0] = 0; /* null terminate the string */
3685 bcc_ptr[1] = 0;
3686 bcc_ptr += 2;
3687 }
3688 /* else do not bother copying these information fields*/
3689 } else {
3690 length = strnlen(bcc_ptr, 1024);
3691 if ((bcc_ptr + length) -
3692 pByteArea(smb_buffer_response) <=
3693 BCC(smb_buffer_response)) {
3694 kfree(tcon->nativeFileSystem);
3695 tcon->nativeFileSystem =
3696 kzalloc(length + 1, GFP_KERNEL);
3697 if (tcon->nativeFileSystem)
3698 strncpy(tcon->nativeFileSystem, bcc_ptr,
3699 length);
3700 }
3701 /* else do not bother copying these information fields*/
3702 }
3703 if ((smb_buffer_response->WordCount == 3) || 2754 if ((smb_buffer_response->WordCount == 3) ||
3704 (smb_buffer_response->WordCount == 7)) 2755 (smb_buffer_response->WordCount == 7))
3705 /* field is in same location */ 2756 /* field is in same location */
@@ -3738,8 +2789,6 @@ int cifs_setup_session(unsigned int xid, struct cifsSesInfo *pSesInfo,
3738 struct nls_table *nls_info) 2789 struct nls_table *nls_info)
3739{ 2790{
3740 int rc = 0; 2791 int rc = 0;
3741 char ntlm_session_key[CIFS_SESS_KEY_SIZE];
3742 bool ntlmv2_flag = false;
3743 int first_time = 0; 2792 int first_time = 0;
3744 struct TCP_Server_Info *server = pSesInfo->server; 2793 struct TCP_Server_Info *server = pSesInfo->server;
3745 2794
@@ -3771,83 +2820,19 @@ int cifs_setup_session(unsigned int xid, struct cifsSesInfo *pSesInfo,
3771 pSesInfo->capabilities = server->capabilities; 2820 pSesInfo->capabilities = server->capabilities;
3772 if (linuxExtEnabled == 0) 2821 if (linuxExtEnabled == 0)
3773 pSesInfo->capabilities &= (~CAP_UNIX); 2822 pSesInfo->capabilities &= (~CAP_UNIX);
3774 /* pSesInfo->sequence_number = 0;*/ 2823
3775 cFYI(1, ("Security Mode: 0x%x Capabilities: 0x%x TimeAdjust: %d", 2824 cFYI(1, ("Security Mode: 0x%x Capabilities: 0x%x TimeAdjust: %d",
3776 server->secMode, server->capabilities, server->timeAdj)); 2825 server->secMode, server->capabilities, server->timeAdj));
3777 2826
2827 	rc = CIFS_SessSetup(xid, pSesInfo, first_time, nls_info);
3778 	if (experimEnabled < 2)
3779 rc = CIFS_SessSetup(xid, pSesInfo, first_time, nls_info);
3780 else if (extended_security
3781 && (pSesInfo->capabilities & CAP_EXTENDED_SECURITY)
3782 && (server->secType == NTLMSSP)) {
3783 rc = -EOPNOTSUPP;
3784 } else if (extended_security
3785 && (pSesInfo->capabilities & CAP_EXTENDED_SECURITY)
3786 && (server->secType == RawNTLMSSP)) {
3787 cFYI(1, ("NTLMSSP sesssetup"));
3788 rc = CIFSNTLMSSPNegotiateSessSetup(xid, pSesInfo, &ntlmv2_flag,
3789 nls_info);
3790 if (!rc) {
3791 if (ntlmv2_flag) {
3792 char *v2_response;
3793 cFYI(1, ("more secure NTLM ver2 hash"));
3794 if (CalcNTLMv2_partial_mac_key(pSesInfo,
3795 nls_info)) {
3796 rc = -ENOMEM;
3797 goto ss_err_exit;
3798 } else
3799 v2_response = kmalloc(16 + 64 /* blob*/,
3800 GFP_KERNEL);
3801 if (v2_response) {
3802 CalcNTLMv2_response(pSesInfo,
3803 v2_response);
3804 /* if (first_time)
3805 cifs_calculate_ntlmv2_mac_key */
3806 kfree(v2_response);
3807 /* BB Put dummy sig in SessSetup PDU? */
3808 } else {
3809 rc = -ENOMEM;
3810 goto ss_err_exit;
3811 }
3812
3813 } else {
3814 SMBNTencrypt(pSesInfo->password,
3815 server->cryptKey,
3816 ntlm_session_key);
3817
3818 if (first_time)
3819 cifs_calculate_mac_key(
3820 &server->mac_signing_key,
3821 ntlm_session_key,
3822 pSesInfo->password);
3823 }
3824 /* for better security the weaker lanman hash not sent
3825 in AuthSessSetup so we no longer calculate it */
3826
3827 rc = CIFSNTLMSSPAuthSessSetup(xid, pSesInfo,
3828 ntlm_session_key,
3829 ntlmv2_flag,
3830 nls_info);
3831 }
3832 } else { /* old style NTLM 0.12 session setup */
3833 SMBNTencrypt(pSesInfo->password, server->cryptKey,
3834 ntlm_session_key);
3835
3836 if (first_time)
3837 cifs_calculate_mac_key(&server->mac_signing_key,
3838 ntlm_session_key,
3839 pSesInfo->password);
3840
3841 rc = CIFSSessSetup(xid, pSesInfo, ntlm_session_key, nls_info);
3842 }
3843 if (rc) { 2828 if (rc) {
3844 cERROR(1, ("Send error in SessSetup = %d", rc)); 2829 cERROR(1, ("Send error in SessSetup = %d", rc));
3845 } else { 2830 } else {
3846 cFYI(1, ("CIFS Session Established successfully")); 2831 cFYI(1, ("CIFS Session Established successfully"));
3847 spin_lock(&GlobalMid_Lock); 2832 spin_lock(&GlobalMid_Lock);
3848 pSesInfo->status = CifsGood; 2833 pSesInfo->status = CifsGood;
3849 pSesInfo->need_reconnect = false; 2834 pSesInfo->need_reconnect = false;
3850 spin_unlock(&GlobalMid_Lock); 2835 spin_unlock(&GlobalMid_Lock);
3851 } 2836 }
3852 2837
3853ss_err_exit: 2838ss_err_exit:
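The CIFSTCon() hunk above drops roughly thirty lines of hand-rolled bounds checking and copying of the returned nativeFileSystem string in favour of one cifs_strndup_from_ucs() call. The sketch below shows the general bounds-check/allocate/convert pattern such a helper wraps; the function name and the crude UTF-16LE handling are illustrative only (the kernel converts through its nls codepage tables, not a bare low-byte copy).

#include <stdbool.h>
#include <stdlib.h>
#include <string.h>

char *strndup_from_wire(const unsigned char *src, size_t bytes_left,
			bool is_unicode)
{
	char *dst;
	size_t i, len = 0;

	if (is_unicode) {
		/* count UTF-16LE code units up to a NUL, never reading past the buffer */
		while (2 * len + 1 < bytes_left &&
		       (src[2 * len] != 0 || src[2 * len + 1] != 0))
			len++;
		dst = malloc(len + 1);
		if (!dst)
			return NULL;
		for (i = 0; i < len; i++) {
			unsigned int cu = src[2 * i] | (src[2 * i + 1] << 8);
			dst[i] = (cu < 0x80) ? (char)cu : '?';	/* crude non-ASCII fallback */
		}
	} else {
		/* ASCII: stop at NUL or at the end of the byte area */
		while (len < bytes_left && src[len] != 0)
			len++;
		dst = malloc(len + 1);
		if (!dst)
			return NULL;
		memcpy(dst, src, len);
	}
	dst[len] = '\0';
	return dst;
}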
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 54dce78fbb7..3758965d73d 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -129,12 +129,62 @@ cifs_bp_rename_retry:
129 return full_path; 129 return full_path;
130} 130}
131 131
132static void
133cifs_fill_fileinfo(struct inode *newinode, __u16 fileHandle,
134 struct cifsTconInfo *tcon, bool write_only)
135{
136 int oplock = 0;
137 struct cifsFileInfo *pCifsFile;
138 struct cifsInodeInfo *pCifsInode;
139
140 pCifsFile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
141
142 if (pCifsFile == NULL)
143 return;
144
145 if (oplockEnabled)
146 oplock = REQ_OPLOCK;
147
148 pCifsFile->netfid = fileHandle;
149 pCifsFile->pid = current->tgid;
150 pCifsFile->pInode = newinode;
151 pCifsFile->invalidHandle = false;
152 pCifsFile->closePend = false;
153 mutex_init(&pCifsFile->fh_mutex);
154 mutex_init(&pCifsFile->lock_mutex);
155 INIT_LIST_HEAD(&pCifsFile->llist);
156 atomic_set(&pCifsFile->wrtPending, 0);
157
158 /* set the following in open now
159 pCifsFile->pfile = file; */
160 write_lock(&GlobalSMBSeslock);
161 list_add(&pCifsFile->tlist, &tcon->openFileList);
162 pCifsInode = CIFS_I(newinode);
163 if (pCifsInode) {
164 /* if readable file instance put first in list*/
165 if (write_only)
166 list_add_tail(&pCifsFile->flist,
167 &pCifsInode->openFileList);
168 else
169 list_add(&pCifsFile->flist, &pCifsInode->openFileList);
170
171 if ((oplock & 0xF) == OPLOCK_EXCLUSIVE) {
172 pCifsInode->clientCanCacheAll = true;
173 pCifsInode->clientCanCacheRead = true;
174 cFYI(1, ("Exclusive Oplock inode %p", newinode));
175 } else if ((oplock & 0xF) == OPLOCK_READ)
176 pCifsInode->clientCanCacheRead = true;
177 }
178 write_unlock(&GlobalSMBSeslock);
179}
180
132int cifs_posix_open(char *full_path, struct inode **pinode, 181int cifs_posix_open(char *full_path, struct inode **pinode,
133 struct super_block *sb, int mode, int oflags, 182 struct super_block *sb, int mode, int oflags,
134 int *poplock, __u16 *pnetfid, int xid) 183 int *poplock, __u16 *pnetfid, int xid)
135{ 184{
136 int rc; 185 int rc;
137 __u32 oplock; 186 __u32 oplock;
187 bool write_only = false;
138 FILE_UNIX_BASIC_INFO *presp_data; 188 FILE_UNIX_BASIC_INFO *presp_data;
139 __u32 posix_flags = 0; 189 __u32 posix_flags = 0;
140 struct cifs_sb_info *cifs_sb = CIFS_SB(sb); 190 struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
@@ -172,7 +222,10 @@ int cifs_posix_open(char *full_path, struct inode **pinode,
172 if (oflags & O_DIRECT) 222 if (oflags & O_DIRECT)
173 posix_flags |= SMB_O_DIRECT; 223 posix_flags |= SMB_O_DIRECT;
174 224
225 if (!(oflags & FMODE_READ))
226 write_only = true;
175 227
228 mode &= ~current_umask();
176 rc = CIFSPOSIXCreate(xid, cifs_sb->tcon, posix_flags, mode, 229 rc = CIFSPOSIXCreate(xid, cifs_sb->tcon, posix_flags, mode,
177 pnetfid, presp_data, &oplock, full_path, 230 pnetfid, presp_data, &oplock, full_path,
178 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & 231 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags &
@@ -187,8 +240,10 @@ int cifs_posix_open(char *full_path, struct inode **pinode,
187 if (!pinode) 240 if (!pinode)
188 goto posix_open_ret; /* caller does not need info */ 241 goto posix_open_ret; /* caller does not need info */
189 242
190 if (*pinode == NULL) 243 if (*pinode == NULL) {
191 *pinode = cifs_new_inode(sb, &presp_data->UniqueId); 244 __u64 unique_id = le64_to_cpu(presp_data->UniqueId);
245 *pinode = cifs_new_inode(sb, &unique_id);
246 }
192 /* else an inode was passed in. Update its info, don't create one */ 247 /* else an inode was passed in. Update its info, don't create one */
193 248
194 /* We do not need to close the file if new_inode fails since 249 /* We do not need to close the file if new_inode fails since
@@ -198,6 +253,8 @@ int cifs_posix_open(char *full_path, struct inode **pinode,
198 253
199 posix_fill_in_inode(*pinode, presp_data, 1); 254 posix_fill_in_inode(*pinode, presp_data, 1);
200 255
256 cifs_fill_fileinfo(*pinode, *pnetfid, cifs_sb->tcon, write_only);
257
201posix_open_ret: 258posix_open_ret:
202 kfree(presp_data); 259 kfree(presp_data);
203 return rc; 260 return rc;
@@ -225,6 +282,7 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
225 int create_options = CREATE_NOT_DIR; 282 int create_options = CREATE_NOT_DIR;
226 int oplock = 0; 283 int oplock = 0;
227 int oflags; 284 int oflags;
285 bool posix_create = false;
228 /* 286 /*
229 * BB below access is probably too much for mknod to request 287 * BB below access is probably too much for mknod to request
230 * but we have to do query and setpathinfo so requesting 288 * but we have to do query and setpathinfo so requesting
@@ -239,7 +297,6 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
239 char *full_path = NULL; 297 char *full_path = NULL;
240 FILE_ALL_INFO *buf = NULL; 298 FILE_ALL_INFO *buf = NULL;
241 struct inode *newinode = NULL; 299 struct inode *newinode = NULL;
242 struct cifsInodeInfo *pCifsInode;
243 int disposition = FILE_OVERWRITE_IF; 300 int disposition = FILE_OVERWRITE_IF;
244 bool write_only = false; 301 bool write_only = false;
245 302
@@ -254,7 +311,6 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
254 return -ENOMEM; 311 return -ENOMEM;
255 } 312 }
256 313
257 mode &= ~current_umask();
258 if (oplockEnabled) 314 if (oplockEnabled)
259 oplock = REQ_OPLOCK; 315 oplock = REQ_OPLOCK;
260 316
@@ -273,12 +329,14 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
273 negotation. EREMOTE indicates DFS junction, which is not 329 negotation. EREMOTE indicates DFS junction, which is not
274 handled in posix open */ 330 handled in posix open */
275 331
276 if ((rc == 0) && (newinode == NULL)) 332 if (rc == 0) {
277 goto cifs_create_get_file_info; /* query inode info */ 333 posix_create = true;
278 else if (rc == 0) /* success, no need to query */ 334 if (newinode == NULL) /* query inode info */
279 goto cifs_create_set_dentry; 335 goto cifs_create_get_file_info;
280 else if ((rc != -EIO) && (rc != -EREMOTE) && 336 else /* success, no need to query */
281 (rc != -EOPNOTSUPP)) /* path not found or net err */ 337 goto cifs_create_set_dentry;
338 } else if ((rc != -EIO) && (rc != -EREMOTE) &&
339 (rc != -EOPNOTSUPP) && (rc != -EINVAL))
282 goto cifs_create_out; 340 goto cifs_create_out;
283 /* else fallthrough to retry, using older open call, this is 341 /* else fallthrough to retry, using older open call, this is
284 case where server does not support this SMB level, and 342 case where server does not support this SMB level, and
@@ -409,45 +467,9 @@ cifs_create_set_dentry:
409 if ((nd == NULL) || (!(nd->flags & LOOKUP_OPEN))) { 467 if ((nd == NULL) || (!(nd->flags & LOOKUP_OPEN))) {
410 /* mknod case - do not leave file open */ 468 /* mknod case - do not leave file open */
411 CIFSSMBClose(xid, tcon, fileHandle); 469 CIFSSMBClose(xid, tcon, fileHandle);
412 } else if (newinode) { 470 } else if (!(posix_create) && (newinode)) {
413 struct cifsFileInfo *pCifsFile = 471 cifs_fill_fileinfo(newinode, fileHandle,
414 kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL); 472 cifs_sb->tcon, write_only);
415
416 if (pCifsFile == NULL)
417 goto cifs_create_out;
418 pCifsFile->netfid = fileHandle;
419 pCifsFile->pid = current->tgid;
420 pCifsFile->pInode = newinode;
421 pCifsFile->invalidHandle = false;
422 pCifsFile->closePend = false;
423 init_MUTEX(&pCifsFile->fh_sem);
424 mutex_init(&pCifsFile->lock_mutex);
425 INIT_LIST_HEAD(&pCifsFile->llist);
426 atomic_set(&pCifsFile->wrtPending, 0);
427
428 /* set the following in open now
429 pCifsFile->pfile = file; */
430 write_lock(&GlobalSMBSeslock);
431 list_add(&pCifsFile->tlist, &tcon->openFileList);
432 pCifsInode = CIFS_I(newinode);
433 if (pCifsInode) {
434 /* if readable file instance put first in list*/
435 if (write_only) {
436 list_add_tail(&pCifsFile->flist,
437 &pCifsInode->openFileList);
438 } else {
439 list_add(&pCifsFile->flist,
440 &pCifsInode->openFileList);
441 }
442 if ((oplock & 0xF) == OPLOCK_EXCLUSIVE) {
443 pCifsInode->clientCanCacheAll = true;
444 pCifsInode->clientCanCacheRead = true;
445 cFYI(1, ("Exclusive Oplock inode %p",
446 newinode));
447 } else if ((oplock & 0xF) == OPLOCK_READ)
448 pCifsInode->clientCanCacheRead = true;
449 }
450 write_unlock(&GlobalSMBSeslock);
451 } 473 }
452cifs_create_out: 474cifs_create_out:
453 kfree(buf); 475 kfree(buf);
@@ -580,17 +602,20 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode,
580 return rc; 602 return rc;
581} 603}
582 604
583
584struct dentry * 605struct dentry *
585cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, 606cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
586 struct nameidata *nd) 607 struct nameidata *nd)
587{ 608{
588 int xid; 609 int xid;
589 int rc = 0; /* to get around spurious gcc warning, set to zero here */ 610 int rc = 0; /* to get around spurious gcc warning, set to zero here */
611 int oplock = 0;
612 __u16 fileHandle = 0;
613 bool posix_open = false;
590 struct cifs_sb_info *cifs_sb; 614 struct cifs_sb_info *cifs_sb;
591 struct cifsTconInfo *pTcon; 615 struct cifsTconInfo *pTcon;
592 struct inode *newInode = NULL; 616 struct inode *newInode = NULL;
593 char *full_path = NULL; 617 char *full_path = NULL;
618 struct file *filp;
594 619
595 xid = GetXid(); 620 xid = GetXid();
596 621
@@ -632,12 +657,43 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
632 } 657 }
633 cFYI(1, ("Full path: %s inode = 0x%p", full_path, direntry->d_inode)); 658 cFYI(1, ("Full path: %s inode = 0x%p", full_path, direntry->d_inode));
634 659
635 	if (pTcon->unix_ext)
636 		rc = cifs_get_inode_info_unix(&newInode, full_path,
637 					parent_dir_inode->i_sb, xid);
638 	else
660 	/* Posix open is only called (at lookup time) for file create now.
661 	 * For opens (rather than creates), because we do not know if it
662 	 * is a file or directory yet, and current Samba no longer allows
663 	 * us to do posix open on dirs, we could end up wasting an open call
664 * on what turns out to be a dir. For file opens, we wait to call posix
665 * open till cifs_open. It could be added here (lookup) in the future
666 * but the performance tradeoff of the extra network request when EISDIR
667 * or EACCES is returned would have to be weighed against the 50%
668 * reduction in network traffic in the other paths.
669 */
670 if (pTcon->unix_ext) {
671 if (!(nd->flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY)) &&
672 (nd->flags & LOOKUP_OPEN) && !pTcon->broken_posix_open &&
673 (nd->intent.open.flags & O_CREAT)) {
674 rc = cifs_posix_open(full_path, &newInode,
675 parent_dir_inode->i_sb,
676 nd->intent.open.create_mode,
677 nd->intent.open.flags, &oplock,
678 &fileHandle, xid);
679 /*
680 * The check below works around a bug in POSIX
681 * open in samba versions 3.3.1 and earlier where
682 * open could incorrectly fail with invalid parameter.
683 * If either that or op not supported returned, follow
684 * the normal lookup.
685 */
686 if ((rc == 0) || (rc == -ENOENT))
687 posix_open = true;
688 else if ((rc == -EINVAL) || (rc != -EOPNOTSUPP))
689 pTcon->broken_posix_open = true;
690 }
691 if (!posix_open)
692 rc = cifs_get_inode_info_unix(&newInode, full_path,
693 parent_dir_inode->i_sb, xid);
694 } else
639 rc = cifs_get_inode_info(&newInode, full_path, NULL, 695 rc = cifs_get_inode_info(&newInode, full_path, NULL,
640 parent_dir_inode->i_sb, xid, NULL); 696 parent_dir_inode->i_sb, xid, NULL);
641 697
642 if ((rc == 0) && (newInode != NULL)) { 698 if ((rc == 0) && (newInode != NULL)) {
643 if (pTcon->nocase) 699 if (pTcon->nocase)
@@ -645,7 +701,8 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
645 else 701 else
646 direntry->d_op = &cifs_dentry_ops; 702 direntry->d_op = &cifs_dentry_ops;
647 d_add(direntry, newInode); 703 d_add(direntry, newInode);
648 704 if (posix_open)
705 filp = lookup_instantiate_filp(nd, direntry, NULL);
649 /* since paths are not looked up by component - the parent 706 /* since paths are not looked up by component - the parent
650 directories are presumed to be good here */ 707 directories are presumed to be good here */
651 renew_parental_timestamps(direntry); 708 renew_parental_timestamps(direntry);
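The new cifs_fill_fileinfo() above adds write-only handles at the tail of the inode's open-file list so that a search for a readable handle (for example in write_begin) can stop at the first entry. A toy, single-threaded sketch of that ordering policy follows; it is not the kernel's locked list code and the names are illustrative.

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct handle {
	int netfid;
	bool write_only;
	struct handle *next;
};

static struct handle *open_list;

static void add_handle(struct handle *h)
{
	if (h->write_only) {		/* write-only: append at the tail */
		struct handle **pp = &open_list;

		while (*pp)
			pp = &(*pp)->next;
		h->next = NULL;
		*pp = h;
	} else {			/* readable: push at the head */
		h->next = open_list;
		open_list = h;
	}
}

static struct handle *find_readable(void)
{
	struct handle *h;

	for (h = open_list; h; h = h->next)
		if (!h->write_only)
			return h;
	return NULL;
}

int main(void)
{
	struct handle wr = { .netfid = 1, .write_only = true };
	struct handle rd = { .netfid = 2, .write_only = false };
	struct handle *h;

	add_handle(&wr);
	add_handle(&rd);
	h = find_readable();
	printf("first readable fid: %d\n", h ? h->netfid : -1);	/* prints 2 */
	return 0;
}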
diff --git a/fs/cifs/dns_resolve.c b/fs/cifs/dns_resolve.c
index 1e0c1bd8f2e..df4a306f697 100644
--- a/fs/cifs/dns_resolve.c
+++ b/fs/cifs/dns_resolve.c
@@ -78,7 +78,7 @@ dns_resolver_instantiate(struct key *key, const void *data,
78 } 78 }
79 79
80 key->type_data.x[0] = datalen; 80 key->type_data.x[0] = datalen;
81 rcu_assign_pointer(key->payload.data, ip); 81 key->payload.data = ip;
82 82
83 return rc; 83 return rc;
84} 84}
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 81747acca4c..302ea15f02e 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -46,7 +46,7 @@ static inline struct cifsFileInfo *cifs_init_private(
46 memset(private_data, 0, sizeof(struct cifsFileInfo)); 46 memset(private_data, 0, sizeof(struct cifsFileInfo));
47 private_data->netfid = netfid; 47 private_data->netfid = netfid;
48 private_data->pid = current->tgid; 48 private_data->pid = current->tgid;
49 init_MUTEX(&private_data->fh_sem); 49 mutex_init(&private_data->fh_mutex);
50 mutex_init(&private_data->lock_mutex); 50 mutex_init(&private_data->lock_mutex);
51 INIT_LIST_HEAD(&private_data->llist); 51 INIT_LIST_HEAD(&private_data->llist);
52 private_data->pfile = file; /* needed for writepage */ 52 private_data->pfile = file; /* needed for writepage */
@@ -129,15 +129,8 @@ static inline int cifs_posix_open_inode_helper(struct inode *inode,
129 struct file *file, struct cifsInodeInfo *pCifsInode, 129 struct file *file, struct cifsInodeInfo *pCifsInode,
130 struct cifsFileInfo *pCifsFile, int oplock, u16 netfid) 130 struct cifsFileInfo *pCifsFile, int oplock, u16 netfid)
131{ 131{
132 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
133/* struct timespec temp; */ /* BB REMOVEME BB */
134 132
135 file->private_data = kmalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
136 if (file->private_data == NULL)
137 return -ENOMEM;
138 pCifsFile = cifs_init_private(file->private_data, inode, file, netfid);
139 write_lock(&GlobalSMBSeslock); 133 write_lock(&GlobalSMBSeslock);
140 list_add(&pCifsFile->tlist, &cifs_sb->tcon->openFileList);
141 134
142 pCifsInode = CIFS_I(file->f_path.dentry->d_inode); 135 pCifsInode = CIFS_I(file->f_path.dentry->d_inode);
143 if (pCifsInode == NULL) { 136 if (pCifsInode == NULL) {
@@ -145,17 +138,6 @@ static inline int cifs_posix_open_inode_helper(struct inode *inode,
145 return -EINVAL; 138 return -EINVAL;
146 } 139 }
147 140
148 /* want handles we can use to read with first
149 in the list so we do not have to walk the
150 list to search for one in write_begin */
151 if ((file->f_flags & O_ACCMODE) == O_WRONLY) {
152 list_add_tail(&pCifsFile->flist,
153 &pCifsInode->openFileList);
154 } else {
155 list_add(&pCifsFile->flist,
156 &pCifsInode->openFileList);
157 }
158
159 if (pCifsInode->clientCanCacheRead) { 141 if (pCifsInode->clientCanCacheRead) {
160 /* we have the inode open somewhere else 142 /* we have the inode open somewhere else
161 no need to discard cache data */ 143 no need to discard cache data */
@@ -198,6 +180,38 @@ psx_client_can_cache:
198 return 0; 180 return 0;
199} 181}
200 182
183static struct cifsFileInfo *
184cifs_fill_filedata(struct file *file)
185{
186 struct list_head *tmp;
187 struct cifsFileInfo *pCifsFile = NULL;
188 struct cifsInodeInfo *pCifsInode = NULL;
189
190 /* search inode for this file and fill in file->private_data */
191 pCifsInode = CIFS_I(file->f_path.dentry->d_inode);
192 read_lock(&GlobalSMBSeslock);
193 list_for_each(tmp, &pCifsInode->openFileList) {
194 pCifsFile = list_entry(tmp, struct cifsFileInfo, flist);
195 if ((pCifsFile->pfile == NULL) &&
196 (pCifsFile->pid == current->tgid)) {
197 /* mode set in cifs_create */
198
199 /* needed for writepage */
200 pCifsFile->pfile = file;
201 file->private_data = pCifsFile;
202 break;
203 }
204 }
205 read_unlock(&GlobalSMBSeslock);
206
207 if (file->private_data != NULL) {
208 return pCifsFile;
209 } else if ((file->f_flags & O_CREAT) && (file->f_flags & O_EXCL))
210 cERROR(1, ("could not find file instance for "
211 "new file %p", file));
212 return NULL;
213}
214
201/* all arguments to this function must be checked for validity in caller */ 215/* all arguments to this function must be checked for validity in caller */
202static inline int cifs_open_inode_helper(struct inode *inode, struct file *file, 216static inline int cifs_open_inode_helper(struct inode *inode, struct file *file,
203 struct cifsInodeInfo *pCifsInode, struct cifsFileInfo *pCifsFile, 217 struct cifsInodeInfo *pCifsInode, struct cifsFileInfo *pCifsFile,
@@ -272,7 +286,6 @@ int cifs_open(struct inode *inode, struct file *file)
272 struct cifsTconInfo *tcon; 286 struct cifsTconInfo *tcon;
273 struct cifsFileInfo *pCifsFile; 287 struct cifsFileInfo *pCifsFile;
274 struct cifsInodeInfo *pCifsInode; 288 struct cifsInodeInfo *pCifsInode;
275 struct list_head *tmp;
276 char *full_path = NULL; 289 char *full_path = NULL;
277 int desiredAccess; 290 int desiredAccess;
278 int disposition; 291 int disposition;
@@ -284,34 +297,11 @@ int cifs_open(struct inode *inode, struct file *file)
284 cifs_sb = CIFS_SB(inode->i_sb); 297 cifs_sb = CIFS_SB(inode->i_sb);
285 tcon = cifs_sb->tcon; 298 tcon = cifs_sb->tcon;
286 299
300 	pCifsInode = CIFS_I(file->f_path.dentry->d_inode);
301 	pCifsFile = cifs_fill_filedata(file);
302 	if (pCifsFile) {
303 		FreeXid(xid);
304 		return 0;
287 	if (file->f_flags & O_CREAT) {
288 		/* search inode for this file and fill in file->private_data */
289 		pCifsInode = CIFS_I(file->f_path.dentry->d_inode);
290 		read_lock(&GlobalSMBSeslock);
291 		list_for_each(tmp, &pCifsInode->openFileList) {
292 pCifsFile = list_entry(tmp, struct cifsFileInfo,
293 flist);
294 if ((pCifsFile->pfile == NULL) &&
295 (pCifsFile->pid == current->tgid)) {
296 /* mode set in cifs_create */
297
298 /* needed for writepage */
299 pCifsFile->pfile = file;
300
301 file->private_data = pCifsFile;
302 break;
303 }
304 }
305 read_unlock(&GlobalSMBSeslock);
306 if (file->private_data != NULL) {
307 rc = 0;
308 FreeXid(xid);
309 return rc;
310 } else {
311 if (file->f_flags & O_EXCL)
312 cERROR(1, ("could not find file instance for "
313 "new file %p", file));
314 }
315 } 305 }
316 306
317 full_path = build_path_from_dentry(file->f_path.dentry); 307 full_path = build_path_from_dentry(file->f_path.dentry);
@@ -342,6 +332,7 @@ int cifs_open(struct inode *inode, struct file *file)
342 /* no need for special case handling of setting mode 332 /* no need for special case handling of setting mode
343 on read only files needed here */ 333 on read only files needed here */
344 334
335 pCifsFile = cifs_fill_filedata(file);
345 cifs_posix_open_inode_helper(inode, file, pCifsInode, 336 cifs_posix_open_inode_helper(inode, file, pCifsInode,
346 pCifsFile, oplock, netfid); 337 pCifsFile, oplock, netfid);
347 goto out; 338 goto out;
@@ -500,9 +491,9 @@ static int cifs_reopen_file(struct file *file, bool can_flush)
500 return -EBADF; 491 return -EBADF;
501 492
502 xid = GetXid(); 493 xid = GetXid();
503 down(&pCifsFile->fh_sem); 494 mutex_unlock(&pCifsFile->fh_mutex);
504 if (!pCifsFile->invalidHandle) { 495 if (!pCifsFile->invalidHandle) {
505 up(&pCifsFile->fh_sem); 496 mutex_lock(&pCifsFile->fh_mutex);
506 FreeXid(xid); 497 FreeXid(xid);
507 return 0; 498 return 0;
508 } 499 }
@@ -533,7 +524,7 @@ static int cifs_reopen_file(struct file *file, bool can_flush)
533 if (full_path == NULL) { 524 if (full_path == NULL) {
534 rc = -ENOMEM; 525 rc = -ENOMEM;
535reopen_error_exit: 526reopen_error_exit:
536 up(&pCifsFile->fh_sem); 527 mutex_lock(&pCifsFile->fh_mutex);
537 FreeXid(xid); 528 FreeXid(xid);
538 return rc; 529 return rc;
539 } 530 }
@@ -575,14 +566,14 @@ reopen_error_exit:
575 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & 566 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags &
576 CIFS_MOUNT_MAP_SPECIAL_CHR); 567 CIFS_MOUNT_MAP_SPECIAL_CHR);
577 if (rc) { 568 if (rc) {
578 up(&pCifsFile->fh_sem); 569 mutex_lock(&pCifsFile->fh_mutex);
579 cFYI(1, ("cifs_open returned 0x%x", rc)); 570 cFYI(1, ("cifs_open returned 0x%x", rc));
580 cFYI(1, ("oplock: %d", oplock)); 571 cFYI(1, ("oplock: %d", oplock));
581 } else { 572 } else {
582reopen_success: 573reopen_success:
583 pCifsFile->netfid = netfid; 574 pCifsFile->netfid = netfid;
584 pCifsFile->invalidHandle = false; 575 pCifsFile->invalidHandle = false;
585 up(&pCifsFile->fh_sem); 576 mutex_lock(&pCifsFile->fh_mutex);
586 pCifsInode = CIFS_I(inode); 577 pCifsInode = CIFS_I(inode);
587 if (pCifsInode) { 578 if (pCifsInode) {
588 if (can_flush) { 579 if (can_flush) {
@@ -971,6 +962,40 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock)
971 return rc; 962 return rc;
972} 963}
973 964
965/*
966 * Set the timeout on write requests past EOF. For some servers (Windows)
967 * these calls can be very long.
968 *
969 * If we're writing >10M past the EOF we give a 180s timeout. Anything less
970 * than that gets a 45s timeout. Writes not past EOF get 15s timeouts.
971 * The 10M cutoff is totally arbitrary. A better scheme for this would be
972 * welcome if someone wants to suggest one.
973 *
974 * We may be able to do a better job with this if there were some way to
975 * declare that a file should be sparse.
976 */
977static int
978cifs_write_timeout(struct cifsInodeInfo *cifsi, loff_t offset)
979{
980 if (offset <= cifsi->server_eof)
981 return CIFS_STD_OP;
982 else if (offset > (cifsi->server_eof + (10 * 1024 * 1024)))
983 return CIFS_VLONG_OP;
984 else
985 return CIFS_LONG_OP;
986}
987
988/* update the file size (if needed) after a write */
989static void
990cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
991 unsigned int bytes_written)
992{
993 loff_t end_of_write = offset + bytes_written;
994
995 if (end_of_write > cifsi->server_eof)
996 cifsi->server_eof = end_of_write;
997}
998
974ssize_t cifs_user_write(struct file *file, const char __user *write_data, 999ssize_t cifs_user_write(struct file *file, const char __user *write_data,
975 size_t write_size, loff_t *poffset) 1000 size_t write_size, loff_t *poffset)
976{ 1001{
@@ -981,6 +1006,7 @@ ssize_t cifs_user_write(struct file *file, const char __user *write_data,
981 struct cifsTconInfo *pTcon; 1006 struct cifsTconInfo *pTcon;
982 int xid, long_op; 1007 int xid, long_op;
983 struct cifsFileInfo *open_file; 1008 struct cifsFileInfo *open_file;
1009 struct cifsInodeInfo *cifsi = CIFS_I(file->f_path.dentry->d_inode);
984 1010
985 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); 1011 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
986 1012
@@ -1000,11 +1026,7 @@ ssize_t cifs_user_write(struct file *file, const char __user *write_data,
1000 1026
1001 xid = GetXid(); 1027 xid = GetXid();
1002 1028
1003 if (*poffset > file->f_path.dentry->d_inode->i_size) 1029 long_op = cifs_write_timeout(cifsi, *poffset);
1004 long_op = CIFS_VLONG_OP; /* writes past EOF take long time */
1005 else
1006 long_op = CIFS_LONG_OP;
1007
1008 for (total_written = 0; write_size > total_written; 1030 for (total_written = 0; write_size > total_written;
1009 total_written += bytes_written) { 1031 total_written += bytes_written) {
1010 rc = -EAGAIN; 1032 rc = -EAGAIN;
@@ -1048,8 +1070,10 @@ ssize_t cifs_user_write(struct file *file, const char __user *write_data,
1048 FreeXid(xid); 1070 FreeXid(xid);
1049 return rc; 1071 return rc;
1050 } 1072 }
1051 } else 1073 } else {
1074 cifs_update_eof(cifsi, *poffset, bytes_written);
1052 *poffset += bytes_written; 1075 *poffset += bytes_written;
1076 }
1053 long_op = CIFS_STD_OP; /* subsequent writes fast - 1077 long_op = CIFS_STD_OP; /* subsequent writes fast -
1054 15 seconds is plenty */ 1078 15 seconds is plenty */
1055 } 1079 }
@@ -1085,6 +1109,7 @@ static ssize_t cifs_write(struct file *file, const char *write_data,
1085 struct cifsTconInfo *pTcon; 1109 struct cifsTconInfo *pTcon;
1086 int xid, long_op; 1110 int xid, long_op;
1087 struct cifsFileInfo *open_file; 1111 struct cifsFileInfo *open_file;
1112 struct cifsInodeInfo *cifsi = CIFS_I(file->f_path.dentry->d_inode);
1088 1113
1089 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); 1114 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
1090 1115
@@ -1099,11 +1124,7 @@ static ssize_t cifs_write(struct file *file, const char *write_data,
1099 1124
1100 xid = GetXid(); 1125 xid = GetXid();
1101 1126
1102 if (*poffset > file->f_path.dentry->d_inode->i_size) 1127 long_op = cifs_write_timeout(cifsi, *poffset);
1103 long_op = CIFS_VLONG_OP; /* writes past EOF can be slow */
1104 else
1105 long_op = CIFS_LONG_OP;
1106
1107 for (total_written = 0; write_size > total_written; 1128 for (total_written = 0; write_size > total_written;
1108 total_written += bytes_written) { 1129 total_written += bytes_written) {
1109 rc = -EAGAIN; 1130 rc = -EAGAIN;
@@ -1166,8 +1187,10 @@ static ssize_t cifs_write(struct file *file, const char *write_data,
1166 FreeXid(xid); 1187 FreeXid(xid);
1167 return rc; 1188 return rc;
1168 } 1189 }
1169 } else 1190 } else {
1191 cifs_update_eof(cifsi, *poffset, bytes_written);
1170 *poffset += bytes_written; 1192 *poffset += bytes_written;
1193 }
1171 long_op = CIFS_STD_OP; /* subsequent writes fast - 1194 long_op = CIFS_STD_OP; /* subsequent writes fast -
1172 15 seconds is plenty */ 1195 15 seconds is plenty */
1173 } 1196 }
@@ -1380,11 +1403,12 @@ static int cifs_writepages(struct address_space *mapping,
1380 int nr_pages; 1403 int nr_pages;
1381 __u64 offset = 0; 1404 __u64 offset = 0;
1382 struct cifsFileInfo *open_file; 1405 struct cifsFileInfo *open_file;
1406 struct cifsInodeInfo *cifsi = CIFS_I(mapping->host);
1383 struct page *page; 1407 struct page *page;
1384 struct pagevec pvec; 1408 struct pagevec pvec;
1385 int rc = 0; 1409 int rc = 0;
1386 int scanned = 0; 1410 int scanned = 0;
1387 int xid; 1411 int xid, long_op;
1388 1412
1389 cifs_sb = CIFS_SB(mapping->host->i_sb); 1413 cifs_sb = CIFS_SB(mapping->host->i_sb);
1390 1414
@@ -1528,12 +1552,15 @@ retry:
1528 cERROR(1, ("No writable handles for inode")); 1552 cERROR(1, ("No writable handles for inode"));
1529 rc = -EBADF; 1553 rc = -EBADF;
1530 } else { 1554 } else {
1555 long_op = cifs_write_timeout(cifsi, offset);
1531 rc = CIFSSMBWrite2(xid, cifs_sb->tcon, 1556 rc = CIFSSMBWrite2(xid, cifs_sb->tcon,
1532 open_file->netfid, 1557 open_file->netfid,
1533 bytes_to_write, offset, 1558 bytes_to_write, offset,
1534 &bytes_written, iov, n_iov, 1559 &bytes_written, iov, n_iov,
1535 CIFS_LONG_OP); 1560 long_op);
1536 atomic_dec(&open_file->wrtPending); 1561 atomic_dec(&open_file->wrtPending);
1562 cifs_update_eof(cifsi, offset, bytes_written);
1563
1537 if (rc || bytes_written < bytes_to_write) { 1564 if (rc || bytes_written < bytes_to_write) {
1538 cERROR(1, ("Write2 ret %d, wrote %d", 1565 cERROR(1, ("Write2 ret %d, wrote %d",
1539 rc, bytes_written)); 1566 rc, bytes_written));
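The file.c hunks above replace the old "offset past i_size means a very long timeout" check with cifs_write_timeout(), keyed off a cached server_eof that cifs_update_eof() advances after every successful write. Below is a small standalone sketch of that pairing; the 15/45/180 second values and the 10M cutoff come from the comment in the patch, while the names are illustrative rather than the kernel's CIFS_*_OP constants.

#include <stdio.h>

/* Cached state mirroring cifsInodeInfo->server_eof in the patch. */
struct inode_state {
	long long server_eof;	/* last file size the server is known to have */
};

/* Tier selection described in the patch comment: 15s for writes not past
 * the cached EOF, 45s for writes past it, 180s for writes >10M past it. */
static int write_timeout_secs(const struct inode_state *st, long long offset)
{
	if (offset <= st->server_eof)
		return 15;
	else if (offset > st->server_eof + 10 * 1024 * 1024)
		return 180;
	else
		return 45;
}

/* Counterpart of cifs_update_eof(): advance the cached EOF after a write. */
static void update_eof(struct inode_state *st, long long offset,
		       unsigned int bytes_written)
{
	long long end = offset + bytes_written;

	if (end > st->server_eof)
		st->server_eof = end;
}

int main(void)
{
	struct inode_state st = { .server_eof = 0 };

	printf("%d\n", write_timeout_secs(&st, 0));			/* 15 */
	printf("%d\n", write_timeout_secs(&st, 32LL * 1024 * 1024));	/* 180 */
	update_eof(&st, 0, 64 * 1024 * 1024);
	printf("%d\n", write_timeout_secs(&st, 32LL * 1024 * 1024));	/* 15 */
	return 0;
}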
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index f121a80fdd6..9c869a6dcba 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -143,6 +143,7 @@ static void cifs_unix_info_to_inode(struct inode *inode,
143 143
144 inode->i_nlink = le64_to_cpu(info->Nlinks); 144 inode->i_nlink = le64_to_cpu(info->Nlinks);
145 145
146 cifsInfo->server_eof = end_of_file;
146 spin_lock(&inode->i_lock); 147 spin_lock(&inode->i_lock);
147 if (is_size_safe_to_change(cifsInfo, end_of_file)) { 148 if (is_size_safe_to_change(cifsInfo, end_of_file)) {
148 /* 149 /*
@@ -276,7 +277,8 @@ int cifs_get_inode_info_unix(struct inode **pinode,
276 277
277 /* get new inode */ 278 /* get new inode */
278 if (*pinode == NULL) { 279 if (*pinode == NULL) {
279 *pinode = cifs_new_inode(sb, &find_data.UniqueId); 280 __u64 unique_id = le64_to_cpu(find_data.UniqueId);
281 *pinode = cifs_new_inode(sb, &unique_id);
280 if (*pinode == NULL) { 282 if (*pinode == NULL) {
281 rc = -ENOMEM; 283 rc = -ENOMEM;
282 goto cgiiu_exit; 284 goto cgiiu_exit;
@@ -605,12 +607,12 @@ int cifs_get_inode_info(struct inode **pinode,
605 inode->i_mode |= S_IFREG; 607 inode->i_mode |= S_IFREG;
606 } 608 }
607 609
610 cifsInfo->server_eof = le64_to_cpu(pfindData->EndOfFile);
608 spin_lock(&inode->i_lock); 611 spin_lock(&inode->i_lock);
609 if (is_size_safe_to_change(cifsInfo, 612 if (is_size_safe_to_change(cifsInfo, cifsInfo->server_eof)) {
610 le64_to_cpu(pfindData->EndOfFile))) {
611 /* can not safely shrink the file size here if the 613 /* can not safely shrink the file size here if the
612 client is writing to it due to potential races */ 614 client is writing to it due to potential races */
613 i_size_write(inode, le64_to_cpu(pfindData->EndOfFile)); 615 i_size_write(inode, cifsInfo->server_eof);
614 616
615 /* 512 bytes (2**9) is the fake blocksize that must be 617 /* 512 bytes (2**9) is the fake blocksize that must be
616 used for this calculation */ 618 used for this calculation */
@@ -960,13 +962,21 @@ undo_setattr:
960 goto out_close; 962 goto out_close;
961} 963}
962 964
965
966/*
967 * If dentry->d_inode is null (usually meaning the cached dentry
968 * is a negative dentry) then we would attempt a standard SMB delete, but
969 * if that fails we can not attempt the fall back mechanisms on EACESS
970 * but will return the EACESS to the caller. Note that the VFS does not call
971 * unlink on negative dentries currently.
972 */
963int cifs_unlink(struct inode *dir, struct dentry *dentry) 973int cifs_unlink(struct inode *dir, struct dentry *dentry)
964{ 974{
965 int rc = 0; 975 int rc = 0;
966 int xid; 976 int xid;
967 char *full_path = NULL; 977 char *full_path = NULL;
968 struct inode *inode = dentry->d_inode; 978 struct inode *inode = dentry->d_inode;
969 struct cifsInodeInfo *cifsInode = CIFS_I(inode); 979 struct cifsInodeInfo *cifs_inode;
970 struct super_block *sb = dir->i_sb; 980 struct super_block *sb = dir->i_sb;
971 struct cifs_sb_info *cifs_sb = CIFS_SB(sb); 981 struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
972 struct cifsTconInfo *tcon = cifs_sb->tcon; 982 struct cifsTconInfo *tcon = cifs_sb->tcon;
@@ -1010,7 +1020,7 @@ psx_del_no_retry:
1010 rc = cifs_rename_pending_delete(full_path, dentry, xid); 1020 rc = cifs_rename_pending_delete(full_path, dentry, xid);
1011 if (rc == 0) 1021 if (rc == 0)
1012 drop_nlink(inode); 1022 drop_nlink(inode);
1013 } else if (rc == -EACCES && dosattr == 0) { 1023 } else if ((rc == -EACCES) && (dosattr == 0) && inode) {
1014 attrs = kzalloc(sizeof(*attrs), GFP_KERNEL); 1024 attrs = kzalloc(sizeof(*attrs), GFP_KERNEL);
1015 if (attrs == NULL) { 1025 if (attrs == NULL) {
1016 rc = -ENOMEM; 1026 rc = -ENOMEM;
@@ -1018,7 +1028,8 @@ psx_del_no_retry:
1018 } 1028 }
1019 1029
1020 /* try to reset dos attributes */ 1030 /* try to reset dos attributes */
1021 origattr = cifsInode->cifsAttrs; 1031 cifs_inode = CIFS_I(inode);
1032 origattr = cifs_inode->cifsAttrs;
1022 if (origattr == 0) 1033 if (origattr == 0)
1023 origattr |= ATTR_NORMAL; 1034 origattr |= ATTR_NORMAL;
1024 dosattr = origattr & ~ATTR_READONLY; 1035 dosattr = origattr & ~ATTR_READONLY;
@@ -1039,13 +1050,13 @@ psx_del_no_retry:
1039 1050
1040out_reval: 1051out_reval:
1041 if (inode) { 1052 if (inode) {
1042 cifsInode = CIFS_I(inode); 1053 cifs_inode = CIFS_I(inode);
1043 cifsInode->time = 0; /* will force revalidate to get info 1054 cifs_inode->time = 0; /* will force revalidate to get info
1044 when needed */ 1055 when needed */
1045 inode->i_ctime = current_fs_time(sb); 1056 inode->i_ctime = current_fs_time(sb);
1046 } 1057 }
1047 dir->i_ctime = dir->i_mtime = current_fs_time(sb); 1058 dir->i_ctime = dir->i_mtime = current_fs_time(sb);
1048 cifsInode = CIFS_I(dir); 1059 cifs_inode = CIFS_I(dir);
1049 CIFS_I(dir)->time = 0; /* force revalidate of dir as well */ 1060 CIFS_I(dir)->time = 0; /* force revalidate of dir as well */
1050 1061
1051 kfree(full_path); 1062 kfree(full_path);
@@ -1138,6 +1149,7 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode)
1138 cFYI(1, ("posix mkdir returned 0x%x", rc)); 1149 cFYI(1, ("posix mkdir returned 0x%x", rc));
1139 d_drop(direntry); 1150 d_drop(direntry);
1140 } else { 1151 } else {
1152 __u64 unique_id;
1141 if (pInfo->Type == cpu_to_le32(-1)) { 1153 if (pInfo->Type == cpu_to_le32(-1)) {
1142 /* no return info, go query for it */ 1154 /* no return info, go query for it */
1143 kfree(pInfo); 1155 kfree(pInfo);
@@ -1151,8 +1163,8 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode)
1151 else 1163 else
1152 direntry->d_op = &cifs_dentry_ops; 1164 direntry->d_op = &cifs_dentry_ops;
1153 1165
1154 newinode = cifs_new_inode(inode->i_sb, 1166 unique_id = le64_to_cpu(pInfo->UniqueId);
1155 &pInfo->UniqueId); 1167 newinode = cifs_new_inode(inode->i_sb, &unique_id);
1156 if (newinode == NULL) { 1168 if (newinode == NULL) {
1157 kfree(pInfo); 1169 kfree(pInfo);
1158 goto mkdir_get_info; 1170 goto mkdir_get_info;
@@ -1450,7 +1462,8 @@ int cifs_rename(struct inode *source_dir, struct dentry *source_dentry,
1450 checking the UniqueId via FILE_INTERNAL_INFO */ 1462 checking the UniqueId via FILE_INTERNAL_INFO */
1451 1463
1452unlink_target: 1464unlink_target:
1453 if ((rc == -EACCES) || (rc == -EEXIST)) { 1465 /* Try unlinking the target dentry if it's not negative */
1466 if (target_dentry->d_inode && (rc == -EACCES || rc == -EEXIST)) {
1454 tmprc = cifs_unlink(target_dir, target_dentry); 1467 tmprc = cifs_unlink(target_dir, target_dentry);
1455 if (tmprc) 1468 if (tmprc)
1456 goto cifs_rename_exit; 1469 goto cifs_rename_exit;
@@ -1753,6 +1766,7 @@ cifs_set_file_size(struct inode *inode, struct iattr *attrs,
1753 } 1766 }
1754 1767
1755 if (rc == 0) { 1768 if (rc == 0) {
1769 cifsInode->server_eof = attrs->ia_size;
1756 rc = cifs_vmtruncate(inode, attrs->ia_size); 1770 rc = cifs_vmtruncate(inode, attrs->ia_size);
1757 cifs_truncate_page(inode->i_mapping, inode->i_size); 1771 cifs_truncate_page(inode->i_mapping, inode->i_size);
1758 } 1772 }
@@ -1792,20 +1806,21 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs)
1792 goto out; 1806 goto out;
1793 } 1807 }
1794 1808
1795 	if ((attrs->ia_valid & ATTR_MTIME) || (attrs->ia_valid & ATTR_SIZE)) {
1796 		/*
1797 		   Flush data before changing file size or changing the last
1798 		   write time of the file on the server. If the
1799 		   flush returns error, store it to report later and continue.
1800 		   BB: This should be smarter. Why bother flushing pages that
1801 		   will be truncated anyway? Also, should we error out here if
1802 		   the flush returns error?
1803 		*/
1804 		rc = filemap_write_and_wait(inode->i_mapping);
1805 		if (rc != 0) {
1806 			cifsInode->write_behind_rc = rc;
1807 			rc = 0;
1808 		}
1809 	}
1809 	/*
1810 	 * Attempt to flush data before changing attributes. We need to do
1811 	 * this for ATTR_SIZE and ATTR_MTIME for sure, and if we change the
1812 	 * ownership or mode then we may also need to do this. Here, we take
1813 	 * the safe way out and just do the flush on all setattr requests. If
1814 	 * the flush returns error, store it to report later and continue.
1815 	 *
1816 	 * BB: This should be smarter. Why bother flushing pages that
1817 	 * will be truncated anyway? Also, should we error out here if
1818 	 * the flush returns error?
1819 	 */
1820 	rc = filemap_write_and_wait(inode->i_mapping);
1821 	if (rc != 0) {
1822 		cifsInode->write_behind_rc = rc;
1823 		rc = 0;
1824 	}
1810 1825
1811 if (attrs->ia_valid & ATTR_SIZE) { 1826 if (attrs->ia_valid & ATTR_SIZE) {
@@ -1903,20 +1918,21 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs)
1903 return -ENOMEM; 1918 return -ENOMEM;
1904 } 1919 }
1905 1920
1906 	if ((attrs->ia_valid & ATTR_MTIME) || (attrs->ia_valid & ATTR_SIZE)) {
1907 		/*
1908 		   Flush data before changing file size or changing the last
1909 		   write time of the file on the server. If the
1910 		   flush returns error, store it to report later and continue.
1911 		   BB: This should be smarter. Why bother flushing pages that
1912 		   will be truncated anyway? Also, should we error out here if
1913 		   the flush returns error?
1914 		*/
1915 		rc = filemap_write_and_wait(inode->i_mapping);
1916 		if (rc != 0) {
1917 			cifsInode->write_behind_rc = rc;
1918 			rc = 0;
1919 		}
1920 	}
1921 	/*
1922 	 * Attempt to flush data before changing attributes. We need to do
1923 	 * this for ATTR_SIZE and ATTR_MTIME for sure, and if we change the
1924 	 * ownership or mode then we may also need to do this. Here, we take
1925 	 * the safe way out and just do the flush on all setattr requests. If
1926 	 * the flush returns error, store it to report later and continue.
1927 	 *
1928 	 * BB: This should be smarter. Why bother flushing pages that
1929 	 * will be truncated anyway? Also, should we error out here if
1930 	 * the flush returns error?
1931 	 */
1932 	rc = filemap_write_and_wait(inode->i_mapping);
1933 	if (rc != 0) {
1934 		cifsInode->write_behind_rc = rc;
1935 		rc = 0;
1936 	}
1921 1937
1922 if (attrs->ia_valid & ATTR_SIZE) { 1938 if (attrs->ia_valid & ATTR_SIZE) {
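Several of the inode.c hunks above stop passing the raw little-endian UniqueId field straight to cifs_new_inode() and instead convert it with le64_to_cpu() into a local host-order copy first. Below is a minimal userspace sketch of that conversion; the hand-rolled decoder is a stand-in for le64_to_cpu(), not the kernel helper.

#include <stdint.h>
#include <stdio.h>

/* Decode a 64-bit little-endian wire value into host order. Works the same
 * on big- and little-endian hosts because it reads the bytes explicitly. */
static uint64_t le64_to_host(const unsigned char b[8])
{
	uint64_t v = 0;
	int i;

	for (i = 7; i >= 0; i--)	/* b[0] is the least significant byte */
		v = (v << 8) | b[i];
	return v;
}

int main(void)
{
	/* UniqueId as it might arrive on the wire: 0x201 encoded little-endian */
	unsigned char wire_unique_id[8] = { 0x01, 0x02, 0, 0, 0, 0, 0, 0 };
	uint64_t unique_id = le64_to_host(wire_unique_id);

	printf("UniqueId = 0x%llx\n", (unsigned long long)unique_id);	/* 0x201 */
	return 0;
}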
diff --git a/fs/cifs/link.c b/fs/cifs/link.c
index 63f644000ce..cd83c53fcbb 100644
--- a/fs/cifs/link.c
+++ b/fs/cifs/link.c
@@ -107,63 +107,51 @@ void *
107cifs_follow_link(struct dentry *direntry, struct nameidata *nd) 107cifs_follow_link(struct dentry *direntry, struct nameidata *nd)
108{ 108{
109 struct inode *inode = direntry->d_inode; 109 struct inode *inode = direntry->d_inode;
110 int rc = -EACCES; 110 int rc = -ENOMEM;
111 int xid; 111 int xid;
112 char *full_path = NULL; 112 char *full_path = NULL;
113 char *target_path = ERR_PTR(-ENOMEM); 113 char *target_path = NULL;
114 struct cifs_sb_info *cifs_sb; 114 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
115 struct cifsTconInfo *pTcon; 115 struct cifsTconInfo *tcon = cifs_sb->tcon;
116 116
117 xid = GetXid(); 117 xid = GetXid();
118 118
119 	full_path = build_path_from_dentry(direntry);
120
121 	if (!full_path)
122 		goto out_no_free;
123
124 	cFYI(1, ("Full path: %s inode = 0x%p", full_path, inode));
125 	cifs_sb = CIFS_SB(inode->i_sb);
126 	pTcon = cifs_sb->tcon;
127 	target_path = kmalloc(PATH_MAX, GFP_KERNEL);
128 	if (!target_path) {
129 		target_path = ERR_PTR(-ENOMEM);
130 		goto out;
131 	}
132
133 	/* We could change this to:
134 		if (pTcon->unix_ext)
135 	   but there does not seem any point in refusing to
136 	   get symlink info if we can, even if unix extensions
137 	   turned off for this mount */
138
139 	if (pTcon->ses->capabilities & CAP_UNIX)
140 		rc = CIFSSMBUnixQuerySymLink(xid, pTcon, full_path,
141 					     target_path,
142 					     PATH_MAX-1,
143 					     cifs_sb->local_nls);
144 	else {
145 		/* BB add read reparse point symlink code here */
146 		/* rc = CIFSSMBQueryReparseLinkInfo */
147 		/* BB Add code to Query ReparsePoint info */
148 		/* BB Add MAC style xsymlink check here if enabled */
149 	}
150
151 	if (rc == 0) {
152
153/* BB Add special case check for Samba DFS symlinks */
154
155 		target_path[PATH_MAX-1] = 0;
156 	} else {
157 		kfree(target_path);
158 		target_path = ERR_PTR(rc);
159 	}
160
161out:
162 	kfree(full_path);
163out_no_free:
119 	/*
120 	 * For now, we just handle symlinks with unix extensions enabled.
121 	 * Eventually we should handle NTFS reparse points, and MacOS
122 	 * symlink support. For instance...
123 	 *
124 	 * rc = CIFSSMBQueryReparseLinkInfo(...)
125 	 *
126 	 * For now, just return -EACCES when the server doesn't support posix
127 	 * extensions. Note that we still allow querying symlinks when posix
128 	 * extensions are manually disabled. We could disable these as well
129 	 * but there doesn't seem to be any harm in allowing the client to
130 	 * read them.
131 	 */
132 	if (!(tcon->ses->capabilities & CAP_UNIX)) {
133 		rc = -EACCES;
134 		goto out;
135 	}
136
137 	full_path = build_path_from_dentry(direntry);
138 	if (!full_path)
139 		goto out;
140
141 	cFYI(1, ("Full path: %s inode = 0x%p", full_path, inode));
142
143 	rc = CIFSSMBUnixQuerySymLink(xid, tcon, full_path, &target_path,
144 				     cifs_sb->local_nls);
145 	kfree(full_path);
146out:
147 	if (rc != 0) {
148 		kfree(target_path);
149 		target_path = ERR_PTR(rc);
150 	}
151
164 FreeXid(xid); 152 FreeXid(xid);
165 nd_set_link(nd, target_path); 153 nd_set_link(nd, target_path);
166 return NULL; /* No cookie */ 154 return NULL;
167} 155}
168 156
169int 157int
@@ -224,98 +212,6 @@ cifs_symlink(struct inode *inode, struct dentry *direntry, const char *symname)
224 return rc; 212 return rc;
225} 213}
226 214
227int
228cifs_readlink(struct dentry *direntry, char __user *pBuffer, int buflen)
229{
230 struct inode *inode = direntry->d_inode;
231 int rc = -EACCES;
232 int xid;
233 int oplock = 0;
234 struct cifs_sb_info *cifs_sb;
235 struct cifsTconInfo *pTcon;
236 char *full_path = NULL;
237 char *tmpbuffer;
238 int len;
239 __u16 fid;
240
241 xid = GetXid();
242 cifs_sb = CIFS_SB(inode->i_sb);
243 pTcon = cifs_sb->tcon;
244
245/* BB would it be safe against deadlock to grab this sem
246 even though rename itself grabs the sem and calls lookup? */
247/* mutex_lock(&inode->i_sb->s_vfs_rename_mutex);*/
248 full_path = build_path_from_dentry(direntry);
249/* mutex_unlock(&inode->i_sb->s_vfs_rename_mutex);*/
250
251 if (full_path == NULL) {
252 FreeXid(xid);
253 return -ENOMEM;
254 }
255
256 cFYI(1,
257 ("Full path: %s inode = 0x%p pBuffer = 0x%p buflen = %d",
258 full_path, inode, pBuffer, buflen));
259 if (buflen > PATH_MAX)
260 len = PATH_MAX;
261 else
262 len = buflen;
263 tmpbuffer = kmalloc(len, GFP_KERNEL);
264 if (tmpbuffer == NULL) {
265 kfree(full_path);
266 FreeXid(xid);
267 return -ENOMEM;
268 }
269
270/* BB add read reparse point symlink code and
271 Unix extensions symlink code here BB */
272/* We could disable this based on pTcon->unix_ext flag instead ... but why? */
273 if (cifs_sb->tcon->ses->capabilities & CAP_UNIX)
274 rc = CIFSSMBUnixQuerySymLink(xid, pTcon, full_path,
275 tmpbuffer,
276 len - 1,
277 cifs_sb->local_nls);
278 else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) {
279 cERROR(1, ("SFU style symlinks not implemented yet"));
280 /* add open and read as in fs/cifs/inode.c */
281 } else {
282 rc = CIFSSMBOpen(xid, pTcon, full_path, FILE_OPEN, GENERIC_READ,
283 OPEN_REPARSE_POINT, &fid, &oplock, NULL,
284 cifs_sb->local_nls,
285 cifs_sb->mnt_cifs_flags &
286 CIFS_MOUNT_MAP_SPECIAL_CHR);
287 if (!rc) {
288 rc = CIFSSMBQueryReparseLinkInfo(xid, pTcon, full_path,
289 tmpbuffer,
290 len - 1,
291 fid,
292 cifs_sb->local_nls);
293 if (CIFSSMBClose(xid, pTcon, fid)) {
294 cFYI(1, ("Error closing junction point "
295 "(open for ioctl)"));
296 }
297 /* If it is a DFS junction earlier we would have gotten
298 PATH_NOT_COVERED returned from server so we do
299 not need to request the DFS info here */
300 }
301 }
302 /* BB Anything else to do to handle recursive links? */
303 /* BB Should we be using page ops here? */
304
305 /* BB null terminate returned string in pBuffer? BB */
306 if (rc == 0) {
307 rc = vfs_readlink(direntry, pBuffer, len, tmpbuffer);
308 cFYI(1,
309 ("vfs_readlink called from cifs_readlink returned %d",
310 rc));
311 }
312
313 kfree(tmpbuffer);
314 kfree(full_path);
315 FreeXid(xid);
316 return rc;
317}
318
319void cifs_put_link(struct dentry *direntry, struct nameidata *nd, void *cookie) 215void cifs_put_link(struct dentry *direntry, struct nameidata *nd, void *cookie)
320{ 216{
321 char *p = nd_get_link(nd); 217 char *p = nd_get_link(nd);
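The reworked cifs_follow_link() above funnels every failure through a single exit label and hands nd_set_link() either the looked-up target or ERR_PTR(rc), so the follower can tell the two apart with IS_ERR(). The sketch below is only a hypothetical, self-contained userspace analogue of that "encode the errno in the returned pointer, clean up on one path" pattern (the kernel's real ERR_PTR/IS_ERR/PTR_ERR helpers live in <linux/err.h>); query_target() and the paths are invented for the example.

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Minimal userspace stand-ins for the kernel's ERR_PTR helpers. */
#define MAX_ERRNO 4095
static inline void *ERR_PTR(long error) { return (void *)error; }
static inline long PTR_ERR(const void *ptr) { return (long)ptr; }
static inline int IS_ERR(const void *ptr)
{
	return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

/* Toy "query symlink target": succeeds only for one known name. */
static int query_target(const char *path, char **target)
{
	if (strcmp(path, "/share/link") != 0)
		return -EACCES;		/* server refused / unsupported */
	*target = strdup("/share/file");
	return *target ? 0 : -ENOMEM;
}

/* Single-exit lookup: on any failure the result encodes the errno. */
static char *follow_link(const char *path)
{
	char *full_path, *target = NULL;
	int rc = -ENOMEM;

	full_path = strdup(path);	/* stands in for build_path_from_dentry() */
	if (!full_path)
		goto out;

	rc = query_target(full_path, &target);
	free(full_path);
out:
	if (rc != 0) {
		free(target);
		target = ERR_PTR(rc);
	}
	return target;
}

int main(void)
{
	const char *names[2] = { "/share/link", "/share/other" };
	for (int i = 0; i < 2; i++) {
		char *t = follow_link(names[i]);
		if (IS_ERR(t))
			printf("%s -> error %ld\n", names[i], -PTR_ERR(t));
		else {
			printf("%s -> %s\n", names[i], t);
			free(t);
		}
	}
	return 0;
}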
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 4c89c572891..e079a9190ec 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -635,77 +635,6 @@ dump_smb(struct smb_hdr *smb_buf, int smb_buf_length)
635 return; 635 return;
636} 636}
637 637
638/* Windows maps these to the user defined 16 bit Unicode range since they are
639 reserved symbols (along with \ and /), otherwise illegal to store
640 in filenames in NTFS */
641#define UNI_ASTERIK (__u16) ('*' + 0xF000)
642#define UNI_QUESTION (__u16) ('?' + 0xF000)
643#define UNI_COLON (__u16) (':' + 0xF000)
644#define UNI_GRTRTHAN (__u16) ('>' + 0xF000)
645#define UNI_LESSTHAN (__u16) ('<' + 0xF000)
646#define UNI_PIPE (__u16) ('|' + 0xF000)
647#define UNI_SLASH (__u16) ('\\' + 0xF000)
648
649/* Convert 16 bit Unicode pathname from wire format to string in current code
650 page. Conversion may involve remapping up the seven characters that are
651 only legal in POSIX-like OS (if they are present in the string). Path
652 names are little endian 16 bit Unicode on the wire */
653int
654cifs_convertUCSpath(char *target, const __le16 *source, int maxlen,
655 const struct nls_table *cp)
656{
657 int i, j, len;
658 __u16 src_char;
659
660 for (i = 0, j = 0; i < maxlen; i++) {
661 src_char = le16_to_cpu(source[i]);
662 switch (src_char) {
663 case 0:
664 goto cUCS_out; /* BB check this BB */
665 case UNI_COLON:
666 target[j] = ':';
667 break;
668 case UNI_ASTERIK:
669 target[j] = '*';
670 break;
671 case UNI_QUESTION:
672 target[j] = '?';
673 break;
674 /* BB We can not handle remapping slash until
675 all the calls to build_path_from_dentry
676 are modified, as they use slash as separator BB */
677 /* case UNI_SLASH:
678 target[j] = '\\';
679 break;*/
680 case UNI_PIPE:
681 target[j] = '|';
682 break;
683 case UNI_GRTRTHAN:
684 target[j] = '>';
685 break;
686 case UNI_LESSTHAN:
687 target[j] = '<';
688 break;
689 default:
690 len = cp->uni2char(src_char, &target[j],
691 NLS_MAX_CHARSET_SIZE);
692 if (len > 0) {
693 j += len;
694 continue;
695 } else {
696 target[j] = '?';
697 }
698 }
699 j++;
700 /* make sure we do not overrun callers allocated temp buffer */
701 if (j >= (2 * NAME_MAX))
702 break;
703 }
704cUCS_out:
705 target[j] = 0;
706 return j;
707}
708
709/* Convert 16 bit Unicode pathname to wire format from string in current code 638/* Convert 16 bit Unicode pathname to wire format from string in current code
710 page. Conversion may involve remapping up the seven characters that are 639 page. Conversion may involve remapping up the seven characters that are
711 only legal in POSIX-like OS (if they are present in the string). Path 640 only legal in POSIX-like OS (if they are present in the string). Path
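The cifs_convertUCSpath() routine removed above undid Windows' trick of storing the reserved filename characters * ? : > < | (and backslash) as private-use code points at 0xF000 plus the ASCII value; that remapping now lives with the shared converters in cifs_unicode.c. The fragment below is only an illustration of the idea under those assumptions: it maps the private-use substitutes back to ASCII and emits plain UTF-8 for other BMP code points, without the NLS tables, surrogate handling, or length policing the real converter needs.

#include <stdio.h>

/* Windows stores reserved filename characters as U+F000 + ASCII value. */
#define UNI_REMAP_BASE 0xF000

static int remap_reserved(unsigned int cp)
{
	switch (cp) {
	case UNI_REMAP_BASE + '*':
	case UNI_REMAP_BASE + '?':
	case UNI_REMAP_BASE + ':':
	case UNI_REMAP_BASE + '>':
	case UNI_REMAP_BASE + '<':
	case UNI_REMAP_BASE + '|':
	case UNI_REMAP_BASE + '\\':
		return (int)(cp - UNI_REMAP_BASE);	/* back to the ASCII original */
	default:
		return -1;				/* not a remapped character */
	}
}

/* Encode one BMP code point as UTF-8; returns bytes written (1..3). */
static int put_utf8(unsigned int cp, unsigned char *out)
{
	if (cp < 0x80) {
		out[0] = (unsigned char)cp;
		return 1;
	}
	if (cp < 0x800) {
		out[0] = (unsigned char)(0xC0 | (cp >> 6));
		out[1] = (unsigned char)(0x80 | (cp & 0x3F));
		return 2;
	}
	out[0] = (unsigned char)(0xE0 | (cp >> 12));
	out[1] = (unsigned char)(0x80 | ((cp >> 6) & 0x3F));
	out[2] = (unsigned char)(0x80 | (cp & 0x3F));
	return 3;
}

int main(void)
{
	/* "a<*>.txt" as the server would store it, in host-order UCS-2. */
	unsigned int wire[] = { 'a', 0xF000 + '<', 0xF000 + '*', 0xF000 + '>',
				'.', 't', 'x', 't', 0 };
	unsigned char out[64];
	int i, j = 0;

	for (i = 0; wire[i] != 0; i++) {
		int mapped = remap_reserved(wire[i]);
		if (mapped >= 0)
			out[j++] = (unsigned char)mapped;
		else
			j += put_utf8(wire[i], out + j);
	}
	out[j] = 0;
	printf("decoded name: %s\n", (char *)out);
	return 0;
}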
diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c
index 8703d68f5b2..e2fe998989a 100644
--- a/fs/cifs/netmisc.c
+++ b/fs/cifs/netmisc.c
@@ -79,6 +79,7 @@ static const struct smb_to_posix_error mapping_table_ERRDOS[] = {
79 {ErrQuota, -EDQUOT}, 79 {ErrQuota, -EDQUOT},
80 {ErrNotALink, -ENOLINK}, 80 {ErrNotALink, -ENOLINK},
81 {ERRnetlogonNotStarted, -ENOPROTOOPT}, 81 {ERRnetlogonNotStarted, -ENOPROTOOPT},
82 {ERRsymlink, -EOPNOTSUPP},
82 {ErrTooManyLinks, -EMLINK}, 83 {ErrTooManyLinks, -EMLINK},
83 {0, 0} 84 {0, 0}
84}; 85};
@@ -714,6 +715,7 @@ static const struct {
714 ERRDOS, ERRnoaccess, 0xc000028f}, { 715 ERRDOS, ERRnoaccess, 0xc000028f}, {
715 ERRDOS, ERRnoaccess, 0xc0000290}, { 716 ERRDOS, ERRnoaccess, 0xc0000290}, {
716 ERRDOS, ERRbadfunc, 0xc000029c}, { 717 ERRDOS, ERRbadfunc, 0xc000029c}, {
718 ERRDOS, ERRsymlink, NT_STATUS_STOPPED_ON_SYMLINK}, {
717 ERRDOS, ERRinvlevel, 0x007c0001}, }; 719 ERRDOS, ERRinvlevel, 0x007c0001}, };
718 720
719/***************************************************************************** 721/*****************************************************************************
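The two netmisc.c hunks above teach the SMB-to-POSIX mapping tables about symlink interruption: NT_STATUS_STOPPED_ON_SYMLINK is first folded to the internal DOS-class code ERRsymlink, which in turn maps to -EOPNOTSUPP. The snippet below is a hypothetical, minimal illustration of that two-stage table lookup, not the kernel's actual tables.

#include <errno.h>
#include <stdio.h>

#define NT_STATUS_STOPPED_ON_SYMLINK 0x8000002d
#define ERRSYMLINK 0xFFFD	/* internal code, never sent on the wire */

struct map { unsigned int from; int to; };

static const struct map nt_to_dos[] = {
	{ NT_STATUS_STOPPED_ON_SYMLINK, ERRSYMLINK },
	{ 0, 0 }
};
static const struct map dos_to_posix[] = {
	{ ERRSYMLINK, -EOPNOTSUPP },
	{ 0, 0 }
};

static int lookup(const struct map *t, unsigned int code, int dflt)
{
	for (; t->from; t++)
		if (t->from == code)
			return t->to;
	return dflt;
}

int main(void)
{
	int dos = lookup(nt_to_dos, NT_STATUS_STOPPED_ON_SYMLINK, 0);
	printf("posix rc = %d\n", lookup(dos_to_posix, (unsigned int)dos, -EIO));
	return 0;
}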
diff --git a/fs/cifs/nterr.h b/fs/cifs/nterr.h
index 588abbb9d08..257267367d4 100644
--- a/fs/cifs/nterr.h
+++ b/fs/cifs/nterr.h
@@ -35,8 +35,6 @@ struct nt_err_code_struct {
35extern const struct nt_err_code_struct nt_errs[]; 35extern const struct nt_err_code_struct nt_errs[];
36 36
37/* Win32 Status codes. */ 37/* Win32 Status codes. */
38
39#define STATUS_BUFFER_OVERFLOW 0x80000005
40#define STATUS_MORE_ENTRIES 0x0105 38#define STATUS_MORE_ENTRIES 0x0105
41#define ERROR_INVALID_PARAMETER 0x0057 39#define ERROR_INVALID_PARAMETER 0x0057
42#define ERROR_INSUFFICIENT_BUFFER 0x007a 40#define ERROR_INSUFFICIENT_BUFFER 0x007a
@@ -50,6 +48,13 @@ extern const struct nt_err_code_struct nt_errs[];
50#define STATUS_SOME_UNMAPPED 0x0107 48#define STATUS_SOME_UNMAPPED 0x0107
51#define STATUS_BUFFER_OVERFLOW 0x80000005 49#define STATUS_BUFFER_OVERFLOW 0x80000005
52#define NT_STATUS_NO_MORE_ENTRIES 0x8000001a 50#define NT_STATUS_NO_MORE_ENTRIES 0x8000001a
51#define NT_STATUS_MEDIA_CHANGED 0x8000001c
52#define NT_STATUS_END_OF_MEDIA 0x8000001e
53#define NT_STATUS_MEDIA_CHECK 0x80000020
54#define NT_STATUS_NO_DATA_DETECTED 0x8000001c
55#define NT_STATUS_STOPPED_ON_SYMLINK 0x8000002d
56#define NT_STATUS_DEVICE_REQUIRES_CLEANING 0x80000288
57#define NT_STATUS_DEVICE_DOOR_OPEN 0x80000288
53#define NT_STATUS_UNSUCCESSFUL 0xC0000000 | 0x0001 58#define NT_STATUS_UNSUCCESSFUL 0xC0000000 | 0x0001
54#define NT_STATUS_NOT_IMPLEMENTED 0xC0000000 | 0x0002 59#define NT_STATUS_NOT_IMPLEMENTED 0xC0000000 | 0x0002
55#define NT_STATUS_INVALID_INFO_CLASS 0xC0000000 | 0x0003 60#define NT_STATUS_INVALID_INFO_CLASS 0xC0000000 | 0x0003
diff --git a/fs/cifs/ntlmssp.h b/fs/cifs/ntlmssp.h
index c377d8065d9..49c9a4e7531 100644
--- a/fs/cifs/ntlmssp.h
+++ b/fs/cifs/ntlmssp.h
@@ -27,29 +27,39 @@
27#define UnknownMessage cpu_to_le32(8) 27#define UnknownMessage cpu_to_le32(8)
28 28
29/* Negotiate Flags */ 29/* Negotiate Flags */
30#define NTLMSSP_NEGOTIATE_UNICODE 0x01 /* Text strings are in unicode */ 30#define NTLMSSP_NEGOTIATE_UNICODE 0x01 /* Text strings are unicode */
31#define NTLMSSP_NEGOTIATE_OEM 0x02 /* Text strings are in OEM */ 31#define NTLMSSP_NEGOTIATE_OEM 0x02 /* Text strings are in OEM */
32#define NTLMSSP_REQUEST_TARGET 0x04 /* Server return its auth realm */ 32#define NTLMSSP_REQUEST_TARGET 0x04 /* Srv returns its auth realm */
33#define NTLMSSP_NEGOTIATE_SIGN 0x0010 /* Request signature capability */ 33/* define reserved9 0x08 */
34#define NTLMSSP_NEGOTIATE_SEAL 0x0020 /* Request confidentiality */ 34#define NTLMSSP_NEGOTIATE_SIGN 0x0010 /* Request signing capability */
35#define NTLMSSP_NEGOTIATE_DGRAM 0x0040 35#define NTLMSSP_NEGOTIATE_SEAL 0x0020 /* Request confidentiality */
36#define NTLMSSP_NEGOTIATE_LM_KEY 0x0080 /* Sign/seal use LM session key */ 36#define NTLMSSP_NEGOTIATE_DGRAM 0x0040
37#define NTLMSSP_NEGOTIATE_NTLM 0x0200 /* NTLM authentication */ 37#define NTLMSSP_NEGOTIATE_LM_KEY 0x0080 /* Use LM session key */
38#define NTLMSSP_NEGOTIATE_DOMAIN_SUPPLIED 0x1000 38/* defined reserved 8 0x0100 */
39#define NTLMSSP_NEGOTIATE_NTLM 0x0200 /* NTLM authentication */
40#define NTLMSSP_NEGOTIATE_NT_ONLY 0x0400 /* Lanman not allowed */
41#define NTLMSSP_ANONYMOUS 0x0800
42#define NTLMSSP_NEGOTIATE_DOMAIN_SUPPLIED 0x1000 /* reserved6 */
39#define NTLMSSP_NEGOTIATE_WORKSTATION_SUPPLIED 0x2000 43#define NTLMSSP_NEGOTIATE_WORKSTATION_SUPPLIED 0x2000
40#define NTLMSSP_NEGOTIATE_LOCAL_CALL 0x4000 /* client/server on same machine */ 44#define NTLMSSP_NEGOTIATE_LOCAL_CALL 0x4000 /* client/server same machine */
41#define NTLMSSP_NEGOTIATE_ALWAYS_SIGN 0x8000 /* Sign for all security levels */ 45#define NTLMSSP_NEGOTIATE_ALWAYS_SIGN 0x8000 /* Sign. All security levels */
42#define NTLMSSP_TARGET_TYPE_DOMAIN 0x10000 46#define NTLMSSP_TARGET_TYPE_DOMAIN 0x10000
43#define NTLMSSP_TARGET_TYPE_SERVER 0x20000 47#define NTLMSSP_TARGET_TYPE_SERVER 0x20000
44#define NTLMSSP_TARGET_TYPE_SHARE 0x40000 48#define NTLMSSP_TARGET_TYPE_SHARE 0x40000
45#define NTLMSSP_NEGOTIATE_NTLMV2 0x80000 49#define NTLMSSP_NEGOTIATE_EXTENDED_SEC 0x80000 /* NB:not related to NTLMv2 pwd*/
46#define NTLMSSP_REQUEST_INIT_RESP 0x100000 50/* #define NTLMSSP_REQUEST_INIT_RESP 0x100000 */
47#define NTLMSSP_REQUEST_ACCEPT_RESP 0x200000 51#define NTLMSSP_NEGOTIATE_IDENTIFY 0x100000
48#define NTLMSSP_REQUEST_NOT_NT_KEY 0x400000 52#define NTLMSSP_REQUEST_ACCEPT_RESP 0x200000 /* reserved5 */
53#define NTLMSSP_REQUEST_NON_NT_KEY 0x400000
49#define NTLMSSP_NEGOTIATE_TARGET_INFO 0x800000 54#define NTLMSSP_NEGOTIATE_TARGET_INFO 0x800000
50#define NTLMSSP_NEGOTIATE_128 0x20000000 55/* #define reserved4 0x1000000 */
51#define NTLMSSP_NEGOTIATE_KEY_XCH 0x40000000 56#define NTLMSSP_NEGOTIATE_VERSION 0x2000000 /* we do not set */
52#define NTLMSSP_NEGOTIATE_56 0x80000000 57/* #define reserved3 0x4000000 */
58/* #define reserved2 0x8000000 */
59/* #define reserved1 0x10000000 */
60#define NTLMSSP_NEGOTIATE_128 0x20000000
61#define NTLMSSP_NEGOTIATE_KEY_XCH 0x40000000
62#define NTLMSSP_NEGOTIATE_56 0x80000000
53 63
54/* Although typedefs are not commonly used for structure definitions */ 64/* Although typedefs are not commonly used for structure definitions */
55/* in the Linux kernel, in this particular case they are useful */ 65/* in the Linux kernel, in this particular case they are useful */
@@ -60,32 +70,36 @@
60typedef struct _SECURITY_BUFFER { 70typedef struct _SECURITY_BUFFER {
61 __le16 Length; 71 __le16 Length;
62 __le16 MaximumLength; 72 __le16 MaximumLength;
63 __le32 Buffer; /* offset to buffer */ 73 __le32 BufferOffset; /* offset to buffer */
64} __attribute__((packed)) SECURITY_BUFFER; 74} __attribute__((packed)) SECURITY_BUFFER;
65 75
66typedef struct _NEGOTIATE_MESSAGE { 76typedef struct _NEGOTIATE_MESSAGE {
67 __u8 Signature[sizeof(NTLMSSP_SIGNATURE)]; 77 __u8 Signature[sizeof(NTLMSSP_SIGNATURE)];
68 __le32 MessageType; /* 1 */ 78 __le32 MessageType; /* NtLmNegotiate = 1 */
69 __le32 NegotiateFlags; 79 __le32 NegotiateFlags;
70 SECURITY_BUFFER DomainName; /* RFC 1001 style and ASCII */ 80 SECURITY_BUFFER DomainName; /* RFC 1001 style and ASCII */
71 SECURITY_BUFFER WorkstationName; /* RFC 1001 and ASCII */ 81 SECURITY_BUFFER WorkstationName; /* RFC 1001 and ASCII */
82 /* SECURITY_BUFFER for version info not present since we
83 do not set the version is present flag */
72 char DomainString[0]; 84 char DomainString[0];
73 /* followed by WorkstationString */ 85 /* followed by WorkstationString */
74} __attribute__((packed)) NEGOTIATE_MESSAGE, *PNEGOTIATE_MESSAGE; 86} __attribute__((packed)) NEGOTIATE_MESSAGE, *PNEGOTIATE_MESSAGE;
75 87
76typedef struct _CHALLENGE_MESSAGE { 88typedef struct _CHALLENGE_MESSAGE {
77 __u8 Signature[sizeof(NTLMSSP_SIGNATURE)]; 89 __u8 Signature[sizeof(NTLMSSP_SIGNATURE)];
78 __le32 MessageType; /* 2 */ 90 __le32 MessageType; /* NtLmChallenge = 2 */
79 SECURITY_BUFFER TargetName; 91 SECURITY_BUFFER TargetName;
80 __le32 NegotiateFlags; 92 __le32 NegotiateFlags;
81 __u8 Challenge[CIFS_CRYPTO_KEY_SIZE]; 93 __u8 Challenge[CIFS_CRYPTO_KEY_SIZE];
82 __u8 Reserved[8]; 94 __u8 Reserved[8];
83 SECURITY_BUFFER TargetInfoArray; 95 SECURITY_BUFFER TargetInfoArray;
96 /* SECURITY_BUFFER for version info not present since we
97 do not set the version is present flag */
84} __attribute__((packed)) CHALLENGE_MESSAGE, *PCHALLENGE_MESSAGE; 98} __attribute__((packed)) CHALLENGE_MESSAGE, *PCHALLENGE_MESSAGE;
85 99
86typedef struct _AUTHENTICATE_MESSAGE { 100typedef struct _AUTHENTICATE_MESSAGE {
87 __u8 Signature[sizeof (NTLMSSP_SIGNATURE)]; 101 __u8 Signature[sizeof(NTLMSSP_SIGNATURE)];
88 __le32 MessageType; /* 3 */ 102 __le32 MessageType; /* NtLmsAuthenticate = 3 */
89 SECURITY_BUFFER LmChallengeResponse; 103 SECURITY_BUFFER LmChallengeResponse;
90 SECURITY_BUFFER NtChallengeResponse; 104 SECURITY_BUFFER NtChallengeResponse;
91 SECURITY_BUFFER DomainName; 105 SECURITY_BUFFER DomainName;
@@ -93,5 +107,7 @@ typedef struct _AUTHENTICATE_MESSAGE {
93 SECURITY_BUFFER WorkstationName; 107 SECURITY_BUFFER WorkstationName;
94 SECURITY_BUFFER SessionKey; 108 SECURITY_BUFFER SessionKey;
95 __le32 NegotiateFlags; 109 __le32 NegotiateFlags;
110 /* SECURITY_BUFFER for version info not present since we
111 do not set the version is present flag */
96 char UserString[0]; 112 char UserString[0];
97} __attribute__((packed)) AUTHENTICATE_MESSAGE, *PAUTHENTICATE_MESSAGE; 113} __attribute__((packed)) AUTHENTICATE_MESSAGE, *PAUTHENTICATE_MESSAGE;
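All three NTLMSSP messages above describe their variable-length fields with SECURITY_BUFFER descriptors: a 16-bit length, a 16-bit maximum length, and (with the field renamed here) a 32-bit offset from the start of the message to the payload. The sketch below is a hedged, userspace-only illustration of filling such a descriptor while appending a payload after a fixed-size header; the field order mirrors the struct above, but pack_field() and the sizes are invented for the example, and a real encoder would also store the values little-endian (cpu_to_le16/32).

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Same field order as the SECURITY_BUFFER above (host byte order here). */
struct sec_buf {
	uint16_t length;
	uint16_t max_length;
	uint32_t buffer_offset;		/* offset from start of the message */
} __attribute__((packed));

/* Append 'len' bytes at *used and describe them in 'desc'. */
static void pack_field(unsigned char *msg, size_t *used,
		       struct sec_buf *desc, const void *data, uint16_t len)
{
	desc->length = len;
	desc->max_length = len;
	desc->buffer_offset = (uint32_t)*used;
	memcpy(msg + *used, data, len);
	*used += len;
}

int main(void)
{
	unsigned char msg[256] = { 0 };
	struct sec_buf domain;
	size_t used = 64;		/* pretend the fixed header is 64 bytes */

	pack_field(msg, &used, &domain, "WORKGROUP", 9);
	printf("domain at offset %u, %u bytes, message now %zu bytes\n",
	       (unsigned)domain.buffer_offset, (unsigned)domain.length, used);
	return 0;
}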
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index c2c01ff4c32..964e097c820 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -31,6 +31,13 @@
31#include "cifs_fs_sb.h" 31#include "cifs_fs_sb.h"
32#include "cifsfs.h" 32#include "cifsfs.h"
33 33
34/*
35 * To be safe - for UCS to UTF-8 with strings loaded with the rare long
36 * characters alloc more to account for such multibyte target UTF-8
37 * characters.
38 */
39#define UNICODE_NAME_MAX ((4 * NAME_MAX) + 2)
40
34#ifdef CONFIG_CIFS_DEBUG2 41#ifdef CONFIG_CIFS_DEBUG2
35static void dump_cifs_file_struct(struct file *file, char *label) 42static void dump_cifs_file_struct(struct file *file, char *label)
36{ 43{
@@ -239,6 +246,7 @@ static void fill_in_inode(struct inode *tmp_inode, int new_buf_type,
239 if (atomic_read(&cifsInfo->inUse) == 0) 246 if (atomic_read(&cifsInfo->inUse) == 0)
240 atomic_set(&cifsInfo->inUse, 1); 247 atomic_set(&cifsInfo->inUse, 1);
241 248
249 cifsInfo->server_eof = end_of_file;
242 spin_lock(&tmp_inode->i_lock); 250 spin_lock(&tmp_inode->i_lock);
243 if (is_size_safe_to_change(cifsInfo, end_of_file)) { 251 if (is_size_safe_to_change(cifsInfo, end_of_file)) {
244 /* can not safely change the file size here if the 252 /* can not safely change the file size here if the
@@ -375,6 +383,7 @@ static void unix_fill_in_inode(struct inode *tmp_inode,
375 tmp_inode->i_gid = le64_to_cpu(pfindData->Gid); 383 tmp_inode->i_gid = le64_to_cpu(pfindData->Gid);
376 tmp_inode->i_nlink = le64_to_cpu(pfindData->Nlinks); 384 tmp_inode->i_nlink = le64_to_cpu(pfindData->Nlinks);
377 385
386 cifsInfo->server_eof = end_of_file;
378 spin_lock(&tmp_inode->i_lock); 387 spin_lock(&tmp_inode->i_lock);
379 if (is_size_safe_to_change(cifsInfo, end_of_file)) { 388 if (is_size_safe_to_change(cifsInfo, end_of_file)) {
380 /* can not safely change the file size here if the 389 /* can not safely change the file size here if the
@@ -436,6 +445,38 @@ static void unix_fill_in_inode(struct inode *tmp_inode,
436 } 445 }
437} 446}
438 447
448/* BB eventually need to add the following helper function to
449 resolve NT_STATUS_STOPPED_ON_SYMLINK return code when
450 we try to do FindFirst on (NTFS) directory symlinks */
451/*
452int get_symlink_reparse_path(char *full_path, struct cifs_sb_info *cifs_sb,
453 int xid)
454{
455 __u16 fid;
456 int len;
457 int oplock = 0;
458 int rc;
459 struct cifsTconInfo *ptcon = cifs_sb->tcon;
460 char *tmpbuffer;
461
462 rc = CIFSSMBOpen(xid, ptcon, full_path, FILE_OPEN, GENERIC_READ,
463 OPEN_REPARSE_POINT, &fid, &oplock, NULL,
464 cifs_sb->local_nls,
465 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
466 if (!rc) {
467 tmpbuffer = kmalloc(maxpath);
468 rc = CIFSSMBQueryReparseLinkInfo(xid, ptcon, full_path,
469 tmpbuffer,
470 maxpath -1,
471 fid,
472 cifs_sb->local_nls);
473 if (CIFSSMBClose(xid, ptcon, fid)) {
474 cFYI(1, ("Error closing temporary reparsepoint open)"));
475 }
476 }
477}
478 */
479
439static int initiate_cifs_search(const int xid, struct file *file) 480static int initiate_cifs_search(const int xid, struct file *file)
440{ 481{
441 int rc = 0; 482 int rc = 0;
@@ -491,7 +532,10 @@ ffirst_retry:
491 CIFS_MOUNT_MAP_SPECIAL_CHR, CIFS_DIR_SEP(cifs_sb)); 532 CIFS_MOUNT_MAP_SPECIAL_CHR, CIFS_DIR_SEP(cifs_sb));
492 if (rc == 0) 533 if (rc == 0)
493 cifsFile->invalidHandle = false; 534 cifsFile->invalidHandle = false;
494 if ((rc == -EOPNOTSUPP) && 535 /* BB add following call to handle readdir on new NTFS symlink errors
536 else if STATUS_STOPPED_ON_SYMLINK
537 call get_symlink_reparse_path and retry with new path */
538 else if ((rc == -EOPNOTSUPP) &&
495 (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM)) { 539 (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM)) {
496 cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_SERVER_INUM; 540 cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_SERVER_INUM;
497 goto ffirst_retry; 541 goto ffirst_retry;
@@ -820,7 +864,7 @@ static int find_cifs_entry(const int xid, struct cifsTconInfo *pTcon,
820/* inode num, inode type and filename returned */ 864/* inode num, inode type and filename returned */
821static int cifs_get_name_from_search_buf(struct qstr *pqst, 865static int cifs_get_name_from_search_buf(struct qstr *pqst,
822 char *current_entry, __u16 level, unsigned int unicode, 866 char *current_entry, __u16 level, unsigned int unicode,
823 struct cifs_sb_info *cifs_sb, int max_len, __u64 *pinum) 867 struct cifs_sb_info *cifs_sb, unsigned int max_len, __u64 *pinum)
824{ 868{
825 int rc = 0; 869 int rc = 0;
826 unsigned int len = 0; 870 unsigned int len = 0;
@@ -840,7 +884,7 @@ static int cifs_get_name_from_search_buf(struct qstr *pqst,
840 len = strnlen(filename, PATH_MAX); 884 len = strnlen(filename, PATH_MAX);
841 } 885 }
842 886
843 *pinum = pFindData->UniqueId; 887 *pinum = le64_to_cpu(pFindData->UniqueId);
844 } else if (level == SMB_FIND_FILE_DIRECTORY_INFO) { 888 } else if (level == SMB_FIND_FILE_DIRECTORY_INFO) {
845 FILE_DIRECTORY_INFO *pFindData = 889 FILE_DIRECTORY_INFO *pFindData =
846 (FILE_DIRECTORY_INFO *)current_entry; 890 (FILE_DIRECTORY_INFO *)current_entry;
@@ -856,7 +900,7 @@ static int cifs_get_name_from_search_buf(struct qstr *pqst,
856 (SEARCH_ID_FULL_DIR_INFO *)current_entry; 900 (SEARCH_ID_FULL_DIR_INFO *)current_entry;
857 filename = &pFindData->FileName[0]; 901 filename = &pFindData->FileName[0];
858 len = le32_to_cpu(pFindData->FileNameLength); 902 len = le32_to_cpu(pFindData->FileNameLength);
859 *pinum = pFindData->UniqueId; 903 *pinum = le64_to_cpu(pFindData->UniqueId);
860 } else if (level == SMB_FIND_FILE_BOTH_DIRECTORY_INFO) { 904 } else if (level == SMB_FIND_FILE_BOTH_DIRECTORY_INFO) {
861 FILE_BOTH_DIRECTORY_INFO *pFindData = 905 FILE_BOTH_DIRECTORY_INFO *pFindData =
862 (FILE_BOTH_DIRECTORY_INFO *)current_entry; 906 (FILE_BOTH_DIRECTORY_INFO *)current_entry;
@@ -879,14 +923,12 @@ static int cifs_get_name_from_search_buf(struct qstr *pqst,
879 } 923 }
880 924
881 if (unicode) { 925 if (unicode) {
882 /* BB fixme - test with long names */ 926 pqst->len = cifs_from_ucs2((char *) pqst->name,
883 /* Note converted filename can be longer than in unicode */ 927 (__le16 *) filename,
884 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR) 928 UNICODE_NAME_MAX,
885 pqst->len = cifs_convertUCSpath((char *)pqst->name, 929 min(len, max_len), nlt,
886 (__le16 *)filename, len/2, nlt); 930 cifs_sb->mnt_cifs_flags &
887 else 931 CIFS_MOUNT_MAP_SPECIAL_CHR);
888 pqst->len = cifs_strfromUCS_le((char *)pqst->name,
889 (__le16 *)filename, len/2, nlt);
890 } else { 932 } else {
891 pqst->name = filename; 933 pqst->name = filename;
892 pqst->len = len; 934 pqst->len = len;
@@ -896,8 +938,8 @@ static int cifs_get_name_from_search_buf(struct qstr *pqst,
896 return rc; 938 return rc;
897} 939}
898 940
899static int cifs_filldir(char *pfindEntry, struct file *file, 941static int cifs_filldir(char *pfindEntry, struct file *file, filldir_t filldir,
900 filldir_t filldir, void *direntry, char *scratch_buf, int max_len) 942 void *direntry, char *scratch_buf, unsigned int max_len)
901{ 943{
902 int rc = 0; 944 int rc = 0;
903 struct qstr qstring; 945 struct qstr qstring;
@@ -994,7 +1036,7 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir)
994 int num_to_fill = 0; 1036 int num_to_fill = 0;
995 char *tmp_buf = NULL; 1037 char *tmp_buf = NULL;
996 char *end_of_smb; 1038 char *end_of_smb;
997 int max_len; 1039 unsigned int max_len;
998 1040
999 xid = GetXid(); 1041 xid = GetXid();
1000 1042
@@ -1068,11 +1110,7 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir)
1068 cifsFile->srch_inf.ntwrk_buf_start); 1110 cifsFile->srch_inf.ntwrk_buf_start);
1069 end_of_smb = cifsFile->srch_inf.ntwrk_buf_start + max_len; 1111 end_of_smb = cifsFile->srch_inf.ntwrk_buf_start + max_len;
1070 1112
1071 /* To be safe - for UCS to UTF-8 with strings loaded 1113 tmp_buf = kmalloc(UNICODE_NAME_MAX, GFP_KERNEL);
1072 with the rare long characters alloc more to account for
1073 such multibyte target UTF-8 characters. cifs_unicode.c,
1074 which actually does the conversion, has the same limit */
1075 tmp_buf = kmalloc((2 * NAME_MAX) + 4, GFP_KERNEL);
1076 for (i = 0; (i < num_to_fill) && (rc == 0); i++) { 1114 for (i = 0; (i < num_to_fill) && (rc == 0); i++) {
1077 if (current_entry == NULL) { 1115 if (current_entry == NULL) {
1078 /* evaluate whether this case is an error */ 1116 /* evaluate whether this case is an error */
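Two of the readdir.c hunks above fix an endianness bug: the server's 64-bit UniqueId arrives little-endian and must pass through le64_to_cpu() before being used as an inode number, otherwise big-endian hosts see scrambled values. As a hedged illustration of why the conversion matters, the snippet below decodes a little-endian 64-bit field byte by byte so it yields the same value on any host; it is not the kernel helper itself.

#include <stdint.h>
#include <stdio.h>

/* Decode a little-endian 64-bit field regardless of host byte order. */
static uint64_t get_le64(const unsigned char *p)
{
	uint64_t v = 0;
	for (int i = 7; i >= 0; i--)
		v = (v << 8) | p[i];
	return v;
}

int main(void)
{
	/* UniqueId 0x0000000000abcdef as it appears on the wire. */
	unsigned char wire[8] = { 0xef, 0xcd, 0xab, 0, 0, 0, 0, 0 };
	printf("inode number: 0x%llx\n",
	       (unsigned long long)get_le64(wire));
	return 0;
}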
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index 5c68b4282be..897a052270f 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -3,7 +3,7 @@
3 * 3 *
4 * SMB/CIFS session setup handling routines 4 * SMB/CIFS session setup handling routines
5 * 5 *
6 * Copyright (c) International Business Machines Corp., 2006, 2007 6 * Copyright (c) International Business Machines Corp., 2006, 2009
7 * Author(s): Steve French (sfrench@us.ibm.com) 7 * Author(s): Steve French (sfrench@us.ibm.com)
8 * 8 *
9 * This library is free software; you can redistribute it and/or modify 9 * This library is free software; you can redistribute it and/or modify
@@ -111,7 +111,7 @@ static __le16 get_next_vcnum(struct cifsSesInfo *ses)
111get_vc_num_exit: 111get_vc_num_exit:
112 write_unlock(&cifs_tcp_ses_lock); 112 write_unlock(&cifs_tcp_ses_lock);
113 113
114 return le16_to_cpu(vcnum); 114 return cpu_to_le16(vcnum);
115} 115}
116 116
117static __u32 cifs_ssetup_hdr(struct cifsSesInfo *ses, SESSION_SETUP_ANDX *pSMB) 117static __u32 cifs_ssetup_hdr(struct cifsSesInfo *ses, SESSION_SETUP_ANDX *pSMB)
@@ -277,85 +277,51 @@ static void ascii_ssetup_strings(char **pbcc_area, struct cifsSesInfo *ses,
277 *pbcc_area = bcc_ptr; 277 *pbcc_area = bcc_ptr;
278} 278}
279 279
280static int decode_unicode_ssetup(char **pbcc_area, int bleft, 280static void
281 struct cifsSesInfo *ses, 281decode_unicode_ssetup(char **pbcc_area, int bleft, struct cifsSesInfo *ses,
282 const struct nls_table *nls_cp) 282 const struct nls_table *nls_cp)
283{ 283{
284 int rc = 0; 284 int len;
285 int words_left, len;
286 char *data = *pbcc_area; 285 char *data = *pbcc_area;
287 286
288
289
290 cFYI(1, ("bleft %d", bleft)); 287 cFYI(1, ("bleft %d", bleft));
291 288
292 289 /*
293 /* SMB header is unaligned, so cifs servers word align start of 290 * Windows servers do not always double null terminate their final
294 Unicode strings */ 291 * Unicode string. Check to see if there are an uneven number of bytes
295 data++; 292 * left. If so, then add an extra NULL pad byte to the end of the
296 bleft--; /* Windows servers do not always double null terminate 293 * response.
297 their final Unicode string - in which case we 294 *
298 now will not attempt to decode the byte of junk 295 * See section 2.7.2 in "Implementing CIFS" for details
299 which follows it */ 296 */
300 297 if (bleft % 2) {
301 words_left = bleft / 2; 298 data[bleft] = 0;
302 299 ++bleft;
303 /* save off server operating system */ 300 }
304 len = UniStrnlen((wchar_t *) data, words_left);
305
306/* We look for obvious messed up bcc or strings in response so we do not go off
307 the end since (at least) WIN2K and Windows XP have a major bug in not null
308 terminating last Unicode string in response */
309 if (len >= words_left)
310 return rc;
311 301
312 kfree(ses->serverOS); 302 kfree(ses->serverOS);
313 /* UTF-8 string will not grow more than four times as big as UCS-16 */ 303 ses->serverOS = cifs_strndup_from_ucs(data, bleft, true, nls_cp);
314 ses->serverOS = kzalloc((4 * len) + 2 /* trailing null */, GFP_KERNEL); 304 cFYI(1, ("serverOS=%s", ses->serverOS));
315 if (ses->serverOS != NULL) 305 len = (UniStrnlen((wchar_t *) data, bleft / 2) * 2) + 2;
316 cifs_strfromUCS_le(ses->serverOS, (__le16 *)data, len, nls_cp); 306 data += len;
317 data += 2 * (len + 1); 307 bleft -= len;
318 words_left -= len + 1; 308 if (bleft <= 0)
319 309 return;
320 /* save off server network operating system */
321 len = UniStrnlen((wchar_t *) data, words_left);
322
323 if (len >= words_left)
324 return rc;
325 310
326 kfree(ses->serverNOS); 311 kfree(ses->serverNOS);
327 ses->serverNOS = kzalloc((4 * len) + 2 /* trailing null */, GFP_KERNEL); 312 ses->serverNOS = cifs_strndup_from_ucs(data, bleft, true, nls_cp);
328 if (ses->serverNOS != NULL) { 313 cFYI(1, ("serverNOS=%s", ses->serverNOS));
329 cifs_strfromUCS_le(ses->serverNOS, (__le16 *)data, len, 314 len = (UniStrnlen((wchar_t *) data, bleft / 2) * 2) + 2;
330 nls_cp); 315 data += len;
331 if (strncmp(ses->serverNOS, "NT LAN Manager 4", 16) == 0) { 316 bleft -= len;
332 cFYI(1, ("NT4 server")); 317 if (bleft <= 0)
333 ses->flags |= CIFS_SES_NT4; 318 return;
334 }
335 }
336 data += 2 * (len + 1);
337 words_left -= len + 1;
338
339 /* save off server domain */
340 len = UniStrnlen((wchar_t *) data, words_left);
341
342 if (len > words_left)
343 return rc;
344 319
345 kfree(ses->serverDomain); 320 kfree(ses->serverDomain);
346 ses->serverDomain = kzalloc(2 * (len + 1), GFP_KERNEL); /* BB FIXME wrong length */ 321 ses->serverDomain = cifs_strndup_from_ucs(data, bleft, true, nls_cp);
347 if (ses->serverDomain != NULL) { 322 cFYI(1, ("serverDomain=%s", ses->serverDomain));
348 cifs_strfromUCS_le(ses->serverDomain, (__le16 *)data, len,
349 nls_cp);
350 ses->serverDomain[2*len] = 0;
351 ses->serverDomain[(2*len) + 1] = 0;
352 }
353 data += 2 * (len + 1);
354 words_left -= len + 1;
355 323
356 cFYI(1, ("words left: %d", words_left)); 324 return;
357
358 return rc;
359} 325}
360 326
361static int decode_ascii_ssetup(char **pbcc_area, int bleft, 327static int decode_ascii_ssetup(char **pbcc_area, int bleft,
@@ -412,6 +378,186 @@ static int decode_ascii_ssetup(char **pbcc_area, int bleft,
412 return rc; 378 return rc;
413} 379}
414 380
381static int decode_ntlmssp_challenge(char *bcc_ptr, int blob_len,
382 struct cifsSesInfo *ses)
383{
384 CHALLENGE_MESSAGE *pblob = (CHALLENGE_MESSAGE *)bcc_ptr;
385
386 if (blob_len < sizeof(CHALLENGE_MESSAGE)) {
387 cERROR(1, ("challenge blob len %d too small", blob_len));
388 return -EINVAL;
389 }
390
391 if (memcmp(pblob->Signature, "NTLMSSP", 8)) {
392 cERROR(1, ("blob signature incorrect %s", pblob->Signature));
393 return -EINVAL;
394 }
395 if (pblob->MessageType != NtLmChallenge) {
396 cERROR(1, ("Incorrect message type %d", pblob->MessageType));
397 return -EINVAL;
398 }
399
400 memcpy(ses->server->cryptKey, pblob->Challenge, CIFS_CRYPTO_KEY_SIZE);
401 /* BB we could decode pblob->NegotiateFlags; some may be useful */
402 /* In particular we can examine sign flags */
403 /* BB spec says that if AvId field of MsvAvTimestamp is populated then
404 we must set the MIC field of the AUTHENTICATE_MESSAGE */
405
406 return 0;
407}
408
409#ifdef CONFIG_CIFS_EXPERIMENTAL
410/* BB Move to ntlmssp.c eventually */
411
412/* We do not malloc the blob, it is passed in pbuffer, because
413 it is fixed size, and small, making this approach cleaner */
414static void build_ntlmssp_negotiate_blob(unsigned char *pbuffer,
415 struct cifsSesInfo *ses)
416{
417 NEGOTIATE_MESSAGE *sec_blob = (NEGOTIATE_MESSAGE *)pbuffer;
418 __u32 flags;
419
420 memcpy(sec_blob->Signature, NTLMSSP_SIGNATURE, 8);
421 sec_blob->MessageType = NtLmNegotiate;
422
423 /* BB is NTLMV2 session security format easier to use here? */
424 flags = NTLMSSP_NEGOTIATE_56 | NTLMSSP_REQUEST_TARGET |
425 NTLMSSP_NEGOTIATE_128 | NTLMSSP_NEGOTIATE_UNICODE |
426 NTLMSSP_NEGOTIATE_NT_ONLY | NTLMSSP_NEGOTIATE_NTLM;
427 if (ses->server->secMode &
428 (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))
429 flags |= NTLMSSP_NEGOTIATE_SIGN;
430 if (ses->server->secMode & SECMODE_SIGN_REQUIRED)
431 flags |= NTLMSSP_NEGOTIATE_ALWAYS_SIGN;
432
433 sec_blob->NegotiateFlags |= cpu_to_le32(flags);
434
435 sec_blob->WorkstationName.BufferOffset = 0;
436 sec_blob->WorkstationName.Length = 0;
437 sec_blob->WorkstationName.MaximumLength = 0;
438
439 /* Domain name is sent on the Challenge not Negotiate NTLMSSP request */
440 sec_blob->DomainName.BufferOffset = 0;
441 sec_blob->DomainName.Length = 0;
442 sec_blob->DomainName.MaximumLength = 0;
443}
444
445/* We do not malloc the blob, it is passed in pbuffer, because its
446 maximum possible size is fixed and small, making this approach cleaner.
447 This function returns the length of the data in the blob */
448static int build_ntlmssp_auth_blob(unsigned char *pbuffer,
449 struct cifsSesInfo *ses,
450 const struct nls_table *nls_cp, int first)
451{
452 AUTHENTICATE_MESSAGE *sec_blob = (AUTHENTICATE_MESSAGE *)pbuffer;
453 __u32 flags;
454 unsigned char *tmp;
455 char ntlm_session_key[CIFS_SESS_KEY_SIZE];
456
457 memcpy(sec_blob->Signature, NTLMSSP_SIGNATURE, 8);
458 sec_blob->MessageType = NtLmAuthenticate;
459
460 flags = NTLMSSP_NEGOTIATE_56 |
461 NTLMSSP_REQUEST_TARGET | NTLMSSP_NEGOTIATE_TARGET_INFO |
462 NTLMSSP_NEGOTIATE_128 | NTLMSSP_NEGOTIATE_UNICODE |
463 NTLMSSP_NEGOTIATE_NT_ONLY | NTLMSSP_NEGOTIATE_NTLM;
464 if (ses->server->secMode &
465 (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))
466 flags |= NTLMSSP_NEGOTIATE_SIGN;
467 if (ses->server->secMode & SECMODE_SIGN_REQUIRED)
468 flags |= NTLMSSP_NEGOTIATE_ALWAYS_SIGN;
469
470 tmp = pbuffer + sizeof(AUTHENTICATE_MESSAGE);
471 sec_blob->NegotiateFlags |= cpu_to_le32(flags);
472
473 sec_blob->LmChallengeResponse.BufferOffset =
474 cpu_to_le32(sizeof(AUTHENTICATE_MESSAGE));
475 sec_blob->LmChallengeResponse.Length = 0;
476 sec_blob->LmChallengeResponse.MaximumLength = 0;
477
478 /* calculate session key, BB what about adding similar ntlmv2 path? */
479 SMBNTencrypt(ses->password, ses->server->cryptKey, ntlm_session_key);
480 if (first)
481 cifs_calculate_mac_key(&ses->server->mac_signing_key,
482 ntlm_session_key, ses->password);
483
484 memcpy(tmp, ntlm_session_key, CIFS_SESS_KEY_SIZE);
485 sec_blob->NtChallengeResponse.BufferOffset = cpu_to_le32(tmp - pbuffer);
486 sec_blob->NtChallengeResponse.Length = cpu_to_le16(CIFS_SESS_KEY_SIZE);
487 sec_blob->NtChallengeResponse.MaximumLength =
488 cpu_to_le16(CIFS_SESS_KEY_SIZE);
489
490 tmp += CIFS_SESS_KEY_SIZE;
491
492 if (ses->domainName == NULL) {
493 sec_blob->DomainName.BufferOffset = cpu_to_le32(tmp - pbuffer);
494 sec_blob->DomainName.Length = 0;
495 sec_blob->DomainName.MaximumLength = 0;
496 tmp += 2;
497 } else {
498 int len;
499 len = cifs_strtoUCS((__le16 *)tmp, ses->domainName,
500 MAX_USERNAME_SIZE, nls_cp);
501 len *= 2; /* unicode is 2 bytes each */
502 len += 2; /* trailing null */
503 sec_blob->DomainName.BufferOffset = cpu_to_le32(tmp - pbuffer);
504 sec_blob->DomainName.Length = cpu_to_le16(len);
505 sec_blob->DomainName.MaximumLength = cpu_to_le16(len);
506 tmp += len;
507 }
508
509 if (ses->userName == NULL) {
510 sec_blob->UserName.BufferOffset = cpu_to_le32(tmp - pbuffer);
511 sec_blob->UserName.Length = 0;
512 sec_blob->UserName.MaximumLength = 0;
513 tmp += 2;
514 } else {
515 int len;
516 len = cifs_strtoUCS((__le16 *)tmp, ses->userName,
517 MAX_USERNAME_SIZE, nls_cp);
518 len *= 2; /* unicode is 2 bytes each */
519 len += 2; /* trailing null */
520 sec_blob->UserName.BufferOffset = cpu_to_le32(tmp - pbuffer);
521 sec_blob->UserName.Length = cpu_to_le16(len);
522 sec_blob->UserName.MaximumLength = cpu_to_le16(len);
523 tmp += len;
524 }
525
526 sec_blob->WorkstationName.BufferOffset = cpu_to_le32(tmp - pbuffer);
527 sec_blob->WorkstationName.Length = 0;
528 sec_blob->WorkstationName.MaximumLength = 0;
529 tmp += 2;
530
531 sec_blob->SessionKey.BufferOffset = cpu_to_le32(tmp - pbuffer);
532 sec_blob->SessionKey.Length = 0;
533 sec_blob->SessionKey.MaximumLength = 0;
534 return tmp - pbuffer;
535}
536
537
538static void setup_ntlmssp_neg_req(SESSION_SETUP_ANDX *pSMB,
539 struct cifsSesInfo *ses)
540{
541 build_ntlmssp_negotiate_blob(&pSMB->req.SecurityBlob[0], ses);
542 pSMB->req.SecurityBlobLength = cpu_to_le16(sizeof(NEGOTIATE_MESSAGE));
543
544 return;
545}
546
547static int setup_ntlmssp_auth_req(SESSION_SETUP_ANDX *pSMB,
548 struct cifsSesInfo *ses,
549 const struct nls_table *nls, int first_time)
550{
551 int bloblen;
552
553 bloblen = build_ntlmssp_auth_blob(&pSMB->req.SecurityBlob[0], ses, nls,
554 first_time);
555 pSMB->req.SecurityBlobLength = cpu_to_le16(bloblen);
556
557 return bloblen;
558}
559#endif
560
415int 561int
416CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, int first_time, 562CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, int first_time,
417 const struct nls_table *nls_cp) 563 const struct nls_table *nls_cp)
@@ -430,6 +576,7 @@ CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, int first_time,
430 __u16 action; 576 __u16 action;
431 int bytes_remaining; 577 int bytes_remaining;
432 struct key *spnego_key = NULL; 578 struct key *spnego_key = NULL;
579 __le32 phase = NtLmNegotiate; /* NTLMSSP, if needed, is multistage */
433 580
434 if (ses == NULL) 581 if (ses == NULL)
435 return -EINVAL; 582 return -EINVAL;
@@ -437,6 +584,10 @@ CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, int first_time,
437 type = ses->server->secType; 584 type = ses->server->secType;
438 585
439 cFYI(1, ("sess setup type %d", type)); 586 cFYI(1, ("sess setup type %d", type));
587ssetup_ntlmssp_authenticate:
588 if (phase == NtLmChallenge)
589 phase = NtLmAuthenticate; /* if ntlmssp, now final phase */
590
440 if (type == LANMAN) { 591 if (type == LANMAN) {
441#ifndef CONFIG_CIFS_WEAK_PW_HASH 592#ifndef CONFIG_CIFS_WEAK_PW_HASH
442 /* LANMAN and plaintext are less secure and off by default. 593 /* LANMAN and plaintext are less secure and off by default.
@@ -650,9 +801,53 @@ CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, int first_time,
650 goto ssetup_exit; 801 goto ssetup_exit;
651#endif /* CONFIG_CIFS_UPCALL */ 802#endif /* CONFIG_CIFS_UPCALL */
652 } else { 803 } else {
804#ifdef CONFIG_CIFS_EXPERIMENTAL
805 if ((experimEnabled > 1) && (type == RawNTLMSSP)) {
806 if ((pSMB->req.hdr.Flags2 & SMBFLG2_UNICODE) == 0) {
807 cERROR(1, ("NTLMSSP requires Unicode support"));
808 rc = -ENOSYS;
809 goto ssetup_exit;
810 }
811
812 cFYI(1, ("ntlmssp session setup phase %d", phase));
813 pSMB->req.hdr.Flags2 |= SMBFLG2_EXT_SEC;
814 capabilities |= CAP_EXTENDED_SECURITY;
815 pSMB->req.Capabilities |= cpu_to_le32(capabilities);
816 if (phase == NtLmNegotiate) {
817 setup_ntlmssp_neg_req(pSMB, ses);
818 iov[1].iov_len = sizeof(NEGOTIATE_MESSAGE);
819 } else if (phase == NtLmAuthenticate) {
820 int blob_len;
821 blob_len = setup_ntlmssp_auth_req(pSMB, ses,
822 nls_cp,
823 first_time);
824 iov[1].iov_len = blob_len;
825 /* Make sure that we tell the server that we
826 are using the uid that it just gave us back
827 on the response (challenge) */
828 smb_buf->Uid = ses->Suid;
829 } else {
830 cERROR(1, ("invalid phase %d", phase));
831 rc = -ENOSYS;
832 goto ssetup_exit;
833 }
834 iov[1].iov_base = &pSMB->req.SecurityBlob[0];
835 /* unicode strings must be word aligned */
836 if ((iov[0].iov_len + iov[1].iov_len) % 2) {
837 *bcc_ptr = 0;
838 bcc_ptr++;
839 }
840 unicode_oslm_strings(&bcc_ptr, nls_cp);
841 } else {
842 cERROR(1, ("secType %d not supported!", type));
843 rc = -ENOSYS;
844 goto ssetup_exit;
845 }
846#else
653 cERROR(1, ("secType %d not supported!", type)); 847 cERROR(1, ("secType %d not supported!", type));
654 rc = -ENOSYS; 848 rc = -ENOSYS;
655 goto ssetup_exit; 849 goto ssetup_exit;
850#endif
656 } 851 }
657 852
658 iov[2].iov_base = str_area; 853 iov[2].iov_base = str_area;
@@ -668,12 +863,23 @@ CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, int first_time,
668 /* SMB request buf freed in SendReceive2 */ 863 /* SMB request buf freed in SendReceive2 */
669 864
670 cFYI(1, ("ssetup rc from sendrecv2 is %d", rc)); 865 cFYI(1, ("ssetup rc from sendrecv2 is %d", rc));
671 if (rc)
672 goto ssetup_exit;
673 866
674 pSMB = (SESSION_SETUP_ANDX *)iov[0].iov_base; 867 pSMB = (SESSION_SETUP_ANDX *)iov[0].iov_base;
675 smb_buf = (struct smb_hdr *)iov[0].iov_base; 868 smb_buf = (struct smb_hdr *)iov[0].iov_base;
676 869
870 if ((type == RawNTLMSSP) && (smb_buf->Status.CifsError ==
871 cpu_to_le32(NT_STATUS_MORE_PROCESSING_REQUIRED))) {
872 if (phase != NtLmNegotiate) {
873 cERROR(1, ("Unexpected more processing error"));
874 goto ssetup_exit;
875 }
876 /* NTLMSSP Negotiate sent now processing challenge (response) */
877 phase = NtLmChallenge; /* process ntlmssp challenge */
878 rc = 0; /* MORE_PROC rc is not an error here, but expected */
879 }
880 if (rc)
881 goto ssetup_exit;
882
677 if ((smb_buf->WordCount != 3) && (smb_buf->WordCount != 4)) { 883 if ((smb_buf->WordCount != 3) && (smb_buf->WordCount != 4)) {
678 rc = -EIO; 884 rc = -EIO;
679 cERROR(1, ("bad word count %d", smb_buf->WordCount)); 885 cERROR(1, ("bad word count %d", smb_buf->WordCount));
@@ -692,22 +898,33 @@ CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, int first_time,
692 if (smb_buf->WordCount == 4) { 898 if (smb_buf->WordCount == 4) {
693 __u16 blob_len; 899 __u16 blob_len;
694 blob_len = le16_to_cpu(pSMB->resp.SecurityBlobLength); 900 blob_len = le16_to_cpu(pSMB->resp.SecurityBlobLength);
695 bcc_ptr += blob_len;
696 if (blob_len > bytes_remaining) { 901 if (blob_len > bytes_remaining) {
697 cERROR(1, ("bad security blob length %d", blob_len)); 902 cERROR(1, ("bad security blob length %d", blob_len));
698 rc = -EINVAL; 903 rc = -EINVAL;
699 goto ssetup_exit; 904 goto ssetup_exit;
700 } 905 }
906 if (phase == NtLmChallenge) {
907 rc = decode_ntlmssp_challenge(bcc_ptr, blob_len, ses);
908 /* now goto beginning for ntlmssp authenticate phase */
909 if (rc)
910 goto ssetup_exit;
911 }
912 bcc_ptr += blob_len;
701 bytes_remaining -= blob_len; 913 bytes_remaining -= blob_len;
702 } 914 }
703 915
704 /* BB check if Unicode and decode strings */ 916 /* BB check if Unicode and decode strings */
705 if (smb_buf->Flags2 & SMBFLG2_UNICODE) 917 if (smb_buf->Flags2 & SMBFLG2_UNICODE) {
706 rc = decode_unicode_ssetup(&bcc_ptr, bytes_remaining, 918 /* unicode string area must be word-aligned */
707 ses, nls_cp); 919 if (((unsigned long) bcc_ptr - (unsigned long) smb_buf) % 2) {
708 else 920 ++bcc_ptr;
921 --bytes_remaining;
922 }
923 decode_unicode_ssetup(&bcc_ptr, bytes_remaining, ses, nls_cp);
924 } else {
709 rc = decode_ascii_ssetup(&bcc_ptr, bytes_remaining, 925 rc = decode_ascii_ssetup(&bcc_ptr, bytes_remaining,
710 ses, nls_cp); 926 ses, nls_cp);
927 }
711 928
712ssetup_exit: 929ssetup_exit:
713 if (spnego_key) { 930 if (spnego_key) {
@@ -721,5 +938,9 @@ ssetup_exit:
721 } else if (resp_buf_type == CIFS_LARGE_BUFFER) 938 } else if (resp_buf_type == CIFS_LARGE_BUFFER)
722 cifs_buf_release(iov[0].iov_base); 939 cifs_buf_release(iov[0].iov_base);
723 940
941 /* if ntlmssp, and negotiate succeeded, proceed to authenticate phase */
942 if ((phase == NtLmChallenge) && (rc == 0))
943 goto ssetup_ntlmssp_authenticate;
944
724 return rc; 945 return rc;
725} 946}
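The new CIFS_SessSetup() flow above is a small state machine: send the NTLMSSP Negotiate blob, treat NT_STATUS_MORE_PROCESSING_REQUIRED in the reply as expected rather than fatal, decode the Challenge (saving the server's Uid and crypt key), then jump back to the top to send the Authenticate blob. The sketch below is a hypothetical, self-contained rendering of just that control flow with a stubbed round-trip function; it assumes each exchange either succeeds, fails, or asks for more processing.

#include <stdio.h>

enum phase { NEGOTIATE, CHALLENGE, AUTHENTICATE };
enum status { ST_OK, ST_MORE_PROCESSING, ST_FAIL };

/* Stub for one SMB SESSION_SETUP round trip. */
static enum status send_setup(enum phase p)
{
	/* A real server answers the Negotiate with "more processing
	 * required" plus a challenge blob, and the Authenticate with OK. */
	return (p == NEGOTIATE) ? ST_MORE_PROCESSING : ST_OK;
}

static int sess_setup(void)
{
	enum phase phase = NEGOTIATE;
	enum status st;

again:
	if (phase == CHALLENGE)
		phase = AUTHENTICATE;	/* challenge decoded, final phase */

	st = send_setup(phase);
	if (st == ST_MORE_PROCESSING && phase == NEGOTIATE) {
		/* Not an error here: decode the challenge, then loop. */
		phase = CHALLENGE;
		st = ST_OK;
	}
	if (st != ST_OK)
		return -1;

	if (phase == CHALLENGE)
		goto again;		/* proceed to authenticate phase */

	printf("session established\n");
	return 0;
}

int main(void)
{
	return sess_setup() ? 1 : 0;
}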
diff --git a/fs/cifs/smberr.h b/fs/cifs/smberr.h
index 7f50e8577c1..c5084d27db7 100644
--- a/fs/cifs/smberr.h
+++ b/fs/cifs/smberr.h
@@ -110,6 +110,7 @@
110 110
111/* Below errors are used internally (do not come over the wire) for passthrough 111/* Below errors are used internally (do not come over the wire) for passthrough
112 from STATUS codes to POSIX only */ 112 from STATUS codes to POSIX only */
113#define ERRsymlink 0xFFFD
113#define ErrTooManyLinks 0xFFFE 114#define ErrTooManyLinks 0xFFFE
114 115
115/* Following error codes may be generated with the ERRSRV error class.*/ 116/* Following error codes may be generated with the ERRSRV error class.*/
diff --git a/fs/compat.c b/fs/compat.c
index 3f84d5f1588..681ed81e6be 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -181,22 +181,24 @@ asmlinkage long compat_sys_newstat(char __user * filename,
181 struct compat_stat __user *statbuf) 181 struct compat_stat __user *statbuf)
182{ 182{
183 struct kstat stat; 183 struct kstat stat;
184 int error = vfs_stat_fd(AT_FDCWD, filename, &stat); 184 int error;
185 185
186 if (!error) 186 error = vfs_stat(filename, &stat);
187 error = cp_compat_stat(&stat, statbuf); 187 if (error)
188 return error; 188 return error;
189 return cp_compat_stat(&stat, statbuf);
189} 190}
190 191
191asmlinkage long compat_sys_newlstat(char __user * filename, 192asmlinkage long compat_sys_newlstat(char __user * filename,
192 struct compat_stat __user *statbuf) 193 struct compat_stat __user *statbuf)
193{ 194{
194 struct kstat stat; 195 struct kstat stat;
195 int error = vfs_lstat_fd(AT_FDCWD, filename, &stat); 196 int error;
196 197
197 if (!error) 198 error = vfs_lstat(filename, &stat);
198 error = cp_compat_stat(&stat, statbuf); 199 if (error)
199 return error; 200 return error;
201 return cp_compat_stat(&stat, statbuf);
200} 202}
201 203
202#ifndef __ARCH_WANT_STAT64 204#ifndef __ARCH_WANT_STAT64
@@ -204,21 +206,12 @@ asmlinkage long compat_sys_newfstatat(unsigned int dfd, char __user *filename,
204 struct compat_stat __user *statbuf, int flag) 206 struct compat_stat __user *statbuf, int flag)
205{ 207{
206 struct kstat stat; 208 struct kstat stat;
207 int error = -EINVAL; 209 int error;
208
209 if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0)
210 goto out;
211
212 if (flag & AT_SYMLINK_NOFOLLOW)
213 error = vfs_lstat_fd(dfd, filename, &stat);
214 else
215 error = vfs_stat_fd(dfd, filename, &stat);
216
217 if (!error)
218 error = cp_compat_stat(&stat, statbuf);
219 210
220out: 211 error = vfs_fstatat(dfd, filename, &stat, flag);
221 return error; 212 if (error)
213 return error;
214 return cp_compat_stat(&stat, statbuf);
222} 215}
223#endif 216#endif
224 217
@@ -1483,6 +1476,7 @@ int compat_do_execve(char * filename,
1483 struct linux_binprm *bprm; 1476 struct linux_binprm *bprm;
1484 struct file *file; 1477 struct file *file;
1485 struct files_struct *displaced; 1478 struct files_struct *displaced;
1479 bool clear_in_exec;
1486 int retval; 1480 int retval;
1487 1481
1488 retval = unshare_files(&displaced); 1482 retval = unshare_files(&displaced);
@@ -1505,8 +1499,9 @@ int compat_do_execve(char * filename,
1505 goto out_unlock; 1499 goto out_unlock;
1506 1500
1507 retval = check_unsafe_exec(bprm); 1501 retval = check_unsafe_exec(bprm);
1508 if (retval) 1502 if (retval < 0)
1509 goto out_unlock; 1503 goto out_unlock;
1504 clear_in_exec = retval;
1510 1505
1511 file = open_exec(filename); 1506 file = open_exec(filename);
1512 retval = PTR_ERR(file); 1507 retval = PTR_ERR(file);
@@ -1553,9 +1548,7 @@ int compat_do_execve(char * filename,
1553 goto out; 1548 goto out;
1554 1549
1555 /* execve succeeded */ 1550 /* execve succeeded */
1556 write_lock(&current->fs->lock);
1557 current->fs->in_exec = 0; 1551 current->fs->in_exec = 0;
1558 write_unlock(&current->fs->lock);
1559 current->in_execve = 0; 1552 current->in_execve = 0;
1560 mutex_unlock(&current->cred_exec_mutex); 1553 mutex_unlock(&current->cred_exec_mutex);
1561 acct_update_integrals(current); 1554 acct_update_integrals(current);
@@ -1575,9 +1568,8 @@ out_file:
1575 } 1568 }
1576 1569
1577out_unmark: 1570out_unmark:
1578 write_lock(&current->fs->lock); 1571 if (clear_in_exec)
1579 current->fs->in_exec = 0; 1572 current->fs->in_exec = 0;
1580 write_unlock(&current->fs->lock);
1581 1573
1582out_unlock: 1574out_unlock:
1583 current->in_execve = 0; 1575 current->in_execve = 0;
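The fs/compat.c hunks above replace the separate vfs_stat_fd()/vfs_lstat_fd() call sites with a single vfs_fstatat() helper and straighten the error handling into early returns. The equivalent userspace shape, shown below purely as an illustration, is to serve both the stat and lstat flavours through one fstatat(2) call that differs only in the AT_SYMLINK_NOFOLLOW flag.

#include <fcntl.h>
#include <stdio.h>
#include <sys/stat.h>

/* One helper serves both the stat and lstat flavours. */
static int do_newstat(const char *path, struct stat *st, int flags)
{
	int error = fstatat(AT_FDCWD, path, st, flags);
	if (error)
		return -1;		/* errno already set */
	return 0;
}

int main(void)
{
	struct stat st;

	if (do_newstat("/tmp", &st, 0) == 0)
		printf("stat:  inode %llu\n", (unsigned long long)st.st_ino);
	if (do_newstat("/tmp", &st, AT_SYMLINK_NOFOLLOW) == 0)
		printf("lstat: inode %llu\n", (unsigned long long)st.st_ino);
	return 0;
}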
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 3e87ce443ea..b83f6bcfa51 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -58,7 +58,6 @@
58#include <linux/i2c.h> 58#include <linux/i2c.h>
59#include <linux/i2c-dev.h> 59#include <linux/i2c-dev.h>
60#include <linux/atalk.h> 60#include <linux/atalk.h>
61#include <linux/loop.h>
62 61
63#include <net/bluetooth/bluetooth.h> 62#include <net/bluetooth/bluetooth.h>
64#include <net/bluetooth/hci.h> 63#include <net/bluetooth/hci.h>
@@ -68,6 +67,7 @@
68#include <linux/gigaset_dev.h> 67#include <linux/gigaset_dev.h>
69 68
70#ifdef CONFIG_BLOCK 69#ifdef CONFIG_BLOCK
70#include <linux/loop.h>
71#include <scsi/scsi.h> 71#include <scsi/scsi.h>
72#include <scsi/scsi_ioctl.h> 72#include <scsi/scsi_ioctl.h>
73#include <scsi/sg.h> 73#include <scsi/sg.h>
@@ -2660,6 +2660,8 @@ HANDLE_IOCTL(SONET_GETFRAMING, do_atm_ioctl)
2660HANDLE_IOCTL(SONET_GETFRSENSE, do_atm_ioctl) 2660HANDLE_IOCTL(SONET_GETFRSENSE, do_atm_ioctl)
2661/* block stuff */ 2661/* block stuff */
2662#ifdef CONFIG_BLOCK 2662#ifdef CONFIG_BLOCK
2663/* loop */
2664IGNORE_IOCTL(LOOP_CLR_FD)
2663/* Raw devices */ 2665/* Raw devices */
2664HANDLE_IOCTL(RAW_SETBIND, raw_ioctl) 2666HANDLE_IOCTL(RAW_SETBIND, raw_ioctl)
2665HANDLE_IOCTL(RAW_GETBIND, raw_ioctl) 2667HANDLE_IOCTL(RAW_GETBIND, raw_ioctl)
@@ -2728,9 +2730,6 @@ HANDLE_IOCTL(LPSETTIMEOUT, lp_timeout_trans)
2728IGNORE_IOCTL(VFAT_IOCTL_READDIR_BOTH32) 2730IGNORE_IOCTL(VFAT_IOCTL_READDIR_BOTH32)
2729IGNORE_IOCTL(VFAT_IOCTL_READDIR_SHORT32) 2731IGNORE_IOCTL(VFAT_IOCTL_READDIR_SHORT32)
2730 2732
2731/* loop */
2732IGNORE_IOCTL(LOOP_CLR_FD)
2733
2734#ifdef CONFIG_SPARC 2733#ifdef CONFIG_SPARC
2735/* Sparc framebuffers, handled in sbusfb_compat_ioctl() */ 2734/* Sparc framebuffers, handled in sbusfb_compat_ioctl() */
2736IGNORE_IOCTL(FBIOGTYPE) 2735IGNORE_IOCTL(FBIOGTYPE)
diff --git a/fs/configfs/symlink.c b/fs/configfs/symlink.c
index 932a92b3148..c8afa6b1d91 100644
--- a/fs/configfs/symlink.c
+++ b/fs/configfs/symlink.c
@@ -135,7 +135,7 @@ int configfs_symlink(struct inode *dir, struct dentry *dentry, const char *symna
135 struct path path; 135 struct path path;
136 struct configfs_dirent *sd; 136 struct configfs_dirent *sd;
137 struct config_item *parent_item; 137 struct config_item *parent_item;
138 struct config_item *target_item; 138 struct config_item *target_item = NULL;
139 struct config_item_type *type; 139 struct config_item_type *type;
140 140
141 ret = -EPERM; /* What lack-of-symlink returns */ 141 ret = -EPERM; /* What lack-of-symlink returns */
diff --git a/fs/dcache.c b/fs/dcache.c
index 761d30be268..75659a6fd1f 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -481,7 +481,7 @@ restart:
481 if ((flags & DCACHE_REFERENCED) 481 if ((flags & DCACHE_REFERENCED)
482 && (dentry->d_flags & DCACHE_REFERENCED)) { 482 && (dentry->d_flags & DCACHE_REFERENCED)) {
483 dentry->d_flags &= ~DCACHE_REFERENCED; 483 dentry->d_flags &= ~DCACHE_REFERENCED;
484 list_move_tail(&dentry->d_lru, &referenced); 484 list_move(&dentry->d_lru, &referenced);
485 spin_unlock(&dentry->d_lock); 485 spin_unlock(&dentry->d_lock);
486 } else { 486 } else {
487 list_move_tail(&dentry->d_lru, &tmp); 487 list_move_tail(&dentry->d_lru, &tmp);
@@ -2149,7 +2149,6 @@ int is_subdir(struct dentry *new_dentry, struct dentry *old_dentry)
2149 int result; 2149 int result;
2150 unsigned long seq; 2150 unsigned long seq;
2151 2151
2152 /* FIXME: This is old behavior, needed? Please check callers. */
2153 if (new_dentry == old_dentry) 2152 if (new_dentry == old_dentry)
2154 return 1; 2153 return 1;
2155 2154
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index 63a4a59e414..c68edb96944 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -90,6 +90,15 @@ static inline struct super_block *pts_sb_from_inode(struct inode *inode)
90#define PARSE_MOUNT 0 90#define PARSE_MOUNT 0
91#define PARSE_REMOUNT 1 91#define PARSE_REMOUNT 1
92 92
93/*
94 * parse_mount_options():
95 * Set @opts to mount options specified in @data. If an option is not
96 * specified in @data, set it to its default value. The exception is
97 * 'newinstance' option which can only be set/cleared on a mount (i.e.
98 * cannot be changed during remount).
99 *
100 * Note: @data may be NULL (in which case all options are set to default).
101 */
93static int parse_mount_options(char *data, int op, struct pts_mount_opts *opts) 102static int parse_mount_options(char *data, int op, struct pts_mount_opts *opts)
94{ 103{
95 char *p; 104 char *p;
@@ -355,12 +364,9 @@ static int devpts_get_sb(struct file_system_type *fs_type,
355 struct pts_mount_opts opts; 364 struct pts_mount_opts opts;
356 struct super_block *s; 365 struct super_block *s;
357 366
358 memset(&opts, 0, sizeof(opts)); 367 error = parse_mount_options(data, PARSE_MOUNT, &opts);
359 if (data) { 368 if (error)
360 error = parse_mount_options(data, PARSE_MOUNT, &opts); 369 return error;
361 if (error)
362 return error;
363 }
364 370
365 if (opts.newinstance) 371 if (opts.newinstance)
366 s = sget(fs_type, NULL, set_anon_super, NULL); 372 s = sget(fs_type, NULL, set_anon_super, NULL);
@@ -389,11 +395,10 @@ static int devpts_get_sb(struct file_system_type *fs_type,
389 return 0; 395 return 0;
390 396
391out_dput: 397out_dput:
392 dput(s->s_root); 398 dput(s->s_root); /* undo dget() in simple_set_mnt() */
393 399
394out_undo_sget: 400out_undo_sget:
395 up_write(&s->s_umount); 401 deactivate_locked_super(s);
396 deactivate_super(s);
397 return error; 402 return error;
398} 403}
399 404
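With the devpts change above, parse_mount_options() is documented to reset every option to its default before applying @data and to accept a NULL @data, so devpts_get_sb() no longer needs its memset() or the NULL special case. A hypothetical userspace parser with the same contract might look like the sketch below; the option names and default values are invented for the example.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct opts { int newinstance; long mode; };

/* Reset to defaults first, then apply "flag,key=value" style data.
 * data may be NULL, in which case only the defaults remain. */
static int parse_mount_options(const char *data, struct opts *o)
{
	char *copy, *p, *save = NULL;

	o->newinstance = 0;		/* defaults */
	o->mode = 0600;

	if (!data)
		return 0;
	copy = strdup(data);
	if (!copy)
		return -1;

	for (p = strtok_r(copy, ",", &save); p; p = strtok_r(NULL, ",", &save)) {
		if (strcmp(p, "newinstance") == 0)
			o->newinstance = 1;
		else if (strncmp(p, "mode=", 5) == 0)
			o->mode = strtol(p + 5, NULL, 8);
	}
	free(copy);
	return 0;
}

int main(void)
{
	struct opts o;

	parse_mount_options("newinstance,mode=620", &o);
	printf("newinstance=%d mode=%lo\n", o.newinstance, o.mode);
	parse_mount_options(NULL, &o);
	printf("newinstance=%d mode=%lo\n", o.newinstance, o.mode);
	return 0;
}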
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index 8b65f289ee0..b91851f1cda 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -483,15 +483,7 @@ int ecryptfs_encrypt_page(struct page *page)
483 ecryptfs_inode = page->mapping->host; 483 ecryptfs_inode = page->mapping->host;
484 crypt_stat = 484 crypt_stat =
485 &(ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat); 485 &(ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat);
486 if (!(crypt_stat->flags & ECRYPTFS_ENCRYPTED)) { 486 BUG_ON(!(crypt_stat->flags & ECRYPTFS_ENCRYPTED));
487 rc = ecryptfs_write_lower_page_segment(ecryptfs_inode, page,
488 0, PAGE_CACHE_SIZE);
489 if (rc)
490 printk(KERN_ERR "%s: Error attempting to copy "
491 "page at index [%ld]\n", __func__,
492 page->index);
493 goto out;
494 }
495 enc_extent_page = alloc_page(GFP_USER); 487 enc_extent_page = alloc_page(GFP_USER);
496 if (!enc_extent_page) { 488 if (!enc_extent_page) {
497 rc = -ENOMEM; 489 rc = -ENOMEM;
@@ -620,16 +612,7 @@ int ecryptfs_decrypt_page(struct page *page)
620 ecryptfs_inode = page->mapping->host; 612 ecryptfs_inode = page->mapping->host;
621 crypt_stat = 613 crypt_stat =
622 &(ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat); 614 &(ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat);
623 if (!(crypt_stat->flags & ECRYPTFS_ENCRYPTED)) { 615 BUG_ON(!(crypt_stat->flags & ECRYPTFS_ENCRYPTED));
624 rc = ecryptfs_read_lower_page_segment(page, page->index, 0,
625 PAGE_CACHE_SIZE,
626 ecryptfs_inode);
627 if (rc)
628 printk(KERN_ERR "%s: Error attempting to copy "
629 "page at index [%ld]\n", __func__,
630 page->index);
631 goto out;
632 }
633 enc_extent_page = alloc_page(GFP_USER); 616 enc_extent_page = alloc_page(GFP_USER);
634 if (!enc_extent_page) { 617 if (!enc_extent_page) {
635 rc = -ENOMEM; 618 rc = -ENOMEM;
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index 064c5820e4e..00b30a2d546 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -269,6 +269,7 @@ struct ecryptfs_crypt_stat {
269#define ECRYPTFS_ENCRYPT_FILENAMES 0x00000800 269#define ECRYPTFS_ENCRYPT_FILENAMES 0x00000800
270#define ECRYPTFS_ENCFN_USE_MOUNT_FNEK 0x00001000 270#define ECRYPTFS_ENCFN_USE_MOUNT_FNEK 0x00001000
271#define ECRYPTFS_ENCFN_USE_FEK 0x00002000 271#define ECRYPTFS_ENCFN_USE_FEK 0x00002000
272#define ECRYPTFS_UNLINK_SIGS 0x00004000
272 u32 flags; 273 u32 flags;
273 unsigned int file_version; 274 unsigned int file_version;
274 size_t iv_bytes; 275 size_t iv_bytes;
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 55b3145b807..2f0945d6329 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -379,9 +379,11 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
379 goto out_d_drop; 379 goto out_d_drop;
380 } 380 }
381 lower_dir_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry->d_parent); 381 lower_dir_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry->d_parent);
382 mutex_lock(&lower_dir_dentry->d_inode->i_mutex);
382 lower_dentry = lookup_one_len(ecryptfs_dentry->d_name.name, 383 lower_dentry = lookup_one_len(ecryptfs_dentry->d_name.name,
383 lower_dir_dentry, 384 lower_dir_dentry,
384 ecryptfs_dentry->d_name.len); 385 ecryptfs_dentry->d_name.len);
386 mutex_unlock(&lower_dir_dentry->d_inode->i_mutex);
385 if (IS_ERR(lower_dentry)) { 387 if (IS_ERR(lower_dentry)) {
386 rc = PTR_ERR(lower_dentry); 388 rc = PTR_ERR(lower_dentry);
387 printk(KERN_ERR "%s: lookup_one_len() returned [%d] on " 389 printk(KERN_ERR "%s: lookup_one_len() returned [%d] on "
@@ -406,9 +408,11 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
406 "filename; rc = [%d]\n", __func__, rc); 408 "filename; rc = [%d]\n", __func__, rc);
407 goto out_d_drop; 409 goto out_d_drop;
408 } 410 }
411 mutex_lock(&lower_dir_dentry->d_inode->i_mutex);
409 lower_dentry = lookup_one_len(encrypted_and_encoded_name, 412 lower_dentry = lookup_one_len(encrypted_and_encoded_name,
410 lower_dir_dentry, 413 lower_dir_dentry,
411 encrypted_and_encoded_name_size - 1); 414 encrypted_and_encoded_name_size - 1);
415 mutex_unlock(&lower_dir_dentry->d_inode->i_mutex);
412 if (IS_ERR(lower_dentry)) { 416 if (IS_ERR(lower_dentry)) {
413 rc = PTR_ERR(lower_dentry); 417 rc = PTR_ERR(lower_dentry);
414 printk(KERN_ERR "%s: lookup_one_len() returned [%d] on " 418 printk(KERN_ERR "%s: lookup_one_len() returned [%d] on "
@@ -636,8 +640,9 @@ static int
636ecryptfs_readlink(struct dentry *dentry, char __user *buf, int bufsiz) 640ecryptfs_readlink(struct dentry *dentry, char __user *buf, int bufsiz)
637{ 641{
638 char *lower_buf; 642 char *lower_buf;
643 size_t lower_bufsiz;
639 struct dentry *lower_dentry; 644 struct dentry *lower_dentry;
640 struct ecryptfs_crypt_stat *crypt_stat; 645 struct ecryptfs_mount_crypt_stat *mount_crypt_stat;
641 char *plaintext_name; 646 char *plaintext_name;
642 size_t plaintext_name_size; 647 size_t plaintext_name_size;
643 mm_segment_t old_fs; 648 mm_segment_t old_fs;
@@ -648,12 +653,21 @@ ecryptfs_readlink(struct dentry *dentry, char __user *buf, int bufsiz)
648 rc = -EINVAL; 653 rc = -EINVAL;
649 goto out; 654 goto out;
650 } 655 }
651 crypt_stat = &ecryptfs_inode_to_private(dentry->d_inode)->crypt_stat; 656 mount_crypt_stat = &ecryptfs_superblock_to_private(
657 dentry->d_sb)->mount_crypt_stat;
658 /*
659 * If the lower filename is encrypted, it will result in a significantly
660 * longer name. If needed, truncate the name after decode and decrypt.
661 */
662 if (mount_crypt_stat->flags & ECRYPTFS_GLOBAL_ENCRYPT_FILENAMES)
663 lower_bufsiz = PATH_MAX;
664 else
665 lower_bufsiz = bufsiz;
652 /* Released in this function */ 666 /* Released in this function */
653 lower_buf = kmalloc(bufsiz, GFP_KERNEL); 667 lower_buf = kmalloc(lower_bufsiz, GFP_KERNEL);
654 if (lower_buf == NULL) { 668 if (lower_buf == NULL) {
655 printk(KERN_ERR "%s: Out of memory whilst attempting to " 669 printk(KERN_ERR "%s: Out of memory whilst attempting to "
656 "kmalloc [%d] bytes\n", __func__, bufsiz); 670 "kmalloc [%zd] bytes\n", __func__, lower_bufsiz);
657 rc = -ENOMEM; 671 rc = -ENOMEM;
658 goto out; 672 goto out;
659 } 673 }
@@ -661,7 +675,7 @@ ecryptfs_readlink(struct dentry *dentry, char __user *buf, int bufsiz)
661 set_fs(get_ds()); 675 set_fs(get_ds());
662 rc = lower_dentry->d_inode->i_op->readlink(lower_dentry, 676 rc = lower_dentry->d_inode->i_op->readlink(lower_dentry,
663 (char __user *)lower_buf, 677 (char __user *)lower_buf,
664 bufsiz); 678 lower_bufsiz);
665 set_fs(old_fs); 679 set_fs(old_fs);
666 if (rc >= 0) { 680 if (rc >= 0) {
667 rc = ecryptfs_decode_and_decrypt_filename(&plaintext_name, 681 rc = ecryptfs_decode_and_decrypt_filename(&plaintext_name,
@@ -674,7 +688,9 @@ ecryptfs_readlink(struct dentry *dentry, char __user *buf, int bufsiz)
674 rc); 688 rc);
675 goto out_free_lower_buf; 689 goto out_free_lower_buf;
676 } 690 }
677 rc = copy_to_user(buf, plaintext_name, plaintext_name_size); 691 /* Check for bufsiz <= 0 done in sys_readlinkat() */
692 rc = copy_to_user(buf, plaintext_name,
693 min((size_t) bufsiz, plaintext_name_size));
678 if (rc) 694 if (rc)
679 rc = -EFAULT; 695 rc = -EFAULT;
680 else 696 else
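
The copy-out above caps what reaches the caller at the caller's buffer size, since a decoded-and-decrypted name can be much longer than the lower, encoded one, mirroring readlink(2)'s silent-truncation semantics. A minimal userspace model of that truncation (illustrative only; copy_to_user() is stood in for by memcpy(), and the names are made up):

#include <stdio.h>
#include <string.h>

static size_t copy_out(char *buf, size_t bufsiz,
		       const char *plaintext_name, size_t plaintext_name_size)
{
	size_t n = plaintext_name_size < bufsiz ? plaintext_name_size : bufsiz;

	memcpy(buf, plaintext_name, n);	/* stands in for copy_to_user() */
	return n;
}

int main(void)
{
	char buf[8];
	size_t n = copy_out(buf, sizeof(buf), "decrypted-target-name", 21);

	printf("copied %zu of %d bytes\n", n, 21);
	return 0;
}
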
@@ -814,6 +830,13 @@ int ecryptfs_truncate(struct dentry *dentry, loff_t new_length)
814 size_t num_zeros = (PAGE_CACHE_SIZE 830 size_t num_zeros = (PAGE_CACHE_SIZE
815 - (new_length & ~PAGE_CACHE_MASK)); 831 - (new_length & ~PAGE_CACHE_MASK));
816 832
833 if (!(crypt_stat->flags & ECRYPTFS_ENCRYPTED)) {
834 rc = vmtruncate(inode, new_length);
835 if (rc)
836 goto out_free;
837 rc = vmtruncate(lower_dentry->d_inode, new_length);
838 goto out_free;
839 }
817 if (num_zeros) { 840 if (num_zeros) {
818 char *zeros_virt; 841 char *zeros_virt;
819 842
@@ -915,8 +938,6 @@ static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia)
915 } 938 }
916 rc = 0; 939 rc = 0;
917 crypt_stat->flags &= ~(ECRYPTFS_ENCRYPTED); 940 crypt_stat->flags &= ~(ECRYPTFS_ENCRYPTED);
918 mutex_unlock(&crypt_stat->cs_mutex);
919 goto out;
920 } 941 }
921 } 942 }
922 mutex_unlock(&crypt_stat->cs_mutex); 943 mutex_unlock(&crypt_stat->cs_mutex);
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index aed56c25539..9f0aa9883c2 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -190,14 +190,14 @@ int ecryptfs_interpose(struct dentry *lower_dentry, struct dentry *dentry,
190 init_special_inode(inode, lower_inode->i_mode, 190 init_special_inode(inode, lower_inode->i_mode,
191 lower_inode->i_rdev); 191 lower_inode->i_rdev);
192 dentry->d_op = &ecryptfs_dops; 192 dentry->d_op = &ecryptfs_dops;
193 if (flags & ECRYPTFS_INTERPOSE_FLAG_D_ADD)
194 d_add(dentry, inode);
195 else
196 d_instantiate(dentry, inode);
197 fsstack_copy_attr_all(inode, lower_inode, NULL); 193 fsstack_copy_attr_all(inode, lower_inode, NULL);
198 /* This size will be overwritten for real files w/ headers and 194 /* This size will be overwritten for real files w/ headers and
199 * other metadata */ 195 * other metadata */
200 fsstack_copy_inode_size(inode, lower_inode); 196 fsstack_copy_inode_size(inode, lower_inode);
197 if (flags & ECRYPTFS_INTERPOSE_FLAG_D_ADD)
198 d_add(dentry, inode);
199 else
200 d_instantiate(dentry, inode);
201out: 201out:
202 return rc; 202 return rc;
203} 203}
@@ -208,7 +208,7 @@ enum { ecryptfs_opt_sig, ecryptfs_opt_ecryptfs_sig,
208 ecryptfs_opt_passthrough, ecryptfs_opt_xattr_metadata, 208 ecryptfs_opt_passthrough, ecryptfs_opt_xattr_metadata,
209 ecryptfs_opt_encrypted_view, ecryptfs_opt_fnek_sig, 209 ecryptfs_opt_encrypted_view, ecryptfs_opt_fnek_sig,
210 ecryptfs_opt_fn_cipher, ecryptfs_opt_fn_cipher_key_bytes, 210 ecryptfs_opt_fn_cipher, ecryptfs_opt_fn_cipher_key_bytes,
211 ecryptfs_opt_err }; 211 ecryptfs_opt_unlink_sigs, ecryptfs_opt_err };
212 212
213static const match_table_t tokens = { 213static const match_table_t tokens = {
214 {ecryptfs_opt_sig, "sig=%s"}, 214 {ecryptfs_opt_sig, "sig=%s"},
@@ -222,6 +222,7 @@ static const match_table_t tokens = {
222 {ecryptfs_opt_fnek_sig, "ecryptfs_fnek_sig=%s"}, 222 {ecryptfs_opt_fnek_sig, "ecryptfs_fnek_sig=%s"},
223 {ecryptfs_opt_fn_cipher, "ecryptfs_fn_cipher=%s"}, 223 {ecryptfs_opt_fn_cipher, "ecryptfs_fn_cipher=%s"},
224 {ecryptfs_opt_fn_cipher_key_bytes, "ecryptfs_fn_key_bytes=%u"}, 224 {ecryptfs_opt_fn_cipher_key_bytes, "ecryptfs_fn_key_bytes=%u"},
225 {ecryptfs_opt_unlink_sigs, "ecryptfs_unlink_sigs"},
225 {ecryptfs_opt_err, NULL} 226 {ecryptfs_opt_err, NULL}
226}; 227};
227 228
@@ -402,6 +403,9 @@ static int ecryptfs_parse_options(struct super_block *sb, char *options)
402 fn_cipher_key_bytes; 403 fn_cipher_key_bytes;
403 fn_cipher_key_bytes_set = 1; 404 fn_cipher_key_bytes_set = 1;
404 break; 405 break;
406 case ecryptfs_opt_unlink_sigs:
407 mount_crypt_stat->flags |= ECRYPTFS_UNLINK_SIGS;
408 break;
405 case ecryptfs_opt_err: 409 case ecryptfs_opt_err:
406 default: 410 default:
407 printk(KERN_WARNING 411 printk(KERN_WARNING
@@ -610,9 +614,8 @@ static int ecryptfs_get_sb(struct file_system_type *fs_type, int flags,
610 } 614 }
611 goto out; 615 goto out;
612out_abort: 616out_abort:
613 dput(sb->s_root); 617 dput(sb->s_root); /* aka mnt->mnt_root, as set by get_sb_nodev() */
614 up_write(&sb->s_umount); 618 deactivate_locked_super(sb);
615 deactivate_super(sb);
616out: 619out:
617 return rc; 620 return rc;
618} 621}
diff --git a/fs/ecryptfs/messaging.c b/fs/ecryptfs/messaging.c
index 295e7fa5675..f1c17e87c5f 100644
--- a/fs/ecryptfs/messaging.c
+++ b/fs/ecryptfs/messaging.c
@@ -133,45 +133,6 @@ out:
133 return rc; 133 return rc;
134} 134}
135 135
136static int
137ecryptfs_send_message_locked(char *data, int data_len, u8 msg_type,
138 struct ecryptfs_msg_ctx **msg_ctx);
139
140/**
141 * ecryptfs_send_raw_message
142 * @msg_type: Message type
143 * @daemon: Daemon struct for recipient of message
144 *
145 * A raw message is one that does not include an ecryptfs_message
146 * struct. It simply has a type.
147 *
148 * Must be called with ecryptfs_daemon_hash_mux held.
149 *
150 * Returns zero on success; non-zero otherwise
151 */
152static int ecryptfs_send_raw_message(u8 msg_type,
153 struct ecryptfs_daemon *daemon)
154{
155 struct ecryptfs_msg_ctx *msg_ctx;
156 int rc;
157
158 rc = ecryptfs_send_message_locked(NULL, 0, msg_type, &msg_ctx);
159 if (rc) {
160 printk(KERN_ERR "%s: Error whilst attempting to send "
161 "message to ecryptfsd; rc = [%d]\n", __func__, rc);
162 goto out;
163 }
164 /* Raw messages are logically context-free (e.g., no
165 * reply is expected), so we set the state of the
166 * ecryptfs_msg_ctx object to indicate that it should
167 * be freed as soon as the message is sent. */
168 mutex_lock(&msg_ctx->mux);
169 msg_ctx->state = ECRYPTFS_MSG_CTX_STATE_NO_REPLY;
170 mutex_unlock(&msg_ctx->mux);
171out:
172 return rc;
173}
174
175/** 136/**
176 * ecryptfs_spawn_daemon - Create and initialize a new daemon struct 137 * ecryptfs_spawn_daemon - Create and initialize a new daemon struct
177 * @daemon: Pointer to set to newly allocated daemon struct 138 * @daemon: Pointer to set to newly allocated daemon struct
@@ -212,49 +173,6 @@ out:
212} 173}
213 174
214/** 175/**
215 * ecryptfs_process_helo
216 * @euid: The user ID owner of the message
217 * @user_ns: The namespace in which @euid applies
218 * @pid: The process ID for the userspace program that sent the
219 * message
220 *
221 * Adds the euid and pid values to the daemon euid hash. If an euid
222 * already has a daemon pid registered, the daemon will be
223 * unregistered before the new daemon is put into the hash list.
224 * Returns zero after adding a new daemon to the hash list;
225 * non-zero otherwise.
226 */
227int ecryptfs_process_helo(uid_t euid, struct user_namespace *user_ns,
228 struct pid *pid)
229{
230 struct ecryptfs_daemon *new_daemon;
231 struct ecryptfs_daemon *old_daemon;
232 int rc;
233
234 mutex_lock(&ecryptfs_daemon_hash_mux);
235 rc = ecryptfs_find_daemon_by_euid(&old_daemon, euid, user_ns);
236 if (rc != 0) {
237 printk(KERN_WARNING "Received request from user [%d] "
238 "to register daemon [0x%p]; unregistering daemon "
239 "[0x%p]\n", euid, pid, old_daemon->pid);
240 rc = ecryptfs_send_raw_message(ECRYPTFS_MSG_QUIT, old_daemon);
241 if (rc)
242 printk(KERN_WARNING "Failed to send QUIT "
243 "message to daemon [0x%p]; rc = [%d]\n",
244 old_daemon->pid, rc);
245 hlist_del(&old_daemon->euid_chain);
246 kfree(old_daemon);
247 }
248 rc = ecryptfs_spawn_daemon(&new_daemon, euid, user_ns, pid);
249 if (rc)
250 printk(KERN_ERR "%s: The gods are displeased with this attempt "
251 "to create a new daemon object for euid [%d]; pid "
252 "[0x%p]; rc = [%d]\n", __func__, euid, pid, rc);
253 mutex_unlock(&ecryptfs_daemon_hash_mux);
254 return rc;
255}
256
257/**
258 * ecryptfs_exorcise_daemon - Destroy the daemon struct 176 * ecryptfs_exorcise_daemon - Destroy the daemon struct
259 * 177 *
260 * Must be called ceremoniously while in possession of 178 * Must be called ceremoniously while in possession of
diff --git a/fs/ecryptfs/miscdev.c b/fs/ecryptfs/miscdev.c
index a67fea655f4..4ec8f61ccf5 100644
--- a/fs/ecryptfs/miscdev.c
+++ b/fs/ecryptfs/miscdev.c
@@ -193,26 +193,20 @@ int ecryptfs_send_miscdev(char *data, size_t data_size,
193 int rc = 0; 193 int rc = 0;
194 194
195 mutex_lock(&msg_ctx->mux); 195 mutex_lock(&msg_ctx->mux);
196 if (data) { 196 msg_ctx->msg = kmalloc((sizeof(*msg_ctx->msg) + data_size),
197 msg_ctx->msg = kmalloc((sizeof(*msg_ctx->msg) + data_size), 197 GFP_KERNEL);
198 GFP_KERNEL); 198 if (!msg_ctx->msg) {
199 if (!msg_ctx->msg) { 199 rc = -ENOMEM;
200 rc = -ENOMEM; 200 printk(KERN_ERR "%s: Out of memory whilst attempting "
201 printk(KERN_ERR "%s: Out of memory whilst attempting " 201 "to kmalloc(%zd, GFP_KERNEL)\n", __func__,
202 "to kmalloc(%zd, GFP_KERNEL)\n", __func__, 202 (sizeof(*msg_ctx->msg) + data_size));
203 (sizeof(*msg_ctx->msg) + data_size)); 203 goto out_unlock;
204 goto out_unlock; 204 }
205 }
206 } else
207 msg_ctx->msg = NULL;
208 msg_ctx->msg->index = msg_ctx->index; 205 msg_ctx->msg->index = msg_ctx->index;
209 msg_ctx->msg->data_len = data_size; 206 msg_ctx->msg->data_len = data_size;
210 msg_ctx->type = msg_type; 207 msg_ctx->type = msg_type;
211 if (data) { 208 memcpy(msg_ctx->msg->data, data, data_size);
212 memcpy(msg_ctx->msg->data, data, data_size); 209 msg_ctx->msg_size = (sizeof(*msg_ctx->msg) + data_size);
213 msg_ctx->msg_size = (sizeof(*msg_ctx->msg) + data_size);
214 } else
215 msg_ctx->msg_size = 0;
216 mutex_lock(&daemon->mux); 210 mutex_lock(&daemon->mux);
217 list_add_tail(&msg_ctx->daemon_out_list, &daemon->msg_ctx_out_queue); 211 list_add_tail(&msg_ctx->daemon_out_list, &daemon->msg_ctx_out_queue);
218 daemon->num_queued_msg_ctx++; 212 daemon->num_queued_msg_ctx++;
@@ -418,18 +412,13 @@ ecryptfs_miscdev_write(struct file *file, const char __user *buf,
418 412
419 if (count == 0) 413 if (count == 0)
420 goto out; 414 goto out;
421 data = kmalloc(count, GFP_KERNEL); 415
422 if (!data) { 416 data = memdup_user(buf, count);
423 printk(KERN_ERR "%s: Out of memory whilst attempting to " 417 if (IS_ERR(data)) {
424 "kmalloc([%zd], GFP_KERNEL)\n", __func__, count); 418 printk(KERN_ERR "%s: memdup_user returned error [%ld]\n",
419 __func__, PTR_ERR(data));
425 goto out; 420 goto out;
426 } 421 }
427 rc = copy_from_user(data, buf, count);
428 if (rc) {
429 printk(KERN_ERR "%s: copy_from_user returned error [%d]\n",
430 __func__, rc);
431 goto out_free;
432 }
433 sz = count; 422 sz = count;
434 i = 0; 423 i = 0;
435 switch (data[i++]) { 424 switch (data[i++]) {
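
The write path above now leans on memdup_user(), which folds the allocate-then-copy_from_user() sequence into one call that reports failure through an error pointer. A rough sketch of what the helper does for its caller (approximate and for illustration only, not the in-tree implementation):

#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/err.h>

static void *memdup_user_sketch(const void __user *src, size_t len)
{
	void *p = kmalloc(len, GFP_KERNEL);

	if (!p)
		return ERR_PTR(-ENOMEM);
	if (copy_from_user(p, src, len)) {
		kfree(p);
		return ERR_PTR(-EFAULT);
	}
	return p;
}

The caller then checks the result with IS_ERR()/PTR_ERR() instead of testing for NULL and handling a separate copy error, which is the shape ecryptfs_miscdev_write() switches to in the hunk above.
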
diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c
index 46cec2b6979..5c6bab9786e 100644
--- a/fs/ecryptfs/mmap.c
+++ b/fs/ecryptfs/mmap.c
@@ -449,6 +449,7 @@ int ecryptfs_write_inode_size_to_metadata(struct inode *ecryptfs_inode)
449 struct ecryptfs_crypt_stat *crypt_stat; 449 struct ecryptfs_crypt_stat *crypt_stat;
450 450
451 crypt_stat = &ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat; 451 crypt_stat = &ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat;
452 BUG_ON(!(crypt_stat->flags & ECRYPTFS_ENCRYPTED));
452 if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR) 453 if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR)
453 return ecryptfs_write_inode_size_to_xattr(ecryptfs_inode); 454 return ecryptfs_write_inode_size_to_xattr(ecryptfs_inode);
454 else 455 else
@@ -490,6 +491,16 @@ static int ecryptfs_write_end(struct file *file,
490 ecryptfs_printk(KERN_DEBUG, "Not a new file\n"); 491 ecryptfs_printk(KERN_DEBUG, "Not a new file\n");
491 ecryptfs_printk(KERN_DEBUG, "Calling fill_zeros_to_end_of_page" 492 ecryptfs_printk(KERN_DEBUG, "Calling fill_zeros_to_end_of_page"
492 "(page w/ index = [0x%.16x], to = [%d])\n", index, to); 493 "(page w/ index = [0x%.16x], to = [%d])\n", index, to);
494 if (!(crypt_stat->flags & ECRYPTFS_ENCRYPTED)) {
495 rc = ecryptfs_write_lower_page_segment(ecryptfs_inode, page, 0,
496 to);
497 if (!rc) {
498 rc = copied;
499 fsstack_copy_inode_size(ecryptfs_inode,
500 ecryptfs_inode_to_lower(ecryptfs_inode));
501 }
502 goto out;
503 }
493 /* Fills in zeros if 'to' goes beyond inode size */ 504 /* Fills in zeros if 'to' goes beyond inode size */
494 rc = fill_zeros_to_end_of_page(page, to); 505 rc = fill_zeros_to_end_of_page(page, to);
495 if (rc) { 506 if (rc) {
diff --git a/fs/ecryptfs/read_write.c b/fs/ecryptfs/read_write.c
index 75c2ea9fee3..a137c6ea2fe 100644
--- a/fs/ecryptfs/read_write.c
+++ b/fs/ecryptfs/read_write.c
@@ -117,13 +117,15 @@ int ecryptfs_write(struct file *ecryptfs_file, char *data, loff_t offset,
117 size_t size) 117 size_t size)
118{ 118{
119 struct page *ecryptfs_page; 119 struct page *ecryptfs_page;
120 struct ecryptfs_crypt_stat *crypt_stat;
121 struct inode *ecryptfs_inode = ecryptfs_file->f_dentry->d_inode;
120 char *ecryptfs_page_virt; 122 char *ecryptfs_page_virt;
121 loff_t ecryptfs_file_size = 123 loff_t ecryptfs_file_size = i_size_read(ecryptfs_inode);
122 i_size_read(ecryptfs_file->f_dentry->d_inode);
123 loff_t data_offset = 0; 124 loff_t data_offset = 0;
124 loff_t pos; 125 loff_t pos;
125 int rc = 0; 126 int rc = 0;
126 127
128 crypt_stat = &ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat;
127 /* 129 /*
128 * if we are writing beyond current size, then start pos 130 * if we are writing beyond current size, then start pos
129 * at the current size - we'll fill in zeros from there. 131 * at the current size - we'll fill in zeros from there.
@@ -184,7 +186,13 @@ int ecryptfs_write(struct file *ecryptfs_file, char *data, loff_t offset,
184 flush_dcache_page(ecryptfs_page); 186 flush_dcache_page(ecryptfs_page);
185 SetPageUptodate(ecryptfs_page); 187 SetPageUptodate(ecryptfs_page);
186 unlock_page(ecryptfs_page); 188 unlock_page(ecryptfs_page);
187 rc = ecryptfs_encrypt_page(ecryptfs_page); 189 if (crypt_stat->flags & ECRYPTFS_ENCRYPTED)
190 rc = ecryptfs_encrypt_page(ecryptfs_page);
191 else
192 rc = ecryptfs_write_lower_page_segment(ecryptfs_inode,
193 ecryptfs_page,
194 start_offset_in_page,
195 data_offset);
188 page_cache_release(ecryptfs_page); 196 page_cache_release(ecryptfs_page);
189 if (rc) { 197 if (rc) {
190 printk(KERN_ERR "%s: Error encrypting " 198 printk(KERN_ERR "%s: Error encrypting "
@@ -194,14 +202,16 @@ int ecryptfs_write(struct file *ecryptfs_file, char *data, loff_t offset,
194 pos += num_bytes; 202 pos += num_bytes;
195 } 203 }
196 if ((offset + size) > ecryptfs_file_size) { 204 if ((offset + size) > ecryptfs_file_size) {
197 i_size_write(ecryptfs_file->f_dentry->d_inode, (offset + size)); 205 i_size_write(ecryptfs_inode, (offset + size));
198 rc = ecryptfs_write_inode_size_to_metadata( 206 if (crypt_stat->flags & ECRYPTFS_ENCRYPTED) {
199 ecryptfs_file->f_dentry->d_inode); 207 rc = ecryptfs_write_inode_size_to_metadata(
200 if (rc) { 208 ecryptfs_inode);
201 printk(KERN_ERR "Problem with " 209 if (rc) {
202 "ecryptfs_write_inode_size_to_metadata; " 210 printk(KERN_ERR "Problem with "
203 "rc = [%d]\n", rc); 211 "ecryptfs_write_inode_size_to_metadata; "
204 goto out; 212 "rc = [%d]\n", rc);
213 goto out;
214 }
205 } 215 }
206 } 216 }
207out: 217out:
diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c
index c27ac2b358a..fa4c7e7d15d 100644
--- a/fs/ecryptfs/super.c
+++ b/fs/ecryptfs/super.c
@@ -170,7 +170,10 @@ static int ecryptfs_show_options(struct seq_file *m, struct vfsmount *mnt)
170 list_for_each_entry(walker, 170 list_for_each_entry(walker,
171 &mount_crypt_stat->global_auth_tok_list, 171 &mount_crypt_stat->global_auth_tok_list,
172 mount_crypt_stat_list) { 172 mount_crypt_stat_list) {
173 seq_printf(m, ",ecryptfs_sig=%s", walker->sig); 173 if (walker->flags & ECRYPTFS_AUTH_TOK_FNEK)
174 seq_printf(m, ",ecryptfs_fnek_sig=%s", walker->sig);
175 else
176 seq_printf(m, ",ecryptfs_sig=%s", walker->sig);
174 } 177 }
175 mutex_unlock(&mount_crypt_stat->global_auth_tok_list_mutex); 178 mutex_unlock(&mount_crypt_stat->global_auth_tok_list_mutex);
176 179
@@ -186,6 +189,8 @@ static int ecryptfs_show_options(struct seq_file *m, struct vfsmount *mnt)
186 seq_printf(m, ",ecryptfs_xattr_metadata"); 189 seq_printf(m, ",ecryptfs_xattr_metadata");
187 if (mount_crypt_stat->flags & ECRYPTFS_ENCRYPTED_VIEW_ENABLED) 190 if (mount_crypt_stat->flags & ECRYPTFS_ENCRYPTED_VIEW_ENABLED)
188 seq_printf(m, ",ecryptfs_encrypted_view"); 191 seq_printf(m, ",ecryptfs_encrypted_view");
192 if (mount_crypt_stat->flags & ECRYPTFS_UNLINK_SIGS)
193 seq_printf(m, ",ecryptfs_unlink_sigs");
189 194
190 return 0; 195 return 0;
191} 196}
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index a89f370fadb..5458e80fc55 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1212,7 +1212,7 @@ SYSCALL_DEFINE1(epoll_create1, int, flags)
1212 1212
1213SYSCALL_DEFINE1(epoll_create, int, size) 1213SYSCALL_DEFINE1(epoll_create, int, size)
1214{ 1214{
1215 if (size < 0) 1215 if (size <= 0)
1216 return -EINVAL; 1216 return -EINVAL;
1217 1217
1218 return sys_epoll_create1(0); 1218 return sys_epoll_create1(0);
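
With the tightened check, epoll_create() now rejects a size of zero as well as negative values; any positive hint is still accepted and then ignored, since the argument only exists for backwards compatibility. A small userspace check of that behaviour (illustrative):

#include <stdio.h>
#include <errno.h>
#include <unistd.h>
#include <sys/epoll.h>

int main(void)
{
	int fd = epoll_create(0);		/* now fails: size <= 0 */

	if (fd < 0 && errno == EINVAL)
		printf("size 0 rejected\n");

	fd = epoll_create(1);			/* positive hint, value ignored */
	if (fd >= 0) {
		printf("epoll instance created\n");
		close(fd);
	}
	return 0;
}
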
diff --git a/fs/exec.c b/fs/exec.c
index 052a961e41a..895823d0149 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -69,17 +69,18 @@ int suid_dumpable = 0;
69static LIST_HEAD(formats); 69static LIST_HEAD(formats);
70static DEFINE_RWLOCK(binfmt_lock); 70static DEFINE_RWLOCK(binfmt_lock);
71 71
72int register_binfmt(struct linux_binfmt * fmt) 72int __register_binfmt(struct linux_binfmt * fmt, int insert)
73{ 73{
74 if (!fmt) 74 if (!fmt)
75 return -EINVAL; 75 return -EINVAL;
76 write_lock(&binfmt_lock); 76 write_lock(&binfmt_lock);
77 list_add(&fmt->lh, &formats); 77 insert ? list_add(&fmt->lh, &formats) :
78 list_add_tail(&fmt->lh, &formats);
78 write_unlock(&binfmt_lock); 79 write_unlock(&binfmt_lock);
79 return 0; 80 return 0;
80} 81}
81 82
82EXPORT_SYMBOL(register_binfmt); 83EXPORT_SYMBOL(__register_binfmt);
83 84
84void unregister_binfmt(struct linux_binfmt * fmt) 85void unregister_binfmt(struct linux_binfmt * fmt)
85{ 86{
@@ -104,40 +105,28 @@ static inline void put_binfmt(struct linux_binfmt * fmt)
104SYSCALL_DEFINE1(uselib, const char __user *, library) 105SYSCALL_DEFINE1(uselib, const char __user *, library)
105{ 106{
106 struct file *file; 107 struct file *file;
107 struct nameidata nd;
108 char *tmp = getname(library); 108 char *tmp = getname(library);
109 int error = PTR_ERR(tmp); 109 int error = PTR_ERR(tmp);
110 110
111 if (!IS_ERR(tmp)) { 111 if (IS_ERR(tmp))
112 error = path_lookup_open(AT_FDCWD, tmp, 112 goto out;
113 LOOKUP_FOLLOW, &nd, 113
114 FMODE_READ|FMODE_EXEC); 114 file = do_filp_open(AT_FDCWD, tmp,
115 putname(tmp); 115 O_LARGEFILE | O_RDONLY | FMODE_EXEC, 0,
116 } 116 MAY_READ | MAY_EXEC | MAY_OPEN);
117 if (error) 117 putname(tmp);
118 error = PTR_ERR(file);
119 if (IS_ERR(file))
118 goto out; 120 goto out;
119 121
120 error = -EINVAL; 122 error = -EINVAL;
121 if (!S_ISREG(nd.path.dentry->d_inode->i_mode)) 123 if (!S_ISREG(file->f_path.dentry->d_inode->i_mode))
122 goto exit; 124 goto exit;
123 125
124 error = -EACCES; 126 error = -EACCES;
125 if (nd.path.mnt->mnt_flags & MNT_NOEXEC) 127 if (file->f_path.mnt->mnt_flags & MNT_NOEXEC)
126 goto exit; 128 goto exit;
127 129
128 error = inode_permission(nd.path.dentry->d_inode,
129 MAY_READ | MAY_EXEC | MAY_OPEN);
130 if (error)
131 goto exit;
132 error = ima_path_check(&nd.path, MAY_READ | MAY_EXEC | MAY_OPEN);
133 if (error)
134 goto exit;
135
136 file = nameidata_to_filp(&nd, O_RDONLY|O_LARGEFILE);
137 error = PTR_ERR(file);
138 if (IS_ERR(file))
139 goto out;
140
141 fsnotify_open(file->f_path.dentry); 130 fsnotify_open(file->f_path.dentry);
142 131
143 error = -ENOEXEC; 132 error = -ENOEXEC;
@@ -159,13 +148,10 @@ SYSCALL_DEFINE1(uselib, const char __user *, library)
159 } 148 }
160 read_unlock(&binfmt_lock); 149 read_unlock(&binfmt_lock);
161 } 150 }
151exit:
162 fput(file); 152 fput(file);
163out: 153out:
164 return error; 154 return error;
165exit:
166 release_open_intent(&nd);
167 path_put(&nd.path);
168 goto out;
169} 155}
170 156
171#ifdef CONFIG_MMU 157#ifdef CONFIG_MMU
@@ -660,47 +646,33 @@ EXPORT_SYMBOL(setup_arg_pages);
660 646
661struct file *open_exec(const char *name) 647struct file *open_exec(const char *name)
662{ 648{
663 struct nameidata nd;
664 struct file *file; 649 struct file *file;
665 int err; 650 int err;
666 651
667 err = path_lookup_open(AT_FDCWD, name, LOOKUP_FOLLOW, &nd, 652 file = do_filp_open(AT_FDCWD, name,
668 FMODE_READ|FMODE_EXEC); 653 O_LARGEFILE | O_RDONLY | FMODE_EXEC, 0,
669 if (err) 654 MAY_EXEC | MAY_OPEN);
655 if (IS_ERR(file))
670 goto out; 656 goto out;
671 657
672 err = -EACCES; 658 err = -EACCES;
673 if (!S_ISREG(nd.path.dentry->d_inode->i_mode)) 659 if (!S_ISREG(file->f_path.dentry->d_inode->i_mode))
674 goto out_path_put; 660 goto exit;
675
676 if (nd.path.mnt->mnt_flags & MNT_NOEXEC)
677 goto out_path_put;
678
679 err = inode_permission(nd.path.dentry->d_inode, MAY_EXEC | MAY_OPEN);
680 if (err)
681 goto out_path_put;
682 err = ima_path_check(&nd.path, MAY_EXEC | MAY_OPEN);
683 if (err)
684 goto out_path_put;
685 661
686 file = nameidata_to_filp(&nd, O_RDONLY|O_LARGEFILE); 662 if (file->f_path.mnt->mnt_flags & MNT_NOEXEC)
687 if (IS_ERR(file)) 663 goto exit;
688 return file;
689 664
690 fsnotify_open(file->f_path.dentry); 665 fsnotify_open(file->f_path.dentry);
691 666
692 err = deny_write_access(file); 667 err = deny_write_access(file);
693 if (err) { 668 if (err)
694 fput(file); 669 goto exit;
695 goto out;
696 }
697 670
671out:
698 return file; 672 return file;
699 673
700 out_path_put: 674exit:
701 release_open_intent(&nd); 675 fput(file);
702 path_put(&nd.path);
703 out:
704 return ERR_PTR(err); 676 return ERR_PTR(err);
705} 677}
706EXPORT_SYMBOL(open_exec); 678EXPORT_SYMBOL(open_exec);
@@ -1060,7 +1032,6 @@ EXPORT_SYMBOL(install_exec_creds);
1060int check_unsafe_exec(struct linux_binprm *bprm) 1032int check_unsafe_exec(struct linux_binprm *bprm)
1061{ 1033{
1062 struct task_struct *p = current, *t; 1034 struct task_struct *p = current, *t;
1063 unsigned long flags;
1064 unsigned n_fs; 1035 unsigned n_fs;
1065 int res = 0; 1036 int res = 0;
1066 1037
@@ -1068,21 +1039,22 @@ int check_unsafe_exec(struct linux_binprm *bprm)
1068 1039
1069 n_fs = 1; 1040 n_fs = 1;
1070 write_lock(&p->fs->lock); 1041 write_lock(&p->fs->lock);
1071 lock_task_sighand(p, &flags); 1042 rcu_read_lock();
1072 for (t = next_thread(p); t != p; t = next_thread(t)) { 1043 for (t = next_thread(p); t != p; t = next_thread(t)) {
1073 if (t->fs == p->fs) 1044 if (t->fs == p->fs)
1074 n_fs++; 1045 n_fs++;
1075 } 1046 }
1047 rcu_read_unlock();
1076 1048
1077 if (p->fs->users > n_fs) { 1049 if (p->fs->users > n_fs) {
1078 bprm->unsafe |= LSM_UNSAFE_SHARE; 1050 bprm->unsafe |= LSM_UNSAFE_SHARE;
1079 } else { 1051 } else {
1080 if (p->fs->in_exec) 1052 res = -EAGAIN;
1081 res = -EAGAIN; 1053 if (!p->fs->in_exec) {
1082 p->fs->in_exec = 1; 1054 p->fs->in_exec = 1;
1055 res = 1;
1056 }
1083 } 1057 }
1084
1085 unlock_task_sighand(p, &flags);
1086 write_unlock(&p->fs->lock); 1058 write_unlock(&p->fs->lock);
1087 1059
1088 return res; 1060 return res;
@@ -1284,6 +1256,7 @@ int do_execve(char * filename,
1284 struct linux_binprm *bprm; 1256 struct linux_binprm *bprm;
1285 struct file *file; 1257 struct file *file;
1286 struct files_struct *displaced; 1258 struct files_struct *displaced;
1259 bool clear_in_exec;
1287 int retval; 1260 int retval;
1288 1261
1289 retval = unshare_files(&displaced); 1262 retval = unshare_files(&displaced);
@@ -1306,8 +1279,9 @@ int do_execve(char * filename,
1306 goto out_unlock; 1279 goto out_unlock;
1307 1280
1308 retval = check_unsafe_exec(bprm); 1281 retval = check_unsafe_exec(bprm);
1309 if (retval) 1282 if (retval < 0)
1310 goto out_unlock; 1283 goto out_unlock;
1284 clear_in_exec = retval;
1311 1285
1312 file = open_exec(filename); 1286 file = open_exec(filename);
1313 retval = PTR_ERR(file); 1287 retval = PTR_ERR(file);
@@ -1355,9 +1329,7 @@ int do_execve(char * filename,
1355 goto out; 1329 goto out;
1356 1330
1357 /* execve succeeded */ 1331 /* execve succeeded */
1358 write_lock(&current->fs->lock);
1359 current->fs->in_exec = 0; 1332 current->fs->in_exec = 0;
1360 write_unlock(&current->fs->lock);
1361 current->in_execve = 0; 1333 current->in_execve = 0;
1362 mutex_unlock(&current->cred_exec_mutex); 1334 mutex_unlock(&current->cred_exec_mutex);
1363 acct_update_integrals(current); 1335 acct_update_integrals(current);
@@ -1377,9 +1349,8 @@ out_file:
1377 } 1349 }
1378 1350
1379out_unmark: 1351out_unmark:
1380 write_lock(&current->fs->lock); 1352 if (clear_in_exec)
1381 current->fs->in_exec = 0; 1353 current->fs->in_exec = 0;
1382 write_unlock(&current->fs->lock);
1383 1354
1384out_unlock: 1355out_unlock:
1385 current->in_execve = 0; 1356 current->in_execve = 0;
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index f983225266d..5c4afe65224 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -1395,8 +1395,10 @@ static ssize_t ext2_quota_write(struct super_block *sb, int type,
1395 blk++; 1395 blk++;
1396 } 1396 }
1397out: 1397out:
1398 if (len == towrite) 1398 if (len == towrite) {
1399 mutex_unlock(&inode->i_mutex);
1399 return err; 1400 return err;
1401 }
1400 if (inode->i_size < off+len-towrite) 1402 if (inode->i_size < off+len-towrite)
1401 i_size_write(inode, off+len-towrite); 1403 i_size_write(inode, off+len-towrite);
1402 inode->i_version++; 1404 inode->i_version++;
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 2a1cb097976..e3a55eb8b26 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -326,11 +326,14 @@ ext4_ext_max_entries(struct inode *inode, int depth)
326 326
327static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext) 327static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext)
328{ 328{
329 ext4_fsblk_t block = ext_pblock(ext); 329 ext4_fsblk_t block = ext_pblock(ext), valid_block;
330 int len = ext4_ext_get_actual_len(ext); 330 int len = ext4_ext_get_actual_len(ext);
331 struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es; 331 struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
332 if (unlikely(block < le32_to_cpu(es->s_first_data_block) || 332
333 ((block + len) > ext4_blocks_count(es)))) 333 valid_block = le32_to_cpu(es->s_first_data_block) +
334 EXT4_SB(inode->i_sb)->s_gdb_count;
335 if (unlikely(block <= valid_block ||
336 ((block + len) > ext4_blocks_count(es))))
334 return 0; 337 return 0;
335 else 338 else
336 return 1; 339 return 1;
@@ -339,10 +342,13 @@ static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext)
339static int ext4_valid_extent_idx(struct inode *inode, 342static int ext4_valid_extent_idx(struct inode *inode,
340 struct ext4_extent_idx *ext_idx) 343 struct ext4_extent_idx *ext_idx)
341{ 344{
342 ext4_fsblk_t block = idx_pblock(ext_idx); 345 ext4_fsblk_t block = idx_pblock(ext_idx), valid_block;
343 struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es; 346 struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
344 if (unlikely(block < le32_to_cpu(es->s_first_data_block) || 347
345 (block >= ext4_blocks_count(es)))) 348 valid_block = le32_to_cpu(es->s_first_data_block) +
349 EXT4_SB(inode->i_sb)->s_gdb_count;
350 if (unlikely(block <= valid_block ||
351 (block >= ext4_blocks_count(es))))
346 return 0; 352 return 0;
347 else 353 else
348 return 1; 354 return 1;
@@ -1835,11 +1841,13 @@ ext4_ext_put_in_cache(struct inode *inode, ext4_lblk_t block,
1835{ 1841{
1836 struct ext4_ext_cache *cex; 1842 struct ext4_ext_cache *cex;
1837 BUG_ON(len == 0); 1843 BUG_ON(len == 0);
1844 spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
1838 cex = &EXT4_I(inode)->i_cached_extent; 1845 cex = &EXT4_I(inode)->i_cached_extent;
1839 cex->ec_type = type; 1846 cex->ec_type = type;
1840 cex->ec_block = block; 1847 cex->ec_block = block;
1841 cex->ec_len = len; 1848 cex->ec_len = len;
1842 cex->ec_start = start; 1849 cex->ec_start = start;
1850 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
1843} 1851}
1844 1852
1845/* 1853/*
@@ -1896,12 +1904,17 @@ ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block,
1896 struct ext4_extent *ex) 1904 struct ext4_extent *ex)
1897{ 1905{
1898 struct ext4_ext_cache *cex; 1906 struct ext4_ext_cache *cex;
1907 int ret = EXT4_EXT_CACHE_NO;
1899 1908
1909 /*
1910 * We borrow i_block_reservation_lock to protect i_cached_extent
1911 */
1912 spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
1900 cex = &EXT4_I(inode)->i_cached_extent; 1913 cex = &EXT4_I(inode)->i_cached_extent;
1901 1914
1902 /* has cache valid data? */ 1915 /* has cache valid data? */
1903 if (cex->ec_type == EXT4_EXT_CACHE_NO) 1916 if (cex->ec_type == EXT4_EXT_CACHE_NO)
1904 return EXT4_EXT_CACHE_NO; 1917 goto errout;
1905 1918
1906 BUG_ON(cex->ec_type != EXT4_EXT_CACHE_GAP && 1919 BUG_ON(cex->ec_type != EXT4_EXT_CACHE_GAP &&
1907 cex->ec_type != EXT4_EXT_CACHE_EXTENT); 1920 cex->ec_type != EXT4_EXT_CACHE_EXTENT);
@@ -1912,11 +1925,11 @@ ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block,
1912 ext_debug("%u cached by %u:%u:%llu\n", 1925 ext_debug("%u cached by %u:%u:%llu\n",
1913 block, 1926 block,
1914 cex->ec_block, cex->ec_len, cex->ec_start); 1927 cex->ec_block, cex->ec_len, cex->ec_start);
1915 return cex->ec_type; 1928 ret = cex->ec_type;
1916 } 1929 }
1917 1930errout:
1918 /* not in cache */ 1931 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
1919 return EXT4_EXT_CACHE_NO; 1932 return ret;
1920} 1933}
1921 1934
1922/* 1935/*
@@ -2869,6 +2882,8 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
2869 if (allocated > max_blocks) 2882 if (allocated > max_blocks)
2870 allocated = max_blocks; 2883 allocated = max_blocks;
2871 set_buffer_unwritten(bh_result); 2884 set_buffer_unwritten(bh_result);
2885 bh_result->b_bdev = inode->i_sb->s_bdev;
2886 bh_result->b_blocknr = newblock;
2872 goto out2; 2887 goto out2;
2873 } 2888 }
2874 2889
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 47b84e8df56..f18e0a08a6b 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -585,6 +585,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
585fallback: 585fallback:
586 ngroups = sbi->s_groups_count; 586 ngroups = sbi->s_groups_count;
587 avefreei = freei / ngroups; 587 avefreei = freei / ngroups;
588fallback_retry:
588 parent_group = EXT4_I(parent)->i_block_group; 589 parent_group = EXT4_I(parent)->i_block_group;
589 for (i = 0; i < ngroups; i++) { 590 for (i = 0; i < ngroups; i++) {
590 grp = (parent_group + i) % ngroups; 591 grp = (parent_group + i) % ngroups;
@@ -602,7 +603,7 @@ fallback:
602 * filesystems the above test can fail to find any blockgroups 603 * filesystems the above test can fail to find any blockgroups
603 */ 604 */
604 avefreei = 0; 605 avefreei = 0;
605 goto fallback; 606 goto fallback_retry;
606 } 607 }
607 608
608 return -1; 609 return -1;
@@ -831,11 +832,12 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode)
831 ret2 = find_group_flex(sb, dir, &group); 832 ret2 = find_group_flex(sb, dir, &group);
832 if (ret2 == -1) { 833 if (ret2 == -1) {
833 ret2 = find_group_other(sb, dir, &group, mode); 834 ret2 = find_group_other(sb, dir, &group, mode);
834 if (ret2 == 0 && once) 835 if (ret2 == 0 && once) {
835 once = 0; 836 once = 0;
836 printk(KERN_NOTICE "ext4: find_group_flex " 837 printk(KERN_NOTICE "ext4: find_group_flex "
837 "failed, fallback succeeded dir %lu\n", 838 "failed, fallback succeeded dir %lu\n",
838 dir->i_ino); 839 dir->i_ino);
840 }
839 } 841 }
840 goto got_group; 842 goto got_group;
841 } 843 }
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index c6bd6ced3bb..2a9ffd528dd 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1149,6 +1149,7 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
1149 int retval; 1149 int retval;
1150 1150
1151 clear_buffer_mapped(bh); 1151 clear_buffer_mapped(bh);
1152 clear_buffer_unwritten(bh);
1152 1153
1153 /* 1154 /*
1154 * Try to see if we can get the block without requesting 1155 * Try to see if we can get the block without requesting
@@ -1179,6 +1180,18 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
1179 return retval; 1180 return retval;
1180 1181
1181 /* 1182 /*
1183 * When we call get_blocks without the create flag, the
1184 * BH_Unwritten flag could have gotten set if the blocks
1185 * requested were part of a uninitialized extent. We need to
1186 * clear this flag now that we are committed to convert all or
1187 * part of the uninitialized extent to be an initialized
1188 * extent. This is because we need to avoid the combination
1189 * of BH_Unwritten and BH_Mapped flags being simultaneously
1190 * set on the buffer_head.
1191 */
1192 clear_buffer_unwritten(bh);
1193
1194 /*
1182 * New blocks allocate and/or writing to uninitialized extent 1195 * New blocks allocate and/or writing to uninitialized extent
1183 * will possibly result in updating i_data, so we take 1196 * will possibly result in updating i_data, so we take
1184 * the write lock of i_data_sem, and call get_blocks() 1197 * the write lock of i_data_sem, and call get_blocks()
@@ -2297,6 +2310,10 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
2297 struct buffer_head *bh_result, int create) 2310 struct buffer_head *bh_result, int create)
2298{ 2311{
2299 int ret = 0; 2312 int ret = 0;
2313 sector_t invalid_block = ~((sector_t) 0xffff);
2314
2315 if (invalid_block < ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es))
2316 invalid_block = ~0;
2300 2317
2301 BUG_ON(create == 0); 2318 BUG_ON(create == 0);
2302 BUG_ON(bh_result->b_size != inode->i_sb->s_blocksize); 2319 BUG_ON(bh_result->b_size != inode->i_sb->s_blocksize);
@@ -2318,11 +2335,18 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
2318 /* not enough space to reserve */ 2335 /* not enough space to reserve */
2319 return ret; 2336 return ret;
2320 2337
2321 map_bh(bh_result, inode->i_sb, 0); 2338 map_bh(bh_result, inode->i_sb, invalid_block);
2322 set_buffer_new(bh_result); 2339 set_buffer_new(bh_result);
2323 set_buffer_delay(bh_result); 2340 set_buffer_delay(bh_result);
2324 } else if (ret > 0) { 2341 } else if (ret > 0) {
2325 bh_result->b_size = (ret << inode->i_blkbits); 2342 bh_result->b_size = (ret << inode->i_blkbits);
2343 /*
2344 * With sub-block writes into unwritten extents
2345 * we also need to mark the buffer as new so that
2346 * the unwritten parts of the buffer gets correctly zeroed.
2347 */
2348 if (buffer_unwritten(bh_result))
2349 set_buffer_new(bh_result);
2326 ret = 0; 2350 ret = 0;
2327 } 2351 }
2328 2352
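
In the delayed-allocation path above, buffers that have no real block yet are now mapped to a deliberately impossible block number instead of block 0, which is a genuine, allocatable block. A tiny sketch of the marker computation (userspace, assuming a 64-bit sector_t; the fallback covers the unlikely case of a filesystem large enough to contain the first choice):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t invalid_block = ~((uint64_t)0xffff);
	uint64_t blocks_count  = 1ULL << 32;	/* hypothetical fs size in blocks */

	if (invalid_block < blocks_count)
		invalid_block = ~0ULL;
	printf("delayed buffers map to fake block %#llx\n",
	       (unsigned long long)invalid_block);
	return 0;
}
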
@@ -4357,11 +4381,9 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
4357 ei->i_flags = le32_to_cpu(raw_inode->i_flags); 4381 ei->i_flags = le32_to_cpu(raw_inode->i_flags);
4358 inode->i_blocks = ext4_inode_blocks(raw_inode, ei); 4382 inode->i_blocks = ext4_inode_blocks(raw_inode, ei);
4359 ei->i_file_acl = le32_to_cpu(raw_inode->i_file_acl_lo); 4383 ei->i_file_acl = le32_to_cpu(raw_inode->i_file_acl_lo);
4360 if (EXT4_SB(inode->i_sb)->s_es->s_creator_os != 4384 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT))
4361 cpu_to_le32(EXT4_OS_HURD)) {
4362 ei->i_file_acl |= 4385 ei->i_file_acl |=
4363 ((__u64)le16_to_cpu(raw_inode->i_file_acl_high)) << 32; 4386 ((__u64)le16_to_cpu(raw_inode->i_file_acl_high)) << 32;
4364 }
4365 inode->i_size = ext4_isize(raw_inode); 4387 inode->i_size = ext4_isize(raw_inode);
4366 ei->i_disksize = inode->i_size; 4388 ei->i_disksize = inode->i_size;
4367 inode->i_generation = le32_to_cpu(raw_inode->i_generation); 4389 inode->i_generation = le32_to_cpu(raw_inode->i_generation);
@@ -4409,9 +4431,23 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
4409 (__u64)(le32_to_cpu(raw_inode->i_version_hi)) << 32; 4431 (__u64)(le32_to_cpu(raw_inode->i_version_hi)) << 32;
4410 } 4432 }
4411 4433
4412 if (ei->i_flags & EXT4_EXTENTS_FL) { 4434 ret = 0;
4413 /* Validate extent which is part of inode */ 4435 if (ei->i_file_acl &&
4414 ret = ext4_ext_check_inode(inode); 4436 ((ei->i_file_acl <
4437 (le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block) +
4438 EXT4_SB(sb)->s_gdb_count)) ||
4439 (ei->i_file_acl >= ext4_blocks_count(EXT4_SB(sb)->s_es)))) {
4440 ext4_error(sb, __func__,
4441 "bad extended attribute block %llu in inode #%lu",
4442 ei->i_file_acl, inode->i_ino);
4443 ret = -EIO;
4444 goto bad_inode;
4445 } else if (ei->i_flags & EXT4_EXTENTS_FL) {
4446 if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
4447 (S_ISLNK(inode->i_mode) &&
4448 !ext4_inode_is_fast_symlink(inode)))
4449 /* Validate extent which is part of inode */
4450 ret = ext4_ext_check_inode(inode);
4415 } else if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || 4451 } else if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
4416 (S_ISLNK(inode->i_mode) && 4452 (S_ISLNK(inode->i_mode) &&
4417 !ext4_inode_is_fast_symlink(inode))) { 4453 !ext4_inode_is_fast_symlink(inode))) {
diff --git a/fs/fat/Kconfig b/fs/fat/Kconfig
index d0a69ff2537..182f9ffe2b5 100644
--- a/fs/fat/Kconfig
+++ b/fs/fat/Kconfig
@@ -95,3 +95,6 @@ config FAT_DEFAULT_IOCHARSET
95 Note that "utf8" is not recommended for FAT filesystems. 95 Note that "utf8" is not recommended for FAT filesystems.
96 If unsure, you shouldn't set "utf8" here. 96 If unsure, you shouldn't set "utf8" here.
97 See <file:Documentation/filesystems/vfat.txt> for more information. 97 See <file:Documentation/filesystems/vfat.txt> for more information.
98
99 Enable any character sets you need in File Systems/Native Language
100 Support.
diff --git a/fs/fcntl.c b/fs/fcntl.c
index cc8e4de2fee..1ad703150de 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -117,11 +117,13 @@ SYSCALL_DEFINE2(dup2, unsigned int, oldfd, unsigned int, newfd)
117{ 117{
118 if (unlikely(newfd == oldfd)) { /* corner case */ 118 if (unlikely(newfd == oldfd)) { /* corner case */
119 struct files_struct *files = current->files; 119 struct files_struct *files = current->files;
120 int retval = oldfd;
121
120 rcu_read_lock(); 122 rcu_read_lock();
121 if (!fcheck_files(files, oldfd)) 123 if (!fcheck_files(files, oldfd))
122 oldfd = -EBADF; 124 retval = -EBADF;
123 rcu_read_unlock(); 125 rcu_read_unlock();
124 return oldfd; 126 return retval;
125 } 127 }
126 return sys_dup3(oldfd, newfd, 0); 128 return sys_dup3(oldfd, newfd, 0);
127} 129}
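
The dup2() corner case above is a signedness fix: oldfd is an unsigned int, so storing -EBADF in it and returning it hands userspace a huge positive number rather than an error code. Routing the result through a signed temporary preserves the negative value. A compilable model of the difference on an LP64 system (userspace, illustrative values):

#include <stdio.h>

#define EBADF 9

static long broken(unsigned int oldfd, int fd_is_valid)
{
	if (!fd_is_valid)
		oldfd = -EBADF;		/* wraps to 4294967287 */
	return oldfd;
}

static long fixed(unsigned int oldfd, int fd_is_valid)
{
	int retval = oldfd;		/* signed temporary, as in the hunk */

	if (!fd_is_valid)
		retval = -EBADF;
	return retval;
}

int main(void)
{
	printf("broken: %ld, fixed: %ld\n", broken(3, 0), fixed(3, 0));
	return 0;
}
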
diff --git a/fs/filesystems.c b/fs/filesystems.c
index 1aa70260e6d..a24c58e181d 100644
--- a/fs/filesystems.c
+++ b/fs/filesystems.c
@@ -199,7 +199,7 @@ SYSCALL_DEFINE3(sysfs, int, option, unsigned long, arg1, unsigned long, arg2)
199 return retval; 199 return retval;
200} 200}
201 201
202int get_filesystem_list(char * buf) 202int __init get_filesystem_list(char *buf)
203{ 203{
204 int len = 0; 204 int len = 0;
205 struct file_system_type * tmp; 205 struct file_system_type * tmp;
diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h
index e0cbd16f6dc..1c341304621 100644
--- a/fs/fscache/internal.h
+++ b/fs/fscache/internal.h
@@ -28,7 +28,7 @@
28#define FSCACHE_MAX_THREADS 32 28#define FSCACHE_MAX_THREADS 32
29 29
30/* 30/*
31 * fsc-cache.c 31 * cache.c
32 */ 32 */
33extern struct list_head fscache_cache_list; 33extern struct list_head fscache_cache_list;
34extern struct rw_semaphore fscache_addremove_sem; 34extern struct rw_semaphore fscache_addremove_sem;
@@ -37,7 +37,7 @@ extern struct fscache_cache *fscache_select_cache_for_object(
37 struct fscache_cookie *); 37 struct fscache_cookie *);
38 38
39/* 39/*
40 * fsc-cookie.c 40 * cookie.c
41 */ 41 */
42extern struct kmem_cache *fscache_cookie_jar; 42extern struct kmem_cache *fscache_cookie_jar;
43 43
@@ -45,13 +45,13 @@ extern void fscache_cookie_init_once(void *);
45extern void __fscache_cookie_put(struct fscache_cookie *); 45extern void __fscache_cookie_put(struct fscache_cookie *);
46 46
47/* 47/*
48 * fsc-fsdef.c 48 * fsdef.c
49 */ 49 */
50extern struct fscache_cookie fscache_fsdef_index; 50extern struct fscache_cookie fscache_fsdef_index;
51extern struct fscache_cookie_def fscache_fsdef_netfs_def; 51extern struct fscache_cookie_def fscache_fsdef_netfs_def;
52 52
53/* 53/*
54 * fsc-histogram.c 54 * histogram.c
55 */ 55 */
56#ifdef CONFIG_FSCACHE_HISTOGRAM 56#ifdef CONFIG_FSCACHE_HISTOGRAM
57extern atomic_t fscache_obj_instantiate_histogram[HZ]; 57extern atomic_t fscache_obj_instantiate_histogram[HZ];
@@ -75,7 +75,7 @@ extern const struct file_operations fscache_histogram_fops;
75#endif 75#endif
76 76
77/* 77/*
78 * fsc-main.c 78 * main.c
79 */ 79 */
80extern unsigned fscache_defer_lookup; 80extern unsigned fscache_defer_lookup;
81extern unsigned fscache_defer_create; 81extern unsigned fscache_defer_create;
@@ -86,14 +86,14 @@ extern int fscache_wait_bit(void *);
86extern int fscache_wait_bit_interruptible(void *); 86extern int fscache_wait_bit_interruptible(void *);
87 87
88/* 88/*
89 * fsc-object.c 89 * object.c
90 */ 90 */
91extern void fscache_withdrawing_object(struct fscache_cache *, 91extern void fscache_withdrawing_object(struct fscache_cache *,
92 struct fscache_object *); 92 struct fscache_object *);
93extern void fscache_enqueue_object(struct fscache_object *); 93extern void fscache_enqueue_object(struct fscache_object *);
94 94
95/* 95/*
96 * fsc-operation.c 96 * operation.c
97 */ 97 */
98extern int fscache_submit_exclusive_op(struct fscache_object *, 98extern int fscache_submit_exclusive_op(struct fscache_object *,
99 struct fscache_operation *); 99 struct fscache_operation *);
@@ -104,7 +104,7 @@ extern void fscache_start_operations(struct fscache_object *);
104extern void fscache_operation_gc(struct work_struct *); 104extern void fscache_operation_gc(struct work_struct *);
105 105
106/* 106/*
107 * fsc-proc.c 107 * proc.c
108 */ 108 */
109#ifdef CONFIG_PROC_FS 109#ifdef CONFIG_PROC_FS
110extern int __init fscache_proc_init(void); 110extern int __init fscache_proc_init(void);
@@ -115,7 +115,7 @@ extern void fscache_proc_cleanup(void);
115#endif 115#endif
116 116
117/* 117/*
118 * fsc-stats.c 118 * stats.c
119 */ 119 */
120#ifdef CONFIG_FSCACHE_STATS 120#ifdef CONFIG_FSCACHE_STATS
121extern atomic_t fscache_n_ops_processed[FSCACHE_MAX_THREADS]; 121extern atomic_t fscache_n_ops_processed[FSCACHE_MAX_THREADS];
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 459b73dd45e..91f7c85f1ff 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -19,6 +19,7 @@
19#include <linux/random.h> 19#include <linux/random.h>
20#include <linux/sched.h> 20#include <linux/sched.h>
21#include <linux/exportfs.h> 21#include <linux/exportfs.h>
22#include <linux/smp_lock.h>
22 23
23MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>"); 24MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>");
24MODULE_DESCRIPTION("Filesystem in Userspace"); 25MODULE_DESCRIPTION("Filesystem in Userspace");
@@ -259,7 +260,9 @@ struct inode *fuse_iget(struct super_block *sb, u64 nodeid,
259 260
260static void fuse_umount_begin(struct super_block *sb) 261static void fuse_umount_begin(struct super_block *sb)
261{ 262{
263 lock_kernel();
262 fuse_abort_conn(get_fuse_conn_super(sb)); 264 fuse_abort_conn(get_fuse_conn_super(sb));
265 unlock_kernel();
263} 266}
264 267
265static void fuse_send_destroy(struct fuse_conn *fc) 268static void fuse_send_destroy(struct fuse_conn *fc)
@@ -908,6 +911,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
908 err_put_root: 911 err_put_root:
909 dput(root_dentry); 912 dput(root_dentry);
910 err_put_conn: 913 err_put_conn:
914 bdi_destroy(&fc->bdi);
911 fuse_conn_put(fc); 915 fuse_conn_put(fc);
912 err_fput: 916 err_fput:
913 fput(file); 917 fput(file);
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 1afd9f26bcb..ff498109048 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -1304,6 +1304,7 @@ static int gfs2_shrink_glock_memory(int nr, gfp_t gfp_mask)
1304 nr--; 1304 nr--;
1305 if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) 1305 if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
1306 gfs2_glock_put(gl); 1306 gfs2_glock_put(gl);
1307 got_ref = 0;
1307 } 1308 }
1308 spin_lock(&lru_lock); 1309 spin_lock(&lru_lock);
1309 if (may_demote) 1310 if (may_demote)
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index bf23a62aa92..70f87f43afa 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -156,6 +156,12 @@ static void inode_go_sync(struct gfs2_glock *gl)
156 error = filemap_fdatawait(metamapping); 156 error = filemap_fdatawait(metamapping);
157 mapping_set_error(metamapping, error); 157 mapping_set_error(metamapping, error);
158 gfs2_ail_empty_gl(gl); 158 gfs2_ail_empty_gl(gl);
159 /*
160 * Writeback of the data mapping may cause the dirty flag to be set
161 * so we have to clear it again here.
162 */
163 smp_mb__before_clear_bit();
164 clear_bit(GLF_DIRTY, &gl->gl_flags);
159} 165}
160 166
161/** 167/**
diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c
index 101caf3ee86..5d82e91887e 100644
--- a/fs/gfs2/ops_file.c
+++ b/fs/gfs2/ops_file.c
@@ -413,7 +413,9 @@ out_unlock:
413 gfs2_glock_dq(&gh); 413 gfs2_glock_dq(&gh);
414out: 414out:
415 gfs2_holder_uninit(&gh); 415 gfs2_holder_uninit(&gh);
416 if (ret) 416 if (ret == -ENOMEM)
417 ret = VM_FAULT_OOM;
418 else if (ret)
417 ret = VM_FAULT_SIGBUS; 419 ret = VM_FAULT_SIGBUS;
418 return ret; 420 return ret;
419} 421}
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 650a730707b..1ff9473ea75 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -1282,21 +1282,21 @@ static int gfs2_get_sb(struct file_system_type *fs_type, int flags,
1282static struct super_block *get_gfs2_sb(const char *dev_name) 1282static struct super_block *get_gfs2_sb(const char *dev_name)
1283{ 1283{
1284 struct super_block *sb; 1284 struct super_block *sb;
1285 struct nameidata nd; 1285 struct path path;
1286 int error; 1286 int error;
1287 1287
1288 error = path_lookup(dev_name, LOOKUP_FOLLOW, &nd); 1288 error = kern_path(dev_name, LOOKUP_FOLLOW, &path);
1289 if (error) { 1289 if (error) {
1290 printk(KERN_WARNING "GFS2: path_lookup on %s returned error %d\n", 1290 printk(KERN_WARNING "GFS2: path_lookup on %s returned error %d\n",
1291 dev_name, error); 1291 dev_name, error);
1292 return NULL; 1292 return NULL;
1293 } 1293 }
1294 sb = nd.path.dentry->d_inode->i_sb; 1294 sb = path.dentry->d_inode->i_sb;
1295 if (sb && (sb->s_type == &gfs2_fs_type)) 1295 if (sb && (sb->s_type == &gfs2_fs_type))
1296 atomic_inc(&sb->s_active); 1296 atomic_inc(&sb->s_active);
1297 else 1297 else
1298 sb = NULL; 1298 sb = NULL;
1299 path_put(&nd.path); 1299 path_put(&path);
1300 return sb; 1300 return sb;
1301} 1301}
1302 1302
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index f03d024038e..565038243fa 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -212,8 +212,7 @@ static u32 gfs2_bitfit(const u8 *buf, const unsigned int len,
212 if (tmp == 0) 212 if (tmp == 0)
213 return BFITNOENT; 213 return BFITNOENT;
214 ptr--; 214 ptr--;
215 bit = fls64(tmp); 215 bit = __ffs64(tmp);
216 bit--; /* fls64 always adds one to the bit count */
217 bit /= 2; /* two bits per entry in the bitmap */ 216 bit /= 2; /* two bits per entry in the bitmap */
218 return (((const unsigned char *)ptr - buf) * GFS2_NBBY) + bit; 217 return (((const unsigned char *)ptr - buf) * GFS2_NBBY) + bit;
219} 218}
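
The gfs2_bitfit() change swaps the bit-scan primitive: the forward search over two-bit bitmap entries is after the first matching entry, i.e. the lowest set bit in the masked word, which is what __ffs64() reports (zero-based), whereas fls64() reports the highest set bit (one-based, hence the old decrement). A userspace approximation using compiler builtins as assumed equivalents, for illustration only:

#include <stdio.h>
#include <stdint.h>

static unsigned int ffs64_like(uint64_t x) { return __builtin_ctzll(x); }      /* ~__ffs64() */
static unsigned int fls64_like(uint64_t x) { return 64 - __builtin_clzll(x); } /* ~fls64() */

int main(void)
{
	uint64_t tmp = 0x0000000000000410ULL;	/* hypothetical masked bitmap word */

	printf("lowest set bit %u -> entry %u; highest set bit %u -> entry %u\n",
	       ffs64_like(tmp), ffs64_like(tmp) / 2,
	       fls64_like(tmp) - 1, (fls64_like(tmp) - 1) / 2);
	return 0;
}
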
@@ -1445,10 +1444,12 @@ static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart,
1445u64 gfs2_alloc_block(struct gfs2_inode *ip, unsigned int *n) 1444u64 gfs2_alloc_block(struct gfs2_inode *ip, unsigned int *n)
1446{ 1445{
1447 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 1446 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1447 struct buffer_head *dibh;
1448 struct gfs2_alloc *al = ip->i_alloc; 1448 struct gfs2_alloc *al = ip->i_alloc;
1449 struct gfs2_rgrpd *rgd = al->al_rgd; 1449 struct gfs2_rgrpd *rgd = al->al_rgd;
1450 u32 goal, blk; 1450 u32 goal, blk;
1451 u64 block; 1451 u64 block;
1452 int error;
1452 1453
1453 if (rgrp_contains_block(rgd, ip->i_goal)) 1454 if (rgrp_contains_block(rgd, ip->i_goal))
1454 goal = ip->i_goal - rgd->rd_data0; 1455 goal = ip->i_goal - rgd->rd_data0;
@@ -1461,7 +1462,13 @@ u64 gfs2_alloc_block(struct gfs2_inode *ip, unsigned int *n)
1461 rgd->rd_last_alloc = blk; 1462 rgd->rd_last_alloc = blk;
1462 block = rgd->rd_data0 + blk; 1463 block = rgd->rd_data0 + blk;
1463 ip->i_goal = block; 1464 ip->i_goal = block;
1464 1465 error = gfs2_meta_inode_buffer(ip, &dibh);
1466 if (error == 0) {
1467 struct gfs2_dinode *di = (struct gfs2_dinode *)dibh->b_data;
1468 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
1469 di->di_goal_meta = di->di_goal_data = cpu_to_be64(ip->i_goal);
1470 brelse(dibh);
1471 }
1465 gfs2_assert_withdraw(sdp, rgd->rd_free >= *n); 1472 gfs2_assert_withdraw(sdp, rgd->rd_free >= *n);
1466 rgd->rd_free -= *n; 1473 rgd->rd_free -= *n;
1467 1474
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c
index fecf402d7b8..fc77965be84 100644
--- a/fs/hpfs/super.c
+++ b/fs/hpfs/super.c
@@ -423,8 +423,7 @@ static int hpfs_remount_fs(struct super_block *s, int *flags, char *data)
423 423
424 if (!(*flags & MS_RDONLY)) mark_dirty(s); 424 if (!(*flags & MS_RDONLY)) mark_dirty(s);
425 425
426 kfree(s->s_options); 426 replace_mount_options(s, new_opts);
427 s->s_options = new_opts;
428 427
429 return 0; 428 return 0;
430 429
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 23a3c76711e..c1462d43e72 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -26,7 +26,6 @@
26#include <linux/pagevec.h> 26#include <linux/pagevec.h>
27#include <linux/parser.h> 27#include <linux/parser.h>
28#include <linux/mman.h> 28#include <linux/mman.h>
29#include <linux/quotaops.h>
30#include <linux/slab.h> 29#include <linux/slab.h>
31#include <linux/dnotify.h> 30#include <linux/dnotify.h>
32#include <linux/statfs.h> 31#include <linux/statfs.h>
@@ -313,16 +312,6 @@ out:
313 return retval; 312 return retval;
314} 313}
315 314
316/*
317 * Read a page. Again trivial. If it didn't already exist
318 * in the page cache, it is zero-filled.
319 */
320static int hugetlbfs_readpage(struct file *file, struct page * page)
321{
322 unlock_page(page);
323 return -EINVAL;
324}
325
326static int hugetlbfs_write_begin(struct file *file, 315static int hugetlbfs_write_begin(struct file *file,
327 struct address_space *mapping, 316 struct address_space *mapping,
328 loff_t pos, unsigned len, unsigned flags, 317 loff_t pos, unsigned len, unsigned flags,
@@ -702,7 +691,6 @@ static void hugetlbfs_destroy_inode(struct inode *inode)
702} 691}
703 692
704static const struct address_space_operations hugetlbfs_aops = { 693static const struct address_space_operations hugetlbfs_aops = {
705 .readpage = hugetlbfs_readpage,
706 .write_begin = hugetlbfs_write_begin, 694 .write_begin = hugetlbfs_write_begin,
707 .write_end = hugetlbfs_write_end, 695 .write_end = hugetlbfs_write_end,
708 .set_page_dirty = hugetlbfs_set_page_dirty, 696 .set_page_dirty = hugetlbfs_set_page_dirty,
@@ -842,7 +830,7 @@ hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig)
842bad_val: 830bad_val:
843 printk(KERN_ERR "hugetlbfs: Bad value '%s' for mount option '%s'\n", 831 printk(KERN_ERR "hugetlbfs: Bad value '%s' for mount option '%s'\n",
844 args[0].from, p); 832 args[0].from, p);
845 return 1; 833 return -EINVAL;
846} 834}
847 835
848static int 836static int
diff --git a/fs/inode.c b/fs/inode.c
index 6ad14a1cd8c..bca0c618fdb 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -99,7 +99,7 @@ static DEFINE_MUTEX(iprune_mutex);
99 */ 99 */
100struct inodes_stat_t inodes_stat; 100struct inodes_stat_t inodes_stat;
101 101
102static struct kmem_cache * inode_cachep __read_mostly; 102static struct kmem_cache *inode_cachep __read_mostly;
103 103
104static void wake_up_inode(struct inode *inode) 104static void wake_up_inode(struct inode *inode)
105{ 105{
@@ -124,7 +124,7 @@ struct inode *inode_init_always(struct super_block *sb, struct inode *inode)
124 static struct inode_operations empty_iops; 124 static struct inode_operations empty_iops;
125 static const struct file_operations empty_fops; 125 static const struct file_operations empty_fops;
126 126
127 struct address_space * const mapping = &inode->i_data; 127 struct address_space *const mapping = &inode->i_data;
128 128
129 inode->i_sb = sb; 129 inode->i_sb = sb;
130 inode->i_blkbits = sb->s_blocksize_bits; 130 inode->i_blkbits = sb->s_blocksize_bits;
@@ -216,9 +216,10 @@ static struct inode *alloc_inode(struct super_block *sb)
216 return NULL; 216 return NULL;
217} 217}
218 218
219void destroy_inode(struct inode *inode) 219void destroy_inode(struct inode *inode)
220{ 220{
221 BUG_ON(inode_has_buffers(inode)); 221 BUG_ON(inode_has_buffers(inode));
222 ima_inode_free(inode);
222 security_inode_free(inode); 223 security_inode_free(inode);
223 if (inode->i_sb->s_op->destroy_inode) 224 if (inode->i_sb->s_op->destroy_inode)
224 inode->i_sb->s_op->destroy_inode(inode); 225 inode->i_sb->s_op->destroy_inode(inode);
@@ -252,12 +253,11 @@ void inode_init_once(struct inode *inode)
252 mutex_init(&inode->inotify_mutex); 253 mutex_init(&inode->inotify_mutex);
253#endif 254#endif
254} 255}
255
256EXPORT_SYMBOL(inode_init_once); 256EXPORT_SYMBOL(inode_init_once);
257 257
258static void init_once(void *foo) 258static void init_once(void *foo)
259{ 259{
260 struct inode * inode = (struct inode *) foo; 260 struct inode *inode = (struct inode *) foo;
261 261
262 inode_init_once(inode); 262 inode_init_once(inode);
263} 263}
@@ -265,7 +265,7 @@ static void init_once(void *foo)
265/* 265/*
266 * inode_lock must be held 266 * inode_lock must be held
267 */ 267 */
268void __iget(struct inode * inode) 268void __iget(struct inode *inode)
269{ 269{
270 if (atomic_read(&inode->i_count)) { 270 if (atomic_read(&inode->i_count)) {
271 atomic_inc(&inode->i_count); 271 atomic_inc(&inode->i_count);
@@ -289,7 +289,7 @@ void clear_inode(struct inode *inode)
289{ 289{
290 might_sleep(); 290 might_sleep();
291 invalidate_inode_buffers(inode); 291 invalidate_inode_buffers(inode);
292 292
293 BUG_ON(inode->i_data.nrpages); 293 BUG_ON(inode->i_data.nrpages);
294 BUG_ON(!(inode->i_state & I_FREEING)); 294 BUG_ON(!(inode->i_state & I_FREEING));
295 BUG_ON(inode->i_state & I_CLEAR); 295 BUG_ON(inode->i_state & I_CLEAR);
@@ -303,7 +303,6 @@ void clear_inode(struct inode *inode)
303 cd_forget(inode); 303 cd_forget(inode);
304 inode->i_state = I_CLEAR; 304 inode->i_state = I_CLEAR;
305} 305}
306
307EXPORT_SYMBOL(clear_inode); 306EXPORT_SYMBOL(clear_inode);
308 307
309/* 308/*
@@ -351,8 +350,8 @@ static int invalidate_list(struct list_head *head, struct list_head *dispose)
351 350
352 next = head->next; 351 next = head->next;
353 for (;;) { 352 for (;;) {
354 struct list_head * tmp = next; 353 struct list_head *tmp = next;
355 struct inode * inode; 354 struct inode *inode;
356 355
357 /* 356 /*
358 * We can reschedule here without worrying about the list's 357 * We can reschedule here without worrying about the list's
@@ -391,7 +390,7 @@ static int invalidate_list(struct list_head *head, struct list_head *dispose)
391 * fails because there are busy inodes then a non zero value is returned. 390 * fails because there are busy inodes then a non zero value is returned.
392 * If the discard is successful all the inodes have been discarded. 391 * If the discard is successful all the inodes have been discarded.
393 */ 392 */
394int invalidate_inodes(struct super_block * sb) 393int invalidate_inodes(struct super_block *sb)
395{ 394{
396 int busy; 395 int busy;
397 LIST_HEAD(throw_away); 396 LIST_HEAD(throw_away);
@@ -407,7 +406,6 @@ int invalidate_inodes(struct super_block * sb)
407 406
408 return busy; 407 return busy;
409} 408}
410
411EXPORT_SYMBOL(invalidate_inodes); 409EXPORT_SYMBOL(invalidate_inodes);
412 410
413static int can_unuse(struct inode *inode) 411static int can_unuse(struct inode *inode)
@@ -504,7 +502,7 @@ static int shrink_icache_memory(int nr, gfp_t gfp_mask)
504 * Nasty deadlock avoidance. We may hold various FS locks, 502 * Nasty deadlock avoidance. We may hold various FS locks,
505 * and we don't want to recurse into the FS that called us 503 * and we don't want to recurse into the FS that called us
506 * in clear_inode() and friends.. 504 * in clear_inode() and friends..
507 */ 505 */
508 if (!(gfp_mask & __GFP_FS)) 506 if (!(gfp_mask & __GFP_FS))
509 return -1; 507 return -1;
510 prune_icache(nr); 508 prune_icache(nr);
@@ -524,10 +522,13 @@ static void __wait_on_freeing_inode(struct inode *inode);
524 * by hand after calling find_inode now! This simplifies iunique and won't 522 * by hand after calling find_inode now! This simplifies iunique and won't
525 * add any additional branch in the common code. 523 * add any additional branch in the common code.
526 */ 524 */
527static struct inode * find_inode(struct super_block * sb, struct hlist_head *head, int (*test)(struct inode *, void *), void *data) 525static struct inode *find_inode(struct super_block *sb,
526 struct hlist_head *head,
527 int (*test)(struct inode *, void *),
528 void *data)
528{ 529{
529 struct hlist_node *node; 530 struct hlist_node *node;
530 struct inode * inode = NULL; 531 struct inode *inode = NULL;
531 532
532repeat: 533repeat:
533 hlist_for_each_entry(inode, node, head, i_hash) { 534 hlist_for_each_entry(inode, node, head, i_hash) {
@@ -548,10 +549,11 @@ repeat:
548 * find_inode_fast is the fast path version of find_inode, see the comment at 549 * find_inode_fast is the fast path version of find_inode, see the comment at
549 * iget_locked for details. 550 * iget_locked for details.
550 */ 551 */
551static struct inode * find_inode_fast(struct super_block * sb, struct hlist_head *head, unsigned long ino) 552static struct inode *find_inode_fast(struct super_block *sb,
553 struct hlist_head *head, unsigned long ino)
552{ 554{
553 struct hlist_node *node; 555 struct hlist_node *node;
554 struct inode * inode = NULL; 556 struct inode *inode = NULL;
555 557
556repeat: 558repeat:
557 hlist_for_each_entry(inode, node, head, i_hash) { 559 hlist_for_each_entry(inode, node, head, i_hash) {
@@ -631,10 +633,10 @@ struct inode *new_inode(struct super_block *sb)
631 * here to attempt to avoid that. 633 * here to attempt to avoid that.
632 */ 634 */
633 static unsigned int last_ino; 635 static unsigned int last_ino;
634 struct inode * inode; 636 struct inode *inode;
635 637
636 spin_lock_prefetch(&inode_lock); 638 spin_lock_prefetch(&inode_lock);
637 639
638 inode = alloc_inode(sb); 640 inode = alloc_inode(sb);
639 if (inode) { 641 if (inode) {
640 spin_lock(&inode_lock); 642 spin_lock(&inode_lock);
@@ -645,7 +647,6 @@ struct inode *new_inode(struct super_block *sb)
645 } 647 }
646 return inode; 648 return inode;
647} 649}
648
649EXPORT_SYMBOL(new_inode); 650EXPORT_SYMBOL(new_inode);
650 651
651void unlock_new_inode(struct inode *inode) 652void unlock_new_inode(struct inode *inode)
@@ -674,7 +675,6 @@ void unlock_new_inode(struct inode *inode)
674 inode->i_state &= ~(I_LOCK|I_NEW); 675 inode->i_state &= ~(I_LOCK|I_NEW);
675 wake_up_inode(inode); 676 wake_up_inode(inode);
676} 677}
677
678EXPORT_SYMBOL(unlock_new_inode); 678EXPORT_SYMBOL(unlock_new_inode);
679 679
680/* 680/*
@@ -683,13 +683,17 @@ EXPORT_SYMBOL(unlock_new_inode);
683 * We no longer cache the sb_flags in i_flags - see fs.h 683 * We no longer cache the sb_flags in i_flags - see fs.h
684 * -- rmk@arm.uk.linux.org 684 * -- rmk@arm.uk.linux.org
685 */ 685 */
686static struct inode * get_new_inode(struct super_block *sb, struct hlist_head *head, int (*test)(struct inode *, void *), int (*set)(struct inode *, void *), void *data) 686static struct inode *get_new_inode(struct super_block *sb,
687 struct hlist_head *head,
688 int (*test)(struct inode *, void *),
689 int (*set)(struct inode *, void *),
690 void *data)
687{ 691{
688 struct inode * inode; 692 struct inode *inode;
689 693
690 inode = alloc_inode(sb); 694 inode = alloc_inode(sb);
691 if (inode) { 695 if (inode) {
692 struct inode * old; 696 struct inode *old;
693 697
694 spin_lock(&inode_lock); 698 spin_lock(&inode_lock);
695 /* We released the lock, so.. */ 699 /* We released the lock, so.. */
@@ -731,13 +735,14 @@ set_failed:
731 * get_new_inode_fast is the fast path version of get_new_inode, see the 735 * get_new_inode_fast is the fast path version of get_new_inode, see the
732 * comment at iget_locked for details. 736 * comment at iget_locked for details.
733 */ 737 */
734static struct inode * get_new_inode_fast(struct super_block *sb, struct hlist_head *head, unsigned long ino) 738static struct inode *get_new_inode_fast(struct super_block *sb,
739 struct hlist_head *head, unsigned long ino)
735{ 740{
736 struct inode * inode; 741 struct inode *inode;
737 742
738 inode = alloc_inode(sb); 743 inode = alloc_inode(sb);
739 if (inode) { 744 if (inode) {
740 struct inode * old; 745 struct inode *old;
741 746
742 spin_lock(&inode_lock); 747 spin_lock(&inode_lock);
743 /* We released the lock, so.. */ 748 /* We released the lock, so.. */
@@ -823,7 +828,6 @@ struct inode *igrab(struct inode *inode)
823 spin_unlock(&inode_lock); 828 spin_unlock(&inode_lock);
824 return inode; 829 return inode;
825} 830}
826
827EXPORT_SYMBOL(igrab); 831EXPORT_SYMBOL(igrab);
828 832
829/** 833/**
@@ -924,7 +928,6 @@ struct inode *ilookup5_nowait(struct super_block *sb, unsigned long hashval,
924 928
925 return ifind(sb, head, test, data, 0); 929 return ifind(sb, head, test, data, 0);
926} 930}
927
928EXPORT_SYMBOL(ilookup5_nowait); 931EXPORT_SYMBOL(ilookup5_nowait);
929 932
930/** 933/**
@@ -953,7 +956,6 @@ struct inode *ilookup5(struct super_block *sb, unsigned long hashval,
953 956
954 return ifind(sb, head, test, data, 1); 957 return ifind(sb, head, test, data, 1);
955} 958}
956
957EXPORT_SYMBOL(ilookup5); 959EXPORT_SYMBOL(ilookup5);
958 960
959/** 961/**
@@ -976,7 +978,6 @@ struct inode *ilookup(struct super_block *sb, unsigned long ino)
976 978
977 return ifind_fast(sb, head, ino); 979 return ifind_fast(sb, head, ino);
978} 980}
979
980EXPORT_SYMBOL(ilookup); 981EXPORT_SYMBOL(ilookup);
981 982
982/** 983/**
@@ -1015,7 +1016,6 @@ struct inode *iget5_locked(struct super_block *sb, unsigned long hashval,
1015 */ 1016 */
1016 return get_new_inode(sb, head, test, set, data); 1017 return get_new_inode(sb, head, test, set, data);
1017} 1018}
1018
1019EXPORT_SYMBOL(iget5_locked); 1019EXPORT_SYMBOL(iget5_locked);
1020 1020
1021/** 1021/**
@@ -1047,7 +1047,6 @@ struct inode *iget_locked(struct super_block *sb, unsigned long ino)
1047 */ 1047 */
1048 return get_new_inode_fast(sb, head, ino); 1048 return get_new_inode_fast(sb, head, ino);
1049} 1049}
1050
1051EXPORT_SYMBOL(iget_locked); 1050EXPORT_SYMBOL(iget_locked);
1052 1051
1053int insert_inode_locked(struct inode *inode) 1052int insert_inode_locked(struct inode *inode)
@@ -1055,13 +1054,22 @@ int insert_inode_locked(struct inode *inode)
1055 struct super_block *sb = inode->i_sb; 1054 struct super_block *sb = inode->i_sb;
1056 ino_t ino = inode->i_ino; 1055 ino_t ino = inode->i_ino;
1057 struct hlist_head *head = inode_hashtable + hash(sb, ino); 1056 struct hlist_head *head = inode_hashtable + hash(sb, ino);
1058 struct inode *old;
1059 1057
1060 inode->i_state |= I_LOCK|I_NEW; 1058 inode->i_state |= I_LOCK|I_NEW;
1061 while (1) { 1059 while (1) {
1060 struct hlist_node *node;
1061 struct inode *old = NULL;
1062 spin_lock(&inode_lock); 1062 spin_lock(&inode_lock);
1063 old = find_inode_fast(sb, head, ino); 1063 hlist_for_each_entry(old, node, head, i_hash) {
1064 if (likely(!old)) { 1064 if (old->i_ino != ino)
1065 continue;
1066 if (old->i_sb != sb)
1067 continue;
1068 if (old->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE))
1069 continue;
1070 break;
1071 }
1072 if (likely(!node)) {
1065 hlist_add_head(&inode->i_hash, head); 1073 hlist_add_head(&inode->i_hash, head);
1066 spin_unlock(&inode_lock); 1074 spin_unlock(&inode_lock);
1067 return 0; 1075 return 0;
@@ -1076,7 +1084,6 @@ int insert_inode_locked(struct inode *inode)
1076 iput(old); 1084 iput(old);
1077 } 1085 }
1078} 1086}
1079
1080EXPORT_SYMBOL(insert_inode_locked); 1087EXPORT_SYMBOL(insert_inode_locked);
1081 1088
1082int insert_inode_locked4(struct inode *inode, unsigned long hashval, 1089int insert_inode_locked4(struct inode *inode, unsigned long hashval,
@@ -1084,14 +1091,24 @@ int insert_inode_locked4(struct inode *inode, unsigned long hashval,
1084{ 1091{
1085 struct super_block *sb = inode->i_sb; 1092 struct super_block *sb = inode->i_sb;
1086 struct hlist_head *head = inode_hashtable + hash(sb, hashval); 1093 struct hlist_head *head = inode_hashtable + hash(sb, hashval);
1087 struct inode *old;
1088 1094
1089 inode->i_state |= I_LOCK|I_NEW; 1095 inode->i_state |= I_LOCK|I_NEW;
1090 1096
1091 while (1) { 1097 while (1) {
1098 struct hlist_node *node;
1099 struct inode *old = NULL;
1100
1092 spin_lock(&inode_lock); 1101 spin_lock(&inode_lock);
1093 old = find_inode(sb, head, test, data); 1102 hlist_for_each_entry(old, node, head, i_hash) {
1094 if (likely(!old)) { 1103 if (old->i_sb != sb)
1104 continue;
1105 if (!test(old, data))
1106 continue;
1107 if (old->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE))
1108 continue;
1109 break;
1110 }
1111 if (likely(!node)) {
1095 hlist_add_head(&inode->i_hash, head); 1112 hlist_add_head(&inode->i_hash, head);
1096 spin_unlock(&inode_lock); 1113 spin_unlock(&inode_lock);
1097 return 0; 1114 return 0;
@@ -1106,7 +1123,6 @@ int insert_inode_locked4(struct inode *inode, unsigned long hashval,
1106 iput(old); 1123 iput(old);
1107 } 1124 }
1108} 1125}
1109
1110EXPORT_SYMBOL(insert_inode_locked4); 1126EXPORT_SYMBOL(insert_inode_locked4);
1111 1127
1112/** 1128/**
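
[The two hunks above rework insert_inode_locked() and insert_inode_locked4(): instead of calling find_inode_fast()/find_inode(), they walk the hash chain themselves so inodes already being torn down (I_FREEING, I_CLEAR, I_WILL_FREE) are not treated as matches. The essential loop, condensed from insert_inode_locked() as added in the hunk:]

    struct hlist_node *node;
    struct inode *old = NULL;

    spin_lock(&inode_lock);
    hlist_for_each_entry(old, node, head, i_hash) {
    	if (old->i_ino != ino)
    		continue;
    	if (old->i_sb != sb)
    		continue;
    	if (old->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE))
    		continue;			/* dying inode: ignore it */
    	break;					/* live match found */
    }
    if (likely(!node)) {			/* no live match: claim the hash slot */
    	hlist_add_head(&inode->i_hash, head);
    	spin_unlock(&inode_lock);
    	return 0;
    }
    /* otherwise: wait on the old inode as before (tail of the loop unchanged) */

[insert_inode_locked4() does the same, matching on test(old, data) instead of i_ino.]
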
@@ -1124,7 +1140,6 @@ void __insert_inode_hash(struct inode *inode, unsigned long hashval)
1124 hlist_add_head(&inode->i_hash, head); 1140 hlist_add_head(&inode->i_hash, head);
1125 spin_unlock(&inode_lock); 1141 spin_unlock(&inode_lock);
1126} 1142}
1127
1128EXPORT_SYMBOL(__insert_inode_hash); 1143EXPORT_SYMBOL(__insert_inode_hash);
1129 1144
1130/** 1145/**
@@ -1139,7 +1154,6 @@ void remove_inode_hash(struct inode *inode)
1139 hlist_del_init(&inode->i_hash); 1154 hlist_del_init(&inode->i_hash);
1140 spin_unlock(&inode_lock); 1155 spin_unlock(&inode_lock);
1141} 1156}
1142
1143EXPORT_SYMBOL(remove_inode_hash); 1157EXPORT_SYMBOL(remove_inode_hash);
1144 1158
1145/* 1159/*
@@ -1187,7 +1201,6 @@ void generic_delete_inode(struct inode *inode)
1187 BUG_ON(inode->i_state != I_CLEAR); 1201 BUG_ON(inode->i_state != I_CLEAR);
1188 destroy_inode(inode); 1202 destroy_inode(inode);
1189} 1203}
1190
1191EXPORT_SYMBOL(generic_delete_inode); 1204EXPORT_SYMBOL(generic_delete_inode);
1192 1205
1193static void generic_forget_inode(struct inode *inode) 1206static void generic_forget_inode(struct inode *inode)
@@ -1237,12 +1250,11 @@ void generic_drop_inode(struct inode *inode)
1237 else 1250 else
1238 generic_forget_inode(inode); 1251 generic_forget_inode(inode);
1239} 1252}
1240
1241EXPORT_SYMBOL_GPL(generic_drop_inode); 1253EXPORT_SYMBOL_GPL(generic_drop_inode);
1242 1254
1243/* 1255/*
1244 * Called when we're dropping the last reference 1256 * Called when we're dropping the last reference
1245 * to an inode. 1257 * to an inode.
1246 * 1258 *
1247 * Call the FS "drop()" function, defaulting to 1259 * Call the FS "drop()" function, defaulting to
1248 * the legacy UNIX filesystem behaviour.. 1260 * the legacy UNIX filesystem behaviour..
@@ -1262,7 +1274,7 @@ static inline void iput_final(struct inode *inode)
1262} 1274}
1263 1275
1264/** 1276/**
1265 * iput - put an inode 1277 * iput - put an inode
1266 * @inode: inode to put 1278 * @inode: inode to put
1267 * 1279 *
1268 * Puts an inode, dropping its usage count. If the inode use count hits 1280 * Puts an inode, dropping its usage count. If the inode use count hits
@@ -1279,7 +1291,6 @@ void iput(struct inode *inode)
1279 iput_final(inode); 1291 iput_final(inode);
1280 } 1292 }
1281} 1293}
1282
1283EXPORT_SYMBOL(iput); 1294EXPORT_SYMBOL(iput);
1284 1295
1285/** 1296/**
@@ -1290,10 +1301,10 @@ EXPORT_SYMBOL(iput);
1290 * Returns the block number on the device holding the inode that 1301 * Returns the block number on the device holding the inode that
1291 * is the disk block number for the block of the file requested. 1302 * is the disk block number for the block of the file requested.
1292 * That is, asked for block 4 of inode 1 the function will return the 1303 * That is, asked for block 4 of inode 1 the function will return the
1293 * disk block relative to the disk start that holds that block of the 1304 * disk block relative to the disk start that holds that block of the
1294 * file. 1305 * file.
1295 */ 1306 */
1296sector_t bmap(struct inode * inode, sector_t block) 1307sector_t bmap(struct inode *inode, sector_t block)
1297{ 1308{
1298 sector_t res = 0; 1309 sector_t res = 0;
1299 if (inode->i_mapping->a_ops->bmap) 1310 if (inode->i_mapping->a_ops->bmap)
@@ -1425,7 +1436,6 @@ void file_update_time(struct file *file)
1425 mark_inode_dirty_sync(inode); 1436 mark_inode_dirty_sync(inode);
1426 mnt_drop_write(file->f_path.mnt); 1437 mnt_drop_write(file->f_path.mnt);
1427} 1438}
1428
1429EXPORT_SYMBOL(file_update_time); 1439EXPORT_SYMBOL(file_update_time);
1430 1440
1431int inode_needs_sync(struct inode *inode) 1441int inode_needs_sync(struct inode *inode)
@@ -1436,7 +1446,6 @@ int inode_needs_sync(struct inode *inode)
1436 return 1; 1446 return 1;
1437 return 0; 1447 return 0;
1438} 1448}
1439
1440EXPORT_SYMBOL(inode_needs_sync); 1449EXPORT_SYMBOL(inode_needs_sync);
1441 1450
1442int inode_wait(void *word) 1451int inode_wait(void *word)
diff --git a/fs/ioctl.c b/fs/ioctl.c
index ac2d47e4392..82d9c42b8ba 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -258,7 +258,7 @@ int __generic_block_fiemap(struct inode *inode,
258 long long length = 0, map_len = 0; 258 long long length = 0, map_len = 0;
259 u64 logical = 0, phys = 0, size = 0; 259 u64 logical = 0, phys = 0, size = 0;
260 u32 flags = FIEMAP_EXTENT_MERGED; 260 u32 flags = FIEMAP_EXTENT_MERGED;
261 int ret = 0; 261 int ret = 0, past_eof = 0, whole_file = 0;
262 262
263 if ((ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC))) 263 if ((ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC)))
264 return ret; 264 return ret;
@@ -266,6 +266,9 @@ int __generic_block_fiemap(struct inode *inode,
266 start_blk = logical_to_blk(inode, start); 266 start_blk = logical_to_blk(inode, start);
267 267
268 length = (long long)min_t(u64, len, i_size_read(inode)); 268 length = (long long)min_t(u64, len, i_size_read(inode));
269 if (length < len)
270 whole_file = 1;
271
269 map_len = length; 272 map_len = length;
270 273
271 do { 274 do {
@@ -282,11 +285,26 @@ int __generic_block_fiemap(struct inode *inode,
282 285
283 /* HOLE */ 286 /* HOLE */
284 if (!buffer_mapped(&tmp)) { 287 if (!buffer_mapped(&tmp)) {
288 length -= blk_to_logical(inode, 1);
289 start_blk++;
290
291 /*
292 * we want to handle the case where there is an
293 * allocated block at the front of the file, and then
294 * nothing but holes up to the end of the file properly,
295 * to make sure that extent at the front gets properly
296 * marked with FIEMAP_EXTENT_LAST
297 */
298 if (!past_eof &&
299 blk_to_logical(inode, start_blk) >=
300 blk_to_logical(inode, 0)+i_size_read(inode))
301 past_eof = 1;
302
285 /* 303 /*
286 * first hole after going past the EOF, this is our 304 * first hole after going past the EOF, this is our
287 * last extent 305 * last extent
288 */ 306 */
289 if (length <= 0) { 307 if (past_eof && size) {
290 flags = FIEMAP_EXTENT_MERGED|FIEMAP_EXTENT_LAST; 308 flags = FIEMAP_EXTENT_MERGED|FIEMAP_EXTENT_LAST;
291 ret = fiemap_fill_next_extent(fieinfo, logical, 309 ret = fiemap_fill_next_extent(fieinfo, logical,
292 phys, size, 310 phys, size,
@@ -294,15 +312,37 @@ int __generic_block_fiemap(struct inode *inode,
294 break; 312 break;
295 } 313 }
296 314
297 length -= blk_to_logical(inode, 1);
298
299 /* if we have holes up to/past EOF then we're done */ 315 /* if we have holes up to/past EOF then we're done */
300 if (length <= 0) 316 if (length <= 0 || past_eof)
301 break; 317 break;
302
303 start_blk++;
304 } else { 318 } else {
305 if (length <= 0 && size) { 319 /*
320 * we have gone over the length of what we wanted to
321 * map, and it wasn't the entire file, so add the extent
322 * we got last time and exit.
323 *
324 * This is for the case where say we want to map all the
325 * way up to the second to the last block in a file, but
326 * the last block is a hole, making the second to last
327 * block FIEMAP_EXTENT_LAST. In this case we want to
328 * see if there is a hole after the second to last block
329 * so we can mark it properly. If we found data after
330 * we exceeded the length we were requesting, then we
331 * are good to go, just add the extent to the fieinfo
332 * and break
333 */
334 if (length <= 0 && !whole_file) {
335 ret = fiemap_fill_next_extent(fieinfo, logical,
336 phys, size,
337 flags);
338 break;
339 }
340
341 /*
342 * if size != 0 then we know we already have an extent
343 * to add, so add it.
344 */
345 if (size) {
306 ret = fiemap_fill_next_extent(fieinfo, logical, 346 ret = fiemap_fill_next_extent(fieinfo, logical,
307 phys, size, 347 phys, size,
308 flags); 348 flags);
@@ -319,19 +359,14 @@ int __generic_block_fiemap(struct inode *inode,
319 start_blk += logical_to_blk(inode, size); 359 start_blk += logical_to_blk(inode, size);
320 360
321 /* 361 /*
322 * if we are past the EOF we need to loop again to see 362 * If we are past the EOF, then we need to make sure as
323 * if there is a hole so we can mark this extent as the 363 * soon as we find a hole that the last extent we found
324 * last one, and if not keep mapping things until we 364 * is marked with FIEMAP_EXTENT_LAST
325 * find a hole, or we run out of slots in the extent
326 * array
327 */ 365 */
328 if (length <= 0) 366 if (!past_eof &&
329 continue; 367 logical+size >=
330 368 blk_to_logical(inode, 0)+i_size_read(inode))
331 ret = fiemap_fill_next_extent(fieinfo, logical, phys, 369 past_eof = 1;
332 size, flags);
333 if (ret)
334 break;
335 } 370 }
336 cond_resched(); 371 cond_resched();
337 } while (1); 372 } while (1);
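
[The __generic_block_fiemap() rework above replaces the old "loop once more after length runs out" scheme with two explicit flags: whole_file (the request covers at least the whole file) and past_eof (we have walked beyond i_size). The hole branch, condensed from the hunk; logical_to_blk()/blk_to_logical() and the tmp buffer_head come from the surrounding function:]

    length = (long long)min_t(u64, len, i_size_read(inode));
    if (length < len)
    	whole_file = 1;			/* caller asked past EOF, i.e. for the whole file */

    /* inside the do { } while (1) loop, when get_block() left tmp unmapped: */
    length -= blk_to_logical(inode, 1);
    start_blk++;
    if (!past_eof &&
        blk_to_logical(inode, start_blk) >=
        blk_to_logical(inode, 0) + i_size_read(inode))
    	past_eof = 1;			/* this hole starts at or beyond EOF */
    if (past_eof && size) {		/* first hole past EOF: flush the pending extent */
    	flags = FIEMAP_EXTENT_MERGED|FIEMAP_EXTENT_LAST;
    	ret = fiemap_fill_next_extent(fieinfo, logical, phys, size, flags);
    	break;
    }
    if (length <= 0 || past_eof)	/* holes up to or past EOF: done */
    	break;

[On the mapped side, an extent found after the requested length is exhausted is emitted immediately only when the request did not cover the whole file (!whole_file); for whole-file requests the loop keeps going so that a trailing hole, or past_eof, can mark the final extent FIEMAP_EXTENT_LAST.]
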
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index a8e8513a78a..618e21c0b7a 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -241,7 +241,7 @@ write_out_data:
241 spin_lock(&journal->j_list_lock); 241 spin_lock(&journal->j_list_lock);
242 } 242 }
243 /* Someone already cleaned up the buffer? */ 243 /* Someone already cleaned up the buffer? */
244 if (!buffer_jbd(bh) 244 if (!buffer_jbd(bh) || bh2jh(bh) != jh
245 || jh->b_transaction != commit_transaction 245 || jh->b_transaction != commit_transaction
246 || jh->b_jlist != BJ_SyncData) { 246 || jh->b_jlist != BJ_SyncData) {
247 jbd_unlock_bh_state(bh); 247 jbd_unlock_bh_state(bh);
@@ -478,7 +478,9 @@ void journal_commit_transaction(journal_t *journal)
478 spin_lock(&journal->j_list_lock); 478 spin_lock(&journal->j_list_lock);
479 continue; 479 continue;
480 } 480 }
481 if (buffer_jbd(bh) && jh->b_jlist == BJ_Locked) { 481 if (buffer_jbd(bh) && bh2jh(bh) == jh &&
482 jh->b_transaction == commit_transaction &&
483 jh->b_jlist == BJ_Locked) {
482 __journal_unfile_buffer(jh); 484 __journal_unfile_buffer(jh);
483 jbd_unlock_bh_state(bh); 485 jbd_unlock_bh_state(bh);
484 journal_remove_journal_head(bh); 486 journal_remove_journal_head(bh);
@@ -502,7 +504,7 @@ void journal_commit_transaction(journal_t *journal)
502 err = 0; 504 err = 0;
503 } 505 }
504 506
505 journal_write_revoke_records(journal, commit_transaction); 507 journal_write_revoke_records(journal, commit_transaction, write_op);
506 508
507 /* 509 /*
508 * If we found any dirty or locked buffers, then we should have 510 * If we found any dirty or locked buffers, then we should have
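
[Two things change in the jbd commit path above: the "someone already cleaned up the buffer?" and BJ_Locked checks now also verify bh2jh(bh) == jh (and, for BJ_Locked, that jh still belongs to this transaction), which presumably guards against the journal_head having been detached and reused while the state lock was dropped; and journal_write_revoke_records() is now told which write_op the commit is using. The tightened check, as added:]

    if (!buffer_jbd(bh) || bh2jh(bh) != jh
        || jh->b_transaction != commit_transaction
        || jh->b_jlist != BJ_SyncData) {
    	jbd_unlock_bh_state(bh);
    	/* ... skip this buffer, as before ... */
    }
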
diff --git a/fs/jbd/revoke.c b/fs/jbd/revoke.c
index 3e9afc2a91d..da6cd9bdaab 100644
--- a/fs/jbd/revoke.c
+++ b/fs/jbd/revoke.c
@@ -86,6 +86,7 @@
86#include <linux/slab.h> 86#include <linux/slab.h>
87#include <linux/list.h> 87#include <linux/list.h>
88#include <linux/init.h> 88#include <linux/init.h>
89#include <linux/bio.h>
89#endif 90#endif
90#include <linux/log2.h> 91#include <linux/log2.h>
91 92
@@ -118,8 +119,8 @@ struct jbd_revoke_table_s
118#ifdef __KERNEL__ 119#ifdef __KERNEL__
119static void write_one_revoke_record(journal_t *, transaction_t *, 120static void write_one_revoke_record(journal_t *, transaction_t *,
120 struct journal_head **, int *, 121 struct journal_head **, int *,
121 struct jbd_revoke_record_s *); 122 struct jbd_revoke_record_s *, int);
122static void flush_descriptor(journal_t *, struct journal_head *, int); 123static void flush_descriptor(journal_t *, struct journal_head *, int, int);
123#endif 124#endif
124 125
125/* Utility functions to maintain the revoke table */ 126/* Utility functions to maintain the revoke table */
@@ -500,7 +501,7 @@ void journal_switch_revoke_table(journal_t *journal)
500 * revoke hash, deleting the entries as we go. 501 * revoke hash, deleting the entries as we go.
501 */ 502 */
502void journal_write_revoke_records(journal_t *journal, 503void journal_write_revoke_records(journal_t *journal,
503 transaction_t *transaction) 504 transaction_t *transaction, int write_op)
504{ 505{
505 struct journal_head *descriptor; 506 struct journal_head *descriptor;
506 struct jbd_revoke_record_s *record; 507 struct jbd_revoke_record_s *record;
@@ -524,14 +525,14 @@ void journal_write_revoke_records(journal_t *journal,
524 hash_list->next; 525 hash_list->next;
525 write_one_revoke_record(journal, transaction, 526 write_one_revoke_record(journal, transaction,
526 &descriptor, &offset, 527 &descriptor, &offset,
527 record); 528 record, write_op);
528 count++; 529 count++;
529 list_del(&record->hash); 530 list_del(&record->hash);
530 kmem_cache_free(revoke_record_cache, record); 531 kmem_cache_free(revoke_record_cache, record);
531 } 532 }
532 } 533 }
533 if (descriptor) 534 if (descriptor)
534 flush_descriptor(journal, descriptor, offset); 535 flush_descriptor(journal, descriptor, offset, write_op);
535 jbd_debug(1, "Wrote %d revoke records\n", count); 536 jbd_debug(1, "Wrote %d revoke records\n", count);
536} 537}
537 538
@@ -544,7 +545,8 @@ static void write_one_revoke_record(journal_t *journal,
544 transaction_t *transaction, 545 transaction_t *transaction,
545 struct journal_head **descriptorp, 546 struct journal_head **descriptorp,
546 int *offsetp, 547 int *offsetp,
547 struct jbd_revoke_record_s *record) 548 struct jbd_revoke_record_s *record,
549 int write_op)
548{ 550{
549 struct journal_head *descriptor; 551 struct journal_head *descriptor;
550 int offset; 552 int offset;
@@ -563,7 +565,7 @@ static void write_one_revoke_record(journal_t *journal,
563 /* Make sure we have a descriptor with space left for the record */ 565 /* Make sure we have a descriptor with space left for the record */
564 if (descriptor) { 566 if (descriptor) {
565 if (offset == journal->j_blocksize) { 567 if (offset == journal->j_blocksize) {
566 flush_descriptor(journal, descriptor, offset); 568 flush_descriptor(journal, descriptor, offset, write_op);
567 descriptor = NULL; 569 descriptor = NULL;
568 } 570 }
569 } 571 }
@@ -600,7 +602,7 @@ static void write_one_revoke_record(journal_t *journal,
600 602
601static void flush_descriptor(journal_t *journal, 603static void flush_descriptor(journal_t *journal,
602 struct journal_head *descriptor, 604 struct journal_head *descriptor,
603 int offset) 605 int offset, int write_op)
604{ 606{
605 journal_revoke_header_t *header; 607 journal_revoke_header_t *header;
606 struct buffer_head *bh = jh2bh(descriptor); 608 struct buffer_head *bh = jh2bh(descriptor);
@@ -615,7 +617,7 @@ static void flush_descriptor(journal_t *journal,
615 set_buffer_jwrite(bh); 617 set_buffer_jwrite(bh);
616 BUFFER_TRACE(bh, "write"); 618 BUFFER_TRACE(bh, "write");
617 set_buffer_dirty(bh); 619 set_buffer_dirty(bh);
618 ll_rw_block(SWRITE, 1, &bh); 620 ll_rw_block((write_op == WRITE) ? SWRITE : SWRITE_SYNC_PLUG, 1, &bh);
619} 621}
620#endif 622#endif
621 623
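
[The write_op parameter added above is threaded from journal_write_revoke_records() through write_one_revoke_record() down to flush_descriptor(), which now chooses between a plain and a synchronous, plugged submission (the hunk also adds #include <linux/bio.h>). Condensed tail of the new flush_descriptor():]

    static void flush_descriptor(journal_t *journal,
    			     struct journal_head *descriptor,
    			     int offset, int write_op)
    {
    	struct buffer_head *bh = jh2bh(descriptor);
    	/* ... fill in the revoke header as before ... */
    	set_buffer_jwrite(bh);
    	BUFFER_TRACE(bh, "write");
    	set_buffer_dirty(bh);
    	ll_rw_block((write_op == WRITE) ? SWRITE : SWRITE_SYNC_PLUG, 1, &bh);
    }

[The jbd2 copies of commit.c and revoke.c below receive identical plumbing.]
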
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 073c8c3df7c..0b7d3b8226f 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -506,7 +506,8 @@ void jbd2_journal_commit_transaction(journal_t *journal)
506 if (err) 506 if (err)
507 jbd2_journal_abort(journal, err); 507 jbd2_journal_abort(journal, err);
508 508
509 jbd2_journal_write_revoke_records(journal, commit_transaction); 509 jbd2_journal_write_revoke_records(journal, commit_transaction,
510 write_op);
510 511
511 jbd_debug(3, "JBD: commit phase 2\n"); 512 jbd_debug(3, "JBD: commit phase 2\n");
512 513
diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c
index bbe6d592d8b..a360b06af2e 100644
--- a/fs/jbd2/revoke.c
+++ b/fs/jbd2/revoke.c
@@ -86,6 +86,7 @@
86#include <linux/slab.h> 86#include <linux/slab.h>
87#include <linux/list.h> 87#include <linux/list.h>
88#include <linux/init.h> 88#include <linux/init.h>
89#include <linux/bio.h>
89#endif 90#endif
90#include <linux/log2.h> 91#include <linux/log2.h>
91 92
@@ -118,8 +119,8 @@ struct jbd2_revoke_table_s
118#ifdef __KERNEL__ 119#ifdef __KERNEL__
119static void write_one_revoke_record(journal_t *, transaction_t *, 120static void write_one_revoke_record(journal_t *, transaction_t *,
120 struct journal_head **, int *, 121 struct journal_head **, int *,
121 struct jbd2_revoke_record_s *); 122 struct jbd2_revoke_record_s *, int);
122static void flush_descriptor(journal_t *, struct journal_head *, int); 123static void flush_descriptor(journal_t *, struct journal_head *, int, int);
123#endif 124#endif
124 125
125/* Utility functions to maintain the revoke table */ 126/* Utility functions to maintain the revoke table */
@@ -499,7 +500,8 @@ void jbd2_journal_switch_revoke_table(journal_t *journal)
499 * revoke hash, deleting the entries as we go. 500 * revoke hash, deleting the entries as we go.
500 */ 501 */
501void jbd2_journal_write_revoke_records(journal_t *journal, 502void jbd2_journal_write_revoke_records(journal_t *journal,
502 transaction_t *transaction) 503 transaction_t *transaction,
504 int write_op)
503{ 505{
504 struct journal_head *descriptor; 506 struct journal_head *descriptor;
505 struct jbd2_revoke_record_s *record; 507 struct jbd2_revoke_record_s *record;
@@ -523,14 +525,14 @@ void jbd2_journal_write_revoke_records(journal_t *journal,
523 hash_list->next; 525 hash_list->next;
524 write_one_revoke_record(journal, transaction, 526 write_one_revoke_record(journal, transaction,
525 &descriptor, &offset, 527 &descriptor, &offset,
526 record); 528 record, write_op);
527 count++; 529 count++;
528 list_del(&record->hash); 530 list_del(&record->hash);
529 kmem_cache_free(jbd2_revoke_record_cache, record); 531 kmem_cache_free(jbd2_revoke_record_cache, record);
530 } 532 }
531 } 533 }
532 if (descriptor) 534 if (descriptor)
533 flush_descriptor(journal, descriptor, offset); 535 flush_descriptor(journal, descriptor, offset, write_op);
534 jbd_debug(1, "Wrote %d revoke records\n", count); 536 jbd_debug(1, "Wrote %d revoke records\n", count);
535} 537}
536 538
@@ -543,7 +545,8 @@ static void write_one_revoke_record(journal_t *journal,
543 transaction_t *transaction, 545 transaction_t *transaction,
544 struct journal_head **descriptorp, 546 struct journal_head **descriptorp,
545 int *offsetp, 547 int *offsetp,
546 struct jbd2_revoke_record_s *record) 548 struct jbd2_revoke_record_s *record,
549 int write_op)
547{ 550{
548 struct journal_head *descriptor; 551 struct journal_head *descriptor;
549 int offset; 552 int offset;
@@ -562,7 +565,7 @@ static void write_one_revoke_record(journal_t *journal,
562 /* Make sure we have a descriptor with space left for the record */ 565 /* Make sure we have a descriptor with space left for the record */
563 if (descriptor) { 566 if (descriptor) {
564 if (offset == journal->j_blocksize) { 567 if (offset == journal->j_blocksize) {
565 flush_descriptor(journal, descriptor, offset); 568 flush_descriptor(journal, descriptor, offset, write_op);
566 descriptor = NULL; 569 descriptor = NULL;
567 } 570 }
568 } 571 }
@@ -607,7 +610,7 @@ static void write_one_revoke_record(journal_t *journal,
607 610
608static void flush_descriptor(journal_t *journal, 611static void flush_descriptor(journal_t *journal,
609 struct journal_head *descriptor, 612 struct journal_head *descriptor,
610 int offset) 613 int offset, int write_op)
611{ 614{
612 jbd2_journal_revoke_header_t *header; 615 jbd2_journal_revoke_header_t *header;
613 struct buffer_head *bh = jh2bh(descriptor); 616 struct buffer_head *bh = jh2bh(descriptor);
@@ -622,7 +625,7 @@ static void flush_descriptor(journal_t *journal,
622 set_buffer_jwrite(bh); 625 set_buffer_jwrite(bh);
623 BUFFER_TRACE(bh, "write"); 626 BUFFER_TRACE(bh, "write");
624 set_buffer_dirty(bh); 627 set_buffer_dirty(bh);
625 ll_rw_block(SWRITE, 1, &bh); 628 ll_rw_block((write_op == WRITE) ? SWRITE : SWRITE_SYNC_PLUG, 1, &bh);
626} 629}
627#endif 630#endif
628 631
diff --git a/fs/jffs2/erase.c b/fs/jffs2/erase.c
index c32b4a1ad6c..a0244740b75 100644
--- a/fs/jffs2/erase.c
+++ b/fs/jffs2/erase.c
@@ -480,13 +480,6 @@ static void jffs2_mark_erased_block(struct jffs2_sb_info *c, struct jffs2_eraseb
480 return; 480 return;
481 481
482filebad: 482filebad:
483 mutex_lock(&c->erase_free_sem);
484 spin_lock(&c->erase_completion_lock);
485 /* Stick it on a list (any list) so erase_failed can take it
486 right off again. Silly, but shouldn't happen often. */
487 list_move(&jeb->list, &c->erasing_list);
488 spin_unlock(&c->erase_completion_lock);
489 mutex_unlock(&c->erase_free_sem);
490 jffs2_erase_failed(c, jeb, bad_offset); 483 jffs2_erase_failed(c, jeb, bad_offset);
491 return; 484 return;
492 485
diff --git a/fs/libfs.c b/fs/libfs.c
index cd223190c4e..80046ddf506 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -246,8 +246,7 @@ int get_sb_pseudo(struct file_system_type *fs_type, char *name,
246 return 0; 246 return 0;
247 247
248Enomem: 248Enomem:
249 up_write(&s->s_umount); 249 deactivate_locked_super(s);
250 deactivate_super(s);
251 return -ENOMEM; 250 return -ENOMEM;
252} 251}
253 252
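
[get_sb_pseudo()'s error path above is the first of several in this series (the nfs_*_get_sb() error paths below get the same treatment) that switch from a hand-rolled up_write(&s->s_umount); deactivate_super(s); pair to deactivate_locked_super(), which, as its use here implies, takes a superblock whose s_umount is already write-held and both drops the reference and releases the lock. The new idiom:]

    Enomem:
    	deactivate_locked_super(s);	/* replaces up_write(&s->s_umount) + deactivate_super(s) */
    	return -ENOMEM;
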
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index abf83881f68..1a54ae14a19 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -104,6 +104,16 @@ static void set_grace_period(void)
104 schedule_delayed_work(&grace_period_end, grace_period); 104 schedule_delayed_work(&grace_period_end, grace_period);
105} 105}
106 106
107static void restart_grace(void)
108{
109 if (nlmsvc_ops) {
110 cancel_delayed_work_sync(&grace_period_end);
111 locks_end_grace(&lockd_manager);
112 nlmsvc_invalidate_all();
113 set_grace_period();
114 }
115}
116
107/* 117/*
108 * This is the lockd kernel thread 118 * This is the lockd kernel thread
109 */ 119 */
@@ -149,10 +159,7 @@ lockd(void *vrqstp)
149 159
150 if (signalled()) { 160 if (signalled()) {
151 flush_signals(current); 161 flush_signals(current);
152 if (nlmsvc_ops) { 162 restart_grace();
153 nlmsvc_invalidate_all();
154 set_grace_period();
155 }
156 continue; 163 continue;
157 } 164 }
158 165
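
[The lockd change above factors the signal-handling path into a restart_grace() helper and extends it: besides invalidating all locks and re-arming the grace period as before, it now cancels the pending end-of-grace work item and explicitly ends the current grace period first. The helper, as added:]

    static void restart_grace(void)
    {
    	if (nlmsvc_ops) {
    		cancel_delayed_work_sync(&grace_period_end);
    		locks_end_grace(&lockd_manager);
    		nlmsvc_invalidate_all();
    		set_grace_period();
    	}
    }
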
diff --git a/fs/namei.c b/fs/namei.c
index b8433ebfae0..967c3db9272 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1130,8 +1130,8 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt,
1130 * @nd: pointer to nameidata 1130 * @nd: pointer to nameidata
1131 * @open_flags: open intent flags 1131 * @open_flags: open intent flags
1132 */ 1132 */
1133int path_lookup_open(int dfd, const char *name, unsigned int lookup_flags, 1133static int path_lookup_open(int dfd, const char *name,
1134 struct nameidata *nd, int open_flags) 1134 unsigned int lookup_flags, struct nameidata *nd, int open_flags)
1135{ 1135{
1136 struct file *filp = get_empty_filp(); 1136 struct file *filp = get_empty_filp();
1137 int err; 1137 int err;
@@ -1248,6 +1248,8 @@ struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
1248 int err; 1248 int err;
1249 struct qstr this; 1249 struct qstr this;
1250 1250
1251 WARN_ON_ONCE(!mutex_is_locked(&base->d_inode->i_mutex));
1252
1251 err = __lookup_one_len(name, &this, base, len); 1253 err = __lookup_one_len(name, &this, base, len);
1252 if (err) 1254 if (err)
1253 return ERR_PTR(err); 1255 return ERR_PTR(err);
@@ -1635,18 +1637,19 @@ static int open_will_write_to_fs(int flag, struct inode *inode)
1635 * open_to_namei_flags() for more details. 1637 * open_to_namei_flags() for more details.
1636 */ 1638 */
1637struct file *do_filp_open(int dfd, const char *pathname, 1639struct file *do_filp_open(int dfd, const char *pathname,
1638 int open_flag, int mode) 1640 int open_flag, int mode, int acc_mode)
1639{ 1641{
1640 struct file *filp; 1642 struct file *filp;
1641 struct nameidata nd; 1643 struct nameidata nd;
1642 int acc_mode, error; 1644 int error;
1643 struct path path; 1645 struct path path;
1644 struct dentry *dir; 1646 struct dentry *dir;
1645 int count = 0; 1647 int count = 0;
1646 int will_write; 1648 int will_write;
1647 int flag = open_to_namei_flags(open_flag); 1649 int flag = open_to_namei_flags(open_flag);
1648 1650
1649 acc_mode = MAY_OPEN | ACC_MODE(flag); 1651 if (!acc_mode)
1652 acc_mode = MAY_OPEN | ACC_MODE(flag);
1650 1653
1651 /* O_TRUNC implies we need access checks for write permissions */ 1654 /* O_TRUNC implies we need access checks for write permissions */
1652 if (flag & O_TRUNC) 1655 if (flag & O_TRUNC)
@@ -1867,7 +1870,7 @@ do_link:
1867 */ 1870 */
1868struct file *filp_open(const char *filename, int flags, int mode) 1871struct file *filp_open(const char *filename, int flags, int mode)
1869{ 1872{
1870 return do_filp_open(AT_FDCWD, filename, flags, mode); 1873 return do_filp_open(AT_FDCWD, filename, flags, mode, 0);
1871} 1874}
1872EXPORT_SYMBOL(filp_open); 1875EXPORT_SYMBOL(filp_open);
1873 1876
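
[In the namei changes above, path_lookup_open() loses its external linkage and do_filp_open() grows an acc_mode argument, presumably so in-kernel callers can supply their own access mask; passing 0 keeps the old behaviour of deriving it from the open flags. The hunk also adds a WARN_ON_ONCE to lookup_one_len() documenting that callers must hold the parent's i_mutex (the nfsd recovery changes below are adjusted to satisfy it). Condensed from the hunk:]

    struct file *do_filp_open(int dfd, const char *pathname,
    			  int open_flag, int mode, int acc_mode)
    {
    	int flag = open_to_namei_flags(open_flag);
    	/* ... */
    	if (!acc_mode)			/* 0 means "derive it", as before */
    		acc_mode = MAY_OPEN | ACC_MODE(flag);
    	/* ... */
    }

    /* filp_open() keeps its behaviour by passing 0: */
    return do_filp_open(AT_FDCWD, filename, flags, mode, 0);
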
diff --git a/fs/namespace.c b/fs/namespace.c
index c6f54e4c429..134d494158d 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -695,12 +695,16 @@ static inline void mangle(struct seq_file *m, const char *s)
695 */ 695 */
696int generic_show_options(struct seq_file *m, struct vfsmount *mnt) 696int generic_show_options(struct seq_file *m, struct vfsmount *mnt)
697{ 697{
698 const char *options = mnt->mnt_sb->s_options; 698 const char *options;
699
700 rcu_read_lock();
701 options = rcu_dereference(mnt->mnt_sb->s_options);
699 702
700 if (options != NULL && options[0]) { 703 if (options != NULL && options[0]) {
701 seq_putc(m, ','); 704 seq_putc(m, ',');
702 mangle(m, options); 705 mangle(m, options);
703 } 706 }
707 rcu_read_unlock();
704 708
705 return 0; 709 return 0;
706} 710}
@@ -721,11 +725,22 @@ EXPORT_SYMBOL(generic_show_options);
721 */ 725 */
722void save_mount_options(struct super_block *sb, char *options) 726void save_mount_options(struct super_block *sb, char *options)
723{ 727{
724 kfree(sb->s_options); 728 BUG_ON(sb->s_options);
725 sb->s_options = kstrdup(options, GFP_KERNEL); 729 rcu_assign_pointer(sb->s_options, kstrdup(options, GFP_KERNEL));
726} 730}
727EXPORT_SYMBOL(save_mount_options); 731EXPORT_SYMBOL(save_mount_options);
728 732
733void replace_mount_options(struct super_block *sb, char *options)
734{
735 char *old = sb->s_options;
736 rcu_assign_pointer(sb->s_options, options);
737 if (old) {
738 synchronize_rcu();
739 kfree(old);
740 }
741}
742EXPORT_SYMBOL(replace_mount_options);
743
729#ifdef CONFIG_PROC_FS 744#ifdef CONFIG_PROC_FS
730/* iterator */ 745/* iterator */
731static void *m_start(struct seq_file *m, loff_t *pos) 746static void *m_start(struct seq_file *m, loff_t *pos)
@@ -1073,9 +1088,7 @@ static int do_umount(struct vfsmount *mnt, int flags)
1073 */ 1088 */
1074 1089
1075 if (flags & MNT_FORCE && sb->s_op->umount_begin) { 1090 if (flags & MNT_FORCE && sb->s_op->umount_begin) {
1076 lock_kernel();
1077 sb->s_op->umount_begin(sb); 1091 sb->s_op->umount_begin(sb);
1078 unlock_kernel();
1079 } 1092 }
1080 1093
1081 /* 1094 /*
@@ -1377,7 +1390,7 @@ static int attach_recursive_mnt(struct vfsmount *source_mnt,
1377 if (parent_path) { 1390 if (parent_path) {
1378 detach_mnt(source_mnt, parent_path); 1391 detach_mnt(source_mnt, parent_path);
1379 attach_mnt(source_mnt, path); 1392 attach_mnt(source_mnt, path);
1380 touch_mnt_namespace(current->nsproxy->mnt_ns); 1393 touch_mnt_namespace(parent_path->mnt->mnt_ns);
1381 } else { 1394 } else {
1382 mnt_set_mountpoint(dest_mnt, dest_dentry, source_mnt); 1395 mnt_set_mountpoint(dest_mnt, dest_dentry, source_mnt);
1383 commit_tree(source_mnt); 1396 commit_tree(source_mnt);
@@ -1920,8 +1933,9 @@ long do_mount(char *dev_name, char *dir_name, char *type_page,
1920 if (data_page) 1933 if (data_page)
1921 ((char *)data_page)[PAGE_SIZE - 1] = 0; 1934 ((char *)data_page)[PAGE_SIZE - 1] = 0;
1922 1935
1923 /* Default to relatime */ 1936 /* Default to relatime unless overriden */
1924 mnt_flags |= MNT_RELATIME; 1937 if (!(flags & MS_NOATIME))
1938 mnt_flags |= MNT_RELATIME;
1925 1939
1926 /* Separate the per-mountpoint flags */ 1940 /* Separate the per-mountpoint flags */
1927 if (flags & MS_NOSUID) 1941 if (flags & MS_NOSUID)
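
[The namespace.c hunks above make the saved mount-option string RCU-protected: generic_show_options() now reads sb->s_options under rcu_read_lock(), save_mount_options() asserts it only ever sets the pointer once, and a new replace_mount_options() swaps the string and frees the old copy after a grace period; filesystems that rebuild their option string (on remount, say) would be expected to call the new helper. As added:]

    void replace_mount_options(struct super_block *sb, char *options)
    {
    	char *old = sb->s_options;

    	rcu_assign_pointer(sb->s_options, options);
    	if (old) {
    		synchronize_rcu();	/* let concurrent show_options readers finish */
    		kfree(old);
    	}
    }
    EXPORT_SYMBOL(replace_mount_options);

[Three smaller fixes ride along: do_umount() no longer wraps ->umount_begin() in lock_kernel() (NFS takes the BKL itself, see fs/nfs/super.c below), MNT_RELATIME is only the default when MS_NOATIME was not requested, and attach_recursive_mnt() touches the mount namespace of the parent path rather than current's.]
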
diff --git a/fs/ncpfs/ioctl.c b/fs/ncpfs/ioctl.c
index f54360f50a9..fa038df63ac 100644
--- a/fs/ncpfs/ioctl.c
+++ b/fs/ncpfs/ioctl.c
@@ -660,13 +660,10 @@ outrel:
660 if (user.object_name_len > NCP_OBJECT_NAME_MAX_LEN) 660 if (user.object_name_len > NCP_OBJECT_NAME_MAX_LEN)
661 return -ENOMEM; 661 return -ENOMEM;
662 if (user.object_name_len) { 662 if (user.object_name_len) {
663 newname = kmalloc(user.object_name_len, GFP_USER); 663 newname = memdup_user(user.object_name,
664 if (!newname) 664 user.object_name_len);
665 return -ENOMEM; 665 if (IS_ERR(newname))
666 if (copy_from_user(newname, user.object_name, user.object_name_len)) { 666 return PTR_ERR(newname);
667 kfree(newname);
668 return -EFAULT;
669 }
670 } else { 667 } else {
671 newname = NULL; 668 newname = NULL;
672 } 669 }
@@ -760,13 +757,9 @@ outrel:
760 if (user.len > NCP_PRIVATE_DATA_MAX_LEN) 757 if (user.len > NCP_PRIVATE_DATA_MAX_LEN)
761 return -ENOMEM; 758 return -ENOMEM;
762 if (user.len) { 759 if (user.len) {
763 new = kmalloc(user.len, GFP_USER); 760 new = memdup_user(user.data, user.len);
764 if (!new) 761 if (IS_ERR(new))
765 return -ENOMEM; 762 return PTR_ERR(new);
766 if (copy_from_user(new, user.data, user.len)) {
767 kfree(new);
768 return -EFAULT;
769 }
770 } else { 763 } else {
771 new = NULL; 764 new = NULL;
772 } 765 }
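
[Both ncpfs ioctl paths above replace the open-coded kmalloc() + copy_from_user() + error-unwind sequence with memdup_user(), which allocates, copies and returns either the new buffer or an ERR_PTR. The resulting pattern:]

    newname = memdup_user(user.object_name, user.object_name_len);
    if (IS_ERR(newname))
    	return PTR_ERR(newname);	/* -ENOMEM or -EFAULT, as appropriate */
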
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 370b190a09d..89f98e9a024 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1943,7 +1943,8 @@ int nfs_permission(struct inode *inode, int mask)
1943 case S_IFREG: 1943 case S_IFREG:
1944 /* NFSv4 has atomic_open... */ 1944 /* NFSv4 has atomic_open... */
1945 if (nfs_server_capable(inode, NFS_CAP_ATOMIC_OPEN) 1945 if (nfs_server_capable(inode, NFS_CAP_ATOMIC_OPEN)
1946 && (mask & MAY_OPEN)) 1946 && (mask & MAY_OPEN)
1947 && !(mask & MAY_EXEC))
1947 goto out; 1948 goto out;
1948 break; 1949 break;
1949 case S_IFDIR: 1950 case S_IFDIR:
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 5a97bcfe03e..ec7e27d00bc 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -517,10 +517,10 @@ static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
517 517
518 ret = nfs_updatepage(filp, page, 0, pagelen); 518 ret = nfs_updatepage(filp, page, 0, pagelen);
519out_unlock: 519out_unlock:
520 if (!ret)
521 return VM_FAULT_LOCKED;
520 unlock_page(page); 522 unlock_page(page);
521 if (ret) 523 return VM_FAULT_SIGBUS;
522 ret = VM_FAULT_SIGBUS;
523 return ret;
524} 524}
525 525
526static struct vm_operations_struct nfs_file_vm_ops = { 526static struct vm_operations_struct nfs_file_vm_ops = {
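
[nfs_vm_page_mkwrite() above now returns VM_FAULT_LOCKED on success, handing the still-locked page back to the fault handler instead of unlocking it and returning 0; only the failure path unlocks and reports VM_FAULT_SIGBUS. The reshaped exit path:]

    ret = nfs_updatepage(filp, page, 0, pagelen);
    out_unlock:
    	if (!ret)
    		return VM_FAULT_LOCKED;	/* success: keep the page locked for the caller */
    	unlock_page(page);
    	return VM_FAULT_SIGBUS;
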
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index e6a1932c711..35869a4921f 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -713,7 +713,8 @@ nfs3_xdr_setaclargs(struct rpc_rqst *req, __be32 *p,
713 if (args->npages != 0) 713 if (args->npages != 0)
714 xdr_encode_pages(buf, args->pages, 0, args->len); 714 xdr_encode_pages(buf, args->pages, 0, args->len);
715 else 715 else
716 req->rq_slen += args->len; 716 req->rq_slen = xdr_adjust_iovec(req->rq_svec,
717 p + XDR_QUADLEN(args->len));
717 718
718 err = nfsacl_encode(buf, base, args->inode, 719 err = nfsacl_encode(buf, base, args->inode,
719 (args->mask & NFS_ACL) ? 720 (args->mask & NFS_ACL) ?
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index a4d24268029..4674f8092da 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2594,12 +2594,9 @@ static void nfs4_renew_done(struct rpc_task *task, void *data)
2594 unsigned long timestamp = (unsigned long)data; 2594 unsigned long timestamp = (unsigned long)data;
2595 2595
2596 if (task->tk_status < 0) { 2596 if (task->tk_status < 0) {
2597 switch (task->tk_status) { 2597 /* Unless we're shutting down, schedule state recovery! */
2598 case -NFS4ERR_STALE_CLIENTID: 2598 if (test_bit(NFS_CS_RENEWD, &clp->cl_res_state) != 0)
2599 case -NFS4ERR_EXPIRED: 2599 nfs4_schedule_state_recovery(clp);
2600 case -NFS4ERR_CB_PATH_DOWN:
2601 nfs4_schedule_state_recovery(clp);
2602 }
2603 return; 2600 return;
2604 } 2601 }
2605 spin_lock(&clp->cl_lock); 2602 spin_lock(&clp->cl_lock);
diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c
index d9ef602fbc5..e3ed5908820 100644
--- a/fs/nfs/nfsroot.c
+++ b/fs/nfs/nfsroot.c
@@ -129,7 +129,7 @@ enum {
129 Opt_err 129 Opt_err
130}; 130};
131 131
132static match_table_t __initconst tokens = { 132static const match_table_t tokens __initconst = {
133 {Opt_port, "port=%u"}, 133 {Opt_port, "port=%u"},
134 {Opt_rsize, "rsize=%u"}, 134 {Opt_rsize, "rsize=%u"},
135 {Opt_wsize, "wsize=%u"}, 135 {Opt_wsize, "wsize=%u"},
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 6717200923f..d2d67781c57 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -683,9 +683,12 @@ static int nfs_show_stats(struct seq_file *m, struct vfsmount *mnt)
683 */ 683 */
684static void nfs_umount_begin(struct super_block *sb) 684static void nfs_umount_begin(struct super_block *sb)
685{ 685{
686 struct nfs_server *server = NFS_SB(sb); 686 struct nfs_server *server;
687 struct rpc_clnt *rpc; 687 struct rpc_clnt *rpc;
688 688
689 lock_kernel();
690
691 server = NFS_SB(sb);
689 /* -EIO all pending I/O */ 692 /* -EIO all pending I/O */
690 rpc = server->client_acl; 693 rpc = server->client_acl;
691 if (!IS_ERR(rpc)) 694 if (!IS_ERR(rpc))
@@ -693,6 +696,8 @@ static void nfs_umount_begin(struct super_block *sb)
693 rpc = server->client; 696 rpc = server->client;
694 if (!IS_ERR(rpc)) 697 if (!IS_ERR(rpc))
695 rpc_killall_tasks(rpc); 698 rpc_killall_tasks(rpc);
699
700 unlock_kernel();
696} 701}
697 702
698/* 703/*
@@ -2106,8 +2111,7 @@ out_err_nosb:
2106error_splat_root: 2111error_splat_root:
2107 dput(mntroot); 2112 dput(mntroot);
2108error_splat_super: 2113error_splat_super:
2109 up_write(&s->s_umount); 2114 deactivate_locked_super(s);
2110 deactivate_super(s);
2111 goto out; 2115 goto out;
2112} 2116}
2113 2117
@@ -2203,8 +2207,7 @@ out_err_noserver:
2203 return error; 2207 return error;
2204 2208
2205error_splat_super: 2209error_splat_super:
2206 up_write(&s->s_umount); 2210 deactivate_locked_super(s);
2207 deactivate_super(s);
2208 dprintk("<-- nfs_xdev_get_sb() = %d [splat]\n", error); 2211 dprintk("<-- nfs_xdev_get_sb() = %d [splat]\n", error);
2209 return error; 2212 return error;
2210} 2213}
@@ -2464,8 +2467,7 @@ out_free:
2464error_splat_root: 2467error_splat_root:
2465 dput(mntroot); 2468 dput(mntroot);
2466error_splat_super: 2469error_splat_super:
2467 up_write(&s->s_umount); 2470 deactivate_locked_super(s);
2468 deactivate_super(s);
2469 goto out; 2471 goto out;
2470} 2472}
2471 2473
@@ -2559,8 +2561,7 @@ out_err_noserver:
2559 return error; 2561 return error;
2560 2562
2561error_splat_super: 2563error_splat_super:
2562 up_write(&s->s_umount); 2564 deactivate_locked_super(s);
2563 deactivate_super(s);
2564 dprintk("<-- nfs4_xdev_get_sb() = %d [splat]\n", error); 2565 dprintk("<-- nfs4_xdev_get_sb() = %d [splat]\n", error);
2565 return error; 2566 return error;
2566} 2567}
@@ -2644,8 +2645,7 @@ out_err_noserver:
2644 return error; 2645 return error;
2645 2646
2646error_splat_super: 2647error_splat_super:
2647 up_write(&s->s_umount); 2648 deactivate_locked_super(s);
2648 deactivate_super(s);
2649 dprintk("<-- nfs4_referral_get_sb() = %d [splat]\n", error); 2649 dprintk("<-- nfs4_referral_get_sb() = %d [splat]\n", error);
2650 return error; 2650 return error;
2651} 2651}
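
[Two independent adjustments in fs/nfs/super.c above: nfs_umount_begin() now takes the BKL itself, compensating for do_umount() no longer wrapping ->umount_begin() in lock_kernel() (see the fs/namespace.c hunk earlier), and every *_get_sb() error path adopts deactivate_locked_super() as in fs/libfs.c. A sketch of the reworked umount_begin, with the unchanged RPC-killing body elided:]

    static void nfs_umount_begin(struct super_block *sb)
    {
    	struct nfs_server *server;
    	struct rpc_clnt *rpc;

    	lock_kernel();			/* BKL moved here from do_umount() */
    	server = NFS_SB(sb);		/* dereference only after taking the lock */
    	/* ... rpc_killall_tasks() on the ACL and I/O clients, as before ... */
    	unlock_kernel();
    }
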
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index 3444c0052a8..b5348405046 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -229,21 +229,23 @@ nfsd4_list_rec_dir(struct dentry *dir, recdir_func *f)
229 goto out; 229 goto out;
230 status = vfs_readdir(filp, nfsd4_build_namelist, &names); 230 status = vfs_readdir(filp, nfsd4_build_namelist, &names);
231 fput(filp); 231 fput(filp);
232 mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT);
232 while (!list_empty(&names)) { 233 while (!list_empty(&names)) {
233 entry = list_entry(names.next, struct name_list, list); 234 entry = list_entry(names.next, struct name_list, list);
234 235
235 dentry = lookup_one_len(entry->name, dir, HEXDIR_LEN-1); 236 dentry = lookup_one_len(entry->name, dir, HEXDIR_LEN-1);
236 if (IS_ERR(dentry)) { 237 if (IS_ERR(dentry)) {
237 status = PTR_ERR(dentry); 238 status = PTR_ERR(dentry);
238 goto out; 239 break;
239 } 240 }
240 status = f(dir, dentry); 241 status = f(dir, dentry);
241 dput(dentry); 242 dput(dentry);
242 if (status) 243 if (status)
243 goto out; 244 break;
244 list_del(&entry->list); 245 list_del(&entry->list);
245 kfree(entry); 246 kfree(entry);
246 } 247 }
248 mutex_unlock(&dir->d_inode->i_mutex);
247out: 249out:
248 while (!list_empty(&names)) { 250 while (!list_empty(&names)) {
249 entry = list_entry(names.next, struct name_list, list); 251 entry = list_entry(names.next, struct name_list, list);
@@ -255,36 +257,6 @@ out:
255} 257}
256 258
257static int 259static int
258nfsd4_remove_clid_file(struct dentry *dir, struct dentry *dentry)
259{
260 int status;
261
262 if (!S_ISREG(dir->d_inode->i_mode)) {
263 printk("nfsd4: non-file found in client recovery directory\n");
264 return -EINVAL;
265 }
266 mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT);
267 status = vfs_unlink(dir->d_inode, dentry);
268 mutex_unlock(&dir->d_inode->i_mutex);
269 return status;
270}
271
272static int
273nfsd4_clear_clid_dir(struct dentry *dir, struct dentry *dentry)
274{
275 int status;
276
277 /* For now this directory should already be empty, but we empty it of
278 * any regular files anyway, just in case the directory was created by
279 * a kernel from the future.... */
280 nfsd4_list_rec_dir(dentry, nfsd4_remove_clid_file);
281 mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT);
282 status = vfs_rmdir(dir->d_inode, dentry);
283 mutex_unlock(&dir->d_inode->i_mutex);
284 return status;
285}
286
287static int
288nfsd4_unlink_clid_dir(char *name, int namlen) 260nfsd4_unlink_clid_dir(char *name, int namlen)
289{ 261{
290 struct dentry *dentry; 262 struct dentry *dentry;
@@ -292,20 +264,20 @@ nfsd4_unlink_clid_dir(char *name, int namlen)
292 264
293 dprintk("NFSD: nfsd4_unlink_clid_dir. name %.*s\n", namlen, name); 265 dprintk("NFSD: nfsd4_unlink_clid_dir. name %.*s\n", namlen, name);
294 266
295 mutex_lock(&rec_dir.dentry->d_inode->i_mutex); 267 mutex_lock_nested(&rec_dir.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
296 dentry = lookup_one_len(name, rec_dir.dentry, namlen); 268 dentry = lookup_one_len(name, rec_dir.dentry, namlen);
297 mutex_unlock(&rec_dir.dentry->d_inode->i_mutex);
298 if (IS_ERR(dentry)) { 269 if (IS_ERR(dentry)) {
299 status = PTR_ERR(dentry); 270 status = PTR_ERR(dentry);
300 return status; 271 goto out_unlock;
301 } 272 }
302 status = -ENOENT; 273 status = -ENOENT;
303 if (!dentry->d_inode) 274 if (!dentry->d_inode)
304 goto out; 275 goto out;
305 276 status = vfs_rmdir(rec_dir.dentry->d_inode, dentry);
306 status = nfsd4_clear_clid_dir(rec_dir.dentry, dentry);
307out: 277out:
308 dput(dentry); 278 dput(dentry);
279out_unlock:
280 mutex_unlock(&rec_dir.dentry->d_inode->i_mutex);
309 return status; 281 return status;
310} 282}
311 283
@@ -348,7 +320,7 @@ purge_old(struct dentry *parent, struct dentry *child)
348 if (nfs4_has_reclaimed_state(child->d_name.name, false)) 320 if (nfs4_has_reclaimed_state(child->d_name.name, false))
349 return 0; 321 return 0;
350 322
351 status = nfsd4_clear_clid_dir(parent, child); 323 status = vfs_rmdir(parent->d_inode, child);
352 if (status) 324 if (status)
353 printk("failed to remove client recovery directory %s\n", 325 printk("failed to remove client recovery directory %s\n",
354 child->d_name.name); 326 child->d_name.name);
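
[The recovery-directory changes above take the parent directory's i_mutex (with I_MUTEX_PARENT nesting) around the whole walk in nfsd4_list_rec_dir() and around lookup plus removal in nfsd4_unlink_clid_dir(), which satisfies the WARN_ON_ONCE just added to lookup_one_len(); the nfsd4_remove_clid_file()/nfsd4_clear_clid_dir() wrappers go away in favour of calling vfs_rmdir() directly. The reworked unlink path, as added:]

    mutex_lock_nested(&rec_dir.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
    dentry = lookup_one_len(name, rec_dir.dentry, namlen);
    if (IS_ERR(dentry)) {
    	status = PTR_ERR(dentry);
    	goto out_unlock;
    }
    status = -ENOENT;
    if (!dentry->d_inode)
    	goto out;
    status = vfs_rmdir(rec_dir.dentry->d_inode, dentry);
    out:
    	dput(dentry);
    out_unlock:
    	mutex_unlock(&rec_dir.dentry->d_inode->i_mutex);
    	return status;
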
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index c65a27b76a9..3b711f5147a 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -580,7 +580,6 @@ free_session(struct kref *kref)
580 struct nfsd4_cache_entry *e = &ses->se_slots[i].sl_cache_entry; 580 struct nfsd4_cache_entry *e = &ses->se_slots[i].sl_cache_entry;
581 nfsd4_release_respages(e->ce_respages, e->ce_resused); 581 nfsd4_release_respages(e->ce_respages, e->ce_resused);
582 } 582 }
583 kfree(ses->se_slots);
584 kfree(ses); 583 kfree(ses);
585} 584}
586 585
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index b820c311931..b73549d293b 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -2214,6 +2214,15 @@ nfsd4_encode_dirent_fattr(struct nfsd4_readdir *cd,
2214 dentry = lookup_one_len(name, cd->rd_fhp->fh_dentry, namlen); 2214 dentry = lookup_one_len(name, cd->rd_fhp->fh_dentry, namlen);
2215 if (IS_ERR(dentry)) 2215 if (IS_ERR(dentry))
2216 return nfserrno(PTR_ERR(dentry)); 2216 return nfserrno(PTR_ERR(dentry));
2217 if (!dentry->d_inode) {
2218 /*
2219 * nfsd_buffered_readdir drops the i_mutex between
2220 * readdir and calling this callback, leaving a window
2221 * where this directory entry could have gone away.
2222 */
2223 dput(dentry);
2224 return nfserr_noent;
2225 }
2217 2226
2218 exp_get(exp); 2227 exp_get(exp);
2219 /* 2228 /*
@@ -2276,6 +2285,7 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen,
2276 struct nfsd4_readdir *cd = container_of(ccd, struct nfsd4_readdir, common); 2285 struct nfsd4_readdir *cd = container_of(ccd, struct nfsd4_readdir, common);
2277 int buflen; 2286 int buflen;
2278 __be32 *p = cd->buffer; 2287 __be32 *p = cd->buffer;
2288 __be32 *cookiep;
2279 __be32 nfserr = nfserr_toosmall; 2289 __be32 nfserr = nfserr_toosmall;
2280 2290
2281 /* In nfsv4, "." and ".." never make it onto the wire.. */ 2291 /* In nfsv4, "." and ".." never make it onto the wire.. */
@@ -2292,7 +2302,7 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen,
2292 goto fail; 2302 goto fail;
2293 2303
2294 *p++ = xdr_one; /* mark entry present */ 2304 *p++ = xdr_one; /* mark entry present */
2295 cd->offset = p; /* remember pointer */ 2305 cookiep = p;
2296 p = xdr_encode_hyper(p, NFS_OFFSET_MAX); /* offset of next entry */ 2306 p = xdr_encode_hyper(p, NFS_OFFSET_MAX); /* offset of next entry */
2297 p = xdr_encode_array(p, name, namlen); /* name length & name */ 2307 p = xdr_encode_array(p, name, namlen); /* name length & name */
2298 2308
@@ -2306,6 +2316,8 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen,
2306 goto fail; 2316 goto fail;
2307 case nfserr_dropit: 2317 case nfserr_dropit:
2308 goto fail; 2318 goto fail;
2319 case nfserr_noent:
2320 goto skip_entry;
2309 default: 2321 default:
2310 /* 2322 /*
2311 * If the client requested the RDATTR_ERROR attribute, 2323 * If the client requested the RDATTR_ERROR attribute,
@@ -2324,6 +2336,8 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen,
2324 } 2336 }
2325 cd->buflen -= (p - cd->buffer); 2337 cd->buflen -= (p - cd->buffer);
2326 cd->buffer = p; 2338 cd->buffer = p;
2339 cd->offset = cookiep;
2340skip_entry:
2327 cd->common.err = nfs_ok; 2341 cd->common.err = nfs_ok;
2328 return 0; 2342 return 0;
2329fail: 2343fail:
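
[The readdir-encoding change above handles directory entries that disappear between the underlying readdir and the per-entry lookup (a window opened because nfsd_buffered_readdir drops i_mutex between the two, as the new comment notes): a negative dentry now yields nfserr_noent, the encoder skips such an entry instead of failing, and the cookie pointer (cd->offset) is only published once the entry has been fully encoded. Condensed from the hunk:]

    /* in nfsd4_encode_dirent_fattr(): */
    if (!dentry->d_inode) {
    	dput(dentry);
    	return nfserr_noent;		/* entry vanished after readdir */
    }

    /* in nfsd4_encode_dirent(): */
    case nfserr_noent:
    	goto skip_entry;		/* drop the half-written entry */
    /* ... */
    	cd->offset = cookiep;		/* cookie slot published only for kept entries */
    skip_entry:
    	cd->common.err = nfs_ok;
    	return 0;
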
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index ab93fcfef25..b660435978d 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -116,10 +116,15 @@ nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp,
116 } 116 }
117 if ((exp->ex_flags & NFSEXP_CROSSMOUNT) || EX_NOHIDE(exp2)) { 117 if ((exp->ex_flags & NFSEXP_CROSSMOUNT) || EX_NOHIDE(exp2)) {
118 /* successfully crossed mount point */ 118 /* successfully crossed mount point */
119 exp_put(exp); 119 /*
120 *expp = exp2; 120 * This is subtle: dentry is *not* under mnt at this point.
121 * The only reason we are safe is that original mnt is pinned
122 * down by exp, so we should dput before putting exp.
123 */
121 dput(dentry); 124 dput(dentry);
122 *dpp = mounts; 125 *dpp = mounts;
126 exp_put(exp);
127 *expp = exp2;
123 } else { 128 } else {
124 exp_put(exp2); 129 exp_put(exp2);
125 dput(mounts); 130 dput(mounts);
@@ -1010,6 +1015,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
1010 host_err = vfs_writev(file, (struct iovec __user *)vec, vlen, &offset); 1015 host_err = vfs_writev(file, (struct iovec __user *)vec, vlen, &offset);
1011 set_fs(oldfs); 1016 set_fs(oldfs);
1012 if (host_err >= 0) { 1017 if (host_err >= 0) {
1018 *cnt = host_err;
1013 nfsdstats.io_write += host_err; 1019 nfsdstats.io_write += host_err;
1014 fsnotify_modify(file->f_path.dentry); 1020 fsnotify_modify(file->f_path.dentry);
1015 } 1021 }
@@ -1055,10 +1061,9 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
1055 } 1061 }
1056 1062
1057 dprintk("nfsd: write complete host_err=%d\n", host_err); 1063 dprintk("nfsd: write complete host_err=%d\n", host_err);
1058 if (host_err >= 0) { 1064 if (host_err >= 0)
1059 err = 0; 1065 err = 0;
1060 *cnt = host_err; 1066 else
1061 } else
1062 err = nfserrno(host_err); 1067 err = nfserrno(host_err);
1063out: 1068out:
1064 return err; 1069 return err;
@@ -1885,8 +1890,8 @@ static int nfsd_buffered_filldir(void *__buf, const char *name, int namlen,
1885 return 0; 1890 return 0;
1886} 1891}
1887 1892
1888static int nfsd_buffered_readdir(struct file *file, filldir_t func, 1893static __be32 nfsd_buffered_readdir(struct file *file, filldir_t func,
1889 struct readdir_cd *cdp, loff_t *offsetp) 1894 struct readdir_cd *cdp, loff_t *offsetp)
1890{ 1895{
1891 struct readdir_data buf; 1896 struct readdir_data buf;
1892 struct buffered_dirent *de; 1897 struct buffered_dirent *de;
@@ -1896,11 +1901,12 @@ static int nfsd_buffered_readdir(struct file *file, filldir_t func,
1896 1901
1897 buf.dirent = (void *)__get_free_page(GFP_KERNEL); 1902 buf.dirent = (void *)__get_free_page(GFP_KERNEL);
1898 if (!buf.dirent) 1903 if (!buf.dirent)
1899 return -ENOMEM; 1904 return nfserrno(-ENOMEM);
1900 1905
1901 offset = *offsetp; 1906 offset = *offsetp;
1902 1907
1903 while (1) { 1908 while (1) {
1909 struct inode *dir_inode = file->f_path.dentry->d_inode;
1904 unsigned int reclen; 1910 unsigned int reclen;
1905 1911
1906 cdp->err = nfserr_eof; /* will be cleared on successful read */ 1912 cdp->err = nfserr_eof; /* will be cleared on successful read */
@@ -1919,26 +1925,38 @@ static int nfsd_buffered_readdir(struct file *file, filldir_t func,
1919 if (!size) 1925 if (!size)
1920 break; 1926 break;
1921 1927
1928 /*
1929 * Various filldir functions may end up calling back into
1930 * lookup_one_len() and the file system's ->lookup() method.
1931 * These expect i_mutex to be held, as it would within readdir.
1932 */
1933 host_err = mutex_lock_killable(&dir_inode->i_mutex);
1934 if (host_err)
1935 break;
1936
1922 de = (struct buffered_dirent *)buf.dirent; 1937 de = (struct buffered_dirent *)buf.dirent;
1923 while (size > 0) { 1938 while (size > 0) {
1924 offset = de->offset; 1939 offset = de->offset;
1925 1940
1926 if (func(cdp, de->name, de->namlen, de->offset, 1941 if (func(cdp, de->name, de->namlen, de->offset,
1927 de->ino, de->d_type)) 1942 de->ino, de->d_type))
1928 goto done; 1943 break;
1929 1944
1930 if (cdp->err != nfs_ok) 1945 if (cdp->err != nfs_ok)
1931 goto done; 1946 break;
1932 1947
1933 reclen = ALIGN(sizeof(*de) + de->namlen, 1948 reclen = ALIGN(sizeof(*de) + de->namlen,
1934 sizeof(u64)); 1949 sizeof(u64));
1935 size -= reclen; 1950 size -= reclen;
1936 de = (struct buffered_dirent *)((char *)de + reclen); 1951 de = (struct buffered_dirent *)((char *)de + reclen);
1937 } 1952 }
1953 mutex_unlock(&dir_inode->i_mutex);
1954 if (size > 0) /* We bailed out early */
1955 break;
1956
1938 offset = vfs_llseek(file, 0, SEEK_CUR); 1957 offset = vfs_llseek(file, 0, SEEK_CUR);
1939 } 1958 }
1940 1959
1941 done:
1942 free_page((unsigned long)(buf.dirent)); 1960 free_page((unsigned long)(buf.dirent));
1943 1961
1944 if (host_err) 1962 if (host_err)
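
The nfsd_buffered_readdir() hunk above does two things: it now takes the directory's i_mutex around the loop that replays the buffered entries, because the filldir callbacks can call back into lookup_one_len() and the filesystem's ->lookup(), and it walks the page-sized buffer record by record using the aligned record length. Below is a hedged user-space sketch of the same buffer-then-walk pattern using the raw getdents64(2) syscall; there is no i_mutex in user space, so only the record-walking half is illustrated, and the buffer size and struct layout are the usual ones for this syscall rather than anything taken from the patch.

/*
 * Buffer a batch of directory entries, then walk them by record length,
 * the same shape as nfsd_buffered_readdir()'s inner loop.
 */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/syscall.h>
#include <unistd.h>

struct linux_dirent64 {            /* layout used by getdents64(2) */
	uint64_t       d_ino;
	int64_t        d_off;
	unsigned short d_reclen;
	unsigned char  d_type;
	char           d_name[];
};

int main(int argc, char **argv)
{
	const char *path = argc > 1 ? argv[1] : ".";
	int fd = open(path, O_RDONLY | O_DIRECTORY);
	char *buf = malloc(4096);          /* one "page" of entries */

	if (fd < 0 || !buf)
		return 1;

	for (;;) {
		long n = syscall(SYS_getdents64, fd, buf, 4096);
		if (n <= 0)
			break;             /* 0 = end of directory, <0 = error */
		for (long off = 0; off < n; ) {
			struct linux_dirent64 *de =
				(struct linux_dirent64 *)(buf + off);
			printf("%llu\t%s\n",
			       (unsigned long long)de->d_ino, de->d_name);
			off += de->d_reclen;   /* advance by record length */
		}
	}
	free(buf);
	close(fd);
	return 0;
}

Build with a plain "cc" and pass a directory as the first argument; the kernel version differs mainly in that each replay pass runs under mutex_lock_killable(&dir_inode->i_mutex) and bails out early when the filldir callback says stop.
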
diff --git a/fs/nilfs2/cpfile.c b/fs/nilfs2/cpfile.c
index e90b60dfced..300f1cdfa86 100644
--- a/fs/nilfs2/cpfile.c
+++ b/fs/nilfs2/cpfile.c
@@ -311,7 +311,7 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile,
311 ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &cp_bh); 311 ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &cp_bh);
312 if (ret < 0) { 312 if (ret < 0) {
313 if (ret != -ENOENT) 313 if (ret != -ENOENT)
314 goto out_sem; 314 goto out_header;
315 /* skip hole */ 315 /* skip hole */
316 ret = 0; 316 ret = 0;
317 continue; 317 continue;
@@ -344,7 +344,7 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile,
344 continue; 344 continue;
345 printk(KERN_ERR "%s: cannot delete block\n", 345 printk(KERN_ERR "%s: cannot delete block\n",
346 __func__); 346 __func__);
347 goto out_sem; 347 goto out_header;
348 } 348 }
349 } 349 }
350 350
@@ -361,6 +361,8 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile,
361 nilfs_mdt_mark_dirty(cpfile); 361 nilfs_mdt_mark_dirty(cpfile);
362 kunmap_atomic(kaddr, KM_USER0); 362 kunmap_atomic(kaddr, KM_USER0);
363 } 363 }
364
365 out_header:
364 brelse(header_bh); 366 brelse(header_bh);
365 367
366 out_sem: 368 out_sem:
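
The cpfile change retargets the early error paths from out_sem to a new out_header label so that brelse(header_bh) runs before the semaphore is released; previously those paths leaked the header buffer head. Below is a minimal user-space sketch of the same layered goto-unwind idiom, with malloc/free and a pthread mutex standing in for the buffer head and the semaphore (all names are hypothetical).

/*
 * Layered goto unwind: every early error path jumps to the label that
 * releases everything acquired so far, in reverse order.
 */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

static pthread_mutex_t sem = PTHREAD_MUTEX_INITIALIZER;

static int do_work(int fail_step)
{
	char *header = NULL;
	int ret = 0;

	pthread_mutex_lock(&sem);           /* like down_write(&...->mi_sem) */

	header = malloc(64);                /* like reading header_bh */
	if (!header) {
		ret = -1;
		goto out_sem;               /* nothing but the lock to undo */
	}

	if (fail_step == 1) {               /* mid-loop failure */
		ret = -2;
		goto out_header;            /* must NOT skip the release */
	}

out_header:
	free(header);                       /* like brelse(header_bh) */
out_sem:
	pthread_mutex_unlock(&sem);         /* like up_write(...) */
	return ret;
}

int main(void)
{
	printf("ok path: %d\n", do_work(0));
	printf("err path: %d\n", do_work(1));
	return 0;
}
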
diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c
index 108d281ebca..d6759b92006 100644
--- a/fs/nilfs2/ioctl.c
+++ b/fs/nilfs2/ioctl.c
@@ -25,6 +25,7 @@
25#include <linux/smp_lock.h> /* lock_kernel(), unlock_kernel() */ 25#include <linux/smp_lock.h> /* lock_kernel(), unlock_kernel() */
26#include <linux/capability.h> /* capable() */ 26#include <linux/capability.h> /* capable() */
27#include <linux/uaccess.h> /* copy_from_user(), copy_to_user() */ 27#include <linux/uaccess.h> /* copy_from_user(), copy_to_user() */
28#include <linux/vmalloc.h>
28#include <linux/nilfs2_fs.h> 29#include <linux/nilfs2_fs.h>
29#include "nilfs.h" 30#include "nilfs.h"
30#include "segment.h" 31#include "segment.h"
@@ -147,29 +148,12 @@ static ssize_t
147nilfs_ioctl_do_get_cpinfo(struct the_nilfs *nilfs, __u64 *posp, int flags, 148nilfs_ioctl_do_get_cpinfo(struct the_nilfs *nilfs, __u64 *posp, int flags,
148 void *buf, size_t size, size_t nmembs) 149 void *buf, size_t size, size_t nmembs)
149{ 150{
150 return nilfs_cpfile_get_cpinfo(nilfs->ns_cpfile, posp, flags, buf,
151 nmembs);
152}
153
154static int nilfs_ioctl_get_cpinfo(struct inode *inode, struct file *filp,
155 unsigned int cmd, void __user *argp)
156{
157 struct the_nilfs *nilfs = NILFS_SB(inode->i_sb)->s_nilfs;
158 struct nilfs_argv argv;
159 int ret; 151 int ret;
160 152
161 if (copy_from_user(&argv, argp, sizeof(argv)))
162 return -EFAULT;
163
164 down_read(&nilfs->ns_segctor_sem); 153 down_read(&nilfs->ns_segctor_sem);
165 ret = nilfs_ioctl_wrap_copy(nilfs, &argv, _IOC_DIR(cmd), 154 ret = nilfs_cpfile_get_cpinfo(nilfs->ns_cpfile, posp, flags, buf,
166 nilfs_ioctl_do_get_cpinfo); 155 nmembs);
167 up_read(&nilfs->ns_segctor_sem); 156 up_read(&nilfs->ns_segctor_sem);
168 if (ret < 0)
169 return ret;
170
171 if (copy_to_user(argp, &argv, sizeof(argv)))
172 ret = -EFAULT;
173 return ret; 157 return ret;
174} 158}
175 159
@@ -195,28 +179,11 @@ static ssize_t
195nilfs_ioctl_do_get_suinfo(struct the_nilfs *nilfs, __u64 *posp, int flags, 179nilfs_ioctl_do_get_suinfo(struct the_nilfs *nilfs, __u64 *posp, int flags,
196 void *buf, size_t size, size_t nmembs) 180 void *buf, size_t size, size_t nmembs)
197{ 181{
198 return nilfs_sufile_get_suinfo(nilfs->ns_sufile, *posp, buf, nmembs);
199}
200
201static int nilfs_ioctl_get_suinfo(struct inode *inode, struct file *filp,
202 unsigned int cmd, void __user *argp)
203{
204 struct the_nilfs *nilfs = NILFS_SB(inode->i_sb)->s_nilfs;
205 struct nilfs_argv argv;
206 int ret; 182 int ret;
207 183
208 if (copy_from_user(&argv, argp, sizeof(argv)))
209 return -EFAULT;
210
211 down_read(&nilfs->ns_segctor_sem); 184 down_read(&nilfs->ns_segctor_sem);
212 ret = nilfs_ioctl_wrap_copy(nilfs, &argv, _IOC_DIR(cmd), 185 ret = nilfs_sufile_get_suinfo(nilfs->ns_sufile, *posp, buf, nmembs);
213 nilfs_ioctl_do_get_suinfo);
214 up_read(&nilfs->ns_segctor_sem); 186 up_read(&nilfs->ns_segctor_sem);
215 if (ret < 0)
216 return ret;
217
218 if (copy_to_user(argp, &argv, sizeof(argv)))
219 ret = -EFAULT;
220 return ret; 187 return ret;
221} 188}
222 189
@@ -242,28 +209,11 @@ static ssize_t
242nilfs_ioctl_do_get_vinfo(struct the_nilfs *nilfs, __u64 *posp, int flags, 209nilfs_ioctl_do_get_vinfo(struct the_nilfs *nilfs, __u64 *posp, int flags,
243 void *buf, size_t size, size_t nmembs) 210 void *buf, size_t size, size_t nmembs)
244{ 211{
245 return nilfs_dat_get_vinfo(nilfs_dat_inode(nilfs), buf, nmembs);
246}
247
248static int nilfs_ioctl_get_vinfo(struct inode *inode, struct file *filp,
249 unsigned int cmd, void __user *argp)
250{
251 struct the_nilfs *nilfs = NILFS_SB(inode->i_sb)->s_nilfs;
252 struct nilfs_argv argv;
253 int ret; 212 int ret;
254 213
255 if (copy_from_user(&argv, argp, sizeof(argv)))
256 return -EFAULT;
257
258 down_read(&nilfs->ns_segctor_sem); 214 down_read(&nilfs->ns_segctor_sem);
259 ret = nilfs_ioctl_wrap_copy(nilfs, &argv, _IOC_DIR(cmd), 215 ret = nilfs_dat_get_vinfo(nilfs_dat_inode(nilfs), buf, nmembs);
260 nilfs_ioctl_do_get_vinfo);
261 up_read(&nilfs->ns_segctor_sem); 216 up_read(&nilfs->ns_segctor_sem);
262 if (ret < 0)
263 return ret;
264
265 if (copy_to_user(argp, &argv, sizeof(argv)))
266 ret = -EFAULT;
267 return ret; 217 return ret;
268} 218}
269 219
@@ -276,17 +226,21 @@ nilfs_ioctl_do_get_bdescs(struct the_nilfs *nilfs, __u64 *posp, int flags,
276 struct nilfs_bdesc *bdescs = buf; 226 struct nilfs_bdesc *bdescs = buf;
277 int ret, i; 227 int ret, i;
278 228
229 down_read(&nilfs->ns_segctor_sem);
279 for (i = 0; i < nmembs; i++) { 230 for (i = 0; i < nmembs; i++) {
280 ret = nilfs_bmap_lookup_at_level(bmap, 231 ret = nilfs_bmap_lookup_at_level(bmap,
281 bdescs[i].bd_offset, 232 bdescs[i].bd_offset,
282 bdescs[i].bd_level + 1, 233 bdescs[i].bd_level + 1,
283 &bdescs[i].bd_blocknr); 234 &bdescs[i].bd_blocknr);
284 if (ret < 0) { 235 if (ret < 0) {
285 if (ret != -ENOENT) 236 if (ret != -ENOENT) {
237 up_read(&nilfs->ns_segctor_sem);
286 return ret; 238 return ret;
239 }
287 bdescs[i].bd_blocknr = 0; 240 bdescs[i].bd_blocknr = 0;
288 } 241 }
289 } 242 }
243 up_read(&nilfs->ns_segctor_sem);
290 return nmembs; 244 return nmembs;
291} 245}
292 246
@@ -300,10 +254,11 @@ static int nilfs_ioctl_get_bdescs(struct inode *inode, struct file *filp,
300 if (copy_from_user(&argv, argp, sizeof(argv))) 254 if (copy_from_user(&argv, argp, sizeof(argv)))
301 return -EFAULT; 255 return -EFAULT;
302 256
303 down_read(&nilfs->ns_segctor_sem); 257 if (argv.v_size != sizeof(struct nilfs_bdesc))
258 return -EINVAL;
259
304 ret = nilfs_ioctl_wrap_copy(nilfs, &argv, _IOC_DIR(cmd), 260 ret = nilfs_ioctl_wrap_copy(nilfs, &argv, _IOC_DIR(cmd),
305 nilfs_ioctl_do_get_bdescs); 261 nilfs_ioctl_do_get_bdescs);
306 up_read(&nilfs->ns_segctor_sem);
307 if (ret < 0) 262 if (ret < 0)
308 return ret; 263 return ret;
309 264
@@ -346,10 +301,10 @@ static int nilfs_ioctl_move_inode_block(struct inode *inode,
346 return 0; 301 return 0;
347} 302}
348 303
349static ssize_t 304static int nilfs_ioctl_move_blocks(struct the_nilfs *nilfs,
350nilfs_ioctl_do_move_blocks(struct the_nilfs *nilfs, __u64 *posp, int flags, 305 struct nilfs_argv *argv, void *buf)
351 void *buf, size_t size, size_t nmembs)
352{ 306{
307 size_t nmembs = argv->v_nmembs;
353 struct inode *inode; 308 struct inode *inode;
354 struct nilfs_vdesc *vdesc; 309 struct nilfs_vdesc *vdesc;
355 struct buffer_head *bh, *n; 310 struct buffer_head *bh, *n;
@@ -410,19 +365,10 @@ nilfs_ioctl_do_move_blocks(struct the_nilfs *nilfs, __u64 *posp, int flags,
410 return ret; 365 return ret;
411} 366}
412 367
413static inline int nilfs_ioctl_move_blocks(struct the_nilfs *nilfs, 368static int nilfs_ioctl_delete_checkpoints(struct the_nilfs *nilfs,
414 struct nilfs_argv *argv, 369 struct nilfs_argv *argv, void *buf)
415 int dir)
416{
417 return nilfs_ioctl_wrap_copy(nilfs, argv, dir,
418 nilfs_ioctl_do_move_blocks);
419}
420
421static ssize_t
422nilfs_ioctl_do_delete_checkpoints(struct the_nilfs *nilfs, __u64 *posp,
423 int flags, void *buf, size_t size,
424 size_t nmembs)
425{ 370{
371 size_t nmembs = argv->v_nmembs;
426 struct inode *cpfile = nilfs->ns_cpfile; 372 struct inode *cpfile = nilfs->ns_cpfile;
427 struct nilfs_period *periods = buf; 373 struct nilfs_period *periods = buf;
428 int ret, i; 374 int ret, i;
@@ -436,36 +382,21 @@ nilfs_ioctl_do_delete_checkpoints(struct the_nilfs *nilfs, __u64 *posp,
436 return nmembs; 382 return nmembs;
437} 383}
438 384
439static inline int nilfs_ioctl_delete_checkpoints(struct the_nilfs *nilfs, 385static int nilfs_ioctl_free_vblocknrs(struct the_nilfs *nilfs,
440 struct nilfs_argv *argv, 386 struct nilfs_argv *argv, void *buf)
441 int dir)
442{ 387{
443 return nilfs_ioctl_wrap_copy(nilfs, argv, dir, 388 size_t nmembs = argv->v_nmembs;
444 nilfs_ioctl_do_delete_checkpoints); 389 int ret;
445}
446 390
447static ssize_t 391 ret = nilfs_dat_freev(nilfs_dat_inode(nilfs), buf, nmembs);
448nilfs_ioctl_do_free_vblocknrs(struct the_nilfs *nilfs, __u64 *posp, int flags,
449 void *buf, size_t size, size_t nmembs)
450{
451 int ret = nilfs_dat_freev(nilfs_dat_inode(nilfs), buf, nmembs);
452 392
453 return (ret < 0) ? ret : nmembs; 393 return (ret < 0) ? ret : nmembs;
454} 394}
455 395
456static inline int nilfs_ioctl_free_vblocknrs(struct the_nilfs *nilfs, 396static int nilfs_ioctl_mark_blocks_dirty(struct the_nilfs *nilfs,
457 struct nilfs_argv *argv, 397 struct nilfs_argv *argv, void *buf)
458 int dir)
459{
460 return nilfs_ioctl_wrap_copy(nilfs, argv, dir,
461 nilfs_ioctl_do_free_vblocknrs);
462}
463
464static ssize_t
465nilfs_ioctl_do_mark_blocks_dirty(struct the_nilfs *nilfs, __u64 *posp,
466 int flags, void *buf, size_t size,
467 size_t nmembs)
468{ 398{
399 size_t nmembs = argv->v_nmembs;
469 struct inode *dat = nilfs_dat_inode(nilfs); 400 struct inode *dat = nilfs_dat_inode(nilfs);
470 struct nilfs_bmap *bmap = NILFS_I(dat)->i_bmap; 401 struct nilfs_bmap *bmap = NILFS_I(dat)->i_bmap;
471 struct nilfs_bdesc *bdescs = buf; 402 struct nilfs_bdesc *bdescs = buf;
@@ -504,55 +435,37 @@ nilfs_ioctl_do_mark_blocks_dirty(struct the_nilfs *nilfs, __u64 *posp,
504 return nmembs; 435 return nmembs;
505} 436}
506 437
507static inline int nilfs_ioctl_mark_blocks_dirty(struct the_nilfs *nilfs, 438static int nilfs_ioctl_free_segments(struct the_nilfs *nilfs,
508 struct nilfs_argv *argv, 439 struct nilfs_argv *argv, void *buf)
509 int dir)
510{ 440{
511 return nilfs_ioctl_wrap_copy(nilfs, argv, dir, 441 size_t nmembs = argv->v_nmembs;
512 nilfs_ioctl_do_mark_blocks_dirty); 442 struct nilfs_sb_info *sbi = nilfs->ns_writer;
513}
514
515static ssize_t
516nilfs_ioctl_do_free_segments(struct the_nilfs *nilfs, __u64 *posp, int flags,
517 void *buf, size_t size, size_t nmembs)
518{
519 struct nilfs_sb_info *sbi = nilfs_get_writer(nilfs);
520 int ret; 443 int ret;
521 444
522 if (unlikely(!sbi)) 445 if (unlikely(!sbi)) {
446 /* never happens because called for a writable mount */
447 WARN_ON(1);
523 return -EROFS; 448 return -EROFS;
449 }
524 ret = nilfs_segctor_add_segments_to_be_freed( 450 ret = nilfs_segctor_add_segments_to_be_freed(
525 NILFS_SC(sbi), buf, nmembs); 451 NILFS_SC(sbi), buf, nmembs);
526 nilfs_put_writer(nilfs);
527 452
528 return (ret < 0) ? ret : nmembs; 453 return (ret < 0) ? ret : nmembs;
529} 454}
530 455
531static inline int nilfs_ioctl_free_segments(struct the_nilfs *nilfs,
532 struct nilfs_argv *argv,
533 int dir)
534{
535 return nilfs_ioctl_wrap_copy(nilfs, argv, dir,
536 nilfs_ioctl_do_free_segments);
537}
538
539int nilfs_ioctl_prepare_clean_segments(struct the_nilfs *nilfs, 456int nilfs_ioctl_prepare_clean_segments(struct the_nilfs *nilfs,
540 void __user *argp) 457 struct nilfs_argv *argv, void **kbufs)
541{ 458{
542 struct nilfs_argv argv[5];
543 const char *msg; 459 const char *msg;
544 int dir, ret; 460 int ret;
545
546 if (copy_from_user(argv, argp, sizeof(argv)))
547 return -EFAULT;
548 461
549 dir = _IOC_WRITE; 462 ret = nilfs_ioctl_move_blocks(nilfs, &argv[0], kbufs[0]);
550 ret = nilfs_ioctl_move_blocks(nilfs, &argv[0], dir);
551 if (ret < 0) { 463 if (ret < 0) {
552 msg = "cannot read source blocks"; 464 msg = "cannot read source blocks";
553 goto failed; 465 goto failed;
554 } 466 }
555 ret = nilfs_ioctl_delete_checkpoints(nilfs, &argv[1], dir); 467
468 ret = nilfs_ioctl_delete_checkpoints(nilfs, &argv[1], kbufs[1]);
556 if (ret < 0) { 469 if (ret < 0) {
557 /* 470 /*
558 * can safely abort because checkpoints can be removed 471 * can safely abort because checkpoints can be removed
@@ -561,7 +474,7 @@ int nilfs_ioctl_prepare_clean_segments(struct the_nilfs *nilfs,
561 msg = "cannot delete checkpoints"; 474 msg = "cannot delete checkpoints";
562 goto failed; 475 goto failed;
563 } 476 }
564 ret = nilfs_ioctl_free_vblocknrs(nilfs, &argv[2], dir); 477 ret = nilfs_ioctl_free_vblocknrs(nilfs, &argv[2], kbufs[2]);
565 if (ret < 0) { 478 if (ret < 0) {
566 /* 479 /*
567 * can safely abort because DAT file is updated atomically 480 * can safely abort because DAT file is updated atomically
@@ -570,7 +483,7 @@ int nilfs_ioctl_prepare_clean_segments(struct the_nilfs *nilfs,
570 msg = "cannot delete virtual blocks from DAT file"; 483 msg = "cannot delete virtual blocks from DAT file";
571 goto failed; 484 goto failed;
572 } 485 }
573 ret = nilfs_ioctl_mark_blocks_dirty(nilfs, &argv[3], dir); 486 ret = nilfs_ioctl_mark_blocks_dirty(nilfs, &argv[3], kbufs[3]);
574 if (ret < 0) { 487 if (ret < 0) {
575 /* 488 /*
576 * can safely abort because the operation is nondestructive. 489 * can safely abort because the operation is nondestructive.
@@ -578,7 +491,7 @@ int nilfs_ioctl_prepare_clean_segments(struct the_nilfs *nilfs,
578 msg = "cannot mark copying blocks dirty"; 491 msg = "cannot mark copying blocks dirty";
579 goto failed; 492 goto failed;
580 } 493 }
581 ret = nilfs_ioctl_free_segments(nilfs, &argv[4], dir); 494 ret = nilfs_ioctl_free_segments(nilfs, &argv[4], kbufs[4]);
582 if (ret < 0) { 495 if (ret < 0) {
583 /* 496 /*
584 * can safely abort because this operation is atomic. 497 * can safely abort because this operation is atomic.
@@ -598,9 +511,75 @@ int nilfs_ioctl_prepare_clean_segments(struct the_nilfs *nilfs,
598static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp, 511static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp,
599 unsigned int cmd, void __user *argp) 512 unsigned int cmd, void __user *argp)
600{ 513{
514 struct nilfs_argv argv[5];
515 const static size_t argsz[5] = {
516 sizeof(struct nilfs_vdesc),
517 sizeof(struct nilfs_period),
518 sizeof(__u64),
519 sizeof(struct nilfs_bdesc),
520 sizeof(__u64),
521 };
522 void __user *base;
523 void *kbufs[5];
524 struct the_nilfs *nilfs;
525 size_t len, nsegs;
526 int n, ret;
527
601 if (!capable(CAP_SYS_ADMIN)) 528 if (!capable(CAP_SYS_ADMIN))
602 return -EPERM; 529 return -EPERM;
603 return nilfs_clean_segments(inode->i_sb, argp); 530
531 if (copy_from_user(argv, argp, sizeof(argv)))
532 return -EFAULT;
533
534 nsegs = argv[4].v_nmembs;
535 if (argv[4].v_size != argsz[4])
536 return -EINVAL;
537 /*
538 * argv[4] points to segment numbers this ioctl cleans. We
539 * use kmalloc() for its buffer because memory used for the
540 * segment numbers is small enough.
541 */
542 kbufs[4] = memdup_user((void __user *)(unsigned long)argv[4].v_base,
543 nsegs * sizeof(__u64));
544 if (IS_ERR(kbufs[4]))
545 return PTR_ERR(kbufs[4]);
546
547 nilfs = NILFS_SB(inode->i_sb)->s_nilfs;
548
549 for (n = 0; n < 4; n++) {
550 ret = -EINVAL;
551 if (argv[n].v_size != argsz[n])
552 goto out_free;
553
554 if (argv[n].v_nmembs > nsegs * nilfs->ns_blocks_per_segment)
555 goto out_free;
556
557 len = argv[n].v_size * argv[n].v_nmembs;
558 base = (void __user *)(unsigned long)argv[n].v_base;
559 if (len == 0) {
560 kbufs[n] = NULL;
561 continue;
562 }
563
564 kbufs[n] = vmalloc(len);
565 if (!kbufs[n]) {
566 ret = -ENOMEM;
567 goto out_free;
568 }
569 if (copy_from_user(kbufs[n], base, len)) {
570 ret = -EFAULT;
571 vfree(kbufs[n]);
572 goto out_free;
573 }
574 }
575
576 ret = nilfs_clean_segments(inode->i_sb, argv, kbufs);
577
578 out_free:
579 while (--n >= 0)
580 vfree(kbufs[n]);
581 kfree(kbufs[4]);
582 return ret;
604} 583}
605 584
606static int nilfs_ioctl_sync(struct inode *inode, struct file *filp, 585static int nilfs_ioctl_sync(struct inode *inode, struct file *filp,
@@ -621,6 +600,33 @@ static int nilfs_ioctl_sync(struct inode *inode, struct file *filp,
621 return 0; 600 return 0;
622} 601}
623 602
603static int nilfs_ioctl_get_info(struct inode *inode, struct file *filp,
604 unsigned int cmd, void __user *argp,
605 size_t membsz,
606 ssize_t (*dofunc)(struct the_nilfs *,
607 __u64 *, int,
608 void *, size_t, size_t))
609
610{
611 struct the_nilfs *nilfs = NILFS_SB(inode->i_sb)->s_nilfs;
612 struct nilfs_argv argv;
613 int ret;
614
615 if (copy_from_user(&argv, argp, sizeof(argv)))
616 return -EFAULT;
617
618 if (argv.v_size != membsz)
619 return -EINVAL;
620
621 ret = nilfs_ioctl_wrap_copy(nilfs, &argv, _IOC_DIR(cmd), dofunc);
622 if (ret < 0)
623 return ret;
624
625 if (copy_to_user(argp, &argv, sizeof(argv)))
626 ret = -EFAULT;
627 return ret;
628}
629
624long nilfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) 630long nilfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
625{ 631{
626 struct inode *inode = filp->f_dentry->d_inode; 632 struct inode *inode = filp->f_dentry->d_inode;
@@ -632,16 +638,21 @@ long nilfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
632 case NILFS_IOCTL_DELETE_CHECKPOINT: 638 case NILFS_IOCTL_DELETE_CHECKPOINT:
633 return nilfs_ioctl_delete_checkpoint(inode, filp, cmd, argp); 639 return nilfs_ioctl_delete_checkpoint(inode, filp, cmd, argp);
634 case NILFS_IOCTL_GET_CPINFO: 640 case NILFS_IOCTL_GET_CPINFO:
635 return nilfs_ioctl_get_cpinfo(inode, filp, cmd, argp); 641 return nilfs_ioctl_get_info(inode, filp, cmd, argp,
642 sizeof(struct nilfs_cpinfo),
643 nilfs_ioctl_do_get_cpinfo);
636 case NILFS_IOCTL_GET_CPSTAT: 644 case NILFS_IOCTL_GET_CPSTAT:
637 return nilfs_ioctl_get_cpstat(inode, filp, cmd, argp); 645 return nilfs_ioctl_get_cpstat(inode, filp, cmd, argp);
638 case NILFS_IOCTL_GET_SUINFO: 646 case NILFS_IOCTL_GET_SUINFO:
639 return nilfs_ioctl_get_suinfo(inode, filp, cmd, argp); 647 return nilfs_ioctl_get_info(inode, filp, cmd, argp,
648 sizeof(struct nilfs_suinfo),
649 nilfs_ioctl_do_get_suinfo);
640 case NILFS_IOCTL_GET_SUSTAT: 650 case NILFS_IOCTL_GET_SUSTAT:
641 return nilfs_ioctl_get_sustat(inode, filp, cmd, argp); 651 return nilfs_ioctl_get_sustat(inode, filp, cmd, argp);
642 case NILFS_IOCTL_GET_VINFO: 652 case NILFS_IOCTL_GET_VINFO:
643 /* XXX: rename to ??? */ 653 return nilfs_ioctl_get_info(inode, filp, cmd, argp,
644 return nilfs_ioctl_get_vinfo(inode, filp, cmd, argp); 654 sizeof(struct nilfs_vinfo),
655 nilfs_ioctl_do_get_vinfo);
645 case NILFS_IOCTL_GET_BDESCS: 656 case NILFS_IOCTL_GET_BDESCS:
646 return nilfs_ioctl_get_bdescs(inode, filp, cmd, argp); 657 return nilfs_ioctl_get_bdescs(inode, filp, cmd, argp);
647 case NILFS_IOCTL_CLEAN_SEGMENTS: 658 case NILFS_IOCTL_CLEAN_SEGMENTS:
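
The rewritten nilfs_ioctl_clean_segments() copies all five user-supplied descriptor arrays into kernel memory up front: it checks each element size against the expected one, uses memdup_user() for the small segment-number array and vmalloc() plus copy_from_user() for the larger ones, and on failure frees whatever was already allocated with the "while (--n >= 0)" unwind loop. A hedged user-space sketch of that validate/copy/unwind sequence follows; malloc()/memcpy() stand in for the kernel allocators and copy helpers, and the element sizes are placeholders rather than the real nilfs structure sizes.

/*
 * Validate, copy in, and unwind a set of user-supplied descriptor arrays,
 * mirroring the shape of the new nilfs_ioctl_clean_segments().
 */
#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct argv_desc {
	const void *v_base;   /* "user" buffer */
	size_t v_nmembs;      /* number of elements */
	size_t v_size;        /* size of one element */
};

/* expected element size for each of the five arrays (placeholder values) */
static const size_t argsz[5] = { 24, 16, 8, 32, 8 };

static int copy_in_all(const struct argv_desc argv[5], void *kbufs[5])
{
	int n, ret = 0;

	for (n = 0; n < 5; n++) {
		size_t len;

		if (argv[n].v_size != argsz[n]) {      /* reject odd layouts */
			ret = -EINVAL;
			goto out_free;
		}
		len = argv[n].v_size * argv[n].v_nmembs;
		if (len == 0) {
			kbufs[n] = NULL;
			continue;
		}
		kbufs[n] = malloc(len);                /* "vmalloc" */
		if (!kbufs[n]) {
			ret = -ENOMEM;
			goto out_free;
		}
		memcpy(kbufs[n], argv[n].v_base, len); /* "copy_from_user" */
	}
	return 0;

out_free:
	while (--n >= 0)        /* free only what was already allocated */
		free(kbufs[n]);
	return ret;
}

int main(void)
{
	uint64_t segnums[2] = { 7, 8 };
	struct argv_desc argv[5] = {
		{ NULL, 0, 24 }, { NULL, 0, 16 }, { NULL, 0, 8 },
		{ NULL, 0, 32 }, { segnums, 2, 8 },
	};
	void *kbufs[5];
	int ret = copy_in_all(argv, kbufs);

	printf("copy_in_all: %d\n", ret);
	if (!ret)
		for (int i = 0; i < 5; i++)
			free(kbufs[i]);
	return 0;
}

In the kernel version the fifth buffer is kept separately (kmalloc via memdup_user, freed with kfree) because the segment-number list is small, while the other four use vmalloc/vfree; the sketch collapses that distinction for brevity.
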
diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c
index 47dd815433f..bb78745a0e3 100644
--- a/fs/nilfs2/mdt.c
+++ b/fs/nilfs2/mdt.c
@@ -77,19 +77,22 @@ static int nilfs_mdt_create_block(struct inode *inode, unsigned long block,
77 void *)) 77 void *))
78{ 78{
79 struct the_nilfs *nilfs = NILFS_MDT(inode)->mi_nilfs; 79 struct the_nilfs *nilfs = NILFS_MDT(inode)->mi_nilfs;
80 struct nilfs_sb_info *writer = NULL;
81 struct super_block *sb = inode->i_sb; 80 struct super_block *sb = inode->i_sb;
82 struct nilfs_transaction_info ti; 81 struct nilfs_transaction_info ti;
83 struct buffer_head *bh; 82 struct buffer_head *bh;
84 int err; 83 int err;
85 84
86 if (!sb) { 85 if (!sb) {
87 writer = nilfs_get_writer(nilfs); 86 /*
88 if (!writer) { 87 * Make sure this function is not called from any
88 * read-only context.
89 */
90 if (!nilfs->ns_writer) {
91 WARN_ON(1);
89 err = -EROFS; 92 err = -EROFS;
90 goto out; 93 goto out;
91 } 94 }
92 sb = writer->s_super; 95 sb = nilfs->ns_writer->s_super;
93 } 96 }
94 97
95 nilfs_transaction_begin(sb, &ti, 0); 98 nilfs_transaction_begin(sb, &ti, 0);
@@ -127,8 +130,6 @@ static int nilfs_mdt_create_block(struct inode *inode, unsigned long block,
127 err = nilfs_transaction_commit(sb); 130 err = nilfs_transaction_commit(sb);
128 else 131 else
129 nilfs_transaction_abort(sb); 132 nilfs_transaction_abort(sb);
130 if (writer)
131 nilfs_put_writer(nilfs);
132 out: 133 out:
133 return err; 134 return err;
134} 135}
@@ -299,7 +300,7 @@ int nilfs_mdt_delete_block(struct inode *inode, unsigned long block)
299 int err; 300 int err;
300 301
301 err = nilfs_bmap_delete(ii->i_bmap, block); 302 err = nilfs_bmap_delete(ii->i_bmap, block);
302 if (likely(!err)) { 303 if (!err || err == -ENOENT) {
303 nilfs_mdt_mark_dirty(inode); 304 nilfs_mdt_mark_dirty(inode);
304 nilfs_mdt_forget_block(inode, block); 305 nilfs_mdt_forget_block(inode, block);
305 } 306 }
diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h
index 3d0c18a16db..da6fc0bba2e 100644
--- a/fs/nilfs2/nilfs.h
+++ b/fs/nilfs2/nilfs.h
@@ -236,7 +236,8 @@ extern int nilfs_sync_file(struct file *, struct dentry *, int);
236 236
237/* ioctl.c */ 237/* ioctl.c */
238long nilfs_ioctl(struct file *, unsigned int, unsigned long); 238long nilfs_ioctl(struct file *, unsigned int, unsigned long);
239int nilfs_ioctl_prepare_clean_segments(struct the_nilfs *, void __user *); 239int nilfs_ioctl_prepare_clean_segments(struct the_nilfs *, struct nilfs_argv *,
240 void **);
240 241
241/* inode.c */ 242/* inode.c */
242extern struct inode *nilfs_new_inode(struct inode *, int); 243extern struct inode *nilfs_new_inode(struct inode *, int);
diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c
index 1bfbba9c0e9..a2692bbc7b5 100644
--- a/fs/nilfs2/page.c
+++ b/fs/nilfs2/page.c
@@ -128,7 +128,8 @@ void nilfs_forget_buffer(struct buffer_head *bh)
128 128
129 lock_buffer(bh); 129 lock_buffer(bh);
130 clear_buffer_nilfs_volatile(bh); 130 clear_buffer_nilfs_volatile(bh);
131 if (test_clear_buffer_dirty(bh) && nilfs_page_buffers_clean(page)) 131 clear_buffer_dirty(bh);
132 if (nilfs_page_buffers_clean(page))
132 __nilfs_clear_page_dirty(page); 133 __nilfs_clear_page_dirty(page);
133 134
134 clear_buffer_uptodate(bh); 135 clear_buffer_uptodate(bh);
diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c
index 4fc081e47d7..57afa9d2406 100644
--- a/fs/nilfs2/recovery.c
+++ b/fs/nilfs2/recovery.c
@@ -407,6 +407,7 @@ void nilfs_dispose_segment_list(struct list_head *head)
407} 407}
408 408
409static int nilfs_prepare_segment_for_recovery(struct the_nilfs *nilfs, 409static int nilfs_prepare_segment_for_recovery(struct the_nilfs *nilfs,
410 struct nilfs_sb_info *sbi,
410 struct nilfs_recovery_info *ri) 411 struct nilfs_recovery_info *ri)
411{ 412{
412 struct list_head *head = &ri->ri_used_segments; 413 struct list_head *head = &ri->ri_used_segments;
@@ -421,6 +422,7 @@ static int nilfs_prepare_segment_for_recovery(struct the_nilfs *nilfs,
421 segnum[2] = ri->ri_segnum; 422 segnum[2] = ri->ri_segnum;
422 segnum[3] = ri->ri_nextnum; 423 segnum[3] = ri->ri_nextnum;
423 424
425 nilfs_attach_writer(nilfs, sbi);
424 /* 426 /*
425 * Releasing the next segment of the latest super root. 427 * Releasing the next segment of the latest super root.
426 * The next segment is invalidated by this recovery. 428 * The next segment is invalidated by this recovery.
@@ -459,10 +461,10 @@ static int nilfs_prepare_segment_for_recovery(struct the_nilfs *nilfs,
459 nilfs->ns_pseg_offset = 0; 461 nilfs->ns_pseg_offset = 0;
460 nilfs->ns_seg_seq = ri->ri_seq + 2; 462 nilfs->ns_seg_seq = ri->ri_seq + 2;
461 nilfs->ns_nextnum = nilfs->ns_segnum = segnum[0]; 463 nilfs->ns_nextnum = nilfs->ns_segnum = segnum[0];
462 return 0;
463 464
464 failed: 465 failed:
465 /* No need to recover sufile because it will be destroyed on error */ 466 /* No need to recover sufile because it will be destroyed on error */
467 nilfs_detach_writer(nilfs, sbi);
466 return err; 468 return err;
467} 469}
468 470
@@ -728,7 +730,7 @@ int nilfs_recover_logical_segments(struct the_nilfs *nilfs,
728 goto failed; 730 goto failed;
729 731
730 if (ri->ri_need_recovery == NILFS_RECOVERY_ROLLFORWARD_DONE) { 732 if (ri->ri_need_recovery == NILFS_RECOVERY_ROLLFORWARD_DONE) {
731 err = nilfs_prepare_segment_for_recovery(nilfs, ri); 733 err = nilfs_prepare_segment_for_recovery(nilfs, sbi, ri);
732 if (unlikely(err)) { 734 if (unlikely(err)) {
733 printk(KERN_ERR "NILFS: Error preparing segments for " 735 printk(KERN_ERR "NILFS: Error preparing segments for "
734 "recovery.\n"); 736 "recovery.\n");
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index fb70ec3be20..22c7f65c240 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -2589,7 +2589,8 @@ nilfs_remove_written_gcinodes(struct the_nilfs *nilfs, struct list_head *head)
2589 } 2589 }
2590} 2590}
2591 2591
2592int nilfs_clean_segments(struct super_block *sb, void __user *argp) 2592int nilfs_clean_segments(struct super_block *sb, struct nilfs_argv *argv,
2593 void **kbufs)
2593{ 2594{
2594 struct nilfs_sb_info *sbi = NILFS_SB(sb); 2595 struct nilfs_sb_info *sbi = NILFS_SB(sb);
2595 struct nilfs_sc_info *sci = NILFS_SC(sbi); 2596 struct nilfs_sc_info *sci = NILFS_SC(sbi);
@@ -2606,7 +2607,7 @@ int nilfs_clean_segments(struct super_block *sb, void __user *argp)
2606 err = nilfs_init_gcdat_inode(nilfs); 2607 err = nilfs_init_gcdat_inode(nilfs);
2607 if (unlikely(err)) 2608 if (unlikely(err))
2608 goto out_unlock; 2609 goto out_unlock;
2609 err = nilfs_ioctl_prepare_clean_segments(nilfs, argp); 2610 err = nilfs_ioctl_prepare_clean_segments(nilfs, argv, kbufs);
2610 if (unlikely(err)) 2611 if (unlikely(err))
2611 goto out_unlock; 2612 goto out_unlock;
2612 2613
diff --git a/fs/nilfs2/segment.h b/fs/nilfs2/segment.h
index a98fc1ed0bb..476bdd5df5b 100644
--- a/fs/nilfs2/segment.h
+++ b/fs/nilfs2/segment.h
@@ -222,7 +222,8 @@ extern int nilfs_construct_segment(struct super_block *);
222extern int nilfs_construct_dsync_segment(struct super_block *, struct inode *, 222extern int nilfs_construct_dsync_segment(struct super_block *, struct inode *,
223 loff_t, loff_t); 223 loff_t, loff_t);
224extern void nilfs_flush_segment(struct super_block *, ino_t); 224extern void nilfs_flush_segment(struct super_block *, ino_t);
225extern int nilfs_clean_segments(struct super_block *, void __user *); 225extern int nilfs_clean_segments(struct super_block *, struct nilfs_argv *,
226 void **);
226 227
227extern int nilfs_segctor_add_segments_to_be_freed(struct nilfs_sc_info *, 228extern int nilfs_segctor_add_segments_to_be_freed(struct nilfs_sc_info *,
228 __u64 *, size_t); 229 __u64 *, size_t);
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index bed766e435b..1634319e240 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -220,7 +220,7 @@ static struct inotify_kernel_event * kernel_event(s32 wd, u32 mask, u32 cookie,
220 rem = 0; 220 rem = 0;
221 } 221 }
222 222
223 kevent->name = kmalloc(len + rem, GFP_KERNEL); 223 kevent->name = kmalloc(len + rem, GFP_NOFS);
224 if (unlikely(!kevent->name)) { 224 if (unlikely(!kevent->name)) {
225 kmem_cache_free(event_cachep, kevent); 225 kmem_cache_free(event_cachep, kevent);
226 return NULL; 226 return NULL;
diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c
index 7d604480557..b574431a031 100644
--- a/fs/ocfs2/dcache.c
+++ b/fs/ocfs2/dcache.c
@@ -290,6 +290,21 @@ out_attach:
290 else 290 else
291 mlog_errno(ret); 291 mlog_errno(ret);
292 292
293 /*
294 * In case of error, manually free the allocation and do the iput().
295 * We need to do this because error here means no d_instantiate(),
296 * which means iput() will not be called during dput(dentry).
297 */
298 if (ret < 0 && !alias) {
299 ocfs2_lock_res_free(&dl->dl_lockres);
300 BUG_ON(dl->dl_count != 1);
301 spin_lock(&dentry_attach_lock);
302 dentry->d_fsdata = NULL;
303 spin_unlock(&dentry_attach_lock);
304 kfree(dl);
305 iput(inode);
306 }
307
293 dput(alias); 308 dput(alias);
294 309
295 return ret; 310 return ret;
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index e71160cda11..c5752305627 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -2697,7 +2697,7 @@ static int ocfs2_dx_dir_index_block(struct inode *dir,
2697 u32 *num_dx_entries, 2697 u32 *num_dx_entries,
2698 struct buffer_head *dirent_bh) 2698 struct buffer_head *dirent_bh)
2699{ 2699{
2700 int ret, namelen, i; 2700 int ret = 0, namelen, i;
2701 char *de_buf, *limit; 2701 char *de_buf, *limit;
2702 struct ocfs2_dir_entry *de; 2702 struct ocfs2_dir_entry *de;
2703 struct buffer_head *dx_leaf_bh; 2703 struct buffer_head *dx_leaf_bh;
@@ -2934,7 +2934,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
2934 */ 2934 */
2935 BUG_ON(alloc > 2); 2935 BUG_ON(alloc > 2);
2936 2936
2937 ret = ocfs2_reserve_clusters(osb, alloc, &data_ac); 2937 ret = ocfs2_reserve_clusters(osb, alloc + dx_alloc, &data_ac);
2938 if (ret) { 2938 if (ret) {
2939 mlog_errno(ret); 2939 mlog_errno(ret);
2940 goto out; 2940 goto out;
diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c
index de3da8eb558..15713cbb865 100644
--- a/fs/ocfs2/export.c
+++ b/fs/ocfs2/export.c
@@ -100,7 +100,8 @@ static struct dentry *ocfs2_get_dentry(struct super_block *sb,
100 100
101 /* If the inode allocator bit is clear, this inode must be stale */ 101 /* If the inode allocator bit is clear, this inode must be stale */
102 if (!set) { 102 if (!set) {
103 mlog(0, "inode %llu suballoc bit is clear\n", blkno); 103 mlog(0, "inode %llu suballoc bit is clear\n",
104 (unsigned long long)blkno);
104 status = -ESTALE; 105 status = -ESTALE;
105 goto unlock_nfs_sync; 106 goto unlock_nfs_sync;
106 } 107 }
@@ -114,7 +115,7 @@ check_err:
114 if (status < 0) { 115 if (status < 0) {
115 if (status == -ESTALE) { 116 if (status == -ESTALE) {
116 mlog(0, "stale inode ino: %llu generation: %u\n", 117 mlog(0, "stale inode ino: %llu generation: %u\n",
117 blkno, handle->ih_generation); 118 (unsigned long long)blkno, handle->ih_generation);
118 } 119 }
119 result = ERR_PTR(status); 120 result = ERR_PTR(status);
120 goto bail; 121 goto bail;
@@ -129,8 +130,8 @@ check_err:
129check_gen: 130check_gen:
130 if (handle->ih_generation != inode->i_generation) { 131 if (handle->ih_generation != inode->i_generation) {
131 iput(inode); 132 iput(inode);
132 mlog(0, "stale inode ino: %llu generation: %u\n", blkno, 133 mlog(0, "stale inode ino: %llu generation: %u\n",
133 handle->ih_generation); 134 (unsigned long long)blkno, handle->ih_generation);
134 result = ERR_PTR(-ESTALE); 135 result = ERR_PTR(-ESTALE);
135 goto bail; 136 goto bail;
136 } 137 }
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index 619dd7f6c05..eb7b76331eb 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -437,8 +437,9 @@ static inline int ocfs2_unlink_credits(struct super_block *sb)
437} 437}
438 438
439/* dinode + orphan dir dinode + inode alloc dinode + orphan dir entry + 439/* dinode + orphan dir dinode + inode alloc dinode + orphan dir entry +
440 * inode alloc group descriptor + orphan dir index leaf */ 440 * inode alloc group descriptor + orphan dir index root +
441#define OCFS2_DELETE_INODE_CREDITS (3 * OCFS2_INODE_UPDATE_CREDITS + 3) 441 * orphan dir index leaf */
442#define OCFS2_DELETE_INODE_CREDITS (3 * OCFS2_INODE_UPDATE_CREDITS + 4)
442 443
443/* dinode update, old dir dinode update, new dir dinode update, old 444/* dinode update, old dir dinode update, new dir dinode update, old
444 * dir dir entry, new dir dir entry, dir entry update for renaming 445 * dir dir entry, new dir dir entry, dir entry update for renaming
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 2220f93f668..33464c6b60a 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -1025,10 +1025,8 @@ static int ocfs2_rename(struct inode *old_dir,
1025 struct inode *orphan_dir = NULL; 1025 struct inode *orphan_dir = NULL;
1026 struct ocfs2_dinode *newfe = NULL; 1026 struct ocfs2_dinode *newfe = NULL;
1027 char orphan_name[OCFS2_ORPHAN_NAMELEN + 1]; 1027 char orphan_name[OCFS2_ORPHAN_NAMELEN + 1];
1028 struct buffer_head *orphan_entry_bh = NULL;
1029 struct buffer_head *newfe_bh = NULL; 1028 struct buffer_head *newfe_bh = NULL;
1030 struct buffer_head *old_inode_bh = NULL; 1029 struct buffer_head *old_inode_bh = NULL;
1031 struct buffer_head *insert_entry_bh = NULL;
1032 struct ocfs2_super *osb = NULL; 1030 struct ocfs2_super *osb = NULL;
1033 u64 newfe_blkno, old_de_ino; 1031 u64 newfe_blkno, old_de_ino;
1034 handle_t *handle = NULL; 1032 handle_t *handle = NULL;
@@ -1455,8 +1453,6 @@ bail:
1455 brelse(old_inode_bh); 1453 brelse(old_inode_bh);
1456 brelse(old_dir_bh); 1454 brelse(old_dir_bh);
1457 brelse(new_dir_bh); 1455 brelse(new_dir_bh);
1458 brelse(orphan_entry_bh);
1459 brelse(insert_entry_bh);
1460 1456
1461 mlog_exit(status); 1457 mlog_exit(status);
1462 1458
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index b4ca5911caa..8439f6b324b 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -2197,26 +2197,29 @@ static int ocfs2_get_suballoc_slot_bit(struct ocfs2_super *osb, u64 blkno,
2197 struct buffer_head *inode_bh = NULL; 2197 struct buffer_head *inode_bh = NULL;
2198 struct ocfs2_dinode *inode_fe; 2198 struct ocfs2_dinode *inode_fe;
2199 2199
2200 mlog_entry("blkno: %llu\n", blkno); 2200 mlog_entry("blkno: %llu\n", (unsigned long long)blkno);
2201 2201
2202 /* dirty read disk */ 2202 /* dirty read disk */
2203 status = ocfs2_read_blocks_sync(osb, blkno, 1, &inode_bh); 2203 status = ocfs2_read_blocks_sync(osb, blkno, 1, &inode_bh);
2204 if (status < 0) { 2204 if (status < 0) {
2205 mlog(ML_ERROR, "read block %llu failed %d\n", blkno, status); 2205 mlog(ML_ERROR, "read block %llu failed %d\n",
2206 (unsigned long long)blkno, status);
2206 goto bail; 2207 goto bail;
2207 } 2208 }
2208 2209
2209 inode_fe = (struct ocfs2_dinode *) inode_bh->b_data; 2210 inode_fe = (struct ocfs2_dinode *) inode_bh->b_data;
2210 if (!OCFS2_IS_VALID_DINODE(inode_fe)) { 2211 if (!OCFS2_IS_VALID_DINODE(inode_fe)) {
2211 mlog(ML_ERROR, "invalid inode %llu requested\n", blkno); 2212 mlog(ML_ERROR, "invalid inode %llu requested\n",
2213 (unsigned long long)blkno);
2212 status = -EINVAL; 2214 status = -EINVAL;
2213 goto bail; 2215 goto bail;
2214 } 2216 }
2215 2217
2216 if (le16_to_cpu(inode_fe->i_suballoc_slot) != OCFS2_INVALID_SLOT && 2218 if (le16_to_cpu(inode_fe->i_suballoc_slot) != (u16)OCFS2_INVALID_SLOT &&
2217 (u32)le16_to_cpu(inode_fe->i_suballoc_slot) > osb->max_slots - 1) { 2219 (u32)le16_to_cpu(inode_fe->i_suballoc_slot) > osb->max_slots - 1) {
2218 mlog(ML_ERROR, "inode %llu has invalid suballoc slot %u\n", 2220 mlog(ML_ERROR, "inode %llu has invalid suballoc slot %u\n",
2219 blkno, (u32)le16_to_cpu(inode_fe->i_suballoc_slot)); 2221 (unsigned long long)blkno,
2222 (u32)le16_to_cpu(inode_fe->i_suballoc_slot));
2220 status = -EINVAL; 2223 status = -EINVAL;
2221 goto bail; 2224 goto bail;
2222 } 2225 }
@@ -2251,7 +2254,8 @@ static int ocfs2_test_suballoc_bit(struct ocfs2_super *osb,
2251 u64 bg_blkno; 2254 u64 bg_blkno;
2252 int status; 2255 int status;
2253 2256
2254 mlog_entry("blkno: %llu bit: %u\n", blkno, (unsigned int)bit); 2257 mlog_entry("blkno: %llu bit: %u\n", (unsigned long long)blkno,
2258 (unsigned int)bit);
2255 2259
2256 alloc_fe = (struct ocfs2_dinode *)alloc_bh->b_data; 2260 alloc_fe = (struct ocfs2_dinode *)alloc_bh->b_data;
2257 if ((bit + 1) > ocfs2_bits_per_group(&alloc_fe->id2.i_chain)) { 2261 if ((bit + 1) > ocfs2_bits_per_group(&alloc_fe->id2.i_chain)) {
@@ -2266,7 +2270,8 @@ static int ocfs2_test_suballoc_bit(struct ocfs2_super *osb,
2266 status = ocfs2_read_group_descriptor(suballoc, alloc_fe, bg_blkno, 2270 status = ocfs2_read_group_descriptor(suballoc, alloc_fe, bg_blkno,
2267 &group_bh); 2271 &group_bh);
2268 if (status < 0) { 2272 if (status < 0) {
2269 mlog(ML_ERROR, "read group %llu failed %d\n", bg_blkno, status); 2273 mlog(ML_ERROR, "read group %llu failed %d\n",
2274 (unsigned long long)bg_blkno, status);
2270 goto bail; 2275 goto bail;
2271 } 2276 }
2272 2277
@@ -2300,7 +2305,7 @@ int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res)
2300 struct inode *inode_alloc_inode; 2305 struct inode *inode_alloc_inode;
2301 struct buffer_head *alloc_bh = NULL; 2306 struct buffer_head *alloc_bh = NULL;
2302 2307
2303 mlog_entry("blkno: %llu", blkno); 2308 mlog_entry("blkno: %llu", (unsigned long long)blkno);
2304 2309
2305 status = ocfs2_get_suballoc_slot_bit(osb, blkno, &suballoc_slot, 2310 status = ocfs2_get_suballoc_slot_bit(osb, blkno, &suballoc_slot,
2306 &suballoc_bit); 2311 &suballoc_bit);
diff --git a/fs/ocfs2/symlink.c b/fs/ocfs2/symlink.c
index ed0a0cfd68d..579dd1b1110 100644
--- a/fs/ocfs2/symlink.c
+++ b/fs/ocfs2/symlink.c
@@ -39,6 +39,7 @@
39#include <linux/slab.h> 39#include <linux/slab.h>
40#include <linux/pagemap.h> 40#include <linux/pagemap.h>
41#include <linux/utsname.h> 41#include <linux/utsname.h>
42#include <linux/namei.h>
42 43
43#define MLOG_MASK_PREFIX ML_NAMEI 44#define MLOG_MASK_PREFIX ML_NAMEI
44#include <cluster/masklog.h> 45#include <cluster/masklog.h>
@@ -54,26 +55,6 @@
54 55
55#include "buffer_head_io.h" 56#include "buffer_head_io.h"
56 57
57static char *ocfs2_page_getlink(struct dentry * dentry,
58 struct page **ppage);
59static char *ocfs2_fast_symlink_getlink(struct inode *inode,
60 struct buffer_head **bh);
61
62/* get the link contents into pagecache */
63static char *ocfs2_page_getlink(struct dentry * dentry,
64 struct page **ppage)
65{
66 struct page * page;
67 struct address_space *mapping = dentry->d_inode->i_mapping;
68 page = read_mapping_page(mapping, 0, NULL);
69 if (IS_ERR(page))
70 goto sync_fail;
71 *ppage = page;
72 return kmap(page);
73
74sync_fail:
75 return (char*)page;
76}
77 58
78static char *ocfs2_fast_symlink_getlink(struct inode *inode, 59static char *ocfs2_fast_symlink_getlink(struct inode *inode,
79 struct buffer_head **bh) 60 struct buffer_head **bh)
@@ -128,40 +109,55 @@ out:
128 return ret; 109 return ret;
129} 110}
130 111
131static void *ocfs2_follow_link(struct dentry *dentry, 112static void *ocfs2_fast_follow_link(struct dentry *dentry,
132 struct nameidata *nd) 113 struct nameidata *nd)
133{ 114{
134 int status; 115 int status = 0;
135 char *link; 116 int len;
117 char *target, *link = ERR_PTR(-ENOMEM);
136 struct inode *inode = dentry->d_inode; 118 struct inode *inode = dentry->d_inode;
137 struct page *page = NULL;
138 struct buffer_head *bh = NULL; 119 struct buffer_head *bh = NULL;
139 120
140 if (ocfs2_inode_is_fast_symlink(inode)) 121 mlog_entry_void();
141 link = ocfs2_fast_symlink_getlink(inode, &bh); 122
142 else 123 BUG_ON(!ocfs2_inode_is_fast_symlink(inode));
143 link = ocfs2_page_getlink(dentry, &page); 124 target = ocfs2_fast_symlink_getlink(inode, &bh);
144 if (IS_ERR(link)) { 125 if (IS_ERR(target)) {
145 status = PTR_ERR(link); 126 status = PTR_ERR(target);
146 mlog_errno(status); 127 mlog_errno(status);
147 goto bail; 128 goto bail;
148 } 129 }
149 130
150 status = vfs_follow_link(nd, link); 131 /* Fast symlinks can't be large */
132 len = strlen(target);
133 link = kzalloc(len + 1, GFP_NOFS);
134 if (!link) {
135 status = -ENOMEM;
136 mlog_errno(status);
137 goto bail;
138 }
139
140 memcpy(link, target, len);
141 nd_set_link(nd, link);
151 142
152bail: 143bail:
153 if (page) {
154 kunmap(page);
155 page_cache_release(page);
156 }
157 brelse(bh); 144 brelse(bh);
158 145
159 return ERR_PTR(status); 146 mlog_exit(status);
147 return status ? ERR_PTR(status) : link;
148}
149
150static void ocfs2_fast_put_link(struct dentry *dentry, struct nameidata *nd, void *cookie)
151{
152 char *link = cookie;
153
154 kfree(link);
160} 155}
161 156
162const struct inode_operations ocfs2_symlink_inode_operations = { 157const struct inode_operations ocfs2_symlink_inode_operations = {
163 .readlink = page_readlink, 158 .readlink = page_readlink,
164 .follow_link = ocfs2_follow_link, 159 .follow_link = page_follow_link_light,
160 .put_link = page_put_link,
165 .getattr = ocfs2_getattr, 161 .getattr = ocfs2_getattr,
166 .setattr = ocfs2_setattr, 162 .setattr = ocfs2_setattr,
167 .setxattr = generic_setxattr, 163 .setxattr = generic_setxattr,
@@ -171,7 +167,8 @@ const struct inode_operations ocfs2_symlink_inode_operations = {
171}; 167};
172const struct inode_operations ocfs2_fast_symlink_inode_operations = { 168const struct inode_operations ocfs2_fast_symlink_inode_operations = {
173 .readlink = ocfs2_readlink, 169 .readlink = ocfs2_readlink,
174 .follow_link = ocfs2_follow_link, 170 .follow_link = ocfs2_fast_follow_link,
171 .put_link = ocfs2_fast_put_link,
175 .getattr = ocfs2_getattr, 172 .getattr = ocfs2_getattr,
176 .setattr = ocfs2_setattr, 173 .setattr = ocfs2_setattr,
177 .setxattr = generic_setxattr, 174 .setxattr = generic_setxattr,
diff --git a/fs/open.c b/fs/open.c
index 377eb25b6ab..bdfbf03615a 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -1033,7 +1033,7 @@ long do_sys_open(int dfd, const char __user *filename, int flags, int mode)
1033 if (!IS_ERR(tmp)) { 1033 if (!IS_ERR(tmp)) {
1034 fd = get_unused_fd_flags(flags); 1034 fd = get_unused_fd_flags(flags);
1035 if (fd >= 0) { 1035 if (fd >= 0) {
1036 struct file *f = do_filp_open(dfd, tmp, flags, mode); 1036 struct file *f = do_filp_open(dfd, tmp, flags, mode, 0);
1037 if (IS_ERR(f)) { 1037 if (IS_ERR(f)) {
1038 put_unused_fd(fd); 1038 put_unused_fd(fd);
1039 fd = PTR_ERR(f); 1039 fd = PTR_ERR(f);
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 7e4877d9dcb..725a650bbbb 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -80,6 +80,7 @@
80#include <linux/delayacct.h> 80#include <linux/delayacct.h>
81#include <linux/seq_file.h> 81#include <linux/seq_file.h>
82#include <linux/pid_namespace.h> 82#include <linux/pid_namespace.h>
83#include <linux/ptrace.h>
83#include <linux/tracehook.h> 84#include <linux/tracehook.h>
84 85
85#include <asm/pgtable.h> 86#include <asm/pgtable.h>
@@ -352,6 +353,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
352 char state; 353 char state;
353 pid_t ppid = 0, pgid = -1, sid = -1; 354 pid_t ppid = 0, pgid = -1, sid = -1;
354 int num_threads = 0; 355 int num_threads = 0;
356 int permitted;
355 struct mm_struct *mm; 357 struct mm_struct *mm;
356 unsigned long long start_time; 358 unsigned long long start_time;
357 unsigned long cmin_flt = 0, cmaj_flt = 0; 359 unsigned long cmin_flt = 0, cmaj_flt = 0;
@@ -364,11 +366,14 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
364 366
365 state = *get_task_state(task); 367 state = *get_task_state(task);
366 vsize = eip = esp = 0; 368 vsize = eip = esp = 0;
369 permitted = ptrace_may_access(task, PTRACE_MODE_READ);
367 mm = get_task_mm(task); 370 mm = get_task_mm(task);
368 if (mm) { 371 if (mm) {
369 vsize = task_vsize(mm); 372 vsize = task_vsize(mm);
370 eip = KSTK_EIP(task); 373 if (permitted) {
371 esp = KSTK_ESP(task); 374 eip = KSTK_EIP(task);
375 esp = KSTK_ESP(task);
376 }
372 } 377 }
373 378
374 get_task_comm(tcomm, task); 379 get_task_comm(tcomm, task);
@@ -424,7 +429,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
424 unlock_task_sighand(task, &flags); 429 unlock_task_sighand(task, &flags);
425 } 430 }
426 431
427 if (!whole || num_threads < 2) 432 if (permitted && (!whole || num_threads < 2))
428 wchan = get_wchan(task); 433 wchan = get_wchan(task);
429 if (!whole) { 434 if (!whole) {
430 min_flt = task->min_flt; 435 min_flt = task->min_flt;
@@ -476,7 +481,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
476 rsslim, 481 rsslim,
477 mm ? mm->start_code : 0, 482 mm ? mm->start_code : 0,
478 mm ? mm->end_code : 0, 483 mm ? mm->end_code : 0,
479 mm ? mm->start_stack : 0, 484 (permitted && mm) ? mm->start_stack : 0,
480 esp, 485 esp,
481 eip, 486 eip,
482 /* The signal information here is obsolete. 487 /* The signal information here is obsolete.
diff --git a/fs/proc/base.c b/fs/proc/base.c
index f71559784bf..3326bbf9ab9 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -322,7 +322,10 @@ static int proc_pid_wchan(struct task_struct *task, char *buffer)
322 wchan = get_wchan(task); 322 wchan = get_wchan(task);
323 323
324 if (lookup_symbol_name(wchan, symname) < 0) 324 if (lookup_symbol_name(wchan, symname) < 0)
325 return sprintf(buffer, "%lu", wchan); 325 if (!ptrace_may_access(task, PTRACE_MODE_READ))
326 return 0;
327 else
328 return sprintf(buffer, "%lu", wchan);
326 else 329 else
327 return sprintf(buffer, "%s", symname); 330 return sprintf(buffer, "%s", symname);
328} 331}
@@ -648,14 +651,14 @@ static unsigned mounts_poll(struct file *file, poll_table *wait)
648{ 651{
649 struct proc_mounts *p = file->private_data; 652 struct proc_mounts *p = file->private_data;
650 struct mnt_namespace *ns = p->ns; 653 struct mnt_namespace *ns = p->ns;
651 unsigned res = 0; 654 unsigned res = POLLIN | POLLRDNORM;
652 655
653 poll_wait(file, &ns->poll, wait); 656 poll_wait(file, &ns->poll, wait);
654 657
655 spin_lock(&vfsmount_lock); 658 spin_lock(&vfsmount_lock);
656 if (p->event != ns->event) { 659 if (p->event != ns->event) {
657 p->event = ns->event; 660 p->event = ns->event;
658 res = POLLERR; 661 res |= POLLERR | POLLPRI;
659 } 662 }
660 spin_unlock(&vfsmount_lock); 663 spin_unlock(&vfsmount_lock);
661 664
@@ -1953,7 +1956,7 @@ static struct dentry *proc_pident_instantiate(struct inode *dir,
1953 const struct pid_entry *p = ptr; 1956 const struct pid_entry *p = ptr;
1954 struct inode *inode; 1957 struct inode *inode;
1955 struct proc_inode *ei; 1958 struct proc_inode *ei;
1956 struct dentry *error = ERR_PTR(-EINVAL); 1959 struct dentry *error = ERR_PTR(-ENOENT);
1957 1960
1958 inode = proc_pid_make_inode(dir->i_sb, task); 1961 inode = proc_pid_make_inode(dir->i_sb, task);
1959 if (!inode) 1962 if (!inode)
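
The mounts_poll() hunk makes the /proc mounts files report POLLIN | POLLRDNORM unconditionally and flags a mount-table change with POLLERR | POLLPRI rather than POLLERR alone, which is what poll()/select() based watchers expect. A hedged user-space sketch of how such a watcher consumes this, polling /proc/self/mounts (the path and timeout are illustrative):

#include <fcntl.h>
#include <poll.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	struct pollfd pfd;

	pfd.fd = open("/proc/self/mounts", O_RDONLY);
	if (pfd.fd < 0)
		return 1;
	pfd.events = POLLPRI;   /* POLLERR is always reported when it applies */

	/* wait up to 10s for a mount/umount somewhere in the namespace */
	int n = poll(&pfd, 1, 10000);
	if (n > 0 && (pfd.revents & (POLLERR | POLLPRI)))
		printf("mount table changed; re-read the file\n");
	else
		printf("no change within the timeout\n");

	close(pfd.fd);
	return 0;
}
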
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index 74ea974f5ca..c6b0302af4c 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -35,7 +35,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
35#define K(x) ((x) << (PAGE_SHIFT - 10)) 35#define K(x) ((x) << (PAGE_SHIFT - 10))
36 si_meminfo(&i); 36 si_meminfo(&i);
37 si_swapinfo(&i); 37 si_swapinfo(&i);
38 committed = atomic_long_read(&vm_committed_space); 38 committed = percpu_counter_read_positive(&vm_committed_as);
39 allowed = ((totalram_pages - hugetlb_total_pages()) 39 allowed = ((totalram_pages - hugetlb_total_pages())
40 * sysctl_overcommit_ratio / 100) + total_swap_pages; 40 * sysctl_overcommit_ratio / 100) + total_swap_pages;
41 41
diff --git a/fs/proc/root.c b/fs/proc/root.c
index 1e15a2b176e..b080b791d9e 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -67,8 +67,7 @@ static int proc_get_sb(struct file_system_type *fs_type,
67 sb->s_flags = flags; 67 sb->s_flags = flags;
68 err = proc_fill_super(sb); 68 err = proc_fill_super(sb);
69 if (err) { 69 if (err) {
70 up_write(&sb->s_umount); 70 deactivate_locked_super(sb);
71 deactivate_super(sb);
72 return err; 71 return err;
73 } 72 }
74 73
diff --git a/fs/proc/stat.c b/fs/proc/stat.c
index f75efa22df5..81e4eb60972 100644
--- a/fs/proc/stat.c
+++ b/fs/proc/stat.c
@@ -18,6 +18,9 @@
18#ifndef arch_irq_stat 18#ifndef arch_irq_stat
19#define arch_irq_stat() 0 19#define arch_irq_stat() 0
20#endif 20#endif
21#ifndef arch_idle_time
22#define arch_idle_time(cpu) 0
23#endif
21 24
22static int show_stat(struct seq_file *p, void *v) 25static int show_stat(struct seq_file *p, void *v)
23{ 26{
@@ -40,6 +43,7 @@ static int show_stat(struct seq_file *p, void *v)
40 nice = cputime64_add(nice, kstat_cpu(i).cpustat.nice); 43 nice = cputime64_add(nice, kstat_cpu(i).cpustat.nice);
41 system = cputime64_add(system, kstat_cpu(i).cpustat.system); 44 system = cputime64_add(system, kstat_cpu(i).cpustat.system);
42 idle = cputime64_add(idle, kstat_cpu(i).cpustat.idle); 45 idle = cputime64_add(idle, kstat_cpu(i).cpustat.idle);
46 idle = cputime64_add(idle, arch_idle_time(i));
43 iowait = cputime64_add(iowait, kstat_cpu(i).cpustat.iowait); 47 iowait = cputime64_add(iowait, kstat_cpu(i).cpustat.iowait);
44 irq = cputime64_add(irq, kstat_cpu(i).cpustat.irq); 48 irq = cputime64_add(irq, kstat_cpu(i).cpustat.irq);
45 softirq = cputime64_add(softirq, kstat_cpu(i).cpustat.softirq); 49 softirq = cputime64_add(softirq, kstat_cpu(i).cpustat.softirq);
@@ -69,6 +73,7 @@ static int show_stat(struct seq_file *p, void *v)
69 nice = kstat_cpu(i).cpustat.nice; 73 nice = kstat_cpu(i).cpustat.nice;
70 system = kstat_cpu(i).cpustat.system; 74 system = kstat_cpu(i).cpustat.system;
71 idle = kstat_cpu(i).cpustat.idle; 75 idle = kstat_cpu(i).cpustat.idle;
76 idle = cputime64_add(idle, arch_idle_time(i));
72 iowait = kstat_cpu(i).cpustat.iowait; 77 iowait = kstat_cpu(i).cpustat.iowait;
73 irq = kstat_cpu(i).cpustat.irq; 78 irq = kstat_cpu(i).cpustat.irq;
74 softirq = kstat_cpu(i).cpustat.softirq; 79 softirq = kstat_cpu(i).cpustat.softirq;
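
The /proc/stat hunk introduces an arch_idle_time() hook with a zero fallback so that an architecture which accounts extra idle time elsewhere can supply its own definition while the generic code stays unchanged. A small stand-alone demo of that #ifndef-default override pattern (the PLATFORM_HAS_EXTRA_IDLE macro is made up for the example):

#include <stdio.h>

#ifdef PLATFORM_HAS_EXTRA_IDLE
#define arch_idle_time(cpu) (100 + (cpu))   /* platform-specific accounting */
#endif

#ifndef arch_idle_time
#define arch_idle_time(cpu) 0               /* generic fallback: adds nothing */
#endif

int main(void)
{
	unsigned long long idle = 5000;

	idle += arch_idle_time(0);              /* no-op unless overridden */
	printf("idle: %llu\n", idle);
	return 0;
}

Compiling with -DPLATFORM_HAS_EXTRA_IDLE picks up the override, exactly as an architecture header defining arch_idle_time() would before fs/proc/stat.c is compiled.
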
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 39e4ad4f59f..6f61b7cc32e 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -665,6 +665,10 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
665 goto out_task; 665 goto out_task;
666 666
667 ret = 0; 667 ret = 0;
668
669 if (!count)
670 goto out_task;
671
668 mm = get_task_mm(task); 672 mm = get_task_mm(task);
669 if (!mm) 673 if (!mm)
670 goto out_task; 674 goto out_task;
diff --git a/fs/quota/Makefile b/fs/quota/Makefile
index 385a0831cc9..68d4f6dc057 100644
--- a/fs/quota/Makefile
+++ b/fs/quota/Makefile
@@ -1,12 +1,3 @@
1#
2# Makefile for the Linux filesystems.
3#
4# 14 Sep 2000, Christoph Hellwig <hch@infradead.org>
5# Rewritten to use lists instead of if-statements.
6#
7
8obj-y :=
9
10obj-$(CONFIG_QUOTA) += dquot.o 1obj-$(CONFIG_QUOTA) += dquot.o
11obj-$(CONFIG_QFMT_V1) += quota_v1.o 2obj-$(CONFIG_QFMT_V1) += quota_v1.o
12obj-$(CONFIG_QFMT_V2) += quota_v2.o 3obj-$(CONFIG_QFMT_V2) += quota_v2.o
diff --git a/fs/reiserfs/dir.c b/fs/reiserfs/dir.c
index 67a80d7e59e..45ee3d357c7 100644
--- a/fs/reiserfs/dir.c
+++ b/fs/reiserfs/dir.c
@@ -41,6 +41,18 @@ static int reiserfs_dir_fsync(struct file *filp, struct dentry *dentry,
41 41
42#define store_ih(where,what) copy_item_head (where, what) 42#define store_ih(where,what) copy_item_head (where, what)
43 43
44static inline bool is_privroot_deh(struct dentry *dir,
45 struct reiserfs_de_head *deh)
46{
47 int ret = 0;
48#ifdef CONFIG_REISERFS_FS_XATTR
49 struct dentry *privroot = REISERFS_SB(dir->d_sb)->priv_root;
50 ret = (dir == dir->d_parent && privroot->d_inode &&
51 deh->deh_objectid == INODE_PKEY(privroot->d_inode)->k_objectid);
52#endif
53 return ret;
54}
55
44int reiserfs_readdir_dentry(struct dentry *dentry, void *dirent, 56int reiserfs_readdir_dentry(struct dentry *dentry, void *dirent,
45 filldir_t filldir, loff_t *pos) 57 filldir_t filldir, loff_t *pos)
46{ 58{
@@ -138,18 +150,8 @@ int reiserfs_readdir_dentry(struct dentry *dentry, void *dirent,
138 } 150 }
139 151
140 /* Ignore the .reiserfs_priv entry */ 152 /* Ignore the .reiserfs_priv entry */
141 if (reiserfs_xattrs(inode->i_sb) && 153 if (is_privroot_deh(dentry, deh))
142 !old_format_only(inode->i_sb) &&
143 dentry == inode->i_sb->s_root &&
144 REISERFS_SB(inode->i_sb)->priv_root &&
145 REISERFS_SB(inode->i_sb)->priv_root->d_inode
146 && deh_objectid(deh) ==
147 le32_to_cpu(INODE_PKEY
148 (REISERFS_SB(inode->i_sb)->
149 priv_root->d_inode)->
150 k_objectid)) {
151 continue; 154 continue;
152 }
153 155
154 d_off = deh_offset(deh); 156 d_off = deh_offset(deh);
155 *pos = d_off; 157 *pos = d_off;
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
index efd4d720718..27157912863 100644
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c
@@ -338,21 +338,8 @@ static struct dentry *reiserfs_lookup(struct inode *dir, struct dentry *dentry,
338 &path_to_entry, &de); 338 &path_to_entry, &de);
339 pathrelse(&path_to_entry); 339 pathrelse(&path_to_entry);
340 if (retval == NAME_FOUND) { 340 if (retval == NAME_FOUND) {
341 /* Hide the .reiserfs_priv directory */ 341 inode = reiserfs_iget(dir->i_sb,
342 if (reiserfs_xattrs(dir->i_sb) && 342 (struct cpu_key *)&(de.de_dir_id));
343 !old_format_only(dir->i_sb) &&
344 REISERFS_SB(dir->i_sb)->priv_root &&
345 REISERFS_SB(dir->i_sb)->priv_root->d_inode &&
346 de.de_objectid ==
347 le32_to_cpu(INODE_PKEY
348 (REISERFS_SB(dir->i_sb)->priv_root->d_inode)->
349 k_objectid)) {
350 reiserfs_write_unlock(dir->i_sb);
351 return ERR_PTR(-EACCES);
352 }
353
354 inode =
355 reiserfs_iget(dir->i_sb, (struct cpu_key *)&(de.de_dir_id));
356 if (!inode || IS_ERR(inode)) { 343 if (!inode || IS_ERR(inode)) {
357 reiserfs_write_unlock(dir->i_sb); 344 reiserfs_write_unlock(dir->i_sb);
358 return ERR_PTR(-EACCES); 345 return ERR_PTR(-EACCES);
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 0ae6486d904..3567fb9e3fb 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -448,13 +448,11 @@ int remove_save_link(struct inode *inode, int truncate)
448static void reiserfs_kill_sb(struct super_block *s) 448static void reiserfs_kill_sb(struct super_block *s)
449{ 449{
450 if (REISERFS_SB(s)) { 450 if (REISERFS_SB(s)) {
451#ifdef CONFIG_REISERFS_FS_XATTR
452 if (REISERFS_SB(s)->xattr_root) { 451 if (REISERFS_SB(s)->xattr_root) {
453 d_invalidate(REISERFS_SB(s)->xattr_root); 452 d_invalidate(REISERFS_SB(s)->xattr_root);
454 dput(REISERFS_SB(s)->xattr_root); 453 dput(REISERFS_SB(s)->xattr_root);
455 REISERFS_SB(s)->xattr_root = NULL; 454 REISERFS_SB(s)->xattr_root = NULL;
456 } 455 }
457#endif
458 if (REISERFS_SB(s)->priv_root) { 456 if (REISERFS_SB(s)->priv_root) {
459 d_invalidate(REISERFS_SB(s)->priv_root); 457 d_invalidate(REISERFS_SB(s)->priv_root);
460 dput(REISERFS_SB(s)->priv_root); 458 dput(REISERFS_SB(s)->priv_root);
@@ -1316,8 +1314,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
1316 } 1314 }
1317 1315
1318out_ok: 1316out_ok:
1319 kfree(s->s_options); 1317 replace_mount_options(s, new_opts);
1320 s->s_options = new_opts;
1321 return 0; 1318 return 0;
1322 1319
1323out_err: 1320out_err:
@@ -1842,7 +1839,8 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
1842 goto error; 1839 goto error;
1843 } 1840 }
1844 1841
1845 if ((errval = reiserfs_xattr_init(s, s->s_flags))) { 1842 if ((errval = reiserfs_lookup_privroot(s)) ||
1843 (errval = reiserfs_xattr_init(s, s->s_flags))) {
1846 dput(s->s_root); 1844 dput(s->s_root);
1847 s->s_root = NULL; 1845 s->s_root = NULL;
1848 goto error; 1846 goto error;
@@ -1855,7 +1853,8 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
1855 reiserfs_info(s, "using 3.5.x disk format\n"); 1853 reiserfs_info(s, "using 3.5.x disk format\n");
1856 } 1854 }
1857 1855
1858 if ((errval = reiserfs_xattr_init(s, s->s_flags))) { 1856 if ((errval = reiserfs_lookup_privroot(s)) ||
1857 (errval = reiserfs_xattr_init(s, s->s_flags))) {
1859 dput(s->s_root); 1858 dput(s->s_root);
1860 s->s_root = NULL; 1859 s->s_root = NULL;
1861 goto error; 1860 goto error;
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index f83f52bae39..8e7deb0e696 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -113,41 +113,30 @@ static int xattr_rmdir(struct inode *dir, struct dentry *dentry)
113 113
114#define xattr_may_create(flags) (!flags || flags & XATTR_CREATE) 114#define xattr_may_create(flags) (!flags || flags & XATTR_CREATE)
115 115
116/* Returns and possibly creates the xattr dir. */ 116static struct dentry *open_xa_root(struct super_block *sb, int flags)
117static struct dentry *lookup_or_create_dir(struct dentry *parent,
118 const char *name, int flags)
119{ 117{
120 struct dentry *dentry; 118 struct dentry *privroot = REISERFS_SB(sb)->priv_root;
121 BUG_ON(!parent); 119 struct dentry *xaroot;
122 120 if (!privroot->d_inode)
123 dentry = lookup_one_len(name, parent, strlen(name)); 121 return ERR_PTR(-ENODATA);
124 if (IS_ERR(dentry))
125 return dentry;
126 else if (!dentry->d_inode) {
127 int err = -ENODATA;
128 122
129 if (xattr_may_create(flags)) { 123 mutex_lock_nested(&privroot->d_inode->i_mutex, I_MUTEX_XATTR);
130 mutex_lock_nested(&parent->d_inode->i_mutex,
131 I_MUTEX_XATTR);
132 err = xattr_mkdir(parent->d_inode, dentry, 0700);
133 mutex_unlock(&parent->d_inode->i_mutex);
134 }
135 124
125 xaroot = dget(REISERFS_SB(sb)->xattr_root);
126 if (!xaroot)
127 xaroot = ERR_PTR(-ENODATA);
128 else if (!xaroot->d_inode) {
129 int err = -ENODATA;
130 if (xattr_may_create(flags))
131 err = xattr_mkdir(privroot->d_inode, xaroot, 0700);
136 if (err) { 132 if (err) {
137 dput(dentry); 133 dput(xaroot);
138 dentry = ERR_PTR(err); 134 xaroot = ERR_PTR(err);
139 } 135 }
140 } 136 }
141 137
142 return dentry; 138 mutex_unlock(&privroot->d_inode->i_mutex);
143} 139 return xaroot;
144
145static struct dentry *open_xa_root(struct super_block *sb, int flags)
146{
147 struct dentry *privroot = REISERFS_SB(sb)->priv_root;
148 if (!privroot)
149 return ERR_PTR(-ENODATA);
150 return lookup_or_create_dir(privroot, XAROOT_NAME, flags);
151} 140}
152 141
153static struct dentry *open_xa_dir(const struct inode *inode, int flags) 142static struct dentry *open_xa_dir(const struct inode *inode, int flags)
@@ -163,10 +152,22 @@ static struct dentry *open_xa_dir(const struct inode *inode, int flags)
163 le32_to_cpu(INODE_PKEY(inode)->k_objectid), 152 le32_to_cpu(INODE_PKEY(inode)->k_objectid),
164 inode->i_generation); 153 inode->i_generation);
165 154
166 xadir = lookup_or_create_dir(xaroot, namebuf, flags); 155 mutex_lock_nested(&xaroot->d_inode->i_mutex, I_MUTEX_XATTR);
156
157 xadir = lookup_one_len(namebuf, xaroot, strlen(namebuf));
158 if (!IS_ERR(xadir) && !xadir->d_inode) {
159 int err = -ENODATA;
160 if (xattr_may_create(flags))
161 err = xattr_mkdir(xaroot->d_inode, xadir, 0700);
162 if (err) {
163 dput(xadir);
164 xadir = ERR_PTR(err);
165 }
166 }
167
168 mutex_unlock(&xaroot->d_inode->i_mutex);
167 dput(xaroot); 169 dput(xaroot);
168 return xadir; 170 return xadir;
169
170} 171}
171 172
172/* The following are side effects of other operations that aren't explicitly 173/* The following are side effects of other operations that aren't explicitly
@@ -184,6 +185,7 @@ fill_with_dentries(void *buf, const char *name, int namelen, loff_t offset,
184{ 185{
185 struct reiserfs_dentry_buf *dbuf = buf; 186 struct reiserfs_dentry_buf *dbuf = buf;
186 struct dentry *dentry; 187 struct dentry *dentry;
188 WARN_ON_ONCE(!mutex_is_locked(&dbuf->xadir->d_inode->i_mutex));
187 189
188 if (dbuf->count == ARRAY_SIZE(dbuf->dentries)) 190 if (dbuf->count == ARRAY_SIZE(dbuf->dentries))
189 return -ENOSPC; 191 return -ENOSPC;
@@ -349,6 +351,7 @@ static struct dentry *xattr_lookup(struct inode *inode, const char *name,
349 if (IS_ERR(xadir)) 351 if (IS_ERR(xadir))
350 return ERR_CAST(xadir); 352 return ERR_CAST(xadir);
351 353
354 mutex_lock_nested(&xadir->d_inode->i_mutex, I_MUTEX_XATTR);
352 xafile = lookup_one_len(name, xadir, strlen(name)); 355 xafile = lookup_one_len(name, xadir, strlen(name));
353 if (IS_ERR(xafile)) { 356 if (IS_ERR(xafile)) {
354 err = PTR_ERR(xafile); 357 err = PTR_ERR(xafile);
@@ -360,18 +363,15 @@ static struct dentry *xattr_lookup(struct inode *inode, const char *name,
360 363
361 if (!xafile->d_inode) { 364 if (!xafile->d_inode) {
362 err = -ENODATA; 365 err = -ENODATA;
363 if (xattr_may_create(flags)) { 366 if (xattr_may_create(flags))
364 mutex_lock_nested(&xadir->d_inode->i_mutex,
365 I_MUTEX_XATTR);
366 err = xattr_create(xadir->d_inode, xafile, 367 err = xattr_create(xadir->d_inode, xafile,
367 0700|S_IFREG); 368 0700|S_IFREG);
368 mutex_unlock(&xadir->d_inode->i_mutex);
369 }
370 } 369 }
371 370
372 if (err) 371 if (err)
373 dput(xafile); 372 dput(xafile);
374out: 373out:
374 mutex_unlock(&xadir->d_inode->i_mutex);
375 dput(xadir); 375 dput(xadir);
376 if (err) 376 if (err)
377 return ERR_PTR(err); 377 return ERR_PTR(err);
@@ -435,6 +435,7 @@ static int lookup_and_delete_xattr(struct inode *inode, const char *name)
435 if (IS_ERR(xadir)) 435 if (IS_ERR(xadir))
436 return PTR_ERR(xadir); 436 return PTR_ERR(xadir);
437 437
438 mutex_lock_nested(&xadir->d_inode->i_mutex, I_MUTEX_XATTR);
438 dentry = lookup_one_len(name, xadir, strlen(name)); 439 dentry = lookup_one_len(name, xadir, strlen(name));
439 if (IS_ERR(dentry)) { 440 if (IS_ERR(dentry)) {
440 err = PTR_ERR(dentry); 441 err = PTR_ERR(dentry);
@@ -442,14 +443,13 @@ static int lookup_and_delete_xattr(struct inode *inode, const char *name)
442 } 443 }
443 444
444 if (dentry->d_inode) { 445 if (dentry->d_inode) {
445 mutex_lock_nested(&xadir->d_inode->i_mutex, I_MUTEX_XATTR);
446 err = xattr_unlink(xadir->d_inode, dentry); 446 err = xattr_unlink(xadir->d_inode, dentry);
447 mutex_unlock(&xadir->d_inode->i_mutex);
448 update_ctime(inode); 447 update_ctime(inode);
449 } 448 }
450 449
451 dput(dentry); 450 dput(dentry);
452out_dput: 451out_dput:
452 mutex_unlock(&xadir->d_inode->i_mutex);
453 dput(xadir); 453 dput(xadir);
454 return err; 454 return err;
455} 455}
@@ -687,20 +687,6 @@ out:
687 return err; 687 return err;
688} 688}
689 689
690/* Actual operations that are exported to VFS-land */
691struct xattr_handler *reiserfs_xattr_handlers[] = {
692 &reiserfs_xattr_user_handler,
693 &reiserfs_xattr_trusted_handler,
694#ifdef CONFIG_REISERFS_FS_SECURITY
695 &reiserfs_xattr_security_handler,
696#endif
697#ifdef CONFIG_REISERFS_FS_POSIX_ACL
698 &reiserfs_posix_acl_access_handler,
699 &reiserfs_posix_acl_default_handler,
700#endif
701 NULL
702};
703
704/* 690/*
705 * In order to implement different sets of xattr operations for each xattr 691 * In order to implement different sets of xattr operations for each xattr
706 * prefix with the generic xattr API, a filesystem should create a 692 * prefix with the generic xattr API, a filesystem should create a
@@ -843,7 +829,7 @@ ssize_t reiserfs_listxattr(struct dentry * dentry, char *buffer, size_t size)
843 if (!dentry->d_inode) 829 if (!dentry->d_inode)
844 return -EINVAL; 830 return -EINVAL;
845 831
846 if (!reiserfs_xattrs(dentry->d_sb) || 832 if (!dentry->d_sb->s_xattr ||
847 get_inode_sd_version(dentry->d_inode) == STAT_DATA_V1) 833 get_inode_sd_version(dentry->d_inode) == STAT_DATA_V1)
848 return -EOPNOTSUPP; 834 return -EOPNOTSUPP;
849 835
@@ -885,42 +871,50 @@ static int reiserfs_check_acl(struct inode *inode, int mask)
885 return error; 871 return error;
886} 872}
887 873
888int reiserfs_permission(struct inode *inode, int mask)
889{
890 /*
891 * We don't do permission checks on the internal objects.
892 * Permissions are determined by the "owning" object.
893 */
894 if (IS_PRIVATE(inode))
895 return 0;
896 /*
897 * Stat data v1 doesn't support ACLs.
898 */
899 if (get_inode_sd_version(inode) == STAT_DATA_V1)
900 return generic_permission(inode, mask, NULL);
901 else
902 return generic_permission(inode, mask, reiserfs_check_acl);
903}
904
905static int create_privroot(struct dentry *dentry) 874static int create_privroot(struct dentry *dentry)
906{ 875{
907 int err; 876 int err;
908 struct inode *inode = dentry->d_parent->d_inode; 877 struct inode *inode = dentry->d_parent->d_inode;
909 mutex_lock_nested(&inode->i_mutex, I_MUTEX_XATTR); 878 WARN_ON_ONCE(!mutex_is_locked(&inode->i_mutex));
879
910 err = xattr_mkdir(inode, dentry, 0700); 880 err = xattr_mkdir(inode, dentry, 0700);
911 mutex_unlock(&inode->i_mutex); 881 if (err || !dentry->d_inode) {
912 if (err) { 882 reiserfs_warning(dentry->d_sb, "jdm-20006",
913 dput(dentry); 883 "xattrs/ACLs enabled and couldn't "
914 dentry = NULL; 884 "find/create .reiserfs_priv. "
885 "Failing mount.");
886 return -EOPNOTSUPP;
915 } 887 }
916 888
917 if (dentry && dentry->d_inode) 889 dentry->d_inode->i_flags |= S_PRIVATE;
918 reiserfs_info(dentry->d_sb, "Created %s - reserved for xattr " 890 reiserfs_info(dentry->d_sb, "Created %s - reserved for xattr "
919 "storage.\n", PRIVROOT_NAME); 891 "storage.\n", PRIVROOT_NAME);
920 892
921 return err; 893 return 0;
922} 894}
923 895
896#else
897int __init reiserfs_xattr_register_handlers(void) { return 0; }
898void reiserfs_xattr_unregister_handlers(void) {}
899static int create_privroot(struct dentry *dentry) { return 0; }
900#endif
901
902/* Actual operations that are exported to VFS-land */
903struct xattr_handler *reiserfs_xattr_handlers[] = {
904#ifdef CONFIG_REISERFS_FS_XATTR
905 &reiserfs_xattr_user_handler,
906 &reiserfs_xattr_trusted_handler,
907#endif
908#ifdef CONFIG_REISERFS_FS_SECURITY
909 &reiserfs_xattr_security_handler,
910#endif
911#ifdef CONFIG_REISERFS_FS_POSIX_ACL
912 &reiserfs_posix_acl_access_handler,
913 &reiserfs_posix_acl_default_handler,
914#endif
915 NULL
916};
917
924static int xattr_mount_check(struct super_block *s) 918static int xattr_mount_check(struct super_block *s)
925{ 919{
926 /* We need generation numbers to ensure that the oid mapping is correct 920 /* We need generation numbers to ensure that the oid mapping is correct
@@ -940,21 +934,33 @@ static int xattr_mount_check(struct super_block *s)
940 return 0; 934 return 0;
941} 935}
942 936
943#else 937int reiserfs_permission(struct inode *inode, int mask)
944int __init reiserfs_xattr_register_handlers(void) { return 0; } 938{
945void reiserfs_xattr_unregister_handlers(void) {} 939 /*
940 * We don't do permission checks on the internal objects.
941 * Permissions are determined by the "owning" object.
942 */
943 if (IS_PRIVATE(inode))
944 return 0;
945
946#ifdef CONFIG_REISERFS_FS_XATTR
947 /*
948 * Stat data v1 doesn't support ACLs.
949 */
950 if (get_inode_sd_version(inode) != STAT_DATA_V1)
951 return generic_permission(inode, mask, reiserfs_check_acl);
946#endif 952#endif
953 return generic_permission(inode, mask, NULL);
954}
947 955
948/* This will catch lookups from the fs root to .reiserfs_priv */ 956/* This will catch lookups from the fs root to .reiserfs_priv */
949static int 957static int
950xattr_lookup_poison(struct dentry *dentry, struct qstr *q1, struct qstr *name) 958xattr_lookup_poison(struct dentry *dentry, struct qstr *q1, struct qstr *name)
951{ 959{
952 struct dentry *priv_root = REISERFS_SB(dentry->d_sb)->priv_root; 960 struct dentry *priv_root = REISERFS_SB(dentry->d_sb)->priv_root;
953 if (name->len == priv_root->d_name.len && 961 if (container_of(q1, struct dentry, d_name) == priv_root)
954 name->hash == priv_root->d_name.hash &&
955 !memcmp(name->name, priv_root->d_name.name, name->len)) {
956 return -ENOENT; 962 return -ENOENT;
957 } else if (q1->len == name->len && 963 if (q1->len == name->len &&
958 !memcmp(q1->name, name->name, name->len)) 964 !memcmp(q1->name, name->name, name->len))
959 return 0; 965 return 0;
960 return 1; 966 return 1;
@@ -964,73 +970,71 @@ static const struct dentry_operations xattr_lookup_poison_ops = {
964 .d_compare = xattr_lookup_poison, 970 .d_compare = xattr_lookup_poison,
965}; 971};
966 972
973int reiserfs_lookup_privroot(struct super_block *s)
974{
975 struct dentry *dentry;
976 int err = 0;
977
978 /* If we don't have the privroot located yet - go find it */
979 mutex_lock(&s->s_root->d_inode->i_mutex);
980 dentry = lookup_one_len(PRIVROOT_NAME, s->s_root,
981 strlen(PRIVROOT_NAME));
982 if (!IS_ERR(dentry)) {
983 REISERFS_SB(s)->priv_root = dentry;
984 s->s_root->d_op = &xattr_lookup_poison_ops;
985 if (dentry->d_inode)
986 dentry->d_inode->i_flags |= S_PRIVATE;
987 } else
988 err = PTR_ERR(dentry);
989 mutex_unlock(&s->s_root->d_inode->i_mutex);
990
991 return err;
992}
993
967/* We need to take a copy of the mount flags since things like 994/* We need to take a copy of the mount flags since things like
968 * MS_RDONLY don't get set until *after* we're called. 995 * MS_RDONLY don't get set until *after* we're called.
969 * mount_flags != mount_options */ 996 * mount_flags != mount_options */
970int reiserfs_xattr_init(struct super_block *s, int mount_flags) 997int reiserfs_xattr_init(struct super_block *s, int mount_flags)
971{ 998{
972 int err = 0; 999 int err = 0;
1000 struct dentry *privroot = REISERFS_SB(s)->priv_root;
973 1001
974#ifdef CONFIG_REISERFS_FS_XATTR
975 err = xattr_mount_check(s); 1002 err = xattr_mount_check(s);
976 if (err) 1003 if (err)
977 goto error; 1004 goto error;
978#endif
979 1005
980 /* If we don't have the privroot located yet - go find it */ 1006 if (!privroot->d_inode && !(mount_flags & MS_RDONLY)) {
981 if (!REISERFS_SB(s)->priv_root) { 1007 mutex_lock(&s->s_root->d_inode->i_mutex);
982 struct dentry *dentry; 1008 err = create_privroot(REISERFS_SB(s)->priv_root);
983 dentry = lookup_one_len(PRIVROOT_NAME, s->s_root, 1009 mutex_unlock(&s->s_root->d_inode->i_mutex);
984 strlen(PRIVROOT_NAME));
985 if (!IS_ERR(dentry)) {
986#ifdef CONFIG_REISERFS_FS_XATTR
987 if (!(mount_flags & MS_RDONLY) && !dentry->d_inode)
988 err = create_privroot(dentry);
989#endif
990 if (!dentry->d_inode) {
991 dput(dentry);
992 dentry = NULL;
993 }
994 } else
995 err = PTR_ERR(dentry);
996
997 if (!err && dentry) {
998 s->s_root->d_op = &xattr_lookup_poison_ops;
999 dentry->d_inode->i_flags |= S_PRIVATE;
1000 REISERFS_SB(s)->priv_root = dentry;
1001#ifdef CONFIG_REISERFS_FS_XATTR
1002 /* xattrs are unavailable */
1003 } else if (!(mount_flags & MS_RDONLY)) {
1004 /* If we're read-only it just means that the dir
1005 * hasn't been created. Not an error -- just no
1006 * xattrs on the fs. We'll check again if we
1007 * go read-write */
1008 reiserfs_warning(s, "jdm-20006",
1009 "xattrs/ACLs enabled and couldn't "
1010 "find/create .reiserfs_priv. "
1011 "Failing mount.");
1012 err = -EOPNOTSUPP;
1013#endif
1014 }
1015 } 1010 }
1016 1011
1017#ifdef CONFIG_REISERFS_FS_XATTR 1012 if (privroot->d_inode) {
1018 if (!err)
1019 s->s_xattr = reiserfs_xattr_handlers; 1013 s->s_xattr = reiserfs_xattr_handlers;
1014 mutex_lock(&privroot->d_inode->i_mutex);
1015 if (!REISERFS_SB(s)->xattr_root) {
1016 struct dentry *dentry;
1017 dentry = lookup_one_len(XAROOT_NAME, privroot,
1018 strlen(XAROOT_NAME));
1019 if (!IS_ERR(dentry))
1020 REISERFS_SB(s)->xattr_root = dentry;
1021 else
1022 err = PTR_ERR(dentry);
1023 }
1024 mutex_unlock(&privroot->d_inode->i_mutex);
1025 }
1020 1026
1021error: 1027error:
1022 if (err) { 1028 if (err) {
1023 clear_bit(REISERFS_XATTRS_USER, &(REISERFS_SB(s)->s_mount_opt)); 1029 clear_bit(REISERFS_XATTRS_USER, &(REISERFS_SB(s)->s_mount_opt));
1024 clear_bit(REISERFS_POSIXACL, &(REISERFS_SB(s)->s_mount_opt)); 1030 clear_bit(REISERFS_POSIXACL, &(REISERFS_SB(s)->s_mount_opt));
1025 } 1031 }
1026#endif
1027 1032
1028 /* The super_block MS_POSIXACL must mirror the (no)acl mount option. */ 1033 /* The super_block MS_POSIXACL must mirror the (no)acl mount option. */
1029 s->s_flags = s->s_flags & ~MS_POSIXACL;
1030#ifdef CONFIG_REISERFS_FS_POSIX_ACL
1031 if (reiserfs_posixacl(s)) 1034 if (reiserfs_posixacl(s))
1032 s->s_flags |= MS_POSIXACL; 1035 s->s_flags |= MS_POSIXACL;
1033#endif 1036 else
1037 s->s_flags &= ~MS_POSIXACL;
1034 1038
1035 return err; 1039 return err;
1036} 1040}
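
The open_xa_root(), open_xa_dir() and xattr_lookup() changes above all settle on one pattern: take the parent directory's i_mutex (I_MUTEX_XATTR subclass), do lookup_one_len(), and call xattr_mkdir()/xattr_create() only if the name is still missing, so the check and the create cannot race. For comparison, a plain userspace program with no such lock usually closes the same race by letting the create call itself report EEXIST; a minimal sketch of that idiom follows (ensure_dir() and the directory name are invented for illustration and are not part of the patch):

#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <sys/stat.h>

/* Create a subdirectory if it does not exist yet; tolerate losing the
 * race to another creator.  Illustrative only. */
static int ensure_dir(int parent_fd, const char *name)
{
	if (mkdirat(parent_fd, name, 0700) == 0)
		return 0;			/* we created it */
	if (errno == EEXIST)
		return 0;			/* someone else did - also fine */
	return -1;				/* real error */
}

int main(void)
{
	if (ensure_dir(AT_FDCWD, "xattr-demo") != 0)
		perror("mkdirat");
	return 0;
}

The kernel helpers instead hold the parent's i_mutex across both the lookup and the conditional create, which is also what lookup_one_len() expects of its caller.
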
diff --git a/fs/reiserfs/xattr_security.c b/fs/reiserfs/xattr_security.c
index 4d3c20e787c..a92c8792c0f 100644
--- a/fs/reiserfs/xattr_security.c
+++ b/fs/reiserfs/xattr_security.c
@@ -55,8 +55,16 @@ int reiserfs_security_init(struct inode *dir, struct inode *inode,
55 struct reiserfs_security_handle *sec) 55 struct reiserfs_security_handle *sec)
56{ 56{
57 int blocks = 0; 57 int blocks = 0;
58 int error = security_inode_init_security(inode, dir, &sec->name, 58 int error;
59 &sec->value, &sec->length); 59
60 sec->name = NULL;
61
62 /* Don't add selinux attributes on xattrs - they'll never get used */
63 if (IS_PRIVATE(dir))
64 return 0;
65
66 error = security_inode_init_security(inode, dir, &sec->name,
67 &sec->value, &sec->length);
60 if (error) { 68 if (error) {
61 if (error == -EOPNOTSUPP) 69 if (error == -EOPNOTSUPP)
62 error = 0; 70 error = 0;
diff --git a/fs/romfs/internal.h b/fs/romfs/internal.h
index 06044a9dc62..95217b83011 100644
--- a/fs/romfs/internal.h
+++ b/fs/romfs/internal.h
@@ -43,5 +43,5 @@ extern int romfs_dev_read(struct super_block *sb, unsigned long pos,
43 void *buf, size_t buflen); 43 void *buf, size_t buflen);
44extern ssize_t romfs_dev_strnlen(struct super_block *sb, 44extern ssize_t romfs_dev_strnlen(struct super_block *sb,
45 unsigned long pos, size_t maxlen); 45 unsigned long pos, size_t maxlen);
46extern int romfs_dev_strncmp(struct super_block *sb, unsigned long pos, 46extern int romfs_dev_strcmp(struct super_block *sb, unsigned long pos,
47 const char *str, size_t size); 47 const char *str, size_t size);
diff --git a/fs/romfs/storage.c b/fs/romfs/storage.c
index 7e3e1e12a08..b3208adf8e7 100644
--- a/fs/romfs/storage.c
+++ b/fs/romfs/storage.c
@@ -67,26 +67,35 @@ static ssize_t romfs_mtd_strnlen(struct super_block *sb,
67 * compare a string to one in a romfs image on MTD 67 * compare a string to one in a romfs image on MTD
68 * - return 1 if matched, 0 if differ, -ve if error 68 * - return 1 if matched, 0 if differ, -ve if error
69 */ 69 */
70static int romfs_mtd_strncmp(struct super_block *sb, unsigned long pos, 70static int romfs_mtd_strcmp(struct super_block *sb, unsigned long pos,
71 const char *str, size_t size) 71 const char *str, size_t size)
72{ 72{
73 u_char buf[16]; 73 u_char buf[17];
74 size_t len, segment; 74 size_t len, segment;
75 int ret; 75 int ret;
76 76
77 /* scan the string up to 16 bytes at a time */ 77 /* scan the string up to 16 bytes at a time, and attempt to grab the
78 * trailing NUL whilst we're at it */
79 buf[0] = 0xff;
80
78 while (size > 0) { 81 while (size > 0) {
79 segment = min_t(size_t, size, 16); 82 segment = min_t(size_t, size + 1, 17);
80 ret = ROMFS_MTD_READ(sb, pos, segment, &len, buf); 83 ret = ROMFS_MTD_READ(sb, pos, segment, &len, buf);
81 if (ret < 0) 84 if (ret < 0)
82 return ret; 85 return ret;
86 len--;
83 if (memcmp(buf, str, len) != 0) 87 if (memcmp(buf, str, len) != 0)
84 return 0; 88 return 0;
89 buf[0] = buf[len];
85 size -= len; 90 size -= len;
86 pos += len; 91 pos += len;
87 str += len; 92 str += len;
88 } 93 }
89 94
95 /* check the trailing NUL was */
96 if (buf[0])
97 return 0;
98
90 return 1; 99 return 1;
91} 100}
92#endif /* CONFIG_ROMFS_ON_MTD */ 101#endif /* CONFIG_ROMFS_ON_MTD */
@@ -111,6 +120,7 @@ static int romfs_blk_read(struct super_block *sb, unsigned long pos,
111 return -EIO; 120 return -EIO;
112 memcpy(buf, bh->b_data + offset, segment); 121 memcpy(buf, bh->b_data + offset, segment);
113 brelse(bh); 122 brelse(bh);
123 buf += segment;
114 buflen -= segment; 124 buflen -= segment;
115 pos += segment; 125 pos += segment;
116 } 126 }
@@ -154,28 +164,48 @@ static ssize_t romfs_blk_strnlen(struct super_block *sb,
154 * compare a string to one in a romfs image on a block device 164 * compare a string to one in a romfs image on a block device
155 * - return 1 if matched, 0 if differ, -ve if error 165 * - return 1 if matched, 0 if differ, -ve if error
156 */ 166 */
157static int romfs_blk_strncmp(struct super_block *sb, unsigned long pos, 167static int romfs_blk_strcmp(struct super_block *sb, unsigned long pos,
158 const char *str, size_t size) 168 const char *str, size_t size)
159{ 169{
160 struct buffer_head *bh; 170 struct buffer_head *bh;
161 unsigned long offset; 171 unsigned long offset;
162 size_t segment; 172 size_t segment;
163 bool x; 173 bool matched, terminated = false;
164 174
165 /* scan the string up to 16 bytes at a time */ 175 /* compare string up to a block at a time */
166 while (size > 0) { 176 while (size > 0) {
167 offset = pos & (ROMBSIZE - 1); 177 offset = pos & (ROMBSIZE - 1);
168 segment = min_t(size_t, size, ROMBSIZE - offset); 178 segment = min_t(size_t, size, ROMBSIZE - offset);
169 bh = sb_bread(sb, pos >> ROMBSBITS); 179 bh = sb_bread(sb, pos >> ROMBSBITS);
170 if (!bh) 180 if (!bh)
171 return -EIO; 181 return -EIO;
172 x = (memcmp(bh->b_data + offset, str, segment) != 0); 182 matched = (memcmp(bh->b_data + offset, str, segment) == 0);
173 brelse(bh); 183
174 if (x)
175 return 0;
176 size -= segment; 184 size -= segment;
177 pos += segment; 185 pos += segment;
178 str += segment; 186 str += segment;
187 if (matched && size == 0 && offset + segment < ROMBSIZE) {
188 if (!bh->b_data[offset + segment])
189 terminated = true;
190 else
191 matched = false;
192 }
193 brelse(bh);
194 if (!matched)
195 return 0;
196 }
197
198 if (!terminated) {
199 /* the terminating NUL must be on the first byte of the next
200 * block */
201 BUG_ON((pos & (ROMBSIZE - 1)) != 0);
202 bh = sb_bread(sb, pos >> ROMBSBITS);
203 if (!bh)
204 return -EIO;
205 matched = !bh->b_data[0];
206 brelse(bh);
207 if (!matched)
208 return 0;
179 } 209 }
180 210
181 return 1; 211 return 1;
@@ -234,10 +264,12 @@ ssize_t romfs_dev_strnlen(struct super_block *sb,
234 264
235/* 265/*
236 * compare a string to one in romfs 266 * compare a string to one in romfs
267 * - the string to be compared to, str, may not be NUL-terminated; instead the
268 * string is of the specified size
237 * - return 1 if matched, 0 if differ, -ve if error 269 * - return 1 if matched, 0 if differ, -ve if error
238 */ 270 */
239int romfs_dev_strncmp(struct super_block *sb, unsigned long pos, 271int romfs_dev_strcmp(struct super_block *sb, unsigned long pos,
240 const char *str, size_t size) 272 const char *str, size_t size)
241{ 273{
242 size_t limit; 274 size_t limit;
243 275
@@ -246,16 +278,16 @@ int romfs_dev_strncmp(struct super_block *sb, unsigned long pos,
246 return -EIO; 278 return -EIO;
247 if (size > ROMFS_MAXFN) 279 if (size > ROMFS_MAXFN)
248 return -ENAMETOOLONG; 280 return -ENAMETOOLONG;
249 if (size > limit - pos) 281 if (size + 1 > limit - pos)
250 return -EIO; 282 return -EIO;
251 283
252#ifdef CONFIG_ROMFS_ON_MTD 284#ifdef CONFIG_ROMFS_ON_MTD
253 if (sb->s_mtd) 285 if (sb->s_mtd)
254 return romfs_mtd_strncmp(sb, pos, str, size); 286 return romfs_mtd_strcmp(sb, pos, str, size);
255#endif 287#endif
256#ifdef CONFIG_ROMFS_ON_BLOCK 288#ifdef CONFIG_ROMFS_ON_BLOCK
257 if (sb->s_bdev) 289 if (sb->s_bdev)
258 return romfs_blk_strncmp(sb, pos, str, size); 290 return romfs_blk_strcmp(sb, pos, str, size);
259#endif 291#endif
260 return -EIO; 292 return -EIO;
261} 293}
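
The strncmp-to-strcmp rename above is the visible part of a behavioural fix: both backends now also verify that the byte following the compared region is a NUL, so a lookup for a short name can no longer match a longer on-disk name that merely shares the prefix. The MTD variant does this by reading size + 1 bytes per pass and carrying the candidate NUL in buf[0]; the block variant reads the extra byte from the current block or, failing that, from the first byte of the next one. A minimal userspace sketch of the property being enforced (name_matches() is invented for illustration and is not kernel code):

#include <stdio.h>
#include <string.h>

/* A sized name matches a stored, NUL-terminated string only when the
 * first len bytes are equal AND the stored string ends right there. */
static int name_matches(const char *stored, const char *name, size_t len)
{
	return memcmp(stored, name, len) == 0 && stored[len] == '\0';
}

int main(void)
{
	printf("%d\n", name_matches("abcd", "abc", 3));	/* 0: prefix only */
	printf("%d\n", name_matches("abc", "abc", 3));	/* 1: exact match */
	return 0;
}
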
diff --git a/fs/romfs/super.c b/fs/romfs/super.c
index 10ca7d984a8..4ab3c03d8f9 100644
--- a/fs/romfs/super.c
+++ b/fs/romfs/super.c
@@ -240,8 +240,8 @@ static struct dentry *romfs_lookup(struct inode *dir, struct dentry *dentry,
240 goto error; 240 goto error;
241 241
242 /* try to match the first 16 bytes of name */ 242 /* try to match the first 16 bytes of name */
243 ret = romfs_dev_strncmp(dir->i_sb, offset + ROMFH_SIZE, name, 243 ret = romfs_dev_strcmp(dir->i_sb, offset + ROMFH_SIZE, name,
244 len); 244 len);
245 if (ret < 0) 245 if (ret < 0)
246 goto error; 246 goto error;
247 if (ret == 1) 247 if (ret == 1)
@@ -298,7 +298,8 @@ static struct inode *romfs_iget(struct super_block *sb, unsigned long pos)
298 struct romfs_inode ri; 298 struct romfs_inode ri;
299 struct inode *i; 299 struct inode *i;
300 unsigned long nlen; 300 unsigned long nlen;
301 unsigned nextfh, ret; 301 unsigned nextfh;
302 int ret;
302 umode_t mode; 303 umode_t mode;
303 304
304 /* we might have to traverse a chain of "hard link" file entries to get 305 /* we might have to traverse a chain of "hard link" file entries to get
diff --git a/fs/splice.c b/fs/splice.c
index 5384a90665d..666953d59a3 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -614,7 +614,6 @@ static void wakeup_pipe_writers(struct pipe_inode_info *pipe)
614 * @actor: handler that splices the data 614 * @actor: handler that splices the data
615 * 615 *
616 * Description: 616 * Description:
617
618 * This function loops over the pipe and calls @actor to do the 617 * This function loops over the pipe and calls @actor to do the
619 * actual moving of a single struct pipe_buffer to the desired 618 * actual moving of a single struct pipe_buffer to the desired
620 * destination. It returns when there's no more buffers left in 619 * destination. It returns when there's no more buffers left in
@@ -711,7 +710,7 @@ EXPORT_SYMBOL(splice_from_pipe_next);
711 710
712/** 711/**
713 * splice_from_pipe_begin - start splicing from pipe 712 * splice_from_pipe_begin - start splicing from pipe
714 * @pipe: pipe to splice from 713 * @sd: information about the splice operation
715 * 714 *
716 * Description: 715 * Description:
717 * This function should be called before a loop containing 716 * This function should be called before a loop containing
diff --git a/fs/squashfs/Makefile b/fs/squashfs/Makefile
index 8258cf9a031..70e3244fa30 100644
--- a/fs/squashfs/Makefile
+++ b/fs/squashfs/Makefile
@@ -5,4 +5,3 @@
5obj-$(CONFIG_SQUASHFS) += squashfs.o 5obj-$(CONFIG_SQUASHFS) += squashfs.o
6squashfs-y += block.o cache.o dir.o export.o file.o fragment.o id.o inode.o 6squashfs-y += block.o cache.o dir.o export.o file.o fragment.o id.o inode.o
7squashfs-y += namei.o super.o symlink.o 7squashfs-y += namei.o super.o symlink.o
8#squashfs-y += squashfs2_0.o
diff --git a/fs/squashfs/cache.c b/fs/squashfs/cache.c
index 1c4739e33af..40c98fa6b5d 100644
--- a/fs/squashfs/cache.c
+++ b/fs/squashfs/cache.c
@@ -252,6 +252,7 @@ struct squashfs_cache *squashfs_cache_init(char *name, int entries,
252 cache->entries = entries; 252 cache->entries = entries;
253 cache->block_size = block_size; 253 cache->block_size = block_size;
254 cache->pages = block_size >> PAGE_CACHE_SHIFT; 254 cache->pages = block_size >> PAGE_CACHE_SHIFT;
255 cache->pages = cache->pages ? cache->pages : 1;
255 cache->name = name; 256 cache->name = name;
256 cache->num_waiters = 0; 257 cache->num_waiters = 0;
257 spin_lock_init(&cache->lock); 258 spin_lock_init(&cache->lock);
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c
index ffa6edcd2d0..0adc624c956 100644
--- a/fs/squashfs/super.c
+++ b/fs/squashfs/super.c
@@ -157,6 +157,16 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent)
157 if (msblk->block_size > SQUASHFS_FILE_MAX_SIZE) 157 if (msblk->block_size > SQUASHFS_FILE_MAX_SIZE)
158 goto failed_mount; 158 goto failed_mount;
159 159
160 /*
161 * Check the system page size is not larger than the filesystem
162 * block size (by default 128K). This is currently not supported.
163 */
164 if (PAGE_CACHE_SIZE > msblk->block_size) {
165 ERROR("Page size > filesystem block size (%d). This is "
166 "currently not supported!\n", msblk->block_size);
167 goto failed_mount;
168 }
169
160 msblk->block_log = le16_to_cpu(sblk->block_log); 170 msblk->block_log = le16_to_cpu(sblk->block_log);
161 if (msblk->block_log > SQUASHFS_FILE_MAX_LOG) 171 if (msblk->block_log > SQUASHFS_FILE_MAX_LOG)
162 goto failed_mount; 172 goto failed_mount;
diff --git a/fs/stat.c b/fs/stat.c
index 2db740a0cfb..075694e31d8 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -55,59 +55,54 @@ int vfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
55 55
56EXPORT_SYMBOL(vfs_getattr); 56EXPORT_SYMBOL(vfs_getattr);
57 57
58int vfs_stat_fd(int dfd, char __user *name, struct kstat *stat) 58int vfs_fstat(unsigned int fd, struct kstat *stat)
59{ 59{
60 struct path path; 60 struct file *f = fget(fd);
61 int error; 61 int error = -EBADF;
62 62
63 error = user_path_at(dfd, name, LOOKUP_FOLLOW, &path); 63 if (f) {
64 if (!error) { 64 error = vfs_getattr(f->f_path.mnt, f->f_path.dentry, stat);
65 error = vfs_getattr(path.mnt, path.dentry, stat); 65 fput(f);
66 path_put(&path);
67 } 66 }
68 return error; 67 return error;
69} 68}
69EXPORT_SYMBOL(vfs_fstat);
70 70
71int vfs_stat(char __user *name, struct kstat *stat) 71int vfs_fstatat(int dfd, char __user *filename, struct kstat *stat, int flag)
72{ 72{
73 return vfs_stat_fd(AT_FDCWD, name, stat); 73 struct path path;
74} 74 int error = -EINVAL;
75 int lookup_flags = 0;
75 76
76EXPORT_SYMBOL(vfs_stat); 77 if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0)
78 goto out;
77 79
78int vfs_lstat_fd(int dfd, char __user *name, struct kstat *stat) 80 if (!(flag & AT_SYMLINK_NOFOLLOW))
79{ 81 lookup_flags |= LOOKUP_FOLLOW;
80 struct path path;
81 int error;
82 82
83 error = user_path_at(dfd, name, 0, &path); 83 error = user_path_at(dfd, filename, lookup_flags, &path);
84 if (!error) { 84 if (error)
85 error = vfs_getattr(path.mnt, path.dentry, stat); 85 goto out;
86 path_put(&path); 86
87 } 87 error = vfs_getattr(path.mnt, path.dentry, stat);
88 path_put(&path);
89out:
88 return error; 90 return error;
89} 91}
92EXPORT_SYMBOL(vfs_fstatat);
90 93
91int vfs_lstat(char __user *name, struct kstat *stat) 94int vfs_stat(char __user *name, struct kstat *stat)
92{ 95{
93 return vfs_lstat_fd(AT_FDCWD, name, stat); 96 return vfs_fstatat(AT_FDCWD, name, stat, 0);
94} 97}
98EXPORT_SYMBOL(vfs_stat);
95 99
96EXPORT_SYMBOL(vfs_lstat); 100int vfs_lstat(char __user *name, struct kstat *stat)
97
98int vfs_fstat(unsigned int fd, struct kstat *stat)
99{ 101{
100 struct file *f = fget(fd); 102 return vfs_fstatat(AT_FDCWD, name, stat, AT_SYMLINK_NOFOLLOW);
101 int error = -EBADF;
102
103 if (f) {
104 error = vfs_getattr(f->f_path.mnt, f->f_path.dentry, stat);
105 fput(f);
106 }
107 return error;
108} 103}
104EXPORT_SYMBOL(vfs_lstat);
109 105
110EXPORT_SYMBOL(vfs_fstat);
111 106
112#ifdef __ARCH_WANT_OLD_STAT 107#ifdef __ARCH_WANT_OLD_STAT
113 108
@@ -155,23 +150,25 @@ static int cp_old_stat(struct kstat *stat, struct __old_kernel_stat __user * sta
155SYSCALL_DEFINE2(stat, char __user *, filename, struct __old_kernel_stat __user *, statbuf) 150SYSCALL_DEFINE2(stat, char __user *, filename, struct __old_kernel_stat __user *, statbuf)
156{ 151{
157 struct kstat stat; 152 struct kstat stat;
158 int error = vfs_stat_fd(AT_FDCWD, filename, &stat); 153 int error;
159 154
160 if (!error) 155 error = vfs_stat(filename, &stat);
161 error = cp_old_stat(&stat, statbuf); 156 if (error)
157 return error;
162 158
163 return error; 159 return cp_old_stat(&stat, statbuf);
164} 160}
165 161
166SYSCALL_DEFINE2(lstat, char __user *, filename, struct __old_kernel_stat __user *, statbuf) 162SYSCALL_DEFINE2(lstat, char __user *, filename, struct __old_kernel_stat __user *, statbuf)
167{ 163{
168 struct kstat stat; 164 struct kstat stat;
169 int error = vfs_lstat_fd(AT_FDCWD, filename, &stat); 165 int error;
170 166
171 if (!error) 167 error = vfs_lstat(filename, &stat);
172 error = cp_old_stat(&stat, statbuf); 168 if (error)
169 return error;
173 170
174 return error; 171 return cp_old_stat(&stat, statbuf);
175} 172}
176 173
177SYSCALL_DEFINE2(fstat, unsigned int, fd, struct __old_kernel_stat __user *, statbuf) 174SYSCALL_DEFINE2(fstat, unsigned int, fd, struct __old_kernel_stat __user *, statbuf)
@@ -240,23 +237,23 @@ static int cp_new_stat(struct kstat *stat, struct stat __user *statbuf)
240SYSCALL_DEFINE2(newstat, char __user *, filename, struct stat __user *, statbuf) 237SYSCALL_DEFINE2(newstat, char __user *, filename, struct stat __user *, statbuf)
241{ 238{
242 struct kstat stat; 239 struct kstat stat;
243 int error = vfs_stat_fd(AT_FDCWD, filename, &stat); 240 int error = vfs_stat(filename, &stat);
244
245 if (!error)
246 error = cp_new_stat(&stat, statbuf);
247 241
248 return error; 242 if (error)
243 return error;
244 return cp_new_stat(&stat, statbuf);
249} 245}
250 246
251SYSCALL_DEFINE2(newlstat, char __user *, filename, struct stat __user *, statbuf) 247SYSCALL_DEFINE2(newlstat, char __user *, filename, struct stat __user *, statbuf)
252{ 248{
253 struct kstat stat; 249 struct kstat stat;
254 int error = vfs_lstat_fd(AT_FDCWD, filename, &stat); 250 int error;
255 251
256 if (!error) 252 error = vfs_lstat(filename, &stat);
257 error = cp_new_stat(&stat, statbuf); 253 if (error)
254 return error;
258 255
259 return error; 256 return cp_new_stat(&stat, statbuf);
260} 257}
261 258
262#if !defined(__ARCH_WANT_STAT64) || defined(__ARCH_WANT_SYS_NEWFSTATAT) 259#if !defined(__ARCH_WANT_STAT64) || defined(__ARCH_WANT_SYS_NEWFSTATAT)
@@ -264,21 +261,12 @@ SYSCALL_DEFINE4(newfstatat, int, dfd, char __user *, filename,
264 struct stat __user *, statbuf, int, flag) 261 struct stat __user *, statbuf, int, flag)
265{ 262{
266 struct kstat stat; 263 struct kstat stat;
267 int error = -EINVAL; 264 int error;
268
269 if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0)
270 goto out;
271
272 if (flag & AT_SYMLINK_NOFOLLOW)
273 error = vfs_lstat_fd(dfd, filename, &stat);
274 else
275 error = vfs_stat_fd(dfd, filename, &stat);
276
277 if (!error)
278 error = cp_new_stat(&stat, statbuf);
279 265
280out: 266 error = vfs_fstatat(dfd, filename, &stat, flag);
281 return error; 267 if (error)
268 return error;
269 return cp_new_stat(&stat, statbuf);
282} 270}
283#endif 271#endif
284 272
@@ -404,21 +392,12 @@ SYSCALL_DEFINE4(fstatat64, int, dfd, char __user *, filename,
404 struct stat64 __user *, statbuf, int, flag) 392 struct stat64 __user *, statbuf, int, flag)
405{ 393{
406 struct kstat stat; 394 struct kstat stat;
407 int error = -EINVAL; 395 int error;
408
409 if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0)
410 goto out;
411
412 if (flag & AT_SYMLINK_NOFOLLOW)
413 error = vfs_lstat_fd(dfd, filename, &stat);
414 else
415 error = vfs_stat_fd(dfd, filename, &stat);
416
417 if (!error)
418 error = cp_new_stat64(&stat, statbuf);
419 396
420out: 397 error = vfs_fstatat(dfd, filename, &stat, flag);
421 return error; 398 if (error)
399 return error;
400 return cp_new_stat64(&stat, statbuf);
422} 401}
423#endif /* __ARCH_WANT_STAT64 */ 402#endif /* __ARCH_WANT_STAT64 */
424 403
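
The old vfs_stat_fd()/vfs_lstat_fd() pair, and the flag checking duplicated in newfstatat()/fstatat64(), collapse into a single vfs_fstatat() where AT_SYMLINK_NOFOLLOW simply decides whether LOOKUP_FOLLOW is passed to the path walk. Seen from the syscall side, the distinction the new helper centralises looks like this small, runnable sketch (illustration only, not part of the patch):

#include <fcntl.h>		/* AT_FDCWD, AT_SYMLINK_NOFOLLOW */
#include <stdio.h>
#include <sys/stat.h>

int main(int argc, char **argv)
{
	const char *path = argc > 1 ? argv[1] : "/etc/localtime";
	struct stat st;

	/* follow symlinks: reaches vfs_fstatat() with LOOKUP_FOLLOW set */
	if (fstatat(AT_FDCWD, path, &st, 0) == 0)
		printf("stat : mode %o size %lld\n",
		       (unsigned int)st.st_mode, (long long)st.st_size);

	/* AT_SYMLINK_NOFOLLOW clears LOOKUP_FOLLOW, i.e. the lstat case */
	if (fstatat(AT_FDCWD, path, &st, AT_SYMLINK_NOFOLLOW) == 0)
		printf("lstat: mode %o size %lld\n",
		       (unsigned int)st.st_mode, (long long)st.st_size);
	return 0;
}
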
diff --git a/fs/super.c b/fs/super.c
index 786fe7d7279..1943fdf655f 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -208,6 +208,34 @@ void deactivate_super(struct super_block *s)
208EXPORT_SYMBOL(deactivate_super); 208EXPORT_SYMBOL(deactivate_super);
209 209
210/** 210/**
211 * deactivate_locked_super - drop an active reference to superblock
212 * @s: superblock to deactivate
213 *
214 * Equivalent of up_write(&s->s_umount); deactivate_super(s);, except that
215 * it does not unlock it until it's all over. As the result, it's safe to
216 * use to dispose of new superblock on ->get_sb() failure exits - nobody
217 * will see the sucker until it's all over. Equivalent using up_write +
218 * deactivate_super is safe for that purpose only if superblock is either
219 * safe to use or has NULL ->s_root when we unlock.
220 */
221void deactivate_locked_super(struct super_block *s)
222{
223 struct file_system_type *fs = s->s_type;
224 if (atomic_dec_and_lock(&s->s_active, &sb_lock)) {
225 s->s_count -= S_BIAS-1;
226 spin_unlock(&sb_lock);
227 vfs_dq_off(s, 0);
228 fs->kill_sb(s);
229 put_filesystem(fs);
230 put_super(s);
231 } else {
232 up_write(&s->s_umount);
233 }
234}
235
236EXPORT_SYMBOL(deactivate_locked_super);
237
238/**
211 * grab_super - acquire an active reference 239 * grab_super - acquire an active reference
212 * @s: reference we are trying to make active 240 * @s: reference we are trying to make active
213 * 241 *
@@ -797,8 +825,7 @@ int get_sb_ns(struct file_system_type *fs_type, int flags, void *data,
797 sb->s_flags = flags; 825 sb->s_flags = flags;
798 err = fill_super(sb, data, flags & MS_SILENT ? 1 : 0); 826 err = fill_super(sb, data, flags & MS_SILENT ? 1 : 0);
799 if (err) { 827 if (err) {
800 up_write(&sb->s_umount); 828 deactivate_locked_super(sb);
801 deactivate_super(sb);
802 return err; 829 return err;
803 } 830 }
804 831
@@ -854,8 +881,7 @@ int get_sb_bdev(struct file_system_type *fs_type,
854 881
855 if (s->s_root) { 882 if (s->s_root) {
856 if ((flags ^ s->s_flags) & MS_RDONLY) { 883 if ((flags ^ s->s_flags) & MS_RDONLY) {
857 up_write(&s->s_umount); 884 deactivate_locked_super(s);
858 deactivate_super(s);
859 error = -EBUSY; 885 error = -EBUSY;
860 goto error_bdev; 886 goto error_bdev;
861 } 887 }
@@ -870,8 +896,7 @@ int get_sb_bdev(struct file_system_type *fs_type,
870 sb_set_blocksize(s, block_size(bdev)); 896 sb_set_blocksize(s, block_size(bdev));
871 error = fill_super(s, data, flags & MS_SILENT ? 1 : 0); 897 error = fill_super(s, data, flags & MS_SILENT ? 1 : 0);
872 if (error) { 898 if (error) {
873 up_write(&s->s_umount); 899 deactivate_locked_super(s);
874 deactivate_super(s);
875 goto error; 900 goto error;
876 } 901 }
877 902
@@ -897,7 +922,7 @@ void kill_block_super(struct super_block *sb)
897 struct block_device *bdev = sb->s_bdev; 922 struct block_device *bdev = sb->s_bdev;
898 fmode_t mode = sb->s_mode; 923 fmode_t mode = sb->s_mode;
899 924
900 bdev->bd_super = 0; 925 bdev->bd_super = NULL;
901 generic_shutdown_super(sb); 926 generic_shutdown_super(sb);
902 sync_blockdev(bdev); 927 sync_blockdev(bdev);
903 close_bdev_exclusive(bdev, mode); 928 close_bdev_exclusive(bdev, mode);
@@ -921,8 +946,7 @@ int get_sb_nodev(struct file_system_type *fs_type,
921 946
922 error = fill_super(s, data, flags & MS_SILENT ? 1 : 0); 947 error = fill_super(s, data, flags & MS_SILENT ? 1 : 0);
923 if (error) { 948 if (error) {
924 up_write(&s->s_umount); 949 deactivate_locked_super(s);
925 deactivate_super(s);
926 return error; 950 return error;
927 } 951 }
928 s->s_flags |= MS_ACTIVE; 952 s->s_flags |= MS_ACTIVE;
@@ -952,8 +976,7 @@ int get_sb_single(struct file_system_type *fs_type,
952 s->s_flags = flags; 976 s->s_flags = flags;
953 error = fill_super(s, data, flags & MS_SILENT ? 1 : 0); 977 error = fill_super(s, data, flags & MS_SILENT ? 1 : 0);
954 if (error) { 978 if (error) {
955 up_write(&s->s_umount); 979 deactivate_locked_super(s);
956 deactivate_super(s);
957 return error; 980 return error;
958 } 981 }
959 s->s_flags |= MS_ACTIVE; 982 s->s_flags |= MS_ACTIVE;
@@ -1006,8 +1029,7 @@ vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void
1006 return mnt; 1029 return mnt;
1007out_sb: 1030out_sb:
1008 dput(mnt->mnt_root); 1031 dput(mnt->mnt_root);
1009 up_write(&mnt->mnt_sb->s_umount); 1032 deactivate_locked_super(mnt->mnt_sb);
1010 deactivate_super(mnt->mnt_sb);
1011out_free_secdata: 1033out_free_secdata:
1012 free_secdata(secdata); 1034 free_secdata(secdata);
1013out_mnt: 1035out_mnt:
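
deactivate_locked_super() exists so that every ->get_sb() error path converted above can drop a freshly set up superblock without first releasing s_umount; with the old up_write(); deactivate_super(); pair there was a window in which another mount could find the half-constructed sb. A rough userspace analogue of "tear it down while still holding the lock" is sketched below; struct box and its helpers are invented names, and this illustrates the locking shape only, not the VFS code:

#include <pthread.h>
#include <stdlib.h>
#include <string.h>

struct box {
	pthread_rwlock_t rwsem;		/* plays the role of s_umount */
	char *data;
};

static int box_setup(struct box *b)
{
	b->data = strdup("payload");	/* stand-in for fill_super() */
	return b->data ? 0 : -1;
}

/* Tear the object down with rwsem still write-locked, so no concurrent
 * user can grab it between "setup failed" and "it is gone". */
static void box_destroy_locked(struct box *b)
{
	free(b->data);
	pthread_rwlock_unlock(&b->rwsem);
	pthread_rwlock_destroy(&b->rwsem);
	free(b);
}

static int box_create(struct box **out)
{
	struct box *b = calloc(1, sizeof(*b));

	if (!b)
		return -1;
	pthread_rwlock_init(&b->rwsem, NULL);
	pthread_rwlock_wrlock(&b->rwsem);	/* held across setup */
	if (box_setup(b) < 0) {
		box_destroy_locked(b);		/* not: unlock, then destroy */
		return -1;
	}
	pthread_rwlock_unlock(&b->rwsem);
	*out = b;
	return 0;
}

int main(void)
{
	struct box *b;

	return box_create(&b) == 0 ? 0 : 1;
}
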
diff --git a/fs/sysfs/bin.c b/fs/sysfs/bin.c
index 93e0c0281d4..9345806c885 100644
--- a/fs/sysfs/bin.c
+++ b/fs/sysfs/bin.c
@@ -157,14 +157,9 @@ static ssize_t write(struct file *file, const char __user *userbuf,
157 count = size - offs; 157 count = size - offs;
158 } 158 }
159 159
160 temp = kmalloc(count, GFP_KERNEL); 160 temp = memdup_user(userbuf, count);
161 if (!temp) 161 if (IS_ERR(temp))
162 return -ENOMEM; 162 return PTR_ERR(temp);
163
164 if (copy_from_user(temp, userbuf, count)) {
165 count = -EFAULT;
166 goto out_free;
167 }
168 163
169 mutex_lock(&bb->mutex); 164 mutex_lock(&bb->mutex);
170 165
@@ -176,8 +171,6 @@ static ssize_t write(struct file *file, const char __user *userbuf,
176 if (count > 0) 171 if (count > 0)
177 *off = offs + count; 172 *off = offs + count;
178 173
179out_free:
180 kfree(temp);
181 return count; 174 return count;
182} 175}
183 176
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index 289c43a4726..561a9c050ce 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -446,11 +446,11 @@ static unsigned int sysfs_poll(struct file *filp, poll_table *wait)
446 if (buffer->event != atomic_read(&od->event)) 446 if (buffer->event != atomic_read(&od->event))
447 goto trigger; 447 goto trigger;
448 448
449 return 0; 449 return DEFAULT_POLLMASK;
450 450
451 trigger: 451 trigger:
452 buffer->needs_read_fill = 1; 452 buffer->needs_read_fill = 1;
453 return POLLERR|POLLPRI; 453 return DEFAULT_POLLMASK|POLLERR|POLLPRI;
454} 454}
455 455
456void sysfs_notify_dirent(struct sysfs_dirent *sd) 456void sysfs_notify_dirent(struct sysfs_dirent *sd)
@@ -667,6 +667,7 @@ struct sysfs_schedule_callback_struct {
667 struct work_struct work; 667 struct work_struct work;
668}; 668};
669 669
670static struct workqueue_struct *sysfs_workqueue;
670static DEFINE_MUTEX(sysfs_workq_mutex); 671static DEFINE_MUTEX(sysfs_workq_mutex);
671static LIST_HEAD(sysfs_workq); 672static LIST_HEAD(sysfs_workq);
672static void sysfs_schedule_callback_work(struct work_struct *work) 673static void sysfs_schedule_callback_work(struct work_struct *work)
@@ -715,11 +716,20 @@ int sysfs_schedule_callback(struct kobject *kobj, void (*func)(void *),
715 mutex_lock(&sysfs_workq_mutex); 716 mutex_lock(&sysfs_workq_mutex);
716 list_for_each_entry_safe(ss, tmp, &sysfs_workq, workq_list) 717 list_for_each_entry_safe(ss, tmp, &sysfs_workq, workq_list)
717 if (ss->kobj == kobj) { 718 if (ss->kobj == kobj) {
719 module_put(owner);
718 mutex_unlock(&sysfs_workq_mutex); 720 mutex_unlock(&sysfs_workq_mutex);
719 return -EAGAIN; 721 return -EAGAIN;
720 } 722 }
721 mutex_unlock(&sysfs_workq_mutex); 723 mutex_unlock(&sysfs_workq_mutex);
722 724
725 if (sysfs_workqueue == NULL) {
726 sysfs_workqueue = create_singlethread_workqueue("sysfsd");
727 if (sysfs_workqueue == NULL) {
728 module_put(owner);
729 return -ENOMEM;
730 }
731 }
732
723 ss = kmalloc(sizeof(*ss), GFP_KERNEL); 733 ss = kmalloc(sizeof(*ss), GFP_KERNEL);
724 if (!ss) { 734 if (!ss) {
725 module_put(owner); 735 module_put(owner);
@@ -735,7 +745,7 @@ int sysfs_schedule_callback(struct kobject *kobj, void (*func)(void *),
735 mutex_lock(&sysfs_workq_mutex); 745 mutex_lock(&sysfs_workq_mutex);
736 list_add_tail(&ss->workq_list, &sysfs_workq); 746 list_add_tail(&ss->workq_list, &sysfs_workq);
737 mutex_unlock(&sysfs_workq_mutex); 747 mutex_unlock(&sysfs_workq_mutex);
738 schedule_work(&ss->work); 748 queue_work(sysfs_workqueue, &ss->work);
739 return 0; 749 return 0;
740} 750}
741EXPORT_SYMBOL_GPL(sysfs_schedule_callback); 751EXPORT_SYMBOL_GPL(sysfs_schedule_callback);
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index faa44f90608..e9f7a754c4f 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -2055,8 +2055,7 @@ static int ubifs_get_sb(struct file_system_type *fs_type, int flags,
2055 return 0; 2055 return 0;
2056 2056
2057out_deact: 2057out_deact:
2058 up_write(&sb->s_umount); 2058 deactivate_locked_super(sb);
2059 deactivate_super(sb);
2060out_close: 2059out_close:
2061 ubi_close_volume(ubi); 2060 ubi_close_volume(ubi);
2062 return err; 2061 return err;
diff --git a/fs/ufs/dir.c b/fs/ufs/dir.c
index dbbbc466876..6321b797061 100644
--- a/fs/ufs/dir.c
+++ b/fs/ufs/dir.c
@@ -666,6 +666,6 @@ not_empty:
666const struct file_operations ufs_dir_operations = { 666const struct file_operations ufs_dir_operations = {
667 .read = generic_read_dir, 667 .read = generic_read_dir,
668 .readdir = ufs_readdir, 668 .readdir = ufs_readdir,
669 .fsync = file_fsync, 669 .fsync = ufs_sync_file,
670 .llseek = generic_file_llseek, 670 .llseek = generic_file_llseek,
671}; 671};
diff --git a/fs/ufs/file.c b/fs/ufs/file.c
index 625ef17c6f8..2bd3a161571 100644
--- a/fs/ufs/file.c
+++ b/fs/ufs/file.c
@@ -30,7 +30,7 @@
30#include "ufs.h" 30#include "ufs.h"
31 31
32 32
33static int ufs_sync_file(struct file *file, struct dentry *dentry, int datasync) 33int ufs_sync_file(struct file *file, struct dentry *dentry, int datasync)
34{ 34{
35 struct inode *inode = dentry->d_inode; 35 struct inode *inode = dentry->d_inode;
36 int err; 36 int err;
diff --git a/fs/ufs/ufs.h b/fs/ufs/ufs.h
index 69b3427d788..d0c4acd4f1f 100644
--- a/fs/ufs/ufs.h
+++ b/fs/ufs/ufs.h
@@ -98,8 +98,8 @@ extern void ufs_set_link(struct inode *dir, struct ufs_dir_entry *de,
98/* file.c */ 98/* file.c */
99extern const struct inode_operations ufs_file_inode_operations; 99extern const struct inode_operations ufs_file_inode_operations;
100extern const struct file_operations ufs_file_operations; 100extern const struct file_operations ufs_file_operations;
101
102extern const struct address_space_operations ufs_aops; 101extern const struct address_space_operations ufs_aops;
102extern int ufs_sync_file(struct file *, struct dentry *, int);
103 103
104/* ialloc.c */ 104/* ialloc.c */
105extern void ufs_free_inode (struct inode *inode); 105extern void ufs_free_inode (struct inode *inode);
diff --git a/fs/xattr.c b/fs/xattr.c
index 197c4fcac03..d51b8f9db92 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -237,13 +237,9 @@ setxattr(struct dentry *d, const char __user *name, const void __user *value,
237 if (size) { 237 if (size) {
238 if (size > XATTR_SIZE_MAX) 238 if (size > XATTR_SIZE_MAX)
239 return -E2BIG; 239 return -E2BIG;
240 kvalue = kmalloc(size, GFP_KERNEL); 240 kvalue = memdup_user(value, size);
241 if (!kvalue) 241 if (IS_ERR(kvalue))
242 return -ENOMEM; 242 return PTR_ERR(kvalue);
243 if (copy_from_user(kvalue, value, size)) {
244 kfree(kvalue);
245 return -EFAULT;
246 }
247 } 243 }
248 244
249 error = vfs_setxattr(d, kname, kvalue, size, flags); 245 error = vfs_setxattr(d, kname, kvalue, size, flags);
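
The sysfs bin.c write path above, the setxattr() path just above, and the two xfs ioctl conversions below all replace the open-coded kmalloc + copy_from_user + error-unwind sequence with memdup_user(), which returns either the fresh kernel buffer or an errno encoded in the pointer itself. A self-contained userspace sketch of that pointer-encoded-error convention follows; the ERR_PTR/IS_ERR/PTR_ERR reimplementations and memdup_buf() are illustrative stand-ins, not the kernel's own definitions:

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MAX_ERRNO	4095

/* Encode small negative errno values in the pointer, as the kernel does. */
static void *ERR_PTR(long error)	{ return (void *)error; }
static long PTR_ERR(const void *ptr)	{ return (long)ptr; }
static int IS_ERR(const void *ptr)
{
	return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

/* Analogue of memdup_user(): duplicate a buffer or return an ERR_PTR. */
static void *memdup_buf(const void *src, size_t len)
{
	void *p = malloc(len);

	if (!p)
		return ERR_PTR(-ENOMEM);
	memcpy(p, src, len);
	return p;
}

int main(void)
{
	char *copy = memdup_buf("user data", 10);

	if (IS_ERR(copy)) {
		fprintf(stderr, "copy failed: %ld\n", -PTR_ERR(copy));
		return 1;
	}
	puts(copy);
	free(copy);
	return 0;
}

One return value now carries both outcomes, which is what lets each caller above collapse its out_kfree/out_free unwinding into a single IS_ERR() check.
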
diff --git a/fs/xfs/linux-2.6/kmem.h b/fs/xfs/linux-2.6/kmem.h
index af6843c7ee4..179cbd630f6 100644
--- a/fs/xfs/linux-2.6/kmem.h
+++ b/fs/xfs/linux-2.6/kmem.h
@@ -103,7 +103,7 @@ extern void *kmem_zone_zalloc(kmem_zone_t *, unsigned int __nocast);
103static inline int 103static inline int
104kmem_shake_allow(gfp_t gfp_mask) 104kmem_shake_allow(gfp_t gfp_mask)
105{ 105{
106 return (gfp_mask & __GFP_WAIT) != 0; 106 return ((gfp_mask & __GFP_WAIT) && (gfp_mask & __GFP_FS));
107} 107}
108 108
109#endif /* __XFS_SUPPORT_KMEM_H__ */ 109#endif /* __XFS_SUPPORT_KMEM_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index d0b499418a7..34eaab608e6 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -489,17 +489,12 @@ xfs_attrmulti_attr_set(
489 if (len > XATTR_SIZE_MAX) 489 if (len > XATTR_SIZE_MAX)
490 return EINVAL; 490 return EINVAL;
491 491
492 kbuf = kmalloc(len, GFP_KERNEL); 492 kbuf = memdup_user(ubuf, len);
493 if (!kbuf) 493 if (IS_ERR(kbuf))
494 return ENOMEM; 494 return PTR_ERR(kbuf);
495
496 if (copy_from_user(kbuf, ubuf, len))
497 goto out_kfree;
498 495
499 error = xfs_attr_set(XFS_I(inode), name, kbuf, len, flags); 496 error = xfs_attr_set(XFS_I(inode), name, kbuf, len, flags);
500 497
501 out_kfree:
502 kfree(kbuf);
503 return error; 498 return error;
504} 499}
505 500
@@ -540,20 +535,16 @@ xfs_attrmulti_by_handle(
540 if (!size || size > 16 * PAGE_SIZE) 535 if (!size || size > 16 * PAGE_SIZE)
541 goto out_dput; 536 goto out_dput;
542 537
543 error = ENOMEM; 538 ops = memdup_user(am_hreq.ops, size);
544 ops = kmalloc(size, GFP_KERNEL); 539 if (IS_ERR(ops)) {
545 if (!ops) 540 error = PTR_ERR(ops);
546 goto out_dput; 541 goto out_dput;
547 542 }
548 error = EFAULT;
549 if (copy_from_user(ops, am_hreq.ops, size))
550 goto out_kfree_ops;
551 543
552 attr_name = kmalloc(MAXNAMELEN, GFP_KERNEL); 544 attr_name = kmalloc(MAXNAMELEN, GFP_KERNEL);
553 if (!attr_name) 545 if (!attr_name)
554 goto out_kfree_ops; 546 goto out_kfree_ops;
555 547
556
557 error = 0; 548 error = 0;
558 for (i = 0; i < am_hreq.opcount; i++) { 549 for (i = 0; i < am_hreq.opcount; i++) {
559 ops[i].am_error = strncpy_from_user(attr_name, 550 ops[i].am_error = strncpy_from_user(attr_name,
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c
index c70c4e3db79..0882d166239 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl32.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl32.c
@@ -427,20 +427,16 @@ xfs_compat_attrmulti_by_handle(
427 if (!size || size > 16 * PAGE_SIZE) 427 if (!size || size > 16 * PAGE_SIZE)
428 goto out_dput; 428 goto out_dput;
429 429
430 error = ENOMEM; 430 ops = memdup_user(compat_ptr(am_hreq.ops), size);
431 ops = kmalloc(size, GFP_KERNEL); 431 if (IS_ERR(ops)) {
432 if (!ops) 432 error = PTR_ERR(ops);
433 goto out_dput; 433 goto out_dput;
434 434 }
435 error = EFAULT;
436 if (copy_from_user(ops, compat_ptr(am_hreq.ops), size))
437 goto out_kfree_ops;
438 435
439 attr_name = kmalloc(MAXNAMELEN, GFP_KERNEL); 436 attr_name = kmalloc(MAXNAMELEN, GFP_KERNEL);
440 if (!attr_name) 437 if (!attr_name)
441 goto out_kfree_ops; 438 goto out_kfree_ops;
442 439
443
444 error = 0; 440 error = 0;
445 for (i = 0; i < am_hreq.opcount; i++) { 441 for (i = 0; i < am_hreq.opcount; i++) {
446 ops[i].am_error = strncpy_from_user(attr_name, 442 ops[i].am_error = strncpy_from_user(attr_name,
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 3a6ed426327..ca7c6005a48 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -5880,7 +5880,7 @@ xfs_getbmap(
5880 void *arg) /* formatter arg */ 5880 void *arg) /* formatter arg */
5881{ 5881{
5882 __int64_t bmvend; /* last block requested */ 5882 __int64_t bmvend; /* last block requested */
5883 int error; /* return value */ 5883 int error = 0; /* return value */
5884 __int64_t fixlen; /* length for -1 case */ 5884 __int64_t fixlen; /* length for -1 case */
5885 int i; /* extent number */ 5885 int i; /* extent number */
5886 int lock; /* lock state */ 5886 int lock; /* lock state */
@@ -5890,39 +5890,18 @@ xfs_getbmap(
5890 int nexleft; /* # of user extents left */ 5890 int nexleft; /* # of user extents left */
5891 int subnex; /* # of bmapi's can do */ 5891 int subnex; /* # of bmapi's can do */
5892 int nmap; /* number of map entries */ 5892 int nmap; /* number of map entries */
5893 struct getbmapx out; /* output structure */ 5893 struct getbmapx *out; /* output structure */
5894 int whichfork; /* data or attr fork */ 5894 int whichfork; /* data or attr fork */
5895 int prealloced; /* this is a file with 5895 int prealloced; /* this is a file with
5896 * preallocated data space */ 5896 * preallocated data space */
5897 int iflags; /* interface flags */ 5897 int iflags; /* interface flags */
5898 int bmapi_flags; /* flags for xfs_bmapi */ 5898 int bmapi_flags; /* flags for xfs_bmapi */
5899 int cur_ext = 0;
5899 5900
5900 mp = ip->i_mount; 5901 mp = ip->i_mount;
5901 iflags = bmv->bmv_iflags; 5902 iflags = bmv->bmv_iflags;
5902
5903 whichfork = iflags & BMV_IF_ATTRFORK ? XFS_ATTR_FORK : XFS_DATA_FORK; 5903 whichfork = iflags & BMV_IF_ATTRFORK ? XFS_ATTR_FORK : XFS_DATA_FORK;
5904 5904
5905 /* If the BMV_IF_NO_DMAPI_READ interface bit specified, do not
5906 * generate a DMAPI read event. Otherwise, if the DM_EVENT_READ
5907 * bit is set for the file, generate a read event in order
5908 * that the DMAPI application may do its thing before we return
5909 * the extents. Usually this means restoring user file data to
5910 * regions of the file that look like holes.
5911 *
5912 * The "old behavior" (from XFS_IOC_GETBMAP) is to not specify
5913 * BMV_IF_NO_DMAPI_READ so that read events are generated.
5914 * If this were not true, callers of ioctl( XFS_IOC_GETBMAP )
5915 * could misinterpret holes in a DMAPI file as true holes,
5916 * when in fact they may represent offline user data.
5917 */
5918 if ((iflags & BMV_IF_NO_DMAPI_READ) == 0 &&
5919 DM_EVENT_ENABLED(ip, DM_EVENT_READ) &&
5920 whichfork == XFS_DATA_FORK) {
5921 error = XFS_SEND_DATA(mp, DM_EVENT_READ, ip, 0, 0, 0, NULL);
5922 if (error)
5923 return XFS_ERROR(error);
5924 }
5925
5926 if (whichfork == XFS_ATTR_FORK) { 5905 if (whichfork == XFS_ATTR_FORK) {
5927 if (XFS_IFORK_Q(ip)) { 5906 if (XFS_IFORK_Q(ip)) {
5928 if (ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS && 5907 if (ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS &&
@@ -5936,11 +5915,37 @@ xfs_getbmap(
5936 ip->i_mount); 5915 ip->i_mount);
5937 return XFS_ERROR(EFSCORRUPTED); 5916 return XFS_ERROR(EFSCORRUPTED);
5938 } 5917 }
5939 } else if (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS && 5918
5940 ip->i_d.di_format != XFS_DINODE_FMT_BTREE && 5919 prealloced = 0;
5941 ip->i_d.di_format != XFS_DINODE_FMT_LOCAL) 5920 fixlen = 1LL << 32;
5942 return XFS_ERROR(EINVAL); 5921 } else {
5943 if (whichfork == XFS_DATA_FORK) { 5922 /*
5923 * If the BMV_IF_NO_DMAPI_READ interface bit specified, do
5924 * not generate a DMAPI read event. Otherwise, if the
5925 * DM_EVENT_READ bit is set for the file, generate a read
5926 * event in order that the DMAPI application may do its thing
5927 * before we return the extents. Usually this means restoring
5928 * user file data to regions of the file that look like holes.
5929 *
5930 * The "old behavior" (from XFS_IOC_GETBMAP) is to not specify
5931 * BMV_IF_NO_DMAPI_READ so that read events are generated.
5932 * If this were not true, callers of ioctl(XFS_IOC_GETBMAP)
5933 * could misinterpret holes in a DMAPI file as true holes,
5934 * when in fact they may represent offline user data.
5935 */
5936 if (DM_EVENT_ENABLED(ip, DM_EVENT_READ) &&
5937 !(iflags & BMV_IF_NO_DMAPI_READ)) {
5938 error = XFS_SEND_DATA(mp, DM_EVENT_READ, ip,
5939 0, 0, 0, NULL);
5940 if (error)
5941 return XFS_ERROR(error);
5942 }
5943
5944 if (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS &&
5945 ip->i_d.di_format != XFS_DINODE_FMT_BTREE &&
5946 ip->i_d.di_format != XFS_DINODE_FMT_LOCAL)
5947 return XFS_ERROR(EINVAL);
5948
5944 if (xfs_get_extsz_hint(ip) || 5949 if (xfs_get_extsz_hint(ip) ||
5945 ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC|XFS_DIFLAG_APPEND)){ 5950 ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC|XFS_DIFLAG_APPEND)){
5946 prealloced = 1; 5951 prealloced = 1;
@@ -5949,42 +5954,41 @@ xfs_getbmap(
5949 prealloced = 0; 5954 prealloced = 0;
5950 fixlen = ip->i_size; 5955 fixlen = ip->i_size;
5951 } 5956 }
5952 } else {
5953 prealloced = 0;
5954 fixlen = 1LL << 32;
5955 } 5957 }
5956 5958
5957 if (bmv->bmv_length == -1) { 5959 if (bmv->bmv_length == -1) {
5958 fixlen = XFS_FSB_TO_BB(mp, XFS_B_TO_FSB(mp, fixlen)); 5960 fixlen = XFS_FSB_TO_BB(mp, XFS_B_TO_FSB(mp, fixlen));
5959 bmv->bmv_length = MAX( (__int64_t)(fixlen - bmv->bmv_offset), 5961 bmv->bmv_length =
5960 (__int64_t)0); 5962 max_t(__int64_t, fixlen - bmv->bmv_offset, 0);
5961 } else if (bmv->bmv_length < 0) 5963 } else if (bmv->bmv_length == 0) {
5962 return XFS_ERROR(EINVAL);
5963 if (bmv->bmv_length == 0) {
5964 bmv->bmv_entries = 0; 5964 bmv->bmv_entries = 0;
5965 return 0; 5965 return 0;
5966 } else if (bmv->bmv_length < 0) {
5967 return XFS_ERROR(EINVAL);
5966 } 5968 }
5969
5967 nex = bmv->bmv_count - 1; 5970 nex = bmv->bmv_count - 1;
5968 if (nex <= 0) 5971 if (nex <= 0)
5969 return XFS_ERROR(EINVAL); 5972 return XFS_ERROR(EINVAL);
5970 bmvend = bmv->bmv_offset + bmv->bmv_length; 5973 bmvend = bmv->bmv_offset + bmv->bmv_length;
5971 5974
5972 xfs_ilock(ip, XFS_IOLOCK_SHARED);
5973 5975
5974 if (((iflags & BMV_IF_DELALLOC) == 0) && 5976 if (bmv->bmv_count > ULONG_MAX / sizeof(struct getbmapx))
5975 (whichfork == XFS_DATA_FORK) && 5977 return XFS_ERROR(ENOMEM);
5976 (ip->i_delayed_blks || ip->i_size > ip->i_d.di_size)) { 5978 out = kmem_zalloc(bmv->bmv_count * sizeof(struct getbmapx), KM_MAYFAIL);
5977 /* xfs_fsize_t last_byte = xfs_file_last_byte(ip); */ 5979 if (!out)
5978 error = xfs_flush_pages(ip, (xfs_off_t)0, 5980 return XFS_ERROR(ENOMEM);
5979 -1, 0, FI_REMAPF); 5981
5980 if (error) { 5982 xfs_ilock(ip, XFS_IOLOCK_SHARED);
5981 xfs_iunlock(ip, XFS_IOLOCK_SHARED); 5983 if (whichfork == XFS_DATA_FORK && !(iflags & BMV_IF_DELALLOC)) {
5982 return error; 5984 if (ip->i_delayed_blks || ip->i_size > ip->i_d.di_size) {
5985 error = xfs_flush_pages(ip, 0, -1, 0, FI_REMAPF);
5986 if (error)
5987 goto out_unlock_iolock;
5983 } 5988 }
5984 }
5985 5989
5986 ASSERT(whichfork == XFS_ATTR_FORK || (iflags & BMV_IF_DELALLOC) || 5990 ASSERT(ip->i_delayed_blks == 0);
5987 ip->i_delayed_blks == 0); 5991 }
5988 5992
5989 lock = xfs_ilock_map_shared(ip); 5993 lock = xfs_ilock_map_shared(ip);
5990 5994
@@ -5995,23 +5999,25 @@ xfs_getbmap(
5995 if (nex > XFS_IFORK_NEXTENTS(ip, whichfork) * 2 + 1) 5999 if (nex > XFS_IFORK_NEXTENTS(ip, whichfork) * 2 + 1)
5996 nex = XFS_IFORK_NEXTENTS(ip, whichfork) * 2 + 1; 6000 nex = XFS_IFORK_NEXTENTS(ip, whichfork) * 2 + 1;
5997 6001
5998 bmapi_flags = xfs_bmapi_aflag(whichfork) | 6002 bmapi_flags = xfs_bmapi_aflag(whichfork);
5999 ((iflags & BMV_IF_PREALLOC) ? 0 : XFS_BMAPI_IGSTATE); 6003 if (!(iflags & BMV_IF_PREALLOC))
6004 bmapi_flags |= XFS_BMAPI_IGSTATE;
6000 6005
6001 /* 6006 /*
6002 * Allocate enough space to handle "subnex" maps at a time. 6007 * Allocate enough space to handle "subnex" maps at a time.
6003 */ 6008 */
6009 error = ENOMEM;
6004 subnex = 16; 6010 subnex = 16;
6005 map = kmem_alloc(subnex * sizeof(*map), KM_SLEEP); 6011 map = kmem_alloc(subnex * sizeof(*map), KM_MAYFAIL);
6012 if (!map)
6013 goto out_unlock_ilock;
6006 6014
6007 bmv->bmv_entries = 0; 6015 bmv->bmv_entries = 0;
6008 6016
6009 if ((XFS_IFORK_NEXTENTS(ip, whichfork) == 0)) { 6017 if (XFS_IFORK_NEXTENTS(ip, whichfork) == 0 &&
6010 if (((iflags & BMV_IF_DELALLOC) == 0) || 6018 (whichfork == XFS_ATTR_FORK || !(iflags & BMV_IF_DELALLOC))) {
6011 whichfork == XFS_ATTR_FORK) { 6019 error = 0;
6012 error = 0; 6020 goto out_free_map;
6013 goto unlock_and_return;
6014 }
6015 } 6021 }
6016 6022
6017 nexleft = nex; 6023 nexleft = nex;
@@ -6023,53 +6029,61 @@ xfs_getbmap(
6023 bmapi_flags, NULL, 0, map, &nmap, 6029 bmapi_flags, NULL, 0, map, &nmap,
6024 NULL, NULL); 6030 NULL, NULL);
6025 if (error) 6031 if (error)
6026 goto unlock_and_return; 6032 goto out_free_map;
6027 ASSERT(nmap <= subnex); 6033 ASSERT(nmap <= subnex);
6028 6034
6029 for (i = 0; i < nmap && nexleft && bmv->bmv_length; i++) { 6035 for (i = 0; i < nmap && nexleft && bmv->bmv_length; i++) {
6030 out.bmv_oflags = 0; 6036 out[cur_ext].bmv_oflags = 0;
6031 if (map[i].br_state == XFS_EXT_UNWRITTEN) 6037 if (map[i].br_state == XFS_EXT_UNWRITTEN)
6032 out.bmv_oflags |= BMV_OF_PREALLOC; 6038 out[cur_ext].bmv_oflags |= BMV_OF_PREALLOC;
6033 else if (map[i].br_startblock == DELAYSTARTBLOCK) 6039 else if (map[i].br_startblock == DELAYSTARTBLOCK)
6034 out.bmv_oflags |= BMV_OF_DELALLOC; 6040 out[cur_ext].bmv_oflags |= BMV_OF_DELALLOC;
6035 out.bmv_offset = XFS_FSB_TO_BB(mp, map[i].br_startoff); 6041 out[cur_ext].bmv_offset =
6036 out.bmv_length = XFS_FSB_TO_BB(mp, map[i].br_blockcount); 6042 XFS_FSB_TO_BB(mp, map[i].br_startoff);
6037 out.bmv_unused1 = out.bmv_unused2 = 0; 6043 out[cur_ext].bmv_length =
6044 XFS_FSB_TO_BB(mp, map[i].br_blockcount);
6045 out[cur_ext].bmv_unused1 = 0;
6046 out[cur_ext].bmv_unused2 = 0;
6038 ASSERT(((iflags & BMV_IF_DELALLOC) != 0) || 6047 ASSERT(((iflags & BMV_IF_DELALLOC) != 0) ||
6039 (map[i].br_startblock != DELAYSTARTBLOCK)); 6048 (map[i].br_startblock != DELAYSTARTBLOCK));
6040 if (map[i].br_startblock == HOLESTARTBLOCK && 6049 if (map[i].br_startblock == HOLESTARTBLOCK &&
6041 whichfork == XFS_ATTR_FORK) { 6050 whichfork == XFS_ATTR_FORK) {
6042 /* came to the end of attribute fork */ 6051 /* came to the end of attribute fork */
6043 out.bmv_oflags |= BMV_OF_LAST; 6052 out[cur_ext].bmv_oflags |= BMV_OF_LAST;
6044 goto unlock_and_return; 6053 goto out_free_map;
6045 } else {
6046 int full = 0; /* user array is full */
6047
6048 if (!xfs_getbmapx_fix_eof_hole(ip, &out,
6049 prealloced, bmvend,
6050 map[i].br_startblock)) {
6051 goto unlock_and_return;
6052 }
6053
6054 /* format results & advance arg */
6055 error = formatter(&arg, &out, &full);
6056 if (error || full)
6057 goto unlock_and_return;
6058 nexleft--;
6059 bmv->bmv_offset =
6060 out.bmv_offset + out.bmv_length;
6061 bmv->bmv_length = MAX((__int64_t)0,
6062 (__int64_t)(bmvend - bmv->bmv_offset));
6063 bmv->bmv_entries++;
6064 } 6054 }
6055
6056 if (!xfs_getbmapx_fix_eof_hole(ip, &out[cur_ext],
6057 prealloced, bmvend,
6058 map[i].br_startblock))
6059 goto out_free_map;
6060
6061 nexleft--;
6062 bmv->bmv_offset =
6063 out[cur_ext].bmv_offset +
6064 out[cur_ext].bmv_length;
6065 bmv->bmv_length =
6066 max_t(__int64_t, 0, bmvend - bmv->bmv_offset);
6067 bmv->bmv_entries++;
6068 cur_ext++;
6065 } 6069 }
6066 } while (nmap && nexleft && bmv->bmv_length); 6070 } while (nmap && nexleft && bmv->bmv_length);
6067 6071
6068unlock_and_return: 6072 out_free_map:
6073 kmem_free(map);
6074 out_unlock_ilock:
6069 xfs_iunlock_map_shared(ip, lock); 6075 xfs_iunlock_map_shared(ip, lock);
6076 out_unlock_iolock:
6070 xfs_iunlock(ip, XFS_IOLOCK_SHARED); 6077 xfs_iunlock(ip, XFS_IOLOCK_SHARED);
6071 6078
6072 kmem_free(map); 6079 for (i = 0; i < cur_ext; i++) {
6080 int full = 0; /* user array is full */
6081
6082 /* format results & advance arg */
6083 error = formatter(&arg, &out[i], &full);
6084 if (error || full)
6085 break;
6086 }
6073 6087
6074 return error; 6088 return error;
6075} 6089}
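
Note: the reworked xfs_getbmap above sizes and zero-allocates an out[] array of struct getbmapx up front (rejecting counts whose byte size would exceed ULONG_MAX / sizeof(struct getbmapx)), fills it while holding the inode locks, and only invokes the caller-supplied formatter after both the map lock and the iolock have been released, which keeps the formatter's copy-out away from the XFS locks. A compact sketch of that collect-then-format structure, using hypothetical types and a stand-in formatter callback, not the kernel interface:

        #include <errno.h>
        #include <limits.h>
        #include <stdlib.h>

        struct rec { long off, len; };

        /* Sketch only: guard the allocation size, gather records "under the
         * lock", then hand them to the formatter after the lock is dropped. */
        static int getmap_sketch(unsigned long count,
                                 int (*formatter)(const struct rec *, int *full))
        {
                struct rec *out;
                unsigned long i, cur;
                int error = 0, full = 0;

                if (count > ULONG_MAX / sizeof(struct rec))
                        return -ENOMEM;          /* size would overflow */
                out = calloc(count, sizeof(struct rec));
                if (!out)
                        return -ENOMEM;

                /* lock(); */
                for (cur = 0; cur < count; cur++) {   /* gather extents */
                        out[cur].off = (long)cur;
                        out[cur].len = 1;
                }
                /* unlock(); */

                for (i = 0; i < cur && !error && !full; i++)
                        error = formatter(&out[i], &full);

                free(out);
                return error;
        }
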
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c
index e6d839bddbf..7465f9ee125 100644
--- a/fs/xfs/xfs_dfrag.c
+++ b/fs/xfs/xfs_dfrag.c
@@ -347,13 +347,15 @@ xfs_swap_extents(
347 347
348 error = xfs_trans_commit(tp, XFS_TRANS_SWAPEXT); 348 error = xfs_trans_commit(tp, XFS_TRANS_SWAPEXT);
349 349
350out_unlock:
351 xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
352 xfs_iunlock(tip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
353out: 350out:
354 kmem_free(tempifp); 351 kmem_free(tempifp);
355 return error; 352 return error;
356 353
354out_unlock:
355 xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
356 xfs_iunlock(tip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
357 goto out;
358
357out_trans_cancel: 359out_trans_cancel:
358 xfs_trans_cancel(tp, 0); 360 xfs_trans_cancel(tp, 0);
359 goto out_unlock; 361 goto out_unlock;
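
Note: in xfs_swap_extents the out_unlock label is moved below out:, so the successful xfs_trans_commit path now falls straight into the free-and-return code while only the error paths unwind through the explicit xfs_iunlock calls (the commit presumably releases the joined inodes itself). A small sketch of that label ordering, with pthread mutexes standing in for the inode locks:

        #include <errno.h>
        #include <pthread.h>
        #include <stdlib.h>

        /* Sketch only: error paths jump to out_unlock and fall through to
         * out, while the success path skips the unlock labels because the
         * "commit" step is assumed to drop the locks itself. */
        static int swap_sketch(pthread_mutex_t *a, pthread_mutex_t *b, int fail)
        {
                char *tempifp = malloc(64);
                int error = 0;

                if (!tempifp)
                        return -ENOMEM;

                pthread_mutex_lock(a);
                pthread_mutex_lock(b);

                if (fail) {
                        error = -EINVAL;
                        goto out_unlock;
                }

                /* "commit": releases both locks on success */
                pthread_mutex_unlock(b);
                pthread_mutex_unlock(a);
                goto out;

        out_unlock:
                pthread_mutex_unlock(b);
                pthread_mutex_unlock(a);
        out:
                free(tempifp);
                return error;
        }
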
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index 8379e3bca26..cbd451bb484 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -160,7 +160,7 @@ xfs_growfs_data_private(
160 nagcount = new + (nb_mod != 0); 160 nagcount = new + (nb_mod != 0);
161 if (nb_mod && nb_mod < XFS_MIN_AG_BLOCKS) { 161 if (nb_mod && nb_mod < XFS_MIN_AG_BLOCKS) {
162 nagcount--; 162 nagcount--;
163 nb = nagcount * mp->m_sb.sb_agblocks; 163 nb = (xfs_rfsblock_t)nagcount * mp->m_sb.sb_agblocks;
164 if (nb < mp->m_sb.sb_dblocks) 164 if (nb < mp->m_sb.sb_dblocks)
165 return XFS_ERROR(EINVAL); 165 return XFS_ERROR(EINVAL);
166 } 166 }
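
Note: the xfs_growfs_data_private change above casts nagcount to xfs_rfsblock_t before multiplying by sb_agblocks, so the product is computed in 64-bit arithmetic instead of wrapping in 32 bits on large filesystems. A minimal illustration of that widening, using plain stdint types rather than the XFS typedefs:

        #include <stdint.h>

        /* Sketch only: promote one operand to 64-bit before the multiply so
         * the AG-count * blocks-per-AG product cannot wrap (uint32_t stands
         * in for xfs_agnumber_t, uint64_t for xfs_rfsblock_t). */
        static uint64_t total_blocks(uint32_t nagcount, uint32_t agblocks)
        {
                return (uint64_t)nagcount * agblocks;
        }
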
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index e7ae08d1df4..123b20c8cbf 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1258,8 +1258,10 @@ xfs_file_last_byte(
1258 * necessary. 1258 * necessary.
1259 */ 1259 */
1260 if (ip->i_df.if_flags & XFS_IFEXTENTS) { 1260 if (ip->i_df.if_flags & XFS_IFEXTENTS) {
1261 xfs_ilock(ip, XFS_ILOCK_SHARED);
1261 error = xfs_bmap_last_offset(NULL, ip, &last_block, 1262 error = xfs_bmap_last_offset(NULL, ip, &last_block,
1262 XFS_DATA_FORK); 1263 XFS_DATA_FORK);
1264 xfs_iunlock(ip, XFS_ILOCK_SHARED);
1263 if (error) { 1265 if (error) {
1264 last_block = 0; 1266 last_block = 0;
1265 } 1267 }
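
Note: xfs_file_last_byte now takes XFS_ILOCK_SHARED around the xfs_bmap_last_offset call and drops it immediately afterwards, protecting the in-core extent list during the lookup. The fragment below mirrors that lock-around-lookup shape, with a pthread rwlock standing in for the inode lock:

        #include <pthread.h>
        #include <stdint.h>

        struct ino_sketch {
                pthread_rwlock_t lock;          /* stands in for the XFS ilock */
                uint64_t last_extent_block;
        };

        /* Sketch only: take the shared lock just around the extent lookup
         * and release it before the result is used, as in the hunk above. */
        static uint64_t last_block_sketch(struct ino_sketch *ip)
        {
                uint64_t last_block;

                pthread_rwlock_rdlock(&ip->lock);    /* xfs_ilock(.., XFS_ILOCK_SHARED) */
                last_block = ip->last_extent_block;  /* xfs_bmap_last_offset() stand-in */
                pthread_rwlock_unlock(&ip->lock);    /* xfs_iunlock(...) */

                return last_block;
        }
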
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index b101990df02..65a99725d0c 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -291,14 +291,17 @@ xfs_mount_validate_sb(
291 sbp->sb_sectsize > XFS_MAX_SECTORSIZE || 291 sbp->sb_sectsize > XFS_MAX_SECTORSIZE ||
292 sbp->sb_sectlog < XFS_MIN_SECTORSIZE_LOG || 292 sbp->sb_sectlog < XFS_MIN_SECTORSIZE_LOG ||
293 sbp->sb_sectlog > XFS_MAX_SECTORSIZE_LOG || 293 sbp->sb_sectlog > XFS_MAX_SECTORSIZE_LOG ||
294 sbp->sb_sectsize != (1 << sbp->sb_sectlog) ||
294 sbp->sb_blocksize < XFS_MIN_BLOCKSIZE || 295 sbp->sb_blocksize < XFS_MIN_BLOCKSIZE ||
295 sbp->sb_blocksize > XFS_MAX_BLOCKSIZE || 296 sbp->sb_blocksize > XFS_MAX_BLOCKSIZE ||
296 sbp->sb_blocklog < XFS_MIN_BLOCKSIZE_LOG || 297 sbp->sb_blocklog < XFS_MIN_BLOCKSIZE_LOG ||
297 sbp->sb_blocklog > XFS_MAX_BLOCKSIZE_LOG || 298 sbp->sb_blocklog > XFS_MAX_BLOCKSIZE_LOG ||
299 sbp->sb_blocksize != (1 << sbp->sb_blocklog) ||
298 sbp->sb_inodesize < XFS_DINODE_MIN_SIZE || 300 sbp->sb_inodesize < XFS_DINODE_MIN_SIZE ||
299 sbp->sb_inodesize > XFS_DINODE_MAX_SIZE || 301 sbp->sb_inodesize > XFS_DINODE_MAX_SIZE ||
300 sbp->sb_inodelog < XFS_DINODE_MIN_LOG || 302 sbp->sb_inodelog < XFS_DINODE_MIN_LOG ||
301 sbp->sb_inodelog > XFS_DINODE_MAX_LOG || 303 sbp->sb_inodelog > XFS_DINODE_MAX_LOG ||
304 sbp->sb_inodesize != (1 << sbp->sb_inodelog) ||
302 (sbp->sb_blocklog - sbp->sb_inodelog != sbp->sb_inopblog) || 305 (sbp->sb_blocklog - sbp->sb_inodelog != sbp->sb_inopblog) ||
303 (sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE) || 306 (sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE) ||
304 (sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE) || 307 (sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE) ||