Diffstat (limited to 'fs')
-rw-r--r--  fs/9p/vfs_super.c          3
-rw-r--r--  fs/affs/affs.h             2
-rw-r--r--  fs/affs/namei.c            7
-rw-r--r--  fs/affs/super.c           31
-rw-r--r--  fs/affs/symlink.c          7
-rw-r--r--  fs/bfs/inode.c            43
-rw-r--r--  fs/binfmt_aout.c           1
-rw-r--r--  fs/binfmt_elf.c           27
-rw-r--r--  fs/binfmt_elf_fdpic.c      3
-rw-r--r--  fs/binfmt_flat.c           1
-rw-r--r--  fs/binfmt_som.c            1
-rw-r--r--  fs/btrfs/acl.c             1
-rw-r--r--  fs/btrfs/ctree.h           1
-rw-r--r--  fs/btrfs/disk-io.c         6
-rw-r--r--  fs/btrfs/extent_map.c     14
-rw-r--r--  fs/btrfs/inode.c          22
-rw-r--r--  fs/btrfs/super.c           9
-rw-r--r--  fs/btrfs/volumes.c        13
-rw-r--r--  fs/debugfs/inode.c        11
-rw-r--r--  fs/eventfd.c              89
-rw-r--r--  fs/exec.c                 26
-rw-r--r--  fs/ext4/ext4.h             9
-rw-r--r--  fs/ext4/extents.c         21
-rw-r--r--  fs/ext4/inode.c           82
-rw-r--r--  fs/fcntl.c                 6
-rw-r--r--  fs/reiserfs/journal.c      2
-rw-r--r--  fs/romfs/super.c           1
27 files changed, 276 insertions, 163 deletions
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index 14a86448572c..69357c0d9899 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -188,7 +188,8 @@ static void v9fs_kill_super(struct super_block *s)
 
 	P9_DPRINTK(P9_DEBUG_VFS, " %p\n", s);
 
-	v9fs_dentry_release(s->s_root);	/* clunk root */
+	if (s->s_root)
+		v9fs_dentry_release(s->s_root);	/* clunk root */
 
 	kill_anon_super(s);
 
diff --git a/fs/affs/affs.h b/fs/affs/affs.h
index e511dc621a2e..0e40caaba456 100644
--- a/fs/affs/affs.h
+++ b/fs/affs/affs.h
@@ -106,8 +106,8 @@ struct affs_sb_info {
 	u32 s_last_bmap;
 	struct buffer_head *s_bmap_bh;
 	char *s_prefix;			/* Prefix for volumes and assigns. */
-	int s_prefix_len;		/* Length of prefix. */
 	char s_volume[32];		/* Volume prefix for absolute symlinks. */
+	spinlock_t symlink_lock;	/* protects the previous two */
 };
 
 #define SF_INTL		0x0001		/* International filesystem. */
diff --git a/fs/affs/namei.c b/fs/affs/namei.c
index 960d336ec694..d70bbbac6b7b 100644
--- a/fs/affs/namei.c
+++ b/fs/affs/namei.c
@@ -341,10 +341,13 @@ affs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
 	p = (char *)AFFS_HEAD(bh)->table;
 	lc = '/';
 	if (*symname == '/') {
+		struct affs_sb_info *sbi = AFFS_SB(sb);
 		while (*symname == '/')
 			symname++;
-		while (AFFS_SB(sb)->s_volume[i])	/* Cannot overflow */
-			*p++ = AFFS_SB(sb)->s_volume[i++];
+		spin_lock(&sbi->symlink_lock);
+		while (sbi->s_volume[i])	/* Cannot overflow */
+			*p++ = sbi->s_volume[i++];
+		spin_unlock(&sbi->symlink_lock);
 	}
 	while (i < maxlen && (c = *symname++)) {
 		if (c == '.' && lc == '/' && *symname == '.' && symname[1] == '/') {
diff --git a/fs/affs/super.c b/fs/affs/super.c
index 104fdcb3a7fc..d41e9673cd97 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -203,7 +203,7 @@ parse_options(char *options, uid_t *uid, gid_t *gid, int *mode, int *reserved, s
 		switch (token) {
 		case Opt_bs:
 			if (match_int(&args[0], &n))
-				return -EINVAL;
+				return 0;
 			if (n != 512 && n != 1024 && n != 2048
 			    && n != 4096) {
 				printk ("AFFS: Invalid blocksize (512, 1024, 2048, 4096 allowed)\n");
@@ -213,7 +213,7 @@ parse_options(char *options, uid_t *uid, gid_t *gid, int *mode, int *reserved, s
 			break;
 		case Opt_mode:
 			if (match_octal(&args[0], &option))
-				return 1;
+				return 0;
 			*mode = option & 0777;
 			*mount_opts |= SF_SETMODE;
 			break;
@@ -221,8 +221,6 @@ parse_options(char *options, uid_t *uid, gid_t *gid, int *mode, int *reserved, s
 			*mount_opts |= SF_MUFS;
 			break;
 		case Opt_prefix:
-			/* Free any previous prefix */
-			kfree(*prefix);
 			*prefix = match_strdup(&args[0]);
 			if (!*prefix)
 				return 0;
@@ -233,21 +231,21 @@ parse_options(char *options, uid_t *uid, gid_t *gid, int *mode, int *reserved, s
 			break;
 		case Opt_reserved:
 			if (match_int(&args[0], reserved))
-				return 1;
+				return 0;
 			break;
 		case Opt_root:
 			if (match_int(&args[0], root))
-				return 1;
+				return 0;
 			break;
 		case Opt_setgid:
 			if (match_int(&args[0], &option))
-				return 1;
+				return 0;
 			*gid = option;
 			*mount_opts |= SF_SETGID;
 			break;
 		case Opt_setuid:
 			if (match_int(&args[0], &option))
-				return -EINVAL;
+				return 0;
 			*uid = option;
 			*mount_opts |= SF_SETUID;
 			break;
@@ -311,11 +309,14 @@ static int affs_fill_super(struct super_block *sb, void *data, int silent)
 		return -ENOMEM;
 	sb->s_fs_info = sbi;
 	mutex_init(&sbi->s_bmlock);
+	spin_lock_init(&sbi->symlink_lock);
 
 	if (!parse_options(data,&uid,&gid,&i,&reserved,&root_block,
 				&blocksize,&sbi->s_prefix,
 				sbi->s_volume, &mount_flags)) {
 		printk(KERN_ERR "AFFS: Error parsing options\n");
+		kfree(sbi->s_prefix);
+		kfree(sbi);
 		return -EINVAL;
 	}
 	/* N.B. after this point s_prefix must be released */
@@ -516,14 +517,18 @@ affs_remount(struct super_block *sb, int *flags, char *data)
 	unsigned long mount_flags;
 	int res = 0;
 	char *new_opts = kstrdup(data, GFP_KERNEL);
+	char volume[32];
+	char *prefix = NULL;
 
 	pr_debug("AFFS: remount(flags=0x%x,opts=\"%s\")\n",*flags,data);
 
 	*flags |= MS_NODIRATIME;
 
+	memcpy(volume, sbi->s_volume, 32);
 	if (!parse_options(data, &uid, &gid, &mode, &reserved, &root_block,
-			   &blocksize, &sbi->s_prefix, sbi->s_volume,
+			   &blocksize, &prefix, volume,
 			   &mount_flags)) {
+		kfree(prefix);
 		kfree(new_opts);
 		return -EINVAL;
 	}
@@ -534,6 +539,14 @@ affs_remount(struct super_block *sb, int *flags, char *data)
 	sbi->s_mode = mode;
 	sbi->s_uid  = uid;
 	sbi->s_gid  = gid;
+	/* protect against readers */
+	spin_lock(&sbi->symlink_lock);
+	if (prefix) {
+		kfree(sbi->s_prefix);
+		sbi->s_prefix = prefix;
+	}
+	memcpy(sbi->s_volume, volume, 32);
+	spin_unlock(&sbi->symlink_lock);
 
 	if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) {
 		unlock_kernel();
diff --git a/fs/affs/symlink.c b/fs/affs/symlink.c
index 41782539c907..ee00f08c4f53 100644
--- a/fs/affs/symlink.c
+++ b/fs/affs/symlink.c
@@ -20,7 +20,6 @@ static int affs_symlink_readpage(struct file *file, struct page *page)
 	int i, j;
 	char c;
 	char lc;
-	char *pf;
 
 	pr_debug("AFFS: follow_link(ino=%lu)\n",inode->i_ino);
 
@@ -32,11 +31,15 @@ static int affs_symlink_readpage(struct file *file, struct page *page)
 	j  = 0;
 	lf = (struct slink_front *)bh->b_data;
 	lc = 0;
-	pf = AFFS_SB(inode->i_sb)->s_prefix ? AFFS_SB(inode->i_sb)->s_prefix : "/";
 
 	if (strchr(lf->symname,':')) {	/* Handle assign or volume name */
+		struct affs_sb_info *sbi = AFFS_SB(inode->i_sb);
+		char *pf;
+		spin_lock(&sbi->symlink_lock);
+		pf = sbi->s_prefix ? sbi->s_prefix : "/";
 		while (i < 1023 && (c = pf[i]))
 			link[i++] = c;
+		spin_unlock(&sbi->symlink_lock);
 		while (i < 1023 && lf->symname[j] != ':')
 			link[i++] = lf->symname[j++];
 		if (i < 1023)
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index 6f60336c6628..8f3d9fd89604 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -353,35 +353,35 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
 	struct inode *inode;
 	unsigned i, imap_len;
 	struct bfs_sb_info *info;
-	long ret = -EINVAL;
+	int ret = -EINVAL;
 	unsigned long i_sblock, i_eblock, i_eoff, s_size;
 
 	info = kzalloc(sizeof(*info), GFP_KERNEL);
 	if (!info)
 		return -ENOMEM;
+	mutex_init(&info->bfs_lock);
 	s->s_fs_info = info;
 
 	sb_set_blocksize(s, BFS_BSIZE);
 
-	bh = sb_bread(s, 0);
-	if(!bh)
+	info->si_sbh = sb_bread(s, 0);
+	if (!info->si_sbh)
 		goto out;
-	bfs_sb = (struct bfs_super_block *)bh->b_data;
+	bfs_sb = (struct bfs_super_block *)info->si_sbh->b_data;
 	if (le32_to_cpu(bfs_sb->s_magic) != BFS_MAGIC) {
 		if (!silent)
 			printf("No BFS filesystem on %s (magic=%08x)\n",
 				s->s_id, le32_to_cpu(bfs_sb->s_magic));
-		goto out;
+		goto out1;
 	}
 	if (BFS_UNCLEAN(bfs_sb, s) && !silent)
 		printf("%s is unclean, continuing\n", s->s_id);
 
 	s->s_magic = BFS_MAGIC;
-	info->si_sbh = bh;
 
 	if (le32_to_cpu(bfs_sb->s_start) > le32_to_cpu(bfs_sb->s_end)) {
 		printf("Superblock is corrupted\n");
-		goto out;
+		goto out1;
 	}
 
 	info->si_lasti = (le32_to_cpu(bfs_sb->s_start) - BFS_BSIZE) /
@@ -390,7 +390,7 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
 	imap_len = (info->si_lasti / 8) + 1;
 	info->si_imap = kzalloc(imap_len, GFP_KERNEL);
 	if (!info->si_imap)
-		goto out;
+		goto out1;
 	for (i = 0; i < BFS_ROOT_INO; i++)
 		set_bit(i, info->si_imap);
 
@@ -398,15 +398,13 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
 	inode = bfs_iget(s, BFS_ROOT_INO);
 	if (IS_ERR(inode)) {
 		ret = PTR_ERR(inode);
-		kfree(info->si_imap);
-		goto out;
+		goto out2;
 	}
 	s->s_root = d_alloc_root(inode);
 	if (!s->s_root) {
 		iput(inode);
 		ret = -ENOMEM;
-		kfree(info->si_imap);
-		goto out;
+		goto out2;
 	}
 
 	info->si_blocks = (le32_to_cpu(bfs_sb->s_end) + 1) >> BFS_BSIZE_BITS;
@@ -419,10 +417,8 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
 	bh = sb_bread(s, info->si_blocks - 1);
 	if (!bh) {
 		printf("Last block not available: %lu\n", info->si_blocks - 1);
-		iput(inode);
 		ret = -EIO;
-		kfree(info->si_imap);
-		goto out;
+		goto out3;
 	}
 	brelse(bh);
 
@@ -459,11 +455,8 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
 			printf("Inode 0x%08x corrupted\n", i);
 
 			brelse(bh);
-			s->s_root = NULL;
-			kfree(info->si_imap);
-			kfree(info);
-			s->s_fs_info = NULL;
-			return -EIO;
+			ret = -EIO;
+			goto out3;
 		}
 
 		if (!di->i_ino) {
@@ -483,11 +476,17 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
 		s->s_dirt = 1;
 	}
 	dump_imap("read_super", s);
-	mutex_init(&info->bfs_lock);
 	return 0;
 
+out3:
+	dput(s->s_root);
+	s->s_root = NULL;
+out2:
+	kfree(info->si_imap);
+out1:
+	brelse(info->si_sbh);
 out:
-	brelse(bh);
+	mutex_destroy(&info->bfs_lock);
 	kfree(info);
 	s->s_fs_info = NULL;
 	return ret;
diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c
index 346b69405363..fdd397099172 100644
--- a/fs/binfmt_aout.c
+++ b/fs/binfmt_aout.c
@@ -264,6 +264,7 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs)
 #else
 	set_personality(PER_LINUX);
 #endif
+	setup_new_exec(bprm);
 
 	current->mm->end_code = ex.a_text +
 		(current->mm->start_code = N_TXTADDR(ex));
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index edd90c49003c..fd5b2ea5d299 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -662,27 +662,6 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 		if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
 			goto out_free_interp;
 
-		/*
-		 * The early SET_PERSONALITY here is so that the lookup
-		 * for the interpreter happens in the namespace of the
-		 * to-be-execed image. SET_PERSONALITY can select an
-		 * alternate root.
-		 *
-		 * However, SET_PERSONALITY is NOT allowed to switch
-		 * this task into the new images's memory mapping
-		 * policy - that is, TASK_SIZE must still evaluate to
-		 * that which is appropriate to the execing application.
-		 * This is because exit_mmap() needs to have TASK_SIZE
-		 * evaluate to the size of the old image.
-		 *
-		 * So if (say) a 64-bit application is execing a 32-bit
-		 * application it is the architecture's responsibility
-		 * to defer changing the value of TASK_SIZE until the
-		 * switch really is going to happen - do this in
-		 * flush_thread().	- akpm
-		 */
-		SET_PERSONALITY(loc->elf_ex);
-
 		interpreter = open_exec(elf_interpreter);
 		retval = PTR_ERR(interpreter);
 		if (IS_ERR(interpreter))
@@ -730,9 +709,6 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 		/* Verify the interpreter has a valid arch */
 		if (!elf_check_arch(&loc->interp_elf_ex))
 			goto out_free_dentry;
-	} else {
-		/* Executables without an interpreter also need a personality */
-		SET_PERSONALITY(loc->elf_ex);
 	}
 
 	/* Flush all traces of the currently running executable */
@@ -752,7 +728,8 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 
 	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
 		current->flags |= PF_RANDOMIZE;
-	arch_pick_mmap_layout(current->mm);
+
+	setup_new_exec(bprm);
 
 	/* Do this so that we can load the interpreter, if need be. We will
 	   change some of these later */
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index c57d9ce5ff7e..18d77297ccc8 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -321,6 +321,9 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm,
 	set_personality(PER_LINUX_FDPIC);
 	if (elf_read_implies_exec(&exec_params.hdr, executable_stack))
 		current->personality |= READ_IMPLIES_EXEC;
+
+	setup_new_exec(bprm);
+
 	set_binfmt(&elf_fdpic_format);
 
 	current->mm->start_code = 0;
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index d4a00ea1054c..42c6b4a54445 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -519,6 +519,7 @@ static int load_flat_file(struct linux_binprm * bprm,
 
 		/* OK, This is the point of no return */
 		set_personality(PER_LINUX_32BIT);
+		setup_new_exec(bprm);
 	}
 
 	/*
diff --git a/fs/binfmt_som.c b/fs/binfmt_som.c
index 2a9b5330cc5e..cc8560f6c9b0 100644
--- a/fs/binfmt_som.c
+++ b/fs/binfmt_som.c
@@ -227,6 +227,7 @@ load_som_binary(struct linux_binprm * bprm, struct pt_regs * regs)
 	/* OK, This is the point of no return */
 	current->flags &= ~PF_FORKNOEXEC;
 	current->personality = PER_HPUX;
+	setup_new_exec(bprm);
 
 	/* Set the task size for HP-UX processes such that
 	 * the gateway page is outside the address space.
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index 54f4798ab46a..6df6d6ed74fd 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -244,6 +244,7 @@ int btrfs_init_acl(struct btrfs_trans_handle *trans,
 						    ACL_TYPE_ACCESS);
 			}
 		}
+		posix_acl_release(clone);
 	}
 failed:
 	posix_acl_release(acl);
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 9f806dd04c27..2aa8ec6a0981 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1161,6 +1161,7 @@ struct btrfs_root {
 #define BTRFS_MOUNT_SSD_SPREAD		(1 << 8)
 #define BTRFS_MOUNT_NOSSD		(1 << 9)
 #define BTRFS_MOUNT_DISCARD		(1 << 10)
+#define BTRFS_MOUNT_FORCE_COMPRESS	(1 << 11)
 
 #define btrfs_clear_opt(o, opt)		((o) &= ~BTRFS_MOUNT_##opt)
 #define btrfs_set_opt(o, opt)		((o) |= BTRFS_MOUNT_##opt)
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 009e3bd18f23..87b25543d7d1 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1993,6 +1993,12 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 	if (!fs_info->fs_root)
 		goto fail_trans_kthread;
 
+	if (!(sb->s_flags & MS_RDONLY)) {
+		down_read(&fs_info->cleanup_work_sem);
+		btrfs_orphan_cleanup(fs_info->fs_root);
+		up_read(&fs_info->cleanup_work_sem);
+	}
+
 	return tree_root;
 
 fail_trans_kthread:
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index 46bea0f4dc7b..428fcac45f90 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -155,20 +155,6 @@ static struct rb_node *__tree_search(struct rb_root *root, u64 offset,
 	return NULL;
 }
 
-/*
- * look for an offset in the tree, and if it can't be found, return
- * the first offset we can find smaller than 'offset'.
- */
-static inline struct rb_node *tree_search(struct rb_root *root, u64 offset)
-{
-	struct rb_node *prev;
-	struct rb_node *ret;
-	ret = __tree_search(root, offset, &prev, NULL);
-	if (!ret)
-		return prev;
-	return ret;
-}
-
 /* check to see if two extent_map structs are adjacent and safe to merge */
 static int mergable_maps(struct extent_map *prev, struct extent_map *next)
 {
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index b330e27c2d8b..8cd109972fa6 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -483,7 +483,8 @@ again:
 		nr_pages_ret = 0;
 
 		/* flag the file so we don't compress in the future */
-		BTRFS_I(inode)->flags |= BTRFS_INODE_NOCOMPRESS;
+		if (!btrfs_test_opt(root, FORCE_COMPRESS))
+			BTRFS_I(inode)->flags |= BTRFS_INODE_NOCOMPRESS;
 	}
 	if (will_compress) {
 		*num_added += 1;
@@ -3796,12 +3797,6 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
 
 	if (location.type == BTRFS_INODE_ITEM_KEY) {
 		inode = btrfs_iget(dir->i_sb, &location, root);
-		if (unlikely(root->clean_orphans) &&
-		    !(inode->i_sb->s_flags & MS_RDONLY)) {
-			down_read(&root->fs_info->cleanup_work_sem);
-			btrfs_orphan_cleanup(root);
-			up_read(&root->fs_info->cleanup_work_sem);
-		}
 		return inode;
 	}
 
@@ -5799,7 +5794,7 @@ out_fail:
 }
 
 static int prealloc_file_range(struct inode *inode, u64 start, u64 end,
-			       u64 alloc_hint, int mode)
+			       u64 alloc_hint, int mode, loff_t actual_len)
 {
 	struct btrfs_trans_handle *trans;
 	struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -5808,6 +5803,7 @@ static int prealloc_file_range(struct inode *inode, u64 start, u64 end,
 	u64 cur_offset = start;
 	u64 num_bytes = end - start;
 	int ret = 0;
+	u64 i_size;
 
 	while (num_bytes > 0) {
 		alloc_size = min(num_bytes, root->fs_info->max_extent);
@@ -5846,8 +5842,12 @@ static int prealloc_file_range(struct inode *inode, u64 start, u64 end,
 		BTRFS_I(inode)->flags |= BTRFS_INODE_PREALLOC;
 		if (!(mode & FALLOC_FL_KEEP_SIZE) &&
 			cur_offset > inode->i_size) {
-			i_size_write(inode, cur_offset);
-			btrfs_ordered_update_i_size(inode, cur_offset, NULL);
+			if (cur_offset > actual_len)
+				i_size = actual_len;
+			else
+				i_size = cur_offset;
+			i_size_write(inode, i_size);
+			btrfs_ordered_update_i_size(inode, i_size, NULL);
 		}
 
 		ret = btrfs_update_inode(trans, root, inode);
@@ -5940,7 +5940,7 @@ static long btrfs_fallocate(struct inode *inode, int mode,
 		     !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) {
 			ret = prealloc_file_range(inode,
 						  cur_offset, last_byte,
-						  alloc_hint, mode);
+						  alloc_hint, mode, offset+len);
 			if (ret < 0) {
 				free_extent_map(em);
 				break;
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 3f9b45704fcd..8a1ea6e64575 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -66,7 +66,8 @@ enum {
 	Opt_degraded, Opt_subvol, Opt_device, Opt_nodatasum, Opt_nodatacow,
 	Opt_max_extent, Opt_max_inline, Opt_alloc_start, Opt_nobarrier,
 	Opt_ssd, Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl,
-	Opt_compress, Opt_notreelog, Opt_ratio, Opt_flushoncommit,
+	Opt_compress, Opt_compress_force, Opt_notreelog, Opt_ratio,
+	Opt_flushoncommit,
 	Opt_discard, Opt_err,
 };
 
@@ -82,6 +83,7 @@ static match_table_t tokens = {
 	{Opt_alloc_start, "alloc_start=%s"},
 	{Opt_thread_pool, "thread_pool=%d"},
 	{Opt_compress, "compress"},
+	{Opt_compress_force, "compress-force"},
 	{Opt_ssd, "ssd"},
 	{Opt_ssd_spread, "ssd_spread"},
 	{Opt_nossd, "nossd"},
@@ -173,6 +175,11 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
 			printk(KERN_INFO "btrfs: use compression\n");
 			btrfs_set_opt(info->mount_opt, COMPRESS);
 			break;
+		case Opt_compress_force:
+			printk(KERN_INFO "btrfs: forcing compression\n");
+			btrfs_set_opt(info->mount_opt, FORCE_COMPRESS);
+			btrfs_set_opt(info->mount_opt, COMPRESS);
+			break;
 		case Opt_ssd:
 			printk(KERN_INFO "btrfs: use ssd allocation scheme\n");
 			btrfs_set_opt(info->mount_opt, SSD);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 220dad5db017..41ecbb2347f2 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1135,7 +1135,7 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
 		root->fs_info->avail_metadata_alloc_bits;
 
 	if ((all_avail & BTRFS_BLOCK_GROUP_RAID10) &&
-	    root->fs_info->fs_devices->rw_devices <= 4) {
+	    root->fs_info->fs_devices->num_devices <= 4) {
 		printk(KERN_ERR "btrfs: unable to go below four devices "
 		       "on raid10\n");
 		ret = -EINVAL;
@@ -1143,7 +1143,7 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
 	}
 
 	if ((all_avail & BTRFS_BLOCK_GROUP_RAID1) &&
-	    root->fs_info->fs_devices->rw_devices <= 2) {
+	    root->fs_info->fs_devices->num_devices <= 2) {
 		printk(KERN_ERR "btrfs: unable to go below two "
 		       "devices on raid1\n");
 		ret = -EINVAL;
@@ -1434,8 +1434,8 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
 		return -EINVAL;
 
 	bdev = open_bdev_exclusive(device_path, 0, root->fs_info->bdev_holder);
-	if (!bdev)
-		return -EIO;
+	if (IS_ERR(bdev))
+		return PTR_ERR(bdev);
 
 	if (root->fs_info->fs_devices->seeding) {
 		seeding_dev = 1;
@@ -2538,6 +2538,11 @@ int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset)
 	if (!em)
 		return 1;
 
+	if (btrfs_test_opt(root, DEGRADED)) {
+		free_extent_map(em);
+		return 0;
+	}
+
 	map = (struct map_lookup *)em->bdev;
 	for (i = 0; i < map->num_stripes; i++) {
 		if (!map->stripes[i].dev->writeable) {
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index b486169f42bf..274ac865bae8 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -160,15 +160,8 @@ static int debugfs_create_by_name(const char *name, mode_t mode,
 	 * block. A pointer to that is in the struct vfsmount that we
 	 * have around.
 	 */
-	if (!parent) {
-		if (debugfs_mount && debugfs_mount->mnt_sb) {
-			parent = debugfs_mount->mnt_sb->s_root;
-		}
-	}
-	if (!parent) {
-		pr_debug("debugfs: Ah! can not find a parent!\n");
-		return -EFAULT;
-	}
+	if (!parent)
+		parent = debugfs_mount->mnt_sb->s_root;
 
 	*dentry = NULL;
 	mutex_lock(&parent->d_inode->i_mutex);
diff --git a/fs/eventfd.c b/fs/eventfd.c
index d26402ff06ea..7758cc382ef0 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -135,26 +135,71 @@ static unsigned int eventfd_poll(struct file *file, poll_table *wait)
 	return events;
 }
 
-static ssize_t eventfd_read(struct file *file, char __user *buf, size_t count,
-			    loff_t *ppos)
+static void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt)
+{
+	*cnt = (ctx->flags & EFD_SEMAPHORE) ? 1 : ctx->count;
+	ctx->count -= *cnt;
+}
+
+/**
+ * eventfd_ctx_remove_wait_queue - Read the current counter and removes wait queue.
+ * @ctx: [in] Pointer to eventfd context.
+ * @wait: [in] Wait queue to be removed.
+ * @cnt: [out] Pointer to the 64bit conter value.
+ *
+ * Returns zero if successful, or the following error codes:
+ *
+ * -EAGAIN      : The operation would have blocked.
+ *
+ * This is used to atomically remove a wait queue entry from the eventfd wait
+ * queue head, and read/reset the counter value.
+ */
+int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_t *wait,
+				  __u64 *cnt)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&ctx->wqh.lock, flags);
+	eventfd_ctx_do_read(ctx, cnt);
+	__remove_wait_queue(&ctx->wqh, wait);
+	if (*cnt != 0 && waitqueue_active(&ctx->wqh))
+		wake_up_locked_poll(&ctx->wqh, POLLOUT);
+	spin_unlock_irqrestore(&ctx->wqh.lock, flags);
+
+	return *cnt != 0 ? 0 : -EAGAIN;
+}
+EXPORT_SYMBOL_GPL(eventfd_ctx_remove_wait_queue);
+
+/**
+ * eventfd_ctx_read - Reads the eventfd counter or wait if it is zero.
+ * @ctx: [in] Pointer to eventfd context.
+ * @no_wait: [in] Different from zero if the operation should not block.
+ * @cnt: [out] Pointer to the 64bit conter value.
+ *
+ * Returns zero if successful, or the following error codes:
+ *
+ * -EAGAIN      : The operation would have blocked but @no_wait was nonzero.
+ * -ERESTARTSYS : A signal interrupted the wait operation.
+ *
+ * If @no_wait is zero, the function might sleep until the eventfd internal
+ * counter becomes greater than zero.
+ */
+ssize_t eventfd_ctx_read(struct eventfd_ctx *ctx, int no_wait, __u64 *cnt)
 {
-	struct eventfd_ctx *ctx = file->private_data;
 	ssize_t res;
-	__u64 ucnt = 0;
 	DECLARE_WAITQUEUE(wait, current);
 
-	if (count < sizeof(ucnt))
-		return -EINVAL;
 	spin_lock_irq(&ctx->wqh.lock);
+	*cnt = 0;
 	res = -EAGAIN;
 	if (ctx->count > 0)
-		res = sizeof(ucnt);
-	else if (!(file->f_flags & O_NONBLOCK)) {
+		res = 0;
+	else if (!no_wait) {
 		__add_wait_queue(&ctx->wqh, &wait);
-		for (res = 0;;) {
+		for (;;) {
 			set_current_state(TASK_INTERRUPTIBLE);
 			if (ctx->count > 0) {
-				res = sizeof(ucnt);
+				res = 0;
 				break;
 			}
 			if (signal_pending(current)) {
@@ -168,18 +213,32 @@ static ssize_t eventfd_read(struct file *file, char __user *buf, size_t count,
 		__remove_wait_queue(&ctx->wqh, &wait);
 		__set_current_state(TASK_RUNNING);
 	}
-	if (likely(res > 0)) {
-		ucnt = (ctx->flags & EFD_SEMAPHORE) ? 1 : ctx->count;
-		ctx->count -= ucnt;
+	if (likely(res == 0)) {
+		eventfd_ctx_do_read(ctx, cnt);
 		if (waitqueue_active(&ctx->wqh))
 			wake_up_locked_poll(&ctx->wqh, POLLOUT);
 	}
 	spin_unlock_irq(&ctx->wqh.lock);
-	if (res > 0 && put_user(ucnt, (__u64 __user *) buf))
-		return -EFAULT;
 
 	return res;
 }
+EXPORT_SYMBOL_GPL(eventfd_ctx_read);
+
+static ssize_t eventfd_read(struct file *file, char __user *buf, size_t count,
+			    loff_t *ppos)
+{
+	struct eventfd_ctx *ctx = file->private_data;
+	ssize_t res;
+	__u64 cnt;
+
+	if (count < sizeof(cnt))
+		return -EINVAL;
+	res = eventfd_ctx_read(ctx, file->f_flags & O_NONBLOCK, &cnt);
+	if (res < 0)
+		return res;
+
+	return put_user(cnt, (__u64 __user *) buf) ? -EFAULT : sizeof(cnt);
+}
 
 static ssize_t eventfd_write(struct file *file, const char __user *buf, size_t count,
 			     loff_t *ppos)
diff --git a/fs/exec.c b/fs/exec.c
index 632b02e34ec7..675c3f44c2ea 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -941,9 +941,7 @@ void set_task_comm(struct task_struct *tsk, char *buf)
 
 int flush_old_exec(struct linux_binprm * bprm)
 {
-	char * name;
-	int i, ch, retval;
-	char tcomm[sizeof(current->comm)];
+	int retval;
 
 	/*
 	 * Make sure we have a private signal table and that
@@ -963,6 +961,20 @@ int flush_old_exec(struct linux_binprm * bprm)
 		goto out;
 
 	bprm->mm = NULL;		/* We're using it now */
+	return 0;
+
+out:
+	return retval;
+}
+EXPORT_SYMBOL(flush_old_exec);
+
+void setup_new_exec(struct linux_binprm * bprm)
+{
+	int i, ch;
+	char * name;
+	char tcomm[sizeof(current->comm)];
+
+	arch_pick_mmap_layout(current->mm);
 
 	/* This is the point of no return */
 	current->sas_ss_sp = current->sas_ss_size = 0;
@@ -1019,14 +1031,8 @@ int flush_old_exec(struct linux_binprm * bprm)
 
 	flush_signal_handlers(current, 0);
 	flush_old_files(current->files);
-
-	return 0;
-
-out:
-	return retval;
 }
-
-EXPORT_SYMBOL(flush_old_exec);
+EXPORT_SYMBOL(setup_new_exec);
 
 /*
  * Prepare credentials and lock ->cred_guard_mutex.
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index af7b62699ea9..874d169a193e 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -361,14 +361,11 @@ struct ext4_new_group_data {
 	   so set the magic i_delalloc_reserve_flag after taking the
 	   inode allocation semaphore for */
 #define EXT4_GET_BLOCKS_DELALLOC_RESERVE	0x0004
-	/* Call ext4_da_update_reserve_space() after successfully
-	   allocating the blocks */
-#define EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE	0x0008
 	/* caller is from the direct IO path, request to creation of an
 	   unitialized extents if not allocated, split the uninitialized
 	   extent if blocks has been preallocated already*/
-#define EXT4_GET_BLOCKS_DIO			0x0010
-#define EXT4_GET_BLOCKS_CONVERT			0x0020
+#define EXT4_GET_BLOCKS_DIO			0x0008
+#define EXT4_GET_BLOCKS_CONVERT			0x0010
 #define EXT4_GET_BLOCKS_DIO_CREATE_EXT		(EXT4_GET_BLOCKS_DIO|\
 					 EXT4_GET_BLOCKS_CREATE_UNINIT_EXT)
 	/* Convert extent to initialized after direct IO complete */
@@ -1443,6 +1440,8 @@ extern int ext4_block_truncate_page(handle_t *handle,
 extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
 extern qsize_t *ext4_get_reserved_space(struct inode *inode);
 extern int flush_aio_dio_completed_IO(struct inode *inode);
+extern void ext4_da_update_reserve_space(struct inode *inode,
+					int used, int quota_claim);
 /* ioctl.c */
 extern long ext4_ioctl(struct file *, unsigned int, unsigned long);
 extern long ext4_compat_ioctl(struct file *, unsigned int, unsigned long);
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 7d7b74e94687..765a4826b118 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -3132,7 +3132,19 @@ out:
 			unmap_underlying_metadata_blocks(inode->i_sb->s_bdev,
 					newblock + max_blocks,
 					allocated - max_blocks);
+		allocated = max_blocks;
 	}
+
+	/*
+	 * If we have done fallocate with the offset that is already
+	 * delayed allocated, we would have block reservation
+	 * and quota reservation done in the delayed write path.
+	 * But fallocate would have already updated quota and block
+	 * count for this offset. So cancel these reservation
+	 */
+	if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
+		ext4_da_update_reserve_space(inode, allocated, 0);
+
 map_out:
 	set_buffer_mapped(bh_result);
 out1:
@@ -3368,9 +3380,18 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
 	/* previous routine could use block we allocated */
 	newblock = ext_pblock(&newex);
 	allocated = ext4_ext_get_actual_len(&newex);
+	if (allocated > max_blocks)
+		allocated = max_blocks;
 	set_buffer_new(bh_result);
 
 	/*
+	 * Update reserved blocks/metadata blocks after successful
+	 * block allocation which had been deferred till now.
+	 */
+	if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
+		ext4_da_update_reserve_space(inode, allocated, 1);
+
+	/*
 	 * Cache the extent and update transaction to commit on fdatasync only
 	 * when it is _not_ an uninitialized extent.
 	 */
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index c818972c8302..e11952404e02 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1053,11 +1053,12 @@ static int ext4_calc_metadata_amount(struct inode *inode, sector_t lblock)
  * Called with i_data_sem down, which is important since we can call
  * ext4_discard_preallocations() from here.
  */
-static void ext4_da_update_reserve_space(struct inode *inode, int used)
+void ext4_da_update_reserve_space(struct inode *inode,
+					int used, int quota_claim)
 {
 	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
 	struct ext4_inode_info *ei = EXT4_I(inode);
-	int mdb_free = 0;
+	int mdb_free = 0, allocated_meta_blocks = 0;
 
 	spin_lock(&ei->i_block_reservation_lock);
 	if (unlikely(used > ei->i_reserved_data_blocks)) {
@@ -1073,6 +1074,7 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used)
 	ei->i_reserved_data_blocks -= used;
 	used += ei->i_allocated_meta_blocks;
 	ei->i_reserved_meta_blocks -= ei->i_allocated_meta_blocks;
+	allocated_meta_blocks = ei->i_allocated_meta_blocks;
 	ei->i_allocated_meta_blocks = 0;
 	percpu_counter_sub(&sbi->s_dirtyblocks_counter, used);
 
@@ -1090,9 +1092,23 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used)
 	spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
 
 	/* Update quota subsystem */
-	vfs_dq_claim_block(inode, used);
-	if (mdb_free)
-		vfs_dq_release_reservation_block(inode, mdb_free);
+	if (quota_claim) {
+		vfs_dq_claim_block(inode, used);
+		if (mdb_free)
+			vfs_dq_release_reservation_block(inode, mdb_free);
+	} else {
+		/*
+		 * We did fallocate with an offset that is already delayed
+		 * allocated. So on delayed allocated writeback we should
+		 * not update the quota for allocated blocks. But then
+		 * converting an fallocate region to initialized region would
+		 * have caused a metadata allocation. So claim quota for
+		 * that
+		 */
+		if (allocated_meta_blocks)
+			vfs_dq_claim_block(inode, allocated_meta_blocks);
+		vfs_dq_release_reservation_block(inode, mdb_free + used);
+	}
 
 	/*
 	 * If we have done all the pending block allocations and if
@@ -1292,18 +1308,20 @@ int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block,
 			 */
 			EXT4_I(inode)->i_state &= ~EXT4_STATE_EXT_MIGRATE;
 		}
-	}
 
+		/*
+		 * Update reserved blocks/metadata blocks after successful
+		 * block allocation which had been deferred till now. We don't
+		 * support fallocate for non extent files. So we can update
+		 * reserve space here.
+		 */
+		if ((retval > 0) &&
+			(flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE))
+			ext4_da_update_reserve_space(inode, retval, 1);
+	}
 	if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
 		EXT4_I(inode)->i_delalloc_reserved_flag = 0;
 
-	/*
-	 * Update reserved blocks/metadata blocks after successful
-	 * block allocation which had been deferred till now.
-	 */
-	if ((retval > 0) && (flags & EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE))
-		ext4_da_update_reserve_space(inode, retval);
-
 	up_write((&EXT4_I(inode)->i_data_sem));
 	if (retval > 0 && buffer_mapped(bh)) {
 		int ret = check_block_validity(inode, "file system "
@@ -1835,24 +1853,12 @@ repeat:
 	 * later. Real quota accounting is done at pages writeout
 	 * time.
 	 */
-	if (vfs_dq_reserve_block(inode, md_needed + 1)) {
-		/*
-		 * We tend to badly over-estimate the amount of
-		 * metadata blocks which are needed, so if we have
-		 * reserved any metadata blocks, try to force out the
-		 * inode and see if we have any better luck.
-		 */
-		if (md_reserved && retries++ <= 3)
-			goto retry;
+	if (vfs_dq_reserve_block(inode, md_needed + 1))
 		return -EDQUOT;
-	}
 
 	if (ext4_claim_free_blocks(sbi, md_needed + 1)) {
 		vfs_dq_release_reservation_block(inode, md_needed + 1);
 		if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
-retry:
-			if (md_reserved)
-				write_inode_now(inode, (retries == 3));
 			yield();
 			goto repeat;
 		}
@@ -2213,10 +2219,10 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
 	 * variables are updated after the blocks have been allocated.
 	 */
 	new.b_state = 0;
-	get_blocks_flags = (EXT4_GET_BLOCKS_CREATE |
-			    EXT4_GET_BLOCKS_DELALLOC_RESERVE);
+	get_blocks_flags = EXT4_GET_BLOCKS_CREATE;
 	if (mpd->b_state & (1 << BH_Delay))
-		get_blocks_flags |= EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE;
+		get_blocks_flags |= EXT4_GET_BLOCKS_DELALLOC_RESERVE;
+
 	blks = ext4_get_blocks(handle, mpd->inode, next, max_blocks,
 			       &new, get_blocks_flags);
 	if (blks < 0) {
@@ -3032,7 +3038,7 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
 			       loff_t pos, unsigned len, unsigned flags,
 			       struct page **pagep, void **fsdata)
 {
-	int ret, retries = 0;
+	int ret, retries = 0, quota_retries = 0;
 	struct page *page;
 	pgoff_t index;
 	unsigned from, to;
@@ -3091,6 +3097,22 @@ retry:
 
 	if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
 		goto retry;
+
+	if ((ret == -EDQUOT) &&
+	    EXT4_I(inode)->i_reserved_meta_blocks &&
+	    (quota_retries++ < 3)) {
+		/*
+		 * Since we often over-estimate the number of meta
+		 * data blocks required, we may sometimes get a
+		 * spurios out of quota error even though there would
+		 * be enough space once we write the data blocks and
+		 * find out how many meta data blocks were _really_
+		 * required. So try forcing the inode write to see if
+		 * that helps.
+		 */
+		write_inode_now(inode, (quota_retries == 3));
+		goto retry;
+	}
 out:
 	return ret;
 }
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 97e01dc0d95f..5ef953e6f908 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -199,7 +199,9 @@ static int setfl(int fd, struct file * filp, unsigned long arg)
 static void f_modown(struct file *filp, struct pid *pid, enum pid_type type,
 		     int force)
 {
-	write_lock_irq(&filp->f_owner.lock);
+	unsigned long flags;
+
+	write_lock_irqsave(&filp->f_owner.lock, flags);
 	if (force || !filp->f_owner.pid) {
 		put_pid(filp->f_owner.pid);
 		filp->f_owner.pid = get_pid(pid);
@@ -211,7 +213,7 @@ static void f_modown(struct file *filp, struct pid *pid, enum pid_type type,
 			filp->f_owner.euid = cred->euid;
 		}
 	}
-	write_unlock_irq(&filp->f_owner.lock);
+	write_unlock_irqrestore(&filp->f_owner.lock, flags);
 }
 
 int __f_setown(struct file *filp, struct pid *pid, enum pid_type type,
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index 83ac4d3b3cb0..ba98546fabbd 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -2913,7 +2913,9 @@ int journal_init(struct super_block *sb, const char *j_dev_name,
 	journal->j_mount_id = 10;
 	journal->j_state = 0;
 	atomic_set(&(journal->j_jlock), 0);
+	reiserfs_write_unlock(sb);
 	journal->j_cnode_free_list = allocate_cnodes(num_cnodes);
+	reiserfs_write_lock(sb);
 	journal->j_cnode_free_orig = journal->j_cnode_free_list;
 	journal->j_cnode_free = journal->j_cnode_free_list ? num_cnodes : 0;
 	journal->j_cnode_used = 0;
diff --git a/fs/romfs/super.c b/fs/romfs/super.c
index c117fa80d1e9..42d213546894 100644
--- a/fs/romfs/super.c
+++ b/fs/romfs/super.c
@@ -544,6 +544,7 @@ error:
 error_rsb_inval:
 	ret = -EINVAL;
 error_rsb:
+	kfree(rsb);
 	return ret;
 }
 