34 files changed, 568 insertions, 153 deletions
diff --git a/fs/afs/write.c b/fs/afs/write.c
index 15690bb1d3b5..789b3afb3423 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -140,6 +140,7 @@ int afs_write_begin(struct file *file, struct address_space *mapping,
        candidate->first = candidate->last = index;
        candidate->offset_first = from;
        candidate->to_last = to;
+        INIT_LIST_HEAD(&candidate->link);
        candidate->usage = 1;
        candidate->state = AFS_WBACK_PENDING;
        init_waitqueue_head(&candidate->waitq);
diff --git a/fs/aio.c b/fs/aio.c
index 020de5cb4a67..a936b7fe4f69 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -228,15 +228,23 @@ static void __put_ioctx(struct kioctx *ctx)
        call_rcu(&ctx->rcu_head, ctx_rcu_free);
 }
-#define get_ioctx(kioctx) do {                                          \
+static inline void get_ioctx(struct kioctx *kioctx)
-        BUG_ON(atomic_read(&(kioctx)->users) <= 0);                     \
+{
-        atomic_inc(&(kioctx)->users);                                   \
+        BUG_ON(atomic_read(&kioctx->users) <= 0);
-} while (0)
+        atomic_inc(&kioctx->users);
-#define put_ioctx(kioctx) do {                                          \
+}
-        BUG_ON(atomic_read(&(kioctx)->users) <= 0);                     \
-        if (unlikely(atomic_dec_and_test(&(kioctx)->users)))            \
+static inline int try_get_ioctx(struct kioctx *kioctx)
-                __put_ioctx(kioctx);                                    \
+{
-} while (0)
+        return atomic_inc_not_zero(&kioctx->users);
+}
+static inline void put_ioctx(struct kioctx *kioctx)
+{
+        BUG_ON(atomic_read(&kioctx->users) <= 0);
+        if (unlikely(atomic_dec_and_test(&kioctx->users)))
+                __put_ioctx(kioctx);
+}
 /* ioctx_alloc
 *      Allocates and initializes an ioctx.  Returns an ERR_PTR if it failed.
@@ -590,8 +598,13 @@ static struct kioctx *lookup_ioctx(unsigned long ctx_id)
        rcu_read_lock();
        hlist_for_each_entry_rcu(ctx, n, &mm->ioctx_list, list) {
-                if (ctx->user_id == ctx_id && !ctx->dead) {
+                /*
-                        get_ioctx(ctx);
+                 * RCU protects us against accessing freed memory but
+                 * we have to be careful not to get a reference when the
+                 * reference count already dropped to 0 (ctx->dead test
+                 * is unreliable because of races).
+                 */
+                if (ctx->user_id == ctx_id && !ctx->dead && try_get_ioctx(ctx)){
                        ret = ctx;
                        break;
                }
@@ -1569,6 +1582,23 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
                goto out_put_req;
        spin_lock_irq(&ctx->ctx_lock);
+        /*
+         * We could have raced with io_destroy() and are currently holding a
+         * reference to ctx which should be destroyed. We cannot submit IO
+         * since ctx gets freed as soon as io_submit() puts its reference.  The
+         * check here is reliable: io_destroy() sets ctx->dead before waiting
+         * for outstanding IO and the barrier between these two is realized by
+         * unlock of mm->ioctx_lock and lock of ctx->ctx_lock.  Analogously we
+         * increment ctx->reqs_active before checking for ctx->dead and the
+         * barrier is realized by unlock and lock of ctx->ctx_lock. Thus if we
+         * don't see ctx->dead set here, io_destroy() waits for our IO to
+         * finish.
+         */
+        if (ctx->dead) {
+                spin_unlock_irq(&ctx->ctx_lock);
+                ret = -EINVAL;
+                goto out_put_req;
+        }
        aio_run_iocb(req);
        if (!list_empty(&ctx->run_list)) {
                /* drain the run list */
diff --git a/fs/bio.c b/fs/bio.c
index 4bd454fa844e..5694b756ed01 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -43,7 +43,7 @@ static mempool_t *bio_split_pool __read_mostly;
 * unsigned short
 */
 #define BV(x) { .nr_vecs = x, .name = "biovec-"__stringify(x) }
-struct biovec_slab bvec_slabs[BIOVEC_NR_POOLS] __read_mostly = {
+static struct biovec_slab bvec_slabs[BIOVEC_NR_POOLS] __read_mostly = {
        BV(1), BV(4), BV(16), BV(64), BV(128), BV(BIO_MAX_PAGES),
 };
 #undef BV
@@ -1656,12 +1656,10 @@ static void __init biovec_init_slabs(void)
                int size;
                struct biovec_slab *bvs = bvec_slabs + i;
-#ifndef CONFIG_BLK_DEV_INTEGRITY
                if (bvs->nr_vecs <= BIO_INLINE_VECS) {
                        bvs->slab = NULL;
                        continue;
                }
-#endif
                size = bvs->nr_vecs * sizeof(struct bio_vec);
                bvs->slab = kmem_cache_create(bvs->name, size, 0,
diff --git a/fs/block_dev.c b/fs/block_dev.c
index fffc2c672396..fbe05cbdd692 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -873,6 +873,11 @@ int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk)
        ret = add_symlink(bdev->bd_part->holder_dir, &disk_to_dev(disk)->kobj);
        if (ret)
                goto out_del;
+        /*
+         * bdev could be deleted beneath us which would implicitly destroy
+         * the holder directory.  Hold on to it.
+         */
+        kobject_get(bdev->bd_part->holder_dir);
        list_add(&holder->list, &bdev->bd_holder_disks);
        goto out_unlock;
@@ -909,6 +914,7 @@ void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk)
                del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
                del_symlink(bdev->bd_part->holder_dir,
                            &disk_to_dev(disk)->kobj);
+                kobject_put(bdev->bd_part->holder_dir);
                list_del_init(&holder->list);
                kfree(holder);
        }
@@ -922,14 +928,15 @@ EXPORT_SYMBOL_GPL(bd_unlink_disk_holder);
 * flush_disk - invalidates all buffer-cache entries on a disk
 *
 * @bdev:      struct block device to be flushed
+ * @kill_dirty: flag to guide handling of dirty inodes
 *
 * Invalidates all buffer-cache entries on a disk. It should be called
 * when a disk has been changed -- either by a media change or online
 * resize.
 */
-static void flush_disk(struct block_device *bdev)
+static void flush_disk(struct block_device *bdev, bool kill_dirty)
 {
-        if (__invalidate_device(bdev)) {
+        if (__invalidate_device(bdev, kill_dirty)) {
                char name[BDEVNAME_SIZE] = "";
                if (bdev->bd_disk)
@@ -966,7 +973,7 @@ void check_disk_size_change(struct gendisk *disk, struct block_device *bdev)
                       "%s: detected capacity change from %lld to %lld\n",
                       name, bdev_size, disk_size);
                i_size_write(bdev->bd_inode, disk_size);
-                flush_disk(bdev);
+                flush_disk(bdev, false);
        }
 }
 EXPORT_SYMBOL(check_disk_size_change);
@@ -1019,7 +1026,7 @@ int check_disk_change(struct block_device *bdev)
        if (!(events & DISK_EVENT_MEDIA_CHANGE))
                return 0;
-        flush_disk(bdev);
+        flush_disk(bdev, true);
        if (bdops->revalidate_disk)
                bdops->revalidate_disk(bdev->bd_disk);
        return 1;
@@ -1080,6 +1087,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
        if (!disk)
                goto out;
+        disk_block_events(disk);
        mutex_lock_nested(&bdev->bd_mutex, for_part);
        if (!bdev->bd_openers) {
                bdev->bd_disk = disk;
@@ -1101,10 +1109,11 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
                                         */
                                        disk_put_part(bdev->bd_part);
                                        bdev->bd_part = NULL;
-                                        module_put(disk->fops->owner);
-                                        put_disk(disk);
                                        bdev->bd_disk = NULL;
                                        mutex_unlock(&bdev->bd_mutex);
+                                        disk_unblock_events(disk);
+                                        module_put(disk->fops->owner);
+                                        put_disk(disk);
                                        goto restart;
                                }
                                if (ret)
@@ -1141,9 +1150,6 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
                        bd_set_size(bdev, (loff_t)bdev->bd_part->nr_sects << 9);
                }
        } else {
-                module_put(disk->fops->owner);
-                put_disk(disk);
-                disk = NULL;
                if (bdev->bd_contains == bdev) {
                        if (bdev->bd_disk->fops->open) {
                                ret = bdev->bd_disk->fops->open(bdev, mode);
@@ -1153,11 +1159,15 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
                        if (bdev->bd_invalidated)
                                rescan_partitions(bdev->bd_disk, bdev);
                }
+                /* only one opener holds refs to the module and disk */
+                module_put(disk->fops->owner);
+                put_disk(disk);
        }
        bdev->bd_openers++;
        if (for_part)
                bdev->bd_part_count++;
        mutex_unlock(&bdev->bd_mutex);
+        disk_unblock_events(disk);
        return 0;
 out_clear:
@@ -1170,9 +1180,9 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
        bdev->bd_contains = NULL;
 out_unlock_bdev:
        mutex_unlock(&bdev->bd_mutex);
+        disk_unblock_events(disk);
 out:
-        if (disk)
+        module_put(disk->fops->owner);
-                module_put(disk->fops->owner);
        put_disk(disk);
        bdput(bdev);
@@ -1439,14 +1449,13 @@ int blkdev_put(struct block_device *bdev, fmode_t mode)
                if (bdev_free) {
                        if (bdev->bd_write_holder) {
                                disk_unblock_events(bdev->bd_disk);
-                                bdev->bd_write_holder = false;
-                        } else
                                disk_check_events(bdev->bd_disk);
+                                bdev->bd_write_holder = false;
+                        }
                }
                mutex_unlock(&bdev->bd_mutex);
-        } else
+        }
-                disk_check_events(bdev->bd_disk);
        return __blkdev_put(bdev, mode, 0);
 }
@@ -1599,7 +1608,7 @@ fail:
 }
 EXPORT_SYMBOL(lookup_bdev);
-int __invalidate_device(struct block_device *bdev)
+int __invalidate_device(struct block_device *bdev, bool kill_dirty)
 {
        struct super_block *sb = get_super(bdev);
        int res = 0;
@@ -1612,7 +1621,7 @@ int __invalidate_device(struct block_device *bdev)
                 * hold).
                 */
                shrink_dcache_sb(sb);
-                res = invalidate_inodes(sb);
+                res = invalidate_inodes(sb, kill_dirty);
                drop_super(sb);
        }
        invalidate_bdev(bdev);
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 2c98b3af6052..6f820fa23df4 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1254,6 +1254,7 @@ struct btrfs_root {
 #define BTRFS_MOUNT_SPACE_CACHE         (1 << 12)
 #define BTRFS_MOUNT_CLEAR_CACHE         (1 << 13)
 #define BTRFS_MOUNT_USER_SUBVOL_RM_ALLOWED (1 << 14)
+#define BTRFS_MOUNT_ENOSPC_DEBUG         (1 << 15)
 #define btrfs_clear_opt(o, opt)         ((o) &= ~BTRFS_MOUNT_##opt)
 #define btrfs_set_opt(o, opt)           ((o) |= BTRFS_MOUNT_##opt)
@@ -2218,6 +2219,8 @@ int btrfs_error_unpin_extent_range(struct btrfs_root *root,
                                   u64 start, u64 end);
 int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr,
                               u64 num_bytes);
+int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans,
+                            struct btrfs_root *root, u64 type);
 /* ctree.c */
 int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index f3c96fc01439..588ff9849873 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -5376,7 +5376,7 @@ again:
                               num_bytes, data, 1);
                goto again;
        }
-        if (ret == -ENOSPC) {
+        if (ret == -ENOSPC && btrfs_test_opt(root, ENOSPC_DEBUG)) {
                struct btrfs_space_info *sinfo;
                sinfo = __find_space_info(root->fs_info, data);
@@ -8065,6 +8065,13 @@ out:
        return ret;
 }
+int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans,
+                            struct btrfs_root *root, u64 type)
+{
+        u64 alloc_flags = get_alloc_profile(root, type);
+        return do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 1);
+}
 /*
 * helper to account the unused space of all the readonly block group in the
 * list. takes mirrors into account.
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index b76f7cd47401..00497d551e51 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -1433,12 +1433,13 @@ int extent_clear_unlock_delalloc(struct inode *inode,
 */
 u64 count_range_bits(struct extent_io_tree *tree,
                     u64 *start, u64 search_end, u64 max_bytes,
-                     unsigned long bits)
+                     unsigned long bits, int contig)
 {
        struct rb_node *node;
        struct extent_state *state;
        u64 cur_start = *start;
        u64 total_bytes = 0;
+        u64 last = 0;
        int found = 0;
        if (search_end <= cur_start) {
@@ -1463,7 +1464,9 @@ u64 count_range_bits(struct extent_io_tree *tree,
                state = rb_entry(node, struct extent_state, rb_node);
                if (state->start > search_end)
                        break;
-                if (state->end >= cur_start && (state->state & bits)) {
+                if (contig && found && state->start > last + 1)
+                        break;
+                if (state->end >= cur_start && (state->state & bits) == bits) {
                        total_bytes += min(search_end, state->end) + 1 -
                                       max(cur_start, state->start);
                        if (total_bytes >= max_bytes)
@@ -1472,6 +1475,9 @@ u64 count_range_bits(struct extent_io_tree *tree,
                                *start = state->start;
                                found = 1;
                        }
+                        last = state->end;
+                } else if (contig && found) {
+                        break;
                }
                node = rb_next(node);
                if (!node)
@@ -2912,6 +2918,46 @@ out:
        return sector;
 }
+/*
+ * helper function for fiemap, which doesn't want to see any holes.
+ * This maps until we find something past 'last'
+ */
+static struct extent_map *get_extent_skip_holes(struct inode *inode,
+                                                u64 offset,
+                                                u64 last,
+                                                get_extent_t *get_extent)
+{
+        u64 sectorsize = BTRFS_I(inode)->root->sectorsize;
+        struct extent_map *em;
+        u64 len;
+        if (offset >= last)
+                return NULL;
+        while(1) {
+                len = last - offset;
+                if (len == 0)
+                        break;
+                len = (len + sectorsize - 1) & ~(sectorsize - 1);
+                em = get_extent(inode, NULL, 0, offset, len, 0);
+                if (!em || IS_ERR(em))
+                        return em;
+                /* if this isn't a hole return it */
+                if (!test_bit(EXTENT_FLAG_VACANCY, &em->flags) &&
+                    em->block_start != EXTENT_MAP_HOLE) {
+                        return em;
+                }
+                /* this is a hole, advance to the next extent */
+                offset = extent_map_end(em);
+                free_extent_map(em);
+                if (offset >= last)
+                        break;
+        }
+        return NULL;
+}
 int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
                __u64 start, __u64 len, get_extent_t *get_extent)
 {
@@ -2921,16 +2967,19 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
        u32 flags = 0;
        u32 found_type;
        u64 last;
+        u64 last_for_get_extent = 0;
        u64 disko = 0;
+        u64 isize = i_size_read(inode);
        struct btrfs_key found_key;
        struct extent_map *em = NULL;
        struct extent_state *cached_state = NULL;
        struct btrfs_path *path;
        struct btrfs_file_extent_item *item;
        int end = 0;
-        u64 em_start = 0, em_len = 0;
+        u64 em_start = 0;
+        u64 em_len = 0;
+        u64 em_end = 0;
        unsigned long emflags;
-        int hole = 0;
        if (len == 0)
                return -EINVAL;
@@ -2940,6 +2989,10 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
                return -ENOMEM;
        path->leave_spinning = 1;
+        /*
+         * lookup the last file extent.  We're not using i_size here
+         * because there might be preallocation past i_size
+         */
        ret = btrfs_lookup_file_extent(NULL, BTRFS_I(inode)->root,
                                       path, inode->i_ino, -1, 0);
        if (ret < 0) {
@@ -2953,18 +3006,38 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
        btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]);
        found_type = btrfs_key_type(&found_key);
-        /* No extents, just return */
+        /* No extents, but there might be delalloc bits */
        if (found_key.objectid != inode->i_ino ||
            found_type != BTRFS_EXTENT_DATA_KEY) {
-                btrfs_free_path(path);
+                /* have to trust i_size as the end */
-                return 0;
+                last = (u64)-1;
+                last_for_get_extent = isize;
+        } else {
+                /*
+                 * remember the start of the last extent.  There are a
+                 * bunch of different factors that go into the length of the
+                 * extent, so its much less complex to remember where it started
+                 */
+                last = found_key.offset;
+                last_for_get_extent = last + 1;
        }
-        last = found_key.offset;
        btrfs_free_path(path);
+        /*
+         * we might have some extents allocated but more delalloc past those
+         * extents.  so, we trust isize unless the start of the last extent is
+         * beyond isize
+         */
+        if (last < isize) {
+                last = (u64)-1;
+                last_for_get_extent = isize;
+        }
        lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len, 0,
                         &cached_state, GFP_NOFS);
-        em = get_extent(inode, NULL, 0, off, max - off, 0);
+        em = get_extent_skip_holes(inode, off, last_for_get_extent,
+                                   get_extent);
        if (!em)
                goto out;
        if (IS_ERR(em)) {
@@ -2973,19 +3046,14 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
        }
        while (!end) {
-                hole = 0;
+                off = extent_map_end(em);
-                off = em->start + em->len;
                if (off >= max)
                        end = 1;
-                if (em->block_start == EXTENT_MAP_HOLE) {
-                        hole = 1;
-                        goto next;
-                }
                em_start = em->start;
                em_len = em->len;
+                em_end = extent_map_end(em);
+                emflags = em->flags;
                disko = 0;
                flags = 0;
@@ -3004,37 +3072,29 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
                if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
                        flags |= FIEMAP_EXTENT_ENCODED;
-next:
-                emflags = em->flags;
                free_extent_map(em);
                em = NULL;
-                if (!end) {
+                if ((em_start >= last) || em_len == (u64)-1 ||
-                        em = get_extent(inode, NULL, 0, off, max - off, 0);
+                   (last == (u64)-1 && isize <= em_end)) {
-                        if (!em)
-                                goto out;
-                        if (IS_ERR(em)) {
-                                ret = PTR_ERR(em);
-                                goto out;
-                        }
-                        emflags = em->flags;
-                }
-                if (test_bit(EXTENT_FLAG_VACANCY, &emflags)) {
                        flags |= FIEMAP_EXTENT_LAST;
                        end = 1;
                }
-                if (em_start == last) {
+                /* now scan forward to see if this is really the last extent. */
+                em = get_extent_skip_holes(inode, off, last_for_get_extent,
+                                           get_extent);
+                if (IS_ERR(em)) {
+                        ret = PTR_ERR(em);
+                        goto out;
+                }
+                if (!em) {
                        flags |= FIEMAP_EXTENT_LAST;
                        end = 1;
                }
+                ret = fiemap_fill_next_extent(fieinfo, em_start, disko,
-                if (!hole) {
+                                              em_len, flags);
-                        ret = fiemap_fill_next_extent(fieinfo, em_start, disko,
+                if (ret)
-                                                em_len, flags);
+                        goto out_free;
-                        if (ret)
-                                goto out_free;
-                }
        }
 out_free:
        free_extent_map(em);
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 7083cfafd061..9318dfefd59c 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -191,7 +191,7 @@ void extent_io_exit(void);
 u64 count_range_bits(struct extent_io_tree *tree,
                     u64 *start, u64 search_end,
-                     u64 max_bytes, unsigned long bits);
+                     u64 max_bytes, unsigned long bits, int contig);
 void free_extent_state(struct extent_state *state);
 int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 462e08e724b0..02438c917923 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1913,7 +1913,7 @@ static int btrfs_clean_io_failures(struct inode *inode, u64 start)
        private = 0;
        if (count_range_bits(&BTRFS_I(inode)->io_failure_tree, &private,
-                             (u64)-1, 1, EXTENT_DIRTY)) {
+                             (u64)-1, 1, EXTENT_DIRTY, 0)) {
                ret = get_state_private(&BTRFS_I(inode)->io_failure_tree,
                                        start, &private_failure);
                if (ret == 0) {
@@ -5280,6 +5280,128 @@ out:
        return em;
 }
+struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *page,
+                                           size_t pg_offset, u64 start, u64 len,
+                                           int create)
+{
+        struct extent_map *em;
+        struct extent_map *hole_em = NULL;
+        u64 range_start = start;
+        u64 end;
+        u64 found;
+        u64 found_end;
+        int err = 0;
+        em = btrfs_get_extent(inode, page, pg_offset, start, len, create);
+        if (IS_ERR(em))
+                return em;
+        if (em) {
+                /*
+                 * if our em maps to a hole, there might
+                 * actually be delalloc bytes behind it
+                 */
+                if (em->block_start != EXTENT_MAP_HOLE)
+                        return em;
+                else
+                        hole_em = em;
+        }
+        /* check to see if we've wrapped (len == -1 or similar) */
+        end = start + len;
+        if (end < start)
+                end = (u64)-1;
+        else
+                end -= 1;
+        em = NULL;
+        /* ok, we didn't find anything, lets look for delalloc */
+        found = count_range_bits(&BTRFS_I(inode)->io_tree, &range_start,
+                                 end, len, EXTENT_DELALLOC, 1);
+        found_end = range_start + found;
+        if (found_end < range_start)
+                found_end = (u64)-1;
+        /*
+         * we didn't find anything useful, return
+         * the original results from get_extent()
+         */
+        if (range_start > end || found_end <= start) {
+                em = hole_em;
+                hole_em = NULL;
+                goto out;
+        }
+        /* adjust the range_start to make sure it doesn't
+         * go backwards from the start they passed in
+         */
+        range_start = max(start,range_start);
+        found = found_end - range_start;
+        if (found > 0) {
+                u64 hole_start = start;
+                u64 hole_len = len;
+                em = alloc_extent_map(GFP_NOFS);
+                if (!em) {
+                        err = -ENOMEM;
+                        goto out;
+                }
+                /*
+                 * when btrfs_get_extent can't find anything it
+                 * returns one huge hole
+                 *
+                 * make sure what it found really fits our range, and
+                 * adjust to make sure it is based on the start from
+                 * the caller
+                 */
+                if (hole_em) {
+                        u64 calc_end = extent_map_end(hole_em);
+                        if (calc_end <= start || (hole_em->start > end)) {
+                                free_extent_map(hole_em);
+                                hole_em = NULL;
+                        } else {
+                                hole_start = max(hole_em->start, start);
+                                hole_len = calc_end - hole_start;
+                        }
+                }
+                em->bdev = NULL;
+                if (hole_em && range_start > hole_start) {
+                        /* our hole starts before our delalloc, so we
+                         * have to return just the parts of the hole
+                         * that go until  the delalloc starts
+                         */
+                        em->len = min(hole_len,
+                                      range_start - hole_start);
+                        em->start = hole_start;
+                        em->orig_start = hole_start;
+                        /*
+                         * don't adjust block start at all,
+                         * it is fixed at EXTENT_MAP_HOLE
+                         */
+                        em->block_start = hole_em->block_start;
+                        em->block_len = hole_len;
+                } else {
+                        em->start = range_start;
+                        em->len = found;
+                        em->orig_start = range_start;
+                        em->block_start = EXTENT_MAP_DELALLOC;
+                        em->block_len = found;
+                }
+        } else if (hole_em) {
+                return hole_em;
+        }
+out:
+        free_extent_map(hole_em);
+        if (err) {
+                free_extent_map(em);
+                return ERR_PTR(err);
+        }
+        return em;
+}
 static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
                                                  u64 start, u64 len)
 {
@@ -6102,7 +6224,7 @@ out:
 static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
                __u64 start, __u64 len)
 {
-        return extent_fiemap(inode, fieinfo, start, len, btrfs_get_extent);
+        return extent_fiemap(inode, fieinfo, start, len, btrfs_get_extent_fiemap);
 }
 int btrfs_readpage(struct file *file, struct page *page)
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index be2d4f6aaa5e..5fdb2abc4fa7 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1071,12 +1071,15 @@ static noinline int btrfs_ioctl_subvol_setflags(struct file *file,
        if (copy_from_user(&flags, arg, sizeof(flags)))
                return -EFAULT;
-        if (flags & ~BTRFS_SUBVOL_CREATE_ASYNC)
+        if (flags & BTRFS_SUBVOL_CREATE_ASYNC)
                return -EINVAL;
        if (flags & ~BTRFS_SUBVOL_RDONLY)
                return -EOPNOTSUPP;
+        if (!is_owner_or_cap(inode))
+                return -EACCES;
        down_write(&root->fs_info->subvol_sem);
        /* nothing to do */
@@ -1097,7 +1100,7 @@ static noinline int btrfs_ioctl_subvol_setflags(struct file *file,
                goto out_reset;
        }
-        ret = btrfs_update_root(trans, root,
+        ret = btrfs_update_root(trans, root->fs_info->tree_root,
                                &root->root_key, &root->root_item);
        btrfs_commit_transaction(trans, root);
diff --git a/fs/btrfs/lzo.c b/fs/btrfs/lzo.c
index cc9b450399df..a178f5ebea78 100644
--- a/fs/btrfs/lzo.c
+++ b/fs/btrfs/lzo.c
@@ -280,6 +280,7 @@ static int lzo_decompress_biovec(struct list_head *ws,
        unsigned long tot_out;
        unsigned long tot_len;
        char *buf;
+        bool may_late_unmap, need_unmap;
        data_in = kmap(pages_in[0]);
        tot_len = read_compress_length(data_in);
@@ -300,11 +301,13 @@ static int lzo_decompress_biovec(struct list_head *ws,
                tot_in += in_len;
                working_bytes = in_len;
+                may_late_unmap = need_unmap = false;
                /* fast path: avoid using the working buffer */
                if (in_page_bytes_left >= in_len) {
                        buf = data_in + in_offset;
                        bytes = in_len;
+                        may_late_unmap = true;
                        goto cont;
                }
@@ -329,14 +332,17 @@ cont:
                                if (working_bytes == 0 && tot_in >= tot_len)
                                        break;
-                                kunmap(pages_in[page_in_index]);
+                                if (page_in_index + 1 >= total_pages_in) {
-                                page_in_index++;
-                                if (page_in_index >= total_pages_in) {
                                        ret = -1;
-                                        data_in = NULL;
                                        goto done;
                                }
-                                data_in = kmap(pages_in[page_in_index]);
+                                if (may_late_unmap)
+                                        need_unmap = true;
+                                else
+                                        kunmap(pages_in[page_in_index]);
+                                data_in = kmap(pages_in[++page_in_index]);
                                in_page_bytes_left = PAGE_CACHE_SIZE;
                                in_offset = 0;
@@ -346,6 +352,8 @@ cont:
                out_len = lzo1x_worst_compress(PAGE_CACHE_SIZE);
                ret = lzo1x_decompress_safe(buf, in_len, workspace->buf,
                                            &out_len);
+                if (need_unmap)
+                        kunmap(pages_in[page_in_index - 1]);
                if (ret != LZO_E_OK) {
                        printk(KERN_WARNING "btrfs decompress failed\n");
                        ret = -1;
@@ -363,8 +371,7 @@ cont:
                        break;
        }
 done:
-        if (data_in)
+        kunmap(pages_in[page_in_index]);
-                kunmap(pages_in[page_in_index]);
        return ret;
 }
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 0825e4ed9447..31ade5802ae8 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -3654,6 +3654,7 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
        u32 item_size;
        int ret;
        int err = 0;
+        int progress = 0;
        path = btrfs_alloc_path();
        if (!path)
@@ -3666,9 +3667,10 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
        }
        while (1) {
+                progress++;
                trans = btrfs_start_transaction(rc->extent_root, 0);
                BUG_ON(IS_ERR(trans));
+restart:
                if (update_backref_cache(trans, &rc->backref_cache)) {
                        btrfs_end_transaction(trans, rc->extent_root);
                        continue;
@@ -3781,6 +3783,15 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
                        }
                }
        }
+        if (trans && progress && err == -ENOSPC) {
+                ret = btrfs_force_chunk_alloc(trans, rc->extent_root,
+                                              rc->block_group->flags);
+                if (ret == 0) {
+                        err = 0;
+                        progress = 0;
+                        goto restart;
+                }
+        }
        btrfs_release_path(rc->extent_root, path);
        clear_extent_bits(&rc->processed_blocks, 0, (u64)-1, EXTENT_DIRTY,
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index a004008f7d28..d39a9895d932 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -155,7 +155,8 @@ enum {
        Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl, Opt_compress,
        Opt_compress_type, Opt_compress_force, Opt_compress_force_type,
        Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard,
-        Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed, Opt_err,
+        Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed,
+        Opt_enospc_debug, Opt_err,
 };
 static match_table_t tokens = {
@@ -184,6 +185,7 @@ static match_table_t tokens = {
        {Opt_space_cache, "space_cache"},
        {Opt_clear_cache, "clear_cache"},
        {Opt_user_subvol_rm_allowed, "user_subvol_rm_allowed"},
+        {Opt_enospc_debug, "enospc_debug"},
        {Opt_err, NULL},
 };
@@ -358,6 +360,9 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
                case Opt_user_subvol_rm_allowed:
                        btrfs_set_opt(info->mount_opt, USER_SUBVOL_RM_ALLOWED);
                        break;
+                case Opt_enospc_debug:
+                        btrfs_set_opt(info->mount_opt, ENOSPC_DEBUG);
+                        break;
                case Opt_err:
                        printk(KERN_INFO "btrfs: unrecognized mount option "
                               "'%s'\n", p);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 6e0e82a1b188..9d554e8e6583 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1294,11 +1294,11 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
        ret = btrfs_shrink_device(device, 0);
        if (ret)
-                goto error_brelse;
+                goto error_undo;
        ret = btrfs_rm_dev_item(root->fs_info->chunk_root, device);
        if (ret)
-                goto error_brelse;
+                goto error_undo;
        device->in_fs_metadata = 0;
@@ -1372,6 +1372,13 @@ out:
        mutex_unlock(&root->fs_info->volume_mutex);
        mutex_unlock(&uuid_mutex);
        return ret;
+error_undo:
+        if (device->writeable) {
+                list_add(&device->dev_alloc_list,
+                         &root->fs_info->fs_devices->alloc_list);
+                root->fs_info->fs_devices->rw_devices++;
+        }
+        goto error_brelse;
 }
 /*
@@ -1589,7 +1596,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
        device->dev_root = root->fs_info->dev_root;
        device->bdev = bdev;
        device->in_fs_metadata = 1;
-        device->mode = 0;
+        device->mode = FMODE_EXCL;
        set_blocksize(device->bdev, 4096);
        if (seeding_dev) {
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 267d0ada4541..4a09af9e9a63 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -63,6 +63,13 @@
 * cleanup path and it is also acquired by eventpoll_release_file()
 * if a file has been pushed inside an epoll set and it is then
 * close()d without a previous call toepoll_ctl(EPOLL_CTL_DEL).
+ * It is also acquired when inserting an epoll fd onto another epoll
+ * fd. We do this so that we walk the epoll tree and ensure that this
+ * insertion does not create a cycle of epoll file descriptors, which
+ * could lead to deadlock. We need a global mutex to prevent two
+ * simultaneous inserts (A into B and B into A) from racing and
+ * constructing a cycle without either insert observing that it is
+ * going to.
 * It is possible to drop the "ep->mtx" and to use the global
 * mutex "epmutex" (together with "ep->lock") to have it working,
 * but having "ep->mtx" will make the interface more scalable.
@@ -224,6 +231,9 @@ static long max_user_watches __read_mostly;
 */
 static DEFINE_MUTEX(epmutex);
+/* Used to check for epoll file descriptor inclusion loops */
+static struct nested_calls poll_loop_ncalls;
 /* Used for safe wake up implementation */
 static struct nested_calls poll_safewake_ncalls;
@@ -1198,6 +1208,62 @@ retry:
        return res;
 }
+/**
+ * ep_loop_check_proc - Callback function to be passed to the @ep_call_nested()
+ *                      API, to verify that adding an epoll file inside another
+ *                      epoll structure, does not violate the constraints, in
+ *                      terms of closed loops, or too deep chains (which can
+ *                      result in excessive stack usage).
+ *
+ * @priv: Pointer to the epoll file to be currently checked.
+ * @cookie: Original cookie for this call. This is the top-of-the-chain epoll
+ *          data structure pointer.
+ * @call_nests: Current dept of the @ep_call_nested() call stack.
+ *
+ * Returns: Returns zero if adding the epoll @file inside current epoll
+ *          structure @ep does not violate the constraints, or -1 otherwise.
+ */
+static int ep_loop_check_proc(void *priv, void *cookie, int call_nests)
+{
+        int error = 0;
+        struct file *file = priv;
+        struct eventpoll *ep = file->private_data;
+        struct rb_node *rbp;
+        struct epitem *epi;
+        mutex_lock(&ep->mtx);
+        for (rbp = rb_first(&ep->rbr); rbp; rbp = rb_next(rbp)) {
+                epi = rb_entry(rbp, struct epitem, rbn);
+                if (unlikely(is_file_epoll(epi->ffd.file))) {
+                        error = ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS,
+                                               ep_loop_check_proc, epi->ffd.file,
+                                               epi->ffd.file->private_data, current);
+                        if (error != 0)
+                                break;
+                }
+        }
+        mutex_unlock(&ep->mtx);
+        return error;
+}
+/**
+ * ep_loop_check - Performs a check to verify that adding an epoll file (@file)
+ *                 another epoll file (represented by @ep) does not create
+ *                 closed loops or too deep chains.
+ *
+ * @ep: Pointer to the epoll private data structure.
+ * @file: Pointer to the epoll file to be checked.
+ *
+ * Returns: Returns zero if adding the epoll @file inside current epoll
+ *          structure @ep does not violate the constraints, or -1 otherwise.
+ */
+static int ep_loop_check(struct eventpoll *ep, struct file *file)
+{
+        return ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS,
+                              ep_loop_check_proc, file, ep, current);
+}
 /*
 * Open an eventpoll file descriptor.
 */
@@ -1246,6 +1312,7 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
                struct epoll_event __user *, event)
 {
        int error;
+        int did_lock_epmutex = 0;
        struct file *file, *tfile;
        struct eventpoll *ep;
        struct epitem *epi;
@@ -1287,6 +1354,25 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
         */
        ep = file->private_data;
+        /*
+         * When we insert an epoll file descriptor, inside another epoll file
+         * descriptor, there is the change of creating closed loops, which are
+         * better be handled here, than in more critical paths.
+         *
+         * We hold epmutex across the loop check and the insert in this case, in
+         * order to prevent two separate inserts from racing and each doing the
+         * insert "at the same time" such that ep_loop_check passes on both
+         * before either one does the insert, thereby creating a cycle.
+         */
+        if (unlikely(is_file_epoll(tfile) && op == EPOLL_CTL_ADD)) {
+                mutex_lock(&epmutex);
+                did_lock_epmutex = 1;
+                error = -ELOOP;
+                if (ep_loop_check(ep, tfile) != 0)
+                        goto error_tgt_fput;
+        }
        mutex_lock(&ep->mtx);
        /*
@@ -1322,6 +1408,9 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
        mutex_unlock(&ep->mtx);
 error_tgt_fput:
+        if (unlikely(did_lock_epmutex))
+                mutex_unlock(&epmutex);
        fput(tfile);
 error_fput:
        fput(file);
@@ -1441,6 +1530,12 @@ static int __init eventpoll_init(void)
                EP_ITEM_COST;
        BUG_ON(max_user_watches < 0);
+        /*
+         * Initialize the structure used to perform epoll file descriptor
+         * inclusion loops checks.
+         */
+        ep_nested_calls_init(&poll_loop_ncalls);
        /* Initialize the structure used to perform safe poll wait head wake ups */
        ep_nested_calls_init(&poll_safewake_ncalls);
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index bfed8447ed80..83543b5ff941 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -1283,8 +1283,11 @@ static int fuse_do_setattr(struct dentry *entry, struct iattr *attr,
        if (err)
                return err;
-        if ((attr->ia_valid & ATTR_OPEN) && fc->atomic_o_trunc)
+        if (attr->ia_valid & ATTR_OPEN) {
-                return 0;
+                if (fc->atomic_o_trunc)
+                        return 0;
+                file = NULL;
+        }
        if (attr->ia_valid & ATTR_SIZE)
                is_truncate = true;
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 95da1bc1c826..9e0832dbb1e3 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -86,18 +86,52 @@ struct fuse_file *fuse_file_get(struct fuse_file *ff)
        return ff;
 }
+static void fuse_release_async(struct work_struct *work)
+{
+        struct fuse_req *req;
+        struct fuse_conn *fc;
+        struct path path;
+        req = container_of(work, struct fuse_req, misc.release.work);
+        path = req->misc.release.path;
+        fc = get_fuse_conn(path.dentry->d_inode);
+        fuse_put_request(fc, req);
+        path_put(&path);
+}
 static void fuse_release_end(struct fuse_conn *fc, struct fuse_req *req)
 {
-        path_put(&req->misc.release.path);
+        if (fc->destroy_req) {
+                /*
+                 * If this is a fuseblk mount, then it's possible that
+                 * releasing the path will result in releasing the
+                 * super block and sending the DESTROY request.  If
+                 * the server is single threaded, this would hang.
+                 * For this reason do the path_put() in a separate
+                 * thread.
+                 */
+                atomic_inc(&req->count);
+                INIT_WORK(&req->misc.release.work, fuse_release_async);
+                schedule_work(&req->misc.release.work);
+        } else {
+                path_put(&req->misc.release.path);
+        }
 }
-static void fuse_file_put(struct fuse_file *ff)
+static void fuse_file_put(struct fuse_file *ff, bool sync)
 {
        if (atomic_dec_and_test(&ff->count)) {
                struct fuse_req *req = ff->reserved_req;
-                req->end = fuse_release_end;
+                if (sync) {
-                fuse_request_send_background(ff->fc, req);
+                        fuse_request_send(ff->fc, req);
+                        path_put(&req->misc.release.path);
+                        fuse_put_request(ff->fc, req);
+                } else {
+                        req->end = fuse_release_end;
+                        fuse_request_send_background(ff->fc, req);
+                }
                kfree(ff);
        }
 }
@@ -219,8 +253,12 @@ void fuse_release_common(struct file *file, int opcode)
         * Normally this will send the RELEASE request, however if
         * some asynchronous READ or WRITE requests are outstanding,
         * the sending will be delayed.
+         *
+         * Make the release synchronous if this is a fuseblk mount,
+         * synchronous RELEASE is allowed (and desirable) in this case
+         * because the server can be trusted not to screw up.
         */
-        fuse_file_put(ff);
+        fuse_file_put(ff, ff->fc->destroy_req != NULL);
 }
 static int fuse_open(struct inode *inode, struct file *file)
@@ -558,7 +596,7 @@ static void fuse_readpages_end(struct fuse_conn *fc, struct fuse_req *req)
                page_cache_release(page);
        }
        if (req->ff)
-                fuse_file_put(req->ff);
+                fuse_file_put(req->ff, false);
 }
 static void fuse_send_readpages(struct fuse_req *req, struct file *file)
@@ -1137,7 +1175,7 @@ static ssize_t fuse_direct_write(struct file *file, const char __user *buf,
 static void fuse_writepage_free(struct fuse_conn *fc, struct fuse_req *req)
 {
        __free_page(req->pages[0]);
-        fuse_file_put(req->ff);
+        fuse_file_put(req->ff, false);
 }
 static void fuse_writepage_finish(struct fuse_conn *fc, struct fuse_req *req)
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index ae5744a2f9e9..d4286947bc2c 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -21,6 +21,7 @@
 #include <linux/rwsem.h>
 #include <linux/rbtree.h>
 #include <linux/poll.h>
+#include <linux/workqueue.h>
 /** Max number of pages that can be used in a single read request */
 #define FUSE_MAX_PAGES_PER_REQ 32
@@ -262,7 +263,10 @@ struct fuse_req {
        /** Data for asynchronous requests */
        union {
                struct {
-                        struct fuse_release_in in;
+                        union {
+                                struct fuse_release_in in;
+                                struct work_struct work;
+                        };
                        struct path path;
                } release;
                struct fuse_init_in init_in;
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index 85ba027d1c4d..72c31a315d96 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -59,14 +59,7 @@ static void gfs2_init_gl_aspace_once(void *foo)
        struct address_space *mapping = (struct address_space *)(gl + 1);
        gfs2_init_glock_once(gl);
-        memset(mapping, 0, sizeof(*mapping));
+        address_space_init_once(mapping);
-        INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC);
-        spin_lock_init(&mapping->tree_lock);
-        spin_lock_init(&mapping->i_mmap_lock);
-        INIT_LIST_HEAD(&mapping->private_list);
-        spin_lock_init(&mapping->private_lock);
-        INIT_RAW_PRIO_TREE_ROOT(&mapping->i_mmap);
-        INIT_LIST_HEAD(&mapping->i_mmap_nonlinear);
 }
 /**
diff --git a/fs/inode.c b/fs/inode.c
index da85e56378f3..0647d80accf6 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -295,6 +295,20 @@ static void destroy_inode(struct inode *inode)
                call_rcu(&inode->i_rcu, i_callback);
 }
+void address_space_init_once(struct address_space *mapping)
+{
+        memset(mapping, 0, sizeof(*mapping));
+        INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC);
+        spin_lock_init(&mapping->tree_lock);
+        spin_lock_init(&mapping->i_mmap_lock);
+        INIT_LIST_HEAD(&mapping->private_list);
+        spin_lock_init(&mapping->private_lock);
+        INIT_RAW_PRIO_TREE_ROOT(&mapping->i_mmap);
+        INIT_LIST_HEAD(&mapping->i_mmap_nonlinear);
+        mutex_init(&mapping->unmap_mutex);
+}
+EXPORT_SYMBOL(address_space_init_once);
 /*
 * These are initializations that only need to be done
 * once, because the fields are idempotent across use
@@ -308,13 +322,7 @@ void inode_init_once(struct inode *inode)
        INIT_LIST_HEAD(&inode->i_devices);
        INIT_LIST_HEAD(&inode->i_wb_list);
        INIT_LIST_HEAD(&inode->i_lru);
-        INIT_RADIX_TREE(&inode->i_data.page_tree, GFP_ATOMIC);
+        address_space_init_once(&inode->i_data);
-        spin_lock_init(&inode->i_data.tree_lock);
-        spin_lock_init(&inode->i_data.i_mmap_lock);
-        INIT_LIST_HEAD(&inode->i_data.private_list);
-        spin_lock_init(&inode->i_data.private_lock);
-        INIT_RAW_PRIO_TREE_ROOT(&inode->i_data.i_mmap);
-        INIT_LIST_HEAD(&inode->i_data.i_mmap_nonlinear);
        i_size_ordered_init(inode);
 #ifdef CONFIG_FSNOTIFY
        INIT_HLIST_HEAD(&inode->i_fsnotify_marks);
@@ -540,11 +548,14 @@ void evict_inodes(struct super_block *sb)
 /**
 * invalidate_inodes    - attempt to free all inodes on a superblock
 * @sb:         superblock to operate on
+ * @kill_dirty: flag to guide handling of dirty inodes
 *
 * Attempts to free all inodes for a given superblock.  If there were any
 * busy inodes return a non-zero value, else zero.
+ * If @kill_dirty is set, discard dirty inodes too, otherwise treat
+ * them as busy.
 */
-int invalidate_inodes(struct super_block *sb)
+int invalidate_inodes(struct super_block *sb, bool kill_dirty)
 {
        int busy = 0;
        struct inode *inode, *next;
@@ -556,6 +567,10 @@ int invalidate_inodes(struct super_block *sb)
        list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) {
                if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE))
                        continue;
+                if (inode->i_state & I_DIRTY && !kill_dirty) {
+                        busy = 1;
+                        continue;
+                }
                if (atomic_read(&inode->i_count)) {
                        busy = 1;
                        continue;
diff --git a/fs/internal.h b/fs/internal.h
index 0663568b1247..9b976b57d7fe 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -112,4 +112,4 @@ extern void release_open_intent(struct nameidata *);
 */
 extern int get_nr_dirty_inodes(void);
 extern void evict_inodes(struct super_block *);
-extern int invalidate_inodes(struct super_block *);
+extern int invalidate_inodes(struct super_block *, bool);
diff --git a/fs/namespace.c b/fs/namespace.c
index 7b0b95371696..d1edf26025dc 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1244,7 +1244,7 @@ static int do_umount(struct vfsmount *mnt, int flags)
                 */
                br_write_lock(vfsmount_lock);
                if (mnt_get_count(mnt) != 2) {
-                        br_write_lock(vfsmount_lock);
+                        br_write_unlock(vfsmount_lock);
                        return -EBUSY;
                }
                br_write_unlock(vfsmount_lock);
diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c
index f4f1c08807ed..609cd223eea8 100644
--- a/fs/nilfs2/btnode.c
+++ b/fs/nilfs2/btnode.c
@@ -34,12 +34,6 @@
 #include "page.h"
 #include "btnode.h"
-void nilfs_btnode_cache_init_once(struct address_space *btnc)
-{
-        nilfs_mapping_init_once(btnc);
-}
 void nilfs_btnode_cache_init(struct address_space *btnc,
                             struct backing_dev_info *bdi)
 {
diff --git a/fs/nilfs2/btnode.h b/fs/nilfs2/btnode.h
index 79037494f1e0..1b8ebd888c28 100644
--- a/fs/nilfs2/btnode.h
+++ b/fs/nilfs2/btnode.h
@@ -37,7 +37,6 @@ struct nilfs_btnode_chkey_ctxt {
        struct buffer_head *newbh;
 };
-void nilfs_btnode_cache_init_once(struct address_space *);
 void nilfs_btnode_cache_init(struct address_space *, struct backing_dev_info *);
 void nilfs_btnode_cache_clear(struct address_space *);
 struct buffer_head *nilfs_btnode_create_block(struct address_space *btnc,
diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c
index 3fdb61d79c9a..a649b05f7069 100644
--- a/fs/nilfs2/mdt.c
+++ b/fs/nilfs2/mdt.c
@@ -449,9 +449,9 @@ int nilfs_mdt_setup_shadow_map(struct inode *inode,
        struct backing_dev_info *bdi = inode->i_sb->s_bdi;
        INIT_LIST_HEAD(&shadow->frozen_buffers);
-        nilfs_mapping_init_once(&shadow->frozen_data);
+        address_space_init_once(&shadow->frozen_data);
        nilfs_mapping_init(&shadow->frozen_data, bdi);
-        nilfs_mapping_init_once(&shadow->frozen_btnodes);
+        address_space_init_once(&shadow->frozen_btnodes);
        nilfs_mapping_init(&shadow->frozen_btnodes, bdi);
        mi->mi_shadow = shadow;
        return 0;
diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c
index 3da37cc5de34..4d2a1ee0eb47 100644
--- a/fs/nilfs2/page.c
+++ b/fs/nilfs2/page.c
@@ -492,19 +492,6 @@ unsigned nilfs_page_count_clean_buffers(struct page *page,
        return nc;
 }
-void nilfs_mapping_init_once(struct address_space *mapping)
-{
-        memset(mapping, 0, sizeof(*mapping));
-        INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC);
-        spin_lock_init(&mapping->tree_lock);
-        INIT_LIST_HEAD(&mapping->private_list);
-        spin_lock_init(&mapping->private_lock);
-        spin_lock_init(&mapping->i_mmap_lock);
-        INIT_RAW_PRIO_TREE_ROOT(&mapping->i_mmap);
-        INIT_LIST_HEAD(&mapping->i_mmap_nonlinear);
-}
 void nilfs_mapping_init(struct address_space *mapping,
                        struct backing_dev_info *bdi)
 {
diff --git a/fs/nilfs2/page.h b/fs/nilfs2/page.h
index ba4d6fd40b04..f06b79ad7493 100644
--- a/fs/nilfs2/page.h
+++ b/fs/nilfs2/page.h
@@ -61,7 +61,6 @@ void nilfs_free_private_page(struct page *);
 int nilfs_copy_dirty_pages(struct address_space *, struct address_space *);
 void nilfs_copy_back_pages(struct address_space *, struct address_space *);
 void nilfs_clear_dirty_pages(struct address_space *);
-void nilfs_mapping_init_once(struct address_space *mapping);
 void nilfs_mapping_init(struct address_space *mapping,
                        struct backing_dev_info *bdi);
 unsigned nilfs_page_count_clean_buffers(struct page *, unsigned, unsigned);
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 58fd707174e1..1673b3d99842 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -1279,7 +1279,7 @@ static void nilfs_inode_init_once(void *obj)
 #ifdef CONFIG_NILFS_XATTR
        init_rwsem(&ii->xattr_sem);
 #endif
-        nilfs_btnode_cache_init_once(&ii->i_btnode_cache);
+        address_space_init_once(&ii->i_btnode_cache);
        ii->i_bmap = &ii->i_bmap_data;
        inode_init_once(&ii->vfs_inode);
 }
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index 43e56b97f9c0..6180da1e37e6 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -405,9 +405,9 @@ static inline int ocfs2_remove_extent_credits(struct super_block *sb)
               ocfs2_quota_trans_credits(sb);
 }
-/* data block for new dir/symlink, 2 for bitmap updates (bitmap fe +
+/* data block for new dir/symlink, allocation of directory block, dx_root
- * bitmap block for the new bit) dx_root update for free list */
+ * update for free list */
-#define OCFS2_DIR_LINK_ADDITIONAL_CREDITS (1 + 2 + 1)
+#define OCFS2_DIR_LINK_ADDITIONAL_CREDITS (1 + OCFS2_SUBALLOC_ALLOC + 1)
 static inline int ocfs2_add_dir_index_credits(struct super_block *sb)
 {
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index b5f9160e93e9..19ebc5aad391 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -3228,7 +3228,7 @@ static int ocfs2_make_clusters_writable(struct super_block *sb,
                                        u32 num_clusters, unsigned int e_flags)
 {
        int ret, delete, index, credits =  0;
-        u32 new_bit, new_len;
+        u32 new_bit, new_len, orig_num_clusters;
        unsigned int set_len;
        struct ocfs2_super *osb = OCFS2_SB(sb);
        handle_t *handle;
@@ -3261,6 +3261,8 @@ static int ocfs2_make_clusters_writable(struct super_block *sb,
                goto out;
        }
+        orig_num_clusters = num_clusters;
        while (num_clusters) {
                ret = ocfs2_get_refcount_rec(ref_ci, context->ref_root_bh,
                                             p_cluster, num_clusters,
@@ -3348,7 +3350,8 @@ static int ocfs2_make_clusters_writable(struct super_block *sb,
         * in write-back mode.
         */
        if (context->get_clusters == ocfs2_di_get_clusters) {
-                ret = ocfs2_cow_sync_writeback(sb, context, cpos, num_clusters);
+                ret = ocfs2_cow_sync_writeback(sb, context, cpos,
+                                               orig_num_clusters);
                if (ret)
                        mlog_errno(ret);
        }
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 38f986d2447e..36c423fb0635 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1316,7 +1316,7 @@ static int ocfs2_parse_options(struct super_block *sb,
                               struct mount_options *mopt,
                               int is_remount)
 {
-        int status;
+        int status, user_stack = 0;
        char *p;
        u32 tmp;
@@ -1459,6 +1459,15 @@ static int ocfs2_parse_options(struct super_block *sb,
                        memcpy(mopt->cluster_stack, args[0].from,
                               OCFS2_STACK_LABEL_LEN);
                        mopt->cluster_stack[OCFS2_STACK_LABEL_LEN] = '\0';
+                        /*
+                         * Open code the memcmp here as we don't have
+                         * an osb to pass to
+                         * ocfs2_userspace_stack().
+                         */
+                        if (memcmp(mopt->cluster_stack,
+                                   OCFS2_CLASSIC_CLUSTER_STACK,
+                                   OCFS2_STACK_LABEL_LEN))
+                                user_stack = 1;
                        break;
                case Opt_inode64:
                        mopt->mount_opt |= OCFS2_MOUNT_INODE64;
@@ -1514,13 +1523,16 @@ static int ocfs2_parse_options(struct super_block *sb,
                }
        }
-        /* Ensure only one heartbeat mode */
+        if (user_stack == 0) {
-        tmp = mopt->mount_opt & (OCFS2_MOUNT_HB_LOCAL | OCFS2_MOUNT_HB_GLOBAL |
+                /* Ensure only one heartbeat mode */
-                                 OCFS2_MOUNT_HB_NONE);
+                tmp = mopt->mount_opt & (OCFS2_MOUNT_HB_LOCAL |
-        if (hweight32(tmp) != 1) {
+                                         OCFS2_MOUNT_HB_GLOBAL |
-                mlog(ML_ERROR, "Invalid heartbeat mount options\n");
+                                         OCFS2_MOUNT_HB_NONE);
-                status = 0;
+                if (hweight32(tmp) != 1) {
-                goto bail;
+                        mlog(ML_ERROR, "Invalid heartbeat mount options\n");
+                        status = 0;
+                        goto bail;
+                }
        }
        status = 1;
diff --git a/fs/partitions/ldm.c b/fs/partitions/ldm.c
index 789c625c7aa5..b10e3540d5b7 100644
--- a/fs/partitions/ldm.c
+++ b/fs/partitions/ldm.c
@@ -251,6 +251,11 @@ static bool ldm_parse_vmdb (const u8 *data, struct vmdb *vm)
        }
        vm->vblk_size     = get_unaligned_be32(data + 0x08);
+        if (vm->vblk_size == 0) {
+                ldm_error ("Illegal VBLK size");
+                return false;
+        }
        vm->vblk_offset   = get_unaligned_be32(data + 0x0C);
        vm->last_vblk_seq = get_unaligned_be32(data + 0x04);
diff --git a/fs/xfs/linux-2.6/xfs_discard.c b/fs/xfs/linux-2.6/xfs_discard.c
index 05201ae719e5..d61611c88012 100644
--- a/fs/xfs/linux-2.6/xfs_discard.c
+++ b/fs/xfs/linux-2.6/xfs_discard.c
@@ -152,6 +152,8 @@ xfs_ioc_trim(
        if (!capable(CAP_SYS_ADMIN))
                return -XFS_ERROR(EPERM);
+        if (!blk_queue_discard(q))
+                return -XFS_ERROR(EOPNOTSUPP);
        if (copy_from_user(&range, urange, sizeof(range)))
                return -XFS_ERROR(EFAULT);
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index cec89dd5d7d2..85668efb3e3e 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -53,6 +53,9 @@ xfs_fs_geometry(
        xfs_fsop_geom_t         *geo,
        int                     new_version)
 {
+        memset(geo, 0, sizeof(*geo));
        geo->blocksize = mp->m_sb.sb_blocksize;
        geo->rtextsize = mp->m_sb.sb_rextsize;
        geo->agblocks = mp->m_sb.sb_agblocks;