81 files changed, 1078 insertions, 785 deletions
diff --git a/fs/aio.c b/fs/aio.c
index c5b1a8c10411..7fe5bdee1630 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -307,7 +307,9 @@ static void free_ioctx(struct kioctx *ctx)
        kunmap_atomic(ring);
        while (atomic_read(&ctx->reqs_active) > 0) {
-                wait_event(ctx->wait, head != ctx->tail);
+                wait_event(ctx->wait,
+                                head != ctx->tail ||
+                                atomic_read(&ctx->reqs_active) <= 0);
                avail = (head <= ctx->tail ? ctx->tail : ctx->nr_events) - head;
@@ -1299,8 +1301,7 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb,
 *      < min_nr if the timeout specified by timeout has elapsed
 *      before sufficient events are available, where timeout == NULL
 *      specifies an infinite timeout. Note that the timeout pointed to by
- *      timeout is relative and will be updated if not NULL and the
+ *      timeout is relative.  Will fail with -ENOSYS if not implemented.
- *      operation blocks. Will fail with -ENOSYS if not implemented.
 */
 SYSCALL_DEFINE5(io_getevents, aio_context_t, ctx_id,
                long, min_nr,
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index 8615ee89ab55..f95dddced968 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -265,8 +265,8 @@ befs_readdir(struct file *filp, void *dirent, filldir_t filldir)
                result = filldir(dirent, keybuf, keysize, filp->f_pos,
                                 (ino_t) value, d_type);
        }
+        if (!result)
-        filp->f_pos++;
+                filp->f_pos++;
        befs_debug(sb, "<--- befs_readdir() filp->f_pos %Ld", filp->f_pos);
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index b4fb41558111..290e347b6db3 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -918,7 +918,8 @@ again:
                                                           ref->parent, bsz, 0);
                                if (!eb || !extent_buffer_uptodate(eb)) {
                                        free_extent_buffer(eb);
-                                        return -EIO;
+                                        ret = -EIO;
+                                        goto out;
                                }
                                ret = find_extent_in_eb(eb, bytenr,
                                                        *extent_item_pos, &eie);
diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c
index 18af6f48781a..1431a6965017 100644
--- a/fs/btrfs/check-integrity.c
+++ b/fs/btrfs/check-integrity.c
@@ -1700,7 +1700,7 @@ static int btrfsic_read_block(struct btrfsic_state *state,
                unsigned int j;
                DECLARE_COMPLETION_ONSTACK(complete);
-                bio = bio_alloc(GFP_NOFS, num_pages - i);
+                bio = btrfs_io_bio_alloc(GFP_NOFS, num_pages - i);
                if (!bio) {
                        printk(KERN_INFO
                               "btrfsic: bio_alloc() for %u pages failed!\n",
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index de6de8e60b46..02fae7f7e42c 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -951,10 +951,12 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
                        BUG_ON(ret); /* -ENOMEM */
                }
                if (new_flags != 0) {
+                        int level = btrfs_header_level(buf);
                        ret = btrfs_set_disk_extent_flags(trans, root,
                                                          buf->start,
                                                          buf->len,
-                                                          new_flags, 0);
+                                                          new_flags, level, 0);
                        if (ret)
                                return ret;
                }
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 63c328a9ce95..d6dd49b51ba8 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -88,12 +88,12 @@ struct btrfs_ordered_sum;
 /* holds checksums of all the data extents */
 #define BTRFS_CSUM_TREE_OBJECTID 7ULL
-/* for storing balance parameters in the root tree */
-#define BTRFS_BALANCE_OBJECTID -4ULL
 /* holds quota configuration and tracking */
 #define BTRFS_QUOTA_TREE_OBJECTID 8ULL
+/* for storing balance parameters in the root tree */
+#define BTRFS_BALANCE_OBJECTID -4ULL
 /* orhpan objectid for tracking unlinked/truncated files */
 #define BTRFS_ORPHAN_OBJECTID -5ULL
@@ -3075,7 +3075,7 @@ int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
 int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
                                struct btrfs_root *root,
                                u64 bytenr, u64 num_bytes, u64 flags,
-                                int is_data);
+                                int level, int is_data);
 int btrfs_free_extent(struct btrfs_trans_handle *trans,
                      struct btrfs_root *root,
                      u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h
index f75fcaf79aeb..70b962cc177d 100644
--- a/fs/btrfs/delayed-ref.h
+++ b/fs/btrfs/delayed-ref.h
@@ -60,6 +60,7 @@ struct btrfs_delayed_ref_node {
 struct btrfs_delayed_extent_op {
        struct btrfs_disk_key key;
        u64 flags_to_set;
+        int level;
        unsigned int update_key:1;
        unsigned int update_flags:1;
        unsigned int is_data:1;
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index 7ba7b3900cb8..65241f32d3f8 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -313,6 +313,11 @@ int btrfs_dev_replace_start(struct btrfs_root *root,
        struct btrfs_device *tgt_device = NULL;
        struct btrfs_device *src_device = NULL;
+        if (btrfs_fs_incompat(fs_info, RAID56)) {
+                pr_warn("btrfs: dev_replace cannot yet handle RAID5/RAID6\n");
+                return -EINVAL;
+        }
        switch (args->start.cont_reading_from_srcdev_mode) {
        case BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_ALWAYS:
        case BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_AVOID:
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 4e9ebe1f1827..e7b3cb5286a5 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -152,7 +152,7 @@ static struct btrfs_lockdep_keyset {
        { .id = BTRFS_DEV_TREE_OBJECTID,        .name_stem = "dev"      },
        { .id = BTRFS_FS_TREE_OBJECTID,         .name_stem = "fs"       },
        { .id = BTRFS_CSUM_TREE_OBJECTID,       .name_stem = "csum"     },
-        { .id = BTRFS_ORPHAN_OBJECTID,          .name_stem = "orphan"   },
+        { .id = BTRFS_QUOTA_TREE_OBJECTID,      .name_stem = "quota"    },
        { .id = BTRFS_TREE_LOG_OBJECTID,        .name_stem = "log"      },
        { .id = BTRFS_TREE_RELOC_OBJECTID,      .name_stem = "treloc"   },
        { .id = BTRFS_DATA_RELOC_TREE_OBJECTID, .name_stem = "dreloc"   },
@@ -1513,7 +1513,6 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root,
        }
        root->commit_root = btrfs_root_node(root);
-        BUG_ON(!root->node); /* -ENOMEM */
 out:
        if (location->objectid != BTRFS_TREE_LOG_OBJECTID) {
                root->ref_cows = 1;
@@ -1988,30 +1987,33 @@ static void free_root_pointers(struct btrfs_fs_info *info, int chunk_root)
 {
        free_extent_buffer(info->tree_root->node);
        free_extent_buffer(info->tree_root->commit_root);
-        free_extent_buffer(info->dev_root->node);
-        free_extent_buffer(info->dev_root->commit_root);
-        free_extent_buffer(info->extent_root->node);
-        free_extent_buffer(info->extent_root->commit_root);
-        free_extent_buffer(info->csum_root->node);
-        free_extent_buffer(info->csum_root->commit_root);
-        if (info->quota_root) {
-                free_extent_buffer(info->quota_root->node);
-                free_extent_buffer(info->quota_root->commit_root);
-        }
        info->tree_root->node = NULL;
        info->tree_root->commit_root = NULL;
-        info->dev_root->node = NULL;
-        info->dev_root->commit_root = NULL;
+        if (info->dev_root) {
-        info->extent_root->node = NULL;
+                free_extent_buffer(info->dev_root->node);
-        info->extent_root->commit_root = NULL;
+                free_extent_buffer(info->dev_root->commit_root);
-        info->csum_root->node = NULL;
+                info->dev_root->node = NULL;
-        info->csum_root->commit_root = NULL;
+                info->dev_root->commit_root = NULL;
+        }
+        if (info->extent_root) {
+                free_extent_buffer(info->extent_root->node);
+                free_extent_buffer(info->extent_root->commit_root);
+                info->extent_root->node = NULL;
+                info->extent_root->commit_root = NULL;
+        }
+        if (info->csum_root) {
+                free_extent_buffer(info->csum_root->node);
+                free_extent_buffer(info->csum_root->commit_root);
+                info->csum_root->node = NULL;
+                info->csum_root->commit_root = NULL;
+        }
        if (info->quota_root) {
+                free_extent_buffer(info->quota_root->node);
+                free_extent_buffer(info->quota_root->commit_root);
                info->quota_root->node = NULL;
                info->quota_root->commit_root = NULL;
        }
        if (chunk_root) {
                free_extent_buffer(info->chunk_root->node);
                free_extent_buffer(info->chunk_root->commit_root);
@@ -3128,7 +3130,7 @@ static int write_dev_flush(struct btrfs_device *device, int wait)
         * caller
         */
        device->flush_bio = NULL;
-        bio = bio_alloc(GFP_NOFS, 0);
+        bio = btrfs_io_bio_alloc(GFP_NOFS, 0);
        if (!bio)
                return -ENOMEM;
@@ -3659,8 +3661,11 @@ static void btrfs_destroy_ordered_operations(struct btrfs_transaction *t,
                                         ordered_operations);
                list_del_init(&btrfs_inode->ordered_operations);
+                spin_unlock(&root->fs_info->ordered_extent_lock);
                btrfs_invalidate_inodes(btrfs_inode->root);
+                spin_lock(&root->fs_info->ordered_extent_lock);
        }
        spin_unlock(&root->fs_info->ordered_extent_lock);
@@ -3782,8 +3787,11 @@ static void btrfs_destroy_delalloc_inodes(struct btrfs_root *root)
                list_del_init(&btrfs_inode->delalloc_inodes);
                clear_bit(BTRFS_INODE_IN_DELALLOC_LIST,
                          &btrfs_inode->runtime_flags);
+                spin_unlock(&root->fs_info->delalloc_lock);
                btrfs_invalidate_inodes(btrfs_inode->root);
+                spin_lock(&root->fs_info->delalloc_lock);
        }
        spin_unlock(&root->fs_info->delalloc_lock);
@@ -3808,7 +3816,7 @@ static int btrfs_destroy_marked_extents(struct btrfs_root *root,
                while (start <= end) {
                        eb = btrfs_find_tree_block(root, start,
                                                   root->leafsize);
-                        start += eb->len;
+                        start += root->leafsize;
                        if (!eb)
                                continue;
                        wait_on_extent_buffer_writeback(eb);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 2305b5c5cf00..df472ab1b5ac 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2070,8 +2070,7 @@ static int run_delayed_extent_op(struct btrfs_trans_handle *trans,
        u32 item_size;
        int ret;
        int err = 0;
-        int metadata = (node->type == BTRFS_TREE_BLOCK_REF_KEY ||
+        int metadata = !extent_op->is_data;
-                        node->type == BTRFS_SHARED_BLOCK_REF_KEY);
        if (trans->aborted)
                return 0;
@@ -2086,11 +2085,8 @@ static int run_delayed_extent_op(struct btrfs_trans_handle *trans,
        key.objectid = node->bytenr;
        if (metadata) {
-                struct btrfs_delayed_tree_ref *tree_ref;
-                tree_ref = btrfs_delayed_node_to_tree_ref(node);
                key.type = BTRFS_METADATA_ITEM_KEY;
-                key.offset = tree_ref->level;
+                key.offset = extent_op->level;
        } else {
                key.type = BTRFS_EXTENT_ITEM_KEY;
                key.offset = node->num_bytes;
@@ -2719,7 +2715,7 @@ out:
 int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
                                struct btrfs_root *root,
                                u64 bytenr, u64 num_bytes, u64 flags,
-                                int is_data)
+                                int level, int is_data)
 {
        struct btrfs_delayed_extent_op *extent_op;
        int ret;
@@ -2732,6 +2728,7 @@ int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
        extent_op->update_flags = 1;
        extent_op->update_key = 0;
        extent_op->is_data = is_data ? 1 : 0;
+        extent_op->level = level;
        ret = btrfs_add_delayed_extent_op(root->fs_info, trans, bytenr,
                                          num_bytes, extent_op);
@@ -3109,6 +3106,11 @@ again:
        WARN_ON(ret);
        if (i_size_read(inode) > 0) {
+                ret = btrfs_check_trunc_cache_free_space(root,
+                                        &root->fs_info->global_block_rsv);
+                if (ret)
+                        goto out_put;
                ret = btrfs_truncate_free_space_cache(root, trans, path,
                                                      inode);
                if (ret)
@@ -4562,6 +4564,8 @@ static void init_global_block_rsv(struct btrfs_fs_info *fs_info)
        fs_info->csum_root->block_rsv = &fs_info->global_block_rsv;
        fs_info->dev_root->block_rsv = &fs_info->global_block_rsv;
        fs_info->tree_root->block_rsv = &fs_info->global_block_rsv;
+        if (fs_info->quota_root)
+                fs_info->quota_root->block_rsv = &fs_info->global_block_rsv;
        fs_info->chunk_root->block_rsv = &fs_info->chunk_block_rsv;
        update_global_block_rsv(fs_info);
@@ -6651,51 +6655,51 @@ use_block_rsv(struct btrfs_trans_handle *trans,
        struct btrfs_block_rsv *block_rsv;
        struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv;
        int ret;
+        bool global_updated = false;
        block_rsv = get_block_rsv(trans, root);
-        if (block_rsv->size == 0) {
+        if (unlikely(block_rsv->size == 0))
-                ret = reserve_metadata_bytes(root, block_rsv, blocksize,
+                goto try_reserve;
-                                             BTRFS_RESERVE_NO_FLUSH);
+again:
-                /*
+        ret = block_rsv_use_bytes(block_rsv, blocksize);
-                 * If we couldn't reserve metadata bytes try and use some from
+        if (!ret)
-                 * the global reserve.
-                 */
-                if (ret && block_rsv != global_rsv) {
-                        ret = block_rsv_use_bytes(global_rsv, blocksize);
-                        if (!ret)
-                                return global_rsv;
-                        return ERR_PTR(ret);
-                } else if (ret) {
-                        return ERR_PTR(ret);
-                }
                return block_rsv;
+        if (block_rsv->failfast)
+                return ERR_PTR(ret);
+        if (block_rsv->type == BTRFS_BLOCK_RSV_GLOBAL && !global_updated) {
+                global_updated = true;
+                update_global_block_rsv(root->fs_info);
+                goto again;
        }
-        ret = block_rsv_use_bytes(block_rsv, blocksize);
+        if (btrfs_test_opt(root, ENOSPC_DEBUG)) {
+                static DEFINE_RATELIMIT_STATE(_rs,
+                                DEFAULT_RATELIMIT_INTERVAL * 10,
+                                /*DEFAULT_RATELIMIT_BURST*/ 1);
+                if (__ratelimit(&_rs))
+                        WARN(1, KERN_DEBUG
+                                "btrfs: block rsv returned %d\n", ret);
+        }
+try_reserve:
+        ret = reserve_metadata_bytes(root, block_rsv, blocksize,
+                                     BTRFS_RESERVE_NO_FLUSH);
        if (!ret)
                return block_rsv;
-        if (ret && !block_rsv->failfast) {
+        /*
-                if (btrfs_test_opt(root, ENOSPC_DEBUG)) {
+         * If we couldn't reserve metadata bytes try and use some from
-                        static DEFINE_RATELIMIT_STATE(_rs,
+         * the global reserve if its space type is the same as the global
-                                        DEFAULT_RATELIMIT_INTERVAL * 10,
+         * reservation.
-                                        /*DEFAULT_RATELIMIT_BURST*/ 1);
+         */
-                        if (__ratelimit(&_rs))
+        if (block_rsv->type != BTRFS_BLOCK_RSV_GLOBAL &&
-                                WARN(1, KERN_DEBUG
+            block_rsv->space_info == global_rsv->space_info) {
-                                        "btrfs: block rsv returned %d\n", ret);
+                ret = block_rsv_use_bytes(global_rsv, blocksize);
-                }
+                if (!ret)
-                ret = reserve_metadata_bytes(root, block_rsv, blocksize,
+                        return global_rsv;
-                                             BTRFS_RESERVE_NO_FLUSH);
-                if (!ret) {
-                        return block_rsv;
-                } else if (ret && block_rsv != global_rsv) {
-                        ret = block_rsv_use_bytes(global_rsv, blocksize);
-                        if (!ret)
-                                return global_rsv;
-                }
        }
+        return ERR_PTR(ret);
-        return ERR_PTR(-ENOSPC);
 }
 static void unuse_block_rsv(struct btrfs_fs_info *fs_info,
@@ -6763,6 +6767,7 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
                        extent_op->update_key = 1;
                extent_op->update_flags = 1;
                extent_op->is_data = 0;
+                extent_op->level = level;
                ret = btrfs_add_delayed_tree_ref(root->fs_info, trans,
                                        ins.objectid,
@@ -6934,7 +6939,8 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
                ret = btrfs_dec_ref(trans, root, eb, 0, wc->for_reloc);
                BUG_ON(ret); /* -ENOMEM */
                ret = btrfs_set_disk_extent_flags(trans, root, eb->start,
-                                                  eb->len, flag, 0);
+                                                  eb->len, flag,
+                                                  btrfs_header_level(eb), 0);
                BUG_ON(ret); /* -ENOMEM */
                wc->flags[level] |= flag;
        }
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 32d67a822e93..e7e7afb4a872 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -23,6 +23,7 @@
 static struct kmem_cache *extent_state_cache;
 static struct kmem_cache *extent_buffer_cache;
+static struct bio_set *btrfs_bioset;
 #ifdef CONFIG_BTRFS_DEBUG
 static LIST_HEAD(buffers);
@@ -125,10 +126,20 @@ int __init extent_io_init(void)
                        SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
        if (!extent_buffer_cache)
                goto free_state_cache;
+        btrfs_bioset = bioset_create(BIO_POOL_SIZE,
+                                     offsetof(struct btrfs_io_bio, bio));
+        if (!btrfs_bioset)
+                goto free_buffer_cache;
        return 0;
+free_buffer_cache:
+        kmem_cache_destroy(extent_buffer_cache);
+        extent_buffer_cache = NULL;
 free_state_cache:
        kmem_cache_destroy(extent_state_cache);
+        extent_state_cache = NULL;
        return -ENOMEM;
 }
@@ -145,6 +156,8 @@ void extent_io_exit(void)
                kmem_cache_destroy(extent_state_cache);
        if (extent_buffer_cache)
                kmem_cache_destroy(extent_buffer_cache);
+        if (btrfs_bioset)
+                bioset_free(btrfs_bioset);
 }
 void extent_io_tree_init(struct extent_io_tree *tree,
@@ -1948,28 +1961,6 @@ static void check_page_uptodate(struct extent_io_tree *tree, struct page *page)
 }
 /*
- * helper function to unlock a page if all the extents in the tree
- * for that page are unlocked
- */
-static void check_page_locked(struct extent_io_tree *tree, struct page *page)
-{
-        u64 start = page_offset(page);
-        u64 end = start + PAGE_CACHE_SIZE - 1;
-        if (!test_range_bit(tree, start, end, EXTENT_LOCKED, 0, NULL))
-                unlock_page(page);
-}
-/*
- * helper function to end page writeback if all the extents
- * in the tree for that page are done with writeback
- */
-static void check_page_writeback(struct extent_io_tree *tree,
-                                 struct page *page)
-{
-        end_page_writeback(page);
-}
-/*
 * When IO fails, either with EIO or csum verification fails, we
 * try other mirrors that might have a good copy of the data.  This
 * io_failure_record is used to record state as we go through all the
@@ -2046,7 +2037,7 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 start,
        if (btrfs_is_parity_mirror(map_tree, logical, length, mirror_num))
                return 0;
-        bio = bio_alloc(GFP_NOFS, 1);
+        bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
        if (!bio)
                return -EIO;
        bio->bi_private = &compl;
@@ -2336,7 +2327,7 @@ static int bio_readpage_error(struct bio *failed_bio, struct page *page,
                return -EIO;
        }
-        bio = bio_alloc(GFP_NOFS, 1);
+        bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
        if (!bio) {
                free_io_failure(inode, failrec, 0);
                return -EIO;
@@ -2398,19 +2389,24 @@ static void end_bio_extent_writepage(struct bio *bio, int err)
        struct extent_io_tree *tree;
        u64 start;
        u64 end;
-        int whole_page;
        do {
                struct page *page = bvec->bv_page;
                tree = &BTRFS_I(page->mapping->host)->io_tree;
-                start = page_offset(page) + bvec->bv_offset;
+                /* We always issue full-page writes, but if some block
-                end = start + bvec->bv_len - 1;
+                 * in a page fails to write, blk_update_request() will
+                 * advance bv_offset and adjust bv_len to compensate.
+                 * Print a warning for nonzero offsets, and an error
+                 * if they don't add up to a full page.  */
+                if (bvec->bv_offset || bvec->bv_len != PAGE_CACHE_SIZE)
+                        printk("%s page write in btrfs with offset %u and length %u\n",
+                               bvec->bv_offset + bvec->bv_len != PAGE_CACHE_SIZE
+                               ? KERN_ERR "partial" : KERN_INFO "incomplete",
+                               bvec->bv_offset, bvec->bv_len);
-                if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE)
+                start = page_offset(page);
-                        whole_page = 1;
+                end = start + bvec->bv_offset + bvec->bv_len - 1;
-                else
-                        whole_page = 0;
                if (--bvec >= bio->bi_io_vec)
                        prefetchw(&bvec->bv_page->flags);
@@ -2418,10 +2414,7 @@ static void end_bio_extent_writepage(struct bio *bio, int err)
                if (end_extent_writepage(page, err, start, end))
                        continue;
-                if (whole_page)
+                end_page_writeback(page);
-                        end_page_writeback(page);
-                else
-                        check_page_writeback(tree, page);
        } while (bvec >= bio->bi_io_vec);
        bio_put(bio);
@@ -2446,7 +2439,6 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
        struct extent_io_tree *tree;
        u64 start;
        u64 end;
-        int whole_page;
        int mirror;
        int ret;
@@ -2457,19 +2449,26 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
                struct page *page = bvec->bv_page;
                struct extent_state *cached = NULL;
                struct extent_state *state;
+                struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
                pr_debug("end_bio_extent_readpage: bi_sector=%llu, err=%d, "
-                         "mirror=%ld\n", (u64)bio->bi_sector, err,
+                         "mirror=%lu\n", (u64)bio->bi_sector, err,
-                         (long int)bio->bi_bdev);
+                         io_bio->mirror_num);
                tree = &BTRFS_I(page->mapping->host)->io_tree;
-                start = page_offset(page) + bvec->bv_offset;
+                /* We always issue full-page reads, but if some block
-                end = start + bvec->bv_len - 1;
+                 * in a page fails to read, blk_update_request() will
+                 * advance bv_offset and adjust bv_len to compensate.
+                 * Print a warning for nonzero offsets, and an error
+                 * if they don't add up to a full page.  */
+                if (bvec->bv_offset || bvec->bv_len != PAGE_CACHE_SIZE)
+                        printk("%s page read in btrfs with offset %u and length %u\n",
+                               bvec->bv_offset + bvec->bv_len != PAGE_CACHE_SIZE
+                               ? KERN_ERR "partial" : KERN_INFO "incomplete",
+                               bvec->bv_offset, bvec->bv_len);
-                if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE)
+                start = page_offset(page);
-                        whole_page = 1;
+                end = start + bvec->bv_offset + bvec->bv_len - 1;
-                else
-                        whole_page = 0;
                if (++bvec <= bvec_end)
                        prefetchw(&bvec->bv_page->flags);
@@ -2485,7 +2484,7 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
                }
                spin_unlock(&tree->lock);
-                mirror = (int)(unsigned long)bio->bi_bdev;
+                mirror = io_bio->mirror_num;
                if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) {
                        ret = tree->ops->readpage_end_io_hook(page, start, end,
                                                              state, mirror);
@@ -2528,39 +2527,35 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
                }
                unlock_extent_cached(tree, start, end, &cached, GFP_ATOMIC);
-                if (whole_page) {
+                if (uptodate) {
-                        if (uptodate) {
+                        SetPageUptodate(page);
-                                SetPageUptodate(page);
-                        } else {
-                                ClearPageUptodate(page);
-                                SetPageError(page);
-                        }
-                        unlock_page(page);
                } else {
-                        if (uptodate) {
+                        ClearPageUptodate(page);
-                                check_page_uptodate(tree, page);
+                        SetPageError(page);
-                        } else {
-                                ClearPageUptodate(page);
-                                SetPageError(page);
-                        }
-                        check_page_locked(tree, page);
                }
+                unlock_page(page);
        } while (bvec <= bvec_end);
        bio_put(bio);
 }
+/*
+ * this allocates from the btrfs_bioset.  We're returning a bio right now
+ * but you can call btrfs_io_bio for the appropriate container_of magic
+ */
 struct bio *
 btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
                gfp_t gfp_flags)
 {
        struct bio *bio;
-        bio = bio_alloc(gfp_flags, nr_vecs);
+        bio = bio_alloc_bioset(gfp_flags, nr_vecs, btrfs_bioset);
        if (bio == NULL && (current->flags & PF_MEMALLOC)) {
-                while (!bio && (nr_vecs /= 2))
+                while (!bio && (nr_vecs /= 2)) {
-                        bio = bio_alloc(gfp_flags, nr_vecs);
+                        bio = bio_alloc_bioset(gfp_flags,
+                                               nr_vecs, btrfs_bioset);
+                }
        }
        if (bio) {
@@ -2571,6 +2566,19 @@ btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
        return bio;
 }
+struct bio *btrfs_bio_clone(struct bio *bio, gfp_t gfp_mask)
+{
+        return bio_clone_bioset(bio, gfp_mask, btrfs_bioset);
+}
+/* this also allocates from the btrfs_bioset */
+struct bio *btrfs_io_bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs)
+{
+        return bio_alloc_bioset(gfp_mask, nr_iovecs, btrfs_bioset);
+}
 static int __must_check submit_one_bio(int rw, struct bio *bio,
                                       int mirror_num, unsigned long bio_flags)
 {
@@ -3988,7 +3996,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
                last_for_get_extent = isize;
        }
-        lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len, 0,
+        lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len - 1, 0,
                         &cached_state);
        em = get_extent_skip_holes(inode, start, last_for_get_extent,
@@ -4075,7 +4083,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 out_free:
        free_extent_map(em);
 out:
-        unlock_extent_cached(&BTRFS_I(inode)->io_tree, start, start + len,
+        unlock_extent_cached(&BTRFS_I(inode)->io_tree, start, start + len - 1,
                             &cached_state, GFP_NOFS);
        return ret;
 }
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index a2c03a175009..41fb81e7ec53 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -336,6 +336,8 @@ int extent_clear_unlock_delalloc(struct inode *inode,
 struct bio *
 btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
                gfp_t gfp_flags);
+struct bio *btrfs_io_bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs);
+struct bio *btrfs_bio_clone(struct bio *bio, gfp_t gfp_mask);
 struct btrfs_fs_info;
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index ecca6c7375a6..e53009657f0e 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -197,30 +197,37 @@ int create_free_space_inode(struct btrfs_root *root,
                                         block_group->key.objectid);
 }
+/*
+ * Check whether @rsv currently holds enough reserved bytes to truncate a
+ * free space cache inode (one item of slack plus one inode update).
+ * Returns 0 if it does, -ENOSPC if it does not.
+ */
-int btrfs_truncate_free_space_cache(struct btrfs_root *root,
+int btrfs_check_trunc_cache_free_space(struct btrfs_root *root,
-                                    struct btrfs_trans_handle *trans,
+                                       struct btrfs_block_rsv *rsv)
-                                    struct btrfs_path *path,
-                                    struct inode *inode)
 {
-        struct btrfs_block_rsv *rsv;
        u64 needed_bytes;
-        loff_t oldsize;
+        int ret;
-        int ret = 0;
-        rsv = trans->block_rsv;
-        trans->block_rsv = &root->fs_info->global_block_rsv;
        /* 1 for slack space, 1 for updating the inode */
        needed_bytes = btrfs_calc_trunc_metadata_size(root, 1) +
                btrfs_calc_trans_metadata_size(root, 1);
-        spin_lock(&trans->block_rsv->lock);
+        spin_lock(&rsv->lock);
-        if (trans->block_rsv->reserved < needed_bytes) {
+        if (rsv->reserved < needed_bytes)
-                spin_unlock(&trans->block_rsv->lock);
+                ret = -ENOSPC;
-                trans->block_rsv = rsv;
+        else
-                return -ENOSPC;
+                ret = 0;
-        }
+        spin_unlock(&rsv->lock);
-        spin_unlock(&trans->block_rsv->lock);
+        return ret;
+}
+int btrfs_truncate_free_space_cache(struct btrfs_root *root,
+                                    struct btrfs_trans_handle *trans,
+                                    struct btrfs_path *path,
+                                    struct inode *inode)
+{
+        loff_t oldsize;
+        int ret = 0;
        oldsize = i_size_read(inode);
        btrfs_i_size_write(inode, 0);
@@ -232,9 +234,7 @@ int btrfs_truncate_free_space_cache(struct btrfs_root *root,
         */
        ret = btrfs_truncate_inode_items(trans, root, inode,
                                         0, BTRFS_EXTENT_DATA_KEY);
        if (ret) {
-                trans->block_rsv = rsv;
                btrfs_abort_transaction(trans, root, ret);
                return ret;
        }
@@ -242,7 +242,6 @@ int btrfs_truncate_free_space_cache(struct btrfs_root *root,
        ret = btrfs_update_inode(trans, root, inode);
        if (ret)
                btrfs_abort_transaction(trans, root, ret);
-        trans->block_rsv = rsv;
        return ret;
 }
@@ -920,10 +919,8 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
        /* Make sure we can fit our crcs into the first page */
        if (io_ctl.check_crcs &&
-            (io_ctl.num_pages * sizeof(u32)) >= PAGE_CACHE_SIZE) {
+            (io_ctl.num_pages * sizeof(u32)) >= PAGE_CACHE_SIZE)
-                WARN_ON(1);
                goto out_nospc;
-        }
        io_ctl_set_generation(&io_ctl, trans->transid);
diff --git a/fs/btrfs/free-space-cache.h b/fs/btrfs/free-space-cache.h
index 4dc17d8809c7..8b7f19f44961 100644
--- a/fs/btrfs/free-space-cache.h
+++ b/fs/btrfs/free-space-cache.h
@@ -54,6 +54,8 @@ int create_free_space_inode(struct btrfs_root *root,
                            struct btrfs_block_group_cache *block_group,
                            struct btrfs_path *path);
+int btrfs_check_trunc_cache_free_space(struct btrfs_root *root,
+                                       struct btrfs_block_rsv *rsv);
 int btrfs_truncate_free_space_cache(struct btrfs_root *root,
                                    struct btrfs_trans_handle *trans,
                                    struct btrfs_path *path,
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c
index d26f67a59e36..2c66ddbbe670 100644
--- a/fs/btrfs/inode-map.c
+++ b/fs/btrfs/inode-map.c
@@ -429,11 +429,12 @@ int btrfs_save_ino_cache(struct btrfs_root *root,
        num_bytes = trans->bytes_reserved;
        /*
         * 1 item for inode item insertion if need
-         * 3 items for inode item update (in the worst case)
+         * 4 items for inode item update (in the worst case)
+         * 1 items for slack space if we need do truncation
         * 1 item for free space object
         * 3 items for pre-allocation
         */
-        trans->bytes_reserved = btrfs_calc_trans_metadata_size(root, 8);
+        trans->bytes_reserved = btrfs_calc_trans_metadata_size(root, 10);
        ret = btrfs_block_rsv_add(root, trans->block_rsv,
                                  trans->bytes_reserved,
                                  BTRFS_RESERVE_NO_FLUSH);
@@ -468,7 +469,8 @@ again:
        if (i_size_read(inode) > 0) {
                ret = btrfs_truncate_free_space_cache(root, trans, path, inode);
                if (ret) {
-                        btrfs_abort_transaction(trans, root, ret);
+                        if (ret != -ENOSPC)
+                                btrfs_abort_transaction(trans, root, ret);
                        goto out_put;
                }
        }
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 9b31b3b091fc..af978f7682b3 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -715,8 +715,10 @@ retry:
                                        async_extent->ram_size - 1, 0);
                em = alloc_extent_map();
-                if (!em)
+                if (!em) {
+                        ret = -ENOMEM;
                        goto out_free_reserve;
+                }
                em->start = async_extent->start;
                em->len = async_extent->ram_size;
                em->orig_start = em->start;
@@ -923,8 +925,10 @@ static noinline int __cow_file_range(struct btrfs_trans_handle *trans,
                }
                em = alloc_extent_map();
-                if (!em)
+                if (!em) {
+                        ret = -ENOMEM;
                        goto out_reserve;
+                }
                em->start = start;
                em->orig_start = em->start;
                ram_size = ins.offset;
@@ -4724,6 +4728,7 @@ void btrfs_evict_inode(struct inode *inode)
        btrfs_end_transaction(trans, root);
        btrfs_btree_balance_dirty(root);
 no_delete:
+        btrfs_remove_delayed_node(inode);
        clear_inode(inode);
        return;
 }
@@ -4839,14 +4844,13 @@ static void inode_tree_add(struct inode *inode)
        struct rb_node **p;
        struct rb_node *parent;
        u64 ino = btrfs_ino(inode);
-again:
-        p = &root->inode_tree.rb_node;
-        parent = NULL;
        if (inode_unhashed(inode))
                return;
+again:
+        parent = NULL;
        spin_lock(&root->inode_lock);
+        p = &root->inode_tree.rb_node;
        while (*p) {
                parent = *p;
                entry = rb_entry(parent, struct btrfs_inode, rb_node);
@@ -6928,7 +6932,11 @@ struct btrfs_dio_private {
        /* IO errors */
        int errors;
+        /* orig_bio is our btrfs_io_bio */
        struct bio *orig_bio;
+        /* dio_bio came from fs/direct-io.c */
+        struct bio *dio_bio;
 };
 static void btrfs_endio_direct_read(struct bio *bio, int err)
@@ -6938,6 +6946,7 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
        struct bio_vec *bvec = bio->bi_io_vec;
        struct inode *inode = dip->inode;
        struct btrfs_root *root = BTRFS_I(inode)->root;
+        struct bio *dio_bio;
        u64 start;
        start = dip->logical_offset;
@@ -6977,14 +6986,15 @@ failed:
        unlock_extent(&BTRFS_I(inode)->io_tree, dip->logical_offset,
                      dip->logical_offset + dip->bytes - 1);
-        bio->bi_private = dip->private;
+        dio_bio = dip->dio_bio;
        kfree(dip);
        /* If we had a csum failure make sure to clear the uptodate flag */
        if (err)
-                clear_bit(BIO_UPTODATE, &bio->bi_flags);
+                clear_bit(BIO_UPTODATE, &dio_bio->bi_flags);
-        dio_end_io(bio, err);
+        dio_end_io(dio_bio, err);
+        bio_put(bio);
 }
 static void btrfs_endio_direct_write(struct bio *bio, int err)
@@ -6995,6 +7005,7 @@ static void btrfs_endio_direct_write(struct bio *bio, int err)
        struct btrfs_ordered_extent *ordered = NULL;
        u64 ordered_offset = dip->logical_offset;
        u64 ordered_bytes = dip->bytes;
+        struct bio *dio_bio;
        int ret;
        if (err)
@@ -7022,14 +7033,15 @@ out_test:
                goto again;
        }
 out_done:
-        bio->bi_private = dip->private;
+        dio_bio = dip->dio_bio;
        kfree(dip);
        /* If we had an error make sure to clear the uptodate flag */
        if (err)
-                clear_bit(BIO_UPTODATE, &bio->bi_flags);
+                clear_bit(BIO_UPTODATE, &dio_bio->bi_flags);
-        dio_end_io(bio, err);
+        dio_end_io(dio_bio, err);
+        bio_put(bio);
 }
 static int __btrfs_submit_bio_start_direct_io(struct inode *inode, int rw,
@@ -7065,10 +7077,10 @@ static void btrfs_end_dio_bio(struct bio *bio, int err)
        if (!atomic_dec_and_test(&dip->pending_bios))
                goto out;
-        if (dip->errors)
+        if (dip->errors) {
                bio_io_error(dip->orig_bio);
-        else {
+        } else {
-                set_bit(BIO_UPTODATE, &dip->orig_bio->bi_flags);
+                set_bit(BIO_UPTODATE, &dip->dio_bio->bi_flags);
                bio_endio(dip->orig_bio, 0);
        }
 out:
@@ -7243,25 +7255,34 @@ out_err:
        return 0;
 }
-static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode,
+static void btrfs_submit_direct(int rw, struct bio *dio_bio,
-                                loff_t file_offset)
+                                struct inode *inode, loff_t file_offset)
 {
        struct btrfs_root *root = BTRFS_I(inode)->root;
        struct btrfs_dio_private *dip;
-        struct bio_vec *bvec = bio->bi_io_vec;
+        struct bio_vec *bvec = dio_bio->bi_io_vec;
+        struct bio *io_bio;
        int skip_sum;
        int write = rw & REQ_WRITE;
        int ret = 0;
        skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
+        io_bio = btrfs_bio_clone(dio_bio, GFP_NOFS);
+        if (!io_bio) {
+                ret = -ENOMEM;
+                goto free_ordered;
+        }
        dip = kmalloc(sizeof(*dip), GFP_NOFS);
        if (!dip) {
                ret = -ENOMEM;
-                goto free_ordered;
+                goto free_io_bio;
        }
-        dip->private = bio->bi_private;
+        dip->private = dio_bio->bi_private;
+        io_bio->bi_private = dio_bio->bi_private;
        dip->inode = inode;
        dip->logical_offset = file_offset;
@@ -7269,22 +7290,27 @@ static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode,
        do {
                dip->bytes += bvec->bv_len;
                bvec++;
-        } while (bvec <= (bio->bi_io_vec + bio->bi_vcnt - 1));
+        } while (bvec <= (dio_bio->bi_io_vec + dio_bio->bi_vcnt - 1));
-        dip->disk_bytenr = (u64)bio->bi_sector << 9;
+        dip->disk_bytenr = (u64)dio_bio->bi_sector << 9;
-        bio->bi_private = dip;
+        io_bio->bi_private = dip;
        dip->errors = 0;
-        dip->orig_bio = bio;
+        dip->orig_bio = io_bio;
+        dip->dio_bio = dio_bio;
        atomic_set(&dip->pending_bios, 0);
        if (write)
-                bio->bi_end_io = btrfs_endio_direct_write;
+                io_bio->bi_end_io = btrfs_endio_direct_write;
        else
-                bio->bi_end_io = btrfs_endio_direct_read;
+                io_bio->bi_end_io = btrfs_endio_direct_read;
        ret = btrfs_submit_direct_hook(rw, dip, skip_sum);
        if (!ret)
                return;
+free_io_bio:
+        bio_put(io_bio);
 free_ordered:
        /*
         * If this is a write, we need to clean up the reserved space and kill
@@ -7300,7 +7326,7 @@ free_ordered:
                btrfs_put_ordered_extent(ordered);
                btrfs_put_ordered_extent(ordered);
        }
-        bio_endio(bio, ret);
+        bio_endio(dio_bio, ret);
 }
 static ssize_t check_direct_IO(struct btrfs_root *root, int rw, struct kiocb *iocb,
@@ -7979,7 +8005,6 @@ void btrfs_destroy_inode(struct inode *inode)
        inode_tree_del(inode);
        btrfs_drop_extent_cache(inode, 0, (u64)-1, 0);
 free:
-        btrfs_remove_delayed_node(inode);
        call_rcu(&inode->i_rcu, btrfs_i_callback);
 }
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 0de4a2fcfb24..0f81d67cdc8d 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1801,7 +1801,11 @@ static noinline int copy_to_sk(struct btrfs_root *root,
                item_off = btrfs_item_ptr_offset(leaf, i);
                item_len = btrfs_item_size_nr(leaf, i);
-                if (item_len > BTRFS_SEARCH_ARGS_BUFSIZE)
+                btrfs_item_key_to_cpu(leaf, key, i);
+                if (!key_in_sk(key, sk))
+                        continue;
+                if (sizeof(sh) + item_len > BTRFS_SEARCH_ARGS_BUFSIZE)
                        item_len = 0;
                if (sizeof(sh) + item_len + *sk_offset >
@@ -1810,10 +1814,6 @@ static noinline int copy_to_sk(struct btrfs_root *root,
                        goto overflow;
                }
-                btrfs_item_key_to_cpu(leaf, key, i);
-                if (!key_in_sk(key, sk))
-                        continue;
                sh.objectid = key->objectid;
                sh.offset = key->offset;
                sh.type = key->type;
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index 0740621daf6c..0525e1389f5b 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -1050,7 +1050,7 @@ static int rbio_add_io_page(struct btrfs_raid_bio *rbio,
        }
        /* put a new bio on the list */
-        bio = bio_alloc(GFP_NOFS, bio_max_len >> PAGE_SHIFT?:1);
+        bio = btrfs_io_bio_alloc(GFP_NOFS, bio_max_len >> PAGE_SHIFT?:1);
        if (!bio)
                return -ENOMEM;
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 704a1b8d2a2b..395b82031a42 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -1773,7 +1773,7 @@ again:
                        if (!eb || !extent_buffer_uptodate(eb)) {
                                ret = (!eb) ? -ENOMEM : -EIO;
                                free_extent_buffer(eb);
-                                return ret;
+                                break;
                        }
                        btrfs_tree_lock(eb);
                        if (cow) {
@@ -3350,6 +3350,11 @@ static int delete_block_group_cache(struct btrfs_fs_info *fs_info,
        }
 truncate:
+        ret = btrfs_check_trunc_cache_free_space(root,
+                                                 &fs_info->global_block_rsv);
+        if (ret)
+                goto out;
        path = btrfs_alloc_path();
        if (!path) {
                ret = -ENOMEM;
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index f489e24659a4..79bd479317cb 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -1296,7 +1296,7 @@ static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
                }
                WARN_ON(!page->page);
-                bio = bio_alloc(GFP_NOFS, 1);
+                bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
                if (!bio) {
                        page->io_error = 1;
                        sblock->no_io_error_seen = 0;
@@ -1431,7 +1431,7 @@ static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
                        return -EIO;
                }
-                bio = bio_alloc(GFP_NOFS, 1);
+                bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
                if (!bio)
                        return -EIO;
                bio->bi_bdev = page_bad->dev->bdev;
@@ -1522,7 +1522,7 @@ again:
                sbio->dev = wr_ctx->tgtdev;
                bio = sbio->bio;
                if (!bio) {
-                        bio = bio_alloc(GFP_NOFS, wr_ctx->pages_per_wr_bio);
+                        bio = btrfs_io_bio_alloc(GFP_NOFS, wr_ctx->pages_per_wr_bio);
                        if (!bio) {
                                mutex_unlock(&wr_ctx->wr_lock);
                                return -ENOMEM;
@@ -1930,7 +1930,7 @@ again:
                sbio->dev = spage->dev;
                bio = sbio->bio;
                if (!bio) {
-                        bio = bio_alloc(GFP_NOFS, sctx->pages_per_rd_bio);
+                        bio = btrfs_io_bio_alloc(GFP_NOFS, sctx->pages_per_rd_bio);
                        if (!bio)
                                return -ENOMEM;
                        sbio->bio = bio;
@@ -3307,7 +3307,7 @@ static int write_page_nocow(struct scrub_ctx *sctx,
                        "btrfs: scrub write_page_nocow(bdev == NULL) is unexpected!\n");
                return -EIO;
        }
-        bio = bio_alloc(GFP_NOFS, 1);
+        bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
        if (!bio) {
                spin_lock(&sctx->stat_lock);
                sctx->stat.malloc_errors++;
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index a4807ced23cc..f0857e092a3c 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -1263,6 +1263,7 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
                btrfs_dev_replace_suspend_for_unmount(fs_info);
                btrfs_scrub_cancel(fs_info);
+                btrfs_pause_balance(fs_info);
                ret = btrfs_commit_super(root);
                if (ret)
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 0e925ced971b..8bffb9174afb 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -3120,14 +3120,13 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
        allowed = BTRFS_AVAIL_ALLOC_BIT_SINGLE;
        if (num_devices == 1)
                allowed |= BTRFS_BLOCK_GROUP_DUP;
-        else if (num_devices < 4)
+        else if (num_devices > 1)
                allowed |= (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1);
-        else
+        if (num_devices > 2)
-                allowed |= (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1 |
+                allowed |= BTRFS_BLOCK_GROUP_RAID5;
-                                BTRFS_BLOCK_GROUP_RAID10 |
+        if (num_devices > 3)
-                                BTRFS_BLOCK_GROUP_RAID5 |
+                allowed |= (BTRFS_BLOCK_GROUP_RAID10 |
-                                BTRFS_BLOCK_GROUP_RAID6);
+                            BTRFS_BLOCK_GROUP_RAID6);
        if ((bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
            (!alloc_profile_is_valid(bctl->data.target, 1) ||
             (bctl->data.target & ~allowed))) {
@@ -5019,42 +5018,16 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
        return 0;
 }
-static void *merge_stripe_index_into_bio_private(void *bi_private,
-                                                 unsigned int stripe_index)
-{
-        /*
-         * with single, dup, RAID0, RAID1 and RAID10, stripe_index is
-         * at most 1.
-         * The alternative solution (instead of stealing bits from the
-         * pointer) would be to allocate an intermediate structure
-         * that contains the old private pointer plus the stripe_index.
-         */
-        BUG_ON((((uintptr_t)bi_private) & 3) != 0);
-        BUG_ON(stripe_index > 3);
-        return (void *)(((uintptr_t)bi_private) | stripe_index);
-}
-static struct btrfs_bio *extract_bbio_from_bio_private(void *bi_private)
-{
-        return (struct btrfs_bio *)(((uintptr_t)bi_private) & ~((uintptr_t)3));
-}
-static unsigned int extract_stripe_index_from_bio_private(void *bi_private)
-{
-        return (unsigned int)((uintptr_t)bi_private) & 3;
-}
 static void btrfs_end_bio(struct bio *bio, int err)
 {
-        struct btrfs_bio *bbio = extract_bbio_from_bio_private(bio->bi_private);
+        struct btrfs_bio *bbio = bio->bi_private;
        int is_orig_bio = 0;
        if (err) {
                atomic_inc(&bbio->error);
                if (err == -EIO || err == -EREMOTEIO) {
                        unsigned int stripe_index =
-                                extract_stripe_index_from_bio_private(
+                                btrfs_io_bio(bio)->stripe_index;
-                                        bio->bi_private);
                        struct btrfs_device *dev;
                        BUG_ON(stripe_index >= bbio->num_stripes);
@@ -5084,8 +5057,7 @@ static void btrfs_end_bio(struct bio *bio, int err)
                }
                bio->bi_private = bbio->private;
                bio->bi_end_io = bbio->end_io;
-                bio->bi_bdev = (struct block_device *)
+                btrfs_io_bio(bio)->mirror_num = bbio->mirror_num;
-                                        (unsigned long)bbio->mirror_num;
                /* only send an error to the higher layers if it is
                 * beyond the tolerance of the btrfs bio
                 */
@@ -5211,8 +5183,7 @@ static void submit_stripe_bio(struct btrfs_root *root, struct btrfs_bio *bbio,
        struct btrfs_device *dev = bbio->stripes[dev_nr].dev;
        bio->bi_private = bbio;
-        bio->bi_private = merge_stripe_index_into_bio_private(
+        btrfs_io_bio(bio)->stripe_index = dev_nr;
-                        bio->bi_private, (unsigned int)dev_nr);
        bio->bi_end_io = btrfs_end_bio;
        bio->bi_sector = physical >> 9;
 #ifdef DEBUG
@@ -5273,8 +5244,7 @@ static void bbio_error(struct btrfs_bio *bbio, struct bio *bio, u64 logical)
        if (atomic_dec_and_test(&bbio->stripes_pending)) {
                bio->bi_private = bbio->private;
                bio->bi_end_io = bbio->end_io;
-                bio->bi_bdev = (struct block_device *)
+                btrfs_io_bio(bio)->mirror_num = bbio->mirror_num;
-                        (unsigned long)bbio->mirror_num;
                bio->bi_sector = logical >> 9;
                kfree(bbio);
                bio_endio(bio, -EIO);
@@ -5352,7 +5322,7 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
                }
                if (dev_nr < total_devs - 1) {
-                        bio = bio_clone(first_bio, GFP_NOFS);
+                        bio = btrfs_bio_clone(first_bio, GFP_NOFS);
                        BUG_ON(!bio); /* -ENOMEM */
                } else {
                        bio = first_bio;
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 845ccbb0d2e3..f6247e2a47f7 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -152,6 +152,26 @@ struct btrfs_fs_devices {
        int rotating;
 };
+/*
+ * we need the mirror number and stripe index to be passed around
+ * the call chain while we are processing end_io (especially errors).
+ * Really, what we need is a btrfs_bio structure that has this info
+ * and is properly sized with its stripe array, but we're not there
+ * quite yet.  We have our own btrfs bioset, and all of the bios
+ * we allocate are actually btrfs_io_bios.  We'll cram as much of
+ * struct btrfs_bio as we can into this over time.
+ */
+struct btrfs_io_bio {
+        unsigned long mirror_num;
+        unsigned long stripe_index;
+        struct bio bio;
+};
+static inline struct btrfs_io_bio *btrfs_io_bio(struct bio *bio)
+{
+        return container_of(bio, struct btrfs_io_bio, bio);
+}
 struct btrfs_bio_stripe {
        struct btrfs_device *dev;
        u64 physical;
diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c
index 8e33ec65847b..58df174deb10 100644
--- a/fs/cifs/cifs_dfs_ref.c
+++ b/fs/cifs/cifs_dfs_ref.c
@@ -18,6 +18,7 @@
 #include <linux/slab.h>
 #include <linux/vfs.h>
 #include <linux/fs.h>
+#include <linux/inet.h>
 #include "cifsglob.h"
 #include "cifsproto.h"
 #include "cifsfs.h"
@@ -48,58 +49,74 @@ void cifs_dfs_release_automount_timer(void)
 }
 /**
- * cifs_get_share_name  -       extracts share name from UNC
+ * cifs_build_devname - build a devicename from a UNC and optional prepath
- * @node_name:  pointer to UNC string
+ * @nodename:   pointer to UNC string
+ * @prepath:    pointer to prefixpath (or NULL if there isn't one)
 *
- * Extracts sharename form full UNC.
+ * Build a new cifs devicename after chasing a DFS referral. Allocate a buffer
- * i.e. strips from UNC trailing path that is not part of share
+ * big enough to hold the final thing. Copy the UNC from the nodename, and
- * name and fixup missing '\' in the beginning of DFS node refferal
+ * concatenate the prepath onto the end of it if there is one.
- * if necessary.
+ *
- * Returns pointer to share name on success or ERR_PTR on error.
+ * Returns pointer to the built string, or a ERR_PTR. Caller is responsible
- * Caller is responsible for freeing returned string.
+ * for freeing the returned string.
 */
-static char *cifs_get_share_name(const char *node_name)
+static char *
+cifs_build_devname(char *nodename, const char *prepath)
 {
-        int len;
+        size_t pplen;
-        char *UNC;
+        size_t unclen;
-        char *pSep;
+        char *dev;
+        char *pos;
-        len = strlen(node_name);
-        UNC = kmalloc(len+2 /*for term null and additional \ if it's missed */,
+        /* skip over any preceding delimiters */
-                         GFP_KERNEL);
+        nodename += strspn(nodename, "\\");
-        if (!UNC)
+        if (!*nodename)
-                return ERR_PTR(-ENOMEM);
+                return ERR_PTR(-EINVAL);
-        /* get share name and server name */
+        /* get length of UNC and set pos to last char */
-        if (node_name[1] != '\\') {
+        unclen = strlen(nodename);
-                UNC[0] = '\\';
+        pos = nodename + unclen - 1;
-                strncpy(UNC+1, node_name, len);
-                len++;
-                UNC[len] = 0;
-        } else {
-                strncpy(UNC, node_name, len);
-                UNC[len] = 0;
-        }
-        /* find server name end */
+        /* trim off any trailing delimiters */
-        pSep = memchr(UNC+2, '\\', len-2);
+        while (*pos == '\\') {
-        if (!pSep) {
+                --pos;
-                cifs_dbg(VFS, "%s: no server name end in node name: %s\n",
+                --unclen;
-                         __func__, node_name);
-                kfree(UNC);
-                return ERR_PTR(-EINVAL);
        }
-        /* find sharename end */
+        /* allocate a buffer:
-        pSep++;
+         * +2 for preceding "//"
-        pSep = memchr(UNC+(pSep-UNC), '\\', len-(pSep-UNC));
+         * +1 for delimiter between UNC and prepath
-        if (pSep) {
+         * +1 for trailing NULL
-                /* trim path up to sharename end
+         */
-                 * now we have share name in UNC */
+        pplen = prepath ? strlen(prepath) : 0;
-                *pSep = 0;
+        dev = kmalloc(2 + unclen + 1 + pplen + 1, GFP_KERNEL);
+        if (!dev)
+                return ERR_PTR(-ENOMEM);
+        pos = dev;
+        /* add the initial "//" */
+        *pos = '/';
+        ++pos;
+        *pos = '/';
+        ++pos;
+        /* copy in the UNC portion from referral */
+        memcpy(pos, nodename, unclen);
+        pos += unclen;
+        /* copy the prefixpath remainder (if there is one) */
+        if (pplen) {
+                *pos = '/';
+                ++pos;
+                memcpy(pos, prepath, pplen);
+                pos += pplen;
        }
-        return UNC;
+        /* NULL terminator */
+        *pos = '\0';
+        convert_delimiter(dev, '/');
+        return dev;
 }
@@ -123,6 +140,7 @@ char *cifs_compose_mount_options(const char *sb_mountdata,
 {
        int rc;
        char *mountdata = NULL;
+        const char *prepath = NULL;
        int md_len;
        char *tkn_e;
        char *srvIP = NULL;
@@ -132,7 +150,10 @@ char *cifs_compose_mount_options(const char *sb_mountdata,
        if (sb_mountdata == NULL)
                return ERR_PTR(-EINVAL);
-        *devname = cifs_get_share_name(ref->node_name);
+        if (strlen(fullpath) - ref->path_consumed)
+                prepath = fullpath + ref->path_consumed;
+        *devname = cifs_build_devname(ref->node_name, prepath);
        if (IS_ERR(*devname)) {
                rc = PTR_ERR(*devname);
                *devname = NULL;
@@ -146,12 +167,14 @@ char *cifs_compose_mount_options(const char *sb_mountdata,
                goto compose_mount_options_err;
        }
-        /* md_len = strlen(...) + 12 for 'sep+prefixpath='
+        /*
-         * assuming that we have 'unc=' and 'ip=' in
+         * In most cases, we'll be building a shorter string than the original,
-         * the original sb_mountdata
+         * but we do have to assume that the address in the ip= option may be
+         * much longer than the original. Add the max length of an address
+         * string to the length of the original string to allow for worst case.
         */
-        md_len = strlen(sb_mountdata) + rc + strlen(ref->node_name) + 12;
+        md_len = strlen(sb_mountdata) + INET6_ADDRSTRLEN;
-        mountdata = kzalloc(md_len+1, GFP_KERNEL);
+        mountdata = kzalloc(md_len + 1, GFP_KERNEL);
        if (mountdata == NULL) {
                rc = -ENOMEM;
                goto compose_mount_options_err;
@@ -195,26 +218,6 @@ char *cifs_compose_mount_options(const char *sb_mountdata,
                strncat(mountdata, &sep, 1);
        strcat(mountdata, "ip=");
        strcat(mountdata, srvIP);
-        strncat(mountdata, &sep, 1);
-        strcat(mountdata, "unc=");
-        strcat(mountdata, *devname);
-        /* find & copy prefixpath */
-        tkn_e = strchr(ref->node_name + 2, '\\');
-        if (tkn_e == NULL) {
-                /* invalid unc, missing share name*/
-                rc = -EINVAL;
-                goto compose_mount_options_err;
-        }
-        tkn_e = strchr(tkn_e + 1, '\\');
-        if (tkn_e || (strlen(fullpath) - ref->path_consumed)) {
-                strncat(mountdata, &sep, 1);
-                strcat(mountdata, "prefixpath=");
-                if (tkn_e)
-                        strcat(mountdata, tkn_e + 1);
-                strcat(mountdata, fullpath + ref->path_consumed);
-        }
        /*cifs_dbg(FYI, "%s: parent mountdata: %s\n", __func__, sb_mountdata);*/
        /*cifs_dbg(FYI, "%s: submount mountdata: %s\n", __func__, mountdata );*/
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 72e4efee1389..3752b9f6d9e4 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -372,9 +372,6 @@ cifs_show_options(struct seq_file *s, struct dentry *root)
        cifs_show_security(s, tcon->ses->server);
        cifs_show_cache_flavor(s, cifs_sb);
-        seq_printf(s, ",unc=");
-        seq_escape(s, tcon->treeName, " \t\n\\");
        if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER)
                seq_printf(s, ",multiuser");
        else if (tcon->ses->user_name)
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 99eeaa17ee00..5b97e56ddbca 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -1061,6 +1061,7 @@ static int cifs_parse_security_flavors(char *value,
 #endif
        case Opt_sec_none:
                vol->nullauth = 1;
+                vol->secFlg |= CIFSSEC_MAY_NTLM;
                break;
        default:
                cifs_dbg(VFS, "bad security option: %s\n", value);
@@ -1257,14 +1258,18 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
        vol->backupuid_specified = false; /* no backup intent for a user */
        vol->backupgid_specified = false; /* no backup intent for a group */
-        /*
+        switch (cifs_parse_devname(devname, vol)) {
-         * For now, we ignore -EINVAL errors under the assumption that the
+        case 0:
-         * unc= and prefixpath= options will be usable.
+                break;
-         */
+        case -ENOMEM:
-        if (cifs_parse_devname(devname, vol) == -ENOMEM) {
+                cifs_dbg(VFS, "Unable to allocate memory for devname.\n");
-                printk(KERN_ERR "CIFS: Unable to allocate memory to parse "
+                goto cifs_parse_mount_err;
-                                "device string.\n");
+        case -EINVAL:
-                goto out_nomem;
+                cifs_dbg(VFS, "Malformed UNC in devname.\n");
+                goto cifs_parse_mount_err;
+        default:
+                cifs_dbg(VFS, "Unknown error parsing devname.\n");
+                goto cifs_parse_mount_err;
        }
        while ((data = strsep(&options, separator)) != NULL) {
@@ -1826,7 +1831,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
        }
 #endif
        if (!vol->UNC) {
-                cifs_dbg(VFS, "CIFS mount error: No usable UNC path provided in device string or in unc= option!\n");
+                cifs_dbg(VFS, "CIFS mount error: No usable UNC path provided in device string!\n");
                goto cifs_parse_mount_err;
        }
diff --git a/fs/cifs/dns_resolve.c b/fs/cifs/dns_resolve.c
index e7512e497611..7ede7306599f 100644
--- a/fs/cifs/dns_resolve.c
+++ b/fs/cifs/dns_resolve.c
@@ -34,7 +34,7 @@
 /**
 * dns_resolve_server_name_to_ip - Resolve UNC server name to ip address.
- * @unc: UNC path specifying the server
+ * @unc: UNC path specifying the server (with '/' as delimiter)
 * @ip_addr: Where to return the IP address.
 *
 * The IP address will be returned in string form, and the caller is
@@ -64,7 +64,7 @@ dns_resolve_server_name_to_ip(const char *unc, char **ip_addr)
        hostname = unc + 2;
        /* Search for server name delimiter */
-        sep = memchr(hostname, '\\', len);
+        sep = memchr(hostname, '/', len);
        if (sep)
                len = sep - hostname;
        else
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index fc3025199cb3..20efd81266c6 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -171,7 +171,8 @@ cifs_fattr_to_inode(struct inode *inode, struct cifs_fattr *fattr)
        if (fattr->cf_flags & CIFS_FATTR_DFS_REFERRAL)
                inode->i_flags |= S_AUTOMOUNT;
-        cifs_set_ops(inode);
+        if (inode->i_state & I_NEW)
+                cifs_set_ops(inode);
 }
 void
diff --git a/fs/efivarfs/file.c b/fs/efivarfs/file.c
index bfb531564319..8dd524f32284 100644
--- a/fs/efivarfs/file.c
+++ b/fs/efivarfs/file.c
@@ -44,8 +44,11 @@ static ssize_t efivarfs_file_write(struct file *file,
        bytes = efivar_entry_set_get_size(var, attributes, &datasize,
                                          data, &set);
-        if (!set && bytes)
+        if (!set && bytes) {
+                if (bytes == -ENOENT)
+                        bytes = -EIO;
                goto out;
+        }
        if (bytes == -ENOENT) {
                drop_nlink(inode);
@@ -76,7 +79,14 @@ static ssize_t efivarfs_file_read(struct file *file, char __user *userbuf,
        int err;
        err = efivar_entry_size(var, &datasize);
-        if (err)
+        /*
+         * efivarfs represents uncommitted variables with
+         * zero-length files. Reading them should return EOF.
+         */
+        if (err == -ENOENT)
+                return 0;
+        else if (err)
                return err;
        data = kmalloc(datasize + sizeof(attributes), GFP_KERNEL);
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 0aabb344b02e..5aae3d12d400 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -209,7 +209,6 @@ typedef struct ext4_io_end {
        ssize_t                 size;           /* size of the extent */
        struct kiocb            *iocb;          /* iocb struct for AIO */
        int                     result;         /* error value for AIO */
-        atomic_t                count;          /* reference counter */
 } ext4_io_end_t;
 struct ext4_io_submit {
@@ -2651,14 +2650,11 @@ extern int ext4_move_extents(struct file *o_filp, struct file *d_filp,
 /* page-io.c */
 extern int __init ext4_init_pageio(void);
+extern void ext4_add_complete_io(ext4_io_end_t *io_end);
 extern void ext4_exit_pageio(void);
 extern void ext4_ioend_shutdown(struct inode *);
+extern void ext4_free_io_end(ext4_io_end_t *io);
 extern ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags);
-extern ext4_io_end_t *ext4_get_io_end(ext4_io_end_t *io_end);
-extern int ext4_put_io_end(ext4_io_end_t *io_end);
-extern void ext4_put_io_end_defer(ext4_io_end_t *io_end);
-extern void ext4_io_submit_init(struct ext4_io_submit *io,
-                                struct writeback_control *wbc);
 extern void ext4_end_io_work(struct work_struct *work);
 extern void ext4_io_submit(struct ext4_io_submit *io);
 extern int ext4_bio_write_page(struct ext4_io_submit *io,
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 107936db244e..bc0f1910b9cf 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -3642,7 +3642,7 @@ int ext4_find_delalloc_range(struct inode *inode,
 {
        struct extent_status es;
-        ext4_es_find_delayed_extent(inode, lblk_start, &es);
+        ext4_es_find_delayed_extent_range(inode, lblk_start, lblk_end, &es);
        if (es.es_len == 0)
                return 0; /* there is no delay extent in this tree */
        else if (es.es_lblk <= lblk_start &&
@@ -4608,9 +4608,10 @@ static int ext4_find_delayed_extent(struct inode *inode,
        struct extent_status es;
        ext4_lblk_t block, next_del;
-        ext4_es_find_delayed_extent(inode, newes->es_lblk, &es);
        if (newes->es_pblk == 0) {
+                ext4_es_find_delayed_extent_range(inode, newes->es_lblk,
+                                newes->es_lblk + newes->es_len - 1, &es);
                /*
                 * No extent in extent-tree contains block @newes->es_pblk,
                 * then the block may stay in 1)a hole or 2)delayed-extent.
@@ -4630,7 +4631,7 @@ static int ext4_find_delayed_extent(struct inode *inode,
        }
        block = newes->es_lblk + newes->es_len;
-        ext4_es_find_delayed_extent(inode, block, &es);
+        ext4_es_find_delayed_extent_range(inode, block, EXT_MAX_BLOCKS, &es);
        if (es.es_len == 0)
                next_del = EXT_MAX_BLOCKS;
        else
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
index fe3337a85ede..e6941e622d31 100644
--- a/fs/ext4/extents_status.c
+++ b/fs/ext4/extents_status.c
@@ -232,14 +232,16 @@ static struct extent_status *__es_tree_search(struct rb_root *root,
 }
 /*
- * ext4_es_find_delayed_extent: find the 1st delayed extent covering @es->lblk
+ * ext4_es_find_delayed_extent_range: find the 1st delayed extent covering
- * if it exists, otherwise, the next extent after @es->lblk.
+ * @es->lblk if it exists, otherwise, the next extent after @es->lblk.
 *
 * @inode: the inode which owns delayed extents
 * @lblk: the offset where we start to search
+ * @end: the offset where we stop searching
 * @es: delayed extent that we found
 */
-void ext4_es_find_delayed_extent(struct inode *inode, ext4_lblk_t lblk,
+void ext4_es_find_delayed_extent_range(struct inode *inode,
+                                 ext4_lblk_t lblk, ext4_lblk_t end,
                                 struct extent_status *es)
 {
        struct ext4_es_tree *tree = NULL;
@@ -247,7 +249,8 @@ void ext4_es_find_delayed_extent(struct inode *inode, ext4_lblk_t lblk,
        struct rb_node *node;
        BUG_ON(es == NULL);
-        trace_ext4_es_find_delayed_extent_enter(inode, lblk);
+        BUG_ON(end < lblk);
+        trace_ext4_es_find_delayed_extent_range_enter(inode, lblk);
        read_lock(&EXT4_I(inode)->i_es_lock);
        tree = &EXT4_I(inode)->i_es_tree;
@@ -270,6 +273,10 @@ out:
        if (es1 && !ext4_es_is_delayed(es1)) {
                while ((node = rb_next(&es1->rb_node)) != NULL) {
                        es1 = rb_entry(node, struct extent_status, rb_node);
+                        if (es1->es_lblk > end) {
+                                es1 = NULL;
+                                break;
+                        }
                        if (ext4_es_is_delayed(es1))
                                break;
                }
@@ -285,7 +292,7 @@ out:
        read_unlock(&EXT4_I(inode)->i_es_lock);
        ext4_es_lru_add(inode);
-        trace_ext4_es_find_delayed_extent_exit(inode, es);
+        trace_ext4_es_find_delayed_extent_range_exit(inode, es);
 }
 static struct extent_status *
diff --git a/fs/ext4/extents_status.h b/fs/ext4/extents_status.h
index d8e2d4dc311e..f740eb03b707 100644
--- a/fs/ext4/extents_status.h
+++ b/fs/ext4/extents_status.h
@@ -62,7 +62,8 @@ extern int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
                                 unsigned long long status);
 extern int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
                                 ext4_lblk_t len);
-extern void ext4_es_find_delayed_extent(struct inode *inode, ext4_lblk_t lblk,
+extern void ext4_es_find_delayed_extent_range(struct inode *inode,
+                                        ext4_lblk_t lblk, ext4_lblk_t end,
                                        struct extent_status *es);
 extern int ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk,
                                 struct extent_status *es);
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 4959e29573b6..b1b4d51b5d86 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -465,7 +465,7 @@ static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize)
                 * If there is a delay extent at this offset,
                 * it will be as a data.
                 */
-                ext4_es_find_delayed_extent(inode, last, &es);
+                ext4_es_find_delayed_extent_range(inode, last, last, &es);
                if (es.es_len != 0 && in_range(last, es.es_lblk, es.es_len)) {
                        if (last != start)
                                dataoff = last << blkbits;
@@ -548,7 +548,7 @@ static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize)
                 * If there is a delay extent at this offset,
                 * we will skip this extent.
                 */
-                ext4_es_find_delayed_extent(inode, last, &es);
+                ext4_es_find_delayed_extent_range(inode, last, last, &es);
                if (es.es_len != 0 && in_range(last, es.es_lblk, es.es_len)) {
                        last = es.es_lblk + es.es_len;
                        holeoff = last << blkbits;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 0723774bdfb5..d6382b89ecbd 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1488,10 +1488,7 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd,
        struct ext4_io_submit io_submit;
        BUG_ON(mpd->next_page <= mpd->first_page);
-        ext4_io_submit_init(&io_submit, mpd->wbc);
+        memset(&io_submit, 0, sizeof(io_submit));
-        io_submit.io_end = ext4_init_io_end(inode, GFP_NOFS);
-        if (!io_submit.io_end)
-                return -ENOMEM;
        /*
         * We need to start from the first_page to the next_page - 1
         * to make sure we also write the mapped dirty buffer_heads.
@@ -1579,8 +1576,6 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd,
                pagevec_release(&pvec);
        }
        ext4_io_submit(&io_submit);
-        /* Drop io_end reference we got from init */
-        ext4_put_io_end_defer(io_submit.io_end);
        return ret;
 }
@@ -2239,16 +2234,9 @@ static int ext4_writepage(struct page *page,
                 */
                return __ext4_journalled_writepage(page, len);
-        ext4_io_submit_init(&io_submit, wbc);
+        memset(&io_submit, 0, sizeof(io_submit));
-        io_submit.io_end = ext4_init_io_end(inode, GFP_NOFS);
-        if (!io_submit.io_end) {
-                redirty_page_for_writepage(wbc, page);
-                return -ENOMEM;
-        }
        ret = ext4_bio_write_page(&io_submit, page, len, wbc);
        ext4_io_submit(&io_submit);
-        /* Drop io_end reference we got from init */
-        ext4_put_io_end_defer(io_submit.io_end);
        return ret;
 }
@@ -3079,13 +3067,9 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
        struct inode *inode = file_inode(iocb->ki_filp);
        ext4_io_end_t *io_end = iocb->private;
-        /* if not async direct IO just return */
+        /* if not async direct IO or dio with 0 bytes write, just return */
-        if (!io_end) {
+        if (!io_end || !size)
-                inode_dio_done(inode);
+                goto out;
-                if (is_async)
-                        aio_complete(iocb, ret, 0);
-                return;
-        }
        ext_debug("ext4_end_io_dio(): io_end 0x%p "
                  "for inode %lu, iocb 0x%p, offset %llu, size %zd\n",
@@ -3093,13 +3077,25 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
                  size);
        iocb->private = NULL;
+        /* if not aio dio with unwritten extents, just free io and return */
+        if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
+                ext4_free_io_end(io_end);
+out:
+                inode_dio_done(inode);
+                if (is_async)
+                        aio_complete(iocb, ret, 0);
+                return;
+        }
        io_end->offset = offset;
        io_end->size = size;
        if (is_async) {
                io_end->iocb = iocb;
                io_end->result = ret;
        }
-        ext4_put_io_end_defer(io_end);
+        ext4_add_complete_io(io_end);
 }
 /*
@@ -3133,7 +3129,6 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
        get_block_t *get_block_func = NULL;
        int dio_flags = 0;
        loff_t final_size = offset + count;
-        ext4_io_end_t *io_end = NULL;
        /* Use the old path for reads and writes beyond i_size. */
        if (rw != WRITE || final_size > inode->i_size)
@@ -3172,16 +3167,13 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
        iocb->private = NULL;
        ext4_inode_aio_set(inode, NULL);
        if (!is_sync_kiocb(iocb)) {
-                io_end = ext4_init_io_end(inode, GFP_NOFS);
+                ext4_io_end_t *io_end = ext4_init_io_end(inode, GFP_NOFS);
                if (!io_end) {
                        ret = -ENOMEM;
                        goto retake_lock;
                }
                io_end->flag |= EXT4_IO_END_DIRECT;
-                /*
+                iocb->private = io_end;
-                 * Grab reference for DIO. Will be dropped in ext4_end_io_dio()
-                 */
-                iocb->private = ext4_get_io_end(io_end);
                /*
                 * we save the io structure for current async direct
                 * IO, so that later ext4_map_blocks() could flag the
@@ -3205,27 +3197,26 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
                                   NULL,
                                   dio_flags);
+        if (iocb->private)
+                ext4_inode_aio_set(inode, NULL);
        /*
-         * Put our reference to io_end. This can free the io_end structure e.g.
+         * The io_end structure takes a reference to the inode, that
-         * in sync IO case or in case of error. It can even perform extent
+         * structure needs to be destroyed and the reference to the
-         * conversion if all bios we submitted finished before we got here.
+         * inode needs to be dropped, when IO is complete, even with 0
-         * Note that in that case iocb->private can be already set to NULL
+         * byte write, or failed.
-         * here.
+         *
+         * In the successful AIO DIO case, the io_end structure will
+         * be destroyed and the reference to the inode will be dropped
+         * after the end_io call back function is called.
+         *
+         * In the case of a 0 byte write, or in the error case, since VFS
+         * direct IO won't invoke the end_io call back function, we
+         * need to free the io_end structure here.
         */
-        if (io_end) {
+        if (ret != -EIOCBQUEUED && ret <= 0 && iocb->private) {
-                ext4_inode_aio_set(inode, NULL);
+                ext4_free_io_end(iocb->private);
-                ext4_put_io_end(io_end);
+                iocb->private = NULL;
-                /*
+        } else if (ret > 0 && !overwrite && ext4_test_inode_state(inode,
-                 * In case of error or no write ext4_end_io_dio() was not
-                 * called so we have to put iocb's reference.
-                 */
-                if (ret <= 0 && ret != -EIOCBQUEUED) {
-                        WARN_ON(iocb->private != io_end);
-                        ext4_put_io_end(io_end);
-                        iocb->private = NULL;
-                }
-        }
-        if (ret > 0 && !overwrite && ext4_test_inode_state(inode,
                                                EXT4_STATE_DIO_UNWRITTEN)) {
                int err;
                /*
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index b1ed9e07434b..def84082a9a9 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -2105,7 +2105,11 @@ repeat:
                group = ac->ac_g_ex.fe_group;
                for (i = 0; i < ngroups; group++, i++) {
-                        if (group == ngroups)
+                        /*
+                         * Artificially restricted ngroups for non-extent
+                         * files makes group > ngroups possible on first loop.
+                         */
+                        if (group >= ngroups)
                                group = 0;
                        /* This now checks without needing the buddy page */
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 19599bded62a..4acf1f78881b 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -62,28 +62,15 @@ void ext4_ioend_shutdown(struct inode *inode)
                cancel_work_sync(&EXT4_I(inode)->i_unwritten_work);
 }
-static void ext4_release_io_end(ext4_io_end_t *io_end)
+void ext4_free_io_end(ext4_io_end_t *io)
 {
-        BUG_ON(!list_empty(&io_end->list));
+        BUG_ON(!io);
-        BUG_ON(io_end->flag & EXT4_IO_END_UNWRITTEN);
+        BUG_ON(!list_empty(&io->list));
+        BUG_ON(io->flag & EXT4_IO_END_UNWRITTEN);
-        if (atomic_dec_and_test(&EXT4_I(io_end->inode)->i_ioend_count))
-                wake_up_all(ext4_ioend_wq(io_end->inode));
-        if (io_end->flag & EXT4_IO_END_DIRECT)
-                inode_dio_done(io_end->inode);
-        if (io_end->iocb)
-                aio_complete(io_end->iocb, io_end->result, 0);
-        kmem_cache_free(io_end_cachep, io_end);
-}
-static void ext4_clear_io_unwritten_flag(ext4_io_end_t *io_end)
-{
-        struct inode *inode = io_end->inode;
-        io_end->flag &= ~EXT4_IO_END_UNWRITTEN;
+        if (atomic_dec_and_test(&EXT4_I(io->inode)->i_ioend_count))
-        /* Wake up anyone waiting on unwritten extent conversion */
+                wake_up_all(ext4_ioend_wq(io->inode));
-        if (atomic_dec_and_test(&EXT4_I(inode)->i_unwritten))
+        kmem_cache_free(io_end_cachep, io);
-                wake_up_all(ext4_ioend_wq(inode));
 }
 /* check a range of space and convert unwritten extents to written. */
@@ -106,8 +93,13 @@ static int ext4_end_io(ext4_io_end_t *io)
                         "(inode %lu, offset %llu, size %zd, error %d)",
                         inode->i_ino, offset, size, ret);
        }
-        ext4_clear_io_unwritten_flag(io);
+        /* Wake up anyone waiting on unwritten extent conversion */
-        ext4_release_io_end(io);
+        if (atomic_dec_and_test(&EXT4_I(inode)->i_unwritten))
+                wake_up_all(ext4_ioend_wq(inode));
+        if (io->flag & EXT4_IO_END_DIRECT)
+                inode_dio_done(inode);
+        if (io->iocb)
+                aio_complete(io->iocb, io->result, 0);
        return ret;
 }
@@ -138,7 +130,7 @@ static void dump_completed_IO(struct inode *inode)
 }
 /* Add the io_end to per-inode completed end_io list. */
-static void ext4_add_complete_io(ext4_io_end_t *io_end)
+void ext4_add_complete_io(ext4_io_end_t *io_end)
 {
        struct ext4_inode_info *ei = EXT4_I(io_end->inode);
        struct workqueue_struct *wq;
@@ -175,6 +167,8 @@ static int ext4_do_flush_completed_IO(struct inode *inode)
                err = ext4_end_io(io);
                if (unlikely(!ret && err))
                        ret = err;
+                io->flag &= ~EXT4_IO_END_UNWRITTEN;
+                ext4_free_io_end(io);
        }
        return ret;
 }
@@ -206,43 +200,10 @@ ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags)
                atomic_inc(&EXT4_I(inode)->i_ioend_count);
                io->inode = inode;
                INIT_LIST_HEAD(&io->list);
-                atomic_set(&io->count, 1);
        }
        return io;
 }
-void ext4_put_io_end_defer(ext4_io_end_t *io_end)
-{
-        if (atomic_dec_and_test(&io_end->count)) {
-                if (!(io_end->flag & EXT4_IO_END_UNWRITTEN) || !io_end->size) {
-                        ext4_release_io_end(io_end);
-                        return;
-                }
-                ext4_add_complete_io(io_end);
-        }
-}
-int ext4_put_io_end(ext4_io_end_t *io_end)
-{
-        int err = 0;
-        if (atomic_dec_and_test(&io_end->count)) {
-                if (io_end->flag & EXT4_IO_END_UNWRITTEN) {
-                        err = ext4_convert_unwritten_extents(io_end->inode,
-                                                io_end->offset, io_end->size);
-                        ext4_clear_io_unwritten_flag(io_end);
-                }
-                ext4_release_io_end(io_end);
-        }
-        return err;
-}
-ext4_io_end_t *ext4_get_io_end(ext4_io_end_t *io_end)
-{
-        atomic_inc(&io_end->count);
-        return io_end;
-}
 /*
 * Print an buffer I/O error compatible with the fs/buffer.c.  This
 * provides compatibility with dmesg scrapers that look for a specific
@@ -325,7 +286,12 @@ static void ext4_end_bio(struct bio *bio, int error)
                             bi_sector >> (inode->i_blkbits - 9));
        }
-        ext4_put_io_end_defer(io_end);
+        if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
+                ext4_free_io_end(io_end);
+                return;
+        }
+        ext4_add_complete_io(io_end);
 }
 void ext4_io_submit(struct ext4_io_submit *io)
@@ -339,37 +305,40 @@ void ext4_io_submit(struct ext4_io_submit *io)
                bio_put(io->io_bio);
        }
        io->io_bio = NULL;
-}
+        io->io_op = 0;
-void ext4_io_submit_init(struct ext4_io_submit *io,
-                         struct writeback_control *wbc)
-{
-        io->io_op = (wbc->sync_mode == WB_SYNC_ALL ?  WRITE_SYNC : WRITE);
-        io->io_bio = NULL;
        io->io_end = NULL;
 }
-static int io_submit_init_bio(struct ext4_io_submit *io,
+static int io_submit_init(struct ext4_io_submit *io,
-                              struct buffer_head *bh)
+                          struct inode *inode,
+                          struct writeback_control *wbc,
+                          struct buffer_head *bh)
 {
+        ext4_io_end_t *io_end;
+        struct page *page = bh->b_page;
        int nvecs = bio_get_nr_vecs(bh->b_bdev);
        struct bio *bio;
+        io_end = ext4_init_io_end(inode, GFP_NOFS);
+        if (!io_end)
+                return -ENOMEM;
        bio = bio_alloc(GFP_NOIO, min(nvecs, BIO_MAX_PAGES));
        bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9);
        bio->bi_bdev = bh->b_bdev;
+        bio->bi_private = io->io_end = io_end;
        bio->bi_end_io = ext4_end_bio;
-        bio->bi_private = ext4_get_io_end(io->io_end);
-        if (!io->io_end->size)
+        io_end->offset = (page->index << PAGE_CACHE_SHIFT) + bh_offset(bh);
-                io->io_end->offset = (bh->b_page->index << PAGE_CACHE_SHIFT)
-                                     + bh_offset(bh);
        io->io_bio = bio;
+        io->io_op = (wbc->sync_mode == WB_SYNC_ALL ?  WRITE_SYNC : WRITE);
        io->io_next_block = bh->b_blocknr;
        return 0;
 }
 static int io_submit_add_bh(struct ext4_io_submit *io,
                            struct inode *inode,
+                            struct writeback_control *wbc,
                            struct buffer_head *bh)
 {
        ext4_io_end_t *io_end;
@@ -380,18 +349,18 @@ submit_and_retry:
                ext4_io_submit(io);
        }
        if (io->io_bio == NULL) {
-                ret = io_submit_init_bio(io, bh);
+                ret = io_submit_init(io, inode, wbc, bh);
                if (ret)
                        return ret;
        }
-        ret = bio_add_page(io->io_bio, bh->b_page, bh->b_size, bh_offset(bh));
-        if (ret != bh->b_size)
-                goto submit_and_retry;
        io_end = io->io_end;
        if (test_clear_buffer_uninit(bh))
                ext4_set_io_unwritten_flag(inode, io_end);
-        io_end->size += bh->b_size;
+        io->io_end->size += bh->b_size;
        io->io_next_block++;
+        ret = bio_add_page(io->io_bio, bh->b_page, bh->b_size, bh_offset(bh));
+        if (ret != bh->b_size)
+                goto submit_and_retry;
        return 0;
 }
@@ -463,7 +432,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
        do {
                if (!buffer_async_write(bh))
                        continue;
-                ret = io_submit_add_bh(io, inode, bh);
+                ret = io_submit_add_bh(io, inode, wbc, bh);
                if (ret) {
                        /*
                         * We only get here on ENOMEM.  Not much else
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index dfce656ddb33..5d4513cb1b3c 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -1229,6 +1229,19 @@ static int fat_read_root(struct inode *inode)
        return 0;
 }
+static unsigned long calc_fat_clusters(struct super_block *sb)
+{
+        struct msdos_sb_info *sbi = MSDOS_SB(sb);
+        /* Divide first to avoid overflow */
+        if (sbi->fat_bits != 12) {
+                unsigned long ent_per_sec = sb->s_blocksize * 8 / sbi->fat_bits;
+                return ent_per_sec * sbi->fat_length;
+        }
+        return sbi->fat_length * sb->s_blocksize * 8 / sbi->fat_bits;
+}
 /*
 * Read the super block of an MS-DOS FS.
 */
@@ -1434,7 +1447,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat,
                sbi->dirty = b->fat16.state & FAT_STATE_DIRTY;
        /* check that FAT table does not overflow */
-        fat_clusters = sbi->fat_length * sb->s_blocksize * 8 / sbi->fat_bits;
+        fat_clusters = calc_fat_clusters(sb);
        total_clusters = min(total_clusters, fat_clusters - FAT_START_ENT);
        if (total_clusters > MAX_FAT(sb)) {
                if (!silent)
diff --git a/fs/gfs2/Kconfig b/fs/gfs2/Kconfig
index eb08c9e43c2a..5a376ab81feb 100644
--- a/fs/gfs2/Kconfig
+++ b/fs/gfs2/Kconfig
@@ -26,7 +26,7 @@ config GFS2_FS
 config GFS2_FS_LOCKING_DLM
        bool "GFS2 DLM locking"
        depends on (GFS2_FS!=n) && NET && INET && (IPV6 || IPV6=n) && \
-                HOTPLUG && DLM && CONFIGFS_FS && SYSFS
+                HOTPLUG && CONFIGFS_FS && SYSFS && (DLM=y || DLM=GFS2_FS)
        help
          Multiple node locking module for GFS2
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index c5fa758fd844..68b4c8f1fce8 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -212,7 +212,7 @@ static void gfs2_end_log_write(struct bio *bio, int error)
                fs_err(sdp, "Error %d writing to log\n", error);
        }
-        bio_for_each_segment(bvec, bio, i) {
+        bio_for_each_segment_all(bvec, bio, i) {
                page = bvec->bv_page;
                if (page_has_buffers(page))
                        gfs2_end_log_write_bh(sdp, bvec, error);
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index c7c840e916f8..c253b13722e8 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -121,7 +121,7 @@ static u64 qd2index(struct gfs2_quota_data *qd)
 {
        struct kqid qid = qd->qd_id;
        return (2 * (u64)from_kqid(&init_user_ns, qid)) +
-                (qid.type == USRQUOTA) ? 0 : 1;
+                ((qid.type == USRQUOTA) ? 0 : 1);
 }
 static u64 qd2offset(struct gfs2_quota_data *qd)
@@ -721,7 +721,7 @@ get_a_page:
                        goto unlock_out;
        }
-        gfs2_trans_add_meta(ip->i_gl, bh);
+        gfs2_trans_add_data(ip->i_gl, bh);
        kaddr = kmap_atomic(page);
        if (offset + sizeof(struct gfs2_quota) > PAGE_CACHE_SIZE)
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 0c5a575b513e..5232525934ae 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -1401,9 +1401,14 @@ static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip,
        u32 extlen;
        u32 free_blocks = rgd->rd_free_clone - rgd->rd_reserved;
        int ret;
+        struct inode *inode = &ip->i_inode;
-        extlen = max_t(u32, atomic_read(&rs->rs_sizehint), requested);
+        if (S_ISDIR(inode->i_mode))
-        extlen = clamp(extlen, RGRP_RSRV_MINBLKS, free_blocks);
+                extlen = 1;
+        else {
+                extlen = max_t(u32, atomic_read(&rs->rs_sizehint), requested);
+                extlen = clamp(extlen, RGRP_RSRV_MINBLKS, free_blocks);
+        }
        if ((rgd->rd_free_clone < rgd->rd_reserved) || (free_blocks < extlen))
                return;
diff --git a/fs/hfs/bnode.c b/fs/hfs/bnode.c
index f3b1a15ccd59..d3fa6bd9503e 100644
--- a/fs/hfs/bnode.c
+++ b/fs/hfs/bnode.c
@@ -415,7 +415,11 @@ struct hfs_bnode *hfs_bnode_create(struct hfs_btree *tree, u32 num)
        spin_lock(&tree->hash_lock);
        node = hfs_bnode_findhash(tree, num);
        spin_unlock(&tree->hash_lock);
-        BUG_ON(node);
+        if (node) {
+                pr_crit("new node %u already hashed?\n", num);
+                WARN_ON(1);
+                return node;
+        }
        node = __hfs_bnode_create(tree, num);
        if (!node)
                return ERR_PTR(-ENOMEM);
diff --git a/fs/hpfs/dir.c b/fs/hpfs/dir.c
index 546f6d39713a..834ac13c04b7 100644
--- a/fs/hpfs/dir.c
+++ b/fs/hpfs/dir.c
@@ -33,25 +33,27 @@ static loff_t hpfs_dir_lseek(struct file *filp, loff_t off, int whence)
        if (whence == SEEK_DATA || whence == SEEK_HOLE)
                return -EINVAL;
+        mutex_lock(&i->i_mutex);
        hpfs_lock(s);
        /*printk("dir lseek\n");*/
        if (new_off == 0 || new_off == 1 || new_off == 11 || new_off == 12 || new_off == 13) goto ok;
-        mutex_lock(&i->i_mutex);
        pos = ((loff_t) hpfs_de_as_down_as_possible(s, hpfs_inode->i_dno) << 4) + 1;
        while (pos != new_off) {
                if (map_pos_dirent(i, &pos, &qbh)) hpfs_brelse4(&qbh);
                else goto fail;
                if (pos == 12) goto fail;
        }
-        mutex_unlock(&i->i_mutex);
+        hpfs_add_pos(i, &filp->f_pos);
 ok:
+        filp->f_pos = new_off;
        hpfs_unlock(s);
-        return filp->f_pos = new_off;
-fail:
        mutex_unlock(&i->i_mutex);
+        return new_off;
+fail:
        /*printk("illegal lseek: %016llx\n", new_off);*/
        hpfs_unlock(s);
+        mutex_unlock(&i->i_mutex);
        return -ESPIPE;
 }
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index a13d26ede254..0bc27684ebfa 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -414,7 +414,7 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args,
        spin_lock(&tbl->slot_tbl_lock);
        /* state manager is resetting the session */
-        if (test_bit(NFS4_SESSION_DRAINING, &clp->cl_session->session_state)) {
+        if (test_bit(NFS4_SLOT_TBL_DRAINING, &tbl->slot_tbl_state)) {
                spin_unlock(&tbl->slot_tbl_lock);
                status = htonl(NFS4ERR_DELAY);
                /* Return NFS4ERR_BADSESSION if we're draining the session
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index 59461c957d9d..a35582c9d444 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -763,7 +763,7 @@ static void nfs4_callback_free_slot(struct nfs4_session *session)
         * A single slot, so highest used slotid is either 0 or -1
         */
        tbl->highest_used_slotid = NFS4_NO_SLOT;
-        nfs4_session_drain_complete(session, tbl);
+        nfs4_slot_tbl_drain_complete(tbl);
        spin_unlock(&tbl->slot_tbl_lock);
 }
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index 947b0c908aa9..4cbad5d6b276 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -203,7 +203,7 @@ struct nfs_client *nfs4_init_client(struct nfs_client *clp,
        __set_bit(NFS_CS_DISCRTRY, &clp->cl_flags);
        error = nfs_create_rpc_client(clp, timeparms, RPC_AUTH_GSS_KRB5I);
        if (error == -EINVAL)
-                error = nfs_create_rpc_client(clp, timeparms, RPC_AUTH_NULL);
+                error = nfs_create_rpc_client(clp, timeparms, RPC_AUTH_UNIX);
        if (error < 0)
                goto error;
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 8fbc10054115..d7ba5616989c 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -572,7 +572,7 @@ int nfs41_setup_sequence(struct nfs4_session *session,
        task->tk_timeout = 0;
        spin_lock(&tbl->slot_tbl_lock);
-        if (test_bit(NFS4_SESSION_DRAINING, &session->session_state) &&
+        if (test_bit(NFS4_SLOT_TBL_DRAINING, &tbl->slot_tbl_state) &&
            !args->sa_privileged) {
                /* The state manager will wait until the slot table is empty */
                dprintk("%s session is draining\n", __func__);
@@ -1078,7 +1078,7 @@ static struct nfs4_state *nfs4_try_open_cached(struct nfs4_opendata *opendata)
        struct nfs4_state *state = opendata->state;
        struct nfs_inode *nfsi = NFS_I(state->inode);
        struct nfs_delegation *delegation;
-        int open_mode = opendata->o_arg.open_flags & (O_EXCL|O_TRUNC);
+        int open_mode = opendata->o_arg.open_flags;
        fmode_t fmode = opendata->o_arg.fmode;
        nfs4_stateid stateid;
        int ret = -EAGAIN;
diff --git a/fs/nfs/nfs4session.c b/fs/nfs/nfs4session.c
index ebda5f4a031b..c4e225e4a9af 100644
--- a/fs/nfs/nfs4session.c
+++ b/fs/nfs/nfs4session.c
@@ -73,7 +73,7 @@ void nfs4_free_slot(struct nfs4_slot_table *tbl, struct nfs4_slot *slot)
                        tbl->highest_used_slotid = new_max;
                else {
                        tbl->highest_used_slotid = NFS4_NO_SLOT;
-                        nfs4_session_drain_complete(tbl->session, tbl);
+                        nfs4_slot_tbl_drain_complete(tbl);
                }
        }
        dprintk("%s: slotid %u highest_used_slotid %d\n", __func__,
@@ -226,7 +226,7 @@ static bool nfs41_assign_slot(struct rpc_task *task, void *pslot)
        struct nfs4_slot *slot = pslot;
        struct nfs4_slot_table *tbl = slot->table;
-        if (nfs4_session_draining(tbl->session) && !args->sa_privileged)
+        if (nfs4_slot_tbl_draining(tbl) && !args->sa_privileged)
                return false;
        slot->generation = tbl->generation;
        args->sa_slot = slot;
diff --git a/fs/nfs/nfs4session.h b/fs/nfs/nfs4session.h
index 6f3cb39386d4..ff7d9f0f8a65 100644
--- a/fs/nfs/nfs4session.h
+++ b/fs/nfs/nfs4session.h
@@ -25,6 +25,10 @@ struct nfs4_slot {
 };
 /* Sessions */
+enum nfs4_slot_tbl_state {
+        NFS4_SLOT_TBL_DRAINING,
+};
 #define SLOT_TABLE_SZ DIV_ROUND_UP(NFS4_MAX_SLOT_TABLE, 8*sizeof(long))
 struct nfs4_slot_table {
        struct nfs4_session *session;           /* Parent session */
@@ -43,6 +47,7 @@ struct nfs4_slot_table {
        unsigned long   generation;             /* Generation counter for
                                                   target_highest_slotid */
        struct completion complete;
+        unsigned long   slot_tbl_state;
 };
 /*
@@ -68,7 +73,6 @@ struct nfs4_session {
 enum nfs4_session_state {
        NFS4_SESSION_INITING,
-        NFS4_SESSION_DRAINING,
 };
 #if defined(CONFIG_NFS_V4_1)
@@ -88,12 +92,11 @@ extern void nfs4_destroy_session(struct nfs4_session *session);
 extern int nfs4_init_session(struct nfs_server *server);
 extern int nfs4_init_ds_session(struct nfs_client *, unsigned long);
-extern void nfs4_session_drain_complete(struct nfs4_session *session,
+extern void nfs4_slot_tbl_drain_complete(struct nfs4_slot_table *tbl);
-                struct nfs4_slot_table *tbl);
-static inline bool nfs4_session_draining(struct nfs4_session *session)
+static inline bool nfs4_slot_tbl_draining(struct nfs4_slot_table *tbl)
 {
-        return !!test_bit(NFS4_SESSION_DRAINING, &session->session_state);
+        return !!test_bit(NFS4_SLOT_TBL_DRAINING, &tbl->slot_tbl_state);
 }
 bool nfs41_wake_and_assign_slot(struct nfs4_slot_table *tbl,
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 300d17d85c0e..1fab140764c4 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -241,7 +241,7 @@ static void nfs4_end_drain_session(struct nfs_client *clp)
        if (ses == NULL)
                return;
        tbl = &ses->fc_slot_table;
-        if (test_and_clear_bit(NFS4_SESSION_DRAINING, &ses->session_state)) {
+        if (test_and_clear_bit(NFS4_SLOT_TBL_DRAINING, &tbl->slot_tbl_state)) {
                spin_lock(&tbl->slot_tbl_lock);
                nfs41_wake_slot_table(tbl);
                spin_unlock(&tbl->slot_tbl_lock);
@@ -251,15 +251,15 @@ static void nfs4_end_drain_session(struct nfs_client *clp)
 /*
 * Signal state manager thread if session fore channel is drained
 */
-void nfs4_session_drain_complete(struct nfs4_session *session,
+void nfs4_slot_tbl_drain_complete(struct nfs4_slot_table *tbl)
-                struct nfs4_slot_table *tbl)
 {
-        if (nfs4_session_draining(session))
+        if (nfs4_slot_tbl_draining(tbl))
                complete(&tbl->complete);
 }
-static int nfs4_wait_on_slot_tbl(struct nfs4_slot_table *tbl)
+static int nfs4_drain_slot_tbl(struct nfs4_slot_table *tbl)
 {
+        set_bit(NFS4_SLOT_TBL_DRAINING, &tbl->slot_tbl_state);
        spin_lock(&tbl->slot_tbl_lock);
        if (tbl->highest_used_slotid != NFS4_NO_SLOT) {
                INIT_COMPLETION(tbl->complete);
@@ -275,13 +275,12 @@ static int nfs4_begin_drain_session(struct nfs_client *clp)
        struct nfs4_session *ses = clp->cl_session;
        int ret = 0;
-        set_bit(NFS4_SESSION_DRAINING, &ses->session_state);
        /* back channel */
-        ret = nfs4_wait_on_slot_tbl(&ses->bc_slot_table);
+        ret = nfs4_drain_slot_tbl(&ses->bc_slot_table);
        if (ret)
                return ret;
        /* fore channel */
-        return nfs4_wait_on_slot_tbl(&ses->fc_slot_table);
+        return nfs4_drain_slot_tbl(&ses->fc_slot_table);
 }
 static void nfs41_finish_session_reset(struct nfs_client *clp)
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index a366107a7331..2d7525fbcf25 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -1942,6 +1942,7 @@ static int nfs23_validate_mount_data(void *options,
                args->namlen            = data->namlen;
                args->bsize             = data->bsize;
+                args->auth_flavors[0] = RPC_AUTH_UNIX;
                if (data->flags & NFS_MOUNT_SECFLAVOUR)
                        args->auth_flavors[0] = data->pseudoflavor;
                if (!args->nfs_server.hostname)
@@ -2637,6 +2638,7 @@ static int nfs4_validate_mount_data(void *options,
                        goto out_no_address;
                args->nfs_server.port = ntohs(((struct sockaddr_in *)sap)->sin_port);
+                args->auth_flavors[0] = RPC_AUTH_UNIX;
                if (data->auth_flavourlen) {
                        if (data->auth_flavourlen > 1)
                                goto out_inval_auth;
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index 689fb608648e..bccfec8343c5 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -219,13 +219,32 @@ static int nilfs_writepage(struct page *page, struct writeback_control *wbc)
 static int nilfs_set_page_dirty(struct page *page)
 {
-        int ret = __set_page_dirty_buffers(page);
+        int ret = __set_page_dirty_nobuffers(page);
-        if (ret) {
+        if (page_has_buffers(page)) {
                struct inode *inode = page->mapping->host;
-                unsigned nr_dirty = 1 << (PAGE_SHIFT - inode->i_blkbits);
+                unsigned nr_dirty = 0;
+                struct buffer_head *bh, *head;
-                nilfs_set_file_dirty(inode, nr_dirty);
+                /*
+                 * This page is locked by callers, and no other thread
+                 * concurrently marks its buffers dirty since they are
+                 * only dirtied through routines in fs/buffer.c in
+                 * which call sites of mark_buffer_dirty are protected
+                 * by page lock.
+                 */
+                bh = head = page_buffers(page);
+                do {
+                        /* Do not mark hole blocks dirty */
+                        if (buffer_dirty(bh) || !buffer_mapped(bh))
+                                continue;
+                        set_buffer_dirty(bh);
+                        nr_dirty++;
+                } while (bh = bh->b_this_page, bh != head);
+                if (nr_dirty)
+                        nilfs_set_file_dirty(inode, nr_dirty);
        }
        return ret;
 }
diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c
index 1c39efb71bab..2487116d0d33 100644
--- a/fs/ocfs2/extent_map.c
+++ b/fs/ocfs2/extent_map.c
@@ -790,7 +790,7 @@ int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
                                                 &hole_size, &rec, &is_last);
                if (ret) {
                        mlog_errno(ret);
-                        goto out;
+                        goto out_unlock;
                }
                if (rec.e_blkno == 0ULL) {
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 8a7509f9e6f5..ff54014a24ec 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -2288,7 +2288,7 @@ relock:
                ret = ocfs2_inode_lock(inode, NULL, 1);
                if (ret < 0) {
                        mlog_errno(ret);
-                        goto out_sems;
+                        goto out;
                }
                ocfs2_inode_unlock(inode, 1);
diff --git a/fs/pnode.c b/fs/pnode.c
index 3d2a7141b87a..9af0df15256e 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -83,7 +83,8 @@ static int do_make_slave(struct mount *mnt)
                if (peer_mnt == mnt)
                        peer_mnt = NULL;
        }
-        if (IS_MNT_SHARED(mnt) && list_empty(&mnt->mnt_share))
+        if (mnt->mnt_group_id && IS_MNT_SHARED(mnt) &&
+            list_empty(&mnt->mnt_share))
                mnt_release_group_id(mnt);
        list_del_init(&mnt->mnt_share);
diff --git a/fs/qnx6/dir.c b/fs/qnx6/dir.c
index 8798d065e400..afa6be6fc397 100644
--- a/fs/qnx6/dir.c
+++ b/fs/qnx6/dir.c
@@ -120,7 +120,7 @@ static int qnx6_readdir(struct file *filp, void *dirent, filldir_t filldir)
        struct inode *inode = file_inode(filp);
        struct super_block *s = inode->i_sb;
        struct qnx6_sb_info *sbi = QNX6_SB(s);
-        loff_t pos = filp->f_pos & (QNX6_DIR_ENTRY_SIZE - 1);
+        loff_t pos = filp->f_pos & ~(QNX6_DIR_ENTRY_SIZE - 1);
        unsigned long npages = dir_pages(inode);
        unsigned long n = pos >> PAGE_CACHE_SHIFT;
        unsigned start = (pos & ~PAGE_CACHE_MASK) / QNX6_DIR_ENTRY_SIZE;
diff --git a/fs/reiserfs/dir.c b/fs/reiserfs/dir.c
index 66c53b642a88..6c2d136561cb 100644
--- a/fs/reiserfs/dir.c
+++ b/fs/reiserfs/dir.c
@@ -204,6 +204,8 @@ int reiserfs_readdir_dentry(struct dentry *dentry, void *dirent,
                                next_pos = deh_offset(deh) + 1;
                                if (item_moved(&tmp_ih, &path_to_entry)) {
+                                        set_cpu_key_k_offset(&pos_key,
+                                                             next_pos);
                                        goto research;
                                }
                        }       /* for */
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 77d6d47abc83..f844533792ee 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -1811,11 +1811,16 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
                                  TYPE_STAT_DATA, SD_SIZE, MAX_US_INT);
        memcpy(INODE_PKEY(inode), &(ih.ih_key), KEY_SIZE);
        args.dirid = le32_to_cpu(ih.ih_key.k_dir_id);
-        if (insert_inode_locked4(inode, args.objectid,
-                             reiserfs_find_actor, &args) < 0) {
+        reiserfs_write_unlock(inode->i_sb);
+        err = insert_inode_locked4(inode, args.objectid,
+                             reiserfs_find_actor, &args);
+        reiserfs_write_lock(inode->i_sb);
+        if (err) {
                err = -EINVAL;
                goto out_bad_inode;
        }
        if (old_format_only(sb))
                /* not a perfect generation count, as object ids can be reused, but
                 ** this is as good as reiserfs can do right now.
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index 4cce1d9552fb..821bcf70e467 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -318,7 +318,19 @@ static int delete_one_xattr(struct dentry *dentry, void *data)
 static int chown_one_xattr(struct dentry *dentry, void *data)
 {
        struct iattr *attrs = data;
-        return reiserfs_setattr(dentry, attrs);
+        int ia_valid = attrs->ia_valid;
+        int err;
+        /*
+         * We only want the ownership bits. Otherwise, we'll do
+         * things like change a directory to a regular file if
+         * ATTR_MODE is set.
+         */
+        attrs->ia_valid &= (ATTR_UID|ATTR_GID);
+        err = reiserfs_setattr(dentry, attrs);
+        attrs->ia_valid = ia_valid;
+        return err;
 }
 /* No i_mutex, but the inode is unconnected. */
diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c
index d7c01ef64eda..6c8767fdfc6a 100644
--- a/fs/reiserfs/xattr_acl.c
+++ b/fs/reiserfs/xattr_acl.c
@@ -443,6 +443,9 @@ int reiserfs_acl_chmod(struct inode *inode)
        int depth;
        int error;
+        if (IS_PRIVATE(inode))
+                return 0;
        if (S_ISLNK(inode->i_mode))
                return -EOPNOTSUPP;
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 2b2691b73428..41a695048be7 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -725,6 +725,25 @@ xfs_convert_page(
                        (xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT,
                        i_size_read(inode));
+        /*
+         * If the current map does not span the entire page we are about to try
+         * to write, then give up. The only way we can write a page that spans
+         * multiple mappings in a single writeback iteration is via the
+         * xfs_vm_writepage() function. Data integrity writeback requires the
+         * entire page to be written in a single attempt, otherwise the part of
+         * the page we don't write here doesn't get written as part of the data
+         * integrity sync.
+         *
+         * For normal writeback, we also don't attempt to write partial pages
+         * here as it simply means that write_cache_pages() will see it under
+         * writeback and ignore the page until some point in the future, at
+         * which time this will be the only page in the file that needs
+         * writeback.  Hence for more optimal IO patterns, we should always
+         * avoid partial page writeback due to multiple mappings on a page here.
+         */
+        if (!xfs_imap_valid(inode, imap, end_offset))
+                goto fail_unlock_page;
        len = 1 << inode->i_blkbits;
        p_offset = min_t(unsigned long, end_offset & (PAGE_CACHE_SIZE - 1),
                                        PAGE_CACHE_SIZE);
diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c
index 08d5457c948e..d788302e506a 100644
--- a/fs/xfs/xfs_attr_leaf.c
+++ b/fs/xfs/xfs_attr_leaf.c
@@ -931,20 +931,22 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
 */
 int
 xfs_attr_shortform_allfit(
-        struct xfs_buf  *bp,
+        struct xfs_buf          *bp,
-        struct xfs_inode *dp)
+        struct xfs_inode        *dp)
 {
-        xfs_attr_leafblock_t *leaf;
+        struct xfs_attr_leafblock *leaf;
-        xfs_attr_leaf_entry_t *entry;
+        struct xfs_attr_leaf_entry *entry;
        xfs_attr_leaf_name_local_t *name_loc;
-        int bytes, i;
+        struct xfs_attr3_icleaf_hdr leafhdr;
+        int                     bytes;
+        int                     i;
        leaf = bp->b_addr;
-        ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
+        xfs_attr3_leaf_hdr_from_disk(&leafhdr, leaf);
+        entry = xfs_attr3_leaf_entryp(leaf);
-        entry = &leaf->entries[0];
        bytes = sizeof(struct xfs_attr_sf_hdr);
-        for (i = 0; i < be16_to_cpu(leaf->hdr.count); entry++, i++) {
+        for (i = 0; i < leafhdr.count; entry++, i++) {
                if (entry->flags & XFS_ATTR_INCOMPLETE)
                        continue;               /* don't copy partial entries */
                if (!(entry->flags & XFS_ATTR_LOCAL))
@@ -954,15 +956,15 @@ xfs_attr_shortform_allfit(
                        return(0);
                if (be16_to_cpu(name_loc->valuelen) >= XFS_ATTR_SF_ENTSIZE_MAX)
                        return(0);
-                bytes += sizeof(struct xfs_attr_sf_entry)-1
+                bytes += sizeof(struct xfs_attr_sf_entry) - 1
                                + name_loc->namelen
                                + be16_to_cpu(name_loc->valuelen);
        }
        if ((dp->i_mount->m_flags & XFS_MOUNT_ATTR2) &&
            (dp->i_d.di_format != XFS_DINODE_FMT_BTREE) &&
            (bytes == sizeof(struct xfs_attr_sf_hdr)))
-                return(-1);
+                return -1;
-        return(xfs_attr_shortform_bytesfit(dp, bytes));
+        return xfs_attr_shortform_bytesfit(dp, bytes);
 }
 /*
@@ -1410,7 +1412,7 @@ xfs_attr3_leaf_add_work(
                name_rmt->valuelen = 0;
                name_rmt->valueblk = 0;
                args->rmtblkno = 1;
-                args->rmtblkcnt = XFS_B_TO_FSB(mp, args->valuelen);
+                args->rmtblkcnt = xfs_attr3_rmt_blocks(mp, args->valuelen);
        }
        xfs_trans_log_buf(args->trans, bp,
             XFS_DA_LOGRANGE(leaf, xfs_attr3_leaf_name(leaf, args->index),
@@ -1443,11 +1445,12 @@ xfs_attr3_leaf_add_work(
 STATIC void
 xfs_attr3_leaf_compact(
        struct xfs_da_args      *args,
-        struct xfs_attr3_icleaf_hdr *ichdr_d,
+        struct xfs_attr3_icleaf_hdr *ichdr_dst,
        struct xfs_buf          *bp)
 {
-        xfs_attr_leafblock_t    *leaf_s, *leaf_d;
+        struct xfs_attr_leafblock *leaf_src;
-        struct xfs_attr3_icleaf_hdr ichdr_s;
+        struct xfs_attr_leafblock *leaf_dst;
+        struct xfs_attr3_icleaf_hdr ichdr_src;
        struct xfs_trans        *trans = args->trans;
        struct xfs_mount        *mp = trans->t_mountp;
        char                    *tmpbuffer;
@@ -1455,29 +1458,38 @@ xfs_attr3_leaf_compact(
        trace_xfs_attr_leaf_compact(args);
        tmpbuffer = kmem_alloc(XFS_LBSIZE(mp), KM_SLEEP);
-        ASSERT(tmpbuffer != NULL);
        memcpy(tmpbuffer, bp->b_addr, XFS_LBSIZE(mp));
        memset(bp->b_addr, 0, XFS_LBSIZE(mp));
+        leaf_src = (xfs_attr_leafblock_t *)tmpbuffer;
+        leaf_dst = bp->b_addr;
        /*
-         * Copy basic information
+         * Copy the on-disk header back into the destination buffer to ensure
+         * all the information in the header that is not part of the incore
+         * header structure is preserved.
         */
-        leaf_s = (xfs_attr_leafblock_t *)tmpbuffer;
+        memcpy(bp->b_addr, tmpbuffer, xfs_attr3_leaf_hdr_size(leaf_src));
-        leaf_d = bp->b_addr;
-        ichdr_s = *ichdr_d;     /* struct copy */
+        /* Initialise the incore headers */
-        ichdr_d->firstused = XFS_LBSIZE(mp);
+        ichdr_src = *ichdr_dst; /* struct copy */
-        ichdr_d->usedbytes = 0;
+        ichdr_dst->firstused = XFS_LBSIZE(mp);
-        ichdr_d->count = 0;
+        ichdr_dst->usedbytes = 0;
-        ichdr_d->holes = 0;
+        ichdr_dst->count = 0;
-        ichdr_d->freemap[0].base = xfs_attr3_leaf_hdr_size(leaf_s);
+        ichdr_dst->holes = 0;
-        ichdr_d->freemap[0].size = ichdr_d->firstused - ichdr_d->freemap[0].base;
+        ichdr_dst->freemap[0].base = xfs_attr3_leaf_hdr_size(leaf_src);
+        ichdr_dst->freemap[0].size = ichdr_dst->firstused -
+                                                ichdr_dst->freemap[0].base;
+        /* write the header back to initialise the underlying buffer */
+        xfs_attr3_leaf_hdr_to_disk(leaf_dst, ichdr_dst);
        /*
         * Copy all entry's in the same (sorted) order,
         * but allocate name/value pairs packed and in sequence.
         */
-        xfs_attr3_leaf_moveents(leaf_s, &ichdr_s, 0, leaf_d, ichdr_d, 0,
+        xfs_attr3_leaf_moveents(leaf_src, &ichdr_src, 0, leaf_dst, ichdr_dst, 0,
-                                ichdr_s.count, mp);
+                                ichdr_src.count, mp);
        /*
         * this logs the entire buffer, but the caller must write the header
         * back to the buffer when it is finished modifying it.
@@ -2179,14 +2191,24 @@ xfs_attr3_leaf_unbalance(
                struct xfs_attr_leafblock *tmp_leaf;
                struct xfs_attr3_icleaf_hdr tmphdr;
-                tmp_leaf = kmem_alloc(state->blocksize, KM_SLEEP);
+                tmp_leaf = kmem_zalloc(state->blocksize, KM_SLEEP);
-                memset(tmp_leaf, 0, state->blocksize);
-                memset(&tmphdr, 0, sizeof(tmphdr));
+                /*
+                 * Copy the header into the temp leaf so that all the stuff
+                 * not in the incore header is present and gets copied back in
+                 * once we've moved all the entries.
+                 */
+                memcpy(tmp_leaf, save_leaf, xfs_attr3_leaf_hdr_size(save_leaf));
+                memset(&tmphdr, 0, sizeof(tmphdr));
                tmphdr.magic = savehdr.magic;
                tmphdr.forw = savehdr.forw;
                tmphdr.back = savehdr.back;
                tmphdr.firstused = state->blocksize;
+                /* write the header to the temp buffer to initialise it */
+                xfs_attr3_leaf_hdr_to_disk(tmp_leaf, &tmphdr);
                if (xfs_attr3_leaf_order(save_blk->bp, &savehdr,
                                         drop_blk->bp, &drophdr)) {
                        xfs_attr3_leaf_moveents(drop_leaf, &drophdr, 0,
@@ -2330,9 +2352,11 @@ xfs_attr3_leaf_lookup_int(
                        if (!xfs_attr_namesp_match(args->flags, entry->flags))
                                continue;
                        args->index = probe;
+                        args->valuelen = be32_to_cpu(name_rmt->valuelen);
                        args->rmtblkno = be32_to_cpu(name_rmt->valueblk);
-                        args->rmtblkcnt = XFS_B_TO_FSB(args->dp->i_mount,
+                        args->rmtblkcnt = xfs_attr3_rmt_blocks(
-                                                   be32_to_cpu(name_rmt->valuelen));
+                                                        args->dp->i_mount,
+                                                        args->valuelen);
                        return XFS_ERROR(EEXIST);
                }
        }
@@ -2383,7 +2407,8 @@ xfs_attr3_leaf_getvalue(
                ASSERT(memcmp(args->name, name_rmt->name, args->namelen) == 0);
                valuelen = be32_to_cpu(name_rmt->valuelen);
                args->rmtblkno = be32_to_cpu(name_rmt->valueblk);
-                args->rmtblkcnt = XFS_B_TO_FSB(args->dp->i_mount, valuelen);
+                args->rmtblkcnt = xfs_attr3_rmt_blocks(args->dp->i_mount,
+                                                       valuelen);
                if (args->flags & ATTR_KERNOVAL) {
                        args->valuelen = valuelen;
                        return 0;
@@ -2709,7 +2734,8 @@ xfs_attr3_leaf_list_int(
                                args.valuelen = valuelen;
                                args.value = kmem_alloc(valuelen, KM_SLEEP | KM_NOFS);
                                args.rmtblkno = be32_to_cpu(name_rmt->valueblk);
-                                args.rmtblkcnt = XFS_B_TO_FSB(args.dp->i_mount, valuelen);
+                                args.rmtblkcnt = xfs_attr3_rmt_blocks(
+                                                        args.dp->i_mount, valuelen);
                                retval = xfs_attr_rmtval_get(&args);
                                if (retval)
                                        return retval;
diff --git a/fs/xfs/xfs_attr_remote.c b/fs/xfs/xfs_attr_remote.c
index dee84466dcc9..ef6b0c124528 100644
--- a/fs/xfs/xfs_attr_remote.c
+++ b/fs/xfs/xfs_attr_remote.c
@@ -47,22 +47,55 @@
 * Each contiguous block has a header, so it is not just a simple attribute
 * length to FSB conversion.
 */
-static int
+int
 xfs_attr3_rmt_blocks(
        struct xfs_mount *mp,
        int             attrlen)
 {
-        int             buflen = XFS_ATTR3_RMT_BUF_SPACE(mp,
+        if (xfs_sb_version_hascrc(&mp->m_sb)) {
-                                                         mp->m_sb.sb_blocksize);
+                int buflen = XFS_ATTR3_RMT_BUF_SPACE(mp, mp->m_sb.sb_blocksize);
-        return (attrlen + buflen - 1) / buflen;
+                return (attrlen + buflen - 1) / buflen;
+        }
+        return XFS_B_TO_FSB(mp, attrlen);
+}
+/*
+ * Checking of the remote attribute header is split into two parts. The verifier
+ * does CRC, location and bounds checking, the unpacking function checks the
+ * attribute parameters and owner.
+ */
+static bool
+xfs_attr3_rmt_hdr_ok(
+        struct xfs_mount        *mp,
+        void                    *ptr,
+        xfs_ino_t               ino,
+        uint32_t                offset,
+        uint32_t                size,
+        xfs_daddr_t             bno)
+{
+        struct xfs_attr3_rmt_hdr *rmt = ptr;
+        if (bno != be64_to_cpu(rmt->rm_blkno))
+                return false;
+        if (offset != be32_to_cpu(rmt->rm_offset))
+                return false;
+        if (size != be32_to_cpu(rmt->rm_bytes))
+                return false;
+        if (ino != be64_to_cpu(rmt->rm_owner))
+                return false;
+        /* ok */
+        return true;
 }
 static bool
 xfs_attr3_rmt_verify(
-        struct xfs_buf          *bp)
+        struct xfs_mount        *mp,
+        void                    *ptr,
+        int                     fsbsize,
+        xfs_daddr_t             bno)
 {
-        struct xfs_mount        *mp = bp->b_target->bt_mount;
+        struct xfs_attr3_rmt_hdr *rmt = ptr;
-        struct xfs_attr3_rmt_hdr *rmt = bp->b_addr;
        if (!xfs_sb_version_hascrc(&mp->m_sb))
                return false;
@@ -70,7 +103,9 @@ xfs_attr3_rmt_verify(
                return false;
        if (!uuid_equal(&rmt->rm_uuid, &mp->m_sb.sb_uuid))
                return false;
-        if (bp->b_bn != be64_to_cpu(rmt->rm_blkno))
+        if (be64_to_cpu(rmt->rm_blkno) != bno)
+                return false;
+        if (be32_to_cpu(rmt->rm_bytes) > fsbsize - sizeof(*rmt))
                return false;
        if (be32_to_cpu(rmt->rm_offset) +
                                be32_to_cpu(rmt->rm_bytes) >= XATTR_SIZE_MAX)
@@ -86,17 +121,40 @@ xfs_attr3_rmt_read_verify(
        struct xfs_buf  *bp)
 {
        struct xfs_mount *mp = bp->b_target->bt_mount;
+        char            *ptr;
+        int             len;
+        bool            corrupt = false;
+        xfs_daddr_t     bno;
        /* no verification of non-crc buffers */
        if (!xfs_sb_version_hascrc(&mp->m_sb))
                return;
-        if (!xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
+        ptr = bp->b_addr;
-                              XFS_ATTR3_RMT_CRC_OFF) ||
+        bno = bp->b_bn;
-            !xfs_attr3_rmt_verify(bp)) {
+        len = BBTOB(bp->b_length);
+        ASSERT(len >= XFS_LBSIZE(mp));
+        while (len > 0) {
+                if (!xfs_verify_cksum(ptr, XFS_LBSIZE(mp),
+                                      XFS_ATTR3_RMT_CRC_OFF)) {
+                        corrupt = true;
+                        break;
+                }
+                if (!xfs_attr3_rmt_verify(mp, ptr, XFS_LBSIZE(mp), bno)) {
+                        corrupt = true;
+                        break;
+                }
+                len -= XFS_LBSIZE(mp);
+                ptr += XFS_LBSIZE(mp);
+                bno += mp->m_bsize;
+        }
+        if (corrupt) {
                XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
                xfs_buf_ioerror(bp, EFSCORRUPTED);
-        }
+        } else
+                ASSERT(len == 0);
 }
 static void
@@ -105,23 +163,39 @@ xfs_attr3_rmt_write_verify(
 {
        struct xfs_mount *mp = bp->b_target->bt_mount;
        struct xfs_buf_log_item *bip = bp->b_fspriv;
+        char            *ptr;
+        int             len;
+        xfs_daddr_t     bno;
        /* no verification of non-crc buffers */
        if (!xfs_sb_version_hascrc(&mp->m_sb))
                return;
-        if (!xfs_attr3_rmt_verify(bp)) {
+        ptr = bp->b_addr;
-                XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
+        bno = bp->b_bn;
-                xfs_buf_ioerror(bp, EFSCORRUPTED);
+        len = BBTOB(bp->b_length);
-                return;
+        ASSERT(len >= XFS_LBSIZE(mp));
-        }
+        while (len > 0) {
+                if (!xfs_attr3_rmt_verify(mp, ptr, XFS_LBSIZE(mp), bno)) {
+                        XFS_CORRUPTION_ERROR(__func__,
+                                            XFS_ERRLEVEL_LOW, mp, bp->b_addr);
+                        xfs_buf_ioerror(bp, EFSCORRUPTED);
+                        return;
+                }
+                if (bip) {
+                        struct xfs_attr3_rmt_hdr *rmt;
+                        rmt = (struct xfs_attr3_rmt_hdr *)ptr;
+                        rmt->rm_lsn = cpu_to_be64(bip->bli_item.li_lsn);
+                }
+                xfs_update_cksum(ptr, XFS_LBSIZE(mp), XFS_ATTR3_RMT_CRC_OFF);
-        if (bip) {
+                len -= XFS_LBSIZE(mp);
-                struct xfs_attr3_rmt_hdr *rmt = bp->b_addr;
+                ptr += XFS_LBSIZE(mp);
-                rmt->rm_lsn = cpu_to_be64(bip->bli_item.li_lsn);
+                bno += mp->m_bsize;
        }
-        xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length),
+        ASSERT(len == 0);
-                         XFS_ATTR3_RMT_CRC_OFF);
 }
 const struct xfs_buf_ops xfs_attr3_rmt_buf_ops = {
@@ -129,15 +203,16 @@ const struct xfs_buf_ops xfs_attr3_rmt_buf_ops = {
        .verify_write = xfs_attr3_rmt_write_verify,
 };
-static int
+STATIC int
 xfs_attr3_rmt_hdr_set(
        struct xfs_mount        *mp,
+        void                    *ptr,
        xfs_ino_t               ino,
        uint32_t                offset,
        uint32_t                size,
-        struct xfs_buf          *bp)
+        xfs_daddr_t             bno)
 {
-        struct xfs_attr3_rmt_hdr *rmt = bp->b_addr;
+        struct xfs_attr3_rmt_hdr *rmt = ptr;
        if (!xfs_sb_version_hascrc(&mp->m_sb))
                return 0;
@@ -147,36 +222,107 @@ xfs_attr3_rmt_hdr_set(
        rmt->rm_bytes = cpu_to_be32(size);
        uuid_copy(&rmt->rm_uuid, &mp->m_sb.sb_uuid);
        rmt->rm_owner = cpu_to_be64(ino);
-        rmt->rm_blkno = cpu_to_be64(bp->b_bn);
+        rmt->rm_blkno = cpu_to_be64(bno);
-        bp->b_ops = &xfs_attr3_rmt_buf_ops;
        return sizeof(struct xfs_attr3_rmt_hdr);
 }
 /*
- * Checking of the remote attribute header is split into two parts. the verifier
+ * Helper functions to copy attribute data in and out of the on-disk extents
- * does CRC, location and bounds checking, the unpacking function checks the
- * attribute parameters and owner.
 */
-static bool
+STATIC int
-xfs_attr3_rmt_hdr_ok(
+xfs_attr_rmtval_copyout(
-        struct xfs_mount        *mp,
+        struct xfs_mount *mp,
-        xfs_ino_t               ino,
+        struct xfs_buf  *bp,
-        uint32_t                offset,
+        xfs_ino_t       ino,
-        uint32_t                size,
+        int             *offset,
-        struct xfs_buf          *bp)
+        int             *valuelen,
+        char            **dst)
 {
-        struct xfs_attr3_rmt_hdr *rmt = bp->b_addr;
+        char            *src = bp->b_addr;
+        xfs_daddr_t     bno = bp->b_bn;
+        int             len = BBTOB(bp->b_length);
-        if (offset != be32_to_cpu(rmt->rm_offset))
+        ASSERT(len >= XFS_LBSIZE(mp));
-                return false;
-        if (size != be32_to_cpu(rmt->rm_bytes))
-                return false;
-        if (ino != be64_to_cpu(rmt->rm_owner))
-                return false;
-        /* ok */
+        while (len > 0 && *valuelen > 0) {
-        return true;
+                int hdr_size = 0;
+                int byte_cnt = XFS_ATTR3_RMT_BUF_SPACE(mp, XFS_LBSIZE(mp));
+                byte_cnt = min_t(int, *valuelen, byte_cnt);
+                if (xfs_sb_version_hascrc(&mp->m_sb)) {
+                        if (!xfs_attr3_rmt_hdr_ok(mp, src, ino, *offset,
+                                                  byte_cnt, bno)) {
+                                xfs_alert(mp,
+"remote attribute header mismatch bno/off/len/owner (0x%llx/0x%x/Ox%x/0x%llx)",
+                                        bno, *offset, byte_cnt, ino);
+                                return EFSCORRUPTED;
+                        }
+                        hdr_size = sizeof(struct xfs_attr3_rmt_hdr);
+                }
+                memcpy(*dst, src + hdr_size, byte_cnt);
+                /* roll buffer forwards */
+                len -= XFS_LBSIZE(mp);
+                src += XFS_LBSIZE(mp);
+                bno += mp->m_bsize;
+                /* roll attribute data forwards */
+                *valuelen -= byte_cnt;
+                *dst += byte_cnt;
+                *offset += byte_cnt;
+        }
+        return 0;
+}
+STATIC void
+xfs_attr_rmtval_copyin(
+        struct xfs_mount *mp,
+        struct xfs_buf  *bp,
+        xfs_ino_t       ino,
+        int             *offset,
+        int             *valuelen,
+        char            **src)
+{
+        char            *dst = bp->b_addr;
+        xfs_daddr_t     bno = bp->b_bn;
+        int             len = BBTOB(bp->b_length);
+        ASSERT(len >= XFS_LBSIZE(mp));
+        while (len > 0 && *valuelen > 0) {
+                int hdr_size;
+                int byte_cnt = XFS_ATTR3_RMT_BUF_SPACE(mp, XFS_LBSIZE(mp));
+                byte_cnt = min(*valuelen, byte_cnt);
+                hdr_size = xfs_attr3_rmt_hdr_set(mp, dst, ino, *offset,
+                                                 byte_cnt, bno);
+                memcpy(dst + hdr_size, *src, byte_cnt);
+                /*
+                 * If this is the last block, zero the remainder of it.
+                 * Check that we are actually the last block, too.
+                 */
+                if (byte_cnt + hdr_size < XFS_LBSIZE(mp)) {
+                        ASSERT(*valuelen - byte_cnt == 0);
+                        ASSERT(len == XFS_LBSIZE(mp));
+                        memset(dst + hdr_size + byte_cnt, 0,
+                                        XFS_LBSIZE(mp) - hdr_size - byte_cnt);
+                }
+                /* roll buffer forwards */
+                len -= XFS_LBSIZE(mp);
+                dst += XFS_LBSIZE(mp);
+                bno += mp->m_bsize;
+                /* roll attribute data forwards */
+                *valuelen -= byte_cnt;
+                *src += byte_cnt;
+                *offset += byte_cnt;
+        }
 }
 /*
@@ -190,13 +336,12 @@ xfs_attr_rmtval_get(
        struct xfs_bmbt_irec    map[ATTR_RMTVALUE_MAPSIZE];
        struct xfs_mount        *mp = args->dp->i_mount;
        struct xfs_buf          *bp;
-        xfs_daddr_t             dblkno;
        xfs_dablk_t             lblkno = args->rmtblkno;
-        void                    *dst = args->value;
+        char                    *dst = args->value;
        int                     valuelen = args->valuelen;
        int                     nmap;
        int                     error;
-        int                     blkcnt;
+        int                     blkcnt = args->rmtblkcnt;
        int                     i;
        int                     offset = 0;
@@ -207,52 +352,36 @@ xfs_attr_rmtval_get(
        while (valuelen > 0) {
                nmap = ATTR_RMTVALUE_MAPSIZE;
                error = xfs_bmapi_read(args->dp, (xfs_fileoff_t)lblkno,
-                                       args->rmtblkcnt, map, &nmap,
+                                       blkcnt, map, &nmap,
                                       XFS_BMAPI_ATTRFORK);
                if (error)
                        return error;
                ASSERT(nmap >= 1);
                for (i = 0; (i < nmap) && (valuelen > 0); i++) {
-                        int     byte_cnt;
+                        xfs_daddr_t     dblkno;
-                        char    *src;
+                        int             dblkcnt;
                        ASSERT((map[i].br_startblock != DELAYSTARTBLOCK) &&
                               (map[i].br_startblock != HOLESTARTBLOCK));
                        dblkno = XFS_FSB_TO_DADDR(mp, map[i].br_startblock);
-                        blkcnt = XFS_FSB_TO_BB(mp, map[i].br_blockcount);
+                        dblkcnt = XFS_FSB_TO_BB(mp, map[i].br_blockcount);
                        error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp,
-                                                   dblkno, blkcnt, 0, &bp,
+                                                   dblkno, dblkcnt, 0, &bp,
                                                   &xfs_attr3_rmt_buf_ops);
                        if (error)
                                return error;
-                        byte_cnt = min_t(int, valuelen, BBTOB(bp->b_length));
+                        error = xfs_attr_rmtval_copyout(mp, bp, args->dp->i_ino,
-                        byte_cnt = XFS_ATTR3_RMT_BUF_SPACE(mp, byte_cnt);
+                                                        &offset, &valuelen,
+                                                        &dst);
-                        src = bp->b_addr;
-                        if (xfs_sb_version_hascrc(&mp->m_sb)) {
-                                if (!xfs_attr3_rmt_hdr_ok(mp, args->dp->i_ino,
-                                                        offset, byte_cnt, bp)) {
-                                        xfs_alert(mp,
-"remote attribute header does not match required off/len/owner (0x%x/Ox%x,0x%llx)",
-                                                offset, byte_cnt, args->dp->i_ino);
-                                        xfs_buf_relse(bp);
-                                        return EFSCORRUPTED;
-                                }
-                                src += sizeof(struct xfs_attr3_rmt_hdr);
-                        }
-                        memcpy(dst, src, byte_cnt);
                        xfs_buf_relse(bp);
+                        if (error)
+                                return error;
-                        offset += byte_cnt;
+                        /* roll attribute extent map forwards */
-                        dst += byte_cnt;
-                        valuelen -= byte_cnt;
                        lblkno += map[i].br_blockcount;
+                        blkcnt -= map[i].br_blockcount;
                }
        }
        ASSERT(valuelen == 0);
@@ -270,17 +399,13 @@ xfs_attr_rmtval_set(
        struct xfs_inode        *dp = args->dp;
        struct xfs_mount        *mp = dp->i_mount;
        struct xfs_bmbt_irec    map;
-        struct xfs_buf          *bp;
-        xfs_daddr_t             dblkno;
        xfs_dablk_t             lblkno;
        xfs_fileoff_t           lfileoff = 0;
-        void                    *src = args->value;
+        char                    *src = args->value;
        int                     blkcnt;
        int                     valuelen;
        int                     nmap;
        int                     error;
-        int                     hdrcnt = 0;
-        bool                    crcs = xfs_sb_version_hascrc(&mp->m_sb);
        int                     offset = 0;
        trace_xfs_attr_rmtval_set(args);
@@ -289,24 +414,14 @@ xfs_attr_rmtval_set(
         * Find a "hole" in the attribute address space large enough for
         * us to drop the new attribute's value into. Because CRC enable
         * attributes have headers, we can't just do a straight byte to FSB
-         * conversion. We calculate the worst case block count in this case
+         * conversion and have to take the header space into account.
-         * and we may not need that many, so we have to handle this when
-         * allocating the blocks below. 
         */
-        if (!crcs)
+        blkcnt = xfs_attr3_rmt_blocks(mp, args->valuelen);
-                blkcnt = XFS_B_TO_FSB(mp, args->valuelen);
-        else
-                blkcnt = xfs_attr3_rmt_blocks(mp, args->valuelen);
        error = xfs_bmap_first_unused(args->trans, args->dp, blkcnt, &lfileoff,
                                                   XFS_ATTR_FORK);
        if (error)
                return error;
-        /* Start with the attribute data. We'll allocate the rest afterwards. */
-        if (crcs)
-                blkcnt = XFS_B_TO_FSB(mp, args->valuelen);
        args->rmtblkno = lblkno = (xfs_dablk_t)lfileoff;
        args->rmtblkcnt = blkcnt;
@@ -349,26 +464,6 @@ xfs_attr_rmtval_set(
                       (map.br_startblock != HOLESTARTBLOCK));
                lblkno += map.br_blockcount;
                blkcnt -= map.br_blockcount;
-                hdrcnt++;
-                /*
-                 * If we have enough blocks for the attribute data, calculate
-                 * how many extra blocks we need for headers. We might run
-                 * through this multiple times in the case that the additional
-                 * headers in the blocks needed for the data fragments spills
-                 * into requiring more blocks. e.g. for 512 byte blocks, we'll
-                 * spill for another block every 9 headers we require in this
-                 * loop.
-                 */
-                if (crcs && blkcnt == 0) {
-                        int total_len;
-                        total_len = args->valuelen +
-                                    hdrcnt * sizeof(struct xfs_attr3_rmt_hdr);
-                        blkcnt = XFS_B_TO_FSB(mp, total_len);
-                        blkcnt -= args->rmtblkcnt;
-                        args->rmtblkcnt += blkcnt;
-                }
                /*
                 * Start the next trans in the chain.
@@ -385,18 +480,19 @@ xfs_attr_rmtval_set(
         * the INCOMPLETE flag.
         */
        lblkno = args->rmtblkno;
+        blkcnt = args->rmtblkcnt;
        valuelen = args->valuelen;
        while (valuelen > 0) {
-                int     byte_cnt;
+                struct xfs_buf  *bp;
-                char    *buf;
+                xfs_daddr_t     dblkno;
+                int             dblkcnt;
+                ASSERT(blkcnt > 0);
-                /*
-                 * Try to remember where we decided to put the value.
-                 */
                xfs_bmap_init(args->flist, args->firstblock);
                nmap = 1;
                error = xfs_bmapi_read(dp, (xfs_fileoff_t)lblkno,
-                                       args->rmtblkcnt, &map, &nmap,
+                                       blkcnt, &map, &nmap,
                                       XFS_BMAPI_ATTRFORK);
                if (error)
                        return(error);
@@ -405,41 +501,27 @@ xfs_attr_rmtval_set(
                       (map.br_startblock != HOLESTARTBLOCK));
                dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock),
-                blkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount);
+                dblkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount);
-                bp = xfs_buf_get(mp->m_ddev_targp, dblkno, blkcnt, 0);
+                bp = xfs_buf_get(mp->m_ddev_targp, dblkno, dblkcnt, 0);
                if (!bp)
                        return ENOMEM;
                bp->b_ops = &xfs_attr3_rmt_buf_ops;
-                byte_cnt = BBTOB(bp->b_length);
+                xfs_attr_rmtval_copyin(mp, bp, args->dp->i_ino, &offset,
-                byte_cnt = XFS_ATTR3_RMT_BUF_SPACE(mp, byte_cnt);
+                                       &valuelen, &src);
-                if (valuelen < byte_cnt)
-                        byte_cnt = valuelen;
-                buf = bp->b_addr;
-                buf += xfs_attr3_rmt_hdr_set(mp, dp->i_ino, offset,
-                                             byte_cnt, bp);
-                memcpy(buf, src, byte_cnt);
-                if (byte_cnt < BBTOB(bp->b_length))
-                        xfs_buf_zero(bp, byte_cnt,
-                                     BBTOB(bp->b_length) - byte_cnt);
                error = xfs_bwrite(bp); /* GROT: NOTE: synchronous write */
                xfs_buf_relse(bp);
                if (error)
                        return error;
-                src += byte_cnt;
-                valuelen -= byte_cnt;
-                offset += byte_cnt;
-                hdrcnt--;
+                /* roll attribute extent map forwards */
                lblkno += map.br_blockcount;
+                blkcnt -= map.br_blockcount;
        }
        ASSERT(valuelen == 0);
-        ASSERT(hdrcnt == 0);
        return 0;
 }
@@ -448,33 +530,40 @@ xfs_attr_rmtval_set(
 * out-of-line buffer that it is stored on.
 */
 int
-xfs_attr_rmtval_remove(xfs_da_args_t *args)
+xfs_attr_rmtval_remove(
+        struct xfs_da_args      *args)
 {
-        xfs_mount_t *mp;
+        struct xfs_mount        *mp = args->dp->i_mount;
-        xfs_bmbt_irec_t map;
+        xfs_dablk_t             lblkno;
-        xfs_buf_t *bp;
+        int                     blkcnt;
-        xfs_daddr_t dblkno;
+        int                     error;
-        xfs_dablk_t lblkno;
+        int                     done;
-        int valuelen, blkcnt, nmap, error, done, committed;
        trace_xfs_attr_rmtval_remove(args);
-        mp = args->dp->i_mount;
        /*
-         * Roll through the "value", invalidating the attribute value's
+         * Roll through the "value", invalidating the attribute value's blocks.
-         * blocks.
+         * Note that args->rmtblkcnt is the minimum number of data blocks we'll
+         * see for a CRC enabled remote attribute. Each extent will have a
+         * header, and so we may have more blocks than we realise here.  If we
+         * fail to map the blocks correctly, we'll have problems with the buffer
+         * lookups.
         */
        lblkno = args->rmtblkno;
-        valuelen = args->rmtblkcnt;
+        blkcnt = args->rmtblkcnt;
-        while (valuelen > 0) {
+        while (blkcnt > 0) {
+                struct xfs_bmbt_irec    map;
+                struct xfs_buf          *bp;
+                xfs_daddr_t             dblkno;
+                int                     dblkcnt;
+                int                     nmap;
                /*
                 * Try to remember where we decided to put the value.
                 */
                nmap = 1;
                error = xfs_bmapi_read(args->dp, (xfs_fileoff_t)lblkno,
-                                       args->rmtblkcnt, &map, &nmap,
+                                       blkcnt, &map, &nmap, XFS_BMAPI_ATTRFORK);
-                                       XFS_BMAPI_ATTRFORK);
                if (error)
                        return(error);
                ASSERT(nmap == 1);
@@ -482,21 +571,20 @@ xfs_attr_rmtval_remove(xfs_da_args_t *args)
                       (map.br_startblock != HOLESTARTBLOCK));
                dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock),
-                blkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount);
+                dblkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount);
                /*
                 * If the "remote" value is in the cache, remove it.
                 */
-                bp = xfs_incore(mp->m_ddev_targp, dblkno, blkcnt, XBF_TRYLOCK);
+                bp = xfs_incore(mp->m_ddev_targp, dblkno, dblkcnt, XBF_TRYLOCK);
                if (bp) {
                        xfs_buf_stale(bp);
                        xfs_buf_relse(bp);
                        bp = NULL;
                }
-                valuelen -= map.br_blockcount;
                lblkno += map.br_blockcount;
+                blkcnt -= map.br_blockcount;
        }
        /*
@@ -506,6 +594,8 @@ xfs_attr_rmtval_remove(xfs_da_args_t *args)
        blkcnt = args->rmtblkcnt;
        done = 0;
        while (!done) {
+                int committed;
                xfs_bmap_init(args->flist, args->firstblock);
                error = xfs_bunmapi(args->trans, args->dp, lblkno, blkcnt,
                                    XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA,
diff --git a/fs/xfs/xfs_attr_remote.h b/fs/xfs/xfs_attr_remote.h
index c7cca60a062a..92a8fd7977cc 100644
--- a/fs/xfs/xfs_attr_remote.h
+++ b/fs/xfs/xfs_attr_remote.h
@@ -20,6 +20,14 @@
 #define XFS_ATTR3_RMT_MAGIC     0x5841524d      /* XARM */
+/*
+ * There is one of these headers per filesystem block in a remote attribute.
+ * This is done to ensure there is a 1:1 mapping between the attribute value
+ * length and the number of blocks needed to store the attribute. This makes the
+ * verification of a buffer a little more complex, but greatly simplifies the
+ * allocation, reading and writing of these attributes as we don't have to guess
+ * the number of blocks needed to store the attribute data.
+ */
 struct xfs_attr3_rmt_hdr {
        __be32  rm_magic;
        __be32  rm_offset;
@@ -39,6 +47,8 @@ struct xfs_attr3_rmt_hdr {
 extern const struct xfs_buf_ops xfs_attr3_rmt_buf_ops;
+int xfs_attr3_rmt_blocks(struct xfs_mount *mp, int attrlen);
 int xfs_attr_rmtval_get(struct xfs_da_args *args);
 int xfs_attr_rmtval_set(struct xfs_da_args *args);
 int xfs_attr_rmtval_remove(struct xfs_da_args *args);
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 82b70bda9f47..1b2472a46e46 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -513,6 +513,7 @@ _xfs_buf_find(
                xfs_alert(btp->bt_mount,
                          "%s: Block out of range: block 0x%llx, EOFS 0x%llx ",
                          __func__, blkno, eofs);
+                WARN_ON(1);
                return NULL;
        }
@@ -1649,7 +1650,7 @@ xfs_alloc_buftarg(
 {
        xfs_buftarg_t           *btp;
-        btp = kmem_zalloc(sizeof(*btp), KM_SLEEP);
+        btp = kmem_zalloc(sizeof(*btp), KM_SLEEP | KM_NOFS);
        btp->bt_mount = mp;
        btp->bt_dev =  bdev->bd_dev;
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index cf263476d6b4..4ec431777048 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -262,12 +262,7 @@ xfs_buf_item_format_segment(
                        vecp->i_addr = xfs_buf_offset(bp, buffer_offset);
                        vecp->i_len = nbits * XFS_BLF_CHUNK;
                        vecp->i_type = XLOG_REG_TYPE_BCHUNK;
-/*
+                        nvecs++;
- * You would think we need to bump the nvecs here too, but we do not
- * this number is used by recovery, and it gets confused by the boundary
- * split here
- *                      nvecs++;
- */
                        vecp++;
                        first_bit = next_bit;
                        last_bit = next_bit;
diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c
index 9b26a99ebfe9..0b8b2a13cd24 100644
--- a/fs/xfs/xfs_da_btree.c
+++ b/fs/xfs/xfs_da_btree.c
@@ -270,6 +270,7 @@ xfs_da3_node_read_verify(
                                break;
                        return;
                case XFS_ATTR_LEAF_MAGIC:
+                case XFS_ATTR3_LEAF_MAGIC:
                        bp->b_ops = &xfs_attr3_leaf_buf_ops;
                        bp->b_ops->verify_read(bp);
                        return;
@@ -2464,7 +2465,8 @@ xfs_buf_map_from_irec(
        ASSERT(nirecs >= 1);
        if (nirecs > 1) {
-                map = kmem_zalloc(nirecs * sizeof(struct xfs_buf_map), KM_SLEEP);
+                map = kmem_zalloc(nirecs * sizeof(struct xfs_buf_map),
+                                  KM_SLEEP | KM_NOFS);
                if (!map)
                        return ENOMEM;
                *mapp = map;
@@ -2520,7 +2522,8 @@ xfs_dabuf_map(
                 * Optimize the one-block case.
                 */
                if (nfsb != 1)
-                        irecs = kmem_zalloc(sizeof(irec) * nfsb, KM_SLEEP);
+                        irecs = kmem_zalloc(sizeof(irec) * nfsb,
+                                            KM_SLEEP | KM_NOFS);
                nirecs = nfsb;
                error = xfs_bmapi_read(dp, (xfs_fileoff_t)bno, nfsb, irecs,
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c
index f852b082a084..c407e1ccff43 100644
--- a/fs/xfs/xfs_dfrag.c
+++ b/fs/xfs/xfs_dfrag.c
@@ -219,6 +219,14 @@ xfs_swap_extents(
        int             taforkblks = 0;
        __uint64_t      tmp;
+        /*
+         * We have no way of updating owner information in the BMBT blocks for
+         * each inode on CRC enabled filesystems, so to avoid corrupting
+         * this metadata we simply don't allow extent swaps to occur.
+         */
+        if (xfs_sb_version_hascrc(&mp->m_sb))
+                return XFS_ERROR(EINVAL);
        tempifp = kmem_alloc(sizeof(xfs_ifork_t), KM_MAYFAIL);
        if (!tempifp) {
                error = XFS_ERROR(ENOMEM);
diff --git a/fs/xfs/xfs_dir2_format.h b/fs/xfs/xfs_dir2_format.h
index a3b1bd841a80..995f1f505a52 100644
--- a/fs/xfs/xfs_dir2_format.h
+++ b/fs/xfs/xfs_dir2_format.h
@@ -715,6 +715,7 @@ struct xfs_dir3_free_hdr {
        __be32                  firstdb;        /* db of first entry */
        __be32                  nvalid;         /* count of valid entries */
        __be32                  nused;          /* count of used entries */
+        __be32                  pad;            /* 64 bit alignment. */
 };
 struct xfs_dir3_free {
diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c
index 721ba2fe8e54..da71a1819d78 100644
--- a/fs/xfs/xfs_dir2_leaf.c
+++ b/fs/xfs/xfs_dir2_leaf.c
@@ -1336,7 +1336,7 @@ xfs_dir2_leaf_getdents(
                                     mp->m_sb.sb_blocksize);
        map_info = kmem_zalloc(offsetof(struct xfs_dir2_leaf_map_info, map) +
                                (length * sizeof(struct xfs_bmbt_irec)),
-                               KM_SLEEP);
+                               KM_SLEEP | KM_NOFS);
        map_info->map_size = length;
        /*
diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c
index 5246de4912d4..2226a00acd15 100644
--- a/fs/xfs/xfs_dir2_node.c
+++ b/fs/xfs/xfs_dir2_node.c
@@ -263,18 +263,19 @@ xfs_dir3_free_get_buf(
         * Initialize the new block to be empty, and remember
         * its first slot as our empty slot.
         */
-        hdr.magic = XFS_DIR2_FREE_MAGIC;
+        memset(bp->b_addr, 0, sizeof(struct xfs_dir3_free_hdr));
-        hdr.firstdb = 0;
+        memset(&hdr, 0, sizeof(hdr));
-        hdr.nused = 0;
-        hdr.nvalid = 0;
        if (xfs_sb_version_hascrc(&mp->m_sb)) {
                struct xfs_dir3_free_hdr *hdr3 = bp->b_addr;
                hdr.magic = XFS_DIR3_FREE_MAGIC;
                hdr3->hdr.blkno = cpu_to_be64(bp->b_bn);
                hdr3->hdr.owner = cpu_to_be64(dp->i_ino);
                uuid_copy(&hdr3->hdr.uuid, &mp->m_sb.sb_uuid);
-        }
+        } else
+                hdr.magic = XFS_DIR2_FREE_MAGIC;
        xfs_dir3_free_hdr_to_disk(bp->b_addr, &hdr);
        *bpp = bp;
        return 0;
@@ -1921,8 +1922,6 @@ xfs_dir2_node_addname_int(
                         */
                        freehdr.firstdb = (fbno - XFS_DIR2_FREE_FIRSTDB(mp)) *
                                        xfs_dir3_free_max_bests(mp);
-                        free->hdr.nvalid = 0;
-                        free->hdr.nused = 0;
                } else {
                        free = fbp->b_addr;
                        bests = xfs_dir3_free_bests_p(mp, free);
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c
index c0f375087efc..452920a3f03f 100644
--- a/fs/xfs/xfs_extfree_item.c
+++ b/fs/xfs/xfs_extfree_item.c
@@ -305,11 +305,12 @@ xfs_efi_release(xfs_efi_log_item_t	*efip,
 {
        ASSERT(atomic_read(&efip->efi_next_extent) >= nextents);
        if (atomic_sub_and_test(nextents, &efip->efi_next_extent)) {
-                __xfs_efi_release(efip);
                /* recovery needs us to drop the EFI reference, too */
                if (test_bit(XFS_EFI_RECOVERED, &efip->efi_flags))
                        __xfs_efi_release(efip);
+                __xfs_efi_release(efip);
+                /* efip may now have been freed, do not reference it again. */
        }
 }
diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h
index 6dda3f949b04..d04695545397 100644
--- a/fs/xfs/xfs_fs.h
+++ b/fs/xfs/xfs_fs.h
@@ -236,6 +236,7 @@ typedef struct xfs_fsop_resblks {
 #define XFS_FSOP_GEOM_FLAGS_PROJID32    0x0800  /* 32-bit project IDs   */
 #define XFS_FSOP_GEOM_FLAGS_DIRV2CI     0x1000  /* ASCII only CI names  */
 #define XFS_FSOP_GEOM_FLAGS_LAZYSB      0x4000  /* lazy superblock counters */
+#define XFS_FSOP_GEOM_FLAGS_V5SB        0x8000  /* version 5 superblock */
 /*
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index 87595b211da1..3c3644ea825b 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -99,7 +99,9 @@ xfs_fs_geometry(
                        (xfs_sb_version_hasattr2(&mp->m_sb) ?
                                XFS_FSOP_GEOM_FLAGS_ATTR2 : 0) |
                        (xfs_sb_version_hasprojid32bit(&mp->m_sb) ?
-                                XFS_FSOP_GEOM_FLAGS_PROJID32 : 0);
+                                XFS_FSOP_GEOM_FLAGS_PROJID32 : 0) |
+                        (xfs_sb_version_hascrc(&mp->m_sb) ?
+                                XFS_FSOP_GEOM_FLAGS_V5SB : 0);
                geo->logsectsize = xfs_sb_version_hassector(&mp->m_sb) ?
                                mp->m_sb.sb_logsectsize : BBSIZE;
                geo->rtsectsize = mp->m_sb.sb_blocksize;
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index d82efaa2ac73..ca9ecaa81112 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -455,6 +455,28 @@ xfs_vn_getattr(
        return 0;
 }
+static void
+xfs_setattr_mode(
+        struct xfs_trans        *tp,
+        struct xfs_inode        *ip,
+        struct iattr            *iattr)
+{
+        struct inode    *inode = VFS_I(ip);
+        umode_t         mode = iattr->ia_mode;
+        ASSERT(tp);
+        ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
+        if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID))
+                mode &= ~S_ISGID;
+        ip->i_d.di_mode &= S_IFMT;
+        ip->i_d.di_mode |= mode & ~S_IFMT;
+        inode->i_mode &= S_IFMT;
+        inode->i_mode |= mode & ~S_IFMT;
+}
 int
 xfs_setattr_nonsize(
        struct xfs_inode        *ip,
@@ -606,18 +628,8 @@ xfs_setattr_nonsize(
        /*
         * Change file access modes.
         */
-        if (mask & ATTR_MODE) {
+        if (mask & ATTR_MODE)
-                umode_t mode = iattr->ia_mode;
+                xfs_setattr_mode(tp, ip, iattr);
-                if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID))
-                        mode &= ~S_ISGID;
-                ip->i_d.di_mode &= S_IFMT;
-                ip->i_d.di_mode |= mode & ~S_IFMT;
-                inode->i_mode &= S_IFMT;
-                inode->i_mode |= mode & ~S_IFMT;
-        }
        /*
         * Change file access or modified times.
@@ -714,9 +726,8 @@ xfs_setattr_size(
                return XFS_ERROR(error);
        ASSERT(S_ISREG(ip->i_d.di_mode));
-        ASSERT((mask & (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_ATIME|ATTR_ATIME_SET|
+        ASSERT((mask & (ATTR_UID|ATTR_GID|ATTR_ATIME|ATTR_ATIME_SET|
-                        ATTR_MTIME_SET|ATTR_KILL_SUID|ATTR_KILL_SGID|
+                        ATTR_MTIME_SET|ATTR_KILL_PRIV|ATTR_TIMES_SET)) == 0);
-                        ATTR_KILL_PRIV|ATTR_TIMES_SET)) == 0);
        if (!(flags & XFS_ATTR_NOLOCK)) {
                lock_flags |= XFS_IOLOCK_EXCL;
@@ -860,6 +871,12 @@ xfs_setattr_size(
                xfs_inode_clear_eofblocks_tag(ip);
        }
+        /*
+         * Change file access modes.
+         */
+        if (mask & ATTR_MODE)
+                xfs_setattr_mode(tp, ip, iattr);
        if (mask & ATTR_CTIME) {
                inode->i_ctime = iattr->ia_ctime;
                ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec;
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index e3d0b85d852b..d0833b54e55d 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -139,7 +139,7 @@ xlog_cil_prepare_log_vecs(
                new_lv = kmem_zalloc(sizeof(*new_lv) +
                                niovecs * sizeof(struct xfs_log_iovec),
-                                KM_SLEEP);
+                                KM_SLEEP|KM_NOFS);
                /* The allocated iovec region lies beyond the log vector. */
                new_lv->lv_iovecp = (struct xfs_log_iovec *)&new_lv[1];
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 93f03ec17eec..d9e4d3c3991a 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -2097,6 +2097,17 @@ xlog_recover_do_reg_buffer(
                       ((uint)bit << XFS_BLF_SHIFT) + (nbits << XFS_BLF_SHIFT));
                /*
+                 * The dirty regions logged in the buffer, even though
+                 * contiguous, may span multiple chunks. This is because the
+                 * dirty region may span a physical page boundary in a buffer
+                 * and hence be split into two separate vectors for writing into
+                 * the log. Hence we need to trim nbits back to the length of
+                 * the current region being copied out of the log.
+                 */
+                if (item->ri_buf[i].i_len < (nbits << XFS_BLF_SHIFT))
+                        nbits = item->ri_buf[i].i_len >> XFS_BLF_SHIFT;
+                /*
                 * Do a sanity check if this is a dquot buffer. Just checking
                 * the first dquot in the buffer should do. XXXThis is
                 * probably a good thing to do for other buf types also.
diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c
index c41190cad6e9..6cdf6ffc36a1 100644
--- a/fs/xfs/xfs_qm_syscalls.c
+++ b/fs/xfs/xfs_qm_syscalls.c
@@ -489,31 +489,36 @@ xfs_qm_scall_setqlim(
        if ((newlim->d_fieldmask & XFS_DQ_MASK) == 0)
                return 0;
-        tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SETQLIM);
-        error = xfs_trans_reserve(tp, 0, XFS_QM_SETQLIM_LOG_RES(mp),
-                                  0, 0, XFS_DEFAULT_LOG_COUNT);
-        if (error) {
-                xfs_trans_cancel(tp, 0);
-                return (error);
-        }
        /*
         * We don't want to race with a quotaoff so take the quotaoff lock.
-         * (We don't hold an inode lock, so there's nothing else to stop
+         * We don't hold an inode lock, so there's nothing else to stop
-         * a quotaoff from happening). (XXXThis doesn't currently happen
+         * a quotaoff from happening.
-         * because we take the vfslock before calling xfs_qm_sysent).
         */
        mutex_lock(&q->qi_quotaofflock);
        /*
-         * Get the dquot (locked), and join it to the transaction.
+         * Get the dquot (locked) before we start, as we need to do a
-         * Allocate the dquot if this doesn't exist.
+         * transaction to allocate it if it doesn't exist. Once we have the
+         * dquot, unlock it so we can start the next transaction safely. We hold
+         * a reference to the dquot, so it's safe to do this unlock/lock without
+         * it being reclaimed in the mean time.
         */
-        if ((error = xfs_qm_dqget(mp, NULL, id, type, XFS_QMOPT_DQALLOC, &dqp))) {
+        error = xfs_qm_dqget(mp, NULL, id, type, XFS_QMOPT_DQALLOC, &dqp);
-                xfs_trans_cancel(tp, XFS_TRANS_ABORT);
+        if (error) {
                ASSERT(error != ENOENT);
                goto out_unlock;
        }
+        xfs_dqunlock(dqp);
+        tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SETQLIM);
+        error = xfs_trans_reserve(tp, 0, XFS_QM_SETQLIM_LOG_RES(mp),
+                                  0, 0, XFS_DEFAULT_LOG_COUNT);
+        if (error) {
+                xfs_trans_cancel(tp, 0);
+                goto out_rele;
+        }
+        xfs_dqlock(dqp);
        xfs_trans_dqjoin(tp, dqp);
        ddq = &dqp->q_core;
@@ -621,9 +626,10 @@ xfs_qm_scall_setqlim(
        xfs_trans_log_dquot(tp, dqp);
        error = xfs_trans_commit(tp, 0);
-        xfs_qm_dqrele(dqp);
- out_unlock:
+out_rele:
+        xfs_qm_dqrele(dqp);
+out_unlock:
        mutex_unlock(&q->qi_quotaofflock);
        return error;
 }
diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c
index 5f234389327c..195a403e1522 100644
--- a/fs/xfs/xfs_symlink.c
+++ b/fs/xfs/xfs_symlink.c
@@ -56,16 +56,9 @@ xfs_symlink_blocks(
        struct xfs_mount *mp,
        int             pathlen)
 {
-        int             fsblocks = 0;
+        int buflen = XFS_SYMLINK_BUF_SPACE(mp, mp->m_sb.sb_blocksize);
-        int             len = pathlen;
-        do {
+        return (pathlen + buflen - 1) / buflen;
-                fsblocks++;
-                len -= XFS_SYMLINK_BUF_SPACE(mp, mp->m_sb.sb_blocksize);
-        } while (len > 0);
-        ASSERT(fsblocks <= XFS_SYMLINK_MAPS);
-        return fsblocks;
 }
 static int
@@ -405,7 +398,7 @@ xfs_symlink(
        if (pathlen <= XFS_LITINO(mp, dp->i_d.di_version))
                fs_blocks = 0;
        else
-                fs_blocks = XFS_B_TO_FSB(mp, pathlen);
+                fs_blocks = xfs_symlink_blocks(mp, pathlen);
        resblks = XFS_SYMLINK_SPACE_RES(mp, link_name->len, fs_blocks);
        error = xfs_trans_reserve(tp, resblks, XFS_SYMLINK_LOG_RES(mp), 0,
                        XFS_TRANS_PERM_LOG_RES, XFS_SYMLINK_LOG_COUNT);
@@ -512,7 +505,7 @@ xfs_symlink(
                cur_chunk = target_path;
                offset = 0;
                for (n = 0; n < nmaps; n++) {
-                        char *buf;
+                        char    *buf;
                        d = XFS_FSB_TO_DADDR(mp, mval[n].br_startblock);
                        byte_cnt = XFS_FSB_TO_B(mp, mval[n].br_blockcount);
@@ -525,9 +518,7 @@ xfs_symlink(
                        bp->b_ops = &xfs_symlink_buf_ops;
                        byte_cnt = XFS_SYMLINK_BUF_SPACE(mp, byte_cnt);
-                        if (pathlen < byte_cnt) {
+                        byte_cnt = min(byte_cnt, pathlen);
-                                byte_cnt = pathlen;
-                        }
                        buf = bp->b_addr;
                        buf += xfs_symlink_hdr_set(mp, ip->i_ino, offset,
@@ -542,6 +533,7 @@ xfs_symlink(
                        xfs_trans_log_buf(tp, bp, 0, (buf + byte_cnt - 1) -
                                                        (char *)bp->b_addr);
                }
+                ASSERT(pathlen == 0);
        }
        /*
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 1501f4fa51a6..0176bb21f09a 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -1453,7 +1453,7 @@ xfs_free_file_space(
        xfs_mount_t             *mp;
        int                     nimap;
        uint                    resblks;
-        uint                    rounding;
+        xfs_off_t               rounding;
        int                     rt;
        xfs_fileoff_t           startoffset_fsb;
        xfs_trans_t             *tp;
@@ -1482,7 +1482,7 @@ xfs_free_file_space(
                inode_dio_wait(VFS_I(ip));
        }
-        rounding = max_t(uint, 1 << mp->m_sb.sb_blocklog, PAGE_CACHE_SIZE);
+        rounding = max_t(xfs_off_t, 1 << mp->m_sb.sb_blocklog, PAGE_CACHE_SIZE);
        ioffset = offset & ~(rounding - 1);
        error = -filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
                                              ioffset, -1);