aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2013-05-18 14:35:28 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2013-05-18 14:35:28 -0400
commit130901ba33c4a040e12cf7cce240c1056cc096ee (patch)
tree6471f689c88dad8bc1eabf1e404ffda60629abe7 /fs/btrfs
parente51066824af49c2b1e9e686ee9660f58641c7f36 (diff)
parentc5cb6a0573bef87e098ee3cd946115ebe60a910e (diff)
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs
Pull btrfs fixes from Chris Mason: "Miao Xie has been very busy, fixing races and enospc problems and many other small but important pieces. Alexandre Oliva discovered some problems with how our error handling was interacting with the block layer and for now has disabled our partial handling of sub-page writes. The real sub-page work is in a series of patches from IBM that we still need to integrate and test. The code Alexandre has turned off was really incomplete. Josef has more error handling fixes and an important fix for the new skinny extent format. This also has my fix for the tracepoint crash from late in 3.9. It's the first stage in a larger clean up to get rid of btrfs_bio and make a proper bioset for all the items we need to tack into the bio. For now the bioset only holds our mirror_num and stripe_index, but for the next merge window I'll shuffle more in." * 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs: (25 commits) Btrfs: use a btrfs bioset instead of abusing bio internals Btrfs: make sure roots are assigned before freeing their nodes Btrfs: explicitly use global_block_rsv for quota_tree btrfs: do away with non-whole_page extent I/O Btrfs: don't invoke btrfs_invalidate_inodes() in the spin lock context Btrfs: remove BUG_ON() in btrfs_read_fs_tree_no_radix() Btrfs: pause the space balance when remounting to R/O Btrfs: fix unprotected root node of the subvolume's inode rb-tree Btrfs: fix accessing a freed tree root Btrfs: return errno if possible when we fail to allocate memory Btrfs: update the global reserve if it is empty Btrfs: don't steal the reserved space from the global reserve if their space type is different Btrfs: optimize the error handle of use_block_rsv() Btrfs: don't use global block reservation for inode cache truncation Btrfs: don't abort the current transaction if there is no enough space for inode cache Correct allowed raid levels on balance. Btrfs: fix possible memory leak in replace_path() Btrfs: fix possible memory leak in the find_parent_nodes() Btrfs: don't allow device replace on RAID5/RAID6 Btrfs: handle running extent ops with skinny metadata ...
Diffstat (limited to 'fs/btrfs')
-rw-r--r--fs/btrfs/backref.c3
-rw-r--r--fs/btrfs/check-integrity.c2
-rw-r--r--fs/btrfs/ctree.c4
-rw-r--r--fs/btrfs/ctree.h8
-rw-r--r--fs/btrfs/delayed-ref.h1
-rw-r--r--fs/btrfs/dev-replace.c5
-rw-r--r--fs/btrfs/disk-io.c52
-rw-r--r--fs/btrfs/extent-tree.c94
-rw-r--r--fs/btrfs/extent_io.c138
-rw-r--r--fs/btrfs/extent_io.h2
-rw-r--r--fs/btrfs/free-space-cache.c43
-rw-r--r--fs/btrfs/free-space-cache.h2
-rw-r--r--fs/btrfs/inode-map.c8
-rw-r--r--fs/btrfs/inode.c81
-rw-r--r--fs/btrfs/ioctl.c10
-rw-r--r--fs/btrfs/raid56.c2
-rw-r--r--fs/btrfs/relocation.c7
-rw-r--r--fs/btrfs/scrub.c10
-rw-r--r--fs/btrfs/super.c1
-rw-r--r--fs/btrfs/volumes.c54
-rw-r--r--fs/btrfs/volumes.h20
21 files changed, 301 insertions, 246 deletions
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index b4fb41558111..290e347b6db3 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -918,7 +918,8 @@ again:
918 ref->parent, bsz, 0); 918 ref->parent, bsz, 0);
919 if (!eb || !extent_buffer_uptodate(eb)) { 919 if (!eb || !extent_buffer_uptodate(eb)) {
920 free_extent_buffer(eb); 920 free_extent_buffer(eb);
921 return -EIO; 921 ret = -EIO;
922 goto out;
922 } 923 }
923 ret = find_extent_in_eb(eb, bytenr, 924 ret = find_extent_in_eb(eb, bytenr,
924 *extent_item_pos, &eie); 925 *extent_item_pos, &eie);
diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c
index 18af6f48781a..1431a6965017 100644
--- a/fs/btrfs/check-integrity.c
+++ b/fs/btrfs/check-integrity.c
@@ -1700,7 +1700,7 @@ static int btrfsic_read_block(struct btrfsic_state *state,
1700 unsigned int j; 1700 unsigned int j;
1701 DECLARE_COMPLETION_ONSTACK(complete); 1701 DECLARE_COMPLETION_ONSTACK(complete);
1702 1702
1703 bio = bio_alloc(GFP_NOFS, num_pages - i); 1703 bio = btrfs_io_bio_alloc(GFP_NOFS, num_pages - i);
1704 if (!bio) { 1704 if (!bio) {
1705 printk(KERN_INFO 1705 printk(KERN_INFO
1706 "btrfsic: bio_alloc() for %u pages failed!\n", 1706 "btrfsic: bio_alloc() for %u pages failed!\n",
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index de6de8e60b46..02fae7f7e42c 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -951,10 +951,12 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
951 BUG_ON(ret); /* -ENOMEM */ 951 BUG_ON(ret); /* -ENOMEM */
952 } 952 }
953 if (new_flags != 0) { 953 if (new_flags != 0) {
954 int level = btrfs_header_level(buf);
955
954 ret = btrfs_set_disk_extent_flags(trans, root, 956 ret = btrfs_set_disk_extent_flags(trans, root,
955 buf->start, 957 buf->start,
956 buf->len, 958 buf->len,
957 new_flags, 0); 959 new_flags, level, 0);
958 if (ret) 960 if (ret)
959 return ret; 961 return ret;
960 } 962 }
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 63c328a9ce95..d6dd49b51ba8 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -88,12 +88,12 @@ struct btrfs_ordered_sum;
88/* holds checksums of all the data extents */ 88/* holds checksums of all the data extents */
89#define BTRFS_CSUM_TREE_OBJECTID 7ULL 89#define BTRFS_CSUM_TREE_OBJECTID 7ULL
90 90
91/* for storing balance parameters in the root tree */
92#define BTRFS_BALANCE_OBJECTID -4ULL
93
94/* holds quota configuration and tracking */ 91/* holds quota configuration and tracking */
95#define BTRFS_QUOTA_TREE_OBJECTID 8ULL 92#define BTRFS_QUOTA_TREE_OBJECTID 8ULL
96 93
94/* for storing balance parameters in the root tree */
95#define BTRFS_BALANCE_OBJECTID -4ULL
96
97/* orhpan objectid for tracking unlinked/truncated files */ 97/* orhpan objectid for tracking unlinked/truncated files */
98#define BTRFS_ORPHAN_OBJECTID -5ULL 98#define BTRFS_ORPHAN_OBJECTID -5ULL
99 99
@@ -3075,7 +3075,7 @@ int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
3075int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans, 3075int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
3076 struct btrfs_root *root, 3076 struct btrfs_root *root,
3077 u64 bytenr, u64 num_bytes, u64 flags, 3077 u64 bytenr, u64 num_bytes, u64 flags,
3078 int is_data); 3078 int level, int is_data);
3079int btrfs_free_extent(struct btrfs_trans_handle *trans, 3079int btrfs_free_extent(struct btrfs_trans_handle *trans,
3080 struct btrfs_root *root, 3080 struct btrfs_root *root,
3081 u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid, 3081 u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h
index f75fcaf79aeb..70b962cc177d 100644
--- a/fs/btrfs/delayed-ref.h
+++ b/fs/btrfs/delayed-ref.h
@@ -60,6 +60,7 @@ struct btrfs_delayed_ref_node {
60struct btrfs_delayed_extent_op { 60struct btrfs_delayed_extent_op {
61 struct btrfs_disk_key key; 61 struct btrfs_disk_key key;
62 u64 flags_to_set; 62 u64 flags_to_set;
63 int level;
63 unsigned int update_key:1; 64 unsigned int update_key:1;
64 unsigned int update_flags:1; 65 unsigned int update_flags:1;
65 unsigned int is_data:1; 66 unsigned int is_data:1;
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index 7ba7b3900cb8..65241f32d3f8 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -313,6 +313,11 @@ int btrfs_dev_replace_start(struct btrfs_root *root,
313 struct btrfs_device *tgt_device = NULL; 313 struct btrfs_device *tgt_device = NULL;
314 struct btrfs_device *src_device = NULL; 314 struct btrfs_device *src_device = NULL;
315 315
316 if (btrfs_fs_incompat(fs_info, RAID56)) {
317 pr_warn("btrfs: dev_replace cannot yet handle RAID5/RAID6\n");
318 return -EINVAL;
319 }
320
316 switch (args->start.cont_reading_from_srcdev_mode) { 321 switch (args->start.cont_reading_from_srcdev_mode) {
317 case BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_ALWAYS: 322 case BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_ALWAYS:
318 case BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_AVOID: 323 case BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_AVOID:
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 4e9ebe1f1827..e7b3cb5286a5 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -152,7 +152,7 @@ static struct btrfs_lockdep_keyset {
152 { .id = BTRFS_DEV_TREE_OBJECTID, .name_stem = "dev" }, 152 { .id = BTRFS_DEV_TREE_OBJECTID, .name_stem = "dev" },
153 { .id = BTRFS_FS_TREE_OBJECTID, .name_stem = "fs" }, 153 { .id = BTRFS_FS_TREE_OBJECTID, .name_stem = "fs" },
154 { .id = BTRFS_CSUM_TREE_OBJECTID, .name_stem = "csum" }, 154 { .id = BTRFS_CSUM_TREE_OBJECTID, .name_stem = "csum" },
155 { .id = BTRFS_ORPHAN_OBJECTID, .name_stem = "orphan" }, 155 { .id = BTRFS_QUOTA_TREE_OBJECTID, .name_stem = "quota" },
156 { .id = BTRFS_TREE_LOG_OBJECTID, .name_stem = "log" }, 156 { .id = BTRFS_TREE_LOG_OBJECTID, .name_stem = "log" },
157 { .id = BTRFS_TREE_RELOC_OBJECTID, .name_stem = "treloc" }, 157 { .id = BTRFS_TREE_RELOC_OBJECTID, .name_stem = "treloc" },
158 { .id = BTRFS_DATA_RELOC_TREE_OBJECTID, .name_stem = "dreloc" }, 158 { .id = BTRFS_DATA_RELOC_TREE_OBJECTID, .name_stem = "dreloc" },
@@ -1513,7 +1513,6 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root,
1513 } 1513 }
1514 1514
1515 root->commit_root = btrfs_root_node(root); 1515 root->commit_root = btrfs_root_node(root);
1516 BUG_ON(!root->node); /* -ENOMEM */
1517out: 1516out:
1518 if (location->objectid != BTRFS_TREE_LOG_OBJECTID) { 1517 if (location->objectid != BTRFS_TREE_LOG_OBJECTID) {
1519 root->ref_cows = 1; 1518 root->ref_cows = 1;
@@ -1988,30 +1987,33 @@ static void free_root_pointers(struct btrfs_fs_info *info, int chunk_root)
1988{ 1987{
1989 free_extent_buffer(info->tree_root->node); 1988 free_extent_buffer(info->tree_root->node);
1990 free_extent_buffer(info->tree_root->commit_root); 1989 free_extent_buffer(info->tree_root->commit_root);
1991 free_extent_buffer(info->dev_root->node);
1992 free_extent_buffer(info->dev_root->commit_root);
1993 free_extent_buffer(info->extent_root->node);
1994 free_extent_buffer(info->extent_root->commit_root);
1995 free_extent_buffer(info->csum_root->node);
1996 free_extent_buffer(info->csum_root->commit_root);
1997 if (info->quota_root) {
1998 free_extent_buffer(info->quota_root->node);
1999 free_extent_buffer(info->quota_root->commit_root);
2000 }
2001
2002 info->tree_root->node = NULL; 1990 info->tree_root->node = NULL;
2003 info->tree_root->commit_root = NULL; 1991 info->tree_root->commit_root = NULL;
2004 info->dev_root->node = NULL; 1992
2005 info->dev_root->commit_root = NULL; 1993 if (info->dev_root) {
2006 info->extent_root->node = NULL; 1994 free_extent_buffer(info->dev_root->node);
2007 info->extent_root->commit_root = NULL; 1995 free_extent_buffer(info->dev_root->commit_root);
2008 info->csum_root->node = NULL; 1996 info->dev_root->node = NULL;
2009 info->csum_root->commit_root = NULL; 1997 info->dev_root->commit_root = NULL;
1998 }
1999 if (info->extent_root) {
2000 free_extent_buffer(info->extent_root->node);
2001 free_extent_buffer(info->extent_root->commit_root);
2002 info->extent_root->node = NULL;
2003 info->extent_root->commit_root = NULL;
2004 }
2005 if (info->csum_root) {
2006 free_extent_buffer(info->csum_root->node);
2007 free_extent_buffer(info->csum_root->commit_root);
2008 info->csum_root->node = NULL;
2009 info->csum_root->commit_root = NULL;
2010 }
2010 if (info->quota_root) { 2011 if (info->quota_root) {
2012 free_extent_buffer(info->quota_root->node);
2013 free_extent_buffer(info->quota_root->commit_root);
2011 info->quota_root->node = NULL; 2014 info->quota_root->node = NULL;
2012 info->quota_root->commit_root = NULL; 2015 info->quota_root->commit_root = NULL;
2013 } 2016 }
2014
2015 if (chunk_root) { 2017 if (chunk_root) {
2016 free_extent_buffer(info->chunk_root->node); 2018 free_extent_buffer(info->chunk_root->node);
2017 free_extent_buffer(info->chunk_root->commit_root); 2019 free_extent_buffer(info->chunk_root->commit_root);
@@ -3128,7 +3130,7 @@ static int write_dev_flush(struct btrfs_device *device, int wait)
3128 * caller 3130 * caller
3129 */ 3131 */
3130 device->flush_bio = NULL; 3132 device->flush_bio = NULL;
3131 bio = bio_alloc(GFP_NOFS, 0); 3133 bio = btrfs_io_bio_alloc(GFP_NOFS, 0);
3132 if (!bio) 3134 if (!bio)
3133 return -ENOMEM; 3135 return -ENOMEM;
3134 3136
@@ -3659,8 +3661,11 @@ static void btrfs_destroy_ordered_operations(struct btrfs_transaction *t,
3659 ordered_operations); 3661 ordered_operations);
3660 3662
3661 list_del_init(&btrfs_inode->ordered_operations); 3663 list_del_init(&btrfs_inode->ordered_operations);
3664 spin_unlock(&root->fs_info->ordered_extent_lock);
3662 3665
3663 btrfs_invalidate_inodes(btrfs_inode->root); 3666 btrfs_invalidate_inodes(btrfs_inode->root);
3667
3668 spin_lock(&root->fs_info->ordered_extent_lock);
3664 } 3669 }
3665 3670
3666 spin_unlock(&root->fs_info->ordered_extent_lock); 3671 spin_unlock(&root->fs_info->ordered_extent_lock);
@@ -3782,8 +3787,11 @@ static void btrfs_destroy_delalloc_inodes(struct btrfs_root *root)
3782 list_del_init(&btrfs_inode->delalloc_inodes); 3787 list_del_init(&btrfs_inode->delalloc_inodes);
3783 clear_bit(BTRFS_INODE_IN_DELALLOC_LIST, 3788 clear_bit(BTRFS_INODE_IN_DELALLOC_LIST,
3784 &btrfs_inode->runtime_flags); 3789 &btrfs_inode->runtime_flags);
3790 spin_unlock(&root->fs_info->delalloc_lock);
3785 3791
3786 btrfs_invalidate_inodes(btrfs_inode->root); 3792 btrfs_invalidate_inodes(btrfs_inode->root);
3793
3794 spin_lock(&root->fs_info->delalloc_lock);
3787 } 3795 }
3788 3796
3789 spin_unlock(&root->fs_info->delalloc_lock); 3797 spin_unlock(&root->fs_info->delalloc_lock);
@@ -3808,7 +3816,7 @@ static int btrfs_destroy_marked_extents(struct btrfs_root *root,
3808 while (start <= end) { 3816 while (start <= end) {
3809 eb = btrfs_find_tree_block(root, start, 3817 eb = btrfs_find_tree_block(root, start,
3810 root->leafsize); 3818 root->leafsize);
3811 start += eb->len; 3819 start += root->leafsize;
3812 if (!eb) 3820 if (!eb)
3813 continue; 3821 continue;
3814 wait_on_extent_buffer_writeback(eb); 3822 wait_on_extent_buffer_writeback(eb);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 2305b5c5cf00..df472ab1b5ac 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2070,8 +2070,7 @@ static int run_delayed_extent_op(struct btrfs_trans_handle *trans,
2070 u32 item_size; 2070 u32 item_size;
2071 int ret; 2071 int ret;
2072 int err = 0; 2072 int err = 0;
2073 int metadata = (node->type == BTRFS_TREE_BLOCK_REF_KEY || 2073 int metadata = !extent_op->is_data;
2074 node->type == BTRFS_SHARED_BLOCK_REF_KEY);
2075 2074
2076 if (trans->aborted) 2075 if (trans->aborted)
2077 return 0; 2076 return 0;
@@ -2086,11 +2085,8 @@ static int run_delayed_extent_op(struct btrfs_trans_handle *trans,
2086 key.objectid = node->bytenr; 2085 key.objectid = node->bytenr;
2087 2086
2088 if (metadata) { 2087 if (metadata) {
2089 struct btrfs_delayed_tree_ref *tree_ref;
2090
2091 tree_ref = btrfs_delayed_node_to_tree_ref(node);
2092 key.type = BTRFS_METADATA_ITEM_KEY; 2088 key.type = BTRFS_METADATA_ITEM_KEY;
2093 key.offset = tree_ref->level; 2089 key.offset = extent_op->level;
2094 } else { 2090 } else {
2095 key.type = BTRFS_EXTENT_ITEM_KEY; 2091 key.type = BTRFS_EXTENT_ITEM_KEY;
2096 key.offset = node->num_bytes; 2092 key.offset = node->num_bytes;
@@ -2719,7 +2715,7 @@ out:
2719int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans, 2715int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
2720 struct btrfs_root *root, 2716 struct btrfs_root *root,
2721 u64 bytenr, u64 num_bytes, u64 flags, 2717 u64 bytenr, u64 num_bytes, u64 flags,
2722 int is_data) 2718 int level, int is_data)
2723{ 2719{
2724 struct btrfs_delayed_extent_op *extent_op; 2720 struct btrfs_delayed_extent_op *extent_op;
2725 int ret; 2721 int ret;
@@ -2732,6 +2728,7 @@ int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
2732 extent_op->update_flags = 1; 2728 extent_op->update_flags = 1;
2733 extent_op->update_key = 0; 2729 extent_op->update_key = 0;
2734 extent_op->is_data = is_data ? 1 : 0; 2730 extent_op->is_data = is_data ? 1 : 0;
2731 extent_op->level = level;
2735 2732
2736 ret = btrfs_add_delayed_extent_op(root->fs_info, trans, bytenr, 2733 ret = btrfs_add_delayed_extent_op(root->fs_info, trans, bytenr,
2737 num_bytes, extent_op); 2734 num_bytes, extent_op);
@@ -3109,6 +3106,11 @@ again:
3109 WARN_ON(ret); 3106 WARN_ON(ret);
3110 3107
3111 if (i_size_read(inode) > 0) { 3108 if (i_size_read(inode) > 0) {
3109 ret = btrfs_check_trunc_cache_free_space(root,
3110 &root->fs_info->global_block_rsv);
3111 if (ret)
3112 goto out_put;
3113
3112 ret = btrfs_truncate_free_space_cache(root, trans, path, 3114 ret = btrfs_truncate_free_space_cache(root, trans, path,
3113 inode); 3115 inode);
3114 if (ret) 3116 if (ret)
@@ -4562,6 +4564,8 @@ static void init_global_block_rsv(struct btrfs_fs_info *fs_info)
4562 fs_info->csum_root->block_rsv = &fs_info->global_block_rsv; 4564 fs_info->csum_root->block_rsv = &fs_info->global_block_rsv;
4563 fs_info->dev_root->block_rsv = &fs_info->global_block_rsv; 4565 fs_info->dev_root->block_rsv = &fs_info->global_block_rsv;
4564 fs_info->tree_root->block_rsv = &fs_info->global_block_rsv; 4566 fs_info->tree_root->block_rsv = &fs_info->global_block_rsv;
4567 if (fs_info->quota_root)
4568 fs_info->quota_root->block_rsv = &fs_info->global_block_rsv;
4565 fs_info->chunk_root->block_rsv = &fs_info->chunk_block_rsv; 4569 fs_info->chunk_root->block_rsv = &fs_info->chunk_block_rsv;
4566 4570
4567 update_global_block_rsv(fs_info); 4571 update_global_block_rsv(fs_info);
@@ -6651,51 +6655,51 @@ use_block_rsv(struct btrfs_trans_handle *trans,
6651 struct btrfs_block_rsv *block_rsv; 6655 struct btrfs_block_rsv *block_rsv;
6652 struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv; 6656 struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv;
6653 int ret; 6657 int ret;
6658 bool global_updated = false;
6654 6659
6655 block_rsv = get_block_rsv(trans, root); 6660 block_rsv = get_block_rsv(trans, root);
6656 6661
6657 if (block_rsv->size == 0) { 6662 if (unlikely(block_rsv->size == 0))
6658 ret = reserve_metadata_bytes(root, block_rsv, blocksize, 6663 goto try_reserve;
6659 BTRFS_RESERVE_NO_FLUSH); 6664again:
6660 /* 6665 ret = block_rsv_use_bytes(block_rsv, blocksize);
6661 * If we couldn't reserve metadata bytes try and use some from 6666 if (!ret)
6662 * the global reserve.
6663 */
6664 if (ret && block_rsv != global_rsv) {
6665 ret = block_rsv_use_bytes(global_rsv, blocksize);
6666 if (!ret)
6667 return global_rsv;
6668 return ERR_PTR(ret);
6669 } else if (ret) {
6670 return ERR_PTR(ret);
6671 }
6672 return block_rsv; 6667 return block_rsv;
6668
6669 if (block_rsv->failfast)
6670 return ERR_PTR(ret);
6671
6672 if (block_rsv->type == BTRFS_BLOCK_RSV_GLOBAL && !global_updated) {
6673 global_updated = true;
6674 update_global_block_rsv(root->fs_info);
6675 goto again;
6673 } 6676 }
6674 6677
6675 ret = block_rsv_use_bytes(block_rsv, blocksize); 6678 if (btrfs_test_opt(root, ENOSPC_DEBUG)) {
6679 static DEFINE_RATELIMIT_STATE(_rs,
6680 DEFAULT_RATELIMIT_INTERVAL * 10,
6681 /*DEFAULT_RATELIMIT_BURST*/ 1);
6682 if (__ratelimit(&_rs))
6683 WARN(1, KERN_DEBUG
6684 "btrfs: block rsv returned %d\n", ret);
6685 }
6686try_reserve:
6687 ret = reserve_metadata_bytes(root, block_rsv, blocksize,
6688 BTRFS_RESERVE_NO_FLUSH);
6676 if (!ret) 6689 if (!ret)
6677 return block_rsv; 6690 return block_rsv;
6678 if (ret && !block_rsv->failfast) { 6691 /*
6679 if (btrfs_test_opt(root, ENOSPC_DEBUG)) { 6692 * If we couldn't reserve metadata bytes try and use some from
6680 static DEFINE_RATELIMIT_STATE(_rs, 6693 * the global reserve if its space type is the same as the global
6681 DEFAULT_RATELIMIT_INTERVAL * 10, 6694 * reservation.
6682 /*DEFAULT_RATELIMIT_BURST*/ 1); 6695 */
6683 if (__ratelimit(&_rs)) 6696 if (block_rsv->type != BTRFS_BLOCK_RSV_GLOBAL &&
6684 WARN(1, KERN_DEBUG 6697 block_rsv->space_info == global_rsv->space_info) {
6685 "btrfs: block rsv returned %d\n", ret); 6698 ret = block_rsv_use_bytes(global_rsv, blocksize);
6686 } 6699 if (!ret)
6687 ret = reserve_metadata_bytes(root, block_rsv, blocksize, 6700 return global_rsv;
6688 BTRFS_RESERVE_NO_FLUSH);
6689 if (!ret) {
6690 return block_rsv;
6691 } else if (ret && block_rsv != global_rsv) {
6692 ret = block_rsv_use_bytes(global_rsv, blocksize);
6693 if (!ret)
6694 return global_rsv;
6695 }
6696 } 6701 }
6697 6702 return ERR_PTR(ret);
6698 return ERR_PTR(-ENOSPC);
6699} 6703}
6700 6704
6701static void unuse_block_rsv(struct btrfs_fs_info *fs_info, 6705static void unuse_block_rsv(struct btrfs_fs_info *fs_info,
@@ -6763,6 +6767,7 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
6763 extent_op->update_key = 1; 6767 extent_op->update_key = 1;
6764 extent_op->update_flags = 1; 6768 extent_op->update_flags = 1;
6765 extent_op->is_data = 0; 6769 extent_op->is_data = 0;
6770 extent_op->level = level;
6766 6771
6767 ret = btrfs_add_delayed_tree_ref(root->fs_info, trans, 6772 ret = btrfs_add_delayed_tree_ref(root->fs_info, trans,
6768 ins.objectid, 6773 ins.objectid,
@@ -6934,7 +6939,8 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
6934 ret = btrfs_dec_ref(trans, root, eb, 0, wc->for_reloc); 6939 ret = btrfs_dec_ref(trans, root, eb, 0, wc->for_reloc);
6935 BUG_ON(ret); /* -ENOMEM */ 6940 BUG_ON(ret); /* -ENOMEM */
6936 ret = btrfs_set_disk_extent_flags(trans, root, eb->start, 6941 ret = btrfs_set_disk_extent_flags(trans, root, eb->start,
6937 eb->len, flag, 0); 6942 eb->len, flag,
6943 btrfs_header_level(eb), 0);
6938 BUG_ON(ret); /* -ENOMEM */ 6944 BUG_ON(ret); /* -ENOMEM */
6939 wc->flags[level] |= flag; 6945 wc->flags[level] |= flag;
6940 } 6946 }
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 32d67a822e93..e7e7afb4a872 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -23,6 +23,7 @@
23 23
24static struct kmem_cache *extent_state_cache; 24static struct kmem_cache *extent_state_cache;
25static struct kmem_cache *extent_buffer_cache; 25static struct kmem_cache *extent_buffer_cache;
26static struct bio_set *btrfs_bioset;
26 27
27#ifdef CONFIG_BTRFS_DEBUG 28#ifdef CONFIG_BTRFS_DEBUG
28static LIST_HEAD(buffers); 29static LIST_HEAD(buffers);
@@ -125,10 +126,20 @@ int __init extent_io_init(void)
125 SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL); 126 SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
126 if (!extent_buffer_cache) 127 if (!extent_buffer_cache)
127 goto free_state_cache; 128 goto free_state_cache;
129
130 btrfs_bioset = bioset_create(BIO_POOL_SIZE,
131 offsetof(struct btrfs_io_bio, bio));
132 if (!btrfs_bioset)
133 goto free_buffer_cache;
128 return 0; 134 return 0;
129 135
136free_buffer_cache:
137 kmem_cache_destroy(extent_buffer_cache);
138 extent_buffer_cache = NULL;
139
130free_state_cache: 140free_state_cache:
131 kmem_cache_destroy(extent_state_cache); 141 kmem_cache_destroy(extent_state_cache);
142 extent_state_cache = NULL;
132 return -ENOMEM; 143 return -ENOMEM;
133} 144}
134 145
@@ -145,6 +156,8 @@ void extent_io_exit(void)
145 kmem_cache_destroy(extent_state_cache); 156 kmem_cache_destroy(extent_state_cache);
146 if (extent_buffer_cache) 157 if (extent_buffer_cache)
147 kmem_cache_destroy(extent_buffer_cache); 158 kmem_cache_destroy(extent_buffer_cache);
159 if (btrfs_bioset)
160 bioset_free(btrfs_bioset);
148} 161}
149 162
150void extent_io_tree_init(struct extent_io_tree *tree, 163void extent_io_tree_init(struct extent_io_tree *tree,
@@ -1948,28 +1961,6 @@ static void check_page_uptodate(struct extent_io_tree *tree, struct page *page)
1948} 1961}
1949 1962
1950/* 1963/*
1951 * helper function to unlock a page if all the extents in the tree
1952 * for that page are unlocked
1953 */
1954static void check_page_locked(struct extent_io_tree *tree, struct page *page)
1955{
1956 u64 start = page_offset(page);
1957 u64 end = start + PAGE_CACHE_SIZE - 1;
1958 if (!test_range_bit(tree, start, end, EXTENT_LOCKED, 0, NULL))
1959 unlock_page(page);
1960}
1961
1962/*
1963 * helper function to end page writeback if all the extents
1964 * in the tree for that page are done with writeback
1965 */
1966static void check_page_writeback(struct extent_io_tree *tree,
1967 struct page *page)
1968{
1969 end_page_writeback(page);
1970}
1971
1972/*
1973 * When IO fails, either with EIO or csum verification fails, we 1964 * When IO fails, either with EIO or csum verification fails, we
1974 * try other mirrors that might have a good copy of the data. This 1965 * try other mirrors that might have a good copy of the data. This
1975 * io_failure_record is used to record state as we go through all the 1966 * io_failure_record is used to record state as we go through all the
@@ -2046,7 +2037,7 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 start,
2046 if (btrfs_is_parity_mirror(map_tree, logical, length, mirror_num)) 2037 if (btrfs_is_parity_mirror(map_tree, logical, length, mirror_num))
2047 return 0; 2038 return 0;
2048 2039
2049 bio = bio_alloc(GFP_NOFS, 1); 2040 bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
2050 if (!bio) 2041 if (!bio)
2051 return -EIO; 2042 return -EIO;
2052 bio->bi_private = &compl; 2043 bio->bi_private = &compl;
@@ -2336,7 +2327,7 @@ static int bio_readpage_error(struct bio *failed_bio, struct page *page,
2336 return -EIO; 2327 return -EIO;
2337 } 2328 }
2338 2329
2339 bio = bio_alloc(GFP_NOFS, 1); 2330 bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
2340 if (!bio) { 2331 if (!bio) {
2341 free_io_failure(inode, failrec, 0); 2332 free_io_failure(inode, failrec, 0);
2342 return -EIO; 2333 return -EIO;
@@ -2398,19 +2389,24 @@ static void end_bio_extent_writepage(struct bio *bio, int err)
2398 struct extent_io_tree *tree; 2389 struct extent_io_tree *tree;
2399 u64 start; 2390 u64 start;
2400 u64 end; 2391 u64 end;
2401 int whole_page;
2402 2392
2403 do { 2393 do {
2404 struct page *page = bvec->bv_page; 2394 struct page *page = bvec->bv_page;
2405 tree = &BTRFS_I(page->mapping->host)->io_tree; 2395 tree = &BTRFS_I(page->mapping->host)->io_tree;
2406 2396
2407 start = page_offset(page) + bvec->bv_offset; 2397 /* We always issue full-page reads, but if some block
2408 end = start + bvec->bv_len - 1; 2398 * in a page fails to read, blk_update_request() will
2399 * advance bv_offset and adjust bv_len to compensate.
2400 * Print a warning for nonzero offsets, and an error
2401 * if they don't add up to a full page. */
2402 if (bvec->bv_offset || bvec->bv_len != PAGE_CACHE_SIZE)
2403 printk("%s page write in btrfs with offset %u and length %u\n",
2404 bvec->bv_offset + bvec->bv_len != PAGE_CACHE_SIZE
2405 ? KERN_ERR "partial" : KERN_INFO "incomplete",
2406 bvec->bv_offset, bvec->bv_len);
2409 2407
2410 if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE) 2408 start = page_offset(page);
2411 whole_page = 1; 2409 end = start + bvec->bv_offset + bvec->bv_len - 1;
2412 else
2413 whole_page = 0;
2414 2410
2415 if (--bvec >= bio->bi_io_vec) 2411 if (--bvec >= bio->bi_io_vec)
2416 prefetchw(&bvec->bv_page->flags); 2412 prefetchw(&bvec->bv_page->flags);
@@ -2418,10 +2414,7 @@ static void end_bio_extent_writepage(struct bio *bio, int err)
2418 if (end_extent_writepage(page, err, start, end)) 2414 if (end_extent_writepage(page, err, start, end))
2419 continue; 2415 continue;
2420 2416
2421 if (whole_page) 2417 end_page_writeback(page);
2422 end_page_writeback(page);
2423 else
2424 check_page_writeback(tree, page);
2425 } while (bvec >= bio->bi_io_vec); 2418 } while (bvec >= bio->bi_io_vec);
2426 2419
2427 bio_put(bio); 2420 bio_put(bio);
@@ -2446,7 +2439,6 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
2446 struct extent_io_tree *tree; 2439 struct extent_io_tree *tree;
2447 u64 start; 2440 u64 start;
2448 u64 end; 2441 u64 end;
2449 int whole_page;
2450 int mirror; 2442 int mirror;
2451 int ret; 2443 int ret;
2452 2444
@@ -2457,19 +2449,26 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
2457 struct page *page = bvec->bv_page; 2449 struct page *page = bvec->bv_page;
2458 struct extent_state *cached = NULL; 2450 struct extent_state *cached = NULL;
2459 struct extent_state *state; 2451 struct extent_state *state;
2452 struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
2460 2453
2461 pr_debug("end_bio_extent_readpage: bi_sector=%llu, err=%d, " 2454 pr_debug("end_bio_extent_readpage: bi_sector=%llu, err=%d, "
2462 "mirror=%ld\n", (u64)bio->bi_sector, err, 2455 "mirror=%lu\n", (u64)bio->bi_sector, err,
2463 (long int)bio->bi_bdev); 2456 io_bio->mirror_num);
2464 tree = &BTRFS_I(page->mapping->host)->io_tree; 2457 tree = &BTRFS_I(page->mapping->host)->io_tree;
2465 2458
2466 start = page_offset(page) + bvec->bv_offset; 2459 /* We always issue full-page reads, but if some block
2467 end = start + bvec->bv_len - 1; 2460 * in a page fails to read, blk_update_request() will
2461 * advance bv_offset and adjust bv_len to compensate.
2462 * Print a warning for nonzero offsets, and an error
2463 * if they don't add up to a full page. */
2464 if (bvec->bv_offset || bvec->bv_len != PAGE_CACHE_SIZE)
2465 printk("%s page read in btrfs with offset %u and length %u\n",
2466 bvec->bv_offset + bvec->bv_len != PAGE_CACHE_SIZE
2467 ? KERN_ERR "partial" : KERN_INFO "incomplete",
2468 bvec->bv_offset, bvec->bv_len);
2468 2469
2469 if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE) 2470 start = page_offset(page);
2470 whole_page = 1; 2471 end = start + bvec->bv_offset + bvec->bv_len - 1;
2471 else
2472 whole_page = 0;
2473 2472
2474 if (++bvec <= bvec_end) 2473 if (++bvec <= bvec_end)
2475 prefetchw(&bvec->bv_page->flags); 2474 prefetchw(&bvec->bv_page->flags);
@@ -2485,7 +2484,7 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
2485 } 2484 }
2486 spin_unlock(&tree->lock); 2485 spin_unlock(&tree->lock);
2487 2486
2488 mirror = (int)(unsigned long)bio->bi_bdev; 2487 mirror = io_bio->mirror_num;
2489 if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) { 2488 if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) {
2490 ret = tree->ops->readpage_end_io_hook(page, start, end, 2489 ret = tree->ops->readpage_end_io_hook(page, start, end,
2491 state, mirror); 2490 state, mirror);
@@ -2528,39 +2527,35 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
2528 } 2527 }
2529 unlock_extent_cached(tree, start, end, &cached, GFP_ATOMIC); 2528 unlock_extent_cached(tree, start, end, &cached, GFP_ATOMIC);
2530 2529
2531 if (whole_page) { 2530 if (uptodate) {
2532 if (uptodate) { 2531 SetPageUptodate(page);
2533 SetPageUptodate(page);
2534 } else {
2535 ClearPageUptodate(page);
2536 SetPageError(page);
2537 }
2538 unlock_page(page);
2539 } else { 2532 } else {
2540 if (uptodate) { 2533 ClearPageUptodate(page);
2541 check_page_uptodate(tree, page); 2534 SetPageError(page);
2542 } else {
2543 ClearPageUptodate(page);
2544 SetPageError(page);
2545 }
2546 check_page_locked(tree, page);
2547 } 2535 }
2536 unlock_page(page);
2548 } while (bvec <= bvec_end); 2537 } while (bvec <= bvec_end);
2549 2538
2550 bio_put(bio); 2539 bio_put(bio);
2551} 2540}
2552 2541
2542/*
2543 * this allocates from the btrfs_bioset. We're returning a bio right now
2544 * but you can call btrfs_io_bio for the appropriate container_of magic
2545 */
2553struct bio * 2546struct bio *
2554btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs, 2547btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
2555 gfp_t gfp_flags) 2548 gfp_t gfp_flags)
2556{ 2549{
2557 struct bio *bio; 2550 struct bio *bio;
2558 2551
2559 bio = bio_alloc(gfp_flags, nr_vecs); 2552 bio = bio_alloc_bioset(gfp_flags, nr_vecs, btrfs_bioset);
2560 2553
2561 if (bio == NULL && (current->flags & PF_MEMALLOC)) { 2554 if (bio == NULL && (current->flags & PF_MEMALLOC)) {
2562 while (!bio && (nr_vecs /= 2)) 2555 while (!bio && (nr_vecs /= 2)) {
2563 bio = bio_alloc(gfp_flags, nr_vecs); 2556 bio = bio_alloc_bioset(gfp_flags,
2557 nr_vecs, btrfs_bioset);
2558 }
2564 } 2559 }
2565 2560
2566 if (bio) { 2561 if (bio) {
@@ -2571,6 +2566,19 @@ btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
2571 return bio; 2566 return bio;
2572} 2567}
2573 2568
2569struct bio *btrfs_bio_clone(struct bio *bio, gfp_t gfp_mask)
2570{
2571 return bio_clone_bioset(bio, gfp_mask, btrfs_bioset);
2572}
2573
2574
2575/* this also allocates from the btrfs_bioset */
2576struct bio *btrfs_io_bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs)
2577{
2578 return bio_alloc_bioset(gfp_mask, nr_iovecs, btrfs_bioset);
2579}
2580
2581
2574static int __must_check submit_one_bio(int rw, struct bio *bio, 2582static int __must_check submit_one_bio(int rw, struct bio *bio,
2575 int mirror_num, unsigned long bio_flags) 2583 int mirror_num, unsigned long bio_flags)
2576{ 2584{
@@ -3988,7 +3996,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
3988 last_for_get_extent = isize; 3996 last_for_get_extent = isize;
3989 } 3997 }
3990 3998
3991 lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len, 0, 3999 lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len - 1, 0,
3992 &cached_state); 4000 &cached_state);
3993 4001
3994 em = get_extent_skip_holes(inode, start, last_for_get_extent, 4002 em = get_extent_skip_holes(inode, start, last_for_get_extent,
@@ -4075,7 +4083,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
4075out_free: 4083out_free:
4076 free_extent_map(em); 4084 free_extent_map(em);
4077out: 4085out:
4078 unlock_extent_cached(&BTRFS_I(inode)->io_tree, start, start + len, 4086 unlock_extent_cached(&BTRFS_I(inode)->io_tree, start, start + len - 1,
4079 &cached_state, GFP_NOFS); 4087 &cached_state, GFP_NOFS);
4080 return ret; 4088 return ret;
4081} 4089}
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index a2c03a175009..41fb81e7ec53 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -336,6 +336,8 @@ int extent_clear_unlock_delalloc(struct inode *inode,
336struct bio * 336struct bio *
337btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs, 337btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
338 gfp_t gfp_flags); 338 gfp_t gfp_flags);
339struct bio *btrfs_io_bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs);
340struct bio *btrfs_bio_clone(struct bio *bio, gfp_t gfp_mask);
339 341
340struct btrfs_fs_info; 342struct btrfs_fs_info;
341 343
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index ecca6c7375a6..e53009657f0e 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -197,30 +197,32 @@ int create_free_space_inode(struct btrfs_root *root,
197 block_group->key.objectid); 197 block_group->key.objectid);
198} 198}
199 199
200int btrfs_truncate_free_space_cache(struct btrfs_root *root, 200int btrfs_check_trunc_cache_free_space(struct btrfs_root *root,
201 struct btrfs_trans_handle *trans, 201 struct btrfs_block_rsv *rsv)
202 struct btrfs_path *path,
203 struct inode *inode)
204{ 202{
205 struct btrfs_block_rsv *rsv;
206 u64 needed_bytes; 203 u64 needed_bytes;
207 loff_t oldsize; 204 int ret;
208 int ret = 0;
209
210 rsv = trans->block_rsv;
211 trans->block_rsv = &root->fs_info->global_block_rsv;
212 205
213 /* 1 for slack space, 1 for updating the inode */ 206 /* 1 for slack space, 1 for updating the inode */
214 needed_bytes = btrfs_calc_trunc_metadata_size(root, 1) + 207 needed_bytes = btrfs_calc_trunc_metadata_size(root, 1) +
215 btrfs_calc_trans_metadata_size(root, 1); 208 btrfs_calc_trans_metadata_size(root, 1);
216 209
217 spin_lock(&trans->block_rsv->lock); 210 spin_lock(&rsv->lock);
218 if (trans->block_rsv->reserved < needed_bytes) { 211 if (rsv->reserved < needed_bytes)
219 spin_unlock(&trans->block_rsv->lock); 212 ret = -ENOSPC;
220 trans->block_rsv = rsv; 213 else
221 return -ENOSPC; 214 ret = 0;
222 } 215 spin_unlock(&rsv->lock);
223 spin_unlock(&trans->block_rsv->lock); 216 return 0;
217}
218
219int btrfs_truncate_free_space_cache(struct btrfs_root *root,
220 struct btrfs_trans_handle *trans,
221 struct btrfs_path *path,
222 struct inode *inode)
223{
224 loff_t oldsize;
225 int ret = 0;
224 226
225 oldsize = i_size_read(inode); 227 oldsize = i_size_read(inode);
226 btrfs_i_size_write(inode, 0); 228 btrfs_i_size_write(inode, 0);
@@ -232,9 +234,7 @@ int btrfs_truncate_free_space_cache(struct btrfs_root *root,
232 */ 234 */
233 ret = btrfs_truncate_inode_items(trans, root, inode, 235 ret = btrfs_truncate_inode_items(trans, root, inode,
234 0, BTRFS_EXTENT_DATA_KEY); 236 0, BTRFS_EXTENT_DATA_KEY);
235
236 if (ret) { 237 if (ret) {
237 trans->block_rsv = rsv;
238 btrfs_abort_transaction(trans, root, ret); 238 btrfs_abort_transaction(trans, root, ret);
239 return ret; 239 return ret;
240 } 240 }
@@ -242,7 +242,6 @@ int btrfs_truncate_free_space_cache(struct btrfs_root *root,
242 ret = btrfs_update_inode(trans, root, inode); 242 ret = btrfs_update_inode(trans, root, inode);
243 if (ret) 243 if (ret)
244 btrfs_abort_transaction(trans, root, ret); 244 btrfs_abort_transaction(trans, root, ret);
245 trans->block_rsv = rsv;
246 245
247 return ret; 246 return ret;
248} 247}
@@ -920,10 +919,8 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
920 919
921 /* Make sure we can fit our crcs into the first page */ 920 /* Make sure we can fit our crcs into the first page */
922 if (io_ctl.check_crcs && 921 if (io_ctl.check_crcs &&
923 (io_ctl.num_pages * sizeof(u32)) >= PAGE_CACHE_SIZE) { 922 (io_ctl.num_pages * sizeof(u32)) >= PAGE_CACHE_SIZE)
924 WARN_ON(1);
925 goto out_nospc; 923 goto out_nospc;
926 }
927 924
928 io_ctl_set_generation(&io_ctl, trans->transid); 925 io_ctl_set_generation(&io_ctl, trans->transid);
929 926
diff --git a/fs/btrfs/free-space-cache.h b/fs/btrfs/free-space-cache.h
index 4dc17d8809c7..8b7f19f44961 100644
--- a/fs/btrfs/free-space-cache.h
+++ b/fs/btrfs/free-space-cache.h
@@ -54,6 +54,8 @@ int create_free_space_inode(struct btrfs_root *root,
54 struct btrfs_block_group_cache *block_group, 54 struct btrfs_block_group_cache *block_group,
55 struct btrfs_path *path); 55 struct btrfs_path *path);
56 56
57int btrfs_check_trunc_cache_free_space(struct btrfs_root *root,
58 struct btrfs_block_rsv *rsv);
57int btrfs_truncate_free_space_cache(struct btrfs_root *root, 59int btrfs_truncate_free_space_cache(struct btrfs_root *root,
58 struct btrfs_trans_handle *trans, 60 struct btrfs_trans_handle *trans,
59 struct btrfs_path *path, 61 struct btrfs_path *path,
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c
index d26f67a59e36..2c66ddbbe670 100644
--- a/fs/btrfs/inode-map.c
+++ b/fs/btrfs/inode-map.c
@@ -429,11 +429,12 @@ int btrfs_save_ino_cache(struct btrfs_root *root,
429 num_bytes = trans->bytes_reserved; 429 num_bytes = trans->bytes_reserved;
430 /* 430 /*
431 * 1 item for inode item insertion if need 431 * 1 item for inode item insertion if need
432 * 3 items for inode item update (in the worst case) 432 * 4 items for inode item update (in the worst case)
433 * 1 items for slack space if we need do truncation
433 * 1 item for free space object 434 * 1 item for free space object
434 * 3 items for pre-allocation 435 * 3 items for pre-allocation
435 */ 436 */
436 trans->bytes_reserved = btrfs_calc_trans_metadata_size(root, 8); 437 trans->bytes_reserved = btrfs_calc_trans_metadata_size(root, 10);
437 ret = btrfs_block_rsv_add(root, trans->block_rsv, 438 ret = btrfs_block_rsv_add(root, trans->block_rsv,
438 trans->bytes_reserved, 439 trans->bytes_reserved,
439 BTRFS_RESERVE_NO_FLUSH); 440 BTRFS_RESERVE_NO_FLUSH);
@@ -468,7 +469,8 @@ again:
468 if (i_size_read(inode) > 0) { 469 if (i_size_read(inode) > 0) {
469 ret = btrfs_truncate_free_space_cache(root, trans, path, inode); 470 ret = btrfs_truncate_free_space_cache(root, trans, path, inode);
470 if (ret) { 471 if (ret) {
471 btrfs_abort_transaction(trans, root, ret); 472 if (ret != -ENOSPC)
473 btrfs_abort_transaction(trans, root, ret);
472 goto out_put; 474 goto out_put;
473 } 475 }
474 } 476 }
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 9b31b3b091fc..af978f7682b3 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -715,8 +715,10 @@ retry:
715 async_extent->ram_size - 1, 0); 715 async_extent->ram_size - 1, 0);
716 716
717 em = alloc_extent_map(); 717 em = alloc_extent_map();
718 if (!em) 718 if (!em) {
719 ret = -ENOMEM;
719 goto out_free_reserve; 720 goto out_free_reserve;
721 }
720 em->start = async_extent->start; 722 em->start = async_extent->start;
721 em->len = async_extent->ram_size; 723 em->len = async_extent->ram_size;
722 em->orig_start = em->start; 724 em->orig_start = em->start;
@@ -923,8 +925,10 @@ static noinline int __cow_file_range(struct btrfs_trans_handle *trans,
923 } 925 }
924 926
925 em = alloc_extent_map(); 927 em = alloc_extent_map();
926 if (!em) 928 if (!em) {
929 ret = -ENOMEM;
927 goto out_reserve; 930 goto out_reserve;
931 }
928 em->start = start; 932 em->start = start;
929 em->orig_start = em->start; 933 em->orig_start = em->start;
930 ram_size = ins.offset; 934 ram_size = ins.offset;
@@ -4724,6 +4728,7 @@ void btrfs_evict_inode(struct inode *inode)
4724 btrfs_end_transaction(trans, root); 4728 btrfs_end_transaction(trans, root);
4725 btrfs_btree_balance_dirty(root); 4729 btrfs_btree_balance_dirty(root);
4726no_delete: 4730no_delete:
4731 btrfs_remove_delayed_node(inode);
4727 clear_inode(inode); 4732 clear_inode(inode);
4728 return; 4733 return;
4729} 4734}
@@ -4839,14 +4844,13 @@ static void inode_tree_add(struct inode *inode)
4839 struct rb_node **p; 4844 struct rb_node **p;
4840 struct rb_node *parent; 4845 struct rb_node *parent;
4841 u64 ino = btrfs_ino(inode); 4846 u64 ino = btrfs_ino(inode);
4842again:
4843 p = &root->inode_tree.rb_node;
4844 parent = NULL;
4845 4847
4846 if (inode_unhashed(inode)) 4848 if (inode_unhashed(inode))
4847 return; 4849 return;
4848 4850again:
4851 parent = NULL;
4849 spin_lock(&root->inode_lock); 4852 spin_lock(&root->inode_lock);
4853 p = &root->inode_tree.rb_node;
4850 while (*p) { 4854 while (*p) {
4851 parent = *p; 4855 parent = *p;
4852 entry = rb_entry(parent, struct btrfs_inode, rb_node); 4856 entry = rb_entry(parent, struct btrfs_inode, rb_node);
@@ -6928,7 +6932,11 @@ struct btrfs_dio_private {
6928 /* IO errors */ 6932 /* IO errors */
6929 int errors; 6933 int errors;
6930 6934
6935 /* orig_bio is our btrfs_io_bio */
6931 struct bio *orig_bio; 6936 struct bio *orig_bio;
6937
6938 /* dio_bio came from fs/direct-io.c */
6939 struct bio *dio_bio;
6932}; 6940};
6933 6941
6934static void btrfs_endio_direct_read(struct bio *bio, int err) 6942static void btrfs_endio_direct_read(struct bio *bio, int err)
@@ -6938,6 +6946,7 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
6938 struct bio_vec *bvec = bio->bi_io_vec; 6946 struct bio_vec *bvec = bio->bi_io_vec;
6939 struct inode *inode = dip->inode; 6947 struct inode *inode = dip->inode;
6940 struct btrfs_root *root = BTRFS_I(inode)->root; 6948 struct btrfs_root *root = BTRFS_I(inode)->root;
6949 struct bio *dio_bio;
6941 u64 start; 6950 u64 start;
6942 6951
6943 start = dip->logical_offset; 6952 start = dip->logical_offset;
@@ -6977,14 +6986,15 @@ failed:
6977 6986
6978 unlock_extent(&BTRFS_I(inode)->io_tree, dip->logical_offset, 6987 unlock_extent(&BTRFS_I(inode)->io_tree, dip->logical_offset,
6979 dip->logical_offset + dip->bytes - 1); 6988 dip->logical_offset + dip->bytes - 1);
6980 bio->bi_private = dip->private; 6989 dio_bio = dip->dio_bio;
6981 6990
6982 kfree(dip); 6991 kfree(dip);
6983 6992
6984 /* If we had a csum failure make sure to clear the uptodate flag */ 6993 /* If we had a csum failure make sure to clear the uptodate flag */
6985 if (err) 6994 if (err)
6986 clear_bit(BIO_UPTODATE, &bio->bi_flags); 6995 clear_bit(BIO_UPTODATE, &dio_bio->bi_flags);
6987 dio_end_io(bio, err); 6996 dio_end_io(dio_bio, err);
6997 bio_put(bio);
6988} 6998}
6989 6999
6990static void btrfs_endio_direct_write(struct bio *bio, int err) 7000static void btrfs_endio_direct_write(struct bio *bio, int err)
@@ -6995,6 +7005,7 @@ static void btrfs_endio_direct_write(struct bio *bio, int err)
6995 struct btrfs_ordered_extent *ordered = NULL; 7005 struct btrfs_ordered_extent *ordered = NULL;
6996 u64 ordered_offset = dip->logical_offset; 7006 u64 ordered_offset = dip->logical_offset;
6997 u64 ordered_bytes = dip->bytes; 7007 u64 ordered_bytes = dip->bytes;
7008 struct bio *dio_bio;
6998 int ret; 7009 int ret;
6999 7010
7000 if (err) 7011 if (err)
@@ -7022,14 +7033,15 @@ out_test:
7022 goto again; 7033 goto again;
7023 } 7034 }
7024out_done: 7035out_done:
7025 bio->bi_private = dip->private; 7036 dio_bio = dip->dio_bio;
7026 7037
7027 kfree(dip); 7038 kfree(dip);
7028 7039
7029 /* If we had an error make sure to clear the uptodate flag */ 7040 /* If we had an error make sure to clear the uptodate flag */
7030 if (err) 7041 if (err)
7031 clear_bit(BIO_UPTODATE, &bio->bi_flags); 7042 clear_bit(BIO_UPTODATE, &dio_bio->bi_flags);
7032 dio_end_io(bio, err); 7043 dio_end_io(dio_bio, err);
7044 bio_put(bio);
7033} 7045}
7034 7046
7035static int __btrfs_submit_bio_start_direct_io(struct inode *inode, int rw, 7047static int __btrfs_submit_bio_start_direct_io(struct inode *inode, int rw,
@@ -7065,10 +7077,10 @@ static void btrfs_end_dio_bio(struct bio *bio, int err)
7065 if (!atomic_dec_and_test(&dip->pending_bios)) 7077 if (!atomic_dec_and_test(&dip->pending_bios))
7066 goto out; 7078 goto out;
7067 7079
7068 if (dip->errors) 7080 if (dip->errors) {
7069 bio_io_error(dip->orig_bio); 7081 bio_io_error(dip->orig_bio);
7070 else { 7082 } else {
7071 set_bit(BIO_UPTODATE, &dip->orig_bio->bi_flags); 7083 set_bit(BIO_UPTODATE, &dip->dio_bio->bi_flags);
7072 bio_endio(dip->orig_bio, 0); 7084 bio_endio(dip->orig_bio, 0);
7073 } 7085 }
7074out: 7086out:
@@ -7243,25 +7255,34 @@ out_err:
7243 return 0; 7255 return 0;
7244} 7256}
7245 7257
7246static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode, 7258static void btrfs_submit_direct(int rw, struct bio *dio_bio,
7247 loff_t file_offset) 7259 struct inode *inode, loff_t file_offset)
7248{ 7260{
7249 struct btrfs_root *root = BTRFS_I(inode)->root; 7261 struct btrfs_root *root = BTRFS_I(inode)->root;
7250 struct btrfs_dio_private *dip; 7262 struct btrfs_dio_private *dip;
7251 struct bio_vec *bvec = bio->bi_io_vec; 7263 struct bio_vec *bvec = dio_bio->bi_io_vec;
7264 struct bio *io_bio;
7252 int skip_sum; 7265 int skip_sum;
7253 int write = rw & REQ_WRITE; 7266 int write = rw & REQ_WRITE;
7254 int ret = 0; 7267 int ret = 0;
7255 7268
7256 skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; 7269 skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
7257 7270
7271 io_bio = btrfs_bio_clone(dio_bio, GFP_NOFS);
7272
7273 if (!io_bio) {
7274 ret = -ENOMEM;
7275 goto free_ordered;
7276 }
7277
7258 dip = kmalloc(sizeof(*dip), GFP_NOFS); 7278 dip = kmalloc(sizeof(*dip), GFP_NOFS);
7259 if (!dip) { 7279 if (!dip) {
7260 ret = -ENOMEM; 7280 ret = -ENOMEM;
7261 goto free_ordered; 7281 goto free_io_bio;
7262 } 7282 }
7263 7283
7264 dip->private = bio->bi_private; 7284 dip->private = dio_bio->bi_private;
7285 io_bio->bi_private = dio_bio->bi_private;
7265 dip->inode = inode; 7286 dip->inode = inode;
7266 dip->logical_offset = file_offset; 7287 dip->logical_offset = file_offset;
7267 7288
@@ -7269,22 +7290,27 @@ static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode,
7269 do { 7290 do {
7270 dip->bytes += bvec->bv_len; 7291 dip->bytes += bvec->bv_len;
7271 bvec++; 7292 bvec++;
7272 } while (bvec <= (bio->bi_io_vec + bio->bi_vcnt - 1)); 7293 } while (bvec <= (dio_bio->bi_io_vec + dio_bio->bi_vcnt - 1));
7273 7294
7274 dip->disk_bytenr = (u64)bio->bi_sector << 9; 7295 dip->disk_bytenr = (u64)dio_bio->bi_sector << 9;
7275 bio->bi_private = dip; 7296 io_bio->bi_private = dip;
7276 dip->errors = 0; 7297 dip->errors = 0;
7277 dip->orig_bio = bio; 7298 dip->orig_bio = io_bio;
7299 dip->dio_bio = dio_bio;
7278 atomic_set(&dip->pending_bios, 0); 7300 atomic_set(&dip->pending_bios, 0);
7279 7301
7280 if (write) 7302 if (write)
7281 bio->bi_end_io = btrfs_endio_direct_write; 7303 io_bio->bi_end_io = btrfs_endio_direct_write;
7282 else 7304 else
7283 bio->bi_end_io = btrfs_endio_direct_read; 7305 io_bio->bi_end_io = btrfs_endio_direct_read;
7284 7306
7285 ret = btrfs_submit_direct_hook(rw, dip, skip_sum); 7307 ret = btrfs_submit_direct_hook(rw, dip, skip_sum);
7286 if (!ret) 7308 if (!ret)
7287 return; 7309 return;
7310
7311free_io_bio:
7312 bio_put(io_bio);
7313
7288free_ordered: 7314free_ordered:
7289 /* 7315 /*
7290 * If this is a write, we need to clean up the reserved space and kill 7316 * If this is a write, we need to clean up the reserved space and kill
@@ -7300,7 +7326,7 @@ free_ordered:
7300 btrfs_put_ordered_extent(ordered); 7326 btrfs_put_ordered_extent(ordered);
7301 btrfs_put_ordered_extent(ordered); 7327 btrfs_put_ordered_extent(ordered);
7302 } 7328 }
7303 bio_endio(bio, ret); 7329 bio_endio(dio_bio, ret);
7304} 7330}
7305 7331
7306static ssize_t check_direct_IO(struct btrfs_root *root, int rw, struct kiocb *iocb, 7332static ssize_t check_direct_IO(struct btrfs_root *root, int rw, struct kiocb *iocb,
@@ -7979,7 +8005,6 @@ void btrfs_destroy_inode(struct inode *inode)
7979 inode_tree_del(inode); 8005 inode_tree_del(inode);
7980 btrfs_drop_extent_cache(inode, 0, (u64)-1, 0); 8006 btrfs_drop_extent_cache(inode, 0, (u64)-1, 0);
7981free: 8007free:
7982 btrfs_remove_delayed_node(inode);
7983 call_rcu(&inode->i_rcu, btrfs_i_callback); 8008 call_rcu(&inode->i_rcu, btrfs_i_callback);
7984} 8009}
7985 8010
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 0de4a2fcfb24..0f81d67cdc8d 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1801,7 +1801,11 @@ static noinline int copy_to_sk(struct btrfs_root *root,
1801 item_off = btrfs_item_ptr_offset(leaf, i); 1801 item_off = btrfs_item_ptr_offset(leaf, i);
1802 item_len = btrfs_item_size_nr(leaf, i); 1802 item_len = btrfs_item_size_nr(leaf, i);
1803 1803
1804 if (item_len > BTRFS_SEARCH_ARGS_BUFSIZE) 1804 btrfs_item_key_to_cpu(leaf, key, i);
1805 if (!key_in_sk(key, sk))
1806 continue;
1807
1808 if (sizeof(sh) + item_len > BTRFS_SEARCH_ARGS_BUFSIZE)
1805 item_len = 0; 1809 item_len = 0;
1806 1810
1807 if (sizeof(sh) + item_len + *sk_offset > 1811 if (sizeof(sh) + item_len + *sk_offset >
@@ -1810,10 +1814,6 @@ static noinline int copy_to_sk(struct btrfs_root *root,
1810 goto overflow; 1814 goto overflow;
1811 } 1815 }
1812 1816
1813 btrfs_item_key_to_cpu(leaf, key, i);
1814 if (!key_in_sk(key, sk))
1815 continue;
1816
1817 sh.objectid = key->objectid; 1817 sh.objectid = key->objectid;
1818 sh.offset = key->offset; 1818 sh.offset = key->offset;
1819 sh.type = key->type; 1819 sh.type = key->type;
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index 0740621daf6c..0525e1389f5b 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -1050,7 +1050,7 @@ static int rbio_add_io_page(struct btrfs_raid_bio *rbio,
1050 } 1050 }
1051 1051
1052 /* put a new bio on the list */ 1052 /* put a new bio on the list */
1053 bio = bio_alloc(GFP_NOFS, bio_max_len >> PAGE_SHIFT?:1); 1053 bio = btrfs_io_bio_alloc(GFP_NOFS, bio_max_len >> PAGE_SHIFT?:1);
1054 if (!bio) 1054 if (!bio)
1055 return -ENOMEM; 1055 return -ENOMEM;
1056 1056
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 704a1b8d2a2b..395b82031a42 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -1773,7 +1773,7 @@ again:
1773 if (!eb || !extent_buffer_uptodate(eb)) { 1773 if (!eb || !extent_buffer_uptodate(eb)) {
1774 ret = (!eb) ? -ENOMEM : -EIO; 1774 ret = (!eb) ? -ENOMEM : -EIO;
1775 free_extent_buffer(eb); 1775 free_extent_buffer(eb);
1776 return ret; 1776 break;
1777 } 1777 }
1778 btrfs_tree_lock(eb); 1778 btrfs_tree_lock(eb);
1779 if (cow) { 1779 if (cow) {
@@ -3350,6 +3350,11 @@ static int delete_block_group_cache(struct btrfs_fs_info *fs_info,
3350 } 3350 }
3351 3351
3352truncate: 3352truncate:
3353 ret = btrfs_check_trunc_cache_free_space(root,
3354 &fs_info->global_block_rsv);
3355 if (ret)
3356 goto out;
3357
3353 path = btrfs_alloc_path(); 3358 path = btrfs_alloc_path();
3354 if (!path) { 3359 if (!path) {
3355 ret = -ENOMEM; 3360 ret = -ENOMEM;
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index f489e24659a4..79bd479317cb 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -1296,7 +1296,7 @@ static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
1296 } 1296 }
1297 1297
1298 WARN_ON(!page->page); 1298 WARN_ON(!page->page);
1299 bio = bio_alloc(GFP_NOFS, 1); 1299 bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
1300 if (!bio) { 1300 if (!bio) {
1301 page->io_error = 1; 1301 page->io_error = 1;
1302 sblock->no_io_error_seen = 0; 1302 sblock->no_io_error_seen = 0;
@@ -1431,7 +1431,7 @@ static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
1431 return -EIO; 1431 return -EIO;
1432 } 1432 }
1433 1433
1434 bio = bio_alloc(GFP_NOFS, 1); 1434 bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
1435 if (!bio) 1435 if (!bio)
1436 return -EIO; 1436 return -EIO;
1437 bio->bi_bdev = page_bad->dev->bdev; 1437 bio->bi_bdev = page_bad->dev->bdev;
@@ -1522,7 +1522,7 @@ again:
1522 sbio->dev = wr_ctx->tgtdev; 1522 sbio->dev = wr_ctx->tgtdev;
1523 bio = sbio->bio; 1523 bio = sbio->bio;
1524 if (!bio) { 1524 if (!bio) {
1525 bio = bio_alloc(GFP_NOFS, wr_ctx->pages_per_wr_bio); 1525 bio = btrfs_io_bio_alloc(GFP_NOFS, wr_ctx->pages_per_wr_bio);
1526 if (!bio) { 1526 if (!bio) {
1527 mutex_unlock(&wr_ctx->wr_lock); 1527 mutex_unlock(&wr_ctx->wr_lock);
1528 return -ENOMEM; 1528 return -ENOMEM;
@@ -1930,7 +1930,7 @@ again:
1930 sbio->dev = spage->dev; 1930 sbio->dev = spage->dev;
1931 bio = sbio->bio; 1931 bio = sbio->bio;
1932 if (!bio) { 1932 if (!bio) {
1933 bio = bio_alloc(GFP_NOFS, sctx->pages_per_rd_bio); 1933 bio = btrfs_io_bio_alloc(GFP_NOFS, sctx->pages_per_rd_bio);
1934 if (!bio) 1934 if (!bio)
1935 return -ENOMEM; 1935 return -ENOMEM;
1936 sbio->bio = bio; 1936 sbio->bio = bio;
@@ -3307,7 +3307,7 @@ static int write_page_nocow(struct scrub_ctx *sctx,
3307 "btrfs: scrub write_page_nocow(bdev == NULL) is unexpected!\n"); 3307 "btrfs: scrub write_page_nocow(bdev == NULL) is unexpected!\n");
3308 return -EIO; 3308 return -EIO;
3309 } 3309 }
3310 bio = bio_alloc(GFP_NOFS, 1); 3310 bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
3311 if (!bio) { 3311 if (!bio) {
3312 spin_lock(&sctx->stat_lock); 3312 spin_lock(&sctx->stat_lock);
3313 sctx->stat.malloc_errors++; 3313 sctx->stat.malloc_errors++;
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index a4807ced23cc..f0857e092a3c 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -1263,6 +1263,7 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
1263 1263
1264 btrfs_dev_replace_suspend_for_unmount(fs_info); 1264 btrfs_dev_replace_suspend_for_unmount(fs_info);
1265 btrfs_scrub_cancel(fs_info); 1265 btrfs_scrub_cancel(fs_info);
1266 btrfs_pause_balance(fs_info);
1266 1267
1267 ret = btrfs_commit_super(root); 1268 ret = btrfs_commit_super(root);
1268 if (ret) 1269 if (ret)
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 0e925ced971b..8bffb9174afb 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -3120,14 +3120,13 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
3120 allowed = BTRFS_AVAIL_ALLOC_BIT_SINGLE; 3120 allowed = BTRFS_AVAIL_ALLOC_BIT_SINGLE;
3121 if (num_devices == 1) 3121 if (num_devices == 1)
3122 allowed |= BTRFS_BLOCK_GROUP_DUP; 3122 allowed |= BTRFS_BLOCK_GROUP_DUP;
3123 else if (num_devices < 4) 3123 else if (num_devices > 1)
3124 allowed |= (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1); 3124 allowed |= (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1);
3125 else 3125 if (num_devices > 2)
3126 allowed |= (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1 | 3126 allowed |= BTRFS_BLOCK_GROUP_RAID5;
3127 BTRFS_BLOCK_GROUP_RAID10 | 3127 if (num_devices > 3)
3128 BTRFS_BLOCK_GROUP_RAID5 | 3128 allowed |= (BTRFS_BLOCK_GROUP_RAID10 |
3129 BTRFS_BLOCK_GROUP_RAID6); 3129 BTRFS_BLOCK_GROUP_RAID6);
3130
3131 if ((bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) && 3130 if ((bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
3132 (!alloc_profile_is_valid(bctl->data.target, 1) || 3131 (!alloc_profile_is_valid(bctl->data.target, 1) ||
3133 (bctl->data.target & ~allowed))) { 3132 (bctl->data.target & ~allowed))) {
@@ -5019,42 +5018,16 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
5019 return 0; 5018 return 0;
5020} 5019}
5021 5020
5022static void *merge_stripe_index_into_bio_private(void *bi_private,
5023 unsigned int stripe_index)
5024{
5025 /*
5026 * with single, dup, RAID0, RAID1 and RAID10, stripe_index is
5027 * at most 1.
5028 * The alternative solution (instead of stealing bits from the
5029 * pointer) would be to allocate an intermediate structure
5030 * that contains the old private pointer plus the stripe_index.
5031 */
5032 BUG_ON((((uintptr_t)bi_private) & 3) != 0);
5033 BUG_ON(stripe_index > 3);
5034 return (void *)(((uintptr_t)bi_private) | stripe_index);
5035}
5036
5037static struct btrfs_bio *extract_bbio_from_bio_private(void *bi_private)
5038{
5039 return (struct btrfs_bio *)(((uintptr_t)bi_private) & ~((uintptr_t)3));
5040}
5041
5042static unsigned int extract_stripe_index_from_bio_private(void *bi_private)
5043{
5044 return (unsigned int)((uintptr_t)bi_private) & 3;
5045}
5046
5047static void btrfs_end_bio(struct bio *bio, int err) 5021static void btrfs_end_bio(struct bio *bio, int err)
5048{ 5022{
5049 struct btrfs_bio *bbio = extract_bbio_from_bio_private(bio->bi_private); 5023 struct btrfs_bio *bbio = bio->bi_private;
5050 int is_orig_bio = 0; 5024 int is_orig_bio = 0;
5051 5025
5052 if (err) { 5026 if (err) {
5053 atomic_inc(&bbio->error); 5027 atomic_inc(&bbio->error);
5054 if (err == -EIO || err == -EREMOTEIO) { 5028 if (err == -EIO || err == -EREMOTEIO) {
5055 unsigned int stripe_index = 5029 unsigned int stripe_index =
5056 extract_stripe_index_from_bio_private( 5030 btrfs_io_bio(bio)->stripe_index;
5057 bio->bi_private);
5058 struct btrfs_device *dev; 5031 struct btrfs_device *dev;
5059 5032
5060 BUG_ON(stripe_index >= bbio->num_stripes); 5033 BUG_ON(stripe_index >= bbio->num_stripes);
@@ -5084,8 +5057,7 @@ static void btrfs_end_bio(struct bio *bio, int err)
5084 } 5057 }
5085 bio->bi_private = bbio->private; 5058 bio->bi_private = bbio->private;
5086 bio->bi_end_io = bbio->end_io; 5059 bio->bi_end_io = bbio->end_io;
5087 bio->bi_bdev = (struct block_device *) 5060 btrfs_io_bio(bio)->mirror_num = bbio->mirror_num;
5088 (unsigned long)bbio->mirror_num;
5089 /* only send an error to the higher layers if it is 5061 /* only send an error to the higher layers if it is
5090 * beyond the tolerance of the btrfs bio 5062 * beyond the tolerance of the btrfs bio
5091 */ 5063 */
@@ -5211,8 +5183,7 @@ static void submit_stripe_bio(struct btrfs_root *root, struct btrfs_bio *bbio,
5211 struct btrfs_device *dev = bbio->stripes[dev_nr].dev; 5183 struct btrfs_device *dev = bbio->stripes[dev_nr].dev;
5212 5184
5213 bio->bi_private = bbio; 5185 bio->bi_private = bbio;
5214 bio->bi_private = merge_stripe_index_into_bio_private( 5186 btrfs_io_bio(bio)->stripe_index = dev_nr;
5215 bio->bi_private, (unsigned int)dev_nr);
5216 bio->bi_end_io = btrfs_end_bio; 5187 bio->bi_end_io = btrfs_end_bio;
5217 bio->bi_sector = physical >> 9; 5188 bio->bi_sector = physical >> 9;
5218#ifdef DEBUG 5189#ifdef DEBUG
@@ -5273,8 +5244,7 @@ static void bbio_error(struct btrfs_bio *bbio, struct bio *bio, u64 logical)
5273 if (atomic_dec_and_test(&bbio->stripes_pending)) { 5244 if (atomic_dec_and_test(&bbio->stripes_pending)) {
5274 bio->bi_private = bbio->private; 5245 bio->bi_private = bbio->private;
5275 bio->bi_end_io = bbio->end_io; 5246 bio->bi_end_io = bbio->end_io;
5276 bio->bi_bdev = (struct block_device *) 5247 btrfs_io_bio(bio)->mirror_num = bbio->mirror_num;
5277 (unsigned long)bbio->mirror_num;
5278 bio->bi_sector = logical >> 9; 5248 bio->bi_sector = logical >> 9;
5279 kfree(bbio); 5249 kfree(bbio);
5280 bio_endio(bio, -EIO); 5250 bio_endio(bio, -EIO);
@@ -5352,7 +5322,7 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
5352 } 5322 }
5353 5323
5354 if (dev_nr < total_devs - 1) { 5324 if (dev_nr < total_devs - 1) {
5355 bio = bio_clone(first_bio, GFP_NOFS); 5325 bio = btrfs_bio_clone(first_bio, GFP_NOFS);
5356 BUG_ON(!bio); /* -ENOMEM */ 5326 BUG_ON(!bio); /* -ENOMEM */
5357 } else { 5327 } else {
5358 bio = first_bio; 5328 bio = first_bio;
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 845ccbb0d2e3..f6247e2a47f7 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -152,6 +152,26 @@ struct btrfs_fs_devices {
152 int rotating; 152 int rotating;
153}; 153};
154 154
155/*
156 * we need the mirror number and stripe index to be passed around
157 * the call chain while we are processing end_io (especially errors).
158 * Really, what we need is a btrfs_bio structure that has this info
159 * and is properly sized with its stripe array, but we're not there
160 * quite yet. We have our own btrfs bioset, and all of the bios
161 * we allocate are actually btrfs_io_bios. We'll cram as much of
162 * struct btrfs_bio as we can into this over time.
163 */
164struct btrfs_io_bio {
165 unsigned long mirror_num;
166 unsigned long stripe_index;
167 struct bio bio;
168};
169
170static inline struct btrfs_io_bio *btrfs_io_bio(struct bio *bio)
171{
172 return container_of(bio, struct btrfs_io_bio, bio);
173}
174
155struct btrfs_bio_stripe { 175struct btrfs_bio_stripe {
156 struct btrfs_device *dev; 176 struct btrfs_device *dev;
157 u64 physical; 177 u64 physical;