about summary refs log tree commit diff stats
path: root/fs
diff options
context:
space:
mode:
authorJosef Bacik <jbacik@fusionio.com>2013-06-27 13:22:46 -0400
committerJosef Bacik <jbacik@fusionio.com>2013-07-02 11:50:53 -0400
commit6df9a95e63395f595d0d1eb5d561dd6c91c40270 (patch)
tree4636de10454ab03afac3a0d33fdc82e9dbeb44b8 /fs
parent68a7342c51c950428d90cd15da898c63d6c33267 (diff)
Btrfs: make the chunk allocator completely tree lockless
When adjusting the enospc rules for relocation I ran into a deadlock because we were relocating the only system chunk and that forced us to try and allocate a new system chunk while holding locks in the chunk tree, which caused us to deadlock. To fix this I've moved all of the dev extent addition and chunk addition out to the delayed chunk completion stuff. We still keep the in-memory stuff which makes sure everything is consistent. One change I had to make was to search the commit root of the device tree to find a free dev extent, and hold onto any chunk ems that we allocated in that transaction so we do not allocate the same dev extent twice. This has the side effect of fixing a bug with balance that has been there ever since balance existed. Basically you can free a block group and its dev extent and then immediately allocate that dev extent for a new block group and write stuff to that dev extent, all within the same transaction. So if you happen to crash during a balance you could come back to a completely broken file system. This patch should keep this sort of thing from happening in the future since we won't be able to allocate freed dev extents until after the transaction commits. This has passed all of the xfstests and my super annoying stress test followed by a balance. Thanks, Signed-off-by: Josef Bacik <jbacik@fusionio.com>
Diffstat (limited to 'fs')
-rw-r--r--fs/btrfs/extent-tree.c15
-rw-r--r--fs/btrfs/transaction.c9
-rw-r--r--fs/btrfs/transaction.h1
-rw-r--r--fs/btrfs/volumes.c304
-rw-r--r--fs/btrfs/volumes.h6
5 files changed, 166 insertions, 169 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 11ba82e43e8b..0236de711989 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -7950,6 +7950,7 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr)
7950 struct btrfs_space_info *space_info; 7950 struct btrfs_space_info *space_info;
7951 struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices; 7951 struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
7952 struct btrfs_device *device; 7952 struct btrfs_device *device;
7953 struct btrfs_trans_handle *trans;
7953 u64 min_free; 7954 u64 min_free;
7954 u64 dev_min = 1; 7955 u64 dev_min = 1;
7955 u64 dev_nr = 0; 7956 u64 dev_nr = 0;
@@ -8036,6 +8037,13 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr)
8036 do_div(min_free, dev_min); 8037 do_div(min_free, dev_min);
8037 } 8038 }
8038 8039
8040 /* We need to do this so that we can look at pending chunks */
8041 trans = btrfs_join_transaction(root);
8042 if (IS_ERR(trans)) {
8043 ret = PTR_ERR(trans);
8044 goto out;
8045 }
8046
8039 mutex_lock(&root->fs_info->chunk_mutex); 8047 mutex_lock(&root->fs_info->chunk_mutex);
8040 list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) { 8048 list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) {
8041 u64 dev_offset; 8049 u64 dev_offset;
@@ -8046,7 +8054,7 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr)
8046 */ 8054 */
8047 if (device->total_bytes > device->bytes_used + min_free && 8055 if (device->total_bytes > device->bytes_used + min_free &&
8048 !device->is_tgtdev_for_dev_replace) { 8056 !device->is_tgtdev_for_dev_replace) {
8049 ret = find_free_dev_extent(device, min_free, 8057 ret = find_free_dev_extent(trans, device, min_free,
8050 &dev_offset, NULL); 8058 &dev_offset, NULL);
8051 if (!ret) 8059 if (!ret)
8052 dev_nr++; 8060 dev_nr++;
@@ -8058,6 +8066,7 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr)
8058 } 8066 }
8059 } 8067 }
8060 mutex_unlock(&root->fs_info->chunk_mutex); 8068 mutex_unlock(&root->fs_info->chunk_mutex);
8069 btrfs_end_transaction(trans, root);
8061out: 8070out:
8062 btrfs_put_block_group(block_group); 8071 btrfs_put_block_group(block_group);
8063 return ret; 8072 return ret;
@@ -8423,6 +8432,10 @@ void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans,
8423 sizeof(item)); 8432 sizeof(item));
8424 if (ret) 8433 if (ret)
8425 btrfs_abort_transaction(trans, extent_root, ret); 8434 btrfs_abort_transaction(trans, extent_root, ret);
8435 ret = btrfs_finish_chunk_alloc(trans, extent_root,
8436 key.objectid, key.offset);
8437 if (ret)
8438 btrfs_abort_transaction(trans, extent_root, ret);
8426 } 8439 }
8427} 8440}
8428 8441
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index bcfa32c91b5d..d58cce77fc6c 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -63,6 +63,14 @@ static void put_transaction(struct btrfs_transaction *transaction)
63 if (atomic_dec_and_test(&transaction->use_count)) { 63 if (atomic_dec_and_test(&transaction->use_count)) {
64 BUG_ON(!list_empty(&transaction->list)); 64 BUG_ON(!list_empty(&transaction->list));
65 WARN_ON(transaction->delayed_refs.root.rb_node); 65 WARN_ON(transaction->delayed_refs.root.rb_node);
66 while (!list_empty(&transaction->pending_chunks)) {
67 struct extent_map *em;
68
69 em = list_first_entry(&transaction->pending_chunks,
70 struct extent_map, list);
71 list_del_init(&em->list);
72 free_extent_map(em);
73 }
66 kmem_cache_free(btrfs_transaction_cachep, transaction); 74 kmem_cache_free(btrfs_transaction_cachep, transaction);
67 } 75 }
68} 76}
@@ -202,6 +210,7 @@ loop:
202 210
203 INIT_LIST_HEAD(&cur_trans->pending_snapshots); 211 INIT_LIST_HEAD(&cur_trans->pending_snapshots);
204 INIT_LIST_HEAD(&cur_trans->ordered_operations); 212 INIT_LIST_HEAD(&cur_trans->ordered_operations);
213 INIT_LIST_HEAD(&cur_trans->pending_chunks);
205 list_add_tail(&cur_trans->list, &fs_info->trans_list); 214 list_add_tail(&cur_trans->list, &fs_info->trans_list);
206 extent_io_tree_init(&cur_trans->dirty_pages, 215 extent_io_tree_init(&cur_trans->dirty_pages,
207 fs_info->btree_inode->i_mapping); 216 fs_info->btree_inode->i_mapping);
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index 66d2a6ccbf05..005b0375d18c 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -56,6 +56,7 @@ struct btrfs_transaction {
56 wait_queue_head_t commit_wait; 56 wait_queue_head_t commit_wait;
57 struct list_head pending_snapshots; 57 struct list_head pending_snapshots;
58 struct list_head ordered_operations; 58 struct list_head ordered_operations;
59 struct list_head pending_chunks;
59 struct btrfs_delayed_ref_root delayed_refs; 60 struct btrfs_delayed_ref_root delayed_refs;
60 int aborted; 61 int aborted;
61}; 62};
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 7789598eeb75..b2d1eacc07c9 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -982,6 +982,35 @@ out:
982 return ret; 982 return ret;
983} 983}
984 984
985static int contains_pending_extent(struct btrfs_trans_handle *trans,
986 struct btrfs_device *device,
987 u64 *start, u64 len)
988{
989 struct extent_map *em;
990 int ret = 0;
991
992 list_for_each_entry(em, &trans->transaction->pending_chunks, list) {
993 struct map_lookup *map;
994 int i;
995
996 map = (struct map_lookup *)em->bdev;
997 for (i = 0; i < map->num_stripes; i++) {
998 if (map->stripes[i].dev != device)
999 continue;
1000 if (map->stripes[i].physical >= *start + len ||
1001 map->stripes[i].physical + em->orig_block_len <=
1002 *start)
1003 continue;
1004 *start = map->stripes[i].physical +
1005 em->orig_block_len;
1006 ret = 1;
1007 }
1008 }
1009
1010 return ret;
1011}
1012
1013
985/* 1014/*
986 * find_free_dev_extent - find free space in the specified device 1015 * find_free_dev_extent - find free space in the specified device
987 * @device: the device which we search the free space in 1016 * @device: the device which we search the free space in
@@ -1002,7 +1031,8 @@ out:
1002 * But if we don't find suitable free space, it is used to store the size of 1031 * But if we don't find suitable free space, it is used to store the size of
1003 * the max free space. 1032 * the max free space.
1004 */ 1033 */
1005int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes, 1034int find_free_dev_extent(struct btrfs_trans_handle *trans,
1035 struct btrfs_device *device, u64 num_bytes,
1006 u64 *start, u64 *len) 1036 u64 *start, u64 *len)
1007{ 1037{
1008 struct btrfs_key key; 1038 struct btrfs_key key;
@@ -1026,21 +1056,22 @@ int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes,
1026 */ 1056 */
1027 search_start = max(root->fs_info->alloc_start, 1024ull * 1024); 1057 search_start = max(root->fs_info->alloc_start, 1024ull * 1024);
1028 1058
1059 path = btrfs_alloc_path();
1060 if (!path)
1061 return -ENOMEM;
1062again:
1029 max_hole_start = search_start; 1063 max_hole_start = search_start;
1030 max_hole_size = 0; 1064 max_hole_size = 0;
1031 hole_size = 0; 1065 hole_size = 0;
1032 1066
1033 if (search_start >= search_end || device->is_tgtdev_for_dev_replace) { 1067 if (search_start >= search_end || device->is_tgtdev_for_dev_replace) {
1034 ret = -ENOSPC; 1068 ret = -ENOSPC;
1035 goto error; 1069 goto out;
1036 } 1070 }
1037 1071
1038 path = btrfs_alloc_path();
1039 if (!path) {
1040 ret = -ENOMEM;
1041 goto error;
1042 }
1043 path->reada = 2; 1072 path->reada = 2;
1073 path->search_commit_root = 1;
1074 path->skip_locking = 1;
1044 1075
1045 key.objectid = device->devid; 1076 key.objectid = device->devid;
1046 key.offset = search_start; 1077 key.offset = search_start;
@@ -1081,6 +1112,15 @@ int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes,
1081 if (key.offset > search_start) { 1112 if (key.offset > search_start) {
1082 hole_size = key.offset - search_start; 1113 hole_size = key.offset - search_start;
1083 1114
1115 /*
1116 * Have to check before we set max_hole_start, otherwise
1117 * we could end up sending back this offset anyway.
1118 */
1119 if (contains_pending_extent(trans, device,
1120 &search_start,
1121 hole_size))
1122 hole_size = 0;
1123
1084 if (hole_size > max_hole_size) { 1124 if (hole_size > max_hole_size) {
1085 max_hole_start = search_start; 1125 max_hole_start = search_start;
1086 max_hole_size = hole_size; 1126 max_hole_size = hole_size;
@@ -1124,6 +1164,11 @@ next:
1124 max_hole_size = hole_size; 1164 max_hole_size = hole_size;
1125 } 1165 }
1126 1166
1167 if (contains_pending_extent(trans, device, &search_start, hole_size)) {
1168 btrfs_release_path(path);
1169 goto again;
1170 }
1171
1127 /* See above. */ 1172 /* See above. */
1128 if (hole_size < num_bytes) 1173 if (hole_size < num_bytes)
1129 ret = -ENOSPC; 1174 ret = -ENOSPC;
@@ -1132,7 +1177,6 @@ next:
1132 1177
1133out: 1178out:
1134 btrfs_free_path(path); 1179 btrfs_free_path(path);
1135error:
1136 *start = max_hole_start; 1180 *start = max_hole_start;
1137 if (len) 1181 if (len)
1138 *len = max_hole_size; 1182 *len = max_hole_size;
@@ -1244,47 +1288,22 @@ out:
1244 return ret; 1288 return ret;
1245} 1289}
1246 1290
1247static noinline int find_next_chunk(struct btrfs_root *root, 1291static u64 find_next_chunk(struct btrfs_fs_info *fs_info)
1248 u64 objectid, u64 *offset)
1249{ 1292{
1250 struct btrfs_path *path; 1293 struct extent_map_tree *em_tree;
1251 int ret; 1294 struct extent_map *em;
1252 struct btrfs_key key; 1295 struct rb_node *n;
1253 struct btrfs_chunk *chunk; 1296 u64 ret = 0;
1254 struct btrfs_key found_key;
1255
1256 path = btrfs_alloc_path();
1257 if (!path)
1258 return -ENOMEM;
1259
1260 key.objectid = objectid;
1261 key.offset = (u64)-1;
1262 key.type = BTRFS_CHUNK_ITEM_KEY;
1263
1264 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1265 if (ret < 0)
1266 goto error;
1267
1268 BUG_ON(ret == 0); /* Corruption */
1269 1297
1270 ret = btrfs_previous_item(root, path, 0, BTRFS_CHUNK_ITEM_KEY); 1298 em_tree = &fs_info->mapping_tree.map_tree;
1271 if (ret) { 1299 read_lock(&em_tree->lock);
1272 *offset = 0; 1300 n = rb_last(&em_tree->map);
1273 } else { 1301 if (n) {
1274 btrfs_item_key_to_cpu(path->nodes[0], &found_key, 1302 em = rb_entry(n, struct extent_map, rb_node);
1275 path->slots[0]); 1303 ret = em->start + em->len;
1276 if (found_key.objectid != objectid)
1277 *offset = 0;
1278 else {
1279 chunk = btrfs_item_ptr(path->nodes[0], path->slots[0],
1280 struct btrfs_chunk);
1281 *offset = found_key.offset +
1282 btrfs_chunk_length(path->nodes[0], chunk);
1283 }
1284 } 1304 }
1285 ret = 0; 1305 read_unlock(&em_tree->lock);
1286error: 1306
1287 btrfs_free_path(path);
1288 return ret; 1307 return ret;
1289} 1308}
1290 1309
@@ -3666,10 +3685,8 @@ static void check_raid56_incompat_flag(struct btrfs_fs_info *info, u64 type)
3666} 3685}
3667 3686
3668static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, 3687static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
3669 struct btrfs_root *extent_root, 3688 struct btrfs_root *extent_root, u64 start,
3670 struct map_lookup **map_ret, 3689 u64 type)
3671 u64 *num_bytes_out, u64 *stripe_size_out,
3672 u64 start, u64 type)
3673{ 3690{
3674 struct btrfs_fs_info *info = extent_root->fs_info; 3691 struct btrfs_fs_info *info = extent_root->fs_info;
3675 struct btrfs_fs_devices *fs_devices = info->fs_devices; 3692 struct btrfs_fs_devices *fs_devices = info->fs_devices;
@@ -3776,7 +3793,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
3776 if (total_avail == 0) 3793 if (total_avail == 0)
3777 continue; 3794 continue;
3778 3795
3779 ret = find_free_dev_extent(device, 3796 ret = find_free_dev_extent(trans, device,
3780 max_stripe_size * dev_stripes, 3797 max_stripe_size * dev_stripes,
3781 &dev_offset, &max_avail); 3798 &dev_offset, &max_avail);
3782 if (ret && ret != -ENOSPC) 3799 if (ret && ret != -ENOSPC)
@@ -3888,12 +3905,8 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
3888 map->type = type; 3905 map->type = type;
3889 map->sub_stripes = sub_stripes; 3906 map->sub_stripes = sub_stripes;
3890 3907
3891 *map_ret = map;
3892 num_bytes = stripe_size * data_stripes; 3908 num_bytes = stripe_size * data_stripes;
3893 3909
3894 *stripe_size_out = stripe_size;
3895 *num_bytes_out = num_bytes;
3896
3897 trace_btrfs_chunk_alloc(info->chunk_root, map, start, num_bytes); 3910 trace_btrfs_chunk_alloc(info->chunk_root, map, start, num_bytes);
3898 3911
3899 em = alloc_extent_map(); 3912 em = alloc_extent_map();
@@ -3906,38 +3919,26 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
3906 em->len = num_bytes; 3919 em->len = num_bytes;
3907 em->block_start = 0; 3920 em->block_start = 0;
3908 em->block_len = em->len; 3921 em->block_len = em->len;
3922 em->orig_block_len = stripe_size;
3909 3923
3910 em_tree = &extent_root->fs_info->mapping_tree.map_tree; 3924 em_tree = &extent_root->fs_info->mapping_tree.map_tree;
3911 write_lock(&em_tree->lock); 3925 write_lock(&em_tree->lock);
3912 ret = add_extent_mapping(em_tree, em, 0); 3926 ret = add_extent_mapping(em_tree, em, 0);
3927 if (!ret) {
3928 list_add_tail(&em->list, &trans->transaction->pending_chunks);
3929 atomic_inc(&em->refs);
3930 }
3913 write_unlock(&em_tree->lock); 3931 write_unlock(&em_tree->lock);
3914 if (ret) { 3932 if (ret) {
3915 free_extent_map(em); 3933 free_extent_map(em);
3916 goto error; 3934 goto error;
3917 } 3935 }
3918 3936
3919 for (i = 0; i < map->num_stripes; ++i) {
3920 struct btrfs_device *device;
3921 u64 dev_offset;
3922
3923 device = map->stripes[i].dev;
3924 dev_offset = map->stripes[i].physical;
3925
3926 ret = btrfs_alloc_dev_extent(trans, device,
3927 info->chunk_root->root_key.objectid,
3928 BTRFS_FIRST_CHUNK_TREE_OBJECTID,
3929 start, dev_offset, stripe_size);
3930 if (ret)
3931 goto error_dev_extent;
3932 }
3933
3934 ret = btrfs_make_block_group(trans, extent_root, 0, type, 3937 ret = btrfs_make_block_group(trans, extent_root, 0, type,
3935 BTRFS_FIRST_CHUNK_TREE_OBJECTID, 3938 BTRFS_FIRST_CHUNK_TREE_OBJECTID,
3936 start, num_bytes); 3939 start, num_bytes);
3937 if (ret) { 3940 if (ret)
3938 i = map->num_stripes - 1; 3941 goto error_del_extent;
3939 goto error_dev_extent;
3940 }
3941 3942
3942 free_extent_map(em); 3943 free_extent_map(em);
3943 check_raid56_incompat_flag(extent_root->fs_info, type); 3944 check_raid56_incompat_flag(extent_root->fs_info, type);
@@ -3945,18 +3946,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
3945 kfree(devices_info); 3946 kfree(devices_info);
3946 return 0; 3947 return 0;
3947 3948
3948error_dev_extent: 3949error_del_extent:
3949 for (; i >= 0; i--) {
3950 struct btrfs_device *device;
3951 int err;
3952
3953 device = map->stripes[i].dev;
3954 err = btrfs_free_dev_extent(trans, device, start);
3955 if (err) {
3956 btrfs_abort_transaction(trans, extent_root, err);
3957 break;
3958 }
3959 }
3960 write_lock(&em_tree->lock); 3950 write_lock(&em_tree->lock);
3961 remove_extent_mapping(em_tree, em); 3951 remove_extent_mapping(em_tree, em);
3962 write_unlock(&em_tree->lock); 3952 write_unlock(&em_tree->lock);
@@ -3971,33 +3961,68 @@ error:
3971 return ret; 3961 return ret;
3972} 3962}
3973 3963
3974static int __finish_chunk_alloc(struct btrfs_trans_handle *trans, 3964int btrfs_finish_chunk_alloc(struct btrfs_trans_handle *trans,
3975 struct btrfs_root *extent_root, 3965 struct btrfs_root *extent_root,
3976 struct map_lookup *map, u64 chunk_offset, 3966 u64 chunk_offset, u64 chunk_size)
3977 u64 chunk_size, u64 stripe_size)
3978{ 3967{
3979 u64 dev_offset;
3980 struct btrfs_key key; 3968 struct btrfs_key key;
3981 struct btrfs_root *chunk_root = extent_root->fs_info->chunk_root; 3969 struct btrfs_root *chunk_root = extent_root->fs_info->chunk_root;
3982 struct btrfs_device *device; 3970 struct btrfs_device *device;
3983 struct btrfs_chunk *chunk; 3971 struct btrfs_chunk *chunk;
3984 struct btrfs_stripe *stripe; 3972 struct btrfs_stripe *stripe;
3985 size_t item_size = btrfs_chunk_item_size(map->num_stripes); 3973 struct extent_map_tree *em_tree;
3986 int index = 0; 3974 struct extent_map *em;
3975 struct map_lookup *map;
3976 size_t item_size;
3977 u64 dev_offset;
3978 u64 stripe_size;
3979 int i = 0;
3987 int ret; 3980 int ret;
3988 3981
3982 em_tree = &extent_root->fs_info->mapping_tree.map_tree;
3983 read_lock(&em_tree->lock);
3984 em = lookup_extent_mapping(em_tree, chunk_offset, chunk_size);
3985 read_unlock(&em_tree->lock);
3986
3987 if (!em) {
3988 btrfs_crit(extent_root->fs_info, "unable to find logical "
3989 "%Lu len %Lu", chunk_offset, chunk_size);
3990 return -EINVAL;
3991 }
3992
3993 if (em->start != chunk_offset || em->len != chunk_size) {
3994 btrfs_crit(extent_root->fs_info, "found a bad mapping, wanted"
3995 " %Lu-%Lu, found %Lu-%Lu\n", chunk_offset,
3996 chunk_size, em->start, em->len);
3997 free_extent_map(em);
3998 return -EINVAL;
3999 }
4000
4001 map = (struct map_lookup *)em->bdev;
4002 item_size = btrfs_chunk_item_size(map->num_stripes);
4003 stripe_size = em->orig_block_len;
4004
3989 chunk = kzalloc(item_size, GFP_NOFS); 4005 chunk = kzalloc(item_size, GFP_NOFS);
3990 if (!chunk) 4006 if (!chunk) {
3991 return -ENOMEM; 4007 ret = -ENOMEM;
4008 goto out;
4009 }
4010
4011 for (i = 0; i < map->num_stripes; i++) {
4012 device = map->stripes[i].dev;
4013 dev_offset = map->stripes[i].physical;
3992 4014
3993 index = 0;
3994 while (index < map->num_stripes) {
3995 device = map->stripes[index].dev;
3996 device->bytes_used += stripe_size; 4015 device->bytes_used += stripe_size;
3997 ret = btrfs_update_device(trans, device); 4016 ret = btrfs_update_device(trans, device);
3998 if (ret) 4017 if (ret)
3999 goto out_free; 4018 goto out;
4000 index++; 4019 ret = btrfs_alloc_dev_extent(trans, device,
4020 chunk_root->root_key.objectid,
4021 BTRFS_FIRST_CHUNK_TREE_OBJECTID,
4022 chunk_offset, dev_offset,
4023 stripe_size);
4024 if (ret)
4025 goto out;
4001 } 4026 }
4002 4027
4003 spin_lock(&extent_root->fs_info->free_chunk_lock); 4028 spin_lock(&extent_root->fs_info->free_chunk_lock);
@@ -4005,17 +4030,15 @@ static int __finish_chunk_alloc(struct btrfs_trans_handle *trans,
4005 map->num_stripes); 4030 map->num_stripes);
4006 spin_unlock(&extent_root->fs_info->free_chunk_lock); 4031 spin_unlock(&extent_root->fs_info->free_chunk_lock);
4007 4032
4008 index = 0;
4009 stripe = &chunk->stripe; 4033 stripe = &chunk->stripe;
4010 while (index < map->num_stripes) { 4034 for (i = 0; i < map->num_stripes; i++) {
4011 device = map->stripes[index].dev; 4035 device = map->stripes[i].dev;
4012 dev_offset = map->stripes[index].physical; 4036 dev_offset = map->stripes[i].physical;
4013 4037
4014 btrfs_set_stack_stripe_devid(stripe, device->devid); 4038 btrfs_set_stack_stripe_devid(stripe, device->devid);
4015 btrfs_set_stack_stripe_offset(stripe, dev_offset); 4039 btrfs_set_stack_stripe_offset(stripe, dev_offset);
4016 memcpy(stripe->dev_uuid, device->uuid, BTRFS_UUID_SIZE); 4040 memcpy(stripe->dev_uuid, device->uuid, BTRFS_UUID_SIZE);
4017 stripe++; 4041 stripe++;
4018 index++;
4019 } 4042 }
4020 4043
4021 btrfs_set_stack_chunk_length(chunk, chunk_size); 4044 btrfs_set_stack_chunk_length(chunk, chunk_size);
@@ -4033,7 +4056,6 @@ static int __finish_chunk_alloc(struct btrfs_trans_handle *trans,
4033 key.offset = chunk_offset; 4056 key.offset = chunk_offset;
4034 4057
4035 ret = btrfs_insert_item(trans, chunk_root, &key, chunk, item_size); 4058 ret = btrfs_insert_item(trans, chunk_root, &key, chunk, item_size);
4036
4037 if (ret == 0 && map->type & BTRFS_BLOCK_GROUP_SYSTEM) { 4059 if (ret == 0 && map->type & BTRFS_BLOCK_GROUP_SYSTEM) {
4038 /* 4060 /*
4039 * TODO: Cleanup of inserted chunk root in case of 4061 * TODO: Cleanup of inserted chunk root in case of
@@ -4043,8 +4065,9 @@ static int __finish_chunk_alloc(struct btrfs_trans_handle *trans,
4043 item_size); 4065 item_size);
4044 } 4066 }
4045 4067
4046out_free: 4068out:
4047 kfree(chunk); 4069 kfree(chunk);
4070 free_extent_map(em);
4048 return ret; 4071 return ret;
4049} 4072}
4050 4073
@@ -4059,27 +4082,9 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
4059 struct btrfs_root *extent_root, u64 type) 4082 struct btrfs_root *extent_root, u64 type)
4060{ 4083{
4061 u64 chunk_offset; 4084 u64 chunk_offset;
4062 u64 chunk_size;
4063 u64 stripe_size;
4064 struct map_lookup *map;
4065 struct btrfs_root *chunk_root = extent_root->fs_info->chunk_root;
4066 int ret;
4067 4085
4068 ret = find_next_chunk(chunk_root, BTRFS_FIRST_CHUNK_TREE_OBJECTID, 4086 chunk_offset = find_next_chunk(extent_root->fs_info);
4069 &chunk_offset); 4087 return __btrfs_alloc_chunk(trans, extent_root, chunk_offset, type);
4070 if (ret)
4071 return ret;
4072
4073 ret = __btrfs_alloc_chunk(trans, extent_root, &map, &chunk_size,
4074 &stripe_size, chunk_offset, type);
4075 if (ret)
4076 return ret;
4077
4078 ret = __finish_chunk_alloc(trans, extent_root, map, chunk_offset,
4079 chunk_size, stripe_size);
4080 if (ret)
4081 return ret;
4082 return 0;
4083} 4088}
4084 4089
4085static noinline int init_first_rw_device(struct btrfs_trans_handle *trans, 4090static noinline int init_first_rw_device(struct btrfs_trans_handle *trans,
@@ -4088,66 +4093,31 @@ static noinline int init_first_rw_device(struct btrfs_trans_handle *trans,
4088{ 4093{
4089 u64 chunk_offset; 4094 u64 chunk_offset;
4090 u64 sys_chunk_offset; 4095 u64 sys_chunk_offset;
4091 u64 chunk_size;
4092 u64 sys_chunk_size;
4093 u64 stripe_size;
4094 u64 sys_stripe_size;
4095 u64 alloc_profile; 4096 u64 alloc_profile;
4096 struct map_lookup *map;
4097 struct map_lookup *sys_map;
4098 struct btrfs_fs_info *fs_info = root->fs_info; 4097 struct btrfs_fs_info *fs_info = root->fs_info;
4099 struct btrfs_root *extent_root = fs_info->extent_root; 4098 struct btrfs_root *extent_root = fs_info->extent_root;
4100 int ret; 4099 int ret;
4101 4100
4102 ret = find_next_chunk(fs_info->chunk_root, 4101 chunk_offset = find_next_chunk(fs_info);
4103 BTRFS_FIRST_CHUNK_TREE_OBJECTID, &chunk_offset);
4104 if (ret)
4105 return ret;
4106
4107 alloc_profile = btrfs_get_alloc_profile(extent_root, 0); 4102 alloc_profile = btrfs_get_alloc_profile(extent_root, 0);
4108 ret = __btrfs_alloc_chunk(trans, extent_root, &map, &chunk_size, 4103 ret = __btrfs_alloc_chunk(trans, extent_root, chunk_offset,
4109 &stripe_size, chunk_offset, alloc_profile); 4104 alloc_profile);
4110 if (ret) 4105 if (ret)
4111 return ret; 4106 return ret;
4112 4107
4113 sys_chunk_offset = chunk_offset + chunk_size; 4108 sys_chunk_offset = find_next_chunk(root->fs_info);
4114
4115 alloc_profile = btrfs_get_alloc_profile(fs_info->chunk_root, 0); 4109 alloc_profile = btrfs_get_alloc_profile(fs_info->chunk_root, 0);
4116 ret = __btrfs_alloc_chunk(trans, extent_root, &sys_map, 4110 ret = __btrfs_alloc_chunk(trans, extent_root, sys_chunk_offset,
4117 &sys_chunk_size, &sys_stripe_size, 4111 alloc_profile);
4118 sys_chunk_offset, alloc_profile);
4119 if (ret) { 4112 if (ret) {
4120 btrfs_abort_transaction(trans, root, ret); 4113 btrfs_abort_transaction(trans, root, ret);
4121 goto out; 4114 goto out;
4122 } 4115 }
4123 4116
4124 ret = btrfs_add_device(trans, fs_info->chunk_root, device); 4117 ret = btrfs_add_device(trans, fs_info->chunk_root, device);
4125 if (ret) {
4126 btrfs_abort_transaction(trans, root, ret);
4127 goto out;
4128 }
4129
4130 /*
4131 * Modifying chunk tree needs allocating new blocks from both
4132 * system block group and metadata block group. So we only can
4133 * do operations require modifying the chunk tree after both
4134 * block groups were created.
4135 */
4136 ret = __finish_chunk_alloc(trans, extent_root, map, chunk_offset,
4137 chunk_size, stripe_size);
4138 if (ret) {
4139 btrfs_abort_transaction(trans, root, ret);
4140 goto out;
4141 }
4142
4143 ret = __finish_chunk_alloc(trans, extent_root, sys_map,
4144 sys_chunk_offset, sys_chunk_size,
4145 sys_stripe_size);
4146 if (ret) 4118 if (ret)
4147 btrfs_abort_transaction(trans, root, ret); 4119 btrfs_abort_transaction(trans, root, ret);
4148
4149out: 4120out:
4150
4151 return ret; 4121 return ret;
4152} 4122}
4153 4123
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 857acd34ccde..86705583480d 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -316,7 +316,8 @@ int btrfs_recover_balance(struct btrfs_fs_info *fs_info);
316int btrfs_pause_balance(struct btrfs_fs_info *fs_info); 316int btrfs_pause_balance(struct btrfs_fs_info *fs_info);
317int btrfs_cancel_balance(struct btrfs_fs_info *fs_info); 317int btrfs_cancel_balance(struct btrfs_fs_info *fs_info);
318int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset); 318int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset);
319int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes, 319int find_free_dev_extent(struct btrfs_trans_handle *trans,
320 struct btrfs_device *device, u64 num_bytes,
320 u64 *start, u64 *max_avail); 321 u64 *start, u64 *max_avail);
321void btrfs_dev_stat_inc_and_print(struct btrfs_device *dev, int index); 322void btrfs_dev_stat_inc_and_print(struct btrfs_device *dev, int index);
322int btrfs_get_dev_stats(struct btrfs_root *root, 323int btrfs_get_dev_stats(struct btrfs_root *root,
@@ -337,6 +338,9 @@ int btrfs_is_parity_mirror(struct btrfs_mapping_tree *map_tree,
337unsigned long btrfs_full_stripe_len(struct btrfs_root *root, 338unsigned long btrfs_full_stripe_len(struct btrfs_root *root,
338 struct btrfs_mapping_tree *map_tree, 339 struct btrfs_mapping_tree *map_tree,
339 u64 logical); 340 u64 logical);
341int btrfs_finish_chunk_alloc(struct btrfs_trans_handle *trans,
342 struct btrfs_root *extent_root,
343 u64 chunk_offset, u64 chunk_size);
340static inline void btrfs_dev_stat_inc(struct btrfs_device *dev, 344static inline void btrfs_dev_stat_inc(struct btrfs_device *dev,
341 int index) 345 int index)
342{ 346{