aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJosef Bacik <jbacik@fb.com>2014-11-17 15:45:48 -0500
committerChris Mason <clm@fb.com>2015-01-21 20:36:52 -0500
commitce93ec548cfa02f9cd6b70d546d5f36f4d160f57 (patch)
treeb6274b12d55c6ed97d3ad508e2cb5c428d74df30
parente7070be198b34c26f39bd9010a29ce6462dc4f3e (diff)
Btrfs: track dirty block groups on their own list
Currently, any time we try to update the block groups on disk we walk _all_ block groups and check whether the ->dirty flag is set. This function can get called several times during a commit, so if you have several terabytes of data you will be a very sad panda: we loop through _all_ of the block groups several times, which makes the commit take a while and slows down the rest of the file system operations. This patch introduces a dirty list, kept on the transaction, that a block group is added to the first time it is dirtied. Then we simply update any block groups that have been dirtied since the last time we called btrfs_write_dirty_block_groups. This also lets us simplify how we write out the free space cache. Thanks, Signed-off-by: Josef Bacik <jbacik@fb.com> Signed-off-by: Chris Mason <clm@fb.com>
-rw-r--r--fs/btrfs/ctree.h5
-rw-r--r--fs/btrfs/extent-tree.c167
-rw-r--r--fs/btrfs/free-space-cache.c8
-rw-r--r--fs/btrfs/transaction.c14
-rw-r--r--fs/btrfs/transaction.h2
5 files changed, 72 insertions, 124 deletions
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 45ed4dc6a0ce..0b4683f560c8 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1238,7 +1238,6 @@ enum btrfs_disk_cache_state {
1238 BTRFS_DC_ERROR = 1, 1238 BTRFS_DC_ERROR = 1,
1239 BTRFS_DC_CLEAR = 2, 1239 BTRFS_DC_CLEAR = 2,
1240 BTRFS_DC_SETUP = 3, 1240 BTRFS_DC_SETUP = 3,
1241 BTRFS_DC_NEED_WRITE = 4,
1242}; 1241};
1243 1242
1244struct btrfs_caching_control { 1243struct btrfs_caching_control {
@@ -1276,7 +1275,6 @@ struct btrfs_block_group_cache {
1276 unsigned long full_stripe_len; 1275 unsigned long full_stripe_len;
1277 1276
1278 unsigned int ro:1; 1277 unsigned int ro:1;
1279 unsigned int dirty:1;
1280 unsigned int iref:1; 1278 unsigned int iref:1;
1281 unsigned int has_caching_ctl:1; 1279 unsigned int has_caching_ctl:1;
1282 unsigned int removed:1; 1280 unsigned int removed:1;
@@ -1314,6 +1312,9 @@ struct btrfs_block_group_cache {
1314 struct list_head ro_list; 1312 struct list_head ro_list;
1315 1313
1316 atomic_t trimming; 1314 atomic_t trimming;
1315
1316 /* For dirty block groups */
1317 struct list_head dirty_list;
1317}; 1318};
1318 1319
1319/* delayed seq elem */ 1320/* delayed seq elem */
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 15116585e714..21c373fe256c 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -74,8 +74,9 @@ enum {
74 RESERVE_ALLOC_NO_ACCOUNT = 2, 74 RESERVE_ALLOC_NO_ACCOUNT = 2,
75}; 75};
76 76
77static int update_block_group(struct btrfs_root *root, 77static int update_block_group(struct btrfs_trans_handle *trans,
78 u64 bytenr, u64 num_bytes, int alloc); 78 struct btrfs_root *root, u64 bytenr,
79 u64 num_bytes, int alloc);
79static int __btrfs_free_extent(struct btrfs_trans_handle *trans, 80static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
80 struct btrfs_root *root, 81 struct btrfs_root *root,
81 u64 bytenr, u64 num_bytes, u64 parent, 82 u64 bytenr, u64 num_bytes, u64 parent,
@@ -3315,120 +3316,42 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
3315 struct btrfs_root *root) 3316 struct btrfs_root *root)
3316{ 3317{
3317 struct btrfs_block_group_cache *cache; 3318 struct btrfs_block_group_cache *cache;
3318 int err = 0; 3319 struct btrfs_transaction *cur_trans = trans->transaction;
3320 int ret = 0;
3319 struct btrfs_path *path; 3321 struct btrfs_path *path;
3320 u64 last = 0; 3322
3323 if (list_empty(&cur_trans->dirty_bgs))
3324 return 0;
3321 3325
3322 path = btrfs_alloc_path(); 3326 path = btrfs_alloc_path();
3323 if (!path) 3327 if (!path)
3324 return -ENOMEM; 3328 return -ENOMEM;
3325 3329
3326again: 3330 /*
3327 while (1) { 3331 * We don't need the lock here since we are protected by the transaction
3328 cache = btrfs_lookup_first_block_group(root->fs_info, last); 3332 * commit. We want to do the cache_save_setup first and then run the
3329 while (cache) { 3333 * delayed refs to make sure we have the best chance at doing this all
3330 if (cache->disk_cache_state == BTRFS_DC_CLEAR) 3334 * in one shot.
3331 break; 3335 */
3332 cache = next_block_group(root, cache); 3336 while (!list_empty(&cur_trans->dirty_bgs)) {
3333 } 3337 cache = list_first_entry(&cur_trans->dirty_bgs,
3334 if (!cache) { 3338 struct btrfs_block_group_cache,
3335 if (last == 0) 3339 dirty_list);
3336 break; 3340 list_del_init(&cache->dirty_list);
3337 last = 0; 3341 if (cache->disk_cache_state == BTRFS_DC_CLEAR)
3338 continue; 3342 cache_save_setup(cache, trans, path);
3339 } 3343 if (!ret)
3340 err = cache_save_setup(cache, trans, path); 3344 ret = btrfs_run_delayed_refs(trans, root,
3341 last = cache->key.objectid + cache->key.offset; 3345 (unsigned long) -1);
3342 btrfs_put_block_group(cache); 3346 if (!ret && cache->disk_cache_state == BTRFS_DC_SETUP)
3343 } 3347 btrfs_write_out_cache(root, trans, cache, path);
3344 3348 if (!ret)
3345 while (1) { 3349 ret = write_one_cache_group(trans, root, path, cache);
3346 if (last == 0) {
3347 err = btrfs_run_delayed_refs(trans, root,
3348 (unsigned long)-1);
3349 if (err) /* File system offline */
3350 goto out;
3351 }
3352
3353 cache = btrfs_lookup_first_block_group(root->fs_info, last);
3354 while (cache) {
3355 if (cache->disk_cache_state == BTRFS_DC_CLEAR) {
3356 btrfs_put_block_group(cache);
3357 goto again;
3358 }
3359
3360 if (cache->dirty)
3361 break;
3362 cache = next_block_group(root, cache);
3363 }
3364 if (!cache) {
3365 if (last == 0)
3366 break;
3367 last = 0;
3368 continue;
3369 }
3370
3371 if (cache->disk_cache_state == BTRFS_DC_SETUP)
3372 cache->disk_cache_state = BTRFS_DC_NEED_WRITE;
3373 cache->dirty = 0;
3374 last = cache->key.objectid + cache->key.offset;
3375
3376 err = write_one_cache_group(trans, root, path, cache);
3377 btrfs_put_block_group(cache);
3378 if (err) /* File system offline */
3379 goto out;
3380 }
3381
3382 while (1) {
3383 /*
3384 * I don't think this is needed since we're just marking our
3385 * preallocated extent as written, but just in case it can't
3386 * hurt.
3387 */
3388 if (last == 0) {
3389 err = btrfs_run_delayed_refs(trans, root,
3390 (unsigned long)-1);
3391 if (err) /* File system offline */
3392 goto out;
3393 }
3394
3395 cache = btrfs_lookup_first_block_group(root->fs_info, last);
3396 while (cache) {
3397 /*
3398 * Really this shouldn't happen, but it could if we
3399 * couldn't write the entire preallocated extent and
3400 * splitting the extent resulted in a new block.
3401 */
3402 if (cache->dirty) {
3403 btrfs_put_block_group(cache);
3404 goto again;
3405 }
3406 if (cache->disk_cache_state == BTRFS_DC_NEED_WRITE)
3407 break;
3408 cache = next_block_group(root, cache);
3409 }
3410 if (!cache) {
3411 if (last == 0)
3412 break;
3413 last = 0;
3414 continue;
3415 }
3416
3417 err = btrfs_write_out_cache(root, trans, cache, path);
3418
3419 /*
3420 * If we didn't have an error then the cache state is still
3421 * NEED_WRITE, so we can set it to WRITTEN.
3422 */
3423 if (!err && cache->disk_cache_state == BTRFS_DC_NEED_WRITE)
3424 cache->disk_cache_state = BTRFS_DC_WRITTEN;
3425 last = cache->key.objectid + cache->key.offset;
3426 btrfs_put_block_group(cache); 3350 btrfs_put_block_group(cache);
3427 } 3351 }
3428out:
3429 3352
3430 btrfs_free_path(path); 3353 btrfs_free_path(path);
3431 return err; 3354 return ret;
3432} 3355}
3433 3356
3434int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr) 3357int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr)
@@ -5375,8 +5298,9 @@ void btrfs_delalloc_release_space(struct inode *inode, u64 num_bytes)
5375 btrfs_free_reserved_data_space(inode, num_bytes); 5298 btrfs_free_reserved_data_space(inode, num_bytes);
5376} 5299}
5377 5300
5378static int update_block_group(struct btrfs_root *root, 5301static int update_block_group(struct btrfs_trans_handle *trans,
5379 u64 bytenr, u64 num_bytes, int alloc) 5302 struct btrfs_root *root, u64 bytenr,
5303 u64 num_bytes, int alloc)
5380{ 5304{
5381 struct btrfs_block_group_cache *cache = NULL; 5305 struct btrfs_block_group_cache *cache = NULL;
5382 struct btrfs_fs_info *info = root->fs_info; 5306 struct btrfs_fs_info *info = root->fs_info;
@@ -5414,6 +5338,14 @@ static int update_block_group(struct btrfs_root *root,
5414 if (!alloc && cache->cached == BTRFS_CACHE_NO) 5338 if (!alloc && cache->cached == BTRFS_CACHE_NO)
5415 cache_block_group(cache, 1); 5339 cache_block_group(cache, 1);
5416 5340
5341 spin_lock(&trans->transaction->dirty_bgs_lock);
5342 if (list_empty(&cache->dirty_list)) {
5343 list_add_tail(&cache->dirty_list,
5344 &trans->transaction->dirty_bgs);
5345 btrfs_get_block_group(cache);
5346 }
5347 spin_unlock(&trans->transaction->dirty_bgs_lock);
5348
5417 byte_in_group = bytenr - cache->key.objectid; 5349 byte_in_group = bytenr - cache->key.objectid;
5418 WARN_ON(byte_in_group > cache->key.offset); 5350 WARN_ON(byte_in_group > cache->key.offset);
5419 5351
@@ -5424,7 +5356,6 @@ static int update_block_group(struct btrfs_root *root,
5424 cache->disk_cache_state < BTRFS_DC_CLEAR) 5356 cache->disk_cache_state < BTRFS_DC_CLEAR)
5425 cache->disk_cache_state = BTRFS_DC_CLEAR; 5357 cache->disk_cache_state = BTRFS_DC_CLEAR;
5426 5358
5427 cache->dirty = 1;
5428 old_val = btrfs_block_group_used(&cache->item); 5359 old_val = btrfs_block_group_used(&cache->item);
5429 num_bytes = min(total, cache->key.offset - byte_in_group); 5360 num_bytes = min(total, cache->key.offset - byte_in_group);
5430 if (alloc) { 5361 if (alloc) {
@@ -6103,7 +6034,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
6103 } 6034 }
6104 } 6035 }
6105 6036
6106 ret = update_block_group(root, bytenr, num_bytes, 0); 6037 ret = update_block_group(trans, root, bytenr, num_bytes, 0);
6107 if (ret) { 6038 if (ret) {
6108 btrfs_abort_transaction(trans, extent_root, ret); 6039 btrfs_abort_transaction(trans, extent_root, ret);
6109 goto out; 6040 goto out;
@@ -7063,7 +6994,7 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
7063 if (ret) 6994 if (ret)
7064 return ret; 6995 return ret;
7065 6996
7066 ret = update_block_group(root, ins->objectid, ins->offset, 1); 6997 ret = update_block_group(trans, root, ins->objectid, ins->offset, 1);
7067 if (ret) { /* -ENOENT, logic error */ 6998 if (ret) { /* -ENOENT, logic error */
7068 btrfs_err(fs_info, "update block group failed for %llu %llu", 6999 btrfs_err(fs_info, "update block group failed for %llu %llu",
7069 ins->objectid, ins->offset); 7000 ins->objectid, ins->offset);
@@ -7152,7 +7083,8 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
7152 return ret; 7083 return ret;
7153 } 7084 }
7154 7085
7155 ret = update_block_group(root, ins->objectid, root->nodesize, 1); 7086 ret = update_block_group(trans, root, ins->objectid, root->nodesize,
7087 1);
7156 if (ret) { /* -ENOENT, logic error */ 7088 if (ret) { /* -ENOENT, logic error */
7157 btrfs_err(fs_info, "update block group failed for %llu %llu", 7089 btrfs_err(fs_info, "update block group failed for %llu %llu",
7158 ins->objectid, ins->offset); 7090 ins->objectid, ins->offset);
@@ -9005,6 +8937,7 @@ btrfs_create_block_group_cache(struct btrfs_root *root, u64 start, u64 size)
9005 INIT_LIST_HEAD(&cache->cluster_list); 8937 INIT_LIST_HEAD(&cache->cluster_list);
9006 INIT_LIST_HEAD(&cache->bg_list); 8938 INIT_LIST_HEAD(&cache->bg_list);
9007 INIT_LIST_HEAD(&cache->ro_list); 8939 INIT_LIST_HEAD(&cache->ro_list);
8940 INIT_LIST_HEAD(&cache->dirty_list);
9008 btrfs_init_free_space_ctl(cache); 8941 btrfs_init_free_space_ctl(cache);
9009 atomic_set(&cache->trimming, 0); 8942 atomic_set(&cache->trimming, 0);
9010 8943
@@ -9068,9 +9001,8 @@ int btrfs_read_block_groups(struct btrfs_root *root)
9068 * b) Setting 'dirty flag' makes sure that we flush 9001 * b) Setting 'dirty flag' makes sure that we flush
9069 * the new space cache info onto disk. 9002 * the new space cache info onto disk.
9070 */ 9003 */
9071 cache->disk_cache_state = BTRFS_DC_CLEAR;
9072 if (btrfs_test_opt(root, SPACE_CACHE)) 9004 if (btrfs_test_opt(root, SPACE_CACHE))
9073 cache->dirty = 1; 9005 cache->disk_cache_state = BTRFS_DC_CLEAR;
9074 } 9006 }
9075 9007
9076 read_extent_buffer(leaf, &cache->item, 9008 read_extent_buffer(leaf, &cache->item,
@@ -9461,6 +9393,13 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
9461 } 9393 }
9462 } 9394 }
9463 9395
9396 spin_lock(&trans->transaction->dirty_bgs_lock);
9397 if (!list_empty(&block_group->dirty_list)) {
9398 list_del_init(&block_group->dirty_list);
9399 btrfs_put_block_group(block_group);
9400 }
9401 spin_unlock(&trans->transaction->dirty_bgs_lock);
9402
9464 btrfs_remove_free_space_cache(block_group); 9403 btrfs_remove_free_space_cache(block_group);
9465 9404
9466 spin_lock(&block_group->space_info->lock); 9405 spin_lock(&block_group->space_info->lock);
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index d6c03f7f136b..80a3141463e7 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -1243,6 +1243,7 @@ int btrfs_write_out_cache(struct btrfs_root *root,
1243 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 1243 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1244 struct inode *inode; 1244 struct inode *inode;
1245 int ret = 0; 1245 int ret = 0;
1246 enum btrfs_disk_cache_state dcs = BTRFS_DC_WRITTEN;
1246 1247
1247 root = root->fs_info->tree_root; 1248 root = root->fs_info->tree_root;
1248 1249
@@ -1266,9 +1267,7 @@ int btrfs_write_out_cache(struct btrfs_root *root,
1266 ret = __btrfs_write_out_cache(root, inode, ctl, block_group, trans, 1267 ret = __btrfs_write_out_cache(root, inode, ctl, block_group, trans,
1267 path, block_group->key.objectid); 1268 path, block_group->key.objectid);
1268 if (ret) { 1269 if (ret) {
1269 spin_lock(&block_group->lock); 1270 dcs = BTRFS_DC_ERROR;
1270 block_group->disk_cache_state = BTRFS_DC_ERROR;
1271 spin_unlock(&block_group->lock);
1272 ret = 0; 1271 ret = 0;
1273#ifdef DEBUG 1272#ifdef DEBUG
1274 btrfs_err(root->fs_info, 1273 btrfs_err(root->fs_info,
@@ -1277,6 +1276,9 @@ int btrfs_write_out_cache(struct btrfs_root *root,
1277#endif 1276#endif
1278 } 1277 }
1279 1278
1279 spin_lock(&block_group->lock);
1280 block_group->disk_cache_state = dcs;
1281 spin_unlock(&block_group->lock);
1280 iput(inode); 1282 iput(inode);
1281 return ret; 1283 return ret;
1282} 1284}
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index aa2219ebecc9..e0faf803513a 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -248,6 +248,8 @@ loop:
248 INIT_LIST_HEAD(&cur_trans->pending_chunks); 248 INIT_LIST_HEAD(&cur_trans->pending_chunks);
249 INIT_LIST_HEAD(&cur_trans->switch_commits); 249 INIT_LIST_HEAD(&cur_trans->switch_commits);
250 INIT_LIST_HEAD(&cur_trans->pending_ordered); 250 INIT_LIST_HEAD(&cur_trans->pending_ordered);
251 INIT_LIST_HEAD(&cur_trans->dirty_bgs);
252 spin_lock_init(&cur_trans->dirty_bgs_lock);
251 list_add_tail(&cur_trans->list, &fs_info->trans_list); 253 list_add_tail(&cur_trans->list, &fs_info->trans_list);
252 extent_io_tree_init(&cur_trans->dirty_pages, 254 extent_io_tree_init(&cur_trans->dirty_pages,
253 fs_info->btree_inode->i_mapping); 255 fs_info->btree_inode->i_mapping);
@@ -1028,7 +1030,9 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans,
1028 while (1) { 1030 while (1) {
1029 old_root_bytenr = btrfs_root_bytenr(&root->root_item); 1031 old_root_bytenr = btrfs_root_bytenr(&root->root_item);
1030 if (old_root_bytenr == root->node->start && 1032 if (old_root_bytenr == root->node->start &&
1031 old_root_used == btrfs_root_used(&root->root_item)) 1033 old_root_used == btrfs_root_used(&root->root_item) &&
1034 (!extent_root ||
1035 list_empty(&trans->transaction->dirty_bgs)))
1032 break; 1036 break;
1033 1037
1034 btrfs_set_root_node(&root->root_item, root->node); 1038 btrfs_set_root_node(&root->root_item, root->node);
@@ -1047,6 +1051,9 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans,
1047 ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); 1051 ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
1048 if (ret) 1052 if (ret)
1049 return ret; 1053 return ret;
1054 ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
1055 if (ret)
1056 return ret;
1050 } 1057 }
1051 1058
1052 return 0; 1059 return 0;
@@ -1067,10 +1074,6 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans,
1067 struct extent_buffer *eb; 1074 struct extent_buffer *eb;
1068 int ret; 1075 int ret;
1069 1076
1070 ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
1071 if (ret)
1072 return ret;
1073
1074 eb = btrfs_lock_root_node(fs_info->tree_root); 1077 eb = btrfs_lock_root_node(fs_info->tree_root);
1075 ret = btrfs_cow_block(trans, fs_info->tree_root, eb, NULL, 1078 ret = btrfs_cow_block(trans, fs_info->tree_root, eb, NULL,
1076 0, &eb); 1079 0, &eb);
@@ -1990,6 +1993,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1990 switch_commit_roots(cur_trans, root->fs_info); 1993 switch_commit_roots(cur_trans, root->fs_info);
1991 1994
1992 assert_qgroups_uptodate(trans); 1995 assert_qgroups_uptodate(trans);
1996 ASSERT(list_empty(&cur_trans->dirty_bgs));
1993 update_super_roots(root); 1997 update_super_roots(root);
1994 1998
1995 btrfs_set_super_log_root(root->fs_info->super_copy, 0); 1999 btrfs_set_super_log_root(root->fs_info->super_copy, 0);
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index 00ed29c4b3f9..3305451451ca 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -58,6 +58,8 @@ struct btrfs_transaction {
58 struct list_head pending_chunks; 58 struct list_head pending_chunks;
59 struct list_head pending_ordered; 59 struct list_head pending_ordered;
60 struct list_head switch_commits; 60 struct list_head switch_commits;
61 struct list_head dirty_bgs;
62 spinlock_t dirty_bgs_lock;
61 struct btrfs_delayed_ref_root delayed_refs; 63 struct btrfs_delayed_ref_root delayed_refs;
62 int aborted; 64 int aborted;
63}; 65};