author     Chris Mason <chris.mason@oracle.com>  2009-03-13 10:10:06 -0400
committer  Chris Mason <chris.mason@oracle.com>  2009-03-24 16:14:25 -0400
commit     56bec294dea971335d4466b30f2d959f28f6e36d (patch)
tree       fc0b5bbf4bb6ab35582a4c7f58f5ac88f71c38bf /fs/btrfs/transaction.c
parent     9fa8cfe706f9c20067c042a064999d5825a35330 (diff)
Btrfs: do extent allocation and reference count updates in the background
The extent allocation tree maintains a reference count and full back
reference information for every extent allocated in the filesystem.
For subvolume and snapshot trees, every time a block goes through COW,
the new copy of the block adds a reference on every block it points to.
If a btree node points to 150 leaves, then the COW code needs to go and
add backrefs on 150 different extents, which might be spread all over
the extent allocation tree.

These updates currently happen during btrfs_cow_block, and most COWs
happen during btrfs_search_slot. btrfs_search_slot has locks held on
both the parent and the node we are COWing, and so we really want to
avoid IO during the COW if we can.

This commit adds an rbtree of pending reference count updates and extent
allocations. The tree is ordered by byte number of the extent and byte
number of the parent for the back reference. The tree allows us to:

1) Modify back references in something close to disk order, reducing seeks

2) Significantly reduce the number of modifications made as block pointers
   are balanced around

3) Do all of the extent insertion and back reference modifications outside
   of the performance critical btrfs_search_slot code.

#3 has the added benefit of greatly reducing the btrfs stack footprint.
The extent allocation tree modifications are done without the deep
(and somewhat recursive) call chains used in the past.

These delayed back reference updates must be done before the transaction
commits, and so the rbtree is tied to the transaction. Throttling is
implemented to help keep the queue of backrefs at a reasonable size.

A similar mechanism was already in place for the extent tree's own
extents; it is removed and replaced by the delayed reference tree.

Yan Zheng <yan.zheng@oracle.com> helped review and fix up this code.

Signed-off-by: Chris Mason <chris.mason@oracle.com>
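The core idea is easiest to see in miniature: queue each pending
reference-count change keyed by disk location, then replay the queue
with an in-order walk so the extent allocation tree is touched in
something close to disk order. The sketch below is illustrative only;
every name in it is invented for the example, and a toy unbalanced BST
stands in for the kernel's rbtree to keep it short.

/*
 * Illustrative sketch, not kernel code: pending reference updates
 * ordered by (extent bytenr, parent bytenr).
 */
#include <stdio.h>
#include <stdint.h>

struct delayed_ref {
	uint64_t bytenr;	/* start of the extent being referenced */
	uint64_t parent;	/* bytenr of the block holding the backref */
	int ref_mod;		/* net change to apply to the ref count */
	struct delayed_ref *left, *right;
};

/* order by extent bytenr first, then by parent bytenr */
static int ref_cmp(const struct delayed_ref *a, const struct delayed_ref *b)
{
	if (a->bytenr != b->bytenr)
		return a->bytenr < b->bytenr ? -1 : 1;
	if (a->parent != b->parent)
		return a->parent < b->parent ? -1 : 1;
	return 0;
}

static struct delayed_ref *insert_ref(struct delayed_ref *root,
				      struct delayed_ref *ref)
{
	if (!root)
		return ref;
	if (ref_cmp(ref, root) < 0)
		root->left = insert_ref(root->left, ref);
	else
		root->right = insert_ref(root->right, ref);
	return root;
}

/* an in-order walk visits the refs in ascending disk order */
static void run_refs(const struct delayed_ref *root)
{
	if (!root)
		return;
	run_refs(root->left);
	printf("extent %llu (parent %llu): ref_mod %+d\n",
	       (unsigned long long)root->bytenr,
	       (unsigned long long)root->parent, root->ref_mod);
	run_refs(root->right);
}

int main(void)
{
	/* updates arrive in COW order, scattered across the disk */
	struct delayed_ref refs[] = {
		{ .bytenr = 8192, .parent = 4096, .ref_mod = 1 },
		{ .bytenr = 4096, .parent = 0, .ref_mod = 1 },
		{ .bytenr = 8192, .parent = 0, .ref_mod = -1 },
	};
	struct delayed_ref *root = NULL;
	for (int i = 0; i < 3; i++)
		root = insert_ref(root, &refs[i]);
	run_refs(root);	/* processed in (bytenr, parent) order */
	return 0;
}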
Diffstat (limited to 'fs/btrfs/transaction.c')
-rw-r--r--  fs/btrfs/transaction.c  |  54
1 file changed, 47 insertions(+), 7 deletions(-)
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index d638c54d39e9..f94c2ad8996c 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -65,6 +65,12 @@ static noinline int join_transaction(struct btrfs_root *root)
 	cur_trans->use_count = 1;
 	cur_trans->commit_done = 0;
 	cur_trans->start_time = get_seconds();
+
+	cur_trans->delayed_refs.root.rb_node = NULL;
+	cur_trans->delayed_refs.num_entries = 0;
+	cur_trans->delayed_refs.flushing = 0;
+	spin_lock_init(&cur_trans->delayed_refs.lock);
+
 	INIT_LIST_HEAD(&cur_trans->pending_snapshots);
 	list_add_tail(&cur_trans->list, &root->fs_info->trans_list);
 	extent_io_tree_init(&cur_trans->dirty_pages,
@@ -182,6 +188,7 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
 	h->block_group = 0;
 	h->alloc_exclude_nr = 0;
 	h->alloc_exclude_start = 0;
+	h->delayed_ref_updates = 0;
 	root->fs_info->running_transaction->use_count++;
 	mutex_unlock(&root->fs_info->trans_mutex);
 	return h;
@@ -281,6 +288,14 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
 	struct btrfs_transaction *cur_trans;
 	struct btrfs_fs_info *info = root->fs_info;

+	if (trans->delayed_ref_updates &&
+	    (trans->transaction->delayed_refs.flushing ||
+	     trans->transaction->delayed_refs.num_entries > 16384)) {
+		btrfs_run_delayed_refs(trans, root, trans->delayed_ref_updates);
+	} else if (trans->transaction->delayed_refs.num_entries > 64) {
+		wake_up_process(root->fs_info->transaction_kthread);
+	}
+
 	mutex_lock(&info->trans_mutex);
 	cur_trans = info->running_transaction;
 	WARN_ON(cur_trans != trans->transaction);
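The hunk above implements the throttling mentioned in the commit
message: a caller that queued delayed updates must run them
synchronously once a commit has started flushing or the queue grows
past 16384 entries, while a queue past 64 entries merely wakes the
transaction kthread to drain it in the background. A minimal sketch of
that decision, with the thresholds taken from the hunk and all names
invented for the example:

#include <stdbool.h>
#include <stdint.h>

/* thresholds from the hunk above */
#define REFS_RUN_LIMIT	16384	/* writer must help flush the queue */
#define REFS_WAKE_LIMIT	64	/* background kthread can keep up */

enum ref_action { REF_RUN_NOW, REF_WAKE_KTHREAD, REF_NOTHING };

/* invented helper mirroring the policy in __btrfs_end_transaction */
static enum ref_action throttle_delayed_refs(uint64_t num_entries,
					     bool flushing,
					     unsigned long my_updates)
{
	if (my_updates && (flushing || num_entries > REFS_RUN_LIMIT))
		return REF_RUN_NOW;		/* run our share synchronously */
	if (num_entries > REFS_WAKE_LIMIT)
		return REF_WAKE_KTHREAD;	/* defer to the background */
	return REF_NOTHING;
}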
@@ -424,9 +439,10 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans,
 	u64 old_root_bytenr;
 	struct btrfs_root *tree_root = root->fs_info->tree_root;

-	btrfs_extent_post_op(trans, root);
 	btrfs_write_dirty_block_groups(trans, root);
-	btrfs_extent_post_op(trans, root);
+
+	ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
+	BUG_ON(ret);

 	while (1) {
 		old_root_bytenr = btrfs_root_bytenr(&root->root_item);
@@ -438,14 +454,14 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans,
 				btrfs_header_level(root->node));
 		btrfs_set_root_generation(&root->root_item, trans->transid);

-		btrfs_extent_post_op(trans, root);
-
 		ret = btrfs_update_root(trans, tree_root,
 					&root->root_key,
 					&root->root_item);
 		BUG_ON(ret);
 		btrfs_write_dirty_block_groups(trans, root);
-		btrfs_extent_post_op(trans, root);
+
+		ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
+		BUG_ON(ret);
 	}
 	return 0;
 }
@@ -459,15 +475,18 @@ int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans,
 	struct btrfs_fs_info *fs_info = root->fs_info;
 	struct list_head *next;
 	struct extent_buffer *eb;
+	int ret;

-	btrfs_extent_post_op(trans, fs_info->tree_root);
+	ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
+	BUG_ON(ret);

 	eb = btrfs_lock_root_node(fs_info->tree_root);
 	btrfs_cow_block(trans, fs_info->tree_root, eb, NULL, 0, &eb);
 	btrfs_tree_unlock(eb);
 	free_extent_buffer(eb);

-	btrfs_extent_post_op(trans, fs_info->tree_root);
+	ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
+	BUG_ON(ret);

 	while (!list_empty(&fs_info->dirty_cowonly_roots)) {
 		next = fs_info->dirty_cowonly_roots.next;
@@ -475,6 +494,9 @@ int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans,
 		root = list_entry(next, struct btrfs_root, dirty_list);

 		update_cowonly_root(trans, root);
+
+		ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
+		BUG_ON(ret);
 	}
 	return 0;
 }
@@ -895,6 +917,21 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
 	DEFINE_WAIT(wait);
 	int ret;

+	/* make a pass through all the delayed refs we have so far
+	 * any running procs may add more while we are here
+	 */
+	ret = btrfs_run_delayed_refs(trans, root, 0);
+	BUG_ON(ret);
+
+	/*
+	 * set the flushing flag so procs in this transaction have to
+	 * start sending their work down.
+	 */
+	trans->transaction->delayed_refs.flushing = 1;
+
+	ret = btrfs_run_delayed_refs(trans, root, (u64)-1);
+	BUG_ON(ret);
+
 	INIT_LIST_HEAD(&dirty_fs_roots);
 	mutex_lock(&root->fs_info->trans_mutex);
 	if (trans->transaction->in_commit) {
@@ -969,6 +1006,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
 	ret = create_pending_snapshots(trans, root->fs_info);
 	BUG_ON(ret);

+	ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
+	BUG_ON(ret);
+
 	WARN_ON(cur_trans != trans->transaction);

 	/* btrfs_commit_tree_roots is responsible for getting the