about summary refs log tree commit diff stats
path: root/fs/btrfs
diff options
context:
space:
mode:
authorJosef Bacik <jbacik@fb.com>2015-02-03 10:50:16 -0500
committerChris Mason <clm@fb.com>2015-04-10 17:04:47 -0400
commit1262133b8d6f10f5ca7621cd4cf65ddf6254126a (patch)
treeac508ea4fc6d1e9b394ac2bdfee04eb20f5fc930 /fs/btrfs
parent28ed1345a50491d78e1454ad4005dc5d3557a69e (diff)
Btrfs: account for crcs in delayed ref processing
As we delete large extents, we end up doing huge amounts of COW in order to delete the corresponding crcs. This adds accounting so that we keep track of that space and flushing of delayed refs so that we don't build up too much delayed crc work. This helps limit the delayed work that must be done at commit time and tries to avoid ENOSPC aborts because the crcs eat all the global reserves. Signed-off-by: Chris Mason <clm@fb.com>
Diffstat (limited to 'fs/btrfs')
-rw-r--r--fs/btrfs/delayed-ref.c22
-rw-r--r--fs/btrfs/delayed-ref.h10
-rw-r--r--fs/btrfs/extent-tree.c46
-rw-r--r--fs/btrfs/inode.c25
-rw-r--r--fs/btrfs/transaction.c4
5 files changed, 83 insertions, 24 deletions
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index 6d16bea94e1c..8f8ed7d20bac 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -489,11 +489,13 @@ update_existing_ref(struct btrfs_trans_handle *trans,
489 * existing and update must have the same bytenr 489 * existing and update must have the same bytenr
490 */ 490 */
491static noinline void 491static noinline void
492update_existing_head_ref(struct btrfs_delayed_ref_node *existing, 492update_existing_head_ref(struct btrfs_delayed_ref_root *delayed_refs,
493 struct btrfs_delayed_ref_node *existing,
493 struct btrfs_delayed_ref_node *update) 494 struct btrfs_delayed_ref_node *update)
494{ 495{
495 struct btrfs_delayed_ref_head *existing_ref; 496 struct btrfs_delayed_ref_head *existing_ref;
496 struct btrfs_delayed_ref_head *ref; 497 struct btrfs_delayed_ref_head *ref;
498 int old_ref_mod;
497 499
498 existing_ref = btrfs_delayed_node_to_head(existing); 500 existing_ref = btrfs_delayed_node_to_head(existing);
499 ref = btrfs_delayed_node_to_head(update); 501 ref = btrfs_delayed_node_to_head(update);
@@ -541,7 +543,20 @@ update_existing_head_ref(struct btrfs_delayed_ref_node *existing,
541 * only need the lock for this case cause we could be processing it 543 * only need the lock for this case cause we could be processing it
542 * currently, for refs we just added we know we're a-ok. 544 * currently, for refs we just added we know we're a-ok.
543 */ 545 */
546 old_ref_mod = existing_ref->total_ref_mod;
544 existing->ref_mod += update->ref_mod; 547 existing->ref_mod += update->ref_mod;
548 existing_ref->total_ref_mod += update->ref_mod;
549
550 /*
551 * If we are going from a positive ref mod to a negative or vice
552 * versa we need to make sure to adjust pending_csums accordingly.
553 */
554 if (existing_ref->is_data) {
555 if (existing_ref->total_ref_mod >= 0 && old_ref_mod < 0)
556 delayed_refs->pending_csums -= existing->num_bytes;
557 if (existing_ref->total_ref_mod < 0 && old_ref_mod >= 0)
558 delayed_refs->pending_csums += existing->num_bytes;
559 }
545 spin_unlock(&existing_ref->lock); 560 spin_unlock(&existing_ref->lock);
546} 561}
547 562
@@ -605,6 +620,7 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
605 head_ref->is_data = is_data; 620 head_ref->is_data = is_data;
606 head_ref->ref_root = RB_ROOT; 621 head_ref->ref_root = RB_ROOT;
607 head_ref->processing = 0; 622 head_ref->processing = 0;
623 head_ref->total_ref_mod = count_mod;
608 624
609 spin_lock_init(&head_ref->lock); 625 spin_lock_init(&head_ref->lock);
610 mutex_init(&head_ref->mutex); 626 mutex_init(&head_ref->mutex);
@@ -614,7 +630,7 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
614 existing = htree_insert(&delayed_refs->href_root, 630 existing = htree_insert(&delayed_refs->href_root,
615 &head_ref->href_node); 631 &head_ref->href_node);
616 if (existing) { 632 if (existing) {
617 update_existing_head_ref(&existing->node, ref); 633 update_existing_head_ref(delayed_refs, &existing->node, ref);
618 /* 634 /*
619 * we've updated the existing ref, free the newly 635 * we've updated the existing ref, free the newly
620 * allocated ref 636 * allocated ref
@@ -622,6 +638,8 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
622 kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref); 638 kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref);
623 head_ref = existing; 639 head_ref = existing;
624 } else { 640 } else {
641 if (is_data && count_mod < 0)
642 delayed_refs->pending_csums += num_bytes;
625 delayed_refs->num_heads++; 643 delayed_refs->num_heads++;
626 delayed_refs->num_heads_ready++; 644 delayed_refs->num_heads_ready++;
627 atomic_inc(&delayed_refs->num_entries); 645 atomic_inc(&delayed_refs->num_entries);
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h
index a764e2340d48..5eb0892396d0 100644
--- a/fs/btrfs/delayed-ref.h
+++ b/fs/btrfs/delayed-ref.h
@@ -88,6 +88,14 @@ struct btrfs_delayed_ref_head {
88 struct rb_node href_node; 88 struct rb_node href_node;
89 89
90 struct btrfs_delayed_extent_op *extent_op; 90 struct btrfs_delayed_extent_op *extent_op;
91
92 /*
93 * This is used to track the final ref_mod from all the refs associated
94 * with this head ref, this is not adjusted as delayed refs are run,
95 * this is meant to track if we need to do the csum accounting or not.
96 */
97 int total_ref_mod;
98
91 /* 99 /*
92 * when a new extent is allocated, it is just reserved in memory 100 * when a new extent is allocated, it is just reserved in memory
93 * The actual extent isn't inserted into the extent allocation tree 101 * The actual extent isn't inserted into the extent allocation tree
@@ -138,6 +146,8 @@ struct btrfs_delayed_ref_root {
138 /* total number of head nodes ready for processing */ 146 /* total number of head nodes ready for processing */
139 unsigned long num_heads_ready; 147 unsigned long num_heads_ready;
140 148
149 u64 pending_csums;
150
141 /* 151 /*
142 * set when the tree is flushing before a transaction commit, 152 * set when the tree is flushing before a transaction commit,
143 * used by the throttling code to decide if new updates need 153 * used by the throttling code to decide if new updates need
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 41e5812c131f..a6f88eb57b39 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2538,6 +2538,12 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
2538 * list before we release it. 2538 * list before we release it.
2539 */ 2539 */
2540 if (btrfs_delayed_ref_is_head(ref)) { 2540 if (btrfs_delayed_ref_is_head(ref)) {
2541 if (locked_ref->is_data &&
2542 locked_ref->total_ref_mod < 0) {
2543 spin_lock(&delayed_refs->lock);
2544 delayed_refs->pending_csums -= ref->num_bytes;
2545 spin_unlock(&delayed_refs->lock);
2546 }
2541 btrfs_delayed_ref_unlock(locked_ref); 2547 btrfs_delayed_ref_unlock(locked_ref);
2542 locked_ref = NULL; 2548 locked_ref = NULL;
2543 } 2549 }
@@ -2626,11 +2632,31 @@ static inline u64 heads_to_leaves(struct btrfs_root *root, u64 heads)
2626 return div_u64(num_bytes, BTRFS_LEAF_DATA_SIZE(root)); 2632 return div_u64(num_bytes, BTRFS_LEAF_DATA_SIZE(root));
2627} 2633}
2628 2634
2635/*
2636 * Takes the number of bytes to be csumm'ed and figures out how many leaves it
2637 * would require to store the csums for that many bytes.
2638 */
2639static u64 csum_bytes_to_leaves(struct btrfs_root *root, u64 csum_bytes)
2640{
2641 u64 csum_size;
2642 u64 num_csums_per_leaf;
2643 u64 num_csums;
2644
2645 csum_size = BTRFS_LEAF_DATA_SIZE(root) - sizeof(struct btrfs_item);
2646 num_csums_per_leaf = div64_u64(csum_size,
2647 (u64)btrfs_super_csum_size(root->fs_info->super_copy));
2648 num_csums = div64_u64(csum_bytes, root->sectorsize);
2649 num_csums += num_csums_per_leaf - 1;
2650 num_csums = div64_u64(num_csums, num_csums_per_leaf);
2651 return num_csums;
2652}
2653
2629int btrfs_check_space_for_delayed_refs(struct btrfs_trans_handle *trans, 2654int btrfs_check_space_for_delayed_refs(struct btrfs_trans_handle *trans,
2630 struct btrfs_root *root) 2655 struct btrfs_root *root)
2631{ 2656{
2632 struct btrfs_block_rsv *global_rsv; 2657 struct btrfs_block_rsv *global_rsv;
2633 u64 num_heads = trans->transaction->delayed_refs.num_heads_ready; 2658 u64 num_heads = trans->transaction->delayed_refs.num_heads_ready;
2659 u64 csum_bytes = trans->transaction->delayed_refs.pending_csums;
2634 u64 num_bytes; 2660 u64 num_bytes;
2635 int ret = 0; 2661 int ret = 0;
2636 2662
@@ -2639,6 +2665,7 @@ int btrfs_check_space_for_delayed_refs(struct btrfs_trans_handle *trans,
2639 if (num_heads > 1) 2665 if (num_heads > 1)
2640 num_bytes += (num_heads - 1) * root->nodesize; 2666 num_bytes += (num_heads - 1) * root->nodesize;
2641 num_bytes <<= 1; 2667 num_bytes <<= 1;
2668 num_bytes += csum_bytes_to_leaves(root, csum_bytes) * root->nodesize;
2642 global_rsv = &root->fs_info->global_block_rsv; 2669 global_rsv = &root->fs_info->global_block_rsv;
2643 2670
2644 /* 2671 /*
@@ -5065,30 +5092,19 @@ static u64 calc_csum_metadata_size(struct inode *inode, u64 num_bytes,
5065 int reserve) 5092 int reserve)
5066{ 5093{
5067 struct btrfs_root *root = BTRFS_I(inode)->root; 5094 struct btrfs_root *root = BTRFS_I(inode)->root;
5068 u64 csum_size; 5095 u64 old_csums, num_csums;
5069 int num_csums_per_leaf;
5070 int num_csums;
5071 int old_csums;
5072 5096
5073 if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM && 5097 if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM &&
5074 BTRFS_I(inode)->csum_bytes == 0) 5098 BTRFS_I(inode)->csum_bytes == 0)
5075 return 0; 5099 return 0;
5076 5100
5077 old_csums = (int)div_u64(BTRFS_I(inode)->csum_bytes, root->sectorsize); 5101 old_csums = csum_bytes_to_leaves(root, BTRFS_I(inode)->csum_bytes);
5102
5078 if (reserve) 5103 if (reserve)
5079 BTRFS_I(inode)->csum_bytes += num_bytes; 5104 BTRFS_I(inode)->csum_bytes += num_bytes;
5080 else 5105 else
5081 BTRFS_I(inode)->csum_bytes -= num_bytes; 5106 BTRFS_I(inode)->csum_bytes -= num_bytes;
5082 csum_size = BTRFS_LEAF_DATA_SIZE(root) - sizeof(struct btrfs_item); 5107 num_csums = csum_bytes_to_leaves(root, BTRFS_I(inode)->csum_bytes);
5083 num_csums_per_leaf = (int)div_u64(csum_size,
5084 sizeof(struct btrfs_csum_item) +
5085 sizeof(struct btrfs_disk_key));
5086 num_csums = (int)div_u64(BTRFS_I(inode)->csum_bytes, root->sectorsize);
5087 num_csums = num_csums + num_csums_per_leaf - 1;
5088 num_csums = num_csums / num_csums_per_leaf;
5089
5090 old_csums = old_csums + num_csums_per_leaf - 1;
5091 old_csums = old_csums / num_csums_per_leaf;
5092 5108
5093 /* No change, no need to reserve more */ 5109 /* No change, no need to reserve more */
5094 if (old_csums == num_csums) 5110 if (old_csums == num_csums)
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index e3fe137fb826..cec23cf812ee 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -4197,9 +4197,10 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
4197 int extent_type = -1; 4197 int extent_type = -1;
4198 int ret; 4198 int ret;
4199 int err = 0; 4199 int err = 0;
4200 int be_nice = 0;
4201 u64 ino = btrfs_ino(inode); 4200 u64 ino = btrfs_ino(inode);
4202 u64 bytes_deleted = 0; 4201 u64 bytes_deleted = 0;
4202 bool be_nice = 0;
4203 bool should_throttle = 0;
4203 4204
4204 BUG_ON(new_size > 0 && min_type != BTRFS_EXTENT_DATA_KEY); 4205 BUG_ON(new_size > 0 && min_type != BTRFS_EXTENT_DATA_KEY);
4205 4206
@@ -4405,19 +4406,20 @@ delete:
4405 btrfs_header_owner(leaf), 4406 btrfs_header_owner(leaf),
4406 ino, extent_offset, 0); 4407 ino, extent_offset, 0);
4407 BUG_ON(ret); 4408 BUG_ON(ret);
4408 if (be_nice && pending_del_nr && 4409 if (btrfs_should_throttle_delayed_refs(trans, root))
4409 (pending_del_nr % 16 == 0) &&
4410 bytes_deleted > 1024 * 1024) {
4411 btrfs_async_run_delayed_refs(root, 4410 btrfs_async_run_delayed_refs(root,
4412 trans->delayed_ref_updates * 2, 0); 4411 trans->delayed_ref_updates * 2, 0);
4413 }
4414 } 4412 }
4415 4413
4416 if (found_type == BTRFS_INODE_ITEM_KEY) 4414 if (found_type == BTRFS_INODE_ITEM_KEY)
4417 break; 4415 break;
4418 4416
4417 should_throttle =
4418 btrfs_should_throttle_delayed_refs(trans, root);
4419
4419 if (path->slots[0] == 0 || 4420 if (path->slots[0] == 0 ||
4420 path->slots[0] != pending_del_slot) { 4421 path->slots[0] != pending_del_slot ||
4422 (be_nice && should_throttle)) {
4421 if (pending_del_nr) { 4423 if (pending_del_nr) {
4422 ret = btrfs_del_items(trans, root, path, 4424 ret = btrfs_del_items(trans, root, path,
4423 pending_del_slot, 4425 pending_del_slot,
@@ -4430,6 +4432,15 @@ delete:
4430 pending_del_nr = 0; 4432 pending_del_nr = 0;
4431 } 4433 }
4432 btrfs_release_path(path); 4434 btrfs_release_path(path);
4435 if (be_nice && should_throttle) {
4436 unsigned long updates = trans->delayed_ref_updates;
4437 if (updates) {
4438 trans->delayed_ref_updates = 0;
4439 ret = btrfs_run_delayed_refs(trans, root, updates * 2);
4440 if (ret && !err)
4441 err = ret;
4442 }
4443 }
4433 goto search_again; 4444 goto search_again;
4434 } else { 4445 } else {
4435 path->slots[0]--; 4446 path->slots[0]--;
@@ -4449,7 +4460,7 @@ error:
4449 4460
4450 btrfs_free_path(path); 4461 btrfs_free_path(path);
4451 4462
4452 if (be_nice && bytes_deleted > 32 * 1024 * 1024) { 4463 if (be_nice && btrfs_should_throttle_delayed_refs(trans, root)) {
4453 unsigned long updates = trans->delayed_ref_updates; 4464 unsigned long updates = trans->delayed_ref_updates;
4454 if (updates) { 4465 if (updates) {
4455 trans->delayed_ref_updates = 0; 4466 trans->delayed_ref_updates = 0;
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index ba831ee41891..8b9eea8f2406 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -64,6 +64,9 @@ void btrfs_put_transaction(struct btrfs_transaction *transaction)
64 if (atomic_dec_and_test(&transaction->use_count)) { 64 if (atomic_dec_and_test(&transaction->use_count)) {
65 BUG_ON(!list_empty(&transaction->list)); 65 BUG_ON(!list_empty(&transaction->list));
66 WARN_ON(!RB_EMPTY_ROOT(&transaction->delayed_refs.href_root)); 66 WARN_ON(!RB_EMPTY_ROOT(&transaction->delayed_refs.href_root));
67 if (transaction->delayed_refs.pending_csums)
68 printk(KERN_ERR "pending csums is %llu\n",
69 transaction->delayed_refs.pending_csums);
67 while (!list_empty(&transaction->pending_chunks)) { 70 while (!list_empty(&transaction->pending_chunks)) {
68 struct extent_map *em; 71 struct extent_map *em;
69 72
@@ -223,6 +226,7 @@ loop:
223 cur_trans->delayed_refs.href_root = RB_ROOT; 226 cur_trans->delayed_refs.href_root = RB_ROOT;
224 atomic_set(&cur_trans->delayed_refs.num_entries, 0); 227 atomic_set(&cur_trans->delayed_refs.num_entries, 0);
225 cur_trans->delayed_refs.num_heads_ready = 0; 228 cur_trans->delayed_refs.num_heads_ready = 0;
229 cur_trans->delayed_refs.pending_csums = 0;
226 cur_trans->delayed_refs.num_heads = 0; 230 cur_trans->delayed_refs.num_heads = 0;
227 cur_trans->delayed_refs.flushing = 0; 231 cur_trans->delayed_refs.flushing = 0;
228 cur_trans->delayed_refs.run_delayed_start = 0; 232 cur_trans->delayed_refs.run_delayed_start = 0;