aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/extent-tree.c
diff options
context:
space:
mode:
authorTejun Heo <tj@kernel.org>2014-09-24 13:00:21 -0400
committerTejun Heo <tj@kernel.org>2014-09-24 13:00:21 -0400
commitd06efebf0c37d438fcf07057be00dd40fcfce08d (patch)
tree31a0786d132aadf4cbb9725f3f444ef6e1052128 /fs/btrfs/extent-tree.c
parentbb2e226b3bef596dd56be97df655d857b4603923 (diff)
parent0a30288da1aec914e158c2d7a3482a85f632750f (diff)
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block into for-3.18
This is to receive 0a30288da1ae ("blk-mq, percpu_ref: implement a kludge for SCSI blk-mq stall during probe") which implements __percpu_ref_kill_expedited() to work around SCSI blk-mq stall. The commit will be reverted and patches to implement a proper fix will be added. Signed-off-by: Tejun Heo <tj@kernel.org> Cc: Kent Overstreet <kmo@daterainc.com> Cc: Jens Axboe <axboe@kernel.dk> Cc: Christoph Hellwig <hch@lst.de>
Diffstat (limited to 'fs/btrfs/extent-tree.c')
-rw-r--r--fs/btrfs/extent-tree.c308
1 files changed, 278 insertions, 30 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 94ec71eda86b..caaf015d6e4b 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -552,7 +552,8 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
552 caching_ctl->block_group = cache; 552 caching_ctl->block_group = cache;
553 caching_ctl->progress = cache->key.objectid; 553 caching_ctl->progress = cache->key.objectid;
554 atomic_set(&caching_ctl->count, 1); 554 atomic_set(&caching_ctl->count, 1);
555 btrfs_init_work(&caching_ctl->work, caching_thread, NULL, NULL); 555 btrfs_init_work(&caching_ctl->work, btrfs_cache_helper,
556 caching_thread, NULL, NULL);
556 557
557 spin_lock(&cache->lock); 558 spin_lock(&cache->lock);
558 /* 559 /*
@@ -2749,8 +2750,8 @@ int btrfs_async_run_delayed_refs(struct btrfs_root *root,
2749 async->sync = 0; 2750 async->sync = 0;
2750 init_completion(&async->wait); 2751 init_completion(&async->wait);
2751 2752
2752 btrfs_init_work(&async->work, delayed_ref_async_start, 2753 btrfs_init_work(&async->work, btrfs_extent_refs_helper,
2753 NULL, NULL); 2754 delayed_ref_async_start, NULL, NULL);
2754 2755
2755 btrfs_queue_work(root->fs_info->extent_workers, &async->work); 2756 btrfs_queue_work(root->fs_info->extent_workers, &async->work);
2756 2757
@@ -3057,7 +3058,7 @@ out:
3057static int __btrfs_mod_ref(struct btrfs_trans_handle *trans, 3058static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
3058 struct btrfs_root *root, 3059 struct btrfs_root *root,
3059 struct extent_buffer *buf, 3060 struct extent_buffer *buf,
3060 int full_backref, int inc, int no_quota) 3061 int full_backref, int inc)
3061{ 3062{
3062 u64 bytenr; 3063 u64 bytenr;
3063 u64 num_bytes; 3064 u64 num_bytes;
@@ -3111,7 +3112,7 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
3111 key.offset -= btrfs_file_extent_offset(buf, fi); 3112 key.offset -= btrfs_file_extent_offset(buf, fi);
3112 ret = process_func(trans, root, bytenr, num_bytes, 3113 ret = process_func(trans, root, bytenr, num_bytes,
3113 parent, ref_root, key.objectid, 3114 parent, ref_root, key.objectid,
3114 key.offset, no_quota); 3115 key.offset, 1);
3115 if (ret) 3116 if (ret)
3116 goto fail; 3117 goto fail;
3117 } else { 3118 } else {
@@ -3119,7 +3120,7 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
3119 num_bytes = btrfs_level_size(root, level - 1); 3120 num_bytes = btrfs_level_size(root, level - 1);
3120 ret = process_func(trans, root, bytenr, num_bytes, 3121 ret = process_func(trans, root, bytenr, num_bytes,
3121 parent, ref_root, level - 1, 0, 3122 parent, ref_root, level - 1, 0,
3122 no_quota); 3123 1);
3123 if (ret) 3124 if (ret)
3124 goto fail; 3125 goto fail;
3125 } 3126 }
@@ -3130,15 +3131,15 @@ fail:
3130} 3131}
3131 3132
3132int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, 3133int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
3133 struct extent_buffer *buf, int full_backref, int no_quota) 3134 struct extent_buffer *buf, int full_backref)
3134{ 3135{
3135 return __btrfs_mod_ref(trans, root, buf, full_backref, 1, no_quota); 3136 return __btrfs_mod_ref(trans, root, buf, full_backref, 1);
3136} 3137}
3137 3138
3138int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, 3139int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
3139 struct extent_buffer *buf, int full_backref, int no_quota) 3140 struct extent_buffer *buf, int full_backref)
3140{ 3141{
3141 return __btrfs_mod_ref(trans, root, buf, full_backref, 0, no_quota); 3142 return __btrfs_mod_ref(trans, root, buf, full_backref, 0);
3142} 3143}
3143 3144
3144static int write_one_cache_group(struct btrfs_trans_handle *trans, 3145static int write_one_cache_group(struct btrfs_trans_handle *trans,
@@ -3586,13 +3587,7 @@ static u64 get_restripe_target(struct btrfs_fs_info *fs_info, u64 flags)
3586 */ 3587 */
3587static u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags) 3588static u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags)
3588{ 3589{
3589 /* 3590 u64 num_devices = root->fs_info->fs_devices->rw_devices;
3590 * we add in the count of missing devices because we want
3591 * to make sure that any RAID levels on a degraded FS
3592 * continue to be honored.
3593 */
3594 u64 num_devices = root->fs_info->fs_devices->rw_devices +
3595 root->fs_info->fs_devices->missing_devices;
3596 u64 target; 3591 u64 target;
3597 u64 tmp; 3592 u64 tmp;
3598 3593
@@ -7478,6 +7473,220 @@ reada:
7478 wc->reada_slot = slot; 7473 wc->reada_slot = slot;
7479} 7474}
7480 7475
7476static int account_leaf_items(struct btrfs_trans_handle *trans,
7477 struct btrfs_root *root,
7478 struct extent_buffer *eb)
7479{
7480 int nr = btrfs_header_nritems(eb);
7481 int i, extent_type, ret;
7482 struct btrfs_key key;
7483 struct btrfs_file_extent_item *fi;
7484 u64 bytenr, num_bytes;
7485
7486 for (i = 0; i < nr; i++) {
7487 btrfs_item_key_to_cpu(eb, &key, i);
7488
7489 if (key.type != BTRFS_EXTENT_DATA_KEY)
7490 continue;
7491
7492 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
7493 /* filter out non qgroup-accountable extents */
7494 extent_type = btrfs_file_extent_type(eb, fi);
7495
7496 if (extent_type == BTRFS_FILE_EXTENT_INLINE)
7497 continue;
7498
7499 bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
7500 if (!bytenr)
7501 continue;
7502
7503 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
7504
7505 ret = btrfs_qgroup_record_ref(trans, root->fs_info,
7506 root->objectid,
7507 bytenr, num_bytes,
7508 BTRFS_QGROUP_OPER_SUB_SUBTREE, 0);
7509 if (ret)
7510 return ret;
7511 }
7512 return 0;
7513}
7514
7515/*
7516 * Walk up the tree from the bottom, freeing leaves and any interior
7517 * nodes which have had all slots visited. If a node (leaf or
7518 * interior) is freed, the node above it will have its slot
7519 * incremented. The root node will never be freed.
7520 *
7521 * At the end of this function, we should have a path which has all
7522 * slots incremented to the next position for a search. If we need to
7523 * read a new node it will be NULL and the node above it will have the
7524 * correct slot selected for a later read.
7525 *
7526 * If we increment the root node's slot counter past the number of
7527 * elements, 1 is returned to signal completion of the search.
7528 */
7529static int adjust_slots_upwards(struct btrfs_root *root,
7530 struct btrfs_path *path, int root_level)
7531{
7532 int level = 0;
7533 int nr, slot;
7534 struct extent_buffer *eb;
7535
7536 if (root_level == 0)
7537 return 1;
7538
7539 while (level <= root_level) {
7540 eb = path->nodes[level];
7541 nr = btrfs_header_nritems(eb);
7542 path->slots[level]++;
7543 slot = path->slots[level];
7544 if (slot >= nr || level == 0) {
7545 /*
7546 * Don't free the root - we will detect this
7547 * condition after our loop and return a
7548 * positive value for caller to stop walking the tree.
7549 */
7550 if (level != root_level) {
7551 btrfs_tree_unlock_rw(eb, path->locks[level]);
7552 path->locks[level] = 0;
7553
7554 free_extent_buffer(eb);
7555 path->nodes[level] = NULL;
7556 path->slots[level] = 0;
7557 }
7558 } else {
7559 /*
7560 * We have a valid slot to walk back down
7561 * from. Stop here so caller can process these
7562 * new nodes.
7563 */
7564 break;
7565 }
7566
7567 level++;
7568 }
7569
7570 eb = path->nodes[root_level];
7571 if (path->slots[root_level] >= btrfs_header_nritems(eb))
7572 return 1;
7573
7574 return 0;
7575}
7576
7577/*
7578 * root_eb is the subtree root and is locked before this function is called.
7579 */
7580static int account_shared_subtree(struct btrfs_trans_handle *trans,
7581 struct btrfs_root *root,
7582 struct extent_buffer *root_eb,
7583 u64 root_gen,
7584 int root_level)
7585{
7586 int ret = 0;
7587 int level;
7588 struct extent_buffer *eb = root_eb;
7589 struct btrfs_path *path = NULL;
7590
7591 BUG_ON(root_level < 0 || root_level > BTRFS_MAX_LEVEL);
7592 BUG_ON(root_eb == NULL);
7593
7594 if (!root->fs_info->quota_enabled)
7595 return 0;
7596
7597 if (!extent_buffer_uptodate(root_eb)) {
7598 ret = btrfs_read_buffer(root_eb, root_gen);
7599 if (ret)
7600 goto out;
7601 }
7602
7603 if (root_level == 0) {
7604 ret = account_leaf_items(trans, root, root_eb);
7605 goto out;
7606 }
7607
7608 path = btrfs_alloc_path();
7609 if (!path)
7610 return -ENOMEM;
7611
7612 /*
7613 * Walk down the tree. Missing extent blocks are filled in as
7614 * we go. Metadata is accounted every time we read a new
7615 * extent block.
7616 *
7617 * When we reach a leaf, we account for file extent items in it,
7618 * walk back up the tree (adjusting slot pointers as we go)
7619 * and restart the search process.
7620 */
7621 extent_buffer_get(root_eb); /* For path */
7622 path->nodes[root_level] = root_eb;
7623 path->slots[root_level] = 0;
7624 path->locks[root_level] = 0; /* so release_path doesn't try to unlock */
7625walk_down:
7626 level = root_level;
7627 while (level >= 0) {
7628 if (path->nodes[level] == NULL) {
7629 int child_bsize = root->nodesize;
7630 int parent_slot;
7631 u64 child_gen;
7632 u64 child_bytenr;
7633
7634 /* We need to get child blockptr/gen from
7635 * parent before we can read it. */
7636 eb = path->nodes[level + 1];
7637 parent_slot = path->slots[level + 1];
7638 child_bytenr = btrfs_node_blockptr(eb, parent_slot);
7639 child_gen = btrfs_node_ptr_generation(eb, parent_slot);
7640
7641 eb = read_tree_block(root, child_bytenr, child_bsize,
7642 child_gen);
7643 if (!eb || !extent_buffer_uptodate(eb)) {
7644 ret = -EIO;
7645 goto out;
7646 }
7647
7648 path->nodes[level] = eb;
7649 path->slots[level] = 0;
7650
7651 btrfs_tree_read_lock(eb);
7652 btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
7653 path->locks[level] = BTRFS_READ_LOCK_BLOCKING;
7654
7655 ret = btrfs_qgroup_record_ref(trans, root->fs_info,
7656 root->objectid,
7657 child_bytenr,
7658 child_bsize,
7659 BTRFS_QGROUP_OPER_SUB_SUBTREE,
7660 0);
7661 if (ret)
7662 goto out;
7663
7664 }
7665
7666 if (level == 0) {
7667 ret = account_leaf_items(trans, root, path->nodes[level]);
7668 if (ret)
7669 goto out;
7670
7671 /* Nonzero return here means we completed our search */
7672 ret = adjust_slots_upwards(root, path, root_level);
7673 if (ret)
7674 break;
7675
7676 /* Restart search with new slots */
7677 goto walk_down;
7678 }
7679
7680 level--;
7681 }
7682
7683 ret = 0;
7684out:
7685 btrfs_free_path(path);
7686
7687 return ret;
7688}
7689
7481/* 7690/*
7482 * helper to process tree block while walking down the tree. 7691 * helper to process tree block while walking down the tree.
7483 * 7692 *
@@ -7532,9 +7741,9 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
7532 /* wc->stage == UPDATE_BACKREF */ 7741 /* wc->stage == UPDATE_BACKREF */
7533 if (!(wc->flags[level] & flag)) { 7742 if (!(wc->flags[level] & flag)) {
7534 BUG_ON(!path->locks[level]); 7743 BUG_ON(!path->locks[level]);
7535 ret = btrfs_inc_ref(trans, root, eb, 1, wc->for_reloc); 7744 ret = btrfs_inc_ref(trans, root, eb, 1);
7536 BUG_ON(ret); /* -ENOMEM */ 7745 BUG_ON(ret); /* -ENOMEM */
7537 ret = btrfs_dec_ref(trans, root, eb, 0, wc->for_reloc); 7746 ret = btrfs_dec_ref(trans, root, eb, 0);
7538 BUG_ON(ret); /* -ENOMEM */ 7747 BUG_ON(ret); /* -ENOMEM */
7539 ret = btrfs_set_disk_extent_flags(trans, root, eb->start, 7748 ret = btrfs_set_disk_extent_flags(trans, root, eb->start,
7540 eb->len, flag, 7749 eb->len, flag,
@@ -7581,6 +7790,7 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
7581 int level = wc->level; 7790 int level = wc->level;
7582 int reada = 0; 7791 int reada = 0;
7583 int ret = 0; 7792 int ret = 0;
7793 bool need_account = false;
7584 7794
7585 generation = btrfs_node_ptr_generation(path->nodes[level], 7795 generation = btrfs_node_ptr_generation(path->nodes[level],
7586 path->slots[level]); 7796 path->slots[level]);
@@ -7626,6 +7836,7 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
7626 7836
7627 if (wc->stage == DROP_REFERENCE) { 7837 if (wc->stage == DROP_REFERENCE) {
7628 if (wc->refs[level - 1] > 1) { 7838 if (wc->refs[level - 1] > 1) {
7839 need_account = true;
7629 if (level == 1 && 7840 if (level == 1 &&
7630 (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF)) 7841 (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF))
7631 goto skip; 7842 goto skip;
@@ -7689,6 +7900,16 @@ skip:
7689 parent = 0; 7900 parent = 0;
7690 } 7901 }
7691 7902
7903 if (need_account) {
7904 ret = account_shared_subtree(trans, root, next,
7905 generation, level - 1);
7906 if (ret) {
7907 printk_ratelimited(KERN_ERR "BTRFS: %s Error "
7908 "%d accounting shared subtree. Quota "
7909 "is out of sync, rescan required.\n",
7910 root->fs_info->sb->s_id, ret);
7911 }
7912 }
7692 ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent, 7913 ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent,
7693 root->root_key.objectid, level - 1, 0, 0); 7914 root->root_key.objectid, level - 1, 0, 0);
7694 BUG_ON(ret); /* -ENOMEM */ 7915 BUG_ON(ret); /* -ENOMEM */
@@ -7769,12 +7990,17 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
7769 if (wc->refs[level] == 1) { 7990 if (wc->refs[level] == 1) {
7770 if (level == 0) { 7991 if (level == 0) {
7771 if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) 7992 if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
7772 ret = btrfs_dec_ref(trans, root, eb, 1, 7993 ret = btrfs_dec_ref(trans, root, eb, 1);
7773 wc->for_reloc);
7774 else 7994 else
7775 ret = btrfs_dec_ref(trans, root, eb, 0, 7995 ret = btrfs_dec_ref(trans, root, eb, 0);
7776 wc->for_reloc);
7777 BUG_ON(ret); /* -ENOMEM */ 7996 BUG_ON(ret); /* -ENOMEM */
7997 ret = account_leaf_items(trans, root, eb);
7998 if (ret) {
7999 printk_ratelimited(KERN_ERR "BTRFS: %s Error "
8000 "%d accounting leaf items. Quota "
8001 "is out of sync, rescan required.\n",
8002 root->fs_info->sb->s_id, ret);
8003 }
7778 } 8004 }
7779 /* make block locked assertion in clean_tree_block happy */ 8005 /* make block locked assertion in clean_tree_block happy */
7780 if (!path->locks[level] && 8006 if (!path->locks[level] &&
@@ -7900,6 +8126,8 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
7900 int level; 8126 int level;
7901 bool root_dropped = false; 8127 bool root_dropped = false;
7902 8128
8129 btrfs_debug(root->fs_info, "Drop subvolume %llu", root->objectid);
8130
7903 path = btrfs_alloc_path(); 8131 path = btrfs_alloc_path();
7904 if (!path) { 8132 if (!path) {
7905 err = -ENOMEM; 8133 err = -ENOMEM;
@@ -8025,6 +8253,24 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
8025 goto out_end_trans; 8253 goto out_end_trans;
8026 } 8254 }
8027 8255
8256 /*
8257 * Qgroup update accounting is run from
8258 * delayed ref handling. This usually works
8259 * out because delayed refs are normally the
8260 * only way qgroup updates are added. However,
8261 * we may have added updates during our tree
8262 * walk so run qgroups here to make sure we
8263 * don't lose any updates.
8264 */
8265 ret = btrfs_delayed_qgroup_accounting(trans,
8266 root->fs_info);
8267 if (ret)
8268 printk_ratelimited(KERN_ERR "BTRFS: Failure %d "
8269 "running qgroup updates "
8270 "during snapshot delete. "
8271 "Quota is out of sync, "
8272 "rescan required.\n", ret);
8273
8028 btrfs_end_transaction_throttle(trans, tree_root); 8274 btrfs_end_transaction_throttle(trans, tree_root);
8029 if (!for_reloc && btrfs_need_cleaner_sleep(root)) { 8275 if (!for_reloc && btrfs_need_cleaner_sleep(root)) {
8030 pr_debug("BTRFS: drop snapshot early exit\n"); 8276 pr_debug("BTRFS: drop snapshot early exit\n");
@@ -8078,6 +8324,14 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
8078 } 8324 }
8079 root_dropped = true; 8325 root_dropped = true;
8080out_end_trans: 8326out_end_trans:
8327 ret = btrfs_delayed_qgroup_accounting(trans, tree_root->fs_info);
8328 if (ret)
8329 printk_ratelimited(KERN_ERR "BTRFS: Failure %d "
8330 "running qgroup updates "
8331 "during snapshot delete. "
8332 "Quota is out of sync, "
8333 "rescan required.\n", ret);
8334
8081 btrfs_end_transaction_throttle(trans, tree_root); 8335 btrfs_end_transaction_throttle(trans, tree_root);
8082out_free: 8336out_free:
8083 kfree(wc); 8337 kfree(wc);
@@ -8181,13 +8435,7 @@ static u64 update_block_group_flags(struct btrfs_root *root, u64 flags)
8181 if (stripped) 8435 if (stripped)
8182 return extended_to_chunk(stripped); 8436 return extended_to_chunk(stripped);
8183 8437
8184 /* 8438 num_devices = root->fs_info->fs_devices->rw_devices;
8185 * we add in the count of missing devices because we want
8186 * to make sure that any RAID levels on a degraded FS
8187 * continue to be honored.
8188 */
8189 num_devices = root->fs_info->fs_devices->rw_devices +
8190 root->fs_info->fs_devices->missing_devices;
8191 8439
8192 stripped = BTRFS_BLOCK_GROUP_RAID0 | 8440 stripped = BTRFS_BLOCK_GROUP_RAID0 |
8193 BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6 | 8441 BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6 |