aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorYan, Zheng <zheng.yan@oracle.com>2009-09-21 15:55:59 -0400
committerChris Mason <chris.mason@oracle.com>2009-09-21 15:55:59 -0400
commit1c4850e21df8b441164d910bc611ef46a01d5d75 (patch)
treeaeccbf3495421d1343bbe08cb824ac1ae6764e43
parentb917b7c3be50435fa8257591b964934e917f2d45 (diff)
Btrfs: speed up snapshot dropping
This patch contains two changes to avoid unnecessary tree block reads during snapshot dropping. First, check tree block's reference count and flags before reading the tree block. if reference count > 1 and there is no need to update backrefs, we can avoid reading the tree block. Second, save when snapshot was created in root_key.offset. we can compare block pointer's generation with snapshot's creation generation during updating backrefs. If a given block was created before snapshot was created, the snapshot can't be the tree block's owner. So we can avoid reading the block. Signed-off-by: Yan Zheng <zheng.yan@oracle.com> Signed-off-by: Chris Mason <chris.mason@oracle.com>
-rw-r--r--fs/btrfs/extent-tree.c253
-rw-r--r--fs/btrfs/transaction.c3
2 files changed, 207 insertions, 49 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 9bcb9c09c3b8..8fc922982183 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -4886,19 +4886,90 @@ struct walk_control {
4886 int shared_level; 4886 int shared_level;
4887 int update_ref; 4887 int update_ref;
4888 int keep_locks; 4888 int keep_locks;
4889 int reada_slot;
4890 int reada_count;
4889}; 4891};
4890 4892
4891#define DROP_REFERENCE 1 4893#define DROP_REFERENCE 1
4892#define UPDATE_BACKREF 2 4894#define UPDATE_BACKREF 2
4893 4895
4896static noinline void reada_walk_down(struct btrfs_trans_handle *trans,
4897 struct btrfs_root *root,
4898 struct walk_control *wc,
4899 struct btrfs_path *path)
4900{
4901 u64 bytenr;
4902 u64 generation;
4903 u64 refs;
4904 u64 last = 0;
4905 u32 nritems;
4906 u32 blocksize;
4907 struct btrfs_key key;
4908 struct extent_buffer *eb;
4909 int ret;
4910 int slot;
4911 int nread = 0;
4912
4913 if (path->slots[wc->level] < wc->reada_slot) {
4914 wc->reada_count = wc->reada_count * 2 / 3;
4915 wc->reada_count = max(wc->reada_count, 2);
4916 } else {
4917 wc->reada_count = wc->reada_count * 3 / 2;
4918 wc->reada_count = min_t(int, wc->reada_count,
4919 BTRFS_NODEPTRS_PER_BLOCK(root));
4920 }
4921
4922 eb = path->nodes[wc->level];
4923 nritems = btrfs_header_nritems(eb);
4924 blocksize = btrfs_level_size(root, wc->level - 1);
4925
4926 for (slot = path->slots[wc->level]; slot < nritems; slot++) {
4927 if (nread >= wc->reada_count)
4928 break;
4929
4930 cond_resched();
4931 bytenr = btrfs_node_blockptr(eb, slot);
4932 generation = btrfs_node_ptr_generation(eb, slot);
4933
4934 if (slot == path->slots[wc->level])
4935 goto reada;
4936
4937 if (wc->stage == UPDATE_BACKREF &&
4938 generation <= root->root_key.offset)
4939 continue;
4940
4941 if (wc->stage == DROP_REFERENCE) {
4942 ret = btrfs_lookup_extent_info(trans, root,
4943 bytenr, blocksize,
4944 &refs, NULL);
4945 BUG_ON(ret);
4946 BUG_ON(refs == 0);
4947 if (refs == 1)
4948 goto reada;
4949
4950 if (!wc->update_ref ||
4951 generation <= root->root_key.offset)
4952 continue;
4953 btrfs_node_key_to_cpu(eb, &key, slot);
4954 ret = btrfs_comp_cpu_keys(&key,
4955 &wc->update_progress);
4956 if (ret < 0)
4957 continue;
4958 }
4959reada:
4960 ret = readahead_tree_block(root, bytenr, blocksize,
4961 generation);
4962 if (ret)
4963 break;
4964 last = bytenr + blocksize;
4965 nread++;
4966 }
4967 wc->reada_slot = slot;
4968}
4969
4894/* 4970/*
4895 * hepler to process tree block while walking down the tree. 4971 * hepler to process tree block while walking down the tree.
4896 * 4972 *
4897 * when wc->stage == DROP_REFERENCE, this function checks
4898 * reference count of the block. if the block is shared and
4899 * we need update back refs for the subtree rooted at the
4900 * block, this function changes wc->stage to UPDATE_BACKREF
4901 *
4902 * when wc->stage == UPDATE_BACKREF, this function updates 4973 * when wc->stage == UPDATE_BACKREF, this function updates
4903 * back refs for pointers in the block. 4974 * back refs for pointers in the block.
4904 * 4975 *
@@ -4911,7 +4982,6 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
4911{ 4982{
4912 int level = wc->level; 4983 int level = wc->level;
4913 struct extent_buffer *eb = path->nodes[level]; 4984 struct extent_buffer *eb = path->nodes[level];
4914 struct btrfs_key key;
4915 u64 flag = BTRFS_BLOCK_FLAG_FULL_BACKREF; 4985 u64 flag = BTRFS_BLOCK_FLAG_FULL_BACKREF;
4916 int ret; 4986 int ret;
4917 4987
@@ -4934,21 +5004,6 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
4934 BUG_ON(wc->refs[level] == 0); 5004 BUG_ON(wc->refs[level] == 0);
4935 } 5005 }
4936 5006
4937 if (wc->stage == DROP_REFERENCE &&
4938 wc->update_ref && wc->refs[level] > 1) {
4939 BUG_ON(eb == root->node);
4940 BUG_ON(path->slots[level] > 0);
4941 if (level == 0)
4942 btrfs_item_key_to_cpu(eb, &key, path->slots[level]);
4943 else
4944 btrfs_node_key_to_cpu(eb, &key, path->slots[level]);
4945 if (btrfs_header_owner(eb) == root->root_key.objectid &&
4946 btrfs_comp_cpu_keys(&key, &wc->update_progress) >= 0) {
4947 wc->stage = UPDATE_BACKREF;
4948 wc->shared_level = level;
4949 }
4950 }
4951
4952 if (wc->stage == DROP_REFERENCE) { 5007 if (wc->stage == DROP_REFERENCE) {
4953 if (wc->refs[level] > 1) 5008 if (wc->refs[level] > 1)
4954 return 1; 5009 return 1;
@@ -4985,6 +5040,123 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
4985} 5040}
4986 5041
4987/* 5042/*
5043 * hepler to process tree block pointer.
5044 *
5045 * when wc->stage == DROP_REFERENCE, this function checks
5046 * reference count of the block pointed to. if the block
5047 * is shared and we need update back refs for the subtree
5048 * rooted at the block, this function changes wc->stage to
5049 * UPDATE_BACKREF. if the block is shared and there is no
5050 * need to update back, this function drops the reference
5051 * to the block.
5052 *
5053 * NOTE: return value 1 means we should stop walking down.
5054 */
5055static noinline int do_walk_down(struct btrfs_trans_handle *trans,
5056 struct btrfs_root *root,
5057 struct btrfs_path *path,
5058 struct walk_control *wc)
5059{
5060 u64 bytenr;
5061 u64 generation;
5062 u64 parent;
5063 u32 blocksize;
5064 struct btrfs_key key;
5065 struct extent_buffer *next;
5066 int level = wc->level;
5067 int reada = 0;
5068 int ret = 0;
5069
5070 generation = btrfs_node_ptr_generation(path->nodes[level],
5071 path->slots[level]);
5072 /*
5073 * if the lower level block was created before the snapshot
5074 * was created, we know there is no need to update back refs
5075 * for the subtree
5076 */
5077 if (wc->stage == UPDATE_BACKREF &&
5078 generation <= root->root_key.offset)
5079 return 1;
5080
5081 bytenr = btrfs_node_blockptr(path->nodes[level], path->slots[level]);
5082 blocksize = btrfs_level_size(root, level - 1);
5083
5084 next = btrfs_find_tree_block(root, bytenr, blocksize);
5085 if (!next) {
5086 next = btrfs_find_create_tree_block(root, bytenr, blocksize);
5087 reada = 1;
5088 }
5089 btrfs_tree_lock(next);
5090 btrfs_set_lock_blocking(next);
5091
5092 if (wc->stage == DROP_REFERENCE) {
5093 ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize,
5094 &wc->refs[level - 1],
5095 &wc->flags[level - 1]);
5096 BUG_ON(ret);
5097 BUG_ON(wc->refs[level - 1] == 0);
5098
5099 if (wc->refs[level - 1] > 1) {
5100 if (!wc->update_ref ||
5101 generation <= root->root_key.offset)
5102 goto skip;
5103
5104 btrfs_node_key_to_cpu(path->nodes[level], &key,
5105 path->slots[level]);
5106 ret = btrfs_comp_cpu_keys(&key, &wc->update_progress);
5107 if (ret < 0)
5108 goto skip;
5109
5110 wc->stage = UPDATE_BACKREF;
5111 wc->shared_level = level - 1;
5112 }
5113 }
5114
5115 if (!btrfs_buffer_uptodate(next, generation)) {
5116 btrfs_tree_unlock(next);
5117 free_extent_buffer(next);
5118 next = NULL;
5119 }
5120
5121 if (!next) {
5122 if (reada && level == 1)
5123 reada_walk_down(trans, root, wc, path);
5124 next = read_tree_block(root, bytenr, blocksize, generation);
5125 btrfs_tree_lock(next);
5126 btrfs_set_lock_blocking(next);
5127 }
5128
5129 level--;
5130 BUG_ON(level != btrfs_header_level(next));
5131 path->nodes[level] = next;
5132 path->slots[level] = 0;
5133 path->locks[level] = 1;
5134 wc->level = level;
5135 if (wc->level == 1)
5136 wc->reada_slot = 0;
5137 return 0;
5138skip:
5139 wc->refs[level - 1] = 0;
5140 wc->flags[level - 1] = 0;
5141
5142 if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
5143 parent = path->nodes[level]->start;
5144 } else {
5145 BUG_ON(root->root_key.objectid !=
5146 btrfs_header_owner(path->nodes[level]));
5147 parent = 0;
5148 }
5149
5150 ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent,
5151 root->root_key.objectid, level - 1, 0);
5152 BUG_ON(ret);
5153
5154 btrfs_tree_unlock(next);
5155 free_extent_buffer(next);
5156 return 1;
5157}
5158
5159/*
4988 * hepler to process tree block while walking up the tree. 5160 * hepler to process tree block while walking up the tree.
4989 * 5161 *
4990 * when wc->stage == DROP_REFERENCE, this function drops 5162 * when wc->stage == DROP_REFERENCE, this function drops
@@ -5011,7 +5183,6 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
5011 if (level < wc->shared_level) 5183 if (level < wc->shared_level)
5012 goto out; 5184 goto out;
5013 5185
5014 BUG_ON(wc->refs[level] <= 1);
5015 ret = find_next_key(path, level + 1, &wc->update_progress); 5186 ret = find_next_key(path, level + 1, &wc->update_progress);
5016 if (ret > 0) 5187 if (ret > 0)
5017 wc->update_ref = 0; 5188 wc->update_ref = 0;
@@ -5042,8 +5213,6 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
5042 path->locks[level] = 0; 5213 path->locks[level] = 0;
5043 return 1; 5214 return 1;
5044 } 5215 }
5045 } else {
5046 BUG_ON(level != 0);
5047 } 5216 }
5048 } 5217 }
5049 5218
@@ -5096,17 +5265,13 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
5096 struct btrfs_path *path, 5265 struct btrfs_path *path,
5097 struct walk_control *wc) 5266 struct walk_control *wc)
5098{ 5267{
5099 struct extent_buffer *next;
5100 struct extent_buffer *cur;
5101 u64 bytenr;
5102 u64 ptr_gen;
5103 u32 blocksize;
5104 int level = wc->level; 5268 int level = wc->level;
5105 int ret; 5269 int ret;
5106 5270
5107 while (level >= 0) { 5271 while (level >= 0) {
5108 cur = path->nodes[level]; 5272 if (path->slots[level] >=
5109 BUG_ON(path->slots[level] >= btrfs_header_nritems(cur)); 5273 btrfs_header_nritems(path->nodes[level]))
5274 break;
5110 5275
5111 ret = walk_down_proc(trans, root, path, wc); 5276 ret = walk_down_proc(trans, root, path, wc);
5112 if (ret > 0) 5277 if (ret > 0)
@@ -5115,20 +5280,12 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
5115 if (level == 0) 5280 if (level == 0)
5116 break; 5281 break;
5117 5282
5118 bytenr = btrfs_node_blockptr(cur, path->slots[level]); 5283 ret = do_walk_down(trans, root, path, wc);
5119 blocksize = btrfs_level_size(root, level - 1); 5284 if (ret > 0) {
5120 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[level]); 5285 path->slots[level]++;
5121 5286 continue;
5122 next = read_tree_block(root, bytenr, blocksize, ptr_gen); 5287 }
5123 btrfs_tree_lock(next); 5288 level = wc->level;
5124 btrfs_set_lock_blocking(next);
5125
5126 level--;
5127 BUG_ON(level != btrfs_header_level(next));
5128 path->nodes[level] = next;
5129 path->slots[level] = 0;
5130 path->locks[level] = 1;
5131 wc->level = level;
5132 } 5289 }
5133 return 0; 5290 return 0;
5134} 5291}
@@ -5218,9 +5375,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref)
5218 err = ret; 5375 err = ret;
5219 goto out; 5376 goto out;
5220 } 5377 }
5221 btrfs_node_key_to_cpu(path->nodes[level], &key, 5378 WARN_ON(ret > 0);
5222 path->slots[level]);
5223 WARN_ON(memcmp(&key, &wc->update_progress, sizeof(key)));
5224 5379
5225 /* 5380 /*
5226 * unlock our path, this is safe because only this 5381 * unlock our path, this is safe because only this
@@ -5255,6 +5410,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref)
5255 wc->stage = DROP_REFERENCE; 5410 wc->stage = DROP_REFERENCE;
5256 wc->update_ref = update_ref; 5411 wc->update_ref = update_ref;
5257 wc->keep_locks = 0; 5412 wc->keep_locks = 0;
5413 wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root);
5258 5414
5259 while (1) { 5415 while (1) {
5260 ret = walk_down_tree(trans, root, path, wc); 5416 ret = walk_down_tree(trans, root, path, wc);
@@ -5361,6 +5517,7 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
5361 wc->stage = DROP_REFERENCE; 5517 wc->stage = DROP_REFERENCE;
5362 wc->update_ref = 0; 5518 wc->update_ref = 0;
5363 wc->keep_locks = 1; 5519 wc->keep_locks = 1;
5520 wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root);
5364 5521
5365 while (1) { 5522 while (1) {
5366 wret = walk_down_tree(trans, root, path, wc); 5523 wret = walk_down_tree(trans, root, path, wc);
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 6ed6186f51cd..94f816cb6e35 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -720,7 +720,8 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
720 memcpy(new_root_item, &root->root_item, sizeof(*new_root_item)); 720 memcpy(new_root_item, &root->root_item, sizeof(*new_root_item));
721 721
722 key.objectid = objectid; 722 key.objectid = objectid;
723 key.offset = 0; 723 /* record when the snapshot was created in key.offset */
724 key.offset = trans->transid;
724 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); 725 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
725 726
726 old = btrfs_lock_root_node(root); 727 old = btrfs_lock_root_node(root);