diff options
| -rw-r--r-- | fs/btrfs/extent-tree.c | 253 | ||||
| -rw-r--r-- | fs/btrfs/transaction.c | 3 |
2 files changed, 207 insertions, 49 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 9bcb9c09c3b8..8fc922982183 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
| @@ -4886,19 +4886,90 @@ struct walk_control { | |||
| 4886 | int shared_level; | 4886 | int shared_level; |
| 4887 | int update_ref; | 4887 | int update_ref; |
| 4888 | int keep_locks; | 4888 | int keep_locks; |
| 4889 | int reada_slot; | ||
| 4890 | int reada_count; | ||
| 4889 | }; | 4891 | }; |
| 4890 | 4892 | ||
| 4891 | #define DROP_REFERENCE 1 | 4893 | #define DROP_REFERENCE 1 |
| 4892 | #define UPDATE_BACKREF 2 | 4894 | #define UPDATE_BACKREF 2 |
| 4893 | 4895 | ||
| 4896 | static noinline void reada_walk_down(struct btrfs_trans_handle *trans, | ||
| 4897 | struct btrfs_root *root, | ||
| 4898 | struct walk_control *wc, | ||
| 4899 | struct btrfs_path *path) | ||
| 4900 | { | ||
| 4901 | u64 bytenr; | ||
| 4902 | u64 generation; | ||
| 4903 | u64 refs; | ||
| 4904 | u64 last = 0; | ||
| 4905 | u32 nritems; | ||
| 4906 | u32 blocksize; | ||
| 4907 | struct btrfs_key key; | ||
| 4908 | struct extent_buffer *eb; | ||
| 4909 | int ret; | ||
| 4910 | int slot; | ||
| 4911 | int nread = 0; | ||
| 4912 | |||
| 4913 | if (path->slots[wc->level] < wc->reada_slot) { | ||
| 4914 | wc->reada_count = wc->reada_count * 2 / 3; | ||
| 4915 | wc->reada_count = max(wc->reada_count, 2); | ||
| 4916 | } else { | ||
| 4917 | wc->reada_count = wc->reada_count * 3 / 2; | ||
| 4918 | wc->reada_count = min_t(int, wc->reada_count, | ||
| 4919 | BTRFS_NODEPTRS_PER_BLOCK(root)); | ||
| 4920 | } | ||
| 4921 | |||
| 4922 | eb = path->nodes[wc->level]; | ||
| 4923 | nritems = btrfs_header_nritems(eb); | ||
| 4924 | blocksize = btrfs_level_size(root, wc->level - 1); | ||
| 4925 | |||
| 4926 | for (slot = path->slots[wc->level]; slot < nritems; slot++) { | ||
| 4927 | if (nread >= wc->reada_count) | ||
| 4928 | break; | ||
| 4929 | |||
| 4930 | cond_resched(); | ||
| 4931 | bytenr = btrfs_node_blockptr(eb, slot); | ||
| 4932 | generation = btrfs_node_ptr_generation(eb, slot); | ||
| 4933 | |||
| 4934 | if (slot == path->slots[wc->level]) | ||
| 4935 | goto reada; | ||
| 4936 | |||
| 4937 | if (wc->stage == UPDATE_BACKREF && | ||
| 4938 | generation <= root->root_key.offset) | ||
| 4939 | continue; | ||
| 4940 | |||
| 4941 | if (wc->stage == DROP_REFERENCE) { | ||
| 4942 | ret = btrfs_lookup_extent_info(trans, root, | ||
| 4943 | bytenr, blocksize, | ||
| 4944 | &refs, NULL); | ||
| 4945 | BUG_ON(ret); | ||
| 4946 | BUG_ON(refs == 0); | ||
| 4947 | if (refs == 1) | ||
| 4948 | goto reada; | ||
| 4949 | |||
| 4950 | if (!wc->update_ref || | ||
| 4951 | generation <= root->root_key.offset) | ||
| 4952 | continue; | ||
| 4953 | btrfs_node_key_to_cpu(eb, &key, slot); | ||
| 4954 | ret = btrfs_comp_cpu_keys(&key, | ||
| 4955 | &wc->update_progress); | ||
| 4956 | if (ret < 0) | ||
| 4957 | continue; | ||
| 4958 | } | ||
| 4959 | reada: | ||
| 4960 | ret = readahead_tree_block(root, bytenr, blocksize, | ||
| 4961 | generation); | ||
| 4962 | if (ret) | ||
| 4963 | break; | ||
| 4964 | last = bytenr + blocksize; | ||
| 4965 | nread++; | ||
| 4966 | } | ||
| 4967 | wc->reada_slot = slot; | ||
| 4968 | } | ||
| 4969 | |||
| 4894 | /* | 4970 | /* |
| 4895 | * hepler to process tree block while walking down the tree. | 4971 | * hepler to process tree block while walking down the tree. |
| 4896 | * | 4972 | * |
| 4897 | * when wc->stage == DROP_REFERENCE, this function checks | ||
| 4898 | * reference count of the block. if the block is shared and | ||
| 4899 | * we need update back refs for the subtree rooted at the | ||
| 4900 | * block, this function changes wc->stage to UPDATE_BACKREF | ||
| 4901 | * | ||
| 4902 | * when wc->stage == UPDATE_BACKREF, this function updates | 4973 | * when wc->stage == UPDATE_BACKREF, this function updates |
| 4903 | * back refs for pointers in the block. | 4974 | * back refs for pointers in the block. |
| 4904 | * | 4975 | * |
| @@ -4911,7 +4982,6 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans, | |||
| 4911 | { | 4982 | { |
| 4912 | int level = wc->level; | 4983 | int level = wc->level; |
| 4913 | struct extent_buffer *eb = path->nodes[level]; | 4984 | struct extent_buffer *eb = path->nodes[level]; |
| 4914 | struct btrfs_key key; | ||
| 4915 | u64 flag = BTRFS_BLOCK_FLAG_FULL_BACKREF; | 4985 | u64 flag = BTRFS_BLOCK_FLAG_FULL_BACKREF; |
| 4916 | int ret; | 4986 | int ret; |
| 4917 | 4987 | ||
| @@ -4934,21 +5004,6 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans, | |||
| 4934 | BUG_ON(wc->refs[level] == 0); | 5004 | BUG_ON(wc->refs[level] == 0); |
| 4935 | } | 5005 | } |
| 4936 | 5006 | ||
| 4937 | if (wc->stage == DROP_REFERENCE && | ||
| 4938 | wc->update_ref && wc->refs[level] > 1) { | ||
| 4939 | BUG_ON(eb == root->node); | ||
| 4940 | BUG_ON(path->slots[level] > 0); | ||
| 4941 | if (level == 0) | ||
| 4942 | btrfs_item_key_to_cpu(eb, &key, path->slots[level]); | ||
| 4943 | else | ||
| 4944 | btrfs_node_key_to_cpu(eb, &key, path->slots[level]); | ||
| 4945 | if (btrfs_header_owner(eb) == root->root_key.objectid && | ||
| 4946 | btrfs_comp_cpu_keys(&key, &wc->update_progress) >= 0) { | ||
| 4947 | wc->stage = UPDATE_BACKREF; | ||
| 4948 | wc->shared_level = level; | ||
| 4949 | } | ||
| 4950 | } | ||
| 4951 | |||
| 4952 | if (wc->stage == DROP_REFERENCE) { | 5007 | if (wc->stage == DROP_REFERENCE) { |
| 4953 | if (wc->refs[level] > 1) | 5008 | if (wc->refs[level] > 1) |
| 4954 | return 1; | 5009 | return 1; |
| @@ -4985,6 +5040,123 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans, | |||
| 4985 | } | 5040 | } |
| 4986 | 5041 | ||
| 4987 | /* | 5042 | /* |
| 5043 | * hepler to process tree block pointer. | ||
| 5044 | * | ||
| 5045 | * when wc->stage == DROP_REFERENCE, this function checks | ||
| 5046 | * reference count of the block pointed to. if the block | ||
| 5047 | * is shared and we need update back refs for the subtree | ||
| 5048 | * rooted at the block, this function changes wc->stage to | ||
| 5049 | * UPDATE_BACKREF. if the block is shared and there is no | ||
| 5050 | * need to update back, this function drops the reference | ||
| 5051 | * to the block. | ||
| 5052 | * | ||
| 5053 | * NOTE: return value 1 means we should stop walking down. | ||
| 5054 | */ | ||
| 5055 | static noinline int do_walk_down(struct btrfs_trans_handle *trans, | ||
| 5056 | struct btrfs_root *root, | ||
| 5057 | struct btrfs_path *path, | ||
| 5058 | struct walk_control *wc) | ||
| 5059 | { | ||
| 5060 | u64 bytenr; | ||
| 5061 | u64 generation; | ||
| 5062 | u64 parent; | ||
| 5063 | u32 blocksize; | ||
| 5064 | struct btrfs_key key; | ||
| 5065 | struct extent_buffer *next; | ||
| 5066 | int level = wc->level; | ||
| 5067 | int reada = 0; | ||
| 5068 | int ret = 0; | ||
| 5069 | |||
| 5070 | generation = btrfs_node_ptr_generation(path->nodes[level], | ||
| 5071 | path->slots[level]); | ||
| 5072 | /* | ||
| 5073 | * if the lower level block was created before the snapshot | ||
| 5074 | * was created, we know there is no need to update back refs | ||
| 5075 | * for the subtree | ||
| 5076 | */ | ||
| 5077 | if (wc->stage == UPDATE_BACKREF && | ||
| 5078 | generation <= root->root_key.offset) | ||
| 5079 | return 1; | ||
| 5080 | |||
| 5081 | bytenr = btrfs_node_blockptr(path->nodes[level], path->slots[level]); | ||
| 5082 | blocksize = btrfs_level_size(root, level - 1); | ||
| 5083 | |||
| 5084 | next = btrfs_find_tree_block(root, bytenr, blocksize); | ||
| 5085 | if (!next) { | ||
| 5086 | next = btrfs_find_create_tree_block(root, bytenr, blocksize); | ||
| 5087 | reada = 1; | ||
| 5088 | } | ||
| 5089 | btrfs_tree_lock(next); | ||
| 5090 | btrfs_set_lock_blocking(next); | ||
| 5091 | |||
| 5092 | if (wc->stage == DROP_REFERENCE) { | ||
| 5093 | ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize, | ||
| 5094 | &wc->refs[level - 1], | ||
| 5095 | &wc->flags[level - 1]); | ||
| 5096 | BUG_ON(ret); | ||
| 5097 | BUG_ON(wc->refs[level - 1] == 0); | ||
| 5098 | |||
| 5099 | if (wc->refs[level - 1] > 1) { | ||
| 5100 | if (!wc->update_ref || | ||
| 5101 | generation <= root->root_key.offset) | ||
| 5102 | goto skip; | ||
| 5103 | |||
| 5104 | btrfs_node_key_to_cpu(path->nodes[level], &key, | ||
| 5105 | path->slots[level]); | ||
| 5106 | ret = btrfs_comp_cpu_keys(&key, &wc->update_progress); | ||
| 5107 | if (ret < 0) | ||
| 5108 | goto skip; | ||
| 5109 | |||
| 5110 | wc->stage = UPDATE_BACKREF; | ||
| 5111 | wc->shared_level = level - 1; | ||
| 5112 | } | ||
| 5113 | } | ||
| 5114 | |||
| 5115 | if (!btrfs_buffer_uptodate(next, generation)) { | ||
| 5116 | btrfs_tree_unlock(next); | ||
| 5117 | free_extent_buffer(next); | ||
| 5118 | next = NULL; | ||
| 5119 | } | ||
| 5120 | |||
| 5121 | if (!next) { | ||
| 5122 | if (reada && level == 1) | ||
| 5123 | reada_walk_down(trans, root, wc, path); | ||
| 5124 | next = read_tree_block(root, bytenr, blocksize, generation); | ||
| 5125 | btrfs_tree_lock(next); | ||
| 5126 | btrfs_set_lock_blocking(next); | ||
| 5127 | } | ||
| 5128 | |||
| 5129 | level--; | ||
| 5130 | BUG_ON(level != btrfs_header_level(next)); | ||
| 5131 | path->nodes[level] = next; | ||
| 5132 | path->slots[level] = 0; | ||
| 5133 | path->locks[level] = 1; | ||
| 5134 | wc->level = level; | ||
| 5135 | if (wc->level == 1) | ||
| 5136 | wc->reada_slot = 0; | ||
| 5137 | return 0; | ||
| 5138 | skip: | ||
| 5139 | wc->refs[level - 1] = 0; | ||
| 5140 | wc->flags[level - 1] = 0; | ||
| 5141 | |||
| 5142 | if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) { | ||
| 5143 | parent = path->nodes[level]->start; | ||
| 5144 | } else { | ||
| 5145 | BUG_ON(root->root_key.objectid != | ||
| 5146 | btrfs_header_owner(path->nodes[level])); | ||
| 5147 | parent = 0; | ||
| 5148 | } | ||
| 5149 | |||
| 5150 | ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent, | ||
| 5151 | root->root_key.objectid, level - 1, 0); | ||
| 5152 | BUG_ON(ret); | ||
| 5153 | |||
| 5154 | btrfs_tree_unlock(next); | ||
| 5155 | free_extent_buffer(next); | ||
| 5156 | return 1; | ||
| 5157 | } | ||
| 5158 | |||
| 5159 | /* | ||
| 4988 | * hepler to process tree block while walking up the tree. | 5160 | * hepler to process tree block while walking up the tree. |
| 4989 | * | 5161 | * |
| 4990 | * when wc->stage == DROP_REFERENCE, this function drops | 5162 | * when wc->stage == DROP_REFERENCE, this function drops |
| @@ -5011,7 +5183,6 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans, | |||
| 5011 | if (level < wc->shared_level) | 5183 | if (level < wc->shared_level) |
| 5012 | goto out; | 5184 | goto out; |
| 5013 | 5185 | ||
| 5014 | BUG_ON(wc->refs[level] <= 1); | ||
| 5015 | ret = find_next_key(path, level + 1, &wc->update_progress); | 5186 | ret = find_next_key(path, level + 1, &wc->update_progress); |
| 5016 | if (ret > 0) | 5187 | if (ret > 0) |
| 5017 | wc->update_ref = 0; | 5188 | wc->update_ref = 0; |
| @@ -5042,8 +5213,6 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans, | |||
| 5042 | path->locks[level] = 0; | 5213 | path->locks[level] = 0; |
| 5043 | return 1; | 5214 | return 1; |
| 5044 | } | 5215 | } |
| 5045 | } else { | ||
| 5046 | BUG_ON(level != 0); | ||
| 5047 | } | 5216 | } |
| 5048 | } | 5217 | } |
| 5049 | 5218 | ||
| @@ -5096,17 +5265,13 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans, | |||
| 5096 | struct btrfs_path *path, | 5265 | struct btrfs_path *path, |
| 5097 | struct walk_control *wc) | 5266 | struct walk_control *wc) |
| 5098 | { | 5267 | { |
| 5099 | struct extent_buffer *next; | ||
| 5100 | struct extent_buffer *cur; | ||
| 5101 | u64 bytenr; | ||
| 5102 | u64 ptr_gen; | ||
| 5103 | u32 blocksize; | ||
| 5104 | int level = wc->level; | 5268 | int level = wc->level; |
| 5105 | int ret; | 5269 | int ret; |
| 5106 | 5270 | ||
| 5107 | while (level >= 0) { | 5271 | while (level >= 0) { |
| 5108 | cur = path->nodes[level]; | 5272 | if (path->slots[level] >= |
| 5109 | BUG_ON(path->slots[level] >= btrfs_header_nritems(cur)); | 5273 | btrfs_header_nritems(path->nodes[level])) |
| 5274 | break; | ||
| 5110 | 5275 | ||
| 5111 | ret = walk_down_proc(trans, root, path, wc); | 5276 | ret = walk_down_proc(trans, root, path, wc); |
| 5112 | if (ret > 0) | 5277 | if (ret > 0) |
| @@ -5115,20 +5280,12 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans, | |||
| 5115 | if (level == 0) | 5280 | if (level == 0) |
| 5116 | break; | 5281 | break; |
| 5117 | 5282 | ||
| 5118 | bytenr = btrfs_node_blockptr(cur, path->slots[level]); | 5283 | ret = do_walk_down(trans, root, path, wc); |
| 5119 | blocksize = btrfs_level_size(root, level - 1); | 5284 | if (ret > 0) { |
| 5120 | ptr_gen = btrfs_node_ptr_generation(cur, path->slots[level]); | 5285 | path->slots[level]++; |
| 5121 | 5286 | continue; | |
| 5122 | next = read_tree_block(root, bytenr, blocksize, ptr_gen); | 5287 | } |
| 5123 | btrfs_tree_lock(next); | 5288 | level = wc->level; |
| 5124 | btrfs_set_lock_blocking(next); | ||
| 5125 | |||
| 5126 | level--; | ||
| 5127 | BUG_ON(level != btrfs_header_level(next)); | ||
| 5128 | path->nodes[level] = next; | ||
| 5129 | path->slots[level] = 0; | ||
| 5130 | path->locks[level] = 1; | ||
| 5131 | wc->level = level; | ||
| 5132 | } | 5289 | } |
| 5133 | return 0; | 5290 | return 0; |
| 5134 | } | 5291 | } |
| @@ -5218,9 +5375,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref) | |||
| 5218 | err = ret; | 5375 | err = ret; |
| 5219 | goto out; | 5376 | goto out; |
| 5220 | } | 5377 | } |
| 5221 | btrfs_node_key_to_cpu(path->nodes[level], &key, | 5378 | WARN_ON(ret > 0); |
| 5222 | path->slots[level]); | ||
| 5223 | WARN_ON(memcmp(&key, &wc->update_progress, sizeof(key))); | ||
| 5224 | 5379 | ||
| 5225 | /* | 5380 | /* |
| 5226 | * unlock our path, this is safe because only this | 5381 | * unlock our path, this is safe because only this |
| @@ -5255,6 +5410,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref) | |||
| 5255 | wc->stage = DROP_REFERENCE; | 5410 | wc->stage = DROP_REFERENCE; |
| 5256 | wc->update_ref = update_ref; | 5411 | wc->update_ref = update_ref; |
| 5257 | wc->keep_locks = 0; | 5412 | wc->keep_locks = 0; |
| 5413 | wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root); | ||
| 5258 | 5414 | ||
| 5259 | while (1) { | 5415 | while (1) { |
| 5260 | ret = walk_down_tree(trans, root, path, wc); | 5416 | ret = walk_down_tree(trans, root, path, wc); |
| @@ -5361,6 +5517,7 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans, | |||
| 5361 | wc->stage = DROP_REFERENCE; | 5517 | wc->stage = DROP_REFERENCE; |
| 5362 | wc->update_ref = 0; | 5518 | wc->update_ref = 0; |
| 5363 | wc->keep_locks = 1; | 5519 | wc->keep_locks = 1; |
| 5520 | wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root); | ||
| 5364 | 5521 | ||
| 5365 | while (1) { | 5522 | while (1) { |
| 5366 | wret = walk_down_tree(trans, root, path, wc); | 5523 | wret = walk_down_tree(trans, root, path, wc); |
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 6ed6186f51cd..94f816cb6e35 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c | |||
| @@ -720,7 +720,8 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
| 720 | memcpy(new_root_item, &root->root_item, sizeof(*new_root_item)); | 720 | memcpy(new_root_item, &root->root_item, sizeof(*new_root_item)); |
| 721 | 721 | ||
| 722 | key.objectid = objectid; | 722 | key.objectid = objectid; |
| 723 | key.offset = 0; | 723 | /* record when the snapshot was created in key.offset */ |
| 724 | key.offset = trans->transid; | ||
| 724 | btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); | 725 | btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); |
| 725 | 726 | ||
| 726 | old = btrfs_lock_root_node(root); | 727 | old = btrfs_lock_root_node(root); |
