Diffstat (limited to 'fs/btrfs/extent-tree.c')
-rw-r--r-- | fs/btrfs/extent-tree.c | 1096
1 file changed, 794 insertions, 302 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index edc7d208c5ce..72a2b9c28e9f 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -21,6 +21,7 @@ | |||
21 | #include <linux/blkdev.h> | 21 | #include <linux/blkdev.h> |
22 | #include <linux/sort.h> | 22 | #include <linux/sort.h> |
23 | #include <linux/rcupdate.h> | 23 | #include <linux/rcupdate.h> |
24 | #include <linux/kthread.h> | ||
24 | #include "compat.h" | 25 | #include "compat.h" |
25 | #include "hash.h" | 26 | #include "hash.h" |
26 | #include "ctree.h" | 27 | #include "ctree.h" |
@@ -61,6 +62,13 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, | |||
61 | struct btrfs_root *extent_root, u64 alloc_bytes, | 62 | struct btrfs_root *extent_root, u64 alloc_bytes, |
62 | u64 flags, int force); | 63 | u64 flags, int force); |
63 | 64 | ||
65 | static noinline int | ||
66 | block_group_cache_done(struct btrfs_block_group_cache *cache) | ||
67 | { | ||
68 | smp_mb(); | ||
69 | return cache->cached == BTRFS_CACHE_FINISHED; | ||
70 | } | ||
71 | |||
64 | static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits) | 72 | static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits) |
65 | { | 73 | { |
66 | return (cache->flags & bits) == bits; | 74 | return (cache->flags & bits) == bits; |
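The new block_group_cache_done() helper is the read side of a small state machine this patch introduces: cache->cached moves from BTRFS_CACHE_NO to BTRFS_CACHE_STARTED when a caching kthread is launched, and to BTRFS_CACHE_FINISHED when the extent tree scan completes. Below is a minimal userspace sketch of that lifecycle; the state names mirror the patch, but the harness (C11 atomics standing in for smp_mb() and the spinlock) is purely illustrative.

#include <stdatomic.h>
#include <stdio.h>

/* mirrors the cached states the patch switches on; values are illustrative */
enum cache_state { CACHE_NO, CACHE_STARTED, CACHE_FINISHED };

struct block_group { _Atomic int cached; };

/* analogous to block_group_cache_done(): cheap "is the scan finished" probe */
static int cache_done(struct block_group *bg)
{
    /* the kernel issues smp_mb(); a seq_cst load gives similar ordering here */
    return atomic_load(&bg->cached) == CACHE_FINISHED;
}

int main(void)
{
    struct block_group bg = { CACHE_NO };

    printf("done? %d\n", cache_done(&bg));      /* 0: nothing cached yet */
    atomic_store(&bg.cached, CACHE_STARTED);    /* cache_block_group() */
    atomic_store(&bg.cached, CACHE_FINISHED);   /* caching_kthread() on exit */
    printf("done? %d\n", cache_done(&bg));      /* 1: allocator may trust it */
    return 0;
}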
@@ -146,20 +154,70 @@ block_group_cache_tree_search(struct btrfs_fs_info *info, u64 bytenr, | |||
146 | } | 154 | } |
147 | 155 | ||
148 | /* | 156 | /* |
157 | * We always set EXTENT_LOCKED for the super mirror extents so we don't | ||
158 | * overwrite them, so those bits need to be unset. Also, if we are unmounting | ||
159 | * with pinned extents still sitting there because we had a block group caching, | ||
160 | * we need to clear those now, since we are done. | ||
161 | */ | ||
162 | void btrfs_free_pinned_extents(struct btrfs_fs_info *info) | ||
163 | { | ||
164 | u64 start, end, last = 0; | ||
165 | int ret; | ||
166 | |||
167 | while (1) { | ||
168 | ret = find_first_extent_bit(&info->pinned_extents, last, | ||
169 | &start, &end, | ||
170 | EXTENT_LOCKED|EXTENT_DIRTY); | ||
171 | if (ret) | ||
172 | break; | ||
173 | |||
174 | clear_extent_bits(&info->pinned_extents, start, end, | ||
175 | EXTENT_LOCKED|EXTENT_DIRTY, GFP_NOFS); | ||
176 | last = end+1; | ||
177 | } | ||
178 | } | ||
179 | |||
180 | static int remove_sb_from_cache(struct btrfs_root *root, | ||
181 | struct btrfs_block_group_cache *cache) | ||
182 | { | ||
183 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
184 | u64 bytenr; | ||
185 | u64 *logical; | ||
186 | int stripe_len; | ||
187 | int i, nr, ret; | ||
188 | |||
189 | for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) { | ||
190 | bytenr = btrfs_sb_offset(i); | ||
191 | ret = btrfs_rmap_block(&root->fs_info->mapping_tree, | ||
192 | cache->key.objectid, bytenr, | ||
193 | 0, &logical, &nr, &stripe_len); | ||
194 | BUG_ON(ret); | ||
195 | while (nr--) { | ||
196 | try_lock_extent(&fs_info->pinned_extents, | ||
197 | logical[nr], | ||
198 | logical[nr] + stripe_len - 1, GFP_NOFS); | ||
199 | } | ||
200 | kfree(logical); | ||
201 | } | ||
202 | |||
203 | return 0; | ||
204 | } | ||
205 | |||
206 | /* | ||
149 | * this is only called by cache_block_group, since we could have freed extents | 207 | * this is only called by cache_block_group, since we could have freed extents |
150 | * we need to check the pinned_extents for any extents that can't be used yet | 208 | * we need to check the pinned_extents for any extents that can't be used yet |
151 | * since their free space will be released as soon as the transaction commits. | 209 | * since their free space will be released as soon as the transaction commits. |
152 | */ | 210 | */ |
153 | static int add_new_free_space(struct btrfs_block_group_cache *block_group, | 211 | static u64 add_new_free_space(struct btrfs_block_group_cache *block_group, |
154 | struct btrfs_fs_info *info, u64 start, u64 end) | 212 | struct btrfs_fs_info *info, u64 start, u64 end) |
155 | { | 213 | { |
156 | u64 extent_start, extent_end, size; | 214 | u64 extent_start, extent_end, size, total_added = 0; |
157 | int ret; | 215 | int ret; |
158 | 216 | ||
159 | while (start < end) { | 217 | while (start < end) { |
160 | ret = find_first_extent_bit(&info->pinned_extents, start, | 218 | ret = find_first_extent_bit(&info->pinned_extents, start, |
161 | &extent_start, &extent_end, | 219 | &extent_start, &extent_end, |
162 | EXTENT_DIRTY); | 220 | EXTENT_DIRTY|EXTENT_LOCKED); |
163 | if (ret) | 221 | if (ret) |
164 | break; | 222 | break; |
165 | 223 | ||
@@ -167,6 +225,7 @@ static int add_new_free_space(struct btrfs_block_group_cache *block_group, | |||
167 | start = extent_end + 1; | 225 | start = extent_end + 1; |
168 | } else if (extent_start > start && extent_start < end) { | 226 | } else if (extent_start > start && extent_start < end) { |
169 | size = extent_start - start; | 227 | size = extent_start - start; |
228 | total_added += size; | ||
170 | ret = btrfs_add_free_space(block_group, start, | 229 | ret = btrfs_add_free_space(block_group, start, |
171 | size); | 230 | size); |
172 | BUG_ON(ret); | 231 | BUG_ON(ret); |
@@ -178,84 +237,93 @@ static int add_new_free_space(struct btrfs_block_group_cache *block_group, | |||
178 | 237 | ||
179 | if (start < end) { | 238 | if (start < end) { |
180 | size = end - start; | 239 | size = end - start; |
240 | total_added += size; | ||
181 | ret = btrfs_add_free_space(block_group, start, size); | 241 | ret = btrfs_add_free_space(block_group, start, size); |
182 | BUG_ON(ret); | 242 | BUG_ON(ret); |
183 | } | 243 | } |
184 | 244 | ||
185 | return 0; | 245 | return total_added; |
186 | } | 246 | } |
187 | 247 | ||
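add_new_free_space() now reports how many bytes it actually handed to the free-space cache, and it skips ranges still marked EXTENT_DIRTY or EXTENT_LOCKED in pinned_extents, since those cannot be reused until the transaction commits. The sketch below reproduces just the "add the gaps between reserved ranges" arithmetic in plain C; the interval array and helper names are invented stand-ins for the pinned_extents tree and find_first_extent_bit().

#include <stdio.h>
#include <stdint.h>

struct range { uint64_t start, end; };          /* inclusive, like extent bits */

/* stand-in for find_first_extent_bit(): first reserved range ending at/after 'from' */
static const struct range *first_reserved(const struct range *r, int n, uint64_t from)
{
    for (int i = 0; i < n; i++)
        if (r[i].end >= from)
            return &r[i];
    return NULL;
}

/* walk [start, end) and count the bytes NOT covered by reserved ranges */
static uint64_t add_new_free_space(uint64_t start, uint64_t end,
                                   const struct range *reserved, int n)
{
    uint64_t total_added = 0;

    while (start < end) {
        const struct range *r = first_reserved(reserved, n, start);
        if (!r || r->start >= end)
            break;
        if (r->start <= start) {            /* pinned range covers the front */
            start = r->end + 1;
        } else {                            /* free gap before the pinned range */
            total_added += r->start - start;
            start = r->end + 1;
        }
    }
    if (start < end)
        total_added += end - start;         /* tail after the last pinned range */
    return total_added;
}

int main(void)
{
    struct range pinned[] = { { 4096, 8191 }, { 16384, 20479 } };

    printf("free bytes: %llu\n",            /* prints 24576 */
           (unsigned long long)add_new_free_space(0, 32768, pinned, 2));
    return 0;
}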
188 | static int remove_sb_from_cache(struct btrfs_root *root, | 248 | static int caching_kthread(void *data) |
189 | struct btrfs_block_group_cache *cache) | ||
190 | { | ||
191 | u64 bytenr; | ||
192 | u64 *logical; | ||
193 | int stripe_len; | ||
194 | int i, nr, ret; | ||
195 | |||
196 | for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) { | ||
197 | bytenr = btrfs_sb_offset(i); | ||
198 | ret = btrfs_rmap_block(&root->fs_info->mapping_tree, | ||
199 | cache->key.objectid, bytenr, 0, | ||
200 | &logical, &nr, &stripe_len); | ||
201 | BUG_ON(ret); | ||
202 | while (nr--) { | ||
203 | btrfs_remove_free_space(cache, logical[nr], | ||
204 | stripe_len); | ||
205 | } | ||
206 | kfree(logical); | ||
207 | } | ||
208 | return 0; | ||
209 | } | ||
210 | |||
211 | static int cache_block_group(struct btrfs_root *root, | ||
212 | struct btrfs_block_group_cache *block_group) | ||
213 | { | 249 | { |
250 | struct btrfs_block_group_cache *block_group = data; | ||
251 | struct btrfs_fs_info *fs_info = block_group->fs_info; | ||
252 | u64 last = 0; | ||
214 | struct btrfs_path *path; | 253 | struct btrfs_path *path; |
215 | int ret = 0; | 254 | int ret = 0; |
216 | struct btrfs_key key; | 255 | struct btrfs_key key; |
217 | struct extent_buffer *leaf; | 256 | struct extent_buffer *leaf; |
218 | int slot; | 257 | int slot; |
219 | u64 last; | 258 | u64 total_found = 0; |
220 | 259 | ||
221 | if (!block_group) | 260 | BUG_ON(!fs_info); |
222 | return 0; | ||
223 | |||
224 | root = root->fs_info->extent_root; | ||
225 | |||
226 | if (block_group->cached) | ||
227 | return 0; | ||
228 | 261 | ||
229 | path = btrfs_alloc_path(); | 262 | path = btrfs_alloc_path(); |
230 | if (!path) | 263 | if (!path) |
231 | return -ENOMEM; | 264 | return -ENOMEM; |
232 | 265 | ||
233 | path->reada = 2; | 266 | atomic_inc(&block_group->space_info->caching_threads); |
267 | last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET); | ||
234 | /* | 268 | /* |
235 | * we get into deadlocks with paths held by callers of this function. | 269 | * We don't want to deadlock with somebody trying to allocate a new |
236 | * since the alloc_mutex is protecting things right now, just | 270 | * extent for the extent root while also trying to search the extent |
237 | * skip the locking here | 271 | * root to add free space. So we skip locking and search the commit |
272 | * root, since it's read-only ||
238 | */ | 273 | */ |
239 | path->skip_locking = 1; | 274 | path->skip_locking = 1; |
240 | last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET); | 275 | path->search_commit_root = 1; |
276 | path->reada = 2; | ||
277 | |||
241 | key.objectid = last; | 278 | key.objectid = last; |
242 | key.offset = 0; | 279 | key.offset = 0; |
243 | btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); | 280 | btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); |
244 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | 281 | again: |
282 | /* need to make sure the commit_root doesn't disappear */ | ||
283 | down_read(&fs_info->extent_commit_sem); | ||
284 | |||
285 | ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, path, 0, 0); | ||
245 | if (ret < 0) | 286 | if (ret < 0) |
246 | goto err; | 287 | goto err; |
247 | 288 | ||
248 | while (1) { | 289 | while (1) { |
290 | smp_mb(); | ||
291 | if (block_group->fs_info->closing > 1) { | ||
292 | last = (u64)-1; | ||
293 | break; | ||
294 | } | ||
295 | |||
249 | leaf = path->nodes[0]; | 296 | leaf = path->nodes[0]; |
250 | slot = path->slots[0]; | 297 | slot = path->slots[0]; |
251 | if (slot >= btrfs_header_nritems(leaf)) { | 298 | if (slot >= btrfs_header_nritems(leaf)) { |
252 | ret = btrfs_next_leaf(root, path); | 299 | ret = btrfs_next_leaf(fs_info->extent_root, path); |
253 | if (ret < 0) | 300 | if (ret < 0) |
254 | goto err; | 301 | goto err; |
255 | if (ret == 0) | 302 | else if (ret) |
256 | continue; | ||
257 | else | ||
258 | break; | 303 | break; |
304 | |||
305 | if (need_resched() || | ||
306 | btrfs_transaction_in_commit(fs_info)) { | ||
307 | leaf = path->nodes[0]; | ||
308 | |||
309 | /* this shouldn't happen, but if the | ||
310 | * leaf is empty just move on. | ||
311 | */ | ||
312 | if (btrfs_header_nritems(leaf) == 0) | ||
313 | break; | ||
314 | /* | ||
315 | * we need to copy the key out so that | ||
316 | * we are sure the next search advances | ||
317 | * us forward in the btree. | ||
318 | */ | ||
319 | btrfs_item_key_to_cpu(leaf, &key, 0); | ||
320 | btrfs_release_path(fs_info->extent_root, path); | ||
321 | up_read(&fs_info->extent_commit_sem); | ||
322 | schedule_timeout(1); | ||
323 | goto again; | ||
324 | } | ||
325 | |||
326 | continue; | ||
259 | } | 327 | } |
260 | btrfs_item_key_to_cpu(leaf, &key, slot); | 328 | btrfs_item_key_to_cpu(leaf, &key, slot); |
261 | if (key.objectid < block_group->key.objectid) | 329 | if (key.objectid < block_group->key.objectid) |
@@ -266,24 +334,59 @@ static int cache_block_group(struct btrfs_root *root, | |||
266 | break; | 334 | break; |
267 | 335 | ||
268 | if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) { | 336 | if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) { |
269 | add_new_free_space(block_group, root->fs_info, last, | 337 | total_found += add_new_free_space(block_group, |
270 | key.objectid); | 338 | fs_info, last, |
271 | 339 | key.objectid); | |
272 | last = key.objectid + key.offset; | 340 | last = key.objectid + key.offset; |
273 | } | 341 | } |
342 | |||
343 | if (total_found > (1024 * 1024 * 2)) { | ||
344 | total_found = 0; | ||
345 | wake_up(&block_group->caching_q); | ||
346 | } | ||
274 | next: | 347 | next: |
275 | path->slots[0]++; | 348 | path->slots[0]++; |
276 | } | 349 | } |
350 | ret = 0; | ||
277 | 351 | ||
278 | add_new_free_space(block_group, root->fs_info, last, | 352 | total_found += add_new_free_space(block_group, fs_info, last, |
279 | block_group->key.objectid + | 353 | block_group->key.objectid + |
280 | block_group->key.offset); | 354 | block_group->key.offset); |
355 | |||
356 | spin_lock(&block_group->lock); | ||
357 | block_group->cached = BTRFS_CACHE_FINISHED; | ||
358 | spin_unlock(&block_group->lock); | ||
281 | 359 | ||
282 | block_group->cached = 1; | ||
283 | remove_sb_from_cache(root, block_group); | ||
284 | ret = 0; | ||
285 | err: | 360 | err: |
286 | btrfs_free_path(path); | 361 | btrfs_free_path(path); |
362 | up_read(&fs_info->extent_commit_sem); | ||
363 | atomic_dec(&block_group->space_info->caching_threads); | ||
364 | wake_up(&block_group->caching_q); | ||
365 | |||
366 | return 0; | ||
367 | } | ||
368 | |||
369 | static int cache_block_group(struct btrfs_block_group_cache *cache) | ||
370 | { | ||
371 | struct task_struct *tsk; | ||
372 | int ret = 0; | ||
373 | |||
374 | spin_lock(&cache->lock); | ||
375 | if (cache->cached != BTRFS_CACHE_NO) { | ||
376 | spin_unlock(&cache->lock); | ||
377 | return ret; | ||
378 | } | ||
379 | cache->cached = BTRFS_CACHE_STARTED; | ||
380 | spin_unlock(&cache->lock); | ||
381 | |||
382 | tsk = kthread_run(caching_kthread, cache, "btrfs-cache-%llu\n", | ||
383 | cache->key.objectid); | ||
384 | if (IS_ERR(tsk)) { | ||
385 | ret = PTR_ERR(tsk); | ||
386 | printk(KERN_ERR "error running thread %d\n", ret); | ||
387 | BUG(); | ||
388 | } | ||
389 | |||
287 | return ret; | 390 | return ret; |
288 | } | 391 | } |
289 | 392 | ||
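cache_block_group() is now cheap: it flips cached to BTRFS_CACHE_STARTED under the lock and defers the extent-tree scan to a caching kthread, which publishes free space incrementally and wakes caching_q as it goes; the scan runs against the read-only commit root so it never deadlocks with allocations in the live extent root. A compressed userspace model of that hand-off follows, using pthreads in place of kthread_run() and a condition variable in place of the wait queue (all of these names are illustrative, not the kernel API). Build with -pthread.

#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

enum { CACHE_NO, CACHE_STARTED, CACHE_FINISHED };

struct block_group {
    pthread_mutex_t lock;
    pthread_cond_t  caching_q;      /* plays the role of the wait queue */
    int cached;
    unsigned long free_space;
};

static void *caching_worker(void *data)
{
    struct block_group *bg = data;

    /* stand-in for the extent tree scan: publish free space in chunks */
    for (int i = 0; i < 4; i++) {
        usleep(1000);
        pthread_mutex_lock(&bg->lock);
        bg->free_space += 512 * 1024;
        pthread_cond_broadcast(&bg->caching_q);     /* wake_up(&caching_q) */
        pthread_mutex_unlock(&bg->lock);
    }
    pthread_mutex_lock(&bg->lock);
    bg->cached = CACHE_FINISHED;
    pthread_cond_broadcast(&bg->caching_q);
    pthread_mutex_unlock(&bg->lock);
    return NULL;
}

/* like cache_block_group(): only the first caller starts the worker */
static void cache_block_group(struct block_group *bg, pthread_t *tid)
{
    pthread_mutex_lock(&bg->lock);
    if (bg->cached != CACHE_NO) {
        pthread_mutex_unlock(&bg->lock);
        return;
    }
    bg->cached = CACHE_STARTED;
    pthread_mutex_unlock(&bg->lock);
    pthread_create(tid, NULL, caching_worker, bg);
}

int main(void)
{
    struct block_group bg = {
        PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER, CACHE_NO, 0
    };
    pthread_t tid;

    cache_block_group(&bg, &tid);

    /* a consumer waiting for the cache to finish, like the logged-extent path */
    pthread_mutex_lock(&bg.lock);
    while (bg.cached != CACHE_FINISHED)
        pthread_cond_wait(&bg.caching_q, &bg.lock);
    printf("cached %lu bytes of free space\n", bg.free_space);
    pthread_mutex_unlock(&bg.lock);

    pthread_join(tid, NULL);
    return 0;
}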
@@ -990,15 +1093,13 @@ static inline int extent_ref_type(u64 parent, u64 owner) | |||
990 | return type; | 1093 | return type; |
991 | } | 1094 | } |
992 | 1095 | ||
993 | static int find_next_key(struct btrfs_path *path, struct btrfs_key *key) | 1096 | static int find_next_key(struct btrfs_path *path, int level, |
1097 | struct btrfs_key *key) | ||
994 | 1098 | ||
995 | { | 1099 | { |
996 | int level; | 1100 | for (; level < BTRFS_MAX_LEVEL; level++) { |
997 | BUG_ON(!path->keep_locks); | ||
998 | for (level = 0; level < BTRFS_MAX_LEVEL; level++) { | ||
999 | if (!path->nodes[level]) | 1101 | if (!path->nodes[level]) |
1000 | break; | 1102 | break; |
1001 | btrfs_assert_tree_locked(path->nodes[level]); | ||
1002 | if (path->slots[level] + 1 >= | 1103 | if (path->slots[level] + 1 >= |
1003 | btrfs_header_nritems(path->nodes[level])) | 1104 | btrfs_header_nritems(path->nodes[level])) |
1004 | continue; | 1105 | continue; |
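find_next_key() now takes the level to start from and drops the keep_locks/assert-locked requirements; it simply climbs the path looking for the first node that still has a slot to its right and returns that key. A toy array-based model of that climb is below; struct node and struct path are simplified stand-ins for extent_buffer and btrfs_path.

#include <stdio.h>

#define MAX_LEVEL 4

struct node { int nritems; int keys[8]; };

struct path {
    struct node *nodes[MAX_LEVEL];   /* nodes[0] is the leaf */
    int slots[MAX_LEVEL];
};

/* return 0 and fill *key with the next key at or above 'level', 1 if none */
static int find_next_key(struct path *p, int level, int *key)
{
    for (; level < MAX_LEVEL; level++) {
        if (!p->nodes[level])
            break;
        if (p->slots[level] + 1 >= p->nodes[level]->nritems)
            continue;                /* no slot to the right at this level */
        *key = p->nodes[level]->keys[p->slots[level] + 1];
        return 0;
    }
    return 1;
}

int main(void)
{
    struct node leaf = { 2, { 10, 11 } };
    struct node root = { 3, { 0, 12, 30 } };
    struct path p = { { &leaf, &root }, { 1, 1 } };
    int key;

    if (find_next_key(&p, 0, &key) == 0)
        printf("next key: %d\n", key);   /* 30: leaf exhausted, root has more */
    return 0;
}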
@@ -1158,7 +1259,8 @@ int lookup_inline_extent_backref(struct btrfs_trans_handle *trans, | |||
1158 | * For simplicity, we just do not add new inline back | 1259 | * For simplicity, we just do not add new inline back |
1159 | * ref if there is any kind of item for this block | 1260 | * ref if there is any kind of item for this block |
1160 | */ | 1261 | */ |
1161 | if (find_next_key(path, &key) == 0 && key.objectid == bytenr && | 1262 | if (find_next_key(path, 0, &key) == 0 && |
1263 | key.objectid == bytenr && | ||
1162 | key.type < BTRFS_BLOCK_GROUP_ITEM_KEY) { | 1264 | key.type < BTRFS_BLOCK_GROUP_ITEM_KEY) { |
1163 | err = -EAGAIN; | 1265 | err = -EAGAIN; |
1164 | goto out; | 1266 | goto out; |
@@ -2388,13 +2490,29 @@ fail: | |||
2388 | 2490 | ||
2389 | } | 2491 | } |
2390 | 2492 | ||
2493 | static struct btrfs_block_group_cache * | ||
2494 | next_block_group(struct btrfs_root *root, | ||
2495 | struct btrfs_block_group_cache *cache) | ||
2496 | { | ||
2497 | struct rb_node *node; | ||
2498 | spin_lock(&root->fs_info->block_group_cache_lock); | ||
2499 | node = rb_next(&cache->cache_node); | ||
2500 | btrfs_put_block_group(cache); | ||
2501 | if (node) { | ||
2502 | cache = rb_entry(node, struct btrfs_block_group_cache, | ||
2503 | cache_node); | ||
2504 | atomic_inc(&cache->count); | ||
2505 | } else | ||
2506 | cache = NULL; | ||
2507 | spin_unlock(&root->fs_info->block_group_cache_lock); | ||
2508 | return cache; | ||
2509 | } | ||
2510 | |||
2391 | int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, | 2511 | int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, |
2392 | struct btrfs_root *root) | 2512 | struct btrfs_root *root) |
2393 | { | 2513 | { |
2394 | struct btrfs_block_group_cache *cache, *entry; | 2514 | struct btrfs_block_group_cache *cache; |
2395 | struct rb_node *n; | ||
2396 | int err = 0; | 2515 | int err = 0; |
2397 | int werr = 0; | ||
2398 | struct btrfs_path *path; | 2516 | struct btrfs_path *path; |
2399 | u64 last = 0; | 2517 | u64 last = 0; |
2400 | 2518 | ||
@@ -2403,39 +2521,35 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, | |||
2403 | return -ENOMEM; | 2521 | return -ENOMEM; |
2404 | 2522 | ||
2405 | while (1) { | 2523 | while (1) { |
2406 | cache = NULL; | 2524 | if (last == 0) { |
2407 | spin_lock(&root->fs_info->block_group_cache_lock); | 2525 | err = btrfs_run_delayed_refs(trans, root, |
2408 | for (n = rb_first(&root->fs_info->block_group_cache_tree); | 2526 | (unsigned long)-1); |
2409 | n; n = rb_next(n)) { | 2527 | BUG_ON(err); |
2410 | entry = rb_entry(n, struct btrfs_block_group_cache, | ||
2411 | cache_node); | ||
2412 | if (entry->dirty) { | ||
2413 | cache = entry; | ||
2414 | break; | ||
2415 | } | ||
2416 | } | 2528 | } |
2417 | spin_unlock(&root->fs_info->block_group_cache_lock); | ||
2418 | 2529 | ||
2419 | if (!cache) | 2530 | cache = btrfs_lookup_first_block_group(root->fs_info, last); |
2420 | break; | 2531 | while (cache) { |
2532 | if (cache->dirty) | ||
2533 | break; | ||
2534 | cache = next_block_group(root, cache); | ||
2535 | } | ||
2536 | if (!cache) { | ||
2537 | if (last == 0) | ||
2538 | break; | ||
2539 | last = 0; | ||
2540 | continue; | ||
2541 | } | ||
2421 | 2542 | ||
2422 | cache->dirty = 0; | 2543 | cache->dirty = 0; |
2423 | last += cache->key.offset; | 2544 | last = cache->key.objectid + cache->key.offset; |
2424 | 2545 | ||
2425 | err = write_one_cache_group(trans, root, | 2546 | err = write_one_cache_group(trans, root, path, cache); |
2426 | path, cache); | 2547 | BUG_ON(err); |
2427 | /* | 2548 | btrfs_put_block_group(cache); |
2428 | * if we fail to write the cache group, we want | ||
2429 | * to keep it marked dirty in hopes that a later | ||
2430 | * write will work | ||
2431 | */ | ||
2432 | if (err) { | ||
2433 | werr = err; | ||
2434 | continue; | ||
2435 | } | ||
2436 | } | 2549 | } |
2550 | |||
2437 | btrfs_free_path(path); | 2551 | btrfs_free_path(path); |
2438 | return werr; | 2552 | return 0; |
2439 | } | 2553 | } |
2440 | 2554 | ||
2441 | int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr) | 2555 | int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr) |
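btrfs_write_dirty_block_groups() no longer snapshots one dirty group per pass under block_group_cache_lock; together with the new next_block_group() helper it walks groups by key, and whenever it falls off the end it reruns the delayed refs and restarts from zero so groups dirtied in the meantime get written too. A plain-C sketch of that restartable scan over an array; lookup_first() and next_group() are illustrative stand-ins for the rbtree lookups.

#include <stdio.h>
#include <stdint.h>

struct block_group { uint64_t objectid, offset; int dirty; };

/* stand-in for btrfs_lookup_first_block_group(): first group at/after 'key' */
static struct block_group *lookup_first(struct block_group *bgs, int n, uint64_t key)
{
    for (int i = 0; i < n; i++)
        if (bgs[i].objectid + bgs[i].offset > key)
            return &bgs[i];
    return NULL;
}

static struct block_group *next_group(struct block_group *bgs, int n,
                                      struct block_group *cur)
{
    return (cur + 1 < bgs + n) ? cur + 1 : NULL;
}

static void write_dirty_block_groups(struct block_group *bgs, int n)
{
    uint64_t last = 0;

    while (1) {
        if (last == 0) {
            /* kernel: btrfs_run_delayed_refs() here; it may dirty more groups */
        }

        struct block_group *cache = lookup_first(bgs, n, last);
        while (cache && !cache->dirty)
            cache = next_group(bgs, n, cache);

        if (!cache) {
            if (last == 0)
                break;          /* a full pass found nothing dirty: done */
            last = 0;           /* fell off the end: restart the scan */
            continue;
        }
        cache->dirty = 0;
        last = cache->objectid + cache->offset;
        printf("wrote group at %llu\n", (unsigned long long)cache->objectid);
    }
}

int main(void)
{
    struct block_group bgs[] = {
        { 0,       1 << 20, 1 },
        { 1 << 20, 1 << 20, 0 },
        { 2 << 20, 1 << 20, 1 },
    };
    write_dirty_block_groups(bgs, 3);
    return 0;
}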
@@ -2485,6 +2599,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, | |||
2485 | found->force_alloc = 0; | 2599 | found->force_alloc = 0; |
2486 | *space_info = found; | 2600 | *space_info = found; |
2487 | list_add_rcu(&found->list, &info->space_info); | 2601 | list_add_rcu(&found->list, &info->space_info); |
2602 | atomic_set(&found->caching_threads, 0); | ||
2488 | return 0; | 2603 | return 0; |
2489 | } | 2604 | } |
2490 | 2605 | ||
@@ -2697,7 +2812,7 @@ again: | |||
2697 | 2812 | ||
2698 | printk(KERN_ERR "no space left, need %llu, %llu delalloc bytes" | 2813 | printk(KERN_ERR "no space left, need %llu, %llu delalloc bytes" |
2699 | ", %llu bytes_used, %llu bytes_reserved, " | 2814 | ", %llu bytes_used, %llu bytes_reserved, " |
2700 | "%llu bytes_pinned, %llu bytes_readonly, %llu may use" | 2815 | "%llu bytes_pinned, %llu bytes_readonly, %llu may use " |
2701 | "%llu total\n", (unsigned long long)bytes, | 2816 | "%llu total\n", (unsigned long long)bytes, |
2702 | (unsigned long long)data_sinfo->bytes_delalloc, | 2817 | (unsigned long long)data_sinfo->bytes_delalloc, |
2703 | (unsigned long long)data_sinfo->bytes_used, | 2818 | (unsigned long long)data_sinfo->bytes_used, |
@@ -2948,13 +3063,9 @@ int btrfs_update_pinned_extents(struct btrfs_root *root, | |||
2948 | struct btrfs_block_group_cache *cache; | 3063 | struct btrfs_block_group_cache *cache; |
2949 | struct btrfs_fs_info *fs_info = root->fs_info; | 3064 | struct btrfs_fs_info *fs_info = root->fs_info; |
2950 | 3065 | ||
2951 | if (pin) { | 3066 | if (pin) |
2952 | set_extent_dirty(&fs_info->pinned_extents, | 3067 | set_extent_dirty(&fs_info->pinned_extents, |
2953 | bytenr, bytenr + num - 1, GFP_NOFS); | 3068 | bytenr, bytenr + num - 1, GFP_NOFS); |
2954 | } else { | ||
2955 | clear_extent_dirty(&fs_info->pinned_extents, | ||
2956 | bytenr, bytenr + num - 1, GFP_NOFS); | ||
2957 | } | ||
2958 | 3069 | ||
2959 | while (num > 0) { | 3070 | while (num > 0) { |
2960 | cache = btrfs_lookup_block_group(fs_info, bytenr); | 3071 | cache = btrfs_lookup_block_group(fs_info, bytenr); |
@@ -2970,14 +3081,34 @@ int btrfs_update_pinned_extents(struct btrfs_root *root, | |||
2970 | spin_unlock(&cache->space_info->lock); | 3081 | spin_unlock(&cache->space_info->lock); |
2971 | fs_info->total_pinned += len; | 3082 | fs_info->total_pinned += len; |
2972 | } else { | 3083 | } else { |
3084 | int unpin = 0; | ||
3085 | |||
3086 | /* | ||
3087 | * in order to not race with the block group caching, we | ||
3088 | * only want to unpin the extent if we are cached. If | ||
3089 | * we aren't cached, we want to start async caching this | ||
3090 | * block group so we can free the extent the next time | ||
3091 | * around. | ||
3092 | */ | ||
2973 | spin_lock(&cache->space_info->lock); | 3093 | spin_lock(&cache->space_info->lock); |
2974 | spin_lock(&cache->lock); | 3094 | spin_lock(&cache->lock); |
2975 | cache->pinned -= len; | 3095 | unpin = (cache->cached == BTRFS_CACHE_FINISHED); |
2976 | cache->space_info->bytes_pinned -= len; | 3096 | if (likely(unpin)) { |
3097 | cache->pinned -= len; | ||
3098 | cache->space_info->bytes_pinned -= len; | ||
3099 | fs_info->total_pinned -= len; | ||
3100 | } | ||
2977 | spin_unlock(&cache->lock); | 3101 | spin_unlock(&cache->lock); |
2978 | spin_unlock(&cache->space_info->lock); | 3102 | spin_unlock(&cache->space_info->lock); |
2979 | fs_info->total_pinned -= len; | 3103 | |
2980 | if (cache->cached) | 3104 | if (likely(unpin)) |
3105 | clear_extent_dirty(&fs_info->pinned_extents, | ||
3106 | bytenr, bytenr + len -1, | ||
3107 | GFP_NOFS); | ||
3108 | else | ||
3109 | cache_block_group(cache); | ||
3110 | |||
3111 | if (unpin) | ||
2981 | btrfs_add_free_space(cache, bytenr, len); | 3112 | btrfs_add_free_space(cache, bytenr, len); |
2982 | } | 3113 | } |
2983 | btrfs_put_block_group(cache); | 3114 | btrfs_put_block_group(cache); |
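The unpin side of btrfs_update_pinned_extents() now decides under cache->lock whether the block group has finished caching: only then are the pinned counters dropped, the EXTENT_DIRTY bit cleared and the bytes handed to the free-space cache; otherwise it kicks off caching so a later pass can free them. A condensed sketch of that decision, using plain structs rather than the kernel types.

#include <stdbool.h>
#include <stdio.h>

enum { CACHE_NO, CACHE_STARTED, CACHE_FINISHED };

struct block_group {
    int cached;
    unsigned long long pinned;
    unsigned long long free_space;
};

static void start_caching(struct block_group *bg)
{
    if (bg->cached == CACHE_NO)
        bg->cached = CACHE_STARTED;   /* kernel: spawn the caching kthread */
}

/* unpin 'len' bytes only if the free-space cache is already authoritative */
static bool try_unpin(struct block_group *bg, unsigned long long len)
{
    bool unpin = (bg->cached == CACHE_FINISHED); /* decided under cache->lock */

    if (unpin) {
        bg->pinned -= len;
        bg->free_space += len;        /* btrfs_add_free_space() */
    } else {
        start_caching(bg);            /* let the caching thread pick it up later */
    }
    return unpin;
}

int main(void)
{
    struct block_group bg = { CACHE_NO, 4096, 0 };

    printf("unpinned now: %d (cached=%d)\n", try_unpin(&bg, 4096), bg.cached);
    bg.cached = CACHE_FINISHED;
    printf("unpinned now: %d (free=%llu)\n", try_unpin(&bg, 4096), bg.free_space);
    return 0;
}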
@@ -3031,6 +3162,7 @@ int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy) | |||
3031 | &start, &end, EXTENT_DIRTY); | 3162 | &start, &end, EXTENT_DIRTY); |
3032 | if (ret) | 3163 | if (ret) |
3033 | break; | 3164 | break; |
3165 | |||
3034 | set_extent_dirty(copy, start, end, GFP_NOFS); | 3166 | set_extent_dirty(copy, start, end, GFP_NOFS); |
3035 | last = end + 1; | 3167 | last = end + 1; |
3036 | } | 3168 | } |
@@ -3059,6 +3191,7 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, | |||
3059 | 3191 | ||
3060 | cond_resched(); | 3192 | cond_resched(); |
3061 | } | 3193 | } |
3194 | |||
3062 | return ret; | 3195 | return ret; |
3063 | } | 3196 | } |
3064 | 3197 | ||
@@ -3437,6 +3570,45 @@ static u64 stripe_align(struct btrfs_root *root, u64 val) | |||
3437 | } | 3570 | } |
3438 | 3571 | ||
3439 | /* | 3572 | /* |
3573 | * when we wait for progress in the block group caching, it's because ||
3574 | * our allocation attempt failed at least once. So, we must sleep | ||
3575 | * and let some progress happen before we try again. | ||
3576 | * | ||
3577 | * This function will sleep at least once waiting for new free space to | ||
3578 | * show up, and then it will check the block group free space numbers | ||
3579 | * for our min num_bytes. Another option is to have it go ahead | ||
3580 | * and look in the rbtree for a free extent of a given size, but this | ||
3581 | * is a good start. | ||
3582 | */ | ||
3583 | static noinline int | ||
3584 | wait_block_group_cache_progress(struct btrfs_block_group_cache *cache, | ||
3585 | u64 num_bytes) | ||
3586 | { | ||
3587 | DEFINE_WAIT(wait); | ||
3588 | |||
3589 | prepare_to_wait(&cache->caching_q, &wait, TASK_UNINTERRUPTIBLE); | ||
3590 | |||
3591 | if (block_group_cache_done(cache)) { | ||
3592 | finish_wait(&cache->caching_q, &wait); | ||
3593 | return 0; | ||
3594 | } | ||
3595 | schedule(); | ||
3596 | finish_wait(&cache->caching_q, &wait); | ||
3597 | |||
3598 | wait_event(cache->caching_q, block_group_cache_done(cache) || | ||
3599 | (cache->free_space >= num_bytes)); | ||
3600 | return 0; | ||
3601 | } | ||
3602 | |||
3603 | enum btrfs_loop_type { | ||
3604 | LOOP_CACHED_ONLY = 0, | ||
3605 | LOOP_CACHING_NOWAIT = 1, | ||
3606 | LOOP_CACHING_WAIT = 2, | ||
3607 | LOOP_ALLOC_CHUNK = 3, | ||
3608 | LOOP_NO_EMPTY_SIZE = 4, | ||
3609 | }; | ||
3610 | |||
3611 | /* | ||
3440 | * walks the btree of allocated extents and find a hole of a given size. | 3612 | * walks the btree of allocated extents and find a hole of a given size. |
3441 | * The key ins is changed to record the hole: | 3613 | * The key ins is changed to record the hole: |
3442 | * ins->objectid == block start | 3614 | * ins->objectid == block start |
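wait_block_group_cache_progress() is used once an allocation attempt has already failed: it sleeps so the caching thread can make progress, then waits until the group is fully cached or shows at least num_bytes of free space, and the btrfs_loop_type values right after it name the stages find_free_extent() escalates through. Below is a condition-variable model of the "done or enough free space" predicate, with invented pthread-based stand-ins for the wait queue (the kernel additionally guarantees at least one schedule() before rechecking). Build with -pthread.

#include <pthread.h>
#include <stdio.h>

enum { CACHE_NO, CACHE_STARTED, CACHE_FINISHED };

struct block_group {
    pthread_mutex_t lock;
    pthread_cond_t  caching_q;
    int cached;
    unsigned long long free_space;
};

/* block until the group is fully cached OR reports at least num_bytes free */
static void wait_cache_progress(struct block_group *bg, unsigned long long num_bytes)
{
    pthread_mutex_lock(&bg->lock);
    while (bg->cached != CACHE_FINISHED && bg->free_space < num_bytes)
        pthread_cond_wait(&bg->caching_q, &bg->lock);
    pthread_mutex_unlock(&bg->lock);
}

static void *slow_cacher(void *data)
{
    struct block_group *bg = data;

    for (int i = 0; i < 8; i++) {
        pthread_mutex_lock(&bg->lock);
        bg->free_space += 256 * 1024;            /* progress in 256K steps */
        if (i == 7)
            bg->cached = CACHE_FINISHED;
        pthread_cond_broadcast(&bg->caching_q);  /* wake_up(&caching_q) */
        pthread_mutex_unlock(&bg->lock);
    }
    return NULL;
}

int main(void)
{
    struct block_group bg = {
        PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER, CACHE_STARTED, 0
    };
    pthread_t tid;
    unsigned long long seen;

    pthread_create(&tid, NULL, slow_cacher, &bg);
    wait_cache_progress(&bg, 1 << 20);           /* need 1MiB before retrying */

    pthread_mutex_lock(&bg.lock);
    seen = bg.free_space;
    pthread_mutex_unlock(&bg.lock);
    printf("retrying allocation with %llu bytes visible\n", seen);

    pthread_join(tid, NULL);
    return 0;
}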
@@ -3461,6 +3633,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, | |||
3461 | struct btrfs_space_info *space_info; | 3633 | struct btrfs_space_info *space_info; |
3462 | int last_ptr_loop = 0; | 3634 | int last_ptr_loop = 0; |
3463 | int loop = 0; | 3635 | int loop = 0; |
3636 | bool found_uncached_bg = false; | ||
3464 | 3637 | ||
3465 | WARN_ON(num_bytes < root->sectorsize); | 3638 | WARN_ON(num_bytes < root->sectorsize); |
3466 | btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); | 3639 | btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); |
@@ -3492,15 +3665,18 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, | |||
3492 | search_start = max(search_start, first_logical_byte(root, 0)); | 3665 | search_start = max(search_start, first_logical_byte(root, 0)); |
3493 | search_start = max(search_start, hint_byte); | 3666 | search_start = max(search_start, hint_byte); |
3494 | 3667 | ||
3495 | if (!last_ptr) { | 3668 | if (!last_ptr) |
3496 | empty_cluster = 0; | 3669 | empty_cluster = 0; |
3497 | loop = 1; | ||
3498 | } | ||
3499 | 3670 | ||
3500 | if (search_start == hint_byte) { | 3671 | if (search_start == hint_byte) { |
3501 | block_group = btrfs_lookup_block_group(root->fs_info, | 3672 | block_group = btrfs_lookup_block_group(root->fs_info, |
3502 | search_start); | 3673 | search_start); |
3503 | if (block_group && block_group_bits(block_group, data)) { | 3674 | /* |
3675 | * we don't want to use the block group if it doesn't match our | ||
3676 | * allocation bits, or if its not cached. | ||
3677 | */ | ||
3678 | if (block_group && block_group_bits(block_group, data) && | ||
3679 | block_group_cache_done(block_group)) { | ||
3504 | down_read(&space_info->groups_sem); | 3680 | down_read(&space_info->groups_sem); |
3505 | if (list_empty(&block_group->list) || | 3681 | if (list_empty(&block_group->list) || |
3506 | block_group->ro) { | 3682 | block_group->ro) { |
@@ -3523,21 +3699,35 @@ search: | |||
3523 | down_read(&space_info->groups_sem); | 3699 | down_read(&space_info->groups_sem); |
3524 | list_for_each_entry(block_group, &space_info->block_groups, list) { | 3700 | list_for_each_entry(block_group, &space_info->block_groups, list) { |
3525 | u64 offset; | 3701 | u64 offset; |
3702 | int cached; | ||
3526 | 3703 | ||
3527 | atomic_inc(&block_group->count); | 3704 | atomic_inc(&block_group->count); |
3528 | search_start = block_group->key.objectid; | 3705 | search_start = block_group->key.objectid; |
3529 | 3706 | ||
3530 | have_block_group: | 3707 | have_block_group: |
3531 | if (unlikely(!block_group->cached)) { | 3708 | if (unlikely(block_group->cached == BTRFS_CACHE_NO)) { |
3532 | mutex_lock(&block_group->cache_mutex); | 3709 | /* |
3533 | ret = cache_block_group(root, block_group); | 3710 | * we want to start caching kthreads, but not too many |
3534 | mutex_unlock(&block_group->cache_mutex); | 3711 | * right off the bat so we don't overwhelm the system, |
3535 | if (ret) { | 3712 | * so only start them if there are less than 2 and we're |
3536 | btrfs_put_block_group(block_group); | 3713 | * in the initial allocation phase. |
3537 | break; | 3714 | */ |
3715 | if (loop > LOOP_CACHING_NOWAIT || | ||
3716 | atomic_read(&space_info->caching_threads) < 2) { | ||
3717 | ret = cache_block_group(block_group); | ||
3718 | BUG_ON(ret); | ||
3538 | } | 3719 | } |
3539 | } | 3720 | } |
3540 | 3721 | ||
3722 | cached = block_group_cache_done(block_group); | ||
3723 | if (unlikely(!cached)) { | ||
3724 | found_uncached_bg = true; | ||
3725 | |||
3726 | /* if we only want cached bgs, loop */ | ||
3727 | if (loop == LOOP_CACHED_ONLY) | ||
3728 | goto loop; | ||
3729 | } | ||
3730 | |||
3541 | if (unlikely(block_group->ro)) | 3731 | if (unlikely(block_group->ro)) |
3542 | goto loop; | 3732 | goto loop; |
3543 | 3733 | ||
@@ -3616,14 +3806,21 @@ refill_cluster: | |||
3616 | spin_unlock(&last_ptr->refill_lock); | 3806 | spin_unlock(&last_ptr->refill_lock); |
3617 | goto checks; | 3807 | goto checks; |
3618 | } | 3808 | } |
3809 | } else if (!cached && loop > LOOP_CACHING_NOWAIT) { | ||
3810 | spin_unlock(&last_ptr->refill_lock); | ||
3811 | |||
3812 | wait_block_group_cache_progress(block_group, | ||
3813 | num_bytes + empty_cluster + empty_size); | ||
3814 | goto have_block_group; | ||
3619 | } | 3815 | } |
3816 | |||
3620 | /* | 3817 | /* |
3621 | * at this point we either didn't find a cluster | 3818 | * at this point we either didn't find a cluster |
3622 | * or we weren't able to allocate a block from our | 3819 | * or we weren't able to allocate a block from our |
3623 | * cluster. Free the cluster we've been trying | 3820 | * cluster. Free the cluster we've been trying |
3624 | * to use, and go to the next block group | 3821 | * to use, and go to the next block group |
3625 | */ | 3822 | */ |
3626 | if (loop < 2) { | 3823 | if (loop < LOOP_NO_EMPTY_SIZE) { |
3627 | btrfs_return_cluster_to_free_space(NULL, | 3824 | btrfs_return_cluster_to_free_space(NULL, |
3628 | last_ptr); | 3825 | last_ptr); |
3629 | spin_unlock(&last_ptr->refill_lock); | 3826 | spin_unlock(&last_ptr->refill_lock); |
@@ -3634,11 +3831,17 @@ refill_cluster: | |||
3634 | 3831 | ||
3635 | offset = btrfs_find_space_for_alloc(block_group, search_start, | 3832 | offset = btrfs_find_space_for_alloc(block_group, search_start, |
3636 | num_bytes, empty_size); | 3833 | num_bytes, empty_size); |
3637 | if (!offset) | 3834 | if (!offset && (cached || (!cached && |
3835 | loop == LOOP_CACHING_NOWAIT))) { | ||
3638 | goto loop; | 3836 | goto loop; |
3837 | } else if (!offset && (!cached && | ||
3838 | loop > LOOP_CACHING_NOWAIT)) { | ||
3839 | wait_block_group_cache_progress(block_group, | ||
3840 | num_bytes + empty_size); | ||
3841 | goto have_block_group; | ||
3842 | } | ||
3639 | checks: | 3843 | checks: |
3640 | search_start = stripe_align(root, offset); | 3844 | search_start = stripe_align(root, offset); |
3641 | |||
3642 | /* move on to the next group */ | 3845 | /* move on to the next group */ |
3643 | if (search_start + num_bytes >= search_end) { | 3846 | if (search_start + num_bytes >= search_end) { |
3644 | btrfs_add_free_space(block_group, offset, num_bytes); | 3847 | btrfs_add_free_space(block_group, offset, num_bytes); |
@@ -3684,13 +3887,26 @@ loop: | |||
3684 | } | 3887 | } |
3685 | up_read(&space_info->groups_sem); | 3888 | up_read(&space_info->groups_sem); |
3686 | 3889 | ||
3687 | /* loop == 0, try to find a clustered alloc in every block group | 3890 | /* LOOP_CACHED_ONLY, only search fully cached block groups |
3688 | * loop == 1, try again after forcing a chunk allocation | 3891 | * LOOP_CACHING_NOWAIT, search partially cached block groups, but |
3689 | * loop == 2, set empty_size and empty_cluster to 0 and try again | 3892 | * don't wait for them to finish caching |
3893 | * LOOP_CACHING_WAIT, search everything, and wait if our bg is caching | ||
3894 | * LOOP_ALLOC_CHUNK, force a chunk allocation and try again | ||
3895 | * LOOP_NO_EMPTY_SIZE, set empty_size and empty_cluster to 0 and try | ||
3896 | * again | ||
3690 | */ | 3897 | */ |
3691 | if (!ins->objectid && loop < 3 && | 3898 | if (!ins->objectid && loop < LOOP_NO_EMPTY_SIZE && |
3692 | (empty_size || empty_cluster || allowed_chunk_alloc)) { | 3899 | (found_uncached_bg || empty_size || empty_cluster || |
3693 | if (loop >= 2) { | 3900 | allowed_chunk_alloc)) { |
3901 | if (found_uncached_bg) { | ||
3902 | found_uncached_bg = false; | ||
3903 | if (loop < LOOP_CACHING_WAIT) { | ||
3904 | loop++; | ||
3905 | goto search; | ||
3906 | } | ||
3907 | } | ||
3908 | |||
3909 | if (loop == LOOP_ALLOC_CHUNK) { | ||
3694 | empty_size = 0; | 3910 | empty_size = 0; |
3695 | empty_cluster = 0; | 3911 | empty_cluster = 0; |
3696 | } | 3912 | } |
@@ -3703,7 +3919,7 @@ loop: | |||
3703 | space_info->force_alloc = 1; | 3919 | space_info->force_alloc = 1; |
3704 | } | 3920 | } |
3705 | 3921 | ||
3706 | if (loop < 3) { | 3922 | if (loop < LOOP_NO_EMPTY_SIZE) { |
3707 | loop++; | 3923 | loop++; |
3708 | goto search; | 3924 | goto search; |
3709 | } | 3925 | } |
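The retry ladder at the bottom of find_free_extent() replaces the old magic loop numbers with the enum stages, and the new found_uncached_bg flag lets it re-run the cheap stages (without waiting on caching) before it resorts to forcing a chunk allocation or dropping empty_size. A compact sketch of that control flow; allocate() is an invented stand-in for one pass over the block groups.

#include <stdbool.h>
#include <stdio.h>

enum btrfs_loop_type {
    LOOP_CACHED_ONLY = 0,     /* only fully cached block groups */
    LOOP_CACHING_NOWAIT = 1,  /* partially cached groups, don't wait on them */
    LOOP_CACHING_WAIT = 2,    /* wait for caching progress when needed */
    LOOP_ALLOC_CHUNK = 3,     /* force a new chunk allocation */
    LOOP_NO_EMPTY_SIZE = 4,   /* last resort: drop empty_size/empty_cluster */
};

/* stand-in for one pass over the block groups at a given strictness level */
static bool allocate(int loop, bool *found_uncached_bg)
{
    *found_uncached_bg = (loop < LOOP_CACHING_WAIT); /* pretend caching lags */
    return loop >= LOOP_ALLOC_CHUNK;                 /* succeeds once a chunk is forced */
}

int main(void)
{
    bool found_uncached_bg = false;
    int loop = LOOP_CACHED_ONLY;

    while (1) {
        if (allocate(loop, &found_uncached_bg)) {
            printf("allocated at loop %d\n", loop);
            break;
        }
        /* saw uncached groups: retry the cheap stages before escalating hard */
        if (found_uncached_bg && loop < LOOP_CACHING_WAIT) {
            found_uncached_bg = false;
            loop++;
            continue;
        }
        if (loop == LOOP_ALLOC_CHUNK) {
            /* kernel zeroes empty_size/empty_cluster before the last pass */
        }
        if (loop < LOOP_NO_EMPTY_SIZE) {
            loop++;
            continue;
        }
        printf("ENOSPC\n");
        break;
    }
    return 0;
}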
@@ -3799,7 +4015,7 @@ again: | |||
3799 | num_bytes, data, 1); | 4015 | num_bytes, data, 1); |
3800 | goto again; | 4016 | goto again; |
3801 | } | 4017 | } |
3802 | if (ret) { | 4018 | if (ret == -ENOSPC) { |
3803 | struct btrfs_space_info *sinfo; | 4019 | struct btrfs_space_info *sinfo; |
3804 | 4020 | ||
3805 | sinfo = __find_space_info(root->fs_info, data); | 4021 | sinfo = __find_space_info(root->fs_info, data); |
@@ -3807,7 +4023,6 @@ again: | |||
3807 | "wanted %llu\n", (unsigned long long)data, | 4023 | "wanted %llu\n", (unsigned long long)data, |
3808 | (unsigned long long)num_bytes); | 4024 | (unsigned long long)num_bytes); |
3809 | dump_space_info(sinfo, num_bytes); | 4025 | dump_space_info(sinfo, num_bytes); |
3810 | BUG(); | ||
3811 | } | 4026 | } |
3812 | 4027 | ||
3813 | return ret; | 4028 | return ret; |
@@ -3845,7 +4060,9 @@ int btrfs_reserve_extent(struct btrfs_trans_handle *trans, | |||
3845 | ret = __btrfs_reserve_extent(trans, root, num_bytes, min_alloc_size, | 4060 | ret = __btrfs_reserve_extent(trans, root, num_bytes, min_alloc_size, |
3846 | empty_size, hint_byte, search_end, ins, | 4061 | empty_size, hint_byte, search_end, ins, |
3847 | data); | 4062 | data); |
3848 | update_reserved_extents(root, ins->objectid, ins->offset, 1); | 4063 | if (!ret) |
4064 | update_reserved_extents(root, ins->objectid, ins->offset, 1); | ||
4065 | |||
3849 | return ret; | 4066 | return ret; |
3850 | } | 4067 | } |
3851 | 4068 | ||
@@ -4007,9 +4224,9 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans, | |||
4007 | struct btrfs_block_group_cache *block_group; | 4224 | struct btrfs_block_group_cache *block_group; |
4008 | 4225 | ||
4009 | block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid); | 4226 | block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid); |
4010 | mutex_lock(&block_group->cache_mutex); | 4227 | cache_block_group(block_group); |
4011 | cache_block_group(root, block_group); | 4228 | wait_event(block_group->caching_q, |
4012 | mutex_unlock(&block_group->cache_mutex); | 4229 | block_group_cache_done(block_group)); |
4013 | 4230 | ||
4014 | ret = btrfs_remove_free_space(block_group, ins->objectid, | 4231 | ret = btrfs_remove_free_space(block_group, ins->objectid, |
4015 | ins->offset); | 4232 | ins->offset); |
@@ -4040,7 +4257,8 @@ static int alloc_tree_block(struct btrfs_trans_handle *trans, | |||
4040 | ret = __btrfs_reserve_extent(trans, root, num_bytes, num_bytes, | 4257 | ret = __btrfs_reserve_extent(trans, root, num_bytes, num_bytes, |
4041 | empty_size, hint_byte, search_end, | 4258 | empty_size, hint_byte, search_end, |
4042 | ins, 0); | 4259 | ins, 0); |
4043 | BUG_ON(ret); | 4260 | if (ret) |
4261 | return ret; | ||
4044 | 4262 | ||
4045 | if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) { | 4263 | if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) { |
4046 | if (parent == 0) | 4264 | if (parent == 0) |
@@ -4128,6 +4346,7 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, | |||
4128 | return buf; | 4346 | return buf; |
4129 | } | 4347 | } |
4130 | 4348 | ||
4349 | #if 0 | ||
4131 | int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, | 4350 | int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, |
4132 | struct btrfs_root *root, struct extent_buffer *leaf) | 4351 | struct btrfs_root *root, struct extent_buffer *leaf) |
4133 | { | 4352 | { |
@@ -4171,8 +4390,6 @@ int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, | |||
4171 | return 0; | 4390 | return 0; |
4172 | } | 4391 | } |
4173 | 4392 | ||
4174 | #if 0 | ||
4175 | |||
4176 | static noinline int cache_drop_leaf_ref(struct btrfs_trans_handle *trans, | 4393 | static noinline int cache_drop_leaf_ref(struct btrfs_trans_handle *trans, |
4177 | struct btrfs_root *root, | 4394 | struct btrfs_root *root, |
4178 | struct btrfs_leaf_ref *ref) | 4395 | struct btrfs_leaf_ref *ref) |
@@ -4553,262 +4770,471 @@ out: | |||
4553 | } | 4770 | } |
4554 | #endif | 4771 | #endif |
4555 | 4772 | ||
4773 | struct walk_control { | ||
4774 | u64 refs[BTRFS_MAX_LEVEL]; | ||
4775 | u64 flags[BTRFS_MAX_LEVEL]; | ||
4776 | struct btrfs_key update_progress; | ||
4777 | int stage; | ||
4778 | int level; | ||
4779 | int shared_level; | ||
4780 | int update_ref; | ||
4781 | int keep_locks; | ||
4782 | }; | ||
4783 | |||
4784 | #define DROP_REFERENCE 1 | ||
4785 | #define UPDATE_BACKREF 2 | ||
4786 | |||
4556 | /* | 4787 | /* |
4557 | * helper function for drop_subtree, this function is similar to | 4788 | * helper to process tree block while walking down the tree. |
4558 | * walk_down_tree. The main difference is that it checks reference | 4789 | * |
4559 | * counts while tree blocks are locked. | 4790 | * when wc->stage == DROP_REFERENCE, this function checks |
4791 | * reference count of the block. if the block is shared and | ||
4792 | * we need update back refs for the subtree rooted at the | ||
4793 | * block, this function changes wc->stage to UPDATE_BACKREF | ||
4794 | * | ||
4795 | * when wc->stage == UPDATE_BACKREF, this function updates | ||
4796 | * back refs for pointers in the block. | ||
4797 | * | ||
4798 | * NOTE: return value 1 means we should stop walking down. | ||
4560 | */ | 4799 | */ |
4561 | static noinline int walk_down_tree(struct btrfs_trans_handle *trans, | 4800 | static noinline int walk_down_proc(struct btrfs_trans_handle *trans, |
4562 | struct btrfs_root *root, | 4801 | struct btrfs_root *root, |
4563 | struct btrfs_path *path, int *level) | 4802 | struct btrfs_path *path, |
4803 | struct walk_control *wc) | ||
4564 | { | 4804 | { |
4565 | struct extent_buffer *next; | 4805 | int level = wc->level; |
4566 | struct extent_buffer *cur; | 4806 | struct extent_buffer *eb = path->nodes[level]; |
4567 | struct extent_buffer *parent; | 4807 | struct btrfs_key key; |
4568 | u64 bytenr; | 4808 | u64 flag = BTRFS_BLOCK_FLAG_FULL_BACKREF; |
4569 | u64 ptr_gen; | ||
4570 | u64 refs; | ||
4571 | u64 flags; | ||
4572 | u32 blocksize; | ||
4573 | int ret; | 4809 | int ret; |
4574 | 4810 | ||
4575 | cur = path->nodes[*level]; | 4811 | if (wc->stage == UPDATE_BACKREF && |
4576 | ret = btrfs_lookup_extent_info(trans, root, cur->start, cur->len, | 4812 | btrfs_header_owner(eb) != root->root_key.objectid) |
4577 | &refs, &flags); | 4813 | return 1; |
4578 | BUG_ON(ret); | ||
4579 | if (refs > 1) | ||
4580 | goto out; | ||
4581 | 4814 | ||
4582 | BUG_ON(!(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)); | 4815 | /* |
4816 | * when reference count of tree block is 1, it won't increase | ||
4817 | * again. once full backref flag is set, we never clear it. | ||
4818 | */ | ||
4819 | if ((wc->stage == DROP_REFERENCE && wc->refs[level] != 1) || | ||
4820 | (wc->stage == UPDATE_BACKREF && !(wc->flags[level] & flag))) { | ||
4821 | BUG_ON(!path->locks[level]); | ||
4822 | ret = btrfs_lookup_extent_info(trans, root, | ||
4823 | eb->start, eb->len, | ||
4824 | &wc->refs[level], | ||
4825 | &wc->flags[level]); | ||
4826 | BUG_ON(ret); | ||
4827 | BUG_ON(wc->refs[level] == 0); | ||
4828 | } | ||
4583 | 4829 | ||
4584 | while (*level >= 0) { | 4830 | if (wc->stage == DROP_REFERENCE && |
4585 | cur = path->nodes[*level]; | 4831 | wc->update_ref && wc->refs[level] > 1) { |
4586 | if (*level == 0) { | 4832 | BUG_ON(eb == root->node); |
4587 | ret = btrfs_drop_leaf_ref(trans, root, cur); | 4833 | BUG_ON(path->slots[level] > 0); |
4588 | BUG_ON(ret); | 4834 | if (level == 0) |
4589 | clean_tree_block(trans, root, cur); | 4835 | btrfs_item_key_to_cpu(eb, &key, path->slots[level]); |
4590 | break; | 4836 | else |
4591 | } | 4837 | btrfs_node_key_to_cpu(eb, &key, path->slots[level]); |
4592 | if (path->slots[*level] >= btrfs_header_nritems(cur)) { | 4838 | if (btrfs_header_owner(eb) == root->root_key.objectid && |
4593 | clean_tree_block(trans, root, cur); | 4839 | btrfs_comp_cpu_keys(&key, &wc->update_progress) >= 0) { |
4594 | break; | 4840 | wc->stage = UPDATE_BACKREF; |
4841 | wc->shared_level = level; | ||
4595 | } | 4842 | } |
4843 | } | ||
4596 | 4844 | ||
4597 | bytenr = btrfs_node_blockptr(cur, path->slots[*level]); | 4845 | if (wc->stage == DROP_REFERENCE) { |
4598 | blocksize = btrfs_level_size(root, *level - 1); | 4846 | if (wc->refs[level] > 1) |
4599 | ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]); | 4847 | return 1; |
4600 | 4848 | ||
4601 | next = read_tree_block(root, bytenr, blocksize, ptr_gen); | 4849 | if (path->locks[level] && !wc->keep_locks) { |
4602 | btrfs_tree_lock(next); | 4850 | btrfs_tree_unlock(eb); |
4603 | btrfs_set_lock_blocking(next); | 4851 | path->locks[level] = 0; |
4852 | } | ||
4853 | return 0; | ||
4854 | } | ||
4604 | 4855 | ||
4605 | ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize, | 4856 | /* wc->stage == UPDATE_BACKREF */ |
4606 | &refs, &flags); | 4857 | if (!(wc->flags[level] & flag)) { |
4858 | BUG_ON(!path->locks[level]); | ||
4859 | ret = btrfs_inc_ref(trans, root, eb, 1); | ||
4607 | BUG_ON(ret); | 4860 | BUG_ON(ret); |
4608 | if (refs > 1) { | 4861 | ret = btrfs_dec_ref(trans, root, eb, 0); |
4609 | parent = path->nodes[*level]; | 4862 | BUG_ON(ret); |
4610 | ret = btrfs_free_extent(trans, root, bytenr, | 4863 | ret = btrfs_set_disk_extent_flags(trans, root, eb->start, |
4611 | blocksize, parent->start, | 4864 | eb->len, flag, 0); |
4612 | btrfs_header_owner(parent), | 4865 | BUG_ON(ret); |
4613 | *level - 1, 0); | 4866 | wc->flags[level] |= flag; |
4867 | } | ||
4868 | |||
4869 | /* | ||
4870 | * the block is shared by multiple trees, so it's not good to | ||
4871 | * keep the tree lock | ||
4872 | */ | ||
4873 | if (path->locks[level] && level > 0) { | ||
4874 | btrfs_tree_unlock(eb); | ||
4875 | path->locks[level] = 0; | ||
4876 | } | ||
4877 | return 0; | ||
4878 | } | ||
4879 | |||
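walk_down_proc() and walk_up_proc() implement the two stages that struct walk_control tracks: DROP_REFERENCE drops refs on the way down, and when a shared block is hit with update_ref set the walker switches to UPDATE_BACKREF for that subtree, flipping back to DROP_REFERENCE once the walk climbs past the level where the switch happened. The toy below only models that stage switch on a fixed refs-per-level array; the real code also checks block ownership, keys and locking.

#include <stdio.h>

#define DROP_REFERENCE 1
#define UPDATE_BACKREF 2

struct walk_control {
    int stage;          /* DROP_REFERENCE or UPDATE_BACKREF */
    int level;          /* current level in the path */
    int shared_level;   /* level where we switched to UPDATE_BACKREF */
    int update_ref;     /* caller asked us to fix backrefs of shared blocks */
};

/* toy per-level refs, as walk_down_proc would have looked them up */
static const int refs[4] = { 1, 1, 2, 1 };   /* refs[level]: level 2 is shared */

static int walk_down_proc(struct walk_control *wc)
{
    int level = wc->level;

    if (wc->stage == DROP_REFERENCE && wc->update_ref && refs[level] > 1) {
        /* shared block: convert the subtree to full backrefs first */
        wc->stage = UPDATE_BACKREF;
        wc->shared_level = level;
    }
    if (wc->stage == DROP_REFERENCE && refs[level] > 1)
        return 1;       /* stop descending; the block stays shared */
    return 0;
}

static void walk_up_proc(struct walk_control *wc)
{
    if (wc->stage == UPDATE_BACKREF && wc->level >= wc->shared_level) {
        /* done fixing backrefs under the shared block: back to dropping */
        wc->stage = DROP_REFERENCE;
        wc->shared_level = -1;
    }
}

int main(void)
{
    struct walk_control wc = { DROP_REFERENCE, 3, -1, 1 };

    /* walk down from the root (level 3) toward the leaves (level 0) */
    while (wc.level >= 0 && !walk_down_proc(&wc)) {
        printf("down: level %d, stage %s\n", wc.level,
               wc.stage == DROP_REFERENCE ? "DROP_REFERENCE" : "UPDATE_BACKREF");
        if (wc.level == 0)
            break;
        wc.level--;
    }

    /* walk back up; at the shared level the stage flips back to dropping */
    for (; wc.level <= 3; wc.level++) {
        walk_up_proc(&wc);
        printf("up:   level %d, stage %s\n", wc.level,
               wc.stage == DROP_REFERENCE ? "DROP_REFERENCE" : "UPDATE_BACKREF");
    }
    return 0;
}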
4880 | /* | ||
4881 | * helper to process tree block while walking up the tree. ||
4882 | * | ||
4883 | * when wc->stage == DROP_REFERENCE, this function drops | ||
4884 | * reference count on the block. | ||
4885 | * | ||
4886 | * when wc->stage == UPDATE_BACKREF, this function changes | ||
4887 | * wc->stage back to DROP_REFERENCE if we changed wc->stage | ||
4888 | * to UPDATE_BACKREF previously while processing the block. | ||
4889 | * | ||
4890 | * NOTE: return value 1 means we should stop walking up. | ||
4891 | */ | ||
4892 | static noinline int walk_up_proc(struct btrfs_trans_handle *trans, | ||
4893 | struct btrfs_root *root, | ||
4894 | struct btrfs_path *path, | ||
4895 | struct walk_control *wc) | ||
4896 | { | ||
4897 | int ret = 0; | ||
4898 | int level = wc->level; | ||
4899 | struct extent_buffer *eb = path->nodes[level]; | ||
4900 | u64 parent = 0; | ||
4901 | |||
4902 | if (wc->stage == UPDATE_BACKREF) { | ||
4903 | BUG_ON(wc->shared_level < level); | ||
4904 | if (level < wc->shared_level) | ||
4905 | goto out; | ||
4906 | |||
4907 | BUG_ON(wc->refs[level] <= 1); | ||
4908 | ret = find_next_key(path, level + 1, &wc->update_progress); | ||
4909 | if (ret > 0) | ||
4910 | wc->update_ref = 0; | ||
4911 | |||
4912 | wc->stage = DROP_REFERENCE; | ||
4913 | wc->shared_level = -1; | ||
4914 | path->slots[level] = 0; | ||
4915 | |||
4916 | /* | ||
4917 | * check reference count again if the block isn't locked. | ||
4918 | * we should start walking down the tree again if reference | ||
4919 | * count is one. | ||
4920 | */ | ||
4921 | if (!path->locks[level]) { | ||
4922 | BUG_ON(level == 0); | ||
4923 | btrfs_tree_lock(eb); | ||
4924 | btrfs_set_lock_blocking(eb); | ||
4925 | path->locks[level] = 1; | ||
4926 | |||
4927 | ret = btrfs_lookup_extent_info(trans, root, | ||
4928 | eb->start, eb->len, | ||
4929 | &wc->refs[level], | ||
4930 | &wc->flags[level]); | ||
4614 | BUG_ON(ret); | 4931 | BUG_ON(ret); |
4615 | path->slots[*level]++; | 4932 | BUG_ON(wc->refs[level] == 0); |
4616 | btrfs_tree_unlock(next); | 4933 | if (wc->refs[level] == 1) { |
4617 | free_extent_buffer(next); | 4934 | btrfs_tree_unlock(eb); |
4618 | continue; | 4935 | path->locks[level] = 0; |
4936 | return 1; | ||
4937 | } | ||
4938 | } else { | ||
4939 | BUG_ON(level != 0); | ||
4619 | } | 4940 | } |
4941 | } | ||
4620 | 4942 | ||
4621 | BUG_ON(!(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)); | 4943 | /* wc->stage == DROP_REFERENCE */ |
4944 | BUG_ON(wc->refs[level] > 1 && !path->locks[level]); | ||
4622 | 4945 | ||
4623 | *level = btrfs_header_level(next); | 4946 | if (wc->refs[level] == 1) { |
4624 | path->nodes[*level] = next; | 4947 | if (level == 0) { |
4625 | path->slots[*level] = 0; | 4948 | if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) |
4626 | path->locks[*level] = 1; | 4949 | ret = btrfs_dec_ref(trans, root, eb, 1); |
4627 | cond_resched(); | 4950 | else |
4951 | ret = btrfs_dec_ref(trans, root, eb, 0); | ||
4952 | BUG_ON(ret); | ||
4953 | } | ||
4954 | /* make block locked assertion in clean_tree_block happy */ | ||
4955 | if (!path->locks[level] && | ||
4956 | btrfs_header_generation(eb) == trans->transid) { | ||
4957 | btrfs_tree_lock(eb); | ||
4958 | btrfs_set_lock_blocking(eb); | ||
4959 | path->locks[level] = 1; | ||
4960 | } | ||
4961 | clean_tree_block(trans, root, eb); | ||
4628 | } | 4962 | } |
4629 | out: | ||
4630 | if (path->nodes[*level] == root->node) | ||
4631 | parent = path->nodes[*level]; | ||
4632 | else | ||
4633 | parent = path->nodes[*level + 1]; | ||
4634 | bytenr = path->nodes[*level]->start; | ||
4635 | blocksize = path->nodes[*level]->len; | ||
4636 | 4963 | ||
4637 | ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent->start, | 4964 | if (eb == root->node) { |
4638 | btrfs_header_owner(parent), *level, 0); | 4965 | if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) |
4966 | parent = eb->start; | ||
4967 | else | ||
4968 | BUG_ON(root->root_key.objectid != | ||
4969 | btrfs_header_owner(eb)); | ||
4970 | } else { | ||
4971 | if (wc->flags[level + 1] & BTRFS_BLOCK_FLAG_FULL_BACKREF) | ||
4972 | parent = path->nodes[level + 1]->start; | ||
4973 | else | ||
4974 | BUG_ON(root->root_key.objectid != | ||
4975 | btrfs_header_owner(path->nodes[level + 1])); | ||
4976 | } | ||
4977 | |||
4978 | ret = btrfs_free_extent(trans, root, eb->start, eb->len, parent, | ||
4979 | root->root_key.objectid, level, 0); | ||
4639 | BUG_ON(ret); | 4980 | BUG_ON(ret); |
4981 | out: | ||
4982 | wc->refs[level] = 0; | ||
4983 | wc->flags[level] = 0; | ||
4984 | return ret; | ||
4985 | } | ||
4986 | |||
4987 | static noinline int walk_down_tree(struct btrfs_trans_handle *trans, | ||
4988 | struct btrfs_root *root, | ||
4989 | struct btrfs_path *path, | ||
4990 | struct walk_control *wc) | ||
4991 | { | ||
4992 | struct extent_buffer *next; | ||
4993 | struct extent_buffer *cur; | ||
4994 | u64 bytenr; | ||
4995 | u64 ptr_gen; | ||
4996 | u32 blocksize; | ||
4997 | int level = wc->level; | ||
4998 | int ret; | ||
4999 | |||
5000 | while (level >= 0) { | ||
5001 | cur = path->nodes[level]; | ||
5002 | BUG_ON(path->slots[level] >= btrfs_header_nritems(cur)); | ||
5003 | |||
5004 | ret = walk_down_proc(trans, root, path, wc); | ||
5005 | if (ret > 0) | ||
5006 | break; | ||
5007 | |||
5008 | if (level == 0) | ||
5009 | break; | ||
5010 | |||
5011 | bytenr = btrfs_node_blockptr(cur, path->slots[level]); | ||
5012 | blocksize = btrfs_level_size(root, level - 1); | ||
5013 | ptr_gen = btrfs_node_ptr_generation(cur, path->slots[level]); | ||
5014 | |||
5015 | next = read_tree_block(root, bytenr, blocksize, ptr_gen); | ||
5016 | btrfs_tree_lock(next); | ||
5017 | btrfs_set_lock_blocking(next); | ||
4640 | 5018 | ||
4641 | if (path->locks[*level]) { | 5019 | level--; |
4642 | btrfs_tree_unlock(path->nodes[*level]); | 5020 | BUG_ON(level != btrfs_header_level(next)); |
4643 | path->locks[*level] = 0; | 5021 | path->nodes[level] = next; |
5022 | path->slots[level] = 0; | ||
5023 | path->locks[level] = 1; | ||
5024 | wc->level = level; | ||
4644 | } | 5025 | } |
4645 | free_extent_buffer(path->nodes[*level]); | ||
4646 | path->nodes[*level] = NULL; | ||
4647 | *level += 1; | ||
4648 | cond_resched(); | ||
4649 | return 0; | 5026 | return 0; |
4650 | } | 5027 | } |
4651 | 5028 | ||
4652 | /* | ||
4653 | * helper for dropping snapshots. This walks back up the tree in the path | ||
4654 | * to find the first node higher up where we haven't yet gone through | ||
4655 | * all the slots | ||
4656 | */ | ||
4657 | static noinline int walk_up_tree(struct btrfs_trans_handle *trans, | 5029 | static noinline int walk_up_tree(struct btrfs_trans_handle *trans, |
4658 | struct btrfs_root *root, | 5030 | struct btrfs_root *root, |
4659 | struct btrfs_path *path, | 5031 | struct btrfs_path *path, |
4660 | int *level, int max_level) | 5032 | struct walk_control *wc, int max_level) |
4661 | { | 5033 | { |
4662 | struct btrfs_root_item *root_item = &root->root_item; | 5034 | int level = wc->level; |
4663 | int i; | ||
4664 | int slot; | ||
4665 | int ret; | 5035 | int ret; |
4666 | 5036 | ||
4667 | for (i = *level; i < max_level && path->nodes[i]; i++) { | 5037 | path->slots[level] = btrfs_header_nritems(path->nodes[level]); |
4668 | slot = path->slots[i]; | 5038 | while (level < max_level && path->nodes[level]) { |
4669 | if (slot + 1 < btrfs_header_nritems(path->nodes[i])) { | 5039 | wc->level = level; |
4670 | /* | 5040 | if (path->slots[level] + 1 < |
4671 | * there is more work to do in this level. | 5041 | btrfs_header_nritems(path->nodes[level])) { |
4672 | * Update the drop_progress marker to reflect | 5042 | path->slots[level]++; |
4673 | * the work we've done so far, and then bump | ||
4674 | * the slot number | ||
4675 | */ | ||
4676 | path->slots[i]++; | ||
4677 | WARN_ON(*level == 0); | ||
4678 | if (max_level == BTRFS_MAX_LEVEL) { | ||
4679 | btrfs_node_key(path->nodes[i], | ||
4680 | &root_item->drop_progress, | ||
4681 | path->slots[i]); | ||
4682 | root_item->drop_level = i; | ||
4683 | } | ||
4684 | *level = i; | ||
4685 | return 0; | 5043 | return 0; |
4686 | } else { | 5044 | } else { |
4687 | struct extent_buffer *parent; | 5045 | ret = walk_up_proc(trans, root, path, wc); |
4688 | 5046 | if (ret > 0) | |
4689 | /* | 5047 | return 0; |
4690 | * this whole node is done, free our reference | ||
4691 | * on it and go up one level | ||
4692 | */ | ||
4693 | if (path->nodes[*level] == root->node) | ||
4694 | parent = path->nodes[*level]; | ||
4695 | else | ||
4696 | parent = path->nodes[*level + 1]; | ||
4697 | 5048 | ||
4698 | clean_tree_block(trans, root, path->nodes[i]); | 5049 | if (path->locks[level]) { |
4699 | ret = btrfs_free_extent(trans, root, | 5050 | btrfs_tree_unlock(path->nodes[level]); |
4700 | path->nodes[i]->start, | 5051 | path->locks[level] = 0; |
4701 | path->nodes[i]->len, | ||
4702 | parent->start, | ||
4703 | btrfs_header_owner(parent), | ||
4704 | *level, 0); | ||
4705 | BUG_ON(ret); | ||
4706 | if (path->locks[*level]) { | ||
4707 | btrfs_tree_unlock(path->nodes[i]); | ||
4708 | path->locks[i] = 0; | ||
4709 | } | 5052 | } |
4710 | free_extent_buffer(path->nodes[i]); | 5053 | free_extent_buffer(path->nodes[level]); |
4711 | path->nodes[i] = NULL; | 5054 | path->nodes[level] = NULL; |
4712 | *level = i + 1; | 5055 | level++; |
4713 | } | 5056 | } |
4714 | } | 5057 | } |
4715 | return 1; | 5058 | return 1; |
4716 | } | 5059 | } |
4717 | 5060 | ||
4718 | /* | 5061 | /* |
4719 | * drop the reference count on the tree rooted at 'snap'. This traverses | 5062 | * drop a subvolume tree. |
4720 | * the tree freeing any blocks that have a ref count of zero after being | 5063 | * |
4721 | * decremented. | 5064 | * this function traverses the tree freeing any blocks that only |
5065 | * referenced by the tree. | ||
5066 | * | ||
5067 | * when a shared tree block is found. this function decreases its | ||
5068 | * reference count by one. if update_ref is true, this function | ||
5069 | * also make sure backrefs for the shared block and all lower level | ||
5070 | * blocks are properly updated. | ||
4722 | */ | 5071 | */ |
4723 | int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root | 5072 | int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref) |
4724 | *root) | ||
4725 | { | 5073 | { |
4726 | int ret = 0; | ||
4727 | int wret; | ||
4728 | int level; | ||
4729 | struct btrfs_path *path; | 5074 | struct btrfs_path *path; |
4730 | int update_count; | 5075 | struct btrfs_trans_handle *trans; |
5076 | struct btrfs_root *tree_root = root->fs_info->tree_root; | ||
4731 | struct btrfs_root_item *root_item = &root->root_item; | 5077 | struct btrfs_root_item *root_item = &root->root_item; |
5078 | struct walk_control *wc; | ||
5079 | struct btrfs_key key; | ||
5080 | int err = 0; | ||
5081 | int ret; | ||
5082 | int level; | ||
4732 | 5083 | ||
4733 | path = btrfs_alloc_path(); | 5084 | path = btrfs_alloc_path(); |
4734 | BUG_ON(!path); | 5085 | BUG_ON(!path); |
4735 | 5086 | ||
4736 | level = btrfs_header_level(root->node); | 5087 | wc = kzalloc(sizeof(*wc), GFP_NOFS); |
5088 | BUG_ON(!wc); | ||
5089 | |||
5090 | trans = btrfs_start_transaction(tree_root, 1); | ||
5091 | |||
4737 | if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) { | 5092 | if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) { |
5093 | level = btrfs_header_level(root->node); | ||
4738 | path->nodes[level] = btrfs_lock_root_node(root); | 5094 | path->nodes[level] = btrfs_lock_root_node(root); |
4739 | btrfs_set_lock_blocking(path->nodes[level]); | 5095 | btrfs_set_lock_blocking(path->nodes[level]); |
4740 | path->slots[level] = 0; | 5096 | path->slots[level] = 0; |
4741 | path->locks[level] = 1; | 5097 | path->locks[level] = 1; |
5098 | memset(&wc->update_progress, 0, | ||
5099 | sizeof(wc->update_progress)); | ||
4742 | } else { | 5100 | } else { |
4743 | struct btrfs_key key; | ||
4744 | struct btrfs_disk_key found_key; | ||
4745 | struct extent_buffer *node; | ||
4746 | |||
4747 | btrfs_disk_key_to_cpu(&key, &root_item->drop_progress); | 5101 | btrfs_disk_key_to_cpu(&key, &root_item->drop_progress); |
5102 | memcpy(&wc->update_progress, &key, | ||
5103 | sizeof(wc->update_progress)); | ||
5104 | |||
4748 | level = root_item->drop_level; | 5105 | level = root_item->drop_level; |
5106 | BUG_ON(level == 0); | ||
4749 | path->lowest_level = level; | 5107 | path->lowest_level = level; |
4750 | wret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | 5108 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); |
4751 | if (wret < 0) { | 5109 | path->lowest_level = 0; |
4752 | ret = wret; | 5110 | if (ret < 0) { |
5111 | err = ret; | ||
4753 | goto out; | 5112 | goto out; |
4754 | } | 5113 | } |
4755 | node = path->nodes[level]; | 5114 | btrfs_node_key_to_cpu(path->nodes[level], &key, |
4756 | btrfs_node_key(node, &found_key, path->slots[level]); | 5115 | path->slots[level]); |
4757 | WARN_ON(memcmp(&found_key, &root_item->drop_progress, | 5116 | WARN_ON(memcmp(&key, &wc->update_progress, sizeof(key))); |
4758 | sizeof(found_key))); | 5117 | |
4759 | /* | 5118 | /* |
4760 | * unlock our path, this is safe because only this | 5119 | * unlock our path, this is safe because only this |
4761 | * function is allowed to delete this snapshot | 5120 | * function is allowed to delete this snapshot |
4762 | */ | 5121 | */ |
4763 | btrfs_unlock_up_safe(path, 0); | 5122 | btrfs_unlock_up_safe(path, 0); |
5123 | |||
5124 | level = btrfs_header_level(root->node); | ||
5125 | while (1) { | ||
5126 | btrfs_tree_lock(path->nodes[level]); | ||
5127 | btrfs_set_lock_blocking(path->nodes[level]); | ||
5128 | |||
5129 | ret = btrfs_lookup_extent_info(trans, root, | ||
5130 | path->nodes[level]->start, | ||
5131 | path->nodes[level]->len, | ||
5132 | &wc->refs[level], | ||
5133 | &wc->flags[level]); | ||
5134 | BUG_ON(ret); | ||
5135 | BUG_ON(wc->refs[level] == 0); | ||
5136 | |||
5137 | if (level == root_item->drop_level) | ||
5138 | break; | ||
5139 | |||
5140 | btrfs_tree_unlock(path->nodes[level]); | ||
5141 | WARN_ON(wc->refs[level] != 1); | ||
5142 | level--; | ||
5143 | } | ||
4764 | } | 5144 | } |
5145 | |||
5146 | wc->level = level; | ||
5147 | wc->shared_level = -1; | ||
5148 | wc->stage = DROP_REFERENCE; | ||
5149 | wc->update_ref = update_ref; | ||
5150 | wc->keep_locks = 0; | ||
5151 | |||
4765 | while (1) { | 5152 | while (1) { |
4766 | unsigned long update; | 5153 | ret = walk_down_tree(trans, root, path, wc); |
4767 | wret = walk_down_tree(trans, root, path, &level); | 5154 | if (ret < 0) { |
4768 | if (wret > 0) | 5155 | err = ret; |
4769 | break; | 5156 | break; |
4770 | if (wret < 0) | 5157 | } |
4771 | ret = wret; | ||
4772 | 5158 | ||
4773 | wret = walk_up_tree(trans, root, path, &level, | 5159 | ret = walk_up_tree(trans, root, path, wc, BTRFS_MAX_LEVEL); |
4774 | BTRFS_MAX_LEVEL); | 5160 | if (ret < 0) { |
4775 | if (wret > 0) | 5161 | err = ret; |
4776 | break; | 5162 | break; |
4777 | if (wret < 0) | 5163 | } |
4778 | ret = wret; | 5164 | |
4779 | if (trans->transaction->in_commit || | 5165 | if (ret > 0) { |
4780 | trans->transaction->delayed_refs.flushing) { | 5166 | BUG_ON(wc->stage != DROP_REFERENCE); |
4781 | ret = -EAGAIN; | ||
4782 | break; | 5167 | break; |
4783 | } | 5168 | } |
4784 | for (update_count = 0; update_count < 16; update_count++) { | 5169 | |
5170 | if (wc->stage == DROP_REFERENCE) { | ||
5171 | level = wc->level; | ||
5172 | btrfs_node_key(path->nodes[level], | ||
5173 | &root_item->drop_progress, | ||
5174 | path->slots[level]); | ||
5175 | root_item->drop_level = level; | ||
5176 | } | ||
5177 | |||
5178 | BUG_ON(wc->level == 0); | ||
5179 | if (trans->transaction->in_commit || | ||
5180 | trans->transaction->delayed_refs.flushing) { | ||
5181 | ret = btrfs_update_root(trans, tree_root, | ||
5182 | &root->root_key, | ||
5183 | root_item); | ||
5184 | BUG_ON(ret); | ||
5185 | |||
5186 | btrfs_end_transaction(trans, tree_root); | ||
5187 | trans = btrfs_start_transaction(tree_root, 1); | ||
5188 | } else { | ||
5189 | unsigned long update; | ||
4785 | update = trans->delayed_ref_updates; | 5190 | update = trans->delayed_ref_updates; |
4786 | trans->delayed_ref_updates = 0; | 5191 | trans->delayed_ref_updates = 0; |
4787 | if (update) | 5192 | if (update) |
4788 | btrfs_run_delayed_refs(trans, root, update); | 5193 | btrfs_run_delayed_refs(trans, tree_root, |
4789 | else | 5194 | update); |
4790 | break; | ||
4791 | } | 5195 | } |
4792 | } | 5196 | } |
5197 | btrfs_release_path(root, path); | ||
5198 | BUG_ON(err); | ||
5199 | |||
5200 | ret = btrfs_del_root(trans, tree_root, &root->root_key); | ||
5201 | BUG_ON(ret); | ||
5202 | |||
5203 | free_extent_buffer(root->node); | ||
5204 | free_extent_buffer(root->commit_root); | ||
5205 | kfree(root); | ||
4793 | out: | 5206 | out: |
5207 | btrfs_end_transaction(trans, tree_root); | ||
5208 | kfree(wc); | ||
4794 | btrfs_free_path(path); | 5209 | btrfs_free_path(path); |
4795 | return ret; | 5210 | return err; |
4796 | } | 5211 | } |
4797 | 5212 | ||
5213 | /* | ||
5214 | * drop subtree rooted at tree block 'node'. | ||
5215 | * | ||
5216 | * NOTE: this function will unlock and release tree block 'node' | ||
5217 | */ | ||
4798 | int btrfs_drop_subtree(struct btrfs_trans_handle *trans, | 5218 | int btrfs_drop_subtree(struct btrfs_trans_handle *trans, |
4799 | struct btrfs_root *root, | 5219 | struct btrfs_root *root, |
4800 | struct extent_buffer *node, | 5220 | struct extent_buffer *node, |
4801 | struct extent_buffer *parent) | 5221 | struct extent_buffer *parent) |
4802 | { | 5222 | { |
4803 | struct btrfs_path *path; | 5223 | struct btrfs_path *path; |
5224 | struct walk_control *wc; | ||
4804 | int level; | 5225 | int level; |
4805 | int parent_level; | 5226 | int parent_level; |
4806 | int ret = 0; | 5227 | int ret = 0; |
4807 | int wret; | 5228 | int wret; |
4808 | 5229 | ||
5230 | BUG_ON(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID); | ||
5231 | |||
4809 | path = btrfs_alloc_path(); | 5232 | path = btrfs_alloc_path(); |
4810 | BUG_ON(!path); | 5233 | BUG_ON(!path); |
4811 | 5234 | ||
5235 | wc = kzalloc(sizeof(*wc), GFP_NOFS); | ||
5236 | BUG_ON(!wc); | ||
5237 | |||
4812 | btrfs_assert_tree_locked(parent); | 5238 | btrfs_assert_tree_locked(parent); |
4813 | parent_level = btrfs_header_level(parent); | 5239 | parent_level = btrfs_header_level(parent); |
4814 | extent_buffer_get(parent); | 5240 | extent_buffer_get(parent); |
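The rewritten snapshot deletion path above no longer threads a bare level counter through walk_down_tree()/walk_up_tree(); all of the walk state lives in a heap-allocated struct walk_control. Its definition sits earlier in extent-tree.c and is not part of this hunk, so the sketch below only reconstructs the fields the code above touches; treat it as an inference from usage, not the authoritative layout.

        struct walk_control {
                u64 refs[BTRFS_MAX_LEVEL];      /* ref count of the block held at each level */
                u64 flags[BTRFS_MAX_LEVEL];     /* e.g. BTRFS_BLOCK_FLAG_FULL_BACKREF */
                struct btrfs_key update_progress; /* resume point copied from drop_progress */
                int stage;                      /* set to DROP_REFERENCE before the walk */
                int level;                      /* level the walk currently sits at */
                int shared_level;               /* initialised to -1 before the walk */
                int update_ref;                 /* the update_ref value passed in */
                int keep_locks;                 /* keep the path locked while walking up */
        };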
@@ -4817,24 +5243,33 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans, | |||
4817 | 5243 | ||
4818 | btrfs_assert_tree_locked(node); | 5244 | btrfs_assert_tree_locked(node); |
4819 | level = btrfs_header_level(node); | 5245 | level = btrfs_header_level(node); |
4820 | extent_buffer_get(node); | ||
4821 | path->nodes[level] = node; | 5246 | path->nodes[level] = node; |
4822 | path->slots[level] = 0; | 5247 | path->slots[level] = 0; |
5248 | path->locks[level] = 1; | ||
5249 | |||
5250 | wc->refs[parent_level] = 1; | ||
5251 | wc->flags[parent_level] = BTRFS_BLOCK_FLAG_FULL_BACKREF; | ||
5252 | wc->level = level; | ||
5253 | wc->shared_level = -1; | ||
5254 | wc->stage = DROP_REFERENCE; | ||
5255 | wc->update_ref = 0; | ||
5256 | wc->keep_locks = 1; | ||
4823 | 5257 | ||
4824 | while (1) { | 5258 | while (1) { |
4825 | wret = walk_down_tree(trans, root, path, &level); | 5259 | wret = walk_down_tree(trans, root, path, wc); |
4826 | if (wret < 0) | 5260 | if (wret < 0) { |
4827 | ret = wret; | 5261 | ret = wret; |
4828 | if (wret != 0) | ||
4829 | break; | 5262 | break; |
5263 | } | ||
4830 | 5264 | ||
4831 | wret = walk_up_tree(trans, root, path, &level, parent_level); | 5265 | wret = walk_up_tree(trans, root, path, wc, parent_level); |
4832 | if (wret < 0) | 5266 | if (wret < 0) |
4833 | ret = wret; | 5267 | ret = wret; |
4834 | if (wret != 0) | 5268 | if (wret != 0) |
4835 | break; | 5269 | break; |
4836 | } | 5270 | } |
4837 | 5271 | ||
5272 | kfree(wc); | ||
4838 | btrfs_free_path(path); | 5273 | btrfs_free_path(path); |
4839 | return ret; | 5274 | return ret; |
4840 | } | 5275 | } |
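The snapshot deletion path and btrfs_drop_subtree() now drive the same pair of helpers in the same shape of loop; what differs is the walk_control setup (update_ref and keep_locks) and the level at which walk_up_tree() is told to stop, BTRFS_MAX_LEVEL for a whole snapshot versus parent_level for a subtree. A minimal sketch of that shared loop, with stop_level standing in for whichever bound the caller uses:

        while (1) {
                /* descend as far as the reference counts allow */
                ret = walk_down_tree(trans, root, path, wc);
                if (ret < 0)
                        break;                  /* error */

                /* climb back up, freeing blocks whose last reference dropped;
                 * a positive return means the walk passed stop_level and the
                 * whole (sub)tree has been processed */
                ret = walk_up_tree(trans, root, path, wc, stop_level);
                if (ret != 0)
                        break;
        }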
@@ -6739,11 +7174,16 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) | |||
6739 | &info->block_group_cache_tree); | 7174 | &info->block_group_cache_tree); |
6740 | spin_unlock(&info->block_group_cache_lock); | 7175 | spin_unlock(&info->block_group_cache_lock); |
6741 | 7176 | ||
6742 | btrfs_remove_free_space_cache(block_group); | ||
6743 | down_write(&block_group->space_info->groups_sem); | 7177 | down_write(&block_group->space_info->groups_sem); |
6744 | list_del(&block_group->list); | 7178 | list_del(&block_group->list); |
6745 | up_write(&block_group->space_info->groups_sem); | 7179 | up_write(&block_group->space_info->groups_sem); |
6746 | 7180 | ||
7181 | if (block_group->cached == BTRFS_CACHE_STARTED) | ||
7182 | wait_event(block_group->caching_q, | ||
7183 | block_group_cache_done(block_group)); | ||
7184 | |||
7185 | btrfs_remove_free_space_cache(block_group); | ||
7186 | |||
6747 | WARN_ON(atomic_read(&block_group->count) != 1); | 7187 | WARN_ON(atomic_read(&block_group->count) != 1); |
6748 | kfree(block_group); | 7188 | kfree(block_group); |
6749 | 7189 | ||
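Free space cache teardown in btrfs_free_block_groups() now happens only after any in-flight asynchronous caching of that block group has completed, so the caching worker cannot keep inserting entries into a structure that is being destroyed. A small sketch of the synchronization, using a hypothetical helper name and assuming (as the rest of this patch implies) that the caching thread sets cached to BTRFS_CACHE_FINISHED and wakes caching_q when it finishes:

        static void wait_block_group_caching(struct btrfs_block_group_cache *cache)
        {
                /* only block if a caching thread was actually started */
                if (cache->cached == BTRFS_CACHE_STARTED)
                        wait_event(cache->caching_q,
                                   block_group_cache_done(cache));
        }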
@@ -6809,9 +7249,19 @@ int btrfs_read_block_groups(struct btrfs_root *root) | |||
6809 | atomic_set(&cache->count, 1); | 7249 | atomic_set(&cache->count, 1); |
6810 | spin_lock_init(&cache->lock); | 7250 | spin_lock_init(&cache->lock); |
6811 | spin_lock_init(&cache->tree_lock); | 7251 | spin_lock_init(&cache->tree_lock); |
6812 | mutex_init(&cache->cache_mutex); | 7252 | cache->fs_info = info; |
7253 | init_waitqueue_head(&cache->caching_q); | ||
6813 | INIT_LIST_HEAD(&cache->list); | 7254 | INIT_LIST_HEAD(&cache->list); |
6814 | INIT_LIST_HEAD(&cache->cluster_list); | 7255 | INIT_LIST_HEAD(&cache->cluster_list); |
7256 | |||
7257 | /* | ||
7258 | * we only want to have 32k of ram per block group for keeping | ||
7259 | * track of free space, and if we pass 1/2 of that we want to | ||
7260 | * start converting things over to using bitmaps | ||
7261 | */ | ||
7262 | cache->extents_thresh = ((1024 * 32) / 2) / | ||
7263 | sizeof(struct btrfs_free_space); | ||
7264 | |||
6815 | read_extent_buffer(leaf, &cache->item, | 7265 | read_extent_buffer(leaf, &cache->item, |
6816 | btrfs_item_ptr_offset(leaf, path->slots[0]), | 7266 | btrfs_item_ptr_offset(leaf, path->slots[0]), |
6817 | sizeof(cache->item)); | 7267 | sizeof(cache->item)); |
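The "32k of ram per block group" comment above becomes a concrete entry count: half of 32KiB divided by sizeof(struct btrfs_free_space). The exact structure size is not visible in this diff, so the stand-in struct below (a plausible 64-bit layout of about 40 bytes) only illustrates the order of magnitude, a few hundred extent entries before the cache starts converting to bitmaps:

        #include <stdio.h>

        /* hypothetical stand-in; the real struct btrfs_free_space may differ */
        struct free_space_entry {
                unsigned long rb_node[3];       /* rb-tree linkage, 24 bytes on 64-bit */
                unsigned long long offset;
                unsigned long long bytes;
        };

        int main(void)
        {
                unsigned long thresh =
                        ((1024 * 32) / 2) / sizeof(struct free_space_entry);

                printf("entry size %zu bytes -> switch to bitmaps after %lu extents\n",
                       sizeof(struct free_space_entry), thresh);
                return 0;
        }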
@@ -6820,6 +7270,26 @@ int btrfs_read_block_groups(struct btrfs_root *root) | |||
6820 | key.objectid = found_key.objectid + found_key.offset; | 7270 | key.objectid = found_key.objectid + found_key.offset; |
6821 | btrfs_release_path(root, path); | 7271 | btrfs_release_path(root, path); |
6822 | cache->flags = btrfs_block_group_flags(&cache->item); | 7272 | cache->flags = btrfs_block_group_flags(&cache->item); |
7273 | cache->sectorsize = root->sectorsize; | ||
7274 | |||
7275 | remove_sb_from_cache(root, cache); | ||
7276 | |||
7277 | /* | ||
7278 | * check for two cases, either we are full, and therefore | ||
7279 | * don't need to bother with the caching work since we won't | ||
7280 | * find any space, or we are empty, and we can just add all | ||
7281 | * the space in and be done with it. This saves us _a lot_ of | ||
7282 | * time, particularly in the full case. | ||
7283 | */ | ||
7284 | if (found_key.offset == btrfs_block_group_used(&cache->item)) { | ||
7285 | cache->cached = BTRFS_CACHE_FINISHED; | ||
7286 | } else if (btrfs_block_group_used(&cache->item) == 0) { | ||
7287 | cache->cached = BTRFS_CACHE_FINISHED; | ||
7288 | add_new_free_space(cache, root->fs_info, | ||
7289 | found_key.objectid, | ||
7290 | found_key.objectid + | ||
7291 | found_key.offset); | ||
7292 | } | ||
6823 | 7293 | ||
6824 | ret = update_space_info(info, cache->flags, found_key.offset, | 7294 | ret = update_space_info(info, cache->flags, found_key.offset, |
6825 | btrfs_block_group_used(&cache->item), | 7295 | btrfs_block_group_used(&cache->item), |
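In a block group item the key's objectid is the group's start bytenr and its offset is the group's length (the code above advances to found_key.objectid + found_key.offset for the next group), so found_key.offset == btrfs_block_group_used() means the group is completely allocated, and used == 0 means it is completely empty. Both cases skip the background caching entirely, which the comment above notes is the big win for full groups. A condensed view of the decision, using the same names as the loop above:

        u64 used = btrfs_block_group_used(&cache->item);

        if (found_key.offset == used) {
                /* fully allocated: nothing to find, no caching needed */
                cache->cached = BTRFS_CACHE_FINISHED;
        } else if (used == 0) {
                /* completely empty: publish the whole range as free right away */
                cache->cached = BTRFS_CACHE_FINISHED;
                add_new_free_space(cache, root->fs_info,
                                   found_key.objectid,
                                   found_key.objectid + found_key.offset);
        }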
@@ -6863,10 +7333,19 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, | |||
6863 | cache->key.objectid = chunk_offset; | 7333 | cache->key.objectid = chunk_offset; |
6864 | cache->key.offset = size; | 7334 | cache->key.offset = size; |
6865 | cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; | 7335 | cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; |
7336 | cache->sectorsize = root->sectorsize; | ||
7337 | |||
7338 | /* | ||
7339 | * we only want to have 32k of ram per block group for keeping track | ||
7340 | * of free space, and if we pass 1/2 of that we want to start | ||
7341 | * converting things over to using bitmaps | ||
7342 | */ | ||
7343 | cache->extents_thresh = ((1024 * 32) / 2) / | ||
7344 | sizeof(struct btrfs_free_space); | ||
6866 | atomic_set(&cache->count, 1); | 7345 | atomic_set(&cache->count, 1); |
6867 | spin_lock_init(&cache->lock); | 7346 | spin_lock_init(&cache->lock); |
6868 | spin_lock_init(&cache->tree_lock); | 7347 | spin_lock_init(&cache->tree_lock); |
6869 | mutex_init(&cache->cache_mutex); | 7348 | init_waitqueue_head(&cache->caching_q); |
6870 | INIT_LIST_HEAD(&cache->list); | 7349 | INIT_LIST_HEAD(&cache->list); |
6871 | INIT_LIST_HEAD(&cache->cluster_list); | 7350 | INIT_LIST_HEAD(&cache->cluster_list); |
6872 | 7351 | ||
@@ -6875,6 +7354,12 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, | |||
6875 | cache->flags = type; | 7354 | cache->flags = type; |
6876 | btrfs_set_block_group_flags(&cache->item, type); | 7355 | btrfs_set_block_group_flags(&cache->item, type); |
6877 | 7356 | ||
7357 | cache->cached = BTRFS_CACHE_FINISHED; | ||
7358 | remove_sb_from_cache(root, cache); | ||
7359 | |||
7360 | add_new_free_space(cache, root->fs_info, chunk_offset, | ||
7361 | chunk_offset + size); | ||
7362 | |||
6878 | ret = update_space_info(root->fs_info, cache->flags, size, bytes_used, | 7363 | ret = update_space_info(root->fs_info, cache->flags, size, bytes_used, |
6879 | &cache->space_info); | 7364 | &cache->space_info); |
6880 | BUG_ON(ret); | 7365 | BUG_ON(ret); |
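A freshly created block group is entirely free, so btrfs_make_block_group() marks caching finished up front and hands the whole chunk to add_new_free_space(); remove_sb_from_cache() has already locked the super-block mirror ranges in pinned_extents so they get skipped. The helper below, with a hypothetical name, is an assumed sketch of that behaviour (the real add_new_free_space() is defined earlier in this file and may differ): every gap between pinned ranges in [start, end) ends up in the free space cache.

        static void add_unpinned_free_space(struct btrfs_block_group_cache *cache,
                                            struct btrfs_fs_info *info,
                                            u64 start, u64 end)
        {
                u64 pin_start, pin_end;

                while (start < end &&
                       !find_first_extent_bit(&info->pinned_extents, start,
                                              &pin_start, &pin_end,
                                              EXTENT_DIRTY | EXTENT_LOCKED) &&
                       pin_start < end) {
                        if (pin_start > start)
                                btrfs_add_free_space(cache, start,
                                                     pin_start - start);
                        start = pin_end + 1;
                }
                if (start < end)
                        btrfs_add_free_space(cache, start, end - start);
        }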
@@ -6933,7 +7418,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, | |||
6933 | rb_erase(&block_group->cache_node, | 7418 | rb_erase(&block_group->cache_node, |
6934 | &root->fs_info->block_group_cache_tree); | 7419 | &root->fs_info->block_group_cache_tree); |
6935 | spin_unlock(&root->fs_info->block_group_cache_lock); | 7420 | spin_unlock(&root->fs_info->block_group_cache_lock); |
6936 | btrfs_remove_free_space_cache(block_group); | 7421 | |
6937 | down_write(&block_group->space_info->groups_sem); | 7422 | down_write(&block_group->space_info->groups_sem); |
6938 | /* | 7423 | /* |
6939 | * we must use list_del_init so people can check to see if they | 7424 | * we must use list_del_init so people can check to see if they |
@@ -6942,11 +7427,18 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, | |||
6942 | list_del_init(&block_group->list); | 7427 | list_del_init(&block_group->list); |
6943 | up_write(&block_group->space_info->groups_sem); | 7428 | up_write(&block_group->space_info->groups_sem); |
6944 | 7429 | ||
7430 | if (block_group->cached == BTRFS_CACHE_STARTED) | ||
7431 | wait_event(block_group->caching_q, | ||
7432 | block_group_cache_done(block_group)); | ||
7433 | |||
7434 | btrfs_remove_free_space_cache(block_group); | ||
7435 | |||
6945 | spin_lock(&block_group->space_info->lock); | 7436 | spin_lock(&block_group->space_info->lock); |
6946 | block_group->space_info->total_bytes -= block_group->key.offset; | 7437 | block_group->space_info->total_bytes -= block_group->key.offset; |
6947 | block_group->space_info->bytes_readonly -= block_group->key.offset; | 7438 | block_group->space_info->bytes_readonly -= block_group->key.offset; |
6948 | spin_unlock(&block_group->space_info->lock); | 7439 | spin_unlock(&block_group->space_info->lock); |
6949 | block_group->space_info->full = 0; | 7440 | |
7441 | btrfs_clear_space_info_full(root->fs_info); | ||
6950 | 7442 | ||
6951 | btrfs_put_block_group(block_group); | 7443 | btrfs_put_block_group(block_group); |
6952 | btrfs_put_block_group(block_group); | 7444 | btrfs_put_block_group(block_group); |
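Finally, removing a block group used to clear the full flag only on that group's own space_info; btrfs_clear_space_info_full() clears it on every space_info on the filesystem, so the space given back by the removed chunk can satisfy a later chunk allocation whatever its profile. A sketch of the assumed implementation (the function itself is not part of this diff):

        void btrfs_clear_space_info_full(struct btrfs_fs_info *info)
        {
                struct btrfs_space_info *found;

                /* the space_info list is assumed to be RCU protected, as other
                 * read-side walks of it in this file are */
                rcu_read_lock();
                list_for_each_entry_rcu(found, &info->space_info, list)
                        found->full = 0;
                rcu_read_unlock();
        }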