author		Frederic Weisbecker <fweisbec@gmail.com>	2009-12-07 01:28:35 -0500
committer	Frederic Weisbecker <fweisbec@gmail.com>	2009-12-07 01:29:22 -0500
commit		6548698f929814375fa5d62ae1db96959b0418c1 (patch)
tree		340924ae82cb0946aa15045b2b72186de52a8146 /fs/btrfs/extent-tree.c
parent		1d2c6cfd40b2dece3bb958cbbc405a2c1536ab75 (diff)
parent		22763c5cf3690a681551162c15d34d935308c8d7 (diff)
Merge commit 'v2.6.32' into reiserfs/kill-bkl

Merge-reason: the tree was based on 2.6.31, and it's better to be up to
date with 2.6.32. Although no conflicting changes were made in between,
it gives benchmarking results closer to the latest kernel behaviour.
Diffstat (limited to 'fs/btrfs/extent-tree.c')

-rw-r--r--	fs/btrfs/extent-tree.c	2319

1 file changed, 1261 insertions(+), 1058 deletions(-)
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 72a2b9c28e9f..94627c4cc193 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -32,12 +32,12 @@
 #include "locking.h"
 #include "free-space-cache.h"
 
-static int update_reserved_extents(struct btrfs_root *root,
-				   u64 bytenr, u64 num, int reserve);
 static int update_block_group(struct btrfs_trans_handle *trans,
 			      struct btrfs_root *root,
 			      u64 bytenr, u64 num_bytes, int alloc,
 			      int mark_free);
+static int update_reserved_extents(struct btrfs_block_group_cache *cache,
+				   u64 num_bytes, int reserve);
 static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
 			       struct btrfs_root *root,
 			       u64 bytenr, u64 num_bytes, u64 parent,
@@ -57,10 +57,19 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
 				     u64 parent, u64 root_objectid,
 				     u64 flags, struct btrfs_disk_key *key,
 				     int level, struct btrfs_key *ins);
-
 static int do_chunk_alloc(struct btrfs_trans_handle *trans,
 			  struct btrfs_root *extent_root, u64 alloc_bytes,
 			  u64 flags, int force);
+static int pin_down_bytes(struct btrfs_trans_handle *trans,
+			  struct btrfs_root *root,
+			  struct btrfs_path *path,
+			  u64 bytenr, u64 num_bytes,
+			  int is_data, int reserved,
+			  struct extent_buffer **must_clean);
+static int find_next_key(struct btrfs_path *path, int level,
+			 struct btrfs_key *key);
+static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
+			    int dump_block_groups);
 
 static noinline int
 block_group_cache_done(struct btrfs_block_group_cache *cache)
@@ -153,34 +162,34 @@ block_group_cache_tree_search(struct btrfs_fs_info *info, u64 bytenr,
 	return ret;
 }
 
-/*
- * We always set EXTENT_LOCKED for the super mirror extents so we don't
- * overwrite them, so those bits need to be unset.  Also, if we are unmounting
- * with pinned extents still sitting there because we had a block group caching,
- * we need to clear those now, since we are done.
- */
-void btrfs_free_pinned_extents(struct btrfs_fs_info *info)
+static int add_excluded_extent(struct btrfs_root *root,
+			       u64 start, u64 num_bytes)
 {
-	u64 start, end, last = 0;
-	int ret;
+	u64 end = start + num_bytes - 1;
+	set_extent_bits(&root->fs_info->freed_extents[0],
+			start, end, EXTENT_UPTODATE, GFP_NOFS);
+	set_extent_bits(&root->fs_info->freed_extents[1],
+			start, end, EXTENT_UPTODATE, GFP_NOFS);
+	return 0;
+}
 
-	while (1) {
-		ret = find_first_extent_bit(&info->pinned_extents, last,
-					    &start, &end,
-					    EXTENT_LOCKED|EXTENT_DIRTY);
-		if (ret)
-			break;
+static void free_excluded_extents(struct btrfs_root *root,
+				  struct btrfs_block_group_cache *cache)
+{
+	u64 start, end;
 
-		clear_extent_bits(&info->pinned_extents, start, end,
-				  EXTENT_LOCKED|EXTENT_DIRTY, GFP_NOFS);
-		last = end+1;
-	}
+	start = cache->key.objectid;
+	end = start + cache->key.offset - 1;
+
+	clear_extent_bits(&root->fs_info->freed_extents[0],
+			  start, end, EXTENT_UPTODATE, GFP_NOFS);
+	clear_extent_bits(&root->fs_info->freed_extents[1],
+			  start, end, EXTENT_UPTODATE, GFP_NOFS);
 }
 
-static int remove_sb_from_cache(struct btrfs_root *root,
-				struct btrfs_block_group_cache *cache)
+static int exclude_super_stripes(struct btrfs_root *root,
+				 struct btrfs_block_group_cache *cache)
 {
-	struct btrfs_fs_info *fs_info = root->fs_info;
 	u64 bytenr;
 	u64 *logical;
 	int stripe_len;
@@ -192,17 +201,42 @@ static int remove_sb_from_cache(struct btrfs_root *root,
 					  cache->key.objectid, bytenr,
 					  0, &logical, &nr, &stripe_len);
 		BUG_ON(ret);
+
 		while (nr--) {
-			try_lock_extent(&fs_info->pinned_extents,
-					logical[nr],
-					logical[nr] + stripe_len - 1, GFP_NOFS);
+			cache->bytes_super += stripe_len;
+			ret = add_excluded_extent(root, logical[nr],
+						  stripe_len);
+			BUG_ON(ret);
 		}
+
 		kfree(logical);
 	}
-
 	return 0;
 }
 
+static struct btrfs_caching_control *
+get_caching_control(struct btrfs_block_group_cache *cache)
+{
+	struct btrfs_caching_control *ctl;
+
+	spin_lock(&cache->lock);
+	if (cache->cached != BTRFS_CACHE_STARTED) {
+		spin_unlock(&cache->lock);
+		return NULL;
+	}
+
+	ctl = cache->caching_ctl;
+	atomic_inc(&ctl->count);
+	spin_unlock(&cache->lock);
+	return ctl;
+}
+
+static void put_caching_control(struct btrfs_caching_control *ctl)
+{
+	if (atomic_dec_and_test(&ctl->count))
+		kfree(ctl);
+}
+
 /*
  * this is only called by cache_block_group, since we could have freed extents
  * we need to check the pinned_extents for any extents that can't be used yet
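Note: the two helpers added above are a plain reference count on the caching control: get_caching_control() only hands out a reference while caching is in flight, and put_caching_control() frees the structure when the last reference drops. A minimal userspace sketch of the same lifetime pattern, with C11 atomics standing in for the kernel's atomic_t (names and types here are illustrative, not the kernel API):

    #include <stdatomic.h>
    #include <stdlib.h>

    struct caching_control {
        atomic_int count;           /* one reference per user of the struct */
    };

    static struct caching_control *ctl_get(struct caching_control *ctl)
    {
        atomic_fetch_add(&ctl->count, 1);
        return ctl;
    }

    static void ctl_put(struct caching_control *ctl)
    {
        /* mirrors put_caching_control(): the last put frees the struct */
        if (atomic_fetch_sub(&ctl->count, 1) == 1)
            free(ctl);
    }

    int main(void)
    {
        struct caching_control *ctl = calloc(1, sizeof(*ctl));

        if (!ctl)
            return 1;
        atomic_init(&ctl->count, 1);    /* creator holds the first reference */
        ctl_get(ctl);                   /* e.g. the caching thread */
        ctl_put(ctl);                   /* thread exits */
        ctl_put(ctl);                   /* creator drops its ref: freed here */
        return 0;
    }

The initial count of 2 set later in cache_block_group() follows the same idea: the caching kthread and the fs_info caching list each hold one reference, and whichever drops last does the kfree().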
@@ -215,9 +249,9 @@ static u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
 	int ret;
 
 	while (start < end) {
-		ret = find_first_extent_bit(&info->pinned_extents, start,
+		ret = find_first_extent_bit(info->pinned_extents, start,
 					    &extent_start, &extent_end,
-					    EXTENT_DIRTY|EXTENT_LOCKED);
+					    EXTENT_DIRTY | EXTENT_UPTODATE);
 		if (ret)
 			break;
 
@@ -249,22 +283,27 @@ static int caching_kthread(void *data)
 {
 	struct btrfs_block_group_cache *block_group = data;
 	struct btrfs_fs_info *fs_info = block_group->fs_info;
-	u64 last = 0;
+	struct btrfs_caching_control *caching_ctl = block_group->caching_ctl;
+	struct btrfs_root *extent_root = fs_info->extent_root;
 	struct btrfs_path *path;
-	int ret = 0;
-	struct btrfs_key key;
 	struct extent_buffer *leaf;
-	int slot;
+	struct btrfs_key key;
 	u64 total_found = 0;
-
-	BUG_ON(!fs_info);
+	u64 last = 0;
+	u32 nritems;
+	int ret = 0;
 
 	path = btrfs_alloc_path();
 	if (!path)
 		return -ENOMEM;
 
-	atomic_inc(&block_group->space_info->caching_threads);
+	exclude_super_stripes(extent_root, block_group);
+	spin_lock(&block_group->space_info->lock);
+	block_group->space_info->bytes_super += block_group->bytes_super;
+	spin_unlock(&block_group->space_info->lock);
+
 	last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET);
+
 	/*
 	 * We don't want to deadlock with somebody trying to allocate a new
 	 * extent for the extent root while also trying to search the extent
@@ -277,74 +316,64 @@ static int caching_kthread(void *data)
 
 	key.objectid = last;
 	key.offset = 0;
-	btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
+	key.type = BTRFS_EXTENT_ITEM_KEY;
 again:
+	mutex_lock(&caching_ctl->mutex);
 	/* need to make sure the commit_root doesn't disappear */
 	down_read(&fs_info->extent_commit_sem);
 
-	ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, path, 0, 0);
+	ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
 	if (ret < 0)
 		goto err;
 
+	leaf = path->nodes[0];
+	nritems = btrfs_header_nritems(leaf);
+
 	while (1) {
 		smp_mb();
-		if (block_group->fs_info->closing > 1) {
+		if (fs_info->closing > 1) {
 			last = (u64)-1;
 			break;
 		}
 
-		leaf = path->nodes[0];
-		slot = path->slots[0];
-		if (slot >= btrfs_header_nritems(leaf)) {
-			ret = btrfs_next_leaf(fs_info->extent_root, path);
-			if (ret < 0)
-				goto err;
-			else if (ret)
+		if (path->slots[0] < nritems) {
+			btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
+		} else {
+			ret = find_next_key(path, 0, &key);
+			if (ret)
 				break;
 
-			if (need_resched() ||
-			    btrfs_transaction_in_commit(fs_info)) {
-				leaf = path->nodes[0];
-
-				/* this shouldn't happen, but if the
-				 * leaf is empty just move on.
-				 */
-				if (btrfs_header_nritems(leaf) == 0)
-					break;
-				/*
-				 * we need to copy the key out so that
-				 * we are sure the next search advances
-				 * us forward in the btree.
-				 */
-				btrfs_item_key_to_cpu(leaf, &key, 0);
-				btrfs_release_path(fs_info->extent_root, path);
-				up_read(&fs_info->extent_commit_sem);
+			caching_ctl->progress = last;
+			btrfs_release_path(extent_root, path);
+			up_read(&fs_info->extent_commit_sem);
+			mutex_unlock(&caching_ctl->mutex);
+			if (btrfs_transaction_in_commit(fs_info))
 				schedule_timeout(1);
-				goto again;
-			}
+			else
+				cond_resched();
+			goto again;
+		}
 
+		if (key.objectid < block_group->key.objectid) {
+			path->slots[0]++;
 			continue;
 		}
-		btrfs_item_key_to_cpu(leaf, &key, slot);
-		if (key.objectid < block_group->key.objectid)
-			goto next;
 
 		if (key.objectid >= block_group->key.objectid +
 		    block_group->key.offset)
 			break;
 
-		if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) {
+		if (key.type == BTRFS_EXTENT_ITEM_KEY) {
 			total_found += add_new_free_space(block_group,
 							  fs_info, last,
 							  key.objectid);
 			last = key.objectid + key.offset;
-		}
 
-		if (total_found > (1024 * 1024 * 2)) {
-			total_found = 0;
-			wake_up(&block_group->caching_q);
+			if (total_found > (1024 * 1024 * 2)) {
+				total_found = 0;
+				wake_up(&caching_ctl->wait);
+			}
 		}
-next:
+
 		path->slots[0]++;
 	}
 	ret = 0;
@@ -352,33 +381,65 @@ next:
 	total_found += add_new_free_space(block_group, fs_info, last,
 					  block_group->key.objectid +
 					  block_group->key.offset);
+	caching_ctl->progress = (u64)-1;
 
 	spin_lock(&block_group->lock);
+	block_group->caching_ctl = NULL;
 	block_group->cached = BTRFS_CACHE_FINISHED;
 	spin_unlock(&block_group->lock);
 
 err:
 	btrfs_free_path(path);
 	up_read(&fs_info->extent_commit_sem);
-	atomic_dec(&block_group->space_info->caching_threads);
-	wake_up(&block_group->caching_q);
 
+	free_excluded_extents(extent_root, block_group);
+
+	mutex_unlock(&caching_ctl->mutex);
+	wake_up(&caching_ctl->wait);
+
+	put_caching_control(caching_ctl);
+	atomic_dec(&block_group->space_info->caching_threads);
 	return 0;
 }
 
 static int cache_block_group(struct btrfs_block_group_cache *cache)
 {
+	struct btrfs_fs_info *fs_info = cache->fs_info;
+	struct btrfs_caching_control *caching_ctl;
 	struct task_struct *tsk;
 	int ret = 0;
 
+	smp_mb();
+	if (cache->cached != BTRFS_CACHE_NO)
+		return 0;
+
+	caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_KERNEL);
+	BUG_ON(!caching_ctl);
+
+	INIT_LIST_HEAD(&caching_ctl->list);
+	mutex_init(&caching_ctl->mutex);
+	init_waitqueue_head(&caching_ctl->wait);
+	caching_ctl->block_group = cache;
+	caching_ctl->progress = cache->key.objectid;
+	/* one for caching kthread, one for caching block group list */
+	atomic_set(&caching_ctl->count, 2);
+
 	spin_lock(&cache->lock);
 	if (cache->cached != BTRFS_CACHE_NO) {
 		spin_unlock(&cache->lock);
-		return ret;
+		kfree(caching_ctl);
+		return 0;
 	}
+	cache->caching_ctl = caching_ctl;
 	cache->cached = BTRFS_CACHE_STARTED;
 	spin_unlock(&cache->lock);
 
+	down_write(&fs_info->extent_commit_sem);
+	list_add_tail(&caching_ctl->list, &fs_info->caching_block_groups);
+	up_write(&fs_info->extent_commit_sem);
+
+	atomic_inc(&cache->space_info->caching_threads);
+
 	tsk = kthread_run(caching_kthread, cache, "btrfs-cache-%llu\n",
 			  cache->key.objectid);
 	if (IS_ERR(tsk)) {
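Note: cache_block_group() above uses a check/alloc/recheck shape: an unlocked fast-path test of cache->cached, allocation of the control structure outside the spinlock, then a second test under cache->lock with kfree() on the losing path. A standalone sketch of that shape, with a pthread mutex standing in for the spinlock (all names here are illustrative, not kernel APIs):

    #include <pthread.h>
    #include <stdlib.h>

    struct ctl { int dummy; };

    struct cache {
        pthread_mutex_t lock;
        int started;            /* stands in for cache->cached */
        struct ctl *ctl;        /* stands in for cache->caching_ctl */
    };

    static int start_caching(struct cache *c)
    {
        struct ctl *ctl;

        if (c->started)                 /* unlocked fast path */
            return 0;

        ctl = calloc(1, sizeof(*ctl));  /* allocate before taking the lock */
        if (!ctl)
            return -1;

        pthread_mutex_lock(&c->lock);
        if (c->started) {               /* recheck: lost the race, undo */
            pthread_mutex_unlock(&c->lock);
            free(ctl);
            return 0;
        }
        c->ctl = ctl;
        c->started = 1;
        pthread_mutex_unlock(&c->lock);
        return 0;
    }

    int main(void)
    {
        struct cache c = { .lock = PTHREAD_MUTEX_INITIALIZER };

        return start_caching(&c);
    }

Allocating before taking the lock keeps the sleeping allocation out of the critical section; the cost of the race is only a wasted allocation that the loser frees.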
@@ -1507,22 +1568,23 @@ static int remove_extent_backref(struct btrfs_trans_handle *trans,
 	return ret;
 }
 
-#ifdef BIO_RW_DISCARD
 static void btrfs_issue_discard(struct block_device *bdev,
 				u64 start, u64 len)
 {
-	blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL);
+	blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL,
+			     DISCARD_FL_BARRIER);
 }
-#endif
 
 static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
 				u64 num_bytes)
 {
-#ifdef BIO_RW_DISCARD
 	int ret;
 	u64 map_length = num_bytes;
 	struct btrfs_multi_bio *multi = NULL;
 
+	if (!btrfs_test_opt(root, DISCARD))
+		return 0;
+
 	/* Tell the block device(s) that the sectors can be discarded */
 	ret = btrfs_map_block(&root->fs_info->mapping_tree, READ,
 			      bytenr, &map_length, &multi, 0);
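Note: blkdev_issue_discard() takes 512-byte sector units, so btrfs_issue_discard() converts byte offsets and lengths with a right shift by 9 (512 == 1 << 9). A trivial standalone check of the conversion:

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        uint64_t start = 1024 * 1024;   /* byte offset: 1 MiB */
        uint64_t len = 4096;            /* byte length: 4 KiB */

        assert((start >> 9) == start / 512);    /* sector 2048 */
        assert((len >> 9) == len / 512);        /* 8 sectors */
        return 0;
    }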
@@ -1542,9 +1604,6 @@ static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
 	}
 
 	return ret;
-#else
-	return 0;
-#endif
 }
 
 int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
@@ -1656,7 +1715,6 @@ static int run_delayed_data_ref(struct btrfs_trans_handle *trans,
 					     parent, ref_root, flags,
 					     ref->objectid, ref->offset,
 					     &ins, node->ref_mod);
-		update_reserved_extents(root, ins.objectid, ins.offset, 0);
 	} else if (node->action == BTRFS_ADD_DELAYED_REF) {
 		ret = __btrfs_inc_extent_ref(trans, root, node->bytenr,
 					     node->num_bytes, parent,
@@ -1782,7 +1840,6 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
 						extent_op->flags_to_set,
 						&extent_op->key,
 						ref->level, &ins);
-		update_reserved_extents(root, ins.objectid, ins.offset, 0);
 	} else if (node->action == BTRFS_ADD_DELAYED_REF) {
 		ret = __btrfs_inc_extent_ref(trans, root, node->bytenr,
 					     node->num_bytes, parent, ref_root,
@@ -1817,16 +1874,32 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
 		BUG_ON(extent_op);
 		head = btrfs_delayed_node_to_head(node);
 		if (insert_reserved) {
+			int mark_free = 0;
+			struct extent_buffer *must_clean = NULL;
+
+			ret = pin_down_bytes(trans, root, NULL,
+					     node->bytenr, node->num_bytes,
+					     head->is_data, 1, &must_clean);
+			if (ret > 0)
+				mark_free = 1;
+
+			if (must_clean) {
+				clean_tree_block(NULL, root, must_clean);
+				btrfs_tree_unlock(must_clean);
+				free_extent_buffer(must_clean);
+			}
 			if (head->is_data) {
 				ret = btrfs_del_csums(trans, root,
 						      node->bytenr,
 						      node->num_bytes);
 				BUG_ON(ret);
 			}
-			btrfs_update_pinned_extents(root, node->bytenr,
-						    node->num_bytes, 1);
-			update_reserved_extents(root, node->bytenr,
-						node->num_bytes, 0);
+			if (mark_free) {
+				ret = btrfs_free_reserved_extent(root,
+							node->bytenr,
+							node->num_bytes);
+				BUG_ON(ret);
+			}
 		}
 		mutex_unlock(&head->mutex);
 		return 0;
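Note: the insert_reserved branch above depends on the return convention of pin_down_bytes(): negative means error, zero means the bytes were pinned, and a positive value tells the caller to also release the reserved extent, which is what mark_free records. A small sketch of consuming that kind of tri-state return (pin_bytes() is a made-up stand-in, not the kernel function):

    #include <stdio.h>

    /* stand-in: <0 error, 0 pinned, >0 pinned and reservation must be freed */
    static int pin_bytes(unsigned long long bytenr, unsigned long long len)
    {
        (void)bytenr;
        (void)len;
        return 1;
    }

    int main(void)
    {
        int mark_free = 0;
        int ret = pin_bytes(4096, 4096);

        if (ret < 0)
            return 1;           /* hard failure */
        if (ret > 0)
            mark_free = 1;      /* also drop the reservation */

        printf("mark_free=%d\n", mark_free);
        return 0;
    }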
@@ -2691,60 +2764,448 @@ void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *inode)
 						       alloc_target);
 }
 
+static u64 calculate_bytes_needed(struct btrfs_root *root, int num_items)
+{
+	u64 num_bytes;
+	int level;
+
+	level = BTRFS_MAX_LEVEL - 2;
+	/*
+	 * NOTE: these calculations are absolutely the worst possible case.
+	 * This assumes that _every_ item we insert will require a new leaf, and
+	 * that the tree has grown to its maximum level size.
+	 */
+
+	/*
+	 * for every item we insert we could insert both an extent item and a
+	 * extent ref item.  Then for ever item we insert, we will need to cow
+	 * both the original leaf, plus the leaf to the left and right of it.
+	 *
+	 * Unless we are talking about the extent root, then we just want the
+	 * number of items * 2, since we just need the extent item plus its ref.
+	 */
+	if (root == root->fs_info->extent_root)
+		num_bytes = num_items * 2;
+	else
+		num_bytes = (num_items + (2 * num_items)) * 3;
+
+	/*
+	 * num_bytes is total number of leaves we could need times the leaf
+	 * size, and then for every leaf we could end up cow'ing 2 nodes per
+	 * level, down to the leaf level.
+	 */
+	num_bytes = (num_bytes * root->leafsize) +
+		(num_bytes * (level * 2)) * root->nodesize;
+
+	return num_bytes;
+}
+
 /*
- * for now this just makes sure we have at least 5% of our metadata space free
- * for use.
+ * Unreserve metadata space for delalloc.  If we have less reserved credits than
+ * we have extents, this function does nothing.
  */
-int btrfs_check_metadata_free_space(struct btrfs_root *root)
+int btrfs_unreserve_metadata_for_delalloc(struct btrfs_root *root,
+					  struct inode *inode, int num_items)
 {
 	struct btrfs_fs_info *info = root->fs_info;
 	struct btrfs_space_info *meta_sinfo;
-	u64 alloc_target, thresh;
-	int committed = 0, ret;
+	u64 num_bytes;
+	u64 alloc_target;
+	bool bug = false;
 
 	/* get the space info for where the metadata will live */
 	alloc_target = btrfs_get_alloc_profile(root, 0);
 	meta_sinfo = __find_space_info(info, alloc_target);
 
-again:
+	num_bytes = calculate_bytes_needed(root->fs_info->extent_root,
+					   num_items);
+
 	spin_lock(&meta_sinfo->lock);
-	if (!meta_sinfo->full)
-		thresh = meta_sinfo->total_bytes * 80;
-	else
-		thresh = meta_sinfo->total_bytes * 95;
+	spin_lock(&BTRFS_I(inode)->accounting_lock);
+	if (BTRFS_I(inode)->reserved_extents <=
+	    BTRFS_I(inode)->outstanding_extents) {
+		spin_unlock(&BTRFS_I(inode)->accounting_lock);
+		spin_unlock(&meta_sinfo->lock);
+		return 0;
+	}
+	spin_unlock(&BTRFS_I(inode)->accounting_lock);
+
+	BTRFS_I(inode)->reserved_extents--;
+	BUG_ON(BTRFS_I(inode)->reserved_extents < 0);
+
+	if (meta_sinfo->bytes_delalloc < num_bytes) {
+		bug = true;
+		meta_sinfo->bytes_delalloc = 0;
+	} else {
+		meta_sinfo->bytes_delalloc -= num_bytes;
+	}
+	spin_unlock(&meta_sinfo->lock);
+
+	BUG_ON(bug);
+
+	return 0;
+}
 
+static void check_force_delalloc(struct btrfs_space_info *meta_sinfo)
+{
+	u64 thresh;
+
+	thresh = meta_sinfo->bytes_used + meta_sinfo->bytes_reserved +
+		meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly +
+		meta_sinfo->bytes_super + meta_sinfo->bytes_root +
+		meta_sinfo->bytes_may_use;
+
+	thresh = meta_sinfo->total_bytes - thresh;
+	thresh *= 80;
 	do_div(thresh, 100);
+	if (thresh <= meta_sinfo->bytes_delalloc)
+		meta_sinfo->force_delalloc = 1;
+	else
+		meta_sinfo->force_delalloc = 0;
+}
 
-	if (meta_sinfo->bytes_used + meta_sinfo->bytes_reserved +
-	    meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly > thresh) {
-		struct btrfs_trans_handle *trans;
-		if (!meta_sinfo->full) {
-			meta_sinfo->force_alloc = 1;
-			spin_unlock(&meta_sinfo->lock);
+struct async_flush {
+	struct btrfs_root *root;
+	struct btrfs_space_info *info;
+	struct btrfs_work work;
+};
 
-			trans = btrfs_start_transaction(root, 1);
-			if (!trans)
-				return -ENOMEM;
+static noinline void flush_delalloc_async(struct btrfs_work *work)
+{
+	struct async_flush *async;
+	struct btrfs_root *root;
+	struct btrfs_space_info *info;
 
-			ret = do_chunk_alloc(trans, root->fs_info->extent_root,
-					     2 * 1024 * 1024, alloc_target, 0);
-			btrfs_end_transaction(trans, root);
+	async = container_of(work, struct async_flush, work);
+	root = async->root;
+	info = async->info;
+
+	btrfs_start_delalloc_inodes(root);
+	wake_up(&info->flush_wait);
+	btrfs_wait_ordered_extents(root, 0);
+
+	spin_lock(&info->lock);
+	info->flushing = 0;
+	spin_unlock(&info->lock);
+	wake_up(&info->flush_wait);
+
+	kfree(async);
+}
+
+static void wait_on_flush(struct btrfs_space_info *info)
+{
+	DEFINE_WAIT(wait);
+	u64 used;
+
+	while (1) {
+		prepare_to_wait(&info->flush_wait, &wait,
+				TASK_UNINTERRUPTIBLE);
+		spin_lock(&info->lock);
+		if (!info->flushing) {
+			spin_unlock(&info->lock);
+			break;
+		}
+
+		used = info->bytes_used + info->bytes_reserved +
+			info->bytes_pinned + info->bytes_readonly +
+			info->bytes_super + info->bytes_root +
+			info->bytes_may_use + info->bytes_delalloc;
+		if (used < info->total_bytes) {
+			spin_unlock(&info->lock);
+			break;
+		}
+		spin_unlock(&info->lock);
+		schedule();
+	}
+	finish_wait(&info->flush_wait, &wait);
+}
+
+static void flush_delalloc(struct btrfs_root *root,
+			   struct btrfs_space_info *info)
+{
+	struct async_flush *async;
+	bool wait = false;
+
+	spin_lock(&info->lock);
+
+	if (!info->flushing) {
+		info->flushing = 1;
+		init_waitqueue_head(&info->flush_wait);
+	} else {
+		wait = true;
+	}
+
+	spin_unlock(&info->lock);
+
+	if (wait) {
+		wait_on_flush(info);
+		return;
+	}
+
+	async = kzalloc(sizeof(*async), GFP_NOFS);
+	if (!async)
+		goto flush;
+
+	async->root = root;
+	async->info = info;
+	async->work.func = flush_delalloc_async;
+
+	btrfs_queue_worker(&root->fs_info->enospc_workers,
+			   &async->work);
+	wait_on_flush(info);
+	return;
+
+flush:
+	btrfs_start_delalloc_inodes(root);
+	btrfs_wait_ordered_extents(root, 0);
+
+	spin_lock(&info->lock);
+	info->flushing = 0;
+	spin_unlock(&info->lock);
+	wake_up(&info->flush_wait);
+}
+
+static int maybe_allocate_chunk(struct btrfs_root *root,
+				struct btrfs_space_info *info)
+{
+	struct btrfs_super_block *disk_super = &root->fs_info->super_copy;
+	struct btrfs_trans_handle *trans;
+	bool wait = false;
+	int ret = 0;
+	u64 min_metadata;
+	u64 free_space;
+
+	free_space = btrfs_super_total_bytes(disk_super);
+	/*
+	 * we allow the metadata to grow to a max of either 10gb or 5% of the
+	 * space in the volume.
+	 */
+	min_metadata = min((u64)10 * 1024 * 1024 * 1024,
+			   div64_u64(free_space * 5, 100));
+	if (info->total_bytes >= min_metadata) {
+		spin_unlock(&info->lock);
+		return 0;
+	}
+
+	if (info->full) {
+		spin_unlock(&info->lock);
+		return 0;
+	}
+
+	if (!info->allocating_chunk) {
+		info->force_alloc = 1;
+		info->allocating_chunk = 1;
+		init_waitqueue_head(&info->allocate_wait);
+	} else {
+		wait = true;
+	}
+
+	spin_unlock(&info->lock);
+
+	if (wait) {
+		wait_event(info->allocate_wait,
+			   !info->allocating_chunk);
+		return 1;
+	}
+
+	trans = btrfs_start_transaction(root, 1);
+	if (!trans) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	ret = do_chunk_alloc(trans, root->fs_info->extent_root,
+			     4096 + 2 * 1024 * 1024,
+			     info->flags, 0);
+	btrfs_end_transaction(trans, root);
+	if (ret)
+		goto out;
+out:
+	spin_lock(&info->lock);
+	info->allocating_chunk = 0;
+	spin_unlock(&info->lock);
+	wake_up(&info->allocate_wait);
+
+	if (ret)
+		return 0;
+	return 1;
+}
+
+/*
+ * Reserve metadata space for delalloc.
+ */
+int btrfs_reserve_metadata_for_delalloc(struct btrfs_root *root,
+					struct inode *inode, int num_items)
+{
+	struct btrfs_fs_info *info = root->fs_info;
+	struct btrfs_space_info *meta_sinfo;
+	u64 num_bytes;
+	u64 used;
+	u64 alloc_target;
+	int flushed = 0;
+	int force_delalloc;
+
+	/* get the space info for where the metadata will live */
+	alloc_target = btrfs_get_alloc_profile(root, 0);
+	meta_sinfo = __find_space_info(info, alloc_target);
+
+	num_bytes = calculate_bytes_needed(root->fs_info->extent_root,
+					   num_items);
+again:
+	spin_lock(&meta_sinfo->lock);
+
+	force_delalloc = meta_sinfo->force_delalloc;
+
+	if (unlikely(!meta_sinfo->bytes_root))
+		meta_sinfo->bytes_root = calculate_bytes_needed(root, 6);
+
+	if (!flushed)
+		meta_sinfo->bytes_delalloc += num_bytes;
+
+	used = meta_sinfo->bytes_used + meta_sinfo->bytes_reserved +
+		meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly +
+		meta_sinfo->bytes_super + meta_sinfo->bytes_root +
+		meta_sinfo->bytes_may_use + meta_sinfo->bytes_delalloc;
+
+	if (used > meta_sinfo->total_bytes) {
+		flushed++;
+
+		if (flushed == 1) {
+			if (maybe_allocate_chunk(root, meta_sinfo))
+				goto again;
+			flushed++;
+		} else {
+			spin_unlock(&meta_sinfo->lock);
+		}
+
+		if (flushed == 2) {
+			filemap_flush(inode->i_mapping);
+			goto again;
+		} else if (flushed == 3) {
+			flush_delalloc(root, meta_sinfo);
 			goto again;
 		}
+		spin_lock(&meta_sinfo->lock);
+		meta_sinfo->bytes_delalloc -= num_bytes;
 		spin_unlock(&meta_sinfo->lock);
+		printk(KERN_ERR "enospc, has %d, reserved %d\n",
+		       BTRFS_I(inode)->outstanding_extents,
+		       BTRFS_I(inode)->reserved_extents);
+		dump_space_info(meta_sinfo, 0, 0);
+		return -ENOSPC;
+	}
 
-		if (!committed) {
-			committed = 1;
-			trans = btrfs_join_transaction(root, 1);
-			if (!trans)
-				return -ENOMEM;
-			ret = btrfs_commit_transaction(trans, root);
-			if (ret)
-				return ret;
+	BTRFS_I(inode)->reserved_extents++;
+	check_force_delalloc(meta_sinfo);
+	spin_unlock(&meta_sinfo->lock);
+
+	if (!flushed && force_delalloc)
+		filemap_flush(inode->i_mapping);
+
+	return 0;
+}
+
+/*
+ * unreserve num_items number of items worth of metadata space.  This needs to
+ * be paired with btrfs_reserve_metadata_space.
+ *
+ * NOTE: if you have the option, run this _AFTER_ you do a
+ * btrfs_end_transaction, since btrfs_end_transaction will run delayed ref
+ * oprations which will result in more used metadata, so we want to make sure we
+ * can do that without issue.
+ */
+int btrfs_unreserve_metadata_space(struct btrfs_root *root, int num_items)
+{
+	struct btrfs_fs_info *info = root->fs_info;
+	struct btrfs_space_info *meta_sinfo;
+	u64 num_bytes;
+	u64 alloc_target;
+	bool bug = false;
+
+	/* get the space info for where the metadata will live */
+	alloc_target = btrfs_get_alloc_profile(root, 0);
+	meta_sinfo = __find_space_info(info, alloc_target);
+
+	num_bytes = calculate_bytes_needed(root, num_items);
+
+	spin_lock(&meta_sinfo->lock);
+	if (meta_sinfo->bytes_may_use < num_bytes) {
+		bug = true;
+		meta_sinfo->bytes_may_use = 0;
+	} else {
+		meta_sinfo->bytes_may_use -= num_bytes;
+	}
+	spin_unlock(&meta_sinfo->lock);
+
+	BUG_ON(bug);
+
+	return 0;
+}
+
+/*
+ * Reserve some metadata space for use.  We'll calculate the worste case number
+ * of bytes that would be needed to modify num_items number of items.  If we
+ * have space, fantastic, if not, you get -ENOSPC.  Please call
+ * btrfs_unreserve_metadata_space when you are done for the _SAME_ number of
+ * items you reserved, since whatever metadata you needed should have already
+ * been allocated.
+ *
+ * This will commit the transaction to make more space if we don't have enough
+ * metadata space.  THe only time we don't do this is if we're reserving space
+ * inside of a transaction, then we will just return -ENOSPC and it is the
+ * callers responsibility to handle it properly.
+ */
+int btrfs_reserve_metadata_space(struct btrfs_root *root, int num_items)
+{
+	struct btrfs_fs_info *info = root->fs_info;
+	struct btrfs_space_info *meta_sinfo;
+	u64 num_bytes;
+	u64 used;
+	u64 alloc_target;
+	int retries = 0;
+
+	/* get the space info for where the metadata will live */
+	alloc_target = btrfs_get_alloc_profile(root, 0);
+	meta_sinfo = __find_space_info(info, alloc_target);
+
+	num_bytes = calculate_bytes_needed(root, num_items);
+again:
+	spin_lock(&meta_sinfo->lock);
+
+	if (unlikely(!meta_sinfo->bytes_root))
+		meta_sinfo->bytes_root = calculate_bytes_needed(root, 6);
+
+	if (!retries)
+		meta_sinfo->bytes_may_use += num_bytes;
+
+	used = meta_sinfo->bytes_used + meta_sinfo->bytes_reserved +
+		meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly +
+		meta_sinfo->bytes_super + meta_sinfo->bytes_root +
+		meta_sinfo->bytes_may_use + meta_sinfo->bytes_delalloc;
+
+	if (used > meta_sinfo->total_bytes) {
+		retries++;
+		if (retries == 1) {
+			if (maybe_allocate_chunk(root, meta_sinfo))
+				goto again;
+			retries++;
+		} else {
+			spin_unlock(&meta_sinfo->lock);
+		}
+
+		if (retries == 2) {
+			flush_delalloc(root, meta_sinfo);
 			goto again;
 		}
+		spin_lock(&meta_sinfo->lock);
+		meta_sinfo->bytes_may_use -= num_bytes;
+		spin_unlock(&meta_sinfo->lock);
+
+		dump_space_info(meta_sinfo, 0, 0);
 		return -ENOSPC;
 	}
+
+	check_force_delalloc(meta_sinfo);
 	spin_unlock(&meta_sinfo->lock);
 
 	return 0;
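Note: calculate_bytes_needed() above prices a worst-case reservation: each item may need an extent item plus a ref (times three leaves for non-extent roots), and every leaf may COW two nodes per level for BTRFS_MAX_LEVEL - 2 levels. A standalone rendering of the same arithmetic, using hypothetical 4 KiB leaf and node sizes (illustrative values only, not taken from any real filesystem):

    #include <stdint.h>
    #include <stdio.h>

    #define BTRFS_MAX_LEVEL 8

    static uint64_t bytes_needed(int num_items, int is_extent_root,
                                 uint64_t leafsize, uint64_t nodesize)
    {
        int level = BTRFS_MAX_LEVEL - 2;
        uint64_t num_bytes;

        /* extent root: item + ref; other roots: 3 leaves per item, times 3 */
        if (is_extent_root)
            num_bytes = (uint64_t)num_items * 2;
        else
            num_bytes = (uint64_t)(num_items + 2 * num_items) * 3;

        /* leaves, plus two COW'd nodes per level for each leaf */
        return (num_bytes * leafsize) +
               (num_bytes * (uint64_t)(level * 2)) * nodesize;
    }

    int main(void)
    {
        /* one item against a non-extent root, 4 KiB leaves and nodes */
        printf("%llu bytes\n",
               (unsigned long long)bytes_needed(1, 0, 4096, 4096));
        return 0;
    }

For one item against a non-extent root this gives (1 + 2) * 3 = 9 leaves plus 9 * 12 COW'd nodes, i.e. 9 * 4096 + 108 * 4096 = 479232 bytes, which shows how deliberately pessimistic the estimate is.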
@@ -2764,13 +3225,16 @@ int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode,
 	bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1);
 
 	data_sinfo = BTRFS_I(inode)->space_info;
+	if (!data_sinfo)
+		goto alloc;
+
 again:
 	/* make sure we have enough space to handle the data first */
 	spin_lock(&data_sinfo->lock);
 	if (data_sinfo->total_bytes - data_sinfo->bytes_used -
 	    data_sinfo->bytes_delalloc - data_sinfo->bytes_reserved -
 	    data_sinfo->bytes_pinned - data_sinfo->bytes_readonly -
-	    data_sinfo->bytes_may_use < bytes) {
+	    data_sinfo->bytes_may_use - data_sinfo->bytes_super < bytes) {
 		struct btrfs_trans_handle *trans;
 
 		/*
@@ -2782,7 +3246,7 @@ again:
 
 		data_sinfo->force_alloc = 1;
 		spin_unlock(&data_sinfo->lock);
-
+alloc:
 		alloc_target = btrfs_get_alloc_profile(root, 1);
 		trans = btrfs_start_transaction(root, 1);
 		if (!trans)
@@ -2794,12 +3258,17 @@ again:
 		btrfs_end_transaction(trans, root);
 		if (ret)
 			return ret;
+
+		if (!data_sinfo) {
+			btrfs_set_inode_space_info(root, inode);
+			data_sinfo = BTRFS_I(inode)->space_info;
+		}
 		goto again;
 	}
 	spin_unlock(&data_sinfo->lock);
 
 	/* commit the current transaction and try again */
-	if (!committed) {
+	if (!committed && !root->fs_info->open_ioctl_trans) {
 		committed = 1;
 		trans = btrfs_join_transaction(root, 1);
 		if (!trans)
@@ -2827,7 +3296,7 @@ again:
 	BTRFS_I(inode)->reserved_bytes += bytes;
 	spin_unlock(&data_sinfo->lock);
 
-	return btrfs_check_metadata_free_space(root);
+	return 0;
 }
 
 /*
@@ -2926,17 +3395,15 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
 	BUG_ON(!space_info);
 
 	spin_lock(&space_info->lock);
-	if (space_info->force_alloc) {
+	if (space_info->force_alloc)
 		force = 1;
-		space_info->force_alloc = 0;
-	}
 	if (space_info->full) {
 		spin_unlock(&space_info->lock);
 		goto out;
 	}
 
 	thresh = space_info->total_bytes - space_info->bytes_readonly;
-	thresh = div_factor(thresh, 6);
+	thresh = div_factor(thresh, 8);
 	if (!force &&
 	    (space_info->bytes_used + space_info->bytes_pinned +
 	     space_info->bytes_reserved + alloc_bytes) < thresh) {
@@ -2950,7 +3417,7 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
 	 * we keep a reasonable number of metadata chunks allocated in the
 	 * FS as well.
 	 */
-	if (flags & BTRFS_BLOCK_GROUP_DATA) {
+	if (flags & BTRFS_BLOCK_GROUP_DATA && fs_info->metadata_ratio) {
 		fs_info->data_chunk_allocations++;
 		if (!(fs_info->data_chunk_allocations %
 		      fs_info->metadata_ratio))
@@ -2958,8 +3425,11 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
 	}
 
 	ret = btrfs_alloc_chunk(trans, extent_root, flags);
+	spin_lock(&space_info->lock);
 	if (ret)
 		space_info->full = 1;
+	space_info->force_alloc = 0;
+	spin_unlock(&space_info->lock);
 out:
 	mutex_unlock(&extent_root->fs_info->chunk_mutex);
 	return ret;
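Note: the div_factor change above shifts do_chunk_alloc()'s early-out threshold. div_factor() in this file is essentially num * factor / 10, so a non-forced allocation is now skipped until used plus pending bytes reach about 80% of the writable space instead of 60%. Illustration with a simplified helper (not the kernel's exact implementation):

    #include <stdint.h>
    #include <stdio.h>

    static uint64_t div_factor(uint64_t num, int factor)
    {
        return num * factor / 10;   /* simplified from the kernel helper */
    }

    int main(void)
    {
        uint64_t writable = 100ULL << 30;   /* 100 GiB of writable bytes */

        printf("old cutoff: %llu GiB\n",
               (unsigned long long)(div_factor(writable, 6) >> 30));
        printf("new cutoff: %llu GiB\n",
               (unsigned long long)(div_factor(writable, 8) >> 30));
        return 0;
    }

The effect is fewer speculative chunk allocations: existing chunks fill further before a new one is carved out.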
@@ -3008,10 +3478,12 @@ static int update_block_group(struct btrfs_trans_handle *trans,
 		num_bytes = min(total, cache->key.offset - byte_in_group);
 		if (alloc) {
 			old_val += num_bytes;
+			btrfs_set_block_group_used(&cache->item, old_val);
+			cache->reserved -= num_bytes;
 			cache->space_info->bytes_used += num_bytes;
+			cache->space_info->bytes_reserved -= num_bytes;
 			if (cache->ro)
 				cache->space_info->bytes_readonly -= num_bytes;
-			btrfs_set_block_group_used(&cache->item, old_val);
 			spin_unlock(&cache->lock);
 			spin_unlock(&cache->space_info->lock);
 		} else {
@@ -3056,127 +3528,136 @@ static u64 first_logical_byte(struct btrfs_root *root, u64 search_start)
 	return bytenr;
 }
 
-int btrfs_update_pinned_extents(struct btrfs_root *root,
-				u64 bytenr, u64 num, int pin)
+/*
+ * this function must be called within transaction
+ */
+int btrfs_pin_extent(struct btrfs_root *root,
+		     u64 bytenr, u64 num_bytes, int reserved)
 {
-	u64 len;
-	struct btrfs_block_group_cache *cache;
 	struct btrfs_fs_info *fs_info = root->fs_info;
+	struct btrfs_block_group_cache *cache;
 
-	if (pin)
-		set_extent_dirty(&fs_info->pinned_extents,
-				 bytenr, bytenr + num - 1, GFP_NOFS);
-
-	while (num > 0) {
-		cache = btrfs_lookup_block_group(fs_info, bytenr);
-		BUG_ON(!cache);
-		len = min(num, cache->key.offset -
-			  (bytenr - cache->key.objectid));
-		if (pin) {
-			spin_lock(&cache->space_info->lock);
-			spin_lock(&cache->lock);
-			cache->pinned += len;
-			cache->space_info->bytes_pinned += len;
-			spin_unlock(&cache->lock);
-			spin_unlock(&cache->space_info->lock);
-			fs_info->total_pinned += len;
-		} else {
-			int unpin = 0;
+	cache = btrfs_lookup_block_group(fs_info, bytenr);
+	BUG_ON(!cache);
 
-			/*
-			 * in order to not race with the block group caching, we
-			 * only want to unpin the extent if we are cached.  If
-			 * we aren't cached, we want to start async caching this
-			 * block group so we can free the extent the next time
-			 * around.
-			 */
-			spin_lock(&cache->space_info->lock);
-			spin_lock(&cache->lock);
-			unpin = (cache->cached == BTRFS_CACHE_FINISHED);
-			if (likely(unpin)) {
-				cache->pinned -= len;
-				cache->space_info->bytes_pinned -= len;
-				fs_info->total_pinned -= len;
-			}
-			spin_unlock(&cache->lock);
-			spin_unlock(&cache->space_info->lock);
+	spin_lock(&cache->space_info->lock);
+	spin_lock(&cache->lock);
+	cache->pinned += num_bytes;
+	cache->space_info->bytes_pinned += num_bytes;
+	if (reserved) {
+		cache->reserved -= num_bytes;
+		cache->space_info->bytes_reserved -= num_bytes;
+	}
+	spin_unlock(&cache->lock);
+	spin_unlock(&cache->space_info->lock);
 
-			if (likely(unpin))
-				clear_extent_dirty(&fs_info->pinned_extents,
-						   bytenr, bytenr + len -1,
-						   GFP_NOFS);
-			else
-				cache_block_group(cache);
+	btrfs_put_block_group(cache);
 
-			if (unpin)
-				btrfs_add_free_space(cache, bytenr, len);
-		}
-		btrfs_put_block_group(cache);
-		bytenr += len;
-		num -= len;
+	set_extent_dirty(fs_info->pinned_extents,
+			 bytenr, bytenr + num_bytes - 1, GFP_NOFS);
+	return 0;
+}
+
+static int update_reserved_extents(struct btrfs_block_group_cache *cache,
+				   u64 num_bytes, int reserve)
+{
+	spin_lock(&cache->space_info->lock);
+	spin_lock(&cache->lock);
+	if (reserve) {
+		cache->reserved += num_bytes;
+		cache->space_info->bytes_reserved += num_bytes;
+	} else {
+		cache->reserved -= num_bytes;
+		cache->space_info->bytes_reserved -= num_bytes;
 	}
+	spin_unlock(&cache->lock);
+	spin_unlock(&cache->space_info->lock);
 	return 0;
 }
 
-static int update_reserved_extents(struct btrfs_root *root,
-				   u64 bytenr, u64 num, int reserve)
+int btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans,
+				struct btrfs_root *root)
 {
-	u64 len;
-	struct btrfs_block_group_cache *cache;
 	struct btrfs_fs_info *fs_info = root->fs_info;
+	struct btrfs_caching_control *next;
+	struct btrfs_caching_control *caching_ctl;
+	struct btrfs_block_group_cache *cache;
 
-	while (num > 0) {
-		cache = btrfs_lookup_block_group(fs_info, bytenr);
-		BUG_ON(!cache);
-		len = min(num, cache->key.offset -
-			  (bytenr - cache->key.objectid));
+	down_write(&fs_info->extent_commit_sem);
 
-		spin_lock(&cache->space_info->lock);
-		spin_lock(&cache->lock);
-		if (reserve) {
-			cache->reserved += len;
-			cache->space_info->bytes_reserved += len;
+	list_for_each_entry_safe(caching_ctl, next,
+				 &fs_info->caching_block_groups, list) {
+		cache = caching_ctl->block_group;
+		if (block_group_cache_done(cache)) {
+			cache->last_byte_to_unpin = (u64)-1;
+			list_del_init(&caching_ctl->list);
+			put_caching_control(caching_ctl);
 		} else {
-			cache->reserved -= len;
-			cache->space_info->bytes_reserved -= len;
+			cache->last_byte_to_unpin = caching_ctl->progress;
 		}
-		spin_unlock(&cache->lock);
-		spin_unlock(&cache->space_info->lock);
-		btrfs_put_block_group(cache);
-		bytenr += len;
-		num -= len;
 	}
+
+	if (fs_info->pinned_extents == &fs_info->freed_extents[0])
+		fs_info->pinned_extents = &fs_info->freed_extents[1];
+	else
+		fs_info->pinned_extents = &fs_info->freed_extents[0];
+
+	up_write(&fs_info->extent_commit_sem);
 	return 0;
 }
 
-int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy)
+static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end)
 {
-	u64 last = 0;
-	u64 start;
-	u64 end;
-	struct extent_io_tree *pinned_extents = &root->fs_info->pinned_extents;
-	int ret;
+	struct btrfs_fs_info *fs_info = root->fs_info;
+	struct btrfs_block_group_cache *cache = NULL;
+	u64 len;
 
-	while (1) {
-		ret = find_first_extent_bit(pinned_extents, last,
-					    &start, &end, EXTENT_DIRTY);
-		if (ret)
-			break;
+	while (start <= end) {
+		if (!cache ||
+		    start >= cache->key.objectid + cache->key.offset) {
+			if (cache)
+				btrfs_put_block_group(cache);
+			cache = btrfs_lookup_block_group(fs_info, start);
+			BUG_ON(!cache);
+		}
+
+		len = cache->key.objectid + cache->key.offset - start;
+		len = min(len, end + 1 - start);
+
+		if (start < cache->last_byte_to_unpin) {
+			len = min(len, cache->last_byte_to_unpin - start);
+			btrfs_add_free_space(cache, start, len);
+		}
 
-		set_extent_dirty(copy, start, end, GFP_NOFS);
-		last = end + 1;
+		spin_lock(&cache->space_info->lock);
+		spin_lock(&cache->lock);
+		cache->pinned -= len;
+		cache->space_info->bytes_pinned -= len;
+		spin_unlock(&cache->lock);
+		spin_unlock(&cache->space_info->lock);
+
+		start += len;
 	}
+
+	if (cache)
+		btrfs_put_block_group(cache);
 	return 0;
 }
 
 int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
-			       struct btrfs_root *root,
-			       struct extent_io_tree *unpin)
+			       struct btrfs_root *root)
 {
+	struct btrfs_fs_info *fs_info = root->fs_info;
+	struct extent_io_tree *unpin;
 	u64 start;
 	u64 end;
 	int ret;
 
+	if (fs_info->pinned_extents == &fs_info->freed_extents[0])
+		unpin = &fs_info->freed_extents[1];
+	else
+		unpin = &fs_info->freed_extents[0];
+
 	while (1) {
 		ret = find_first_extent_bit(unpin, 0, &start, &end,
 					    EXTENT_DIRTY);
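Note: btrfs_prepare_extent_commit() above double-buffers pinned extents: fs_info->pinned_extents points at one of freed_extents[0]/[1], new pins accumulate there, and at commit time the pointer flips so btrfs_finish_extent_commit() can drain the tree that was just committed. A userspace sketch of the flip (types reduced to the bare minimum):

    #include <assert.h>

    struct tree { int entries; };

    struct fs {
        struct tree freed[2];
        struct tree *pinned;    /* the tree collecting new pins */
    };

    static void prepare_commit(struct fs *fs)
    {
        /* flip which of the two trees collects new pins */
        if (fs->pinned == &fs->freed[0])
            fs->pinned = &fs->freed[1];
        else
            fs->pinned = &fs->freed[0];
    }

    static struct tree *tree_to_unpin(struct fs *fs)
    {
        /* the tree *not* taking new pins holds the committed set */
        return (fs->pinned == &fs->freed[0]) ? &fs->freed[1]
                                             : &fs->freed[0];
    }

    int main(void)
    {
        struct fs fs = { .pinned = &fs.freed[0] };

        prepare_commit(&fs);
        assert(fs.pinned == &fs.freed[1]);
        assert(tree_to_unpin(&fs) == &fs.freed[0]);
        return 0;
    }

This is why btrfs_finish_extent_commit() below no longer takes an unpin tree as a parameter: it derives the correct one from the current pinned_extents pointer.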
@@ -3185,10 +3666,8 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, | |||
3185 | 3666 | ||
3186 | ret = btrfs_discard_extent(root, start, end + 1 - start); | 3667 | ret = btrfs_discard_extent(root, start, end + 1 - start); |
3187 | 3668 | ||
3188 | /* unlocks the pinned mutex */ | ||
3189 | btrfs_update_pinned_extents(root, start, end + 1 - start, 0); | ||
3190 | clear_extent_dirty(unpin, start, end, GFP_NOFS); | 3669 | clear_extent_dirty(unpin, start, end, GFP_NOFS); |
3191 | 3670 | unpin_extent_range(root, start, end); | |
3192 | cond_resched(); | 3671 | cond_resched(); |
3193 | } | 3672 | } |
3194 | 3673 | ||
@@ -3198,7 +3677,8 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, | |||
3198 | static int pin_down_bytes(struct btrfs_trans_handle *trans, | 3677 | static int pin_down_bytes(struct btrfs_trans_handle *trans, |
3199 | struct btrfs_root *root, | 3678 | struct btrfs_root *root, |
3200 | struct btrfs_path *path, | 3679 | struct btrfs_path *path, |
3201 | u64 bytenr, u64 num_bytes, int is_data, | 3680 | u64 bytenr, u64 num_bytes, |
3681 | int is_data, int reserved, | ||
3202 | struct extent_buffer **must_clean) | 3682 | struct extent_buffer **must_clean) |
3203 | { | 3683 | { |
3204 | int err = 0; | 3684 | int err = 0; |
@@ -3207,6 +3687,14 @@ static int pin_down_bytes(struct btrfs_trans_handle *trans, | |||
3207 | if (is_data) | 3687 | if (is_data) |
3208 | goto pinit; | 3688 | goto pinit; |
3209 | 3689 | ||
3690 | /* | ||
3691 | * discard is sloooow, and so triggering discards on | ||
3692 | * individual btree blocks isn't a good plan. Just | ||
3693 | * pin everything in discard mode. | ||
3694 | */ | ||
3695 | if (btrfs_test_opt(root, DISCARD)) | ||
3696 | goto pinit; | ||
3697 | |||
3210 | buf = btrfs_find_tree_block(root, bytenr, num_bytes); | 3698 | buf = btrfs_find_tree_block(root, bytenr, num_bytes); |
3211 | if (!buf) | 3699 | if (!buf) |
3212 | goto pinit; | 3700 | goto pinit; |
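Editor's note: the new DISCARD check above short-circuits the clean-buffer reuse path. Trimming many small btree blocks one by one is slow, so in discard mode everything is pinned and trimmed in larger batches at commit. A standalone sketch of the decision, with the mount option reduced to a plain flag (btrfs_test_opt is the real macro; toy_root and should_just_pin are made up here):

    struct toy_root { int discard_mount_opt; };

    /* Decide whether to skip straight to pinning ("pinit" above). */
    static int should_just_pin(const struct toy_root *root, int is_data)
    {
        if (is_data)
            return 1;  /* data extents always take the pin path */
        if (root->discard_mount_opt)
            return 1;  /* btree blocks too, so trims can be batched */
        return 0;      /* otherwise try to reclaim the buffer directly */
    }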
@@ -3230,15 +3718,15 @@ static int pin_down_bytes(struct btrfs_trans_handle *trans, | |||
3230 | } | 3718 | } |
3231 | free_extent_buffer(buf); | 3719 | free_extent_buffer(buf); |
3232 | pinit: | 3720 | pinit: |
3233 | btrfs_set_path_blocking(path); | 3721 | if (path) |
3722 | btrfs_set_path_blocking(path); | ||
3234 | /* unlocks the pinned mutex */ | 3723 | /* unlocks the pinned mutex */ |
3235 | btrfs_update_pinned_extents(root, bytenr, num_bytes, 1); | 3724 | btrfs_pin_extent(root, bytenr, num_bytes, reserved); |
3236 | 3725 | ||
3237 | BUG_ON(err < 0); | 3726 | BUG_ON(err < 0); |
3238 | return 0; | 3727 | return 0; |
3239 | } | 3728 | } |
3240 | 3729 | ||
3241 | |||
3242 | static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | 3730 | static int __btrfs_free_extent(struct btrfs_trans_handle *trans, |
3243 | struct btrfs_root *root, | 3731 | struct btrfs_root *root, |
3244 | u64 bytenr, u64 num_bytes, u64 parent, | 3732 | u64 bytenr, u64 num_bytes, u64 parent, |
@@ -3412,7 +3900,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
3412 | } | 3900 | } |
3413 | 3901 | ||
3414 | ret = pin_down_bytes(trans, root, path, bytenr, | 3902 | ret = pin_down_bytes(trans, root, path, bytenr, |
3415 | num_bytes, is_data, &must_clean); | 3903 | num_bytes, is_data, 0, &must_clean); |
3416 | if (ret > 0) | 3904 | if (ret > 0) |
3417 | mark_free = 1; | 3905 | mark_free = 1; |
3418 | BUG_ON(ret < 0); | 3906 | BUG_ON(ret < 0); |
@@ -3543,8 +4031,7 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
3543 | if (root_objectid == BTRFS_TREE_LOG_OBJECTID) { | 4031 | if (root_objectid == BTRFS_TREE_LOG_OBJECTID) { |
3544 | WARN_ON(owner >= BTRFS_FIRST_FREE_OBJECTID); | 4032 | WARN_ON(owner >= BTRFS_FIRST_FREE_OBJECTID); |
3545 | /* unlocks the pinned mutex */ | 4033 | /* unlocks the pinned mutex */ |
3546 | btrfs_update_pinned_extents(root, bytenr, num_bytes, 1); | 4034 | btrfs_pin_extent(root, bytenr, num_bytes, 1); |
3547 | update_reserved_extents(root, bytenr, num_bytes, 0); | ||
3548 | ret = 0; | 4035 | ret = 0; |
3549 | } else if (owner < BTRFS_FIRST_FREE_OBJECTID) { | 4036 | } else if (owner < BTRFS_FIRST_FREE_OBJECTID) { |
3550 | ret = btrfs_add_delayed_tree_ref(trans, bytenr, num_bytes, | 4037 | ret = btrfs_add_delayed_tree_ref(trans, bytenr, num_bytes, |
@@ -3584,24 +4071,38 @@ static noinline int | |||
3584 | wait_block_group_cache_progress(struct btrfs_block_group_cache *cache, | 4071 | wait_block_group_cache_progress(struct btrfs_block_group_cache *cache, |
3585 | u64 num_bytes) | 4072 | u64 num_bytes) |
3586 | { | 4073 | { |
4074 | struct btrfs_caching_control *caching_ctl; | ||
3587 | DEFINE_WAIT(wait); | 4075 | DEFINE_WAIT(wait); |
3588 | 4076 | ||
3589 | prepare_to_wait(&cache->caching_q, &wait, TASK_UNINTERRUPTIBLE); | 4077 | caching_ctl = get_caching_control(cache); |
3590 | 4078 | if (!caching_ctl) | |
3591 | if (block_group_cache_done(cache)) { | ||
3592 | finish_wait(&cache->caching_q, &wait); | ||
3593 | return 0; | 4079 | return 0; |
3594 | } | ||
3595 | schedule(); | ||
3596 | finish_wait(&cache->caching_q, &wait); | ||
3597 | 4080 | ||
3598 | wait_event(cache->caching_q, block_group_cache_done(cache) || | 4081 | wait_event(caching_ctl->wait, block_group_cache_done(cache) || |
3599 | (cache->free_space >= num_bytes)); | 4082 | (cache->free_space >= num_bytes)); |
4083 | |||
4084 | put_caching_control(caching_ctl); | ||
4085 | return 0; | ||
4086 | } | ||
4087 | |||
4088 | static noinline int | ||
4089 | wait_block_group_cache_done(struct btrfs_block_group_cache *cache) | ||
4090 | { | ||
4091 | struct btrfs_caching_control *caching_ctl; | ||
4092 | DEFINE_WAIT(wait); | ||
4093 | |||
4094 | caching_ctl = get_caching_control(cache); | ||
4095 | if (!caching_ctl) | ||
4096 | return 0; | ||
4097 | |||
4098 | wait_event(caching_ctl->wait, block_group_cache_done(cache)); | ||
4099 | |||
4100 | put_caching_control(caching_ctl); | ||
3600 | return 0; | 4101 | return 0; |
3601 | } | 4102 | } |
3602 | 4103 | ||
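Editor's note: both wait helpers above follow the same discipline: take a reference on the caching control, sleep on its wait queue, drop the reference. get_caching_control returning NULL doubles as the fast path, meaning caching already finished. A reduced model of that refcounted handle (sketch only; the kernel object also carries the mutex and progress cursor used further down, and frees itself on the last put):

    struct toy_ctl {
        int refs;
        int done; /* caching finished */
    };

    /* NULL means "nothing to wait for": the caller skips the wait
     * queue entirely and never has to drop a reference.
     */
    static struct toy_ctl *get_ctl(struct toy_ctl *ctl)
    {
        if (ctl->done)
            return 0;
        ctl->refs++;
        return ctl;
    }

    static void put_ctl(struct toy_ctl *ctl)
    {
        ctl->refs--; /* the kernel frees the object on the last put */
    }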
3603 | enum btrfs_loop_type { | 4104 | enum btrfs_loop_type { |
3604 | LOOP_CACHED_ONLY = 0, | 4105 | LOOP_FIND_IDEAL = 0, |
3605 | LOOP_CACHING_NOWAIT = 1, | 4106 | LOOP_CACHING_NOWAIT = 1, |
3606 | LOOP_CACHING_WAIT = 2, | 4107 | LOOP_CACHING_WAIT = 2, |
3607 | LOOP_ALLOC_CHUNK = 3, | 4108 | LOOP_ALLOC_CHUNK = 3, |
@@ -3630,10 +4131,15 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, | |||
3630 | struct btrfs_block_group_cache *block_group = NULL; | 4131 | struct btrfs_block_group_cache *block_group = NULL; |
3631 | int empty_cluster = 2 * 1024 * 1024; | 4132 | int empty_cluster = 2 * 1024 * 1024; |
3632 | int allowed_chunk_alloc = 0; | 4133 | int allowed_chunk_alloc = 0; |
4134 | int done_chunk_alloc = 0; | ||
3633 | struct btrfs_space_info *space_info; | 4135 | struct btrfs_space_info *space_info; |
3634 | int last_ptr_loop = 0; | 4136 | int last_ptr_loop = 0; |
3635 | int loop = 0; | 4137 | int loop = 0; |
3636 | bool found_uncached_bg = false; | 4138 | bool found_uncached_bg = false; |
4139 | bool failed_cluster_refill = false; | ||
4140 | bool failed_alloc = false; | ||
4141 | u64 ideal_cache_percent = 0; | ||
4142 | u64 ideal_cache_offset = 0; | ||
3637 | 4143 | ||
3638 | WARN_ON(num_bytes < root->sectorsize); | 4144 | WARN_ON(num_bytes < root->sectorsize); |
3639 | btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); | 4145 | btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); |
@@ -3669,14 +4175,19 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, | |||
3669 | empty_cluster = 0; | 4175 | empty_cluster = 0; |
3670 | 4176 | ||
3671 | if (search_start == hint_byte) { | 4177 | if (search_start == hint_byte) { |
4178 | ideal_cache: | ||
3672 | block_group = btrfs_lookup_block_group(root->fs_info, | 4179 | block_group = btrfs_lookup_block_group(root->fs_info, |
3673 | search_start); | 4180 | search_start); |
3674 | /* | 4181 | /* |
3675 | * we don't want to use the block group if it doesn't match our | 4182 | * we don't want to use the block group if it doesn't match our |
3676 | * allocation bits, or if it's not cached. | 4183 | * allocation bits, or if it's not cached. |
4184 | * | ||
4185 | * However, if we are re-searching with an ideal block group | ||
4186 | * picked out, then we don't care that the block group is cached. | ||
3677 | */ | 4187 | */ |
3678 | if (block_group && block_group_bits(block_group, data) && | 4188 | if (block_group && block_group_bits(block_group, data) && |
3679 | block_group_cache_done(block_group)) { | 4189 | (block_group->cached != BTRFS_CACHE_NO || |
4190 | search_start == ideal_cache_offset)) { | ||
3680 | down_read(&space_info->groups_sem); | 4191 | down_read(&space_info->groups_sem); |
3681 | if (list_empty(&block_group->list) || | 4192 | if (list_empty(&block_group->list) || |
3682 | block_group->ro) { | 4193 | block_group->ro) { |
@@ -3688,13 +4199,13 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, | |||
3688 | */ | 4199 | */ |
3689 | btrfs_put_block_group(block_group); | 4200 | btrfs_put_block_group(block_group); |
3690 | up_read(&space_info->groups_sem); | 4201 | up_read(&space_info->groups_sem); |
3691 | } else | 4202 | } else { |
3692 | goto have_block_group; | 4203 | goto have_block_group; |
4204 | } | ||
3693 | } else if (block_group) { | 4205 | } else if (block_group) { |
3694 | btrfs_put_block_group(block_group); | 4206 | btrfs_put_block_group(block_group); |
3695 | } | 4207 | } |
3696 | } | 4208 | } |
3697 | |||
3698 | search: | 4209 | search: |
3699 | down_read(&space_info->groups_sem); | 4210 | down_read(&space_info->groups_sem); |
3700 | list_for_each_entry(block_group, &space_info->block_groups, list) { | 4211 | list_for_each_entry(block_group, &space_info->block_groups, list) { |
@@ -3706,32 +4217,58 @@ search: | |||
3706 | 4217 | ||
3707 | have_block_group: | 4218 | have_block_group: |
3708 | if (unlikely(block_group->cached == BTRFS_CACHE_NO)) { | 4219 | if (unlikely(block_group->cached == BTRFS_CACHE_NO)) { |
4220 | u64 free_percent; | ||
4221 | |||
4222 | free_percent = btrfs_block_group_used(&block_group->item); | ||
4223 | free_percent *= 100; | ||
4224 | free_percent = div64_u64(free_percent, | ||
4225 | block_group->key.offset); | ||
4226 | free_percent = 100 - free_percent; | ||
4227 | if (free_percent > ideal_cache_percent && | ||
4228 | likely(!block_group->ro)) { | ||
4229 | ideal_cache_offset = block_group->key.objectid; | ||
4230 | ideal_cache_percent = free_percent; | ||
4231 | } | ||
4232 | |||
3709 | /* | 4233 | /* |
3710 | * we want to start caching kthreads, but not too many | 4234 | * We only want to start kthread caching if we are at |
3711 | * right off the bat so we don't overwhelm the system, | 4235 | * the point where we will wait for caching to make |
3712 | * so only start them if there are less than 2 and we're | 4236 | * progress, or if our ideal search is over and we've |
3713 | * in the initial allocation phase. | 4237 | * found somebody to start caching. |
3714 | */ | 4238 | */ |
3715 | if (loop > LOOP_CACHING_NOWAIT || | 4239 | if (loop > LOOP_CACHING_NOWAIT || |
3716 | atomic_read(&space_info->caching_threads) < 2) { | 4240 | (loop > LOOP_FIND_IDEAL && |
4241 | atomic_read(&space_info->caching_threads) < 2)) { | ||
3717 | ret = cache_block_group(block_group); | 4242 | ret = cache_block_group(block_group); |
3718 | BUG_ON(ret); | 4243 | BUG_ON(ret); |
3719 | } | 4244 | } |
3720 | } | ||
3721 | |||
3722 | cached = block_group_cache_done(block_group); | ||
3723 | if (unlikely(!cached)) { | ||
3724 | found_uncached_bg = true; | 4245 | found_uncached_bg = true; |
3725 | 4246 | ||
3726 | /* if we only want cached bgs, loop */ | 4247 | /* |
3727 | if (loop == LOOP_CACHED_ONLY) | 4248 | * If we are still in the ideal-search pass, try the next block |
4249 | * group. | ||
4250 | */ | ||
4251 | if (loop == LOOP_FIND_IDEAL) | ||
3728 | goto loop; | 4252 | goto loop; |
3729 | } | 4253 | } |
3730 | 4254 | ||
4255 | cached = block_group_cache_done(block_group); | ||
4256 | if (unlikely(!cached)) | ||
4257 | found_uncached_bg = true; | ||
4258 | |||
3731 | if (unlikely(block_group->ro)) | 4259 | if (unlikely(block_group->ro)) |
3732 | goto loop; | 4260 | goto loop; |
3733 | 4261 | ||
3734 | if (last_ptr) { | 4262 | /* |
4263 | * Ok, we want to try and use the cluster allocator, so let's look | ||
4264 | * there, unless we are on LOOP_NO_EMPTY_SIZE, since we will | ||
4265 | * have tried the cluster allocator plenty of times at this | ||
4266 | * point and not have found anything, so we are likely way too | ||
4267 | * fragmented for the clustering stuff to find anything, so let's | ||
4268 | * just skip it and let the allocator find whatever block it can | ||
4269 | * find. | ||
4270 | */ | ||
4271 | if (last_ptr && loop < LOOP_NO_EMPTY_SIZE) { | ||
3735 | /* | 4272 | /* |
3736 | * the refill lock keeps out other | 4273 | * the refill lock keeps out other |
3737 | * people trying to start a new cluster | 4274 | * people trying to start a new cluster |
@@ -3806,9 +4343,11 @@ refill_cluster: | |||
3806 | spin_unlock(&last_ptr->refill_lock); | 4343 | spin_unlock(&last_ptr->refill_lock); |
3807 | goto checks; | 4344 | goto checks; |
3808 | } | 4345 | } |
3809 | } else if (!cached && loop > LOOP_CACHING_NOWAIT) { | 4346 | } else if (!cached && loop > LOOP_CACHING_NOWAIT |
4347 | && !failed_cluster_refill) { | ||
3810 | spin_unlock(&last_ptr->refill_lock); | 4348 | spin_unlock(&last_ptr->refill_lock); |
3811 | 4349 | ||
4350 | failed_cluster_refill = true; | ||
3812 | wait_block_group_cache_progress(block_group, | 4351 | wait_block_group_cache_progress(block_group, |
3813 | num_bytes + empty_cluster + empty_size); | 4352 | num_bytes + empty_cluster + empty_size); |
3814 | goto have_block_group; | 4353 | goto have_block_group; |
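Editor's note: failed_cluster_refill turns what used to be an unbounded wait-and-retry into exactly one retry per block group: fail once, wait for caching progress, try again, then move on. The control flow, reduced to a standalone loop with stubbed-out helpers (none of these names exist in btrfs):

    #include <stdbool.h>

    static bool alloc_from_cluster(void)      { return false; }
    static bool bg_fully_cached(void)         { return false; }
    static void wait_for_cache_progress(void) { }

    /* Returns true if the cluster allocation eventually succeeded. */
    static bool try_block_group_once(void)
    {
        bool waited = false; /* plays the role of failed_cluster_refill */

        for (;;) {
            if (alloc_from_cluster())
                return true;
            if (bg_fully_cached() || waited)
                return false; /* second miss: caller moves to next bg */
            waited = true;
            wait_for_cache_progress();
        }
    }

The failed_alloc flag a few hunks below applies the same wait-once rule to the non-clustered btrfs_find_space_for_alloc path.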
@@ -3820,25 +4359,30 @@ refill_cluster: | |||
3820 | * cluster. Free the cluster we've been trying | 4359 | * cluster. Free the cluster we've been trying |
3821 | * to use, and go to the next block group | 4360 | * to use, and go to the next block group |
3822 | */ | 4361 | */ |
3823 | if (loop < LOOP_NO_EMPTY_SIZE) { | 4362 | btrfs_return_cluster_to_free_space(NULL, last_ptr); |
3824 | btrfs_return_cluster_to_free_space(NULL, | ||
3825 | last_ptr); | ||
3826 | spin_unlock(&last_ptr->refill_lock); | ||
3827 | goto loop; | ||
3828 | } | ||
3829 | spin_unlock(&last_ptr->refill_lock); | 4363 | spin_unlock(&last_ptr->refill_lock); |
4364 | goto loop; | ||
3830 | } | 4365 | } |
3831 | 4366 | ||
3832 | offset = btrfs_find_space_for_alloc(block_group, search_start, | 4367 | offset = btrfs_find_space_for_alloc(block_group, search_start, |
3833 | num_bytes, empty_size); | 4368 | num_bytes, empty_size); |
3834 | if (!offset && (cached || (!cached && | 4369 | /* |
3835 | loop == LOOP_CACHING_NOWAIT))) { | 4370 | * If we didn't find a chunk, and we haven't failed on this |
3836 | goto loop; | 4371 | * block group before, and this block group is in the middle of |
3837 | } else if (!offset && (!cached && | 4372 | * caching and we are ok with waiting, then go ahead and wait |
3838 | loop > LOOP_CACHING_NOWAIT)) { | 4373 | * for progress to be made, and set failed_alloc to true. |
4374 | * | ||
4375 | * If failed_alloc is true then we've already waited on this | ||
4376 | * block group once and should move on to the next block group. | ||
4377 | */ | ||
4378 | if (!offset && !failed_alloc && !cached && | ||
4379 | loop > LOOP_CACHING_NOWAIT) { | ||
3839 | wait_block_group_cache_progress(block_group, | 4380 | wait_block_group_cache_progress(block_group, |
3840 | num_bytes + empty_size); | 4381 | num_bytes + empty_size); |
4382 | failed_alloc = true; | ||
3841 | goto have_block_group; | 4383 | goto have_block_group; |
4384 | } else if (!offset) { | ||
4385 | goto loop; | ||
3842 | } | 4386 | } |
3843 | checks: | 4387 | checks: |
3844 | search_start = stripe_align(root, offset); | 4388 | search_start = stripe_align(root, offset); |
@@ -3880,16 +4424,22 @@ checks: | |||
3880 | search_start - offset); | 4424 | search_start - offset); |
3881 | BUG_ON(offset > search_start); | 4425 | BUG_ON(offset > search_start); |
3882 | 4426 | ||
4427 | update_reserved_extents(block_group, num_bytes, 1); | ||
4428 | |||
3883 | /* we are all good, let's return */ | 4429 | /* we are all good, let's return */ |
3884 | break; | 4430 | break; |
3885 | loop: | 4431 | loop: |
4432 | failed_cluster_refill = false; | ||
4433 | failed_alloc = false; | ||
3886 | btrfs_put_block_group(block_group); | 4434 | btrfs_put_block_group(block_group); |
3887 | } | 4435 | } |
3888 | up_read(&space_info->groups_sem); | 4436 | up_read(&space_info->groups_sem); |
3889 | 4437 | ||
3890 | /* LOOP_CACHED_ONLY, only search fully cached block groups | 4438 | /* LOOP_FIND_IDEAL, only search caching/cached bg's, and don't wait |
3891 | * LOOP_CACHING_NOWAIT, search partially cached block groups, but | 4439 | * for them to make caching progress. Also |
3892 | * don't wait for them to finish caching | 4440 | * determine the best possible bg to cache |
4441 | * LOOP_CACHING_NOWAIT, search partially cached block groups, kicking | ||
4442 | * caching kthreads as we move along | ||
3893 | * LOOP_CACHING_WAIT, search everything, and wait if our bg is caching | 4443 | * LOOP_CACHING_WAIT, search everything, and wait if our bg is caching |
3894 | * LOOP_ALLOC_CHUNK, force a chunk allocation and try again | 4444 | * LOOP_ALLOC_CHUNK, force a chunk allocation and try again |
3895 | * LOOP_NO_EMPTY_SIZE, set empty_size and empty_cluster to 0 and try | 4445 | * LOOP_NO_EMPTY_SIZE, set empty_size and empty_cluster to 0 and try |
@@ -3898,12 +4448,47 @@ loop: | |||
3898 | if (!ins->objectid && loop < LOOP_NO_EMPTY_SIZE && | 4448 | if (!ins->objectid && loop < LOOP_NO_EMPTY_SIZE && |
3899 | (found_uncached_bg || empty_size || empty_cluster || | 4449 | (found_uncached_bg || empty_size || empty_cluster || |
3900 | allowed_chunk_alloc)) { | 4450 | allowed_chunk_alloc)) { |
3901 | if (found_uncached_bg) { | 4451 | if (loop == LOOP_FIND_IDEAL && found_uncached_bg) { |
3902 | found_uncached_bg = false; | 4452 | found_uncached_bg = false; |
3903 | if (loop < LOOP_CACHING_WAIT) { | 4453 | loop++; |
3904 | loop++; | 4454 | if (!ideal_cache_percent && |
4455 | atomic_read(&space_info->caching_threads)) | ||
3905 | goto search; | 4456 | goto search; |
3906 | } | 4457 | |
4458 | /* | ||
4459 | * 1 of the following 2 things has happened so far | ||
4460 | * | ||
4461 | * 1) We found an ideal block group for caching that | ||
4462 | * is mostly empty and will cache quickly, so we might | ||
4463 | * as well wait for it. | ||
4464 | * | ||
4465 | * 2) We searched for cached only and we didn't find | ||
4466 | * anything, and we didn't start any caching kthreads | ||
4467 | * either, so chances are we will loop through and | ||
4468 | * start a couple caching kthreads, and then come back | ||
4469 | * around and just wait for them. This will be slower | ||
4470 | * because we will have 2 caching kthreads reading at | ||
4471 | * the same time when we could have just started one | ||
4472 | * and waited for it to get far enough to give us an | ||
4473 | * allocation, so go ahead and go to the wait caching | ||
4474 | * loop. | ||
4475 | */ | ||
4476 | loop = LOOP_CACHING_WAIT; | ||
4477 | search_start = ideal_cache_offset; | ||
4478 | ideal_cache_percent = 0; | ||
4479 | goto ideal_cache; | ||
4480 | } else if (loop == LOOP_FIND_IDEAL) { | ||
4481 | /* | ||
4482 | * Didn't find an uncached bg, wait on anything we find | ||
4483 | * next. | ||
4484 | */ | ||
4485 | loop = LOOP_CACHING_WAIT; | ||
4486 | goto search; | ||
4487 | } | ||
4488 | |||
4489 | if (loop < LOOP_CACHING_WAIT) { | ||
4490 | loop++; | ||
4491 | goto search; | ||
3907 | } | 4492 | } |
3908 | 4493 | ||
3909 | if (loop == LOOP_ALLOC_CHUNK) { | 4494 | if (loop == LOOP_ALLOC_CHUNK) { |
@@ -3915,7 +4500,8 @@ loop: | |||
3915 | ret = do_chunk_alloc(trans, root, num_bytes + | 4500 | ret = do_chunk_alloc(trans, root, num_bytes + |
3916 | 2 * 1024 * 1024, data, 1); | 4501 | 2 * 1024 * 1024, data, 1); |
3917 | allowed_chunk_alloc = 0; | 4502 | allowed_chunk_alloc = 0; |
3918 | } else { | 4503 | done_chunk_alloc = 1; |
4504 | } else if (!done_chunk_alloc) { | ||
3919 | space_info->force_alloc = 1; | 4505 | space_info->force_alloc = 1; |
3920 | } | 4506 | } |
3921 | 4507 | ||
@@ -3940,21 +4526,32 @@ loop: | |||
3940 | return ret; | 4526 | return ret; |
3941 | } | 4527 | } |
3942 | 4528 | ||
3943 | static void dump_space_info(struct btrfs_space_info *info, u64 bytes) | 4529 | static void dump_space_info(struct btrfs_space_info *info, u64 bytes, |
4530 | int dump_block_groups) | ||
3944 | { | 4531 | { |
3945 | struct btrfs_block_group_cache *cache; | 4532 | struct btrfs_block_group_cache *cache; |
3946 | 4533 | ||
4534 | spin_lock(&info->lock); | ||
3947 | printk(KERN_INFO "space_info has %llu free, is %sfull\n", | 4535 | printk(KERN_INFO "space_info has %llu free, is %sfull\n", |
3948 | (unsigned long long)(info->total_bytes - info->bytes_used - | 4536 | (unsigned long long)(info->total_bytes - info->bytes_used - |
3949 | info->bytes_pinned - info->bytes_reserved), | 4537 | info->bytes_pinned - info->bytes_reserved - |
4538 | info->bytes_super), | ||
3950 | (info->full) ? "" : "not "); | 4539 | (info->full) ? "" : "not "); |
3951 | printk(KERN_INFO "space_info total=%llu, pinned=%llu, delalloc=%llu," | 4540 | printk(KERN_INFO "space_info total=%llu, pinned=%llu, delalloc=%llu," |
3952 | " may_use=%llu, used=%llu\n", | 4541 | " may_use=%llu, used=%llu, root=%llu, super=%llu, reserved=%llu" |
4542 | "\n", | ||
3953 | (unsigned long long)info->total_bytes, | 4543 | (unsigned long long)info->total_bytes, |
3954 | (unsigned long long)info->bytes_pinned, | 4544 | (unsigned long long)info->bytes_pinned, |
3955 | (unsigned long long)info->bytes_delalloc, | 4545 | (unsigned long long)info->bytes_delalloc, |
3956 | (unsigned long long)info->bytes_may_use, | 4546 | (unsigned long long)info->bytes_may_use, |
3957 | (unsigned long long)info->bytes_used); | 4547 | (unsigned long long)info->bytes_used, |
4548 | (unsigned long long)info->bytes_root, | ||
4549 | (unsigned long long)info->bytes_super, | ||
4550 | (unsigned long long)info->bytes_reserved); | ||
4551 | spin_unlock(&info->lock); | ||
4552 | |||
4553 | if (!dump_block_groups) | ||
4554 | return; | ||
3958 | 4555 | ||
3959 | down_read(&info->groups_sem); | 4556 | down_read(&info->groups_sem); |
3960 | list_for_each_entry(cache, &info->block_groups, list) { | 4557 | list_for_each_entry(cache, &info->block_groups, list) { |
@@ -3972,12 +4569,12 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes) | |||
3972 | up_read(&info->groups_sem); | 4569 | up_read(&info->groups_sem); |
3973 | } | 4570 | } |
3974 | 4571 | ||
3975 | static int __btrfs_reserve_extent(struct btrfs_trans_handle *trans, | 4572 | int btrfs_reserve_extent(struct btrfs_trans_handle *trans, |
3976 | struct btrfs_root *root, | 4573 | struct btrfs_root *root, |
3977 | u64 num_bytes, u64 min_alloc_size, | 4574 | u64 num_bytes, u64 min_alloc_size, |
3978 | u64 empty_size, u64 hint_byte, | 4575 | u64 empty_size, u64 hint_byte, |
3979 | u64 search_end, struct btrfs_key *ins, | 4576 | u64 search_end, struct btrfs_key *ins, |
3980 | u64 data) | 4577 | u64 data) |
3981 | { | 4578 | { |
3982 | int ret; | 4579 | int ret; |
3983 | u64 search_start = 0; | 4580 | u64 search_start = 0; |
@@ -4022,7 +4619,7 @@ again: | |||
4022 | printk(KERN_ERR "btrfs allocation failed flags %llu, " | 4619 | printk(KERN_ERR "btrfs allocation failed flags %llu, " |
4023 | "wanted %llu\n", (unsigned long long)data, | 4620 | "wanted %llu\n", (unsigned long long)data, |
4024 | (unsigned long long)num_bytes); | 4621 | (unsigned long long)num_bytes); |
4025 | dump_space_info(sinfo, num_bytes); | 4622 | dump_space_info(sinfo, num_bytes, 1); |
4026 | } | 4623 | } |
4027 | 4624 | ||
4028 | return ret; | 4625 | return ret; |
@@ -4043,25 +4640,8 @@ int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len) | |||
4043 | ret = btrfs_discard_extent(root, start, len); | 4640 | ret = btrfs_discard_extent(root, start, len); |
4044 | 4641 | ||
4045 | btrfs_add_free_space(cache, start, len); | 4642 | btrfs_add_free_space(cache, start, len); |
4643 | update_reserved_extents(cache, len, 0); | ||
4046 | btrfs_put_block_group(cache); | 4644 | btrfs_put_block_group(cache); |
4047 | update_reserved_extents(root, start, len, 0); | ||
4048 | |||
4049 | return ret; | ||
4050 | } | ||
4051 | |||
4052 | int btrfs_reserve_extent(struct btrfs_trans_handle *trans, | ||
4053 | struct btrfs_root *root, | ||
4054 | u64 num_bytes, u64 min_alloc_size, | ||
4055 | u64 empty_size, u64 hint_byte, | ||
4056 | u64 search_end, struct btrfs_key *ins, | ||
4057 | u64 data) | ||
4058 | { | ||
4059 | int ret; | ||
4060 | ret = __btrfs_reserve_extent(trans, root, num_bytes, min_alloc_size, | ||
4061 | empty_size, hint_byte, search_end, ins, | ||
4062 | data); | ||
4063 | if (!ret) | ||
4064 | update_reserved_extents(root, ins->objectid, ins->offset, 1); | ||
4065 | 4645 | ||
4066 | return ret; | 4646 | return ret; |
4067 | } | 4647 | } |
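Editor's note: update_reserved_extents now operates on a block group rather than a (root, bytenr) pair, as the call sites above and below show: the reservation is a counter on the group that owns the range, taken when find_free_extent hands out space and released when the extent is freed or finally accounted. A sketch of the implied bookkeeping (toy_bg and its field name are assumptions; the real helper also adjusts space_info under the appropriate locks):

    struct toy_bg { long long bytes_reserved; };

    static void update_reserved(struct toy_bg *bg, long long len, int reserve)
    {
        if (reserve)
            bg->bytes_reserved += len; /* space handed out by the allocator */
        else
            bg->bytes_reserved -= len; /* freed back or converted to used */
    }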
@@ -4222,15 +4802,46 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans, | |||
4222 | { | 4802 | { |
4223 | int ret; | 4803 | int ret; |
4224 | struct btrfs_block_group_cache *block_group; | 4804 | struct btrfs_block_group_cache *block_group; |
4805 | struct btrfs_caching_control *caching_ctl; | ||
4806 | u64 start = ins->objectid; | ||
4807 | u64 num_bytes = ins->offset; | ||
4225 | 4808 | ||
4226 | block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid); | 4809 | block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid); |
4227 | cache_block_group(block_group); | 4810 | cache_block_group(block_group); |
4228 | wait_event(block_group->caching_q, | 4811 | caching_ctl = get_caching_control(block_group); |
4229 | block_group_cache_done(block_group)); | ||
4230 | 4812 | ||
4231 | ret = btrfs_remove_free_space(block_group, ins->objectid, | 4813 | if (!caching_ctl) { |
4232 | ins->offset); | 4814 | BUG_ON(!block_group_cache_done(block_group)); |
4233 | BUG_ON(ret); | 4815 | ret = btrfs_remove_free_space(block_group, start, num_bytes); |
4816 | BUG_ON(ret); | ||
4817 | } else { | ||
4818 | mutex_lock(&caching_ctl->mutex); | ||
4819 | |||
4820 | if (start >= caching_ctl->progress) { | ||
4821 | ret = add_excluded_extent(root, start, num_bytes); | ||
4822 | BUG_ON(ret); | ||
4823 | } else if (start + num_bytes <= caching_ctl->progress) { | ||
4824 | ret = btrfs_remove_free_space(block_group, | ||
4825 | start, num_bytes); | ||
4826 | BUG_ON(ret); | ||
4827 | } else { | ||
4828 | num_bytes = caching_ctl->progress - start; | ||
4829 | ret = btrfs_remove_free_space(block_group, | ||
4830 | start, num_bytes); | ||
4831 | BUG_ON(ret); | ||
4832 | |||
4833 | start = caching_ctl->progress; | ||
4834 | num_bytes = ins->objectid + ins->offset - | ||
4835 | caching_ctl->progress; | ||
4836 | ret = add_excluded_extent(root, start, num_bytes); | ||
4837 | BUG_ON(ret); | ||
4838 | } | ||
4839 | |||
4840 | mutex_unlock(&caching_ctl->mutex); | ||
4841 | put_caching_control(caching_ctl); | ||
4842 | } | ||
4843 | |||
4844 | update_reserved_extents(block_group, ins->offset, 1); | ||
4234 | btrfs_put_block_group(block_group); | 4845 | btrfs_put_block_group(block_group); |
4235 | ret = alloc_reserved_file_extent(trans, root, 0, root_objectid, | 4846 | ret = alloc_reserved_file_extent(trans, root, 0, root_objectid, |
4236 | 0, owner, offset, ins, 1); | 4847 | 0, owner, offset, ins, 1); |
@@ -4254,9 +4865,9 @@ static int alloc_tree_block(struct btrfs_trans_handle *trans, | |||
4254 | int ret; | 4865 | int ret; |
4255 | u64 flags = 0; | 4866 | u64 flags = 0; |
4256 | 4867 | ||
4257 | ret = __btrfs_reserve_extent(trans, root, num_bytes, num_bytes, | 4868 | ret = btrfs_reserve_extent(trans, root, num_bytes, num_bytes, |
4258 | empty_size, hint_byte, search_end, | 4869 | empty_size, hint_byte, search_end, |
4259 | ins, 0); | 4870 | ins, 0); |
4260 | if (ret) | 4871 | if (ret) |
4261 | return ret; | 4872 | return ret; |
4262 | 4873 | ||
@@ -4267,7 +4878,6 @@ static int alloc_tree_block(struct btrfs_trans_handle *trans, | |||
4267 | } else | 4878 | } else |
4268 | BUG_ON(parent > 0); | 4879 | BUG_ON(parent > 0); |
4269 | 4880 | ||
4270 | update_reserved_extents(root, ins->objectid, ins->offset, 1); | ||
4271 | if (root_objectid != BTRFS_TREE_LOG_OBJECTID) { | 4881 | if (root_objectid != BTRFS_TREE_LOG_OBJECTID) { |
4272 | struct btrfs_delayed_extent_op *extent_op; | 4882 | struct btrfs_delayed_extent_op *extent_op; |
4273 | extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS); | 4883 | extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS); |
@@ -4346,452 +4956,108 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, | |||
4346 | return buf; | 4956 | return buf; |
4347 | } | 4957 | } |
4348 | 4958 | ||
4349 | #if 0 | 4959 | struct walk_control { |
4350 | int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, | 4960 | u64 refs[BTRFS_MAX_LEVEL]; |
4351 | struct btrfs_root *root, struct extent_buffer *leaf) | 4961 | u64 flags[BTRFS_MAX_LEVEL]; |
4352 | { | 4962 | struct btrfs_key update_progress; |
4353 | u64 disk_bytenr; | 4963 | int stage; |
4354 | u64 num_bytes; | 4964 | int level; |
4355 | struct btrfs_key key; | 4965 | int shared_level; |
4356 | struct btrfs_file_extent_item *fi; | 4966 | int update_ref; |
4357 | u32 nritems; | 4967 | int keep_locks; |
4358 | int i; | 4968 | int reada_slot; |
4359 | int ret; | 4969 | int reada_count; |
4360 | 4970 | }; | |
4361 | BUG_ON(!btrfs_is_leaf(leaf)); | ||
4362 | nritems = btrfs_header_nritems(leaf); | ||
4363 | |||
4364 | for (i = 0; i < nritems; i++) { | ||
4365 | cond_resched(); | ||
4366 | btrfs_item_key_to_cpu(leaf, &key, i); | ||
4367 | |||
4368 | /* only extents have references, skip everything else */ | ||
4369 | if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) | ||
4370 | continue; | ||
4371 | |||
4372 | fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item); | ||
4373 | |||
4374 | /* inline extents live in the btree, they don't have refs */ | ||
4375 | if (btrfs_file_extent_type(leaf, fi) == | ||
4376 | BTRFS_FILE_EXTENT_INLINE) | ||
4377 | continue; | ||
4378 | |||
4379 | disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); | ||
4380 | |||
4381 | /* holes don't have refs */ | ||
4382 | if (disk_bytenr == 0) | ||
4383 | continue; | ||
4384 | |||
4385 | num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi); | ||
4386 | ret = btrfs_free_extent(trans, root, disk_bytenr, num_bytes, | ||
4387 | leaf->start, 0, key.objectid, 0); | ||
4388 | BUG_ON(ret); | ||
4389 | } | ||
4390 | return 0; | ||
4391 | } | ||
4392 | |||
4393 | static noinline int cache_drop_leaf_ref(struct btrfs_trans_handle *trans, | ||
4394 | struct btrfs_root *root, | ||
4395 | struct btrfs_leaf_ref *ref) | ||
4396 | { | ||
4397 | int i; | ||
4398 | int ret; | ||
4399 | struct btrfs_extent_info *info; | ||
4400 | struct refsort *sorted; | ||
4401 | |||
4402 | if (ref->nritems == 0) | ||
4403 | return 0; | ||
4404 | |||
4405 | sorted = kmalloc(sizeof(*sorted) * ref->nritems, GFP_NOFS); | ||
4406 | for (i = 0; i < ref->nritems; i++) { | ||
4407 | sorted[i].bytenr = ref->extents[i].bytenr; | ||
4408 | sorted[i].slot = i; | ||
4409 | } | ||
4410 | sort(sorted, ref->nritems, sizeof(struct refsort), refsort_cmp, NULL); | ||
4411 | |||
4412 | /* | ||
4413 | * the items in the ref were sorted when the ref was inserted | ||
4414 | * into the ref cache, so this is already in order | ||
4415 | */ | ||
4416 | for (i = 0; i < ref->nritems; i++) { | ||
4417 | info = ref->extents + sorted[i].slot; | ||
4418 | ret = btrfs_free_extent(trans, root, info->bytenr, | ||
4419 | info->num_bytes, ref->bytenr, | ||
4420 | ref->owner, ref->generation, | ||
4421 | info->objectid, 0); | ||
4422 | |||
4423 | atomic_inc(&root->fs_info->throttle_gen); | ||
4424 | wake_up(&root->fs_info->transaction_throttle); | ||
4425 | cond_resched(); | ||
4426 | |||
4427 | BUG_ON(ret); | ||
4428 | info++; | ||
4429 | } | ||
4430 | |||
4431 | kfree(sorted); | ||
4432 | return 0; | ||
4433 | } | ||
4434 | |||
4435 | |||
4436 | static int drop_snap_lookup_refcount(struct btrfs_trans_handle *trans, | ||
4437 | struct btrfs_root *root, u64 start, | ||
4438 | u64 len, u32 *refs) | ||
4439 | { | ||
4440 | int ret; | ||
4441 | |||
4442 | ret = btrfs_lookup_extent_refs(trans, root, start, len, refs); | ||
4443 | BUG_ON(ret); | ||
4444 | |||
4445 | #if 0 /* some debugging code in case we see problems here */ | ||
4446 | /* if the refs count is one, it won't get increased again. But | ||
4447 | * if the ref count is > 1, someone may be decreasing it at | ||
4448 | * the same time we are. | ||
4449 | */ | ||
4450 | if (*refs != 1) { | ||
4451 | struct extent_buffer *eb = NULL; | ||
4452 | eb = btrfs_find_create_tree_block(root, start, len); | ||
4453 | if (eb) | ||
4454 | btrfs_tree_lock(eb); | ||
4455 | |||
4456 | mutex_lock(&root->fs_info->alloc_mutex); | ||
4457 | ret = lookup_extent_ref(NULL, root, start, len, refs); | ||
4458 | BUG_ON(ret); | ||
4459 | mutex_unlock(&root->fs_info->alloc_mutex); | ||
4460 | |||
4461 | if (eb) { | ||
4462 | btrfs_tree_unlock(eb); | ||
4463 | free_extent_buffer(eb); | ||
4464 | } | ||
4465 | if (*refs == 1) { | ||
4466 | printk(KERN_ERR "btrfs block %llu went down to one " | ||
4467 | "during drop_snap\n", (unsigned long long)start); | ||
4468 | } | ||
4469 | |||
4470 | } | ||
4471 | #endif | ||
4472 | |||
4473 | cond_resched(); | ||
4474 | return ret; | ||
4475 | } | ||
4476 | 4971 | ||
4972 | #define DROP_REFERENCE 1 | ||
4973 | #define UPDATE_BACKREF 2 | ||
4477 | 4974 | ||
4478 | /* | 4975 | static noinline void reada_walk_down(struct btrfs_trans_handle *trans, |
4479 | * this is used while deleting old snapshots, and it drops the refs | 4976 | struct btrfs_root *root, |
4480 | * on a whole subtree starting from a level 1 node. | 4977 | struct walk_control *wc, |
4481 | * | 4978 | struct btrfs_path *path) |
4482 | * The idea is to sort all the leaf pointers, and then drop the | ||
4483 | * ref on all the leaves in order. Most of the time the leaves | ||
4484 | * will have ref cache entries, so no leaf IOs will be required to | ||
4485 | * find the extents they have references on. | ||
4486 | * | ||
4487 | * For each leaf, any references it has are also dropped in order | ||
4488 | * | ||
4489 | * This ends up dropping the references in something close to optimal | ||
4490 | * order for reading and modifying the extent allocation tree. | ||
4491 | */ | ||
4492 | static noinline int drop_level_one_refs(struct btrfs_trans_handle *trans, | ||
4493 | struct btrfs_root *root, | ||
4494 | struct btrfs_path *path) | ||
4495 | { | 4979 | { |
4496 | u64 bytenr; | 4980 | u64 bytenr; |
4497 | u64 root_owner; | 4981 | u64 generation; |
4498 | u64 root_gen; | 4982 | u64 refs; |
4499 | struct extent_buffer *eb = path->nodes[1]; | 4983 | u64 flags; |
4500 | struct extent_buffer *leaf; | 4984 | u64 last = 0; |
4501 | struct btrfs_leaf_ref *ref; | 4985 | u32 nritems; |
4502 | struct refsort *sorted = NULL; | 4986 | u32 blocksize; |
4503 | int nritems = btrfs_header_nritems(eb); | 4987 | struct btrfs_key key; |
4988 | struct extent_buffer *eb; | ||
4504 | int ret; | 4989 | int ret; |
4505 | int i; | 4990 | int slot; |
4506 | int refi = 0; | 4991 | int nread = 0; |
4507 | int slot = path->slots[1]; | ||
4508 | u32 blocksize = btrfs_level_size(root, 0); | ||
4509 | u32 refs; | ||
4510 | |||
4511 | if (nritems == 0) | ||
4512 | goto out; | ||
4513 | |||
4514 | root_owner = btrfs_header_owner(eb); | ||
4515 | root_gen = btrfs_header_generation(eb); | ||
4516 | sorted = kmalloc(sizeof(*sorted) * nritems, GFP_NOFS); | ||
4517 | 4992 | ||
4518 | /* | 4993 | if (path->slots[wc->level] < wc->reada_slot) { |
4519 | * step one, sort all the leaf pointers so we don't scribble | 4994 | wc->reada_count = wc->reada_count * 2 / 3; |
4520 | * randomly into the extent allocation tree | 4995 | wc->reada_count = max(wc->reada_count, 2); |
4521 | */ | 4996 | } else { |
4522 | for (i = slot; i < nritems; i++) { | 4997 | wc->reada_count = wc->reada_count * 3 / 2; |
4523 | sorted[refi].bytenr = btrfs_node_blockptr(eb, i); | 4998 | wc->reada_count = min_t(int, wc->reada_count, |
4524 | sorted[refi].slot = i; | 4999 | BTRFS_NODEPTRS_PER_BLOCK(root)); |
4525 | refi++; | ||
4526 | } | 5000 | } |
4527 | 5001 | ||
4528 | /* | 5002 | eb = path->nodes[wc->level]; |
4529 | * nritems won't be zero, but if we're picking up drop_snapshot | 5003 | nritems = btrfs_header_nritems(eb); |
4530 | * after a crash, slot might be > 0, so double check things | 5004 | blocksize = btrfs_level_size(root, wc->level - 1); |
4531 | * just in case. | ||
4532 | */ | ||
4533 | if (refi == 0) | ||
4534 | goto out; | ||
4535 | 5005 | ||
4536 | sort(sorted, refi, sizeof(struct refsort), refsort_cmp, NULL); | 5006 | for (slot = path->slots[wc->level]; slot < nritems; slot++) { |
5007 | if (nread >= wc->reada_count) | ||
5008 | break; | ||
4537 | 5009 | ||
4538 | /* | 5010 | cond_resched(); |
4539 | * the first loop frees everything the leaves point to | 5011 | bytenr = btrfs_node_blockptr(eb, slot); |
4540 | */ | 5012 | generation = btrfs_node_ptr_generation(eb, slot); |
4541 | for (i = 0; i < refi; i++) { | ||
4542 | u64 ptr_gen; | ||
4543 | 5013 | ||
4544 | bytenr = sorted[i].bytenr; | 5014 | if (slot == path->slots[wc->level]) |
5015 | goto reada; | ||
4545 | 5016 | ||
4546 | /* | 5017 | if (wc->stage == UPDATE_BACKREF && |
4547 | * check the reference count on this leaf. If it is > 1 | 5018 | generation <= root->root_key.offset) |
4548 | * we just decrement it below and don't update any | ||
4549 | * of the refs the leaf points to. | ||
4550 | */ | ||
4551 | ret = drop_snap_lookup_refcount(trans, root, bytenr, | ||
4552 | blocksize, &refs); | ||
4553 | BUG_ON(ret); | ||
4554 | if (refs != 1) | ||
4555 | continue; | 5019 | continue; |
4556 | 5020 | ||
4557 | ptr_gen = btrfs_node_ptr_generation(eb, sorted[i].slot); | 5021 | /* We don't lock the tree block, it's OK to be racy here */ |
4558 | 5022 | ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize, | |
4559 | /* | 5023 | &refs, &flags); |
4560 | * the leaf only had one reference, which means the | ||
4561 | * only thing pointing to this leaf is the snapshot | ||
4562 | * we're deleting. It isn't possible for the reference | ||
4563 | * count to increase again later | ||
4564 | * | ||
4565 | * The reference cache is checked for the leaf, | ||
4566 | * and if found we'll be able to drop any refs held by | ||
4567 | * the leaf without needing to read it in. | ||
4568 | */ | ||
4569 | ref = btrfs_lookup_leaf_ref(root, bytenr); | ||
4570 | if (ref && ref->generation != ptr_gen) { | ||
4571 | btrfs_free_leaf_ref(root, ref); | ||
4572 | ref = NULL; | ||
4573 | } | ||
4574 | if (ref) { | ||
4575 | ret = cache_drop_leaf_ref(trans, root, ref); | ||
4576 | BUG_ON(ret); | ||
4577 | btrfs_remove_leaf_ref(root, ref); | ||
4578 | btrfs_free_leaf_ref(root, ref); | ||
4579 | } else { | ||
4580 | /* | ||
4581 | * the leaf wasn't in the reference cache, so | ||
4582 | * we have to read it. | ||
4583 | */ | ||
4584 | leaf = read_tree_block(root, bytenr, blocksize, | ||
4585 | ptr_gen); | ||
4586 | ret = btrfs_drop_leaf_ref(trans, root, leaf); | ||
4587 | BUG_ON(ret); | ||
4588 | free_extent_buffer(leaf); | ||
4589 | } | ||
4590 | atomic_inc(&root->fs_info->throttle_gen); | ||
4591 | wake_up(&root->fs_info->transaction_throttle); | ||
4592 | cond_resched(); | ||
4593 | } | ||
4594 | |||
4595 | /* | ||
4596 | * run through the loop again to free the refs on the leaves. | ||
4597 | * This is faster than doing it in the loop above because | ||
4598 | * the leaves are likely to be clustered together. We end up | ||
4599 | * working in nice chunks on the extent allocation tree. | ||
4600 | */ | ||
4601 | for (i = 0; i < refi; i++) { | ||
4602 | bytenr = sorted[i].bytenr; | ||
4603 | ret = btrfs_free_extent(trans, root, bytenr, | ||
4604 | blocksize, eb->start, | ||
4605 | root_owner, root_gen, 0, 1); | ||
4606 | BUG_ON(ret); | 5024 | BUG_ON(ret); |
5025 | BUG_ON(refs == 0); | ||
4607 | 5026 | ||
4608 | atomic_inc(&root->fs_info->throttle_gen); | 5027 | if (wc->stage == DROP_REFERENCE) { |
4609 | wake_up(&root->fs_info->transaction_throttle); | 5028 | if (refs == 1) |
4610 | cond_resched(); | 5029 | goto reada; |
4611 | } | ||
4612 | out: | ||
4613 | kfree(sorted); | ||
4614 | |||
4615 | /* | ||
4616 | * update the path to show we've processed the entire level 1 | ||
4617 | * node. This will get saved into the root's drop_snapshot_progress | ||
4618 | * field so these drops are not repeated again if this transaction | ||
4619 | * commits. | ||
4620 | */ | ||
4621 | path->slots[1] = nritems; | ||
4622 | return 0; | ||
4623 | } | ||
4624 | |||
4625 | /* | ||
4626 | * helper function for drop_snapshot, this walks down the tree dropping ref | ||
4627 | * counts as it goes. | ||
4628 | */ | ||
4629 | static noinline int walk_down_tree(struct btrfs_trans_handle *trans, | ||
4630 | struct btrfs_root *root, | ||
4631 | struct btrfs_path *path, int *level) | ||
4632 | { | ||
4633 | u64 root_owner; | ||
4634 | u64 root_gen; | ||
4635 | u64 bytenr; | ||
4636 | u64 ptr_gen; | ||
4637 | struct extent_buffer *next; | ||
4638 | struct extent_buffer *cur; | ||
4639 | struct extent_buffer *parent; | ||
4640 | u32 blocksize; | ||
4641 | int ret; | ||
4642 | u32 refs; | ||
4643 | |||
4644 | WARN_ON(*level < 0); | ||
4645 | WARN_ON(*level >= BTRFS_MAX_LEVEL); | ||
4646 | ret = drop_snap_lookup_refcount(trans, root, path->nodes[*level]->start, | ||
4647 | path->nodes[*level]->len, &refs); | ||
4648 | BUG_ON(ret); | ||
4649 | if (refs > 1) | ||
4650 | goto out; | ||
4651 | |||
4652 | /* | ||
4653 | * walk down to the last node level and free all the leaves | ||
4654 | */ | ||
4655 | while (*level >= 0) { | ||
4656 | WARN_ON(*level < 0); | ||
4657 | WARN_ON(*level >= BTRFS_MAX_LEVEL); | ||
4658 | cur = path->nodes[*level]; | ||
4659 | |||
4660 | if (btrfs_header_level(cur) != *level) | ||
4661 | WARN_ON(1); | ||
4662 | |||
4663 | if (path->slots[*level] >= | ||
4664 | btrfs_header_nritems(cur)) | ||
4665 | break; | ||
4666 | 5030 | ||
4667 | /* the new code goes down to level 1 and does all the | 5031 | if (wc->level == 1 && |
4668 | * leaves pointed to that node in bulk. So, this check | 5032 | (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)) |
4669 | * for level 0 will always be false. | 5033 | continue; |
4670 | * | 5034 | if (!wc->update_ref || |
4671 | * But, the disk format allows the drop_snapshot_progress | 5035 | generation <= root->root_key.offset) |
4672 | * field in the root to leave things in a state where | 5036 | continue; |
4673 | * a leaf will need cleaning up here. If someone crashes | 5037 | btrfs_node_key_to_cpu(eb, &key, slot); |
4674 | * with the old code and then boots with the new code, | 5038 | ret = btrfs_comp_cpu_keys(&key, |
4675 | * we might find a leaf here. | 5039 | &wc->update_progress); |
4676 | */ | 5040 | if (ret < 0) |
4677 | if (*level == 0) { | 5041 | continue; |
4678 | ret = btrfs_drop_leaf_ref(trans, root, cur); | 5042 | } else { |
4679 | BUG_ON(ret); | 5043 | if (wc->level == 1 && |
4680 | break; | 5044 | (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)) |
5045 | continue; | ||
4681 | } | 5046 | } |
4682 | 5047 | reada: | |
4683 | /* | 5048 | ret = readahead_tree_block(root, bytenr, blocksize, |
4684 | * once we get to level one, process the whole node | 5049 | generation); |
4685 | * at once, including everything below it. | 5050 | if (ret) |
4686 | */ | ||
4687 | if (*level == 1) { | ||
4688 | ret = drop_level_one_refs(trans, root, path); | ||
4689 | BUG_ON(ret); | ||
4690 | break; | 5051 | break; |
4691 | } | 5052 | last = bytenr + blocksize; |
4692 | 5053 | nread++; | |
4693 | bytenr = btrfs_node_blockptr(cur, path->slots[*level]); | ||
4694 | ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]); | ||
4695 | blocksize = btrfs_level_size(root, *level - 1); | ||
4696 | |||
4697 | ret = drop_snap_lookup_refcount(trans, root, bytenr, | ||
4698 | blocksize, &refs); | ||
4699 | BUG_ON(ret); | ||
4700 | |||
4701 | /* | ||
4702 | * if there is more than one reference, we don't need | ||
4703 | * to read that node to drop any references it has. We | ||
4704 | * just drop the ref we hold on that node and move on to the | ||
4705 | * next slot in this level. | ||
4706 | */ | ||
4707 | if (refs != 1) { | ||
4708 | parent = path->nodes[*level]; | ||
4709 | root_owner = btrfs_header_owner(parent); | ||
4710 | root_gen = btrfs_header_generation(parent); | ||
4711 | path->slots[*level]++; | ||
4712 | |||
4713 | ret = btrfs_free_extent(trans, root, bytenr, | ||
4714 | blocksize, parent->start, | ||
4715 | root_owner, root_gen, | ||
4716 | *level - 1, 1); | ||
4717 | BUG_ON(ret); | ||
4718 | |||
4719 | atomic_inc(&root->fs_info->throttle_gen); | ||
4720 | wake_up(&root->fs_info->transaction_throttle); | ||
4721 | cond_resched(); | ||
4722 | |||
4723 | continue; | ||
4724 | } | ||
4725 | |||
4726 | /* | ||
4727 | * we need to keep freeing things in the next level down. | ||
4728 | * read the block and loop around to process it | ||
4729 | */ | ||
4730 | next = read_tree_block(root, bytenr, blocksize, ptr_gen); | ||
4731 | WARN_ON(*level <= 0); | ||
4732 | if (path->nodes[*level-1]) | ||
4733 | free_extent_buffer(path->nodes[*level-1]); | ||
4734 | path->nodes[*level-1] = next; | ||
4735 | *level = btrfs_header_level(next); | ||
4736 | path->slots[*level] = 0; | ||
4737 | cond_resched(); | ||
4738 | } | 5054 | } |
4739 | out: | 5055 | wc->reada_slot = slot; |
4740 | WARN_ON(*level < 0); | ||
4741 | WARN_ON(*level >= BTRFS_MAX_LEVEL); | ||
4742 | |||
4743 | if (path->nodes[*level] == root->node) { | ||
4744 | parent = path->nodes[*level]; | ||
4745 | bytenr = path->nodes[*level]->start; | ||
4746 | } else { | ||
4747 | parent = path->nodes[*level + 1]; | ||
4748 | bytenr = btrfs_node_blockptr(parent, path->slots[*level + 1]); | ||
4749 | } | ||
4750 | |||
4751 | blocksize = btrfs_level_size(root, *level); | ||
4752 | root_owner = btrfs_header_owner(parent); | ||
4753 | root_gen = btrfs_header_generation(parent); | ||
4754 | |||
4755 | /* | ||
4756 | * cleanup and free the reference on the last node | ||
4757 | * we processed | ||
4758 | */ | ||
4759 | ret = btrfs_free_extent(trans, root, bytenr, blocksize, | ||
4760 | parent->start, root_owner, root_gen, | ||
4761 | *level, 1); | ||
4762 | free_extent_buffer(path->nodes[*level]); | ||
4763 | path->nodes[*level] = NULL; | ||
4764 | |||
4765 | *level += 1; | ||
4766 | BUG_ON(ret); | ||
4767 | |||
4768 | cond_resched(); | ||
4769 | return 0; | ||
4770 | } | 5056 | } |
4771 | #endif | ||
4772 | |||
4773 | struct walk_control { | ||
4774 | u64 refs[BTRFS_MAX_LEVEL]; | ||
4775 | u64 flags[BTRFS_MAX_LEVEL]; | ||
4776 | struct btrfs_key update_progress; | ||
4777 | int stage; | ||
4778 | int level; | ||
4779 | int shared_level; | ||
4780 | int update_ref; | ||
4781 | int keep_locks; | ||
4782 | }; | ||
4783 | |||
4784 | #define DROP_REFERENCE 1 | ||
4785 | #define UPDATE_BACKREF 2 | ||
4786 | 5057 | ||
4787 | /* | 5058 | /* |
4788 | * helper to process tree block while walking down the tree. | 5059 | * helper to process tree block while walking down the tree. |
4789 | * | 5060 | * |
4790 | * when wc->stage == DROP_REFERENCE, this function checks | ||
4791 | * reference count of the block. if the block is shared and | ||
4792 | * we need update back refs for the subtree rooted at the | ||
4793 | * block, this function changes wc->stage to UPDATE_BACKREF | ||
4794 | * | ||
4795 | * when wc->stage == UPDATE_BACKREF, this function updates | 5061 | * when wc->stage == UPDATE_BACKREF, this function updates |
4796 | * back refs for pointers in the block. | 5062 | * back refs for pointers in the block. |
4797 | * | 5063 | * |
@@ -4800,11 +5066,10 @@ struct walk_control { | |||
4800 | static noinline int walk_down_proc(struct btrfs_trans_handle *trans, | 5066 | static noinline int walk_down_proc(struct btrfs_trans_handle *trans, |
4801 | struct btrfs_root *root, | 5067 | struct btrfs_root *root, |
4802 | struct btrfs_path *path, | 5068 | struct btrfs_path *path, |
4803 | struct walk_control *wc) | 5069 | struct walk_control *wc, int lookup_info) |
4804 | { | 5070 | { |
4805 | int level = wc->level; | 5071 | int level = wc->level; |
4806 | struct extent_buffer *eb = path->nodes[level]; | 5072 | struct extent_buffer *eb = path->nodes[level]; |
4807 | struct btrfs_key key; | ||
4808 | u64 flag = BTRFS_BLOCK_FLAG_FULL_BACKREF; | 5073 | u64 flag = BTRFS_BLOCK_FLAG_FULL_BACKREF; |
4809 | int ret; | 5074 | int ret; |
4810 | 5075 | ||
@@ -4816,8 +5081,9 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans, | |||
4816 | * when reference count of tree block is 1, it won't increase | 5081 | * when reference count of tree block is 1, it won't increase |
4817 | * again. once full backref flag is set, we never clear it. | 5082 | * again. once full backref flag is set, we never clear it. |
4818 | */ | 5083 | */ |
4819 | if ((wc->stage == DROP_REFERENCE && wc->refs[level] != 1) || | 5084 | if (lookup_info && |
4820 | (wc->stage == UPDATE_BACKREF && !(wc->flags[level] & flag))) { | 5085 | ((wc->stage == DROP_REFERENCE && wc->refs[level] != 1) || |
5086 | (wc->stage == UPDATE_BACKREF && !(wc->flags[level] & flag)))) { | ||
4821 | BUG_ON(!path->locks[level]); | 5087 | BUG_ON(!path->locks[level]); |
4822 | ret = btrfs_lookup_extent_info(trans, root, | 5088 | ret = btrfs_lookup_extent_info(trans, root, |
4823 | eb->start, eb->len, | 5089 | eb->start, eb->len, |
@@ -4827,21 +5093,6 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans, | |||
4827 | BUG_ON(wc->refs[level] == 0); | 5093 | BUG_ON(wc->refs[level] == 0); |
4828 | } | 5094 | } |
4829 | 5095 | ||
4830 | if (wc->stage == DROP_REFERENCE && | ||
4831 | wc->update_ref && wc->refs[level] > 1) { | ||
4832 | BUG_ON(eb == root->node); | ||
4833 | BUG_ON(path->slots[level] > 0); | ||
4834 | if (level == 0) | ||
4835 | btrfs_item_key_to_cpu(eb, &key, path->slots[level]); | ||
4836 | else | ||
4837 | btrfs_node_key_to_cpu(eb, &key, path->slots[level]); | ||
4838 | if (btrfs_header_owner(eb) == root->root_key.objectid && | ||
4839 | btrfs_comp_cpu_keys(&key, &wc->update_progress) >= 0) { | ||
4840 | wc->stage = UPDATE_BACKREF; | ||
4841 | wc->shared_level = level; | ||
4842 | } | ||
4843 | } | ||
4844 | |||
4845 | if (wc->stage == DROP_REFERENCE) { | 5096 | if (wc->stage == DROP_REFERENCE) { |
4846 | if (wc->refs[level] > 1) | 5097 | if (wc->refs[level] > 1) |
4847 | return 1; | 5098 | return 1; |
@@ -4878,6 +5129,136 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans, | |||
4878 | } | 5129 | } |
4879 | 5130 | ||
4880 | /* | 5131 | /* |
5132 | * helper to process tree block pointer. | ||
5133 | * | ||
5134 | * when wc->stage == DROP_REFERENCE, this function checks | ||
5135 | * reference count of the block pointed to. if the block | ||
5136 | * is shared and we need to update back refs for the subtree | ||
5137 | * rooted at the block, this function changes wc->stage to | ||
5138 | * UPDATE_BACKREF. if the block is shared and there is no | ||
5139 | * need to update back refs, this function drops the reference | ||
5140 | * to the block. | ||
5141 | * | ||
5142 | * NOTE: return value 1 means we should stop walking down. | ||
5143 | */ | ||
5144 | static noinline int do_walk_down(struct btrfs_trans_handle *trans, | ||
5145 | struct btrfs_root *root, | ||
5146 | struct btrfs_path *path, | ||
5147 | struct walk_control *wc, int *lookup_info) | ||
5148 | { | ||
5149 | u64 bytenr; | ||
5150 | u64 generation; | ||
5151 | u64 parent; | ||
5152 | u32 blocksize; | ||
5153 | struct btrfs_key key; | ||
5154 | struct extent_buffer *next; | ||
5155 | int level = wc->level; | ||
5156 | int reada = 0; | ||
5157 | int ret = 0; | ||
5158 | |||
5159 | generation = btrfs_node_ptr_generation(path->nodes[level], | ||
5160 | path->slots[level]); | ||
5161 | /* | ||
5162 | * if the lower level block was created before the snapshot | ||
5163 | * was created, we know there is no need to update back refs | ||
5164 | * for the subtree | ||
5165 | */ | ||
5166 | if (wc->stage == UPDATE_BACKREF && | ||
5167 | generation <= root->root_key.offset) { | ||
5168 | *lookup_info = 1; | ||
5169 | return 1; | ||
5170 | } | ||
5171 | |||
5172 | bytenr = btrfs_node_blockptr(path->nodes[level], path->slots[level]); | ||
5173 | blocksize = btrfs_level_size(root, level - 1); | ||
5174 | |||
5175 | next = btrfs_find_tree_block(root, bytenr, blocksize); | ||
5176 | if (!next) { | ||
5177 | next = btrfs_find_create_tree_block(root, bytenr, blocksize); | ||
5178 | reada = 1; | ||
5179 | } | ||
5180 | btrfs_tree_lock(next); | ||
5181 | btrfs_set_lock_blocking(next); | ||
5182 | |||
5183 | ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize, | ||
5184 | &wc->refs[level - 1], | ||
5185 | &wc->flags[level - 1]); | ||
5186 | BUG_ON(ret); | ||
5187 | BUG_ON(wc->refs[level - 1] == 0); | ||
5188 | *lookup_info = 0; | ||
5189 | |||
5190 | if (wc->stage == DROP_REFERENCE) { | ||
5191 | if (wc->refs[level - 1] > 1) { | ||
5192 | if (level == 1 && | ||
5193 | (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF)) | ||
5194 | goto skip; | ||
5195 | |||
5196 | if (!wc->update_ref || | ||
5197 | generation <= root->root_key.offset) | ||
5198 | goto skip; | ||
5199 | |||
5200 | btrfs_node_key_to_cpu(path->nodes[level], &key, | ||
5201 | path->slots[level]); | ||
5202 | ret = btrfs_comp_cpu_keys(&key, &wc->update_progress); | ||
5203 | if (ret < 0) | ||
5204 | goto skip; | ||
5205 | |||
5206 | wc->stage = UPDATE_BACKREF; | ||
5207 | wc->shared_level = level - 1; | ||
5208 | } | ||
5209 | } else { | ||
5210 | if (level == 1 && | ||
5211 | (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF)) | ||
5212 | goto skip; | ||
5213 | } | ||
5214 | |||
5215 | if (!btrfs_buffer_uptodate(next, generation)) { | ||
5216 | btrfs_tree_unlock(next); | ||
5217 | free_extent_buffer(next); | ||
5218 | next = NULL; | ||
5219 | *lookup_info = 1; | ||
5220 | } | ||
5221 | |||
5222 | if (!next) { | ||
5223 | if (reada && level == 1) | ||
5224 | reada_walk_down(trans, root, wc, path); | ||
5225 | next = read_tree_block(root, bytenr, blocksize, generation); | ||
5226 | btrfs_tree_lock(next); | ||
5227 | btrfs_set_lock_blocking(next); | ||
5228 | } | ||
5229 | |||
5230 | level--; | ||
5231 | BUG_ON(level != btrfs_header_level(next)); | ||
5232 | path->nodes[level] = next; | ||
5233 | path->slots[level] = 0; | ||
5234 | path->locks[level] = 1; | ||
5235 | wc->level = level; | ||
5236 | if (wc->level == 1) | ||
5237 | wc->reada_slot = 0; | ||
5238 | return 0; | ||
5239 | skip: | ||
5240 | wc->refs[level - 1] = 0; | ||
5241 | wc->flags[level - 1] = 0; | ||
5242 | if (wc->stage == DROP_REFERENCE) { | ||
5243 | if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) { | ||
5244 | parent = path->nodes[level]->start; | ||
5245 | } else { | ||
5246 | BUG_ON(root->root_key.objectid != | ||
5247 | btrfs_header_owner(path->nodes[level])); | ||
5248 | parent = 0; | ||
5249 | } | ||
5250 | |||
5251 | ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent, | ||
5252 | root->root_key.objectid, level - 1, 0); | ||
5253 | BUG_ON(ret); | ||
5254 | } | ||
5255 | btrfs_tree_unlock(next); | ||
5256 | free_extent_buffer(next); | ||
5257 | *lookup_info = 1; | ||
5258 | return 1; | ||
5259 | } | ||
5260 | |||
5261 | /* | ||
4881 | * helper to process tree block while walking up the tree. | 5262 | * helper to process tree block while walking up the tree. |
4882 | * | 5263 | * |
4883 | * when wc->stage == DROP_REFERENCE, this function drops | 5264 | * when wc->stage == DROP_REFERENCE, this function drops |
@@ -4904,7 +5285,6 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans, | |||
4904 | if (level < wc->shared_level) | 5285 | if (level < wc->shared_level) |
4905 | goto out; | 5286 | goto out; |
4906 | 5287 | ||
4907 | BUG_ON(wc->refs[level] <= 1); | ||
4908 | ret = find_next_key(path, level + 1, &wc->update_progress); | 5288 | ret = find_next_key(path, level + 1, &wc->update_progress); |
4909 | if (ret > 0) | 5289 | if (ret > 0) |
4910 | wc->update_ref = 0; | 5290 | wc->update_ref = 0; |
@@ -4935,8 +5315,6 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans, | |||
4935 | path->locks[level] = 0; | 5315 | path->locks[level] = 0; |
4936 | return 1; | 5316 | return 1; |
4937 | } | 5317 | } |
4938 | } else { | ||
4939 | BUG_ON(level != 0); | ||
4940 | } | 5318 | } |
4941 | } | 5319 | } |
4942 | 5320 | ||
@@ -4989,39 +5367,28 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans, | |||
4989 | struct btrfs_path *path, | 5367 | struct btrfs_path *path, |
4990 | struct walk_control *wc) | 5368 | struct walk_control *wc) |
4991 | { | 5369 | { |
4992 | struct extent_buffer *next; | ||
4993 | struct extent_buffer *cur; | ||
4994 | u64 bytenr; | ||
4995 | u64 ptr_gen; | ||
4996 | u32 blocksize; | ||
4997 | int level = wc->level; | 5370 | int level = wc->level; |
5371 | int lookup_info = 1; | ||
4998 | int ret; | 5372 | int ret; |
4999 | 5373 | ||
5000 | while (level >= 0) { | 5374 | while (level >= 0) { |
5001 | cur = path->nodes[level]; | 5375 | if (path->slots[level] >= |
5002 | BUG_ON(path->slots[level] >= btrfs_header_nritems(cur)); | 5376 | btrfs_header_nritems(path->nodes[level])) |
5377 | break; | ||
5003 | 5378 | ||
5004 | ret = walk_down_proc(trans, root, path, wc); | 5379 | ret = walk_down_proc(trans, root, path, wc, lookup_info); |
5005 | if (ret > 0) | 5380 | if (ret > 0) |
5006 | break; | 5381 | break; |
5007 | 5382 | ||
5008 | if (level == 0) | 5383 | if (level == 0) |
5009 | break; | 5384 | break; |
5010 | 5385 | ||
5011 | bytenr = btrfs_node_blockptr(cur, path->slots[level]); | 5386 | ret = do_walk_down(trans, root, path, wc, &lookup_info); |
5012 | blocksize = btrfs_level_size(root, level - 1); | 5387 | if (ret > 0) { |
5013 | ptr_gen = btrfs_node_ptr_generation(cur, path->slots[level]); | 5388 | path->slots[level]++; |
5014 | 5389 | continue; | |
5015 | next = read_tree_block(root, bytenr, blocksize, ptr_gen); | 5390 | } |
5016 | btrfs_tree_lock(next); | 5391 | level = wc->level; |
5017 | btrfs_set_lock_blocking(next); | ||
5018 | |||
5019 | level--; | ||
5020 | BUG_ON(level != btrfs_header_level(next)); | ||
5021 | path->nodes[level] = next; | ||
5022 | path->slots[level] = 0; | ||
5023 | path->locks[level] = 1; | ||
5024 | wc->level = level; | ||
5025 | } | 5392 | } |
5026 | return 0; | 5393 | return 0; |
5027 | } | 5394 | } |
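Editor's note: the rewritten walk_down_tree() above is now just a dispatcher. walk_down_proc() handles the block under the cursor, and do_walk_down() either descends one level or reports that the child was skipped, in which case the caller advances the slot. A self-contained toy model of that control flow (the state struct and the stub visit/descend functions are invented, not the kernel API):

#include <stdio.h>

#define MAX_LEVEL 8

struct walk_state {
    int level;                 /* current depth, 0 == leaf */
    int slots[MAX_LEVEL];      /* cursor within each node */
    int nritems[MAX_LEVEL];    /* item count per node */
};

/* stand-in for walk_down_proc(): process the current block */
static int visit(struct walk_state *ws)
{
    printf("visit level %d slot %d\n", ws->level, ws->slots[ws->level]);
    return 0;                  /* > 0 would stop the walk */
}

/* stand-in for do_walk_down(): descend (0) or skip the child (1) */
static int descend_or_skip(struct walk_state *ws, int *lookup_info)
{
    if (ws->slots[ws->level] % 2) {   /* pretend odd slots are shared */
        *lookup_info = 1;
        return 1;                     /* caller advances the slot */
    }
    ws->level--;
    ws->slots[ws->level] = 0;
    return 0;
}

static void walk_down(struct walk_state *ws)
{
    int lookup_info = 1;

    while (ws->level >= 0) {
        if (ws->slots[ws->level] >= ws->nritems[ws->level])
            break;                    /* node exhausted */
        if (visit(ws) > 0)
            break;
        if (ws->level == 0)
            break;                    /* reached a leaf */
        if (descend_or_skip(ws, &lookup_info) > 0)
            ws->slots[ws->level]++;   /* child skipped: next pointer */
    }
}

int main(void)
{
    struct walk_state ws = { .level = 2,
                             .slots = { 0, 0, 1 },
                             .nritems = { 4, 3, 3 } };
    walk_down(&ws);
    return 0;
}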
@@ -5111,9 +5478,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref) | |||
5111 | err = ret; | 5478 | err = ret; |
5112 | goto out; | 5479 | goto out; |
5113 | } | 5480 | } |
5114 | btrfs_node_key_to_cpu(path->nodes[level], &key, | 5481 | WARN_ON(ret > 0); |
5115 | path->slots[level]); | ||
5116 | WARN_ON(memcmp(&key, &wc->update_progress, sizeof(key))); | ||
5117 | 5482 | ||
5118 | /* | 5483 | /* |
5119 | * unlock our path, this is safe because only this | 5484 | * unlock our path, this is safe because only this |
@@ -5148,6 +5513,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref) | |||
5148 | wc->stage = DROP_REFERENCE; | 5513 | wc->stage = DROP_REFERENCE; |
5149 | wc->update_ref = update_ref; | 5514 | wc->update_ref = update_ref; |
5150 | wc->keep_locks = 0; | 5515 | wc->keep_locks = 0; |
5516 | wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root); | ||
5151 | 5517 | ||
5152 | while (1) { | 5518 | while (1) { |
5153 | ret = walk_down_tree(trans, root, path, wc); | 5519 | ret = walk_down_tree(trans, root, path, wc); |
@@ -5200,9 +5566,24 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref) | |||
5200 | ret = btrfs_del_root(trans, tree_root, &root->root_key); | 5566 | ret = btrfs_del_root(trans, tree_root, &root->root_key); |
5201 | BUG_ON(ret); | 5567 | BUG_ON(ret); |
5202 | 5568 | ||
5203 | free_extent_buffer(root->node); | 5569 | if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) { |
5204 | free_extent_buffer(root->commit_root); | 5570 | ret = btrfs_find_last_root(tree_root, root->root_key.objectid, |
5205 | kfree(root); | 5571 | NULL, NULL); |
5572 | BUG_ON(ret < 0); | ||
5573 | if (ret > 0) { | ||
5574 | ret = btrfs_del_orphan_item(trans, tree_root, | ||
5575 | root->root_key.objectid); | ||
5576 | BUG_ON(ret); | ||
5577 | } | ||
5578 | } | ||
5579 | |||
5580 | if (root->in_radix) { | ||
5581 | btrfs_free_fs_root(tree_root->fs_info, root); | ||
5582 | } else { | ||
5583 | free_extent_buffer(root->node); | ||
5584 | free_extent_buffer(root->commit_root); | ||
5585 | kfree(root); | ||
5586 | } | ||
5206 | out: | 5587 | out: |
5207 | btrfs_end_transaction(trans, tree_root); | 5588 | btrfs_end_transaction(trans, tree_root); |
5208 | kfree(wc); | 5589 | kfree(wc); |
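Editor's note: the new teardown in btrfs_drop_snapshot() does two extra things. Once the root item is confirmed gone it also deletes the root's orphan item, and it frees the root through btrfs_free_fs_root() when the root is cached in the fs_info radix tree, freeing the buffers by hand only for roots read outside the cache. A hedged, self-contained model of that ordering (every helper here is a printf stub, not a kernel function):

#include <stdbool.h>
#include <stdio.h>

struct root { bool in_radix; bool is_reloc_tree; };

/* the root item was just deleted above, so lookups come back empty */
static bool root_item_exists(const struct root *r) { (void)r; return false; }
static void del_orphan_item(void)  { puts("orphan item removed"); }
static void free_cached_root(void) { puts("freed via radix-tree path"); }
static void free_fresh_root(void)  { puts("freed buffers + kfree"); }

static void drop_snapshot_tail(const struct root *r)
{
    /* reloc trees never have an orphan item to clean up */
    if (!r->is_reloc_tree && !root_item_exists(r))
        del_orphan_item();

    if (r->in_radix)
        free_cached_root();     /* shared teardown path */
    else
        free_fresh_root();      /* we own the buffers outright */
}

int main(void)
{
    struct root r = { .in_radix = true, .is_reloc_tree = false };
    drop_snapshot_tail(&r);
    return 0;
}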
@@ -5254,6 +5635,7 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans, | |||
5254 | wc->stage = DROP_REFERENCE; | 5635 | wc->stage = DROP_REFERENCE; |
5255 | wc->update_ref = 0; | 5636 | wc->update_ref = 0; |
5256 | wc->keep_locks = 1; | 5637 | wc->keep_locks = 1; |
5638 | wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root); | ||
5257 | 5639 | ||
5258 | while (1) { | 5640 | while (1) { |
5259 | wret = walk_down_tree(trans, root, path, wc); | 5641 | wret = walk_down_tree(trans, root, path, wc); |
@@ -5396,9 +5778,9 @@ static noinline int relocate_data_extent(struct inode *reloc_inode, | |||
5396 | lock_extent(&BTRFS_I(reloc_inode)->io_tree, start, end, GFP_NOFS); | 5778 | lock_extent(&BTRFS_I(reloc_inode)->io_tree, start, end, GFP_NOFS); |
5397 | while (1) { | 5779 | while (1) { |
5398 | int ret; | 5780 | int ret; |
5399 | spin_lock(&em_tree->lock); | 5781 | write_lock(&em_tree->lock); |
5400 | ret = add_extent_mapping(em_tree, em); | 5782 | ret = add_extent_mapping(em_tree, em); |
5401 | spin_unlock(&em_tree->lock); | 5783 | write_unlock(&em_tree->lock); |
5402 | if (ret != -EEXIST) { | 5784 | if (ret != -EEXIST) { |
5403 | free_extent_map(em); | 5785 | free_extent_map(em); |
5404 | break; | 5786 | break; |
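Editor's note: the spin_lock/spin_unlock pair around add_extent_mapping() became write_lock/write_unlock because the extent map tree's lock was converted to an rwlock in this merge window, letting lookups take the read side concurrently. A self-contained sketch of the insert-then-retry-on-EEXIST pattern, using a pthread rwlock and a one-slot "tree" as invented stand-ins (the kernel's drop path does its own locking):

#include <errno.h>
#include <pthread.h>
#include <stdio.h>

struct em_tree {
    pthread_rwlock_t lock;
    int slot_used;      /* pretend the tree holds at most one mapping */
    int slot_key;
};

/* returns -EEXIST if an overlapping mapping is already present */
static int add_mapping(struct em_tree *t, int key)
{
    if (t->slot_used && t->slot_key == key)
        return -EEXIST;
    t->slot_used = 1;
    t->slot_key = key;
    return 0;
}

static void drop_mapping(struct em_tree *t, int key)
{
    if (t->slot_used && t->slot_key == key)
        t->slot_used = 0;
}

static void insert_mapping(struct em_tree *t, int key)
{
    for (;;) {
        pthread_rwlock_wrlock(&t->lock);   /* writers exclude readers */
        int ret = add_mapping(t, key);
        pthread_rwlock_unlock(&t->lock);
        if (ret != -EEXIST)
            break;                  /* inserted */
        drop_mapping(t, key);       /* evict the stale mapping and retry */
    }
}

int main(void)
{
    struct em_tree t = { .lock = PTHREAD_RWLOCK_INITIALIZER };
    insert_mapping(&t, 42);
    insert_mapping(&t, 42);         /* exercises the -EEXIST retry */
    printf("key %d present\n", t.slot_key);
    return 0;
}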
@@ -6841,287 +7223,86 @@ int btrfs_prepare_block_group_relocation(struct btrfs_root *root, | |||
6841 | return 0; | 7223 | return 0; |
6842 | } | 7224 | } |
6843 | 7225 | ||
6844 | #if 0 | 7226 | /* |
6845 | static int __insert_orphan_inode(struct btrfs_trans_handle *trans, | 7227 | * checks to see if it's even possible to relocate this block group.
6846 | struct btrfs_root *root, | 7228 | * |
6847 | u64 objectid, u64 size) | 7229 | * @return - -1 if it's not a good idea to relocate this block group, 0 if it's
6848 | { | 7230 | * ok to go ahead and try. |
6849 | struct btrfs_path *path; | 7231 | */ |
6850 | struct btrfs_inode_item *item; | 7232 | int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr) |
6851 | struct extent_buffer *leaf; | ||
6852 | int ret; | ||
6853 | |||
6854 | path = btrfs_alloc_path(); | ||
6855 | if (!path) | ||
6856 | return -ENOMEM; | ||
6857 | |||
6858 | path->leave_spinning = 1; | ||
6859 | ret = btrfs_insert_empty_inode(trans, root, path, objectid); | ||
6860 | if (ret) | ||
6861 | goto out; | ||
6862 | |||
6863 | leaf = path->nodes[0]; | ||
6864 | item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item); | ||
6865 | memset_extent_buffer(leaf, 0, (unsigned long)item, sizeof(*item)); | ||
6866 | btrfs_set_inode_generation(leaf, item, 1); | ||
6867 | btrfs_set_inode_size(leaf, item, size); | ||
6868 | btrfs_set_inode_mode(leaf, item, S_IFREG | 0600); | ||
6869 | btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS); | ||
6870 | btrfs_mark_buffer_dirty(leaf); | ||
6871 | btrfs_release_path(root, path); | ||
6872 | out: | ||
6873 | btrfs_free_path(path); | ||
6874 | return ret; | ||
6875 | } | ||
6876 | |||
6877 | static noinline struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info, | ||
6878 | struct btrfs_block_group_cache *group) | ||
6879 | { | 7233 | { |
6880 | struct inode *inode = NULL; | 7234 | struct btrfs_block_group_cache *block_group; |
6881 | struct btrfs_trans_handle *trans; | 7235 | struct btrfs_space_info *space_info; |
6882 | struct btrfs_root *root; | 7236 | struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices; |
6883 | struct btrfs_key root_key; | 7237 | struct btrfs_device *device; |
6884 | u64 objectid = BTRFS_FIRST_FREE_OBJECTID; | 7238 | int full = 0; |
6885 | int err = 0; | 7239 | int ret = 0; |
6886 | 7240 | ||
6887 | root_key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID; | 7241 | block_group = btrfs_lookup_block_group(root->fs_info, bytenr); |
6888 | root_key.type = BTRFS_ROOT_ITEM_KEY; | ||
6889 | root_key.offset = (u64)-1; | ||
6890 | root = btrfs_read_fs_root_no_name(fs_info, &root_key); | ||
6891 | if (IS_ERR(root)) | ||
6892 | return ERR_CAST(root); | ||
6893 | 7242 | ||
6894 | trans = btrfs_start_transaction(root, 1); | 7243 | /* odd, couldn't find the block group, leave it alone */ |
6895 | BUG_ON(!trans); | 7244 | if (!block_group) |
7245 | return -1; | ||
6896 | 7246 | ||
6897 | err = btrfs_find_free_objectid(trans, root, objectid, &objectid); | 7247 | /* no bytes used, we're good */ |
6898 | if (err) | 7248 | if (!btrfs_block_group_used(&block_group->item)) |
6899 | goto out; | 7249 | goto out; |
6900 | 7250 | ||
6901 | err = __insert_orphan_inode(trans, root, objectid, group->key.offset); | 7251 | space_info = block_group->space_info; |
6902 | BUG_ON(err); | 7252 | spin_lock(&space_info->lock); |
6903 | |||
6904 | err = btrfs_insert_file_extent(trans, root, objectid, 0, 0, 0, | ||
6905 | group->key.offset, 0, group->key.offset, | ||
6906 | 0, 0, 0); | ||
6907 | BUG_ON(err); | ||
6908 | |||
6909 | inode = btrfs_iget_locked(root->fs_info->sb, objectid, root); | ||
6910 | if (inode->i_state & I_NEW) { | ||
6911 | BTRFS_I(inode)->root = root; | ||
6912 | BTRFS_I(inode)->location.objectid = objectid; | ||
6913 | BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY; | ||
6914 | BTRFS_I(inode)->location.offset = 0; | ||
6915 | btrfs_read_locked_inode(inode); | ||
6916 | unlock_new_inode(inode); | ||
6917 | BUG_ON(is_bad_inode(inode)); | ||
6918 | } else { | ||
6919 | BUG_ON(1); | ||
6920 | } | ||
6921 | BTRFS_I(inode)->index_cnt = group->key.objectid; | ||
6922 | |||
6923 | err = btrfs_orphan_add(trans, inode); | ||
6924 | out: | ||
6925 | btrfs_end_transaction(trans, root); | ||
6926 | if (err) { | ||
6927 | if (inode) | ||
6928 | iput(inode); | ||
6929 | inode = ERR_PTR(err); | ||
6930 | } | ||
6931 | return inode; | ||
6932 | } | ||
6933 | |||
6934 | int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len) | ||
6935 | { | ||
6936 | |||
6937 | struct btrfs_ordered_sum *sums; | ||
6938 | struct btrfs_sector_sum *sector_sum; | ||
6939 | struct btrfs_ordered_extent *ordered; | ||
6940 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
6941 | struct list_head list; | ||
6942 | size_t offset; | ||
6943 | int ret; | ||
6944 | u64 disk_bytenr; | ||
6945 | |||
6946 | INIT_LIST_HEAD(&list); | ||
6947 | |||
6948 | ordered = btrfs_lookup_ordered_extent(inode, file_pos); | ||
6949 | BUG_ON(ordered->file_offset != file_pos || ordered->len != len); | ||
6950 | |||
6951 | disk_bytenr = file_pos + BTRFS_I(inode)->index_cnt; | ||
6952 | ret = btrfs_lookup_csums_range(root->fs_info->csum_root, disk_bytenr, | ||
6953 | disk_bytenr + len - 1, &list); | ||
6954 | |||
6955 | while (!list_empty(&list)) { | ||
6956 | sums = list_entry(list.next, struct btrfs_ordered_sum, list); | ||
6957 | list_del_init(&sums->list); | ||
6958 | |||
6959 | sector_sum = sums->sums; | ||
6960 | sums->bytenr = ordered->start; | ||
6961 | 7253 | ||
6962 | offset = 0; | 7254 | full = space_info->full; |
6963 | while (offset < sums->len) { | ||
6964 | sector_sum->bytenr += ordered->start - disk_bytenr; | ||
6965 | sector_sum++; | ||
6966 | offset += root->sectorsize; | ||
6967 | } | ||
6968 | 7255 | ||
6969 | btrfs_add_ordered_sum(inode, ordered, sums); | 7256 | /* |
7257 | * if this is the last block group we have in this space, we can't | ||
7258 | * relocate it unless we're able to allocate a new chunk below. | ||
7259 | * | ||
7260 | * Otherwise, we need to make sure we have room in the space to handle | ||
7261 | * all of the extents from this block group. If we can, we're good. | ||
7262 | */ | ||
7263 | if ((space_info->total_bytes != block_group->key.offset) && | ||
7264 | (space_info->bytes_used + space_info->bytes_reserved + | ||
7265 | space_info->bytes_pinned + space_info->bytes_readonly + | ||
7266 | btrfs_block_group_used(&block_group->item) < | ||
7267 | space_info->total_bytes)) { | ||
7268 | spin_unlock(&space_info->lock); | ||
7269 | goto out; | ||
6970 | } | 7270 | } |
6971 | btrfs_put_ordered_extent(ordered); | 7271 | spin_unlock(&space_info->lock); |
6972 | return 0; | ||
6973 | } | ||
6974 | |||
6975 | int btrfs_relocate_block_group(struct btrfs_root *root, u64 group_start) | ||
6976 | { | ||
6977 | struct btrfs_trans_handle *trans; | ||
6978 | struct btrfs_path *path; | ||
6979 | struct btrfs_fs_info *info = root->fs_info; | ||
6980 | struct extent_buffer *leaf; | ||
6981 | struct inode *reloc_inode; | ||
6982 | struct btrfs_block_group_cache *block_group; | ||
6983 | struct btrfs_key key; | ||
6984 | u64 skipped; | ||
6985 | u64 cur_byte; | ||
6986 | u64 total_found; | ||
6987 | u32 nritems; | ||
6988 | int ret; | ||
6989 | int progress; | ||
6990 | int pass = 0; | ||
6991 | |||
6992 | root = root->fs_info->extent_root; | ||
6993 | |||
6994 | block_group = btrfs_lookup_block_group(info, group_start); | ||
6995 | BUG_ON(!block_group); | ||
6996 | |||
6997 | printk(KERN_INFO "btrfs relocating block group %llu flags %llu\n", | ||
6998 | (unsigned long long)block_group->key.objectid, | ||
6999 | (unsigned long long)block_group->flags); | ||
7000 | |||
7001 | path = btrfs_alloc_path(); | ||
7002 | BUG_ON(!path); | ||
7003 | |||
7004 | reloc_inode = create_reloc_inode(info, block_group); | ||
7005 | BUG_ON(IS_ERR(reloc_inode)); | ||
7006 | |||
7007 | __alloc_chunk_for_shrink(root, block_group, 1); | ||
7008 | set_block_group_readonly(block_group); | ||
7009 | |||
7010 | btrfs_start_delalloc_inodes(info->tree_root); | ||
7011 | btrfs_wait_ordered_extents(info->tree_root, 0); | ||
7012 | again: | ||
7013 | skipped = 0; | ||
7014 | total_found = 0; | ||
7015 | progress = 0; | ||
7016 | key.objectid = block_group->key.objectid; | ||
7017 | key.offset = 0; | ||
7018 | key.type = 0; | ||
7019 | cur_byte = key.objectid; | ||
7020 | |||
7021 | trans = btrfs_start_transaction(info->tree_root, 1); | ||
7022 | btrfs_commit_transaction(trans, info->tree_root); | ||
7023 | 7272 | ||
7024 | mutex_lock(&root->fs_info->cleaner_mutex); | 7273 | /* |
7025 | btrfs_clean_old_snapshots(info->tree_root); | 7274 | * OK, we don't have enough space, but maybe we have free space on our
7026 | btrfs_remove_leaf_refs(info->tree_root, (u64)-1, 1); | 7275 | * devices to allocate new chunks for relocation, so loop through our |
7027 | mutex_unlock(&root->fs_info->cleaner_mutex); | 7276 | * alloc devices and guess if we have enough space. However, if we |
7277 | * were marked as full, then we know there aren't enough chunks, and we | ||
7278 | * can just return. | ||
7279 | */ | ||
7280 | ret = -1; | ||
7281 | if (full) | ||
7282 | goto out; | ||
7028 | 7283 | ||
7029 | trans = btrfs_start_transaction(info->tree_root, 1); | 7284 | mutex_lock(&root->fs_info->chunk_mutex); |
7030 | btrfs_commit_transaction(trans, info->tree_root); | 7285 | list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) { |
7286 | u64 min_free = btrfs_block_group_used(&block_group->item); | ||
7287 | u64 dev_offset, max_avail; | ||
7031 | 7288 | ||
7032 | while (1) { | 7289 | /* |
7033 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | 7290 | * check to make sure we can actually find a chunk with enough |
7034 | if (ret < 0) | 7291 | * space to fit our block group in. |
7035 | goto out; | 7292 | */ |
7036 | next: | 7293 | if (device->total_bytes > device->bytes_used + min_free) { |
7037 | leaf = path->nodes[0]; | 7294 | ret = find_free_dev_extent(NULL, device, min_free, |
7038 | nritems = btrfs_header_nritems(leaf); | 7295 | &dev_offset, &max_avail); |
7039 | if (path->slots[0] >= nritems) { | 7296 | if (!ret) |
7040 | ret = btrfs_next_leaf(root, path); | ||
7041 | if (ret < 0) | ||
7042 | goto out; | ||
7043 | if (ret == 1) { | ||
7044 | ret = 0; | ||
7045 | break; | 7297 | break; |
7046 | } | 7298 | ret = -1; |
7047 | leaf = path->nodes[0]; | ||
7048 | nritems = btrfs_header_nritems(leaf); | ||
7049 | } | 7299 | } |
7050 | |||
7051 | btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); | ||
7052 | |||
7053 | if (key.objectid >= block_group->key.objectid + | ||
7054 | block_group->key.offset) | ||
7055 | break; | ||
7056 | |||
7057 | if (progress && need_resched()) { | ||
7058 | btrfs_release_path(root, path); | ||
7059 | cond_resched(); | ||
7060 | progress = 0; | ||
7061 | continue; | ||
7062 | } | ||
7063 | progress = 1; | ||
7064 | |||
7065 | if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY || | ||
7066 | key.objectid + key.offset <= cur_byte) { | ||
7067 | path->slots[0]++; | ||
7068 | goto next; | ||
7069 | } | ||
7070 | |||
7071 | total_found++; | ||
7072 | cur_byte = key.objectid + key.offset; | ||
7073 | btrfs_release_path(root, path); | ||
7074 | |||
7075 | __alloc_chunk_for_shrink(root, block_group, 0); | ||
7076 | ret = relocate_one_extent(root, path, &key, block_group, | ||
7077 | reloc_inode, pass); | ||
7078 | BUG_ON(ret < 0); | ||
7079 | if (ret > 0) | ||
7080 | skipped++; | ||
7081 | |||
7082 | key.objectid = cur_byte; | ||
7083 | key.type = 0; | ||
7084 | key.offset = 0; | ||
7085 | } | ||
7086 | |||
7087 | btrfs_release_path(root, path); | ||
7088 | |||
7089 | if (pass == 0) { | ||
7090 | btrfs_wait_ordered_range(reloc_inode, 0, (u64)-1); | ||
7091 | invalidate_mapping_pages(reloc_inode->i_mapping, 0, -1); | ||
7092 | } | 7300 | } |
7093 | 7301 | mutex_unlock(&root->fs_info->chunk_mutex); | |
7094 | if (total_found > 0) { | ||
7095 | printk(KERN_INFO "btrfs found %llu extents in pass %d\n", | ||
7096 | (unsigned long long)total_found, pass); | ||
7097 | pass++; | ||
7098 | if (total_found == skipped && pass > 2) { | ||
7099 | iput(reloc_inode); | ||
7100 | reloc_inode = create_reloc_inode(info, block_group); | ||
7101 | pass = 0; | ||
7102 | } | ||
7103 | goto again; | ||
7104 | } | ||
7105 | |||
7106 | /* delete reloc_inode */ | ||
7107 | iput(reloc_inode); | ||
7108 | |||
7109 | /* unpin extents in this range */ | ||
7110 | trans = btrfs_start_transaction(info->tree_root, 1); | ||
7111 | btrfs_commit_transaction(trans, info->tree_root); | ||
7112 | |||
7113 | spin_lock(&block_group->lock); | ||
7114 | WARN_ON(block_group->pinned > 0); | ||
7115 | WARN_ON(block_group->reserved > 0); | ||
7116 | WARN_ON(btrfs_block_group_used(&block_group->item) > 0); | ||
7117 | spin_unlock(&block_group->lock); | ||
7118 | btrfs_put_block_group(block_group); | ||
7119 | ret = 0; | ||
7120 | out: | 7302 | out: |
7121 | btrfs_free_path(path); | 7303 | btrfs_put_block_group(block_group); |
7122 | return ret; | 7304 | return ret; |
7123 | } | 7305 | } |
7124 | #endif | ||
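Editor's note: the entire #if 0 region above (the old extent-by-extent relocation pass) is deleted in favor of relocation code elsewhere; btrfs_can_relocate() is the piece that lands here. Its decision reduces to arithmetic: an empty group can always go; otherwise the rest of the space_info must absorb the group's used bytes, or some device must have room for a replacement chunk. A standalone model of that predicate (field names are simplified, and the device check is only a capacity guess; the kernel additionally calls find_free_dev_extent()):

#include <stdint.h>
#include <stdio.h>

struct space { uint64_t total, used, reserved, pinned, readonly; };
struct dev   { uint64_t total, used; };

static int can_relocate(const struct space *s, uint64_t group_size,
                        uint64_t group_used, int full,
                        const struct dev *devs, int ndevs)
{
    if (group_used == 0)
        return 1;                               /* nothing to move */

    /* will the remaining space absorb this group's extents? */
    if (s->total != group_size &&
        s->used + s->reserved + s->pinned + s->readonly + group_used
            < s->total)
        return 1;

    if (full)
        return 0;        /* marked full: no room for new chunks either */

    /* otherwise, can some device host a fresh chunk of that size? */
    for (int i = 0; i < ndevs; i++)
        if (devs[i].total > devs[i].used + group_used)
            return 1;    /* kernel also verifies a free dev extent */
    return 0;
}

int main(void)
{
    struct space s = { .total = 10 << 20, .used = 2 << 20 };
    struct dev d = { .total = 20 << 20, .used = 5 << 20 };
    printf("%d\n", can_relocate(&s, 1 << 20, 1 << 20, 0, &d, 1));
    return 0;
}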
7125 | 7306 | ||
7126 | static int find_first_block_group(struct btrfs_root *root, | 7307 | static int find_first_block_group(struct btrfs_root *root, |
7127 | struct btrfs_path *path, struct btrfs_key *key) | 7308 | struct btrfs_path *path, struct btrfs_key *key) |
@@ -7164,8 +7345,18 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) | |||
7164 | { | 7345 | { |
7165 | struct btrfs_block_group_cache *block_group; | 7346 | struct btrfs_block_group_cache *block_group; |
7166 | struct btrfs_space_info *space_info; | 7347 | struct btrfs_space_info *space_info; |
7348 | struct btrfs_caching_control *caching_ctl; | ||
7167 | struct rb_node *n; | 7349 | struct rb_node *n; |
7168 | 7350 | ||
7351 | down_write(&info->extent_commit_sem); | ||
7352 | while (!list_empty(&info->caching_block_groups)) { | ||
7353 | caching_ctl = list_entry(info->caching_block_groups.next, | ||
7354 | struct btrfs_caching_control, list); | ||
7355 | list_del(&caching_ctl->list); | ||
7356 | put_caching_control(caching_ctl); | ||
7357 | } | ||
7358 | up_write(&info->extent_commit_sem); | ||
7359 | |||
7169 | spin_lock(&info->block_group_cache_lock); | 7360 | spin_lock(&info->block_group_cache_lock); |
7170 | while ((n = rb_last(&info->block_group_cache_tree)) != NULL) { | 7361 | while ((n = rb_last(&info->block_group_cache_tree)) != NULL) { |
7171 | block_group = rb_entry(n, struct btrfs_block_group_cache, | 7362 | block_group = rb_entry(n, struct btrfs_block_group_cache, |
@@ -7179,8 +7370,7 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) | |||
7179 | up_write(&block_group->space_info->groups_sem); | 7370 | up_write(&block_group->space_info->groups_sem); |
7180 | 7371 | ||
7181 | if (block_group->cached == BTRFS_CACHE_STARTED) | 7372 | if (block_group->cached == BTRFS_CACHE_STARTED) |
7182 | wait_event(block_group->caching_q, | 7373 | wait_block_group_cache_done(block_group); |
7183 | block_group_cache_done(block_group)); | ||
7184 | 7374 | ||
7185 | btrfs_remove_free_space_cache(block_group); | 7375 | btrfs_remove_free_space_cache(block_group); |
7186 | 7376 | ||
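Editor's note: the new loop at the top of btrfs_free_block_groups() drains fs_info->caching_block_groups under the write side of extent_commit_sem, dropping the list's reference on each caching control, and the wait_block_group_cache_done() call below replaces the open-coded wait_event(). A toy, single-threaded model of the drain (a plain linked list and a non-atomic refcount stand in for the kernel's list_head and atomics):

#include <stdio.h>
#include <stdlib.h>

struct caching_ctl {
    struct caching_ctl *next;
    int refs;
};

static void put_caching_ctl(struct caching_ctl *ctl)
{
    if (--ctl->refs == 0) {         /* kernel uses an atomic count */
        printf("freeing ctl %p\n", (void *)ctl);
        free(ctl);
    }
}

static void drain_caching_ctls(struct caching_ctl **head)
{
    /* kernel takes extent_commit_sem for writing around this loop */
    while (*head) {
        struct caching_ctl *ctl = *head;
        *head = ctl->next;          /* list_del() equivalent */
        put_caching_ctl(ctl);       /* drop the list's reference */
    }
}

int main(void)
{
    struct caching_ctl *head = calloc(1, sizeof(*head));
    head->refs = 1;                 /* only the list holds it */
    drain_caching_ctls(&head);
    return 0;
}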
@@ -7250,7 +7440,6 @@ int btrfs_read_block_groups(struct btrfs_root *root) | |||
7250 | spin_lock_init(&cache->lock); | 7440 | spin_lock_init(&cache->lock); |
7251 | spin_lock_init(&cache->tree_lock); | 7441 | spin_lock_init(&cache->tree_lock); |
7252 | cache->fs_info = info; | 7442 | cache->fs_info = info; |
7253 | init_waitqueue_head(&cache->caching_q); | ||
7254 | INIT_LIST_HEAD(&cache->list); | 7443 | INIT_LIST_HEAD(&cache->list); |
7255 | INIT_LIST_HEAD(&cache->cluster_list); | 7444 | INIT_LIST_HEAD(&cache->cluster_list); |
7256 | 7445 | ||
@@ -7272,8 +7461,6 @@ int btrfs_read_block_groups(struct btrfs_root *root) | |||
7272 | cache->flags = btrfs_block_group_flags(&cache->item); | 7461 | cache->flags = btrfs_block_group_flags(&cache->item); |
7273 | cache->sectorsize = root->sectorsize; | 7462 | cache->sectorsize = root->sectorsize; |
7274 | 7463 | ||
7275 | remove_sb_from_cache(root, cache); | ||
7276 | |||
7277 | /* | 7464 | /* |
7278 | * check for two cases, either we are full, and therefore | 7465 | * check for two cases, either we are full, and therefore |
7279 | * don't need to bother with the caching work since we won't | 7466 | * don't need to bother with the caching work since we won't |
@@ -7282,13 +7469,19 @@ int btrfs_read_block_groups(struct btrfs_root *root) | |||
7282 | * time, particularly in the full case. | 7469 | * time, particularly in the full case. |
7283 | */ | 7470 | */ |
7284 | if (found_key.offset == btrfs_block_group_used(&cache->item)) { | 7471 | if (found_key.offset == btrfs_block_group_used(&cache->item)) { |
7472 | exclude_super_stripes(root, cache); | ||
7473 | cache->last_byte_to_unpin = (u64)-1; | ||
7285 | cache->cached = BTRFS_CACHE_FINISHED; | 7474 | cache->cached = BTRFS_CACHE_FINISHED; |
7475 | free_excluded_extents(root, cache); | ||
7286 | } else if (btrfs_block_group_used(&cache->item) == 0) { | 7476 | } else if (btrfs_block_group_used(&cache->item) == 0) { |
7477 | exclude_super_stripes(root, cache); | ||
7478 | cache->last_byte_to_unpin = (u64)-1; | ||
7287 | cache->cached = BTRFS_CACHE_FINISHED; | 7479 | cache->cached = BTRFS_CACHE_FINISHED; |
7288 | add_new_free_space(cache, root->fs_info, | 7480 | add_new_free_space(cache, root->fs_info, |
7289 | found_key.objectid, | 7481 | found_key.objectid, |
7290 | found_key.objectid + | 7482 | found_key.objectid + |
7291 | found_key.offset); | 7483 | found_key.offset); |
7484 | free_excluded_extents(root, cache); | ||
7292 | } | 7485 | } |
7293 | 7486 | ||
7294 | ret = update_space_info(info, cache->flags, found_key.offset, | 7487 | ret = update_space_info(info, cache->flags, found_key.offset, |
@@ -7296,6 +7489,10 @@ int btrfs_read_block_groups(struct btrfs_root *root) | |||
7296 | &space_info); | 7489 | &space_info); |
7297 | BUG_ON(ret); | 7490 | BUG_ON(ret); |
7298 | cache->space_info = space_info; | 7491 | cache->space_info = space_info; |
7492 | spin_lock(&cache->space_info->lock); | ||
7493 | cache->space_info->bytes_super += cache->bytes_super; | ||
7494 | spin_unlock(&cache->space_info->lock); | ||
7495 | |||
7299 | down_write(&space_info->groups_sem); | 7496 | down_write(&space_info->groups_sem); |
7300 | list_add_tail(&cache->list, &space_info->block_groups); | 7497 | list_add_tail(&cache->list, &space_info->block_groups); |
7301 | up_write(&space_info->groups_sem); | 7498 | up_write(&space_info->groups_sem); |
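Editor's note: the hunk above wires two shortcuts into btrfs_read_block_groups(): a group that is completely full has no free space worth caching, and a completely empty one can have its whole range (minus the superblock stripes) inserted directly, so both are marked BTRFS_CACHE_FINISHED without kicking off the async caching worker; bytes_super is then folded into the space_info under its spinlock. A rough standalone model of the shortcut (all fields and the 64 KiB figure are illustrative):

#include <stdint.h>
#include <stdio.h>

enum cache_state { CACHE_NO, CACHE_FINISHED };

struct group {
    uint64_t size, used, bytes_super;
    enum cache_state cached;
};

static void exclude_super_stripes(struct group *g)
{
    g->bytes_super = 64 * 1024;     /* made-up amount */
}

static void maybe_finish_caching(struct group *g)
{
    if (g->used == g->size) {
        /* completely full: nothing to put in the free-space cache */
        exclude_super_stripes(g);
        g->cached = CACHE_FINISHED;
    } else if (g->used == 0) {
        /* completely empty: the whole range (minus supers) is free */
        exclude_super_stripes(g);
        g->cached = CACHE_FINISHED;
        printf("free space: %llu bytes\n",
               (unsigned long long)(g->size - g->bytes_super));
    }
    /* otherwise leave CACHE_NO and let the caching worker fill it in */
}

int main(void)
{
    struct group g = { .size = 1 << 30, .used = 0, .cached = CACHE_NO };
    maybe_finish_caching(&g);
    return 0;
}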
@@ -7345,7 +7542,6 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, | |||
7345 | atomic_set(&cache->count, 1); | 7542 | atomic_set(&cache->count, 1); |
7346 | spin_lock_init(&cache->lock); | 7543 | spin_lock_init(&cache->lock); |
7347 | spin_lock_init(&cache->tree_lock); | 7544 | spin_lock_init(&cache->tree_lock); |
7348 | init_waitqueue_head(&cache->caching_q); | ||
7349 | INIT_LIST_HEAD(&cache->list); | 7545 | INIT_LIST_HEAD(&cache->list); |
7350 | INIT_LIST_HEAD(&cache->cluster_list); | 7546 | INIT_LIST_HEAD(&cache->cluster_list); |
7351 | 7547 | ||
@@ -7354,15 +7550,23 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, | |||
7354 | cache->flags = type; | 7550 | cache->flags = type; |
7355 | btrfs_set_block_group_flags(&cache->item, type); | 7551 | btrfs_set_block_group_flags(&cache->item, type); |
7356 | 7552 | ||
7553 | cache->last_byte_to_unpin = (u64)-1; | ||
7357 | cache->cached = BTRFS_CACHE_FINISHED; | 7554 | cache->cached = BTRFS_CACHE_FINISHED; |
7358 | remove_sb_from_cache(root, cache); | 7555 | exclude_super_stripes(root, cache); |
7359 | 7556 | ||
7360 | add_new_free_space(cache, root->fs_info, chunk_offset, | 7557 | add_new_free_space(cache, root->fs_info, chunk_offset, |
7361 | chunk_offset + size); | 7558 | chunk_offset + size); |
7362 | 7559 | ||
7560 | free_excluded_extents(root, cache); | ||
7561 | |||
7363 | ret = update_space_info(root->fs_info, cache->flags, size, bytes_used, | 7562 | ret = update_space_info(root->fs_info, cache->flags, size, bytes_used, |
7364 | &cache->space_info); | 7563 | &cache->space_info); |
7365 | BUG_ON(ret); | 7564 | BUG_ON(ret); |
7565 | |||
7566 | spin_lock(&cache->space_info->lock); | ||
7567 | cache->space_info->bytes_super += cache->bytes_super; | ||
7568 | spin_unlock(&cache->space_info->lock); | ||
7569 | |||
7366 | down_write(&cache->space_info->groups_sem); | 7570 | down_write(&cache->space_info->groups_sem); |
7367 | list_add_tail(&cache->list, &cache->space_info->block_groups); | 7571 | list_add_tail(&cache->list, &cache->space_info->block_groups); |
7368 | up_write(&cache->space_info->groups_sem); | 7572 | up_write(&cache->space_info->groups_sem); |
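Editor's note: btrfs_make_block_group() now follows the same sequence as the read path: exclude the super stripes, seed the free-space cache with the remainder of the chunk, release the exclusion markers, then credit bytes_super to the space_info under its lock so allocators never hand out superblock bytes. A minimal sketch of that accounting step (a pthread mutex and plain structs stand in for the kernel spinlock and types):

#include <pthread.h>
#include <stdint.h>
#include <stdio.h>

struct space_info {
    pthread_mutex_t lock;
    uint64_t bytes_super;   /* bytes pinned under superblock copies */
};

struct block_group {
    struct space_info *space_info;
    uint64_t bytes_super;
};

static void account_bytes_super(struct block_group *bg)
{
    pthread_mutex_lock(&bg->space_info->lock);
    bg->space_info->bytes_super += bg->bytes_super;
    pthread_mutex_unlock(&bg->space_info->lock);
}

int main(void)
{
    struct space_info si = { .lock = PTHREAD_MUTEX_INITIALIZER };
    struct block_group bg = { .space_info = &si, .bytes_super = 65536 };
    account_bytes_super(&bg);
    printf("bytes_super = %llu\n", (unsigned long long)si.bytes_super);
    return 0;
}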
@@ -7428,8 +7632,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, | |||
7428 | up_write(&block_group->space_info->groups_sem); | 7632 | up_write(&block_group->space_info->groups_sem); |
7429 | 7633 | ||
7430 | if (block_group->cached == BTRFS_CACHE_STARTED) | 7634 | if (block_group->cached == BTRFS_CACHE_STARTED) |
7431 | wait_event(block_group->caching_q, | 7635 | wait_block_group_cache_done(block_group); |
7432 | block_group_cache_done(block_group)); | ||
7433 | 7636 | ||
7434 | btrfs_remove_free_space_cache(block_group); | 7637 | btrfs_remove_free_space_cache(block_group); |
7435 | 7638 | ||