diff options
author | Glenn Elliott <gelliott@cs.unc.edu> | 2012-03-04 19:47:13 -0500 |
---|---|---|
committer | Glenn Elliott <gelliott@cs.unc.edu> | 2012-03-04 19:47:13 -0500 |
commit | c71c03bda1e86c9d5198c5d83f712e695c4f2a1e (patch) | |
tree | ecb166cb3e2b7e2adb3b5e292245fefd23381ac8 /fs/btrfs/extent-tree.c | |
parent | ea53c912f8a86a8567697115b6a0d8152beee5c8 (diff) | |
parent | 6a00f206debf8a5c8899055726ad127dbeeed098 (diff) |
Merge branch 'mpi-master' into wip-k-fmlpwip-k-fmlp
Conflicts:
litmus/sched_cedf.c
Diffstat (limited to 'fs/btrfs/extent-tree.c')
-rw-r--r-- | fs/btrfs/extent-tree.c | 3159 |
1 files changed, 1170 insertions, 1989 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 32d094002a57..71cd456fdb60 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
@@ -33,11 +33,28 @@ | |||
33 | #include "locking.h" | 33 | #include "locking.h" |
34 | #include "free-space-cache.h" | 34 | #include "free-space-cache.h" |
35 | 35 | ||
36 | /* control flags for do_chunk_alloc's force field | ||
37 | * CHUNK_ALLOC_NO_FORCE means to only allocate a chunk | ||
38 | * if we really need one. | ||
39 | * | ||
40 | * CHUNK_ALLOC_FORCE means it must try to allocate one | ||
41 | * | ||
42 | * CHUNK_ALLOC_LIMITED means to only try and allocate one | ||
43 | * if we have very few chunks already allocated. This is | ||
44 | * used as part of the clustering code to help make sure | ||
45 | * we have a good pool of storage to cluster in, without | ||
46 | * filling the FS with empty chunks | ||
47 | * | ||
48 | */ | ||
49 | enum { | ||
50 | CHUNK_ALLOC_NO_FORCE = 0, | ||
51 | CHUNK_ALLOC_FORCE = 1, | ||
52 | CHUNK_ALLOC_LIMITED = 2, | ||
53 | }; | ||
54 | |||
36 | static int update_block_group(struct btrfs_trans_handle *trans, | 55 | static int update_block_group(struct btrfs_trans_handle *trans, |
37 | struct btrfs_root *root, | 56 | struct btrfs_root *root, |
38 | u64 bytenr, u64 num_bytes, int alloc); | 57 | u64 bytenr, u64 num_bytes, int alloc); |
39 | static int update_reserved_bytes(struct btrfs_block_group_cache *cache, | ||
40 | u64 num_bytes, int reserve, int sinfo); | ||
41 | static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | 58 | static int __btrfs_free_extent(struct btrfs_trans_handle *trans, |
42 | struct btrfs_root *root, | 59 | struct btrfs_root *root, |
43 | u64 bytenr, u64 num_bytes, u64 parent, | 60 | u64 bytenr, u64 num_bytes, u64 parent, |
@@ -77,7 +94,7 @@ static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits) | |||
77 | return (cache->flags & bits) == bits; | 94 | return (cache->flags & bits) == bits; |
78 | } | 95 | } |
79 | 96 | ||
80 | void btrfs_get_block_group(struct btrfs_block_group_cache *cache) | 97 | static void btrfs_get_block_group(struct btrfs_block_group_cache *cache) |
81 | { | 98 | { |
82 | atomic_inc(&cache->count); | 99 | atomic_inc(&cache->count); |
83 | } | 100 | } |
@@ -88,6 +105,7 @@ void btrfs_put_block_group(struct btrfs_block_group_cache *cache) | |||
88 | WARN_ON(cache->pinned > 0); | 105 | WARN_ON(cache->pinned > 0); |
89 | WARN_ON(cache->reserved > 0); | 106 | WARN_ON(cache->reserved > 0); |
90 | WARN_ON(cache->reserved_pinned > 0); | 107 | WARN_ON(cache->reserved_pinned > 0); |
108 | kfree(cache->free_space_ctl); | ||
91 | kfree(cache); | 109 | kfree(cache); |
92 | } | 110 | } |
93 | } | 111 | } |
@@ -242,6 +260,12 @@ get_caching_control(struct btrfs_block_group_cache *cache) | |||
242 | return NULL; | 260 | return NULL; |
243 | } | 261 | } |
244 | 262 | ||
263 | /* We're loading it the fast way, so we don't have a caching_ctl. */ | ||
264 | if (!cache->caching_ctl) { | ||
265 | spin_unlock(&cache->lock); | ||
266 | return NULL; | ||
267 | } | ||
268 | |||
245 | ctl = cache->caching_ctl; | 269 | ctl = cache->caching_ctl; |
246 | atomic_inc(&ctl->count); | 270 | atomic_inc(&ctl->count); |
247 | spin_unlock(&cache->lock); | 271 | spin_unlock(&cache->lock); |
@@ -314,11 +338,6 @@ static int caching_kthread(void *data) | |||
314 | if (!path) | 338 | if (!path) |
315 | return -ENOMEM; | 339 | return -ENOMEM; |
316 | 340 | ||
317 | exclude_super_stripes(extent_root, block_group); | ||
318 | spin_lock(&block_group->space_info->lock); | ||
319 | block_group->space_info->bytes_readonly += block_group->bytes_super; | ||
320 | spin_unlock(&block_group->space_info->lock); | ||
321 | |||
322 | last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET); | 341 | last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET); |
323 | 342 | ||
324 | /* | 343 | /* |
@@ -329,7 +348,7 @@ static int caching_kthread(void *data) | |||
329 | */ | 348 | */ |
330 | path->skip_locking = 1; | 349 | path->skip_locking = 1; |
331 | path->search_commit_root = 1; | 350 | path->search_commit_root = 1; |
332 | path->reada = 2; | 351 | path->reada = 1; |
333 | 352 | ||
334 | key.objectid = last; | 353 | key.objectid = last; |
335 | key.offset = 0; | 354 | key.offset = 0; |
@@ -347,8 +366,7 @@ again: | |||
347 | nritems = btrfs_header_nritems(leaf); | 366 | nritems = btrfs_header_nritems(leaf); |
348 | 367 | ||
349 | while (1) { | 368 | while (1) { |
350 | smp_mb(); | 369 | if (btrfs_fs_closing(fs_info) > 1) { |
351 | if (fs_info->closing > 1) { | ||
352 | last = (u64)-1; | 370 | last = (u64)-1; |
353 | break; | 371 | break; |
354 | } | 372 | } |
@@ -360,15 +378,18 @@ again: | |||
360 | if (ret) | 378 | if (ret) |
361 | break; | 379 | break; |
362 | 380 | ||
363 | caching_ctl->progress = last; | 381 | if (need_resched() || |
364 | btrfs_release_path(extent_root, path); | 382 | btrfs_next_leaf(extent_root, path)) { |
365 | up_read(&fs_info->extent_commit_sem); | 383 | caching_ctl->progress = last; |
366 | mutex_unlock(&caching_ctl->mutex); | 384 | btrfs_release_path(path); |
367 | if (btrfs_transaction_in_commit(fs_info)) | 385 | up_read(&fs_info->extent_commit_sem); |
368 | schedule_timeout(1); | 386 | mutex_unlock(&caching_ctl->mutex); |
369 | else | ||
370 | cond_resched(); | 387 | cond_resched(); |
371 | goto again; | 388 | goto again; |
389 | } | ||
390 | leaf = path->nodes[0]; | ||
391 | nritems = btrfs_header_nritems(leaf); | ||
392 | continue; | ||
372 | } | 393 | } |
373 | 394 | ||
374 | if (key.objectid < block_group->key.objectid) { | 395 | if (key.objectid < block_group->key.objectid) { |
@@ -421,7 +442,10 @@ err: | |||
421 | return 0; | 442 | return 0; |
422 | } | 443 | } |
423 | 444 | ||
424 | static int cache_block_group(struct btrfs_block_group_cache *cache) | 445 | static int cache_block_group(struct btrfs_block_group_cache *cache, |
446 | struct btrfs_trans_handle *trans, | ||
447 | struct btrfs_root *root, | ||
448 | int load_cache_only) | ||
425 | { | 449 | { |
426 | struct btrfs_fs_info *fs_info = cache->fs_info; | 450 | struct btrfs_fs_info *fs_info = cache->fs_info; |
427 | struct btrfs_caching_control *caching_ctl; | 451 | struct btrfs_caching_control *caching_ctl; |
@@ -432,7 +456,42 @@ static int cache_block_group(struct btrfs_block_group_cache *cache) | |||
432 | if (cache->cached != BTRFS_CACHE_NO) | 456 | if (cache->cached != BTRFS_CACHE_NO) |
433 | return 0; | 457 | return 0; |
434 | 458 | ||
435 | caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_KERNEL); | 459 | /* |
460 | * We can't do the read from on-disk cache during a commit since we need | ||
461 | * to have the normal tree locking. Also if we are currently trying to | ||
462 | * allocate blocks for the tree root we can't do the fast caching since | ||
463 | * we likely hold important locks. | ||
464 | */ | ||
465 | if (trans && (!trans->transaction->in_commit) && | ||
466 | (root && root != root->fs_info->tree_root)) { | ||
467 | spin_lock(&cache->lock); | ||
468 | if (cache->cached != BTRFS_CACHE_NO) { | ||
469 | spin_unlock(&cache->lock); | ||
470 | return 0; | ||
471 | } | ||
472 | cache->cached = BTRFS_CACHE_STARTED; | ||
473 | spin_unlock(&cache->lock); | ||
474 | |||
475 | ret = load_free_space_cache(fs_info, cache); | ||
476 | |||
477 | spin_lock(&cache->lock); | ||
478 | if (ret == 1) { | ||
479 | cache->cached = BTRFS_CACHE_FINISHED; | ||
480 | cache->last_byte_to_unpin = (u64)-1; | ||
481 | } else { | ||
482 | cache->cached = BTRFS_CACHE_NO; | ||
483 | } | ||
484 | spin_unlock(&cache->lock); | ||
485 | if (ret == 1) { | ||
486 | free_excluded_extents(fs_info->extent_root, cache); | ||
487 | return 0; | ||
488 | } | ||
489 | } | ||
490 | |||
491 | if (load_cache_only) | ||
492 | return 0; | ||
493 | |||
494 | caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_NOFS); | ||
436 | BUG_ON(!caching_ctl); | 495 | BUG_ON(!caching_ctl); |
437 | 496 | ||
438 | INIT_LIST_HEAD(&caching_ctl->list); | 497 | INIT_LIST_HEAD(&caching_ctl->list); |
@@ -509,7 +568,7 @@ static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info, | |||
509 | 568 | ||
510 | rcu_read_lock(); | 569 | rcu_read_lock(); |
511 | list_for_each_entry_rcu(found, head, list) { | 570 | list_for_each_entry_rcu(found, head, list) { |
512 | if (found->flags == flags) { | 571 | if (found->flags & flags) { |
513 | rcu_read_unlock(); | 572 | rcu_read_unlock(); |
514 | return found; | 573 | return found; |
515 | } | 574 | } |
@@ -542,6 +601,15 @@ static u64 div_factor(u64 num, int factor) | |||
542 | return num; | 601 | return num; |
543 | } | 602 | } |
544 | 603 | ||
604 | static u64 div_factor_fine(u64 num, int factor) | ||
605 | { | ||
606 | if (factor == 100) | ||
607 | return num; | ||
608 | num *= factor; | ||
609 | do_div(num, 100); | ||
610 | return num; | ||
611 | } | ||
612 | |||
545 | u64 btrfs_find_block_group(struct btrfs_root *root, | 613 | u64 btrfs_find_block_group(struct btrfs_root *root, |
546 | u64 search_start, u64 search_hint, int owner) | 614 | u64 search_start, u64 search_hint, int owner) |
547 | { | 615 | { |
@@ -689,8 +757,12 @@ again: | |||
689 | atomic_inc(&head->node.refs); | 757 | atomic_inc(&head->node.refs); |
690 | spin_unlock(&delayed_refs->lock); | 758 | spin_unlock(&delayed_refs->lock); |
691 | 759 | ||
692 | btrfs_release_path(root->fs_info->extent_root, path); | 760 | btrfs_release_path(path); |
693 | 761 | ||
762 | /* | ||
763 | * Mutex was contended, block until it's released and try | ||
764 | * again | ||
765 | */ | ||
694 | mutex_lock(&head->mutex); | 766 | mutex_lock(&head->mutex); |
695 | mutex_unlock(&head->mutex); | 767 | mutex_unlock(&head->mutex); |
696 | btrfs_put_delayed_ref(&head->node); | 768 | btrfs_put_delayed_ref(&head->node); |
@@ -869,7 +941,7 @@ static int convert_extent_item_v0(struct btrfs_trans_handle *trans, | |||
869 | break; | 941 | break; |
870 | } | 942 | } |
871 | } | 943 | } |
872 | btrfs_release_path(root, path); | 944 | btrfs_release_path(path); |
873 | 945 | ||
874 | if (owner < BTRFS_FIRST_FREE_OBJECTID) | 946 | if (owner < BTRFS_FIRST_FREE_OBJECTID) |
875 | new_size += sizeof(*bi); | 947 | new_size += sizeof(*bi); |
@@ -882,7 +954,6 @@ static int convert_extent_item_v0(struct btrfs_trans_handle *trans, | |||
882 | BUG_ON(ret); | 954 | BUG_ON(ret); |
883 | 955 | ||
884 | ret = btrfs_extend_item(trans, root, path, new_size); | 956 | ret = btrfs_extend_item(trans, root, path, new_size); |
885 | BUG_ON(ret); | ||
886 | 957 | ||
887 | leaf = path->nodes[0]; | 958 | leaf = path->nodes[0]; |
888 | item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); | 959 | item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); |
@@ -977,7 +1048,7 @@ again: | |||
977 | return 0; | 1048 | return 0; |
978 | #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 | 1049 | #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 |
979 | key.type = BTRFS_EXTENT_REF_V0_KEY; | 1050 | key.type = BTRFS_EXTENT_REF_V0_KEY; |
980 | btrfs_release_path(root, path); | 1051 | btrfs_release_path(path); |
981 | ret = btrfs_search_slot(trans, root, &key, path, -1, 1); | 1052 | ret = btrfs_search_slot(trans, root, &key, path, -1, 1); |
982 | if (ret < 0) { | 1053 | if (ret < 0) { |
983 | err = ret; | 1054 | err = ret; |
@@ -1015,7 +1086,7 @@ again: | |||
1015 | if (match_extent_data_ref(leaf, ref, root_objectid, | 1086 | if (match_extent_data_ref(leaf, ref, root_objectid, |
1016 | owner, offset)) { | 1087 | owner, offset)) { |
1017 | if (recow) { | 1088 | if (recow) { |
1018 | btrfs_release_path(root, path); | 1089 | btrfs_release_path(path); |
1019 | goto again; | 1090 | goto again; |
1020 | } | 1091 | } |
1021 | err = 0; | 1092 | err = 0; |
@@ -1076,7 +1147,7 @@ static noinline int insert_extent_data_ref(struct btrfs_trans_handle *trans, | |||
1076 | if (match_extent_data_ref(leaf, ref, root_objectid, | 1147 | if (match_extent_data_ref(leaf, ref, root_objectid, |
1077 | owner, offset)) | 1148 | owner, offset)) |
1078 | break; | 1149 | break; |
1079 | btrfs_release_path(root, path); | 1150 | btrfs_release_path(path); |
1080 | key.offset++; | 1151 | key.offset++; |
1081 | ret = btrfs_insert_empty_item(trans, root, path, &key, | 1152 | ret = btrfs_insert_empty_item(trans, root, path, &key, |
1082 | size); | 1153 | size); |
@@ -1102,7 +1173,7 @@ static noinline int insert_extent_data_ref(struct btrfs_trans_handle *trans, | |||
1102 | btrfs_mark_buffer_dirty(leaf); | 1173 | btrfs_mark_buffer_dirty(leaf); |
1103 | ret = 0; | 1174 | ret = 0; |
1104 | fail: | 1175 | fail: |
1105 | btrfs_release_path(root, path); | 1176 | btrfs_release_path(path); |
1106 | return ret; | 1177 | return ret; |
1107 | } | 1178 | } |
1108 | 1179 | ||
@@ -1228,7 +1299,7 @@ static noinline int lookup_tree_block_ref(struct btrfs_trans_handle *trans, | |||
1228 | ret = -ENOENT; | 1299 | ret = -ENOENT; |
1229 | #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 | 1300 | #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 |
1230 | if (ret == -ENOENT && parent) { | 1301 | if (ret == -ENOENT && parent) { |
1231 | btrfs_release_path(root, path); | 1302 | btrfs_release_path(path); |
1232 | key.type = BTRFS_EXTENT_REF_V0_KEY; | 1303 | key.type = BTRFS_EXTENT_REF_V0_KEY; |
1233 | ret = btrfs_search_slot(trans, root, &key, path, -1, 1); | 1304 | ret = btrfs_search_slot(trans, root, &key, path, -1, 1); |
1234 | if (ret > 0) | 1305 | if (ret > 0) |
@@ -1257,7 +1328,7 @@ static noinline int insert_tree_block_ref(struct btrfs_trans_handle *trans, | |||
1257 | } | 1328 | } |
1258 | 1329 | ||
1259 | ret = btrfs_insert_empty_item(trans, root, path, &key, 0); | 1330 | ret = btrfs_insert_empty_item(trans, root, path, &key, 0); |
1260 | btrfs_release_path(root, path); | 1331 | btrfs_release_path(path); |
1261 | return ret; | 1332 | return ret; |
1262 | } | 1333 | } |
1263 | 1334 | ||
@@ -1490,7 +1561,6 @@ int setup_inline_extent_backref(struct btrfs_trans_handle *trans, | |||
1490 | size = btrfs_extent_inline_ref_size(type); | 1561 | size = btrfs_extent_inline_ref_size(type); |
1491 | 1562 | ||
1492 | ret = btrfs_extend_item(trans, root, path, size); | 1563 | ret = btrfs_extend_item(trans, root, path, size); |
1493 | BUG_ON(ret); | ||
1494 | 1564 | ||
1495 | ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); | 1565 | ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); |
1496 | refs = btrfs_extent_refs(leaf, ei); | 1566 | refs = btrfs_extent_refs(leaf, ei); |
@@ -1543,7 +1613,7 @@ static int lookup_extent_backref(struct btrfs_trans_handle *trans, | |||
1543 | if (ret != -ENOENT) | 1613 | if (ret != -ENOENT) |
1544 | return ret; | 1614 | return ret; |
1545 | 1615 | ||
1546 | btrfs_release_path(root, path); | 1616 | btrfs_release_path(path); |
1547 | *ref_ret = NULL; | 1617 | *ref_ret = NULL; |
1548 | 1618 | ||
1549 | if (owner < BTRFS_FIRST_FREE_OBJECTID) { | 1619 | if (owner < BTRFS_FIRST_FREE_OBJECTID) { |
@@ -1619,7 +1689,6 @@ int update_inline_extent_backref(struct btrfs_trans_handle *trans, | |||
1619 | end - ptr - size); | 1689 | end - ptr - size); |
1620 | item_size -= size; | 1690 | item_size -= size; |
1621 | ret = btrfs_truncate_item(trans, root, path, item_size, 1); | 1691 | ret = btrfs_truncate_item(trans, root, path, item_size, 1); |
1622 | BUG_ON(ret); | ||
1623 | } | 1692 | } |
1624 | btrfs_mark_buffer_dirty(leaf); | 1693 | btrfs_mark_buffer_dirty(leaf); |
1625 | return 0; | 1694 | return 0; |
@@ -1692,40 +1761,45 @@ static int remove_extent_backref(struct btrfs_trans_handle *trans, | |||
1692 | return ret; | 1761 | return ret; |
1693 | } | 1762 | } |
1694 | 1763 | ||
1695 | static void btrfs_issue_discard(struct block_device *bdev, | 1764 | static int btrfs_issue_discard(struct block_device *bdev, |
1696 | u64 start, u64 len) | 1765 | u64 start, u64 len) |
1697 | { | 1766 | { |
1698 | blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL, | 1767 | return blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_NOFS, 0); |
1699 | BLKDEV_IFL_WAIT | BLKDEV_IFL_BARRIER); | ||
1700 | } | 1768 | } |
1701 | 1769 | ||
1702 | static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, | 1770 | static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, |
1703 | u64 num_bytes) | 1771 | u64 num_bytes, u64 *actual_bytes) |
1704 | { | 1772 | { |
1705 | int ret; | 1773 | int ret; |
1706 | u64 map_length = num_bytes; | 1774 | u64 discarded_bytes = 0; |
1707 | struct btrfs_multi_bio *multi = NULL; | 1775 | struct btrfs_multi_bio *multi = NULL; |
1708 | 1776 | ||
1709 | if (!btrfs_test_opt(root, DISCARD)) | ||
1710 | return 0; | ||
1711 | 1777 | ||
1712 | /* Tell the block device(s) that the sectors can be discarded */ | 1778 | /* Tell the block device(s) that the sectors can be discarded */ |
1713 | ret = btrfs_map_block(&root->fs_info->mapping_tree, READ, | 1779 | ret = btrfs_map_block(&root->fs_info->mapping_tree, REQ_DISCARD, |
1714 | bytenr, &map_length, &multi, 0); | 1780 | bytenr, &num_bytes, &multi, 0); |
1715 | if (!ret) { | 1781 | if (!ret) { |
1716 | struct btrfs_bio_stripe *stripe = multi->stripes; | 1782 | struct btrfs_bio_stripe *stripe = multi->stripes; |
1717 | int i; | 1783 | int i; |
1718 | 1784 | ||
1719 | if (map_length > num_bytes) | ||
1720 | map_length = num_bytes; | ||
1721 | 1785 | ||
1722 | for (i = 0; i < multi->num_stripes; i++, stripe++) { | 1786 | for (i = 0; i < multi->num_stripes; i++, stripe++) { |
1723 | btrfs_issue_discard(stripe->dev->bdev, | 1787 | ret = btrfs_issue_discard(stripe->dev->bdev, |
1724 | stripe->physical, | 1788 | stripe->physical, |
1725 | map_length); | 1789 | stripe->length); |
1790 | if (!ret) | ||
1791 | discarded_bytes += stripe->length; | ||
1792 | else if (ret != -EOPNOTSUPP) | ||
1793 | break; | ||
1726 | } | 1794 | } |
1727 | kfree(multi); | 1795 | kfree(multi); |
1728 | } | 1796 | } |
1797 | if (discarded_bytes && ret == -EOPNOTSUPP) | ||
1798 | ret = 0; | ||
1799 | |||
1800 | if (actual_bytes) | ||
1801 | *actual_bytes = discarded_bytes; | ||
1802 | |||
1729 | 1803 | ||
1730 | return ret; | 1804 | return ret; |
1731 | } | 1805 | } |
@@ -1792,7 +1866,7 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, | |||
1792 | __run_delayed_extent_op(extent_op, leaf, item); | 1866 | __run_delayed_extent_op(extent_op, leaf, item); |
1793 | 1867 | ||
1794 | btrfs_mark_buffer_dirty(leaf); | 1868 | btrfs_mark_buffer_dirty(leaf); |
1795 | btrfs_release_path(root->fs_info->extent_root, path); | 1869 | btrfs_release_path(path); |
1796 | 1870 | ||
1797 | path->reada = 1; | 1871 | path->reada = 1; |
1798 | path->leave_spinning = 1; | 1872 | path->leave_spinning = 1; |
@@ -2227,6 +2301,10 @@ again: | |||
2227 | atomic_inc(&ref->refs); | 2301 | atomic_inc(&ref->refs); |
2228 | 2302 | ||
2229 | spin_unlock(&delayed_refs->lock); | 2303 | spin_unlock(&delayed_refs->lock); |
2304 | /* | ||
2305 | * Mutex was contended, block until it's | ||
2306 | * released and try again | ||
2307 | */ | ||
2230 | mutex_lock(&head->mutex); | 2308 | mutex_lock(&head->mutex); |
2231 | mutex_unlock(&head->mutex); | 2309 | mutex_unlock(&head->mutex); |
2232 | 2310 | ||
@@ -2291,8 +2369,12 @@ static noinline int check_delayed_ref(struct btrfs_trans_handle *trans, | |||
2291 | atomic_inc(&head->node.refs); | 2369 | atomic_inc(&head->node.refs); |
2292 | spin_unlock(&delayed_refs->lock); | 2370 | spin_unlock(&delayed_refs->lock); |
2293 | 2371 | ||
2294 | btrfs_release_path(root->fs_info->extent_root, path); | 2372 | btrfs_release_path(path); |
2295 | 2373 | ||
2374 | /* | ||
2375 | * Mutex was contended, block until it's released and let | ||
2376 | * caller try again | ||
2377 | */ | ||
2296 | mutex_lock(&head->mutex); | 2378 | mutex_lock(&head->mutex); |
2297 | mutex_unlock(&head->mutex); | 2379 | mutex_unlock(&head->mutex); |
2298 | btrfs_put_delayed_ref(&head->node); | 2380 | btrfs_put_delayed_ref(&head->node); |
@@ -2440,126 +2522,6 @@ out: | |||
2440 | return ret; | 2522 | return ret; |
2441 | } | 2523 | } |
2442 | 2524 | ||
2443 | #if 0 | ||
2444 | int btrfs_cache_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, | ||
2445 | struct extent_buffer *buf, u32 nr_extents) | ||
2446 | { | ||
2447 | struct btrfs_key key; | ||
2448 | struct btrfs_file_extent_item *fi; | ||
2449 | u64 root_gen; | ||
2450 | u32 nritems; | ||
2451 | int i; | ||
2452 | int level; | ||
2453 | int ret = 0; | ||
2454 | int shared = 0; | ||
2455 | |||
2456 | if (!root->ref_cows) | ||
2457 | return 0; | ||
2458 | |||
2459 | if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) { | ||
2460 | shared = 0; | ||
2461 | root_gen = root->root_key.offset; | ||
2462 | } else { | ||
2463 | shared = 1; | ||
2464 | root_gen = trans->transid - 1; | ||
2465 | } | ||
2466 | |||
2467 | level = btrfs_header_level(buf); | ||
2468 | nritems = btrfs_header_nritems(buf); | ||
2469 | |||
2470 | if (level == 0) { | ||
2471 | struct btrfs_leaf_ref *ref; | ||
2472 | struct btrfs_extent_info *info; | ||
2473 | |||
2474 | ref = btrfs_alloc_leaf_ref(root, nr_extents); | ||
2475 | if (!ref) { | ||
2476 | ret = -ENOMEM; | ||
2477 | goto out; | ||
2478 | } | ||
2479 | |||
2480 | ref->root_gen = root_gen; | ||
2481 | ref->bytenr = buf->start; | ||
2482 | ref->owner = btrfs_header_owner(buf); | ||
2483 | ref->generation = btrfs_header_generation(buf); | ||
2484 | ref->nritems = nr_extents; | ||
2485 | info = ref->extents; | ||
2486 | |||
2487 | for (i = 0; nr_extents > 0 && i < nritems; i++) { | ||
2488 | u64 disk_bytenr; | ||
2489 | btrfs_item_key_to_cpu(buf, &key, i); | ||
2490 | if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) | ||
2491 | continue; | ||
2492 | fi = btrfs_item_ptr(buf, i, | ||
2493 | struct btrfs_file_extent_item); | ||
2494 | if (btrfs_file_extent_type(buf, fi) == | ||
2495 | BTRFS_FILE_EXTENT_INLINE) | ||
2496 | continue; | ||
2497 | disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi); | ||
2498 | if (disk_bytenr == 0) | ||
2499 | continue; | ||
2500 | |||
2501 | info->bytenr = disk_bytenr; | ||
2502 | info->num_bytes = | ||
2503 | btrfs_file_extent_disk_num_bytes(buf, fi); | ||
2504 | info->objectid = key.objectid; | ||
2505 | info->offset = key.offset; | ||
2506 | info++; | ||
2507 | } | ||
2508 | |||
2509 | ret = btrfs_add_leaf_ref(root, ref, shared); | ||
2510 | if (ret == -EEXIST && shared) { | ||
2511 | struct btrfs_leaf_ref *old; | ||
2512 | old = btrfs_lookup_leaf_ref(root, ref->bytenr); | ||
2513 | BUG_ON(!old); | ||
2514 | btrfs_remove_leaf_ref(root, old); | ||
2515 | btrfs_free_leaf_ref(root, old); | ||
2516 | ret = btrfs_add_leaf_ref(root, ref, shared); | ||
2517 | } | ||
2518 | WARN_ON(ret); | ||
2519 | btrfs_free_leaf_ref(root, ref); | ||
2520 | } | ||
2521 | out: | ||
2522 | return ret; | ||
2523 | } | ||
2524 | |||
2525 | /* when a block goes through cow, we update the reference counts of | ||
2526 | * everything that block points to. The internal pointers of the block | ||
2527 | * can be in just about any order, and it is likely to have clusters of | ||
2528 | * things that are close together and clusters of things that are not. | ||
2529 | * | ||
2530 | * To help reduce the seeks that come with updating all of these reference | ||
2531 | * counts, sort them by byte number before actual updates are done. | ||
2532 | * | ||
2533 | * struct refsort is used to match byte number to slot in the btree block. | ||
2534 | * we sort based on the byte number and then use the slot to actually | ||
2535 | * find the item. | ||
2536 | * | ||
2537 | * struct refsort is smaller than strcut btrfs_item and smaller than | ||
2538 | * struct btrfs_key_ptr. Since we're currently limited to the page size | ||
2539 | * for a btree block, there's no way for a kmalloc of refsorts for a | ||
2540 | * single node to be bigger than a page. | ||
2541 | */ | ||
2542 | struct refsort { | ||
2543 | u64 bytenr; | ||
2544 | u32 slot; | ||
2545 | }; | ||
2546 | |||
2547 | /* | ||
2548 | * for passing into sort() | ||
2549 | */ | ||
2550 | static int refsort_cmp(const void *a_void, const void *b_void) | ||
2551 | { | ||
2552 | const struct refsort *a = a_void; | ||
2553 | const struct refsort *b = b_void; | ||
2554 | |||
2555 | if (a->bytenr < b->bytenr) | ||
2556 | return -1; | ||
2557 | if (a->bytenr > b->bytenr) | ||
2558 | return 1; | ||
2559 | return 0; | ||
2560 | } | ||
2561 | #endif | ||
2562 | |||
2563 | static int __btrfs_mod_ref(struct btrfs_trans_handle *trans, | 2525 | static int __btrfs_mod_ref(struct btrfs_trans_handle *trans, |
2564 | struct btrfs_root *root, | 2526 | struct btrfs_root *root, |
2565 | struct extent_buffer *buf, | 2527 | struct extent_buffer *buf, |
@@ -2662,7 +2624,7 @@ static int write_one_cache_group(struct btrfs_trans_handle *trans, | |||
2662 | bi = btrfs_item_ptr_offset(leaf, path->slots[0]); | 2624 | bi = btrfs_item_ptr_offset(leaf, path->slots[0]); |
2663 | write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item)); | 2625 | write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item)); |
2664 | btrfs_mark_buffer_dirty(leaf); | 2626 | btrfs_mark_buffer_dirty(leaf); |
2665 | btrfs_release_path(extent_root, path); | 2627 | btrfs_release_path(path); |
2666 | fail: | 2628 | fail: |
2667 | if (ret) | 2629 | if (ret) |
2668 | return ret; | 2630 | return ret; |
@@ -2688,6 +2650,111 @@ next_block_group(struct btrfs_root *root, | |||
2688 | return cache; | 2650 | return cache; |
2689 | } | 2651 | } |
2690 | 2652 | ||
2653 | static int cache_save_setup(struct btrfs_block_group_cache *block_group, | ||
2654 | struct btrfs_trans_handle *trans, | ||
2655 | struct btrfs_path *path) | ||
2656 | { | ||
2657 | struct btrfs_root *root = block_group->fs_info->tree_root; | ||
2658 | struct inode *inode = NULL; | ||
2659 | u64 alloc_hint = 0; | ||
2660 | int dcs = BTRFS_DC_ERROR; | ||
2661 | int num_pages = 0; | ||
2662 | int retries = 0; | ||
2663 | int ret = 0; | ||
2664 | |||
2665 | /* | ||
2666 | * If this block group is smaller than 100 megs don't bother caching the | ||
2667 | * block group. | ||
2668 | */ | ||
2669 | if (block_group->key.offset < (100 * 1024 * 1024)) { | ||
2670 | spin_lock(&block_group->lock); | ||
2671 | block_group->disk_cache_state = BTRFS_DC_WRITTEN; | ||
2672 | spin_unlock(&block_group->lock); | ||
2673 | return 0; | ||
2674 | } | ||
2675 | |||
2676 | again: | ||
2677 | inode = lookup_free_space_inode(root, block_group, path); | ||
2678 | if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) { | ||
2679 | ret = PTR_ERR(inode); | ||
2680 | btrfs_release_path(path); | ||
2681 | goto out; | ||
2682 | } | ||
2683 | |||
2684 | if (IS_ERR(inode)) { | ||
2685 | BUG_ON(retries); | ||
2686 | retries++; | ||
2687 | |||
2688 | if (block_group->ro) | ||
2689 | goto out_free; | ||
2690 | |||
2691 | ret = create_free_space_inode(root, trans, block_group, path); | ||
2692 | if (ret) | ||
2693 | goto out_free; | ||
2694 | goto again; | ||
2695 | } | ||
2696 | |||
2697 | /* | ||
2698 | * We want to set the generation to 0, that way if anything goes wrong | ||
2699 | * from here on out we know not to trust this cache when we load up next | ||
2700 | * time. | ||
2701 | */ | ||
2702 | BTRFS_I(inode)->generation = 0; | ||
2703 | ret = btrfs_update_inode(trans, root, inode); | ||
2704 | WARN_ON(ret); | ||
2705 | |||
2706 | if (i_size_read(inode) > 0) { | ||
2707 | ret = btrfs_truncate_free_space_cache(root, trans, path, | ||
2708 | inode); | ||
2709 | if (ret) | ||
2710 | goto out_put; | ||
2711 | } | ||
2712 | |||
2713 | spin_lock(&block_group->lock); | ||
2714 | if (block_group->cached != BTRFS_CACHE_FINISHED) { | ||
2715 | /* We're not cached, don't bother trying to write stuff out */ | ||
2716 | dcs = BTRFS_DC_WRITTEN; | ||
2717 | spin_unlock(&block_group->lock); | ||
2718 | goto out_put; | ||
2719 | } | ||
2720 | spin_unlock(&block_group->lock); | ||
2721 | |||
2722 | num_pages = (int)div64_u64(block_group->key.offset, 1024 * 1024 * 1024); | ||
2723 | if (!num_pages) | ||
2724 | num_pages = 1; | ||
2725 | |||
2726 | /* | ||
2727 | * Just to make absolutely sure we have enough space, we're going to | ||
2728 | * preallocate 12 pages worth of space for each block group. In | ||
2729 | * practice we ought to use at most 8, but we need extra space so we can | ||
2730 | * add our header and have a terminator between the extents and the | ||
2731 | * bitmaps. | ||
2732 | */ | ||
2733 | num_pages *= 16; | ||
2734 | num_pages *= PAGE_CACHE_SIZE; | ||
2735 | |||
2736 | ret = btrfs_check_data_free_space(inode, num_pages); | ||
2737 | if (ret) | ||
2738 | goto out_put; | ||
2739 | |||
2740 | ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, num_pages, | ||
2741 | num_pages, num_pages, | ||
2742 | &alloc_hint); | ||
2743 | if (!ret) | ||
2744 | dcs = BTRFS_DC_SETUP; | ||
2745 | btrfs_free_reserved_data_space(inode, num_pages); | ||
2746 | out_put: | ||
2747 | iput(inode); | ||
2748 | out_free: | ||
2749 | btrfs_release_path(path); | ||
2750 | out: | ||
2751 | spin_lock(&block_group->lock); | ||
2752 | block_group->disk_cache_state = dcs; | ||
2753 | spin_unlock(&block_group->lock); | ||
2754 | |||
2755 | return ret; | ||
2756 | } | ||
2757 | |||
2691 | int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, | 2758 | int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, |
2692 | struct btrfs_root *root) | 2759 | struct btrfs_root *root) |
2693 | { | 2760 | { |
@@ -2700,6 +2767,25 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, | |||
2700 | if (!path) | 2767 | if (!path) |
2701 | return -ENOMEM; | 2768 | return -ENOMEM; |
2702 | 2769 | ||
2770 | again: | ||
2771 | while (1) { | ||
2772 | cache = btrfs_lookup_first_block_group(root->fs_info, last); | ||
2773 | while (cache) { | ||
2774 | if (cache->disk_cache_state == BTRFS_DC_CLEAR) | ||
2775 | break; | ||
2776 | cache = next_block_group(root, cache); | ||
2777 | } | ||
2778 | if (!cache) { | ||
2779 | if (last == 0) | ||
2780 | break; | ||
2781 | last = 0; | ||
2782 | continue; | ||
2783 | } | ||
2784 | err = cache_save_setup(cache, trans, path); | ||
2785 | last = cache->key.objectid + cache->key.offset; | ||
2786 | btrfs_put_block_group(cache); | ||
2787 | } | ||
2788 | |||
2703 | while (1) { | 2789 | while (1) { |
2704 | if (last == 0) { | 2790 | if (last == 0) { |
2705 | err = btrfs_run_delayed_refs(trans, root, | 2791 | err = btrfs_run_delayed_refs(trans, root, |
@@ -2709,6 +2795,11 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, | |||
2709 | 2795 | ||
2710 | cache = btrfs_lookup_first_block_group(root->fs_info, last); | 2796 | cache = btrfs_lookup_first_block_group(root->fs_info, last); |
2711 | while (cache) { | 2797 | while (cache) { |
2798 | if (cache->disk_cache_state == BTRFS_DC_CLEAR) { | ||
2799 | btrfs_put_block_group(cache); | ||
2800 | goto again; | ||
2801 | } | ||
2802 | |||
2712 | if (cache->dirty) | 2803 | if (cache->dirty) |
2713 | break; | 2804 | break; |
2714 | cache = next_block_group(root, cache); | 2805 | cache = next_block_group(root, cache); |
@@ -2720,6 +2811,8 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, | |||
2720 | continue; | 2811 | continue; |
2721 | } | 2812 | } |
2722 | 2813 | ||
2814 | if (cache->disk_cache_state == BTRFS_DC_SETUP) | ||
2815 | cache->disk_cache_state = BTRFS_DC_NEED_WRITE; | ||
2723 | cache->dirty = 0; | 2816 | cache->dirty = 0; |
2724 | last = cache->key.objectid + cache->key.offset; | 2817 | last = cache->key.objectid + cache->key.offset; |
2725 | 2818 | ||
@@ -2728,6 +2821,52 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, | |||
2728 | btrfs_put_block_group(cache); | 2821 | btrfs_put_block_group(cache); |
2729 | } | 2822 | } |
2730 | 2823 | ||
2824 | while (1) { | ||
2825 | /* | ||
2826 | * I don't think this is needed since we're just marking our | ||
2827 | * preallocated extent as written, but just in case it can't | ||
2828 | * hurt. | ||
2829 | */ | ||
2830 | if (last == 0) { | ||
2831 | err = btrfs_run_delayed_refs(trans, root, | ||
2832 | (unsigned long)-1); | ||
2833 | BUG_ON(err); | ||
2834 | } | ||
2835 | |||
2836 | cache = btrfs_lookup_first_block_group(root->fs_info, last); | ||
2837 | while (cache) { | ||
2838 | /* | ||
2839 | * Really this shouldn't happen, but it could if we | ||
2840 | * couldn't write the entire preallocated extent and | ||
2841 | * splitting the extent resulted in a new block. | ||
2842 | */ | ||
2843 | if (cache->dirty) { | ||
2844 | btrfs_put_block_group(cache); | ||
2845 | goto again; | ||
2846 | } | ||
2847 | if (cache->disk_cache_state == BTRFS_DC_NEED_WRITE) | ||
2848 | break; | ||
2849 | cache = next_block_group(root, cache); | ||
2850 | } | ||
2851 | if (!cache) { | ||
2852 | if (last == 0) | ||
2853 | break; | ||
2854 | last = 0; | ||
2855 | continue; | ||
2856 | } | ||
2857 | |||
2858 | btrfs_write_out_cache(root, trans, cache, path); | ||
2859 | |||
2860 | /* | ||
2861 | * If we didn't have an error then the cache state is still | ||
2862 | * NEED_WRITE, so we can set it to WRITTEN. | ||
2863 | */ | ||
2864 | if (cache->disk_cache_state == BTRFS_DC_NEED_WRITE) | ||
2865 | cache->disk_cache_state = BTRFS_DC_WRITTEN; | ||
2866 | last = cache->key.objectid + cache->key.offset; | ||
2867 | btrfs_put_block_group(cache); | ||
2868 | } | ||
2869 | |||
2731 | btrfs_free_path(path); | 2870 | btrfs_free_path(path); |
2732 | return 0; | 2871 | return 0; |
2733 | } | 2872 | } |
@@ -2763,6 +2902,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, | |||
2763 | if (found) { | 2902 | if (found) { |
2764 | spin_lock(&found->lock); | 2903 | spin_lock(&found->lock); |
2765 | found->total_bytes += total_bytes; | 2904 | found->total_bytes += total_bytes; |
2905 | found->disk_total += total_bytes * factor; | ||
2766 | found->bytes_used += bytes_used; | 2906 | found->bytes_used += bytes_used; |
2767 | found->disk_used += bytes_used * factor; | 2907 | found->disk_used += bytes_used * factor; |
2768 | found->full = 0; | 2908 | found->full = 0; |
@@ -2782,6 +2922,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, | |||
2782 | BTRFS_BLOCK_GROUP_SYSTEM | | 2922 | BTRFS_BLOCK_GROUP_SYSTEM | |
2783 | BTRFS_BLOCK_GROUP_METADATA); | 2923 | BTRFS_BLOCK_GROUP_METADATA); |
2784 | found->total_bytes = total_bytes; | 2924 | found->total_bytes = total_bytes; |
2925 | found->disk_total = total_bytes * factor; | ||
2785 | found->bytes_used = bytes_used; | 2926 | found->bytes_used = bytes_used; |
2786 | found->disk_used = bytes_used * factor; | 2927 | found->disk_used = bytes_used * factor; |
2787 | found->bytes_pinned = 0; | 2928 | found->bytes_pinned = 0; |
@@ -2789,7 +2930,8 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, | |||
2789 | found->bytes_readonly = 0; | 2930 | found->bytes_readonly = 0; |
2790 | found->bytes_may_use = 0; | 2931 | found->bytes_may_use = 0; |
2791 | found->full = 0; | 2932 | found->full = 0; |
2792 | found->force_alloc = 0; | 2933 | found->force_alloc = CHUNK_ALLOC_NO_FORCE; |
2934 | found->chunk_alloc = 0; | ||
2793 | *space_info = found; | 2935 | *space_info = found; |
2794 | list_add_rcu(&found->list, &info->space_info); | 2936 | list_add_rcu(&found->list, &info->space_info); |
2795 | atomic_set(&found->caching_threads, 0); | 2937 | atomic_set(&found->caching_threads, 0); |
@@ -2814,7 +2956,13 @@ static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags) | |||
2814 | 2956 | ||
2815 | u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags) | 2957 | u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags) |
2816 | { | 2958 | { |
2817 | u64 num_devices = root->fs_info->fs_devices->rw_devices; | 2959 | /* |
2960 | * we add in the count of missing devices because we want | ||
2961 | * to make sure that any RAID levels on a degraded FS | ||
2962 | * continue to be honored. | ||
2963 | */ | ||
2964 | u64 num_devices = root->fs_info->fs_devices->rw_devices + | ||
2965 | root->fs_info->fs_devices->missing_devices; | ||
2818 | 2966 | ||
2819 | if (num_devices == 1) | 2967 | if (num_devices == 1) |
2820 | flags &= ~(BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID0); | 2968 | flags &= ~(BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID0); |
@@ -2854,7 +3002,7 @@ static u64 get_alloc_profile(struct btrfs_root *root, u64 flags) | |||
2854 | return btrfs_reduce_alloc_profile(root, flags); | 3002 | return btrfs_reduce_alloc_profile(root, flags); |
2855 | } | 3003 | } |
2856 | 3004 | ||
2857 | static u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data) | 3005 | u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data) |
2858 | { | 3006 | { |
2859 | u64 flags; | 3007 | u64 flags; |
2860 | 3008 | ||
@@ -2883,11 +3031,17 @@ int btrfs_check_data_free_space(struct inode *inode, u64 bytes) | |||
2883 | struct btrfs_space_info *data_sinfo; | 3031 | struct btrfs_space_info *data_sinfo; |
2884 | struct btrfs_root *root = BTRFS_I(inode)->root; | 3032 | struct btrfs_root *root = BTRFS_I(inode)->root; |
2885 | u64 used; | 3033 | u64 used; |
2886 | int ret = 0, committed = 0; | 3034 | int ret = 0, committed = 0, alloc_chunk = 1; |
2887 | 3035 | ||
2888 | /* make sure bytes are sectorsize aligned */ | 3036 | /* make sure bytes are sectorsize aligned */ |
2889 | bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); | 3037 | bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); |
2890 | 3038 | ||
3039 | if (root == root->fs_info->tree_root || | ||
3040 | BTRFS_I(inode)->location.objectid == BTRFS_FREE_INO_OBJECTID) { | ||
3041 | alloc_chunk = 0; | ||
3042 | committed = 1; | ||
3043 | } | ||
3044 | |||
2891 | data_sinfo = BTRFS_I(inode)->space_info; | 3045 | data_sinfo = BTRFS_I(inode)->space_info; |
2892 | if (!data_sinfo) | 3046 | if (!data_sinfo) |
2893 | goto alloc; | 3047 | goto alloc; |
@@ -2906,23 +3060,28 @@ again: | |||
2906 | * if we don't have enough free bytes in this space then we need | 3060 | * if we don't have enough free bytes in this space then we need |
2907 | * to alloc a new chunk. | 3061 | * to alloc a new chunk. |
2908 | */ | 3062 | */ |
2909 | if (!data_sinfo->full) { | 3063 | if (!data_sinfo->full && alloc_chunk) { |
2910 | u64 alloc_target; | 3064 | u64 alloc_target; |
2911 | 3065 | ||
2912 | data_sinfo->force_alloc = 1; | 3066 | data_sinfo->force_alloc = CHUNK_ALLOC_FORCE; |
2913 | spin_unlock(&data_sinfo->lock); | 3067 | spin_unlock(&data_sinfo->lock); |
2914 | alloc: | 3068 | alloc: |
2915 | alloc_target = btrfs_get_alloc_profile(root, 1); | 3069 | alloc_target = btrfs_get_alloc_profile(root, 1); |
2916 | trans = btrfs_join_transaction(root, 1); | 3070 | trans = btrfs_join_transaction(root); |
2917 | if (IS_ERR(trans)) | 3071 | if (IS_ERR(trans)) |
2918 | return PTR_ERR(trans); | 3072 | return PTR_ERR(trans); |
2919 | 3073 | ||
2920 | ret = do_chunk_alloc(trans, root->fs_info->extent_root, | 3074 | ret = do_chunk_alloc(trans, root->fs_info->extent_root, |
2921 | bytes + 2 * 1024 * 1024, | 3075 | bytes + 2 * 1024 * 1024, |
2922 | alloc_target, 0); | 3076 | alloc_target, |
3077 | CHUNK_ALLOC_NO_FORCE); | ||
2923 | btrfs_end_transaction(trans, root); | 3078 | btrfs_end_transaction(trans, root); |
2924 | if (ret < 0) | 3079 | if (ret < 0) { |
2925 | return ret; | 3080 | if (ret != -ENOSPC) |
3081 | return ret; | ||
3082 | else | ||
3083 | goto commit_trans; | ||
3084 | } | ||
2926 | 3085 | ||
2927 | if (!data_sinfo) { | 3086 | if (!data_sinfo) { |
2928 | btrfs_set_inode_space_info(root, inode); | 3087 | btrfs_set_inode_space_info(root, inode); |
@@ -2930,12 +3089,21 @@ alloc: | |||
2930 | } | 3089 | } |
2931 | goto again; | 3090 | goto again; |
2932 | } | 3091 | } |
3092 | |||
3093 | /* | ||
3094 | * If we have less pinned bytes than we want to allocate then | ||
3095 | * don't bother committing the transaction, it won't help us. | ||
3096 | */ | ||
3097 | if (data_sinfo->bytes_pinned < bytes) | ||
3098 | committed = 1; | ||
2933 | spin_unlock(&data_sinfo->lock); | 3099 | spin_unlock(&data_sinfo->lock); |
2934 | 3100 | ||
2935 | /* commit the current transaction and try again */ | 3101 | /* commit the current transaction and try again */ |
2936 | if (!committed && !root->fs_info->open_ioctl_trans) { | 3102 | commit_trans: |
3103 | if (!committed && | ||
3104 | !atomic_read(&root->fs_info->open_ioctl_trans)) { | ||
2937 | committed = 1; | 3105 | committed = 1; |
2938 | trans = btrfs_join_transaction(root, 1); | 3106 | trans = btrfs_join_transaction(root); |
2939 | if (IS_ERR(trans)) | 3107 | if (IS_ERR(trans)) |
2940 | return PTR_ERR(trans); | 3108 | return PTR_ERR(trans); |
2941 | ret = btrfs_commit_transaction(trans, root); | 3109 | ret = btrfs_commit_transaction(trans, root); |
@@ -2944,18 +3112,6 @@ alloc: | |||
2944 | goto again; | 3112 | goto again; |
2945 | } | 3113 | } |
2946 | 3114 | ||
2947 | #if 0 /* I hope we never need this code again, just in case */ | ||
2948 | printk(KERN_ERR "no space left, need %llu, %llu bytes_used, " | ||
2949 | "%llu bytes_reserved, " "%llu bytes_pinned, " | ||
2950 | "%llu bytes_readonly, %llu may use %llu total\n", | ||
2951 | (unsigned long long)bytes, | ||
2952 | (unsigned long long)data_sinfo->bytes_used, | ||
2953 | (unsigned long long)data_sinfo->bytes_reserved, | ||
2954 | (unsigned long long)data_sinfo->bytes_pinned, | ||
2955 | (unsigned long long)data_sinfo->bytes_readonly, | ||
2956 | (unsigned long long)data_sinfo->bytes_may_use, | ||
2957 | (unsigned long long)data_sinfo->total_bytes); | ||
2958 | #endif | ||
2959 | return -ENOSPC; | 3115 | return -ENOSPC; |
2960 | } | 3116 | } |
2961 | data_sinfo->bytes_may_use += bytes; | 3117 | data_sinfo->bytes_may_use += bytes; |
@@ -2993,24 +3149,56 @@ static void force_metadata_allocation(struct btrfs_fs_info *info) | |||
2993 | rcu_read_lock(); | 3149 | rcu_read_lock(); |
2994 | list_for_each_entry_rcu(found, head, list) { | 3150 | list_for_each_entry_rcu(found, head, list) { |
2995 | if (found->flags & BTRFS_BLOCK_GROUP_METADATA) | 3151 | if (found->flags & BTRFS_BLOCK_GROUP_METADATA) |
2996 | found->force_alloc = 1; | 3152 | found->force_alloc = CHUNK_ALLOC_FORCE; |
2997 | } | 3153 | } |
2998 | rcu_read_unlock(); | 3154 | rcu_read_unlock(); |
2999 | } | 3155 | } |
3000 | 3156 | ||
3001 | static int should_alloc_chunk(struct btrfs_space_info *sinfo, | 3157 | static int should_alloc_chunk(struct btrfs_root *root, |
3002 | u64 alloc_bytes) | 3158 | struct btrfs_space_info *sinfo, u64 alloc_bytes, |
3159 | int force) | ||
3003 | { | 3160 | { |
3004 | u64 num_bytes = sinfo->total_bytes - sinfo->bytes_readonly; | 3161 | u64 num_bytes = sinfo->total_bytes - sinfo->bytes_readonly; |
3162 | u64 num_allocated = sinfo->bytes_used + sinfo->bytes_reserved; | ||
3163 | u64 thresh; | ||
3164 | |||
3165 | if (force == CHUNK_ALLOC_FORCE) | ||
3166 | return 1; | ||
3005 | 3167 | ||
3006 | if (sinfo->bytes_used + sinfo->bytes_reserved + | 3168 | /* |
3007 | alloc_bytes + 256 * 1024 * 1024 < num_bytes) | 3169 | * in limited mode, we want to have some free space up to |
3170 | * about 1% of the FS size. | ||
3171 | */ | ||
3172 | if (force == CHUNK_ALLOC_LIMITED) { | ||
3173 | thresh = btrfs_super_total_bytes(&root->fs_info->super_copy); | ||
3174 | thresh = max_t(u64, 64 * 1024 * 1024, | ||
3175 | div_factor_fine(thresh, 1)); | ||
3176 | |||
3177 | if (num_bytes - num_allocated < thresh) | ||
3178 | return 1; | ||
3179 | } | ||
3180 | |||
3181 | /* | ||
3182 | * we have two similar checks here, one based on percentage | ||
3183 | * and once based on a hard number of 256MB. The idea | ||
3184 | * is that if we have a good amount of free | ||
3185 | * room, don't allocate a chunk. A good mount is | ||
3186 | * less than 80% utilized of the chunks we have allocated, | ||
3187 | * or more than 256MB free | ||
3188 | */ | ||
3189 | if (num_allocated + alloc_bytes + 256 * 1024 * 1024 < num_bytes) | ||
3008 | return 0; | 3190 | return 0; |
3009 | 3191 | ||
3010 | if (sinfo->bytes_used + sinfo->bytes_reserved + | 3192 | if (num_allocated + alloc_bytes < div_factor(num_bytes, 8)) |
3011 | alloc_bytes < div_factor(num_bytes, 8)) | ||
3012 | return 0; | 3193 | return 0; |
3013 | 3194 | ||
3195 | thresh = btrfs_super_total_bytes(&root->fs_info->super_copy); | ||
3196 | |||
3197 | /* 256MB or 5% of the FS */ | ||
3198 | thresh = max_t(u64, 256 * 1024 * 1024, div_factor_fine(thresh, 5)); | ||
3199 | |||
3200 | if (num_bytes > thresh && sinfo->bytes_used < div_factor(num_bytes, 3)) | ||
3201 | return 0; | ||
3014 | return 1; | 3202 | return 1; |
3015 | } | 3203 | } |
3016 | 3204 | ||
@@ -3020,10 +3208,9 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, | |||
3020 | { | 3208 | { |
3021 | struct btrfs_space_info *space_info; | 3209 | struct btrfs_space_info *space_info; |
3022 | struct btrfs_fs_info *fs_info = extent_root->fs_info; | 3210 | struct btrfs_fs_info *fs_info = extent_root->fs_info; |
3211 | int wait_for_alloc = 0; | ||
3023 | int ret = 0; | 3212 | int ret = 0; |
3024 | 3213 | ||
3025 | mutex_lock(&fs_info->chunk_mutex); | ||
3026 | |||
3027 | flags = btrfs_reduce_alloc_profile(extent_root, flags); | 3214 | flags = btrfs_reduce_alloc_profile(extent_root, flags); |
3028 | 3215 | ||
3029 | space_info = __find_space_info(extent_root->fs_info, flags); | 3216 | space_info = __find_space_info(extent_root->fs_info, flags); |
@@ -3034,20 +3221,47 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, | |||
3034 | } | 3221 | } |
3035 | BUG_ON(!space_info); | 3222 | BUG_ON(!space_info); |
3036 | 3223 | ||
3224 | again: | ||
3037 | spin_lock(&space_info->lock); | 3225 | spin_lock(&space_info->lock); |
3038 | if (space_info->force_alloc) | 3226 | if (space_info->force_alloc) |
3039 | force = 1; | 3227 | force = space_info->force_alloc; |
3040 | if (space_info->full) { | 3228 | if (space_info->full) { |
3041 | spin_unlock(&space_info->lock); | 3229 | spin_unlock(&space_info->lock); |
3042 | goto out; | 3230 | return 0; |
3043 | } | 3231 | } |
3044 | 3232 | ||
3045 | if (!force && !should_alloc_chunk(space_info, alloc_bytes)) { | 3233 | if (!should_alloc_chunk(extent_root, space_info, alloc_bytes, force)) { |
3046 | spin_unlock(&space_info->lock); | 3234 | spin_unlock(&space_info->lock); |
3047 | goto out; | 3235 | return 0; |
3236 | } else if (space_info->chunk_alloc) { | ||
3237 | wait_for_alloc = 1; | ||
3238 | } else { | ||
3239 | space_info->chunk_alloc = 1; | ||
3048 | } | 3240 | } |
3241 | |||
3049 | spin_unlock(&space_info->lock); | 3242 | spin_unlock(&space_info->lock); |
3050 | 3243 | ||
3244 | mutex_lock(&fs_info->chunk_mutex); | ||
3245 | |||
3246 | /* | ||
3247 | * The chunk_mutex is held throughout the entirety of a chunk | ||
3248 | * allocation, so once we've acquired the chunk_mutex we know that the | ||
3249 | * other guy is done and we need to recheck and see if we should | ||
3250 | * allocate. | ||
3251 | */ | ||
3252 | if (wait_for_alloc) { | ||
3253 | mutex_unlock(&fs_info->chunk_mutex); | ||
3254 | wait_for_alloc = 0; | ||
3255 | goto again; | ||
3256 | } | ||
3257 | |||
3258 | /* | ||
3259 | * If we have mixed data/metadata chunks we want to make sure we keep | ||
3260 | * allocating mixed chunks instead of individual chunks. | ||
3261 | */ | ||
3262 | if (btrfs_mixed_space_info(space_info)) | ||
3263 | flags |= (BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA); | ||
3264 | |||
3051 | /* | 3265 | /* |
3052 | * if we're doing a data chunk, go ahead and make sure that | 3266 | * if we're doing a data chunk, go ahead and make sure that |
3053 | * we keep a reasonable number of metadata chunks allocated in the | 3267 | * we keep a reasonable number of metadata chunks allocated in the |
@@ -3066,167 +3280,220 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, | |||
3066 | space_info->full = 1; | 3280 | space_info->full = 1; |
3067 | else | 3281 | else |
3068 | ret = 1; | 3282 | ret = 1; |
3069 | space_info->force_alloc = 0; | 3283 | |
3284 | space_info->force_alloc = CHUNK_ALLOC_NO_FORCE; | ||
3285 | space_info->chunk_alloc = 0; | ||
3070 | spin_unlock(&space_info->lock); | 3286 | spin_unlock(&space_info->lock); |
3071 | out: | ||
3072 | mutex_unlock(&extent_root->fs_info->chunk_mutex); | 3287 | mutex_unlock(&extent_root->fs_info->chunk_mutex); |
3073 | return ret; | 3288 | return ret; |
3074 | } | 3289 | } |
3075 | 3290 | ||
3076 | static int maybe_allocate_chunk(struct btrfs_trans_handle *trans, | ||
3077 | struct btrfs_root *root, | ||
3078 | struct btrfs_space_info *sinfo, u64 num_bytes) | ||
3079 | { | ||
3080 | int ret; | ||
3081 | int end_trans = 0; | ||
3082 | |||
3083 | if (sinfo->full) | ||
3084 | return 0; | ||
3085 | |||
3086 | spin_lock(&sinfo->lock); | ||
3087 | ret = should_alloc_chunk(sinfo, num_bytes + 2 * 1024 * 1024); | ||
3088 | spin_unlock(&sinfo->lock); | ||
3089 | if (!ret) | ||
3090 | return 0; | ||
3091 | |||
3092 | if (!trans) { | ||
3093 | trans = btrfs_join_transaction(root, 1); | ||
3094 | BUG_ON(IS_ERR(trans)); | ||
3095 | end_trans = 1; | ||
3096 | } | ||
3097 | |||
3098 | ret = do_chunk_alloc(trans, root->fs_info->extent_root, | ||
3099 | num_bytes + 2 * 1024 * 1024, | ||
3100 | get_alloc_profile(root, sinfo->flags), 0); | ||
3101 | |||
3102 | if (end_trans) | ||
3103 | btrfs_end_transaction(trans, root); | ||
3104 | |||
3105 | return ret == 1 ? 1 : 0; | ||
3106 | } | ||
3107 | |||
3108 | /* | 3291 | /* |
3109 | * shrink metadata reservation for delalloc | 3292 | * shrink metadata reservation for delalloc |
3110 | */ | 3293 | */ |
3111 | static int shrink_delalloc(struct btrfs_trans_handle *trans, | 3294 | static int shrink_delalloc(struct btrfs_trans_handle *trans, |
3112 | struct btrfs_root *root, u64 to_reclaim) | 3295 | struct btrfs_root *root, u64 to_reclaim, int sync) |
3113 | { | 3296 | { |
3114 | struct btrfs_block_rsv *block_rsv; | 3297 | struct btrfs_block_rsv *block_rsv; |
3298 | struct btrfs_space_info *space_info; | ||
3115 | u64 reserved; | 3299 | u64 reserved; |
3116 | u64 max_reclaim; | 3300 | u64 max_reclaim; |
3117 | u64 reclaimed = 0; | 3301 | u64 reclaimed = 0; |
3118 | int pause = 1; | 3302 | long time_left; |
3119 | int ret; | 3303 | int nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT; |
3304 | int loops = 0; | ||
3305 | unsigned long progress; | ||
3120 | 3306 | ||
3121 | block_rsv = &root->fs_info->delalloc_block_rsv; | 3307 | block_rsv = &root->fs_info->delalloc_block_rsv; |
3122 | spin_lock(&block_rsv->lock); | 3308 | space_info = block_rsv->space_info; |
3123 | reserved = block_rsv->reserved; | 3309 | |
3124 | spin_unlock(&block_rsv->lock); | 3310 | smp_mb(); |
3311 | reserved = space_info->bytes_reserved; | ||
3312 | progress = space_info->reservation_progress; | ||
3125 | 3313 | ||
3126 | if (reserved == 0) | 3314 | if (reserved == 0) |
3127 | return 0; | 3315 | return 0; |
3128 | 3316 | ||
3129 | max_reclaim = min(reserved, to_reclaim); | 3317 | max_reclaim = min(reserved, to_reclaim); |
3130 | 3318 | ||
3131 | while (1) { | 3319 | while (loops < 1024) { |
3132 | ret = btrfs_start_one_delalloc_inode(root, trans ? 1 : 0); | 3320 | /* have the flusher threads jump in and do some IO */ |
3133 | if (!ret) { | 3321 | smp_mb(); |
3134 | __set_current_state(TASK_INTERRUPTIBLE); | 3322 | nr_pages = min_t(unsigned long, nr_pages, |
3135 | schedule_timeout(pause); | 3323 | root->fs_info->delalloc_bytes >> PAGE_CACHE_SHIFT); |
3136 | pause <<= 1; | 3324 | writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages); |
3137 | if (pause > HZ / 10) | ||
3138 | pause = HZ / 10; | ||
3139 | } else { | ||
3140 | pause = 1; | ||
3141 | } | ||
3142 | 3325 | ||
3143 | spin_lock(&block_rsv->lock); | 3326 | spin_lock(&space_info->lock); |
3144 | if (reserved > block_rsv->reserved) | 3327 | if (reserved > space_info->bytes_reserved) |
3145 | reclaimed = reserved - block_rsv->reserved; | 3328 | reclaimed += reserved - space_info->bytes_reserved; |
3146 | reserved = block_rsv->reserved; | 3329 | reserved = space_info->bytes_reserved; |
3147 | spin_unlock(&block_rsv->lock); | 3330 | spin_unlock(&space_info->lock); |
3331 | |||
3332 | loops++; | ||
3148 | 3333 | ||
3149 | if (reserved == 0 || reclaimed >= max_reclaim) | 3334 | if (reserved == 0 || reclaimed >= max_reclaim) |
3150 | break; | 3335 | break; |
3151 | 3336 | ||
3152 | if (trans && trans->transaction->blocked) | 3337 | if (trans && trans->transaction->blocked) |
3153 | return -EAGAIN; | 3338 | return -EAGAIN; |
3339 | |||
3340 | time_left = schedule_timeout_interruptible(1); | ||
3341 | |||
3342 | /* We were interrupted, exit */ | ||
3343 | if (time_left) | ||
3344 | break; | ||
3345 | |||
3346 | /* we've kicked the IO a few times, if anything has been freed, | ||
3347 | * exit. There is no sense in looping here for a long time | ||
3348 | * when we really need to commit the transaction, or there are | ||
3349 | * just too many writers without enough free space | ||
3350 | */ | ||
3351 | |||
3352 | if (loops > 3) { | ||
3353 | smp_mb(); | ||
3354 | if (progress != space_info->reservation_progress) | ||
3355 | break; | ||
3356 | } | ||
3357 | |||
3154 | } | 3358 | } |
3155 | return reclaimed >= to_reclaim; | 3359 | return reclaimed >= to_reclaim; |
3156 | } | 3360 | } |
3157 | 3361 | ||
3158 | static int should_retry_reserve(struct btrfs_trans_handle *trans, | 3362 | /* |
3159 | struct btrfs_root *root, | 3363 | * Retries tells us how many times we've called reserve_metadata_bytes. The |
3160 | struct btrfs_block_rsv *block_rsv, | 3364 | * idea is if this is the first call (retries == 0) then we will add to our |
3161 | u64 num_bytes, int *retries) | 3365 | * reserved count if we can't make the allocation in order to hold our place |
3366 | * while we go and try and free up space. That way for retries > 1 we don't try | ||
3367 | * and add space, we just check to see if the amount of unused space is >= the | ||
3368 | * total space, meaning that our reservation is valid. | ||
3369 | * | ||
3370 | * However if we don't intend to retry this reservation, pass -1 as retries so | ||
3371 | * that it short circuits this logic. | ||
3372 | */ | ||
3373 | static int reserve_metadata_bytes(struct btrfs_trans_handle *trans, | ||
3374 | struct btrfs_root *root, | ||
3375 | struct btrfs_block_rsv *block_rsv, | ||
3376 | u64 orig_bytes, int flush) | ||
3162 | { | 3377 | { |
3163 | struct btrfs_space_info *space_info = block_rsv->space_info; | 3378 | struct btrfs_space_info *space_info = block_rsv->space_info; |
3164 | int ret; | 3379 | u64 unused; |
3380 | u64 num_bytes = orig_bytes; | ||
3381 | int retries = 0; | ||
3382 | int ret = 0; | ||
3383 | bool reserved = false; | ||
3384 | bool committed = false; | ||
3165 | 3385 | ||
3166 | if ((*retries) > 2) | 3386 | again: |
3167 | return -ENOSPC; | 3387 | ret = -ENOSPC; |
3388 | if (reserved) | ||
3389 | num_bytes = 0; | ||
3168 | 3390 | ||
3169 | ret = maybe_allocate_chunk(trans, root, space_info, num_bytes); | 3391 | spin_lock(&space_info->lock); |
3170 | if (ret) | 3392 | unused = space_info->bytes_used + space_info->bytes_reserved + |
3171 | return 1; | 3393 | space_info->bytes_pinned + space_info->bytes_readonly + |
3394 | space_info->bytes_may_use; | ||
3172 | 3395 | ||
3173 | if (trans && trans->transaction->in_commit) | 3396 | /* |
3174 | return -ENOSPC; | 3397 | * The idea here is that we've not already over-reserved the block group |
3398 | * then we can go ahead and save our reservation first and then start | ||
3399 | * flushing if we need to. Otherwise if we've already overcommitted | ||
3400 | * lets start flushing stuff first and then come back and try to make | ||
3401 | * our reservation. | ||
3402 | */ | ||
3403 | if (unused <= space_info->total_bytes) { | ||
3404 | unused = space_info->total_bytes - unused; | ||
3405 | if (unused >= num_bytes) { | ||
3406 | if (!reserved) | ||
3407 | space_info->bytes_reserved += orig_bytes; | ||
3408 | ret = 0; | ||
3409 | } else { | ||
3410 | /* | ||
3411 | * Ok set num_bytes to orig_bytes since we aren't | ||
3412 | * overocmmitted, this way we only try and reclaim what | ||
3413 | * we need. | ||
3414 | */ | ||
3415 | num_bytes = orig_bytes; | ||
3416 | } | ||
3417 | } else { | ||
3418 | /* | ||
3419 | * Ok we're over committed, set num_bytes to the overcommitted | ||
3420 | * amount plus the amount of bytes that we need for this | ||
3421 | * reservation. | ||
3422 | */ | ||
3423 | num_bytes = unused - space_info->total_bytes + | ||
3424 | (orig_bytes * (retries + 1)); | ||
3425 | } | ||
3175 | 3426 | ||
3176 | ret = shrink_delalloc(trans, root, num_bytes); | 3427 | /* |
3177 | if (ret) | 3428 | * Couldn't make our reservation, save our place so while we're trying |
3178 | return ret; | 3429 | * to reclaim space we can actually use it instead of somebody else |
3430 | * stealing it from us. | ||
3431 | */ | ||
3432 | if (ret && !reserved) { | ||
3433 | space_info->bytes_reserved += orig_bytes; | ||
3434 | reserved = true; | ||
3435 | } | ||
3179 | 3436 | ||
3180 | spin_lock(&space_info->lock); | ||
3181 | if (space_info->bytes_pinned < num_bytes) | ||
3182 | ret = 1; | ||
3183 | spin_unlock(&space_info->lock); | 3437 | spin_unlock(&space_info->lock); |
3184 | if (ret) | ||
3185 | return -ENOSPC; | ||
3186 | 3438 | ||
3187 | (*retries)++; | 3439 | if (!ret) |
3188 | 3440 | return 0; | |
3189 | if (trans) | ||
3190 | return -EAGAIN; | ||
3191 | 3441 | ||
3192 | trans = btrfs_join_transaction(root, 1); | 3442 | if (!flush) |
3193 | BUG_ON(IS_ERR(trans)); | 3443 | goto out; |
3194 | ret = btrfs_commit_transaction(trans, root); | ||
3195 | BUG_ON(ret); | ||
3196 | 3444 | ||
3197 | return 1; | 3445 | /* |
3198 | } | 3446 | * We do synchronous shrinking since we don't actually unreserve |
3447 | * metadata until after the IO is completed. | ||
3448 | */ | ||
3449 | ret = shrink_delalloc(trans, root, num_bytes, 1); | ||
3450 | if (ret > 0) | ||
3451 | return 0; | ||
3452 | else if (ret < 0) | ||
3453 | goto out; | ||
3199 | 3454 | ||
3200 | static int reserve_metadata_bytes(struct btrfs_block_rsv *block_rsv, | 3455 | /* |
3201 | u64 num_bytes) | 3456 | * So if we were overcommitted it's possible that somebody else flushed |
3202 | { | 3457 | * out enough space and we simply didn't have enough space to reclaim, |
3203 | struct btrfs_space_info *space_info = block_rsv->space_info; | 3458 | * so go back around and try again. |
3204 | u64 unused; | 3459 | */ |
3205 | int ret = -ENOSPC; | 3460 | if (retries < 2) { |
3461 | retries++; | ||
3462 | goto again; | ||
3463 | } | ||
3206 | 3464 | ||
3207 | spin_lock(&space_info->lock); | 3465 | spin_lock(&space_info->lock); |
3208 | unused = space_info->bytes_used + space_info->bytes_reserved + | 3466 | /* |
3209 | space_info->bytes_pinned + space_info->bytes_readonly; | 3467 | * Not enough space to be reclaimed, don't bother committing the |
3468 | * transaction. | ||
3469 | */ | ||
3470 | if (space_info->bytes_pinned < orig_bytes) | ||
3471 | ret = -ENOSPC; | ||
3472 | spin_unlock(&space_info->lock); | ||
3473 | if (ret) | ||
3474 | goto out; | ||
3210 | 3475 | ||
3211 | if (unused < space_info->total_bytes) | 3476 | ret = -EAGAIN; |
3212 | unused = space_info->total_bytes - unused; | 3477 | if (trans || committed) |
3213 | else | 3478 | goto out; |
3214 | unused = 0; | ||
3215 | 3479 | ||
3216 | if (unused >= num_bytes) { | 3480 | ret = -ENOSPC; |
3217 | if (block_rsv->priority >= 10) { | 3481 | trans = btrfs_join_transaction(root); |
3218 | space_info->bytes_reserved += num_bytes; | 3482 | if (IS_ERR(trans)) |
3219 | ret = 0; | 3483 | goto out; |
3220 | } else { | 3484 | ret = btrfs_commit_transaction(trans, root); |
3221 | if ((unused + block_rsv->reserved) * | 3485 | if (!ret) { |
3222 | block_rsv->priority >= | 3486 | trans = NULL; |
3223 | (num_bytes + block_rsv->reserved) * 10) { | 3487 | committed = true; |
3224 | space_info->bytes_reserved += num_bytes; | 3488 | goto again; |
3225 | ret = 0; | 3489 | } |
3226 | } | 3490 | |
3227 | } | 3491 | out: |
3492 | if (reserved) { | ||
3493 | spin_lock(&space_info->lock); | ||
3494 | space_info->bytes_reserved -= orig_bytes; | ||
3495 | spin_unlock(&space_info->lock); | ||
3228 | } | 3496 | } |
3229 | spin_unlock(&space_info->lock); | ||
3230 | 3497 | ||
3231 | return ret; | 3498 | return ret; |
3232 | } | 3499 | } |
@@ -3273,8 +3540,8 @@ static void block_rsv_add_bytes(struct btrfs_block_rsv *block_rsv, | |||
3273 | spin_unlock(&block_rsv->lock); | 3540 | spin_unlock(&block_rsv->lock); |
3274 | } | 3541 | } |
3275 | 3542 | ||
3276 | void block_rsv_release_bytes(struct btrfs_block_rsv *block_rsv, | 3543 | static void block_rsv_release_bytes(struct btrfs_block_rsv *block_rsv, |
3277 | struct btrfs_block_rsv *dest, u64 num_bytes) | 3544 | struct btrfs_block_rsv *dest, u64 num_bytes) |
3278 | { | 3545 | { |
3279 | struct btrfs_space_info *space_info = block_rsv->space_info; | 3546 | struct btrfs_space_info *space_info = block_rsv->space_info; |
3280 | 3547 | ||
@@ -3293,10 +3560,23 @@ void block_rsv_release_bytes(struct btrfs_block_rsv *block_rsv, | |||
3293 | 3560 | ||
3294 | if (num_bytes > 0) { | 3561 | if (num_bytes > 0) { |
3295 | if (dest) { | 3562 | if (dest) { |
3296 | block_rsv_add_bytes(dest, num_bytes, 0); | 3563 | spin_lock(&dest->lock); |
3297 | } else { | 3564 | if (!dest->full) { |
3565 | u64 bytes_to_add; | ||
3566 | |||
3567 | bytes_to_add = dest->size - dest->reserved; | ||
3568 | bytes_to_add = min(num_bytes, bytes_to_add); | ||
3569 | dest->reserved += bytes_to_add; | ||
3570 | if (dest->reserved >= dest->size) | ||
3571 | dest->full = 1; | ||
3572 | num_bytes -= bytes_to_add; | ||
3573 | } | ||
3574 | spin_unlock(&dest->lock); | ||
3575 | } | ||
3576 | if (num_bytes) { | ||
3298 | spin_lock(&space_info->lock); | 3577 | spin_lock(&space_info->lock); |
3299 | space_info->bytes_reserved -= num_bytes; | 3578 | space_info->bytes_reserved -= num_bytes; |
3579 | space_info->reservation_progress++; | ||
3300 | spin_unlock(&space_info->lock); | 3580 | spin_unlock(&space_info->lock); |
3301 | } | 3581 | } |
3302 | } | 3582 | } |
@@ -3328,18 +3608,14 @@ struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root) | |||
3328 | { | 3608 | { |
3329 | struct btrfs_block_rsv *block_rsv; | 3609 | struct btrfs_block_rsv *block_rsv; |
3330 | struct btrfs_fs_info *fs_info = root->fs_info; | 3610 | struct btrfs_fs_info *fs_info = root->fs_info; |
3331 | u64 alloc_target; | ||
3332 | 3611 | ||
3333 | block_rsv = kmalloc(sizeof(*block_rsv), GFP_NOFS); | 3612 | block_rsv = kmalloc(sizeof(*block_rsv), GFP_NOFS); |
3334 | if (!block_rsv) | 3613 | if (!block_rsv) |
3335 | return NULL; | 3614 | return NULL; |
3336 | 3615 | ||
3337 | btrfs_init_block_rsv(block_rsv); | 3616 | btrfs_init_block_rsv(block_rsv); |
3338 | |||
3339 | alloc_target = btrfs_get_alloc_profile(root, 0); | ||
3340 | block_rsv->space_info = __find_space_info(fs_info, | 3617 | block_rsv->space_info = __find_space_info(fs_info, |
3341 | BTRFS_BLOCK_GROUP_METADATA); | 3618 | BTRFS_BLOCK_GROUP_METADATA); |
3342 | |||
3343 | return block_rsv; | 3619 | return block_rsv; |
3344 | } | 3620 | } |
3345 | 3621 | ||
@@ -3370,23 +3646,19 @@ void btrfs_add_durable_block_rsv(struct btrfs_fs_info *fs_info, | |||
3370 | int btrfs_block_rsv_add(struct btrfs_trans_handle *trans, | 3646 | int btrfs_block_rsv_add(struct btrfs_trans_handle *trans, |
3371 | struct btrfs_root *root, | 3647 | struct btrfs_root *root, |
3372 | struct btrfs_block_rsv *block_rsv, | 3648 | struct btrfs_block_rsv *block_rsv, |
3373 | u64 num_bytes, int *retries) | 3649 | u64 num_bytes) |
3374 | { | 3650 | { |
3375 | int ret; | 3651 | int ret; |
3376 | 3652 | ||
3377 | if (num_bytes == 0) | 3653 | if (num_bytes == 0) |
3378 | return 0; | 3654 | return 0; |
3379 | again: | 3655 | |
3380 | ret = reserve_metadata_bytes(block_rsv, num_bytes); | 3656 | ret = reserve_metadata_bytes(trans, root, block_rsv, num_bytes, 1); |
3381 | if (!ret) { | 3657 | if (!ret) { |
3382 | block_rsv_add_bytes(block_rsv, num_bytes, 1); | 3658 | block_rsv_add_bytes(block_rsv, num_bytes, 1); |
3383 | return 0; | 3659 | return 0; |
3384 | } | 3660 | } |
3385 | 3661 | ||
3386 | ret = should_retry_reserve(trans, root, block_rsv, num_bytes, retries); | ||
3387 | if (ret > 0) | ||
3388 | goto again; | ||
3389 | |||
3390 | return ret; | 3662 | return ret; |
3391 | } | 3663 | } |
3392 | 3664 | ||
@@ -3421,7 +3693,8 @@ int btrfs_block_rsv_check(struct btrfs_trans_handle *trans, | |||
3421 | return 0; | 3693 | return 0; |
3422 | 3694 | ||
3423 | if (block_rsv->refill_used) { | 3695 | if (block_rsv->refill_used) { |
3424 | ret = reserve_metadata_bytes(block_rsv, num_bytes); | 3696 | ret = reserve_metadata_bytes(trans, root, block_rsv, |
3697 | num_bytes, 0); | ||
3425 | if (!ret) { | 3698 | if (!ret) { |
3426 | block_rsv_add_bytes(block_rsv, num_bytes, 0); | 3699 | block_rsv_add_bytes(block_rsv, num_bytes, 0); |
3427 | return 0; | 3700 | return 0; |
@@ -3432,17 +3705,12 @@ int btrfs_block_rsv_check(struct btrfs_trans_handle *trans, | |||
3432 | if (trans) | 3705 | if (trans) |
3433 | return -EAGAIN; | 3706 | return -EAGAIN; |
3434 | 3707 | ||
3435 | trans = btrfs_join_transaction(root, 1); | 3708 | trans = btrfs_join_transaction(root); |
3436 | BUG_ON(IS_ERR(trans)); | 3709 | BUG_ON(IS_ERR(trans)); |
3437 | ret = btrfs_commit_transaction(trans, root); | 3710 | ret = btrfs_commit_transaction(trans, root); |
3438 | return 0; | 3711 | return 0; |
3439 | } | 3712 | } |
3440 | 3713 | ||
3441 | WARN_ON(1); | ||
3442 | printk(KERN_INFO"block_rsv size %llu reserved %llu freed %llu %llu\n", | ||
3443 | block_rsv->size, block_rsv->reserved, | ||
3444 | block_rsv->freed[0], block_rsv->freed[1]); | ||
3445 | |||
3446 | return -ENOSPC; | 3714 | return -ENOSPC; |
3447 | } | 3715 | } |
3448 | 3716 | ||
@@ -3476,23 +3744,7 @@ static u64 calc_global_metadata_size(struct btrfs_fs_info *fs_info) | |||
3476 | u64 meta_used; | 3744 | u64 meta_used; |
3477 | u64 data_used; | 3745 | u64 data_used; |
3478 | int csum_size = btrfs_super_csum_size(&fs_info->super_copy); | 3746 | int csum_size = btrfs_super_csum_size(&fs_info->super_copy); |
3479 | #if 0 | ||
3480 | /* | ||
3481 | * per tree used space accounting can be inaccuracy, so we | ||
3482 | * can't rely on it. | ||
3483 | */ | ||
3484 | spin_lock(&fs_info->extent_root->accounting_lock); | ||
3485 | num_bytes = btrfs_root_used(&fs_info->extent_root->root_item); | ||
3486 | spin_unlock(&fs_info->extent_root->accounting_lock); | ||
3487 | |||
3488 | spin_lock(&fs_info->csum_root->accounting_lock); | ||
3489 | num_bytes += btrfs_root_used(&fs_info->csum_root->root_item); | ||
3490 | spin_unlock(&fs_info->csum_root->accounting_lock); | ||
3491 | 3747 | ||
3492 | spin_lock(&fs_info->tree_root->accounting_lock); | ||
3493 | num_bytes += btrfs_root_used(&fs_info->tree_root->root_item); | ||
3494 | spin_unlock(&fs_info->tree_root->accounting_lock); | ||
3495 | #endif | ||
3496 | sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_DATA); | 3748 | sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_DATA); |
3497 | spin_lock(&sinfo->lock); | 3749 | spin_lock(&sinfo->lock); |
3498 | data_used = sinfo->bytes_used; | 3750 | data_used = sinfo->bytes_used; |
@@ -3500,6 +3752,8 @@ static u64 calc_global_metadata_size(struct btrfs_fs_info *fs_info) | |||
3500 | 3752 | ||
3501 | sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA); | 3753 | sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA); |
3502 | spin_lock(&sinfo->lock); | 3754 | spin_lock(&sinfo->lock); |
3755 | if (sinfo->flags & BTRFS_BLOCK_GROUP_DATA) | ||
3756 | data_used = 0; | ||
3503 | meta_used = sinfo->bytes_used; | 3757 | meta_used = sinfo->bytes_used; |
3504 | spin_unlock(&sinfo->lock); | 3758 | spin_unlock(&sinfo->lock); |
3505 | 3759 | ||
@@ -3527,7 +3781,8 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info) | |||
3527 | block_rsv->size = num_bytes; | 3781 | block_rsv->size = num_bytes; |
3528 | 3782 | ||
3529 | num_bytes = sinfo->bytes_used + sinfo->bytes_pinned + | 3783 | num_bytes = sinfo->bytes_used + sinfo->bytes_pinned + |
3530 | sinfo->bytes_reserved + sinfo->bytes_readonly; | 3784 | sinfo->bytes_reserved + sinfo->bytes_readonly + |
3785 | sinfo->bytes_may_use; | ||
3531 | 3786 | ||
3532 | if (sinfo->total_bytes > num_bytes) { | 3787 | if (sinfo->total_bytes > num_bytes) { |
3533 | num_bytes = sinfo->total_bytes - num_bytes; | 3788 | num_bytes = sinfo->total_bytes - num_bytes; |
@@ -3538,13 +3793,11 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info) | |||
3538 | if (block_rsv->reserved >= block_rsv->size) { | 3793 | if (block_rsv->reserved >= block_rsv->size) { |
3539 | num_bytes = block_rsv->reserved - block_rsv->size; | 3794 | num_bytes = block_rsv->reserved - block_rsv->size; |
3540 | sinfo->bytes_reserved -= num_bytes; | 3795 | sinfo->bytes_reserved -= num_bytes; |
3796 | sinfo->reservation_progress++; | ||
3541 | block_rsv->reserved = block_rsv->size; | 3797 | block_rsv->reserved = block_rsv->size; |
3542 | block_rsv->full = 1; | 3798 | block_rsv->full = 1; |
3543 | } | 3799 | } |
3544 | #if 0 | 3800 | |
3545 | printk(KERN_INFO"global block rsv size %llu reserved %llu\n", | ||
3546 | block_rsv->size, block_rsv->reserved); | ||
3547 | #endif | ||
3548 | spin_unlock(&sinfo->lock); | 3801 | spin_unlock(&sinfo->lock); |
3549 | spin_unlock(&block_rsv->lock); | 3802 | spin_unlock(&block_rsv->lock); |
3550 | } | 3803 | } |
@@ -3590,15 +3843,40 @@ static void release_global_block_rsv(struct btrfs_fs_info *fs_info) | |||
3590 | WARN_ON(fs_info->chunk_block_rsv.reserved > 0); | 3843 | WARN_ON(fs_info->chunk_block_rsv.reserved > 0); |
3591 | } | 3844 | } |
3592 | 3845 | ||
3593 | static u64 calc_trans_metadata_size(struct btrfs_root *root, int num_items) | 3846 | int btrfs_truncate_reserve_metadata(struct btrfs_trans_handle *trans, |
3847 | struct btrfs_root *root, | ||
3848 | struct btrfs_block_rsv *rsv) | ||
3594 | { | 3849 | { |
3595 | return (root->leafsize + root->nodesize * (BTRFS_MAX_LEVEL - 1)) * | 3850 | struct btrfs_block_rsv *trans_rsv = &root->fs_info->trans_block_rsv; |
3596 | 3 * num_items; | 3851 | u64 num_bytes; |
3852 | int ret; | ||
3853 | |||
3854 | /* | ||
3855 | * Truncate should be freeing data, but give us 2 items just in case it | ||
3856 | * needs to use some space. We may want to be smarter about this in the | ||
3857 | * future. | ||
3858 | */ | ||
3859 | num_bytes = btrfs_calc_trans_metadata_size(root, 2); | ||
3860 | |||
3861 | /* We already have enough bytes, just return */ | ||
3862 | if (rsv->reserved >= num_bytes) | ||
3863 | return 0; | ||
3864 | |||
3865 | num_bytes -= rsv->reserved; | ||
3866 | |||
3867 | /* | ||
3868 | * You should have reserved enough space before hand to do this, so this | ||
3869 | * should not fail. | ||
3870 | */ | ||
3871 | ret = block_rsv_migrate_bytes(trans_rsv, rsv, num_bytes); | ||
3872 | BUG_ON(ret); | ||
3873 | |||
3874 | return 0; | ||
3597 | } | 3875 | } |
3598 | 3876 | ||
3599 | int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans, | 3877 | int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans, |
3600 | struct btrfs_root *root, | 3878 | struct btrfs_root *root, |
3601 | int num_items, int *retries) | 3879 | int num_items) |
3602 | { | 3880 | { |
3603 | u64 num_bytes; | 3881 | u64 num_bytes; |
3604 | int ret; | 3882 | int ret; |
@@ -3606,9 +3884,9 @@ int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans, | |||
3606 | if (num_items == 0 || root->fs_info->chunk_root == root) | 3884 | if (num_items == 0 || root->fs_info->chunk_root == root) |
3607 | return 0; | 3885 | return 0; |
3608 | 3886 | ||
3609 | num_bytes = calc_trans_metadata_size(root, num_items); | 3887 | num_bytes = btrfs_calc_trans_metadata_size(root, num_items); |
3610 | ret = btrfs_block_rsv_add(trans, root, &root->fs_info->trans_block_rsv, | 3888 | ret = btrfs_block_rsv_add(trans, root, &root->fs_info->trans_block_rsv, |
3611 | num_bytes, retries); | 3889 | num_bytes); |
3612 | if (!ret) { | 3890 | if (!ret) { |
3613 | trans->bytes_reserved += num_bytes; | 3891 | trans->bytes_reserved += num_bytes; |
3614 | trans->block_rsv = &root->fs_info->trans_block_rsv; | 3892 | trans->block_rsv = &root->fs_info->trans_block_rsv; |
@@ -3636,23 +3914,18 @@ int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans, | |||
3636 | struct btrfs_block_rsv *dst_rsv = root->orphan_block_rsv; | 3914 | struct btrfs_block_rsv *dst_rsv = root->orphan_block_rsv; |
3637 | 3915 | ||
3638 | /* | 3916 | /* |
3639 | * one for deleting orphan item, one for updating inode and | 3917 | * We need to hold space in order to delete our orphan item once we've |
3640 | * two for calling btrfs_truncate_inode_items. | 3918 | * added it, so this takes the reservation so we can release it later |
3641 | * | 3919 | * when we are truly done with the orphan item. |
3642 | * btrfs_truncate_inode_items is a delete operation, it frees | ||
3643 | * more space than it uses in most cases. So two units of | ||
3644 | * metadata space should be enough for calling it many times. | ||
3645 | * If all of the metadata space is used, we can commit | ||
3646 | * transaction and use space it freed. | ||
3647 | */ | 3920 | */ |
3648 | u64 num_bytes = calc_trans_metadata_size(root, 4); | 3921 | u64 num_bytes = btrfs_calc_trans_metadata_size(root, 1); |
3649 | return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes); | 3922 | return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes); |
3650 | } | 3923 | } |
3651 | 3924 | ||
3652 | void btrfs_orphan_release_metadata(struct inode *inode) | 3925 | void btrfs_orphan_release_metadata(struct inode *inode) |
3653 | { | 3926 | { |
3654 | struct btrfs_root *root = BTRFS_I(inode)->root; | 3927 | struct btrfs_root *root = BTRFS_I(inode)->root; |
3655 | u64 num_bytes = calc_trans_metadata_size(root, 4); | 3928 | u64 num_bytes = btrfs_calc_trans_metadata_size(root, 1); |
3656 | btrfs_block_rsv_release(root, root->orphan_block_rsv, num_bytes); | 3929 | btrfs_block_rsv_release(root, root->orphan_block_rsv, num_bytes); |
3657 | } | 3930 | } |
3658 | 3931 | ||
@@ -3666,7 +3939,7 @@ int btrfs_snap_reserve_metadata(struct btrfs_trans_handle *trans, | |||
3666 | * two for root back/forward refs, two for directory entries | 3939 | * two for root back/forward refs, two for directory entries |
3667 | * and one for root of the snapshot. | 3940 | * and one for root of the snapshot. |
3668 | */ | 3941 | */ |
3669 | u64 num_bytes = calc_trans_metadata_size(root, 5); | 3942 | u64 num_bytes = btrfs_calc_trans_metadata_size(root, 5); |
3670 | dst_rsv->space_info = src_rsv->space_info; | 3943 | dst_rsv->space_info = src_rsv->space_info; |
3671 | return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes); | 3944 | return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes); |
3672 | } | 3945 | } |
@@ -3682,43 +3955,37 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) | |||
3682 | struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv; | 3955 | struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv; |
3683 | u64 to_reserve; | 3956 | u64 to_reserve; |
3684 | int nr_extents; | 3957 | int nr_extents; |
3685 | int retries = 0; | 3958 | int reserved_extents; |
3686 | int ret; | 3959 | int ret; |
3687 | 3960 | ||
3688 | if (btrfs_transaction_in_commit(root->fs_info)) | 3961 | if (btrfs_transaction_in_commit(root->fs_info)) |
3689 | schedule_timeout(1); | 3962 | schedule_timeout(1); |
3690 | 3963 | ||
3691 | num_bytes = ALIGN(num_bytes, root->sectorsize); | 3964 | num_bytes = ALIGN(num_bytes, root->sectorsize); |
3692 | again: | 3965 | |
3693 | spin_lock(&BTRFS_I(inode)->accounting_lock); | ||
3694 | nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents) + 1; | 3966 | nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents) + 1; |
3695 | if (nr_extents > BTRFS_I(inode)->reserved_extents) { | 3967 | reserved_extents = atomic_read(&BTRFS_I(inode)->reserved_extents); |
3696 | nr_extents -= BTRFS_I(inode)->reserved_extents; | 3968 | |
3697 | to_reserve = calc_trans_metadata_size(root, nr_extents); | 3969 | if (nr_extents > reserved_extents) { |
3970 | nr_extents -= reserved_extents; | ||
3971 | to_reserve = btrfs_calc_trans_metadata_size(root, nr_extents); | ||
3698 | } else { | 3972 | } else { |
3699 | nr_extents = 0; | 3973 | nr_extents = 0; |
3700 | to_reserve = 0; | 3974 | to_reserve = 0; |
3701 | } | 3975 | } |
3702 | 3976 | ||
3703 | to_reserve += calc_csum_metadata_size(inode, num_bytes); | 3977 | to_reserve += calc_csum_metadata_size(inode, num_bytes); |
3704 | ret = reserve_metadata_bytes(block_rsv, to_reserve); | 3978 | ret = reserve_metadata_bytes(NULL, root, block_rsv, to_reserve, 1); |
3705 | if (ret) { | 3979 | if (ret) |
3706 | spin_unlock(&BTRFS_I(inode)->accounting_lock); | ||
3707 | ret = should_retry_reserve(NULL, root, block_rsv, to_reserve, | ||
3708 | &retries); | ||
3709 | if (ret > 0) | ||
3710 | goto again; | ||
3711 | return ret; | 3980 | return ret; |
3712 | } | ||
3713 | 3981 | ||
3714 | BTRFS_I(inode)->reserved_extents += nr_extents; | 3982 | atomic_add(nr_extents, &BTRFS_I(inode)->reserved_extents); |
3715 | atomic_inc(&BTRFS_I(inode)->outstanding_extents); | 3983 | atomic_inc(&BTRFS_I(inode)->outstanding_extents); |
3716 | spin_unlock(&BTRFS_I(inode)->accounting_lock); | ||
3717 | 3984 | ||
3718 | block_rsv_add_bytes(block_rsv, to_reserve, 1); | 3985 | block_rsv_add_bytes(block_rsv, to_reserve, 1); |
3719 | 3986 | ||
3720 | if (block_rsv->size > 512 * 1024 * 1024) | 3987 | if (block_rsv->size > 512 * 1024 * 1024) |
3721 | shrink_delalloc(NULL, root, to_reserve); | 3988 | shrink_delalloc(NULL, root, to_reserve, 0); |
3722 | 3989 | ||
3723 | return 0; | 3990 | return 0; |
3724 | } | 3991 | } |
@@ -3728,23 +3995,34 @@ void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes) | |||
3728 | struct btrfs_root *root = BTRFS_I(inode)->root; | 3995 | struct btrfs_root *root = BTRFS_I(inode)->root; |
3729 | u64 to_free; | 3996 | u64 to_free; |
3730 | int nr_extents; | 3997 | int nr_extents; |
3998 | int reserved_extents; | ||
3731 | 3999 | ||
3732 | num_bytes = ALIGN(num_bytes, root->sectorsize); | 4000 | num_bytes = ALIGN(num_bytes, root->sectorsize); |
3733 | atomic_dec(&BTRFS_I(inode)->outstanding_extents); | 4001 | atomic_dec(&BTRFS_I(inode)->outstanding_extents); |
4002 | WARN_ON(atomic_read(&BTRFS_I(inode)->outstanding_extents) < 0); | ||
3734 | 4003 | ||
3735 | spin_lock(&BTRFS_I(inode)->accounting_lock); | 4004 | reserved_extents = atomic_read(&BTRFS_I(inode)->reserved_extents); |
3736 | nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents); | 4005 | do { |
3737 | if (nr_extents < BTRFS_I(inode)->reserved_extents) { | 4006 | int old, new; |
3738 | nr_extents = BTRFS_I(inode)->reserved_extents - nr_extents; | 4007 | |
3739 | BTRFS_I(inode)->reserved_extents -= nr_extents; | 4008 | nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents); |
3740 | } else { | 4009 | if (nr_extents >= reserved_extents) { |
3741 | nr_extents = 0; | 4010 | nr_extents = 0; |
3742 | } | 4011 | break; |
3743 | spin_unlock(&BTRFS_I(inode)->accounting_lock); | 4012 | } |
4013 | old = reserved_extents; | ||
4014 | nr_extents = reserved_extents - nr_extents; | ||
4015 | new = reserved_extents - nr_extents; | ||
4016 | old = atomic_cmpxchg(&BTRFS_I(inode)->reserved_extents, | ||
4017 | reserved_extents, new); | ||
4018 | if (likely(old == reserved_extents)) | ||
4019 | break; | ||
4020 | reserved_extents = old; | ||
4021 | } while (1); | ||
3744 | 4022 | ||
3745 | to_free = calc_csum_metadata_size(inode, num_bytes); | 4023 | to_free = calc_csum_metadata_size(inode, num_bytes); |
3746 | if (nr_extents > 0) | 4024 | if (nr_extents > 0) |
3747 | to_free += calc_trans_metadata_size(root, nr_extents); | 4025 | to_free += btrfs_calc_trans_metadata_size(root, nr_extents); |
3748 | 4026 | ||
3749 | btrfs_block_rsv_release(root, &root->fs_info->delalloc_block_rsv, | 4027 | btrfs_block_rsv_release(root, &root->fs_info->delalloc_block_rsv, |
3750 | to_free); | 4028 | to_free); |
@@ -3777,12 +4055,12 @@ static int update_block_group(struct btrfs_trans_handle *trans, | |||
3777 | struct btrfs_root *root, | 4055 | struct btrfs_root *root, |
3778 | u64 bytenr, u64 num_bytes, int alloc) | 4056 | u64 bytenr, u64 num_bytes, int alloc) |
3779 | { | 4057 | { |
3780 | struct btrfs_block_group_cache *cache; | 4058 | struct btrfs_block_group_cache *cache = NULL; |
3781 | struct btrfs_fs_info *info = root->fs_info; | 4059 | struct btrfs_fs_info *info = root->fs_info; |
3782 | int factor; | ||
3783 | u64 total = num_bytes; | 4060 | u64 total = num_bytes; |
3784 | u64 old_val; | 4061 | u64 old_val; |
3785 | u64 byte_in_group; | 4062 | u64 byte_in_group; |
4063 | int factor; | ||
3786 | 4064 | ||
3787 | /* block accounting for super block */ | 4065 | /* block accounting for super block */ |
3788 | spin_lock(&info->delalloc_lock); | 4066 | spin_lock(&info->delalloc_lock); |
@@ -3804,11 +4082,25 @@ static int update_block_group(struct btrfs_trans_handle *trans, | |||
3804 | factor = 2; | 4082 | factor = 2; |
3805 | else | 4083 | else |
3806 | factor = 1; | 4084 | factor = 1; |
4085 | /* | ||
4086 | * If this block group has free space cache written out, we | ||
4087 | * need to make sure to load it if we are removing space. This | ||
4088 | * is because we need the unpinning stage to actually add the | ||
4089 | * space back to the block group, otherwise we will leak space. | ||
4090 | */ | ||
4091 | if (!alloc && cache->cached == BTRFS_CACHE_NO) | ||
4092 | cache_block_group(cache, trans, NULL, 1); | ||
4093 | |||
3807 | byte_in_group = bytenr - cache->key.objectid; | 4094 | byte_in_group = bytenr - cache->key.objectid; |
3808 | WARN_ON(byte_in_group > cache->key.offset); | 4095 | WARN_ON(byte_in_group > cache->key.offset); |
3809 | 4096 | ||
3810 | spin_lock(&cache->space_info->lock); | 4097 | spin_lock(&cache->space_info->lock); |
3811 | spin_lock(&cache->lock); | 4098 | spin_lock(&cache->lock); |
4099 | |||
4100 | if (btrfs_super_cache_generation(&info->super_copy) != 0 && | ||
4101 | cache->disk_cache_state < BTRFS_DC_CLEAR) | ||
4102 | cache->disk_cache_state = BTRFS_DC_CLEAR; | ||
4103 | |||
3812 | cache->dirty = 1; | 4104 | cache->dirty = 1; |
3813 | old_val = btrfs_block_group_used(&cache->item); | 4105 | old_val = btrfs_block_group_used(&cache->item); |
3814 | num_bytes = min(total, cache->key.offset - byte_in_group); | 4106 | num_bytes = min(total, cache->key.offset - byte_in_group); |
@@ -3817,6 +4109,7 @@ static int update_block_group(struct btrfs_trans_handle *trans, | |||
3817 | btrfs_set_block_group_used(&cache->item, old_val); | 4109 | btrfs_set_block_group_used(&cache->item, old_val); |
3818 | cache->reserved -= num_bytes; | 4110 | cache->reserved -= num_bytes; |
3819 | cache->space_info->bytes_reserved -= num_bytes; | 4111 | cache->space_info->bytes_reserved -= num_bytes; |
4112 | cache->space_info->reservation_progress++; | ||
3820 | cache->space_info->bytes_used += num_bytes; | 4113 | cache->space_info->bytes_used += num_bytes; |
3821 | cache->space_info->disk_used += num_bytes * factor; | 4114 | cache->space_info->disk_used += num_bytes * factor; |
3822 | spin_unlock(&cache->lock); | 4115 | spin_unlock(&cache->lock); |
@@ -3868,6 +4161,7 @@ static int pin_down_extent(struct btrfs_root *root, | |||
3868 | if (reserved) { | 4161 | if (reserved) { |
3869 | cache->reserved -= num_bytes; | 4162 | cache->reserved -= num_bytes; |
3870 | cache->space_info->bytes_reserved -= num_bytes; | 4163 | cache->space_info->bytes_reserved -= num_bytes; |
4164 | cache->space_info->reservation_progress++; | ||
3871 | } | 4165 | } |
3872 | spin_unlock(&cache->lock); | 4166 | spin_unlock(&cache->lock); |
3873 | spin_unlock(&cache->space_info->lock); | 4167 | spin_unlock(&cache->space_info->lock); |
@@ -3898,8 +4192,8 @@ int btrfs_pin_extent(struct btrfs_root *root, | |||
3898 | * update size of reserved extents. this function may return -EAGAIN | 4192 | * update size of reserved extents. this function may return -EAGAIN |
3899 | * if 'reserve' is true or 'sinfo' is false. | 4193 | * if 'reserve' is true or 'sinfo' is false. |
3900 | */ | 4194 | */ |
3901 | static int update_reserved_bytes(struct btrfs_block_group_cache *cache, | 4195 | int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache, |
3902 | u64 num_bytes, int reserve, int sinfo) | 4196 | u64 num_bytes, int reserve, int sinfo) |
3903 | { | 4197 | { |
3904 | int ret = 0; | 4198 | int ret = 0; |
3905 | if (sinfo) { | 4199 | if (sinfo) { |
@@ -3918,6 +4212,7 @@ static int update_reserved_bytes(struct btrfs_block_group_cache *cache, | |||
3918 | space_info->bytes_readonly += num_bytes; | 4212 | space_info->bytes_readonly += num_bytes; |
3919 | cache->reserved -= num_bytes; | 4213 | cache->reserved -= num_bytes; |
3920 | space_info->bytes_reserved -= num_bytes; | 4214 | space_info->bytes_reserved -= num_bytes; |
4215 | space_info->reservation_progress++; | ||
3921 | } | 4216 | } |
3922 | spin_unlock(&cache->lock); | 4217 | spin_unlock(&cache->lock); |
3923 | spin_unlock(&space_info->lock); | 4218 | spin_unlock(&space_info->lock); |
@@ -4037,7 +4332,9 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, | |||
4037 | if (ret) | 4332 | if (ret) |
4038 | break; | 4333 | break; |
4039 | 4334 | ||
4040 | ret = btrfs_discard_extent(root, start, end + 1 - start); | 4335 | if (btrfs_test_opt(root, DISCARD)) |
4336 | ret = btrfs_discard_extent(root, start, | ||
4337 | end + 1 - start, NULL); | ||
4041 | 4338 | ||
4042 | clear_extent_dirty(unpin, start, end, GFP_NOFS); | 4339 | clear_extent_dirty(unpin, start, end, GFP_NOFS); |
4043 | unpin_extent_range(root, start, end); | 4340 | unpin_extent_range(root, start, end); |
@@ -4134,7 +4431,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
4134 | NULL, refs_to_drop, | 4431 | NULL, refs_to_drop, |
4135 | is_data); | 4432 | is_data); |
4136 | BUG_ON(ret); | 4433 | BUG_ON(ret); |
4137 | btrfs_release_path(extent_root, path); | 4434 | btrfs_release_path(path); |
4138 | path->leave_spinning = 1; | 4435 | path->leave_spinning = 1; |
4139 | 4436 | ||
4140 | key.objectid = bytenr; | 4437 | key.objectid = bytenr; |
@@ -4173,7 +4470,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
4173 | owner_objectid, 0); | 4470 | owner_objectid, 0); |
4174 | BUG_ON(ret < 0); | 4471 | BUG_ON(ret < 0); |
4175 | 4472 | ||
4176 | btrfs_release_path(extent_root, path); | 4473 | btrfs_release_path(path); |
4177 | path->leave_spinning = 1; | 4474 | path->leave_spinning = 1; |
4178 | 4475 | ||
4179 | key.objectid = bytenr; | 4476 | key.objectid = bytenr; |
@@ -4243,7 +4540,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
4243 | ret = btrfs_del_items(trans, extent_root, path, path->slots[0], | 4540 | ret = btrfs_del_items(trans, extent_root, path, path->slots[0], |
4244 | num_to_del); | 4541 | num_to_del); |
4245 | BUG_ON(ret); | 4542 | BUG_ON(ret); |
4246 | btrfs_release_path(extent_root, path); | 4543 | btrfs_release_path(path); |
4247 | 4544 | ||
4248 | if (is_data) { | 4545 | if (is_data) { |
4249 | ret = btrfs_del_csums(trans, root, bytenr, num_bytes); | 4546 | ret = btrfs_del_csums(trans, root, bytenr, num_bytes); |
@@ -4378,10 +4675,10 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans, | |||
4378 | WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags)); | 4675 | WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags)); |
4379 | 4676 | ||
4380 | btrfs_add_free_space(cache, buf->start, buf->len); | 4677 | btrfs_add_free_space(cache, buf->start, buf->len); |
4381 | ret = update_reserved_bytes(cache, buf->len, 0, 0); | 4678 | ret = btrfs_update_reserved_bytes(cache, buf->len, 0, 0); |
4382 | if (ret == -EAGAIN) { | 4679 | if (ret == -EAGAIN) { |
4383 | /* block group became read-only */ | 4680 | /* block group became read-only */ |
4384 | update_reserved_bytes(cache, buf->len, 0, 1); | 4681 | btrfs_update_reserved_bytes(cache, buf->len, 0, 1); |
4385 | goto out; | 4682 | goto out; |
4386 | } | 4683 | } |
4387 | 4684 | ||
@@ -4396,6 +4693,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans, | |||
4396 | if (ret) { | 4693 | if (ret) { |
4397 | spin_lock(&cache->space_info->lock); | 4694 | spin_lock(&cache->space_info->lock); |
4398 | cache->space_info->bytes_reserved -= buf->len; | 4695 | cache->space_info->bytes_reserved -= buf->len; |
4696 | cache->space_info->reservation_progress++; | ||
4399 | spin_unlock(&cache->space_info->lock); | 4697 | spin_unlock(&cache->space_info->lock); |
4400 | } | 4698 | } |
4401 | goto out; | 4699 | goto out; |
@@ -4417,6 +4715,11 @@ pin: | |||
4417 | } | 4715 | } |
4418 | } | 4716 | } |
4419 | out: | 4717 | out: |
4718 | /* | ||
4719 | * Deleting the buffer, clear the corrupt flag since it doesn't matter | ||
4720 | * anymore. | ||
4721 | */ | ||
4722 | clear_bit(EXTENT_BUFFER_CORRUPT, &buf->bflags); | ||
4420 | btrfs_put_block_group(cache); | 4723 | btrfs_put_block_group(cache); |
4421 | } | 4724 | } |
4422 | 4725 | ||
@@ -4480,7 +4783,7 @@ wait_block_group_cache_progress(struct btrfs_block_group_cache *cache, | |||
4480 | return 0; | 4783 | return 0; |
4481 | 4784 | ||
4482 | wait_event(caching_ctl->wait, block_group_cache_done(cache) || | 4785 | wait_event(caching_ctl->wait, block_group_cache_done(cache) || |
4483 | (cache->free_space >= num_bytes)); | 4786 | (cache->free_space_ctl->free_space >= num_bytes)); |
4484 | 4787 | ||
4485 | put_caching_control(caching_ctl); | 4788 | put_caching_control(caching_ctl); |
4486 | return 0; | 4789 | return 0; |
@@ -4539,7 +4842,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, | |||
4539 | u64 num_bytes, u64 empty_size, | 4842 | u64 num_bytes, u64 empty_size, |
4540 | u64 search_start, u64 search_end, | 4843 | u64 search_start, u64 search_end, |
4541 | u64 hint_byte, struct btrfs_key *ins, | 4844 | u64 hint_byte, struct btrfs_key *ins, |
4542 | int data) | 4845 | u64 data) |
4543 | { | 4846 | { |
4544 | int ret = 0; | 4847 | int ret = 0; |
4545 | struct btrfs_root *root = orig_root->fs_info->extent_root; | 4848 | struct btrfs_root *root = orig_root->fs_info->extent_root; |
@@ -4555,6 +4858,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, | |||
4555 | bool found_uncached_bg = false; | 4858 | bool found_uncached_bg = false; |
4556 | bool failed_cluster_refill = false; | 4859 | bool failed_cluster_refill = false; |
4557 | bool failed_alloc = false; | 4860 | bool failed_alloc = false; |
4861 | bool use_cluster = true; | ||
4558 | u64 ideal_cache_percent = 0; | 4862 | u64 ideal_cache_percent = 0; |
4559 | u64 ideal_cache_offset = 0; | 4863 | u64 ideal_cache_offset = 0; |
4560 | 4864 | ||
@@ -4565,20 +4869,28 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, | |||
4565 | 4869 | ||
4566 | space_info = __find_space_info(root->fs_info, data); | 4870 | space_info = __find_space_info(root->fs_info, data); |
4567 | if (!space_info) { | 4871 | if (!space_info) { |
4568 | printk(KERN_ERR "No space info for %d\n", data); | 4872 | printk(KERN_ERR "No space info for %llu\n", data); |
4569 | return -ENOSPC; | 4873 | return -ENOSPC; |
4570 | } | 4874 | } |
4571 | 4875 | ||
4876 | /* | ||
4877 | * If the space info is for both data and metadata it means we have a | ||
4878 | * small filesystem and we can't use the clustering stuff. | ||
4879 | */ | ||
4880 | if (btrfs_mixed_space_info(space_info)) | ||
4881 | use_cluster = false; | ||
4882 | |||
4572 | if (orig_root->ref_cows || empty_size) | 4883 | if (orig_root->ref_cows || empty_size) |
4573 | allowed_chunk_alloc = 1; | 4884 | allowed_chunk_alloc = 1; |
4574 | 4885 | ||
4575 | if (data & BTRFS_BLOCK_GROUP_METADATA) { | 4886 | if (data & BTRFS_BLOCK_GROUP_METADATA && use_cluster) { |
4576 | last_ptr = &root->fs_info->meta_alloc_cluster; | 4887 | last_ptr = &root->fs_info->meta_alloc_cluster; |
4577 | if (!btrfs_test_opt(root, SSD)) | 4888 | if (!btrfs_test_opt(root, SSD)) |
4578 | empty_cluster = 64 * 1024; | 4889 | empty_cluster = 64 * 1024; |
4579 | } | 4890 | } |
4580 | 4891 | ||
4581 | if ((data & BTRFS_BLOCK_GROUP_DATA) && btrfs_test_opt(root, SSD)) { | 4892 | if ((data & BTRFS_BLOCK_GROUP_DATA) && use_cluster && |
4893 | btrfs_test_opt(root, SSD)) { | ||
4582 | last_ptr = &root->fs_info->data_alloc_cluster; | 4894 | last_ptr = &root->fs_info->data_alloc_cluster; |
4583 | } | 4895 | } |
4584 | 4896 | ||
@@ -4638,10 +4950,34 @@ search: | |||
4638 | btrfs_get_block_group(block_group); | 4950 | btrfs_get_block_group(block_group); |
4639 | search_start = block_group->key.objectid; | 4951 | search_start = block_group->key.objectid; |
4640 | 4952 | ||
4953 | /* | ||
4954 | * this can happen if we end up cycling through all the | ||
4955 | * raid types, but we want to make sure we only allocate | ||
4956 | * for the proper type. | ||
4957 | */ | ||
4958 | if (!block_group_bits(block_group, data)) { | ||
4959 | u64 extra = BTRFS_BLOCK_GROUP_DUP | | ||
4960 | BTRFS_BLOCK_GROUP_RAID1 | | ||
4961 | BTRFS_BLOCK_GROUP_RAID10; | ||
4962 | |||
4963 | /* | ||
4964 | * if they asked for extra copies and this block group | ||
4965 | * doesn't provide them, bail. This does allow us to | ||
4966 | * fill raid0 from raid1. | ||
4967 | */ | ||
4968 | if ((data & extra) && !(block_group->flags & extra)) | ||
4969 | goto loop; | ||
4970 | } | ||
4971 | |||
4641 | have_block_group: | 4972 | have_block_group: |
4642 | if (unlikely(block_group->cached == BTRFS_CACHE_NO)) { | 4973 | if (unlikely(block_group->cached == BTRFS_CACHE_NO)) { |
4643 | u64 free_percent; | 4974 | u64 free_percent; |
4644 | 4975 | ||
4976 | ret = cache_block_group(block_group, trans, | ||
4977 | orig_root, 1); | ||
4978 | if (block_group->cached == BTRFS_CACHE_FINISHED) | ||
4979 | goto have_block_group; | ||
4980 | |||
4645 | free_percent = btrfs_block_group_used(&block_group->item); | 4981 | free_percent = btrfs_block_group_used(&block_group->item); |
4646 | free_percent *= 100; | 4982 | free_percent *= 100; |
4647 | free_percent = div64_u64(free_percent, | 4983 | free_percent = div64_u64(free_percent, |
@@ -4662,7 +4998,8 @@ have_block_group: | |||
4662 | if (loop > LOOP_CACHING_NOWAIT || | 4998 | if (loop > LOOP_CACHING_NOWAIT || |
4663 | (loop > LOOP_FIND_IDEAL && | 4999 | (loop > LOOP_FIND_IDEAL && |
4664 | atomic_read(&space_info->caching_threads) < 2)) { | 5000 | atomic_read(&space_info->caching_threads) < 2)) { |
4665 | ret = cache_block_group(block_group); | 5001 | ret = cache_block_group(block_group, trans, |
5002 | orig_root, 0); | ||
4666 | BUG_ON(ret); | 5003 | BUG_ON(ret); |
4667 | } | 5004 | } |
4668 | found_uncached_bg = true; | 5005 | found_uncached_bg = true; |
@@ -4682,6 +5019,15 @@ have_block_group: | |||
4682 | if (unlikely(block_group->ro)) | 5019 | if (unlikely(block_group->ro)) |
4683 | goto loop; | 5020 | goto loop; |
4684 | 5021 | ||
5022 | spin_lock(&block_group->free_space_ctl->tree_lock); | ||
5023 | if (cached && | ||
5024 | block_group->free_space_ctl->free_space < | ||
5025 | num_bytes + empty_size) { | ||
5026 | spin_unlock(&block_group->free_space_ctl->tree_lock); | ||
5027 | goto loop; | ||
5028 | } | ||
5029 | spin_unlock(&block_group->free_space_ctl->tree_lock); | ||
5030 | |||
4685 | /* | 5031 | /* |
4686 | * Ok we want to try and use the cluster allocator, so lets look | 5032 | * Ok we want to try and use the cluster allocator, so lets look |
4687 | * there, unless we are on LOOP_NO_EMPTY_SIZE, since we will | 5033 | * there, unless we are on LOOP_NO_EMPTY_SIZE, since we will |
@@ -4830,7 +5176,7 @@ checks: | |||
4830 | search_start - offset); | 5176 | search_start - offset); |
4831 | BUG_ON(offset > search_start); | 5177 | BUG_ON(offset > search_start); |
4832 | 5178 | ||
4833 | ret = update_reserved_bytes(block_group, num_bytes, 1, | 5179 | ret = btrfs_update_reserved_bytes(block_group, num_bytes, 1, |
4834 | (data & BTRFS_BLOCK_GROUP_DATA)); | 5180 | (data & BTRFS_BLOCK_GROUP_DATA)); |
4835 | if (ret == -EAGAIN) { | 5181 | if (ret == -EAGAIN) { |
4836 | btrfs_add_free_space(block_group, offset, num_bytes); | 5182 | btrfs_add_free_space(block_group, offset, num_bytes); |
@@ -4845,6 +5191,7 @@ checks: | |||
4845 | btrfs_add_free_space(block_group, offset, | 5191 | btrfs_add_free_space(block_group, offset, |
4846 | search_start - offset); | 5192 | search_start - offset); |
4847 | BUG_ON(offset > search_start); | 5193 | BUG_ON(offset > search_start); |
5194 | btrfs_put_block_group(block_group); | ||
4848 | break; | 5195 | break; |
4849 | loop: | 5196 | loop: |
4850 | failed_cluster_refill = false; | 5197 | failed_cluster_refill = false; |
@@ -4867,9 +5214,7 @@ loop: | |||
4867 | * LOOP_NO_EMPTY_SIZE, set empty_size and empty_cluster to 0 and try | 5214 | * LOOP_NO_EMPTY_SIZE, set empty_size and empty_cluster to 0 and try |
4868 | * again | 5215 | * again |
4869 | */ | 5216 | */ |
4870 | if (!ins->objectid && loop < LOOP_NO_EMPTY_SIZE && | 5217 | if (!ins->objectid && loop < LOOP_NO_EMPTY_SIZE) { |
4871 | (found_uncached_bg || empty_size || empty_cluster || | ||
4872 | allowed_chunk_alloc)) { | ||
4873 | index = 0; | 5218 | index = 0; |
4874 | if (loop == LOOP_FIND_IDEAL && found_uncached_bg) { | 5219 | if (loop == LOOP_FIND_IDEAL && found_uncached_bg) { |
4875 | found_uncached_bg = false; | 5220 | found_uncached_bg = false; |
@@ -4909,40 +5254,39 @@ loop: | |||
4909 | goto search; | 5254 | goto search; |
4910 | } | 5255 | } |
4911 | 5256 | ||
4912 | if (loop < LOOP_CACHING_WAIT) { | 5257 | loop++; |
4913 | loop++; | ||
4914 | goto search; | ||
4915 | } | ||
4916 | 5258 | ||
4917 | if (loop == LOOP_ALLOC_CHUNK) { | 5259 | if (loop == LOOP_ALLOC_CHUNK) { |
4918 | empty_size = 0; | 5260 | if (allowed_chunk_alloc) { |
4919 | empty_cluster = 0; | 5261 | ret = do_chunk_alloc(trans, root, num_bytes + |
4920 | } | 5262 | 2 * 1024 * 1024, data, |
5263 | CHUNK_ALLOC_LIMITED); | ||
5264 | allowed_chunk_alloc = 0; | ||
5265 | if (ret == 1) | ||
5266 | done_chunk_alloc = 1; | ||
5267 | } else if (!done_chunk_alloc && | ||
5268 | space_info->force_alloc == | ||
5269 | CHUNK_ALLOC_NO_FORCE) { | ||
5270 | space_info->force_alloc = CHUNK_ALLOC_LIMITED; | ||
5271 | } | ||
4921 | 5272 | ||
4922 | if (allowed_chunk_alloc) { | 5273 | /* |
4923 | ret = do_chunk_alloc(trans, root, num_bytes + | 5274 | * We didn't allocate a chunk, go ahead and drop the |
4924 | 2 * 1024 * 1024, data, 1); | 5275 | * empty size and loop again. |
4925 | allowed_chunk_alloc = 0; | 5276 | */ |
4926 | done_chunk_alloc = 1; | 5277 | if (!done_chunk_alloc) |
4927 | } else if (!done_chunk_alloc) { | 5278 | loop = LOOP_NO_EMPTY_SIZE; |
4928 | space_info->force_alloc = 1; | ||
4929 | } | 5279 | } |
4930 | 5280 | ||
4931 | if (loop < LOOP_NO_EMPTY_SIZE) { | 5281 | if (loop == LOOP_NO_EMPTY_SIZE) { |
4932 | loop++; | 5282 | empty_size = 0; |
4933 | goto search; | 5283 | empty_cluster = 0; |
4934 | } | 5284 | } |
4935 | ret = -ENOSPC; | 5285 | |
5286 | goto search; | ||
4936 | } else if (!ins->objectid) { | 5287 | } else if (!ins->objectid) { |
4937 | ret = -ENOSPC; | 5288 | ret = -ENOSPC; |
4938 | } | 5289 | } else if (ins->objectid) { |
4939 | |||
4940 | /* we found what we needed */ | ||
4941 | if (ins->objectid) { | ||
4942 | if (!(data & BTRFS_BLOCK_GROUP_DATA)) | ||
4943 | trans->block_group = block_group->key.objectid; | ||
4944 | |||
4945 | btrfs_put_block_group(block_group); | ||
4946 | ret = 0; | 5290 | ret = 0; |
4947 | } | 5291 | } |
4948 | 5292 | ||
@@ -5011,7 +5355,8 @@ again: | |||
5011 | */ | 5355 | */ |
5012 | if (empty_size || root->ref_cows) | 5356 | if (empty_size || root->ref_cows) |
5013 | ret = do_chunk_alloc(trans, root->fs_info->extent_root, | 5357 | ret = do_chunk_alloc(trans, root->fs_info->extent_root, |
5014 | num_bytes + 2 * 1024 * 1024, data, 0); | 5358 | num_bytes + 2 * 1024 * 1024, data, |
5359 | CHUNK_ALLOC_NO_FORCE); | ||
5015 | 5360 | ||
5016 | WARN_ON(num_bytes < root->sectorsize); | 5361 | WARN_ON(num_bytes < root->sectorsize); |
5017 | ret = find_free_extent(trans, root, num_bytes, empty_size, | 5362 | ret = find_free_extent(trans, root, num_bytes, empty_size, |
@@ -5023,10 +5368,10 @@ again: | |||
5023 | num_bytes = num_bytes & ~(root->sectorsize - 1); | 5368 | num_bytes = num_bytes & ~(root->sectorsize - 1); |
5024 | num_bytes = max(num_bytes, min_alloc_size); | 5369 | num_bytes = max(num_bytes, min_alloc_size); |
5025 | do_chunk_alloc(trans, root->fs_info->extent_root, | 5370 | do_chunk_alloc(trans, root->fs_info->extent_root, |
5026 | num_bytes, data, 1); | 5371 | num_bytes, data, CHUNK_ALLOC_FORCE); |
5027 | goto again; | 5372 | goto again; |
5028 | } | 5373 | } |
5029 | if (ret == -ENOSPC) { | 5374 | if (ret == -ENOSPC && btrfs_test_opt(root, ENOSPC_DEBUG)) { |
5030 | struct btrfs_space_info *sinfo; | 5375 | struct btrfs_space_info *sinfo; |
5031 | 5376 | ||
5032 | sinfo = __find_space_info(root->fs_info, data); | 5377 | sinfo = __find_space_info(root->fs_info, data); |
@@ -5036,6 +5381,8 @@ again: | |||
5036 | dump_space_info(sinfo, num_bytes, 1); | 5381 | dump_space_info(sinfo, num_bytes, 1); |
5037 | } | 5382 | } |
5038 | 5383 | ||
5384 | trace_btrfs_reserved_extent_alloc(root, ins->objectid, ins->offset); | ||
5385 | |||
5039 | return ret; | 5386 | return ret; |
5040 | } | 5387 | } |
5041 | 5388 | ||
@@ -5051,12 +5398,15 @@ int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len) | |||
5051 | return -ENOSPC; | 5398 | return -ENOSPC; |
5052 | } | 5399 | } |
5053 | 5400 | ||
5054 | ret = btrfs_discard_extent(root, start, len); | 5401 | if (btrfs_test_opt(root, DISCARD)) |
5402 | ret = btrfs_discard_extent(root, start, len, NULL); | ||
5055 | 5403 | ||
5056 | btrfs_add_free_space(cache, start, len); | 5404 | btrfs_add_free_space(cache, start, len); |
5057 | update_reserved_bytes(cache, len, 0, 1); | 5405 | btrfs_update_reserved_bytes(cache, len, 0, 1); |
5058 | btrfs_put_block_group(cache); | 5406 | btrfs_put_block_group(cache); |
5059 | 5407 | ||
5408 | trace_btrfs_reserved_extent_free(root, start, len); | ||
5409 | |||
5060 | return ret; | 5410 | return ret; |
5061 | } | 5411 | } |
5062 | 5412 | ||
@@ -5083,7 +5433,8 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans, | |||
5083 | size = sizeof(*extent_item) + btrfs_extent_inline_ref_size(type); | 5433 | size = sizeof(*extent_item) + btrfs_extent_inline_ref_size(type); |
5084 | 5434 | ||
5085 | path = btrfs_alloc_path(); | 5435 | path = btrfs_alloc_path(); |
5086 | BUG_ON(!path); | 5436 | if (!path) |
5437 | return -ENOMEM; | ||
5087 | 5438 | ||
5088 | path->leave_spinning = 1; | 5439 | path->leave_spinning = 1; |
5089 | ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path, | 5440 | ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path, |
@@ -5219,7 +5570,7 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans, | |||
5219 | u64 num_bytes = ins->offset; | 5570 | u64 num_bytes = ins->offset; |
5220 | 5571 | ||
5221 | block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid); | 5572 | block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid); |
5222 | cache_block_group(block_group); | 5573 | cache_block_group(block_group, trans, NULL, 0); |
5223 | caching_ctl = get_caching_control(block_group); | 5574 | caching_ctl = get_caching_control(block_group); |
5224 | 5575 | ||
5225 | if (!caching_ctl) { | 5576 | if (!caching_ctl) { |
@@ -5253,7 +5604,7 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans, | |||
5253 | put_caching_control(caching_ctl); | 5604 | put_caching_control(caching_ctl); |
5254 | } | 5605 | } |
5255 | 5606 | ||
5256 | ret = update_reserved_bytes(block_group, ins->offset, 1, 1); | 5607 | ret = btrfs_update_reserved_bytes(block_group, ins->offset, 1, 1); |
5257 | BUG_ON(ret); | 5608 | BUG_ON(ret); |
5258 | btrfs_put_block_group(block_group); | 5609 | btrfs_put_block_group(block_group); |
5259 | ret = alloc_reserved_file_extent(trans, root, 0, root_objectid, | 5610 | ret = alloc_reserved_file_extent(trans, root, 0, root_objectid, |
@@ -5304,25 +5655,47 @@ use_block_rsv(struct btrfs_trans_handle *trans, | |||
5304 | struct btrfs_root *root, u32 blocksize) | 5655 | struct btrfs_root *root, u32 blocksize) |
5305 | { | 5656 | { |
5306 | struct btrfs_block_rsv *block_rsv; | 5657 | struct btrfs_block_rsv *block_rsv; |
5658 | struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv; | ||
5307 | int ret; | 5659 | int ret; |
5308 | 5660 | ||
5309 | block_rsv = get_block_rsv(trans, root); | 5661 | block_rsv = get_block_rsv(trans, root); |
5310 | 5662 | ||
5311 | if (block_rsv->size == 0) { | 5663 | if (block_rsv->size == 0) { |
5312 | ret = reserve_metadata_bytes(block_rsv, blocksize); | 5664 | ret = reserve_metadata_bytes(trans, root, block_rsv, |
5313 | if (ret) | 5665 | blocksize, 0); |
5666 | /* | ||
5667 | * If we couldn't reserve metadata bytes try and use some from | ||
5668 | * the global reserve. | ||
5669 | */ | ||
5670 | if (ret && block_rsv != global_rsv) { | ||
5671 | ret = block_rsv_use_bytes(global_rsv, blocksize); | ||
5672 | if (!ret) | ||
5673 | return global_rsv; | ||
5314 | return ERR_PTR(ret); | 5674 | return ERR_PTR(ret); |
5675 | } else if (ret) { | ||
5676 | return ERR_PTR(ret); | ||
5677 | } | ||
5315 | return block_rsv; | 5678 | return block_rsv; |
5316 | } | 5679 | } |
5317 | 5680 | ||
5318 | ret = block_rsv_use_bytes(block_rsv, blocksize); | 5681 | ret = block_rsv_use_bytes(block_rsv, blocksize); |
5319 | if (!ret) | 5682 | if (!ret) |
5320 | return block_rsv; | 5683 | return block_rsv; |
5321 | 5684 | if (ret) { | |
5322 | WARN_ON(1); | 5685 | WARN_ON(1); |
5323 | printk(KERN_INFO"block_rsv size %llu reserved %llu freed %llu %llu\n", | 5686 | ret = reserve_metadata_bytes(trans, root, block_rsv, blocksize, |
5324 | block_rsv->size, block_rsv->reserved, | 5687 | 0); |
5325 | block_rsv->freed[0], block_rsv->freed[1]); | 5688 | if (!ret) { |
5689 | spin_lock(&block_rsv->lock); | ||
5690 | block_rsv->size += blocksize; | ||
5691 | spin_unlock(&block_rsv->lock); | ||
5692 | return block_rsv; | ||
5693 | } else if (ret && block_rsv != global_rsv) { | ||
5694 | ret = block_rsv_use_bytes(global_rsv, blocksize); | ||
5695 | if (!ret) | ||
5696 | return global_rsv; | ||
5697 | } | ||
5698 | } | ||
5326 | 5699 | ||
5327 | return ERR_PTR(-ENOSPC); | 5700 | return ERR_PTR(-ENOSPC); |
5328 | } | 5701 | } |
@@ -5422,7 +5795,6 @@ static noinline void reada_walk_down(struct btrfs_trans_handle *trans, | |||
5422 | u64 generation; | 5795 | u64 generation; |
5423 | u64 refs; | 5796 | u64 refs; |
5424 | u64 flags; | 5797 | u64 flags; |
5425 | u64 last = 0; | ||
5426 | u32 nritems; | 5798 | u32 nritems; |
5427 | u32 blocksize; | 5799 | u32 blocksize; |
5428 | struct btrfs_key key; | 5800 | struct btrfs_key key; |
@@ -5490,7 +5862,6 @@ reada: | |||
5490 | generation); | 5862 | generation); |
5491 | if (ret) | 5863 | if (ret) |
5492 | break; | 5864 | break; |
5493 | last = bytenr + blocksize; | ||
5494 | nread++; | 5865 | nread++; |
5495 | } | 5866 | } |
5496 | wc->reada_slot = slot; | 5867 | wc->reada_slot = slot; |
@@ -5666,6 +6037,8 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans, | |||
5666 | if (reada && level == 1) | 6037 | if (reada && level == 1) |
5667 | reada_walk_down(trans, root, wc, path); | 6038 | reada_walk_down(trans, root, wc, path); |
5668 | next = read_tree_block(root, bytenr, blocksize, generation); | 6039 | next = read_tree_block(root, bytenr, blocksize, generation); |
6040 | if (!next) | ||
6041 | return -EIO; | ||
5669 | btrfs_tree_lock(next); | 6042 | btrfs_tree_lock(next); |
5670 | btrfs_set_lock_blocking(next); | 6043 | btrfs_set_lock_blocking(next); |
5671 | } | 6044 | } |
@@ -5898,6 +6271,8 @@ int btrfs_drop_snapshot(struct btrfs_root *root, | |||
5898 | BUG_ON(!wc); | 6271 | BUG_ON(!wc); |
5899 | 6272 | ||
5900 | trans = btrfs_start_transaction(tree_root, 0); | 6273 | trans = btrfs_start_transaction(tree_root, 0); |
6274 | BUG_ON(IS_ERR(trans)); | ||
6275 | |||
5901 | if (block_rsv) | 6276 | if (block_rsv) |
5902 | trans->block_rsv = block_rsv; | 6277 | trans->block_rsv = block_rsv; |
5903 | 6278 | ||
@@ -5995,11 +6370,12 @@ int btrfs_drop_snapshot(struct btrfs_root *root, | |||
5995 | 6370 | ||
5996 | btrfs_end_transaction_throttle(trans, tree_root); | 6371 | btrfs_end_transaction_throttle(trans, tree_root); |
5997 | trans = btrfs_start_transaction(tree_root, 0); | 6372 | trans = btrfs_start_transaction(tree_root, 0); |
6373 | BUG_ON(IS_ERR(trans)); | ||
5998 | if (block_rsv) | 6374 | if (block_rsv) |
5999 | trans->block_rsv = block_rsv; | 6375 | trans->block_rsv = block_rsv; |
6000 | } | 6376 | } |
6001 | } | 6377 | } |
6002 | btrfs_release_path(root, path); | 6378 | btrfs_release_path(path); |
6003 | BUG_ON(err); | 6379 | BUG_ON(err); |
6004 | 6380 | ||
6005 | ret = btrfs_del_root(trans, tree_root, &root->root_key); | 6381 | ret = btrfs_del_root(trans, tree_root, &root->root_key); |
@@ -6010,9 +6386,13 @@ int btrfs_drop_snapshot(struct btrfs_root *root, | |||
6010 | NULL, NULL); | 6386 | NULL, NULL); |
6011 | BUG_ON(ret < 0); | 6387 | BUG_ON(ret < 0); |
6012 | if (ret > 0) { | 6388 | if (ret > 0) { |
6013 | ret = btrfs_del_orphan_item(trans, tree_root, | 6389 | /* if we fail to delete the orphan item this time |
6014 | root->root_key.objectid); | 6390 | * around, it'll get picked up the next time. |
6015 | BUG_ON(ret); | 6391 | * |
6392 | * The most common failure here is just -ENOENT. | ||
6393 | */ | ||
6394 | btrfs_del_orphan_item(trans, tree_root, | ||
6395 | root->root_key.objectid); | ||
6016 | } | 6396 | } |
6017 | } | 6397 | } |
6018 | 6398 | ||
@@ -6050,10 +6430,14 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans, | |||
6050 | BUG_ON(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID); | 6430 | BUG_ON(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID); |
6051 | 6431 | ||
6052 | path = btrfs_alloc_path(); | 6432 | path = btrfs_alloc_path(); |
6053 | BUG_ON(!path); | 6433 | if (!path) |
6434 | return -ENOMEM; | ||
6054 | 6435 | ||
6055 | wc = kzalloc(sizeof(*wc), GFP_NOFS); | 6436 | wc = kzalloc(sizeof(*wc), GFP_NOFS); |
6056 | BUG_ON(!wc); | 6437 | if (!wc) { |
6438 | btrfs_free_path(path); | ||
6439 | return -ENOMEM; | ||
6440 | } | ||
6057 | 6441 | ||
6058 | btrfs_assert_tree_locked(parent); | 6442 | btrfs_assert_tree_locked(parent); |
6059 | parent_level = btrfs_header_level(parent); | 6443 | parent_level = btrfs_header_level(parent); |
@@ -6095,1500 +6479,20 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans, | |||
6095 | return ret; | 6479 | return ret; |
6096 | } | 6480 | } |
6097 | 6481 | ||
6098 | #if 0 | ||
6099 | static unsigned long calc_ra(unsigned long start, unsigned long last, | ||
6100 | unsigned long nr) | ||
6101 | { | ||
6102 | return min(last, start + nr - 1); | ||
6103 | } | ||
6104 | |||
6105 | static noinline int relocate_inode_pages(struct inode *inode, u64 start, | ||
6106 | u64 len) | ||
6107 | { | ||
6108 | u64 page_start; | ||
6109 | u64 page_end; | ||
6110 | unsigned long first_index; | ||
6111 | unsigned long last_index; | ||
6112 | unsigned long i; | ||
6113 | struct page *page; | ||
6114 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | ||
6115 | struct file_ra_state *ra; | ||
6116 | struct btrfs_ordered_extent *ordered; | ||
6117 | unsigned int total_read = 0; | ||
6118 | unsigned int total_dirty = 0; | ||
6119 | int ret = 0; | ||
6120 | |||
6121 | ra = kzalloc(sizeof(*ra), GFP_NOFS); | ||
6122 | |||
6123 | mutex_lock(&inode->i_mutex); | ||
6124 | first_index = start >> PAGE_CACHE_SHIFT; | ||
6125 | last_index = (start + len - 1) >> PAGE_CACHE_SHIFT; | ||
6126 | |||
6127 | /* make sure the dirty trick played by the caller work */ | ||
6128 | ret = invalidate_inode_pages2_range(inode->i_mapping, | ||
6129 | first_index, last_index); | ||
6130 | if (ret) | ||
6131 | goto out_unlock; | ||
6132 | |||
6133 | file_ra_state_init(ra, inode->i_mapping); | ||
6134 | |||
6135 | for (i = first_index ; i <= last_index; i++) { | ||
6136 | if (total_read % ra->ra_pages == 0) { | ||
6137 | btrfs_force_ra(inode->i_mapping, ra, NULL, i, | ||
6138 | calc_ra(i, last_index, ra->ra_pages)); | ||
6139 | } | ||
6140 | total_read++; | ||
6141 | again: | ||
6142 | if (((u64)i << PAGE_CACHE_SHIFT) > i_size_read(inode)) | ||
6143 | BUG_ON(1); | ||
6144 | page = grab_cache_page(inode->i_mapping, i); | ||
6145 | if (!page) { | ||
6146 | ret = -ENOMEM; | ||
6147 | goto out_unlock; | ||
6148 | } | ||
6149 | if (!PageUptodate(page)) { | ||
6150 | btrfs_readpage(NULL, page); | ||
6151 | lock_page(page); | ||
6152 | if (!PageUptodate(page)) { | ||
6153 | unlock_page(page); | ||
6154 | page_cache_release(page); | ||
6155 | ret = -EIO; | ||
6156 | goto out_unlock; | ||
6157 | } | ||
6158 | } | ||
6159 | wait_on_page_writeback(page); | ||
6160 | |||
6161 | page_start = (u64)page->index << PAGE_CACHE_SHIFT; | ||
6162 | page_end = page_start + PAGE_CACHE_SIZE - 1; | ||
6163 | lock_extent(io_tree, page_start, page_end, GFP_NOFS); | ||
6164 | |||
6165 | ordered = btrfs_lookup_ordered_extent(inode, page_start); | ||
6166 | if (ordered) { | ||
6167 | unlock_extent(io_tree, page_start, page_end, GFP_NOFS); | ||
6168 | unlock_page(page); | ||
6169 | page_cache_release(page); | ||
6170 | btrfs_start_ordered_extent(inode, ordered, 1); | ||
6171 | btrfs_put_ordered_extent(ordered); | ||
6172 | goto again; | ||
6173 | } | ||
6174 | set_page_extent_mapped(page); | ||
6175 | |||
6176 | if (i == first_index) | ||
6177 | set_extent_bits(io_tree, page_start, page_end, | ||
6178 | EXTENT_BOUNDARY, GFP_NOFS); | ||
6179 | btrfs_set_extent_delalloc(inode, page_start, page_end); | ||
6180 | |||
6181 | set_page_dirty(page); | ||
6182 | total_dirty++; | ||
6183 | |||
6184 | unlock_extent(io_tree, page_start, page_end, GFP_NOFS); | ||
6185 | unlock_page(page); | ||
6186 | page_cache_release(page); | ||
6187 | } | ||
6188 | |||
6189 | out_unlock: | ||
6190 | kfree(ra); | ||
6191 | mutex_unlock(&inode->i_mutex); | ||
6192 | balance_dirty_pages_ratelimited_nr(inode->i_mapping, total_dirty); | ||
6193 | return ret; | ||
6194 | } | ||
6195 | |||
6196 | static noinline int relocate_data_extent(struct inode *reloc_inode, | ||
6197 | struct btrfs_key *extent_key, | ||
6198 | u64 offset) | ||
6199 | { | ||
6200 | struct btrfs_root *root = BTRFS_I(reloc_inode)->root; | ||
6201 | struct extent_map_tree *em_tree = &BTRFS_I(reloc_inode)->extent_tree; | ||
6202 | struct extent_map *em; | ||
6203 | u64 start = extent_key->objectid - offset; | ||
6204 | u64 end = start + extent_key->offset - 1; | ||
6205 | |||
6206 | em = alloc_extent_map(GFP_NOFS); | ||
6207 | BUG_ON(!em || IS_ERR(em)); | ||
6208 | |||
6209 | em->start = start; | ||
6210 | em->len = extent_key->offset; | ||
6211 | em->block_len = extent_key->offset; | ||
6212 | em->block_start = extent_key->objectid; | ||
6213 | em->bdev = root->fs_info->fs_devices->latest_bdev; | ||
6214 | set_bit(EXTENT_FLAG_PINNED, &em->flags); | ||
6215 | |||
6216 | /* setup extent map to cheat btrfs_readpage */ | ||
6217 | lock_extent(&BTRFS_I(reloc_inode)->io_tree, start, end, GFP_NOFS); | ||
6218 | while (1) { | ||
6219 | int ret; | ||
6220 | write_lock(&em_tree->lock); | ||
6221 | ret = add_extent_mapping(em_tree, em); | ||
6222 | write_unlock(&em_tree->lock); | ||
6223 | if (ret != -EEXIST) { | ||
6224 | free_extent_map(em); | ||
6225 | break; | ||
6226 | } | ||
6227 | btrfs_drop_extent_cache(reloc_inode, start, end, 0); | ||
6228 | } | ||
6229 | unlock_extent(&BTRFS_I(reloc_inode)->io_tree, start, end, GFP_NOFS); | ||
6230 | |||
6231 | return relocate_inode_pages(reloc_inode, start, extent_key->offset); | ||
6232 | } | ||
6233 | |||
6234 | struct btrfs_ref_path { | ||
6235 | u64 extent_start; | ||
6236 | u64 nodes[BTRFS_MAX_LEVEL]; | ||
6237 | u64 root_objectid; | ||
6238 | u64 root_generation; | ||
6239 | u64 owner_objectid; | ||
6240 | u32 num_refs; | ||
6241 | int lowest_level; | ||
6242 | int current_level; | ||
6243 | int shared_level; | ||
6244 | |||
6245 | struct btrfs_key node_keys[BTRFS_MAX_LEVEL]; | ||
6246 | u64 new_nodes[BTRFS_MAX_LEVEL]; | ||
6247 | }; | ||
6248 | |||
6249 | struct disk_extent { | ||
6250 | u64 ram_bytes; | ||
6251 | u64 disk_bytenr; | ||
6252 | u64 disk_num_bytes; | ||
6253 | u64 offset; | ||
6254 | u64 num_bytes; | ||
6255 | u8 compression; | ||
6256 | u8 encryption; | ||
6257 | u16 other_encoding; | ||
6258 | }; | ||
6259 | |||
6260 | static int is_cowonly_root(u64 root_objectid) | ||
6261 | { | ||
6262 | if (root_objectid == BTRFS_ROOT_TREE_OBJECTID || | ||
6263 | root_objectid == BTRFS_EXTENT_TREE_OBJECTID || | ||
6264 | root_objectid == BTRFS_CHUNK_TREE_OBJECTID || | ||
6265 | root_objectid == BTRFS_DEV_TREE_OBJECTID || | ||
6266 | root_objectid == BTRFS_TREE_LOG_OBJECTID || | ||
6267 | root_objectid == BTRFS_CSUM_TREE_OBJECTID) | ||
6268 | return 1; | ||
6269 | return 0; | ||
6270 | } | ||
6271 | |||
6272 | static noinline int __next_ref_path(struct btrfs_trans_handle *trans, | ||
6273 | struct btrfs_root *extent_root, | ||
6274 | struct btrfs_ref_path *ref_path, | ||
6275 | int first_time) | ||
6276 | { | ||
6277 | struct extent_buffer *leaf; | ||
6278 | struct btrfs_path *path; | ||
6279 | struct btrfs_extent_ref *ref; | ||
6280 | struct btrfs_key key; | ||
6281 | struct btrfs_key found_key; | ||
6282 | u64 bytenr; | ||
6283 | u32 nritems; | ||
6284 | int level; | ||
6285 | int ret = 1; | ||
6286 | |||
6287 | path = btrfs_alloc_path(); | ||
6288 | if (!path) | ||
6289 | return -ENOMEM; | ||
6290 | |||
6291 | if (first_time) { | ||
6292 | ref_path->lowest_level = -1; | ||
6293 | ref_path->current_level = -1; | ||
6294 | ref_path->shared_level = -1; | ||
6295 | goto walk_up; | ||
6296 | } | ||
6297 | walk_down: | ||
6298 | level = ref_path->current_level - 1; | ||
6299 | while (level >= -1) { | ||
6300 | u64 parent; | ||
6301 | if (level < ref_path->lowest_level) | ||
6302 | break; | ||
6303 | |||
6304 | if (level >= 0) | ||
6305 | bytenr = ref_path->nodes[level]; | ||
6306 | else | ||
6307 | bytenr = ref_path->extent_start; | ||
6308 | BUG_ON(bytenr == 0); | ||
6309 | |||
6310 | parent = ref_path->nodes[level + 1]; | ||
6311 | ref_path->nodes[level + 1] = 0; | ||
6312 | ref_path->current_level = level; | ||
6313 | BUG_ON(parent == 0); | ||
6314 | |||
6315 | key.objectid = bytenr; | ||
6316 | key.offset = parent + 1; | ||
6317 | key.type = BTRFS_EXTENT_REF_KEY; | ||
6318 | |||
6319 | ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 0); | ||
6320 | if (ret < 0) | ||
6321 | goto out; | ||
6322 | BUG_ON(ret == 0); | ||
6323 | |||
6324 | leaf = path->nodes[0]; | ||
6325 | nritems = btrfs_header_nritems(leaf); | ||
6326 | if (path->slots[0] >= nritems) { | ||
6327 | ret = btrfs_next_leaf(extent_root, path); | ||
6328 | if (ret < 0) | ||
6329 | goto out; | ||
6330 | if (ret > 0) | ||
6331 | goto next; | ||
6332 | leaf = path->nodes[0]; | ||
6333 | } | ||
6334 | |||
6335 | btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); | ||
6336 | if (found_key.objectid == bytenr && | ||
6337 | found_key.type == BTRFS_EXTENT_REF_KEY) { | ||
6338 | if (level < ref_path->shared_level) | ||
6339 | ref_path->shared_level = level; | ||
6340 | goto found; | ||
6341 | } | ||
6342 | next: | ||
6343 | level--; | ||
6344 | btrfs_release_path(extent_root, path); | ||
6345 | cond_resched(); | ||
6346 | } | ||
6347 | /* reached lowest level */ | ||
6348 | ret = 1; | ||
6349 | goto out; | ||
6350 | walk_up: | ||
6351 | level = ref_path->current_level; | ||
6352 | while (level < BTRFS_MAX_LEVEL - 1) { | ||
6353 | u64 ref_objectid; | ||
6354 | |||
6355 | if (level >= 0) | ||
6356 | bytenr = ref_path->nodes[level]; | ||
6357 | else | ||
6358 | bytenr = ref_path->extent_start; | ||
6359 | |||
6360 | BUG_ON(bytenr == 0); | ||
6361 | |||
6362 | key.objectid = bytenr; | ||
6363 | key.offset = 0; | ||
6364 | key.type = BTRFS_EXTENT_REF_KEY; | ||
6365 | |||
6366 | ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 0); | ||
6367 | if (ret < 0) | ||
6368 | goto out; | ||
6369 | |||
6370 | leaf = path->nodes[0]; | ||
6371 | nritems = btrfs_header_nritems(leaf); | ||
6372 | if (path->slots[0] >= nritems) { | ||
6373 | ret = btrfs_next_leaf(extent_root, path); | ||
6374 | if (ret < 0) | ||
6375 | goto out; | ||
6376 | if (ret > 0) { | ||
6377 | /* the extent was freed by someone */ | ||
6378 | if (ref_path->lowest_level == level) | ||
6379 | goto out; | ||
6380 | btrfs_release_path(extent_root, path); | ||
6381 | goto walk_down; | ||
6382 | } | ||
6383 | leaf = path->nodes[0]; | ||
6384 | } | ||
6385 | |||
6386 | btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); | ||
6387 | if (found_key.objectid != bytenr || | ||
6388 | found_key.type != BTRFS_EXTENT_REF_KEY) { | ||
6389 | /* the extent was freed by someone */ | ||
6390 | if (ref_path->lowest_level == level) { | ||
6391 | ret = 1; | ||
6392 | goto out; | ||
6393 | } | ||
6394 | btrfs_release_path(extent_root, path); | ||
6395 | goto walk_down; | ||
6396 | } | ||
6397 | found: | ||
6398 | ref = btrfs_item_ptr(leaf, path->slots[0], | ||
6399 | struct btrfs_extent_ref); | ||
6400 | ref_objectid = btrfs_ref_objectid(leaf, ref); | ||
6401 | if (ref_objectid < BTRFS_FIRST_FREE_OBJECTID) { | ||
6402 | if (first_time) { | ||
6403 | level = (int)ref_objectid; | ||
6404 | BUG_ON(level >= BTRFS_MAX_LEVEL); | ||
6405 | ref_path->lowest_level = level; | ||
6406 | ref_path->current_level = level; | ||
6407 | ref_path->nodes[level] = bytenr; | ||
6408 | } else { | ||
6409 | WARN_ON(ref_objectid != level); | ||
6410 | } | ||
6411 | } else { | ||
6412 | WARN_ON(level != -1); | ||
6413 | } | ||
6414 | first_time = 0; | ||
6415 | |||
6416 | if (ref_path->lowest_level == level) { | ||
6417 | ref_path->owner_objectid = ref_objectid; | ||
6418 | ref_path->num_refs = btrfs_ref_num_refs(leaf, ref); | ||
6419 | } | ||
6420 | |||
6421 | /* | ||
6422 | * the block is tree root or the block isn't in reference | ||
6423 | * counted tree. | ||
6424 | */ | ||
6425 | if (found_key.objectid == found_key.offset || | ||
6426 | is_cowonly_root(btrfs_ref_root(leaf, ref))) { | ||
6427 | ref_path->root_objectid = btrfs_ref_root(leaf, ref); | ||
6428 | ref_path->root_generation = | ||
6429 | btrfs_ref_generation(leaf, ref); | ||
6430 | if (level < 0) { | ||
6431 | /* special reference from the tree log */ | ||
6432 | ref_path->nodes[0] = found_key.offset; | ||
6433 | ref_path->current_level = 0; | ||
6434 | } | ||
6435 | ret = 0; | ||
6436 | goto out; | ||
6437 | } | ||
6438 | |||
6439 | level++; | ||
6440 | BUG_ON(ref_path->nodes[level] != 0); | ||
6441 | ref_path->nodes[level] = found_key.offset; | ||
6442 | ref_path->current_level = level; | ||
6443 | |||
6444 | /* | ||
6445 | * the reference was created in the running transaction, | ||
6446 | * no need to continue walking up. | ||
6447 | */ | ||
6448 | if (btrfs_ref_generation(leaf, ref) == trans->transid) { | ||
6449 | ref_path->root_objectid = btrfs_ref_root(leaf, ref); | ||
6450 | ref_path->root_generation = | ||
6451 | btrfs_ref_generation(leaf, ref); | ||
6452 | ret = 0; | ||
6453 | goto out; | ||
6454 | } | ||
6455 | |||
6456 | btrfs_release_path(extent_root, path); | ||
6457 | cond_resched(); | ||
6458 | } | ||
6459 | /* reached max tree level, but no tree root found. */ | ||
6460 | BUG(); | ||
6461 | out: | ||
6462 | btrfs_free_path(path); | ||
6463 | return ret; | ||
6464 | } | ||
6465 | |||
6466 | static int btrfs_first_ref_path(struct btrfs_trans_handle *trans, | ||
6467 | struct btrfs_root *extent_root, | ||
6468 | struct btrfs_ref_path *ref_path, | ||
6469 | u64 extent_start) | ||
6470 | { | ||
6471 | memset(ref_path, 0, sizeof(*ref_path)); | ||
6472 | ref_path->extent_start = extent_start; | ||
6473 | |||
6474 | return __next_ref_path(trans, extent_root, ref_path, 1); | ||
6475 | } | ||
6476 | |||
6477 | static int btrfs_next_ref_path(struct btrfs_trans_handle *trans, | ||
6478 | struct btrfs_root *extent_root, | ||
6479 | struct btrfs_ref_path *ref_path) | ||
6480 | { | ||
6481 | return __next_ref_path(trans, extent_root, ref_path, 0); | ||
6482 | } | ||
6483 | |||
6484 | static noinline int get_new_locations(struct inode *reloc_inode, | ||
6485 | struct btrfs_key *extent_key, | ||
6486 | u64 offset, int no_fragment, | ||
6487 | struct disk_extent **extents, | ||
6488 | int *nr_extents) | ||
6489 | { | ||
6490 | struct btrfs_root *root = BTRFS_I(reloc_inode)->root; | ||
6491 | struct btrfs_path *path; | ||
6492 | struct btrfs_file_extent_item *fi; | ||
6493 | struct extent_buffer *leaf; | ||
6494 | struct disk_extent *exts = *extents; | ||
6495 | struct btrfs_key found_key; | ||
6496 | u64 cur_pos; | ||
6497 | u64 last_byte; | ||
6498 | u32 nritems; | ||
6499 | int nr = 0; | ||
6500 | int max = *nr_extents; | ||
6501 | int ret; | ||
6502 | |||
6503 | WARN_ON(!no_fragment && *extents); | ||
6504 | if (!exts) { | ||
6505 | max = 1; | ||
6506 | exts = kmalloc(sizeof(*exts) * max, GFP_NOFS); | ||
6507 | if (!exts) | ||
6508 | return -ENOMEM; | ||
6509 | } | ||
6510 | |||
6511 | path = btrfs_alloc_path(); | ||
6512 | BUG_ON(!path); | ||
6513 | |||
6514 | cur_pos = extent_key->objectid - offset; | ||
6515 | last_byte = extent_key->objectid + extent_key->offset; | ||
6516 | ret = btrfs_lookup_file_extent(NULL, root, path, reloc_inode->i_ino, | ||
6517 | cur_pos, 0); | ||
6518 | if (ret < 0) | ||
6519 | goto out; | ||
6520 | if (ret > 0) { | ||
6521 | ret = -ENOENT; | ||
6522 | goto out; | ||
6523 | } | ||
6524 | |||
6525 | while (1) { | ||
6526 | leaf = path->nodes[0]; | ||
6527 | nritems = btrfs_header_nritems(leaf); | ||
6528 | if (path->slots[0] >= nritems) { | ||
6529 | ret = btrfs_next_leaf(root, path); | ||
6530 | if (ret < 0) | ||
6531 | goto out; | ||
6532 | if (ret > 0) | ||
6533 | break; | ||
6534 | leaf = path->nodes[0]; | ||
6535 | } | ||
6536 | |||
6537 | btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); | ||
6538 | if (found_key.offset != cur_pos || | ||
6539 | found_key.type != BTRFS_EXTENT_DATA_KEY || | ||
6540 | found_key.objectid != reloc_inode->i_ino) | ||
6541 | break; | ||
6542 | |||
6543 | fi = btrfs_item_ptr(leaf, path->slots[0], | ||
6544 | struct btrfs_file_extent_item); | ||
6545 | if (btrfs_file_extent_type(leaf, fi) != | ||
6546 | BTRFS_FILE_EXTENT_REG || | ||
6547 | btrfs_file_extent_disk_bytenr(leaf, fi) == 0) | ||
6548 | break; | ||
6549 | |||
6550 | if (nr == max) { | ||
6551 | struct disk_extent *old = exts; | ||
6552 | max *= 2; | ||
6553 | exts = kzalloc(sizeof(*exts) * max, GFP_NOFS); | ||
6554 | memcpy(exts, old, sizeof(*exts) * nr); | ||
6555 | if (old != *extents) | ||
6556 | kfree(old); | ||
6557 | } | ||
6558 | |||
6559 | exts[nr].disk_bytenr = | ||
6560 | btrfs_file_extent_disk_bytenr(leaf, fi); | ||
6561 | exts[nr].disk_num_bytes = | ||
6562 | btrfs_file_extent_disk_num_bytes(leaf, fi); | ||
6563 | exts[nr].offset = btrfs_file_extent_offset(leaf, fi); | ||
6564 | exts[nr].num_bytes = btrfs_file_extent_num_bytes(leaf, fi); | ||
6565 | exts[nr].ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi); | ||
6566 | exts[nr].compression = btrfs_file_extent_compression(leaf, fi); | ||
6567 | exts[nr].encryption = btrfs_file_extent_encryption(leaf, fi); | ||
6568 | exts[nr].other_encoding = btrfs_file_extent_other_encoding(leaf, | ||
6569 | fi); | ||
6570 | BUG_ON(exts[nr].offset > 0); | ||
6571 | BUG_ON(exts[nr].compression || exts[nr].encryption); | ||
6572 | BUG_ON(exts[nr].num_bytes != exts[nr].disk_num_bytes); | ||
6573 | |||
6574 | cur_pos += exts[nr].num_bytes; | ||
6575 | nr++; | ||
6576 | |||
6577 | if (cur_pos + offset >= last_byte) | ||
6578 | break; | ||
6579 | |||
6580 | if (no_fragment) { | ||
6581 | ret = 1; | ||
6582 | goto out; | ||
6583 | } | ||
6584 | path->slots[0]++; | ||
6585 | } | ||
6586 | |||
6587 | BUG_ON(cur_pos + offset > last_byte); | ||
6588 | if (cur_pos + offset < last_byte) { | ||
6589 | ret = -ENOENT; | ||
6590 | goto out; | ||
6591 | } | ||
6592 | ret = 0; | ||
6593 | out: | ||
6594 | btrfs_free_path(path); | ||
6595 | if (ret) { | ||
6596 | if (exts != *extents) | ||
6597 | kfree(exts); | ||
6598 | } else { | ||
6599 | *extents = exts; | ||
6600 | *nr_extents = nr; | ||
6601 | } | ||
6602 | return ret; | ||
6603 | } | ||
6604 | |||
/*
 * Rewrite every file extent item in @root that still points at the extent
 * being relocated (@extent_key->objectid) so that it points at the new
 * location given in @new_extents.
 *
 * @leaf_key gives the starting search key.  When the reference path names a
 * single owning inode (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS)
 * the scan is clamped to that inode's EXTENT_DATA items and stops once
 * key.offset passes the end of the file range that can reference this
 * extent (search_end, derived from the first match's ram_bytes).
 *
 * For each matching file extent: the covered byte range is locked in the
 * inode's io_tree (after backing off for any overlapping ordered extent),
 * the disk_bytenr/disk_num_bytes are rewritten in place, a ref is taken on
 * the new extent and the ref on the old extent is dropped.  Only
 * nr_extents == 1 is supported; the split path is compiled out.
 *
 * Returns 0 on success or a negative errno from the tree search.
 */
static noinline int replace_one_extent(struct btrfs_trans_handle *trans,
				struct btrfs_root *root,
				struct btrfs_path *path,
				struct btrfs_key *extent_key,
				struct btrfs_key *leaf_key,
				struct btrfs_ref_path *ref_path,
				struct disk_extent *new_extents,
				int nr_extents)
{
	struct extent_buffer *leaf;
	struct btrfs_file_extent_item *fi;
	struct inode *inode = NULL;
	struct btrfs_key key;
	u64 lock_start = 0;
	u64 lock_end = 0;
	u64 num_bytes;
	u64 ext_offset;
	u64 search_end = (u64)-1;
	u32 nritems;
	int nr_scaned = 0;
	int extent_locked = 0;
	int extent_type;
	int ret;

	memcpy(&key, leaf_key, sizeof(key));
	if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS) {
		/* clamp the starting key to the owner's EXTENT_DATA items */
		if (key.objectid < ref_path->owner_objectid ||
		    (key.objectid == ref_path->owner_objectid &&
		     key.type < BTRFS_EXTENT_DATA_KEY)) {
			key.objectid = ref_path->owner_objectid;
			key.type = BTRFS_EXTENT_DATA_KEY;
			key.offset = 0;
		}
	}

	/* main loop: each pass examines one leaf slot */
	while (1) {
		ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
		if (ret < 0)
			goto out;

		leaf = path->nodes[0];
		nritems = btrfs_header_nritems(leaf);
next:
		if (extent_locked && ret > 0) {
			/*
			 * the file extent item was modified by someone
			 * before the extent got locked.
			 */
			unlock_extent(&BTRFS_I(inode)->io_tree, lock_start,
				      lock_end, GFP_NOFS);
			extent_locked = 0;
		}

		if (path->slots[0] >= nritems) {
			/*
			 * give up after stepping past a couple of leaves;
			 * presumably a bound on wasted rescanning after the
			 * path was dropped and re-searched — TODO confirm.
			 */
			if (++nr_scaned > 2)
				break;

			BUG_ON(extent_locked);
			ret = btrfs_next_leaf(root, path);
			if (ret < 0)
				goto out;
			if (ret > 0)
				break;
			leaf = path->nodes[0];
			nritems = btrfs_header_nritems(leaf);
		}

		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);

		if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS) {
			/* past the owning inode's data items: done */
			if ((key.objectid > ref_path->owner_objectid) ||
			    (key.objectid == ref_path->owner_objectid &&
			     key.type > BTRFS_EXTENT_DATA_KEY) ||
			    key.offset >= search_end)
				break;
		}

		if (inode && key.objectid != inode->i_ino) {
			/* moved on to a different inode; drop the old one */
			BUG_ON(extent_locked);
			btrfs_release_path(root, path);
			mutex_unlock(&inode->i_mutex);
			iput(inode);
			inode = NULL;
			continue;
		}

		if (key.type != BTRFS_EXTENT_DATA_KEY) {
			path->slots[0]++;
			ret = 1;
			goto next;
		}
		fi = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_file_extent_item);
		extent_type = btrfs_file_extent_type(leaf, fi);
		/* only REG/PREALLOC items pointing at the relocated extent */
		if ((extent_type != BTRFS_FILE_EXTENT_REG &&
		     extent_type != BTRFS_FILE_EXTENT_PREALLOC) ||
		    (btrfs_file_extent_disk_bytenr(leaf, fi) !=
		     extent_key->objectid)) {
			path->slots[0]++;
			ret = 1;
			goto next;
		}

		num_bytes = btrfs_file_extent_num_bytes(leaf, fi);
		ext_offset = btrfs_file_extent_offset(leaf, fi);

		if (search_end == (u64)-1) {
			/*
			 * first match fixes the end of the file range that
			 * could still reference this extent
			 */
			search_end = key.offset - ext_offset +
				btrfs_file_extent_ram_bytes(leaf, fi);
		}

		if (!extent_locked) {
			lock_start = key.offset;
			lock_end = lock_start + num_bytes - 1;
		} else {
			/* held lock does not cover this item; retake it */
			if (lock_start > key.offset ||
			    lock_end + 1 < key.offset + num_bytes) {
				unlock_extent(&BTRFS_I(inode)->io_tree,
					      lock_start, lock_end, GFP_NOFS);
				extent_locked = 0;
			}
		}

		if (!inode) {
			btrfs_release_path(root, path);

			inode = btrfs_iget_locked(root->fs_info->sb,
						  key.objectid, root);
			if (inode->i_state & I_NEW) {
				BTRFS_I(inode)->root = root;
				BTRFS_I(inode)->location.objectid =
					key.objectid;
				BTRFS_I(inode)->location.type =
					BTRFS_INODE_ITEM_KEY;
				BTRFS_I(inode)->location.offset = 0;
				btrfs_read_locked_inode(inode);
				unlock_new_inode(inode);
			}
			/*
			 * some code calls btrfs_commit_transaction while
			 * holding the i_mutex, so we can't use mutex_lock
			 * here.
			 */
			if (is_bad_inode(inode) ||
			    !mutex_trylock(&inode->i_mutex)) {
				iput(inode);
				inode = NULL;
				key.offset = (u64)-1;
				goto skip;
			}
		}

		if (!extent_locked) {
			struct btrfs_ordered_extent *ordered;

			btrfs_release_path(root, path);

			lock_extent(&BTRFS_I(inode)->io_tree, lock_start,
				    lock_end, GFP_NOFS);
			ordered = btrfs_lookup_first_ordered_extent(inode,
								    lock_end);
			if (ordered &&
			    ordered->file_offset <= lock_end &&
			    ordered->file_offset + ordered->len > lock_start) {
				/*
				 * in-flight write overlaps the range: drop
				 * the lock, wait it out, and retry the range
				 */
				unlock_extent(&BTRFS_I(inode)->io_tree,
					      lock_start, lock_end, GFP_NOFS);
				btrfs_start_ordered_extent(inode, ordered, 1);
				btrfs_put_ordered_extent(ordered);
				key.offset += num_bytes;
				goto skip;
			}
			if (ordered)
				btrfs_put_ordered_extent(ordered);

			extent_locked = 1;
			/* re-search so the item is revalidated under lock */
			continue;
		}

		if (nr_extents == 1) {
			/* update extent pointer in place */
			btrfs_set_file_extent_disk_bytenr(leaf, fi,
						new_extents[0].disk_bytenr);
			btrfs_set_file_extent_disk_num_bytes(leaf, fi,
						new_extents[0].disk_num_bytes);
			btrfs_mark_buffer_dirty(leaf);

			btrfs_drop_extent_cache(inode, key.offset,
						key.offset + num_bytes - 1, 0);

			/* take a ref on the new extent ... */
			ret = btrfs_inc_extent_ref(trans, root,
						new_extents[0].disk_bytenr,
						new_extents[0].disk_num_bytes,
						leaf->start,
						root->root_key.objectid,
						trans->transid,
						key.objectid);
			BUG_ON(ret);

			/* ... and drop the ref on the old one */
			ret = btrfs_free_extent(trans, root,
						extent_key->objectid,
						extent_key->offset,
						leaf->start,
						btrfs_header_owner(leaf),
						btrfs_header_generation(leaf),
						key.objectid, 0);
			BUG_ON(ret);

			btrfs_release_path(root, path);
			key.offset += num_bytes;
		} else {
			/* multi-extent replacement is not supported */
			BUG_ON(1);
#if 0
			u64 alloc_hint;
			u64 extent_len;
			int i;
			/*
			 * drop old extent pointer at first, then insert the
			 * new pointers one by one
			 */
			btrfs_release_path(root, path);
			ret = btrfs_drop_extents(trans, root, inode, key.offset,
						 key.offset + num_bytes,
						 key.offset, &alloc_hint);
			BUG_ON(ret);

			for (i = 0; i < nr_extents; i++) {
				if (ext_offset >= new_extents[i].num_bytes) {
					ext_offset -= new_extents[i].num_bytes;
					continue;
				}
				extent_len = min(new_extents[i].num_bytes -
						 ext_offset, num_bytes);

				ret = btrfs_insert_empty_item(trans, root,
							      path, &key,
							      sizeof(*fi));
				BUG_ON(ret);

				leaf = path->nodes[0];
				fi = btrfs_item_ptr(leaf, path->slots[0],
						struct btrfs_file_extent_item);
				btrfs_set_file_extent_generation(leaf, fi,
							trans->transid);
				btrfs_set_file_extent_type(leaf, fi,
							BTRFS_FILE_EXTENT_REG);
				btrfs_set_file_extent_disk_bytenr(leaf, fi,
						new_extents[i].disk_bytenr);
				btrfs_set_file_extent_disk_num_bytes(leaf, fi,
						new_extents[i].disk_num_bytes);
				btrfs_set_file_extent_ram_bytes(leaf, fi,
						new_extents[i].ram_bytes);

				btrfs_set_file_extent_compression(leaf, fi,
						new_extents[i].compression);
				btrfs_set_file_extent_encryption(leaf, fi,
						new_extents[i].encryption);
				btrfs_set_file_extent_other_encoding(leaf, fi,
						new_extents[i].other_encoding);

				btrfs_set_file_extent_num_bytes(leaf, fi,
							extent_len);
				ext_offset += new_extents[i].offset;
				btrfs_set_file_extent_offset(leaf, fi,
							ext_offset);
				btrfs_mark_buffer_dirty(leaf);

				btrfs_drop_extent_cache(inode, key.offset,
						key.offset + extent_len - 1, 0);

				ret = btrfs_inc_extent_ref(trans, root,
						new_extents[i].disk_bytenr,
						new_extents[i].disk_num_bytes,
						leaf->start,
						root->root_key.objectid,
						trans->transid, key.objectid);
				BUG_ON(ret);
				btrfs_release_path(root, path);

				inode_add_bytes(inode, extent_len);

				ext_offset = 0;
				num_bytes -= extent_len;
				key.offset += extent_len;

				if (num_bytes == 0)
					break;
			}
			BUG_ON(i >= nr_extents);
#endif
		}

		if (extent_locked) {
			unlock_extent(&BTRFS_I(inode)->io_tree, lock_start,
				      lock_end, GFP_NOFS);
			extent_locked = 0;
		}
skip:
		if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS &&
		    key.offset >= search_end)
			break;

		cond_resched();
	}
	ret = 0;
out:
	/* release path, i_mutex, any held extent lock, and the inode ref */
	btrfs_release_path(root, path);
	if (inode) {
		mutex_unlock(&inode->i_mutex);
		if (extent_locked) {
			unlock_extent(&BTRFS_I(inode)->io_tree, lock_start,
				      lock_end, GFP_NOFS);
		}
		iput(inode);
	}
	return ret;
}
6921 | |||
6922 | int btrfs_reloc_tree_cache_ref(struct btrfs_trans_handle *trans, | ||
6923 | struct btrfs_root *root, | ||
6924 | struct extent_buffer *buf, u64 orig_start) | ||
6925 | { | ||
6926 | int level; | ||
6927 | int ret; | ||
6928 | |||
6929 | BUG_ON(btrfs_header_generation(buf) != trans->transid); | ||
6930 | BUG_ON(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID); | ||
6931 | |||
6932 | level = btrfs_header_level(buf); | ||
6933 | if (level == 0) { | ||
6934 | struct btrfs_leaf_ref *ref; | ||
6935 | struct btrfs_leaf_ref *orig_ref; | ||
6936 | |||
6937 | orig_ref = btrfs_lookup_leaf_ref(root, orig_start); | ||
6938 | if (!orig_ref) | ||
6939 | return -ENOENT; | ||
6940 | |||
6941 | ref = btrfs_alloc_leaf_ref(root, orig_ref->nritems); | ||
6942 | if (!ref) { | ||
6943 | btrfs_free_leaf_ref(root, orig_ref); | ||
6944 | return -ENOMEM; | ||
6945 | } | ||
6946 | |||
6947 | ref->nritems = orig_ref->nritems; | ||
6948 | memcpy(ref->extents, orig_ref->extents, | ||
6949 | sizeof(ref->extents[0]) * ref->nritems); | ||
6950 | |||
6951 | btrfs_free_leaf_ref(root, orig_ref); | ||
6952 | |||
6953 | ref->root_gen = trans->transid; | ||
6954 | ref->bytenr = buf->start; | ||
6955 | ref->owner = btrfs_header_owner(buf); | ||
6956 | ref->generation = btrfs_header_generation(buf); | ||
6957 | |||
6958 | ret = btrfs_add_leaf_ref(root, ref, 0); | ||
6959 | WARN_ON(ret); | ||
6960 | btrfs_free_leaf_ref(root, ref); | ||
6961 | } | ||
6962 | return 0; | ||
6963 | } | ||
6964 | |||
/*
 * Drop the cached extent mappings in @target_root's inodes for every
 * non-inline, non-hole file extent item found in @leaf.
 *
 * For each EXTENT_DATA item the corresponding in-core inode is looked up
 * (btrfs_ilookup: only inodes already in cache are touched), the byte
 * range is locked in the inode's io_tree, the extent cache for that range
 * is dropped, and the range is unlocked.  Inodes that are not in cache
 * are remembered in @skip_objectid so repeated items for them are skipped
 * cheaply.  Always returns 0.
 */
static noinline int invalidate_extent_cache(struct btrfs_root *root,
					struct extent_buffer *leaf,
					struct btrfs_block_group_cache *group,
					struct btrfs_root *target_root)
{
	struct btrfs_key key;
	struct inode *inode = NULL;
	struct btrfs_file_extent_item *fi;
	struct extent_state *cached_state = NULL;
	u64 num_bytes;
	u64 skip_objectid = 0;	/* last objectid with no cached inode */
	u32 nritems;
	u32 i;

	nritems = btrfs_header_nritems(leaf);
	for (i = 0; i < nritems; i++) {
		btrfs_item_key_to_cpu(leaf, &key, i);
		if (key.objectid == skip_objectid ||
		    key.type != BTRFS_EXTENT_DATA_KEY)
			continue;
		fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
		/* inline extents and holes have no extent cache to drop */
		if (btrfs_file_extent_type(leaf, fi) ==
		    BTRFS_FILE_EXTENT_INLINE)
			continue;
		if (btrfs_file_extent_disk_bytenr(leaf, fi) == 0)
			continue;
		if (!inode || inode->i_ino != key.objectid) {
			/* switch inodes; iput(NULL) is a no-op */
			iput(inode);
			inode = btrfs_ilookup(target_root->fs_info->sb,
					      key.objectid, target_root, 1);
		}
		if (!inode) {
			skip_objectid = key.objectid;
			continue;
		}
		num_bytes = btrfs_file_extent_num_bytes(leaf, fi);

		/* drop the cached mapping under the range lock */
		lock_extent_bits(&BTRFS_I(inode)->io_tree, key.offset,
				 key.offset + num_bytes - 1, 0, &cached_state,
				 GFP_NOFS);
		btrfs_drop_extent_cache(inode, key.offset,
					key.offset + num_bytes - 1, 1);
		unlock_extent_cached(&BTRFS_I(inode)->io_tree, key.offset,
				     key.offset + num_bytes - 1, &cached_state,
				     GFP_NOFS);
		cond_resched();
	}
	iput(inode);
	return 0;
}
7015 | |||
/*
 * Repoint the file extent items in @leaf that fall inside block group
 * @group at their relocated copies in @reloc_inode.
 *
 * For each non-inline, non-hole EXTENT_DATA item whose disk extent
 * overlaps the block group, get_new_locations() resolves the new on-disk
 * location; the item and the leaf's cached leaf-ref entry are updated in
 * place, a ref is taken on the new extent and the ref on the old extent
 * is dropped.  @ext_index tracks the position in the leaf-ref's extent
 * array, which must stay in step with the items scanned (checked by the
 * final BUG_ON).  Always returns 0; failures are fatal (BUG_ON).
 */
static noinline int replace_extents_in_leaf(struct btrfs_trans_handle *trans,
					struct btrfs_root *root,
					struct extent_buffer *leaf,
					struct btrfs_block_group_cache *group,
					struct inode *reloc_inode)
{
	struct btrfs_key key;
	struct btrfs_key extent_key;
	struct btrfs_file_extent_item *fi;
	struct btrfs_leaf_ref *ref;
	struct disk_extent *new_extent;
	u64 bytenr;
	u64 num_bytes;
	u32 nritems;
	u32 i;
	int ext_index;
	int nr_extent;
	int ret;

	new_extent = kmalloc(sizeof(*new_extent), GFP_NOFS);
	BUG_ON(!new_extent);

	/* the leaf's ref-cache entry mirrors its extent list; keep in sync */
	ref = btrfs_lookup_leaf_ref(root, leaf->start);
	BUG_ON(!ref);

	ext_index = -1;
	nritems = btrfs_header_nritems(leaf);
	for (i = 0; i < nritems; i++) {
		btrfs_item_key_to_cpu(leaf, &key, i);
		if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
			continue;
		fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
		if (btrfs_file_extent_type(leaf, fi) ==
		    BTRFS_FILE_EXTENT_INLINE)
			continue;
		bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
		num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
		if (bytenr == 0)
			continue;

		/* counts every real disk extent, relocated or not */
		ext_index++;
		if (bytenr >= group->key.objectid + group->key.offset ||
		    bytenr + num_bytes <= group->key.objectid)
			continue;

		extent_key.objectid = bytenr;
		extent_key.offset = num_bytes;
		extent_key.type = BTRFS_EXTENT_ITEM_KEY;
		nr_extent = 1;
		ret = get_new_locations(reloc_inode, &extent_key,
					group->key.objectid, 1,
					&new_extent, &nr_extent);
		if (ret > 0)
			/* no relocated copy for this extent; leave it */
			continue;
		BUG_ON(ret < 0);

		BUG_ON(ref->extents[ext_index].bytenr != bytenr);
		BUG_ON(ref->extents[ext_index].num_bytes != num_bytes);
		ref->extents[ext_index].bytenr = new_extent->disk_bytenr;
		ref->extents[ext_index].num_bytes = new_extent->disk_num_bytes;

		btrfs_set_file_extent_disk_bytenr(leaf, fi,
						new_extent->disk_bytenr);
		btrfs_set_file_extent_disk_num_bytes(leaf, fi,
						new_extent->disk_num_bytes);
		btrfs_mark_buffer_dirty(leaf);

		/* ref the new location, then drop the old one */
		ret = btrfs_inc_extent_ref(trans, root,
					new_extent->disk_bytenr,
					new_extent->disk_num_bytes,
					leaf->start,
					root->root_key.objectid,
					trans->transid, key.objectid);
		BUG_ON(ret);

		ret = btrfs_free_extent(trans, root,
					bytenr, num_bytes, leaf->start,
					btrfs_header_owner(leaf),
					btrfs_header_generation(leaf),
					key.objectid, 0);
		BUG_ON(ret);
		cond_resched();
	}
	kfree(new_extent);
	/* every disk extent in the leaf must have a ref-cache slot */
	BUG_ON(ext_index + 1 != ref->nritems);
	btrfs_free_leaf_ref(root, ref);
	return 0;
}
7104 | |||
7105 | int btrfs_free_reloc_root(struct btrfs_trans_handle *trans, | ||
7106 | struct btrfs_root *root) | ||
7107 | { | ||
7108 | struct btrfs_root *reloc_root; | ||
7109 | int ret; | ||
7110 | |||
7111 | if (root->reloc_root) { | ||
7112 | reloc_root = root->reloc_root; | ||
7113 | root->reloc_root = NULL; | ||
7114 | list_add(&reloc_root->dead_list, | ||
7115 | &root->fs_info->dead_reloc_roots); | ||
7116 | |||
7117 | btrfs_set_root_bytenr(&reloc_root->root_item, | ||
7118 | reloc_root->node->start); | ||
7119 | btrfs_set_root_level(&root->root_item, | ||
7120 | btrfs_header_level(reloc_root->node)); | ||
7121 | memset(&reloc_root->root_item.drop_progress, 0, | ||
7122 | sizeof(struct btrfs_disk_key)); | ||
7123 | reloc_root->root_item.drop_level = 0; | ||
7124 | |||
7125 | ret = btrfs_update_root(trans, root->fs_info->tree_root, | ||
7126 | &reloc_root->root_key, | ||
7127 | &reloc_root->root_item); | ||
7128 | BUG_ON(ret); | ||
7129 | } | ||
7130 | return 0; | ||
7131 | } | ||
7132 | |||
/*
 * Drop every reloc root queued on fs_info->dead_reloc_roots.
 *
 * Each root is dropped incrementally with btrfs_drop_snapshot(): while it
 * returns -EAGAIN the transaction is ended and restarted so the drop does
 * not pin one transaction forever.  On final completion the loop breaks
 * with drop_mutex still held and @trans still live; the root item is then
 * deleted from the tree of tree roots before the lock and transaction are
 * released.  Always returns 0; failures are fatal (BUG_ON).
 */
int btrfs_drop_dead_reloc_roots(struct btrfs_root *root)
{
	struct btrfs_trans_handle *trans;
	struct btrfs_root *reloc_root;
	struct btrfs_root *prev_root = NULL;
	struct list_head dead_roots;
	int ret;
	unsigned long nr;

	/* take the whole list privately so new entries aren't affected */
	INIT_LIST_HEAD(&dead_roots);
	list_splice_init(&root->fs_info->dead_reloc_roots, &dead_roots);

	while (!list_empty(&dead_roots)) {
		reloc_root = list_entry(dead_roots.prev,
					struct btrfs_root, dead_list);
		list_del_init(&reloc_root->dead_list);

		BUG_ON(reloc_root->commit_root != NULL);
		while (1) {
			trans = btrfs_join_transaction(root, 1);
			BUG_ON(!trans);

			mutex_lock(&root->fs_info->drop_mutex);
			ret = btrfs_drop_snapshot(trans, reloc_root);
			/* done (or error): leave with drop_mutex held */
			if (ret != -EAGAIN)
				break;
			mutex_unlock(&root->fs_info->drop_mutex);

			/* yield the transaction between drop increments */
			nr = trans->blocks_used;
			ret = btrfs_end_transaction(trans, root);
			BUG_ON(ret);
			btrfs_btree_balance_dirty(root, nr);
		}

		free_extent_buffer(reloc_root->node);

		/* remove the reloc root's item; trans is still open here */
		ret = btrfs_del_root(trans, root->fs_info->tree_root,
				     &reloc_root->root_key);
		BUG_ON(ret);
		mutex_unlock(&root->fs_info->drop_mutex);

		nr = trans->blocks_used;
		ret = btrfs_end_transaction(trans, root);
		BUG_ON(ret);
		btrfs_btree_balance_dirty(root, nr);

		/*
		 * freeing of each dropped root is deferred by one
		 * iteration; presumably so its leaf refs stay valid until
		 * the next drop completes — TODO confirm
		 */
		kfree(prev_root);
		prev_root = reloc_root;
	}
	if (prev_root) {
		btrfs_remove_leaf_refs(prev_root, (u64)-1, 0);
		kfree(prev_root);
	}
	return 0;
}
7188 | |||
7189 | int btrfs_add_dead_reloc_root(struct btrfs_root *root) | ||
7190 | { | ||
7191 | list_add(&root->dead_list, &root->fs_info->dead_reloc_roots); | ||
7192 | return 0; | ||
7193 | } | ||
7194 | |||
/*
 * Finish relocation work left over from a previous mount.
 *
 * Any dead reloc roots found under BTRFS_TREE_RELOC_OBJECTID are queued
 * (btrfs_find_dead_roots) and a transaction commit is forced; presumably
 * the commit path processes the queued dead roots — TODO confirm against
 * the commit code.  Finally the data relocation tree is read and its
 * orphans are cleaned up.  Always returns 0; failures are fatal (BUG_ON).
 */
int btrfs_cleanup_reloc_trees(struct btrfs_root *root)
{
	struct btrfs_root *reloc_root;
	struct btrfs_trans_handle *trans;
	struct btrfs_key location;
	int found;
	int ret;

	/* scan for reloc roots left behind by an interrupted balance */
	mutex_lock(&root->fs_info->tree_reloc_mutex);
	ret = btrfs_find_dead_roots(root, BTRFS_TREE_RELOC_OBJECTID, NULL);
	BUG_ON(ret);
	found = !list_empty(&root->fs_info->dead_reloc_roots);
	mutex_unlock(&root->fs_info->tree_reloc_mutex);

	if (found) {
		trans = btrfs_start_transaction(root, 1);
		BUG_ON(!trans);
		ret = btrfs_commit_transaction(trans, root);
		BUG_ON(ret);
	}

	/* clean up orphan items in the data relocation tree */
	location.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
	location.offset = (u64)-1;
	location.type = BTRFS_ROOT_ITEM_KEY;

	reloc_root = btrfs_read_fs_root_no_name(root->fs_info, &location);
	BUG_ON(!reloc_root);
	btrfs_orphan_cleanup(reloc_root);
	return 0;
}
7225 | |||
7226 | static noinline int init_reloc_tree(struct btrfs_trans_handle *trans, | ||
7227 | struct btrfs_root *root) | ||
7228 | { | ||
7229 | struct btrfs_root *reloc_root; | ||
7230 | struct extent_buffer *eb; | ||
7231 | struct btrfs_root_item *root_item; | ||
7232 | struct btrfs_key root_key; | ||
7233 | int ret; | ||
7234 | |||
7235 | BUG_ON(!root->ref_cows); | ||
7236 | if (root->reloc_root) | ||
7237 | return 0; | ||
7238 | |||
7239 | root_item = kmalloc(sizeof(*root_item), GFP_NOFS); | ||
7240 | BUG_ON(!root_item); | ||
7241 | |||
7242 | ret = btrfs_copy_root(trans, root, root->commit_root, | ||
7243 | &eb, BTRFS_TREE_RELOC_OBJECTID); | ||
7244 | BUG_ON(ret); | ||
7245 | |||
7246 | root_key.objectid = BTRFS_TREE_RELOC_OBJECTID; | ||
7247 | root_key.offset = root->root_key.objectid; | ||
7248 | root_key.type = BTRFS_ROOT_ITEM_KEY; | ||
7249 | |||
7250 | memcpy(root_item, &root->root_item, sizeof(root_item)); | ||
7251 | btrfs_set_root_refs(root_item, 0); | ||
7252 | btrfs_set_root_bytenr(root_item, eb->start); | ||
7253 | btrfs_set_root_level(root_item, btrfs_header_level(eb)); | ||
7254 | btrfs_set_root_generation(root_item, trans->transid); | ||
7255 | |||
7256 | btrfs_tree_unlock(eb); | ||
7257 | free_extent_buffer(eb); | ||
7258 | |||
7259 | ret = btrfs_insert_root(trans, root->fs_info->tree_root, | ||
7260 | &root_key, root_item); | ||
7261 | BUG_ON(ret); | ||
7262 | kfree(root_item); | ||
7263 | |||
7264 | reloc_root = btrfs_read_fs_root_no_radix(root->fs_info->tree_root, | ||
7265 | &root_key); | ||
7266 | BUG_ON(!reloc_root); | ||
7267 | reloc_root->last_trans = trans->transid; | ||
7268 | reloc_root->commit_root = NULL; | ||
7269 | reloc_root->ref_tree = &root->fs_info->reloc_ref_tree; | ||
7270 | |||
7271 | root->reloc_root = reloc_root; | ||
7272 | return 0; | ||
7273 | } | ||
7274 | |||
/*
 * Core function of space balance.
 *
 * The idea is using reloc trees to relocate tree blocks in reference
 * counted roots. There is one reloc tree for each subvol, and all
 * reloc trees share same root key objectid. Reloc trees are snapshots
 * of the latest committed roots of subvols (root->commit_root).
 *
 * To relocate a tree block referenced by a subvol, there are two steps.
 * COW the block through subvol's reloc tree, then update block pointer
 * in the subvol to point to the new block. Since all reloc trees share
 * same root key objectid, doing special handling for tree blocks owned
 * by them is easy. Once a tree block has been COWed in one reloc tree,
 * we can use the resulting new block directly when the same block is
 * required to COW again through other reloc trees. This way, relocated
 * tree blocks are shared between reloc trees, so they are also shared
 * between subvols.
 */
/*
 * Relocate the tree blocks along one reference path (see the comment
 * above for the overall scheme).
 *
 * For non-reference-counted roots a plain COWing search down to
 * @lowest_level is sufficient.  Otherwise the path is first COWed through
 * the subvol's reloc tree (creating it on demand); nodes below the level
 * shared with the previously processed path are re-resolved and, for data
 * reference paths, the leaf's file extents are repointed at their
 * relocated copies.  The fs tree is then merged with the reloc tree so it
 * picks up the relocated blocks, and stale extent mappings in affected
 * inodes are invalidated.  Runs under tree_reloc_mutex.  Always returns
 * 0; failures are fatal (BUG_ON).
 */
static noinline int relocate_one_path(struct btrfs_trans_handle *trans,
				struct btrfs_root *root,
				struct btrfs_path *path,
				struct btrfs_key *first_key,
				struct btrfs_ref_path *ref_path,
				struct btrfs_block_group_cache *group,
				struct inode *reloc_inode)
{
	struct btrfs_root *reloc_root;
	struct extent_buffer *eb = NULL;
	struct btrfs_key *keys;
	u64 *nodes;
	int level;
	int shared_level;
	int lowest_level = 0;
	int ret;

	/* tree-block references encode the owning level in owner_objectid */
	if (ref_path->owner_objectid < BTRFS_FIRST_FREE_OBJECTID)
		lowest_level = ref_path->owner_objectid;

	if (!root->ref_cows) {
		/* non-refcounted root: COWing the search path is enough */
		path->lowest_level = lowest_level;
		ret = btrfs_search_slot(trans, root, first_key, path, 0, 1);
		BUG_ON(ret < 0);
		path->lowest_level = 0;
		btrfs_release_path(root, path);
		return 0;
	}

	mutex_lock(&root->fs_info->tree_reloc_mutex);
	ret = init_reloc_tree(trans, root);
	BUG_ON(ret);
	reloc_root = root->reloc_root;

	shared_level = ref_path->shared_level;
	ref_path->shared_level = BTRFS_MAX_LEVEL - 1;

	/* forget cached nodes above the level shared with the last path */
	keys = ref_path->node_keys;
	nodes = ref_path->new_nodes;
	memset(&keys[shared_level + 1], 0,
	       sizeof(*keys) * (BTRFS_MAX_LEVEL - shared_level - 1));
	memset(&nodes[shared_level + 1], 0,
	       sizeof(*nodes) * (BTRFS_MAX_LEVEL - shared_level - 1));

	if (nodes[lowest_level] == 0) {
		/* COW down through the reloc tree and record the new path */
		path->lowest_level = lowest_level;
		ret = btrfs_search_slot(trans, reloc_root, first_key, path,
					0, 1);
		BUG_ON(ret);
		for (level = lowest_level; level < BTRFS_MAX_LEVEL; level++) {
			eb = path->nodes[level];
			if (!eb || eb == reloc_root->node)
				break;
			nodes[level] = eb->start;
			if (level == 0)
				btrfs_item_key_to_cpu(eb, &keys[level], 0);
			else
				btrfs_node_key_to_cpu(eb, &keys[level], 0);
		}
		if (nodes[0] &&
		    ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
			/* data path: repoint file extents in the leaf */
			eb = path->nodes[0];
			ret = replace_extents_in_leaf(trans, reloc_root, eb,
						      group, reloc_inode);
			BUG_ON(ret);
		}
		btrfs_release_path(reloc_root, path);
	} else {
		/* blocks already relocated: merge them into the reloc tree */
		ret = btrfs_merge_path(trans, reloc_root, keys, nodes,
				       lowest_level);
		BUG_ON(ret);
	}

	/*
	 * replace tree blocks in the fs tree with tree blocks in
	 * the reloc tree.
	 */
	ret = btrfs_merge_path(trans, root, keys, nodes, lowest_level);
	BUG_ON(ret < 0);

	if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
		/* drop stale extent mappings for the rewritten leaf */
		ret = btrfs_search_slot(trans, reloc_root, first_key, path,
					0, 0);
		BUG_ON(ret);
		extent_buffer_get(path->nodes[0]);
		eb = path->nodes[0];
		btrfs_release_path(reloc_root, path);
		ret = invalidate_extent_cache(reloc_root, eb, group, root);
		BUG_ON(ret);
		free_extent_buffer(eb);
	}

	mutex_unlock(&root->fs_info->tree_reloc_mutex);
	path->lowest_level = 0;
	return 0;
}
7389 | |||
7390 | static noinline int relocate_tree_block(struct btrfs_trans_handle *trans, | ||
7391 | struct btrfs_root *root, | ||
7392 | struct btrfs_path *path, | ||
7393 | struct btrfs_key *first_key, | ||
7394 | struct btrfs_ref_path *ref_path) | ||
7395 | { | ||
7396 | int ret; | ||
7397 | |||
7398 | ret = relocate_one_path(trans, root, path, first_key, | ||
7399 | ref_path, NULL, NULL); | ||
7400 | BUG_ON(ret); | ||
7401 | |||
7402 | return 0; | ||
7403 | } | ||
7404 | |||
7405 | static noinline int del_extent_zero(struct btrfs_trans_handle *trans, | ||
7406 | struct btrfs_root *extent_root, | ||
7407 | struct btrfs_path *path, | ||
7408 | struct btrfs_key *extent_key) | ||
7409 | { | ||
7410 | int ret; | ||
7411 | |||
7412 | ret = btrfs_search_slot(trans, extent_root, extent_key, path, -1, 1); | ||
7413 | if (ret) | ||
7414 | goto out; | ||
7415 | ret = btrfs_del_item(trans, extent_root, path); | ||
7416 | out: | ||
7417 | btrfs_release_path(extent_root, path); | ||
7418 | return ret; | ||
7419 | } | ||
7420 | |||
7421 | static noinline struct btrfs_root *read_ref_root(struct btrfs_fs_info *fs_info, | ||
7422 | struct btrfs_ref_path *ref_path) | ||
7423 | { | ||
7424 | struct btrfs_key root_key; | ||
7425 | |||
7426 | root_key.objectid = ref_path->root_objectid; | ||
7427 | root_key.type = BTRFS_ROOT_ITEM_KEY; | ||
7428 | if (is_cowonly_root(ref_path->root_objectid)) | ||
7429 | root_key.offset = 0; | ||
7430 | else | ||
7431 | root_key.offset = (u64)-1; | ||
7432 | |||
7433 | return btrfs_read_fs_root_no_name(fs_info, &root_key); | ||
7434 | } | ||
7435 | |||
7436 | static noinline int relocate_one_extent(struct btrfs_root *extent_root, | ||
7437 | struct btrfs_path *path, | ||
7438 | struct btrfs_key *extent_key, | ||
7439 | struct btrfs_block_group_cache *group, | ||
7440 | struct inode *reloc_inode, int pass) | ||
7441 | { | ||
7442 | struct btrfs_trans_handle *trans; | ||
7443 | struct btrfs_root *found_root; | ||
7444 | struct btrfs_ref_path *ref_path = NULL; | ||
7445 | struct disk_extent *new_extents = NULL; | ||
7446 | int nr_extents = 0; | ||
7447 | int loops; | ||
7448 | int ret; | ||
7449 | int level; | ||
7450 | struct btrfs_key first_key; | ||
7451 | u64 prev_block = 0; | ||
7452 | |||
7453 | |||
7454 | trans = btrfs_start_transaction(extent_root, 1); | ||
7455 | BUG_ON(!trans); | ||
7456 | |||
7457 | if (extent_key->objectid == 0) { | ||
7458 | ret = del_extent_zero(trans, extent_root, path, extent_key); | ||
7459 | goto out; | ||
7460 | } | ||
7461 | |||
7462 | ref_path = kmalloc(sizeof(*ref_path), GFP_NOFS); | ||
7463 | if (!ref_path) { | ||
7464 | ret = -ENOMEM; | ||
7465 | goto out; | ||
7466 | } | ||
7467 | |||
7468 | for (loops = 0; ; loops++) { | ||
7469 | if (loops == 0) { | ||
7470 | ret = btrfs_first_ref_path(trans, extent_root, ref_path, | ||
7471 | extent_key->objectid); | ||
7472 | } else { | ||
7473 | ret = btrfs_next_ref_path(trans, extent_root, ref_path); | ||
7474 | } | ||
7475 | if (ret < 0) | ||
7476 | goto out; | ||
7477 | if (ret > 0) | ||
7478 | break; | ||
7479 | |||
7480 | if (ref_path->root_objectid == BTRFS_TREE_LOG_OBJECTID || | ||
7481 | ref_path->root_objectid == BTRFS_TREE_RELOC_OBJECTID) | ||
7482 | continue; | ||
7483 | |||
7484 | found_root = read_ref_root(extent_root->fs_info, ref_path); | ||
7485 | BUG_ON(!found_root); | ||
7486 | /* | ||
7487 | * for reference counted tree, only process reference paths | ||
7488 | * rooted at the latest committed root. | ||
7489 | */ | ||
7490 | if (found_root->ref_cows && | ||
7491 | ref_path->root_generation != found_root->root_key.offset) | ||
7492 | continue; | ||
7493 | |||
7494 | if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) { | ||
7495 | if (pass == 0) { | ||
7496 | /* | ||
7497 | * copy data extents to new locations | ||
7498 | */ | ||
7499 | u64 group_start = group->key.objectid; | ||
7500 | ret = relocate_data_extent(reloc_inode, | ||
7501 | extent_key, | ||
7502 | group_start); | ||
7503 | if (ret < 0) | ||
7504 | goto out; | ||
7505 | break; | ||
7506 | } | ||
7507 | level = 0; | ||
7508 | } else { | ||
7509 | level = ref_path->owner_objectid; | ||
7510 | } | ||
7511 | |||
7512 | if (prev_block != ref_path->nodes[level]) { | ||
7513 | struct extent_buffer *eb; | ||
7514 | u64 block_start = ref_path->nodes[level]; | ||
7515 | u64 block_size = btrfs_level_size(found_root, level); | ||
7516 | |||
7517 | eb = read_tree_block(found_root, block_start, | ||
7518 | block_size, 0); | ||
7519 | btrfs_tree_lock(eb); | ||
7520 | BUG_ON(level != btrfs_header_level(eb)); | ||
7521 | |||
7522 | if (level == 0) | ||
7523 | btrfs_item_key_to_cpu(eb, &first_key, 0); | ||
7524 | else | ||
7525 | btrfs_node_key_to_cpu(eb, &first_key, 0); | ||
7526 | |||
7527 | btrfs_tree_unlock(eb); | ||
7528 | free_extent_buffer(eb); | ||
7529 | prev_block = block_start; | ||
7530 | } | ||
7531 | |||
7532 | mutex_lock(&extent_root->fs_info->trans_mutex); | ||
7533 | btrfs_record_root_in_trans(found_root); | ||
7534 | mutex_unlock(&extent_root->fs_info->trans_mutex); | ||
7535 | if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) { | ||
7536 | /* | ||
7537 | * try to update data extent references while | ||
7538 | * keeping metadata shared between snapshots. | ||
7539 | */ | ||
7540 | if (pass == 1) { | ||
7541 | ret = relocate_one_path(trans, found_root, | ||
7542 | path, &first_key, ref_path, | ||
7543 | group, reloc_inode); | ||
7544 | if (ret < 0) | ||
7545 | goto out; | ||
7546 | continue; | ||
7547 | } | ||
7548 | /* | ||
7549 | * use fallback method to process the remaining | ||
7550 | * references. | ||
7551 | */ | ||
7552 | if (!new_extents) { | ||
7553 | u64 group_start = group->key.objectid; | ||
7554 | new_extents = kmalloc(sizeof(*new_extents), | ||
7555 | GFP_NOFS); | ||
7556 | nr_extents = 1; | ||
7557 | ret = get_new_locations(reloc_inode, | ||
7558 | extent_key, | ||
7559 | group_start, 1, | ||
7560 | &new_extents, | ||
7561 | &nr_extents); | ||
7562 | if (ret) | ||
7563 | goto out; | ||
7564 | } | ||
7565 | ret = replace_one_extent(trans, found_root, | ||
7566 | path, extent_key, | ||
7567 | &first_key, ref_path, | ||
7568 | new_extents, nr_extents); | ||
7569 | } else { | ||
7570 | ret = relocate_tree_block(trans, found_root, path, | ||
7571 | &first_key, ref_path); | ||
7572 | } | ||
7573 | if (ret < 0) | ||
7574 | goto out; | ||
7575 | } | ||
7576 | ret = 0; | ||
7577 | out: | ||
7578 | btrfs_end_transaction(trans, extent_root); | ||
7579 | kfree(new_extents); | ||
7580 | kfree(ref_path); | ||
7581 | return ret; | ||
7582 | } | ||
7583 | #endif | ||
7584 | |||
7585 | static u64 update_block_group_flags(struct btrfs_root *root, u64 flags) | 6482 | static u64 update_block_group_flags(struct btrfs_root *root, u64 flags) |
7586 | { | 6483 | { |
7587 | u64 num_devices; | 6484 | u64 num_devices; |
7588 | u64 stripped = BTRFS_BLOCK_GROUP_RAID0 | | 6485 | u64 stripped = BTRFS_BLOCK_GROUP_RAID0 | |
7589 | BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10; | 6486 | BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10; |
7590 | 6487 | ||
7591 | num_devices = root->fs_info->fs_devices->rw_devices; | 6488 | /* |
6489 | * we add in the count of missing devices because we want | ||
6490 | * to make sure that any RAID levels on a degraded FS | ||
6491 | * continue to be honored. | ||
6492 | */ | ||
6493 | num_devices = root->fs_info->fs_devices->rw_devices + | ||
6494 | root->fs_info->fs_devices->missing_devices; | ||
6495 | |||
7592 | if (num_devices == 1) { | 6496 | if (num_devices == 1) { |
7593 | stripped |= BTRFS_BLOCK_GROUP_DUP; | 6497 | stripped |= BTRFS_BLOCK_GROUP_DUP; |
7594 | stripped = flags & ~stripped; | 6498 | stripped = flags & ~stripped; |
@@ -7636,13 +6540,14 @@ static int set_block_group_ro(struct btrfs_block_group_cache *cache) | |||
7636 | 6540 | ||
7637 | if (sinfo->bytes_used + sinfo->bytes_reserved + sinfo->bytes_pinned + | 6541 | if (sinfo->bytes_used + sinfo->bytes_reserved + sinfo->bytes_pinned + |
7638 | sinfo->bytes_may_use + sinfo->bytes_readonly + | 6542 | sinfo->bytes_may_use + sinfo->bytes_readonly + |
7639 | cache->reserved_pinned + num_bytes < sinfo->total_bytes) { | 6543 | cache->reserved_pinned + num_bytes <= sinfo->total_bytes) { |
7640 | sinfo->bytes_readonly += num_bytes; | 6544 | sinfo->bytes_readonly += num_bytes; |
7641 | sinfo->bytes_reserved += cache->reserved_pinned; | 6545 | sinfo->bytes_reserved += cache->reserved_pinned; |
7642 | cache->reserved_pinned = 0; | 6546 | cache->reserved_pinned = 0; |
7643 | cache->ro = 1; | 6547 | cache->ro = 1; |
7644 | ret = 0; | 6548 | ret = 0; |
7645 | } | 6549 | } |
6550 | |||
7646 | spin_unlock(&cache->lock); | 6551 | spin_unlock(&cache->lock); |
7647 | spin_unlock(&sinfo->lock); | 6552 | spin_unlock(&sinfo->lock); |
7648 | return ret; | 6553 | return ret; |
@@ -7658,18 +6563,20 @@ int btrfs_set_block_group_ro(struct btrfs_root *root, | |||
7658 | 6563 | ||
7659 | BUG_ON(cache->ro); | 6564 | BUG_ON(cache->ro); |
7660 | 6565 | ||
7661 | trans = btrfs_join_transaction(root, 1); | 6566 | trans = btrfs_join_transaction(root); |
7662 | BUG_ON(IS_ERR(trans)); | 6567 | BUG_ON(IS_ERR(trans)); |
7663 | 6568 | ||
7664 | alloc_flags = update_block_group_flags(root, cache->flags); | 6569 | alloc_flags = update_block_group_flags(root, cache->flags); |
7665 | if (alloc_flags != cache->flags) | 6570 | if (alloc_flags != cache->flags) |
7666 | do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 1); | 6571 | do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, |
6572 | CHUNK_ALLOC_FORCE); | ||
7667 | 6573 | ||
7668 | ret = set_block_group_ro(cache); | 6574 | ret = set_block_group_ro(cache); |
7669 | if (!ret) | 6575 | if (!ret) |
7670 | goto out; | 6576 | goto out; |
7671 | alloc_flags = get_alloc_profile(root, cache->space_info->flags); | 6577 | alloc_flags = get_alloc_profile(root, cache->space_info->flags); |
7672 | ret = do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 1); | 6578 | ret = do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, |
6579 | CHUNK_ALLOC_FORCE); | ||
7673 | if (ret < 0) | 6580 | if (ret < 0) |
7674 | goto out; | 6581 | goto out; |
7675 | ret = set_block_group_ro(cache); | 6582 | ret = set_block_group_ro(cache); |
@@ -7678,6 +6585,70 @@ out: | |||
7678 | return ret; | 6585 | return ret; |
7679 | } | 6586 | } |
7680 | 6587 | ||
6588 | int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans, | ||
6589 | struct btrfs_root *root, u64 type) | ||
6590 | { | ||
6591 | u64 alloc_flags = get_alloc_profile(root, type); | ||
6592 | return do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, | ||
6593 | CHUNK_ALLOC_FORCE); | ||
6594 | } | ||
6595 | |||
6596 | /* | ||
6597 | * helper to account the unused space of all the readonly block group in the | ||
6598 | * list. takes mirrors into account. | ||
6599 | */ | ||
6600 | static u64 __btrfs_get_ro_block_group_free_space(struct list_head *groups_list) | ||
6601 | { | ||
6602 | struct btrfs_block_group_cache *block_group; | ||
6603 | u64 free_bytes = 0; | ||
6604 | int factor; | ||
6605 | |||
6606 | list_for_each_entry(block_group, groups_list, list) { | ||
6607 | spin_lock(&block_group->lock); | ||
6608 | |||
6609 | if (!block_group->ro) { | ||
6610 | spin_unlock(&block_group->lock); | ||
6611 | continue; | ||
6612 | } | ||
6613 | |||
6614 | if (block_group->flags & (BTRFS_BLOCK_GROUP_RAID1 | | ||
6615 | BTRFS_BLOCK_GROUP_RAID10 | | ||
6616 | BTRFS_BLOCK_GROUP_DUP)) | ||
6617 | factor = 2; | ||
6618 | else | ||
6619 | factor = 1; | ||
6620 | |||
6621 | free_bytes += (block_group->key.offset - | ||
6622 | btrfs_block_group_used(&block_group->item)) * | ||
6623 | factor; | ||
6624 | |||
6625 | spin_unlock(&block_group->lock); | ||
6626 | } | ||
6627 | |||
6628 | return free_bytes; | ||
6629 | } | ||
6630 | |||
6631 | /* | ||
6632 | * helper to account the unused space of all the readonly block group in the | ||
6633 | * space_info. takes mirrors into account. | ||
6634 | */ | ||
6635 | u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo) | ||
6636 | { | ||
6637 | int i; | ||
6638 | u64 free_bytes = 0; | ||
6639 | |||
6640 | spin_lock(&sinfo->lock); | ||
6641 | |||
6642 | for(i = 0; i < BTRFS_NR_RAID_TYPES; i++) | ||
6643 | if (!list_empty(&sinfo->block_groups[i])) | ||
6644 | free_bytes += __btrfs_get_ro_block_group_free_space( | ||
6645 | &sinfo->block_groups[i]); | ||
6646 | |||
6647 | spin_unlock(&sinfo->lock); | ||
6648 | |||
6649 | return free_bytes; | ||
6650 | } | ||
6651 | |||
7681 | int btrfs_set_block_group_rw(struct btrfs_root *root, | 6652 | int btrfs_set_block_group_rw(struct btrfs_root *root, |
7682 | struct btrfs_block_group_cache *cache) | 6653 | struct btrfs_block_group_cache *cache) |
7683 | { | 6654 | { |
@@ -7758,7 +6729,7 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr) | |||
7758 | mutex_lock(&root->fs_info->chunk_mutex); | 6729 | mutex_lock(&root->fs_info->chunk_mutex); |
7759 | list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) { | 6730 | list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) { |
7760 | u64 min_free = btrfs_block_group_used(&block_group->item); | 6731 | u64 min_free = btrfs_block_group_used(&block_group->item); |
7761 | u64 dev_offset, max_avail; | 6732 | u64 dev_offset; |
7762 | 6733 | ||
7763 | /* | 6734 | /* |
7764 | * check to make sure we can actually find a chunk with enough | 6735 | * check to make sure we can actually find a chunk with enough |
@@ -7766,7 +6737,7 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr) | |||
7766 | */ | 6737 | */ |
7767 | if (device->total_bytes > device->bytes_used + min_free) { | 6738 | if (device->total_bytes > device->bytes_used + min_free) { |
7768 | ret = find_free_dev_extent(NULL, device, min_free, | 6739 | ret = find_free_dev_extent(NULL, device, min_free, |
7769 | &dev_offset, &max_avail); | 6740 | &dev_offset, NULL); |
7770 | if (!ret) | 6741 | if (!ret) |
7771 | break; | 6742 | break; |
7772 | ret = -1; | 6743 | ret = -1; |
@@ -7814,6 +6785,40 @@ out: | |||
7814 | return ret; | 6785 | return ret; |
7815 | } | 6786 | } |
7816 | 6787 | ||
6788 | void btrfs_put_block_group_cache(struct btrfs_fs_info *info) | ||
6789 | { | ||
6790 | struct btrfs_block_group_cache *block_group; | ||
6791 | u64 last = 0; | ||
6792 | |||
6793 | while (1) { | ||
6794 | struct inode *inode; | ||
6795 | |||
6796 | block_group = btrfs_lookup_first_block_group(info, last); | ||
6797 | while (block_group) { | ||
6798 | spin_lock(&block_group->lock); | ||
6799 | if (block_group->iref) | ||
6800 | break; | ||
6801 | spin_unlock(&block_group->lock); | ||
6802 | block_group = next_block_group(info->tree_root, | ||
6803 | block_group); | ||
6804 | } | ||
6805 | if (!block_group) { | ||
6806 | if (last == 0) | ||
6807 | break; | ||
6808 | last = 0; | ||
6809 | continue; | ||
6810 | } | ||
6811 | |||
6812 | inode = block_group->inode; | ||
6813 | block_group->iref = 0; | ||
6814 | block_group->inode = NULL; | ||
6815 | spin_unlock(&block_group->lock); | ||
6816 | iput(inode); | ||
6817 | last = block_group->key.objectid + block_group->key.offset; | ||
6818 | btrfs_put_block_group(block_group); | ||
6819 | } | ||
6820 | } | ||
6821 | |||
7817 | int btrfs_free_block_groups(struct btrfs_fs_info *info) | 6822 | int btrfs_free_block_groups(struct btrfs_fs_info *info) |
7818 | { | 6823 | { |
7819 | struct btrfs_block_group_cache *block_group; | 6824 | struct btrfs_block_group_cache *block_group; |
@@ -7845,6 +6850,13 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) | |||
7845 | if (block_group->cached == BTRFS_CACHE_STARTED) | 6850 | if (block_group->cached == BTRFS_CACHE_STARTED) |
7846 | wait_block_group_cache_done(block_group); | 6851 | wait_block_group_cache_done(block_group); |
7847 | 6852 | ||
6853 | /* | ||
6854 | * We haven't cached this block group, which means we could | ||
6855 | * possibly have excluded extents on this block group. | ||
6856 | */ | ||
6857 | if (block_group->cached == BTRFS_CACHE_NO) | ||
6858 | free_excluded_extents(info->extent_root, block_group); | ||
6859 | |||
7848 | btrfs_remove_free_space_cache(block_group); | 6860 | btrfs_remove_free_space_cache(block_group); |
7849 | btrfs_put_block_group(block_group); | 6861 | btrfs_put_block_group(block_group); |
7850 | 6862 | ||
@@ -7897,6 +6909,8 @@ int btrfs_read_block_groups(struct btrfs_root *root) | |||
7897 | struct btrfs_key key; | 6909 | struct btrfs_key key; |
7898 | struct btrfs_key found_key; | 6910 | struct btrfs_key found_key; |
7899 | struct extent_buffer *leaf; | 6911 | struct extent_buffer *leaf; |
6912 | int need_clear = 0; | ||
6913 | u64 cache_gen; | ||
7900 | 6914 | ||
7901 | root = info->extent_root; | 6915 | root = info->extent_root; |
7902 | key.objectid = 0; | 6916 | key.objectid = 0; |
@@ -7905,6 +6919,16 @@ int btrfs_read_block_groups(struct btrfs_root *root) | |||
7905 | path = btrfs_alloc_path(); | 6919 | path = btrfs_alloc_path(); |
7906 | if (!path) | 6920 | if (!path) |
7907 | return -ENOMEM; | 6921 | return -ENOMEM; |
6922 | path->reada = 1; | ||
6923 | |||
6924 | cache_gen = btrfs_super_cache_generation(&root->fs_info->super_copy); | ||
6925 | if (cache_gen != 0 && | ||
6926 | btrfs_super_generation(&root->fs_info->super_copy) != cache_gen) | ||
6927 | need_clear = 1; | ||
6928 | if (btrfs_test_opt(root, CLEAR_CACHE)) | ||
6929 | need_clear = 1; | ||
6930 | if (!btrfs_test_opt(root, SPACE_CACHE) && cache_gen) | ||
6931 | printk(KERN_INFO "btrfs: disk space caching is enabled\n"); | ||
7908 | 6932 | ||
7909 | while (1) { | 6933 | while (1) { |
7910 | ret = find_first_block_group(root, path, &key); | 6934 | ret = find_first_block_group(root, path, &key); |
@@ -7912,7 +6936,6 @@ int btrfs_read_block_groups(struct btrfs_root *root) | |||
7912 | break; | 6936 | break; |
7913 | if (ret != 0) | 6937 | if (ret != 0) |
7914 | goto error; | 6938 | goto error; |
7915 | |||
7916 | leaf = path->nodes[0]; | 6939 | leaf = path->nodes[0]; |
7917 | btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); | 6940 | btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); |
7918 | cache = kzalloc(sizeof(*cache), GFP_NOFS); | 6941 | cache = kzalloc(sizeof(*cache), GFP_NOFS); |
@@ -7920,21 +6943,22 @@ int btrfs_read_block_groups(struct btrfs_root *root) | |||
7920 | ret = -ENOMEM; | 6943 | ret = -ENOMEM; |
7921 | goto error; | 6944 | goto error; |
7922 | } | 6945 | } |
6946 | cache->free_space_ctl = kzalloc(sizeof(*cache->free_space_ctl), | ||
6947 | GFP_NOFS); | ||
6948 | if (!cache->free_space_ctl) { | ||
6949 | kfree(cache); | ||
6950 | ret = -ENOMEM; | ||
6951 | goto error; | ||
6952 | } | ||
7923 | 6953 | ||
7924 | atomic_set(&cache->count, 1); | 6954 | atomic_set(&cache->count, 1); |
7925 | spin_lock_init(&cache->lock); | 6955 | spin_lock_init(&cache->lock); |
7926 | spin_lock_init(&cache->tree_lock); | ||
7927 | cache->fs_info = info; | 6956 | cache->fs_info = info; |
7928 | INIT_LIST_HEAD(&cache->list); | 6957 | INIT_LIST_HEAD(&cache->list); |
7929 | INIT_LIST_HEAD(&cache->cluster_list); | 6958 | INIT_LIST_HEAD(&cache->cluster_list); |
7930 | 6959 | ||
7931 | /* | 6960 | if (need_clear) |
7932 | * we only want to have 32k of ram per block group for keeping | 6961 | cache->disk_cache_state = BTRFS_DC_CLEAR; |
7933 | * track of free space, and if we pass 1/2 of that we want to | ||
7934 | * start converting things over to using bitmaps | ||
7935 | */ | ||
7936 | cache->extents_thresh = ((1024 * 32) / 2) / | ||
7937 | sizeof(struct btrfs_free_space); | ||
7938 | 6962 | ||
7939 | read_extent_buffer(leaf, &cache->item, | 6963 | read_extent_buffer(leaf, &cache->item, |
7940 | btrfs_item_ptr_offset(leaf, path->slots[0]), | 6964 | btrfs_item_ptr_offset(leaf, path->slots[0]), |
@@ -7942,10 +6966,19 @@ int btrfs_read_block_groups(struct btrfs_root *root) | |||
7942 | memcpy(&cache->key, &found_key, sizeof(found_key)); | 6966 | memcpy(&cache->key, &found_key, sizeof(found_key)); |
7943 | 6967 | ||
7944 | key.objectid = found_key.objectid + found_key.offset; | 6968 | key.objectid = found_key.objectid + found_key.offset; |
7945 | btrfs_release_path(root, path); | 6969 | btrfs_release_path(path); |
7946 | cache->flags = btrfs_block_group_flags(&cache->item); | 6970 | cache->flags = btrfs_block_group_flags(&cache->item); |
7947 | cache->sectorsize = root->sectorsize; | 6971 | cache->sectorsize = root->sectorsize; |
7948 | 6972 | ||
6973 | btrfs_init_free_space_ctl(cache); | ||
6974 | |||
6975 | /* | ||
6976 | * We need to exclude the super stripes now so that the space | ||
6977 | * info has super bytes accounted for, otherwise we'll think | ||
6978 | * we have more space than we actually do. | ||
6979 | */ | ||
6980 | exclude_super_stripes(root, cache); | ||
6981 | |||
7949 | /* | 6982 | /* |
7950 | * check for two cases, either we are full, and therefore | 6983 | * check for two cases, either we are full, and therefore |
7951 | * don't need to bother with the caching work since we won't | 6984 | * don't need to bother with the caching work since we won't |
@@ -7954,12 +6987,10 @@ int btrfs_read_block_groups(struct btrfs_root *root) | |||
7954 | * time, particularly in the full case. | 6987 | * time, particularly in the full case. |
7955 | */ | 6988 | */ |
7956 | if (found_key.offset == btrfs_block_group_used(&cache->item)) { | 6989 | if (found_key.offset == btrfs_block_group_used(&cache->item)) { |
7957 | exclude_super_stripes(root, cache); | ||
7958 | cache->last_byte_to_unpin = (u64)-1; | 6990 | cache->last_byte_to_unpin = (u64)-1; |
7959 | cache->cached = BTRFS_CACHE_FINISHED; | 6991 | cache->cached = BTRFS_CACHE_FINISHED; |
7960 | free_excluded_extents(root, cache); | 6992 | free_excluded_extents(root, cache); |
7961 | } else if (btrfs_block_group_used(&cache->item) == 0) { | 6993 | } else if (btrfs_block_group_used(&cache->item) == 0) { |
7962 | exclude_super_stripes(root, cache); | ||
7963 | cache->last_byte_to_unpin = (u64)-1; | 6994 | cache->last_byte_to_unpin = (u64)-1; |
7964 | cache->cached = BTRFS_CACHE_FINISHED; | 6995 | cache->cached = BTRFS_CACHE_FINISHED; |
7965 | add_new_free_space(cache, root->fs_info, | 6996 | add_new_free_space(cache, root->fs_info, |
@@ -8027,25 +7058,26 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, | |||
8027 | cache = kzalloc(sizeof(*cache), GFP_NOFS); | 7058 | cache = kzalloc(sizeof(*cache), GFP_NOFS); |
8028 | if (!cache) | 7059 | if (!cache) |
8029 | return -ENOMEM; | 7060 | return -ENOMEM; |
7061 | cache->free_space_ctl = kzalloc(sizeof(*cache->free_space_ctl), | ||
7062 | GFP_NOFS); | ||
7063 | if (!cache->free_space_ctl) { | ||
7064 | kfree(cache); | ||
7065 | return -ENOMEM; | ||
7066 | } | ||
8030 | 7067 | ||
8031 | cache->key.objectid = chunk_offset; | 7068 | cache->key.objectid = chunk_offset; |
8032 | cache->key.offset = size; | 7069 | cache->key.offset = size; |
8033 | cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; | 7070 | cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; |
8034 | cache->sectorsize = root->sectorsize; | 7071 | cache->sectorsize = root->sectorsize; |
7072 | cache->fs_info = root->fs_info; | ||
8035 | 7073 | ||
8036 | /* | ||
8037 | * we only want to have 32k of ram per block group for keeping track | ||
8038 | * of free space, and if we pass 1/2 of that we want to start | ||
8039 | * converting things over to using bitmaps | ||
8040 | */ | ||
8041 | cache->extents_thresh = ((1024 * 32) / 2) / | ||
8042 | sizeof(struct btrfs_free_space); | ||
8043 | atomic_set(&cache->count, 1); | 7074 | atomic_set(&cache->count, 1); |
8044 | spin_lock_init(&cache->lock); | 7075 | spin_lock_init(&cache->lock); |
8045 | spin_lock_init(&cache->tree_lock); | ||
8046 | INIT_LIST_HEAD(&cache->list); | 7076 | INIT_LIST_HEAD(&cache->list); |
8047 | INIT_LIST_HEAD(&cache->cluster_list); | 7077 | INIT_LIST_HEAD(&cache->cluster_list); |
8048 | 7078 | ||
7079 | btrfs_init_free_space_ctl(cache); | ||
7080 | |||
8049 | btrfs_set_block_group_used(&cache->item, bytes_used); | 7081 | btrfs_set_block_group_used(&cache->item, bytes_used); |
8050 | btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid); | 7082 | btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid); |
8051 | cache->flags = type; | 7083 | cache->flags = type; |
@@ -8088,8 +7120,11 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, | |||
8088 | struct btrfs_path *path; | 7120 | struct btrfs_path *path; |
8089 | struct btrfs_block_group_cache *block_group; | 7121 | struct btrfs_block_group_cache *block_group; |
8090 | struct btrfs_free_cluster *cluster; | 7122 | struct btrfs_free_cluster *cluster; |
7123 | struct btrfs_root *tree_root = root->fs_info->tree_root; | ||
8091 | struct btrfs_key key; | 7124 | struct btrfs_key key; |
7125 | struct inode *inode; | ||
8092 | int ret; | 7126 | int ret; |
7127 | int factor; | ||
8093 | 7128 | ||
8094 | root = root->fs_info->extent_root; | 7129 | root = root->fs_info->extent_root; |
8095 | 7130 | ||
@@ -8097,7 +7132,19 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, | |||
8097 | BUG_ON(!block_group); | 7132 | BUG_ON(!block_group); |
8098 | BUG_ON(!block_group->ro); | 7133 | BUG_ON(!block_group->ro); |
8099 | 7134 | ||
7135 | /* | ||
7136 | * Free the reserved super bytes from this block group before | ||
7137 | * remove it. | ||
7138 | */ | ||
7139 | free_excluded_extents(root, block_group); | ||
7140 | |||
8100 | memcpy(&key, &block_group->key, sizeof(key)); | 7141 | memcpy(&key, &block_group->key, sizeof(key)); |
7142 | if (block_group->flags & (BTRFS_BLOCK_GROUP_DUP | | ||
7143 | BTRFS_BLOCK_GROUP_RAID1 | | ||
7144 | BTRFS_BLOCK_GROUP_RAID10)) | ||
7145 | factor = 2; | ||
7146 | else | ||
7147 | factor = 1; | ||
8101 | 7148 | ||
8102 | /* make sure this block group isn't part of an allocation cluster */ | 7149 | /* make sure this block group isn't part of an allocation cluster */ |
8103 | cluster = &root->fs_info->data_alloc_cluster; | 7150 | cluster = &root->fs_info->data_alloc_cluster; |
@@ -8117,6 +7164,40 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, | |||
8117 | path = btrfs_alloc_path(); | 7164 | path = btrfs_alloc_path(); |
8118 | BUG_ON(!path); | 7165 | BUG_ON(!path); |
8119 | 7166 | ||
7167 | inode = lookup_free_space_inode(root, block_group, path); | ||
7168 | if (!IS_ERR(inode)) { | ||
7169 | btrfs_orphan_add(trans, inode); | ||
7170 | clear_nlink(inode); | ||
7171 | /* One for the block groups ref */ | ||
7172 | spin_lock(&block_group->lock); | ||
7173 | if (block_group->iref) { | ||
7174 | block_group->iref = 0; | ||
7175 | block_group->inode = NULL; | ||
7176 | spin_unlock(&block_group->lock); | ||
7177 | iput(inode); | ||
7178 | } else { | ||
7179 | spin_unlock(&block_group->lock); | ||
7180 | } | ||
7181 | /* One for our lookup ref */ | ||
7182 | iput(inode); | ||
7183 | } | ||
7184 | |||
7185 | key.objectid = BTRFS_FREE_SPACE_OBJECTID; | ||
7186 | key.offset = block_group->key.objectid; | ||
7187 | key.type = 0; | ||
7188 | |||
7189 | ret = btrfs_search_slot(trans, tree_root, &key, path, -1, 1); | ||
7190 | if (ret < 0) | ||
7191 | goto out; | ||
7192 | if (ret > 0) | ||
7193 | btrfs_release_path(path); | ||
7194 | if (ret == 0) { | ||
7195 | ret = btrfs_del_item(trans, tree_root, path); | ||
7196 | if (ret) | ||
7197 | goto out; | ||
7198 | btrfs_release_path(path); | ||
7199 | } | ||
7200 | |||
8120 | spin_lock(&root->fs_info->block_group_cache_lock); | 7201 | spin_lock(&root->fs_info->block_group_cache_lock); |
8121 | rb_erase(&block_group->cache_node, | 7202 | rb_erase(&block_group->cache_node, |
8122 | &root->fs_info->block_group_cache_tree); | 7203 | &root->fs_info->block_group_cache_tree); |
@@ -8138,8 +7219,11 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, | |||
8138 | spin_lock(&block_group->space_info->lock); | 7219 | spin_lock(&block_group->space_info->lock); |
8139 | block_group->space_info->total_bytes -= block_group->key.offset; | 7220 | block_group->space_info->total_bytes -= block_group->key.offset; |
8140 | block_group->space_info->bytes_readonly -= block_group->key.offset; | 7221 | block_group->space_info->bytes_readonly -= block_group->key.offset; |
7222 | block_group->space_info->disk_total -= block_group->key.offset * factor; | ||
8141 | spin_unlock(&block_group->space_info->lock); | 7223 | spin_unlock(&block_group->space_info->lock); |
8142 | 7224 | ||
7225 | memcpy(&key, &block_group->key, sizeof(key)); | ||
7226 | |||
8143 | btrfs_clear_space_info_full(root->fs_info); | 7227 | btrfs_clear_space_info_full(root->fs_info); |
8144 | 7228 | ||
8145 | btrfs_put_block_group(block_group); | 7229 | btrfs_put_block_group(block_group); |
@@ -8156,3 +7240,100 @@ out: | |||
8156 | btrfs_free_path(path); | 7240 | btrfs_free_path(path); |
8157 | return ret; | 7241 | return ret; |
8158 | } | 7242 | } |
7243 | |||
7244 | int btrfs_init_space_info(struct btrfs_fs_info *fs_info) | ||
7245 | { | ||
7246 | struct btrfs_space_info *space_info; | ||
7247 | struct btrfs_super_block *disk_super; | ||
7248 | u64 features; | ||
7249 | u64 flags; | ||
7250 | int mixed = 0; | ||
7251 | int ret; | ||
7252 | |||
7253 | disk_super = &fs_info->super_copy; | ||
7254 | if (!btrfs_super_root(disk_super)) | ||
7255 | return 1; | ||
7256 | |||
7257 | features = btrfs_super_incompat_flags(disk_super); | ||
7258 | if (features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS) | ||
7259 | mixed = 1; | ||
7260 | |||
7261 | flags = BTRFS_BLOCK_GROUP_SYSTEM; | ||
7262 | ret = update_space_info(fs_info, flags, 0, 0, &space_info); | ||
7263 | if (ret) | ||
7264 | goto out; | ||
7265 | |||
7266 | if (mixed) { | ||
7267 | flags = BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA; | ||
7268 | ret = update_space_info(fs_info, flags, 0, 0, &space_info); | ||
7269 | } else { | ||
7270 | flags = BTRFS_BLOCK_GROUP_METADATA; | ||
7271 | ret = update_space_info(fs_info, flags, 0, 0, &space_info); | ||
7272 | if (ret) | ||
7273 | goto out; | ||
7274 | |||
7275 | flags = BTRFS_BLOCK_GROUP_DATA; | ||
7276 | ret = update_space_info(fs_info, flags, 0, 0, &space_info); | ||
7277 | } | ||
7278 | out: | ||
7279 | return ret; | ||
7280 | } | ||
7281 | |||
7282 | int btrfs_error_unpin_extent_range(struct btrfs_root *root, u64 start, u64 end) | ||
7283 | { | ||
7284 | return unpin_extent_range(root, start, end); | ||
7285 | } | ||
7286 | |||
7287 | int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr, | ||
7288 | u64 num_bytes, u64 *actual_bytes) | ||
7289 | { | ||
7290 | return btrfs_discard_extent(root, bytenr, num_bytes, actual_bytes); | ||
7291 | } | ||
7292 | |||
7293 | int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range) | ||
7294 | { | ||
7295 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
7296 | struct btrfs_block_group_cache *cache = NULL; | ||
7297 | u64 group_trimmed; | ||
7298 | u64 start; | ||
7299 | u64 end; | ||
7300 | u64 trimmed = 0; | ||
7301 | int ret = 0; | ||
7302 | |||
7303 | cache = btrfs_lookup_block_group(fs_info, range->start); | ||
7304 | |||
7305 | while (cache) { | ||
7306 | if (cache->key.objectid >= (range->start + range->len)) { | ||
7307 | btrfs_put_block_group(cache); | ||
7308 | break; | ||
7309 | } | ||
7310 | |||
7311 | start = max(range->start, cache->key.objectid); | ||
7312 | end = min(range->start + range->len, | ||
7313 | cache->key.objectid + cache->key.offset); | ||
7314 | |||
7315 | if (end - start >= range->minlen) { | ||
7316 | if (!block_group_cache_done(cache)) { | ||
7317 | ret = cache_block_group(cache, NULL, root, 0); | ||
7318 | if (!ret) | ||
7319 | wait_block_group_cache_done(cache); | ||
7320 | } | ||
7321 | ret = btrfs_trim_block_group(cache, | ||
7322 | &group_trimmed, | ||
7323 | start, | ||
7324 | end, | ||
7325 | range->minlen); | ||
7326 | |||
7327 | trimmed += group_trimmed; | ||
7328 | if (ret) { | ||
7329 | btrfs_put_block_group(cache); | ||
7330 | break; | ||
7331 | } | ||
7332 | } | ||
7333 | |||
7334 | cache = next_block_group(fs_info->tree_root, cache); | ||
7335 | } | ||
7336 | |||
7337 | range->len = trimmed; | ||
7338 | return ret; | ||
7339 | } | ||