Diffstat (limited to 'fs/btrfs/extent-tree.c')
 -rw-r--r--  fs/btrfs/extent-tree.c | 1788
 1 files changed, 69 insertions, 1719 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 9ee6bd55e16c..169bd62ce776 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -94,7 +94,7 @@ static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
 	return (cache->flags & bits) == bits;
 }
 
-void btrfs_get_block_group(struct btrfs_block_group_cache *cache)
+static void btrfs_get_block_group(struct btrfs_block_group_cache *cache)
 {
 	atomic_inc(&cache->count);
 }
@@ -105,6 +105,7 @@ void btrfs_put_block_group(struct btrfs_block_group_cache *cache)
 		WARN_ON(cache->pinned > 0);
 		WARN_ON(cache->reserved > 0);
 		WARN_ON(cache->reserved_pinned > 0);
+		kfree(cache->free_space_ctl);
 		kfree(cache);
 	}
 }
@@ -379,7 +380,7 @@ again:
 				break;
 
 			caching_ctl->progress = last;
-			btrfs_release_path(extent_root, path);
+			btrfs_release_path(path);
 			up_read(&fs_info->extent_commit_sem);
 			mutex_unlock(&caching_ctl->mutex);
 			if (btrfs_transaction_in_commit(fs_info))
@@ -754,8 +755,12 @@ again:
 			atomic_inc(&head->node.refs);
 			spin_unlock(&delayed_refs->lock);
 
-			btrfs_release_path(root->fs_info->extent_root, path);
+			btrfs_release_path(path);
 
+			/*
+			 * Mutex was contended, block until it's released and try
+			 * again
+			 */
 			mutex_lock(&head->mutex);
 			mutex_unlock(&head->mutex);
 			btrfs_put_delayed_ref(&head->node);
@@ -934,7 +939,7 @@ static int convert_extent_item_v0(struct btrfs_trans_handle *trans,
 				break;
 		}
 	}
-	btrfs_release_path(root, path);
+	btrfs_release_path(path);
 
 	if (owner < BTRFS_FIRST_FREE_OBJECTID)
 		new_size += sizeof(*bi);
@@ -947,7 +952,6 @@ static int convert_extent_item_v0(struct btrfs_trans_handle *trans,
 	BUG_ON(ret);
 
 	ret = btrfs_extend_item(trans, root, path, new_size);
-	BUG_ON(ret);
 
 	leaf = path->nodes[0];
 	item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
@@ -1042,7 +1046,7 @@ again:
 			return 0;
 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
 		key.type = BTRFS_EXTENT_REF_V0_KEY;
-		btrfs_release_path(root, path);
+		btrfs_release_path(path);
 		ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
 		if (ret < 0) {
 			err = ret;
@@ -1080,7 +1084,7 @@ again:
 		if (match_extent_data_ref(leaf, ref, root_objectid,
 					  owner, offset)) {
 			if (recow) {
-				btrfs_release_path(root, path);
+				btrfs_release_path(path);
 				goto again;
 			}
 			err = 0;
@@ -1141,7 +1145,7 @@ static noinline int insert_extent_data_ref(struct btrfs_trans_handle *trans,
 		if (match_extent_data_ref(leaf, ref, root_objectid,
 					  owner, offset))
 			break;
-		btrfs_release_path(root, path);
+		btrfs_release_path(path);
 		key.offset++;
 		ret = btrfs_insert_empty_item(trans, root, path, &key,
 					      size);
@@ -1167,7 +1171,7 @@ static noinline int insert_extent_data_ref(struct btrfs_trans_handle *trans,
 	btrfs_mark_buffer_dirty(leaf);
 	ret = 0;
 fail:
-	btrfs_release_path(root, path);
+	btrfs_release_path(path);
 	return ret;
 }
 
@@ -1293,7 +1297,7 @@ static noinline int lookup_tree_block_ref(struct btrfs_trans_handle *trans,
 		ret = -ENOENT;
 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
 	if (ret == -ENOENT && parent) {
-		btrfs_release_path(root, path);
+		btrfs_release_path(path);
 		key.type = BTRFS_EXTENT_REF_V0_KEY;
 		ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
 		if (ret > 0)
@@ -1322,7 +1326,7 @@ static noinline int insert_tree_block_ref(struct btrfs_trans_handle *trans,
 	}
 
 	ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
-	btrfs_release_path(root, path);
+	btrfs_release_path(path);
 	return ret;
 }
 
@@ -1555,7 +1559,6 @@ int setup_inline_extent_backref(struct btrfs_trans_handle *trans,
 	size = btrfs_extent_inline_ref_size(type);
 
 	ret = btrfs_extend_item(trans, root, path, size);
-	BUG_ON(ret);
 
 	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
 	refs = btrfs_extent_refs(leaf, ei);
@@ -1608,7 +1611,7 @@ static int lookup_extent_backref(struct btrfs_trans_handle *trans,
 	if (ret != -ENOENT)
 		return ret;
 
-	btrfs_release_path(root, path);
+	btrfs_release_path(path);
 	*ref_ret = NULL;
 
 	if (owner < BTRFS_FIRST_FREE_OBJECTID) {
@@ -1684,7 +1687,6 @@ int update_inline_extent_backref(struct btrfs_trans_handle *trans,
 			      end - ptr - size);
 		item_size -= size;
 		ret = btrfs_truncate_item(trans, root, path, item_size, 1);
-		BUG_ON(ret);
 	}
 	btrfs_mark_buffer_dirty(leaf);
 	return 0;
@@ -1862,7 +1864,7 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
 		__run_delayed_extent_op(extent_op, leaf, item);
 
 	btrfs_mark_buffer_dirty(leaf);
-	btrfs_release_path(root->fs_info->extent_root, path);
+	btrfs_release_path(path);
 
 	path->reada = 1;
 	path->leave_spinning = 1;
@@ -2297,6 +2299,10 @@ again:
 				atomic_inc(&ref->refs);
 
 				spin_unlock(&delayed_refs->lock);
+				/*
+				 * Mutex was contended, block until it's
+				 * released and try again
+				 */
 				mutex_lock(&head->mutex);
 				mutex_unlock(&head->mutex);
 
@@ -2361,8 +2367,12 @@ static noinline int check_delayed_ref(struct btrfs_trans_handle *trans,
 		atomic_inc(&head->node.refs);
 		spin_unlock(&delayed_refs->lock);
 
-		btrfs_release_path(root->fs_info->extent_root, path);
+		btrfs_release_path(path);
 
+		/*
+		 * Mutex was contended, block until it's released and let
+		 * caller try again
+		 */
 		mutex_lock(&head->mutex);
 		mutex_unlock(&head->mutex);
 		btrfs_put_delayed_ref(&head->node);
@@ -2510,126 +2520,6 @@ out:
 	return ret;
 }
 
-#if 0
-int btrfs_cache_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
-		    struct extent_buffer *buf, u32 nr_extents)
-{
-	struct btrfs_key key;
-	struct btrfs_file_extent_item *fi;
-	u64 root_gen;
-	u32 nritems;
-	int i;
-	int level;
-	int ret = 0;
-	int shared = 0;
-
-	if (!root->ref_cows)
-		return 0;
-
-	if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
-		shared = 0;
-		root_gen = root->root_key.offset;
-	} else {
-		shared = 1;
-		root_gen = trans->transid - 1;
-	}
-
-	level = btrfs_header_level(buf);
-	nritems = btrfs_header_nritems(buf);
-
-	if (level == 0) {
-		struct btrfs_leaf_ref *ref;
-		struct btrfs_extent_info *info;
-
-		ref = btrfs_alloc_leaf_ref(root, nr_extents);
-		if (!ref) {
-			ret = -ENOMEM;
-			goto out;
-		}
-
-		ref->root_gen = root_gen;
-		ref->bytenr = buf->start;
-		ref->owner = btrfs_header_owner(buf);
-		ref->generation = btrfs_header_generation(buf);
-		ref->nritems = nr_extents;
-		info = ref->extents;
-
-		for (i = 0; nr_extents > 0 && i < nritems; i++) {
-			u64 disk_bytenr;
-			btrfs_item_key_to_cpu(buf, &key, i);
-			if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
-				continue;
-			fi = btrfs_item_ptr(buf, i,
-					    struct btrfs_file_extent_item);
-			if (btrfs_file_extent_type(buf, fi) ==
-			    BTRFS_FILE_EXTENT_INLINE)
-				continue;
-			disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
-			if (disk_bytenr == 0)
-				continue;
-
-			info->bytenr = disk_bytenr;
-			info->num_bytes =
-				btrfs_file_extent_disk_num_bytes(buf, fi);
-			info->objectid = key.objectid;
-			info->offset = key.offset;
-			info++;
-		}
-
-		ret = btrfs_add_leaf_ref(root, ref, shared);
-		if (ret == -EEXIST && shared) {
-			struct btrfs_leaf_ref *old;
-			old = btrfs_lookup_leaf_ref(root, ref->bytenr);
-			BUG_ON(!old);
-			btrfs_remove_leaf_ref(root, old);
-			btrfs_free_leaf_ref(root, old);
-			ret = btrfs_add_leaf_ref(root, ref, shared);
-		}
-		WARN_ON(ret);
-		btrfs_free_leaf_ref(root, ref);
-	}
-out:
-	return ret;
-}
-
-/* when a block goes through cow, we update the reference counts of
- * everything that block points to.  The internal pointers of the block
- * can be in just about any order, and it is likely to have clusters of
- * things that are close together and clusters of things that are not.
- *
- * To help reduce the seeks that come with updating all of these reference
- * counts, sort them by byte number before actual updates are done.
- *
- * struct refsort is used to match byte number to slot in the btree block.
- * we sort based on the byte number and then use the slot to actually
- * find the item.
- *
- * struct refsort is smaller than strcut btrfs_item and smaller than
- * struct btrfs_key_ptr.  Since we're currently limited to the page size
- * for a btree block, there's no way for a kmalloc of refsorts for a
- * single node to be bigger than a page.
- */
-struct refsort {
-	u64 bytenr;
-	u32 slot;
-};
-
-/*
- * for passing into sort()
- */
-static int refsort_cmp(const void *a_void, const void *b_void)
-{
-	const struct refsort *a = a_void;
-	const struct refsort *b = b_void;
-
-	if (a->bytenr < b->bytenr)
-		return -1;
-	if (a->bytenr > b->bytenr)
-		return 1;
-	return 0;
-}
-#endif
-
 static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
 			   struct btrfs_root *root,
 			   struct extent_buffer *buf,
@@ -2732,7 +2622,7 @@ static int write_one_cache_group(struct btrfs_trans_handle *trans,
 	bi = btrfs_item_ptr_offset(leaf, path->slots[0]);
 	write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item));
 	btrfs_mark_buffer_dirty(leaf);
-	btrfs_release_path(extent_root, path);
+	btrfs_release_path(path);
 fail:
 	if (ret)
 		return ret;
@@ -2785,7 +2675,7 @@ again:
 	inode = lookup_free_space_inode(root, block_group, path);
 	if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) {
 		ret = PTR_ERR(inode);
-		btrfs_release_path(root, path);
+		btrfs_release_path(path);
 		goto out;
 	}
 
@@ -2854,7 +2744,7 @@ again:
 out_put:
 	iput(inode);
 out_free:
-	btrfs_release_path(root, path);
+	btrfs_release_path(path);
 out:
 	spin_lock(&block_group->lock);
 	block_group->disk_cache_state = dcs;
@@ -3144,7 +3034,8 @@ int btrfs_check_data_free_space(struct inode *inode, u64 bytes)
 	/* make sure bytes are sectorsize aligned */
 	bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1);
 
-	if (root == root->fs_info->tree_root) {
+	if (root == root->fs_info->tree_root ||
+	    BTRFS_I(inode)->location.objectid == BTRFS_FREE_INO_OBJECTID) {
 		alloc_chunk = 0;
 		committed = 1;
 	}
@@ -3211,18 +3102,6 @@ commit_trans:
 			goto again;
 		}
 
-#if 0 /* I hope we never need this code again, just in case */
-		printk(KERN_ERR "no space left, need %llu, %llu bytes_used, "
-		       "%llu bytes_reserved, " "%llu bytes_pinned, "
-		       "%llu bytes_readonly, %llu may use %llu total\n",
-		       (unsigned long long)bytes,
-		       (unsigned long long)data_sinfo->bytes_used,
-		       (unsigned long long)data_sinfo->bytes_reserved,
-		       (unsigned long long)data_sinfo->bytes_pinned,
-		       (unsigned long long)data_sinfo->bytes_readonly,
-		       (unsigned long long)data_sinfo->bytes_may_use,
-		       (unsigned long long)data_sinfo->total_bytes);
-#endif
 		return -ENOSPC;
 	}
 	data_sinfo->bytes_may_use += bytes;
@@ -3425,6 +3304,10 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
 	if (reserved == 0)
 		return 0;
 
+	/* nothing to shrink - nothing to reclaim */
+	if (root->fs_info->delalloc_bytes == 0)
+		return 0;
+
 	max_reclaim = min(reserved, to_reclaim);
 
 	while (loops < 1024) {
@@ -3651,8 +3534,8 @@ static void block_rsv_add_bytes(struct btrfs_block_rsv *block_rsv,
 	spin_unlock(&block_rsv->lock);
 }
 
-void block_rsv_release_bytes(struct btrfs_block_rsv *block_rsv,
-			     struct btrfs_block_rsv *dest, u64 num_bytes)
+static void block_rsv_release_bytes(struct btrfs_block_rsv *block_rsv,
+				    struct btrfs_block_rsv *dest, u64 num_bytes)
 {
 	struct btrfs_space_info *space_info = block_rsv->space_info;
 
@@ -3855,23 +3738,7 @@ static u64 calc_global_metadata_size(struct btrfs_fs_info *fs_info)
 	u64 meta_used;
 	u64 data_used;
 	int csum_size = btrfs_super_csum_size(&fs_info->super_copy);
-#if 0
-	/*
-	 * per tree used space accounting can be inaccuracy, so we
-	 * can't rely on it.
-	 */
-	spin_lock(&fs_info->extent_root->accounting_lock);
-	num_bytes = btrfs_root_used(&fs_info->extent_root->root_item);
-	spin_unlock(&fs_info->extent_root->accounting_lock);
-
-	spin_lock(&fs_info->csum_root->accounting_lock);
-	num_bytes += btrfs_root_used(&fs_info->csum_root->root_item);
-	spin_unlock(&fs_info->csum_root->accounting_lock);
 
-	spin_lock(&fs_info->tree_root->accounting_lock);
-	num_bytes += btrfs_root_used(&fs_info->tree_root->root_item);
-	spin_unlock(&fs_info->tree_root->accounting_lock);
-#endif
 	sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_DATA);
 	spin_lock(&sinfo->lock);
 	data_used = sinfo->bytes_used;
@@ -3924,10 +3791,7 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
 		block_rsv->reserved = block_rsv->size;
 		block_rsv->full = 1;
 	}
-#if 0
-	printk(KERN_INFO"global block rsv size %llu reserved %llu\n",
-		block_rsv->size, block_rsv->reserved);
-#endif
+
 	spin_unlock(&sinfo->lock);
 	spin_unlock(&block_rsv->lock);
 }
@@ -3973,12 +3837,6 @@ static void release_global_block_rsv(struct btrfs_fs_info *fs_info)
 	WARN_ON(fs_info->chunk_block_rsv.reserved > 0);
 }
 
-static u64 calc_trans_metadata_size(struct btrfs_root *root, int num_items)
-{
-	return (root->leafsize + root->nodesize * (BTRFS_MAX_LEVEL - 1)) *
-		3 * num_items;
-}
-
 int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans,
 				 struct btrfs_root *root,
 				 int num_items)
@@ -3989,7 +3847,7 @@ int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans,
 	if (num_items == 0 || root->fs_info->chunk_root == root)
 		return 0;
 
-	num_bytes = calc_trans_metadata_size(root, num_items);
+	num_bytes = btrfs_calc_trans_metadata_size(root, num_items);
 	ret = btrfs_block_rsv_add(trans, root, &root->fs_info->trans_block_rsv,
 				  num_bytes);
 	if (!ret) {
@@ -4028,14 +3886,14 @@ int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans,
 	 * If all of the metadata space is used, we can commit
 	 * transaction and use space it freed.
 	 */
-	u64 num_bytes = calc_trans_metadata_size(root, 4);
+	u64 num_bytes = btrfs_calc_trans_metadata_size(root, 4);
 	return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes);
 }
 
 void btrfs_orphan_release_metadata(struct inode *inode)
 {
 	struct btrfs_root *root = BTRFS_I(inode)->root;
-	u64 num_bytes = calc_trans_metadata_size(root, 4);
+	u64 num_bytes = btrfs_calc_trans_metadata_size(root, 4);
 	btrfs_block_rsv_release(root, root->orphan_block_rsv, num_bytes);
 }
 
@@ -4049,7 +3907,7 @@ int btrfs_snap_reserve_metadata(struct btrfs_trans_handle *trans,
 	 * two for root back/forward refs, two for directory entries
 	 * and one for root of the snapshot.
 	 */
-	u64 num_bytes = calc_trans_metadata_size(root, 5);
+	u64 num_bytes = btrfs_calc_trans_metadata_size(root, 5);
 	dst_rsv->space_info = src_rsv->space_info;
 	return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes);
 }
@@ -4078,7 +3936,7 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
 
 	if (nr_extents > reserved_extents) {
 		nr_extents -= reserved_extents;
-		to_reserve = calc_trans_metadata_size(root, nr_extents);
+		to_reserve = btrfs_calc_trans_metadata_size(root, nr_extents);
 	} else {
 		nr_extents = 0;
 		to_reserve = 0;
@@ -4132,7 +3990,7 @@ void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes)
 
 	to_free = calc_csum_metadata_size(inode, num_bytes);
 	if (nr_extents > 0)
-		to_free += calc_trans_metadata_size(root, nr_extents);
+		to_free += btrfs_calc_trans_metadata_size(root, nr_extents);
 
 	btrfs_block_rsv_release(root, &root->fs_info->delalloc_block_rsv,
 				to_free);
@@ -4541,7 +4399,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
 						    NULL, refs_to_drop,
 						    is_data);
 			BUG_ON(ret);
-			btrfs_release_path(extent_root, path);
+			btrfs_release_path(path);
 			path->leave_spinning = 1;
 
 			key.objectid = bytenr;
@@ -4580,7 +4438,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
 						  owner_objectid, 0);
 			BUG_ON(ret < 0);
 
-			btrfs_release_path(extent_root, path);
+			btrfs_release_path(path);
 			path->leave_spinning = 1;
 
 			key.objectid = bytenr;
@@ -4650,7 +4508,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
 		ret = btrfs_del_items(trans, extent_root, path, path->slots[0],
 				      num_to_del);
 		BUG_ON(ret);
-		btrfs_release_path(extent_root, path);
+		btrfs_release_path(path);
 
 		if (is_data) {
 			ret = btrfs_del_csums(trans, root, bytenr, num_bytes);
@@ -4893,7 +4751,7 @@ wait_block_group_cache_progress(struct btrfs_block_group_cache *cache,
 		return 0;
 
 	wait_event(caching_ctl->wait, block_group_cache_done(cache) ||
-		   (cache->free_space >= num_bytes));
+		   (cache->free_space_ctl->free_space >= num_bytes));
 
 	put_caching_control(caching_ctl);
 	return 0;
@@ -6480,7 +6338,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
 			trans->block_rsv = block_rsv;
 		}
 	}
-	btrfs_release_path(root, path);
+	btrfs_release_path(path);
 	BUG_ON(err);
 
 	ret = btrfs_del_root(trans, tree_root, &root->root_key);
@@ -6584,1514 +6442,6 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
 	return ret;
 }
 
6587#if 0
6588static unsigned long calc_ra(unsigned long start, unsigned long last,
6589 unsigned long nr)
6590{
6591 return min(last, start + nr - 1);
6592}
6593
6594static noinline int relocate_inode_pages(struct inode *inode, u64 start,
6595 u64 len)
6596{
6597 u64 page_start;
6598 u64 page_end;
6599 unsigned long first_index;
6600 unsigned long last_index;
6601 unsigned long i;
6602 struct page *page;
6603 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
6604 struct file_ra_state *ra;
6605 struct btrfs_ordered_extent *ordered;
6606 unsigned int total_read = 0;
6607 unsigned int total_dirty = 0;
6608 int ret = 0;
6609
6610 ra = kzalloc(sizeof(*ra), GFP_NOFS);
6611 if (!ra)
6612 return -ENOMEM;
6613
6614 mutex_lock(&inode->i_mutex);
6615 first_index = start >> PAGE_CACHE_SHIFT;
6616 last_index = (start + len - 1) >> PAGE_CACHE_SHIFT;
6617
6618 /* make sure the dirty trick played by the caller work */
6619 ret = invalidate_inode_pages2_range(inode->i_mapping,
6620 first_index, last_index);
6621 if (ret)
6622 goto out_unlock;
6623
6624 file_ra_state_init(ra, inode->i_mapping);
6625
6626 for (i = first_index ; i <= last_index; i++) {
6627 if (total_read % ra->ra_pages == 0) {
6628 btrfs_force_ra(inode->i_mapping, ra, NULL, i,
6629 calc_ra(i, last_index, ra->ra_pages));
6630 }
6631 total_read++;
6632again:
6633 if (((u64)i << PAGE_CACHE_SHIFT) > i_size_read(inode))
6634 BUG_ON(1);
6635 page = grab_cache_page(inode->i_mapping, i);
6636 if (!page) {
6637 ret = -ENOMEM;
6638 goto out_unlock;
6639 }
6640 if (!PageUptodate(page)) {
6641 btrfs_readpage(NULL, page);
6642 lock_page(page);
6643 if (!PageUptodate(page)) {
6644 unlock_page(page);
6645 page_cache_release(page);
6646 ret = -EIO;
6647 goto out_unlock;
6648 }
6649 }
6650 wait_on_page_writeback(page);
6651
6652 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
6653 page_end = page_start + PAGE_CACHE_SIZE - 1;
6654 lock_extent(io_tree, page_start, page_end, GFP_NOFS);
6655
6656 ordered = btrfs_lookup_ordered_extent(inode, page_start);
6657 if (ordered) {
6658 unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
6659 unlock_page(page);
6660 page_cache_release(page);
6661 btrfs_start_ordered_extent(inode, ordered, 1);
6662 btrfs_put_ordered_extent(ordered);
6663 goto again;
6664 }
6665 set_page_extent_mapped(page);
6666
6667 if (i == first_index)
6668 set_extent_bits(io_tree, page_start, page_end,
6669 EXTENT_BOUNDARY, GFP_NOFS);
6670 btrfs_set_extent_delalloc(inode, page_start, page_end);
6671
6672 set_page_dirty(page);
6673 total_dirty++;
6674
6675 unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
6676 unlock_page(page);
6677 page_cache_release(page);
6678 }
6679
6680out_unlock:
6681 kfree(ra);
6682 mutex_unlock(&inode->i_mutex);
6683 balance_dirty_pages_ratelimited_nr(inode->i_mapping, total_dirty);
6684 return ret;
6685}
6686
6687static noinline int relocate_data_extent(struct inode *reloc_inode,
6688 struct btrfs_key *extent_key,
6689 u64 offset)
6690{
6691 struct btrfs_root *root = BTRFS_I(reloc_inode)->root;
6692 struct extent_map_tree *em_tree = &BTRFS_I(reloc_inode)->extent_tree;
6693 struct extent_map *em;
6694 u64 start = extent_key->objectid - offset;
6695 u64 end = start + extent_key->offset - 1;
6696
6697 em = alloc_extent_map(GFP_NOFS);
6698 BUG_ON(!em);
6699
6700 em->start = start;
6701 em->len = extent_key->offset;
6702 em->block_len = extent_key->offset;
6703 em->block_start = extent_key->objectid;
6704 em->bdev = root->fs_info->fs_devices->latest_bdev;
6705 set_bit(EXTENT_FLAG_PINNED, &em->flags);
6706
6707 /* setup extent map to cheat btrfs_readpage */
6708 lock_extent(&BTRFS_I(reloc_inode)->io_tree, start, end, GFP_NOFS);
6709 while (1) {
6710 int ret;
6711 write_lock(&em_tree->lock);
6712 ret = add_extent_mapping(em_tree, em);
6713 write_unlock(&em_tree->lock);
6714 if (ret != -EEXIST) {
6715 free_extent_map(em);
6716 break;
6717 }
6718 btrfs_drop_extent_cache(reloc_inode, start, end, 0);
6719 }
6720 unlock_extent(&BTRFS_I(reloc_inode)->io_tree, start, end, GFP_NOFS);
6721
6722 return relocate_inode_pages(reloc_inode, start, extent_key->offset);
6723}
6724
6725struct btrfs_ref_path {
6726 u64 extent_start;
6727 u64 nodes[BTRFS_MAX_LEVEL];
6728 u64 root_objectid;
6729 u64 root_generation;
6730 u64 owner_objectid;
6731 u32 num_refs;
6732 int lowest_level;
6733 int current_level;
6734 int shared_level;
6735
6736 struct btrfs_key node_keys[BTRFS_MAX_LEVEL];
6737 u64 new_nodes[BTRFS_MAX_LEVEL];
6738};
6739
6740struct disk_extent {
6741 u64 ram_bytes;
6742 u64 disk_bytenr;
6743 u64 disk_num_bytes;
6744 u64 offset;
6745 u64 num_bytes;
6746 u8 compression;
6747 u8 encryption;
6748 u16 other_encoding;
6749};
6750
6751static int is_cowonly_root(u64 root_objectid)
6752{
6753 if (root_objectid == BTRFS_ROOT_TREE_OBJECTID ||
6754 root_objectid == BTRFS_EXTENT_TREE_OBJECTID ||
6755 root_objectid == BTRFS_CHUNK_TREE_OBJECTID ||
6756 root_objectid == BTRFS_DEV_TREE_OBJECTID ||
6757 root_objectid == BTRFS_TREE_LOG_OBJECTID ||
6758 root_objectid == BTRFS_CSUM_TREE_OBJECTID)
6759 return 1;
6760 return 0;
6761}
6762
6763static noinline int __next_ref_path(struct btrfs_trans_handle *trans,
6764 struct btrfs_root *extent_root,
6765 struct btrfs_ref_path *ref_path,
6766 int first_time)
6767{
6768 struct extent_buffer *leaf;
6769 struct btrfs_path *path;
6770 struct btrfs_extent_ref *ref;
6771 struct btrfs_key key;
6772 struct btrfs_key found_key;
6773 u64 bytenr;
6774 u32 nritems;
6775 int level;
6776 int ret = 1;
6777
6778 path = btrfs_alloc_path();
6779 if (!path)
6780 return -ENOMEM;
6781
6782 if (first_time) {
6783 ref_path->lowest_level = -1;
6784 ref_path->current_level = -1;
6785 ref_path->shared_level = -1;
6786 goto walk_up;
6787 }
6788walk_down:
6789 level = ref_path->current_level - 1;
6790 while (level >= -1) {
6791 u64 parent;
6792 if (level < ref_path->lowest_level)
6793 break;
6794
6795 if (level >= 0)
6796 bytenr = ref_path->nodes[level];
6797 else
6798 bytenr = ref_path->extent_start;
6799 BUG_ON(bytenr == 0);
6800
6801 parent = ref_path->nodes[level + 1];
6802 ref_path->nodes[level + 1] = 0;
6803 ref_path->current_level = level;
6804 BUG_ON(parent == 0);
6805
6806 key.objectid = bytenr;
6807 key.offset = parent + 1;
6808 key.type = BTRFS_EXTENT_REF_KEY;
6809
6810 ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 0);
6811 if (ret < 0)
6812 goto out;
6813 BUG_ON(ret == 0);
6814
6815 leaf = path->nodes[0];
6816 nritems = btrfs_header_nritems(leaf);
6817 if (path->slots[0] >= nritems) {
6818 ret = btrfs_next_leaf(extent_root, path);
6819 if (ret < 0)
6820 goto out;
6821 if (ret > 0)
6822 goto next;
6823 leaf = path->nodes[0];
6824 }
6825
6826 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
6827 if (found_key.objectid == bytenr &&
6828 found_key.type == BTRFS_EXTENT_REF_KEY) {
6829 if (level < ref_path->shared_level)
6830 ref_path->shared_level = level;
6831 goto found;
6832 }
6833next:
6834 level--;
6835 btrfs_release_path(extent_root, path);
6836 cond_resched();
6837 }
6838 /* reached lowest level */
6839 ret = 1;
6840 goto out;
6841walk_up:
6842 level = ref_path->current_level;
6843 while (level < BTRFS_MAX_LEVEL - 1) {
6844 u64 ref_objectid;
6845
6846 if (level >= 0)
6847 bytenr = ref_path->nodes[level];
6848 else
6849 bytenr = ref_path->extent_start;
6850
6851 BUG_ON(bytenr == 0);
6852
6853 key.objectid = bytenr;
6854 key.offset = 0;
6855 key.type = BTRFS_EXTENT_REF_KEY;
6856
6857 ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 0);
6858 if (ret < 0)
6859 goto out;
6860
6861 leaf = path->nodes[0];
6862 nritems = btrfs_header_nritems(leaf);
6863 if (path->slots[0] >= nritems) {
6864 ret = btrfs_next_leaf(extent_root, path);
6865 if (ret < 0)
6866 goto out;
6867 if (ret > 0) {
6868 /* the extent was freed by someone */
6869 if (ref_path->lowest_level == level)
6870 goto out;
6871 btrfs_release_path(extent_root, path);
6872 goto walk_down;
6873 }
6874 leaf = path->nodes[0];
6875 }
6876
6877 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
6878 if (found_key.objectid != bytenr ||
6879 found_key.type != BTRFS_EXTENT_REF_KEY) {
6880 /* the extent was freed by someone */
6881 if (ref_path->lowest_level == level) {
6882 ret = 1;
6883 goto out;
6884 }
6885 btrfs_release_path(extent_root, path);
6886 goto walk_down;
6887 }
6888found:
6889 ref = btrfs_item_ptr(leaf, path->slots[0],
6890 struct btrfs_extent_ref);
6891 ref_objectid = btrfs_ref_objectid(leaf, ref);
6892 if (ref_objectid < BTRFS_FIRST_FREE_OBJECTID) {
6893 if (first_time) {
6894 level = (int)ref_objectid;
6895 BUG_ON(level >= BTRFS_MAX_LEVEL);
6896 ref_path->lowest_level = level;
6897 ref_path->current_level = level;
6898 ref_path->nodes[level] = bytenr;
6899 } else {
6900 WARN_ON(ref_objectid != level);
6901 }
6902 } else {
6903 WARN_ON(level != -1);
6904 }
6905 first_time = 0;
6906
6907 if (ref_path->lowest_level == level) {
6908 ref_path->owner_objectid = ref_objectid;
6909 ref_path->num_refs = btrfs_ref_num_refs(leaf, ref);
6910 }
6911
6912 /*
6913 * the block is tree root or the block isn't in reference
6914 * counted tree.
6915 */
6916 if (found_key.objectid == found_key.offset ||
6917 is_cowonly_root(btrfs_ref_root(leaf, ref))) {
6918 ref_path->root_objectid = btrfs_ref_root(leaf, ref);
6919 ref_path->root_generation =
6920 btrfs_ref_generation(leaf, ref);
6921 if (level < 0) {
6922 /* special reference from the tree log */
6923 ref_path->nodes[0] = found_key.offset;
6924 ref_path->current_level = 0;
6925 }
6926 ret = 0;
6927 goto out;
6928 }
6929
6930 level++;
6931 BUG_ON(ref_path->nodes[level] != 0);
6932 ref_path->nodes[level] = found_key.offset;
6933 ref_path->current_level = level;
6934
6935 /*
6936 * the reference was created in the running transaction,
6937 * no need to continue walking up.
6938 */
6939 if (btrfs_ref_generation(leaf, ref) == trans->transid) {
6940 ref_path->root_objectid = btrfs_ref_root(leaf, ref);
6941 ref_path->root_generation =
6942 btrfs_ref_generation(leaf, ref);
6943 ret = 0;
6944 goto out;
6945 }
6946
6947 btrfs_release_path(extent_root, path);
6948 cond_resched();
6949 }
6950 /* reached max tree level, but no tree root found. */
6951 BUG();
6952out:
6953 btrfs_free_path(path);
6954 return ret;
6955}
6956
6957static int btrfs_first_ref_path(struct btrfs_trans_handle *trans,
6958 struct btrfs_root *extent_root,
6959 struct btrfs_ref_path *ref_path,
6960 u64 extent_start)
6961{
6962 memset(ref_path, 0, sizeof(*ref_path));
6963 ref_path->extent_start = extent_start;
6964
6965 return __next_ref_path(trans, extent_root, ref_path, 1);
6966}
6967
6968static int btrfs_next_ref_path(struct btrfs_trans_handle *trans,
6969 struct btrfs_root *extent_root,
6970 struct btrfs_ref_path *ref_path)
6971{
6972 return __next_ref_path(trans, extent_root, ref_path, 0);
6973}
6974
6975static noinline int get_new_locations(struct inode *reloc_inode,
6976 struct btrfs_key *extent_key,
6977 u64 offset, int no_fragment,
6978 struct disk_extent **extents,
6979 int *nr_extents)
6980{
6981 struct btrfs_root *root = BTRFS_I(reloc_inode)->root;
6982 struct btrfs_path *path;
6983 struct btrfs_file_extent_item *fi;
6984 struct extent_buffer *leaf;
6985 struct disk_extent *exts = *extents;
6986 struct btrfs_key found_key;
6987 u64 cur_pos;
6988 u64 last_byte;
6989 u32 nritems;
6990 int nr = 0;
6991 int max = *nr_extents;
6992 int ret;
6993
6994 WARN_ON(!no_fragment && *extents);
6995 if (!exts) {
6996 max = 1;
6997 exts = kmalloc(sizeof(*exts) * max, GFP_NOFS);
6998 if (!exts)
6999 return -ENOMEM;
7000 }
7001
7002 path = btrfs_alloc_path();
7003 if (!path) {
7004 if (exts != *extents)
7005 kfree(exts);
7006 return -ENOMEM;
7007 }
7008
7009 cur_pos = extent_key->objectid - offset;
7010 last_byte = extent_key->objectid + extent_key->offset;
7011 ret = btrfs_lookup_file_extent(NULL, root, path, reloc_inode->i_ino,
7012 cur_pos, 0);
7013 if (ret < 0)
7014 goto out;
7015 if (ret > 0) {
7016 ret = -ENOENT;
7017 goto out;
7018 }
7019
7020 while (1) {
7021 leaf = path->nodes[0];
7022 nritems = btrfs_header_nritems(leaf);
7023 if (path->slots[0] >= nritems) {
7024 ret = btrfs_next_leaf(root, path);
7025 if (ret < 0)
7026 goto out;
7027 if (ret > 0)
7028 break;
7029 leaf = path->nodes[0];
7030 }
7031
7032 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
7033 if (found_key.offset != cur_pos ||
7034 found_key.type != BTRFS_EXTENT_DATA_KEY ||
7035 found_key.objectid != reloc_inode->i_ino)
7036 break;
7037
7038 fi = btrfs_item_ptr(leaf, path->slots[0],
7039 struct btrfs_file_extent_item);
7040 if (btrfs_file_extent_type(leaf, fi) !=
7041 BTRFS_FILE_EXTENT_REG ||
7042 btrfs_file_extent_disk_bytenr(leaf, fi) == 0)
7043 break;
7044
7045 if (nr == max) {
7046 struct disk_extent *old = exts;
7047 max *= 2;
7048 exts = kzalloc(sizeof(*exts) * max, GFP_NOFS);
7049 if (!exts) {
7050 ret = -ENOMEM;
7051 goto out;
7052 }
7053 memcpy(exts, old, sizeof(*exts) * nr);
7054 if (old != *extents)
7055 kfree(old);
7056 }
7057
7058 exts[nr].disk_bytenr =
7059 btrfs_file_extent_disk_bytenr(leaf, fi);
7060 exts[nr].disk_num_bytes =
7061 btrfs_file_extent_disk_num_bytes(leaf, fi);
7062 exts[nr].offset = btrfs_file_extent_offset(leaf, fi);
7063 exts[nr].num_bytes = btrfs_file_extent_num_bytes(leaf, fi);
7064 exts[nr].ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi);
7065 exts[nr].compression = btrfs_file_extent_compression(leaf, fi);
7066 exts[nr].encryption = btrfs_file_extent_encryption(leaf, fi);
7067 exts[nr].other_encoding = btrfs_file_extent_other_encoding(leaf,
7068 fi);
7069 BUG_ON(exts[nr].offset > 0);
7070 BUG_ON(exts[nr].compression || exts[nr].encryption);
7071 BUG_ON(exts[nr].num_bytes != exts[nr].disk_num_bytes);
7072
7073 cur_pos += exts[nr].num_bytes;
7074 nr++;
7075
7076 if (cur_pos + offset >= last_byte)
7077 break;
7078
7079 if (no_fragment) {
7080 ret = 1;
7081 goto out;
7082 }
7083 path->slots[0]++;
7084 }
7085
7086 BUG_ON(cur_pos + offset > last_byte);
7087 if (cur_pos + offset < last_byte) {
7088 ret = -ENOENT;
7089 goto out;
7090 }
7091 ret = 0;
7092out:
7093 btrfs_free_path(path);
7094 if (ret) {
7095 if (exts != *extents)
7096 kfree(exts);
7097 } else {
7098 *extents = exts;
7099 *nr_extents = nr;
7100 }
7101 return ret;
7102}
7103
7104static noinline int replace_one_extent(struct btrfs_trans_handle *trans,
7105 struct btrfs_root *root,
7106 struct btrfs_path *path,
7107 struct btrfs_key *extent_key,
7108 struct btrfs_key *leaf_key,
7109 struct btrfs_ref_path *ref_path,
7110 struct disk_extent *new_extents,
7111 int nr_extents)
7112{
7113 struct extent_buffer *leaf;
7114 struct btrfs_file_extent_item *fi;
7115 struct inode *inode = NULL;
7116 struct btrfs_key key;
7117 u64 lock_start = 0;
7118 u64 lock_end = 0;
7119 u64 num_bytes;
7120 u64 ext_offset;
7121 u64 search_end = (u64)-1;
7122 u32 nritems;
7123 int nr_scaned = 0;
7124 int extent_locked = 0;
7125 int extent_type;
7126 int ret;
7127
7128 memcpy(&key, leaf_key, sizeof(key));
7129 if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS) {
7130 if (key.objectid < ref_path->owner_objectid ||
7131 (key.objectid == ref_path->owner_objectid &&
7132 key.type < BTRFS_EXTENT_DATA_KEY)) {
7133 key.objectid = ref_path->owner_objectid;
7134 key.type = BTRFS_EXTENT_DATA_KEY;
7135 key.offset = 0;
7136 }
7137 }
7138
7139 while (1) {
7140 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
7141 if (ret < 0)
7142 goto out;
7143
7144 leaf = path->nodes[0];
7145 nritems = btrfs_header_nritems(leaf);
7146next:
7147 if (extent_locked && ret > 0) {
7148 /*
7149 * the file extent item was modified by someone
7150 * before the extent got locked.
7151 */
7152 unlock_extent(&BTRFS_I(inode)->io_tree, lock_start,
7153 lock_end, GFP_NOFS);
7154 extent_locked = 0;
7155 }
7156
7157 if (path->slots[0] >= nritems) {
7158 if (++nr_scaned > 2)
7159 break;
7160
7161 BUG_ON(extent_locked);
7162 ret = btrfs_next_leaf(root, path);
7163 if (ret < 0)
7164 goto out;
7165 if (ret > 0)
7166 break;
7167 leaf = path->nodes[0];
7168 nritems = btrfs_header_nritems(leaf);
7169 }
7170
7171 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
7172
7173 if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS) {
7174 if ((key.objectid > ref_path->owner_objectid) ||
7175 (key.objectid == ref_path->owner_objectid &&
7176 key.type > BTRFS_EXTENT_DATA_KEY) ||
7177 key.offset >= search_end)
7178 break;
7179 }
7180
7181 if (inode && key.objectid != inode->i_ino) {
7182 BUG_ON(extent_locked);
7183 btrfs_release_path(root, path);
7184 mutex_unlock(&inode->i_mutex);
7185 iput(inode);
7186 inode = NULL;
7187 continue;
7188 }
7189
7190 if (key.type != BTRFS_EXTENT_DATA_KEY) {
7191 path->slots[0]++;
7192 ret = 1;
7193 goto next;
7194 }
7195 fi = btrfs_item_ptr(leaf, path->slots[0],
7196 struct btrfs_file_extent_item);
7197 extent_type = btrfs_file_extent_type(leaf, fi);
7198 if ((extent_type != BTRFS_FILE_EXTENT_REG &&
7199 extent_type != BTRFS_FILE_EXTENT_PREALLOC) ||
7200 (btrfs_file_extent_disk_bytenr(leaf, fi) !=
7201 extent_key->objectid)) {
7202 path->slots[0]++;
7203 ret = 1;
7204 goto next;
7205 }
7206
7207 num_bytes = btrfs_file_extent_num_bytes(leaf, fi);
7208 ext_offset = btrfs_file_extent_offset(leaf, fi);
7209
7210 if (search_end == (u64)-1) {
7211 search_end = key.offset - ext_offset +
7212 btrfs_file_extent_ram_bytes(leaf, fi);
7213 }
7214
7215 if (!extent_locked) {
7216 lock_start = key.offset;
7217 lock_end = lock_start + num_bytes - 1;
7218 } else {
7219 if (lock_start > key.offset ||
7220 lock_end + 1 < key.offset + num_bytes) {
7221 unlock_extent(&BTRFS_I(inode)->io_tree,
7222 lock_start, lock_end, GFP_NOFS);
7223 extent_locked = 0;
7224 }
7225 }
7226
7227 if (!inode) {
7228 btrfs_release_path(root, path);
7229
7230 inode = btrfs_iget_locked(root->fs_info->sb,
7231 key.objectid, root);
7232 if (inode->i_state & I_NEW) {
7233 BTRFS_I(inode)->root = root;
7234 BTRFS_I(inode)->location.objectid =
7235 key.objectid;
7236 BTRFS_I(inode)->location.type =
7237 BTRFS_INODE_ITEM_KEY;
7238 BTRFS_I(inode)->location.offset = 0;
7239 btrfs_read_locked_inode(inode);
7240 unlock_new_inode(inode);
7241 }
7242 /*
7243 * some code call btrfs_commit_transaction while
7244 * holding the i_mutex, so we can't use mutex_lock
7245 * here.
7246 */
7247 if (is_bad_inode(inode) ||
7248 !mutex_trylock(&inode->i_mutex)) {
7249 iput(inode);
7250 inode = NULL;
7251 key.offset = (u64)-1;
7252 goto skip;
7253 }
7254 }
7255
7256 if (!extent_locked) {
7257 struct btrfs_ordered_extent *ordered;
7258
7259 btrfs_release_path(root, path);
7260
7261 lock_extent(&BTRFS_I(inode)->io_tree, lock_start,
7262 lock_end, GFP_NOFS);
7263 ordered = btrfs_lookup_first_ordered_extent(inode,
7264 lock_end);
7265 if (ordered &&
7266 ordered->file_offset <= lock_end &&
7267 ordered->file_offset + ordered->len > lock_start) {
7268 unlock_extent(&BTRFS_I(inode)->io_tree,
7269 lock_start, lock_end, GFP_NOFS);
7270 btrfs_start_ordered_extent(inode, ordered, 1);
7271 btrfs_put_ordered_extent(ordered);
7272 key.offset += num_bytes;
7273 goto skip;
7274 }
7275 if (ordered)
7276 btrfs_put_ordered_extent(ordered);
7277
7278 extent_locked = 1;
7279 continue;
7280 }
7281
7282 if (nr_extents == 1) {
7283 /* update extent pointer in place */
7284 btrfs_set_file_extent_disk_bytenr(leaf, fi,
7285 new_extents[0].disk_bytenr);
7286 btrfs_set_file_extent_disk_num_bytes(leaf, fi,
7287 new_extents[0].disk_num_bytes);
7288 btrfs_mark_buffer_dirty(leaf);
7289
7290 btrfs_drop_extent_cache(inode, key.offset,
7291 key.offset + num_bytes - 1, 0);
7292
7293 ret = btrfs_inc_extent_ref(trans, root,
7294 new_extents[0].disk_bytenr,
7295 new_extents[0].disk_num_bytes,
7296 leaf->start,
7297 root->root_key.objectid,
7298 trans->transid,
7299 key.objectid);
7300 BUG_ON(ret);
7301
7302 ret = btrfs_free_extent(trans, root,
7303 extent_key->objectid,
7304 extent_key->offset,
7305 leaf->start,
7306 btrfs_header_owner(leaf),
7307 btrfs_header_generation(leaf),
7308 key.objectid, 0);
7309 BUG_ON(ret);
7310
7311 btrfs_release_path(root, path);
7312 key.offset += num_bytes;
7313 } else {
7314 BUG_ON(1);
7315#if 0
7316 u64 alloc_hint;
7317 u64 extent_len;
7318 int i;
7319 /*
7320 * drop old extent pointer at first, then insert the
7321 * new pointers one bye one
7322 */
7323 btrfs_release_path(root, path);
7324 ret = btrfs_drop_extents(trans, root, inode, key.offset,
7325 key.offset + num_bytes,
7326 key.offset, &alloc_hint);
7327 BUG_ON(ret);
7328
7329 for (i = 0; i < nr_extents; i++) {
7330 if (ext_offset >= new_extents[i].num_bytes) {
7331 ext_offset -= new_extents[i].num_bytes;
7332 continue;
7333 }
7334 extent_len = min(new_extents[i].num_bytes -
7335 ext_offset, num_bytes);
7336
7337 ret = btrfs_insert_empty_item(trans, root,
7338 path, &key,
7339 sizeof(*fi));
7340 BUG_ON(ret);
7341
7342 leaf = path->nodes[0];
7343 fi = btrfs_item_ptr(leaf, path->slots[0],
7344 struct btrfs_file_extent_item);
7345 btrfs_set_file_extent_generation(leaf, fi,
7346 trans->transid);
7347 btrfs_set_file_extent_type(leaf, fi,
7348 BTRFS_FILE_EXTENT_REG);
7349 btrfs_set_file_extent_disk_bytenr(leaf, fi,
7350 new_extents[i].disk_bytenr);
7351 btrfs_set_file_extent_disk_num_bytes(leaf, fi,
7352 new_extents[i].disk_num_bytes);
7353 btrfs_set_file_extent_ram_bytes(leaf, fi,
7354 new_extents[i].ram_bytes);
7355
7356 btrfs_set_file_extent_compression(leaf, fi,
7357 new_extents[i].compression);
7358 btrfs_set_file_extent_encryption(leaf, fi,
7359 new_extents[i].encryption);
7360 btrfs_set_file_extent_other_encoding(leaf, fi,
7361 new_extents[i].other_encoding);
7362
7363 btrfs_set_file_extent_num_bytes(leaf, fi,
7364 extent_len);
7365 ext_offset += new_extents[i].offset;
7366 btrfs_set_file_extent_offset(leaf, fi,
7367 ext_offset);
7368 btrfs_mark_buffer_dirty(leaf);
7369
7370 btrfs_drop_extent_cache(inode, key.offset,
7371 key.offset + extent_len - 1, 0);
7372
7373 ret = btrfs_inc_extent_ref(trans, root,
7374 new_extents[i].disk_bytenr,
7375 new_extents[i].disk_num_bytes,
7376 leaf->start,
7377 root->root_key.objectid,
7378 trans->transid, key.objectid);
7379 BUG_ON(ret);
7380 btrfs_release_path(root, path);
7381
7382 inode_add_bytes(inode, extent_len);
7383
7384 ext_offset = 0;
7385 num_bytes -= extent_len;
7386 key.offset += extent_len;
7387
7388 if (num_bytes == 0)
7389 break;
7390 }
7391 BUG_ON(i >= nr_extents);
7392#endif
7393 }
7394
7395 if (extent_locked) {
7396 unlock_extent(&BTRFS_I(inode)->io_tree, lock_start,
7397 lock_end, GFP_NOFS);
7398 extent_locked = 0;
7399 }
7400skip:
7401 if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS &&
7402 key.offset >= search_end)
7403 break;
7404
7405 cond_resched();
7406 }
7407 ret = 0;
7408out:
7409 btrfs_release_path(root, path);
7410 if (inode) {
7411 mutex_unlock(&inode->i_mutex);
7412 if (extent_locked) {
7413 unlock_extent(&BTRFS_I(inode)->io_tree, lock_start,
7414 lock_end, GFP_NOFS);
7415 }
7416 iput(inode);
7417 }
7418 return ret;
7419}
7420
7421int btrfs_reloc_tree_cache_ref(struct btrfs_trans_handle *trans,
7422 struct btrfs_root *root,
7423 struct extent_buffer *buf, u64 orig_start)
7424{
7425 int level;
7426 int ret;
7427
7428 BUG_ON(btrfs_header_generation(buf) != trans->transid);
7429 BUG_ON(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID);
7430
7431 level = btrfs_header_level(buf);
7432 if (level == 0) {
7433 struct btrfs_leaf_ref *ref;
7434 struct btrfs_leaf_ref *orig_ref;
7435
7436 orig_ref = btrfs_lookup_leaf_ref(root, orig_start);
7437 if (!orig_ref)
7438 return -ENOENT;
7439
7440 ref = btrfs_alloc_leaf_ref(root, orig_ref->nritems);
7441 if (!ref) {
7442 btrfs_free_leaf_ref(root, orig_ref);
7443 return -ENOMEM;
7444 }
7445
7446 ref->nritems = orig_ref->nritems;
7447 memcpy(ref->extents, orig_ref->extents,
7448 sizeof(ref->extents[0]) * ref->nritems);
7449
7450 btrfs_free_leaf_ref(root, orig_ref);
7451
7452 ref->root_gen = trans->transid;
7453 ref->bytenr = buf->start;
7454 ref->owner = btrfs_header_owner(buf);
7455 ref->generation = btrfs_header_generation(buf);
7456
7457 ret = btrfs_add_leaf_ref(root, ref, 0);
7458 WARN_ON(ret);
7459 btrfs_free_leaf_ref(root, ref);
7460 }
7461 return 0;
7462}
7463
7464static noinline int invalidate_extent_cache(struct btrfs_root *root,
7465 struct extent_buffer *leaf,
7466 struct btrfs_block_group_cache *group,
7467 struct btrfs_root *target_root)
7468{
7469 struct btrfs_key key;
7470 struct inode *inode = NULL;
7471 struct btrfs_file_extent_item *fi;
7472 struct extent_state *cached_state = NULL;
7473 u64 num_bytes;
7474 u64 skip_objectid = 0;
7475 u32 nritems;
7476 u32 i;
7477
7478 nritems = btrfs_header_nritems(leaf);
7479 for (i = 0; i < nritems; i++) {
7480 btrfs_item_key_to_cpu(leaf, &key, i);
7481 if (key.objectid == skip_objectid ||
7482 key.type != BTRFS_EXTENT_DATA_KEY)
7483 continue;
7484 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
7485 if (btrfs_file_extent_type(leaf, fi) ==
7486 BTRFS_FILE_EXTENT_INLINE)
7487 continue;
7488 if (btrfs_file_extent_disk_bytenr(leaf, fi) == 0)
7489 continue;
7490 if (!inode || inode->i_ino != key.objectid) {
7491 iput(inode);
7492 inode = btrfs_ilookup(target_root->fs_info->sb,
7493 key.objectid, target_root, 1);
7494 }
7495 if (!inode) {
7496 skip_objectid = key.objectid;
7497 continue;
7498 }
7499 num_bytes = btrfs_file_extent_num_bytes(leaf, fi);
7500
7501 lock_extent_bits(&BTRFS_I(inode)->io_tree, key.offset,
7502 key.offset + num_bytes - 1, 0, &cached_state,
7503 GFP_NOFS);
7504 btrfs_drop_extent_cache(inode, key.offset,
7505 key.offset + num_bytes - 1, 1);
7506 unlock_extent_cached(&BTRFS_I(inode)->io_tree, key.offset,
7507 key.offset + num_bytes - 1, &cached_state,
7508 GFP_NOFS);
7509 cond_resched();
7510 }
7511 iput(inode);
7512 return 0;
7513}
7514
7515static noinline int replace_extents_in_leaf(struct btrfs_trans_handle *trans,
7516 struct btrfs_root *root,
7517 struct extent_buffer *leaf,
7518 struct btrfs_block_group_cache *group,
7519 struct inode *reloc_inode)
7520{
7521 struct btrfs_key key;
7522 struct btrfs_key extent_key;
7523 struct btrfs_file_extent_item *fi;
7524 struct btrfs_leaf_ref *ref;
7525 struct disk_extent *new_extent;
7526 u64 bytenr;
7527 u64 num_bytes;
7528 u32 nritems;
7529 u32 i;
7530 int ext_index;
7531 int nr_extent;
7532 int ret;
7533
7534 new_extent = kmalloc(sizeof(*new_extent), GFP_NOFS);
7535 if (!new_extent)
7536 return -ENOMEM;
7537
7538 ref = btrfs_lookup_leaf_ref(root, leaf->start);
7539 BUG_ON(!ref);
7540
7541 ext_index = -1;
7542 nritems = btrfs_header_nritems(leaf);
7543 for (i = 0; i < nritems; i++) {
7544 btrfs_item_key_to_cpu(leaf, &key, i);
7545 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
7546 continue;
7547 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
7548 if (btrfs_file_extent_type(leaf, fi) ==
7549 BTRFS_FILE_EXTENT_INLINE)
7550 continue;
7551 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
7552 num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
7553 if (bytenr == 0)
7554 continue;
7555
7556 ext_index++;
7557 if (bytenr >= group->key.objectid + group->key.offset ||
7558 bytenr + num_bytes <= group->key.objectid)
7559 continue;
7560
7561 extent_key.objectid = bytenr;
7562 extent_key.offset = num_bytes;
7563 extent_key.type = BTRFS_EXTENT_ITEM_KEY;
7564 nr_extent = 1;
7565 ret = get_new_locations(reloc_inode, &extent_key,
7566 group->key.objectid, 1,
7567 &new_extent, &nr_extent);
7568 if (ret > 0)
7569 continue;
7570 BUG_ON(ret < 0);
7571
7572 BUG_ON(ref->extents[ext_index].bytenr != bytenr);
7573 BUG_ON(ref->extents[ext_index].num_bytes != num_bytes);
7574 ref->extents[ext_index].bytenr = new_extent->disk_bytenr;
7575 ref->extents[ext_index].num_bytes = new_extent->disk_num_bytes;
7576
7577 btrfs_set_file_extent_disk_bytenr(leaf, fi,
7578 new_extent->disk_bytenr);
7579 btrfs_set_file_extent_disk_num_bytes(leaf, fi,
7580 new_extent->disk_num_bytes);
7581 btrfs_mark_buffer_dirty(leaf);
7582
7583 ret = btrfs_inc_extent_ref(trans, root,
7584 new_extent->disk_bytenr,
7585 new_extent->disk_num_bytes,
7586 leaf->start,
7587 root->root_key.objectid,
7588 trans->transid, key.objectid);
7589 BUG_ON(ret);
7590
7591 ret = btrfs_free_extent(trans, root,
7592 bytenr, num_bytes, leaf->start,
7593 btrfs_header_owner(leaf),
7594 btrfs_header_generation(leaf),
7595 key.objectid, 0);
7596 BUG_ON(ret);
7597 cond_resched();
7598 }
7599 kfree(new_extent);
7600 BUG_ON(ext_index + 1 != ref->nritems);
7601 btrfs_free_leaf_ref(root, ref);
7602 return 0;
7603}
7604
7605int btrfs_free_reloc_root(struct btrfs_trans_handle *trans,
7606 struct btrfs_root *root)
7607{
7608 struct btrfs_root *reloc_root;
7609 int ret;
7610
7611 if (root->reloc_root) {
7612 reloc_root = root->reloc_root;
7613 root->reloc_root = NULL;
7614 list_add(&reloc_root->dead_list,
7615 &root->fs_info->dead_reloc_roots);
7616
7617 btrfs_set_root_bytenr(&reloc_root->root_item,
7618 reloc_root->node->start);
7619 btrfs_set_root_level(&root->root_item,
7620 btrfs_header_level(reloc_root->node));
7621 memset(&reloc_root->root_item.drop_progress, 0,
7622 sizeof(struct btrfs_disk_key));
7623 reloc_root->root_item.drop_level = 0;
7624
7625 ret = btrfs_update_root(trans, root->fs_info->tree_root,
7626 &reloc_root->root_key,
7627 &reloc_root->root_item);
7628 BUG_ON(ret);
7629 }
7630 return 0;
7631}
7632
7633int btrfs_drop_dead_reloc_roots(struct btrfs_root *root)
7634{
7635 struct btrfs_trans_handle *trans;
7636 struct btrfs_root *reloc_root;
7637 struct btrfs_root *prev_root = NULL;
7638 struct list_head dead_roots;
7639 int ret;
7640 unsigned long nr;
7641
7642 INIT_LIST_HEAD(&dead_roots);
7643 list_splice_init(&root->fs_info->dead_reloc_roots, &dead_roots);
7644
7645 while (!list_empty(&dead_roots)) {
7646 reloc_root = list_entry(dead_roots.prev,
7647 struct btrfs_root, dead_list);
7648 list_del_init(&reloc_root->dead_list);
7649
7650 BUG_ON(reloc_root->commit_root != NULL);
7651 while (1) {
7652 trans = btrfs_join_transaction(root, 1);
7653 BUG_ON(IS_ERR(trans));
7654
7655 mutex_lock(&root->fs_info->drop_mutex);
7656 ret = btrfs_drop_snapshot(trans, reloc_root);
7657 if (ret != -EAGAIN)
7658 break;
7659 mutex_unlock(&root->fs_info->drop_mutex);
7660
7661 nr = trans->blocks_used;
7662 ret = btrfs_end_transaction(trans, root);
7663 BUG_ON(ret);
7664 btrfs_btree_balance_dirty(root, nr);
7665 }
7666
7667 free_extent_buffer(reloc_root->node);
7668
7669 ret = btrfs_del_root(trans, root->fs_info->tree_root,
7670 &reloc_root->root_key);
7671 BUG_ON(ret);
7672 mutex_unlock(&root->fs_info->drop_mutex);
7673
7674 nr = trans->blocks_used;
7675 ret = btrfs_end_transaction(trans, root);
7676 BUG_ON(ret);
7677 btrfs_btree_balance_dirty(root, nr);
7678
7679 kfree(prev_root);
7680 prev_root = reloc_root;
7681 }
7682 if (prev_root) {
7683 btrfs_remove_leaf_refs(prev_root, (u64)-1, 0);
7684 kfree(prev_root);
7685 }
7686 return 0;
7687}
7688
7689int btrfs_add_dead_reloc_root(struct btrfs_root *root)
7690{
7691 list_add(&root->dead_list, &root->fs_info->dead_reloc_roots);
7692 return 0;
7693}
7694
7695int btrfs_cleanup_reloc_trees(struct btrfs_root *root)
7696{
7697 struct btrfs_root *reloc_root;
7698 struct btrfs_trans_handle *trans;
7699 struct btrfs_key location;
7700 int found;
7701 int ret;
7702
7703 mutex_lock(&root->fs_info->tree_reloc_mutex);
7704 ret = btrfs_find_dead_roots(root, BTRFS_TREE_RELOC_OBJECTID, NULL);
7705 BUG_ON(ret);
7706 found = !list_empty(&root->fs_info->dead_reloc_roots);
7707 mutex_unlock(&root->fs_info->tree_reloc_mutex);
7708
7709 if (found) {
7710 trans = btrfs_start_transaction(root, 1);
7711 BUG_ON(IS_ERR(trans));
7712 ret = btrfs_commit_transaction(trans, root);
7713 BUG_ON(ret);
7714 }
7715
7716 location.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
7717 location.offset = (u64)-1;
7718 location.type = BTRFS_ROOT_ITEM_KEY;
7719
7720 reloc_root = btrfs_read_fs_root_no_name(root->fs_info, &location);
7721 BUG_ON(!reloc_root);
7722 ret = btrfs_orphan_cleanup(reloc_root);
7723 BUG_ON(ret);
7724 return 0;
7725}
7726
7727static noinline int init_reloc_tree(struct btrfs_trans_handle *trans,
7728 struct btrfs_root *root)
7729{
7730 struct btrfs_root *reloc_root;
7731 struct extent_buffer *eb;
7732 struct btrfs_root_item *root_item;
7733 struct btrfs_key root_key;
7734 int ret;
7735
7736 BUG_ON(!root->ref_cows);
7737 if (root->reloc_root)
7738 return 0;
7739
7740 root_item = kmalloc(sizeof(*root_item), GFP_NOFS);
7741 if (!root_item)
7742 return -ENOMEM;
7743
7744 ret = btrfs_copy_root(trans, root, root->commit_root,
7745 &eb, BTRFS_TREE_RELOC_OBJECTID);
7746 BUG_ON(ret);
7747
7748 root_key.objectid = BTRFS_TREE_RELOC_OBJECTID;
7749 root_key.offset = root->root_key.objectid;
7750 root_key.type = BTRFS_ROOT_ITEM_KEY;
7751
7752	memcpy(root_item, &root->root_item, sizeof(*root_item));
7753 btrfs_set_root_refs(root_item, 0);
7754 btrfs_set_root_bytenr(root_item, eb->start);
7755 btrfs_set_root_level(root_item, btrfs_header_level(eb));
7756 btrfs_set_root_generation(root_item, trans->transid);
7757
7758 btrfs_tree_unlock(eb);
7759 free_extent_buffer(eb);
7760
7761 ret = btrfs_insert_root(trans, root->fs_info->tree_root,
7762 &root_key, root_item);
7763 BUG_ON(ret);
7764 kfree(root_item);
7765
7766 reloc_root = btrfs_read_fs_root_no_radix(root->fs_info->tree_root,
7767 &root_key);
7768 BUG_ON(IS_ERR(reloc_root));
7769 reloc_root->last_trans = trans->transid;
7770 reloc_root->commit_root = NULL;
7771 reloc_root->ref_tree = &root->fs_info->reloc_ref_tree;
7772
7773 root->reloc_root = reloc_root;
7774 return 0;
7775}
7776
7777/*
7778 * Core function of space balance.
7779 *
7780 * The idea is to use reloc trees to relocate tree blocks in
7781 * reference-counted roots. There is one reloc tree for each subvol, and
7782 * all reloc trees share the same root key objectid. Reloc trees are
7783 * snapshots of the latest committed roots of subvols (root->commit_root).
7784 *
7785 * Relocating a tree block referenced by a subvol takes two steps: COW the
7786 * block through the subvol's reloc tree, then update the block pointer
7787 * in the subvol to point to the new block. Since all reloc trees share
7788 * the same root key objectid, special handling for tree blocks owned by
7789 * them is easy. Once a tree block has been COWed in one reloc tree, the
7790 * resulting new block can be used directly when the same block needs to
7791 * be COWed again through another reloc tree. In this way, relocated
7792 * tree blocks are shared between reloc trees, so they are also shared
7793 * between subvols.
7794 */
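The sharing property described above is easy to model outside the kernel. The following stand-alone C toy is illustrative only (the names are invented and nothing here is btrfs API), but it captures the key point: once a block has been COWed through one reloc tree, a later relocation of the same block through another reloc tree reuses the already-relocated copy instead of COWing it again.

#include <stdio.h>

#define MAX_BLOCKS 16

/* old bytenr -> new bytenr; 0 means "not yet COWed by any reloc tree" */
static unsigned long long relocated[MAX_BLOCKS];
static unsigned long long next_new_bytenr = 1000;   /* pretend allocator */

/* COW block @old through the reloc tree of @subvol, reusing prior copies. */
static unsigned long long reloc_cow(int subvol, int old)
{
        if (!relocated[old]) {
                relocated[old] = next_new_bytenr++;
                printf("subvol %d: COW block %d -> %llu\n",
                       subvol, old, relocated[old]);
        } else {
                printf("subvol %d: reuse block %d -> %llu (already COWed)\n",
                       subvol, old, relocated[old]);
        }
        return relocated[old];
}

int main(void)
{
        /* block 3 is referenced by two snapshots; only the first COWs it */
        reloc_cow(1, 3);
        reloc_cow(2, 3);
        return 0;
}

In the real code that follows, relocate_one_path() fills this role: it COWs the required path in the reloc tree (or reuses the nodes already recorded in ref_path->new_nodes) and then grafts the result into the fs tree with btrfs_merge_path().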
7795static noinline int relocate_one_path(struct btrfs_trans_handle *trans,
7796 struct btrfs_root *root,
7797 struct btrfs_path *path,
7798 struct btrfs_key *first_key,
7799 struct btrfs_ref_path *ref_path,
7800 struct btrfs_block_group_cache *group,
7801 struct inode *reloc_inode)
7802{
7803 struct btrfs_root *reloc_root;
7804 struct extent_buffer *eb = NULL;
7805 struct btrfs_key *keys;
7806 u64 *nodes;
7807 int level;
7808 int shared_level;
7809 int lowest_level = 0;
7810 int ret;
7811
7812 if (ref_path->owner_objectid < BTRFS_FIRST_FREE_OBJECTID)
7813 lowest_level = ref_path->owner_objectid;
7814
7815 if (!root->ref_cows) {
7816 path->lowest_level = lowest_level;
7817 ret = btrfs_search_slot(trans, root, first_key, path, 0, 1);
7818 BUG_ON(ret < 0);
7819 path->lowest_level = 0;
7820 btrfs_release_path(root, path);
7821 return 0;
7822 }
7823
7824 mutex_lock(&root->fs_info->tree_reloc_mutex);
7825 ret = init_reloc_tree(trans, root);
7826 BUG_ON(ret);
7827 reloc_root = root->reloc_root;
7828
7829 shared_level = ref_path->shared_level;
7830 ref_path->shared_level = BTRFS_MAX_LEVEL - 1;
7831
7832 keys = ref_path->node_keys;
7833 nodes = ref_path->new_nodes;
7834 memset(&keys[shared_level + 1], 0,
7835 sizeof(*keys) * (BTRFS_MAX_LEVEL - shared_level - 1));
7836 memset(&nodes[shared_level + 1], 0,
7837 sizeof(*nodes) * (BTRFS_MAX_LEVEL - shared_level - 1));
7838
7839 if (nodes[lowest_level] == 0) {
7840 path->lowest_level = lowest_level;
7841 ret = btrfs_search_slot(trans, reloc_root, first_key, path,
7842 0, 1);
7843 BUG_ON(ret);
7844 for (level = lowest_level; level < BTRFS_MAX_LEVEL; level++) {
7845 eb = path->nodes[level];
7846 if (!eb || eb == reloc_root->node)
7847 break;
7848 nodes[level] = eb->start;
7849 if (level == 0)
7850 btrfs_item_key_to_cpu(eb, &keys[level], 0);
7851 else
7852 btrfs_node_key_to_cpu(eb, &keys[level], 0);
7853 }
7854 if (nodes[0] &&
7855 ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
7856 eb = path->nodes[0];
7857 ret = replace_extents_in_leaf(trans, reloc_root, eb,
7858 group, reloc_inode);
7859 BUG_ON(ret);
7860 }
7861 btrfs_release_path(reloc_root, path);
7862 } else {
7863 ret = btrfs_merge_path(trans, reloc_root, keys, nodes,
7864 lowest_level);
7865 BUG_ON(ret);
7866 }
7867
7868 /*
7869 * replace tree blocks in the fs tree with tree blocks in
7870 * the reloc tree.
7871 */
7872 ret = btrfs_merge_path(trans, root, keys, nodes, lowest_level);
7873 BUG_ON(ret < 0);
7874
7875 if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
7876 ret = btrfs_search_slot(trans, reloc_root, first_key, path,
7877 0, 0);
7878 BUG_ON(ret);
7879 extent_buffer_get(path->nodes[0]);
7880 eb = path->nodes[0];
7881 btrfs_release_path(reloc_root, path);
7882 ret = invalidate_extent_cache(reloc_root, eb, group, root);
7883 BUG_ON(ret);
7884 free_extent_buffer(eb);
7885 }
7886
7887 mutex_unlock(&root->fs_info->tree_reloc_mutex);
7888 path->lowest_level = 0;
7889 return 0;
7890}
7891
7892static noinline int relocate_tree_block(struct btrfs_trans_handle *trans,
7893 struct btrfs_root *root,
7894 struct btrfs_path *path,
7895 struct btrfs_key *first_key,
7896 struct btrfs_ref_path *ref_path)
7897{
7898 int ret;
7899
7900 ret = relocate_one_path(trans, root, path, first_key,
7901 ref_path, NULL, NULL);
7902 BUG_ON(ret);
7903
7904 return 0;
7905}
7906
7907static noinline int del_extent_zero(struct btrfs_trans_handle *trans,
7908 struct btrfs_root *extent_root,
7909 struct btrfs_path *path,
7910 struct btrfs_key *extent_key)
7911{
7912 int ret;
7913
7914 ret = btrfs_search_slot(trans, extent_root, extent_key, path, -1, 1);
7915 if (ret)
7916 goto out;
7917 ret = btrfs_del_item(trans, extent_root, path);
7918out:
7919 btrfs_release_path(extent_root, path);
7920 return ret;
7921}
7922
7923static noinline struct btrfs_root *read_ref_root(struct btrfs_fs_info *fs_info,
7924 struct btrfs_ref_path *ref_path)
7925{
7926 struct btrfs_key root_key;
7927
7928 root_key.objectid = ref_path->root_objectid;
7929 root_key.type = BTRFS_ROOT_ITEM_KEY;
7930 if (is_cowonly_root(ref_path->root_objectid))
7931 root_key.offset = 0;
7932 else
7933 root_key.offset = (u64)-1;
7934
7935 return btrfs_read_fs_root_no_name(fs_info, &root_key);
7936}
7937
7938static noinline int relocate_one_extent(struct btrfs_root *extent_root,
7939 struct btrfs_path *path,
7940 struct btrfs_key *extent_key,
7941 struct btrfs_block_group_cache *group,
7942 struct inode *reloc_inode, int pass)
7943{
7944 struct btrfs_trans_handle *trans;
7945 struct btrfs_root *found_root;
7946 struct btrfs_ref_path *ref_path = NULL;
7947 struct disk_extent *new_extents = NULL;
7948 int nr_extents = 0;
7949 int loops;
7950 int ret;
7951 int level;
7952 struct btrfs_key first_key;
7953 u64 prev_block = 0;
7954
7955
7956 trans = btrfs_start_transaction(extent_root, 1);
7957 BUG_ON(IS_ERR(trans));
7958
7959 if (extent_key->objectid == 0) {
7960 ret = del_extent_zero(trans, extent_root, path, extent_key);
7961 goto out;
7962 }
7963
7964 ref_path = kmalloc(sizeof(*ref_path), GFP_NOFS);
7965 if (!ref_path) {
7966 ret = -ENOMEM;
7967 goto out;
7968 }
7969
7970 for (loops = 0; ; loops++) {
7971 if (loops == 0) {
7972 ret = btrfs_first_ref_path(trans, extent_root, ref_path,
7973 extent_key->objectid);
7974 } else {
7975 ret = btrfs_next_ref_path(trans, extent_root, ref_path);
7976 }
7977 if (ret < 0)
7978 goto out;
7979 if (ret > 0)
7980 break;
7981
7982 if (ref_path->root_objectid == BTRFS_TREE_LOG_OBJECTID ||
7983 ref_path->root_objectid == BTRFS_TREE_RELOC_OBJECTID)
7984 continue;
7985
7986 found_root = read_ref_root(extent_root->fs_info, ref_path);
7987 BUG_ON(!found_root);
7988 /*
7989		 * for reference-counted trees, only process reference paths
7990 * rooted at the latest committed root.
7991 */
7992 if (found_root->ref_cows &&
7993 ref_path->root_generation != found_root->root_key.offset)
7994 continue;
7995
7996 if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
7997 if (pass == 0) {
7998 /*
7999 * copy data extents to new locations
8000 */
8001 u64 group_start = group->key.objectid;
8002 ret = relocate_data_extent(reloc_inode,
8003 extent_key,
8004 group_start);
8005 if (ret < 0)
8006 goto out;
8007 break;
8008 }
8009 level = 0;
8010 } else {
8011 level = ref_path->owner_objectid;
8012 }
8013
8014 if (prev_block != ref_path->nodes[level]) {
8015 struct extent_buffer *eb;
8016 u64 block_start = ref_path->nodes[level];
8017 u64 block_size = btrfs_level_size(found_root, level);
8018
8019 eb = read_tree_block(found_root, block_start,
8020 block_size, 0);
8021 if (!eb) {
8022 ret = -EIO;
8023 goto out;
8024 }
8025 btrfs_tree_lock(eb);
8026 BUG_ON(level != btrfs_header_level(eb));
8027
8028 if (level == 0)
8029 btrfs_item_key_to_cpu(eb, &first_key, 0);
8030 else
8031 btrfs_node_key_to_cpu(eb, &first_key, 0);
8032
8033 btrfs_tree_unlock(eb);
8034 free_extent_buffer(eb);
8035 prev_block = block_start;
8036 }
8037
8038 mutex_lock(&extent_root->fs_info->trans_mutex);
8039 btrfs_record_root_in_trans(found_root);
8040 mutex_unlock(&extent_root->fs_info->trans_mutex);
8041 if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
8042 /*
8043 * try to update data extent references while
8044 * keeping metadata shared between snapshots.
8045 */
8046 if (pass == 1) {
8047 ret = relocate_one_path(trans, found_root,
8048 path, &first_key, ref_path,
8049 group, reloc_inode);
8050 if (ret < 0)
8051 goto out;
8052 continue;
8053 }
8054 /*
8055			 * use the fallback method to process the remaining
8056 * references.
8057 */
8058 if (!new_extents) {
8059 u64 group_start = group->key.objectid;
8060 new_extents = kmalloc(sizeof(*new_extents),
8061 GFP_NOFS);
8062 if (!new_extents) {
8063 ret = -ENOMEM;
8064 goto out;
8065 }
8066 nr_extents = 1;
8067 ret = get_new_locations(reloc_inode,
8068 extent_key,
8069 group_start, 1,
8070 &new_extents,
8071 &nr_extents);
8072 if (ret)
8073 goto out;
8074 }
8075 ret = replace_one_extent(trans, found_root,
8076 path, extent_key,
8077 &first_key, ref_path,
8078 new_extents, nr_extents);
8079 } else {
8080 ret = relocate_tree_block(trans, found_root, path,
8081 &first_key, ref_path);
8082 }
8083 if (ret < 0)
8084 goto out;
8085 }
8086 ret = 0;
8087out:
8088 btrfs_end_transaction(trans, extent_root);
8089 kfree(new_extents);
8090 kfree(ref_path);
8091 return ret;
8092}
8093#endif
8094
8095static u64 update_block_group_flags(struct btrfs_root *root, u64 flags) 6445
8096{ 6446
8097	u64 num_devices; 6447
@@ -8555,10 +6905,16 @@ int btrfs_read_block_groups(struct btrfs_root *root)
8555 ret = -ENOMEM; 6905 ret = -ENOMEM;
8556 goto error; 6906 goto error;
8557 } 6907 }
6908 cache->free_space_ctl = kzalloc(sizeof(*cache->free_space_ctl),
6909 GFP_NOFS);
6910 if (!cache->free_space_ctl) {
6911 kfree(cache);
6912 ret = -ENOMEM;
6913 goto error;
6914 }
8558 6915
8559 atomic_set(&cache->count, 1); 6916 atomic_set(&cache->count, 1);
8560 spin_lock_init(&cache->lock); 6917 spin_lock_init(&cache->lock);
8561 spin_lock_init(&cache->tree_lock);
8562 cache->fs_info = info; 6918 cache->fs_info = info;
8563 INIT_LIST_HEAD(&cache->list); 6919 INIT_LIST_HEAD(&cache->list);
8564 INIT_LIST_HEAD(&cache->cluster_list); 6920 INIT_LIST_HEAD(&cache->cluster_list);
@@ -8566,24 +6922,18 @@ int btrfs_read_block_groups(struct btrfs_root *root)
8566 if (need_clear) 6922 if (need_clear)
8567 cache->disk_cache_state = BTRFS_DC_CLEAR; 6923 cache->disk_cache_state = BTRFS_DC_CLEAR;
8568 6924
8569 /*
8570 * we only want to have 32k of ram per block group for keeping
8571 * track of free space, and if we pass 1/2 of that we want to
8572 * start converting things over to using bitmaps
8573 */
8574 cache->extents_thresh = ((1024 * 32) / 2) /
8575 sizeof(struct btrfs_free_space);
8576
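For reference, the threshold that the deleted lines above computed works out to a few hundred cached extents per block group. The short program below redoes the arithmetic; the struct is only an approximation of struct btrfs_free_space of this era (an rb_node, offset, bytes and a bitmap pointer, roughly 48 bytes on 64-bit), so the exact count is an assumption. Presumably the same sizing now happens behind btrfs_init_free_space_ctl(), which the updated code calls a few lines further down.

#include <stdio.h>

/* Approximation of struct btrfs_free_space (assumed ~48 bytes on 64-bit):
 * an rb_node (three pointers), offset, bytes and a bitmap pointer. */
struct toy_free_space {
        void *rb_parent, *rb_left, *rb_right;
        unsigned long long offset;
        unsigned long long bytes;
        unsigned long *bitmap;
};

int main(void)
{
        /* mirrors the removed computation: half of 32 KiB per block group,
         * divided by the per-extent bookkeeping cost */
        printf("extents_thresh ~= %zu entries\n",
               ((size_t)1024 * 32 / 2) / sizeof(struct toy_free_space));
        return 0;
}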
8577 read_extent_buffer(leaf, &cache->item, 6925 read_extent_buffer(leaf, &cache->item,
8578 btrfs_item_ptr_offset(leaf, path->slots[0]), 6926 btrfs_item_ptr_offset(leaf, path->slots[0]),
8579 sizeof(cache->item)); 6927 sizeof(cache->item));
8580 memcpy(&cache->key, &found_key, sizeof(found_key)); 6928 memcpy(&cache->key, &found_key, sizeof(found_key));
8581 6929
8582 key.objectid = found_key.objectid + found_key.offset; 6930 key.objectid = found_key.objectid + found_key.offset;
8583 btrfs_release_path(root, path); 6931 btrfs_release_path(path);
8584 cache->flags = btrfs_block_group_flags(&cache->item); 6932 cache->flags = btrfs_block_group_flags(&cache->item);
8585 cache->sectorsize = root->sectorsize; 6933 cache->sectorsize = root->sectorsize;
8586 6934
6935 btrfs_init_free_space_ctl(cache);
6936
8587 /* 6937 /*
8588 * We need to exclude the super stripes now so that the space 6938 * We need to exclude the super stripes now so that the space
8589 * info has super bytes accounted for, otherwise we'll think 6939 * info has super bytes accounted for, otherwise we'll think
@@ -8670,6 +7020,12 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
8670 cache = kzalloc(sizeof(*cache), GFP_NOFS); 7020 cache = kzalloc(sizeof(*cache), GFP_NOFS);
8671 if (!cache) 7021 if (!cache)
8672 return -ENOMEM; 7022 return -ENOMEM;
7023 cache->free_space_ctl = kzalloc(sizeof(*cache->free_space_ctl),
7024 GFP_NOFS);
7025 if (!cache->free_space_ctl) {
7026 kfree(cache);
7027 return -ENOMEM;
7028 }
8673 7029
8674 cache->key.objectid = chunk_offset; 7030 cache->key.objectid = chunk_offset;
8675 cache->key.offset = size; 7031 cache->key.offset = size;
@@ -8677,19 +7033,13 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
8677 cache->sectorsize = root->sectorsize; 7033 cache->sectorsize = root->sectorsize;
8678 cache->fs_info = root->fs_info; 7034 cache->fs_info = root->fs_info;
8679 7035
8680 /*
8681 * we only want to have 32k of ram per block group for keeping track
8682 * of free space, and if we pass 1/2 of that we want to start
8683 * converting things over to using bitmaps
8684 */
8685 cache->extents_thresh = ((1024 * 32) / 2) /
8686 sizeof(struct btrfs_free_space);
8687 atomic_set(&cache->count, 1); 7036 atomic_set(&cache->count, 1);
8688 spin_lock_init(&cache->lock); 7037 spin_lock_init(&cache->lock);
8689 spin_lock_init(&cache->tree_lock);
8690 INIT_LIST_HEAD(&cache->list); 7038 INIT_LIST_HEAD(&cache->list);
8691 INIT_LIST_HEAD(&cache->cluster_list); 7039 INIT_LIST_HEAD(&cache->cluster_list);
8692 7040
7041 btrfs_init_free_space_ctl(cache);
7042
8693 btrfs_set_block_group_used(&cache->item, bytes_used); 7043 btrfs_set_block_group_used(&cache->item, bytes_used);
8694 btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid); 7044 btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid);
8695 cache->flags = type; 7045 cache->flags = type;
@@ -8802,12 +7152,12 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
8802 if (ret < 0) 7152 if (ret < 0)
8803 goto out; 7153 goto out;
8804 if (ret > 0) 7154 if (ret > 0)
8805 btrfs_release_path(tree_root, path); 7155 btrfs_release_path(path);
8806 if (ret == 0) { 7156 if (ret == 0) {
8807 ret = btrfs_del_item(trans, tree_root, path); 7157 ret = btrfs_del_item(trans, tree_root, path);
8808 if (ret) 7158 if (ret)
8809 goto out; 7159 goto out;
8810 btrfs_release_path(tree_root, path); 7160 btrfs_release_path(path);
8811 } 7161 }
8812 7162
8813 spin_lock(&root->fs_info->block_group_cache_lock); 7163 spin_lock(&root->fs_info->block_group_cache_lock);