aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/extent-tree.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/extent-tree.c')
-rw-r--r--fs/btrfs/extent-tree.c1799
1 files changed, 75 insertions, 1724 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index c8c318494dee..c9173a7827b0 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -94,7 +94,7 @@ static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
94 return (cache->flags & bits) == bits; 94 return (cache->flags & bits) == bits;
95} 95}
96 96
97void btrfs_get_block_group(struct btrfs_block_group_cache *cache) 97static void btrfs_get_block_group(struct btrfs_block_group_cache *cache)
98{ 98{
99 atomic_inc(&cache->count); 99 atomic_inc(&cache->count);
100} 100}
@@ -105,6 +105,7 @@ void btrfs_put_block_group(struct btrfs_block_group_cache *cache)
105 WARN_ON(cache->pinned > 0); 105 WARN_ON(cache->pinned > 0);
106 WARN_ON(cache->reserved > 0); 106 WARN_ON(cache->reserved > 0);
107 WARN_ON(cache->reserved_pinned > 0); 107 WARN_ON(cache->reserved_pinned > 0);
108 kfree(cache->free_space_ctl);
108 kfree(cache); 109 kfree(cache);
109 } 110 }
110} 111}
@@ -381,7 +382,7 @@ again:
381 if (need_resched() || 382 if (need_resched() ||
382 btrfs_next_leaf(extent_root, path)) { 383 btrfs_next_leaf(extent_root, path)) {
383 caching_ctl->progress = last; 384 caching_ctl->progress = last;
384 btrfs_release_path(extent_root, path); 385 btrfs_release_path(path);
385 up_read(&fs_info->extent_commit_sem); 386 up_read(&fs_info->extent_commit_sem);
386 mutex_unlock(&caching_ctl->mutex); 387 mutex_unlock(&caching_ctl->mutex);
387 cond_resched(); 388 cond_resched();
@@ -757,8 +758,12 @@ again:
757 atomic_inc(&head->node.refs); 758 atomic_inc(&head->node.refs);
758 spin_unlock(&delayed_refs->lock); 759 spin_unlock(&delayed_refs->lock);
759 760
760 btrfs_release_path(root->fs_info->extent_root, path); 761 btrfs_release_path(path);
761 762
763 /*
764 * Mutex was contended, block until it's released and try
765 * again
766 */
762 mutex_lock(&head->mutex); 767 mutex_lock(&head->mutex);
763 mutex_unlock(&head->mutex); 768 mutex_unlock(&head->mutex);
764 btrfs_put_delayed_ref(&head->node); 769 btrfs_put_delayed_ref(&head->node);
@@ -937,7 +942,7 @@ static int convert_extent_item_v0(struct btrfs_trans_handle *trans,
937 break; 942 break;
938 } 943 }
939 } 944 }
940 btrfs_release_path(root, path); 945 btrfs_release_path(path);
941 946
942 if (owner < BTRFS_FIRST_FREE_OBJECTID) 947 if (owner < BTRFS_FIRST_FREE_OBJECTID)
943 new_size += sizeof(*bi); 948 new_size += sizeof(*bi);
@@ -950,7 +955,6 @@ static int convert_extent_item_v0(struct btrfs_trans_handle *trans,
950 BUG_ON(ret); 955 BUG_ON(ret);
951 956
952 ret = btrfs_extend_item(trans, root, path, new_size); 957 ret = btrfs_extend_item(trans, root, path, new_size);
953 BUG_ON(ret);
954 958
955 leaf = path->nodes[0]; 959 leaf = path->nodes[0];
956 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); 960 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
@@ -1045,7 +1049,7 @@ again:
1045 return 0; 1049 return 0;
1046#ifdef BTRFS_COMPAT_EXTENT_TREE_V0 1050#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
1047 key.type = BTRFS_EXTENT_REF_V0_KEY; 1051 key.type = BTRFS_EXTENT_REF_V0_KEY;
1048 btrfs_release_path(root, path); 1052 btrfs_release_path(path);
1049 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 1053 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
1050 if (ret < 0) { 1054 if (ret < 0) {
1051 err = ret; 1055 err = ret;
@@ -1083,7 +1087,7 @@ again:
1083 if (match_extent_data_ref(leaf, ref, root_objectid, 1087 if (match_extent_data_ref(leaf, ref, root_objectid,
1084 owner, offset)) { 1088 owner, offset)) {
1085 if (recow) { 1089 if (recow) {
1086 btrfs_release_path(root, path); 1090 btrfs_release_path(path);
1087 goto again; 1091 goto again;
1088 } 1092 }
1089 err = 0; 1093 err = 0;
@@ -1144,7 +1148,7 @@ static noinline int insert_extent_data_ref(struct btrfs_trans_handle *trans,
1144 if (match_extent_data_ref(leaf, ref, root_objectid, 1148 if (match_extent_data_ref(leaf, ref, root_objectid,
1145 owner, offset)) 1149 owner, offset))
1146 break; 1150 break;
1147 btrfs_release_path(root, path); 1151 btrfs_release_path(path);
1148 key.offset++; 1152 key.offset++;
1149 ret = btrfs_insert_empty_item(trans, root, path, &key, 1153 ret = btrfs_insert_empty_item(trans, root, path, &key,
1150 size); 1154 size);
@@ -1170,7 +1174,7 @@ static noinline int insert_extent_data_ref(struct btrfs_trans_handle *trans,
1170 btrfs_mark_buffer_dirty(leaf); 1174 btrfs_mark_buffer_dirty(leaf);
1171 ret = 0; 1175 ret = 0;
1172fail: 1176fail:
1173 btrfs_release_path(root, path); 1177 btrfs_release_path(path);
1174 return ret; 1178 return ret;
1175} 1179}
1176 1180
@@ -1296,7 +1300,7 @@ static noinline int lookup_tree_block_ref(struct btrfs_trans_handle *trans,
1296 ret = -ENOENT; 1300 ret = -ENOENT;
1297#ifdef BTRFS_COMPAT_EXTENT_TREE_V0 1301#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
1298 if (ret == -ENOENT && parent) { 1302 if (ret == -ENOENT && parent) {
1299 btrfs_release_path(root, path); 1303 btrfs_release_path(path);
1300 key.type = BTRFS_EXTENT_REF_V0_KEY; 1304 key.type = BTRFS_EXTENT_REF_V0_KEY;
1301 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 1305 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
1302 if (ret > 0) 1306 if (ret > 0)
@@ -1325,7 +1329,7 @@ static noinline int insert_tree_block_ref(struct btrfs_trans_handle *trans,
1325 } 1329 }
1326 1330
1327 ret = btrfs_insert_empty_item(trans, root, path, &key, 0); 1331 ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
1328 btrfs_release_path(root, path); 1332 btrfs_release_path(path);
1329 return ret; 1333 return ret;
1330} 1334}
1331 1335
@@ -1558,7 +1562,6 @@ int setup_inline_extent_backref(struct btrfs_trans_handle *trans,
1558 size = btrfs_extent_inline_ref_size(type); 1562 size = btrfs_extent_inline_ref_size(type);
1559 1563
1560 ret = btrfs_extend_item(trans, root, path, size); 1564 ret = btrfs_extend_item(trans, root, path, size);
1561 BUG_ON(ret);
1562 1565
1563 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); 1566 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
1564 refs = btrfs_extent_refs(leaf, ei); 1567 refs = btrfs_extent_refs(leaf, ei);
@@ -1611,7 +1614,7 @@ static int lookup_extent_backref(struct btrfs_trans_handle *trans,
1611 if (ret != -ENOENT) 1614 if (ret != -ENOENT)
1612 return ret; 1615 return ret;
1613 1616
1614 btrfs_release_path(root, path); 1617 btrfs_release_path(path);
1615 *ref_ret = NULL; 1618 *ref_ret = NULL;
1616 1619
1617 if (owner < BTRFS_FIRST_FREE_OBJECTID) { 1620 if (owner < BTRFS_FIRST_FREE_OBJECTID) {
@@ -1687,7 +1690,6 @@ int update_inline_extent_backref(struct btrfs_trans_handle *trans,
1687 end - ptr - size); 1690 end - ptr - size);
1688 item_size -= size; 1691 item_size -= size;
1689 ret = btrfs_truncate_item(trans, root, path, item_size, 1); 1692 ret = btrfs_truncate_item(trans, root, path, item_size, 1);
1690 BUG_ON(ret);
1691 } 1693 }
1692 btrfs_mark_buffer_dirty(leaf); 1694 btrfs_mark_buffer_dirty(leaf);
1693 return 0; 1695 return 0;
@@ -1865,7 +1867,7 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
1865 __run_delayed_extent_op(extent_op, leaf, item); 1867 __run_delayed_extent_op(extent_op, leaf, item);
1866 1868
1867 btrfs_mark_buffer_dirty(leaf); 1869 btrfs_mark_buffer_dirty(leaf);
1868 btrfs_release_path(root->fs_info->extent_root, path); 1870 btrfs_release_path(path);
1869 1871
1870 path->reada = 1; 1872 path->reada = 1;
1871 path->leave_spinning = 1; 1873 path->leave_spinning = 1;
@@ -2300,6 +2302,10 @@ again:
2300 atomic_inc(&ref->refs); 2302 atomic_inc(&ref->refs);
2301 2303
2302 spin_unlock(&delayed_refs->lock); 2304 spin_unlock(&delayed_refs->lock);
2305 /*
2306 * Mutex was contended, block until it's
2307 * released and try again
2308 */
2303 mutex_lock(&head->mutex); 2309 mutex_lock(&head->mutex);
2304 mutex_unlock(&head->mutex); 2310 mutex_unlock(&head->mutex);
2305 2311
@@ -2364,8 +2370,12 @@ static noinline int check_delayed_ref(struct btrfs_trans_handle *trans,
2364 atomic_inc(&head->node.refs); 2370 atomic_inc(&head->node.refs);
2365 spin_unlock(&delayed_refs->lock); 2371 spin_unlock(&delayed_refs->lock);
2366 2372
2367 btrfs_release_path(root->fs_info->extent_root, path); 2373 btrfs_release_path(path);
2368 2374
2375 /*
2376 * Mutex was contended, block until it's released and let
2377 * caller try again
2378 */
2369 mutex_lock(&head->mutex); 2379 mutex_lock(&head->mutex);
2370 mutex_unlock(&head->mutex); 2380 mutex_unlock(&head->mutex);
2371 btrfs_put_delayed_ref(&head->node); 2381 btrfs_put_delayed_ref(&head->node);
@@ -2513,126 +2523,6 @@ out:
2513 return ret; 2523 return ret;
2514} 2524}
2515 2525
2516#if 0
2517int btrfs_cache_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
2518 struct extent_buffer *buf, u32 nr_extents)
2519{
2520 struct btrfs_key key;
2521 struct btrfs_file_extent_item *fi;
2522 u64 root_gen;
2523 u32 nritems;
2524 int i;
2525 int level;
2526 int ret = 0;
2527 int shared = 0;
2528
2529 if (!root->ref_cows)
2530 return 0;
2531
2532 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
2533 shared = 0;
2534 root_gen = root->root_key.offset;
2535 } else {
2536 shared = 1;
2537 root_gen = trans->transid - 1;
2538 }
2539
2540 level = btrfs_header_level(buf);
2541 nritems = btrfs_header_nritems(buf);
2542
2543 if (level == 0) {
2544 struct btrfs_leaf_ref *ref;
2545 struct btrfs_extent_info *info;
2546
2547 ref = btrfs_alloc_leaf_ref(root, nr_extents);
2548 if (!ref) {
2549 ret = -ENOMEM;
2550 goto out;
2551 }
2552
2553 ref->root_gen = root_gen;
2554 ref->bytenr = buf->start;
2555 ref->owner = btrfs_header_owner(buf);
2556 ref->generation = btrfs_header_generation(buf);
2557 ref->nritems = nr_extents;
2558 info = ref->extents;
2559
2560 for (i = 0; nr_extents > 0 && i < nritems; i++) {
2561 u64 disk_bytenr;
2562 btrfs_item_key_to_cpu(buf, &key, i);
2563 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
2564 continue;
2565 fi = btrfs_item_ptr(buf, i,
2566 struct btrfs_file_extent_item);
2567 if (btrfs_file_extent_type(buf, fi) ==
2568 BTRFS_FILE_EXTENT_INLINE)
2569 continue;
2570 disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
2571 if (disk_bytenr == 0)
2572 continue;
2573
2574 info->bytenr = disk_bytenr;
2575 info->num_bytes =
2576 btrfs_file_extent_disk_num_bytes(buf, fi);
2577 info->objectid = key.objectid;
2578 info->offset = key.offset;
2579 info++;
2580 }
2581
2582 ret = btrfs_add_leaf_ref(root, ref, shared);
2583 if (ret == -EEXIST && shared) {
2584 struct btrfs_leaf_ref *old;
2585 old = btrfs_lookup_leaf_ref(root, ref->bytenr);
2586 BUG_ON(!old);
2587 btrfs_remove_leaf_ref(root, old);
2588 btrfs_free_leaf_ref(root, old);
2589 ret = btrfs_add_leaf_ref(root, ref, shared);
2590 }
2591 WARN_ON(ret);
2592 btrfs_free_leaf_ref(root, ref);
2593 }
2594out:
2595 return ret;
2596}
2597
2598/* when a block goes through cow, we update the reference counts of
2599 * everything that block points to. The internal pointers of the block
2600 * can be in just about any order, and it is likely to have clusters of
2601 * things that are close together and clusters of things that are not.
2602 *
2603 * To help reduce the seeks that come with updating all of these reference
2604 * counts, sort them by byte number before actual updates are done.
2605 *
2606 * struct refsort is used to match byte number to slot in the btree block.
2607 * we sort based on the byte number and then use the slot to actually
2608 * find the item.
2609 *
2610 * struct refsort is smaller than strcut btrfs_item and smaller than
2611 * struct btrfs_key_ptr. Since we're currently limited to the page size
2612 * for a btree block, there's no way for a kmalloc of refsorts for a
2613 * single node to be bigger than a page.
2614 */
2615struct refsort {
2616 u64 bytenr;
2617 u32 slot;
2618};
2619
2620/*
2621 * for passing into sort()
2622 */
2623static int refsort_cmp(const void *a_void, const void *b_void)
2624{
2625 const struct refsort *a = a_void;
2626 const struct refsort *b = b_void;
2627
2628 if (a->bytenr < b->bytenr)
2629 return -1;
2630 if (a->bytenr > b->bytenr)
2631 return 1;
2632 return 0;
2633}
2634#endif
2635
2636static int __btrfs_mod_ref(struct btrfs_trans_handle *trans, 2526static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
2637 struct btrfs_root *root, 2527 struct btrfs_root *root,
2638 struct extent_buffer *buf, 2528 struct extent_buffer *buf,
@@ -2735,7 +2625,7 @@ static int write_one_cache_group(struct btrfs_trans_handle *trans,
2735 bi = btrfs_item_ptr_offset(leaf, path->slots[0]); 2625 bi = btrfs_item_ptr_offset(leaf, path->slots[0]);
2736 write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item)); 2626 write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item));
2737 btrfs_mark_buffer_dirty(leaf); 2627 btrfs_mark_buffer_dirty(leaf);
2738 btrfs_release_path(extent_root, path); 2628 btrfs_release_path(path);
2739fail: 2629fail:
2740 if (ret) 2630 if (ret)
2741 return ret; 2631 return ret;
@@ -2788,7 +2678,7 @@ again:
2788 inode = lookup_free_space_inode(root, block_group, path); 2678 inode = lookup_free_space_inode(root, block_group, path);
2789 if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) { 2679 if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) {
2790 ret = PTR_ERR(inode); 2680 ret = PTR_ERR(inode);
2791 btrfs_release_path(root, path); 2681 btrfs_release_path(path);
2792 goto out; 2682 goto out;
2793 } 2683 }
2794 2684
@@ -2857,7 +2747,7 @@ again:
2857out_put: 2747out_put:
2858 iput(inode); 2748 iput(inode);
2859out_free: 2749out_free:
2860 btrfs_release_path(root, path); 2750 btrfs_release_path(path);
2861out: 2751out:
2862 spin_lock(&block_group->lock); 2752 spin_lock(&block_group->lock);
2863 block_group->disk_cache_state = dcs; 2753 block_group->disk_cache_state = dcs;
@@ -3147,7 +3037,8 @@ int btrfs_check_data_free_space(struct inode *inode, u64 bytes)
3147 /* make sure bytes are sectorsize aligned */ 3037 /* make sure bytes are sectorsize aligned */
3148 bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); 3038 bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1);
3149 3039
3150 if (root == root->fs_info->tree_root) { 3040 if (root == root->fs_info->tree_root ||
3041 BTRFS_I(inode)->location.objectid == BTRFS_FREE_INO_OBJECTID) {
3151 alloc_chunk = 0; 3042 alloc_chunk = 0;
3152 committed = 1; 3043 committed = 1;
3153 } 3044 }
@@ -3215,18 +3106,6 @@ commit_trans:
3215 goto again; 3106 goto again;
3216 } 3107 }
3217 3108
3218#if 0 /* I hope we never need this code again, just in case */
3219 printk(KERN_ERR "no space left, need %llu, %llu bytes_used, "
3220 "%llu bytes_reserved, " "%llu bytes_pinned, "
3221 "%llu bytes_readonly, %llu may use %llu total\n",
3222 (unsigned long long)bytes,
3223 (unsigned long long)data_sinfo->bytes_used,
3224 (unsigned long long)data_sinfo->bytes_reserved,
3225 (unsigned long long)data_sinfo->bytes_pinned,
3226 (unsigned long long)data_sinfo->bytes_readonly,
3227 (unsigned long long)data_sinfo->bytes_may_use,
3228 (unsigned long long)data_sinfo->total_bytes);
3229#endif
3230 return -ENOSPC; 3109 return -ENOSPC;
3231 } 3110 }
3232 data_sinfo->bytes_may_use += bytes; 3111 data_sinfo->bytes_may_use += bytes;
@@ -3429,6 +3308,10 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
3429 if (reserved == 0) 3308 if (reserved == 0)
3430 return 0; 3309 return 0;
3431 3310
3311 /* nothing to shrink - nothing to reclaim */
3312 if (root->fs_info->delalloc_bytes == 0)
3313 return 0;
3314
3432 max_reclaim = min(reserved, to_reclaim); 3315 max_reclaim = min(reserved, to_reclaim);
3433 3316
3434 while (loops < 1024) { 3317 while (loops < 1024) {
@@ -3655,8 +3538,8 @@ static void block_rsv_add_bytes(struct btrfs_block_rsv *block_rsv,
3655 spin_unlock(&block_rsv->lock); 3538 spin_unlock(&block_rsv->lock);
3656} 3539}
3657 3540
3658void block_rsv_release_bytes(struct btrfs_block_rsv *block_rsv, 3541static void block_rsv_release_bytes(struct btrfs_block_rsv *block_rsv,
3659 struct btrfs_block_rsv *dest, u64 num_bytes) 3542 struct btrfs_block_rsv *dest, u64 num_bytes)
3660{ 3543{
3661 struct btrfs_space_info *space_info = block_rsv->space_info; 3544 struct btrfs_space_info *space_info = block_rsv->space_info;
3662 3545
@@ -3859,23 +3742,7 @@ static u64 calc_global_metadata_size(struct btrfs_fs_info *fs_info)
3859 u64 meta_used; 3742 u64 meta_used;
3860 u64 data_used; 3743 u64 data_used;
3861 int csum_size = btrfs_super_csum_size(&fs_info->super_copy); 3744 int csum_size = btrfs_super_csum_size(&fs_info->super_copy);
3862#if 0
3863 /*
3864 * per tree used space accounting can be inaccuracy, so we
3865 * can't rely on it.
3866 */
3867 spin_lock(&fs_info->extent_root->accounting_lock);
3868 num_bytes = btrfs_root_used(&fs_info->extent_root->root_item);
3869 spin_unlock(&fs_info->extent_root->accounting_lock);
3870
3871 spin_lock(&fs_info->csum_root->accounting_lock);
3872 num_bytes += btrfs_root_used(&fs_info->csum_root->root_item);
3873 spin_unlock(&fs_info->csum_root->accounting_lock);
3874 3745
3875 spin_lock(&fs_info->tree_root->accounting_lock);
3876 num_bytes += btrfs_root_used(&fs_info->tree_root->root_item);
3877 spin_unlock(&fs_info->tree_root->accounting_lock);
3878#endif
3879 sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_DATA); 3746 sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_DATA);
3880 spin_lock(&sinfo->lock); 3747 spin_lock(&sinfo->lock);
3881 data_used = sinfo->bytes_used; 3748 data_used = sinfo->bytes_used;
@@ -3928,10 +3795,7 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
3928 block_rsv->reserved = block_rsv->size; 3795 block_rsv->reserved = block_rsv->size;
3929 block_rsv->full = 1; 3796 block_rsv->full = 1;
3930 } 3797 }
3931#if 0 3798
3932 printk(KERN_INFO"global block rsv size %llu reserved %llu\n",
3933 block_rsv->size, block_rsv->reserved);
3934#endif
3935 spin_unlock(&sinfo->lock); 3799 spin_unlock(&sinfo->lock);
3936 spin_unlock(&block_rsv->lock); 3800 spin_unlock(&block_rsv->lock);
3937} 3801}
@@ -3977,12 +3841,6 @@ static void release_global_block_rsv(struct btrfs_fs_info *fs_info)
3977 WARN_ON(fs_info->chunk_block_rsv.reserved > 0); 3841 WARN_ON(fs_info->chunk_block_rsv.reserved > 0);
3978} 3842}
3979 3843
3980static u64 calc_trans_metadata_size(struct btrfs_root *root, int num_items)
3981{
3982 return (root->leafsize + root->nodesize * (BTRFS_MAX_LEVEL - 1)) *
3983 3 * num_items;
3984}
3985
3986int btrfs_truncate_reserve_metadata(struct btrfs_trans_handle *trans, 3844int btrfs_truncate_reserve_metadata(struct btrfs_trans_handle *trans,
3987 struct btrfs_root *root, 3845 struct btrfs_root *root,
3988 struct btrfs_block_rsv *rsv) 3846 struct btrfs_block_rsv *rsv)
@@ -3996,7 +3854,7 @@ int btrfs_truncate_reserve_metadata(struct btrfs_trans_handle *trans,
3996 * needs to use some space. We may want to be smarter about this in the 3854 * needs to use some space. We may want to be smarter about this in the
3997 * future. 3855 * future.
3998 */ 3856 */
3999 num_bytes = calc_trans_metadata_size(root, 2); 3857 num_bytes = btrfs_calc_trans_metadata_size(root, 2);
4000 3858
4001 /* We already have enough bytes, just return */ 3859 /* We already have enough bytes, just return */
4002 if (rsv->reserved >= num_bytes) 3860 if (rsv->reserved >= num_bytes)
@@ -4024,7 +3882,7 @@ int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans,
4024 if (num_items == 0 || root->fs_info->chunk_root == root) 3882 if (num_items == 0 || root->fs_info->chunk_root == root)
4025 return 0; 3883 return 0;
4026 3884
4027 num_bytes = calc_trans_metadata_size(root, num_items); 3885 num_bytes = btrfs_calc_trans_metadata_size(root, num_items);
4028 ret = btrfs_block_rsv_add(trans, root, &root->fs_info->trans_block_rsv, 3886 ret = btrfs_block_rsv_add(trans, root, &root->fs_info->trans_block_rsv,
4029 num_bytes); 3887 num_bytes);
4030 if (!ret) { 3888 if (!ret) {
@@ -4058,14 +3916,14 @@ int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans,
4058 * added it, so this takes the reservation so we can release it later 3916 * added it, so this takes the reservation so we can release it later
4059 * when we are truly done with the orphan item. 3917 * when we are truly done with the orphan item.
4060 */ 3918 */
4061 u64 num_bytes = calc_trans_metadata_size(root, 1); 3919 u64 num_bytes = btrfs_calc_trans_metadata_size(root, 1);
4062 return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes); 3920 return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes);
4063} 3921}
4064 3922
4065void btrfs_orphan_release_metadata(struct inode *inode) 3923void btrfs_orphan_release_metadata(struct inode *inode)
4066{ 3924{
4067 struct btrfs_root *root = BTRFS_I(inode)->root; 3925 struct btrfs_root *root = BTRFS_I(inode)->root;
4068 u64 num_bytes = calc_trans_metadata_size(root, 1); 3926 u64 num_bytes = btrfs_calc_trans_metadata_size(root, 1);
4069 btrfs_block_rsv_release(root, root->orphan_block_rsv, num_bytes); 3927 btrfs_block_rsv_release(root, root->orphan_block_rsv, num_bytes);
4070} 3928}
4071 3929
@@ -4079,7 +3937,7 @@ int btrfs_snap_reserve_metadata(struct btrfs_trans_handle *trans,
4079 * two for root back/forward refs, two for directory entries 3937 * two for root back/forward refs, two for directory entries
4080 * and one for root of the snapshot. 3938 * and one for root of the snapshot.
4081 */ 3939 */
4082 u64 num_bytes = calc_trans_metadata_size(root, 5); 3940 u64 num_bytes = btrfs_calc_trans_metadata_size(root, 5);
4083 dst_rsv->space_info = src_rsv->space_info; 3941 dst_rsv->space_info = src_rsv->space_info;
4084 return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes); 3942 return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes);
4085} 3943}
@@ -4108,7 +3966,7 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
4108 3966
4109 if (nr_extents > reserved_extents) { 3967 if (nr_extents > reserved_extents) {
4110 nr_extents -= reserved_extents; 3968 nr_extents -= reserved_extents;
4111 to_reserve = calc_trans_metadata_size(root, nr_extents); 3969 to_reserve = btrfs_calc_trans_metadata_size(root, nr_extents);
4112 } else { 3970 } else {
4113 nr_extents = 0; 3971 nr_extents = 0;
4114 to_reserve = 0; 3972 to_reserve = 0;
@@ -4162,7 +4020,7 @@ void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes)
4162 4020
4163 to_free = calc_csum_metadata_size(inode, num_bytes); 4021 to_free = calc_csum_metadata_size(inode, num_bytes);
4164 if (nr_extents > 0) 4022 if (nr_extents > 0)
4165 to_free += calc_trans_metadata_size(root, nr_extents); 4023 to_free += btrfs_calc_trans_metadata_size(root, nr_extents);
4166 4024
4167 btrfs_block_rsv_release(root, &root->fs_info->delalloc_block_rsv, 4025 btrfs_block_rsv_release(root, &root->fs_info->delalloc_block_rsv,
4168 to_free); 4026 to_free);
@@ -4571,7 +4429,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
4571 NULL, refs_to_drop, 4429 NULL, refs_to_drop,
4572 is_data); 4430 is_data);
4573 BUG_ON(ret); 4431 BUG_ON(ret);
4574 btrfs_release_path(extent_root, path); 4432 btrfs_release_path(path);
4575 path->leave_spinning = 1; 4433 path->leave_spinning = 1;
4576 4434
4577 key.objectid = bytenr; 4435 key.objectid = bytenr;
@@ -4610,7 +4468,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
4610 owner_objectid, 0); 4468 owner_objectid, 0);
4611 BUG_ON(ret < 0); 4469 BUG_ON(ret < 0);
4612 4470
4613 btrfs_release_path(extent_root, path); 4471 btrfs_release_path(path);
4614 path->leave_spinning = 1; 4472 path->leave_spinning = 1;
4615 4473
4616 key.objectid = bytenr; 4474 key.objectid = bytenr;
@@ -4680,7 +4538,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
4680 ret = btrfs_del_items(trans, extent_root, path, path->slots[0], 4538 ret = btrfs_del_items(trans, extent_root, path, path->slots[0],
4681 num_to_del); 4539 num_to_del);
4682 BUG_ON(ret); 4540 BUG_ON(ret);
4683 btrfs_release_path(extent_root, path); 4541 btrfs_release_path(path);
4684 4542
4685 if (is_data) { 4543 if (is_data) {
4686 ret = btrfs_del_csums(trans, root, bytenr, num_bytes); 4544 ret = btrfs_del_csums(trans, root, bytenr, num_bytes);
@@ -4923,7 +4781,7 @@ wait_block_group_cache_progress(struct btrfs_block_group_cache *cache,
4923 return 0; 4781 return 0;
4924 4782
4925 wait_event(caching_ctl->wait, block_group_cache_done(cache) || 4783 wait_event(caching_ctl->wait, block_group_cache_done(cache) ||
4926 (cache->free_space >= num_bytes)); 4784 (cache->free_space_ctl->free_space >= num_bytes));
4927 4785
4928 put_caching_control(caching_ctl); 4786 put_caching_control(caching_ctl);
4929 return 0; 4787 return 0;
@@ -5159,13 +5017,14 @@ have_block_group:
5159 if (unlikely(block_group->ro)) 5017 if (unlikely(block_group->ro))
5160 goto loop; 5018 goto loop;
5161 5019
5162 spin_lock(&block_group->tree_lock); 5020 spin_lock(&block_group->free_space_ctl->tree_lock);
5163 if (cached && 5021 if (cached &&
5164 block_group->free_space < num_bytes + empty_size) { 5022 block_group->free_space_ctl->free_space <
5165 spin_unlock(&block_group->tree_lock); 5023 num_bytes + empty_size) {
5024 spin_unlock(&block_group->free_space_ctl->tree_lock);
5166 goto loop; 5025 goto loop;
5167 } 5026 }
5168 spin_unlock(&block_group->tree_lock); 5027 spin_unlock(&block_group->free_space_ctl->tree_lock);
5169 5028
5170 /* 5029 /*
5171 * Ok we want to try and use the cluster allocator, so lets look 5030 * Ok we want to try and use the cluster allocator, so lets look
@@ -6512,7 +6371,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
6512 trans->block_rsv = block_rsv; 6371 trans->block_rsv = block_rsv;
6513 } 6372 }
6514 } 6373 }
6515 btrfs_release_path(root, path); 6374 btrfs_release_path(path);
6516 BUG_ON(err); 6375 BUG_ON(err);
6517 6376
6518 ret = btrfs_del_root(trans, tree_root, &root->root_key); 6377 ret = btrfs_del_root(trans, tree_root, &root->root_key);
@@ -6616,1514 +6475,6 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
6616 return ret; 6475 return ret;
6617} 6476}
6618 6477
6619#if 0
6620static unsigned long calc_ra(unsigned long start, unsigned long last,
6621 unsigned long nr)
6622{
6623 return min(last, start + nr - 1);
6624}
6625
6626static noinline int relocate_inode_pages(struct inode *inode, u64 start,
6627 u64 len)
6628{
6629 u64 page_start;
6630 u64 page_end;
6631 unsigned long first_index;
6632 unsigned long last_index;
6633 unsigned long i;
6634 struct page *page;
6635 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
6636 struct file_ra_state *ra;
6637 struct btrfs_ordered_extent *ordered;
6638 unsigned int total_read = 0;
6639 unsigned int total_dirty = 0;
6640 int ret = 0;
6641
6642 ra = kzalloc(sizeof(*ra), GFP_NOFS);
6643 if (!ra)
6644 return -ENOMEM;
6645
6646 mutex_lock(&inode->i_mutex);
6647 first_index = start >> PAGE_CACHE_SHIFT;
6648 last_index = (start + len - 1) >> PAGE_CACHE_SHIFT;
6649
6650 /* make sure the dirty trick played by the caller work */
6651 ret = invalidate_inode_pages2_range(inode->i_mapping,
6652 first_index, last_index);
6653 if (ret)
6654 goto out_unlock;
6655
6656 file_ra_state_init(ra, inode->i_mapping);
6657
6658 for (i = first_index ; i <= last_index; i++) {
6659 if (total_read % ra->ra_pages == 0) {
6660 btrfs_force_ra(inode->i_mapping, ra, NULL, i,
6661 calc_ra(i, last_index, ra->ra_pages));
6662 }
6663 total_read++;
6664again:
6665 if (((u64)i << PAGE_CACHE_SHIFT) > i_size_read(inode))
6666 BUG_ON(1);
6667 page = grab_cache_page(inode->i_mapping, i);
6668 if (!page) {
6669 ret = -ENOMEM;
6670 goto out_unlock;
6671 }
6672 if (!PageUptodate(page)) {
6673 btrfs_readpage(NULL, page);
6674 lock_page(page);
6675 if (!PageUptodate(page)) {
6676 unlock_page(page);
6677 page_cache_release(page);
6678 ret = -EIO;
6679 goto out_unlock;
6680 }
6681 }
6682 wait_on_page_writeback(page);
6683
6684 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
6685 page_end = page_start + PAGE_CACHE_SIZE - 1;
6686 lock_extent(io_tree, page_start, page_end, GFP_NOFS);
6687
6688 ordered = btrfs_lookup_ordered_extent(inode, page_start);
6689 if (ordered) {
6690 unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
6691 unlock_page(page);
6692 page_cache_release(page);
6693 btrfs_start_ordered_extent(inode, ordered, 1);
6694 btrfs_put_ordered_extent(ordered);
6695 goto again;
6696 }
6697 set_page_extent_mapped(page);
6698
6699 if (i == first_index)
6700 set_extent_bits(io_tree, page_start, page_end,
6701 EXTENT_BOUNDARY, GFP_NOFS);
6702 btrfs_set_extent_delalloc(inode, page_start, page_end);
6703
6704 set_page_dirty(page);
6705 total_dirty++;
6706
6707 unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
6708 unlock_page(page);
6709 page_cache_release(page);
6710 }
6711
6712out_unlock:
6713 kfree(ra);
6714 mutex_unlock(&inode->i_mutex);
6715 balance_dirty_pages_ratelimited_nr(inode->i_mapping, total_dirty);
6716 return ret;
6717}
6718
6719static noinline int relocate_data_extent(struct inode *reloc_inode,
6720 struct btrfs_key *extent_key,
6721 u64 offset)
6722{
6723 struct btrfs_root *root = BTRFS_I(reloc_inode)->root;
6724 struct extent_map_tree *em_tree = &BTRFS_I(reloc_inode)->extent_tree;
6725 struct extent_map *em;
6726 u64 start = extent_key->objectid - offset;
6727 u64 end = start + extent_key->offset - 1;
6728
6729 em = alloc_extent_map(GFP_NOFS);
6730 BUG_ON(!em);
6731
6732 em->start = start;
6733 em->len = extent_key->offset;
6734 em->block_len = extent_key->offset;
6735 em->block_start = extent_key->objectid;
6736 em->bdev = root->fs_info->fs_devices->latest_bdev;
6737 set_bit(EXTENT_FLAG_PINNED, &em->flags);
6738
6739 /* setup extent map to cheat btrfs_readpage */
6740 lock_extent(&BTRFS_I(reloc_inode)->io_tree, start, end, GFP_NOFS);
6741 while (1) {
6742 int ret;
6743 write_lock(&em_tree->lock);
6744 ret = add_extent_mapping(em_tree, em);
6745 write_unlock(&em_tree->lock);
6746 if (ret != -EEXIST) {
6747 free_extent_map(em);
6748 break;
6749 }
6750 btrfs_drop_extent_cache(reloc_inode, start, end, 0);
6751 }
6752 unlock_extent(&BTRFS_I(reloc_inode)->io_tree, start, end, GFP_NOFS);
6753
6754 return relocate_inode_pages(reloc_inode, start, extent_key->offset);
6755}
6756
6757struct btrfs_ref_path {
6758 u64 extent_start;
6759 u64 nodes[BTRFS_MAX_LEVEL];
6760 u64 root_objectid;
6761 u64 root_generation;
6762 u64 owner_objectid;
6763 u32 num_refs;
6764 int lowest_level;
6765 int current_level;
6766 int shared_level;
6767
6768 struct btrfs_key node_keys[BTRFS_MAX_LEVEL];
6769 u64 new_nodes[BTRFS_MAX_LEVEL];
6770};
6771
6772struct disk_extent {
6773 u64 ram_bytes;
6774 u64 disk_bytenr;
6775 u64 disk_num_bytes;
6776 u64 offset;
6777 u64 num_bytes;
6778 u8 compression;
6779 u8 encryption;
6780 u16 other_encoding;
6781};
6782
6783static int is_cowonly_root(u64 root_objectid)
6784{
6785 if (root_objectid == BTRFS_ROOT_TREE_OBJECTID ||
6786 root_objectid == BTRFS_EXTENT_TREE_OBJECTID ||
6787 root_objectid == BTRFS_CHUNK_TREE_OBJECTID ||
6788 root_objectid == BTRFS_DEV_TREE_OBJECTID ||
6789 root_objectid == BTRFS_TREE_LOG_OBJECTID ||
6790 root_objectid == BTRFS_CSUM_TREE_OBJECTID)
6791 return 1;
6792 return 0;
6793}
6794
/*
 * Advance an extent back-reference walk by one step.
 *
 * @ref_path holds the chain of tree blocks from the extent at
 * ref_path->extent_start up towards a referencing tree root.  When
 * @first_time is set the state is initialized and the walk starts from
 * the extent itself; otherwise the walk resumes from the state left in
 * @ref_path by the previous call.
 *
 * The walk alternates between two phases:
 *   walk_down - step to the next sibling reference of a block already
 *               on the path (searching past the previous parent);
 *   walk_up   - follow the first reference of each block upwards until
 *               a tree root or a COW-only tree is reached.
 *
 * Returns 0 when a referencing root was found (root_objectid and
 * root_generation are filled in), 1 when all references have been
 * visited, or a negative errno on error.
 */
static noinline int __next_ref_path(struct btrfs_trans_handle *trans,
				struct btrfs_root *extent_root,
				struct btrfs_ref_path *ref_path,
				int first_time)
{
	struct extent_buffer *leaf;
	struct btrfs_path *path;
	struct btrfs_extent_ref *ref;
	struct btrfs_key key;
	struct btrfs_key found_key;
	u64 bytenr;
	u32 nritems;
	int level;
	int ret = 1;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	if (first_time) {
		ref_path->lowest_level = -1;
		ref_path->current_level = -1;
		ref_path->shared_level = -1;
		goto walk_up;
	}
walk_down:
	level = ref_path->current_level - 1;
	while (level >= -1) {
		u64 parent;
		if (level < ref_path->lowest_level)
			break;

		if (level >= 0)
			bytenr = ref_path->nodes[level];
		else
			bytenr = ref_path->extent_start;
		BUG_ON(bytenr == 0);

		parent = ref_path->nodes[level + 1];
		ref_path->nodes[level + 1] = 0;
		ref_path->current_level = level;
		BUG_ON(parent == 0);

		/* search strictly past the parent we already visited */
		key.objectid = bytenr;
		key.offset = parent + 1;
		key.type = BTRFS_EXTENT_REF_KEY;

		ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 0);
		if (ret < 0)
			goto out;
		/* an exact hit is impossible: offset is parent + 1 */
		BUG_ON(ret == 0);

		leaf = path->nodes[0];
		nritems = btrfs_header_nritems(leaf);
		if (path->slots[0] >= nritems) {
			ret = btrfs_next_leaf(extent_root, path);
			if (ret < 0)
				goto out;
			if (ret > 0)
				goto next;
			leaf = path->nodes[0];
		}

		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
		if (found_key.objectid == bytenr &&
		    found_key.type == BTRFS_EXTENT_REF_KEY) {
			/* found another ref; path diverges at this level */
			if (level < ref_path->shared_level)
				ref_path->shared_level = level;
			goto found;
		}
next:
		level--;
		btrfs_release_path(extent_root, path);
		cond_resched();
	}
	/* reached lowest level */
	ret = 1;
	goto out;
walk_up:
	level = ref_path->current_level;
	while (level < BTRFS_MAX_LEVEL - 1) {
		u64 ref_objectid;

		if (level >= 0)
			bytenr = ref_path->nodes[level];
		else
			bytenr = ref_path->extent_start;

		BUG_ON(bytenr == 0);

		/* find the first reference of @bytenr */
		key.objectid = bytenr;
		key.offset = 0;
		key.type = BTRFS_EXTENT_REF_KEY;

		ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 0);
		if (ret < 0)
			goto out;

		leaf = path->nodes[0];
		nritems = btrfs_header_nritems(leaf);
		if (path->slots[0] >= nritems) {
			ret = btrfs_next_leaf(extent_root, path);
			if (ret < 0)
				goto out;
			if (ret > 0) {
				/* the extent was freed by someone */
				if (ref_path->lowest_level == level)
					goto out;
				btrfs_release_path(extent_root, path);
				goto walk_down;
			}
			leaf = path->nodes[0];
		}

		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
		if (found_key.objectid != bytenr ||
		    found_key.type != BTRFS_EXTENT_REF_KEY) {
			/* the extent was freed by someone */
			if (ref_path->lowest_level == level) {
				ret = 1;
				goto out;
			}
			btrfs_release_path(extent_root, path);
			goto walk_down;
		}
found:
		ref = btrfs_item_ptr(leaf, path->slots[0],
				struct btrfs_extent_ref);
		ref_objectid = btrfs_ref_objectid(leaf, ref);
		if (ref_objectid < BTRFS_FIRST_FREE_OBJECTID) {
			/* ref owner below FIRST_FREE encodes a tree level */
			if (first_time) {
				level = (int)ref_objectid;
				BUG_ON(level >= BTRFS_MAX_LEVEL);
				ref_path->lowest_level = level;
				ref_path->current_level = level;
				ref_path->nodes[level] = bytenr;
			} else {
				WARN_ON(ref_objectid != level);
			}
		} else {
			WARN_ON(level != -1);
		}
		first_time = 0;

		if (ref_path->lowest_level == level) {
			ref_path->owner_objectid = ref_objectid;
			ref_path->num_refs = btrfs_ref_num_refs(leaf, ref);
		}

		/*
		 * the block is tree root or the block isn't in reference
		 * counted tree.
		 */
		if (found_key.objectid == found_key.offset ||
		    is_cowonly_root(btrfs_ref_root(leaf, ref))) {
			ref_path->root_objectid = btrfs_ref_root(leaf, ref);
			ref_path->root_generation =
				btrfs_ref_generation(leaf, ref);
			if (level < 0) {
				/* special reference from the tree log */
				ref_path->nodes[0] = found_key.offset;
				ref_path->current_level = 0;
			}
			ret = 0;
			goto out;
		}

		level++;
		BUG_ON(ref_path->nodes[level] != 0);
		ref_path->nodes[level] = found_key.offset;
		ref_path->current_level = level;

		/*
		 * the reference was created in the running transaction,
		 * no need to continue walking up.
		 */
		if (btrfs_ref_generation(leaf, ref) == trans->transid) {
			ref_path->root_objectid = btrfs_ref_root(leaf, ref);
			ref_path->root_generation =
				btrfs_ref_generation(leaf, ref);
			ret = 0;
			goto out;
		}

		btrfs_release_path(extent_root, path);
		cond_resched();
	}
	/* reached max tree level, but no tree root found. */
	BUG();
out:
	btrfs_free_path(path);
	return ret;
}
6988
6989static int btrfs_first_ref_path(struct btrfs_trans_handle *trans,
6990 struct btrfs_root *extent_root,
6991 struct btrfs_ref_path *ref_path,
6992 u64 extent_start)
6993{
6994 memset(ref_path, 0, sizeof(*ref_path));
6995 ref_path->extent_start = extent_start;
6996
6997 return __next_ref_path(trans, extent_root, ref_path, 1);
6998}
6999
/*
 * Continue a walk started by btrfs_first_ref_path(): advance @ref_path
 * to the next tree root referencing the extent.
 */
static int btrfs_next_ref_path(struct btrfs_trans_handle *trans,
			       struct btrfs_root *extent_root,
			       struct btrfs_ref_path *ref_path)
{
	return __next_ref_path(trans, extent_root, ref_path, 0);
}
7006
/*
 * Look up the file extents of @reloc_inode that cover the data extent
 * described by @extent_key and return them as an array of disk_extent.
 *
 * @offset:      where the extent starts inside the relocation inode's
 *               file range (extent_key->objectid - file position).
 * @no_fragment: if set, the extent must be backed by a single file
 *               extent; returns 1 when it is fragmented.
 * @extents:     in/out array.  If *extents is NULL an array is
 *               allocated (and grown as needed); on success ownership
 *               of the (possibly reallocated) array passes to the
 *               caller via *extents, on failure any array allocated
 *               here is freed.
 * @nr_extents:  in: capacity of *extents; out: number of entries used.
 *
 * Returns 0 on success, 1 if fragmented while @no_fragment is set,
 * or a negative errno.
 */
static noinline int get_new_locations(struct inode *reloc_inode,
				      struct btrfs_key *extent_key,
				      u64 offset, int no_fragment,
				      struct disk_extent **extents,
				      int *nr_extents)
{
	struct btrfs_root *root = BTRFS_I(reloc_inode)->root;
	struct btrfs_path *path;
	struct btrfs_file_extent_item *fi;
	struct extent_buffer *leaf;
	struct disk_extent *exts = *extents;
	struct btrfs_key found_key;
	u64 cur_pos;
	u64 last_byte;
	u32 nritems;
	int nr = 0;
	int max = *nr_extents;
	int ret;

	WARN_ON(!no_fragment && *extents);
	if (!exts) {
		max = 1;
		exts = kmalloc(sizeof(*exts) * max, GFP_NOFS);
		if (!exts)
			return -ENOMEM;
	}

	path = btrfs_alloc_path();
	if (!path) {
		/* only free what we allocated ourselves */
		if (exts != *extents)
			kfree(exts);
		return -ENOMEM;
	}

	cur_pos = extent_key->objectid - offset;
	last_byte = extent_key->objectid + extent_key->offset;
	ret = btrfs_lookup_file_extent(NULL, root, path, reloc_inode->i_ino,
				       cur_pos, 0);
	if (ret < 0)
		goto out;
	if (ret > 0) {
		ret = -ENOENT;
		goto out;
	}

	while (1) {
		leaf = path->nodes[0];
		nritems = btrfs_header_nritems(leaf);
		if (path->slots[0] >= nritems) {
			ret = btrfs_next_leaf(root, path);
			if (ret < 0)
				goto out;
			if (ret > 0)
				break;
			leaf = path->nodes[0];
		}

		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
		/* file extents must be contiguous from cur_pos onward */
		if (found_key.offset != cur_pos ||
		    found_key.type != BTRFS_EXTENT_DATA_KEY ||
		    found_key.objectid != reloc_inode->i_ino)
			break;

		fi = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_file_extent_item);
		if (btrfs_file_extent_type(leaf, fi) !=
		    BTRFS_FILE_EXTENT_REG ||
		    btrfs_file_extent_disk_bytenr(leaf, fi) == 0)
			break;

		if (nr == max) {
			/* grow the array geometrically */
			struct disk_extent *old = exts;
			max *= 2;
			exts = kzalloc(sizeof(*exts) * max, GFP_NOFS);
			if (!exts) {
				ret = -ENOMEM;
				goto out;
			}
			memcpy(exts, old, sizeof(*exts) * nr);
			if (old != *extents)
				kfree(old);
		}

		exts[nr].disk_bytenr =
			btrfs_file_extent_disk_bytenr(leaf, fi);
		exts[nr].disk_num_bytes =
			btrfs_file_extent_disk_num_bytes(leaf, fi);
		exts[nr].offset = btrfs_file_extent_offset(leaf, fi);
		exts[nr].num_bytes = btrfs_file_extent_num_bytes(leaf, fi);
		exts[nr].ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi);
		exts[nr].compression = btrfs_file_extent_compression(leaf, fi);
		exts[nr].encryption = btrfs_file_extent_encryption(leaf, fi);
		exts[nr].other_encoding = btrfs_file_extent_other_encoding(leaf,
									   fi);
		/* relocation inode extents are plain and fully used */
		BUG_ON(exts[nr].offset > 0);
		BUG_ON(exts[nr].compression || exts[nr].encryption);
		BUG_ON(exts[nr].num_bytes != exts[nr].disk_num_bytes);

		cur_pos += exts[nr].num_bytes;
		nr++;

		if (cur_pos + offset >= last_byte)
			break;

		if (no_fragment) {
			ret = 1;
			goto out;
		}
		path->slots[0]++;
	}

	BUG_ON(cur_pos + offset > last_byte);
	if (cur_pos + offset < last_byte) {
		ret = -ENOENT;
		goto out;
	}
	ret = 0;
out:
	btrfs_free_path(path);
	if (ret) {
		if (exts != *extents)
			kfree(exts);
	} else {
		*extents = exts;
		*nr_extents = nr;
	}
	return ret;
}
7135
/*
 * Rewrite every file extent item in @root that points at the data
 * extent described by @extent_key so that it points at the relocated
 * copy in @new_extents instead.
 *
 * Starting from @leaf_key (clamped to @ref_path->owner_objectid when a
 * single owner is known), the function scans forward over file extent
 * items, locking the corresponding io_tree range and waiting out any
 * ordered extents before switching each pointer, then adjusts the
 * extent references (inc ref on the new extent, free the old one).
 * The multi-extent replacement path is compiled out (#if 0) and
 * guarded by BUG_ON(1).
 *
 * Returns 0 on success or a negative errno from the tree searches.
 */
static noinline int replace_one_extent(struct btrfs_trans_handle *trans,
				struct btrfs_root *root,
				struct btrfs_path *path,
				struct btrfs_key *extent_key,
				struct btrfs_key *leaf_key,
				struct btrfs_ref_path *ref_path,
				struct disk_extent *new_extents,
				int nr_extents)
{
	struct extent_buffer *leaf;
	struct btrfs_file_extent_item *fi;
	struct inode *inode = NULL;
	struct btrfs_key key;
	u64 lock_start = 0;
	u64 lock_end = 0;
	u64 num_bytes;
	u64 ext_offset;
	u64 search_end = (u64)-1;
	u32 nritems;
	int nr_scaned = 0;
	int extent_locked = 0;
	int extent_type;
	int ret;

	memcpy(&key, leaf_key, sizeof(key));
	if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS) {
		/* start no earlier than the owner's first EXTENT_DATA item */
		if (key.objectid < ref_path->owner_objectid ||
		    (key.objectid == ref_path->owner_objectid &&
		     key.type < BTRFS_EXTENT_DATA_KEY)) {
			key.objectid = ref_path->owner_objectid;
			key.type = BTRFS_EXTENT_DATA_KEY;
			key.offset = 0;
		}
	}

	while (1) {
		ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
		if (ret < 0)
			goto out;

		leaf = path->nodes[0];
		nritems = btrfs_header_nritems(leaf);
next:
		if (extent_locked && ret > 0) {
			/*
			 * the file extent item was modified by someone
			 * before the extent got locked.
			 */
			unlock_extent(&BTRFS_I(inode)->io_tree, lock_start,
				      lock_end, GFP_NOFS);
			extent_locked = 0;
		}

		if (path->slots[0] >= nritems) {
			/* give up after crossing a couple of leaves */
			if (++nr_scaned > 2)
				break;

			BUG_ON(extent_locked);
			ret = btrfs_next_leaf(root, path);
			if (ret < 0)
				goto out;
			if (ret > 0)
				break;
			leaf = path->nodes[0];
			nritems = btrfs_header_nritems(leaf);
		}

		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);

		if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS) {
			if ((key.objectid > ref_path->owner_objectid) ||
			    (key.objectid == ref_path->owner_objectid &&
			     key.type > BTRFS_EXTENT_DATA_KEY) ||
			    key.offset >= search_end)
				break;
		}

		if (inode && key.objectid != inode->i_ino) {
			/* moved on to a different inode; drop the old one */
			BUG_ON(extent_locked);
			btrfs_release_path(root, path);
			mutex_unlock(&inode->i_mutex);
			iput(inode);
			inode = NULL;
			continue;
		}

		if (key.type != BTRFS_EXTENT_DATA_KEY) {
			path->slots[0]++;
			ret = 1;
			goto next;
		}
		fi = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_file_extent_item);
		extent_type = btrfs_file_extent_type(leaf, fi);
		if ((extent_type != BTRFS_FILE_EXTENT_REG &&
		     extent_type != BTRFS_FILE_EXTENT_PREALLOC) ||
		    (btrfs_file_extent_disk_bytenr(leaf, fi) !=
		     extent_key->objectid)) {
			path->slots[0]++;
			ret = 1;
			goto next;
		}

		num_bytes = btrfs_file_extent_num_bytes(leaf, fi);
		ext_offset = btrfs_file_extent_offset(leaf, fi);

		if (search_end == (u64)-1) {
			/* file range that could still reference the extent */
			search_end = key.offset - ext_offset +
				btrfs_file_extent_ram_bytes(leaf, fi);
		}

		if (!extent_locked) {
			lock_start = key.offset;
			lock_end = lock_start + num_bytes - 1;
		} else {
			/* re-lock if the held range doesn't cover this item */
			if (lock_start > key.offset ||
			    lock_end + 1 < key.offset + num_bytes) {
				unlock_extent(&BTRFS_I(inode)->io_tree,
					      lock_start, lock_end, GFP_NOFS);
				extent_locked = 0;
			}
		}

		if (!inode) {
			btrfs_release_path(root, path);

			/*
			 * NOTE(review): btrfs_iget_locked() can return
			 * NULL on allocation failure; that case is not
			 * handled here — verify against callers.
			 */
			inode = btrfs_iget_locked(root->fs_info->sb,
						  key.objectid, root);
			if (inode->i_state & I_NEW) {
				BTRFS_I(inode)->root = root;
				BTRFS_I(inode)->location.objectid =
					key.objectid;
				BTRFS_I(inode)->location.type =
					BTRFS_INODE_ITEM_KEY;
				BTRFS_I(inode)->location.offset = 0;
				btrfs_read_locked_inode(inode);
				unlock_new_inode(inode);
			}
			/*
			 * some code call btrfs_commit_transaction while
			 * holding the i_mutex, so we can't use mutex_lock
			 * here.
			 */
			if (is_bad_inode(inode) ||
			    !mutex_trylock(&inode->i_mutex)) {
				iput(inode);
				inode = NULL;
				key.offset = (u64)-1;
				goto skip;
			}
		}

		if (!extent_locked) {
			struct btrfs_ordered_extent *ordered;

			btrfs_release_path(root, path);

			lock_extent(&BTRFS_I(inode)->io_tree, lock_start,
				    lock_end, GFP_NOFS);
			ordered = btrfs_lookup_first_ordered_extent(inode,
								    lock_end);
			if (ordered &&
			    ordered->file_offset <= lock_end &&
			    ordered->file_offset + ordered->len > lock_start) {
				/* wait for in-flight IO, then retry range */
				unlock_extent(&BTRFS_I(inode)->io_tree,
					      lock_start, lock_end, GFP_NOFS);
				btrfs_start_ordered_extent(inode, ordered, 1);
				btrfs_put_ordered_extent(ordered);
				key.offset += num_bytes;
				goto skip;
			}
			if (ordered)
				btrfs_put_ordered_extent(ordered);

			extent_locked = 1;
			continue;
		}

		if (nr_extents == 1) {
			/* update extent pointer in place */
			btrfs_set_file_extent_disk_bytenr(leaf, fi,
						new_extents[0].disk_bytenr);
			btrfs_set_file_extent_disk_num_bytes(leaf, fi,
						new_extents[0].disk_num_bytes);
			btrfs_mark_buffer_dirty(leaf);

			btrfs_drop_extent_cache(inode, key.offset,
						key.offset + num_bytes - 1, 0);

			ret = btrfs_inc_extent_ref(trans, root,
						new_extents[0].disk_bytenr,
						new_extents[0].disk_num_bytes,
						leaf->start,
						root->root_key.objectid,
						trans->transid,
						key.objectid);
			BUG_ON(ret);

			ret = btrfs_free_extent(trans, root,
						extent_key->objectid,
						extent_key->offset,
						leaf->start,
						btrfs_header_owner(leaf),
						btrfs_header_generation(leaf),
						key.objectid, 0);
			BUG_ON(ret);

			btrfs_release_path(root, path);
			key.offset += num_bytes;
		} else {
			BUG_ON(1);
#if 0
			u64 alloc_hint;
			u64 extent_len;
			int i;
			/*
			 * drop old extent pointer at first, then insert the
			 * new pointers one bye one
			 */
			btrfs_release_path(root, path);
			ret = btrfs_drop_extents(trans, root, inode, key.offset,
						 key.offset + num_bytes,
						 key.offset, &alloc_hint);
			BUG_ON(ret);

			for (i = 0; i < nr_extents; i++) {
				if (ext_offset >= new_extents[i].num_bytes) {
					ext_offset -= new_extents[i].num_bytes;
					continue;
				}
				extent_len = min(new_extents[i].num_bytes -
						 ext_offset, num_bytes);

				ret = btrfs_insert_empty_item(trans, root,
							      path, &key,
							      sizeof(*fi));
				BUG_ON(ret);

				leaf = path->nodes[0];
				fi = btrfs_item_ptr(leaf, path->slots[0],
						struct btrfs_file_extent_item);
				btrfs_set_file_extent_generation(leaf, fi,
							trans->transid);
				btrfs_set_file_extent_type(leaf, fi,
							BTRFS_FILE_EXTENT_REG);
				btrfs_set_file_extent_disk_bytenr(leaf, fi,
						new_extents[i].disk_bytenr);
				btrfs_set_file_extent_disk_num_bytes(leaf, fi,
						new_extents[i].disk_num_bytes);
				btrfs_set_file_extent_ram_bytes(leaf, fi,
						new_extents[i].ram_bytes);

				btrfs_set_file_extent_compression(leaf, fi,
						new_extents[i].compression);
				btrfs_set_file_extent_encryption(leaf, fi,
						new_extents[i].encryption);
				btrfs_set_file_extent_other_encoding(leaf, fi,
						new_extents[i].other_encoding);

				btrfs_set_file_extent_num_bytes(leaf, fi,
							extent_len);
				ext_offset += new_extents[i].offset;
				btrfs_set_file_extent_offset(leaf, fi,
							ext_offset);
				btrfs_mark_buffer_dirty(leaf);

				btrfs_drop_extent_cache(inode, key.offset,
						key.offset + extent_len - 1, 0);

				ret = btrfs_inc_extent_ref(trans, root,
						new_extents[i].disk_bytenr,
						new_extents[i].disk_num_bytes,
						leaf->start,
						root->root_key.objectid,
						trans->transid, key.objectid);
				BUG_ON(ret);
				btrfs_release_path(root, path);

				inode_add_bytes(inode, extent_len);

				ext_offset = 0;
				num_bytes -= extent_len;
				key.offset += extent_len;

				if (num_bytes == 0)
					break;
			}
			BUG_ON(i >= nr_extents);
#endif
		}

		if (extent_locked) {
			unlock_extent(&BTRFS_I(inode)->io_tree, lock_start,
				      lock_end, GFP_NOFS);
			extent_locked = 0;
		}
skip:
		if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS &&
		    key.offset >= search_end)
			break;

		cond_resched();
	}
	ret = 0;
out:
	btrfs_release_path(root, path);
	if (inode) {
		mutex_unlock(&inode->i_mutex);
		if (extent_locked) {
			unlock_extent(&BTRFS_I(inode)->io_tree, lock_start,
				      lock_end, GFP_NOFS);
		}
		iput(inode);
	}
	return ret;
}
7452
7453int btrfs_reloc_tree_cache_ref(struct btrfs_trans_handle *trans,
7454 struct btrfs_root *root,
7455 struct extent_buffer *buf, u64 orig_start)
7456{
7457 int level;
7458 int ret;
7459
7460 BUG_ON(btrfs_header_generation(buf) != trans->transid);
7461 BUG_ON(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID);
7462
7463 level = btrfs_header_level(buf);
7464 if (level == 0) {
7465 struct btrfs_leaf_ref *ref;
7466 struct btrfs_leaf_ref *orig_ref;
7467
7468 orig_ref = btrfs_lookup_leaf_ref(root, orig_start);
7469 if (!orig_ref)
7470 return -ENOENT;
7471
7472 ref = btrfs_alloc_leaf_ref(root, orig_ref->nritems);
7473 if (!ref) {
7474 btrfs_free_leaf_ref(root, orig_ref);
7475 return -ENOMEM;
7476 }
7477
7478 ref->nritems = orig_ref->nritems;
7479 memcpy(ref->extents, orig_ref->extents,
7480 sizeof(ref->extents[0]) * ref->nritems);
7481
7482 btrfs_free_leaf_ref(root, orig_ref);
7483
7484 ref->root_gen = trans->transid;
7485 ref->bytenr = buf->start;
7486 ref->owner = btrfs_header_owner(buf);
7487 ref->generation = btrfs_header_generation(buf);
7488
7489 ret = btrfs_add_leaf_ref(root, ref, 0);
7490 WARN_ON(ret);
7491 btrfs_free_leaf_ref(root, ref);
7492 }
7493 return 0;
7494}
7495
/*
 * For every non-inline, non-hole file extent item in @leaf, drop the
 * cached extent mapping of the corresponding @target_root inode over
 * that file range, so the mappings are re-read after relocation.
 * Inodes not present in the inode cache are skipped (tracked via
 * skip_objectid to avoid repeated failed lookups).  Always returns 0.
 */
static noinline int invalidate_extent_cache(struct btrfs_root *root,
					struct extent_buffer *leaf,
					struct btrfs_block_group_cache *group,
					struct btrfs_root *target_root)
{
	struct btrfs_key key;
	struct inode *inode = NULL;
	struct btrfs_file_extent_item *fi;
	struct extent_state *cached_state = NULL;
	u64 num_bytes;
	u64 skip_objectid = 0;
	u32 nritems;
	u32 i;

	nritems = btrfs_header_nritems(leaf);
	for (i = 0; i < nritems; i++) {
		btrfs_item_key_to_cpu(leaf, &key, i);
		if (key.objectid == skip_objectid ||
		    key.type != BTRFS_EXTENT_DATA_KEY)
			continue;
		fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
		if (btrfs_file_extent_type(leaf, fi) ==
		    BTRFS_FILE_EXTENT_INLINE)
			continue;
		if (btrfs_file_extent_disk_bytenr(leaf, fi) == 0)
			continue;
		if (!inode || inode->i_ino != key.objectid) {
			/* iput(NULL) is a no-op, safe on first pass */
			iput(inode);
			inode = btrfs_ilookup(target_root->fs_info->sb,
					      key.objectid, target_root, 1);
		}
		if (!inode) {
			/* not cached; skip remaining items of this inode */
			skip_objectid = key.objectid;
			continue;
		}
		num_bytes = btrfs_file_extent_num_bytes(leaf, fi);

		lock_extent_bits(&BTRFS_I(inode)->io_tree, key.offset,
				 key.offset + num_bytes - 1, 0, &cached_state,
				 GFP_NOFS);
		btrfs_drop_extent_cache(inode, key.offset,
					key.offset + num_bytes - 1, 1);
		unlock_extent_cached(&BTRFS_I(inode)->io_tree, key.offset,
				     key.offset + num_bytes - 1, &cached_state,
				     GFP_NOFS);
		cond_resched();
	}
	iput(inode);
	return 0;
}
7546
/*
 * Rewrite the file extent items of reloc-tree leaf @leaf that fall
 * inside block group @group so they point at the relocated copies held
 * by @reloc_inode, updating the cached leaf ref and the extent
 * references (inc ref on the new extent, free the old) as it goes.
 * Returns 0 on success or -ENOMEM.
 */
static noinline int replace_extents_in_leaf(struct btrfs_trans_handle *trans,
					struct btrfs_root *root,
					struct extent_buffer *leaf,
					struct btrfs_block_group_cache *group,
					struct inode *reloc_inode)
{
	struct btrfs_key key;
	struct btrfs_key extent_key;
	struct btrfs_file_extent_item *fi;
	struct btrfs_leaf_ref *ref;
	struct disk_extent *new_extent;
	u64 bytenr;
	u64 num_bytes;
	u32 nritems;
	u32 i;
	int ext_index;
	int nr_extent;
	int ret;

	new_extent = kmalloc(sizeof(*new_extent), GFP_NOFS);
	if (!new_extent)
		return -ENOMEM;

	ref = btrfs_lookup_leaf_ref(root, leaf->start);
	BUG_ON(!ref);

	ext_index = -1;
	nritems = btrfs_header_nritems(leaf);
	for (i = 0; i < nritems; i++) {
		btrfs_item_key_to_cpu(leaf, &key, i);
		if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
			continue;
		fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
		if (btrfs_file_extent_type(leaf, fi) ==
		    BTRFS_FILE_EXTENT_INLINE)
			continue;
		bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
		num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
		if (bytenr == 0)
			continue;

		/* ext_index tracks position within the cached leaf ref */
		ext_index++;
		if (bytenr >= group->key.objectid + group->key.offset ||
		    bytenr + num_bytes <= group->key.objectid)
			continue;

		extent_key.objectid = bytenr;
		extent_key.offset = num_bytes;
		extent_key.type = BTRFS_EXTENT_ITEM_KEY;
		nr_extent = 1;
		/* no_fragment = 1: the new location must be one extent */
		ret = get_new_locations(reloc_inode, &extent_key,
					group->key.objectid, 1,
					&new_extent, &nr_extent);
		if (ret > 0)
			continue;
		BUG_ON(ret < 0);

		BUG_ON(ref->extents[ext_index].bytenr != bytenr);
		BUG_ON(ref->extents[ext_index].num_bytes != num_bytes);
		ref->extents[ext_index].bytenr = new_extent->disk_bytenr;
		ref->extents[ext_index].num_bytes = new_extent->disk_num_bytes;

		btrfs_set_file_extent_disk_bytenr(leaf, fi,
						new_extent->disk_bytenr);
		btrfs_set_file_extent_disk_num_bytes(leaf, fi,
						new_extent->disk_num_bytes);
		btrfs_mark_buffer_dirty(leaf);

		ret = btrfs_inc_extent_ref(trans, root,
					new_extent->disk_bytenr,
					new_extent->disk_num_bytes,
					leaf->start,
					root->root_key.objectid,
					trans->transid, key.objectid);
		BUG_ON(ret);

		ret = btrfs_free_extent(trans, root,
					bytenr, num_bytes, leaf->start,
					btrfs_header_owner(leaf),
					btrfs_header_generation(leaf),
					key.objectid, 0);
		BUG_ON(ret);
		cond_resched();
	}
	kfree(new_extent);
	BUG_ON(ext_index + 1 != ref->nritems);
	btrfs_free_leaf_ref(root, ref);
	return 0;
}
7636
7637int btrfs_free_reloc_root(struct btrfs_trans_handle *trans,
7638 struct btrfs_root *root)
7639{
7640 struct btrfs_root *reloc_root;
7641 int ret;
7642
7643 if (root->reloc_root) {
7644 reloc_root = root->reloc_root;
7645 root->reloc_root = NULL;
7646 list_add(&reloc_root->dead_list,
7647 &root->fs_info->dead_reloc_roots);
7648
7649 btrfs_set_root_bytenr(&reloc_root->root_item,
7650 reloc_root->node->start);
7651 btrfs_set_root_level(&root->root_item,
7652 btrfs_header_level(reloc_root->node));
7653 memset(&reloc_root->root_item.drop_progress, 0,
7654 sizeof(struct btrfs_disk_key));
7655 reloc_root->root_item.drop_level = 0;
7656
7657 ret = btrfs_update_root(trans, root->fs_info->tree_root,
7658 &reloc_root->root_key,
7659 &reloc_root->root_item);
7660 BUG_ON(ret);
7661 }
7662 return 0;
7663}
7664
/*
 * Delete all relocation trees queued on fs_info->dead_reloc_roots:
 * drop each snapshot (resuming with fresh transactions on -EAGAIN),
 * remove its root item from the tree root, and free the structure.
 * The previous root is freed one iteration late so its leaf refs can
 * be pruned first.  Returns 0.
 */
int btrfs_drop_dead_reloc_roots(struct btrfs_root *root)
{
	struct btrfs_trans_handle *trans;
	struct btrfs_root *reloc_root;
	struct btrfs_root *prev_root = NULL;
	struct list_head dead_roots;
	int ret;
	unsigned long nr;

	INIT_LIST_HEAD(&dead_roots);
	list_splice_init(&root->fs_info->dead_reloc_roots, &dead_roots);

	while (!list_empty(&dead_roots)) {
		reloc_root = list_entry(dead_roots.prev,
					struct btrfs_root, dead_list);
		list_del_init(&reloc_root->dead_list);

		BUG_ON(reloc_root->commit_root != NULL);
		while (1) {
			trans = btrfs_join_transaction(root);
			BUG_ON(IS_ERR(trans));

			mutex_lock(&root->fs_info->drop_mutex);
			ret = btrfs_drop_snapshot(trans, reloc_root);
			/*
			 * on success/failure break with the transaction
			 * still open and drop_mutex held; -EAGAIN means
			 * "made progress, end the transaction and retry"
			 */
			if (ret != -EAGAIN)
				break;
			mutex_unlock(&root->fs_info->drop_mutex);

			nr = trans->blocks_used;
			ret = btrfs_end_transaction(trans, root);
			BUG_ON(ret);
			btrfs_btree_balance_dirty(root, nr);
		}

		free_extent_buffer(reloc_root->node);

		/* uses the transaction left open by the break above */
		ret = btrfs_del_root(trans, root->fs_info->tree_root,
				     &reloc_root->root_key);
		BUG_ON(ret);
		mutex_unlock(&root->fs_info->drop_mutex);

		nr = trans->blocks_used;
		ret = btrfs_end_transaction(trans, root);
		BUG_ON(ret);
		btrfs_btree_balance_dirty(root, nr);

		/* free the previous root now that this pass is done */
		kfree(prev_root);
		prev_root = reloc_root;
	}
	if (prev_root) {
		btrfs_remove_leaf_refs(prev_root, (u64)-1, 0);
		kfree(prev_root);
	}
	return 0;
}
7720
/*
 * Queue @root on the list of reloc roots to be deleted by
 * btrfs_drop_dead_reloc_roots().  Always returns 0.
 */
int btrfs_add_dead_reloc_root(struct btrfs_root *root)
{
	list_add(&root->dead_list, &root->fs_info->dead_reloc_roots);
	return 0;
}
7726
/*
 * Clean up leftover relocation state (presumably run during mount —
 * verify against callers): find any dead reloc roots left on disk,
 * commit a transaction to finish deleting them, then run orphan
 * cleanup on the data relocation tree.  Returns 0.
 */
int btrfs_cleanup_reloc_trees(struct btrfs_root *root)
{
	struct btrfs_root *reloc_root;
	struct btrfs_trans_handle *trans;
	struct btrfs_key location;
	int found;
	int ret;

	mutex_lock(&root->fs_info->tree_reloc_mutex);
	ret = btrfs_find_dead_roots(root, BTRFS_TREE_RELOC_OBJECTID, NULL);
	BUG_ON(ret);
	found = !list_empty(&root->fs_info->dead_reloc_roots);
	mutex_unlock(&root->fs_info->tree_reloc_mutex);

	if (found) {
		/* committing drops the dead reloc roots found above */
		trans = btrfs_start_transaction(root, 1);
		BUG_ON(IS_ERR(trans));
		ret = btrfs_commit_transaction(trans, root);
		BUG_ON(ret);
	}

	location.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
	location.offset = (u64)-1;
	location.type = BTRFS_ROOT_ITEM_KEY;

	reloc_root = btrfs_read_fs_root_no_name(root->fs_info, &location);
	BUG_ON(!reloc_root);
	ret = btrfs_orphan_cleanup(reloc_root);
	BUG_ON(ret);
	return 0;
}
7758
7759static noinline int init_reloc_tree(struct btrfs_trans_handle *trans,
7760 struct btrfs_root *root)
7761{
7762 struct btrfs_root *reloc_root;
7763 struct extent_buffer *eb;
7764 struct btrfs_root_item *root_item;
7765 struct btrfs_key root_key;
7766 int ret;
7767
7768 BUG_ON(!root->ref_cows);
7769 if (root->reloc_root)
7770 return 0;
7771
7772 root_item = kmalloc(sizeof(*root_item), GFP_NOFS);
7773 if (!root_item)
7774 return -ENOMEM;
7775
7776 ret = btrfs_copy_root(trans, root, root->commit_root,
7777 &eb, BTRFS_TREE_RELOC_OBJECTID);
7778 BUG_ON(ret);
7779
7780 root_key.objectid = BTRFS_TREE_RELOC_OBJECTID;
7781 root_key.offset = root->root_key.objectid;
7782 root_key.type = BTRFS_ROOT_ITEM_KEY;
7783
7784 memcpy(root_item, &root->root_item, sizeof(root_item));
7785 btrfs_set_root_refs(root_item, 0);
7786 btrfs_set_root_bytenr(root_item, eb->start);
7787 btrfs_set_root_level(root_item, btrfs_header_level(eb));
7788 btrfs_set_root_generation(root_item, trans->transid);
7789
7790 btrfs_tree_unlock(eb);
7791 free_extent_buffer(eb);
7792
7793 ret = btrfs_insert_root(trans, root->fs_info->tree_root,
7794 &root_key, root_item);
7795 BUG_ON(ret);
7796 kfree(root_item);
7797
7798 reloc_root = btrfs_read_fs_root_no_radix(root->fs_info->tree_root,
7799 &root_key);
7800 BUG_ON(IS_ERR(reloc_root));
7801 reloc_root->last_trans = trans->transid;
7802 reloc_root->commit_root = NULL;
7803 reloc_root->ref_tree = &root->fs_info->reloc_ref_tree;
7804
7805 root->reloc_root = reloc_root;
7806 return 0;
7807}
7808
7809/*
7810 * Core function of space balance.
7811 *
7812 * The idea is using reloc trees to relocate tree blocks in reference
7813 * counted roots. There is one reloc tree for each subvol, and all
7814 * reloc trees share same root key objectid. Reloc trees are snapshots
7815 * of the latest committed roots of subvols (root->commit_root).
7816 *
7817 * To relocate a tree block referenced by a subvol, there are two steps.
7818 * COW the block through subvol's reloc tree, then update block pointer
7819 * in the subvol to point to the new block. Since all reloc trees share
7820 * same root key objectid, doing special handing for tree blocks owned
7821 * by them is easy. Once a tree block has been COWed in one reloc tree,
7822 * we can use the resulting new block directly when the same block is
7823 * required to COW again through other reloc trees. By this way, relocated
7824 * tree blocks are shared between reloc trees, so they are also shared
7825 * between subvols.
7826 */
/*
 * Relocate one path of tree blocks (see the design comment above):
 * COW the path through @root's reloc tree, replace the extent pointers
 * in the resulting leaf if relocating file data, then merge the reloc
 * tree path back into @root and invalidate affected extent caches.
 * Returns 0 (hard failures are BUG()s).
 */
static noinline int relocate_one_path(struct btrfs_trans_handle *trans,
				struct btrfs_root *root,
				struct btrfs_path *path,
				struct btrfs_key *first_key,
				struct btrfs_ref_path *ref_path,
				struct btrfs_block_group_cache *group,
				struct inode *reloc_inode)
{
	struct btrfs_root *reloc_root;
	struct extent_buffer *eb = NULL;
	struct btrfs_key *keys;
	u64 *nodes;
	int level;
	int shared_level;
	int lowest_level = 0;
	int ret;

	/* owner below FIRST_FREE encodes the level of the block itself */
	if (ref_path->owner_objectid < BTRFS_FIRST_FREE_OBJECTID)
		lowest_level = ref_path->owner_objectid;

	if (!root->ref_cows) {
		/* not reference counted: a plain COW search is enough */
		path->lowest_level = lowest_level;
		ret = btrfs_search_slot(trans, root, first_key, path, 0, 1);
		BUG_ON(ret < 0);
		path->lowest_level = 0;
		btrfs_release_path(root, path);
		return 0;
	}

	mutex_lock(&root->fs_info->tree_reloc_mutex);
	ret = init_reloc_tree(trans, root);
	BUG_ON(ret);
	reloc_root = root->reloc_root;

	shared_level = ref_path->shared_level;
	ref_path->shared_level = BTRFS_MAX_LEVEL - 1;

	keys = ref_path->node_keys;
	nodes = ref_path->new_nodes;
	/* keep cached keys/nodes up to the level shared with last path */
	memset(&keys[shared_level + 1], 0,
	       sizeof(*keys) * (BTRFS_MAX_LEVEL - shared_level - 1));
	memset(&nodes[shared_level + 1], 0,
	       sizeof(*nodes) * (BTRFS_MAX_LEVEL - shared_level - 1));

	if (nodes[lowest_level] == 0) {
		/* first time down this path: COW it in the reloc tree */
		path->lowest_level = lowest_level;
		ret = btrfs_search_slot(trans, reloc_root, first_key, path,
					0, 1);
		BUG_ON(ret);
		for (level = lowest_level; level < BTRFS_MAX_LEVEL; level++) {
			eb = path->nodes[level];
			if (!eb || eb == reloc_root->node)
				break;
			nodes[level] = eb->start;
			if (level == 0)
				btrfs_item_key_to_cpu(eb, &keys[level], 0);
			else
				btrfs_node_key_to_cpu(eb, &keys[level], 0);
		}
		if (nodes[0] &&
		    ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
			/* data relocation: rewrite the leaf's extent ptrs */
			eb = path->nodes[0];
			ret = replace_extents_in_leaf(trans, reloc_root, eb,
						      group, reloc_inode);
			BUG_ON(ret);
		}
		btrfs_release_path(reloc_root, path);
	} else {
		ret = btrfs_merge_path(trans, reloc_root, keys, nodes,
				       lowest_level);
		BUG_ON(ret);
	}

	/*
	 * replace tree blocks in the fs tree with tree blocks in
	 * the reloc tree.
	 */
	ret = btrfs_merge_path(trans, root, keys, nodes, lowest_level);
	BUG_ON(ret < 0);

	if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
		ret = btrfs_search_slot(trans, reloc_root, first_key, path,
					0, 0);
		BUG_ON(ret);
		extent_buffer_get(path->nodes[0]);
		eb = path->nodes[0];
		btrfs_release_path(reloc_root, path);
		ret = invalidate_extent_cache(reloc_root, eb, group, root);
		BUG_ON(ret);
		free_extent_buffer(eb);
	}

	mutex_unlock(&root->fs_info->tree_reloc_mutex);
	path->lowest_level = 0;
	return 0;
}
7923
7924static noinline int relocate_tree_block(struct btrfs_trans_handle *trans,
7925 struct btrfs_root *root,
7926 struct btrfs_path *path,
7927 struct btrfs_key *first_key,
7928 struct btrfs_ref_path *ref_path)
7929{
7930 int ret;
7931
7932 ret = relocate_one_path(trans, root, path, first_key,
7933 ref_path, NULL, NULL);
7934 BUG_ON(ret);
7935
7936 return 0;
7937}
7938
/*
 * Delete the extent item identified by @extent_key (used for items with
 * objectid 0, which the caller treats as bogus and simply removes).
 *
 * Searches with ins_len == -1 / cow == 1 so the leaf is COWed and prepared
 * for deletion; if the exact key is found (ret == 0) the item is deleted.
 * The path is always released before returning.
 *
 * Returns 0 on success, non-zero from btrfs_search_slot()/btrfs_del_item()
 * otherwise (a positive search return means the key was not found).
 */
7939 static noinline int del_extent_zero(struct btrfs_trans_handle *trans,
7940 					struct btrfs_root *extent_root,
7941 					struct btrfs_path *path,
7942 					struct btrfs_key *extent_key)
7943 {
7944 	int ret;
7945 
7946 	ret = btrfs_search_slot(trans, extent_root, extent_key, path, -1, 1);
7947 	if (ret)
7948 		goto out;
7949 	ret = btrfs_del_item(trans, extent_root, path);
7950 out:
7951 	btrfs_release_path(extent_root, path);
7952 	return ret;
7953 }
7954
/*
 * Look up the root a reference path originates from.
 *
 * Builds a ROOT_ITEM key from ref_path->root_objectid: COW-only roots are
 * keyed with offset 0, everything else with (u64)-1 (presumably to match
 * the most recent root item — matches the usual btrfs lookup convention).
 *
 * Returns the root from btrfs_read_fs_root_no_name() (may be an ERR_PTR /
 * NULL per that helper's contract — callers here BUG_ON a NULL result).
 */
7955 static noinline struct btrfs_root *read_ref_root(struct btrfs_fs_info *fs_info,
7956 					struct btrfs_ref_path *ref_path)
7957 {
7958 	struct btrfs_key root_key;
7959 
7960 	root_key.objectid = ref_path->root_objectid;
7961 	root_key.type = BTRFS_ROOT_ITEM_KEY;
7962 	if (is_cowonly_root(ref_path->root_objectid))
7963 		root_key.offset = 0;
7964 	else
7965 		root_key.offset = (u64)-1;
7966 
7967 	return btrfs_read_fs_root_no_name(fs_info, &root_key);
7968 }
7969
/*
 * Relocate all references to the extent described by @extent_key.
 *
 * Iterates every reference path to the extent (btrfs_first_ref_path /
 * btrfs_next_ref_path) inside a private transaction and, per path:
 *
 *  - data extent references (owner_objectid >= BTRFS_FIRST_FREE_OBJECTID)
 *    are handled in passes: pass 0 copies the data into @reloc_inode via
 *    relocate_data_extent() and stops; pass 1 tries relocate_one_path()
 *    to keep metadata shared between snapshots; anything left falls back
 *    to replace_one_extent() with locations from get_new_locations();
 *  - tree block references are handled by relocate_tree_block().
 *
 * Extent items with objectid 0 are simply deleted (del_extent_zero).
 *
 * Returns 0 on success or a negative errno; many internal failures are
 * treated as fatal (BUG_ON).
 */
7970 static noinline int relocate_one_extent(struct btrfs_root *extent_root,
7971 					struct btrfs_path *path,
7972 					struct btrfs_key *extent_key,
7973 					struct btrfs_block_group_cache *group,
7974 					struct inode *reloc_inode, int pass)
7975 {
7976 	struct btrfs_trans_handle *trans;
7977 	struct btrfs_root *found_root;
7978 	struct btrfs_ref_path *ref_path = NULL;
7979 	struct disk_extent *new_extents = NULL;
7980 	int nr_extents = 0;
7981 	int loops;
7982 	int ret;
7983 	int level;
7984 	struct btrfs_key first_key;
7985 	u64 prev_block = 0;
7986 
7987 
7988 	trans = btrfs_start_transaction(extent_root, 1);
7989 	BUG_ON(IS_ERR(trans));
7990 
	/* objectid 0 marks an item to discard rather than relocate */
7991 	if (extent_key->objectid == 0) {
7992 		ret = del_extent_zero(trans, extent_root, path, extent_key);
7993 		goto out;
7994 	}
7995 
7996 	ref_path = kmalloc(sizeof(*ref_path), GFP_NOFS);
7997 	if (!ref_path) {
7998 		ret = -ENOMEM;
7999 		goto out;
8000 	}
8001 
	/* walk every reference path to this extent; ret > 0 means done */
8002 	for (loops = 0; ; loops++) {
8003 		if (loops == 0) {
8004 			ret = btrfs_first_ref_path(trans, extent_root, ref_path,
8005 						   extent_key->objectid);
8006 		} else {
8007 			ret = btrfs_next_ref_path(trans, extent_root, ref_path);
8008 		}
8009 		if (ret < 0)
8010 			goto out;
8011 		if (ret > 0)
8012 			break;
8013 
		/* log and reloc trees are transient; skip their references */
8014 		if (ref_path->root_objectid == BTRFS_TREE_LOG_OBJECTID ||
8015 		    ref_path->root_objectid == BTRFS_TREE_RELOC_OBJECTID)
8016 			continue;
8017 
8018 		found_root = read_ref_root(extent_root->fs_info, ref_path);
8019 		BUG_ON(!found_root);
8020 		/*
8021 		 * for reference counted tree, only process reference paths
8022 		 * rooted at the latest committed root.
8023 		 */
8024 		if (found_root->ref_cows &&
8025 		    ref_path->root_generation != found_root->root_key.offset)
8026 			continue;
8027 
8028 		if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
8029 			if (pass == 0) {
8030 				/*
8031 				 * copy data extents to new locations
8032 				 */
8033 				u64 group_start = group->key.objectid;
8034 				ret = relocate_data_extent(reloc_inode,
8035 							   extent_key,
8036 							   group_start);
8037 				if (ret < 0)
8038 					goto out;
8039 				break;
8040 			}
8041 			level = 0;
8042 		} else {
			/* for tree blocks, owner_objectid encodes the level */
8043 			level = ref_path->owner_objectid;
8044 		}
8045 
		/* cache the first key of the referenced block between paths */
8046 		if (prev_block != ref_path->nodes[level]) {
8047 			struct extent_buffer *eb;
8048 			u64 block_start = ref_path->nodes[level];
8049 			u64 block_size = btrfs_level_size(found_root, level);
8050 
8051 			eb = read_tree_block(found_root, block_start,
8052 					     block_size, 0);
8053 			if (!eb) {
8054 				ret = -EIO;
8055 				goto out;
8056 			}
8057 			btrfs_tree_lock(eb);
8058 			BUG_ON(level != btrfs_header_level(eb));
8059 
8060 			if (level == 0)
8061 				btrfs_item_key_to_cpu(eb, &first_key, 0);
8062 			else
8063 				btrfs_node_key_to_cpu(eb, &first_key, 0);
8064 
8065 			btrfs_tree_unlock(eb);
8066 			free_extent_buffer(eb);
8067 			prev_block = block_start;
8068 		}
8069 
		/* make sure the owning root is recorded in this transaction */
8070 		mutex_lock(&extent_root->fs_info->trans_mutex);
8071 		btrfs_record_root_in_trans(found_root);
8072 		mutex_unlock(&extent_root->fs_info->trans_mutex);
8073 		if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
8074 			/*
8075 			 * try to update data extent references while
8076 			 * keeping metadata shared between snapshots.
8077 			 */
8078 			if (pass == 1) {
8079 				ret = relocate_one_path(trans, found_root,
8080 						path, &first_key, ref_path,
8081 						group, reloc_inode);
8082 				if (ret < 0)
8083 					goto out;
8084 				continue;
8085 			}
8086 			/*
8087 			 * use fallback method to process the remaining
8088 			 * references.
8089 			 */
8090 			if (!new_extents) {
8091 				u64 group_start = group->key.objectid;
				/* get_new_locations() may grow this array —
				 * TODO confirm ownership/realloc semantics */
8092 				new_extents = kmalloc(sizeof(*new_extents),
8093 						      GFP_NOFS);
8094 				if (!new_extents) {
8095 					ret = -ENOMEM;
8096 					goto out;
8097 				}
8098 				nr_extents = 1;
8099 				ret = get_new_locations(reloc_inode,
8100 							extent_key,
8101 							group_start, 1,
8102 							&new_extents,
8103 							&nr_extents);
8104 				if (ret)
8105 					goto out;
8106 			}
8107 			ret = replace_one_extent(trans, found_root,
8108 						path, extent_key,
8109 						&first_key, ref_path,
8110 						new_extents, nr_extents);
8111 		} else {
8112 			ret = relocate_tree_block(trans, found_root, path,
8113 						  &first_key, ref_path);
8114 		}
8115 		if (ret < 0)
8116 			goto out;
8117 	}
8118 	ret = 0;
8119 out:
8120 	btrfs_end_transaction(trans, extent_root);
8121 	kfree(new_extents);
8122 	kfree(ref_path);
8123 	return ret;
8124 }
8125#endif
8126
8127static u64 update_block_group_flags(struct btrfs_root *root, u64 flags) 6478static u64 update_block_group_flags(struct btrfs_root *root, u64 flags)
8128{ 6479{
8129 u64 num_devices; 6480 u64 num_devices;
@@ -8588,10 +6939,16 @@ int btrfs_read_block_groups(struct btrfs_root *root)
8588 ret = -ENOMEM; 6939 ret = -ENOMEM;
8589 goto error; 6940 goto error;
8590 } 6941 }
6942 cache->free_space_ctl = kzalloc(sizeof(*cache->free_space_ctl),
6943 GFP_NOFS);
6944 if (!cache->free_space_ctl) {
6945 kfree(cache);
6946 ret = -ENOMEM;
6947 goto error;
6948 }
8591 6949
8592 atomic_set(&cache->count, 1); 6950 atomic_set(&cache->count, 1);
8593 spin_lock_init(&cache->lock); 6951 spin_lock_init(&cache->lock);
8594 spin_lock_init(&cache->tree_lock);
8595 cache->fs_info = info; 6952 cache->fs_info = info;
8596 INIT_LIST_HEAD(&cache->list); 6953 INIT_LIST_HEAD(&cache->list);
8597 INIT_LIST_HEAD(&cache->cluster_list); 6954 INIT_LIST_HEAD(&cache->cluster_list);
@@ -8599,24 +6956,18 @@ int btrfs_read_block_groups(struct btrfs_root *root)
8599 if (need_clear) 6956 if (need_clear)
8600 cache->disk_cache_state = BTRFS_DC_CLEAR; 6957 cache->disk_cache_state = BTRFS_DC_CLEAR;
8601 6958
8602 /*
8603 * we only want to have 32k of ram per block group for keeping
8604 * track of free space, and if we pass 1/2 of that we want to
8605 * start converting things over to using bitmaps
8606 */
8607 cache->extents_thresh = ((1024 * 32) / 2) /
8608 sizeof(struct btrfs_free_space);
8609
8610 read_extent_buffer(leaf, &cache->item, 6959 read_extent_buffer(leaf, &cache->item,
8611 btrfs_item_ptr_offset(leaf, path->slots[0]), 6960 btrfs_item_ptr_offset(leaf, path->slots[0]),
8612 sizeof(cache->item)); 6961 sizeof(cache->item));
8613 memcpy(&cache->key, &found_key, sizeof(found_key)); 6962 memcpy(&cache->key, &found_key, sizeof(found_key));
8614 6963
8615 key.objectid = found_key.objectid + found_key.offset; 6964 key.objectid = found_key.objectid + found_key.offset;
8616 btrfs_release_path(root, path); 6965 btrfs_release_path(path);
8617 cache->flags = btrfs_block_group_flags(&cache->item); 6966 cache->flags = btrfs_block_group_flags(&cache->item);
8618 cache->sectorsize = root->sectorsize; 6967 cache->sectorsize = root->sectorsize;
8619 6968
6969 btrfs_init_free_space_ctl(cache);
6970
8620 /* 6971 /*
8621 * We need to exclude the super stripes now so that the space 6972 * We need to exclude the super stripes now so that the space
8622 * info has super bytes accounted for, otherwise we'll think 6973 * info has super bytes accounted for, otherwise we'll think
@@ -8703,6 +7054,12 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
8703 cache = kzalloc(sizeof(*cache), GFP_NOFS); 7054 cache = kzalloc(sizeof(*cache), GFP_NOFS);
8704 if (!cache) 7055 if (!cache)
8705 return -ENOMEM; 7056 return -ENOMEM;
7057 cache->free_space_ctl = kzalloc(sizeof(*cache->free_space_ctl),
7058 GFP_NOFS);
7059 if (!cache->free_space_ctl) {
7060 kfree(cache);
7061 return -ENOMEM;
7062 }
8706 7063
8707 cache->key.objectid = chunk_offset; 7064 cache->key.objectid = chunk_offset;
8708 cache->key.offset = size; 7065 cache->key.offset = size;
@@ -8710,19 +7067,13 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
8710 cache->sectorsize = root->sectorsize; 7067 cache->sectorsize = root->sectorsize;
8711 cache->fs_info = root->fs_info; 7068 cache->fs_info = root->fs_info;
8712 7069
8713 /*
8714 * we only want to have 32k of ram per block group for keeping track
8715 * of free space, and if we pass 1/2 of that we want to start
8716 * converting things over to using bitmaps
8717 */
8718 cache->extents_thresh = ((1024 * 32) / 2) /
8719 sizeof(struct btrfs_free_space);
8720 atomic_set(&cache->count, 1); 7070 atomic_set(&cache->count, 1);
8721 spin_lock_init(&cache->lock); 7071 spin_lock_init(&cache->lock);
8722 spin_lock_init(&cache->tree_lock);
8723 INIT_LIST_HEAD(&cache->list); 7072 INIT_LIST_HEAD(&cache->list);
8724 INIT_LIST_HEAD(&cache->cluster_list); 7073 INIT_LIST_HEAD(&cache->cluster_list);
8725 7074
7075 btrfs_init_free_space_ctl(cache);
7076
8726 btrfs_set_block_group_used(&cache->item, bytes_used); 7077 btrfs_set_block_group_used(&cache->item, bytes_used);
8727 btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid); 7078 btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid);
8728 cache->flags = type; 7079 cache->flags = type;
@@ -8835,12 +7186,12 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
8835 if (ret < 0) 7186 if (ret < 0)
8836 goto out; 7187 goto out;
8837 if (ret > 0) 7188 if (ret > 0)
8838 btrfs_release_path(tree_root, path); 7189 btrfs_release_path(path);
8839 if (ret == 0) { 7190 if (ret == 0) {
8840 ret = btrfs_del_item(trans, tree_root, path); 7191 ret = btrfs_del_item(trans, tree_root, path);
8841 if (ret) 7192 if (ret)
8842 goto out; 7193 goto out;
8843 btrfs_release_path(tree_root, path); 7194 btrfs_release_path(path);
8844 } 7195 }
8845 7196
8846 spin_lock(&root->fs_info->block_group_cache_lock); 7197 spin_lock(&root->fs_info->block_group_cache_lock);