diff options
author | Zheng Yan <zheng.yan@oracle.com> | 2008-09-26 10:09:34 -0400 |
---|---|---|
committer | Chris Mason <chris.mason@oracle.com> | 2008-09-26 10:09:34 -0400 |
commit | 1a40e23b95da45051ee4d74374c58ae87a14051c (patch) | |
tree | 77faffd3f9d3a26c22e6cf03b83762c95d687596 /fs/btrfs/extent-tree.c | |
parent | 5b21f2ed3f2947b5195b65c9fdbdd9e52904cc03 (diff) |
Btrfs: update space balancing code
This patch updates the space balancing code to utilize the new
backref format. Before, btrfs-vol -b would break any COW links
on data blocks or metadata. This was slow and caused the amount
of space used to explode if a large number of snapshots were present.
The new code can keeps the sharing of all data extents and
most of the tree blocks.
To maintain the sharing of data extents, the space balance code uses
a seperate inode hold data extent pointers, then updates the references
to point to the new location.
To maintain the sharing of tree blocks, the space balance code uses
reloc trees to relocate tree blocks in reference counted roots.
There is one reloc tree for each subvol, and all reloc trees share
same root key objectid. Reloc trees are snapshots of the latest
committed roots of subvols (root->commit_root).
To relocate a tree block referenced by a subvol, there are two steps.
COW the block through subvol's reloc tree, then update block pointer in
the subvol to point to the new block. Since all reloc trees share
same root key objectid, doing special handing for tree blocks
owned by them is easy. Once a tree block has been COWed in one
reloc tree, we can use the resulting new block directly when the
same block is required to COW again through other reloc trees.
In this way, relocated tree blocks are shared between reloc trees,
so they are also shared between subvols.
Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs/btrfs/extent-tree.c')
-rw-r--r-- | fs/btrfs/extent-tree.c | 2034 |
1 files changed, 1618 insertions, 416 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 9ab099bc01a4..8043b9d584a9 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
@@ -1834,6 +1834,7 @@ static int pin_down_bytes(struct btrfs_trans_handle *trans, | |||
1834 | u64 header_owner = btrfs_header_owner(buf); | 1834 | u64 header_owner = btrfs_header_owner(buf); |
1835 | u64 header_transid = btrfs_header_generation(buf); | 1835 | u64 header_transid = btrfs_header_generation(buf); |
1836 | if (header_owner != BTRFS_TREE_LOG_OBJECTID && | 1836 | if (header_owner != BTRFS_TREE_LOG_OBJECTID && |
1837 | header_owner != BTRFS_TREE_RELOC_OBJECTID && | ||
1837 | header_transid == trans->transid && | 1838 | header_transid == trans->transid && |
1838 | !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) { | 1839 | !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) { |
1839 | clean_tree_block(NULL, root, buf); | 1840 | clean_tree_block(NULL, root, buf); |
@@ -2487,6 +2488,7 @@ int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len) | |||
2487 | return -ENOSPC; | 2488 | return -ENOSPC; |
2488 | } | 2489 | } |
2489 | btrfs_add_free_space(cache, start, len); | 2490 | btrfs_add_free_space(cache, start, len); |
2491 | update_reserved_extents(root, start, len, 0); | ||
2490 | maybe_unlock_mutex(root); | 2492 | maybe_unlock_mutex(root); |
2491 | return 0; | 2493 | return 0; |
2492 | } | 2494 | } |
@@ -2947,6 +2949,10 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, | |||
2947 | */ | 2949 | */ |
2948 | if (*level == 1) { | 2950 | if (*level == 1) { |
2949 | ref = btrfs_lookup_leaf_ref(root, bytenr); | 2951 | ref = btrfs_lookup_leaf_ref(root, bytenr); |
2952 | if (ref && ref->generation != ptr_gen) { | ||
2953 | btrfs_free_leaf_ref(root, ref); | ||
2954 | ref = NULL; | ||
2955 | } | ||
2950 | if (ref) { | 2956 | if (ref) { |
2951 | ret = cache_drop_leaf_ref(trans, root, ref); | 2957 | ret = cache_drop_leaf_ref(trans, root, ref); |
2952 | BUG_ON(ret); | 2958 | BUG_ON(ret); |
@@ -3153,34 +3159,6 @@ out: | |||
3153 | return ret; | 3159 | return ret; |
3154 | } | 3160 | } |
3155 | 3161 | ||
3156 | int btrfs_free_block_groups(struct btrfs_fs_info *info) | ||
3157 | { | ||
3158 | struct btrfs_block_group_cache *block_group; | ||
3159 | struct rb_node *n; | ||
3160 | |||
3161 | mutex_lock(&info->alloc_mutex); | ||
3162 | spin_lock(&info->block_group_cache_lock); | ||
3163 | while ((n = rb_last(&info->block_group_cache_tree)) != NULL) { | ||
3164 | block_group = rb_entry(n, struct btrfs_block_group_cache, | ||
3165 | cache_node); | ||
3166 | |||
3167 | spin_unlock(&info->block_group_cache_lock); | ||
3168 | btrfs_remove_free_space_cache(block_group); | ||
3169 | spin_lock(&info->block_group_cache_lock); | ||
3170 | |||
3171 | rb_erase(&block_group->cache_node, | ||
3172 | &info->block_group_cache_tree); | ||
3173 | |||
3174 | spin_lock(&block_group->space_info->lock); | ||
3175 | list_del(&block_group->list); | ||
3176 | spin_unlock(&block_group->space_info->lock); | ||
3177 | kfree(block_group); | ||
3178 | } | ||
3179 | spin_unlock(&info->block_group_cache_lock); | ||
3180 | mutex_unlock(&info->alloc_mutex); | ||
3181 | return 0; | ||
3182 | } | ||
3183 | |||
3184 | static unsigned long calc_ra(unsigned long start, unsigned long last, | 3162 | static unsigned long calc_ra(unsigned long start, unsigned long last, |
3185 | unsigned long nr) | 3163 | unsigned long nr) |
3186 | { | 3164 | { |
@@ -3192,37 +3170,43 @@ static int noinline relocate_inode_pages(struct inode *inode, u64 start, | |||
3192 | { | 3170 | { |
3193 | u64 page_start; | 3171 | u64 page_start; |
3194 | u64 page_end; | 3172 | u64 page_end; |
3173 | unsigned long first_index; | ||
3195 | unsigned long last_index; | 3174 | unsigned long last_index; |
3196 | unsigned long i; | 3175 | unsigned long i; |
3197 | struct page *page; | 3176 | struct page *page; |
3198 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | 3177 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; |
3199 | struct file_ra_state *ra; | 3178 | struct file_ra_state *ra; |
3200 | unsigned long total_read = 0; | ||
3201 | unsigned long ra_pages; | ||
3202 | struct btrfs_ordered_extent *ordered; | 3179 | struct btrfs_ordered_extent *ordered; |
3203 | struct btrfs_trans_handle *trans; | 3180 | unsigned int total_read = 0; |
3181 | unsigned int total_dirty = 0; | ||
3182 | int ret = 0; | ||
3204 | 3183 | ||
3205 | ra = kzalloc(sizeof(*ra), GFP_NOFS); | 3184 | ra = kzalloc(sizeof(*ra), GFP_NOFS); |
3206 | 3185 | ||
3207 | mutex_lock(&inode->i_mutex); | 3186 | mutex_lock(&inode->i_mutex); |
3208 | i = start >> PAGE_CACHE_SHIFT; | 3187 | first_index = start >> PAGE_CACHE_SHIFT; |
3209 | last_index = (start + len - 1) >> PAGE_CACHE_SHIFT; | 3188 | last_index = (start + len - 1) >> PAGE_CACHE_SHIFT; |
3210 | 3189 | ||
3211 | ra_pages = BTRFS_I(inode)->root->fs_info->bdi.ra_pages; | 3190 | /* make sure the dirty trick played by the caller work */ |
3191 | ret = invalidate_inode_pages2_range(inode->i_mapping, | ||
3192 | first_index, last_index); | ||
3193 | if (ret) | ||
3194 | goto out_unlock; | ||
3212 | 3195 | ||
3213 | file_ra_state_init(ra, inode->i_mapping); | 3196 | file_ra_state_init(ra, inode->i_mapping); |
3214 | 3197 | ||
3215 | for (; i <= last_index; i++) { | 3198 | for (i = first_index ; i <= last_index; i++) { |
3216 | if (total_read % ra_pages == 0) { | 3199 | if (total_read % ra->ra_pages == 0) { |
3217 | btrfs_force_ra(inode->i_mapping, ra, NULL, i, | 3200 | btrfs_force_ra(inode->i_mapping, ra, NULL, i, |
3218 | calc_ra(i, last_index, ra_pages)); | 3201 | calc_ra(i, last_index, ra->ra_pages)); |
3219 | } | 3202 | } |
3220 | total_read++; | 3203 | total_read++; |
3221 | again: | 3204 | again: |
3222 | if (((u64)i << PAGE_CACHE_SHIFT) > i_size_read(inode)) | 3205 | if (((u64)i << PAGE_CACHE_SHIFT) > i_size_read(inode)) |
3223 | goto truncate_racing; | 3206 | BUG_ON(1); |
3224 | page = grab_cache_page(inode->i_mapping, i); | 3207 | page = grab_cache_page(inode->i_mapping, i); |
3225 | if (!page) { | 3208 | if (!page) { |
3209 | ret = -ENOMEM; | ||
3226 | goto out_unlock; | 3210 | goto out_unlock; |
3227 | } | 3211 | } |
3228 | if (!PageUptodate(page)) { | 3212 | if (!PageUptodate(page)) { |
@@ -3231,6 +3215,7 @@ again: | |||
3231 | if (!PageUptodate(page)) { | 3215 | if (!PageUptodate(page)) { |
3232 | unlock_page(page); | 3216 | unlock_page(page); |
3233 | page_cache_release(page); | 3217 | page_cache_release(page); |
3218 | ret = -EIO; | ||
3234 | goto out_unlock; | 3219 | goto out_unlock; |
3235 | } | 3220 | } |
3236 | } | 3221 | } |
@@ -3251,14 +3236,13 @@ again: | |||
3251 | } | 3236 | } |
3252 | set_page_extent_mapped(page); | 3237 | set_page_extent_mapped(page); |
3253 | 3238 | ||
3254 | /* | ||
3255 | * make sure page_mkwrite is called for this page if userland | ||
3256 | * wants to change it from mmap | ||
3257 | */ | ||
3258 | clear_page_dirty_for_io(page); | ||
3259 | |||
3260 | btrfs_set_extent_delalloc(inode, page_start, page_end); | 3239 | btrfs_set_extent_delalloc(inode, page_start, page_end); |
3240 | if (i == first_index) | ||
3241 | set_extent_bits(io_tree, page_start, page_end, | ||
3242 | EXTENT_BOUNDARY, GFP_NOFS); | ||
3243 | |||
3261 | set_page_dirty(page); | 3244 | set_page_dirty(page); |
3245 | total_dirty++; | ||
3262 | 3246 | ||
3263 | unlock_extent(io_tree, page_start, page_end, GFP_NOFS); | 3247 | unlock_extent(io_tree, page_start, page_end, GFP_NOFS); |
3264 | unlock_page(page); | 3248 | unlock_page(page); |
@@ -3266,347 +3250,1457 @@ again: | |||
3266 | } | 3250 | } |
3267 | 3251 | ||
3268 | out_unlock: | 3252 | out_unlock: |
3269 | /* we have to start the IO in order to get the ordered extents | ||
3270 | * instantiated. This allows the relocation to code to wait | ||
3271 | * for all the ordered extents to hit the disk. | ||
3272 | * | ||
3273 | * Otherwise, it would constantly loop over the same extents | ||
3274 | * because the old ones don't get deleted until the IO is | ||
3275 | * started | ||
3276 | */ | ||
3277 | btrfs_fdatawrite_range(inode->i_mapping, start, start + len - 1, | ||
3278 | WB_SYNC_NONE); | ||
3279 | kfree(ra); | 3253 | kfree(ra); |
3280 | trans = btrfs_start_transaction(BTRFS_I(inode)->root, 1); | ||
3281 | if (trans) { | ||
3282 | btrfs_end_transaction(trans, BTRFS_I(inode)->root); | ||
3283 | mark_inode_dirty(inode); | ||
3284 | } | ||
3285 | mutex_unlock(&inode->i_mutex); | 3254 | mutex_unlock(&inode->i_mutex); |
3286 | return 0; | 3255 | balance_dirty_pages_ratelimited_nr(inode->i_mapping, total_dirty); |
3256 | return ret; | ||
3257 | } | ||
3258 | |||
3259 | static int noinline relocate_data_extent(struct inode *reloc_inode, | ||
3260 | struct btrfs_key *extent_key, | ||
3261 | u64 offset) | ||
3262 | { | ||
3263 | struct btrfs_root *root = BTRFS_I(reloc_inode)->root; | ||
3264 | struct extent_map_tree *em_tree = &BTRFS_I(reloc_inode)->extent_tree; | ||
3265 | struct extent_map *em; | ||
3266 | |||
3267 | em = alloc_extent_map(GFP_NOFS); | ||
3268 | BUG_ON(!em || IS_ERR(em)); | ||
3269 | |||
3270 | em->start = extent_key->objectid - offset; | ||
3271 | em->len = extent_key->offset; | ||
3272 | em->block_start = extent_key->objectid; | ||
3273 | em->bdev = root->fs_info->fs_devices->latest_bdev; | ||
3274 | set_bit(EXTENT_FLAG_PINNED, &em->flags); | ||
3275 | |||
3276 | /* setup extent map to cheat btrfs_readpage */ | ||
3277 | mutex_lock(&BTRFS_I(reloc_inode)->extent_mutex); | ||
3278 | while (1) { | ||
3279 | int ret; | ||
3280 | spin_lock(&em_tree->lock); | ||
3281 | ret = add_extent_mapping(em_tree, em); | ||
3282 | spin_unlock(&em_tree->lock); | ||
3283 | if (ret != -EEXIST) { | ||
3284 | free_extent_map(em); | ||
3285 | break; | ||
3286 | } | ||
3287 | btrfs_drop_extent_cache(reloc_inode, em->start, | ||
3288 | em->start + em->len - 1, 0); | ||
3289 | } | ||
3290 | mutex_unlock(&BTRFS_I(reloc_inode)->extent_mutex); | ||
3287 | 3291 | ||
3288 | truncate_racing: | 3292 | return relocate_inode_pages(reloc_inode, extent_key->objectid - offset, |
3289 | vmtruncate(inode, inode->i_size); | 3293 | extent_key->offset); |
3290 | balance_dirty_pages_ratelimited_nr(inode->i_mapping, | ||
3291 | total_read); | ||
3292 | goto out_unlock; | ||
3293 | } | 3294 | } |
3294 | 3295 | ||
3295 | /* | 3296 | struct btrfs_ref_path { |
3296 | * The back references tell us which tree holds a ref on a block, | 3297 | u64 extent_start; |
3297 | * but it is possible for the tree root field in the reference to | 3298 | u64 nodes[BTRFS_MAX_LEVEL]; |
3298 | * reflect the original root before a snapshot was made. In this | 3299 | u64 root_objectid; |
3299 | * case we should search through all the children of a given root | 3300 | u64 root_generation; |
3300 | * to find potential holders of references on a block. | 3301 | u64 owner_objectid; |
3301 | * | 3302 | u64 owner_offset; |
3302 | * Instead, we do something a little less fancy and just search | 3303 | u32 num_refs; |
3303 | * all the roots for a given key/block combination. | 3304 | int lowest_level; |
3304 | */ | 3305 | int current_level; |
3305 | static int find_root_for_ref(struct btrfs_root *root, | 3306 | }; |
3306 | struct btrfs_path *path, | ||
3307 | struct btrfs_key *key0, | ||
3308 | int level, | ||
3309 | int file_key, | ||
3310 | struct btrfs_root **found_root, | ||
3311 | u64 bytenr) | ||
3312 | { | ||
3313 | struct btrfs_key root_location; | ||
3314 | struct btrfs_root *cur_root = *found_root; | ||
3315 | struct btrfs_file_extent_item *file_extent; | ||
3316 | u64 root_search_start = BTRFS_FS_TREE_OBJECTID; | ||
3317 | u64 found_bytenr; | ||
3318 | int ret; | ||
3319 | 3307 | ||
3320 | root_location.offset = (u64)-1; | 3308 | struct disk_extent { |
3321 | root_location.type = BTRFS_ROOT_ITEM_KEY; | 3309 | u64 disk_bytenr; |
3322 | path->lowest_level = level; | 3310 | u64 disk_num_bytes; |
3323 | path->reada = 0; | 3311 | u64 offset; |
3324 | while(1) { | 3312 | u64 num_bytes; |
3325 | ret = btrfs_search_slot(NULL, cur_root, key0, path, 0, 0); | 3313 | }; |
3326 | found_bytenr = 0; | 3314 | |
3327 | if (ret == 0 && file_key) { | 3315 | static int is_cowonly_root(u64 root_objectid) |
3328 | struct extent_buffer *leaf = path->nodes[0]; | 3316 | { |
3329 | file_extent = btrfs_item_ptr(leaf, path->slots[0], | 3317 | if (root_objectid == BTRFS_ROOT_TREE_OBJECTID || |
3330 | struct btrfs_file_extent_item); | 3318 | root_objectid == BTRFS_EXTENT_TREE_OBJECTID || |
3331 | if (btrfs_file_extent_type(leaf, file_extent) == | 3319 | root_objectid == BTRFS_CHUNK_TREE_OBJECTID || |
3332 | BTRFS_FILE_EXTENT_REG) { | 3320 | root_objectid == BTRFS_DEV_TREE_OBJECTID || |
3333 | found_bytenr = | 3321 | root_objectid == BTRFS_TREE_LOG_OBJECTID) |
3334 | btrfs_file_extent_disk_bytenr(leaf, | 3322 | return 1; |
3335 | file_extent); | 3323 | return 0; |
3336 | } | 3324 | } |
3337 | } else if (!file_key) { | 3325 | |
3338 | if (path->nodes[level]) | 3326 | static int noinline __next_ref_path(struct btrfs_trans_handle *trans, |
3339 | found_bytenr = path->nodes[level]->start; | 3327 | struct btrfs_root *extent_root, |
3340 | } | 3328 | struct btrfs_ref_path *ref_path, |
3341 | 3329 | int first_time) | |
3342 | btrfs_release_path(cur_root, path); | 3330 | { |
3343 | 3331 | struct extent_buffer *leaf; | |
3344 | if (found_bytenr == bytenr) { | 3332 | struct btrfs_path *path; |
3345 | *found_root = cur_root; | 3333 | struct btrfs_extent_ref *ref; |
3334 | struct btrfs_key key; | ||
3335 | struct btrfs_key found_key; | ||
3336 | u64 bytenr; | ||
3337 | u32 nritems; | ||
3338 | int level; | ||
3339 | int ret = 1; | ||
3340 | |||
3341 | path = btrfs_alloc_path(); | ||
3342 | if (!path) | ||
3343 | return -ENOMEM; | ||
3344 | |||
3345 | mutex_lock(&extent_root->fs_info->alloc_mutex); | ||
3346 | |||
3347 | if (first_time) { | ||
3348 | ref_path->lowest_level = -1; | ||
3349 | ref_path->current_level = -1; | ||
3350 | goto walk_up; | ||
3351 | } | ||
3352 | walk_down: | ||
3353 | level = ref_path->current_level - 1; | ||
3354 | while (level >= -1) { | ||
3355 | u64 parent; | ||
3356 | if (level < ref_path->lowest_level) | ||
3357 | break; | ||
3358 | |||
3359 | if (level >= 0) { | ||
3360 | bytenr = ref_path->nodes[level]; | ||
3361 | } else { | ||
3362 | bytenr = ref_path->extent_start; | ||
3363 | } | ||
3364 | BUG_ON(bytenr == 0); | ||
3365 | |||
3366 | parent = ref_path->nodes[level + 1]; | ||
3367 | ref_path->nodes[level + 1] = 0; | ||
3368 | ref_path->current_level = level; | ||
3369 | BUG_ON(parent == 0); | ||
3370 | |||
3371 | key.objectid = bytenr; | ||
3372 | key.offset = parent + 1; | ||
3373 | key.type = BTRFS_EXTENT_REF_KEY; | ||
3374 | |||
3375 | ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 0); | ||
3376 | if (ret < 0) | ||
3377 | goto out; | ||
3378 | BUG_ON(ret == 0); | ||
3379 | |||
3380 | leaf = path->nodes[0]; | ||
3381 | nritems = btrfs_header_nritems(leaf); | ||
3382 | if (path->slots[0] >= nritems) { | ||
3383 | ret = btrfs_next_leaf(extent_root, path); | ||
3384 | if (ret < 0) | ||
3385 | goto out; | ||
3386 | if (ret > 0) | ||
3387 | goto next; | ||
3388 | leaf = path->nodes[0]; | ||
3389 | } | ||
3390 | |||
3391 | btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); | ||
3392 | if (found_key.objectid == bytenr && | ||
3393 | found_key.type == BTRFS_EXTENT_REF_KEY) | ||
3394 | goto found; | ||
3395 | next: | ||
3396 | level--; | ||
3397 | btrfs_release_path(extent_root, path); | ||
3398 | if (need_resched()) { | ||
3399 | mutex_unlock(&extent_root->fs_info->alloc_mutex); | ||
3400 | cond_resched(); | ||
3401 | mutex_lock(&extent_root->fs_info->alloc_mutex); | ||
3402 | } | ||
3403 | } | ||
3404 | /* reached lowest level */ | ||
3405 | ret = 1; | ||
3406 | goto out; | ||
3407 | walk_up: | ||
3408 | level = ref_path->current_level; | ||
3409 | while (level < BTRFS_MAX_LEVEL - 1) { | ||
3410 | u64 ref_objectid; | ||
3411 | if (level >= 0) { | ||
3412 | bytenr = ref_path->nodes[level]; | ||
3413 | } else { | ||
3414 | bytenr = ref_path->extent_start; | ||
3415 | } | ||
3416 | BUG_ON(bytenr == 0); | ||
3417 | |||
3418 | key.objectid = bytenr; | ||
3419 | key.offset = 0; | ||
3420 | key.type = BTRFS_EXTENT_REF_KEY; | ||
3421 | |||
3422 | ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 0); | ||
3423 | if (ret < 0) | ||
3424 | goto out; | ||
3425 | |||
3426 | leaf = path->nodes[0]; | ||
3427 | nritems = btrfs_header_nritems(leaf); | ||
3428 | if (path->slots[0] >= nritems) { | ||
3429 | ret = btrfs_next_leaf(extent_root, path); | ||
3430 | if (ret < 0) | ||
3431 | goto out; | ||
3432 | if (ret > 0) { | ||
3433 | /* the extent was freed by someone */ | ||
3434 | if (ref_path->lowest_level == level) | ||
3435 | goto out; | ||
3436 | btrfs_release_path(extent_root, path); | ||
3437 | goto walk_down; | ||
3438 | } | ||
3439 | leaf = path->nodes[0]; | ||
3440 | } | ||
3441 | |||
3442 | btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); | ||
3443 | if (found_key.objectid != bytenr || | ||
3444 | found_key.type != BTRFS_EXTENT_REF_KEY) { | ||
3445 | /* the extent was freed by someone */ | ||
3446 | if (ref_path->lowest_level == level) { | ||
3447 | ret = 1; | ||
3448 | goto out; | ||
3449 | } | ||
3450 | btrfs_release_path(extent_root, path); | ||
3451 | goto walk_down; | ||
3452 | } | ||
3453 | found: | ||
3454 | ref = btrfs_item_ptr(leaf, path->slots[0], | ||
3455 | struct btrfs_extent_ref); | ||
3456 | ref_objectid = btrfs_ref_objectid(leaf, ref); | ||
3457 | if (ref_objectid < BTRFS_FIRST_FREE_OBJECTID) { | ||
3458 | if (first_time) { | ||
3459 | level = (int)ref_objectid; | ||
3460 | BUG_ON(level >= BTRFS_MAX_LEVEL); | ||
3461 | ref_path->lowest_level = level; | ||
3462 | ref_path->current_level = level; | ||
3463 | ref_path->nodes[level] = bytenr; | ||
3464 | } else { | ||
3465 | WARN_ON(ref_objectid != level); | ||
3466 | } | ||
3467 | } else { | ||
3468 | WARN_ON(level != -1); | ||
3469 | } | ||
3470 | first_time = 0; | ||
3471 | |||
3472 | if (ref_path->lowest_level == level) { | ||
3473 | ref_path->owner_objectid = ref_objectid; | ||
3474 | ref_path->owner_offset = btrfs_ref_offset(leaf, ref); | ||
3475 | ref_path->num_refs = btrfs_ref_num_refs(leaf, ref); | ||
3476 | } | ||
3477 | |||
3478 | /* | ||
3479 | * the block is tree root or the block isn't in reference | ||
3480 | * counted tree. | ||
3481 | */ | ||
3482 | if (found_key.objectid == found_key.offset || | ||
3483 | is_cowonly_root(btrfs_ref_root(leaf, ref))) { | ||
3484 | ref_path->root_objectid = btrfs_ref_root(leaf, ref); | ||
3485 | ref_path->root_generation = | ||
3486 | btrfs_ref_generation(leaf, ref); | ||
3487 | if (level < 0) { | ||
3488 | /* special reference from the tree log */ | ||
3489 | ref_path->nodes[0] = found_key.offset; | ||
3490 | ref_path->current_level = 0; | ||
3491 | } | ||
3346 | ret = 0; | 3492 | ret = 0; |
3347 | goto out; | 3493 | goto out; |
3348 | } | 3494 | } |
3349 | ret = btrfs_search_root(root->fs_info->tree_root, | ||
3350 | root_search_start, &root_search_start); | ||
3351 | if (ret) | ||
3352 | break; | ||
3353 | 3495 | ||
3354 | root_location.objectid = root_search_start; | 3496 | level++; |
3355 | cur_root = btrfs_read_fs_root_no_name(root->fs_info, | 3497 | BUG_ON(ref_path->nodes[level] != 0); |
3356 | &root_location); | 3498 | ref_path->nodes[level] = found_key.offset; |
3357 | if (!cur_root) { | 3499 | ref_path->current_level = level; |
3358 | ret = 1; | 3500 | |
3359 | break; | 3501 | /* |
3502 | * the reference was created in the running transaction, | ||
3503 | * no need to continue walking up. | ||
3504 | */ | ||
3505 | if (btrfs_ref_generation(leaf, ref) == trans->transid) { | ||
3506 | ref_path->root_objectid = btrfs_ref_root(leaf, ref); | ||
3507 | ref_path->root_generation = | ||
3508 | btrfs_ref_generation(leaf, ref); | ||
3509 | ret = 0; | ||
3510 | goto out; | ||
3511 | } | ||
3512 | |||
3513 | btrfs_release_path(extent_root, path); | ||
3514 | if (need_resched()) { | ||
3515 | mutex_unlock(&extent_root->fs_info->alloc_mutex); | ||
3516 | cond_resched(); | ||
3517 | mutex_lock(&extent_root->fs_info->alloc_mutex); | ||
3360 | } | 3518 | } |
3361 | } | 3519 | } |
3520 | /* reached max tree level, but no tree root found. */ | ||
3521 | BUG(); | ||
3362 | out: | 3522 | out: |
3363 | path->lowest_level = 0; | 3523 | mutex_unlock(&extent_root->fs_info->alloc_mutex); |
3524 | btrfs_free_path(path); | ||
3364 | return ret; | 3525 | return ret; |
3365 | } | 3526 | } |
3366 | 3527 | ||
3367 | /* | 3528 | static int btrfs_first_ref_path(struct btrfs_trans_handle *trans, |
3368 | * note, this releases the path | 3529 | struct btrfs_root *extent_root, |
3369 | */ | 3530 | struct btrfs_ref_path *ref_path, |
3370 | static int noinline relocate_one_reference(struct btrfs_root *extent_root, | 3531 | u64 extent_start) |
3371 | struct btrfs_path *path, | 3532 | { |
3372 | struct btrfs_key *extent_key, | 3533 | memset(ref_path, 0, sizeof(*ref_path)); |
3373 | u64 *last_file_objectid, | 3534 | ref_path->extent_start = extent_start; |
3374 | u64 *last_file_offset, | 3535 | |
3375 | u64 *last_file_root, | 3536 | return __next_ref_path(trans, extent_root, ref_path, 1); |
3376 | u64 last_extent) | 3537 | } |
3377 | { | 3538 | |
3378 | struct inode *inode; | 3539 | static int btrfs_next_ref_path(struct btrfs_trans_handle *trans, |
3379 | struct btrfs_root *found_root; | 3540 | struct btrfs_root *extent_root, |
3380 | struct btrfs_key root_location; | 3541 | struct btrfs_ref_path *ref_path) |
3542 | { | ||
3543 | return __next_ref_path(trans, extent_root, ref_path, 0); | ||
3544 | } | ||
3545 | |||
3546 | static int noinline get_new_locations(struct inode *reloc_inode, | ||
3547 | struct btrfs_key *extent_key, | ||
3548 | u64 offset, int no_fragment, | ||
3549 | struct disk_extent **extents, | ||
3550 | int *nr_extents) | ||
3551 | { | ||
3552 | struct btrfs_root *root = BTRFS_I(reloc_inode)->root; | ||
3553 | struct btrfs_path *path; | ||
3554 | struct btrfs_file_extent_item *fi; | ||
3555 | struct extent_buffer *leaf; | ||
3556 | struct disk_extent *exts = *extents; | ||
3381 | struct btrfs_key found_key; | 3557 | struct btrfs_key found_key; |
3382 | struct btrfs_extent_ref *ref; | 3558 | u64 cur_pos; |
3383 | u64 ref_root; | 3559 | u64 last_byte; |
3384 | u64 ref_gen; | 3560 | u32 nritems; |
3385 | u64 ref_objectid; | 3561 | int nr = 0; |
3386 | u64 ref_offset; | 3562 | int max = *nr_extents; |
3387 | int ret; | 3563 | int ret; |
3388 | int level; | ||
3389 | 3564 | ||
3390 | WARN_ON(!mutex_is_locked(&extent_root->fs_info->alloc_mutex)); | 3565 | WARN_ON(!no_fragment && *extents); |
3566 | if (!exts) { | ||
3567 | max = 1; | ||
3568 | exts = kmalloc(sizeof(*exts) * max, GFP_NOFS); | ||
3569 | if (!exts) | ||
3570 | return -ENOMEM; | ||
3571 | } | ||
3391 | 3572 | ||
3392 | ref = btrfs_item_ptr(path->nodes[0], path->slots[0], | 3573 | path = btrfs_alloc_path(); |
3393 | struct btrfs_extent_ref); | 3574 | BUG_ON(!path); |
3394 | ref_root = btrfs_ref_root(path->nodes[0], ref); | ||
3395 | ref_gen = btrfs_ref_generation(path->nodes[0], ref); | ||
3396 | ref_objectid = btrfs_ref_objectid(path->nodes[0], ref); | ||
3397 | ref_offset = btrfs_ref_offset(path->nodes[0], ref); | ||
3398 | btrfs_release_path(extent_root, path); | ||
3399 | 3575 | ||
3400 | root_location.objectid = ref_root; | 3576 | cur_pos = extent_key->objectid - offset; |
3401 | if (ref_gen == 0) | 3577 | last_byte = extent_key->objectid + extent_key->offset; |
3402 | root_location.offset = 0; | 3578 | ret = btrfs_lookup_file_extent(NULL, root, path, reloc_inode->i_ino, |
3403 | else | 3579 | cur_pos, 0); |
3404 | root_location.offset = (u64)-1; | 3580 | if (ret < 0) |
3405 | root_location.type = BTRFS_ROOT_ITEM_KEY; | 3581 | goto out; |
3582 | if (ret > 0) { | ||
3583 | ret = -ENOENT; | ||
3584 | goto out; | ||
3585 | } | ||
3406 | 3586 | ||
3407 | found_root = btrfs_read_fs_root_no_name(extent_root->fs_info, | 3587 | while (1) { |
3408 | &root_location); | 3588 | leaf = path->nodes[0]; |
3409 | BUG_ON(!found_root); | 3589 | nritems = btrfs_header_nritems(leaf); |
3410 | mutex_unlock(&extent_root->fs_info->alloc_mutex); | 3590 | if (path->slots[0] >= nritems) { |
3591 | ret = btrfs_next_leaf(root, path); | ||
3592 | if (ret < 0) | ||
3593 | goto out; | ||
3594 | if (ret > 0) | ||
3595 | break; | ||
3596 | leaf = path->nodes[0]; | ||
3597 | } | ||
3411 | 3598 | ||
3412 | if (ref_objectid >= BTRFS_FIRST_FREE_OBJECTID) { | 3599 | btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); |
3413 | found_key.objectid = ref_objectid; | 3600 | if (found_key.offset != cur_pos || |
3414 | found_key.type = BTRFS_EXTENT_DATA_KEY; | 3601 | found_key.type != BTRFS_EXTENT_DATA_KEY || |
3415 | found_key.offset = ref_offset; | 3602 | found_key.objectid != reloc_inode->i_ino) |
3416 | level = 0; | 3603 | break; |
3417 | 3604 | ||
3418 | if (last_extent == extent_key->objectid && | 3605 | fi = btrfs_item_ptr(leaf, path->slots[0], |
3419 | *last_file_objectid == ref_objectid && | 3606 | struct btrfs_file_extent_item); |
3420 | *last_file_offset == ref_offset && | 3607 | if (btrfs_file_extent_type(leaf, fi) != |
3421 | *last_file_root == ref_root) | 3608 | BTRFS_FILE_EXTENT_REG || |
3422 | goto out; | 3609 | btrfs_file_extent_disk_bytenr(leaf, fi) == 0) |
3610 | break; | ||
3423 | 3611 | ||
3424 | ret = find_root_for_ref(extent_root, path, &found_key, | 3612 | if (nr == max) { |
3425 | level, 1, &found_root, | 3613 | struct disk_extent *old = exts; |
3426 | extent_key->objectid); | 3614 | max *= 2; |
3615 | exts = kzalloc(sizeof(*exts) * max, GFP_NOFS); | ||
3616 | memcpy(exts, old, sizeof(*exts) * nr); | ||
3617 | if (old != *extents) | ||
3618 | kfree(old); | ||
3619 | } | ||
3427 | 3620 | ||
3428 | if (ret) | 3621 | exts[nr].disk_bytenr = |
3622 | btrfs_file_extent_disk_bytenr(leaf, fi); | ||
3623 | exts[nr].disk_num_bytes = | ||
3624 | btrfs_file_extent_disk_num_bytes(leaf, fi); | ||
3625 | exts[nr].offset = btrfs_file_extent_offset(leaf, fi); | ||
3626 | exts[nr].num_bytes = btrfs_file_extent_num_bytes(leaf, fi); | ||
3627 | WARN_ON(exts[nr].offset > 0); | ||
3628 | WARN_ON(exts[nr].num_bytes != exts[nr].disk_num_bytes); | ||
3629 | |||
3630 | cur_pos += exts[nr].num_bytes; | ||
3631 | nr++; | ||
3632 | |||
3633 | if (cur_pos + offset >= last_byte) | ||
3634 | break; | ||
3635 | |||
3636 | if (no_fragment) { | ||
3637 | ret = 1; | ||
3429 | goto out; | 3638 | goto out; |
3639 | } | ||
3640 | path->slots[0]++; | ||
3641 | } | ||
3430 | 3642 | ||
3431 | if (last_extent == extent_key->objectid && | 3643 | WARN_ON(cur_pos + offset > last_byte); |
3432 | *last_file_objectid == ref_objectid && | 3644 | if (cur_pos + offset < last_byte) { |
3433 | *last_file_offset == ref_offset && | 3645 | ret = -ENOENT; |
3434 | *last_file_root == ref_root) | 3646 | goto out; |
3647 | } | ||
3648 | ret = 0; | ||
3649 | out: | ||
3650 | btrfs_free_path(path); | ||
3651 | if (ret) { | ||
3652 | if (exts != *extents) | ||
3653 | kfree(exts); | ||
3654 | } else { | ||
3655 | *extents = exts; | ||
3656 | *nr_extents = nr; | ||
3657 | } | ||
3658 | return ret; | ||
3659 | } | ||
3660 | |||
3661 | static int noinline replace_one_extent(struct btrfs_trans_handle *trans, | ||
3662 | struct btrfs_root *root, | ||
3663 | struct btrfs_path *path, | ||
3664 | struct btrfs_key *extent_key, | ||
3665 | struct btrfs_key *leaf_key, | ||
3666 | struct btrfs_ref_path *ref_path, | ||
3667 | struct disk_extent *new_extents, | ||
3668 | int nr_extents) | ||
3669 | { | ||
3670 | struct extent_buffer *leaf; | ||
3671 | struct btrfs_file_extent_item *fi; | ||
3672 | struct inode *inode = NULL; | ||
3673 | struct btrfs_key key; | ||
3674 | u64 lock_start = 0; | ||
3675 | u64 lock_end = 0; | ||
3676 | u64 num_bytes; | ||
3677 | u64 ext_offset; | ||
3678 | u64 first_pos; | ||
3679 | u32 nritems; | ||
3680 | int extent_locked = 0; | ||
3681 | int ret; | ||
3682 | |||
3683 | first_pos = ref_path->owner_offset; | ||
3684 | if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS) { | ||
3685 | key.objectid = ref_path->owner_objectid; | ||
3686 | key.offset = ref_path->owner_offset; | ||
3687 | key.type = BTRFS_EXTENT_DATA_KEY; | ||
3688 | } else { | ||
3689 | memcpy(&key, leaf_key, sizeof(key)); | ||
3690 | } | ||
3691 | |||
3692 | while (1) { | ||
3693 | ret = btrfs_search_slot(trans, root, &key, path, 0, 1); | ||
3694 | if (ret < 0) | ||
3435 | goto out; | 3695 | goto out; |
3436 | 3696 | ||
3437 | inode = btrfs_iget_locked(extent_root->fs_info->sb, | 3697 | leaf = path->nodes[0]; |
3438 | ref_objectid, found_root); | 3698 | nritems = btrfs_header_nritems(leaf); |
3439 | if (inode->i_state & I_NEW) { | 3699 | next: |
3440 | /* the inode and parent dir are two different roots */ | 3700 | if (extent_locked && ret > 0) { |
3441 | BTRFS_I(inode)->root = found_root; | 3701 | /* |
3442 | BTRFS_I(inode)->location.objectid = ref_objectid; | 3702 | * the file extent item was modified by someone |
3443 | BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY; | 3703 | * before the extent got locked. |
3444 | BTRFS_I(inode)->location.offset = 0; | 3704 | */ |
3445 | btrfs_read_locked_inode(inode); | 3705 | mutex_unlock(&BTRFS_I(inode)->extent_mutex); |
3446 | unlock_new_inode(inode); | 3706 | unlock_extent(&BTRFS_I(inode)->io_tree, lock_start, |
3707 | lock_end, GFP_NOFS); | ||
3708 | extent_locked = 0; | ||
3709 | } | ||
3710 | |||
3711 | if (path->slots[0] >= nritems) { | ||
3712 | if (ref_path->owner_objectid == | ||
3713 | BTRFS_MULTIPLE_OBJECTIDS) | ||
3714 | break; | ||
3715 | |||
3716 | BUG_ON(extent_locked); | ||
3717 | ret = btrfs_next_leaf(root, path); | ||
3718 | if (ret < 0) | ||
3719 | goto out; | ||
3720 | if (ret > 0) | ||
3721 | break; | ||
3722 | leaf = path->nodes[0]; | ||
3723 | nritems = btrfs_header_nritems(leaf); | ||
3724 | } | ||
3725 | |||
3726 | btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); | ||
3447 | 3727 | ||
3728 | if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS) { | ||
3729 | if ((key.objectid > ref_path->owner_objectid) || | ||
3730 | (key.objectid == ref_path->owner_objectid && | ||
3731 | key.type > BTRFS_EXTENT_DATA_KEY) || | ||
3732 | (key.offset >= first_pos + extent_key->offset)) | ||
3733 | break; | ||
3448 | } | 3734 | } |
3449 | /* this can happen if the reference is not against | ||
3450 | * the latest version of the tree root | ||
3451 | */ | ||
3452 | if (is_bad_inode(inode)) | ||
3453 | goto out; | ||
3454 | 3735 | ||
3455 | *last_file_objectid = inode->i_ino; | 3736 | if (inode && key.objectid != inode->i_ino) { |
3456 | *last_file_root = found_root->root_key.objectid; | 3737 | BUG_ON(extent_locked); |
3457 | *last_file_offset = ref_offset; | 3738 | btrfs_release_path(root, path); |
3739 | mutex_unlock(&inode->i_mutex); | ||
3740 | iput(inode); | ||
3741 | inode = NULL; | ||
3742 | continue; | ||
3743 | } | ||
3458 | 3744 | ||
3459 | relocate_inode_pages(inode, ref_offset, extent_key->offset); | 3745 | if (key.type != BTRFS_EXTENT_DATA_KEY) { |
3460 | iput(inode); | 3746 | path->slots[0]++; |
3461 | } else { | 3747 | ret = 1; |
3462 | struct btrfs_trans_handle *trans; | 3748 | goto next; |
3463 | struct extent_buffer *eb; | 3749 | } |
3464 | int needs_lock = 0; | 3750 | fi = btrfs_item_ptr(leaf, path->slots[0], |
3751 | struct btrfs_file_extent_item); | ||
3752 | if ((btrfs_file_extent_type(leaf, fi) != | ||
3753 | BTRFS_FILE_EXTENT_REG) || | ||
3754 | (btrfs_file_extent_disk_bytenr(leaf, fi) != | ||
3755 | extent_key->objectid)) { | ||
3756 | path->slots[0]++; | ||
3757 | ret = 1; | ||
3758 | goto next; | ||
3759 | } | ||
3465 | 3760 | ||
3466 | eb = read_tree_block(found_root, extent_key->objectid, | 3761 | num_bytes = btrfs_file_extent_num_bytes(leaf, fi); |
3467 | extent_key->offset, 0); | 3762 | ext_offset = btrfs_file_extent_offset(leaf, fi); |
3468 | btrfs_tree_lock(eb); | ||
3469 | level = btrfs_header_level(eb); | ||
3470 | 3763 | ||
3471 | if (level == 0) | 3764 | if (first_pos > key.offset - ext_offset) |
3472 | btrfs_item_key_to_cpu(eb, &found_key, 0); | 3765 | first_pos = key.offset - ext_offset; |
3473 | else | ||
3474 | btrfs_node_key_to_cpu(eb, &found_key, 0); | ||
3475 | 3766 | ||
3476 | btrfs_tree_unlock(eb); | 3767 | if (!extent_locked) { |
3477 | free_extent_buffer(eb); | 3768 | lock_start = key.offset; |
3769 | lock_end = lock_start + num_bytes - 1; | ||
3770 | } else { | ||
3771 | BUG_ON(lock_start != key.offset); | ||
3772 | BUG_ON(lock_end - lock_start + 1 < num_bytes); | ||
3773 | } | ||
3774 | |||
3775 | if (!inode) { | ||
3776 | btrfs_release_path(root, path); | ||
3777 | |||
3778 | inode = btrfs_iget_locked(root->fs_info->sb, | ||
3779 | key.objectid, root); | ||
3780 | if (inode->i_state & I_NEW) { | ||
3781 | BTRFS_I(inode)->root = root; | ||
3782 | BTRFS_I(inode)->location.objectid = | ||
3783 | key.objectid; | ||
3784 | BTRFS_I(inode)->location.type = | ||
3785 | BTRFS_INODE_ITEM_KEY; | ||
3786 | BTRFS_I(inode)->location.offset = 0; | ||
3787 | btrfs_read_locked_inode(inode); | ||
3788 | unlock_new_inode(inode); | ||
3789 | } | ||
3790 | /* | ||
3791 | * some code call btrfs_commit_transaction while | ||
3792 | * holding the i_mutex, so we can't use mutex_lock | ||
3793 | * here. | ||
3794 | */ | ||
3795 | if (is_bad_inode(inode) || | ||
3796 | !mutex_trylock(&inode->i_mutex)) { | ||
3797 | iput(inode); | ||
3798 | inode = NULL; | ||
3799 | key.offset = (u64)-1; | ||
3800 | goto skip; | ||
3801 | } | ||
3802 | } | ||
3803 | |||
3804 | if (!extent_locked) { | ||
3805 | struct btrfs_ordered_extent *ordered; | ||
3806 | |||
3807 | btrfs_release_path(root, path); | ||
3478 | 3808 | ||
3479 | ret = find_root_for_ref(extent_root, path, &found_key, | 3809 | lock_extent(&BTRFS_I(inode)->io_tree, lock_start, |
3480 | level, 0, &found_root, | 3810 | lock_end, GFP_NOFS); |
3481 | extent_key->objectid); | 3811 | ordered = btrfs_lookup_first_ordered_extent(inode, |
3812 | lock_end); | ||
3813 | if (ordered && | ||
3814 | ordered->file_offset <= lock_end && | ||
3815 | ordered->file_offset + ordered->len > lock_start) { | ||
3816 | unlock_extent(&BTRFS_I(inode)->io_tree, | ||
3817 | lock_start, lock_end, GFP_NOFS); | ||
3818 | btrfs_start_ordered_extent(inode, ordered, 1); | ||
3819 | btrfs_put_ordered_extent(ordered); | ||
3820 | key.offset += num_bytes; | ||
3821 | goto skip; | ||
3822 | } | ||
3823 | if (ordered) | ||
3824 | btrfs_put_ordered_extent(ordered); | ||
3825 | |||
3826 | mutex_lock(&BTRFS_I(inode)->extent_mutex); | ||
3827 | extent_locked = 1; | ||
3828 | continue; | ||
3829 | } | ||
3482 | 3830 | ||
3831 | if (nr_extents == 1) { | ||
3832 | /* update extent pointer in place */ | ||
3833 | btrfs_set_file_extent_generation(leaf, fi, | ||
3834 | trans->transid); | ||
3835 | btrfs_set_file_extent_disk_bytenr(leaf, fi, | ||
3836 | new_extents[0].disk_bytenr); | ||
3837 | btrfs_set_file_extent_disk_num_bytes(leaf, fi, | ||
3838 | new_extents[0].disk_num_bytes); | ||
3839 | ext_offset += new_extents[0].offset; | ||
3840 | btrfs_set_file_extent_offset(leaf, fi, ext_offset); | ||
3841 | btrfs_mark_buffer_dirty(leaf); | ||
3842 | |||
3843 | btrfs_drop_extent_cache(inode, key.offset, | ||
3844 | key.offset + num_bytes - 1, 0); | ||
3845 | |||
3846 | ret = btrfs_inc_extent_ref(trans, root, | ||
3847 | new_extents[0].disk_bytenr, | ||
3848 | new_extents[0].disk_num_bytes, | ||
3849 | leaf->start, | ||
3850 | root->root_key.objectid, | ||
3851 | trans->transid, | ||
3852 | key.objectid, key.offset); | ||
3853 | BUG_ON(ret); | ||
3854 | |||
3855 | ret = btrfs_free_extent(trans, root, | ||
3856 | extent_key->objectid, | ||
3857 | extent_key->offset, | ||
3858 | leaf->start, | ||
3859 | btrfs_header_owner(leaf), | ||
3860 | btrfs_header_generation(leaf), | ||
3861 | key.objectid, key.offset, 0); | ||
3862 | BUG_ON(ret); | ||
3863 | |||
3864 | btrfs_release_path(root, path); | ||
3865 | key.offset += num_bytes; | ||
3866 | } else { | ||
3867 | u64 alloc_hint; | ||
3868 | u64 extent_len; | ||
3869 | int i; | ||
3870 | /* | ||
3871 | * drop old extent pointer at first, then insert the | ||
3872 | * new pointers one bye one | ||
3873 | */ | ||
3874 | btrfs_release_path(root, path); | ||
3875 | ret = btrfs_drop_extents(trans, root, inode, key.offset, | ||
3876 | key.offset + num_bytes, | ||
3877 | key.offset, &alloc_hint); | ||
3878 | BUG_ON(ret); | ||
3879 | |||
3880 | for (i = 0; i < nr_extents; i++) { | ||
3881 | if (ext_offset >= new_extents[i].num_bytes) { | ||
3882 | ext_offset -= new_extents[i].num_bytes; | ||
3883 | continue; | ||
3884 | } | ||
3885 | extent_len = min(new_extents[i].num_bytes - | ||
3886 | ext_offset, num_bytes); | ||
3887 | |||
3888 | ret = btrfs_insert_empty_item(trans, root, | ||
3889 | path, &key, | ||
3890 | sizeof(*fi)); | ||
3891 | BUG_ON(ret); | ||
3892 | |||
3893 | leaf = path->nodes[0]; | ||
3894 | fi = btrfs_item_ptr(leaf, path->slots[0], | ||
3895 | struct btrfs_file_extent_item); | ||
3896 | btrfs_set_file_extent_generation(leaf, fi, | ||
3897 | trans->transid); | ||
3898 | btrfs_set_file_extent_type(leaf, fi, | ||
3899 | BTRFS_FILE_EXTENT_REG); | ||
3900 | btrfs_set_file_extent_disk_bytenr(leaf, fi, | ||
3901 | new_extents[i].disk_bytenr); | ||
3902 | btrfs_set_file_extent_disk_num_bytes(leaf, fi, | ||
3903 | new_extents[i].disk_num_bytes); | ||
3904 | btrfs_set_file_extent_num_bytes(leaf, fi, | ||
3905 | extent_len); | ||
3906 | ext_offset += new_extents[i].offset; | ||
3907 | btrfs_set_file_extent_offset(leaf, fi, | ||
3908 | ext_offset); | ||
3909 | btrfs_mark_buffer_dirty(leaf); | ||
3910 | |||
3911 | btrfs_drop_extent_cache(inode, key.offset, | ||
3912 | key.offset + extent_len - 1, 0); | ||
3913 | |||
3914 | ret = btrfs_inc_extent_ref(trans, root, | ||
3915 | new_extents[i].disk_bytenr, | ||
3916 | new_extents[i].disk_num_bytes, | ||
3917 | leaf->start, | ||
3918 | root->root_key.objectid, | ||
3919 | trans->transid, | ||
3920 | key.objectid, key.offset); | ||
3921 | BUG_ON(ret); | ||
3922 | btrfs_release_path(root, path); | ||
3923 | |||
3924 | inode->i_blocks += extent_len >> 9; | ||
3925 | |||
3926 | ext_offset = 0; | ||
3927 | num_bytes -= extent_len; | ||
3928 | key.offset += extent_len; | ||
3929 | |||
3930 | if (num_bytes == 0) | ||
3931 | break; | ||
3932 | } | ||
3933 | BUG_ON(i >= nr_extents); | ||
3934 | } | ||
3935 | |||
3936 | if (extent_locked) { | ||
3937 | mutex_unlock(&BTRFS_I(inode)->extent_mutex); | ||
3938 | unlock_extent(&BTRFS_I(inode)->io_tree, lock_start, | ||
3939 | lock_end, GFP_NOFS); | ||
3940 | extent_locked = 0; | ||
3941 | } | ||
3942 | skip: | ||
3943 | if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS && | ||
3944 | key.offset >= first_pos + extent_key->offset) | ||
3945 | break; | ||
3946 | |||
3947 | cond_resched(); | ||
3948 | } | ||
3949 | ret = 0; | ||
3950 | out: | ||
3951 | btrfs_release_path(root, path); | ||
3952 | if (inode) { | ||
3953 | mutex_unlock(&inode->i_mutex); | ||
3954 | if (extent_locked) { | ||
3955 | mutex_unlock(&BTRFS_I(inode)->extent_mutex); | ||
3956 | unlock_extent(&BTRFS_I(inode)->io_tree, lock_start, | ||
3957 | lock_end, GFP_NOFS); | ||
3958 | } | ||
3959 | iput(inode); | ||
3960 | } | ||
3961 | return ret; | ||
3962 | } | ||
3963 | |||
3964 | int btrfs_add_reloc_mapping(struct btrfs_root *root, u64 orig_bytenr, | ||
3965 | u64 num_bytes, u64 new_bytenr) | ||
3966 | { | ||
3967 | set_extent_bits(&root->fs_info->reloc_mapping_tree, | ||
3968 | orig_bytenr, orig_bytenr + num_bytes - 1, | ||
3969 | EXTENT_LOCKED, GFP_NOFS); | ||
3970 | set_state_private(&root->fs_info->reloc_mapping_tree, | ||
3971 | orig_bytenr, new_bytenr); | ||
3972 | return 0; | ||
3973 | } | ||
3974 | |||
3975 | int btrfs_get_reloc_mapping(struct btrfs_root *root, u64 orig_bytenr, | ||
3976 | u64 num_bytes, u64 *new_bytenr) | ||
3977 | { | ||
3978 | u64 bytenr; | ||
3979 | u64 cur_bytenr = orig_bytenr; | ||
3980 | u64 prev_bytenr = orig_bytenr; | ||
3981 | int ret; | ||
3982 | |||
3983 | while (1) { | ||
3984 | ret = get_state_private(&root->fs_info->reloc_mapping_tree, | ||
3985 | cur_bytenr, &bytenr); | ||
3483 | if (ret) | 3986 | if (ret) |
3484 | goto out; | 3987 | break; |
3988 | prev_bytenr = cur_bytenr; | ||
3989 | cur_bytenr = bytenr; | ||
3990 | } | ||
3485 | 3991 | ||
3486 | /* | 3992 | if (orig_bytenr == cur_bytenr) |
3487 | * right here almost anything could happen to our key, | 3993 | return -ENOENT; |
3488 | * but that's ok. The cow below will either relocate it | ||
3489 | * or someone else will have relocated it. Either way, | ||
3490 | * it is in a different spot than it was before and | ||
3491 | * we're happy. | ||
3492 | */ | ||
3493 | 3994 | ||
3494 | trans = btrfs_start_transaction(found_root, 1); | 3995 | if (prev_bytenr != orig_bytenr) { |
3996 | set_state_private(&root->fs_info->reloc_mapping_tree, | ||
3997 | orig_bytenr, cur_bytenr); | ||
3998 | } | ||
3999 | *new_bytenr = cur_bytenr; | ||
4000 | return 0; | ||
4001 | } | ||
3495 | 4002 | ||
3496 | if (found_root == extent_root->fs_info->extent_root || | 4003 | void btrfs_free_reloc_mappings(struct btrfs_root *root) |
3497 | found_root == extent_root->fs_info->chunk_root || | 4004 | { |
3498 | found_root == extent_root->fs_info->dev_root) { | 4005 | clear_extent_bits(&root->fs_info->reloc_mapping_tree, |
3499 | needs_lock = 1; | 4006 | 0, (u64)-1, -1, GFP_NOFS); |
3500 | mutex_lock(&extent_root->fs_info->alloc_mutex); | 4007 | } |
4008 | |||
4009 | int btrfs_reloc_tree_cache_ref(struct btrfs_trans_handle *trans, | ||
4010 | struct btrfs_root *root, | ||
4011 | struct extent_buffer *buf, u64 orig_start) | ||
4012 | { | ||
4013 | int level; | ||
4014 | int ret; | ||
4015 | |||
4016 | BUG_ON(btrfs_header_generation(buf) != trans->transid); | ||
4017 | BUG_ON(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID); | ||
4018 | |||
4019 | level = btrfs_header_level(buf); | ||
4020 | if (level == 0) { | ||
4021 | struct btrfs_leaf_ref *ref; | ||
4022 | struct btrfs_leaf_ref *orig_ref; | ||
4023 | |||
4024 | orig_ref = btrfs_lookup_leaf_ref(root, orig_start); | ||
4025 | if (!orig_ref) | ||
4026 | return -ENOENT; | ||
4027 | |||
4028 | ref = btrfs_alloc_leaf_ref(root, orig_ref->nritems); | ||
4029 | if (!ref) { | ||
4030 | btrfs_free_leaf_ref(root, orig_ref); | ||
4031 | return -ENOMEM; | ||
3501 | } | 4032 | } |
3502 | 4033 | ||
3503 | path->lowest_level = level; | 4034 | ref->nritems = orig_ref->nritems; |
3504 | path->reada = 2; | 4035 | memcpy(ref->extents, orig_ref->extents, |
3505 | ret = btrfs_search_slot(trans, found_root, &found_key, path, | 4036 | sizeof(ref->extents[0]) * ref->nritems); |
3506 | 0, 1); | 4037 | |
4038 | btrfs_free_leaf_ref(root, orig_ref); | ||
4039 | |||
4040 | ref->root_gen = trans->transid; | ||
4041 | ref->bytenr = buf->start; | ||
4042 | ref->owner = btrfs_header_owner(buf); | ||
4043 | ref->generation = btrfs_header_generation(buf); | ||
4044 | ret = btrfs_add_leaf_ref(root, ref, 0); | ||
4045 | WARN_ON(ret); | ||
4046 | btrfs_free_leaf_ref(root, ref); | ||
4047 | } | ||
4048 | return 0; | ||
4049 | } | ||
4050 | |||
4051 | static int noinline invalidate_extent_cache(struct btrfs_root *root, | ||
4052 | struct extent_buffer *leaf, | ||
4053 | struct btrfs_block_group_cache *group, | ||
4054 | struct btrfs_root *target_root) | ||
4055 | { | ||
4056 | struct btrfs_key key; | ||
4057 | struct inode *inode = NULL; | ||
4058 | struct btrfs_file_extent_item *fi; | ||
4059 | u64 num_bytes; | ||
4060 | u64 skip_objectid = 0; | ||
4061 | u32 nritems; | ||
4062 | u32 i; | ||
4063 | |||
4064 | nritems = btrfs_header_nritems(leaf); | ||
4065 | for (i = 0; i < nritems; i++) { | ||
4066 | btrfs_item_key_to_cpu(leaf, &key, i); | ||
4067 | if (key.objectid == skip_objectid || | ||
4068 | key.type != BTRFS_EXTENT_DATA_KEY) | ||
4069 | continue; | ||
4070 | fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item); | ||
4071 | if (btrfs_file_extent_type(leaf, fi) == | ||
4072 | BTRFS_FILE_EXTENT_INLINE) | ||
4073 | continue; | ||
4074 | if (btrfs_file_extent_disk_bytenr(leaf, fi) == 0) | ||
4075 | continue; | ||
4076 | if (!inode || inode->i_ino != key.objectid) { | ||
4077 | iput(inode); | ||
4078 | inode = btrfs_ilookup(target_root->fs_info->sb, | ||
4079 | key.objectid, target_root, 1); | ||
4080 | } | ||
4081 | if (!inode) { | ||
4082 | skip_objectid = key.objectid; | ||
4083 | continue; | ||
4084 | } | ||
4085 | num_bytes = btrfs_file_extent_num_bytes(leaf, fi); | ||
4086 | |||
4087 | lock_extent(&BTRFS_I(inode)->io_tree, key.offset, | ||
4088 | key.offset + num_bytes - 1, GFP_NOFS); | ||
4089 | mutex_lock(&BTRFS_I(inode)->extent_mutex); | ||
4090 | btrfs_drop_extent_cache(inode, key.offset, | ||
4091 | key.offset + num_bytes - 1, 1); | ||
4092 | mutex_unlock(&BTRFS_I(inode)->extent_mutex); | ||
4093 | unlock_extent(&BTRFS_I(inode)->io_tree, key.offset, | ||
4094 | key.offset + num_bytes - 1, GFP_NOFS); | ||
4095 | cond_resched(); | ||
4096 | } | ||
4097 | iput(inode); | ||
4098 | return 0; | ||
4099 | } | ||
4100 | |||
4101 | static int noinline replace_extents_in_leaf(struct btrfs_trans_handle *trans, | ||
4102 | struct btrfs_root *root, | ||
4103 | struct extent_buffer *leaf, | ||
4104 | struct btrfs_block_group_cache *group, | ||
4105 | struct inode *reloc_inode) | ||
4106 | { | ||
4107 | struct btrfs_key key; | ||
4108 | struct btrfs_key extent_key; | ||
4109 | struct btrfs_file_extent_item *fi; | ||
4110 | struct btrfs_leaf_ref *ref; | ||
4111 | struct disk_extent *new_extent; | ||
4112 | u64 bytenr; | ||
4113 | u64 num_bytes; | ||
4114 | u32 nritems; | ||
4115 | u32 i; | ||
4116 | int ext_index; | ||
4117 | int nr_extent; | ||
4118 | int ret; | ||
4119 | |||
4120 | new_extent = kmalloc(sizeof(*new_extent), GFP_NOFS); | ||
4121 | BUG_ON(!new_extent); | ||
4122 | |||
4123 | ref = btrfs_lookup_leaf_ref(root, leaf->start); | ||
4124 | BUG_ON(!ref); | ||
4125 | |||
4126 | ext_index = -1; | ||
4127 | nritems = btrfs_header_nritems(leaf); | ||
4128 | for (i = 0; i < nritems; i++) { | ||
4129 | btrfs_item_key_to_cpu(leaf, &key, i); | ||
4130 | if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) | ||
4131 | continue; | ||
4132 | fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item); | ||
4133 | if (btrfs_file_extent_type(leaf, fi) == | ||
4134 | BTRFS_FILE_EXTENT_INLINE) | ||
4135 | continue; | ||
4136 | bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); | ||
4137 | num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi); | ||
4138 | if (bytenr == 0) | ||
4139 | continue; | ||
4140 | |||
4141 | ext_index++; | ||
4142 | if (bytenr >= group->key.objectid + group->key.offset || | ||
4143 | bytenr + num_bytes <= group->key.objectid) | ||
4144 | continue; | ||
4145 | |||
4146 | extent_key.objectid = bytenr; | ||
4147 | extent_key.offset = num_bytes; | ||
4148 | extent_key.type = BTRFS_EXTENT_ITEM_KEY; | ||
4149 | nr_extent = 1; | ||
4150 | ret = get_new_locations(reloc_inode, &extent_key, | ||
4151 | group->key.objectid, 1, | ||
4152 | &new_extent, &nr_extent); | ||
4153 | if (ret > 0) | ||
4154 | continue; | ||
4155 | BUG_ON(ret < 0); | ||
4156 | |||
4157 | BUG_ON(ref->extents[ext_index].bytenr != bytenr); | ||
4158 | BUG_ON(ref->extents[ext_index].num_bytes != num_bytes); | ||
4159 | ref->extents[ext_index].bytenr = new_extent->disk_bytenr; | ||
4160 | ref->extents[ext_index].num_bytes = new_extent->disk_num_bytes; | ||
4161 | |||
4162 | btrfs_set_file_extent_generation(leaf, fi, trans->transid); | ||
4163 | btrfs_set_file_extent_disk_bytenr(leaf, fi, | ||
4164 | new_extent->disk_bytenr); | ||
4165 | btrfs_set_file_extent_disk_num_bytes(leaf, fi, | ||
4166 | new_extent->disk_num_bytes); | ||
4167 | new_extent->offset += btrfs_file_extent_offset(leaf, fi); | ||
4168 | btrfs_set_file_extent_offset(leaf, fi, new_extent->offset); | ||
4169 | btrfs_mark_buffer_dirty(leaf); | ||
4170 | |||
4171 | ret = btrfs_inc_extent_ref(trans, root, | ||
4172 | new_extent->disk_bytenr, | ||
4173 | new_extent->disk_num_bytes, | ||
4174 | leaf->start, | ||
4175 | root->root_key.objectid, | ||
4176 | trans->transid, | ||
4177 | key.objectid, key.offset); | ||
4178 | BUG_ON(ret); | ||
4179 | ret = btrfs_free_extent(trans, root, | ||
4180 | bytenr, num_bytes, leaf->start, | ||
4181 | btrfs_header_owner(leaf), | ||
4182 | btrfs_header_generation(leaf), | ||
4183 | key.objectid, key.offset, 0); | ||
4184 | BUG_ON(ret); | ||
4185 | cond_resched(); | ||
4186 | } | ||
4187 | kfree(new_extent); | ||
4188 | BUG_ON(ext_index + 1 != ref->nritems); | ||
4189 | btrfs_free_leaf_ref(root, ref); | ||
4190 | return 0; | ||
4191 | } | ||
4192 | |||
4193 | int btrfs_free_reloc_root(struct btrfs_root *root) | ||
4194 | { | ||
4195 | struct btrfs_root *reloc_root; | ||
4196 | |||
4197 | if (root->reloc_root) { | ||
4198 | reloc_root = root->reloc_root; | ||
4199 | root->reloc_root = NULL; | ||
4200 | list_add(&reloc_root->dead_list, | ||
4201 | &root->fs_info->dead_reloc_roots); | ||
4202 | } | ||
4203 | return 0; | ||
4204 | } | ||
4205 | |||
4206 | int btrfs_drop_dead_reloc_roots(struct btrfs_root *root) | ||
4207 | { | ||
4208 | struct btrfs_trans_handle *trans; | ||
4209 | struct btrfs_root *reloc_root; | ||
4210 | struct btrfs_root *prev_root = NULL; | ||
4211 | struct list_head dead_roots; | ||
4212 | int ret; | ||
4213 | unsigned long nr; | ||
4214 | |||
4215 | INIT_LIST_HEAD(&dead_roots); | ||
4216 | list_splice_init(&root->fs_info->dead_reloc_roots, &dead_roots); | ||
4217 | |||
4218 | while (!list_empty(&dead_roots)) { | ||
4219 | reloc_root = list_entry(dead_roots.prev, | ||
4220 | struct btrfs_root, dead_list); | ||
4221 | list_del_init(&reloc_root->dead_list); | ||
4222 | |||
4223 | BUG_ON(reloc_root->commit_root != NULL); | ||
4224 | while (1) { | ||
4225 | trans = btrfs_join_transaction(root, 1); | ||
4226 | BUG_ON(!trans); | ||
4227 | |||
4228 | mutex_lock(&root->fs_info->drop_mutex); | ||
4229 | ret = btrfs_drop_snapshot(trans, reloc_root); | ||
4230 | if (ret != -EAGAIN) | ||
4231 | break; | ||
4232 | mutex_unlock(&root->fs_info->drop_mutex); | ||
4233 | |||
4234 | nr = trans->blocks_used; | ||
4235 | ret = btrfs_end_transaction(trans, root); | ||
4236 | BUG_ON(ret); | ||
4237 | btrfs_btree_balance_dirty(root, nr); | ||
4238 | } | ||
4239 | |||
4240 | free_extent_buffer(reloc_root->node); | ||
4241 | |||
4242 | ret = btrfs_del_root(trans, root->fs_info->tree_root, | ||
4243 | &reloc_root->root_key); | ||
4244 | BUG_ON(ret); | ||
4245 | mutex_unlock(&root->fs_info->drop_mutex); | ||
4246 | |||
4247 | nr = trans->blocks_used; | ||
4248 | ret = btrfs_end_transaction(trans, root); | ||
4249 | BUG_ON(ret); | ||
4250 | btrfs_btree_balance_dirty(root, nr); | ||
4251 | |||
4252 | kfree(prev_root); | ||
4253 | prev_root = reloc_root; | ||
4254 | } | ||
4255 | if (prev_root) { | ||
4256 | btrfs_remove_leaf_refs(prev_root, (u64)-1, 0); | ||
4257 | kfree(prev_root); | ||
4258 | } | ||
4259 | return 0; | ||
4260 | } | ||
4261 | |||
4262 | int btrfs_add_dead_reloc_root(struct btrfs_root *root) | ||
4263 | { | ||
4264 | list_add(&root->dead_list, &root->fs_info->dead_reloc_roots); | ||
4265 | return 0; | ||
4266 | } | ||
4267 | |||
4268 | int btrfs_cleanup_reloc_trees(struct btrfs_root *root) | ||
4269 | { | ||
4270 | struct btrfs_root *reloc_root; | ||
4271 | struct btrfs_trans_handle *trans; | ||
4272 | struct btrfs_key location; | ||
4273 | int found; | ||
4274 | int ret; | ||
4275 | |||
4276 | mutex_lock(&root->fs_info->tree_reloc_mutex); | ||
4277 | ret = btrfs_find_dead_roots(root, BTRFS_TREE_RELOC_OBJECTID, NULL); | ||
4278 | BUG_ON(ret); | ||
4279 | found = !list_empty(&root->fs_info->dead_reloc_roots); | ||
4280 | mutex_unlock(&root->fs_info->tree_reloc_mutex); | ||
4281 | |||
4282 | if (found) { | ||
4283 | trans = btrfs_start_transaction(root, 1); | ||
4284 | BUG_ON(!trans); | ||
4285 | ret = btrfs_commit_transaction(trans, root); | ||
4286 | BUG_ON(ret); | ||
4287 | } | ||
4288 | |||
4289 | location.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID; | ||
4290 | location.offset = (u64)-1; | ||
4291 | location.type = BTRFS_ROOT_ITEM_KEY; | ||
4292 | |||
4293 | reloc_root = btrfs_read_fs_root_no_name(root->fs_info, &location); | ||
4294 | BUG_ON(!reloc_root); | ||
4295 | btrfs_orphan_cleanup(reloc_root); | ||
4296 | return 0; | ||
4297 | } | ||
4298 | |||
4299 | static int noinline init_reloc_tree(struct btrfs_trans_handle *trans, | ||
4300 | struct btrfs_root *root) | ||
4301 | { | ||
4302 | struct btrfs_root *reloc_root; | ||
4303 | struct extent_buffer *eb; | ||
4304 | struct btrfs_root_item *root_item; | ||
4305 | struct btrfs_key root_key; | ||
4306 | int ret; | ||
4307 | |||
4308 | BUG_ON(!root->ref_cows); | ||
4309 | if (root->reloc_root) | ||
4310 | return 0; | ||
4311 | |||
4312 | root_item = kmalloc(sizeof(*root_item), GFP_NOFS); | ||
4313 | BUG_ON(!root_item); | ||
4314 | |||
4315 | ret = btrfs_copy_root(trans, root, root->commit_root, | ||
4316 | &eb, BTRFS_TREE_RELOC_OBJECTID); | ||
4317 | BUG_ON(ret); | ||
4318 | |||
4319 | root_key.objectid = BTRFS_TREE_RELOC_OBJECTID; | ||
4320 | root_key.offset = root->root_key.objectid; | ||
4321 | root_key.type = BTRFS_ROOT_ITEM_KEY; | ||
4322 | |||
4323 | memcpy(root_item, &root->root_item, sizeof(root_item)); | ||
4324 | btrfs_set_root_refs(root_item, 0); | ||
4325 | btrfs_set_root_bytenr(root_item, eb->start); | ||
4326 | btrfs_set_root_level(root_item, btrfs_header_level(eb)); | ||
4327 | memset(&root_item->drop_progress, 0, sizeof(root_item->drop_progress)); | ||
4328 | root_item->drop_level = 0; | ||
4329 | |||
4330 | btrfs_tree_unlock(eb); | ||
4331 | free_extent_buffer(eb); | ||
4332 | |||
4333 | ret = btrfs_insert_root(trans, root->fs_info->tree_root, | ||
4334 | &root_key, root_item); | ||
4335 | BUG_ON(ret); | ||
4336 | kfree(root_item); | ||
4337 | |||
4338 | reloc_root = btrfs_read_fs_root_no_radix(root->fs_info->tree_root, | ||
4339 | &root_key); | ||
4340 | BUG_ON(!reloc_root); | ||
4341 | reloc_root->last_trans = trans->transid; | ||
4342 | reloc_root->commit_root = NULL; | ||
4343 | reloc_root->ref_tree = &root->fs_info->reloc_ref_tree; | ||
4344 | |||
4345 | root->reloc_root = reloc_root; | ||
4346 | return 0; | ||
4347 | } | ||
4348 | |||
4349 | /* | ||
4350 | * Core function of space balance. | ||
4351 | * | ||
4352 | * The idea is using reloc trees to relocate tree blocks in reference | ||
4353 | * counted roots. There is one reloc tree for each subvol, all reloc | ||
4354 | * trees share same key objectid. Reloc trees are snapshots of the | ||
4355 | * latest committed roots (subvol root->commit_root). To relocate a tree | ||
4356 | * block referenced by a subvol, the code COW the block through the reloc | ||
4357 | * tree, then update pointer in the subvol to point to the new block. | ||
4358 | * Since all reloc trees share same key objectid, we can easily do special | ||
4359 | * handing to share tree blocks between reloc trees. Once a tree block has | ||
4360 | * been COWed in one reloc tree, we can use the result when the same block | ||
4361 | * is COWed again through other reloc trees. | ||
4362 | */ | ||
4363 | static int noinline relocate_one_path(struct btrfs_trans_handle *trans, | ||
4364 | struct btrfs_root *root, | ||
4365 | struct btrfs_path *path, | ||
4366 | struct btrfs_key *first_key, | ||
4367 | struct btrfs_ref_path *ref_path, | ||
4368 | struct btrfs_block_group_cache *group, | ||
4369 | struct inode *reloc_inode) | ||
4370 | { | ||
4371 | struct btrfs_root *reloc_root; | ||
4372 | struct extent_buffer *eb = NULL; | ||
4373 | struct btrfs_key *keys; | ||
4374 | u64 *nodes; | ||
4375 | int level; | ||
4376 | int lowest_merge; | ||
4377 | int lowest_level = 0; | ||
4378 | int update_refs; | ||
4379 | int ret; | ||
4380 | |||
4381 | if (ref_path->owner_objectid < BTRFS_FIRST_FREE_OBJECTID) | ||
4382 | lowest_level = ref_path->owner_objectid; | ||
4383 | |||
4384 | if (is_cowonly_root(ref_path->root_objectid)) { | ||
4385 | path->lowest_level = lowest_level; | ||
4386 | ret = btrfs_search_slot(trans, root, first_key, path, 0, 1); | ||
4387 | BUG_ON(ret < 0); | ||
3507 | path->lowest_level = 0; | 4388 | path->lowest_level = 0; |
3508 | btrfs_release_path(found_root, path); | 4389 | btrfs_release_path(root, path); |
4390 | return 0; | ||
4391 | } | ||
3509 | 4392 | ||
3510 | if (found_root == found_root->fs_info->extent_root) | 4393 | keys = kzalloc(sizeof(*keys) * BTRFS_MAX_LEVEL, GFP_NOFS); |
3511 | btrfs_extent_post_op(trans, found_root); | 4394 | BUG_ON(!keys); |
3512 | if (needs_lock) | 4395 | nodes = kzalloc(sizeof(*nodes) * BTRFS_MAX_LEVEL, GFP_NOFS); |
3513 | mutex_unlock(&extent_root->fs_info->alloc_mutex); | 4396 | BUG_ON(!nodes); |
4397 | |||
4398 | mutex_lock(&root->fs_info->tree_reloc_mutex); | ||
4399 | ret = init_reloc_tree(trans, root); | ||
4400 | BUG_ON(ret); | ||
4401 | reloc_root = root->reloc_root; | ||
4402 | |||
4403 | path->lowest_level = lowest_level; | ||
4404 | ret = btrfs_search_slot(trans, reloc_root, first_key, path, 0, 0); | ||
4405 | BUG_ON(ret); | ||
4406 | /* | ||
4407 | * get relocation mapping for tree blocks in the path | ||
4408 | */ | ||
4409 | lowest_merge = BTRFS_MAX_LEVEL; | ||
4410 | for (level = BTRFS_MAX_LEVEL - 1; level >= lowest_level; level--) { | ||
4411 | u64 new_bytenr; | ||
4412 | eb = path->nodes[level]; | ||
4413 | if (!eb || eb == reloc_root->node) | ||
4414 | continue; | ||
4415 | ret = btrfs_get_reloc_mapping(reloc_root, eb->start, eb->len, | ||
4416 | &new_bytenr); | ||
4417 | if (ret) | ||
4418 | continue; | ||
4419 | if (level == 0) | ||
4420 | btrfs_item_key_to_cpu(eb, &keys[level], 0); | ||
4421 | else | ||
4422 | btrfs_node_key_to_cpu(eb, &keys[level], 0); | ||
4423 | nodes[level] = new_bytenr; | ||
4424 | lowest_merge = level; | ||
4425 | } | ||
3514 | 4426 | ||
3515 | btrfs_end_transaction(trans, found_root); | 4427 | update_refs = 0; |
4428 | if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) { | ||
4429 | eb = path->nodes[0]; | ||
4430 | if (btrfs_header_generation(eb) < trans->transid) | ||
4431 | update_refs = 1; | ||
4432 | } | ||
3516 | 4433 | ||
4434 | btrfs_release_path(reloc_root, path); | ||
4435 | /* | ||
4436 | * merge tree blocks that already relocated in other reloc trees | ||
4437 | */ | ||
4438 | if (lowest_merge != BTRFS_MAX_LEVEL) { | ||
4439 | ret = btrfs_merge_path(trans, reloc_root, keys, nodes, | ||
4440 | lowest_merge); | ||
4441 | BUG_ON(ret < 0); | ||
3517 | } | 4442 | } |
3518 | out: | 4443 | /* |
3519 | mutex_lock(&extent_root->fs_info->alloc_mutex); | 4444 | * cow any tree blocks that still haven't been relocated |
4445 | */ | ||
4446 | ret = btrfs_search_slot(trans, reloc_root, first_key, path, 0, 1); | ||
4447 | BUG_ON(ret); | ||
4448 | /* | ||
4449 | * if we are relocating data block group, update extent pointers | ||
4450 | * in the newly created tree leaf. | ||
4451 | */ | ||
4452 | eb = path->nodes[0]; | ||
4453 | if (update_refs && nodes[0] != eb->start) { | ||
4454 | ret = replace_extents_in_leaf(trans, reloc_root, eb, group, | ||
4455 | reloc_inode); | ||
4456 | BUG_ON(ret); | ||
4457 | } | ||
4458 | |||
4459 | memset(keys, 0, sizeof(*keys) * BTRFS_MAX_LEVEL); | ||
4460 | memset(nodes, 0, sizeof(*nodes) * BTRFS_MAX_LEVEL); | ||
4461 | for (level = BTRFS_MAX_LEVEL - 1; level >= lowest_level; level--) { | ||
4462 | eb = path->nodes[level]; | ||
4463 | if (!eb || eb == reloc_root->node) | ||
4464 | continue; | ||
4465 | BUG_ON(btrfs_header_owner(eb) != BTRFS_TREE_RELOC_OBJECTID); | ||
4466 | nodes[level] = eb->start; | ||
4467 | if (level == 0) | ||
4468 | btrfs_item_key_to_cpu(eb, &keys[level], 0); | ||
4469 | else | ||
4470 | btrfs_node_key_to_cpu(eb, &keys[level], 0); | ||
4471 | } | ||
4472 | |||
4473 | if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) { | ||
4474 | eb = path->nodes[0]; | ||
4475 | extent_buffer_get(eb); | ||
4476 | } | ||
4477 | btrfs_release_path(reloc_root, path); | ||
4478 | /* | ||
4479 | * replace tree blocks in the fs tree with tree blocks in | ||
4480 | * the reloc tree. | ||
4481 | */ | ||
4482 | ret = btrfs_merge_path(trans, root, keys, nodes, lowest_level); | ||
4483 | BUG_ON(ret < 0); | ||
4484 | |||
4485 | if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) { | ||
4486 | ret = invalidate_extent_cache(reloc_root, eb, group, root); | ||
4487 | BUG_ON(ret); | ||
4488 | free_extent_buffer(eb); | ||
4489 | } | ||
4490 | mutex_unlock(&root->fs_info->tree_reloc_mutex); | ||
4491 | |||
4492 | path->lowest_level = 0; | ||
4493 | kfree(nodes); | ||
4494 | kfree(keys); | ||
4495 | return 0; | ||
4496 | } | ||
4497 | |||
4498 | static int noinline relocate_tree_block(struct btrfs_trans_handle *trans, | ||
4499 | struct btrfs_root *root, | ||
4500 | struct btrfs_path *path, | ||
4501 | struct btrfs_key *first_key, | ||
4502 | struct btrfs_ref_path *ref_path) | ||
4503 | { | ||
4504 | int ret; | ||
4505 | int needs_lock = 0; | ||
4506 | |||
4507 | if (root == root->fs_info->extent_root || | ||
4508 | root == root->fs_info->chunk_root || | ||
4509 | root == root->fs_info->dev_root) { | ||
4510 | needs_lock = 1; | ||
4511 | mutex_lock(&root->fs_info->alloc_mutex); | ||
4512 | } | ||
4513 | |||
4514 | ret = relocate_one_path(trans, root, path, first_key, | ||
4515 | ref_path, NULL, NULL); | ||
4516 | BUG_ON(ret); | ||
4517 | |||
4518 | if (root == root->fs_info->extent_root) | ||
4519 | btrfs_extent_post_op(trans, root); | ||
4520 | if (needs_lock) | ||
4521 | mutex_unlock(&root->fs_info->alloc_mutex); | ||
4522 | |||
3520 | return 0; | 4523 | return 0; |
3521 | } | 4524 | } |
3522 | 4525 | ||
3523 | static int noinline del_extent_zero(struct btrfs_root *extent_root, | 4526 | static int noinline del_extent_zero(struct btrfs_trans_handle *trans, |
4527 | struct btrfs_root *extent_root, | ||
3524 | struct btrfs_path *path, | 4528 | struct btrfs_path *path, |
3525 | struct btrfs_key *extent_key) | 4529 | struct btrfs_key *extent_key) |
3526 | { | 4530 | { |
3527 | int ret; | 4531 | int ret; |
3528 | struct btrfs_trans_handle *trans; | ||
3529 | 4532 | ||
3530 | trans = btrfs_start_transaction(extent_root, 1); | 4533 | mutex_lock(&extent_root->fs_info->alloc_mutex); |
3531 | ret = btrfs_search_slot(trans, extent_root, extent_key, path, -1, 1); | 4534 | ret = btrfs_search_slot(trans, extent_root, extent_key, path, -1, 1); |
3532 | if (ret > 0) { | 4535 | if (ret) |
3533 | ret = -EIO; | ||
3534 | goto out; | ||
3535 | } | ||
3536 | if (ret < 0) | ||
3537 | goto out; | 4536 | goto out; |
3538 | ret = btrfs_del_item(trans, extent_root, path); | 4537 | ret = btrfs_del_item(trans, extent_root, path); |
3539 | out: | 4538 | out: |
3540 | btrfs_end_transaction(trans, extent_root); | 4539 | btrfs_release_path(extent_root, path); |
4540 | mutex_unlock(&extent_root->fs_info->alloc_mutex); | ||
3541 | return ret; | 4541 | return ret; |
3542 | } | 4542 | } |
3543 | 4543 | ||
4544 | static struct btrfs_root noinline *read_ref_root(struct btrfs_fs_info *fs_info, | ||
4545 | struct btrfs_ref_path *ref_path) | ||
4546 | { | ||
4547 | struct btrfs_key root_key; | ||
4548 | |||
4549 | root_key.objectid = ref_path->root_objectid; | ||
4550 | root_key.type = BTRFS_ROOT_ITEM_KEY; | ||
4551 | if (is_cowonly_root(ref_path->root_objectid)) | ||
4552 | root_key.offset = 0; | ||
4553 | else | ||
4554 | root_key.offset = (u64)-1; | ||
4555 | |||
4556 | return btrfs_read_fs_root_no_name(fs_info, &root_key); | ||
4557 | } | ||
4558 | |||
3544 | static int noinline relocate_one_extent(struct btrfs_root *extent_root, | 4559 | static int noinline relocate_one_extent(struct btrfs_root *extent_root, |
3545 | struct btrfs_path *path, | 4560 | struct btrfs_path *path, |
3546 | struct btrfs_key *extent_key) | 4561 | struct btrfs_key *extent_key, |
4562 | struct btrfs_block_group_cache *group, | ||
4563 | struct inode *reloc_inode, int pass) | ||
3547 | { | 4564 | { |
3548 | struct btrfs_key key; | 4565 | struct btrfs_trans_handle *trans; |
3549 | struct btrfs_key found_key; | 4566 | struct btrfs_root *found_root; |
3550 | struct extent_buffer *leaf; | 4567 | struct btrfs_ref_path *ref_path = NULL; |
3551 | u64 last_file_objectid = 0; | 4568 | struct disk_extent *new_extents = NULL; |
3552 | u64 last_file_root = 0; | 4569 | int nr_extents = 0; |
3553 | u64 last_file_offset = (u64)-1; | 4570 | int loops; |
3554 | u64 last_extent = 0; | 4571 | int ret; |
3555 | u32 nritems; | 4572 | int level; |
3556 | u32 item_size; | 4573 | struct btrfs_key first_key; |
3557 | int ret = 0; | 4574 | u64 prev_block = 0; |
4575 | |||
4576 | mutex_unlock(&extent_root->fs_info->alloc_mutex); | ||
4577 | |||
4578 | trans = btrfs_start_transaction(extent_root, 1); | ||
4579 | BUG_ON(!trans); | ||
3558 | 4580 | ||
3559 | if (extent_key->objectid == 0) { | 4581 | if (extent_key->objectid == 0) { |
3560 | ret = del_extent_zero(extent_root, path, extent_key); | 4582 | ret = del_extent_zero(trans, extent_root, path, extent_key); |
3561 | goto out; | 4583 | goto out; |
3562 | } | 4584 | } |
3563 | key.objectid = extent_key->objectid; | ||
3564 | key.type = BTRFS_EXTENT_REF_KEY; | ||
3565 | key.offset = 0; | ||
3566 | 4585 | ||
3567 | while(1) { | 4586 | ref_path = kmalloc(sizeof(*ref_path), GFP_NOFS); |
3568 | ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0); | 4587 | if (!ref_path) { |
4588 | ret = -ENOMEM; | ||
4589 | goto out; | ||
4590 | } | ||
3569 | 4591 | ||
4592 | for (loops = 0; ; loops++) { | ||
4593 | if (loops == 0) { | ||
4594 | ret = btrfs_first_ref_path(trans, extent_root, ref_path, | ||
4595 | extent_key->objectid); | ||
4596 | } else { | ||
4597 | ret = btrfs_next_ref_path(trans, extent_root, ref_path); | ||
4598 | } | ||
3570 | if (ret < 0) | 4599 | if (ret < 0) |
3571 | goto out; | 4600 | goto out; |
4601 | if (ret > 0) | ||
4602 | break; | ||
3572 | 4603 | ||
3573 | ret = 0; | 4604 | if (ref_path->root_objectid == BTRFS_TREE_LOG_OBJECTID || |
3574 | leaf = path->nodes[0]; | 4605 | ref_path->root_objectid == BTRFS_TREE_RELOC_OBJECTID) |
3575 | nritems = btrfs_header_nritems(leaf); | 4606 | continue; |
3576 | if (path->slots[0] == nritems) { | 4607 | |
3577 | ret = btrfs_next_leaf(extent_root, path); | 4608 | found_root = read_ref_root(extent_root->fs_info, ref_path); |
3578 | if (ret > 0) { | 4609 | BUG_ON(!found_root); |
3579 | ret = 0; | 4610 | /* |
3580 | goto out; | 4611 | * for reference counted tree, only process reference paths |
4612 | * rooted at the latest committed root. | ||
4613 | */ | ||
4614 | if (found_root->ref_cows && | ||
4615 | ref_path->root_generation != found_root->root_key.offset) | ||
4616 | continue; | ||
4617 | |||
4618 | if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) { | ||
4619 | if (pass == 0) { | ||
4620 | /* | ||
4621 | * copy data extents to new locations | ||
4622 | */ | ||
4623 | u64 group_start = group->key.objectid; | ||
4624 | ret = relocate_data_extent(reloc_inode, | ||
4625 | extent_key, | ||
4626 | group_start); | ||
4627 | if (ret < 0) | ||
4628 | goto out; | ||
4629 | break; | ||
3581 | } | 4630 | } |
3582 | if (ret < 0) | 4631 | level = 0; |
3583 | goto out; | 4632 | } else { |
3584 | leaf = path->nodes[0]; | 4633 | level = ref_path->owner_objectid; |
3585 | } | 4634 | } |
3586 | 4635 | ||
3587 | btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); | 4636 | if (prev_block != ref_path->nodes[level]) { |
3588 | if (found_key.objectid != extent_key->objectid) { | 4637 | struct extent_buffer *eb; |
3589 | break; | 4638 | u64 block_start = ref_path->nodes[level]; |
3590 | } | 4639 | u64 block_size = btrfs_level_size(found_root, level); |
3591 | 4640 | ||
3592 | if (found_key.type != BTRFS_EXTENT_REF_KEY) { | 4641 | eb = read_tree_block(found_root, block_start, |
3593 | break; | 4642 | block_size, 0); |
4643 | btrfs_tree_lock(eb); | ||
4644 | BUG_ON(level != btrfs_header_level(eb)); | ||
4645 | |||
4646 | if (level == 0) | ||
4647 | btrfs_item_key_to_cpu(eb, &first_key, 0); | ||
4648 | else | ||
4649 | btrfs_node_key_to_cpu(eb, &first_key, 0); | ||
4650 | |||
4651 | btrfs_tree_unlock(eb); | ||
4652 | free_extent_buffer(eb); | ||
4653 | prev_block = block_start; | ||
3594 | } | 4654 | } |
3595 | 4655 | ||
3596 | key.offset = found_key.offset + 1; | 4656 | if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID && |
3597 | item_size = btrfs_item_size_nr(leaf, path->slots[0]); | 4657 | pass >= 2) { |
4658 | /* | ||
4659 | * use fallback method to process the remaining | ||
4660 | * references. | ||
4661 | */ | ||
4662 | if (!new_extents) { | ||
4663 | u64 group_start = group->key.objectid; | ||
4664 | ret = get_new_locations(reloc_inode, | ||
4665 | extent_key, | ||
4666 | group_start, 0, | ||
4667 | &new_extents, | ||
4668 | &nr_extents); | ||
4669 | if (ret < 0) | ||
4670 | goto out; | ||
4671 | } | ||
4672 | btrfs_record_root_in_trans(found_root); | ||
4673 | ret = replace_one_extent(trans, found_root, | ||
4674 | path, extent_key, | ||
4675 | &first_key, ref_path, | ||
4676 | new_extents, nr_extents); | ||
4677 | if (ret < 0) | ||
4678 | goto out; | ||
4679 | continue; | ||
4680 | } | ||
3598 | 4681 | ||
3599 | ret = relocate_one_reference(extent_root, path, extent_key, | 4682 | btrfs_record_root_in_trans(found_root); |
3600 | &last_file_objectid, | 4683 | if (ref_path->owner_objectid < BTRFS_FIRST_FREE_OBJECTID) { |
3601 | &last_file_offset, | 4684 | ret = relocate_tree_block(trans, found_root, path, |
3602 | &last_file_root, last_extent); | 4685 | &first_key, ref_path); |
3603 | if (ret) | 4686 | } else { |
4687 | /* | ||
4688 | * try to update data extent references while | ||
4689 | * keeping metadata shared between snapshots. | ||
4690 | */ | ||
4691 | ret = relocate_one_path(trans, found_root, path, | ||
4692 | &first_key, ref_path, | ||
4693 | group, reloc_inode); | ||
4694 | } | ||
4695 | if (ret < 0) | ||
3604 | goto out; | 4696 | goto out; |
3605 | last_extent = extent_key->objectid; | ||
3606 | } | 4697 | } |
3607 | ret = 0; | 4698 | ret = 0; |
3608 | out: | 4699 | out: |
3609 | btrfs_release_path(extent_root, path); | 4700 | btrfs_end_transaction(trans, extent_root); |
4701 | kfree(new_extents); | ||
4702 | kfree(ref_path); | ||
4703 | mutex_lock(&extent_root->fs_info->alloc_mutex); | ||
3610 | return ret; | 4704 | return ret; |
3611 | } | 4705 | } |
3612 | 4706 | ||
@@ -3686,84 +4780,155 @@ int __alloc_chunk_for_shrink(struct btrfs_root *root, | |||
3686 | return 0; | 4780 | return 0; |
3687 | } | 4781 | } |
3688 | 4782 | ||
3689 | int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 shrink_start) | 4783 | static int __insert_orphan_inode(struct btrfs_trans_handle *trans, |
4784 | struct btrfs_root *root, | ||
4785 | u64 objectid, u64 size) | ||
4786 | { | ||
4787 | struct btrfs_path *path; | ||
4788 | struct btrfs_inode_item *item; | ||
4789 | struct extent_buffer *leaf; | ||
4790 | int ret; | ||
4791 | |||
4792 | path = btrfs_alloc_path(); | ||
4793 | if (!path) | ||
4794 | return -ENOMEM; | ||
4795 | |||
4796 | ret = btrfs_insert_empty_inode(trans, root, path, objectid); | ||
4797 | if (ret) | ||
4798 | goto out; | ||
4799 | |||
4800 | leaf = path->nodes[0]; | ||
4801 | item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item); | ||
4802 | memset_extent_buffer(leaf, 0, (unsigned long)item, sizeof(*item)); | ||
4803 | btrfs_set_inode_generation(leaf, item, 1); | ||
4804 | btrfs_set_inode_size(leaf, item, size); | ||
4805 | btrfs_set_inode_mode(leaf, item, S_IFREG | 0600); | ||
4806 | btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NODATASUM); | ||
4807 | btrfs_mark_buffer_dirty(leaf); | ||
4808 | btrfs_release_path(root, path); | ||
4809 | out: | ||
4810 | btrfs_free_path(path); | ||
4811 | return ret; | ||
4812 | } | ||
4813 | |||
4814 | static struct inode noinline *create_reloc_inode(struct btrfs_fs_info *fs_info, | ||
4815 | struct btrfs_block_group_cache *group) | ||
4816 | { | ||
4817 | struct inode *inode = NULL; | ||
4818 | struct btrfs_trans_handle *trans; | ||
4819 | struct btrfs_root *root; | ||
4820 | struct btrfs_key root_key; | ||
4821 | u64 objectid = BTRFS_FIRST_FREE_OBJECTID; | ||
4822 | int err = 0; | ||
4823 | |||
4824 | root_key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID; | ||
4825 | root_key.type = BTRFS_ROOT_ITEM_KEY; | ||
4826 | root_key.offset = (u64)-1; | ||
4827 | root = btrfs_read_fs_root_no_name(fs_info, &root_key); | ||
4828 | if (IS_ERR(root)) | ||
4829 | return ERR_CAST(root); | ||
4830 | |||
4831 | trans = btrfs_start_transaction(root, 1); | ||
4832 | BUG_ON(!trans); | ||
4833 | |||
4834 | err = btrfs_find_free_objectid(trans, root, objectid, &objectid); | ||
4835 | if (err) | ||
4836 | goto out; | ||
4837 | |||
4838 | err = __insert_orphan_inode(trans, root, objectid, group->key.offset); | ||
4839 | BUG_ON(err); | ||
4840 | |||
4841 | err = btrfs_insert_file_extent(trans, root, objectid, 0, 0, 0, | ||
4842 | group->key.offset, 0); | ||
4843 | BUG_ON(err); | ||
4844 | |||
4845 | inode = btrfs_iget_locked(root->fs_info->sb, objectid, root); | ||
4846 | if (inode->i_state & I_NEW) { | ||
4847 | BTRFS_I(inode)->root = root; | ||
4848 | BTRFS_I(inode)->location.objectid = objectid; | ||
4849 | BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY; | ||
4850 | BTRFS_I(inode)->location.offset = 0; | ||
4851 | btrfs_read_locked_inode(inode); | ||
4852 | unlock_new_inode(inode); | ||
4853 | BUG_ON(is_bad_inode(inode)); | ||
4854 | } else { | ||
4855 | BUG_ON(1); | ||
4856 | } | ||
4857 | |||
4858 | err = btrfs_orphan_add(trans, inode); | ||
4859 | out: | ||
4860 | btrfs_end_transaction(trans, root); | ||
4861 | if (err) { | ||
4862 | if (inode) | ||
4863 | iput(inode); | ||
4864 | inode = ERR_PTR(err); | ||
4865 | } | ||
4866 | return inode; | ||
4867 | } | ||
4868 | |||
4869 | int btrfs_relocate_block_group(struct btrfs_root *root, u64 group_start) | ||
3690 | { | 4870 | { |
3691 | struct btrfs_trans_handle *trans; | 4871 | struct btrfs_trans_handle *trans; |
3692 | struct btrfs_root *tree_root = root->fs_info->tree_root; | ||
3693 | struct btrfs_path *path; | 4872 | struct btrfs_path *path; |
4873 | struct btrfs_fs_info *info = root->fs_info; | ||
4874 | struct extent_buffer *leaf; | ||
4875 | struct inode *reloc_inode; | ||
4876 | struct btrfs_block_group_cache *block_group; | ||
4877 | struct btrfs_key key; | ||
3694 | u64 cur_byte; | 4878 | u64 cur_byte; |
3695 | u64 total_found; | 4879 | u64 total_found; |
3696 | u64 shrink_last_byte; | ||
3697 | struct btrfs_block_group_cache *shrink_block_group; | ||
3698 | struct btrfs_key key; | ||
3699 | struct btrfs_key found_key; | ||
3700 | struct extent_buffer *leaf; | ||
3701 | u32 nritems; | 4880 | u32 nritems; |
3702 | int ret; | 4881 | int ret; |
3703 | int progress; | 4882 | int progress; |
4883 | int pass = 0; | ||
3704 | 4884 | ||
3705 | mutex_lock(&root->fs_info->alloc_mutex); | 4885 | root = root->fs_info->extent_root; |
3706 | shrink_block_group = btrfs_lookup_block_group(root->fs_info, | ||
3707 | shrink_start); | ||
3708 | BUG_ON(!shrink_block_group); | ||
3709 | 4886 | ||
3710 | shrink_last_byte = shrink_block_group->key.objectid + | 4887 | block_group = btrfs_lookup_block_group(info, group_start); |
3711 | shrink_block_group->key.offset; | 4888 | BUG_ON(!block_group); |
4889 | |||
4890 | printk("btrfs relocating block group %llu flags %llu\n", | ||
4891 | (unsigned long long)block_group->key.objectid, | ||
4892 | (unsigned long long)block_group->flags); | ||
3712 | 4893 | ||
3713 | shrink_block_group->space_info->total_bytes -= | ||
3714 | shrink_block_group->key.offset; | ||
3715 | path = btrfs_alloc_path(); | 4894 | path = btrfs_alloc_path(); |
3716 | root = root->fs_info->extent_root; | 4895 | BUG_ON(!path); |
3717 | path->reada = 2; | ||
3718 | 4896 | ||
3719 | printk("btrfs relocating block group %llu flags %llu\n", | 4897 | reloc_inode = create_reloc_inode(info, block_group); |
3720 | (unsigned long long)shrink_start, | 4898 | BUG_ON(IS_ERR(reloc_inode)); |
3721 | (unsigned long long)shrink_block_group->flags); | ||
3722 | 4899 | ||
3723 | __alloc_chunk_for_shrink(root, shrink_block_group, 1); | 4900 | mutex_lock(&root->fs_info->alloc_mutex); |
3724 | 4901 | ||
3725 | again: | 4902 | __alloc_chunk_for_shrink(root, block_group, 1); |
4903 | block_group->ro = 1; | ||
4904 | block_group->space_info->total_bytes -= block_group->key.offset; | ||
3726 | 4905 | ||
3727 | shrink_block_group->ro = 1; | 4906 | mutex_unlock(&root->fs_info->alloc_mutex); |
3728 | 4907 | ||
4908 | btrfs_start_delalloc_inodes(info->tree_root); | ||
4909 | btrfs_wait_ordered_extents(info->tree_root, 0); | ||
4910 | again: | ||
3729 | total_found = 0; | 4911 | total_found = 0; |
3730 | progress = 0; | 4912 | progress = 0; |
3731 | key.objectid = shrink_start; | 4913 | key.objectid = block_group->key.objectid; |
3732 | key.offset = 0; | 4914 | key.offset = 0; |
3733 | key.type = 0; | 4915 | key.type = 0; |
3734 | cur_byte = key.objectid; | 4916 | cur_byte = key.objectid; |
3735 | 4917 | ||
3736 | mutex_unlock(&root->fs_info->alloc_mutex); | 4918 | trans = btrfs_start_transaction(info->tree_root, 1); |
4919 | btrfs_commit_transaction(trans, info->tree_root); | ||
3737 | 4920 | ||
3738 | btrfs_start_delalloc_inodes(root); | 4921 | mutex_lock(&root->fs_info->cleaner_mutex); |
3739 | btrfs_wait_ordered_extents(tree_root, 0); | 4922 | btrfs_clean_old_snapshots(info->tree_root); |
4923 | btrfs_remove_leaf_refs(info->tree_root, (u64)-1, 1); | ||
4924 | mutex_unlock(&root->fs_info->cleaner_mutex); | ||
3740 | 4925 | ||
3741 | mutex_lock(&root->fs_info->alloc_mutex); | 4926 | mutex_lock(&root->fs_info->alloc_mutex); |
3742 | 4927 | ||
3743 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | ||
3744 | if (ret < 0) | ||
3745 | goto out; | ||
3746 | |||
3747 | ret = btrfs_previous_item(root, path, 0, BTRFS_EXTENT_ITEM_KEY); | ||
3748 | if (ret < 0) | ||
3749 | goto out; | ||
3750 | |||
3751 | if (ret == 0) { | ||
3752 | leaf = path->nodes[0]; | ||
3753 | btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); | ||
3754 | if (found_key.objectid + found_key.offset > shrink_start && | ||
3755 | found_key.objectid < shrink_last_byte) { | ||
3756 | cur_byte = found_key.objectid; | ||
3757 | key.objectid = cur_byte; | ||
3758 | } | ||
3759 | } | ||
3760 | btrfs_release_path(root, path); | ||
3761 | |||
3762 | while(1) { | 4928 | while(1) { |
3763 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | 4929 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); |
3764 | if (ret < 0) | 4930 | if (ret < 0) |
3765 | goto out; | 4931 | goto out; |
3766 | |||
3767 | next: | 4932 | next: |
3768 | leaf = path->nodes[0]; | 4933 | leaf = path->nodes[0]; |
3769 | nritems = btrfs_header_nritems(leaf); | 4934 | nritems = btrfs_header_nritems(leaf); |
@@ -3779,109 +4944,76 @@ next: | |||
3779 | nritems = btrfs_header_nritems(leaf); | 4944 | nritems = btrfs_header_nritems(leaf); |
3780 | } | 4945 | } |
3781 | 4946 | ||
3782 | btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); | 4947 | btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); |
3783 | 4948 | ||
3784 | if (found_key.objectid >= shrink_last_byte) | 4949 | if (key.objectid >= block_group->key.objectid + |
4950 | block_group->key.offset) | ||
3785 | break; | 4951 | break; |
3786 | 4952 | ||
3787 | if (progress && need_resched()) { | 4953 | if (progress && need_resched()) { |
3788 | memcpy(&key, &found_key, sizeof(key)); | ||
3789 | cond_resched(); | ||
3790 | btrfs_release_path(root, path); | 4954 | btrfs_release_path(root, path); |
3791 | btrfs_search_slot(NULL, root, &key, path, 0, 0); | 4955 | mutex_unlock(&root->fs_info->alloc_mutex); |
4956 | cond_resched(); | ||
4957 | mutex_lock(&root->fs_info->alloc_mutex); | ||
3792 | progress = 0; | 4958 | progress = 0; |
3793 | goto next; | 4959 | continue; |
3794 | } | 4960 | } |
3795 | progress = 1; | 4961 | progress = 1; |
3796 | 4962 | ||
3797 | if (btrfs_key_type(&found_key) != BTRFS_EXTENT_ITEM_KEY || | 4963 | if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY || |
3798 | found_key.objectid + found_key.offset <= cur_byte) { | 4964 | key.objectid + key.offset <= cur_byte) { |
3799 | memcpy(&key, &found_key, sizeof(key)); | ||
3800 | key.offset++; | ||
3801 | path->slots[0]++; | 4965 | path->slots[0]++; |
3802 | goto next; | 4966 | goto next; |
3803 | } | 4967 | } |
3804 | 4968 | ||
3805 | total_found++; | 4969 | total_found++; |
3806 | cur_byte = found_key.objectid + found_key.offset; | 4970 | cur_byte = key.objectid + key.offset; |
3807 | key.objectid = cur_byte; | ||
3808 | btrfs_release_path(root, path); | 4971 | btrfs_release_path(root, path); |
3809 | ret = relocate_one_extent(root, path, &found_key); | ||
3810 | __alloc_chunk_for_shrink(root, shrink_block_group, 0); | ||
3811 | } | ||
3812 | |||
3813 | btrfs_release_path(root, path); | ||
3814 | |||
3815 | if (total_found > 0) { | ||
3816 | printk("btrfs relocate found %llu last extent was %llu\n", | ||
3817 | (unsigned long long)total_found, | ||
3818 | (unsigned long long)found_key.objectid); | ||
3819 | mutex_unlock(&root->fs_info->alloc_mutex); | ||
3820 | trans = btrfs_start_transaction(tree_root, 1); | ||
3821 | btrfs_commit_transaction(trans, tree_root); | ||
3822 | |||
3823 | btrfs_clean_old_snapshots(tree_root); | ||
3824 | 4972 | ||
3825 | btrfs_start_delalloc_inodes(root); | 4973 | __alloc_chunk_for_shrink(root, block_group, 0); |
3826 | btrfs_wait_ordered_extents(tree_root, 0); | 4974 | ret = relocate_one_extent(root, path, &key, block_group, |
4975 | reloc_inode, pass); | ||
4976 | BUG_ON(ret < 0); | ||
3827 | 4977 | ||
3828 | trans = btrfs_start_transaction(tree_root, 1); | 4978 | key.objectid = cur_byte; |
3829 | btrfs_commit_transaction(trans, tree_root); | 4979 | key.type = 0; |
3830 | mutex_lock(&root->fs_info->alloc_mutex); | 4980 | key.offset = 0; |
3831 | goto again; | ||
3832 | } | 4981 | } |
3833 | 4982 | ||
3834 | /* | 4983 | btrfs_release_path(root, path); |
3835 | * we've freed all the extents, now remove the block | ||
3836 | * group item from the tree | ||
3837 | */ | ||
3838 | mutex_unlock(&root->fs_info->alloc_mutex); | 4984 | mutex_unlock(&root->fs_info->alloc_mutex); |
3839 | 4985 | ||
3840 | trans = btrfs_start_transaction(root, 1); | 4986 | if (pass == 0) { |
3841 | 4987 | btrfs_wait_ordered_range(reloc_inode, 0, (u64)-1); | |
3842 | mutex_lock(&root->fs_info->alloc_mutex); | 4988 | invalidate_mapping_pages(reloc_inode->i_mapping, 0, -1); |
3843 | memcpy(&key, &shrink_block_group->key, sizeof(key)); | 4989 | WARN_ON(reloc_inode->i_mapping->nrpages); |
3844 | |||
3845 | ret = btrfs_search_slot(trans, root, &key, path, -1, 1); | ||
3846 | if (ret > 0) | ||
3847 | ret = -EIO; | ||
3848 | if (ret < 0) { | ||
3849 | btrfs_end_transaction(trans, root); | ||
3850 | goto out; | ||
3851 | } | 4990 | } |
3852 | 4991 | ||
3853 | spin_lock(&root->fs_info->block_group_cache_lock); | 4992 | if (total_found > 0) { |
3854 | rb_erase(&shrink_block_group->cache_node, | 4993 | printk("btrfs found %llu extents in pass %d\n", |
3855 | &root->fs_info->block_group_cache_tree); | 4994 | (unsigned long long)total_found, pass); |
3856 | spin_unlock(&root->fs_info->block_group_cache_lock); | 4995 | pass++; |
3857 | 4996 | goto again; | |
3858 | ret = btrfs_remove_free_space(shrink_block_group, key.objectid, | ||
3859 | key.offset); | ||
3860 | if (ret) { | ||
3861 | btrfs_end_transaction(trans, root); | ||
3862 | goto out; | ||
3863 | } | 4997 | } |
3864 | /* | ||
3865 | memset(shrink_block_group, 0, sizeof(*shrink_block_group)); | ||
3866 | kfree(shrink_block_group); | ||
3867 | */ | ||
3868 | 4998 | ||
3869 | btrfs_del_item(trans, root, path); | 4999 | /* delete reloc_inode */ |
3870 | btrfs_release_path(root, path); | 5000 | iput(reloc_inode); |
3871 | mutex_unlock(&root->fs_info->alloc_mutex); | 5001 | |
3872 | btrfs_commit_transaction(trans, root); | 5002 | /* unpin extents in this range */ |
5003 | trans = btrfs_start_transaction(info->tree_root, 1); | ||
5004 | btrfs_commit_transaction(trans, info->tree_root); | ||
3873 | 5005 | ||
3874 | mutex_lock(&root->fs_info->alloc_mutex); | 5006 | mutex_lock(&root->fs_info->alloc_mutex); |
3875 | 5007 | ||
3876 | /* the code to unpin extents might set a few bits in the free | 5008 | spin_lock(&block_group->lock); |
3877 | * space cache for this range again | 5009 | WARN_ON(block_group->pinned > 0); |
3878 | */ | 5010 | WARN_ON(block_group->reserved > 0); |
3879 | /* XXX? */ | 5011 | WARN_ON(btrfs_block_group_used(&block_group->item) > 0); |
3880 | ret = btrfs_remove_free_space(shrink_block_group, key.objectid, | 5012 | spin_unlock(&block_group->lock); |
3881 | key.offset); | 5013 | ret = 0; |
3882 | out: | 5014 | out: |
3883 | btrfs_free_path(path); | ||
3884 | mutex_unlock(&root->fs_info->alloc_mutex); | 5015 | mutex_unlock(&root->fs_info->alloc_mutex); |
5016 | btrfs_free_path(path); | ||
3885 | return ret; | 5017 | return ret; |
3886 | } | 5018 | } |
3887 | 5019 | ||
@@ -3922,6 +5054,33 @@ out: | |||
3922 | return ret; | 5054 | return ret; |
3923 | } | 5055 | } |
3924 | 5056 | ||
5057 | int btrfs_free_block_groups(struct btrfs_fs_info *info) | ||
5058 | { | ||
5059 | struct btrfs_block_group_cache *block_group; | ||
5060 | struct rb_node *n; | ||
5061 | |||
5062 | mutex_lock(&info->alloc_mutex); | ||
5063 | spin_lock(&info->block_group_cache_lock); | ||
5064 | while ((n = rb_last(&info->block_group_cache_tree)) != NULL) { | ||
5065 | block_group = rb_entry(n, struct btrfs_block_group_cache, | ||
5066 | cache_node); | ||
5067 | |||
5068 | spin_unlock(&info->block_group_cache_lock); | ||
5069 | btrfs_remove_free_space_cache(block_group); | ||
5070 | spin_lock(&info->block_group_cache_lock); | ||
5071 | |||
5072 | rb_erase(&block_group->cache_node, | ||
5073 | &info->block_group_cache_tree); | ||
5074 | spin_lock(&block_group->space_info->lock); | ||
5075 | list_del(&block_group->list); | ||
5076 | spin_unlock(&block_group->space_info->lock); | ||
5077 | kfree(block_group); | ||
5078 | } | ||
5079 | spin_unlock(&info->block_group_cache_lock); | ||
5080 | mutex_unlock(&info->alloc_mutex); | ||
5081 | return 0; | ||
5082 | } | ||
5083 | |||
3925 | int btrfs_read_block_groups(struct btrfs_root *root) | 5084 | int btrfs_read_block_groups(struct btrfs_root *root) |
3926 | { | 5085 | { |
3927 | struct btrfs_path *path; | 5086 | struct btrfs_path *path; |
@@ -4039,3 +5198,46 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, | |||
4039 | 5198 | ||
4040 | return 0; | 5199 | return 0; |
4041 | } | 5200 | } |
5201 | |||
5202 | int btrfs_remove_block_group(struct btrfs_trans_handle *trans, | ||
5203 | struct btrfs_root *root, u64 group_start) | ||
5204 | { | ||
5205 | struct btrfs_path *path; | ||
5206 | struct btrfs_block_group_cache *block_group; | ||
5207 | struct btrfs_key key; | ||
5208 | int ret; | ||
5209 | |||
5210 | BUG_ON(!mutex_is_locked(&root->fs_info->alloc_mutex)); | ||
5211 | root = root->fs_info->extent_root; | ||
5212 | |||
5213 | block_group = btrfs_lookup_block_group(root->fs_info, group_start); | ||
5214 | BUG_ON(!block_group); | ||
5215 | |||
5216 | memcpy(&key, &block_group->key, sizeof(key)); | ||
5217 | |||
5218 | path = btrfs_alloc_path(); | ||
5219 | BUG_ON(!path); | ||
5220 | |||
5221 | btrfs_remove_free_space_cache(block_group); | ||
5222 | rb_erase(&block_group->cache_node, | ||
5223 | &root->fs_info->block_group_cache_tree); | ||
5224 | spin_lock(&block_group->space_info->lock); | ||
5225 | list_del(&block_group->list); | ||
5226 | spin_unlock(&block_group->space_info->lock); | ||
5227 | |||
5228 | /* | ||
5229 | memset(shrink_block_group, 0, sizeof(*shrink_block_group)); | ||
5230 | kfree(shrink_block_group); | ||
5231 | */ | ||
5232 | |||
5233 | ret = btrfs_search_slot(trans, root, &key, path, -1, 1); | ||
5234 | if (ret > 0) | ||
5235 | ret = -EIO; | ||
5236 | if (ret < 0) | ||
5237 | goto out; | ||
5238 | |||
5239 | ret = btrfs_del_item(trans, root, path); | ||
5240 | out: | ||
5241 | btrfs_free_path(path); | ||
5242 | return ret; | ||
5243 | } | ||