aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/relocation.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/relocation.c')
-rw-r--r--fs/btrfs/relocation.c280
1 files changed, 184 insertions, 96 deletions
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index c04f7f212602..361ad323faac 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -121,6 +121,15 @@ struct inodevec {
121 int nr; 121 int nr;
122}; 122};
123 123
124#define MAX_EXTENTS 128
125
126struct file_extent_cluster {
127 u64 start;
128 u64 end;
129 u64 boundary[MAX_EXTENTS];
130 unsigned int nr;
131};
132
124struct reloc_control { 133struct reloc_control {
125 /* block group to relocate */ 134 /* block group to relocate */
126 struct btrfs_block_group_cache *block_group; 135 struct btrfs_block_group_cache *block_group;
@@ -2180,7 +2189,7 @@ static int tree_block_processed(u64 bytenr, u32 blocksize,
2180 struct reloc_control *rc) 2189 struct reloc_control *rc)
2181{ 2190{
2182 if (test_range_bit(&rc->processed_blocks, bytenr, 2191 if (test_range_bit(&rc->processed_blocks, bytenr,
2183 bytenr + blocksize - 1, EXTENT_DIRTY, 1)) 2192 bytenr + blocksize - 1, EXTENT_DIRTY, 1, NULL))
2184 return 1; 2193 return 1;
2185 return 0; 2194 return 0;
2186} 2195}
@@ -2529,56 +2538,94 @@ out:
2529} 2538}
2530 2539
2531static noinline_for_stack 2540static noinline_for_stack
2532int relocate_inode_pages(struct inode *inode, u64 start, u64 len) 2541int setup_extent_mapping(struct inode *inode, u64 start, u64 end,
2542 u64 block_start)
2543{
2544 struct btrfs_root *root = BTRFS_I(inode)->root;
2545 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
2546 struct extent_map *em;
2547 int ret = 0;
2548
2549 em = alloc_extent_map(GFP_NOFS);
2550 if (!em)
2551 return -ENOMEM;
2552
2553 em->start = start;
2554 em->len = end + 1 - start;
2555 em->block_len = em->len;
2556 em->block_start = block_start;
2557 em->bdev = root->fs_info->fs_devices->latest_bdev;
2558 set_bit(EXTENT_FLAG_PINNED, &em->flags);
2559
2560 lock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS);
2561 while (1) {
2562 write_lock(&em_tree->lock);
2563 ret = add_extent_mapping(em_tree, em);
2564 write_unlock(&em_tree->lock);
2565 if (ret != -EEXIST) {
2566 free_extent_map(em);
2567 break;
2568 }
2569 btrfs_drop_extent_cache(inode, start, end, 0);
2570 }
2571 unlock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS);
2572 return ret;
2573}
2574
2575static int relocate_file_extent_cluster(struct inode *inode,
2576 struct file_extent_cluster *cluster)
2533{ 2577{
2534 u64 page_start; 2578 u64 page_start;
2535 u64 page_end; 2579 u64 page_end;
2536 unsigned long i; 2580 u64 offset = BTRFS_I(inode)->index_cnt;
2537 unsigned long first_index; 2581 unsigned long index;
2538 unsigned long last_index; 2582 unsigned long last_index;
2539 unsigned int total_read = 0; 2583 unsigned int dirty_page = 0;
2540 unsigned int total_dirty = 0;
2541 struct page *page; 2584 struct page *page;
2542 struct file_ra_state *ra; 2585 struct file_ra_state *ra;
2543 struct btrfs_ordered_extent *ordered; 2586 int nr = 0;
2544 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
2545 int ret = 0; 2587 int ret = 0;
2546 2588
2589 if (!cluster->nr)
2590 return 0;
2591
2547 ra = kzalloc(sizeof(*ra), GFP_NOFS); 2592 ra = kzalloc(sizeof(*ra), GFP_NOFS);
2548 if (!ra) 2593 if (!ra)
2549 return -ENOMEM; 2594 return -ENOMEM;
2550 2595
2596 index = (cluster->start - offset) >> PAGE_CACHE_SHIFT;
2597 last_index = (cluster->end - offset) >> PAGE_CACHE_SHIFT;
2598
2551 mutex_lock(&inode->i_mutex); 2599 mutex_lock(&inode->i_mutex);
2552 first_index = start >> PAGE_CACHE_SHIFT;
2553 last_index = (start + len - 1) >> PAGE_CACHE_SHIFT;
2554 2600
2555 /* make sure the dirty trick played by the caller work */ 2601 i_size_write(inode, cluster->end + 1 - offset);
2556 while (1) { 2602 ret = setup_extent_mapping(inode, cluster->start - offset,
2557 ret = invalidate_inode_pages2_range(inode->i_mapping, 2603 cluster->end - offset, cluster->start);
2558 first_index, last_index);
2559 if (ret != -EBUSY)
2560 break;
2561 schedule_timeout(HZ/10);
2562 }
2563 if (ret) 2604 if (ret)
2564 goto out_unlock; 2605 goto out_unlock;
2565 2606
2566 file_ra_state_init(ra, inode->i_mapping); 2607 file_ra_state_init(ra, inode->i_mapping);
2567 2608
2568 for (i = first_index ; i <= last_index; i++) { 2609 WARN_ON(cluster->start != cluster->boundary[0]);
2569 if (total_read % ra->ra_pages == 0) { 2610 while (index <= last_index) {
2570 btrfs_force_ra(inode->i_mapping, ra, NULL, i, 2611 page = find_lock_page(inode->i_mapping, index);
2571 min(last_index, ra->ra_pages + i - 1));
2572 }
2573 total_read++;
2574again:
2575 if (((u64)i << PAGE_CACHE_SHIFT) > i_size_read(inode))
2576 BUG_ON(1);
2577 page = grab_cache_page(inode->i_mapping, i);
2578 if (!page) { 2612 if (!page) {
2579 ret = -ENOMEM; 2613 page_cache_sync_readahead(inode->i_mapping,
2580 goto out_unlock; 2614 ra, NULL, index,
2615 last_index + 1 - index);
2616 page = grab_cache_page(inode->i_mapping, index);
2617 if (!page) {
2618 ret = -ENOMEM;
2619 goto out_unlock;
2620 }
2621 }
2622
2623 if (PageReadahead(page)) {
2624 page_cache_async_readahead(inode->i_mapping,
2625 ra, NULL, page, index,
2626 last_index + 1 - index);
2581 } 2627 }
2628
2582 if (!PageUptodate(page)) { 2629 if (!PageUptodate(page)) {
2583 btrfs_readpage(NULL, page); 2630 btrfs_readpage(NULL, page);
2584 lock_page(page); 2631 lock_page(page);
@@ -2589,75 +2636,79 @@ again:
2589 goto out_unlock; 2636 goto out_unlock;
2590 } 2637 }
2591 } 2638 }
2592 wait_on_page_writeback(page);
2593 2639
2594 page_start = (u64)page->index << PAGE_CACHE_SHIFT; 2640 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
2595 page_end = page_start + PAGE_CACHE_SIZE - 1; 2641 page_end = page_start + PAGE_CACHE_SIZE - 1;
2596 lock_extent(io_tree, page_start, page_end, GFP_NOFS); 2642
2597 2643 lock_extent(&BTRFS_I(inode)->io_tree,
2598 ordered = btrfs_lookup_ordered_extent(inode, page_start); 2644 page_start, page_end, GFP_NOFS);
2599 if (ordered) { 2645
2600 unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
2601 unlock_page(page);
2602 page_cache_release(page);
2603 btrfs_start_ordered_extent(inode, ordered, 1);
2604 btrfs_put_ordered_extent(ordered);
2605 goto again;
2606 }
2607 set_page_extent_mapped(page); 2646 set_page_extent_mapped(page);
2608 2647
2609 if (i == first_index) 2648 if (nr < cluster->nr &&
2610 set_extent_bits(io_tree, page_start, page_end, 2649 page_start + offset == cluster->boundary[nr]) {
2650 set_extent_bits(&BTRFS_I(inode)->io_tree,
2651 page_start, page_end,
2611 EXTENT_BOUNDARY, GFP_NOFS); 2652 EXTENT_BOUNDARY, GFP_NOFS);
2653 nr++;
2654 }
2612 btrfs_set_extent_delalloc(inode, page_start, page_end); 2655 btrfs_set_extent_delalloc(inode, page_start, page_end);
2613 2656
2614 set_page_dirty(page); 2657 set_page_dirty(page);
2615 total_dirty++; 2658 dirty_page++;
2616 2659
2617 unlock_extent(io_tree, page_start, page_end, GFP_NOFS); 2660 unlock_extent(&BTRFS_I(inode)->io_tree,
2661 page_start, page_end, GFP_NOFS);
2618 unlock_page(page); 2662 unlock_page(page);
2619 page_cache_release(page); 2663 page_cache_release(page);
2664
2665 index++;
2666 if (nr < cluster->nr &&
2667 page_end + 1 + offset == cluster->boundary[nr]) {
2668 balance_dirty_pages_ratelimited_nr(inode->i_mapping,
2669 dirty_page);
2670 dirty_page = 0;
2671 }
2672 }
2673 if (dirty_page) {
2674 balance_dirty_pages_ratelimited_nr(inode->i_mapping,
2675 dirty_page);
2620 } 2676 }
2677 WARN_ON(nr != cluster->nr);
2621out_unlock: 2678out_unlock:
2622 mutex_unlock(&inode->i_mutex); 2679 mutex_unlock(&inode->i_mutex);
2623 kfree(ra); 2680 kfree(ra);
2624 balance_dirty_pages_ratelimited_nr(inode->i_mapping, total_dirty);
2625 return ret; 2681 return ret;
2626} 2682}
2627 2683
2628static noinline_for_stack 2684static noinline_for_stack
2629int relocate_data_extent(struct inode *inode, struct btrfs_key *extent_key) 2685int relocate_data_extent(struct inode *inode, struct btrfs_key *extent_key,
2686 struct file_extent_cluster *cluster)
2630{ 2687{
2631 struct btrfs_root *root = BTRFS_I(inode)->root; 2688 int ret;
2632 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
2633 struct extent_map *em;
2634 u64 start = extent_key->objectid - BTRFS_I(inode)->index_cnt;
2635 u64 end = start + extent_key->offset - 1;
2636
2637 em = alloc_extent_map(GFP_NOFS);
2638 em->start = start;
2639 em->len = extent_key->offset;
2640 em->block_len = extent_key->offset;
2641 em->block_start = extent_key->objectid;
2642 em->bdev = root->fs_info->fs_devices->latest_bdev;
2643 set_bit(EXTENT_FLAG_PINNED, &em->flags);
2644 2689
2645 /* setup extent map to cheat btrfs_readpage */ 2690 if (cluster->nr > 0 && extent_key->objectid != cluster->end + 1) {
2646 lock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS); 2691 ret = relocate_file_extent_cluster(inode, cluster);
2647 while (1) { 2692 if (ret)
2648 int ret; 2693 return ret;
2649 spin_lock(&em_tree->lock); 2694 cluster->nr = 0;
2650 ret = add_extent_mapping(em_tree, em);
2651 spin_unlock(&em_tree->lock);
2652 if (ret != -EEXIST) {
2653 free_extent_map(em);
2654 break;
2655 }
2656 btrfs_drop_extent_cache(inode, start, end, 0);
2657 } 2695 }
2658 unlock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS);
2659 2696
2660 return relocate_inode_pages(inode, start, extent_key->offset); 2697 if (!cluster->nr)
2698 cluster->start = extent_key->objectid;
2699 else
2700 BUG_ON(cluster->nr >= MAX_EXTENTS);
2701 cluster->end = extent_key->objectid + extent_key->offset - 1;
2702 cluster->boundary[cluster->nr] = extent_key->objectid;
2703 cluster->nr++;
2704
2705 if (cluster->nr >= MAX_EXTENTS) {
2706 ret = relocate_file_extent_cluster(inode, cluster);
2707 if (ret)
2708 return ret;
2709 cluster->nr = 0;
2710 }
2711 return 0;
2661} 2712}
2662 2713
2663#ifdef BTRFS_COMPAT_EXTENT_TREE_V0 2714#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
@@ -3203,10 +3254,12 @@ static int check_extent_flags(u64 flags)
3203 return 0; 3254 return 0;
3204} 3255}
3205 3256
3257
3206static noinline_for_stack int relocate_block_group(struct reloc_control *rc) 3258static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
3207{ 3259{
3208 struct rb_root blocks = RB_ROOT; 3260 struct rb_root blocks = RB_ROOT;
3209 struct btrfs_key key; 3261 struct btrfs_key key;
3262 struct file_extent_cluster *cluster;
3210 struct btrfs_trans_handle *trans = NULL; 3263 struct btrfs_trans_handle *trans = NULL;
3211 struct btrfs_path *path; 3264 struct btrfs_path *path;
3212 struct btrfs_extent_item *ei; 3265 struct btrfs_extent_item *ei;
@@ -3216,10 +3269,17 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
3216 int ret; 3269 int ret;
3217 int err = 0; 3270 int err = 0;
3218 3271
3272 cluster = kzalloc(sizeof(*cluster), GFP_NOFS);
3273 if (!cluster)
3274 return -ENOMEM;
3275
3219 path = btrfs_alloc_path(); 3276 path = btrfs_alloc_path();
3220 if (!path) 3277 if (!path)
3221 return -ENOMEM; 3278 return -ENOMEM;
3222 3279
3280 rc->extents_found = 0;
3281 rc->extents_skipped = 0;
3282
3223 rc->search_start = rc->block_group->key.objectid; 3283 rc->search_start = rc->block_group->key.objectid;
3224 clear_extent_bits(&rc->processed_blocks, 0, (u64)-1, EXTENT_DIRTY, 3284 clear_extent_bits(&rc->processed_blocks, 0, (u64)-1, EXTENT_DIRTY,
3225 GFP_NOFS); 3285 GFP_NOFS);
@@ -3306,14 +3366,15 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
3306 } 3366 }
3307 3367
3308 nr = trans->blocks_used; 3368 nr = trans->blocks_used;
3309 btrfs_end_transaction_throttle(trans, rc->extent_root); 3369 btrfs_end_transaction(trans, rc->extent_root);
3310 trans = NULL; 3370 trans = NULL;
3311 btrfs_btree_balance_dirty(rc->extent_root, nr); 3371 btrfs_btree_balance_dirty(rc->extent_root, nr);
3312 3372
3313 if (rc->stage == MOVE_DATA_EXTENTS && 3373 if (rc->stage == MOVE_DATA_EXTENTS &&
3314 (flags & BTRFS_EXTENT_FLAG_DATA)) { 3374 (flags & BTRFS_EXTENT_FLAG_DATA)) {
3315 rc->found_file_extent = 1; 3375 rc->found_file_extent = 1;
3316 ret = relocate_data_extent(rc->data_inode, &key); 3376 ret = relocate_data_extent(rc->data_inode,
3377 &key, cluster);
3317 if (ret < 0) { 3378 if (ret < 0) {
3318 err = ret; 3379 err = ret;
3319 break; 3380 break;
@@ -3328,6 +3389,14 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
3328 btrfs_btree_balance_dirty(rc->extent_root, nr); 3389 btrfs_btree_balance_dirty(rc->extent_root, nr);
3329 } 3390 }
3330 3391
3392 if (!err) {
3393 ret = relocate_file_extent_cluster(rc->data_inode, cluster);
3394 if (ret < 0)
3395 err = ret;
3396 }
3397
3398 kfree(cluster);
3399
3331 rc->create_reloc_root = 0; 3400 rc->create_reloc_root = 0;
3332 smp_mb(); 3401 smp_mb();
3333 3402
@@ -3348,8 +3417,7 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
3348} 3417}
3349 3418
3350static int __insert_orphan_inode(struct btrfs_trans_handle *trans, 3419static int __insert_orphan_inode(struct btrfs_trans_handle *trans,
3351 struct btrfs_root *root, 3420 struct btrfs_root *root, u64 objectid)
3352 u64 objectid, u64 size)
3353{ 3421{
3354 struct btrfs_path *path; 3422 struct btrfs_path *path;
3355 struct btrfs_inode_item *item; 3423 struct btrfs_inode_item *item;
@@ -3368,7 +3436,7 @@ static int __insert_orphan_inode(struct btrfs_trans_handle *trans,
3368 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item); 3436 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item);
3369 memset_extent_buffer(leaf, 0, (unsigned long)item, sizeof(*item)); 3437 memset_extent_buffer(leaf, 0, (unsigned long)item, sizeof(*item));
3370 btrfs_set_inode_generation(leaf, item, 1); 3438 btrfs_set_inode_generation(leaf, item, 1);
3371 btrfs_set_inode_size(leaf, item, size); 3439 btrfs_set_inode_size(leaf, item, 0);
3372 btrfs_set_inode_mode(leaf, item, S_IFREG | 0600); 3440 btrfs_set_inode_mode(leaf, item, S_IFREG | 0600);
3373 btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS); 3441 btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS);
3374 btrfs_mark_buffer_dirty(leaf); 3442 btrfs_mark_buffer_dirty(leaf);
@@ -3404,12 +3472,7 @@ static struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info,
3404 if (err) 3472 if (err)
3405 goto out; 3473 goto out;
3406 3474
3407 err = __insert_orphan_inode(trans, root, objectid, group->key.offset); 3475 err = __insert_orphan_inode(trans, root, objectid);
3408 BUG_ON(err);
3409
3410 err = btrfs_insert_file_extent(trans, root, objectid, 0, 0, 0,
3411 group->key.offset, 0, group->key.offset,
3412 0, 0, 0);
3413 BUG_ON(err); 3476 BUG_ON(err);
3414 3477
3415 key.objectid = objectid; 3478 key.objectid = objectid;
@@ -3475,14 +3538,15 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
3475 btrfs_wait_ordered_extents(fs_info->tree_root, 0); 3538 btrfs_wait_ordered_extents(fs_info->tree_root, 0);
3476 3539
3477 while (1) { 3540 while (1) {
3478 mutex_lock(&fs_info->cleaner_mutex);
3479 btrfs_clean_old_snapshots(fs_info->tree_root);
3480 mutex_unlock(&fs_info->cleaner_mutex);
3481
3482 rc->extents_found = 0; 3541 rc->extents_found = 0;
3483 rc->extents_skipped = 0; 3542 rc->extents_skipped = 0;
3484 3543
3544 mutex_lock(&fs_info->cleaner_mutex);
3545
3546 btrfs_clean_old_snapshots(fs_info->tree_root);
3485 ret = relocate_block_group(rc); 3547 ret = relocate_block_group(rc);
3548
3549 mutex_unlock(&fs_info->cleaner_mutex);
3486 if (ret < 0) { 3550 if (ret < 0) {
3487 err = ret; 3551 err = ret;
3488 break; 3552 break;
@@ -3514,10 +3578,10 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
3514 } 3578 }
3515 } 3579 }
3516 3580
3517 filemap_fdatawrite_range(fs_info->btree_inode->i_mapping, 3581 filemap_write_and_wait_range(fs_info->btree_inode->i_mapping,
3518 rc->block_group->key.objectid, 3582 rc->block_group->key.objectid,
3519 rc->block_group->key.objectid + 3583 rc->block_group->key.objectid +
3520 rc->block_group->key.offset - 1); 3584 rc->block_group->key.offset - 1);
3521 3585
3522 WARN_ON(rc->block_group->pinned > 0); 3586 WARN_ON(rc->block_group->pinned > 0);
3523 WARN_ON(rc->block_group->reserved > 0); 3587 WARN_ON(rc->block_group->reserved > 0);
@@ -3530,6 +3594,26 @@ out:
3530 return err; 3594 return err;
3531} 3595}
3532 3596
3597static noinline_for_stack int mark_garbage_root(struct btrfs_root *root)
3598{
3599 struct btrfs_trans_handle *trans;
3600 int ret;
3601
3602 trans = btrfs_start_transaction(root->fs_info->tree_root, 1);
3603
3604 memset(&root->root_item.drop_progress, 0,
3605 sizeof(root->root_item.drop_progress));
3606 root->root_item.drop_level = 0;
3607 btrfs_set_root_refs(&root->root_item, 0);
3608 ret = btrfs_update_root(trans, root->fs_info->tree_root,
3609 &root->root_key, &root->root_item);
3610 BUG_ON(ret);
3611
3612 ret = btrfs_end_transaction(trans, root->fs_info->tree_root);
3613 BUG_ON(ret);
3614 return 0;
3615}
3616
3533/* 3617/*
3534 * recover relocation interrupted by system crash. 3618 * recover relocation interrupted by system crash.
3535 * 3619 *
@@ -3589,8 +3673,12 @@ int btrfs_recover_relocation(struct btrfs_root *root)
3589 fs_root = read_fs_root(root->fs_info, 3673 fs_root = read_fs_root(root->fs_info,
3590 reloc_root->root_key.offset); 3674 reloc_root->root_key.offset);
3591 if (IS_ERR(fs_root)) { 3675 if (IS_ERR(fs_root)) {
3592 err = PTR_ERR(fs_root); 3676 ret = PTR_ERR(fs_root);
3593 goto out; 3677 if (ret != -ENOENT) {
3678 err = ret;
3679 goto out;
3680 }
3681 mark_garbage_root(reloc_root);
3594 } 3682 }
3595 } 3683 }
3596 3684