aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs')
-rw-r--r--fs/btrfs/relocation.c237
1 files changed, 148 insertions, 89 deletions
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 48a504260635..361ad323faac 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -121,6 +121,15 @@ struct inodevec {
121 int nr; 121 int nr;
122}; 122};
123 123
124#define MAX_EXTENTS 128
125
126struct file_extent_cluster {
127 u64 start;
128 u64 end;
129 u64 boundary[MAX_EXTENTS];
130 unsigned int nr;
131};
132
124struct reloc_control { 133struct reloc_control {
125 /* block group to relocate */ 134 /* block group to relocate */
126 struct btrfs_block_group_cache *block_group; 135 struct btrfs_block_group_cache *block_group;
@@ -2529,56 +2538,94 @@ out:
2529} 2538}
2530 2539
2531static noinline_for_stack 2540static noinline_for_stack
2532int relocate_inode_pages(struct inode *inode, u64 start, u64 len) 2541int setup_extent_mapping(struct inode *inode, u64 start, u64 end,
2542 u64 block_start)
2543{
2544 struct btrfs_root *root = BTRFS_I(inode)->root;
2545 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
2546 struct extent_map *em;
2547 int ret = 0;
2548
2549 em = alloc_extent_map(GFP_NOFS);
2550 if (!em)
2551 return -ENOMEM;
2552
2553 em->start = start;
2554 em->len = end + 1 - start;
2555 em->block_len = em->len;
2556 em->block_start = block_start;
2557 em->bdev = root->fs_info->fs_devices->latest_bdev;
2558 set_bit(EXTENT_FLAG_PINNED, &em->flags);
2559
2560 lock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS);
2561 while (1) {
2562 write_lock(&em_tree->lock);
2563 ret = add_extent_mapping(em_tree, em);
2564 write_unlock(&em_tree->lock);
2565 if (ret != -EEXIST) {
2566 free_extent_map(em);
2567 break;
2568 }
2569 btrfs_drop_extent_cache(inode, start, end, 0);
2570 }
2571 unlock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS);
2572 return ret;
2573}
2574
2575static int relocate_file_extent_cluster(struct inode *inode,
2576 struct file_extent_cluster *cluster)
2533{ 2577{
2534 u64 page_start; 2578 u64 page_start;
2535 u64 page_end; 2579 u64 page_end;
2536 unsigned long i; 2580 u64 offset = BTRFS_I(inode)->index_cnt;
2537 unsigned long first_index; 2581 unsigned long index;
2538 unsigned long last_index; 2582 unsigned long last_index;
2539 unsigned int total_read = 0; 2583 unsigned int dirty_page = 0;
2540 unsigned int total_dirty = 0;
2541 struct page *page; 2584 struct page *page;
2542 struct file_ra_state *ra; 2585 struct file_ra_state *ra;
2543 struct btrfs_ordered_extent *ordered; 2586 int nr = 0;
2544 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
2545 int ret = 0; 2587 int ret = 0;
2546 2588
2589 if (!cluster->nr)
2590 return 0;
2591
2547 ra = kzalloc(sizeof(*ra), GFP_NOFS); 2592 ra = kzalloc(sizeof(*ra), GFP_NOFS);
2548 if (!ra) 2593 if (!ra)
2549 return -ENOMEM; 2594 return -ENOMEM;
2550 2595
2596 index = (cluster->start - offset) >> PAGE_CACHE_SHIFT;
2597 last_index = (cluster->end - offset) >> PAGE_CACHE_SHIFT;
2598
2551 mutex_lock(&inode->i_mutex); 2599 mutex_lock(&inode->i_mutex);
2552 first_index = start >> PAGE_CACHE_SHIFT;
2553 last_index = (start + len - 1) >> PAGE_CACHE_SHIFT;
2554 2600
2555 /* make sure the dirty trick played by the caller work */ 2601 i_size_write(inode, cluster->end + 1 - offset);
2556 while (1) { 2602 ret = setup_extent_mapping(inode, cluster->start - offset,
2557 ret = invalidate_inode_pages2_range(inode->i_mapping, 2603 cluster->end - offset, cluster->start);
2558 first_index, last_index);
2559 if (ret != -EBUSY)
2560 break;
2561 schedule_timeout(HZ/10);
2562 }
2563 if (ret) 2604 if (ret)
2564 goto out_unlock; 2605 goto out_unlock;
2565 2606
2566 file_ra_state_init(ra, inode->i_mapping); 2607 file_ra_state_init(ra, inode->i_mapping);
2567 2608
2568 for (i = first_index ; i <= last_index; i++) { 2609 WARN_ON(cluster->start != cluster->boundary[0]);
2569 if (total_read % ra->ra_pages == 0) { 2610 while (index <= last_index) {
2570 btrfs_force_ra(inode->i_mapping, ra, NULL, i, 2611 page = find_lock_page(inode->i_mapping, index);
2571 min(last_index, ra->ra_pages + i - 1));
2572 }
2573 total_read++;
2574again:
2575 if (((u64)i << PAGE_CACHE_SHIFT) > i_size_read(inode))
2576 BUG_ON(1);
2577 page = grab_cache_page(inode->i_mapping, i);
2578 if (!page) { 2612 if (!page) {
2579 ret = -ENOMEM; 2613 page_cache_sync_readahead(inode->i_mapping,
2580 goto out_unlock; 2614 ra, NULL, index,
2615 last_index + 1 - index);
2616 page = grab_cache_page(inode->i_mapping, index);
2617 if (!page) {
2618 ret = -ENOMEM;
2619 goto out_unlock;
2620 }
2621 }
2622
2623 if (PageReadahead(page)) {
2624 page_cache_async_readahead(inode->i_mapping,
2625 ra, NULL, page, index,
2626 last_index + 1 - index);
2581 } 2627 }
2628
2582 if (!PageUptodate(page)) { 2629 if (!PageUptodate(page)) {
2583 btrfs_readpage(NULL, page); 2630 btrfs_readpage(NULL, page);
2584 lock_page(page); 2631 lock_page(page);
@@ -2589,75 +2636,79 @@ again:
2589 goto out_unlock; 2636 goto out_unlock;
2590 } 2637 }
2591 } 2638 }
2592 wait_on_page_writeback(page);
2593 2639
2594 page_start = (u64)page->index << PAGE_CACHE_SHIFT; 2640 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
2595 page_end = page_start + PAGE_CACHE_SIZE - 1; 2641 page_end = page_start + PAGE_CACHE_SIZE - 1;
2596 lock_extent(io_tree, page_start, page_end, GFP_NOFS); 2642
2597 2643 lock_extent(&BTRFS_I(inode)->io_tree,
2598 ordered = btrfs_lookup_ordered_extent(inode, page_start); 2644 page_start, page_end, GFP_NOFS);
2599 if (ordered) { 2645
2600 unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
2601 unlock_page(page);
2602 page_cache_release(page);
2603 btrfs_start_ordered_extent(inode, ordered, 1);
2604 btrfs_put_ordered_extent(ordered);
2605 goto again;
2606 }
2607 set_page_extent_mapped(page); 2646 set_page_extent_mapped(page);
2608 2647
2609 if (i == first_index) 2648 if (nr < cluster->nr &&
2610 set_extent_bits(io_tree, page_start, page_end, 2649 page_start + offset == cluster->boundary[nr]) {
2650 set_extent_bits(&BTRFS_I(inode)->io_tree,
2651 page_start, page_end,
2611 EXTENT_BOUNDARY, GFP_NOFS); 2652 EXTENT_BOUNDARY, GFP_NOFS);
2653 nr++;
2654 }
2612 btrfs_set_extent_delalloc(inode, page_start, page_end); 2655 btrfs_set_extent_delalloc(inode, page_start, page_end);
2613 2656
2614 set_page_dirty(page); 2657 set_page_dirty(page);
2615 total_dirty++; 2658 dirty_page++;
2616 2659
2617 unlock_extent(io_tree, page_start, page_end, GFP_NOFS); 2660 unlock_extent(&BTRFS_I(inode)->io_tree,
2661 page_start, page_end, GFP_NOFS);
2618 unlock_page(page); 2662 unlock_page(page);
2619 page_cache_release(page); 2663 page_cache_release(page);
2664
2665 index++;
2666 if (nr < cluster->nr &&
2667 page_end + 1 + offset == cluster->boundary[nr]) {
2668 balance_dirty_pages_ratelimited_nr(inode->i_mapping,
2669 dirty_page);
2670 dirty_page = 0;
2671 }
2672 }
2673 if (dirty_page) {
2674 balance_dirty_pages_ratelimited_nr(inode->i_mapping,
2675 dirty_page);
2620 } 2676 }
2677 WARN_ON(nr != cluster->nr);
2621out_unlock: 2678out_unlock:
2622 mutex_unlock(&inode->i_mutex); 2679 mutex_unlock(&inode->i_mutex);
2623 kfree(ra); 2680 kfree(ra);
2624 balance_dirty_pages_ratelimited_nr(inode->i_mapping, total_dirty);
2625 return ret; 2681 return ret;
2626} 2682}
2627 2683
2628static noinline_for_stack 2684static noinline_for_stack
2629int relocate_data_extent(struct inode *inode, struct btrfs_key *extent_key) 2685int relocate_data_extent(struct inode *inode, struct btrfs_key *extent_key,
2686 struct file_extent_cluster *cluster)
2630{ 2687{
2631 struct btrfs_root *root = BTRFS_I(inode)->root; 2688 int ret;
2632 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
2633 struct extent_map *em;
2634 u64 start = extent_key->objectid - BTRFS_I(inode)->index_cnt;
2635 u64 end = start + extent_key->offset - 1;
2636
2637 em = alloc_extent_map(GFP_NOFS);
2638 em->start = start;
2639 em->len = extent_key->offset;
2640 em->block_len = extent_key->offset;
2641 em->block_start = extent_key->objectid;
2642 em->bdev = root->fs_info->fs_devices->latest_bdev;
2643 set_bit(EXTENT_FLAG_PINNED, &em->flags);
2644 2689
2645 /* setup extent map to cheat btrfs_readpage */ 2690 if (cluster->nr > 0 && extent_key->objectid != cluster->end + 1) {
2646 lock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS); 2691 ret = relocate_file_extent_cluster(inode, cluster);
2647 while (1) { 2692 if (ret)
2648 int ret; 2693 return ret;
2649 write_lock(&em_tree->lock); 2694 cluster->nr = 0;
2650 ret = add_extent_mapping(em_tree, em);
2651 write_unlock(&em_tree->lock);
2652 if (ret != -EEXIST) {
2653 free_extent_map(em);
2654 break;
2655 }
2656 btrfs_drop_extent_cache(inode, start, end, 0);
2657 } 2695 }
2658 unlock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS);
2659 2696
2660 return relocate_inode_pages(inode, start, extent_key->offset); 2697 if (!cluster->nr)
2698 cluster->start = extent_key->objectid;
2699 else
2700 BUG_ON(cluster->nr >= MAX_EXTENTS);
2701 cluster->end = extent_key->objectid + extent_key->offset - 1;
2702 cluster->boundary[cluster->nr] = extent_key->objectid;
2703 cluster->nr++;
2704
2705 if (cluster->nr >= MAX_EXTENTS) {
2706 ret = relocate_file_extent_cluster(inode, cluster);
2707 if (ret)
2708 return ret;
2709 cluster->nr = 0;
2710 }
2711 return 0;
2661} 2712}
2662 2713
2663#ifdef BTRFS_COMPAT_EXTENT_TREE_V0 2714#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
@@ -3208,6 +3259,7 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
3208{ 3259{
3209 struct rb_root blocks = RB_ROOT; 3260 struct rb_root blocks = RB_ROOT;
3210 struct btrfs_key key; 3261 struct btrfs_key key;
3262 struct file_extent_cluster *cluster;
3211 struct btrfs_trans_handle *trans = NULL; 3263 struct btrfs_trans_handle *trans = NULL;
3212 struct btrfs_path *path; 3264 struct btrfs_path *path;
3213 struct btrfs_extent_item *ei; 3265 struct btrfs_extent_item *ei;
@@ -3217,6 +3269,10 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
3217 int ret; 3269 int ret;
3218 int err = 0; 3270 int err = 0;
3219 3271
3272 cluster = kzalloc(sizeof(*cluster), GFP_NOFS);
3273 if (!cluster)
3274 return -ENOMEM;
3275
3220 path = btrfs_alloc_path(); 3276 path = btrfs_alloc_path();
3221 if (!path) 3277 if (!path)
3222 return -ENOMEM; 3278 return -ENOMEM;
@@ -3310,14 +3366,15 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
3310 } 3366 }
3311 3367
3312 nr = trans->blocks_used; 3368 nr = trans->blocks_used;
3313 btrfs_end_transaction_throttle(trans, rc->extent_root); 3369 btrfs_end_transaction(trans, rc->extent_root);
3314 trans = NULL; 3370 trans = NULL;
3315 btrfs_btree_balance_dirty(rc->extent_root, nr); 3371 btrfs_btree_balance_dirty(rc->extent_root, nr);
3316 3372
3317 if (rc->stage == MOVE_DATA_EXTENTS && 3373 if (rc->stage == MOVE_DATA_EXTENTS &&
3318 (flags & BTRFS_EXTENT_FLAG_DATA)) { 3374 (flags & BTRFS_EXTENT_FLAG_DATA)) {
3319 rc->found_file_extent = 1; 3375 rc->found_file_extent = 1;
3320 ret = relocate_data_extent(rc->data_inode, &key); 3376 ret = relocate_data_extent(rc->data_inode,
3377 &key, cluster);
3321 if (ret < 0) { 3378 if (ret < 0) {
3322 err = ret; 3379 err = ret;
3323 break; 3380 break;
@@ -3332,6 +3389,14 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
3332 btrfs_btree_balance_dirty(rc->extent_root, nr); 3389 btrfs_btree_balance_dirty(rc->extent_root, nr);
3333 } 3390 }
3334 3391
3392 if (!err) {
3393 ret = relocate_file_extent_cluster(rc->data_inode, cluster);
3394 if (ret < 0)
3395 err = ret;
3396 }
3397
3398 kfree(cluster);
3399
3335 rc->create_reloc_root = 0; 3400 rc->create_reloc_root = 0;
3336 smp_mb(); 3401 smp_mb();
3337 3402
@@ -3352,8 +3417,7 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
3352} 3417}
3353 3418
3354static int __insert_orphan_inode(struct btrfs_trans_handle *trans, 3419static int __insert_orphan_inode(struct btrfs_trans_handle *trans,
3355 struct btrfs_root *root, 3420 struct btrfs_root *root, u64 objectid)
3356 u64 objectid, u64 size)
3357{ 3421{
3358 struct btrfs_path *path; 3422 struct btrfs_path *path;
3359 struct btrfs_inode_item *item; 3423 struct btrfs_inode_item *item;
@@ -3372,7 +3436,7 @@ static int __insert_orphan_inode(struct btrfs_trans_handle *trans,
3372 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item); 3436 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item);
3373 memset_extent_buffer(leaf, 0, (unsigned long)item, sizeof(*item)); 3437 memset_extent_buffer(leaf, 0, (unsigned long)item, sizeof(*item));
3374 btrfs_set_inode_generation(leaf, item, 1); 3438 btrfs_set_inode_generation(leaf, item, 1);
3375 btrfs_set_inode_size(leaf, item, size); 3439 btrfs_set_inode_size(leaf, item, 0);
3376 btrfs_set_inode_mode(leaf, item, S_IFREG | 0600); 3440 btrfs_set_inode_mode(leaf, item, S_IFREG | 0600);
3377 btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS); 3441 btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS);
3378 btrfs_mark_buffer_dirty(leaf); 3442 btrfs_mark_buffer_dirty(leaf);
@@ -3408,12 +3472,7 @@ static struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info,
3408 if (err) 3472 if (err)
3409 goto out; 3473 goto out;
3410 3474
3411 err = __insert_orphan_inode(trans, root, objectid, group->key.offset); 3475 err = __insert_orphan_inode(trans, root, objectid);
3412 BUG_ON(err);
3413
3414 err = btrfs_insert_file_extent(trans, root, objectid, 0, 0, 0,
3415 group->key.offset, 0, group->key.offset,
3416 0, 0, 0);
3417 BUG_ON(err); 3476 BUG_ON(err);
3418 3477
3419 key.objectid = objectid; 3478 key.objectid = objectid;
@@ -3519,10 +3578,10 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
3519 } 3578 }
3520 } 3579 }
3521 3580
3522 filemap_fdatawrite_range(fs_info->btree_inode->i_mapping, 3581 filemap_write_and_wait_range(fs_info->btree_inode->i_mapping,
3523 rc->block_group->key.objectid, 3582 rc->block_group->key.objectid,
3524 rc->block_group->key.objectid + 3583 rc->block_group->key.objectid +
3525 rc->block_group->key.offset - 1); 3584 rc->block_group->key.offset - 1);
3526 3585
3527 WARN_ON(rc->block_group->pinned > 0); 3586 WARN_ON(rc->block_group->pinned > 0);
3528 WARN_ON(rc->block_group->reserved > 0); 3587 WARN_ON(rc->block_group->reserved > 0);