diff options
-rw-r--r-- | fs/btrfs/relocation.c | 237 |
1 files changed, 148 insertions, 89 deletions
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 48a504260635..361ad323faac 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c | |||
@@ -121,6 +121,15 @@ struct inodevec { | |||
121 | int nr; | 121 | int nr; |
122 | }; | 122 | }; |
123 | 123 | ||
124 | #define MAX_EXTENTS 128 | ||
125 | |||
126 | struct file_extent_cluster { | ||
127 | u64 start; | ||
128 | u64 end; | ||
129 | u64 boundary[MAX_EXTENTS]; | ||
130 | unsigned int nr; | ||
131 | }; | ||
132 | |||
124 | struct reloc_control { | 133 | struct reloc_control { |
125 | /* block group to relocate */ | 134 | /* block group to relocate */ |
126 | struct btrfs_block_group_cache *block_group; | 135 | struct btrfs_block_group_cache *block_group; |
@@ -2529,56 +2538,94 @@ out: | |||
2529 | } | 2538 | } |
2530 | 2539 | ||
2531 | static noinline_for_stack | 2540 | static noinline_for_stack |
2532 | int relocate_inode_pages(struct inode *inode, u64 start, u64 len) | 2541 | int setup_extent_mapping(struct inode *inode, u64 start, u64 end, |
2542 | u64 block_start) | ||
2543 | { | ||
2544 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
2545 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; | ||
2546 | struct extent_map *em; | ||
2547 | int ret = 0; | ||
2548 | |||
2549 | em = alloc_extent_map(GFP_NOFS); | ||
2550 | if (!em) | ||
2551 | return -ENOMEM; | ||
2552 | |||
2553 | em->start = start; | ||
2554 | em->len = end + 1 - start; | ||
2555 | em->block_len = em->len; | ||
2556 | em->block_start = block_start; | ||
2557 | em->bdev = root->fs_info->fs_devices->latest_bdev; | ||
2558 | set_bit(EXTENT_FLAG_PINNED, &em->flags); | ||
2559 | |||
2560 | lock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS); | ||
2561 | while (1) { | ||
2562 | write_lock(&em_tree->lock); | ||
2563 | ret = add_extent_mapping(em_tree, em); | ||
2564 | write_unlock(&em_tree->lock); | ||
2565 | if (ret != -EEXIST) { | ||
2566 | free_extent_map(em); | ||
2567 | break; | ||
2568 | } | ||
2569 | btrfs_drop_extent_cache(inode, start, end, 0); | ||
2570 | } | ||
2571 | unlock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS); | ||
2572 | return ret; | ||
2573 | } | ||
2574 | |||
2575 | static int relocate_file_extent_cluster(struct inode *inode, | ||
2576 | struct file_extent_cluster *cluster) | ||
2533 | { | 2577 | { |
2534 | u64 page_start; | 2578 | u64 page_start; |
2535 | u64 page_end; | 2579 | u64 page_end; |
2536 | unsigned long i; | 2580 | u64 offset = BTRFS_I(inode)->index_cnt; |
2537 | unsigned long first_index; | 2581 | unsigned long index; |
2538 | unsigned long last_index; | 2582 | unsigned long last_index; |
2539 | unsigned int total_read = 0; | 2583 | unsigned int dirty_page = 0; |
2540 | unsigned int total_dirty = 0; | ||
2541 | struct page *page; | 2584 | struct page *page; |
2542 | struct file_ra_state *ra; | 2585 | struct file_ra_state *ra; |
2543 | struct btrfs_ordered_extent *ordered; | 2586 | int nr = 0; |
2544 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | ||
2545 | int ret = 0; | 2587 | int ret = 0; |
2546 | 2588 | ||
2589 | if (!cluster->nr) | ||
2590 | return 0; | ||
2591 | |||
2547 | ra = kzalloc(sizeof(*ra), GFP_NOFS); | 2592 | ra = kzalloc(sizeof(*ra), GFP_NOFS); |
2548 | if (!ra) | 2593 | if (!ra) |
2549 | return -ENOMEM; | 2594 | return -ENOMEM; |
2550 | 2595 | ||
2596 | index = (cluster->start - offset) >> PAGE_CACHE_SHIFT; | ||
2597 | last_index = (cluster->end - offset) >> PAGE_CACHE_SHIFT; | ||
2598 | |||
2551 | mutex_lock(&inode->i_mutex); | 2599 | mutex_lock(&inode->i_mutex); |
2552 | first_index = start >> PAGE_CACHE_SHIFT; | ||
2553 | last_index = (start + len - 1) >> PAGE_CACHE_SHIFT; | ||
2554 | 2600 | ||
2555 | /* make sure the dirty trick played by the caller work */ | 2601 | i_size_write(inode, cluster->end + 1 - offset); |
2556 | while (1) { | 2602 | ret = setup_extent_mapping(inode, cluster->start - offset, |
2557 | ret = invalidate_inode_pages2_range(inode->i_mapping, | 2603 | cluster->end - offset, cluster->start); |
2558 | first_index, last_index); | ||
2559 | if (ret != -EBUSY) | ||
2560 | break; | ||
2561 | schedule_timeout(HZ/10); | ||
2562 | } | ||
2563 | if (ret) | 2604 | if (ret) |
2564 | goto out_unlock; | 2605 | goto out_unlock; |
2565 | 2606 | ||
2566 | file_ra_state_init(ra, inode->i_mapping); | 2607 | file_ra_state_init(ra, inode->i_mapping); |
2567 | 2608 | ||
2568 | for (i = first_index ; i <= last_index; i++) { | 2609 | WARN_ON(cluster->start != cluster->boundary[0]); |
2569 | if (total_read % ra->ra_pages == 0) { | 2610 | while (index <= last_index) { |
2570 | btrfs_force_ra(inode->i_mapping, ra, NULL, i, | 2611 | page = find_lock_page(inode->i_mapping, index); |
2571 | min(last_index, ra->ra_pages + i - 1)); | ||
2572 | } | ||
2573 | total_read++; | ||
2574 | again: | ||
2575 | if (((u64)i << PAGE_CACHE_SHIFT) > i_size_read(inode)) | ||
2576 | BUG_ON(1); | ||
2577 | page = grab_cache_page(inode->i_mapping, i); | ||
2578 | if (!page) { | 2612 | if (!page) { |
2579 | ret = -ENOMEM; | 2613 | page_cache_sync_readahead(inode->i_mapping, |
2580 | goto out_unlock; | 2614 | ra, NULL, index, |
2615 | last_index + 1 - index); | ||
2616 | page = grab_cache_page(inode->i_mapping, index); | ||
2617 | if (!page) { | ||
2618 | ret = -ENOMEM; | ||
2619 | goto out_unlock; | ||
2620 | } | ||
2621 | } | ||
2622 | |||
2623 | if (PageReadahead(page)) { | ||
2624 | page_cache_async_readahead(inode->i_mapping, | ||
2625 | ra, NULL, page, index, | ||
2626 | last_index + 1 - index); | ||
2581 | } | 2627 | } |
2628 | |||
2582 | if (!PageUptodate(page)) { | 2629 | if (!PageUptodate(page)) { |
2583 | btrfs_readpage(NULL, page); | 2630 | btrfs_readpage(NULL, page); |
2584 | lock_page(page); | 2631 | lock_page(page); |
@@ -2589,75 +2636,79 @@ again: | |||
2589 | goto out_unlock; | 2636 | goto out_unlock; |
2590 | } | 2637 | } |
2591 | } | 2638 | } |
2592 | wait_on_page_writeback(page); | ||
2593 | 2639 | ||
2594 | page_start = (u64)page->index << PAGE_CACHE_SHIFT; | 2640 | page_start = (u64)page->index << PAGE_CACHE_SHIFT; |
2595 | page_end = page_start + PAGE_CACHE_SIZE - 1; | 2641 | page_end = page_start + PAGE_CACHE_SIZE - 1; |
2596 | lock_extent(io_tree, page_start, page_end, GFP_NOFS); | 2642 | |
2597 | 2643 | lock_extent(&BTRFS_I(inode)->io_tree, | |
2598 | ordered = btrfs_lookup_ordered_extent(inode, page_start); | 2644 | page_start, page_end, GFP_NOFS); |
2599 | if (ordered) { | 2645 | |
2600 | unlock_extent(io_tree, page_start, page_end, GFP_NOFS); | ||
2601 | unlock_page(page); | ||
2602 | page_cache_release(page); | ||
2603 | btrfs_start_ordered_extent(inode, ordered, 1); | ||
2604 | btrfs_put_ordered_extent(ordered); | ||
2605 | goto again; | ||
2606 | } | ||
2607 | set_page_extent_mapped(page); | 2646 | set_page_extent_mapped(page); |
2608 | 2647 | ||
2609 | if (i == first_index) | 2648 | if (nr < cluster->nr && |
2610 | set_extent_bits(io_tree, page_start, page_end, | 2649 | page_start + offset == cluster->boundary[nr]) { |
2650 | set_extent_bits(&BTRFS_I(inode)->io_tree, | ||
2651 | page_start, page_end, | ||
2611 | EXTENT_BOUNDARY, GFP_NOFS); | 2652 | EXTENT_BOUNDARY, GFP_NOFS); |
2653 | nr++; | ||
2654 | } | ||
2612 | btrfs_set_extent_delalloc(inode, page_start, page_end); | 2655 | btrfs_set_extent_delalloc(inode, page_start, page_end); |
2613 | 2656 | ||
2614 | set_page_dirty(page); | 2657 | set_page_dirty(page); |
2615 | total_dirty++; | 2658 | dirty_page++; |
2616 | 2659 | ||
2617 | unlock_extent(io_tree, page_start, page_end, GFP_NOFS); | 2660 | unlock_extent(&BTRFS_I(inode)->io_tree, |
2661 | page_start, page_end, GFP_NOFS); | ||
2618 | unlock_page(page); | 2662 | unlock_page(page); |
2619 | page_cache_release(page); | 2663 | page_cache_release(page); |
2664 | |||
2665 | index++; | ||
2666 | if (nr < cluster->nr && | ||
2667 | page_end + 1 + offset == cluster->boundary[nr]) { | ||
2668 | balance_dirty_pages_ratelimited_nr(inode->i_mapping, | ||
2669 | dirty_page); | ||
2670 | dirty_page = 0; | ||
2671 | } | ||
2672 | } | ||
2673 | if (dirty_page) { | ||
2674 | balance_dirty_pages_ratelimited_nr(inode->i_mapping, | ||
2675 | dirty_page); | ||
2620 | } | 2676 | } |
2677 | WARN_ON(nr != cluster->nr); | ||
2621 | out_unlock: | 2678 | out_unlock: |
2622 | mutex_unlock(&inode->i_mutex); | 2679 | mutex_unlock(&inode->i_mutex); |
2623 | kfree(ra); | 2680 | kfree(ra); |
2624 | balance_dirty_pages_ratelimited_nr(inode->i_mapping, total_dirty); | ||
2625 | return ret; | 2681 | return ret; |
2626 | } | 2682 | } |
2627 | 2683 | ||
2628 | static noinline_for_stack | 2684 | static noinline_for_stack |
2629 | int relocate_data_extent(struct inode *inode, struct btrfs_key *extent_key) | 2685 | int relocate_data_extent(struct inode *inode, struct btrfs_key *extent_key, |
2686 | struct file_extent_cluster *cluster) | ||
2630 | { | 2687 | { |
2631 | struct btrfs_root *root = BTRFS_I(inode)->root; | 2688 | int ret; |
2632 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; | ||
2633 | struct extent_map *em; | ||
2634 | u64 start = extent_key->objectid - BTRFS_I(inode)->index_cnt; | ||
2635 | u64 end = start + extent_key->offset - 1; | ||
2636 | |||
2637 | em = alloc_extent_map(GFP_NOFS); | ||
2638 | em->start = start; | ||
2639 | em->len = extent_key->offset; | ||
2640 | em->block_len = extent_key->offset; | ||
2641 | em->block_start = extent_key->objectid; | ||
2642 | em->bdev = root->fs_info->fs_devices->latest_bdev; | ||
2643 | set_bit(EXTENT_FLAG_PINNED, &em->flags); | ||
2644 | 2689 | ||
2645 | /* setup extent map to cheat btrfs_readpage */ | 2690 | if (cluster->nr > 0 && extent_key->objectid != cluster->end + 1) { |
2646 | lock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS); | 2691 | ret = relocate_file_extent_cluster(inode, cluster); |
2647 | while (1) { | 2692 | if (ret) |
2648 | int ret; | 2693 | return ret; |
2649 | write_lock(&em_tree->lock); | 2694 | cluster->nr = 0; |
2650 | ret = add_extent_mapping(em_tree, em); | ||
2651 | write_unlock(&em_tree->lock); | ||
2652 | if (ret != -EEXIST) { | ||
2653 | free_extent_map(em); | ||
2654 | break; | ||
2655 | } | ||
2656 | btrfs_drop_extent_cache(inode, start, end, 0); | ||
2657 | } | 2695 | } |
2658 | unlock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS); | ||
2659 | 2696 | ||
2660 | return relocate_inode_pages(inode, start, extent_key->offset); | 2697 | if (!cluster->nr) |
2698 | cluster->start = extent_key->objectid; | ||
2699 | else | ||
2700 | BUG_ON(cluster->nr >= MAX_EXTENTS); | ||
2701 | cluster->end = extent_key->objectid + extent_key->offset - 1; | ||
2702 | cluster->boundary[cluster->nr] = extent_key->objectid; | ||
2703 | cluster->nr++; | ||
2704 | |||
2705 | if (cluster->nr >= MAX_EXTENTS) { | ||
2706 | ret = relocate_file_extent_cluster(inode, cluster); | ||
2707 | if (ret) | ||
2708 | return ret; | ||
2709 | cluster->nr = 0; | ||
2710 | } | ||
2711 | return 0; | ||
2661 | } | 2712 | } |
2662 | 2713 | ||
2663 | #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 | 2714 | #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 |
@@ -3208,6 +3259,7 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc) | |||
3208 | { | 3259 | { |
3209 | struct rb_root blocks = RB_ROOT; | 3260 | struct rb_root blocks = RB_ROOT; |
3210 | struct btrfs_key key; | 3261 | struct btrfs_key key; |
3262 | struct file_extent_cluster *cluster; | ||
3211 | struct btrfs_trans_handle *trans = NULL; | 3263 | struct btrfs_trans_handle *trans = NULL; |
3212 | struct btrfs_path *path; | 3264 | struct btrfs_path *path; |
3213 | struct btrfs_extent_item *ei; | 3265 | struct btrfs_extent_item *ei; |
@@ -3217,6 +3269,10 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc) | |||
3217 | int ret; | 3269 | int ret; |
3218 | int err = 0; | 3270 | int err = 0; |
3219 | 3271 | ||
3272 | cluster = kzalloc(sizeof(*cluster), GFP_NOFS); | ||
3273 | if (!cluster) | ||
3274 | return -ENOMEM; | ||
3275 | |||
3220 | path = btrfs_alloc_path(); | 3276 | path = btrfs_alloc_path(); |
3221 | if (!path) | 3277 | if (!path) |
3222 | return -ENOMEM; | 3278 | return -ENOMEM; |
@@ -3310,14 +3366,15 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc) | |||
3310 | } | 3366 | } |
3311 | 3367 | ||
3312 | nr = trans->blocks_used; | 3368 | nr = trans->blocks_used; |
3313 | btrfs_end_transaction_throttle(trans, rc->extent_root); | 3369 | btrfs_end_transaction(trans, rc->extent_root); |
3314 | trans = NULL; | 3370 | trans = NULL; |
3315 | btrfs_btree_balance_dirty(rc->extent_root, nr); | 3371 | btrfs_btree_balance_dirty(rc->extent_root, nr); |
3316 | 3372 | ||
3317 | if (rc->stage == MOVE_DATA_EXTENTS && | 3373 | if (rc->stage == MOVE_DATA_EXTENTS && |
3318 | (flags & BTRFS_EXTENT_FLAG_DATA)) { | 3374 | (flags & BTRFS_EXTENT_FLAG_DATA)) { |
3319 | rc->found_file_extent = 1; | 3375 | rc->found_file_extent = 1; |
3320 | ret = relocate_data_extent(rc->data_inode, &key); | 3376 | ret = relocate_data_extent(rc->data_inode, |
3377 | &key, cluster); | ||
3321 | if (ret < 0) { | 3378 | if (ret < 0) { |
3322 | err = ret; | 3379 | err = ret; |
3323 | break; | 3380 | break; |
@@ -3332,6 +3389,14 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc) | |||
3332 | btrfs_btree_balance_dirty(rc->extent_root, nr); | 3389 | btrfs_btree_balance_dirty(rc->extent_root, nr); |
3333 | } | 3390 | } |
3334 | 3391 | ||
3392 | if (!err) { | ||
3393 | ret = relocate_file_extent_cluster(rc->data_inode, cluster); | ||
3394 | if (ret < 0) | ||
3395 | err = ret; | ||
3396 | } | ||
3397 | |||
3398 | kfree(cluster); | ||
3399 | |||
3335 | rc->create_reloc_root = 0; | 3400 | rc->create_reloc_root = 0; |
3336 | smp_mb(); | 3401 | smp_mb(); |
3337 | 3402 | ||
@@ -3352,8 +3417,7 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc) | |||
3352 | } | 3417 | } |
3353 | 3418 | ||
3354 | static int __insert_orphan_inode(struct btrfs_trans_handle *trans, | 3419 | static int __insert_orphan_inode(struct btrfs_trans_handle *trans, |
3355 | struct btrfs_root *root, | 3420 | struct btrfs_root *root, u64 objectid) |
3356 | u64 objectid, u64 size) | ||
3357 | { | 3421 | { |
3358 | struct btrfs_path *path; | 3422 | struct btrfs_path *path; |
3359 | struct btrfs_inode_item *item; | 3423 | struct btrfs_inode_item *item; |
@@ -3372,7 +3436,7 @@ static int __insert_orphan_inode(struct btrfs_trans_handle *trans, | |||
3372 | item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item); | 3436 | item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item); |
3373 | memset_extent_buffer(leaf, 0, (unsigned long)item, sizeof(*item)); | 3437 | memset_extent_buffer(leaf, 0, (unsigned long)item, sizeof(*item)); |
3374 | btrfs_set_inode_generation(leaf, item, 1); | 3438 | btrfs_set_inode_generation(leaf, item, 1); |
3375 | btrfs_set_inode_size(leaf, item, size); | 3439 | btrfs_set_inode_size(leaf, item, 0); |
3376 | btrfs_set_inode_mode(leaf, item, S_IFREG | 0600); | 3440 | btrfs_set_inode_mode(leaf, item, S_IFREG | 0600); |
3377 | btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS); | 3441 | btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS); |
3378 | btrfs_mark_buffer_dirty(leaf); | 3442 | btrfs_mark_buffer_dirty(leaf); |
@@ -3408,12 +3472,7 @@ static struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info, | |||
3408 | if (err) | 3472 | if (err) |
3409 | goto out; | 3473 | goto out; |
3410 | 3474 | ||
3411 | err = __insert_orphan_inode(trans, root, objectid, group->key.offset); | 3475 | err = __insert_orphan_inode(trans, root, objectid); |
3412 | BUG_ON(err); | ||
3413 | |||
3414 | err = btrfs_insert_file_extent(trans, root, objectid, 0, 0, 0, | ||
3415 | group->key.offset, 0, group->key.offset, | ||
3416 | 0, 0, 0); | ||
3417 | BUG_ON(err); | 3476 | BUG_ON(err); |
3418 | 3477 | ||
3419 | key.objectid = objectid; | 3478 | key.objectid = objectid; |
@@ -3519,10 +3578,10 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start) | |||
3519 | } | 3578 | } |
3520 | } | 3579 | } |
3521 | 3580 | ||
3522 | filemap_fdatawrite_range(fs_info->btree_inode->i_mapping, | 3581 | filemap_write_and_wait_range(fs_info->btree_inode->i_mapping, |
3523 | rc->block_group->key.objectid, | 3582 | rc->block_group->key.objectid, |
3524 | rc->block_group->key.objectid + | 3583 | rc->block_group->key.objectid + |
3525 | rc->block_group->key.offset - 1); | 3584 | rc->block_group->key.offset - 1); |
3526 | 3585 | ||
3527 | WARN_ON(rc->block_group->pinned > 0); | 3586 | WARN_ON(rc->block_group->pinned > 0); |
3528 | WARN_ON(rc->block_group->reserved > 0); | 3587 | WARN_ON(rc->block_group->reserved > 0); |