-rw-r--r--	fs/btrfs/relocation.c	237
1 file changed, 148 insertions, 89 deletions
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 48a504260635..361ad323faac 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -121,6 +121,15 @@ struct inodevec {
 	int nr;
 };
 
+#define MAX_EXTENTS 128
+
+struct file_extent_cluster {
+	u64 start;
+	u64 end;
+	u64 boundary[MAX_EXTENTS];
+	unsigned int nr;
+};
+
 struct reloc_control {
 	/* block group to relocate */
 	struct btrfs_block_group_cache *block_group;
@@ -2529,56 +2538,94 @@ out:
 }
 
 static noinline_for_stack
-int relocate_inode_pages(struct inode *inode, u64 start, u64 len)
+int setup_extent_mapping(struct inode *inode, u64 start, u64 end,
+			 u64 block_start)
+{
+	struct btrfs_root *root = BTRFS_I(inode)->root;
+	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
+	struct extent_map *em;
+	int ret = 0;
+
+	em = alloc_extent_map(GFP_NOFS);
+	if (!em)
+		return -ENOMEM;
+
+	em->start = start;
+	em->len = end + 1 - start;
+	em->block_len = em->len;
+	em->block_start = block_start;
+	em->bdev = root->fs_info->fs_devices->latest_bdev;
+	set_bit(EXTENT_FLAG_PINNED, &em->flags);
+
+	lock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS);
+	while (1) {
+		write_lock(&em_tree->lock);
+		ret = add_extent_mapping(em_tree, em);
+		write_unlock(&em_tree->lock);
+		if (ret != -EEXIST) {
+			free_extent_map(em);
+			break;
+		}
+		btrfs_drop_extent_cache(inode, start, end, 0);
+	}
+	unlock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS);
+	return ret;
+}
+
+static int relocate_file_extent_cluster(struct inode *inode,
+					struct file_extent_cluster *cluster)
 {
 	u64 page_start;
 	u64 page_end;
-	unsigned long i;
-	unsigned long first_index;
+	u64 offset = BTRFS_I(inode)->index_cnt;
+	unsigned long index;
 	unsigned long last_index;
-	unsigned int total_read = 0;
-	unsigned int total_dirty = 0;
+	unsigned int dirty_page = 0;
 	struct page *page;
 	struct file_ra_state *ra;
-	struct btrfs_ordered_extent *ordered;
-	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
+	int nr = 0;
 	int ret = 0;
 
+	if (!cluster->nr)
+		return 0;
+
 	ra = kzalloc(sizeof(*ra), GFP_NOFS);
 	if (!ra)
 		return -ENOMEM;
 
+	index = (cluster->start - offset) >> PAGE_CACHE_SHIFT;
+	last_index = (cluster->end - offset) >> PAGE_CACHE_SHIFT;
+
 	mutex_lock(&inode->i_mutex);
-	first_index = start >> PAGE_CACHE_SHIFT;
-	last_index = (start + len - 1) >> PAGE_CACHE_SHIFT;
 
-	/* make sure the dirty trick played by the caller work */
-	while (1) {
-		ret = invalidate_inode_pages2_range(inode->i_mapping,
-						    first_index, last_index);
-		if (ret != -EBUSY)
-			break;
-		schedule_timeout(HZ/10);
-	}
+	i_size_write(inode, cluster->end + 1 - offset);
+	ret = setup_extent_mapping(inode, cluster->start - offset,
+				   cluster->end - offset, cluster->start);
 	if (ret)
 		goto out_unlock;
 
 	file_ra_state_init(ra, inode->i_mapping);
 
-	for (i = first_index ; i <= last_index; i++) {
-		if (total_read % ra->ra_pages == 0) {
-			btrfs_force_ra(inode->i_mapping, ra, NULL, i,
-				min(last_index, ra->ra_pages + i - 1));
-		}
-		total_read++;
-again:
-		if (((u64)i << PAGE_CACHE_SHIFT) > i_size_read(inode))
-			BUG_ON(1);
-		page = grab_cache_page(inode->i_mapping, i);
+	WARN_ON(cluster->start != cluster->boundary[0]);
+	while (index <= last_index) {
+		page = find_lock_page(inode->i_mapping, index);
 		if (!page) {
-			ret = -ENOMEM;
-			goto out_unlock;
+			page_cache_sync_readahead(inode->i_mapping,
+						  ra, NULL, index,
+						  last_index + 1 - index);
+			page = grab_cache_page(inode->i_mapping, index);
+			if (!page) {
+				ret = -ENOMEM;
+				goto out_unlock;
+			}
+		}
+
+		if (PageReadahead(page)) {
+			page_cache_async_readahead(inode->i_mapping,
+						   ra, NULL, page, index,
+						   last_index + 1 - index);
 		}
+
 		if (!PageUptodate(page)) {
 			btrfs_readpage(NULL, page);
 			lock_page(page);
@@ -2589,75 +2636,79 @@ again:
 				goto out_unlock;
 			}
 		}
-		wait_on_page_writeback(page);
 
 		page_start = (u64)page->index << PAGE_CACHE_SHIFT;
 		page_end = page_start + PAGE_CACHE_SIZE - 1;
-		lock_extent(io_tree, page_start, page_end, GFP_NOFS);
-
-		ordered = btrfs_lookup_ordered_extent(inode, page_start);
-		if (ordered) {
-			unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
-			unlock_page(page);
-			page_cache_release(page);
-			btrfs_start_ordered_extent(inode, ordered, 1);
-			btrfs_put_ordered_extent(ordered);
-			goto again;
-		}
+
+		lock_extent(&BTRFS_I(inode)->io_tree,
+			    page_start, page_end, GFP_NOFS);
+
 		set_page_extent_mapped(page);
 
-		if (i == first_index)
-			set_extent_bits(io_tree, page_start, page_end,
+		if (nr < cluster->nr &&
+		    page_start + offset == cluster->boundary[nr]) {
+			set_extent_bits(&BTRFS_I(inode)->io_tree,
+					page_start, page_end,
 					EXTENT_BOUNDARY, GFP_NOFS);
+			nr++;
+		}
 		btrfs_set_extent_delalloc(inode, page_start, page_end);
 
 		set_page_dirty(page);
-		total_dirty++;
+		dirty_page++;
 
-		unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
+		unlock_extent(&BTRFS_I(inode)->io_tree,
+			      page_start, page_end, GFP_NOFS);
 		unlock_page(page);
 		page_cache_release(page);
+
+		index++;
+		if (nr < cluster->nr &&
+		    page_end + 1 + offset == cluster->boundary[nr]) {
+			balance_dirty_pages_ratelimited_nr(inode->i_mapping,
+							   dirty_page);
+			dirty_page = 0;
+		}
+	}
+	if (dirty_page) {
+		balance_dirty_pages_ratelimited_nr(inode->i_mapping,
+						   dirty_page);
 	}
+	WARN_ON(nr != cluster->nr);
 out_unlock:
 	mutex_unlock(&inode->i_mutex);
 	kfree(ra);
-	balance_dirty_pages_ratelimited_nr(inode->i_mapping, total_dirty);
 	return ret;
 }
 
 static noinline_for_stack
-int relocate_data_extent(struct inode *inode, struct btrfs_key *extent_key)
+int relocate_data_extent(struct inode *inode, struct btrfs_key *extent_key,
+			 struct file_extent_cluster *cluster)
 {
-	struct btrfs_root *root = BTRFS_I(inode)->root;
-	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
-	struct extent_map *em;
-	u64 start = extent_key->objectid - BTRFS_I(inode)->index_cnt;
-	u64 end = start + extent_key->offset - 1;
-
-	em = alloc_extent_map(GFP_NOFS);
-	em->start = start;
-	em->len = extent_key->offset;
-	em->block_len = extent_key->offset;
-	em->block_start = extent_key->objectid;
-	em->bdev = root->fs_info->fs_devices->latest_bdev;
-	set_bit(EXTENT_FLAG_PINNED, &em->flags);
+	int ret;
 
-	/* setup extent map to cheat btrfs_readpage */
-	lock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS);
-	while (1) {
-		int ret;
-		write_lock(&em_tree->lock);
-		ret = add_extent_mapping(em_tree, em);
-		write_unlock(&em_tree->lock);
-		if (ret != -EEXIST) {
-			free_extent_map(em);
-			break;
-		}
-		btrfs_drop_extent_cache(inode, start, end, 0);
+	if (cluster->nr > 0 && extent_key->objectid != cluster->end + 1) {
+		ret = relocate_file_extent_cluster(inode, cluster);
+		if (ret)
+			return ret;
+		cluster->nr = 0;
 	}
-	unlock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS);
 
-	return relocate_inode_pages(inode, start, extent_key->offset);
+	if (!cluster->nr)
+		cluster->start = extent_key->objectid;
+	else
+		BUG_ON(cluster->nr >= MAX_EXTENTS);
+	cluster->end = extent_key->objectid + extent_key->offset - 1;
+	cluster->boundary[cluster->nr] = extent_key->objectid;
+	cluster->nr++;
+
+	if (cluster->nr >= MAX_EXTENTS) {
+		ret = relocate_file_extent_cluster(inode, cluster);
+		if (ret)
+			return ret;
+		cluster->nr = 0;
+	}
+	return 0;
 }
 
 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
@@ -3208,6 +3259,7 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
 {
 	struct rb_root blocks = RB_ROOT;
 	struct btrfs_key key;
+	struct file_extent_cluster *cluster;
 	struct btrfs_trans_handle *trans = NULL;
 	struct btrfs_path *path;
 	struct btrfs_extent_item *ei;
@@ -3217,6 +3269,10 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
 	int ret;
 	int err = 0;
 
+	cluster = kzalloc(sizeof(*cluster), GFP_NOFS);
+	if (!cluster)
+		return -ENOMEM;
+
 	path = btrfs_alloc_path();
 	if (!path)
 		return -ENOMEM;
@@ -3310,14 +3366,15 @@
 		}
 
 		nr = trans->blocks_used;
-		btrfs_end_transaction_throttle(trans, rc->extent_root);
+		btrfs_end_transaction(trans, rc->extent_root);
 		trans = NULL;
 		btrfs_btree_balance_dirty(rc->extent_root, nr);
 
 		if (rc->stage == MOVE_DATA_EXTENTS &&
 		    (flags & BTRFS_EXTENT_FLAG_DATA)) {
 			rc->found_file_extent = 1;
-			ret = relocate_data_extent(rc->data_inode, &key);
+			ret = relocate_data_extent(rc->data_inode,
+						   &key, cluster);
 			if (ret < 0) {
 				err = ret;
 				break;
@@ -3332,6 +3389,14 @@
 		btrfs_btree_balance_dirty(rc->extent_root, nr);
 	}
 
+	if (!err) {
+		ret = relocate_file_extent_cluster(rc->data_inode, cluster);
+		if (ret < 0)
+			err = ret;
+	}
+
+	kfree(cluster);
+
 	rc->create_reloc_root = 0;
 	smp_mb();
 
@@ -3352,8 +3417,7 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
 }
 
 static int __insert_orphan_inode(struct btrfs_trans_handle *trans,
-				 struct btrfs_root *root,
-				 u64 objectid, u64 size)
+				 struct btrfs_root *root, u64 objectid)
 {
 	struct btrfs_path *path;
 	struct btrfs_inode_item *item;
@@ -3372,7 +3436,7 @@ static int __insert_orphan_inode(struct btrfs_trans_handle *trans,
 	item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item);
 	memset_extent_buffer(leaf, 0, (unsigned long)item, sizeof(*item));
 	btrfs_set_inode_generation(leaf, item, 1);
-	btrfs_set_inode_size(leaf, item, size);
+	btrfs_set_inode_size(leaf, item, 0);
 	btrfs_set_inode_mode(leaf, item, S_IFREG | 0600);
 	btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS);
 	btrfs_mark_buffer_dirty(leaf);
@@ -3408,12 +3472,7 @@ static struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info,
 	if (err)
 		goto out;
 
-	err = __insert_orphan_inode(trans, root, objectid, group->key.offset);
-	BUG_ON(err);
-
-	err = btrfs_insert_file_extent(trans, root, objectid, 0, 0, 0,
-				       group->key.offset, 0, group->key.offset,
-				       0, 0, 0);
+	err = __insert_orphan_inode(trans, root, objectid);
 	BUG_ON(err);
 
 	key.objectid = objectid;
@@ -3519,10 +3578,10 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
 		}
 	}
 
-	filemap_fdatawrite_range(fs_info->btree_inode->i_mapping,
-				 rc->block_group->key.objectid,
-				 rc->block_group->key.objectid +
-				 rc->block_group->key.offset - 1);
+	filemap_write_and_wait_range(fs_info->btree_inode->i_mapping,
+				     rc->block_group->key.objectid,
+				     rc->block_group->key.objectid +
+				     rc->block_group->key.offset - 1);
 
 	WARN_ON(rc->block_group->pinned > 0);
 	WARN_ON(rc->block_group->reserved > 0);
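
Note: the batching scheme that relocate_data_extent() introduces above can be summarized by the following standalone userspace sketch. It only mirrors the clustering logic (collect contiguous extents, remember their boundaries, flush on a gap or when MAX_EXTENTS boundaries are stored); the extent_key stand-in, flush_cluster(), add_extent() and the sample extent list in main() are illustrative names, not btrfs code.

/* Userspace sketch of the file_extent_cluster batching idea (not kernel code). */
#include <stdio.h>
#include <assert.h>

#define MAX_EXTENTS 128

struct extent_key {			/* stand-in for struct btrfs_key */
	unsigned long long objectid;	/* extent start (bytenr) */
	unsigned long long offset;	/* extent length */
};

struct file_extent_cluster {
	unsigned long long start;
	unsigned long long end;
	unsigned long long boundary[MAX_EXTENTS];
	unsigned int nr;
};

/* placeholder for relocate_file_extent_cluster(): just report the batch */
static int flush_cluster(struct file_extent_cluster *c)
{
	printf("relocating cluster [%llu, %llu], %u extents\n",
	       c->start, c->end, c->nr);
	c->nr = 0;
	return 0;
}

static int add_extent(struct file_extent_cluster *c,
		      const struct extent_key *key)
{
	int ret;

	/* flush when the new extent is not adjacent to the current cluster */
	if (c->nr > 0 && key->objectid != c->end + 1) {
		ret = flush_cluster(c);
		if (ret)
			return ret;
	}

	if (!c->nr)
		c->start = key->objectid;
	c->end = key->objectid + key->offset - 1;
	c->boundary[c->nr++] = key->objectid;

	/* flush when the boundary array is full */
	if (c->nr >= MAX_EXTENTS)
		return flush_cluster(c);
	return 0;
}

int main(void)
{
	struct file_extent_cluster cluster = { .nr = 0 };
	struct extent_key keys[] = {
		{ 4096, 4096 }, { 8192, 8192 },	/* contiguous pair */
		{ 65536, 4096 },		/* gap: forces a flush */
	};

	for (unsigned int i = 0; i < sizeof(keys) / sizeof(keys[0]); i++)
		assert(add_extent(&cluster, &keys[i]) == 0);
	if (cluster.nr)		/* final flush, as relocate_block_group() does */
		flush_cluster(&cluster);
	return 0;
}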
