diff options
| author | Chris Mason <chris.mason@oracle.com> | 2008-04-28 15:29:52 -0400 |
|---|---|---|
| committer | Chris Mason <chris.mason@oracle.com> | 2008-09-25 11:04:02 -0400 |
| commit | ec44a35cbeb26ab2da84cb280d778260f2312feb (patch) | |
| tree | afd48a9769125095922c931e3ad77f207c8fb025 /fs/btrfs/extent-tree.c | |
| parent | 788f20eb5affef584e75ea84bb80a4c3352a2c0e (diff) | |
Btrfs: Add balance ioctl to restripe the chunks
Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs/btrfs/extent-tree.c')
| -rw-r--r-- | fs/btrfs/extent-tree.c | 106 |
1 files changed, 89 insertions, 17 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index fe4fe709c312..95aee5a29375 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
| @@ -17,6 +17,7 @@ | |||
| 17 | */ | 17 | */ |
| 18 | #include <linux/sched.h> | 18 | #include <linux/sched.h> |
| 19 | #include <linux/pagemap.h> | 19 | #include <linux/pagemap.h> |
| 20 | #include <linux/writeback.h> | ||
| 20 | #include "hash.h" | 21 | #include "hash.h" |
| 21 | #include "crc32c.h" | 22 | #include "crc32c.h" |
| 22 | #include "ctree.h" | 23 | #include "ctree.h" |
| @@ -1058,6 +1059,26 @@ static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags) | |||
| 1058 | } | 1059 | } |
| 1059 | } | 1060 | } |
| 1060 | 1061 | ||
| 1062 | static u64 reduce_alloc_profile(u64 flags) | ||
| 1063 | { | ||
| 1064 | if ((flags & BTRFS_BLOCK_GROUP_DUP) && | ||
| 1065 | (flags & (BTRFS_BLOCK_GROUP_RAID1 | | ||
| 1066 | BTRFS_BLOCK_GROUP_RAID10))) | ||
| 1067 | flags &= ~BTRFS_BLOCK_GROUP_DUP; | ||
| 1068 | |||
| 1069 | if ((flags & BTRFS_BLOCK_GROUP_RAID1) && | ||
| 1070 | (flags & BTRFS_BLOCK_GROUP_RAID10)) | ||
| 1071 | flags &= ~BTRFS_BLOCK_GROUP_RAID1; | ||
| 1072 | |||
| 1073 | if ((flags & BTRFS_BLOCK_GROUP_RAID0) && | ||
| 1074 | ((flags & BTRFS_BLOCK_GROUP_RAID1) | | ||
| 1075 | (flags & BTRFS_BLOCK_GROUP_RAID10) | | ||
| 1076 | (flags & BTRFS_BLOCK_GROUP_DUP))) | ||
| 1077 | flags &= ~BTRFS_BLOCK_GROUP_RAID0; | ||
| 1078 | return flags; | ||
| 1079 | } | ||
| 1080 | |||
| 1081 | |||
| 1061 | static int do_chunk_alloc(struct btrfs_trans_handle *trans, | 1082 | static int do_chunk_alloc(struct btrfs_trans_handle *trans, |
| 1062 | struct btrfs_root *extent_root, u64 alloc_bytes, | 1083 | struct btrfs_root *extent_root, u64 alloc_bytes, |
| 1063 | u64 flags) | 1084 | u64 flags) |
| @@ -1068,6 +1089,8 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, | |||
| 1068 | u64 num_bytes; | 1089 | u64 num_bytes; |
| 1069 | int ret; | 1090 | int ret; |
| 1070 | 1091 | ||
| 1092 | flags = reduce_alloc_profile(flags); | ||
| 1093 | |||
| 1071 | space_info = __find_space_info(extent_root->fs_info, flags); | 1094 | space_info = __find_space_info(extent_root->fs_info, flags); |
| 1072 | if (!space_info) { | 1095 | if (!space_info) { |
| 1073 | ret = update_space_info(extent_root->fs_info, flags, | 1096 | ret = update_space_info(extent_root->fs_info, flags, |
| @@ -1684,6 +1707,7 @@ enospc: | |||
| 1684 | error: | 1707 | error: |
| 1685 | return ret; | 1708 | return ret; |
| 1686 | } | 1709 | } |
| 1710 | |||
| 1687 | /* | 1711 | /* |
| 1688 | * finds a free extent and does all the dirty work required for allocation | 1712 | * finds a free extent and does all the dirty work required for allocation |
| 1689 | * returns the key for the extent through ins, and a tree buffer for | 1713 | * returns the key for the extent through ins, and a tree buffer for |
| @@ -1697,7 +1721,7 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, | |||
| 1697 | u64 root_objectid, u64 ref_generation, | 1721 | u64 root_objectid, u64 ref_generation, |
| 1698 | u64 owner, u64 owner_offset, | 1722 | u64 owner, u64 owner_offset, |
| 1699 | u64 empty_size, u64 hint_byte, | 1723 | u64 empty_size, u64 hint_byte, |
| 1700 | u64 search_end, struct btrfs_key *ins, int data) | 1724 | u64 search_end, struct btrfs_key *ins, u64 data) |
| 1701 | { | 1725 | { |
| 1702 | int ret; | 1726 | int ret; |
| 1703 | int pending_ret; | 1727 | int pending_ret; |
| @@ -1727,6 +1751,7 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, | |||
| 1727 | data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile; | 1751 | data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile; |
| 1728 | } | 1752 | } |
| 1729 | again: | 1753 | again: |
| 1754 | data = reduce_alloc_profile(data); | ||
| 1730 | if (root->ref_cows) { | 1755 | if (root->ref_cows) { |
| 1731 | if (!(data & BTRFS_BLOCK_GROUP_METADATA)) { | 1756 | if (!(data & BTRFS_BLOCK_GROUP_METADATA)) { |
| 1732 | ret = do_chunk_alloc(trans, root->fs_info->extent_root, | 1757 | ret = do_chunk_alloc(trans, root->fs_info->extent_root, |
| @@ -1752,6 +1777,9 @@ again: | |||
| 1752 | num_bytes = max(num_bytes, min_alloc_size); | 1777 | num_bytes = max(num_bytes, min_alloc_size); |
| 1753 | goto again; | 1778 | goto again; |
| 1754 | } | 1779 | } |
| 1780 | if (ret) { | ||
| 1781 | printk("allocation failed flags %Lu\n", data); | ||
| 1782 | } | ||
| 1755 | BUG_ON(ret); | 1783 | BUG_ON(ret); |
| 1756 | if (ret) | 1784 | if (ret) |
| 1757 | return ret; | 1785 | return ret; |
| @@ -2274,8 +2302,6 @@ static int noinline relocate_inode_pages(struct inode *inode, u64 start, | |||
| 2274 | { | 2302 | { |
| 2275 | u64 page_start; | 2303 | u64 page_start; |
| 2276 | u64 page_end; | 2304 | u64 page_end; |
| 2277 | u64 delalloc_start; | ||
| 2278 | u64 existing_delalloc; | ||
| 2279 | unsigned long last_index; | 2305 | unsigned long last_index; |
| 2280 | unsigned long i; | 2306 | unsigned long i; |
| 2281 | struct page *page; | 2307 | struct page *page; |
| @@ -2293,7 +2319,6 @@ static int noinline relocate_inode_pages(struct inode *inode, u64 start, | |||
| 2293 | ra_pages = BTRFS_I(inode)->root->fs_info->bdi.ra_pages; | 2319 | ra_pages = BTRFS_I(inode)->root->fs_info->bdi.ra_pages; |
| 2294 | 2320 | ||
| 2295 | file_ra_state_init(ra, inode->i_mapping); | 2321 | file_ra_state_init(ra, inode->i_mapping); |
| 2296 | kfree(ra); | ||
| 2297 | 2322 | ||
| 2298 | for (; i <= last_index; i++) { | 2323 | for (; i <= last_index; i++) { |
| 2299 | if (total_read % ra_pages == 0) { | 2324 | if (total_read % ra_pages == 0) { |
| @@ -2313,26 +2338,30 @@ static int noinline relocate_inode_pages(struct inode *inode, u64 start, | |||
| 2313 | goto out_unlock; | 2338 | goto out_unlock; |
| 2314 | } | 2339 | } |
| 2315 | } | 2340 | } |
| 2341 | #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) | ||
| 2342 | ClearPageDirty(page); | ||
| 2343 | #else | ||
| 2344 | cancel_dirty_page(page, PAGE_CACHE_SIZE); | ||
| 2345 | #endif | ||
| 2346 | wait_on_page_writeback(page); | ||
| 2347 | set_page_extent_mapped(page); | ||
| 2316 | page_start = (u64)page->index << PAGE_CACHE_SHIFT; | 2348 | page_start = (u64)page->index << PAGE_CACHE_SHIFT; |
| 2317 | page_end = page_start + PAGE_CACHE_SIZE - 1; | 2349 | page_end = page_start + PAGE_CACHE_SIZE - 1; |
| 2318 | 2350 | ||
| 2319 | lock_extent(io_tree, page_start, page_end, GFP_NOFS); | 2351 | lock_extent(io_tree, page_start, page_end, GFP_NOFS); |
| 2320 | 2352 | ||
| 2321 | delalloc_start = page_start; | 2353 | set_page_dirty(page); |
| 2322 | existing_delalloc = count_range_bits(io_tree, | ||
| 2323 | &delalloc_start, page_end, | ||
| 2324 | PAGE_CACHE_SIZE, EXTENT_DELALLOC); | ||
| 2325 | |||
| 2326 | set_extent_delalloc(io_tree, page_start, | 2354 | set_extent_delalloc(io_tree, page_start, |
| 2327 | page_end, GFP_NOFS); | 2355 | page_end, GFP_NOFS); |
| 2328 | 2356 | ||
| 2329 | unlock_extent(io_tree, page_start, page_end, GFP_NOFS); | 2357 | unlock_extent(io_tree, page_start, page_end, GFP_NOFS); |
| 2330 | set_page_dirty(page); | ||
| 2331 | unlock_page(page); | 2358 | unlock_page(page); |
| 2332 | page_cache_release(page); | 2359 | page_cache_release(page); |
| 2360 | balance_dirty_pages_ratelimited_nr(inode->i_mapping, 1); | ||
| 2333 | } | 2361 | } |
| 2334 | 2362 | ||
| 2335 | out_unlock: | 2363 | out_unlock: |
| 2364 | kfree(ra); | ||
| 2336 | mutex_unlock(&inode->i_mutex); | 2365 | mutex_unlock(&inode->i_mutex); |
| 2337 | return 0; | 2366 | return 0; |
| 2338 | } | 2367 | } |
| @@ -2397,8 +2426,6 @@ static int noinline relocate_one_reference(struct btrfs_root *extent_root, | |||
| 2397 | goto out; | 2426 | goto out; |
| 2398 | } | 2427 | } |
| 2399 | relocate_inode_pages(inode, ref_offset, extent_key->offset); | 2428 | relocate_inode_pages(inode, ref_offset, extent_key->offset); |
| 2400 | /* FIXME, data=ordered will help get rid of this */ | ||
| 2401 | filemap_fdatawrite(inode->i_mapping); | ||
| 2402 | iput(inode); | 2429 | iput(inode); |
| 2403 | mutex_lock(&extent_root->fs_info->fs_mutex); | 2430 | mutex_lock(&extent_root->fs_info->fs_mutex); |
| 2404 | } else { | 2431 | } else { |
| @@ -2486,6 +2513,47 @@ out: | |||
| 2486 | return ret; | 2513 | return ret; |
| 2487 | } | 2514 | } |
| 2488 | 2515 | ||
| 2516 | static u64 update_block_group_flags(struct btrfs_root *root, u64 flags) | ||
| 2517 | { | ||
| 2518 | u64 num_devices; | ||
| 2519 | u64 stripped = BTRFS_BLOCK_GROUP_RAID0 | | ||
| 2520 | BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10; | ||
| 2521 | |||
| 2522 | num_devices = btrfs_super_num_devices(&root->fs_info->super_copy); | ||
| 2523 | if (num_devices == 1) { | ||
| 2524 | stripped |= BTRFS_BLOCK_GROUP_DUP; | ||
| 2525 | stripped = flags & ~stripped; | ||
| 2526 | |||
| 2527 | /* turn raid0 into single device chunks */ | ||
| 2528 | if (flags & BTRFS_BLOCK_GROUP_RAID0) | ||
| 2529 | return stripped; | ||
| 2530 | |||
| 2531 | /* turn mirroring into duplication */ | ||
| 2532 | if (flags & (BTRFS_BLOCK_GROUP_RAID1 | | ||
| 2533 | BTRFS_BLOCK_GROUP_RAID10)) | ||
| 2534 | return stripped | BTRFS_BLOCK_GROUP_DUP; | ||
| 2535 | return flags; | ||
| 2536 | } else { | ||
| 2537 | /* they already had raid on here, just return */ | ||
| 2538 | if ((flags & BTRFS_BLOCK_GROUP_DUP) && | ||
| 2539 | (flags & BTRFS_BLOCK_GROUP_RAID1)) { | ||
| 2540 | } | ||
| 2541 | if (flags & stripped) | ||
| 2542 | return flags; | ||
| 2543 | |||
| 2544 | stripped |= BTRFS_BLOCK_GROUP_DUP; | ||
| 2545 | stripped = flags & ~stripped; | ||
| 2546 | |||
| 2547 | /* switch duplicated blocks with raid1 */ | ||
| 2548 | if (flags & BTRFS_BLOCK_GROUP_DUP) | ||
| 2549 | return stripped | BTRFS_BLOCK_GROUP_RAID1; | ||
| 2550 | |||
| 2551 | /* turn single device chunks into raid0 */ | ||
| 2552 | return stripped | BTRFS_BLOCK_GROUP_RAID0; | ||
| 2553 | } | ||
| 2554 | return flags; | ||
| 2555 | } | ||
| 2556 | |||
| 2489 | int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 shrink_start) | 2557 | int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 shrink_start) |
| 2490 | { | 2558 | { |
| 2491 | struct btrfs_trans_handle *trans; | 2559 | struct btrfs_trans_handle *trans; |
| @@ -2494,6 +2562,7 @@ int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 shrink_start) | |||
| 2494 | u64 cur_byte; | 2562 | u64 cur_byte; |
| 2495 | u64 total_found; | 2563 | u64 total_found; |
| 2496 | u64 shrink_last_byte; | 2564 | u64 shrink_last_byte; |
| 2565 | u64 new_alloc_flags; | ||
| 2497 | struct btrfs_block_group_cache *shrink_block_group; | 2566 | struct btrfs_block_group_cache *shrink_block_group; |
| 2498 | struct btrfs_fs_info *info = root->fs_info; | 2567 | struct btrfs_fs_info *info = root->fs_info; |
| 2499 | struct btrfs_key key; | 2568 | struct btrfs_key key; |
| @@ -2511,17 +2580,20 @@ int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 shrink_start) | |||
| 2511 | 2580 | ||
| 2512 | shrink_block_group->space_info->total_bytes -= | 2581 | shrink_block_group->space_info->total_bytes -= |
| 2513 | shrink_block_group->key.offset; | 2582 | shrink_block_group->key.offset; |
| 2514 | printk("shrink_extent_tree %Lu -> %Lu type %Lu\n", shrink_start, shrink_last_byte, shrink_block_group->flags); | ||
| 2515 | path = btrfs_alloc_path(); | 2583 | path = btrfs_alloc_path(); |
| 2516 | root = root->fs_info->extent_root; | 2584 | root = root->fs_info->extent_root; |
| 2517 | path->reada = 2; | 2585 | path->reada = 2; |
| 2518 | 2586 | ||
| 2519 | again: | 2587 | again: |
| 2520 | trans = btrfs_start_transaction(root, 1); | 2588 | if (btrfs_block_group_used(&shrink_block_group->item) > 0) { |
| 2521 | do_chunk_alloc(trans, root->fs_info->extent_root, | 2589 | trans = btrfs_start_transaction(root, 1); |
| 2590 | new_alloc_flags = update_block_group_flags(root, | ||
| 2591 | shrink_block_group->flags); | ||
| 2592 | do_chunk_alloc(trans, root->fs_info->extent_root, | ||
| 2522 | btrfs_block_group_used(&shrink_block_group->item) + | 2593 | btrfs_block_group_used(&shrink_block_group->item) + |
| 2523 | 2 * 1024 * 1024, shrink_block_group->flags); | 2594 | 2 * 1024 * 1024, new_alloc_flags); |
| 2524 | btrfs_end_transaction(trans, root); | 2595 | btrfs_end_transaction(trans, root); |
| 2596 | } | ||
| 2525 | shrink_block_group->ro = 1; | 2597 | shrink_block_group->ro = 1; |
| 2526 | 2598 | ||
| 2527 | total_found = 0; | 2599 | total_found = 0; |
