diff options
author | Chris Mason <chris.mason@oracle.com> | 2008-04-28 15:29:52 -0400 |
---|---|---|
committer | Chris Mason <chris.mason@oracle.com> | 2008-09-25 11:04:02 -0400 |
commit | ec44a35cbeb26ab2da84cb280d778260f2312feb (patch) | |
tree | afd48a9769125095922c931e3ad77f207c8fb025 /fs/btrfs/extent-tree.c | |
parent | 788f20eb5affef584e75ea84bb80a4c3352a2c0e (diff) |
Btrfs: Add balance ioctl to restripe the chunks
Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs/btrfs/extent-tree.c')
-rw-r--r-- | fs/btrfs/extent-tree.c | 106 |
1 file changed, 89 insertions, 17 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index fe4fe709c312..95aee5a29375 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -17,6 +17,7 @@ | |||
17 | */ | 17 | */ |
18 | #include <linux/sched.h> | 18 | #include <linux/sched.h> |
19 | #include <linux/pagemap.h> | 19 | #include <linux/pagemap.h> |
20 | #include <linux/writeback.h> | ||
20 | #include "hash.h" | 21 | #include "hash.h" |
21 | #include "crc32c.h" | 22 | #include "crc32c.h" |
22 | #include "ctree.h" | 23 | #include "ctree.h" |
@@ -1058,6 +1059,26 @@ static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags) | |||
1058 | } | 1059 | } |
1059 | } | 1060 | } |
1060 | 1061 | ||
1062 | static u64 reduce_alloc_profile(u64 flags) | ||
1063 | { | ||
1064 | if ((flags & BTRFS_BLOCK_GROUP_DUP) && | ||
1065 | (flags & (BTRFS_BLOCK_GROUP_RAID1 | | ||
1066 | BTRFS_BLOCK_GROUP_RAID10))) | ||
1067 | flags &= ~BTRFS_BLOCK_GROUP_DUP; | ||
1068 | |||
1069 | if ((flags & BTRFS_BLOCK_GROUP_RAID1) && | ||
1070 | (flags & BTRFS_BLOCK_GROUP_RAID10)) | ||
1071 | flags &= ~BTRFS_BLOCK_GROUP_RAID1; | ||
1072 | |||
1073 | if ((flags & BTRFS_BLOCK_GROUP_RAID0) && | ||
1074 | ((flags & BTRFS_BLOCK_GROUP_RAID1) | | ||
1075 | (flags & BTRFS_BLOCK_GROUP_RAID10) | | ||
1076 | (flags & BTRFS_BLOCK_GROUP_DUP))) | ||
1077 | flags &= ~BTRFS_BLOCK_GROUP_RAID0; | ||
1078 | return flags; | ||
1079 | } | ||
1080 | |||
1081 | |||
1061 | static int do_chunk_alloc(struct btrfs_trans_handle *trans, | 1082 | static int do_chunk_alloc(struct btrfs_trans_handle *trans, |
1062 | struct btrfs_root *extent_root, u64 alloc_bytes, | 1083 | struct btrfs_root *extent_root, u64 alloc_bytes, |
1063 | u64 flags) | 1084 | u64 flags) |
@@ -1068,6 +1089,8 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, | |||
1068 | u64 num_bytes; | 1089 | u64 num_bytes; |
1069 | int ret; | 1090 | int ret; |
1070 | 1091 | ||
1092 | flags = reduce_alloc_profile(flags); | ||
1093 | |||
1071 | space_info = __find_space_info(extent_root->fs_info, flags); | 1094 | space_info = __find_space_info(extent_root->fs_info, flags); |
1072 | if (!space_info) { | 1095 | if (!space_info) { |
1073 | ret = update_space_info(extent_root->fs_info, flags, | 1096 | ret = update_space_info(extent_root->fs_info, flags, |
@@ -1684,6 +1707,7 @@ enospc: | |||
1684 | error: | 1707 | error: |
1685 | return ret; | 1708 | return ret; |
1686 | } | 1709 | } |
1710 | |||
1687 | /* | 1711 | /* |
1688 | * finds a free extent and does all the dirty work required for allocation | 1712 | * finds a free extent and does all the dirty work required for allocation |
1689 | * returns the key for the extent through ins, and a tree buffer for | 1713 | * returns the key for the extent through ins, and a tree buffer for |
@@ -1697,7 +1721,7 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, | |||
1697 | u64 root_objectid, u64 ref_generation, | 1721 | u64 root_objectid, u64 ref_generation, |
1698 | u64 owner, u64 owner_offset, | 1722 | u64 owner, u64 owner_offset, |
1699 | u64 empty_size, u64 hint_byte, | 1723 | u64 empty_size, u64 hint_byte, |
1700 | u64 search_end, struct btrfs_key *ins, int data) | 1724 | u64 search_end, struct btrfs_key *ins, u64 data) |
1701 | { | 1725 | { |
1702 | int ret; | 1726 | int ret; |
1703 | int pending_ret; | 1727 | int pending_ret; |
@@ -1727,6 +1751,7 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, | |||
1727 | data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile; | 1751 | data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile; |
1728 | } | 1752 | } |
1729 | again: | 1753 | again: |
1754 | data = reduce_alloc_profile(data); | ||
1730 | if (root->ref_cows) { | 1755 | if (root->ref_cows) { |
1731 | if (!(data & BTRFS_BLOCK_GROUP_METADATA)) { | 1756 | if (!(data & BTRFS_BLOCK_GROUP_METADATA)) { |
1732 | ret = do_chunk_alloc(trans, root->fs_info->extent_root, | 1757 | ret = do_chunk_alloc(trans, root->fs_info->extent_root, |
@@ -1752,6 +1777,9 @@ again: | |||
1752 | num_bytes = max(num_bytes, min_alloc_size); | 1777 | num_bytes = max(num_bytes, min_alloc_size); |
1753 | goto again; | 1778 | goto again; |
1754 | } | 1779 | } |
1780 | if (ret) { | ||
1781 | printk("allocation failed flags %Lu\n", data); | ||
1782 | } | ||
1755 | BUG_ON(ret); | 1783 | BUG_ON(ret); |
1756 | if (ret) | 1784 | if (ret) |
1757 | return ret; | 1785 | return ret; |
@@ -2274,8 +2302,6 @@ static int noinline relocate_inode_pages(struct inode *inode, u64 start, | |||
2274 | { | 2302 | { |
2275 | u64 page_start; | 2303 | u64 page_start; |
2276 | u64 page_end; | 2304 | u64 page_end; |
2277 | u64 delalloc_start; | ||
2278 | u64 existing_delalloc; | ||
2279 | unsigned long last_index; | 2305 | unsigned long last_index; |
2280 | unsigned long i; | 2306 | unsigned long i; |
2281 | struct page *page; | 2307 | struct page *page; |
@@ -2293,7 +2319,6 @@ static int noinline relocate_inode_pages(struct inode *inode, u64 start, | |||
2293 | ra_pages = BTRFS_I(inode)->root->fs_info->bdi.ra_pages; | 2319 | ra_pages = BTRFS_I(inode)->root->fs_info->bdi.ra_pages; |
2294 | 2320 | ||
2295 | file_ra_state_init(ra, inode->i_mapping); | 2321 | file_ra_state_init(ra, inode->i_mapping); |
2296 | kfree(ra); | ||
2297 | 2322 | ||
2298 | for (; i <= last_index; i++) { | 2323 | for (; i <= last_index; i++) { |
2299 | if (total_read % ra_pages == 0) { | 2324 | if (total_read % ra_pages == 0) { |
@@ -2313,26 +2338,30 @@ static int noinline relocate_inode_pages(struct inode *inode, u64 start, | |||
2313 | goto out_unlock; | 2338 | goto out_unlock; |
2314 | } | 2339 | } |
2315 | } | 2340 | } |
2341 | #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) | ||
2342 | ClearPageDirty(page); | ||
2343 | #else | ||
2344 | cancel_dirty_page(page, PAGE_CACHE_SIZE); | ||
2345 | #endif | ||
2346 | wait_on_page_writeback(page); | ||
2347 | set_page_extent_mapped(page); | ||
2316 | page_start = (u64)page->index << PAGE_CACHE_SHIFT; | 2348 | page_start = (u64)page->index << PAGE_CACHE_SHIFT; |
2317 | page_end = page_start + PAGE_CACHE_SIZE - 1; | 2349 | page_end = page_start + PAGE_CACHE_SIZE - 1; |
2318 | 2350 | ||
2319 | lock_extent(io_tree, page_start, page_end, GFP_NOFS); | 2351 | lock_extent(io_tree, page_start, page_end, GFP_NOFS); |
2320 | 2352 | ||
2321 | delalloc_start = page_start; | 2353 | set_page_dirty(page); |
2322 | existing_delalloc = count_range_bits(io_tree, | ||
2323 | &delalloc_start, page_end, | ||
2324 | PAGE_CACHE_SIZE, EXTENT_DELALLOC); | ||
2325 | |||
2326 | set_extent_delalloc(io_tree, page_start, | 2354 | set_extent_delalloc(io_tree, page_start, |
2327 | page_end, GFP_NOFS); | 2355 | page_end, GFP_NOFS); |
2328 | 2356 | ||
2329 | unlock_extent(io_tree, page_start, page_end, GFP_NOFS); | 2357 | unlock_extent(io_tree, page_start, page_end, GFP_NOFS); |
2330 | set_page_dirty(page); | ||
2331 | unlock_page(page); | 2358 | unlock_page(page); |
2332 | page_cache_release(page); | 2359 | page_cache_release(page); |
2360 | balance_dirty_pages_ratelimited_nr(inode->i_mapping, 1); | ||
2333 | } | 2361 | } |
2334 | 2362 | ||
2335 | out_unlock: | 2363 | out_unlock: |
2364 | kfree(ra); | ||
2336 | mutex_unlock(&inode->i_mutex); | 2365 | mutex_unlock(&inode->i_mutex); |
2337 | return 0; | 2366 | return 0; |
2338 | } | 2367 | } |
@@ -2397,8 +2426,6 @@ static int noinline relocate_one_reference(struct btrfs_root *extent_root, | |||
2397 | goto out; | 2426 | goto out; |
2398 | } | 2427 | } |
2399 | relocate_inode_pages(inode, ref_offset, extent_key->offset); | 2428 | relocate_inode_pages(inode, ref_offset, extent_key->offset); |
2400 | /* FIXME, data=ordered will help get rid of this */ | ||
2401 | filemap_fdatawrite(inode->i_mapping); | ||
2402 | iput(inode); | 2429 | iput(inode); |
2403 | mutex_lock(&extent_root->fs_info->fs_mutex); | 2430 | mutex_lock(&extent_root->fs_info->fs_mutex); |
2404 | } else { | 2431 | } else { |
@@ -2486,6 +2513,47 @@ out: | |||
2486 | return ret; | 2513 | return ret; |
2487 | } | 2514 | } |
2488 | 2515 | ||
2516 | static u64 update_block_group_flags(struct btrfs_root *root, u64 flags) | ||
2517 | { | ||
2518 | u64 num_devices; | ||
2519 | u64 stripped = BTRFS_BLOCK_GROUP_RAID0 | | ||
2520 | BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10; | ||
2521 | |||
2522 | num_devices = btrfs_super_num_devices(&root->fs_info->super_copy); | ||
2523 | if (num_devices == 1) { | ||
2524 | stripped |= BTRFS_BLOCK_GROUP_DUP; | ||
2525 | stripped = flags & ~stripped; | ||
2526 | |||
2527 | /* turn raid0 into single device chunks */ | ||
2528 | if (flags & BTRFS_BLOCK_GROUP_RAID0) | ||
2529 | return stripped; | ||
2530 | |||
2531 | /* turn mirroring into duplication */ | ||
2532 | if (flags & (BTRFS_BLOCK_GROUP_RAID1 | | ||
2533 | BTRFS_BLOCK_GROUP_RAID10)) | ||
2534 | return stripped | BTRFS_BLOCK_GROUP_DUP; | ||
2535 | return flags; | ||
2536 | } else { | ||
2537 | /* they already had raid on here, just return */ | ||
2538 | if ((flags & BTRFS_BLOCK_GROUP_DUP) && | ||
2539 | (flags & BTRFS_BLOCK_GROUP_RAID1)) { | ||
2540 | } | ||
2541 | if (flags & stripped) | ||
2542 | return flags; | ||
2543 | |||
2544 | stripped |= BTRFS_BLOCK_GROUP_DUP; | ||
2545 | stripped = flags & ~stripped; | ||
2546 | |||
2547 | /* switch duplicated blocks with raid1 */ | ||
2548 | if (flags & BTRFS_BLOCK_GROUP_DUP) | ||
2549 | return stripped | BTRFS_BLOCK_GROUP_RAID1; | ||
2550 | |||
2551 | /* turn single device chunks into raid0 */ | ||
2552 | return stripped | BTRFS_BLOCK_GROUP_RAID0; | ||
2553 | } | ||
2554 | return flags; | ||
2555 | } | ||
2556 | |||
2489 | int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 shrink_start) | 2557 | int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 shrink_start) |
2490 | { | 2558 | { |
2491 | struct btrfs_trans_handle *trans; | 2559 | struct btrfs_trans_handle *trans; |
@@ -2494,6 +2562,7 @@ int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 shrink_start) | |||
2494 | u64 cur_byte; | 2562 | u64 cur_byte; |
2495 | u64 total_found; | 2563 | u64 total_found; |
2496 | u64 shrink_last_byte; | 2564 | u64 shrink_last_byte; |
2565 | u64 new_alloc_flags; | ||
2497 | struct btrfs_block_group_cache *shrink_block_group; | 2566 | struct btrfs_block_group_cache *shrink_block_group; |
2498 | struct btrfs_fs_info *info = root->fs_info; | 2567 | struct btrfs_fs_info *info = root->fs_info; |
2499 | struct btrfs_key key; | 2568 | struct btrfs_key key; |
@@ -2511,17 +2580,20 @@ int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 shrink_start) | |||
2511 | 2580 | ||
2512 | shrink_block_group->space_info->total_bytes -= | 2581 | shrink_block_group->space_info->total_bytes -= |
2513 | shrink_block_group->key.offset; | 2582 | shrink_block_group->key.offset; |
2514 | printk("shrink_extent_tree %Lu -> %Lu type %Lu\n", shrink_start, shrink_last_byte, shrink_block_group->flags); | ||
2515 | path = btrfs_alloc_path(); | 2583 | path = btrfs_alloc_path(); |
2516 | root = root->fs_info->extent_root; | 2584 | root = root->fs_info->extent_root; |
2517 | path->reada = 2; | 2585 | path->reada = 2; |
2518 | 2586 | ||
2519 | again: | 2587 | again: |
2520 | trans = btrfs_start_transaction(root, 1); | 2588 | if (btrfs_block_group_used(&shrink_block_group->item) > 0) { |
2521 | do_chunk_alloc(trans, root->fs_info->extent_root, | 2589 | trans = btrfs_start_transaction(root, 1); |
2590 | new_alloc_flags = update_block_group_flags(root, | ||
2591 | shrink_block_group->flags); | ||
2592 | do_chunk_alloc(trans, root->fs_info->extent_root, | ||
2522 | btrfs_block_group_used(&shrink_block_group->item) + | 2593 | btrfs_block_group_used(&shrink_block_group->item) + |
2523 | 2 * 1024 * 1024, shrink_block_group->flags); | 2594 | 2 * 1024 * 1024, new_alloc_flags); |
2524 | btrfs_end_transaction(trans, root); | 2595 | btrfs_end_transaction(trans, root); |
2596 | } | ||
2525 | shrink_block_group->ro = 1; | 2597 | shrink_block_group->ro = 1; |
2526 | 2598 | ||
2527 | total_found = 0; | 2599 | total_found = 0; |