about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Chris Mason <chris.mason@oracle.com> 2008-04-28 15:29:52 -0400
committer Chris Mason <chris.mason@oracle.com> 2008-09-25 11:04:02 -0400
commit ec44a35cbeb26ab2da84cb280d778260f2312feb (patch)
tree afd48a9769125095922c931e3ad77f207c8fb025
parent 788f20eb5affef584e75ea84bb80a4c3352a2c0e (diff)
Btrfs: Add balance ioctl to restripe the chunks
Signed-off-by: Chris Mason <chris.mason@oracle.com>
-rw-r--r-- fs/btrfs/ctree.h 2
-rw-r--r-- fs/btrfs/extent-tree.c 106
-rw-r--r-- fs/btrfs/inode.c 11
-rw-r--r-- fs/btrfs/volumes.c 115
-rw-r--r-- fs/btrfs/volumes.h 1
5 files changed, 208 insertions, 27 deletions
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index b9257b37bb96..73b92dd150ff 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1364,7 +1364,7 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
1364 u64 root_objectid, u64 ref_generation, 1364 u64 root_objectid, u64 ref_generation,
1365 u64 owner, u64 owner_offset, 1365 u64 owner, u64 owner_offset,
1366 u64 empty_size, u64 hint_byte, 1366 u64 empty_size, u64 hint_byte,
1367 u64 search_end, struct btrfs_key *ins, int data); 1367 u64 search_end, struct btrfs_key *ins, u64 data);
1368int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, 1368int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
1369 struct extent_buffer *buf); 1369 struct extent_buffer *buf);
1370int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root 1370int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index fe4fe709c312..95aee5a29375 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -17,6 +17,7 @@
17 */ 17 */
18#include <linux/sched.h> 18#include <linux/sched.h>
19#include <linux/pagemap.h> 19#include <linux/pagemap.h>
20#include <linux/writeback.h>
20#include "hash.h" 21#include "hash.h"
21#include "crc32c.h" 22#include "crc32c.h"
22#include "ctree.h" 23#include "ctree.h"
@@ -1058,6 +1059,26 @@ static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
1058 } 1059 }
1059} 1060}
1060 1061
1062static u64 reduce_alloc_profile(u64 flags)
1063{
1064 if ((flags & BTRFS_BLOCK_GROUP_DUP) &&
1065 (flags & (BTRFS_BLOCK_GROUP_RAID1 |
1066 BTRFS_BLOCK_GROUP_RAID10)))
1067 flags &= ~BTRFS_BLOCK_GROUP_DUP;
1068
1069 if ((flags & BTRFS_BLOCK_GROUP_RAID1) &&
1070 (flags & BTRFS_BLOCK_GROUP_RAID10))
1071 flags &= ~BTRFS_BLOCK_GROUP_RAID1;
1072
1073 if ((flags & BTRFS_BLOCK_GROUP_RAID0) &&
1074 ((flags & BTRFS_BLOCK_GROUP_RAID1) |
1075 (flags & BTRFS_BLOCK_GROUP_RAID10) |
1076 (flags & BTRFS_BLOCK_GROUP_DUP)))
1077 flags &= ~BTRFS_BLOCK_GROUP_RAID0;
1078 return flags;
1079}
1080
1081
1061static int do_chunk_alloc(struct btrfs_trans_handle *trans, 1082static int do_chunk_alloc(struct btrfs_trans_handle *trans,
1062 struct btrfs_root *extent_root, u64 alloc_bytes, 1083 struct btrfs_root *extent_root, u64 alloc_bytes,
1063 u64 flags) 1084 u64 flags)
@@ -1068,6 +1089,8 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
1068 u64 num_bytes; 1089 u64 num_bytes;
1069 int ret; 1090 int ret;
1070 1091
1092 flags = reduce_alloc_profile(flags);
1093
1071 space_info = __find_space_info(extent_root->fs_info, flags); 1094 space_info = __find_space_info(extent_root->fs_info, flags);
1072 if (!space_info) { 1095 if (!space_info) {
1073 ret = update_space_info(extent_root->fs_info, flags, 1096 ret = update_space_info(extent_root->fs_info, flags,
@@ -1684,6 +1707,7 @@ enospc:
1684error: 1707error:
1685 return ret; 1708 return ret;
1686} 1709}
1710
1687/* 1711/*
1688 * finds a free extent and does all the dirty work required for allocation 1712 * finds a free extent and does all the dirty work required for allocation
1689 * returns the key for the extent through ins, and a tree buffer for 1713 * returns the key for the extent through ins, and a tree buffer for
@@ -1697,7 +1721,7 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
1697 u64 root_objectid, u64 ref_generation, 1721 u64 root_objectid, u64 ref_generation,
1698 u64 owner, u64 owner_offset, 1722 u64 owner, u64 owner_offset,
1699 u64 empty_size, u64 hint_byte, 1723 u64 empty_size, u64 hint_byte,
1700 u64 search_end, struct btrfs_key *ins, int data) 1724 u64 search_end, struct btrfs_key *ins, u64 data)
1701{ 1725{
1702 int ret; 1726 int ret;
1703 int pending_ret; 1727 int pending_ret;
@@ -1727,6 +1751,7 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
1727 data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile; 1751 data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile;
1728 } 1752 }
1729again: 1753again:
1754 data = reduce_alloc_profile(data);
1730 if (root->ref_cows) { 1755 if (root->ref_cows) {
1731 if (!(data & BTRFS_BLOCK_GROUP_METADATA)) { 1756 if (!(data & BTRFS_BLOCK_GROUP_METADATA)) {
1732 ret = do_chunk_alloc(trans, root->fs_info->extent_root, 1757 ret = do_chunk_alloc(trans, root->fs_info->extent_root,
@@ -1752,6 +1777,9 @@ again:
1752 num_bytes = max(num_bytes, min_alloc_size); 1777 num_bytes = max(num_bytes, min_alloc_size);
1753 goto again; 1778 goto again;
1754 } 1779 }
1780 if (ret) {
1781 printk("allocation failed flags %Lu\n", data);
1782 }
1755 BUG_ON(ret); 1783 BUG_ON(ret);
1756 if (ret) 1784 if (ret)
1757 return ret; 1785 return ret;
@@ -2274,8 +2302,6 @@ static int noinline relocate_inode_pages(struct inode *inode, u64 start,
2274{ 2302{
2275 u64 page_start; 2303 u64 page_start;
2276 u64 page_end; 2304 u64 page_end;
2277 u64 delalloc_start;
2278 u64 existing_delalloc;
2279 unsigned long last_index; 2305 unsigned long last_index;
2280 unsigned long i; 2306 unsigned long i;
2281 struct page *page; 2307 struct page *page;
@@ -2293,7 +2319,6 @@ static int noinline relocate_inode_pages(struct inode *inode, u64 start,
2293 ra_pages = BTRFS_I(inode)->root->fs_info->bdi.ra_pages; 2319 ra_pages = BTRFS_I(inode)->root->fs_info->bdi.ra_pages;
2294 2320
2295 file_ra_state_init(ra, inode->i_mapping); 2321 file_ra_state_init(ra, inode->i_mapping);
2296 kfree(ra);
2297 2322
2298 for (; i <= last_index; i++) { 2323 for (; i <= last_index; i++) {
2299 if (total_read % ra_pages == 0) { 2324 if (total_read % ra_pages == 0) {
@@ -2313,26 +2338,30 @@ static int noinline relocate_inode_pages(struct inode *inode, u64 start,
2313 goto out_unlock; 2338 goto out_unlock;
2314 } 2339 }
2315 } 2340 }
2341#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
2342 ClearPageDirty(page);
2343#else
2344 cancel_dirty_page(page, PAGE_CACHE_SIZE);
2345#endif
2346 wait_on_page_writeback(page);
2347 set_page_extent_mapped(page);
2316 page_start = (u64)page->index << PAGE_CACHE_SHIFT; 2348 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
2317 page_end = page_start + PAGE_CACHE_SIZE - 1; 2349 page_end = page_start + PAGE_CACHE_SIZE - 1;
2318 2350
2319 lock_extent(io_tree, page_start, page_end, GFP_NOFS); 2351 lock_extent(io_tree, page_start, page_end, GFP_NOFS);
2320 2352
2321 delalloc_start = page_start; 2353 set_page_dirty(page);
2322 existing_delalloc = count_range_bits(io_tree,
2323 &delalloc_start, page_end,
2324 PAGE_CACHE_SIZE, EXTENT_DELALLOC);
2325
2326 set_extent_delalloc(io_tree, page_start, 2354 set_extent_delalloc(io_tree, page_start,
2327 page_end, GFP_NOFS); 2355 page_end, GFP_NOFS);
2328 2356
2329 unlock_extent(io_tree, page_start, page_end, GFP_NOFS); 2357 unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
2330 set_page_dirty(page);
2331 unlock_page(page); 2358 unlock_page(page);
2332 page_cache_release(page); 2359 page_cache_release(page);
2360 balance_dirty_pages_ratelimited_nr(inode->i_mapping, 1);
2333 } 2361 }
2334 2362
2335out_unlock: 2363out_unlock:
2364 kfree(ra);
2336 mutex_unlock(&inode->i_mutex); 2365 mutex_unlock(&inode->i_mutex);
2337 return 0; 2366 return 0;
2338} 2367}
@@ -2397,8 +2426,6 @@ static int noinline relocate_one_reference(struct btrfs_root *extent_root,
2397 goto out; 2426 goto out;
2398 } 2427 }
2399 relocate_inode_pages(inode, ref_offset, extent_key->offset); 2428 relocate_inode_pages(inode, ref_offset, extent_key->offset);
2400 /* FIXME, data=ordered will help get rid of this */
2401 filemap_fdatawrite(inode->i_mapping);
2402 iput(inode); 2429 iput(inode);
2403 mutex_lock(&extent_root->fs_info->fs_mutex); 2430 mutex_lock(&extent_root->fs_info->fs_mutex);
2404 } else { 2431 } else {
@@ -2486,6 +2513,47 @@ out:
2486 return ret; 2513 return ret;
2487} 2514}
2488 2515
2516static u64 update_block_group_flags(struct btrfs_root *root, u64 flags)
2517{
2518 u64 num_devices;
2519 u64 stripped = BTRFS_BLOCK_GROUP_RAID0 |
2520 BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10;
2521
2522 num_devices = btrfs_super_num_devices(&root->fs_info->super_copy);
2523 if (num_devices == 1) {
2524 stripped |= BTRFS_BLOCK_GROUP_DUP;
2525 stripped = flags & ~stripped;
2526
2527 /* turn raid0 into single device chunks */
2528 if (flags & BTRFS_BLOCK_GROUP_RAID0)
2529 return stripped;
2530
2531 /* turn mirroring into duplication */
2532 if (flags & (BTRFS_BLOCK_GROUP_RAID1 |
2533 BTRFS_BLOCK_GROUP_RAID10))
2534 return stripped | BTRFS_BLOCK_GROUP_DUP;
2535 return flags;
2536 } else {
2537 /* they already had raid on here, just return */
2538 if ((flags & BTRFS_BLOCK_GROUP_DUP) &&
2539 (flags & BTRFS_BLOCK_GROUP_RAID1)) {
2540 }
2541 if (flags & stripped)
2542 return flags;
2543
2544 stripped |= BTRFS_BLOCK_GROUP_DUP;
2545 stripped = flags & ~stripped;
2546
2547 /* switch duplicated blocks with raid1 */
2548 if (flags & BTRFS_BLOCK_GROUP_DUP)
2549 return stripped | BTRFS_BLOCK_GROUP_RAID1;
2550
2551 /* turn single device chunks into raid0 */
2552 return stripped | BTRFS_BLOCK_GROUP_RAID0;
2553 }
2554 return flags;
2555}
2556
2489int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 shrink_start) 2557int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 shrink_start)
2490{ 2558{
2491 struct btrfs_trans_handle *trans; 2559 struct btrfs_trans_handle *trans;
@@ -2494,6 +2562,7 @@ int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 shrink_start)
2494 u64 cur_byte; 2562 u64 cur_byte;
2495 u64 total_found; 2563 u64 total_found;
2496 u64 shrink_last_byte; 2564 u64 shrink_last_byte;
2565 u64 new_alloc_flags;
2497 struct btrfs_block_group_cache *shrink_block_group; 2566 struct btrfs_block_group_cache *shrink_block_group;
2498 struct btrfs_fs_info *info = root->fs_info; 2567 struct btrfs_fs_info *info = root->fs_info;
2499 struct btrfs_key key; 2568 struct btrfs_key key;
@@ -2511,17 +2580,20 @@ int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 shrink_start)
2511 2580
2512 shrink_block_group->space_info->total_bytes -= 2581 shrink_block_group->space_info->total_bytes -=
2513 shrink_block_group->key.offset; 2582 shrink_block_group->key.offset;
2514printk("shrink_extent_tree %Lu -> %Lu type %Lu\n", shrink_start, shrink_last_byte, shrink_block_group->flags);
2515 path = btrfs_alloc_path(); 2583 path = btrfs_alloc_path();
2516 root = root->fs_info->extent_root; 2584 root = root->fs_info->extent_root;
2517 path->reada = 2; 2585 path->reada = 2;
2518 2586
2519again: 2587again:
2520 trans = btrfs_start_transaction(root, 1); 2588 if (btrfs_block_group_used(&shrink_block_group->item) > 0) {
2521 do_chunk_alloc(trans, root->fs_info->extent_root, 2589 trans = btrfs_start_transaction(root, 1);
2590 new_alloc_flags = update_block_group_flags(root,
2591 shrink_block_group->flags);
2592 do_chunk_alloc(trans, root->fs_info->extent_root,
2522 btrfs_block_group_used(&shrink_block_group->item) + 2593 btrfs_block_group_used(&shrink_block_group->item) +
2523 2 * 1024 * 1024, shrink_block_group->flags); 2594 2 * 1024 * 1024, new_alloc_flags);
2524 btrfs_end_transaction(trans, root); 2595 btrfs_end_transaction(trans, root);
2596 }
2525 shrink_block_group->ro = 1; 2597 shrink_block_group->ro = 1;
2526 2598
2527 total_found = 0; 2599 total_found = 0;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 1a74b5018699..994834474590 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -2864,6 +2864,15 @@ int btrfs_defrag_file(struct file *file) {
2864 goto out_unlock; 2864 goto out_unlock;
2865 } 2865 }
2866 } 2866 }
2867
2868#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
2869 ClearPageDirty(page);
2870#else
2871 cancel_dirty_page(page, PAGE_CACHE_SIZE);
2872#endif
2873 wait_on_page_writeback(page);
2874 set_page_extent_mapped(page);
2875
2867 page_start = (u64)page->index << PAGE_CACHE_SHIFT; 2876 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
2868 page_end = page_start + PAGE_CACHE_SIZE - 1; 2877 page_end = page_start + PAGE_CACHE_SIZE - 1;
2869 2878
@@ -3105,6 +3114,8 @@ long btrfs_ioctl(struct file *file, unsigned int
3105 return btrfs_ioctl_resize(root, (void __user *)arg); 3114 return btrfs_ioctl_resize(root, (void __user *)arg);
3106 case BTRFS_IOC_ADD_DEV: 3115 case BTRFS_IOC_ADD_DEV:
3107 return btrfs_ioctl_add_dev(root, (void __user *)arg); 3116 return btrfs_ioctl_add_dev(root, (void __user *)arg);
3117 case BTRFS_IOC_BALANCE:
3118 return btrfs_balance(root->fs_info->dev_root);
3108 } 3119 }
3109 3120
3110 return -ENOTTY; 3121 return -ENOTTY;
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index b93c15aa17db..6476ecbf132e 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -869,6 +869,107 @@ out:
869 return 0; 869 return 0;
870} 870}
871 871
872static u64 div_factor(u64 num, int factor)
873{
874 if (factor == 10)
875 return num;
876 num *= factor;
877 do_div(num, 10);
878 return num;
879}
880
881
882int btrfs_balance(struct btrfs_root *dev_root)
883{
884 int ret;
885 struct list_head *cur;
886 struct list_head *devices = &dev_root->fs_info->fs_devices->devices;
887 struct btrfs_device *device;
888 u64 old_size;
889 u64 size_to_free;
890 struct btrfs_path *path;
891 struct btrfs_key key;
892 struct btrfs_chunk *chunk;
893 struct btrfs_root *chunk_root = dev_root->fs_info->chunk_root;
894 struct btrfs_trans_handle *trans;
895 struct btrfs_key found_key;
896
897
898 dev_root = dev_root->fs_info->dev_root;
899
900 mutex_lock(&dev_root->fs_info->fs_mutex);
901 /* step one make some room on all the devices */
902 list_for_each(cur, devices) {
903 device = list_entry(cur, struct btrfs_device, dev_list);
904 old_size = device->total_bytes;
905 size_to_free = div_factor(old_size, 1);
906 size_to_free = min(size_to_free, (u64)1 * 1024 * 1024);
907 if (device->total_bytes - device->bytes_used > size_to_free)
908 continue;
909
910 ret = btrfs_shrink_device(device, old_size - size_to_free);
911 BUG_ON(ret);
912
913 trans = btrfs_start_transaction(dev_root, 1);
914 BUG_ON(!trans);
915
916 ret = btrfs_grow_device(trans, device, old_size);
917 BUG_ON(ret);
918
919 btrfs_end_transaction(trans, dev_root);
920 }
921
922 /* step two, relocate all the chunks */
923 path = btrfs_alloc_path();
924 BUG_ON(!path);
925
926 key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
927 key.offset = (u64)-1;
928 key.type = BTRFS_CHUNK_ITEM_KEY;
929
930 while(1) {
931 ret = btrfs_search_slot(NULL, chunk_root, &key, path, 0, 0);
932 if (ret < 0)
933 goto error;
934
935 /*
936 * this shouldn't happen, it means the last relocate
937 * failed
938 */
939 if (ret == 0)
940 break;
941
942 ret = btrfs_previous_item(chunk_root, path, 0,
943 BTRFS_CHUNK_ITEM_KEY);
944 if (ret) {
945 break;
946 }
947 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
948 path->slots[0]);
949 if (found_key.objectid != key.objectid)
950 break;
951 chunk = btrfs_item_ptr(path->nodes[0],
952 path->slots[0],
953 struct btrfs_chunk);
954 key.offset = found_key.offset;
955 /* chunk zero is special */
956 if (key.offset == 0)
957 break;
958
959 ret = btrfs_relocate_chunk(chunk_root,
960 chunk_root->root_key.objectid,
961 found_key.objectid,
962 found_key.offset);
963 BUG_ON(ret);
964 btrfs_release_path(chunk_root, path);
965 }
966 ret = 0;
967error:
968 btrfs_free_path(path);
969 mutex_unlock(&dev_root->fs_info->fs_mutex);
970 return ret;
971}
972
872/* 973/*
873 * shrinking a device means finding all of the device extents past 974 * shrinking a device means finding all of the device extents past
874 * the new size, and then following the back refs to the chunks. 975 * the new size, and then following the back refs to the chunks.
@@ -985,15 +1086,6 @@ int btrfs_add_system_chunk(struct btrfs_trans_handle *trans,
985 return 0; 1086 return 0;
986} 1087}
987 1088
988static u64 div_factor(u64 num, int factor)
989{
990 if (factor == 10)
991 return num;
992 num *= factor;
993 do_div(num, 10);
994 return num;
995}
996
997static u64 chunk_bytes_by_type(u64 type, u64 calc_size, int num_stripes, 1089static u64 chunk_bytes_by_type(u64 type, u64 calc_size, int num_stripes,
998 int sub_stripes) 1090 int sub_stripes)
999{ 1091{
@@ -1040,6 +1132,11 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
1040 int stripe_len = 64 * 1024; 1132 int stripe_len = 64 * 1024;
1041 struct btrfs_key key; 1133 struct btrfs_key key;
1042 1134
1135 if ((type & BTRFS_BLOCK_GROUP_RAID1) &&
1136 (type & BTRFS_BLOCK_GROUP_DUP)) {
1137 WARN_ON(1);
1138 type &= ~BTRFS_BLOCK_GROUP_DUP;
1139 }
1043 dev_list = &extent_root->fs_info->fs_devices->alloc_list; 1140 dev_list = &extent_root->fs_info->fs_devices->alloc_list;
1044 if (list_empty(dev_list)) 1141 if (list_empty(dev_list))
1045 return -ENOSPC; 1142 return -ENOSPC;
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 6f173450378b..3f9a17f2e41b 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -134,4 +134,5 @@ struct btrfs_device *btrfs_find_device(struct btrfs_root *root, u64 devid,
134 u8 *uuid); 134 u8 *uuid);
135int btrfs_shrink_device(struct btrfs_device *device, u64 new_size); 135int btrfs_shrink_device(struct btrfs_device *device, u64 new_size);
136int btrfs_init_new_device(struct btrfs_root *root, char *path); 136int btrfs_init_new_device(struct btrfs_root *root, char *path);
137int btrfs_balance(struct btrfs_root *dev_root);
137#endif 138#endif