aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorJosef Bacik <josef@redhat.com>2009-09-11 16:11:19 -0400
committerChris Mason <chris.mason@oracle.com>2009-09-21 19:23:48 -0400
commitba1bf4818baf68d914ef9e3b06fbea6acb674fe4 (patch)
tree1068d6ce508333a75668181bfe0e42956d4d8780 /fs
parent1fb58a6051cd904a9f8e0344b22e31921d6b5a4d (diff)
Btrfs: make balance code choose more wisely when relocating
Currently, we can panic the box if the first block group we go to move is of a type where there is no space left to move those extents. For example, if we fill the disk up with data, and then we try to balance and we have neither room to move the data nor room to allocate new chunks, we will panic. Change this by checking to see if we have room to move this chunk around, and if not, return -ENOSPC and move on to the next chunk. This will make sure we remove block groups that are movable, for example if we have a lot of empty metadata block groups, and that way we make room to be able to balance our data chunks as well. Tested this with an fs that would panic on btrfs-vol -b normally, but no longer panics with this patch. V1->V2: - actually search for a free extent on the device to make sure we can allocate a chunk if need be. - fix btrfs_shrink_device to make sure we actually try to relocate all the chunks, and then if we can't, return -ENOSPC so that if we are doing a btrfs-vol -r we don't remove the device with data still on it. - check to make sure the block group we are going to relocate isn't the last one in that particular space. - fix a bug in btrfs_shrink_device where we would change the device's size and not fix it if we fail to do our relocate. Signed-off-by: Josef Bacik <jbacik@redhat.com> Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs')
-rw-r--r--fs/btrfs/ctree.h1
-rw-r--r--fs/btrfs/extent-tree.c87
-rw-r--r--fs/btrfs/volumes.c75
-rw-r--r--fs/btrfs/volumes.h3
4 files changed, 148 insertions, 18 deletions
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index bc57e236ac64..2b15fb97d23f 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -2006,6 +2006,7 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
2006int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr); 2006int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr);
2007int btrfs_free_block_groups(struct btrfs_fs_info *info); 2007int btrfs_free_block_groups(struct btrfs_fs_info *info);
2008int btrfs_read_block_groups(struct btrfs_root *root); 2008int btrfs_read_block_groups(struct btrfs_root *root);
2009int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr);
2009int btrfs_make_block_group(struct btrfs_trans_handle *trans, 2010int btrfs_make_block_group(struct btrfs_trans_handle *trans,
2010 struct btrfs_root *root, u64 bytes_used, 2011 struct btrfs_root *root, u64 bytes_used,
2011 u64 type, u64 chunk_objectid, u64 chunk_offset, 2012 u64 type, u64 chunk_objectid, u64 chunk_offset,
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 4bd04f3fa8bb..4c7c9467f224 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -7402,6 +7402,93 @@ out:
7402} 7402}
7403#endif 7403#endif
7404 7404
7405/*
7406 * checks to see if it's even possible to relocate the block group looked up from @bytenr.
7407 *
7408 * @return - -1 if it's not a good idea to relocate this block group (or it was not found), 0 if it's
7409 * ok to go ahead and try.
7410 */
7411int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr)
7412{
7413 struct btrfs_block_group_cache *block_group;
7414 struct btrfs_space_info *space_info;
7415 struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
7416 struct btrfs_device *device;
7417 int full = 0;
7418 int ret = 0;
7419
7420 block_group = btrfs_lookup_block_group(root->fs_info, bytenr);
7421
7422 /* odd, couldn't find the block group, leave it alone */
7423 if (!block_group)
7424 return -1;
7425
7426 /* no bytes used, so there is nothing to move; ret is still 0 */
7427 if (!btrfs_block_group_used(&block_group->item))
7428 goto out;
7429
7430 space_info = block_group->space_info;
7431 spin_lock(&space_info->lock);
7432
7433 full = space_info->full;
7434
7435 /*
7436 * if this is the last block group we have in this space (the space's
7437 * total_bytes equals this block group's key.offset), we can't relocate it.
7438 */
7439 if (space_info->total_bytes == block_group->key.offset) {
7440 ret = -1;
7441 spin_unlock(&space_info->lock);
7442 goto out;
7443 }
7444
7445 /*
7446 * need to make sure we have room in the space to handle all of the
7447 * extents from this block group. If we can, we're good (ret stays 0)
7448 */
7449 if (space_info->bytes_used + space_info->bytes_reserved +
7450 space_info->bytes_pinned + space_info->bytes_readonly +
7451 btrfs_block_group_used(&block_group->item) <
7452 space_info->total_bytes) {
7453 spin_unlock(&space_info->lock);
7454 goto out;
7455 }
7456 spin_unlock(&space_info->lock);
7457
7458 /*
7459 * ok we don't have enough space, but maybe we have free space on our
7460 * devices to allocate new chunks for relocation, so loop through our
7461 * alloc devices and guess if we have enough space. However, if we
7462 * were marked as full (value sampled above under the space_info lock), then
7463 * we know there aren't enough chunks, and we can just return.
7464 */
7465 ret = -1;
7466 if (full)
7467 goto out;
7468
7469 mutex_lock(&root->fs_info->chunk_mutex);
7470 list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) {
7471 u64 min_free = btrfs_block_group_used(&block_group->item);
7472 u64 dev_offset, max_avail;
7473
7474 /*
7475 * check to make sure we can actually find a free extent on this device with
7476 * enough space to fit our block group's used bytes; stop at the first device that has one.
7477 */
7478 if (device->total_bytes > device->bytes_used + min_free) {
7479 ret = find_free_dev_extent(NULL, device, min_free,
7480 &dev_offset, &max_avail);
7481 if (!ret)
7482 break;
7483 ret = -1; /* collapse any search failure to the documented -1 */
7484 }
7485 }
7486 mutex_unlock(&root->fs_info->chunk_mutex);
7487out:
7488 btrfs_put_block_group(block_group);
7489 return ret;
7490}
7491
7405static int find_first_block_group(struct btrfs_root *root, 7492static int find_first_block_group(struct btrfs_root *root,
7406 struct btrfs_path *path, struct btrfs_key *key) 7493 struct btrfs_path *path, struct btrfs_key *key)
7407{ 7494{
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index d2358c06bbd9..be953afe804c 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -719,10 +719,9 @@ error:
719 * called very infrequently and that a given device has a small number 719 * called very infrequently and that a given device has a small number
720 * of extents 720 * of extents
721 */ 721 */
722static noinline int find_free_dev_extent(struct btrfs_trans_handle *trans, 722int find_free_dev_extent(struct btrfs_trans_handle *trans,
723 struct btrfs_device *device, 723 struct btrfs_device *device, u64 num_bytes,
724 u64 num_bytes, u64 *start, 724 u64 *start, u64 *max_avail)
725 u64 *max_avail)
726{ 725{
727 struct btrfs_key key; 726 struct btrfs_key key;
728 struct btrfs_root *root = device->dev_root; 727 struct btrfs_root *root = device->dev_root;
@@ -1736,6 +1735,10 @@ static int btrfs_relocate_chunk(struct btrfs_root *root,
1736 extent_root = root->fs_info->extent_root; 1735 extent_root = root->fs_info->extent_root;
1737 em_tree = &root->fs_info->mapping_tree.map_tree; 1736 em_tree = &root->fs_info->mapping_tree.map_tree;
1738 1737
1738 ret = btrfs_can_relocate(extent_root, chunk_offset);
1739 if (ret)
1740 return -ENOSPC;
1741
1739 /* step one, relocate all the extents inside this chunk */ 1742 /* step one, relocate all the extents inside this chunk */
1740 ret = btrfs_relocate_block_group(extent_root, chunk_offset); 1743 ret = btrfs_relocate_block_group(extent_root, chunk_offset);
1741 BUG_ON(ret); 1744 BUG_ON(ret);
@@ -1807,12 +1810,15 @@ static int btrfs_relocate_sys_chunks(struct btrfs_root *root)
1807 struct btrfs_key found_key; 1810 struct btrfs_key found_key;
1808 u64 chunk_tree = chunk_root->root_key.objectid; 1811 u64 chunk_tree = chunk_root->root_key.objectid;
1809 u64 chunk_type; 1812 u64 chunk_type;
1813 bool retried = false;
1814 int failed = 0;
1810 int ret; 1815 int ret;
1811 1816
1812 path = btrfs_alloc_path(); 1817 path = btrfs_alloc_path();
1813 if (!path) 1818 if (!path)
1814 return -ENOMEM; 1819 return -ENOMEM;
1815 1820
1821again:
1816 key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID; 1822 key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
1817 key.offset = (u64)-1; 1823 key.offset = (u64)-1;
1818 key.type = BTRFS_CHUNK_ITEM_KEY; 1824 key.type = BTRFS_CHUNK_ITEM_KEY;
@@ -1842,7 +1848,10 @@ static int btrfs_relocate_sys_chunks(struct btrfs_root *root)
1842 ret = btrfs_relocate_chunk(chunk_root, chunk_tree, 1848 ret = btrfs_relocate_chunk(chunk_root, chunk_tree,
1843 found_key.objectid, 1849 found_key.objectid,
1844 found_key.offset); 1850 found_key.offset);
1845 BUG_ON(ret); 1851 if (ret == -ENOSPC)
1852 failed++;
1853 else if (ret)
1854 BUG();
1846 } 1855 }
1847 1856
1848 if (found_key.offset == 0) 1857 if (found_key.offset == 0)
@@ -1850,6 +1859,14 @@ static int btrfs_relocate_sys_chunks(struct btrfs_root *root)
1850 key.offset = found_key.offset - 1; 1859 key.offset = found_key.offset - 1;
1851 } 1860 }
1852 ret = 0; 1861 ret = 0;
1862 if (failed && !retried) {
1863 failed = 0;
1864 retried = true;
1865 goto again;
1866 } else if (failed && retried) {
1867 WARN_ON(1);
1868 ret = -ENOSPC;
1869 }
1853error: 1870error:
1854 btrfs_free_path(path); 1871 btrfs_free_path(path);
1855 return ret; 1872 return ret;
@@ -1894,6 +1911,8 @@ int btrfs_balance(struct btrfs_root *dev_root)
1894 continue; 1911 continue;
1895 1912
1896 ret = btrfs_shrink_device(device, old_size - size_to_free); 1913 ret = btrfs_shrink_device(device, old_size - size_to_free);
1914 if (ret == -ENOSPC)
1915 break;
1897 BUG_ON(ret); 1916 BUG_ON(ret);
1898 1917
1899 trans = btrfs_start_transaction(dev_root, 1); 1918 trans = btrfs_start_transaction(dev_root, 1);
@@ -1938,9 +1957,8 @@ int btrfs_balance(struct btrfs_root *dev_root)
1938 chunk = btrfs_item_ptr(path->nodes[0], 1957 chunk = btrfs_item_ptr(path->nodes[0],
1939 path->slots[0], 1958 path->slots[0],
1940 struct btrfs_chunk); 1959 struct btrfs_chunk);
1941 key.offset = found_key.offset;
1942 /* chunk zero is special */ 1960 /* chunk zero is special */
1943 if (key.offset == 0) 1961 if (found_key.offset == 0)
1944 break; 1962 break;
1945 1963
1946 btrfs_release_path(chunk_root, path); 1964 btrfs_release_path(chunk_root, path);
@@ -1948,7 +1966,8 @@ int btrfs_balance(struct btrfs_root *dev_root)
1948 chunk_root->root_key.objectid, 1966 chunk_root->root_key.objectid,
1949 found_key.objectid, 1967 found_key.objectid,
1950 found_key.offset); 1968 found_key.offset);
1951 BUG_ON(ret); 1969 BUG_ON(ret && ret != -ENOSPC);
1970 key.offset = found_key.offset - 1;
1952 } 1971 }
1953 ret = 0; 1972 ret = 0;
1954error: 1973error:
@@ -1974,10 +1993,13 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
1974 u64 chunk_offset; 1993 u64 chunk_offset;
1975 int ret; 1994 int ret;
1976 int slot; 1995 int slot;
1996 int failed = 0;
1997 bool retried = false;
1977 struct extent_buffer *l; 1998 struct extent_buffer *l;
1978 struct btrfs_key key; 1999 struct btrfs_key key;
1979 struct btrfs_super_block *super_copy = &root->fs_info->super_copy; 2000 struct btrfs_super_block *super_copy = &root->fs_info->super_copy;
1980 u64 old_total = btrfs_super_total_bytes(super_copy); 2001 u64 old_total = btrfs_super_total_bytes(super_copy);
2002 u64 old_size = device->total_bytes;
1981 u64 diff = device->total_bytes - new_size; 2003 u64 diff = device->total_bytes - new_size;
1982 2004
1983 if (new_size >= device->total_bytes) 2005 if (new_size >= device->total_bytes)
@@ -1987,12 +2009,6 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
1987 if (!path) 2009 if (!path)
1988 return -ENOMEM; 2010 return -ENOMEM;
1989 2011
1990 trans = btrfs_start_transaction(root, 1);
1991 if (!trans) {
1992 ret = -ENOMEM;
1993 goto done;
1994 }
1995
1996 path->reada = 2; 2012 path->reada = 2;
1997 2013
1998 lock_chunks(root); 2014 lock_chunks(root);
@@ -2001,8 +2017,8 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
2001 if (device->writeable) 2017 if (device->writeable)
2002 device->fs_devices->total_rw_bytes -= diff; 2018 device->fs_devices->total_rw_bytes -= diff;
2003 unlock_chunks(root); 2019 unlock_chunks(root);
2004 btrfs_end_transaction(trans, root);
2005 2020
2021again:
2006 key.objectid = device->devid; 2022 key.objectid = device->devid;
2007 key.offset = (u64)-1; 2023 key.offset = (u64)-1;
2008 key.type = BTRFS_DEV_EXTENT_KEY; 2024 key.type = BTRFS_DEV_EXTENT_KEY;
@@ -2017,6 +2033,7 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
2017 goto done; 2033 goto done;
2018 if (ret) { 2034 if (ret) {
2019 ret = 0; 2035 ret = 0;
2036 btrfs_release_path(root, path);
2020 break; 2037 break;
2021 } 2038 }
2022 2039
@@ -2024,14 +2041,18 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
2024 slot = path->slots[0]; 2041 slot = path->slots[0];
2025 btrfs_item_key_to_cpu(l, &key, path->slots[0]); 2042 btrfs_item_key_to_cpu(l, &key, path->slots[0]);
2026 2043
2027 if (key.objectid != device->devid) 2044 if (key.objectid != device->devid) {
2045 btrfs_release_path(root, path);
2028 break; 2046 break;
2047 }
2029 2048
2030 dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent); 2049 dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
2031 length = btrfs_dev_extent_length(l, dev_extent); 2050 length = btrfs_dev_extent_length(l, dev_extent);
2032 2051
2033 if (key.offset + length <= new_size) 2052 if (key.offset + length <= new_size) {
2053 btrfs_release_path(root, path);
2034 break; 2054 break;
2055 }
2035 2056
2036 chunk_tree = btrfs_dev_extent_chunk_tree(l, dev_extent); 2057 chunk_tree = btrfs_dev_extent_chunk_tree(l, dev_extent);
2037 chunk_objectid = btrfs_dev_extent_chunk_objectid(l, dev_extent); 2058 chunk_objectid = btrfs_dev_extent_chunk_objectid(l, dev_extent);
@@ -2040,8 +2061,26 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
2040 2061
2041 ret = btrfs_relocate_chunk(root, chunk_tree, chunk_objectid, 2062 ret = btrfs_relocate_chunk(root, chunk_tree, chunk_objectid,
2042 chunk_offset); 2063 chunk_offset);
2043 if (ret) 2064 if (ret && ret != -ENOSPC)
2044 goto done; 2065 goto done;
2066 if (ret == -ENOSPC)
2067 failed++;
2068 key.offset -= 1;
2069 }
2070
2071 if (failed && !retried) {
2072 failed = 0;
2073 retried = true;
2074 goto again;
2075 } else if (failed && retried) {
2076 ret = -ENOSPC;
2077 lock_chunks(root);
2078
2079 device->total_bytes = old_size;
2080 if (device->writeable)
2081 device->fs_devices->total_rw_bytes += diff;
2082 unlock_chunks(root);
2083 goto done;
2045 } 2084 }
2046 2085
2047 /* Shrinking succeeded, else we would be at "done". */ 2086 /* Shrinking succeeded, else we would be at "done". */
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 5139a833f721..31b0fabdd2ea 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -181,4 +181,7 @@ int btrfs_balance(struct btrfs_root *dev_root);
181void btrfs_unlock_volumes(void); 181void btrfs_unlock_volumes(void);
182void btrfs_lock_volumes(void); 182void btrfs_lock_volumes(void);
183int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset); 183int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset);
184int find_free_dev_extent(struct btrfs_trans_handle *trans,
185 struct btrfs_device *device, u64 num_bytes,
186 u64 *start, u64 *max_avail);
184#endif 187#endif