aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorChris Mason <chris.mason@oracle.com>2009-06-04 15:34:51 -0400
committerChris Mason <chris.mason@oracle.com>2009-06-04 15:41:27 -0400
commit44fb5511638938a2c37c895abc14df648ffc07e9 (patch)
tree0554a8a71403958b279ddcb601a3d0bb85943a59
parent2cc3c559fb2fe8cecca82a517bc56e88b0c1effd (diff)
Btrfs: Fix oops and use after free during space balancing
The btrfs allocator uses list_for_each to walk the available block groups when searching for free blocks. It starts off with a hint to help find the best block group for a given allocation. The hint is resolved into a block group, but we don't properly check to make sure the block group we find isn't in the middle of being freed due to filesystem shrinking or balancing. If it is being freed, the list pointers in it are bogus and can't be trusted. But, the code happily goes along and uses them in the list_for_each loop, leading to all kinds of fun. The fix used here is to check to make sure the block group we find really is on the list before we use it. list_del_init is used when removing it from the list, so we can do a proper check. The allocation clustering code has a similar bug where it will trust the block group in the current free space cluster. If our allocation flags have changed (going from single spindle dup to raid1 for example) because the drives in the FS have changed, we're not allowed to use the old block group any more. The fix used here is to check the current cluster against the current allocation flags. Signed-off-by: Chris Mason <chris.mason@oracle.com>
-rw-r--r--fs/btrfs/extent-tree.c51
1 files changed, 48 insertions, 3 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 3e2c7c738f23..35af93355063 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2622,7 +2622,18 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
2622 search_start); 2622 search_start);
2623 if (block_group && block_group_bits(block_group, data)) { 2623 if (block_group && block_group_bits(block_group, data)) {
2624 down_read(&space_info->groups_sem); 2624 down_read(&space_info->groups_sem);
2625 goto have_block_group; 2625 if (list_empty(&block_group->list) ||
2626 block_group->ro) {
2627 /*
2628 * someone is removing this block group,
2629 * we can't jump into the have_block_group
2630 * target because our list pointers are not
2631 * valid
2632 */
2633 btrfs_put_block_group(block_group);
2634 up_read(&space_info->groups_sem);
2635 } else
2636 goto have_block_group;
2626 } else if (block_group) { 2637 } else if (block_group) {
2627 btrfs_put_block_group(block_group); 2638 btrfs_put_block_group(block_group);
2628 } 2639 }
@@ -2656,6 +2667,13 @@ have_block_group:
2656 * people trying to start a new cluster 2667 * people trying to start a new cluster
2657 */ 2668 */
2658 spin_lock(&last_ptr->refill_lock); 2669 spin_lock(&last_ptr->refill_lock);
2670 if (last_ptr->block_group &&
2671 (last_ptr->block_group->ro ||
2672 !block_group_bits(last_ptr->block_group, data))) {
2673 offset = 0;
2674 goto refill_cluster;
2675 }
2676
2659 offset = btrfs_alloc_from_cluster(block_group, last_ptr, 2677 offset = btrfs_alloc_from_cluster(block_group, last_ptr,
2660 num_bytes, search_start); 2678 num_bytes, search_start);
2661 if (offset) { 2679 if (offset) {
@@ -2681,10 +2699,17 @@ have_block_group:
2681 2699
2682 last_ptr_loop = 1; 2700 last_ptr_loop = 1;
2683 search_start = block_group->key.objectid; 2701 search_start = block_group->key.objectid;
2702 /*
2703 * we know this block group is properly
2704 * in the list because
2705 * btrfs_remove_block_group, drops the
2706 * cluster before it removes the block
2707 * group from the list
2708 */
2684 goto have_block_group; 2709 goto have_block_group;
2685 } 2710 }
2686 spin_unlock(&last_ptr->lock); 2711 spin_unlock(&last_ptr->lock);
2687 2712refill_cluster:
2688 /* 2713 /*
2689 * this cluster didn't work out, free it and 2714 * this cluster didn't work out, free it and
2690 * start over 2715 * start over
@@ -5968,6 +5993,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
5968{ 5993{
5969 struct btrfs_path *path; 5994 struct btrfs_path *path;
5970 struct btrfs_block_group_cache *block_group; 5995 struct btrfs_block_group_cache *block_group;
5996 struct btrfs_free_cluster *cluster;
5971 struct btrfs_key key; 5997 struct btrfs_key key;
5972 int ret; 5998 int ret;
5973 5999
@@ -5979,6 +6005,21 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
5979 6005
5980 memcpy(&key, &block_group->key, sizeof(key)); 6006 memcpy(&key, &block_group->key, sizeof(key));
5981 6007
6008 /* make sure this block group isn't part of an allocation cluster */
6009 cluster = &root->fs_info->data_alloc_cluster;
6010 spin_lock(&cluster->refill_lock);
6011 btrfs_return_cluster_to_free_space(block_group, cluster);
6012 spin_unlock(&cluster->refill_lock);
6013
6014 /*
6015 * make sure this block group isn't part of a metadata
6016 * allocation cluster
6017 */
6018 cluster = &root->fs_info->meta_alloc_cluster;
6019 spin_lock(&cluster->refill_lock);
6020 btrfs_return_cluster_to_free_space(block_group, cluster);
6021 spin_unlock(&cluster->refill_lock);
6022
5982 path = btrfs_alloc_path(); 6023 path = btrfs_alloc_path();
5983 BUG_ON(!path); 6024 BUG_ON(!path);
5984 6025
@@ -5988,7 +6029,11 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
5988 spin_unlock(&root->fs_info->block_group_cache_lock); 6029 spin_unlock(&root->fs_info->block_group_cache_lock);
5989 btrfs_remove_free_space_cache(block_group); 6030 btrfs_remove_free_space_cache(block_group);
5990 down_write(&block_group->space_info->groups_sem); 6031 down_write(&block_group->space_info->groups_sem);
5991 list_del(&block_group->list); 6032 /*
6033 * we must use list_del_init so people can check to see if they
6034 * are still on the list after taking the semaphore
6035 */
6036 list_del_init(&block_group->list);
5992 up_write(&block_group->space_info->groups_sem); 6037 up_write(&block_group->space_info->groups_sem);
5993 6038
5994 spin_lock(&block_group->space_info->lock); 6039 spin_lock(&block_group->space_info->lock);