author    Chris Mason <chris.mason@oracle.com>  2010-12-13 15:06:46 -0500
committer Chris Mason <chris.mason@oracle.com>  2010-12-13 20:07:01 -0500
commit    83a50de97fe96aca82389e061862ed760ece2283 (patch)
tree      95421594f180c32cca1ff7f6881f4cf272cf2b5c /fs/btrfs/extent-tree.c
parent    cd02dca56442e1504fd6bc5b96f7f1870162b266 (diff)
Btrfs: prevent RAID level downgrades when space is low
The extent allocator has code that allows us to fill allocations from any
available block group, even if it doesn't match the raid level we've
requested.

This was put in because adding a new drive to a filesystem made with the
default mkfs options actually upgrades the metadata from single spindle
dup to full RAID1.

But, the code also allows us to allocate from a raid0 chunk when we
really want a raid1 or raid10 chunk.  This can cause big trouble because
mkfs creates a small (4MB) raid0 chunk for data and metadata which then
goes unused for raid1/raid10 installs.

The allocator will happily wander in and allocate from that chunk when
things get tight, which is not correct.

The fix here is to make sure that we provide duplication when the caller
has asked for it.  It does allow the dups to be any raid level, which
preserves the dup->raid1 upgrade abilities.

Signed-off-by: Chris Mason <chris.mason@oracle.com>
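To make the policy concrete, here is a minimal, self-contained sketch of
the check this patch adds.  The names (may_allocate, EXTRA_COPIES, the
BLOCK_GROUP_* flags) are illustrative stand-ins for the kernel's
BTRFS_BLOCK_GROUP_* bits and block_group->flags, not actual btrfs code:

    /*
     * Hypothetical sketch of the allocation policy; flag names are
     * illustrative, not the kernel's.
     */
    #include <stdio.h>

    #define BLOCK_GROUP_DUP    (1UL << 0)
    #define BLOCK_GROUP_RAID0  (1UL << 1)
    #define BLOCK_GROUP_RAID1  (1UL << 2)
    #define BLOCK_GROUP_RAID10 (1UL << 3)

    /* mask of profiles that store more than one copy of each block */
    #define EXTRA_COPIES (BLOCK_GROUP_DUP | BLOCK_GROUP_RAID1 | \
                          BLOCK_GROUP_RAID10)

    /*
     * Return 1 if a request for profile `wanted` may be filled from a
     * block group carrying `group_flags`.  A redundant request must land
     * in a redundant group, but any redundant group will do.
     */
    static int may_allocate(unsigned long wanted, unsigned long group_flags)
    {
        if ((wanted & EXTRA_COPIES) && !(group_flags & EXTRA_COPIES))
            return 0;
        return 1;
    }

    int main(void)
    {
        /* raid1 metadata must not land in the stale raid0 chunk ... */
        printf("%d\n", may_allocate(BLOCK_GROUP_RAID1, BLOCK_GROUP_RAID0)); /* 0 */
        /* ... but may still be filled from a dup chunk (the upgrade path) */
        printf("%d\n", may_allocate(BLOCK_GROUP_RAID1, BLOCK_GROUP_DUP));   /* 1 */
        /* non-redundant requests can go anywhere, e.g. raid0 from raid1 */
        printf("%d\n", may_allocate(BLOCK_GROUP_RAID0, BLOCK_GROUP_RAID1)); /* 1 */
        return 0;
    }

The key design point is that the mask test only rejects a block group
when redundancy was requested and the group provides none; any redundant
profile satisfies any redundant request, which is what keeps the
dup->raid1 upgrade path working.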
Diffstat (limited to 'fs/btrfs/extent-tree.c')
-rw-r--r--  fs/btrfs/extent-tree.c  20 +++++++++++++++++++-
1 file changed, 19 insertions(+), 1 deletion(-)
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 4be231e0d2bd..7e5162e5c411 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -4943,6 +4943,25 @@ search:
 		btrfs_get_block_group(block_group);
 		search_start = block_group->key.objectid;
 
+		/*
+		 * this can happen if we end up cycling through all the
+		 * raid types, but we want to make sure we only allocate
+		 * for the proper type.
+		 */
+		if (!block_group_bits(block_group, data)) {
+			u64 extra = BTRFS_BLOCK_GROUP_DUP |
+				BTRFS_BLOCK_GROUP_RAID1 |
+				BTRFS_BLOCK_GROUP_RAID10;
+
+			/*
+			 * if they asked for extra copies and this block group
+			 * doesn't provide them, bail.  This does allow us to
+			 * fill raid0 from raid1.
+			 */
+			if ((data & extra) && !(block_group->flags & extra))
+				goto loop;
+		}
+
 have_block_group:
 		if (unlikely(block_group->cached == BTRFS_CACHE_NO)) {
 			u64 free_percent;
@@ -8273,7 +8292,6 @@ int btrfs_read_block_groups(struct btrfs_root *root)
 			break;
 		if (ret != 0)
 			goto error;
-
 		leaf = path->nodes[0];
 		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
 		cache = kzalloc(sizeof(*cache), GFP_NOFS);