aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLiu Bo <liubo2009@cn.fujitsu.com>2012-07-06 05:31:34 -0400
committerChris Mason <chris.mason@fusionio.com>2012-07-23 16:28:02 -0400
commitcf7c1ef6e1fe05864369f59dd516e816b11de7d0 (patch)
tree53284520447b2d62cd221e76b77a28b5394a7cac
parent067893842341e7b7487062367ecfaa46c97505e0 (diff)
Btrfs: fix a bug of writing free space cache during balance
Here is the whole story: 1) A free space cache consists of two parts: o the free space cache inode, which is special because it's stored in the root tree. o the free space info, which is stored as the above inode's file data. But we only build up a new inode and do not flush its free space info onto disk when we _clear and setup_ the free space cache, and this ends up with the block group cache's cache_state remaining DC_SETUP instead of DC_WRITTEN. And holding DC_SETUP means that we will not truncate this free space cache inode, which means the disk offset of its file extent will remain _unchanged_ at least until the next transaction finishes committing. 2) We can set a block group readonly when we relocate the block group. However, if the readonly block group covers the disk offset where our free space cache inode is going to write, it will force the free space cache inode into cow_file_range() and it'll end up hitting a BUG_ON. 3) Based on the above analysis, we fix this bug by adding the missing dirty flag. 4) However, it's not over; there is still another case, nospace_cache. With nospace_cache, we do not want to set the dirty flag; instead we just truncate the free space cache inode and bail out after setting the cache state to DC_WRITTEN. We can benefit from this since it saves us another 'pre-allocation' part, which usually costs a lot. Signed-off-by: Liu Bo <liubo2009@cn.fujitsu.com> Signed-off-by: Miao Xie <miaox@cn.fujitsu.com> Signed-off-by: Josef Bacik <jbacik@fusionio.com>
-rw-r--r--fs/btrfs/extent-tree.c24
1 files changed, 21 insertions, 3 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 67bd12a52369..3ca26d84cce5 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2903,8 +2903,13 @@ again:
2903 } 2903 }
2904 2904
2905 spin_lock(&block_group->lock); 2905 spin_lock(&block_group->lock);
2906 if (block_group->cached != BTRFS_CACHE_FINISHED) { 2906 if (block_group->cached != BTRFS_CACHE_FINISHED ||
2907 /* We're not cached, don't bother trying to write stuff out */ 2907 !btrfs_test_opt(root, SPACE_CACHE)) {
2908 /*
2909 * don't bother trying to write stuff out _if_
2910 * a) we're not cached,
2911 * b) we're with nospace_cache mount option.
2912 */
2908 dcs = BTRFS_DC_WRITTEN; 2913 dcs = BTRFS_DC_WRITTEN;
2909 spin_unlock(&block_group->lock); 2914 spin_unlock(&block_group->lock);
2910 goto out_put; 2915 goto out_put;
@@ -7614,8 +7619,21 @@ int btrfs_read_block_groups(struct btrfs_root *root)
7614 INIT_LIST_HEAD(&cache->list); 7619 INIT_LIST_HEAD(&cache->list);
7615 INIT_LIST_HEAD(&cache->cluster_list); 7620 INIT_LIST_HEAD(&cache->cluster_list);
7616 7621
7617 if (need_clear) 7622 if (need_clear) {
7623 /*
7624 * When we mount with old space cache, we need to
7625 * set BTRFS_DC_CLEAR and set dirty flag.
7626 *
7627 * a) Setting 'BTRFS_DC_CLEAR' makes sure that we
7628 * truncate the old free space cache inode and
7629 * setup a new one.
7630 * b) Setting 'dirty flag' makes sure that we flush
7631 * the new space cache info onto disk.
7632 */
7618 cache->disk_cache_state = BTRFS_DC_CLEAR; 7633 cache->disk_cache_state = BTRFS_DC_CLEAR;
7634 if (btrfs_test_opt(root, SPACE_CACHE))
7635 cache->dirty = 1;
7636 }
7619 7637
7620 read_extent_buffer(leaf, &cache->item, 7638 read_extent_buffer(leaf, &cache->item,
7621 btrfs_item_ptr_offset(leaf, path->slots[0]), 7639 btrfs_item_ptr_offset(leaf, path->slots[0]),