aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/free-space-cache.c
diff options
context:
space:
mode:
authorMiao Xie <miaox@cn.fujitsu.com>2014-06-18 22:42:50 -0400
committerChris Mason <clm@fb.com>2014-06-19 17:20:54 -0400
commite570fd27f2c5d7eac3876bccf99e9838d7f911a3 (patch)
tree3d73f4d8a2700fd441be0abe36cf7174bfb84c56 /fs/btrfs/free-space-cache.c
parent5349d6c3ffead27d693fdac21270541fa95ef33d (diff)
Btrfs: fix broken free space cache after the system crashed
When we mounted the filesystem after a crash, we got the following messages:

  BTRFS error (device xxx): block group xxxx has wrong amount of free space
  BTRFS error (device xxx): failed to load free space cache for block group xxx

This happened because we did not update the metadata of the allocated space (in the extent tree) until the file data had been written to disk. During this window, there was no information about the allocated space in either the extent tree or the free space cache. When we wrote out the free space cache at this time (at transaction commit), that space was lost.

In fact, only the free space that is used to store file data had this problem; other allocations did not, because their metadata is updated in the same transaction context.

There are several ways to fix the above problem:
- track the allocated space, and write it out when we write out the free space cache
- account for the size of the allocated space that is used to store file data; if the size is not zero, don't write out the free space cache.

The first one is complex and may hurt performance. This patch chooses the second method: we use a per-block-group variable to account for the size of that allocated space. Besides that, we also introduce a per-block-group read-write semaphore to avoid the race between allocation and the free space cache write-out.

Signed-off-by: Miao Xie <miaox@cn.fujitsu.com>
Signed-off-by: Chris Mason <clm@fb.com>
Diffstat (limited to 'fs/btrfs/free-space-cache.c')
-rw-r--r--fs/btrfs/free-space-cache.c33
1 files changed, 33 insertions, 0 deletions
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index a852e15173e5..2b0a627cb5f9 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -680,6 +680,13 @@ static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
680 generation = btrfs_free_space_generation(leaf, header); 680 generation = btrfs_free_space_generation(leaf, header);
681 btrfs_release_path(path); 681 btrfs_release_path(path);
682 682
683 if (!BTRFS_I(inode)->generation) {
684 btrfs_info(root->fs_info,
685 "The free space cache file (%llu) is invalid. skip it\n",
686 offset);
687 return 0;
688 }
689
683 if (BTRFS_I(inode)->generation != generation) { 690 if (BTRFS_I(inode)->generation != generation) {
684 btrfs_err(root->fs_info, 691 btrfs_err(root->fs_info,
685 "free space inode generation (%llu) " 692 "free space inode generation (%llu) "
@@ -1107,6 +1114,20 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
1107 if (ret) 1114 if (ret)
1108 return -1; 1115 return -1;
1109 1116
1117 if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA)) {
1118 down_write(&block_group->data_rwsem);
1119 spin_lock(&block_group->lock);
1120 if (block_group->delalloc_bytes) {
1121 block_group->disk_cache_state = BTRFS_DC_WRITTEN;
1122 spin_unlock(&block_group->lock);
1123 up_write(&block_group->data_rwsem);
1124 BTRFS_I(inode)->generation = 0;
1125 ret = 0;
1126 goto out;
1127 }
1128 spin_unlock(&block_group->lock);
1129 }
1130
1110 /* Lock all pages first so we can lock the extent safely. */ 1131 /* Lock all pages first so we can lock the extent safely. */
1111 io_ctl_prepare_pages(&io_ctl, inode, 0); 1132 io_ctl_prepare_pages(&io_ctl, inode, 0);
1112 1133
@@ -1145,6 +1166,8 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
1145 if (ret) 1166 if (ret)
1146 goto out_nospc; 1167 goto out_nospc;
1147 1168
1169 if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA))
1170 up_write(&block_group->data_rwsem);
1148 /* 1171 /*
1149 * Release the pages and unlock the extent, we will flush 1172 * Release the pages and unlock the extent, we will flush
1150 * them out later 1173 * them out later
@@ -1173,6 +1196,10 @@ out:
1173 1196
1174out_nospc: 1197out_nospc:
1175 cleanup_write_cache_enospc(inode, &io_ctl, &cached_state, &bitmap_list); 1198 cleanup_write_cache_enospc(inode, &io_ctl, &cached_state, &bitmap_list);
1199
1200 if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA))
1201 up_write(&block_group->data_rwsem);
1202
1176 goto out; 1203 goto out;
1177} 1204}
1178 1205
@@ -1192,6 +1219,12 @@ int btrfs_write_out_cache(struct btrfs_root *root,
1192 spin_unlock(&block_group->lock); 1219 spin_unlock(&block_group->lock);
1193 return 0; 1220 return 0;
1194 } 1221 }
1222
1223 if (block_group->delalloc_bytes) {
1224 block_group->disk_cache_state = BTRFS_DC_WRITTEN;
1225 spin_unlock(&block_group->lock);
1226 return 0;
1227 }
1195 spin_unlock(&block_group->lock); 1228 spin_unlock(&block_group->lock);
1196 1229
1197 inode = lookup_free_space_inode(root, block_group, path); 1230 inode = lookup_free_space_inode(root, block_group, path);