path: root/fs/btrfs/free-space-cache.c
author    Chris Mason <clm@fb.com>  2015-04-06 15:46:08 -0400
committer Chris Mason <clm@fb.com>  2015-04-10 17:07:22 -0400
commit    1bbc621ef28462456131c035eaeb5567a1a2a2fe (patch)
tree      d2c9e87e9cef8884a440bc9b6a5bf6574eff9fc7 /fs/btrfs/free-space-cache.c
parent    2b108268006e06d57ec9810f4ccf5d99d7e5b598 (diff)
Btrfs: allow block group cache writeout outside critical section in commit
We loop through all of the dirty block groups during commit and write the free space cache. In order to make sure the cache is correct, we do this while no other writers are allowed in the commit.

If a large number of block groups are dirty, this can introduce long stalls during the final stages of the commit, which can block new procs trying to change the filesystem.

This commit changes the block group cache writeout to take appropriate locks and allow it to run earlier in the commit. We'll still have to redo some of the block groups, but it means we can get most of the work out of the way without blocking the entire FS.

Signed-off-by: Chris Mason <clm@fb.com>
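For context, the following is a minimal user-space C sketch (not btrfs code) of the approach the commit describes: flush the dirty caches before the commit-critical lock is taken, then redo only the entries that were re-dirtied in the meantime. The names used here (struct bg, commit_lock, writeout) are made up for illustration and do not correspond to kernel APIs.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

#define NR_GROUPS 4

struct bg {
        pthread_mutex_t lock;           /* protects dirty */
        bool dirty;
        int id;
};

static struct bg groups[NR_GROUPS];
static pthread_mutex_t commit_lock = PTHREAD_MUTEX_INITIALIZER;

/* stand-in for writing one block group's free space cache */
static void writeout(struct bg *bg)
{
        printf("writing cache for group %d\n", bg->id);
}

/* write a group's cache if it is dirty; returns true if work was done */
static bool flush_one(struct bg *bg)
{
        bool was_dirty;

        pthread_mutex_lock(&bg->lock);
        was_dirty = bg->dirty;
        bg->dirty = false;
        pthread_mutex_unlock(&bg->lock);

        if (was_dirty)
                writeout(bg);
        return was_dirty;
}

static void commit(void)
{
        int i;

        /* phase 1: expensive writeout with no commit-wide lock held */
        for (i = 0; i < NR_GROUPS; i++)
                flush_one(&groups[i]);

        /*
         * phase 2: short critical section.  Only groups that were
         * re-dirtied by concurrent writers since phase 1 get rewritten.
         */
        pthread_mutex_lock(&commit_lock);
        for (i = 0; i < NR_GROUPS; i++)
                flush_one(&groups[i]);
        pthread_mutex_unlock(&commit_lock);
}

int main(void)
{
        int i;

        for (i = 0; i < NR_GROUPS; i++) {
                pthread_mutex_init(&groups[i].lock, NULL);
                groups[i].dirty = true;
                groups[i].id = i;
        }
        commit();
        return 0;
}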
Diffstat (limited to 'fs/btrfs/free-space-cache.c')
-rw-r--r--  fs/btrfs/free-space-cache.c | 69
1 file changed, 62 insertions(+), 7 deletions(-)
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 83532a245947..253cb74b0e27 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -226,9 +226,37 @@ int btrfs_check_trunc_cache_free_space(struct btrfs_root *root,
 
 int btrfs_truncate_free_space_cache(struct btrfs_root *root,
                                     struct btrfs_trans_handle *trans,
+                                    struct btrfs_block_group_cache *block_group,
                                     struct inode *inode)
 {
         int ret = 0;
+        struct btrfs_path *path = btrfs_alloc_path();
+
+        if (!path) {
+                ret = -ENOMEM;
+                goto fail;
+        }
+
+        if (block_group) {
+                mutex_lock(&trans->transaction->cache_write_mutex);
+                if (!list_empty(&block_group->io_list)) {
+                        list_del_init(&block_group->io_list);
+
+                        btrfs_wait_cache_io(root, trans, block_group,
+                                            &block_group->io_ctl, path,
+                                            block_group->key.objectid);
+                        btrfs_put_block_group(block_group);
+                }
+
+                /*
+                 * now that we've truncated the cache away, its no longer
+                 * setup or written
+                 */
+                spin_lock(&block_group->lock);
+                block_group->disk_cache_state = BTRFS_DC_CLEAR;
+                spin_unlock(&block_group->lock);
+        }
+        btrfs_free_path(path);
 
         btrfs_i_size_write(inode, 0);
         truncate_pagecache(inode, 0);
@@ -242,11 +270,17 @@ int btrfs_truncate_free_space_cache(struct btrfs_root *root,
         ret = btrfs_truncate_inode_items(trans, root, inode,
                                          0, BTRFS_EXTENT_DATA_KEY);
         if (ret) {
+                mutex_unlock(&trans->transaction->cache_write_mutex);
                 btrfs_abort_transaction(trans, root, ret);
                 return ret;
         }
 
         ret = btrfs_update_inode(trans, root, inode);
+
+        if (block_group)
+                mutex_unlock(&trans->transaction->cache_write_mutex);
+
+fail:
         if (ret)
                 btrfs_abort_transaction(trans, root, ret);
 
@@ -876,6 +910,7 @@ int write_cache_extent_entries(struct btrfs_io_ctl *io_ctl,
 {
         int ret;
         struct btrfs_free_cluster *cluster = NULL;
+        struct btrfs_free_cluster *cluster_locked = NULL;
         struct rb_node *node = rb_first(&ctl->free_space_offset);
         struct btrfs_trim_range *trim_entry;
 
@@ -887,6 +922,8 @@ int write_cache_extent_entries(struct btrfs_io_ctl *io_ctl,
         }
 
         if (!node && cluster) {
+                cluster_locked = cluster;
+                spin_lock(&cluster_locked->lock);
                 node = rb_first(&cluster->root);
                 cluster = NULL;
         }
@@ -910,9 +947,15 @@ int write_cache_extent_entries(struct btrfs_io_ctl *io_ctl,
                 node = rb_next(node);
                 if (!node && cluster) {
                         node = rb_first(&cluster->root);
+                        cluster_locked = cluster;
+                        spin_lock(&cluster_locked->lock);
                         cluster = NULL;
                 }
         }
+        if (cluster_locked) {
+                spin_unlock(&cluster_locked->lock);
+                cluster_locked = NULL;
+        }
 
         /*
          * Make sure we don't miss any range that was removed from our rbtree
@@ -930,6 +973,8 @@ int write_cache_extent_entries(struct btrfs_io_ctl *io_ctl,
 
         return 0;
 fail:
+        if (cluster_locked)
+                spin_unlock(&cluster_locked->lock);
         return -ENOSPC;
 }
 
@@ -1101,6 +1146,9 @@ int btrfs_wait_cache_io(struct btrfs_root *root,
         int ret;
         struct inode *inode = io_ctl->inode;
 
+        if (!inode)
+                return 0;
+
         root = root->fs_info->tree_root;
 
         /* Flush the dirty pages in the cache file. */
@@ -1127,11 +1175,16 @@ out:
         btrfs_update_inode(trans, root, inode);
 
         if (block_group) {
+                /* the dirty list is protected by the dirty_bgs_lock */
+                spin_lock(&trans->transaction->dirty_bgs_lock);
+
+                /* the disk_cache_state is protected by the block group lock */
                 spin_lock(&block_group->lock);
 
                 /*
                  * only mark this as written if we didn't get put back on
-                 * the dirty list while waiting for IO.
+                 * the dirty list while waiting for IO.  Otherwise our
+                 * cache state won't be right, and we won't get written again
                  */
                 if (!ret && list_empty(&block_group->dirty_list))
                         block_group->disk_cache_state = BTRFS_DC_WRITTEN;
@@ -1139,6 +1192,7 @@ out:
                         block_group->disk_cache_state = BTRFS_DC_ERROR;
 
                 spin_unlock(&block_group->lock);
+                spin_unlock(&trans->transaction->dirty_bgs_lock);
                 io_ctl->inode = NULL;
                 iput(inode);
         }
@@ -1207,9 +1261,11 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
 
         mutex_lock(&ctl->cache_writeout_mutex);
         /* Write out the extent entries in the free space cache */
+        spin_lock(&ctl->tree_lock);
         ret = write_cache_extent_entries(io_ctl, ctl,
                                          block_group, &entries, &bitmaps,
                                          &bitmap_list);
+        spin_unlock(&ctl->tree_lock);
         if (ret) {
                 mutex_unlock(&ctl->cache_writeout_mutex);
                 goto out_nospc;
@@ -1219,6 +1275,9 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
          * Some spaces that are freed in the current transaction are pinned,
          * they will be added into free space cache after the transaction is
          * committed, we shouldn't lose them.
+         *
+         * If this changes while we are working we'll get added back to
+         * the dirty list and redo it.  No locking needed
          */
         ret = write_pinned_extent_entries(root, block_group, io_ctl, &entries);
         if (ret) {
@@ -1231,7 +1290,9 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
          * locked while doing it because a concurrent trim can be manipulating
          * or freeing the bitmap.
          */
+        spin_lock(&ctl->tree_lock);
         ret = write_bitmap_entries(io_ctl, &bitmap_list);
+        spin_unlock(&ctl->tree_lock);
         mutex_unlock(&ctl->cache_writeout_mutex);
         if (ret)
                 goto out_nospc;
@@ -1307,12 +1368,6 @@ int btrfs_write_out_cache(struct btrfs_root *root,
                 spin_unlock(&block_group->lock);
                 return 0;
         }
-
-        if (block_group->delalloc_bytes) {
-                block_group->disk_cache_state = BTRFS_DC_WRITTEN;
-                spin_unlock(&block_group->lock);
-                return 0;
-        }
         spin_unlock(&block_group->lock);
 
         inode = lookup_free_space_inode(root, block_group, path);
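The cluster_locked changes in write_cache_extent_entries() above follow a common pattern: while the cluster's rbtree is being walked its spinlock is held, and both the normal exit and the fail: path must drop whatever lock is still held. Below is a small user-space C sketch of that pattern, with illustrative names only (struct cluster, write_entries), not btrfs code.

#include <pthread.h>
#include <stddef.h>
#include <stdio.h>

struct cluster {
        pthread_mutex_t lock;           /* protects entries while walking */
        const int *entries;
        size_t nr;
};

static int write_one(int entry)
{
        printf("entry %d\n", entry);
        return 0;                       /* nonzero would signal a failure */
}

/* returns 0 on success, -1 if an entry could not be written */
static int write_entries(struct cluster *cluster)
{
        struct cluster *locked = NULL;
        size_t i;

        if (cluster) {
                locked = cluster;       /* remember which lock we took */
                pthread_mutex_lock(&locked->lock);
        }

        for (i = 0; cluster && i < cluster->nr; i++) {
                if (write_one(cluster->entries[i]))
                        goto fail;      /* lock may still be held here */
        }

        if (locked) {
                pthread_mutex_unlock(&locked->lock);
                locked = NULL;
        }
        return 0;

fail:
        if (locked)                     /* error path drops it as well */
                pthread_mutex_unlock(&locked->lock);
        return -1;
}

int main(void)
{
        static const int ents[] = { 1, 2, 3 };
        struct cluster c = {
                .lock = PTHREAD_MUTEX_INITIALIZER,
                .entries = ents,
                .nr = 3,
        };

        return write_entries(&c) ? 1 : 0;
}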