diff options
author | Jeff Mahoney <jeffm@suse.com> | 2015-06-15 09:41:19 -0400 |
---|---|---|
committer | Chris Mason <clm@fb.com> | 2015-07-29 11:15:29 -0400 |
commit | e33e17ee1098d8d751552ac11c111e1c1a3db014 (patch) | |
tree | 49b4c9b5c4123db86d9256a5b9122da73275d730 | |
parent | e44163e177960ee60e32a73bffdd53c3a5827406 (diff) |
btrfs: add missing discards when unpinning extents with -o discard
When we clear the dirty bits in btrfs_delete_unused_bgs for extents
in the empty block group, it results in btrfs_finish_extent_commit being
unable to discard the freed extents.
The block group removal patch added an alternate path to forget extents
other than btrfs_finish_extent_commit. As a result, any extents that
would be freed when the block group is removed aren't discarded. In my
test run, with a large copy of mixed sized files followed by removal, it
left nearly 2/3 of extents undiscarded.
To clean up the block groups, we add the removed block group onto a list
that will be discarded after transaction commit.
Signed-off-by: Jeff Mahoney <jeffm@suse.com>
Reviewed-by: Filipe Manana <fdmanana@suse.com>
Tested-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: Chris Mason <clm@fb.com>
-rw-r--r-- | fs/btrfs/ctree.h | 3 | ||||
-rw-r--r-- | fs/btrfs/extent-tree.c | 68 | ||||
-rw-r--r-- | fs/btrfs/free-space-cache.c | 57 | ||||
-rw-r--r-- | fs/btrfs/super.c | 2 | ||||
-rw-r--r-- | fs/btrfs/transaction.c | 2 | ||||
-rw-r--r-- | fs/btrfs/transaction.h | 2 |
6 files changed, 105 insertions, 29 deletions
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index aac314e14188..19ef3f306559 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h | |||
@@ -3437,6 +3437,8 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, | |||
3437 | struct btrfs_root *root, u64 group_start, | 3437 | struct btrfs_root *root, u64 group_start, |
3438 | struct extent_map *em); | 3438 | struct extent_map *em); |
3439 | void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info); | 3439 | void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info); |
3440 | void btrfs_get_block_group_trimming(struct btrfs_block_group_cache *cache); | ||
3441 | void btrfs_put_block_group_trimming(struct btrfs_block_group_cache *cache); | ||
3440 | void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans, | 3442 | void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans, |
3441 | struct btrfs_root *root); | 3443 | struct btrfs_root *root); |
3442 | u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data); | 3444 | u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data); |
@@ -4073,6 +4075,7 @@ __cold | |||
4073 | void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function, | 4075 | void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function, |
4074 | unsigned int line, int errno, const char *fmt, ...); | 4076 | unsigned int line, int errno, const char *fmt, ...); |
4075 | 4077 | ||
4078 | const char *btrfs_decode_error(int errno); | ||
4076 | 4079 | ||
4077 | __cold | 4080 | __cold |
4078 | void __btrfs_abort_transaction(struct btrfs_trans_handle *trans, | 4081 | void __btrfs_abort_transaction(struct btrfs_trans_handle *trans, |
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 15411aefbfa0..6b791f394698 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
@@ -6131,20 +6131,19 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, | |||
6131 | struct btrfs_root *root) | 6131 | struct btrfs_root *root) |
6132 | { | 6132 | { |
6133 | struct btrfs_fs_info *fs_info = root->fs_info; | 6133 | struct btrfs_fs_info *fs_info = root->fs_info; |
6134 | struct btrfs_block_group_cache *block_group, *tmp; | ||
6135 | struct list_head *deleted_bgs; | ||
6134 | struct extent_io_tree *unpin; | 6136 | struct extent_io_tree *unpin; |
6135 | u64 start; | 6137 | u64 start; |
6136 | u64 end; | 6138 | u64 end; |
6137 | int ret; | 6139 | int ret; |
6138 | 6140 | ||
6139 | if (trans->aborted) | ||
6140 | return 0; | ||
6141 | |||
6142 | if (fs_info->pinned_extents == &fs_info->freed_extents[0]) | 6141 | if (fs_info->pinned_extents == &fs_info->freed_extents[0]) |
6143 | unpin = &fs_info->freed_extents[1]; | 6142 | unpin = &fs_info->freed_extents[1]; |
6144 | else | 6143 | else |
6145 | unpin = &fs_info->freed_extents[0]; | 6144 | unpin = &fs_info->freed_extents[0]; |
6146 | 6145 | ||
6147 | while (1) { | 6146 | while (!trans->aborted) { |
6148 | mutex_lock(&fs_info->unused_bg_unpin_mutex); | 6147 | mutex_lock(&fs_info->unused_bg_unpin_mutex); |
6149 | ret = find_first_extent_bit(unpin, 0, &start, &end, | 6148 | ret = find_first_extent_bit(unpin, 0, &start, &end, |
6150 | EXTENT_DIRTY, NULL); | 6149 | EXTENT_DIRTY, NULL); |
@@ -6163,6 +6162,34 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, | |||
6163 | cond_resched(); | 6162 | cond_resched(); |
6164 | } | 6163 | } |
6165 | 6164 | ||
6165 | /* | ||
6166 | * Transaction is finished. We don't need the lock anymore. We | ||
6167 | * do need to clean up the block groups in case of a transaction | ||
6168 | * abort. | ||
6169 | */ | ||
6170 | deleted_bgs = &trans->transaction->deleted_bgs; | ||
6171 | list_for_each_entry_safe(block_group, tmp, deleted_bgs, bg_list) { | ||
6172 | u64 trimmed = 0; | ||
6173 | |||
6174 | ret = -EROFS; | ||
6175 | if (!trans->aborted) | ||
6176 | ret = btrfs_discard_extent(root, | ||
6177 | block_group->key.objectid, | ||
6178 | block_group->key.offset, | ||
6179 | &trimmed); | ||
6180 | |||
6181 | list_del_init(&block_group->bg_list); | ||
6182 | btrfs_put_block_group_trimming(block_group); | ||
6183 | btrfs_put_block_group(block_group); | ||
6184 | |||
6185 | if (ret) { | ||
6186 | const char *errstr = btrfs_decode_error(ret); | ||
6187 | btrfs_warn(fs_info, | ||
6188 | "Discard failed while removing blockgroup: errno=%d %s\n", | ||
6189 | ret, errstr); | ||
6190 | } | ||
6191 | } | ||
6192 | |||
6166 | return 0; | 6193 | return 0; |
6167 | } | 6194 | } |
6168 | 6195 | ||
@@ -9903,6 +9930,11 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, | |||
9903 | * currently running transaction might finish and a new one start, | 9930 | * currently running transaction might finish and a new one start, |
9904 | * allowing for new block groups to be created that can reuse the same | 9931 | * allowing for new block groups to be created that can reuse the same |
9905 | * physical device locations unless we take this special care. | 9932 | * physical device locations unless we take this special care. |
9933 | * | ||
9934 | * There may also be an implicit trim operation if the file system | ||
9935 | * is mounted with -odiscard. The same protections must remain | ||
9936 | * in place until the extents have been discarded completely when | ||
9937 | * the transaction commit has completed. | ||
9906 | */ | 9938 | */ |
9907 | remove_em = (atomic_read(&block_group->trimming) == 0); | 9939 | remove_em = (atomic_read(&block_group->trimming) == 0); |
9908 | /* | 9940 | /* |
@@ -9977,6 +10009,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info) | |||
9977 | spin_lock(&fs_info->unused_bgs_lock); | 10009 | spin_lock(&fs_info->unused_bgs_lock); |
9978 | while (!list_empty(&fs_info->unused_bgs)) { | 10010 | while (!list_empty(&fs_info->unused_bgs)) { |
9979 | u64 start, end; | 10011 | u64 start, end; |
10012 | int trimming; | ||
9980 | 10013 | ||
9981 | block_group = list_first_entry(&fs_info->unused_bgs, | 10014 | block_group = list_first_entry(&fs_info->unused_bgs, |
9982 | struct btrfs_block_group_cache, | 10015 | struct btrfs_block_group_cache, |
@@ -10076,12 +10109,39 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info) | |||
10076 | spin_unlock(&block_group->lock); | 10109 | spin_unlock(&block_group->lock); |
10077 | spin_unlock(&space_info->lock); | 10110 | spin_unlock(&space_info->lock); |
10078 | 10111 | ||
10112 | /* DISCARD can flip during remount */ | ||
10113 | trimming = btrfs_test_opt(root, DISCARD); | ||
10114 | |||
10115 | /* Implicit trim during transaction commit. */ | ||
10116 | if (trimming) | ||
10117 | btrfs_get_block_group_trimming(block_group); | ||
10118 | |||
10079 | /* | 10119 | /* |
10080 | * Btrfs_remove_chunk will abort the transaction if things go | 10120 | * Btrfs_remove_chunk will abort the transaction if things go |
10081 | * horribly wrong. | 10121 | * horribly wrong. |
10082 | */ | 10122 | */ |
10083 | ret = btrfs_remove_chunk(trans, root, | 10123 | ret = btrfs_remove_chunk(trans, root, |
10084 | block_group->key.objectid); | 10124 | block_group->key.objectid); |
10125 | |||
10126 | if (ret) { | ||
10127 | if (trimming) | ||
10128 | btrfs_put_block_group_trimming(block_group); | ||
10129 | goto end_trans; | ||
10130 | } | ||
10131 | |||
10132 | /* | ||
10133 | * If we're not mounted with -odiscard, we can just forget | ||
10134 | * about this block group. Otherwise we'll need to wait | ||
10135 | * until transaction commit to do the actual discard. | ||
10136 | */ | ||
10137 | if (trimming) { | ||
10138 | WARN_ON(!list_empty(&block_group->bg_list)); | ||
10139 | spin_lock(&trans->transaction->deleted_bgs_lock); | ||
10140 | list_move(&block_group->bg_list, | ||
10141 | &trans->transaction->deleted_bgs); | ||
10142 | spin_unlock(&trans->transaction->deleted_bgs_lock); | ||
10143 | btrfs_get_block_group(block_group); | ||
10144 | } | ||
10085 | end_trans: | 10145 | end_trans: |
10086 | btrfs_end_transaction(trans, root); | 10146 | btrfs_end_transaction(trans, root); |
10087 | next: | 10147 | next: |
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index fb5a6b1c62a6..abe3a66bd3ba 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c | |||
@@ -3272,35 +3272,23 @@ next: | |||
3272 | return ret; | 3272 | return ret; |
3273 | } | 3273 | } |
3274 | 3274 | ||
3275 | int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group, | 3275 | void btrfs_get_block_group_trimming(struct btrfs_block_group_cache *cache) |
3276 | u64 *trimmed, u64 start, u64 end, u64 minlen) | ||
3277 | { | 3276 | { |
3278 | int ret; | 3277 | atomic_inc(&cache->trimming); |
3278 | } | ||
3279 | 3279 | ||
3280 | *trimmed = 0; | 3280 | void btrfs_put_block_group_trimming(struct btrfs_block_group_cache *block_group) |
3281 | { | ||
3282 | struct extent_map_tree *em_tree; | ||
3283 | struct extent_map *em; | ||
3284 | bool cleanup; | ||
3281 | 3285 | ||
3282 | spin_lock(&block_group->lock); | 3286 | spin_lock(&block_group->lock); |
3283 | if (block_group->removed) { | 3287 | cleanup = (atomic_dec_and_test(&block_group->trimming) && |
3284 | spin_unlock(&block_group->lock); | 3288 | block_group->removed); |
3285 | return 0; | ||
3286 | } | ||
3287 | atomic_inc(&block_group->trimming); | ||
3288 | spin_unlock(&block_group->lock); | 3289 | spin_unlock(&block_group->lock); |
3289 | 3290 | ||
3290 | ret = trim_no_bitmap(block_group, trimmed, start, end, minlen); | 3291 | if (cleanup) { |
3291 | if (ret) | ||
3292 | goto out; | ||
3293 | |||
3294 | ret = trim_bitmaps(block_group, trimmed, start, end, minlen); | ||
3295 | out: | ||
3296 | spin_lock(&block_group->lock); | ||
3297 | if (atomic_dec_and_test(&block_group->trimming) && | ||
3298 | block_group->removed) { | ||
3299 | struct extent_map_tree *em_tree; | ||
3300 | struct extent_map *em; | ||
3301 | |||
3302 | spin_unlock(&block_group->lock); | ||
3303 | |||
3304 | lock_chunks(block_group->fs_info->chunk_root); | 3292 | lock_chunks(block_group->fs_info->chunk_root); |
3305 | em_tree = &block_group->fs_info->mapping_tree.map_tree; | 3293 | em_tree = &block_group->fs_info->mapping_tree.map_tree; |
3306 | write_lock(&em_tree->lock); | 3294 | write_lock(&em_tree->lock); |
@@ -3324,10 +3312,31 @@ out: | |||
3324 | * this block group have left 1 entry each one. Free them. | 3312 | * this block group have left 1 entry each one. Free them. |
3325 | */ | 3313 | */ |
3326 | __btrfs_remove_free_space_cache(block_group->free_space_ctl); | 3314 | __btrfs_remove_free_space_cache(block_group->free_space_ctl); |
3327 | } else { | 3315 | } |
3316 | } | ||
3317 | |||
3318 | int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group, | ||
3319 | u64 *trimmed, u64 start, u64 end, u64 minlen) | ||
3320 | { | ||
3321 | int ret; | ||
3322 | |||
3323 | *trimmed = 0; | ||
3324 | |||
3325 | spin_lock(&block_group->lock); | ||
3326 | if (block_group->removed) { | ||
3328 | spin_unlock(&block_group->lock); | 3327 | spin_unlock(&block_group->lock); |
3328 | return 0; | ||
3329 | } | 3329 | } |
3330 | btrfs_get_block_group_trimming(block_group); | ||
3331 | spin_unlock(&block_group->lock); | ||
3332 | |||
3333 | ret = trim_no_bitmap(block_group, trimmed, start, end, minlen); | ||
3334 | if (ret) | ||
3335 | goto out; | ||
3330 | 3336 | ||
3337 | ret = trim_bitmaps(block_group, trimmed, start, end, minlen); | ||
3338 | out: | ||
3339 | btrfs_put_block_group_trimming(block_group); | ||
3331 | return ret; | 3340 | return ret; |
3332 | } | 3341 | } |
3333 | 3342 | ||
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index a1077e0ffaa8..8da24e242896 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c | |||
@@ -69,7 +69,7 @@ static struct file_system_type btrfs_fs_type; | |||
69 | 69 | ||
70 | static int btrfs_remount(struct super_block *sb, int *flags, char *data); | 70 | static int btrfs_remount(struct super_block *sb, int *flags, char *data); |
71 | 71 | ||
72 | static const char *btrfs_decode_error(int errno) | 72 | const char *btrfs_decode_error(int errno) |
73 | { | 73 | { |
74 | char *errstr = "unknown"; | 74 | char *errstr = "unknown"; |
75 | 75 | ||
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index f5021fcb154e..44da9299a25b 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c | |||
@@ -258,6 +258,8 @@ loop: | |||
258 | mutex_init(&cur_trans->cache_write_mutex); | 258 | mutex_init(&cur_trans->cache_write_mutex); |
259 | cur_trans->num_dirty_bgs = 0; | 259 | cur_trans->num_dirty_bgs = 0; |
260 | spin_lock_init(&cur_trans->dirty_bgs_lock); | 260 | spin_lock_init(&cur_trans->dirty_bgs_lock); |
261 | INIT_LIST_HEAD(&cur_trans->deleted_bgs); | ||
262 | spin_lock_init(&cur_trans->deleted_bgs_lock); | ||
261 | list_add_tail(&cur_trans->list, &fs_info->trans_list); | 263 | list_add_tail(&cur_trans->list, &fs_info->trans_list); |
262 | extent_io_tree_init(&cur_trans->dirty_pages, | 264 | extent_io_tree_init(&cur_trans->dirty_pages, |
263 | fs_info->btree_inode->i_mapping); | 265 | fs_info->btree_inode->i_mapping); |
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index eb09c2067fa8..edc2fbc262d7 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h | |||
@@ -74,6 +74,8 @@ struct btrfs_transaction { | |||
74 | */ | 74 | */ |
75 | struct mutex cache_write_mutex; | 75 | struct mutex cache_write_mutex; |
76 | spinlock_t dirty_bgs_lock; | 76 | spinlock_t dirty_bgs_lock; |
77 | struct list_head deleted_bgs; | ||
78 | spinlock_t deleted_bgs_lock; | ||
77 | struct btrfs_delayed_ref_root delayed_refs; | 79 | struct btrfs_delayed_ref_root delayed_refs; |
78 | int aborted; | 80 | int aborted; |
79 | int dirty_bg_run; | 81 | int dirty_bg_run; |