summary | refs | log | tree | commit | diff | stats
path: root/fs/btrfs
diff options
context:
space:
mode:
author Jeff Mahoney <jeffm@suse.com> 2015-06-15 09:41:17 -0400
committer Chris Mason <clm@fb.com> 2015-07-29 11:15:26 -0400
commit 499f377f49f085ee4aa214c738e948e88626f39b (patch)
tree 1c9485ec464fd6619b7ca91425de51dad29cc421 /fs/btrfs
parent 86557861dfe4f8defde0df40620b97cc60285aa4 (diff)
btrfs: iterate over unused chunk space in FITRIM
Since we now clean up block groups automatically as they become empty, iterating over block groups is no longer sufficient to discard unused space.

This patch iterates over the unused chunk space and discards any regions that are unallocated, regardless of whether they were ever used. This is a change for btrfs but is consistent with other file systems.

We do this in a transactionless manner since the discard process can take a substantial amount of time and a transaction would need to be started before the acquisition of the device list lock. That would mean a transaction would be held open across /all/ of the discards collectively. In order to prevent other threads from allocating or freeing chunks, we hold the chunks lock across the search and discard calls. We release it between searches to allow the file system to perform more-or-less normally. Since the running transaction can commit and disappear while we're using the transaction pointer, we take a reference to it and release it after the search. This is safe since it would happen normally at the end of the transaction commit after any locks are released anyway. We also take the commit_root_sem to protect against a transaction starting and committing while we're running.

Signed-off-by: Jeff Mahoney <jeffm@suse.com>
Reviewed-by: Filipe Manana <fdmanana@suse.com>
Tested-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: Chris Mason <clm@fb.com>
Diffstat (limited to 'fs/btrfs')
-rw-r--r-- fs/btrfs/extent-tree.c 101
-rw-r--r-- fs/btrfs/volumes.c 63
-rw-r--r-- fs/btrfs/volumes.h 3
3 files changed, 143 insertions, 24 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index d763457b3cce..15411aefbfa0 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -10135,10 +10135,99 @@ int btrfs_error_unpin_extent_range(struct btrfs_root *root, u64 start, u64 end)
10135 return unpin_extent_range(root, start, end, false); 10135 return unpin_extent_range(root, start, end, false);
10136} 10136}
10137 10137
10138/*
10139 * It used to be that old block groups would be left around forever.
10140 * Iterating over them would be enough to trim unused space. Since we
10141 * now automatically remove them, we also need to iterate over unallocated
10142 * space.
10143 *
10144 * We don't want a transaction for this since the discard may take a
10145 * substantial amount of time. We don't require that a transaction be
10146 * running, but we do need to take a running transaction into account
10147 * to ensure that we're not discarding chunks that were released in
10148 * the current transaction.
10149 *
10150 * Holding the chunks lock will prevent other threads from allocating
10151 * or releasing chunks, but it won't prevent a running transaction
10152 * from committing and releasing the memory that the pending chunks
10153 * list head uses. For that, we need to take a reference to the
10154 * transaction.
10155 */
10156static int btrfs_trim_free_extents(struct btrfs_device *device,
10157 u64 minlen, u64 *trimmed)
10158{
10159 u64 start = 0, len = 0;
10160 int ret;
10161
10162 *trimmed = 0;
10163
10164 /* Not writeable = nothing to do. */
10165 if (!device->writeable)
10166 return 0;
10167
10168 /* No free space = nothing to do. */
10169 if (device->total_bytes <= device->bytes_used)
10170 return 0;
10171
10172 ret = 0;
10173
10174 while (1) {
10175 struct btrfs_fs_info *fs_info = device->dev_root->fs_info;
10176 struct btrfs_transaction *trans;
10177 u64 bytes;
10178
10179 ret = mutex_lock_interruptible(&fs_info->chunk_mutex);
10180 if (ret)
10181 return ret;
10182
10183 down_read(&fs_info->commit_root_sem);
10184
10185 spin_lock(&fs_info->trans_lock);
10186 trans = fs_info->running_transaction;
10187 if (trans)
10188 atomic_inc(&trans->use_count);
10189 spin_unlock(&fs_info->trans_lock);
10190
10191 ret = find_free_dev_extent_start(trans, device, minlen, start,
10192 &start, &len);
10193 if (trans)
10194 btrfs_put_transaction(trans);
10195
10196 if (ret) {
10197 up_read(&fs_info->commit_root_sem);
10198 mutex_unlock(&fs_info->chunk_mutex);
10199 if (ret == -ENOSPC)
10200 ret = 0;
10201 break;
10202 }
10203
10204 ret = btrfs_issue_discard(device->bdev, start, len, &bytes);
10205 up_read(&fs_info->commit_root_sem);
10206 mutex_unlock(&fs_info->chunk_mutex);
10207
10208 if (ret)
10209 break;
10210
10211 start += len;
10212 *trimmed += bytes;
10213
10214 if (fatal_signal_pending(current)) {
10215 ret = -ERESTARTSYS;
10216 break;
10217 }
10218
10219 cond_resched();
10220 }
10221
10222 return ret;
10223}
10224
10138int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range) 10225int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range)
10139{ 10226{
10140 struct btrfs_fs_info *fs_info = root->fs_info; 10227 struct btrfs_fs_info *fs_info = root->fs_info;
10141 struct btrfs_block_group_cache *cache = NULL; 10228 struct btrfs_block_group_cache *cache = NULL;
10229 struct btrfs_device *device;
10230 struct list_head *devices;
10142 u64 group_trimmed; 10231 u64 group_trimmed;
10143 u64 start; 10232 u64 start;
10144 u64 end; 10233 u64 end;
@@ -10193,6 +10282,18 @@ int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range)
10193 cache = next_block_group(fs_info->tree_root, cache); 10282 cache = next_block_group(fs_info->tree_root, cache);
10194 } 10283 }
10195 10284
10285 mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
10286 devices = &root->fs_info->fs_devices->alloc_list;
10287 list_for_each_entry(device, devices, dev_alloc_list) {
10288 ret = btrfs_trim_free_extents(device, range->minlen,
10289 &group_trimmed);
10290 if (ret)
10291 break;
10292
10293 trimmed += group_trimmed;
10294 }
10295 mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
10296
10196 range->len = trimmed; 10297 range->len = trimmed;
10197 return ret; 10298 return ret;
10198} 10299}
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 9b95503ddd00..141c6051cf58 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1116,15 +1116,18 @@ out:
1116 return ret; 1116 return ret;
1117} 1117}
1118 1118
1119static int contains_pending_extent(struct btrfs_trans_handle *trans, 1119static int contains_pending_extent(struct btrfs_transaction *transaction,
1120 struct btrfs_device *device, 1120 struct btrfs_device *device,
1121 u64 *start, u64 len) 1121 u64 *start, u64 len)
1122{ 1122{
1123 struct btrfs_fs_info *fs_info = device->dev_root->fs_info;
1123 struct extent_map *em; 1124 struct extent_map *em;
1124 struct list_head *search_list = &trans->transaction->pending_chunks; 1125 struct list_head *search_list = &fs_info->pinned_chunks;
1125 int ret = 0; 1126 int ret = 0;
1126 u64 physical_start = *start; 1127 u64 physical_start = *start;
1127 1128
1129 if (transaction)
1130 search_list = &transaction->pending_chunks;
1128again: 1131again:
1129 list_for_each_entry(em, search_list, list) { 1132 list_for_each_entry(em, search_list, list) {
1130 struct map_lookup *map; 1133 struct map_lookup *map;
@@ -1159,8 +1162,8 @@ again:
1159 } 1162 }
1160 } 1163 }
1161 } 1164 }
1162 if (search_list == &trans->transaction->pending_chunks) { 1165 if (search_list != &fs_info->pinned_chunks) {
1163 search_list = &trans->root->fs_info->pinned_chunks; 1166 search_list = &fs_info->pinned_chunks;
1164 goto again; 1167 goto again;
1165 } 1168 }
1166 1169
@@ -1169,12 +1172,13 @@ again:
1169 1172
1170 1173
1171/* 1174/*
1172 * find_free_dev_extent - find free space in the specified device 1175 * find_free_dev_extent_start - find free space in the specified device
1173 * @device: the device which we search the free space in 1176 * @device: the device which we search the free space in
1174 * @num_bytes: the size of the free space that we need 1177 * @num_bytes: the size of the free space that we need
1175 * @start: store the start of the free space. 1178 * @search_start: the position from which to begin the search
1176 * @len: the size of the free space. that we find, or the size of the max 1179 * @start: store the start of the free space.
1177 * free space if we don't find suitable free space 1180 * @len: the size of the free space. that we find, or the size
1181 * of the max free space if we don't find suitable free space
1178 * 1182 *
1179 * this uses a pretty simple search, the expectation is that it is 1183 * this uses a pretty simple search, the expectation is that it is
1180 * called very infrequently and that a given device has a small number 1184 * called very infrequently and that a given device has a small number
@@ -1188,9 +1192,9 @@ again:
1188 * But if we don't find suitable free space, it is used to store the size of 1192 * But if we don't find suitable free space, it is used to store the size of
1189 * the max free space. 1193 * the max free space.
1190 */ 1194 */
1191int find_free_dev_extent(struct btrfs_trans_handle *trans, 1195int find_free_dev_extent_start(struct btrfs_transaction *transaction,
1192 struct btrfs_device *device, u64 num_bytes, 1196 struct btrfs_device *device, u64 num_bytes,
1193 u64 *start, u64 *len) 1197 u64 search_start, u64 *start, u64 *len)
1194{ 1198{
1195 struct btrfs_key key; 1199 struct btrfs_key key;
1196 struct btrfs_root *root = device->dev_root; 1200 struct btrfs_root *root = device->dev_root;
@@ -1200,19 +1204,11 @@ int find_free_dev_extent(struct btrfs_trans_handle *trans,
1200 u64 max_hole_start; 1204 u64 max_hole_start;
1201 u64 max_hole_size; 1205 u64 max_hole_size;
1202 u64 extent_end; 1206 u64 extent_end;
1203 u64 search_start;
1204 u64 search_end = device->total_bytes; 1207 u64 search_end = device->total_bytes;
1205 int ret; 1208 int ret;
1206 int slot; 1209 int slot;
1207 struct extent_buffer *l; 1210 struct extent_buffer *l;
1208 1211
1209 /* FIXME use last free of some kind */
1210
1211 /* we don't want to overwrite the superblock on the drive,
1212 * so we make sure to start at an offset of at least 1MB
1213 */
1214 search_start = max(root->fs_info->alloc_start, 1024ull * 1024);
1215
1216 path = btrfs_alloc_path(); 1212 path = btrfs_alloc_path();
1217 if (!path) 1213 if (!path)
1218 return -ENOMEM; 1214 return -ENOMEM;
@@ -1273,7 +1269,7 @@ again:
1273 * Have to check before we set max_hole_start, otherwise 1269 * Have to check before we set max_hole_start, otherwise
1274 * we could end up sending back this offset anyway. 1270 * we could end up sending back this offset anyway.
1275 */ 1271 */
1276 if (contains_pending_extent(trans, device, 1272 if (contains_pending_extent(transaction, device,
1277 &search_start, 1273 &search_start,
1278 hole_size)) { 1274 hole_size)) {
1279 if (key.offset >= search_start) { 1275 if (key.offset >= search_start) {
@@ -1322,7 +1318,7 @@ next:
1322 if (search_end > search_start) { 1318 if (search_end > search_start) {
1323 hole_size = search_end - search_start; 1319 hole_size = search_end - search_start;
1324 1320
1325 if (contains_pending_extent(trans, device, &search_start, 1321 if (contains_pending_extent(transaction, device, &search_start,
1326 hole_size)) { 1322 hole_size)) {
1327 btrfs_release_path(path); 1323 btrfs_release_path(path);
1328 goto again; 1324 goto again;
@@ -1348,6 +1344,24 @@ out:
1348 return ret; 1344 return ret;
1349} 1345}
1350 1346
1347int find_free_dev_extent(struct btrfs_trans_handle *trans,
1348 struct btrfs_device *device, u64 num_bytes,
1349 u64 *start, u64 *len)
1350{
1351 struct btrfs_root *root = device->dev_root;
1352 u64 search_start;
1353
1354 /* FIXME use last free of some kind */
1355
1356 /*
1357 * we don't want to overwrite the superblock on the drive,
1358 * so we make sure to start at an offset of at least 1MB
1359 */
1360 search_start = max(root->fs_info->alloc_start, 1024ull * 1024);
1361 return find_free_dev_extent_start(trans->transaction, device,
1362 num_bytes, search_start, start, len);
1363}
1364
1351static int btrfs_free_dev_extent(struct btrfs_trans_handle *trans, 1365static int btrfs_free_dev_extent(struct btrfs_trans_handle *trans,
1352 struct btrfs_device *device, 1366 struct btrfs_device *device,
1353 u64 start, u64 *dev_extent_len) 1367 u64 start, u64 *dev_extent_len)
@@ -4200,7 +4214,8 @@ again:
4200 u64 start = new_size; 4214 u64 start = new_size;
4201 u64 len = old_size - new_size; 4215 u64 len = old_size - new_size;
4202 4216
4203 if (contains_pending_extent(trans, device, &start, len)) { 4217 if (contains_pending_extent(trans->transaction, device,
4218 &start, len)) {
4204 unlock_chunks(root); 4219 unlock_chunks(root);
4205 checked_pending_chunks = true; 4220 checked_pending_chunks = true;
4206 failed = 0; 4221 failed = 0;
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 210a64390f40..57b0217b5300 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -455,6 +455,9 @@ int btrfs_cancel_balance(struct btrfs_fs_info *fs_info);
455int btrfs_create_uuid_tree(struct btrfs_fs_info *fs_info); 455int btrfs_create_uuid_tree(struct btrfs_fs_info *fs_info);
456int btrfs_check_uuid_tree(struct btrfs_fs_info *fs_info); 456int btrfs_check_uuid_tree(struct btrfs_fs_info *fs_info);
457int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset); 457int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset);
458int find_free_dev_extent_start(struct btrfs_transaction *transaction,
459 struct btrfs_device *device, u64 num_bytes,
460 u64 search_start, u64 *start, u64 *max_avail);
458int find_free_dev_extent(struct btrfs_trans_handle *trans, 461int find_free_dev_extent(struct btrfs_trans_handle *trans,
459 struct btrfs_device *device, u64 num_bytes, 462 struct btrfs_device *device, u64 num_bytes,
460 u64 *start, u64 *max_avail); 463 u64 *start, u64 *max_avail);