aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/extent-tree.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/extent-tree.c')
-rw-r--r--fs/btrfs/extent-tree.c248
1 files changed, 239 insertions, 9 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 59d59d98bca1..5411f0ab5683 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -1882,10 +1882,77 @@ static int remove_extent_backref(struct btrfs_trans_handle *trans,
1882 return ret; 1882 return ret;
1883} 1883}
1884 1884
1885static int btrfs_issue_discard(struct block_device *bdev, 1885#define in_range(b, first, len) ((b) >= (first) && (b) < (first) + (len))
1886 u64 start, u64 len) 1886static int btrfs_issue_discard(struct block_device *bdev, u64 start, u64 len,
1887 u64 *discarded_bytes)
1887{ 1888{
1888 return blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_NOFS, 0); 1889 int j, ret = 0;
1890 u64 bytes_left, end;
1891 u64 aligned_start = ALIGN(start, 1 << 9);
1892
1893 if (WARN_ON(start != aligned_start)) {
1894 len -= aligned_start - start;
1895 len = round_down(len, 1 << 9);
1896 start = aligned_start;
1897 }
1898
1899 *discarded_bytes = 0;
1900
1901 if (!len)
1902 return 0;
1903
1904 end = start + len;
1905 bytes_left = len;
1906
1907 /* Skip any superblocks on this device. */
1908 for (j = 0; j < BTRFS_SUPER_MIRROR_MAX; j++) {
1909 u64 sb_start = btrfs_sb_offset(j);
1910 u64 sb_end = sb_start + BTRFS_SUPER_INFO_SIZE;
1911 u64 size = sb_start - start;
1912
1913 if (!in_range(sb_start, start, bytes_left) &&
1914 !in_range(sb_end, start, bytes_left) &&
1915 !in_range(start, sb_start, BTRFS_SUPER_INFO_SIZE))
1916 continue;
1917
1918 /*
1919 * Superblock spans beginning of range. Adjust start and
1920 * try again.
1921 */
1922 if (sb_start <= start) {
1923 start += sb_end - start;
1924 if (start > end) {
1925 bytes_left = 0;
1926 break;
1927 }
1928 bytes_left = end - start;
1929 continue;
1930 }
1931
1932 if (size) {
1933 ret = blkdev_issue_discard(bdev, start >> 9, size >> 9,
1934 GFP_NOFS, 0);
1935 if (!ret)
1936 *discarded_bytes += size;
1937 else if (ret != -EOPNOTSUPP)
1938 return ret;
1939 }
1940
1941 start = sb_end;
1942 if (start > end) {
1943 bytes_left = 0;
1944 break;
1945 }
1946 bytes_left = end - start;
1947 }
1948
1949 if (bytes_left) {
1950 ret = blkdev_issue_discard(bdev, start >> 9, bytes_left >> 9,
1951 GFP_NOFS, 0);
1952 if (!ret)
1953 *discarded_bytes += bytes_left;
1954 }
1955 return ret;
1889} 1956}
1890 1957
1891int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, 1958int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
@@ -1906,14 +1973,16 @@ int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
1906 1973
1907 1974
1908 for (i = 0; i < bbio->num_stripes; i++, stripe++) { 1975 for (i = 0; i < bbio->num_stripes; i++, stripe++) {
1976 u64 bytes;
1909 if (!stripe->dev->can_discard) 1977 if (!stripe->dev->can_discard)
1910 continue; 1978 continue;
1911 1979
1912 ret = btrfs_issue_discard(stripe->dev->bdev, 1980 ret = btrfs_issue_discard(stripe->dev->bdev,
1913 stripe->physical, 1981 stripe->physical,
1914 stripe->length); 1982 stripe->length,
1983 &bytes);
1915 if (!ret) 1984 if (!ret)
1916 discarded_bytes += stripe->length; 1985 discarded_bytes += bytes;
1917 else if (ret != -EOPNOTSUPP) 1986 else if (ret != -EOPNOTSUPP)
1918 break; /* Logic errors or -ENOMEM, or -EIO but I don't know how that could happen JDM */ 1987 break; /* Logic errors or -ENOMEM, or -EIO but I don't know how that could happen JDM */
1919 1988
@@ -6061,20 +6130,19 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
6061 struct btrfs_root *root) 6130 struct btrfs_root *root)
6062{ 6131{
6063 struct btrfs_fs_info *fs_info = root->fs_info; 6132 struct btrfs_fs_info *fs_info = root->fs_info;
6133 struct btrfs_block_group_cache *block_group, *tmp;
6134 struct list_head *deleted_bgs;
6064 struct extent_io_tree *unpin; 6135 struct extent_io_tree *unpin;
6065 u64 start; 6136 u64 start;
6066 u64 end; 6137 u64 end;
6067 int ret; 6138 int ret;
6068 6139
6069 if (trans->aborted)
6070 return 0;
6071
6072 if (fs_info->pinned_extents == &fs_info->freed_extents[0]) 6140 if (fs_info->pinned_extents == &fs_info->freed_extents[0])
6073 unpin = &fs_info->freed_extents[1]; 6141 unpin = &fs_info->freed_extents[1];
6074 else 6142 else
6075 unpin = &fs_info->freed_extents[0]; 6143 unpin = &fs_info->freed_extents[0];
6076 6144
6077 while (1) { 6145 while (!trans->aborted) {
6078 mutex_lock(&fs_info->unused_bg_unpin_mutex); 6146 mutex_lock(&fs_info->unused_bg_unpin_mutex);
6079 ret = find_first_extent_bit(unpin, 0, &start, &end, 6147 ret = find_first_extent_bit(unpin, 0, &start, &end,
6080 EXTENT_DIRTY, NULL); 6148 EXTENT_DIRTY, NULL);
@@ -6093,6 +6161,34 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
6093 cond_resched(); 6161 cond_resched();
6094 } 6162 }
6095 6163
6164 /*
6165 * Transaction is finished. We don't need the lock anymore. We
6166 * do need to clean up the block groups in case of a transaction
6167 * abort.
6168 */
6169 deleted_bgs = &trans->transaction->deleted_bgs;
6170 list_for_each_entry_safe(block_group, tmp, deleted_bgs, bg_list) {
6171 u64 trimmed = 0;
6172
6173 ret = -EROFS;
6174 if (!trans->aborted)
6175 ret = btrfs_discard_extent(root,
6176 block_group->key.objectid,
6177 block_group->key.offset,
6178 &trimmed);
6179
6180 list_del_init(&block_group->bg_list);
6181 btrfs_put_block_group_trimming(block_group);
6182 btrfs_put_block_group(block_group);
6183
6184 if (ret) {
6185 const char *errstr = btrfs_decode_error(ret);
6186 btrfs_warn(fs_info,
6187 "Discard failed while removing blockgroup: errno=%d %s\n",
6188 ret, errstr);
6189 }
6190 }
6191
6096 return 0; 6192 return 0;
6097} 6193}
6098 6194
@@ -9830,6 +9926,11 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
9830 * currently running transaction might finish and a new one start, 9926 * currently running transaction might finish and a new one start,
9831 * allowing for new block groups to be created that can reuse the same 9927 * allowing for new block groups to be created that can reuse the same
9832 * physical device locations unless we take this special care. 9928 * physical device locations unless we take this special care.
9929 *
9930 * There may also be an implicit trim operation if the file system
9931 * is mounted with -odiscard. The same protections must remain
9932 * in place until the extents have been discarded completely when
9933 * the transaction commit has completed.
9833 */ 9934 */
9834 remove_em = (atomic_read(&block_group->trimming) == 0); 9935 remove_em = (atomic_read(&block_group->trimming) == 0);
9835 /* 9936 /*
@@ -9904,6 +10005,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
9904 spin_lock(&fs_info->unused_bgs_lock); 10005 spin_lock(&fs_info->unused_bgs_lock);
9905 while (!list_empty(&fs_info->unused_bgs)) { 10006 while (!list_empty(&fs_info->unused_bgs)) {
9906 u64 start, end; 10007 u64 start, end;
10008 int trimming;
9907 10009
9908 block_group = list_first_entry(&fs_info->unused_bgs, 10010 block_group = list_first_entry(&fs_info->unused_bgs,
9909 struct btrfs_block_group_cache, 10011 struct btrfs_block_group_cache,
@@ -10003,12 +10105,39 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
10003 spin_unlock(&block_group->lock); 10105 spin_unlock(&block_group->lock);
10004 spin_unlock(&space_info->lock); 10106 spin_unlock(&space_info->lock);
10005 10107
10108 /* DISCARD can flip during remount */
10109 trimming = btrfs_test_opt(root, DISCARD);
10110
10111 /* Implicit trim during transaction commit. */
10112 if (trimming)
10113 btrfs_get_block_group_trimming(block_group);
10114
10006 /* 10115 /*
10007 * Btrfs_remove_chunk will abort the transaction if things go 10116 * Btrfs_remove_chunk will abort the transaction if things go
10008 * horribly wrong. 10117 * horribly wrong.
10009 */ 10118 */
10010 ret = btrfs_remove_chunk(trans, root, 10119 ret = btrfs_remove_chunk(trans, root,
10011 block_group->key.objectid); 10120 block_group->key.objectid);
10121
10122 if (ret) {
10123 if (trimming)
10124 btrfs_put_block_group_trimming(block_group);
10125 goto end_trans;
10126 }
10127
10128 /*
10129 * If we're not mounted with -odiscard, we can just forget
10130 * about this block group. Otherwise we'll need to wait
10131 * until transaction commit to do the actual discard.
10132 */
10133 if (trimming) {
10134 WARN_ON(!list_empty(&block_group->bg_list));
10135 spin_lock(&trans->transaction->deleted_bgs_lock);
10136 list_move(&block_group->bg_list,
10137 &trans->transaction->deleted_bgs);
10138 spin_unlock(&trans->transaction->deleted_bgs_lock);
10139 btrfs_get_block_group(block_group);
10140 }
10012end_trans: 10141end_trans:
10013 btrfs_end_transaction(trans, root); 10142 btrfs_end_transaction(trans, root);
10014next: 10143next:
@@ -10062,10 +10191,99 @@ int btrfs_error_unpin_extent_range(struct btrfs_root *root, u64 start, u64 end)
10062 return unpin_extent_range(root, start, end, false); 10191 return unpin_extent_range(root, start, end, false);
10063} 10192}
10064 10193
10194/*
10195 * It used to be that old block groups would be left around forever.
10196 * Iterating over them would be enough to trim unused space. Since we
10197 * now automatically remove them, we also need to iterate over unallocated
10198 * space.
10199 *
10200 * We don't want a transaction for this since the discard may take a
10201 * substantial amount of time. We don't require that a transaction be
10202 * running, but we do need to take a running transaction into account
10203 * to ensure that we're not discarding chunks that were released in
10204 * the current transaction.
10205 *
10206 * Holding the chunks lock will prevent other threads from allocating
10207 * or releasing chunks, but it won't prevent a running transaction
10208 * from committing and releasing the memory that the pending chunks
10209 * list head uses. For that, we need to take a reference to the
10210 * transaction.
10211 */
10212static int btrfs_trim_free_extents(struct btrfs_device *device,
10213 u64 minlen, u64 *trimmed)
10214{
10215 u64 start = 0, len = 0;
10216 int ret;
10217
10218 *trimmed = 0;
10219
10220 /* Not writeable = nothing to do. */
10221 if (!device->writeable)
10222 return 0;
10223
10224 /* No free space = nothing to do. */
10225 if (device->total_bytes <= device->bytes_used)
10226 return 0;
10227
10228 ret = 0;
10229
10230 while (1) {
10231 struct btrfs_fs_info *fs_info = device->dev_root->fs_info;
10232 struct btrfs_transaction *trans;
10233 u64 bytes;
10234
10235 ret = mutex_lock_interruptible(&fs_info->chunk_mutex);
10236 if (ret)
10237 return ret;
10238
10239 down_read(&fs_info->commit_root_sem);
10240
10241 spin_lock(&fs_info->trans_lock);
10242 trans = fs_info->running_transaction;
10243 if (trans)
10244 atomic_inc(&trans->use_count);
10245 spin_unlock(&fs_info->trans_lock);
10246
10247 ret = find_free_dev_extent_start(trans, device, minlen, start,
10248 &start, &len);
10249 if (trans)
10250 btrfs_put_transaction(trans);
10251
10252 if (ret) {
10253 up_read(&fs_info->commit_root_sem);
10254 mutex_unlock(&fs_info->chunk_mutex);
10255 if (ret == -ENOSPC)
10256 ret = 0;
10257 break;
10258 }
10259
10260 ret = btrfs_issue_discard(device->bdev, start, len, &bytes);
10261 up_read(&fs_info->commit_root_sem);
10262 mutex_unlock(&fs_info->chunk_mutex);
10263
10264 if (ret)
10265 break;
10266
10267 start += len;
10268 *trimmed += bytes;
10269
10270 if (fatal_signal_pending(current)) {
10271 ret = -ERESTARTSYS;
10272 break;
10273 }
10274
10275 cond_resched();
10276 }
10277
10278 return ret;
10279}
10280
10065int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range) 10281int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range)
10066{ 10282{
10067 struct btrfs_fs_info *fs_info = root->fs_info; 10283 struct btrfs_fs_info *fs_info = root->fs_info;
10068 struct btrfs_block_group_cache *cache = NULL; 10284 struct btrfs_block_group_cache *cache = NULL;
10285 struct btrfs_device *device;
10286 struct list_head *devices;
10069 u64 group_trimmed; 10287 u64 group_trimmed;
10070 u64 start; 10288 u64 start;
10071 u64 end; 10289 u64 end;
@@ -10120,6 +10338,18 @@ int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range)
10120 cache = next_block_group(fs_info->tree_root, cache); 10338 cache = next_block_group(fs_info->tree_root, cache);
10121 } 10339 }
10122 10340
10341 mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
10342 devices = &root->fs_info->fs_devices->alloc_list;
10343 list_for_each_entry(device, devices, dev_alloc_list) {
10344 ret = btrfs_trim_free_extents(device, range->minlen,
10345 &group_trimmed);
10346 if (ret)
10347 break;
10348
10349 trimmed += group_trimmed;
10350 }
10351 mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
10352
10123 range->len = trimmed; 10353 range->len = trimmed;
10124 return ret; 10354 return ret;
10125} 10355}