diff options
Diffstat (limited to 'fs/btrfs/extent-tree.c')
-rw-r--r-- | fs/btrfs/extent-tree.c | 248 |
1 files changed, 239 insertions, 9 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 59d59d98bca1..5411f0ab5683 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
@@ -1882,10 +1882,77 @@ static int remove_extent_backref(struct btrfs_trans_handle *trans, | |||
1882 | return ret; | 1882 | return ret; |
1883 | } | 1883 | } |
1884 | 1884 | ||
1885 | static int btrfs_issue_discard(struct block_device *bdev, | 1885 | #define in_range(b, first, len) ((b) >= (first) && (b) < (first) + (len)) |
1886 | u64 start, u64 len) | 1886 | static int btrfs_issue_discard(struct block_device *bdev, u64 start, u64 len, |
1887 | u64 *discarded_bytes) | ||
1887 | { | 1888 | { |
1888 | return blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_NOFS, 0); | 1889 | int j, ret = 0; |
1890 | u64 bytes_left, end; | ||
1891 | u64 aligned_start = ALIGN(start, 1 << 9); | ||
1892 | |||
1893 | if (WARN_ON(start != aligned_start)) { | ||
1894 | len -= aligned_start - start; | ||
1895 | len = round_down(len, 1 << 9); | ||
1896 | start = aligned_start; | ||
1897 | } | ||
1898 | |||
1899 | *discarded_bytes = 0; | ||
1900 | |||
1901 | if (!len) | ||
1902 | return 0; | ||
1903 | |||
1904 | end = start + len; | ||
1905 | bytes_left = len; | ||
1906 | |||
1907 | /* Skip any superblocks on this device. */ | ||
1908 | for (j = 0; j < BTRFS_SUPER_MIRROR_MAX; j++) { | ||
1909 | u64 sb_start = btrfs_sb_offset(j); | ||
1910 | u64 sb_end = sb_start + BTRFS_SUPER_INFO_SIZE; | ||
1911 | u64 size = sb_start - start; | ||
1912 | |||
1913 | if (!in_range(sb_start, start, bytes_left) && | ||
1914 | !in_range(sb_end, start, bytes_left) && | ||
1915 | !in_range(start, sb_start, BTRFS_SUPER_INFO_SIZE)) | ||
1916 | continue; | ||
1917 | |||
1918 | /* | ||
1919 | * Superblock spans beginning of range. Adjust start and | ||
1920 | * try again. | ||
1921 | */ | ||
1922 | if (sb_start <= start) { | ||
1923 | start += sb_end - start; | ||
1924 | if (start > end) { | ||
1925 | bytes_left = 0; | ||
1926 | break; | ||
1927 | } | ||
1928 | bytes_left = end - start; | ||
1929 | continue; | ||
1930 | } | ||
1931 | |||
1932 | if (size) { | ||
1933 | ret = blkdev_issue_discard(bdev, start >> 9, size >> 9, | ||
1934 | GFP_NOFS, 0); | ||
1935 | if (!ret) | ||
1936 | *discarded_bytes += size; | ||
1937 | else if (ret != -EOPNOTSUPP) | ||
1938 | return ret; | ||
1939 | } | ||
1940 | |||
1941 | start = sb_end; | ||
1942 | if (start > end) { | ||
1943 | bytes_left = 0; | ||
1944 | break; | ||
1945 | } | ||
1946 | bytes_left = end - start; | ||
1947 | } | ||
1948 | |||
1949 | if (bytes_left) { | ||
1950 | ret = blkdev_issue_discard(bdev, start >> 9, bytes_left >> 9, | ||
1951 | GFP_NOFS, 0); | ||
1952 | if (!ret) | ||
1953 | *discarded_bytes += bytes_left; | ||
1954 | } | ||
1955 | return ret; | ||
1889 | } | 1956 | } |
1890 | 1957 | ||
1891 | int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, | 1958 | int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, |
@@ -1906,14 +1973,16 @@ int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, | |||
1906 | 1973 | ||
1907 | 1974 | ||
1908 | for (i = 0; i < bbio->num_stripes; i++, stripe++) { | 1975 | for (i = 0; i < bbio->num_stripes; i++, stripe++) { |
1976 | u64 bytes; | ||
1909 | if (!stripe->dev->can_discard) | 1977 | if (!stripe->dev->can_discard) |
1910 | continue; | 1978 | continue; |
1911 | 1979 | ||
1912 | ret = btrfs_issue_discard(stripe->dev->bdev, | 1980 | ret = btrfs_issue_discard(stripe->dev->bdev, |
1913 | stripe->physical, | 1981 | stripe->physical, |
1914 | stripe->length); | 1982 | stripe->length, |
1983 | &bytes); | ||
1915 | if (!ret) | 1984 | if (!ret) |
1916 | discarded_bytes += stripe->length; | 1985 | discarded_bytes += bytes; |
1917 | else if (ret != -EOPNOTSUPP) | 1986 | else if (ret != -EOPNOTSUPP) |
1918 | break; /* Logic errors or -ENOMEM, or -EIO but I don't know how that could happen JDM */ | 1987 | break; /* Logic errors or -ENOMEM, or -EIO but I don't know how that could happen JDM */ |
1919 | 1988 | ||
@@ -6061,20 +6130,19 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, | |||
6061 | struct btrfs_root *root) | 6130 | struct btrfs_root *root) |
6062 | { | 6131 | { |
6063 | struct btrfs_fs_info *fs_info = root->fs_info; | 6132 | struct btrfs_fs_info *fs_info = root->fs_info; |
6133 | struct btrfs_block_group_cache *block_group, *tmp; | ||
6134 | struct list_head *deleted_bgs; | ||
6064 | struct extent_io_tree *unpin; | 6135 | struct extent_io_tree *unpin; |
6065 | u64 start; | 6136 | u64 start; |
6066 | u64 end; | 6137 | u64 end; |
6067 | int ret; | 6138 | int ret; |
6068 | 6139 | ||
6069 | if (trans->aborted) | ||
6070 | return 0; | ||
6071 | |||
6072 | if (fs_info->pinned_extents == &fs_info->freed_extents[0]) | 6140 | if (fs_info->pinned_extents == &fs_info->freed_extents[0]) |
6073 | unpin = &fs_info->freed_extents[1]; | 6141 | unpin = &fs_info->freed_extents[1]; |
6074 | else | 6142 | else |
6075 | unpin = &fs_info->freed_extents[0]; | 6143 | unpin = &fs_info->freed_extents[0]; |
6076 | 6144 | ||
6077 | while (1) { | 6145 | while (!trans->aborted) { |
6078 | mutex_lock(&fs_info->unused_bg_unpin_mutex); | 6146 | mutex_lock(&fs_info->unused_bg_unpin_mutex); |
6079 | ret = find_first_extent_bit(unpin, 0, &start, &end, | 6147 | ret = find_first_extent_bit(unpin, 0, &start, &end, |
6080 | EXTENT_DIRTY, NULL); | 6148 | EXTENT_DIRTY, NULL); |
@@ -6093,6 +6161,34 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, | |||
6093 | cond_resched(); | 6161 | cond_resched(); |
6094 | } | 6162 | } |
6095 | 6163 | ||
6164 | /* | ||
6165 | * Transaction is finished. We don't need the lock anymore. We | ||
6166 | * do need to clean up the block groups in case of a transaction | ||
6167 | * abort. | ||
6168 | */ | ||
6169 | deleted_bgs = &trans->transaction->deleted_bgs; | ||
6170 | list_for_each_entry_safe(block_group, tmp, deleted_bgs, bg_list) { | ||
6171 | u64 trimmed = 0; | ||
6172 | |||
6173 | ret = -EROFS; | ||
6174 | if (!trans->aborted) | ||
6175 | ret = btrfs_discard_extent(root, | ||
6176 | block_group->key.objectid, | ||
6177 | block_group->key.offset, | ||
6178 | &trimmed); | ||
6179 | |||
6180 | list_del_init(&block_group->bg_list); | ||
6181 | btrfs_put_block_group_trimming(block_group); | ||
6182 | btrfs_put_block_group(block_group); | ||
6183 | |||
6184 | if (ret) { | ||
6185 | const char *errstr = btrfs_decode_error(ret); | ||
6186 | btrfs_warn(fs_info, | ||
6187 | "Discard failed while removing blockgroup: errno=%d %s\n", | ||
6188 | ret, errstr); | ||
6189 | } | ||
6190 | } | ||
6191 | |||
6096 | return 0; | 6192 | return 0; |
6097 | } | 6193 | } |
6098 | 6194 | ||
@@ -9830,6 +9926,11 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, | |||
9830 | * currently running transaction might finish and a new one start, | 9926 | * currently running transaction might finish and a new one start, |
9831 | * allowing for new block groups to be created that can reuse the same | 9927 | * allowing for new block groups to be created that can reuse the same |
9832 | * physical device locations unless we take this special care. | 9928 | * physical device locations unless we take this special care. |
9929 | * | ||
9930 | * There may also be an implicit trim operation if the file system | ||
9931 | * is mounted with -odiscard. The same protections must remain | ||
9932 | * in place until the extents have been discarded completely when | ||
9933 | * the transaction commit has completed. | ||
9833 | */ | 9934 | */ |
9834 | remove_em = (atomic_read(&block_group->trimming) == 0); | 9935 | remove_em = (atomic_read(&block_group->trimming) == 0); |
9835 | /* | 9936 | /* |
@@ -9904,6 +10005,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info) | |||
9904 | spin_lock(&fs_info->unused_bgs_lock); | 10005 | spin_lock(&fs_info->unused_bgs_lock); |
9905 | while (!list_empty(&fs_info->unused_bgs)) { | 10006 | while (!list_empty(&fs_info->unused_bgs)) { |
9906 | u64 start, end; | 10007 | u64 start, end; |
10008 | int trimming; | ||
9907 | 10009 | ||
9908 | block_group = list_first_entry(&fs_info->unused_bgs, | 10010 | block_group = list_first_entry(&fs_info->unused_bgs, |
9909 | struct btrfs_block_group_cache, | 10011 | struct btrfs_block_group_cache, |
@@ -10003,12 +10105,39 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info) | |||
10003 | spin_unlock(&block_group->lock); | 10105 | spin_unlock(&block_group->lock); |
10004 | spin_unlock(&space_info->lock); | 10106 | spin_unlock(&space_info->lock); |
10005 | 10107 | ||
10108 | /* DISCARD can flip during remount */ | ||
10109 | trimming = btrfs_test_opt(root, DISCARD); | ||
10110 | |||
10111 | /* Implicit trim during transaction commit. */ | ||
10112 | if (trimming) | ||
10113 | btrfs_get_block_group_trimming(block_group); | ||
10114 | |||
10006 | /* | 10115 | /* |
10007 | * Btrfs_remove_chunk will abort the transaction if things go | 10116 | * Btrfs_remove_chunk will abort the transaction if things go |
10008 | * horribly wrong. | 10117 | * horribly wrong. |
10009 | */ | 10118 | */ |
10010 | ret = btrfs_remove_chunk(trans, root, | 10119 | ret = btrfs_remove_chunk(trans, root, |
10011 | block_group->key.objectid); | 10120 | block_group->key.objectid); |
10121 | |||
10122 | if (ret) { | ||
10123 | if (trimming) | ||
10124 | btrfs_put_block_group_trimming(block_group); | ||
10125 | goto end_trans; | ||
10126 | } | ||
10127 | |||
10128 | /* | ||
10129 | * If we're not mounted with -odiscard, we can just forget | ||
10130 | * about this block group. Otherwise we'll need to wait | ||
10131 | * until transaction commit to do the actual discard. | ||
10132 | */ | ||
10133 | if (trimming) { | ||
10134 | WARN_ON(!list_empty(&block_group->bg_list)); | ||
10135 | spin_lock(&trans->transaction->deleted_bgs_lock); | ||
10136 | list_move(&block_group->bg_list, | ||
10137 | &trans->transaction->deleted_bgs); | ||
10138 | spin_unlock(&trans->transaction->deleted_bgs_lock); | ||
10139 | btrfs_get_block_group(block_group); | ||
10140 | } | ||
10012 | end_trans: | 10141 | end_trans: |
10013 | btrfs_end_transaction(trans, root); | 10142 | btrfs_end_transaction(trans, root); |
10014 | next: | 10143 | next: |
@@ -10062,10 +10191,99 @@ int btrfs_error_unpin_extent_range(struct btrfs_root *root, u64 start, u64 end) | |||
10062 | return unpin_extent_range(root, start, end, false); | 10191 | return unpin_extent_range(root, start, end, false); |
10063 | } | 10192 | } |
10064 | 10193 | ||
10194 | /* | ||
10195 | * It used to be that old block groups would be left around forever. | ||
10196 | * Iterating over them would be enough to trim unused space. Since we | ||
10197 | * now automatically remove them, we also need to iterate over unallocated | ||
10198 | * space. | ||
10199 | * | ||
10200 | * We don't want a transaction for this since the discard may take a | ||
10201 | * substantial amount of time. We don't require that a transaction be | ||
10202 | * running, but we do need to take a running transaction into account | ||
10203 | * to ensure that we're not discarding chunks that were released in | ||
10204 | * the current transaction. | ||
10205 | * | ||
10206 | * Holding the chunks lock will prevent other threads from allocating | ||
10207 | * or releasing chunks, but it won't prevent a running transaction | ||
10208 | * from committing and releasing the memory that the pending chunks | ||
10209 | * list head uses. For that, we need to take a reference to the | ||
10210 | * transaction. | ||
10211 | */ | ||
10212 | static int btrfs_trim_free_extents(struct btrfs_device *device, | ||
10213 | u64 minlen, u64 *trimmed) | ||
10214 | { | ||
10215 | u64 start = 0, len = 0; | ||
10216 | int ret; | ||
10217 | |||
10218 | *trimmed = 0; | ||
10219 | |||
10220 | /* Not writeable = nothing to do. */ | ||
10221 | if (!device->writeable) | ||
10222 | return 0; | ||
10223 | |||
10224 | /* No free space = nothing to do. */ | ||
10225 | if (device->total_bytes <= device->bytes_used) | ||
10226 | return 0; | ||
10227 | |||
10228 | ret = 0; | ||
10229 | |||
10230 | while (1) { | ||
10231 | struct btrfs_fs_info *fs_info = device->dev_root->fs_info; | ||
10232 | struct btrfs_transaction *trans; | ||
10233 | u64 bytes; | ||
10234 | |||
10235 | ret = mutex_lock_interruptible(&fs_info->chunk_mutex); | ||
10236 | if (ret) | ||
10237 | return ret; | ||
10238 | |||
10239 | down_read(&fs_info->commit_root_sem); | ||
10240 | |||
10241 | spin_lock(&fs_info->trans_lock); | ||
10242 | trans = fs_info->running_transaction; | ||
10243 | if (trans) | ||
10244 | atomic_inc(&trans->use_count); | ||
10245 | spin_unlock(&fs_info->trans_lock); | ||
10246 | |||
10247 | ret = find_free_dev_extent_start(trans, device, minlen, start, | ||
10248 | &start, &len); | ||
10249 | if (trans) | ||
10250 | btrfs_put_transaction(trans); | ||
10251 | |||
10252 | if (ret) { | ||
10253 | up_read(&fs_info->commit_root_sem); | ||
10254 | mutex_unlock(&fs_info->chunk_mutex); | ||
10255 | if (ret == -ENOSPC) | ||
10256 | ret = 0; | ||
10257 | break; | ||
10258 | } | ||
10259 | |||
10260 | ret = btrfs_issue_discard(device->bdev, start, len, &bytes); | ||
10261 | up_read(&fs_info->commit_root_sem); | ||
10262 | mutex_unlock(&fs_info->chunk_mutex); | ||
10263 | |||
10264 | if (ret) | ||
10265 | break; | ||
10266 | |||
10267 | start += len; | ||
10268 | *trimmed += bytes; | ||
10269 | |||
10270 | if (fatal_signal_pending(current)) { | ||
10271 | ret = -ERESTARTSYS; | ||
10272 | break; | ||
10273 | } | ||
10274 | |||
10275 | cond_resched(); | ||
10276 | } | ||
10277 | |||
10278 | return ret; | ||
10279 | } | ||
10280 | |||
10065 | int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range) | 10281 | int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range) |
10066 | { | 10282 | { |
10067 | struct btrfs_fs_info *fs_info = root->fs_info; | 10283 | struct btrfs_fs_info *fs_info = root->fs_info; |
10068 | struct btrfs_block_group_cache *cache = NULL; | 10284 | struct btrfs_block_group_cache *cache = NULL; |
10285 | struct btrfs_device *device; | ||
10286 | struct list_head *devices; | ||
10069 | u64 group_trimmed; | 10287 | u64 group_trimmed; |
10070 | u64 start; | 10288 | u64 start; |
10071 | u64 end; | 10289 | u64 end; |
@@ -10120,6 +10338,18 @@ int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range) | |||
10120 | cache = next_block_group(fs_info->tree_root, cache); | 10338 | cache = next_block_group(fs_info->tree_root, cache); |
10121 | } | 10339 | } |
10122 | 10340 | ||
10341 | mutex_lock(&root->fs_info->fs_devices->device_list_mutex); | ||
10342 | devices = &root->fs_info->fs_devices->alloc_list; | ||
10343 | list_for_each_entry(device, devices, dev_alloc_list) { | ||
10344 | ret = btrfs_trim_free_extents(device, range->minlen, | ||
10345 | &group_trimmed); | ||
10346 | if (ret) | ||
10347 | break; | ||
10348 | |||
10349 | trimmed += group_trimmed; | ||
10350 | } | ||
10351 | mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); | ||
10352 | |||
10123 | range->len = trimmed; | 10353 | range->len = trimmed; |
10124 | return ret; | 10354 | return ret; |
10125 | } | 10355 | } |