diff options
Diffstat (limited to 'fs/btrfs/extent-tree.c')
| -rw-r--r-- | fs/btrfs/extent-tree.c | 248 |
1 files changed, 239 insertions, 9 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 59d59d98bca1..5411f0ab5683 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
| @@ -1882,10 +1882,77 @@ static int remove_extent_backref(struct btrfs_trans_handle *trans, | |||
| 1882 | return ret; | 1882 | return ret; |
| 1883 | } | 1883 | } |
| 1884 | 1884 | ||
| 1885 | static int btrfs_issue_discard(struct block_device *bdev, | 1885 | #define in_range(b, first, len) ((b) >= (first) && (b) < (first) + (len)) |
| 1886 | u64 start, u64 len) | 1886 | static int btrfs_issue_discard(struct block_device *bdev, u64 start, u64 len, |
| 1887 | u64 *discarded_bytes) | ||
| 1887 | { | 1888 | { |
| 1888 | return blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_NOFS, 0); | 1889 | int j, ret = 0; |
| 1890 | u64 bytes_left, end; | ||
| 1891 | u64 aligned_start = ALIGN(start, 1 << 9); | ||
| 1892 | |||
| 1893 | if (WARN_ON(start != aligned_start)) { | ||
| 1894 | len -= aligned_start - start; | ||
| 1895 | len = round_down(len, 1 << 9); | ||
| 1896 | start = aligned_start; | ||
| 1897 | } | ||
| 1898 | |||
| 1899 | *discarded_bytes = 0; | ||
| 1900 | |||
| 1901 | if (!len) | ||
| 1902 | return 0; | ||
| 1903 | |||
| 1904 | end = start + len; | ||
| 1905 | bytes_left = len; | ||
| 1906 | |||
| 1907 | /* Skip any superblocks on this device. */ | ||
| 1908 | for (j = 0; j < BTRFS_SUPER_MIRROR_MAX; j++) { | ||
| 1909 | u64 sb_start = btrfs_sb_offset(j); | ||
| 1910 | u64 sb_end = sb_start + BTRFS_SUPER_INFO_SIZE; | ||
| 1911 | u64 size = sb_start - start; | ||
| 1912 | |||
| 1913 | if (!in_range(sb_start, start, bytes_left) && | ||
| 1914 | !in_range(sb_end, start, bytes_left) && | ||
| 1915 | !in_range(start, sb_start, BTRFS_SUPER_INFO_SIZE)) | ||
| 1916 | continue; | ||
| 1917 | |||
| 1918 | /* | ||
| 1919 | * Superblock spans beginning of range. Adjust start and | ||
| 1920 | * try again. | ||
| 1921 | */ | ||
| 1922 | if (sb_start <= start) { | ||
| 1923 | start += sb_end - start; | ||
| 1924 | if (start > end) { | ||
| 1925 | bytes_left = 0; | ||
| 1926 | break; | ||
| 1927 | } | ||
| 1928 | bytes_left = end - start; | ||
| 1929 | continue; | ||
| 1930 | } | ||
| 1931 | |||
| 1932 | if (size) { | ||
| 1933 | ret = blkdev_issue_discard(bdev, start >> 9, size >> 9, | ||
| 1934 | GFP_NOFS, 0); | ||
| 1935 | if (!ret) | ||
| 1936 | *discarded_bytes += size; | ||
| 1937 | else if (ret != -EOPNOTSUPP) | ||
| 1938 | return ret; | ||
| 1939 | } | ||
| 1940 | |||
| 1941 | start = sb_end; | ||
| 1942 | if (start > end) { | ||
| 1943 | bytes_left = 0; | ||
| 1944 | break; | ||
| 1945 | } | ||
| 1946 | bytes_left = end - start; | ||
| 1947 | } | ||
| 1948 | |||
| 1949 | if (bytes_left) { | ||
| 1950 | ret = blkdev_issue_discard(bdev, start >> 9, bytes_left >> 9, | ||
| 1951 | GFP_NOFS, 0); | ||
| 1952 | if (!ret) | ||
| 1953 | *discarded_bytes += bytes_left; | ||
| 1954 | } | ||
| 1955 | return ret; | ||
| 1889 | } | 1956 | } |
| 1890 | 1957 | ||
| 1891 | int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, | 1958 | int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, |
| @@ -1906,14 +1973,16 @@ int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, | |||
| 1906 | 1973 | ||
| 1907 | 1974 | ||
| 1908 | for (i = 0; i < bbio->num_stripes; i++, stripe++) { | 1975 | for (i = 0; i < bbio->num_stripes; i++, stripe++) { |
| 1976 | u64 bytes; | ||
| 1909 | if (!stripe->dev->can_discard) | 1977 | if (!stripe->dev->can_discard) |
| 1910 | continue; | 1978 | continue; |
| 1911 | 1979 | ||
| 1912 | ret = btrfs_issue_discard(stripe->dev->bdev, | 1980 | ret = btrfs_issue_discard(stripe->dev->bdev, |
| 1913 | stripe->physical, | 1981 | stripe->physical, |
| 1914 | stripe->length); | 1982 | stripe->length, |
| 1983 | &bytes); | ||
| 1915 | if (!ret) | 1984 | if (!ret) |
| 1916 | discarded_bytes += stripe->length; | 1985 | discarded_bytes += bytes; |
| 1917 | else if (ret != -EOPNOTSUPP) | 1986 | else if (ret != -EOPNOTSUPP) |
| 1918 | break; /* Logic errors or -ENOMEM, or -EIO but I don't know how that could happen JDM */ | 1987 | break; /* Logic errors or -ENOMEM, or -EIO but I don't know how that could happen JDM */ |
| 1919 | 1988 | ||
| @@ -6061,20 +6130,19 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, | |||
| 6061 | struct btrfs_root *root) | 6130 | struct btrfs_root *root) |
| 6062 | { | 6131 | { |
| 6063 | struct btrfs_fs_info *fs_info = root->fs_info; | 6132 | struct btrfs_fs_info *fs_info = root->fs_info; |
| 6133 | struct btrfs_block_group_cache *block_group, *tmp; | ||
| 6134 | struct list_head *deleted_bgs; | ||
| 6064 | struct extent_io_tree *unpin; | 6135 | struct extent_io_tree *unpin; |
| 6065 | u64 start; | 6136 | u64 start; |
| 6066 | u64 end; | 6137 | u64 end; |
| 6067 | int ret; | 6138 | int ret; |
| 6068 | 6139 | ||
| 6069 | if (trans->aborted) | ||
| 6070 | return 0; | ||
| 6071 | |||
| 6072 | if (fs_info->pinned_extents == &fs_info->freed_extents[0]) | 6140 | if (fs_info->pinned_extents == &fs_info->freed_extents[0]) |
| 6073 | unpin = &fs_info->freed_extents[1]; | 6141 | unpin = &fs_info->freed_extents[1]; |
| 6074 | else | 6142 | else |
| 6075 | unpin = &fs_info->freed_extents[0]; | 6143 | unpin = &fs_info->freed_extents[0]; |
| 6076 | 6144 | ||
| 6077 | while (1) { | 6145 | while (!trans->aborted) { |
| 6078 | mutex_lock(&fs_info->unused_bg_unpin_mutex); | 6146 | mutex_lock(&fs_info->unused_bg_unpin_mutex); |
| 6079 | ret = find_first_extent_bit(unpin, 0, &start, &end, | 6147 | ret = find_first_extent_bit(unpin, 0, &start, &end, |
| 6080 | EXTENT_DIRTY, NULL); | 6148 | EXTENT_DIRTY, NULL); |
| @@ -6093,6 +6161,34 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, | |||
| 6093 | cond_resched(); | 6161 | cond_resched(); |
| 6094 | } | 6162 | } |
| 6095 | 6163 | ||
| 6164 | /* | ||
| 6165 | * Transaction is finished. We don't need the lock anymore. We | ||
| 6166 | * do need to clean up the block groups in case of a transaction | ||
| 6167 | * abort. | ||
| 6168 | */ | ||
| 6169 | deleted_bgs = &trans->transaction->deleted_bgs; | ||
| 6170 | list_for_each_entry_safe(block_group, tmp, deleted_bgs, bg_list) { | ||
| 6171 | u64 trimmed = 0; | ||
| 6172 | |||
| 6173 | ret = -EROFS; | ||
| 6174 | if (!trans->aborted) | ||
| 6175 | ret = btrfs_discard_extent(root, | ||
| 6176 | block_group->key.objectid, | ||
| 6177 | block_group->key.offset, | ||
| 6178 | &trimmed); | ||
| 6179 | |||
| 6180 | list_del_init(&block_group->bg_list); | ||
| 6181 | btrfs_put_block_group_trimming(block_group); | ||
| 6182 | btrfs_put_block_group(block_group); | ||
| 6183 | |||
| 6184 | if (ret) { | ||
| 6185 | const char *errstr = btrfs_decode_error(ret); | ||
| 6186 | btrfs_warn(fs_info, | ||
| 6187 | "Discard failed while removing blockgroup: errno=%d %s\n", | ||
| 6188 | ret, errstr); | ||
| 6189 | } | ||
| 6190 | } | ||
| 6191 | |||
| 6096 | return 0; | 6192 | return 0; |
| 6097 | } | 6193 | } |
| 6098 | 6194 | ||
| @@ -9830,6 +9926,11 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, | |||
| 9830 | * currently running transaction might finish and a new one start, | 9926 | * currently running transaction might finish and a new one start, |
| 9831 | * allowing for new block groups to be created that can reuse the same | 9927 | * allowing for new block groups to be created that can reuse the same |
| 9832 | * physical device locations unless we take this special care. | 9928 | * physical device locations unless we take this special care. |
| 9929 | * | ||
| 9930 | * There may also be an implicit trim operation if the file system | ||
| 9931 | * is mounted with -odiscard. The same protections must remain | ||
| 9932 | * in place until the extents have been discarded completely when | ||
| 9933 | * the transaction commit has completed. | ||
| 9833 | */ | 9934 | */ |
| 9834 | remove_em = (atomic_read(&block_group->trimming) == 0); | 9935 | remove_em = (atomic_read(&block_group->trimming) == 0); |
| 9835 | /* | 9936 | /* |
| @@ -9904,6 +10005,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info) | |||
| 9904 | spin_lock(&fs_info->unused_bgs_lock); | 10005 | spin_lock(&fs_info->unused_bgs_lock); |
| 9905 | while (!list_empty(&fs_info->unused_bgs)) { | 10006 | while (!list_empty(&fs_info->unused_bgs)) { |
| 9906 | u64 start, end; | 10007 | u64 start, end; |
| 10008 | int trimming; | ||
| 9907 | 10009 | ||
| 9908 | block_group = list_first_entry(&fs_info->unused_bgs, | 10010 | block_group = list_first_entry(&fs_info->unused_bgs, |
| 9909 | struct btrfs_block_group_cache, | 10011 | struct btrfs_block_group_cache, |
| @@ -10003,12 +10105,39 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info) | |||
| 10003 | spin_unlock(&block_group->lock); | 10105 | spin_unlock(&block_group->lock); |
| 10004 | spin_unlock(&space_info->lock); | 10106 | spin_unlock(&space_info->lock); |
| 10005 | 10107 | ||
| 10108 | /* DISCARD can flip during remount */ | ||
| 10109 | trimming = btrfs_test_opt(root, DISCARD); | ||
| 10110 | |||
| 10111 | /* Implicit trim during transaction commit. */ | ||
| 10112 | if (trimming) | ||
| 10113 | btrfs_get_block_group_trimming(block_group); | ||
| 10114 | |||
| 10006 | /* | 10115 | /* |
| 10007 | * Btrfs_remove_chunk will abort the transaction if things go | 10116 | * Btrfs_remove_chunk will abort the transaction if things go |
| 10008 | * horribly wrong. | 10117 | * horribly wrong. |
| 10009 | */ | 10118 | */ |
| 10010 | ret = btrfs_remove_chunk(trans, root, | 10119 | ret = btrfs_remove_chunk(trans, root, |
| 10011 | block_group->key.objectid); | 10120 | block_group->key.objectid); |
| 10121 | |||
| 10122 | if (ret) { | ||
| 10123 | if (trimming) | ||
| 10124 | btrfs_put_block_group_trimming(block_group); | ||
| 10125 | goto end_trans; | ||
| 10126 | } | ||
| 10127 | |||
| 10128 | /* | ||
| 10129 | * If we're not mounted with -odiscard, we can just forget | ||
| 10130 | * about this block group. Otherwise we'll need to wait | ||
| 10131 | * until transaction commit to do the actual discard. | ||
| 10132 | */ | ||
| 10133 | if (trimming) { | ||
| 10134 | WARN_ON(!list_empty(&block_group->bg_list)); | ||
| 10135 | spin_lock(&trans->transaction->deleted_bgs_lock); | ||
| 10136 | list_move(&block_group->bg_list, | ||
| 10137 | &trans->transaction->deleted_bgs); | ||
| 10138 | spin_unlock(&trans->transaction->deleted_bgs_lock); | ||
| 10139 | btrfs_get_block_group(block_group); | ||
| 10140 | } | ||
| 10012 | end_trans: | 10141 | end_trans: |
| 10013 | btrfs_end_transaction(trans, root); | 10142 | btrfs_end_transaction(trans, root); |
| 10014 | next: | 10143 | next: |
| @@ -10062,10 +10191,99 @@ int btrfs_error_unpin_extent_range(struct btrfs_root *root, u64 start, u64 end) | |||
| 10062 | return unpin_extent_range(root, start, end, false); | 10191 | return unpin_extent_range(root, start, end, false); |
| 10063 | } | 10192 | } |
| 10064 | 10193 | ||
| 10194 | /* | ||
| 10195 | * It used to be that old block groups would be left around forever. | ||
| 10196 | * Iterating over them would be enough to trim unused space. Since we | ||
| 10197 | * now automatically remove them, we also need to iterate over unallocated | ||
| 10198 | * space. | ||
| 10199 | * | ||
| 10200 | * We don't want a transaction for this since the discard may take a | ||
| 10201 | * substantial amount of time. We don't require that a transaction be | ||
| 10202 | * running, but we do need to take a running transaction into account | ||
| 10203 | * to ensure that we're not discarding chunks that were released in | ||
| 10204 | * the current transaction. | ||
| 10205 | * | ||
| 10206 | * Holding the chunks lock will prevent other threads from allocating | ||
| 10207 | * or releasing chunks, but it won't prevent a running transaction | ||
| 10208 | * from committing and releasing the memory that the pending chunks | ||
| 10209 | * list head uses. For that, we need to take a reference to the | ||
| 10210 | * transaction. | ||
| 10211 | */ | ||
| 10212 | static int btrfs_trim_free_extents(struct btrfs_device *device, | ||
| 10213 | u64 minlen, u64 *trimmed) | ||
| 10214 | { | ||
| 10215 | u64 start = 0, len = 0; | ||
| 10216 | int ret; | ||
| 10217 | |||
| 10218 | *trimmed = 0; | ||
| 10219 | |||
| 10220 | /* Not writeable = nothing to do. */ | ||
| 10221 | if (!device->writeable) | ||
| 10222 | return 0; | ||
| 10223 | |||
| 10224 | /* No free space = nothing to do. */ | ||
| 10225 | if (device->total_bytes <= device->bytes_used) | ||
| 10226 | return 0; | ||
| 10227 | |||
| 10228 | ret = 0; | ||
| 10229 | |||
| 10230 | while (1) { | ||
| 10231 | struct btrfs_fs_info *fs_info = device->dev_root->fs_info; | ||
| 10232 | struct btrfs_transaction *trans; | ||
| 10233 | u64 bytes; | ||
| 10234 | |||
| 10235 | ret = mutex_lock_interruptible(&fs_info->chunk_mutex); | ||
| 10236 | if (ret) | ||
| 10237 | return ret; | ||
| 10238 | |||
| 10239 | down_read(&fs_info->commit_root_sem); | ||
| 10240 | |||
| 10241 | spin_lock(&fs_info->trans_lock); | ||
| 10242 | trans = fs_info->running_transaction; | ||
| 10243 | if (trans) | ||
| 10244 | atomic_inc(&trans->use_count); | ||
| 10245 | spin_unlock(&fs_info->trans_lock); | ||
| 10246 | |||
| 10247 | ret = find_free_dev_extent_start(trans, device, minlen, start, | ||
| 10248 | &start, &len); | ||
| 10249 | if (trans) | ||
| 10250 | btrfs_put_transaction(trans); | ||
| 10251 | |||
| 10252 | if (ret) { | ||
| 10253 | up_read(&fs_info->commit_root_sem); | ||
| 10254 | mutex_unlock(&fs_info->chunk_mutex); | ||
| 10255 | if (ret == -ENOSPC) | ||
| 10256 | ret = 0; | ||
| 10257 | break; | ||
| 10258 | } | ||
| 10259 | |||
| 10260 | ret = btrfs_issue_discard(device->bdev, start, len, &bytes); | ||
| 10261 | up_read(&fs_info->commit_root_sem); | ||
| 10262 | mutex_unlock(&fs_info->chunk_mutex); | ||
| 10263 | |||
| 10264 | if (ret) | ||
| 10265 | break; | ||
| 10266 | |||
| 10267 | start += len; | ||
| 10268 | *trimmed += bytes; | ||
| 10269 | |||
| 10270 | if (fatal_signal_pending(current)) { | ||
| 10271 | ret = -ERESTARTSYS; | ||
| 10272 | break; | ||
| 10273 | } | ||
| 10274 | |||
| 10275 | cond_resched(); | ||
| 10276 | } | ||
| 10277 | |||
| 10278 | return ret; | ||
| 10279 | } | ||
| 10280 | |||
| 10065 | int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range) | 10281 | int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range) |
| 10066 | { | 10282 | { |
| 10067 | struct btrfs_fs_info *fs_info = root->fs_info; | 10283 | struct btrfs_fs_info *fs_info = root->fs_info; |
| 10068 | struct btrfs_block_group_cache *cache = NULL; | 10284 | struct btrfs_block_group_cache *cache = NULL; |
| 10285 | struct btrfs_device *device; | ||
| 10286 | struct list_head *devices; | ||
| 10069 | u64 group_trimmed; | 10287 | u64 group_trimmed; |
| 10070 | u64 start; | 10288 | u64 start; |
| 10071 | u64 end; | 10289 | u64 end; |
| @@ -10120,6 +10338,18 @@ int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range) | |||
| 10120 | cache = next_block_group(fs_info->tree_root, cache); | 10338 | cache = next_block_group(fs_info->tree_root, cache); |
| 10121 | } | 10339 | } |
| 10122 | 10340 | ||
| 10341 | mutex_lock(&root->fs_info->fs_devices->device_list_mutex); | ||
| 10342 | devices = &root->fs_info->fs_devices->alloc_list; | ||
| 10343 | list_for_each_entry(device, devices, dev_alloc_list) { | ||
| 10344 | ret = btrfs_trim_free_extents(device, range->minlen, | ||
| 10345 | &group_trimmed); | ||
| 10346 | if (ret) | ||
| 10347 | break; | ||
| 10348 | |||
| 10349 | trimmed += group_trimmed; | ||
| 10350 | } | ||
| 10351 | mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); | ||
| 10352 | |||
| 10123 | range->len = trimmed; | 10353 | range->len = trimmed; |
| 10124 | return ret; | 10354 | return ret; |
| 10125 | } | 10355 | } |
