aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/extent-tree.c
diff options
context:
space:
mode:
author: Josef Bacik <josef@redhat.com>, 2011-07-26 17:00:46 -0400
committer: Josef Bacik <josef@redhat.com>, 2011-10-19 15:12:30 -0400
commit: fb25e9141ab843794d5cdef3936ccb58435e2371 (patch)
tree: 73450e1666520ffc9d6405c51885d3c42d7d963c /fs/btrfs/extent-tree.c
parent: 830c4adbd04a79f806d4fa579546f36a71b727c1 (diff)
Btrfs: use bytes_may_use for all ENOSPC reservations
We have been using bytes_reserved for metadata reservations, which is wrong since we use that to keep track of outstanding reservations from the allocator. This resulted in us doing a lot of silly things to make sure we don't allocate a bunch of metadata chunks since we never had a real view of how much space was actually in use by metadata.

This passes Arne's enospc test and xfstests as well as my own enospc tests. Hopefully this will get us moving in the right direction. Thanks,

Signed-off-by: Josef Bacik <josef@redhat.com>
Diffstat (limited to 'fs/btrfs/extent-tree.c')
-rw-r--r-- fs/btrfs/extent-tree.c 163
1 files changed, 89 insertions, 74 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 03edac4f7771..fbe6278f466b 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -52,6 +52,21 @@ enum {
52 CHUNK_ALLOC_LIMITED = 2, 52 CHUNK_ALLOC_LIMITED = 2,
53}; 53};
54 54
55/*
56 * Control how reservations are dealt with.
57 *
58 * RESERVE_FREE - freeing a reservation.
59 * RESERVE_ALLOC - allocating space and we need to update bytes_may_use for
60 * ENOSPC accounting
61 * RESERVE_ALLOC_NO_ACCOUNT - allocating space and we should not update
62 * bytes_may_use as the ENOSPC accounting is done elsewhere
63 */
64enum {
65 RESERVE_FREE = 0,
66 RESERVE_ALLOC = 1,
67 RESERVE_ALLOC_NO_ACCOUNT = 2,
68};
69
55static int update_block_group(struct btrfs_trans_handle *trans, 70static int update_block_group(struct btrfs_trans_handle *trans,
56 struct btrfs_root *root, 71 struct btrfs_root *root,
57 u64 bytenr, u64 num_bytes, int alloc); 72 u64 bytenr, u64 num_bytes, int alloc);
@@ -81,6 +96,8 @@ static int find_next_key(struct btrfs_path *path, int level,
81 struct btrfs_key *key); 96 struct btrfs_key *key);
82static void dump_space_info(struct btrfs_space_info *info, u64 bytes, 97static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
83 int dump_block_groups); 98 int dump_block_groups);
99static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache,
100 u64 num_bytes, int reserve);
84 101
85static noinline int 102static noinline int
86block_group_cache_done(struct btrfs_block_group_cache *cache) 103block_group_cache_done(struct btrfs_block_group_cache *cache)
@@ -3128,9 +3145,7 @@ commit_trans:
3128} 3145}
3129 3146
3130/* 3147/*
3131 * called when we are clearing an delalloc extent from the 3148 * Called if we need to clear a data reservation for this inode.
3132 * inode's io_tree or there was an error for whatever reason
3133 * after calling btrfs_check_data_free_space
3134 */ 3149 */
3135void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes) 3150void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes)
3136{ 3151{
@@ -3163,6 +3178,7 @@ static int should_alloc_chunk(struct btrfs_root *root,
3163 struct btrfs_space_info *sinfo, u64 alloc_bytes, 3178 struct btrfs_space_info *sinfo, u64 alloc_bytes,
3164 int force) 3179 int force)
3165{ 3180{
3181 struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv;
3166 u64 num_bytes = sinfo->total_bytes - sinfo->bytes_readonly; 3182 u64 num_bytes = sinfo->total_bytes - sinfo->bytes_readonly;
3167 u64 num_allocated = sinfo->bytes_used + sinfo->bytes_reserved; 3183 u64 num_allocated = sinfo->bytes_used + sinfo->bytes_reserved;
3168 u64 thresh; 3184 u64 thresh;
@@ -3171,6 +3187,13 @@ static int should_alloc_chunk(struct btrfs_root *root,
3171 return 1; 3187 return 1;
3172 3188
3173 /* 3189 /*
3190 * We need to take into account the global rsv because for all intents
3191 * and purposes it's used space. Don't worry about locking the
3192 * global_rsv, it doesn't change except when the transaction commits.
3193 */
3194 num_allocated += global_rsv->size;
3195
3196 /*
3174 * in limited mode, we want to have some free space up to 3197 * in limited mode, we want to have some free space up to
3175 * about 1% of the FS size. 3198 * about 1% of the FS size.
3176 */ 3199 */
@@ -3317,7 +3340,7 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
3317 space_info = block_rsv->space_info; 3340 space_info = block_rsv->space_info;
3318 3341
3319 smp_mb(); 3342 smp_mb();
3320 reserved = space_info->bytes_reserved; 3343 reserved = space_info->bytes_may_use;
3321 progress = space_info->reservation_progress; 3344 progress = space_info->reservation_progress;
3322 3345
3323 if (reserved == 0) 3346 if (reserved == 0)
@@ -3341,9 +3364,9 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
3341 writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages); 3364 writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages);
3342 3365
3343 spin_lock(&space_info->lock); 3366 spin_lock(&space_info->lock);
3344 if (reserved > space_info->bytes_reserved) 3367 if (reserved > space_info->bytes_may_use)
3345 reclaimed += reserved - space_info->bytes_reserved; 3368 reclaimed += reserved - space_info->bytes_may_use;
3346 reserved = space_info->bytes_reserved; 3369 reserved = space_info->bytes_may_use;
3347 spin_unlock(&space_info->lock); 3370 spin_unlock(&space_info->lock);
3348 3371
3349 loops++; 3372 loops++;
@@ -3401,7 +3424,6 @@ static int reserve_metadata_bytes(struct btrfs_trans_handle *trans,
3401 int ret = 0; 3424 int ret = 0;
3402 bool committed = false; 3425 bool committed = false;
3403 bool flushing = false; 3426 bool flushing = false;
3404
3405again: 3427again:
3406 ret = 0; 3428 ret = 0;
3407 spin_lock(&space_info->lock); 3429 spin_lock(&space_info->lock);
@@ -3443,7 +3465,7 @@ again:
3443 if (unused <= space_info->total_bytes) { 3465 if (unused <= space_info->total_bytes) {
3444 unused = space_info->total_bytes - unused; 3466 unused = space_info->total_bytes - unused;
3445 if (unused >= num_bytes) { 3467 if (unused >= num_bytes) {
3446 space_info->bytes_reserved += orig_bytes; 3468 space_info->bytes_may_use += orig_bytes;
3447 ret = 0; 3469 ret = 0;
3448 } else { 3470 } else {
3449 /* 3471 /*
@@ -3614,7 +3636,7 @@ static void block_rsv_release_bytes(struct btrfs_block_rsv *block_rsv,
3614 } 3636 }
3615 if (num_bytes) { 3637 if (num_bytes) {
3616 spin_lock(&space_info->lock); 3638 spin_lock(&space_info->lock);
3617 space_info->bytes_reserved -= num_bytes; 3639 space_info->bytes_may_use -= num_bytes;
3618 space_info->reservation_progress++; 3640 space_info->reservation_progress++;
3619 spin_unlock(&space_info->lock); 3641 spin_unlock(&space_info->lock);
3620 } 3642 }
@@ -3825,12 +3847,12 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
3825 if (sinfo->total_bytes > num_bytes) { 3847 if (sinfo->total_bytes > num_bytes) {
3826 num_bytes = sinfo->total_bytes - num_bytes; 3848 num_bytes = sinfo->total_bytes - num_bytes;
3827 block_rsv->reserved += num_bytes; 3849 block_rsv->reserved += num_bytes;
3828 sinfo->bytes_reserved += num_bytes; 3850 sinfo->bytes_may_use += num_bytes;
3829 } 3851 }
3830 3852
3831 if (block_rsv->reserved >= block_rsv->size) { 3853 if (block_rsv->reserved >= block_rsv->size) {
3832 num_bytes = block_rsv->reserved - block_rsv->size; 3854 num_bytes = block_rsv->reserved - block_rsv->size;
3833 sinfo->bytes_reserved -= num_bytes; 3855 sinfo->bytes_may_use -= num_bytes;
3834 sinfo->reservation_progress++; 3856 sinfo->reservation_progress++;
3835 block_rsv->reserved = block_rsv->size; 3857 block_rsv->reserved = block_rsv->size;
3836 block_rsv->full = 1; 3858 block_rsv->full = 1;
@@ -4133,7 +4155,6 @@ static int update_block_group(struct btrfs_trans_handle *trans,
4133 btrfs_set_block_group_used(&cache->item, old_val); 4155 btrfs_set_block_group_used(&cache->item, old_val);
4134 cache->reserved -= num_bytes; 4156 cache->reserved -= num_bytes;
4135 cache->space_info->bytes_reserved -= num_bytes; 4157 cache->space_info->bytes_reserved -= num_bytes;
4136 cache->space_info->reservation_progress++;
4137 cache->space_info->bytes_used += num_bytes; 4158 cache->space_info->bytes_used += num_bytes;
4138 cache->space_info->disk_used += num_bytes * factor; 4159 cache->space_info->disk_used += num_bytes * factor;
4139 spin_unlock(&cache->lock); 4160 spin_unlock(&cache->lock);
@@ -4185,7 +4206,6 @@ static int pin_down_extent(struct btrfs_root *root,
4185 if (reserved) { 4206 if (reserved) {
4186 cache->reserved -= num_bytes; 4207 cache->reserved -= num_bytes;
4187 cache->space_info->bytes_reserved -= num_bytes; 4208 cache->space_info->bytes_reserved -= num_bytes;
4188 cache->space_info->reservation_progress++;
4189 } 4209 }
4190 spin_unlock(&cache->lock); 4210 spin_unlock(&cache->lock);
4191 spin_unlock(&cache->space_info->lock); 4211 spin_unlock(&cache->space_info->lock);
@@ -4212,46 +4232,55 @@ int btrfs_pin_extent(struct btrfs_root *root,
4212 return 0; 4232 return 0;
4213} 4233}
4214 4234
4215/* 4235/**
4216 * update size of reserved extents. this function may return -EAGAIN 4236 * btrfs_update_reserved_bytes - update the block_group and space info counters
4217 * if 'reserve' is true or 'sinfo' is false. 4237 * @cache: The cache we are manipulating
4238 * @num_bytes: The number of bytes in question
4239 * @reserve: One of the reservation enums
4240 *
4241 * This is called by the allocator when it reserves space, or by somebody who is
4242 * freeing space that was never actually used on disk. For example if you
4243 * reserve some space for a new leaf in transaction A and before transaction A
4244 * commits you free that leaf, you call this with reserve set to 0 in order to
4245 * clear the reservation.
4246 *
4247 * Metadata reservations should be called with RESERVE_ALLOC so we do the proper
4248 * ENOSPC accounting. For data we handle the reservation through clearing the
4249 * delalloc bits in the io_tree. We have to do this since we could end up
4250 * allocating less disk space for the amount of data we have reserved in the
4251 * case of compression.
4252 *
4253 * If this is a reservation and the block group has become read only we cannot
4254 * make the reservation and return -EAGAIN, otherwise this function always
4255 * succeeds.
4218 */ 4256 */
4219int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache, 4257static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache,
4220 u64 num_bytes, int reserve, int sinfo) 4258 u64 num_bytes, int reserve)
4221{ 4259{
4260 struct btrfs_space_info *space_info = cache->space_info;
4222 int ret = 0; 4261 int ret = 0;
4223 if (sinfo) { 4262 spin_lock(&space_info->lock);
4224 struct btrfs_space_info *space_info = cache->space_info; 4263 spin_lock(&cache->lock);
4225 spin_lock(&space_info->lock); 4264 if (reserve != RESERVE_FREE) {
4226 spin_lock(&cache->lock);
4227 if (reserve) {
4228 if (cache->ro) {
4229 ret = -EAGAIN;
4230 } else {
4231 cache->reserved += num_bytes;
4232 space_info->bytes_reserved += num_bytes;
4233 }
4234 } else {
4235 if (cache->ro)
4236 space_info->bytes_readonly += num_bytes;
4237 cache->reserved -= num_bytes;
4238 space_info->bytes_reserved -= num_bytes;
4239 space_info->reservation_progress++;
4240 }
4241 spin_unlock(&cache->lock);
4242 spin_unlock(&space_info->lock);
4243 } else {
4244 spin_lock(&cache->lock);
4245 if (cache->ro) { 4265 if (cache->ro) {
4246 ret = -EAGAIN; 4266 ret = -EAGAIN;
4247 } else { 4267 } else {
4248 if (reserve) 4268 cache->reserved += num_bytes;
4249 cache->reserved += num_bytes; 4269 space_info->bytes_reserved += num_bytes;
4250 else 4270 if (reserve == RESERVE_ALLOC) {
4251 cache->reserved -= num_bytes; 4271 BUG_ON(space_info->bytes_may_use < num_bytes);
4272 space_info->bytes_may_use -= num_bytes;
4273 }
4252 } 4274 }
4253 spin_unlock(&cache->lock); 4275 } else {
4276 if (cache->ro)
4277 space_info->bytes_readonly += num_bytes;
4278 cache->reserved -= num_bytes;
4279 space_info->bytes_reserved -= num_bytes;
4280 space_info->reservation_progress++;
4254 } 4281 }
4282 spin_unlock(&cache->lock);
4283 spin_unlock(&space_info->lock);
4255 return ret; 4284 return ret;
4256} 4285}
4257 4286
@@ -4322,7 +4351,7 @@ static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end)
4322 } else if (cache->reserved_pinned > 0) { 4351 } else if (cache->reserved_pinned > 0) {
4323 len = min(len, cache->reserved_pinned); 4352 len = min(len, cache->reserved_pinned);
4324 cache->reserved_pinned -= len; 4353 cache->reserved_pinned -= len;
4325 cache->space_info->bytes_reserved += len; 4354 cache->space_info->bytes_may_use += len;
4326 } 4355 }
4327 spin_unlock(&cache->lock); 4356 spin_unlock(&cache->lock);
4328 spin_unlock(&cache->space_info->lock); 4357 spin_unlock(&cache->space_info->lock);
@@ -4701,27 +4730,8 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
4701 WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags)); 4730 WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags));
4702 4731
4703 btrfs_add_free_space(cache, buf->start, buf->len); 4732 btrfs_add_free_space(cache, buf->start, buf->len);
4704 ret = btrfs_update_reserved_bytes(cache, buf->len, 0, 0); 4733 btrfs_update_reserved_bytes(cache, buf->len, RESERVE_FREE);
4705 if (ret == -EAGAIN) {
4706 /* block group became read-only */
4707 btrfs_update_reserved_bytes(cache, buf->len, 0, 1);
4708 goto out;
4709 }
4710 4734
4711 ret = 1;
4712 spin_lock(&block_rsv->lock);
4713 if (block_rsv->reserved < block_rsv->size) {
4714 block_rsv->reserved += buf->len;
4715 ret = 0;
4716 }
4717 spin_unlock(&block_rsv->lock);
4718
4719 if (ret) {
4720 spin_lock(&cache->space_info->lock);
4721 cache->space_info->bytes_reserved -= buf->len;
4722 cache->space_info->reservation_progress++;
4723 spin_unlock(&cache->space_info->lock);
4724 }
4725 goto out; 4735 goto out;
4726 } 4736 }
4727pin: 4737pin:
@@ -4881,6 +4891,8 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
4881 int last_ptr_loop = 0; 4891 int last_ptr_loop = 0;
4882 int loop = 0; 4892 int loop = 0;
4883 int index = 0; 4893 int index = 0;
4894 int alloc_type = (data & BTRFS_BLOCK_GROUP_DATA) ?
4895 RESERVE_ALLOC_NO_ACCOUNT : RESERVE_ALLOC;
4884 bool found_uncached_bg = false; 4896 bool found_uncached_bg = false;
4885 bool failed_cluster_refill = false; 4897 bool failed_cluster_refill = false;
4886 bool failed_alloc = false; 4898 bool failed_alloc = false;
@@ -5200,8 +5212,8 @@ checks:
5200 search_start - offset); 5212 search_start - offset);
5201 BUG_ON(offset > search_start); 5213 BUG_ON(offset > search_start);
5202 5214
5203 ret = btrfs_update_reserved_bytes(block_group, num_bytes, 1, 5215 ret = btrfs_update_reserved_bytes(block_group, num_bytes,
5204 (data & BTRFS_BLOCK_GROUP_DATA)); 5216 alloc_type);
5205 if (ret == -EAGAIN) { 5217 if (ret == -EAGAIN) {
5206 btrfs_add_free_space(block_group, offset, num_bytes); 5218 btrfs_add_free_space(block_group, offset, num_bytes);
5207 goto loop; 5219 goto loop;
@@ -5323,7 +5335,8 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
5323 int index = 0; 5335 int index = 0;
5324 5336
5325 spin_lock(&info->lock); 5337 spin_lock(&info->lock);
5326 printk(KERN_INFO "space_info has %llu free, is %sfull\n", 5338 printk(KERN_INFO "space_info %llu has %llu free, is %sfull\n",
5339 (unsigned long long)info->flags,
5327 (unsigned long long)(info->total_bytes - info->bytes_used - 5340 (unsigned long long)(info->total_bytes - info->bytes_used -
5328 info->bytes_pinned - info->bytes_reserved - 5341 info->bytes_pinned - info->bytes_reserved -
5329 info->bytes_readonly), 5342 info->bytes_readonly),
@@ -5425,7 +5438,7 @@ int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len)
5425 ret = btrfs_discard_extent(root, start, len, NULL); 5438 ret = btrfs_discard_extent(root, start, len, NULL);
5426 5439
5427 btrfs_add_free_space(cache, start, len); 5440 btrfs_add_free_space(cache, start, len);
5428 btrfs_update_reserved_bytes(cache, len, 0, 1); 5441 btrfs_update_reserved_bytes(cache, len, RESERVE_FREE);
5429 btrfs_put_block_group(cache); 5442 btrfs_put_block_group(cache);
5430 5443
5431 trace_btrfs_reserved_extent_free(root, start, len); 5444 trace_btrfs_reserved_extent_free(root, start, len);
@@ -5628,7 +5641,8 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
5628 put_caching_control(caching_ctl); 5641 put_caching_control(caching_ctl);
5629 } 5642 }
5630 5643
5631 ret = btrfs_update_reserved_bytes(block_group, ins->offset, 1, 1); 5644 ret = btrfs_update_reserved_bytes(block_group, ins->offset,
5645 RESERVE_ALLOC_NO_ACCOUNT);
5632 BUG_ON(ret); 5646 BUG_ON(ret);
5633 btrfs_put_block_group(block_group); 5647 btrfs_put_block_group(block_group);
5634 ret = alloc_reserved_file_extent(trans, root, 0, root_objectid, 5648 ret = alloc_reserved_file_extent(trans, root, 0, root_objectid,
@@ -6594,7 +6608,7 @@ static int set_block_group_ro(struct btrfs_block_group_cache *cache, int force)
6594 cache->reserved_pinned + num_bytes + min_allocable_bytes <= 6608 cache->reserved_pinned + num_bytes + min_allocable_bytes <=
6595 sinfo->total_bytes) { 6609 sinfo->total_bytes) {
6596 sinfo->bytes_readonly += num_bytes; 6610 sinfo->bytes_readonly += num_bytes;
6597 sinfo->bytes_reserved += cache->reserved_pinned; 6611 sinfo->bytes_may_use += cache->reserved_pinned;
6598 cache->reserved_pinned = 0; 6612 cache->reserved_pinned = 0;
6599 cache->ro = 1; 6613 cache->ro = 1;
6600 ret = 0; 6614 ret = 0;
@@ -6962,7 +6976,8 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
6962 struct btrfs_space_info, 6976 struct btrfs_space_info,
6963 list); 6977 list);
6964 if (space_info->bytes_pinned > 0 || 6978 if (space_info->bytes_pinned > 0 ||
6965 space_info->bytes_reserved > 0) { 6979 space_info->bytes_reserved > 0 ||
6980 space_info->bytes_may_use > 0) {
6966 WARN_ON(1); 6981 WARN_ON(1);
6967 dump_space_info(space_info, 0, 0); 6982 dump_space_info(space_info, 0, 0);
6968 } 6983 }