aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/extent-tree.c
diff options
context:
space:
mode:
authorMiao Xie <miaox@cn.fujitsu.com>2014-06-18 22:42:50 -0400
committerChris Mason <clm@fb.com>2014-06-19 17:20:54 -0400
commite570fd27f2c5d7eac3876bccf99e9838d7f911a3 (patch)
tree3d73f4d8a2700fd441be0abe36cf7174bfb84c56 /fs/btrfs/extent-tree.c
parent5349d6c3ffead27d693fdac21270541fa95ef33d (diff)
Btrfs: fix broken free space cache after the system crashed
When we mounted the filesystem after the crash, we got the following message: BTRFS error (device xxx): block group xxxx has wrong amount of free space BTRFS error (device xxx): failed to load free space cache for block group xxx It is because we didn't update the metadata of the allocated space (in the extent tree) until the file data was written into the disk. During this time, there was no information about the allocated spaces in either the extent tree or the free space cache. When we wrote out the free space cache at this time (commit transaction), those spaces were lost. In fact, only the free space that is used to store the file data had this problem; the others didn't, because their metadata is updated in the same transaction context. There are many methods which can fix the above problem - track the allocated space, and write it out when we write out the free space cache - account the size of the allocated space that is used to store the file data; if the size is not zero, don't write out the free space cache. The first one is complex and may degrade performance. This patch chose the second method: we use a per-block-group variable to account the size of that allocated space. Besides that, we also introduce a per-block-group read-write semaphore to avoid the race between the allocation and the free space cache write-out. Signed-off-by: Miao Xie <miaox@cn.fujitsu.com> Signed-off-by: Chris Mason <clm@fb.com>
Diffstat (limited to 'fs/btrfs/extent-tree.c')
-rw-r--r--fs/btrfs/extent-tree.c143
1 file changed, 112 insertions, 31 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index fafb3e53ecde..99c253918208 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -105,7 +105,8 @@ static int find_next_key(struct btrfs_path *path, int level,
105static void dump_space_info(struct btrfs_space_info *info, u64 bytes, 105static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
106 int dump_block_groups); 106 int dump_block_groups);
107static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache, 107static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache,
108 u64 num_bytes, int reserve); 108 u64 num_bytes, int reserve,
109 int delalloc);
109static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv, 110static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv,
110 u64 num_bytes); 111 u64 num_bytes);
111int btrfs_pin_extent(struct btrfs_root *root, 112int btrfs_pin_extent(struct btrfs_root *root,
@@ -3260,7 +3261,8 @@ again:
3260 3261
3261 spin_lock(&block_group->lock); 3262 spin_lock(&block_group->lock);
3262 if (block_group->cached != BTRFS_CACHE_FINISHED || 3263 if (block_group->cached != BTRFS_CACHE_FINISHED ||
3263 !btrfs_test_opt(root, SPACE_CACHE)) { 3264 !btrfs_test_opt(root, SPACE_CACHE) ||
3265 block_group->delalloc_bytes) {
3264 /* 3266 /*
3265 * don't bother trying to write stuff out _if_ 3267 * don't bother trying to write stuff out _if_
3266 * a) we're not cached, 3268 * a) we're not cached,
@@ -5613,6 +5615,7 @@ int btrfs_exclude_logged_extents(struct btrfs_root *log,
5613 * @cache: The cache we are manipulating 5615 * @cache: The cache we are manipulating
5614 * @num_bytes: The number of bytes in question 5616 * @num_bytes: The number of bytes in question
5615 * @reserve: One of the reservation enums 5617 * @reserve: One of the reservation enums
5618 * @delalloc: The blocks are allocated for the delalloc write
5616 * 5619 *
5617 * This is called by the allocator when it reserves space, or by somebody who is 5620 * This is called by the allocator when it reserves space, or by somebody who is
5618 * freeing space that was never actually used on disk. For example if you 5621 * freeing space that was never actually used on disk. For example if you
@@ -5631,7 +5634,7 @@ int btrfs_exclude_logged_extents(struct btrfs_root *log,
5631 * succeeds. 5634 * succeeds.
5632 */ 5635 */
5633static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache, 5636static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache,
5634 u64 num_bytes, int reserve) 5637 u64 num_bytes, int reserve, int delalloc)
5635{ 5638{
5636 struct btrfs_space_info *space_info = cache->space_info; 5639 struct btrfs_space_info *space_info = cache->space_info;
5637 int ret = 0; 5640 int ret = 0;
@@ -5650,12 +5653,18 @@ static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache,
5650 num_bytes, 0); 5653 num_bytes, 0);
5651 space_info->bytes_may_use -= num_bytes; 5654 space_info->bytes_may_use -= num_bytes;
5652 } 5655 }
5656
5657 if (delalloc)
5658 cache->delalloc_bytes += num_bytes;
5653 } 5659 }
5654 } else { 5660 } else {
5655 if (cache->ro) 5661 if (cache->ro)
5656 space_info->bytes_readonly += num_bytes; 5662 space_info->bytes_readonly += num_bytes;
5657 cache->reserved -= num_bytes; 5663 cache->reserved -= num_bytes;
5658 space_info->bytes_reserved -= num_bytes; 5664 space_info->bytes_reserved -= num_bytes;
5665
5666 if (delalloc)
5667 cache->delalloc_bytes -= num_bytes;
5659 } 5668 }
5660 spin_unlock(&cache->lock); 5669 spin_unlock(&cache->lock);
5661 spin_unlock(&space_info->lock); 5670 spin_unlock(&space_info->lock);
@@ -6206,7 +6215,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
6206 WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags)); 6215 WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags));
6207 6216
6208 btrfs_add_free_space(cache, buf->start, buf->len); 6217 btrfs_add_free_space(cache, buf->start, buf->len);
6209 btrfs_update_reserved_bytes(cache, buf->len, RESERVE_FREE); 6218 btrfs_update_reserved_bytes(cache, buf->len, RESERVE_FREE, 0);
6210 trace_btrfs_reserved_extent_free(root, buf->start, buf->len); 6219 trace_btrfs_reserved_extent_free(root, buf->start, buf->len);
6211 pin = 0; 6220 pin = 0;
6212 } 6221 }
@@ -6365,6 +6374,70 @@ enum btrfs_loop_type {
6365 LOOP_NO_EMPTY_SIZE = 3, 6374 LOOP_NO_EMPTY_SIZE = 3,
6366}; 6375};
6367 6376
6377static inline void
6378btrfs_lock_block_group(struct btrfs_block_group_cache *cache,
6379 int delalloc)
6380{
6381 if (delalloc)
6382 down_read(&cache->data_rwsem);
6383}
6384
6385static inline void
6386btrfs_grab_block_group(struct btrfs_block_group_cache *cache,
6387 int delalloc)
6388{
6389 btrfs_get_block_group(cache);
6390 if (delalloc)
6391 down_read(&cache->data_rwsem);
6392}
6393
6394static struct btrfs_block_group_cache *
6395btrfs_lock_cluster(struct btrfs_block_group_cache *block_group,
6396 struct btrfs_free_cluster *cluster,
6397 int delalloc)
6398{
6399 struct btrfs_block_group_cache *used_bg;
6400 bool locked = false;
6401again:
6402 spin_lock(&cluster->refill_lock);
6403 if (locked) {
6404 if (used_bg == cluster->block_group)
6405 return used_bg;
6406
6407 up_read(&used_bg->data_rwsem);
6408 btrfs_put_block_group(used_bg);
6409 }
6410
6411 used_bg = cluster->block_group;
6412 if (!used_bg)
6413 return NULL;
6414
6415 if (used_bg == block_group)
6416 return used_bg;
6417
6418 btrfs_get_block_group(used_bg);
6419
6420 if (!delalloc)
6421 return used_bg;
6422
6423 if (down_read_trylock(&used_bg->data_rwsem))
6424 return used_bg;
6425
6426 spin_unlock(&cluster->refill_lock);
6427 down_read(&used_bg->data_rwsem);
6428 locked = true;
6429 goto again;
6430}
6431
6432static inline void
6433btrfs_release_block_group(struct btrfs_block_group_cache *cache,
6434 int delalloc)
6435{
6436 if (delalloc)
6437 up_read(&cache->data_rwsem);
6438 btrfs_put_block_group(cache);
6439}
6440
6368/* 6441/*
6369 * walks the btree of allocated extents and find a hole of a given size. 6442 * walks the btree of allocated extents and find a hole of a given size.
6370 * The key ins is changed to record the hole: 6443 * The key ins is changed to record the hole:
@@ -6379,7 +6452,7 @@ enum btrfs_loop_type {
6379static noinline int find_free_extent(struct btrfs_root *orig_root, 6452static noinline int find_free_extent(struct btrfs_root *orig_root,
6380 u64 num_bytes, u64 empty_size, 6453 u64 num_bytes, u64 empty_size,
6381 u64 hint_byte, struct btrfs_key *ins, 6454 u64 hint_byte, struct btrfs_key *ins,
6382 u64 flags) 6455 u64 flags, int delalloc)
6383{ 6456{
6384 int ret = 0; 6457 int ret = 0;
6385 struct btrfs_root *root = orig_root->fs_info->extent_root; 6458 struct btrfs_root *root = orig_root->fs_info->extent_root;
@@ -6467,6 +6540,7 @@ static noinline int find_free_extent(struct btrfs_root *orig_root,
6467 up_read(&space_info->groups_sem); 6540 up_read(&space_info->groups_sem);
6468 } else { 6541 } else {
6469 index = get_block_group_index(block_group); 6542 index = get_block_group_index(block_group);
6543 btrfs_lock_block_group(block_group, delalloc);
6470 goto have_block_group; 6544 goto have_block_group;
6471 } 6545 }
6472 } else if (block_group) { 6546 } else if (block_group) {
@@ -6481,7 +6555,7 @@ search:
6481 u64 offset; 6555 u64 offset;
6482 int cached; 6556 int cached;
6483 6557
6484 btrfs_get_block_group(block_group); 6558 btrfs_grab_block_group(block_group, delalloc);
6485 search_start = block_group->key.objectid; 6559 search_start = block_group->key.objectid;
6486 6560
6487 /* 6561 /*
@@ -6529,16 +6603,16 @@ have_block_group:
6529 * the refill lock keeps out other 6603 * the refill lock keeps out other
6530 * people trying to start a new cluster 6604 * people trying to start a new cluster
6531 */ 6605 */
6532 spin_lock(&last_ptr->refill_lock); 6606 used_block_group = btrfs_lock_cluster(block_group,
6533 used_block_group = last_ptr->block_group; 6607 last_ptr,
6534 if (used_block_group != block_group && 6608 delalloc);
6535 (!used_block_group || 6609 if (!used_block_group)
6536 used_block_group->ro ||
6537 !block_group_bits(used_block_group, flags)))
6538 goto refill_cluster; 6610 goto refill_cluster;
6539 6611
6540 if (used_block_group != block_group) 6612 if (used_block_group != block_group &&
6541 btrfs_get_block_group(used_block_group); 6613 (used_block_group->ro ||
6614 !block_group_bits(used_block_group, flags)))
6615 goto release_cluster;
6542 6616
6543 offset = btrfs_alloc_from_cluster(used_block_group, 6617 offset = btrfs_alloc_from_cluster(used_block_group,
6544 last_ptr, 6618 last_ptr,
@@ -6552,16 +6626,15 @@ have_block_group:
6552 used_block_group, 6626 used_block_group,
6553 search_start, num_bytes); 6627 search_start, num_bytes);
6554 if (used_block_group != block_group) { 6628 if (used_block_group != block_group) {
6555 btrfs_put_block_group(block_group); 6629 btrfs_release_block_group(block_group,
6630 delalloc);
6556 block_group = used_block_group; 6631 block_group = used_block_group;
6557 } 6632 }
6558 goto checks; 6633 goto checks;
6559 } 6634 }
6560 6635
6561 WARN_ON(last_ptr->block_group != used_block_group); 6636 WARN_ON(last_ptr->block_group != used_block_group);
6562 if (used_block_group != block_group) 6637release_cluster:
6563 btrfs_put_block_group(used_block_group);
6564refill_cluster:
6565 /* If we are on LOOP_NO_EMPTY_SIZE, we can't 6638 /* If we are on LOOP_NO_EMPTY_SIZE, we can't
6566 * set up a new clusters, so lets just skip it 6639 * set up a new clusters, so lets just skip it
6567 * and let the allocator find whatever block 6640 * and let the allocator find whatever block
@@ -6578,8 +6651,10 @@ refill_cluster:
6578 * succeeding in the unclustered 6651 * succeeding in the unclustered
6579 * allocation. */ 6652 * allocation. */
6580 if (loop >= LOOP_NO_EMPTY_SIZE && 6653 if (loop >= LOOP_NO_EMPTY_SIZE &&
6581 last_ptr->block_group != block_group) { 6654 used_block_group != block_group) {
6582 spin_unlock(&last_ptr->refill_lock); 6655 spin_unlock(&last_ptr->refill_lock);
6656 btrfs_release_block_group(used_block_group,
6657 delalloc);
6583 goto unclustered_alloc; 6658 goto unclustered_alloc;
6584 } 6659 }
6585 6660
@@ -6589,6 +6664,10 @@ refill_cluster:
6589 */ 6664 */
6590 btrfs_return_cluster_to_free_space(NULL, last_ptr); 6665 btrfs_return_cluster_to_free_space(NULL, last_ptr);
6591 6666
6667 if (used_block_group != block_group)
6668 btrfs_release_block_group(used_block_group,
6669 delalloc);
6670refill_cluster:
6592 if (loop >= LOOP_NO_EMPTY_SIZE) { 6671 if (loop >= LOOP_NO_EMPTY_SIZE) {
6593 spin_unlock(&last_ptr->refill_lock); 6672 spin_unlock(&last_ptr->refill_lock);
6594 goto unclustered_alloc; 6673 goto unclustered_alloc;
@@ -6696,7 +6775,7 @@ checks:
6696 BUG_ON(offset > search_start); 6775 BUG_ON(offset > search_start);
6697 6776
6698 ret = btrfs_update_reserved_bytes(block_group, num_bytes, 6777 ret = btrfs_update_reserved_bytes(block_group, num_bytes,
6699 alloc_type); 6778 alloc_type, delalloc);
6700 if (ret == -EAGAIN) { 6779 if (ret == -EAGAIN) {
6701 btrfs_add_free_space(block_group, offset, num_bytes); 6780 btrfs_add_free_space(block_group, offset, num_bytes);
6702 goto loop; 6781 goto loop;
@@ -6708,13 +6787,13 @@ checks:
6708 6787
6709 trace_btrfs_reserve_extent(orig_root, block_group, 6788 trace_btrfs_reserve_extent(orig_root, block_group,
6710 search_start, num_bytes); 6789 search_start, num_bytes);
6711 btrfs_put_block_group(block_group); 6790 btrfs_release_block_group(block_group, delalloc);
6712 break; 6791 break;
6713loop: 6792loop:
6714 failed_cluster_refill = false; 6793 failed_cluster_refill = false;
6715 failed_alloc = false; 6794 failed_alloc = false;
6716 BUG_ON(index != get_block_group_index(block_group)); 6795 BUG_ON(index != get_block_group_index(block_group));
6717 btrfs_put_block_group(block_group); 6796 btrfs_release_block_group(block_group, delalloc);
6718 } 6797 }
6719 up_read(&space_info->groups_sem); 6798 up_read(&space_info->groups_sem);
6720 6799
@@ -6827,7 +6906,7 @@ again:
6827int btrfs_reserve_extent(struct btrfs_root *root, 6906int btrfs_reserve_extent(struct btrfs_root *root,
6828 u64 num_bytes, u64 min_alloc_size, 6907 u64 num_bytes, u64 min_alloc_size,
6829 u64 empty_size, u64 hint_byte, 6908 u64 empty_size, u64 hint_byte,
6830 struct btrfs_key *ins, int is_data) 6909 struct btrfs_key *ins, int is_data, int delalloc)
6831{ 6910{
6832 bool final_tried = false; 6911 bool final_tried = false;
6833 u64 flags; 6912 u64 flags;
@@ -6837,7 +6916,7 @@ int btrfs_reserve_extent(struct btrfs_root *root,
6837again: 6916again:
6838 WARN_ON(num_bytes < root->sectorsize); 6917 WARN_ON(num_bytes < root->sectorsize);
6839 ret = find_free_extent(root, num_bytes, empty_size, hint_byte, ins, 6918 ret = find_free_extent(root, num_bytes, empty_size, hint_byte, ins,
6840 flags); 6919 flags, delalloc);
6841 6920
6842 if (ret == -ENOSPC) { 6921 if (ret == -ENOSPC) {
6843 if (!final_tried && ins->offset) { 6922 if (!final_tried && ins->offset) {
@@ -6862,7 +6941,8 @@ again:
6862} 6941}
6863 6942
6864static int __btrfs_free_reserved_extent(struct btrfs_root *root, 6943static int __btrfs_free_reserved_extent(struct btrfs_root *root,
6865 u64 start, u64 len, int pin) 6944 u64 start, u64 len,
6945 int pin, int delalloc)
6866{ 6946{
6867 struct btrfs_block_group_cache *cache; 6947 struct btrfs_block_group_cache *cache;
6868 int ret = 0; 6948 int ret = 0;
@@ -6881,7 +6961,7 @@ static int __btrfs_free_reserved_extent(struct btrfs_root *root,
6881 pin_down_extent(root, cache, start, len, 1); 6961 pin_down_extent(root, cache, start, len, 1);
6882 else { 6962 else {
6883 btrfs_add_free_space(cache, start, len); 6963 btrfs_add_free_space(cache, start, len);
6884 btrfs_update_reserved_bytes(cache, len, RESERVE_FREE); 6964 btrfs_update_reserved_bytes(cache, len, RESERVE_FREE, delalloc);
6885 } 6965 }
6886 btrfs_put_block_group(cache); 6966 btrfs_put_block_group(cache);
6887 6967
@@ -6891,15 +6971,15 @@ static int __btrfs_free_reserved_extent(struct btrfs_root *root,
6891} 6971}
6892 6972
6893int btrfs_free_reserved_extent(struct btrfs_root *root, 6973int btrfs_free_reserved_extent(struct btrfs_root *root,
6894 u64 start, u64 len) 6974 u64 start, u64 len, int delalloc)
6895{ 6975{
6896 return __btrfs_free_reserved_extent(root, start, len, 0); 6976 return __btrfs_free_reserved_extent(root, start, len, 0, delalloc);
6897} 6977}
6898 6978
6899int btrfs_free_and_pin_reserved_extent(struct btrfs_root *root, 6979int btrfs_free_and_pin_reserved_extent(struct btrfs_root *root,
6900 u64 start, u64 len) 6980 u64 start, u64 len)
6901{ 6981{
6902 return __btrfs_free_reserved_extent(root, start, len, 1); 6982 return __btrfs_free_reserved_extent(root, start, len, 1, 0);
6903} 6983}
6904 6984
6905static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans, 6985static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
@@ -7114,7 +7194,7 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
7114 return -EINVAL; 7194 return -EINVAL;
7115 7195
7116 ret = btrfs_update_reserved_bytes(block_group, ins->offset, 7196 ret = btrfs_update_reserved_bytes(block_group, ins->offset,
7117 RESERVE_ALLOC_NO_ACCOUNT); 7197 RESERVE_ALLOC_NO_ACCOUNT, 0);
7118 BUG_ON(ret); /* logic error */ 7198 BUG_ON(ret); /* logic error */
7119 ret = alloc_reserved_file_extent(trans, root, 0, root_objectid, 7199 ret = alloc_reserved_file_extent(trans, root, 0, root_objectid,
7120 0, owner, offset, ins, 1); 7200 0, owner, offset, ins, 1);
@@ -7256,7 +7336,7 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
7256 return ERR_CAST(block_rsv); 7336 return ERR_CAST(block_rsv);
7257 7337
7258 ret = btrfs_reserve_extent(root, blocksize, blocksize, 7338 ret = btrfs_reserve_extent(root, blocksize, blocksize,
7259 empty_size, hint, &ins, 0); 7339 empty_size, hint, &ins, 0, 0);
7260 if (ret) { 7340 if (ret) {
7261 unuse_block_rsv(root->fs_info, block_rsv, blocksize); 7341 unuse_block_rsv(root->fs_info, block_rsv, blocksize);
7262 return ERR_PTR(ret); 7342 return ERR_PTR(ret);
@@ -8659,6 +8739,7 @@ btrfs_create_block_group_cache(struct btrfs_root *root, u64 start, u64 size)
8659 start); 8739 start);
8660 atomic_set(&cache->count, 1); 8740 atomic_set(&cache->count, 1);
8661 spin_lock_init(&cache->lock); 8741 spin_lock_init(&cache->lock);
8742 init_rwsem(&cache->data_rwsem);
8662 INIT_LIST_HEAD(&cache->list); 8743 INIT_LIST_HEAD(&cache->list);
8663 INIT_LIST_HEAD(&cache->cluster_list); 8744 INIT_LIST_HEAD(&cache->cluster_list);
8664 INIT_LIST_HEAD(&cache->new_bg_list); 8745 INIT_LIST_HEAD(&cache->new_bg_list);