aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs
diff options
context:
space:
mode:
author: Miao Xie <miaox@cn.fujitsu.com> 2014-06-18 22:42:50 -0400
committer: Chris Mason <clm@fb.com> 2014-06-19 17:20:54 -0400
commit: e570fd27f2c5d7eac3876bccf99e9838d7f911a3 (patch)
tree: 3d73f4d8a2700fd441be0abe36cf7174bfb84c56 /fs/btrfs
parent: 5349d6c3ffead27d693fdac21270541fa95ef33d (diff)
Btrfs: fix broken free space cache after the system crashed
When we mounted the filesystem after a crash, we got the following messages:

  BTRFS error (device xxx): block group xxxx has wrong amount of free space
  BTRFS error (device xxx): failed to load free space cache for block group xxx

This is because we didn't update the metadata of the allocated space (in the extent tree) until the file data had been written to disk. During this window, there was no information about the allocated space in either the extent tree or the free space cache. When we wrote out the free space cache at this time (at transaction commit), that space was lost.

In fact, only the free space that is used to store file data had this problem; the other allocations didn't, because their metadata is updated in the same transaction context.

There are several methods which can fix the above problem:
- track the allocated space, and write it out when we write out the free space cache
- account the size of the allocated space that is used to store file data, and if the size is not zero, don't write out the free space cache.

The first one is complex and may degrade performance. This patch chooses the second method: we use a per-block-group variable to account the size of that allocated space. Besides that, we also introduce a per-block-group read-write semaphore to avoid the race between the allocation and the free space cache write-out.

Signed-off-by: Miao Xie <miaox@cn.fujitsu.com>
Signed-off-by: Chris Mason <clm@fb.com>
Diffstat (limited to 'fs/btrfs')
-rw-r--r--fs/btrfs/ctree.h13
-rw-r--r--fs/btrfs/extent-tree.c143
-rw-r--r--fs/btrfs/free-space-cache.c33
-rw-r--r--fs/btrfs/inode.c41
4 files changed, 186 insertions, 44 deletions
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index b7e2c1c1ef36..be91397f4e92 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1259,11 +1259,19 @@ struct btrfs_block_group_cache {
1259 spinlock_t lock; 1259 spinlock_t lock;
1260 u64 pinned; 1260 u64 pinned;
1261 u64 reserved; 1261 u64 reserved;
1262 u64 delalloc_bytes;
1262 u64 bytes_super; 1263 u64 bytes_super;
1263 u64 flags; 1264 u64 flags;
1264 u64 sectorsize; 1265 u64 sectorsize;
1265 u64 cache_generation; 1266 u64 cache_generation;
1266 1267
1268 /*
1269 * It is just used for the delayed data space allocation because
1270 * only the data space allocation and the relative metadata update
1271 * can be done cross the transaction.
1272 */
1273 struct rw_semaphore data_rwsem;
1274
1267 /* for raid56, this is a full stripe, without parity */ 1275 /* for raid56, this is a full stripe, without parity */
1268 unsigned long full_stripe_len; 1276 unsigned long full_stripe_len;
1269 1277
@@ -3316,7 +3324,7 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
3316 struct btrfs_key *ins); 3324 struct btrfs_key *ins);
3317int btrfs_reserve_extent(struct btrfs_root *root, u64 num_bytes, 3325int btrfs_reserve_extent(struct btrfs_root *root, u64 num_bytes,
3318 u64 min_alloc_size, u64 empty_size, u64 hint_byte, 3326 u64 min_alloc_size, u64 empty_size, u64 hint_byte,
3319 struct btrfs_key *ins, int is_data); 3327 struct btrfs_key *ins, int is_data, int delalloc);
3320int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, 3328int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
3321 struct extent_buffer *buf, int full_backref, int no_quota); 3329 struct extent_buffer *buf, int full_backref, int no_quota);
3322int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, 3330int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
@@ -3330,7 +3338,8 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans,
3330 u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid, 3338 u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
3331 u64 owner, u64 offset, int no_quota); 3339 u64 owner, u64 offset, int no_quota);
3332 3340
3333int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len); 3341int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len,
3342 int delalloc);
3334int btrfs_free_and_pin_reserved_extent(struct btrfs_root *root, 3343int btrfs_free_and_pin_reserved_extent(struct btrfs_root *root,
3335 u64 start, u64 len); 3344 u64 start, u64 len);
3336void btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans, 3345void btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index fafb3e53ecde..99c253918208 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -105,7 +105,8 @@ static int find_next_key(struct btrfs_path *path, int level,
105static void dump_space_info(struct btrfs_space_info *info, u64 bytes, 105static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
106 int dump_block_groups); 106 int dump_block_groups);
107static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache, 107static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache,
108 u64 num_bytes, int reserve); 108 u64 num_bytes, int reserve,
109 int delalloc);
109static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv, 110static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv,
110 u64 num_bytes); 111 u64 num_bytes);
111int btrfs_pin_extent(struct btrfs_root *root, 112int btrfs_pin_extent(struct btrfs_root *root,
@@ -3260,7 +3261,8 @@ again:
3260 3261
3261 spin_lock(&block_group->lock); 3262 spin_lock(&block_group->lock);
3262 if (block_group->cached != BTRFS_CACHE_FINISHED || 3263 if (block_group->cached != BTRFS_CACHE_FINISHED ||
3263 !btrfs_test_opt(root, SPACE_CACHE)) { 3264 !btrfs_test_opt(root, SPACE_CACHE) ||
3265 block_group->delalloc_bytes) {
3264 /* 3266 /*
3265 * don't bother trying to write stuff out _if_ 3267 * don't bother trying to write stuff out _if_
3266 * a) we're not cached, 3268 * a) we're not cached,
@@ -5613,6 +5615,7 @@ int btrfs_exclude_logged_extents(struct btrfs_root *log,
5613 * @cache: The cache we are manipulating 5615 * @cache: The cache we are manipulating
5614 * @num_bytes: The number of bytes in question 5616 * @num_bytes: The number of bytes in question
5615 * @reserve: One of the reservation enums 5617 * @reserve: One of the reservation enums
5618 * @delalloc: The blocks are allocated for the delalloc write
5616 * 5619 *
5617 * This is called by the allocator when it reserves space, or by somebody who is 5620 * This is called by the allocator when it reserves space, or by somebody who is
5618 * freeing space that was never actually used on disk. For example if you 5621 * freeing space that was never actually used on disk. For example if you
@@ -5631,7 +5634,7 @@ int btrfs_exclude_logged_extents(struct btrfs_root *log,
5631 * succeeds. 5634 * succeeds.
5632 */ 5635 */
5633static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache, 5636static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache,
5634 u64 num_bytes, int reserve) 5637 u64 num_bytes, int reserve, int delalloc)
5635{ 5638{
5636 struct btrfs_space_info *space_info = cache->space_info; 5639 struct btrfs_space_info *space_info = cache->space_info;
5637 int ret = 0; 5640 int ret = 0;
@@ -5650,12 +5653,18 @@ static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache,
5650 num_bytes, 0); 5653 num_bytes, 0);
5651 space_info->bytes_may_use -= num_bytes; 5654 space_info->bytes_may_use -= num_bytes;
5652 } 5655 }
5656
5657 if (delalloc)
5658 cache->delalloc_bytes += num_bytes;
5653 } 5659 }
5654 } else { 5660 } else {
5655 if (cache->ro) 5661 if (cache->ro)
5656 space_info->bytes_readonly += num_bytes; 5662 space_info->bytes_readonly += num_bytes;
5657 cache->reserved -= num_bytes; 5663 cache->reserved -= num_bytes;
5658 space_info->bytes_reserved -= num_bytes; 5664 space_info->bytes_reserved -= num_bytes;
5665
5666 if (delalloc)
5667 cache->delalloc_bytes -= num_bytes;
5659 } 5668 }
5660 spin_unlock(&cache->lock); 5669 spin_unlock(&cache->lock);
5661 spin_unlock(&space_info->lock); 5670 spin_unlock(&space_info->lock);
@@ -6206,7 +6215,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
6206 WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags)); 6215 WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags));
6207 6216
6208 btrfs_add_free_space(cache, buf->start, buf->len); 6217 btrfs_add_free_space(cache, buf->start, buf->len);
6209 btrfs_update_reserved_bytes(cache, buf->len, RESERVE_FREE); 6218 btrfs_update_reserved_bytes(cache, buf->len, RESERVE_FREE, 0);
6210 trace_btrfs_reserved_extent_free(root, buf->start, buf->len); 6219 trace_btrfs_reserved_extent_free(root, buf->start, buf->len);
6211 pin = 0; 6220 pin = 0;
6212 } 6221 }
@@ -6365,6 +6374,70 @@ enum btrfs_loop_type {
6365 LOOP_NO_EMPTY_SIZE = 3, 6374 LOOP_NO_EMPTY_SIZE = 3,
6366}; 6375};
6367 6376
6377static inline void
6378btrfs_lock_block_group(struct btrfs_block_group_cache *cache,
6379 int delalloc)
6380{
6381 if (delalloc)
6382 down_read(&cache->data_rwsem);
6383}
6384
6385static inline void
6386btrfs_grab_block_group(struct btrfs_block_group_cache *cache,
6387 int delalloc)
6388{
6389 btrfs_get_block_group(cache);
6390 if (delalloc)
6391 down_read(&cache->data_rwsem);
6392}
6393
6394static struct btrfs_block_group_cache *
6395btrfs_lock_cluster(struct btrfs_block_group_cache *block_group,
6396 struct btrfs_free_cluster *cluster,
6397 int delalloc)
6398{
6399 struct btrfs_block_group_cache *used_bg;
6400 bool locked = false;
6401again:
6402 spin_lock(&cluster->refill_lock);
6403 if (locked) {
6404 if (used_bg == cluster->block_group)
6405 return used_bg;
6406
6407 up_read(&used_bg->data_rwsem);
6408 btrfs_put_block_group(used_bg);
6409 }
6410
6411 used_bg = cluster->block_group;
6412 if (!used_bg)
6413 return NULL;
6414
6415 if (used_bg == block_group)
6416 return used_bg;
6417
6418 btrfs_get_block_group(used_bg);
6419
6420 if (!delalloc)
6421 return used_bg;
6422
6423 if (down_read_trylock(&used_bg->data_rwsem))
6424 return used_bg;
6425
6426 spin_unlock(&cluster->refill_lock);
6427 down_read(&used_bg->data_rwsem);
6428 locked = true;
6429 goto again;
6430}
6431
6432static inline void
6433btrfs_release_block_group(struct btrfs_block_group_cache *cache,
6434 int delalloc)
6435{
6436 if (delalloc)
6437 up_read(&cache->data_rwsem);
6438 btrfs_put_block_group(cache);
6439}
6440
6368/* 6441/*
6369 * walks the btree of allocated extents and find a hole of a given size. 6442 * walks the btree of allocated extents and find a hole of a given size.
6370 * The key ins is changed to record the hole: 6443 * The key ins is changed to record the hole:
@@ -6379,7 +6452,7 @@ enum btrfs_loop_type {
6379static noinline int find_free_extent(struct btrfs_root *orig_root, 6452static noinline int find_free_extent(struct btrfs_root *orig_root,
6380 u64 num_bytes, u64 empty_size, 6453 u64 num_bytes, u64 empty_size,
6381 u64 hint_byte, struct btrfs_key *ins, 6454 u64 hint_byte, struct btrfs_key *ins,
6382 u64 flags) 6455 u64 flags, int delalloc)
6383{ 6456{
6384 int ret = 0; 6457 int ret = 0;
6385 struct btrfs_root *root = orig_root->fs_info->extent_root; 6458 struct btrfs_root *root = orig_root->fs_info->extent_root;
@@ -6467,6 +6540,7 @@ static noinline int find_free_extent(struct btrfs_root *orig_root,
6467 up_read(&space_info->groups_sem); 6540 up_read(&space_info->groups_sem);
6468 } else { 6541 } else {
6469 index = get_block_group_index(block_group); 6542 index = get_block_group_index(block_group);
6543 btrfs_lock_block_group(block_group, delalloc);
6470 goto have_block_group; 6544 goto have_block_group;
6471 } 6545 }
6472 } else if (block_group) { 6546 } else if (block_group) {
@@ -6481,7 +6555,7 @@ search:
6481 u64 offset; 6555 u64 offset;
6482 int cached; 6556 int cached;
6483 6557
6484 btrfs_get_block_group(block_group); 6558 btrfs_grab_block_group(block_group, delalloc);
6485 search_start = block_group->key.objectid; 6559 search_start = block_group->key.objectid;
6486 6560
6487 /* 6561 /*
@@ -6529,16 +6603,16 @@ have_block_group:
6529 * the refill lock keeps out other 6603 * the refill lock keeps out other
6530 * people trying to start a new cluster 6604 * people trying to start a new cluster
6531 */ 6605 */
6532 spin_lock(&last_ptr->refill_lock); 6606 used_block_group = btrfs_lock_cluster(block_group,
6533 used_block_group = last_ptr->block_group; 6607 last_ptr,
6534 if (used_block_group != block_group && 6608 delalloc);
6535 (!used_block_group || 6609 if (!used_block_group)
6536 used_block_group->ro ||
6537 !block_group_bits(used_block_group, flags)))
6538 goto refill_cluster; 6610 goto refill_cluster;
6539 6611
6540 if (used_block_group != block_group) 6612 if (used_block_group != block_group &&
6541 btrfs_get_block_group(used_block_group); 6613 (used_block_group->ro ||
6614 !block_group_bits(used_block_group, flags)))
6615 goto release_cluster;
6542 6616
6543 offset = btrfs_alloc_from_cluster(used_block_group, 6617 offset = btrfs_alloc_from_cluster(used_block_group,
6544 last_ptr, 6618 last_ptr,
@@ -6552,16 +6626,15 @@ have_block_group:
6552 used_block_group, 6626 used_block_group,
6553 search_start, num_bytes); 6627 search_start, num_bytes);
6554 if (used_block_group != block_group) { 6628 if (used_block_group != block_group) {
6555 btrfs_put_block_group(block_group); 6629 btrfs_release_block_group(block_group,
6630 delalloc);
6556 block_group = used_block_group; 6631 block_group = used_block_group;
6557 } 6632 }
6558 goto checks; 6633 goto checks;
6559 } 6634 }
6560 6635
6561 WARN_ON(last_ptr->block_group != used_block_group); 6636 WARN_ON(last_ptr->block_group != used_block_group);
6562 if (used_block_group != block_group) 6637release_cluster:
6563 btrfs_put_block_group(used_block_group);
6564refill_cluster:
6565 /* If we are on LOOP_NO_EMPTY_SIZE, we can't 6638 /* If we are on LOOP_NO_EMPTY_SIZE, we can't
6566 * set up a new clusters, so lets just skip it 6639 * set up a new clusters, so lets just skip it
6567 * and let the allocator find whatever block 6640 * and let the allocator find whatever block
@@ -6578,8 +6651,10 @@ refill_cluster:
6578 * succeeding in the unclustered 6651 * succeeding in the unclustered
6579 * allocation. */ 6652 * allocation. */
6580 if (loop >= LOOP_NO_EMPTY_SIZE && 6653 if (loop >= LOOP_NO_EMPTY_SIZE &&
6581 last_ptr->block_group != block_group) { 6654 used_block_group != block_group) {
6582 spin_unlock(&last_ptr->refill_lock); 6655 spin_unlock(&last_ptr->refill_lock);
6656 btrfs_release_block_group(used_block_group,
6657 delalloc);
6583 goto unclustered_alloc; 6658 goto unclustered_alloc;
6584 } 6659 }
6585 6660
@@ -6589,6 +6664,10 @@ refill_cluster:
6589 */ 6664 */
6590 btrfs_return_cluster_to_free_space(NULL, last_ptr); 6665 btrfs_return_cluster_to_free_space(NULL, last_ptr);
6591 6666
6667 if (used_block_group != block_group)
6668 btrfs_release_block_group(used_block_group,
6669 delalloc);
6670refill_cluster:
6592 if (loop >= LOOP_NO_EMPTY_SIZE) { 6671 if (loop >= LOOP_NO_EMPTY_SIZE) {
6593 spin_unlock(&last_ptr->refill_lock); 6672 spin_unlock(&last_ptr->refill_lock);
6594 goto unclustered_alloc; 6673 goto unclustered_alloc;
@@ -6696,7 +6775,7 @@ checks:
6696 BUG_ON(offset > search_start); 6775 BUG_ON(offset > search_start);
6697 6776
6698 ret = btrfs_update_reserved_bytes(block_group, num_bytes, 6777 ret = btrfs_update_reserved_bytes(block_group, num_bytes,
6699 alloc_type); 6778 alloc_type, delalloc);
6700 if (ret == -EAGAIN) { 6779 if (ret == -EAGAIN) {
6701 btrfs_add_free_space(block_group, offset, num_bytes); 6780 btrfs_add_free_space(block_group, offset, num_bytes);
6702 goto loop; 6781 goto loop;
@@ -6708,13 +6787,13 @@ checks:
6708 6787
6709 trace_btrfs_reserve_extent(orig_root, block_group, 6788 trace_btrfs_reserve_extent(orig_root, block_group,
6710 search_start, num_bytes); 6789 search_start, num_bytes);
6711 btrfs_put_block_group(block_group); 6790 btrfs_release_block_group(block_group, delalloc);
6712 break; 6791 break;
6713loop: 6792loop:
6714 failed_cluster_refill = false; 6793 failed_cluster_refill = false;
6715 failed_alloc = false; 6794 failed_alloc = false;
6716 BUG_ON(index != get_block_group_index(block_group)); 6795 BUG_ON(index != get_block_group_index(block_group));
6717 btrfs_put_block_group(block_group); 6796 btrfs_release_block_group(block_group, delalloc);
6718 } 6797 }
6719 up_read(&space_info->groups_sem); 6798 up_read(&space_info->groups_sem);
6720 6799
@@ -6827,7 +6906,7 @@ again:
6827int btrfs_reserve_extent(struct btrfs_root *root, 6906int btrfs_reserve_extent(struct btrfs_root *root,
6828 u64 num_bytes, u64 min_alloc_size, 6907 u64 num_bytes, u64 min_alloc_size,
6829 u64 empty_size, u64 hint_byte, 6908 u64 empty_size, u64 hint_byte,
6830 struct btrfs_key *ins, int is_data) 6909 struct btrfs_key *ins, int is_data, int delalloc)
6831{ 6910{
6832 bool final_tried = false; 6911 bool final_tried = false;
6833 u64 flags; 6912 u64 flags;
@@ -6837,7 +6916,7 @@ int btrfs_reserve_extent(struct btrfs_root *root,
6837again: 6916again:
6838 WARN_ON(num_bytes < root->sectorsize); 6917 WARN_ON(num_bytes < root->sectorsize);
6839 ret = find_free_extent(root, num_bytes, empty_size, hint_byte, ins, 6918 ret = find_free_extent(root, num_bytes, empty_size, hint_byte, ins,
6840 flags); 6919 flags, delalloc);
6841 6920
6842 if (ret == -ENOSPC) { 6921 if (ret == -ENOSPC) {
6843 if (!final_tried && ins->offset) { 6922 if (!final_tried && ins->offset) {
@@ -6862,7 +6941,8 @@ again:
6862} 6941}
6863 6942
6864static int __btrfs_free_reserved_extent(struct btrfs_root *root, 6943static int __btrfs_free_reserved_extent(struct btrfs_root *root,
6865 u64 start, u64 len, int pin) 6944 u64 start, u64 len,
6945 int pin, int delalloc)
6866{ 6946{
6867 struct btrfs_block_group_cache *cache; 6947 struct btrfs_block_group_cache *cache;
6868 int ret = 0; 6948 int ret = 0;
@@ -6881,7 +6961,7 @@ static int __btrfs_free_reserved_extent(struct btrfs_root *root,
6881 pin_down_extent(root, cache, start, len, 1); 6961 pin_down_extent(root, cache, start, len, 1);
6882 else { 6962 else {
6883 btrfs_add_free_space(cache, start, len); 6963 btrfs_add_free_space(cache, start, len);
6884 btrfs_update_reserved_bytes(cache, len, RESERVE_FREE); 6964 btrfs_update_reserved_bytes(cache, len, RESERVE_FREE, delalloc);
6885 } 6965 }
6886 btrfs_put_block_group(cache); 6966 btrfs_put_block_group(cache);
6887 6967
@@ -6891,15 +6971,15 @@ static int __btrfs_free_reserved_extent(struct btrfs_root *root,
6891} 6971}
6892 6972
6893int btrfs_free_reserved_extent(struct btrfs_root *root, 6973int btrfs_free_reserved_extent(struct btrfs_root *root,
6894 u64 start, u64 len) 6974 u64 start, u64 len, int delalloc)
6895{ 6975{
6896 return __btrfs_free_reserved_extent(root, start, len, 0); 6976 return __btrfs_free_reserved_extent(root, start, len, 0, delalloc);
6897} 6977}
6898 6978
6899int btrfs_free_and_pin_reserved_extent(struct btrfs_root *root, 6979int btrfs_free_and_pin_reserved_extent(struct btrfs_root *root,
6900 u64 start, u64 len) 6980 u64 start, u64 len)
6901{ 6981{
6902 return __btrfs_free_reserved_extent(root, start, len, 1); 6982 return __btrfs_free_reserved_extent(root, start, len, 1, 0);
6903} 6983}
6904 6984
6905static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans, 6985static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
@@ -7114,7 +7194,7 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
7114 return -EINVAL; 7194 return -EINVAL;
7115 7195
7116 ret = btrfs_update_reserved_bytes(block_group, ins->offset, 7196 ret = btrfs_update_reserved_bytes(block_group, ins->offset,
7117 RESERVE_ALLOC_NO_ACCOUNT); 7197 RESERVE_ALLOC_NO_ACCOUNT, 0);
7118 BUG_ON(ret); /* logic error */ 7198 BUG_ON(ret); /* logic error */
7119 ret = alloc_reserved_file_extent(trans, root, 0, root_objectid, 7199 ret = alloc_reserved_file_extent(trans, root, 0, root_objectid,
7120 0, owner, offset, ins, 1); 7200 0, owner, offset, ins, 1);
@@ -7256,7 +7336,7 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
7256 return ERR_CAST(block_rsv); 7336 return ERR_CAST(block_rsv);
7257 7337
7258 ret = btrfs_reserve_extent(root, blocksize, blocksize, 7338 ret = btrfs_reserve_extent(root, blocksize, blocksize,
7259 empty_size, hint, &ins, 0); 7339 empty_size, hint, &ins, 0, 0);
7260 if (ret) { 7340 if (ret) {
7261 unuse_block_rsv(root->fs_info, block_rsv, blocksize); 7341 unuse_block_rsv(root->fs_info, block_rsv, blocksize);
7262 return ERR_PTR(ret); 7342 return ERR_PTR(ret);
@@ -8659,6 +8739,7 @@ btrfs_create_block_group_cache(struct btrfs_root *root, u64 start, u64 size)
8659 start); 8739 start);
8660 atomic_set(&cache->count, 1); 8740 atomic_set(&cache->count, 1);
8661 spin_lock_init(&cache->lock); 8741 spin_lock_init(&cache->lock);
8742 init_rwsem(&cache->data_rwsem);
8662 INIT_LIST_HEAD(&cache->list); 8743 INIT_LIST_HEAD(&cache->list);
8663 INIT_LIST_HEAD(&cache->cluster_list); 8744 INIT_LIST_HEAD(&cache->cluster_list);
8664 INIT_LIST_HEAD(&cache->new_bg_list); 8745 INIT_LIST_HEAD(&cache->new_bg_list);
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index a852e15173e5..2b0a627cb5f9 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -680,6 +680,13 @@ static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
680 generation = btrfs_free_space_generation(leaf, header); 680 generation = btrfs_free_space_generation(leaf, header);
681 btrfs_release_path(path); 681 btrfs_release_path(path);
682 682
683 if (!BTRFS_I(inode)->generation) {
684 btrfs_info(root->fs_info,
685 "The free space cache file (%llu) is invalid. skip it\n",
686 offset);
687 return 0;
688 }
689
683 if (BTRFS_I(inode)->generation != generation) { 690 if (BTRFS_I(inode)->generation != generation) {
684 btrfs_err(root->fs_info, 691 btrfs_err(root->fs_info,
685 "free space inode generation (%llu) " 692 "free space inode generation (%llu) "
@@ -1107,6 +1114,20 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
1107 if (ret) 1114 if (ret)
1108 return -1; 1115 return -1;
1109 1116
1117 if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA)) {
1118 down_write(&block_group->data_rwsem);
1119 spin_lock(&block_group->lock);
1120 if (block_group->delalloc_bytes) {
1121 block_group->disk_cache_state = BTRFS_DC_WRITTEN;
1122 spin_unlock(&block_group->lock);
1123 up_write(&block_group->data_rwsem);
1124 BTRFS_I(inode)->generation = 0;
1125 ret = 0;
1126 goto out;
1127 }
1128 spin_unlock(&block_group->lock);
1129 }
1130
1110 /* Lock all pages first so we can lock the extent safely. */ 1131 /* Lock all pages first so we can lock the extent safely. */
1111 io_ctl_prepare_pages(&io_ctl, inode, 0); 1132 io_ctl_prepare_pages(&io_ctl, inode, 0);
1112 1133
@@ -1145,6 +1166,8 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
1145 if (ret) 1166 if (ret)
1146 goto out_nospc; 1167 goto out_nospc;
1147 1168
1169 if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA))
1170 up_write(&block_group->data_rwsem);
1148 /* 1171 /*
1149 * Release the pages and unlock the extent, we will flush 1172 * Release the pages and unlock the extent, we will flush
1150 * them out later 1173 * them out later
@@ -1173,6 +1196,10 @@ out:
1173 1196
1174out_nospc: 1197out_nospc:
1175 cleanup_write_cache_enospc(inode, &io_ctl, &cached_state, &bitmap_list); 1198 cleanup_write_cache_enospc(inode, &io_ctl, &cached_state, &bitmap_list);
1199
1200 if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA))
1201 up_write(&block_group->data_rwsem);
1202
1176 goto out; 1203 goto out;
1177} 1204}
1178 1205
@@ -1192,6 +1219,12 @@ int btrfs_write_out_cache(struct btrfs_root *root,
1192 spin_unlock(&block_group->lock); 1219 spin_unlock(&block_group->lock);
1193 return 0; 1220 return 0;
1194 } 1221 }
1222
1223 if (block_group->delalloc_bytes) {
1224 block_group->disk_cache_state = BTRFS_DC_WRITTEN;
1225 spin_unlock(&block_group->lock);
1226 return 0;
1227 }
1195 spin_unlock(&block_group->lock); 1228 spin_unlock(&block_group->lock);
1196 1229
1197 inode = lookup_free_space_inode(root, block_group, path); 1230 inode = lookup_free_space_inode(root, block_group, path);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 372b2cb2b297..6b65fab27a1a 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -693,7 +693,7 @@ retry:
693 ret = btrfs_reserve_extent(root, 693 ret = btrfs_reserve_extent(root,
694 async_extent->compressed_size, 694 async_extent->compressed_size,
695 async_extent->compressed_size, 695 async_extent->compressed_size,
696 0, alloc_hint, &ins, 1); 696 0, alloc_hint, &ins, 1, 1);
697 if (ret) { 697 if (ret) {
698 int i; 698 int i;
699 699
@@ -794,7 +794,7 @@ retry:
794out: 794out:
795 return ret; 795 return ret;
796out_free_reserve: 796out_free_reserve:
797 btrfs_free_reserved_extent(root, ins.objectid, ins.offset); 797 btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1);
798out_free: 798out_free:
799 extent_clear_unlock_delalloc(inode, async_extent->start, 799 extent_clear_unlock_delalloc(inode, async_extent->start,
800 async_extent->start + 800 async_extent->start +
@@ -917,7 +917,7 @@ static noinline int cow_file_range(struct inode *inode,
917 cur_alloc_size = disk_num_bytes; 917 cur_alloc_size = disk_num_bytes;
918 ret = btrfs_reserve_extent(root, cur_alloc_size, 918 ret = btrfs_reserve_extent(root, cur_alloc_size,
919 root->sectorsize, 0, alloc_hint, 919 root->sectorsize, 0, alloc_hint,
920 &ins, 1); 920 &ins, 1, 1);
921 if (ret < 0) 921 if (ret < 0)
922 goto out_unlock; 922 goto out_unlock;
923 923
@@ -995,7 +995,7 @@ out:
995 return ret; 995 return ret;
996 996
997out_reserve: 997out_reserve:
998 btrfs_free_reserved_extent(root, ins.objectid, ins.offset); 998 btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1);
999out_unlock: 999out_unlock:
1000 extent_clear_unlock_delalloc(inode, start, end, locked_page, 1000 extent_clear_unlock_delalloc(inode, start, end, locked_page,
1001 EXTENT_LOCKED | EXTENT_DO_ACCOUNTING | 1001 EXTENT_LOCKED | EXTENT_DO_ACCOUNTING |
@@ -2599,6 +2599,21 @@ out_kfree:
2599 return NULL; 2599 return NULL;
2600} 2600}
2601 2601
2602static void btrfs_release_delalloc_bytes(struct btrfs_root *root,
2603 u64 start, u64 len)
2604{
2605 struct btrfs_block_group_cache *cache;
2606
2607 cache = btrfs_lookup_block_group(root->fs_info, start);
2608 ASSERT(cache);
2609
2610 spin_lock(&cache->lock);
2611 cache->delalloc_bytes -= len;
2612 spin_unlock(&cache->lock);
2613
2614 btrfs_put_block_group(cache);
2615}
2616
2602/* as ordered data IO finishes, this gets called so we can finish 2617/* as ordered data IO finishes, this gets called so we can finish
2603 * an ordered extent if the range of bytes in the file it covers are 2618 * an ordered extent if the range of bytes in the file it covers are
2604 * fully written. 2619 * fully written.
@@ -2698,6 +2713,10 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
2698 logical_len, logical_len, 2713 logical_len, logical_len,
2699 compress_type, 0, 0, 2714 compress_type, 0, 0,
2700 BTRFS_FILE_EXTENT_REG); 2715 BTRFS_FILE_EXTENT_REG);
2716 if (!ret)
2717 btrfs_release_delalloc_bytes(root,
2718 ordered_extent->start,
2719 ordered_extent->disk_len);
2701 } 2720 }
2702 unpin_extent_cache(&BTRFS_I(inode)->extent_tree, 2721 unpin_extent_cache(&BTRFS_I(inode)->extent_tree,
2703 ordered_extent->file_offset, ordered_extent->len, 2722 ordered_extent->file_offset, ordered_extent->len,
@@ -2750,7 +2769,7 @@ out:
2750 !test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags) && 2769 !test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags) &&
2751 !test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) 2770 !test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags))
2752 btrfs_free_reserved_extent(root, ordered_extent->start, 2771 btrfs_free_reserved_extent(root, ordered_extent->start,
2753 ordered_extent->disk_len); 2772 ordered_extent->disk_len, 1);
2754 } 2773 }
2755 2774
2756 2775
@@ -6535,21 +6554,21 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
6535 6554
6536 alloc_hint = get_extent_allocation_hint(inode, start, len); 6555 alloc_hint = get_extent_allocation_hint(inode, start, len);
6537 ret = btrfs_reserve_extent(root, len, root->sectorsize, 0, 6556 ret = btrfs_reserve_extent(root, len, root->sectorsize, 0,
6538 alloc_hint, &ins, 1); 6557 alloc_hint, &ins, 1, 1);
6539 if (ret) 6558 if (ret)
6540 return ERR_PTR(ret); 6559 return ERR_PTR(ret);
6541 6560
6542 em = create_pinned_em(inode, start, ins.offset, start, ins.objectid, 6561 em = create_pinned_em(inode, start, ins.offset, start, ins.objectid,
6543 ins.offset, ins.offset, ins.offset, 0); 6562 ins.offset, ins.offset, ins.offset, 0);
6544 if (IS_ERR(em)) { 6563 if (IS_ERR(em)) {
6545 btrfs_free_reserved_extent(root, ins.objectid, ins.offset); 6564 btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1);
6546 return em; 6565 return em;
6547 } 6566 }
6548 6567
6549 ret = btrfs_add_ordered_extent_dio(inode, start, ins.objectid, 6568 ret = btrfs_add_ordered_extent_dio(inode, start, ins.objectid,
6550 ins.offset, ins.offset, 0); 6569 ins.offset, ins.offset, 0);
6551 if (ret) { 6570 if (ret) {
6552 btrfs_free_reserved_extent(root, ins.objectid, ins.offset); 6571 btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1);
6553 free_extent_map(em); 6572 free_extent_map(em);
6554 return ERR_PTR(ret); 6573 return ERR_PTR(ret);
6555 } 6574 }
@@ -7437,7 +7456,7 @@ free_ordered:
7437 if (!test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags) && 7456 if (!test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags) &&
7438 !test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags)) 7457 !test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags))
7439 btrfs_free_reserved_extent(root, ordered->start, 7458 btrfs_free_reserved_extent(root, ordered->start,
7440 ordered->disk_len); 7459 ordered->disk_len, 1);
7441 btrfs_put_ordered_extent(ordered); 7460 btrfs_put_ordered_extent(ordered);
7442 btrfs_put_ordered_extent(ordered); 7461 btrfs_put_ordered_extent(ordered);
7443 } 7462 }
@@ -8819,7 +8838,7 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
8819 cur_bytes = min(num_bytes, 256ULL * 1024 * 1024); 8838 cur_bytes = min(num_bytes, 256ULL * 1024 * 1024);
8820 cur_bytes = max(cur_bytes, min_size); 8839 cur_bytes = max(cur_bytes, min_size);
8821 ret = btrfs_reserve_extent(root, cur_bytes, min_size, 0, 8840 ret = btrfs_reserve_extent(root, cur_bytes, min_size, 0,
8822 *alloc_hint, &ins, 1); 8841 *alloc_hint, &ins, 1, 0);
8823 if (ret) { 8842 if (ret) {
8824 if (own_trans) 8843 if (own_trans)
8825 btrfs_end_transaction(trans, root); 8844 btrfs_end_transaction(trans, root);
@@ -8833,7 +8852,7 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
8833 BTRFS_FILE_EXTENT_PREALLOC); 8852 BTRFS_FILE_EXTENT_PREALLOC);
8834 if (ret) { 8853 if (ret) {
8835 btrfs_free_reserved_extent(root, ins.objectid, 8854 btrfs_free_reserved_extent(root, ins.objectid,
8836 ins.offset); 8855 ins.offset, 0);
8837 btrfs_abort_transaction(trans, root, ret); 8856 btrfs_abort_transaction(trans, root, ret);
8838 if (own_trans) 8857 if (own_trans)
8839 btrfs_end_transaction(trans, root); 8858 btrfs_end_transaction(trans, root);