summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorQu Wenruo <wqu@suse.com>2019-10-14 02:34:51 -0400
committerDavid Sterba <dsterba@suse.com>2019-10-15 12:50:07 -0400
commit8702ba9396bf7bbae2ab93c94acd4bd37cfa4f09 (patch)
treed885b88194ccfd4c58438d8bb71e5cbf18c87342
parent80ed4548d0711d15ca51be5dee0ff813051cfc90 (diff)
btrfs: qgroup: Always free PREALLOC META reserve in btrfs_delalloc_release_extents()
[Background] Btrfs qgroup uses two types of reserved space for METADATA space, PERTRANS and PREALLOC. PERTRANS is metadata space reserved for each transaction started by btrfs_start_transaction(). While PREALLOC is for delalloc, where we reserve space before joining a transaction, and finally it will be converted to PERTRANS after the writeback is done. [Inconsistency] However there is inconsistency in how we handle PREALLOC metadata space. The most obvious one is: In btrfs_buffered_write(): btrfs_delalloc_release_extents(BTRFS_I(inode), reserve_bytes, true); We always free qgroup PREALLOC meta space. While in btrfs_truncate_block(): btrfs_delalloc_release_extents(BTRFS_I(inode), blocksize, (ret != 0)); We only free qgroup PREALLOC meta space when something went wrong. [The Correct Behavior] The correct behavior should be the one in btrfs_buffered_write(), we should always free PREALLOC metadata space. The reason is, the btrfs_delalloc_* mechanism works by: - Reserve metadata first, even it's not necessary In btrfs_delalloc_reserve_metadata() - Free the unused metadata space Normally in: btrfs_delalloc_release_extents() |- btrfs_inode_rsv_release() Here we do calculation on whether we should release or not. E.g. for 64K buffered write, the metadata rsv works like: /* The first page */ reserve_meta: num_bytes=calc_inode_reservations() free_meta: num_bytes=0 total: num_bytes=calc_inode_reservations() /* The first page caused one outstanding extent, thus needs metadata rsv */ /* The 2nd page */ reserve_meta: num_bytes=calc_inode_reservations() free_meta: num_bytes=calc_inode_reservations() total: not changed /* The 2nd page doesn't cause new outstanding extent, needs no new meta rsv, so we free what we have reserved */ /* The 3rd~16th pages */ reserve_meta: num_bytes=calc_inode_reservations() free_meta: num_bytes=calc_inode_reservations() total: not changed (still space for one outstanding extent) This means, if btrfs_delalloc_release_extents() determines to free some space, then those space should be freed NOW. So for qgroup, we should call btrfs_qgroup_free_meta_prealloc() other than btrfs_qgroup_convert_reserved_meta(). The good news is: - The callers are not that hot The hottest caller is in btrfs_buffered_write(), which is already fixed by commit 336a8bb8e36a ("btrfs: Fix wrong btrfs_delalloc_release_extents parameter"). Thus it's not that easy to cause false EDQUOT. - The trans commit in advance for qgroup would hide the bug Since commit f5fef4593653 ("btrfs: qgroup: Make qgroup async transaction commit more aggressive"), when btrfs qgroup metadata free space is slow, it will try to commit transaction and free the wrongly converted PERTRANS space, so it's not that easy to hit such bug. [FIX] So to fix the problem, remove the @qgroup_free parameter for btrfs_delalloc_release_extents(), and always pass true to btrfs_inode_rsv_release(). Reported-by: Filipe Manana <fdmanana@suse.com> Fixes: 43b18595d660 ("btrfs: qgroup: Use separate meta reservation type for delalloc") CC: stable@vger.kernel.org # 4.19+ Reviewed-by: Filipe Manana <fdmanana@suse.com> Signed-off-by: Qu Wenruo <wqu@suse.com> Signed-off-by: David Sterba <dsterba@suse.com>
-rw-r--r--fs/btrfs/ctree.h3
-rw-r--r--fs/btrfs/delalloc-space.c6
-rw-r--r--fs/btrfs/file.c7
-rw-r--r--fs/btrfs/inode-map.c4
-rw-r--r--fs/btrfs/inode.c12
-rw-r--r--fs/btrfs/ioctl.c6
-rw-r--r--fs/btrfs/relocation.c9
7 files changed, 20 insertions, 27 deletions
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index c1489c229694..fe2b8765d9e6 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -2487,8 +2487,7 @@ int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
2487 int nitems, bool use_global_rsv); 2487 int nitems, bool use_global_rsv);
2488void btrfs_subvolume_release_metadata(struct btrfs_fs_info *fs_info, 2488void btrfs_subvolume_release_metadata(struct btrfs_fs_info *fs_info,
2489 struct btrfs_block_rsv *rsv); 2489 struct btrfs_block_rsv *rsv);
2490void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes, 2490void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes);
2491 bool qgroup_free);
2492 2491
2493int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes); 2492int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes);
2494u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo); 2493u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo);
diff --git a/fs/btrfs/delalloc-space.c b/fs/btrfs/delalloc-space.c
index d949d7d2abed..571e7b31ea2f 100644
--- a/fs/btrfs/delalloc-space.c
+++ b/fs/btrfs/delalloc-space.c
@@ -418,7 +418,6 @@ void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes,
418 * btrfs_delalloc_release_extents - release our outstanding_extents 418 * btrfs_delalloc_release_extents - release our outstanding_extents
419 * @inode: the inode to balance the reservation for. 419 * @inode: the inode to balance the reservation for.
420 * @num_bytes: the number of bytes we originally reserved with 420 * @num_bytes: the number of bytes we originally reserved with
421 * @qgroup_free: do we need to free qgroup meta reservation or convert them.
422 * 421 *
423 * When we reserve space we increase outstanding_extents for the extents we may 422 * When we reserve space we increase outstanding_extents for the extents we may
424 * add. Once we've set the range as delalloc or created our ordered extents we 423 * add. Once we've set the range as delalloc or created our ordered extents we
@@ -426,8 +425,7 @@ void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes,
426 * temporarily tracked outstanding_extents. This _must_ be used in conjunction 425 * temporarily tracked outstanding_extents. This _must_ be used in conjunction
427 * with btrfs_delalloc_reserve_metadata. 426 * with btrfs_delalloc_reserve_metadata.
428 */ 427 */
429void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes, 428void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes)
430 bool qgroup_free)
431{ 429{
432 struct btrfs_fs_info *fs_info = inode->root->fs_info; 430 struct btrfs_fs_info *fs_info = inode->root->fs_info;
433 unsigned num_extents; 431 unsigned num_extents;
@@ -441,7 +439,7 @@ void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes,
441 if (btrfs_is_testing(fs_info)) 439 if (btrfs_is_testing(fs_info))
442 return; 440 return;
443 441
444 btrfs_inode_rsv_release(inode, qgroup_free); 442 btrfs_inode_rsv_release(inode, true);
445} 443}
446 444
447/** 445/**
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 27e5b269e729..e955e7fa9201 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1692,7 +1692,7 @@ again:
1692 force_page_uptodate); 1692 force_page_uptodate);
1693 if (ret) { 1693 if (ret) {
1694 btrfs_delalloc_release_extents(BTRFS_I(inode), 1694 btrfs_delalloc_release_extents(BTRFS_I(inode),
1695 reserve_bytes, true); 1695 reserve_bytes);
1696 break; 1696 break;
1697 } 1697 }
1698 1698
@@ -1704,7 +1704,7 @@ again:
1704 if (extents_locked == -EAGAIN) 1704 if (extents_locked == -EAGAIN)
1705 goto again; 1705 goto again;
1706 btrfs_delalloc_release_extents(BTRFS_I(inode), 1706 btrfs_delalloc_release_extents(BTRFS_I(inode),
1707 reserve_bytes, true); 1707 reserve_bytes);
1708 ret = extents_locked; 1708 ret = extents_locked;
1709 break; 1709 break;
1710 } 1710 }
@@ -1772,8 +1772,7 @@ again:
1772 else 1772 else
1773 free_extent_state(cached_state); 1773 free_extent_state(cached_state);
1774 1774
1775 btrfs_delalloc_release_extents(BTRFS_I(inode), reserve_bytes, 1775 btrfs_delalloc_release_extents(BTRFS_I(inode), reserve_bytes);
1776 true);
1777 if (ret) { 1776 if (ret) {
1778 btrfs_drop_pages(pages, num_pages); 1777 btrfs_drop_pages(pages, num_pages);
1779 break; 1778 break;
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c
index 63cad7865d75..37345fb6191d 100644
--- a/fs/btrfs/inode-map.c
+++ b/fs/btrfs/inode-map.c
@@ -501,13 +501,13 @@ again:
501 ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, prealloc, 501 ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, prealloc,
502 prealloc, prealloc, &alloc_hint); 502 prealloc, prealloc, &alloc_hint);
503 if (ret) { 503 if (ret) {
504 btrfs_delalloc_release_extents(BTRFS_I(inode), prealloc, true); 504 btrfs_delalloc_release_extents(BTRFS_I(inode), prealloc);
505 btrfs_delalloc_release_metadata(BTRFS_I(inode), prealloc, true); 505 btrfs_delalloc_release_metadata(BTRFS_I(inode), prealloc, true);
506 goto out_put; 506 goto out_put;
507 } 507 }
508 508
509 ret = btrfs_write_out_ino_cache(root, trans, path, inode); 509 ret = btrfs_write_out_ino_cache(root, trans, path, inode);
510 btrfs_delalloc_release_extents(BTRFS_I(inode), prealloc, false); 510 btrfs_delalloc_release_extents(BTRFS_I(inode), prealloc);
511out_put: 511out_put:
512 iput(inode); 512 iput(inode);
513out_release: 513out_release:
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 0f2754eaa05b..c3f386b7cc0b 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -2206,7 +2206,7 @@ again:
2206 2206
2207 ClearPageChecked(page); 2207 ClearPageChecked(page);
2208 set_page_dirty(page); 2208 set_page_dirty(page);
2209 btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE, false); 2209 btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE);
2210out: 2210out:
2211 unlock_extent_cached(&BTRFS_I(inode)->io_tree, page_start, page_end, 2211 unlock_extent_cached(&BTRFS_I(inode)->io_tree, page_start, page_end,
2212 &cached_state); 2212 &cached_state);
@@ -4951,7 +4951,7 @@ again:
4951 if (!page) { 4951 if (!page) {
4952 btrfs_delalloc_release_space(inode, data_reserved, 4952 btrfs_delalloc_release_space(inode, data_reserved,
4953 block_start, blocksize, true); 4953 block_start, blocksize, true);
4954 btrfs_delalloc_release_extents(BTRFS_I(inode), blocksize, true); 4954 btrfs_delalloc_release_extents(BTRFS_I(inode), blocksize);
4955 ret = -ENOMEM; 4955 ret = -ENOMEM;
4956 goto out; 4956 goto out;
4957 } 4957 }
@@ -5018,7 +5018,7 @@ out_unlock:
5018 if (ret) 5018 if (ret)
5019 btrfs_delalloc_release_space(inode, data_reserved, block_start, 5019 btrfs_delalloc_release_space(inode, data_reserved, block_start,
5020 blocksize, true); 5020 blocksize, true);
5021 btrfs_delalloc_release_extents(BTRFS_I(inode), blocksize, (ret != 0)); 5021 btrfs_delalloc_release_extents(BTRFS_I(inode), blocksize);
5022 unlock_page(page); 5022 unlock_page(page);
5023 put_page(page); 5023 put_page(page);
5024out: 5024out:
@@ -8709,7 +8709,7 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
8709 } else if (ret >= 0 && (size_t)ret < count) 8709 } else if (ret >= 0 && (size_t)ret < count)
8710 btrfs_delalloc_release_space(inode, data_reserved, 8710 btrfs_delalloc_release_space(inode, data_reserved,
8711 offset, count - (size_t)ret, true); 8711 offset, count - (size_t)ret, true);
8712 btrfs_delalloc_release_extents(BTRFS_I(inode), count, false); 8712 btrfs_delalloc_release_extents(BTRFS_I(inode), count);
8713 } 8713 }
8714out: 8714out:
8715 if (wakeup) 8715 if (wakeup)
@@ -9059,7 +9059,7 @@ again:
9059 unlock_extent_cached(io_tree, page_start, page_end, &cached_state); 9059 unlock_extent_cached(io_tree, page_start, page_end, &cached_state);
9060 9060
9061 if (!ret2) { 9061 if (!ret2) {
9062 btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE, true); 9062 btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE);
9063 sb_end_pagefault(inode->i_sb); 9063 sb_end_pagefault(inode->i_sb);
9064 extent_changeset_free(data_reserved); 9064 extent_changeset_free(data_reserved);
9065 return VM_FAULT_LOCKED; 9065 return VM_FAULT_LOCKED;
@@ -9068,7 +9068,7 @@ again:
9068out_unlock: 9068out_unlock:
9069 unlock_page(page); 9069 unlock_page(page);
9070out: 9070out:
9071 btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE, (ret != 0)); 9071 btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE);
9072 btrfs_delalloc_release_space(inode, data_reserved, page_start, 9072 btrfs_delalloc_release_space(inode, data_reserved, page_start,
9073 reserved_space, (ret != 0)); 9073 reserved_space, (ret != 0));
9074out_noreserve: 9074out_noreserve:
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index de730e56d3f5..7c145a41decd 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1360,8 +1360,7 @@ again:
1360 unlock_page(pages[i]); 1360 unlock_page(pages[i]);
1361 put_page(pages[i]); 1361 put_page(pages[i]);
1362 } 1362 }
1363 btrfs_delalloc_release_extents(BTRFS_I(inode), page_cnt << PAGE_SHIFT, 1363 btrfs_delalloc_release_extents(BTRFS_I(inode), page_cnt << PAGE_SHIFT);
1364 false);
1365 extent_changeset_free(data_reserved); 1364 extent_changeset_free(data_reserved);
1366 return i_done; 1365 return i_done;
1367out: 1366out:
@@ -1372,8 +1371,7 @@ out:
1372 btrfs_delalloc_release_space(inode, data_reserved, 1371 btrfs_delalloc_release_space(inode, data_reserved,
1373 start_index << PAGE_SHIFT, 1372 start_index << PAGE_SHIFT,
1374 page_cnt << PAGE_SHIFT, true); 1373 page_cnt << PAGE_SHIFT, true);
1375 btrfs_delalloc_release_extents(BTRFS_I(inode), page_cnt << PAGE_SHIFT, 1374 btrfs_delalloc_release_extents(BTRFS_I(inode), page_cnt << PAGE_SHIFT);
1376 true);
1377 extent_changeset_free(data_reserved); 1375 extent_changeset_free(data_reserved);
1378 return ret; 1376 return ret;
1379 1377
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 7b883239fa7e..5cd42b66818c 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -3278,7 +3278,7 @@ static int relocate_file_extent_cluster(struct inode *inode,
3278 btrfs_delalloc_release_metadata(BTRFS_I(inode), 3278 btrfs_delalloc_release_metadata(BTRFS_I(inode),
3279 PAGE_SIZE, true); 3279 PAGE_SIZE, true);
3280 btrfs_delalloc_release_extents(BTRFS_I(inode), 3280 btrfs_delalloc_release_extents(BTRFS_I(inode),
3281 PAGE_SIZE, true); 3281 PAGE_SIZE);
3282 ret = -ENOMEM; 3282 ret = -ENOMEM;
3283 goto out; 3283 goto out;
3284 } 3284 }
@@ -3299,7 +3299,7 @@ static int relocate_file_extent_cluster(struct inode *inode,
3299 btrfs_delalloc_release_metadata(BTRFS_I(inode), 3299 btrfs_delalloc_release_metadata(BTRFS_I(inode),
3300 PAGE_SIZE, true); 3300 PAGE_SIZE, true);
3301 btrfs_delalloc_release_extents(BTRFS_I(inode), 3301 btrfs_delalloc_release_extents(BTRFS_I(inode),
3302 PAGE_SIZE, true); 3302 PAGE_SIZE);
3303 ret = -EIO; 3303 ret = -EIO;
3304 goto out; 3304 goto out;
3305 } 3305 }
@@ -3328,7 +3328,7 @@ static int relocate_file_extent_cluster(struct inode *inode,
3328 btrfs_delalloc_release_metadata(BTRFS_I(inode), 3328 btrfs_delalloc_release_metadata(BTRFS_I(inode),
3329 PAGE_SIZE, true); 3329 PAGE_SIZE, true);
3330 btrfs_delalloc_release_extents(BTRFS_I(inode), 3330 btrfs_delalloc_release_extents(BTRFS_I(inode),
3331 PAGE_SIZE, true); 3331 PAGE_SIZE);
3332 3332
3333 clear_extent_bits(&BTRFS_I(inode)->io_tree, 3333 clear_extent_bits(&BTRFS_I(inode)->io_tree,
3334 page_start, page_end, 3334 page_start, page_end,
@@ -3344,8 +3344,7 @@ static int relocate_file_extent_cluster(struct inode *inode,
3344 put_page(page); 3344 put_page(page);
3345 3345
3346 index++; 3346 index++;
3347 btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE, 3347 btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE);
3348 false);
3349 balance_dirty_pages_ratelimited(inode->i_mapping); 3348 balance_dirty_pages_ratelimited(inode->i_mapping);
3350 btrfs_throttle(fs_info); 3349 btrfs_throttle(fs_info);
3351 } 3350 }