summaryrefslogtreecommitdiffstats
path: root/fs/btrfs/inode.c
diff options
context:
space:
mode:
authorQu Wenruo <wqu@suse.com>2019-10-14 02:34:51 -0400
committerDavid Sterba <dsterba@suse.com>2019-10-15 12:50:07 -0400
commit8702ba9396bf7bbae2ab93c94acd4bd37cfa4f09 (patch)
treed885b88194ccfd4c58438d8bb71e5cbf18c87342 /fs/btrfs/inode.c
parent80ed4548d0711d15ca51be5dee0ff813051cfc90 (diff)
btrfs: qgroup: Always free PREALLOC META reserve in btrfs_delalloc_release_extents()
[Background] Btrfs qgroup uses two types of reserved space for METADATA space, PERTRANS and PREALLOC. PERTRANS is metadata space reserved for each transaction started by btrfs_start_transaction(). While PREALLOC is for delalloc, where we reserve space before joining a transaction, and finally it will be converted to PERTRANS after the writeback is done. [Inconsistency] However there is inconsistency in how we handle PREALLOC metadata space. The most obvious one is: In btrfs_buffered_write(): btrfs_delalloc_release_extents(BTRFS_I(inode), reserve_bytes, true); We always free qgroup PREALLOC meta space. While in btrfs_truncate_block(): btrfs_delalloc_release_extents(BTRFS_I(inode), blocksize, (ret != 0)); We only free qgroup PREALLOC meta space when something went wrong. [The Correct Behavior] The correct behavior should be the one in btrfs_buffered_write(), we should always free PREALLOC metadata space. The reason is, the btrfs_delalloc_* mechanism works by: - Reserve metadata first, even it's not necessary In btrfs_delalloc_reserve_metadata() - Free the unused metadata space Normally in: btrfs_delalloc_release_extents() |- btrfs_inode_rsv_release() Here we do calculation on whether we should release or not. E.g. for 64K buffered write, the metadata rsv works like: /* The first page */ reserve_meta: num_bytes=calc_inode_reservations() free_meta: num_bytes=0 total: num_bytes=calc_inode_reservations() /* The first page caused one outstanding extent, thus needs metadata rsv */ /* The 2nd page */ reserve_meta: num_bytes=calc_inode_reservations() free_meta: num_bytes=calc_inode_reservations() total: not changed /* The 2nd page doesn't cause new outstanding extent, needs no new meta rsv, so we free what we have reserved */ /* The 3rd~16th pages */ reserve_meta: num_bytes=calc_inode_reservations() free_meta: num_bytes=calc_inode_reservations() total: not changed (still space for one outstanding extent) This means, if btrfs_delalloc_release_extents() determines to free some space, then those space should be freed NOW. So for qgroup, we should call btrfs_qgroup_free_meta_prealloc() other than btrfs_qgroup_convert_reserved_meta(). The good news is: - The callers are not that hot The hottest caller is in btrfs_buffered_write(), which is already fixed by commit 336a8bb8e36a ("btrfs: Fix wrong btrfs_delalloc_release_extents parameter"). Thus it's not that easy to cause false EDQUOT. - The trans commit in advance for qgroup would hide the bug Since commit f5fef4593653 ("btrfs: qgroup: Make qgroup async transaction commit more aggressive"), when btrfs qgroup metadata free space is slow, it will try to commit transaction and free the wrongly converted PERTRANS space, so it's not that easy to hit such bug. [FIX] So to fix the problem, remove the @qgroup_free parameter for btrfs_delalloc_release_extents(), and always pass true to btrfs_inode_rsv_release(). Reported-by: Filipe Manana <fdmanana@suse.com> Fixes: 43b18595d660 ("btrfs: qgroup: Use separate meta reservation type for delalloc") CC: stable@vger.kernel.org # 4.19+ Reviewed-by: Filipe Manana <fdmanana@suse.com> Signed-off-by: Qu Wenruo <wqu@suse.com> Signed-off-by: David Sterba <dsterba@suse.com>
Diffstat (limited to 'fs/btrfs/inode.c')
-rw-r--r--fs/btrfs/inode.c12
1 files changed, 6 insertions, 6 deletions
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 0f2754eaa05b..c3f386b7cc0b 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -2206,7 +2206,7 @@ again:
2206 2206
2207 ClearPageChecked(page); 2207 ClearPageChecked(page);
2208 set_page_dirty(page); 2208 set_page_dirty(page);
2209 btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE, false); 2209 btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE);
2210out: 2210out:
2211 unlock_extent_cached(&BTRFS_I(inode)->io_tree, page_start, page_end, 2211 unlock_extent_cached(&BTRFS_I(inode)->io_tree, page_start, page_end,
2212 &cached_state); 2212 &cached_state);
@@ -4951,7 +4951,7 @@ again:
4951 if (!page) { 4951 if (!page) {
4952 btrfs_delalloc_release_space(inode, data_reserved, 4952 btrfs_delalloc_release_space(inode, data_reserved,
4953 block_start, blocksize, true); 4953 block_start, blocksize, true);
4954 btrfs_delalloc_release_extents(BTRFS_I(inode), blocksize, true); 4954 btrfs_delalloc_release_extents(BTRFS_I(inode), blocksize);
4955 ret = -ENOMEM; 4955 ret = -ENOMEM;
4956 goto out; 4956 goto out;
4957 } 4957 }
@@ -5018,7 +5018,7 @@ out_unlock:
5018 if (ret) 5018 if (ret)
5019 btrfs_delalloc_release_space(inode, data_reserved, block_start, 5019 btrfs_delalloc_release_space(inode, data_reserved, block_start,
5020 blocksize, true); 5020 blocksize, true);
5021 btrfs_delalloc_release_extents(BTRFS_I(inode), blocksize, (ret != 0)); 5021 btrfs_delalloc_release_extents(BTRFS_I(inode), blocksize);
5022 unlock_page(page); 5022 unlock_page(page);
5023 put_page(page); 5023 put_page(page);
5024out: 5024out:
@@ -8709,7 +8709,7 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
8709 } else if (ret >= 0 && (size_t)ret < count) 8709 } else if (ret >= 0 && (size_t)ret < count)
8710 btrfs_delalloc_release_space(inode, data_reserved, 8710 btrfs_delalloc_release_space(inode, data_reserved,
8711 offset, count - (size_t)ret, true); 8711 offset, count - (size_t)ret, true);
8712 btrfs_delalloc_release_extents(BTRFS_I(inode), count, false); 8712 btrfs_delalloc_release_extents(BTRFS_I(inode), count);
8713 } 8713 }
8714out: 8714out:
8715 if (wakeup) 8715 if (wakeup)
@@ -9059,7 +9059,7 @@ again:
9059 unlock_extent_cached(io_tree, page_start, page_end, &cached_state); 9059 unlock_extent_cached(io_tree, page_start, page_end, &cached_state);
9060 9060
9061 if (!ret2) { 9061 if (!ret2) {
9062 btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE, true); 9062 btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE);
9063 sb_end_pagefault(inode->i_sb); 9063 sb_end_pagefault(inode->i_sb);
9064 extent_changeset_free(data_reserved); 9064 extent_changeset_free(data_reserved);
9065 return VM_FAULT_LOCKED; 9065 return VM_FAULT_LOCKED;
@@ -9068,7 +9068,7 @@ again:
9068out_unlock: 9068out_unlock:
9069 unlock_page(page); 9069 unlock_page(page);
9070out: 9070out:
9071 btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE, (ret != 0)); 9071 btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE);
9072 btrfs_delalloc_release_space(inode, data_reserved, page_start, 9072 btrfs_delalloc_release_space(inode, data_reserved, page_start,
9073 reserved_space, (ret != 0)); 9073 reserved_space, (ret != 0));
9074out_noreserve: 9074out_noreserve: