aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/inode.c
diff options
context:
space:
mode:
author: Filipe Manana <fdmanana@suse.com>, 2017-03-06 18:04:20 -0500
committer: Filipe Manana <fdmanana@suse.com>, 2017-04-26 11:27:22 -0400
commit: a315e68f6e8b3006c29482dbfc4d928f098c449c (patch)
tree: 0e27dfc28bb908b143f1c84a345e87f45372d137 /fs/btrfs/inode.c
parent: 524272607e882d04e6d1a70d41fcbed819445ab9 (diff)
Btrfs: fix invalid attempt to free reserved space on failure to cow range
When attempting to COW a file range (we are starting writeback and doing COW), if we manage to reserve an extent for the range we will write into but fail after reserving it and before creating the respective ordered extent, we end up in an error path where we attempt to decrement the data space's bytes_may_use counter after we already did it while reserving the extent, leading to a warning/trace like the following:

[ 847.621524] ------------[ cut here ]------------
[ 847.625441] WARNING: CPU: 5 PID: 4905 at fs/btrfs/extent-tree.c:4316 btrfs_free_reserved_data_space_noquota+0x60/0x9f [btrfs]
[ 847.633704] Modules linked in: btrfs crc32c_generic xor raid6_pq acpi_cpufreq i2c_piix4 ppdev psmouse tpm_tis serio_raw pcspkr parport_pc tpm_tis_core i2c_core sg
[ 847.644616] CPU: 5 PID: 4905 Comm: xfs_io Not tainted 4.10.0-rc8-btrfs-next-37+ #2
[ 847.648601] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.9.1-0-gb3ef39f-prebuilt.qemu-project.org 04/01/2014
[ 847.648601] Call Trace:
[ 847.648601] dump_stack+0x67/0x90
[ 847.648601] __warn+0xc2/0xdd
[ 847.648601] warn_slowpath_null+0x1d/0x1f
[ 847.648601] btrfs_free_reserved_data_space_noquota+0x60/0x9f [btrfs]
[ 847.648601] btrfs_clear_bit_hook+0x140/0x258 [btrfs]
[ 847.648601] clear_state_bit+0x87/0x128 [btrfs]
[ 847.648601] __clear_extent_bit+0x222/0x2b7 [btrfs]
[ 847.648601] clear_extent_bit+0x17/0x19 [btrfs]
[ 847.648601] extent_clear_unlock_delalloc+0x3b/0x6b [btrfs]
[ 847.648601] cow_file_range.isra.39+0x387/0x39a [btrfs]
[ 847.648601] run_delalloc_nocow+0x4d7/0x70e [btrfs]
[ 847.648601] ? arch_local_irq_save+0x9/0xc
[ 847.648601] run_delalloc_range+0xa7/0x2b5 [btrfs]
[ 847.648601] writepage_delalloc.isra.31+0xb9/0x15c [btrfs]
[ 847.648601] __extent_writepage+0x249/0x2e8 [btrfs]
[ 847.648601] extent_write_cache_pages.constprop.33+0x28b/0x36c [btrfs]
[ 847.648601] ? arch_local_irq_save+0x9/0xc
[ 847.648601] ? mark_lock+0x24/0x201
[ 847.648601] extent_writepages+0x4b/0x5c [btrfs]
[ 847.648601] ? btrfs_writepage_start_hook+0xed/0xed [btrfs]
[ 847.648601] btrfs_writepages+0x28/0x2a [btrfs]
[ 847.648601] do_writepages+0x23/0x2c
[ 847.648601] __filemap_fdatawrite_range+0x5a/0x61
[ 847.648601] filemap_fdatawrite_range+0x13/0x15
[ 847.648601] btrfs_fdatawrite_range+0x20/0x46 [btrfs]
[ 847.648601] start_ordered_ops+0x19/0x23 [btrfs]
[ 847.648601] btrfs_sync_file+0x136/0x42c [btrfs]
[ 847.648601] vfs_fsync_range+0x8c/0x9e
[ 847.648601] vfs_fsync+0x1c/0x1e
[ 847.648601] do_fsync+0x31/0x4a
[ 847.648601] SyS_fsync+0x10/0x14
[ 847.648601] entry_SYSCALL_64_fastpath+0x18/0xad
[ 847.648601] RIP: 0033:0x7f5b05200800
[ 847.648601] RSP: 002b:00007ffe204f71c8 EFLAGS: 00000246 ORIG_RAX: 000000000000004a
[ 847.648601] RAX: ffffffffffffffda RBX: ffffffff8109637b RCX: 00007f5b05200800
[ 847.648601] RDX: 00000000008bd0a0 RSI: 00000000008bd2e0 RDI: 0000000000000003
[ 847.648601] RBP: ffffc90001d67f98 R08: 000000000000ffff R09: 000000000000001f
[ 847.648601] R10: 00000000000001f6 R11: 0000000000000246 R12: 0000000000000046
[ 847.648601] R13: ffffc90001d67f78 R14: 00007f5b054be740 R15: 00007f5b054be740
[ 847.648601] ? trace_hardirqs_off_caller+0x3f/0xaa
[ 847.685787] ---[ end trace 2a4a3e15382508e8 ]---

So fix this by not attempting to decrement the data space info's bytes_may_use counter if we already reserved the extent and an error happened before creating the ordered extent. We are already correctly freeing the reserved extent if an error happens, so there's no additional measure needed.

Signed-off-by: Filipe Manana <fdmanana@suse.com>
Reviewed-by: Liu Bo <bo.li.liu@oracle.com>
Diffstat (limited to 'fs/btrfs/inode.c')
-rw-r--r-- fs/btrfs/inode.c 59
1 files changed, 42 insertions, 17 deletions
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index b8b2a3cbdbe1..bfe04afa6277 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -943,10 +943,13 @@ static noinline int cow_file_range(struct inode *inode,
943 u64 num_bytes; 943 u64 num_bytes;
944 unsigned long ram_size; 944 unsigned long ram_size;
945 u64 disk_num_bytes; 945 u64 disk_num_bytes;
946 u64 cur_alloc_size; 946 u64 cur_alloc_size = 0;
947 u64 blocksize = fs_info->sectorsize; 947 u64 blocksize = fs_info->sectorsize;
948 struct btrfs_key ins; 948 struct btrfs_key ins;
949 struct extent_map *em; 949 struct extent_map *em;
950 unsigned clear_bits;
951 unsigned long page_ops;
952 bool extent_reserved = false;
950 int ret = 0; 953 int ret = 0;
951 954
952 if (btrfs_is_free_space_inode(BTRFS_I(inode))) { 955 if (btrfs_is_free_space_inode(BTRFS_I(inode))) {
@@ -991,14 +994,14 @@ static noinline int cow_file_range(struct inode *inode,
991 start + num_bytes - 1, 0); 994 start + num_bytes - 1, 0);
992 995
993 while (disk_num_bytes > 0) { 996 while (disk_num_bytes > 0) {
994 unsigned long op;
995
996 cur_alloc_size = disk_num_bytes; 997 cur_alloc_size = disk_num_bytes;
997 ret = btrfs_reserve_extent(root, cur_alloc_size, cur_alloc_size, 998 ret = btrfs_reserve_extent(root, cur_alloc_size, cur_alloc_size,
998 fs_info->sectorsize, 0, alloc_hint, 999 fs_info->sectorsize, 0, alloc_hint,
999 &ins, 1, 1); 1000 &ins, 1, 1);
1000 if (ret < 0) 1001 if (ret < 0)
1001 goto out_unlock; 1002 goto out_unlock;
1003 cur_alloc_size = ins.offset;
1004 extent_reserved = true;
1002 1005
1003 ram_size = ins.offset; 1006 ram_size = ins.offset;
1004 em = create_io_em(inode, start, ins.offset, /* len */ 1007 em = create_io_em(inode, start, ins.offset, /* len */
@@ -1013,7 +1016,6 @@ static noinline int cow_file_range(struct inode *inode,
1013 goto out_reserve; 1016 goto out_reserve;
1014 free_extent_map(em); 1017 free_extent_map(em);
1015 1018
1016 cur_alloc_size = ins.offset;
1017 ret = btrfs_add_ordered_extent(inode, start, ins.objectid, 1019 ret = btrfs_add_ordered_extent(inode, start, ins.objectid,
1018 ram_size, cur_alloc_size, 0); 1020 ram_size, cur_alloc_size, 0);
1019 if (ret) 1021 if (ret)
@@ -1048,14 +1050,14 @@ static noinline int cow_file_range(struct inode *inode,
1048 * Do set the Private2 bit so we know this page was properly 1050 * Do set the Private2 bit so we know this page was properly
1049 * setup for writepage 1051 * setup for writepage
1050 */ 1052 */
1051 op = unlock ? PAGE_UNLOCK : 0; 1053 page_ops = unlock ? PAGE_UNLOCK : 0;
1052 op |= PAGE_SET_PRIVATE2; 1054 page_ops |= PAGE_SET_PRIVATE2;
1053 1055
1054 extent_clear_unlock_delalloc(inode, start, 1056 extent_clear_unlock_delalloc(inode, start,
1055 start + ram_size - 1, 1057 start + ram_size - 1,
1056 delalloc_end, locked_page, 1058 delalloc_end, locked_page,
1057 EXTENT_LOCKED | EXTENT_DELALLOC, 1059 EXTENT_LOCKED | EXTENT_DELALLOC,
1058 op); 1060 page_ops);
1059 if (disk_num_bytes < cur_alloc_size) 1061 if (disk_num_bytes < cur_alloc_size)
1060 disk_num_bytes = 0; 1062 disk_num_bytes = 0;
1061 else 1063 else
@@ -1063,6 +1065,7 @@ static noinline int cow_file_range(struct inode *inode,
1063 num_bytes -= cur_alloc_size; 1065 num_bytes -= cur_alloc_size;
1064 alloc_hint = ins.objectid + ins.offset; 1066 alloc_hint = ins.objectid + ins.offset;
1065 start += cur_alloc_size; 1067 start += cur_alloc_size;
1068 extent_reserved = false;
1066 1069
1067 /* 1070 /*
1068 * btrfs_reloc_clone_csums() error, since start is increased 1071 * btrfs_reloc_clone_csums() error, since start is increased
@@ -1081,12 +1084,35 @@ out_reserve:
1081 btrfs_dec_block_group_reservations(fs_info, ins.objectid); 1084 btrfs_dec_block_group_reservations(fs_info, ins.objectid);
1082 btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, 1); 1085 btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, 1);
1083out_unlock: 1086out_unlock:
1087 clear_bits = EXTENT_LOCKED | EXTENT_DELALLOC | EXTENT_DEFRAG |
1088 EXTENT_CLEAR_META_RESV;
1089 page_ops = PAGE_UNLOCK | PAGE_CLEAR_DIRTY | PAGE_SET_WRITEBACK |
1090 PAGE_END_WRITEBACK;
1091 /*
1092 * If we reserved an extent for our delalloc range (or a subrange) and
1093 * failed to create the respective ordered extent, then it means that
1094 * when we reserved the extent we decremented the extent's size from
1095 * the data space_info's bytes_may_use counter and incremented the
1096 * space_info's bytes_reserved counter by the same amount. We must make
1097 * sure extent_clear_unlock_delalloc() does not try to decrement again
1098 * the data space_info's bytes_may_use counter, therefore we do not pass
1099 * it the flag EXTENT_CLEAR_DATA_RESV.
1100 */
1101 if (extent_reserved) {
1102 extent_clear_unlock_delalloc(inode, start,
1103 start + cur_alloc_size,
1104 start + cur_alloc_size,
1105 locked_page,
1106 clear_bits,
1107 page_ops);
1108 start += cur_alloc_size;
1109 if (start >= end)
1110 goto out;
1111 }
1084 extent_clear_unlock_delalloc(inode, start, end, delalloc_end, 1112 extent_clear_unlock_delalloc(inode, start, end, delalloc_end,
1085 locked_page, 1113 locked_page,
1086 EXTENT_LOCKED | EXTENT_DO_ACCOUNTING | 1114 clear_bits | EXTENT_CLEAR_DATA_RESV,
1087 EXTENT_DELALLOC | EXTENT_DEFRAG, 1115 page_ops);
1088 PAGE_UNLOCK | PAGE_CLEAR_DIRTY |
1089 PAGE_SET_WRITEBACK | PAGE_END_WRITEBACK);
1090 goto out; 1116 goto out;
1091} 1117}
1092 1118
@@ -1776,7 +1802,7 @@ static void btrfs_clear_bit_hook(struct btrfs_inode *inode,
1776 1802
1777 if (*bits & EXTENT_FIRST_DELALLOC) { 1803 if (*bits & EXTENT_FIRST_DELALLOC) {
1778 *bits &= ~EXTENT_FIRST_DELALLOC; 1804 *bits &= ~EXTENT_FIRST_DELALLOC;
1779 } else if (!(*bits & EXTENT_DO_ACCOUNTING)) { 1805 } else if (!(*bits & EXTENT_CLEAR_META_RESV)) {
1780 spin_lock(&inode->lock); 1806 spin_lock(&inode->lock);
1781 inode->outstanding_extents -= num_extents; 1807 inode->outstanding_extents -= num_extents;
1782 spin_unlock(&inode->lock); 1808 spin_unlock(&inode->lock);
@@ -1787,7 +1813,7 @@ static void btrfs_clear_bit_hook(struct btrfs_inode *inode,
1787 * don't need to call dellalloc_release_metadata if there is an 1813 * don't need to call dellalloc_release_metadata if there is an
1788 * error. 1814 * error.
1789 */ 1815 */
1790 if (*bits & EXTENT_DO_ACCOUNTING && 1816 if (*bits & EXTENT_CLEAR_META_RESV &&
1791 root != fs_info->tree_root) 1817 root != fs_info->tree_root)
1792 btrfs_delalloc_release_metadata(inode, len); 1818 btrfs_delalloc_release_metadata(inode, len);
1793 1819
@@ -1795,10 +1821,9 @@ static void btrfs_clear_bit_hook(struct btrfs_inode *inode,
1795 if (btrfs_is_testing(fs_info)) 1821 if (btrfs_is_testing(fs_info))
1796 return; 1822 return;
1797 1823
1798 if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID 1824 if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID &&
1799 && do_list && !(state->state & EXTENT_NORESERVE) 1825 do_list && !(state->state & EXTENT_NORESERVE) &&
1800 && (*bits & (EXTENT_DO_ACCOUNTING | 1826 (*bits & EXTENT_CLEAR_DATA_RESV))
1801 EXTENT_CLEAR_DATA_RESV)))
1802 btrfs_free_reserved_data_space_noquota( 1827 btrfs_free_reserved_data_space_noquota(
1803 &inode->vfs_inode, 1828 &inode->vfs_inode,
1804 state->start, len); 1829 state->start, len);