diff options
author | Wang Xiaoguang <wangxg.fnst@cn.fujitsu.com> | 2016-07-25 03:51:40 -0400 |
---|---|---|
committer | Chris Mason <clm@fb.com> | 2016-08-25 06:58:26 -0400 |
commit | 18513091af9483ba84328d42092bd4d42a3c958f (patch) | |
tree | e5a8dfa241105b399c110cc4f79400b332136272 /fs/btrfs/inode.c | |
parent | 4824f1f412f75e9f84b9cecbde828e8f4699f82d (diff) |
btrfs: update btrfs_space_info's bytes_may_use timely
This patch can fix some false ENOSPC errors, below test script can
reproduce one false ENOSPC error:
#!/bin/bash
dd if=/dev/zero of=fs.img bs=$((1024*1024)) count=128
dev=$(losetup --show -f fs.img)
mkfs.btrfs -f -M $dev
mkdir /tmp/mntpoint
mount $dev /tmp/mntpoint
cd /tmp/mntpoint
xfs_io -f -c "falloc 0 $((64*1024*1024))" testfile
Above script will fail for ENOSPC reason, but indeed fs still has free
space to satisfy this request. Please see call graph:
btrfs_fallocate()
|-> btrfs_alloc_data_chunk_ondemand()
| bytes_may_use += 64M
|-> btrfs_prealloc_file_range()
|-> btrfs_reserve_extent()
|-> btrfs_add_reserved_bytes()
| alloc_type is RESERVE_ALLOC_NO_ACCOUNT, so it does not
| change bytes_may_use, and bytes_reserved += 64M. Now
| bytes_may_use + bytes_reserved == 128M, which is greater
| than btrfs_space_info's total_bytes, false enospc occurs.
| Note, the bytes_may_use decrease operation will be done in
| end of btrfs_fallocate(), which is too late.
Here is another simple case for buffered write:
CPU 1 | CPU 2
|
|-> cow_file_range() |-> __btrfs_buffered_write()
|-> btrfs_reserve_extent() | |
| | |
| | |
| ..... | |-> btrfs_check_data_free_space()
| |
| |
|-> extent_clear_unlock_delalloc() |
In CPU 1, btrfs_reserve_extent()->find_free_extent()->
btrfs_add_reserved_bytes() do not decrease bytes_may_use, the decrease
operation will be delayed to be done in extent_clear_unlock_delalloc().
Assume in this case, btrfs_reserve_extent() reserved 128MB data, CPU2's
btrfs_check_data_free_space() tries to reserve 100MB data space.
If
100MB > data_sinfo->total_bytes - data_sinfo->bytes_used -
data_sinfo->bytes_reserved - data_sinfo->bytes_pinned -
data_sinfo->bytes_readonly - data_sinfo->bytes_may_use
btrfs_check_data_free_space() will try to allcate new data chunk or call
btrfs_start_delalloc_roots(), or commit current transaction in order to
reserve some free space, obviously a lot of work. But indeed it's not
necessary as long as decreasing bytes_may_use timely, we still have
free space, decreasing 128M from bytes_may_use.
To fix this issue, this patch chooses to update bytes_may_use for both
data and metadata in btrfs_add_reserved_bytes(). For compress path, real
extent length may not be equal to file content length, so introduce a
ram_bytes argument for btrfs_reserve_extent(), find_free_extent() and
btrfs_add_reserved_bytes(), it's becasue bytes_may_use is increased by
file content length. Then compress path can update bytes_may_use
correctly. Also now we can discard RESERVE_ALLOC_NO_ACCOUNT, RESERVE_ALLOC
and RESERVE_FREE.
As we know, usually EXTENT_DO_ACCOUNTING is used for error path. In
run_delalloc_nocow(), for inode marked as NODATACOW or extent marked as
PREALLOC, we also need to update bytes_may_use, but can not pass
EXTENT_DO_ACCOUNTING, because it also clears metadata reservation, so
here we introduce EXTENT_CLEAR_DATA_RESV flag to indicate btrfs_clear_bit_hook()
to update btrfs_space_info's bytes_may_use.
Meanwhile __btrfs_prealloc_file_range() will call
btrfs_free_reserved_data_space() internally for both sucessful and failed
path, btrfs_prealloc_file_range()'s callers does not need to call
btrfs_free_reserved_data_space() any more.
Signed-off-by: Wang Xiaoguang <wangxg.fnst@cn.fujitsu.com>
Reviewed-by: Josef Bacik <jbacik@fb.com>
Signed-off-by: David Sterba <dsterba@suse.com>
Signed-off-by: Chris Mason <clm@fb.com>
Diffstat (limited to 'fs/btrfs/inode.c')
-rw-r--r-- | fs/btrfs/inode.c | 37 |
1 files changed, 27 insertions, 10 deletions
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 29636624a427..bcea20015d9a 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
@@ -566,6 +566,8 @@ cont: | |||
566 | PAGE_SET_WRITEBACK | | 566 | PAGE_SET_WRITEBACK | |
567 | page_error_op | | 567 | page_error_op | |
568 | PAGE_END_WRITEBACK); | 568 | PAGE_END_WRITEBACK); |
569 | btrfs_free_reserved_data_space_noquota(inode, start, | ||
570 | end - start + 1); | ||
569 | goto free_pages_out; | 571 | goto free_pages_out; |
570 | } | 572 | } |
571 | } | 573 | } |
@@ -742,7 +744,7 @@ retry: | |||
742 | lock_extent(io_tree, async_extent->start, | 744 | lock_extent(io_tree, async_extent->start, |
743 | async_extent->start + async_extent->ram_size - 1); | 745 | async_extent->start + async_extent->ram_size - 1); |
744 | 746 | ||
745 | ret = btrfs_reserve_extent(root, | 747 | ret = btrfs_reserve_extent(root, async_extent->ram_size, |
746 | async_extent->compressed_size, | 748 | async_extent->compressed_size, |
747 | async_extent->compressed_size, | 749 | async_extent->compressed_size, |
748 | 0, alloc_hint, &ins, 1, 1); | 750 | 0, alloc_hint, &ins, 1, 1); |
@@ -969,7 +971,8 @@ static noinline int cow_file_range(struct inode *inode, | |||
969 | EXTENT_DEFRAG, PAGE_UNLOCK | | 971 | EXTENT_DEFRAG, PAGE_UNLOCK | |
970 | PAGE_CLEAR_DIRTY | PAGE_SET_WRITEBACK | | 972 | PAGE_CLEAR_DIRTY | PAGE_SET_WRITEBACK | |
971 | PAGE_END_WRITEBACK); | 973 | PAGE_END_WRITEBACK); |
972 | 974 | btrfs_free_reserved_data_space_noquota(inode, start, | |
975 | end - start + 1); | ||
973 | *nr_written = *nr_written + | 976 | *nr_written = *nr_written + |
974 | (end - start + PAGE_SIZE) / PAGE_SIZE; | 977 | (end - start + PAGE_SIZE) / PAGE_SIZE; |
975 | *page_started = 1; | 978 | *page_started = 1; |
@@ -989,7 +992,7 @@ static noinline int cow_file_range(struct inode *inode, | |||
989 | unsigned long op; | 992 | unsigned long op; |
990 | 993 | ||
991 | cur_alloc_size = disk_num_bytes; | 994 | cur_alloc_size = disk_num_bytes; |
992 | ret = btrfs_reserve_extent(root, cur_alloc_size, | 995 | ret = btrfs_reserve_extent(root, cur_alloc_size, cur_alloc_size, |
993 | root->sectorsize, 0, alloc_hint, | 996 | root->sectorsize, 0, alloc_hint, |
994 | &ins, 1, 1); | 997 | &ins, 1, 1); |
995 | if (ret < 0) | 998 | if (ret < 0) |
@@ -1489,8 +1492,10 @@ out_check: | |||
1489 | extent_clear_unlock_delalloc(inode, cur_offset, | 1492 | extent_clear_unlock_delalloc(inode, cur_offset, |
1490 | cur_offset + num_bytes - 1, | 1493 | cur_offset + num_bytes - 1, |
1491 | locked_page, EXTENT_LOCKED | | 1494 | locked_page, EXTENT_LOCKED | |
1492 | EXTENT_DELALLOC, PAGE_UNLOCK | | 1495 | EXTENT_DELALLOC | |
1493 | PAGE_SET_PRIVATE2); | 1496 | EXTENT_CLEAR_DATA_RESV, |
1497 | PAGE_UNLOCK | PAGE_SET_PRIVATE2); | ||
1498 | |||
1494 | if (!nolock && nocow) | 1499 | if (!nolock && nocow) |
1495 | btrfs_end_write_no_snapshoting(root); | 1500 | btrfs_end_write_no_snapshoting(root); |
1496 | cur_offset = extent_end; | 1501 | cur_offset = extent_end; |
@@ -1807,7 +1812,9 @@ static void btrfs_clear_bit_hook(struct inode *inode, | |||
1807 | return; | 1812 | return; |
1808 | 1813 | ||
1809 | if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID | 1814 | if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID |
1810 | && do_list && !(state->state & EXTENT_NORESERVE)) | 1815 | && do_list && !(state->state & EXTENT_NORESERVE) |
1816 | && (*bits & (EXTENT_DO_ACCOUNTING | | ||
1817 | EXTENT_CLEAR_DATA_RESV))) | ||
1811 | btrfs_free_reserved_data_space_noquota(inode, | 1818 | btrfs_free_reserved_data_space_noquota(inode, |
1812 | state->start, len); | 1819 | state->start, len); |
1813 | 1820 | ||
@@ -7252,7 +7259,7 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode, | |||
7252 | int ret; | 7259 | int ret; |
7253 | 7260 | ||
7254 | alloc_hint = get_extent_allocation_hint(inode, start, len); | 7261 | alloc_hint = get_extent_allocation_hint(inode, start, len); |
7255 | ret = btrfs_reserve_extent(root, len, root->sectorsize, 0, | 7262 | ret = btrfs_reserve_extent(root, len, len, root->sectorsize, 0, |
7256 | alloc_hint, &ins, 1, 1); | 7263 | alloc_hint, &ins, 1, 1); |
7257 | if (ret) | 7264 | if (ret) |
7258 | return ERR_PTR(ret); | 7265 | return ERR_PTR(ret); |
@@ -7752,6 +7759,13 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, | |||
7752 | ret = PTR_ERR(em2); | 7759 | ret = PTR_ERR(em2); |
7753 | goto unlock_err; | 7760 | goto unlock_err; |
7754 | } | 7761 | } |
7762 | /* | ||
7763 | * For inode marked NODATACOW or extent marked PREALLOC, | ||
7764 | * use the existing or preallocated extent, so does not | ||
7765 | * need to adjust btrfs_space_info's bytes_may_use. | ||
7766 | */ | ||
7767 | btrfs_free_reserved_data_space_noquota(inode, | ||
7768 | start, len); | ||
7755 | goto unlock; | 7769 | goto unlock; |
7756 | } | 7770 | } |
7757 | } | 7771 | } |
@@ -7786,7 +7800,6 @@ unlock: | |||
7786 | i_size_write(inode, start + len); | 7800 | i_size_write(inode, start + len); |
7787 | 7801 | ||
7788 | adjust_dio_outstanding_extents(inode, dio_data, len); | 7802 | adjust_dio_outstanding_extents(inode, dio_data, len); |
7789 | btrfs_free_reserved_data_space(inode, start, len); | ||
7790 | WARN_ON(dio_data->reserve < len); | 7803 | WARN_ON(dio_data->reserve < len); |
7791 | dio_data->reserve -= len; | 7804 | dio_data->reserve -= len; |
7792 | dio_data->unsubmitted_oe_range_end = start + len; | 7805 | dio_data->unsubmitted_oe_range_end = start + len; |
@@ -10306,6 +10319,7 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode, | |||
10306 | u64 last_alloc = (u64)-1; | 10319 | u64 last_alloc = (u64)-1; |
10307 | int ret = 0; | 10320 | int ret = 0; |
10308 | bool own_trans = true; | 10321 | bool own_trans = true; |
10322 | u64 end = start + num_bytes - 1; | ||
10309 | 10323 | ||
10310 | if (trans) | 10324 | if (trans) |
10311 | own_trans = false; | 10325 | own_trans = false; |
@@ -10327,8 +10341,8 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode, | |||
10327 | * sized chunks. | 10341 | * sized chunks. |
10328 | */ | 10342 | */ |
10329 | cur_bytes = min(cur_bytes, last_alloc); | 10343 | cur_bytes = min(cur_bytes, last_alloc); |
10330 | ret = btrfs_reserve_extent(root, cur_bytes, min_size, 0, | 10344 | ret = btrfs_reserve_extent(root, cur_bytes, cur_bytes, |
10331 | *alloc_hint, &ins, 1, 0); | 10345 | min_size, 0, *alloc_hint, &ins, 1, 0); |
10332 | if (ret) { | 10346 | if (ret) { |
10333 | if (own_trans) | 10347 | if (own_trans) |
10334 | btrfs_end_transaction(trans, root); | 10348 | btrfs_end_transaction(trans, root); |
@@ -10414,6 +10428,9 @@ next: | |||
10414 | if (own_trans) | 10428 | if (own_trans) |
10415 | btrfs_end_transaction(trans, root); | 10429 | btrfs_end_transaction(trans, root); |
10416 | } | 10430 | } |
10431 | if (cur_offset < end) | ||
10432 | btrfs_free_reserved_data_space(inode, cur_offset, | ||
10433 | end - cur_offset + 1); | ||
10417 | return ret; | 10434 | return ret; |
10418 | } | 10435 | } |
10419 | 10436 | ||