diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2019-03-26 13:32:13 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2019-03-26 13:32:13 -0400 |
commit | 65ae689329c5d6a149b9201df9321368fbdb6a5c (patch) | |
tree | d5eec2c81b1d50ff7af1319e190617962a2386d1 | |
parent | 26a3b01be87f7890125933eea03eb997d335a921 (diff) | |
parent | 0ccc3876e4b2a1559a4dbe3126dda4459d38a83b (diff) |
Merge tag 'for-5.1-rc2-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
Pull btrfs fixes from David Sterba:
- fsync fixes: i_size for truncate vs fsync, dio vs buffered during
snapshotting, remove complicated but incomplete assertion
- removed excessive warnings, misreported device stats updates
- fix raid56 page mapping for 32bit arch
- fixes reported by static analyzer
* tag 'for-5.1-rc2-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
Btrfs: fix assertion failure on fsync with NO_HOLES enabled
btrfs: Avoid possible qgroup_rsv_size overflow in btrfs_calculate_inode_block_rsv_size
btrfs: Fix bound checking in qgroup_trace_new_subtree_blocks
btrfs: raid56: properly unmap parity page in finish_parity_scrub()
btrfs: don't report readahead errors and don't update statistics
Btrfs: fix file corruption after snapshotting due to mix of buffered/DIO writes
btrfs: remove WARN_ON in log_dir_items
Btrfs: fix incorrect file size after shrinking truncate and fsync
-rw-r--r-- | fs/btrfs/extent-tree.c | 2 | ||||
-rw-r--r-- | fs/btrfs/qgroup.c | 4 | ||||
-rw-r--r-- | fs/btrfs/raid56.c | 3 | ||||
-rw-r--r-- | fs/btrfs/transaction.c | 49 | ||||
-rw-r--r-- | fs/btrfs/tree-log.c | 33 | ||||
-rw-r--r-- | fs/btrfs/volumes.c | 2 |
6 files changed, 72 insertions, 21 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 1d49694e6ae3..c5880329ae37 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
@@ -6174,7 +6174,7 @@ static void btrfs_calculate_inode_block_rsv_size(struct btrfs_fs_info *fs_info, | |||
6174 | * | 6174 | * |
6175 | * This is overestimating in most cases. | 6175 | * This is overestimating in most cases. |
6176 | */ | 6176 | */ |
6177 | qgroup_rsv_size = outstanding_extents * fs_info->nodesize; | 6177 | qgroup_rsv_size = (u64)outstanding_extents * fs_info->nodesize; |
6178 | 6178 | ||
6179 | spin_lock(&block_rsv->lock); | 6179 | spin_lock(&block_rsv->lock); |
6180 | block_rsv->size = reserve_size; | 6180 | block_rsv->size = reserve_size; |
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index eb680b715dd6..e659d9d61107 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c | |||
@@ -1922,8 +1922,8 @@ static int qgroup_trace_new_subtree_blocks(struct btrfs_trans_handle* trans, | |||
1922 | int i; | 1922 | int i; |
1923 | 1923 | ||
1924 | /* Level sanity check */ | 1924 | /* Level sanity check */ |
1925 | if (cur_level < 0 || cur_level >= BTRFS_MAX_LEVEL || | 1925 | if (cur_level < 0 || cur_level >= BTRFS_MAX_LEVEL - 1 || |
1926 | root_level < 0 || root_level >= BTRFS_MAX_LEVEL || | 1926 | root_level < 0 || root_level >= BTRFS_MAX_LEVEL - 1 || |
1927 | root_level < cur_level) { | 1927 | root_level < cur_level) { |
1928 | btrfs_err_rl(fs_info, | 1928 | btrfs_err_rl(fs_info, |
1929 | "%s: bad levels, cur_level=%d root_level=%d", | 1929 | "%s: bad levels, cur_level=%d root_level=%d", |
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c index 1869ba8e5981..67a6f7d47402 100644 --- a/fs/btrfs/raid56.c +++ b/fs/btrfs/raid56.c | |||
@@ -2430,8 +2430,9 @@ static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio, | |||
2430 | bitmap_clear(rbio->dbitmap, pagenr, 1); | 2430 | bitmap_clear(rbio->dbitmap, pagenr, 1); |
2431 | kunmap(p); | 2431 | kunmap(p); |
2432 | 2432 | ||
2433 | for (stripe = 0; stripe < rbio->real_stripes; stripe++) | 2433 | for (stripe = 0; stripe < nr_data; stripe++) |
2434 | kunmap(page_in_rbio(rbio, stripe, pagenr, 0)); | 2434 | kunmap(page_in_rbio(rbio, stripe, pagenr, 0)); |
2435 | kunmap(p_page); | ||
2435 | } | 2436 | } |
2436 | 2437 | ||
2437 | __free_page(p_page); | 2438 | __free_page(p_page); |
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index acdad6d658f5..e4e665f422fc 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c | |||
@@ -1886,8 +1886,10 @@ static void btrfs_cleanup_pending_block_groups(struct btrfs_trans_handle *trans) | |||
1886 | } | 1886 | } |
1887 | } | 1887 | } |
1888 | 1888 | ||
1889 | static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info) | 1889 | static inline int btrfs_start_delalloc_flush(struct btrfs_trans_handle *trans) |
1890 | { | 1890 | { |
1891 | struct btrfs_fs_info *fs_info = trans->fs_info; | ||
1892 | |||
1891 | /* | 1893 | /* |
1892 | * We use writeback_inodes_sb here because if we used | 1894 | * We use writeback_inodes_sb here because if we used |
1893 | * btrfs_start_delalloc_roots we would deadlock with fs freeze. | 1895 | * btrfs_start_delalloc_roots we would deadlock with fs freeze. |
@@ -1897,15 +1899,50 @@ static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info) | |||
1897 | * from already being in a transaction and our join_transaction doesn't | 1899 | * from already being in a transaction and our join_transaction doesn't |
1898 | * have to re-take the fs freeze lock. | 1900 | * have to re-take the fs freeze lock. |
1899 | */ | 1901 | */ |
1900 | if (btrfs_test_opt(fs_info, FLUSHONCOMMIT)) | 1902 | if (btrfs_test_opt(fs_info, FLUSHONCOMMIT)) { |
1901 | writeback_inodes_sb(fs_info->sb, WB_REASON_SYNC); | 1903 | writeback_inodes_sb(fs_info->sb, WB_REASON_SYNC); |
1904 | } else { | ||
1905 | struct btrfs_pending_snapshot *pending; | ||
1906 | struct list_head *head = &trans->transaction->pending_snapshots; | ||
1907 | |||
1908 | /* | ||
1909 | * Flush dellaloc for any root that is going to be snapshotted. | ||
1910 | * This is done to avoid a corrupted version of files, in the | ||
1911 | * snapshots, that had both buffered and direct IO writes (even | ||
1912 | * if they were done sequentially) due to an unordered update of | ||
1913 | * the inode's size on disk. | ||
1914 | */ | ||
1915 | list_for_each_entry(pending, head, list) { | ||
1916 | int ret; | ||
1917 | |||
1918 | ret = btrfs_start_delalloc_snapshot(pending->root); | ||
1919 | if (ret) | ||
1920 | return ret; | ||
1921 | } | ||
1922 | } | ||
1902 | return 0; | 1923 | return 0; |
1903 | } | 1924 | } |
1904 | 1925 | ||
1905 | static inline void btrfs_wait_delalloc_flush(struct btrfs_fs_info *fs_info) | 1926 | static inline void btrfs_wait_delalloc_flush(struct btrfs_trans_handle *trans) |
1906 | { | 1927 | { |
1907 | if (btrfs_test_opt(fs_info, FLUSHONCOMMIT)) | 1928 | struct btrfs_fs_info *fs_info = trans->fs_info; |
1929 | |||
1930 | if (btrfs_test_opt(fs_info, FLUSHONCOMMIT)) { | ||
1908 | btrfs_wait_ordered_roots(fs_info, U64_MAX, 0, (u64)-1); | 1931 | btrfs_wait_ordered_roots(fs_info, U64_MAX, 0, (u64)-1); |
1932 | } else { | ||
1933 | struct btrfs_pending_snapshot *pending; | ||
1934 | struct list_head *head = &trans->transaction->pending_snapshots; | ||
1935 | |||
1936 | /* | ||
1937 | * Wait for any dellaloc that we started previously for the roots | ||
1938 | * that are going to be snapshotted. This is to avoid a corrupted | ||
1939 | * version of files in the snapshots that had both buffered and | ||
1940 | * direct IO writes (even if they were done sequentially). | ||
1941 | */ | ||
1942 | list_for_each_entry(pending, head, list) | ||
1943 | btrfs_wait_ordered_extents(pending->root, | ||
1944 | U64_MAX, 0, U64_MAX); | ||
1945 | } | ||
1909 | } | 1946 | } |
1910 | 1947 | ||
1911 | int btrfs_commit_transaction(struct btrfs_trans_handle *trans) | 1948 | int btrfs_commit_transaction(struct btrfs_trans_handle *trans) |
@@ -2023,7 +2060,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) | |||
2023 | 2060 | ||
2024 | extwriter_counter_dec(cur_trans, trans->type); | 2061 | extwriter_counter_dec(cur_trans, trans->type); |
2025 | 2062 | ||
2026 | ret = btrfs_start_delalloc_flush(fs_info); | 2063 | ret = btrfs_start_delalloc_flush(trans); |
2027 | if (ret) | 2064 | if (ret) |
2028 | goto cleanup_transaction; | 2065 | goto cleanup_transaction; |
2029 | 2066 | ||
@@ -2039,7 +2076,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) | |||
2039 | if (ret) | 2076 | if (ret) |
2040 | goto cleanup_transaction; | 2077 | goto cleanup_transaction; |
2041 | 2078 | ||
2042 | btrfs_wait_delalloc_flush(fs_info); | 2079 | btrfs_wait_delalloc_flush(trans); |
2043 | 2080 | ||
2044 | btrfs_scrub_pause(fs_info); | 2081 | btrfs_scrub_pause(fs_info); |
2045 | /* | 2082 | /* |
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index f06454a55e00..561884f60d35 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c | |||
@@ -3578,9 +3578,16 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans, | |||
3578 | } | 3578 | } |
3579 | btrfs_release_path(path); | 3579 | btrfs_release_path(path); |
3580 | 3580 | ||
3581 | /* find the first key from this transaction again */ | 3581 | /* |
3582 | * Find the first key from this transaction again. See the note for | ||
3583 | * log_new_dir_dentries, if we're logging a directory recursively we | ||
3584 | * won't be holding its i_mutex, which means we can modify the directory | ||
3585 | * while we're logging it. If we remove an entry between our first | ||
3586 | * search and this search we'll not find the key again and can just | ||
3587 | * bail. | ||
3588 | */ | ||
3582 | ret = btrfs_search_slot(NULL, root, &min_key, path, 0, 0); | 3589 | ret = btrfs_search_slot(NULL, root, &min_key, path, 0, 0); |
3583 | if (WARN_ON(ret != 0)) | 3590 | if (ret != 0) |
3584 | goto done; | 3591 | goto done; |
3585 | 3592 | ||
3586 | /* | 3593 | /* |
@@ -4544,6 +4551,19 @@ static int logged_inode_size(struct btrfs_root *log, struct btrfs_inode *inode, | |||
4544 | item = btrfs_item_ptr(path->nodes[0], path->slots[0], | 4551 | item = btrfs_item_ptr(path->nodes[0], path->slots[0], |
4545 | struct btrfs_inode_item); | 4552 | struct btrfs_inode_item); |
4546 | *size_ret = btrfs_inode_size(path->nodes[0], item); | 4553 | *size_ret = btrfs_inode_size(path->nodes[0], item); |
4554 | /* | ||
4555 | * If the in-memory inode's i_size is smaller then the inode | ||
4556 | * size stored in the btree, return the inode's i_size, so | ||
4557 | * that we get a correct inode size after replaying the log | ||
4558 | * when before a power failure we had a shrinking truncate | ||
4559 | * followed by addition of a new name (rename / new hard link). | ||
4560 | * Otherwise return the inode size from the btree, to avoid | ||
4561 | * data loss when replaying a log due to previously doing a | ||
4562 | * write that expands the inode's size and logging a new name | ||
4563 | * immediately after. | ||
4564 | */ | ||
4565 | if (*size_ret > inode->vfs_inode.i_size) | ||
4566 | *size_ret = inode->vfs_inode.i_size; | ||
4547 | } | 4567 | } |
4548 | 4568 | ||
4549 | btrfs_release_path(path); | 4569 | btrfs_release_path(path); |
@@ -4705,15 +4725,8 @@ static int btrfs_log_trailing_hole(struct btrfs_trans_handle *trans, | |||
4705 | struct btrfs_file_extent_item); | 4725 | struct btrfs_file_extent_item); |
4706 | 4726 | ||
4707 | if (btrfs_file_extent_type(leaf, extent) == | 4727 | if (btrfs_file_extent_type(leaf, extent) == |
4708 | BTRFS_FILE_EXTENT_INLINE) { | 4728 | BTRFS_FILE_EXTENT_INLINE) |
4709 | len = btrfs_file_extent_ram_bytes(leaf, extent); | ||
4710 | ASSERT(len == i_size || | ||
4711 | (len == fs_info->sectorsize && | ||
4712 | btrfs_file_extent_compression(leaf, extent) != | ||
4713 | BTRFS_COMPRESS_NONE) || | ||
4714 | (len < i_size && i_size < fs_info->sectorsize)); | ||
4715 | return 0; | 4729 | return 0; |
4716 | } | ||
4717 | 4730 | ||
4718 | len = btrfs_file_extent_num_bytes(leaf, extent); | 4731 | len = btrfs_file_extent_num_bytes(leaf, extent); |
4719 | /* Last extent goes beyond i_size, no need to log a hole. */ | 4732 | /* Last extent goes beyond i_size, no need to log a hole. */ |
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 9024eee889b9..db934ceae9c1 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c | |||
@@ -6407,7 +6407,7 @@ static void btrfs_end_bio(struct bio *bio) | |||
6407 | if (bio_op(bio) == REQ_OP_WRITE) | 6407 | if (bio_op(bio) == REQ_OP_WRITE) |
6408 | btrfs_dev_stat_inc_and_print(dev, | 6408 | btrfs_dev_stat_inc_and_print(dev, |
6409 | BTRFS_DEV_STAT_WRITE_ERRS); | 6409 | BTRFS_DEV_STAT_WRITE_ERRS); |
6410 | else | 6410 | else if (!(bio->bi_opf & REQ_RAHEAD)) |
6411 | btrfs_dev_stat_inc_and_print(dev, | 6411 | btrfs_dev_stat_inc_and_print(dev, |
6412 | BTRFS_DEV_STAT_READ_ERRS); | 6412 | BTRFS_DEV_STAT_READ_ERRS); |
6413 | if (bio->bi_opf & REQ_PREFLUSH) | 6413 | if (bio->bi_opf & REQ_PREFLUSH) |