diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2019-03-26 13:32:13 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2019-03-26 13:32:13 -0400 |
commit | 65ae689329c5d6a149b9201df9321368fbdb6a5c (patch) | |
tree | d5eec2c81b1d50ff7af1319e190617962a2386d1 | |
parent | 26a3b01be87f7890125933eea03eb997d335a921 (diff) | |
parent | 0ccc3876e4b2a1559a4dbe3126dda4459d38a83b (diff) |
Merge tag 'for-5.1-rc2-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
Pull btrfs fixes from David Sterba:
- fsync fixes: i_size for truncate vs fsync, dio vs buffered during
snapshotting, remove complicated but incomplete assertion
- removed excessive warnings, misreported device stats updates
- fix raid56 page mapping for 32bit arch
- fixes reported by static analyzer
* tag 'for-5.1-rc2-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
Btrfs: fix assertion failure on fsync with NO_HOLES enabled
btrfs: Avoid possible qgroup_rsv_size overflow in btrfs_calculate_inode_block_rsv_size
btrfs: Fix bound checking in qgroup_trace_new_subtree_blocks
btrfs: raid56: properly unmap parity page in finish_parity_scrub()
btrfs: don't report readahead errors and don't update statistics
Btrfs: fix file corruption after snapshotting due to mix of buffered/DIO writes
btrfs: remove WARN_ON in log_dir_items
Btrfs: fix incorrect file size after shrinking truncate and fsync
-rw-r--r-- | fs/btrfs/extent-tree.c | 2 | ||||
-rw-r--r-- | fs/btrfs/qgroup.c | 4 | ||||
-rw-r--r-- | fs/btrfs/raid56.c | 3 | ||||
-rw-r--r-- | fs/btrfs/transaction.c | 49 | ||||
-rw-r--r-- | fs/btrfs/tree-log.c | 33 | ||||
-rw-r--r-- | fs/btrfs/volumes.c | 2 |
6 files changed, 72 insertions, 21 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 1d49694e6ae3..c5880329ae37 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
@@ -6174,7 +6174,7 @@ static void btrfs_calculate_inode_block_rsv_size(struct btrfs_fs_info *fs_info, | |||
6174 | * | 6174 | * |
6175 | * This is overestimating in most cases. | 6175 | * This is overestimating in most cases. |
6176 | */ | 6176 | */ |
6177 | qgroup_rsv_size = outstanding_extents * fs_info->nodesize; | 6177 | qgroup_rsv_size = (u64)outstanding_extents * fs_info->nodesize; |
6178 | 6178 | ||
6179 | spin_lock(&block_rsv->lock); | 6179 | spin_lock(&block_rsv->lock); |
6180 | block_rsv->size = reserve_size; | 6180 | block_rsv->size = reserve_size; |
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index eb680b715dd6..e659d9d61107 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c | |||
@@ -1922,8 +1922,8 @@ static int qgroup_trace_new_subtree_blocks(struct btrfs_trans_handle* trans, | |||
1922 | int i; | 1922 | int i; |
1923 | 1923 | ||
1924 | /* Level sanity check */ | 1924 | /* Level sanity check */ |
1925 | if (cur_level < 0 || cur_level >= BTRFS_MAX_LEVEL || | 1925 | if (cur_level < 0 || cur_level >= BTRFS_MAX_LEVEL - 1 || |
1926 | root_level < 0 || root_level >= BTRFS_MAX_LEVEL || | 1926 | root_level < 0 || root_level >= BTRFS_MAX_LEVEL - 1 || |
1927 | root_level < cur_level) { | 1927 | root_level < cur_level) { |
1928 | btrfs_err_rl(fs_info, | 1928 | btrfs_err_rl(fs_info, |
1929 | "%s: bad levels, cur_level=%d root_level=%d", | 1929 | "%s: bad levels, cur_level=%d root_level=%d", |
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c index 1869ba8e5981..67a6f7d47402 100644 --- a/fs/btrfs/raid56.c +++ b/fs/btrfs/raid56.c | |||
@@ -2430,8 +2430,9 @@ static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio, | |||
2430 | bitmap_clear(rbio->dbitmap, pagenr, 1); | 2430 | bitmap_clear(rbio->dbitmap, pagenr, 1); |
2431 | kunmap(p); | 2431 | kunmap(p); |
2432 | 2432 | ||
2433 | for (stripe = 0; stripe < rbio->real_stripes; stripe++) | 2433 | for (stripe = 0; stripe < nr_data; stripe++) |
2434 | kunmap(page_in_rbio(rbio, stripe, pagenr, 0)); | 2434 | kunmap(page_in_rbio(rbio, stripe, pagenr, 0)); |
2435 | kunmap(p_page); | ||
2435 | } | 2436 | } |
2436 | 2437 | ||
2437 | __free_page(p_page); | 2438 | __free_page(p_page); |
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index acdad6d658f5..e4e665f422fc 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c | |||
@@ -1886,8 +1886,10 @@ static void btrfs_cleanup_pending_block_groups(struct btrfs_trans_handle *trans) | |||
1886 | } | 1886 | } |
1887 | } | 1887 | } |
1888 | 1888 | ||
1889 | static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info) | 1889 | static inline int btrfs_start_delalloc_flush(struct btrfs_trans_handle *trans) |
1890 | { | 1890 | { |
1891 | struct btrfs_fs_info *fs_info = trans->fs_info; | ||
1892 | |||
1891 | /* | 1893 | /* |
1892 | * We use writeback_inodes_sb here because if we used | 1894 | * We use writeback_inodes_sb here because if we used |
1893 | * btrfs_start_delalloc_roots we would deadlock with fs freeze. | 1895 | * btrfs_start_delalloc_roots we would deadlock with fs freeze. |
@@ -1897,15 +1899,50 @@ static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info) | |||
1897 | * from already being in a transaction and our join_transaction doesn't | 1899 | * from already being in a transaction and our join_transaction doesn't |
1898 | * have to re-take the fs freeze lock. | 1900 | * have to re-take the fs freeze lock. |
1899 | */ | 1901 | */ |
1900 | if (btrfs_test_opt(fs_info, FLUSHONCOMMIT)) | 1902 | if (btrfs_test_opt(fs_info, FLUSHONCOMMIT)) { |
1901 | writeback_inodes_sb(fs_info->sb, WB_REASON_SYNC); | 1903 | writeback_inodes_sb(fs_info->sb, WB_REASON_SYNC); |
1904 | } else { | ||
1905 | struct btrfs_pending_snapshot *pending; | ||
1906 | struct list_head *head = &trans->transaction->pending_snapshots; | ||
1907 | |||
1908 | /* | ||
1909 | * Flush dellaloc for any root that is going to be snapshotted. | ||
1910 | * This is done to avoid a corrupted version of files, in the | ||
1911 | * snapshots, that had both buffered and direct IO writes (even | ||
1912 | * if they were done sequentially) due to an unordered update of | ||
1913 | * the inode's size on disk. | ||
1914 | */ | ||
1915 | list_for_each_entry(pending, head, list) { | ||
1916 | int ret; | ||
1917 | |||
1918 | ret = btrfs_start_delalloc_snapshot(pending->root); | ||
1919 | if (ret) | ||
1920 | return ret; | ||
1921 | } | ||
1922 | } | ||
1902 | return 0; | 1923 | return 0; |
1903 | } | 1924 | } |
1904 | 1925 | ||
1905 | static inline void btrfs_wait_delalloc_flush(struct btrfs_fs_info *fs_info) | 1926 | static inline void btrfs_wait_delalloc_flush(struct btrfs_trans_handle *trans) |
1906 | { | 1927 | { |
1907 | if (btrfs_test_opt(fs_info, FLUSHONCOMMIT)) | 1928 | struct btrfs_fs_info *fs_info = trans->fs_info; |
1929 | |||
1930 | if (btrfs_test_opt(fs_info, FLUSHONCOMMIT)) { | ||
1908 | btrfs_wait_ordered_roots(fs_info, U64_MAX, 0, (u64)-1); | 1931 | btrfs_wait_ordered_roots(fs_info, U64_MAX, 0, (u64)-1); |
1932 | } else { | ||
1933 | struct btrfs_pending_snapshot *pending; | ||
1934 | struct list_head *head = &trans->transaction->pending_snapshots; | ||
1935 | |||
1936 | /* | ||
1937 | * Wait for any dellaloc that we started previously for the roots | ||
1938 | * that are going to be snapshotted. This is to avoid a corrupted | ||
1939 | * version of files in the snapshots that had both buffered and | ||
1940 | * direct IO writes (even if they were done sequentially). | ||
1941 | */ | ||
1942 | list_for_each_entry(pending, head, list) | ||
1943 | btrfs_wait_ordered_extents(pending->root, | ||
1944 | U64_MAX, 0, U64_MAX); | ||
1945 | } | ||
1909 | } | 1946 | } |
1910 | 1947 | ||
1911 | int btrfs_commit_transaction(struct btrfs_trans_handle *trans) | 1948 | int btrfs_commit_transaction(struct btrfs_trans_handle *trans) |
@@ -2023,7 +2060,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) | |||
2023 | 2060 | ||
2024 | extwriter_counter_dec(cur_trans, trans->type); | 2061 | extwriter_counter_dec(cur_trans, trans->type); |
2025 | 2062 | ||
2026 | ret = btrfs_start_delalloc_flush(fs_info); | 2063 | ret = btrfs_start_delalloc_flush(trans); |
2027 | if (ret) | 2064 | if (ret) |
2028 | goto cleanup_transaction; | 2065 | goto cleanup_transaction; |
2029 | 2066 | ||
@@ -2039,7 +2076,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) | |||
2039 | if (ret) | 2076 | if (ret) |
2040 | goto cleanup_transaction; | 2077 | goto cleanup_transaction; |
2041 | 2078 | ||
2042 | btrfs_wait_delalloc_flush(fs_info); | 2079 | btrfs_wait_delalloc_flush(trans); |
2043 | 2080 | ||
2044 | btrfs_scrub_pause(fs_info); | 2081 | btrfs_scrub_pause(fs_info); |
2045 | /* | 2082 | /* |
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index f06454a55e00..561884f60d35 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c | |||
@@ -3578,9 +3578,16 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans, | |||
3578 | } | 3578 | } |
3579 | btrfs_release_path(path); | 3579 | btrfs_release_path(path); |
3580 | 3580 | ||
3581 | /* find the first key from this transaction again */ | 3581 | /* |
3582 | * Find the first key from this transaction again. See the note for | ||
3583 | * log_new_dir_dentries, if we're logging a directory recursively we | ||
3584 | * won't be holding its i_mutex, which means we can modify the directory | ||
3585 | * while we're logging it. If we remove an entry between our first | ||
3586 | * search and this search we'll not find the key again and can just | ||
3587 | * bail. | ||
3588 | */ | ||
3582 | ret = btrfs_search_slot(NULL, root, &min_key, path, 0, 0); | 3589 | ret = btrfs_search_slot(NULL, root, &min_key, path, 0, 0); |
3583 | if (WARN_ON(ret != 0)) | 3590 | if (ret != 0) |
3584 | goto done; | 3591 | goto done; |
3585 | 3592 | ||
3586 | /* | 3593 | /* |
@@ -4544,6 +4551,19 @@ static int logged_inode_size(struct btrfs_root *log, struct btrfs_inode *inode, | |||
4544 | item = btrfs_item_ptr(path->nodes[0], path->slots[0], | 4551 | item = btrfs_item_ptr(path->nodes[0], path->slots[0], |
4545 | struct btrfs_inode_item); | 4552 | struct btrfs_inode_item); |
4546 | *size_ret = btrfs_inode_size(path->nodes[0], item); | 4553 | *size_ret = btrfs_inode_size(path->nodes[0], item); |
4554 | /* | ||
4555 | * If the in-memory inode's i_size is smaller then the inode | ||
4556 | * size stored in the btree, return the inode's i_size, so | ||
4557 | * that we get a correct inode size after replaying the log | ||
4558 | * when before a power failure we had a shrinking truncate | ||
4559 | * followed by addition of a new name (rename / new hard link). | ||
4560 | * Otherwise return the inode size from the btree, to avoid | ||
4561 | * data loss when replaying a log due to previously doing a | ||
4562 | * write that expands the inode's size and logging a new name | ||
4563 | * immediately after. | ||
4564 | */ | ||
4565 | if (*size_ret > inode->vfs_inode.i_size) | ||
4566 | *size_ret = inode->vfs_inode.i_size; | ||
4547 | } | 4567 | } |
4548 | 4568 | ||
4549 | btrfs_release_path(path); | 4569 | btrfs_release_path(path); |
@@ -4705,15 +4725,8 @@ static int btrfs_log_trailing_hole(struct btrfs_trans_handle *trans, | |||
4705 | struct btrfs_file_extent_item); | 4725 | struct btrfs_file_extent_item); |
4706 | 4726 | ||
4707 | if (btrfs_file_extent_type(leaf, extent) == | 4727 | if (btrfs_file_extent_type(leaf, extent) == |
4708 | BTRFS_FILE_EXTENT_INLINE) { | 4728 | BTRFS_FILE_EXTENT_INLINE) |
4709 | len = btrfs_file_extent_ram_bytes(leaf, extent); | ||
4710 | ASSERT(len == i_size || | ||
4711 | (len == fs_info->sectorsize && | ||
4712 | btrfs_file_extent_compression(leaf, extent) != | ||
4713 | BTRFS_COMPRESS_NONE) || | ||
4714 | (len < i_size && i_size < fs_info->sectorsize)); | ||
4715 | return 0; | 4729 | return 0; |
4716 | } | ||
4717 | 4730 | ||
4718 | len = btrfs_file_extent_num_bytes(leaf, extent); | 4731 | len = btrfs_file_extent_num_bytes(leaf, extent); |
4719 | /* Last extent goes beyond i_size, no need to log a hole. */ | 4732 | /* Last extent goes beyond i_size, no need to log a hole. */ |
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 9024eee889b9..db934ceae9c1 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c | |||
@@ -6407,7 +6407,7 @@ static void btrfs_end_bio(struct bio *bio) | |||
6407 | if (bio_op(bio) == REQ_OP_WRITE) | 6407 | if (bio_op(bio) == REQ_OP_WRITE) |
6408 | btrfs_dev_stat_inc_and_print(dev, | 6408 | btrfs_dev_stat_inc_and_print(dev, |
6409 | BTRFS_DEV_STAT_WRITE_ERRS); | 6409 | BTRFS_DEV_STAT_WRITE_ERRS); |
6410 | else | 6410 | else if (!(bio->bi_opf & REQ_RAHEAD)) |
6411 | btrfs_dev_stat_inc_and_print(dev, | 6411 | btrfs_dev_stat_inc_and_print(dev, |
6412 | BTRFS_DEV_STAT_READ_ERRS); | 6412 | BTRFS_DEV_STAT_READ_ERRS); |
6413 | if (bio->bi_opf & REQ_PREFLUSH) | 6413 | if (bio->bi_opf & REQ_PREFLUSH) |