aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2019-01-13 12:55:51 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2019-01-13 12:55:51 -0500
commit6b529fb0a3eabf9c4cc3e94c11477250379ce6d8 (patch)
tree132133ceb3eef791ea98d288041616d5eb9fdcf8
parent72d657dd2115804b93bde4b77e426cc2de70eebf (diff)
parent1b3922a8bc74231f9a767d1be6d9a061a4d4eeab (diff)
Merge tag 'for-5.0-rc1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
Pull btrfs fixes from David Sterba: - two regression fixes in clone/dedupe ioctls, the generic check callback needs to lock extents properly and wait for io to avoid problems with writeback and relocation - fix deadlock when using free space tree due to block group creation - a recently added check refuses a valid fileystem with seeding device, make that work again with a quickfix, proper solution needs more intrusive changes * tag 'for-5.0-rc1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: btrfs: Use real device structure to verify dev extent Btrfs: fix deadlock when using free space tree due to block group creation Btrfs: fix race between reflink/dedupe and relocation Btrfs: fix race between cloning range ending at eof and writeback
-rw-r--r--fs/btrfs/ctree.c16
-rw-r--r--fs/btrfs/ioctl.c49
-rw-r--r--fs/btrfs/volumes.c12
3 files changed, 64 insertions, 13 deletions
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index d92462fe66c8..f64aad613727 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -1016,19 +1016,21 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
1016 parent_start = parent->start; 1016 parent_start = parent->start;
1017 1017
1018 /* 1018 /*
1019 * If we are COWing a node/leaf from the extent, chunk or device trees, 1019 * If we are COWing a node/leaf from the extent, chunk, device or free
1020 * make sure that we do not finish block group creation of pending block 1020 * space trees, make sure that we do not finish block group creation of
1021 * groups. We do this to avoid a deadlock. 1021 * pending block groups. We do this to avoid a deadlock.
1022 * COWing can result in allocation of a new chunk, and flushing pending 1022 * COWing can result in allocation of a new chunk, and flushing pending
1023 * block groups (btrfs_create_pending_block_groups()) can be triggered 1023 * block groups (btrfs_create_pending_block_groups()) can be triggered
1024 * when finishing allocation of a new chunk. Creation of a pending block 1024 * when finishing allocation of a new chunk. Creation of a pending block
1025 * group modifies the extent, chunk and device trees, therefore we could 1025 * group modifies the extent, chunk, device and free space trees,
1026 * deadlock with ourselves since we are holding a lock on an extent 1026 * therefore we could deadlock with ourselves since we are holding a
1027 * buffer that btrfs_create_pending_block_groups() may try to COW later. 1027 * lock on an extent buffer that btrfs_create_pending_block_groups() may
1028 * try to COW later.
1028 */ 1029 */
1029 if (root == fs_info->extent_root || 1030 if (root == fs_info->extent_root ||
1030 root == fs_info->chunk_root || 1031 root == fs_info->chunk_root ||
1031 root == fs_info->dev_root) 1032 root == fs_info->dev_root ||
1033 root == fs_info->free_space_root)
1032 trans->can_flush_pending_bgs = false; 1034 trans->can_flush_pending_bgs = false;
1033 1035
1034 cow = btrfs_alloc_tree_block(trans, root, parent_start, 1036 cow = btrfs_alloc_tree_block(trans, root, parent_start,
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index fab9443f6a42..9c8e1734429c 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -3221,6 +3221,26 @@ static void btrfs_double_inode_lock(struct inode *inode1, struct inode *inode2)
3221 inode_lock_nested(inode2, I_MUTEX_CHILD); 3221 inode_lock_nested(inode2, I_MUTEX_CHILD);
3222} 3222}
3223 3223
3224static void btrfs_double_extent_unlock(struct inode *inode1, u64 loff1,
3225 struct inode *inode2, u64 loff2, u64 len)
3226{
3227 unlock_extent(&BTRFS_I(inode1)->io_tree, loff1, loff1 + len - 1);
3228 unlock_extent(&BTRFS_I(inode2)->io_tree, loff2, loff2 + len - 1);
3229}
3230
3231static void btrfs_double_extent_lock(struct inode *inode1, u64 loff1,
3232 struct inode *inode2, u64 loff2, u64 len)
3233{
3234 if (inode1 < inode2) {
3235 swap(inode1, inode2);
3236 swap(loff1, loff2);
3237 } else if (inode1 == inode2 && loff2 < loff1) {
3238 swap(loff1, loff2);
3239 }
3240 lock_extent(&BTRFS_I(inode1)->io_tree, loff1, loff1 + len - 1);
3241 lock_extent(&BTRFS_I(inode2)->io_tree, loff2, loff2 + len - 1);
3242}
3243
3224static int btrfs_extent_same_range(struct inode *src, u64 loff, u64 olen, 3244static int btrfs_extent_same_range(struct inode *src, u64 loff, u64 olen,
3225 struct inode *dst, u64 dst_loff) 3245 struct inode *dst, u64 dst_loff)
3226{ 3246{
@@ -3242,11 +3262,12 @@ static int btrfs_extent_same_range(struct inode *src, u64 loff, u64 olen,
3242 return -EINVAL; 3262 return -EINVAL;
3243 3263
3244 /* 3264 /*
3245 * Lock destination range to serialize with concurrent readpages(). 3265 * Lock destination range to serialize with concurrent readpages() and
3266 * source range to serialize with relocation.
3246 */ 3267 */
3247 lock_extent(&BTRFS_I(dst)->io_tree, dst_loff, dst_loff + len - 1); 3268 btrfs_double_extent_lock(src, loff, dst, dst_loff, len);
3248 ret = btrfs_clone(src, dst, loff, olen, len, dst_loff, 1); 3269 ret = btrfs_clone(src, dst, loff, olen, len, dst_loff, 1);
3249 unlock_extent(&BTRFS_I(dst)->io_tree, dst_loff, dst_loff + len - 1); 3270 btrfs_double_extent_unlock(src, loff, dst, dst_loff, len);
3250 3271
3251 return ret; 3272 return ret;
3252} 3273}
@@ -3905,17 +3926,33 @@ static noinline int btrfs_clone_files(struct file *file, struct file *file_src,
3905 len = ALIGN(src->i_size, bs) - off; 3926 len = ALIGN(src->i_size, bs) - off;
3906 3927
3907 if (destoff > inode->i_size) { 3928 if (destoff > inode->i_size) {
3929 const u64 wb_start = ALIGN_DOWN(inode->i_size, bs);
3930
3908 ret = btrfs_cont_expand(inode, inode->i_size, destoff); 3931 ret = btrfs_cont_expand(inode, inode->i_size, destoff);
3909 if (ret) 3932 if (ret)
3910 return ret; 3933 return ret;
3934 /*
3935 * We may have truncated the last block if the inode's size is
3936 * not sector size aligned, so we need to wait for writeback to
3937 * complete before proceeding further, otherwise we can race
3938 * with cloning and attempt to increment a reference to an
3939 * extent that no longer exists (writeback completed right after
3940 * we found the previous extent covering eof and before we
3941 * attempted to increment its reference count).
3942 */
3943 ret = btrfs_wait_ordered_range(inode, wb_start,
3944 destoff - wb_start);
3945 if (ret)
3946 return ret;
3911 } 3947 }
3912 3948
3913 /* 3949 /*
3914 * Lock destination range to serialize with concurrent readpages(). 3950 * Lock destination range to serialize with concurrent readpages() and
3951 * source range to serialize with relocation.
3915 */ 3952 */
3916 lock_extent(&BTRFS_I(inode)->io_tree, destoff, destoff + len - 1); 3953 btrfs_double_extent_lock(src, off, inode, destoff, len);
3917 ret = btrfs_clone(src, inode, off, olen, len, destoff, 0); 3954 ret = btrfs_clone(src, inode, off, olen, len, destoff, 0);
3918 unlock_extent(&BTRFS_I(inode)->io_tree, destoff, destoff + len - 1); 3955 btrfs_double_extent_unlock(src, off, inode, destoff, len);
3919 /* 3956 /*
3920 * Truncate page cache pages so that future reads will see the cloned 3957 * Truncate page cache pages so that future reads will see the cloned
3921 * data immediately and not the previous data. 3958 * data immediately and not the previous data.
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 2576b1a379c9..3e4f8f88353e 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -7825,6 +7825,18 @@ static int verify_one_dev_extent(struct btrfs_fs_info *fs_info,
7825 ret = -EUCLEAN; 7825 ret = -EUCLEAN;
7826 goto out; 7826 goto out;
7827 } 7827 }
7828
7829 /* It's possible this device is a dummy for seed device */
7830 if (dev->disk_total_bytes == 0) {
7831 dev = find_device(fs_info->fs_devices->seed, devid, NULL);
7832 if (!dev) {
7833 btrfs_err(fs_info, "failed to find seed devid %llu",
7834 devid);
7835 ret = -EUCLEAN;
7836 goto out;
7837 }
7838 }
7839
7828 if (physical_offset + physical_len > dev->disk_total_bytes) { 7840 if (physical_offset + physical_len > dev->disk_total_bytes) {
7829 btrfs_err(fs_info, 7841 btrfs_err(fs_info,
7830"dev extent devid %llu physical offset %llu len %llu is beyond device boundary %llu", 7842"dev extent devid %llu physical offset %llu len %llu is beyond device boundary %llu",