diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2019-01-13 12:55:51 -0500 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2019-01-13 12:55:51 -0500 |
| commit | 6b529fb0a3eabf9c4cc3e94c11477250379ce6d8 (patch) | |
| tree | 132133ceb3eef791ea98d288041616d5eb9fdcf8 | |
| parent | 72d657dd2115804b93bde4b77e426cc2de70eebf (diff) | |
| parent | 1b3922a8bc74231f9a767d1be6d9a061a4d4eeab (diff) | |
Merge tag 'for-5.0-rc1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
Pull btrfs fixes from David Sterba:
- two regression fixes in clone/dedupe ioctls, the generic check
callback needs to lock extents properly and wait for io to avoid
problems with writeback and relocation
- fix deadlock when using free space tree due to block group creation
- a recently added check refuses a valid filesystem with seeding device,
make that work again with a quickfix, proper solution needs more
intrusive changes
* tag 'for-5.0-rc1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
btrfs: Use real device structure to verify dev extent
Btrfs: fix deadlock when using free space tree due to block group creation
Btrfs: fix race between reflink/dedupe and relocation
Btrfs: fix race between cloning range ending at eof and writeback
| -rw-r--r-- | fs/btrfs/ctree.c | 16 | ||||
| -rw-r--r-- | fs/btrfs/ioctl.c | 49 | ||||
| -rw-r--r-- | fs/btrfs/volumes.c | 12 |
3 files changed, 64 insertions, 13 deletions
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index d92462fe66c8..f64aad613727 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c | |||
| @@ -1016,19 +1016,21 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, | |||
| 1016 | parent_start = parent->start; | 1016 | parent_start = parent->start; |
| 1017 | 1017 | ||
| 1018 | /* | 1018 | /* |
| 1019 | * If we are COWing a node/leaf from the extent, chunk or device trees, | 1019 | * If we are COWing a node/leaf from the extent, chunk, device or free |
| 1020 | * make sure that we do not finish block group creation of pending block | 1020 | * space trees, make sure that we do not finish block group creation of |
| 1021 | * groups. We do this to avoid a deadlock. | 1021 | * pending block groups. We do this to avoid a deadlock. |
| 1022 | * COWing can result in allocation of a new chunk, and flushing pending | 1022 | * COWing can result in allocation of a new chunk, and flushing pending |
| 1023 | * block groups (btrfs_create_pending_block_groups()) can be triggered | 1023 | * block groups (btrfs_create_pending_block_groups()) can be triggered |
| 1024 | * when finishing allocation of a new chunk. Creation of a pending block | 1024 | * when finishing allocation of a new chunk. Creation of a pending block |
| 1025 | * group modifies the extent, chunk and device trees, therefore we could | 1025 | * group modifies the extent, chunk, device and free space trees, |
| 1026 | * deadlock with ourselves since we are holding a lock on an extent | 1026 | * therefore we could deadlock with ourselves since we are holding a |
| 1027 | * buffer that btrfs_create_pending_block_groups() may try to COW later. | 1027 | * lock on an extent buffer that btrfs_create_pending_block_groups() may |
| 1028 | * try to COW later. | ||
| 1028 | */ | 1029 | */ |
| 1029 | if (root == fs_info->extent_root || | 1030 | if (root == fs_info->extent_root || |
| 1030 | root == fs_info->chunk_root || | 1031 | root == fs_info->chunk_root || |
| 1031 | root == fs_info->dev_root) | 1032 | root == fs_info->dev_root || |
| 1033 | root == fs_info->free_space_root) | ||
| 1032 | trans->can_flush_pending_bgs = false; | 1034 | trans->can_flush_pending_bgs = false; |
| 1033 | 1035 | ||
| 1034 | cow = btrfs_alloc_tree_block(trans, root, parent_start, | 1036 | cow = btrfs_alloc_tree_block(trans, root, parent_start, |
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index fab9443f6a42..9c8e1734429c 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c | |||
| @@ -3221,6 +3221,26 @@ static void btrfs_double_inode_lock(struct inode *inode1, struct inode *inode2) | |||
| 3221 | inode_lock_nested(inode2, I_MUTEX_CHILD); | 3221 | inode_lock_nested(inode2, I_MUTEX_CHILD); |
| 3222 | } | 3222 | } |
| 3223 | 3223 | ||
| 3224 | static void btrfs_double_extent_unlock(struct inode *inode1, u64 loff1, | ||
| 3225 | struct inode *inode2, u64 loff2, u64 len) | ||
| 3226 | { | ||
| 3227 | unlock_extent(&BTRFS_I(inode1)->io_tree, loff1, loff1 + len - 1); | ||
| 3228 | unlock_extent(&BTRFS_I(inode2)->io_tree, loff2, loff2 + len - 1); | ||
| 3229 | } | ||
| 3230 | |||
| 3231 | static void btrfs_double_extent_lock(struct inode *inode1, u64 loff1, | ||
| 3232 | struct inode *inode2, u64 loff2, u64 len) | ||
| 3233 | { | ||
| 3234 | if (inode1 < inode2) { | ||
| 3235 | swap(inode1, inode2); | ||
| 3236 | swap(loff1, loff2); | ||
| 3237 | } else if (inode1 == inode2 && loff2 < loff1) { | ||
| 3238 | swap(loff1, loff2); | ||
| 3239 | } | ||
| 3240 | lock_extent(&BTRFS_I(inode1)->io_tree, loff1, loff1 + len - 1); | ||
| 3241 | lock_extent(&BTRFS_I(inode2)->io_tree, loff2, loff2 + len - 1); | ||
| 3242 | } | ||
| 3243 | |||
| 3224 | static int btrfs_extent_same_range(struct inode *src, u64 loff, u64 olen, | 3244 | static int btrfs_extent_same_range(struct inode *src, u64 loff, u64 olen, |
| 3225 | struct inode *dst, u64 dst_loff) | 3245 | struct inode *dst, u64 dst_loff) |
| 3226 | { | 3246 | { |
| @@ -3242,11 +3262,12 @@ static int btrfs_extent_same_range(struct inode *src, u64 loff, u64 olen, | |||
| 3242 | return -EINVAL; | 3262 | return -EINVAL; |
| 3243 | 3263 | ||
| 3244 | /* | 3264 | /* |
| 3245 | * Lock destination range to serialize with concurrent readpages(). | 3265 | * Lock destination range to serialize with concurrent readpages() and |
| 3266 | * source range to serialize with relocation. | ||
| 3246 | */ | 3267 | */ |
| 3247 | lock_extent(&BTRFS_I(dst)->io_tree, dst_loff, dst_loff + len - 1); | 3268 | btrfs_double_extent_lock(src, loff, dst, dst_loff, len); |
| 3248 | ret = btrfs_clone(src, dst, loff, olen, len, dst_loff, 1); | 3269 | ret = btrfs_clone(src, dst, loff, olen, len, dst_loff, 1); |
| 3249 | unlock_extent(&BTRFS_I(dst)->io_tree, dst_loff, dst_loff + len - 1); | 3270 | btrfs_double_extent_unlock(src, loff, dst, dst_loff, len); |
| 3250 | 3271 | ||
| 3251 | return ret; | 3272 | return ret; |
| 3252 | } | 3273 | } |
| @@ -3905,17 +3926,33 @@ static noinline int btrfs_clone_files(struct file *file, struct file *file_src, | |||
| 3905 | len = ALIGN(src->i_size, bs) - off; | 3926 | len = ALIGN(src->i_size, bs) - off; |
| 3906 | 3927 | ||
| 3907 | if (destoff > inode->i_size) { | 3928 | if (destoff > inode->i_size) { |
| 3929 | const u64 wb_start = ALIGN_DOWN(inode->i_size, bs); | ||
| 3930 | |||
| 3908 | ret = btrfs_cont_expand(inode, inode->i_size, destoff); | 3931 | ret = btrfs_cont_expand(inode, inode->i_size, destoff); |
| 3909 | if (ret) | 3932 | if (ret) |
| 3910 | return ret; | 3933 | return ret; |
| 3934 | /* | ||
| 3935 | * We may have truncated the last block if the inode's size is | ||
| 3936 | * not sector size aligned, so we need to wait for writeback to | ||
| 3937 | * complete before proceeding further, otherwise we can race | ||
| 3938 | * with cloning and attempt to increment a reference to an | ||
| 3939 | * extent that no longer exists (writeback completed right after | ||
| 3940 | * we found the previous extent covering eof and before we | ||
| 3941 | * attempted to increment its reference count). | ||
| 3942 | */ | ||
| 3943 | ret = btrfs_wait_ordered_range(inode, wb_start, | ||
| 3944 | destoff - wb_start); | ||
| 3945 | if (ret) | ||
| 3946 | return ret; | ||
| 3911 | } | 3947 | } |
| 3912 | 3948 | ||
| 3913 | /* | 3949 | /* |
| 3914 | * Lock destination range to serialize with concurrent readpages(). | 3950 | * Lock destination range to serialize with concurrent readpages() and |
| 3951 | * source range to serialize with relocation. | ||
| 3915 | */ | 3952 | */ |
| 3916 | lock_extent(&BTRFS_I(inode)->io_tree, destoff, destoff + len - 1); | 3953 | btrfs_double_extent_lock(src, off, inode, destoff, len); |
| 3917 | ret = btrfs_clone(src, inode, off, olen, len, destoff, 0); | 3954 | ret = btrfs_clone(src, inode, off, olen, len, destoff, 0); |
| 3918 | unlock_extent(&BTRFS_I(inode)->io_tree, destoff, destoff + len - 1); | 3955 | btrfs_double_extent_unlock(src, off, inode, destoff, len); |
| 3919 | /* | 3956 | /* |
| 3920 | * Truncate page cache pages so that future reads will see the cloned | 3957 | * Truncate page cache pages so that future reads will see the cloned |
| 3921 | * data immediately and not the previous data. | 3958 | * data immediately and not the previous data. |
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 2576b1a379c9..3e4f8f88353e 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c | |||
| @@ -7825,6 +7825,18 @@ static int verify_one_dev_extent(struct btrfs_fs_info *fs_info, | |||
| 7825 | ret = -EUCLEAN; | 7825 | ret = -EUCLEAN; |
| 7826 | goto out; | 7826 | goto out; |
| 7827 | } | 7827 | } |
| 7828 | |||
| 7829 | /* It's possible this device is a dummy for seed device */ | ||
| 7830 | if (dev->disk_total_bytes == 0) { | ||
| 7831 | dev = find_device(fs_info->fs_devices->seed, devid, NULL); | ||
| 7832 | if (!dev) { | ||
| 7833 | btrfs_err(fs_info, "failed to find seed devid %llu", | ||
| 7834 | devid); | ||
| 7835 | ret = -EUCLEAN; | ||
| 7836 | goto out; | ||
| 7837 | } | ||
| 7838 | } | ||
| 7839 | |||
| 7828 | if (physical_offset + physical_len > dev->disk_total_bytes) { | 7840 | if (physical_offset + physical_len > dev->disk_total_bytes) { |
| 7829 | btrfs_err(fs_info, | 7841 | btrfs_err(fs_info, |
| 7830 | "dev extent devid %llu physical offset %llu len %llu is beyond device boundary %llu", | 7842 | "dev extent devid %llu physical offset %llu len %llu is beyond device boundary %llu", |
