diff options
-rw-r--r-- | fs/btrfs/ctree.h | 4 | ||||
-rw-r--r-- | fs/btrfs/extent-tree.c | 16 | ||||
-rw-r--r-- | fs/btrfs/file.c | 10 | ||||
-rw-r--r-- | fs/btrfs/inode.c | 47 | ||||
-rw-r--r-- | fs/btrfs/ioctl.c | 7 |
5 files changed, 60 insertions, 24 deletions
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 9918ba3ec2b2..fc73e86235e8 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3480,8 +3480,8 @@ int btrfs_init_space_info(struct btrfs_fs_info *fs_info); | |||
3480 | int btrfs_delayed_refs_qgroup_accounting(struct btrfs_trans_handle *trans, | 3480 | int btrfs_delayed_refs_qgroup_accounting(struct btrfs_trans_handle *trans, |
3481 | struct btrfs_fs_info *fs_info); | 3481 | struct btrfs_fs_info *fs_info); |
3482 | int __get_raid_index(u64 flags); | 3482 | int __get_raid_index(u64 flags); |
3483 | int btrfs_start_nocow_write(struct btrfs_root *root); | 3483 | int btrfs_start_write_no_snapshoting(struct btrfs_root *root); |
3484 | void btrfs_end_nocow_write(struct btrfs_root *root); | 3484 | void btrfs_end_write_no_snapshoting(struct btrfs_root *root); |
3485 | /* ctree.c */ | 3485 | /* ctree.c */ |
3486 | int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, | 3486 | int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, |
3487 | int level, int *slot); | 3487 | int level, int *slot); |
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 5e81e3694d92..b4e3ab115f5f 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -9656,12 +9656,14 @@ int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range) | |||
9656 | } | 9656 | } |
9657 | 9657 | ||
9658 | /* | 9658 | /* |
9659 | * btrfs_{start,end}_write() is similar to mnt_{want, drop}_write(), | 9659 | * btrfs_{start,end}_write_no_snapshoting() are similar to |
9660 | * they are used to prevent the some tasks writing data into the page cache | 9660 | * mnt_{want,drop}_write(), they are used to prevent some tasks from writing |
9661 | * by nocow before the subvolume is snapshoted, but flush the data into | 9661 | * data into the page cache through nocow before the subvolume is snapshoted, |
9662 | * the disk after the snapshot creation. | 9662 | * but flush the data into disk after the snapshot creation, or to prevent |
9663 | * operations while snapshoting is ongoing and that cause the snapshot to be | ||
9664 | * inconsistent (writes followed by expanding truncates for example). | ||
9663 | */ | 9665 | */ |
9664 | void btrfs_end_nocow_write(struct btrfs_root *root) | 9666 | void btrfs_end_write_no_snapshoting(struct btrfs_root *root) |
9665 | { | 9667 | { |
9666 | percpu_counter_dec(&root->subv_writers->counter); | 9668 | percpu_counter_dec(&root->subv_writers->counter); |
9667 | /* | 9669 | /* |
@@ -9673,7 +9675,7 @@ void btrfs_end_nocow_write(struct btrfs_root *root) | |||
9673 | wake_up(&root->subv_writers->wait); | 9675 | wake_up(&root->subv_writers->wait); |
9674 | } | 9676 | } |
9675 | 9677 | ||
9676 | int btrfs_start_nocow_write(struct btrfs_root *root) | 9678 | int btrfs_start_write_no_snapshoting(struct btrfs_root *root) |
9677 | { | 9679 | { |
9678 | if (atomic_read(&root->will_be_snapshoted)) | 9680 | if (atomic_read(&root->will_be_snapshoted)) |
9679 | return 0; | 9681 | return 0; |
@@ -9684,7 +9686,7 @@ int btrfs_start_nocow_write(struct btrfs_root *root) | |||
9684 | */ | 9686 | */ |
9685 | smp_mb(); | 9687 | smp_mb(); |
9686 | if (atomic_read(&root->will_be_snapshoted)) { | 9688 | if (atomic_read(&root->will_be_snapshoted)) { |
9687 | btrfs_end_nocow_write(root); | 9689 | btrfs_end_write_no_snapshoting(root); |
9688 | return 0; | 9690 | return 0; |
9689 | } | 9691 | } |
9690 | return 1; | 9692 | return 1; |
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 0fbf0e7bc606..e4090259569b 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1428,7 +1428,7 @@ static noinline int check_can_nocow(struct inode *inode, loff_t pos, | |||
1428 | u64 num_bytes; | 1428 | u64 num_bytes; |
1429 | int ret; | 1429 | int ret; |
1430 | 1430 | ||
1431 | ret = btrfs_start_nocow_write(root); | 1431 | ret = btrfs_start_write_no_snapshoting(root); |
1432 | if (!ret) | 1432 | if (!ret) |
1433 | return -ENOSPC; | 1433 | return -ENOSPC; |
1434 | 1434 | ||
@@ -1451,7 +1451,7 @@ static noinline int check_can_nocow(struct inode *inode, loff_t pos, | |||
1451 | ret = can_nocow_extent(inode, lockstart, &num_bytes, NULL, NULL, NULL); | 1451 | ret = can_nocow_extent(inode, lockstart, &num_bytes, NULL, NULL, NULL); |
1452 | if (ret <= 0) { | 1452 | if (ret <= 0) { |
1453 | ret = 0; | 1453 | ret = 0; |
1454 | btrfs_end_nocow_write(root); | 1454 | btrfs_end_write_no_snapshoting(root); |
1455 | } else { | 1455 | } else { |
1456 | *write_bytes = min_t(size_t, *write_bytes , | 1456 | *write_bytes = min_t(size_t, *write_bytes , |
1457 | num_bytes - pos + lockstart); | 1457 | num_bytes - pos + lockstart); |
@@ -1543,7 +1543,7 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, | |||
1543 | btrfs_free_reserved_data_space(inode, | 1543 | btrfs_free_reserved_data_space(inode, |
1544 | reserve_bytes); | 1544 | reserve_bytes); |
1545 | else | 1545 | else |
1546 | btrfs_end_nocow_write(root); | 1546 | btrfs_end_write_no_snapshoting(root); |
1547 | break; | 1547 | break; |
1548 | } | 1548 | } |
1549 | 1549 | ||
@@ -1632,7 +1632,7 @@ again: | |||
1632 | 1632 | ||
1633 | release_bytes = 0; | 1633 | release_bytes = 0; |
1634 | if (only_release_metadata) | 1634 | if (only_release_metadata) |
1635 | btrfs_end_nocow_write(root); | 1635 | btrfs_end_write_no_snapshoting(root); |
1636 | 1636 | ||
1637 | if (only_release_metadata && copied > 0) { | 1637 | if (only_release_metadata && copied > 0) { |
1638 | u64 lockstart = round_down(pos, root->sectorsize); | 1638 | u64 lockstart = round_down(pos, root->sectorsize); |
@@ -1661,7 +1661,7 @@ again: | |||
1661 | 1661 | ||
1662 | if (release_bytes) { | 1662 | if (release_bytes) { |
1663 | if (only_release_metadata) { | 1663 | if (only_release_metadata) { |
1664 | btrfs_end_nocow_write(root); | 1664 | btrfs_end_write_no_snapshoting(root); |
1665 | btrfs_delalloc_release_metadata(inode, release_bytes); | 1665 | btrfs_delalloc_release_metadata(inode, release_bytes); |
1666 | } else { | 1666 | } else { |
1667 | btrfs_delalloc_release_space(inode, release_bytes); | 1667 | btrfs_delalloc_release_space(inode, release_bytes); |
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index a5374c2bb943..8de23355f6cf 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1337,7 +1337,7 @@ next_slot: | |||
1337 | * we fall into common COW way. | 1337 | * we fall into common COW way. |
1338 | */ | 1338 | */ |
1339 | if (!nolock) { | 1339 | if (!nolock) { |
1340 | err = btrfs_start_nocow_write(root); | 1340 | err = btrfs_start_write_no_snapshoting(root); |
1341 | if (!err) | 1341 | if (!err) |
1342 | goto out_check; | 1342 | goto out_check; |
1343 | } | 1343 | } |
@@ -1361,7 +1361,7 @@ out_check: | |||
1361 | if (extent_end <= start) { | 1361 | if (extent_end <= start) { |
1362 | path->slots[0]++; | 1362 | path->slots[0]++; |
1363 | if (!nolock && nocow) | 1363 | if (!nolock && nocow) |
1364 | btrfs_end_nocow_write(root); | 1364 | btrfs_end_write_no_snapshoting(root); |
1365 | goto next_slot; | 1365 | goto next_slot; |
1366 | } | 1366 | } |
1367 | if (!nocow) { | 1367 | if (!nocow) { |
@@ -1381,7 +1381,7 @@ out_check: | |||
1381 | page_started, nr_written, 1); | 1381 | page_started, nr_written, 1); |
1382 | if (ret) { | 1382 | if (ret) { |
1383 | if (!nolock && nocow) | 1383 | if (!nolock && nocow) |
1384 | btrfs_end_nocow_write(root); | 1384 | btrfs_end_write_no_snapshoting(root); |
1385 | goto error; | 1385 | goto error; |
1386 | } | 1386 | } |
1387 | cow_start = (u64)-1; | 1387 | cow_start = (u64)-1; |
@@ -1432,7 +1432,7 @@ out_check: | |||
1432 | num_bytes); | 1432 | num_bytes); |
1433 | if (ret) { | 1433 | if (ret) { |
1434 | if (!nolock && nocow) | 1434 | if (!nolock && nocow) |
1435 | btrfs_end_nocow_write(root); | 1435 | btrfs_end_write_no_snapshoting(root); |
1436 | goto error; | 1436 | goto error; |
1437 | } | 1437 | } |
1438 | } | 1438 | } |
@@ -1443,7 +1443,7 @@ out_check: | |||
1443 | EXTENT_DELALLOC, PAGE_UNLOCK | | 1443 | EXTENT_DELALLOC, PAGE_UNLOCK | |
1444 | PAGE_SET_PRIVATE2); | 1444 | PAGE_SET_PRIVATE2); |
1445 | if (!nolock && nocow) | 1445 | if (!nolock && nocow) |
1446 | btrfs_end_nocow_write(root); | 1446 | btrfs_end_write_no_snapshoting(root); |
1447 | cur_offset = extent_end; | 1447 | cur_offset = extent_end; |
1448 | if (cur_offset > end) | 1448 | if (cur_offset > end) |
1449 | break; | 1449 | break; |
@@ -4599,6 +4599,26 @@ next: | |||
4599 | return err; | 4599 | return err; |
4600 | } | 4600 | } |
4601 | 4601 | ||
4602 | static int wait_snapshoting_atomic_t(atomic_t *a) | ||
4603 | { | ||
4604 | schedule(); | ||
4605 | return 0; | ||
4606 | } | ||
4607 | |||
4608 | static void wait_for_snapshot_creation(struct btrfs_root *root) | ||
4609 | { | ||
4610 | while (true) { | ||
4611 | int ret; | ||
4612 | |||
4613 | ret = btrfs_start_write_no_snapshoting(root); | ||
4614 | if (ret) | ||
4615 | break; | ||
4616 | wait_on_atomic_t(&root->will_be_snapshoted, | ||
4617 | wait_snapshoting_atomic_t, | ||
4618 | TASK_UNINTERRUPTIBLE); | ||
4619 | } | ||
4620 | } | ||
4621 | |||
4602 | static int btrfs_setsize(struct inode *inode, struct iattr *attr) | 4622 | static int btrfs_setsize(struct inode *inode, struct iattr *attr) |
4603 | { | 4623 | { |
4604 | struct btrfs_root *root = BTRFS_I(inode)->root; | 4624 | struct btrfs_root *root = BTRFS_I(inode)->root; |
@@ -4623,17 +4643,30 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr) | |||
4623 | 4643 | ||
4624 | if (newsize > oldsize) { | 4644 | if (newsize > oldsize) { |
4625 | truncate_pagecache(inode, newsize); | 4645 | truncate_pagecache(inode, newsize); |
4646 | /* | ||
4647 | * Don't do an expanding truncate while snapshoting is ongoing. | ||
4648 | * This is to ensure the snapshot captures a fully consistent | ||
4649 | * state of this file - if the snapshot captures this expanding | ||
4650 | * truncation, it must capture all writes that happened before | ||
4651 | * this truncation. | ||
4652 | */ | ||
4653 | wait_for_snapshot_creation(root); | ||
4626 | ret = btrfs_cont_expand(inode, oldsize, newsize); | 4654 | ret = btrfs_cont_expand(inode, oldsize, newsize); |
4627 | if (ret) | 4655 | if (ret) { |
4656 | btrfs_end_write_no_snapshoting(root); | ||
4628 | return ret; | 4657 | return ret; |
4658 | } | ||
4629 | 4659 | ||
4630 | trans = btrfs_start_transaction(root, 1); | 4660 | trans = btrfs_start_transaction(root, 1); |
4631 | if (IS_ERR(trans)) | 4661 | if (IS_ERR(trans)) { |
4662 | btrfs_end_write_no_snapshoting(root); | ||
4632 | return PTR_ERR(trans); | 4663 | return PTR_ERR(trans); |
4664 | } | ||
4633 | 4665 | ||
4634 | i_size_write(inode, newsize); | 4666 | i_size_write(inode, newsize); |
4635 | btrfs_ordered_update_i_size(inode, i_size_read(inode), NULL); | 4667 | btrfs_ordered_update_i_size(inode, i_size_read(inode), NULL); |
4636 | ret = btrfs_update_inode(trans, root, inode); | 4668 | ret = btrfs_update_inode(trans, root, inode); |
4669 | btrfs_end_write_no_snapshoting(root); | ||
4637 | btrfs_end_transaction(trans, root); | 4670 | btrfs_end_transaction(trans, root); |
4638 | } else { | 4671 | } else { |
4639 | 4672 | ||
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 3abc068c5543..b590e23fa03e 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -617,7 +617,7 @@ fail: | |||
617 | return ret; | 617 | return ret; |
618 | } | 618 | } |
619 | 619 | ||
620 | static void btrfs_wait_nocow_write(struct btrfs_root *root) | 620 | static void btrfs_wait_for_no_snapshoting_writes(struct btrfs_root *root) |
621 | { | 621 | { |
622 | s64 writers; | 622 | s64 writers; |
623 | DEFINE_WAIT(wait); | 623 | DEFINE_WAIT(wait); |
@@ -649,7 +649,7 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir, | |||
649 | 649 | ||
650 | atomic_inc(&root->will_be_snapshoted); | 650 | atomic_inc(&root->will_be_snapshoted); |
651 | smp_mb__after_atomic(); | 651 | smp_mb__after_atomic(); |
652 | btrfs_wait_nocow_write(root); | 652 | btrfs_wait_for_no_snapshoting_writes(root); |
653 | 653 | ||
654 | ret = btrfs_start_delalloc_inodes(root, 0); | 654 | ret = btrfs_start_delalloc_inodes(root, 0); |
655 | if (ret) | 655 | if (ret) |
@@ -732,7 +732,8 @@ fail: | |||
732 | free: | 732 | free: |
733 | kfree(pending_snapshot); | 733 | kfree(pending_snapshot); |
734 | out: | 734 | out: |
735 | atomic_dec(&root->will_be_snapshoted); | 735 | if (atomic_dec_and_test(&root->will_be_snapshoted)) |
736 | wake_up_atomic_t(&root->will_be_snapshoted); | ||
736 | return ret; | 737 | return ret; |
737 | } | 738 | } |
738 | 739 | ||