diff options
author | Filipe Manana <fdmanana@suse.com> | 2014-07-31 09:41:07 -0400 |
---|---|---|
committer | Chris Mason <clm@fb.com> | 2014-08-21 10:55:21 -0400 |
commit | 9c3b306e1c9e6be4be09e99a8fe2227d1005effc (patch) | |
tree | 3959abb99f9d14ae1ab26c0c2735e56504e5939b /fs | |
parent | 87fa3bb0786f37dff0b92f2c38421dd56d8902a9 (diff) |
Btrfs: race free update of commit root for ro snapshots
This is a better solution for the problem addressed in the following
commit:
Btrfs: update commit root on snapshot creation after orphan cleanup
(3821f348889e506efbd268cc8149e0ebfa47c4e5)
The previous solution wasn't the best because of 2 reasons:
1) It added another full transaction commit, which is more expensive
than just swapping the commit root with the root;
2) If a reboot happened after the first transaction commit (the one
that creates the snapshot) and before the second transaction commit,
then we would end up with the same problem if a send using that
snapshot was requested before the first transaction commit after
the reboot.
This change addresses those 2 issues. The second issue is addressed by
switching the commit root in the dentry lookup VFS callback, which is
also called by the snapshot/subvol creation ioctl and performs orphan
cleanup if needed. Like the vfs, the ioctl locks the parent inode too,
preventing race issues between a dentry lookup and snapshot creation.
Cc: Alex Lyakas <alex.btrfs@zadarastorage.com>
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: Chris Mason <clm@fb.com>
Diffstat (limited to 'fs')
-rw-r--r-- | fs/btrfs/inode.c | 36 | ||||
-rw-r--r-- | fs/btrfs/ioctl.c | 33 |
2 files changed, 36 insertions, 33 deletions
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index a3c6e76f5a4e..6dd6e50d143a 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
@@ -5181,6 +5181,42 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry) | |||
5181 | iput(inode); | 5181 | iput(inode); |
5182 | inode = ERR_PTR(ret); | 5182 | inode = ERR_PTR(ret); |
5183 | } | 5183 | } |
5184 | /* | ||
5185 | * If orphan cleanup did remove any orphans, it means the tree | ||
5186 | * was modified and therefore the commit root is not the same as | ||
5187 | * the current root anymore. This is a problem, because send | ||
5188 | * uses the commit root and therefore can see inode items that | ||
5189 | * don't exist in the current root anymore, and for example make | ||
5190 | * calls to btrfs_iget, which will do tree lookups based on the | ||
5191 | * current root and not on the commit root. Those lookups will | ||
5192 | * fail, returning a -ESTALE error, and making send fail with | ||
5193 | * that error. So make sure a send does not see any orphans we | ||
5194 | * have just removed, and that it will see the same inodes | ||
5195 | * regardless of whether a transaction commit happened before | ||
5196 | * it started (meaning that the commit root will be the same as | ||
5197 | * the current root) or not. | ||
5198 | */ | ||
5199 | if (sub_root->node != sub_root->commit_root) { | ||
5200 | u64 sub_flags = btrfs_root_flags(&sub_root->root_item); | ||
5201 | |||
5202 | if (sub_flags & BTRFS_ROOT_SUBVOL_RDONLY) { | ||
5203 | struct extent_buffer *eb; | ||
5204 | |||
5205 | /* | ||
5206 | * Assert we can't have races between dentry | ||
5207 | * lookup called through the snapshot creation | ||
5208 | * ioctl and the VFS. | ||
5209 | */ | ||
5210 | ASSERT(mutex_is_locked(&dir->i_mutex)); | ||
5211 | |||
5212 | down_write(&root->fs_info->commit_root_sem); | ||
5213 | eb = sub_root->commit_root; | ||
5214 | sub_root->commit_root = | ||
5215 | btrfs_root_node(sub_root); | ||
5216 | up_write(&root->fs_info->commit_root_sem); | ||
5217 | free_extent_buffer(eb); | ||
5218 | } | ||
5219 | } | ||
5184 | } | 5220 | } |
5185 | 5221 | ||
5186 | return inode; | 5222 | return inode; |
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 47aceb494d1d..845287ca59c3 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c | |||
@@ -711,39 +711,6 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir, | |||
711 | if (ret) | 711 | if (ret) |
712 | goto fail; | 712 | goto fail; |
713 | 713 | ||
714 | ret = btrfs_orphan_cleanup(pending_snapshot->snap); | ||
715 | if (ret) | ||
716 | goto fail; | ||
717 | |||
718 | /* | ||
719 | * If orphan cleanup did remove any orphans, it means the tree was | ||
720 | * modified and therefore the commit root is not the same as the | ||
721 | * current root anymore. This is a problem, because send uses the | ||
722 | * commit root and therefore can see inode items that don't exist | ||
723 | * in the current root anymore, and for example make calls to | ||
724 | * btrfs_iget, which will do tree lookups based on the current root | ||
725 | * and not on the commit root. Those lookups will fail, returning a | ||
726 | * -ESTALE error, and making send fail with that error. So make sure | ||
727 | * a send does not see any orphans we have just removed, and that it | ||
728 | * will see the same inodes regardless of whether a transaction | ||
729 | * commit happened before it started (meaning that the commit root | ||
730 | * will be the same as the current root) or not. | ||
731 | */ | ||
732 | if (readonly && pending_snapshot->snap->node != | ||
733 | pending_snapshot->snap->commit_root) { | ||
734 | trans = btrfs_join_transaction(pending_snapshot->snap); | ||
735 | if (IS_ERR(trans) && PTR_ERR(trans) != -ENOENT) { | ||
736 | ret = PTR_ERR(trans); | ||
737 | goto fail; | ||
738 | } | ||
739 | if (!IS_ERR(trans)) { | ||
740 | ret = btrfs_commit_transaction(trans, | ||
741 | pending_snapshot->snap); | ||
742 | if (ret) | ||
743 | goto fail; | ||
744 | } | ||
745 | } | ||
746 | |||
747 | inode = btrfs_lookup_dentry(dentry->d_parent->d_inode, dentry); | 714 | inode = btrfs_lookup_dentry(dentry->d_parent->d_inode, dentry); |
748 | if (IS_ERR(inode)) { | 715 | if (IS_ERR(inode)) { |
749 | ret = PTR_ERR(inode); | 716 | ret = PTR_ERR(inode); |