aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorFilipe Manana <fdmanana@suse.com>2014-07-31 09:41:07 -0400
committerChris Mason <clm@fb.com>2014-08-21 10:55:21 -0400
commit9c3b306e1c9e6be4be09e99a8fe2227d1005effc (patch)
tree3959abb99f9d14ae1ab26c0c2735e56504e5939b /fs
parent87fa3bb0786f37dff0b92f2c38421dd56d8902a9 (diff)
Btrfs: race free update of commit root for ro snapshots
This is a better solution for the problem addressed in the following commit: Btrfs: update commit root on snapshot creation after orphan cleanup (3821f348889e506efbd268cc8149e0ebfa47c4e5) The previous solution wasn't the best because of 2 reasons: 1) It added another full transaction commit, which is more expensive than just swapping the commit root with the root; 2) If a reboot happened after the first transaction commit (the one that creates the snapshot) and before the second transaction commit, then we would end up with the same problem if a send using that snapshot was requested before the first transaction commit after the reboot. This change addresses those 2 issues. The second issue is addressed by switching the commit root in the dentry lookup VFS callback, which is also called by the snapshot/subvol creation ioctl and performs orphan cleanup if needed. Like the vfs, the ioctl locks the parent inode too, preventing race issues between a dentry lookup and snapshot creation. Cc: Alex Lyakas <alex.btrfs@zadarastorage.com> Signed-off-by: Filipe Manana <fdmanana@suse.com> Signed-off-by: Chris Mason <clm@fb.com>
Diffstat (limited to 'fs')
-rw-r--r--fs/btrfs/inode.c36
-rw-r--r--fs/btrfs/ioctl.c33
2 files changed, 36 insertions, 33 deletions
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index a3c6e76f5a4e..6dd6e50d143a 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -5181,6 +5181,42 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
5181 iput(inode); 5181 iput(inode);
5182 inode = ERR_PTR(ret); 5182 inode = ERR_PTR(ret);
5183 } 5183 }
5184 /*
5185 * If orphan cleanup did remove any orphans, it means the tree
5186 * was modified and therefore the commit root is not the same as
5187 * the current root anymore. This is a problem, because send
5188 * uses the commit root and therefore can see inode items that
5189 * don't exist in the current root anymore, and for example make
5190 * calls to btrfs_iget, which will do tree lookups based on the
5191 * current root and not on the commit root. Those lookups will
5192 * fail, returning a -ESTALE error, and making send fail with
5193 * that error. So make sure a send does not see any orphans we
5194 * have just removed, and that it will see the same inodes
5195 * regardless of whether a transaction commit happened before
5196 * it started (meaning that the commit root will be the same as
5197 * the current root) or not.
5198 */
5199 if (sub_root->node != sub_root->commit_root) {
5200 u64 sub_flags = btrfs_root_flags(&sub_root->root_item);
5201
5202 if (sub_flags & BTRFS_ROOT_SUBVOL_RDONLY) {
5203 struct extent_buffer *eb;
5204
5205 /*
5206 * Assert we can't have races between dentry
5207 * lookup called through the snapshot creation
5208 * ioctl and the VFS.
5209 */
5210 ASSERT(mutex_is_locked(&dir->i_mutex));
5211
5212 down_write(&root->fs_info->commit_root_sem);
5213 eb = sub_root->commit_root;
5214 sub_root->commit_root =
5215 btrfs_root_node(sub_root);
5216 up_write(&root->fs_info->commit_root_sem);
5217 free_extent_buffer(eb);
5218 }
5219 }
5184 } 5220 }
5185 5221
5186 return inode; 5222 return inode;
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 47aceb494d1d..845287ca59c3 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -711,39 +711,6 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
711 if (ret) 711 if (ret)
712 goto fail; 712 goto fail;
713 713
714 ret = btrfs_orphan_cleanup(pending_snapshot->snap);
715 if (ret)
716 goto fail;
717
718 /*
719 * If orphan cleanup did remove any orphans, it means the tree was
720 * modified and therefore the commit root is not the same as the
721 * current root anymore. This is a problem, because send uses the
722 * commit root and therefore can see inode items that don't exist
723 * in the current root anymore, and for example make calls to
724 * btrfs_iget, which will do tree lookups based on the current root
725 * and not on the commit root. Those lookups will fail, returning a
726 * -ESTALE error, and making send fail with that error. So make sure
727 * a send does not see any orphans we have just removed, and that it
728 * will see the same inodes regardless of whether a transaction
729 * commit happened before it started (meaning that the commit root
730 * will be the same as the current root) or not.
731 */
732 if (readonly && pending_snapshot->snap->node !=
733 pending_snapshot->snap->commit_root) {
734 trans = btrfs_join_transaction(pending_snapshot->snap);
735 if (IS_ERR(trans) && PTR_ERR(trans) != -ENOENT) {
736 ret = PTR_ERR(trans);
737 goto fail;
738 }
739 if (!IS_ERR(trans)) {
740 ret = btrfs_commit_transaction(trans,
741 pending_snapshot->snap);
742 if (ret)
743 goto fail;
744 }
745 }
746
747 inode = btrfs_lookup_dentry(dentry->d_parent->d_inode, dentry); 714 inode = btrfs_lookup_dentry(dentry->d_parent->d_inode, dentry);
748 if (IS_ERR(inode)) { 715 if (IS_ERR(inode)) {
749 ret = PTR_ERR(inode); 716 ret = PTR_ERR(inode);