diff options
author | Filipe Manana <fdmanana@suse.com> | 2014-07-31 09:41:07 -0400 |
---|---|---|
committer | Chris Mason <clm@fb.com> | 2014-08-21 10:55:21 -0400 |
commit | 9c3b306e1c9e6be4be09e99a8fe2227d1005effc (patch) | |
tree | 3959abb99f9d14ae1ab26c0c2735e56504e5939b /fs/btrfs/inode.c | |
parent | 87fa3bb0786f37dff0b92f2c38421dd56d8902a9 (diff) |
Btrfs: race free update of commit root for ro snapshots
This is a better solution for the problem addressed in the following
commit:
Btrfs: update commit root on snapshot creation after orphan cleanup
(3821f348889e506efbd268cc8149e0ebfa47c4e5)
The previous solution wasn't the best because of 2 reasons:
1) It added another full transaction commit, which is more expensive
than just swapping the commit root with the root;
2) If a reboot happened after the first transaction commit (the one
that creates the snapshot) and before the second transaction commit,
then we would end up with the same problem if a send using that
snapshot was requested before the first transaction commit after
the reboot.
This change addresses those 2 issues. The second issue is addressed by
switching the commit root in the dentry lookup VFS callback, which is
also called by the snapshot/subvol creation ioctl and performs orphan
cleanup if needed. Like the vfs, the ioctl locks the parent inode too,
preventing race issues between a dentry lookup and snapshot creation.
Cc: Alex Lyakas <alex.btrfs@zadarastorage.com>
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: Chris Mason <clm@fb.com>
Diffstat (limited to 'fs/btrfs/inode.c')
-rw-r--r-- | fs/btrfs/inode.c | 36 |
1 files changed, 36 insertions, 0 deletions
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index a3c6e76f5a4e..6dd6e50d143a 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
@@ -5181,6 +5181,42 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry) | |||
5181 | iput(inode); | 5181 | iput(inode); |
5182 | inode = ERR_PTR(ret); | 5182 | inode = ERR_PTR(ret); |
5183 | } | 5183 | } |
5184 | /* | ||
5185 | * If orphan cleanup did remove any orphans, it means the tree | ||
5186 | * was modified and therefore the commit root is not the same as | ||
5187 | * the current root anymore. This is a problem, because send | ||
5188 | * uses the commit root and therefore can see inode items that | ||
5189 | * don't exist in the current root anymore, and for example make | ||
5190 | * calls to btrfs_iget, which will do tree lookups based on the | ||
5191 | * current root and not on the commit root. Those lookups will | ||
5192 | * fail, returning a -ESTALE error, and making send fail with | ||
5193 | * that error. So make sure a send does not see any orphans we | ||
5194 | * have just removed, and that it will see the same inodes | ||
5195 | * regardless of whether a transaction commit happened before | ||
5196 | * it started (meaning that the commit root will be the same as | ||
5197 | * the current root) or not. | ||
5198 | */ | ||
5199 | if (sub_root->node != sub_root->commit_root) { | ||
5200 | u64 sub_flags = btrfs_root_flags(&sub_root->root_item); | ||
5201 | |||
5202 | if (sub_flags & BTRFS_ROOT_SUBVOL_RDONLY) { | ||
5203 | struct extent_buffer *eb; | ||
5204 | |||
5205 | /* | ||
5206 | * Assert we can't have races between dentry | ||
5207 | * lookup called through the snapshot creation | ||
5208 | * ioctl and the VFS. | ||
5209 | */ | ||
5210 | ASSERT(mutex_is_locked(&dir->i_mutex)); | ||
5211 | |||
5212 | down_write(&root->fs_info->commit_root_sem); | ||
5213 | eb = sub_root->commit_root; | ||
5214 | sub_root->commit_root = | ||
5215 | btrfs_root_node(sub_root); | ||
5216 | up_write(&root->fs_info->commit_root_sem); | ||
5217 | free_extent_buffer(eb); | ||
5218 | } | ||
5219 | } | ||
5184 | } | 5220 | } |
5185 | 5221 | ||
5186 | return inode; | 5222 | return inode; |