aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/file.c
diff options
context:
space:
mode:
authorLiu Bo <bo.li.liu@oracle.com>2013-01-28 22:22:10 -0500
committerJosef Bacik <jbacik@fusionio.com>2013-02-05 16:09:13 -0500
commit6f1c36055f96e80031c7fdda3fd5be826b8d7782 (patch)
treea7417741e5f6680981dc64dabaffe7d0c1a02f53 /fs/btrfs/file.c
parent843fcf35733164076a77ad833c72c32da8228ad0 (diff)
Btrfs: fix race between snapshot deletion and getting inode
While running snapshot testscript created by Mitch and David, the race between autodefrag and snapshot deletion can lead to corruption of dead_root list so that we can get crash on btrfs_clean_old_snapshots(). And besides autodefrag, scrub also does the same thing, ie. read root first and get inode. Here is the story(take autodefrag as an example): (1) when we delete a snapshot or subvolume, it will set its root's refs to zero and do a iput() on its own inode, and if this inode happens to be the only active in-meory one in root's inode rbtree, it will add itself to the global dead_roots list for later cleanup. (2) after (1), the autodefrag thread may read another inode for defrag and the inode is just in the deleted snapshot/subvolume, but all of these are without checking if the root is still valid(refs > 0). So the end up result is adding the deleted snapshot/subvolume's root to the global dead_roots list AGAIN. Fortunately, we already have a srcu lock to avoid the race, ie. subvol_srcu. So all we need to do is to take the lock to protect 'read root and get inode', since we synchronize to wait for the rcu grace period before adding something to the global dead_roots list. Reported-by: Mitch Harder <mitch.harder@sabayonlinux.org> Signed-off-by: Liu Bo <bo.li.liu@oracle.com> Signed-off-by: Josef Bacik <jbacik@fusionio.com>
Diffstat (limited to 'fs/btrfs/file.c')
-rw-r--r--fs/btrfs/file.c22
1 files changed, 18 insertions, 4 deletions
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index a902faab7161..b06d289f998f 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -293,15 +293,24 @@ static int __btrfs_run_defrag_inode(struct btrfs_fs_info *fs_info,
293 struct btrfs_key key; 293 struct btrfs_key key;
294 struct btrfs_ioctl_defrag_range_args range; 294 struct btrfs_ioctl_defrag_range_args range;
295 int num_defrag; 295 int num_defrag;
296 int index;
297 int ret;
296 298
297 /* get the inode */ 299 /* get the inode */
298 key.objectid = defrag->root; 300 key.objectid = defrag->root;
299 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); 301 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
300 key.offset = (u64)-1; 302 key.offset = (u64)-1;
303
304 index = srcu_read_lock(&fs_info->subvol_srcu);
305
301 inode_root = btrfs_read_fs_root_no_name(fs_info, &key); 306 inode_root = btrfs_read_fs_root_no_name(fs_info, &key);
302 if (IS_ERR(inode_root)) { 307 if (IS_ERR(inode_root)) {
303 kmem_cache_free(btrfs_inode_defrag_cachep, defrag); 308 ret = PTR_ERR(inode_root);
304 return PTR_ERR(inode_root); 309 goto cleanup;
310 }
311 if (btrfs_root_refs(&inode_root->root_item) == 0) {
312 ret = -ENOENT;
313 goto cleanup;
305 } 314 }
306 315
307 key.objectid = defrag->ino; 316 key.objectid = defrag->ino;
@@ -309,9 +318,10 @@ static int __btrfs_run_defrag_inode(struct btrfs_fs_info *fs_info,
309 key.offset = 0; 318 key.offset = 0;
310 inode = btrfs_iget(fs_info->sb, &key, inode_root, NULL); 319 inode = btrfs_iget(fs_info->sb, &key, inode_root, NULL);
311 if (IS_ERR(inode)) { 320 if (IS_ERR(inode)) {
312 kmem_cache_free(btrfs_inode_defrag_cachep, defrag); 321 ret = PTR_ERR(inode);
313 return PTR_ERR(inode); 322 goto cleanup;
314 } 323 }
324 srcu_read_unlock(&fs_info->subvol_srcu, index);
315 325
316 /* do a chunk of defrag */ 326 /* do a chunk of defrag */
317 clear_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags); 327 clear_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags);
@@ -346,6 +356,10 @@ static int __btrfs_run_defrag_inode(struct btrfs_fs_info *fs_info,
346 356
347 iput(inode); 357 iput(inode);
348 return 0; 358 return 0;
359cleanup:
360 srcu_read_unlock(&fs_info->subvol_srcu, index);
361 kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
362 return ret;
349} 363}
350 364
351/* 365/*