aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorWang Xiaoguang <wangxg.fnst@cn.fujitsu.com>2016-08-01 01:28:08 -0400
committerChris Mason <clm@fb.com>2016-08-25 06:58:26 -0400
commit9e7cc91a6d18a4973c6d2cc104871439c9e94f3d (patch)
tree7ad5c06fa1c4439040665948da4fbf7f7c94d49e
parent18513091af9483ba84328d42092bd4d42a3c958f (diff)
btrfs: fix fsfreeze hang caused by delayed iputs deal
When running fstests generic/068, we sometimes got the deadlock below:

  xfs_io          D ffff8800331dbb20     0  6697   6693 0x00000080
   ffff8800331dbb20 ffff88007acfc140 ffff880034d895c0 ffff8800331dc000
   ffff880032d243e8 fffffffeffffffff ffff880032d24400 0000000000000001
   ffff8800331dbb38 ffffffff816a9045 ffff880034d895c0 ffff8800331dbba8
  Call Trace:
   [<ffffffff816a9045>] schedule+0x35/0x80
   [<ffffffff816abab2>] rwsem_down_read_failed+0xf2/0x140
   [<ffffffff8118f5e1>] ? __filemap_fdatawrite_range+0xd1/0x100
   [<ffffffff8134f978>] call_rwsem_down_read_failed+0x18/0x30
   [<ffffffffa06631fc>] ? btrfs_alloc_block_rsv+0x2c/0xb0 [btrfs]
   [<ffffffff810d32b5>] percpu_down_read+0x35/0x50
   [<ffffffff81217dfc>] __sb_start_write+0x2c/0x40
   [<ffffffffa067f5d5>] start_transaction+0x2a5/0x4d0 [btrfs]
   [<ffffffffa067f857>] btrfs_join_transaction+0x17/0x20 [btrfs]
   [<ffffffffa068ba34>] btrfs_evict_inode+0x3c4/0x5d0 [btrfs]
   [<ffffffff81230a1a>] evict+0xba/0x1a0
   [<ffffffff812316b6>] iput+0x196/0x200
   [<ffffffffa06851d0>] btrfs_run_delayed_iputs+0x70/0xc0 [btrfs]
   [<ffffffffa067f1d8>] btrfs_commit_transaction+0x928/0xa80 [btrfs]
   [<ffffffffa0646df0>] btrfs_freeze+0x30/0x40 [btrfs]
   [<ffffffff81218040>] freeze_super+0xf0/0x190
   [<ffffffff81229275>] do_vfs_ioctl+0x4a5/0x5c0
   [<ffffffff81003176>] ? do_audit_syscall_entry+0x66/0x70
   [<ffffffff810038cf>] ? syscall_trace_enter_phase1+0x11f/0x140
   [<ffffffff81229409>] SyS_ioctl+0x79/0x90
   [<ffffffff81003c12>] do_syscall_64+0x62/0x110
   [<ffffffff816acbe1>] entry_SYSCALL64_slow_path+0x25/0x25

From this warning, freeze_super() already holds SB_FREEZE_FS, but btrfs_freeze() will call btrfs_commit_transaction() again. If btrfs_commit_transaction() finds that it has delayed iputs to handle, it will call start_transaction(), which will try to get the SB_FREEZE_FS lock again, and then the deadlock occurs.

The root cause is that in btrfs, sync_filesystem(sb) does not make sure all metadata is updated.
There may still be some code adding delayed iputs; see the sample race window below:

  CPU1                                            | CPU2
  |-> freeze_super()                              |
      |-> sync_filesystem(sb);                    |
      |                                           |-> cleaner_kthread()
      |                                           |   |-> btrfs_delete_unused_bgs()
      |                                           |       |-> btrfs_remove_chunk()
      |                                           |           |-> btrfs_remove_block_group()
      |                                           |               |-> btrfs_add_delayed_iput()
      |                                           |
      |-> sb->s_writers.frozen = SB_FREEZE_FS;    |
      |-> sb_wait_write(sb, SB_FREEZE_FS);        |
      |   acquire SB_FREEZE_FS lock.              |
      |                                           |
      |-> btrfs_freeze()                          |
          |-> btrfs_commit_transaction()          |
              |-> btrfs_run_delayed_iputs()       |
              |   will handle delayed iputs,      |
              |   that means start_transaction()  |
              |   will be called, which will try  |
              |   to get SB_FREEZE_FS lock.       |

To fix this issue, introduce an "int fs_frozen" to record internally whether the fs has been frozen. If the fs has been frozen, we cannot handle delayed iputs.

Signed-off-by: Wang Xiaoguang <wangxg.fnst@cn.fujitsu.com>
Reviewed-by: David Sterba <dsterba@suse.com>
[ add comment to btrfs_freeze ]
Signed-off-by: David Sterba <dsterba@suse.com>
Signed-off-by: Chris Mason <clm@fb.com>
-rw-r--r--fs/btrfs/ctree.h2
-rw-r--r--fs/btrfs/disk-io.c1
-rw-r--r--fs/btrfs/super.c16
-rw-r--r--fs/btrfs/transaction.c7
4 files changed, 25 insertions, 1 deletions
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 09cdff0d58e8..ec4154faab61 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1080,6 +1080,8 @@ struct btrfs_fs_info {
1080 struct list_head pinned_chunks; 1080 struct list_head pinned_chunks;
1081 1081
1082 int creating_free_space_tree; 1082 int creating_free_space_tree;
1083 /* Used to record internally whether fs has been frozen */
1084 int fs_frozen;
1083}; 1085};
1084 1086
1085struct btrfs_subvolume_writers { 1087struct btrfs_subvolume_writers {
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 7857f64e1cae..17062223fac3 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2631,6 +2631,7 @@ int open_ctree(struct super_block *sb,
2631 atomic_set(&fs_info->qgroup_op_seq, 0); 2631 atomic_set(&fs_info->qgroup_op_seq, 0);
2632 atomic_set(&fs_info->reada_works_cnt, 0); 2632 atomic_set(&fs_info->reada_works_cnt, 0);
2633 atomic64_set(&fs_info->tree_mod_seq, 0); 2633 atomic64_set(&fs_info->tree_mod_seq, 0);
2634 fs_info->fs_frozen = 0;
2634 fs_info->sb = sb; 2635 fs_info->sb = sb;
2635 fs_info->max_inline = BTRFS_DEFAULT_MAX_INLINE; 2636 fs_info->max_inline = BTRFS_DEFAULT_MAX_INLINE;
2636 fs_info->metadata_ratio = 0; 2637 fs_info->metadata_ratio = 0;
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 864ce334f696..4071fe2bd098 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -2241,6 +2241,13 @@ static int btrfs_freeze(struct super_block *sb)
2241 struct btrfs_trans_handle *trans; 2241 struct btrfs_trans_handle *trans;
2242 struct btrfs_root *root = btrfs_sb(sb)->tree_root; 2242 struct btrfs_root *root = btrfs_sb(sb)->tree_root;
2243 2243
2244 root->fs_info->fs_frozen = 1;
2245 /*
2246 * We don't need a barrier here, we'll wait for any transaction that
2247 * could be in progress on other threads (and do delayed iputs that
2248 * we want to avoid on a frozen filesystem), or do the commit
2249 * ourselves.
2250 */
2244 trans = btrfs_attach_transaction_barrier(root); 2251 trans = btrfs_attach_transaction_barrier(root);
2245 if (IS_ERR(trans)) { 2252 if (IS_ERR(trans)) {
2246 /* no transaction, don't bother */ 2253 /* no transaction, don't bother */
@@ -2251,6 +2258,14 @@ static int btrfs_freeze(struct super_block *sb)
2251 return btrfs_commit_transaction(trans, root); 2258 return btrfs_commit_transaction(trans, root);
2252} 2259}
2253 2260
2261static int btrfs_unfreeze(struct super_block *sb)
2262{
2263 struct btrfs_root *root = btrfs_sb(sb)->tree_root;
2264
2265 root->fs_info->fs_frozen = 0;
2266 return 0;
2267}
2268
2254static int btrfs_show_devname(struct seq_file *m, struct dentry *root) 2269static int btrfs_show_devname(struct seq_file *m, struct dentry *root)
2255{ 2270{
2256 struct btrfs_fs_info *fs_info = btrfs_sb(root->d_sb); 2271 struct btrfs_fs_info *fs_info = btrfs_sb(root->d_sb);
@@ -2299,6 +2314,7 @@ static const struct super_operations btrfs_super_ops = {
2299 .statfs = btrfs_statfs, 2314 .statfs = btrfs_statfs,
2300 .remount_fs = btrfs_remount, 2315 .remount_fs = btrfs_remount,
2301 .freeze_fs = btrfs_freeze, 2316 .freeze_fs = btrfs_freeze,
2317 .unfreeze_fs = btrfs_unfreeze,
2302}; 2318};
2303 2319
2304static const struct file_operations btrfs_ctl_fops = { 2320static const struct file_operations btrfs_ctl_fops = {
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 9cca0a721961..95d41919d034 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -2278,8 +2278,13 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
2278 2278
2279 kmem_cache_free(btrfs_trans_handle_cachep, trans); 2279 kmem_cache_free(btrfs_trans_handle_cachep, trans);
2280 2280
2281 /*
2282 * If fs has been frozen, we can not handle delayed iputs, otherwise
2283 * it'll result in deadlock about SB_FREEZE_FS.
2284 */
2281 if (current != root->fs_info->transaction_kthread && 2285 if (current != root->fs_info->transaction_kthread &&
2282 current != root->fs_info->cleaner_kthread) 2286 current != root->fs_info->cleaner_kthread &&
2287 !root->fs_info->fs_frozen)
2283 btrfs_run_delayed_iputs(root); 2288 btrfs_run_delayed_iputs(root);
2284 2289
2285 return ret; 2290 return ret;