aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorFilipe Manana <fdmanana@suse.com>2016-08-23 16:13:51 -0400
committerChris Mason <clm@fb.com>2016-08-25 06:58:32 -0400
commit28a235931b56d4e7bdd51f6733daf95f2b269da8 (patch)
tree9fe572dbe6f4a3eaa91032bbf0e619521b46e9a6
parent1ba98d086fe3a14d6a31f2f66dbab70c45d00f63 (diff)
Btrfs: fix lockdep warning on deadlock against an inode's log mutex
Commit 44f714dae50a ("Btrfs: improve performance on fsync against new inode after rename/unlink"), which landed in 4.8-rc2, introduced a possibility for a deadlock due to double locking of an inode's log mutex by the same task, which lockdep reports with: [23045.433975] ============================================= [23045.434748] [ INFO: possible recursive locking detected ] [23045.435426] 4.7.0-rc6-btrfs-next-34+ #1 Not tainted [23045.436044] --------------------------------------------- [23045.436044] xfs_io/3688 is trying to acquire lock: [23045.436044] (&ei->log_mutex){+.+...}, at: [<ffffffffa038552d>] btrfs_log_inode+0x13a/0xc95 [btrfs] [23045.436044] but task is already holding lock: [23045.436044] (&ei->log_mutex){+.+...}, at: [<ffffffffa038552d>] btrfs_log_inode+0x13a/0xc95 [btrfs] [23045.436044] other info that might help us debug this: [23045.436044] Possible unsafe locking scenario: [23045.436044] CPU0 [23045.436044] ---- [23045.436044] lock(&ei->log_mutex); [23045.436044] lock(&ei->log_mutex); [23045.436044] *** DEADLOCK *** [23045.436044] May be due to missing lock nesting notation [23045.436044] 3 locks held by xfs_io/3688: [23045.436044] #0: (&sb->s_type->i_mutex_key#15){+.+...}, at: [<ffffffffa035f2ae>] btrfs_sync_file+0x14e/0x425 [btrfs] [23045.436044] #1: (sb_internal#2){.+.+.+}, at: [<ffffffff8118446b>] __sb_start_write+0x5f/0xb0 [23045.436044] #2: (&ei->log_mutex){+.+...}, at: [<ffffffffa038552d>] btrfs_log_inode+0x13a/0xc95 [btrfs] [23045.436044] stack backtrace: [23045.436044] CPU: 4 PID: 3688 Comm: xfs_io Not tainted 4.7.0-rc6-btrfs-next-34+ #1 [23045.436044] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.9.1-0-gb3ef39f-prebuilt.qemu-project.org 04/01/2014 [23045.436044] 0000000000000000 ffff88022f5f7860 ffffffff8127074d ffffffff82a54b70 [23045.436044] ffffffff82a54b70 ffff88022f5f7920 ffffffff81092897 ffff880228015d68 [23045.436044] 0000000000000000 ffffffff82a54b70 ffffffff829c3f00 ffff880228015d68 [23045.436044] Call Trace: [23045.436044] [<ffffffff8127074d>] dump_stack+0x67/0x90 [23045.436044] [<ffffffff81092897>] __lock_acquire+0xcbb/0xe4e [23045.436044] [<ffffffff8109155f>] ? mark_lock+0x24/0x201 [23045.436044] [<ffffffff8109179a>] ? mark_held_locks+0x5e/0x74 [23045.436044] [<ffffffff81092de0>] lock_acquire+0x12f/0x1c3 [23045.436044] [<ffffffff81092de0>] ? lock_acquire+0x12f/0x1c3 [23045.436044] [<ffffffffa038552d>] ? btrfs_log_inode+0x13a/0xc95 [btrfs] [23045.436044] [<ffffffffa038552d>] ? btrfs_log_inode+0x13a/0xc95 [btrfs] [23045.436044] [<ffffffff814a51a4>] mutex_lock_nested+0x77/0x3a7 [23045.436044] [<ffffffffa038552d>] ? btrfs_log_inode+0x13a/0xc95 [btrfs] [23045.436044] [<ffffffffa039705e>] ? btrfs_release_delayed_node+0xb/0xd [btrfs] [23045.436044] [<ffffffffa038552d>] btrfs_log_inode+0x13a/0xc95 [btrfs] [23045.436044] [<ffffffffa038552d>] ? btrfs_log_inode+0x13a/0xc95 [btrfs] [23045.436044] [<ffffffff810a0ed1>] ? vprintk_emit+0x453/0x465 [23045.436044] [<ffffffffa0385a61>] btrfs_log_inode+0x66e/0xc95 [btrfs] [23045.436044] [<ffffffffa03c084d>] log_new_dir_dentries+0x26c/0x359 [btrfs] [23045.436044] [<ffffffffa03865aa>] btrfs_log_inode_parent+0x4a6/0x628 [btrfs] [23045.436044] [<ffffffffa0387552>] btrfs_log_dentry_safe+0x5a/0x75 [btrfs] [23045.436044] [<ffffffffa035f464>] btrfs_sync_file+0x304/0x425 [btrfs] [23045.436044] [<ffffffff811acaf4>] vfs_fsync_range+0x8c/0x9e [23045.436044] [<ffffffff811acb22>] vfs_fsync+0x1c/0x1e [23045.436044] [<ffffffff811acc79>] do_fsync+0x31/0x4a [23045.436044] [<ffffffff811ace99>] SyS_fsync+0x10/0x14 [23045.436044] [<ffffffff814a88e5>] entry_SYSCALL_64_fastpath+0x18/0xa8 [23045.436044] [<ffffffff8108f039>] ? trace_hardirqs_off_caller+0x3f/0xaa An example reproducer for this is: $ mkfs.btrfs -f /dev/sdb $ mount /dev/sdb /mnt $ mkdir /mnt/dir $ touch /mnt/dir/foo $ sync $ mv /mnt/dir/foo /mnt/dir/bar $ touch /mnt/dir/foo $ xfs_io -c "fsync" /mnt/dir/bar This is because while logging the inode of file bar we end up logging its parent directory (since its inode has an unlink_trans field matching the current transaction id due to the rename operation), which in turn logs the inodes for all its new dentries, so that the new inode for the new file named foo gets logged which in turn triggered another logging attempt for the inode we are fsync'ing, since that inode had an old name that corresponds to the name of the new inode. So fix this by ensuring that when logging the inode for a new dentry that has a name matching an old name of some other inode, we don't log again the original inode that we are fsync'ing. Fixes: 44f714dae50a ("Btrfs: improve performance on fsync against new inode after rename/unlink") Signed-off-by: Filipe Manana <fdmanana@suse.com> Signed-off-by: David Sterba <dsterba@suse.com> Signed-off-by: Chris Mason <clm@fb.com>
-rw-r--r--fs/btrfs/file.c2
-rw-r--r--fs/btrfs/tree-log.c5
-rw-r--r--fs/btrfs/tree-log.h5
3 files changed, 8 insertions, 4 deletions
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 3391f2adf0c8..fea31a4a6e36 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -2070,7 +2070,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
2070 } 2070 }
2071 trans->sync = true; 2071 trans->sync = true;
2072 2072
2073 btrfs_init_log_ctx(&ctx); 2073 btrfs_init_log_ctx(&ctx, inode);
2074 2074
2075 ret = btrfs_log_dentry_safe(trans, root, dentry, start, end, &ctx); 2075 ret = btrfs_log_dentry_safe(trans, root, dentry, start, end, &ctx);
2076 if (ret < 0) { 2076 if (ret < 0) {
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index ffe92da81b8a..e935035ac034 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -2823,7 +2823,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
2823 */ 2823 */
2824 mutex_unlock(&root->log_mutex); 2824 mutex_unlock(&root->log_mutex);
2825 2825
2826 btrfs_init_log_ctx(&root_log_ctx); 2826 btrfs_init_log_ctx(&root_log_ctx, NULL);
2827 2827
2828 mutex_lock(&log_root_tree->log_mutex); 2828 mutex_lock(&log_root_tree->log_mutex);
2829 atomic_inc(&log_root_tree->log_batch); 2829 atomic_inc(&log_root_tree->log_batch);
@@ -4757,7 +4757,8 @@ again:
4757 if (ret < 0) { 4757 if (ret < 0) {
4758 err = ret; 4758 err = ret;
4759 goto out_unlock; 4759 goto out_unlock;
4760 } else if (ret > 0) { 4760 } else if (ret > 0 && ctx &&
4761 other_ino != btrfs_ino(ctx->inode)) {
4761 struct btrfs_key inode_key; 4762 struct btrfs_key inode_key;
4762 struct inode *other_inode; 4763 struct inode *other_inode;
4763 4764
diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h
index a9f1b75d080d..ab858e31ccbc 100644
--- a/fs/btrfs/tree-log.h
+++ b/fs/btrfs/tree-log.h
@@ -30,15 +30,18 @@ struct btrfs_log_ctx {
30 int log_transid; 30 int log_transid;
31 int io_err; 31 int io_err;
32 bool log_new_dentries; 32 bool log_new_dentries;
33 struct inode *inode;
33 struct list_head list; 34 struct list_head list;
34}; 35};
35 36
36static inline void btrfs_init_log_ctx(struct btrfs_log_ctx *ctx) 37static inline void btrfs_init_log_ctx(struct btrfs_log_ctx *ctx,
38 struct inode *inode)
37{ 39{
38 ctx->log_ret = 0; 40 ctx->log_ret = 0;
39 ctx->log_transid = 0; 41 ctx->log_transid = 0;
40 ctx->io_err = 0; 42 ctx->io_err = 0;
41 ctx->log_new_dentries = false; 43 ctx->log_new_dentries = false;
44 ctx->inode = inode;
42 INIT_LIST_HEAD(&ctx->list); 45 INIT_LIST_HEAD(&ctx->list);
43} 46}
44 47