aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorZhao Lei <zhaolei@cn.fujitsu.com>2015-06-04 08:09:15 -0400
committerChris Mason <clm@fb.com>2015-06-10 10:04:52 -0400
commit20b2e3029eef277cd93a46a991004260057e1a9e (patch)
tree043bc01801f7fc64f9910fd19254bb1d7b95e7be /fs
parente1d227a42ea2b4664f94212bd1106b9a3413ffb8 (diff)
btrfs: Fix lockdep warning of wr_ctx->wr_lock in scrub_free_wr_ctx()
lockdep report following warning in test: [25176.843958] ================================= [25176.844519] [ INFO: inconsistent lock state ] [25176.845047] 4.1.0-rc3 #22 Tainted: G W [25176.845591] --------------------------------- [25176.846153] inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W} usage. [25176.846713] fsstress/26661 [HC0[0]:SC1[1]:HE1:SE0] takes: [25176.847246] (&wr_ctx->wr_lock){+.?...}, at: [<ffffffffa04cdc6d>] scrub_free_ctx+0x2d/0xf0 [btrfs] [25176.847838] {SOFTIRQ-ON-W} state was registered at: [25176.848396] [<ffffffff810bf460>] __lock_acquire+0x6a0/0xe10 [25176.848955] [<ffffffff810bfd1e>] lock_acquire+0xce/0x2c0 [25176.849491] [<ffffffff816489af>] mutex_lock_nested+0x7f/0x410 [25176.850029] [<ffffffffa04d04ff>] scrub_stripe+0x4df/0x1080 [btrfs] [25176.850575] [<ffffffffa04d11b1>] scrub_chunk.isra.19+0x111/0x130 [btrfs] [25176.851110] [<ffffffffa04d144c>] scrub_enumerate_chunks+0x27c/0x510 [btrfs] [25176.851660] [<ffffffffa04d3b87>] btrfs_scrub_dev+0x1c7/0x6c0 [btrfs] [25176.852189] [<ffffffffa04e918e>] btrfs_dev_replace_start+0x36e/0x450 [btrfs] [25176.852771] [<ffffffffa04a98e0>] btrfs_ioctl+0x1e10/0x2d20 [btrfs] [25176.853315] [<ffffffff8121c5b8>] do_vfs_ioctl+0x318/0x570 [25176.853868] [<ffffffff8121c851>] SyS_ioctl+0x41/0x80 [25176.854406] [<ffffffff8164da17>] system_call_fastpath+0x12/0x6f [25176.854935] irq event stamp: 51506 [25176.855511] hardirqs last enabled at (51506): [<ffffffff810d4ce5>] vprintk_emit+0x225/0x5e0 [25176.856059] hardirqs last disabled at (51505): [<ffffffff810d4b77>] vprintk_emit+0xb7/0x5e0 [25176.856642] softirqs last enabled at (50886): [<ffffffff81067a23>] __do_softirq+0x363/0x640 [25176.857184] softirqs last disabled at (50949): [<ffffffff8106804d>] irq_exit+0x10d/0x120 [25176.857746] other info that might help us debug this: [25176.858845] Possible unsafe locking scenario: [25176.859981] CPU0 [25176.860537] ---- [25176.861059] lock(&wr_ctx->wr_lock); [25176.861705] <Interrupt> [25176.862272] lock(&wr_ctx->wr_lock); [25176.862881] *** DEADLOCK *** Reason: Above warning is caused by: Interrupt -> bio_endio() -> ... -> scrub_put_ctx() -> scrub_free_ctx() *1 -> ... -> mutex_lock(&wr_ctx->wr_lock); scrub_put_ctx() is allowed to be called in end_bio interrupt, but in code design, it will never call scrub_free_ctx(sctx) in interrupe context(above *1), because btrfs_scrub_dev() get one additional reference of sctx->refs, which makes scrub_free_ctx() only called withine btrfs_scrub_dev(). Now the code runs out of our wish, because free sequence in scrub_pending_bio_dec() have a gap. Current code: -----------------------------------+----------------------------------- scrub_pending_bio_dec() | btrfs_scrub_dev -----------------------------------+----------------------------------- atomic_dec(&sctx->bios_in_flight); | wake_up(&sctx->list_wait); | | scrub_put_ctx() | -> atomic_dec_and_test(&sctx->refs) scrub_put_ctx(sctx); | -> atomic_dec_and_test(&sctx->refs)| -> scrub_free_ctx() | -----------------------------------+----------------------------------- We expected: -----------------------------------+----------------------------------- scrub_pending_bio_dec() | btrfs_scrub_dev -----------------------------------+----------------------------------- atomic_dec(&sctx->bios_in_flight); | wake_up(&sctx->list_wait); | scrub_put_ctx(sctx); | -> atomic_dec_and_test(&sctx->refs)| | scrub_put_ctx() | -> atomic_dec_and_test(&sctx->refs) | -> scrub_free_ctx() -----------------------------------+----------------------------------- Fix: Move scrub_pending_bio_dec() to a workqueue, to avoid this function run in interrupt context. Tested by check tracelog in debug. Changelog v1->v2: Use workqueue instead of adjust function call sequence in v1, because v1 will introduce a bug pointed out by: Filipe David Manana <fdmanana@gmail.com> Reported-by: Qu Wenruo <quwenruo@cn.fujitsu.com> Signed-off-by: Zhao Lei <zhaolei@cn.fujitsu.com> Reviewed-by: Filipe Manana <fdmanana@suse.com> Signed-off-by: Chris Mason <clm@fb.com>
Diffstat (limited to 'fs')
-rw-r--r--fs/btrfs/async-thread.c1
-rw-r--r--fs/btrfs/async-thread.h2
-rw-r--r--fs/btrfs/ctree.h1
-rw-r--r--fs/btrfs/scrub.c26
4 files changed, 27 insertions, 3 deletions
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c
index df9932b00d08..1ce06c849a86 100644
--- a/fs/btrfs/async-thread.c
+++ b/fs/btrfs/async-thread.c
@@ -85,6 +85,7 @@ BTRFS_WORK_HELPER(extent_refs_helper);
85BTRFS_WORK_HELPER(scrub_helper); 85BTRFS_WORK_HELPER(scrub_helper);
86BTRFS_WORK_HELPER(scrubwrc_helper); 86BTRFS_WORK_HELPER(scrubwrc_helper);
87BTRFS_WORK_HELPER(scrubnc_helper); 87BTRFS_WORK_HELPER(scrubnc_helper);
88BTRFS_WORK_HELPER(scrubparity_helper);
88 89
89static struct __btrfs_workqueue * 90static struct __btrfs_workqueue *
90__btrfs_alloc_workqueue(const char *name, unsigned int flags, int max_active, 91__btrfs_alloc_workqueue(const char *name, unsigned int flags, int max_active,
diff --git a/fs/btrfs/async-thread.h b/fs/btrfs/async-thread.h
index ec2ee477f8ba..b0b093b6afec 100644
--- a/fs/btrfs/async-thread.h
+++ b/fs/btrfs/async-thread.h
@@ -64,6 +64,8 @@ BTRFS_WORK_HELPER_PROTO(extent_refs_helper);
64BTRFS_WORK_HELPER_PROTO(scrub_helper); 64BTRFS_WORK_HELPER_PROTO(scrub_helper);
65BTRFS_WORK_HELPER_PROTO(scrubwrc_helper); 65BTRFS_WORK_HELPER_PROTO(scrubwrc_helper);
66BTRFS_WORK_HELPER_PROTO(scrubnc_helper); 66BTRFS_WORK_HELPER_PROTO(scrubnc_helper);
67BTRFS_WORK_HELPER_PROTO(scrubparity_helper);
68
67 69
68struct btrfs_workqueue *btrfs_alloc_workqueue(const char *name, 70struct btrfs_workqueue *btrfs_alloc_workqueue(const char *name,
69 unsigned int flags, 71 unsigned int flags,
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 5e09834ac2ef..881549a35fca 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1698,6 +1698,7 @@ struct btrfs_fs_info {
1698 struct btrfs_workqueue *scrub_workers; 1698 struct btrfs_workqueue *scrub_workers;
1699 struct btrfs_workqueue *scrub_wr_completion_workers; 1699 struct btrfs_workqueue *scrub_wr_completion_workers;
1700 struct btrfs_workqueue *scrub_nocow_workers; 1700 struct btrfs_workqueue *scrub_nocow_workers;
1701 struct btrfs_workqueue *scrub_parity_workers;
1701 1702
1702#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY 1703#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
1703 u32 check_integrity_print_mask; 1704 u32 check_integrity_print_mask;
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index ab5811545a98..9f2feabe99f2 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -2662,18 +2662,30 @@ static void scrub_free_parity(struct scrub_parity *sparity)
2662 kfree(sparity); 2662 kfree(sparity);
2663} 2663}
2664 2664
2665static void scrub_parity_bio_endio_worker(struct btrfs_work *work)
2666{
2667 struct scrub_parity *sparity = container_of(work, struct scrub_parity,
2668 work);
2669 struct scrub_ctx *sctx = sparity->sctx;
2670
2671 scrub_free_parity(sparity);
2672 scrub_pending_bio_dec(sctx);
2673}
2674
2665static void scrub_parity_bio_endio(struct bio *bio, int error) 2675static void scrub_parity_bio_endio(struct bio *bio, int error)
2666{ 2676{
2667 struct scrub_parity *sparity = (struct scrub_parity *)bio->bi_private; 2677 struct scrub_parity *sparity = (struct scrub_parity *)bio->bi_private;
2668 struct scrub_ctx *sctx = sparity->sctx;
2669 2678
2670 if (error) 2679 if (error)
2671 bitmap_or(sparity->ebitmap, sparity->ebitmap, sparity->dbitmap, 2680 bitmap_or(sparity->ebitmap, sparity->ebitmap, sparity->dbitmap,
2672 sparity->nsectors); 2681 sparity->nsectors);
2673 2682
2674 scrub_free_parity(sparity);
2675 scrub_pending_bio_dec(sctx);
2676 bio_put(bio); 2683 bio_put(bio);
2684
2685 btrfs_init_work(&sparity->work, btrfs_scrubparity_helper,
2686 scrub_parity_bio_endio_worker, NULL, NULL);
2687 btrfs_queue_work(sparity->sctx->dev_root->fs_info->scrub_parity_workers,
2688 &sparity->work);
2677} 2689}
2678 2690
2679static void scrub_parity_check_and_repair(struct scrub_parity *sparity) 2691static void scrub_parity_check_and_repair(struct scrub_parity *sparity)
@@ -3589,6 +3601,13 @@ static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info,
3589 ret = -ENOMEM; 3601 ret = -ENOMEM;
3590 goto out; 3602 goto out;
3591 } 3603 }
3604 fs_info->scrub_parity_workers =
3605 btrfs_alloc_workqueue("btrfs-scrubparity", flags,
3606 max_active, 2);
3607 if (!fs_info->scrub_parity_workers) {
3608 ret = -ENOMEM;
3609 goto out;
3610 }
3592 } 3611 }
3593 ++fs_info->scrub_workers_refcnt; 3612 ++fs_info->scrub_workers_refcnt;
3594out: 3613out:
@@ -3601,6 +3620,7 @@ static noinline_for_stack void scrub_workers_put(struct btrfs_fs_info *fs_info)
3601 btrfs_destroy_workqueue(fs_info->scrub_workers); 3620 btrfs_destroy_workqueue(fs_info->scrub_workers);
3602 btrfs_destroy_workqueue(fs_info->scrub_wr_completion_workers); 3621 btrfs_destroy_workqueue(fs_info->scrub_wr_completion_workers);
3603 btrfs_destroy_workqueue(fs_info->scrub_nocow_workers); 3622 btrfs_destroy_workqueue(fs_info->scrub_nocow_workers);
3623 btrfs_destroy_workqueue(fs_info->scrub_parity_workers);
3604 } 3624 }
3605 WARN_ON(fs_info->scrub_workers_refcnt < 0); 3625 WARN_ON(fs_info->scrub_workers_refcnt < 0);
3606} 3626}