diff options
author | Akinobu Mita <akinobu.mita@gmail.com> | 2015-09-26 13:09:25 -0400 |
---|---|---|
committer | Jens Axboe <axboe@fb.com> | 2015-09-29 13:32:51 -0400 |
commit | 60de074ba1e8f327db19bc33d8530131ac01695d (patch) | |
tree | b23080dbac15be404a4c47da9d83ec7b83cbd3f7 /block/blk-mq.c | |
parent | 5778322e67ed34dc9f391a4a5cbcbb856071ceba (diff) |
blk-mq: fix deadlock when reading cpu_list
CPU hotplug handling for blk-mq (blk_mq_queue_reinit) acquires
all_q_mutex in blk_mq_queue_reinit_notify() and then removes sysfs
entries via blk_mq_sysfs_unregister(). Removing a sysfs entry blocks
until the active reference count of its kernfs_node drops to zero.
On the other hand, reading the blk_mq_hw_sysfs_cpu sysfs entry (e.g.
/sys/block/nullb0/mq/0/cpu_list) acquires all_q_mutex in
blk_mq_hw_sysfs_cpus_show().
If these happen at the same time, a deadlock can occur: one side
waits for the active reference to drop to zero while holding
all_q_mutex, and the other tries to acquire all_q_mutex while holding
the active reference.
The reason that all_q_mutex is acquired in blk_mq_hw_sysfs_cpus_show()
is to avoid reading an incomplete hctx->cpumask. Since reading a sysfs
entry for blk-mq needs to acquire q->sysfs_lock, we can avoid both the
deadlock and reading an incomplete hctx->cpumask by holding
q->sysfs_lock while hctx->cpumask is being updated.
Signed-off-by: Akinobu Mita <akinobu.mita@gmail.com>
Reviewed-by: Ming Lei <tom.leiming@gmail.com>
Cc: Ming Lei <tom.leiming@gmail.com>
Cc: Wanpeng Li <wanpeng.li@hotmail.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
Diffstat (limited to 'block/blk-mq.c')
-rw-r--r-- | block/blk-mq.c | 7 |
1 files changed, 7 insertions, 0 deletions
diff --git a/block/blk-mq.c b/block/blk-mq.c index a5dbd069c9da..31c0c6259c4c 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c | |||
@@ -1797,6 +1797,11 @@ static void blk_mq_map_swqueue(struct request_queue *q, | |||
1797 | struct blk_mq_ctx *ctx; | 1797 | struct blk_mq_ctx *ctx; |
1798 | struct blk_mq_tag_set *set = q->tag_set; | 1798 | struct blk_mq_tag_set *set = q->tag_set; |
1799 | 1799 | ||
1800 | /* | ||
1801 | * Avoid others reading imcomplete hctx->cpumask through sysfs | ||
1802 | */ | ||
1803 | mutex_lock(&q->sysfs_lock); | ||
1804 | |||
1800 | queue_for_each_hw_ctx(q, hctx, i) { | 1805 | queue_for_each_hw_ctx(q, hctx, i) { |
1801 | cpumask_clear(hctx->cpumask); | 1806 | cpumask_clear(hctx->cpumask); |
1802 | hctx->nr_ctx = 0; | 1807 | hctx->nr_ctx = 0; |
@@ -1816,6 +1821,8 @@ static void blk_mq_map_swqueue(struct request_queue *q, | |||
1816 | hctx->ctxs[hctx->nr_ctx++] = ctx; | 1821 | hctx->ctxs[hctx->nr_ctx++] = ctx; |
1817 | } | 1822 | } |
1818 | 1823 | ||
1824 | mutex_unlock(&q->sysfs_lock); | ||
1825 | |||
1819 | queue_for_each_hw_ctx(q, hctx, i) { | 1826 | queue_for_each_hw_ctx(q, hctx, i) { |
1820 | struct blk_mq_ctxmap *map = &hctx->ctx_map; | 1827 | struct blk_mq_ctxmap *map = &hctx->ctx_map; |
1821 | 1828 | ||