author    Asias He <asias@redhat.com>   2012-05-24 11:28:52 -0400
committer Jens Axboe <axboe@kernel.dk>  2012-06-15 02:46:22 -0400
commit    5e5cfac0c622d42eff4fa308e91b3c9c1884b4f0 (patch)
tree      6246c81d97357fffaa9bcb33fef53a5adef2e829 /block
parent    458f27a9823a0841acb4ca59e0e7f33e181f85e2 (diff)
block: Mitigate lock unbalance caused by lock switching
Commit 777eb1bf15b8532c396821774bf6451e563438f5 disconnects the externally
supplied queue_lock before blk_drain_queue(). Switching the lock there
introduces a lock unbalance because threads which have taken the external
lock might unlock the internal lock during the queue drain. This patch
mitigates that by disconnecting the lock after the queue drain, since the
drain makes a lot of request_queue users go away.

However, please note that this patch only makes the problem less likely to
happen. Anyone who still holds a ref might try to issue a new request on a
dead queue after blk_cleanup_queue() finishes draining, so the lock
unbalance can still happen in that case.

=====================================
[ BUG: bad unlock balance detected! ]
3.4.0+ #288 Not tainted
-------------------------------------
fio/17706 is trying to release lock (&(&q->__queue_lock)->rlock) at:
[<ffffffff81329372>] blk_queue_bio+0x2a2/0x380
but there are no more locks to release!

other info that might help us debug this:
1 lock held by fio/17706:
 #0:  (&(&vblk->lock)->rlock){......}, at: [<ffffffff81327f1a>] get_request_wait+0x19a/0x250

stack backtrace:
Pid: 17706, comm: fio Not tainted 3.4.0+ #288
Call Trace:
 [<ffffffff81329372>] ? blk_queue_bio+0x2a2/0x380
 [<ffffffff810dea49>] print_unlock_inbalance_bug+0xf9/0x100
 [<ffffffff810dfe4f>] lock_release_non_nested+0x1df/0x330
 [<ffffffff811dae24>] ? dio_bio_end_aio+0x34/0xc0
 [<ffffffff811d6935>] ? bio_check_pages_dirty+0x85/0xe0
 [<ffffffff811daea1>] ? dio_bio_end_aio+0xb1/0xc0
 [<ffffffff81329372>] ? blk_queue_bio+0x2a2/0x380
 [<ffffffff81329372>] ? blk_queue_bio+0x2a2/0x380
 [<ffffffff810e0079>] lock_release+0xd9/0x250
 [<ffffffff81a74553>] _raw_spin_unlock_irq+0x23/0x40
 [<ffffffff81329372>] blk_queue_bio+0x2a2/0x380
 [<ffffffff81328faa>] generic_make_request+0xca/0x100
 [<ffffffff81329056>] submit_bio+0x76/0xf0
 [<ffffffff8115470c>] ? set_page_dirty_lock+0x3c/0x60
 [<ffffffff811d69e1>] ? bio_set_pages_dirty+0x51/0x70
 [<ffffffff811dd1a8>] do_blockdev_direct_IO+0xbf8/0xee0
 [<ffffffff811d8620>] ? blkdev_get_block+0x80/0x80
 [<ffffffff811dd4e5>] __blockdev_direct_IO+0x55/0x60
 [<ffffffff811d8620>] ? blkdev_get_block+0x80/0x80
 [<ffffffff811d92e7>] blkdev_direct_IO+0x57/0x60
 [<ffffffff811d8620>] ? blkdev_get_block+0x80/0x80
 [<ffffffff8114c6ae>] generic_file_aio_read+0x70e/0x760
 [<ffffffff810df7c5>] ? __lock_acquire+0x215/0x5a0
 [<ffffffff811e9924>] ? aio_run_iocb+0x54/0x1a0
 [<ffffffff8114bfa0>] ? grab_cache_page_nowait+0xc0/0xc0
 [<ffffffff811e82cc>] aio_rw_vect_retry+0x7c/0x1e0
 [<ffffffff811e8250>] ? aio_fsync+0x30/0x30
 [<ffffffff811e9936>] aio_run_iocb+0x66/0x1a0
 [<ffffffff811ea9b0>] do_io_submit+0x6f0/0xb80
 [<ffffffff8134de2e>] ? trace_hardirqs_on_thunk+0x3a/0x3f
 [<ffffffff811eae50>] sys_io_submit+0x10/0x20
 [<ffffffff81a7c9e9>] system_call_fastpath+0x16/0x1b

Changes since v2: Update the commit log to explain how the code is still
broken even if we delay the lock switching until after the drain.

Changes since v1: Update the commit log as Tejun suggested.

Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Asias He <asias@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
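For illustration only (this is not part of the patch): the splat above occurs when a
submitter takes the queue lock through the q->queue_lock pointer, the pointer is
switched to &q->__queue_lock underneath it, and the submitter then unlocks through the
same pointer. Below is a minimal user-space sketch of that pattern, with pthread
mutexes standing in for the kernel spinlocks; the names queue_sketch, driver_lock and
internal_lock are purely illustrative.

/*
 * Illustrative user-space sketch only; not kernel code.  pthread mutexes
 * stand in for the spinlocks, and the field names merely mirror the
 * kernel ones for readability.
 */
#include <pthread.h>
#include <stdio.h>

struct queue_sketch {
	pthread_mutex_t internal_lock;	/* plays the role of q->__queue_lock   */
	pthread_mutex_t *queue_lock;	/* may point at a driver-supplied lock */
};

int main(void)
{
	pthread_mutexattr_t attr;
	pthread_mutex_t driver_lock;	/* plays the role of vblk->lock */
	struct queue_sketch q;
	int err;

	/* Error-checking mutexes report unbalanced unlocks, much like lockdep. */
	pthread_mutexattr_init(&attr);
	pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ERRORCHECK);
	pthread_mutex_init(&driver_lock, &attr);
	pthread_mutex_init(&q.internal_lock, &attr);
	q.queue_lock = &driver_lock;

	/* Submitter ("blk_queue_bio"): take the queue lock through the pointer. */
	pthread_mutex_lock(q.queue_lock);		/* locks driver_lock */

	/*
	 * Cleanup ("blk_cleanup_queue"): switch the pointer back to the internal
	 * lock.  In the kernel this happens on another CPU while the submitter
	 * may be sleeping in get_request_wait(); it is done inline here so the
	 * demonstration stays deterministic.
	 */
	q.queue_lock = &q.internal_lock;

	/* The submitter resumes and unlocks through the same pointer... */
	err = pthread_mutex_unlock(q.queue_lock);
	/*
	 * ...which now names a lock it never took: the unlock is rejected
	 * (EPERM on glibc) and driver_lock is still held -- the unbalance.
	 */
	printf("unlock after lock switch: %s\n", err ? "rejected" : "ok");
	return 0;
}

Compiled with cc -pthread sketch.c, the final unlock fails and driver_lock stays held,
which is the user-space analogue of the "bad unlock balance" report above.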
Diffstat (limited to 'block')
-rw-r--r--  block/blk-core.c  10
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/block/blk-core.c b/block/blk-core.c
index ce7fbf8d85a6..93eb3e4f88ce 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -480,7 +480,6 @@ void blk_cleanup_queue(struct request_queue *q)
 	/* mark @q DEAD, no new request or merges will be allowed afterwards */
 	mutex_lock(&q->sysfs_lock);
 	queue_flag_set_unlocked(QUEUE_FLAG_DEAD, q);
-
 	spin_lock_irq(lock);
 
 	/*
@@ -498,10 +497,6 @@ void blk_cleanup_queue(struct request_queue *q)
 	queue_flag_set(QUEUE_FLAG_NOMERGES, q);
 	queue_flag_set(QUEUE_FLAG_NOXMERGES, q);
 	queue_flag_set(QUEUE_FLAG_DEAD, q);
-
-	if (q->queue_lock != &q->__queue_lock)
-		q->queue_lock = &q->__queue_lock;
-
 	spin_unlock_irq(lock);
 	mutex_unlock(&q->sysfs_lock);
 
@@ -512,6 +507,11 @@ void blk_cleanup_queue(struct request_queue *q)
 	del_timer_sync(&q->backing_dev_info.laptop_mode_wb_timer);
 	blk_sync_queue(q);
 
+	spin_lock_irq(lock);
+	if (q->queue_lock != &q->__queue_lock)
+		q->queue_lock = &q->__queue_lock;
+	spin_unlock_irq(lock);
+
 	/* @q is and will stay empty, shutdown and put */
 	blk_put_queue(q);
 }