| author | Bart Van Assche <bvanassche@acm.org> | 2014-12-09 10:57:48 -0500 |
|---|---|---|
| committer | Jens Axboe <axboe@fb.com> | 2014-12-09 11:07:13 -0500 |
| commit | 45a9c9d909b24c6ad0e28a7946e7486e73010319 | |
| tree | 7317f2beee89119a5e2906dea3c6601e11ae1d61 | |
| parent | 19c66e59ce57e7b181625cbb408d48eb10837763 | |
blk-mq: Fix a use-after-free
blk-mq users are allowed to free the memory request_queue.tag_set
points at after blk_cleanup_queue() has finished but before
blk_release_queue() has started. This can happen, e.g., in the SCSI
core: the SCSI core embeds the tag_set structure in a SCSI host
structure, and the SCSI host structure is freed by
scsi_host_dev_release(), which is called after blk_cleanup_queue()
has finished but may be called before blk_release_queue().
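To make the lifetime problem concrete, here is a minimal userspace sketch (hypothetical types and names, not the actual kernel structures): the queue only borrows a pointer to a tag set that is embedded in, and therefore freed together with, the host.

```c
#include <stdio.h>
#include <stdlib.h>

/* Stand-in for blk_mq_tag_set. */
struct tag_set { int nr_tags; };

/* Stand-in for the SCSI host: the tag set is embedded,
 * so it is freed together with the host. */
struct host {
	struct tag_set tag_set;
};

/* Stand-in for request_queue: it only points at the host's tag set. */
struct queue {
	struct tag_set *tag_set;
};

int main(void)
{
	struct host *h = malloc(sizeof(*h));
	struct queue *q = malloc(sizeof(*q));

	if (!h || !q)
		return 1;
	h->tag_set.nr_tags = 32;
	q->tag_set = &h->tag_set;

	/* 1. blk_cleanup_queue(): the queue is drained. */

	/* 2. scsi_host_dev_release(): host and embedded tag set freed. */
	free(h);

	/*
	 * 3. blk_release_queue(): q->tag_set now dangles. Before the
	 *    fix, the mq teardown ran here and dereferenced it, e.g.
	 *    q->tag_set->nr_tags, which is a use-after-free.
	 */
	printf("release: tag set pointer is stale here\n");

	free(q);
	return 0;
}
```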
This means that it is not safe to access request_queue.tag_set from
inside blk_release_queue(). Hence remove the blk_sync_queue() call
from blk_release_queue(). This call is unnecessary: outstanding
requests must have finished before blk_release_queue() is
called. Additionally, move the blk_mq_free_queue() call from
blk_release_queue() to blk_cleanup_queue(), so that struct
request_queue.tag_set is no longer accessed after it has been freed.
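Continuing the userspace sketch above (same struct queue), the fix corresponds to moving all tag-set access into the cleanup step, which runs while the host is still alive; the release step then no longer needs to touch, or synchronize against, the tag set. The helper names below are hypothetical, not the kernel functions:

```c
void cleanup_queue(struct queue *q)
{
	/*
	 * Runs before the host can be freed: drain outstanding
	 * requests, then do the mq teardown while q->tag_set is
	 * still valid. Models blk_mq_free_queue() moving into
	 * blk_cleanup_queue().
	 */
	q->tag_set = NULL;
}

void release_queue(struct queue *q)
{
	/*
	 * May run after the host is gone. It must not dereference
	 * q->tag_set, and it need not sync the queue either: all
	 * requests already finished during cleanup_queue().
	 */
	free(q);
}
```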
This patch prevents the following kernel oops, which could be
triggered when deleting a SCSI host for which scsi-mq was enabled:
```
Call Trace:
 [<ffffffff8109a7c4>] lock_acquire+0xc4/0x270
 [<ffffffff814ce111>] mutex_lock_nested+0x61/0x380
 [<ffffffff812575f0>] blk_mq_free_queue+0x30/0x180
 [<ffffffff8124d654>] blk_release_queue+0x84/0xd0
 [<ffffffff8126c29b>] kobject_cleanup+0x7b/0x1a0
 [<ffffffff8126c140>] kobject_put+0x30/0x70
 [<ffffffff81245895>] blk_put_queue+0x15/0x20
 [<ffffffff8125c409>] disk_release+0x99/0xd0
 [<ffffffff8133d056>] device_release+0x36/0xb0
 [<ffffffff8126c29b>] kobject_cleanup+0x7b/0x1a0
 [<ffffffff8126c140>] kobject_put+0x30/0x70
 [<ffffffff8125a78a>] put_disk+0x1a/0x20
 [<ffffffff811d4cb5>] __blkdev_put+0x135/0x1b0
 [<ffffffff811d56a0>] blkdev_put+0x50/0x160
 [<ffffffff81199eb4>] kill_block_super+0x44/0x70
 [<ffffffff8119a2a4>] deactivate_locked_super+0x44/0x60
 [<ffffffff8119a87e>] deactivate_super+0x4e/0x70
 [<ffffffff811b9833>] cleanup_mnt+0x43/0x90
 [<ffffffff811b98d2>] __cleanup_mnt+0x12/0x20
 [<ffffffff8107252c>] task_work_run+0xac/0xe0
 [<ffffffff81002c01>] do_notify_resume+0x61/0xa0
 [<ffffffff814d2c58>] int_signal+0x12/0x17
```
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Robert Elliott <elliott@hp.com>
Cc: Ming Lei <ming.lei@canonical.com>
Cc: Alexander Gordeev <agordeev@redhat.com>
Cc: <stable@vger.kernel.org> # v3.13+
Signed-off-by: Jens Axboe <axboe@fb.com>
| -rw-r--r-- | block/blk-core.c | 3 |
| -rw-r--r-- | block/blk-sysfs.c | 12 |
2 files changed, 7 insertions, 8 deletions
```diff
diff --git a/block/blk-core.c b/block/blk-core.c
index 0421b53e6431..93f9152fc271 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -525,6 +525,9 @@ void blk_cleanup_queue(struct request_queue *q)
 	del_timer_sync(&q->backing_dev_info.laptop_mode_wb_timer);
 	blk_sync_queue(q);
 
+	if (q->mq_ops)
+		blk_mq_free_queue(q);
+
 	spin_lock_irq(lock);
 	if (q->queue_lock != &q->__queue_lock)
 		q->queue_lock = &q->__queue_lock;
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 1fac43408911..935ea2aa0730 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -492,17 +492,15 @@ static void blk_free_queue_rcu(struct rcu_head *rcu_head)
  *     Currently, its primary task it to free all the &struct request
  *     structures that were allocated to the queue and the queue itself.
  *
- * Caveat:
- *     Hopefully the low level driver will have finished any
- *     outstanding requests first...
+ * Note:
+ *     The low level driver must have finished any outstanding requests first
+ *     via blk_cleanup_queue().
  **/
 static void blk_release_queue(struct kobject *kobj)
 {
 	struct request_queue *q =
 		container_of(kobj, struct request_queue, kobj);
 
-	blk_sync_queue(q);
-
 	blkcg_exit_queue(q);
 
 	if (q->elevator) {
@@ -517,9 +515,7 @@ static void blk_release_queue(struct kobject *kobj)
 	if (q->queue_tags)
 		__blk_queue_free_tags(q);
 
-	if (q->mq_ops)
-		blk_mq_free_queue(q);
-	else
+	if (!q->mq_ops)
 		blk_free_flush_queue(q->fq);
 
 	blk_trace_shutdown(q);
```