diff options
| author | Bart Van Assche <bart.vanassche@wdc.com> | 2018-02-28 13:15:33 -0500 |
|---|---|---|
| committer | Jens Axboe <axboe@kernel.dk> | 2018-02-28 14:23:35 -0500 |
| commit | a063057d7c731cffa7d10740e8ebc2970df8dbb3 (patch) | |
| tree | 40895bc044e82ff993d698128c5b24f6d0c82c0a /block | |
| parent | 498f6650aec864e331cae7575fec5f07781d0bf3 (diff) | |
block: Fix a race between request queue removal and the block cgroup controller
Prevent the following race from occurring:
blk_cleanup_queue()                      blkcg_print_blkgs()
  spin_lock_irq(lock) (1)                  spin_lock_irq(blkg->q->queue_lock) (2,5)
    q->queue_lock = &q->__queue_lock (3)
  spin_unlock_irq(lock) (4)
                                           spin_unlock_irq(blkg->q->queue_lock) (6)
(1) take driver lock;
(2) busy loop for driver lock;
(3) override driver lock with internal lock;
(4) unlock driver lock;
(5) can take driver lock now;
(6) but unlock internal lock.
This change is safe because only the SCSI core and the NVME core keep
a reference on a request queue after having called blk_cleanup_queue().
Neither driver accesses any of the removed data structures between its
blk_cleanup_queue() and blk_put_queue() calls.
Reported-by: Joseph Qi <joseph.qi@linux.alibaba.com>
Signed-off-by: Bart Van Assche <bart.vanassche@wdc.com>
Reviewed-by: Joseph Qi <joseph.qi@linux.alibaba.com>
Cc: Jan Kara <jack@suse.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Diffstat (limited to 'block')
| -rw-r--r-- | block/blk-core.c | 31 | ||||
| -rw-r--r-- | block/blk-sysfs.c | 7 |
2 files changed, 31 insertions, 7 deletions
diff --git a/block/blk-core.c b/block/blk-core.c index 41c74b37be85..6febc69a58aa 100644 --- a/block/blk-core.c +++ b/block/blk-core.c | |||
| @@ -719,6 +719,37 @@ void blk_cleanup_queue(struct request_queue *q) | |||
| 719 | del_timer_sync(&q->backing_dev_info->laptop_mode_wb_timer); | 719 | del_timer_sync(&q->backing_dev_info->laptop_mode_wb_timer); |
| 720 | blk_sync_queue(q); | 720 | blk_sync_queue(q); |
| 721 | 721 | ||
| 722 | /* | ||
| 723 | * I/O scheduler exit is only safe after the sysfs scheduler attribute | ||
| 724 | * has been removed. | ||
| 725 | */ | ||
| 726 | WARN_ON_ONCE(q->kobj.state_in_sysfs); | ||
| 727 | |||
| 728 | /* | ||
| 729 | * Since the I/O scheduler exit code may access cgroup information, | ||
| 730 | * perform I/O scheduler exit before disassociating from the block | ||
| 731 | * cgroup controller. | ||
| 732 | */ | ||
| 733 | if (q->elevator) { | ||
| 734 | ioc_clear_queue(q); | ||
| 735 | elevator_exit(q, q->elevator); | ||
| 736 | q->elevator = NULL; | ||
| 737 | } | ||
| 738 | |||
| 739 | /* | ||
| 740 | * Remove all references to @q from the block cgroup controller before | ||
| 741 | * restoring @q->queue_lock to avoid that restoring this pointer causes | ||
| 742 | * e.g. blkcg_print_blkgs() to crash. | ||
| 743 | */ | ||
| 744 | blkcg_exit_queue(q); | ||
| 745 | |||
| 746 | /* | ||
| 747 | * Since the cgroup code may dereference the @q->backing_dev_info | ||
| 748 | * pointer, only decrease its reference count after having removed the | ||
| 749 | * association with the block cgroup controller. | ||
| 750 | */ | ||
| 751 | bdi_put(q->backing_dev_info); | ||
| 752 | |||
| 722 | if (q->mq_ops) | 753 | if (q->mq_ops) |
| 723 | blk_mq_free_queue(q); | 754 | blk_mq_free_queue(q); |
| 724 | percpu_ref_exit(&q->q_usage_counter); | 755 | percpu_ref_exit(&q->q_usage_counter); |
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index cbea895a5547..fd71a00c9462 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c | |||
| @@ -798,13 +798,6 @@ static void __blk_release_queue(struct work_struct *work) | |||
| 798 | if (test_bit(QUEUE_FLAG_POLL_STATS, &q->queue_flags)) | 798 | if (test_bit(QUEUE_FLAG_POLL_STATS, &q->queue_flags)) |
| 799 | blk_stat_remove_callback(q, q->poll_cb); | 799 | blk_stat_remove_callback(q, q->poll_cb); |
| 800 | blk_stat_free_callback(q->poll_cb); | 800 | blk_stat_free_callback(q->poll_cb); |
| 801 | bdi_put(q->backing_dev_info); | ||
| 802 | blkcg_exit_queue(q); | ||
| 803 | |||
| 804 | if (q->elevator) { | ||
| 805 | ioc_clear_queue(q); | ||
| 806 | elevator_exit(q, q->elevator); | ||
| 807 | } | ||
| 808 | 801 | ||
| 809 | blk_free_queue_stats(q->stats); | 802 | blk_free_queue_stats(q->stats); |
| 810 | 803 | ||
