author     Bart Van Assche <bart.vanassche@sandisk.com>   2016-11-02 12:09:51 -0400
committer  Jens Axboe <axboe@fb.com>                      2016-11-02 14:50:19 -0400
commit     6a83e74d214a47a1371cd2e6a783264fcba7d428 (patch)
tree       871df14894af953b8486575a2803f038c8f33c4a /block/blk-mq.c
parent     9b7dd572cc439fa92e120290eb74d0295567c5a0 (diff)
blk-mq: Introduce blk_mq_quiesce_queue()
blk_mq_quiesce_queue() waits until ongoing .queue_rq() invocations
have finished. This function does *not* wait until all outstanding
requests have finished, i.e. it does not wait for request.end_io()
to be invoked.
The algorithm used by blk_mq_quiesce_queue() is as follows (an
illustrative sketch of the pattern follows the list):
* Hold either an RCU read lock or an SRCU read lock around
.queue_rq() calls. The former is used if .queue_rq() does not
block and the latter if .queue_rq() may block.
* blk_mq_quiesce_queue() first calls blk_mq_stop_hw_queues() and
then synchronize_srcu() or synchronize_rcu(). The synchronize
call waits for the .queue_rq() invocations that started before
blk_mq_quiesce_queue() was called.
* The blk_mq_hctx_stopped() checks that decide whether or not
.queue_rq() will be called are performed with the (S)RCU read
lock held. This is necessary to avoid racing against
blk_mq_quiesce_queue().
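
To make the read-side/update-side pairing above concrete, here is a minimal, hedged sketch of the pattern (not the patch itself). dispatch() and do_queue_rq() are stand-in names; blk_mq_hctx_stopped(), blk_mq_stop_hw_queues() and the RCU primitives are the real interfaces the patch relies on. The SRCU variant substitutes srcu_read_lock()/srcu_read_unlock() and synchronize_srcu().

/* Sketch only: illustrates the quiesce pattern, not the actual blk-mq code. */

/* Dispatch side: the "stopped" check and ->queue_rq() both run under the read lock. */
static void dispatch(struct blk_mq_hw_ctx *hctx)
{
	rcu_read_lock();                   /* srcu_read_lock() if ->queue_rq() may block */
	if (!blk_mq_hctx_stopped(hctx))
		do_queue_rq(hctx);         /* stand-in for invoking ->queue_rq() */
	rcu_read_unlock();
}

/* Quiesce side: mark the queues stopped, then wait for in-flight readers. */
static void quiesce(struct request_queue *q)
{
	blk_mq_stop_hw_queues(q);          /* later dispatch attempts see "stopped" */
	synchronize_rcu();                 /* synchronize_srcu() for BLK_MQ_F_BLOCKING queues */
}

Because the stopped check happens inside the read-side critical section, any dispatcher that could still miss the "stopped" state is guaranteed to have started before the synchronize call, which therefore waits for it.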
Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
Cc: Hannes Reinecke <hare@suse.com>
Cc: Johannes Thumshirn <jthumshirn@suse.de>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Ming Lei <tom.leiming@gmail.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
Diffstat (limited to 'block/blk-mq.c')
-rw-r--r--   block/blk-mq.c   71
1 file changed, 64 insertions(+), 7 deletions(-)
diff --git a/block/blk-mq.c b/block/blk-mq.c
index a461823644fb..3dc323543293 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -115,6 +115,33 @@ void blk_mq_unfreeze_queue(struct request_queue *q)
 }
 EXPORT_SYMBOL_GPL(blk_mq_unfreeze_queue);
 
+/**
+ * blk_mq_quiesce_queue() - wait until all ongoing queue_rq calls have finished
+ * @q: request queue.
+ *
+ * Note: this function does not prevent that the struct request end_io()
+ * callback function is invoked. Additionally, it is not prevented that
+ * new queue_rq() calls occur unless the queue has been stopped first.
+ */
+void blk_mq_quiesce_queue(struct request_queue *q)
+{
+	struct blk_mq_hw_ctx *hctx;
+	unsigned int i;
+	bool rcu = false;
+
+	blk_mq_stop_hw_queues(q);
+
+	queue_for_each_hw_ctx(q, hctx, i) {
+		if (hctx->flags & BLK_MQ_F_BLOCKING)
+			synchronize_srcu(&hctx->queue_rq_srcu);
+		else
+			rcu = true;
+	}
+	if (rcu)
+		synchronize_rcu();
+}
+EXPORT_SYMBOL_GPL(blk_mq_quiesce_queue);
+
 void blk_mq_wake_waiters(struct request_queue *q)
 {
 	struct blk_mq_hw_ctx *hctx;
@@ -766,7 +793,7 @@ static inline unsigned int queued_to_index(unsigned int queued)
  * of IO. In particular, we'd like FIFO behaviour on handling existing
  * items on the hctx->dispatch list. Ignore that for now.
  */
-static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
+static void blk_mq_process_rq_list(struct blk_mq_hw_ctx *hctx)
 {
 	struct request_queue *q = hctx->queue;
 	struct request *rq;
@@ -778,9 +805,6 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
 	if (unlikely(blk_mq_hctx_stopped(hctx)))
 		return;
 
-	WARN_ON(!cpumask_test_cpu(raw_smp_processor_id(), hctx->cpumask) &&
-		cpu_online(hctx->next_cpu));
-
 	hctx->run++;
 
 	/*
@@ -871,6 +895,24 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
 	}
 }
 
+static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
+{
+	int srcu_idx;
+
+	WARN_ON(!cpumask_test_cpu(raw_smp_processor_id(), hctx->cpumask) &&
+		cpu_online(hctx->next_cpu));
+
+	if (!(hctx->flags & BLK_MQ_F_BLOCKING)) {
+		rcu_read_lock();
+		blk_mq_process_rq_list(hctx);
+		rcu_read_unlock();
+	} else {
+		srcu_idx = srcu_read_lock(&hctx->queue_rq_srcu);
+		blk_mq_process_rq_list(hctx);
+		srcu_read_unlock(&hctx->queue_rq_srcu, srcu_idx);
+	}
+}
+
 /*
  * It'd be great if the workqueue API had a way to pass
  * in a mask and had some smarts for more clever placement.
@@ -1268,7 +1310,7 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
 	const int is_flush_fua = bio->bi_opf & (REQ_PREFLUSH | REQ_FUA);
 	struct blk_mq_alloc_data data;
 	struct request *rq;
-	unsigned int request_count = 0;
+	unsigned int request_count = 0, srcu_idx;
 	struct blk_plug *plug;
 	struct request *same_queue_rq = NULL;
 	blk_qc_t cookie;
@@ -1311,7 +1353,7 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
 	blk_mq_bio_to_request(rq, bio);
 
 	/*
-	 * We do limited pluging. If the bio can be merged, do that.
+	 * We do limited plugging. If the bio can be merged, do that.
 	 * Otherwise the existing request in the plug list will be
 	 * issued. So the plug list will have one request at most
 	 */
@@ -1331,7 +1373,16 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
 		blk_mq_put_ctx(data.ctx);
 		if (!old_rq)
 			goto done;
-		blk_mq_try_issue_directly(data.hctx, old_rq, &cookie);
+
+		if (!(data.hctx->flags & BLK_MQ_F_BLOCKING)) {
+			rcu_read_lock();
+			blk_mq_try_issue_directly(data.hctx, old_rq, &cookie);
+			rcu_read_unlock();
+		} else {
+			srcu_idx = srcu_read_lock(&data.hctx->queue_rq_srcu);
+			blk_mq_try_issue_directly(data.hctx, old_rq, &cookie);
+			srcu_read_unlock(&data.hctx->queue_rq_srcu, srcu_idx);
+		}
 		goto done;
 	}
 
@@ -1610,6 +1661,9 @@ static void blk_mq_exit_hctx(struct request_queue *q,
 	if (set->ops->exit_hctx)
 		set->ops->exit_hctx(hctx, hctx_idx);
 
+	if (hctx->flags & BLK_MQ_F_BLOCKING)
+		cleanup_srcu_struct(&hctx->queue_rq_srcu);
+
 	blk_mq_remove_cpuhp(hctx);
 	blk_free_flush_queue(hctx->fq);
 	sbitmap_free(&hctx->ctx_map);
@@ -1690,6 +1744,9 @@ static int blk_mq_init_hctx(struct request_queue *q,
 				flush_start_tag + hctx_idx, node))
 		goto free_fq;
 
+	if (hctx->flags & BLK_MQ_F_BLOCKING)
+		init_srcu_struct(&hctx->queue_rq_srcu);
+
 	return 0;
 
  free_fq:
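
Usage note (not part of the patch): a driver that needs to change state read by its ->queue_rq() implementation could, after this change, bracket the update roughly as below. The mydrv_* names and fields are hypothetical; blk_mq_start_stopped_hw_queues() is the existing helper for restarting the stopped queues, since a dedicated unquiesce call is not introduced here.

/* Hypothetical driver-side sketch; only the blk_mq_* calls are real APIs. */
static void mydrv_reconfigure(struct mydrv_ctrl *ctrl)
{
	struct request_queue *q = ctrl->queue;       /* assumed driver field */

	blk_mq_quiesce_queue(q);        /* stop hw queues, wait for ->queue_rq() to drain */
	mydrv_apply_new_settings(ctrl); /* hypothetical: safe, no ->queue_rq() is running */
	blk_mq_start_stopped_hw_queues(q, true);     /* restart dispatching (async) */
}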