author		Bart Van Assche <bart.vanassche@sandisk.com>	2016-11-02 12:09:51 -0400
committer	Jens Axboe <axboe@fb.com>	2016-11-02 14:50:19 -0400
commit		6a83e74d214a47a1371cd2e6a783264fcba7d428
tree		871df14894af953b8486575a2803f038c8f33c4a	/block/blk-mq.c
parent		9b7dd572cc439fa92e120290eb74d0295567c5a0
blk-mq: Introduce blk_mq_quiesce_queue()
blk_mq_quiesce_queue() waits until ongoing .queue_rq() invocations have
finished. This function does *not* wait until all outstanding requests
have finished (this means invocation of request.end_io()).

The algorithm used by blk_mq_quiesce_queue() is as follows:

* Hold either an RCU read lock or an SRCU read lock around .queue_rq()
  calls. The former is used if .queue_rq() does not block and the latter
  if .queue_rq() may block.
* blk_mq_quiesce_queue() first calls blk_mq_stop_hw_queues() followed by
  synchronize_srcu() or synchronize_rcu(). The latter call waits for
  .queue_rq() invocations that started before blk_mq_quiesce_queue() was
  called.
* The blk_mq_hctx_stopped() calls that control whether or not .queue_rq()
  will be called are called with the (S)RCU read lock held. This is
  necessary to avoid race conditions against blk_mq_quiesce_queue().

Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
Cc: Hannes Reinecke <hare@suse.com>
Cc: Johannes Thumshirn <jthumshirn@suse.de>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Ming Lei <tom.leiming@gmail.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
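A hedged usage sketch (not part of this patch): the fragment below shows how a block driver might pair the new blk_mq_quiesce_queue() with blk_mq_start_stopped_hw_queues() around a reconfiguration step, since quiescing leaves the hardware queues stopped. The names struct my_dev, my_dev_reconfigure() and my_dev_pause_and_reconfigure() are illustrative placeholders, not kernel APIs.

#include <linux/blkdev.h>
#include <linux/blk-mq.h>

struct my_dev {				/* hypothetical driver state */
	struct request_queue *queue;
};

static void my_dev_reconfigure(struct my_dev *dev);	/* placeholder */

static void my_dev_pause_and_reconfigure(struct my_dev *dev)
{
	struct request_queue *q = dev->queue;

	/*
	 * Stop the hardware queues and wait for in-flight .queue_rq()
	 * calls to return. Note that outstanding requests may still
	 * complete afterwards; quiescing does not wait for end_io().
	 */
	blk_mq_quiesce_queue(q);

	my_dev_reconfigure(dev);	/* no new .queue_rq() calls can race with this */

	/* Restart the queues that blk_mq_quiesce_queue() stopped. */
	blk_mq_start_stopped_hw_queues(q, true);
}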
Diffstat (limited to 'block/blk-mq.c')
-rw-r--r--	block/blk-mq.c	71
1 file changed, 64 insertions(+), 7 deletions(-)
diff --git a/block/blk-mq.c b/block/blk-mq.c
index a461823644fb..3dc323543293 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -115,6 +115,33 @@ void blk_mq_unfreeze_queue(struct request_queue *q)
 }
 EXPORT_SYMBOL_GPL(blk_mq_unfreeze_queue);
 
+/**
+ * blk_mq_quiesce_queue() - wait until all ongoing queue_rq calls have finished
+ * @q: request queue.
+ *
+ * Note: this function does not prevent that the struct request end_io()
+ * callback function is invoked. Additionally, it is not prevented that
+ * new queue_rq() calls occur unless the queue has been stopped first.
+ */
+void blk_mq_quiesce_queue(struct request_queue *q)
+{
+	struct blk_mq_hw_ctx *hctx;
+	unsigned int i;
+	bool rcu = false;
+
+	blk_mq_stop_hw_queues(q);
+
+	queue_for_each_hw_ctx(q, hctx, i) {
+		if (hctx->flags & BLK_MQ_F_BLOCKING)
+			synchronize_srcu(&hctx->queue_rq_srcu);
+		else
+			rcu = true;
+	}
+	if (rcu)
+		synchronize_rcu();
+}
+EXPORT_SYMBOL_GPL(blk_mq_quiesce_queue);
+
 void blk_mq_wake_waiters(struct request_queue *q)
 {
 	struct blk_mq_hw_ctx *hctx;
@@ -766,7 +793,7 @@ static inline unsigned int queued_to_index(unsigned int queued)
  * of IO. In particular, we'd like FIFO behaviour on handling existing
  * items on the hctx->dispatch list. Ignore that for now.
  */
-static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
+static void blk_mq_process_rq_list(struct blk_mq_hw_ctx *hctx)
 {
 	struct request_queue *q = hctx->queue;
 	struct request *rq;
@@ -778,9 +805,6 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
 	if (unlikely(blk_mq_hctx_stopped(hctx)))
 		return;
 
-	WARN_ON(!cpumask_test_cpu(raw_smp_processor_id(), hctx->cpumask) &&
-		cpu_online(hctx->next_cpu));
-
 	hctx->run++;
 
 	/*
@@ -871,6 +895,24 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
 	}
 }
 
+static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
+{
+	int srcu_idx;
+
+	WARN_ON(!cpumask_test_cpu(raw_smp_processor_id(), hctx->cpumask) &&
+		cpu_online(hctx->next_cpu));
+
+	if (!(hctx->flags & BLK_MQ_F_BLOCKING)) {
+		rcu_read_lock();
+		blk_mq_process_rq_list(hctx);
+		rcu_read_unlock();
+	} else {
+		srcu_idx = srcu_read_lock(&hctx->queue_rq_srcu);
+		blk_mq_process_rq_list(hctx);
+		srcu_read_unlock(&hctx->queue_rq_srcu, srcu_idx);
+	}
+}
+
 /*
  * It'd be great if the workqueue API had a way to pass
  * in a mask and had some smarts for more clever placement.
@@ -1268,7 +1310,7 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
 	const int is_flush_fua = bio->bi_opf & (REQ_PREFLUSH | REQ_FUA);
 	struct blk_mq_alloc_data data;
 	struct request *rq;
-	unsigned int request_count = 0;
+	unsigned int request_count = 0, srcu_idx;
 	struct blk_plug *plug;
 	struct request *same_queue_rq = NULL;
 	blk_qc_t cookie;
@@ -1311,7 +1353,7 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
 		blk_mq_bio_to_request(rq, bio);
 
 		/*
-		 * We do limited pluging. If the bio can be merged, do that.
+		 * We do limited plugging. If the bio can be merged, do that.
 		 * Otherwise the existing request in the plug list will be
 		 * issued. So the plug list will have one request at most
 		 */
@@ -1331,7 +1373,16 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
 		blk_mq_put_ctx(data.ctx);
 		if (!old_rq)
 			goto done;
-		blk_mq_try_issue_directly(data.hctx, old_rq, &cookie);
+
+		if (!(data.hctx->flags & BLK_MQ_F_BLOCKING)) {
+			rcu_read_lock();
+			blk_mq_try_issue_directly(data.hctx, old_rq, &cookie);
+			rcu_read_unlock();
+		} else {
+			srcu_idx = srcu_read_lock(&data.hctx->queue_rq_srcu);
+			blk_mq_try_issue_directly(data.hctx, old_rq, &cookie);
+			srcu_read_unlock(&data.hctx->queue_rq_srcu, srcu_idx);
+		}
 		goto done;
 	}
 
@@ -1610,6 +1661,9 @@ static void blk_mq_exit_hctx(struct request_queue *q,
 	if (set->ops->exit_hctx)
 		set->ops->exit_hctx(hctx, hctx_idx);
 
+	if (hctx->flags & BLK_MQ_F_BLOCKING)
+		cleanup_srcu_struct(&hctx->queue_rq_srcu);
+
 	blk_mq_remove_cpuhp(hctx);
 	blk_free_flush_queue(hctx->fq);
 	sbitmap_free(&hctx->ctx_map);
@@ -1690,6 +1744,9 @@ static int blk_mq_init_hctx(struct request_queue *q,
 				   flush_start_tag + hctx_idx, node))
 		goto free_fq;
 
+	if (hctx->flags & BLK_MQ_F_BLOCKING)
+		init_srcu_struct(&hctx->queue_rq_srcu);
+
 	return 0;
 
  free_fq: