blk-mq: new multi-queue block IO queueing mechanism

Linux currently has two models for block devices: - The classic request_fn based approach, where drivers use struct request units for IO. The block layer provides various helper functionalities to let drivers share code, things like tag management, timeout handling, queueing, etc. - The "stacked" approach, where a driver squeezes in between the block layer and IO submitter. Since this bypasses the IO stack, drivers generally have to manage everything themselves. With drivers being written for new high IOPS devices, the classic request_fn based driver doesn't work well enough. The design dates back to when both SMP and high IOPS were rare. It has problems with scaling to bigger machines, and runs into scaling issues even on smaller machines when you have IOPS in the hundreds of thousands per device. The stacked approach is then most often selected as the model for the driver. But this means that everybody has to re-invent everything, and along with that we get all the problems again that the shared approach solved. This commit introduces blk-mq, block multi queue support. The design is centered around per-cpu queues for queueing IO, which then funnel down into x number of hardware submission queues. We might have a 1:1 mapping between the two, or it might be an N:M mapping. That all depends on what the hardware supports. blk-mq provides various helper functions, which include: - Scalable support for request tagging. Most devices need to be able to uniquely identify a request both in the driver and to the hardware. The tagging uses per-cpu caches for freed tags, to enable cache hot reuse. - Timeout handling without tracking requests on a per-device basis. Basically the driver should be able to get a notification, if a request happens to fail. - Optional support for non 1:1 mappings between issue and submission queues. blk-mq can redirect IO completions to the desired location. - Support for per-request payloads. 
Drivers almost always need to associate a request structure with some driver private command structure. Drivers can tell blk-mq this at init time, and then any request handed to the driver will have the required size of memory associated with it. - Support for merging of IO, and plugging. The stacked model gets neither of these. Even for high IOPS devices, merging sequential IO reduces per-command overhead and thus increases bandwidth. For now, this is provided as a potential 3rd queueing model, with the hope being that, as it matures, it can replace both the classic and stacked models. That would get us back to having just 1 real model for block devices, leaving the stacked approach to dm/md devices (as it was originally intended). Contributions in this patch from the following people: Shaohua Li <shli@fusionio.com> Alexander Gordeev <agordeev@redhat.com> Christoph Hellwig <hch@infradead.org> Mike Christie <michaelc@cs.wisc.edu> Matias Bjorling <m@bjorling.me> Jeff Moyer <jmoyer@redhat.com> Acked-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Jens Axboe <axboe@kernel.dk>
author: Jens Axboe <axboe@kernel.dk> 2013-10-24 04:20:05 -0400
committer: Jens Axboe <axboe@kernel.dk> 2013-10-25 06:56:00 -0400
commit: 320ae51feed5c2f13664aa05a76bec198967e04d (patch)
tree: ad37ccbcc5ddb1c9c19e48965bf8fec1b05217dc /block/blk-flush.c
parent: 1dddc01af0d42b21058e0cb9c1ca9e8d5204d9b0 (diff)
1 files changed, 139 insertions, 15 deletions
diff --git a/block/blk-flush.c b/block/blk-flush.c
index cc2b827a853c..3e4cc9c7890a 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -69,8 +69,10 @@
 #include <linux/bio.h>
 #include <linux/blkdev.h>
 #include <linux/gfp.h>
+#include <linux/blk-mq.h>
 #include "blk.h"
+#include "blk-mq.h"
 /* FLUSH/FUA sequences */
 enum {
@@ -124,6 +126,24 @@ static void blk_flush_restore_request(struct request *rq)
        /* make @rq a normal request */
        rq->cmd_flags &= ~REQ_FLUSH_SEQ;
        rq->end_io = rq->flush.saved_end_io;
+        blk_clear_rq_complete(rq);
+}
+static void mq_flush_data_run(struct work_struct *work)
+{
+        struct request *rq;
+        rq = container_of(work, struct request, mq_flush_data);
+        memset(&rq->csd, 0, sizeof(rq->csd));
+        blk_mq_run_request(rq, true, false);
+}
+static void blk_mq_flush_data_insert(struct request *rq)
+{
+        INIT_WORK(&rq->mq_flush_data, mq_flush_data_run);
+        kblockd_schedule_work(rq->q, &rq->mq_flush_data);
 }
 /**
@@ -136,7 +156,7 @@ static void blk_flush_restore_request(struct request *rq)
 * completion and trigger the next step.
 *
 * CONTEXT:
- * spin_lock_irq(q->queue_lock)
+ * spin_lock_irq(q->queue_lock or q->mq_flush_lock)
 *
 * RETURNS:
 * %true if requests were added to the dispatch queue, %false otherwise.
@@ -146,7 +166,7 @@ static bool blk_flush_complete_seq(struct request *rq, unsigned int seq,
 {
        struct request_queue *q = rq->q;
        struct list_head *pending = &q->flush_queue[q->flush_pending_idx];
-        bool queued = false;
+        bool queued = false, kicked;
        BUG_ON(rq->flush.seq & seq);
        rq->flush.seq |= seq;
@@ -167,8 +187,12 @@ static bool blk_flush_complete_seq(struct request *rq, unsigned int seq,
        case REQ_FSEQ_DATA:
                list_move_tail(&rq->flush.list, &q->flush_data_in_flight);
-                list_add(&rq->queuelist, &q->queue_head);
+                if (q->mq_ops)
-                queued = true;
+                        blk_mq_flush_data_insert(rq);
+                else {
+                        list_add(&rq->queuelist, &q->queue_head);
+                        queued = true;
+                }
                break;
        case REQ_FSEQ_DONE:
@@ -181,28 +205,43 @@ static bool blk_flush_complete_seq(struct request *rq, unsigned int seq,
                BUG_ON(!list_empty(&rq->queuelist));
                list_del_init(&rq->flush.list);
                blk_flush_restore_request(rq);
-                __blk_end_request_all(rq, error);
+                if (q->mq_ops)
+                        blk_mq_end_io(rq, error);
+                else
+                        __blk_end_request_all(rq, error);
                break;
        default:
                BUG();
        }
-        return blk_kick_flush(q) | queued;
+        kicked = blk_kick_flush(q);
+        /* blk_mq_run_flush will run queue */
+        if (q->mq_ops)
+                return queued;
+        return kicked | queued;
 }
 static void flush_end_io(struct request *flush_rq, int error)
 {
        struct request_queue *q = flush_rq->q;
-        struct list_head *running = &q->flush_queue[q->flush_running_idx];
+        struct list_head *running;
        bool queued = false;
        struct request *rq, *n;
+        unsigned long flags = 0;
+        if (q->mq_ops) {
+                blk_mq_free_request(flush_rq);
+                spin_lock_irqsave(&q->mq_flush_lock, flags);
+        }
+        running = &q->flush_queue[q->flush_running_idx];
        BUG_ON(q->flush_pending_idx == q->flush_running_idx);
        /* account completion of the flush request */
        q->flush_running_idx ^= 1;
-        elv_completed_request(q, flush_rq);
+        if (!q->mq_ops)
+                elv_completed_request(q, flush_rq);
        /* and push the waiting requests to the next stage */
        list_for_each_entry_safe(rq, n, running, flush.list) {
@@ -223,9 +262,48 @@ static void flush_end_io(struct request *flush_rq, int error)
         * directly into request_fn may confuse the driver.  Always use
         * kblockd.
         */
-        if (queued || q->flush_queue_delayed)
+        if (queued || q->flush_queue_delayed) {
-                blk_run_queue_async(q);
+                if (!q->mq_ops)
+                        blk_run_queue_async(q);
+                else
+                /*
+                 * This can be optimized to only run queues with requests
+                 * queued if necessary.
+                 */
+                        blk_mq_run_queues(q, true);
+        }
        q->flush_queue_delayed = 0;
+        if (q->mq_ops)
+                spin_unlock_irqrestore(&q->mq_flush_lock, flags);
+}
+static void mq_flush_work(struct work_struct *work)
+{
+        struct request_queue *q;
+        struct request *rq;
+        q = container_of(work, struct request_queue, mq_flush_work);
+        /* We don't need set REQ_FLUSH_SEQ, it's for consistency */
+        rq = blk_mq_alloc_request(q, WRITE_FLUSH|REQ_FLUSH_SEQ,
+                __GFP_WAIT|GFP_ATOMIC);
+        rq->cmd_type = REQ_TYPE_FS;
+        rq->end_io = flush_end_io;
+        blk_mq_run_request(rq, true, false);
+}
+/*
+ * We can't directly use q->flush_rq, because it doesn't have tag and is not in
+ * hctx->rqs[]. so we must allocate a new request, since we can't sleep here,
+ * so offload the work to workqueue.
+ *
+ * Note: we assume a flush request finished in any hardware queue will flush
+ * the whole disk cache.
+ */
+static void mq_run_flush(struct request_queue *q)
+{
+        kblockd_schedule_work(q, &q->mq_flush_work);
 }
 /**
@@ -236,7 +314,7 @@ static void flush_end_io(struct request *flush_rq, int error)
 * Please read the comment at the top of this file for more info.
 *
 * CONTEXT:
- * spin_lock_irq(q->queue_lock)
+ * spin_lock_irq(q->queue_lock or q->mq_flush_lock)
 *
 * RETURNS:
 * %true if flush was issued, %false otherwise.
@@ -261,13 +339,18 @@ static bool blk_kick_flush(struct request_queue *q)
         * Issue flush and toggle pending_idx.  This makes pending_idx
         * different from running_idx, which means flush is in flight.
         */
+        q->flush_pending_idx ^= 1;
+        if (q->mq_ops) {
+                mq_run_flush(q);
+                return true;
+        }
        blk_rq_init(q, &q->flush_rq);
        q->flush_rq.cmd_type = REQ_TYPE_FS;
        q->flush_rq.cmd_flags = WRITE_FLUSH | REQ_FLUSH_SEQ;
        q->flush_rq.rq_disk = first_rq->rq_disk;
        q->flush_rq.end_io = flush_end_io;
-        q->flush_pending_idx ^= 1;
        list_add_tail(&q->flush_rq.queuelist, &q->queue_head);
        return true;
 }
@@ -284,16 +367,37 @@ static void flush_data_end_io(struct request *rq, int error)
                blk_run_queue_async(q);
 }
+static void mq_flush_data_end_io(struct request *rq, int error)
+{
+        struct request_queue *q = rq->q;
+        struct blk_mq_hw_ctx *hctx;
+        struct blk_mq_ctx *ctx;
+        unsigned long flags;
+        ctx = rq->mq_ctx;
+        hctx = q->mq_ops->map_queue(q, ctx->cpu);
+        /*
+         * After populating an empty queue, kick it to avoid stall.  Read
+         * the comment in flush_end_io().
+         */
+        spin_lock_irqsave(&q->mq_flush_lock, flags);
+        if (blk_flush_complete_seq(rq, REQ_FSEQ_DATA, error))
+                blk_mq_run_hw_queue(hctx, true);
+        spin_unlock_irqrestore(&q->mq_flush_lock, flags);
+}
 /**
 * blk_insert_flush - insert a new FLUSH/FUA request
 * @rq: request to insert
 *
 * To be called from __elv_add_request() for %ELEVATOR_INSERT_FLUSH insertions.
+ * or __blk_mq_run_hw_queue() to dispatch request.
 * @rq is being submitted.  Analyze what needs to be done and put it on the
 * right queue.
 *
 * CONTEXT:
- * spin_lock_irq(q->queue_lock)
+ * spin_lock_irq(q->queue_lock) in !mq case
 */
 void blk_insert_flush(struct request *rq)
 {
@@ -316,7 +420,10 @@ void blk_insert_flush(struct request *rq)
         * complete the request.
         */
        if (!policy) {
-                __blk_end_bidi_request(rq, 0, 0, 0);
+                if (q->mq_ops)
+                        blk_mq_end_io(rq, 0);
+                else
+                        __blk_end_bidi_request(rq, 0, 0, 0);
                return;
        }
@@ -329,7 +436,10 @@ void blk_insert_flush(struct request *rq)
         */
        if ((policy & REQ_FSEQ_DATA) &&
            !(policy & (REQ_FSEQ_PREFLUSH | REQ_FSEQ_POSTFLUSH))) {
-                list_add_tail(&rq->queuelist, &q->queue_head);
+                if (q->mq_ops) {
+                        blk_mq_run_request(rq, false, true);
+                } else
+                        list_add_tail(&rq->queuelist, &q->queue_head);
                return;
        }
@@ -341,6 +451,14 @@ void blk_insert_flush(struct request *rq)
        INIT_LIST_HEAD(&rq->flush.list);
        rq->cmd_flags |= REQ_FLUSH_SEQ;
        rq->flush.saved_end_io = rq->end_io; /* Usually NULL */
+        if (q->mq_ops) {
+                rq->end_io = mq_flush_data_end_io;
+                spin_lock_irq(&q->mq_flush_lock);
+                blk_flush_complete_seq(rq, REQ_FSEQ_ACTIONS & ~policy, 0);
+                spin_unlock_irq(&q->mq_flush_lock);
+                return;
+        }
        rq->end_io = flush_data_end_io;
        blk_flush_complete_seq(rq, REQ_FSEQ_ACTIONS & ~policy, 0);
@@ -453,3 +571,9 @@ int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask,
        return ret;
 }
 EXPORT_SYMBOL(blkdev_issue_flush);
+void blk_mq_init_flush(struct request_queue *q)
+{
+        spin_lock_init(&q->mq_flush_lock);
+        INIT_WORK(&q->mq_flush_work, mq_flush_work);
+}
author	Jens Axboe <axboe@kernel.dk>	2013-10-24 04:20:05 -0400
committer	Jens Axboe <axboe@kernel.dk>	2013-10-25 06:56:00 -0400
commit	320ae51feed5c2f13664aa05a76bec198967e04d (patch)
tree	ad37ccbcc5ddb1c9c19e48965bf8fec1b05217dc /block/blk-flush.c
parent	1dddc01af0d42b21058e0cb9c1ca9e8d5204d9b0 (diff)

diff --git a/block/blk-flush.c b/block/blk-flush.c index cc2b827a853c..3e4cc9c7890a 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c
@@ -69,8 +69,10 @@
69	#include <linux/bio.h>	69	#include <linux/bio.h>
70	#include <linux/blkdev.h>	70	#include <linux/blkdev.h>
71	#include <linux/gfp.h>	71	#include <linux/gfp.h>
		72	#include <linux/blk-mq.h>
72		73
73	#include "blk.h"	74	#include "blk.h"
		75	#include "blk-mq.h"
74		76
75	/* FLUSH/FUA sequences */	77	/* FLUSH/FUA sequences */
76	enum {	78	enum {
@@ -124,6 +126,24 @@ static void blk_flush_restore_request(struct request *rq)
124	/* make @rq a normal request */	126	/* make @rq a normal request */
125	rq->cmd_flags &= ~REQ_FLUSH_SEQ;	127	rq->cmd_flags &= ~REQ_FLUSH_SEQ;
126	rq->end_io = rq->flush.saved_end_io;	128	rq->end_io = rq->flush.saved_end_io;
		129
		130	blk_clear_rq_complete(rq);
		131	}
		132
		133	static void mq_flush_data_run(struct work_struct *work)
		134	{
		135	struct request *rq;
		136
		137	rq = container_of(work, struct request, mq_flush_data);
		138
		139	memset(&rq->csd, 0, sizeof(rq->csd));
		140	blk_mq_run_request(rq, true, false);
		141	}
		142
		143	static void blk_mq_flush_data_insert(struct request *rq)
		144	{
		145	INIT_WORK(&rq->mq_flush_data, mq_flush_data_run);
		146	kblockd_schedule_work(rq->q, &rq->mq_flush_data);
127	}	147	}
128		148
129	/**	149	/**
@@ -136,7 +156,7 @@ static void blk_flush_restore_request(struct request *rq)
136	* completion and trigger the next step.	156	* completion and trigger the next step.
137	*	157	*
138	* CONTEXT:	158	* CONTEXT:
139	* spin_lock_irq(q->queue_lock)	159	* spin_lock_irq(q->queue_lock or q->mq_flush_lock)
140	*	160	*
141	* RETURNS:	161	* RETURNS:
142	* %true if requests were added to the dispatch queue, %false otherwise.	162	* %true if requests were added to the dispatch queue, %false otherwise.
@@ -146,7 +166,7 @@ static bool blk_flush_complete_seq(struct request *rq, unsigned int seq,
146	{	166	{
147	struct request_queue *q = rq->q;	167	struct request_queue *q = rq->q;
148	struct list_head *pending = &q->flush_queue[q->flush_pending_idx];	168	struct list_head *pending = &q->flush_queue[q->flush_pending_idx];
149	bool queued = false;	169	bool queued = false, kicked;
150		170
151	BUG_ON(rq->flush.seq & seq);	171	BUG_ON(rq->flush.seq & seq);
152	rq->flush.seq |= seq;	172	rq->flush.seq |= seq;
@@ -167,8 +187,12 @@ static bool blk_flush_complete_seq(struct request *rq, unsigned int seq,
167		187
168	case REQ_FSEQ_DATA:	188	case REQ_FSEQ_DATA:
169	list_move_tail(&rq->flush.list, &q->flush_data_in_flight);	189	list_move_tail(&rq->flush.list, &q->flush_data_in_flight);
170	list_add(&rq->queuelist, &q->queue_head);	190	if (q->mq_ops)
171	queued = true;	191	blk_mq_flush_data_insert(rq);
		192	else {
		193	list_add(&rq->queuelist, &q->queue_head);
		194	queued = true;
		195	}
172	break;	196	break;
173		197
174	case REQ_FSEQ_DONE:	198	case REQ_FSEQ_DONE:
@@ -181,28 +205,43 @@ static bool blk_flush_complete_seq(struct request *rq, unsigned int seq,
181	BUG_ON(!list_empty(&rq->queuelist));	205	BUG_ON(!list_empty(&rq->queuelist));
182	list_del_init(&rq->flush.list);	206	list_del_init(&rq->flush.list);
183	blk_flush_restore_request(rq);	207	blk_flush_restore_request(rq);
184	__blk_end_request_all(rq, error);	208	if (q->mq_ops)
		209	blk_mq_end_io(rq, error);
		210	else
		211	__blk_end_request_all(rq, error);
185	break;	212	break;
186		213
187	default:	214	default:
188	BUG();	215	BUG();
189	}	216	}
190		217
191	return blk_kick_flush(q) | queued;	218	kicked = blk_kick_flush(q);
		219	/* blk_mq_run_flush will run queue */
		220	if (q->mq_ops)
		221	return queued;
		222	return kicked | queued;
192	}	223	}
193		224
194	static void flush_end_io(struct request *flush_rq, int error)	225	static void flush_end_io(struct request *flush_rq, int error)
195	{	226	{
196	struct request_queue *q = flush_rq->q;	227	struct request_queue *q = flush_rq->q;
197	struct list_head *running = &q->flush_queue[q->flush_running_idx];	228	struct list_head *running;
198	bool queued = false;	229	bool queued = false;
199	struct request *rq, *n;	230	struct request *rq, *n;
		231	unsigned long flags = 0;
200		232
		233	if (q->mq_ops) {
		234	blk_mq_free_request(flush_rq);
		235	spin_lock_irqsave(&q->mq_flush_lock, flags);
		236	}
		237	running = &q->flush_queue[q->flush_running_idx];
201	BUG_ON(q->flush_pending_idx == q->flush_running_idx);	238	BUG_ON(q->flush_pending_idx == q->flush_running_idx);
202		239
203	/* account completion of the flush request */	240	/* account completion of the flush request */
204	q->flush_running_idx ^= 1;	241	q->flush_running_idx ^= 1;
205	elv_completed_request(q, flush_rq);	242
		243	if (!q->mq_ops)
		244	elv_completed_request(q, flush_rq);
206		245
207	/* and push the waiting requests to the next stage */	246	/* and push the waiting requests to the next stage */
208	list_for_each_entry_safe(rq, n, running, flush.list) {	247	list_for_each_entry_safe(rq, n, running, flush.list) {
@@ -223,9 +262,48 @@ static void flush_end_io(struct request *flush_rq, int error)
223	* directly into request_fn may confuse the driver. Always use	262	* directly into request_fn may confuse the driver. Always use
224	* kblockd.	263	* kblockd.
225	*/	264	*/
226	if (queued || q->flush_queue_delayed)	265	if (queued || q->flush_queue_delayed) {
227	blk_run_queue_async(q);	266	if (!q->mq_ops)
		267	blk_run_queue_async(q);
		268	else
		269	/*
		270	* This can be optimized to only run queues with requests
		271	* queued if necessary.
		272	*/
		273	blk_mq_run_queues(q, true);
		274	}
228	q->flush_queue_delayed = 0;	275	q->flush_queue_delayed = 0;
		276	if (q->mq_ops)
		277	spin_unlock_irqrestore(&q->mq_flush_lock, flags);
		278	}
		279
		280	static void mq_flush_work(struct work_struct *work)
		281	{
		282	struct request_queue *q;
		283	struct request *rq;
		284
		285	q = container_of(work, struct request_queue, mq_flush_work);
		286
		287	/* We don't need set REQ_FLUSH_SEQ, it's for consistency */
		288	rq = blk_mq_alloc_request(q, WRITE_FLUSH|REQ_FLUSH_SEQ,
		289	__GFP_WAIT|GFP_ATOMIC);
		290	rq->cmd_type = REQ_TYPE_FS;
		291	rq->end_io = flush_end_io;
		292
		293	blk_mq_run_request(rq, true, false);
		294	}
		295
		296	/*
		297	* We can't directly use q->flush_rq, because it doesn't have tag and is not in
		298	* hctx->rqs[]. so we must allocate a new request, since we can't sleep here,
		299	* so offload the work to workqueue.
		300	*
		301	* Note: we assume a flush request finished in any hardware queue will flush
		302	* the whole disk cache.
		303	*/
		304	static void mq_run_flush(struct request_queue *q)
		305	{
		306	kblockd_schedule_work(q, &q->mq_flush_work);
229	}	307	}
230		308
231	/**	309	/**
@@ -236,7 +314,7 @@ static void flush_end_io(struct request *flush_rq, int error)
236	* Please read the comment at the top of this file for more info.	314	* Please read the comment at the top of this file for more info.
237	*	315	*
238	* CONTEXT:	316	* CONTEXT:
239	* spin_lock_irq(q->queue_lock)	317	* spin_lock_irq(q->queue_lock or q->mq_flush_lock)
240	*	318	*
241	* RETURNS:	319	* RETURNS:
242	* %true if flush was issued, %false otherwise.	320	* %true if flush was issued, %false otherwise.
@@ -261,13 +339,18 @@ static bool blk_kick_flush(struct request_queue *q)
261	* Issue flush and toggle pending_idx. This makes pending_idx	339	* Issue flush and toggle pending_idx. This makes pending_idx
262	* different from running_idx, which means flush is in flight.	340	* different from running_idx, which means flush is in flight.
263	*/	341	*/
		342	q->flush_pending_idx ^= 1;
		343	if (q->mq_ops) {
		344	mq_run_flush(q);
		345	return true;
		346	}
		347
264	blk_rq_init(q, &q->flush_rq);	348	blk_rq_init(q, &q->flush_rq);
265	q->flush_rq.cmd_type = REQ_TYPE_FS;	349	q->flush_rq.cmd_type = REQ_TYPE_FS;
266	q->flush_rq.cmd_flags = WRITE_FLUSH | REQ_FLUSH_SEQ;	350	q->flush_rq.cmd_flags = WRITE_FLUSH | REQ_FLUSH_SEQ;
267	q->flush_rq.rq_disk = first_rq->rq_disk;	351	q->flush_rq.rq_disk = first_rq->rq_disk;
268	q->flush_rq.end_io = flush_end_io;	352	q->flush_rq.end_io = flush_end_io;
269		353
270	q->flush_pending_idx ^= 1;
271	list_add_tail(&q->flush_rq.queuelist, &q->queue_head);	354	list_add_tail(&q->flush_rq.queuelist, &q->queue_head);
272	return true;	355	return true;
273	}	356	}
@@ -284,16 +367,37 @@ static void flush_data_end_io(struct request *rq, int error)
284	blk_run_queue_async(q);	367	blk_run_queue_async(q);
285	}	368	}
286		369
		370	static void mq_flush_data_end_io(struct request *rq, int error)
		371	{
		372	struct request_queue *q = rq->q;
		373	struct blk_mq_hw_ctx *hctx;
		374	struct blk_mq_ctx *ctx;
		375	unsigned long flags;
		376
		377	ctx = rq->mq_ctx;
		378	hctx = q->mq_ops->map_queue(q, ctx->cpu);
		379
		380	/*
		381	* After populating an empty queue, kick it to avoid stall. Read
		382	* the comment in flush_end_io().
		383	*/
		384	spin_lock_irqsave(&q->mq_flush_lock, flags);
		385	if (blk_flush_complete_seq(rq, REQ_FSEQ_DATA, error))
		386	blk_mq_run_hw_queue(hctx, true);
		387	spin_unlock_irqrestore(&q->mq_flush_lock, flags);
		388	}
		389
287	/**	390	/**
288	* blk_insert_flush - insert a new FLUSH/FUA request	391	* blk_insert_flush - insert a new FLUSH/FUA request
289	* @rq: request to insert	392	* @rq: request to insert
290	*	393	*
291	* To be called from __elv_add_request() for %ELEVATOR_INSERT_FLUSH insertions.	394	* To be called from __elv_add_request() for %ELEVATOR_INSERT_FLUSH insertions.
		395	* or __blk_mq_run_hw_queue() to dispatch request.
292	* @rq is being submitted. Analyze what needs to be done and put it on the	396	* @rq is being submitted. Analyze what needs to be done and put it on the
293	* right queue.	397	* right queue.
294	*	398	*
295	* CONTEXT:	399	* CONTEXT:
296	* spin_lock_irq(q->queue_lock)	400	* spin_lock_irq(q->queue_lock) in !mq case
297	*/	401	*/
298	void blk_insert_flush(struct request *rq)	402	void blk_insert_flush(struct request *rq)
299	{	403	{
@@ -316,7 +420,10 @@ void blk_insert_flush(struct request *rq)
316	* complete the request.	420	* complete the request.
317	*/	421	*/
318	if (!policy) {	422	if (!policy) {
319	__blk_end_bidi_request(rq, 0, 0, 0);	423	if (q->mq_ops)
		424	blk_mq_end_io(rq, 0);
		425	else
		426	__blk_end_bidi_request(rq, 0, 0, 0);
320	return;	427	return;
321	}	428	}
322		429
@@ -329,7 +436,10 @@ void blk_insert_flush(struct request *rq)
329	*/	436	*/
330	if ((policy & REQ_FSEQ_DATA) &&	437	if ((policy & REQ_FSEQ_DATA) &&
331	!(policy & (REQ_FSEQ_PREFLUSH | REQ_FSEQ_POSTFLUSH))) {	438	!(policy & (REQ_FSEQ_PREFLUSH | REQ_FSEQ_POSTFLUSH))) {
332	list_add_tail(&rq->queuelist, &q->queue_head);	439	if (q->mq_ops) {
		440	blk_mq_run_request(rq, false, true);
		441	} else
		442	list_add_tail(&rq->queuelist, &q->queue_head);
333	return;	443	return;
334	}	444	}
335		445
@@ -341,6 +451,14 @@ void blk_insert_flush(struct request *rq)
341	INIT_LIST_HEAD(&rq->flush.list);	451	INIT_LIST_HEAD(&rq->flush.list);
342	rq->cmd_flags |= REQ_FLUSH_SEQ;	452	rq->cmd_flags |= REQ_FLUSH_SEQ;
343	rq->flush.saved_end_io = rq->end_io; /* Usually NULL */	453	rq->flush.saved_end_io = rq->end_io; /* Usually NULL */
		454	if (q->mq_ops) {
		455	rq->end_io = mq_flush_data_end_io;
		456
		457	spin_lock_irq(&q->mq_flush_lock);
		458	blk_flush_complete_seq(rq, REQ_FSEQ_ACTIONS & ~policy, 0);
		459	spin_unlock_irq(&q->mq_flush_lock);
		460	return;
		461	}
344	rq->end_io = flush_data_end_io;	462	rq->end_io = flush_data_end_io;
345		463
346	blk_flush_complete_seq(rq, REQ_FSEQ_ACTIONS & ~policy, 0);	464	blk_flush_complete_seq(rq, REQ_FSEQ_ACTIONS & ~policy, 0);
@@ -453,3 +571,9 @@ int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask,
453	return ret;	571	return ret;
454	}	572	}
455	EXPORT_SYMBOL(blkdev_issue_flush);	573	EXPORT_SYMBOL(blkdev_issue_flush);
		574
		575	void blk_mq_init_flush(struct request_queue *q)
		576	{
		577	spin_lock_init(&q->mq_flush_lock);
		578	INIT_WORK(&q->mq_flush_work, mq_flush_work);
		579	}