path: root/block/blk-flush.c
author		Christoph Hellwig <hch@lst.de>	2014-02-10 11:29:00 -0500
committer	Jens Axboe <axboe@fb.com>	2014-02-10 11:29:00 -0500
commit		18741986a4b1dc4b1f171634c4191abc3b0fa023 (patch)
tree		d0f632fa9b205d5fbcc76ff1cf8cba63112c7da8 /block/blk-flush.c
parent		ce2c350b2cfe5b5ca5023a6b1ec4d21821d39add (diff)
blk-mq: rework flush sequencing logic
Switch to using a preallocated flush_rq for blk-mq, similar to what's done
with the old request path.  This allows us to set up the request properly
with a tag from the actually allowed range and ->rq_disk as needed by some
drivers.  To make life easier we also switch to dynamic allocation of
->flush_rq for the old path.

This effectively reverts most of:

    "blk-mq: fix for flush deadlock"

and

    "blk-mq: Don't reserve a tag for flush request"

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
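The allocation of the preallocated ->flush_rq itself lands in the blk-core.c and
blk-mq.c parts of this commit, which this per-file view does not show. As a rough
illustration of what "dynamic allocation of ->flush_rq for the old path" amounts
to, here is a minimal sketch; the helper name blk_alloc_flush_rq is invented for
illustration and is not from the commit:

	/*
	 * Illustrative sketch only -- the real allocation is in the blk-core.c
	 * and blk-mq.c hunks of this commit, not shown in this per-file diff.
	 * Assumes struct request_queue now carries a "struct request *flush_rq"
	 * pointer, as used in the blk-flush.c changes below.
	 * (Needs <linux/blkdev.h> and <linux/slab.h>.)
	 */
	static int blk_alloc_flush_rq(struct request_queue *q)
	{
		/* one spare request per queue, reused for every flush sequence */
		q->flush_rq = kzalloc(sizeof(struct request), GFP_KERNEL);
		return q->flush_rq ? 0 : -ENOMEM;
	}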
Diffstat (limited to 'block/blk-flush.c')
-rw-r--r--	block/blk-flush.c	105
1 file changed, 38 insertions(+), 67 deletions(-)
diff --git a/block/blk-flush.c b/block/blk-flush.c
index 9143e85226c7..66e2b697f5db 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -130,20 +130,26 @@ static void blk_flush_restore_request(struct request *rq)
 	blk_clear_rq_complete(rq);
 }
 
-static void mq_flush_data_run(struct work_struct *work)
+static void mq_flush_run(struct work_struct *work)
 {
 	struct request *rq;
 
-	rq = container_of(work, struct request, mq_flush_data);
+	rq = container_of(work, struct request, mq_flush_work);
 
 	memset(&rq->csd, 0, sizeof(rq->csd));
 	blk_mq_run_request(rq, true, false);
 }
 
-static void blk_mq_flush_data_insert(struct request *rq)
+static bool blk_flush_queue_rq(struct request *rq)
 {
-	INIT_WORK(&rq->mq_flush_data, mq_flush_data_run);
-	kblockd_schedule_work(rq->q, &rq->mq_flush_data);
+	if (rq->q->mq_ops) {
+		INIT_WORK(&rq->mq_flush_work, mq_flush_run);
+		kblockd_schedule_work(rq->q, &rq->mq_flush_work);
+		return false;
+	} else {
+		list_add_tail(&rq->queuelist, &rq->q->queue_head);
+		return true;
+	}
 }
 
 /**
@@ -187,12 +193,7 @@ static bool blk_flush_complete_seq(struct request *rq, unsigned int seq,
 
 	case REQ_FSEQ_DATA:
 		list_move_tail(&rq->flush.list, &q->flush_data_in_flight);
-		if (q->mq_ops)
-			blk_mq_flush_data_insert(rq);
-		else {
-			list_add(&rq->queuelist, &q->queue_head);
-			queued = true;
-		}
+		queued = blk_flush_queue_rq(rq);
 		break;
 
 	case REQ_FSEQ_DONE:
@@ -216,9 +217,6 @@ static bool blk_flush_complete_seq(struct request *rq, unsigned int seq,
 	}
 
 	kicked = blk_kick_flush(q);
-	/* blk_mq_run_flush will run queue */
-	if (q->mq_ops)
-		return queued;
 	return kicked | queued;
 }
 
@@ -230,10 +228,9 @@ static void flush_end_io(struct request *flush_rq, int error)
 	struct request *rq, *n;
 	unsigned long flags = 0;
 
-	if (q->mq_ops) {
-		blk_mq_free_request(flush_rq);
+	if (q->mq_ops)
 		spin_lock_irqsave(&q->mq_flush_lock, flags);
-	}
+
 	running = &q->flush_queue[q->flush_running_idx];
 	BUG_ON(q->flush_pending_idx == q->flush_running_idx);
 
@@ -263,48 +260,14 @@ static void flush_end_io(struct request *flush_rq, int error)
 	 * kblockd.
 	 */
 	if (queued || q->flush_queue_delayed) {
-		if (!q->mq_ops)
-			blk_run_queue_async(q);
-		else
-		/*
-		 * This can be optimized to only run queues with requests
-		 * queued if necessary.
-		 */
-			blk_mq_run_queues(q, true);
+		WARN_ON(q->mq_ops);
+		blk_run_queue_async(q);
 	}
 	q->flush_queue_delayed = 0;
 	if (q->mq_ops)
 		spin_unlock_irqrestore(&q->mq_flush_lock, flags);
 }
 
-static void mq_flush_work(struct work_struct *work)
-{
-	struct request_queue *q;
-	struct request *rq;
-
-	q = container_of(work, struct request_queue, mq_flush_work);
-
-	rq = blk_mq_alloc_request(q, WRITE_FLUSH|REQ_FLUSH_SEQ,
-		__GFP_WAIT|GFP_ATOMIC, false);
-	rq->cmd_type = REQ_TYPE_FS;
-	rq->end_io = flush_end_io;
-
-	blk_mq_run_request(rq, true, false);
-}
-
-/*
- * We can't directly use q->flush_rq, because it doesn't have tag and is not in
- * hctx->rqs[]. so we must allocate a new request, since we can't sleep here,
- * so offload the work to workqueue.
- *
- * Note: we assume a flush request finished in any hardware queue will flush
- * the whole disk cache.
- */
-static void mq_run_flush(struct request_queue *q)
-{
-	kblockd_schedule_work(q, &q->mq_flush_work);
-}
-
 /**
  * blk_kick_flush - consider issuing flush request
  * @q: request_queue being kicked
@@ -339,19 +302,31 @@ static bool blk_kick_flush(struct request_queue *q)
 	 * different from running_idx, which means flush is in flight.
 	 */
 	q->flush_pending_idx ^= 1;
+
 	if (q->mq_ops) {
-		mq_run_flush(q);
-		return true;
+		struct blk_mq_ctx *ctx = first_rq->mq_ctx;
+		struct blk_mq_hw_ctx *hctx = q->mq_ops->map_queue(q, ctx->cpu);
+
+		blk_mq_rq_init(hctx, q->flush_rq);
+		q->flush_rq->mq_ctx = ctx;
+
+		/*
+		 * Reuse the tag value from the first waiting request,
+		 * with blk-mq the tag is generated during request
+		 * allocation and drivers can rely on it being inside
+		 * the range they asked for.
+		 */
+		q->flush_rq->tag = first_rq->tag;
+	} else {
+		blk_rq_init(q, q->flush_rq);
 	}
 
-	blk_rq_init(q, &q->flush_rq);
-	q->flush_rq.cmd_type = REQ_TYPE_FS;
-	q->flush_rq.cmd_flags = WRITE_FLUSH | REQ_FLUSH_SEQ;
-	q->flush_rq.rq_disk = first_rq->rq_disk;
-	q->flush_rq.end_io = flush_end_io;
+	q->flush_rq->cmd_type = REQ_TYPE_FS;
+	q->flush_rq->cmd_flags = WRITE_FLUSH | REQ_FLUSH_SEQ;
+	q->flush_rq->rq_disk = first_rq->rq_disk;
+	q->flush_rq->end_io = flush_end_io;
 
-	list_add_tail(&q->flush_rq.queuelist, &q->queue_head);
-	return true;
+	return blk_flush_queue_rq(q->flush_rq);
 }
 
 static void flush_data_end_io(struct request *rq, int error)
@@ -407,11 +382,8 @@ void blk_insert_flush(struct request *rq)
 	/*
 	 * @policy now records what operations need to be done. Adjust
 	 * REQ_FLUSH and FUA for the driver.
-	 * We keep REQ_FLUSH for mq to track flush requests. For !FUA,
-	 * we never dispatch the request directly.
 	 */
-	if (rq->cmd_flags & REQ_FUA)
-		rq->cmd_flags &= ~REQ_FLUSH;
+	rq->cmd_flags &= ~REQ_FLUSH;
 	if (!(fflags & REQ_FUA))
 		rq->cmd_flags &= ~REQ_FUA;
 
@@ -560,5 +532,4 @@ EXPORT_SYMBOL(blkdev_issue_flush);
 void blk_mq_init_flush(struct request_queue *q)
 {
 	spin_lock_init(&q->mq_flush_lock);
-	INIT_WORK(&q->mq_flush_work, mq_flush_work);
 }
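The tag-reuse comment added in blk_kick_flush() above is the heart of the change:
blk-mq drivers commonly size per-hardware-queue command state by the tag depth
they requested and index it with rq->tag, so the flush request must carry a tag
from that same range. A hypothetical driver-side fragment, not part of this
commit and with all mydrv_* names invented for illustration, sketches why that
matters (queue_rq signature as in the blk-mq API of this era):

	/* Hypothetical ->queue_rq of a blk-mq driver; mydrv_* are made up. */
	struct mydrv_cmd;			/* device-specific command slot */
	struct mydrv_queue {
		struct mydrv_cmd *cmds;		/* array sized by the requested tag depth */
	};

	static int mydrv_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *rq)
	{
		struct mydrv_queue *mq = hctx->driver_data;
		/*
		 * rq->tag is expected to be inside the depth the driver asked
		 * for, so it can index the command array directly; a standalone
		 * preallocated flush_rq has no tag of its own, which is why
		 * blk_kick_flush() above borrows first_rq->tag.
		 */
		struct mydrv_cmd *cmd = &mq->cmds[rq->tag];

		mydrv_submit_cmd(cmd, rq);	/* placeholder for the actual submission */
		return BLK_MQ_RQ_QUEUE_OK;
	}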