author | Jens Axboe <axboe@fb.com> | 2017-01-17 08:03:22 -0500
---|---|---
committer | Jens Axboe <axboe@fb.com> | 2017-01-17 12:04:20 -0500
commit | bd166ef183c263c5ced656d49ef19c7da4adc774 (patch)
tree | 449bbd3b4e671b370b96e3846b2281116e7089e9 /block/blk-mq-sched.c
parent | 2af8cbe30531eca73c8f3ba277f155fc0020b01a (diff)
blk-mq-sched: add framework for MQ capable IO schedulers
This adds a set of hooks that intercept the blk-mq path of
allocating/inserting/issuing/completing requests, allowing
us to develop a scheduler within that framework.
We reuse the existing elevator scheduler API on the registration
side, but augment it with scheduler flagging support for the
blk-mq interface, and with a separate set of ops hooks for MQ
devices.
We split driver and scheduler tags, so we can run the scheduling
independently of device queue depth.
Signed-off-by: Jens Axboe <axboe@fb.com>
Reviewed-by: Bart Van Assche <bart.vanassche@sandisk.com>
Reviewed-by: Omar Sandoval <osandov@fb.com>
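For orientation, the framework below calls out to a scheduler through `ops.mq` hooks (`get_request`, `put_request`, `dispatch_requests`, `bio_merge`), and registration still goes through the existing elevator API as the commit message notes. The sketch below is purely illustrative and not part of this commit: the `dispatch_requests` and `bio_merge` signatures are inferred from the call sites in blk-mq-sched.c, while the `insert_requests` hook and the `uses_mq` flag are assumptions based on the commit message, as are all handler bodies and names.

```c
/*
 * Illustrative sketch only: a trivial FIFO scheduler wired into the new
 * ops.mq hooks. Hook names marked "assumed" are not taken from this file.
 * An in-tree scheduler living under block/ would include the internal
 * headers directly, as done here.
 */
#include <linux/module.h>
#include <linux/blkdev.h>
#include <linux/blk-mq.h>
#include <linux/elevator.h>
#include <linux/list.h>
#include <linux/spinlock.h>

#include "blk-mq-sched.h"	/* exported helpers added by this patch */

static LIST_HEAD(example_fifo);
static DEFINE_SPINLOCK(example_lock);

/* Assumed insert hook: queue incoming requests FIFO inside the scheduler. */
static void example_insert_requests(struct blk_mq_hw_ctx *hctx,
				    struct list_head *list, bool at_head)
{
	struct request *rq, *next;

	spin_lock(&example_lock);
	list_for_each_entry_safe(rq, next, list, queuelist) {
		list_del_init(&rq->queuelist);
		list_add_tail(&rq->queuelist, &example_fifo);
		blk_mq_sched_request_inserted(rq);
	}
	spin_unlock(&example_lock);
}

/* Pop the oldest queued request, or NULL if the FIFO is empty. */
static struct request *example_next_rq(struct blk_mq_hw_ctx *hctx)
{
	struct request *rq = NULL;

	spin_lock(&example_lock);
	if (!list_empty(&example_fifo)) {
		rq = list_first_entry(&example_fifo, struct request, queuelist);
		list_del_init(&rq->queuelist);
	}
	spin_unlock(&example_lock);
	return rq;
}

static void example_dispatch_requests(struct blk_mq_hw_ctx *hctx,
				      struct list_head *rq_list)
{
	/* Drain our FIFO onto the dispatch list via the new helper. */
	blk_mq_sched_move_to_dispatch(hctx, rq_list, example_next_rq);
}

static bool example_bio_merge(struct blk_mq_hw_ctx *hctx, struct bio *bio)
{
	/*
	 * A real scheduler would call blk_mq_sched_try_merge() here under
	 * its own lock; a plain FIFO simply declines to merge.
	 */
	return false;
}

static struct elevator_type example_mq_sched = {
	.ops.mq = {
		.insert_requests	= example_insert_requests,	/* assumed */
		.dispatch_requests	= example_dispatch_requests,
		.bio_merge		= example_bio_merge,
	},
	.uses_mq	= true,		/* assumed blk-mq scheduler flag */
	.elevator_name	= "example-fifo",
	.elevator_owner	= THIS_MODULE,
};

static int __init example_init(void)
{
	return elv_register(&example_mq_sched);
}

static void __exit example_exit(void)
{
	elv_unregister(&example_mq_sched);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");
```

With something of this shape registered, requests allocated through blk_mq_sched_get_request() would come out of the per-hctx scheduler tags set up in blk_mq_sched_setup() below, and blk_mq_sched_dispatch_requests() would pull from the scheduler only when the hardware dispatch list is empty.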
Diffstat (limited to 'block/blk-mq-sched.c')
-rw-r--r-- | block/blk-mq-sched.c | 368
1 file changed, 368 insertions, 0 deletions
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
new file mode 100644
index 000000000000..26759798a0b3
--- /dev/null
+++ b/block/blk-mq-sched.c
@@ -0,0 +1,368 @@
/*
 * blk-mq scheduling framework
 *
 * Copyright (C) 2016 Jens Axboe
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/blk-mq.h>

#include <trace/events/block.h>

#include "blk.h"
#include "blk-mq.h"
#include "blk-mq-sched.h"
#include "blk-mq-tag.h"
#include "blk-wbt.h"

void blk_mq_sched_free_hctx_data(struct request_queue *q,
				 void (*exit)(struct blk_mq_hw_ctx *))
{
	struct blk_mq_hw_ctx *hctx;
	int i;

	queue_for_each_hw_ctx(q, hctx, i) {
		if (exit && hctx->sched_data)
			exit(hctx);
		kfree(hctx->sched_data);
		hctx->sched_data = NULL;
	}
}
EXPORT_SYMBOL_GPL(blk_mq_sched_free_hctx_data);

int blk_mq_sched_init_hctx_data(struct request_queue *q, size_t size,
				int (*init)(struct blk_mq_hw_ctx *),
				void (*exit)(struct blk_mq_hw_ctx *))
{
	struct blk_mq_hw_ctx *hctx;
	int ret;
	int i;

	queue_for_each_hw_ctx(q, hctx, i) {
		hctx->sched_data = kmalloc_node(size, GFP_KERNEL, hctx->numa_node);
		if (!hctx->sched_data) {
			ret = -ENOMEM;
			goto error;
		}

		if (init) {
			ret = init(hctx);
			if (ret) {
				/*
				 * We don't want to give exit() a partially
				 * initialized sched_data. init() must clean up
				 * if it fails.
				 */
				kfree(hctx->sched_data);
				hctx->sched_data = NULL;
				goto error;
			}
		}
	}

	return 0;
error:
	blk_mq_sched_free_hctx_data(q, exit);
	return ret;
}
EXPORT_SYMBOL_GPL(blk_mq_sched_init_hctx_data);

static void __blk_mq_sched_assign_ioc(struct request_queue *q,
				      struct request *rq, struct io_context *ioc)
{
	struct io_cq *icq;

	spin_lock_irq(q->queue_lock);
	icq = ioc_lookup_icq(ioc, q);
	spin_unlock_irq(q->queue_lock);

	if (!icq) {
		icq = ioc_create_icq(ioc, q, GFP_ATOMIC);
		if (!icq)
			return;
	}

	rq->elv.icq = icq;
	if (!blk_mq_sched_get_rq_priv(q, rq)) {
		rq->rq_flags |= RQF_ELVPRIV;
		get_io_context(icq->ioc);
		return;
	}

	rq->elv.icq = NULL;
}

static void blk_mq_sched_assign_ioc(struct request_queue *q,
				    struct request *rq, struct bio *bio)
{
	struct io_context *ioc;

	ioc = rq_ioc(bio);
	if (ioc)
		__blk_mq_sched_assign_ioc(q, rq, ioc);
}

struct request *blk_mq_sched_get_request(struct request_queue *q,
					 struct bio *bio,
					 unsigned int op,
					 struct blk_mq_alloc_data *data)
{
	struct elevator_queue *e = q->elevator;
	struct blk_mq_hw_ctx *hctx;
	struct blk_mq_ctx *ctx;
	struct request *rq;
	const bool is_flush = op & (REQ_PREFLUSH | REQ_FUA);

	blk_queue_enter_live(q);
	ctx = blk_mq_get_ctx(q);
	hctx = blk_mq_map_queue(q, ctx->cpu);

	blk_mq_set_alloc_data(data, q, 0, ctx, hctx);

	if (e) {
		data->flags |= BLK_MQ_REQ_INTERNAL;

		/*
		 * Flush requests are special and go directly to the
		 * dispatch list.
		 */
		if (!is_flush && e->type->ops.mq.get_request) {
			rq = e->type->ops.mq.get_request(q, op, data);
			if (rq)
				rq->rq_flags |= RQF_QUEUED;
		} else
			rq = __blk_mq_alloc_request(data, op);
	} else {
		rq = __blk_mq_alloc_request(data, op);
		data->hctx->tags->rqs[rq->tag] = rq;
	}

	if (rq) {
		if (!is_flush) {
			rq->elv.icq = NULL;
			if (e && e->type->icq_cache)
				blk_mq_sched_assign_ioc(q, rq, bio);
		}
		data->hctx->queued++;
		return rq;
	}

	blk_queue_exit(q);
	return NULL;
}

void blk_mq_sched_put_request(struct request *rq)
{
	struct request_queue *q = rq->q;
	struct elevator_queue *e = q->elevator;

	if (rq->rq_flags & RQF_ELVPRIV) {
		blk_mq_sched_put_rq_priv(rq->q, rq);
		if (rq->elv.icq) {
			put_io_context(rq->elv.icq->ioc);
			rq->elv.icq = NULL;
		}
	}

	if ((rq->rq_flags & RQF_QUEUED) && e && e->type->ops.mq.put_request)
		e->type->ops.mq.put_request(rq);
	else
		blk_mq_finish_request(rq);
}

void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
{
	struct elevator_queue *e = hctx->queue->elevator;
	LIST_HEAD(rq_list);

	if (unlikely(blk_mq_hctx_stopped(hctx)))
		return;

	hctx->run++;

	/*
	 * If we have previous entries on our dispatch list, grab them first for
	 * more fair dispatch.
	 */
	if (!list_empty_careful(&hctx->dispatch)) {
		spin_lock(&hctx->lock);
		if (!list_empty(&hctx->dispatch))
			list_splice_init(&hctx->dispatch, &rq_list);
		spin_unlock(&hctx->lock);
	}

	/*
	 * Only ask the scheduler for requests, if we didn't have residual
	 * requests from the dispatch list. This is to avoid the case where
	 * we only ever dispatch a fraction of the requests available because
	 * of low device queue depth. Once we pull requests out of the IO
	 * scheduler, we can no longer merge or sort them. So it's best to
	 * leave them there for as long as we can. Mark the hw queue as
	 * needing a restart in that case.
	 */
	if (list_empty(&rq_list)) {
		if (e && e->type->ops.mq.dispatch_requests)
			e->type->ops.mq.dispatch_requests(hctx, &rq_list);
		else
			blk_mq_flush_busy_ctxs(hctx, &rq_list);
	} else
		blk_mq_sched_mark_restart(hctx);

	blk_mq_dispatch_rq_list(hctx, &rq_list);
}

void blk_mq_sched_move_to_dispatch(struct blk_mq_hw_ctx *hctx,
				   struct list_head *rq_list,
				   struct request *(*get_rq)(struct blk_mq_hw_ctx *))
{
	do {
		struct request *rq;

		rq = get_rq(hctx);
		if (!rq)
			break;

		list_add_tail(&rq->queuelist, rq_list);
	} while (1);
}
EXPORT_SYMBOL_GPL(blk_mq_sched_move_to_dispatch);

bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio)
{
	struct request *rq;
	int ret;

	ret = elv_merge(q, &rq, bio);
	if (ret == ELEVATOR_BACK_MERGE) {
		if (!blk_mq_sched_allow_merge(q, rq, bio))
			return false;
		if (bio_attempt_back_merge(q, rq, bio)) {
			if (!attempt_back_merge(q, rq))
				elv_merged_request(q, rq, ret);
			return true;
		}
	} else if (ret == ELEVATOR_FRONT_MERGE) {
		if (!blk_mq_sched_allow_merge(q, rq, bio))
			return false;
		if (bio_attempt_front_merge(q, rq, bio)) {
			if (!attempt_front_merge(q, rq))
				elv_merged_request(q, rq, ret);
			return true;
		}
	}

	return false;
}
EXPORT_SYMBOL_GPL(blk_mq_sched_try_merge);

bool __blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio)
{
	struct elevator_queue *e = q->elevator;

	if (e->type->ops.mq.bio_merge) {
		struct blk_mq_ctx *ctx = blk_mq_get_ctx(q);
		struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);

		blk_mq_put_ctx(ctx);
		return e->type->ops.mq.bio_merge(hctx, bio);
	}

	return false;
}

bool blk_mq_sched_try_insert_merge(struct request_queue *q, struct request *rq)
{
	return rq_mergeable(rq) && elv_attempt_insert_merge(q, rq);
}
EXPORT_SYMBOL_GPL(blk_mq_sched_try_insert_merge);

void blk_mq_sched_request_inserted(struct request *rq)
{
	trace_block_rq_insert(rq->q, rq);
}
EXPORT_SYMBOL_GPL(blk_mq_sched_request_inserted);

bool blk_mq_sched_bypass_insert(struct blk_mq_hw_ctx *hctx, struct request *rq)
{
	if (rq->tag == -1) {
		rq->rq_flags |= RQF_SORTED;
		return false;
	}

	/*
	 * If we already have a real request tag, send directly to
	 * the dispatch list.
	 */
	spin_lock(&hctx->lock);
	list_add(&rq->queuelist, &hctx->dispatch);
	spin_unlock(&hctx->lock);
	return true;
}
EXPORT_SYMBOL_GPL(blk_mq_sched_bypass_insert);

static void blk_mq_sched_free_tags(struct blk_mq_tag_set *set,
				   struct blk_mq_hw_ctx *hctx,
				   unsigned int hctx_idx)
{
	if (hctx->sched_tags) {
		blk_mq_free_rqs(set, hctx->sched_tags, hctx_idx);
		blk_mq_free_rq_map(hctx->sched_tags);
		hctx->sched_tags = NULL;
	}
}

int blk_mq_sched_setup(struct request_queue *q)
{
	struct blk_mq_tag_set *set = q->tag_set;
	struct blk_mq_hw_ctx *hctx;
	int ret, i;

	/*
	 * Default to 256, since we don't split into sync/async like the
	 * old code did. Additionally, this is a per-hw queue depth.
	 */
	q->nr_requests = 2 * BLKDEV_MAX_RQ;

	/*
	 * We're switching to using an IO scheduler, so setup the hctx
	 * scheduler tags and switch the request map from the regular
	 * tags to scheduler tags. First allocate what we need, so we
	 * can safely fail and fallback, if needed.
	 */
	ret = 0;
	queue_for_each_hw_ctx(q, hctx, i) {
		hctx->sched_tags = blk_mq_alloc_rq_map(set, i, q->nr_requests, 0);
		if (!hctx->sched_tags) {
			ret = -ENOMEM;
			break;
		}
		ret = blk_mq_alloc_rqs(set, hctx->sched_tags, i, q->nr_requests);
		if (ret)
			break;
	}

	/*
	 * If we failed, free what we did allocate
	 */
	if (ret) {
		queue_for_each_hw_ctx(q, hctx, i) {
			if (!hctx->sched_tags)
				continue;
			blk_mq_sched_free_tags(set, hctx, i);
		}

		return ret;
	}

	return 0;
}

void blk_mq_sched_teardown(struct request_queue *q)
{
	struct blk_mq_tag_set *set = q->tag_set;
	struct blk_mq_hw_ctx *hctx;
	int i;

	queue_for_each_hw_ctx(q, hctx, i)
		blk_mq_sched_free_tags(set, hctx, i);
}