author     Jens Axboe <axboe@fb.com>   2014-05-22 12:40:51 -0400
committer  Jens Axboe <axboe@fb.com>   2014-05-22 12:43:07 -0400
commit     07068d5b8ed8fa6759b2826ba9197e49b69a1fc3
tree       3e13c8e1028e3722be926487acae5473c1eda2be   /block/blk-mq.c
parent     484b4061e6683e0e6a09c7455f80781128dc8a6b
blk-mq: split make request handler for multi and single queue
We want slightly different behavior from single queue and multi queue devices:

- On single queue devices, we currently use the per-process plug
  for deferred IO and for merging.

- On multi queue devices, we don't use the per-process plug, but
  we want to go straight to hardware for SYNC IO.

Split blk_mq_make_request() into a blk_sq_make_request() for single
queue devices, and retain blk_mq_make_request() for multi queue
devices. Then we don't need multiple checks for q->nr_hw_queues
in the request mapping.
Signed-off-by: Jens Axboe <axboe@fb.com>
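
[Editorial sketch] The structural point of the patch is that the single-queue vs multi-queue decision is made once, when the queue is initialized, instead of being re-checked on every bio submission. The following is a minimal, self-contained C sketch of that pattern only; it is not kernel code, and the names (toy_queue, toy_bio, submit_mq, submit_sq, toy_init_queue) are invented for illustration.

#include <stdio.h>

/* Toy model: pick the make_request handler once at init time, so the
 * per-IO path never has to look at nr_hw_queues. All names invented. */
struct toy_bio {
	int sync;
};

struct toy_queue {
	int nr_hw_queues;
	void (*make_request)(struct toy_queue *q, struct toy_bio *bio);
};

/* Multi queue variant: skip the per-process plug, issue sync IO directly. */
static void submit_mq(struct toy_queue *q, struct toy_bio *bio)
{
	printf("mq path (%d hw queues): %s\n", q->nr_hw_queues,
	       bio->sync ? "dispatch to hardware now" : "queue for later dispatch");
}

/* Single queue variant: lean on the per-process plug for deferral/merging. */
static void submit_sq(struct toy_queue *q, struct toy_bio *bio)
{
	(void)bio;
	printf("sq path (%d hw queue): add to plug list for merging\n",
	       q->nr_hw_queues);
}

/* The nr_hw_queues check happens exactly once, here, not per submitted IO. */
static void toy_init_queue(struct toy_queue *q, int nr_hw_queues)
{
	q->nr_hw_queues = nr_hw_queues;
	q->make_request = (nr_hw_queues > 1) ? submit_mq : submit_sq;
}

int main(void)
{
	struct toy_queue q;
	struct toy_bio bio = { .sync = 1 };

	toy_init_queue(&q, 4);		/* multi queue device */
	q.make_request(&q, &bio);

	toy_init_queue(&q, 1);		/* single queue device */
	q.make_request(&q, &bio);
	return 0;
}

In the actual patch the same once-per-queue selection is done in blk_mq_init_queue(), where blk_queue_make_request() is handed either blk_mq_make_request() or blk_sq_make_request() (see the last hunk of the diff below).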
Diffstat (limited to 'block/blk-mq.c')
-rw-r--r--   block/blk-mq.c   207
1 file changed, 157 insertions, 50 deletions
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 103aa1dbc000..54e78863c083 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1072,43 +1072,57 @@ static void blk_mq_bio_to_request(struct request *rq, struct bio *bio)
 	blk_account_io_start(rq, 1);
 }
 
-static void blk_mq_make_request(struct request_queue *q, struct bio *bio)
+static inline bool blk_mq_merge_queue_io(struct blk_mq_hw_ctx *hctx,
+					 struct blk_mq_ctx *ctx,
+					 struct request *rq, struct bio *bio)
 {
-	struct blk_mq_hw_ctx *hctx;
-	struct blk_mq_ctx *ctx;
-	const int is_sync = rw_is_sync(bio->bi_rw);
-	const int is_flush_fua = bio->bi_rw & (REQ_FLUSH | REQ_FUA);
-	int rw = bio_data_dir(bio);
-	struct request *rq;
-	unsigned int use_plug, request_count = 0;
-
-	/*
-	 * If we have multiple hardware queues, just go directly to
-	 * one of those for sync IO.
-	 */
-	use_plug = !is_flush_fua && ((q->nr_hw_queues == 1) || !is_sync);
+	struct request_queue *q = hctx->queue;
 
-	blk_queue_bounce(q, &bio);
+	if (!(hctx->flags & BLK_MQ_F_SHOULD_MERGE)) {
+		blk_mq_bio_to_request(rq, bio);
+		spin_lock(&ctx->lock);
+insert_rq:
+		__blk_mq_insert_request(hctx, rq, false);
+		spin_unlock(&ctx->lock);
+		return false;
+	} else {
+		spin_lock(&ctx->lock);
+		if (!blk_mq_attempt_merge(q, ctx, bio)) {
+			blk_mq_bio_to_request(rq, bio);
+			goto insert_rq;
+		}
 
-	if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) {
-		bio_endio(bio, -EIO);
-		return;
+		spin_unlock(&ctx->lock);
+		__blk_mq_free_request(hctx, ctx, rq);
+		return true;
 	}
+}
 
-	if (use_plug && !blk_queue_nomerges(q) &&
-	    blk_attempt_plug_merge(q, bio, &request_count))
-		return;
+struct blk_map_ctx {
+	struct blk_mq_hw_ctx *hctx;
+	struct blk_mq_ctx *ctx;
+};
+
+static struct request *blk_mq_map_request(struct request_queue *q,
+					  struct bio *bio,
+					  struct blk_map_ctx *data)
+{
+	struct blk_mq_hw_ctx *hctx;
+	struct blk_mq_ctx *ctx;
+	struct request *rq;
+	int rw = bio_data_dir(bio);
 
-	if (blk_mq_queue_enter(q)) {
+	if (unlikely(blk_mq_queue_enter(q))) {
 		bio_endio(bio, -EIO);
-		return;
+		return NULL;
 	}
 
 	ctx = blk_mq_get_ctx(q);
 	hctx = q->mq_ops->map_queue(q, ctx->cpu);
 
-	if (is_sync)
+	if (rw_is_sync(bio->bi_rw))
 		rw |= REQ_SYNC;
+
 	trace_block_getrq(q, bio, rw);
 	rq = __blk_mq_alloc_request(hctx, ctx, GFP_ATOMIC, false);
 	if (likely(rq))
@@ -1123,6 +1137,109 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio)
 	}
 
 	hctx->queued++;
+	data->hctx = hctx;
+	data->ctx = ctx;
+	return rq;
+}
+
+/*
+ * Multiple hardware queue variant. This will not use per-process plugs,
+ * but will attempt to bypass the hctx queueing if we can go straight to
+ * hardware for SYNC IO.
+ */
+static void blk_mq_make_request(struct request_queue *q, struct bio *bio)
+{
+	const int is_sync = rw_is_sync(bio->bi_rw);
+	const int is_flush_fua = bio->bi_rw & (REQ_FLUSH | REQ_FUA);
+	struct blk_map_ctx data;
+	struct request *rq;
+
+	blk_queue_bounce(q, &bio);
+
+	if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) {
+		bio_endio(bio, -EIO);
+		return;
+	}
+
+	rq = blk_mq_map_request(q, bio, &data);
+	if (unlikely(!rq))
+		return;
+
+	if (unlikely(is_flush_fua)) {
+		blk_mq_bio_to_request(rq, bio);
+		blk_insert_flush(rq);
+		goto run_queue;
+	}
+
+	if (is_sync) {
+		int ret;
+
+		blk_mq_bio_to_request(rq, bio);
+		blk_mq_start_request(rq, true);
+
+		/*
+		 * For OK queue, we are done. For error, kill it. Any other
+		 * error (busy), just add it to our list as we previously
+		 * would have done
+		 */
+		ret = q->mq_ops->queue_rq(data.hctx, rq);
+		if (ret == BLK_MQ_RQ_QUEUE_OK)
+			goto done;
+		else {
+			__blk_mq_requeue_request(rq);
+
+			if (ret == BLK_MQ_RQ_QUEUE_ERROR) {
+				rq->errors = -EIO;
+				blk_mq_end_io(rq, rq->errors);
+				goto done;
+			}
+		}
+	}
+
+	if (!blk_mq_merge_queue_io(data.hctx, data.ctx, rq, bio)) {
+		/*
+		 * For a SYNC request, send it to the hardware immediately. For
+		 * an ASYNC request, just ensure that we run it later on. The
+		 * latter allows for merging opportunities and more efficient
+		 * dispatching.
+		 */
+run_queue:
+		blk_mq_run_hw_queue(data.hctx, !is_sync || is_flush_fua);
+	}
+done:
+	blk_mq_put_ctx(data.ctx);
+}
+
+/*
+ * Single hardware queue variant. This will attempt to use any per-process
+ * plug for merging and IO deferral.
+ */
+static void blk_sq_make_request(struct request_queue *q, struct bio *bio)
+{
+	const int is_sync = rw_is_sync(bio->bi_rw);
+	const int is_flush_fua = bio->bi_rw & (REQ_FLUSH | REQ_FUA);
+	unsigned int use_plug, request_count = 0;
+	struct blk_map_ctx data;
+	struct request *rq;
+
+	/*
+	 * If we have multiple hardware queues, just go directly to
+	 * one of those for sync IO.
+	 */
+	use_plug = !is_flush_fua && !is_sync;
+
+	blk_queue_bounce(q, &bio);
+
+	if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) {
+		bio_endio(bio, -EIO);
+		return;
+	}
+
+	if (use_plug && !blk_queue_nomerges(q) &&
+	    blk_attempt_plug_merge(q, bio, &request_count))
+		return;
+
+	rq = blk_mq_map_request(q, bio, &data);
 
 	if (unlikely(is_flush_fua)) {
 		blk_mq_bio_to_request(rq, bio);
@@ -1147,37 +1264,23 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio)
 				trace_block_plug(q);
 			}
 			list_add_tail(&rq->queuelist, &plug->mq_list);
-			blk_mq_put_ctx(ctx);
+			blk_mq_put_ctx(data.ctx);
 			return;
 		}
 	}
 
-	if (!(hctx->flags & BLK_MQ_F_SHOULD_MERGE)) {
-		blk_mq_bio_to_request(rq, bio);
-		spin_lock(&ctx->lock);
-insert_rq:
-		__blk_mq_insert_request(hctx, rq, false);
-		spin_unlock(&ctx->lock);
-	} else {
-		spin_lock(&ctx->lock);
-		if (!blk_mq_attempt_merge(q, ctx, bio)) {
-			blk_mq_bio_to_request(rq, bio);
-			goto insert_rq;
-		}
-
-		spin_unlock(&ctx->lock);
-		__blk_mq_free_request(hctx, ctx, rq);
+	if (!blk_mq_merge_queue_io(data.hctx, data.ctx, rq, bio)) {
+		/*
+		 * For a SYNC request, send it to the hardware immediately. For
+		 * an ASYNC request, just ensure that we run it later on. The
+		 * latter allows for merging opportunities and more efficient
+		 * dispatching.
+		 */
+run_queue:
+		blk_mq_run_hw_queue(data.hctx, !is_sync || is_flush_fua);
 	}
 
-
-	/*
-	 * For a SYNC request, send it to the hardware immediately. For an
-	 * ASYNC request, just ensure that we run it later on. The latter
-	 * allows for merging opportunities and more efficient dispatching.
-	 */
-run_queue:
-	blk_mq_run_hw_queue(hctx, !is_sync || is_flush_fua);
-	blk_mq_put_ctx(ctx);
+	blk_mq_put_ctx(data.ctx);
 }
 
 /*
@@ -1670,7 +1773,11 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
 
 	q->sg_reserved_size = INT_MAX;
 
-	blk_queue_make_request(q, blk_mq_make_request);
+	if (q->nr_hw_queues > 1)
+		blk_queue_make_request(q, blk_mq_make_request);
+	else
+		blk_queue_make_request(q, blk_sq_make_request);
+
 	blk_queue_rq_timed_out(q, blk_mq_rq_timed_out);
 	if (set->timeout)
 		blk_queue_rq_timeout(q, set->timeout);