author	Shaohua Li <shli@kernel.org>	2013-12-30 22:38:50 -0500
committer	Jens Axboe <axboe@kernel.dk>	2014-01-30 14:57:25 -0500
commit	f0276924fa35a3607920a58cf5d878212824b951 (patch)
tree	5759cef09f3ba6b2f206ace779fef298a8b9d7be /block
parent	d835502f3dacad1638d516ab156d66f0ba377cf5 (diff)
blk-mq: Don't reserve a tag for flush request
Reserving a tag (request) for flush to avoid a deadlock is overkill. A tag is a valuable resource. We can instead track the number of pending flush requests and disallow allocating too many of them. With this patch, blk_mq_alloc_request_pinned() may do a busy nop (but not loop forever) if too many flush requests are already pending when a new flush request is allocated. This should not be a problem: having too many pending flush requests is a very rare case.

I verified this fixes the deadlock caused by too many pending flush requests.

Signed-off-by: Shaohua Li <shli@fusionio.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
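For illustration only, here is a minimal standalone userspace sketch (not kernel code) of the counting scheme described above: a flush allocation bumps an atomic pending_flush counter and backs off when admitting it would leave no tag for regular I/O. The field names pending_flush, queue_depth and reserved_tags mirror the patch; struct hw_ctx, flush_may_alloc() and flush_done() are hypothetical names, and the tag pool itself is omitted.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct hw_ctx {
	atomic_int pending_flush;	/* flushes currently holding a tag */
	int queue_depth;		/* total number of tags */
	int reserved_tags;		/* tags set aside for other users */
};

/*
 * Returns true if a flush may take a tag, false if the caller should back
 * off and retry later (the "busy nop" mentioned in the commit message).
 */
static bool flush_may_alloc(struct hw_ctx *hctx)
{
	/* atomic_fetch_add returns the old value, so +1 gives the new count */
	if (atomic_fetch_add(&hctx->pending_flush, 1) + 1 >=
	    hctx->queue_depth - hctx->reserved_tags - 1) {
		atomic_fetch_sub(&hctx->pending_flush, 1);
		return false;
	}
	return true;
}

static void flush_done(struct hw_ctx *hctx)
{
	atomic_fetch_sub(&hctx->pending_flush, 1);
}

int main(void)
{
	struct hw_ctx hctx = { .queue_depth = 4, .reserved_tags = 0 };

	for (int i = 0; i < 4; i++)
		printf("flush %d: %s\n", i,
		       flush_may_alloc(&hctx) ? "allowed" : "rejected");
	flush_done(&hctx);		/* one in-flight flush completes */
	printf("after completion: %s\n",
	       flush_may_alloc(&hctx) ? "allowed" : "rejected");
	return 0;
}

With queue_depth = 4 and no reserved tags, at most two flushes are admitted at once; the third and fourth attempts are rejected until one of the pending flushes completes.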
Diffstat (limited to 'block')
-rw-r--r--	block/blk-flush.c	 8
-rw-r--r--	block/blk-mq.c	46
2 files changed, 35 insertions, 19 deletions
diff --git a/block/blk-flush.c b/block/blk-flush.c
index 9288aaf35c21..9143e85226c7 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -284,9 +284,8 @@ static void mq_flush_work(struct work_struct *work)
 
 	q = container_of(work, struct request_queue, mq_flush_work);
 
-	/* We don't need set REQ_FLUSH_SEQ, it's for consistency */
 	rq = blk_mq_alloc_request(q, WRITE_FLUSH|REQ_FLUSH_SEQ,
-		__GFP_WAIT|GFP_ATOMIC, true);
+		__GFP_WAIT|GFP_ATOMIC, false);
 	rq->cmd_type = REQ_TYPE_FS;
 	rq->end_io = flush_end_io;
 
@@ -408,8 +407,11 @@ void blk_insert_flush(struct request *rq)
 	/*
 	 * @policy now records what operations need to be done. Adjust
 	 * REQ_FLUSH and FUA for the driver.
+	 * We keep REQ_FLUSH for mq to track flush requests. For !FUA,
+	 * we never dispatch the request directly.
 	 */
-	rq->cmd_flags &= ~REQ_FLUSH;
+	if (rq->cmd_flags & REQ_FUA)
+		rq->cmd_flags &= ~REQ_FLUSH;
 	if (!(fflags & REQ_FUA))
 		rq->cmd_flags &= ~REQ_FUA;
 
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 57039fcd9c93..9072d0ab184f 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -194,9 +194,27 @@ static void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx,
 }
 
 static struct request *__blk_mq_alloc_request(struct blk_mq_hw_ctx *hctx,
-					      gfp_t gfp, bool reserved)
+					      gfp_t gfp, bool reserved,
+					      int rw)
 {
-	return blk_mq_alloc_rq(hctx, gfp, reserved);
+	struct request *req;
+	bool is_flush = false;
+	/*
+	 * flush need allocate a request, leave at least one request for
+	 * non-flush IO to avoid deadlock
+	 */
+	if ((rw & REQ_FLUSH) && !(rw & REQ_FLUSH_SEQ)) {
+		if (atomic_inc_return(&hctx->pending_flush) >=
+		    hctx->queue_depth - hctx->reserved_tags - 1) {
+			atomic_dec(&hctx->pending_flush);
+			return NULL;
+		}
+		is_flush = true;
+	}
+	req = blk_mq_alloc_rq(hctx, gfp, reserved);
+	if (!req && is_flush)
+		atomic_dec(&hctx->pending_flush);
+	return req;
 }
 
 static struct request *blk_mq_alloc_request_pinned(struct request_queue *q,
@@ -209,7 +227,7 @@ static struct request *blk_mq_alloc_request_pinned(struct request_queue *q,
 		struct blk_mq_ctx *ctx = blk_mq_get_ctx(q);
 		struct blk_mq_hw_ctx *hctx = q->mq_ops->map_queue(q, ctx->cpu);
 
-		rq = __blk_mq_alloc_request(hctx, gfp & ~__GFP_WAIT, reserved);
+		rq = __blk_mq_alloc_request(hctx, gfp & ~__GFP_WAIT, reserved, rw);
 		if (rq) {
 			blk_mq_rq_ctx_init(q, ctx, rq, rw);
 			break;
@@ -272,6 +290,9 @@ static void __blk_mq_free_request(struct blk_mq_hw_ctx *hctx,
 	const int tag = rq->tag;
 	struct request_queue *q = rq->q;
 
+	if ((rq->cmd_flags & REQ_FLUSH) && !(rq->cmd_flags & REQ_FLUSH_SEQ))
+		atomic_dec(&hctx->pending_flush);
+
 	blk_mq_rq_init(hctx, rq);
 	blk_mq_put_tag(hctx->tags, tag);
 
@@ -900,14 +921,14 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio)
 	hctx = q->mq_ops->map_queue(q, ctx->cpu);
 
 	trace_block_getrq(q, bio, rw);
-	rq = __blk_mq_alloc_request(hctx, GFP_ATOMIC, false);
+	rq = __blk_mq_alloc_request(hctx, GFP_ATOMIC, false, bio->bi_rw);
 	if (likely(rq))
-		blk_mq_rq_ctx_init(q, ctx, rq, rw);
+		blk_mq_rq_ctx_init(q, ctx, rq, bio->bi_rw);
 	else {
 		blk_mq_put_ctx(ctx);
 		trace_block_sleeprq(q, bio, rw);
-		rq = blk_mq_alloc_request_pinned(q, rw, __GFP_WAIT|GFP_ATOMIC,
-				false);
+		rq = blk_mq_alloc_request_pinned(q, bio->bi_rw,
+				__GFP_WAIT|GFP_ATOMIC, false);
 		ctx = rq->mq_ctx;
 		hctx = q->mq_ops->map_queue(q, ctx->cpu);
 	}
@@ -1184,7 +1205,9 @@ static int blk_mq_init_hw_queues(struct request_queue *q,
 		hctx->queue_num = i;
 		hctx->flags = reg->flags;
 		hctx->queue_depth = reg->queue_depth;
+		hctx->reserved_tags = reg->reserved_tags;
 		hctx->cmd_size = reg->cmd_size;
+		atomic_set(&hctx->pending_flush, 0);
 
 		blk_mq_init_cpu_notifier(&hctx->cpu_notifier,
 					blk_mq_hctx_notify, hctx);
@@ -1309,15 +1332,6 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_reg *reg,
 		reg->queue_depth = BLK_MQ_MAX_DEPTH;
 	}
 
-	/*
-	 * Set aside a tag for flush requests. It will only be used while
-	 * another flush request is in progress but outside the driver.
-	 *
-	 * TODO: only allocate if flushes are supported
-	 */
-	reg->queue_depth++;
-	reg->reserved_tags++;
-
 	if (reg->queue_depth < (reg->reserved_tags + BLK_MQ_TAG_MIN))
 		return ERR_PTR(-EINVAL);
 