author     Shaohua Li <shli@kernel.org>    2013-12-30 22:38:50 -0500
committer  Jens Axboe <axboe@kernel.dk>    2014-01-30 14:57:25 -0500
commit     f0276924fa35a3607920a58cf5d878212824b951
tree       5759cef09f3ba6b2f206ace779fef298a8b9d7be /block
parent     d835502f3dacad1638d516ab156d66f0ba377cf5
blk-mq: Don't reserve a tag for flush request
Reserving a tag (request) for flushes to avoid deadlock is overkill. A
tag is a valuable resource. Instead, track the number of pending flush
requests and refuse to allocate a new flush request once too many are
outstanding. With this patch, blk_mq_alloc_request_pinned() can
busy-retry (but never dead-loop) if a new flush request is allocated
while too many flush requests are already pending. This should not be a
problem, since having that many pending flush requests is a very rare
case.

I verified that this fixes the deadlock caused by too many pending
flush requests.
Signed-off-by: Shaohua Li <shli@fusionio.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
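
For readers who want to poke at the accounting on its own, here is a minimal
userspace sketch of the scheme described above. It uses C11 atomics in place
of the kernel's atomic_t; the QUEUE_DEPTH/RESERVED_TAGS values and the
fake_alloc_tag() helper are made-up stand-ins rather than kernel API, and the
kernel code in the diff below is the authoritative version.

    /*
     * Sketch of the pending-flush accounting: bump a counter before trying
     * to allocate a flush request, and back off if that would leave no tag
     * for non-flush I/O.
     */
    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdio.h>

    #define QUEUE_DEPTH   4   /* hypothetical tag-set size */
    #define RESERVED_TAGS 0

    static atomic_int pending_flush;

    /* Pretend tag allocator: may fail in real life, always succeeds here. */
    static bool fake_alloc_tag(void)
    {
        return true;
    }

    /*
     * Mirrors the __blk_mq_alloc_request() logic from the patch: a flush is
     * only admitted while at least one tag is left over for non-flush I/O,
     * so a pile-up of flushes can no longer consume every tag and deadlock.
     */
    static bool alloc_request(bool is_flush)
    {
        if (is_flush) {
            /* atomic_fetch_add returns the old value, so +1 is the new count */
            if (atomic_fetch_add(&pending_flush, 1) + 1 >=
                QUEUE_DEPTH - RESERVED_TAGS - 1) {
                atomic_fetch_sub(&pending_flush, 1);
                return false;   /* caller retries, like the busy-retry above */
            }
        }
        if (!fake_alloc_tag()) {
            if (is_flush)
                atomic_fetch_sub(&pending_flush, 1);
            return false;
        }
        return true;
    }

    static void free_request(bool is_flush)
    {
        if (is_flush)
            atomic_fetch_sub(&pending_flush, 1);
    }

    int main(void)
    {
        /* With depth 4, only the first two flushes are admitted. */
        for (int i = 0; i < 4; i++)
            printf("flush %d admitted: %d\n", i, alloc_request(true));
        free_request(true);
        return 0;
    }

In the real patch a NULL return from __blk_mq_alloc_request() is not an
error: blk_mq_alloc_request_pinned() simply retries the allocation, which is
the busy-retry mentioned in the commit message.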
Diffstat (limited to 'block')
 -rw-r--r--  block/blk-flush.c |  8
 -rw-r--r--  block/blk-mq.c    | 46
 2 files changed, 35 insertions(+), 19 deletions(-)
diff --git a/block/blk-flush.c b/block/blk-flush.c
index 9288aaf35c21..9143e85226c7 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -284,9 +284,8 @@ static void mq_flush_work(struct work_struct *work)
 
         q = container_of(work, struct request_queue, mq_flush_work);
 
-        /* We don't need set REQ_FLUSH_SEQ, it's for consistency */
         rq = blk_mq_alloc_request(q, WRITE_FLUSH|REQ_FLUSH_SEQ,
-                __GFP_WAIT|GFP_ATOMIC, true);
+                __GFP_WAIT|GFP_ATOMIC, false);
         rq->cmd_type = REQ_TYPE_FS;
         rq->end_io = flush_end_io;
 
@@ -408,8 +407,11 @@ void blk_insert_flush(struct request *rq)
         /*
          * @policy now records what operations need to be done. Adjust
          * REQ_FLUSH and FUA for the driver.
+         * We keep REQ_FLUSH for mq to track flush requests. For !FUA,
+         * we never dispatch the request directly.
          */
-        rq->cmd_flags &= ~REQ_FLUSH;
+        if (rq->cmd_flags & REQ_FUA)
+                rq->cmd_flags &= ~REQ_FLUSH;
         if (!(fflags & REQ_FUA))
                 rq->cmd_flags &= ~REQ_FUA;
 
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 57039fcd9c93..9072d0ab184f 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -194,9 +194,27 @@ static void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx,
 }
 
 static struct request *__blk_mq_alloc_request(struct blk_mq_hw_ctx *hctx,
-                                              gfp_t gfp, bool reserved)
+                                              gfp_t gfp, bool reserved,
+                                              int rw)
 {
-        return blk_mq_alloc_rq(hctx, gfp, reserved);
+        struct request *req;
+        bool is_flush = false;
+        /*
+         * flush need allocate a request, leave at least one request for
+         * non-flush IO to avoid deadlock
+         */
+        if ((rw & REQ_FLUSH) && !(rw & REQ_FLUSH_SEQ)) {
+                if (atomic_inc_return(&hctx->pending_flush) >=
+                    hctx->queue_depth - hctx->reserved_tags - 1) {
+                        atomic_dec(&hctx->pending_flush);
+                        return NULL;
+                }
+                is_flush = true;
+        }
+        req = blk_mq_alloc_rq(hctx, gfp, reserved);
+        if (!req && is_flush)
+                atomic_dec(&hctx->pending_flush);
+        return req;
 }
 
 static struct request *blk_mq_alloc_request_pinned(struct request_queue *q,
@@ -209,7 +227,7 @@ static struct request *blk_mq_alloc_request_pinned(struct request_queue *q,
                 struct blk_mq_ctx *ctx = blk_mq_get_ctx(q);
                 struct blk_mq_hw_ctx *hctx = q->mq_ops->map_queue(q, ctx->cpu);
 
-                rq = __blk_mq_alloc_request(hctx, gfp & ~__GFP_WAIT, reserved);
+                rq = __blk_mq_alloc_request(hctx, gfp & ~__GFP_WAIT, reserved, rw);
                 if (rq) {
                         blk_mq_rq_ctx_init(q, ctx, rq, rw);
                         break;
@@ -272,6 +290,9 @@ static void __blk_mq_free_request(struct blk_mq_hw_ctx *hctx,
         const int tag = rq->tag;
         struct request_queue *q = rq->q;
 
+        if ((rq->cmd_flags & REQ_FLUSH) && !(rq->cmd_flags & REQ_FLUSH_SEQ))
+                atomic_dec(&hctx->pending_flush);
+
         blk_mq_rq_init(hctx, rq);
         blk_mq_put_tag(hctx->tags, tag);
 
@@ -900,14 +921,14 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio)
         hctx = q->mq_ops->map_queue(q, ctx->cpu);
 
         trace_block_getrq(q, bio, rw);
-        rq = __blk_mq_alloc_request(hctx, GFP_ATOMIC, false);
+        rq = __blk_mq_alloc_request(hctx, GFP_ATOMIC, false, bio->bi_rw);
         if (likely(rq))
-                blk_mq_rq_ctx_init(q, ctx, rq, rw);
+                blk_mq_rq_ctx_init(q, ctx, rq, bio->bi_rw);
         else {
                 blk_mq_put_ctx(ctx);
                 trace_block_sleeprq(q, bio, rw);
-                rq = blk_mq_alloc_request_pinned(q, rw, __GFP_WAIT|GFP_ATOMIC,
-                                                        false);
+                rq = blk_mq_alloc_request_pinned(q, bio->bi_rw,
+                                __GFP_WAIT|GFP_ATOMIC, false);
                 ctx = rq->mq_ctx;
                 hctx = q->mq_ops->map_queue(q, ctx->cpu);
         }
@@ -1184,7 +1205,9 @@ static int blk_mq_init_hw_queues(struct request_queue *q,
                 hctx->queue_num = i;
                 hctx->flags = reg->flags;
                 hctx->queue_depth = reg->queue_depth;
+                hctx->reserved_tags = reg->reserved_tags;
                 hctx->cmd_size = reg->cmd_size;
+                atomic_set(&hctx->pending_flush, 0);
 
                 blk_mq_init_cpu_notifier(&hctx->cpu_notifier,
                                                 blk_mq_hctx_notify, hctx);
@@ -1309,15 +1332,6 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_reg *reg,
                 reg->queue_depth = BLK_MQ_MAX_DEPTH;
         }
 
-        /*
-         * Set aside a tag for flush requests. It will only be used while
-         * another flush request is in progress but outside the driver.
-         *
-         * TODO: only allocate if flushes are supported
-         */
-        reg->queue_depth++;
-        reg->reserved_tags++;
-
         if (reg->queue_depth < (reg->reserved_tags + BLK_MQ_TAG_MIN))
                 return ERR_PTR(-EINVAL);
 