Diffstat (limited to 'block/blk-mq.c')
 -rw-r--r--  block/blk-mq.c  162
 1 file changed, 119 insertions(+), 43 deletions(-)
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 702df07b980d..255d79c14dc1 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -112,18 +112,31 @@ static void blk_mq_usage_counter_release(struct percpu_ref *ref)
  */
 void blk_mq_freeze_queue(struct request_queue *q)
 {
+        bool freeze;
+
         spin_lock_irq(q->queue_lock);
-        q->mq_freeze_depth++;
+        freeze = !q->mq_freeze_depth++;
         spin_unlock_irq(q->queue_lock);
 
-        percpu_ref_kill(&q->mq_usage_counter);
-        blk_mq_run_queues(q, false);
+        if (freeze) {
+                /*
+                 * XXX: Temporary kludge to work around SCSI blk-mq stall.
+                 * SCSI synchronously creates and destroys many queues
+                 * back-to-back during probe leading to lengthy stalls.
+                 * This will be fixed by keeping ->mq_usage_counter in
+                 * atomic mode until genhd registration, but, for now,
+                 * let's work around using expedited synchronization.
+                 */
+                __percpu_ref_kill_expedited(&q->mq_usage_counter);
+
+                blk_mq_run_queues(q, false);
+        }
         wait_event(q->mq_freeze_wq, percpu_ref_is_zero(&q->mq_usage_counter));
 }
 
 static void blk_mq_unfreeze_queue(struct request_queue *q)
 {
-        bool wake = false;
+        bool wake;
 
         spin_lock_irq(q->queue_lock);
         wake = !--q->mq_freeze_depth;
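Aside (not part of the patch): the freeze bookkeeping added above is a depth-counter idiom: only the 0 -> 1 transition in blk_mq_freeze_queue() actually kills ->mq_usage_counter, and only the 1 -> 0 transition in blk_mq_unfreeze_queue() restores it and wakes waiters. A minimal user-space sketch of the same idiom, with invented names and a pthread mutex standing in for q->queue_lock:

/* Illustrative model only; not kernel code. */
#include <pthread.h>
#include <stdbool.h>

struct demo_queue {
        pthread_mutex_t lock;
        unsigned int freeze_depth;
};

/* Returns true only on the 0 -> 1 transition; the caller then performs the
 * expensive "kill the usage counter" step exactly once. */
static bool demo_freeze(struct demo_queue *q)
{
        bool first;

        pthread_mutex_lock(&q->lock);
        first = !q->freeze_depth++;
        pthread_mutex_unlock(&q->lock);
        return first;
}

/* Returns true only on the 1 -> 0 transition; the caller then re-enables
 * the queue and wakes anyone waiting for the freeze to end. */
static bool demo_unfreeze(struct demo_queue *q)
{
        bool last;

        pthread_mutex_lock(&q->lock);
        last = !--q->freeze_depth;
        pthread_mutex_unlock(&q->lock);
        return last;
}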
@@ -172,6 +185,8 @@ static void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx,
         /* tag was already set */
         rq->errors = 0;
 
+        rq->cmd = rq->__cmd;
+
         rq->extra_len = 0;
         rq->sense_len = 0;
         rq->resid_len = 0;
@@ -197,7 +212,6 @@ __blk_mq_alloc_request(struct blk_mq_alloc_data *data, int rw)
         if (tag != BLK_MQ_TAG_FAIL) {
                 rq = data->hctx->tags->rqs[tag];
 
-                rq->cmd_flags = 0;
                 if (blk_mq_tag_busy(data->hctx)) {
                         rq->cmd_flags = REQ_MQ_INFLIGHT;
                         atomic_inc(&data->hctx->nr_active);
@@ -252,6 +266,7 @@ static void __blk_mq_free_request(struct blk_mq_hw_ctx *hctx,
 
         if (rq->cmd_flags & REQ_MQ_INFLIGHT)
                 atomic_dec(&hctx->nr_active);
+        rq->cmd_flags = 0;
 
         clear_bit(REQ_ATOM_STARTED, &rq->atomic_flags);
         blk_mq_put_tag(hctx, tag, &ctx->last_tag);
@@ -387,6 +402,12 @@ static void blk_mq_start_request(struct request *rq, bool last)
         blk_add_timer(rq);
 
         /*
+         * Ensure that ->deadline is visible before we set the started
+         * flag and clear the completed flag.
+         */
+        smp_mb__before_atomic();
+
+        /*
          * Mark us as started and clear complete. Complete might have been
          * set if requeue raced with timeout, which then marked it as
          * complete. So be sure to clear complete again when we start
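Aside (not part of the patch): the barrier added above enforces a publish ordering: rq->deadline must be visible before the STARTED bit, so a concurrent timeout check never acts on a stale deadline. A rough user-space C11 model of that ordering, using release/acquire in place of smp_mb__before_atomic() plus set_bit(); all names are invented:

/* Illustrative model only; not kernel code. */
#include <stdatomic.h>
#include <stdbool.h>

struct demo_rq {
        unsigned long deadline;   /* plain payload */
        atomic_bool started;      /* publication flag */
};

static void demo_start_request(struct demo_rq *rq, unsigned long deadline)
{
        rq->deadline = deadline;
        /* Release: the deadline store above is visible once 'started' is seen. */
        atomic_store_explicit(&rq->started, true, memory_order_release);
}

static bool demo_request_timed_out(const struct demo_rq *rq, unsigned long now)
{
        /* Acquire pairs with the release store in demo_start_request(). */
        if (!atomic_load_explicit(&rq->started, memory_order_acquire))
                return false;
        return now > rq->deadline;   /* deadline is up to date here */
}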
@@ -467,7 +488,11 @@ static void blk_mq_requeue_work(struct work_struct *work)
                 blk_mq_insert_request(rq, false, false, false);
         }
 
-        blk_mq_run_queues(q, false);
+        /*
+         * Use the start variant of queue running here, so that running
+         * the requeue work will kick stopped queues.
+         */
+        blk_mq_start_hw_queues(q);
 }
 
 void blk_mq_add_to_requeue_list(struct request *rq, bool at_head)
@@ -951,14 +976,9 @@ void blk_mq_insert_request(struct request *rq, bool at_head, bool run_queue,
 
         hctx = q->mq_ops->map_queue(q, ctx->cpu);
 
-        if (rq->cmd_flags & (REQ_FLUSH | REQ_FUA) &&
-            !(rq->cmd_flags & (REQ_FLUSH_SEQ))) {
-                blk_insert_flush(rq);
-        } else {
-                spin_lock(&ctx->lock);
-                __blk_mq_insert_request(hctx, rq, at_head);
-                spin_unlock(&ctx->lock);
-        }
+        spin_lock(&ctx->lock);
+        __blk_mq_insert_request(hctx, rq, at_head);
+        spin_unlock(&ctx->lock);
 
         if (run_queue)
                 blk_mq_run_hw_queue(hctx, async);
@@ -1068,13 +1088,17 @@ static void blk_mq_bio_to_request(struct request *rq, struct bio *bio)
         blk_account_io_start(rq, 1);
 }
 
+static inline bool hctx_allow_merges(struct blk_mq_hw_ctx *hctx)
+{
+        return (hctx->flags & BLK_MQ_F_SHOULD_MERGE) &&
+                !blk_queue_nomerges(hctx->queue);
+}
+
 static inline bool blk_mq_merge_queue_io(struct blk_mq_hw_ctx *hctx,
                                          struct blk_mq_ctx *ctx,
                                          struct request *rq, struct bio *bio)
 {
-        struct request_queue *q = hctx->queue;
-
-        if (!(hctx->flags & BLK_MQ_F_SHOULD_MERGE)) {
+        if (!hctx_allow_merges(hctx)) {
                 blk_mq_bio_to_request(rq, bio);
                 spin_lock(&ctx->lock);
 insert_rq:
@@ -1082,6 +1106,8 @@ insert_rq:
                 spin_unlock(&ctx->lock);
                 return false;
         } else {
+                struct request_queue *q = hctx->queue;
+
                 spin_lock(&ctx->lock);
                 if (!blk_mq_attempt_merge(q, ctx, bio)) {
                         blk_mq_bio_to_request(rq, bio);
@@ -1309,6 +1335,7 @@ static void blk_mq_free_rq_map(struct blk_mq_tag_set *set,
                         continue;
                 set->ops->exit_request(set->driver_data, tags->rqs[i],
                                        hctx_idx, i);
+                tags->rqs[i] = NULL;
         }
 }
 
@@ -1342,8 +1369,9 @@ static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set,
 
         INIT_LIST_HEAD(&tags->page_list);
 
-        tags->rqs = kmalloc_node(set->queue_depth * sizeof(struct request *),
-                                 GFP_KERNEL, set->numa_node);
+        tags->rqs = kzalloc_node(set->queue_depth * sizeof(struct request *),
+                                 GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY,
+                                 set->numa_node);
         if (!tags->rqs) {
                 blk_mq_free_tags(tags);
                 return NULL;
@@ -1367,8 +1395,9 @@ static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set,
                         this_order--;
 
                 do {
-                        page = alloc_pages_node(set->numa_node, GFP_KERNEL,
-                                                this_order);
+                        page = alloc_pages_node(set->numa_node,
+                                GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY,
+                                this_order);
                         if (page)
                                 break;
                         if (!this_order--)
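Aside (not part of the patch): the two hunks above make the request-map allocations opportunistic: ask for a large contiguous chunk with __GFP_NOWARN | __GFP_NORETRY so the page allocator neither warns nor tries too hard, and drop to a smaller order on failure. A rough user-space sketch of the falling-order loop, with malloc() standing in for alloc_pages_node() and all names invented:

/* Illustrative model only; not kernel code.  Assumes max_order >= min_order. */
#include <stdlib.h>

#define DEMO_PAGE_SHIFT 12   /* pretend 4K pages */

static void *demo_alloc_chunk(unsigned int max_order, unsigned int min_order,
                              unsigned int *got_order)
{
        unsigned int order;

        for (order = max_order; ; order--) {
                /* Try the biggest chunk first; failure is expected and cheap,
                 * so there is no warning and no aggressive retry. */
                void *p = malloc((size_t)1 << (order + DEMO_PAGE_SHIFT));
                if (p) {
                        *got_order = order;
                        return p;
                }
                if (order == min_order)
                        return NULL;   /* caller falls back to more, smaller chunks */
        }
}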
@@ -1389,11 +1418,15 @@ static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set,
                 left -= to_do * rq_size;
                 for (j = 0; j < to_do; j++) {
                         tags->rqs[i] = p;
+                        tags->rqs[i]->atomic_flags = 0;
+                        tags->rqs[i]->cmd_flags = 0;
                         if (set->ops->init_request) {
                                 if (set->ops->init_request(set->driver_data,
                                                 tags->rqs[i], hctx_idx, i,
-                                                set->numa_node))
+                                                set->numa_node)) {
+                                        tags->rqs[i] = NULL;
                                         goto fail;
+                                }
                         }
 
                         p += rq_size;
@@ -1404,7 +1437,6 @@ static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set,
         return tags;
 
 fail:
-        pr_warn("%s: failed to allocate requests\n", __func__);
         blk_mq_free_rq_map(set, tags, hctx_idx);
         return NULL;
 }
@@ -1574,7 +1606,7 @@ static int blk_mq_init_hw_queues(struct request_queue *q,
                 hctx->tags = set->tags[i];
 
                 /*
-                 * Allocate space for all possible cpus to avoid allocation in
+                 * Allocate space for all possible cpus to avoid allocation at
                  * runtime
                  */
                 hctx->ctxs = kmalloc_node(nr_cpu_ids * sizeof(void *),
@@ -1662,8 +1694,8 @@ static void blk_mq_map_swqueue(struct request_queue *q)
 
         queue_for_each_hw_ctx(q, hctx, i) {
                 /*
-                 * If not software queues are mapped to this hardware queue,
-                 * disable it and free the request entries
+                 * If no software queues are mapped to this hardware queue,
+                 * disable it and free the request entries.
                  */
                 if (!hctx->nr_ctx) {
                         struct blk_mq_tag_set *set = q->tag_set;
@@ -1713,14 +1745,10 @@ static void blk_mq_del_queue_tag_set(struct request_queue *q)
 {
         struct blk_mq_tag_set *set = q->tag_set;
 
-        blk_mq_freeze_queue(q);
-
         mutex_lock(&set->tag_list_lock);
         list_del_init(&q->tag_set_list);
         blk_mq_update_tag_set_depth(set);
         mutex_unlock(&set->tag_list_lock);
-
-        blk_mq_unfreeze_queue(q);
 }
 
 static void blk_mq_add_queue_tag_set(struct blk_mq_tag_set *set,
@@ -1929,6 +1957,60 @@ static int blk_mq_queue_reinit_notify(struct notifier_block *nb,
         return NOTIFY_OK;
 }
 
+static int __blk_mq_alloc_rq_maps(struct blk_mq_tag_set *set)
+{
+        int i;
+
+        for (i = 0; i < set->nr_hw_queues; i++) {
+                set->tags[i] = blk_mq_init_rq_map(set, i);
+                if (!set->tags[i])
+                        goto out_unwind;
+        }
+
+        return 0;
+
+out_unwind:
+        while (--i >= 0)
+                blk_mq_free_rq_map(set, set->tags[i], i);
+
+        return -ENOMEM;
+}
+
+/*
+ * Allocate the request maps associated with this tag_set. Note that this
+ * may reduce the depth asked for, if memory is tight. set->queue_depth
+ * will be updated to reflect the allocated depth.
+ */
+static int blk_mq_alloc_rq_maps(struct blk_mq_tag_set *set)
+{
+        unsigned int depth;
+        int err;
+
+        depth = set->queue_depth;
+        do {
+                err = __blk_mq_alloc_rq_maps(set);
+                if (!err)
+                        break;
+
+                set->queue_depth >>= 1;
+                if (set->queue_depth < set->reserved_tags + BLK_MQ_TAG_MIN) {
+                        err = -ENOMEM;
+                        break;
+                }
+        } while (set->queue_depth);
+
+        if (!set->queue_depth || err) {
+                pr_err("blk-mq: failed to allocate request map\n");
+                return -ENOMEM;
+        }
+
+        if (depth != set->queue_depth)
+                pr_info("blk-mq: reduced tag depth (%u -> %u)\n",
+                        depth, set->queue_depth);
+
+        return 0;
+}
+
 /*
  * Alloc a tag set to be associated with one or more request queues.
  * May fail with EINVAL for various error conditions. May adjust the
@@ -1937,8 +2019,6 @@ static int blk_mq_queue_reinit_notify(struct notifier_block *nb,
  */
 int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
 {
-        int i;
-
         if (!set->nr_hw_queues)
                 return -EINVAL;
         if (!set->queue_depth)
@@ -1959,23 +2039,18 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
                                  sizeof(struct blk_mq_tags *),
                                  GFP_KERNEL, set->numa_node);
         if (!set->tags)
-                goto out;
+                return -ENOMEM;
 
-        for (i = 0; i < set->nr_hw_queues; i++) {
-                set->tags[i] = blk_mq_init_rq_map(set, i);
-                if (!set->tags[i])
-                        goto out_unwind;
-        }
+        if (blk_mq_alloc_rq_maps(set))
+                goto enomem;
 
         mutex_init(&set->tag_list_lock);
         INIT_LIST_HEAD(&set->tag_list);
 
         return 0;
-
-out_unwind:
-        while (--i >= 0)
-                blk_mq_free_rq_map(set, set->tags[i], i);
-out:
+enomem:
+        kfree(set->tags);
+        set->tags = NULL;
         return -ENOMEM;
 }
 EXPORT_SYMBOL(blk_mq_alloc_tag_set);
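Aside (not part of the patch): blk_mq_alloc_rq_maps(), added earlier in this diff and called above, applies a simple fallback policy: if the request maps cannot be allocated at the requested queue depth, halve the depth and retry until a floor is hit, then report the reduced depth. A rough user-space sketch of that policy; the allocator and constants are invented stand-ins:

/* Illustrative model only; not kernel code. */
#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

#define DEMO_MIN_DEPTH 4   /* stands in for reserved_tags + BLK_MQ_TAG_MIN */

/* Pretend allocator: succeeds only for modest depths. */
static bool demo_alloc_all_maps(unsigned int depth)
{
        return depth <= 64;
}

static int demo_alloc_with_fallback(unsigned int *depth)
{
        unsigned int asked = *depth;

        while (*depth >= DEMO_MIN_DEPTH) {
                if (demo_alloc_all_maps(*depth)) {
                        if (*depth != asked)
                                fprintf(stderr, "reduced depth %u -> %u\n",
                                        asked, *depth);
                        return 0;
                }
                *depth >>= 1;   /* memory is tight: ask for half as much */
        }
        return -ENOMEM;
}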
@@ -1990,6 +2065,7 @@ void blk_mq_free_tag_set(struct blk_mq_tag_set *set)
         }
 
         kfree(set->tags);
+        set->tags = NULL;
 }
 EXPORT_SYMBOL(blk_mq_free_tag_set);
 