author     Jens Axboe <axboe@fb.com>    2014-05-21 16:01:15 -0400
committer  Jens Axboe <axboe@fb.com>    2014-05-21 16:01:15 -0400
commit     484b4061e6683e0e6a09c7455f80781128dc8a6b
tree       614cc76c04c6a7e17278839d28b3e26608a63f82
parent     e814e71ba4a6e1d7509b0f4b1928365ea650cace
blk-mq: save memory by freeing requests on unused hardware queues
Depending on the topology of the machine and the number of queues exposed by a device, we can end up in a situation where some of the hardware queues are unused (as in, they don't map to any software queues). For this case, free up the memory used by the request map, as we will not use it. This can be a substantial amount of memory, depending on the number of queues vs CPUs and the queue depth of the device.

Signed-off-by: Jens Axboe <axboe@fb.com>
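As an illustration of the saving (not part of the patch): the following standalone C sketch models the same situation under simplified, made-up names. hw_queue, rq_map, map_sw_to_hw, NR_HW, QUEUE_DEPTH and RQ_SIZE are all illustrative, not blk-mq's. A device exposes more hardware queues than the machine has CPUs, software (per-CPU) contexts are mapped onto the hardware queues, and the request memory of any hardware queue that received no software context is then freed, which is the behaviour this patch adds to blk_mq_map_swqueue().

#include <stdio.h>
#include <stdlib.h>

struct hw_queue {
        unsigned int nr_ctx;    /* software (per-CPU) contexts mapped here */
        void *rq_map;           /* preallocated request memory, or NULL */
};

/* Naive cpu -> hardware queue mapping; with nr_hw > nr_cpus some
 * hardware queues never receive a software context. */
static void map_sw_to_hw(struct hw_queue *hw, int nr_hw, int nr_cpus)
{
        for (int cpu = 0; cpu < nr_cpus; cpu++)
                hw[cpu % nr_hw].nr_ctx++;
}

int main(void)
{
        enum { NR_HW = 8, NR_CPUS = 4, QUEUE_DEPTH = 256, RQ_SIZE = 512 };
        struct hw_queue hw[NR_HW] = { { 0 } };
        size_t freed = 0;

        /* every hardware queue starts out with a full request map */
        for (int i = 0; i < NR_HW; i++)
                hw[i].rq_map = calloc(QUEUE_DEPTH, RQ_SIZE);

        map_sw_to_hw(hw, NR_HW, NR_CPUS);

        /* the idea of this patch: queues nobody maps to give their
         * request memory back instead of holding it forever */
        for (int i = 0; i < NR_HW; i++) {
                if (!hw[i].nr_ctx && hw[i].rq_map) {
                        free(hw[i].rq_map);
                        hw[i].rq_map = NULL;
                        freed += (size_t)QUEUE_DEPTH * RQ_SIZE;
                }
        }

        printf("freed %zu bytes on unused hardware queues\n", freed);

        for (int i = 0; i < NR_HW; i++)
                free(hw[i].rq_map);     /* free(NULL) is a no-op */
        return 0;
}

With these illustrative numbers (8 hardware queues, 4 CPUs, depth 256, 512 bytes per request) the four unused queues hold 512 KB that would otherwise never be touched; the real saving scales with queue count, queue depth and per-request size. The patch also has to recreate the map if a hotplugged CPU later maps to a previously unused hardware queue, which is what the new blk_mq_hctx_cpu_online() notifier in the diff below handles.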
Diffstat (limited to 'block')
-rw-r--r--  block/blk-mq.c  157
1 file changed, 105 insertions(+), 52 deletions(-)
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 5a3683fc5bdb..103aa1dbc000 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -597,8 +597,16 @@ static void blk_mq_rq_timer(unsigned long data)
         unsigned long next = 0;
         int i, next_set = 0;
 
-        queue_for_each_hw_ctx(q, hctx, i)
+        queue_for_each_hw_ctx(q, hctx, i) {
+                /*
+                 * If no software queues are currently mapped to this
+                 * hardware queue, there's nothing to check
+                 */
+                if (!hctx->nr_ctx || !hctx->tags)
+                        continue;
+
                 blk_mq_hw_ctx_check_timeout(hctx, &next, &next_set);
+        }
 
         if (next_set) {
                 next = blk_rq_timeout(round_jiffies_up(next));
@@ -1196,53 +1204,6 @@ void blk_mq_free_single_hw_queue(struct blk_mq_hw_ctx *hctx,
 }
 EXPORT_SYMBOL(blk_mq_free_single_hw_queue);
 
-static int blk_mq_hctx_notify(void *data, unsigned long action,
-                              unsigned int cpu)
-{
-        struct blk_mq_hw_ctx *hctx = data;
-        struct request_queue *q = hctx->queue;
-        struct blk_mq_ctx *ctx;
-        LIST_HEAD(tmp);
-
-        if (action != CPU_DEAD && action != CPU_DEAD_FROZEN)
-                return NOTIFY_OK;
-
-        /*
-         * Move ctx entries to new CPU, if this one is going away.
-         */
-        ctx = __blk_mq_get_ctx(q, cpu);
-
-        spin_lock(&ctx->lock);
-        if (!list_empty(&ctx->rq_list)) {
-                list_splice_init(&ctx->rq_list, &tmp);
-                blk_mq_hctx_clear_pending(hctx, ctx);
-        }
-        spin_unlock(&ctx->lock);
-
-        if (list_empty(&tmp))
-                return NOTIFY_OK;
-
-        ctx = blk_mq_get_ctx(q);
-        spin_lock(&ctx->lock);
-
-        while (!list_empty(&tmp)) {
-                struct request *rq;
-
-                rq = list_first_entry(&tmp, struct request, queuelist);
-                rq->mq_ctx = ctx;
-                list_move_tail(&rq->queuelist, &ctx->rq_list);
-        }
-
-        hctx = q->mq_ops->map_queue(q, ctx->cpu);
-        blk_mq_hctx_mark_pending(hctx, ctx);
-
-        spin_unlock(&ctx->lock);
-
-        blk_mq_run_hw_queue(hctx, true);
-        blk_mq_put_ctx(ctx);
-        return NOTIFY_OK;
-}
-
 static void blk_mq_free_rq_map(struct blk_mq_tag_set *set,
                                struct blk_mq_tags *tags, unsigned int hctx_idx)
 {
@@ -1384,6 +1345,77 @@ static int blk_mq_alloc_bitmap(struct blk_mq_ctxmap *bitmap, int node)
         return 0;
 }
 
+static int blk_mq_hctx_cpu_offline(struct blk_mq_hw_ctx *hctx, int cpu)
+{
+        struct request_queue *q = hctx->queue;
+        struct blk_mq_ctx *ctx;
+        LIST_HEAD(tmp);
+
+        /*
+         * Move ctx entries to new CPU, if this one is going away.
+         */
+        ctx = __blk_mq_get_ctx(q, cpu);
+
+        spin_lock(&ctx->lock);
+        if (!list_empty(&ctx->rq_list)) {
+                list_splice_init(&ctx->rq_list, &tmp);
+                blk_mq_hctx_clear_pending(hctx, ctx);
+        }
+        spin_unlock(&ctx->lock);
+
+        if (list_empty(&tmp))
+                return NOTIFY_OK;
+
+        ctx = blk_mq_get_ctx(q);
+        spin_lock(&ctx->lock);
+
+        while (!list_empty(&tmp)) {
+                struct request *rq;
+
+                rq = list_first_entry(&tmp, struct request, queuelist);
+                rq->mq_ctx = ctx;
+                list_move_tail(&rq->queuelist, &ctx->rq_list);
+        }
+
+        hctx = q->mq_ops->map_queue(q, ctx->cpu);
+        blk_mq_hctx_mark_pending(hctx, ctx);
+
+        spin_unlock(&ctx->lock);
+
+        blk_mq_run_hw_queue(hctx, true);
+        blk_mq_put_ctx(ctx);
+        return NOTIFY_OK;
+}
+
+static int blk_mq_hctx_cpu_online(struct blk_mq_hw_ctx *hctx, int cpu)
+{
+        struct request_queue *q = hctx->queue;
+        struct blk_mq_tag_set *set = q->tag_set;
+
+        if (set->tags[hctx->queue_num])
+                return NOTIFY_OK;
+
+        set->tags[hctx->queue_num] = blk_mq_init_rq_map(set, hctx->queue_num);
+        if (!set->tags[hctx->queue_num])
+                return NOTIFY_STOP;
+
+        hctx->tags = set->tags[hctx->queue_num];
+        return NOTIFY_OK;
+}
+
+static int blk_mq_hctx_notify(void *data, unsigned long action,
+                              unsigned int cpu)
+{
+        struct blk_mq_hw_ctx *hctx = data;
+
+        if (action == CPU_DEAD || action == CPU_DEAD_FROZEN)
+                return blk_mq_hctx_cpu_offline(hctx, cpu);
+        else if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN)
+                return blk_mq_hctx_cpu_online(hctx, cpu);
+
+        return NOTIFY_OK;
+}
+
 static int blk_mq_init_hw_queues(struct request_queue *q,
                 struct blk_mq_tag_set *set)
 {
@@ -1513,6 +1545,24 @@ static void blk_mq_map_swqueue(struct request_queue *q)
         }
 
         queue_for_each_hw_ctx(q, hctx, i) {
+                /*
+                 * If no software queues are mapped to this hardware queue,
+                 * disable it and free the request entries
+                 */
+                if (!hctx->nr_ctx) {
+                        struct blk_mq_tag_set *set = q->tag_set;
+
+                        if (set->tags[i]) {
+                                blk_mq_free_rq_map(set, set->tags[i], i);
+                                set->tags[i] = NULL;
+                                hctx->tags = NULL;
+                        }
+                        continue;
+                }
+
+                /*
+                 * Initialize batch roundrobin counts
+                 */
                 hctx->next_cpu = cpumask_first(hctx->cpumask);
                 hctx->next_cpu_batch = BLK_MQ_CPU_WORK_BATCH;
         }
@@ -1645,14 +1695,14 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
         if (blk_mq_init_hw_queues(q, set))
                 goto err_flush_rq;
 
-        blk_mq_map_swqueue(q);
-
         mutex_lock(&all_q_mutex);
         list_add_tail(&q->all_q_node, &all_q_list);
         mutex_unlock(&all_q_mutex);
 
         blk_mq_add_queue_tag_set(set, q);
 
+        blk_mq_map_swqueue(q);
+
         return q;
 
 err_flush_rq:
@@ -1790,8 +1840,11 @@ void blk_mq_free_tag_set(struct blk_mq_tag_set *set)
 {
         int i;
 
-        for (i = 0; i < set->nr_hw_queues; i++)
-                blk_mq_free_rq_map(set, set->tags[i], i);
+        for (i = 0; i < set->nr_hw_queues; i++) {
+                if (set->tags[i])
+                        blk_mq_free_rq_map(set, set->tags[i], i);
+        }
+
         kfree(set->tags);
 }
 EXPORT_SYMBOL(blk_mq_free_tag_set);