author:    Jianchao Wang <jianchao.w.wang@oracle.com>  2018-10-12 06:07:27 -0400
committer: Jens Axboe <axboe@kernel.dk>  2018-10-13 17:42:02 -0400
commit:    34d11ffac1f56c3895dad32153abd6814452dc77
tree:      6c3cba5436908d5323a991d579ff8f9be88f122d /block
parent:    5b202853ffbc54b29f23c4b1b5f3948efab489a2
blk-mq: realloc hctx when hw queue is mapped to another node
When the hw queues and mq_map are updated, a hctx could be mapped to a
different numa node. At that point, the hctx needs to be reallocated on
the new node. If the reallocation fails, keep using the previous hctx.

Signed-off-by: Jianchao Wang <jianchao.w.wang@oracle.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
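To make the realloc-with-fallback flow concrete, below is a minimal
user-space sketch of the same pattern. This is not kernel code: hw_ctx,
alloc_and_init_ctx() and remap_queue() are hypothetical stand-ins for
blk_mq_hw_ctx, blk_mq_alloc_and_init_hctx() and one iteration of the loop
in blk_mq_realloc_hw_ctxs(). The point it illustrates is the ordering the
patch relies on: the replacement context is fully allocated and
initialized before the old one is torn down, so an allocation failure
leaves the previous mapping usable.

/*
 * Hypothetical user-space analogue of the pattern in this commit.
 * Build: cc -o remap remap.c
 */
#include <stdio.h>
#include <stdlib.h>

struct hw_ctx {
        int numa_node;
        int queue_num;
};

/* Stand-in for blk_mq_alloc_and_init_hctx(); returns NULL on failure. */
static struct hw_ctx *alloc_and_init_ctx(int idx, int node)
{
        struct hw_ctx *ctx = calloc(1, sizeof(*ctx));

        if (!ctx)
                return NULL;
        ctx->numa_node = node;
        ctx->queue_num = idx;
        return ctx;
}

/* Mirrors one iteration of the loop in blk_mq_realloc_hw_ctxs(). */
static void remap_queue(struct hw_ctx **ctxs, int i, int node)
{
        struct hw_ctx *ctx;

        /* Mapping unchanged: keep the existing context. */
        if (ctxs[i] && ctxs[i]->numa_node == node)
                return;

        ctx = alloc_and_init_ctx(i, node);
        if (ctx) {
                /* Only now tear down the old context
                 * (kernel: blk_mq_exit_hctx() + kobject_put()). */
                free(ctxs[i]);
                ctxs[i] = ctx;
        } else if (ctxs[i]) {
                /* Allocation failed: fall back to the previous context. */
                fprintf(stderr, "alloc on node %d failed, keeping node %d\n",
                        node, ctxs[i]->numa_node);
        }
}

int main(void)
{
        struct hw_ctx *ctxs[1] = { NULL };

        remap_queue(ctxs, 0, 0);        /* initial allocation on node 0 */
        remap_queue(ctxs, 0, 1);        /* queue remapped: realloc on node 1 */
        if (ctxs[0])
                printf("queue 0 now on node %d\n", ctxs[0]->numa_node);
        free(ctxs[0]);
        return 0;
}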
Diffstat (limited to 'block')
-rw-r--r--  block/blk-mq.c  82
1 file changed, 56 insertions(+), 26 deletions(-)
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 6b734461fd39..941f51380077 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2521,6 +2521,39 @@ static int blk_mq_hw_ctx_size(struct blk_mq_tag_set *tag_set)
 	return hw_ctx_size;
 }
 
+static struct blk_mq_hw_ctx *blk_mq_alloc_and_init_hctx(
+		struct blk_mq_tag_set *set, struct request_queue *q,
+		int hctx_idx, int node)
+{
+	struct blk_mq_hw_ctx *hctx;
+
+	hctx = kzalloc_node(blk_mq_hw_ctx_size(set),
+			GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY,
+			node);
+	if (!hctx)
+		return NULL;
+
+	if (!zalloc_cpumask_var_node(&hctx->cpumask,
+				GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY,
+				node)) {
+		kfree(hctx);
+		return NULL;
+	}
+
+	atomic_set(&hctx->nr_active, 0);
+	hctx->numa_node = node;
+	hctx->queue_num = hctx_idx;
+
+	if (blk_mq_init_hctx(q, set, hctx, hctx_idx)) {
+		free_cpumask_var(hctx->cpumask);
+		kfree(hctx);
+		return NULL;
+	}
+	blk_mq_hctx_kobj_init(hctx);
+
+	return hctx;
+}
+
 static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
 						struct request_queue *q)
 {
@@ -2531,37 +2564,34 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
 	mutex_lock(&q->sysfs_lock);
 	for (i = 0; i < set->nr_hw_queues; i++) {
 		int node;
-
-		if (hctxs[i])
-			continue;
+		struct blk_mq_hw_ctx *hctx;
 
 		node = blk_mq_hw_queue_to_node(q->mq_map, i);
-		hctxs[i] = kzalloc_node(blk_mq_hw_ctx_size(set),
-				GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY,
-				node);
-		if (!hctxs[i])
-			break;
-
-		if (!zalloc_cpumask_var_node(&hctxs[i]->cpumask,
-				GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY,
-				node)) {
-			kfree(hctxs[i]);
-			hctxs[i] = NULL;
-			break;
-		}
-
-		atomic_set(&hctxs[i]->nr_active, 0);
-		hctxs[i]->numa_node = node;
-		hctxs[i]->queue_num = i;
+		/*
+		 * If the hw queue has been mapped to another numa node,
+		 * we need to realloc the hctx. If allocation fails, fallback
+		 * to use the previous one.
+		 */
+		if (hctxs[i] && (hctxs[i]->numa_node == node))
+			continue;
 
-		if (blk_mq_init_hctx(q, set, hctxs[i], i)) {
-			free_cpumask_var(hctxs[i]->cpumask);
-			kfree(hctxs[i]);
-			hctxs[i] = NULL;
-			break;
+		hctx = blk_mq_alloc_and_init_hctx(set, q, i, node);
+		if (hctx) {
+			if (hctxs[i]) {
+				blk_mq_exit_hctx(q, set, hctxs[i], i);
+				kobject_put(&hctxs[i]->kobj);
+			}
+			hctxs[i] = hctx;
+		} else {
+			if (hctxs[i])
+				pr_warn("Allocate new hctx on node %d fails,\
+						fallback to previous one on node %d\n",
+						node, hctxs[i]->numa_node);
+			else
+				break;
 		}
-		blk_mq_hctx_kobj_init(hctxs[i]);
 	}
+
 	for (j = i; j < q->nr_hw_queues; j++) {
 		struct blk_mq_hw_ctx *hctx = hctxs[j];
 