author     Jianchao Wang <jianchao.w.wang@oracle.com>  2018-10-12 06:07:27 -0400
committer  Jens Axboe <axboe@kernel.dk>  2018-10-13 17:42:02 -0400
commit     34d11ffac1f56c3895dad32153abd6814452dc77 (patch)
tree       6c3cba5436908d5323a991d579ff8f9be88f122d /block
parent     5b202853ffbc54b29f23c4b1b5f3948efab489a2 (diff)
blk-mq: realloc hctx when hw queue is mapped to another node
When the hw queues and mq_map are updated, an hctx can end up mapped
to a different numa node. When that happens, the hctx needs to be
reallocated. If the reallocation fails, keep using the previous hctx.
Signed-off-by: Jianchao Wang <jianchao.w.wang@oracle.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Diffstat (limited to 'block')
-rw-r--r--  block/blk-mq.c  82
1 file changed, 56 insertions(+), 26 deletions(-)
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 6b734461fd39..941f51380077 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2521,6 +2521,39 @@ static int blk_mq_hw_ctx_size(struct blk_mq_tag_set *tag_set)
 	return hw_ctx_size;
 }
 
+static struct blk_mq_hw_ctx *blk_mq_alloc_and_init_hctx(
+		struct blk_mq_tag_set *set, struct request_queue *q,
+		int hctx_idx, int node)
+{
+	struct blk_mq_hw_ctx *hctx;
+
+	hctx = kzalloc_node(blk_mq_hw_ctx_size(set),
+			GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY,
+			node);
+	if (!hctx)
+		return NULL;
+
+	if (!zalloc_cpumask_var_node(&hctx->cpumask,
+				GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY,
+				node)) {
+		kfree(hctx);
+		return NULL;
+	}
+
+	atomic_set(&hctx->nr_active, 0);
+	hctx->numa_node = node;
+	hctx->queue_num = hctx_idx;
+
+	if (blk_mq_init_hctx(q, set, hctx, hctx_idx)) {
+		free_cpumask_var(hctx->cpumask);
+		kfree(hctx);
+		return NULL;
+	}
+	blk_mq_hctx_kobj_init(hctx);
+
+	return hctx;
+}
+
 static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
 		struct request_queue *q)
 {
@@ -2531,37 +2564,34 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
 	mutex_lock(&q->sysfs_lock);
 	for (i = 0; i < set->nr_hw_queues; i++) {
 		int node;
-
-		if (hctxs[i])
-			continue;
+		struct blk_mq_hw_ctx *hctx;
 
 		node = blk_mq_hw_queue_to_node(q->mq_map, i);
-		hctxs[i] = kzalloc_node(blk_mq_hw_ctx_size(set),
-				GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY,
-				node);
-		if (!hctxs[i])
-			break;
-
-		if (!zalloc_cpumask_var_node(&hctxs[i]->cpumask,
-					GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY,
-					node)) {
-			kfree(hctxs[i]);
-			hctxs[i] = NULL;
-			break;
-		}
-
-		atomic_set(&hctxs[i]->nr_active, 0);
-		hctxs[i]->numa_node = node;
-		hctxs[i]->queue_num = i;
+		/*
+		 * If the hw queue has been mapped to another numa node,
+		 * we need to realloc the hctx. If allocation fails, fallback
+		 * to use the previous one.
+		 */
+		if (hctxs[i] && (hctxs[i]->numa_node == node))
+			continue;
 
-		if (blk_mq_init_hctx(q, set, hctxs[i], i)) {
-			free_cpumask_var(hctxs[i]->cpumask);
-			kfree(hctxs[i]);
-			hctxs[i] = NULL;
-			break;
+		hctx = blk_mq_alloc_and_init_hctx(set, q, i, node);
+		if (hctx) {
+			if (hctxs[i]) {
+				blk_mq_exit_hctx(q, set, hctxs[i], i);
+				kobject_put(&hctxs[i]->kobj);
+			}
+			hctxs[i] = hctx;
+		} else {
+			if (hctxs[i])
+				pr_warn("Allocate new hctx on node %d fails,\
+						fallback to previous one on node %d\n",
+						node, hctxs[i]->numa_node);
+			else
+				break;
 		}
-		blk_mq_hctx_kobj_init(hctxs[i]);
 	}
+
 	for (j = i; j < q->nr_hw_queues; j++) {
 		struct blk_mq_hw_ctx *hctx = hctxs[j];
 
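For readers who want the control flow in isolation, below is a minimal
userspace C sketch of the realloc-with-fallback pattern the patch
introduces. It is not kernel code: node_obj, alloc_on_node(), and
remap() are made-up stand-ins for blk_mq_hw_ctx,
blk_mq_alloc_and_init_hctx(), and the per-queue body of
blk_mq_realloc_hw_ctxs().

/*
 * Userspace sketch of the patch's pattern; all names here are
 * illustrative, not from the kernel tree.
 */
#include <stdio.h>
#include <stdlib.h>

struct node_obj {
	int numa_node;		/* node the object was allocated for */
	int queue_num;		/* index, like hctx->queue_num */
};

/* Allocate and initialize an object "on" the given node, or fail. */
static struct node_obj *alloc_on_node(int idx, int node)
{
	struct node_obj *obj = calloc(1, sizeof(*obj));

	if (!obj)
		return NULL;
	obj->numa_node = node;
	obj->queue_num = idx;
	return obj;
}

/* Remap objs[i] to 'node'; on allocation failure keep the old object. */
static void remap(struct node_obj **objs, int i, int node)
{
	struct node_obj *nobj;

	/* Nothing to do if the mapping did not actually move. */
	if (objs[i] && objs[i]->numa_node == node)
		return;

	nobj = alloc_on_node(i, node);
	if (nobj) {
		/*
		 * Tear down the old object; the kernel additionally
		 * calls blk_mq_exit_hctx() and kobject_put() here.
		 */
		free(objs[i]);
		objs[i] = nobj;
	} else if (objs[i]) {
		/* Fallback path added by the patch: warn, keep the old one. */
		fprintf(stderr, "alloc on node %d failed, keeping node %d\n",
			node, objs[i]->numa_node);
	}
	/*
	 * With no previous object to fall back to, the kernel loop
	 * breaks out instead; here objs[i] simply stays NULL.
	 */
}

int main(void)
{
	struct node_obj *objs[1] = { NULL };

	remap(objs, 0, 0);	/* initial allocation on node 0 */
	remap(objs, 0, 1);	/* mapping moved: realloc on node 1 */
	if (objs[0])
		printf("queue 0 now on node %d\n", objs[0]->numa_node);
	free(objs[0]);
	return 0;
}

The key design choice mirrored here is that remapping is best-effort:
a queue keeps a working (if suboptimally placed) context rather than
losing it when allocation on the new node fails.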