diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2016-10-09 20:32:20 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-10-09 20:32:20 -0400 |
commit | 24532f768121b07b16178ffb40442ece43365cbd (patch) | |
tree | ca2eaf2c3ed031dd3aa977af95df77bfa2e18cc6 | |
parent | 12e3d3cdd975fe986cc5c35f60b1467a8ec20b80 (diff) | |
parent | 97a32864e6de5944c6356049f60569de01e9ba1f (diff) |
Merge branch 'for-4.9/block-smp' of git://git.kernel.dk/linux-block
Pull blk-mq CPU hotplug update from Jens Axboe:
"This is the conversion of blk-mq to the new hotplug state machine"
* 'for-4.9/block-smp' of git://git.kernel.dk/linux-block:
blk-mq: fixup "Convert to new hotplug state machine"
blk-mq: Convert to new hotplug state machine
blk-mq/cpu-notif: Convert to new hotplug state machine
-rw-r--r-- | block/Makefile | 2 | ||||
-rw-r--r-- | block/blk-mq-cpu.c | 67 | ||||
-rw-r--r-- | block/blk-mq.c | 123 | ||||
-rw-r--r-- | block/blk-mq.h | 7 | ||||
-rw-r--r-- | include/linux/blk-mq.h | 8 |
5 files changed, 58 insertions, 149 deletions
diff --git a/block/Makefile b/block/Makefile index 37a0d93f97bb..36acdd7545be 100644 --- a/block/Makefile +++ b/block/Makefile | |||
@@ -6,7 +6,7 @@ obj-$(CONFIG_BLOCK) := bio.o elevator.o blk-core.o blk-tag.o blk-sysfs.o \ | |||
6 | blk-flush.o blk-settings.o blk-ioc.o blk-map.o \ | 6 | blk-flush.o blk-settings.o blk-ioc.o blk-map.o \ |
7 | blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \ | 7 | blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \ |
8 | blk-lib.o blk-mq.o blk-mq-tag.o \ | 8 | blk-lib.o blk-mq.o blk-mq-tag.o \ |
9 | blk-mq-sysfs.o blk-mq-cpu.o blk-mq-cpumap.o ioctl.o \ | 9 | blk-mq-sysfs.o blk-mq-cpumap.o ioctl.o \ |
10 | genhd.o scsi_ioctl.o partition-generic.o ioprio.o \ | 10 | genhd.o scsi_ioctl.o partition-generic.o ioprio.o \ |
11 | badblocks.o partitions/ | 11 | badblocks.o partitions/ |
12 | 12 | ||
diff --git a/block/blk-mq-cpu.c b/block/blk-mq-cpu.c deleted file mode 100644 index bb3ed488f7b5..000000000000 --- a/block/blk-mq-cpu.c +++ /dev/null | |||
@@ -1,67 +0,0 @@ | |||
1 | /* | ||
2 | * CPU notifier helper code for blk-mq | ||
3 | * | ||
4 | * Copyright (C) 2013-2014 Jens Axboe | ||
5 | */ | ||
6 | #include <linux/kernel.h> | ||
7 | #include <linux/module.h> | ||
8 | #include <linux/init.h> | ||
9 | #include <linux/blkdev.h> | ||
10 | #include <linux/list.h> | ||
11 | #include <linux/llist.h> | ||
12 | #include <linux/smp.h> | ||
13 | #include <linux/cpu.h> | ||
14 | |||
15 | #include <linux/blk-mq.h> | ||
16 | #include "blk-mq.h" | ||
17 | |||
18 | static LIST_HEAD(blk_mq_cpu_notify_list); | ||
19 | static DEFINE_RAW_SPINLOCK(blk_mq_cpu_notify_lock); | ||
20 | |||
21 | static int blk_mq_main_cpu_notify(struct notifier_block *self, | ||
22 | unsigned long action, void *hcpu) | ||
23 | { | ||
24 | unsigned int cpu = (unsigned long) hcpu; | ||
25 | struct blk_mq_cpu_notifier *notify; | ||
26 | int ret = NOTIFY_OK; | ||
27 | |||
28 | raw_spin_lock(&blk_mq_cpu_notify_lock); | ||
29 | |||
30 | list_for_each_entry(notify, &blk_mq_cpu_notify_list, list) { | ||
31 | ret = notify->notify(notify->data, action, cpu); | ||
32 | if (ret != NOTIFY_OK) | ||
33 | break; | ||
34 | } | ||
35 | |||
36 | raw_spin_unlock(&blk_mq_cpu_notify_lock); | ||
37 | return ret; | ||
38 | } | ||
39 | |||
40 | void blk_mq_register_cpu_notifier(struct blk_mq_cpu_notifier *notifier) | ||
41 | { | ||
42 | BUG_ON(!notifier->notify); | ||
43 | |||
44 | raw_spin_lock(&blk_mq_cpu_notify_lock); | ||
45 | list_add_tail(&notifier->list, &blk_mq_cpu_notify_list); | ||
46 | raw_spin_unlock(&blk_mq_cpu_notify_lock); | ||
47 | } | ||
48 | |||
49 | void blk_mq_unregister_cpu_notifier(struct blk_mq_cpu_notifier *notifier) | ||
50 | { | ||
51 | raw_spin_lock(&blk_mq_cpu_notify_lock); | ||
52 | list_del(&notifier->list); | ||
53 | raw_spin_unlock(&blk_mq_cpu_notify_lock); | ||
54 | } | ||
55 | |||
56 | void blk_mq_init_cpu_notifier(struct blk_mq_cpu_notifier *notifier, | ||
57 | int (*fn)(void *, unsigned long, unsigned int), | ||
58 | void *data) | ||
59 | { | ||
60 | notifier->notify = fn; | ||
61 | notifier->data = data; | ||
62 | } | ||
63 | |||
64 | void __init blk_mq_cpu_init(void) | ||
65 | { | ||
66 | hotcpu_notifier(blk_mq_main_cpu_notify, 0); | ||
67 | } | ||
diff --git a/block/blk-mq.c b/block/blk-mq.c index b65f572a4faf..ddc2eed64771 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c | |||
@@ -1563,11 +1563,13 @@ fail: | |||
1563 | * software queue to the hw queue dispatch list, and ensure that it | 1563 | * software queue to the hw queue dispatch list, and ensure that it |
1564 | * gets run. | 1564 | * gets run. |
1565 | */ | 1565 | */ |
1566 | static int blk_mq_hctx_cpu_offline(struct blk_mq_hw_ctx *hctx, int cpu) | 1566 | static int blk_mq_hctx_notify_dead(unsigned int cpu, struct hlist_node *node) |
1567 | { | 1567 | { |
1568 | struct blk_mq_hw_ctx *hctx; | ||
1568 | struct blk_mq_ctx *ctx; | 1569 | struct blk_mq_ctx *ctx; |
1569 | LIST_HEAD(tmp); | 1570 | LIST_HEAD(tmp); |
1570 | 1571 | ||
1572 | hctx = hlist_entry_safe(node, struct blk_mq_hw_ctx, cpuhp_dead); | ||
1571 | ctx = __blk_mq_get_ctx(hctx->queue, cpu); | 1573 | ctx = __blk_mq_get_ctx(hctx->queue, cpu); |
1572 | 1574 | ||
1573 | spin_lock(&ctx->lock); | 1575 | spin_lock(&ctx->lock); |
@@ -1578,30 +1580,20 @@ static int blk_mq_hctx_cpu_offline(struct blk_mq_hw_ctx *hctx, int cpu) | |||
1578 | spin_unlock(&ctx->lock); | 1580 | spin_unlock(&ctx->lock); |
1579 | 1581 | ||
1580 | if (list_empty(&tmp)) | 1582 | if (list_empty(&tmp)) |
1581 | return NOTIFY_OK; | 1583 | return 0; |
1582 | 1584 | ||
1583 | spin_lock(&hctx->lock); | 1585 | spin_lock(&hctx->lock); |
1584 | list_splice_tail_init(&tmp, &hctx->dispatch); | 1586 | list_splice_tail_init(&tmp, &hctx->dispatch); |
1585 | spin_unlock(&hctx->lock); | 1587 | spin_unlock(&hctx->lock); |
1586 | 1588 | ||
1587 | blk_mq_run_hw_queue(hctx, true); | 1589 | blk_mq_run_hw_queue(hctx, true); |
1588 | return NOTIFY_OK; | 1590 | return 0; |
1589 | } | 1591 | } |
1590 | 1592 | ||
1591 | static int blk_mq_hctx_notify(void *data, unsigned long action, | 1593 | static void blk_mq_remove_cpuhp(struct blk_mq_hw_ctx *hctx) |
1592 | unsigned int cpu) | ||
1593 | { | 1594 | { |
1594 | struct blk_mq_hw_ctx *hctx = data; | 1595 | cpuhp_state_remove_instance_nocalls(CPUHP_BLK_MQ_DEAD, |
1595 | 1596 | &hctx->cpuhp_dead); | |
1596 | if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) | ||
1597 | return blk_mq_hctx_cpu_offline(hctx, cpu); | ||
1598 | |||
1599 | /* | ||
1600 | * In case of CPU online, tags may be reallocated | ||
1601 | * in blk_mq_map_swqueue() after mapping is updated. | ||
1602 | */ | ||
1603 | |||
1604 | return NOTIFY_OK; | ||
1605 | } | 1597 | } |
1606 | 1598 | ||
1607 | /* hctx->ctxs will be freed in queue's release handler */ | 1599 | /* hctx->ctxs will be freed in queue's release handler */ |
@@ -1621,7 +1613,7 @@ static void blk_mq_exit_hctx(struct request_queue *q, | |||
1621 | if (set->ops->exit_hctx) | 1613 | if (set->ops->exit_hctx) |
1622 | set->ops->exit_hctx(hctx, hctx_idx); | 1614 | set->ops->exit_hctx(hctx, hctx_idx); |
1623 | 1615 | ||
1624 | blk_mq_unregister_cpu_notifier(&hctx->cpu_notifier); | 1616 | blk_mq_remove_cpuhp(hctx); |
1625 | blk_free_flush_queue(hctx->fq); | 1617 | blk_free_flush_queue(hctx->fq); |
1626 | sbitmap_free(&hctx->ctx_map); | 1618 | sbitmap_free(&hctx->ctx_map); |
1627 | } | 1619 | } |
@@ -1668,9 +1660,7 @@ static int blk_mq_init_hctx(struct request_queue *q, | |||
1668 | hctx->queue_num = hctx_idx; | 1660 | hctx->queue_num = hctx_idx; |
1669 | hctx->flags = set->flags & ~BLK_MQ_F_TAG_SHARED; | 1661 | hctx->flags = set->flags & ~BLK_MQ_F_TAG_SHARED; |
1670 | 1662 | ||
1671 | blk_mq_init_cpu_notifier(&hctx->cpu_notifier, | 1663 | cpuhp_state_add_instance_nocalls(CPUHP_BLK_MQ_DEAD, &hctx->cpuhp_dead); |
1672 | blk_mq_hctx_notify, hctx); | ||
1673 | blk_mq_register_cpu_notifier(&hctx->cpu_notifier); | ||
1674 | 1664 | ||
1675 | hctx->tags = set->tags[hctx_idx]; | 1665 | hctx->tags = set->tags[hctx_idx]; |
1676 | 1666 | ||
@@ -1715,8 +1705,7 @@ static int blk_mq_init_hctx(struct request_queue *q, | |||
1715 | free_ctxs: | 1705 | free_ctxs: |
1716 | kfree(hctx->ctxs); | 1706 | kfree(hctx->ctxs); |
1717 | unregister_cpu_notifier: | 1707 | unregister_cpu_notifier: |
1718 | blk_mq_unregister_cpu_notifier(&hctx->cpu_notifier); | 1708 | blk_mq_remove_cpuhp(hctx); |
1719 | |||
1720 | return -1; | 1709 | return -1; |
1721 | } | 1710 | } |
1722 | 1711 | ||
@@ -2089,50 +2078,18 @@ static void blk_mq_queue_reinit(struct request_queue *q, | |||
2089 | blk_mq_sysfs_register(q); | 2078 | blk_mq_sysfs_register(q); |
2090 | } | 2079 | } |
2091 | 2080 | ||
2092 | static int blk_mq_queue_reinit_notify(struct notifier_block *nb, | 2081 | /* |
2093 | unsigned long action, void *hcpu) | 2082 | * New online cpumask which is going to be set in this hotplug event. |
2083 | * Declare this cpumasks as global as cpu-hotplug operation is invoked | ||
2084 | * one-by-one and dynamically allocating this could result in a failure. | ||
2085 | */ | ||
2086 | static struct cpumask cpuhp_online_new; | ||
2087 | |||
2088 | static void blk_mq_queue_reinit_work(void) | ||
2094 | { | 2089 | { |
2095 | struct request_queue *q; | 2090 | struct request_queue *q; |
2096 | int cpu = (unsigned long)hcpu; | ||
2097 | /* | ||
2098 | * New online cpumask which is going to be set in this hotplug event. | ||
2099 | * Declare this cpumasks as global as cpu-hotplug operation is invoked | ||
2100 | * one-by-one and dynamically allocating this could result in a failure. | ||
2101 | */ | ||
2102 | static struct cpumask online_new; | ||
2103 | |||
2104 | /* | ||
2105 | * Before hotadded cpu starts handling requests, new mappings must | ||
2106 | * be established. Otherwise, these requests in hw queue might | ||
2107 | * never be dispatched. | ||
2108 | * | ||
2109 | * For example, there is a single hw queue (hctx) and two CPU queues | ||
2110 | * (ctx0 for CPU0, and ctx1 for CPU1). | ||
2111 | * | ||
2112 | * Now CPU1 is just onlined and a request is inserted into | ||
2113 | * ctx1->rq_list and set bit0 in pending bitmap as ctx1->index_hw is | ||
2114 | * still zero. | ||
2115 | * | ||
2116 | * And then while running hw queue, flush_busy_ctxs() finds bit0 is | ||
2117 | * set in pending bitmap and tries to retrieve requests in | ||
2118 | * hctx->ctxs[0]->rq_list. But htx->ctxs[0] is a pointer to ctx0, | ||
2119 | * so the request in ctx1->rq_list is ignored. | ||
2120 | */ | ||
2121 | switch (action & ~CPU_TASKS_FROZEN) { | ||
2122 | case CPU_DEAD: | ||
2123 | case CPU_UP_CANCELED: | ||
2124 | cpumask_copy(&online_new, cpu_online_mask); | ||
2125 | break; | ||
2126 | case CPU_UP_PREPARE: | ||
2127 | cpumask_copy(&online_new, cpu_online_mask); | ||
2128 | cpumask_set_cpu(cpu, &online_new); | ||
2129 | break; | ||
2130 | default: | ||
2131 | return NOTIFY_OK; | ||
2132 | } | ||
2133 | 2091 | ||
2134 | mutex_lock(&all_q_mutex); | 2092 | mutex_lock(&all_q_mutex); |
2135 | |||
2136 | /* | 2093 | /* |
2137 | * We need to freeze and reinit all existing queues. Freezing | 2094 | * We need to freeze and reinit all existing queues. Freezing |
2138 | * involves synchronous wait for an RCU grace period and doing it | 2095 | * involves synchronous wait for an RCU grace period and doing it |
@@ -2153,13 +2110,43 @@ static int blk_mq_queue_reinit_notify(struct notifier_block *nb, | |||
2153 | } | 2110 | } |
2154 | 2111 | ||
2155 | list_for_each_entry(q, &all_q_list, all_q_node) | 2112 | list_for_each_entry(q, &all_q_list, all_q_node) |
2156 | blk_mq_queue_reinit(q, &online_new); | 2113 | blk_mq_queue_reinit(q, &cpuhp_online_new); |
2157 | 2114 | ||
2158 | list_for_each_entry(q, &all_q_list, all_q_node) | 2115 | list_for_each_entry(q, &all_q_list, all_q_node) |
2159 | blk_mq_unfreeze_queue(q); | 2116 | blk_mq_unfreeze_queue(q); |
2160 | 2117 | ||
2161 | mutex_unlock(&all_q_mutex); | 2118 | mutex_unlock(&all_q_mutex); |
2162 | return NOTIFY_OK; | 2119 | } |
2120 | |||
2121 | static int blk_mq_queue_reinit_dead(unsigned int cpu) | ||
2122 | { | ||
2123 | cpumask_copy(&cpuhp_online_new, cpu_online_mask); | ||
2124 | blk_mq_queue_reinit_work(); | ||
2125 | return 0; | ||
2126 | } | ||
2127 | |||
2128 | /* | ||
2129 | * Before hotadded cpu starts handling requests, new mappings must be | ||
2130 | * established. Otherwise, these requests in hw queue might never be | ||
2131 | * dispatched. | ||
2132 | * | ||
2133 | * For example, there is a single hw queue (hctx) and two CPU queues (ctx0 | ||
2134 | * for CPU0, and ctx1 for CPU1). | ||
2135 | * | ||
2136 | * Now CPU1 is just onlined and a request is inserted into ctx1->rq_list | ||
2137 | * and set bit0 in pending bitmap as ctx1->index_hw is still zero. | ||
2138 | * | ||
2139 | * And then while running hw queue, flush_busy_ctxs() finds bit0 is set in | ||
2140 | * pending bitmap and tries to retrieve requests in hctx->ctxs[0]->rq_list. | ||
2141 | * But htx->ctxs[0] is a pointer to ctx0, so the request in ctx1->rq_list | ||
2142 | * is ignored. | ||
2143 | */ | ||
2144 | static int blk_mq_queue_reinit_prepare(unsigned int cpu) | ||
2145 | { | ||
2146 | cpumask_copy(&cpuhp_online_new, cpu_online_mask); | ||
2147 | cpumask_set_cpu(cpu, &cpuhp_online_new); | ||
2148 | blk_mq_queue_reinit_work(); | ||
2149 | return 0; | ||
2163 | } | 2150 | } |
2164 | 2151 | ||
2165 | static int __blk_mq_alloc_rq_maps(struct blk_mq_tag_set *set) | 2152 | static int __blk_mq_alloc_rq_maps(struct blk_mq_tag_set *set) |
@@ -2378,10 +2365,12 @@ void blk_mq_enable_hotplug(void) | |||
2378 | 2365 | ||
2379 | static int __init blk_mq_init(void) | 2366 | static int __init blk_mq_init(void) |
2380 | { | 2367 | { |
2381 | blk_mq_cpu_init(); | 2368 | cpuhp_setup_state_multi(CPUHP_BLK_MQ_DEAD, "block/mq:dead", NULL, |
2382 | 2369 | blk_mq_hctx_notify_dead); | |
2383 | hotcpu_notifier(blk_mq_queue_reinit_notify, 0); | ||
2384 | 2370 | ||
2371 | cpuhp_setup_state_nocalls(CPUHP_BLK_MQ_PREPARE, "block/mq:prepare", | ||
2372 | blk_mq_queue_reinit_prepare, | ||
2373 | blk_mq_queue_reinit_dead); | ||
2385 | return 0; | 2374 | return 0; |
2386 | } | 2375 | } |
2387 | subsys_initcall(blk_mq_init); | 2376 | subsys_initcall(blk_mq_init); |
diff --git a/block/blk-mq.h b/block/blk-mq.h index df6474cb5a4c..e5d25249028c 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h | |||
@@ -32,13 +32,6 @@ void blk_mq_wake_waiters(struct request_queue *q); | |||
32 | /* | 32 | /* |
33 | * CPU hotplug helpers | 33 | * CPU hotplug helpers |
34 | */ | 34 | */ |
35 | struct blk_mq_cpu_notifier; | ||
36 | void blk_mq_init_cpu_notifier(struct blk_mq_cpu_notifier *notifier, | ||
37 | int (*fn)(void *, unsigned long, unsigned int), | ||
38 | void *data); | ||
39 | void blk_mq_register_cpu_notifier(struct blk_mq_cpu_notifier *notifier); | ||
40 | void blk_mq_unregister_cpu_notifier(struct blk_mq_cpu_notifier *notifier); | ||
41 | void blk_mq_cpu_init(void); | ||
42 | void blk_mq_enable_hotplug(void); | 35 | void blk_mq_enable_hotplug(void); |
43 | void blk_mq_disable_hotplug(void); | 36 | void blk_mq_disable_hotplug(void); |
44 | 37 | ||
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index ef6aebf291ed..535ab2e13d2e 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h | |||
@@ -7,12 +7,6 @@ | |||
7 | struct blk_mq_tags; | 7 | struct blk_mq_tags; |
8 | struct blk_flush_queue; | 8 | struct blk_flush_queue; |
9 | 9 | ||
10 | struct blk_mq_cpu_notifier { | ||
11 | struct list_head list; | ||
12 | void *data; | ||
13 | int (*notify)(void *data, unsigned long action, unsigned int cpu); | ||
14 | }; | ||
15 | |||
16 | struct blk_mq_hw_ctx { | 10 | struct blk_mq_hw_ctx { |
17 | struct { | 11 | struct { |
18 | spinlock_t lock; | 12 | spinlock_t lock; |
@@ -53,7 +47,7 @@ struct blk_mq_hw_ctx { | |||
53 | 47 | ||
54 | struct delayed_work delay_work; | 48 | struct delayed_work delay_work; |
55 | 49 | ||
56 | struct blk_mq_cpu_notifier cpu_notifier; | 50 | struct hlist_node cpuhp_dead; |
57 | struct kobject kobj; | 51 | struct kobject kobj; |
58 | 52 | ||
59 | unsigned long poll_considered; | 53 | unsigned long poll_considered; |