author     Linus Torvalds <torvalds@linux-foundation.org>  2016-10-09 20:32:20 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2016-10-09 20:32:20 -0400
commit     24532f768121b07b16178ffb40442ece43365cbd
tree       ca2eaf2c3ed031dd3aa977af95df77bfa2e18cc6
parent     12e3d3cdd975fe986cc5c35f60b1467a8ec20b80
parent     97a32864e6de5944c6356049f60569de01e9ba1f
Merge branch 'for-4.9/block-smp' of git://git.kernel.dk/linux-block
Pull blk-mq CPU hotplug update from Jens Axboe:
 "This is the conversion of blk-mq to the new hotplug state machine"

* 'for-4.9/block-smp' of git://git.kernel.dk/linux-block:
  blk-mq: fixup "Convert to new hotplug state machine"
  blk-mq: Convert to new hotplug state machine
  blk-mq/cpu-notif: Convert to new hotplug state machine
-rw-r--r--  block/Makefile            2
-rw-r--r--  block/blk-mq-cpu.c       67
-rw-r--r--  block/blk-mq.c          123
-rw-r--r--  block/blk-mq.h            7
-rw-r--r--  include/linux/blk-mq.h    8
5 files changed, 58 insertions, 149 deletions
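
The conversion below follows one mechanical pattern: blk-mq's private CPU notifier list is replaced by the multi-instance CPUHP_BLK_MQ_DEAD state, and each hardware context participates by embedding an hlist_node. For orientation, a minimal sketch of the multi-instance API as this merge uses it; my_dev and my_dead_cb are hypothetical stand-ins, only the cpuhp_* calls and hlist_entry_safe() are the real interface:

	struct my_dev {
		struct hlist_node cpuhp_dead;	/* links this instance into the state */
	};

	/* Teardown callback: runs once per registered instance when a CPU dies. */
	static int my_dead_cb(unsigned int cpu, struct hlist_node *node)
	{
		struct my_dev *dev = hlist_entry_safe(node, struct my_dev, cpuhp_dead);

		/* drain or migrate whatever 'dev' had queued on the dead 'cpu' */
		return 0;	/* nonzero reports failure to the hotplug core */
	}

	/* Once, at init time: no startup callback, teardown on CPU death. */
	cpuhp_setup_state_multi(CPUHP_BLK_MQ_DEAD, "block/mq:dead",
				NULL, my_dead_cb);

	/* Per instance; _nocalls skips invoking callbacks for current CPUs. */
	cpuhp_state_add_instance_nocalls(CPUHP_BLK_MQ_DEAD, &dev->cpuhp_dead);
	cpuhp_state_remove_instance_nocalls(CPUHP_BLK_MQ_DEAD, &dev->cpuhp_dead);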
diff --git a/block/Makefile b/block/Makefile
index 37a0d93f97bb..36acdd7545be 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -6,7 +6,7 @@ obj-$(CONFIG_BLOCK) := bio.o elevator.o blk-core.o blk-tag.o blk-sysfs.o \
 			blk-flush.o blk-settings.o blk-ioc.o blk-map.o \
 			blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \
 			blk-lib.o blk-mq.o blk-mq-tag.o \
-			blk-mq-sysfs.o blk-mq-cpu.o blk-mq-cpumap.o ioctl.o \
+			blk-mq-sysfs.o blk-mq-cpumap.o ioctl.o \
 			genhd.o scsi_ioctl.o partition-generic.o ioprio.o \
 			badblocks.o partitions/
 
diff --git a/block/blk-mq-cpu.c b/block/blk-mq-cpu.c
deleted file mode 100644
index bb3ed488f7b5..000000000000
--- a/block/blk-mq-cpu.c
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * CPU notifier helper code for blk-mq
- *
- * Copyright (C) 2013-2014 Jens Axboe
- */
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/blkdev.h>
-#include <linux/list.h>
-#include <linux/llist.h>
-#include <linux/smp.h>
-#include <linux/cpu.h>
-
-#include <linux/blk-mq.h>
-#include "blk-mq.h"
-
-static LIST_HEAD(blk_mq_cpu_notify_list);
-static DEFINE_RAW_SPINLOCK(blk_mq_cpu_notify_lock);
-
-static int blk_mq_main_cpu_notify(struct notifier_block *self,
-				  unsigned long action, void *hcpu)
-{
-	unsigned int cpu = (unsigned long) hcpu;
-	struct blk_mq_cpu_notifier *notify;
-	int ret = NOTIFY_OK;
-
-	raw_spin_lock(&blk_mq_cpu_notify_lock);
-
-	list_for_each_entry(notify, &blk_mq_cpu_notify_list, list) {
-		ret = notify->notify(notify->data, action, cpu);
-		if (ret != NOTIFY_OK)
-			break;
-	}
-
-	raw_spin_unlock(&blk_mq_cpu_notify_lock);
-	return ret;
-}
-
-void blk_mq_register_cpu_notifier(struct blk_mq_cpu_notifier *notifier)
-{
-	BUG_ON(!notifier->notify);
-
-	raw_spin_lock(&blk_mq_cpu_notify_lock);
-	list_add_tail(&notifier->list, &blk_mq_cpu_notify_list);
-	raw_spin_unlock(&blk_mq_cpu_notify_lock);
-}
-
-void blk_mq_unregister_cpu_notifier(struct blk_mq_cpu_notifier *notifier)
-{
-	raw_spin_lock(&blk_mq_cpu_notify_lock);
-	list_del(&notifier->list);
-	raw_spin_unlock(&blk_mq_cpu_notify_lock);
-}
-
-void blk_mq_init_cpu_notifier(struct blk_mq_cpu_notifier *notifier,
-			      int (*fn)(void *, unsigned long, unsigned int),
-			      void *data)
-{
-	notifier->notify = fn;
-	notifier->data = data;
-}
-
-void __init blk_mq_cpu_init(void)
-{
-	hotcpu_notifier(blk_mq_main_cpu_notify, 0);
-}
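
The deleted file was pure plumbing: a single global notifier multiplexed onto a driver-maintained list of blk_mq_cpu_notifier instances under a raw spinlock. The multi-instance state machine keeps exactly that per-instance list in the hotplug core, which is why the replacement needs no list, no lock and no action decoding. For contrast, the legacy interface this file wrapped, reduced to a sketch (my_notify is a hypothetical name; hotcpu_notifier() and the CPU_* action codes are the pre-4.10 API being retired here):

	static int my_notify(struct notifier_block *self, unsigned long action,
			     void *hcpu)
	{
		unsigned int cpu = (unsigned long)hcpu;

		/* one callback for every event; drivers decode 'action' themselves */
		switch (action & ~CPU_TASKS_FROZEN) {
		case CPU_DEAD:
			/* 'cpu' is gone: drain its per-cpu work */
			break;
		}
		return NOTIFY_OK;
	}

	hotcpu_notifier(my_notify, 0);	/* register at priority 0 */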
diff --git a/block/blk-mq.c b/block/blk-mq.c
index b65f572a4faf..ddc2eed64771 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1563,11 +1563,13 @@ fail:
  * software queue to the hw queue dispatch list, and ensure that it
  * gets run.
  */
-static int blk_mq_hctx_cpu_offline(struct blk_mq_hw_ctx *hctx, int cpu)
+static int blk_mq_hctx_notify_dead(unsigned int cpu, struct hlist_node *node)
 {
+	struct blk_mq_hw_ctx *hctx;
 	struct blk_mq_ctx *ctx;
 	LIST_HEAD(tmp);
 
+	hctx = hlist_entry_safe(node, struct blk_mq_hw_ctx, cpuhp_dead);
 	ctx = __blk_mq_get_ctx(hctx->queue, cpu);
 
 	spin_lock(&ctx->lock);
@@ -1578,30 +1580,20 @@ static int blk_mq_hctx_cpu_offline(struct blk_mq_hw_ctx *hctx, int cpu)
 	spin_unlock(&ctx->lock);
 
 	if (list_empty(&tmp))
-		return NOTIFY_OK;
+		return 0;
 
 	spin_lock(&hctx->lock);
 	list_splice_tail_init(&tmp, &hctx->dispatch);
 	spin_unlock(&hctx->lock);
 
 	blk_mq_run_hw_queue(hctx, true);
-	return NOTIFY_OK;
+	return 0;
 }
 
-static int blk_mq_hctx_notify(void *data, unsigned long action,
-			      unsigned int cpu)
+static void blk_mq_remove_cpuhp(struct blk_mq_hw_ctx *hctx)
 {
-	struct blk_mq_hw_ctx *hctx = data;
-
-	if (action == CPU_DEAD || action == CPU_DEAD_FROZEN)
-		return blk_mq_hctx_cpu_offline(hctx, cpu);
-
-	/*
-	 * In case of CPU online, tags may be reallocated
-	 * in blk_mq_map_swqueue() after mapping is updated.
-	 */
-
-	return NOTIFY_OK;
+	cpuhp_state_remove_instance_nocalls(CPUHP_BLK_MQ_DEAD,
+					    &hctx->cpuhp_dead);
 }
 
 /* hctx->ctxs will be freed in queue's release handler */
@@ -1621,7 +1613,7 @@ static void blk_mq_exit_hctx(struct request_queue *q,
 	if (set->ops->exit_hctx)
 		set->ops->exit_hctx(hctx, hctx_idx);
 
-	blk_mq_unregister_cpu_notifier(&hctx->cpu_notifier);
+	blk_mq_remove_cpuhp(hctx);
 	blk_free_flush_queue(hctx->fq);
 	sbitmap_free(&hctx->ctx_map);
 }
@@ -1668,9 +1660,7 @@ static int blk_mq_init_hctx(struct request_queue *q,
 	hctx->queue_num = hctx_idx;
 	hctx->flags = set->flags & ~BLK_MQ_F_TAG_SHARED;
 
-	blk_mq_init_cpu_notifier(&hctx->cpu_notifier,
-				 blk_mq_hctx_notify, hctx);
-	blk_mq_register_cpu_notifier(&hctx->cpu_notifier);
+	cpuhp_state_add_instance_nocalls(CPUHP_BLK_MQ_DEAD, &hctx->cpuhp_dead);
 
 	hctx->tags = set->tags[hctx_idx];
 
@@ -1715,8 +1705,7 @@ static int blk_mq_init_hctx(struct request_queue *q,
 free_ctxs:
 	kfree(hctx->ctxs);
 unregister_cpu_notifier:
-	blk_mq_unregister_cpu_notifier(&hctx->cpu_notifier);
-
+	blk_mq_remove_cpuhp(hctx);
 	return -1;
 }
 
@@ -2089,50 +2078,18 @@ static void blk_mq_queue_reinit(struct request_queue *q,
 	blk_mq_sysfs_register(q);
 }
 
-static int blk_mq_queue_reinit_notify(struct notifier_block *nb,
-				      unsigned long action, void *hcpu)
+/*
+ * New online cpumask which is going to be set in this hotplug event.
+ * Declare this cpumasks as global as cpu-hotplug operation is invoked
+ * one-by-one and dynamically allocating this could result in a failure.
+ */
+static struct cpumask cpuhp_online_new;
+
+static void blk_mq_queue_reinit_work(void)
 {
 	struct request_queue *q;
-	int cpu = (unsigned long)hcpu;
-	/*
-	 * New online cpumask which is going to be set in this hotplug event.
-	 * Declare this cpumasks as global as cpu-hotplug operation is invoked
-	 * one-by-one and dynamically allocating this could result in a failure.
-	 */
-	static struct cpumask online_new;
-
-	/*
-	 * Before hotadded cpu starts handling requests, new mappings must
-	 * be established. Otherwise, these requests in hw queue might
-	 * never be dispatched.
-	 *
-	 * For example, there is a single hw queue (hctx) and two CPU queues
-	 * (ctx0 for CPU0, and ctx1 for CPU1).
-	 *
-	 * Now CPU1 is just onlined and a request is inserted into
-	 * ctx1->rq_list and set bit0 in pending bitmap as ctx1->index_hw is
-	 * still zero.
-	 *
-	 * And then while running hw queue, flush_busy_ctxs() finds bit0 is
-	 * set in pending bitmap and tries to retrieve requests in
-	 * hctx->ctxs[0]->rq_list. But htx->ctxs[0] is a pointer to ctx0,
-	 * so the request in ctx1->rq_list is ignored.
-	 */
-	switch (action & ~CPU_TASKS_FROZEN) {
-	case CPU_DEAD:
-	case CPU_UP_CANCELED:
-		cpumask_copy(&online_new, cpu_online_mask);
-		break;
-	case CPU_UP_PREPARE:
-		cpumask_copy(&online_new, cpu_online_mask);
-		cpumask_set_cpu(cpu, &online_new);
-		break;
-	default:
-		return NOTIFY_OK;
-	}
 
 	mutex_lock(&all_q_mutex);
-
 	/*
 	 * We need to freeze and reinit all existing queues. Freezing
 	 * involves synchronous wait for an RCU grace period and doing it
@@ -2153,13 +2110,43 @@ static int blk_mq_queue_reinit_notify(struct notifier_block *nb,
 	}
 
 	list_for_each_entry(q, &all_q_list, all_q_node)
-		blk_mq_queue_reinit(q, &online_new);
+		blk_mq_queue_reinit(q, &cpuhp_online_new);
 
 	list_for_each_entry(q, &all_q_list, all_q_node)
 		blk_mq_unfreeze_queue(q);
 
 	mutex_unlock(&all_q_mutex);
-	return NOTIFY_OK;
+}
+
+static int blk_mq_queue_reinit_dead(unsigned int cpu)
+{
+	cpumask_copy(&cpuhp_online_new, cpu_online_mask);
+	blk_mq_queue_reinit_work();
+	return 0;
+}
+
+/*
+ * Before hotadded cpu starts handling requests, new mappings must be
+ * established. Otherwise, these requests in hw queue might never be
+ * dispatched.
+ *
+ * For example, there is a single hw queue (hctx) and two CPU queues (ctx0
+ * for CPU0, and ctx1 for CPU1).
+ *
+ * Now CPU1 is just onlined and a request is inserted into ctx1->rq_list
+ * and set bit0 in pending bitmap as ctx1->index_hw is still zero.
+ *
+ * And then while running hw queue, flush_busy_ctxs() finds bit0 is set in
+ * pending bitmap and tries to retrieve requests in hctx->ctxs[0]->rq_list.
+ * But htx->ctxs[0] is a pointer to ctx0, so the request in ctx1->rq_list
+ * is ignored.
+ */
+static int blk_mq_queue_reinit_prepare(unsigned int cpu)
+{
+	cpumask_copy(&cpuhp_online_new, cpu_online_mask);
+	cpumask_set_cpu(cpu, &cpuhp_online_new);
+	blk_mq_queue_reinit_work();
+	return 0;
 }
 
 static int __blk_mq_alloc_rq_maps(struct blk_mq_tag_set *set)
@@ -2378,10 +2365,12 @@ void blk_mq_enable_hotplug(void)
 
 static int __init blk_mq_init(void)
 {
-	blk_mq_cpu_init();
-
-	hotcpu_notifier(blk_mq_queue_reinit_notify, 0);
+	cpuhp_setup_state_multi(CPUHP_BLK_MQ_DEAD, "block/mq:dead", NULL,
+				blk_mq_hctx_notify_dead);
 
+	cpuhp_setup_state_nocalls(CPUHP_BLK_MQ_PREPARE, "block/mq:prepare",
+				  blk_mq_queue_reinit_prepare,
+				  blk_mq_queue_reinit_dead);
 	return 0;
 }
 subsys_initcall(blk_mq_init);
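
The hunks above also show how the old action-code switch maps onto the new registration: CPU_UP_PREPARE becomes the startup callback and CPU_DEAD/CPU_UP_CANCELED become the teardown callback of the CPUHP_BLK_MQ_PREPARE state, with both paths funneling into blk_mq_queue_reinit_work(). The general shape, sketched with placeholder names for the two blk-mq callbacks:

	static int my_prepare(unsigned int cpu)
	{
		/* up path: runs before 'cpu' starts handling work */
		return 0;
	}

	static int my_dead(unsigned int cpu)
	{
		/* down path: runs after 'cpu' died; also on a rolled-back bring-up */
		return 0;
	}

	/* _nocalls: don't invoke the callbacks for CPUs that are already online */
	cpuhp_setup_state_nocalls(CPUHP_BLK_MQ_PREPARE, "block/mq:prepare",
				  my_prepare, my_dead);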
diff --git a/block/blk-mq.h b/block/blk-mq.h
index df6474cb5a4c..e5d25249028c 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -32,13 +32,6 @@ void blk_mq_wake_waiters(struct request_queue *q);
 /*
  * CPU hotplug helpers
  */
-struct blk_mq_cpu_notifier;
-void blk_mq_init_cpu_notifier(struct blk_mq_cpu_notifier *notifier,
-			      int (*fn)(void *, unsigned long, unsigned int),
-			      void *data);
-void blk_mq_register_cpu_notifier(struct blk_mq_cpu_notifier *notifier);
-void blk_mq_unregister_cpu_notifier(struct blk_mq_cpu_notifier *notifier);
-void blk_mq_cpu_init(void);
 void blk_mq_enable_hotplug(void);
 void blk_mq_disable_hotplug(void);
 
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index ef6aebf291ed..535ab2e13d2e 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -7,12 +7,6 @@
 struct blk_mq_tags;
 struct blk_flush_queue;
 
-struct blk_mq_cpu_notifier {
-	struct list_head list;
-	void *data;
-	int (*notify)(void *data, unsigned long action, unsigned int cpu);
-};
-
 struct blk_mq_hw_ctx {
 	struct {
 		spinlock_t lock;
@@ -53,7 +47,7 @@ struct blk_mq_hw_ctx {
 
 	struct delayed_work delay_work;
 
-	struct blk_mq_cpu_notifier cpu_notifier;
+	struct hlist_node cpuhp_dead;
 	struct kobject kobj;
 
 	unsigned long poll_considered;
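
The struct change above is the entire per-hctx cost of the new scheme: the list linkage, data pointer and callback pointer of blk_mq_cpu_notifier collapse into one embedded hlist_node, and blk_mq_hctx_notify_dead() recovers its hctx with hlist_entry_safe(), which is container_of() behind a NULL check, roughly:

	/* hlist_entry_safe(node, struct blk_mq_hw_ctx, cpuhp_dead) is about: */
	hctx = node ? container_of(node, struct blk_mq_hw_ctx, cpuhp_dead) : NULL;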