aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorChristoph Hellwig <hch@lst.de>2017-06-19 19:37:55 -0400
committerThomas Gleixner <tglx@linutronix.de>2017-06-22 12:21:26 -0400
commit9a0ef98e186d86fb3c1ff3ec267a76f067005f74 (patch)
treecd8ae87f2671ebf4a16210250746ca5101fc0afe
parent3ca57222c36ba31b80aa25de313f3c8ab26a8102 (diff)
genirq/affinity: Assign vectors to all present CPUs
Currently the irq vector spread algorithm is restricted to online CPUs, which ties the IRQ mapping to the currently online devices and doesn't deal nicely with the fact that CPUs could come and go rapidly due to e.g. power management. Instead assign vectors to all present CPUs to avoid this churn. Build a map of all possible CPUs for a given node, as the architectures only provide a map of all online CPUs. Do this dynamically on each call for the vector assignments, which is a bit suboptimal and could be optimized in the future by providing a mapping from the arch code. Signed-off-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Cc: Jens Axboe <axboe@kernel.dk> Cc: linux-block@vger.kernel.org Cc: Sagi Grimberg <sagi@grimberg.me> Cc: Marc Zyngier <marc.zyngier@arm.com> Cc: Michael Ellerman <mpe@ellerman.id.au> Cc: linux-nvme@lists.infradead.org Cc: Keith Busch <keith.busch@intel.com> Cc: Peter Zijlstra <peterz@infradead.org> Link: http://lkml.kernel.org/r/20170603140403.27379-5-hch@lst.de
-rw-r--r--kernel/irq/affinity.c76
1 files changed, 63 insertions, 13 deletions
diff --git a/kernel/irq/affinity.c b/kernel/irq/affinity.c
index e2d356dd7581..d2747f9c5707 100644
--- a/kernel/irq/affinity.c
+++ b/kernel/irq/affinity.c
@@ -1,4 +1,7 @@
1 1/*
2 * Copyright (C) 2016 Thomas Gleixner.
3 * Copyright (C) 2016-2017 Christoph Hellwig.
4 */
2#include <linux/interrupt.h> 5#include <linux/interrupt.h>
3#include <linux/kernel.h> 6#include <linux/kernel.h>
4#include <linux/slab.h> 7#include <linux/slab.h>
@@ -35,13 +38,54 @@ static void irq_spread_init_one(struct cpumask *irqmsk, struct cpumask *nmsk,
35 } 38 }
36} 39}
37 40
38static int get_nodes_in_cpumask(const struct cpumask *mask, nodemask_t *nodemsk) 41static cpumask_var_t *alloc_node_to_present_cpumask(void)
42{
43 cpumask_var_t *masks;
44 int node;
45
46 masks = kcalloc(nr_node_ids, sizeof(cpumask_var_t), GFP_KERNEL);
47 if (!masks)
48 return NULL;
49
50 for (node = 0; node < nr_node_ids; node++) {
51 if (!zalloc_cpumask_var(&masks[node], GFP_KERNEL))
52 goto out_unwind;
53 }
54
55 return masks;
56
57out_unwind:
58 while (--node >= 0)
59 free_cpumask_var(masks[node]);
60 kfree(masks);
61 return NULL;
62}
63
64static void free_node_to_present_cpumask(cpumask_var_t *masks)
65{
66 int node;
67
68 for (node = 0; node < nr_node_ids; node++)
69 free_cpumask_var(masks[node]);
70 kfree(masks);
71}
72
73static void build_node_to_present_cpumask(cpumask_var_t *masks)
74{
75 int cpu;
76
77 for_each_present_cpu(cpu)
78 cpumask_set_cpu(cpu, masks[cpu_to_node(cpu)]);
79}
80
81static int get_nodes_in_cpumask(cpumask_var_t *node_to_present_cpumask,
82 const struct cpumask *mask, nodemask_t *nodemsk)
39{ 83{
40 int n, nodes = 0; 84 int n, nodes = 0;
41 85
42 /* Calculate the number of nodes in the supplied affinity mask */ 86 /* Calculate the number of nodes in the supplied affinity mask */
43 for_each_online_node(n) { 87 for_each_node(n) {
44 if (cpumask_intersects(mask, cpumask_of_node(n))) { 88 if (cpumask_intersects(mask, node_to_present_cpumask[n])) {
45 node_set(n, *nodemsk); 89 node_set(n, *nodemsk);
46 nodes++; 90 nodes++;
47 } 91 }
@@ -64,7 +108,7 @@ irq_create_affinity_masks(int nvecs, const struct irq_affinity *affd)
64 int last_affv = affv + affd->pre_vectors; 108 int last_affv = affv + affd->pre_vectors;
65 nodemask_t nodemsk = NODE_MASK_NONE; 109 nodemask_t nodemsk = NODE_MASK_NONE;
66 struct cpumask *masks; 110 struct cpumask *masks;
67 cpumask_var_t nmsk; 111 cpumask_var_t nmsk, *node_to_present_cpumask;
68 112
69 if (!zalloc_cpumask_var(&nmsk, GFP_KERNEL)) 113 if (!zalloc_cpumask_var(&nmsk, GFP_KERNEL))
70 return NULL; 114 return NULL;
@@ -73,13 +117,19 @@ irq_create_affinity_masks(int nvecs, const struct irq_affinity *affd)
73 if (!masks) 117 if (!masks)
74 goto out; 118 goto out;
75 119
120 node_to_present_cpumask = alloc_node_to_present_cpumask();
121 if (!node_to_present_cpumask)
122 goto out;
123
76 /* Fill out vectors at the beginning that don't need affinity */ 124 /* Fill out vectors at the beginning that don't need affinity */
77 for (curvec = 0; curvec < affd->pre_vectors; curvec++) 125 for (curvec = 0; curvec < affd->pre_vectors; curvec++)
78 cpumask_copy(masks + curvec, irq_default_affinity); 126 cpumask_copy(masks + curvec, irq_default_affinity);
79 127
80 /* Stabilize the cpumasks */ 128 /* Stabilize the cpumasks */
81 get_online_cpus(); 129 get_online_cpus();
82 nodes = get_nodes_in_cpumask(cpu_online_mask, &nodemsk); 130 build_node_to_present_cpumask(node_to_present_cpumask);
131 nodes = get_nodes_in_cpumask(node_to_present_cpumask, cpu_present_mask,
132 &nodemsk);
83 133
84 /* 134 /*
85 * If the number of nodes in the mask is greater than or equal the 135 * If the number of nodes in the mask is greater than or equal the
@@ -87,7 +137,8 @@ irq_create_affinity_masks(int nvecs, const struct irq_affinity *affd)
87 */ 137 */
88 if (affv <= nodes) { 138 if (affv <= nodes) {
89 for_each_node_mask(n, nodemsk) { 139 for_each_node_mask(n, nodemsk) {
90 cpumask_copy(masks + curvec, cpumask_of_node(n)); 140 cpumask_copy(masks + curvec,
141 node_to_present_cpumask[n]);
91 if (++curvec == last_affv) 142 if (++curvec == last_affv)
92 break; 143 break;
93 } 144 }
@@ -101,7 +152,7 @@ irq_create_affinity_masks(int nvecs, const struct irq_affinity *affd)
101 vecs_per_node = (affv - (curvec - affd->pre_vectors)) / nodes; 152 vecs_per_node = (affv - (curvec - affd->pre_vectors)) / nodes;
102 153
103 /* Get the cpus on this node which are in the mask */ 154 /* Get the cpus on this node which are in the mask */
104 cpumask_and(nmsk, cpu_online_mask, cpumask_of_node(n)); 155 cpumask_and(nmsk, cpu_present_mask, node_to_present_cpumask[n]);
105 156
106 /* Calculate the number of cpus per vector */ 157 /* Calculate the number of cpus per vector */
107 ncpus = cpumask_weight(nmsk); 158 ncpus = cpumask_weight(nmsk);
@@ -133,6 +184,7 @@ done:
133 /* Fill out vectors at the end that don't need affinity */ 184 /* Fill out vectors at the end that don't need affinity */
134 for (; curvec < nvecs; curvec++) 185 for (; curvec < nvecs; curvec++)
135 cpumask_copy(masks + curvec, irq_default_affinity); 186 cpumask_copy(masks + curvec, irq_default_affinity);
187 free_node_to_present_cpumask(node_to_present_cpumask);
136out: 188out:
137 free_cpumask_var(nmsk); 189 free_cpumask_var(nmsk);
138 return masks; 190 return masks;
@@ -147,12 +199,10 @@ int irq_calc_affinity_vectors(int maxvec, const struct irq_affinity *affd)
147{ 199{
148 int resv = affd->pre_vectors + affd->post_vectors; 200 int resv = affd->pre_vectors + affd->post_vectors;
149 int vecs = maxvec - resv; 201 int vecs = maxvec - resv;
150 int cpus; 202 int ret;
151 203
152 /* Stabilize the cpumasks */
153 get_online_cpus(); 204 get_online_cpus();
154 cpus = cpumask_weight(cpu_online_mask); 205 ret = min_t(int, cpumask_weight(cpu_present_mask), vecs) + resv;
155 put_online_cpus(); 206 put_online_cpus();
156 207 return ret;
157 return min(cpus, vecs) + resv;
158} 208}