author	Thomas Gleixner <tglx@linutronix.de>	2016-09-14 10:18:48 -0400
committer	Thomas Gleixner <tglx@linutronix.de>	2016-09-14 16:11:08 -0400
commit	34c3d9819fda464be4f1bec59b63353814f76c73 (patch)
tree	752917d25a7099573d698595578a334cce9d78af	/kernel/irq/affinity.c
parent	28f4b04143c56135b1ca742fc64b664ed04de6a4 (diff)
genirq/affinity: Provide smarter irq spreading infrastructure
The current irq spreading infrastructure just looks at a cpumask and tries to spread the interrupts over that mask. That's suboptimal, as it does not take NUMA nodes into account.

Change the logic so the interrupts are spread across NUMA nodes first and then inside each node. If there are more cpus than vectors on a node, the affinity of a vector is set to several cpus. If HT siblings are available, we take that into account and try to assign all siblings of a cpu to a single vector.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Christoph Hellwig <hch@lst.de>
Cc: axboe@fb.com
Cc: keith.busch@intel.com
Cc: agordeev@redhat.com
Cc: linux-block@vger.kernel.org
Link: http://lkml.kernel.org/r/1473862739-15032-3-git-send-email-hch@lst.de
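For context, here is a minimal caller-side sketch (not part of this patch) of how the two new helpers could be used together. The driver function my_dev_setup_vectors, the error handling, and the assumption that the declarations are exported via <linux/interrupt.h> are illustrative only; the patch itself introduces just irq_calc_affinity_vectors() and irq_create_affinity_masks(), and the caller owns the returned mask array.

/*
 * Hypothetical caller sketch: clamp the requested vector count to the
 * available cpus, then build one spread-out affinity mask per vector.
 * Only the two irq_* helpers below come from this patch.
 */
#include <linux/interrupt.h>
#include <linux/slab.h>

static int my_dev_setup_vectors(int max_vecs)
{
	struct cpumask *masks;
	int nvec;

	/* Never ask for more vectors than there are usable cpus */
	nvec = irq_calc_affinity_vectors(NULL, max_vecs);

	/* NULL affinity means "spread over cpu_online_mask" */
	masks = irq_create_affinity_masks(NULL, nvec);
	if (!masks)
		return -ENOMEM;

	/* ... hand masks[0..nvec-1] to the interrupt allocation path ... */

	kfree(masks);
	return nvec;
}

Passing NULL as the affinity argument falls back to cpu_online_mask inside both helpers, so a driver without special placement requirements does not need to construct a mask of its own.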
Diffstat (limited to 'kernel/irq/affinity.c')
-rw-r--r--	kernel/irq/affinity.c	149
1 file changed, 149 insertions(+), 0 deletions(-)
diff --git a/kernel/irq/affinity.c b/kernel/irq/affinity.c
index 32f6cfcff212..7812fecc6e2f 100644
--- a/kernel/irq/affinity.c
+++ b/kernel/irq/affinity.c
@@ -4,6 +4,155 @@
 #include <linux/slab.h>
 #include <linux/cpu.h>
 
+static void irq_spread_init_one(struct cpumask *irqmsk, struct cpumask *nmsk,
+				int cpus_per_vec)
+{
+	const struct cpumask *siblmsk;
+	int cpu, sibl;
+
+	for ( ; cpus_per_vec > 0; ) {
+		cpu = cpumask_first(nmsk);
+
+		/* Should not happen, but I'm too lazy to think about it */
+		if (cpu >= nr_cpu_ids)
+			return;
+
+		cpumask_clear_cpu(cpu, nmsk);
+		cpumask_set_cpu(cpu, irqmsk);
+		cpus_per_vec--;
+
+		/* If the cpu has siblings, use them first */
+		siblmsk = topology_sibling_cpumask(cpu);
+		for (sibl = -1; cpus_per_vec > 0; ) {
+			sibl = cpumask_next(sibl, siblmsk);
+			if (sibl >= nr_cpu_ids)
+				break;
+			if (!cpumask_test_and_clear_cpu(sibl, nmsk))
+				continue;
+			cpumask_set_cpu(sibl, irqmsk);
+			cpus_per_vec--;
+		}
+	}
+}
+
+static int get_nodes_in_cpumask(const struct cpumask *mask, nodemask_t *nodemsk)
+{
+	int n, nodes;
+
+	/* Calculate the number of nodes in the supplied affinity mask */
+	for (n = 0, nodes = 0; n < num_online_nodes(); n++) {
+		if (cpumask_intersects(mask, cpumask_of_node(n))) {
+			node_set(n, *nodemsk);
+			nodes++;
+		}
+	}
+	return nodes;
+}
+
+/**
+ * irq_create_affinity_masks - Create affinity masks for multiqueue spreading
+ * @affinity:	The affinity mask to spread. If NULL cpu_online_mask
+ *		is used
+ * @nvec:	The number of vectors
+ *
+ * Returns the masks pointer or NULL if allocation failed.
+ */
+struct cpumask *irq_create_affinity_masks(const struct cpumask *affinity,
+					  int nvec)
+{
+	int n, nodes, vecs_per_node, cpus_per_vec, extra_vecs, curvec = 0;
+	nodemask_t nodemsk = NODE_MASK_NONE;
+	struct cpumask *masks;
+	cpumask_var_t nmsk;
+
+	if (!zalloc_cpumask_var(&nmsk, GFP_KERNEL))
+		return NULL;
+
+	masks = kzalloc(nvec * sizeof(*masks), GFP_KERNEL);
+	if (!masks)
+		goto out;
+
+	/* Stabilize the cpumasks */
+	get_online_cpus();
+	/* If the supplied affinity mask is NULL, use cpu online mask */
+	if (!affinity)
+		affinity = cpu_online_mask;
+
+	nodes = get_nodes_in_cpumask(affinity, &nodemsk);
+
+	/*
+	 * If the number of nodes in the mask is less than or equal the
+	 * number of vectors we just spread the vectors across the nodes.
+	 */
+	if (nvec <= nodes) {
+		for_each_node_mask(n, nodemsk) {
+			cpumask_copy(masks + curvec, cpumask_of_node(n));
+			if (++curvec == nvec)
+				break;
+		}
+		goto outonl;
+	}
+
+	/* Spread the vectors per node */
+	vecs_per_node = nvec / nodes;
+	/* Account for rounding errors */
+	extra_vecs = nvec - (nodes * vecs_per_node);
+
+	for_each_node_mask(n, nodemsk) {
+		int ncpus, v, vecs_to_assign = vecs_per_node;
+
+		/* Get the cpus on this node which are in the mask */
+		cpumask_and(nmsk, affinity, cpumask_of_node(n));
+
+		/* Calculate the number of cpus per vector */
+		ncpus = cpumask_weight(nmsk);
+
+		for (v = 0; curvec < nvec && v < vecs_to_assign; curvec++, v++) {
+			cpus_per_vec = ncpus / vecs_to_assign;
+
+			/* Account for extra vectors to compensate rounding errors */
+			if (extra_vecs) {
+				cpus_per_vec++;
+				if (!--extra_vecs)
+					vecs_per_node++;
+			}
+			irq_spread_init_one(masks + curvec, nmsk, cpus_per_vec);
+		}
+
+		if (curvec >= nvec)
+			break;
+	}
+
+outonl:
+	put_online_cpus();
+out:
+	free_cpumask_var(nmsk);
+	return masks;
+}
+
+/**
+ * irq_calc_affinity_vectors - Calculate the optimal number of vectors for a given affinity mask
+ * @affinity:	The affinity mask to spread. If NULL cpu_online_mask
+ *		is used
+ * @maxvec:	The maximum number of vectors available
+ */
+int irq_calc_affinity_vectors(const struct cpumask *affinity, int maxvec)
+{
+	int cpus, ret;
+
+	/* Stabilize the cpumasks */
+	get_online_cpus();
+	/* If the supplied affinity mask is NULL, use cpu online mask */
+	if (!affinity)
+		affinity = cpu_online_mask;
+
+	cpus = cpumask_weight(affinity);
+	ret = (cpus < maxvec) ? cpus : maxvec;
+
+	put_online_cpus();
+	return ret;
+}
+
 static int get_first_sibling(unsigned int cpu)
 {
 	unsigned int ret;