author	Ming Lei <ming.lei@redhat.com>	2019-08-15 22:28:49 -0400
committer	Thomas Gleixner <tglx@linutronix.de>	2019-08-27 10:31:17 -0400
commit	b1a5a73e64e99faa5f4deef2ae96d7371a0fb5d0 (patch)
tree	b1746030db9be93db21499a30aa7e6fe8a1939d4
parent	53c1788b7d7720565214a466afffdc818d8c6e5f (diff)
genirq/affinity: Spread vectors on node according to nr_cpu ratio
Currently __irq_build_affinity_masks() spreads vectors evenly per node, but when the NUMA nodes have different numbers of CPUs there are cases where not all vectors get spread, which triggers the warning in the spreading code.

Improve the spreading algorithm by

 - assigning vectors according to the ratio of the number of CPUs on a
   node to the number of remaining CPUs.

 - running the assignment from smaller nodes to bigger nodes to
   guarantee that every active node gets allocated at least one vector.

This ensures that all vectors are spread out. Aside from that, the spread becomes fairer if the nodes have different numbers of CPUs.

For example, on the following machine:

	CPU(s):              16
	On-line CPU(s) list: 0-15
	Thread(s) per core:  1
	Core(s) per socket:  8
	Socket(s):           2
	NUMA node(s):        2
	...
	NUMA node0 CPU(s):   0,1,3,5-9,11,13-15
	NUMA node1 CPU(s):   2,4,10,12

When a driver requests 8 vectors, the following spread results:

	irq 31, cpu list 2,4
	irq 32, cpu list 10,12
	irq 33, cpu list 0-1
	irq 34, cpu list 3,5
	irq 35, cpu list 6-7
	irq 36, cpu list 8-9
	irq 37, cpu list 11,13
	irq 38, cpu list 14-15

So Node 0 now has 6 vectors and Node 1 has 2. The original algorithm assigned 4 vectors to each node, which was unfair to Node 0.

[ tglx: Massaged changelog ]

Reported-by: Jon Derrick <jonathan.derrick@intel.com>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Keith Busch <kbusch@kernel.org>
Reviewed-by: Jon Derrick <jonathan.derrick@intel.com>
Link: https://lkml.kernel.org/r/20190816022849.14075-3-ming.lei@redhat.com
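The per-node allocation rule is easiest to see on the example above. The following is a minimal userspace sketch of that rule, not the kernel code: plain arrays stand in for cpumasks, and the two nodes are already ordered smallest-first, as alloc_nodes_vectors() arranges via sort().

/*
 * Userspace sketch of the ratio-based allocation described in the
 * changelog, using the example topology (node1: 4 CPUs, node0: 12 CPUs,
 * 8 vectors requested). Not kernel code.
 */
#include <stdio.h>

int main(void)
{
	unsigned int ncpus[] = { 4, 12 };	/* node1, node0: smallest node first */
	unsigned int node_id[] = { 1, 0 };
	unsigned int remaining_ncpus = 16, numvecs = 8;

	for (int i = 0; i < 2; i++) {
		/* this node's share of the remaining vectors, at least 1 */
		unsigned int nvecs = numvecs * ncpus[i] / remaining_ncpus;

		if (nvecs < 1)
			nvecs = 1;
		printf("node%u: %u CPUs -> %u vectors\n",
		       node_id[i], ncpus[i], nvecs);
		remaining_ncpus -= ncpus[i];
		numvecs -= nvecs;
	}
	return 0;
}

Run as-is this prints 2 vectors for node 1 and 6 for node 0, matching the irq 31-38 spread shown above.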
-rw-r--r--	kernel/irq/affinity.c	239
1 file changed, 200 insertions(+), 39 deletions(-)
diff --git a/kernel/irq/affinity.c b/kernel/irq/affinity.c
index c7cca942bd8a..d905e844bf3a 100644
--- a/kernel/irq/affinity.c
+++ b/kernel/irq/affinity.c
@@ -7,6 +7,7 @@
 #include <linux/kernel.h>
 #include <linux/slab.h>
 #include <linux/cpu.h>
+#include <linux/sort.h>
 
 static void irq_spread_init_one(struct cpumask *irqmsk, struct cpumask *nmsk,
 				unsigned int cpus_per_vec)
@@ -94,6 +95,155 @@ static int get_nodes_in_cpumask(cpumask_var_t *node_to_cpumask,
 	return nodes;
 }
 
+struct node_vectors {
+	unsigned id;
+
+	union {
+		unsigned nvectors;
+		unsigned ncpus;
+	};
+};
+
+static int ncpus_cmp_func(const void *l, const void *r)
+{
+	const struct node_vectors *ln = l;
+	const struct node_vectors *rn = r;
+
+	return ln->ncpus - rn->ncpus;
+}
+
+/*
+ * Allocate vector number for each node, so that for each node:
+ *
+ * 1) the allocated number is >= 1
+ *
+ * 2) the allocated number is <= active CPU number of this node
+ *
+ * The actual allocated total vectors may be less than @numvecs when
+ * active total CPU number is less than @numvecs.
+ *
+ * Active CPUs means the CPUs in '@cpu_mask AND @node_to_cpumask[]'
+ * for each node.
+ */
+static void alloc_nodes_vectors(unsigned int numvecs,
+				const cpumask_var_t *node_to_cpumask,
+				const struct cpumask *cpu_mask,
+				const nodemask_t nodemsk,
+				struct cpumask *nmsk,
+				struct node_vectors *node_vectors)
+{
+	unsigned n, remaining_ncpus = 0;
+
+	for (n = 0; n < nr_node_ids; n++) {
+		node_vectors[n].id = n;
+		node_vectors[n].ncpus = UINT_MAX;
+	}
+
+	for_each_node_mask(n, nodemsk) {
+		unsigned ncpus;
+
+		cpumask_and(nmsk, cpu_mask, node_to_cpumask[n]);
+		ncpus = cpumask_weight(nmsk);
+
+		if (!ncpus)
+			continue;
+		remaining_ncpus += ncpus;
+		node_vectors[n].ncpus = ncpus;
+	}
+
+	numvecs = min_t(unsigned, remaining_ncpus, numvecs);
+
+	sort(node_vectors, nr_node_ids, sizeof(node_vectors[0]),
+	     ncpus_cmp_func, NULL);
+
+	/*
+	 * Allocate vectors for each node according to the ratio of this
+	 * node's nr_cpus to remaining un-assigned ncpus. 'numvecs' is
+	 * bigger than number of active numa nodes. Always start the
+	 * allocation from the node with minimized nr_cpus.
+	 *
+	 * This way guarantees that each active node gets allocated at
+	 * least one vector, and the theory is simple: over-allocation
+	 * is only done when this node is assigned by one vector, so
+	 * other nodes will be allocated >= 1 vector, since 'numvecs' is
+	 * bigger than number of numa nodes.
+	 *
+	 * One perfect invariant is that number of allocated vectors for
+	 * each node is <= CPU count of this node:
+	 *
+	 * 1) suppose there are two nodes: A and B
+	 *	ncpu(X) is CPU count of node X
+	 *	vecs(X) is the vector count allocated to node X via this
+	 *	algorithm
+	 *
+	 *	ncpu(A) <= ncpu(B)
+	 *	ncpu(A) + ncpu(B) = N
+	 *	vecs(A) + vecs(B) = V
+	 *
+	 *	vecs(A) = max(1, round_down(V * ncpu(A) / N))
+	 *	vecs(B) = V - vecs(A)
+	 *
+	 *	both N and V are integer, and 2 <= V <= N, suppose
+	 *	V = N - delta, and 0 <= delta <= N - 2
+	 *
+	 * 2) obviously vecs(A) <= ncpu(A) because:
+	 *
+	 *	if vecs(A) is 1, then vecs(A) <= ncpu(A) given
+	 *	ncpu(A) >= 1
+	 *
+	 *	otherwise,
+	 *		vecs(A) <= V * ncpu(A) / N <= ncpu(A), given V <= N
+	 *
+	 * 3) prove how vecs(B) <= ncpu(B):
+	 *
+	 *	if round_down(V * ncpu(A) / N) == 0, vecs(B) won't be
+	 *	over-allocated, so vecs(B) <= ncpu(B),
+	 *
+	 *	otherwise:
+	 *
+	 *	vecs(A) =
+	 *		round_down(V * ncpu(A) / N) =
+	 *		round_down((N - delta) * ncpu(A) / N) =
+	 *		round_down((N * ncpu(A) - delta * ncpu(A)) / N) >=
+	 *		round_down((N * ncpu(A) - delta * N) / N) =
+	 *		ncpu(A) - delta
+	 *
+	 *	then:
+	 *
+	 *	vecs(A) - V >= ncpu(A) - delta - V
+	 *	=>
+	 *	V - vecs(A) <= V + delta - ncpu(A) = N - ncpu(A)
+	 *	=>
+	 *	vecs(B) <= N - ncpu(A)
+	 *	=>
+	 *	vecs(B) <= ncpu(B)
+	 *
+	 *	For nodes >= 3, it can be thought as one node and another big
+	 *	node given that is exactly what this algorithm is implemented,
+	 *	and we always re-calculate 'remaining_ncpus' & 'numvecs', and
+	 *	finally for each node X: vecs(X) <= ncpu(X).
+	 *
+	 */
+	for (n = 0; n < nr_node_ids; n++) {
+		unsigned nvectors, ncpus;
+
+		if (node_vectors[n].ncpus == UINT_MAX)
+			continue;
+
+		WARN_ON_ONCE(numvecs == 0);
+
+		ncpus = node_vectors[n].ncpus;
+		nvectors = max_t(unsigned, 1,
+				 numvecs * ncpus / remaining_ncpus);
+		WARN_ON_ONCE(nvectors > ncpus);
+
+		node_vectors[n].nvectors = nvectors;
+
+		remaining_ncpus -= ncpus;
+		numvecs -= nvectors;
+	}
+}
+
 static int __irq_build_affinity_masks(unsigned int startvec,
 				      unsigned int numvecs,
 				      unsigned int firstvec,
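The two-node invariant argued in the vecs(A)/vecs(B) comment above can also be sanity-checked by brute force. The following is a standalone userspace check, not part of the patch; the bound of 64 CPUs per node is an arbitrary assumption.

/*
 * Brute-force check of the invariant: with ncpu(A) <= ncpu(B),
 * N = ncpu(A) + ncpu(B), 2 <= V <= N,
 * vecs(A) = max(1, V * ncpu(A) / N) and vecs(B) = V - vecs(A),
 * every node ends up with vecs(X) <= ncpu(X).
 */
#include <assert.h>
#include <stdio.h>

int main(void)
{
	for (unsigned a = 1; a <= 64; a++) {
		for (unsigned b = a; b <= 64; b++) {	/* ncpu(A) <= ncpu(B) */
			unsigned n = a + b;

			for (unsigned v = 2; v <= n; v++) {
				unsigned vecs_a = v * a / n;	/* round_down */

				if (vecs_a < 1)
					vecs_a = 1;
				assert(vecs_a <= a);		/* vecs(A) <= ncpu(A) */
				assert(v - vecs_a <= b);	/* vecs(B) <= ncpu(B) */
			}
		}
	}
	printf("invariant holds for all ncpu(A), ncpu(B) <= 64\n");
	return 0;
}

This is the property that the WARN_ON_ONCE(nvectors > ncpus) in alloc_nodes_vectors() relies on.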
@@ -102,10 +252,11 @@ static int __irq_build_affinity_masks(unsigned int startvec,
 				      struct cpumask *nmsk,
 				      struct irq_affinity_desc *masks)
 {
-	unsigned int n, nodes, cpus_per_vec, extra_vecs, done = 0;
+	unsigned int i, n, nodes, cpus_per_vec, extra_vecs, done = 0;
 	unsigned int last_affv = firstvec + numvecs;
 	unsigned int curvec = startvec;
 	nodemask_t nodemsk = NODE_MASK_NONE;
+	struct node_vectors *node_vectors;
 
 	if (!cpumask_weight(cpu_mask))
 		return 0;
@@ -126,53 +277,57 @@ static int __irq_build_affinity_masks(unsigned int startvec,
 		return numvecs;
 	}
 
-	for_each_node_mask(n, nodemsk) {
-		unsigned int ncpus, v, vecs_to_assign, vecs_per_node;
+	node_vectors = kcalloc(nr_node_ids,
+			       sizeof(struct node_vectors),
+			       GFP_KERNEL);
+	if (!node_vectors)
+		return -ENOMEM;
+
+	/* allocate vector number for each node */
+	alloc_nodes_vectors(numvecs, node_to_cpumask, cpu_mask,
+			    nodemsk, nmsk, node_vectors);
+
+	for (i = 0; i < nr_node_ids; i++) {
+		unsigned int ncpus, v;
+		struct node_vectors *nv = &node_vectors[i];
+
+		if (nv->nvectors == UINT_MAX)
+			continue;
 
 		/* Get the cpus on this node which are in the mask */
-		cpumask_and(nmsk, cpu_mask, node_to_cpumask[n]);
+		cpumask_and(nmsk, cpu_mask, node_to_cpumask[nv->id]);
 		ncpus = cpumask_weight(nmsk);
 		if (!ncpus)
 			continue;
 
-		/*
-		 * Calculate the number of cpus per vector
-		 *
-		 * Spread the vectors evenly per node. If the requested
-		 * vector number has been reached, simply allocate one
-		 * vector for each remaining node so that all nodes can
-		 * be covered
-		 */
-		if (numvecs > done)
-			vecs_per_node = max_t(unsigned,
-					(numvecs - done) / nodes, 1);
-		else
-			vecs_per_node = 1;
-
-		vecs_to_assign = min(vecs_per_node, ncpus);
+		WARN_ON_ONCE(nv->nvectors > ncpus);
 
 		/* Account for rounding errors */
-		extra_vecs = ncpus - vecs_to_assign * (ncpus / vecs_to_assign);
+		extra_vecs = ncpus - nv->nvectors * (ncpus / nv->nvectors);
 
-		for (v = 0; curvec < last_affv && v < vecs_to_assign;
-					curvec++, v++) {
-			cpus_per_vec = ncpus / vecs_to_assign;
+		/* Spread allocated vectors on CPUs of the current node */
+		for (v = 0; v < nv->nvectors; v++, curvec++) {
+			cpus_per_vec = ncpus / nv->nvectors;
 
 			/* Account for extra vectors to compensate rounding errors */
 			if (extra_vecs) {
 				cpus_per_vec++;
 				--extra_vecs;
 			}
+
+			/*
+			 * wrapping has to be considered given 'startvec'
+			 * may start anywhere
+			 */
+			if (curvec >= last_affv)
+				curvec = firstvec;
 			irq_spread_init_one(&masks[curvec].mask, nmsk,
 						cpus_per_vec);
 		}
-
-		done += v;
-		if (curvec >= last_affv)
-			curvec = firstvec;
-		--nodes;
+		done += nv->nvectors;
 	}
-	return done < numvecs ? done : numvecs;
+	kfree(node_vectors);
+	return done;
 }
 
 /*
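The extra_vecs handling in the loop above distributes the division remainder so that every CPU of a node ends up in exactly one vector mask. The following small standalone illustration uses hypothetical counts (11 CPUs over 4 vectors) and is not kernel code.

/*
 * Split ncpus CPUs across nvectors vectors; the first 'extra_vecs'
 * vectors take one extra CPU so no CPU is left unassigned.
 */
#include <stdio.h>

int main(void)
{
	unsigned int ncpus = 11, nvectors = 4;
	unsigned int extra_vecs = ncpus - nvectors * (ncpus / nvectors);

	for (unsigned int v = 0; v < nvectors; v++) {
		unsigned int cpus_per_vec = ncpus / nvectors;

		if (extra_vecs) {	/* compensate the rounding error */
			cpus_per_vec++;
			--extra_vecs;
		}
		printf("vector %u -> %u CPUs\n", v, cpus_per_vec);
	}
	return 0;
}

With these counts the first three vectors get 3 CPUs each and the last one gets 2, so the sum is again 11.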
@@ -184,7 +339,7 @@ static int irq_build_affinity_masks(unsigned int startvec, unsigned int numvecs,
 				    unsigned int firstvec,
 				    struct irq_affinity_desc *masks)
 {
-	unsigned int curvec = startvec, nr_present, nr_others;
+	unsigned int curvec = startvec, nr_present = 0, nr_others = 0;
 	cpumask_var_t *node_to_cpumask;
 	cpumask_var_t nmsk, npresmsk;
 	int ret = -ENOMEM;
@@ -199,15 +354,17 @@ static int irq_build_affinity_masks(unsigned int startvec, unsigned int numvecs,
 	if (!node_to_cpumask)
 		goto fail_npresmsk;
 
-	ret = 0;
 	/* Stabilize the cpumasks */
 	get_online_cpus();
 	build_node_to_cpumask(node_to_cpumask);
 
 	/* Spread on present CPUs starting from affd->pre_vectors */
-	nr_present = __irq_build_affinity_masks(curvec, numvecs,
-						firstvec, node_to_cpumask,
-						cpu_present_mask, nmsk, masks);
+	ret = __irq_build_affinity_masks(curvec, numvecs, firstvec,
+					 node_to_cpumask, cpu_present_mask,
+					 nmsk, masks);
+	if (ret < 0)
+		goto fail_build_affinity;
+	nr_present = ret;
 
 	/*
 	 * Spread on non present CPUs starting from the next vector to be
@@ -220,12 +377,16 @@ static int irq_build_affinity_masks(unsigned int startvec, unsigned int numvecs,
 	else
 		curvec = firstvec + nr_present;
 	cpumask_andnot(npresmsk, cpu_possible_mask, cpu_present_mask);
-	nr_others = __irq_build_affinity_masks(curvec, numvecs,
-					       firstvec, node_to_cpumask,
-					       npresmsk, nmsk, masks);
+	ret = __irq_build_affinity_masks(curvec, numvecs, firstvec,
+					 node_to_cpumask, npresmsk, nmsk,
+					 masks);
+	if (ret >= 0)
+		nr_others = ret;
+
+ fail_build_affinity:
 	put_online_cpus();
 
-	if (nr_present < numvecs)
-		WARN_ON(nr_present + nr_others < numvecs);
+	if (ret >= 0)
+		WARN_ON(nr_present + nr_others < numvecs);
 
 	free_node_to_cpumask(node_to_cpumask);
@@ -235,7 +396,7 @@ static int irq_build_affinity_masks(unsigned int startvec, unsigned int numvecs,
 
  fail_nmsk:
 	free_cpumask_var(nmsk);
-	return ret;
+	return ret < 0 ? ret : 0;
 }
 
 static void default_calc_sets(struct irq_affinity *affd, unsigned int affvecs)
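The last hunks also change the return contract: __irq_build_affinity_masks() may now fail with -ENOMEM (the kcalloc() failure path), and irq_build_affinity_masks() returns 0 on success or a negative errno, instead of relying on vector counts. The following standalone model of that plumbing uses stub functions and is not the kernel code.

#include <errno.h>
#include <stdio.h>

/* Stand-in for __irq_build_affinity_masks(): either fails with -ENOMEM
 * or reports how many vectors it spread. */
static int inner_spread(int fail, unsigned int numvecs)
{
	return fail ? -ENOMEM : (int)numvecs;
}

/* Models the outer function after the patch: forward a negative result,
 * only evaluate the "incomplete spread" warning on success, and
 * normalize the return value to 0 or a negative errno. */
static int outer_build(int fail_present, unsigned int numvecs)
{
	unsigned int nr_present = 0, nr_others = 0;
	int ret;

	ret = inner_spread(fail_present, numvecs);	/* present CPUs */
	if (ret < 0)
		goto fail_build_affinity;
	nr_present = ret;

	ret = inner_spread(0, numvecs - nr_present);	/* non-present CPUs */
	if (ret >= 0)
		nr_others = ret;

 fail_build_affinity:
	if (ret >= 0 && nr_present + nr_others < numvecs)
		fprintf(stderr, "WARN: incomplete spread\n");

	return ret < 0 ? ret : 0;
}

int main(void)
{
	printf("success path: %d\n", outer_build(0, 8));	/* prints 0 */
	printf("failure path: %d\n", outer_build(1, 8));	/* prints -ENOMEM */
	return 0;
}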