cpumask_set_cpu_local_first => cpumask_local_spread, lament

da91309e0a7e (cpumask: Utility function to set n'th cpu...) created a genuinely weird function. I never saw it before, it went through DaveM. (He only does this to make us other maintainers feel better about our own mistakes.) cpumask_set_cpu_local_first's purpose is to say "I need to spread things across N online cpus, choose the ones on this numa node first"; you call it in a loop. It can fail. One of the two callers ignores this, the other aborts and fails the device open. It can fail in two ways: allocating the off-stack cpumask, or through a convoluted codepath which AFAICT can only occur if cpu_online_mask changes. That shouldn't happen, because if cpu_online_mask can change while you call this, it could return a now-offline cpu anyway. It contains a nonsensical test "!cpumask_of_node(numa_node)". This was drawn to my attention by Geert, who said this causes a warning on Sparc. It sets a single bit in a cpumask instead of returning a cpu number, because that's what the callers want. It could be made more efficient by passing the previous cpu rather than an index, but that would be more invasive to the callers.
Fixes: da91309e0a7e8966d916a74cce42ed170fde06bf
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> (then rebased)
Tested-by: Amir Vadai <amirv@mellanox.com>
Acked-by: Amir Vadai <amirv@mellanox.com>
Acked-by: David S. Miller <davem@davemloft.net>
author: Rusty Russell <rusty@rustcorp.com.au> 2015-05-08 13:44:13 -0400
committer: Rusty Russell <rusty@rustcorp.com.au> 2015-05-27 21:35:20 -0400
commit: f36963c9d3f6f415732710da3acdd8608a9fa0e5 (patch)
tree: 1a7bdf324a50bc75efe6c57c2525ccffd9c385f4
parent: 37815bf866ab6722a47550f8d25ad3f1a16a680c (diff)
5 files changed, 37 insertions, 65 deletions
diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c
index a6dcbf850c1f..6f9ffb9026cd 100644
--- a/drivers/net/ethernet/emulex/benet/be_main.c
+++ b/drivers/net/ethernet/emulex/benet/be_main.c
@@ -2358,11 +2358,11 @@ static int be_evt_queues_create(struct be_adapter *adapter)
                                    adapter->cfg_num_qs);
        for_all_evt_queues(adapter, eqo, i) {
+                int numa_node = dev_to_node(&adapter->pdev->dev);
                if (!zalloc_cpumask_var(&eqo->affinity_mask, GFP_KERNEL))
                        return -ENOMEM;
-                cpumask_set_cpu_local_first(i, dev_to_node(&adapter->pdev->dev),
+                cpumask_set_cpu(cpumask_local_spread(i, numa_node),
-                                            eqo->affinity_mask);
+                                eqo->affinity_mask);
                netif_napi_add(adapter->netdev, &eqo->napi, be_poll,
                               BE_NAPI_WEIGHT);
                napi_hash_add(&eqo->napi);
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index 32f5ec737472..cf467a9f6cc7 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -1501,17 +1501,13 @@ static int mlx4_en_init_affinity_hint(struct mlx4_en_priv *priv, int ring_idx)
 {
        struct mlx4_en_rx_ring *ring = priv->rx_ring[ring_idx];
        int numa_node = priv->mdev->dev->numa_node;
-        int ret = 0;
        if (!zalloc_cpumask_var(&ring->affinity_mask, GFP_KERNEL))
                return -ENOMEM;
-        ret = cpumask_set_cpu_local_first(ring_idx, numa_node,
+        cpumask_set_cpu(cpumask_local_spread(ring_idx, numa_node),
-                                          ring->affinity_mask);
+                        ring->affinity_mask);
-        if (ret)
+        return 0;
-                free_cpumask_var(ring->affinity_mask);
-        return ret;
 }
 static void mlx4_en_free_affinity_hint(struct mlx4_en_priv *priv, int ring_idx)
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
index f7bf312fb443..7bed3a88579f 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
@@ -144,9 +144,9 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv,
        ring->queue_index = queue_index;
        if (queue_index < priv->num_tx_rings_p_up)
-                cpumask_set_cpu_local_first(queue_index,
+                cpumask_set_cpu(cpumask_local_spread(queue_index,
-                                            priv->mdev->dev->numa_node,
+                                                     priv->mdev->dev->numa_node),
-                                            &ring->affinity_mask);
+                                &ring->affinity_mask);
        *pring = ring;
        return 0;
diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index 27e285b92b5f..59915ea5373c 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -151,10 +151,8 @@ static inline unsigned int cpumask_any_but(const struct cpumask *mask,
        return 1;
 }
-static inline int cpumask_set_cpu_local_first(int i, int numa_node, cpumask_t *dstp)
+static inline unsigned int cpumask_local_spread(unsigned int i, int node)
 {
-        set_bit(0, cpumask_bits(dstp));
        return 0;
 }
@@ -208,7 +206,7 @@ static inline unsigned int cpumask_next_zero(int n, const struct cpumask *srcp)
 int cpumask_next_and(int n, const struct cpumask *, const struct cpumask *);
 int cpumask_any_but(const struct cpumask *mask, unsigned int cpu);
-int cpumask_set_cpu_local_first(int i, int numa_node, cpumask_t *dstp);
+unsigned int cpumask_local_spread(unsigned int i, int node);
 /**
 * for_each_cpu - iterate over every cpu in a mask
diff --git a/lib/cpumask.c b/lib/cpumask.c
index 830dd5dec40f..5f627084f2e9 100644
--- a/lib/cpumask.c
+++ b/lib/cpumask.c
@@ -139,64 +139,42 @@ void __init free_bootmem_cpumask_var(cpumask_var_t mask)
 #endif
 /**
- * cpumask_set_cpu_local_first - set i'th cpu with local numa cpu's first
+ * cpumask_local_spread - select the i'th cpu with local numa cpu's first
- *
 * @i: index number
- * @numa_node: local numa_node
+ * @node: local numa_node
- * @dstp: cpumask with the relevant cpu bit set according to the policy
 *
- * This function sets the cpumask according to a numa aware policy.
+ * This function selects an online CPU according to a numa aware policy;
- * cpumask could be used as an affinity hint for the IRQ related to a
+ * local cpus are returned first, followed by non-local ones, then it
- * queue. When the policy is to spread queues across cores - local cores
+ * wraps around.
- * first.
 *
- * Returns 0 on success, -ENOMEM for no memory, and -EAGAIN when failed to set
+ * It's not very efficient, but useful for setup.
- * the cpu bit and need to re-call the function.
 */
-int cpumask_set_cpu_local_first(int i, int numa_node, cpumask_t *dstp)
+unsigned int cpumask_local_spread(unsigned int i, int node)
 {
-        cpumask_var_t mask;
        int cpu;
-        int ret = 0;
-        if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
-                return -ENOMEM;
+        /* Wrap: we always want a cpu. */
        i %= num_online_cpus();
-        if (numa_node == -1 || !cpumask_of_node(numa_node)) {
+        if (node == -1) {
-                /* Use all online cpu's for non numa aware system */
+                for_each_cpu(cpu, cpu_online_mask)
-                cpumask_copy(mask, cpu_online_mask);
+                        if (i-- == 0)
+                                return cpu;
        } else {
-                int n;
+                /* NUMA first. */
+                for_each_cpu_and(cpu, cpumask_of_node(node), cpu_online_mask)
-                cpumask_and(mask,
+                        if (i-- == 0)
-                            cpumask_of_node(numa_node), cpu_online_mask);
+                                return cpu;
-                n = cpumask_weight(mask);
+                for_each_cpu(cpu, cpu_online_mask) {
-                if (i >= n) {
+                        /* Skip NUMA nodes, done above. */
-                        i -= n;
+                        if (cpumask_test_cpu(cpu, cpumask_of_node(node)))
+                                continue;
-                        /* If index > number of local cpu's, mask out local
-                         * cpu's
+                        if (i-- == 0)
-                         */
+                                return cpu;
-                        cpumask_andnot(mask, cpu_online_mask, mask);
                }
        }
+        BUG();
-        for_each_cpu(cpu, mask) {
-                if (--i < 0)
-                        goto out;
-        }
-        ret = -EAGAIN;
-out:
-        free_cpumask_var(mask);
-        if (!ret)
-                cpumask_set_cpu(cpu, dstp);
-        return ret;
 }
-EXPORT_SYMBOL(cpumask_set_cpu_local_first);
+EXPORT_SYMBOL(cpumask_local_spread);
author	Rusty Russell <rusty@rustcorp.com.au>	2015-05-08 13:44:13 -0400
committer	Rusty Russell <rusty@rustcorp.com.au>	2015-05-27 21:35:20 -0400
commit	f36963c9d3f6f415732710da3acdd8608a9fa0e5 (patch)
tree	1a7bdf324a50bc75efe6c57c2525ccffd9c385f4
parent	37815bf866ab6722a47550f8d25ad3f1a16a680c (diff)

diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index a6dcbf850c1f..6f9ffb9026cd 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c
@@ -2358,11 +2358,11 @@ static int be_evt_queues_create(struct be_adapter *adapter)
2358	adapter->cfg_num_qs);	2358	adapter->cfg_num_qs);
2359		2359
2360	for_all_evt_queues(adapter, eqo, i) {	2360	for_all_evt_queues(adapter, eqo, i) {
		2361	int numa_node = dev_to_node(&adapter->pdev->dev);
2361	if (!zalloc_cpumask_var(&eqo->affinity_mask, GFP_KERNEL))	2362	if (!zalloc_cpumask_var(&eqo->affinity_mask, GFP_KERNEL))
2362	return -ENOMEM;	2363	return -ENOMEM;
2363	cpumask_set_cpu_local_first(i, dev_to_node(&adapter->pdev->dev),	2364	cpumask_set_cpu(cpumask_local_spread(i, numa_node),
2364	eqo->affinity_mask);	2365	eqo->affinity_mask);
2365
2366	netif_napi_add(adapter->netdev, &eqo->napi, be_poll,	2366	netif_napi_add(adapter->netdev, &eqo->napi, be_poll,
2367	BE_NAPI_WEIGHT);	2367	BE_NAPI_WEIGHT);
2368	napi_hash_add(&eqo->napi);	2368	napi_hash_add(&eqo->napi);


diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 32f5ec737472..cf467a9f6cc7 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -1501,17 +1501,13 @@ static int mlx4_en_init_affinity_hint(struct mlx4_en_priv *priv, int ring_idx)
1501	{	1501	{
1502	struct mlx4_en_rx_ring *ring = priv->rx_ring[ring_idx];	1502	struct mlx4_en_rx_ring *ring = priv->rx_ring[ring_idx];
1503	int numa_node = priv->mdev->dev->numa_node;	1503	int numa_node = priv->mdev->dev->numa_node;
1504	int ret = 0;
1505		1504
1506	if (!zalloc_cpumask_var(&ring->affinity_mask, GFP_KERNEL))	1505	if (!zalloc_cpumask_var(&ring->affinity_mask, GFP_KERNEL))
1507	return -ENOMEM;	1506	return -ENOMEM;
1508		1507
1509	ret = cpumask_set_cpu_local_first(ring_idx, numa_node,	1508	cpumask_set_cpu(cpumask_local_spread(ring_idx, numa_node),
1510	ring->affinity_mask);	1509	ring->affinity_mask);
1511	if (ret)	1510	return 0;
1512	free_cpumask_var(ring->affinity_mask);
1513
1514	return ret;
1515	}	1511	}
1516		1512
1517	static void mlx4_en_free_affinity_hint(struct mlx4_en_priv *priv, int ring_idx)	1513	static void mlx4_en_free_affinity_hint(struct mlx4_en_priv *priv, int ring_idx)


diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c index f7bf312fb443..7bed3a88579f 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
@@ -144,9 +144,9 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv,
144	ring->queue_index = queue_index;	144	ring->queue_index = queue_index;
145		145
146	if (queue_index < priv->num_tx_rings_p_up)	146	if (queue_index < priv->num_tx_rings_p_up)
147	cpumask_set_cpu_local_first(queue_index,	147	cpumask_set_cpu(cpumask_local_spread(queue_index,
148	priv->mdev->dev->numa_node,	148	priv->mdev->dev->numa_node),
149	&ring->affinity_mask);	149	&ring->affinity_mask);
150		150
151	*pring = ring;	151	*pring = ring;
152	return 0;	152	return 0;


diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 27e285b92b5f..59915ea5373c 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h
@@ -151,10 +151,8 @@ static inline unsigned int cpumask_any_but(const struct cpumask *mask,
151	return 1;	151	return 1;
152	}	152	}
153		153
154	static inline int cpumask_set_cpu_local_first(int i, int numa_node, cpumask_t *dstp)	154	static inline unsigned int cpumask_local_spread(unsigned int i, int node)
155	{	155	{
156	set_bit(0, cpumask_bits(dstp));
157
158	return 0;	156	return 0;
159	}	157	}
160		158
@@ -208,7 +206,7 @@ static inline unsigned int cpumask_next_zero(int n, const struct cpumask *srcp)
208		206
209	int cpumask_next_and(int n, const struct cpumask *, const struct cpumask *);	207	int cpumask_next_and(int n, const struct cpumask *, const struct cpumask *);
210	int cpumask_any_but(const struct cpumask *mask, unsigned int cpu);	208	int cpumask_any_but(const struct cpumask *mask, unsigned int cpu);
211	int cpumask_set_cpu_local_first(int i, int numa_node, cpumask_t *dstp);	209	unsigned int cpumask_local_spread(unsigned int i, int node);
212		210
213	/**	211	/**
214	* for_each_cpu - iterate over every cpu in a mask	212	* for_each_cpu - iterate over every cpu in a mask


diff --git a/lib/cpumask.c b/lib/cpumask.c index 830dd5dec40f..5f627084f2e9 100644 --- a/lib/cpumask.c +++ b/lib/cpumask.c
@@ -139,64 +139,42 @@ void __init free_bootmem_cpumask_var(cpumask_var_t mask)
139	#endif	139	#endif
140		140
141	/**	141	/**
142	* cpumask_set_cpu_local_first - set i'th cpu with local numa cpu's first	142	* cpumask_local_spread - select the i'th cpu with local numa cpu's first
143	*
144	* @i: index number	143	* @i: index number
145	* @numa_node: local numa_node	144	* @node: local numa_node
146	* @dstp: cpumask with the relevant cpu bit set according to the policy
147	*	145	*
148	* This function sets the cpumask according to a numa aware policy.	146	* This function selects an online CPU according to a numa aware policy;
149	* cpumask could be used as an affinity hint for the IRQ related to a	147	* local cpus are returned first, followed by non-local ones, then it
150	* queue. When the policy is to spread queues across cores - local cores	148	* wraps around.
151	* first.
152	*	149	*
153	* Returns 0 on success, -ENOMEM for no memory, and -EAGAIN when failed to set	150	* It's not very efficient, but useful for setup.
154	* the cpu bit and need to re-call the function.
155	*/	151	*/
156	int cpumask_set_cpu_local_first(int i, int numa_node, cpumask_t *dstp)	152	unsigned int cpumask_local_spread(unsigned int i, int node)
157	{	153	{
158	cpumask_var_t mask;
159	int cpu;	154	int cpu;
160	int ret = 0;
161
162	if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
163	return -ENOMEM;
164		155
		156	/* Wrap: we always want a cpu. */
165	i %= num_online_cpus();	157	i %= num_online_cpus();
166		158
167	if (numa_node == -1 || !cpumask_of_node(numa_node)) {	159	if (node == -1) {
168	/* Use all online cpu's for non numa aware system */	160	for_each_cpu(cpu, cpu_online_mask)
169	cpumask_copy(mask, cpu_online_mask);	161	if (i-- == 0)
		162	return cpu;
170	} else {	163	} else {
171	int n;	164	/* NUMA first. */
172		165	for_each_cpu_and(cpu, cpumask_of_node(node), cpu_online_mask)
173	cpumask_and(mask,	166	if (i-- == 0)
174	cpumask_of_node(numa_node), cpu_online_mask);	167	return cpu;
175		168
176	n = cpumask_weight(mask);	169	for_each_cpu(cpu, cpu_online_mask) {
177	if (i >= n) {	170	/* Skip NUMA nodes, done above. */
178	i -= n;	171	if (cpumask_test_cpu(cpu, cpumask_of_node(node)))
179		172	continue;
180	/* If index > number of local cpu's, mask out local	173
181	* cpu's	174	if (i-- == 0)
182	*/	175	return cpu;
183	cpumask_andnot(mask, cpu_online_mask, mask);
184	}	176	}
185	}	177	}
186		178	BUG();
187	for_each_cpu(cpu, mask) {
188	if (--i < 0)
189	goto out;
190	}
191
192	ret = -EAGAIN;
193
194	out:
195	free_cpumask_var(mask);
196
197	if (!ret)
198	cpumask_set_cpu(cpu, dstp);
199
200	return ret;
201	}	179	}
202	EXPORT_SYMBOL(cpumask_set_cpu_local_first);	180	EXPORT_SYMBOL(cpumask_local_spread);