about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Rusty Russell <rusty@rustcorp.com.au> 2015-05-08 13:44:13 -0400
committer: Rusty Russell <rusty@rustcorp.com.au> 2015-05-27 21:35:20 -0400
commit: f36963c9d3f6f415732710da3acdd8608a9fa0e5 (patch)
tree: 1a7bdf324a50bc75efe6c57c2525ccffd9c385f4
parent: 37815bf866ab6722a47550f8d25ad3f1a16a680c (diff)
cpumask_set_cpu_local_first => cpumask_local_spread, lament
da91309e0a7e (cpumask: Utility function to set n'th cpu...) created a genuinely weird function. I never saw it before, it went through DaveM. (He only does this to make us other maintainers feel better about our own mistakes.)

cpumask_set_cpu_local_first's purpose is to say "I need to spread things across N online cpus, choose the ones on this numa node first"; you call it in a loop.

It can fail. One of the two callers ignores this, the other aborts and fails the device open.

It can fail in two ways: allocating the off-stack cpumask, or through a convoluted codepath which AFAICT can only occur if cpu_online_mask changes. Which shouldn't happen, because if cpu_online_mask can change while you call this, it could return a now-offline cpu anyway.

It contains a nonsensical test "!cpumask_of_node(numa_node)". This was drawn to my attention by Geert, who said this causes a warning on Sparc.

It sets a single bit in a cpumask instead of returning a cpu number, because that's what the callers want.

It could be made more efficient by passing the previous cpu rather than an index, but that would be more invasive to the callers.

Fixes: da91309e0a7e8966d916a74cce42ed170fde06bf
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> (then rebased)
Tested-by: Amir Vadai <amirv@mellanox.com>
Acked-by: Amir Vadai <amirv@mellanox.com>
Acked-by: David S. Miller <davem@davemloft.net>
-rw-r--r--  drivers/net/ethernet/emulex/benet/be_main.c     6
-rw-r--r--  drivers/net/ethernet/mellanox/mlx4/en_netdev.c  10
-rw-r--r--  drivers/net/ethernet/mellanox/mlx4/en_tx.c      6
-rw-r--r--  include/linux/cpumask.h                         6
-rw-r--r--  lib/cpumask.c                                   74
5 files changed, 37 insertions, 65 deletions
diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c
index a6dcbf850c1f..6f9ffb9026cd 100644
--- a/drivers/net/ethernet/emulex/benet/be_main.c
+++ b/drivers/net/ethernet/emulex/benet/be_main.c
@@ -2358,11 +2358,11 @@ static int be_evt_queues_create(struct be_adapter *adapter)
2358 adapter->cfg_num_qs); 2358 adapter->cfg_num_qs);
2359 2359
2360 for_all_evt_queues(adapter, eqo, i) { 2360 for_all_evt_queues(adapter, eqo, i) {
2361 int numa_node = dev_to_node(&adapter->pdev->dev);
2361 if (!zalloc_cpumask_var(&eqo->affinity_mask, GFP_KERNEL)) 2362 if (!zalloc_cpumask_var(&eqo->affinity_mask, GFP_KERNEL))
2362 return -ENOMEM; 2363 return -ENOMEM;
2363 cpumask_set_cpu_local_first(i, dev_to_node(&adapter->pdev->dev), 2364 cpumask_set_cpu(cpumask_local_spread(i, numa_node),
2364 eqo->affinity_mask); 2365 eqo->affinity_mask);
2365
2366 netif_napi_add(adapter->netdev, &eqo->napi, be_poll, 2366 netif_napi_add(adapter->netdev, &eqo->napi, be_poll,
2367 BE_NAPI_WEIGHT); 2367 BE_NAPI_WEIGHT);
2368 napi_hash_add(&eqo->napi); 2368 napi_hash_add(&eqo->napi);
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index 32f5ec737472..cf467a9f6cc7 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -1501,17 +1501,13 @@ static int mlx4_en_init_affinity_hint(struct mlx4_en_priv *priv, int ring_idx)
1501{ 1501{
1502 struct mlx4_en_rx_ring *ring = priv->rx_ring[ring_idx]; 1502 struct mlx4_en_rx_ring *ring = priv->rx_ring[ring_idx];
1503 int numa_node = priv->mdev->dev->numa_node; 1503 int numa_node = priv->mdev->dev->numa_node;
1504 int ret = 0;
1505 1504
1506 if (!zalloc_cpumask_var(&ring->affinity_mask, GFP_KERNEL)) 1505 if (!zalloc_cpumask_var(&ring->affinity_mask, GFP_KERNEL))
1507 return -ENOMEM; 1506 return -ENOMEM;
1508 1507
1509 ret = cpumask_set_cpu_local_first(ring_idx, numa_node, 1508 cpumask_set_cpu(cpumask_local_spread(ring_idx, numa_node),
1510 ring->affinity_mask); 1509 ring->affinity_mask);
1511 if (ret) 1510 return 0;
1512 free_cpumask_var(ring->affinity_mask);
1513
1514 return ret;
1515} 1511}
1516 1512
1517static void mlx4_en_free_affinity_hint(struct mlx4_en_priv *priv, int ring_idx) 1513static void mlx4_en_free_affinity_hint(struct mlx4_en_priv *priv, int ring_idx)
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
index f7bf312fb443..7bed3a88579f 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
@@ -144,9 +144,9 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv,
144 ring->queue_index = queue_index; 144 ring->queue_index = queue_index;
145 145
146 if (queue_index < priv->num_tx_rings_p_up) 146 if (queue_index < priv->num_tx_rings_p_up)
147 cpumask_set_cpu_local_first(queue_index, 147 cpumask_set_cpu(cpumask_local_spread(queue_index,
148 priv->mdev->dev->numa_node, 148 priv->mdev->dev->numa_node),
149 &ring->affinity_mask); 149 &ring->affinity_mask);
150 150
151 *pring = ring; 151 *pring = ring;
152 return 0; 152 return 0;
diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index 27e285b92b5f..59915ea5373c 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -151,10 +151,8 @@ static inline unsigned int cpumask_any_but(const struct cpumask *mask,
151 return 1; 151 return 1;
152} 152}
153 153
154static inline int cpumask_set_cpu_local_first(int i, int numa_node, cpumask_t *dstp) 154static inline unsigned int cpumask_local_spread(unsigned int i, int node)
155{ 155{
156 set_bit(0, cpumask_bits(dstp));
157
158 return 0; 156 return 0;
159} 157}
160 158
@@ -208,7 +206,7 @@ static inline unsigned int cpumask_next_zero(int n, const struct cpumask *srcp)
208 206
209int cpumask_next_and(int n, const struct cpumask *, const struct cpumask *); 207int cpumask_next_and(int n, const struct cpumask *, const struct cpumask *);
210int cpumask_any_but(const struct cpumask *mask, unsigned int cpu); 208int cpumask_any_but(const struct cpumask *mask, unsigned int cpu);
211int cpumask_set_cpu_local_first(int i, int numa_node, cpumask_t *dstp); 209unsigned int cpumask_local_spread(unsigned int i, int node);
212 210
213/** 211/**
214 * for_each_cpu - iterate over every cpu in a mask 212 * for_each_cpu - iterate over every cpu in a mask
diff --git a/lib/cpumask.c b/lib/cpumask.c
index 830dd5dec40f..5f627084f2e9 100644
--- a/lib/cpumask.c
+++ b/lib/cpumask.c
@@ -139,64 +139,42 @@ void __init free_bootmem_cpumask_var(cpumask_var_t mask)
139#endif 139#endif
140 140
141/** 141/**
142 * cpumask_set_cpu_local_first - set i'th cpu with local numa cpu's first 142 * cpumask_local_spread - select the i'th cpu with local numa cpu's first
143 *
144 * @i: index number 143 * @i: index number
145 * @numa_node: local numa_node 144 * @node: local numa_node
146 * @dstp: cpumask with the relevant cpu bit set according to the policy
147 * 145 *
148 * This function sets the cpumask according to a numa aware policy. 146 * This function selects an online CPU according to a numa aware policy;
149 * cpumask could be used as an affinity hint for the IRQ related to a 147 * local cpus are returned first, followed by non-local ones, then it
150 * queue. When the policy is to spread queues across cores - local cores 148 * wraps around.
151 * first.
152 * 149 *
153 * Returns 0 on success, -ENOMEM for no memory, and -EAGAIN when failed to set 150 * It's not very efficient, but useful for setup.
154 * the cpu bit and need to re-call the function.
155 */ 151 */
156int cpumask_set_cpu_local_first(int i, int numa_node, cpumask_t *dstp) 152unsigned int cpumask_local_spread(unsigned int i, int node)
157{ 153{
158 cpumask_var_t mask;
159 int cpu; 154 int cpu;
160 int ret = 0;
161
162 if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
163 return -ENOMEM;
164 155
156 /* Wrap: we always want a cpu. */
165 i %= num_online_cpus(); 157 i %= num_online_cpus();
166 158
167 if (numa_node == -1 || !cpumask_of_node(numa_node)) { 159 if (node == -1) {
168 /* Use all online cpu's for non numa aware system */ 160 for_each_cpu(cpu, cpu_online_mask)
169 cpumask_copy(mask, cpu_online_mask); 161 if (i-- == 0)
162 return cpu;
170 } else { 163 } else {
171 int n; 164 /* NUMA first. */
172 165 for_each_cpu_and(cpu, cpumask_of_node(node), cpu_online_mask)
173 cpumask_and(mask, 166 if (i-- == 0)
174 cpumask_of_node(numa_node), cpu_online_mask); 167 return cpu;
175 168
176 n = cpumask_weight(mask); 169 for_each_cpu(cpu, cpu_online_mask) {
177 if (i >= n) { 170 /* Skip NUMA nodes, done above. */
178 i -= n; 171 if (cpumask_test_cpu(cpu, cpumask_of_node(node)))
179 172 continue;
180 /* If index > number of local cpu's, mask out local 173
181 * cpu's 174 if (i-- == 0)
182 */ 175 return cpu;
183 cpumask_andnot(mask, cpu_online_mask, mask);
184 } 176 }
185 } 177 }
186 178 BUG();
187 for_each_cpu(cpu, mask) {
188 if (--i < 0)
189 goto out;
190 }
191
192 ret = -EAGAIN;
193
194out:
195 free_cpumask_var(mask);
196
197 if (!ret)
198 cpumask_set_cpu(cpu, dstp);
199
200 return ret;
201} 179}
202EXPORT_SYMBOL(cpumask_set_cpu_local_first); 180EXPORT_SYMBOL(cpumask_local_spread);