aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2014-06-11 17:59:21 -0400
committerDavid S. Miller <davem@davemloft.net>2014-06-11 17:59:21 -0400
commit9b07d735c01a2ccace4d49b22232dd0097ec05b6 (patch)
tree1185661e0b45731a3c52278277729f73fce6e85c
parentd4f3862017f9aaa1a6b6bade396a99a4b77e2cb2 (diff)
parent9e311e77a85e37b5caec3d64c3593cd52b2cdb71 (diff)
Merge branch 'mlx4'
Amir Vadai says: ==================== cpumask,net: affinity hint helper function This patchset will set affinity hint to influence IRQs to be allocated on the same NUMA node as the one where the card resides. As discussed in http://www.spinics.net/lists/netdev/msg271497.html If number of IRQs allocated is greater than the number of local NUMA cores, all local cores will be used first, and the rest of the IRQs will be on a remote NUMA node. If there is no NUMA support, IRQs and cores will be mapped 1:1 Since the utility function to calculate the mapping could be useful in other mq drivers in the kernel, it was added to cpumask.[ch] This patchset was tested and applied on top of net-next since the first consumer is a network device (mlx4_en). Over commit fff1f59 "mac802154: llsec: add forgotten list_del_rcu in key removal" ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_cq.c12
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_netdev.c35
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/mlx4_en.h1
-rw-r--r--include/linux/cpumask.h8
-rw-r--r--lib/cpumask.c63
5 files changed, 117 insertions, 2 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_cq.c b/drivers/net/ethernet/mellanox/mlx4/en_cq.c
index 636963db598a..4b2130760eed 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_cq.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_cq.c
@@ -163,6 +163,13 @@ int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq,
163 netif_napi_add(cq->dev, &cq->napi, mlx4_en_poll_tx_cq, 163 netif_napi_add(cq->dev, &cq->napi, mlx4_en_poll_tx_cq,
164 NAPI_POLL_WEIGHT); 164 NAPI_POLL_WEIGHT);
165 } else { 165 } else {
166 struct mlx4_en_rx_ring *ring = priv->rx_ring[cq->ring];
167
168 err = irq_set_affinity_hint(cq->mcq.irq,
169 ring->affinity_mask);
170 if (err)
171 mlx4_warn(mdev, "Failed setting affinity hint\n");
172
166 netif_napi_add(cq->dev, &cq->napi, mlx4_en_poll_rx_cq, 64); 173 netif_napi_add(cq->dev, &cq->napi, mlx4_en_poll_rx_cq, 64);
167 napi_hash_add(&cq->napi); 174 napi_hash_add(&cq->napi);
168 } 175 }
@@ -179,8 +186,11 @@ void mlx4_en_destroy_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq **pcq)
179 186
180 mlx4_en_unmap_buffer(&cq->wqres.buf); 187 mlx4_en_unmap_buffer(&cq->wqres.buf);
181 mlx4_free_hwq_res(mdev->dev, &cq->wqres, cq->buf_size); 188 mlx4_free_hwq_res(mdev->dev, &cq->wqres, cq->buf_size);
182 if (priv->mdev->dev->caps.comp_pool && cq->vector) 189 if (priv->mdev->dev->caps.comp_pool && cq->vector) {
190 if (!cq->is_tx)
191 irq_set_affinity_hint(cq->mcq.irq, NULL);
183 mlx4_release_eq(priv->mdev->dev, cq->vector); 192 mlx4_release_eq(priv->mdev->dev, cq->vector);
193 }
184 cq->vector = 0; 194 cq->vector = 0;
185 cq->buf_size = 0; 195 cq->buf_size = 0;
186 cq->buf = NULL; 196 cq->buf = NULL;
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index 58209bd0c94c..7d4fb7bf2593 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -1526,6 +1526,27 @@ static void mlx4_en_linkstate(struct work_struct *work)
1526 mutex_unlock(&mdev->state_lock); 1526 mutex_unlock(&mdev->state_lock);
1527} 1527}
1528 1528
1529static int mlx4_en_init_affinity_hint(struct mlx4_en_priv *priv, int ring_idx)
1530{
1531 struct mlx4_en_rx_ring *ring = priv->rx_ring[ring_idx];
1532 int numa_node = priv->mdev->dev->numa_node;
1533 int ret = 0;
1534
1535 if (!zalloc_cpumask_var(&ring->affinity_mask, GFP_KERNEL))
1536 return -ENOMEM;
1537
1538 ret = cpumask_set_cpu_local_first(ring_idx, numa_node,
1539 ring->affinity_mask);
1540 if (ret)
1541 free_cpumask_var(ring->affinity_mask);
1542
1543 return ret;
1544}
1545
1546static void mlx4_en_free_affinity_hint(struct mlx4_en_priv *priv, int ring_idx)
1547{
1548 free_cpumask_var(priv->rx_ring[ring_idx]->affinity_mask);
1549}
1529 1550
1530int mlx4_en_start_port(struct net_device *dev) 1551int mlx4_en_start_port(struct net_device *dev)
1531{ 1552{
@@ -1567,9 +1588,16 @@ int mlx4_en_start_port(struct net_device *dev)
1567 1588
1568 mlx4_en_cq_init_lock(cq); 1589 mlx4_en_cq_init_lock(cq);
1569 1590
1591 err = mlx4_en_init_affinity_hint(priv, i);
1592 if (err) {
1593 en_err(priv, "Failed preparing IRQ affinity hint\n");
1594 goto cq_err;
1595 }
1596
1570 err = mlx4_en_activate_cq(priv, cq, i); 1597 err = mlx4_en_activate_cq(priv, cq, i);
1571 if (err) { 1598 if (err) {
1572 en_err(priv, "Failed activating Rx CQ\n"); 1599 en_err(priv, "Failed activating Rx CQ\n");
1600 mlx4_en_free_affinity_hint(priv, i);
1573 goto cq_err; 1601 goto cq_err;
1574 } 1602 }
1575 for (j = 0; j < cq->size; j++) 1603 for (j = 0; j < cq->size; j++)
@@ -1578,6 +1606,7 @@ int mlx4_en_start_port(struct net_device *dev)
1578 if (err) { 1606 if (err) {
1579 en_err(priv, "Failed setting cq moderation parameters\n"); 1607 en_err(priv, "Failed setting cq moderation parameters\n");
1580 mlx4_en_deactivate_cq(priv, cq); 1608 mlx4_en_deactivate_cq(priv, cq);
1609 mlx4_en_free_affinity_hint(priv, i);
1581 goto cq_err; 1610 goto cq_err;
1582 } 1611 }
1583 mlx4_en_arm_cq(priv, cq); 1612 mlx4_en_arm_cq(priv, cq);
@@ -1715,8 +1744,10 @@ rss_err:
1715mac_err: 1744mac_err:
1716 mlx4_en_put_qp(priv); 1745 mlx4_en_put_qp(priv);
1717cq_err: 1746cq_err:
1718 while (rx_index--) 1747 while (rx_index--) {
1719 mlx4_en_deactivate_cq(priv, priv->rx_cq[rx_index]); 1748 mlx4_en_deactivate_cq(priv, priv->rx_cq[rx_index]);
1749 mlx4_en_free_affinity_hint(priv, i);
1750 }
1720 for (i = 0; i < priv->rx_ring_num; i++) 1751 for (i = 0; i < priv->rx_ring_num; i++)
1721 mlx4_en_deactivate_rx_ring(priv, priv->rx_ring[i]); 1752 mlx4_en_deactivate_rx_ring(priv, priv->rx_ring[i]);
1722 1753
@@ -1847,6 +1878,8 @@ void mlx4_en_stop_port(struct net_device *dev, int detach)
1847 msleep(1); 1878 msleep(1);
1848 mlx4_en_deactivate_rx_ring(priv, priv->rx_ring[i]); 1879 mlx4_en_deactivate_rx_ring(priv, priv->rx_ring[i]);
1849 mlx4_en_deactivate_cq(priv, cq); 1880 mlx4_en_deactivate_cq(priv, cq);
1881
1882 mlx4_en_free_affinity_hint(priv, i);
1850 } 1883 }
1851} 1884}
1852 1885
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
index b5db1bf361dc..0e15295bedd6 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
@@ -313,6 +313,7 @@ struct mlx4_en_rx_ring {
313 unsigned long csum_ok; 313 unsigned long csum_ok;
314 unsigned long csum_none; 314 unsigned long csum_none;
315 int hwtstamp_rx_filter; 315 int hwtstamp_rx_filter;
316 cpumask_var_t affinity_mask;
316}; 317};
317 318
318struct mlx4_en_cq { 319struct mlx4_en_cq {
diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index d08e4d2a9b92..d5ef249735d2 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -142,6 +142,13 @@ static inline unsigned int cpumask_any_but(const struct cpumask *mask,
142 return 1; 142 return 1;
143} 143}
144 144
145static inline int cpumask_set_cpu_local_first(int i, int numa_node, cpumask_t *dstp)
146{
147 set_bit(0, cpumask_bits(dstp));
148
149 return 0;
150}
151
145#define for_each_cpu(cpu, mask) \ 152#define for_each_cpu(cpu, mask) \
146 for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask) 153 for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask)
147#define for_each_cpu_not(cpu, mask) \ 154#define for_each_cpu_not(cpu, mask) \
@@ -192,6 +199,7 @@ static inline unsigned int cpumask_next_zero(int n, const struct cpumask *srcp)
192 199
193int cpumask_next_and(int n, const struct cpumask *, const struct cpumask *); 200int cpumask_next_and(int n, const struct cpumask *, const struct cpumask *);
194int cpumask_any_but(const struct cpumask *mask, unsigned int cpu); 201int cpumask_any_but(const struct cpumask *mask, unsigned int cpu);
202int cpumask_set_cpu_local_first(int i, int numa_node, cpumask_t *dstp);
195 203
196/** 204/**
197 * for_each_cpu - iterate over every cpu in a mask 205 * for_each_cpu - iterate over every cpu in a mask
diff --git a/lib/cpumask.c b/lib/cpumask.c
index b810b753c607..c101230658eb 100644
--- a/lib/cpumask.c
+++ b/lib/cpumask.c
@@ -164,3 +164,66 @@ void __init free_bootmem_cpumask_var(cpumask_var_t mask)
164 memblock_free_early(__pa(mask), cpumask_size()); 164 memblock_free_early(__pa(mask), cpumask_size());
165} 165}
166#endif 166#endif
167
168/**
169 * cpumask_set_cpu_local_first - set the i'th CPU, local NUMA CPUs first
170 *
171 * @i: index number
172 * @numa_node: local numa_node
173 * @dstp: cpumask with the relevant cpu bit set according to the policy
174 *
175 * This function sets the cpumask according to a numa aware policy.
176 * cpumask could be used as an affinity hint for the IRQ related to a
177 * queue. When the policy is to spread queues across cores - local cores
178 * first.
179 *
180 * Returns 0 on success, -ENOMEM for no memory, and -EAGAIN when failed to set
181 * the cpu bit and need to re-call the function.
182 */
183int cpumask_set_cpu_local_first(int i, int numa_node, cpumask_t *dstp)
184{
185 cpumask_var_t mask;
186 int cpu;
187 int ret = 0;
188
189 if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
190 return -ENOMEM;
191
192 i %= num_online_cpus();
193
194 if (!cpumask_of_node(numa_node)) {
195		/* Use all online CPUs on a non-NUMA-aware system */
196 cpumask_copy(mask, cpu_online_mask);
197 } else {
198 int n;
199
200 cpumask_and(mask,
201 cpumask_of_node(numa_node), cpu_online_mask);
202
203 n = cpumask_weight(mask);
204 if (i >= n) {
205 i -= n;
206
207			/* If index > number of local CPUs, mask out the local
208			 * CPUs
209 */
210 cpumask_andnot(mask, cpu_online_mask, mask);
211 }
212 }
213
214 for_each_cpu(cpu, mask) {
215 if (--i < 0)
216 goto out;
217 }
218
219 ret = -EAGAIN;
220
221out:
222 free_cpumask_var(mask);
223
224 if (!ret)
225 cpumask_set_cpu(cpu, dstp);
226
227 return ret;
228}
229EXPORT_SYMBOL(cpumask_set_cpu_local_first);