diff options
author | David S. Miller <davem@davemloft.net> | 2014-06-11 17:59:21 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2014-06-11 17:59:21 -0400 |
commit | 9b07d735c01a2ccace4d49b22232dd0097ec05b6 (patch) | |
tree | 1185661e0b45731a3c52278277729f73fce6e85c | |
parent | d4f3862017f9aaa1a6b6bade396a99a4b77e2cb2 (diff) | |
parent | 9e311e77a85e37b5caec3d64c3593cd52b2cdb71 (diff) |
Merge branch 'mlx4'
Amir Vadai says:
====================
cpumask,net: affinity hint helper function
This patchset sets an affinity hint so that IRQs are allocated on the
same NUMA node as the one where the card resides, as discussed in
http://www.spinics.net/lists/netdev/msg271497.html
If the number of IRQs allocated is greater than the number of local NUMA cores,
all local cores will be used first, and the rest of the IRQs will be placed on a
remote NUMA node.
If there is no NUMA support, IRQs and cores will be mapped 1:1.
Since the utility function that calculates the mapping could be useful in other
multi-queue (mq) drivers in the kernel, it was added to cpumask.[ch].
This patchset was tested and applied on top of net-next, since the first
consumer is a network device (mlx4_en), over commit fff1f59 ("mac802154:
llsec: add forgotten list_del_rcu in key removal").
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | drivers/net/ethernet/mellanox/mlx4/en_cq.c | 12 | ||||
-rw-r--r-- | drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 35 | ||||
-rw-r--r-- | drivers/net/ethernet/mellanox/mlx4/mlx4_en.h | 1 | ||||
-rw-r--r-- | include/linux/cpumask.h | 8 | ||||
-rw-r--r-- | lib/cpumask.c | 63 |
5 files changed, 117 insertions, 2 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_cq.c b/drivers/net/ethernet/mellanox/mlx4/en_cq.c index 636963db598a..4b2130760eed 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_cq.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_cq.c | |||
@@ -163,6 +163,13 @@ int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq, | |||
163 | netif_napi_add(cq->dev, &cq->napi, mlx4_en_poll_tx_cq, | 163 | netif_napi_add(cq->dev, &cq->napi, mlx4_en_poll_tx_cq, |
164 | NAPI_POLL_WEIGHT); | 164 | NAPI_POLL_WEIGHT); |
165 | } else { | 165 | } else { |
166 | struct mlx4_en_rx_ring *ring = priv->rx_ring[cq->ring]; | ||
167 | |||
168 | err = irq_set_affinity_hint(cq->mcq.irq, | ||
169 | ring->affinity_mask); | ||
170 | if (err) | ||
171 | mlx4_warn(mdev, "Failed setting affinity hint\n"); | ||
172 | |||
166 | netif_napi_add(cq->dev, &cq->napi, mlx4_en_poll_rx_cq, 64); | 173 | netif_napi_add(cq->dev, &cq->napi, mlx4_en_poll_rx_cq, 64); |
167 | napi_hash_add(&cq->napi); | 174 | napi_hash_add(&cq->napi); |
168 | } | 175 | } |
@@ -179,8 +186,11 @@ void mlx4_en_destroy_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq **pcq) | |||
179 | 186 | ||
180 | mlx4_en_unmap_buffer(&cq->wqres.buf); | 187 | mlx4_en_unmap_buffer(&cq->wqres.buf); |
181 | mlx4_free_hwq_res(mdev->dev, &cq->wqres, cq->buf_size); | 188 | mlx4_free_hwq_res(mdev->dev, &cq->wqres, cq->buf_size); |
182 | if (priv->mdev->dev->caps.comp_pool && cq->vector) | 189 | if (priv->mdev->dev->caps.comp_pool && cq->vector) { |
190 | if (!cq->is_tx) | ||
191 | irq_set_affinity_hint(cq->mcq.irq, NULL); | ||
183 | mlx4_release_eq(priv->mdev->dev, cq->vector); | 192 | mlx4_release_eq(priv->mdev->dev, cq->vector); |
193 | } | ||
184 | cq->vector = 0; | 194 | cq->vector = 0; |
185 | cq->buf_size = 0; | 195 | cq->buf_size = 0; |
186 | cq->buf = NULL; | 196 | cq->buf = NULL; |
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 58209bd0c94c..7d4fb7bf2593 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c | |||
@@ -1526,6 +1526,27 @@ static void mlx4_en_linkstate(struct work_struct *work) | |||
1526 | mutex_unlock(&mdev->state_lock); | 1526 | mutex_unlock(&mdev->state_lock); |
1527 | } | 1527 | } |
1528 | 1528 | ||
1529 | static int mlx4_en_init_affinity_hint(struct mlx4_en_priv *priv, int ring_idx) | ||
1530 | { | ||
1531 | struct mlx4_en_rx_ring *ring = priv->rx_ring[ring_idx]; | ||
1532 | int numa_node = priv->mdev->dev->numa_node; | ||
1533 | int ret = 0; | ||
1534 | |||
1535 | if (!zalloc_cpumask_var(&ring->affinity_mask, GFP_KERNEL)) | ||
1536 | return -ENOMEM; | ||
1537 | |||
1538 | ret = cpumask_set_cpu_local_first(ring_idx, numa_node, | ||
1539 | ring->affinity_mask); | ||
1540 | if (ret) | ||
1541 | free_cpumask_var(ring->affinity_mask); | ||
1542 | |||
1543 | return ret; | ||
1544 | } | ||
1545 | |||
1546 | static void mlx4_en_free_affinity_hint(struct mlx4_en_priv *priv, int ring_idx) | ||
1547 | { | ||
1548 | free_cpumask_var(priv->rx_ring[ring_idx]->affinity_mask); | ||
1549 | } | ||
1529 | 1550 | ||
1530 | int mlx4_en_start_port(struct net_device *dev) | 1551 | int mlx4_en_start_port(struct net_device *dev) |
1531 | { | 1552 | { |
@@ -1567,9 +1588,16 @@ int mlx4_en_start_port(struct net_device *dev) | |||
1567 | 1588 | ||
1568 | mlx4_en_cq_init_lock(cq); | 1589 | mlx4_en_cq_init_lock(cq); |
1569 | 1590 | ||
1591 | err = mlx4_en_init_affinity_hint(priv, i); | ||
1592 | if (err) { | ||
1593 | en_err(priv, "Failed preparing IRQ affinity hint\n"); | ||
1594 | goto cq_err; | ||
1595 | } | ||
1596 | |||
1570 | err = mlx4_en_activate_cq(priv, cq, i); | 1597 | err = mlx4_en_activate_cq(priv, cq, i); |
1571 | if (err) { | 1598 | if (err) { |
1572 | en_err(priv, "Failed activating Rx CQ\n"); | 1599 | en_err(priv, "Failed activating Rx CQ\n"); |
1600 | mlx4_en_free_affinity_hint(priv, i); | ||
1573 | goto cq_err; | 1601 | goto cq_err; |
1574 | } | 1602 | } |
1575 | for (j = 0; j < cq->size; j++) | 1603 | for (j = 0; j < cq->size; j++) |
@@ -1578,6 +1606,7 @@ int mlx4_en_start_port(struct net_device *dev) | |||
1578 | if (err) { | 1606 | if (err) { |
1579 | en_err(priv, "Failed setting cq moderation parameters\n"); | 1607 | en_err(priv, "Failed setting cq moderation parameters\n"); |
1580 | mlx4_en_deactivate_cq(priv, cq); | 1608 | mlx4_en_deactivate_cq(priv, cq); |
1609 | mlx4_en_free_affinity_hint(priv, i); | ||
1581 | goto cq_err; | 1610 | goto cq_err; |
1582 | } | 1611 | } |
1583 | mlx4_en_arm_cq(priv, cq); | 1612 | mlx4_en_arm_cq(priv, cq); |
@@ -1715,8 +1744,10 @@ rss_err: | |||
1715 | mac_err: | 1744 | mac_err: |
1716 | mlx4_en_put_qp(priv); | 1745 | mlx4_en_put_qp(priv); |
1717 | cq_err: | 1746 | cq_err: |
1718 | while (rx_index--) | 1747 | while (rx_index--) { |
1719 | mlx4_en_deactivate_cq(priv, priv->rx_cq[rx_index]); | 1748 | mlx4_en_deactivate_cq(priv, priv->rx_cq[rx_index]); |
1749 | mlx4_en_free_affinity_hint(priv, i); | ||
1750 | } | ||
1720 | for (i = 0; i < priv->rx_ring_num; i++) | 1751 | for (i = 0; i < priv->rx_ring_num; i++) |
1721 | mlx4_en_deactivate_rx_ring(priv, priv->rx_ring[i]); | 1752 | mlx4_en_deactivate_rx_ring(priv, priv->rx_ring[i]); |
1722 | 1753 | ||
@@ -1847,6 +1878,8 @@ void mlx4_en_stop_port(struct net_device *dev, int detach) | |||
1847 | msleep(1); | 1878 | msleep(1); |
1848 | mlx4_en_deactivate_rx_ring(priv, priv->rx_ring[i]); | 1879 | mlx4_en_deactivate_rx_ring(priv, priv->rx_ring[i]); |
1849 | mlx4_en_deactivate_cq(priv, cq); | 1880 | mlx4_en_deactivate_cq(priv, cq); |
1881 | |||
1882 | mlx4_en_free_affinity_hint(priv, i); | ||
1850 | } | 1883 | } |
1851 | } | 1884 | } |
1852 | 1885 | ||
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index b5db1bf361dc..0e15295bedd6 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h | |||
@@ -313,6 +313,7 @@ struct mlx4_en_rx_ring { | |||
313 | unsigned long csum_ok; | 313 | unsigned long csum_ok; |
314 | unsigned long csum_none; | 314 | unsigned long csum_none; |
315 | int hwtstamp_rx_filter; | 315 | int hwtstamp_rx_filter; |
316 | cpumask_var_t affinity_mask; | ||
316 | }; | 317 | }; |
317 | 318 | ||
318 | struct mlx4_en_cq { | 319 | struct mlx4_en_cq { |
diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index d08e4d2a9b92..d5ef249735d2 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h | |||
@@ -142,6 +142,13 @@ static inline unsigned int cpumask_any_but(const struct cpumask *mask, | |||
142 | return 1; | 142 | return 1; |
143 | } | 143 | } |
144 | 144 | ||
145 | static inline int cpumask_set_cpu_local_first(int i, int numa_node, cpumask_t *dstp) | ||
146 | { | ||
147 | set_bit(0, cpumask_bits(dstp)); | ||
148 | |||
149 | return 0; | ||
150 | } | ||
151 | |||
145 | #define for_each_cpu(cpu, mask) \ | 152 | #define for_each_cpu(cpu, mask) \ |
146 | for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask) | 153 | for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask) |
147 | #define for_each_cpu_not(cpu, mask) \ | 154 | #define for_each_cpu_not(cpu, mask) \ |
@@ -192,6 +199,7 @@ static inline unsigned int cpumask_next_zero(int n, const struct cpumask *srcp) | |||
192 | 199 | ||
193 | int cpumask_next_and(int n, const struct cpumask *, const struct cpumask *); | 200 | int cpumask_next_and(int n, const struct cpumask *, const struct cpumask *); |
194 | int cpumask_any_but(const struct cpumask *mask, unsigned int cpu); | 201 | int cpumask_any_but(const struct cpumask *mask, unsigned int cpu); |
202 | int cpumask_set_cpu_local_first(int i, int numa_node, cpumask_t *dstp); | ||
195 | 203 | ||
196 | /** | 204 | /** |
197 | * for_each_cpu - iterate over every cpu in a mask | 205 | * for_each_cpu - iterate over every cpu in a mask |
diff --git a/lib/cpumask.c b/lib/cpumask.c index b810b753c607..c101230658eb 100644 --- a/lib/cpumask.c +++ b/lib/cpumask.c | |||
@@ -164,3 +164,66 @@ void __init free_bootmem_cpumask_var(cpumask_var_t mask) | |||
164 | memblock_free_early(__pa(mask), cpumask_size()); | 164 | memblock_free_early(__pa(mask), cpumask_size()); |
165 | } | 165 | } |
166 | #endif | 166 | #endif |
167 | |||
168 | /** | ||
169 | * cpumask_set_cpu_local_first - set i'th cpu with local numa cpu's first | ||
170 | * | ||
171 | * @i: index number | ||
172 | * @numa_node: local numa_node | ||
173 | * @dstp: cpumask with the relevant cpu bit set according to the policy | ||
174 | * | ||
175 | * This function sets the cpumask according to a numa aware policy. | ||
176 | * cpumask could be used as an affinity hint for the IRQ related to a | ||
177 | * queue. When the policy is to spread queues across cores - local cores | ||
178 | * first. | ||
179 | * | ||
180 | * Returns 0 on success, -ENOMEM for no memory, and -EAGAIN when failed to set | ||
181 | * the cpu bit and need to re-call the function. | ||
182 | */ | ||
183 | int cpumask_set_cpu_local_first(int i, int numa_node, cpumask_t *dstp) | ||
184 | { | ||
185 | cpumask_var_t mask; | ||
186 | int cpu; | ||
187 | int ret = 0; | ||
188 | |||
189 | if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) | ||
190 | return -ENOMEM; | ||
191 | |||
192 | i %= num_online_cpus(); | ||
193 | |||
194 | if (!cpumask_of_node(numa_node)) { | ||
195 | /* Use all online cpu's for non numa aware system */ | ||
196 | cpumask_copy(mask, cpu_online_mask); | ||
197 | } else { | ||
198 | int n; | ||
199 | |||
200 | cpumask_and(mask, | ||
201 | cpumask_of_node(numa_node), cpu_online_mask); | ||
202 | |||
203 | n = cpumask_weight(mask); | ||
204 | if (i >= n) { | ||
205 | i -= n; | ||
206 | |||
207 | /* If index > number of local cpu's, mask out local | ||
208 | * cpu's | ||
209 | */ | ||
210 | cpumask_andnot(mask, cpu_online_mask, mask); | ||
211 | } | ||
212 | } | ||
213 | |||
214 | for_each_cpu(cpu, mask) { | ||
215 | if (--i < 0) | ||
216 | goto out; | ||
217 | } | ||
218 | |||
219 | ret = -EAGAIN; | ||
220 | |||
221 | out: | ||
222 | free_cpumask_var(mask); | ||
223 | |||
224 | if (!ret) | ||
225 | cpumask_set_cpu(cpu, dstp); | ||
226 | |||
227 | return ret; | ||
228 | } | ||
229 | EXPORT_SYMBOL(cpumask_set_cpu_local_first); | ||