path: root/drivers/net/ethernet/mellanox/mlx4/en_tx.c
author     Eric Dumazet <edumazet@google.com>      2014-10-05 05:35:13 -0400
committer  David S. Miller <davem@davemloft.net>   2014-10-06 01:04:15 -0400
commit     3d03641cb4ea050f969dd3ab34113adb95287f36
tree       b1cbd2ba62c956e93d88ccde59437d46b21e13da  /drivers/net/ethernet/mellanox/mlx4/en_tx.c
parent     dc9b06d156cefb95f7f6a3ac0521a3efa31d6805
net/mlx4_en: Avoid a cache line miss in TX completion for single frag skb's
Add frag0_dma/frag0_byte_count into mlx4_en_tx_info to avoid a cache line miss in TX completion for frames having one dma element. (We avoid reading back the tx descriptor.)

Note this could be extended to 2/3 dma elements later, as we have free room in mlx4_en_tx_info.

Also, mlx4_en_free_tx_desc() no longer accesses skb_shinfo(). We use a new nr_maps field in mlx4_en_tx_info to avoid 2 or 3 cache misses.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Amir Vadai <amirv@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'drivers/net/ethernet/mellanox/mlx4/en_tx.c')
-rw-r--r--  drivers/net/ethernet/mellanox/mlx4/en_tx.c  83
 1 file changed, 46 insertions(+), 37 deletions(-)
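
The nr_maps, map0_dma and map0_byte_count fields used by this patch live in struct mlx4_en_tx_info (mlx4_en.h), which the diffstat above does not cover. As a rough, non-authoritative sketch based only on the fields referenced in the diff below (types, ordering and the remaining members are assumptions):

/* Sketch only -- the real definition is in mlx4_en.h, outside this
 * diffstat; field types, ordering and the other members are assumed.
 */
struct mlx4_en_tx_info {
        struct sk_buff *skb;     /* skb owning this descriptor */
        dma_addr_t map0_dma;     /* first DMA mapping, cached at xmit time */
        u32 map0_byte_count;     /* length of that first mapping */
        u32 nr_txbb;             /* TX basic blocks used by the descriptor */
        u8 data_offset;          /* offset of the data segments in the WQE */
        u8 linear;               /* 1 if the skb head was DMA-mapped */
        u8 inl;                  /* 1 if the payload was inlined in the WQE */
        u8 nr_maps;              /* linear mapping + page fragments */
        /* ... remaining members unchanged ... */
};

Caching map0_dma/map0_byte_count at transmit time is what lets mlx4_en_free_tx_desc() below unmap a single-fragment frame without reading data->addr and data->byte_count back from the TX descriptor ring.
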
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
index 14479068001f..edc4a8810368 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
@@ -259,38 +259,40 @@ static u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv,
                                 struct mlx4_en_tx_ring *ring,
                                 int index, u8 owner, u64 timestamp)
 {
-        struct mlx4_en_dev *mdev = priv->mdev;
         struct mlx4_en_tx_info *tx_info = &ring->tx_info[index];
         struct mlx4_en_tx_desc *tx_desc = ring->buf + index * TXBB_SIZE;
         struct mlx4_wqe_data_seg *data = (void *) tx_desc + tx_info->data_offset;
-        struct sk_buff *skb = tx_info->skb;
-        struct skb_frag_struct *frag;
         void *end = ring->buf + ring->buf_size;
-        int frags = skb_shinfo(skb)->nr_frags;
+        struct sk_buff *skb = tx_info->skb;
+        int nr_maps = tx_info->nr_maps;
         int i;
-        struct skb_shared_hwtstamps hwts;
 
-        if (timestamp) {
-                mlx4_en_fill_hwtstamps(mdev, &hwts, timestamp);
+        if (unlikely(timestamp)) {
+                struct skb_shared_hwtstamps hwts;
+
+                mlx4_en_fill_hwtstamps(priv->mdev, &hwts, timestamp);
                 skb_tstamp_tx(skb, &hwts);
         }
 
         /* Optimize the common case when there are no wraparounds */
         if (likely((void *) tx_desc + tx_info->nr_txbb * TXBB_SIZE <= end)) {
                 if (!tx_info->inl) {
-                        if (tx_info->linear) {
+                        if (tx_info->linear)
                                 dma_unmap_single(priv->ddev,
-                                        (dma_addr_t) be64_to_cpu(data->addr),
-                                        be32_to_cpu(data->byte_count),
-                                        PCI_DMA_TODEVICE);
-                                ++data;
-                        }
-
-                        for (i = 0; i < frags; i++) {
-                                frag = &skb_shinfo(skb)->frags[i];
+                                        tx_info->map0_dma,
+                                        tx_info->map0_byte_count,
+                                        PCI_DMA_TODEVICE);
+                        else
+                                dma_unmap_page(priv->ddev,
+                                        tx_info->map0_dma,
+                                        tx_info->map0_byte_count,
+                                        PCI_DMA_TODEVICE);
+                        for (i = 1; i < nr_maps; i++) {
+                                data++;
                                 dma_unmap_page(priv->ddev,
-                                        (dma_addr_t) be64_to_cpu(data[i].addr),
-                                        skb_frag_size(frag), PCI_DMA_TODEVICE);
+                                        (dma_addr_t)be64_to_cpu(data->addr),
+                                        be32_to_cpu(data->byte_count),
+                                        PCI_DMA_TODEVICE);
                         }
                 }
         } else {
@@ -299,23 +301,25 @@ static u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv,
                                 data = ring->buf + ((void *)data - end);
                         }
 
-                        if (tx_info->linear) {
+                        if (tx_info->linear)
                                 dma_unmap_single(priv->ddev,
-                                        (dma_addr_t) be64_to_cpu(data->addr),
-                                        be32_to_cpu(data->byte_count),
-                                        PCI_DMA_TODEVICE);
-                                ++data;
-                        }
-
-                        for (i = 0; i < frags; i++) {
+                                        tx_info->map0_dma,
+                                        tx_info->map0_byte_count,
+                                        PCI_DMA_TODEVICE);
+                        else
+                                dma_unmap_page(priv->ddev,
+                                        tx_info->map0_dma,
+                                        tx_info->map0_byte_count,
+                                        PCI_DMA_TODEVICE);
+                        for (i = 1; i < nr_maps; i++) {
+                                data++;
                                 /* Check for wraparound before unmapping */
                                 if ((void *) data >= end)
                                         data = ring->buf;
-                                frag = &skb_shinfo(skb)->frags[i];
                                 dma_unmap_page(priv->ddev,
-                                        (dma_addr_t) be64_to_cpu(data->addr),
-                                        skb_frag_size(frag), PCI_DMA_TODEVICE);
-                                ++data;
+                                        (dma_addr_t)be64_to_cpu(data->addr),
+                                        be32_to_cpu(data->byte_count),
+                                        PCI_DMA_TODEVICE);
                         }
                 }
         }
@@ -751,19 +755,22 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
         tx_info->linear = (lso_header_size < skb_headlen(skb) &&
                            !is_inline(ring->inline_thold, skb, NULL)) ? 1 : 0;
 
-        data += skb_shinfo(skb)->nr_frags + tx_info->linear - 1;
+        tx_info->nr_maps = skb_shinfo(skb)->nr_frags + tx_info->linear;
+        data += tx_info->nr_maps - 1;
 
         if (is_inline(ring->inline_thold, skb, &fragptr)) {
                 tx_info->inl = 1;
         } else {
+                dma_addr_t dma = 0;
+                u32 byte_count = 0;
+
                 /* Map fragments if any */
                 for (i = skb_shinfo(skb)->nr_frags - 1; i >= 0; i--) {
                         const struct skb_frag_struct *frag;
-                        dma_addr_t dma;
-
                         frag = &skb_shinfo(skb)->frags[i];
+                        byte_count = skb_frag_size(frag);
                         dma = skb_frag_dma_map(ddev, frag,
-                                               0, skb_frag_size(frag),
+                                               0, byte_count,
                                                DMA_TO_DEVICE);
                         if (dma_mapping_error(ddev, dma))
                                 goto tx_drop_unmap;
@@ -771,14 +778,13 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
                         data->addr = cpu_to_be64(dma);
                         data->lkey = ring->mr_key;
                         wmb();
-                        data->byte_count = cpu_to_be32(skb_frag_size(frag));
+                        data->byte_count = cpu_to_be32(byte_count);
                         --data;
                 }
 
                 /* Map linear part if needed */
                 if (tx_info->linear) {
-                        u32 byte_count = skb_headlen(skb) - lso_header_size;
-                        dma_addr_t dma;
+                        byte_count = skb_headlen(skb) - lso_header_size;
 
                         dma = dma_map_single(ddev, skb->data +
                                              lso_header_size, byte_count,
@@ -792,6 +798,9 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
                         data->byte_count = cpu_to_be32(byte_count);
                 }
                 tx_info->inl = 0;
+                /* tx completion can avoid cache line miss for common cases */
+                tx_info->map0_dma = dma;
+                tx_info->map0_byte_count = byte_count;
         }
 
         /*