author     Yevgeny Petrilin <yevgenyp@mellanox.co.il>  2012-03-05 23:03:34 -0500
committer  David S. Miller <davem@davemloft.net>       2012-03-06 15:19:17 -0500
commit     ebf8c9aa032f03343b91c91951b0705021b02eb0 (patch)
tree       821ec45a0319a6592603a66048700bac3b0f15bc
parent     6975f4ce5a44e337514283e84761adaf2849aa26 (diff)
net/mlx4_en: Saving mem access on data path
Localize pdev->dev and use dma_map instead of pci_map.

There are multiple map/unmap operations on the data path; optimize them by saving redundant pointer accesses. Those places were identified as hot spots when running kernel profiling during benchmarks.

The fix had the most impact when testing packet rate with small packets, reducing several percent of CPU load and in some cases making the difference between reaching wire speed and being CPU bound.

Signed-off-by: Yevgeny Petrilin <yevgenyp@mellanox.co.il>
Signed-off-by: David S. Miller <davem@davemloft.net>
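The pattern the patch applies is straightforward: instead of chasing priv->mdev->pdev->dev through several dependent pointer loads on every map/unmap in the hot path, the device pointer is cached once at init time (priv->ddev) and the generic dma_* API is called with it directly. Below is a minimal, self-contained C sketch of the idea; the struct layouts are simplified stand-ins and dma_map_single() is stubbed with a reduced signature purely for illustration, not the driver's or the kernel's actual definitions.

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Simplified stand-ins for the kernel structures involved (illustration only). */
struct device { int id; };
struct pci_dev { struct device dev; };
struct mlx4_en_dev { struct pci_dev *pdev; };

struct mlx4_en_priv {
	struct mlx4_en_dev *mdev;
	struct device *ddev;	/* cached &mdev->pdev->dev, set once at init */
};

/* Stub standing in for dma_map_single(); the real kernel call also takes a
 * DMA direction argument. Here it just echoes the CPU address. */
static uint64_t dma_map_single(struct device *dev, void *cpu_addr, size_t size)
{
	(void)dev;
	(void)size;
	return (uint64_t)(uintptr_t)cpu_addr;
}

/* Before: three dependent pointer loads per packet just to find the device. */
static uint64_t map_buf_slow(struct mlx4_en_priv *priv, void *buf, size_t len)
{
	return dma_map_single(&priv->mdev->pdev->dev, buf, len);
}

/* After: one load of the cached pointer on the data path. */
static uint64_t map_buf_fast(struct mlx4_en_priv *priv, void *buf, size_t len)
{
	return dma_map_single(priv->ddev, buf, len);
}

int main(void)
{
	struct pci_dev pdev = { .dev = { .id = 1 } };
	struct mlx4_en_dev mdev = { .pdev = &pdev };
	struct mlx4_en_priv priv = { .mdev = &mdev, .ddev = &pdev.dev };
	char pkt[64] = { 0 };

	printf("slow: %llu fast: %llu\n",
	       (unsigned long long)map_buf_slow(&priv, pkt, sizeof(pkt)),
	       (unsigned long long)map_buf_fast(&priv, pkt, sizeof(pkt)));
	return 0;
}

Both helpers return the same mapping; the difference is only how many loads the CPU performs to reach the struct device, which is what the profiling in the commit message flagged on the per-packet path.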
-rw-r--r--  drivers/net/ethernet/mellanox/mlx4/en_netdev.c |  1
-rw-r--r--  drivers/net/ethernet/mellanox/mlx4/en_rx.c     | 14
-rw-r--r--  drivers/net/ethernet/mellanox/mlx4/en_tx.c     | 13
-rw-r--r--  drivers/net/ethernet/mellanox/mlx4/mlx4_en.h   |  1
4 files changed, 13 insertions(+), 16 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index 9fe4f94c6da7..31b455a49273 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -1062,6 +1062,7 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
 	memset(priv, 0, sizeof(struct mlx4_en_priv));
 	priv->dev = dev;
 	priv->mdev = mdev;
+	priv->ddev = &mdev->pdev->dev;
 	priv->prof = prof;
 	priv->port = port;
 	priv->port_up = false;
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
index d703ef2c9c91..c881712b7492 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
@@ -48,7 +48,6 @@ static int mlx4_en_alloc_frag(struct mlx4_en_priv *priv,
 			      struct mlx4_en_rx_alloc *ring_alloc,
 			      int i)
 {
-	struct mlx4_en_dev *mdev = priv->mdev;
 	struct mlx4_en_frag_info *frag_info = &priv->frag_info[i];
 	struct mlx4_en_rx_alloc *page_alloc = &ring_alloc[i];
 	struct page *page;
@@ -72,7 +71,7 @@ static int mlx4_en_alloc_frag(struct mlx4_en_priv *priv,
 		skb_frags[i].offset = page_alloc->offset;
 		page_alloc->offset += frag_info->frag_stride;
 	}
-	dma = pci_map_single(mdev->pdev, page_address(skb_frags[i].page) +
+	dma = dma_map_single(priv->ddev, page_address(skb_frags[i].page) +
 			     skb_frags[i].offset, frag_info->frag_size,
 			     PCI_DMA_FROMDEVICE);
 	rx_desc->data[i].addr = cpu_to_be64(dma);
@@ -186,7 +185,6 @@ static void mlx4_en_free_rx_desc(struct mlx4_en_priv *priv,
 				 struct mlx4_en_rx_ring *ring,
 				 int index)
 {
-	struct mlx4_en_dev *mdev = priv->mdev;
 	struct page_frag *skb_frags;
 	struct mlx4_en_rx_desc *rx_desc = ring->buf + (index << ring->log_stride);
 	dma_addr_t dma;
@@ -198,7 +196,7 @@ static void mlx4_en_free_rx_desc(struct mlx4_en_priv *priv,
 		dma = be64_to_cpu(rx_desc->data[nr].addr);

 		en_dbg(DRV, priv, "Unmapping buffer at dma:0x%llx\n", (u64) dma);
-		pci_unmap_single(mdev->pdev, dma, skb_frags[nr].size,
+		dma_unmap_single(priv->ddev, dma, skb_frags[nr].size,
 				 PCI_DMA_FROMDEVICE);
 		put_page(skb_frags[nr].page);
 	}
@@ -412,7 +410,6 @@ static int mlx4_en_complete_rx_desc(struct mlx4_en_priv *priv,
 				    int length)
 {
 	struct skb_frag_struct *skb_frags_rx = skb_shinfo(skb)->frags;
-	struct mlx4_en_dev *mdev = priv->mdev;
 	struct mlx4_en_frag_info *frag_info;
 	int nr;
 	dma_addr_t dma;
@@ -435,7 +432,7 @@ static int mlx4_en_complete_rx_desc(struct mlx4_en_priv *priv,
 			goto fail;

 		/* Unmap buffer */
-		pci_unmap_single(mdev->pdev, dma, skb_frag_size(&skb_frags_rx[nr]),
+		dma_unmap_single(priv->ddev, dma, skb_frag_size(&skb_frags_rx[nr]),
 				 PCI_DMA_FROMDEVICE);
 	}
 	/* Adjust size of last fragment to match actual length */
@@ -461,7 +458,6 @@ static struct sk_buff *mlx4_en_rx_skb(struct mlx4_en_priv *priv,
 				   struct mlx4_en_rx_alloc *page_alloc,
 				   unsigned int length)
 {
-	struct mlx4_en_dev *mdev = priv->mdev;
 	struct sk_buff *skb;
 	void *va;
 	int used_frags;
@@ -483,10 +479,10 @@ static struct sk_buff *mlx4_en_rx_skb(struct mlx4_en_priv *priv,
 		/* We are copying all relevant data to the skb - temporarily
 		 * synch buffers for the copy */
 		dma = be64_to_cpu(rx_desc->data[0].addr);
-		dma_sync_single_for_cpu(&mdev->pdev->dev, dma, length,
+		dma_sync_single_for_cpu(priv->ddev, dma, length,
 					DMA_FROM_DEVICE);
 		skb_copy_to_linear_data(skb, va, length);
-		dma_sync_single_for_device(&mdev->pdev->dev, dma, length,
+		dma_sync_single_for_device(priv->ddev, dma, length,
 					   DMA_FROM_DEVICE);
 		skb->tail += length;
 	} else {
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
index 50b3fa5212ed..008f0af5cc8b 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
@@ -198,7 +198,6 @@ static u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv,
 				struct mlx4_en_tx_ring *ring,
 				int index, u8 owner)
 {
-	struct mlx4_en_dev *mdev = priv->mdev;
 	struct mlx4_en_tx_info *tx_info = &ring->tx_info[index];
 	struct mlx4_en_tx_desc *tx_desc = ring->buf + index * TXBB_SIZE;
 	struct mlx4_wqe_data_seg *data = (void *) tx_desc + tx_info->data_offset;
@@ -214,7 +213,7 @@ static u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv,
 	if (likely((void *) tx_desc + tx_info->nr_txbb * TXBB_SIZE <= end)) {
 		if (!tx_info->inl) {
 			if (tx_info->linear) {
-				pci_unmap_single(mdev->pdev,
+				dma_unmap_single(priv->ddev,
 					(dma_addr_t) be64_to_cpu(data->addr),
 					be32_to_cpu(data->byte_count),
 					PCI_DMA_TODEVICE);
@@ -223,7 +222,7 @@ static u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv,

 			for (i = 0; i < frags; i++) {
 				frag = &skb_shinfo(skb)->frags[i];
-				pci_unmap_page(mdev->pdev,
+				dma_unmap_page(priv->ddev,
 					(dma_addr_t) be64_to_cpu(data[i].addr),
 					skb_frag_size(frag), PCI_DMA_TODEVICE);
 			}
@@ -241,7 +240,7 @@ static u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv,
 		}

 		if (tx_info->linear) {
-			pci_unmap_single(mdev->pdev,
+			dma_unmap_single(priv->ddev,
 				(dma_addr_t) be64_to_cpu(data->addr),
 				be32_to_cpu(data->byte_count),
 				PCI_DMA_TODEVICE);
@@ -253,7 +252,7 @@ static u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv,
 			if ((void *) data >= end)
 				data = ring->buf;
 			frag = &skb_shinfo(skb)->frags[i];
-			pci_unmap_page(mdev->pdev,
+			dma_unmap_page(priv->ddev,
 				(dma_addr_t) be64_to_cpu(data->addr),
 				skb_frag_size(frag), PCI_DMA_TODEVICE);
 			++data;
@@ -733,7 +732,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
 	/* Map fragments */
 	for (i = skb_shinfo(skb)->nr_frags - 1; i >= 0; i--) {
 		frag = &skb_shinfo(skb)->frags[i];
-		dma = skb_frag_dma_map(&mdev->dev->pdev->dev, frag,
+		dma = skb_frag_dma_map(priv->ddev, frag,
 				       0, skb_frag_size(frag),
 				       DMA_TO_DEVICE);
 		data->addr = cpu_to_be64(dma);
@@ -745,7 +744,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)

 	/* Map linear part */
 	if (tx_info->linear) {
-		dma = pci_map_single(mdev->dev->pdev, skb->data + lso_header_size,
+		dma = dma_map_single(priv->ddev, skb->data + lso_header_size,
 			skb_headlen(skb) - lso_header_size, PCI_DMA_TODEVICE);
 		data->addr = cpu_to_be64(dma);
 		data->lkey = cpu_to_be32(mdev->mr.key);
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
index d60335f3c473..a5bb0b7d339a 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
@@ -482,6 +482,7 @@ struct mlx4_en_priv {
 	struct mlx4_en_stat_out_mbox hw_stats;
 	int vids[128];
 	bool wol;
+	struct device *ddev;
 };

 enum mlx4_en_wol {