diff options
author | Yevgeny Petrilin <yevgenyp@mellanox.co.il> | 2012-03-05 23:03:34 -0500 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2012-03-06 15:19:17 -0500 |
commit | ebf8c9aa032f03343b91c91951b0705021b02eb0 (patch) | |
tree | 821ec45a0319a6592603a66048700bac3b0f15bc | |
parent | 6975f4ce5a44e337514283e84761adaf2849aa26 (diff) |
net/mlx4_en: Saving mem access on data path
Cache a pointer to pdev->dev in the private structure and use the dma_map API instead of pci_map.
There are multiple map/unmap operations on the data path;
they are optimized here by eliminating redundant pointer dereferences.
Those places were identified as hot-spots when running kernel profiling
during some benchmarks.
The fixes had the most impact when testing packet rate with small packets,
reducing CPU load by several percent, and in some cases making the difference
between reaching wire speed and being CPU bound.
Signed-off-by: Yevgeny Petrilin <yevgenyp@mellanox.co.il>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 1 | ||||
-rw-r--r-- | drivers/net/ethernet/mellanox/mlx4/en_rx.c | 14 | ||||
-rw-r--r-- | drivers/net/ethernet/mellanox/mlx4/en_tx.c | 13 | ||||
-rw-r--r-- | drivers/net/ethernet/mellanox/mlx4/mlx4_en.h | 1 |
4 files changed, 13 insertions, 16 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 9fe4f94c6da7..31b455a49273 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c | |||
@@ -1062,6 +1062,7 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, | |||
1062 | memset(priv, 0, sizeof(struct mlx4_en_priv)); | 1062 | memset(priv, 0, sizeof(struct mlx4_en_priv)); |
1063 | priv->dev = dev; | 1063 | priv->dev = dev; |
1064 | priv->mdev = mdev; | 1064 | priv->mdev = mdev; |
1065 | priv->ddev = &mdev->pdev->dev; | ||
1065 | priv->prof = prof; | 1066 | priv->prof = prof; |
1066 | priv->port = port; | 1067 | priv->port = port; |
1067 | priv->port_up = false; | 1068 | priv->port_up = false; |
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index d703ef2c9c91..c881712b7492 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c | |||
@@ -48,7 +48,6 @@ static int mlx4_en_alloc_frag(struct mlx4_en_priv *priv, | |||
48 | struct mlx4_en_rx_alloc *ring_alloc, | 48 | struct mlx4_en_rx_alloc *ring_alloc, |
49 | int i) | 49 | int i) |
50 | { | 50 | { |
51 | struct mlx4_en_dev *mdev = priv->mdev; | ||
52 | struct mlx4_en_frag_info *frag_info = &priv->frag_info[i]; | 51 | struct mlx4_en_frag_info *frag_info = &priv->frag_info[i]; |
53 | struct mlx4_en_rx_alloc *page_alloc = &ring_alloc[i]; | 52 | struct mlx4_en_rx_alloc *page_alloc = &ring_alloc[i]; |
54 | struct page *page; | 53 | struct page *page; |
@@ -72,7 +71,7 @@ static int mlx4_en_alloc_frag(struct mlx4_en_priv *priv, | |||
72 | skb_frags[i].offset = page_alloc->offset; | 71 | skb_frags[i].offset = page_alloc->offset; |
73 | page_alloc->offset += frag_info->frag_stride; | 72 | page_alloc->offset += frag_info->frag_stride; |
74 | } | 73 | } |
75 | dma = pci_map_single(mdev->pdev, page_address(skb_frags[i].page) + | 74 | dma = dma_map_single(priv->ddev, page_address(skb_frags[i].page) + |
76 | skb_frags[i].offset, frag_info->frag_size, | 75 | skb_frags[i].offset, frag_info->frag_size, |
77 | PCI_DMA_FROMDEVICE); | 76 | PCI_DMA_FROMDEVICE); |
78 | rx_desc->data[i].addr = cpu_to_be64(dma); | 77 | rx_desc->data[i].addr = cpu_to_be64(dma); |
@@ -186,7 +185,6 @@ static void mlx4_en_free_rx_desc(struct mlx4_en_priv *priv, | |||
186 | struct mlx4_en_rx_ring *ring, | 185 | struct mlx4_en_rx_ring *ring, |
187 | int index) | 186 | int index) |
188 | { | 187 | { |
189 | struct mlx4_en_dev *mdev = priv->mdev; | ||
190 | struct page_frag *skb_frags; | 188 | struct page_frag *skb_frags; |
191 | struct mlx4_en_rx_desc *rx_desc = ring->buf + (index << ring->log_stride); | 189 | struct mlx4_en_rx_desc *rx_desc = ring->buf + (index << ring->log_stride); |
192 | dma_addr_t dma; | 190 | dma_addr_t dma; |
@@ -198,7 +196,7 @@ static void mlx4_en_free_rx_desc(struct mlx4_en_priv *priv, | |||
198 | dma = be64_to_cpu(rx_desc->data[nr].addr); | 196 | dma = be64_to_cpu(rx_desc->data[nr].addr); |
199 | 197 | ||
200 | en_dbg(DRV, priv, "Unmapping buffer at dma:0x%llx\n", (u64) dma); | 198 | en_dbg(DRV, priv, "Unmapping buffer at dma:0x%llx\n", (u64) dma); |
201 | pci_unmap_single(mdev->pdev, dma, skb_frags[nr].size, | 199 | dma_unmap_single(priv->ddev, dma, skb_frags[nr].size, |
202 | PCI_DMA_FROMDEVICE); | 200 | PCI_DMA_FROMDEVICE); |
203 | put_page(skb_frags[nr].page); | 201 | put_page(skb_frags[nr].page); |
204 | } | 202 | } |
@@ -412,7 +410,6 @@ static int mlx4_en_complete_rx_desc(struct mlx4_en_priv *priv, | |||
412 | int length) | 410 | int length) |
413 | { | 411 | { |
414 | struct skb_frag_struct *skb_frags_rx = skb_shinfo(skb)->frags; | 412 | struct skb_frag_struct *skb_frags_rx = skb_shinfo(skb)->frags; |
415 | struct mlx4_en_dev *mdev = priv->mdev; | ||
416 | struct mlx4_en_frag_info *frag_info; | 413 | struct mlx4_en_frag_info *frag_info; |
417 | int nr; | 414 | int nr; |
418 | dma_addr_t dma; | 415 | dma_addr_t dma; |
@@ -435,7 +432,7 @@ static int mlx4_en_complete_rx_desc(struct mlx4_en_priv *priv, | |||
435 | goto fail; | 432 | goto fail; |
436 | 433 | ||
437 | /* Unmap buffer */ | 434 | /* Unmap buffer */ |
438 | pci_unmap_single(mdev->pdev, dma, skb_frag_size(&skb_frags_rx[nr]), | 435 | dma_unmap_single(priv->ddev, dma, skb_frag_size(&skb_frags_rx[nr]), |
439 | PCI_DMA_FROMDEVICE); | 436 | PCI_DMA_FROMDEVICE); |
440 | } | 437 | } |
441 | /* Adjust size of last fragment to match actual length */ | 438 | /* Adjust size of last fragment to match actual length */ |
@@ -461,7 +458,6 @@ static struct sk_buff *mlx4_en_rx_skb(struct mlx4_en_priv *priv, | |||
461 | struct mlx4_en_rx_alloc *page_alloc, | 458 | struct mlx4_en_rx_alloc *page_alloc, |
462 | unsigned int length) | 459 | unsigned int length) |
463 | { | 460 | { |
464 | struct mlx4_en_dev *mdev = priv->mdev; | ||
465 | struct sk_buff *skb; | 461 | struct sk_buff *skb; |
466 | void *va; | 462 | void *va; |
467 | int used_frags; | 463 | int used_frags; |
@@ -483,10 +479,10 @@ static struct sk_buff *mlx4_en_rx_skb(struct mlx4_en_priv *priv, | |||
483 | /* We are copying all relevant data to the skb - temporarily | 479 | /* We are copying all relevant data to the skb - temporarily |
484 | * synch buffers for the copy */ | 480 | * synch buffers for the copy */ |
485 | dma = be64_to_cpu(rx_desc->data[0].addr); | 481 | dma = be64_to_cpu(rx_desc->data[0].addr); |
486 | dma_sync_single_for_cpu(&mdev->pdev->dev, dma, length, | 482 | dma_sync_single_for_cpu(priv->ddev, dma, length, |
487 | DMA_FROM_DEVICE); | 483 | DMA_FROM_DEVICE); |
488 | skb_copy_to_linear_data(skb, va, length); | 484 | skb_copy_to_linear_data(skb, va, length); |
489 | dma_sync_single_for_device(&mdev->pdev->dev, dma, length, | 485 | dma_sync_single_for_device(priv->ddev, dma, length, |
490 | DMA_FROM_DEVICE); | 486 | DMA_FROM_DEVICE); |
491 | skb->tail += length; | 487 | skb->tail += length; |
492 | } else { | 488 | } else { |
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c index 50b3fa5212ed..008f0af5cc8b 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c | |||
@@ -198,7 +198,6 @@ static u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv, | |||
198 | struct mlx4_en_tx_ring *ring, | 198 | struct mlx4_en_tx_ring *ring, |
199 | int index, u8 owner) | 199 | int index, u8 owner) |
200 | { | 200 | { |
201 | struct mlx4_en_dev *mdev = priv->mdev; | ||
202 | struct mlx4_en_tx_info *tx_info = &ring->tx_info[index]; | 201 | struct mlx4_en_tx_info *tx_info = &ring->tx_info[index]; |
203 | struct mlx4_en_tx_desc *tx_desc = ring->buf + index * TXBB_SIZE; | 202 | struct mlx4_en_tx_desc *tx_desc = ring->buf + index * TXBB_SIZE; |
204 | struct mlx4_wqe_data_seg *data = (void *) tx_desc + tx_info->data_offset; | 203 | struct mlx4_wqe_data_seg *data = (void *) tx_desc + tx_info->data_offset; |
@@ -214,7 +213,7 @@ static u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv, | |||
214 | if (likely((void *) tx_desc + tx_info->nr_txbb * TXBB_SIZE <= end)) { | 213 | if (likely((void *) tx_desc + tx_info->nr_txbb * TXBB_SIZE <= end)) { |
215 | if (!tx_info->inl) { | 214 | if (!tx_info->inl) { |
216 | if (tx_info->linear) { | 215 | if (tx_info->linear) { |
217 | pci_unmap_single(mdev->pdev, | 216 | dma_unmap_single(priv->ddev, |
218 | (dma_addr_t) be64_to_cpu(data->addr), | 217 | (dma_addr_t) be64_to_cpu(data->addr), |
219 | be32_to_cpu(data->byte_count), | 218 | be32_to_cpu(data->byte_count), |
220 | PCI_DMA_TODEVICE); | 219 | PCI_DMA_TODEVICE); |
@@ -223,7 +222,7 @@ static u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv, | |||
223 | 222 | ||
224 | for (i = 0; i < frags; i++) { | 223 | for (i = 0; i < frags; i++) { |
225 | frag = &skb_shinfo(skb)->frags[i]; | 224 | frag = &skb_shinfo(skb)->frags[i]; |
226 | pci_unmap_page(mdev->pdev, | 225 | dma_unmap_page(priv->ddev, |
227 | (dma_addr_t) be64_to_cpu(data[i].addr), | 226 | (dma_addr_t) be64_to_cpu(data[i].addr), |
228 | skb_frag_size(frag), PCI_DMA_TODEVICE); | 227 | skb_frag_size(frag), PCI_DMA_TODEVICE); |
229 | } | 228 | } |
@@ -241,7 +240,7 @@ static u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv, | |||
241 | } | 240 | } |
242 | 241 | ||
243 | if (tx_info->linear) { | 242 | if (tx_info->linear) { |
244 | pci_unmap_single(mdev->pdev, | 243 | dma_unmap_single(priv->ddev, |
245 | (dma_addr_t) be64_to_cpu(data->addr), | 244 | (dma_addr_t) be64_to_cpu(data->addr), |
246 | be32_to_cpu(data->byte_count), | 245 | be32_to_cpu(data->byte_count), |
247 | PCI_DMA_TODEVICE); | 246 | PCI_DMA_TODEVICE); |
@@ -253,7 +252,7 @@ static u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv, | |||
253 | if ((void *) data >= end) | 252 | if ((void *) data >= end) |
254 | data = ring->buf; | 253 | data = ring->buf; |
255 | frag = &skb_shinfo(skb)->frags[i]; | 254 | frag = &skb_shinfo(skb)->frags[i]; |
256 | pci_unmap_page(mdev->pdev, | 255 | dma_unmap_page(priv->ddev, |
257 | (dma_addr_t) be64_to_cpu(data->addr), | 256 | (dma_addr_t) be64_to_cpu(data->addr), |
258 | skb_frag_size(frag), PCI_DMA_TODEVICE); | 257 | skb_frag_size(frag), PCI_DMA_TODEVICE); |
259 | ++data; | 258 | ++data; |
@@ -733,7 +732,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) | |||
733 | /* Map fragments */ | 732 | /* Map fragments */ |
734 | for (i = skb_shinfo(skb)->nr_frags - 1; i >= 0; i--) { | 733 | for (i = skb_shinfo(skb)->nr_frags - 1; i >= 0; i--) { |
735 | frag = &skb_shinfo(skb)->frags[i]; | 734 | frag = &skb_shinfo(skb)->frags[i]; |
736 | dma = skb_frag_dma_map(&mdev->dev->pdev->dev, frag, | 735 | dma = skb_frag_dma_map(priv->ddev, frag, |
737 | 0, skb_frag_size(frag), | 736 | 0, skb_frag_size(frag), |
738 | DMA_TO_DEVICE); | 737 | DMA_TO_DEVICE); |
739 | data->addr = cpu_to_be64(dma); | 738 | data->addr = cpu_to_be64(dma); |
@@ -745,7 +744,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) | |||
745 | 744 | ||
746 | /* Map linear part */ | 745 | /* Map linear part */ |
747 | if (tx_info->linear) { | 746 | if (tx_info->linear) { |
748 | dma = pci_map_single(mdev->dev->pdev, skb->data + lso_header_size, | 747 | dma = dma_map_single(priv->ddev, skb->data + lso_header_size, |
749 | skb_headlen(skb) - lso_header_size, PCI_DMA_TODEVICE); | 748 | skb_headlen(skb) - lso_header_size, PCI_DMA_TODEVICE); |
750 | data->addr = cpu_to_be64(dma); | 749 | data->addr = cpu_to_be64(dma); |
751 | data->lkey = cpu_to_be32(mdev->mr.key); | 750 | data->lkey = cpu_to_be32(mdev->mr.key); |
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index d60335f3c473..a5bb0b7d339a 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h | |||
@@ -482,6 +482,7 @@ struct mlx4_en_priv { | |||
482 | struct mlx4_en_stat_out_mbox hw_stats; | 482 | struct mlx4_en_stat_out_mbox hw_stats; |
483 | int vids[128]; | 483 | int vids[128]; |
484 | bool wol; | 484 | bool wol; |
485 | struct device *ddev; | ||
485 | }; | 486 | }; |
486 | 487 | ||
487 | enum mlx4_en_wol { | 488 | enum mlx4_en_wol { |