diff options
author | Michael Chan <mchan@broadcom.com> | 2010-05-06 04:58:13 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2010-05-07 01:17:24 -0400 |
commit | a33fa66bcf365ffe5b79d1ae1d3582cc261ae56e (patch) | |
tree | cf714757a5c3644bd8154a4da0cea4eb53a80032 /drivers/net/bnx2.c | |
parent | c67938a9e071fa51c91ed17a14382e128368d115 (diff) |
bnx2: Add prefetches to rx path.
Add prefetches of the skb and the next rx descriptor to speed up the rx path.
Use prefetchw() for the skb [suggested by Eric Dumazet].
The rx descriptor is in skb->data which is mapped for streaming mode DMA.
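Eric Dumazet pointed out that we should not prefetch the data before
dma_sync. So we prefetch the next rx descriptor only if dma_sync is a
no-op on the system, i.e. when the device's DMA ops provide no
sync_single_for_cpu callback. The skb itself is always prefetched.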
Signed-off-by: Michael Chan <mchan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'drivers/net/bnx2.c')
-rw-r--r-- | drivers/net/bnx2.c | 16 |
1 files changed, 13 insertions, 3 deletions
diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c index 320526b9b467..667f4196dc29 100644 --- a/drivers/net/bnx2.c +++ b/drivers/net/bnx2.c | |||
@@ -2719,6 +2719,7 @@ bnx2_alloc_rx_skb(struct bnx2 *bp, struct bnx2_rx_ring_info *rxr, u16 index) | |||
2719 | } | 2719 | } |
2720 | 2720 | ||
2721 | rx_buf->skb = skb; | 2721 | rx_buf->skb = skb; |
2722 | rx_buf->desc = (struct l2_fhdr *) skb->data; | ||
2722 | dma_unmap_addr_set(rx_buf, mapping, mapping); | 2723 | dma_unmap_addr_set(rx_buf, mapping, mapping); |
2723 | 2724 | ||
2724 | rxbd->rx_bd_haddr_hi = (u64) mapping >> 32; | 2725 | rxbd->rx_bd_haddr_hi = (u64) mapping >> 32; |
@@ -2941,6 +2942,7 @@ bnx2_reuse_rx_skb(struct bnx2 *bp, struct bnx2_rx_ring_info *rxr, | |||
2941 | rxr->rx_prod_bseq += bp->rx_buf_use_size; | 2942 | rxr->rx_prod_bseq += bp->rx_buf_use_size; |
2942 | 2943 | ||
2943 | prod_rx_buf->skb = skb; | 2944 | prod_rx_buf->skb = skb; |
2945 | prod_rx_buf->desc = (struct l2_fhdr *) skb->data; | ||
2944 | 2946 | ||
2945 | if (cons == prod) | 2947 | if (cons == prod) |
2946 | return; | 2948 | return; |
@@ -3074,6 +3076,7 @@ bnx2_rx_int(struct bnx2 *bp, struct bnx2_napi *bnapi, int budget) | |||
3074 | u16 hw_cons, sw_cons, sw_ring_cons, sw_prod, sw_ring_prod; | 3076 | u16 hw_cons, sw_cons, sw_ring_cons, sw_prod, sw_ring_prod; |
3075 | struct l2_fhdr *rx_hdr; | 3077 | struct l2_fhdr *rx_hdr; |
3076 | int rx_pkt = 0, pg_ring_used = 0; | 3078 | int rx_pkt = 0, pg_ring_used = 0; |
3079 | struct pci_dev *pdev = bp->pdev; | ||
3077 | 3080 | ||
3078 | hw_cons = bnx2_get_hw_rx_cons(bnapi); | 3081 | hw_cons = bnx2_get_hw_rx_cons(bnapi); |
3079 | sw_cons = rxr->rx_cons; | 3082 | sw_cons = rxr->rx_cons; |
@@ -3086,7 +3089,7 @@ bnx2_rx_int(struct bnx2 *bp, struct bnx2_napi *bnapi, int budget) | |||
3086 | while (sw_cons != hw_cons) { | 3089 | while (sw_cons != hw_cons) { |
3087 | unsigned int len, hdr_len; | 3090 | unsigned int len, hdr_len; |
3088 | u32 status; | 3091 | u32 status; |
3089 | struct sw_bd *rx_buf; | 3092 | struct sw_bd *rx_buf, *next_rx_buf; |
3090 | struct sk_buff *skb; | 3093 | struct sk_buff *skb; |
3091 | dma_addr_t dma_addr; | 3094 | dma_addr_t dma_addr; |
3092 | u16 vtag = 0; | 3095 | u16 vtag = 0; |
@@ -3097,7 +3100,14 @@ bnx2_rx_int(struct bnx2 *bp, struct bnx2_napi *bnapi, int budget) | |||
3097 | 3100 | ||
3098 | rx_buf = &rxr->rx_buf_ring[sw_ring_cons]; | 3101 | rx_buf = &rxr->rx_buf_ring[sw_ring_cons]; |
3099 | skb = rx_buf->skb; | 3102 | skb = rx_buf->skb; |
3103 | prefetchw(skb); | ||
3100 | 3104 | ||
3105 | if (!get_dma_ops(&pdev->dev)->sync_single_for_cpu) { | ||
3106 | next_rx_buf = | ||
3107 | &rxr->rx_buf_ring[ | ||
3108 | RX_RING_IDX(NEXT_RX_BD(sw_cons))]; | ||
3109 | prefetch(next_rx_buf->desc); | ||
3110 | } | ||
3101 | rx_buf->skb = NULL; | 3111 | rx_buf->skb = NULL; |
3102 | 3112 | ||
3103 | dma_addr = dma_unmap_addr(rx_buf, mapping); | 3113 | dma_addr = dma_unmap_addr(rx_buf, mapping); |
@@ -3106,7 +3116,7 @@ bnx2_rx_int(struct bnx2 *bp, struct bnx2_napi *bnapi, int budget) | |||
3106 | BNX2_RX_OFFSET + BNX2_RX_COPY_THRESH, | 3116 | BNX2_RX_OFFSET + BNX2_RX_COPY_THRESH, |
3107 | PCI_DMA_FROMDEVICE); | 3117 | PCI_DMA_FROMDEVICE); |
3108 | 3118 | ||
3109 | rx_hdr = (struct l2_fhdr *) skb->data; | 3119 | rx_hdr = rx_buf->desc; |
3110 | len = rx_hdr->l2_fhdr_pkt_len; | 3120 | len = rx_hdr->l2_fhdr_pkt_len; |
3111 | status = rx_hdr->l2_fhdr_status; | 3121 | status = rx_hdr->l2_fhdr_status; |
3112 | 3122 | ||
@@ -5764,7 +5774,7 @@ bnx2_run_loopback(struct bnx2 *bp, int loopback_mode) | |||
5764 | rx_buf = &rxr->rx_buf_ring[rx_start_idx]; | 5774 | rx_buf = &rxr->rx_buf_ring[rx_start_idx]; |
5765 | rx_skb = rx_buf->skb; | 5775 | rx_skb = rx_buf->skb; |
5766 | 5776 | ||
5767 | rx_hdr = (struct l2_fhdr *) rx_skb->data; | 5777 | rx_hdr = rx_buf->desc; |
5768 | skb_reserve(rx_skb, BNX2_RX_OFFSET); | 5778 | skb_reserve(rx_skb, BNX2_RX_OFFSET); |
5769 | 5779 | ||
5770 | pci_dma_sync_single_for_cpu(bp->pdev, | 5780 | pci_dma_sync_single_for_cpu(bp->pdev, |