author		Eric Dumazet <eric.dumazet@gmail.com>	2011-11-15 02:30:05 -0500
committer	David S. Miller <davem@davemloft.net>	2011-11-18 02:04:47 -0500
commit		dd2bc8e9c0685d8eaaaf06e65919e31d60478411 (patch)
tree		228e68ebf3f4f8012be3f8c48a4172c9201d59dd /drivers/net/ethernet/broadcom/bnx2.c
parent		adc9300e78e6091a7eaa1821213836379d4dbaa8 (diff)
bnx2: switch to build_skb() infrastructure
This is very similar to the bnx2x conversion, but bnx2 only requires 16-byte alignment at the start of the received frame to store its l2_fhdr, so the goal was not to reduce skb truesize (in fact it should not change after this patch).

Using build_skb() reduces cache line misses in the driver, since we use cache-hot skbs instead of cold ones. The number of in-flight sk_buff structures is lower, and they are more likely to be recycled in SLUB caches while still hot.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
CC: Michael Chan <mchan@broadcom.com>
CC: Eilon Greenstein <eilong@broadcom.com>
Reviewed-by: Michael Chan <mchan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
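For readers unfamiliar with the build_skb() pattern referred to above, the sketch below condenses what the patch does on the receive path: the refill side hands the hardware a bare kmalloc() buffer with no sk_buff attached, and the completion side builds the skb around that same buffer on a cache-hot CPU. The function names and the stripped-down error handling are illustrative only, not the driver's actual code; every call and field used here (get_l2_fhdr(), the sw_bd data member, build_skb() taking a single argument as it did at the time of this commit) does appear in the diff below.

/* Hypothetical condensation of the pattern this patch introduces; the real
 * code is bnx2_alloc_rx_data() and bnx2_rx_skb() in the diff below.  Assumes
 * the driver's own headers (bnx2.h) for struct bnx2, sw_bd, get_l2_fhdr().
 */
static int rx_refill_one(struct bnx2 *bp, struct bnx2_rx_ring_info *rxr,
			 u16 index, gfp_t gfp)
{
	struct sw_bd *rx_buf = &rxr->rx_buf_ring[index];
	u8 *data = kmalloc(bp->rx_buf_size, gfp);	/* no sk_buff yet */
	dma_addr_t mapping;

	if (!data)
		return -ENOMEM;
	/* map only the region the NIC writes, starting at the l2_fhdr */
	mapping = dma_map_single(&bp->pdev->dev, get_l2_fhdr(data),
				 bp->rx_buf_use_size, PCI_DMA_FROMDEVICE);
	if (dma_mapping_error(&bp->pdev->dev, mapping)) {
		kfree(data);
		return -EIO;
	}
	rx_buf->data = data;
	dma_unmap_addr_set(rx_buf, mapping, mapping);
	return 0;
}

static struct sk_buff *rx_complete_one(struct bnx2 *bp, u8 *data,
				       dma_addr_t mapping, unsigned int len)
{
	struct sk_buff *skb;

	dma_unmap_single(&bp->pdev->dev, mapping, bp->rx_buf_use_size,
			 PCI_DMA_FROMDEVICE);
	skb = build_skb(data);		/* wrap the existing kmalloc() block */
	if (!skb) {
		kfree(data);
		return NULL;
	}
	/* skip headroom and the l2_fhdr so skb->data points at the frame */
	skb_reserve(skb, ((u8 *)get_l2_fhdr(data) - data) + BNX2_RX_OFFSET);
	skb_put(skb, len);
	return skb;
}

In the patch itself, the refill logic lives in bnx2_alloc_rx_data(), called from bnx2_init_rx_ring() and the NAPI poll path, while the completion logic lives in bnx2_rx_skb(), which now returns the built skb (or NULL) instead of an error code.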
Diffstat (limited to 'drivers/net/ethernet/broadcom/bnx2.c')
-rw-r--r--	drivers/net/ethernet/broadcom/bnx2.c	| 137
1 file changed, 70 insertions(+), 67 deletions(-)
diff --git a/drivers/net/ethernet/broadcom/bnx2.c b/drivers/net/ethernet/broadcom/bnx2.c
index 6b7cd1e80ada..66f6e7f654c3 100644
--- a/drivers/net/ethernet/broadcom/bnx2.c
+++ b/drivers/net/ethernet/broadcom/bnx2.c
@@ -2725,31 +2725,27 @@ bnx2_free_rx_page(struct bnx2 *bp, struct bnx2_rx_ring_info *rxr, u16 index)
 }
 
 static inline int
-bnx2_alloc_rx_skb(struct bnx2 *bp, struct bnx2_rx_ring_info *rxr, u16 index, gfp_t gfp)
+bnx2_alloc_rx_data(struct bnx2 *bp, struct bnx2_rx_ring_info *rxr, u16 index, gfp_t gfp)
 {
-	struct sk_buff *skb;
+	u8 *data;
 	struct sw_bd *rx_buf = &rxr->rx_buf_ring[index];
 	dma_addr_t mapping;
 	struct rx_bd *rxbd = &rxr->rx_desc_ring[RX_RING(index)][RX_IDX(index)];
-	unsigned long align;
 
-	skb = __netdev_alloc_skb(bp->dev, bp->rx_buf_size, gfp);
-	if (skb == NULL) {
+	data = kmalloc(bp->rx_buf_size, gfp);
+	if (!data)
 		return -ENOMEM;
-	}
 
-	if (unlikely((align = (unsigned long) skb->data & (BNX2_RX_ALIGN - 1))))
-		skb_reserve(skb, BNX2_RX_ALIGN - align);
-
-	mapping = dma_map_single(&bp->pdev->dev, skb->data, bp->rx_buf_use_size,
+	mapping = dma_map_single(&bp->pdev->dev,
+				 get_l2_fhdr(data),
+				 bp->rx_buf_use_size,
 				 PCI_DMA_FROMDEVICE);
 	if (dma_mapping_error(&bp->pdev->dev, mapping)) {
-		dev_kfree_skb(skb);
+		kfree(data);
 		return -EIO;
 	}
 
-	rx_buf->skb = skb;
-	rx_buf->desc = (struct l2_fhdr *) skb->data;
+	rx_buf->data = data;
 	dma_unmap_addr_set(rx_buf, mapping, mapping);
 
 	rxbd->rx_bd_haddr_hi = (u64) mapping >> 32;
@@ -2956,8 +2952,8 @@ bnx2_reuse_rx_skb_pages(struct bnx2 *bp, struct bnx2_rx_ring_info *rxr,
 }
 
 static inline void
-bnx2_reuse_rx_skb(struct bnx2 *bp, struct bnx2_rx_ring_info *rxr,
-		  struct sk_buff *skb, u16 cons, u16 prod)
+bnx2_reuse_rx_data(struct bnx2 *bp, struct bnx2_rx_ring_info *rxr,
+		   u8 *data, u16 cons, u16 prod)
 {
 	struct sw_bd *cons_rx_buf, *prod_rx_buf;
 	struct rx_bd *cons_bd, *prod_bd;
@@ -2971,8 +2967,7 @@ bnx2_reuse_rx_skb(struct bnx2 *bp, struct bnx2_rx_ring_info *rxr,
 
 	rxr->rx_prod_bseq += bp->rx_buf_use_size;
 
-	prod_rx_buf->skb = skb;
-	prod_rx_buf->desc = (struct l2_fhdr *) skb->data;
+	prod_rx_buf->data = data;
 
 	if (cons == prod)
 		return;
@@ -2986,33 +2981,39 @@ bnx2_reuse_rx_skb(struct bnx2 *bp, struct bnx2_rx_ring_info *rxr,
 	prod_bd->rx_bd_haddr_lo = cons_bd->rx_bd_haddr_lo;
 }
 
-static int
-bnx2_rx_skb(struct bnx2 *bp, struct bnx2_rx_ring_info *rxr, struct sk_buff *skb,
+static struct sk_buff *
+bnx2_rx_skb(struct bnx2 *bp, struct bnx2_rx_ring_info *rxr, u8 *data,
 	    unsigned int len, unsigned int hdr_len, dma_addr_t dma_addr,
 	    u32 ring_idx)
 {
 	int err;
 	u16 prod = ring_idx & 0xffff;
+	struct sk_buff *skb;
 
-	err = bnx2_alloc_rx_skb(bp, rxr, prod, GFP_ATOMIC);
+	err = bnx2_alloc_rx_data(bp, rxr, prod, GFP_ATOMIC);
 	if (unlikely(err)) {
-		bnx2_reuse_rx_skb(bp, rxr, skb, (u16) (ring_idx >> 16), prod);
+		bnx2_reuse_rx_data(bp, rxr, data, (u16) (ring_idx >> 16), prod);
+error:
 		if (hdr_len) {
 			unsigned int raw_len = len + 4;
 			int pages = PAGE_ALIGN(raw_len - hdr_len) >> PAGE_SHIFT;
 
 			bnx2_reuse_rx_skb_pages(bp, rxr, NULL, pages);
 		}
-		return err;
+		return NULL;
 	}
 
-	skb_reserve(skb, BNX2_RX_OFFSET);
 	dma_unmap_single(&bp->pdev->dev, dma_addr, bp->rx_buf_use_size,
 			 PCI_DMA_FROMDEVICE);
-
+	skb = build_skb(data);
+	if (!skb) {
+		kfree(data);
+		goto error;
+	}
+	skb_reserve(skb, ((u8 *)get_l2_fhdr(data) - data) + BNX2_RX_OFFSET);
 	if (hdr_len == 0) {
 		skb_put(skb, len);
-		return 0;
+		return skb;
 	} else {
 		unsigned int i, frag_len, frag_size, pages;
 		struct sw_pg *rx_pg;
@@ -3043,7 +3044,7 @@ bnx2_rx_skb(struct bnx2 *bp, struct bnx2_rx_ring_info *rxr, struct sk_buff *skb,
 					skb_frag_size_sub(frag, tail);
 					skb->data_len -= tail;
 				}
-				return 0;
+				return skb;
 			}
 			rx_pg = &rxr->rx_pg_ring[pg_cons];
 
@@ -3065,7 +3066,7 @@ bnx2_rx_skb(struct bnx2 *bp, struct bnx2_rx_ring_info *rxr, struct sk_buff *skb,
 				rxr->rx_pg_prod = pg_prod;
 				bnx2_reuse_rx_skb_pages(bp, rxr, skb,
 							pages - i);
-				return err;
+				return NULL;
 			}
 
 			dma_unmap_page(&bp->pdev->dev, mapping_old,
@@ -3082,7 +3083,7 @@ bnx2_rx_skb(struct bnx2 *bp, struct bnx2_rx_ring_info *rxr, struct sk_buff *skb,
 		rxr->rx_pg_prod = pg_prod;
 		rxr->rx_pg_cons = pg_cons;
 	}
-	return 0;
+	return skb;
 }
 
 static inline u16
@@ -3121,19 +3122,17 @@ bnx2_rx_int(struct bnx2 *bp, struct bnx2_napi *bnapi, int budget)
 		struct sw_bd *rx_buf, *next_rx_buf;
 		struct sk_buff *skb;
 		dma_addr_t dma_addr;
+		u8 *data;
 
 		sw_ring_cons = RX_RING_IDX(sw_cons);
 		sw_ring_prod = RX_RING_IDX(sw_prod);
 
 		rx_buf = &rxr->rx_buf_ring[sw_ring_cons];
-		skb = rx_buf->skb;
-		prefetchw(skb);
+		data = rx_buf->data;
+		rx_buf->data = NULL;
 
-		next_rx_buf =
-			&rxr->rx_buf_ring[RX_RING_IDX(NEXT_RX_BD(sw_cons))];
-		prefetch(next_rx_buf->desc);
-
-		rx_buf->skb = NULL;
+		rx_hdr = get_l2_fhdr(data);
+		prefetch(rx_hdr);
 
 		dma_addr = dma_unmap_addr(rx_buf, mapping);
 
@@ -3141,7 +3140,10 @@ bnx2_rx_int(struct bnx2 *bp, struct bnx2_napi *bnapi, int budget)
 			BNX2_RX_OFFSET + BNX2_RX_COPY_THRESH,
 			PCI_DMA_FROMDEVICE);
 
-		rx_hdr = rx_buf->desc;
+		next_rx_buf =
+			&rxr->rx_buf_ring[RX_RING_IDX(NEXT_RX_BD(sw_cons))];
+		prefetch(get_l2_fhdr(next_rx_buf->data));
+
 		len = rx_hdr->l2_fhdr_pkt_len;
 		status = rx_hdr->l2_fhdr_status;
 
@@ -3160,7 +3162,7 @@ bnx2_rx_int(struct bnx2 *bp, struct bnx2_napi *bnapi, int budget)
 				       L2_FHDR_ERRORS_TOO_SHORT |
 				       L2_FHDR_ERRORS_GIANT_FRAME))) {
 
-			bnx2_reuse_rx_skb(bp, rxr, skb, sw_ring_cons,
+			bnx2_reuse_rx_data(bp, rxr, data, sw_ring_cons,
 					  sw_ring_prod);
 			if (pg_ring_used) {
 				int pages;
@@ -3175,30 +3177,29 @@ bnx2_rx_int(struct bnx2 *bp, struct bnx2_napi *bnapi, int budget)
 			len -= 4;
 
 		if (len <= bp->rx_copy_thresh) {
-			struct sk_buff *new_skb;
-
-			new_skb = netdev_alloc_skb(bp->dev, len + 6);
-			if (new_skb == NULL) {
-				bnx2_reuse_rx_skb(bp, rxr, skb, sw_ring_cons,
+			skb = netdev_alloc_skb(bp->dev, len + 6);
+			if (skb == NULL) {
+				bnx2_reuse_rx_data(bp, rxr, data, sw_ring_cons,
 						  sw_ring_prod);
 				goto next_rx;
 			}
 
 			/* aligned copy */
-			skb_copy_from_linear_data_offset(skb,
-				BNX2_RX_OFFSET - 6,
-				new_skb->data, len + 6);
-			skb_reserve(new_skb, 6);
-			skb_put(new_skb, len);
+			memcpy(skb->data,
+			       (u8 *)rx_hdr + BNX2_RX_OFFSET - 6,
+			       len + 6);
+			skb_reserve(skb, 6);
+			skb_put(skb, len);
 
-			bnx2_reuse_rx_skb(bp, rxr, skb,
+			bnx2_reuse_rx_data(bp, rxr, data,
 					  sw_ring_cons, sw_ring_prod);
 
-			skb = new_skb;
-		} else if (unlikely(bnx2_rx_skb(bp, rxr, skb, len, hdr_len,
-			dma_addr, (sw_ring_cons << 16) | sw_ring_prod)))
-			goto next_rx;
-
+		} else {
+			skb = bnx2_rx_skb(bp, rxr, data, len, hdr_len, dma_addr,
+					  (sw_ring_cons << 16) | sw_ring_prod);
+			if (!skb)
+				goto next_rx;
+		}
 		if ((status & L2_FHDR_STATUS_L2_VLAN_TAG) &&
 		    !(bp->rx_mode & BNX2_EMAC_RX_MODE_KEEP_VLAN_TAG))
 			__vlan_hwaccel_put_tag(skb, rx_hdr->l2_fhdr_vlan_tag);
@@ -5225,7 +5226,7 @@ bnx2_init_rx_ring(struct bnx2 *bp, int ring_num)
 
 	ring_prod = prod = rxr->rx_prod;
 	for (i = 0; i < bp->rx_ring_size; i++) {
-		if (bnx2_alloc_rx_skb(bp, rxr, ring_prod, GFP_KERNEL) < 0) {
+		if (bnx2_alloc_rx_data(bp, rxr, ring_prod, GFP_KERNEL) < 0) {
 			netdev_warn(bp->dev, "init'ed rx ring %d with %d/%d skbs only\n",
 				    ring_num, i, bp->rx_ring_size);
 			break;
@@ -5320,7 +5321,7 @@ bnx2_set_rx_ring_size(struct bnx2 *bp, u32 size)
 	rx_size = bp->dev->mtu + ETH_HLEN + BNX2_RX_OFFSET + 8;
 
 	rx_space = SKB_DATA_ALIGN(rx_size + BNX2_RX_ALIGN) + NET_SKB_PAD +
-		sizeof(struct skb_shared_info);
+		SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
 
 	bp->rx_copy_thresh = BNX2_RX_COPY_THRESH;
 	bp->rx_pg_ring_size = 0;
@@ -5342,8 +5343,9 @@ bnx2_set_rx_ring_size(struct bnx2 *bp, u32 size)
 	}
 
 	bp->rx_buf_use_size = rx_size;
-	/* hw alignment */
-	bp->rx_buf_size = bp->rx_buf_use_size + BNX2_RX_ALIGN;
+	/* hw alignment + build_skb() overhead*/
+	bp->rx_buf_size = SKB_DATA_ALIGN(bp->rx_buf_use_size + BNX2_RX_ALIGN) +
+		NET_SKB_PAD + SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
 	bp->rx_jumbo_thresh = rx_size - BNX2_RX_OFFSET;
 	bp->rx_ring_size = size;
 	bp->rx_max_ring = bnx2_find_max_ring(size, MAX_RX_RINGS);
@@ -5409,9 +5411,9 @@ bnx2_free_rx_skbs(struct bnx2 *bp)
 
 		for (j = 0; j < bp->rx_max_ring_idx; j++) {
 			struct sw_bd *rx_buf = &rxr->rx_buf_ring[j];
-			struct sk_buff *skb = rx_buf->skb;
+			u8 *data = rx_buf->data;
 
-			if (skb == NULL)
+			if (data == NULL)
 				continue;
 
 			dma_unmap_single(&bp->pdev->dev,
@@ -5419,9 +5421,9 @@ bnx2_free_rx_skbs(struct bnx2 *bp)
 					 bp->rx_buf_use_size,
 					 PCI_DMA_FROMDEVICE);
 
-			rx_buf->skb = NULL;
+			rx_buf->data = NULL;
 
-			dev_kfree_skb(skb);
+			kfree(data);
 		}
 		for (j = 0; j < bp->rx_max_pg_ring_idx; j++)
 			bnx2_free_rx_page(bp, rxr, j);
@@ -5727,7 +5729,8 @@ static int
 bnx2_run_loopback(struct bnx2 *bp, int loopback_mode)
 {
 	unsigned int pkt_size, num_pkts, i;
-	struct sk_buff *skb, *rx_skb;
+	struct sk_buff *skb;
+	u8 *data;
 	unsigned char *packet;
 	u16 rx_start_idx, rx_idx;
 	dma_addr_t map;
@@ -5819,14 +5822,14 @@ bnx2_run_loopback(struct bnx2 *bp, int loopback_mode)
 	}
 
 	rx_buf = &rxr->rx_buf_ring[rx_start_idx];
-	rx_skb = rx_buf->skb;
+	data = rx_buf->data;
 
-	rx_hdr = rx_buf->desc;
-	skb_reserve(rx_skb, BNX2_RX_OFFSET);
+	rx_hdr = get_l2_fhdr(data);
+	data = (u8 *)rx_hdr + BNX2_RX_OFFSET;
 
 	dma_sync_single_for_cpu(&bp->pdev->dev,
 		dma_unmap_addr(rx_buf, mapping),
-		bp->rx_buf_size, PCI_DMA_FROMDEVICE);
+		bp->rx_buf_use_size, PCI_DMA_FROMDEVICE);
 
 	if (rx_hdr->l2_fhdr_status &
 	    (L2_FHDR_ERRORS_BAD_CRC |
@@ -5843,7 +5846,7 @@ bnx2_run_loopback(struct bnx2 *bp, int loopback_mode)
 	}
 
 	for (i = 14; i < pkt_size; i++) {
-		if (*(rx_skb->data + i) != (unsigned char) (i & 0xff)) {
+		if (*(data + i) != (unsigned char) (i & 0xff)) {
 			goto loopback_test_done;
 		}
 	}
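A closing note on the sizing change in the "@@ -5342,8 +5343,9" hunk above: because build_skb() places struct skb_shared_info at the tail of the very buffer it is handed, the kmalloc() size must now cover skb headroom and the shared-info area in addition to the DMA-visible portion, which is what the new bp->rx_buf_size expression accounts for. A rough picture of the resulting layout, assuming the l2_fhdr sits just past the NET_SKB_PAD headroom (the exact offset comes from get_l2_fhdr(), which this patch adds to bnx2.h and which is not part of this diff):

/*
 * Approximate rx buffer layout after this patch (offsets illustrative):
 *
 * data
 *  |- NET_SKB_PAD + alignment slack            (headroom, never DMA-mapped)
 *  |- l2_fhdr, BNX2_RX_OFFSET, received frame  (bp->rx_buf_use_size; the only
 *  |                                            region dma_map_single()'d)
 *  |- padding up to an SKB_DATA_ALIGN() boundary
 *  `- struct skb_shared_info                   (placed at the tail by
 *                                               build_skb())
 *
 * hence:
 *	bp->rx_buf_size = SKB_DATA_ALIGN(bp->rx_buf_use_size + BNX2_RX_ALIGN) +
 *		NET_SKB_PAD + SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
 */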