aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Herbert Xu <herbert@gondor.apana.org.au> 2009-04-16 05:02:07 -0400
committer: David S. Miller <davem@davemloft.net> 2009-04-16 05:02:07 -0400
commit: 76620aafd66f0004829764940c5466144969cffc (patch)
tree: 38041e6938121b5611546c582cd23f289db047b0
parent: 861ab44059350e5cab350238606cf8814abab93b (diff)
gro: New frags interface to avoid copying shinfo
It turns out that copying a 16-byte area at ~800k times a second can be really expensive :) This patch redesigns the frags GRO interface to avoid copying that area twice.

The two disciples of the frags interface have been converted.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- drivers/net/cxgb3/adapter.h 2
-rw-r--r-- drivers/net/cxgb3/sge.c 53
-rw-r--r-- drivers/net/sfc/rx.c 26
-rw-r--r-- include/linux/if_vlan.h 6
-rw-r--r-- include/linux/netdevice.h 22
-rw-r--r-- net/8021q/vlan_core.c 4
-rw-r--r-- net/core/dev.c 81
7 files changed, 101 insertions, 93 deletions
diff --git a/drivers/net/cxgb3/adapter.h b/drivers/net/cxgb3/adapter.h
index 714df2b675e6..322434ac42fc 100644
--- a/drivers/net/cxgb3/adapter.h
+++ b/drivers/net/cxgb3/adapter.h
@@ -195,7 +195,7 @@ struct sge_qset { /* an SGE queue set */
195 struct sge_rspq rspq; 195 struct sge_rspq rspq;
196 struct sge_fl fl[SGE_RXQ_PER_SET]; 196 struct sge_fl fl[SGE_RXQ_PER_SET];
197 struct sge_txq txq[SGE_TXQ_PER_SET]; 197 struct sge_txq txq[SGE_TXQ_PER_SET];
198 struct napi_gro_fraginfo lro_frag_tbl; 198 int nomem;
199 int lro_enabled; 199 int lro_enabled;
200 void *lro_va; 200 void *lro_va;
201 struct net_device *netdev; 201 struct net_device *netdev;
diff --git a/drivers/net/cxgb3/sge.c b/drivers/net/cxgb3/sge.c
index 26d3587f3399..73d569e758ec 100644
--- a/drivers/net/cxgb3/sge.c
+++ b/drivers/net/cxgb3/sge.c
@@ -654,7 +654,8 @@ static void t3_reset_qset(struct sge_qset *q)
654 q->txq_stopped = 0; 654 q->txq_stopped = 0;
655 q->tx_reclaim_timer.function = NULL; /* for t3_stop_sge_timers() */ 655 q->tx_reclaim_timer.function = NULL; /* for t3_stop_sge_timers() */
656 q->rx_reclaim_timer.function = NULL; 656 q->rx_reclaim_timer.function = NULL;
657 q->lro_frag_tbl.nr_frags = q->lro_frag_tbl.len = 0; 657 q->nomem = 0;
658 napi_free_frags(&q->napi);
658} 659}
659 660
660 661
@@ -2074,20 +2075,19 @@ static void lro_add_page(struct adapter *adap, struct sge_qset *qs,
2074 struct sge_fl *fl, int len, int complete) 2075 struct sge_fl *fl, int len, int complete)
2075{ 2076{
2076 struct rx_sw_desc *sd = &fl->sdesc[fl->cidx]; 2077 struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
2078 struct sk_buff *skb = NULL;
2077 struct cpl_rx_pkt *cpl; 2079 struct cpl_rx_pkt *cpl;
2078 struct skb_frag_struct *rx_frag = qs->lro_frag_tbl.frags; 2080 struct skb_frag_struct *rx_frag;
2079 int nr_frags = qs->lro_frag_tbl.nr_frags; 2081 int nr_frags;
2080 int frag_len = qs->lro_frag_tbl.len;
2081 int offset = 0; 2082 int offset = 0;
2082 2083
2083 if (!nr_frags) { 2084 if (!qs->nomem) {
2084 offset = 2 + sizeof(struct cpl_rx_pkt); 2085 skb = napi_get_frags(&qs->napi);
2085 qs->lro_va = cpl = sd->pg_chunk.va + 2; 2086 qs->nomem = !skb;
2086 } 2087 }
2087 2088
2088 fl->credits--; 2089 fl->credits--;
2089 2090
2090 len -= offset;
2091 pci_dma_sync_single_for_cpu(adap->pdev, 2091 pci_dma_sync_single_for_cpu(adap->pdev,
2092 pci_unmap_addr(sd, dma_addr), 2092 pci_unmap_addr(sd, dma_addr),
2093 fl->buf_size - SGE_PG_RSVD, 2093 fl->buf_size - SGE_PG_RSVD,
@@ -2100,21 +2100,38 @@ static void lro_add_page(struct adapter *adap, struct sge_qset *qs,
2100 fl->alloc_size, 2100 fl->alloc_size,
2101 PCI_DMA_FROMDEVICE); 2101 PCI_DMA_FROMDEVICE);
2102 2102
2103 if (!skb) {
2104 put_page(sd->pg_chunk.page);
2105 if (complete)
2106 qs->nomem = 0;
2107 return;
2108 }
2109
2110 rx_frag = skb_shinfo(skb)->frags;
2111 nr_frags = skb_shinfo(skb)->nr_frags;
2112
2113 if (!nr_frags) {
2114 offset = 2 + sizeof(struct cpl_rx_pkt);
2115 qs->lro_va = sd->pg_chunk.va + 2;
2116 }
2117 len -= offset;
2118
2103 prefetch(qs->lro_va); 2119 prefetch(qs->lro_va);
2104 2120
2105 rx_frag += nr_frags; 2121 rx_frag += nr_frags;
2106 rx_frag->page = sd->pg_chunk.page; 2122 rx_frag->page = sd->pg_chunk.page;
2107 rx_frag->page_offset = sd->pg_chunk.offset + offset; 2123 rx_frag->page_offset = sd->pg_chunk.offset + offset;
2108 rx_frag->size = len; 2124 rx_frag->size = len;
2109 frag_len += len;
2110 qs->lro_frag_tbl.nr_frags++;
2111 qs->lro_frag_tbl.len = frag_len;
2112 2125
2126 skb->len += len;
2127 skb->data_len += len;
2128 skb->truesize += len;
2129 skb_shinfo(skb)->nr_frags++;
2113 2130
2114 if (!complete) 2131 if (!complete)
2115 return; 2132 return;
2116 2133
2117 qs->lro_frag_tbl.ip_summed = CHECKSUM_UNNECESSARY; 2134 skb->ip_summed = CHECKSUM_UNNECESSARY;
2118 cpl = qs->lro_va; 2135 cpl = qs->lro_va;
2119 2136
2120 if (unlikely(cpl->vlan_valid)) { 2137 if (unlikely(cpl->vlan_valid)) {
@@ -2123,15 +2140,11 @@ static void lro_add_page(struct adapter *adap, struct sge_qset *qs,
2123 struct vlan_group *grp = pi->vlan_grp; 2140 struct vlan_group *grp = pi->vlan_grp;
2124 2141
2125 if (likely(grp != NULL)) { 2142 if (likely(grp != NULL)) {
2126 vlan_gro_frags(&qs->napi, grp, ntohs(cpl->vlan), 2143 vlan_gro_frags(&qs->napi, grp, ntohs(cpl->vlan));
2127 &qs->lro_frag_tbl); 2144 return;
2128 goto out;
2129 } 2145 }
2130 } 2146 }
2131 napi_gro_frags(&qs->napi, &qs->lro_frag_tbl); 2147 napi_gro_frags(&qs->napi);
2132
2133out:
2134 qs->lro_frag_tbl.nr_frags = qs->lro_frag_tbl.len = 0;
2135} 2148}
2136 2149
2137/** 2150/**
@@ -2300,8 +2313,6 @@ no_mem:
2300 if (fl->use_pages) { 2313 if (fl->use_pages) {
2301 void *addr = fl->sdesc[fl->cidx].pg_chunk.va; 2314 void *addr = fl->sdesc[fl->cidx].pg_chunk.va;
2302 2315
2303 prefetch(&qs->lro_frag_tbl);
2304
2305 prefetch(addr); 2316 prefetch(addr);
2306#if L1_CACHE_BYTES < 128 2317#if L1_CACHE_BYTES < 128
2307 prefetch(addr + L1_CACHE_BYTES); 2318 prefetch(addr + L1_CACHE_BYTES);
diff --git a/drivers/net/sfc/rx.c b/drivers/net/sfc/rx.c
index 66d7fe3db3e6..01f9432c31ef 100644
--- a/drivers/net/sfc/rx.c
+++ b/drivers/net/sfc/rx.c
@@ -450,17 +450,27 @@ static void efx_rx_packet_lro(struct efx_channel *channel,
450 450
451 /* Pass the skb/page into the LRO engine */ 451 /* Pass the skb/page into the LRO engine */
452 if (rx_buf->page) { 452 if (rx_buf->page) {
453 struct napi_gro_fraginfo info; 453 struct sk_buff *skb = napi_get_frags(napi);
454 454
455 info.frags[0].page = rx_buf->page; 455 if (!skb) {
456 info.frags[0].page_offset = efx_rx_buf_offset(rx_buf); 456 put_page(rx_buf->page);
457 info.frags[0].size = rx_buf->len; 457 goto out;
458 info.nr_frags = 1; 458 }
459 info.ip_summed = CHECKSUM_UNNECESSARY; 459
460 info.len = rx_buf->len; 460 skb_shinfo(skb)->frags[0].page = rx_buf->page;
461 skb_shinfo(skb)->frags[0].page_offset =
462 efx_rx_buf_offset(rx_buf);
463 skb_shinfo(skb)->frags[0].size = rx_buf->len;
464 skb_shinfo(skb)->nr_frags = 1;
465
466 skb->len = rx_buf->len;
467 skb->data_len = rx_buf->len;
468 skb->truesize += rx_buf->len;
469 skb->ip_summed = CHECKSUM_UNNECESSARY;
461 470
462 napi_gro_frags(napi, &info); 471 napi_gro_frags(napi);
463 472
473out:
464 EFX_BUG_ON_PARANOID(rx_buf->skb); 474 EFX_BUG_ON_PARANOID(rx_buf->skb);
465 rx_buf->page = NULL; 475 rx_buf->page = NULL;
466 } else { 476 } else {
diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h
index e1ff5b14310e..7ff9af1d0f05 100644
--- a/include/linux/if_vlan.h
+++ b/include/linux/if_vlan.h
@@ -118,8 +118,7 @@ extern int vlan_hwaccel_do_receive(struct sk_buff *skb);
118extern int vlan_gro_receive(struct napi_struct *napi, struct vlan_group *grp, 118extern int vlan_gro_receive(struct napi_struct *napi, struct vlan_group *grp,
119 unsigned int vlan_tci, struct sk_buff *skb); 119 unsigned int vlan_tci, struct sk_buff *skb);
120extern int vlan_gro_frags(struct napi_struct *napi, struct vlan_group *grp, 120extern int vlan_gro_frags(struct napi_struct *napi, struct vlan_group *grp,
121 unsigned int vlan_tci, 121 unsigned int vlan_tci);
122 struct napi_gro_fraginfo *info);
123 122
124#else 123#else
125static inline struct net_device *vlan_dev_real_dev(const struct net_device *dev) 124static inline struct net_device *vlan_dev_real_dev(const struct net_device *dev)
@@ -154,8 +153,7 @@ static inline int vlan_gro_receive(struct napi_struct *napi,
154} 153}
155 154
156static inline int vlan_gro_frags(struct napi_struct *napi, 155static inline int vlan_gro_frags(struct napi_struct *napi,
157 struct vlan_group *grp, unsigned int vlan_tci, 156 struct vlan_group *grp, unsigned int vlan_tci)
158 struct napi_gro_fraginfo *info)
159{ 157{
160 return NET_RX_DROP; 158 return NET_RX_DROP;
161} 159}
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 2e7783f4a755..54db3ebf2193 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1047,14 +1047,6 @@ struct packet_type {
1047 struct list_head list; 1047 struct list_head list;
1048}; 1048};
1049 1049
1050struct napi_gro_fraginfo {
1051 skb_frag_t frags[MAX_SKB_FRAGS];
1052 unsigned int nr_frags;
1053 unsigned int ip_summed;
1054 unsigned int len;
1055 __wsum csum;
1056};
1057
1058#include <linux/interrupt.h> 1050#include <linux/interrupt.h>
1059#include <linux/notifier.h> 1051#include <linux/notifier.h>
1060 1052
@@ -1442,12 +1434,18 @@ extern int napi_gro_receive(struct napi_struct *napi,
1442 struct sk_buff *skb); 1434 struct sk_buff *skb);
1443extern void napi_reuse_skb(struct napi_struct *napi, 1435extern void napi_reuse_skb(struct napi_struct *napi,
1444 struct sk_buff *skb); 1436 struct sk_buff *skb);
1445extern struct sk_buff * napi_fraginfo_skb(struct napi_struct *napi, 1437extern struct sk_buff * napi_get_frags(struct napi_struct *napi);
1446 struct napi_gro_fraginfo *info);
1447extern int napi_frags_finish(struct napi_struct *napi, 1438extern int napi_frags_finish(struct napi_struct *napi,
1448 struct sk_buff *skb, int ret); 1439 struct sk_buff *skb, int ret);
1449extern int napi_gro_frags(struct napi_struct *napi, 1440extern struct sk_buff * napi_frags_skb(struct napi_struct *napi);
1450 struct napi_gro_fraginfo *info); 1441extern int napi_gro_frags(struct napi_struct *napi);
1442
1443static inline void napi_free_frags(struct napi_struct *napi)
1444{
1445 kfree_skb(napi->skb);
1446 napi->skb = NULL;
1447}
1448
1451extern void netif_nit_deliver(struct sk_buff *skb); 1449extern void netif_nit_deliver(struct sk_buff *skb);
1452extern int dev_valid_name(const char *name); 1450extern int dev_valid_name(const char *name);
1453extern int dev_ioctl(struct net *net, unsigned int cmd, void __user *); 1451extern int dev_ioctl(struct net *net, unsigned int cmd, void __user *);
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index 654e45f5719d..c1f51e4a01b2 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -114,9 +114,9 @@ int vlan_gro_receive(struct napi_struct *napi, struct vlan_group *grp,
114EXPORT_SYMBOL(vlan_gro_receive); 114EXPORT_SYMBOL(vlan_gro_receive);
115 115
116int vlan_gro_frags(struct napi_struct *napi, struct vlan_group *grp, 116int vlan_gro_frags(struct napi_struct *napi, struct vlan_group *grp,
117 unsigned int vlan_tci, struct napi_gro_fraginfo *info) 117 unsigned int vlan_tci)
118{ 118{
119 struct sk_buff *skb = napi_fraginfo_skb(napi, info); 119 struct sk_buff *skb = napi_frags_skb(napi);
120 120
121 if (!skb) 121 if (!skb)
122 return NET_RX_DROP; 122 return NET_RX_DROP;
diff --git a/net/core/dev.c b/net/core/dev.c
index 91d792d17e09..619fa141b8f5 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2519,16 +2519,10 @@ void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
2519} 2519}
2520EXPORT_SYMBOL(napi_reuse_skb); 2520EXPORT_SYMBOL(napi_reuse_skb);
2521 2521
2522struct sk_buff *napi_fraginfo_skb(struct napi_struct *napi, 2522struct sk_buff *napi_get_frags(struct napi_struct *napi)
2523 struct napi_gro_fraginfo *info)
2524{ 2523{
2525 struct net_device *dev = napi->dev; 2524 struct net_device *dev = napi->dev;
2526 struct sk_buff *skb = napi->skb; 2525 struct sk_buff *skb = napi->skb;
2527 struct ethhdr *eth;
2528 skb_frag_t *frag;
2529 int i;
2530
2531 napi->skb = NULL;
2532 2526
2533 if (!skb) { 2527 if (!skb) {
2534 skb = netdev_alloc_skb(dev, GRO_MAX_HEAD + NET_IP_ALIGN); 2528 skb = netdev_alloc_skb(dev, GRO_MAX_HEAD + NET_IP_ALIGN);
@@ -2536,47 +2530,14 @@ struct sk_buff *napi_fraginfo_skb(struct napi_struct *napi,
2536 goto out; 2530 goto out;
2537 2531
2538 skb_reserve(skb, NET_IP_ALIGN); 2532 skb_reserve(skb, NET_IP_ALIGN);
2539 }
2540
2541 BUG_ON(info->nr_frags > MAX_SKB_FRAGS);
2542 frag = &info->frags[info->nr_frags - 1];
2543 2533
2544 for (i = skb_shinfo(skb)->nr_frags; i < info->nr_frags; i++) { 2534 napi->skb = skb;
2545 skb_fill_page_desc(skb, i, frag->page, frag->page_offset,
2546 frag->size);
2547 frag++;
2548 } 2535 }
2549 skb_shinfo(skb)->nr_frags = info->nr_frags;
2550
2551 skb->data_len = info->len;
2552 skb->len += info->len;
2553 skb->truesize += info->len;
2554
2555 skb_reset_mac_header(skb);
2556 skb_gro_reset_offset(skb);
2557
2558 eth = skb_gro_header(skb, sizeof(*eth));
2559 if (!eth) {
2560 napi_reuse_skb(napi, skb);
2561 skb = NULL;
2562 goto out;
2563 }
2564
2565 skb_gro_pull(skb, sizeof(*eth));
2566
2567 /*
2568 * This works because the only protocols we care about don't require
2569 * special handling. We'll fix it up properly at the end.
2570 */
2571 skb->protocol = eth->h_proto;
2572
2573 skb->ip_summed = info->ip_summed;
2574 skb->csum = info->csum;
2575 2536
2576out: 2537out:
2577 return skb; 2538 return skb;
2578} 2539}
2579EXPORT_SYMBOL(napi_fraginfo_skb); 2540EXPORT_SYMBOL(napi_get_frags);
2580 2541
2581int napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb, int ret) 2542int napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb, int ret)
2582{ 2543{
@@ -2606,9 +2567,39 @@ int napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb, int ret)
2606} 2567}
2607EXPORT_SYMBOL(napi_frags_finish); 2568EXPORT_SYMBOL(napi_frags_finish);
2608 2569
2609int napi_gro_frags(struct napi_struct *napi, struct napi_gro_fraginfo *info) 2570struct sk_buff *napi_frags_skb(struct napi_struct *napi)
2571{
2572 struct sk_buff *skb = napi->skb;
2573 struct ethhdr *eth;
2574
2575 napi->skb = NULL;
2576
2577 skb_reset_mac_header(skb);
2578 skb_gro_reset_offset(skb);
2579
2580 eth = skb_gro_header(skb, sizeof(*eth));
2581 if (!eth) {
2582 napi_reuse_skb(napi, skb);
2583 skb = NULL;
2584 goto out;
2585 }
2586
2587 skb_gro_pull(skb, sizeof(*eth));
2588
2589 /*
2590 * This works because the only protocols we care about don't require
2591 * special handling. We'll fix it up properly at the end.
2592 */
2593 skb->protocol = eth->h_proto;
2594
2595out:
2596 return skb;
2597}
2598EXPORT_SYMBOL(napi_frags_skb);
2599
2600int napi_gro_frags(struct napi_struct *napi)
2610{ 2601{
2611 struct sk_buff *skb = napi_fraginfo_skb(napi, info); 2602 struct sk_buff *skb = napi_frags_skb(napi);
2612 2603
2613 if (!skb) 2604 if (!skb)
2614 return NET_RX_DROP; 2605 return NET_RX_DROP;
@@ -2712,7 +2703,7 @@ void netif_napi_del(struct napi_struct *napi)
2712 struct sk_buff *skb, *next; 2703 struct sk_buff *skb, *next;
2713 2704
2714 list_del_init(&napi->dev_list); 2705 list_del_init(&napi->dev_list);
2715 kfree_skb(napi->skb); 2706 napi_free_frags(napi);
2716 2707
2717 for (skb = napi->gro_list; skb; skb = next) { 2708 for (skb = napi->gro_list; skb; skb = next) {
2718 next = skb->next; 2709 next = skb->next;