about | summary | refs | log | tree | commit | diff | stats
path: root/drivers/net/cxgb3
diff options
context:
space:
mode:
authorHerbert Xu <herbert@gondor.apana.org.au>2009-04-16 05:02:07 -0400
committerDavid S. Miller <davem@davemloft.net>2009-04-16 05:02:07 -0400
commit76620aafd66f0004829764940c5466144969cffc (patch)
tree38041e6938121b5611546c582cd23f289db047b0 /drivers/net/cxgb3
parent861ab44059350e5cab350238606cf8814abab93b (diff)
gro: New frags interface to avoid copying shinfo
It turns out that copying a 16-byte area at ~800k times a second can be really expensive :) This patch redesigns the frags GRO interface to avoid copying that area twice.

The two disciples of the frags interface have been converted.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'drivers/net/cxgb3')
-rw-r--r--drivers/net/cxgb3/adapter.h2
-rw-r--r--drivers/net/cxgb3/sge.c53
2 files changed, 33 insertions(+), 22 deletions(-)
diff --git a/drivers/net/cxgb3/adapter.h b/drivers/net/cxgb3/adapter.h
index 714df2b675e6..322434ac42fc 100644
--- a/drivers/net/cxgb3/adapter.h
+++ b/drivers/net/cxgb3/adapter.h
@@ -195,7 +195,7 @@ struct sge_qset { /* an SGE queue set */
195 struct sge_rspq rspq; 195 struct sge_rspq rspq;
196 struct sge_fl fl[SGE_RXQ_PER_SET]; 196 struct sge_fl fl[SGE_RXQ_PER_SET];
197 struct sge_txq txq[SGE_TXQ_PER_SET]; 197 struct sge_txq txq[SGE_TXQ_PER_SET];
198 struct napi_gro_fraginfo lro_frag_tbl; 198 int nomem;
199 int lro_enabled; 199 int lro_enabled;
200 void *lro_va; 200 void *lro_va;
201 struct net_device *netdev; 201 struct net_device *netdev;
diff --git a/drivers/net/cxgb3/sge.c b/drivers/net/cxgb3/sge.c
index 26d3587f3399..73d569e758ec 100644
--- a/drivers/net/cxgb3/sge.c
+++ b/drivers/net/cxgb3/sge.c
@@ -654,7 +654,8 @@ static void t3_reset_qset(struct sge_qset *q)
654 q->txq_stopped = 0; 654 q->txq_stopped = 0;
655 q->tx_reclaim_timer.function = NULL; /* for t3_stop_sge_timers() */ 655 q->tx_reclaim_timer.function = NULL; /* for t3_stop_sge_timers() */
656 q->rx_reclaim_timer.function = NULL; 656 q->rx_reclaim_timer.function = NULL;
657 q->lro_frag_tbl.nr_frags = q->lro_frag_tbl.len = 0; 657 q->nomem = 0;
658 napi_free_frags(&q->napi);
658} 659}
659 660
660 661
@@ -2074,20 +2075,19 @@ static void lro_add_page(struct adapter *adap, struct sge_qset *qs,
2074 struct sge_fl *fl, int len, int complete) 2075 struct sge_fl *fl, int len, int complete)
2075{ 2076{
2076 struct rx_sw_desc *sd = &fl->sdesc[fl->cidx]; 2077 struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
2078 struct sk_buff *skb = NULL;
2077 struct cpl_rx_pkt *cpl; 2079 struct cpl_rx_pkt *cpl;
2078 struct skb_frag_struct *rx_frag = qs->lro_frag_tbl.frags; 2080 struct skb_frag_struct *rx_frag;
2079 int nr_frags = qs->lro_frag_tbl.nr_frags; 2081 int nr_frags;
2080 int frag_len = qs->lro_frag_tbl.len;
2081 int offset = 0; 2082 int offset = 0;
2082 2083
2083 if (!nr_frags) { 2084 if (!qs->nomem) {
2084 offset = 2 + sizeof(struct cpl_rx_pkt); 2085 skb = napi_get_frags(&qs->napi);
2085 qs->lro_va = cpl = sd->pg_chunk.va + 2; 2086 qs->nomem = !skb;
2086 } 2087 }
2087 2088
2088 fl->credits--; 2089 fl->credits--;
2089 2090
2090 len -= offset;
2091 pci_dma_sync_single_for_cpu(adap->pdev, 2091 pci_dma_sync_single_for_cpu(adap->pdev,
2092 pci_unmap_addr(sd, dma_addr), 2092 pci_unmap_addr(sd, dma_addr),
2093 fl->buf_size - SGE_PG_RSVD, 2093 fl->buf_size - SGE_PG_RSVD,
@@ -2100,21 +2100,38 @@ static void lro_add_page(struct adapter *adap, struct sge_qset *qs,
2100 fl->alloc_size, 2100 fl->alloc_size,
2101 PCI_DMA_FROMDEVICE); 2101 PCI_DMA_FROMDEVICE);
2102 2102
2103 if (!skb) {
2104 put_page(sd->pg_chunk.page);
2105 if (complete)
2106 qs->nomem = 0;
2107 return;
2108 }
2109
2110 rx_frag = skb_shinfo(skb)->frags;
2111 nr_frags = skb_shinfo(skb)->nr_frags;
2112
2113 if (!nr_frags) {
2114 offset = 2 + sizeof(struct cpl_rx_pkt);
2115 qs->lro_va = sd->pg_chunk.va + 2;
2116 }
2117 len -= offset;
2118
2103 prefetch(qs->lro_va); 2119 prefetch(qs->lro_va);
2104 2120
2105 rx_frag += nr_frags; 2121 rx_frag += nr_frags;
2106 rx_frag->page = sd->pg_chunk.page; 2122 rx_frag->page = sd->pg_chunk.page;
2107 rx_frag->page_offset = sd->pg_chunk.offset + offset; 2123 rx_frag->page_offset = sd->pg_chunk.offset + offset;
2108 rx_frag->size = len; 2124 rx_frag->size = len;
2109 frag_len += len;
2110 qs->lro_frag_tbl.nr_frags++;
2111 qs->lro_frag_tbl.len = frag_len;
2112 2125
2126 skb->len += len;
2127 skb->data_len += len;
2128 skb->truesize += len;
2129 skb_shinfo(skb)->nr_frags++;
2113 2130
2114 if (!complete) 2131 if (!complete)
2115 return; 2132 return;
2116 2133
2117 qs->lro_frag_tbl.ip_summed = CHECKSUM_UNNECESSARY; 2134 skb->ip_summed = CHECKSUM_UNNECESSARY;
2118 cpl = qs->lro_va; 2135 cpl = qs->lro_va;
2119 2136
2120 if (unlikely(cpl->vlan_valid)) { 2137 if (unlikely(cpl->vlan_valid)) {
@@ -2123,15 +2140,11 @@ static void lro_add_page(struct adapter *adap, struct sge_qset *qs,
2123 struct vlan_group *grp = pi->vlan_grp; 2140 struct vlan_group *grp = pi->vlan_grp;
2124 2141
2125 if (likely(grp != NULL)) { 2142 if (likely(grp != NULL)) {
2126 vlan_gro_frags(&qs->napi, grp, ntohs(cpl->vlan), 2143 vlan_gro_frags(&qs->napi, grp, ntohs(cpl->vlan));
2127 &qs->lro_frag_tbl); 2144 return;
2128 goto out;
2129 } 2145 }
2130 } 2146 }
2131 napi_gro_frags(&qs->napi, &qs->lro_frag_tbl); 2147 napi_gro_frags(&qs->napi);
2132
2133out:
2134 qs->lro_frag_tbl.nr_frags = qs->lro_frag_tbl.len = 0;
2135} 2148}
2136 2149
2137/** 2150/**
@@ -2300,8 +2313,6 @@ no_mem:
2300 if (fl->use_pages) { 2313 if (fl->use_pages) {
2301 void *addr = fl->sdesc[fl->cidx].pg_chunk.va; 2314 void *addr = fl->sdesc[fl->cidx].pg_chunk.va;
2302 2315
2303 prefetch(&qs->lro_frag_tbl);
2304
2305 prefetch(addr); 2316 prefetch(addr);
2306#if L1_CACHE_BYTES < 128 2317#if L1_CACHE_BYTES < 128
2307 prefetch(addr + L1_CACHE_BYTES); 2318 prefetch(addr + L1_CACHE_BYTES);