diff options
author | Herbert Xu <herbert@gondor.apana.org.au> | 2009-04-16 05:02:07 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2009-04-16 05:02:07 -0400 |
commit | 76620aafd66f0004829764940c5466144969cffc (patch) | |
tree | 38041e6938121b5611546c582cd23f289db047b0 /drivers/net/cxgb3 | |
parent | 861ab44059350e5cab350238606cf8814abab93b (diff) |
gro: New frags interface to avoid copying shinfo
It turns out that copying a 16-byte area ~800k times a second
can be really expensive :) This patch redesigns the frags GRO
interface to avoid copying that area twice.
The two disciples of the frags interface have been converted.
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'drivers/net/cxgb3')
-rw-r--r-- | drivers/net/cxgb3/adapter.h | 2 | ||||
-rw-r--r-- | drivers/net/cxgb3/sge.c | 53 |
2 files changed, 33 insertions, 22 deletions
diff --git a/drivers/net/cxgb3/adapter.h b/drivers/net/cxgb3/adapter.h index 714df2b675e6..322434ac42fc 100644 --- a/drivers/net/cxgb3/adapter.h +++ b/drivers/net/cxgb3/adapter.h | |||
@@ -195,7 +195,7 @@ struct sge_qset { /* an SGE queue set */ | |||
195 | struct sge_rspq rspq; | 195 | struct sge_rspq rspq; |
196 | struct sge_fl fl[SGE_RXQ_PER_SET]; | 196 | struct sge_fl fl[SGE_RXQ_PER_SET]; |
197 | struct sge_txq txq[SGE_TXQ_PER_SET]; | 197 | struct sge_txq txq[SGE_TXQ_PER_SET]; |
198 | struct napi_gro_fraginfo lro_frag_tbl; | 198 | int nomem; |
199 | int lro_enabled; | 199 | int lro_enabled; |
200 | void *lro_va; | 200 | void *lro_va; |
201 | struct net_device *netdev; | 201 | struct net_device *netdev; |
diff --git a/drivers/net/cxgb3/sge.c b/drivers/net/cxgb3/sge.c index 26d3587f3399..73d569e758ec 100644 --- a/drivers/net/cxgb3/sge.c +++ b/drivers/net/cxgb3/sge.c | |||
@@ -654,7 +654,8 @@ static void t3_reset_qset(struct sge_qset *q) | |||
654 | q->txq_stopped = 0; | 654 | q->txq_stopped = 0; |
655 | q->tx_reclaim_timer.function = NULL; /* for t3_stop_sge_timers() */ | 655 | q->tx_reclaim_timer.function = NULL; /* for t3_stop_sge_timers() */ |
656 | q->rx_reclaim_timer.function = NULL; | 656 | q->rx_reclaim_timer.function = NULL; |
657 | q->lro_frag_tbl.nr_frags = q->lro_frag_tbl.len = 0; | 657 | q->nomem = 0; |
658 | napi_free_frags(&q->napi); | ||
658 | } | 659 | } |
659 | 660 | ||
660 | 661 | ||
@@ -2074,20 +2075,19 @@ static void lro_add_page(struct adapter *adap, struct sge_qset *qs, | |||
2074 | struct sge_fl *fl, int len, int complete) | 2075 | struct sge_fl *fl, int len, int complete) |
2075 | { | 2076 | { |
2076 | struct rx_sw_desc *sd = &fl->sdesc[fl->cidx]; | 2077 | struct rx_sw_desc *sd = &fl->sdesc[fl->cidx]; |
2078 | struct sk_buff *skb = NULL; | ||
2077 | struct cpl_rx_pkt *cpl; | 2079 | struct cpl_rx_pkt *cpl; |
2078 | struct skb_frag_struct *rx_frag = qs->lro_frag_tbl.frags; | 2080 | struct skb_frag_struct *rx_frag; |
2079 | int nr_frags = qs->lro_frag_tbl.nr_frags; | 2081 | int nr_frags; |
2080 | int frag_len = qs->lro_frag_tbl.len; | ||
2081 | int offset = 0; | 2082 | int offset = 0; |
2082 | 2083 | ||
2083 | if (!nr_frags) { | 2084 | if (!qs->nomem) { |
2084 | offset = 2 + sizeof(struct cpl_rx_pkt); | 2085 | skb = napi_get_frags(&qs->napi); |
2085 | qs->lro_va = cpl = sd->pg_chunk.va + 2; | 2086 | qs->nomem = !skb; |
2086 | } | 2087 | } |
2087 | 2088 | ||
2088 | fl->credits--; | 2089 | fl->credits--; |
2089 | 2090 | ||
2090 | len -= offset; | ||
2091 | pci_dma_sync_single_for_cpu(adap->pdev, | 2091 | pci_dma_sync_single_for_cpu(adap->pdev, |
2092 | pci_unmap_addr(sd, dma_addr), | 2092 | pci_unmap_addr(sd, dma_addr), |
2093 | fl->buf_size - SGE_PG_RSVD, | 2093 | fl->buf_size - SGE_PG_RSVD, |
@@ -2100,21 +2100,38 @@ static void lro_add_page(struct adapter *adap, struct sge_qset *qs, | |||
2100 | fl->alloc_size, | 2100 | fl->alloc_size, |
2101 | PCI_DMA_FROMDEVICE); | 2101 | PCI_DMA_FROMDEVICE); |
2102 | 2102 | ||
2103 | if (!skb) { | ||
2104 | put_page(sd->pg_chunk.page); | ||
2105 | if (complete) | ||
2106 | qs->nomem = 0; | ||
2107 | return; | ||
2108 | } | ||
2109 | |||
2110 | rx_frag = skb_shinfo(skb)->frags; | ||
2111 | nr_frags = skb_shinfo(skb)->nr_frags; | ||
2112 | |||
2113 | if (!nr_frags) { | ||
2114 | offset = 2 + sizeof(struct cpl_rx_pkt); | ||
2115 | qs->lro_va = sd->pg_chunk.va + 2; | ||
2116 | } | ||
2117 | len -= offset; | ||
2118 | |||
2103 | prefetch(qs->lro_va); | 2119 | prefetch(qs->lro_va); |
2104 | 2120 | ||
2105 | rx_frag += nr_frags; | 2121 | rx_frag += nr_frags; |
2106 | rx_frag->page = sd->pg_chunk.page; | 2122 | rx_frag->page = sd->pg_chunk.page; |
2107 | rx_frag->page_offset = sd->pg_chunk.offset + offset; | 2123 | rx_frag->page_offset = sd->pg_chunk.offset + offset; |
2108 | rx_frag->size = len; | 2124 | rx_frag->size = len; |
2109 | frag_len += len; | ||
2110 | qs->lro_frag_tbl.nr_frags++; | ||
2111 | qs->lro_frag_tbl.len = frag_len; | ||
2112 | 2125 | ||
2126 | skb->len += len; | ||
2127 | skb->data_len += len; | ||
2128 | skb->truesize += len; | ||
2129 | skb_shinfo(skb)->nr_frags++; | ||
2113 | 2130 | ||
2114 | if (!complete) | 2131 | if (!complete) |
2115 | return; | 2132 | return; |
2116 | 2133 | ||
2117 | qs->lro_frag_tbl.ip_summed = CHECKSUM_UNNECESSARY; | 2134 | skb->ip_summed = CHECKSUM_UNNECESSARY; |
2118 | cpl = qs->lro_va; | 2135 | cpl = qs->lro_va; |
2119 | 2136 | ||
2120 | if (unlikely(cpl->vlan_valid)) { | 2137 | if (unlikely(cpl->vlan_valid)) { |
@@ -2123,15 +2140,11 @@ static void lro_add_page(struct adapter *adap, struct sge_qset *qs, | |||
2123 | struct vlan_group *grp = pi->vlan_grp; | 2140 | struct vlan_group *grp = pi->vlan_grp; |
2124 | 2141 | ||
2125 | if (likely(grp != NULL)) { | 2142 | if (likely(grp != NULL)) { |
2126 | vlan_gro_frags(&qs->napi, grp, ntohs(cpl->vlan), | 2143 | vlan_gro_frags(&qs->napi, grp, ntohs(cpl->vlan)); |
2127 | &qs->lro_frag_tbl); | 2144 | return; |
2128 | goto out; | ||
2129 | } | 2145 | } |
2130 | } | 2146 | } |
2131 | napi_gro_frags(&qs->napi, &qs->lro_frag_tbl); | 2147 | napi_gro_frags(&qs->napi); |
2132 | |||
2133 | out: | ||
2134 | qs->lro_frag_tbl.nr_frags = qs->lro_frag_tbl.len = 0; | ||
2135 | } | 2148 | } |
2136 | 2149 | ||
2137 | /** | 2150 | /** |
@@ -2300,8 +2313,6 @@ no_mem: | |||
2300 | if (fl->use_pages) { | 2313 | if (fl->use_pages) { |
2301 | void *addr = fl->sdesc[fl->cidx].pg_chunk.va; | 2314 | void *addr = fl->sdesc[fl->cidx].pg_chunk.va; |
2302 | 2315 | ||
2303 | prefetch(&qs->lro_frag_tbl); | ||
2304 | |||
2305 | prefetch(addr); | 2316 | prefetch(addr); |
2306 | #if L1_CACHE_BYTES < 128 | 2317 | #if L1_CACHE_BYTES < 128 |
2307 | prefetch(addr + L1_CACHE_BYTES); | 2318 | prefetch(addr + L1_CACHE_BYTES); |