aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/net
diff options
context:
space:
mode:
authorDivy Le Ray <divy@chelsio.com>2009-03-26 12:39:29 -0400
committerDavid S. Miller <davem@davemloft.net>2009-03-27 03:46:59 -0400
commit5e68b772e6efd189d6aca76f6872fb75d51ace60 (patch)
tree88a988918d021d397756790d89c8801adb131195 /drivers/net
parent952cdf333f9d1b0b71f1b9a3c5e421a2673ed7de (diff)
cxgb3: map entire Rx page, feed map+offset to Rx ring.
DMA mapping can be expensive in the presence of iommus. Reduce the Rx iommu activity by mapping an entire page, and provide the H/W the mapped address + offset of the current page chunk. Reserve bits at the end of the page to track mapping references, so the page can be unmapped. Signed-off-by: Divy Le Ray <divy@chelsio.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'drivers/net')
-rw-r--r--drivers/net/cxgb3/adapter.h3
-rw-r--r--[-rwxr-xr-x]drivers/net/cxgb3/sge.c138
2 files changed, 106 insertions, 35 deletions
diff --git a/drivers/net/cxgb3/adapter.h b/drivers/net/cxgb3/adapter.h
index 2cf6c9299f22..714df2b675e6 100644
--- a/drivers/net/cxgb3/adapter.h
+++ b/drivers/net/cxgb3/adapter.h
@@ -85,6 +85,8 @@ struct fl_pg_chunk {
85 struct page *page; 85 struct page *page;
86 void *va; 86 void *va;
87 unsigned int offset; 87 unsigned int offset;
88 u64 *p_cnt;
89 DECLARE_PCI_UNMAP_ADDR(mapping);
88}; 90};
89 91
90struct rx_desc; 92struct rx_desc;
@@ -101,6 +103,7 @@ struct sge_fl { /* SGE per free-buffer list state */
101 struct fl_pg_chunk pg_chunk;/* page chunk cache */ 103 struct fl_pg_chunk pg_chunk;/* page chunk cache */
102 unsigned int use_pages; /* whether FL uses pages or sk_buffs */ 104 unsigned int use_pages; /* whether FL uses pages or sk_buffs */
103 unsigned int order; /* order of page allocations */ 105 unsigned int order; /* order of page allocations */
106 unsigned int alloc_size; /* size of allocated buffer */
104 struct rx_desc *desc; /* address of HW Rx descriptor ring */ 107 struct rx_desc *desc; /* address of HW Rx descriptor ring */
105 struct rx_sw_desc *sdesc; /* address of SW Rx descriptor ring */ 108 struct rx_sw_desc *sdesc; /* address of SW Rx descriptor ring */
106 dma_addr_t phys_addr; /* physical address of HW ring start */ 109 dma_addr_t phys_addr; /* physical address of HW ring start */
diff --git a/drivers/net/cxgb3/sge.c b/drivers/net/cxgb3/sge.c
index 54667f0dde94..26d3587f3399 100755..100644
--- a/drivers/net/cxgb3/sge.c
+++ b/drivers/net/cxgb3/sge.c
@@ -50,6 +50,7 @@
50#define SGE_RX_COPY_THRES 256 50#define SGE_RX_COPY_THRES 256
51#define SGE_RX_PULL_LEN 128 51#define SGE_RX_PULL_LEN 128
52 52
53#define SGE_PG_RSVD SMP_CACHE_BYTES
53/* 54/*
54 * Page chunk size for FL0 buffers if FL0 is to be populated with page chunks. 55 * Page chunk size for FL0 buffers if FL0 is to be populated with page chunks.
55 * It must be a divisor of PAGE_SIZE. If set to 0 FL0 will use sk_buffs 56 * It must be a divisor of PAGE_SIZE. If set to 0 FL0 will use sk_buffs
@@ -57,8 +58,10 @@
57 */ 58 */
58#define FL0_PG_CHUNK_SIZE 2048 59#define FL0_PG_CHUNK_SIZE 2048
59#define FL0_PG_ORDER 0 60#define FL0_PG_ORDER 0
61#define FL0_PG_ALLOC_SIZE (PAGE_SIZE << FL0_PG_ORDER)
60#define FL1_PG_CHUNK_SIZE (PAGE_SIZE > 8192 ? 16384 : 8192) 62#define FL1_PG_CHUNK_SIZE (PAGE_SIZE > 8192 ? 16384 : 8192)
61#define FL1_PG_ORDER (PAGE_SIZE > 8192 ? 0 : 1) 63#define FL1_PG_ORDER (PAGE_SIZE > 8192 ? 0 : 1)
64#define FL1_PG_ALLOC_SIZE (PAGE_SIZE << FL1_PG_ORDER)
62 65
63#define SGE_RX_DROP_THRES 16 66#define SGE_RX_DROP_THRES 16
64#define RX_RECLAIM_PERIOD (HZ/4) 67#define RX_RECLAIM_PERIOD (HZ/4)
@@ -345,13 +348,21 @@ static inline int should_restart_tx(const struct sge_txq *q)
345 return q->in_use - r < (q->size >> 1); 348 return q->in_use - r < (q->size >> 1);
346} 349}
347 350
348static void clear_rx_desc(const struct sge_fl *q, struct rx_sw_desc *d) 351static void clear_rx_desc(struct pci_dev *pdev, const struct sge_fl *q,
352 struct rx_sw_desc *d)
349{ 353{
350 if (q->use_pages) { 354 if (q->use_pages && d->pg_chunk.page) {
351 if (d->pg_chunk.page) 355 (*d->pg_chunk.p_cnt)--;
352 put_page(d->pg_chunk.page); 356 if (!*d->pg_chunk.p_cnt)
357 pci_unmap_page(pdev,
358 pci_unmap_addr(&d->pg_chunk, mapping),
359 q->alloc_size, PCI_DMA_FROMDEVICE);
360
361 put_page(d->pg_chunk.page);
353 d->pg_chunk.page = NULL; 362 d->pg_chunk.page = NULL;
354 } else { 363 } else {
364 pci_unmap_single(pdev, pci_unmap_addr(d, dma_addr),
365 q->buf_size, PCI_DMA_FROMDEVICE);
355 kfree_skb(d->skb); 366 kfree_skb(d->skb);
356 d->skb = NULL; 367 d->skb = NULL;
357 } 368 }
@@ -372,9 +383,8 @@ static void free_rx_bufs(struct pci_dev *pdev, struct sge_fl *q)
372 while (q->credits--) { 383 while (q->credits--) {
373 struct rx_sw_desc *d = &q->sdesc[cidx]; 384 struct rx_sw_desc *d = &q->sdesc[cidx];
374 385
375 pci_unmap_single(pdev, pci_unmap_addr(d, dma_addr), 386
376 q->buf_size, PCI_DMA_FROMDEVICE); 387 clear_rx_desc(pdev, q, d);
377 clear_rx_desc(q, d);
378 if (++cidx == q->size) 388 if (++cidx == q->size)
379 cidx = 0; 389 cidx = 0;
380 } 390 }
@@ -417,18 +427,39 @@ static inline int add_one_rx_buf(void *va, unsigned int len,
417 return 0; 427 return 0;
418} 428}
419 429
420static int alloc_pg_chunk(struct sge_fl *q, struct rx_sw_desc *sd, gfp_t gfp, 430static inline int add_one_rx_chunk(dma_addr_t mapping, struct rx_desc *d,
431 unsigned int gen)
432{
433 d->addr_lo = cpu_to_be32(mapping);
434 d->addr_hi = cpu_to_be32((u64) mapping >> 32);
435 wmb();
436 d->len_gen = cpu_to_be32(V_FLD_GEN1(gen));
437 d->gen2 = cpu_to_be32(V_FLD_GEN2(gen));
438 return 0;
439}
440
441static int alloc_pg_chunk(struct adapter *adapter, struct sge_fl *q,
442 struct rx_sw_desc *sd, gfp_t gfp,
421 unsigned int order) 443 unsigned int order)
422{ 444{
423 if (!q->pg_chunk.page) { 445 if (!q->pg_chunk.page) {
446 dma_addr_t mapping;
447
424 q->pg_chunk.page = alloc_pages(gfp, order); 448 q->pg_chunk.page = alloc_pages(gfp, order);
425 if (unlikely(!q->pg_chunk.page)) 449 if (unlikely(!q->pg_chunk.page))
426 return -ENOMEM; 450 return -ENOMEM;
427 q->pg_chunk.va = page_address(q->pg_chunk.page); 451 q->pg_chunk.va = page_address(q->pg_chunk.page);
452 q->pg_chunk.p_cnt = q->pg_chunk.va + (PAGE_SIZE << order) -
453 SGE_PG_RSVD;
428 q->pg_chunk.offset = 0; 454 q->pg_chunk.offset = 0;
455 mapping = pci_map_page(adapter->pdev, q->pg_chunk.page,
456 0, q->alloc_size, PCI_DMA_FROMDEVICE);
457 pci_unmap_addr_set(&q->pg_chunk, mapping, mapping);
429 } 458 }
430 sd->pg_chunk = q->pg_chunk; 459 sd->pg_chunk = q->pg_chunk;
431 460
461 prefetch(sd->pg_chunk.p_cnt);
462
432 q->pg_chunk.offset += q->buf_size; 463 q->pg_chunk.offset += q->buf_size;
433 if (q->pg_chunk.offset == (PAGE_SIZE << order)) 464 if (q->pg_chunk.offset == (PAGE_SIZE << order))
434 q->pg_chunk.page = NULL; 465 q->pg_chunk.page = NULL;
@@ -436,6 +467,12 @@ static int alloc_pg_chunk(struct sge_fl *q, struct rx_sw_desc *sd, gfp_t gfp,
436 q->pg_chunk.va += q->buf_size; 467 q->pg_chunk.va += q->buf_size;
437 get_page(q->pg_chunk.page); 468 get_page(q->pg_chunk.page);
438 } 469 }
470
471 if (sd->pg_chunk.offset == 0)
472 *sd->pg_chunk.p_cnt = 1;
473 else
474 *sd->pg_chunk.p_cnt += 1;
475
439 return 0; 476 return 0;
440} 477}
441 478
@@ -460,35 +497,43 @@ static inline void ring_fl_db(struct adapter *adap, struct sge_fl *q)
460 */ 497 */
461static int refill_fl(struct adapter *adap, struct sge_fl *q, int n, gfp_t gfp) 498static int refill_fl(struct adapter *adap, struct sge_fl *q, int n, gfp_t gfp)
462{ 499{
463 void *buf_start;
464 struct rx_sw_desc *sd = &q->sdesc[q->pidx]; 500 struct rx_sw_desc *sd = &q->sdesc[q->pidx];
465 struct rx_desc *d = &q->desc[q->pidx]; 501 struct rx_desc *d = &q->desc[q->pidx];
466 unsigned int count = 0; 502 unsigned int count = 0;
467 503
468 while (n--) { 504 while (n--) {
505 dma_addr_t mapping;
469 int err; 506 int err;
470 507
471 if (q->use_pages) { 508 if (q->use_pages) {
472 if (unlikely(alloc_pg_chunk(q, sd, gfp, q->order))) { 509 if (unlikely(alloc_pg_chunk(adap, q, sd, gfp,
510 q->order))) {
473nomem: q->alloc_failed++; 511nomem: q->alloc_failed++;
474 break; 512 break;
475 } 513 }
476 buf_start = sd->pg_chunk.va; 514 mapping = pci_unmap_addr(&sd->pg_chunk, mapping) +
515 sd->pg_chunk.offset;
516 pci_unmap_addr_set(sd, dma_addr, mapping);
517
518 add_one_rx_chunk(mapping, d, q->gen);
519 pci_dma_sync_single_for_device(adap->pdev, mapping,
520 q->buf_size - SGE_PG_RSVD,
521 PCI_DMA_FROMDEVICE);
477 } else { 522 } else {
478 struct sk_buff *skb = alloc_skb(q->buf_size, gfp); 523 void *buf_start;
479 524
525 struct sk_buff *skb = alloc_skb(q->buf_size, gfp);
480 if (!skb) 526 if (!skb)
481 goto nomem; 527 goto nomem;
482 528
483 sd->skb = skb; 529 sd->skb = skb;
484 buf_start = skb->data; 530 buf_start = skb->data;
485 } 531 err = add_one_rx_buf(buf_start, q->buf_size, d, sd,
486 532 q->gen, adap->pdev);
487 err = add_one_rx_buf(buf_start, q->buf_size, d, sd, q->gen, 533 if (unlikely(err)) {
488 adap->pdev); 534 clear_rx_desc(adap->pdev, q, sd);
489 if (unlikely(err)) { 535 break;
490 clear_rx_desc(q, sd); 536 }
491 break;
492 } 537 }
493 538
494 d++; 539 d++;
@@ -795,19 +840,19 @@ static struct sk_buff *get_packet_pg(struct adapter *adap, struct sge_fl *fl,
795 struct sk_buff *newskb, *skb; 840 struct sk_buff *newskb, *skb;
796 struct rx_sw_desc *sd = &fl->sdesc[fl->cidx]; 841 struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
797 842
798 newskb = skb = q->pg_skb; 843 dma_addr_t dma_addr = pci_unmap_addr(sd, dma_addr);
799 844
845 newskb = skb = q->pg_skb;
800 if (!skb && (len <= SGE_RX_COPY_THRES)) { 846 if (!skb && (len <= SGE_RX_COPY_THRES)) {
801 newskb = alloc_skb(len, GFP_ATOMIC); 847 newskb = alloc_skb(len, GFP_ATOMIC);
802 if (likely(newskb != NULL)) { 848 if (likely(newskb != NULL)) {
803 __skb_put(newskb, len); 849 __skb_put(newskb, len);
804 pci_dma_sync_single_for_cpu(adap->pdev, 850 pci_dma_sync_single_for_cpu(adap->pdev, dma_addr, len,
805 pci_unmap_addr(sd, dma_addr), len,
806 PCI_DMA_FROMDEVICE); 851 PCI_DMA_FROMDEVICE);
807 memcpy(newskb->data, sd->pg_chunk.va, len); 852 memcpy(newskb->data, sd->pg_chunk.va, len);
808 pci_dma_sync_single_for_device(adap->pdev, 853 pci_dma_sync_single_for_device(adap->pdev, dma_addr,
809 pci_unmap_addr(sd, dma_addr), len, 854 len,
810 PCI_DMA_FROMDEVICE); 855 PCI_DMA_FROMDEVICE);
811 } else if (!drop_thres) 856 } else if (!drop_thres)
812 return NULL; 857 return NULL;
813recycle: 858recycle:
@@ -820,16 +865,25 @@ recycle:
820 if (unlikely(q->rx_recycle_buf || (!skb && fl->credits <= drop_thres))) 865 if (unlikely(q->rx_recycle_buf || (!skb && fl->credits <= drop_thres)))
821 goto recycle; 866 goto recycle;
822 867
868 prefetch(sd->pg_chunk.p_cnt);
869
823 if (!skb) 870 if (!skb)
824 newskb = alloc_skb(SGE_RX_PULL_LEN, GFP_ATOMIC); 871 newskb = alloc_skb(SGE_RX_PULL_LEN, GFP_ATOMIC);
872
825 if (unlikely(!newskb)) { 873 if (unlikely(!newskb)) {
826 if (!drop_thres) 874 if (!drop_thres)
827 return NULL; 875 return NULL;
828 goto recycle; 876 goto recycle;
829 } 877 }
830 878
831 pci_unmap_single(adap->pdev, pci_unmap_addr(sd, dma_addr), 879 pci_dma_sync_single_for_cpu(adap->pdev, dma_addr, len,
832 fl->buf_size, PCI_DMA_FROMDEVICE); 880 PCI_DMA_FROMDEVICE);
881 (*sd->pg_chunk.p_cnt)--;
882 if (!*sd->pg_chunk.p_cnt)
883 pci_unmap_page(adap->pdev,
884 pci_unmap_addr(&sd->pg_chunk, mapping),
885 fl->alloc_size,
886 PCI_DMA_FROMDEVICE);
833 if (!skb) { 887 if (!skb) {
834 __skb_put(newskb, SGE_RX_PULL_LEN); 888 __skb_put(newskb, SGE_RX_PULL_LEN);
835 memcpy(newskb->data, sd->pg_chunk.va, SGE_RX_PULL_LEN); 889 memcpy(newskb->data, sd->pg_chunk.va, SGE_RX_PULL_LEN);
@@ -1958,8 +2012,8 @@ static void rx_eth(struct adapter *adap, struct sge_rspq *rq,
1958 skb_pull(skb, sizeof(*p) + pad); 2012 skb_pull(skb, sizeof(*p) + pad);
1959 skb->protocol = eth_type_trans(skb, adap->port[p->iff]); 2013 skb->protocol = eth_type_trans(skb, adap->port[p->iff]);
1960 pi = netdev_priv(skb->dev); 2014 pi = netdev_priv(skb->dev);
1961 if ((pi->rx_offload & T3_RX_CSUM) && p->csum_valid && p->csum == htons(0xffff) && 2015 if ((pi->rx_offload & T3_RX_CSUM) && p->csum_valid &&
1962 !p->fragment) { 2016 p->csum == htons(0xffff) && !p->fragment) {
1963 qs->port_stats[SGE_PSTAT_RX_CSUM_GOOD]++; 2017 qs->port_stats[SGE_PSTAT_RX_CSUM_GOOD]++;
1964 skb->ip_summed = CHECKSUM_UNNECESSARY; 2018 skb->ip_summed = CHECKSUM_UNNECESSARY;
1965 } else 2019 } else
@@ -2034,10 +2088,19 @@ static void lro_add_page(struct adapter *adap, struct sge_qset *qs,
2034 fl->credits--; 2088 fl->credits--;
2035 2089
2036 len -= offset; 2090 len -= offset;
2037 pci_unmap_single(adap->pdev, pci_unmap_addr(sd, dma_addr), 2091 pci_dma_sync_single_for_cpu(adap->pdev,
2038 fl->buf_size, PCI_DMA_FROMDEVICE); 2092 pci_unmap_addr(sd, dma_addr),
2093 fl->buf_size - SGE_PG_RSVD,
2094 PCI_DMA_FROMDEVICE);
2095
2096 (*sd->pg_chunk.p_cnt)--;
2097 if (!*sd->pg_chunk.p_cnt)
2098 pci_unmap_page(adap->pdev,
2099 pci_unmap_addr(&sd->pg_chunk, mapping),
2100 fl->alloc_size,
2101 PCI_DMA_FROMDEVICE);
2039 2102
2040 prefetch(&qs->lro_frag_tbl); 2103 prefetch(qs->lro_va);
2041 2104
2042 rx_frag += nr_frags; 2105 rx_frag += nr_frags;
2043 rx_frag->page = sd->pg_chunk.page; 2106 rx_frag->page = sd->pg_chunk.page;
@@ -2047,6 +2110,7 @@ static void lro_add_page(struct adapter *adap, struct sge_qset *qs,
2047 qs->lro_frag_tbl.nr_frags++; 2110 qs->lro_frag_tbl.nr_frags++;
2048 qs->lro_frag_tbl.len = frag_len; 2111 qs->lro_frag_tbl.len = frag_len;
2049 2112
2113
2050 if (!complete) 2114 if (!complete)
2051 return; 2115 return;
2052 2116
@@ -2236,6 +2300,8 @@ no_mem:
2236 if (fl->use_pages) { 2300 if (fl->use_pages) {
2237 void *addr = fl->sdesc[fl->cidx].pg_chunk.va; 2301 void *addr = fl->sdesc[fl->cidx].pg_chunk.va;
2238 2302
2303 prefetch(&qs->lro_frag_tbl);
2304
2239 prefetch(addr); 2305 prefetch(addr);
2240#if L1_CACHE_BYTES < 128 2306#if L1_CACHE_BYTES < 128
2241 prefetch(addr + L1_CACHE_BYTES); 2307 prefetch(addr + L1_CACHE_BYTES);
@@ -2972,21 +3038,23 @@ int t3_sge_alloc_qset(struct adapter *adapter, unsigned int id, int nports,
2972 q->fl[1].use_pages = FL1_PG_CHUNK_SIZE > 0; 3038 q->fl[1].use_pages = FL1_PG_CHUNK_SIZE > 0;
2973 q->fl[0].order = FL0_PG_ORDER; 3039 q->fl[0].order = FL0_PG_ORDER;
2974 q->fl[1].order = FL1_PG_ORDER; 3040 q->fl[1].order = FL1_PG_ORDER;
3041 q->fl[0].alloc_size = FL0_PG_ALLOC_SIZE;
3042 q->fl[1].alloc_size = FL1_PG_ALLOC_SIZE;
2975 3043
2976 spin_lock_irq(&adapter->sge.reg_lock); 3044 spin_lock_irq(&adapter->sge.reg_lock);
2977 3045
2978 /* FL threshold comparison uses < */ 3046 /* FL threshold comparison uses < */
2979 ret = t3_sge_init_rspcntxt(adapter, q->rspq.cntxt_id, irq_vec_idx, 3047 ret = t3_sge_init_rspcntxt(adapter, q->rspq.cntxt_id, irq_vec_idx,
2980 q->rspq.phys_addr, q->rspq.size, 3048 q->rspq.phys_addr, q->rspq.size,
2981 q->fl[0].buf_size, 1, 0); 3049 q->fl[0].buf_size - SGE_PG_RSVD, 1, 0);
2982 if (ret) 3050 if (ret)
2983 goto err_unlock; 3051 goto err_unlock;
2984 3052
2985 for (i = 0; i < SGE_RXQ_PER_SET; ++i) { 3053 for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
2986 ret = t3_sge_init_flcntxt(adapter, q->fl[i].cntxt_id, 0, 3054 ret = t3_sge_init_flcntxt(adapter, q->fl[i].cntxt_id, 0,
2987 q->fl[i].phys_addr, q->fl[i].size, 3055 q->fl[i].phys_addr, q->fl[i].size,
2988 q->fl[i].buf_size, p->cong_thres, 1, 3056 q->fl[i].buf_size - SGE_PG_RSVD,
2989 0); 3057 p->cong_thres, 1, 0);
2990 if (ret) 3058 if (ret)
2991 goto err_unlock; 3059 goto err_unlock;
2992 } 3060 }