author     Divy Le Ray <divy@chelsio.com>          2009-03-26 12:39:29 -0400
committer  David S. Miller <davem@davemloft.net>   2009-03-27 03:46:59 -0400
commit     5e68b772e6efd189d6aca76f6872fb75d51ace60
tree       88a988918d021d397756790d89c8801adb131195  /drivers/net/cxgb3/sge.c
parent     952cdf333f9d1b0b71f1b9a3c5e421a2673ed7de
cxgb3: map entire Rx page, feed map+offset to Rx ring.
DMA mapping can be expensive in the presence of iommus.
Reduce Rx iommu activity by mapping an entire page and feeding the H/W the
mapped address plus the offset of the current page chunk.
Reserve space at the end of the page to hold a mapping reference count, so
the page can be unmapped once its last chunk has been freed.
Signed-off-by: Divy Le Ray <divy@chelsio.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
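
The scheme reads more easily as a small standalone sketch than as diff hunks.
The following is illustrative only and not the driver's code: struct pg_pool
and the helper names (PG_RSVD, pool_get_chunk, pool_put_chunk) are invented
for this example; the real implementation is in the diff below. The idea is
to DMA-map a page once, hand each Rx buffer the page mapping plus a chunk
offset, and keep a reference count in the reserved bytes at the end of the
page so the page is unmapped only when its last chunk is freed. The sketch
assumes chunk_size divides PAGE_SIZE, that the caller serializes access, and
that the H/W is only ever told chunk_size - PG_RSVD so it never touches the
reserved tail.

#include <linux/cache.h>
#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/pci.h>

#define PG_RSVD	SMP_CACHE_BYTES		/* tail bytes holding the refcount */

struct pg_pool {
	struct page	*page;		/* page currently being carved up */
	void		*va;
	unsigned int	offset;		/* offset of the next free chunk */
	unsigned int	*p_cnt;		/* refcount, stored in the tail */
	dma_addr_t	mapping;	/* DMA address of the whole page */
};

/* Hand out the next chunk; a page is mapped only when a fresh one is needed. */
static int pool_get_chunk(struct pci_dev *pdev, struct pg_pool *p,
			  unsigned int chunk_size, dma_addr_t *dma)
{
	if (!p->page) {
		p->page = alloc_pages(GFP_ATOMIC, 0);
		if (!p->page)
			return -ENOMEM;
		p->va = page_address(p->page);
		p->p_cnt = p->va + PAGE_SIZE - PG_RSVD;
		*p->p_cnt = 0;
		p->offset = 0;
		p->mapping = pci_map_page(pdev, p->page, 0, PAGE_SIZE,
					  PCI_DMA_FROMDEVICE);
	}

	*dma = p->mapping + p->offset;	/* map + offset, as fed to the H/W */
	(*p->p_cnt)++;			/* chunks outstanding on this page */

	p->offset += chunk_size;
	if (p->offset == PAGE_SIZE)
		p->page = NULL;		/* exhausted: last chunk inherits the alloc ref */
	else
		get_page(p->page);	/* pool keeps its own page reference */
	return 0;
}

/* Free one chunk; the last chunk to go unmaps the page. */
static void pool_put_chunk(struct pci_dev *pdev, struct page *page,
			   unsigned int *p_cnt, dma_addr_t page_mapping)
{
	if (--(*p_cnt) == 0)
		pci_unmap_page(pdev, page_mapping, PAGE_SIZE,
			       PCI_DMA_FROMDEVICE);
	put_page(page);
}

With FL0's 2048-byte chunks on a 4096-byte page, for example, this halves the
number of mappings the iommu has to set up and tear down on that free list,
at the cost of one refcount per page and per-chunk pci_dma_sync_* calls.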
Diffstat (limited to 'drivers/net/cxgb3/sge.c')
-rw-r--r-- (was -rwxr-xr-x)  drivers/net/cxgb3/sge.c | 138
1 file changed, 103 insertions(+), 35 deletions(-)
diff --git a/drivers/net/cxgb3/sge.c b/drivers/net/cxgb3/sge.c
old mode 100755
new mode 100644
index 54667f0dde94..26d3587f3399
--- a/drivers/net/cxgb3/sge.c
+++ b/drivers/net/cxgb3/sge.c
@@ -50,6 +50,7 @@
 #define SGE_RX_COPY_THRES  256
 #define SGE_RX_PULL_LEN    128
 
+#define SGE_PG_RSVD SMP_CACHE_BYTES
 /*
  * Page chunk size for FL0 buffers if FL0 is to be populated with page chunks.
  * It must be a divisor of PAGE_SIZE.  If set to 0 FL0 will use sk_buffs
@@ -57,8 +58,10 @@
  */
 #define FL0_PG_CHUNK_SIZE  2048
 #define FL0_PG_ORDER 0
+#define FL0_PG_ALLOC_SIZE (PAGE_SIZE << FL0_PG_ORDER)
 #define FL1_PG_CHUNK_SIZE  (PAGE_SIZE > 8192 ? 16384 : 8192)
 #define FL1_PG_ORDER (PAGE_SIZE > 8192 ? 0 : 1)
+#define FL1_PG_ALLOC_SIZE (PAGE_SIZE << FL1_PG_ORDER)
 
 #define SGE_RX_DROP_THRES 16
 #define RX_RECLAIM_PERIOD (HZ/4)
@@ -345,13 +348,21 @@ static inline int should_restart_tx(const struct sge_txq *q)
 	return q->in_use - r < (q->size >> 1);
 }
 
-static void clear_rx_desc(const struct sge_fl *q, struct rx_sw_desc *d)
+static void clear_rx_desc(struct pci_dev *pdev, const struct sge_fl *q,
+			  struct rx_sw_desc *d)
 {
-	if (q->use_pages) {
-		if (d->pg_chunk.page)
-			put_page(d->pg_chunk.page);
+	if (q->use_pages && d->pg_chunk.page) {
+		(*d->pg_chunk.p_cnt)--;
+		if (!*d->pg_chunk.p_cnt)
+			pci_unmap_page(pdev,
+				       pci_unmap_addr(&d->pg_chunk, mapping),
+				       q->alloc_size, PCI_DMA_FROMDEVICE);
+
+		put_page(d->pg_chunk.page);
 		d->pg_chunk.page = NULL;
 	} else {
+		pci_unmap_single(pdev, pci_unmap_addr(d, dma_addr),
+				 q->buf_size, PCI_DMA_FROMDEVICE);
 		kfree_skb(d->skb);
 		d->skb = NULL;
 	}
@@ -372,9 +383,8 @@ static void free_rx_bufs(struct pci_dev *pdev, struct sge_fl *q)
 	while (q->credits--) {
 		struct rx_sw_desc *d = &q->sdesc[cidx];
 
-		pci_unmap_single(pdev, pci_unmap_addr(d, dma_addr),
-				 q->buf_size, PCI_DMA_FROMDEVICE);
-		clear_rx_desc(q, d);
+
+		clear_rx_desc(pdev, q, d);
 		if (++cidx == q->size)
 			cidx = 0;
 	}
@@ -417,18 +427,39 @@ static inline int add_one_rx_buf(void *va, unsigned int len,
 	return 0;
 }
 
-static int alloc_pg_chunk(struct sge_fl *q, struct rx_sw_desc *sd, gfp_t gfp,
+static inline int add_one_rx_chunk(dma_addr_t mapping, struct rx_desc *d,
+				   unsigned int gen)
+{
+	d->addr_lo = cpu_to_be32(mapping);
+	d->addr_hi = cpu_to_be32((u64) mapping >> 32);
+	wmb();
+	d->len_gen = cpu_to_be32(V_FLD_GEN1(gen));
+	d->gen2 = cpu_to_be32(V_FLD_GEN2(gen));
+	return 0;
+}
+
+static int alloc_pg_chunk(struct adapter *adapter, struct sge_fl *q,
+			  struct rx_sw_desc *sd, gfp_t gfp,
 			  unsigned int order)
 {
 	if (!q->pg_chunk.page) {
+		dma_addr_t mapping;
+
 		q->pg_chunk.page = alloc_pages(gfp, order);
 		if (unlikely(!q->pg_chunk.page))
 			return -ENOMEM;
 		q->pg_chunk.va = page_address(q->pg_chunk.page);
+		q->pg_chunk.p_cnt = q->pg_chunk.va + (PAGE_SIZE << order) -
+				    SGE_PG_RSVD;
 		q->pg_chunk.offset = 0;
+		mapping = pci_map_page(adapter->pdev, q->pg_chunk.page,
+				       0, q->alloc_size, PCI_DMA_FROMDEVICE);
+		pci_unmap_addr_set(&q->pg_chunk, mapping, mapping);
 	}
 	sd->pg_chunk = q->pg_chunk;
 
+	prefetch(sd->pg_chunk.p_cnt);
+
 	q->pg_chunk.offset += q->buf_size;
 	if (q->pg_chunk.offset == (PAGE_SIZE << order))
 		q->pg_chunk.page = NULL;
@@ -436,6 +467,12 @@ static int alloc_pg_chunk(struct sge_fl *q, struct rx_sw_desc *sd, gfp_t gfp,
 		q->pg_chunk.va += q->buf_size;
 		get_page(q->pg_chunk.page);
 	}
+
+	if (sd->pg_chunk.offset == 0)
+		*sd->pg_chunk.p_cnt = 1;
+	else
+		*sd->pg_chunk.p_cnt += 1;
+
 	return 0;
 }
 
@@ -460,35 +497,43 @@ static inline void ring_fl_db(struct adapter *adap, struct sge_fl *q)
  */
 static int refill_fl(struct adapter *adap, struct sge_fl *q, int n, gfp_t gfp)
 {
-	void *buf_start;
 	struct rx_sw_desc *sd = &q->sdesc[q->pidx];
 	struct rx_desc *d = &q->desc[q->pidx];
 	unsigned int count = 0;
 
 	while (n--) {
+		dma_addr_t mapping;
 		int err;
 
 		if (q->use_pages) {
-			if (unlikely(alloc_pg_chunk(q, sd, gfp, q->order))) {
+			if (unlikely(alloc_pg_chunk(adap, q, sd, gfp,
+						    q->order))) {
 nomem:				q->alloc_failed++;
 				break;
 			}
-			buf_start = sd->pg_chunk.va;
+			mapping = pci_unmap_addr(&sd->pg_chunk, mapping) +
+				 sd->pg_chunk.offset;
+			pci_unmap_addr_set(sd, dma_addr, mapping);
+
+			add_one_rx_chunk(mapping, d, q->gen);
+			pci_dma_sync_single_for_device(adap->pdev, mapping,
+						q->buf_size - SGE_PG_RSVD,
+						PCI_DMA_FROMDEVICE);
 		} else {
-			struct sk_buff *skb = alloc_skb(q->buf_size, gfp);
+			void *buf_start;
 
+			struct sk_buff *skb = alloc_skb(q->buf_size, gfp);
 			if (!skb)
 				goto nomem;
 
 			sd->skb = skb;
 			buf_start = skb->data;
-		}
-
-		err = add_one_rx_buf(buf_start, q->buf_size, d, sd, q->gen,
-				     adap->pdev);
-		if (unlikely(err)) {
-			clear_rx_desc(q, sd);
-			break;
+			err = add_one_rx_buf(buf_start, q->buf_size, d, sd,
+					     q->gen, adap->pdev);
+			if (unlikely(err)) {
+				clear_rx_desc(adap->pdev, q, sd);
+				break;
+			}
 		}
 
 		d++;
@@ -795,19 +840,19 @@ static struct sk_buff *get_packet_pg(struct adapter *adap, struct sge_fl *fl,
 	struct sk_buff *newskb, *skb;
 	struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
 
-	newskb = skb = q->pg_skb;
+	dma_addr_t dma_addr = pci_unmap_addr(sd, dma_addr);
 
+	newskb = skb = q->pg_skb;
 	if (!skb && (len <= SGE_RX_COPY_THRES)) {
 		newskb = alloc_skb(len, GFP_ATOMIC);
 		if (likely(newskb != NULL)) {
 			__skb_put(newskb, len);
-			pci_dma_sync_single_for_cpu(adap->pdev,
-					    pci_unmap_addr(sd, dma_addr), len,
+			pci_dma_sync_single_for_cpu(adap->pdev, dma_addr, len,
 					    PCI_DMA_FROMDEVICE);
 			memcpy(newskb->data, sd->pg_chunk.va, len);
-			pci_dma_sync_single_for_device(adap->pdev,
-					    pci_unmap_addr(sd, dma_addr), len,
+			pci_dma_sync_single_for_device(adap->pdev, dma_addr,
+					    len,
 					    PCI_DMA_FROMDEVICE);
 		} else if (!drop_thres)
 			return NULL;
 recycle:
@@ -820,16 +865,25 @@ recycle:
 	if (unlikely(q->rx_recycle_buf || (!skb && fl->credits <= drop_thres)))
 		goto recycle;
 
+	prefetch(sd->pg_chunk.p_cnt);
+
 	if (!skb)
 		newskb = alloc_skb(SGE_RX_PULL_LEN, GFP_ATOMIC);
+
 	if (unlikely(!newskb)) {
 		if (!drop_thres)
 			return NULL;
 		goto recycle;
 	}
 
-	pci_unmap_single(adap->pdev, pci_unmap_addr(sd, dma_addr),
-			 fl->buf_size, PCI_DMA_FROMDEVICE);
+	pci_dma_sync_single_for_cpu(adap->pdev, dma_addr, len,
+				    PCI_DMA_FROMDEVICE);
+	(*sd->pg_chunk.p_cnt)--;
+	if (!*sd->pg_chunk.p_cnt)
+		pci_unmap_page(adap->pdev,
+			       pci_unmap_addr(&sd->pg_chunk, mapping),
+			       fl->alloc_size,
+			       PCI_DMA_FROMDEVICE);
 	if (!skb) {
 		__skb_put(newskb, SGE_RX_PULL_LEN);
 		memcpy(newskb->data, sd->pg_chunk.va, SGE_RX_PULL_LEN);
@@ -1958,8 +2012,8 @@ static void rx_eth(struct adapter *adap, struct sge_rspq *rq,
 	skb_pull(skb, sizeof(*p) + pad);
 	skb->protocol = eth_type_trans(skb, adap->port[p->iff]);
 	pi = netdev_priv(skb->dev);
-	if ((pi->rx_offload & T3_RX_CSUM) && p->csum_valid && p->csum == htons(0xffff) &&
-	    !p->fragment) {
+	if ((pi->rx_offload & T3_RX_CSUM) && p->csum_valid &&
+	    p->csum == htons(0xffff) && !p->fragment) {
 		qs->port_stats[SGE_PSTAT_RX_CSUM_GOOD]++;
 		skb->ip_summed = CHECKSUM_UNNECESSARY;
 	} else
@@ -2034,10 +2088,19 @@ static void lro_add_page(struct adapter *adap, struct sge_qset *qs,
 	fl->credits--;
 
 	len -= offset;
-	pci_unmap_single(adap->pdev, pci_unmap_addr(sd, dma_addr),
-			 fl->buf_size, PCI_DMA_FROMDEVICE);
+	pci_dma_sync_single_for_cpu(adap->pdev,
+				    pci_unmap_addr(sd, dma_addr),
+				    fl->buf_size - SGE_PG_RSVD,
+				    PCI_DMA_FROMDEVICE);
+
+	(*sd->pg_chunk.p_cnt)--;
+	if (!*sd->pg_chunk.p_cnt)
+		pci_unmap_page(adap->pdev,
+			       pci_unmap_addr(&sd->pg_chunk, mapping),
+			       fl->alloc_size,
+			       PCI_DMA_FROMDEVICE);
 
-	prefetch(&qs->lro_frag_tbl);
+	prefetch(qs->lro_va);
 
 	rx_frag += nr_frags;
 	rx_frag->page = sd->pg_chunk.page;
@@ -2047,6 +2110,7 @@ static void lro_add_page(struct adapter *adap, struct sge_qset *qs,
 	qs->lro_frag_tbl.nr_frags++;
 	qs->lro_frag_tbl.len = frag_len;
 
+
 	if (!complete)
 		return;
 
@@ -2236,6 +2300,8 @@ no_mem:
 		if (fl->use_pages) {
 			void *addr = fl->sdesc[fl->cidx].pg_chunk.va;
 
+			prefetch(&qs->lro_frag_tbl);
+
 			prefetch(addr);
 #if L1_CACHE_BYTES < 128
 			prefetch(addr + L1_CACHE_BYTES);
@@ -2972,21 +3038,23 @@ int t3_sge_alloc_qset(struct adapter *adapter, unsigned int id, int nports,
 	q->fl[1].use_pages = FL1_PG_CHUNK_SIZE > 0;
 	q->fl[0].order = FL0_PG_ORDER;
 	q->fl[1].order = FL1_PG_ORDER;
+	q->fl[0].alloc_size = FL0_PG_ALLOC_SIZE;
+	q->fl[1].alloc_size = FL1_PG_ALLOC_SIZE;
 
 	spin_lock_irq(&adapter->sge.reg_lock);
 
 	/* FL threshold comparison uses < */
 	ret = t3_sge_init_rspcntxt(adapter, q->rspq.cntxt_id, irq_vec_idx,
 				   q->rspq.phys_addr, q->rspq.size,
-				   q->fl[0].buf_size, 1, 0);
+				   q->fl[0].buf_size - SGE_PG_RSVD, 1, 0);
 	if (ret)
 		goto err_unlock;
 
 	for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
 		ret = t3_sge_init_flcntxt(adapter, q->fl[i].cntxt_id, 0,
 					  q->fl[i].phys_addr, q->fl[i].size,
-					  q->fl[i].buf_size, p->cong_thres, 1,
-					  0);
+					  q->fl[i].buf_size - SGE_PG_RSVD,
+					  p->cong_thres, 1, 0);
 		if (ret)
 			goto err_unlock;
 	}