diff options
author | Stephen Hemminger <shemminger@osdl.org> | 2007-01-08 14:26:12 -0500 |
---|---|---|
committer | Jeff Garzik <jeff@garzik.org> | 2007-02-05 16:58:47 -0500 |
commit | 24a427cf76984726641ea0d8163e61e99119069d (patch) | |
tree | e5c31c2ed4a606776231acbe9c9b5d57f8b85c0a /drivers | |
parent | 3de00b8958b12d62712ae9500968c65d3b43bb27 (diff) |
chelsio: more rx speedup
Clean up receive processing some more:
* do the reserve padding of skb during setup
* don't pass constants to get_packet
* do smart prefetch of skb
* make copybreak a module parameter
Signed-off-by: Stephen Hemminger <shemminger@osdl.org>
Signed-off-by: Jeff Garzik <jeff@garzik.org>
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/net/chelsio/sge.c | 87 |
1 files changed, 45 insertions, 42 deletions
diff --git a/drivers/net/chelsio/sge.c b/drivers/net/chelsio/sge.c
index 8e287e79e4e3..a15611925338 100644
--- a/drivers/net/chelsio/sge.c
+++ b/drivers/net/chelsio/sge.c
@@ -71,12 +71,9 @@ | |||
71 | #define SGE_FREEL_REFILL_THRESH 16 | 71 | #define SGE_FREEL_REFILL_THRESH 16 |
72 | #define SGE_RESPQ_E_N 1024 | 72 | #define SGE_RESPQ_E_N 1024 |
73 | #define SGE_INTRTIMER_NRES 1000 | 73 | #define SGE_INTRTIMER_NRES 1000 |
74 | #define SGE_RX_COPY_THRES 256 | ||
75 | #define SGE_RX_SM_BUF_SIZE 1536 | 74 | #define SGE_RX_SM_BUF_SIZE 1536 |
76 | #define SGE_TX_DESC_MAX_PLEN 16384 | 75 | #define SGE_TX_DESC_MAX_PLEN 16384 |
77 | 76 | ||
78 | # define SGE_RX_DROP_THRES 2 | ||
79 | |||
80 | #define SGE_RESPQ_REPLENISH_THRES (SGE_RESPQ_E_N / 4) | 77 | #define SGE_RESPQ_REPLENISH_THRES (SGE_RESPQ_E_N / 4) |
81 | 78 | ||
82 | /* | 79 | /* |
@@ -846,6 +843,8 @@ static void refill_free_list(struct sge *sge, struct freelQ *q) | |||
846 | skb_reserve(skb, q->dma_offset); | 843 | skb_reserve(skb, q->dma_offset); |
847 | mapping = pci_map_single(pdev, skb->data, dma_len, | 844 | mapping = pci_map_single(pdev, skb->data, dma_len, |
848 | PCI_DMA_FROMDEVICE); | 845 | PCI_DMA_FROMDEVICE); |
846 | skb_reserve(skb, sge->rx_pkt_pad); | ||
847 | |||
849 | ce->skb = skb; | 848 | ce->skb = skb; |
850 | pci_unmap_addr_set(ce, dma_addr, mapping); | 849 | pci_unmap_addr_set(ce, dma_addr, mapping); |
851 | pci_unmap_len_set(ce, dma_len, dma_len); | 850 | pci_unmap_len_set(ce, dma_len, dma_len); |
@@ -1024,6 +1023,10 @@ static void recycle_fl_buf(struct freelQ *fl, int idx) | |||
1024 | } | 1023 | } |
1025 | } | 1024 | } |
1026 | 1025 | ||
1026 | static int copybreak __read_mostly = 256; | ||
1027 | module_param(copybreak, int, 0); | ||
1028 | MODULE_PARM_DESC(copybreak, "Receive copy threshold"); | ||
1029 | |||
1027 | /** | 1030 | /** |
1028 | * get_packet - return the next ingress packet buffer | 1031 | * get_packet - return the next ingress packet buffer |
1029 | * @pdev: the PCI device that received the packet | 1032 | * @pdev: the PCI device that received the packet |
@@ -1043,45 +1046,42 @@ static void recycle_fl_buf(struct freelQ *fl, int idx) | |||
1043 | * be copied but there is no memory for the copy. | 1046 | * be copied but there is no memory for the copy. |
1044 | */ | 1047 | */ |
1045 | static inline struct sk_buff *get_packet(struct pci_dev *pdev, | 1048 | static inline struct sk_buff *get_packet(struct pci_dev *pdev, |
1046 | struct freelQ *fl, unsigned int len, | 1049 | struct freelQ *fl, unsigned int len) |
1047 | int dma_pad, int skb_pad, | ||
1048 | unsigned int copy_thres, | ||
1049 | unsigned int drop_thres) | ||
1050 | { | 1050 | { |
1051 | struct sk_buff *skb; | 1051 | struct sk_buff *skb; |
1052 | struct freelQ_ce *ce = &fl->centries[fl->cidx]; | 1052 | const struct freelQ_ce *ce = &fl->centries[fl->cidx]; |
1053 | 1053 | ||
1054 | if (len < copy_thres) { | 1054 | if (len < copybreak) { |
1055 | skb = alloc_skb(len + skb_pad, GFP_ATOMIC); | 1055 | skb = alloc_skb(len + 2, GFP_ATOMIC); |
1056 | if (likely(skb != NULL)) { | 1056 | if (!skb) |
1057 | skb_reserve(skb, skb_pad); | 1057 | goto use_orig_buf; |
1058 | skb_put(skb, len); | 1058 | |
1059 | pci_dma_sync_single_for_cpu(pdev, | 1059 | skb_reserve(skb, 2); /* align IP header */ |
1060 | pci_unmap_addr(ce, dma_addr), | 1060 | skb_put(skb, len); |
1061 | pci_unmap_len(ce, dma_len), | 1061 | pci_dma_sync_single_for_cpu(pdev, |
1062 | PCI_DMA_FROMDEVICE); | ||
1063 | memcpy(skb->data, ce->skb->data + dma_pad, len); | ||
1064 | pci_dma_sync_single_for_device(pdev, | ||
1065 | pci_unmap_addr(ce, dma_addr), | 1062 | pci_unmap_addr(ce, dma_addr), |
1066 | pci_unmap_len(ce, dma_len), | 1063 | pci_unmap_len(ce, dma_len), |
1067 | PCI_DMA_FROMDEVICE); | 1064 | PCI_DMA_FROMDEVICE); |
1068 | } else if (!drop_thres) | 1065 | memcpy(skb->data, ce->skb->data, len); |
1069 | goto use_orig_buf; | 1066 | pci_dma_sync_single_for_device(pdev, |
1070 | 1067 | pci_unmap_addr(ce, dma_addr), | |
1068 | pci_unmap_len(ce, dma_len), | ||
1069 | PCI_DMA_FROMDEVICE); | ||
1071 | recycle_fl_buf(fl, fl->cidx); | 1070 | recycle_fl_buf(fl, fl->cidx); |
1072 | return skb; | 1071 | return skb; |
1073 | } | 1072 | } |
1074 | 1073 | ||
1075 | if (fl->credits < drop_thres) { | 1074 | use_orig_buf: |
1075 | if (fl->credits < 2) { | ||
1076 | recycle_fl_buf(fl, fl->cidx); | 1076 | recycle_fl_buf(fl, fl->cidx); |
1077 | return NULL; | 1077 | return NULL; |
1078 | } | 1078 | } |
1079 | 1079 | ||
1080 | use_orig_buf: | ||
1081 | pci_unmap_single(pdev, pci_unmap_addr(ce, dma_addr), | 1080 | pci_unmap_single(pdev, pci_unmap_addr(ce, dma_addr), |
1082 | pci_unmap_len(ce, dma_len), PCI_DMA_FROMDEVICE); | 1081 | pci_unmap_len(ce, dma_len), PCI_DMA_FROMDEVICE); |
1083 | skb = ce->skb; | 1082 | skb = ce->skb; |
1084 | skb_reserve(skb, dma_pad); | 1083 | prefetch(skb->data); |
1084 | |||
1085 | skb_put(skb, len); | 1085 | skb_put(skb, len); |
1086 | return skb; | 1086 | return skb; |
1087 | } | 1087 | } |
@@ -1359,27 +1359,25 @@ static void restart_sched(unsigned long arg) | |||
1359 | * | 1359 | * |
1360 | * Process an ingress ethernet packet and deliver it to the stack. | 1360 | * Process an ingress ethernet packet and deliver it to the stack.
1361 | */ | 1361 | */ |
1362 | static int sge_rx(struct sge *sge, struct freelQ *fl, unsigned int len) | 1362 | static void sge_rx(struct sge *sge, struct freelQ *fl, unsigned int len) |
1363 | { | 1363 | { |
1364 | struct sk_buff *skb; | 1364 | struct sk_buff *skb; |
1365 | struct cpl_rx_pkt *p; | 1365 | const struct cpl_rx_pkt *p; |
1366 | struct adapter *adapter = sge->adapter; | 1366 | struct adapter *adapter = sge->adapter; |
1367 | struct sge_port_stats *st; | 1367 | struct sge_port_stats *st; |
1368 | 1368 | ||
1369 | skb = get_packet(adapter->pdev, fl, len - sge->rx_pkt_pad, | 1369 | skb = get_packet(adapter->pdev, fl, len - sge->rx_pkt_pad); |
1370 | sge->rx_pkt_pad, 2, SGE_RX_COPY_THRES, | ||
1371 | SGE_RX_DROP_THRES); | ||
1372 | if (unlikely(!skb)) { | 1370 | if (unlikely(!skb)) { |
1373 | sge->stats.rx_drops++; | 1371 | sge->stats.rx_drops++; |
1374 | return 0; | 1372 | return; |
1375 | } | 1373 | } |
1376 | 1374 | ||
1377 | p = (struct cpl_rx_pkt *)skb->data; | 1375 | p = (const struct cpl_rx_pkt *) skb->data; |
1378 | skb_pull(skb, sizeof(*p)); | ||
1379 | if (p->iff >= adapter->params.nports) { | 1376 | if (p->iff >= adapter->params.nports) { |
1380 | kfree_skb(skb); | 1377 | kfree_skb(skb); |
1381 | return 0; | 1378 | return; |
1382 | } | 1379 | } |
1380 | __skb_pull(skb, sizeof(*p)); | ||
1383 | 1381 | ||
1384 | skb->dev = adapter->port[p->iff].dev; | 1382 | skb->dev = adapter->port[p->iff].dev; |
1385 | skb->dev->last_rx = jiffies; | 1383 | skb->dev->last_rx = jiffies; |
@@ -1411,7 +1409,6 @@ static int sge_rx(struct sge *sge, struct freelQ *fl, unsigned int len) | |||
1411 | netif_rx(skb); | 1409 | netif_rx(skb); |
1412 | #endif | 1410 | #endif |
1413 | } | 1411 | } |
1414 | return 0; | ||
1415 | } | 1412 | } |
1416 | 1413 | ||
1417 | /* | 1414 | /* |
@@ -1493,12 +1490,11 @@ static int process_responses(struct adapter *adapter, int budget) | |||
1493 | struct sge *sge = adapter->sge; | 1490 | struct sge *sge = adapter->sge; |
1494 | struct respQ *q = &sge->respQ; | 1491 | struct respQ *q = &sge->respQ; |
1495 | struct respQ_e *e = &q->entries[q->cidx]; | 1492 | struct respQ_e *e = &q->entries[q->cidx]; |
1496 | int budget_left = budget; | 1493 | int done = 0; |
1497 | unsigned int flags = 0; | 1494 | unsigned int flags = 0; |
1498 | unsigned int cmdq_processed[SGE_CMDQ_N] = {0, 0}; | 1495 | unsigned int cmdq_processed[SGE_CMDQ_N] = {0, 0}; |
1499 | 1496 | ||
1500 | 1497 | while (done < budget && e->GenerationBit == q->genbit) { | |
1501 | while (likely(budget_left && e->GenerationBit == q->genbit)) { | ||
1502 | flags |= e->Qsleeping; | 1498 | flags |= e->Qsleeping; |
1503 | 1499 | ||
1504 | cmdq_processed[0] += e->Cmdq0CreditReturn; | 1500 | cmdq_processed[0] += e->Cmdq0CreditReturn; |
@@ -1508,14 +1504,16 @@ static int process_responses(struct adapter *adapter, int budget) | |||
1508 | * ping-pong of TX state information on MP where the sender | 1504 | * ping-pong of TX state information on MP where the sender |
1509 | * might run on a different CPU than this function... | 1505 | * might run on a different CPU than this function... |
1510 | */ | 1506 | */ |
1511 | if (unlikely(flags & F_CMDQ0_ENABLE || cmdq_processed[0] > 64)) { | 1507 | if (unlikely((flags & F_CMDQ0_ENABLE) || cmdq_processed[0] > 64)) { |
1512 | flags = update_tx_info(adapter, flags, cmdq_processed[0]); | 1508 | flags = update_tx_info(adapter, flags, cmdq_processed[0]); |
1513 | cmdq_processed[0] = 0; | 1509 | cmdq_processed[0] = 0; |
1514 | } | 1510 | } |
1511 | |||
1515 | if (unlikely(cmdq_processed[1] > 16)) { | 1512 | if (unlikely(cmdq_processed[1] > 16)) { |
1516 | sge->cmdQ[1].processed += cmdq_processed[1]; | 1513 | sge->cmdQ[1].processed += cmdq_processed[1]; |
1517 | cmdq_processed[1] = 0; | 1514 | cmdq_processed[1] = 0; |
1518 | } | 1515 | } |
1516 | |||
1519 | if (likely(e->DataValid)) { | 1517 | if (likely(e->DataValid)) { |
1520 | struct freelQ *fl = &sge->freelQ[e->FreelistQid]; | 1518 | struct freelQ *fl = &sge->freelQ[e->FreelistQid]; |
1521 | 1519 | ||
@@ -1525,12 +1523,16 @@ static int process_responses(struct adapter *adapter, int budget) | |||
1525 | else | 1523 | else |
1526 | sge_rx(sge, fl, e->BufferLength); | 1524 | sge_rx(sge, fl, e->BufferLength); |
1527 | 1525 | ||
1526 | ++done; | ||
1527 | |||
1528 | /* | 1528 | /* |
1529 | * Note: this depends on each packet consuming a | 1529 | * Note: this depends on each packet consuming a |
1530 | * single free-list buffer; cf. the BUG above. | 1530 | * single free-list buffer; cf. the BUG above. |
1531 | */ | 1531 | */ |
1532 | if (++fl->cidx == fl->size) | 1532 | if (++fl->cidx == fl->size) |
1533 | fl->cidx = 0; | 1533 | fl->cidx = 0; |
1534 | prefetch(fl->centries[fl->cidx].skb); | ||
1535 | |||
1534 | if (unlikely(--fl->credits < | 1536 | if (unlikely(--fl->credits < |
1535 | fl->size - SGE_FREEL_REFILL_THRESH)) | 1537 | fl->size - SGE_FREEL_REFILL_THRESH)) |
1536 | refill_free_list(sge, fl); | 1538 | refill_free_list(sge, fl); |
@@ -1549,14 +1551,12 @@ static int process_responses(struct adapter *adapter, int budget) | |||
1549 | writel(q->credits, adapter->regs + A_SG_RSPQUEUECREDIT); | 1551 | writel(q->credits, adapter->regs + A_SG_RSPQUEUECREDIT); |
1550 | q->credits = 0; | 1552 | q->credits = 0; |
1551 | } | 1553 | } |
1552 | --budget_left; | ||
1553 | } | 1554 | } |
1554 | 1555 | ||
1555 | flags = update_tx_info(adapter, flags, cmdq_processed[0]); | 1556 | flags = update_tx_info(adapter, flags, cmdq_processed[0]); |
1556 | sge->cmdQ[1].processed += cmdq_processed[1]; | 1557 | sge->cmdQ[1].processed += cmdq_processed[1]; |
1557 | 1558 | ||
1558 | budget -= budget_left; | 1559 | return done; |
1559 | return budget; | ||
1560 | } | 1560 | } |
1561 | 1561 | ||
1562 | static inline int responses_pending(const struct adapter *adapter) | 1562 | static inline int responses_pending(const struct adapter *adapter) |
@@ -1581,11 +1581,14 @@ static int process_pure_responses(struct adapter *adapter) | |||
1581 | struct sge *sge = adapter->sge; | 1581 | struct sge *sge = adapter->sge; |
1582 | struct respQ *q = &sge->respQ; | 1582 | struct respQ *q = &sge->respQ; |
1583 | struct respQ_e *e = &q->entries[q->cidx]; | 1583 | struct respQ_e *e = &q->entries[q->cidx]; |
1584 | const struct freelQ *fl = &sge->freelQ[e->FreelistQid]; | ||
1584 | unsigned int flags = 0; | 1585 | unsigned int flags = 0; |
1585 | unsigned int cmdq_processed[SGE_CMDQ_N] = {0, 0}; | 1586 | unsigned int cmdq_processed[SGE_CMDQ_N] = {0, 0}; |
1586 | 1587 | ||
1588 | prefetch(fl->centries[fl->cidx].skb); | ||
1587 | if (e->DataValid) | 1589 | if (e->DataValid) |
1588 | return 1; | 1590 | return 1; |
1591 | |||
1589 | do { | 1592 | do { |
1590 | flags |= e->Qsleeping; | 1593 | flags |= e->Qsleeping; |
1591 | 1594 | ||