aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Stephen Hemminger <shemminger@osdl.org>, 2007-01-08 14:26:12 -0500
committer: Jeff Garzik <jeff@garzik.org>, 2007-02-05 16:58:47 -0500
commit: 24a427cf76984726641ea0d8163e61e99119069d (patch)
tree: e5c31c2ed4a606776231acbe9c9b5d57f8b85c0a
parent: 3de00b8958b12d62712ae9500968c65d3b43bb27 (diff)
chelsio: more rx speedup
Cleanup receive processing some more:
  * do the reserve padding of skb during setup
  * don't pass constants to get_packet
  * do smart prefetch of skb
  * make copybreak a module parameter

Signed-off-by: Stephen Hemminger <shemminger@osdl.org>
Signed-off-by: Jeff Garzik <jeff@garzik.org>
-rw-r--r-- drivers/net/chelsio/sge.c 87
1 files changed, 45 insertions, 42 deletions
diff --git a/drivers/net/chelsio/sge.c b/drivers/net/chelsio/sge.c
index 8e287e79e4e3..a15611925338 100644
--- a/drivers/net/chelsio/sge.c
+++ b/drivers/net/chelsio/sge.c
@@ -71,12 +71,9 @@
71#define SGE_FREEL_REFILL_THRESH 16 71#define SGE_FREEL_REFILL_THRESH 16
72#define SGE_RESPQ_E_N 1024 72#define SGE_RESPQ_E_N 1024
73#define SGE_INTRTIMER_NRES 1000 73#define SGE_INTRTIMER_NRES 1000
74#define SGE_RX_COPY_THRES 256
75#define SGE_RX_SM_BUF_SIZE 1536 74#define SGE_RX_SM_BUF_SIZE 1536
76#define SGE_TX_DESC_MAX_PLEN 16384 75#define SGE_TX_DESC_MAX_PLEN 16384
77 76
78# define SGE_RX_DROP_THRES 2
79
80#define SGE_RESPQ_REPLENISH_THRES (SGE_RESPQ_E_N / 4) 77#define SGE_RESPQ_REPLENISH_THRES (SGE_RESPQ_E_N / 4)
81 78
82/* 79/*
@@ -846,6 +843,8 @@ static void refill_free_list(struct sge *sge, struct freelQ *q)
846 skb_reserve(skb, q->dma_offset); 843 skb_reserve(skb, q->dma_offset);
847 mapping = pci_map_single(pdev, skb->data, dma_len, 844 mapping = pci_map_single(pdev, skb->data, dma_len,
848 PCI_DMA_FROMDEVICE); 845 PCI_DMA_FROMDEVICE);
846 skb_reserve(skb, sge->rx_pkt_pad);
847
849 ce->skb = skb; 848 ce->skb = skb;
850 pci_unmap_addr_set(ce, dma_addr, mapping); 849 pci_unmap_addr_set(ce, dma_addr, mapping);
851 pci_unmap_len_set(ce, dma_len, dma_len); 850 pci_unmap_len_set(ce, dma_len, dma_len);
@@ -1024,6 +1023,10 @@ static void recycle_fl_buf(struct freelQ *fl, int idx)
1024 } 1023 }
1025} 1024}
1026 1025
1026static int copybreak __read_mostly = 256;
1027module_param(copybreak, int, 0);
1028MODULE_PARM_DESC(copybreak, "Receive copy threshold");
1029
1027/** 1030/**
1028 * get_packet - return the next ingress packet buffer 1031 * get_packet - return the next ingress packet buffer
1029 * @pdev: the PCI device that received the packet 1032 * @pdev: the PCI device that received the packet
@@ -1043,45 +1046,42 @@ static void recycle_fl_buf(struct freelQ *fl, int idx)
1043 * be copied but there is no memory for the copy. 1046 * be copied but there is no memory for the copy.
1044 */ 1047 */
1045static inline struct sk_buff *get_packet(struct pci_dev *pdev, 1048static inline struct sk_buff *get_packet(struct pci_dev *pdev,
1046 struct freelQ *fl, unsigned int len, 1049 struct freelQ *fl, unsigned int len)
1047 int dma_pad, int skb_pad,
1048 unsigned int copy_thres,
1049 unsigned int drop_thres)
1050{ 1050{
1051 struct sk_buff *skb; 1051 struct sk_buff *skb;
1052 struct freelQ_ce *ce = &fl->centries[fl->cidx]; 1052 const struct freelQ_ce *ce = &fl->centries[fl->cidx];
1053 1053
1054 if (len < copy_thres) { 1054 if (len < copybreak) {
1055 skb = alloc_skb(len + skb_pad, GFP_ATOMIC); 1055 skb = alloc_skb(len + 2, GFP_ATOMIC);
1056 if (likely(skb != NULL)) { 1056 if (!skb)
1057 skb_reserve(skb, skb_pad); 1057 goto use_orig_buf;
1058 skb_put(skb, len); 1058
1059 pci_dma_sync_single_for_cpu(pdev, 1059 skb_reserve(skb, 2); /* align IP header */
1060 pci_unmap_addr(ce, dma_addr), 1060 skb_put(skb, len);
1061 pci_unmap_len(ce, dma_len), 1061 pci_dma_sync_single_for_cpu(pdev,
1062 PCI_DMA_FROMDEVICE);
1063 memcpy(skb->data, ce->skb->data + dma_pad, len);
1064 pci_dma_sync_single_for_device(pdev,
1065 pci_unmap_addr(ce, dma_addr), 1062 pci_unmap_addr(ce, dma_addr),
1066 pci_unmap_len(ce, dma_len), 1063 pci_unmap_len(ce, dma_len),
1067 PCI_DMA_FROMDEVICE); 1064 PCI_DMA_FROMDEVICE);
1068 } else if (!drop_thres) 1065 memcpy(skb->data, ce->skb->data, len);
1069 goto use_orig_buf; 1066 pci_dma_sync_single_for_device(pdev,
1070 1067 pci_unmap_addr(ce, dma_addr),
1068 pci_unmap_len(ce, dma_len),
1069 PCI_DMA_FROMDEVICE);
1071 recycle_fl_buf(fl, fl->cidx); 1070 recycle_fl_buf(fl, fl->cidx);
1072 return skb; 1071 return skb;
1073 } 1072 }
1074 1073
1075 if (fl->credits < drop_thres) { 1074use_orig_buf:
1075 if (fl->credits < 2) {
1076 recycle_fl_buf(fl, fl->cidx); 1076 recycle_fl_buf(fl, fl->cidx);
1077 return NULL; 1077 return NULL;
1078 } 1078 }
1079 1079
1080use_orig_buf:
1081 pci_unmap_single(pdev, pci_unmap_addr(ce, dma_addr), 1080 pci_unmap_single(pdev, pci_unmap_addr(ce, dma_addr),
1082 pci_unmap_len(ce, dma_len), PCI_DMA_FROMDEVICE); 1081 pci_unmap_len(ce, dma_len), PCI_DMA_FROMDEVICE);
1083 skb = ce->skb; 1082 skb = ce->skb;
1084 skb_reserve(skb, dma_pad); 1083 prefetch(skb->data);
1084
1085 skb_put(skb, len); 1085 skb_put(skb, len);
1086 return skb; 1086 return skb;
1087} 1087}
@@ -1359,27 +1359,25 @@ static void restart_sched(unsigned long arg)
1359 * 1359 *
1360 * Process an ingress ethernet pakcet and deliver it to the stack. 1360 * Process an ingress ethernet pakcet and deliver it to the stack.
1361 */ 1361 */
1362static int sge_rx(struct sge *sge, struct freelQ *fl, unsigned int len) 1362static void sge_rx(struct sge *sge, struct freelQ *fl, unsigned int len)
1363{ 1363{
1364 struct sk_buff *skb; 1364 struct sk_buff *skb;
1365 struct cpl_rx_pkt *p; 1365 const struct cpl_rx_pkt *p;
1366 struct adapter *adapter = sge->adapter; 1366 struct adapter *adapter = sge->adapter;
1367 struct sge_port_stats *st; 1367 struct sge_port_stats *st;
1368 1368
1369 skb = get_packet(adapter->pdev, fl, len - sge->rx_pkt_pad, 1369 skb = get_packet(adapter->pdev, fl, len - sge->rx_pkt_pad);
1370 sge->rx_pkt_pad, 2, SGE_RX_COPY_THRES,
1371 SGE_RX_DROP_THRES);
1372 if (unlikely(!skb)) { 1370 if (unlikely(!skb)) {
1373 sge->stats.rx_drops++; 1371 sge->stats.rx_drops++;
1374 return 0; 1372 return;
1375 } 1373 }
1376 1374
1377 p = (struct cpl_rx_pkt *)skb->data; 1375 p = (const struct cpl_rx_pkt *) skb->data;
1378 skb_pull(skb, sizeof(*p));
1379 if (p->iff >= adapter->params.nports) { 1376 if (p->iff >= adapter->params.nports) {
1380 kfree_skb(skb); 1377 kfree_skb(skb);
1381 return 0; 1378 return;
1382 } 1379 }
1380 __skb_pull(skb, sizeof(*p));
1383 1381
1384 skb->dev = adapter->port[p->iff].dev; 1382 skb->dev = adapter->port[p->iff].dev;
1385 skb->dev->last_rx = jiffies; 1383 skb->dev->last_rx = jiffies;
@@ -1411,7 +1409,6 @@ static int sge_rx(struct sge *sge, struct freelQ *fl, unsigned int len)
1411 netif_rx(skb); 1409 netif_rx(skb);
1412#endif 1410#endif
1413 } 1411 }
1414 return 0;
1415} 1412}
1416 1413
1417/* 1414/*
@@ -1493,12 +1490,11 @@ static int process_responses(struct adapter *adapter, int budget)
1493 struct sge *sge = adapter->sge; 1490 struct sge *sge = adapter->sge;
1494 struct respQ *q = &sge->respQ; 1491 struct respQ *q = &sge->respQ;
1495 struct respQ_e *e = &q->entries[q->cidx]; 1492 struct respQ_e *e = &q->entries[q->cidx];
1496 int budget_left = budget; 1493 int done = 0;
1497 unsigned int flags = 0; 1494 unsigned int flags = 0;
1498 unsigned int cmdq_processed[SGE_CMDQ_N] = {0, 0}; 1495 unsigned int cmdq_processed[SGE_CMDQ_N] = {0, 0};
1499 1496
1500 1497 while (done < budget && e->GenerationBit == q->genbit) {
1501 while (likely(budget_left && e->GenerationBit == q->genbit)) {
1502 flags |= e->Qsleeping; 1498 flags |= e->Qsleeping;
1503 1499
1504 cmdq_processed[0] += e->Cmdq0CreditReturn; 1500 cmdq_processed[0] += e->Cmdq0CreditReturn;
@@ -1508,14 +1504,16 @@ static int process_responses(struct adapter *adapter, int budget)
1508 * ping-pong of TX state information on MP where the sender 1504 * ping-pong of TX state information on MP where the sender
1509 * might run on a different CPU than this function... 1505 * might run on a different CPU than this function...
1510 */ 1506 */
1511 if (unlikely(flags & F_CMDQ0_ENABLE || cmdq_processed[0] > 64)) { 1507 if (unlikely((flags & F_CMDQ0_ENABLE) || cmdq_processed[0] > 64)) {
1512 flags = update_tx_info(adapter, flags, cmdq_processed[0]); 1508 flags = update_tx_info(adapter, flags, cmdq_processed[0]);
1513 cmdq_processed[0] = 0; 1509 cmdq_processed[0] = 0;
1514 } 1510 }
1511
1515 if (unlikely(cmdq_processed[1] > 16)) { 1512 if (unlikely(cmdq_processed[1] > 16)) {
1516 sge->cmdQ[1].processed += cmdq_processed[1]; 1513 sge->cmdQ[1].processed += cmdq_processed[1];
1517 cmdq_processed[1] = 0; 1514 cmdq_processed[1] = 0;
1518 } 1515 }
1516
1519 if (likely(e->DataValid)) { 1517 if (likely(e->DataValid)) {
1520 struct freelQ *fl = &sge->freelQ[e->FreelistQid]; 1518 struct freelQ *fl = &sge->freelQ[e->FreelistQid];
1521 1519
@@ -1525,12 +1523,16 @@ static int process_responses(struct adapter *adapter, int budget)
1525 else 1523 else
1526 sge_rx(sge, fl, e->BufferLength); 1524 sge_rx(sge, fl, e->BufferLength);
1527 1525
1526 ++done;
1527
1528 /* 1528 /*
1529 * Note: this depends on each packet consuming a 1529 * Note: this depends on each packet consuming a
1530 * single free-list buffer; cf. the BUG above. 1530 * single free-list buffer; cf. the BUG above.
1531 */ 1531 */
1532 if (++fl->cidx == fl->size) 1532 if (++fl->cidx == fl->size)
1533 fl->cidx = 0; 1533 fl->cidx = 0;
1534 prefetch(fl->centries[fl->cidx].skb);
1535
1534 if (unlikely(--fl->credits < 1536 if (unlikely(--fl->credits <
1535 fl->size - SGE_FREEL_REFILL_THRESH)) 1537 fl->size - SGE_FREEL_REFILL_THRESH))
1536 refill_free_list(sge, fl); 1538 refill_free_list(sge, fl);
@@ -1549,14 +1551,12 @@ static int process_responses(struct adapter *adapter, int budget)
1549 writel(q->credits, adapter->regs + A_SG_RSPQUEUECREDIT); 1551 writel(q->credits, adapter->regs + A_SG_RSPQUEUECREDIT);
1550 q->credits = 0; 1552 q->credits = 0;
1551 } 1553 }
1552 --budget_left;
1553 } 1554 }
1554 1555
1555 flags = update_tx_info(adapter, flags, cmdq_processed[0]); 1556 flags = update_tx_info(adapter, flags, cmdq_processed[0]);
1556 sge->cmdQ[1].processed += cmdq_processed[1]; 1557 sge->cmdQ[1].processed += cmdq_processed[1];
1557 1558
1558 budget -= budget_left; 1559 return done;
1559 return budget;
1560} 1560}
1561 1561
1562static inline int responses_pending(const struct adapter *adapter) 1562static inline int responses_pending(const struct adapter *adapter)
@@ -1581,11 +1581,14 @@ static int process_pure_responses(struct adapter *adapter)
1581 struct sge *sge = adapter->sge; 1581 struct sge *sge = adapter->sge;
1582 struct respQ *q = &sge->respQ; 1582 struct respQ *q = &sge->respQ;
1583 struct respQ_e *e = &q->entries[q->cidx]; 1583 struct respQ_e *e = &q->entries[q->cidx];
1584 const struct freelQ *fl = &sge->freelQ[e->FreelistQid];
1584 unsigned int flags = 0; 1585 unsigned int flags = 0;
1585 unsigned int cmdq_processed[SGE_CMDQ_N] = {0, 0}; 1586 unsigned int cmdq_processed[SGE_CMDQ_N] = {0, 0};
1586 1587
1588 prefetch(fl->centries[fl->cidx].skb);
1587 if (e->DataValid) 1589 if (e->DataValid)
1588 return 1; 1590 return 1;
1591
1589 do { 1592 do {
1590 flags |= e->Qsleeping; 1593 flags |= e->Qsleeping;
1591 1594