Diffstat (limited to 'drivers/net/chelsio/sge.c')
-rw-r--r--	drivers/net/chelsio/sge.c	328
1 files changed, 155 insertions, 173 deletions
diff --git a/drivers/net/chelsio/sge.c b/drivers/net/chelsio/sge.c
index 659cb2252e44..89a682702fa9 100644
--- a/drivers/net/chelsio/sge.c
+++ b/drivers/net/chelsio/sge.c
@@ -71,12 +71,9 @@
 #define SGE_FREEL_REFILL_THRESH	16
 #define SGE_RESPQ_E_N		1024
 #define SGE_INTRTIMER_NRES	1000
-#define SGE_RX_COPY_THRES	256
 #define SGE_RX_SM_BUF_SIZE	1536
 #define SGE_TX_DESC_MAX_PLEN	16384

-# define SGE_RX_DROP_THRES 2
-
 #define SGE_RESPQ_REPLENISH_THRES (SGE_RESPQ_E_N / 4)

 /*
@@ -85,10 +82,6 @@
  */
 #define TX_RECLAIM_PERIOD (HZ / 4)

-#ifndef NET_IP_ALIGN
-# define NET_IP_ALIGN 2
-#endif
-
 #define M_CMD_LEN 0x7fffffff
 #define V_CMD_LEN(v) (v)
 #define G_CMD_LEN(v) ((v) & M_CMD_LEN)
@@ -195,7 +188,7 @@ struct cmdQ {
 	struct cmdQ_e  *entries;	/* HW command descriptor Q */
 	struct cmdQ_ce *centries;	/* SW command context descriptor Q */
 	dma_addr_t	dma_addr;	/* DMA addr HW command descriptor Q */
 	spinlock_t	lock;		/* Lock to protect cmdQ enqueuing */
 };

 struct freelQ {
@@ -241,9 +234,9 @@ struct sched_port {
 /* Per T204 device */
 struct sched {
 	ktime_t		last_updated;	/* last time quotas were computed */
 	unsigned int	max_avail;	/* max bits to be sent to any port */
 	unsigned int	port;		/* port index (round robin ports) */
 	unsigned int	num;		/* num skbs in per port queues */
 	struct sched_port p[MAX_NPORTS];
 	struct tasklet_struct sched_tsk;/* tasklet used to run scheduler */
 };
@@ -259,10 +252,10 @@ static void restart_sched(unsigned long);
  * contention.
  */
 struct sge {
 	struct adapter *adapter;	/* adapter backpointer */
 	struct net_device *netdev;	/* netdevice backpointer */
 	struct freelQ	freelQ[SGE_FREELQ_N]; /* buffer free lists */
 	struct respQ	respQ;		/* response Q */
 	unsigned long	stopped_tx_queues; /* bitmap of suspended Tx queues */
 	unsigned int	rx_pkt_pad;	/* RX padding for L2 packets */
 	unsigned int	jumbo_fl;	/* jumbo freelist Q index */
@@ -460,7 +453,7 @@ static struct sk_buff *sched_skb(struct sge *sge, struct sk_buff *skb,
 	if (credits < MAX_SKB_FRAGS + 1)
 		goto out;

- again:
+again:
 	for (i = 0; i < MAX_NPORTS; i++) {
 		s->port = ++s->port & (MAX_NPORTS - 1);
 		skbq = &s->p[s->port].skbq;
@@ -483,8 +476,8 @@ static struct sk_buff *sched_skb(struct sge *sge, struct sk_buff *skb,
 	if (update-- && sched_update_avail(sge))
 		goto again;

- out:
+out:
 	/* If there are more pending skbs, we use the hardware to schedule us
 	 * again.
 	 */
 	if (s->num && !skb) {
@@ -575,11 +568,10 @@ static int alloc_rx_resources(struct sge *sge, struct sge_params *p)
 		q->size = p->freelQ_size[i];
 		q->dma_offset = sge->rx_pkt_pad ? 0 : NET_IP_ALIGN;
 		size = sizeof(struct freelQ_e) * q->size;
-		q->entries = (struct freelQ_e *)
-			pci_alloc_consistent(pdev, size, &q->dma_addr);
+		q->entries = pci_alloc_consistent(pdev, size, &q->dma_addr);
 		if (!q->entries)
 			goto err_no_mem;
-		memset(q->entries, 0, size);
+
 		size = sizeof(struct freelQ_ce) * q->size;
 		q->centries = kzalloc(size, GFP_KERNEL);
 		if (!q->centries)
@@ -613,11 +605,10 @@ static int alloc_rx_resources(struct sge *sge, struct sge_params *p)
 	sge->respQ.size = SGE_RESPQ_E_N;
 	sge->respQ.credits = 0;
 	size = sizeof(struct respQ_e) * sge->respQ.size;
-	sge->respQ.entries = (struct respQ_e *)
+	sge->respQ.entries =
 		pci_alloc_consistent(pdev, size, &sge->respQ.dma_addr);
 	if (!sge->respQ.entries)
 		goto err_no_mem;
-	memset(sge->respQ.entries, 0, size);
 	return 0;

 err_no_mem:
@@ -637,20 +628,12 @@ static void free_cmdQ_buffers(struct sge *sge, struct cmdQ *q, unsigned int n)
 	q->in_use -= n;
 	ce = &q->centries[cidx];
 	while (n--) {
-		if (q->sop) {
-			if (likely(pci_unmap_len(ce, dma_len))) {
-				pci_unmap_single(pdev,
-						 pci_unmap_addr(ce, dma_addr),
-						 pci_unmap_len(ce, dma_len),
-						 PCI_DMA_TODEVICE);
+		if (likely(pci_unmap_len(ce, dma_len))) {
+			pci_unmap_single(pdev, pci_unmap_addr(ce, dma_addr),
+					 pci_unmap_len(ce, dma_len),
+					 PCI_DMA_TODEVICE);
+			if (q->sop)
 				q->sop = 0;
-			}
-		} else {
-			if (likely(pci_unmap_len(ce, dma_len))) {
-				pci_unmap_page(pdev, pci_unmap_addr(ce, dma_addr),
-					       pci_unmap_len(ce, dma_len),
-					       PCI_DMA_TODEVICE);
-			}
 		}
 		if (ce->skb) {
 			dev_kfree_skb_any(ce->skb);
@@ -711,11 +694,10 @@ static int alloc_tx_resources(struct sge *sge, struct sge_params *p)
 		q->stop_thres = 0;
 		spin_lock_init(&q->lock);
 		size = sizeof(struct cmdQ_e) * q->size;
-		q->entries = (struct cmdQ_e *)
-			pci_alloc_consistent(pdev, size, &q->dma_addr);
+		q->entries = pci_alloc_consistent(pdev, size, &q->dma_addr);
 		if (!q->entries)
 			goto err_no_mem;
-		memset(q->entries, 0, size);
+
 		size = sizeof(struct cmdQ_ce) * q->size;
 		q->centries = kzalloc(size, GFP_KERNEL);
 		if (!q->centries)
@@ -770,7 +752,7 @@ void t1_set_vlan_accel(struct adapter *adapter, int on_off)
 static void configure_sge(struct sge *sge, struct sge_params *p)
 {
 	struct adapter *ap = sge->adapter;

 	writel(0, ap->regs + A_SG_CONTROL);
 	setup_ring_params(ap, sge->cmdQ[0].dma_addr, sge->cmdQ[0].size,
 			  A_SG_CMD0BASELWR, A_SG_CMD0BASEUPR, A_SG_CMD0SIZE);
@@ -850,7 +832,6 @@ static void refill_free_list(struct sge *sge, struct freelQ *q)
 	struct freelQ_e *e = &q->entries[q->pidx];
 	unsigned int dma_len = q->rx_buffer_size - q->dma_offset;

-
 	while (q->credits < q->size) {
 		struct sk_buff *skb;
 		dma_addr_t mapping;
@@ -862,6 +843,8 @@ static void refill_free_list(struct sge *sge, struct freelQ *q)
 		skb_reserve(skb, q->dma_offset);
 		mapping = pci_map_single(pdev, skb->data, dma_len,
 					 PCI_DMA_FROMDEVICE);
+		skb_reserve(skb, sge->rx_pkt_pad);
+
 		ce->skb = skb;
 		pci_unmap_addr_set(ce, dma_addr, mapping);
 		pci_unmap_len_set(ce, dma_len, dma_len);
@@ -881,7 +864,6 @@ static void refill_free_list(struct sge *sge, struct freelQ *q)
 		}
 		q->credits++;
 	}
-
 }

 /*
@@ -1041,6 +1023,10 @@ static void recycle_fl_buf(struct freelQ *fl, int idx)
 	}
 }

+static int copybreak __read_mostly = 256;
+module_param(copybreak, int, 0);
+MODULE_PARM_DESC(copybreak, "Receive copy threshold");
+
 /**
  *	get_packet - return the next ingress packet buffer
  *	@pdev: the PCI device that received the packet
@@ -1060,45 +1046,42 @@ static void recycle_fl_buf(struct freelQ *fl, int idx)
  *	be copied but there is no memory for the copy.
  */
 static inline struct sk_buff *get_packet(struct pci_dev *pdev,
-					 struct freelQ *fl, unsigned int len,
-					 int dma_pad, int skb_pad,
-					 unsigned int copy_thres,
-					 unsigned int drop_thres)
+					 struct freelQ *fl, unsigned int len)
 {
 	struct sk_buff *skb;
-	struct freelQ_ce *ce = &fl->centries[fl->cidx];
+	const struct freelQ_ce *ce = &fl->centries[fl->cidx];

-	if (len < copy_thres) {
-		skb = alloc_skb(len + skb_pad, GFP_ATOMIC);
-		if (likely(skb != NULL)) {
-			skb_reserve(skb, skb_pad);
-			skb_put(skb, len);
-			pci_dma_sync_single_for_cpu(pdev,
-					pci_unmap_addr(ce, dma_addr),
-					pci_unmap_len(ce, dma_len),
-					PCI_DMA_FROMDEVICE);
-			memcpy(skb->data, ce->skb->data + dma_pad, len);
-			pci_dma_sync_single_for_device(pdev,
-					pci_unmap_addr(ce, dma_addr),
-					pci_unmap_len(ce, dma_len),
-					PCI_DMA_FROMDEVICE);
-		} else if (!drop_thres)
+	if (len < copybreak) {
+		skb = alloc_skb(len + 2, GFP_ATOMIC);
+		if (!skb)
 			goto use_orig_buf;

+		skb_reserve(skb, 2);	/* align IP header */
+		skb_put(skb, len);
+		pci_dma_sync_single_for_cpu(pdev,
+					    pci_unmap_addr(ce, dma_addr),
+					    pci_unmap_len(ce, dma_len),
+					    PCI_DMA_FROMDEVICE);
+		memcpy(skb->data, ce->skb->data, len);
+		pci_dma_sync_single_for_device(pdev,
+					       pci_unmap_addr(ce, dma_addr),
+					       pci_unmap_len(ce, dma_len),
+					       PCI_DMA_FROMDEVICE);
 		recycle_fl_buf(fl, fl->cidx);
 		return skb;
 	}

-	if (fl->credits < drop_thres) {
+use_orig_buf:
+	if (fl->credits < 2) {
 		recycle_fl_buf(fl, fl->cidx);
 		return NULL;
 	}

-use_orig_buf:
 	pci_unmap_single(pdev, pci_unmap_addr(ce, dma_addr),
 			 pci_unmap_len(ce, dma_len), PCI_DMA_FROMDEVICE);
 	skb = ce->skb;
-	skb_reserve(skb, dma_pad);
+	prefetch(skb->data);
+
 	skb_put(skb, len);
 	return skb;
 }
@@ -1137,6 +1120,7 @@ static void unexpected_offload(struct adapter *adapter, struct freelQ *fl)
 static inline unsigned int compute_large_page_tx_descs(struct sk_buff *skb)
 {
 	unsigned int count = 0;
+
 	if (PAGE_SIZE > SGE_TX_DESC_MAX_PLEN) {
 		unsigned int nfrags = skb_shinfo(skb)->nr_frags;
 		unsigned int i, len = skb->len - skb->data_len;
@@ -1343,7 +1327,7 @@ static void restart_sched(unsigned long arg)
 	while ((skb = sched_skb(sge, NULL, credits)) != NULL) {
 		unsigned int genbit, pidx, count;
 		count = 1 + skb_shinfo(skb)->nr_frags;
 		count += compute_large_page_tx_descs(skb);
 		q->in_use += count;
 		genbit = q->genbit;
 		pidx = q->pidx;
@@ -1375,27 +1359,25 @@ static void restart_sched(unsigned long arg)
  *
  *	Process an ingress ethernet pakcet and deliver it to the stack.
  */
-static int sge_rx(struct sge *sge, struct freelQ *fl, unsigned int len)
+static void sge_rx(struct sge *sge, struct freelQ *fl, unsigned int len)
 {
 	struct sk_buff *skb;
-	struct cpl_rx_pkt *p;
+	const struct cpl_rx_pkt *p;
 	struct adapter *adapter = sge->adapter;
 	struct sge_port_stats *st;

-	skb = get_packet(adapter->pdev, fl, len - sge->rx_pkt_pad,
-			 sge->rx_pkt_pad, 2, SGE_RX_COPY_THRES,
-			 SGE_RX_DROP_THRES);
+	skb = get_packet(adapter->pdev, fl, len - sge->rx_pkt_pad);
 	if (unlikely(!skb)) {
 		sge->stats.rx_drops++;
-		return 0;
+		return;
 	}

-	p = (struct cpl_rx_pkt *)skb->data;
-	skb_pull(skb, sizeof(*p));
+	p = (const struct cpl_rx_pkt *) skb->data;
 	if (p->iff >= adapter->params.nports) {
 		kfree_skb(skb);
-		return 0;
+		return;
 	}
+	__skb_pull(skb, sizeof(*p));

 	skb->dev = adapter->port[p->iff].dev;
 	skb->dev->last_rx = jiffies;
@@ -1427,7 +1409,6 @@ static int sge_rx(struct sge *sge, struct freelQ *fl, unsigned int len)
 		netif_rx(skb);
 #endif
 	}
-	return 0;
 }

 /*
@@ -1448,29 +1429,28 @@ static inline int enough_free_Tx_descs(const struct cmdQ *q)
 static void restart_tx_queues(struct sge *sge)
 {
 	struct adapter *adap = sge->adapter;
+	int i;

-	if (enough_free_Tx_descs(&sge->cmdQ[0])) {
-		int i;
+	if (!enough_free_Tx_descs(&sge->cmdQ[0]))
+		return;

-		for_each_port(adap, i) {
-			struct net_device *nd = adap->port[i].dev;
+	for_each_port(adap, i) {
+		struct net_device *nd = adap->port[i].dev;

-			if (test_and_clear_bit(nd->if_port,
-					       &sge->stopped_tx_queues) &&
-			    netif_running(nd)) {
-				sge->stats.cmdQ_restarted[2]++;
-				netif_wake_queue(nd);
-			}
+		if (test_and_clear_bit(nd->if_port, &sge->stopped_tx_queues) &&
+		    netif_running(nd)) {
+			sge->stats.cmdQ_restarted[2]++;
+			netif_wake_queue(nd);
 		}
 	}
 }

 /*
  * update_tx_info is called from the interrupt handler/NAPI to return cmdQ0
  * information.
  */
 static unsigned int update_tx_info(struct adapter *adapter,
 				   unsigned int flags,
 				   unsigned int pr0)
 {
 	struct sge *sge = adapter->sge;
@@ -1510,29 +1490,30 @@ static int process_responses(struct adapter *adapter, int budget)
 	struct sge *sge = adapter->sge;
 	struct respQ *q = &sge->respQ;
 	struct respQ_e *e = &q->entries[q->cidx];
-	int budget_left = budget;
+	int done = 0;
 	unsigned int flags = 0;
 	unsigned int cmdq_processed[SGE_CMDQ_N] = {0, 0};
-

-	while (likely(budget_left && e->GenerationBit == q->genbit)) {
+	while (done < budget && e->GenerationBit == q->genbit) {
 		flags |= e->Qsleeping;

 		cmdq_processed[0] += e->Cmdq0CreditReturn;
 		cmdq_processed[1] += e->Cmdq1CreditReturn;

 		/* We batch updates to the TX side to avoid cacheline
 		 * ping-pong of TX state information on MP where the sender
 		 * might run on a different CPU than this function...
 		 */
-		if (unlikely(flags & F_CMDQ0_ENABLE || cmdq_processed[0] > 64)) {
+		if (unlikely((flags & F_CMDQ0_ENABLE) || cmdq_processed[0] > 64)) {
 			flags = update_tx_info(adapter, flags, cmdq_processed[0]);
 			cmdq_processed[0] = 0;
 		}
+
 		if (unlikely(cmdq_processed[1] > 16)) {
 			sge->cmdQ[1].processed += cmdq_processed[1];
 			cmdq_processed[1] = 0;
 		}
+
 		if (likely(e->DataValid)) {
 			struct freelQ *fl = &sge->freelQ[e->FreelistQid];

@@ -1542,12 +1523,16 @@ static int process_responses(struct adapter *adapter, int budget)
 			else
 				sge_rx(sge, fl, e->BufferLength);

+			++done;
+
 			/*
 			 * Note: this depends on each packet consuming a
 			 * single free-list buffer; cf. the BUG above.
 			 */
 			if (++fl->cidx == fl->size)
 				fl->cidx = 0;
+			prefetch(fl->centries[fl->cidx].skb);
+
 			if (unlikely(--fl->credits <
 				     fl->size - SGE_FREEL_REFILL_THRESH))
 				refill_free_list(sge, fl);
@@ -1566,14 +1551,20 @@ static int process_responses(struct adapter *adapter, int budget)
 			writel(q->credits, adapter->regs + A_SG_RSPQUEUECREDIT);
 			q->credits = 0;
 		}
-		--budget_left;
 	}

 	flags = update_tx_info(adapter, flags, cmdq_processed[0]);
 	sge->cmdQ[1].processed += cmdq_processed[1];

-	budget -= budget_left;
-	return budget;
+	return done;
+}
+
+static inline int responses_pending(const struct adapter *adapter)
+{
+	const struct respQ *Q = &adapter->sge->respQ;
+	const struct respQ_e *e = &Q->entries[Q->cidx];
+
+	return (e->GenerationBit == Q->genbit);
 }

 #ifdef CONFIG_CHELSIO_T1_NAPI
@@ -1585,19 +1576,25 @@ static int process_responses(struct adapter *adapter, int budget)
  * which the caller must ensure is a valid pure response. Returns 1 if it
  * encounters a valid data-carrying response, 0 otherwise.
  */
-static int process_pure_responses(struct adapter *adapter, struct respQ_e *e)
+static int process_pure_responses(struct adapter *adapter)
 {
 	struct sge *sge = adapter->sge;
 	struct respQ *q = &sge->respQ;
+	struct respQ_e *e = &q->entries[q->cidx];
+	const struct freelQ *fl = &sge->freelQ[e->FreelistQid];
 	unsigned int flags = 0;
 	unsigned int cmdq_processed[SGE_CMDQ_N] = {0, 0};

+	prefetch(fl->centries[fl->cidx].skb);
+	if (e->DataValid)
+		return 1;
+
 	do {
 		flags |= e->Qsleeping;

 		cmdq_processed[0] += e->Cmdq0CreditReturn;
 		cmdq_processed[1] += e->Cmdq1CreditReturn;

 		e++;
 		if (unlikely(++q->cidx == q->size)) {
 			q->cidx = 0;
@@ -1613,7 +1610,7 @@ static int process_pure_responses(struct adapter *adapter, struct respQ_e *e)
 		sge->stats.pure_rsps++;
 	} while (e->GenerationBit == q->genbit && !e->DataValid);

 	flags = update_tx_info(adapter, flags, cmdq_processed[0]);
 	sge->cmdQ[1].processed += cmdq_processed[1];

 	return e->GenerationBit == q->genbit;
@@ -1627,23 +1624,20 @@ static int process_pure_responses(struct adapter *adapter, struct respQ_e *e)
 int t1_poll(struct net_device *dev, int *budget)
 {
 	struct adapter *adapter = dev->priv;
-	int effective_budget = min(*budget, dev->quota);
-	int work_done = process_responses(adapter, effective_budget);
+	int work_done;

+	work_done = process_responses(adapter, min(*budget, dev->quota));
 	*budget -= work_done;
 	dev->quota -= work_done;

-	if (work_done >= effective_budget)
+	if (unlikely(responses_pending(adapter)))
 		return 1;

-	spin_lock_irq(&adapter->async_lock);
-	__netif_rx_complete(dev);
+	netif_rx_complete(dev);
 	writel(adapter->sge->respQ.cidx, adapter->regs + A_SG_SLEEPING);
-	writel(adapter->slow_intr_mask | F_PL_INTR_SGE_DATA,
-	       adapter->regs + A_PL_ENABLE);
-	spin_unlock_irq(&adapter->async_lock);

 	return 0;
+
 }

 /*
@@ -1652,44 +1646,33 @@ int t1_poll(struct net_device *dev, int *budget)
 irqreturn_t t1_interrupt(int irq, void *data)
 {
 	struct adapter *adapter = data;
-	struct net_device *dev = adapter->sge->netdev;
 	struct sge *sge = adapter->sge;
-	u32 cause;
-	int handled = 0;
+	int handled;

-	cause = readl(adapter->regs + A_PL_CAUSE);
-	if (cause == 0 || cause == ~0)
-		return IRQ_NONE;
+	if (likely(responses_pending(adapter))) {
+		struct net_device *dev = sge->netdev;

-	spin_lock(&adapter->async_lock);
-	if (cause & F_PL_INTR_SGE_DATA) {
-		struct respQ *q = &adapter->sge->respQ;
-		struct respQ_e *e = &q->entries[q->cidx];
-
-		handled = 1;
-		writel(F_PL_INTR_SGE_DATA, adapter->regs + A_PL_CAUSE);
-
-		if (e->GenerationBit == q->genbit &&
-		    __netif_rx_schedule_prep(dev)) {
-			if (e->DataValid || process_pure_responses(adapter, e)) {
-				/* mask off data IRQ */
-				writel(adapter->slow_intr_mask,
-				       adapter->regs + A_PL_ENABLE);
-				__netif_rx_schedule(sge->netdev);
-				goto unlock;
-			}
-			/* no data, no NAPI needed */
-			netif_poll_enable(dev);
+		writel(F_PL_INTR_SGE_DATA, adapter->regs + A_PL_CAUSE);

+		if (__netif_rx_schedule_prep(dev)) {
+			if (process_pure_responses(adapter))
+				__netif_rx_schedule(dev);
+			else {
+				/* no data, no NAPI needed */
+				writel(sge->respQ.cidx, adapter->regs + A_SG_SLEEPING);
+				netif_poll_enable(dev);	/* undo schedule_prep */
+			}
 		}
-		writel(q->cidx, adapter->regs + A_SG_SLEEPING);
-	} else
-		handled = t1_slow_intr_handler(adapter);
+		return IRQ_HANDLED;
+	}
+
+	spin_lock(&adapter->async_lock);
+	handled = t1_slow_intr_handler(adapter);
+	spin_unlock(&adapter->async_lock);

 	if (!handled)
 		sge->stats.unhandled_irqs++;
-unlock:
-	spin_unlock(&adapter->async_lock);
+
 	return IRQ_RETVAL(handled != 0);
 }

@@ -1712,17 +1695,13 @@ unlock:
 irqreturn_t t1_interrupt(int irq, void *cookie)
 {
 	int work_done;
-	struct respQ_e *e;
 	struct adapter *adapter = cookie;
-	struct respQ *Q = &adapter->sge->respQ;

 	spin_lock(&adapter->async_lock);
-	e = &Q->entries[Q->cidx];
-	prefetch(e);

 	writel(F_PL_INTR_SGE_DATA, adapter->regs + A_PL_CAUSE);

-	if (likely(e->GenerationBit == Q->genbit))
+	if (likely(responses_pending(adapter)))
 		work_done = process_responses(adapter, -1);
 	else
 		work_done = t1_slow_intr_handler(adapter);
@@ -1796,7 +1775,7 @@ static int t1_sge_tx(struct sk_buff *skb, struct adapter *adapter,
 	 * through the scheduler.
 	 */
 	if (sge->tx_sched && !qid && skb->dev) {
- use_sched:
+use_sched:
 		use_sched_skb = 1;
 		/* Note that the scheduler might return a different skb than
 		 * the one passed in.
@@ -1900,7 +1879,7 @@ int t1_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		cpl = (struct cpl_tx_pkt *)hdr;
 	} else {
 		/*
 		 * Packets shorter than ETH_HLEN can break the MAC, drop them
 		 * early. Also, we may get oversized packets because some
 		 * parts of the kernel don't handle our unusual hard_header_len
 		 * right, drop those too.
@@ -1984,9 +1963,9 @@ send:
 	 * then silently discard to avoid leak.
 	 */
 	if (unlikely(ret != NETDEV_TX_OK && skb != orig_skb)) {
 		dev_kfree_skb_any(skb);
 		ret = NETDEV_TX_OK;
 	}
 	return ret;
 }

@@ -2099,31 +2078,35 @@ static void espibug_workaround_t204(unsigned long data)

 	if (adapter->open_device_map & PORT_MASK) {
 		int i;
-		if (t1_espi_get_mon_t204(adapter, &(seop[0]), 0) < 0) {
+
+		if (t1_espi_get_mon_t204(adapter, &(seop[0]), 0) < 0)
 			return;
-		}
+
 		for (i = 0; i < nports; i++) {
 			struct sk_buff *skb = sge->espibug_skb[i];
-			if ( (netif_running(adapter->port[i].dev)) &&
-			     !(netif_queue_stopped(adapter->port[i].dev)) &&
-			     (seop[i] && ((seop[i] & 0xfff) == 0)) &&
-			     skb ) {
-				if (!skb->cb[0]) {
-					u8 ch_mac_addr[ETH_ALEN] =
-					    {0x0, 0x7, 0x43, 0x0, 0x0, 0x0};
-					memcpy(skb->data + sizeof(struct cpl_tx_pkt),
-					       ch_mac_addr, ETH_ALEN);
-					memcpy(skb->data + skb->len - 10,
-					       ch_mac_addr, ETH_ALEN);
-					skb->cb[0] = 0xff;
-				}
-
-				/* bump the reference count to avoid freeing of
-				 * the skb once the DMA has completed.
-				 */
-				skb = skb_get(skb);
-				t1_sge_tx(skb, adapter, 0, adapter->port[i].dev);
+
+			if (!netif_running(adapter->port[i].dev) ||
+			    netif_queue_stopped(adapter->port[i].dev) ||
+			    !seop[i] || ((seop[i] & 0xfff) != 0) || !skb)
+				continue;
+
+			if (!skb->cb[0]) {
+				u8 ch_mac_addr[ETH_ALEN] = {
+					0x0, 0x7, 0x43, 0x0, 0x0, 0x0
+				};
+
+				memcpy(skb->data + sizeof(struct cpl_tx_pkt),
+				       ch_mac_addr, ETH_ALEN);
+				memcpy(skb->data + skb->len - 10,
+				       ch_mac_addr, ETH_ALEN);
+				skb->cb[0] = 0xff;
 			}
+
+			/* bump the reference count to avoid freeing of
+			 * the skb once the DMA has completed.
+			 */
+			skb = skb_get(skb);
+			t1_sge_tx(skb, adapter, 0, adapter->port[i].dev);
 		}
 	}
 	mod_timer(&sge->espibug_timer, jiffies + sge->espibug_timeout);
@@ -2192,9 +2175,8 @@ struct sge * __devinit t1_sge_create(struct adapter *adapter,
 		if (adapter->params.nports > 1) {
 			tx_sched_init(sge);
 			sge->espibug_timer.function = espibug_workaround_t204;
-		} else {
+		} else
 			sge->espibug_timer.function = espibug_workaround;
-		}
 		sge->espibug_timer.data = (unsigned long)sge->adapter;

 		sge->espibug_timeout = 1;
@@ -2202,7 +2184,7 @@ struct sge * __devinit t1_sge_create(struct adapter *adapter,
 		if (adapter->params.nports > 1)
 			sge->espibug_timeout = HZ/100;
 	}


 	p->cmdQ_size[0] = SGE_CMDQ0_E_N;
 	p->cmdQ_size[1] = SGE_CMDQ1_E_N;