Diffstat (limited to 'drivers/net/chelsio/sge.c')
-rw-r--r--  drivers/net/chelsio/sge.c | 867
1 file changed, 716 insertions, 151 deletions
diff --git a/drivers/net/chelsio/sge.c b/drivers/net/chelsio/sge.c
index 9799c12380fc..0ca8d876e16f 100644
--- a/drivers/net/chelsio/sge.c
+++ b/drivers/net/chelsio/sge.c
@@ -42,12 +42,14 @@
42#include <linux/types.h> 42#include <linux/types.h>
43#include <linux/errno.h> 43#include <linux/errno.h>
44#include <linux/pci.h> 44#include <linux/pci.h>
45#include <linux/ktime.h>
45#include <linux/netdevice.h> 46#include <linux/netdevice.h>
46#include <linux/etherdevice.h> 47#include <linux/etherdevice.h>
47#include <linux/if_vlan.h> 48#include <linux/if_vlan.h>
48#include <linux/skbuff.h> 49#include <linux/skbuff.h>
49#include <linux/init.h> 50#include <linux/init.h>
50#include <linux/mm.h> 51#include <linux/mm.h>
52#include <linux/tcp.h>
51#include <linux/ip.h> 53#include <linux/ip.h>
52#include <linux/in.h> 54#include <linux/in.h>
53#include <linux/if_arp.h> 55#include <linux/if_arp.h>
@@ -57,10 +59,8 @@
57#include "regs.h" 59#include "regs.h"
58#include "espi.h" 60#include "espi.h"
59 61
60 62/* This belongs in if_ether.h */
61#ifdef NETIF_F_TSO 63#define ETH_P_CPL5 0xf
62#include <linux/tcp.h>
63#endif
64 64
65#define SGE_CMDQ_N 2 65#define SGE_CMDQ_N 2
66#define SGE_FREELQ_N 2 66#define SGE_FREELQ_N 2
@@ -73,6 +73,7 @@
73#define SGE_INTRTIMER_NRES 1000 73#define SGE_INTRTIMER_NRES 1000
74#define SGE_RX_COPY_THRES 256 74#define SGE_RX_COPY_THRES 256
75#define SGE_RX_SM_BUF_SIZE 1536 75#define SGE_RX_SM_BUF_SIZE 1536
76#define SGE_TX_DESC_MAX_PLEN 16384
76 77
77# define SGE_RX_DROP_THRES 2 78# define SGE_RX_DROP_THRES 2
78 79
@@ -184,17 +185,17 @@ struct cmdQ {
184 unsigned long status; /* HW DMA fetch status */ 185 unsigned long status; /* HW DMA fetch status */
185 unsigned int in_use; /* # of in-use command descriptors */ 186 unsigned int in_use; /* # of in-use command descriptors */
186 unsigned int size; /* # of descriptors */ 187 unsigned int size; /* # of descriptors */
187 unsigned int processed; /* total # of descs HW has processed */ 188 unsigned int processed; /* total # of descs HW has processed */
188 unsigned int cleaned; /* total # of descs SW has reclaimed */ 189 unsigned int cleaned; /* total # of descs SW has reclaimed */
189 unsigned int stop_thres; /* SW TX queue suspend threshold */ 190 unsigned int stop_thres; /* SW TX queue suspend threshold */
190 u16 pidx; /* producer index (SW) */ 191 u16 pidx; /* producer index (SW) */
191 u16 cidx; /* consumer index (HW) */ 192 u16 cidx; /* consumer index (HW) */
192 u8 genbit; /* current generation (=valid) bit */ 193 u8 genbit; /* current generation (=valid) bit */
193 u8 sop; /* is next entry start of packet? */ 194 u8 sop; /* is next entry start of packet? */
194 struct cmdQ_e *entries; /* HW command descriptor Q */ 195 struct cmdQ_e *entries; /* HW command descriptor Q */
195 struct cmdQ_ce *centries; /* SW command context descriptor Q */ 196 struct cmdQ_ce *centries; /* SW command context descriptor Q */
196 spinlock_t lock; /* Lock to protect cmdQ enqueuing */
197 dma_addr_t dma_addr; /* DMA addr HW command descriptor Q */ 197 dma_addr_t dma_addr; /* DMA addr HW command descriptor Q */
198 spinlock_t lock; /* Lock to protect cmdQ enqueuing */
198}; 199};
199 200
200struct freelQ { 201struct freelQ {
@@ -203,8 +204,8 @@ struct freelQ {
203 u16 pidx; /* producer index (SW) */ 204 u16 pidx; /* producer index (SW) */
204 u16 cidx; /* consumer index (HW) */ 205 u16 cidx; /* consumer index (HW) */
205 u16 rx_buffer_size; /* Buffer size on this free list */ 206 u16 rx_buffer_size; /* Buffer size on this free list */
206 u16 dma_offset; /* DMA offset to align IP headers */ 207 u16 dma_offset; /* DMA offset to align IP headers */
207 u16 recycleq_idx; /* skb recycle q to use */ 208 u16 recycleq_idx; /* skb recycle q to use */
208 u8 genbit; /* current generation (=valid) bit */ 209 u8 genbit; /* current generation (=valid) bit */
209 struct freelQ_e *entries; /* HW freelist descriptor Q */ 210 struct freelQ_e *entries; /* HW freelist descriptor Q */
210 struct freelQ_ce *centries; /* SW freelist context descriptor Q */ 211 struct freelQ_ce *centries; /* SW freelist context descriptor Q */
@@ -226,6 +227,29 @@ enum {
226 CMDQ_STAT_LAST_PKT_DB = 2 /* last packet rung the doorbell */ 227 CMDQ_STAT_LAST_PKT_DB = 2 /* last packet rung the doorbell */
227}; 228};
228 229
230/* T204 TX SW scheduler */
231
232/* Per T204 TX port */
233struct sched_port {
234 unsigned int avail; /* available bits - quota */
235 unsigned int drain_bits_per_1024ns; /* drain rate */
236 unsigned int speed; /* drain rate, mbps */
237 unsigned int mtu; /* mtu size */
238 struct sk_buff_head skbq; /* pending skbs */
239};
240
241/* Per T204 device */
242struct sched {
243 ktime_t last_updated; /* last time quotas were computed */
244 unsigned int max_avail; /* max bits to be sent to any port */
245 unsigned int port; /* port index (round robin ports) */
246 unsigned int num; /* num skbs in per port queues */
247 struct sched_port p[MAX_NPORTS];
248 struct tasklet_struct sched_tsk;/* tasklet used to run scheduler */
249};
250static void restart_sched(unsigned long);
251
252
229/* 253/*
230 * Main SGE data structure 254 * Main SGE data structure
231 * 255 *
@@ -243,18 +267,240 @@ struct sge {
243 unsigned int rx_pkt_pad; /* RX padding for L2 packets */ 267 unsigned int rx_pkt_pad; /* RX padding for L2 packets */
244 unsigned int jumbo_fl; /* jumbo freelist Q index */ 268 unsigned int jumbo_fl; /* jumbo freelist Q index */
245 unsigned int intrtimer_nres; /* no-resource interrupt timer */ 269 unsigned int intrtimer_nres; /* no-resource interrupt timer */
246 unsigned int fixed_intrtimer;/* non-adaptive interrupt timer */ 270 unsigned int fixed_intrtimer;/* non-adaptive interrupt timer */
247 struct timer_list tx_reclaim_timer; /* reclaims TX buffers */ 271 struct timer_list tx_reclaim_timer; /* reclaims TX buffers */
248 struct timer_list espibug_timer; 272 struct timer_list espibug_timer;
249 unsigned int espibug_timeout; 273 unsigned long espibug_timeout;
250 struct sk_buff *espibug_skb; 274 struct sk_buff *espibug_skb[MAX_NPORTS];
251 u32 sge_control; /* shadow value of sge control reg */ 275 u32 sge_control; /* shadow value of sge control reg */
252 struct sge_intr_counts stats; 276 struct sge_intr_counts stats;
253 struct sge_port_stats port_stats[MAX_NPORTS]; 277 struct sge_port_stats *port_stats[MAX_NPORTS];
278 struct sched *tx_sched;
254 struct cmdQ cmdQ[SGE_CMDQ_N] ____cacheline_aligned_in_smp; 279 struct cmdQ cmdQ[SGE_CMDQ_N] ____cacheline_aligned_in_smp;
255}; 280};
256 281
257/* 282/*
283 * stop tasklet and free all pending skb's
284 */
285static void tx_sched_stop(struct sge *sge)
286{
287 struct sched *s = sge->tx_sched;
288 int i;
289
290 tasklet_kill(&s->sched_tsk);
291
292 for (i = 0; i < MAX_NPORTS; i++)
293 __skb_queue_purge(&s->p[s->port].skbq);
294}
295
296/*
297 * t1_sched_update_parms() is called when the MTU or link speed changes. It
298 * re-computes scheduler parameters to cope with the change.
299 */
300unsigned int t1_sched_update_parms(struct sge *sge, unsigned int port,
301 unsigned int mtu, unsigned int speed)
302{
303 struct sched *s = sge->tx_sched;
304 struct sched_port *p = &s->p[port];
305 unsigned int max_avail_segs;
306
307 pr_debug("t1_sched_update_params mtu=%d speed=%d\n", mtu, speed);
308 if (speed)
309 p->speed = speed;
310 if (mtu)
311 p->mtu = mtu;
312
313 if (speed || mtu) {
314 unsigned long long drain = 1024ULL * p->speed * (p->mtu - 40);
315 do_div(drain, (p->mtu + 50) * 1000);
316 p->drain_bits_per_1024ns = (unsigned int) drain;
317
318 if (p->speed < 1000)
319 p->drain_bits_per_1024ns =
320 90 * p->drain_bits_per_1024ns / 100;
321 }
322
323 if (board_info(sge->adapter)->board == CHBT_BOARD_CHT204) {
324 p->drain_bits_per_1024ns -= 16;
325 s->max_avail = max(4096U, p->mtu + 16 + 14 + 4);
326 max_avail_segs = max(1U, 4096 / (p->mtu - 40));
327 } else {
328 s->max_avail = 16384;
329 max_avail_segs = max(1U, 9000 / (p->mtu - 40));
330 }
331
332 pr_debug("t1_sched_update_parms: mtu %u speed %u max_avail %u "
333 "max_avail_segs %u drain_bits_per_1024ns %u\n", p->mtu,
334 p->speed, s->max_avail, max_avail_segs,
335 p->drain_bits_per_1024ns);
336
337 return max_avail_segs * (p->mtu - 40);
338}
339
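As a rough worked example (illustrative only, not part of the patch), a 1 Gbps port with a 1500-byte MTU gives:

	unsigned long long drain = 1024ULL * 1000 * (1500 - 40);	/* 1,495,040,000 */
	do_div(drain, (1500 + 50) * 1000);				/* / 1,550,000 -> drain = 964 */

so the port is credited roughly 964 payload bits per 1024 ns, i.e. a little under line rate once the 40-byte TCP/IP header and the assumed ~50 bytes of per-frame overhead are taken out; the sub-gigabit and CHT204 branches then trim that figure slightly further.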
340/*
341 * t1_sched_set_max_avail_bytes() tells the scheduler the maximum amount of
342 * data that can be pushed per port.
343 */
344void t1_sched_set_max_avail_bytes(struct sge *sge, unsigned int val)
345{
346 struct sched *s = sge->tx_sched;
347 unsigned int i;
348
349 s->max_avail = val;
350 for (i = 0; i < MAX_NPORTS; i++)
351 t1_sched_update_parms(sge, i, 0, 0);
352}
353
354/*
355 * t1_sched_set_drain_bits_per_us() tells the scheduler at which rate a port
356 * is draining.
357 */
358void t1_sched_set_drain_bits_per_us(struct sge *sge, unsigned int port,
359 unsigned int val)
360{
361 struct sched *s = sge->tx_sched;
362 struct sched_port *p = &s->p[port];
363 p->drain_bits_per_1024ns = val * 1024 / 1000;
364 t1_sched_update_parms(sge, port, 0, 0);
365}
366
367
368/*
369 * get_clock() implements a ns clock (see ktime_get)
370 */
371static inline ktime_t get_clock(void)
372{
373 struct timespec ts;
374
375 ktime_get_ts(&ts);
376 return timespec_to_ktime(ts);
377}
378
379/*
380 * tx_sched_init() allocates resources and does basic initialization.
381 */
382static int tx_sched_init(struct sge *sge)
383{
384 struct sched *s;
385 int i;
386
387 s = kzalloc(sizeof (struct sched), GFP_KERNEL);
388 if (!s)
389 return -ENOMEM;
390
391 pr_debug("tx_sched_init\n");
392 tasklet_init(&s->sched_tsk, restart_sched, (unsigned long) sge);
393 sge->tx_sched = s;
394
395 for (i = 0; i < MAX_NPORTS; i++) {
396 skb_queue_head_init(&s->p[i].skbq);
397 t1_sched_update_parms(sge, i, 1500, 1000);
398 }
399
400 return 0;
401}
402
403/*
404 * sched_update_avail() computes the delta since the last time it was called
405 * and updates the per port quota (number of bits that can be sent to any
406 * port).
407 */
408static inline int sched_update_avail(struct sge *sge)
409{
410 struct sched *s = sge->tx_sched;
411 ktime_t now = get_clock();
412 unsigned int i;
413 long long delta_time_ns;
414
415 delta_time_ns = ktime_to_ns(ktime_sub(now, s->last_updated));
416
417 pr_debug("sched_update_avail delta=%lld\n", delta_time_ns);
418 if (delta_time_ns < 15000)
419 return 0;
420
421 for (i = 0; i < MAX_NPORTS; i++) {
422 struct sched_port *p = &s->p[i];
423 unsigned int delta_avail;
424
425 delta_avail = (p->drain_bits_per_1024ns * delta_time_ns) >> 13;
426 p->avail = min(p->avail + delta_avail, s->max_avail);
427 }
428
429 s->last_updated = now;
430
431 return 1;
432}
433
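A note on units, since the sched_port comment above describes avail as "available bits": drain_bits_per_1024ns * delta_time_ns is (bits per 1024 ns) x ns, so shifting right by 10 yields the bits that could have drained in the interval and the extra shift by 3 converts bits to bytes; the combined ">> 13" therefore makes p->avail a byte quota, which is what sched_skb() below compares against skb->len. A minimal sketch of the same conversion, with a hypothetical helper name:

static inline unsigned int quota_delta_bytes(unsigned int drain_bits_per_1024ns,
					     unsigned long long delta_time_ns)
{
	/* bits that could have been transmitted during delta_time_ns */
	unsigned long long bits = (drain_bits_per_1024ns * delta_time_ns) >> 10;

	return bits >> 3;	/* bits -> bytes, same as the ">> 13" above */
}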
434/*
435 * sched_skb() is called from two different places. In the tx path, any
436 * packet generating load on an output port will call sched_skb()
437 * (skb != NULL). In addition, sched_skb() is called from the irq/soft irq
438 * context (skb == NULL).
439 * The scheduler only returns a skb (which will then be sent) if the
440 * length of the skb is <= the current quota of the output port.
441 */
442static struct sk_buff *sched_skb(struct sge *sge, struct sk_buff *skb,
443 unsigned int credits)
444{
445 struct sched *s = sge->tx_sched;
446 struct sk_buff_head *skbq;
447 unsigned int i, len, update = 1;
448
449 pr_debug("sched_skb %p\n", skb);
450 if (!skb) {
451 if (!s->num)
452 return NULL;
453 } else {
454 skbq = &s->p[skb->dev->if_port].skbq;
455 __skb_queue_tail(skbq, skb);
456 s->num++;
457 skb = NULL;
458 }
459
460 if (credits < MAX_SKB_FRAGS + 1)
461 goto out;
462
463 again:
464 for (i = 0; i < MAX_NPORTS; i++) {
465 s->port = ++s->port & (MAX_NPORTS - 1);
466 skbq = &s->p[s->port].skbq;
467
468 skb = skb_peek(skbq);
469
470 if (!skb)
471 continue;
472
473 len = skb->len;
474 if (len <= s->p[s->port].avail) {
475 s->p[s->port].avail -= len;
476 s->num--;
477 __skb_unlink(skb, skbq);
478 goto out;
479 }
480 skb = NULL;
481 }
482
483 if (update-- && sched_update_avail(sge))
484 goto again;
485
486 out:
487 /* If there are more pending skbs, we use the hardware to schedule us
488 * again.
489 */
490 if (s->num && !skb) {
491 struct cmdQ *q = &sge->cmdQ[0];
492 clear_bit(CMDQ_STAT_LAST_PKT_DB, &q->status);
493 if (test_and_set_bit(CMDQ_STAT_RUNNING, &q->status) == 0) {
494 set_bit(CMDQ_STAT_LAST_PKT_DB, &q->status);
495 writel(F_CMDQ0_ENABLE, sge->adapter->regs + A_SG_DOORBELL);
496 }
497 }
498 pr_debug("sched_skb ret %p\n", skb);
499
500 return skb;
501}
502
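To make the two calling modes described above concrete, here is a minimal sketch (not from the patch; the helper name is made up and the loop simply mirrors what restart_sched() does further down): the TX path hands in a packet and may get back a different one to transmit immediately, while the tasklet keeps passing NULL until the per-port quotas or the cmdQ0 credits run out.

static void sched_skb_usage_sketch(struct sge *sge, struct sk_buff *skb,
				   unsigned int credits)
{
	struct sk_buff *out;

	/* hot TX path: queue skb, possibly get one back to send now */
	out = sched_skb(sge, skb, credits);
	if (out)
		; /* write TX descriptors for 'out' */

	/* restart tasklet: drain whatever the quotas allow */
	while ((out = sched_skb(sge, NULL, credits)) != NULL)
		; /* write TX descriptors for 'out' and update credits */
}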
503/*
258 * PIO to indicate that memory mapped Q contains valid descriptor(s). 504 * PIO to indicate that memory mapped Q contains valid descriptor(s).
259 */ 505 */
260static inline void doorbell_pio(struct adapter *adapter, u32 val) 506static inline void doorbell_pio(struct adapter *adapter, u32 val)
@@ -335,10 +581,9 @@ static int alloc_rx_resources(struct sge *sge, struct sge_params *p)
335 goto err_no_mem; 581 goto err_no_mem;
336 memset(q->entries, 0, size); 582 memset(q->entries, 0, size);
337 size = sizeof(struct freelQ_ce) * q->size; 583 size = sizeof(struct freelQ_ce) * q->size;
338 q->centries = kmalloc(size, GFP_KERNEL); 584 q->centries = kzalloc(size, GFP_KERNEL);
339 if (!q->centries) 585 if (!q->centries)
340 goto err_no_mem; 586 goto err_no_mem;
341 memset(q->centries, 0, size);
342 } 587 }
343 588
344 /* 589 /*
@@ -351,8 +596,11 @@ static int alloc_rx_resources(struct sge *sge, struct sge_params *p)
351 sge->freelQ[!sge->jumbo_fl].rx_buffer_size = SGE_RX_SM_BUF_SIZE + 596 sge->freelQ[!sge->jumbo_fl].rx_buffer_size = SGE_RX_SM_BUF_SIZE +
352 sizeof(struct cpl_rx_data) + 597 sizeof(struct cpl_rx_data) +
353 sge->freelQ[!sge->jumbo_fl].dma_offset; 598 sge->freelQ[!sge->jumbo_fl].dma_offset;
354 sge->freelQ[sge->jumbo_fl].rx_buffer_size = (16 * 1024) - 599
355 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); 600 size = (16 * 1024) -
601 SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
602
603 sge->freelQ[sge->jumbo_fl].rx_buffer_size = size;
356 604
357 /* 605 /*
358 * Setup which skb recycle Q should be used when recycling buffers from 606 * Setup which skb recycle Q should be used when recycling buffers from
@@ -389,17 +637,23 @@ static void free_cmdQ_buffers(struct sge *sge, struct cmdQ *q, unsigned int n)
389 q->in_use -= n; 637 q->in_use -= n;
390 ce = &q->centries[cidx]; 638 ce = &q->centries[cidx];
391 while (n--) { 639 while (n--) {
392 if (q->sop) 640 if (q->sop) {
393 pci_unmap_single(pdev, pci_unmap_addr(ce, dma_addr), 641 if (likely(pci_unmap_len(ce, dma_len))) {
394 pci_unmap_len(ce, dma_len), 642 pci_unmap_single(pdev,
395 PCI_DMA_TODEVICE); 643 pci_unmap_addr(ce, dma_addr),
396 else 644 pci_unmap_len(ce, dma_len),
397 pci_unmap_page(pdev, pci_unmap_addr(ce, dma_addr), 645 PCI_DMA_TODEVICE);
398 pci_unmap_len(ce, dma_len), 646 q->sop = 0;
399 PCI_DMA_TODEVICE); 647 }
400 q->sop = 0; 648 } else {
649 if (likely(pci_unmap_len(ce, dma_len))) {
650 pci_unmap_page(pdev, pci_unmap_addr(ce, dma_addr),
651 pci_unmap_len(ce, dma_len),
652 PCI_DMA_TODEVICE);
653 }
654 }
401 if (ce->skb) { 655 if (ce->skb) {
402 dev_kfree_skb(ce->skb); 656 dev_kfree_skb_any(ce->skb);
403 q->sop = 1; 657 q->sop = 1;
404 } 658 }
405 ce++; 659 ce++;
@@ -463,10 +717,9 @@ static int alloc_tx_resources(struct sge *sge, struct sge_params *p)
463 goto err_no_mem; 717 goto err_no_mem;
464 memset(q->entries, 0, size); 718 memset(q->entries, 0, size);
465 size = sizeof(struct cmdQ_ce) * q->size; 719 size = sizeof(struct cmdQ_ce) * q->size;
466 q->centries = kmalloc(size, GFP_KERNEL); 720 q->centries = kzalloc(size, GFP_KERNEL);
467 if (!q->centries) 721 if (!q->centries)
468 goto err_no_mem; 722 goto err_no_mem;
469 memset(q->centries, 0, size);
470 } 723 }
471 724
472 /* 725 /*
@@ -506,7 +759,7 @@ void t1_set_vlan_accel(struct adapter *adapter, int on_off)
506 sge->sge_control |= F_VLAN_XTRACT; 759 sge->sge_control |= F_VLAN_XTRACT;
507 if (adapter->open_device_map) { 760 if (adapter->open_device_map) {
508 writel(sge->sge_control, adapter->regs + A_SG_CONTROL); 761 writel(sge->sge_control, adapter->regs + A_SG_CONTROL);
509 readl(adapter->regs + A_SG_CONTROL); /* flush */ 762 readl(adapter->regs + A_SG_CONTROL); /* flush */
510 } 763 }
511} 764}
512 765
@@ -540,7 +793,6 @@ static void configure_sge(struct sge *sge, struct sge_params *p)
540 sge->sge_control = F_CMDQ0_ENABLE | F_CMDQ1_ENABLE | F_FL0_ENABLE | 793 sge->sge_control = F_CMDQ0_ENABLE | F_CMDQ1_ENABLE | F_FL0_ENABLE |
541 F_FL1_ENABLE | F_CPL_ENABLE | F_RESPONSE_QUEUE_ENABLE | 794 F_FL1_ENABLE | F_CPL_ENABLE | F_RESPONSE_QUEUE_ENABLE |
542 V_CMDQ_PRIORITY(2) | F_DISABLE_CMDQ1_GTS | F_ISCSI_COALESCE | 795 V_CMDQ_PRIORITY(2) | F_DISABLE_CMDQ1_GTS | F_ISCSI_COALESCE |
543 F_DISABLE_FL0_GTS | F_DISABLE_FL1_GTS |
544 V_RX_PKT_OFFSET(sge->rx_pkt_pad); 796 V_RX_PKT_OFFSET(sge->rx_pkt_pad);
545 797
546#if defined(__BIG_ENDIAN_BITFIELD) 798#if defined(__BIG_ENDIAN_BITFIELD)
@@ -568,9 +820,12 @@ static inline unsigned int jumbo_payload_capacity(const struct sge *sge)
568 */ 820 */
569void t1_sge_destroy(struct sge *sge) 821void t1_sge_destroy(struct sge *sge)
570{ 822{
571 if (sge->espibug_skb) 823 int i;
572 kfree_skb(sge->espibug_skb);
573 824
825 for_each_port(sge->adapter, i)
826 free_percpu(sge->port_stats[i]);
827
828 kfree(sge->tx_sched);
574 free_tx_resources(sge); 829 free_tx_resources(sge);
575 free_rx_resources(sge); 830 free_rx_resources(sge);
576 kfree(sge); 831 kfree(sge);
@@ -735,14 +990,28 @@ int t1_sge_intr_error_handler(struct sge *sge)
735 return 0; 990 return 0;
736} 991}
737 992
738const struct sge_intr_counts *t1_sge_get_intr_counts(struct sge *sge) 993const struct sge_intr_counts *t1_sge_get_intr_counts(const struct sge *sge)
739{ 994{
740 return &sge->stats; 995 return &sge->stats;
741} 996}
742 997
743const struct sge_port_stats *t1_sge_get_port_stats(struct sge *sge, int port) 998void t1_sge_get_port_stats(const struct sge *sge, int port,
999 struct sge_port_stats *ss)
744{ 1000{
745 return &sge->port_stats[port]; 1001 int cpu;
1002
1003 memset(ss, 0, sizeof(*ss));
1004 for_each_possible_cpu(cpu) {
1005 struct sge_port_stats *st = per_cpu_ptr(sge->port_stats[port], cpu);
1006
1007 ss->rx_packets += st->rx_packets;
1008 ss->rx_cso_good += st->rx_cso_good;
1009 ss->tx_packets += st->tx_packets;
1010 ss->tx_cso += st->tx_cso;
1011 ss->tx_tso += st->tx_tso;
1012 ss->vlan_xtract += st->vlan_xtract;
1013 ss->vlan_insert += st->vlan_insert;
1014 }
746} 1015}
747 1016
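The read side above folds one struct sge_port_stats per possible CPU into the caller's buffer. For reference, a minimal sketch of the write side of the same per-CPU pattern (the real allocation and fast-path updates appear later in this patch, in t1_sge_create() and sge_rx(); the function name here is hypothetical):

static int port_stats_sketch(struct sge *sge, int port)
{
	struct sge_port_stats *st;

	sge->port_stats[port] = alloc_percpu(struct sge_port_stats);
	if (!sge->port_stats[port])
		return -ENOMEM;

	/* fast path: bump this CPU's private counter, no locking needed */
	st = per_cpu_ptr(sge->port_stats[port], smp_processor_id());
	st->rx_packets++;

	return 0;
}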
748/** 1017/**
@@ -856,6 +1125,99 @@ static void unexpected_offload(struct adapter *adapter, struct freelQ *fl)
856} 1125}
857 1126
858/* 1127/*
1128 * T1/T2 SGE limits the maximum DMA size per TX descriptor to
1129 * SGE_TX_DESC_MAX_PLEN (16KB). If the PAGE_SIZE is larger than 16KB, the
1130 * stack might send more than SGE_TX_DESC_MAX_PLEN in a contiguous manner.
1131 * Note that the *_large_page_tx_descs stuff will be optimized out when
1132 * PAGE_SIZE <= SGE_TX_DESC_MAX_PLEN.
1133 *
1134 * compute_large_page_tx_descs() computes how many additional descriptors are
1135 * required to break down the stack's request.
1136 */
1137static inline unsigned int compute_large_page_tx_descs(struct sk_buff *skb)
1138{
1139 unsigned int count = 0;
1140 if (PAGE_SIZE > SGE_TX_DESC_MAX_PLEN) {
1141 unsigned int nfrags = skb_shinfo(skb)->nr_frags;
1142 unsigned int i, len = skb->len - skb->data_len;
1143 while (len > SGE_TX_DESC_MAX_PLEN) {
1144 count++;
1145 len -= SGE_TX_DESC_MAX_PLEN;
1146 }
1147 for (i = 0; nfrags--; i++) {
1148 skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
1149 len = frag->size;
1150 while (len > SGE_TX_DESC_MAX_PLEN) {
1151 count++;
1152 len -= SGE_TX_DESC_MAX_PLEN;
1153 }
1154 }
1155 }
1156 return count;
1157}
1158
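A quick worked example, assuming a 64 KB PAGE_SIZE (the only configuration in which this code is not compiled away):

/*
 * Linear skb of 40960 bytes, no frags:
 *	40960 > 16384  ->  count = 1, len = 24576
 *	24576 > 16384  ->  count = 2, len =  8192  (loop exits)
 * compute_large_page_tx_descs() returns 2, so t1_sge_tx() reserves
 * 1 + nr_frags + 2 = 3 descriptors, matching the three descriptors
 * write_tx_descs() actually emits for that buffer.
 */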
1159/*
1160 * Write a cmdQ entry.
1161 *
1162 * Since this function writes the 'flags' field, it must not be used to
1163 * write the first cmdQ entry.
1164 */
1165static inline void write_tx_desc(struct cmdQ_e *e, dma_addr_t mapping,
1166 unsigned int len, unsigned int gen,
1167 unsigned int eop)
1168{
1169 if (unlikely(len > SGE_TX_DESC_MAX_PLEN))
1170 BUG();
1171 e->addr_lo = (u32)mapping;
1172 e->addr_hi = (u64)mapping >> 32;
1173 e->len_gen = V_CMD_LEN(len) | V_CMD_GEN1(gen);
1174 e->flags = F_CMD_DATAVALID | V_CMD_EOP(eop) | V_CMD_GEN2(gen);
1175}
1176
1177/*
1178 * See comment for previous function.
1179 *
1180 * write_large_page_tx_descs() writes additional SGE tx descriptors if
1181 * *desc_len exceeds HW's capability.
1182 */
1183static inline unsigned int write_large_page_tx_descs(unsigned int pidx,
1184 struct cmdQ_e **e,
1185 struct cmdQ_ce **ce,
1186 unsigned int *gen,
1187 dma_addr_t *desc_mapping,
1188 unsigned int *desc_len,
1189 unsigned int nfrags,
1190 struct cmdQ *q)
1191{
1192 if (PAGE_SIZE > SGE_TX_DESC_MAX_PLEN) {
1193 struct cmdQ_e *e1 = *e;
1194 struct cmdQ_ce *ce1 = *ce;
1195
1196 while (*desc_len > SGE_TX_DESC_MAX_PLEN) {
1197 *desc_len -= SGE_TX_DESC_MAX_PLEN;
1198 write_tx_desc(e1, *desc_mapping, SGE_TX_DESC_MAX_PLEN,
1199 *gen, nfrags == 0 && *desc_len == 0);
1200 ce1->skb = NULL;
1201 pci_unmap_len_set(ce1, dma_len, 0);
1202 *desc_mapping += SGE_TX_DESC_MAX_PLEN;
1203 if (*desc_len) {
1204 ce1++;
1205 e1++;
1206 if (++pidx == q->size) {
1207 pidx = 0;
1208 *gen ^= 1;
1209 ce1 = q->centries;
1210 e1 = q->entries;
1211 }
1212 }
1213 }
1214 *e = e1;
1215 *ce = ce1;
1216 }
1217 return pidx;
1218}
1219
1220/*
859 * Write the command descriptors to transmit the given skb starting at 1221 * Write the command descriptors to transmit the given skb starting at
860 * descriptor pidx with the given generation. 1222 * descriptor pidx with the given generation.
861 */ 1223 */
@@ -863,50 +1225,84 @@ static inline void write_tx_descs(struct adapter *adapter, struct sk_buff *skb,
863 unsigned int pidx, unsigned int gen, 1225 unsigned int pidx, unsigned int gen,
864 struct cmdQ *q) 1226 struct cmdQ *q)
865{ 1227{
866 dma_addr_t mapping; 1228 dma_addr_t mapping, desc_mapping;
867 struct cmdQ_e *e, *e1; 1229 struct cmdQ_e *e, *e1;
868 struct cmdQ_ce *ce; 1230 struct cmdQ_ce *ce;
869 unsigned int i, flags, nfrags = skb_shinfo(skb)->nr_frags; 1231 unsigned int i, flags, first_desc_len, desc_len,
1232 nfrags = skb_shinfo(skb)->nr_frags;
870 1233
871 mapping = pci_map_single(adapter->pdev, skb->data, 1234 e = e1 = &q->entries[pidx];
872 skb->len - skb->data_len, PCI_DMA_TODEVICE);
873 ce = &q->centries[pidx]; 1235 ce = &q->centries[pidx];
1236
1237 mapping = pci_map_single(adapter->pdev, skb->data,
1238 skb->len - skb->data_len, PCI_DMA_TODEVICE);
1239
1240 desc_mapping = mapping;
1241 desc_len = skb->len - skb->data_len;
1242
1243 flags = F_CMD_DATAVALID | F_CMD_SOP |
1244 V_CMD_EOP(nfrags == 0 && desc_len <= SGE_TX_DESC_MAX_PLEN) |
1245 V_CMD_GEN2(gen);
1246 first_desc_len = (desc_len <= SGE_TX_DESC_MAX_PLEN) ?
1247 desc_len : SGE_TX_DESC_MAX_PLEN;
1248 e->addr_lo = (u32)desc_mapping;
1249 e->addr_hi = (u64)desc_mapping >> 32;
1250 e->len_gen = V_CMD_LEN(first_desc_len) | V_CMD_GEN1(gen);
1251 ce->skb = NULL;
1252 pci_unmap_len_set(ce, dma_len, 0);
1253
1254 if (PAGE_SIZE > SGE_TX_DESC_MAX_PLEN &&
1255 desc_len > SGE_TX_DESC_MAX_PLEN) {
1256 desc_mapping += first_desc_len;
1257 desc_len -= first_desc_len;
1258 e1++;
1259 ce++;
1260 if (++pidx == q->size) {
1261 pidx = 0;
1262 gen ^= 1;
1263 e1 = q->entries;
1264 ce = q->centries;
1265 }
1266 pidx = write_large_page_tx_descs(pidx, &e1, &ce, &gen,
1267 &desc_mapping, &desc_len,
1268 nfrags, q);
1269
1270 if (likely(desc_len))
1271 write_tx_desc(e1, desc_mapping, desc_len, gen,
1272 nfrags == 0);
1273 }
1274
874 ce->skb = NULL; 1275 ce->skb = NULL;
875 pci_unmap_addr_set(ce, dma_addr, mapping); 1276 pci_unmap_addr_set(ce, dma_addr, mapping);
876 pci_unmap_len_set(ce, dma_len, skb->len - skb->data_len); 1277 pci_unmap_len_set(ce, dma_len, skb->len - skb->data_len);
877 1278
878 flags = F_CMD_DATAVALID | F_CMD_SOP | V_CMD_EOP(nfrags == 0) | 1279 for (i = 0; nfrags--; i++) {
879 V_CMD_GEN2(gen);
880 e = &q->entries[pidx];
881 e->addr_lo = (u32)mapping;
882 e->addr_hi = (u64)mapping >> 32;
883 e->len_gen = V_CMD_LEN(skb->len - skb->data_len) | V_CMD_GEN1(gen);
884 for (e1 = e, i = 0; nfrags--; i++) {
885 skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; 1280 skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
886
887 ce++;
888 e1++; 1281 e1++;
1282 ce++;
889 if (++pidx == q->size) { 1283 if (++pidx == q->size) {
890 pidx = 0; 1284 pidx = 0;
891 gen ^= 1; 1285 gen ^= 1;
892 ce = q->centries;
893 e1 = q->entries; 1286 e1 = q->entries;
1287 ce = q->centries;
894 } 1288 }
895 1289
896 mapping = pci_map_page(adapter->pdev, frag->page, 1290 mapping = pci_map_page(adapter->pdev, frag->page,
897 frag->page_offset, frag->size, 1291 frag->page_offset, frag->size,
898 PCI_DMA_TODEVICE); 1292 PCI_DMA_TODEVICE);
1293 desc_mapping = mapping;
1294 desc_len = frag->size;
1295
1296 pidx = write_large_page_tx_descs(pidx, &e1, &ce, &gen,
1297 &desc_mapping, &desc_len,
1298 nfrags, q);
1299 if (likely(desc_len))
1300 write_tx_desc(e1, desc_mapping, desc_len, gen,
1301 nfrags == 0);
899 ce->skb = NULL; 1302 ce->skb = NULL;
900 pci_unmap_addr_set(ce, dma_addr, mapping); 1303 pci_unmap_addr_set(ce, dma_addr, mapping);
901 pci_unmap_len_set(ce, dma_len, frag->size); 1304 pci_unmap_len_set(ce, dma_len, frag->size);
902
903 e1->addr_lo = (u32)mapping;
904 e1->addr_hi = (u64)mapping >> 32;
905 e1->len_gen = V_CMD_LEN(frag->size) | V_CMD_GEN1(gen);
906 e1->flags = F_CMD_DATAVALID | V_CMD_EOP(nfrags == 0) |
907 V_CMD_GEN2(gen);
908 } 1305 }
909
910 ce->skb = skb; 1306 ce->skb = skb;
911 wmb(); 1307 wmb();
912 e->flags = flags; 1308 e->flags = flags;
@@ -920,26 +1316,56 @@ static inline void reclaim_completed_tx(struct sge *sge, struct cmdQ *q)
920 unsigned int reclaim = q->processed - q->cleaned; 1316 unsigned int reclaim = q->processed - q->cleaned;
921 1317
922 if (reclaim) { 1318 if (reclaim) {
1319 pr_debug("reclaim_completed_tx processed:%d cleaned:%d\n",
1320 q->processed, q->cleaned);
923 free_cmdQ_buffers(sge, q, reclaim); 1321 free_cmdQ_buffers(sge, q, reclaim);
924 q->cleaned += reclaim; 1322 q->cleaned += reclaim;
925 } 1323 }
926} 1324}
927 1325
928#ifndef SET_ETHTOOL_OPS
929# define __netif_rx_complete(dev) netif_rx_complete(dev)
930#endif
931
932/* 1326/*
933 * We cannot use the standard netif_rx_schedule_prep() because we have multiple 1327 * Called from tasklet. Checks the scheduler for any
934 * ports plus the TOE all multiplexing onto a single response queue, therefore 1328 * pending skbs that can be sent.
935 * accepting new responses cannot depend on the state of any particular port.
936 * So define our own equivalent that omits the netif_running() test.
937 */ 1329 */
938static inline int napi_schedule_prep(struct net_device *dev) 1330static void restart_sched(unsigned long arg)
939{ 1331{
940 return !test_and_set_bit(__LINK_STATE_RX_SCHED, &dev->state); 1332 struct sge *sge = (struct sge *) arg;
941} 1333 struct adapter *adapter = sge->adapter;
1334 struct cmdQ *q = &sge->cmdQ[0];
1335 struct sk_buff *skb;
1336 unsigned int credits, queued_skb = 0;
942 1337
1338 spin_lock(&q->lock);
1339 reclaim_completed_tx(sge, q);
1340
1341 credits = q->size - q->in_use;
1342 pr_debug("restart_sched credits=%d\n", credits);
1343 while ((skb = sched_skb(sge, NULL, credits)) != NULL) {
1344 unsigned int genbit, pidx, count;
1345 count = 1 + skb_shinfo(skb)->nr_frags;
1346 count += compute_large_page_tx_descs(skb);
1347 q->in_use += count;
1348 genbit = q->genbit;
1349 pidx = q->pidx;
1350 q->pidx += count;
1351 if (q->pidx >= q->size) {
1352 q->pidx -= q->size;
1353 q->genbit ^= 1;
1354 }
1355 write_tx_descs(adapter, skb, pidx, genbit, q);
1356 credits = q->size - q->in_use;
1357 queued_skb = 1;
1358 }
1359
1360 if (queued_skb) {
1361 clear_bit(CMDQ_STAT_LAST_PKT_DB, &q->status);
1362 if (test_and_set_bit(CMDQ_STAT_RUNNING, &q->status) == 0) {
1363 set_bit(CMDQ_STAT_LAST_PKT_DB, &q->status);
1364 writel(F_CMDQ0_ENABLE, adapter->regs + A_SG_DOORBELL);
1365 }
1366 }
1367 spin_unlock(&q->lock);
1368}
943 1369
944/** 1370/**
945 * sge_rx - process an ingress ethernet packet 1371 * sge_rx - process an ingress ethernet packet
@@ -954,31 +1380,39 @@ static int sge_rx(struct sge *sge, struct freelQ *fl, unsigned int len)
954 struct sk_buff *skb; 1380 struct sk_buff *skb;
955 struct cpl_rx_pkt *p; 1381 struct cpl_rx_pkt *p;
956 struct adapter *adapter = sge->adapter; 1382 struct adapter *adapter = sge->adapter;
1383 struct sge_port_stats *st;
957 1384
958 sge->stats.ethernet_pkts++;
959 skb = get_packet(adapter->pdev, fl, len - sge->rx_pkt_pad, 1385 skb = get_packet(adapter->pdev, fl, len - sge->rx_pkt_pad,
960 sge->rx_pkt_pad, 2, SGE_RX_COPY_THRES, 1386 sge->rx_pkt_pad, 2, SGE_RX_COPY_THRES,
961 SGE_RX_DROP_THRES); 1387 SGE_RX_DROP_THRES);
962 if (!skb) { 1388 if (unlikely(!skb)) {
963 sge->port_stats[0].rx_drops++; /* charge only port 0 for now */ 1389 sge->stats.rx_drops++;
964 return 0; 1390 return 0;
965 } 1391 }
966 1392
967 p = (struct cpl_rx_pkt *)skb->data; 1393 p = (struct cpl_rx_pkt *)skb->data;
968 skb_pull(skb, sizeof(*p)); 1394 skb_pull(skb, sizeof(*p));
1395 if (p->iff >= adapter->params.nports) {
1396 kfree_skb(skb);
1397 return 0;
1398 }
1399
969 skb->dev = adapter->port[p->iff].dev; 1400 skb->dev = adapter->port[p->iff].dev;
970 skb->dev->last_rx = jiffies; 1401 skb->dev->last_rx = jiffies;
1402 st = per_cpu_ptr(sge->port_stats[p->iff], smp_processor_id());
1403 st->rx_packets++;
1404
971 skb->protocol = eth_type_trans(skb, skb->dev); 1405 skb->protocol = eth_type_trans(skb, skb->dev);
972 if ((adapter->flags & RX_CSUM_ENABLED) && p->csum == 0xffff && 1406 if ((adapter->flags & RX_CSUM_ENABLED) && p->csum == 0xffff &&
973 skb->protocol == htons(ETH_P_IP) && 1407 skb->protocol == htons(ETH_P_IP) &&
974 (skb->data[9] == IPPROTO_TCP || skb->data[9] == IPPROTO_UDP)) { 1408 (skb->data[9] == IPPROTO_TCP || skb->data[9] == IPPROTO_UDP)) {
975 sge->port_stats[p->iff].rx_cso_good++; 1409 ++st->rx_cso_good;
976 skb->ip_summed = CHECKSUM_UNNECESSARY; 1410 skb->ip_summed = CHECKSUM_UNNECESSARY;
977 } else 1411 } else
978 skb->ip_summed = CHECKSUM_NONE; 1412 skb->ip_summed = CHECKSUM_NONE;
979 1413
980 if (unlikely(adapter->vlan_grp && p->vlan_valid)) { 1414 if (unlikely(adapter->vlan_grp && p->vlan_valid)) {
981 sge->port_stats[p->iff].vlan_xtract++; 1415 st->vlan_xtract++;
982 if (adapter->params.sge.polling) 1416 if (adapter->params.sge.polling)
983 vlan_hwaccel_receive_skb(skb, adapter->vlan_grp, 1417 vlan_hwaccel_receive_skb(skb, adapter->vlan_grp,
984 ntohs(p->vlan)); 1418 ntohs(p->vlan));
@@ -1039,18 +1473,24 @@ static unsigned int update_tx_info(struct adapter *adapter,
1039 struct cmdQ *cmdq = &sge->cmdQ[0]; 1473 struct cmdQ *cmdq = &sge->cmdQ[0];
1040 1474
1041 cmdq->processed += pr0; 1475 cmdq->processed += pr0;
1042 1476 if (flags & (F_FL0_ENABLE | F_FL1_ENABLE)) {
1477 freelQs_empty(sge);
1478 flags &= ~(F_FL0_ENABLE | F_FL1_ENABLE);
1479 }
1043 if (flags & F_CMDQ0_ENABLE) { 1480 if (flags & F_CMDQ0_ENABLE) {
1044 clear_bit(CMDQ_STAT_RUNNING, &cmdq->status); 1481 clear_bit(CMDQ_STAT_RUNNING, &cmdq->status);
1045 1482
1046 if (cmdq->cleaned + cmdq->in_use != cmdq->processed && 1483 if (cmdq->cleaned + cmdq->in_use != cmdq->processed &&
1047 !test_and_set_bit(CMDQ_STAT_LAST_PKT_DB, &cmdq->status)) { 1484 !test_and_set_bit(CMDQ_STAT_LAST_PKT_DB, &cmdq->status)) {
1048 set_bit(CMDQ_STAT_RUNNING, &cmdq->status); 1485 set_bit(CMDQ_STAT_RUNNING, &cmdq->status);
1049 writel(F_CMDQ0_ENABLE, adapter->regs + A_SG_DOORBELL); 1486 writel(F_CMDQ0_ENABLE, adapter->regs + A_SG_DOORBELL);
1050 } 1487 }
1051 flags &= ~F_CMDQ0_ENABLE; 1488 if (sge->tx_sched)
1489 tasklet_hi_schedule(&sge->tx_sched->sched_tsk);
1490
1491 flags &= ~F_CMDQ0_ENABLE;
1052 } 1492 }
1053 1493
1054 if (unlikely(sge->stopped_tx_queues != 0)) 1494 if (unlikely(sge->stopped_tx_queues != 0))
1055 restart_tx_queues(sge); 1495 restart_tx_queues(sge);
1056 1496
@@ -1241,20 +1681,21 @@ static irqreturn_t t1_interrupt_napi(int irq, void *data)
1241 if (e->GenerationBit == q->genbit) { 1681 if (e->GenerationBit == q->genbit) {
1242 if (e->DataValid || 1682 if (e->DataValid ||
1243 process_pure_responses(adapter, e)) { 1683 process_pure_responses(adapter, e)) {
1244 if (likely(napi_schedule_prep(sge->netdev))) 1684 if (likely(__netif_rx_schedule_prep(sge->netdev)))
1245 __netif_rx_schedule(sge->netdev); 1685 __netif_rx_schedule(sge->netdev);
1246 else 1686 else if (net_ratelimit())
1247 printk(KERN_CRIT 1687 printk(KERN_INFO
1248 "NAPI schedule failure!\n"); 1688 "NAPI schedule failure!\n");
1249 } else 1689 } else
1250 writel(q->cidx, adapter->regs + A_SG_SLEEPING); 1690 writel(q->cidx, adapter->regs + A_SG_SLEEPING);
1691
1251 handled = 1; 1692 handled = 1;
1252 goto unlock; 1693 goto unlock;
1253 } else 1694 } else
1254 writel(q->cidx, adapter->regs + A_SG_SLEEPING); 1695 writel(q->cidx, adapter->regs + A_SG_SLEEPING);
1255 } else 1696 } else if (readl(adapter->regs + A_PL_CAUSE) & F_PL_INTR_SGE_DATA) {
1256 if (readl(adapter->regs + A_PL_CAUSE) & F_PL_INTR_SGE_DATA) 1697 printk(KERN_ERR "data interrupt while NAPI running\n");
1257 printk(KERN_ERR "data interrupt while NAPI running\n"); 1698 }
1258 1699
1259 handled = t1_slow_intr_handler(adapter); 1700 handled = t1_slow_intr_handler(adapter);
1260 if (!handled) 1701 if (!handled)
@@ -1335,34 +1776,59 @@ static int t1_sge_tx(struct sk_buff *skb, struct adapter *adapter,
1335{ 1776{
1336 struct sge *sge = adapter->sge; 1777 struct sge *sge = adapter->sge;
1337 struct cmdQ *q = &sge->cmdQ[qid]; 1778 struct cmdQ *q = &sge->cmdQ[qid];
1338 unsigned int credits, pidx, genbit, count; 1779 unsigned int credits, pidx, genbit, count, use_sched_skb = 0;
1780
1781 if (!spin_trylock(&q->lock))
1782 return NETDEV_TX_LOCKED;
1339 1783
1340 spin_lock(&q->lock);
1341 reclaim_completed_tx(sge, q); 1784 reclaim_completed_tx(sge, q);
1342 1785
1343 pidx = q->pidx; 1786 pidx = q->pidx;
1344 credits = q->size - q->in_use; 1787 credits = q->size - q->in_use;
1345 count = 1 + skb_shinfo(skb)->nr_frags; 1788 count = 1 + skb_shinfo(skb)->nr_frags;
1789 count += compute_large_page_tx_descs(skb);
1346 1790
1347 { /* Ethernet packet */ 1791 /* Ethernet packet */
1348 if (unlikely(credits < count)) { 1792 if (unlikely(credits < count)) {
1793 if (!netif_queue_stopped(dev)) {
1349 netif_stop_queue(dev); 1794 netif_stop_queue(dev);
1350 set_bit(dev->if_port, &sge->stopped_tx_queues); 1795 set_bit(dev->if_port, &sge->stopped_tx_queues);
1351 sge->stats.cmdQ_full[2]++; 1796 sge->stats.cmdQ_full[2]++;
1352 spin_unlock(&q->lock); 1797 CH_ERR("%s: Tx ring full while queue awake!\n",
1353 if (!netif_queue_stopped(dev)) 1798 adapter->name);
1354 CH_ERR("%s: Tx ring full while queue awake!\n",
1355 adapter->name);
1356 return NETDEV_TX_BUSY;
1357 } 1799 }
1358 if (unlikely(credits - count < q->stop_thres)) { 1800 spin_unlock(&q->lock);
1359 sge->stats.cmdQ_full[2]++; 1801 return NETDEV_TX_BUSY;
1360 netif_stop_queue(dev); 1802 }
1361 set_bit(dev->if_port, &sge->stopped_tx_queues); 1803
1804 if (unlikely(credits - count < q->stop_thres)) {
1805 netif_stop_queue(dev);
1806 set_bit(dev->if_port, &sge->stopped_tx_queues);
1807 sge->stats.cmdQ_full[2]++;
1808 }
1809
1810 /* T204 cmdQ0 skbs that are destined for a certain port have to go
1811 * through the scheduler.
1812 */
1813 if (sge->tx_sched && !qid && skb->dev) {
1814 use_sched:
1815 use_sched_skb = 1;
1816 /* Note that the scheduler might return a different skb than
1817 * the one passed in.
1818 */
1819 skb = sched_skb(sge, skb, credits);
1820 if (!skb) {
1821 spin_unlock(&q->lock);
1822 return NETDEV_TX_OK;
1362 } 1823 }
1824 pidx = q->pidx;
1825 count = 1 + skb_shinfo(skb)->nr_frags;
1826 count += compute_large_page_tx_descs(skb);
1363 } 1827 }
1828
1364 q->in_use += count; 1829 q->in_use += count;
1365 genbit = q->genbit; 1830 genbit = q->genbit;
1831 pidx = q->pidx;
1366 q->pidx += count; 1832 q->pidx += count;
1367 if (q->pidx >= q->size) { 1833 if (q->pidx >= q->size) {
1368 q->pidx -= q->size; 1834 q->pidx -= q->size;
@@ -1388,6 +1854,14 @@ static int t1_sge_tx(struct sk_buff *skb, struct adapter *adapter,
1388 writel(F_CMDQ0_ENABLE, adapter->regs + A_SG_DOORBELL); 1854 writel(F_CMDQ0_ENABLE, adapter->regs + A_SG_DOORBELL);
1389 } 1855 }
1390 } 1856 }
1857
1858 if (use_sched_skb) {
1859 if (spin_trylock(&q->lock)) {
1860 credits = q->size - q->in_use;
1861 skb = NULL;
1862 goto use_sched;
1863 }
1864 }
1391 return NETDEV_TX_OK; 1865 return NETDEV_TX_OK;
1392} 1866}
1393 1867
@@ -1412,16 +1886,20 @@ static inline int eth_hdr_len(const void *data)
1412int t1_start_xmit(struct sk_buff *skb, struct net_device *dev) 1886int t1_start_xmit(struct sk_buff *skb, struct net_device *dev)
1413{ 1887{
1414 struct adapter *adapter = dev->priv; 1888 struct adapter *adapter = dev->priv;
1415 struct sge_port_stats *st = &adapter->sge->port_stats[dev->if_port];
1416 struct sge *sge = adapter->sge; 1889 struct sge *sge = adapter->sge;
1890 struct sge_port_stats *st = per_cpu_ptr(sge->port_stats[dev->if_port], smp_processor_id());
1417 struct cpl_tx_pkt *cpl; 1891 struct cpl_tx_pkt *cpl;
1892 struct sk_buff *orig_skb = skb;
1893 int ret;
1894
1895 if (skb->protocol == htons(ETH_P_CPL5))
1896 goto send;
1418 1897
1419#ifdef NETIF_F_TSO 1898 if (skb_shinfo(skb)->gso_size) {
1420 if (skb_is_gso(skb)) {
1421 int eth_type; 1899 int eth_type;
1422 struct cpl_tx_pkt_lso *hdr; 1900 struct cpl_tx_pkt_lso *hdr;
1423 1901
1424 st->tso++; 1902 ++st->tx_tso;
1425 1903
1426 eth_type = skb->nh.raw - skb->data == ETH_HLEN ? 1904 eth_type = skb->nh.raw - skb->data == ETH_HLEN ?
1427 CPL_ETH_II : CPL_ETH_II_VLAN; 1905 CPL_ETH_II : CPL_ETH_II_VLAN;
@@ -1432,13 +1910,10 @@ int t1_start_xmit(struct sk_buff *skb, struct net_device *dev)
1432 hdr->ip_hdr_words = skb->nh.iph->ihl; 1910 hdr->ip_hdr_words = skb->nh.iph->ihl;
1433 hdr->tcp_hdr_words = skb->h.th->doff; 1911 hdr->tcp_hdr_words = skb->h.th->doff;
1434 hdr->eth_type_mss = htons(MK_ETH_TYPE_MSS(eth_type, 1912 hdr->eth_type_mss = htons(MK_ETH_TYPE_MSS(eth_type,
1435 skb_shinfo(skb)->gso_size)); 1913 skb_shinfo(skb)->gso_size));
1436 hdr->len = htonl(skb->len - sizeof(*hdr)); 1914 hdr->len = htonl(skb->len - sizeof(*hdr));
1437 cpl = (struct cpl_tx_pkt *)hdr; 1915 cpl = (struct cpl_tx_pkt *)hdr;
1438 sge->stats.tx_lso_pkts++; 1916 } else {
1439 } else
1440#endif
1441 {
1442 /* 1917 /*
1443 * Packets shorter than ETH_HLEN can break the MAC, drop them 1918 * Packets shorter than ETH_HLEN can break the MAC, drop them
1444 * early. Also, we may get oversized packets because some 1919 * early. Also, we may get oversized packets because some
@@ -1447,6 +1922,8 @@ int t1_start_xmit(struct sk_buff *skb, struct net_device *dev)
1447 */ 1922 */
1448 if (unlikely(skb->len < ETH_HLEN || 1923 if (unlikely(skb->len < ETH_HLEN ||
1449 skb->len > dev->mtu + eth_hdr_len(skb->data))) { 1924 skb->len > dev->mtu + eth_hdr_len(skb->data))) {
1925 pr_debug("%s: packet size %d hdr %d mtu%d\n", dev->name,
1926 skb->len, eth_hdr_len(skb->data), dev->mtu);
1450 dev_kfree_skb_any(skb); 1927 dev_kfree_skb_any(skb);
1451 return NETDEV_TX_OK; 1928 return NETDEV_TX_OK;
1452 } 1929 }
@@ -1456,9 +1933,9 @@ int t1_start_xmit(struct sk_buff *skb, struct net_device *dev)
1456 * components, such as pktgen, do not handle it right. 1933 * components, such as pktgen, do not handle it right.
1457 * Complain when this happens but try to fix things up. 1934 * Complain when this happens but try to fix things up.
1458 */ 1935 */
1459 if (unlikely(skb_headroom(skb) < 1936 if (unlikely(skb_headroom(skb) < dev->hard_header_len - ETH_HLEN)) {
1460 dev->hard_header_len - ETH_HLEN)) { 1937 pr_debug("%s: headroom %d header_len %d\n", dev->name,
1461 struct sk_buff *orig_skb = skb; 1938 skb_headroom(skb), dev->hard_header_len);
1462 1939
1463 if (net_ratelimit()) 1940 if (net_ratelimit())
1464 printk(KERN_ERR "%s: inadequate headroom in " 1941 printk(KERN_ERR "%s: inadequate headroom in "
@@ -1471,19 +1948,21 @@ int t1_start_xmit(struct sk_buff *skb, struct net_device *dev)
1471 1948
1472 if (!(adapter->flags & UDP_CSUM_CAPABLE) && 1949 if (!(adapter->flags & UDP_CSUM_CAPABLE) &&
1473 skb->ip_summed == CHECKSUM_PARTIAL && 1950 skb->ip_summed == CHECKSUM_PARTIAL &&
1474 skb->nh.iph->protocol == IPPROTO_UDP) 1951 skb->nh.iph->protocol == IPPROTO_UDP) {
1475 if (unlikely(skb_checksum_help(skb))) { 1952 if (unlikely(skb_checksum_help(skb))) {
1953 pr_debug("%s: unable to do udp checksum\n", dev->name);
1476 dev_kfree_skb_any(skb); 1954 dev_kfree_skb_any(skb);
1477 return NETDEV_TX_OK; 1955 return NETDEV_TX_OK;
1478 } 1956 }
1957 }
1479 1958
1480 /* Hmmm, assuming to catch the gratious arp... and we'll use 1959 /* Hmmm, assuming to catch the gratious arp... and we'll use
1481 * it to flush out stuck espi packets... 1960 * it to flush out stuck espi packets...
1482 */ 1961 */
1483 if (unlikely(!adapter->sge->espibug_skb)) { 1962 if ((unlikely(!adapter->sge->espibug_skb[dev->if_port]))) {
1484 if (skb->protocol == htons(ETH_P_ARP) && 1963 if (skb->protocol == htons(ETH_P_ARP) &&
1485 skb->nh.arph->ar_op == htons(ARPOP_REQUEST)) { 1964 skb->nh.arph->ar_op == htons(ARPOP_REQUEST)) {
1486 adapter->sge->espibug_skb = skb; 1965 adapter->sge->espibug_skb[dev->if_port] = skb;
1487 /* We want to re-use this skb later. We 1966 /* We want to re-use this skb later. We
1488 * simply bump the reference count and it 1967 * simply bump the reference count and it
1489 * will not be freed... 1968 * will not be freed...
@@ -1499,8 +1978,6 @@ int t1_start_xmit(struct sk_buff *skb, struct net_device *dev)
1499 /* the length field isn't used so don't bother setting it */ 1978 /* the length field isn't used so don't bother setting it */
1500 1979
1501 st->tx_cso += (skb->ip_summed == CHECKSUM_PARTIAL); 1980 st->tx_cso += (skb->ip_summed == CHECKSUM_PARTIAL);
1502 sge->stats.tx_do_cksum += (skb->ip_summed == CHECKSUM_PARTIAL);
1503 sge->stats.tx_reg_pkts++;
1504 } 1981 }
1505 cpl->iff = dev->if_port; 1982 cpl->iff = dev->if_port;
1506 1983
@@ -1513,8 +1990,19 @@ int t1_start_xmit(struct sk_buff *skb, struct net_device *dev)
1513#endif 1990#endif
1514 cpl->vlan_valid = 0; 1991 cpl->vlan_valid = 0;
1515 1992
1993send:
1994 st->tx_packets++;
1516 dev->trans_start = jiffies; 1995 dev->trans_start = jiffies;
1517 return t1_sge_tx(skb, adapter, 0, dev); 1996 ret = t1_sge_tx(skb, adapter, 0, dev);
1997
1998 /* If transmit busy, and we reallocated skb's due to headroom limit,
1999 * then silently discard to avoid leak.
2000 */
2001 if (unlikely(ret != NETDEV_TX_OK && skb != orig_skb)) {
2002 dev_kfree_skb_any(skb);
2003 ret = NETDEV_TX_OK;
2004 }
2005 return ret;
1518} 2006}
1519 2007
1520/* 2008/*
@@ -1532,10 +2020,9 @@ static void sge_tx_reclaim_cb(unsigned long data)
1532 continue; 2020 continue;
1533 2021
1534 reclaim_completed_tx(sge, q); 2022 reclaim_completed_tx(sge, q);
1535 if (i == 0 && q->in_use) /* flush pending credits */ 2023 if (i == 0 && q->in_use) { /* flush pending credits */
1536 writel(F_CMDQ0_ENABLE, 2024 writel(F_CMDQ0_ENABLE, sge->adapter->regs + A_SG_DOORBELL);
1537 sge->adapter->regs + A_SG_DOORBELL); 2025 }
1538
1539 spin_unlock(&q->lock); 2026 spin_unlock(&q->lock);
1540 } 2027 }
1541 mod_timer(&sge->tx_reclaim_timer, jiffies + TX_RECLAIM_PERIOD); 2028 mod_timer(&sge->tx_reclaim_timer, jiffies + TX_RECLAIM_PERIOD);
@@ -1582,11 +2069,20 @@ int t1_sge_configure(struct sge *sge, struct sge_params *p)
1582 */ 2069 */
1583void t1_sge_stop(struct sge *sge) 2070void t1_sge_stop(struct sge *sge)
1584{ 2071{
2072 int i;
1585 writel(0, sge->adapter->regs + A_SG_CONTROL); 2073 writel(0, sge->adapter->regs + A_SG_CONTROL);
1586 (void) readl(sge->adapter->regs + A_SG_CONTROL); /* flush */ 2074 readl(sge->adapter->regs + A_SG_CONTROL); /* flush */
2075
1587 if (is_T2(sge->adapter)) 2076 if (is_T2(sge->adapter))
1588 del_timer_sync(&sge->espibug_timer); 2077 del_timer_sync(&sge->espibug_timer);
2078
1589 del_timer_sync(&sge->tx_reclaim_timer); 2079 del_timer_sync(&sge->tx_reclaim_timer);
2080 if (sge->tx_sched)
2081 tx_sched_stop(sge);
2082
2083 for (i = 0; i < MAX_NPORTS; i++)
2084 if (sge->espibug_skb[i])
2085 kfree_skb(sge->espibug_skb[i]);
1590} 2086}
1591 2087
1592/* 2088/*
@@ -1599,74 +2095,128 @@ void t1_sge_start(struct sge *sge)
1599 2095
1600 writel(sge->sge_control, sge->adapter->regs + A_SG_CONTROL); 2096 writel(sge->sge_control, sge->adapter->regs + A_SG_CONTROL);
1601 doorbell_pio(sge->adapter, F_FL0_ENABLE | F_FL1_ENABLE); 2097 doorbell_pio(sge->adapter, F_FL0_ENABLE | F_FL1_ENABLE);
1602 (void) readl(sge->adapter->regs + A_SG_CONTROL); /* flush */ 2098 readl(sge->adapter->regs + A_SG_CONTROL); /* flush */
1603 2099
1604 mod_timer(&sge->tx_reclaim_timer, jiffies + TX_RECLAIM_PERIOD); 2100 mod_timer(&sge->tx_reclaim_timer, jiffies + TX_RECLAIM_PERIOD);
1605 2101
1606 if (is_T2(sge->adapter)) 2102 if (is_T2(sge->adapter))
1607 mod_timer(&sge->espibug_timer, jiffies + sge->espibug_timeout); 2103 mod_timer(&sge->espibug_timer, jiffies + sge->espibug_timeout);
1608} 2104}
1609 2105
1610/* 2106/*
1611 * Callback for the T2 ESPI 'stuck packet feature' workaorund 2107 * Callback for the T2 ESPI 'stuck packet feature' workaorund
1612 */ 2108 */
1613static void espibug_workaround(void *data) 2109static void espibug_workaround_t204(unsigned long data)
1614{ 2110{
1615 struct adapter *adapter = (struct adapter *)data; 2111 struct adapter *adapter = (struct adapter *)data;
1616 struct sge *sge = adapter->sge; 2112 struct sge *sge = adapter->sge;
2113 unsigned int nports = adapter->params.nports;
2114 u32 seop[MAX_NPORTS];
1617 2115
1618 if (netif_running(adapter->port[0].dev)) { 2116 if (adapter->open_device_map & PORT_MASK) {
1619 struct sk_buff *skb = sge->espibug_skb; 2117 int i;
1620 2118 if (t1_espi_get_mon_t204(adapter, &(seop[0]), 0) < 0) {
1621 u32 seop = t1_espi_get_mon(adapter, 0x930, 0); 2119 return;
1622 2120 }
1623 if ((seop & 0xfff0fff) == 0xfff && skb) { 2121 for (i = 0; i < nports; i++) {
1624 if (!skb->cb[0]) { 2122 struct sk_buff *skb = sge->espibug_skb[i];
1625 u8 ch_mac_addr[ETH_ALEN] = 2123 if ( (netif_running(adapter->port[i].dev)) &&
1626 {0x0, 0x7, 0x43, 0x0, 0x0, 0x0}; 2124 !(netif_queue_stopped(adapter->port[i].dev)) &&
1627 memcpy(skb->data + sizeof(struct cpl_tx_pkt), 2125 (seop[i] && ((seop[i] & 0xfff) == 0)) &&
1628 ch_mac_addr, ETH_ALEN); 2126 skb ) {
1629 memcpy(skb->data + skb->len - 10, ch_mac_addr, 2127 if (!skb->cb[0]) {
1630 ETH_ALEN); 2128 u8 ch_mac_addr[ETH_ALEN] =
1631 skb->cb[0] = 0xff; 2129 {0x0, 0x7, 0x43, 0x0, 0x0, 0x0};
2130 memcpy(skb->data + sizeof(struct cpl_tx_pkt),
2131 ch_mac_addr, ETH_ALEN);
2132 memcpy(skb->data + skb->len - 10,
2133 ch_mac_addr, ETH_ALEN);
2134 skb->cb[0] = 0xff;
2135 }
2136
2137 /* bump the reference count to avoid freeing of
2138 * the skb once the DMA has completed.
2139 */
2140 skb = skb_get(skb);
2141 t1_sge_tx(skb, adapter, 0, adapter->port[i].dev);
1632 } 2142 }
1633
1634 /* bump the reference count to avoid freeing of the
1635 * skb once the DMA has completed.
1636 */
1637 skb = skb_get(skb);
1638 t1_sge_tx(skb, adapter, 0, adapter->port[0].dev);
1639 } 2143 }
1640 } 2144 }
1641 mod_timer(&sge->espibug_timer, jiffies + sge->espibug_timeout); 2145 mod_timer(&sge->espibug_timer, jiffies + sge->espibug_timeout);
1642} 2146}
1643 2147
2148static void espibug_workaround(unsigned long data)
2149{
2150 struct adapter *adapter = (struct adapter *)data;
2151 struct sge *sge = adapter->sge;
2152
2153 if (netif_running(adapter->port[0].dev)) {
2154 struct sk_buff *skb = sge->espibug_skb[0];
2155 u32 seop = t1_espi_get_mon(adapter, 0x930, 0);
2156
2157 if ((seop & 0xfff0fff) == 0xfff && skb) {
2158 if (!skb->cb[0]) {
2159 u8 ch_mac_addr[ETH_ALEN] =
2160 {0x0, 0x7, 0x43, 0x0, 0x0, 0x0};
2161 memcpy(skb->data + sizeof(struct cpl_tx_pkt),
2162 ch_mac_addr, ETH_ALEN);
2163 memcpy(skb->data + skb->len - 10, ch_mac_addr,
2164 ETH_ALEN);
2165 skb->cb[0] = 0xff;
2166 }
2167
2168 /* bump the reference count to avoid freeing of the
2169 * skb once the DMA has completed.
2170 */
2171 skb = skb_get(skb);
2172 t1_sge_tx(skb, adapter, 0, adapter->port[0].dev);
2173 }
2174 }
2175 mod_timer(&sge->espibug_timer, jiffies + sge->espibug_timeout);
2176}
2177
1644/* 2178/*
1645 * Creates a t1_sge structure and returns suggested resource parameters. 2179 * Creates a t1_sge structure and returns suggested resource parameters.
1646 */ 2180 */
1647struct sge * __devinit t1_sge_create(struct adapter *adapter, 2181struct sge * __devinit t1_sge_create(struct adapter *adapter,
1648 struct sge_params *p) 2182 struct sge_params *p)
1649{ 2183{
1650 struct sge *sge = kmalloc(sizeof(*sge), GFP_KERNEL); 2184 struct sge *sge = kzalloc(sizeof(*sge), GFP_KERNEL);
2185 int i;
1651 2186
1652 if (!sge) 2187 if (!sge)
1653 return NULL; 2188 return NULL;
1654 memset(sge, 0, sizeof(*sge));
1655 2189
1656 sge->adapter = adapter; 2190 sge->adapter = adapter;
1657 sge->netdev = adapter->port[0].dev; 2191 sge->netdev = adapter->port[0].dev;
1658 sge->rx_pkt_pad = t1_is_T1B(adapter) ? 0 : 2; 2192 sge->rx_pkt_pad = t1_is_T1B(adapter) ? 0 : 2;
1659 sge->jumbo_fl = t1_is_T1B(adapter) ? 1 : 0; 2193 sge->jumbo_fl = t1_is_T1B(adapter) ? 1 : 0;
1660 2194
2195 for_each_port(adapter, i) {
2196 sge->port_stats[i] = alloc_percpu(struct sge_port_stats);
2197 if (!sge->port_stats[i])
2198 goto nomem_port;
2199 }
2200
1661 init_timer(&sge->tx_reclaim_timer); 2201 init_timer(&sge->tx_reclaim_timer);
1662 sge->tx_reclaim_timer.data = (unsigned long)sge; 2202 sge->tx_reclaim_timer.data = (unsigned long)sge;
1663 sge->tx_reclaim_timer.function = sge_tx_reclaim_cb; 2203 sge->tx_reclaim_timer.function = sge_tx_reclaim_cb;
1664 2204
1665 if (is_T2(sge->adapter)) { 2205 if (is_T2(sge->adapter)) {
1666 init_timer(&sge->espibug_timer); 2206 init_timer(&sge->espibug_timer);
1667 sge->espibug_timer.function = (void *)&espibug_workaround; 2207
2208 if (adapter->params.nports > 1) {
2209 tx_sched_init(sge);
2210 sge->espibug_timer.function = espibug_workaround_t204;
2211 } else {
2212 sge->espibug_timer.function = espibug_workaround;
2213 }
1668 sge->espibug_timer.data = (unsigned long)sge->adapter; 2214 sge->espibug_timer.data = (unsigned long)sge->adapter;
2215
1669 sge->espibug_timeout = 1; 2216 sge->espibug_timeout = 1;
2217 /* for T204, every 10ms */
2218 if (adapter->params.nports > 1)
2219 sge->espibug_timeout = HZ/100;
1670 } 2220 }
1671 2221
1672 2222
@@ -1674,10 +2224,25 @@ struct sge * __devinit t1_sge_create(struct adapter *adapter,
1674 p->cmdQ_size[1] = SGE_CMDQ1_E_N; 2224 p->cmdQ_size[1] = SGE_CMDQ1_E_N;
1675 p->freelQ_size[!sge->jumbo_fl] = SGE_FREEL_SIZE; 2225 p->freelQ_size[!sge->jumbo_fl] = SGE_FREEL_SIZE;
1676 p->freelQ_size[sge->jumbo_fl] = SGE_JUMBO_FREEL_SIZE; 2226 p->freelQ_size[sge->jumbo_fl] = SGE_JUMBO_FREEL_SIZE;
1677 p->rx_coalesce_usecs = 50; 2227 if (sge->tx_sched) {
2228 if (board_info(sge->adapter)->board == CHBT_BOARD_CHT204)
2229 p->rx_coalesce_usecs = 15;
2230 else
2231 p->rx_coalesce_usecs = 50;
2232 } else
2233 p->rx_coalesce_usecs = 50;
2234
1678 p->coalesce_enable = 0; 2235 p->coalesce_enable = 0;
1679 p->sample_interval_usecs = 0; 2236 p->sample_interval_usecs = 0;
1680 p->polling = 0; 2237 p->polling = 0;
1681 2238
1682 return sge; 2239 return sge;
2240nomem_port:
2241 while (i >= 0) {
2242 free_percpu(sge->port_stats[i]);
2243 --i;
2244 }
2245 kfree(sge);
2246 return NULL;
2247
1683} 2248}