Diffstat (limited to 'drivers/net/cxgb3/sge.c')
 drivers/net/cxgb3/sge.c | 2681 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 2681 insertions(+), 0 deletions(-)
diff --git a/drivers/net/cxgb3/sge.c b/drivers/net/cxgb3/sge.c
new file mode 100644
index 000000000000..3f2cf8a07c61
--- /dev/null
+++ b/drivers/net/cxgb3/sge.c
@@ -0,0 +1,2681 @@
1/*
2 * Copyright (c) 2005-2007 Chelsio, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32#include <linux/skbuff.h>
33#include <linux/netdevice.h>
34#include <linux/etherdevice.h>
35#include <linux/if_vlan.h>
36#include <linux/ip.h>
37#include <linux/tcp.h>
38#include <linux/dma-mapping.h>
39#include "common.h"
40#include "regs.h"
41#include "sge_defs.h"
42#include "t3_cpl.h"
43#include "firmware_exports.h"
44
45#define USE_GTS 0
46
47#define SGE_RX_SM_BUF_SIZE 1536
48#define SGE_RX_COPY_THRES 256
49
50# define SGE_RX_DROP_THRES 16
51
52/*
53 * Period of the Tx buffer reclaim timer. This timer does not need to run
54 * frequently as Tx buffers are usually reclaimed by new Tx packets.
55 */
56#define TX_RECLAIM_PERIOD (HZ / 4)
57
58/* WR size in bytes */
59#define WR_LEN (WR_FLITS * 8)
60
61/*
62 * Types of Tx queues in each queue set. Order here matters, do not change.
63 */
64enum { TXQ_ETH, TXQ_OFLD, TXQ_CTRL };
65
66/* Values for sge_txq.flags */
67enum {
68 TXQ_RUNNING = 1 << 0, /* fetch engine is running */
69 TXQ_LAST_PKT_DB = 1 << 1, /* last packet rang the doorbell */
70};
71
72struct tx_desc {
73 u64 flit[TX_DESC_FLITS];
74};
75
76struct rx_desc {
77 __be32 addr_lo;
78 __be32 len_gen;
79 __be32 gen2;
80 __be32 addr_hi;
81};
82
83struct tx_sw_desc { /* SW state per Tx descriptor */
84 struct sk_buff *skb;
85};
86
87struct rx_sw_desc { /* SW state per Rx descriptor */
88 struct sk_buff *skb;
89 DECLARE_PCI_UNMAP_ADDR(dma_addr);
90};
91
92struct rsp_desc { /* response queue descriptor */
93 struct rss_header rss_hdr;
94 __be32 flags;
95 __be32 len_cq;
96 u8 imm_data[47];
97 u8 intr_gen;
98};
99
100struct unmap_info { /* packet unmapping info, overlays skb->cb */
101 int sflit; /* start flit of first SGL entry in Tx descriptor */
102 u16 fragidx; /* first page fragment in current Tx descriptor */
103 u16 addr_idx; /* buffer index of first SGL entry in descriptor */
104 u32 len; /* mapped length of skb main body */
105};
106
107/*
108 * Maps a number of flits to the number of Tx descriptors that can hold them.
109 * The formula is
110 *
111 * desc = 1 + (flits - 2) / (WR_FLITS - 1).
112 *
113 * HW allows up to 4 descriptors to be combined into a WR.
114 */
115static u8 flit_desc_map[] = {
116 0,
117#if SGE_NUM_GENBITS == 1
118 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
119 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
120 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
121 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
122#elif SGE_NUM_GENBITS == 2
123 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
124 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
125 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
126 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
127#else
128# error "SGE_NUM_GENBITS must be 1 or 2"
129#endif
130};
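/*
 * Worked example (illustrative, not part of the original source): assuming
 * TX_DESC_FLITS is 16, WR_FLITS is 16 with one generation bit and 15 with
 * two.  For SGE_NUM_GENBITS == 2 the formula above then yields 1 descriptor
 * for 2-15 flits, 2 for 16-29, 3 for 30-43, and 4 for 44-57, which is
 * exactly what the table encodes.
 */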
131
132static inline struct sge_qset *fl_to_qset(const struct sge_fl *q, int qidx)
133{
134 return container_of(q, struct sge_qset, fl[qidx]);
135}
136
137static inline struct sge_qset *rspq_to_qset(const struct sge_rspq *q)
138{
139 return container_of(q, struct sge_qset, rspq);
140}
141
142static inline struct sge_qset *txq_to_qset(const struct sge_txq *q, int qidx)
143{
144 return container_of(q, struct sge_qset, txq[qidx]);
145}
146
147/**
148 * refill_rspq - replenish an SGE response queue
149 * @adapter: the adapter
150 * @q: the response queue to replenish
151 * @credits: how many new responses to make available
152 *
153 * Replenishes a response queue by making the supplied number of responses
154 * available to HW.
155 */
156static inline void refill_rspq(struct adapter *adapter,
157 const struct sge_rspq *q, unsigned int credits)
158{
159 t3_write_reg(adapter, A_SG_RSPQ_CREDIT_RETURN,
160 V_RSPQ(q->cntxt_id) | V_CREDITS(credits));
161}
162
163/**
164 * need_skb_unmap - does the platform need unmapping of sk_buffs?
165 *
166 * Returns true if the platform needs sk_buff unmapping. The compiler
167 * optimizes away unnecessary code if this returns true.
168 */
169static inline int need_skb_unmap(void)
170{
171 /*
172 * This structure is used to tell if the platform needs buffer
173 * unmapping by checking if DECLARE_PCI_UNMAP_ADDR defines anything.
174 */
175 struct dummy {
176 DECLARE_PCI_UNMAP_ADDR(addr);
177 };
178
179 return sizeof(struct dummy) != 0;
180}
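/*
 * Note (editor's sketch of the trick above): on platforms where
 * DECLARE_PCI_UNMAP_ADDR() expands to nothing the dummy struct has size 0,
 * so need_skb_unmap() evaluates to a compile-time 0 and the unmapping
 * paths it guards can be eliminated by the compiler.
 */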
181
182/**
183 * unmap_skb - unmap a packet main body and its page fragments
184 * @skb: the packet
185 * @q: the Tx queue containing Tx descriptors for the packet
186 * @cidx: index of Tx descriptor
187 * @pdev: the PCI device
188 *
189 * Unmap the main body of an sk_buff and its page fragments, if any.
190 * Because of the fairly complicated structure of our SGLs and the desire
191 * to conserve space for metadata, we keep the information necessary to
192 * unmap an sk_buff partly in the sk_buff itself (in its cb), and partly
193 * in the Tx descriptors (the physical addresses of the various data
194 * buffers). The send functions initialize the state in skb->cb so we
195 * can unmap the buffers held in the first Tx descriptor here, and we
196 * have enough information at this point to update the state for the next
197 * Tx descriptor.
198 */
199static inline void unmap_skb(struct sk_buff *skb, struct sge_txq *q,
200 unsigned int cidx, struct pci_dev *pdev)
201{
202 const struct sg_ent *sgp;
203 struct unmap_info *ui = (struct unmap_info *)skb->cb;
204 int nfrags, frag_idx, curflit, j = ui->addr_idx;
205
206 sgp = (struct sg_ent *)&q->desc[cidx].flit[ui->sflit];
207
208 if (ui->len) {
209 pci_unmap_single(pdev, be64_to_cpu(sgp->addr[0]), ui->len,
210 PCI_DMA_TODEVICE);
211 ui->len = 0; /* so we know for next descriptor for this skb */
212 j = 1;
213 }
214
215 frag_idx = ui->fragidx;
216 curflit = ui->sflit + 1 + j;
217 nfrags = skb_shinfo(skb)->nr_frags;
218
219 while (frag_idx < nfrags && curflit < WR_FLITS) {
220 pci_unmap_page(pdev, be64_to_cpu(sgp->addr[j]),
221 skb_shinfo(skb)->frags[frag_idx].size,
222 PCI_DMA_TODEVICE);
223 j ^= 1;
224 if (j == 0) {
225 sgp++;
226 curflit++;
227 }
228 curflit++;
229 frag_idx++;
230 }
231
232 if (frag_idx < nfrags) { /* SGL continues into next Tx descriptor */
233 ui->fragidx = frag_idx;
234 ui->addr_idx = j;
235 ui->sflit = curflit - WR_FLITS - j; /* sflit can be -1 */
236 }
237}
238
239/**
240 * free_tx_desc - reclaims Tx descriptors and their buffers
241 * @adapter: the adapter
242 * @q: the Tx queue to reclaim descriptors from
243 * @n: the number of descriptors to reclaim
244 *
245 * Reclaims Tx descriptors from an SGE Tx queue and frees the associated
246 * Tx buffers. Called with the Tx queue lock held.
247 */
248static void free_tx_desc(struct adapter *adapter, struct sge_txq *q,
249 unsigned int n)
250{
251 struct tx_sw_desc *d;
252 struct pci_dev *pdev = adapter->pdev;
253 unsigned int cidx = q->cidx;
254
255 d = &q->sdesc[cidx];
256 while (n--) {
257 if (d->skb) { /* an SGL is present */
258 if (need_skb_unmap())
259 unmap_skb(d->skb, q, cidx, pdev);
260 if (d->skb->priority == cidx)
261 kfree_skb(d->skb);
262 }
263 ++d;
264 if (++cidx == q->size) {
265 cidx = 0;
266 d = q->sdesc;
267 }
268 }
269 q->cidx = cidx;
270}
271
272/**
273 * reclaim_completed_tx - reclaims completed Tx descriptors
274 * @adapter: the adapter
275 * @q: the Tx queue to reclaim completed descriptors from
276 *
277 * Reclaims Tx descriptors that the SGE has indicated it has processed,
278 * and frees the associated buffers if possible. Called with the Tx
279 * queue's lock held.
280 */
281static inline void reclaim_completed_tx(struct adapter *adapter,
282 struct sge_txq *q)
283{
284 unsigned int reclaim = q->processed - q->cleaned;
285
286 if (reclaim) {
287 free_tx_desc(adapter, q, reclaim);
288 q->cleaned += reclaim;
289 q->in_use -= reclaim;
290 }
291}
292
293/**
294 * should_restart_tx - are there enough resources to restart a Tx queue?
295 * @q: the Tx queue
296 *
297 * Checks if there are enough descriptors to restart a suspended Tx queue.
298 */
299static inline int should_restart_tx(const struct sge_txq *q)
300{
301 unsigned int r = q->processed - q->cleaned;
302
303 return q->in_use - r < (q->size >> 1);
304}
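/*
 * In other words, a suspended queue may be restarted once fewer than half
 * of its descriptors would remain in use after the pending reclaim.
 */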
305
306/**
307 * free_rx_bufs - free the Rx buffers on an SGE free list
308 * @pdev: the PCI device associated with the adapter
309 * @q: the SGE free list to clean up
310 *
311 * Release the buffers on an SGE free-buffer Rx queue. HW fetching from
312 * this queue should be stopped before calling this function.
313 */
314static void free_rx_bufs(struct pci_dev *pdev, struct sge_fl *q)
315{
316 unsigned int cidx = q->cidx;
317
318 while (q->credits--) {
319 struct rx_sw_desc *d = &q->sdesc[cidx];
320
321 pci_unmap_single(pdev, pci_unmap_addr(d, dma_addr),
322 q->buf_size, PCI_DMA_FROMDEVICE);
323 kfree_skb(d->skb);
324 d->skb = NULL;
325 if (++cidx == q->size)
326 cidx = 0;
327 }
328}
329
330/**
331 * add_one_rx_buf - add a packet buffer to a free-buffer list
332 * @skb: the buffer to add
333 * @len: the buffer length
334 * @d: the HW Rx descriptor to write
335 * @sd: the SW Rx descriptor to write
336 * @gen: the generation bit value
337 * @pdev: the PCI device associated with the adapter
338 *
339 * Add a buffer of the given length to the supplied HW and SW Rx
340 * descriptors.
341 */
342static inline void add_one_rx_buf(struct sk_buff *skb, unsigned int len,
343 struct rx_desc *d, struct rx_sw_desc *sd,
344 unsigned int gen, struct pci_dev *pdev)
345{
346 dma_addr_t mapping;
347
348 sd->skb = skb;
349 mapping = pci_map_single(pdev, skb->data, len, PCI_DMA_FROMDEVICE);
350 pci_unmap_addr_set(sd, dma_addr, mapping);
351
352 d->addr_lo = cpu_to_be32(mapping);
353 d->addr_hi = cpu_to_be32((u64) mapping >> 32);
354 wmb();
355 d->len_gen = cpu_to_be32(V_FLD_GEN1(gen));
356 d->gen2 = cpu_to_be32(V_FLD_GEN2(gen));
357}
358
359/**
360 * refill_fl - refill an SGE free-buffer list
361 * @adap: the adapter
362 * @q: the free-list to refill
363 * @n: the number of new buffers to allocate
364 * @gfp: the gfp flags for allocating new buffers
365 *
366 * (Re)populate an SGE free-buffer list with up to @n new packet buffers,
367 * allocated with the supplied gfp flags. The caller must ensure that
368 * @n does not exceed the queue's capacity.
369 */
370static void refill_fl(struct adapter *adap, struct sge_fl *q, int n, gfp_t gfp)
371{
372 struct rx_sw_desc *sd = &q->sdesc[q->pidx];
373 struct rx_desc *d = &q->desc[q->pidx];
374
375 while (n--) {
376 struct sk_buff *skb = alloc_skb(q->buf_size, gfp);
377
378 if (!skb)
379 break;
380
381 add_one_rx_buf(skb, q->buf_size, d, sd, q->gen, adap->pdev);
382 d++;
383 sd++;
384 if (++q->pidx == q->size) {
385 q->pidx = 0;
386 q->gen ^= 1;
387 sd = q->sdesc;
388 d = q->desc;
389 }
390 q->credits++;
391 }
392
393 t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
394}
395
396static inline void __refill_fl(struct adapter *adap, struct sge_fl *fl)
397{
398 refill_fl(adap, fl, min(16U, fl->size - fl->credits), GFP_ATOMIC);
399}
400
401/**
402 * recycle_rx_buf - recycle a receive buffer
403 * @adapter: the adapter
404 * @q: the SGE free list
405 * @idx: index of buffer to recycle
406 *
407 * Recycles the specified buffer on the given free list by adding it at
408 * the next available slot on the list.
409 */
410static void recycle_rx_buf(struct adapter *adap, struct sge_fl *q,
411 unsigned int idx)
412{
413 struct rx_desc *from = &q->desc[idx];
414 struct rx_desc *to = &q->desc[q->pidx];
415
416 q->sdesc[q->pidx] = q->sdesc[idx];
417 to->addr_lo = from->addr_lo; /* already big endian */
418 to->addr_hi = from->addr_hi; /* likewise */
419 wmb();
420 to->len_gen = cpu_to_be32(V_FLD_GEN1(q->gen));
421 to->gen2 = cpu_to_be32(V_FLD_GEN2(q->gen));
422 q->credits++;
423
424 if (++q->pidx == q->size) {
425 q->pidx = 0;
426 q->gen ^= 1;
427 }
428 t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
429}
430
431/**
432 * alloc_ring - allocate resources for an SGE descriptor ring
433 * @pdev: the PCI device
434 * @nelem: the number of descriptors
435 * @elem_size: the size of each descriptor
436 * @sw_size: the size of the SW state associated with each ring element
437 * @phys: the physical address of the allocated ring
438 * @metadata: address of the array holding the SW state for the ring
439 *
440 * Allocates resources for an SGE descriptor ring, such as Tx queues,
441 * free buffer lists, or response queues. Each SGE ring requires
442 * space for its HW descriptors plus, optionally, space for the SW state
443 * associated with each HW entry (the metadata). The function returns
444 * three values: the virtual address for the HW ring (the return value
445 * of the function), the physical address of the HW ring, and the address
446 * of the SW ring.
447 */
448static void *alloc_ring(struct pci_dev *pdev, size_t nelem, size_t elem_size,
449 size_t sw_size, dma_addr_t *phys, void *metadata)
450{
451 size_t len = nelem * elem_size;
452 void *s = NULL;
453 void *p = dma_alloc_coherent(&pdev->dev, len, phys, GFP_KERNEL);
454
455 if (!p)
456 return NULL;
457 if (sw_size) {
458 s = kcalloc(nelem, sw_size, GFP_KERNEL);
459
460 if (!s) {
461 dma_free_coherent(&pdev->dev, len, p, *phys);
462 return NULL;
463 }
464 }
465 if (metadata)
466 *(void **)metadata = s;
467 memset(p, 0, len);
468 return p;
469}
470
471/**
472 * t3_free_qset - free the resources of an SGE queue set
473 * @adapter: the adapter owning the queue set
474 * @q: the queue set
475 *
476 * Release the HW and SW resources associated with an SGE queue set, such
477 * as HW contexts, packet buffers, and descriptor rings. Traffic to the
478 * queue set must be quiesced prior to calling this.
479 */
480void t3_free_qset(struct adapter *adapter, struct sge_qset *q)
481{
482 int i;
483 struct pci_dev *pdev = adapter->pdev;
484
485 if (q->tx_reclaim_timer.function)
486 del_timer_sync(&q->tx_reclaim_timer);
487
488 for (i = 0; i < SGE_RXQ_PER_SET; ++i)
489 if (q->fl[i].desc) {
490 spin_lock(&adapter->sge.reg_lock);
491 t3_sge_disable_fl(adapter, q->fl[i].cntxt_id);
492 spin_unlock(&adapter->sge.reg_lock);
493 free_rx_bufs(pdev, &q->fl[i]);
494 kfree(q->fl[i].sdesc);
495 dma_free_coherent(&pdev->dev,
496 q->fl[i].size *
497 sizeof(struct rx_desc), q->fl[i].desc,
498 q->fl[i].phys_addr);
499 }
500
501 for (i = 0; i < SGE_TXQ_PER_SET; ++i)
502 if (q->txq[i].desc) {
503 spin_lock(&adapter->sge.reg_lock);
504 t3_sge_enable_ecntxt(adapter, q->txq[i].cntxt_id, 0);
505 spin_unlock(&adapter->sge.reg_lock);
506 if (q->txq[i].sdesc) {
507 free_tx_desc(adapter, &q->txq[i],
508 q->txq[i].in_use);
509 kfree(q->txq[i].sdesc);
510 }
511 dma_free_coherent(&pdev->dev,
512 q->txq[i].size *
513 sizeof(struct tx_desc),
514 q->txq[i].desc, q->txq[i].phys_addr);
515 __skb_queue_purge(&q->txq[i].sendq);
516 }
517
518 if (q->rspq.desc) {
519 spin_lock(&adapter->sge.reg_lock);
520 t3_sge_disable_rspcntxt(adapter, q->rspq.cntxt_id);
521 spin_unlock(&adapter->sge.reg_lock);
522 dma_free_coherent(&pdev->dev,
523 q->rspq.size * sizeof(struct rsp_desc),
524 q->rspq.desc, q->rspq.phys_addr);
525 }
526
527 if (q->netdev)
528 q->netdev->atalk_ptr = NULL;
529
530 memset(q, 0, sizeof(*q));
531}
532
533/**
534 * init_qset_cntxt - initialize an SGE queue set context info
535 * @qs: the queue set
536 * @id: the queue set id
537 *
538 * Initializes the TIDs and context ids for the queues of a queue set.
539 */
540static void init_qset_cntxt(struct sge_qset *qs, unsigned int id)
541{
542 qs->rspq.cntxt_id = id;
543 qs->fl[0].cntxt_id = 2 * id;
544 qs->fl[1].cntxt_id = 2 * id + 1;
545 qs->txq[TXQ_ETH].cntxt_id = FW_TUNNEL_SGEEC_START + id;
546 qs->txq[TXQ_ETH].token = FW_TUNNEL_TID_START + id;
547 qs->txq[TXQ_OFLD].cntxt_id = FW_OFLD_SGEEC_START + id;
548 qs->txq[TXQ_CTRL].cntxt_id = FW_CTRL_SGEEC_START + id;
549 qs->txq[TXQ_CTRL].token = FW_CTRL_TID_START + id;
550}
551
552/**
553 * sgl_len - calculates the size of an SGL of the given capacity
554 * @n: the number of SGL entries
555 *
556 * Calculates the number of flits needed for a scatter/gather list that
557 * can hold the given number of entries.
558 */
559static inline unsigned int sgl_len(unsigned int n)
560{
561 /* alternatively: 3 * (n / 2) + 2 * (n & 1) */
562 return (3 * n) / 2 + (n & 1);
563}
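/*
 * Illustrative worked values (assuming the usual sg_ent packing, where each
 * pair of entries takes 3 flits: two 64-bit addresses plus one flit holding
 * both 32-bit lengths, and a lone final entry takes 2):
 * sgl_len(1) == 2, sgl_len(2) == 3, sgl_len(3) == 5, sgl_len(4) == 6.
 */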
564
565/**
566 * flits_to_desc - returns the num of Tx descriptors for the given flits
567 * @n: the number of flits
568 *
569 * Calculates the number of Tx descriptors needed for the supplied number
570 * of flits.
571 */
572static inline unsigned int flits_to_desc(unsigned int n)
573{
574 BUG_ON(n >= ARRAY_SIZE(flit_desc_map));
575 return flit_desc_map[n];
576}
577
578/**
579 * get_packet - return the next ingress packet buffer from a free list
580 * @adap: the adapter that received the packet
581 * @fl: the SGE free list holding the packet
582 * @len: the packet length including any SGE padding
583 * @drop_thres: # of remaining buffers before we start dropping packets
584 *
585 * Get the next packet from a free list and complete setup of the
586 * sk_buff. If the packet is small we make a copy and recycle the
587 * original buffer, otherwise we use the original buffer itself. If a
588 * positive drop threshold is supplied packets are dropped and their
589 * buffers recycled if (a) the number of remaining buffers is under the
590 * threshold and the packet is too big to copy, or (b) the packet should
591 * be copied but there is no memory for the copy.
592 */
593static struct sk_buff *get_packet(struct adapter *adap, struct sge_fl *fl,
594 unsigned int len, unsigned int drop_thres)
595{
596 struct sk_buff *skb = NULL;
597 struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
598
599 prefetch(sd->skb->data);
600
601 if (len <= SGE_RX_COPY_THRES) {
602 skb = alloc_skb(len, GFP_ATOMIC);
603 if (likely(skb != NULL)) {
604 __skb_put(skb, len);
605 pci_dma_sync_single_for_cpu(adap->pdev,
606 pci_unmap_addr(sd,
607 dma_addr),
608 len, PCI_DMA_FROMDEVICE);
609 memcpy(skb->data, sd->skb->data, len);
610 pci_dma_sync_single_for_device(adap->pdev,
611 pci_unmap_addr(sd,
612 dma_addr),
613 len, PCI_DMA_FROMDEVICE);
614 } else if (!drop_thres)
615 goto use_orig_buf;
616 recycle:
617 recycle_rx_buf(adap, fl, fl->cidx);
618 return skb;
619 }
620
621 if (unlikely(fl->credits < drop_thres))
622 goto recycle;
623
624 use_orig_buf:
625 pci_unmap_single(adap->pdev, pci_unmap_addr(sd, dma_addr),
626 fl->buf_size, PCI_DMA_FROMDEVICE);
627 skb = sd->skb;
628 skb_put(skb, len);
629 __refill_fl(adap, fl);
630 return skb;
631}
632
633/**
634 * get_imm_packet - return the next ingress packet buffer from a response
635 * @resp: the response descriptor containing the packet data
636 *
637 * Return a packet containing the immediate data of the given response.
638 */
639static inline struct sk_buff *get_imm_packet(const struct rsp_desc *resp)
640{
641 struct sk_buff *skb = alloc_skb(IMMED_PKT_SIZE, GFP_ATOMIC);
642
643 if (skb) {
644 __skb_put(skb, IMMED_PKT_SIZE);
645 memcpy(skb->data, resp->imm_data, IMMED_PKT_SIZE);
646 }
647 return skb;
648}
649
650/**
651 * calc_tx_descs - calculate the number of Tx descriptors for a packet
652 * @skb: the packet
653 *
654 * Returns the number of Tx descriptors needed for the given Ethernet
655 * packet. Ethernet packets require addition of WR and CPL headers.
656 */
657static inline unsigned int calc_tx_descs(const struct sk_buff *skb)
658{
659 unsigned int flits;
660
661 if (skb->len <= WR_LEN - sizeof(struct cpl_tx_pkt))
662 return 1;
663
664 flits = sgl_len(skb_shinfo(skb)->nr_frags + 1) + 2;
665 if (skb_shinfo(skb)->gso_size)
666 flits++;
667 return flits_to_desc(flits);
668}
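/*
 * Worked example (illustrative): a non-GSO packet with a linear part and
 * three page fragments needs sgl_len(4) + 2 = 8 flits, which maps to a
 * single Tx descriptor; the extra LSO flit for a GSO packet makes it 9,
 * still within one descriptor.
 */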
669
670/**
671 * make_sgl - populate a scatter/gather list for a packet
672 * @skb: the packet
673 * @sgp: the SGL to populate
674 * @start: start address of skb main body data to include in the SGL
675 * @len: length of skb main body data to include in the SGL
676 * @pdev: the PCI device
677 *
678 * Generates a scatter/gather list for the buffers that make up a packet
679 * and returns the SGL size in 8-byte words. The caller must size the SGL
680 * appropriately.
681 */
682static inline unsigned int make_sgl(const struct sk_buff *skb,
683 struct sg_ent *sgp, unsigned char *start,
684 unsigned int len, struct pci_dev *pdev)
685{
686 dma_addr_t mapping;
687 unsigned int i, j = 0, nfrags;
688
689 if (len) {
690 mapping = pci_map_single(pdev, start, len, PCI_DMA_TODEVICE);
691 sgp->len[0] = cpu_to_be32(len);
692 sgp->addr[0] = cpu_to_be64(mapping);
693 j = 1;
694 }
695
696 nfrags = skb_shinfo(skb)->nr_frags;
697 for (i = 0; i < nfrags; i++) {
698 skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
699
700 mapping = pci_map_page(pdev, frag->page, frag->page_offset,
701 frag->size, PCI_DMA_TODEVICE);
702 sgp->len[j] = cpu_to_be32(frag->size);
703 sgp->addr[j] = cpu_to_be64(mapping);
704 j ^= 1;
705 if (j == 0)
706 ++sgp;
707 }
708 if (j)
709 sgp->len[j] = 0;
710 return ((nfrags + (len != 0)) * 3) / 2 + j;
711}
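/*
 * Note: the value returned above equals sgl_len(nfrags + (len != 0)), since
 * j ends up holding the parity of the number of SGL entries written.
 */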
712
713/**
714 * check_ring_tx_db - check and potentially ring a Tx queue's doorbell
715 * @adap: the adapter
716 * @q: the Tx queue
717 *
718 * Ring the doorbell if a Tx queue is asleep. There is a natural race
719 * where the HW may go to sleep just after we check; in that case the
720 * interrupt handler will detect the outstanding Tx packet
721 * and ring the doorbell for us.
722 *
723 * When GTS is disabled we unconditionally ring the doorbell.
724 */
725static inline void check_ring_tx_db(struct adapter *adap, struct sge_txq *q)
726{
727#if USE_GTS
728 clear_bit(TXQ_LAST_PKT_DB, &q->flags);
729 if (test_and_set_bit(TXQ_RUNNING, &q->flags) == 0) {
730 set_bit(TXQ_LAST_PKT_DB, &q->flags);
731 t3_write_reg(adap, A_SG_KDOORBELL,
732 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
733 }
734#else
735 wmb(); /* write descriptors before telling HW */
736 t3_write_reg(adap, A_SG_KDOORBELL,
737 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
738#endif
739}
740
741static inline void wr_gen2(struct tx_desc *d, unsigned int gen)
742{
743#if SGE_NUM_GENBITS == 2
744 d->flit[TX_DESC_FLITS - 1] = cpu_to_be64(gen);
745#endif
746}
747
748/**
749 * write_wr_hdr_sgl - write a WR header and, optionally, SGL
750 * @ndesc: number of Tx descriptors spanned by the SGL
751 * @skb: the packet corresponding to the WR
752 * @d: first Tx descriptor to be written
753 * @pidx: index of above descriptors
754 * @q: the SGE Tx queue
755 * @sgl: the SGL
756 * @flits: number of flits to the start of the SGL in the first descriptor
757 * @sgl_flits: the SGL size in flits
758 * @gen: the Tx descriptor generation
759 * @wr_hi: top 32 bits of WR header based on WR type (big endian)
760 * @wr_lo: low 32 bits of WR header based on WR type (big endian)
761 *
762 * Write a work request header and an associated SGL. If the SGL is
763 * small enough to fit into one Tx descriptor it has already been written
764 * and we just need to write the WR header. Otherwise we distribute the
765 * SGL across the number of descriptors it spans.
766 */
767static void write_wr_hdr_sgl(unsigned int ndesc, struct sk_buff *skb,
768 struct tx_desc *d, unsigned int pidx,
769 const struct sge_txq *q,
770 const struct sg_ent *sgl,
771 unsigned int flits, unsigned int sgl_flits,
772 unsigned int gen, unsigned int wr_hi,
773 unsigned int wr_lo)
774{
775 struct work_request_hdr *wrp = (struct work_request_hdr *)d;
776 struct tx_sw_desc *sd = &q->sdesc[pidx];
777
778 sd->skb = skb;
779 if (need_skb_unmap()) {
780 struct unmap_info *ui = (struct unmap_info *)skb->cb;
781
782 ui->fragidx = 0;
783 ui->addr_idx = 0;
784 ui->sflit = flits;
785 }
786
787 if (likely(ndesc == 1)) {
788 skb->priority = pidx;
789 wrp->wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
790 V_WR_SGLSFLT(flits)) | wr_hi;
791 wmb();
792 wrp->wr_lo = htonl(V_WR_LEN(flits + sgl_flits) |
793 V_WR_GEN(gen)) | wr_lo;
794 wr_gen2(d, gen);
795 } else {
796 unsigned int ogen = gen;
797 const u64 *fp = (const u64 *)sgl;
798 struct work_request_hdr *wp = wrp;
799
800 wrp->wr_hi = htonl(F_WR_SOP | V_WR_DATATYPE(1) |
801 V_WR_SGLSFLT(flits)) | wr_hi;
802
803 while (sgl_flits) {
804 unsigned int avail = WR_FLITS - flits;
805
806 if (avail > sgl_flits)
807 avail = sgl_flits;
808 memcpy(&d->flit[flits], fp, avail * sizeof(*fp));
809 sgl_flits -= avail;
810 ndesc--;
811 if (!sgl_flits)
812 break;
813
814 fp += avail;
815 d++;
816 sd++;
817 if (++pidx == q->size) {
818 pidx = 0;
819 gen ^= 1;
820 d = q->desc;
821 sd = q->sdesc;
822 }
823
824 sd->skb = skb;
825 wrp = (struct work_request_hdr *)d;
826 wrp->wr_hi = htonl(V_WR_DATATYPE(1) |
827 V_WR_SGLSFLT(1)) | wr_hi;
828 wrp->wr_lo = htonl(V_WR_LEN(min(WR_FLITS,
829 sgl_flits + 1)) |
830 V_WR_GEN(gen)) | wr_lo;
831 wr_gen2(d, gen);
832 flits = 1;
833 }
834 skb->priority = pidx;
835 wrp->wr_hi |= htonl(F_WR_EOP);
836 wmb();
837 wp->wr_lo = htonl(V_WR_LEN(WR_FLITS) | V_WR_GEN(ogen)) | wr_lo;
838 wr_gen2((struct tx_desc *)wp, ogen);
839 WARN_ON(ndesc != 0);
840 }
841}
842
843/**
844 * write_tx_pkt_wr - write a TX_PKT work request
845 * @adap: the adapter
846 * @skb: the packet to send
847 * @pi: the egress interface
848 * @pidx: index of the first Tx descriptor to write
849 * @gen: the generation value to use
850 * @q: the Tx queue
851 * @ndesc: number of descriptors the packet will occupy
852 * @compl: the value of the COMPL bit to use
853 *
854 * Generate a TX_PKT work request to send the supplied packet.
855 */
856static void write_tx_pkt_wr(struct adapter *adap, struct sk_buff *skb,
857 const struct port_info *pi,
858 unsigned int pidx, unsigned int gen,
859 struct sge_txq *q, unsigned int ndesc,
860 unsigned int compl)
861{
862 unsigned int flits, sgl_flits, cntrl, tso_info;
863 struct sg_ent *sgp, sgl[MAX_SKB_FRAGS / 2 + 1];
864 struct tx_desc *d = &q->desc[pidx];
865 struct cpl_tx_pkt *cpl = (struct cpl_tx_pkt *)d;
866
867 cpl->len = htonl(skb->len | 0x80000000);
868 cntrl = V_TXPKT_INTF(pi->port_id);
869
870 if (vlan_tx_tag_present(skb) && pi->vlan_grp)
871 cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN(vlan_tx_tag_get(skb));
872
873 tso_info = V_LSO_MSS(skb_shinfo(skb)->gso_size);
874 if (tso_info) {
875 int eth_type;
876 struct cpl_tx_pkt_lso *hdr = (struct cpl_tx_pkt_lso *)cpl;
877
878 d->flit[2] = 0;
879 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT_LSO);
880 hdr->cntrl = htonl(cntrl);
881 eth_type = skb->nh.raw - skb->data == ETH_HLEN ?
882 CPL_ETH_II : CPL_ETH_II_VLAN;
883 tso_info |= V_LSO_ETH_TYPE(eth_type) |
884 V_LSO_IPHDR_WORDS(skb->nh.iph->ihl) |
885 V_LSO_TCPHDR_WORDS(skb->h.th->doff);
886 hdr->lso_info = htonl(tso_info);
887 flits = 3;
888 } else {
889 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
890 cntrl |= F_TXPKT_IPCSUM_DIS; /* SW calculates IP csum */
891 cntrl |= V_TXPKT_L4CSUM_DIS(skb->ip_summed != CHECKSUM_PARTIAL);
892 cpl->cntrl = htonl(cntrl);
893
894 if (skb->len <= WR_LEN - sizeof(*cpl)) {
895 q->sdesc[pidx].skb = NULL;
896 if (!skb->data_len)
897 memcpy(&d->flit[2], skb->data, skb->len);
898 else
899 skb_copy_bits(skb, 0, &d->flit[2], skb->len);
900
901 flits = (skb->len + 7) / 8 + 2;
902 cpl->wr.wr_hi = htonl(V_WR_BCNTLFLT(skb->len & 7) |
903 V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT)
904 | F_WR_SOP | F_WR_EOP | compl);
905 wmb();
906 cpl->wr.wr_lo = htonl(V_WR_LEN(flits) | V_WR_GEN(gen) |
907 V_WR_TID(q->token));
908 wr_gen2(d, gen);
909 kfree_skb(skb);
910 return;
911 }
912
913 flits = 2;
914 }
915
916 sgp = ndesc == 1 ? (struct sg_ent *)&d->flit[flits] : sgl;
917 sgl_flits = make_sgl(skb, sgp, skb->data, skb_headlen(skb), adap->pdev);
918 if (need_skb_unmap())
919 ((struct unmap_info *)skb->cb)->len = skb_headlen(skb);
920
921 write_wr_hdr_sgl(ndesc, skb, d, pidx, q, sgl, flits, sgl_flits, gen,
922 htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | compl),
923 htonl(V_WR_TID(q->token)));
924}
925
926/**
927 * t3_eth_xmit - add a packet to the Ethernet Tx queue
928 * @skb: the packet
929 * @dev: the egress net device
930 *
931 * Add a packet to an SGE Tx queue. Runs with softirqs disabled.
932 */
933int t3_eth_xmit(struct sk_buff *skb, struct net_device *dev)
934{
935 unsigned int ndesc, pidx, credits, gen, compl;
936 const struct port_info *pi = netdev_priv(dev);
937 struct adapter *adap = dev->priv;
938 struct sge_qset *qs = dev2qset(dev);
939 struct sge_txq *q = &qs->txq[TXQ_ETH];
940
941 /*
942 * The chip min packet length is 9 octets but play safe and reject
943 * anything shorter than an Ethernet header.
944 */
945 if (unlikely(skb->len < ETH_HLEN)) {
946 dev_kfree_skb(skb);
947 return NETDEV_TX_OK;
948 }
949
950 spin_lock(&q->lock);
951 reclaim_completed_tx(adap, q);
952
953 credits = q->size - q->in_use;
954 ndesc = calc_tx_descs(skb);
955
956 if (unlikely(credits < ndesc)) {
957 if (!netif_queue_stopped(dev)) {
958 netif_stop_queue(dev);
959 set_bit(TXQ_ETH, &qs->txq_stopped);
960 q->stops++;
961 dev_err(&adap->pdev->dev,
962 "%s: Tx ring %u full while queue awake!\n",
963 dev->name, q->cntxt_id & 7);
964 }
965 spin_unlock(&q->lock);
966 return NETDEV_TX_BUSY;
967 }
968
969 q->in_use += ndesc;
970 if (unlikely(credits - ndesc < q->stop_thres)) {
971 q->stops++;
972 netif_stop_queue(dev);
973 set_bit(TXQ_ETH, &qs->txq_stopped);
974#if !USE_GTS
975 if (should_restart_tx(q) &&
976 test_and_clear_bit(TXQ_ETH, &qs->txq_stopped)) {
977 q->restarts++;
978 netif_wake_queue(dev);
979 }
980#endif
981 }
982
983 gen = q->gen;
984 q->unacked += ndesc;
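	/* request a WR completion from the HW roughly once every 8 Tx descriptors */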
985 compl = (q->unacked & 8) << (S_WR_COMPL - 3);
986 q->unacked &= 7;
987 pidx = q->pidx;
988 q->pidx += ndesc;
989 if (q->pidx >= q->size) {
990 q->pidx -= q->size;
991 q->gen ^= 1;
992 }
993
994 /* update port statistics */
995 if (skb->ip_summed == CHECKSUM_COMPLETE)
996 qs->port_stats[SGE_PSTAT_TX_CSUM]++;
997 if (skb_shinfo(skb)->gso_size)
998 qs->port_stats[SGE_PSTAT_TSO]++;
999 if (vlan_tx_tag_present(skb) && pi->vlan_grp)
1000 qs->port_stats[SGE_PSTAT_VLANINS]++;
1001
1002 dev->trans_start = jiffies;
1003 spin_unlock(&q->lock);
1004
1005 /*
1006 * We do not use Tx completion interrupts to free DMAd Tx packets.
1007 * This is good for performance but means that we rely on new Tx
1008 * packets arriving to run the destructors of completed packets,
1009 * which open up space in their sockets' send queues. Sometimes
1010 * we do not get such new packets causing Tx to stall. A single
1011 * UDP transmitter is a good example of this situation. We have
1012 * a clean up timer that periodically reclaims completed packets
1013 * but it doesn't run often enough (nor do we want it to) to prevent
1014 * lengthy stalls. A solution to this problem is to run the
1015 * destructor early, after the packet is queued but before it's DMAd.
1016 * A downside is that we lie to socket memory accounting, but the amount
1017 * of extra memory is reasonable (limited by the number of Tx
1018 * descriptors), the packets do actually get freed quickly by new
1019 * packets almost always, and for protocols like TCP that wait for
1020 * acks to really free up the data the extra memory is even less.
1021 * On the positive side we run the destructors on the sending CPU
1022 * rather than on a potentially different completing CPU, usually a
1023 * good thing. We also run them without holding our Tx queue lock,
1024 * unlike what reclaim_completed_tx() would otherwise do.
1025 *
1026 * Run the destructor before telling the DMA engine about the packet
1027 * to make sure it doesn't complete and get freed prematurely.
1028 */
1029 if (likely(!skb_shared(skb)))
1030 skb_orphan(skb);
1031
1032 write_tx_pkt_wr(adap, skb, pi, pidx, gen, q, ndesc, compl);
1033 check_ring_tx_db(adap, q);
1034 return NETDEV_TX_OK;
1035}
1036
1037/**
1038 * write_imm - write a packet into a Tx descriptor as immediate data
1039 * @d: the Tx descriptor to write
1040 * @skb: the packet
1041 * @len: the length of packet data to write as immediate data
1042 * @gen: the generation bit value to write
1043 *
1044 * Writes a packet as immediate data into a Tx descriptor. The packet
1045 * contains a work request at its beginning. We must write the packet
1046 * carefully so the SGE doesn't accidentally read it before it's written in
1047 * its entirety.
1048 */
1049static inline void write_imm(struct tx_desc *d, struct sk_buff *skb,
1050 unsigned int len, unsigned int gen)
1051{
1052 struct work_request_hdr *from = (struct work_request_hdr *)skb->data;
1053 struct work_request_hdr *to = (struct work_request_hdr *)d;
1054
1055 memcpy(&to[1], &from[1], len - sizeof(*from));
1056 to->wr_hi = from->wr_hi | htonl(F_WR_SOP | F_WR_EOP |
1057 V_WR_BCNTLFLT(len & 7));
1058 wmb();
1059 to->wr_lo = from->wr_lo | htonl(V_WR_GEN(gen) |
1060 V_WR_LEN((len + 7) / 8));
1061 wr_gen2(d, gen);
1062 kfree_skb(skb);
1063}
1064
1065/**
1066 * check_desc_avail - check descriptor availability on a send queue
1067 * @adap: the adapter
1068 * @q: the send queue
1069 * @skb: the packet needing the descriptors
1070 * @ndesc: the number of Tx descriptors needed
1071 * @qid: the Tx queue number in its queue set (TXQ_OFLD or TXQ_CTRL)
1072 *
1073 * Checks if the requested number of Tx descriptors is available on an
1074 * SGE send queue. If the queue is already suspended or not enough
1075 * descriptors are available the packet is queued for later transmission.
1076 * Must be called with the Tx queue locked.
1077 *
1078 * Returns 0 if enough descriptors are available, 1 if there aren't
1079 * enough descriptors and the packet has been queued, and 2 if the caller
1080 * needs to retry because there weren't enough descriptors at the
1081 * beginning of the call but some freed up in the mean time.
1082 */
1083static inline int check_desc_avail(struct adapter *adap, struct sge_txq *q,
1084 struct sk_buff *skb, unsigned int ndesc,
1085 unsigned int qid)
1086{
1087 if (unlikely(!skb_queue_empty(&q->sendq))) {
1088 addq_exit:__skb_queue_tail(&q->sendq, skb);
1089 return 1;
1090 }
1091 if (unlikely(q->size - q->in_use < ndesc)) {
1092 struct sge_qset *qs = txq_to_qset(q, qid);
1093
1094 set_bit(qid, &qs->txq_stopped);
1095 smp_mb__after_clear_bit();
1096
1097 if (should_restart_tx(q) &&
1098 test_and_clear_bit(qid, &qs->txq_stopped))
1099 return 2;
1100
1101 q->stops++;
1102 goto addq_exit;
1103 }
1104 return 0;
1105}
1106
1107/**
1108 * reclaim_completed_tx_imm - reclaim completed control-queue Tx descs
1109 * @q: the SGE control Tx queue
1110 *
1111 * This is a variant of reclaim_completed_tx() that is used for Tx queues
1112 * that send only immediate data (presently just the control queues) and
1113 * thus do not have any sk_buffs to release.
1114 */
1115static inline void reclaim_completed_tx_imm(struct sge_txq *q)
1116{
1117 unsigned int reclaim = q->processed - q->cleaned;
1118
1119 q->in_use -= reclaim;
1120 q->cleaned += reclaim;
1121}
1122
1123static inline int immediate(const struct sk_buff *skb)
1124{
1125 return skb->len <= WR_LEN && !skb->data_len;
1126}
1127
1128/**
1129 * ctrl_xmit - send a packet through an SGE control Tx queue
1130 * @adap: the adapter
1131 * @q: the control queue
1132 * @skb: the packet
1133 *
1134 * Send a packet through an SGE control Tx queue. Packets sent through
1135 * a control queue must fit entirely as immediate data in a single Tx
1136 * descriptor and have no page fragments.
1137 */
1138static int ctrl_xmit(struct adapter *adap, struct sge_txq *q,
1139 struct sk_buff *skb)
1140{
1141 int ret;
1142 struct work_request_hdr *wrp = (struct work_request_hdr *)skb->data;
1143
1144 if (unlikely(!immediate(skb))) {
1145 WARN_ON(1);
1146 dev_kfree_skb(skb);
1147 return NET_XMIT_SUCCESS;
1148 }
1149
1150 wrp->wr_hi |= htonl(F_WR_SOP | F_WR_EOP);
1151 wrp->wr_lo = htonl(V_WR_TID(q->token));
1152
1153 spin_lock(&q->lock);
1154 again:reclaim_completed_tx_imm(q);
1155
1156 ret = check_desc_avail(adap, q, skb, 1, TXQ_CTRL);
1157 if (unlikely(ret)) {
1158 if (ret == 1) {
1159 spin_unlock(&q->lock);
1160 return NET_XMIT_CN;
1161 }
1162 goto again;
1163 }
1164
1165 write_imm(&q->desc[q->pidx], skb, skb->len, q->gen);
1166
1167 q->in_use++;
1168 if (++q->pidx >= q->size) {
1169 q->pidx = 0;
1170 q->gen ^= 1;
1171 }
1172 spin_unlock(&q->lock);
1173 wmb();
1174 t3_write_reg(adap, A_SG_KDOORBELL,
1175 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1176 return NET_XMIT_SUCCESS;
1177}
1178
1179/**
1180 * restart_ctrlq - restart a suspended control queue
1181 * @qs: the queue set containing the control queue
1182 *
1183 * Resumes transmission on a suspended Tx control queue.
1184 */
1185static void restart_ctrlq(unsigned long data)
1186{
1187 struct sk_buff *skb;
1188 struct sge_qset *qs = (struct sge_qset *)data;
1189 struct sge_txq *q = &qs->txq[TXQ_CTRL];
1190 struct adapter *adap = qs->netdev->priv;
1191
1192 spin_lock(&q->lock);
1193 again:reclaim_completed_tx_imm(q);
1194
1195 while (q->in_use < q->size && (skb = __skb_dequeue(&q->sendq)) != NULL) {
1196
1197 write_imm(&q->desc[q->pidx], skb, skb->len, q->gen);
1198
1199 if (++q->pidx >= q->size) {
1200 q->pidx = 0;
1201 q->gen ^= 1;
1202 }
1203 q->in_use++;
1204 }
1205
1206 if (!skb_queue_empty(&q->sendq)) {
1207 set_bit(TXQ_CTRL, &qs->txq_stopped);
1208 smp_mb__after_clear_bit();
1209
1210 if (should_restart_tx(q) &&
1211 test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped))
1212 goto again;
1213 q->stops++;
1214 }
1215
1216 spin_unlock(&q->lock);
1217 t3_write_reg(adap, A_SG_KDOORBELL,
1218 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1219}
1220
1221/*
1222 * Send a management message through control queue 0
1223 */
1224int t3_mgmt_tx(struct adapter *adap, struct sk_buff *skb)
1225{
1226 return ctrl_xmit(adap, &adap->sge.qs[0].txq[TXQ_CTRL], skb);
1227}
1228
1229/**
1230 * write_ofld_wr - write an offload work request
1231 * @adap: the adapter
1232 * @skb: the packet to send
1233 * @q: the Tx queue
1234 * @pidx: index of the first Tx descriptor to write
1235 * @gen: the generation value to use
1236 * @ndesc: number of descriptors the packet will occupy
1237 *
1238 * Write an offload work request to send the supplied packet. The packet
1239 * data already carry the work request with most fields populated.
1240 */
1241static void write_ofld_wr(struct adapter *adap, struct sk_buff *skb,
1242 struct sge_txq *q, unsigned int pidx,
1243 unsigned int gen, unsigned int ndesc)
1244{
1245 unsigned int sgl_flits, flits;
1246 struct work_request_hdr *from;
1247 struct sg_ent *sgp, sgl[MAX_SKB_FRAGS / 2 + 1];
1248 struct tx_desc *d = &q->desc[pidx];
1249
1250 if (immediate(skb)) {
1251 q->sdesc[pidx].skb = NULL;
1252 write_imm(d, skb, skb->len, gen);
1253 return;
1254 }
1255
1256 /* Only TX_DATA builds SGLs */
1257
1258 from = (struct work_request_hdr *)skb->data;
1259 memcpy(&d->flit[1], &from[1], skb->h.raw - skb->data - sizeof(*from));
1260
1261 flits = (skb->h.raw - skb->data) / 8;
1262 sgp = ndesc == 1 ? (struct sg_ent *)&d->flit[flits] : sgl;
1263 sgl_flits = make_sgl(skb, sgp, skb->h.raw, skb->tail - skb->h.raw,
1264 adap->pdev);
1265 if (need_skb_unmap())
1266 ((struct unmap_info *)skb->cb)->len = skb->tail - skb->h.raw;
1267
1268 write_wr_hdr_sgl(ndesc, skb, d, pidx, q, sgl, flits, sgl_flits,
1269 gen, from->wr_hi, from->wr_lo);
1270}
1271
1272/**
1273 * calc_tx_descs_ofld - calculate # of Tx descriptors for an offload packet
1274 * @skb: the packet
1275 *
1276 * Returns the number of Tx descriptors needed for the given offload
1277 * packet. These packets are already fully constructed.
1278 */
1279static inline unsigned int calc_tx_descs_ofld(const struct sk_buff *skb)
1280{
1281 unsigned int flits, cnt = skb_shinfo(skb)->nr_frags;
1282
1283 if (skb->len <= WR_LEN && cnt == 0)
1284 return 1; /* packet fits as immediate data */
1285
1286 flits = (skb->h.raw - skb->data) / 8; /* headers */
1287 if (skb->tail != skb->h.raw)
1288 cnt++;
1289 return flits_to_desc(flits + sgl_len(cnt));
1290}
1291
1292/**
1293 * ofld_xmit - send a packet through an offload queue
1294 * @adap: the adapter
1295 * @q: the Tx offload queue
1296 * @skb: the packet
1297 *
1298 * Send an offload packet through an SGE offload queue.
1299 */
1300static int ofld_xmit(struct adapter *adap, struct sge_txq *q,
1301 struct sk_buff *skb)
1302{
1303 int ret;
1304 unsigned int ndesc = calc_tx_descs_ofld(skb), pidx, gen;
1305
1306 spin_lock(&q->lock);
1307 again:reclaim_completed_tx(adap, q);
1308
1309 ret = check_desc_avail(adap, q, skb, ndesc, TXQ_OFLD);
1310 if (unlikely(ret)) {
1311 if (ret == 1) {
1312 skb->priority = ndesc; /* save for restart */
1313 spin_unlock(&q->lock);
1314 return NET_XMIT_CN;
1315 }
1316 goto again;
1317 }
1318
1319 gen = q->gen;
1320 q->in_use += ndesc;
1321 pidx = q->pidx;
1322 q->pidx += ndesc;
1323 if (q->pidx >= q->size) {
1324 q->pidx -= q->size;
1325 q->gen ^= 1;
1326 }
1327 spin_unlock(&q->lock);
1328
1329 write_ofld_wr(adap, skb, q, pidx, gen, ndesc);
1330 check_ring_tx_db(adap, q);
1331 return NET_XMIT_SUCCESS;
1332}
1333
1334/**
1335 * restart_offloadq - restart a suspended offload queue
1336 * @qs: the queue set containing the offload queue
1337 *
1338 * Resumes transmission on a suspended Tx offload queue.
1339 */
1340static void restart_offloadq(unsigned long data)
1341{
1342 struct sk_buff *skb;
1343 struct sge_qset *qs = (struct sge_qset *)data;
1344 struct sge_txq *q = &qs->txq[TXQ_OFLD];
1345 struct adapter *adap = qs->netdev->priv;
1346
1347 spin_lock(&q->lock);
1348 again:reclaim_completed_tx(adap, q);
1349
1350 while ((skb = skb_peek(&q->sendq)) != NULL) {
1351 unsigned int gen, pidx;
1352 unsigned int ndesc = skb->priority;
1353
1354 if (unlikely(q->size - q->in_use < ndesc)) {
1355 set_bit(TXQ_OFLD, &qs->txq_stopped);
1356 smp_mb__after_clear_bit();
1357
1358 if (should_restart_tx(q) &&
1359 test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped))
1360 goto again;
1361 q->stops++;
1362 break;
1363 }
1364
1365 gen = q->gen;
1366 q->in_use += ndesc;
1367 pidx = q->pidx;
1368 q->pidx += ndesc;
1369 if (q->pidx >= q->size) {
1370 q->pidx -= q->size;
1371 q->gen ^= 1;
1372 }
1373 __skb_unlink(skb, &q->sendq);
1374 spin_unlock(&q->lock);
1375
1376 write_ofld_wr(adap, skb, q, pidx, gen, ndesc);
1377 spin_lock(&q->lock);
1378 }
1379 spin_unlock(&q->lock);
1380
1381#if USE_GTS
1382 set_bit(TXQ_RUNNING, &q->flags);
1383 set_bit(TXQ_LAST_PKT_DB, &q->flags);
1384#endif
1385 t3_write_reg(adap, A_SG_KDOORBELL,
1386 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1387}
1388
1389/**
1390 * queue_set - return the queue set a packet should use
1391 * @skb: the packet
1392 *
1393 * Maps a packet to the SGE queue set it should use. The desired queue
1394 * set is carried in bits 1-3 in the packet's priority.
1395 */
1396static inline int queue_set(const struct sk_buff *skb)
1397{
1398 return skb->priority >> 1;
1399}
1400
1401/**
1402 * is_ctrl_pkt - return whether an offload packet is a control packet
1403 * @skb: the packet
1404 *
1405 * Determines whether an offload packet should use an OFLD or a CTRL
1406 * Tx queue. This is indicated by bit 0 in the packet's priority.
1407 */
1408static inline int is_ctrl_pkt(const struct sk_buff *skb)
1409{
1410 return skb->priority & 1;
1411}
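/*
 * Illustrative sketch, not part of the original driver: how a sender could
 * encode the queue selection described above into skb->priority (bit 0
 * selects the control queue, bits 1-3 select the queue set).
 */
static inline void example_set_offload_priority(struct sk_buff *skb,
						unsigned int qset, int ctrl)
{
	skb->priority = (qset << 1) | (ctrl ? 1 : 0);
}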
1412
1413/**
1414 * t3_offload_tx - send an offload packet
1415 * @tdev: the offload device to send to
1416 * @skb: the packet
1417 *
1418 * Sends an offload packet. We use the packet priority to select the
1419 * appropriate Tx queue as follows: bit 0 indicates whether the packet
1420 * should be sent as regular or control, bits 1-3 select the queue set.
1421 */
1422int t3_offload_tx(struct t3cdev *tdev, struct sk_buff *skb)
1423{
1424 struct adapter *adap = tdev2adap(tdev);
1425 struct sge_qset *qs = &adap->sge.qs[queue_set(skb)];
1426
1427 if (unlikely(is_ctrl_pkt(skb)))
1428 return ctrl_xmit(adap, &qs->txq[TXQ_CTRL], skb);
1429
1430 return ofld_xmit(adap, &qs->txq[TXQ_OFLD], skb);
1431}
1432
1433/**
1434 * offload_enqueue - add an offload packet to an SGE offload receive queue
1435 * @q: the SGE response queue
1436 * @skb: the packet
1437 *
1438 * Add a new offload packet to an SGE response queue's offload packet
1439 * queue. If the packet is the first on the queue it schedules the RX
1440 * softirq to process the queue.
1441 */
1442static inline void offload_enqueue(struct sge_rspq *q, struct sk_buff *skb)
1443{
1444 skb->next = skb->prev = NULL;
1445 if (q->rx_tail)
1446 q->rx_tail->next = skb;
1447 else {
1448 struct sge_qset *qs = rspq_to_qset(q);
1449
1450 if (__netif_rx_schedule_prep(qs->netdev))
1451 __netif_rx_schedule(qs->netdev);
1452 q->rx_head = skb;
1453 }
1454 q->rx_tail = skb;
1455}
1456
1457/**
1458 * deliver_partial_bundle - deliver a (partial) bundle of Rx offload pkts
1459 * @tdev: the offload device that will be receiving the packets
1460 * @q: the SGE response queue that assembled the bundle
1461 * @skbs: the partial bundle
1462 * @n: the number of packets in the bundle
1463 *
1464 * Delivers a (partial) bundle of Rx offload packets to an offload device.
1465 */
1466static inline void deliver_partial_bundle(struct t3cdev *tdev,
1467 struct sge_rspq *q,
1468 struct sk_buff *skbs[], int n)
1469{
1470 if (n) {
1471 q->offload_bundles++;
1472 tdev->recv(tdev, skbs, n);
1473 }
1474}
1475
1476/**
1477 * ofld_poll - NAPI handler for offload packets in interrupt mode
1478 * @dev: the network device doing the polling
1479 * @budget: polling budget
1480 *
1481 * The NAPI handler for offload packets when a response queue is serviced
1482 * by the hard interrupt handler, i.e., when it's operating in non-polling
1483 * mode. Creates small packet batches and sends them through the offload
1484 * receive handler. Batches need to be of modest size as we do prefetches
1485 * on the packets in each.
1486 */
1487static int ofld_poll(struct net_device *dev, int *budget)
1488{
1489 struct adapter *adapter = dev->priv;
1490 struct sge_qset *qs = dev2qset(dev);
1491 struct sge_rspq *q = &qs->rspq;
1492 int work_done, limit = min(*budget, dev->quota), avail = limit;
1493
1494 while (avail) {
1495 struct sk_buff *head, *tail, *skbs[RX_BUNDLE_SIZE];
1496 int ngathered;
1497
1498 spin_lock_irq(&q->lock);
1499 head = q->rx_head;
1500 if (!head) {
1501 work_done = limit - avail;
1502 *budget -= work_done;
1503 dev->quota -= work_done;
1504 __netif_rx_complete(dev);
1505 spin_unlock_irq(&q->lock);
1506 return 0;
1507 }
1508
1509 tail = q->rx_tail;
1510 q->rx_head = q->rx_tail = NULL;
1511 spin_unlock_irq(&q->lock);
1512
1513 for (ngathered = 0; avail && head; avail--) {
1514 prefetch(head->data);
1515 skbs[ngathered] = head;
1516 head = head->next;
1517 skbs[ngathered]->next = NULL;
1518 if (++ngathered == RX_BUNDLE_SIZE) {
1519 q->offload_bundles++;
1520 adapter->tdev.recv(&adapter->tdev, skbs,
1521 ngathered);
1522 ngathered = 0;
1523 }
1524 }
1525 if (head) { /* splice remaining packets back onto Rx queue */
1526 spin_lock_irq(&q->lock);
1527 tail->next = q->rx_head;
1528 if (!q->rx_head)
1529 q->rx_tail = tail;
1530 q->rx_head = head;
1531 spin_unlock_irq(&q->lock);
1532 }
1533 deliver_partial_bundle(&adapter->tdev, q, skbs, ngathered);
1534 }
1535 work_done = limit - avail;
1536 *budget -= work_done;
1537 dev->quota -= work_done;
1538 return 1;
1539}
1540
1541/**
1542 * rx_offload - process a received offload packet
1543 * @tdev: the offload device receiving the packet
1544 * @rq: the response queue that received the packet
1545 * @skb: the packet
1546 * @rx_gather: a gather list of packets if we are building a bundle
1547 * @gather_idx: index of the next available slot in the bundle
1548 *
1549 * Process an ingress offload packet and add it to the offload ingress
1550 * queue. Returns the index of the next available slot in the bundle.
1551 */
1552static inline int rx_offload(struct t3cdev *tdev, struct sge_rspq *rq,
1553 struct sk_buff *skb, struct sk_buff *rx_gather[],
1554 unsigned int gather_idx)
1555{
1556 rq->offload_pkts++;
1557 skb->mac.raw = skb->nh.raw = skb->h.raw = skb->data;
1558
1559 if (rq->polling) {
1560 rx_gather[gather_idx++] = skb;
1561 if (gather_idx == RX_BUNDLE_SIZE) {
1562 tdev->recv(tdev, rx_gather, RX_BUNDLE_SIZE);
1563 gather_idx = 0;
1564 rq->offload_bundles++;
1565 }
1566 } else
1567 offload_enqueue(rq, skb);
1568
1569 return gather_idx;
1570}
1571
1572/**
1573 * restart_tx - check whether to restart suspended Tx queues
1574 * @qs: the queue set to resume
1575 *
1576 * Restarts suspended Tx queues of an SGE queue set if they have enough
1577 * free resources to resume operation.
1578 */
1579static void restart_tx(struct sge_qset *qs)
1580{
1581 if (test_bit(TXQ_ETH, &qs->txq_stopped) &&
1582 should_restart_tx(&qs->txq[TXQ_ETH]) &&
1583 test_and_clear_bit(TXQ_ETH, &qs->txq_stopped)) {
1584 qs->txq[TXQ_ETH].restarts++;
1585 if (netif_running(qs->netdev))
1586 netif_wake_queue(qs->netdev);
1587 }
1588
1589 if (test_bit(TXQ_OFLD, &qs->txq_stopped) &&
1590 should_restart_tx(&qs->txq[TXQ_OFLD]) &&
1591 test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) {
1592 qs->txq[TXQ_OFLD].restarts++;
1593 tasklet_schedule(&qs->txq[TXQ_OFLD].qresume_tsk);
1594 }
1595 if (test_bit(TXQ_CTRL, &qs->txq_stopped) &&
1596 should_restart_tx(&qs->txq[TXQ_CTRL]) &&
1597 test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) {
1598 qs->txq[TXQ_CTRL].restarts++;
1599 tasklet_schedule(&qs->txq[TXQ_CTRL].qresume_tsk);
1600 }
1601}
1602
1603/**
1604 * rx_eth - process an ingress ethernet packet
1605 * @adap: the adapter
1606 * @rq: the response queue that received the packet
1607 * @skb: the packet
1608 * @pad: amount of padding at the start of the buffer
1609 *
1610 * Process an ingress ethernet packet and deliver it to the stack.
1611 * The padding is 2 if the packet was delivered in an Rx buffer and 0
1612 * if it was immediate data in a response.
1613 */
1614static void rx_eth(struct adapter *adap, struct sge_rspq *rq,
1615 struct sk_buff *skb, int pad)
1616{
1617 struct cpl_rx_pkt *p = (struct cpl_rx_pkt *)(skb->data + pad);
1618 struct port_info *pi;
1619
1620 rq->eth_pkts++;
1621 skb_pull(skb, sizeof(*p) + pad);
1622 skb->dev = adap->port[p->iff];
1623 skb->dev->last_rx = jiffies;
1624 skb->protocol = eth_type_trans(skb, skb->dev);
1625 pi = netdev_priv(skb->dev);
1626 if (pi->rx_csum_offload && p->csum_valid && p->csum == 0xffff &&
1627 !p->fragment) {
1628 rspq_to_qset(rq)->port_stats[SGE_PSTAT_RX_CSUM_GOOD]++;
1629 skb->ip_summed = CHECKSUM_UNNECESSARY;
1630 } else
1631 skb->ip_summed = CHECKSUM_NONE;
1632
1633 if (unlikely(p->vlan_valid)) {
1634 struct vlan_group *grp = pi->vlan_grp;
1635
1636 rspq_to_qset(rq)->port_stats[SGE_PSTAT_VLANEX]++;
1637 if (likely(grp))
1638 __vlan_hwaccel_rx(skb, grp, ntohs(p->vlan),
1639 rq->polling);
1640 else
1641 dev_kfree_skb_any(skb);
1642 } else if (rq->polling)
1643 netif_receive_skb(skb);
1644 else
1645 netif_rx(skb);
1646}
1647
1648/**
1649 * handle_rsp_cntrl_info - handles control information in a response
1650 * @qs: the queue set corresponding to the response
1651 * @flags: the response control flags
1652 *
1653 * Handles the control information of an SGE response, such as GTS
1654 * indications and completion credits for the queue set's Tx queues.
1655 * HW coalesces credits, we don't do any extra SW coalescing.
1656 */
1657static inline void handle_rsp_cntrl_info(struct sge_qset *qs, u32 flags)
1658{
1659 unsigned int credits;
1660
1661#if USE_GTS
1662 if (flags & F_RSPD_TXQ0_GTS)
1663 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_ETH].flags);
1664#endif
1665
1666 credits = G_RSPD_TXQ0_CR(flags);
1667 if (credits)
1668 qs->txq[TXQ_ETH].processed += credits;
1669
1670 credits = G_RSPD_TXQ2_CR(flags);
1671 if (credits)
1672 qs->txq[TXQ_CTRL].processed += credits;
1673
1674# if USE_GTS
1675 if (flags & F_RSPD_TXQ1_GTS)
1676 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_OFLD].flags);
1677# endif
1678 credits = G_RSPD_TXQ1_CR(flags);
1679 if (credits)
1680 qs->txq[TXQ_OFLD].processed += credits;
1681}
1682
1683/**
1684 * check_ring_db - check if we need to ring any doorbells
1685 * @adap: the adapter
1686 * @qs: the queue set whose Tx queues are to be examined
1687 * @sleeping: indicates which Tx queue sent GTS
1688 *
1689 * Checks if some of a queue set's Tx queues need to ring their doorbells
1690 * to resume transmission after idling while they still have unprocessed
1691 * descriptors.
1692 */
1693static void check_ring_db(struct adapter *adap, struct sge_qset *qs,
1694 unsigned int sleeping)
1695{
1696 if (sleeping & F_RSPD_TXQ0_GTS) {
1697 struct sge_txq *txq = &qs->txq[TXQ_ETH];
1698
1699 if (txq->cleaned + txq->in_use != txq->processed &&
1700 !test_and_set_bit(TXQ_LAST_PKT_DB, &txq->flags)) {
1701 set_bit(TXQ_RUNNING, &txq->flags);
1702 t3_write_reg(adap, A_SG_KDOORBELL, F_SELEGRCNTX |
1703 V_EGRCNTX(txq->cntxt_id));
1704 }
1705 }
1706
1707 if (sleeping & F_RSPD_TXQ1_GTS) {
1708 struct sge_txq *txq = &qs->txq[TXQ_OFLD];
1709
1710 if (txq->cleaned + txq->in_use != txq->processed &&
1711 !test_and_set_bit(TXQ_LAST_PKT_DB, &txq->flags)) {
1712 set_bit(TXQ_RUNNING, &txq->flags);
1713 t3_write_reg(adap, A_SG_KDOORBELL, F_SELEGRCNTX |
1714 V_EGRCNTX(txq->cntxt_id));
1715 }
1716 }
1717}
1718
1719/**
1720 * is_new_response - check if a response is newly written
1721 * @r: the response descriptor
1722 * @q: the response queue
1723 *
1724 * Returns true if a response descriptor contains a yet unprocessed
1725 * response.
1726 */
1727static inline int is_new_response(const struct rsp_desc *r,
1728 const struct sge_rspq *q)
1729{
1730 return (r->intr_gen & F_RSPD_GEN2) == q->gen;
1731}
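/*
 * q->gen is flipped each time the response queue index wraps (see
 * process_responses() below), so a descriptor whose generation bit matches
 * q->gen was written by the HW during the current pass over the ring and
 * has not been processed yet.
 */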
1732
1733#define RSPD_GTS_MASK (F_RSPD_TXQ0_GTS | F_RSPD_TXQ1_GTS)
1734#define RSPD_CTRL_MASK (RSPD_GTS_MASK | \
1735 V_RSPD_TXQ0_CR(M_RSPD_TXQ0_CR) | \
1736 V_RSPD_TXQ1_CR(M_RSPD_TXQ1_CR) | \
1737 V_RSPD_TXQ2_CR(M_RSPD_TXQ2_CR))
1738
1739/* How long to delay the next interrupt in case of memory shortage, in 0.1us. */
1740#define NOMEM_INTR_DELAY 2500
1741
1742/**
1743 * process_responses - process responses from an SGE response queue
1744 * @adap: the adapter
1745 * @qs: the queue set to which the response queue belongs
1746 * @budget: how many responses can be processed in this round
1747 *
1748 * Process responses from an SGE response queue up to the supplied budget.
1749 * Responses include received packets as well as credits and other events
1750 * for the queues that belong to the response queue's queue set.
1751 * A negative budget is effectively unlimited.
1752 *
1753 * Additionally choose the interrupt holdoff time for the next interrupt
1754 * on this queue. If the system is short of memory, use a fairly
1755 * long delay to help recovery.
1756 */
1757static int process_responses(struct adapter *adap, struct sge_qset *qs,
1758 int budget)
1759{
1760 struct sge_rspq *q = &qs->rspq;
1761 struct rsp_desc *r = &q->desc[q->cidx];
1762 int budget_left = budget;
1763 unsigned int sleeping = 0;
1764 struct sk_buff *offload_skbs[RX_BUNDLE_SIZE];
1765 int ngathered = 0;
1766
1767 q->next_holdoff = q->holdoff_tmr;
1768
1769 while (likely(budget_left && is_new_response(r, q))) {
1770 int eth, ethpad = 0;
1771 struct sk_buff *skb = NULL;
1772 u32 len, flags = ntohl(r->flags);
1773 u32 rss_hi = *(const u32 *)r, rss_lo = r->rss_hdr.rss_hash_val;
1774
1775 eth = r->rss_hdr.opcode == CPL_RX_PKT;
1776
1777 if (unlikely(flags & F_RSPD_ASYNC_NOTIF)) {
1778 skb = alloc_skb(AN_PKT_SIZE, GFP_ATOMIC);
1779 if (!skb)
1780 goto no_mem;
1781
1782 memcpy(__skb_put(skb, AN_PKT_SIZE), r, AN_PKT_SIZE);
1783 skb->data[0] = CPL_ASYNC_NOTIF;
1784 rss_hi = htonl(CPL_ASYNC_NOTIF << 24);
1785 q->async_notif++;
1786 } else if (flags & F_RSPD_IMM_DATA_VALID) {
1787 skb = get_imm_packet(r);
1788 if (unlikely(!skb)) {
1789 no_mem:
1790 q->next_holdoff = NOMEM_INTR_DELAY;
1791 q->nomem++;
1792 /* consume one credit since we tried */
1793 budget_left--;
1794 break;
1795 }
1796 q->imm_data++;
1797 } else if ((len = ntohl(r->len_cq)) != 0) {
1798 struct sge_fl *fl;
1799
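	/* the data landed in a free-list buffer; F_RSPD_FLQ selects the jumbo free list (fl[1]) */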
1800 fl = (len & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0];
1801 fl->credits--;
1802 skb = get_packet(adap, fl, G_RSPD_LEN(len),
1803 eth ? SGE_RX_DROP_THRES : 0);
1804 if (!skb)
1805 q->rx_drops++;
1806 else if (r->rss_hdr.opcode == CPL_TRACE_PKT)
1807 __skb_pull(skb, 2);
1808 ethpad = 2;
1809 if (++fl->cidx == fl->size)
1810 fl->cidx = 0;
1811 } else
1812 q->pure_rsps++;
1813
1814 if (flags & RSPD_CTRL_MASK) {
1815 sleeping |= flags & RSPD_GTS_MASK;
1816 handle_rsp_cntrl_info(qs, flags);
1817 }
1818
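	/* advance to the next response descriptor, flipping the generation bit on wrap */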
1819 r++;
1820 if (unlikely(++q->cidx == q->size)) {
1821 q->cidx = 0;
1822 q->gen ^= 1;
1823 r = q->desc;
1824 }
1825 prefetch(r);
1826
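	/* return accumulated credits to the response queue once a quarter of it is consumed */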
1827 if (++q->credits >= (q->size / 4)) {
1828 refill_rspq(adap, q, q->credits);
1829 q->credits = 0;
1830 }
1831
1832 if (likely(skb != NULL)) {
1833 if (eth)
1834 rx_eth(adap, q, skb, ethpad);
1835 else {
1836 /* Preserve the RSS info in csum & priority */
1837 skb->csum = rss_hi;
1838 skb->priority = rss_lo;
1839 ngathered = rx_offload(&adap->tdev, q, skb,
1840 offload_skbs, ngathered);
1841 }
1842 }
1843
1844 --budget_left;
1845 }
1846
1847 deliver_partial_bundle(&adap->tdev, q, offload_skbs, ngathered);
1848 if (sleeping)
1849 check_ring_db(adap, qs, sleeping);
1850
1851 smp_mb(); /* commit Tx queue .processed updates */
1852 if (unlikely(qs->txq_stopped != 0))
1853 restart_tx(qs);
1854
1855 budget -= budget_left;
1856 return budget;
1857}
1858
1859static inline int is_pure_response(const struct rsp_desc *r)
1860{
1861 u32 n = ntohl(r->flags) & (F_RSPD_ASYNC_NOTIF | F_RSPD_IMM_DATA_VALID);
1862
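	/* pure: no async notification, no immediate data, and no free-list buffer (len_cq == 0) */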
1863 return (n | r->len_cq) == 0;
1864}
1865
1866/**
1867 * napi_rx_handler - the NAPI handler for Rx processing
1868 * @dev: the net device
1869 * @budget: how many packets we can process in this round
1870 *
1871 * Handler for new data events when using NAPI.
1872 */
1873static int napi_rx_handler(struct net_device *dev, int *budget)
1874{
1875 struct adapter *adap = dev->priv;
1876 struct sge_qset *qs = dev2qset(dev);
1877 int effective_budget = min(*budget, dev->quota);
1878
1879 int work_done = process_responses(adap, qs, effective_budget);
1880 *budget -= work_done;
1881 dev->quota -= work_done;
1882
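	/* old-style NAPI: a nonzero return leaves the device on the poll list */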
1883 if (work_done >= effective_budget)
1884 return 1;
1885
1886 netif_rx_complete(dev);
1887
1888 /*
1889 * Because we don't atomically flush the following write it is
1890 * possible that in very rare cases it can reach the device in a way
1891 * that races with a new response being written plus an error interrupt
1892 * causing the NAPI interrupt handler below to return unhandled status
1893 * to the OS. To protect against this would require flushing the write
1894 * and doing both the write and the flush with interrupts off. Way too
1895 * expensive and unjustifiable given the rarity of the race.
1896 *
1897 * The race cannot happen at all with MSI-X.
1898 */
1899 t3_write_reg(adap, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) |
1900 V_NEWTIMER(qs->rspq.next_holdoff) |
1901 V_NEWINDEX(qs->rspq.cidx));
1902 return 0;
1903}
1904
1905/*
1906 * Returns true if the device is already scheduled for polling.
1907 */
1908static inline int napi_is_scheduled(struct net_device *dev)
1909{
1910 return test_bit(__LINK_STATE_RX_SCHED, &dev->state);
1911}
1912
1913/**
1914 * process_pure_responses - process pure responses from a response queue
1915 * @adap: the adapter
1916 * @qs: the queue set owning the response queue
1917 * @r: the first pure response to process
1918 *
1919 * A simpler version of process_responses() that handles only pure (i.e.,
1920 * non-data-carrying) responses. Such responses are too lightweight to
1921 * justify calling a softirq under NAPI, so we handle them specially in
1922 * the interrupt handler. The function is called with a pointer to a
1923 * response, which the caller must ensure is a valid pure response.
1924 *
1925 * Returns 1 if it encounters a valid data-carrying response, 0 otherwise.
1926 */
1927static int process_pure_responses(struct adapter *adap, struct sge_qset *qs,
1928 struct rsp_desc *r)
1929{
1930 struct sge_rspq *q = &qs->rspq;
1931 unsigned int sleeping = 0;
1932
1933 do {
1934 u32 flags = ntohl(r->flags);
1935
1936 r++;
1937 if (unlikely(++q->cidx == q->size)) {
1938 q->cidx = 0;
1939 q->gen ^= 1;
1940 r = q->desc;
1941 }
1942 prefetch(r);
1943
1944 if (flags & RSPD_CTRL_MASK) {
1945 sleeping |= flags & RSPD_GTS_MASK;
1946 handle_rsp_cntrl_info(qs, flags);
1947 }
1948
1949 q->pure_rsps++;
1950 if (++q->credits >= (q->size / 4)) {
1951 refill_rspq(adap, q, q->credits);
1952 q->credits = 0;
1953 }
1954 } while (is_new_response(r, q) && is_pure_response(r));
1955
1956 if (sleeping)
1957 check_ring_db(adap, qs, sleeping);
1958
1959 smp_mb(); /* commit Tx queue .processed updates */
1960 if (unlikely(qs->txq_stopped != 0))
1961 restart_tx(qs);
1962
1963 return is_new_response(r, q);
1964}
1965
1966/**
1967 * handle_responses - decide what to do with new responses in NAPI mode
1968 * @adap: the adapter
1969 * @q: the response queue
1970 *
1971 * This is used by the NAPI interrupt handlers to decide what to do with
1972 * new SGE responses. If there are no new responses it returns -1. If
1973 * there are new responses and they are pure (i.e., non-data-carrying),
1974 * it handles them directly in hard interrupt context, as they are very
1975 * cheap and don't deliver any packets. Finally, if there are any
1976 * data-carrying responses it schedules the NAPI handler. Returns 1 if it
1977 * schedules NAPI, 0 if all new responses were pure.
1978 *
1979 * The caller must ascertain NAPI is not already running.
1980 */
1981static inline int handle_responses(struct adapter *adap, struct sge_rspq *q)
1982{
1983 struct sge_qset *qs = rspq_to_qset(q);
1984 struct rsp_desc *r = &q->desc[q->cidx];
1985
1986 if (!is_new_response(r, q))
1987 return -1;
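	/* consume pure responses inline and, if nothing data-bearing follows, just update index and timer */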
1988 if (is_pure_response(r) && process_pure_responses(adap, qs, r) == 0) {
1989 t3_write_reg(adap, A_SG_GTS, V_RSPQ(q->cntxt_id) |
1990 V_NEWTIMER(q->holdoff_tmr) | V_NEWINDEX(q->cidx));
1991 return 0;
1992 }
1993 if (likely(__netif_rx_schedule_prep(qs->netdev)))
1994 __netif_rx_schedule(qs->netdev);
1995 return 1;
1996}
1997
1998/*
1999 * The MSI-X interrupt handler for an SGE response queue for the non-NAPI case
2000 * (i.e., response queue serviced in hard interrupt).
2001 */
2002irqreturn_t t3_sge_intr_msix(int irq, void *cookie)
2003{
2004 struct sge_qset *qs = cookie;
2005 struct adapter *adap = qs->netdev->priv;
2006 struct sge_rspq *q = &qs->rspq;
2007
2008 spin_lock(&q->lock);
2009 if (process_responses(adap, qs, -1) == 0)
2010 q->unhandled_irqs++;
2011 t3_write_reg(adap, A_SG_GTS, V_RSPQ(q->cntxt_id) |
2012 V_NEWTIMER(q->next_holdoff) | V_NEWINDEX(q->cidx));
2013 spin_unlock(&q->lock);
2014 return IRQ_HANDLED;
2015}
2016
2017/*
2018 * The MSI-X interrupt handler for an SGE response queue for the NAPI case
2019 * (i.e., response queue serviced by NAPI polling).
2020 */
2021irqreturn_t t3_sge_intr_msix_napi(int irq, void *cookie)
2022{
2023 struct sge_qset *qs = cookie;
2024 struct adapter *adap = qs->netdev->priv;
2025 struct sge_rspq *q = &qs->rspq;
2026
2027 spin_lock(&q->lock);
2028 BUG_ON(napi_is_scheduled(qs->netdev));
2029
2030 if (handle_responses(adap, q) < 0)
2031 q->unhandled_irqs++;
2032 spin_unlock(&q->lock);
2033 return IRQ_HANDLED;
2034}
2035
2036/*
2037 * The non-NAPI MSI interrupt handler. This needs to handle data events from
2038 * SGE response queues as well as error and other async events as they all use
2039 * the same MSI vector. We use one SGE response queue per port in this mode
2040 * and protect all response queues with queue 0's lock.
2041 */
2042static irqreturn_t t3_intr_msi(int irq, void *cookie)
2043{
2044 int new_packets = 0;
2045 struct adapter *adap = cookie;
2046 struct sge_rspq *q = &adap->sge.qs[0].rspq;
2047
2048 spin_lock(&q->lock);
2049
2050 if (process_responses(adap, &adap->sge.qs[0], -1)) {
2051 t3_write_reg(adap, A_SG_GTS, V_RSPQ(q->cntxt_id) |
2052 V_NEWTIMER(q->next_holdoff) | V_NEWINDEX(q->cidx));
2053 new_packets = 1;
2054 }
2055
2056 if (adap->params.nports == 2 &&
2057 process_responses(adap, &adap->sge.qs[1], -1)) {
2058 struct sge_rspq *q1 = &adap->sge.qs[1].rspq;
2059
2060 t3_write_reg(adap, A_SG_GTS, V_RSPQ(q1->cntxt_id) |
2061 V_NEWTIMER(q1->next_holdoff) |
2062 V_NEWINDEX(q1->cidx));
2063 new_packets = 1;
2064 }
2065
2066 if (!new_packets && t3_slow_intr_handler(adap) == 0)
2067 q->unhandled_irqs++;
2068
2069 spin_unlock(&q->lock);
2070 return IRQ_HANDLED;
2071}
2072
2073static int rspq_check_napi(struct net_device *dev, struct sge_rspq *q)
2074{
2075 if (!napi_is_scheduled(dev) && is_new_response(&q->desc[q->cidx], q)) {
2076 if (likely(__netif_rx_schedule_prep(dev)))
2077 __netif_rx_schedule(dev);
2078 return 1;
2079 }
2080 return 0;
2081}
2082
2083/*
2084 * The MSI interrupt handler for the NAPI case (i.e., response queues serviced
2085 * by NAPI polling). Handles data events from SGE response queues as well as
2086 * error and other async events as they all use the same MSI vector. We use
2087 * one SGE response queue per port in this mode and protect all response
2088 * queues with queue 0's lock.
2089 */
2090irqreturn_t t3_intr_msi_napi(int irq, void *cookie)
2091{
2092 int new_packets;
2093 struct adapter *adap = cookie;
2094 struct sge_rspq *q = &adap->sge.qs[0].rspq;
2095
2096 spin_lock(&q->lock);
2097
2098 new_packets = rspq_check_napi(adap->sge.qs[0].netdev, q);
2099 if (adap->params.nports == 2)
2100 new_packets += rspq_check_napi(adap->sge.qs[1].netdev,
2101 &adap->sge.qs[1].rspq);
2102 if (!new_packets && t3_slow_intr_handler(adap) == 0)
2103 q->unhandled_irqs++;
2104
2105 spin_unlock(&q->lock);
2106 return IRQ_HANDLED;
2107}
2108
2109/*
2110 * A helper function that processes responses and issues GTS.
2111 */
2112static inline int process_responses_gts(struct adapter *adap,
2113 struct sge_rspq *rq)
2114{
2115 int work;
2116
2117 work = process_responses(adap, rspq_to_qset(rq), -1);
2118 t3_write_reg(adap, A_SG_GTS, V_RSPQ(rq->cntxt_id) |
2119 V_NEWTIMER(rq->next_holdoff) | V_NEWINDEX(rq->cidx));
2120 return work;
2121}
2122
2123/*
2124 * The legacy INTx interrupt handler. This needs to handle data events from
2125 * SGE response queues as well as error and other async events as they all use
2126 * the same interrupt pin. We use one SGE response queue per port in this mode
2127 * and protect all response queues with queue 0's lock.
2128 */
2129static irqreturn_t t3_intr(int irq, void *cookie)
2130{
2131 int work_done, w0, w1;
2132 struct adapter *adap = cookie;
2133 struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
2134 struct sge_rspq *q1 = &adap->sge.qs[1].rspq;
2135
2136 spin_lock(&q0->lock);
2137
2138 w0 = is_new_response(&q0->desc[q0->cidx], q0);
2139 w1 = adap->params.nports == 2 &&
2140 is_new_response(&q1->desc[q1->cidx], q1);
2141
2142 if (likely(w0 | w1)) {
2143 t3_write_reg(adap, A_PL_CLI, 0);
2144 t3_read_reg(adap, A_PL_CLI); /* flush */
2145
2146 if (likely(w0))
2147 process_responses_gts(adap, q0);
2148
2149 if (w1)
2150 process_responses_gts(adap, q1);
2151
2152 work_done = w0 | w1;
2153 } else
2154 work_done = t3_slow_intr_handler(adap);
2155
2156 spin_unlock(&q0->lock);
2157 return IRQ_RETVAL(work_done != 0);
2158}
2159
2160/*
2161 * Interrupt handler for legacy INTx interrupts for T3B-based cards.
2162 * Handles data events from SGE response queues as well as error and other
2163 * async events as they all use the same interrupt pin. We use one SGE
2164 * response queue per port in this mode and protect all response queues with
2165 * queue 0's lock.
2166 */
2167static irqreturn_t t3b_intr(int irq, void *cookie)
2168{
2169 u32 map;
2170 struct adapter *adap = cookie;
2171 struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
2172
2173 t3_write_reg(adap, A_PL_CLI, 0);
2174 map = t3_read_reg(adap, A_SG_DATA_INTR);
2175
2176 if (unlikely(!map)) /* shared interrupt, most likely */
2177 return IRQ_NONE;
2178
2179 spin_lock(&q0->lock);
2180
2181 if (unlikely(map & F_ERRINTR))
2182 t3_slow_intr_handler(adap);
2183
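	/* bits 0 and 1 of the interrupt map indicate new responses for qsets 0 and 1 */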
2184 if (likely(map & 1))
2185 process_responses_gts(adap, q0);
2186
2187 if (map & 2)
2188 process_responses_gts(adap, &adap->sge.qs[1].rspq);
2189
2190 spin_unlock(&q0->lock);
2191 return IRQ_HANDLED;
2192}
2193
2194/*
2195 * NAPI interrupt handler for legacy INTx interrupts for T3B-based cards.
2196 * Handles data events from SGE response queues as well as error and other
2197 * async events as they all use the same interrupt pin. We use one SGE
2198 * response queue per port in this mode and protect all response queues with
2199 * queue 0's lock.
2200 */
2201static irqreturn_t t3b_intr_napi(int irq, void *cookie)
2202{
2203 u32 map;
2204 struct net_device *dev;
2205 struct adapter *adap = cookie;
2206 struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
2207
2208 t3_write_reg(adap, A_PL_CLI, 0);
2209 map = t3_read_reg(adap, A_SG_DATA_INTR);
2210
2211 if (unlikely(!map)) /* shared interrupt, most likely */
2212 return IRQ_NONE;
2213
2214 spin_lock(&q0->lock);
2215
2216 if (unlikely(map & F_ERRINTR))
2217 t3_slow_intr_handler(adap);
2218
2219 if (likely(map & 1)) {
2220 dev = adap->sge.qs[0].netdev;
2221
2222 if (likely(__netif_rx_schedule_prep(dev)))
2223 __netif_rx_schedule(dev);
2224 }
2225 if (map & 2) {
2226 dev = adap->sge.qs[1].netdev;
2227
2228 if (likely(__netif_rx_schedule_prep(dev)))
2229 __netif_rx_schedule(dev);
2230 }
2231
2232 spin_unlock(&q0->lock);
2233 return IRQ_HANDLED;
2234}
2235
2236/**
2237 * t3_intr_handler - select the top-level interrupt handler
2238 * @adap: the adapter
2239 * @polling: whether using NAPI to service response queues
2240 *
2241 * Selects the top-level interrupt handler based on the type of interrupts
2242 * (MSI-X, MSI, or legacy) and whether NAPI will be used to service the
2243 * response queues.
2244 */
2245intr_handler_t t3_intr_handler(struct adapter *adap, int polling)
2246{
2247 if (adap->flags & USING_MSIX)
2248 return polling ? t3_sge_intr_msix_napi : t3_sge_intr_msix;
2249 if (adap->flags & USING_MSI)
2250 return polling ? t3_intr_msi_napi : t3_intr_msi;
2251 if (adap->params.rev > 0)
2252 return polling ? t3b_intr_napi : t3b_intr;
2253 return t3_intr;
2254}
2255
2256/**
2257 * t3_sge_err_intr_handler - SGE async event interrupt handler
2258 * @adapter: the adapter
2259 *
2260 * Interrupt handler for SGE asynchronous (non-data) events.
2261 */
2262void t3_sge_err_intr_handler(struct adapter *adapter)
2263{
2264 unsigned int v, status = t3_read_reg(adapter, A_SG_INT_CAUSE);
2265
2266 if (status & F_RSPQCREDITOVERFOW)
2267 CH_ALERT(adapter, "SGE response queue credit overflow\n");
2268
2269 if (status & F_RSPQDISABLED) {
2270 v = t3_read_reg(adapter, A_SG_RSPQ_FL_STATUS);
2271
2272 CH_ALERT(adapter,
2273 "packet delivered to disabled response queue "
2274 "(0x%x)\n", (v >> S_RSPQ0DISABLED) & 0xff);
2275 }
2276
2277 t3_write_reg(adapter, A_SG_INT_CAUSE, status);
2278 if (status & (F_RSPQCREDITOVERFOW | F_RSPQDISABLED))
2279 t3_fatal_err(adapter);
2280}
2281
2282/**
2283 * sge_timer_cb - perform periodic maintenance of an SGE qset
2284 * @data: the SGE queue set to maintain
2285 *
2286 * Runs periodically from a timer to perform maintenance of an SGE queue
2287 * set. It performs two tasks:
2288 *
2289 * a) Cleans up any completed Tx descriptors that may still be pending.
2290 * Normal descriptor cleanup happens when new packets are added to a Tx
2291 * queue so this timer is relatively infrequent and does any cleanup only
2292 * if the Tx queue has not seen any new packets in a while. We make a
2293 * best effort attempt to reclaim descriptors, in that we don't wait
2294 * around if we cannot get a queue's lock (which most likely is because
2295 * someone else is queueing new packets and so will also handle the clean
2296 * up). Since control queues use immediate data exclusively we don't
2297 * bother cleaning them up here.
2298 *
2299 * b) Replenishes Rx queues that have run out due to memory shortage.
2300 * Normally new Rx buffers are added when existing ones are consumed but
2301 * when out of memory a queue can become empty. We try to add only a few
2302 * buffers here, the queue will be replenished fully as these new buffers
2303 * are used up if memory shortage has subsided.
2304 */
2305static void sge_timer_cb(unsigned long data)
2306{
2307 spinlock_t *lock;
2308 struct sge_qset *qs = (struct sge_qset *)data;
2309 struct adapter *adap = qs->netdev->priv;
2310
2311 if (spin_trylock(&qs->txq[TXQ_ETH].lock)) {
2312 reclaim_completed_tx(adap, &qs->txq[TXQ_ETH]);
2313 spin_unlock(&qs->txq[TXQ_ETH].lock);
2314 }
2315 if (spin_trylock(&qs->txq[TXQ_OFLD].lock)) {
2316 reclaim_completed_tx(adap, &qs->txq[TXQ_OFLD]);
2317 spin_unlock(&qs->txq[TXQ_OFLD].lock);
2318 }
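	/* with MSI-X each qset has its own rspq lock, otherwise all qsets share queue 0's lock */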
2319 lock = (adap->flags & USING_MSIX) ? &qs->rspq.lock :
2320 &adap->sge.qs[0].rspq.lock;
2321 if (spin_trylock_irq(lock)) {
2322 if (!napi_is_scheduled(qs->netdev)) {
2323 if (qs->fl[0].credits < qs->fl[0].size)
2324 __refill_fl(adap, &qs->fl[0]);
2325 if (qs->fl[1].credits < qs->fl[1].size)
2326 __refill_fl(adap, &qs->fl[1]);
2327 }
2328 spin_unlock_irq(lock);
2329 }
2330 mod_timer(&qs->tx_reclaim_timer, jiffies + TX_RECLAIM_PERIOD);
2331}
2332
2333/**
2334 * t3_update_qset_coalesce - update coalescing settings for a queue set
2335 * @qs: the SGE queue set
2336 * @p: new queue set parameters
2337 *
2338 * Update the coalescing settings for an SGE queue set. Nothing is done
2339 * if the queue set is not initialized yet.
2340 */
2341void t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p)
2342{
2343 if (!qs->netdev)
2344 return;
2345
2346 qs->rspq.holdoff_tmr = max(p->coalesce_usecs * 10, 1U); /* can't be 0 */
2347 qs->rspq.polling = p->polling;
2348 qs->netdev->poll = p->polling ? napi_rx_handler : ofld_poll;
2349}
2350
2351/**
2352 * t3_sge_alloc_qset - initialize an SGE queue set
2353 * @adapter: the adapter
2354 * @id: the queue set id
2355 * @nports: how many Ethernet ports will be using this queue set
2356 * @irq_vec_idx: the IRQ vector index for response queue interrupts
2357 * @p: configuration parameters for this queue set
2358 * @ntxq: number of Tx queues for the queue set
2359 * @netdev: net device associated with this queue set
2360 *
2361 * Allocate resources and initialize an SGE queue set. A queue set
2362 * comprises a response queue, two Rx free-buffer queues, and up to 3
2363 * Tx queues. The Tx queues are assigned roles in the order Ethernet
2364 * queue, offload queue, and control queue.
2365 */
2366int t3_sge_alloc_qset(struct adapter *adapter, unsigned int id, int nports,
2367 int irq_vec_idx, const struct qset_params *p,
2368 int ntxq, struct net_device *netdev)
2369{
2370 int i, ret = -ENOMEM;
2371 struct sge_qset *q = &adapter->sge.qs[id];
2372
2373 init_qset_cntxt(q, id);
2374 init_timer(&q->tx_reclaim_timer);
2375 q->tx_reclaim_timer.data = (unsigned long)q;
2376 q->tx_reclaim_timer.function = sge_timer_cb;
2377
2378 q->fl[0].desc = alloc_ring(adapter->pdev, p->fl_size,
2379 sizeof(struct rx_desc),
2380 sizeof(struct rx_sw_desc),
2381 &q->fl[0].phys_addr, &q->fl[0].sdesc);
2382 if (!q->fl[0].desc)
2383 goto err;
2384
2385 q->fl[1].desc = alloc_ring(adapter->pdev, p->jumbo_size,
2386 sizeof(struct rx_desc),
2387 sizeof(struct rx_sw_desc),
2388 &q->fl[1].phys_addr, &q->fl[1].sdesc);
2389 if (!q->fl[1].desc)
2390 goto err;
2391
2392 q->rspq.desc = alloc_ring(adapter->pdev, p->rspq_size,
2393 sizeof(struct rsp_desc), 0,
2394 &q->rspq.phys_addr, NULL);
2395 if (!q->rspq.desc)
2396 goto err;
2397
2398 for (i = 0; i < ntxq; ++i) {
2399 /*
2400 * The control queue always uses immediate data so does not
2401 * need to keep track of any sk_buffs.
2402 */
2403 size_t sz = i == TXQ_CTRL ? 0 : sizeof(struct tx_sw_desc);
2404
2405 q->txq[i].desc = alloc_ring(adapter->pdev, p->txq_size[i],
2406 sizeof(struct tx_desc), sz,
2407 &q->txq[i].phys_addr,
2408 &q->txq[i].sdesc);
2409 if (!q->txq[i].desc)
2410 goto err;
2411
2412 q->txq[i].gen = 1;
2413 q->txq[i].size = p->txq_size[i];
2414 spin_lock_init(&q->txq[i].lock);
2415 skb_queue_head_init(&q->txq[i].sendq);
2416 }
2417
2418 tasklet_init(&q->txq[TXQ_OFLD].qresume_tsk, restart_offloadq,
2419 (unsigned long)q);
2420 tasklet_init(&q->txq[TXQ_CTRL].qresume_tsk, restart_ctrlq,
2421 (unsigned long)q);
2422
2423 q->fl[0].gen = q->fl[1].gen = 1;
2424 q->fl[0].size = p->fl_size;
2425 q->fl[1].size = p->jumbo_size;
2426
2427 q->rspq.gen = 1;
2428 q->rspq.size = p->rspq_size;
2429 spin_lock_init(&q->rspq.lock);
2430
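	/* leave room for one maximally fragmented packet per port before stopping the Tx queue */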
2431 q->txq[TXQ_ETH].stop_thres = nports *
2432 flits_to_desc(sgl_len(MAX_SKB_FRAGS + 1) + 3);
2433
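	/*
	 * A single Tx queue implies an Ethernet-only queue set, so size the
	 * free-list buffers for CPL_RX_PKT frames; otherwise allow for offload
	 * CPL_RX_DATA headers and use 16KB jumbo buffers.
	 */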
2434 if (ntxq == 1) {
2435 q->fl[0].buf_size = SGE_RX_SM_BUF_SIZE + 2 +
2436 sizeof(struct cpl_rx_pkt);
2437 q->fl[1].buf_size = MAX_FRAME_SIZE + 2 +
2438 sizeof(struct cpl_rx_pkt);
2439 } else {
2440 q->fl[0].buf_size = SGE_RX_SM_BUF_SIZE +
2441 sizeof(struct cpl_rx_data);
2442 q->fl[1].buf_size = (16 * 1024) -
2443 SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
2444 }
2445
2446 spin_lock(&adapter->sge.reg_lock);
2447
2448 /* FL threshold comparison uses < */
2449 ret = t3_sge_init_rspcntxt(adapter, q->rspq.cntxt_id, irq_vec_idx,
2450 q->rspq.phys_addr, q->rspq.size,
2451 q->fl[0].buf_size, 1, 0);
2452 if (ret)
2453 goto err_unlock;
2454
2455 for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
2456 ret = t3_sge_init_flcntxt(adapter, q->fl[i].cntxt_id, 0,
2457 q->fl[i].phys_addr, q->fl[i].size,
2458 q->fl[i].buf_size, p->cong_thres, 1,
2459 0);
2460 if (ret)
2461 goto err_unlock;
2462 }
2463
2464 ret = t3_sge_init_ecntxt(adapter, q->txq[TXQ_ETH].cntxt_id, USE_GTS,
2465 SGE_CNTXT_ETH, id, q->txq[TXQ_ETH].phys_addr,
2466 q->txq[TXQ_ETH].size, q->txq[TXQ_ETH].token,
2467 1, 0);
2468 if (ret)
2469 goto err_unlock;
2470
2471 if (ntxq > 1) {
2472 ret = t3_sge_init_ecntxt(adapter, q->txq[TXQ_OFLD].cntxt_id,
2473 USE_GTS, SGE_CNTXT_OFLD, id,
2474 q->txq[TXQ_OFLD].phys_addr,
2475 q->txq[TXQ_OFLD].size, 0, 1, 0);
2476 if (ret)
2477 goto err_unlock;
2478 }
2479
2480 if (ntxq > 2) {
2481 ret = t3_sge_init_ecntxt(adapter, q->txq[TXQ_CTRL].cntxt_id, 0,
2482 SGE_CNTXT_CTRL, id,
2483 q->txq[TXQ_CTRL].phys_addr,
2484 q->txq[TXQ_CTRL].size,
2485 q->txq[TXQ_CTRL].token, 1, 0);
2486 if (ret)
2487 goto err_unlock;
2488 }
2489
2490 spin_unlock(&adapter->sge.reg_lock);
2491 q->netdev = netdev;
2492 t3_update_qset_coalesce(q, p);
2493
2494 /*
2495 * We use atalk_ptr as a backpointer to a qset. In case a device is
2496 * associated with multiple queue sets only the first one sets
2497 * atalk_ptr.
2498 */
2499 if (netdev->atalk_ptr == NULL)
2500 netdev->atalk_ptr = q;
2501
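	/* fill both free lists and the response queue, then set the queue's interrupt holdoff timer */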
2502 refill_fl(adapter, &q->fl[0], q->fl[0].size, GFP_KERNEL);
2503 refill_fl(adapter, &q->fl[1], q->fl[1].size, GFP_KERNEL);
2504 refill_rspq(adapter, &q->rspq, q->rspq.size - 1);
2505
2506 t3_write_reg(adapter, A_SG_GTS, V_RSPQ(q->rspq.cntxt_id) |
2507 V_NEWTIMER(q->rspq.holdoff_tmr));
2508
2509 mod_timer(&q->tx_reclaim_timer, jiffies + TX_RECLAIM_PERIOD);
2510 return 0;
2511
2512 err_unlock:
2513 spin_unlock(&adapter->sge.reg_lock);
2514 err:
2515 t3_free_qset(adapter, q);
2516 return ret;
2517}
2518
2519/**
2520 * t3_free_sge_resources - free SGE resources
2521 * @adap: the adapter
2522 *
2523 * Frees resources used by the SGE queue sets.
2524 */
2525void t3_free_sge_resources(struct adapter *adap)
2526{
2527 int i;
2528
2529 for (i = 0; i < SGE_QSETS; ++i)
2530 t3_free_qset(adap, &adap->sge.qs[i]);
2531}
2532
2533/**
2534 * t3_sge_start - enable SGE
2535 * @adap: the adapter
2536 *
2537 * Enables the SGE for DMAs. This is the last step in starting packet
2538 * transfers.
2539 */
2540void t3_sge_start(struct adapter *adap)
2541{
2542 t3_set_reg_field(adap, A_SG_CONTROL, F_GLOBALENABLE, F_GLOBALENABLE);
2543}
2544
2545/**
2546 * t3_sge_stop - disable SGE operation
2547 * @adap: the adapter
2548 *
2549 * Disables the DMA engine. This can be called in emergencies (e.g.,
2550 * from error interrupts) or from normal process context. In the latter
2551 * case it also disables any pending queue restart tasklets. Note that
2552 * if it is called in interrupt context it cannot disable the restart
2553 * tasklets as it cannot wait, however the tasklets will have no effect
2554 * since the doorbells are disabled and the driver will call this again
2555 * later from process context, at which time the tasklets will be stopped
2556 * if they are still running.
2557 */
2558void t3_sge_stop(struct adapter *adap)
2559{
2560 t3_set_reg_field(adap, A_SG_CONTROL, F_GLOBALENABLE, 0);
2561 if (!in_interrupt()) {
2562 int i;
2563
2564 for (i = 0; i < SGE_QSETS; ++i) {
2565 struct sge_qset *qs = &adap->sge.qs[i];
2566
2567 tasklet_kill(&qs->txq[TXQ_OFLD].qresume_tsk);
2568 tasklet_kill(&qs->txq[TXQ_CTRL].qresume_tsk);
2569 }
2570 }
2571}
2572
2573/**
2574 * t3_sge_init - initialize SGE
2575 * @adap: the adapter
2576 * @p: the SGE parameters
2577 *
2578 * Performs SGE initialization needed every time after a chip reset.
2579 * We do not initialize any of the queue sets here; instead, the top-level
2580 * driver must request those individually. We also do not enable DMA
2581 * here; that should be done after the queues have been set up.
2582 */
2583void t3_sge_init(struct adapter *adap, struct sge_params *p)
2584{
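	/* ups: user-space region size encoding, derived from the length of PCI BAR2 in 4KB pages */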
2585 unsigned int ctrl, ups = ffs(pci_resource_len(adap->pdev, 2) >> 12);
2586
2587 ctrl = F_DROPPKT | V_PKTSHIFT(2) | F_FLMODE | F_AVOIDCQOVFL |
2588 F_CQCRDTCTRL |
2589 V_HOSTPAGESIZE(PAGE_SHIFT - 11) | F_BIGENDIANINGRESS |
2590 V_USERSPACESIZE(ups ? ups - 1 : 0) | F_ISCSICOALESCING;
2591#if SGE_NUM_GENBITS == 1
2592 ctrl |= F_EGRGENCTRL;
2593#endif
2594 if (adap->params.rev > 0) {
2595 if (!(adap->flags & (USING_MSIX | USING_MSI)))
2596 ctrl |= F_ONEINTMULTQ | F_OPTONEINTMULTQ;
2597 ctrl |= F_CQCRDTCTRL | F_AVOIDCQOVFL;
2598 }
2599 t3_write_reg(adap, A_SG_CONTROL, ctrl);
2600 t3_write_reg(adap, A_SG_EGR_RCQ_DRB_THRSH, V_HIRCQDRBTHRSH(512) |
2601 V_LORCQDRBTHRSH(512));
2602 t3_write_reg(adap, A_SG_TIMER_TICK, core_ticks_per_usec(adap) / 10);
2603 t3_write_reg(adap, A_SG_CMDQ_CREDIT_TH, V_THRESHOLD(32) |
2604 V_TIMEOUT(200 * core_ticks_per_usec(adap)));
2605 t3_write_reg(adap, A_SG_HI_DRB_HI_THRSH, 1000);
2606 t3_write_reg(adap, A_SG_HI_DRB_LO_THRSH, 256);
2607 t3_write_reg(adap, A_SG_LO_DRB_HI_THRSH, 1000);
2608 t3_write_reg(adap, A_SG_LO_DRB_LO_THRSH, 256);
2609 t3_write_reg(adap, A_SG_OCO_BASE, V_BASE1(0xfff));
2610 t3_write_reg(adap, A_SG_DRB_PRI_THRESH, 63 * 1024);
2611}
2612
2613/**
2614 * t3_sge_prep - one-time SGE initialization
2615 * @adap: the associated adapter
2616 * @p: SGE parameters
2617 *
2618 * Performs one-time initialization of SGE SW state. This includes determining
2619 * defaults for the assorted SGE parameters, which administrators can change
2620 * until they are used to initialize the SGE.
2621 */
2622void __devinit t3_sge_prep(struct adapter *adap, struct sge_params *p)
2623{
2624 int i;
2625
2626 p->max_pkt_size = (16 * 1024) - sizeof(struct cpl_rx_data) -
2627 SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
2628
2629 for (i = 0; i < SGE_QSETS; ++i) {
2630 struct qset_params *q = p->qset + i;
2631
2632 q->polling = adap->params.rev > 0;
2633 q->coalesce_usecs = 5;
2634 q->rspq_size = 1024;
2635 q->fl_size = 4096;
2636 q->jumbo_size = 512;
2637 q->txq_size[TXQ_ETH] = 1024;
2638 q->txq_size[TXQ_OFLD] = 1024;
2639 q->txq_size[TXQ_CTRL] = 256;
2640 q->cong_thres = 0;
2641 }
2642
2643 spin_lock_init(&adap->sge.reg_lock);
2644}
2645
2646/**
2647 * t3_get_desc - dump an SGE descriptor for debugging purposes
2648 * @qs: the queue set
2649 * @qnum: identifies the specific queue (0..2: Tx, 3:response, 4..5: Rx)
2650 * @idx: the descriptor index in the queue
2651 * @data: where to dump the descriptor contents
2652 *
2653 * Dumps the contents of a HW descriptor of an SGE queue. Returns the
2654 * size of the descriptor.
2655 */
2656int t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx,
2657 unsigned char *data)
2658{
2659 if (qnum >= 6)
2660 return -EINVAL;
2661
2662 if (qnum < 3) {
2663 if (!qs->txq[qnum].desc || idx >= qs->txq[qnum].size)
2664 return -EINVAL;
2665 memcpy(data, &qs->txq[qnum].desc[idx], sizeof(struct tx_desc));
2666 return sizeof(struct tx_desc);
2667 }
2668
2669 if (qnum == 3) {
2670 if (!qs->rspq.desc || idx >= qs->rspq.size)
2671 return -EINVAL;
2672 memcpy(data, &qs->rspq.desc[idx], sizeof(struct rsp_desc));
2673 return sizeof(struct rsp_desc);
2674 }
2675
2676 qnum -= 4;
2677 if (!qs->fl[qnum].desc || idx >= qs->fl[qnum].size)
2678 return -EINVAL;
2679 memcpy(data, &qs->fl[qnum].desc[idx], sizeof(struct rx_desc));
2680 return sizeof(struct rx_desc);
2681}