path: root/drivers/net/cxgb4vf
author: Jonathan Herman <hermanjl@cs.unc.edu> 2013-01-22 10:38:37 -0500
committer: Jonathan Herman <hermanjl@cs.unc.edu> 2013-01-22 10:38:37 -0500
commit: fcc9d2e5a6c89d22b8b773a64fb4ad21ac318446 (patch)
tree: a57612d1888735a2ec7972891b68c1ac5ec8faea /drivers/net/cxgb4vf
parent: 8dea78da5cee153b8af9c07a2745f6c55057fe12 (diff)
Added missing tegra files. (HEAD, master)
Diffstat (limited to 'drivers/net/cxgb4vf')
-rw-r--r--  drivers/net/cxgb4vf/Makefile          7
-rw-r--r--  drivers/net/cxgb4vf/adapter.h       534
-rw-r--r--  drivers/net/cxgb4vf/cxgb4vf_main.c 2947
-rw-r--r--  drivers/net/cxgb4vf/sge.c          2465
-rw-r--r--  drivers/net/cxgb4vf/t4vf_common.h   274
-rw-r--r--  drivers/net/cxgb4vf/t4vf_defs.h     121
-rw-r--r--  drivers/net/cxgb4vf/t4vf_hw.c      1387
7 files changed, 7735 insertions, 0 deletions
diff --git a/drivers/net/cxgb4vf/Makefile b/drivers/net/cxgb4vf/Makefile
new file mode 100644
index 00000000000..d72ee26cb4c
--- /dev/null
+++ b/drivers/net/cxgb4vf/Makefile
@@ -0,0 +1,7 @@
1#
2# Chelsio T4 SR-IOV Virtual Function Driver
3#
4
5obj-$(CONFIG_CHELSIO_T4VF) += cxgb4vf.o
6
7cxgb4vf-objs := cxgb4vf_main.o t4vf_hw.o sge.o
diff --git a/drivers/net/cxgb4vf/adapter.h b/drivers/net/cxgb4vf/adapter.h
new file mode 100644
index 00000000000..594334d5c71
--- /dev/null
+++ b/drivers/net/cxgb4vf/adapter.h
@@ -0,0 +1,534 @@
1/*
2 * This file is part of the Chelsio T4 PCI-E SR-IOV Virtual Function Ethernet
3 * driver for Linux.
4 *
5 * Copyright (c) 2009-2010 Chelsio Communications, Inc. All rights reserved.
6 *
7 * This software is available to you under a choice of one of two
8 * licenses. You may choose to be licensed under the terms of the GNU
9 * General Public License (GPL) Version 2, available from the file
10 * COPYING in the main directory of this source tree, or the
11 * OpenIB.org BSD license below:
12 *
13 * Redistribution and use in source and binary forms, with or
14 * without modification, are permitted provided that the following
15 * conditions are met:
16 *
17 * - Redistributions of source code must retain the above
18 * copyright notice, this list of conditions and the following
19 * disclaimer.
20 *
21 * - Redistributions in binary form must reproduce the above
22 * copyright notice, this list of conditions and the following
23 * disclaimer in the documentation and/or other materials
24 * provided with the distribution.
25 *
26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
27 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
28 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
29 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
30 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
31 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
32 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33 * SOFTWARE.
34 */
35
36/*
37 * This file should not be included directly. Include t4vf_common.h instead.
38 */
39
40#ifndef __CXGB4VF_ADAPTER_H__
41#define __CXGB4VF_ADAPTER_H__
42
43#include <linux/interrupt.h>
44#include <linux/pci.h>
45#include <linux/spinlock.h>
46#include <linux/skbuff.h>
47#include <linux/if_ether.h>
48#include <linux/netdevice.h>
49
50#include "../cxgb4/t4_hw.h"
51
52/*
53 * Constants of the implementation.
54 */
55enum {
56 MAX_NPORTS = 1, /* max # of "ports" */
57 MAX_PORT_QSETS = 8, /* max # of Queue Sets / "port" */
58 MAX_ETH_QSETS = MAX_NPORTS*MAX_PORT_QSETS,
59
60 /*
61 * MSI-X interrupt index usage.
62 */
63 MSIX_FW = 0, /* MSI-X index for firmware Q */
64 MSIX_IQFLINT = 1, /* MSI-X index base for Ingress Qs */
65 MSIX_EXTRAS = 1,
66 MSIX_ENTRIES = MAX_ETH_QSETS + MSIX_EXTRAS,
67
68 /*
69 * The maximum number of Ingress and Egress Queues is determined by
70 * the maximum number of "Queue Sets" which we support plus any
71 * ancillary queues. Each "Queue Set" requires one Ingress Queue
72 * for RX Packet Ingress Event notifications and two Egress Queues for
73 * a Free List and an Ethernet TX list.
74 */
75 INGQ_EXTRAS = 2, /* firmware event queue and */
76 /* forwarded interrupts */
77 MAX_INGQ = MAX_ETH_QSETS+INGQ_EXTRAS,
78 MAX_EGRQ = MAX_ETH_QSETS*2,
79};
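/*
 * Worked example of the constants above: with MAX_NPORTS == 1 and
 * MAX_PORT_QSETS == 8, MAX_ETH_QSETS is 8, so MSIX_ENTRIES is 8 + 1 == 9
 * (one vector per Ethernet Queue Set plus the firmware event queue),
 * MAX_INGQ is 8 + 2 == 10 and MAX_EGRQ is 16.
 */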
80
81/*
82 * Forward structure definition references.
83 */
84struct adapter;
85struct sge_eth_rxq;
86struct sge_rspq;
87
88/*
89 * Per-"port" information. This is really per-Virtual Interface information
 90 * but the use of the "port" nomenclature makes it easier to go back and forth
91 * between the PF and VF drivers ...
92 */
93struct port_info {
94 struct adapter *adapter; /* our adapter */
95 u16 viid; /* virtual interface ID */
96 s16 xact_addr_filt; /* index of our MAC address filter */
97 u16 rss_size; /* size of VI's RSS table slice */
98 u8 pidx; /* index into adapter port[] */
99 u8 port_id; /* physical port ID */
100 u8 nqsets; /* # of "Queue Sets" */
101 u8 first_qset; /* index of first "Queue Set" */
102 struct link_config link_cfg; /* physical port configuration */
103};
104
105/*
106 * Scatter Gather Engine resources for the "adapter". Our ingress and egress
107 * queues are organized into "Queue Sets" with one ingress and one egress
 108 * queue per Queue Set. These Queue Sets are apportionable between the "ports"
109 * (Virtual Interfaces). One extra ingress queue is used to receive
110 * asynchronous messages from the firmware. Note that the "Queue IDs" that we
111 * use here are really "Relative Queue IDs" which are returned as part of the
112 * firmware command to allocate queues. These queue IDs are relative to the
113 * absolute Queue ID base of the section of the Queue ID space allocated to
114 * the PF/VF.
115 */
116
117/*
118 * SGE free-list queue state.
119 */
120struct rx_sw_desc;
121struct sge_fl {
122 unsigned int avail; /* # of available RX buffers */
123 unsigned int pend_cred; /* new buffers since last FL DB ring */
124 unsigned int cidx; /* consumer index */
125 unsigned int pidx; /* producer index */
126 unsigned long alloc_failed; /* # of buffer allocation failures */
127 unsigned long large_alloc_failed;
128 unsigned long starving; /* # of times FL was found starving */
129
130 /*
131 * Write-once/infrequently fields.
132 * -------------------------------
133 */
134
135 unsigned int cntxt_id; /* SGE relative QID for the free list */
136 unsigned int abs_id; /* SGE absolute QID for the free list */
137 unsigned int size; /* capacity of free list */
138 struct rx_sw_desc *sdesc; /* address of SW RX descriptor ring */
139 __be64 *desc; /* address of HW RX descriptor ring */
140 dma_addr_t addr; /* PCI bus address of hardware ring */
141};
142
143/*
144 * An ingress packet gather list.
145 */
146struct pkt_gl {
147 skb_frag_t frags[MAX_SKB_FRAGS];
148 void *va; /* virtual address of first byte */
149 unsigned int nfrags; /* # of fragments */
150 unsigned int tot_len; /* total length of fragments */
151};
152
153typedef int (*rspq_handler_t)(struct sge_rspq *, const __be64 *,
154 const struct pkt_gl *);
155
156/*
157 * State for an SGE Response Queue.
158 */
159struct sge_rspq {
160 struct napi_struct napi; /* NAPI scheduling control */
161 const __be64 *cur_desc; /* current descriptor in queue */
162 unsigned int cidx; /* consumer index */
163 u8 gen; /* current generation bit */
164 u8 next_intr_params; /* holdoff params for next interrupt */
165 int offset; /* offset into current FL buffer */
166
167 unsigned int unhandled_irqs; /* bogus interrupts */
168
169 /*
170 * Write-once/infrequently fields.
171 * -------------------------------
172 */
173
174 u8 intr_params; /* interrupt holdoff parameters */
175 u8 pktcnt_idx; /* interrupt packet threshold */
176 u8 idx; /* queue index within its group */
177 u16 cntxt_id; /* SGE rel QID for the response Q */
178 u16 abs_id; /* SGE abs QID for the response Q */
179 __be64 *desc; /* address of hardware response ring */
180 dma_addr_t phys_addr; /* PCI bus address of ring */
181 unsigned int iqe_len; /* entry size */
 182 unsigned int size; /* capacity of response Q */
183 struct adapter *adapter; /* our adapter */
184 struct net_device *netdev; /* associated net device */
185 rspq_handler_t handler; /* the handler for this response Q */
186};
187
188/*
189 * Ethernet queue statistics
190 */
191struct sge_eth_stats {
192 unsigned long pkts; /* # of ethernet packets */
193 unsigned long lro_pkts; /* # of LRO super packets */
194 unsigned long lro_merged; /* # of wire packets merged by LRO */
195 unsigned long rx_cso; /* # of Rx checksum offloads */
196 unsigned long vlan_ex; /* # of Rx VLAN extractions */
197 unsigned long rx_drops; /* # of packets dropped due to no mem */
198};
199
200/*
201 * State for an Ethernet Receive Queue.
202 */
203struct sge_eth_rxq {
204 struct sge_rspq rspq; /* Response Queue */
205 struct sge_fl fl; /* Free List */
206 struct sge_eth_stats stats; /* receive statistics */
207};
208
209/*
210 * SGE Transmit Queue state. This contains all of the resources associated
211 * with the hardware status of a TX Queue which is a circular ring of hardware
212 * TX Descriptors. For convenience, it also contains a pointer to a parallel
213 * "Software Descriptor" array but we don't know anything about it here other
214 * than its type name.
215 */
216struct tx_desc {
217 /*
218 * Egress Queues are measured in units of SGE_EQ_IDXSIZE by the
219 * hardware: Sizes, Producer and Consumer indices, etc.
220 */
221 __be64 flit[SGE_EQ_IDXSIZE/sizeof(__be64)];
222};
223struct tx_sw_desc;
224struct sge_txq {
225 unsigned int in_use; /* # of in-use TX descriptors */
226 unsigned int size; /* # of descriptors */
227 unsigned int cidx; /* SW consumer index */
228 unsigned int pidx; /* producer index */
229 unsigned long stops; /* # of times queue has been stopped */
230 unsigned long restarts; /* # of queue restarts */
231
232 /*
233 * Write-once/infrequently fields.
234 * -------------------------------
235 */
236
237 unsigned int cntxt_id; /* SGE relative QID for the TX Q */
238 unsigned int abs_id; /* SGE absolute QID for the TX Q */
239 struct tx_desc *desc; /* address of HW TX descriptor ring */
240 struct tx_sw_desc *sdesc; /* address of SW TX descriptor ring */
241 struct sge_qstat *stat; /* queue status entry */
242 dma_addr_t phys_addr; /* PCI bus address of hardware ring */
243};
244
245/*
246 * State for an Ethernet Transmit Queue.
247 */
248struct sge_eth_txq {
249 struct sge_txq q; /* SGE TX Queue */
250 struct netdev_queue *txq; /* associated netdev TX queue */
251 unsigned long tso; /* # of TSO requests */
252 unsigned long tx_cso; /* # of TX checksum offloads */
253 unsigned long vlan_ins; /* # of TX VLAN insertions */
254 unsigned long mapping_err; /* # of I/O MMU packet mapping errors */
255};
256
257/*
258 * The complete set of Scatter/Gather Engine resources.
259 */
260struct sge {
261 /*
262 * Our "Queue Sets" ...
263 */
264 struct sge_eth_txq ethtxq[MAX_ETH_QSETS];
265 struct sge_eth_rxq ethrxq[MAX_ETH_QSETS];
266
267 /*
268 * Extra ingress queues for asynchronous firmware events and
269 * forwarded interrupts (when in MSI mode).
270 */
271 struct sge_rspq fw_evtq ____cacheline_aligned_in_smp;
272
273 struct sge_rspq intrq ____cacheline_aligned_in_smp;
274 spinlock_t intrq_lock;
275
276 /*
277 * State for managing "starving Free Lists" -- Free Lists which have
278 * fallen below a certain threshold of buffers available to the
279 * hardware and attempts to refill them up to that threshold have
280 * failed. We have a regular "slow tick" timer process which will
281 * make periodic attempts to refill these starving Free Lists ...
282 */
283 DECLARE_BITMAP(starving_fl, MAX_EGRQ);
284 struct timer_list rx_timer;
285
286 /*
287 * State for cleaning up completed TX descriptors.
288 */
289 struct timer_list tx_timer;
290
291 /*
292 * Write-once/infrequently fields.
293 * -------------------------------
294 */
295
296 u16 max_ethqsets; /* # of available Ethernet queue sets */
297 u16 ethqsets; /* # of active Ethernet queue sets */
298 u16 ethtxq_rover; /* Tx queue to clean up next */
299 u16 timer_val[SGE_NTIMERS]; /* interrupt holdoff timer array */
300 u8 counter_val[SGE_NCOUNTERS]; /* interrupt RX threshold array */
301
302 /*
303 * Reverse maps from Absolute Queue IDs to associated queue pointers.
304 * The absolute Queue IDs are in a compact range which start at a
305 * [potentially large] Base Queue ID. We perform the reverse map by
306 * first converting the Absolute Queue ID into a Relative Queue ID by
307 * subtracting off the Base Queue ID and then use a Relative Queue ID
308 * indexed table to get the pointer to the corresponding software
309 * queue structure.
310 */
311 unsigned int egr_base;
312 unsigned int ingr_base;
313 void *egr_map[MAX_EGRQ];
314 struct sge_rspq *ingr_map[MAX_INGQ];
315};
316
 317/*
 318 * Utility macros to convert Absolute Queue IDs into Relative Queue indices.
 319 * The EQ_MAP() and IQ_MAP() macros yield Egress- and Ingress-Queue pointers and
 320 * can be used as both L-values and R-values (see the sketch after this header).
 321 */
322#define EQ_IDX(s, abs_id) ((unsigned int)((abs_id) - (s)->egr_base))
323#define IQ_IDX(s, abs_id) ((unsigned int)((abs_id) - (s)->ingr_base))
324
325#define EQ_MAP(s, abs_id) ((s)->egr_map[EQ_IDX(s, abs_id)])
326#define IQ_MAP(s, abs_id) ((s)->ingr_map[IQ_IDX(s, abs_id)])
327
328/*
329 * Macro to iterate across Queue Sets ("rxq" is a historic misnomer).
330 */
331#define for_each_ethrxq(sge, iter) \
332 for (iter = 0; iter < (sge)->ethqsets; iter++)
333
334/*
335 * Per-"adapter" (Virtual Function) information.
336 */
337struct adapter {
338 /* PCI resources */
339 void __iomem *regs;
340 struct pci_dev *pdev;
341 struct device *pdev_dev;
342
343 /* "adapter" resources */
344 unsigned long registered_device_map;
345 unsigned long open_device_map;
346 unsigned long flags;
347 struct adapter_params params;
348
349 /* queue and interrupt resources */
350 struct {
351 unsigned short vec;
352 char desc[22];
353 } msix_info[MSIX_ENTRIES];
354 struct sge sge;
355
356 /* Linux network device resources */
357 struct net_device *port[MAX_NPORTS];
358 const char *name;
359 unsigned int msg_enable;
360
361 /* debugfs resources */
362 struct dentry *debugfs_root;
363
364 /* various locks */
365 spinlock_t stats_lock;
366};
367
368enum { /* adapter flags */
369 FULL_INIT_DONE = (1UL << 0),
370 USING_MSI = (1UL << 1),
371 USING_MSIX = (1UL << 2),
372 QUEUES_BOUND = (1UL << 3),
373};
374
375/*
376 * The following register read/write routine definitions are required by
377 * the common code.
378 */
379
380/**
381 * t4_read_reg - read a HW register
382 * @adapter: the adapter
383 * @reg_addr: the register address
384 *
385 * Returns the 32-bit value of the given HW register.
386 */
387static inline u32 t4_read_reg(struct adapter *adapter, u32 reg_addr)
388{
389 return readl(adapter->regs + reg_addr);
390}
391
392/**
393 * t4_write_reg - write a HW register
394 * @adapter: the adapter
395 * @reg_addr: the register address
396 * @val: the value to write
397 *
398 * Write a 32-bit value into the given HW register.
399 */
400static inline void t4_write_reg(struct adapter *adapter, u32 reg_addr, u32 val)
401{
402 writel(val, adapter->regs + reg_addr);
403}
404
405#ifndef readq
406static inline u64 readq(const volatile void __iomem *addr)
407{
408 return readl(addr) + ((u64)readl(addr + 4) << 32);
409}
410
411static inline void writeq(u64 val, volatile void __iomem *addr)
412{
413 writel(val, addr);
414 writel(val >> 32, addr + 4);
415}
416#endif
417
418/**
419 * t4_read_reg64 - read a 64-bit HW register
420 * @adapter: the adapter
421 * @reg_addr: the register address
422 *
423 * Returns the 64-bit value of the given HW register.
424 */
425static inline u64 t4_read_reg64(struct adapter *adapter, u32 reg_addr)
426{
427 return readq(adapter->regs + reg_addr);
428}
429
430/**
431 * t4_write_reg64 - write a 64-bit HW register
432 * @adapter: the adapter
433 * @reg_addr: the register address
434 * @val: the value to write
435 *
436 * Write a 64-bit value into the given HW register.
437 */
438static inline void t4_write_reg64(struct adapter *adapter, u32 reg_addr,
439 u64 val)
440{
441 writeq(val, adapter->regs + reg_addr);
442}
443
444/**
445 * port_name - return the string name of a port
446 * @adapter: the adapter
447 * @pidx: the port index
448 *
449 * Return the string name of the selected port.
450 */
451static inline const char *port_name(struct adapter *adapter, int pidx)
452{
453 return adapter->port[pidx]->name;
454}
455
456/**
457 * t4_os_set_hw_addr - store a port's MAC address in SW
458 * @adapter: the adapter
459 * @pidx: the port index
460 * @hw_addr: the Ethernet address
461 *
462 * Store the Ethernet address of the given port in SW. Called by the common
463 * code when it retrieves a port's Ethernet address from EEPROM.
464 */
465static inline void t4_os_set_hw_addr(struct adapter *adapter, int pidx,
466 u8 hw_addr[])
467{
468 memcpy(adapter->port[pidx]->dev_addr, hw_addr, ETH_ALEN);
469 memcpy(adapter->port[pidx]->perm_addr, hw_addr, ETH_ALEN);
470}
471
472/**
473 * netdev2pinfo - return the port_info structure associated with a net_device
474 * @dev: the netdev
475 *
476 * Return the struct port_info associated with a net_device
477 */
478static inline struct port_info *netdev2pinfo(const struct net_device *dev)
479{
480 return netdev_priv(dev);
481}
482
483/**
484 * adap2pinfo - return the port_info of a port
 485 * @adapter: the adapter
 486 * @pidx: the port index
 487 *
 488 * Return the port_info structure for the selected port of the adapter.
489 */
490static inline struct port_info *adap2pinfo(struct adapter *adapter, int pidx)
491{
492 return netdev_priv(adapter->port[pidx]);
493}
494
495/**
496 * netdev2adap - return the adapter structure associated with a net_device
497 * @dev: the netdev
498 *
499 * Return the struct adapter associated with a net_device
500 */
501static inline struct adapter *netdev2adap(const struct net_device *dev)
502{
503 return netdev2pinfo(dev)->adapter;
504}
505
506/*
507 * OS "Callback" function declarations. These are functions that the OS code
508 * is "contracted" to provide for the common code.
509 */
510void t4vf_os_link_changed(struct adapter *, int, int);
511
512/*
513 * SGE function prototype declarations.
514 */
515int t4vf_sge_alloc_rxq(struct adapter *, struct sge_rspq *, bool,
516 struct net_device *, int,
517 struct sge_fl *, rspq_handler_t);
518int t4vf_sge_alloc_eth_txq(struct adapter *, struct sge_eth_txq *,
519 struct net_device *, struct netdev_queue *,
520 unsigned int);
521void t4vf_free_sge_resources(struct adapter *);
522
523int t4vf_eth_xmit(struct sk_buff *, struct net_device *);
524int t4vf_ethrx_handler(struct sge_rspq *, const __be64 *,
525 const struct pkt_gl *);
526
527irq_handler_t t4vf_intr_handler(struct adapter *);
528irqreturn_t t4vf_sge_intr_msix(int, void *);
529
530int t4vf_sge_init(struct adapter *);
531void t4vf_sge_start(struct adapter *);
532void t4vf_sge_stop(struct adapter *);
533
534#endif /* __CXGB4VF_ADAPTER_H__ */
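The EQ_MAP()/IQ_MAP() reverse maps above reduce to a base-offset lookup: subtract the adapter's Base Queue ID from an Absolute Queue ID and index a per-adapter pointer table. A minimal stand-alone sketch of that technique (hypothetical structure and values, not driver code):

#include <stdio.h>

#define MAX_EGRQ 16                     /* hypothetical table size */

struct eq;                              /* opaque egress-queue state */

struct sge_map {
	unsigned int egr_base;          /* absolute QID of first egress queue */
	struct eq *egr_map[MAX_EGRQ];   /* indexed by relative QID */
};

/* Absolute -> relative conversion, in the spirit of EQ_IDX()/EQ_MAP(). */
static struct eq **eq_slot(struct sge_map *s, unsigned int abs_id)
{
	return &s->egr_map[abs_id - s->egr_base];
}

int main(void)
{
	struct sge_map s = { .egr_base = 1024 };
	struct eq *q = (struct eq *)0x1;        /* stand-in pointer */

	*eq_slot(&s, 1026) = q;                 /* usable as an L-value ... */
	printf("slot 1026 %s\n",                /* ... and as an R-value */
	       *eq_slot(&s, 1026) == q ? "set" : "unset");
	return 0;
}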
diff --git a/drivers/net/cxgb4vf/cxgb4vf_main.c b/drivers/net/cxgb4vf/cxgb4vf_main.c
new file mode 100644
index 00000000000..ec799139dfe
--- /dev/null
+++ b/drivers/net/cxgb4vf/cxgb4vf_main.c
@@ -0,0 +1,2947 @@
1/*
2 * This file is part of the Chelsio T4 PCI-E SR-IOV Virtual Function Ethernet
3 * driver for Linux.
4 *
5 * Copyright (c) 2009-2010 Chelsio Communications, Inc. All rights reserved.
6 *
7 * This software is available to you under a choice of one of two
8 * licenses. You may choose to be licensed under the terms of the GNU
9 * General Public License (GPL) Version 2, available from the file
10 * COPYING in the main directory of this source tree, or the
11 * OpenIB.org BSD license below:
12 *
13 * Redistribution and use in source and binary forms, with or
14 * without modification, are permitted provided that the following
15 * conditions are met:
16 *
17 * - Redistributions of source code must retain the above
18 * copyright notice, this list of conditions and the following
19 * disclaimer.
20 *
21 * - Redistributions in binary form must reproduce the above
22 * copyright notice, this list of conditions and the following
23 * disclaimer in the documentation and/or other materials
24 * provided with the distribution.
25 *
26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
27 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
28 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
29 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
30 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
31 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
32 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33 * SOFTWARE.
34 */
35
36#include <linux/module.h>
37#include <linux/moduleparam.h>
38#include <linux/init.h>
39#include <linux/pci.h>
40#include <linux/dma-mapping.h>
41#include <linux/netdevice.h>
42#include <linux/etherdevice.h>
43#include <linux/debugfs.h>
44#include <linux/ethtool.h>
45
46#include "t4vf_common.h"
47#include "t4vf_defs.h"
48
49#include "../cxgb4/t4_regs.h"
50#include "../cxgb4/t4_msg.h"
51
52/*
53 * Generic information about the driver.
54 */
55#define DRV_VERSION "1.0.0"
56#define DRV_DESC "Chelsio T4 Virtual Function (VF) Network Driver"
57
58/*
59 * Module Parameters.
60 * ==================
61 */
62
63/*
64 * Default ethtool "message level" for adapters.
65 */
66#define DFLT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK | \
67 NETIF_MSG_TIMER | NETIF_MSG_IFDOWN | NETIF_MSG_IFUP |\
68 NETIF_MSG_RX_ERR | NETIF_MSG_TX_ERR)
69
70static int dflt_msg_enable = DFLT_MSG_ENABLE;
71
72module_param(dflt_msg_enable, int, 0644);
73MODULE_PARM_DESC(dflt_msg_enable,
74 "default adapter ethtool message level bitmap");
75
76/*
77 * The driver uses the best interrupt scheme available on a platform in the
78 * order MSI-X then MSI. This parameter determines which of these schemes the
79 * driver may consider as follows:
80 *
81 * msi = 2: choose from among MSI-X and MSI
82 * msi = 1: only consider MSI interrupts
83 *
84 * Note that unlike the Physical Function driver, this Virtual Function driver
85 * does _not_ support legacy INTx interrupts (this limitation is mandated by
86 * the PCI-E SR-IOV standard).
87 */
88#define MSI_MSIX 2
89#define MSI_MSI 1
90#define MSI_DEFAULT MSI_MSIX
91
92static int msi = MSI_DEFAULT;
93
94module_param(msi, int, 0644);
95MODULE_PARM_DESC(msi, "whether to use MSI-X or MSI");
96
97/*
98 * Fundamental constants.
99 * ======================
100 */
101
102enum {
103 MAX_TXQ_ENTRIES = 16384,
104 MAX_RSPQ_ENTRIES = 16384,
105 MAX_RX_BUFFERS = 16384,
106
107 MIN_TXQ_ENTRIES = 32,
108 MIN_RSPQ_ENTRIES = 128,
109 MIN_FL_ENTRIES = 16,
110
111 /*
112 * For purposes of manipulating the Free List size we need to
113 * recognize that Free Lists are actually Egress Queues (the host
114 * produces free buffers which the hardware consumes), Egress Queues
115 * indices are all in units of Egress Context Units bytes, and free
116 * list entries are 64-bit PCI DMA addresses. And since the state of
117 * the Producer Index == the Consumer Index implies an EMPTY list, we
118 * always have at least one Egress Unit's worth of Free List entries
119 * unused. See sge.c for more details ...
120 */
121 EQ_UNIT = SGE_EQ_IDXSIZE,
122 FL_PER_EQ_UNIT = EQ_UNIT / sizeof(__be64),
123 MIN_FL_RESID = FL_PER_EQ_UNIT,
124};
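/*
 * Worked example of the Free List sizing above, assuming SGE_EQ_IDXSIZE is
 * 64 bytes (its value in the PF driver's t4_hw.h): EQ_UNIT = 64 and
 * FL_PER_EQ_UNIT = 64 / sizeof(__be64) = 8, so MIN_FL_RESID reserves eight
 * Free List entries (one Egress Unit's worth) that are never handed to the
 * hardware.
 */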
125
126/*
127 * Global driver state.
128 * ====================
129 */
130
131static struct dentry *cxgb4vf_debugfs_root;
132
133/*
134 * OS "Callback" functions.
135 * ========================
136 */
137
138/*
139 * The link status has changed on the indicated "port" (Virtual Interface).
140 */
141void t4vf_os_link_changed(struct adapter *adapter, int pidx, int link_ok)
142{
143 struct net_device *dev = adapter->port[pidx];
144
145 /*
146 * If the port is disabled or the current recorded "link up"
147 * status matches the new status, just return.
148 */
149 if (!netif_running(dev) || link_ok == netif_carrier_ok(dev))
150 return;
151
152 /*
153 * Tell the OS that the link status has changed and print a short
154 * informative message on the console about the event.
155 */
156 if (link_ok) {
157 const char *s;
158 const char *fc;
159 const struct port_info *pi = netdev_priv(dev);
160
161 netif_carrier_on(dev);
162
163 switch (pi->link_cfg.speed) {
164 case SPEED_10000:
165 s = "10Gbps";
166 break;
167
168 case SPEED_1000:
169 s = "1000Mbps";
170 break;
171
172 case SPEED_100:
173 s = "100Mbps";
174 break;
175
176 default:
177 s = "unknown";
178 break;
179 }
180
181 switch (pi->link_cfg.fc) {
182 case PAUSE_RX:
183 fc = "RX";
184 break;
185
186 case PAUSE_TX:
187 fc = "TX";
188 break;
189
190 case PAUSE_RX|PAUSE_TX:
191 fc = "RX/TX";
192 break;
193
194 default:
195 fc = "no";
196 break;
197 }
198
199 printk(KERN_INFO "%s: link up, %s, full-duplex, %s PAUSE\n",
200 dev->name, s, fc);
201 } else {
202 netif_carrier_off(dev);
203 printk(KERN_INFO "%s: link down\n", dev->name);
204 }
205}
206
207/*
208 * Net device operations.
209 * ======================
210 */
211
212
213
214
215/*
216 * Perform the MAC and PHY actions needed to enable a "port" (Virtual
217 * Interface).
218 */
219static int link_start(struct net_device *dev)
220{
221 int ret;
222 struct port_info *pi = netdev_priv(dev);
223
224 /*
225 * We do not set address filters and promiscuity here, the stack does
226 * that step explicitly. Enable vlan accel.
227 */
228 ret = t4vf_set_rxmode(pi->adapter, pi->viid, dev->mtu, -1, -1, -1, 1,
229 true);
230 if (ret == 0) {
231 ret = t4vf_change_mac(pi->adapter, pi->viid,
232 pi->xact_addr_filt, dev->dev_addr, true);
233 if (ret >= 0) {
234 pi->xact_addr_filt = ret;
235 ret = 0;
236 }
237 }
238
239 /*
240 * We don't need to actually "start the link" itself since the
241 * firmware will do that for us when the first Virtual Interface
242 * is enabled on a port.
243 */
244 if (ret == 0)
245 ret = t4vf_enable_vi(pi->adapter, pi->viid, true, true);
246 return ret;
247}
248
249/*
250 * Name the MSI-X interrupts.
251 */
252static void name_msix_vecs(struct adapter *adapter)
253{
254 int namelen = sizeof(adapter->msix_info[0].desc) - 1;
255 int pidx;
256
257 /*
258 * Firmware events.
259 */
260 snprintf(adapter->msix_info[MSIX_FW].desc, namelen,
261 "%s-FWeventq", adapter->name);
262 adapter->msix_info[MSIX_FW].desc[namelen] = 0;
263
264 /*
265 * Ethernet queues.
266 */
267 for_each_port(adapter, pidx) {
268 struct net_device *dev = adapter->port[pidx];
269 const struct port_info *pi = netdev_priv(dev);
270 int qs, msi;
271
272 for (qs = 0, msi = MSIX_IQFLINT; qs < pi->nqsets; qs++, msi++) {
273 snprintf(adapter->msix_info[msi].desc, namelen,
274 "%s-%d", dev->name, qs);
275 adapter->msix_info[msi].desc[namelen] = 0;
276 }
277 }
278}
279
280/*
281 * Request all of our MSI-X resources.
282 */
283static int request_msix_queue_irqs(struct adapter *adapter)
284{
285 struct sge *s = &adapter->sge;
286 int rxq, msi, err;
287
288 /*
289 * Firmware events.
290 */
291 err = request_irq(adapter->msix_info[MSIX_FW].vec, t4vf_sge_intr_msix,
292 0, adapter->msix_info[MSIX_FW].desc, &s->fw_evtq);
293 if (err)
294 return err;
295
296 /*
297 * Ethernet queues.
298 */
299 msi = MSIX_IQFLINT;
300 for_each_ethrxq(s, rxq) {
301 err = request_irq(adapter->msix_info[msi].vec,
302 t4vf_sge_intr_msix, 0,
303 adapter->msix_info[msi].desc,
304 &s->ethrxq[rxq].rspq);
305 if (err)
306 goto err_free_irqs;
307 msi++;
308 }
309 return 0;
310
311err_free_irqs:
312 while (--rxq >= 0)
313 free_irq(adapter->msix_info[--msi].vec, &s->ethrxq[rxq].rspq);
314 free_irq(adapter->msix_info[MSIX_FW].vec, &s->fw_evtq);
315 return err;
316}
317
318/*
319 * Free our MSI-X resources.
320 */
321static void free_msix_queue_irqs(struct adapter *adapter)
322{
323 struct sge *s = &adapter->sge;
324 int rxq, msi;
325
326 free_irq(adapter->msix_info[MSIX_FW].vec, &s->fw_evtq);
327 msi = MSIX_IQFLINT;
328 for_each_ethrxq(s, rxq)
329 free_irq(adapter->msix_info[msi++].vec,
330 &s->ethrxq[rxq].rspq);
331}
332
333/*
334 * Turn on NAPI and start up interrupts on a response queue.
335 */
336static void qenable(struct sge_rspq *rspq)
337{
338 napi_enable(&rspq->napi);
339
340 /*
341 * 0-increment the Going To Sleep register to start the timer and
342 * enable interrupts.
343 */
344 t4_write_reg(rspq->adapter, T4VF_SGE_BASE_ADDR + SGE_VF_GTS,
345 CIDXINC(0) |
346 SEINTARM(rspq->intr_params) |
347 INGRESSQID(rspq->cntxt_id));
348}
349
350/*
351 * Enable NAPI scheduling and interrupt generation for all Receive Queues.
352 */
353static void enable_rx(struct adapter *adapter)
354{
355 int rxq;
356 struct sge *s = &adapter->sge;
357
358 for_each_ethrxq(s, rxq)
359 qenable(&s->ethrxq[rxq].rspq);
360 qenable(&s->fw_evtq);
361
362 /*
363 * The interrupt queue doesn't use NAPI so we do the 0-increment of
364 * its Going To Sleep register here to get it started.
365 */
366 if (adapter->flags & USING_MSI)
367 t4_write_reg(adapter, T4VF_SGE_BASE_ADDR + SGE_VF_GTS,
368 CIDXINC(0) |
369 SEINTARM(s->intrq.intr_params) |
370 INGRESSQID(s->intrq.cntxt_id));
371
372}
373
374/*
375 * Wait until all NAPI handlers are descheduled.
376 */
377static void quiesce_rx(struct adapter *adapter)
378{
379 struct sge *s = &adapter->sge;
380 int rxq;
381
382 for_each_ethrxq(s, rxq)
383 napi_disable(&s->ethrxq[rxq].rspq.napi);
384 napi_disable(&s->fw_evtq.napi);
385}
386
387/*
388 * Response queue handler for the firmware event queue.
389 */
390static int fwevtq_handler(struct sge_rspq *rspq, const __be64 *rsp,
391 const struct pkt_gl *gl)
392{
393 /*
394 * Extract response opcode and get pointer to CPL message body.
395 */
396 struct adapter *adapter = rspq->adapter;
397 u8 opcode = ((const struct rss_header *)rsp)->opcode;
398 void *cpl = (void *)(rsp + 1);
399
400 switch (opcode) {
401 case CPL_FW6_MSG: {
402 /*
403 * We've received an asynchronous message from the firmware.
404 */
405 const struct cpl_fw6_msg *fw_msg = cpl;
406 if (fw_msg->type == FW6_TYPE_CMD_RPL)
407 t4vf_handle_fw_rpl(adapter, fw_msg->data);
408 break;
409 }
410
411 case CPL_SGE_EGR_UPDATE: {
412 /*
413 * We've received an Egress Queue Status Update message. We
414 * get these, if the SGE is configured to send these when the
415 * firmware passes certain points in processing our TX
416 * Ethernet Queue or if we make an explicit request for one.
417 * We use these updates to determine when we may need to
418 * restart a TX Ethernet Queue which was stopped for lack of
419 * free TX Queue Descriptors ...
420 */
421 const struct cpl_sge_egr_update *p = (void *)cpl;
422 unsigned int qid = EGR_QID(be32_to_cpu(p->opcode_qid));
423 struct sge *s = &adapter->sge;
424 struct sge_txq *tq;
425 struct sge_eth_txq *txq;
426 unsigned int eq_idx;
427
428 /*
429 * Perform sanity checking on the Queue ID to make sure it
430 * really refers to one of our TX Ethernet Egress Queues which
431 * is active and matches the queue's ID. None of these error
 432 * conditions should ever happen, so we may want to make them
 433 * fatal and/or conditional under DEBUG.
434 */
435 eq_idx = EQ_IDX(s, qid);
436 if (unlikely(eq_idx >= MAX_EGRQ)) {
437 dev_err(adapter->pdev_dev,
438 "Egress Update QID %d out of range\n", qid);
439 break;
440 }
441 tq = s->egr_map[eq_idx];
442 if (unlikely(tq == NULL)) {
443 dev_err(adapter->pdev_dev,
444 "Egress Update QID %d TXQ=NULL\n", qid);
445 break;
446 }
447 txq = container_of(tq, struct sge_eth_txq, q);
448 if (unlikely(tq->abs_id != qid)) {
449 dev_err(adapter->pdev_dev,
450 "Egress Update QID %d refers to TXQ %d\n",
451 qid, tq->abs_id);
452 break;
453 }
454
455 /*
456 * Restart a stopped TX Queue which has less than half of its
457 * TX ring in use ...
458 */
459 txq->q.restarts++;
460 netif_tx_wake_queue(txq->txq);
461 break;
462 }
463
464 default:
465 dev_err(adapter->pdev_dev,
466 "unexpected CPL %#x on FW event queue\n", opcode);
467 }
468
469 return 0;
470}
471
472/*
473 * Allocate SGE TX/RX response queues. Determine how many sets of SGE queues
474 * to use and initializes them. We support multiple "Queue Sets" per port if
475 * we have MSI-X, otherwise just one queue set per port.
476 */
477static int setup_sge_queues(struct adapter *adapter)
478{
479 struct sge *s = &adapter->sge;
480 int err, pidx, msix;
481
482 /*
483 * Clear "Queue Set" Free List Starving and TX Queue Mapping Error
484 * state.
485 */
486 bitmap_zero(s->starving_fl, MAX_EGRQ);
487
488 /*
489 * If we're using MSI interrupt mode we need to set up a "forwarded
490 * interrupt" queue which we'll set up with our MSI vector. The rest
491 * of the ingress queues will be set up to forward their interrupts to
492 * this queue ... This must be first since t4vf_sge_alloc_rxq() uses
493 * the intrq's queue ID as the interrupt forwarding queue for the
494 * subsequent calls ...
495 */
496 if (adapter->flags & USING_MSI) {
497 err = t4vf_sge_alloc_rxq(adapter, &s->intrq, false,
498 adapter->port[0], 0, NULL, NULL);
499 if (err)
500 goto err_free_queues;
501 }
502
503 /*
504 * Allocate our ingress queue for asynchronous firmware messages.
505 */
506 err = t4vf_sge_alloc_rxq(adapter, &s->fw_evtq, true, adapter->port[0],
507 MSIX_FW, NULL, fwevtq_handler);
508 if (err)
509 goto err_free_queues;
510
511 /*
512 * Allocate each "port"'s initial Queue Sets. These can be changed
513 * later on ... up to the point where any interface on the adapter is
514 * brought up at which point lots of things get nailed down
515 * permanently ...
516 */
517 msix = MSIX_IQFLINT;
518 for_each_port(adapter, pidx) {
519 struct net_device *dev = adapter->port[pidx];
520 struct port_info *pi = netdev_priv(dev);
521 struct sge_eth_rxq *rxq = &s->ethrxq[pi->first_qset];
522 struct sge_eth_txq *txq = &s->ethtxq[pi->first_qset];
523 int qs;
524
525 for (qs = 0; qs < pi->nqsets; qs++, rxq++, txq++) {
526 err = t4vf_sge_alloc_rxq(adapter, &rxq->rspq, false,
527 dev, msix++,
528 &rxq->fl, t4vf_ethrx_handler);
529 if (err)
530 goto err_free_queues;
531
532 err = t4vf_sge_alloc_eth_txq(adapter, txq, dev,
533 netdev_get_tx_queue(dev, qs),
534 s->fw_evtq.cntxt_id);
535 if (err)
536 goto err_free_queues;
537
538 rxq->rspq.idx = qs;
539 memset(&rxq->stats, 0, sizeof(rxq->stats));
540 }
541 }
542
543 /*
544 * Create the reverse mappings for the queues.
545 */
546 s->egr_base = s->ethtxq[0].q.abs_id - s->ethtxq[0].q.cntxt_id;
547 s->ingr_base = s->ethrxq[0].rspq.abs_id - s->ethrxq[0].rspq.cntxt_id;
548 IQ_MAP(s, s->fw_evtq.abs_id) = &s->fw_evtq;
549 for_each_port(adapter, pidx) {
550 struct net_device *dev = adapter->port[pidx];
551 struct port_info *pi = netdev_priv(dev);
552 struct sge_eth_rxq *rxq = &s->ethrxq[pi->first_qset];
553 struct sge_eth_txq *txq = &s->ethtxq[pi->first_qset];
554 int qs;
555
556 for (qs = 0; qs < pi->nqsets; qs++, rxq++, txq++) {
557 IQ_MAP(s, rxq->rspq.abs_id) = &rxq->rspq;
558 EQ_MAP(s, txq->q.abs_id) = &txq->q;
559
560 /*
561 * The FW_IQ_CMD doesn't return the Absolute Queue IDs
562 * for Free Lists but since all of the Egress Queues
563 * (including Free Lists) have Relative Queue IDs
564 * which are computed as Absolute - Base Queue ID, we
565 * can synthesize the Absolute Queue IDs for the Free
566 * Lists. This is useful for debugging purposes when
567 * we want to dump Queue Contexts via the PF Driver.
568 */
569 rxq->fl.abs_id = rxq->fl.cntxt_id + s->egr_base;
570 EQ_MAP(s, rxq->fl.abs_id) = &rxq->fl;
571 }
572 }
573 return 0;
574
575err_free_queues:
576 t4vf_free_sge_resources(adapter);
577 return err;
578}
579
580/*
581 * Set up Receive Side Scaling (RSS) to distribute packets to multiple receive
582 * queues. We configure the RSS CPU lookup table to distribute to the number
583 * of HW receive queues, and the response queue lookup table to narrow that
584 * down to the response queues actually configured for each "port" (Virtual
585 * Interface). We always configure the RSS mapping for all ports since the
586 * mapping table has plenty of entries.
587 */
588static int setup_rss(struct adapter *adapter)
589{
590 int pidx;
591
592 for_each_port(adapter, pidx) {
593 struct port_info *pi = adap2pinfo(adapter, pidx);
594 struct sge_eth_rxq *rxq = &adapter->sge.ethrxq[pi->first_qset];
595 u16 rss[MAX_PORT_QSETS];
596 int qs, err;
597
598 for (qs = 0; qs < pi->nqsets; qs++)
599 rss[qs] = rxq[qs].rspq.abs_id;
600
601 err = t4vf_config_rss_range(adapter, pi->viid,
602 0, pi->rss_size, rss, pi->nqsets);
603 if (err)
604 return err;
605
606 /*
607 * Perform Global RSS Mode-specific initialization.
608 */
609 switch (adapter->params.rss.mode) {
610 case FW_RSS_GLB_CONFIG_CMD_MODE_BASICVIRTUAL:
611 /*
612 * If Tunnel All Lookup isn't specified in the global
613 * RSS Configuration, then we need to specify a
614 * default Ingress Queue for any ingress packets which
615 * aren't hashed. We'll use our first ingress queue
616 * ...
617 */
618 if (!adapter->params.rss.u.basicvirtual.tnlalllookup) {
619 union rss_vi_config config;
620 err = t4vf_read_rss_vi_config(adapter,
621 pi->viid,
622 &config);
623 if (err)
624 return err;
625 config.basicvirtual.defaultq =
626 rxq[0].rspq.abs_id;
627 err = t4vf_write_rss_vi_config(adapter,
628 pi->viid,
629 &config);
630 if (err)
631 return err;
632 }
633 break;
634 }
635 }
636
637 return 0;
638}
639
640/*
641 * Bring the adapter up. Called whenever we go from no "ports" open to having
642 * one open. This function performs the actions necessary to make an adapter
643 * operational, such as completing the initialization of HW modules, and
644 * enabling interrupts. Must be called with the rtnl lock held. (Note that
645 * this is called "cxgb_up" in the PF Driver.)
646 */
647static int adapter_up(struct adapter *adapter)
648{
649 int err;
650
651 /*
652 * If this is the first time we've been called, perform basic
653 * adapter setup. Once we've done this, many of our adapter
654 * parameters can no longer be changed ...
655 */
656 if ((adapter->flags & FULL_INIT_DONE) == 0) {
657 err = setup_sge_queues(adapter);
658 if (err)
659 return err;
660 err = setup_rss(adapter);
661 if (err) {
662 t4vf_free_sge_resources(adapter);
663 return err;
664 }
665
666 if (adapter->flags & USING_MSIX)
667 name_msix_vecs(adapter);
668 adapter->flags |= FULL_INIT_DONE;
669 }
670
671 /*
672 * Acquire our interrupt resources. We only support MSI-X and MSI.
673 */
674 BUG_ON((adapter->flags & (USING_MSIX|USING_MSI)) == 0);
675 if (adapter->flags & USING_MSIX)
676 err = request_msix_queue_irqs(adapter);
677 else
678 err = request_irq(adapter->pdev->irq,
679 t4vf_intr_handler(adapter), 0,
680 adapter->name, adapter);
681 if (err) {
682 dev_err(adapter->pdev_dev, "request_irq failed, err %d\n",
683 err);
684 return err;
685 }
686
687 /*
688 * Enable NAPI ingress processing and return success.
689 */
690 enable_rx(adapter);
691 t4vf_sge_start(adapter);
692 return 0;
693}
694
695/*
696 * Bring the adapter down. Called whenever the last "port" (Virtual
697 * Interface) closed. (Note that this routine is called "cxgb_down" in the PF
698 * Driver.)
699 */
700static void adapter_down(struct adapter *adapter)
701{
702 /*
703 * Free interrupt resources.
704 */
705 if (adapter->flags & USING_MSIX)
706 free_msix_queue_irqs(adapter);
707 else
708 free_irq(adapter->pdev->irq, adapter);
709
710 /*
711 * Wait for NAPI handlers to finish.
712 */
713 quiesce_rx(adapter);
714}
715
716/*
717 * Start up a net device.
718 */
719static int cxgb4vf_open(struct net_device *dev)
720{
721 int err;
722 struct port_info *pi = netdev_priv(dev);
723 struct adapter *adapter = pi->adapter;
724
725 /*
726 * If this is the first interface that we're opening on the "adapter",
727 * bring the "adapter" up now.
728 */
729 if (adapter->open_device_map == 0) {
730 err = adapter_up(adapter);
731 if (err)
732 return err;
733 }
734
735 /*
736 * Note that this interface is up and start everything up ...
737 */
738 netif_set_real_num_tx_queues(dev, pi->nqsets);
739 err = netif_set_real_num_rx_queues(dev, pi->nqsets);
740 if (err)
741 goto err_unwind;
742 err = link_start(dev);
743 if (err)
744 goto err_unwind;
745
746 netif_tx_start_all_queues(dev);
747 set_bit(pi->port_id, &adapter->open_device_map);
748 return 0;
749
750err_unwind:
751 if (adapter->open_device_map == 0)
752 adapter_down(adapter);
753 return err;
754}
755
756/*
757 * Shut down a net device. This routine is called "cxgb_close" in the PF
758 * Driver ...
759 */
760static int cxgb4vf_stop(struct net_device *dev)
761{
762 struct port_info *pi = netdev_priv(dev);
763 struct adapter *adapter = pi->adapter;
764
765 netif_tx_stop_all_queues(dev);
766 netif_carrier_off(dev);
767 t4vf_enable_vi(adapter, pi->viid, false, false);
768 pi->link_cfg.link_ok = 0;
769
770 clear_bit(pi->port_id, &adapter->open_device_map);
771 if (adapter->open_device_map == 0)
772 adapter_down(adapter);
773 return 0;
774}
775
776/*
777 * Translate our basic statistics into the standard "ifconfig" statistics.
778 */
779static struct net_device_stats *cxgb4vf_get_stats(struct net_device *dev)
780{
781 struct t4vf_port_stats stats;
782 struct port_info *pi = netdev2pinfo(dev);
783 struct adapter *adapter = pi->adapter;
784 struct net_device_stats *ns = &dev->stats;
785 int err;
786
787 spin_lock(&adapter->stats_lock);
788 err = t4vf_get_port_stats(adapter, pi->pidx, &stats);
789 spin_unlock(&adapter->stats_lock);
790
791 memset(ns, 0, sizeof(*ns));
792 if (err)
793 return ns;
794
795 ns->tx_bytes = (stats.tx_bcast_bytes + stats.tx_mcast_bytes +
796 stats.tx_ucast_bytes + stats.tx_offload_bytes);
797 ns->tx_packets = (stats.tx_bcast_frames + stats.tx_mcast_frames +
798 stats.tx_ucast_frames + stats.tx_offload_frames);
799 ns->rx_bytes = (stats.rx_bcast_bytes + stats.rx_mcast_bytes +
800 stats.rx_ucast_bytes);
801 ns->rx_packets = (stats.rx_bcast_frames + stats.rx_mcast_frames +
802 stats.rx_ucast_frames);
803 ns->multicast = stats.rx_mcast_frames;
804 ns->tx_errors = stats.tx_drop_frames;
805 ns->rx_errors = stats.rx_err_frames;
806
807 return ns;
808}
809
810/*
811 * Collect up to maxaddrs worth of a netdevice's unicast addresses, starting
 812 * at a specified offset within the list, into an array of address pointers and
813 * return the number collected.
814 */
815static inline unsigned int collect_netdev_uc_list_addrs(const struct net_device *dev,
816 const u8 **addr,
817 unsigned int offset,
818 unsigned int maxaddrs)
819{
820 unsigned int index = 0;
821 unsigned int naddr = 0;
822 const struct netdev_hw_addr *ha;
823
824 for_each_dev_addr(dev, ha)
825 if (index++ >= offset) {
826 addr[naddr++] = ha->addr;
827 if (naddr >= maxaddrs)
828 break;
829 }
830 return naddr;
831}
832
833/*
834 * Collect up to maxaddrs worth of a netdevice's multicast addresses, starting
 835 * at a specified offset within the list, into an array of address pointers and
836 * return the number collected.
837 */
838static inline unsigned int collect_netdev_mc_list_addrs(const struct net_device *dev,
839 const u8 **addr,
840 unsigned int offset,
841 unsigned int maxaddrs)
842{
843 unsigned int index = 0;
844 unsigned int naddr = 0;
845 const struct netdev_hw_addr *ha;
846
847 netdev_for_each_mc_addr(ha, dev)
848 if (index++ >= offset) {
849 addr[naddr++] = ha->addr;
850 if (naddr >= maxaddrs)
851 break;
852 }
853 return naddr;
854}
855
856/*
857 * Configure the exact and hash address filters to handle a port's multicast
858 * and secondary unicast MAC addresses.
859 */
860static int set_addr_filters(const struct net_device *dev, bool sleep)
861{
862 u64 mhash = 0;
863 u64 uhash = 0;
864 bool free = true;
865 unsigned int offset, naddr;
866 const u8 *addr[7];
867 int ret;
868 const struct port_info *pi = netdev_priv(dev);
869
870 /* first do the secondary unicast addresses */
871 for (offset = 0; ; offset += naddr) {
872 naddr = collect_netdev_uc_list_addrs(dev, addr, offset,
873 ARRAY_SIZE(addr));
874 if (naddr == 0)
875 break;
876
877 ret = t4vf_alloc_mac_filt(pi->adapter, pi->viid, free,
878 naddr, addr, NULL, &uhash, sleep);
879 if (ret < 0)
880 return ret;
881
882 free = false;
883 }
884
885 /* next set up the multicast addresses */
886 for (offset = 0; ; offset += naddr) {
887 naddr = collect_netdev_mc_list_addrs(dev, addr, offset,
888 ARRAY_SIZE(addr));
889 if (naddr == 0)
890 break;
891
892 ret = t4vf_alloc_mac_filt(pi->adapter, pi->viid, free,
893 naddr, addr, NULL, &mhash, sleep);
894 if (ret < 0)
895 return ret;
896 free = false;
897 }
898
899 return t4vf_set_addr_hash(pi->adapter, pi->viid, uhash != 0,
900 uhash | mhash, sleep);
901}
902
903/*
 904 * Set RX properties of a port, such as promiscuity, address filters, and MTU.
905 * If @mtu is -1 it is left unchanged.
906 */
907static int set_rxmode(struct net_device *dev, int mtu, bool sleep_ok)
908{
909 int ret;
910 struct port_info *pi = netdev_priv(dev);
911
912 ret = set_addr_filters(dev, sleep_ok);
913 if (ret == 0)
914 ret = t4vf_set_rxmode(pi->adapter, pi->viid, -1,
915 (dev->flags & IFF_PROMISC) != 0,
916 (dev->flags & IFF_ALLMULTI) != 0,
917 1, -1, sleep_ok);
918 return ret;
919}
920
921/*
922 * Set the current receive modes on the device.
923 */
924static void cxgb4vf_set_rxmode(struct net_device *dev)
925{
926 /* unfortunately we can't return errors to the stack */
927 set_rxmode(dev, -1, false);
928}
929
930/*
931 * Find the entry in the interrupt holdoff timer value array which comes
932 * closest to the specified interrupt holdoff value.
933 */
934static int closest_timer(const struct sge *s, int us)
935{
936 int i, timer_idx = 0, min_delta = INT_MAX;
937
938 for (i = 0; i < ARRAY_SIZE(s->timer_val); i++) {
939 int delta = us - s->timer_val[i];
940 if (delta < 0)
941 delta = -delta;
942 if (delta < min_delta) {
943 min_delta = delta;
944 timer_idx = i;
945 }
946 }
947 return timer_idx;
948}
949
950static int closest_thres(const struct sge *s, int thres)
951{
952 int i, delta, pktcnt_idx = 0, min_delta = INT_MAX;
953
954 for (i = 0; i < ARRAY_SIZE(s->counter_val); i++) {
955 delta = thres - s->counter_val[i];
956 if (delta < 0)
957 delta = -delta;
958 if (delta < min_delta) {
959 min_delta = delta;
960 pktcnt_idx = i;
961 }
962 }
963 return pktcnt_idx;
964}
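/*
 * Worked example: with hypothetical holdoff timers of { 5, 10, 20, 50, 100,
 * 200 } microseconds in timer_val[], closest_timer() maps a request of 30 us
 * to index 2 (20 us, delta 10) rather than index 3 (50 us, delta 20);
 * closest_thres() performs the same nearest-match search over counter_val[].
 */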
965
966/*
967 * Return a queue's interrupt hold-off time in us. 0 means no timer.
968 */
969static unsigned int qtimer_val(const struct adapter *adapter,
970 const struct sge_rspq *rspq)
971{
972 unsigned int timer_idx = QINTR_TIMER_IDX_GET(rspq->intr_params);
973
974 return timer_idx < SGE_NTIMERS
975 ? adapter->sge.timer_val[timer_idx]
976 : 0;
977}
978
979/**
980 * set_rxq_intr_params - set a queue's interrupt holdoff parameters
981 * @adapter: the adapter
982 * @rspq: the RX response queue
983 * @us: the hold-off time in us, or 0 to disable timer
984 * @cnt: the hold-off packet count, or 0 to disable counter
985 *
986 * Sets an RX response queue's interrupt hold-off time and packet count.
987 * At least one of the two needs to be enabled for the queue to generate
988 * interrupts.
989 */
990static int set_rxq_intr_params(struct adapter *adapter, struct sge_rspq *rspq,
991 unsigned int us, unsigned int cnt)
992{
993 unsigned int timer_idx;
994
995 /*
996 * If both the interrupt holdoff timer and count are specified as
997 * zero, default to a holdoff count of 1 ...
998 */
999 if ((us | cnt) == 0)
1000 cnt = 1;
1001
1002 /*
1003 * If an interrupt holdoff count has been specified, then find the
1004 * closest configured holdoff count and use that. If the response
1005 * queue has already been created, then update its queue context
1006 * parameters ...
1007 */
1008 if (cnt) {
1009 int err;
1010 u32 v, pktcnt_idx;
1011
1012 pktcnt_idx = closest_thres(&adapter->sge, cnt);
1013 if (rspq->desc && rspq->pktcnt_idx != pktcnt_idx) {
1014 v = FW_PARAMS_MNEM(FW_PARAMS_MNEM_DMAQ) |
1015 FW_PARAMS_PARAM_X(
1016 FW_PARAMS_PARAM_DMAQ_IQ_INTCNTTHRESH) |
1017 FW_PARAMS_PARAM_YZ(rspq->cntxt_id);
1018 err = t4vf_set_params(adapter, 1, &v, &pktcnt_idx);
1019 if (err)
1020 return err;
1021 }
1022 rspq->pktcnt_idx = pktcnt_idx;
1023 }
1024
1025 /*
1026 * Compute the closest holdoff timer index from the supplied holdoff
1027 * timer value.
1028 */
1029 timer_idx = (us == 0
1030 ? SGE_TIMER_RSTRT_CNTR
1031 : closest_timer(&adapter->sge, us));
1032
1033 /*
1034 * Update the response queue's interrupt coalescing parameters and
1035 * return success.
1036 */
1037 rspq->intr_params = (QINTR_TIMER_IDX(timer_idx) |
1038 (cnt > 0 ? QINTR_CNT_EN : 0));
1039 return 0;
1040}
1041
1042/*
1043 * Return a version number to identify the type of adapter. The scheme is:
1044 * - bits 0..9: chip version
1045 * - bits 10..15: chip revision
1046 */
1047static inline unsigned int mk_adap_vers(const struct adapter *adapter)
1048{
1049 /*
1050 * Chip version 4, revision 0x3f (cxgb4vf).
1051 */
1052 return 4 | (0x3f << 10);
1053}
1054
1055/*
1056 * Execute the specified ioctl command.
1057 */
1058static int cxgb4vf_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
1059{
1060 int ret = 0;
1061
1062 switch (cmd) {
1063 /*
1064 * The VF Driver doesn't have access to any of the other
1065 * common Ethernet device ioctl()'s (like reading/writing
 1066 * PHY registers, etc.).
1067 */
1068
1069 default:
1070 ret = -EOPNOTSUPP;
1071 break;
1072 }
1073 return ret;
1074}
1075
1076/*
1077 * Change the device's MTU.
1078 */
1079static int cxgb4vf_change_mtu(struct net_device *dev, int new_mtu)
1080{
1081 int ret;
1082 struct port_info *pi = netdev_priv(dev);
1083
1084 /* accommodate SACK */
1085 if (new_mtu < 81)
1086 return -EINVAL;
1087
1088 ret = t4vf_set_rxmode(pi->adapter, pi->viid, new_mtu,
1089 -1, -1, -1, -1, true);
1090 if (!ret)
1091 dev->mtu = new_mtu;
1092 return ret;
1093}
1094
1095static u32 cxgb4vf_fix_features(struct net_device *dev, u32 features)
1096{
1097 /*
1098 * Since there is no support for separate rx/tx vlan accel
1099 * enable/disable make sure tx flag is always in same state as rx.
1100 */
1101 if (features & NETIF_F_HW_VLAN_RX)
1102 features |= NETIF_F_HW_VLAN_TX;
1103 else
1104 features &= ~NETIF_F_HW_VLAN_TX;
1105
1106 return features;
1107}
1108
1109static int cxgb4vf_set_features(struct net_device *dev, u32 features)
1110{
1111 struct port_info *pi = netdev_priv(dev);
1112 u32 changed = dev->features ^ features;
1113
1114 if (changed & NETIF_F_HW_VLAN_RX)
1115 t4vf_set_rxmode(pi->adapter, pi->viid, -1, -1, -1, -1,
1116 features & NETIF_F_HW_VLAN_TX, 0);
1117
1118 return 0;
1119}
1120
1121/*
 1122 * Change the device's MAC address.
1123 */
1124static int cxgb4vf_set_mac_addr(struct net_device *dev, void *_addr)
1125{
1126 int ret;
1127 struct sockaddr *addr = _addr;
1128 struct port_info *pi = netdev_priv(dev);
1129
1130 if (!is_valid_ether_addr(addr->sa_data))
1131 return -EINVAL;
1132
1133 ret = t4vf_change_mac(pi->adapter, pi->viid, pi->xact_addr_filt,
1134 addr->sa_data, true);
1135 if (ret < 0)
1136 return ret;
1137
1138 memcpy(dev->dev_addr, addr->sa_data, dev->addr_len);
1139 pi->xact_addr_filt = ret;
1140 return 0;
1141}
1142
1143#ifdef CONFIG_NET_POLL_CONTROLLER
1144/*
1145 * Poll all of our receive queues. This is called outside of normal interrupt
1146 * context.
1147 */
1148static void cxgb4vf_poll_controller(struct net_device *dev)
1149{
1150 struct port_info *pi = netdev_priv(dev);
1151 struct adapter *adapter = pi->adapter;
1152
1153 if (adapter->flags & USING_MSIX) {
1154 struct sge_eth_rxq *rxq;
1155 int nqsets;
1156
1157 rxq = &adapter->sge.ethrxq[pi->first_qset];
1158 for (nqsets = pi->nqsets; nqsets; nqsets--) {
1159 t4vf_sge_intr_msix(0, &rxq->rspq);
1160 rxq++;
1161 }
1162 } else
1163 t4vf_intr_handler(adapter)(0, adapter);
1164}
1165#endif
1166
1167/*
1168 * Ethtool operations.
1169 * ===================
1170 *
1171 * Note that we don't support any ethtool operations which change the physical
1172 * state of the port to which we're linked.
1173 */
1174
1175/*
1176 * Return current port link settings.
1177 */
1178static int cxgb4vf_get_settings(struct net_device *dev,
1179 struct ethtool_cmd *cmd)
1180{
1181 const struct port_info *pi = netdev_priv(dev);
1182
1183 cmd->supported = pi->link_cfg.supported;
1184 cmd->advertising = pi->link_cfg.advertising;
1185 ethtool_cmd_speed_set(cmd,
1186 netif_carrier_ok(dev) ? pi->link_cfg.speed : -1);
1187 cmd->duplex = DUPLEX_FULL;
1188
1189 cmd->port = (cmd->supported & SUPPORTED_TP) ? PORT_TP : PORT_FIBRE;
1190 cmd->phy_address = pi->port_id;
1191 cmd->transceiver = XCVR_EXTERNAL;
1192 cmd->autoneg = pi->link_cfg.autoneg;
1193 cmd->maxtxpkt = 0;
1194 cmd->maxrxpkt = 0;
1195 return 0;
1196}
1197
1198/*
1199 * Return our driver information.
1200 */
1201static void cxgb4vf_get_drvinfo(struct net_device *dev,
1202 struct ethtool_drvinfo *drvinfo)
1203{
1204 struct adapter *adapter = netdev2adap(dev);
1205
1206 strcpy(drvinfo->driver, KBUILD_MODNAME);
1207 strcpy(drvinfo->version, DRV_VERSION);
1208 strcpy(drvinfo->bus_info, pci_name(to_pci_dev(dev->dev.parent)));
1209 snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version),
1210 "%u.%u.%u.%u, TP %u.%u.%u.%u",
1211 FW_HDR_FW_VER_MAJOR_GET(adapter->params.dev.fwrev),
1212 FW_HDR_FW_VER_MINOR_GET(adapter->params.dev.fwrev),
1213 FW_HDR_FW_VER_MICRO_GET(adapter->params.dev.fwrev),
1214 FW_HDR_FW_VER_BUILD_GET(adapter->params.dev.fwrev),
1215 FW_HDR_FW_VER_MAJOR_GET(adapter->params.dev.tprev),
1216 FW_HDR_FW_VER_MINOR_GET(adapter->params.dev.tprev),
1217 FW_HDR_FW_VER_MICRO_GET(adapter->params.dev.tprev),
1218 FW_HDR_FW_VER_BUILD_GET(adapter->params.dev.tprev));
1219}
1220
1221/*
1222 * Return current adapter message level.
1223 */
1224static u32 cxgb4vf_get_msglevel(struct net_device *dev)
1225{
1226 return netdev2adap(dev)->msg_enable;
1227}
1228
1229/*
1230 * Set current adapter message level.
1231 */
1232static void cxgb4vf_set_msglevel(struct net_device *dev, u32 msglevel)
1233{
1234 netdev2adap(dev)->msg_enable = msglevel;
1235}
1236
1237/*
1238 * Return the device's current Queue Set ring size parameters along with the
1239 * allowed maximum values. Since ethtool doesn't understand the concept of
1240 * multi-queue devices, we just return the current values associated with the
1241 * first Queue Set.
1242 */
1243static void cxgb4vf_get_ringparam(struct net_device *dev,
1244 struct ethtool_ringparam *rp)
1245{
1246 const struct port_info *pi = netdev_priv(dev);
1247 const struct sge *s = &pi->adapter->sge;
1248
1249 rp->rx_max_pending = MAX_RX_BUFFERS;
1250 rp->rx_mini_max_pending = MAX_RSPQ_ENTRIES;
1251 rp->rx_jumbo_max_pending = 0;
1252 rp->tx_max_pending = MAX_TXQ_ENTRIES;
1253
1254 rp->rx_pending = s->ethrxq[pi->first_qset].fl.size - MIN_FL_RESID;
1255 rp->rx_mini_pending = s->ethrxq[pi->first_qset].rspq.size;
1256 rp->rx_jumbo_pending = 0;
1257 rp->tx_pending = s->ethtxq[pi->first_qset].q.size;
1258}
1259
1260/*
1261 * Set the Queue Set ring size parameters for the device. Again, since
1262 * ethtool doesn't allow for the concept of multiple queues per device, we'll
1263 * apply these new values across all of the Queue Sets associated with the
1264 * device -- after vetting them of course!
1265 */
1266static int cxgb4vf_set_ringparam(struct net_device *dev,
1267 struct ethtool_ringparam *rp)
1268{
1269 const struct port_info *pi = netdev_priv(dev);
1270 struct adapter *adapter = pi->adapter;
1271 struct sge *s = &adapter->sge;
1272 int qs;
1273
1274 if (rp->rx_pending > MAX_RX_BUFFERS ||
1275 rp->rx_jumbo_pending ||
1276 rp->tx_pending > MAX_TXQ_ENTRIES ||
1277 rp->rx_mini_pending > MAX_RSPQ_ENTRIES ||
1278 rp->rx_mini_pending < MIN_RSPQ_ENTRIES ||
1279 rp->rx_pending < MIN_FL_ENTRIES ||
1280 rp->tx_pending < MIN_TXQ_ENTRIES)
1281 return -EINVAL;
1282
1283 if (adapter->flags & FULL_INIT_DONE)
1284 return -EBUSY;
1285
1286 for (qs = pi->first_qset; qs < pi->first_qset + pi->nqsets; qs++) {
1287 s->ethrxq[qs].fl.size = rp->rx_pending + MIN_FL_RESID;
1288 s->ethrxq[qs].rspq.size = rp->rx_mini_pending;
1289 s->ethtxq[qs].q.size = rp->tx_pending;
1290 }
1291 return 0;
1292}
1293
1294/*
1295 * Return the interrupt holdoff timer and count for the first Queue Set on the
1296 * device. Our extension ioctl() (the cxgbtool interface) allows the
1297 * interrupt holdoff timer to be read on all of the device's Queue Sets.
1298 */
1299static int cxgb4vf_get_coalesce(struct net_device *dev,
1300 struct ethtool_coalesce *coalesce)
1301{
1302 const struct port_info *pi = netdev_priv(dev);
1303 const struct adapter *adapter = pi->adapter;
1304 const struct sge_rspq *rspq = &adapter->sge.ethrxq[pi->first_qset].rspq;
1305
1306 coalesce->rx_coalesce_usecs = qtimer_val(adapter, rspq);
1307 coalesce->rx_max_coalesced_frames =
1308 ((rspq->intr_params & QINTR_CNT_EN)
1309 ? adapter->sge.counter_val[rspq->pktcnt_idx]
1310 : 0);
1311 return 0;
1312}
1313
1314/*
1315 * Set the RX interrupt holdoff timer and count for the first Queue Set on the
1316 * interface. Our extension ioctl() (the cxgbtool interface) allows us to set
1317 * the interrupt holdoff timer on any of the device's Queue Sets.
1318 */
1319static int cxgb4vf_set_coalesce(struct net_device *dev,
1320 struct ethtool_coalesce *coalesce)
1321{
1322 const struct port_info *pi = netdev_priv(dev);
1323 struct adapter *adapter = pi->adapter;
1324
1325 return set_rxq_intr_params(adapter,
1326 &adapter->sge.ethrxq[pi->first_qset].rspq,
1327 coalesce->rx_coalesce_usecs,
1328 coalesce->rx_max_coalesced_frames);
1329}
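/*
 * Example (device name is illustrative):
 *
 *	ethtool -C ethX rx-usecs 50 rx-frames 8
 *
 * lands here and hands 50 and 8 to set_rxq_intr_params() for the first
 * Queue Set's Response Queue; the cxgbtool extension ioctl() mentioned
 * above can do the same for any other Queue Set.
 */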
1330
1331/*
1332 * Report current port link pause parameter settings.
1333 */
1334static void cxgb4vf_get_pauseparam(struct net_device *dev,
1335 struct ethtool_pauseparam *pauseparam)
1336{
1337 struct port_info *pi = netdev_priv(dev);
1338
1339 pauseparam->autoneg = (pi->link_cfg.requested_fc & PAUSE_AUTONEG) != 0;
1340 pauseparam->rx_pause = (pi->link_cfg.fc & PAUSE_RX) != 0;
1341 pauseparam->tx_pause = (pi->link_cfg.fc & PAUSE_TX) != 0;
1342}
1343
1344/*
1345 * Identify the port by blinking the port's LED.
1346 */
1347static int cxgb4vf_phys_id(struct net_device *dev,
1348 enum ethtool_phys_id_state state)
1349{
1350 unsigned int val;
1351 struct port_info *pi = netdev_priv(dev);
1352
1353 if (state == ETHTOOL_ID_ACTIVE)
1354 val = 0xffff;
1355 else if (state == ETHTOOL_ID_INACTIVE)
1356 val = 0;
1357 else
1358 return -EINVAL;
1359
1360 return t4vf_identify_port(pi->adapter, pi->viid, val);
1361}
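/*
 * Example (device name is illustrative): "ethtool --identify ethX 10"
 * invokes this twice -- first with ETHTOOL_ID_ACTIVE (val = 0xffff) to
 * ask the firmware, via t4vf_identify_port(), to start blinking the port
 * LED, and roughly ten seconds later with ETHTOOL_ID_INACTIVE (val = 0)
 * to stop it.
 */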
1362
1363/*
1364 * Port stats maintained per queue of the port.
1365 */
1366struct queue_port_stats {
1367 u64 tso;
1368 u64 tx_csum;
1369 u64 rx_csum;
1370 u64 vlan_ex;
1371 u64 vlan_ins;
1372 u64 lro_pkts;
1373 u64 lro_merged;
1374};
1375
1376/*
1377 * Strings for the ETH_SS_STATS statistics set ("ethtool -S"). Note that
1378 * these need to match the order of statistics returned by
1379 * t4vf_get_port_stats().
1380 */
1381static const char stats_strings[][ETH_GSTRING_LEN] = {
1382 /*
1383 * These must match the layout of the t4vf_port_stats structure.
1384 */
1385 "TxBroadcastBytes ",
1386 "TxBroadcastFrames ",
1387 "TxMulticastBytes ",
1388 "TxMulticastFrames ",
1389 "TxUnicastBytes ",
1390 "TxUnicastFrames ",
1391 "TxDroppedFrames ",
1392 "TxOffloadBytes ",
1393 "TxOffloadFrames ",
1394 "RxBroadcastBytes ",
1395 "RxBroadcastFrames ",
1396 "RxMulticastBytes ",
1397 "RxMulticastFrames ",
1398 "RxUnicastBytes ",
1399 "RxUnicastFrames ",
1400 "RxErrorFrames ",
1401
1402 /*
1403 * These are accumulated per-queue statistics and must match the
1404 * order of the fields in the queue_port_stats structure.
1405 */
1406 "TSO ",
1407 "TxCsumOffload ",
1408 "RxCsumGood ",
1409 "VLANextractions ",
1410 "VLANinsertions ",
1411 "GROPackets ",
1412 "GROMerged ",
1413};
1414
1415/*
1416 * Return the number of statistics in the specified statistics set.
1417 */
1418static int cxgb4vf_get_sset_count(struct net_device *dev, int sset)
1419{
1420 switch (sset) {
1421 case ETH_SS_STATS:
1422 return ARRAY_SIZE(stats_strings);
1423 default:
1424 return -EOPNOTSUPP;
1425 }
1426 /*NOTREACHED*/
1427}
1428
1429/*
1430 * Return the strings for the specified statistics set.
1431 */
1432static void cxgb4vf_get_strings(struct net_device *dev,
1433 u32 sset,
1434 u8 *data)
1435{
1436 switch (sset) {
1437 case ETH_SS_STATS:
1438 memcpy(data, stats_strings, sizeof(stats_strings));
1439 break;
1440 }
1441}
1442
1443/*
1444 * Small utility routine to accumulate queue statistics across the queues of
1445 * a "port".
1446 */
1447static void collect_sge_port_stats(const struct adapter *adapter,
1448 const struct port_info *pi,
1449 struct queue_port_stats *stats)
1450{
1451 const struct sge_eth_txq *txq = &adapter->sge.ethtxq[pi->first_qset];
1452 const struct sge_eth_rxq *rxq = &adapter->sge.ethrxq[pi->first_qset];
1453 int qs;
1454
1455 memset(stats, 0, sizeof(*stats));
1456 for (qs = 0; qs < pi->nqsets; qs++, rxq++, txq++) {
1457 stats->tso += txq->tso;
1458 stats->tx_csum += txq->tx_cso;
1459 stats->rx_csum += rxq->stats.rx_cso;
1460 stats->vlan_ex += rxq->stats.vlan_ex;
1461 stats->vlan_ins += txq->vlan_ins;
1462 stats->lro_pkts += rxq->stats.lro_pkts;
1463 stats->lro_merged += rxq->stats.lro_merged;
1464 }
1465}
1466
1467/*
1468 * Return the ETH_SS_STATS statistics set.
1469 */
1470static void cxgb4vf_get_ethtool_stats(struct net_device *dev,
1471 struct ethtool_stats *stats,
1472 u64 *data)
1473{
1474 struct port_info *pi = netdev2pinfo(dev);
1475 struct adapter *adapter = pi->adapter;
1476 int err = t4vf_get_port_stats(adapter, pi->pidx,
1477 (struct t4vf_port_stats *)data);
1478 if (err)
1479 memset(data, 0, sizeof(struct t4vf_port_stats));
1480
1481 data += sizeof(struct t4vf_port_stats) / sizeof(u64);
1482 collect_sge_port_stats(adapter, pi, (struct queue_port_stats *)data);
1483}
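/*
 * Sketch of the resulting "data" layout, assuming struct t4vf_port_stats
 * carries exactly the sixteen firmware counters named in stats_strings[]:
 *
 *	data[0..15]   <- t4vf_get_port_stats()     (16 u64 port counters)
 *	data[16..22]  <- collect_sge_port_stats()  (7 u64 queue counters)
 *
 * for a total of ARRAY_SIZE(stats_strings) == 23 values, matching what
 * cxgb4vf_get_sset_count() reports for ETH_SS_STATS.
 */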
1484
1485/*
1486 * Return the size of our register map.
1487 */
1488static int cxgb4vf_get_regs_len(struct net_device *dev)
1489{
1490 return T4VF_REGMAP_SIZE;
1491}
1492
1493/*
1494 * Dump a block of registers, start to end inclusive, into a buffer.
1495 */
1496static void reg_block_dump(struct adapter *adapter, void *regbuf,
1497 unsigned int start, unsigned int end)
1498{
1499 u32 *bp = regbuf + start - T4VF_REGMAP_START;
1500
1501 for ( ; start <= end; start += sizeof(u32)) {
1502 /*
1503 * Avoid reading the Mailbox Control register since that
1504 * can trigger a Mailbox Ownership Arbitration cycle and
1505 * interfere with communication with the firmware.
1506 */
1507 if (start == T4VF_CIM_BASE_ADDR + CIM_VF_EXT_MAILBOX_CTRL)
1508 *bp++ = 0xffff;
1509 else
1510 *bp++ = t4_read_reg(adapter, start);
1511 }
1512}
1513
1514/*
1515 * Copy our entire register map into the provided buffer.
1516 */
1517static void cxgb4vf_get_regs(struct net_device *dev,
1518 struct ethtool_regs *regs,
1519 void *regbuf)
1520{
1521 struct adapter *adapter = netdev2adap(dev);
1522
1523 regs->version = mk_adap_vers(adapter);
1524
1525 /*
1526 * Fill in register buffer with our register map.
1527 */
1528 memset(regbuf, 0, T4VF_REGMAP_SIZE);
1529
1530 reg_block_dump(adapter, regbuf,
1531 T4VF_SGE_BASE_ADDR + T4VF_MOD_MAP_SGE_FIRST,
1532 T4VF_SGE_BASE_ADDR + T4VF_MOD_MAP_SGE_LAST);
1533 reg_block_dump(adapter, regbuf,
1534 T4VF_MPS_BASE_ADDR + T4VF_MOD_MAP_MPS_FIRST,
1535 T4VF_MPS_BASE_ADDR + T4VF_MOD_MAP_MPS_LAST);
1536 reg_block_dump(adapter, regbuf,
1537 T4VF_PL_BASE_ADDR + T4VF_MOD_MAP_PL_FIRST,
1538 T4VF_PL_BASE_ADDR + T4VF_MOD_MAP_PL_LAST);
1539 reg_block_dump(adapter, regbuf,
1540 T4VF_CIM_BASE_ADDR + T4VF_MOD_MAP_CIM_FIRST,
1541 T4VF_CIM_BASE_ADDR + T4VF_MOD_MAP_CIM_LAST);
1542
1543 reg_block_dump(adapter, regbuf,
1544 T4VF_MBDATA_BASE_ADDR + T4VF_MBDATA_FIRST,
1545 T4VF_MBDATA_BASE_ADDR + T4VF_MBDATA_LAST);
1546}
1547
1548/*
1549 * Report current Wake On LAN settings.
1550 */
1551static void cxgb4vf_get_wol(struct net_device *dev,
1552 struct ethtool_wolinfo *wol)
1553{
1554 wol->supported = 0;
1555 wol->wolopts = 0;
1556 memset(&wol->sopass, 0, sizeof(wol->sopass));
1557}
1558
1559/*
1560 * TCP Segmentation Offload flags which we support.
1561 */
1562#define TSO_FLAGS (NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_TSO_ECN)
1563
1564static struct ethtool_ops cxgb4vf_ethtool_ops = {
1565 .get_settings = cxgb4vf_get_settings,
1566 .get_drvinfo = cxgb4vf_get_drvinfo,
1567 .get_msglevel = cxgb4vf_get_msglevel,
1568 .set_msglevel = cxgb4vf_set_msglevel,
1569 .get_ringparam = cxgb4vf_get_ringparam,
1570 .set_ringparam = cxgb4vf_set_ringparam,
1571 .get_coalesce = cxgb4vf_get_coalesce,
1572 .set_coalesce = cxgb4vf_set_coalesce,
1573 .get_pauseparam = cxgb4vf_get_pauseparam,
1574 .get_link = ethtool_op_get_link,
1575 .get_strings = cxgb4vf_get_strings,
1576 .set_phys_id = cxgb4vf_phys_id,
1577 .get_sset_count = cxgb4vf_get_sset_count,
1578 .get_ethtool_stats = cxgb4vf_get_ethtool_stats,
1579 .get_regs_len = cxgb4vf_get_regs_len,
1580 .get_regs = cxgb4vf_get_regs,
1581 .get_wol = cxgb4vf_get_wol,
1582};
1583
1584/*
1585 * /sys/kernel/debug/cxgb4vf support code and data.
1586 * ================================================
1587 */
1588
1589/*
1590 * Show SGE Queue Set information. We display QPL Queue Sets per line.
1591 */
1592#define QPL 4
1593
1594static int sge_qinfo_show(struct seq_file *seq, void *v)
1595{
1596 struct adapter *adapter = seq->private;
1597 int eth_entries = DIV_ROUND_UP(adapter->sge.ethqsets, QPL);
1598 int qs, r = (uintptr_t)v - 1;
1599
1600 if (r)
1601 seq_putc(seq, '\n');
1602
1603 #define S3(fmt_spec, s, v) \
1604 do {\
1605 seq_printf(seq, "%-12s", s); \
1606 for (qs = 0; qs < n; ++qs) \
1607 seq_printf(seq, " %16" fmt_spec, v); \
1608 seq_putc(seq, '\n'); \
1609 } while (0)
1610 #define S(s, v) S3("s", s, v)
1611 #define T(s, v) S3("u", s, txq[qs].v)
1612 #define R(s, v) S3("u", s, rxq[qs].v)
1613
1614 if (r < eth_entries) {
1615 const struct sge_eth_rxq *rxq = &adapter->sge.ethrxq[r * QPL];
1616 const struct sge_eth_txq *txq = &adapter->sge.ethtxq[r * QPL];
1617 int n = min(QPL, adapter->sge.ethqsets - QPL * r);
1618
1619 S("QType:", "Ethernet");
1620 S("Interface:",
1621 (rxq[qs].rspq.netdev
1622 ? rxq[qs].rspq.netdev->name
1623 : "N/A"));
1624 S3("d", "Port:",
1625 (rxq[qs].rspq.netdev
1626 ? ((struct port_info *)
1627 netdev_priv(rxq[qs].rspq.netdev))->port_id
1628 : -1));
1629 T("TxQ ID:", q.abs_id);
1630 T("TxQ size:", q.size);
1631 T("TxQ inuse:", q.in_use);
1632 T("TxQ PIdx:", q.pidx);
1633 T("TxQ CIdx:", q.cidx);
1634 R("RspQ ID:", rspq.abs_id);
1635 R("RspQ size:", rspq.size);
1636 R("RspQE size:", rspq.iqe_len);
1637 S3("u", "Intr delay:", qtimer_val(adapter, &rxq[qs].rspq));
1638 S3("u", "Intr pktcnt:",
1639 adapter->sge.counter_val[rxq[qs].rspq.pktcnt_idx]);
1640 R("RspQ CIdx:", rspq.cidx);
1641 R("RspQ Gen:", rspq.gen);
1642 R("FL ID:", fl.abs_id);
1643 R("FL size:", fl.size - MIN_FL_RESID);
1644 R("FL avail:", fl.avail);
1645 R("FL PIdx:", fl.pidx);
1646 R("FL CIdx:", fl.cidx);
1647 return 0;
1648 }
1649
1650 r -= eth_entries;
1651 if (r == 0) {
1652 const struct sge_rspq *evtq = &adapter->sge.fw_evtq;
1653
1654 seq_printf(seq, "%-12s %16s\n", "QType:", "FW event queue");
1655 seq_printf(seq, "%-12s %16u\n", "RspQ ID:", evtq->abs_id);
1656 seq_printf(seq, "%-12s %16u\n", "Intr delay:",
1657 qtimer_val(adapter, evtq));
1658 seq_printf(seq, "%-12s %16u\n", "Intr pktcnt:",
1659 adapter->sge.counter_val[evtq->pktcnt_idx]);
1660 seq_printf(seq, "%-12s %16u\n", "RspQ Cidx:", evtq->cidx);
1661 seq_printf(seq, "%-12s %16u\n", "RspQ Gen:", evtq->gen);
1662 } else if (r == 1) {
1663 const struct sge_rspq *intrq = &adapter->sge.intrq;
1664
1665 seq_printf(seq, "%-12s %16s\n", "QType:", "Interrupt Queue");
1666 seq_printf(seq, "%-12s %16u\n", "RspQ ID:", intrq->abs_id);
1667 seq_printf(seq, "%-12s %16u\n", "Intr delay:",
1668 qtimer_val(adapter, intrq));
1669 seq_printf(seq, "%-12s %16u\n", "Intr pktcnt:",
1670 adapter->sge.counter_val[intrq->pktcnt_idx]);
1671 seq_printf(seq, "%-12s %16u\n", "RspQ Cidx:", intrq->cidx);
1672 seq_printf(seq, "%-12s %16u\n", "RspQ Gen:", intrq->gen);
1673 }
1674
1675 #undef R
1676 #undef T
1677 #undef S
1678 #undef S3
1679
1680 return 0;
1681}
1682
1683/*
1684 * Return the number of "entries" in our "file". We group the multi-Queue
1685 * sections with QPL Queue Sets per "entry". The sections of the output are:
1686 *
1687 * Ethernet RX/TX Queue Sets
1688 * Firmware Event Queue
1689 * Forwarded Interrupt Queue (if in MSI mode)
1690 */
1691static int sge_queue_entries(const struct adapter *adapter)
1692{
1693 return DIV_ROUND_UP(adapter->sge.ethqsets, QPL) + 1 +
1694 ((adapter->flags & USING_MSI) != 0);
1695}
1696
1697static void *sge_queue_start(struct seq_file *seq, loff_t *pos)
1698{
1699 int entries = sge_queue_entries(seq->private);
1700
1701 return *pos < entries ? (void *)((uintptr_t)*pos + 1) : NULL;
1702}
1703
1704static void sge_queue_stop(struct seq_file *seq, void *v)
1705{
1706}
1707
1708static void *sge_queue_next(struct seq_file *seq, void *v, loff_t *pos)
1709{
1710 int entries = sge_queue_entries(seq->private);
1711
1712 ++*pos;
1713 return *pos < entries ? (void *)((uintptr_t)*pos + 1) : NULL;
1714}
1715
1716static const struct seq_operations sge_qinfo_seq_ops = {
1717 .start = sge_queue_start,
1718 .next = sge_queue_next,
1719 .stop = sge_queue_stop,
1720 .show = sge_qinfo_show
1721};
1722
1723static int sge_qinfo_open(struct inode *inode, struct file *file)
1724{
1725 int res = seq_open(file, &sge_qinfo_seq_ops);
1726
1727 if (!res) {
1728 struct seq_file *seq = file->private_data;
1729 seq->private = inode->i_private;
1730 }
1731 return res;
1732}
1733
1734static const struct file_operations sge_qinfo_debugfs_fops = {
1735 .owner = THIS_MODULE,
1736 .open = sge_qinfo_open,
1737 .read = seq_read,
1738 .llseek = seq_lseek,
1739 .release = seq_release,
1740};
1741
1742/*
1743 * Show SGE Queue Set statistics. We display QPL Queue Sets per line.
1744 */
1745#define QPL 4
1746
1747static int sge_qstats_show(struct seq_file *seq, void *v)
1748{
1749 struct adapter *adapter = seq->private;
1750 int eth_entries = DIV_ROUND_UP(adapter->sge.ethqsets, QPL);
1751 int qs, r = (uintptr_t)v - 1;
1752
1753 if (r)
1754 seq_putc(seq, '\n');
1755
1756 #define S3(fmt, s, v) \
1757 do { \
1758 seq_printf(seq, "%-16s", s); \
1759 for (qs = 0; qs < n; ++qs) \
1760 seq_printf(seq, " %8" fmt, v); \
1761 seq_putc(seq, '\n'); \
1762 } while (0)
1763 #define S(s, v) S3("s", s, v)
1764
1765 #define T3(fmt, s, v) S3(fmt, s, txq[qs].v)
1766 #define T(s, v) T3("lu", s, v)
1767
1768 #define R3(fmt, s, v) S3(fmt, s, rxq[qs].v)
1769 #define R(s, v) R3("lu", s, v)
1770
1771 if (r < eth_entries) {
1772 const struct sge_eth_rxq *rxq = &adapter->sge.ethrxq[r * QPL];
1773 const struct sge_eth_txq *txq = &adapter->sge.ethtxq[r * QPL];
1774 int n = min(QPL, adapter->sge.ethqsets - QPL * r);
1775
1776 S("QType:", "Ethernet");
1777 S("Interface:",
1778 (rxq[qs].rspq.netdev
1779 ? rxq[qs].rspq.netdev->name
1780 : "N/A"));
1781 R3("u", "RspQNullInts:", rspq.unhandled_irqs);
1782 R("RxPackets:", stats.pkts);
1783 R("RxCSO:", stats.rx_cso);
1784 R("VLANxtract:", stats.vlan_ex);
1785 R("LROmerged:", stats.lro_merged);
1786 R("LROpackets:", stats.lro_pkts);
1787 R("RxDrops:", stats.rx_drops);
1788 T("TSO:", tso);
1789 T("TxCSO:", tx_cso);
1790 T("VLANins:", vlan_ins);
1791 T("TxQFull:", q.stops);
1792 T("TxQRestarts:", q.restarts);
1793 T("TxMapErr:", mapping_err);
1794 R("FLAllocErr:", fl.alloc_failed);
1795 R("FLLrgAlcErr:", fl.large_alloc_failed);
1796 R("FLStarving:", fl.starving);
1797 return 0;
1798 }
1799
1800 r -= eth_entries;
1801 if (r == 0) {
1802 const struct sge_rspq *evtq = &adapter->sge.fw_evtq;
1803
1804 seq_printf(seq, "%-8s %16s\n", "QType:", "FW event queue");
1805 seq_printf(seq, "%-16s %8u\n", "RspQNullInts:",
1806 evtq->unhandled_irqs);
1807 seq_printf(seq, "%-16s %8u\n", "RspQ CIdx:", evtq->cidx);
1808 seq_printf(seq, "%-16s %8u\n", "RspQ Gen:", evtq->gen);
1809 } else if (r == 1) {
1810 const struct sge_rspq *intrq = &adapter->sge.intrq;
1811
1812 seq_printf(seq, "%-8s %16s\n", "QType:", "Interrupt Queue");
1813 seq_printf(seq, "%-16s %8u\n", "RspQNullInts:",
1814 intrq->unhandled_irqs);
1815 seq_printf(seq, "%-16s %8u\n", "RspQ CIdx:", intrq->cidx);
1816 seq_printf(seq, "%-16s %8u\n", "RspQ Gen:", intrq->gen);
1817 }
1818
1819 #undef R
1820 #undef T
1821 #undef S
1822 #undef R3
1823 #undef T3
1824 #undef S3
1825
1826 return 0;
1827}
1828
1829/*
1830 * Return the number of "entries" in our "file". We group the multi-Queue
1831 * sections with QPL Queue Sets per "entry". The sections of the output are:
1832 *
1833 * Ethernet RX/TX Queue Sets
1834 * Firmware Event Queue
1835 * Forwarded Interrupt Queue (if in MSI mode)
1836 */
1837static int sge_qstats_entries(const struct adapter *adapter)
1838{
1839 return DIV_ROUND_UP(adapter->sge.ethqsets, QPL) + 1 +
1840 ((adapter->flags & USING_MSI) != 0);
1841}
1842
1843static void *sge_qstats_start(struct seq_file *seq, loff_t *pos)
1844{
1845 int entries = sge_qstats_entries(seq->private);
1846
1847 return *pos < entries ? (void *)((uintptr_t)*pos + 1) : NULL;
1848}
1849
1850static void sge_qstats_stop(struct seq_file *seq, void *v)
1851{
1852}
1853
1854static void *sge_qstats_next(struct seq_file *seq, void *v, loff_t *pos)
1855{
1856 int entries = sge_qstats_entries(seq->private);
1857
1858 (*pos)++;
1859 return *pos < entries ? (void *)((uintptr_t)*pos + 1) : NULL;
1860}
1861
1862static const struct seq_operations sge_qstats_seq_ops = {
1863 .start = sge_qstats_start,
1864 .next = sge_qstats_next,
1865 .stop = sge_qstats_stop,
1866 .show = sge_qstats_show
1867};
1868
1869static int sge_qstats_open(struct inode *inode, struct file *file)
1870{
1871 int res = seq_open(file, &sge_qstats_seq_ops);
1872
1873 if (res == 0) {
1874 struct seq_file *seq = file->private_data;
1875 seq->private = inode->i_private;
1876 }
1877 return res;
1878}
1879
1880static const struct file_operations sge_qstats_proc_fops = {
1881 .owner = THIS_MODULE,
1882 .open = sge_qstats_open,
1883 .read = seq_read,
1884 .llseek = seq_lseek,
1885 .release = seq_release,
1886};
1887
1888/*
1889 * Show PCI-E SR-IOV Virtual Function Resource Limits.
1890 */
1891static int resources_show(struct seq_file *seq, void *v)
1892{
1893 struct adapter *adapter = seq->private;
1894 struct vf_resources *vfres = &adapter->params.vfres;
1895
1896 #define S(desc, fmt, var) \
1897 seq_printf(seq, "%-60s " fmt "\n", \
1898 desc " (" #var "):", vfres->var)
1899
1900 S("Virtual Interfaces", "%d", nvi);
1901 S("Egress Queues", "%d", neq);
1902 S("Ethernet Control", "%d", nethctrl);
1903 S("Ingress Queues/w Free Lists/Interrupts", "%d", niqflint);
1904 S("Ingress Queues", "%d", niq);
1905 S("Traffic Class", "%d", tc);
1906 S("Port Access Rights Mask", "%#x", pmask);
1907 S("MAC Address Filters", "%d", nexactf);
1908 S("Firmware Command Read Capabilities", "%#x", r_caps);
1909 S("Firmware Command Write/Execute Capabilities", "%#x", wx_caps);
1910
1911 #undef S
1912
1913 return 0;
1914}
1915
1916static int resources_open(struct inode *inode, struct file *file)
1917{
1918 return single_open(file, resources_show, inode->i_private);
1919}
1920
1921static const struct file_operations resources_proc_fops = {
1922 .owner = THIS_MODULE,
1923 .open = resources_open,
1924 .read = seq_read,
1925 .llseek = seq_lseek,
1926 .release = single_release,
1927};
1928
1929/*
1930 * Show Virtual Interfaces.
1931 */
1932static int interfaces_show(struct seq_file *seq, void *v)
1933{
1934 if (v == SEQ_START_TOKEN) {
1935 seq_puts(seq, "Interface Port VIID\n");
1936 } else {
1937 struct adapter *adapter = seq->private;
1938 int pidx = (uintptr_t)v - 2;
1939 struct net_device *dev = adapter->port[pidx];
1940 struct port_info *pi = netdev_priv(dev);
1941
1942 seq_printf(seq, "%9s %4d %#5x\n",
1943 dev->name, pi->port_id, pi->viid);
1944 }
1945 return 0;
1946}
1947
1948static inline void *interfaces_get_idx(struct adapter *adapter, loff_t pos)
1949{
1950 return pos <= adapter->params.nports
1951 ? (void *)(uintptr_t)(pos + 1)
1952 : NULL;
1953}
1954
1955static void *interfaces_start(struct seq_file *seq, loff_t *pos)
1956{
1957 return *pos
1958 ? interfaces_get_idx(seq->private, *pos)
1959 : SEQ_START_TOKEN;
1960}
1961
1962static void *interfaces_next(struct seq_file *seq, void *v, loff_t *pos)
1963{
1964 (*pos)++;
1965 return interfaces_get_idx(seq->private, *pos);
1966}
1967
1968static void interfaces_stop(struct seq_file *seq, void *v)
1969{
1970}
1971
1972static const struct seq_operations interfaces_seq_ops = {
1973 .start = interfaces_start,
1974 .next = interfaces_next,
1975 .stop = interfaces_stop,
1976 .show = interfaces_show
1977};
1978
1979static int interfaces_open(struct inode *inode, struct file *file)
1980{
1981 int res = seq_open(file, &interfaces_seq_ops);
1982
1983 if (res == 0) {
1984 struct seq_file *seq = file->private_data;
1985 seq->private = inode->i_private;
1986 }
1987 return res;
1988}
1989
1990static const struct file_operations interfaces_proc_fops = {
1991 .owner = THIS_MODULE,
1992 .open = interfaces_open,
1993 .read = seq_read,
1994 .llseek = seq_lseek,
1995 .release = seq_release,
1996};
1997
1998/*
1999 * /sys/kernel/debug/cxgb4vf/ files list.
2000 */
2001struct cxgb4vf_debugfs_entry {
2002 const char *name; /* name of debugfs node */
2003 mode_t mode; /* file system mode */
2004 const struct file_operations *fops;
2005};
2006
2007static struct cxgb4vf_debugfs_entry debugfs_files[] = {
2008 { "sge_qinfo", S_IRUGO, &sge_qinfo_debugfs_fops },
2009 { "sge_qstats", S_IRUGO, &sge_qstats_proc_fops },
2010 { "resources", S_IRUGO, &resources_proc_fops },
2011 { "interfaces", S_IRUGO, &interfaces_proc_fops },
2012};
2013
2014/*
2015 * Module and device initialization and cleanup code.
2016 * ==================================================
2017 */
2018
2019/*
2020 * Set up our /sys/kernel/debug/cxgb4vf sub-nodes. We assume that the
2021 * directory (debugfs_root) has already been set up.
2022 */
2023static int __devinit setup_debugfs(struct adapter *adapter)
2024{
2025 int i;
2026
2027 BUG_ON(IS_ERR_OR_NULL(adapter->debugfs_root));
2028
2029 /*
2030 * Debugfs support is best effort.
2031 */
2032 for (i = 0; i < ARRAY_SIZE(debugfs_files); i++)
2033 (void)debugfs_create_file(debugfs_files[i].name,
2034 debugfs_files[i].mode,
2035 adapter->debugfs_root,
2036 (void *)adapter,
2037 debugfs_files[i].fops);
2038
2039 return 0;
2040}
2041
2042/*
2043 * Tear down the /sys/kernel/debug/cxgb4vf sub-nodes created above. We leave
2044 * it to our caller to tear down the directory (debugfs_root).
2045 */
2046static void cleanup_debugfs(struct adapter *adapter)
2047{
2048 BUG_ON(IS_ERR_OR_NULL(adapter->debugfs_root));
2049
2050 /*
2051 * Unlike our sister routine cleanup_proc(), we don't need to remove
2052 * individual entries because a call will be made to
2053 * debugfs_remove_recursive(). We just need to clean up any ancillary
2054 * persistent state.
2055 */
2056 /* nothing to do */
2057}
2058
2059/*
2060 * Perform early "adapter" initialization. This is where we discover what
2061 * adapter parameters we're going to be using and initialize basic adapter
2062 * hardware support.
2063 */
2064static int __devinit adap_init0(struct adapter *adapter)
2065{
2066 struct vf_resources *vfres = &adapter->params.vfres;
2067 struct sge_params *sge_params = &adapter->params.sge;
2068 struct sge *s = &adapter->sge;
2069 unsigned int ethqsets;
2070 int err;
2071
2072 /*
2073 * Wait for the device to become ready before proceeding ...
2074 */
2075 err = t4vf_wait_dev_ready(adapter);
2076 if (err) {
2077 dev_err(adapter->pdev_dev, "device didn't become ready:"
2078 " err=%d\n", err);
2079 return err;
2080 }
2081
2082 /*
2083 * Some environments do not properly handle PCIE FLRs -- e.g. in Linux
2084 * 2.6.31 and later we can't call pci_reset_function() in order to
2085 * issue an FLR because of a self-deadlock on the device semaphore.
2086 * Meanwhile, the OS infrastructure doesn't issue FLRs in all the
2087 * cases where they're needed -- for instance, some versions of KVM
2088 * fail to reset "Assigned Devices" when the VM reboots. Therefore we
2089 * use the firmware based reset in order to reset any per function
2090 * state.
2091 */
2092 err = t4vf_fw_reset(adapter);
2093 if (err < 0) {
2094 dev_err(adapter->pdev_dev, "FW reset failed: err=%d\n", err);
2095 return err;
2096 }
2097
2098 /*
2099 * Grab basic operational parameters. These will predominantly have
2100 * been set up by the Physical Function Driver or will be hard coded
2101 * into the adapter. We just have to live with them ... Note that
2102 * we _must_ get our VPD parameters before our SGE parameters because
2103 * we need to know the adapter's core clock from the VPD in order to
2104 * properly decode the SGE Timer Values.
2105 */
2106 err = t4vf_get_dev_params(adapter);
2107 if (err) {
2108 dev_err(adapter->pdev_dev, "unable to retrieve adapter"
2109 " device parameters: err=%d\n", err);
2110 return err;
2111 }
2112 err = t4vf_get_vpd_params(adapter);
2113 if (err) {
2114 dev_err(adapter->pdev_dev, "unable to retrieve adapter"
2115 " VPD parameters: err=%d\n", err);
2116 return err;
2117 }
2118 err = t4vf_get_sge_params(adapter);
2119 if (err) {
2120 dev_err(adapter->pdev_dev, "unable to retrieve adapter"
2121 " SGE parameters: err=%d\n", err);
2122 return err;
2123 }
2124 err = t4vf_get_rss_glb_config(adapter);
2125 if (err) {
2126 dev_err(adapter->pdev_dev, "unable to retrieve adapter"
2127 " RSS parameters: err=%d\n", err);
2128 return err;
2129 }
2130 if (adapter->params.rss.mode !=
2131 FW_RSS_GLB_CONFIG_CMD_MODE_BASICVIRTUAL) {
2132 dev_err(adapter->pdev_dev, "unable to operate with global RSS"
2133 " mode %d\n", adapter->params.rss.mode);
2134 return -EINVAL;
2135 }
2136 err = t4vf_sge_init(adapter);
2137 if (err) {
2138 dev_err(adapter->pdev_dev, "unable to use adapter parameters:"
2139 " err=%d\n", err);
2140 return err;
2141 }
2142
2143 /*
2144 * Retrieve our RX interrupt holdoff timer values and counter
2145 * threshold values from the SGE parameters.
2146 */
2147 s->timer_val[0] = core_ticks_to_us(adapter,
2148 TIMERVALUE0_GET(sge_params->sge_timer_value_0_and_1));
2149 s->timer_val[1] = core_ticks_to_us(adapter,
2150 TIMERVALUE1_GET(sge_params->sge_timer_value_0_and_1));
2151 s->timer_val[2] = core_ticks_to_us(adapter,
2152 TIMERVALUE0_GET(sge_params->sge_timer_value_2_and_3));
2153 s->timer_val[3] = core_ticks_to_us(adapter,
2154 TIMERVALUE1_GET(sge_params->sge_timer_value_2_and_3));
2155 s->timer_val[4] = core_ticks_to_us(adapter,
2156 TIMERVALUE0_GET(sge_params->sge_timer_value_4_and_5));
2157 s->timer_val[5] = core_ticks_to_us(adapter,
2158 TIMERVALUE1_GET(sge_params->sge_timer_value_4_and_5));
2159
2160 s->counter_val[0] =
2161 THRESHOLD_0_GET(sge_params->sge_ingress_rx_threshold);
2162 s->counter_val[1] =
2163 THRESHOLD_1_GET(sge_params->sge_ingress_rx_threshold);
2164 s->counter_val[2] =
2165 THRESHOLD_2_GET(sge_params->sge_ingress_rx_threshold);
2166 s->counter_val[3] =
2167 THRESHOLD_3_GET(sge_params->sge_ingress_rx_threshold);
2168
2169 /*
2170 * Grab our Virtual Interface resource allocation, extract the
2171 * features that we're interested in and do a bit of sanity testing on
2172 * what we discover.
2173 */
2174 err = t4vf_get_vfres(adapter);
2175 if (err) {
2176 dev_err(adapter->pdev_dev, "unable to get virtual interface"
2177 " resources: err=%d\n", err);
2178 return err;
2179 }
2180
2181 /*
2182 * The number of "ports" which we support is equal to the number of
2183 * Virtual Interfaces with which we've been provisioned.
2184 */
2185 adapter->params.nports = vfres->nvi;
2186 if (adapter->params.nports > MAX_NPORTS) {
2187 dev_warn(adapter->pdev_dev, "only using %d of %d allowed"
2188 " virtual interfaces\n", MAX_NPORTS,
2189 adapter->params.nports);
2190 adapter->params.nports = MAX_NPORTS;
2191 }
2192
2193 /*
2194 * We need to reserve a number of the ingress queues with Free List
2195 * and Interrupt capabilities for special interrupt purposes (like
2196 * asynchronous firmware messages, or forwarded interrupts if we're
2197 * using MSI). The rest of the FL/Intr-capable ingress queues will be
2198 * matched up one-for-one with Ethernet/Control egress queues in order
2199 * to form "Queue Sets" which will be apportioned between the "ports".
2200 * For each Queue Set, we'll need the ability to allocate two Egress
2201 * Contexts -- one for the Ingress Queue Free List and one for the TX
2202 * Ethernet Queue.
2203 */
2204 ethqsets = vfres->niqflint - INGQ_EXTRAS;
2205 if (vfres->nethctrl != ethqsets) {
2206 dev_warn(adapter->pdev_dev, "unequal number of [available]"
2207 " ingress/egress queues (%d/%d); using minimum for"
2208 " number of Queue Sets\n", ethqsets, vfres->nethctrl);
2209 ethqsets = min(vfres->nethctrl, ethqsets);
2210 }
2211 if (vfres->neq < ethqsets*2) {
2212 dev_warn(adapter->pdev_dev, "Not enough Egress Contexts (%d)"
2213 " to support Queue Sets (%d); reducing allowed Queue"
2214 " Sets\n", vfres->neq, ethqsets);
2215 ethqsets = vfres->neq/2;
2216 }
2217 if (ethqsets > MAX_ETH_QSETS) {
2218 dev_warn(adapter->pdev_dev, "only using %d of %d allowed Queue"
2219 " Sets\n", MAX_ETH_QSETS, ethqsets);
2220 ethqsets = MAX_ETH_QSETS;
2221 }
2222 if (vfres->niq != 0 || vfres->neq > ethqsets*2) {
2223 dev_warn(adapter->pdev_dev, "unused resources niq/neq (%d/%d)"
2224 " ignored\n", vfres->niq, vfres->neq - ethqsets*2);
2225 }
2226 adapter->sge.max_ethqsets = ethqsets;
2227
2228 /*
2229 * Check for various parameter sanity issues. Most checks simply
2230 * result in us using fewer resources than our provisioning but we
2231 * do need at least one "port" with which to work ...
2232 */
2233 if (adapter->sge.max_ethqsets < adapter->params.nports) {
2234 dev_warn(adapter->pdev_dev, "only using %d of %d available"
2235 " virtual interfaces (too few Queue Sets)\n",
2236 adapter->sge.max_ethqsets, adapter->params.nports);
2237 adapter->params.nports = adapter->sge.max_ethqsets;
2238 }
2239 if (adapter->params.nports == 0) {
2240 dev_err(adapter->pdev_dev, "no virtual interfaces configured/"
2241 "usable!\n");
2242 return -EINVAL;
2243 }
2244 return 0;
2245}
2246
2247static inline void init_rspq(struct sge_rspq *rspq, u8 timer_idx,
2248 u8 pkt_cnt_idx, unsigned int size,
2249 unsigned int iqe_size)
2250{
2251 rspq->intr_params = (QINTR_TIMER_IDX(timer_idx) |
2252 (pkt_cnt_idx < SGE_NCOUNTERS ? QINTR_CNT_EN : 0));
2253 rspq->pktcnt_idx = (pkt_cnt_idx < SGE_NCOUNTERS
2254 ? pkt_cnt_idx
2255 : 0);
2256 rspq->iqe_len = iqe_size;
2257 rspq->size = size;
2258}
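/*
 * Worked examples of the encoding above (argument values are
 * illustrative):
 *
 *	init_rspq(&rspq, 0, SGE_NCOUNTERS, 1024, 64);
 *		intr_params = QINTR_TIMER_IDX(0), pktcnt_idx = 0
 *		(timer-only mode; the packet counter is disabled)
 *
 *	init_rspq(&rspq, 5, 2, 512, 64);
 *		intr_params = QINTR_TIMER_IDX(5) | QINTR_CNT_EN,
 *		pktcnt_idx = 2
 *
 * In both cases rspq->size and rspq->iqe_len are simply the last two
 * arguments.
 */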
2259
2260/*
2261 * Perform default configuration of DMA queues depending on the number and
2262 * type of ports we found and the number of available CPUs. Most settings can
2263 * be modified by the admin via ethtool and cxgbtool prior to the adapter
2264 * being brought up for the first time.
2265 */
2266static void __devinit cfg_queues(struct adapter *adapter)
2267{
2268 struct sge *s = &adapter->sge;
2269 int q10g, n10g, qidx, pidx, qs;
2270 size_t iqe_size;
2271
2272 /*
2273 * We should not be called till we know how many Queue Sets we can
2274 * support. In particular, this means that we need to know what kind
2275 * of interrupts we'll be using ...
2276 */
2277 BUG_ON((adapter->flags & (USING_MSIX|USING_MSI)) == 0);
2278
2279 /*
2280 * Count the number of 10GbE Virtual Interfaces that we have.
2281 */
2282 n10g = 0;
2283 for_each_port(adapter, pidx)
2284 n10g += is_10g_port(&adap2pinfo(adapter, pidx)->link_cfg);
2285
2286 /*
2287 * We default to 1 queue per non-10G port and up to as many queues as
2288 * there are online CPU cores per 10G port.
2289 */
2290 if (n10g == 0)
2291 q10g = 0;
2292 else {
2293 int n1g = (adapter->params.nports - n10g);
2294 q10g = (adapter->sge.max_ethqsets - n1g) / n10g;
2295 if (q10g > num_online_cpus())
2296 q10g = num_online_cpus();
2297 }
2298
2299 /*
2300 * Allocate the "Queue Sets" to the various Virtual Interfaces.
2301 * The layout will be established in setup_sge_queues() when the
2302 * adapter is brought up for the first time.
2303 */
2304 qidx = 0;
2305 for_each_port(adapter, pidx) {
2306 struct port_info *pi = adap2pinfo(adapter, pidx);
2307
2308 pi->first_qset = qidx;
2309 pi->nqsets = is_10g_port(&pi->link_cfg) ? q10g : 1;
2310 qidx += pi->nqsets;
2311 }
2312 s->ethqsets = qidx;
2313
2314 /*
2315 * The Ingress Queue Entry Size for our various Response Queues needs
2316 * to be big enough to accommodate the largest message we can receive
2317 * from the chip/firmware; which is 64 bytes ...
2318 */
2319 iqe_size = 64;
2320
2321 /*
2322 * Set up default Queue Set parameters ... Start off with the
2323 * shortest interrupt holdoff timer.
2324 */
2325 for (qs = 0; qs < s->max_ethqsets; qs++) {
2326 struct sge_eth_rxq *rxq = &s->ethrxq[qs];
2327 struct sge_eth_txq *txq = &s->ethtxq[qs];
2328
2329 init_rspq(&rxq->rspq, 0, 0, 1024, iqe_size);
2330 rxq->fl.size = 72;
2331 txq->q.size = 1024;
2332 }
2333
2334 /*
2335 * The firmware event queue is used for link state changes and
2336 * notifications of TX DMA completions.
2337 */
2338 init_rspq(&s->fw_evtq, SGE_TIMER_RSTRT_CNTR, 0, 512, iqe_size);
2339
2340 /*
2341 * The forwarded interrupt queue is used when we're in MSI interrupt
2342 * mode. In this mode all interrupts associated with RX queues will
2343 * be forwarded to a single queue which we'll associate with our MSI
2344 * interrupt vector. The messages dropped in the forwarded interrupt
2345 * queue will indicate which ingress queue needs servicing ... This
2346 * queue needs to be large enough to accommodate all of the ingress
2347 * queues which are forwarding their interrupt (+1 to prevent the PIDX
2348 * from equalling the CIDX if every ingress queue has an outstanding
2349 * interrupt). The queue doesn't need to be any larger because no
2350 * ingress queue will ever have more than one outstanding interrupt at
2351 * any time ...
2352 */
2353 init_rspq(&s->intrq, SGE_TIMER_RSTRT_CNTR, 0, MSIX_ENTRIES + 1,
2354 iqe_size);
2355}
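/*
 * Worked example of the allocation above (numbers are purely
 * illustrative): with max_ethqsets = 16, two ports of which one is 10G
 * (n10g = 1, n1g = 1) and 8 online CPUs,
 *
 *	q10g = min((16 - 1) / 1, 8) = 8
 *
 * so, if the 10G port comes first, it gets first_qset = 0, nqsets = 8,
 * the 1G port gets first_qset = 8, nqsets = 1, and s->ethqsets ends up
 * as 9.
 */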
2356
2357/*
2358 * Reduce the number of Ethernet queues across all ports to at most n.
2359 * n provides at least one queue per port.
2360 */
2361static void __devinit reduce_ethqs(struct adapter *adapter, int n)
2362{
2363 int i;
2364 struct port_info *pi;
2365
2366 /*
2367 * While we have too many active Ethernet Queue Sets, iterate across the
2368 * "ports" and reduce their individual Queue Set allocations.
2369 */
2370 BUG_ON(n < adapter->params.nports);
2371 while (n < adapter->sge.ethqsets)
2372 for_each_port(adapter, i) {
2373 pi = adap2pinfo(adapter, i);
2374 if (pi->nqsets > 1) {
2375 pi->nqsets--;
2376 adapter->sge.ethqsets--;
2377 if (adapter->sge.ethqsets <= n)
2378 break;
2379 }
2380 }
2381
2382 /*
2383 * Reassign the starting Queue Sets for each of the "ports" ...
2384 */
2385 n = 0;
2386 for_each_port(adapter, i) {
2387 pi = adap2pinfo(adapter, i);
2388 pi->first_qset = n;
2389 n += pi->nqsets;
2390 }
2391}
2392
2393/*
2394 * We need to grab enough MSI-X vectors to cover our interrupt needs. Ideally
2395 * we get a separate MSI-X vector for every "Queue Set" plus any extras we
2396 * need. Minimally we need one for every Virtual Interface plus those needed
2397 * for our "extras". Note that this process may lower the maximum number of
2398 * allowed Queue Sets ...
2399 */
2400static int __devinit enable_msix(struct adapter *adapter)
2401{
2402 int i, err, want, need;
2403 struct msix_entry entries[MSIX_ENTRIES];
2404 struct sge *s = &adapter->sge;
2405
2406 for (i = 0; i < MSIX_ENTRIES; ++i)
2407 entries[i].entry = i;
2408
2409 /*
2410 * We _want_ enough MSI-X interrupts to cover all of our "Queue Sets"
2411 * plus those needed for our "extras" (for example, the firmware
2412 * message queue). We _need_ at least one "Queue Set" per Virtual
2413 * Interface plus those needed for our "extras". So now we get to see
2414 * if the song is right ...
2415 */
2416 want = s->max_ethqsets + MSIX_EXTRAS;
2417 need = adapter->params.nports + MSIX_EXTRAS;
2418 while ((err = pci_enable_msix(adapter->pdev, entries, want)) >= need)
2419 want = err;
2420
2421 if (err == 0) {
2422 int nqsets = want - MSIX_EXTRAS;
2423 if (nqsets < s->max_ethqsets) {
2424 dev_warn(adapter->pdev_dev, "only enough MSI-X vectors"
2425 " for %d Queue Sets\n", nqsets);
2426 s->max_ethqsets = nqsets;
2427 if (nqsets < s->ethqsets)
2428 reduce_ethqs(adapter, nqsets);
2429 }
2430 for (i = 0; i < want; ++i)
2431 adapter->msix_info[i].vec = entries[i].vector;
2432 } else if (err > 0) {
2433 pci_disable_msix(adapter->pdev);
2434 dev_info(adapter->pdev_dev, "only %d MSI-X vectors left,"
2435 " not using MSI-X\n", err);
2436 }
2437 return err;
2438}
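/*
 * Worked example of the negotiation above (vector counts are
 * hypothetical, and MSIX_EXTRAS == 1 is assumed): with max_ethqsets = 8
 * and two ports we ask for want = 9 vectors but need only 3.  If the
 * platform can spare just 5, pci_enable_msix() first returns 5, we retry
 * with want = 5 and succeed, leaving nqsets = 5 - 1 = 4, so max_ethqsets
 * is trimmed from 8 to 4.
 */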
2439
2440static const struct net_device_ops cxgb4vf_netdev_ops = {
2441 .ndo_open = cxgb4vf_open,
2442 .ndo_stop = cxgb4vf_stop,
2443 .ndo_start_xmit = t4vf_eth_xmit,
2444 .ndo_get_stats = cxgb4vf_get_stats,
2445 .ndo_set_rx_mode = cxgb4vf_set_rxmode,
2446 .ndo_set_mac_address = cxgb4vf_set_mac_addr,
2447 .ndo_validate_addr = eth_validate_addr,
2448 .ndo_do_ioctl = cxgb4vf_do_ioctl,
2449 .ndo_change_mtu = cxgb4vf_change_mtu,
2450 .ndo_fix_features = cxgb4vf_fix_features,
2451 .ndo_set_features = cxgb4vf_set_features,
2452#ifdef CONFIG_NET_POLL_CONTROLLER
2453 .ndo_poll_controller = cxgb4vf_poll_controller,
2454#endif
2455};
2456
2457/*
2458 * "Probe" a device: initialize a device and construct all kernel and driver
2459 * state needed to manage the device. This routine is called "init_one" in
2460 * the PF Driver ...
2461 */
2462static int __devinit cxgb4vf_pci_probe(struct pci_dev *pdev,
2463 const struct pci_device_id *ent)
2464{
2465 static int version_printed;
2466
2467 int pci_using_dac;
2468 int err, pidx;
2469 unsigned int pmask;
2470 struct adapter *adapter;
2471 struct port_info *pi;
2472 struct net_device *netdev;
2473
2474 /*
2475 * Print our driver banner the first time we're called to initialize a
2476 * device.
2477 */
2478 if (version_printed == 0) {
2479 printk(KERN_INFO "%s - version %s\n", DRV_DESC, DRV_VERSION);
2480 version_printed = 1;
2481 }
2482
2483 /*
2484 * Initialize generic PCI device state.
2485 */
2486 err = pci_enable_device(pdev);
2487 if (err) {
2488 dev_err(&pdev->dev, "cannot enable PCI device\n");
2489 return err;
2490 }
2491
2492 /*
2493 * Reserve PCI resources for the device. If we can't get them some
2494 * other driver may have already claimed the device ...
2495 */
2496 err = pci_request_regions(pdev, KBUILD_MODNAME);
2497 if (err) {
2498 dev_err(&pdev->dev, "cannot obtain PCI resources\n");
2499 goto err_disable_device;
2500 }
2501
2502 /*
2503 * Set up our DMA mask: try for 64-bit address masking first and
2504 * fall back to 32-bit if we can't get 64 bits ...
2505 */
2506 err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
2507 if (err == 0) {
2508 err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
2509 if (err) {
2510 dev_err(&pdev->dev, "unable to obtain 64-bit DMA for"
2511 " coherent allocations\n");
2512 goto err_release_regions;
2513 }
2514 pci_using_dac = 1;
2515 } else {
2516 err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
2517 if (err != 0) {
2518 dev_err(&pdev->dev, "no usable DMA configuration\n");
2519 goto err_release_regions;
2520 }
2521 pci_using_dac = 0;
2522 }
2523
2524 /*
2525 * Enable bus mastering for the device ...
2526 */
2527 pci_set_master(pdev);
2528
2529 /*
2530 * Allocate our adapter data structure and attach it to the device.
2531 */
2532 adapter = kzalloc(sizeof(*adapter), GFP_KERNEL);
2533 if (!adapter) {
2534 err = -ENOMEM;
2535 goto err_release_regions;
2536 }
2537 pci_set_drvdata(pdev, adapter);
2538 adapter->pdev = pdev;
2539 adapter->pdev_dev = &pdev->dev;
2540
2541 /*
2542 * Initialize SMP data synchronization resources.
2543 */
2544 spin_lock_init(&adapter->stats_lock);
2545
2546 /*
2547 * Map our I/O registers in BAR0.
2548 */
2549 adapter->regs = pci_ioremap_bar(pdev, 0);
2550 if (!adapter->regs) {
2551 dev_err(&pdev->dev, "cannot map device registers\n");
2552 err = -ENOMEM;
2553 goto err_free_adapter;
2554 }
2555
2556 /*
2557 * Initialize adapter level features.
2558 */
2559 adapter->name = pci_name(pdev);
2560 adapter->msg_enable = dflt_msg_enable;
2561 err = adap_init0(adapter);
2562 if (err)
2563 goto err_unmap_bar;
2564
2565 /*
2566 * Allocate our "adapter ports" and stitch everything together.
2567 */
2568 pmask = adapter->params.vfres.pmask;
2569 for_each_port(adapter, pidx) {
2570 int port_id, viid;
2571
2572 /*
2573 * We simplistically allocate our virtual interfaces
2574 * sequentially across the port numbers to which we have
2575 * access rights. This should be configurable in some manner
2576 * ...
2577 */
2578 if (pmask == 0)
2579 break;
2580 port_id = ffs(pmask) - 1;
2581 pmask &= ~(1 << port_id);
2582 viid = t4vf_alloc_vi(adapter, port_id);
2583 if (viid < 0) {
2584 dev_err(&pdev->dev, "cannot allocate VI for port %d:"
2585 " err=%d\n", port_id, viid);
2586 err = viid;
2587 goto err_free_dev;
2588 }
2589
2590 /*
2591 * Allocate our network device and stitch things together.
2592 */
2593 netdev = alloc_etherdev_mq(sizeof(struct port_info),
2594 MAX_PORT_QSETS);
2595 if (netdev == NULL) {
2596 dev_err(&pdev->dev, "cannot allocate netdev for"
2597 " port %d\n", port_id);
2598 t4vf_free_vi(adapter, viid);
2599 err = -ENOMEM;
2600 goto err_free_dev;
2601 }
2602 adapter->port[pidx] = netdev;
2603 SET_NETDEV_DEV(netdev, &pdev->dev);
2604 pi = netdev_priv(netdev);
2605 pi->adapter = adapter;
2606 pi->pidx = pidx;
2607 pi->port_id = port_id;
2608 pi->viid = viid;
2609
2610 /*
2611 * Initialize the starting state of our "port" and register
2612 * it.
2613 */
2614 pi->xact_addr_filt = -1;
2615 netif_carrier_off(netdev);
2616 netdev->irq = pdev->irq;
2617
2618 netdev->hw_features = NETIF_F_SG | TSO_FLAGS |
2619 NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
2620 NETIF_F_HW_VLAN_RX | NETIF_F_RXCSUM;
2621 netdev->vlan_features = NETIF_F_SG | TSO_FLAGS |
2622 NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
2623 NETIF_F_HIGHDMA;
2624 netdev->features = netdev->hw_features | NETIF_F_HW_VLAN_TX;
2625 if (pci_using_dac)
2626 netdev->features |= NETIF_F_HIGHDMA;
2627
2628 netdev->netdev_ops = &cxgb4vf_netdev_ops;
2629 SET_ETHTOOL_OPS(netdev, &cxgb4vf_ethtool_ops);
2630
2631 /*
2632 * Initialize the hardware/software state for the port.
2633 */
2634 err = t4vf_port_init(adapter, pidx);
2635 if (err) {
2636 dev_err(&pdev->dev, "cannot initialize port %d\n",
2637 pidx);
2638 goto err_free_dev;
2639 }
2640 }
2641
2642 /*
2643 * The "card" is now ready to go. If any errors occur during device
2644 * registration we do not fail the whole "card" but rather proceed
2645 * only with the ports we manage to register successfully. However we
2646 * must register at least one net device.
2647 */
2648 for_each_port(adapter, pidx) {
2649 netdev = adapter->port[pidx];
2650 if (netdev == NULL)
2651 continue;
2652
2653 err = register_netdev(netdev);
2654 if (err) {
2655 dev_warn(&pdev->dev, "cannot register net device %s,"
2656 " skipping\n", netdev->name);
2657 continue;
2658 }
2659
2660 set_bit(pidx, &adapter->registered_device_map);
2661 }
2662 if (adapter->registered_device_map == 0) {
2663 dev_err(&pdev->dev, "could not register any net devices\n");
2664 goto err_free_dev;
2665 }
2666
2667 /*
2668 * Set up our debugfs entries.
2669 */
2670 if (!IS_ERR_OR_NULL(cxgb4vf_debugfs_root)) {
2671 adapter->debugfs_root =
2672 debugfs_create_dir(pci_name(pdev),
2673 cxgb4vf_debugfs_root);
2674 if (IS_ERR_OR_NULL(adapter->debugfs_root))
2675 dev_warn(&pdev->dev, "could not create debugfs"
2676 " directory");
2677 else
2678 setup_debugfs(adapter);
2679 }
2680
2681 /*
2682 * See what interrupts we'll be using. If we've been configured to
2683 * use MSI-X interrupts, try to enable them but fall back to using
2684 * MSI interrupts if we can't enable MSI-X interrupts. If we can't
2685 * get MSI interrupts we bail with the error.
2686 */
2687 if (msi == MSI_MSIX && enable_msix(adapter) == 0)
2688 adapter->flags |= USING_MSIX;
2689 else {
2690 err = pci_enable_msi(pdev);
2691 if (err) {
2692 dev_err(&pdev->dev, "Unable to allocate %s interrupts;"
2693 " err=%d\n",
2694 msi == MSI_MSIX ? "MSI-X or MSI" : "MSI", err);
2695 goto err_free_debugfs;
2696 }
2697 adapter->flags |= USING_MSI;
2698 }
2699
2700 /*
2701 * Now that we know how many "ports" we have and what their types are,
2702 * and how many Queue Sets we can support, we can configure our queue
2703 * resources.
2704 */
2705 cfg_queues(adapter);
2706
2707 /*
2708 * Print a short notice on the existence and configuration of the new
2709 * VF network device ...
2710 */
2711 for_each_port(adapter, pidx) {
2712 dev_info(adapter->pdev_dev, "%s: Chelsio VF NIC PCIe %s\n",
2713 adapter->port[pidx]->name,
2714 (adapter->flags & USING_MSIX) ? "MSI-X" :
2715 (adapter->flags & USING_MSI) ? "MSI" : "");
2716 }
2717
2718 /*
2719 * Return success!
2720 */
2721 return 0;
2722
2723 /*
2724 * Error recovery and exit code. Unwind state that's been created
2725 * so far and return the error.
2726 */
2727
2728err_free_debugfs:
2729 if (!IS_ERR_OR_NULL(adapter->debugfs_root)) {
2730 cleanup_debugfs(adapter);
2731 debugfs_remove_recursive(adapter->debugfs_root);
2732 }
2733
2734err_free_dev:
2735 for_each_port(adapter, pidx) {
2736 netdev = adapter->port[pidx];
2737 if (netdev == NULL)
2738 continue;
2739 pi = netdev_priv(netdev);
2740 t4vf_free_vi(adapter, pi->viid);
2741 if (test_bit(pidx, &adapter->registered_device_map))
2742 unregister_netdev(netdev);
2743 free_netdev(netdev);
2744 }
2745
2746err_unmap_bar:
2747 iounmap(adapter->regs);
2748
2749err_free_adapter:
2750 kfree(adapter);
2751 pci_set_drvdata(pdev, NULL);
2752
2753err_release_regions:
2754 pci_release_regions(pdev);
2755 pci_set_drvdata(pdev, NULL);
2756 pci_clear_master(pdev);
2757
2758err_disable_device:
2759 pci_disable_device(pdev);
2760
2761 return err;
2762}
2763
2764/*
2765 * "Remove" a device: tear down all kernel and driver state created in the
2766 * "probe" routine and quiesce the device (disable interrupts, etc.). (Note
2767 * that this is called "remove_one" in the PF Driver.)
2768 */
2769static void __devexit cxgb4vf_pci_remove(struct pci_dev *pdev)
2770{
2771 struct adapter *adapter = pci_get_drvdata(pdev);
2772
2773 /*
2774 * Tear down driver state associated with device.
2775 */
2776 if (adapter) {
2777 int pidx;
2778
2779 /*
2780 * Stop all of our activity. Unregister network port,
2781 * disable interrupts, etc.
2782 */
2783 for_each_port(adapter, pidx)
2784 if (test_bit(pidx, &adapter->registered_device_map))
2785 unregister_netdev(adapter->port[pidx]);
2786 t4vf_sge_stop(adapter);
2787 if (adapter->flags & USING_MSIX) {
2788 pci_disable_msix(adapter->pdev);
2789 adapter->flags &= ~USING_MSIX;
2790 } else if (adapter->flags & USING_MSI) {
2791 pci_disable_msi(adapter->pdev);
2792 adapter->flags &= ~USING_MSI;
2793 }
2794
2795 /*
2796 * Tear down our debugfs entries.
2797 */
2798 if (!IS_ERR_OR_NULL(adapter->debugfs_root)) {
2799 cleanup_debugfs(adapter);
2800 debugfs_remove_recursive(adapter->debugfs_root);
2801 }
2802
2803 /*
2804 * Free all of the various resources which we've acquired ...
2805 */
2806 t4vf_free_sge_resources(adapter);
2807 for_each_port(adapter, pidx) {
2808 struct net_device *netdev = adapter->port[pidx];
2809 struct port_info *pi;
2810
2811 if (netdev == NULL)
2812 continue;
2813
2814 pi = netdev_priv(netdev);
2815 t4vf_free_vi(adapter, pi->viid);
2816 free_netdev(netdev);
2817 }
2818 iounmap(adapter->regs);
2819 kfree(adapter);
2820 pci_set_drvdata(pdev, NULL);
2821 }
2822
2823 /*
2824 * Disable the device and release its PCI resources.
2825 */
2826 pci_disable_device(pdev);
2827 pci_clear_master(pdev);
2828 pci_release_regions(pdev);
2829}
2830
2831/*
2832 * "Shutdown" quiesces the device, stopping Ingress Packet and Interrupt
2833 * delivery.
2834 */
2835static void __devexit cxgb4vf_pci_shutdown(struct pci_dev *pdev)
2836{
2837 struct adapter *adapter;
2838 int pidx;
2839
2840 adapter = pci_get_drvdata(pdev);
2841 if (!adapter)
2842 return;
2843
2844 /*
2845 * Disable all Virtual Interfaces. This will shut down the
2846 * delivery of all ingress packets into the chip for these
2847 * Virtual Interfaces.
2848 */
2849 for_each_port(adapter, pidx) {
2850 struct net_device *netdev;
2851 struct port_info *pi;
2852
2853 if (!test_bit(pidx, &adapter->registered_device_map))
2854 continue;
2855
2856 netdev = adapter->port[pidx];
2857 if (!netdev)
2858 continue;
2859
2860 pi = netdev_priv(netdev);
2861 t4vf_enable_vi(adapter, pi->viid, false, false);
2862 }
2863
2864 /*
2865 * Free up all Queues. This will prevent further DMA and
2866 * Interrupts, allowing various internal pathways to drain.
2867 */
2868 t4vf_free_sge_resources(adapter);
2869}
2870
2871/*
2872 * PCI Device registration data structures.
2873 */
2874#define CH_DEVICE(devid, idx) \
2875 { PCI_VENDOR_ID_CHELSIO, devid, PCI_ANY_ID, PCI_ANY_ID, 0, 0, idx }
2876
2877static struct pci_device_id cxgb4vf_pci_tbl[] = {
2878 CH_DEVICE(0xb000, 0), /* PE10K FPGA */
2879 CH_DEVICE(0x4800, 0), /* T440-dbg */
2880 CH_DEVICE(0x4801, 0), /* T420-cr */
2881 CH_DEVICE(0x4802, 0), /* T422-cr */
2882 CH_DEVICE(0x4803, 0), /* T440-cr */
2883 CH_DEVICE(0x4804, 0), /* T420-bch */
2884 CH_DEVICE(0x4805, 0), /* T440-bch */
2885 CH_DEVICE(0x4806, 0), /* T460-ch */
2886 CH_DEVICE(0x4807, 0), /* T420-so */
2887 CH_DEVICE(0x4808, 0), /* T420-cx */
2888 CH_DEVICE(0x4809, 0), /* T420-bt */
2889 CH_DEVICE(0x480a, 0), /* T404-bt */
2890 { 0, }
2891};
2892
2893MODULE_DESCRIPTION(DRV_DESC);
2894MODULE_AUTHOR("Chelsio Communications");
2895MODULE_LICENSE("Dual BSD/GPL");
2896MODULE_VERSION(DRV_VERSION);
2897MODULE_DEVICE_TABLE(pci, cxgb4vf_pci_tbl);
2898
2899static struct pci_driver cxgb4vf_driver = {
2900 .name = KBUILD_MODNAME,
2901 .id_table = cxgb4vf_pci_tbl,
2902 .probe = cxgb4vf_pci_probe,
2903 .remove = __devexit_p(cxgb4vf_pci_remove),
2904 .shutdown = __devexit_p(cxgb4vf_pci_shutdown),
2905};
2906
2907/*
2908 * Initialize global driver state.
2909 */
2910static int __init cxgb4vf_module_init(void)
2911{
2912 int ret;
2913
2914 /*
2915 * Vet our module parameters.
2916 */
2917 if (msi != MSI_MSIX && msi != MSI_MSI) {
2918 printk(KERN_WARNING KBUILD_MODNAME
2919 ": bad module parameter msi=%d; must be %d"
2920 " (MSI-X or MSI) or %d (MSI)\n",
2921 msi, MSI_MSIX, MSI_MSI);
2922 return -EINVAL;
2923 }
2924
2925 /* Debugfs support is optional, just warn if this fails */
2926 cxgb4vf_debugfs_root = debugfs_create_dir(KBUILD_MODNAME, NULL);
2927 if (IS_ERR_OR_NULL(cxgb4vf_debugfs_root))
2928 printk(KERN_WARNING KBUILD_MODNAME ": could not create"
2929 " debugfs entry, continuing\n");
2930
2931 ret = pci_register_driver(&cxgb4vf_driver);
2932 if (ret < 0 && !IS_ERR_OR_NULL(cxgb4vf_debugfs_root))
2933 debugfs_remove(cxgb4vf_debugfs_root);
2934 return ret;
2935}
2936
2937/*
2938 * Tear down global driver state.
2939 */
2940static void __exit cxgb4vf_module_exit(void)
2941{
2942 pci_unregister_driver(&cxgb4vf_driver);
2943 debugfs_remove(cxgb4vf_debugfs_root);
2944}
2945
2946module_init(cxgb4vf_module_init);
2947module_exit(cxgb4vf_module_exit);
diff --git a/drivers/net/cxgb4vf/sge.c b/drivers/net/cxgb4vf/sge.c
new file mode 100644
index 00000000000..cffb328c46c
--- /dev/null
+++ b/drivers/net/cxgb4vf/sge.c
@@ -0,0 +1,2465 @@
1/*
2 * This file is part of the Chelsio T4 PCI-E SR-IOV Virtual Function Ethernet
3 * driver for Linux.
4 *
5 * Copyright (c) 2009-2010 Chelsio Communications, Inc. All rights reserved.
6 *
7 * This software is available to you under a choice of one of two
8 * licenses. You may choose to be licensed under the terms of the GNU
9 * General Public License (GPL) Version 2, available from the file
10 * COPYING in the main directory of this source tree, or the
11 * OpenIB.org BSD license below:
12 *
13 * Redistribution and use in source and binary forms, with or
14 * without modification, are permitted provided that the following
15 * conditions are met:
16 *
17 * - Redistributions of source code must retain the above
18 * copyright notice, this list of conditions and the following
19 * disclaimer.
20 *
21 * - Redistributions in binary form must reproduce the above
22 * copyright notice, this list of conditions and the following
23 * disclaimer in the documentation and/or other materials
24 * provided with the distribution.
25 *
26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
27 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
28 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
29 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
30 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
31 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
32 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33 * SOFTWARE.
34 */
35
36#include <linux/skbuff.h>
37#include <linux/netdevice.h>
38#include <linux/etherdevice.h>
39#include <linux/if_vlan.h>
40#include <linux/ip.h>
41#include <net/ipv6.h>
42#include <net/tcp.h>
43#include <linux/dma-mapping.h>
44#include <linux/prefetch.h>
45
46#include "t4vf_common.h"
47#include "t4vf_defs.h"
48
49#include "../cxgb4/t4_regs.h"
50#include "../cxgb4/t4fw_api.h"
51#include "../cxgb4/t4_msg.h"
52
53/*
54 * Decoded Adapter Parameters.
55 */
56static u32 FL_PG_ORDER; /* large page allocation size */
57static u32 STAT_LEN; /* length of status page at ring end */
58static u32 PKTSHIFT; /* padding between CPL and packet data */
59static u32 FL_ALIGN; /* response queue message alignment */
60
61/*
62 * Constants ...
63 */
64enum {
65 /*
66 * Egress Queue sizes, producer and consumer indices are all in units
67 * of Egress Context Units bytes. Note that as far as the hardware is
68 * concerned, the free list is an Egress Queue (the host produces free
69 * buffers which the hardware consumes) and free list entries are
70 * 64-bit PCI DMA addresses.
71 */
72 EQ_UNIT = SGE_EQ_IDXSIZE,
73 FL_PER_EQ_UNIT = EQ_UNIT / sizeof(__be64),
74 TXD_PER_EQ_UNIT = EQ_UNIT / sizeof(__be64),
75
76 /*
77 * Max number of TX descriptors we clean up at a time. Should be
78 * modest as freeing skbs isn't cheap and it happens while holding
79 * locks. We just need to free packets faster than they arrive, we
80 * eventually catch up and keep the amortized cost reasonable.
81 */
82 MAX_TX_RECLAIM = 16,
83
84 /*
85 * Max number of Rx buffers we replenish at a time. Again keep this
86 * modest, allocating buffers isn't cheap either.
87 */
88 MAX_RX_REFILL = 16,
89
90 /*
91 * Period of the Rx queue check timer. This timer is infrequent as it
92 * has something to do only when the system experiences severe memory
93 * shortage.
94 */
95 RX_QCHECK_PERIOD = (HZ / 2),
96
97 /*
98 * Period of the TX queue check timer and the maximum number of TX
99 * descriptors to be reclaimed by the TX timer.
100 */
101 TX_QCHECK_PERIOD = (HZ / 2),
102 MAX_TIMER_TX_RECLAIM = 100,
103
104 /*
105 * An FL with <= FL_STARVE_THRES buffers is starving and a periodic
106 * timer will attempt to refill it.
107 */
108 FL_STARVE_THRES = 4,
109
110 /*
111 * Suspend an Ethernet TX queue with fewer available descriptors than
112 * this. We always want to have room for a maximum sized packet:
113 * inline immediate data + MAX_SKB_FRAGS. This is the same as
114 * calc_tx_flits() for a TSO packet with nr_frags == MAX_SKB_FRAGS
115 * (see that function and its helpers for a description of the
116 * calculation).
117 */
118 ETHTXQ_MAX_FRAGS = MAX_SKB_FRAGS + 1,
119 ETHTXQ_MAX_SGL_LEN = ((3 * (ETHTXQ_MAX_FRAGS-1))/2 +
120 ((ETHTXQ_MAX_FRAGS-1) & 1) +
121 2),
122 ETHTXQ_MAX_HDR = (sizeof(struct fw_eth_tx_pkt_vm_wr) +
123 sizeof(struct cpl_tx_pkt_lso_core) +
124 sizeof(struct cpl_tx_pkt_core)) / sizeof(__be64),
125 ETHTXQ_MAX_FLITS = ETHTXQ_MAX_SGL_LEN + ETHTXQ_MAX_HDR,
126
127 ETHTXQ_STOP_THRES = 1 + DIV_ROUND_UP(ETHTXQ_MAX_FLITS, TXD_PER_EQ_UNIT),
128
129 /*
130 * Max TX descriptor space we allow for an Ethernet packet to be
131 * inlined into a WR. This is limited by the maximum value which
132 * we can specify for immediate data in the firmware Ethernet TX
133 * Work Request.
134 */
135 MAX_IMM_TX_PKT_LEN = FW_WR_IMMDLEN_MASK,
136
137 /*
138 * Max size of a WR sent through a control TX queue.
139 */
140 MAX_CTRL_WR_LEN = 256,
141
142 /*
143 * Maximum amount of data which we'll ever need to inline into a
144 * TX ring: max(MAX_IMM_TX_PKT_LEN, MAX_CTRL_WR_LEN).
145 */
146 MAX_IMM_TX_LEN = (MAX_IMM_TX_PKT_LEN > MAX_CTRL_WR_LEN
147 ? MAX_IMM_TX_PKT_LEN
148 : MAX_CTRL_WR_LEN),
149
150 /*
151 * For incoming packets less than RX_COPY_THRES, we copy the data into
152 * an skb rather than referencing the data. We allocate enough
153 * in-line room in skb's to accommodate pulling in RX_PULL_LEN bytes
154 * of the data (header).
155 */
156 RX_COPY_THRES = 256,
157 RX_PULL_LEN = 128,
158
159 /*
160 * Main body length for sk_buffs used for RX Ethernet packets with
161 * fragments. Should be >= RX_PULL_LEN but possibly bigger to give
162 * pskb_may_pull() some room.
163 */
164 RX_SKB_LEN = 512,
165};
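/*
 * Worked example of the ETHTXQ_MAX_SGL_LEN expression above, assuming
 * 4KB pages where MAX_SKB_FRAGS is typically 18:
 *
 *	ETHTXQ_MAX_FRAGS   = 18 + 1 = 19
 *	ETHTXQ_MAX_SGL_LEN = (3 * 18)/2 + (18 & 1) + 2 = 27 + 0 + 2 = 29
 *
 * i.e. 29 64-bit flits for the scatter/gather list: 2 flits for the
 * ULPTX SGL command word plus the first length/address pair, 3 flits for
 * each further pair of fragments, and 2 for an unpaired final fragment.
 */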
166
167/*
168 * Software state per TX descriptor.
169 */
170struct tx_sw_desc {
171 struct sk_buff *skb; /* socket buffer of TX data source */
172 struct ulptx_sgl *sgl; /* scatter/gather list in TX Queue */
173};
174
175/*
176 * Software state per RX Free List descriptor. We keep track of the allocated
177 * FL page, its size, and its PCI DMA address (if the page is mapped). The FL
178 * page size and its PCI DMA mapped state are stored in the low bits of the
179 * PCI DMA address as per below.
180 */
181struct rx_sw_desc {
182 struct page *page; /* Free List page buffer */
183 dma_addr_t dma_addr; /* PCI DMA address (if mapped) */
184 /* and flags (see below) */
185};
186
187/*
188 * The low bits of rx_sw_desc.dma_addr have special meaning. Note that the
189 * SGE also uses the low 4 bits to determine the size of the buffer. It uses
190 * those bits to index into the SGE_FL_BUFFER_SIZE[index] register array.
191 * Since we only use SGE_FL_BUFFER_SIZE0 and SGE_FL_BUFFER_SIZE1, these low 4
192 * bits can only contain a 0 or a 1 to indicate which size buffer we're giving
193 * to the SGE. Thus, our software state of "is the buffer mapped for DMA" is
194 * maintained in an inverse sense so the hardware never sees that bit high.
195 */
196enum {
197 RX_LARGE_BUF = 1 << 0, /* buffer is SGE_FL_BUFFER_SIZE[1] */
198 RX_UNMAPPED_BUF = 1 << 1, /* buffer is not mapped */
199};
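/*
 * Editorial example of the encoding above (illustrative addresses only):
 * a DMA-mapped "large" buffer at bus address 0x1234000 would be stored as
 * 0x1234000 | RX_LARGE_BUF == 0x1234001, while an unmapped small buffer at
 * the same address would be stored as 0x1234000 | RX_UNMAPPED_BUF ==
 * 0x1234002.  get_buf_addr() below strips both flag bits before the
 * address is handed back for DMA unmapping or syncing.
 */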
200
201/**
202 * get_buf_addr - return DMA buffer address of software descriptor
203 * @sdesc: pointer to the software buffer descriptor
204 *
205 * Return the DMA buffer address of a software descriptor (stripping out
206 * our low-order flag bits).
207 */
208static inline dma_addr_t get_buf_addr(const struct rx_sw_desc *sdesc)
209{
210 return sdesc->dma_addr & ~(dma_addr_t)(RX_LARGE_BUF | RX_UNMAPPED_BUF);
211}
212
213/**
214 * is_buf_mapped - is buffer mapped for DMA?
215 * @sdesc: pointer to the software buffer descriptor
216 *
217 * Determine whether the buffer associated with a software descriptor is
218 * mapped for DMA or not.
219 */
220static inline bool is_buf_mapped(const struct rx_sw_desc *sdesc)
221{
222 return !(sdesc->dma_addr & RX_UNMAPPED_BUF);
223}
224
225/**
226 * need_skb_unmap - does the platform need unmapping of sk_buffs?
227 *
228 * Returns true if the platform needs sk_buff unmapping. The compiler
229 * optimizes away the unnecessary unmapping code when this returns false.
230 */
231static inline int need_skb_unmap(void)
232{
233#ifdef CONFIG_NEED_DMA_MAP_STATE
234 return 1;
235#else
236 return 0;
237#endif
238}
239
240/**
241 * txq_avail - return the number of available slots in a TX queue
242 * @tq: the TX queue
243 *
244 * Returns the number of available descriptors in a TX queue.
245 */
246static inline unsigned int txq_avail(const struct sge_txq *tq)
247{
248 return tq->size - 1 - tq->in_use;
249}
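/*
 * Editorial aside: the "- 1" above presumably keeps one descriptor
 * permanently unused so that a completely full ring can be distinguished
 * from a completely empty one (the Producer Index never catches up with
 * the Consumer Index).
 */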
250
251/**
252 * fl_cap - return the capacity of a Free List
253 * @fl: the Free List
254 *
255 * Returns the capacity of a Free List. The capacity is less than the
256 * size because an Egress Queue Index Unit worth of descriptors needs to
257 * be left unpopulated, otherwise the Producer and Consumer indices PIDX
258 * and CIDX will match and the hardware will think the FL is empty.
259 */
260static inline unsigned int fl_cap(const struct sge_fl *fl)
261{
262 return fl->size - FL_PER_EQ_UNIT;
263}
264
265/**
266 * fl_starving - return whether a Free List is starving.
267 * @fl: the Free List
268 *
269 * Tests specified Free List to see whether the number of buffers
270 * available to the hardware has fallen below our "starvation"
271 * threshold.
272 */
273static inline bool fl_starving(const struct sge_fl *fl)
274{
275 return fl->avail - fl->pend_cred <= FL_STARVE_THRES;
276}
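/*
 * Editorial worked example: with FL_STARVE_THRES == 4, a Free List with
 * fl->avail == 10 buffers of which fl->pend_cred == 7 have not yet been
 * advertised to the hardware leaves only 10 - 7 = 3 buffers the SGE can
 * actually use, so fl_starving() returns true and the RX timer will try
 * to refill the list.
 */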
277
278/**
279 * map_skb - map an skb for DMA to the device
280 * @dev: the egress net device
281 * @skb: the packet to map
282 * @addr: a pointer to the base of the DMA mapping array
283 *
284 * Map an skb for DMA to the device and return an array of DMA addresses.
285 */
286static int map_skb(struct device *dev, const struct sk_buff *skb,
287 dma_addr_t *addr)
288{
289 const skb_frag_t *fp, *end;
290 const struct skb_shared_info *si;
291
292 *addr = dma_map_single(dev, skb->data, skb_headlen(skb), DMA_TO_DEVICE);
293 if (dma_mapping_error(dev, *addr))
294 goto out_err;
295
296 si = skb_shinfo(skb);
297 end = &si->frags[si->nr_frags];
298 for (fp = si->frags; fp < end; fp++) {
299 *++addr = dma_map_page(dev, fp->page, fp->page_offset, fp->size,
300 DMA_TO_DEVICE);
301 if (dma_mapping_error(dev, *addr))
302 goto unwind;
303 }
304 return 0;
305
306unwind:
307 while (fp-- > si->frags)
308 dma_unmap_page(dev, *--addr, fp->size, DMA_TO_DEVICE);
309 dma_unmap_single(dev, addr[-1], skb_headlen(skb), DMA_TO_DEVICE);
310
311out_err:
312 return -ENOMEM;
313}
314
315static void unmap_sgl(struct device *dev, const struct sk_buff *skb,
316 const struct ulptx_sgl *sgl, const struct sge_txq *tq)
317{
318 const struct ulptx_sge_pair *p;
319 unsigned int nfrags = skb_shinfo(skb)->nr_frags;
320
321 if (likely(skb_headlen(skb)))
322 dma_unmap_single(dev, be64_to_cpu(sgl->addr0),
323 be32_to_cpu(sgl->len0), DMA_TO_DEVICE);
324 else {
325 dma_unmap_page(dev, be64_to_cpu(sgl->addr0),
326 be32_to_cpu(sgl->len0), DMA_TO_DEVICE);
327 nfrags--;
328 }
329
330 /*
331 * the complexity below is because of the possibility of a wrap-around
332 * in the middle of an SGL
333 */
334 for (p = sgl->sge; nfrags >= 2; nfrags -= 2) {
335 if (likely((u8 *)(p + 1) <= (u8 *)tq->stat)) {
336unmap:
337 dma_unmap_page(dev, be64_to_cpu(p->addr[0]),
338 be32_to_cpu(p->len[0]), DMA_TO_DEVICE);
339 dma_unmap_page(dev, be64_to_cpu(p->addr[1]),
340 be32_to_cpu(p->len[1]), DMA_TO_DEVICE);
341 p++;
342 } else if ((u8 *)p == (u8 *)tq->stat) {
343 p = (const struct ulptx_sge_pair *)tq->desc;
344 goto unmap;
345 } else if ((u8 *)p + 8 == (u8 *)tq->stat) {
346 const __be64 *addr = (const __be64 *)tq->desc;
347
348 dma_unmap_page(dev, be64_to_cpu(addr[0]),
349 be32_to_cpu(p->len[0]), DMA_TO_DEVICE);
350 dma_unmap_page(dev, be64_to_cpu(addr[1]),
351 be32_to_cpu(p->len[1]), DMA_TO_DEVICE);
352 p = (const struct ulptx_sge_pair *)&addr[2];
353 } else {
354 const __be64 *addr = (const __be64 *)tq->desc;
355
356 dma_unmap_page(dev, be64_to_cpu(p->addr[0]),
357 be32_to_cpu(p->len[0]), DMA_TO_DEVICE);
358 dma_unmap_page(dev, be64_to_cpu(addr[0]),
359 be32_to_cpu(p->len[1]), DMA_TO_DEVICE);
360 p = (const struct ulptx_sge_pair *)&addr[1];
361 }
362 }
363 if (nfrags) {
364 __be64 addr;
365
366 if ((u8 *)p == (u8 *)tq->stat)
367 p = (const struct ulptx_sge_pair *)tq->desc;
368 addr = ((u8 *)p + 16 <= (u8 *)tq->stat
369 ? p->addr[0]
370 : *(const __be64 *)tq->desc);
371 dma_unmap_page(dev, be64_to_cpu(addr), be32_to_cpu(p->len[0]),
372 DMA_TO_DEVICE);
373 }
374}
375
376/**
377 * free_tx_desc - reclaims TX descriptors and their buffers
378 * @adapter: the adapter
379 * @tq: the TX queue to reclaim descriptors from
380 * @n: the number of descriptors to reclaim
381 * @unmap: whether the buffers should be unmapped for DMA
382 *
383 * Reclaims TX descriptors from an SGE TX queue and frees the associated
384 * TX buffers. Called with the TX queue lock held.
385 */
386static void free_tx_desc(struct adapter *adapter, struct sge_txq *tq,
387 unsigned int n, bool unmap)
388{
389 struct tx_sw_desc *sdesc;
390 unsigned int cidx = tq->cidx;
391 struct device *dev = adapter->pdev_dev;
392
393 const int need_unmap = need_skb_unmap() && unmap;
394
395 sdesc = &tq->sdesc[cidx];
396 while (n--) {
397 /*
398 * If we kept a reference to the original TX skb, we need to
399 * unmap it from PCI DMA space (if required) and free it.
400 */
401 if (sdesc->skb) {
402 if (need_unmap)
403 unmap_sgl(dev, sdesc->skb, sdesc->sgl, tq);
404 kfree_skb(sdesc->skb);
405 sdesc->skb = NULL;
406 }
407
408 sdesc++;
409 if (++cidx == tq->size) {
410 cidx = 0;
411 sdesc = tq->sdesc;
412 }
413 }
414 tq->cidx = cidx;
415}
416
417/*
418 * Return the number of reclaimable descriptors in a TX queue.
419 */
420static inline int reclaimable(const struct sge_txq *tq)
421{
422 int hw_cidx = be16_to_cpu(tq->stat->cidx);
423 int reclaimable = hw_cidx - tq->cidx;
424 if (reclaimable < 0)
425 reclaimable += tq->size;
426 return reclaimable;
427}
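/*
 * Editorial worked example (illustrative values): if the hardware status
 * copy reports CIDX == 5 while our software tq->cidx == 1000 on a
 * 1024-entry queue, the raw difference 5 - 1000 = -995 is negative, so we
 * add tq->size and get -995 + 1024 = 29 reclaimable descriptors; the
 * hardware has simply wrapped past the end of the ring ahead of us.
 */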
428
429/**
430 * reclaim_completed_tx - reclaims completed TX descriptors
431 * @adapter: the adapter
432 * @tq: the TX queue to reclaim completed descriptors from
433 * @unmap: whether the buffers should be unmapped for DMA
434 *
435 * Reclaims TX descriptors that the SGE has indicated it has processed,
436 * and frees the associated buffers if possible. Called with the TX
437 * queue locked.
438 */
439static inline void reclaim_completed_tx(struct adapter *adapter,
440 struct sge_txq *tq,
441 bool unmap)
442{
443 int avail = reclaimable(tq);
444
445 if (avail) {
446 /*
447 * Limit the amount of clean up work we do at a time to keep
448 * the TX lock hold time O(1).
449 */
450 if (avail > MAX_TX_RECLAIM)
451 avail = MAX_TX_RECLAIM;
452
453 free_tx_desc(adapter, tq, avail, unmap);
454 tq->in_use -= avail;
455 }
456}
457
458/**
459 * get_buf_size - return the size of an RX Free List buffer.
460 * @sdesc: pointer to the software buffer descriptor
461 */
462static inline int get_buf_size(const struct rx_sw_desc *sdesc)
463{
464 return FL_PG_ORDER > 0 && (sdesc->dma_addr & RX_LARGE_BUF)
465 ? (PAGE_SIZE << FL_PG_ORDER)
466 : PAGE_SIZE;
467}
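/*
 * Editorial example (FL_PG_ORDER is defined earlier in this file and
 * depends on PAGE_SIZE): if FL_PG_ORDER were 4 on a 4KB-page system, a
 * descriptor with RX_LARGE_BUF set would describe a PAGE_SIZE << 4 == 64KB
 * buffer, while one without the flag would describe a single 4KB page.
 */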
468
469/**
470 * free_rx_bufs - free RX buffers on an SGE Free List
471 * @adapter: the adapter
472 * @fl: the SGE Free List to free buffers from
473 * @n: how many buffers to free
474 *
475 * Release the next @n buffers on an SGE Free List RX queue. The
476 * buffers must be made inaccessible to hardware before calling this
477 * function.
478 */
479static void free_rx_bufs(struct adapter *adapter, struct sge_fl *fl, int n)
480{
481 while (n--) {
482 struct rx_sw_desc *sdesc = &fl->sdesc[fl->cidx];
483
484 if (is_buf_mapped(sdesc))
485 dma_unmap_page(adapter->pdev_dev, get_buf_addr(sdesc),
486 get_buf_size(sdesc), PCI_DMA_FROMDEVICE);
487 put_page(sdesc->page);
488 sdesc->page = NULL;
489 if (++fl->cidx == fl->size)
490 fl->cidx = 0;
491 fl->avail--;
492 }
493}
494
495/**
496 * unmap_rx_buf - unmap the current RX buffer on an SGE Free List
497 * @adapter: the adapter
498 * @fl: the SGE Free List
499 *
500 * Unmap the current buffer on an SGE Free List RX queue. The
501 * buffer must be made inaccessible to HW before calling this function.
502 *
503 * This is similar to @free_rx_bufs above but does not free the buffer.
504 * Do note that the FL still loses any further access to the buffer.
505 * This is used predominantly to "transfer ownership" of an FL buffer
506 * to another entity (typically an skb's fragment list).
507 */
508static void unmap_rx_buf(struct adapter *adapter, struct sge_fl *fl)
509{
510 struct rx_sw_desc *sdesc = &fl->sdesc[fl->cidx];
511
512 if (is_buf_mapped(sdesc))
513 dma_unmap_page(adapter->pdev_dev, get_buf_addr(sdesc),
514 get_buf_size(sdesc), PCI_DMA_FROMDEVICE);
515 sdesc->page = NULL;
516 if (++fl->cidx == fl->size)
517 fl->cidx = 0;
518 fl->avail--;
519}
520
521/**
522 * ring_fl_db - ring doorbell on free list
523 * @adapter: the adapter
524 * @fl: the Free List whose doorbell should be rung ...
525 *
526 * Tell the Scatter Gather Engine that there are new free list entries
527 * available.
528 */
529static inline void ring_fl_db(struct adapter *adapter, struct sge_fl *fl)
530{
531 /*
532 * The SGE keeps track of its Producer and Consumer Indices in terms
533 * of Egress Queue Units so we can only tell it about integral numbers
534 * of multiples of Free List Entries per Egress Queue Units ...
535 */
536 if (fl->pend_cred >= FL_PER_EQ_UNIT) {
537 wmb();
538 t4_write_reg(adapter, T4VF_SGE_BASE_ADDR + SGE_VF_KDOORBELL,
539 DBPRIO |
540 QID(fl->cntxt_id) |
541 PIDX(fl->pend_cred / FL_PER_EQ_UNIT));
542 fl->pend_cred %= FL_PER_EQ_UNIT;
543 }
544}
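/*
 * Editorial worked example (assuming FL_PER_EQ_UNIT == 8, i.e. eight
 * 8-byte Free List pointers per 64-byte Egress Queue unit): with
 * fl->pend_cred == 21 we can advertise only 21 / 8 == 2 whole units (a
 * PIDX increment of 2 covering 16 buffers); the remaining 21 % 8 == 5
 * buffers stay in fl->pend_cred until further credits accumulate.
 */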
545
546/**
547 * set_rx_sw_desc - initialize software RX buffer descriptor
548 * @sdesc: pointer to the software RX buffer descriptor
549 * @page: pointer to the page data structure backing the RX buffer
550 * @dma_addr: PCI DMA address (possibly with low-bit flags)
551 */
552static inline void set_rx_sw_desc(struct rx_sw_desc *sdesc, struct page *page,
553 dma_addr_t dma_addr)
554{
555 sdesc->page = page;
556 sdesc->dma_addr = dma_addr;
557}
558
559/*
560 * Support for poisoning RX buffers ...
561 */
562#define POISON_BUF_VAL -1
563
564static inline void poison_buf(struct page *page, size_t sz)
565{
566#if POISON_BUF_VAL >= 0
567 memset(page_address(page), POISON_BUF_VAL, sz);
568#endif
569}
570
571/**
572 * refill_fl - refill an SGE RX buffer ring
573 * @adapter: the adapter
574 * @fl: the Free List ring to refill
575 * @n: the number of new buffers to allocate
576 * @gfp: the gfp flags for the allocations
577 *
578 * (Re)populate an SGE free-buffer queue with up to @n new packet buffers,
579 * allocated with the supplied gfp flags. The caller must ensure that
580 * @n does not exceed the queue's capacity -- i.e. (cidx == pidx) _IN
581 * EGRESS QUEUE UNITS_ indicates an empty Free List! Returns the number
582 * of buffers allocated. If afterwards the queue is found critically low,
583 * mark it as starving in the bitmap of starving FLs.
584 */
585static unsigned int refill_fl(struct adapter *adapter, struct sge_fl *fl,
586 int n, gfp_t gfp)
587{
588 struct page *page;
589 dma_addr_t dma_addr;
590 unsigned int cred = fl->avail;
591 __be64 *d = &fl->desc[fl->pidx];
592 struct rx_sw_desc *sdesc = &fl->sdesc[fl->pidx];
593
594 /*
595 * Sanity: ensure that the result of adding n Free List buffers
596 * won't result in wrapping the SGE's Producer Index around to
597 * its Consumer Index thereby indicating an empty Free List ...
598 */
599 BUG_ON(fl->avail + n > fl->size - FL_PER_EQ_UNIT);
600
601 /*
602 * If we support large pages, prefer large buffers and fail over to
603 * small pages if we can't allocate large pages to satisfy the refill.
604 * If we don't support large pages, drop directly into the small page
605 * allocation code.
606 */
607 if (FL_PG_ORDER == 0)
608 goto alloc_small_pages;
609
610 while (n) {
611 page = alloc_pages(gfp | __GFP_COMP | __GFP_NOWARN,
612 FL_PG_ORDER);
613 if (unlikely(!page)) {
614 /*
615 * We've failed in our attempt to allocate a "large
616 * page". Fail over to the "small page" allocation
617 * below.
618 */
619 fl->large_alloc_failed++;
620 break;
621 }
622 poison_buf(page, PAGE_SIZE << FL_PG_ORDER);
623
624 dma_addr = dma_map_page(adapter->pdev_dev, page, 0,
625 PAGE_SIZE << FL_PG_ORDER,
626 PCI_DMA_FROMDEVICE);
627 if (unlikely(dma_mapping_error(adapter->pdev_dev, dma_addr))) {
628 /*
629 * We've run out of DMA mapping space. Free up the
630 * buffer and return with what we've managed to put
631 * into the free list. We don't want to fail over to
632 * the small page allocation below in this case
633 * because DMA mapping resources are typically
634 * critical resources once they become scarce.
635 */
636 __free_pages(page, FL_PG_ORDER);
637 goto out;
638 }
639 dma_addr |= RX_LARGE_BUF;
640 *d++ = cpu_to_be64(dma_addr);
641
642 set_rx_sw_desc(sdesc, page, dma_addr);
643 sdesc++;
644
645 fl->avail++;
646 if (++fl->pidx == fl->size) {
647 fl->pidx = 0;
648 sdesc = fl->sdesc;
649 d = fl->desc;
650 }
651 n--;
652 }
653
654alloc_small_pages:
655 while (n--) {
656 page = __netdev_alloc_page(adapter->port[0],
657 gfp | __GFP_NOWARN);
658 if (unlikely(!page)) {
659 fl->alloc_failed++;
660 break;
661 }
662 poison_buf(page, PAGE_SIZE);
663
664 dma_addr = dma_map_page(adapter->pdev_dev, page, 0, PAGE_SIZE,
665 PCI_DMA_FROMDEVICE);
666 if (unlikely(dma_mapping_error(adapter->pdev_dev, dma_addr))) {
667 netdev_free_page(adapter->port[0], page);
668 break;
669 }
670 *d++ = cpu_to_be64(dma_addr);
671
672 set_rx_sw_desc(sdesc, page, dma_addr);
673 sdesc++;
674
675 fl->avail++;
676 if (++fl->pidx == fl->size) {
677 fl->pidx = 0;
678 sdesc = fl->sdesc;
679 d = fl->desc;
680 }
681 }
682
683out:
684 /*
685 * Update our accounting state to incorporate the new Free List
686 * buffers, tell the hardware about them and return the number of
687 * buffers which we were able to allocate.
688 */
689 cred = fl->avail - cred;
690 fl->pend_cred += cred;
691 ring_fl_db(adapter, fl);
692
693 if (unlikely(fl_starving(fl))) {
694 smp_wmb();
695 set_bit(fl->cntxt_id, adapter->sge.starving_fl);
696 }
697
698 return cred;
699}
700
701/*
702 * Refill a Free List to its capacity or the Maximum Refill Increment,
703 * whichever is smaller ...
704 */
705static inline void __refill_fl(struct adapter *adapter, struct sge_fl *fl)
706{
707 refill_fl(adapter, fl,
708 min((unsigned int)MAX_RX_REFILL, fl_cap(fl) - fl->avail),
709 GFP_ATOMIC);
710}
711
712/**
713 * alloc_ring - allocate resources for an SGE descriptor ring
714 * @dev: the PCI device's core device
715 * @nelem: the number of descriptors
716 * @hwsize: the size of each hardware descriptor
717 * @swsize: the size of each software descriptor
718 * @busaddrp: the physical PCI bus address of the allocated ring
719 * @swringp: return address pointer for software ring
720 * @stat_size: extra space in hardware ring for status information
721 *
722 * Allocates resources for an SGE descriptor ring, such as TX queues,
723 * free buffer lists, response queues, etc. Each SGE ring requires
724 * space for its hardware descriptors plus, optionally, space for software
725 * state associated with each hardware entry (the metadata). The function
726 * returns three values: the virtual address for the hardware ring (the
727 * return value of the function), the PCI bus address of the hardware
728 * ring (in *busaddrp), and the address of the software ring (in swringp).
729 * Both the hardware and software rings are returned zeroed out.
730 */
731static void *alloc_ring(struct device *dev, size_t nelem, size_t hwsize,
732 size_t swsize, dma_addr_t *busaddrp, void *swringp,
733 size_t stat_size)
734{
735 /*
736 * Allocate the hardware ring and PCI DMA bus address space for said.
737 */
738 size_t hwlen = nelem * hwsize + stat_size;
739 void *hwring = dma_alloc_coherent(dev, hwlen, busaddrp, GFP_KERNEL);
740
741 if (!hwring)
742 return NULL;
743
744 /*
745 * If the caller wants a software ring, allocate it and return a
746 * pointer to it in *swringp.
747 */
748 BUG_ON((swsize != 0) != (swringp != NULL));
749 if (swsize) {
750 void *swring = kcalloc(nelem, swsize, GFP_KERNEL);
751
752 if (!swring) {
753 dma_free_coherent(dev, hwlen, hwring, *busaddrp);
754 return NULL;
755 }
756 *(void **)swringp = swring;
757 }
758
759 /*
760 * Zero out the hardware ring and return its address as our function
761 * value.
762 */
763 memset(hwring, 0, hwlen);
764 return hwring;
765}
766
767/**
768 * sgl_len - calculates the size of an SGL of the given capacity
769 * @n: the number of SGL entries
770 *
771 * Calculates the number of flits (8-byte units) needed for a Direct
772 * Scatter/Gather List that can hold the given number of entries.
773 */
774static inline unsigned int sgl_len(unsigned int n)
775{
776 /*
777 * A Direct Scatter Gather List uses 32-bit lengths and 64-bit PCI DMA
778 * addresses. The DSGL Work Request starts off with a 32-bit DSGL
779 * ULPTX header, then Length0, then Address0, then, for 1 <= i <= N,
780 * repeated sequences of { Length[i], Length[i+1], Address[i],
781 * Address[i+1] } (this ensures that all addresses are on 64-bit
782 * boundaries). If N is even, then Length[N+1] should be set to 0 and
783 * Address[N+1] is omitted.
784 *
785 * The following calculation incorporates all of the above. It's
786 * somewhat hard to follow but, briefly: the "+2" accounts for the
787 * first two flits which include the DSGL header, Length0 and
788 * Address0; the "(3*(n-1))/2" covers the main body of list entries
789 * (3 flits for every pair of the remaining N); and finally the
790 * "+((n-1)&1)" adds the one remaining flit needed if (n-1) is
791 * odd ...
792 */
793 n--;
794 return (3 * n) / 2 + (n & 1) + 2;
795}
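/*
 * Editorial worked example: for an SGL with n == 4 entries, sgl_len()
 * computes n-- -> 3 and then (3 * 3) / 2 + (3 & 1) + 2 = 4 + 1 + 2 = 7
 * flits: two flits for the DSGL header plus Length0/Address0, three flits
 * for the pair { Length[1], Length[2], Address[1], Address[2] }, and two
 * more for the final odd entry (Length[3] padded with a zero length,
 * followed by Address[3]).
 */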
796
797/**
798 * flits_to_desc - returns the num of TX descriptors for the given flits
799 * @flits: the number of flits
800 *
801 * Returns the number of TX descriptors needed for the supplied number
802 * of flits.
803 */
804static inline unsigned int flits_to_desc(unsigned int flits)
805{
806 BUG_ON(flits > SGE_MAX_WR_LEN / sizeof(__be64));
807 return DIV_ROUND_UP(flits, TXD_PER_EQ_UNIT);
808}
809
810/**
811 * is_eth_imm - can an Ethernet packet be sent as immediate data?
812 * @skb: the packet
813 *
814 * Returns whether an Ethernet packet is small enough to fit completely as
815 * immediate data.
816 */
817static inline int is_eth_imm(const struct sk_buff *skb)
818{
819 /*
820 * The VF Driver uses the FW_ETH_TX_PKT_VM_WR firmware Work Request
821 * which does not accommodate immediate data. We could dike out all
822 * of the support code for immediate data but that would tie our hands
823 * too much if we ever want to enhance the firmware. It would also
824 * create more differences between the PF and VF Drivers.
825 */
826 return false;
827}
828
829/**
830 * calc_tx_flits - calculate the number of flits for a packet TX WR
831 * @skb: the packet
832 *
833 * Returns the number of flits needed for a TX Work Request for the
834 * given Ethernet packet, including the needed WR and CPL headers.
835 */
836static inline unsigned int calc_tx_flits(const struct sk_buff *skb)
837{
838 unsigned int flits;
839
840 /*
841 * If the skb is small enough, we can pump it out as a work request
842 * with only immediate data. In that case we just have to have the
843 * TX Packet header plus the skb data in the Work Request.
844 */
845 if (is_eth_imm(skb))
846 return DIV_ROUND_UP(skb->len + sizeof(struct cpl_tx_pkt),
847 sizeof(__be64));
848
849 /*
850 * Otherwise, we're going to have to construct a Scatter gather list
851 * of the skb body and fragments. We also include the flits necessary
852 * for the TX Packet Work Request and CPL. We always have a firmware
853 * Write Header (incorporated as part of the cpl_tx_pkt_lso and
854 * cpl_tx_pkt structures), followed by either a TX Packet Write CPL
855 * message or, if we're doing a Large Send Offload, an LSO CPL message
856 * with an embedded TX Packet Write CPL message.
857 */
858 flits = sgl_len(skb_shinfo(skb)->nr_frags + 1);
859 if (skb_shinfo(skb)->gso_size)
860 flits += (sizeof(struct fw_eth_tx_pkt_vm_wr) +
861 sizeof(struct cpl_tx_pkt_lso_core) +
862 sizeof(struct cpl_tx_pkt_core)) / sizeof(__be64);
863 else
864 flits += (sizeof(struct fw_eth_tx_pkt_vm_wr) +
865 sizeof(struct cpl_tx_pkt_core)) / sizeof(__be64);
866 return flits;
867}
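/*
 * Editorial example (structure sizes are indicative only): a non-TSO
 * packet with a linear header plus 2 page fragments needs
 * sgl_len(2 + 1) == 5 flits for its Scatter/Gather List on top of the
 * fw_eth_tx_pkt_vm_wr and cpl_tx_pkt_core header flits; a TSO packet pays
 * for the cpl_tx_pkt_lso_core flits as well.
 */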
868
869/**
870 * write_sgl - populate a Scatter/Gather List for a packet
871 * @skb: the packet
872 * @tq: the TX queue we are writing into
873 * @sgl: starting location for writing the SGL
874 * @end: points right after the end of the SGL
875 * @start: start offset into skb main-body data to include in the SGL
876 * @addr: the list of DMA bus addresses for the SGL elements
877 *
878 * Generates a Scatter/Gather List for the buffers that make up a packet.
879 * The caller must provide adequate space for the SGL that will be written.
880 * The SGL includes all of the packet's page fragments and the data in its
881 * main body except for the first @start bytes. @sgl must be 16-byte
882 * aligned and within a TX descriptor with available space. @end points
883 * right after the end of the SGL but does not account for any potential
884 * wrap around, i.e., @end > @tq->stat.
885 */
886static void write_sgl(const struct sk_buff *skb, struct sge_txq *tq,
887 struct ulptx_sgl *sgl, u64 *end, unsigned int start,
888 const dma_addr_t *addr)
889{
890 unsigned int i, len;
891 struct ulptx_sge_pair *to;
892 const struct skb_shared_info *si = skb_shinfo(skb);
893 unsigned int nfrags = si->nr_frags;
894 struct ulptx_sge_pair buf[MAX_SKB_FRAGS / 2 + 1];
895
896 len = skb_headlen(skb) - start;
897 if (likely(len)) {
898 sgl->len0 = htonl(len);
899 sgl->addr0 = cpu_to_be64(addr[0] + start);
900 nfrags++;
901 } else {
902 sgl->len0 = htonl(si->frags[0].size);
903 sgl->addr0 = cpu_to_be64(addr[1]);
904 }
905
906 sgl->cmd_nsge = htonl(ULPTX_CMD(ULP_TX_SC_DSGL) |
907 ULPTX_NSGE(nfrags));
908 if (likely(--nfrags == 0))
909 return;
910 /*
911 * Most of the complexity below deals with the possibility we hit the
912 * end of the queue in the middle of writing the SGL. For this case
913 * only we create the SGL in a temporary buffer and then copy it.
914 */
915 to = (u8 *)end > (u8 *)tq->stat ? buf : sgl->sge;
916
917 for (i = (nfrags != si->nr_frags); nfrags >= 2; nfrags -= 2, to++) {
918 to->len[0] = cpu_to_be32(si->frags[i].size);
919 to->len[1] = cpu_to_be32(si->frags[++i].size);
920 to->addr[0] = cpu_to_be64(addr[i]);
921 to->addr[1] = cpu_to_be64(addr[++i]);
922 }
923 if (nfrags) {
924 to->len[0] = cpu_to_be32(si->frags[i].size);
925 to->len[1] = cpu_to_be32(0);
926 to->addr[0] = cpu_to_be64(addr[i + 1]);
927 }
928 if (unlikely((u8 *)end > (u8 *)tq->stat)) {
929 unsigned int part0 = (u8 *)tq->stat - (u8 *)sgl->sge, part1;
930
931 if (likely(part0))
932 memcpy(sgl->sge, buf, part0);
933 part1 = (u8 *)end - (u8 *)tq->stat;
934 memcpy(tq->desc, (u8 *)buf + part0, part1);
935 end = (void *)tq->desc + part1;
936 }
937 if ((uintptr_t)end & 8) /* 0-pad to multiple of 16 */
938 *(u64 *)end = 0;
939}
940
941/**
942 * ring_tx_db - check and potentially ring a TX queue's doorbell
943 * @adapter: the adapter
944 * @tq: the TX queue
945 * @n: number of new descriptors to give to HW
946 *
947 * Ring the doorbell for a TX queue.
948 */
949static inline void ring_tx_db(struct adapter *adapter, struct sge_txq *tq,
950 int n)
951{
952 /*
953 * Warn if we write doorbells with the wrong priority and write
954 * descriptors before telling HW.
955 */
956 WARN_ON((QID(tq->cntxt_id) | PIDX(n)) & DBPRIO);
957 wmb();
958 t4_write_reg(adapter, T4VF_SGE_BASE_ADDR + SGE_VF_KDOORBELL,
959 QID(tq->cntxt_id) | PIDX(n));
960}
961
962/**
963 * inline_tx_skb - inline a packet's data into TX descriptors
964 * @skb: the packet
965 * @tq: the TX queue where the packet will be inlined
966 * @pos: starting position in the TX queue to inline the packet
967 *
968 * Inline a packet's contents directly into TX descriptors, starting at
969 * the given position within the TX DMA ring.
970 * Most of the complexity of this operation is dealing with wrap arounds
971 * in the middle of the packet we want to inline.
972 */
973static void inline_tx_skb(const struct sk_buff *skb, const struct sge_txq *tq,
974 void *pos)
975{
976 u64 *p;
977 int left = (void *)tq->stat - pos;
978
979 if (likely(skb->len <= left)) {
980 if (likely(!skb->data_len))
981 skb_copy_from_linear_data(skb, pos, skb->len);
982 else
983 skb_copy_bits(skb, 0, pos, skb->len);
984 pos += skb->len;
985 } else {
986 skb_copy_bits(skb, 0, pos, left);
987 skb_copy_bits(skb, left, tq->desc, skb->len - left);
988 pos = (void *)tq->desc + (skb->len - left);
989 }
990
991 /* 0-pad to multiple of 16 */
992 p = PTR_ALIGN(pos, 8);
993 if ((uintptr_t)p & 8)
994 *p = 0;
995}
996
997/*
998 * Figure out what HW csum a packet wants and return the appropriate control
999 * bits.
1000 */
1001static u64 hwcsum(const struct sk_buff *skb)
1002{
1003 int csum_type;
1004 const struct iphdr *iph = ip_hdr(skb);
1005
1006 if (iph->version == 4) {
1007 if (iph->protocol == IPPROTO_TCP)
1008 csum_type = TX_CSUM_TCPIP;
1009 else if (iph->protocol == IPPROTO_UDP)
1010 csum_type = TX_CSUM_UDPIP;
1011 else {
1012nocsum:
1013 /*
1014 * unknown protocol, disable HW csum
1015 * and hope a bad packet is detected
1016 */
1017 return TXPKT_L4CSUM_DIS;
1018 }
1019 } else {
1020 /*
1021 * this doesn't work with extension headers
1022 */
1023 const struct ipv6hdr *ip6h = (const struct ipv6hdr *)iph;
1024
1025 if (ip6h->nexthdr == IPPROTO_TCP)
1026 csum_type = TX_CSUM_TCPIP6;
1027 else if (ip6h->nexthdr == IPPROTO_UDP)
1028 csum_type = TX_CSUM_UDPIP6;
1029 else
1030 goto nocsum;
1031 }
1032
1033 if (likely(csum_type >= TX_CSUM_TCPIP))
1034 return TXPKT_CSUM_TYPE(csum_type) |
1035 TXPKT_IPHDR_LEN(skb_network_header_len(skb)) |
1036 TXPKT_ETHHDR_LEN(skb_network_offset(skb) - ETH_HLEN);
1037 else {
1038 int start = skb_transport_offset(skb);
1039
1040 return TXPKT_CSUM_TYPE(csum_type) |
1041 TXPKT_CSUM_START(start) |
1042 TXPKT_CSUM_LOC(start + skb->csum_offset);
1043 }
1044}
1045
1046/*
1047 * Stop an Ethernet TX queue and record that state change.
1048 */
1049static void txq_stop(struct sge_eth_txq *txq)
1050{
1051 netif_tx_stop_queue(txq->txq);
1052 txq->q.stops++;
1053}
1054
1055/*
1056 * Advance our software state for a TX queue by adding n in use descriptors.
1057 */
1058static inline void txq_advance(struct sge_txq *tq, unsigned int n)
1059{
1060 tq->in_use += n;
1061 tq->pidx += n;
1062 if (tq->pidx >= tq->size)
1063 tq->pidx -= tq->size;
1064}
1065
1066/**
1067 * t4vf_eth_xmit - add a packet to an Ethernet TX queue
1068 * @skb: the packet
1069 * @dev: the egress net device
1070 *
1071 * Add a packet to an SGE Ethernet TX queue. Runs with softirqs disabled.
1072 */
1073int t4vf_eth_xmit(struct sk_buff *skb, struct net_device *dev)
1074{
1075 u32 wr_mid;
1076 u64 cntrl, *end;
1077 int qidx, credits;
1078 unsigned int flits, ndesc;
1079 struct adapter *adapter;
1080 struct sge_eth_txq *txq;
1081 const struct port_info *pi;
1082 struct fw_eth_tx_pkt_vm_wr *wr;
1083 struct cpl_tx_pkt_core *cpl;
1084 const struct skb_shared_info *ssi;
1085 dma_addr_t addr[MAX_SKB_FRAGS + 1];
1086 const size_t fw_hdr_copy_len = (sizeof(wr->ethmacdst) +
1087 sizeof(wr->ethmacsrc) +
1088 sizeof(wr->ethtype) +
1089 sizeof(wr->vlantci));
1090
1091 /*
1092 * The chip minimum packet length is 10 octets but the firmware
1093 * command that we are using requires that we copy the Ethernet header
1094 * (including the VLAN tag) into the header so we reject anything
1095 * smaller than that ...
1096 */
1097 if (unlikely(skb->len < fw_hdr_copy_len))
1098 goto out_free;
1099
1100 /*
1101 * Figure out which TX Queue we're going to use.
1102 */
1103 pi = netdev_priv(dev);
1104 adapter = pi->adapter;
1105 qidx = skb_get_queue_mapping(skb);
1106 BUG_ON(qidx >= pi->nqsets);
1107 txq = &adapter->sge.ethtxq[pi->first_qset + qidx];
1108
1109 /*
1110 * Take this opportunity to reclaim any TX Descriptors whose DMA
1111 * transfers have completed.
1112 */
1113 reclaim_completed_tx(adapter, &txq->q, true);
1114
1115 /*
1116 * Calculate the number of flits and TX Descriptors we're going to
1117 * need along with how many TX Descriptors will be left over after
1118 * we inject our Work Request.
1119 */
1120 flits = calc_tx_flits(skb);
1121 ndesc = flits_to_desc(flits);
1122 credits = txq_avail(&txq->q) - ndesc;
1123
1124 if (unlikely(credits < 0)) {
1125 /*
1126 * Not enough room for this packet's Work Request. Stop the
1127 * TX Queue and return a "busy" condition. The queue will get
1128 * started later on when the firmware informs us that space
1129 * has opened up.
1130 */
1131 txq_stop(txq);
1132 dev_err(adapter->pdev_dev,
1133 "%s: TX ring %u full while queue awake!\n",
1134 dev->name, qidx);
1135 return NETDEV_TX_BUSY;
1136 }
1137
1138 if (!is_eth_imm(skb) &&
1139 unlikely(map_skb(adapter->pdev_dev, skb, addr) < 0)) {
1140 /*
1141 * We need to map the skb into PCI DMA space (because it can't
1142 * be in-lined directly into the Work Request) and the mapping
1143 * operation failed. Record the error and drop the packet.
1144 */
1145 txq->mapping_err++;
1146 goto out_free;
1147 }
1148
1149 wr_mid = FW_WR_LEN16(DIV_ROUND_UP(flits, 2));
1150 if (unlikely(credits < ETHTXQ_STOP_THRES)) {
1151 /*
1152 * After we're done injecting the Work Request for this
1153 * packet, we'll be below our "stop threshold" so stop the TX
1154 * Queue now and schedule a request for an SGE Egress Queue
1155 * Update message. The queue will get started later on when
1156 * the firmware processes this Work Request and sends us an
1157 * Egress Queue Status Update message indicating that space
1158 * has opened up.
1159 */
1160 txq_stop(txq);
1161 wr_mid |= FW_WR_EQUEQ | FW_WR_EQUIQ;
1162 }
1163
1164 /*
1165 * Start filling in our Work Request. Note that we do _not_ handle
1166 * the WR Header wrapping around the TX Descriptor Ring. If our
1167 * maximum header size ever exceeds one TX Descriptor, we'll need to
1168 * do something else here.
1169 */
1170 BUG_ON(DIV_ROUND_UP(ETHTXQ_MAX_HDR, TXD_PER_EQ_UNIT) > 1);
1171 wr = (void *)&txq->q.desc[txq->q.pidx];
1172 wr->equiq_to_len16 = cpu_to_be32(wr_mid);
1173 wr->r3[0] = cpu_to_be64(0);
1174 wr->r3[1] = cpu_to_be64(0);
1175 skb_copy_from_linear_data(skb, (void *)wr->ethmacdst, fw_hdr_copy_len);
1176 end = (u64 *)wr + flits;
1177
1178 /*
1179 * If this is a Large Send Offload packet we'll put in an LSO CPL
1180 * message with an encapsulated TX Packet CPL message. Otherwise we
1181 * just use a TX Packet CPL message.
1182 */
1183 ssi = skb_shinfo(skb);
1184 if (ssi->gso_size) {
1185 struct cpl_tx_pkt_lso_core *lso = (void *)(wr + 1);
1186 bool v6 = (ssi->gso_type & SKB_GSO_TCPV6) != 0;
1187 int l3hdr_len = skb_network_header_len(skb);
1188 int eth_xtra_len = skb_network_offset(skb) - ETH_HLEN;
1189
1190 wr->op_immdlen =
1191 cpu_to_be32(FW_WR_OP(FW_ETH_TX_PKT_VM_WR) |
1192 FW_WR_IMMDLEN(sizeof(*lso) +
1193 sizeof(*cpl)));
1194 /*
1195 * Fill in the LSO CPL message.
1196 */
1197 lso->lso_ctrl =
1198 cpu_to_be32(LSO_OPCODE(CPL_TX_PKT_LSO) |
1199 LSO_FIRST_SLICE |
1200 LSO_LAST_SLICE |
1201 LSO_IPV6(v6) |
1202 LSO_ETHHDR_LEN(eth_xtra_len/4) |
1203 LSO_IPHDR_LEN(l3hdr_len/4) |
1204 LSO_TCPHDR_LEN(tcp_hdr(skb)->doff));
1205 lso->ipid_ofst = cpu_to_be16(0);
1206 lso->mss = cpu_to_be16(ssi->gso_size);
1207 lso->seqno_offset = cpu_to_be32(0);
1208 lso->len = cpu_to_be32(skb->len);
1209
1210 /*
1211 * Set up TX Packet CPL pointer, control word and perform
1212 * accounting.
1213 */
1214 cpl = (void *)(lso + 1);
1215 cntrl = (TXPKT_CSUM_TYPE(v6 ? TX_CSUM_TCPIP6 : TX_CSUM_TCPIP) |
1216 TXPKT_IPHDR_LEN(l3hdr_len) |
1217 TXPKT_ETHHDR_LEN(eth_xtra_len));
1218 txq->tso++;
1219 txq->tx_cso += ssi->gso_segs;
1220 } else {
1221 int len;
1222
1223 len = is_eth_imm(skb) ? skb->len + sizeof(*cpl) : sizeof(*cpl);
1224 wr->op_immdlen =
1225 cpu_to_be32(FW_WR_OP(FW_ETH_TX_PKT_VM_WR) |
1226 FW_WR_IMMDLEN(len));
1227
1228 /*
1229 * Set up TX Packet CPL pointer, control word and perform
1230 * accounting.
1231 */
1232 cpl = (void *)(wr + 1);
1233 if (skb->ip_summed == CHECKSUM_PARTIAL) {
1234 cntrl = hwcsum(skb) | TXPKT_IPCSUM_DIS;
1235 txq->tx_cso++;
1236 } else
1237 cntrl = TXPKT_L4CSUM_DIS | TXPKT_IPCSUM_DIS;
1238 }
1239
1240 /*
1241 * If there's a VLAN tag present, add that to the list of things to
1242 * do in this Work Request.
1243 */
1244 if (vlan_tx_tag_present(skb)) {
1245 txq->vlan_ins++;
1246 cntrl |= TXPKT_VLAN_VLD | TXPKT_VLAN(vlan_tx_tag_get(skb));
1247 }
1248
1249 /*
1250 * Fill in the TX Packet CPL message header.
1251 */
1252 cpl->ctrl0 = cpu_to_be32(TXPKT_OPCODE(CPL_TX_PKT_XT) |
1253 TXPKT_INTF(pi->port_id) |
1254 TXPKT_PF(0));
1255 cpl->pack = cpu_to_be16(0);
1256 cpl->len = cpu_to_be16(skb->len);
1257 cpl->ctrl1 = cpu_to_be64(cntrl);
1258
1259#ifdef T4_TRACE
1260 T4_TRACE5(adapter->tb[txq->q.cntxt_id & 7],
1261 "eth_xmit: ndesc %u, credits %u, pidx %u, len %u, frags %u",
1262 ndesc, credits, txq->q.pidx, skb->len, ssi->nr_frags);
1263#endif
1264
1265 /*
1266 * Fill in the body of the TX Packet CPL message with either in-lined
1267 * data or a Scatter/Gather List.
1268 */
1269 if (is_eth_imm(skb)) {
1270 /*
1271 * In-line the packet's data and free the skb since we don't
1272 * need it any longer.
1273 */
1274 inline_tx_skb(skb, &txq->q, cpl + 1);
1275 dev_kfree_skb(skb);
1276 } else {
1277 /*
1278 * Write the skb's Scatter/Gather list into the TX Packet CPL
1279 * message and retain a pointer to the skb so we can free it
1280 * later when its DMA completes. (We store the skb pointer
1281 * in the Software Descriptor corresponding to the last TX
1282 * Descriptor used by the Work Request.)
1283 *
1284 * The retained skb will be freed when the corresponding TX
1285 * Descriptors are reclaimed after their DMAs complete.
1286 * However, this could take quite a while since, in general,
1287 * the hardware is set up to be lazy about sending DMA
1288 * completion notifications to us and we mostly perform TX
1289 * reclaims in the transmit routine.
1290 *
1291 * This is good for performance but means that we rely on new
1292 * TX packets arriving to run the destructors of completed
1293 * packets, which open up space in their sockets' send queues.
1294 * Sometimes we do not get such new packets causing TX to
1295 * stall. A single UDP transmitter is a good example of this
1296 * situation. We have a clean up timer that periodically
1297 * reclaims completed packets but it doesn't run often enough
1298 * (nor do we want it to) to prevent lengthy stalls. A
1299 * solution to this problem is to run the destructor early,
1300 * after the packet is queued but before it's DMAd. A con is
1301 * that we lie to socket memory accounting, but the amount of
1302 * extra memory is reasonable (limited by the number of TX
1303 * descriptors), the packets do actually get freed quickly by
1304 * new packets almost always, and for protocols like TCP that
1305 * wait for ACKs to really free up the data, the extra memory
1306 * is even less. On the positive side we run the destructors
1307 * on the sending CPU rather than on a potentially different
1308 * completing CPU, usually a good thing.
1309 *
1310 * Run the destructor before telling the DMA engine about the
1311 * packet to make sure it doesn't complete and get freed
1312 * prematurely.
1313 */
1314 struct ulptx_sgl *sgl = (struct ulptx_sgl *)(cpl + 1);
1315 struct sge_txq *tq = &txq->q;
1316 int last_desc;
1317
1318 /*
1319 * If the Work Request header was an exact multiple of our TX
1320 * Descriptor length, then it's possible that the starting SGL
1321 * pointer lines up exactly with the end of our TX Descriptor
1322 * ring. If that's the case, wrap around to the beginning
1323 * here ...
1324 */
1325 if (unlikely((void *)sgl == (void *)tq->stat)) {
1326 sgl = (void *)tq->desc;
1327 end = (void *)((void *)tq->desc +
1328 ((void *)end - (void *)tq->stat));
1329 }
1330
1331 write_sgl(skb, tq, sgl, end, 0, addr);
1332 skb_orphan(skb);
1333
1334 last_desc = tq->pidx + ndesc - 1;
1335 if (last_desc >= tq->size)
1336 last_desc -= tq->size;
1337 tq->sdesc[last_desc].skb = skb;
1338 tq->sdesc[last_desc].sgl = sgl;
1339 }
1340
1341 /*
1342 * Advance our internal TX Queue state, tell the hardware about
1343 * the new TX descriptors and return success.
1344 */
1345 txq_advance(&txq->q, ndesc);
1346 dev->trans_start = jiffies;
1347 ring_tx_db(adapter, &txq->q, ndesc);
1348 return NETDEV_TX_OK;
1349
1350out_free:
1351 /*
1352 * An error of some sort happened. Free the TX skb and tell the
1353 * OS that we've "dealt" with the packet ...
1354 */
1355 dev_kfree_skb(skb);
1356 return NETDEV_TX_OK;
1357}
1358
1359/**
1360 * t4vf_pktgl_to_skb - build an sk_buff from a packet gather list
1361 * @gl: the gather list
1362 * @skb_len: size of sk_buff main body if it carries fragments
1363 * @pull_len: amount of data to move to the sk_buff's main body
1364 *
1365 * Builds an sk_buff from the given packet gather list. Returns the
1366 * sk_buff or %NULL if sk_buff allocation failed.
1367 */
1368struct sk_buff *t4vf_pktgl_to_skb(const struct pkt_gl *gl,
1369 unsigned int skb_len, unsigned int pull_len)
1370{
1371 struct sk_buff *skb;
1372 struct skb_shared_info *ssi;
1373
1374 /*
1375 * If the ingress packet is small enough, allocate an skb large enough
1376 * for all of the data and copy it inline. Otherwise, allocate an skb
1377 * with enough room to pull in the header and reference the rest of
1378 * the data via the skb fragment list.
1379 *
1380 * Below we rely on RX_COPY_THRES being less than the smallest Rx
1381 * buffer size, which is expected since buffers are at least
1382 * PAGE_SIZEd. In this case packets up to RX_COPY_THRES have only one
1383 * fragment.
1384 */
1385 if (gl->tot_len <= RX_COPY_THRES) {
1386 /* small packets have only one fragment */
1387 skb = alloc_skb(gl->tot_len, GFP_ATOMIC);
1388 if (unlikely(!skb))
1389 goto out;
1390 __skb_put(skb, gl->tot_len);
1391 skb_copy_to_linear_data(skb, gl->va, gl->tot_len);
1392 } else {
1393 skb = alloc_skb(skb_len, GFP_ATOMIC);
1394 if (unlikely(!skb))
1395 goto out;
1396 __skb_put(skb, pull_len);
1397 skb_copy_to_linear_data(skb, gl->va, pull_len);
1398
1399 ssi = skb_shinfo(skb);
1400 ssi->frags[0].page = gl->frags[0].page;
1401 ssi->frags[0].page_offset = gl->frags[0].page_offset + pull_len;
1402 ssi->frags[0].size = gl->frags[0].size - pull_len;
1403 if (gl->nfrags > 1)
1404 memcpy(&ssi->frags[1], &gl->frags[1],
1405 (gl->nfrags-1) * sizeof(skb_frag_t));
1406 ssi->nr_frags = gl->nfrags;
1407
1408 skb->len = gl->tot_len;
1409 skb->data_len = skb->len - pull_len;
1410 skb->truesize += skb->data_len;
1411
1412 /* Get a reference for the last page, we don't own it */
1413 get_page(gl->frags[gl->nfrags - 1].page);
1414 }
1415
1416out:
1417 return skb;
1418}
1419
1420/**
1421 * t4vf_pktgl_free - free a packet gather list
1422 * @gl: the gather list
1423 *
1424 * Releases the pages of a packet gather list. We do not own the last
1425 * page on the list and do not free it.
1426 */
1427void t4vf_pktgl_free(const struct pkt_gl *gl)
1428{
1429 int frag;
1430
1431 frag = gl->nfrags - 1;
1432 while (frag--)
1433 put_page(gl->frags[frag].page);
1434}
1435
1436/**
1437 * copy_frags - copy fragments from gather list into skb_shared_info
1438 * @si: destination skb shared info structure
1439 * @gl: source internal packet gather list
1440 * @offset: packet start offset in first page
1441 *
1442 * Copy an internal packet gather list into a Linux skb_shared_info
1443 * structure.
1444 */
1445static inline void copy_frags(struct skb_shared_info *si,
1446 const struct pkt_gl *gl,
1447 unsigned int offset)
1448{
1449 unsigned int n;
1450
1451 /* usually there's just one frag */
1452 si->frags[0].page = gl->frags[0].page;
1453 si->frags[0].page_offset = gl->frags[0].page_offset + offset;
1454 si->frags[0].size = gl->frags[0].size - offset;
1455 si->nr_frags = gl->nfrags;
1456
1457 n = gl->nfrags - 1;
1458 if (n)
1459 memcpy(&si->frags[1], &gl->frags[1], n * sizeof(skb_frag_t));
1460
1461 /* get a reference to the last page, we don't own it */
1462 get_page(gl->frags[n].page);
1463}
1464
1465/**
1466 * do_gro - perform Generic Receive Offload ingress packet processing
1467 * @rxq: ingress RX Ethernet Queue
1468 * @gl: gather list for ingress packet
1469 * @pkt: CPL header for last packet fragment
1470 *
1471 * Perform Generic Receive Offload (GRO) ingress packet processing.
1472 * We use the standard Linux GRO interfaces for this.
1473 */
1474static void do_gro(struct sge_eth_rxq *rxq, const struct pkt_gl *gl,
1475 const struct cpl_rx_pkt *pkt)
1476{
1477 int ret;
1478 struct sk_buff *skb;
1479
1480 skb = napi_get_frags(&rxq->rspq.napi);
1481 if (unlikely(!skb)) {
1482 t4vf_pktgl_free(gl);
1483 rxq->stats.rx_drops++;
1484 return;
1485 }
1486
1487 copy_frags(skb_shinfo(skb), gl, PKTSHIFT);
1488 skb->len = gl->tot_len - PKTSHIFT;
1489 skb->data_len = skb->len;
1490 skb->truesize += skb->data_len;
1491 skb->ip_summed = CHECKSUM_UNNECESSARY;
1492 skb_record_rx_queue(skb, rxq->rspq.idx);
1493
1494 if (pkt->vlan_ex)
1495 __vlan_hwaccel_put_tag(skb, be16_to_cpu(pkt->vlan));
1496 ret = napi_gro_frags(&rxq->rspq.napi);
1497
1498 if (ret == GRO_HELD)
1499 rxq->stats.lro_pkts++;
1500 else if (ret == GRO_MERGED || ret == GRO_MERGED_FREE)
1501 rxq->stats.lro_merged++;
1502 rxq->stats.pkts++;
1503 rxq->stats.rx_cso++;
1504}
1505
1506/**
1507 * t4vf_ethrx_handler - process an ingress ethernet packet
1508 * @rspq: the response queue that received the packet
1509 * @rsp: the response queue descriptor holding the RX_PKT message
1510 * @gl: the gather list of packet fragments
1511 *
1512 * Process an ingress ethernet packet and deliver it to the stack.
1513 */
1514int t4vf_ethrx_handler(struct sge_rspq *rspq, const __be64 *rsp,
1515 const struct pkt_gl *gl)
1516{
1517 struct sk_buff *skb;
1518 const struct cpl_rx_pkt *pkt = (void *)&rsp[1];
1519 bool csum_ok = pkt->csum_calc && !pkt->err_vec;
1520 struct sge_eth_rxq *rxq = container_of(rspq, struct sge_eth_rxq, rspq);
1521
1522 /*
1523 * If this is a good TCP packet and we have Generic Receive Offload
1524 * enabled, handle the packet in the GRO path.
1525 */
1526 if ((pkt->l2info & cpu_to_be32(RXF_TCP)) &&
1527 (rspq->netdev->features & NETIF_F_GRO) && csum_ok &&
1528 !pkt->ip_frag) {
1529 do_gro(rxq, gl, pkt);
1530 return 0;
1531 }
1532
1533 /*
1534 * Convert the Packet Gather List into an skb.
1535 */
1536 skb = t4vf_pktgl_to_skb(gl, RX_SKB_LEN, RX_PULL_LEN);
1537 if (unlikely(!skb)) {
1538 t4vf_pktgl_free(gl);
1539 rxq->stats.rx_drops++;
1540 return 0;
1541 }
1542 __skb_pull(skb, PKTSHIFT);
1543 skb->protocol = eth_type_trans(skb, rspq->netdev);
1544 skb_record_rx_queue(skb, rspq->idx);
1545 rxq->stats.pkts++;
1546
1547 if (csum_ok && (rspq->netdev->features & NETIF_F_RXCSUM) &&
1548 !pkt->err_vec && (be32_to_cpu(pkt->l2info) & (RXF_UDP|RXF_TCP))) {
1549 if (!pkt->ip_frag)
1550 skb->ip_summed = CHECKSUM_UNNECESSARY;
1551 else {
1552 __sum16 c = (__force __sum16)pkt->csum;
1553 skb->csum = csum_unfold(c);
1554 skb->ip_summed = CHECKSUM_COMPLETE;
1555 }
1556 rxq->stats.rx_cso++;
1557 } else
1558 skb_checksum_none_assert(skb);
1559
1560 if (pkt->vlan_ex) {
1561 rxq->stats.vlan_ex++;
1562 __vlan_hwaccel_put_tag(skb, be16_to_cpu(pkt->vlan));
1563 }
1564
1565 netif_receive_skb(skb);
1566
1567 return 0;
1568}
1569
1570/**
1571 * is_new_response - check if a response is newly written
1572 * @rc: the response control descriptor
1573 * @rspq: the response queue
1574 *
1575 * Returns true if a response descriptor contains a yet unprocessed
1576 * response.
1577 */
1578static inline bool is_new_response(const struct rsp_ctrl *rc,
1579 const struct sge_rspq *rspq)
1580{
1581 return RSPD_GEN(rc->type_gen) == rspq->gen;
1582}
1583
1584/**
1585 * restore_rx_bufs - put back a packet's RX buffers
1586 * @gl: the packet gather list
1587 * @fl: the SGE Free List
1588 * @frags: how many fragments in @gl
1589 *
1590 * Called when we find out that the current packet, @gl, can't be
1591 * processed right away for some reason. This is a very rare event and
1592 * there's no effort to make this suspension/resumption process
1593 * particularly efficient.
1594 *
1595 * We implement the suspension by putting all of the RX buffers associated
1596 * with the current packet back on the original Free List. The buffers
1597 * have already been unmapped and are left unmapped; we mark them as
1598 * unmapped in order to prevent further unmapping attempts. (Effectively
1599 * this function undoes the series of @unmap_rx_buf calls which were done
1600 * to create the current packet's gather list.) This leaves us ready to
1601 * restart processing of the packet the next time we start processing the
1602 * RX Queue ...
1603 */
1604static void restore_rx_bufs(const struct pkt_gl *gl, struct sge_fl *fl,
1605 int frags)
1606{
1607 struct rx_sw_desc *sdesc;
1608
1609 while (frags--) {
1610 if (fl->cidx == 0)
1611 fl->cidx = fl->size - 1;
1612 else
1613 fl->cidx--;
1614 sdesc = &fl->sdesc[fl->cidx];
1615 sdesc->page = gl->frags[frags].page;
1616 sdesc->dma_addr |= RX_UNMAPPED_BUF;
1617 fl->avail++;
1618 }
1619}
1620
1621/**
1622 * rspq_next - advance to the next entry in a response queue
1623 * @rspq: the queue
1624 *
1625 * Updates the state of a response queue to advance it to the next entry.
1626 */
1627static inline void rspq_next(struct sge_rspq *rspq)
1628{
1629 rspq->cur_desc = (void *)rspq->cur_desc + rspq->iqe_len;
1630 if (unlikely(++rspq->cidx == rspq->size)) {
1631 rspq->cidx = 0;
1632 rspq->gen ^= 1;
1633 rspq->cur_desc = rspq->desc;
1634 }
1635}
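/*
 * Editorial note: the generation bit is how is_new_response() above tells
 * fresh entries from stale ones without a hardware producer index.
 * Descriptors written during the current pass over the ring carry the
 * current value of rspq->gen; when the software Consumer Index wraps we
 * toggle rspq->gen, so leftovers from the previous pass no longer compare
 * equal and are not treated as new responses.
 */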
1636
1637/**
1638 * process_responses - process responses from an SGE response queue
1639 * @rspq: the ingress response queue to process
1640 * @budget: how many responses can be processed in this round
1641 *
1642 * Process responses from a Scatter Gather Engine response queue up to
1643 * the supplied budget. Responses include received packets as well as
1644 * control messages from firmware or hardware.
1645 *
1646 * Additionally choose the interrupt holdoff time for the next interrupt
1647 * on this queue. If the system is under memory shortage use a fairly
1648 * long delay to help recovery.
1649 */
1650int process_responses(struct sge_rspq *rspq, int budget)
1651{
1652 struct sge_eth_rxq *rxq = container_of(rspq, struct sge_eth_rxq, rspq);
1653 int budget_left = budget;
1654
1655 while (likely(budget_left)) {
1656 int ret, rsp_type;
1657 const struct rsp_ctrl *rc;
1658
1659 rc = (void *)rspq->cur_desc + (rspq->iqe_len - sizeof(*rc));
1660 if (!is_new_response(rc, rspq))
1661 break;
1662
1663 /*
1664 * Figure out what kind of response we've received from the
1665 * SGE.
1666 */
1667 rmb();
1668 rsp_type = RSPD_TYPE(rc->type_gen);
1669 if (likely(rsp_type == RSP_TYPE_FLBUF)) {
1670 skb_frag_t *fp;
1671 struct pkt_gl gl;
1672 const struct rx_sw_desc *sdesc;
1673 u32 bufsz, frag;
1674 u32 len = be32_to_cpu(rc->pldbuflen_qid);
1675
1676 /*
1677 * If we get a "new buffer" message from the SGE we
1678 * need to move on to the next Free List buffer.
1679 */
1680 if (len & RSPD_NEWBUF) {
1681 /*
1682 * We get one "new buffer" message when we
1683 * first start up a queue so we need to ignore
1684 * it when our offset into the buffer is 0.
1685 */
1686 if (likely(rspq->offset > 0)) {
1687 free_rx_bufs(rspq->adapter, &rxq->fl,
1688 1);
1689 rspq->offset = 0;
1690 }
1691 len = RSPD_LEN(len);
1692 }
1693 gl.tot_len = len;
1694
1695 /*
1696 * Gather packet fragments.
1697 */
1698 for (frag = 0, fp = gl.frags; /**/; frag++, fp++) {
1699 BUG_ON(frag >= MAX_SKB_FRAGS);
1700 BUG_ON(rxq->fl.avail == 0);
1701 sdesc = &rxq->fl.sdesc[rxq->fl.cidx];
1702 bufsz = get_buf_size(sdesc);
1703 fp->page = sdesc->page;
1704 fp->page_offset = rspq->offset;
1705 fp->size = min(bufsz, len);
1706 len -= fp->size;
1707 if (!len)
1708 break;
1709 unmap_rx_buf(rspq->adapter, &rxq->fl);
1710 }
1711 gl.nfrags = frag+1;
1712
1713 /*
1714 * Last buffer remains mapped so explicitly make it
1715 * coherent for CPU access and start preloading first
1716 * cache line ...
1717 */
1718 dma_sync_single_for_cpu(rspq->adapter->pdev_dev,
1719 get_buf_addr(sdesc),
1720 fp->size, DMA_FROM_DEVICE);
1721 gl.va = (page_address(gl.frags[0].page) +
1722 gl.frags[0].page_offset);
1723 prefetch(gl.va);
1724
1725 /*
1726 * Hand the new ingress packet to the handler for
1727 * this Response Queue.
1728 */
1729 ret = rspq->handler(rspq, rspq->cur_desc, &gl);
1730 if (likely(ret == 0))
1731 rspq->offset += ALIGN(fp->size, FL_ALIGN);
1732 else
1733 restore_rx_bufs(&gl, &rxq->fl, frag);
1734 } else if (likely(rsp_type == RSP_TYPE_CPL)) {
1735 ret = rspq->handler(rspq, rspq->cur_desc, NULL);
1736 } else {
1737 WARN_ON(rsp_type > RSP_TYPE_CPL);
1738 ret = 0;
1739 }
1740
1741 if (unlikely(ret)) {
1742 /*
1743 * Couldn't process descriptor, back off for recovery.
1744 * We use the SGE's last timer which has the longest
1745 * interrupt coalescing value ...
1746 */
1747 const int NOMEM_TIMER_IDX = SGE_NTIMERS-1;
1748 rspq->next_intr_params =
1749 QINTR_TIMER_IDX(NOMEM_TIMER_IDX);
1750 break;
1751 }
1752
1753 rspq_next(rspq);
1754 budget_left--;
1755 }
1756
1757 /*
1758 * If this is a Response Queue with an associated Free List and
1759 * at least two Egress Queue units available in the Free List
1760 * for new buffer pointers, refill the Free List.
1761 */
1762 if (rspq->offset >= 0 &&
1763 rxq->fl.size - rxq->fl.avail >= 2*FL_PER_EQ_UNIT)
1764 __refill_fl(rspq->adapter, &rxq->fl);
1765 return budget - budget_left;
1766}
1767
1768/**
1769 * napi_rx_handler - the NAPI handler for RX processing
1770 * @napi: the napi instance
1771 * @budget: how many packets we can process in this round
1772 *
1773 * Handler for new data events when using NAPI. This does not need any
1774 * locking or protection from interrupts as data interrupts are off at
1775 * this point and other adapter interrupts do not interfere (the latter
1776 * is not a concern at all with MSI-X as non-data interrupts then have
1777 * a separate handler).
1778 */
1779static int napi_rx_handler(struct napi_struct *napi, int budget)
1780{
1781 unsigned int intr_params;
1782 struct sge_rspq *rspq = container_of(napi, struct sge_rspq, napi);
1783 int work_done = process_responses(rspq, budget);
1784
1785 if (likely(work_done < budget)) {
1786 napi_complete(napi);
1787 intr_params = rspq->next_intr_params;
1788 rspq->next_intr_params = rspq->intr_params;
1789 } else
1790 intr_params = QINTR_TIMER_IDX(SGE_TIMER_UPD_CIDX);
1791
1792 if (unlikely(work_done == 0))
1793 rspq->unhandled_irqs++;
1794
1795 t4_write_reg(rspq->adapter,
1796 T4VF_SGE_BASE_ADDR + SGE_VF_GTS,
1797 CIDXINC(work_done) |
1798 INGRESSQID((u32)rspq->cntxt_id) |
1799 SEINTARM(intr_params));
1800 return work_done;
1801}
1802
1803/*
1804 * The MSI-X interrupt handler for an SGE response queue for the NAPI case
1805 * (i.e., response queue serviced by NAPI polling).
1806 */
1807irqreturn_t t4vf_sge_intr_msix(int irq, void *cookie)
1808{
1809 struct sge_rspq *rspq = cookie;
1810
1811 napi_schedule(&rspq->napi);
1812 return IRQ_HANDLED;
1813}
1814
1815/*
1816 * Process the indirect interrupt entries in the interrupt queue and kick off
1817 * NAPI for each queue that has generated an entry.
1818 */
1819static unsigned int process_intrq(struct adapter *adapter)
1820{
1821 struct sge *s = &adapter->sge;
1822 struct sge_rspq *intrq = &s->intrq;
1823 unsigned int work_done;
1824
1825 spin_lock(&adapter->sge.intrq_lock);
1826 for (work_done = 0; ; work_done++) {
1827 const struct rsp_ctrl *rc;
1828 unsigned int qid, iq_idx;
1829 struct sge_rspq *rspq;
1830
1831 /*
1832 * Grab the next response from the interrupt queue and bail
1833 * out if it's not a new response.
1834 */
1835 rc = (void *)intrq->cur_desc + (intrq->iqe_len - sizeof(*rc));
1836 if (!is_new_response(rc, intrq))
1837 break;
1838
1839 /*
1840 * If the response isn't a forwarded interrupt message, issue an
1841 * error and go on to the next response message. This should
1842 * never happen ...
1843 */
1844 rmb();
1845 if (unlikely(RSPD_TYPE(rc->type_gen) != RSP_TYPE_INTR)) {
1846 dev_err(adapter->pdev_dev,
1847 "Unexpected INTRQ response type %d\n",
1848 RSPD_TYPE(rc->type_gen));
1849 continue;
1850 }
1851
1852 /*
1853 * Extract the Queue ID from the interrupt message and perform
1854 * sanity checking to make sure it really refers to one of our
1855 * Ingress Queues which is active and matches the queue's ID.
1856 * None of these error conditions should ever happen so we may
1857 * want to make them fatal and/or conditionalize them under
1858 * DEBUG.
1859 */
1860 qid = RSPD_QID(be32_to_cpu(rc->pldbuflen_qid));
1861 iq_idx = IQ_IDX(s, qid);
1862 if (unlikely(iq_idx >= MAX_INGQ)) {
1863 dev_err(adapter->pdev_dev,
1864 "Ingress QID %d out of range\n", qid);
1865 continue;
1866 }
1867 rspq = s->ingr_map[iq_idx];
1868 if (unlikely(rspq == NULL)) {
1869 dev_err(adapter->pdev_dev,
1870 "Ingress QID %d RSPQ=NULL\n", qid);
1871 continue;
1872 }
1873 if (unlikely(rspq->abs_id != qid)) {
1874 dev_err(adapter->pdev_dev,
1875 "Ingress QID %d refers to RSPQ %d\n",
1876 qid, rspq->abs_id);
1877 continue;
1878 }
1879
1880 /*
1881 * Schedule NAPI processing on the indicated Response Queue
1882 * and move on to the next entry in the Forwarded Interrupt
1883 * Queue.
1884 */
1885 napi_schedule(&rspq->napi);
1886 rspq_next(intrq);
1887 }
1888
1889 t4_write_reg(adapter, T4VF_SGE_BASE_ADDR + SGE_VF_GTS,
1890 CIDXINC(work_done) |
1891 INGRESSQID(intrq->cntxt_id) |
1892 SEINTARM(intrq->intr_params));
1893
1894 spin_unlock(&adapter->sge.intrq_lock);
1895
1896 return work_done;
1897}
1898
1899/*
1900 * The MSI interrupt handler handles data events from SGE response queues as
1901 * well as error and other async events as they all use the same MSI vector.
1902 */
1903irqreturn_t t4vf_intr_msi(int irq, void *cookie)
1904{
1905 struct adapter *adapter = cookie;
1906
1907 process_intrq(adapter);
1908 return IRQ_HANDLED;
1909}
1910
1911/**
1912 * t4vf_intr_handler - select the top-level interrupt handler
1913 * @adapter: the adapter
1914 *
1915 * Selects the top-level interrupt handler based on the type of interrupts
1916 * (MSI-X or MSI).
1917 */
1918irq_handler_t t4vf_intr_handler(struct adapter *adapter)
1919{
1920 BUG_ON((adapter->flags & (USING_MSIX|USING_MSI)) == 0);
1921 if (adapter->flags & USING_MSIX)
1922 return t4vf_sge_intr_msix;
1923 else
1924 return t4vf_intr_msi;
1925}
1926
1927/**
1928 * sge_rx_timer_cb - perform periodic maintenance of SGE RX queues
1929 * @data: the adapter
1930 *
1931 * Runs periodically from a timer to perform maintenance of SGE RX queues.
1932 *
1933 * a) Replenishes RX queues that have run out due to memory shortage.
1934 * Normally new RX buffers are added when existing ones are consumed but
1935 * when out of memory a queue can become empty. We schedule NAPI to do
1936 * the actual refill.
1937 */
1938static void sge_rx_timer_cb(unsigned long data)
1939{
1940 struct adapter *adapter = (struct adapter *)data;
1941 struct sge *s = &adapter->sge;
1942 unsigned int i;
1943
1944 /*
1945 * Scan the "Starving Free Lists" flag array looking for any Free
1946 * Lists in need of more free buffers. If we find one and it's not
1947 * being actively polled, then bump its "starving" counter and attempt
1948 * to refill it. If we're successful in adding enough buffers to push
1949 * the Free List over the starving threshold, then we can clear its
1950 * "starving" status.
1951 */
1952 for (i = 0; i < ARRAY_SIZE(s->starving_fl); i++) {
1953 unsigned long m;
1954
1955 for (m = s->starving_fl[i]; m; m &= m - 1) {
1956 unsigned int id = __ffs(m) + i * BITS_PER_LONG;
1957 struct sge_fl *fl = s->egr_map[id];
1958
1959 clear_bit(id, s->starving_fl);
1960 smp_mb__after_clear_bit();
1961
1962 /*
1963 * Since we are accessing fl without a lock there's a
1964 * small probability of a false positive where we
1965 * schedule napi but the FL is no longer starving.
1966 * No biggie.
1967 */
1968 if (fl_starving(fl)) {
1969 struct sge_eth_rxq *rxq;
1970
1971 rxq = container_of(fl, struct sge_eth_rxq, fl);
1972 if (napi_reschedule(&rxq->rspq.napi))
1973 fl->starving++;
1974 else
1975 set_bit(id, s->starving_fl);
1976 }
1977 }
1978 }
1979
1980 /*
1981 * Reschedule the next scan for starving Free Lists ...
1982 */
1983 mod_timer(&s->rx_timer, jiffies + RX_QCHECK_PERIOD);
1984}
1985
1986/**
1987 * sge_tx_timer_cb - perform periodic maintenance of SGE Tx queues
1988 * @data: the adapter
1989 *
1990 * Runs periodically from a timer to perform maintenance of SGE TX queues.
1991 *
1992 * b) Reclaims completed Tx packets for the Ethernet queues. Normally
1993 * packets are cleaned up by new Tx packets; this timer cleans up packets
1994 * when no new packets are being submitted. This is essential for pktgen,
1995 * at least.
1996 */
1997static void sge_tx_timer_cb(unsigned long data)
1998{
1999 struct adapter *adapter = (struct adapter *)data;
2000 struct sge *s = &adapter->sge;
2001 unsigned int i, budget;
2002
2003 budget = MAX_TIMER_TX_RECLAIM;
2004 i = s->ethtxq_rover;
2005 do {
2006 struct sge_eth_txq *txq = &s->ethtxq[i];
2007
2008 if (reclaimable(&txq->q) && __netif_tx_trylock(txq->txq)) {
2009 int avail = reclaimable(&txq->q);
2010
2011 if (avail > budget)
2012 avail = budget;
2013
2014 free_tx_desc(adapter, &txq->q, avail, true);
2015 txq->q.in_use -= avail;
2016 __netif_tx_unlock(txq->txq);
2017
2018 budget -= avail;
2019 if (!budget)
2020 break;
2021 }
2022
2023 i++;
2024 if (i >= s->ethqsets)
2025 i = 0;
2026 } while (i != s->ethtxq_rover);
2027 s->ethtxq_rover = i;
2028
2029 /*
2030 * If we found too many reclaimable packets schedule a timer in the
2031 * near future to continue where we left off. Otherwise the next timer
2032 * will be at its normal interval.
2033 */
2034 mod_timer(&s->tx_timer, jiffies + (budget ? TX_QCHECK_PERIOD : 2));
2035}
2036
2037/**
2038 * t4vf_sge_alloc_rxq - allocate an SGE RX Queue
2039 * @adapter: the adapter
2040 * @rspq: pointer to the new rxq's Response Queue to be filled in
2041 * @iqasynch: if 0, a normal rspq; if 1, an asynchronous event queue
2042 * @dev: the network device associated with the new rspq
2043 * @intr_dest: MSI-X vector index (overridden in MSI mode)
2044 * @fl: pointer to the new rxq's Free List to be filled in
2045 * @hnd: the interrupt handler to invoke for the rspq
2046 */
2047int t4vf_sge_alloc_rxq(struct adapter *adapter, struct sge_rspq *rspq,
2048 bool iqasynch, struct net_device *dev,
2049 int intr_dest,
2050 struct sge_fl *fl, rspq_handler_t hnd)
2051{
2052 struct port_info *pi = netdev_priv(dev);
2053 struct fw_iq_cmd cmd, rpl;
2054 int ret, iqandst, flsz = 0;
2055
2056 /*
2057 * If we're using MSI interrupts and we're not initializing the
2058 * Forwarded Interrupt Queue itself, then set up this queue for
2059 * indirect interrupts to the Forwarded Interrupt Queue. Obviously
2060 * the Forwarded Interrupt Queue must be set up before any other
2061 * ingress queue ...
2062 */
2063 if ((adapter->flags & USING_MSI) && rspq != &adapter->sge.intrq) {
2064 iqandst = SGE_INTRDST_IQ;
2065 intr_dest = adapter->sge.intrq.abs_id;
2066 } else
2067 iqandst = SGE_INTRDST_PCI;
2068
2069 /*
2070 * Allocate the hardware ring for the Response Queue. The size needs
2071 * to be a multiple of 16 which includes the mandatory status entry
2072 * (regardless of whether the Status Page capabilities are enabled or
2073 * not).
2074 */
2075 rspq->size = roundup(rspq->size, 16);
2076 rspq->desc = alloc_ring(adapter->pdev_dev, rspq->size, rspq->iqe_len,
2077 0, &rspq->phys_addr, NULL, 0);
2078 if (!rspq->desc)
2079 return -ENOMEM;
2080
2081 /*
2082 * Fill in the Ingress Queue Command. Note: Ideally this code would
2083 * be in t4vf_hw.c but there are so many parameters and dependencies
2084 * on our Linux SGE state that we would end up having to pass tons of
2085 * parameters. We'll have to think about how this might be migrated
2086 * into OS-independent common code ...
2087 */
2088 memset(&cmd, 0, sizeof(cmd));
2089 cmd.op_to_vfn = cpu_to_be32(FW_CMD_OP(FW_IQ_CMD) |
2090 FW_CMD_REQUEST |
2091 FW_CMD_WRITE |
2092 FW_CMD_EXEC);
2093 cmd.alloc_to_len16 = cpu_to_be32(FW_IQ_CMD_ALLOC |
2094 FW_IQ_CMD_IQSTART(1) |
2095 FW_LEN16(cmd));
2096 cmd.type_to_iqandstindex =
2097 cpu_to_be32(FW_IQ_CMD_TYPE(FW_IQ_TYPE_FL_INT_CAP) |
2098 FW_IQ_CMD_IQASYNCH(iqasynch) |
2099 FW_IQ_CMD_VIID(pi->viid) |
2100 FW_IQ_CMD_IQANDST(iqandst) |
2101 FW_IQ_CMD_IQANUS(1) |
2102 FW_IQ_CMD_IQANUD(SGE_UPDATEDEL_INTR) |
2103 FW_IQ_CMD_IQANDSTINDEX(intr_dest));
2104 cmd.iqdroprss_to_iqesize =
2105 cpu_to_be16(FW_IQ_CMD_IQPCIECH(pi->port_id) |
2106 FW_IQ_CMD_IQGTSMODE |
2107 FW_IQ_CMD_IQINTCNTTHRESH(rspq->pktcnt_idx) |
2108 FW_IQ_CMD_IQESIZE(ilog2(rspq->iqe_len) - 4));
2109 cmd.iqsize = cpu_to_be16(rspq->size);
2110 cmd.iqaddr = cpu_to_be64(rspq->phys_addr);
2111
2112 if (fl) {
2113 /*
2114 * Allocate the ring for the hardware free list (with space
2115 * for its status page) along with the associated software
2116 * descriptor ring. The free list size needs to be a multiple
2117 * of the Egress Queue Unit.
2118 */
2119 fl->size = roundup(fl->size, FL_PER_EQ_UNIT);
2120 fl->desc = alloc_ring(adapter->pdev_dev, fl->size,
2121 sizeof(__be64), sizeof(struct rx_sw_desc),
2122 &fl->addr, &fl->sdesc, STAT_LEN);
2123 if (!fl->desc) {
2124 ret = -ENOMEM;
2125 goto err;
2126 }
2127
2128 /*
2129 * Calculate the size of the hardware free list ring plus
2130 * Status Page (which the SGE will place after the end of the
2131 * free list ring) in Egress Queue Units.
2132 */
2133 flsz = (fl->size / FL_PER_EQ_UNIT +
2134 STAT_LEN / EQ_UNIT);
2135
2136 /*
2137 * Fill in all the relevant firmware Ingress Queue Command
2138 * fields for the free list.
2139 */
2140 cmd.iqns_to_fl0congen =
2141 cpu_to_be32(
2142 FW_IQ_CMD_FL0HOSTFCMODE(SGE_HOSTFCMODE_NONE) |
2143 FW_IQ_CMD_FL0PACKEN |
2144 FW_IQ_CMD_FL0PADEN);
2145 cmd.fl0dcaen_to_fl0cidxfthresh =
2146 cpu_to_be16(
2147 FW_IQ_CMD_FL0FBMIN(SGE_FETCHBURSTMIN_64B) |
2148 FW_IQ_CMD_FL0FBMAX(SGE_FETCHBURSTMAX_512B));
2149 cmd.fl0size = cpu_to_be16(flsz);
2150 cmd.fl0addr = cpu_to_be64(fl->addr);
2151 }
2152
2153 /*
2154 * Issue the firmware Ingress Queue Command and extract the results if
2155 * it completes successfully.
2156 */
2157 ret = t4vf_wr_mbox(adapter, &cmd, sizeof(cmd), &rpl);
2158 if (ret)
2159 goto err;
2160
2161 netif_napi_add(dev, &rspq->napi, napi_rx_handler, 64);
2162 rspq->cur_desc = rspq->desc;
2163 rspq->cidx = 0;
2164 rspq->gen = 1;
2165 rspq->next_intr_params = rspq->intr_params;
2166 rspq->cntxt_id = be16_to_cpu(rpl.iqid);
2167 rspq->abs_id = be16_to_cpu(rpl.physiqid);
2168 rspq->size--; /* subtract status entry */
2169 rspq->adapter = adapter;
2170 rspq->netdev = dev;
2171 rspq->handler = hnd;
2172
2173 /* set offset to -1 to distinguish ingress queues without FL */
2174 rspq->offset = fl ? 0 : -1;
2175
2176 if (fl) {
2177 fl->cntxt_id = be16_to_cpu(rpl.fl0id);
2178 fl->avail = 0;
2179 fl->pend_cred = 0;
2180 fl->pidx = 0;
2181 fl->cidx = 0;
2182 fl->alloc_failed = 0;
2183 fl->large_alloc_failed = 0;
2184 fl->starving = 0;
2185 refill_fl(adapter, fl, fl_cap(fl), GFP_KERNEL);
2186 }
2187
2188 return 0;
2189
2190err:
2191 /*
2192 * An error occurred. Clean up our partial allocation state and
2193 * return the error.
2194 */
2195 if (rspq->desc) {
2196 dma_free_coherent(adapter->pdev_dev, rspq->size * rspq->iqe_len,
2197 rspq->desc, rspq->phys_addr);
2198 rspq->desc = NULL;
2199 }
2200 if (fl && fl->desc) {
2201 kfree(fl->sdesc);
2202 fl->sdesc = NULL;
2203 dma_free_coherent(adapter->pdev_dev, flsz * EQ_UNIT,
2204 fl->desc, fl->addr);
2205 fl->desc = NULL;
2206 }
2207 return ret;
2208}
2209
2210/**
2211 * t4vf_sge_alloc_eth_txq - allocate an SGE Ethernet TX Queue
2212 * @adapter: the adapter
2213 * @txq: pointer to the new txq to be filled in
2214 * @devq: the network TX queue associated with the new txq
2215 * @iqid: the relative ingress queue ID to which events relating to
2216 * the new txq should be directed
2217 */
2218int t4vf_sge_alloc_eth_txq(struct adapter *adapter, struct sge_eth_txq *txq,
2219 struct net_device *dev, struct netdev_queue *devq,
2220 unsigned int iqid)
2221{
2222 int ret, nentries;
2223 struct fw_eq_eth_cmd cmd, rpl;
2224 struct port_info *pi = netdev_priv(dev);
2225
2226 /*
2227 * Calculate the size of the hardware TX Queue (including the Status
2228 * Page on the end of the TX Queue) in units of TX Descriptors.
2229 */
2230 nentries = txq->q.size + STAT_LEN / sizeof(struct tx_desc);
2231
2232 /*
2233 * Allocate the hardware ring for the TX ring (with space for its
2234 * status page) along with the associated software descriptor ring.
2235 */
2236 txq->q.desc = alloc_ring(adapter->pdev_dev, txq->q.size,
2237 sizeof(struct tx_desc),
2238 sizeof(struct tx_sw_desc),
2239 &txq->q.phys_addr, &txq->q.sdesc, STAT_LEN);
2240 if (!txq->q.desc)
2241 return -ENOMEM;
2242
2243 /*
2244 * Fill in the Egress Queue Command. Note: As with the direct use of
2245 * the firmware Ingress Queue Command above in our RXQ allocation
2246 * routine, ideally, this code would be in t4vf_hw.c. Again, we'll
2247 * have to see if there's some reasonable way to parameterize it
2248 * into the common code ...
2249 */
2250 memset(&cmd, 0, sizeof(cmd));
2251 cmd.op_to_vfn = cpu_to_be32(FW_CMD_OP(FW_EQ_ETH_CMD) |
2252 FW_CMD_REQUEST |
2253 FW_CMD_WRITE |
2254 FW_CMD_EXEC);
2255 cmd.alloc_to_len16 = cpu_to_be32(FW_EQ_ETH_CMD_ALLOC |
2256 FW_EQ_ETH_CMD_EQSTART |
2257 FW_LEN16(cmd));
2258 cmd.viid_pkd = cpu_to_be32(FW_EQ_ETH_CMD_VIID(pi->viid));
2259 cmd.fetchszm_to_iqid =
2260 cpu_to_be32(FW_EQ_ETH_CMD_HOSTFCMODE(SGE_HOSTFCMODE_STPG) |
2261 FW_EQ_ETH_CMD_PCIECHN(pi->port_id) |
2262 FW_EQ_ETH_CMD_IQID(iqid));
2263 cmd.dcaen_to_eqsize =
2264 cpu_to_be32(FW_EQ_ETH_CMD_FBMIN(SGE_FETCHBURSTMIN_64B) |
2265 FW_EQ_ETH_CMD_FBMAX(SGE_FETCHBURSTMAX_512B) |
2266 FW_EQ_ETH_CMD_CIDXFTHRESH(SGE_CIDXFLUSHTHRESH_32) |
2267 FW_EQ_ETH_CMD_EQSIZE(nentries));
2268 cmd.eqaddr = cpu_to_be64(txq->q.phys_addr);
2269
2270 /*
2271 * Issue the firmware Egress Queue Command and extract the results if
2272 * it completes successfully.
2273 */
2274 ret = t4vf_wr_mbox(adapter, &cmd, sizeof(cmd), &rpl);
2275 if (ret) {
2276 /*
2277 * The firmware Egress Queue Command failed for some reason.
2278 * Free up our partial allocation state and return the error.
2279 */
2280 kfree(txq->q.sdesc);
2281 txq->q.sdesc = NULL;
2282 dma_free_coherent(adapter->pdev_dev,
2283 nentries * sizeof(struct tx_desc),
2284 txq->q.desc, txq->q.phys_addr);
2285 txq->q.desc = NULL;
2286 return ret;
2287 }
2288
2289 txq->q.in_use = 0;
2290 txq->q.cidx = 0;
2291 txq->q.pidx = 0;
2292 txq->q.stat = (void *)&txq->q.desc[txq->q.size];
2293 txq->q.cntxt_id = FW_EQ_ETH_CMD_EQID_GET(be32_to_cpu(rpl.eqid_pkd));
2294 txq->q.abs_id =
2295 FW_EQ_ETH_CMD_PHYSEQID_GET(be32_to_cpu(rpl.physeqid_pkd));
2296 txq->txq = devq;
2297 txq->tso = 0;
2298 txq->tx_cso = 0;
2299 txq->vlan_ins = 0;
2300 txq->q.stops = 0;
2301 txq->q.restarts = 0;
2302 txq->mapping_err = 0;
2303 return 0;
2304}
2305
2306/*
2307 * Free the DMA map resources associated with a TX queue.
2308 */
2309static void free_txq(struct adapter *adapter, struct sge_txq *tq)
2310{
2311 dma_free_coherent(adapter->pdev_dev,
2312 tq->size * sizeof(*tq->desc) + STAT_LEN,
2313 tq->desc, tq->phys_addr);
2314 tq->cntxt_id = 0;
2315 tq->sdesc = NULL;
2316 tq->desc = NULL;
2317}
2318
2319/*
2320 * Free the resources associated with a response queue (possibly including a
2321 * free list).
2322 */
2323static void free_rspq_fl(struct adapter *adapter, struct sge_rspq *rspq,
2324 struct sge_fl *fl)
2325{
2326 unsigned int flid = fl ? fl->cntxt_id : 0xffff;
2327
2328 t4vf_iq_free(adapter, FW_IQ_TYPE_FL_INT_CAP,
2329 rspq->cntxt_id, flid, 0xffff);
2330 dma_free_coherent(adapter->pdev_dev, (rspq->size + 1) * rspq->iqe_len,
2331 rspq->desc, rspq->phys_addr);
2332 netif_napi_del(&rspq->napi);
2333 rspq->netdev = NULL;
2334 rspq->cntxt_id = 0;
2335 rspq->abs_id = 0;
2336 rspq->desc = NULL;
2337
2338 if (fl) {
2339 free_rx_bufs(adapter, fl, fl->avail);
2340 dma_free_coherent(adapter->pdev_dev,
2341 fl->size * sizeof(*fl->desc) + STAT_LEN,
2342 fl->desc, fl->addr);
2343 kfree(fl->sdesc);
2344 fl->sdesc = NULL;
2345 fl->cntxt_id = 0;
2346 fl->desc = NULL;
2347 }
2348}
2349
2350/**
2351 * t4vf_free_sge_resources - free SGE resources
2352 * @adapter: the adapter
2353 *
2354 * Frees resources used by the SGE queue sets.
2355 */
2356void t4vf_free_sge_resources(struct adapter *adapter)
2357{
2358 struct sge *s = &adapter->sge;
2359 struct sge_eth_rxq *rxq = s->ethrxq;
2360 struct sge_eth_txq *txq = s->ethtxq;
2361 struct sge_rspq *evtq = &s->fw_evtq;
2362 struct sge_rspq *intrq = &s->intrq;
2363 int qs;
2364
2365 for (qs = 0; qs < adapter->sge.ethqsets; qs++, rxq++, txq++) {
2366 if (rxq->rspq.desc)
2367 free_rspq_fl(adapter, &rxq->rspq, &rxq->fl);
2368 if (txq->q.desc) {
2369 t4vf_eth_eq_free(adapter, txq->q.cntxt_id);
2370 free_tx_desc(adapter, &txq->q, txq->q.in_use, true);
2371 kfree(txq->q.sdesc);
2372 free_txq(adapter, &txq->q);
2373 }
2374 }
2375 if (evtq->desc)
2376 free_rspq_fl(adapter, evtq, NULL);
2377 if (intrq->desc)
2378 free_rspq_fl(adapter, intrq, NULL);
2379}
2380
2381/**
2382 * t4vf_sge_start - enable SGE operation
2383 * @adapter: the adapter
2384 *
2385 * Start tasklets and timers associated with the DMA engine.
2386 */
2387void t4vf_sge_start(struct adapter *adapter)
2388{
2389 adapter->sge.ethtxq_rover = 0;
2390 mod_timer(&adapter->sge.rx_timer, jiffies + RX_QCHECK_PERIOD);
2391 mod_timer(&adapter->sge.tx_timer, jiffies + TX_QCHECK_PERIOD);
2392}
2393
2394/**
2395 * t4vf_sge_stop - disable SGE operation
2396 * @adapter: the adapter
2397 *
2398 * Stop tasklets and timers associated with the DMA engine. Note that
2399 * this is effective only if measures have been taken to disable any HW
2400 * events that may restart them.
2401 */
2402void t4vf_sge_stop(struct adapter *adapter)
2403{
2404 struct sge *s = &adapter->sge;
2405
2406 if (s->rx_timer.function)
2407 del_timer_sync(&s->rx_timer);
2408 if (s->tx_timer.function)
2409 del_timer_sync(&s->tx_timer);
2410}
2411
2412/**
2413 * t4vf_sge_init - initialize SGE
2414 * @adapter: the adapter
2415 *
2416 * Performs SGE initialization needed every time after a chip reset.
2417 * We do not initialize any of the queue sets here; instead, the driver
2418 * top-level must request those individually. We also do not enable DMA
2419 * here, that should be done after the queues have been set up.
2420 */
2421int t4vf_sge_init(struct adapter *adapter)
2422{
2423 struct sge_params *sge_params = &adapter->params.sge;
2424 u32 fl0 = sge_params->sge_fl_buffer_size[0];
2425 u32 fl1 = sge_params->sge_fl_buffer_size[1];
2426 struct sge *s = &adapter->sge;
2427
2428 /*
2429 * Start by vetting the basic SGE parameters which have been set up by
2430 * the Physical Function Driver. Ideally we should be able to deal
2431 * with _any_ configuration. Practice is different ...
2432 */
2433 if (fl0 != PAGE_SIZE || (fl1 != 0 && fl1 <= fl0)) {
2434 dev_err(adapter->pdev_dev, "bad SGE FL buffer sizes [%d, %d]\n",
2435 fl0, fl1);
2436 return -EINVAL;
2437 }
2438 if ((sge_params->sge_control & RXPKTCPLMODE) == 0) {
2439 dev_err(adapter->pdev_dev, "bad SGE CPL MODE\n");
2440 return -EINVAL;
2441 }
2442
2443 /*
2444 * Now translate the adapter parameters into our internal forms.
2445 */
2446 if (fl1)
2447 FL_PG_ORDER = ilog2(fl1) - PAGE_SHIFT;
2448 STAT_LEN = ((sge_params->sge_control & EGRSTATUSPAGESIZE) ? 128 : 64);
2449 PKTSHIFT = PKTSHIFT_GET(sge_params->sge_control);
2450 FL_ALIGN = 1 << (INGPADBOUNDARY_GET(sge_params->sge_control) +
2451 SGE_INGPADBOUNDARY_SHIFT);
2452
2453 /*
2454 * Set up tasklet timers.
2455 */
2456 setup_timer(&s->rx_timer, sge_rx_timer_cb, (unsigned long)adapter);
2457 setup_timer(&s->tx_timer, sge_tx_timer_cb, (unsigned long)adapter);
2458
2459 /*
2460 * Initialize Forwarded Interrupt Queue lock.
2461 */
2462 spin_lock_init(&s->intrq_lock);
2463
2464 return 0;
2465}
diff --git a/drivers/net/cxgb4vf/t4vf_common.h b/drivers/net/cxgb4vf/t4vf_common.h
new file mode 100644
index 00000000000..a65c80aed1f
--- /dev/null
+++ b/drivers/net/cxgb4vf/t4vf_common.h
@@ -0,0 +1,274 @@
1/*
2 * This file is part of the Chelsio T4 PCI-E SR-IOV Virtual Function Ethernet
3 * driver for Linux.
4 *
5 * Copyright (c) 2009-2010 Chelsio Communications, Inc. All rights reserved.
6 *
7 * This software is available to you under a choice of one of two
8 * licenses. You may choose to be licensed under the terms of the GNU
9 * General Public License (GPL) Version 2, available from the file
10 * COPYING in the main directory of this source tree, or the
11 * OpenIB.org BSD license below:
12 *
13 * Redistribution and use in source and binary forms, with or
14 * without modification, are permitted provided that the following
15 * conditions are met:
16 *
17 * - Redistributions of source code must retain the above
18 * copyright notice, this list of conditions and the following
19 * disclaimer.
20 *
21 * - Redistributions in binary form must reproduce the above
22 * copyright notice, this list of conditions and the following
23 * disclaimer in the documentation and/or other materials
24 * provided with the distribution.
25 *
26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
27 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
28 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
29 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
30 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
31 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
32 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33 * SOFTWARE.
34 */
35
36#ifndef __T4VF_COMMON_H__
37#define __T4VF_COMMON_H__
38
39#include "../cxgb4/t4fw_api.h"
40
41/*
42 * The "len16" field of a Firmware Command Structure ...
43 */
44#define FW_LEN16(fw_struct) FW_CMD_LEN16(sizeof(fw_struct) / 16)
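/*
 * For example (purely illustrative): a 32-byte command structure has
 * sizeof(fw_struct) / 16 == 2 and so encodes as FW_CMD_LEN16(2).
 */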
45
46/*
47 * Per-VF statistics.
48 */
49struct t4vf_port_stats {
50 /*
51 * TX statistics.
52 */
53 u64 tx_bcast_bytes; /* broadcast */
54 u64 tx_bcast_frames;
55 u64 tx_mcast_bytes; /* multicast */
56 u64 tx_mcast_frames;
57 u64 tx_ucast_bytes; /* unicast */
58 u64 tx_ucast_frames;
59 u64 tx_drop_frames; /* TX dropped frames */
60 u64 tx_offload_bytes; /* offload */
61 u64 tx_offload_frames;
62
63 /*
64 * RX statistics.
65 */
66 u64 rx_bcast_bytes; /* broadcast */
67 u64 rx_bcast_frames;
68 u64 rx_mcast_bytes; /* multicast */
69 u64 rx_mcast_frames;
70 u64 rx_ucast_bytes;
71 u64 rx_ucast_frames; /* unicast */
72
73 u64 rx_err_frames; /* RX error frames */
74};
75
76/*
77 * Per-"port" (Virtual Interface) link configuration ...
78 */
79struct link_config {
80 unsigned int supported; /* link capabilities */
81 unsigned int advertising; /* advertised capabilities */
82 unsigned short requested_speed; /* speed user has requested */
83 unsigned short speed; /* actual link speed */
84 unsigned char requested_fc; /* flow control user has requested */
85 unsigned char fc; /* actual link flow control */
86 unsigned char autoneg; /* autonegotiating? */
87 unsigned char link_ok; /* link up? */
88};
89
90enum {
91 PAUSE_RX = 1 << 0,
92 PAUSE_TX = 1 << 1,
93 PAUSE_AUTONEG = 1 << 2
94};
95
96/*
97 * General device parameters ...
98 */
99struct dev_params {
100 u32 fwrev; /* firmware version */
101 u32 tprev; /* TP Microcode Version */
102};
103
104/*
105 * Scatter Gather Engine parameters. These are almost all determined by the
106 * Physical Function Driver. We just need to grab them to see within which
107 * environment we're playing ...
108 */
109struct sge_params {
110 u32 sge_control; /* padding, boundaries, lengths, etc. */
111 u32 sge_host_page_size; /* RDMA page sizes */
112 u32 sge_queues_per_page; /* RDMA queues/page */
113 u32 sge_user_mode_limits; /* limits for BAR2 user mode accesses */
114 u32 sge_fl_buffer_size[16]; /* free list buffer sizes */
115 u32 sge_ingress_rx_threshold; /* RX counter interrupt threshold[4] */
116 u32 sge_timer_value_0_and_1; /* interrupt coalescing timer values */
117 u32 sge_timer_value_2_and_3;
118 u32 sge_timer_value_4_and_5;
119};
120
121/*
122 * Vital Product Data parameters.
123 */
124struct vpd_params {
125 u32 cclk; /* Core Clock (KHz) */
126};
127
128/*
129 * Global Receive Side Scaling (RSS) parameters in host-native format.
130 */
131struct rss_params {
132 unsigned int mode; /* RSS mode */
133 union {
134 struct {
135 unsigned int synmapen:1; /* SYN Map Enable */
136 unsigned int syn4tupenipv6:1; /* enable hashing 4-tuple IPv6 SYNs */
137 unsigned int syn2tupenipv6:1; /* enable hashing 2-tuple IPv6 SYNs */
138 unsigned int syn4tupenipv4:1; /* enable hashing 4-tuple IPv4 SYNs */
139 unsigned int syn2tupenipv4:1; /* enable hashing 2-tuple IPv4 SYNs */
140 unsigned int ofdmapen:1; /* Offload Map Enable */
141 unsigned int tnlmapen:1; /* Tunnel Map Enable */
142 unsigned int tnlalllookup:1; /* Tunnel All Lookup */
143 unsigned int hashtoeplitz:1; /* use Toeplitz hash */
144 } basicvirtual;
145 } u;
146};
147
148/*
149 * Virtual Interface RSS Configuration in host-native format.
150 */
151union rss_vi_config {
152 struct {
153 u16 defaultq; /* Ingress Queue ID for !tnlalllookup */
154 unsigned int ip6fourtupen:1; /* hash 4-tuple IPv6 ingress packets */
155 unsigned int ip6twotupen:1; /* hash 2-tuple IPv6 ingress packets */
156 unsigned int ip4fourtupen:1; /* hash 4-tuple IPv4 ingress packets */
157 unsigned int ip4twotupen:1; /* hash 2-tuple IPv4 ingress packets */
158 int udpen; /* hash 4-tuple UDP ingress packets */
159 } basicvirtual;
160};
161
162/*
163 * Maximum resources provisioned for a PCI VF.
164 */
165struct vf_resources {
166 unsigned int nvi; /* N virtual interfaces */
167 unsigned int neq; /* N egress Qs */
168 unsigned int nethctrl; /* N egress ETH or CTRL Qs */
169 unsigned int niqflint; /* N ingress Qs/w free list(s) & intr */
170 unsigned int niq; /* N ingress Qs */
171 unsigned int tc; /* PCI-E traffic class */
172 unsigned int pmask; /* port access rights mask */
173 unsigned int nexactf; /* N exact MPS filters */
174 unsigned int r_caps; /* read capabilities */
175 unsigned int wx_caps; /* write/execute capabilities */
176};
177
178/*
179 * Per-"adapter" (Virtual Function) parameters.
180 */
181struct adapter_params {
182 struct dev_params dev; /* general device parameters */
183 struct sge_params sge; /* Scatter Gather Engine */
184 struct vpd_params vpd; /* Vital Product Data */
185 struct rss_params rss; /* Receive Side Scaling */
186 struct vf_resources vfres; /* Virtual Function Resource limits */
187 u8 nports; /* # of Ethernet "ports" */
188};
189
190#include "adapter.h"
191
192#ifndef PCI_VENDOR_ID_CHELSIO
193# define PCI_VENDOR_ID_CHELSIO 0x1425
194#endif
195
196#define for_each_port(adapter, iter) \
197 for (iter = 0; iter < (adapter)->params.nports; iter++)
198
199static inline bool is_10g_port(const struct link_config *lc)
200{
201 return (lc->supported & SUPPORTED_10000baseT_Full) != 0;
202}
203
204static inline unsigned int core_ticks_per_usec(const struct adapter *adapter)
205{
206 return adapter->params.vpd.cclk / 1000;
207}
208
209static inline unsigned int us_to_core_ticks(const struct adapter *adapter,
210 unsigned int us)
211{
212 return (us * adapter->params.vpd.cclk) / 1000;
213}
214
215static inline unsigned int core_ticks_to_us(const struct adapter *adapter,
216 unsigned int ticks)
217{
218 return (ticks * 1000) / adapter->params.vpd.cclk;
219}
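/*
 * Worked example with purely hypothetical numbers: a core clock of
 * 200000 KHz (200 MHz) gives core_ticks_per_usec() == 200, and
 * us_to_core_ticks(adapter, 5) == (5 * 200000) / 1000 == 1000 ticks.
 */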
220
221int t4vf_wr_mbox_core(struct adapter *, const void *, int, void *, bool);
222
223static inline int t4vf_wr_mbox(struct adapter *adapter, const void *cmd,
224 int size, void *rpl)
225{
226 return t4vf_wr_mbox_core(adapter, cmd, size, rpl, true);
227}
228
229static inline int t4vf_wr_mbox_ns(struct adapter *adapter, const void *cmd,
230 int size, void *rpl)
231{
232 return t4vf_wr_mbox_core(adapter, cmd, size, rpl, false);
233}
234
235int __devinit t4vf_wait_dev_ready(struct adapter *);
236int __devinit t4vf_port_init(struct adapter *, int);
237
238int t4vf_fw_reset(struct adapter *);
239int t4vf_query_params(struct adapter *, unsigned int, const u32 *, u32 *);
240int t4vf_set_params(struct adapter *, unsigned int, const u32 *, const u32 *);
241
242int t4vf_get_sge_params(struct adapter *);
243int t4vf_get_vpd_params(struct adapter *);
244int t4vf_get_dev_params(struct adapter *);
245int t4vf_get_rss_glb_config(struct adapter *);
246int t4vf_get_vfres(struct adapter *);
247
248int t4vf_read_rss_vi_config(struct adapter *, unsigned int,
249 union rss_vi_config *);
250int t4vf_write_rss_vi_config(struct adapter *, unsigned int,
251 union rss_vi_config *);
252int t4vf_config_rss_range(struct adapter *, unsigned int, int, int,
253 const u16 *, int);
254
255int t4vf_alloc_vi(struct adapter *, int);
256int t4vf_free_vi(struct adapter *, int);
257int t4vf_enable_vi(struct adapter *, unsigned int, bool, bool);
258int t4vf_identify_port(struct adapter *, unsigned int, unsigned int);
259
260int t4vf_set_rxmode(struct adapter *, unsigned int, int, int, int, int, int,
261 bool);
262int t4vf_alloc_mac_filt(struct adapter *, unsigned int, bool, unsigned int,
263 const u8 **, u16 *, u64 *, bool);
264int t4vf_change_mac(struct adapter *, unsigned int, int, const u8 *, bool);
265int t4vf_set_addr_hash(struct adapter *, unsigned int, bool, u64, bool);
266int t4vf_get_port_stats(struct adapter *, int, struct t4vf_port_stats *);
267
268int t4vf_iq_free(struct adapter *, unsigned int, unsigned int, unsigned int,
269 unsigned int);
270int t4vf_eth_eq_free(struct adapter *, unsigned int);
271
272int t4vf_handle_fw_rpl(struct adapter *, const __be64 *);
273
274#endif /* __T4VF_COMMON_H__ */
diff --git a/drivers/net/cxgb4vf/t4vf_defs.h b/drivers/net/cxgb4vf/t4vf_defs.h
new file mode 100644
index 00000000000..c7b127d9376
--- /dev/null
+++ b/drivers/net/cxgb4vf/t4vf_defs.h
@@ -0,0 +1,121 @@
1/*
2 * This file is part of the Chelsio T4 PCI-E SR-IOV Virtual Function Ethernet
3 * driver for Linux.
4 *
5 * Copyright (c) 2009-2010 Chelsio Communications, Inc. All rights reserved.
6 *
7 * This software is available to you under a choice of one of two
8 * licenses. You may choose to be licensed under the terms of the GNU
9 * General Public License (GPL) Version 2, available from the file
10 * COPYING in the main directory of this source tree, or the
11 * OpenIB.org BSD license below:
12 *
13 * Redistribution and use in source and binary forms, with or
14 * without modification, are permitted provided that the following
15 * conditions are met:
16 *
17 * - Redistributions of source code must retain the above
18 * copyright notice, this list of conditions and the following
19 * disclaimer.
20 *
21 * - Redistributions in binary form must reproduce the above
22 * copyright notice, this list of conditions and the following
23 * disclaimer in the documentation and/or other materials
24 * provided with the distribution.
25 *
26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
27 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
28 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
29 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
30 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
31 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
32 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33 * SOFTWARE.
34 */
35
36#ifndef __T4VF_DEFS_H__
37#define __T4VF_DEFS_H__
38
39#include "../cxgb4/t4_regs.h"
40
41/*
42 * The VF Register Map.
43 *
44 * The Scatter Gather Engine (SGE), Multiport Support module (MPS), PIO Local
45 * bus module (PL) and CPU Interface Module (CIM) components are mapped via
46 * the Slice to Module Map Table (see below) in the Physical Function Register
47 * Map. The Mail Box Data (MBDATA) range is mapped via the PCI-E Mailbox Base
48 * and Offset registers in the PF Register Map. The MBDATA base address is
49 * quite constrained as it determines the Mailbox Data addresses for both PFs
50 * and VFs, and therefore must fit in both the VF and PF Register Maps without
51 * overlapping other registers.
52 */
53#define T4VF_SGE_BASE_ADDR 0x0000
54#define T4VF_MPS_BASE_ADDR 0x0100
55#define T4VF_PL_BASE_ADDR 0x0200
56#define T4VF_MBDATA_BASE_ADDR 0x0240
57#define T4VF_CIM_BASE_ADDR 0x0300
58
59#define T4VF_REGMAP_START 0x0000
60#define T4VF_REGMAP_SIZE 0x0400
61
62/*
63 * There's no hardware limitation which requires that the addresses of the
64 * Mailbox Data in the fixed CIM PF map and the programmable VF map must
65 * match. However, it's a useful convention ...
66 */
67#if T4VF_MBDATA_BASE_ADDR != CIM_PF_MAILBOX_DATA
68#error T4VF_MBDATA_BASE_ADDR must match CIM_PF_MAILBOX_DATA!
69#endif
70
71/*
72 * Virtual Function "Slice to Module Map Table" definitions.
73 *
74 * This table allows us to map subsets of the various module register sets
75 * into the T4VF Register Map. Each table entry identifies the index of the
76 * module whose registers are being mapped, the offset within the module's
77 * register set that the mapping should start at, the limit of the mapping,
78 * and the offset within the T4VF Register Map to which the module's registers
79 * are being mapped. All addresses and quantities are in terms of 32-bit
80 * words. The "limit" value is also in terms of 32-bit words and is equal to
81 * the last address mapped in the T4VF Register Map (i.e. it's a "<="
82 * relation rather than a "<").
83 */
84#define T4VF_MOD_MAP(module, index, first, last) \
85 T4VF_MOD_MAP_##module##_INDEX = (index), \
86 T4VF_MOD_MAP_##module##_FIRST = (first), \
87 T4VF_MOD_MAP_##module##_LAST = (last), \
88 T4VF_MOD_MAP_##module##_OFFSET = ((first)/4), \
89 T4VF_MOD_MAP_##module##_BASE = \
90 (T4VF_##module##_BASE_ADDR/4 + (first)/4), \
91 T4VF_MOD_MAP_##module##_LIMIT = \
92 (T4VF_##module##_BASE_ADDR/4 + (last)/4),
93
94#define SGE_VF_KDOORBELL 0x0
95#define SGE_VF_GTS 0x4
96#define MPS_VF_CTL 0x0
97#define MPS_VF_STAT_RX_VF_ERR_FRAMES_H 0xfc
98#define PL_VF_WHOAMI 0x0
99#define CIM_VF_EXT_MAILBOX_CTRL 0x0
100#define CIM_VF_EXT_MAILBOX_STATUS 0x4
101
102enum {
103 T4VF_MOD_MAP(SGE, 2, SGE_VF_KDOORBELL, SGE_VF_GTS)
104 T4VF_MOD_MAP(MPS, 0, MPS_VF_CTL, MPS_VF_STAT_RX_VF_ERR_FRAMES_H)
105 T4VF_MOD_MAP(PL, 3, PL_VF_WHOAMI, PL_VF_WHOAMI)
106 T4VF_MOD_MAP(CIM, 1, CIM_VF_EXT_MAILBOX_CTRL, CIM_VF_EXT_MAILBOX_STATUS)
107};
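/*
 * Worked expansion of the first entry above, using the constants defined
 * in this file: T4VF_MOD_MAP(SGE, 2, SGE_VF_KDOORBELL, SGE_VF_GTS) yields
 * T4VF_MOD_MAP_SGE_INDEX = 2, _FIRST = 0x0, _LAST = 0x4, _OFFSET = 0,
 * _BASE = (0x0000/4 + 0x0/4) = 0 and _LIMIT = (0x0000/4 + 0x4/4) = 1,
 * all expressed in 32-bit words.
 */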
108
109/*
110 * There isn't a Slice to Module Map Table entry for the Mailbox Data
111 * registers, but it's convenient to use similar names as above. There are 8
112 * little-endian 64-bit Mailbox Data registers. Note that the "instances"
113 * value below is in terms of 32-bit words which matches the "word" addressing
114 * space we use above for the Slice to Module Map Space.
115 */
116#define NUM_CIM_VF_MAILBOX_DATA_INSTANCES 16
117
118#define T4VF_MBDATA_FIRST 0
119#define T4VF_MBDATA_LAST ((NUM_CIM_VF_MAILBOX_DATA_INSTANCES-1)*4)
120
121#endif /* __T4VF_DEFS_H__ */
diff --git a/drivers/net/cxgb4vf/t4vf_hw.c b/drivers/net/cxgb4vf/t4vf_hw.c
new file mode 100644
index 00000000000..fe3fd3dad6f
--- /dev/null
+++ b/drivers/net/cxgb4vf/t4vf_hw.c
@@ -0,0 +1,1387 @@
1/*
2 * This file is part of the Chelsio T4 PCI-E SR-IOV Virtual Function Ethernet
3 * driver for Linux.
4 *
5 * Copyright (c) 2009-2010 Chelsio Communications, Inc. All rights reserved.
6 *
7 * This software is available to you under a choice of one of two
8 * licenses. You may choose to be licensed under the terms of the GNU
9 * General Public License (GPL) Version 2, available from the file
10 * COPYING in the main directory of this source tree, or the
11 * OpenIB.org BSD license below:
12 *
13 * Redistribution and use in source and binary forms, with or
14 * without modification, are permitted provided that the following
15 * conditions are met:
16 *
17 * - Redistributions of source code must retain the above
18 * copyright notice, this list of conditions and the following
19 * disclaimer.
20 *
21 * - Redistributions in binary form must reproduce the above
22 * copyright notice, this list of conditions and the following
23 * disclaimer in the documentation and/or other materials
24 * provided with the distribution.
25 *
26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
27 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
28 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
29 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
30 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
31 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
32 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33 * SOFTWARE.
34 */
35
36#include <linux/pci.h>
37
38#include "t4vf_common.h"
39#include "t4vf_defs.h"
40
41#include "../cxgb4/t4_regs.h"
42#include "../cxgb4/t4fw_api.h"
43
44/*
45 * Wait for the device to become ready (signified by our "who am I" register
46 * returning a value other than all 1's). Return an error if it doesn't
47 * become ready ...
48 */
49int __devinit t4vf_wait_dev_ready(struct adapter *adapter)
50{
51 const u32 whoami = T4VF_PL_BASE_ADDR + PL_VF_WHOAMI;
52 const u32 notready1 = 0xffffffff;
53 const u32 notready2 = 0xeeeeeeee;
54 u32 val;
55
56 val = t4_read_reg(adapter, whoami);
57 if (val != notready1 && val != notready2)
58 return 0;
59 msleep(500);
60 val = t4_read_reg(adapter, whoami);
61 if (val != notready1 && val != notready2)
62 return 0;
63 else
64 return -EIO;
65}
66
67/*
68 * Get the reply to a mailbox command and store it in @rpl in big-endian order
69 * (since the firmware data structures are specified in a big-endian layout).
70 */
71static void get_mbox_rpl(struct adapter *adapter, __be64 *rpl, int size,
72 u32 mbox_data)
73{
74 for ( ; size; size -= 8, mbox_data += 8)
75 *rpl++ = cpu_to_be64(t4_read_reg64(adapter, mbox_data));
76}
77
78/*
79 * Dump contents of mailbox with a leading tag.
80 */
81static void dump_mbox(struct adapter *adapter, const char *tag, u32 mbox_data)
82{
83 dev_err(adapter->pdev_dev,
84 "mbox %s: %llx %llx %llx %llx %llx %llx %llx %llx\n", tag,
85 (unsigned long long)t4_read_reg64(adapter, mbox_data + 0),
86 (unsigned long long)t4_read_reg64(adapter, mbox_data + 8),
87 (unsigned long long)t4_read_reg64(adapter, mbox_data + 16),
88 (unsigned long long)t4_read_reg64(adapter, mbox_data + 24),
89 (unsigned long long)t4_read_reg64(adapter, mbox_data + 32),
90 (unsigned long long)t4_read_reg64(adapter, mbox_data + 40),
91 (unsigned long long)t4_read_reg64(adapter, mbox_data + 48),
92 (unsigned long long)t4_read_reg64(adapter, mbox_data + 56));
93}
94
95/**
96 * t4vf_wr_mbox_core - send a command to FW through the mailbox
97 * @adapter: the adapter
98 * @cmd: the command to write
99 * @size: command length in bytes
100 * @rpl: where to optionally store the reply
101 * @sleep_ok: if true we may sleep while awaiting command completion
102 *
103 * Sends the given command to FW through the mailbox and waits for the
104 * FW to execute the command. If @rpl is not %NULL it is used to store
105 * the FW's reply to the command. The command and its optional reply
106 * are of the same length. FW can take up to 500 ms to respond.
107 * @sleep_ok determines whether we may sleep while awaiting the response.
108 * If sleeping is allowed we use progressive backoff otherwise we spin.
109 *
110 * The return value is 0 on success or a negative errno on failure. A
111 * failure can happen either because we are not able to execute the
112 * command or FW executes it but signals an error. In the latter case
113 * the return value is the error code indicated by FW (negated).
114 */
115int t4vf_wr_mbox_core(struct adapter *adapter, const void *cmd, int size,
116 void *rpl, bool sleep_ok)
117{
118 static const int delay[] = {
119 1, 1, 3, 5, 10, 10, 20, 50, 100
120 };
121
122 u32 v;
123 int i, ms, delay_idx;
124 const __be64 *p;
125 u32 mbox_data = T4VF_MBDATA_BASE_ADDR;
126 u32 mbox_ctl = T4VF_CIM_BASE_ADDR + CIM_VF_EXT_MAILBOX_CTRL;
127
128 /*
129 * Commands must be multiples of 16 bytes in length and may not be
130 * larger than the size of the Mailbox Data register array.
131 */
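	/*
	 * (With NUM_CIM_VF_MAILBOX_DATA_INSTANCES == 16 32-bit registers,
	 * this caps commands at 16 * 4 = 64 bytes.)
	 */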
132 if ((size % 16) != 0 ||
133 size > NUM_CIM_VF_MAILBOX_DATA_INSTANCES * 4)
134 return -EINVAL;
135
136 /*
137 * Loop trying to get ownership of the mailbox. Return an error
138 * if we can't gain ownership.
139 */
140 v = MBOWNER_GET(t4_read_reg(adapter, mbox_ctl));
141 for (i = 0; v == MBOX_OWNER_NONE && i < 3; i++)
142 v = MBOWNER_GET(t4_read_reg(adapter, mbox_ctl));
143 if (v != MBOX_OWNER_DRV)
144 return v == MBOX_OWNER_FW ? -EBUSY : -ETIMEDOUT;
145
146 /*
147 * Write the command array into the Mailbox Data register array and
148 * transfer ownership of the mailbox to the firmware.
149 *
150 * For the VFs, the Mailbox Data "registers" are actually backed by
151 * T4's "MA" interface rather than PL Registers (as is the case for
152 * the PFs). Because these are in different coherency domains, the
153 * write to the VF's PL-register-backed Mailbox Control can race in
154 * front of the writes to the MA-backed VF Mailbox Data "registers".
155 * So we need to do a read-back on at least one byte of the VF Mailbox
156 * Data registers before doing the write to the VF Mailbox Control
157 * register.
158 */
159 for (i = 0, p = cmd; i < size; i += 8)
160 t4_write_reg64(adapter, mbox_data + i, be64_to_cpu(*p++));
161 t4_read_reg(adapter, mbox_data); /* flush write */
162
163 t4_write_reg(adapter, mbox_ctl,
164 MBMSGVALID | MBOWNER(MBOX_OWNER_FW));
165 t4_read_reg(adapter, mbox_ctl); /* flush write */
166
167 /*
168 * Spin waiting for firmware to acknowledge processing our command.
169 */
170 delay_idx = 0;
171 ms = delay[0];
172
173 for (i = 0; i < FW_CMD_MAX_TIMEOUT; i += ms) {
174 if (sleep_ok) {
175 ms = delay[delay_idx];
176 if (delay_idx < ARRAY_SIZE(delay) - 1)
177 delay_idx++;
178 msleep(ms);
179 } else
180 mdelay(ms);
181
182 /*
183 * If we're the owner, see if this is the reply we wanted.
184 */
185 v = t4_read_reg(adapter, mbox_ctl);
186 if (MBOWNER_GET(v) == MBOX_OWNER_DRV) {
187 /*
188 * If the Message Valid bit isn't on, revoke ownership
189 * of the mailbox and continue waiting for our reply.
190 */
191 if ((v & MBMSGVALID) == 0) {
192 t4_write_reg(adapter, mbox_ctl,
193 MBOWNER(MBOX_OWNER_NONE));
194 continue;
195 }
196
197 /*
198 * We now have our reply. Extract the command return
199 * value, copy the reply back to our caller's buffer
200 * (if specified) and revoke ownership of the mailbox.
201 * We return the (negated) firmware command return
202 * code (this depends on FW_SUCCESS == 0).
203 */
204
205 /* return value in low-order little-endian word */
206 v = t4_read_reg(adapter, mbox_data);
207 if (FW_CMD_RETVAL_GET(v))
208 dump_mbox(adapter, "FW Error", mbox_data);
209
210 if (rpl) {
211 /* request bit in high-order BE word */
212 WARN_ON((be32_to_cpu(*(const u32 *)cmd)
213 & FW_CMD_REQUEST) == 0);
214 get_mbox_rpl(adapter, rpl, size, mbox_data);
215 WARN_ON((be32_to_cpu(*(u32 *)rpl)
216 & FW_CMD_REQUEST) != 0);
217 }
218 t4_write_reg(adapter, mbox_ctl,
219 MBOWNER(MBOX_OWNER_NONE));
220 return -FW_CMD_RETVAL_GET(v);
221 }
222 }
223
224 /*
225 * We timed out. Return the error ...
226 */
227 dump_mbox(adapter, "FW Timeout", mbox_data);
228 return -ETIMEDOUT;
229}
230
231/**
232 * hash_mac_addr - return the hash value of a MAC address
233 * @addr: the 48-bit Ethernet MAC address
234 *
235 * Hashes a MAC address according to the hash function used by hardware
236 * inexact (hash) address matching.
237 */
238static int hash_mac_addr(const u8 *addr)
239{
240 u32 a = ((u32)addr[0] << 16) | ((u32)addr[1] << 8) | addr[2];
241 u32 b = ((u32)addr[3] << 16) | ((u32)addr[4] << 8) | addr[5];
242 a ^= b;
243 a ^= (a >> 12);
244 a ^= (a >> 6);
245 return a & 0x3f;
246}
247
248/**
249 * init_link_config - initialize a link's SW state
250 * @lc: structure holding the link state
251 * @caps: link capabilities
252 *
253 * Initializes the SW state maintained for each link, including the link's
254 * capabilities and default speed/flow-control/autonegotiation settings.
255 */
256static void __devinit init_link_config(struct link_config *lc,
257 unsigned int caps)
258{
259 lc->supported = caps;
260 lc->requested_speed = 0;
261 lc->speed = 0;
262 lc->requested_fc = lc->fc = PAUSE_RX | PAUSE_TX;
263 if (lc->supported & SUPPORTED_Autoneg) {
264 lc->advertising = lc->supported;
265 lc->autoneg = AUTONEG_ENABLE;
266 lc->requested_fc |= PAUSE_AUTONEG;
267 } else {
268 lc->advertising = 0;
269 lc->autoneg = AUTONEG_DISABLE;
270 }
271}
272
273/**
274 * t4vf_port_init - initialize port hardware/software state
275 * @adapter: the adapter
276 * @pidx: the adapter port index
277 */
278int __devinit t4vf_port_init(struct adapter *adapter, int pidx)
279{
280 struct port_info *pi = adap2pinfo(adapter, pidx);
281 struct fw_vi_cmd vi_cmd, vi_rpl;
282 struct fw_port_cmd port_cmd, port_rpl;
283 int v;
284 u32 word;
285
286 /*
287 * Execute a VI Read command to get our Virtual Interface information
288 * like MAC address, etc.
289 */
290 memset(&vi_cmd, 0, sizeof(vi_cmd));
291 vi_cmd.op_to_vfn = cpu_to_be32(FW_CMD_OP(FW_VI_CMD) |
292 FW_CMD_REQUEST |
293 FW_CMD_READ);
294 vi_cmd.alloc_to_len16 = cpu_to_be32(FW_LEN16(vi_cmd));
295 vi_cmd.type_viid = cpu_to_be16(FW_VI_CMD_VIID(pi->viid));
296 v = t4vf_wr_mbox(adapter, &vi_cmd, sizeof(vi_cmd), &vi_rpl);
297 if (v)
298 return v;
299
300 BUG_ON(pi->port_id != FW_VI_CMD_PORTID_GET(vi_rpl.portid_pkd));
301 pi->rss_size = FW_VI_CMD_RSSSIZE_GET(be16_to_cpu(vi_rpl.rsssize_pkd));
302 t4_os_set_hw_addr(adapter, pidx, vi_rpl.mac);
303
304 /*
305 * If we don't have read access to our port information, we're done
306 * now. Otherwise, execute a PORT Read command to get it ...
307 */
308 if (!(adapter->params.vfres.r_caps & FW_CMD_CAP_PORT))
309 return 0;
310
311 memset(&port_cmd, 0, sizeof(port_cmd));
312 port_cmd.op_to_portid = cpu_to_be32(FW_CMD_OP(FW_PORT_CMD) |
313 FW_CMD_REQUEST |
314 FW_CMD_READ |
315 FW_PORT_CMD_PORTID(pi->port_id));
316 port_cmd.action_to_len16 =
317 cpu_to_be32(FW_PORT_CMD_ACTION(FW_PORT_ACTION_GET_PORT_INFO) |
318 FW_LEN16(port_cmd));
319 v = t4vf_wr_mbox(adapter, &port_cmd, sizeof(port_cmd), &port_rpl);
320 if (v)
321 return v;
322
323 v = 0;
324 word = be16_to_cpu(port_rpl.u.info.pcap);
325 if (word & FW_PORT_CAP_SPEED_100M)
326 v |= SUPPORTED_100baseT_Full;
327 if (word & FW_PORT_CAP_SPEED_1G)
328 v |= SUPPORTED_1000baseT_Full;
329 if (word & FW_PORT_CAP_SPEED_10G)
330 v |= SUPPORTED_10000baseT_Full;
331 if (word & FW_PORT_CAP_ANEG)
332 v |= SUPPORTED_Autoneg;
333 init_link_config(&pi->link_cfg, v);
334
335 return 0;
336}
337
338/**
339 * t4vf_fw_reset - issue a reset to FW
340 * @adapter: the adapter
341 *
342 * Issues a reset command to FW. For a Physical Function this would
343 * result in the Firmware resetting all of its state. For a Virtual
344 * Function this just resets the state associated with the VF.
345 */
346int t4vf_fw_reset(struct adapter *adapter)
347{
348 struct fw_reset_cmd cmd;
349
350 memset(&cmd, 0, sizeof(cmd));
351 cmd.op_to_write = cpu_to_be32(FW_CMD_OP(FW_RESET_CMD) |
352 FW_CMD_WRITE);
353 cmd.retval_len16 = cpu_to_be32(FW_LEN16(cmd));
354 return t4vf_wr_mbox(adapter, &cmd, sizeof(cmd), NULL);
355}
356
357/**
358 * t4vf_query_params - query FW or device parameters
359 * @adapter: the adapter
360 * @nparams: the number of parameters
361 * @params: the parameter names
362 * @vals: the parameter values
363 *
364 * Reads the values of firmware or device parameters. Up to 7 parameters
365 * can be queried at once.
366 */
367int t4vf_query_params(struct adapter *adapter, unsigned int nparams,
368 const u32 *params, u32 *vals)
369{
370 int i, ret;
371 struct fw_params_cmd cmd, rpl;
372 struct fw_params_param *p;
373 size_t len16;
374
375 if (nparams > 7)
376 return -EINVAL;
377
378 memset(&cmd, 0, sizeof(cmd));
379 cmd.op_to_vfn = cpu_to_be32(FW_CMD_OP(FW_PARAMS_CMD) |
380 FW_CMD_REQUEST |
381 FW_CMD_READ);
382 len16 = DIV_ROUND_UP(offsetof(struct fw_params_cmd,
383 param[nparams].mnem), 16);
384 cmd.retval_len16 = cpu_to_be32(FW_CMD_LEN16(len16));
385 for (i = 0, p = &cmd.param[0]; i < nparams; i++, p++)
386 p->mnem = htonl(*params++);
387
388 ret = t4vf_wr_mbox(adapter, &cmd, sizeof(cmd), &rpl);
389 if (ret == 0)
390 for (i = 0, p = &rpl.param[0]; i < nparams; i++, p++)
391 *vals++ = be32_to_cpu(p->val);
392 return ret;
393}
394
395/**
396 * t4vf_set_params - sets FW or device parameters
397 * @adapter: the adapter
398 * @nparams: the number of parameters
399 * @params: the parameter names
400 * @vals: the parameter values
401 *
402 * Sets the values of firmware or device parameters. Up to 7 parameters
403 * can be specified at once.
404 */
405int t4vf_set_params(struct adapter *adapter, unsigned int nparams,
406 const u32 *params, const u32 *vals)
407{
408 int i;
409 struct fw_params_cmd cmd;
410 struct fw_params_param *p;
411 size_t len16;
412
413 if (nparams > 7)
414 return -EINVAL;
415
416 memset(&cmd, 0, sizeof(cmd));
417 cmd.op_to_vfn = cpu_to_be32(FW_CMD_OP(FW_PARAMS_CMD) |
418 FW_CMD_REQUEST |
419 FW_CMD_WRITE);
420 len16 = DIV_ROUND_UP(offsetof(struct fw_params_cmd,
421 param[nparams]), 16);
422 cmd.retval_len16 = cpu_to_be32(FW_CMD_LEN16(len16));
423 for (i = 0, p = &cmd.param[0]; i < nparams; i++, p++) {
424 p->mnem = cpu_to_be32(*params++);
425 p->val = cpu_to_be32(*vals++);
426 }
427
428 return t4vf_wr_mbox(adapter, &cmd, sizeof(cmd), NULL);
429}
430
431/**
432 * t4vf_get_sge_params - retrieve adapter Scatter Gather Engine parameters
433 * @adapter: the adapter
434 *
435 * Retrieves various core SGE parameters in the form of hardware SGE
436 * register values. The caller is responsible for decoding these as
437 * needed. The SGE parameters are stored in @adapter->params.sge.
438 */
439int t4vf_get_sge_params(struct adapter *adapter)
440{
441 struct sge_params *sge_params = &adapter->params.sge;
442 u32 params[7], vals[7];
443 int v;
444
445 params[0] = (FW_PARAMS_MNEM(FW_PARAMS_MNEM_REG) |
446 FW_PARAMS_PARAM_XYZ(SGE_CONTROL));
447 params[1] = (FW_PARAMS_MNEM(FW_PARAMS_MNEM_REG) |
448 FW_PARAMS_PARAM_XYZ(SGE_HOST_PAGE_SIZE));
449 params[2] = (FW_PARAMS_MNEM(FW_PARAMS_MNEM_REG) |
450 FW_PARAMS_PARAM_XYZ(SGE_FL_BUFFER_SIZE0));
451 params[3] = (FW_PARAMS_MNEM(FW_PARAMS_MNEM_REG) |
452 FW_PARAMS_PARAM_XYZ(SGE_FL_BUFFER_SIZE1));
453 params[4] = (FW_PARAMS_MNEM(FW_PARAMS_MNEM_REG) |
454 FW_PARAMS_PARAM_XYZ(SGE_TIMER_VALUE_0_AND_1));
455 params[5] = (FW_PARAMS_MNEM(FW_PARAMS_MNEM_REG) |
456 FW_PARAMS_PARAM_XYZ(SGE_TIMER_VALUE_2_AND_3));
457 params[6] = (FW_PARAMS_MNEM(FW_PARAMS_MNEM_REG) |
458 FW_PARAMS_PARAM_XYZ(SGE_TIMER_VALUE_4_AND_5));
459 v = t4vf_query_params(adapter, 7, params, vals);
460 if (v)
461 return v;
462 sge_params->sge_control = vals[0];
463 sge_params->sge_host_page_size = vals[1];
464 sge_params->sge_fl_buffer_size[0] = vals[2];
465 sge_params->sge_fl_buffer_size[1] = vals[3];
466 sge_params->sge_timer_value_0_and_1 = vals[4];
467 sge_params->sge_timer_value_2_and_3 = vals[5];
468 sge_params->sge_timer_value_4_and_5 = vals[6];
469
470 params[0] = (FW_PARAMS_MNEM(FW_PARAMS_MNEM_REG) |
471 FW_PARAMS_PARAM_XYZ(SGE_INGRESS_RX_THRESHOLD));
472 v = t4vf_query_params(adapter, 1, params, vals);
473 if (v)
474 return v;
475 sge_params->sge_ingress_rx_threshold = vals[0];
476
477 return 0;
478}
479
480/**
481 * t4vf_get_vpd_params - retrieve device VPD parameters
482 * @adapter: the adapter
483 *
484 * Retrieves various device Vital Product Data parameters. The parameters
485 * are stored in @adapter->params.vpd.
486 */
487int t4vf_get_vpd_params(struct adapter *adapter)
488{
489 struct vpd_params *vpd_params = &adapter->params.vpd;
490 u32 params[7], vals[7];
491 int v;
492
493 params[0] = (FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) |
494 FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_CCLK));
495 v = t4vf_query_params(adapter, 1, params, vals);
496 if (v)
497 return v;
498 vpd_params->cclk = vals[0];
499
500 return 0;
501}
502
503/**
504 * t4vf_get_dev_params - retrieve device parameters
505 * @adapter: the adapter
506 *
507 * Retrieves various device parameters. The parameters are stored in
508 * @adapter->params.dev.
509 */
510int t4vf_get_dev_params(struct adapter *adapter)
511{
512 struct dev_params *dev_params = &adapter->params.dev;
513 u32 params[7], vals[7];
514 int v;
515
516 params[0] = (FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) |
517 FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_FWREV));
518 params[1] = (FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) |
519 FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_TPREV));
520 v = t4vf_query_params(adapter, 2, params, vals);
521 if (v)
522 return v;
523 dev_params->fwrev = vals[0];
524 dev_params->tprev = vals[1];
525
526 return 0;
527}
528
529/**
530 * t4vf_get_rss_glb_config - retrieve adapter RSS Global Configuration
531 * @adapter: the adapter
532 *
533 * Retrieves global RSS mode and parameters with which we have to live
534 * and stores them in the @adapter's RSS parameters.
535 */
536int t4vf_get_rss_glb_config(struct adapter *adapter)
537{
538 struct rss_params *rss = &adapter->params.rss;
539 struct fw_rss_glb_config_cmd cmd, rpl;
540 int v;
541
542 /*
543 * Execute an RSS Global Configuration read command to retrieve
544 * our RSS configuration.
545 */
546 memset(&cmd, 0, sizeof(cmd));
547 cmd.op_to_write = cpu_to_be32(FW_CMD_OP(FW_RSS_GLB_CONFIG_CMD) |
548 FW_CMD_REQUEST |
549 FW_CMD_READ);
550 cmd.retval_len16 = cpu_to_be32(FW_LEN16(cmd));
551 v = t4vf_wr_mbox(adapter, &cmd, sizeof(cmd), &rpl);
552 if (v)
553 return v;
554
555 /*
556 * Translate the big-endian RSS Global Configuration into our
557 * cpu-endian format based on the RSS mode. We also do first level
558 * filtering at this point to weed out modes which don't support
559 * VF Drivers ...
560 */
561 rss->mode = FW_RSS_GLB_CONFIG_CMD_MODE_GET(
562 be32_to_cpu(rpl.u.manual.mode_pkd));
563 switch (rss->mode) {
564 case FW_RSS_GLB_CONFIG_CMD_MODE_BASICVIRTUAL: {
565 u32 word = be32_to_cpu(
566 rpl.u.basicvirtual.synmapen_to_hashtoeplitz);
567
568 rss->u.basicvirtual.synmapen =
569 ((word & FW_RSS_GLB_CONFIG_CMD_SYNMAPEN) != 0);
570 rss->u.basicvirtual.syn4tupenipv6 =
571 ((word & FW_RSS_GLB_CONFIG_CMD_SYN4TUPENIPV6) != 0);
572 rss->u.basicvirtual.syn2tupenipv6 =
573 ((word & FW_RSS_GLB_CONFIG_CMD_SYN2TUPENIPV6) != 0);
574 rss->u.basicvirtual.syn4tupenipv4 =
575 ((word & FW_RSS_GLB_CONFIG_CMD_SYN4TUPENIPV4) != 0);
576 rss->u.basicvirtual.syn2tupenipv4 =
577 ((word & FW_RSS_GLB_CONFIG_CMD_SYN2TUPENIPV4) != 0);
578
579 rss->u.basicvirtual.ofdmapen =
580 ((word & FW_RSS_GLB_CONFIG_CMD_OFDMAPEN) != 0);
581
582 rss->u.basicvirtual.tnlmapen =
583 ((word & FW_RSS_GLB_CONFIG_CMD_TNLMAPEN) != 0);
584 rss->u.basicvirtual.tnlalllookup =
585 ((word & FW_RSS_GLB_CONFIG_CMD_TNLALLLKP) != 0);
586
587 rss->u.basicvirtual.hashtoeplitz =
588 ((word & FW_RSS_GLB_CONFIG_CMD_HASHTOEPLITZ) != 0);
589
590 /* we need at least Tunnel Map Enable to be set */
591 if (!rss->u.basicvirtual.tnlmapen)
592 return -EINVAL;
593 break;
594 }
595
596 default:
597 /* all unknown/unsupported RSS modes result in an error */
598 return -EINVAL;
599 }
600
601 return 0;
602}
603
604/**
605 * t4vf_get_vfres - retrieve VF resource limits
606 * @adapter: the adapter
607 *
608 * Retrieves configured resource limits and capabilities for a virtual
609 * function. The results are stored in @adapter->params.vfres.
610 */
611int t4vf_get_vfres(struct adapter *adapter)
612{
613 struct vf_resources *vfres = &adapter->params.vfres;
614 struct fw_pfvf_cmd cmd, rpl;
615 int v;
616 u32 word;
617
618 /*
619 * Execute PFVF Read command to get VF resource limits; bail out early
620 * with error on command failure.
621 */
622 memset(&cmd, 0, sizeof(cmd));
623 cmd.op_to_vfn = cpu_to_be32(FW_CMD_OP(FW_PFVF_CMD) |
624 FW_CMD_REQUEST |
625 FW_CMD_READ);
626 cmd.retval_len16 = cpu_to_be32(FW_LEN16(cmd));
627 v = t4vf_wr_mbox(adapter, &cmd, sizeof(cmd), &rpl);
628 if (v)
629 return v;
630
631 /*
632 * Extract VF resource limits and return success.
633 */
634 word = be32_to_cpu(rpl.niqflint_niq);
635 vfres->niqflint = FW_PFVF_CMD_NIQFLINT_GET(word);
636 vfres->niq = FW_PFVF_CMD_NIQ_GET(word);
637
638 word = be32_to_cpu(rpl.type_to_neq);
639 vfres->neq = FW_PFVF_CMD_NEQ_GET(word);
640 vfres->pmask = FW_PFVF_CMD_PMASK_GET(word);
641
642 word = be32_to_cpu(rpl.tc_to_nexactf);
643 vfres->tc = FW_PFVF_CMD_TC_GET(word);
644 vfres->nvi = FW_PFVF_CMD_NVI_GET(word);
645 vfres->nexactf = FW_PFVF_CMD_NEXACTF_GET(word);
646
647 word = be32_to_cpu(rpl.r_caps_to_nethctrl);
648 vfres->r_caps = FW_PFVF_CMD_R_CAPS_GET(word);
649 vfres->wx_caps = FW_PFVF_CMD_WX_CAPS_GET(word);
650 vfres->nethctrl = FW_PFVF_CMD_NETHCTRL_GET(word);
651
652 return 0;
653}
654
655/**
656 * t4vf_read_rss_vi_config - read a VI's RSS configuration
657 * @adapter: the adapter
658 * @viid: Virtual Interface ID
659 * @config: pointer to host-native VI RSS Configuration buffer
660 *
661 * Reads the Virtual Interface's RSS configuration information and
662 * translates it into CPU-native format.
663 */
664int t4vf_read_rss_vi_config(struct adapter *adapter, unsigned int viid,
665 union rss_vi_config *config)
666{
667 struct fw_rss_vi_config_cmd cmd, rpl;
668 int v;
669
670 memset(&cmd, 0, sizeof(cmd));
671 cmd.op_to_viid = cpu_to_be32(FW_CMD_OP(FW_RSS_VI_CONFIG_CMD) |
672 FW_CMD_REQUEST |
673 FW_CMD_READ |
674 FW_RSS_VI_CONFIG_CMD_VIID(viid));
675 cmd.retval_len16 = cpu_to_be32(FW_LEN16(cmd));
676 v = t4vf_wr_mbox(adapter, &cmd, sizeof(cmd), &rpl);
677 if (v)
678 return v;
679
680 switch (adapter->params.rss.mode) {
681 case FW_RSS_GLB_CONFIG_CMD_MODE_BASICVIRTUAL: {
682 u32 word = be32_to_cpu(rpl.u.basicvirtual.defaultq_to_udpen);
683
684 config->basicvirtual.ip6fourtupen =
685 ((word & FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN) != 0);
686 config->basicvirtual.ip6twotupen =
687 ((word & FW_RSS_VI_CONFIG_CMD_IP6TWOTUPEN) != 0);
688 config->basicvirtual.ip4fourtupen =
689 ((word & FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN) != 0);
690 config->basicvirtual.ip4twotupen =
691 ((word & FW_RSS_VI_CONFIG_CMD_IP4TWOTUPEN) != 0);
692 config->basicvirtual.udpen =
693 ((word & FW_RSS_VI_CONFIG_CMD_UDPEN) != 0);
694 config->basicvirtual.defaultq =
695 FW_RSS_VI_CONFIG_CMD_DEFAULTQ_GET(word);
696 break;
697 }
698
699 default:
700 return -EINVAL;
701 }
702
703 return 0;
704}
705
706/**
707 * t4vf_write_rss_vi_config - write a VI's RSS configuration
708 * @adapter: the adapter
709 * @viid: Virtual Interface ID
710 * @config: pointer to host-native VI RSS Configuration buffer
711 *
712 * Write the Virtual Interface's RSS configuration information
713 * (translating it into firmware-native format before writing).
714 */
715int t4vf_write_rss_vi_config(struct adapter *adapter, unsigned int viid,
716 union rss_vi_config *config)
717{
718 struct fw_rss_vi_config_cmd cmd, rpl;
719
720 memset(&cmd, 0, sizeof(cmd));
721 cmd.op_to_viid = cpu_to_be32(FW_CMD_OP(FW_RSS_VI_CONFIG_CMD) |
722 FW_CMD_REQUEST |
723 FW_CMD_WRITE |
724 FW_RSS_VI_CONFIG_CMD_VIID(viid));
725 cmd.retval_len16 = cpu_to_be32(FW_LEN16(cmd));
726 switch (adapter->params.rss.mode) {
727 case FW_RSS_GLB_CONFIG_CMD_MODE_BASICVIRTUAL: {
728 u32 word = 0;
729
730 if (config->basicvirtual.ip6fourtupen)
731 word |= FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN;
732 if (config->basicvirtual.ip6twotupen)
733 word |= FW_RSS_VI_CONFIG_CMD_IP6TWOTUPEN;
734 if (config->basicvirtual.ip4fourtupen)
735 word |= FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN;
736 if (config->basicvirtual.ip4twotupen)
737 word |= FW_RSS_VI_CONFIG_CMD_IP4TWOTUPEN;
738 if (config->basicvirtual.udpen)
739 word |= FW_RSS_VI_CONFIG_CMD_UDPEN;
740 word |= FW_RSS_VI_CONFIG_CMD_DEFAULTQ(
741 config->basicvirtual.defaultq);
742 cmd.u.basicvirtual.defaultq_to_udpen = cpu_to_be32(word);
743 break;
744 }
745
746 default:
747 return -EINVAL;
748 }
749
750 return t4vf_wr_mbox(adapter, &cmd, sizeof(cmd), &rpl);
751}
752
753/**
754 * t4vf_config_rss_range - configure a portion of the RSS mapping table
755 * @adapter: the adapter
756 * @viid: Virtual Interface of RSS Table Slice
757 * @start: starting entry in the table to write
758 * @n: how many table entries to write
759 * @rspq: values for the "Response Queue" (Ingress Queue) lookup table
760 * @nrspq: number of values in @rspq
761 *
762 * Programs the selected part of the VI's RSS mapping table with the
763 * provided values. If @nrspq < @n the supplied values are used repeatedly
764 * until the full table range is populated.
765 *
766 * The caller must ensure the values in @rspq are in the range 0..1023.
767 */
768int t4vf_config_rss_range(struct adapter *adapter, unsigned int viid,
769 int start, int n, const u16 *rspq, int nrspq)
770{
771 const u16 *rsp = rspq;
772 const u16 *rsp_end = rspq+nrspq;
773 struct fw_rss_ind_tbl_cmd cmd;
774
775 /*
776 * Initialize firmware command template to write the RSS table.
777 */
778 memset(&cmd, 0, sizeof(cmd));
779 cmd.op_to_viid = cpu_to_be32(FW_CMD_OP(FW_RSS_IND_TBL_CMD) |
780 FW_CMD_REQUEST |
781 FW_CMD_WRITE |
782 FW_RSS_IND_TBL_CMD_VIID(viid));
783 cmd.retval_len16 = cpu_to_be32(FW_LEN16(cmd));
784
785 /*
786 * Each firmware RSS command can accommodate up to 32 RSS Ingress
787 * Queue Identifiers. These Ingress Queue IDs are packed three to
788	 * a 32-bit word as 10-bit values, with the remaining upper two bits
789 * reserved.
790 */
791 while (n > 0) {
792 __be32 *qp = &cmd.iq0_to_iq2;
793 int nq = min(n, 32);
794 int ret;
795
796 /*
797 * Set up the firmware RSS command header to send the next
798 * "nq" Ingress Queue IDs to the firmware.
799 */
800 cmd.niqid = cpu_to_be16(nq);
801 cmd.startidx = cpu_to_be16(start);
802
803 /*
804		 * Adjust "start" and "n" to account for the "nq" entries
805 */
806 start += nq;
807 n -= nq;
808
809 /*
810 * While there are still Ingress Queue IDs to stuff into the
811 * current firmware RSS command, retrieve them from the
812 * Ingress Queue ID array and insert them into the command.
813 */
814 while (nq > 0) {
815 /*
816 * Grab up to the next 3 Ingress Queue IDs (wrapping
817 * around the Ingress Queue ID array if necessary) and
818 * insert them into the firmware RSS command at the
819			 * current 3-tuple position within the command.
820 */
821 u16 qbuf[3];
822 u16 *qbp = qbuf;
823 int nqbuf = min(3, nq);
824
825 nq -= nqbuf;
826 qbuf[0] = qbuf[1] = qbuf[2] = 0;
827 while (nqbuf) {
828 nqbuf--;
829 *qbp++ = *rsp++;
830 if (rsp >= rsp_end)
831 rsp = rspq;
832 }
833 *qp++ = cpu_to_be32(FW_RSS_IND_TBL_CMD_IQ0(qbuf[0]) |
834 FW_RSS_IND_TBL_CMD_IQ1(qbuf[1]) |
835 FW_RSS_IND_TBL_CMD_IQ2(qbuf[2]));
836 }
837
838 /*
839		 * Send this portion of the RSS table update to the firmware;
840 * bail out on any errors.
841 */
842 ret = t4vf_wr_mbox(adapter, &cmd, sizeof(cmd), NULL);
843 if (ret)
844 return ret;
845 }
846 return 0;
847}
848
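/*
 * Usage sketch (illustrative only): spread a VI's RSS Indirection Table
 * slice evenly across a set of ingress queues.  The "table_size" and
 * "iqids" parameters are assumptions made for the example; in practice
 * the queue IDs come from the VI's allocated Queue Sets.  Because
 * t4vf_config_rss_range() repeats the supplied IDs as needed, passing
 * the array once covers the whole slice.
 */
static int example_spread_rss(struct adapter *adapter, unsigned int viid,
			      int table_size, const u16 *iqids, int niqids)
{
	return t4vf_config_rss_range(adapter, viid, 0, table_size,
				     iqids, niqids);
}
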
849/**
850 * t4vf_alloc_vi - allocate a virtual interface on a port
851 * @adapter: the adapter
852 * @port_id: physical port associated with the VI
853 *
854 * Allocate a new Virtual Interface and bind it to the indicated
855 * physical port. Return the new Virtual Interface Identifier on
856 * success, or a [negative] error number on failure.
857 */
858int t4vf_alloc_vi(struct adapter *adapter, int port_id)
859{
860 struct fw_vi_cmd cmd, rpl;
861 int v;
862
863 /*
864 * Execute a VI command to allocate Virtual Interface and return its
865 * VIID.
866 */
867 memset(&cmd, 0, sizeof(cmd));
868 cmd.op_to_vfn = cpu_to_be32(FW_CMD_OP(FW_VI_CMD) |
869 FW_CMD_REQUEST |
870 FW_CMD_WRITE |
871 FW_CMD_EXEC);
872 cmd.alloc_to_len16 = cpu_to_be32(FW_LEN16(cmd) |
873 FW_VI_CMD_ALLOC);
874 cmd.portid_pkd = FW_VI_CMD_PORTID(port_id);
875 v = t4vf_wr_mbox(adapter, &cmd, sizeof(cmd), &rpl);
876 if (v)
877 return v;
878
879 return FW_VI_CMD_VIID_GET(be16_to_cpu(rpl.type_viid));
880}
881
882/**
883 * t4vf_free_vi -- free a virtual interface
884 * @adapter: the adapter
885 * @viid: the virtual interface identifier
886 *
887 * Free a previously allocated Virtual Interface. Return an error on
888 * failure.
889 */
890int t4vf_free_vi(struct adapter *adapter, int viid)
891{
892 struct fw_vi_cmd cmd;
893
894 /*
895 * Execute a VI command to free the Virtual Interface.
896 */
897 memset(&cmd, 0, sizeof(cmd));
898 cmd.op_to_vfn = cpu_to_be32(FW_CMD_OP(FW_VI_CMD) |
899 FW_CMD_REQUEST |
900 FW_CMD_EXEC);
901 cmd.alloc_to_len16 = cpu_to_be32(FW_LEN16(cmd) |
902 FW_VI_CMD_FREE);
903 cmd.type_viid = cpu_to_be16(FW_VI_CMD_VIID(viid));
904 return t4vf_wr_mbox(adapter, &cmd, sizeof(cmd), NULL);
905}
906
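/*
 * Usage sketch (illustrative only): allocate a Virtual Interface on a
 * physical port and hand it back to the firmware when it is no longer
 * needed.  The intervening use of the VI is elided.
 */
static int example_vi_alloc_and_release(struct adapter *adapter, int port_id)
{
	int viid;

	viid = t4vf_alloc_vi(adapter, port_id);
	if (viid < 0)
		return viid;

	/* ... the VI would normally be configured and used here ... */

	return t4vf_free_vi(adapter, viid);
}
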
907/**
908 * t4vf_enable_vi - enable/disable a virtual interface
909 * @adapter: the adapter
910 * @viid: the Virtual Interface ID
911 * @rx_en: 1=enable Rx, 0=disable Rx
912 * @tx_en: 1=enable Tx, 0=disable Tx
913 *
914 * Enables/disables a virtual interface.
915 */
916int t4vf_enable_vi(struct adapter *adapter, unsigned int viid,
917 bool rx_en, bool tx_en)
918{
919 struct fw_vi_enable_cmd cmd;
920
921 memset(&cmd, 0, sizeof(cmd));
922 cmd.op_to_viid = cpu_to_be32(FW_CMD_OP(FW_VI_ENABLE_CMD) |
923 FW_CMD_REQUEST |
924 FW_CMD_EXEC |
925 FW_VI_ENABLE_CMD_VIID(viid));
926 cmd.ien_to_len16 = cpu_to_be32(FW_VI_ENABLE_CMD_IEN(rx_en) |
927 FW_VI_ENABLE_CMD_EEN(tx_en) |
928 FW_LEN16(cmd));
929 return t4vf_wr_mbox(adapter, &cmd, sizeof(cmd), NULL);
930}
931
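/*
 * Usage sketch (illustrative only): quiesce a Virtual Interface's
 * traffic in both directions, e.g. before tearing down its queues.
 */
static int example_quiesce_vi(struct adapter *adapter, unsigned int viid)
{
	return t4vf_enable_vi(adapter, viid, false, false);
}
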
932/**
933 * t4vf_identify_port - identify a VI's port by blinking its LED
934 * @adapter: the adapter
935 * @viid: the Virtual Interface ID
936 * @nblinks: how many times to blink the LED at 2.5 Hz
937 *
938 * Identifies a VI's port by blinking its LED.
939 */
940int t4vf_identify_port(struct adapter *adapter, unsigned int viid,
941 unsigned int nblinks)
942{
943 struct fw_vi_enable_cmd cmd;
944
945 memset(&cmd, 0, sizeof(cmd));
946 cmd.op_to_viid = cpu_to_be32(FW_CMD_OP(FW_VI_ENABLE_CMD) |
947 FW_CMD_REQUEST |
948 FW_CMD_EXEC |
949 FW_VI_ENABLE_CMD_VIID(viid));
950 cmd.ien_to_len16 = cpu_to_be32(FW_VI_ENABLE_CMD_LED |
951 FW_LEN16(cmd));
952 cmd.blinkdur = cpu_to_be16(nblinks);
953 return t4vf_wr_mbox(adapter, &cmd, sizeof(cmd), NULL);
954}
955
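/*
 * Usage sketch (illustrative only): blink a port's LED for roughly four
 * seconds; at the documented 2.5 Hz rate that is ten blinks.
 */
static int example_blink_led(struct adapter *adapter, unsigned int viid)
{
	return t4vf_identify_port(adapter, viid, 10);
}
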
956/**
957 * t4vf_set_rxmode - set Rx properties of a virtual interface
958 * @adapter: the adapter
959 * @viid: the VI id
960 * @mtu: the new MTU or -1 for no change
961 * @promisc: 1 to enable promiscuous mode, 0 to disable it, -1 no change
962 * @all_multi: 1 to enable all-multi mode, 0 to disable it, -1 no change
963 * @bcast: 1 to enable broadcast Rx, 0 to disable it, -1 no change
964 * @vlanex: 1 to enable hardware VLAN Tag extraction, 0 to disable it,
965 * -1 no change
966 *
967 * Sets Rx properties of a virtual interface.
968 */
969int t4vf_set_rxmode(struct adapter *adapter, unsigned int viid,
970 int mtu, int promisc, int all_multi, int bcast, int vlanex,
971 bool sleep_ok)
972{
973 struct fw_vi_rxmode_cmd cmd;
974
975 /* convert to FW values */
976 if (mtu < 0)
977 mtu = FW_VI_RXMODE_CMD_MTU_MASK;
978 if (promisc < 0)
979 promisc = FW_VI_RXMODE_CMD_PROMISCEN_MASK;
980 if (all_multi < 0)
981 all_multi = FW_VI_RXMODE_CMD_ALLMULTIEN_MASK;
982 if (bcast < 0)
983 bcast = FW_VI_RXMODE_CMD_BROADCASTEN_MASK;
984 if (vlanex < 0)
985 vlanex = FW_VI_RXMODE_CMD_VLANEXEN_MASK;
986
987 memset(&cmd, 0, sizeof(cmd));
988 cmd.op_to_viid = cpu_to_be32(FW_CMD_OP(FW_VI_RXMODE_CMD) |
989 FW_CMD_REQUEST |
990 FW_CMD_WRITE |
991 FW_VI_RXMODE_CMD_VIID(viid));
992 cmd.retval_len16 = cpu_to_be32(FW_LEN16(cmd));
993 cmd.mtu_to_vlanexen =
994 cpu_to_be32(FW_VI_RXMODE_CMD_MTU(mtu) |
995 FW_VI_RXMODE_CMD_PROMISCEN(promisc) |
996 FW_VI_RXMODE_CMD_ALLMULTIEN(all_multi) |
997 FW_VI_RXMODE_CMD_BROADCASTEN(bcast) |
998 FW_VI_RXMODE_CMD_VLANEXEN(vlanex));
999 return t4vf_wr_mbox_core(adapter, &cmd, sizeof(cmd), NULL, sleep_ok);
1000}
1001
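/*
 * Usage sketch (illustrative only): toggle promiscuous mode while
 * leaving the MTU, all-multicast, broadcast and VLAN extraction
 * settings untouched by passing -1 for the "no change" arguments.
 */
static int example_set_promisc(struct adapter *adapter, unsigned int viid,
			       bool on)
{
	return t4vf_set_rxmode(adapter, viid, -1, on, -1, -1, -1, true);
}
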
1002/**
1003 * t4vf_alloc_mac_filt - allocates exact-match filters for MAC addresses
1004 * @adapter: the adapter
1005 * @viid: the Virtual Interface Identifier
1006 * @free: if true any existing filters for this VI id are first removed
1007 * @naddr: the number of MAC addresses to allocate filters for (at most %FW_CLS_TCAM_NUM_ENTRIES)
1008 * @addr: the MAC address(es)
1009 * @idx: where to store the index of each allocated filter
1010 * @hash: pointer to hash address filter bitmap
1011 * @sleep_ok: call is allowed to sleep
1012 *
1013 * Allocates an exact-match filter for each of the supplied addresses and
1014 * sets it to the corresponding address. If @idx is not %NULL it should
1015 * have at least @naddr entries, each of which will be set to the index of
1016 * the filter allocated for the corresponding MAC address. If a filter
1017 * could not be allocated for an address, its index is set to 0xffff.
1018 * If @hash is not %NULL addresses that fail to allocate an exact filter
1019 * are hashed and update the hash filter bitmap pointed at by @hash.
1020 *
1021 * Returns a negative error number or the number of filters allocated.
1022 */
1023int t4vf_alloc_mac_filt(struct adapter *adapter, unsigned int viid, bool free,
1024 unsigned int naddr, const u8 **addr, u16 *idx,
1025 u64 *hash, bool sleep_ok)
1026{
1027 int offset, ret = 0;
1028	unsigned int nfilters = 0;
1029 unsigned int rem = naddr;
1030 struct fw_vi_mac_cmd cmd, rpl;
1031
1032 if (naddr > FW_CLS_TCAM_NUM_ENTRIES)
1033 return -EINVAL;
1034
1035 for (offset = 0; offset < naddr; /**/) {
1036 unsigned int fw_naddr = (rem < ARRAY_SIZE(cmd.u.exact)
1037 ? rem
1038 : ARRAY_SIZE(cmd.u.exact));
1039 size_t len16 = DIV_ROUND_UP(offsetof(struct fw_vi_mac_cmd,
1040 u.exact[fw_naddr]), 16);
1041 struct fw_vi_mac_exact *p;
1042 int i;
1043
1044 memset(&cmd, 0, sizeof(cmd));
1045 cmd.op_to_viid = cpu_to_be32(FW_CMD_OP(FW_VI_MAC_CMD) |
1046 FW_CMD_REQUEST |
1047 FW_CMD_WRITE |
1048 (free ? FW_CMD_EXEC : 0) |
1049 FW_VI_MAC_CMD_VIID(viid));
1050 cmd.freemacs_to_len16 =
1051 cpu_to_be32(FW_VI_MAC_CMD_FREEMACS(free) |
1052 FW_CMD_LEN16(len16));
1053
1054 for (i = 0, p = cmd.u.exact; i < fw_naddr; i++, p++) {
1055 p->valid_to_idx = cpu_to_be16(
1056 FW_VI_MAC_CMD_VALID |
1057 FW_VI_MAC_CMD_IDX(FW_VI_MAC_ADD_MAC));
1058 memcpy(p->macaddr, addr[offset+i], sizeof(p->macaddr));
1059 }
1060
1061
1062 ret = t4vf_wr_mbox_core(adapter, &cmd, sizeof(cmd), &rpl,
1063 sleep_ok);
1064 if (ret && ret != -ENOMEM)
1065 break;
1066
1067 for (i = 0, p = rpl.u.exact; i < fw_naddr; i++, p++) {
1068 u16 index = FW_VI_MAC_CMD_IDX_GET(
1069 be16_to_cpu(p->valid_to_idx));
1070
1071 if (idx)
1072 idx[offset+i] =
1073 (index >= FW_CLS_TCAM_NUM_ENTRIES
1074 ? 0xffff
1075 : index);
1076 if (index < FW_CLS_TCAM_NUM_ENTRIES)
1077 nfilters++;
1078 else if (hash)
1079 *hash |= (1ULL << hash_mac_addr(addr[offset+i]));
1080 }
1081
1082 free = false;
1083 offset += fw_naddr;
1084 rem -= fw_naddr;
1085 }
1086
1087 /*
1088 * If there were no errors or we merely ran out of room in our MAC
1089 * address arena, return the number of filters actually written.
1090 */
1091 if (ret == 0 || ret == -ENOMEM)
1092 ret = nfilters;
1093 return ret;
1094}
1095
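/*
 * Usage sketch (illustrative only): program a caller-supplied list of
 * MAC addresses into the VI's exact-match filters and fall back to the
 * inexact hash filter for any addresses that don't get an exact slot.
 * The address list is a hypothetical input; t4vf_set_addr_hash() is
 * documented further below.
 */
static int example_program_macs(struct adapter *adapter, unsigned int viid,
				const u8 **maclist, unsigned int count)
{
	u64 mhash = 0;
	int nfilt;

	nfilt = t4vf_alloc_mac_filt(adapter, viid, true, count, maclist,
				    NULL, &mhash, true);
	if (nfilt < 0)
		return nfilt;

	/* addresses which didn't fit were hashed into "mhash" */
	return t4vf_set_addr_hash(adapter, viid, false, mhash, true);
}
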
1096/**
1097 * t4vf_change_mac - modifies the exact-match filter for a MAC address
1098 * @adapter: the adapter
1099 * @viid: the Virtual Interface ID
1100 * @idx: index of existing filter for old value of MAC address, or -1
1101 * @addr: the new MAC address value
1102 * @persist: if idx < 0, the new MAC allocation should be persistent
1103 *
1104 * Modifies an exact-match filter and sets it to the new MAC address.
1105 * Note that in general it is not possible to modify the value of a given
1106 * filter so the generic way to modify an address filter is to free the
1107 * one being used by the old address value and allocate a new filter for
1108 * the new address value. @idx can be -1 if the address is a new
1109 * addition.
1110 *
1111 * Returns a negative error number or the index of the filter with the new
1112 * MAC value.
1113 */
1114int t4vf_change_mac(struct adapter *adapter, unsigned int viid,
1115 int idx, const u8 *addr, bool persist)
1116{
1117 int ret;
1118 struct fw_vi_mac_cmd cmd, rpl;
1119 struct fw_vi_mac_exact *p = &cmd.u.exact[0];
1120 size_t len16 = DIV_ROUND_UP(offsetof(struct fw_vi_mac_cmd,
1121 u.exact[1]), 16);
1122
1123 /*
1124 * If this is a new allocation, determine whether it should be
1125 * persistent (across a "freemacs" operation) or not.
1126 */
1127 if (idx < 0)
1128 idx = persist ? FW_VI_MAC_ADD_PERSIST_MAC : FW_VI_MAC_ADD_MAC;
1129
1130 memset(&cmd, 0, sizeof(cmd));
1131 cmd.op_to_viid = cpu_to_be32(FW_CMD_OP(FW_VI_MAC_CMD) |
1132 FW_CMD_REQUEST |
1133 FW_CMD_WRITE |
1134 FW_VI_MAC_CMD_VIID(viid));
1135 cmd.freemacs_to_len16 = cpu_to_be32(FW_CMD_LEN16(len16));
1136 p->valid_to_idx = cpu_to_be16(FW_VI_MAC_CMD_VALID |
1137 FW_VI_MAC_CMD_IDX(idx));
1138 memcpy(p->macaddr, addr, sizeof(p->macaddr));
1139
1140 ret = t4vf_wr_mbox(adapter, &cmd, sizeof(cmd), &rpl);
1141 if (ret == 0) {
1142 p = &rpl.u.exact[0];
1143 ret = FW_VI_MAC_CMD_IDX_GET(be16_to_cpu(p->valid_to_idx));
1144 if (ret >= FW_CLS_TCAM_NUM_ENTRIES)
1145 ret = -ENOMEM;
1146 }
1147 return ret;
1148}
1149
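/*
 * Usage sketch (illustrative only): update a VI's unicast MAC address,
 * e.g. from a set-MAC-address handler.  "mac_idx" stands in for a
 * cached exact-match filter index (-1 before the first call) and is an
 * assumption made for the example.
 */
static int example_update_mac(struct adapter *adapter, unsigned int viid,
			      int *mac_idx, const u8 *new_addr)
{
	int ret;

	ret = t4vf_change_mac(adapter, viid, *mac_idx, new_addr, true);
	if (ret < 0)
		return ret;

	/* remember the filter index for the next address change */
	*mac_idx = ret;
	return 0;
}
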
1150/**
1151 * t4vf_set_addr_hash - program the MAC inexact-match hash filter
1152 * @adapter: the adapter
1153 * @viid: the Virtual Interface Identifier
1154 * @ucast: whether the hash filter should also match unicast addresses
1155 * @vec: the value to be written to the hash filter
1156 * @sleep_ok: call is allowed to sleep
1157 *
1158 * Sets the 64-bit inexact-match hash filter for a virtual interface.
1159 */
1160int t4vf_set_addr_hash(struct adapter *adapter, unsigned int viid,
1161 bool ucast, u64 vec, bool sleep_ok)
1162{
1163 struct fw_vi_mac_cmd cmd;
1164 size_t len16 = DIV_ROUND_UP(offsetof(struct fw_vi_mac_cmd,
1165 u.exact[0]), 16);
1166
1167 memset(&cmd, 0, sizeof(cmd));
1168 cmd.op_to_viid = cpu_to_be32(FW_CMD_OP(FW_VI_MAC_CMD) |
1169 FW_CMD_REQUEST |
1170 FW_CMD_WRITE |
1171				     FW_VI_MAC_CMD_VIID(viid));
1172 cmd.freemacs_to_len16 = cpu_to_be32(FW_VI_MAC_CMD_HASHVECEN |
1173 FW_VI_MAC_CMD_HASHUNIEN(ucast) |
1174 FW_CMD_LEN16(len16));
1175 cmd.u.hash.hashvec = cpu_to_be64(vec);
1176 return t4vf_wr_mbox_core(adapter, &cmd, sizeof(cmd), NULL, sleep_ok);
1177}
1178
1179/**
1180 * t4vf_get_port_stats - collect "port" statistics
1181 * @adapter: the adapter
1182 * @pidx: the port index
1183 * @s: the stats structure to fill
1184 *
1185 * Collect statistics for the "port"'s Virtual Interface.
1186 */
1187int t4vf_get_port_stats(struct adapter *adapter, int pidx,
1188 struct t4vf_port_stats *s)
1189{
1190 struct port_info *pi = adap2pinfo(adapter, pidx);
1191 struct fw_vi_stats_vf fwstats;
1192 unsigned int rem = VI_VF_NUM_STATS;
1193 __be64 *fwsp = (__be64 *)&fwstats;
1194
1195 /*
1196 * Grab the Virtual Interface statistics a chunk at a time via mailbox
1197 * commands. We could use a Work Request and get all of them at once
1198 * but that's an asynchronous interface which is awkward to use.
1199 */
1200 while (rem) {
1201 unsigned int ix = VI_VF_NUM_STATS - rem;
1202 unsigned int nstats = min(6U, rem);
1203 struct fw_vi_stats_cmd cmd, rpl;
1204 size_t len = (offsetof(struct fw_vi_stats_cmd, u) +
1205 sizeof(struct fw_vi_stats_ctl));
1206 size_t len16 = DIV_ROUND_UP(len, 16);
1207 int ret;
1208
1209 memset(&cmd, 0, sizeof(cmd));
1210 cmd.op_to_viid = cpu_to_be32(FW_CMD_OP(FW_VI_STATS_CMD) |
1211 FW_VI_STATS_CMD_VIID(pi->viid) |
1212 FW_CMD_REQUEST |
1213 FW_CMD_READ);
1214 cmd.retval_len16 = cpu_to_be32(FW_CMD_LEN16(len16));
1215 cmd.u.ctl.nstats_ix =
1216 cpu_to_be16(FW_VI_STATS_CMD_IX(ix) |
1217 FW_VI_STATS_CMD_NSTATS(nstats));
1218 ret = t4vf_wr_mbox_ns(adapter, &cmd, len, &rpl);
1219 if (ret)
1220 return ret;
1221
1222 memcpy(fwsp, &rpl.u.ctl.stat0, sizeof(__be64) * nstats);
1223
1224 rem -= nstats;
1225 fwsp += nstats;
1226 }
1227
1228 /*
1229 * Translate firmware statistics into host native statistics.
1230 */
1231 s->tx_bcast_bytes = be64_to_cpu(fwstats.tx_bcast_bytes);
1232 s->tx_bcast_frames = be64_to_cpu(fwstats.tx_bcast_frames);
1233 s->tx_mcast_bytes = be64_to_cpu(fwstats.tx_mcast_bytes);
1234 s->tx_mcast_frames = be64_to_cpu(fwstats.tx_mcast_frames);
1235 s->tx_ucast_bytes = be64_to_cpu(fwstats.tx_ucast_bytes);
1236 s->tx_ucast_frames = be64_to_cpu(fwstats.tx_ucast_frames);
1237 s->tx_drop_frames = be64_to_cpu(fwstats.tx_drop_frames);
1238 s->tx_offload_bytes = be64_to_cpu(fwstats.tx_offload_bytes);
1239 s->tx_offload_frames = be64_to_cpu(fwstats.tx_offload_frames);
1240
1241 s->rx_bcast_bytes = be64_to_cpu(fwstats.rx_bcast_bytes);
1242 s->rx_bcast_frames = be64_to_cpu(fwstats.rx_bcast_frames);
1243 s->rx_mcast_bytes = be64_to_cpu(fwstats.rx_mcast_bytes);
1244 s->rx_mcast_frames = be64_to_cpu(fwstats.rx_mcast_frames);
1245 s->rx_ucast_bytes = be64_to_cpu(fwstats.rx_ucast_bytes);
1246 s->rx_ucast_frames = be64_to_cpu(fwstats.rx_ucast_frames);
1247
1248 s->rx_err_frames = be64_to_cpu(fwstats.rx_err_frames);
1249
1250 return 0;
1251}
1252
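/*
 * Usage sketch (illustrative only): reduce the collected VI statistics
 * to aggregate RX/TX frame counts for a "port".  The output parameters
 * are assumptions made for the example.
 */
static int example_frame_totals(struct adapter *adapter, int pidx,
				u64 *rx_frames, u64 *tx_frames)
{
	struct t4vf_port_stats stats;
	int ret;

	ret = t4vf_get_port_stats(adapter, pidx, &stats);
	if (ret)
		return ret;

	*rx_frames = stats.rx_bcast_frames + stats.rx_mcast_frames +
		     stats.rx_ucast_frames;
	*tx_frames = stats.tx_bcast_frames + stats.tx_mcast_frames +
		     stats.tx_ucast_frames;
	return 0;
}
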
1253/**
1254 * t4vf_iq_free - free an ingress queue and its free lists
1255 * @adapter: the adapter
1256 * @iqtype: the ingress queue type (FW_IQ_TYPE_FL_INT_CAP, etc.)
1257 * @iqid: ingress queue ID
1258 * @fl0id: FL0 queue ID or 0xffff if no attached FL0
1259 * @fl1id: FL1 queue ID or 0xffff if no attached FL1
1260 *
1261 * Frees an ingress queue and its associated free lists, if any.
1262 */
1263int t4vf_iq_free(struct adapter *adapter, unsigned int iqtype,
1264 unsigned int iqid, unsigned int fl0id, unsigned int fl1id)
1265{
1266 struct fw_iq_cmd cmd;
1267
1268 memset(&cmd, 0, sizeof(cmd));
1269 cmd.op_to_vfn = cpu_to_be32(FW_CMD_OP(FW_IQ_CMD) |
1270 FW_CMD_REQUEST |
1271 FW_CMD_EXEC);
1272 cmd.alloc_to_len16 = cpu_to_be32(FW_IQ_CMD_FREE |
1273 FW_LEN16(cmd));
1274 cmd.type_to_iqandstindex =
1275 cpu_to_be32(FW_IQ_CMD_TYPE(iqtype));
1276
1277 cmd.iqid = cpu_to_be16(iqid);
1278 cmd.fl0id = cpu_to_be16(fl0id);
1279 cmd.fl1id = cpu_to_be16(fl1id);
1280 return t4vf_wr_mbox(adapter, &cmd, sizeof(cmd), NULL);
1281}
1282
1283/**
1284 * t4vf_eth_eq_free - free an Ethernet egress queue
1285 * @adapter: the adapter
1286 * @eqid: egress queue ID
1287 *
1288 * Frees an Ethernet egress queue.
1289 */
1290int t4vf_eth_eq_free(struct adapter *adapter, unsigned int eqid)
1291{
1292 struct fw_eq_eth_cmd cmd;
1293
1294 memset(&cmd, 0, sizeof(cmd));
1295 cmd.op_to_vfn = cpu_to_be32(FW_CMD_OP(FW_EQ_ETH_CMD) |
1296 FW_CMD_REQUEST |
1297 FW_CMD_EXEC);
1298 cmd.alloc_to_len16 = cpu_to_be32(FW_EQ_ETH_CMD_FREE |
1299 FW_LEN16(cmd));
1300 cmd.eqid_pkd = cpu_to_be32(FW_EQ_ETH_CMD_EQID(eqid));
1301 return t4vf_wr_mbox(adapter, &cmd, sizeof(cmd), NULL);
1302}
1303
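/*
 * Usage sketch (illustrative only): release the firmware resources of
 * an Ethernet Queue Set.  The queue IDs are hypothetical; 0xffff marks
 * the absent second free list, per the t4vf_iq_free() documentation.
 */
static int example_free_qset(struct adapter *adapter, unsigned int rspq_id,
			     unsigned int fl_id, unsigned int txq_id)
{
	int ret;

	ret = t4vf_iq_free(adapter, FW_IQ_TYPE_FL_INT_CAP,
			   rspq_id, fl_id, 0xffff);
	if (ret)
		return ret;

	return t4vf_eth_eq_free(adapter, txq_id);
}
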
1304/**
1305 * t4vf_handle_fw_rpl - process a firmware reply message
1306 * @adapter: the adapter
1307 * @rpl: start of the firmware message
1308 *
1309 * Processes a firmware message, such as link state change messages.
1310 */
1311int t4vf_handle_fw_rpl(struct adapter *adapter, const __be64 *rpl)
1312{
1313 const struct fw_cmd_hdr *cmd_hdr = (const struct fw_cmd_hdr *)rpl;
1314 u8 opcode = FW_CMD_OP_GET(be32_to_cpu(cmd_hdr->hi));
1315
1316 switch (opcode) {
1317 case FW_PORT_CMD: {
1318 /*
1319 * Link/module state change message.
1320 */
1321 const struct fw_port_cmd *port_cmd =
1322 (const struct fw_port_cmd *)rpl;
1323 u32 word;
1324 int action, port_id, link_ok, speed, fc, pidx;
1325
1326 /*
1327 * Extract various fields from port status change message.
1328 */
1329 action = FW_PORT_CMD_ACTION_GET(
1330 be32_to_cpu(port_cmd->action_to_len16));
1331 if (action != FW_PORT_ACTION_GET_PORT_INFO) {
1332 dev_err(adapter->pdev_dev,
1333 "Unknown firmware PORT reply action %x\n",
1334 action);
1335 break;
1336 }
1337
1338 port_id = FW_PORT_CMD_PORTID_GET(
1339 be32_to_cpu(port_cmd->op_to_portid));
1340
1341 word = be32_to_cpu(port_cmd->u.info.lstatus_to_modtype);
1342 link_ok = (word & FW_PORT_CMD_LSTATUS) != 0;
1343 speed = 0;
1344 fc = 0;
1345 if (word & FW_PORT_CMD_RXPAUSE)
1346 fc |= PAUSE_RX;
1347 if (word & FW_PORT_CMD_TXPAUSE)
1348 fc |= PAUSE_TX;
1349 if (word & FW_PORT_CMD_LSPEED(FW_PORT_CAP_SPEED_100M))
1350 speed = SPEED_100;
1351 else if (word & FW_PORT_CMD_LSPEED(FW_PORT_CAP_SPEED_1G))
1352 speed = SPEED_1000;
1353 else if (word & FW_PORT_CMD_LSPEED(FW_PORT_CAP_SPEED_10G))
1354 speed = SPEED_10000;
1355
1356 /*
1357 * Scan all of our "ports" (Virtual Interfaces) looking for
1358 * those bound to the physical port which has changed. If
1359 * our recorded state doesn't match the current state,
1360 * signal that change to the OS code.
1361 */
1362 for_each_port(adapter, pidx) {
1363 struct port_info *pi = adap2pinfo(adapter, pidx);
1364 struct link_config *lc;
1365
1366 if (pi->port_id != port_id)
1367 continue;
1368
1369 lc = &pi->link_cfg;
1370 if (link_ok != lc->link_ok || speed != lc->speed ||
1371 fc != lc->fc) {
1372 /* something changed */
1373 lc->link_ok = link_ok;
1374 lc->speed = speed;
1375 lc->fc = fc;
1376 t4vf_os_link_changed(adapter, pidx, link_ok);
1377 }
1378 }
1379 break;
1380 }
1381
1382 default:
1383 dev_err(adapter->pdev_dev, "Unknown firmware reply %X\n",
1384 opcode);
1385 }
1386 return 0;
1387}
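
/*
 * Usage sketch (illustrative only): hand an asynchronous firmware
 * message payload, as delivered to an SGE response queue, to the reply
 * handler above.  Extracting the payload from the response descriptor
 * is assumed to have happened already; link state changes reported this
 * way end up in t4vf_os_link_changed() for the affected "ports".
 */
static void example_dispatch_fw_msg(struct adapter *adapter,
				    const __be64 *payload)
{
	t4vf_handle_fw_rpl(adapter, payload);
}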