aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorCasey Leedom <leedom@chelsio.com>2010-06-25 08:14:15 -0400
committerDavid S. Miller <davem@davemloft.net>2010-06-29 02:59:36 -0400
commitbe839e391725d7f3a61714530d0e90d7a773a871 (patch)
treedac32bd4fab64e9c5ad4dcc198b518f52782e205
parentc6e0d91464da214081af546496dd3a4b6d19db70 (diff)
cxgb4vf: Add main T4 PCI-E SR-IOV Virtual Function driver for cxgb4vf
Add main T4 PCI-E SR-IOV Virtual Function driver for "cxgb4vf". Signed-off-by: Casey Leedom Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--drivers/net/cxgb4vf/adapter.h540
-rw-r--r--drivers/net/cxgb4vf/cxgb4vf_main.c2906
2 files changed, 3446 insertions, 0 deletions
diff --git a/drivers/net/cxgb4vf/adapter.h b/drivers/net/cxgb4vf/adapter.h
new file mode 100644
index 000000000000..8ea01962e045
--- /dev/null
+++ b/drivers/net/cxgb4vf/adapter.h
@@ -0,0 +1,540 @@
1/*
2 * This file is part of the Chelsio T4 PCI-E SR-IOV Virtual Function Ethernet
3 * driver for Linux.
4 *
5 * Copyright (c) 2009-2010 Chelsio Communications, Inc. All rights reserved.
6 *
7 * This software is available to you under a choice of one of two
8 * licenses. You may choose to be licensed under the terms of the GNU
9 * General Public License (GPL) Version 2, available from the file
10 * COPYING in the main directory of this source tree, or the
11 * OpenIB.org BSD license below:
12 *
13 * Redistribution and use in source and binary forms, with or
14 * without modification, are permitted provided that the following
15 * conditions are met:
16 *
17 * - Redistributions of source code must retain the above
18 * copyright notice, this list of conditions and the following
19 * disclaimer.
20 *
21 * - Redistributions in binary form must reproduce the above
22 * copyright notice, this list of conditions and the following
23 * disclaimer in the documentation and/or other materials
24 * provided with the distribution.
25 *
26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
27 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
28 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
29 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
30 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
31 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
32 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33 * SOFTWARE.
34 */
35
36/*
37 * This file should not be included directly. Include t4vf_common.h instead.
38 */
39
40#ifndef __CXGB4VF_ADAPTER_H__
41#define __CXGB4VF_ADAPTER_H__
42
43#include <linux/pci.h>
44#include <linux/spinlock.h>
45#include <linux/skbuff.h>
46#include <linux/if_ether.h>
47#include <linux/netdevice.h>
48
49#include "../cxgb4/t4_hw.h"
50
51/*
52 * Constants of the implementation.
53 */
54enum {
55 MAX_NPORTS = 1, /* max # of "ports" */
56 MAX_PORT_QSETS = 8, /* max # of Queue Sets / "port" */
57 MAX_ETH_QSETS = MAX_NPORTS*MAX_PORT_QSETS,
58
59 /*
60 * MSI-X interrupt index usage.
61 */
62 MSIX_FW = 0, /* MSI-X index for firmware Q */
63 MSIX_NIQFLINT = 1, /* MSI-X index base for Ingress Qs */
64 MSIX_EXTRAS = 1,
65 MSIX_ENTRIES = MAX_ETH_QSETS + MSIX_EXTRAS,
66
67 /*
68 * The maximum number of Ingress and Egress Queues is determined by
69 * the maximum number of "Queue Sets" which we support plus any
70 * ancillary queues. Each "Queue Set" requires one Ingress Queue
71 * for RX Packet Ingress Event notifications and two Egress Queues for
72 * a Free List and an Ethernet TX list.
73 */
74 INGQ_EXTRAS = 2, /* firmware event queue and */
75 /* forwarded interrupts */
76 MAX_INGQ = MAX_ETH_QSETS+INGQ_EXTRAS,
77 MAX_EGRQ = MAX_ETH_QSETS*2,
78};
79
80/*
81 * Forward structure definition references.
82 */
83struct adapter;
84struct sge_eth_rxq;
85struct sge_rspq;
86
87/*
88 * Per-"port" information. This is really per-Virtual Interface information
89 * but the use of the "port" nomenclature makes it easier to go back and forth
90 * between the PF and VF drivers ...
91 */
92struct port_info {
93 struct adapter *adapter; /* our adapter */
94 struct vlan_group *vlan_grp; /* our VLAN group */
95 u16 viid; /* virtual interface ID */
96 s16 xact_addr_filt; /* index of our MAC address filter */
97 u16 rss_size; /* size of VI's RSS table slice */
98 u8 pidx; /* index into adapter port[] */
99 u8 port_id; /* physical port ID */
100 u8 rx_offload; /* CSO, etc. */
101 u8 nqsets; /* # of "Queue Sets" */
102 u8 first_qset; /* index of first "Queue Set" */
103 struct link_config link_cfg; /* physical port configuration */
104};
105
106/* port_info.rx_offload flags */
107enum {
108 RX_CSO = 1 << 0,
109};
110
111/*
112 * Scatter Gather Engine resources for the "adapter". Our ingress and egress
113 * queues are organized into "Queue Sets" with one ingress and one egress
114 * queue per Queue Set. These Queue Sets are apportionable between the "ports"
115 * (Virtual Interfaces). One extra ingress queue is used to receive
116 * asynchronous messages from the firmware. Note that the "Queue IDs" that we
117 * use here are really "Relative Queue IDs" which are returned as part of the
118 * firmware command to allocate queues. These queue IDs are relative to the
119 * absolute Queue ID base of the section of the Queue ID space allocated to
120 * the PF/VF.
121 */
122
123/*
124 * SGE free-list queue state.
125 */
126struct rx_sw_desc;
127struct sge_fl {
128 unsigned int avail; /* # of available RX buffers */
129 unsigned int pend_cred; /* new buffers since last FL DB ring */
130 unsigned int cidx; /* consumer index */
131 unsigned int pidx; /* producer index */
132 unsigned long alloc_failed; /* # of buffer allocation failures */
133 unsigned long large_alloc_failed;
134 unsigned long starving; /* # of times FL was found starving */
135
136 /*
137 * Write-once/infrequently fields.
138 * -------------------------------
139 */
140
141 unsigned int cntxt_id; /* SGE relative QID for the free list */
142 unsigned int abs_id; /* SGE absolute QID for the free list */
143 unsigned int size; /* capacity of free list */
144 struct rx_sw_desc *sdesc; /* address of SW RX descriptor ring */
145 __be64 *desc; /* address of HW RX descriptor ring */
146 dma_addr_t addr; /* PCI bus address of hardware ring */
147};
148
149/*
150 * An ingress packet gather list.
151 */
152struct pkt_gl {
153 skb_frag_t frags[MAX_SKB_FRAGS];
154 void *va; /* virtual address of first byte */
155 unsigned int nfrags; /* # of fragments */
156 unsigned int tot_len; /* total length of fragments */
157};
158
159typedef int (*rspq_handler_t)(struct sge_rspq *, const __be64 *,
160 const struct pkt_gl *);
161
162/*
163 * State for an SGE Response Queue.
164 */
165struct sge_rspq {
166 struct napi_struct napi; /* NAPI scheduling control */
167 const __be64 *cur_desc; /* current descriptor in queue */
168 unsigned int cidx; /* consumer index */
169 u8 gen; /* current generation bit */
170 u8 next_intr_params; /* holdoff params for next interrupt */
171 int offset; /* offset into current FL buffer */
172
173 unsigned int unhandled_irqs; /* bogus interrupts */
174
175 /*
176 * Write-once/infrequently fields.
177 * -------------------------------
178 */
179
180 u8 intr_params; /* interrupt holdoff parameters */
181 u8 pktcnt_idx; /* interrupt packet threshold */
182 u8 idx; /* queue index within its group */
183 u16 cntxt_id; /* SGE rel QID for the response Q */
184 u16 abs_id; /* SGE abs QID for the response Q */
185 __be64 *desc; /* address of hardware response ring */
186 dma_addr_t phys_addr; /* PCI bus address of ring */
187 unsigned int iqe_len; /* entry size */
188 unsigned int size; /* capacity of response Q */
189 struct adapter *adapter; /* our adapter */
190 struct net_device *netdev; /* associated net device */
191 rspq_handler_t handler; /* the handler for this response Q */
192};
193
194/*
195 * Ethernet queue statistics
196 */
197struct sge_eth_stats {
198 unsigned long pkts; /* # of ethernet packets */
199 unsigned long lro_pkts; /* # of LRO super packets */
200 unsigned long lro_merged; /* # of wire packets merged by LRO */
201 unsigned long rx_cso; /* # of Rx checksum offloads */
202 unsigned long vlan_ex; /* # of Rx VLAN extractions */
203 unsigned long rx_drops; /* # of packets dropped due to no mem */
204};
205
206/*
207 * State for an Ethernet Receive Queue.
208 */
209struct sge_eth_rxq {
210 struct sge_rspq rspq; /* Response Queue */
211 struct sge_fl fl; /* Free List */
212 struct sge_eth_stats stats; /* receive statistics */
213};
214
215/*
216 * SGE Transmit Queue state. This contains all of the resources associated
217 * with the hardware status of a TX Queue which is a circular ring of hardware
218 * TX Descriptors. For convenience, it also contains a pointer to a parallel
219 * "Software Descriptor" array but we don't know anything about it here other
220 * than its type name.
221 */
222struct tx_desc {
223 /*
224 * Egress Queues are measured in units of SGE_EQ_IDXSIZE by the
225 * hardware: Sizes, Producer and Consumer indices, etc.
226 */
227 __be64 flit[SGE_EQ_IDXSIZE/sizeof(__be64)];
228};
229struct tx_sw_desc;
230struct sge_txq {
231 unsigned int in_use; /* # of in-use TX descriptors */
232 unsigned int size; /* # of descriptors */
233 unsigned int cidx; /* SW consumer index */
234 unsigned int pidx; /* producer index */
235 unsigned long stops; /* # of times queue has been stopped */
236 unsigned long restarts; /* # of queue restarts */
237
238 /*
239 * Write-once/infrequently fields.
240 * -------------------------------
241 */
242
243 unsigned int cntxt_id; /* SGE relative QID for the TX Q */
244 unsigned int abs_id; /* SGE absolute QID for the TX Q */
245 struct tx_desc *desc; /* address of HW TX descriptor ring */
246 struct tx_sw_desc *sdesc; /* address of SW TX descriptor ring */
247 struct sge_qstat *stat; /* queue status entry */
248 dma_addr_t phys_addr; /* PCI bus address of hardware ring */
249};
250
251/*
252 * State for an Ethernet Transmit Queue.
253 */
254struct sge_eth_txq {
255 struct sge_txq q; /* SGE TX Queue */
256 struct netdev_queue *txq; /* associated netdev TX queue */
257 unsigned long tso; /* # of TSO requests */
258 unsigned long tx_cso; /* # of TX checksum offloads */
259 unsigned long vlan_ins; /* # of TX VLAN insertions */
260 unsigned long mapping_err; /* # of I/O MMU packet mapping errors */
261};
262
263/*
264 * The complete set of Scatter/Gather Engine resources.
265 */
266struct sge {
267 /*
268 * Our "Queue Sets" ...
269 */
270 struct sge_eth_txq ethtxq[MAX_ETH_QSETS];
271 struct sge_eth_rxq ethrxq[MAX_ETH_QSETS];
272
273 /*
274 * Extra ingress queues for asynchronous firmware events and
275 * forwarded interrupts (when in MSI mode).
276 */
277 struct sge_rspq fw_evtq ____cacheline_aligned_in_smp;
278
279 struct sge_rspq intrq ____cacheline_aligned_in_smp;
280 spinlock_t intrq_lock;
281
282 /*
283 * State for managing "starving Free Lists" -- Free Lists which have
284 * fallen below a certain threshold of buffers available to the
285 * hardware and attempts to refill them up to that threshold have
286 * failed. We have a regular "slow tick" timer process which will
287 * make periodic attempts to refill these starving Free Lists ...
288 */
289 DECLARE_BITMAP(starving_fl, MAX_EGRQ);
290 struct timer_list rx_timer;
291
292 /*
293 * State for cleaning up completed TX descriptors.
294 */
295 struct timer_list tx_timer;
296
297 /*
298 * Write-once/infrequently fields.
299 * -------------------------------
300 */
301
302 u16 max_ethqsets; /* # of available Ethernet queue sets */
303 u16 ethqsets; /* # of active Ethernet queue sets */
304 u16 ethtxq_rover; /* Tx queue to clean up next */
305 u16 timer_val[SGE_NTIMERS]; /* interrupt holdoff timer array */
306 u8 counter_val[SGE_NCOUNTERS]; /* interrupt RX threshold array */
307
308 /*
309 * Reverse maps from Absolute Queue IDs to associated queue pointers.
310 * The absolute Queue IDs are in a compact range which start at a
311 * [potentially large] Base Queue ID. We perform the reverse map by
312 * first converting the Absolute Queue ID into a Relative Queue ID by
313 * subtracting off the Base Queue ID and then use a Relative Queue ID
314 * indexed table to get the pointer to the corresponding software
315 * queue structure.
316 */
317 unsigned int egr_base;
318 unsigned int ingr_base;
319 void *egr_map[MAX_EGRQ];
320 struct sge_rspq *ingr_map[MAX_INGQ];
321};
322
323/*
324 * Utility macros to convert Absolute- to Relative-Queue indices and Egress-
325 * and Ingress-Queues. The EQ_MAP() and IQ_MAP() macros which provide
326 * pointers to Ingress- and Egress-Queues can be used as both L- and R-values
327 */
328#define EQ_IDX(s, abs_id) ((unsigned int)((abs_id) - (s)->egr_base))
329#define IQ_IDX(s, abs_id) ((unsigned int)((abs_id) - (s)->ingr_base))
330
331#define EQ_MAP(s, abs_id) ((s)->egr_map[EQ_IDX(s, abs_id)])
332#define IQ_MAP(s, abs_id) ((s)->ingr_map[IQ_IDX(s, abs_id)])
333
334/*
335 * Macro to iterate across Queue Sets ("rxq" is a historic misnomer).
336 */
337#define for_each_ethrxq(sge, iter) \
338 for (iter = 0; iter < (sge)->ethqsets; iter++)
339
340/*
341 * Per-"adapter" (Virtual Function) information.
342 */
343struct adapter {
344 /* PCI resources */
345 void __iomem *regs;
346 struct pci_dev *pdev;
347 struct device *pdev_dev;
348
349 /* "adapter" resources */
350 unsigned long registered_device_map;
351 unsigned long open_device_map;
352 unsigned long flags;
353 struct adapter_params params;
354
355 /* queue and interrupt resources */
356 struct {
357 unsigned short vec;
358 char desc[22];
359 } msix_info[MSIX_ENTRIES];
360 struct sge sge;
361
362 /* Linux network device resources */
363 struct net_device *port[MAX_NPORTS];
364 const char *name;
365 unsigned int msg_enable;
366
367 /* debugfs resources */
368 struct dentry *debugfs_root;
369
370 /* various locks */
371 spinlock_t stats_lock;
372};
373
374enum { /* adapter flags */
375 FULL_INIT_DONE = (1UL << 0),
376 USING_MSI = (1UL << 1),
377 USING_MSIX = (1UL << 2),
378 QUEUES_BOUND = (1UL << 3),
379};
380
381/*
382 * The following register read/write routine definitions are required by
383 * the common code.
384 */
385
386/**
387 * t4_read_reg - read a HW register
388 * @adapter: the adapter
389 * @reg_addr: the register address
390 *
391 * Returns the 32-bit value of the given HW register.
392 */
393static inline u32 t4_read_reg(struct adapter *adapter, u32 reg_addr)
394{
395 return readl(adapter->regs + reg_addr);
396}
397
398/**
399 * t4_write_reg - write a HW register
400 * @adapter: the adapter
401 * @reg_addr: the register address
402 * @val: the value to write
403 *
404 * Write a 32-bit value into the given HW register.
405 */
406static inline void t4_write_reg(struct adapter *adapter, u32 reg_addr, u32 val)
407{
408 writel(val, adapter->regs + reg_addr);
409}
410
411#ifndef readq
412static inline u64 readq(const volatile void __iomem *addr)
413{
414 return readl(addr) + ((u64)readl(addr + 4) << 32);
415}
416
417static inline void writeq(u64 val, volatile void __iomem *addr)
418{
419 writel(val, addr);
420 writel(val >> 32, addr + 4);
421}
422#endif
423
424/**
425 * t4_read_reg64 - read a 64-bit HW register
426 * @adapter: the adapter
427 * @reg_addr: the register address
428 *
429 * Returns the 64-bit value of the given HW register.
430 */
431static inline u64 t4_read_reg64(struct adapter *adapter, u32 reg_addr)
432{
433 return readq(adapter->regs + reg_addr);
434}
435
436/**
437 * t4_write_reg64 - write a 64-bit HW register
438 * @adapter: the adapter
439 * @reg_addr: the register address
440 * @val: the value to write
441 *
442 * Write a 64-bit value into the given HW register.
443 */
444static inline void t4_write_reg64(struct adapter *adapter, u32 reg_addr,
445 u64 val)
446{
447 writeq(val, adapter->regs + reg_addr);
448}
449
450/**
451 * port_name - return the string name of a port
452 * @adapter: the adapter
453 * @pidx: the port index
454 *
455 * Return the string name of the selected port.
456 */
457static inline const char *port_name(struct adapter *adapter, int pidx)
458{
459 return adapter->port[pidx]->name;
460}
461
462/**
463 * t4_os_set_hw_addr - store a port's MAC address in SW
464 * @adapter: the adapter
465 * @pidx: the port index
466 * @hw_addr: the Ethernet address
467 *
468 * Store the Ethernet address of the given port in SW. Called by the common
469 * code when it retrieves a port's Ethernet address from EEPROM.
470 */
471static inline void t4_os_set_hw_addr(struct adapter *adapter, int pidx,
472 u8 hw_addr[])
473{
474 memcpy(adapter->port[pidx]->dev_addr, hw_addr, ETH_ALEN);
475 memcpy(adapter->port[pidx]->perm_addr, hw_addr, ETH_ALEN);
476}
477
478/**
479 * netdev2pinfo - return the port_info structure associated with a net_device
480 * @dev: the netdev
481 *
482 * Return the struct port_info associated with a net_device
483 */
484static inline struct port_info *netdev2pinfo(const struct net_device *dev)
485{
486 return netdev_priv(dev);
487}
488
489/**
490 * adap2pinfo - return the port_info of a port
491 * @adap: the adapter
492 * @pidx: the port index
493 *
494 * Return the port_info structure for the adapter.
495 */
496static inline struct port_info *adap2pinfo(struct adapter *adapter, int pidx)
497{
498 return netdev_priv(adapter->port[pidx]);
499}
500
501/**
502 * netdev2adap - return the adapter structure associated with a net_device
503 * @dev: the netdev
504 *
505 * Return the struct adapter associated with a net_device
506 */
507static inline struct adapter *netdev2adap(const struct net_device *dev)
508{
509 return netdev2pinfo(dev)->adapter;
510}
511
512/*
513 * OS "Callback" function declarations. These are functions that the OS code
514 * is "contracted" to provide for the common code.
515 */
516void t4vf_os_link_changed(struct adapter *, int, int);
517
518/*
519 * SGE function prototype declarations.
520 */
521int t4vf_sge_alloc_rxq(struct adapter *, struct sge_rspq *, bool,
522 struct net_device *, int,
523 struct sge_fl *, rspq_handler_t);
524int t4vf_sge_alloc_eth_txq(struct adapter *, struct sge_eth_txq *,
525 struct net_device *, struct netdev_queue *,
526 unsigned int);
527void t4vf_free_sge_resources(struct adapter *);
528
529int t4vf_eth_xmit(struct sk_buff *, struct net_device *);
530int t4vf_ethrx_handler(struct sge_rspq *, const __be64 *,
531 const struct pkt_gl *);
532
533irq_handler_t t4vf_intr_handler(struct adapter *);
534irqreturn_t t4vf_sge_intr_msix(int, void *);
535
536int t4vf_sge_init(struct adapter *);
537void t4vf_sge_start(struct adapter *);
538void t4vf_sge_stop(struct adapter *);
539
540#endif /* __CXGB4VF_ADAPTER_H__ */
diff --git a/drivers/net/cxgb4vf/cxgb4vf_main.c b/drivers/net/cxgb4vf/cxgb4vf_main.c
new file mode 100644
index 000000000000..bd73ff5b51b7
--- /dev/null
+++ b/drivers/net/cxgb4vf/cxgb4vf_main.c
@@ -0,0 +1,2906 @@
1/*
2 * This file is part of the Chelsio T4 PCI-E SR-IOV Virtual Function Ethernet
3 * driver for Linux.
4 *
5 * Copyright (c) 2009-2010 Chelsio Communications, Inc. All rights reserved.
6 *
7 * This software is available to you under a choice of one of two
8 * licenses. You may choose to be licensed under the terms of the GNU
9 * General Public License (GPL) Version 2, available from the file
10 * COPYING in the main directory of this source tree, or the
11 * OpenIB.org BSD license below:
12 *
13 * Redistribution and use in source and binary forms, with or
14 * without modification, are permitted provided that the following
15 * conditions are met:
16 *
17 * - Redistributions of source code must retain the above
18 * copyright notice, this list of conditions and the following
19 * disclaimer.
20 *
21 * - Redistributions in binary form must reproduce the above
22 * copyright notice, this list of conditions and the following
23 * disclaimer in the documentation and/or other materials
24 * provided with the distribution.
25 *
26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
27 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
28 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
29 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
30 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
31 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
32 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33 * SOFTWARE.
34 */
35
36#include <linux/version.h>
37#include <linux/module.h>
38#include <linux/moduleparam.h>
39#include <linux/init.h>
40#include <linux/pci.h>
41#include <linux/dma-mapping.h>
42#include <linux/netdevice.h>
43#include <linux/etherdevice.h>
44#include <linux/debugfs.h>
45#include <linux/ethtool.h>
46
47#include "t4vf_common.h"
48#include "t4vf_defs.h"
49
50#include "../cxgb4/t4_regs.h"
51#include "../cxgb4/t4_msg.h"
52
53/*
54 * Generic information about the driver.
55 */
56#define DRV_VERSION "1.0.0"
57#define DRV_DESC "Chelsio T4 Virtual Function (VF) Network Driver"
58
59/*
60 * Module Parameters.
61 * ==================
62 */
63
64/*
65 * Default ethtool "message level" for adapters.
66 */
67#define DFLT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK | \
68 NETIF_MSG_TIMER | NETIF_MSG_IFDOWN | NETIF_MSG_IFUP |\
69 NETIF_MSG_RX_ERR | NETIF_MSG_TX_ERR)
70
71static int dflt_msg_enable = DFLT_MSG_ENABLE;
72
73module_param(dflt_msg_enable, int, 0644);
74MODULE_PARM_DESC(dflt_msg_enable,
75 "default adapter ethtool message level bitmap");
76
77/*
78 * The driver uses the best interrupt scheme available on a platform in the
79 * order MSI-X then MSI. This parameter determines which of these schemes the
80 * driver may consider as follows:
81 *
82 * msi = 2: choose from among MSI-X and MSI
83 * msi = 1: only consider MSI interrupts
84 *
85 * Note that unlike the Physical Function driver, this Virtual Function driver
86 * does _not_ support legacy INTx interrupts (this limitation is mandated by
87 * the PCI-E SR-IOV standard).
88 */
89#define MSI_MSIX 2
90#define MSI_MSI 1
91#define MSI_DEFAULT MSI_MSIX
92
93static int msi = MSI_DEFAULT;
94
95module_param(msi, int, 0644);
96MODULE_PARM_DESC(msi, "whether to use MSI-X or MSI");
97
98/*
99 * Fundamental constants.
100 * ======================
101 */
102
103enum {
104 MAX_TXQ_ENTRIES = 16384,
105 MAX_RSPQ_ENTRIES = 16384,
106 MAX_RX_BUFFERS = 16384,
107
108 MIN_TXQ_ENTRIES = 32,
109 MIN_RSPQ_ENTRIES = 128,
110 MIN_FL_ENTRIES = 16,
111
112 /*
113 * For purposes of manipulating the Free List size we need to
114 * recognize that Free Lists are actually Egress Queues (the host
115 * produces free buffers which the hardware consumes), Egress Queues
116 * indices are all in units of Egress Context Units bytes, and free
117 * list entries are 64-bit PCI DMA addresses. And since the state of
118 * the Producer Index == the Consumer Index implies an EMPTY list, we
119 * always have at least one Egress Unit's worth of Free List entries
120 * unused. See sge.c for more details ...
121 */
122 EQ_UNIT = SGE_EQ_IDXSIZE,
123 FL_PER_EQ_UNIT = EQ_UNIT / sizeof(__be64),
124 MIN_FL_RESID = FL_PER_EQ_UNIT,
125};
126
127/*
128 * Global driver state.
129 * ====================
130 */
131
132static struct dentry *cxgb4vf_debugfs_root;
133
134/*
135 * OS "Callback" functions.
136 * ========================
137 */
138
139/*
140 * The link status has changed on the indicated "port" (Virtual Interface).
141 */
142void t4vf_os_link_changed(struct adapter *adapter, int pidx, int link_ok)
143{
144 struct net_device *dev = adapter->port[pidx];
145
146 /*
147 * If the port is disabled or the current recorded "link up"
148 * status matches the new status, just return.
149 */
150 if (!netif_running(dev) || link_ok == netif_carrier_ok(dev))
151 return;
152
153 /*
154 * Tell the OS that the link status has changed and print a short
155 * informative message on the console about the event.
156 */
157 if (link_ok) {
158 const char *s;
159 const char *fc;
160 const struct port_info *pi = netdev_priv(dev);
161
162 netif_carrier_on(dev);
163
164 switch (pi->link_cfg.speed) {
165 case SPEED_10000:
166 s = "10Gbps";
167 break;
168
169 case SPEED_1000:
170 s = "1000Mbps";
171 break;
172
173 case SPEED_100:
174 s = "100Mbps";
175 break;
176
177 default:
178 s = "unknown";
179 break;
180 }
181
182 switch (pi->link_cfg.fc) {
183 case PAUSE_RX:
184 fc = "RX";
185 break;
186
187 case PAUSE_TX:
188 fc = "TX";
189 break;
190
191 case PAUSE_RX|PAUSE_TX:
192 fc = "RX/TX";
193 break;
194
195 default:
196 fc = "no";
197 break;
198 }
199
200 printk(KERN_INFO "%s: link up, %s, full-duplex, %s PAUSE\n",
201 dev->name, s, fc);
202 } else {
203 netif_carrier_off(dev);
204 printk(KERN_INFO "%s: link down\n", dev->name);
205 }
206}
207
208/*
209 * Net device operations.
210 * ======================
211 */
212
213/*
214 * Record our new VLAN Group and enable/disable hardware VLAN Tag extraction
215 * based on whether the specified VLAN Group pointer is NULL or not.
216 */
217static void cxgb4vf_vlan_rx_register(struct net_device *dev,
218 struct vlan_group *grp)
219{
220 struct port_info *pi = netdev_priv(dev);
221
222 pi->vlan_grp = grp;
223 t4vf_set_rxmode(pi->adapter, pi->viid, -1, -1, -1, -1, grp != NULL, 0);
224}
225
226/*
227 * Perform the MAC and PHY actions needed to enable a "port" (Virtual
228 * Interface).
229 */
230static int link_start(struct net_device *dev)
231{
232 int ret;
233 struct port_info *pi = netdev_priv(dev);
234
235 /*
236 * We do not set address filters and promiscuity here, the stack does
237 * that step explicitly.
238 */
239 ret = t4vf_set_rxmode(pi->adapter, pi->viid, dev->mtu, -1, -1, -1, -1,
240 true);
241 if (ret == 0) {
242 ret = t4vf_change_mac(pi->adapter, pi->viid,
243 pi->xact_addr_filt, dev->dev_addr, true);
244 if (ret >= 0) {
245 pi->xact_addr_filt = ret;
246 ret = 0;
247 }
248 }
249
250 /*
251 * We don't need to actually "start the link" itself since the
252 * firmware will do that for us when the first Virtual Interface
253 * is enabled on a port.
254 */
255 if (ret == 0)
256 ret = t4vf_enable_vi(pi->adapter, pi->viid, true, true);
257 return ret;
258}
259
260/*
261 * Name the MSI-X interrupts.
262 */
263static void name_msix_vecs(struct adapter *adapter)
264{
265 int namelen = sizeof(adapter->msix_info[0].desc) - 1;
266 int pidx;
267
268 /*
269 * Firmware events.
270 */
271 snprintf(adapter->msix_info[MSIX_FW].desc, namelen,
272 "%s-FWeventq", adapter->name);
273 adapter->msix_info[MSIX_FW].desc[namelen] = 0;
274
275 /*
276 * Ethernet queues.
277 */
278 for_each_port(adapter, pidx) {
279 struct net_device *dev = adapter->port[pidx];
280 const struct port_info *pi = netdev_priv(dev);
281 int qs, msi;
282
283 for (qs = 0, msi = MSIX_NIQFLINT;
284 qs < pi->nqsets;
285 qs++, msi++) {
286 snprintf(adapter->msix_info[msi].desc, namelen,
287 "%s-%d", dev->name, qs);
288 adapter->msix_info[msi].desc[namelen] = 0;
289 }
290 }
291}
292
293/*
294 * Request all of our MSI-X resources.
295 */
296static int request_msix_queue_irqs(struct adapter *adapter)
297{
298 struct sge *s = &adapter->sge;
299 int rxq, msi, err;
300
301 /*
302 * Firmware events.
303 */
304 err = request_irq(adapter->msix_info[MSIX_FW].vec, t4vf_sge_intr_msix,
305 0, adapter->msix_info[MSIX_FW].desc, &s->fw_evtq);
306 if (err)
307 return err;
308
309 /*
310 * Ethernet queues.
311 */
312 msi = MSIX_NIQFLINT;
313 for_each_ethrxq(s, rxq) {
314 err = request_irq(adapter->msix_info[msi].vec,
315 t4vf_sge_intr_msix, 0,
316 adapter->msix_info[msi].desc,
317 &s->ethrxq[rxq].rspq);
318 if (err)
319 goto err_free_irqs;
320 msi++;
321 }
322 return 0;
323
324err_free_irqs:
325 while (--rxq >= 0)
326 free_irq(adapter->msix_info[--msi].vec, &s->ethrxq[rxq].rspq);
327 free_irq(adapter->msix_info[MSIX_FW].vec, &s->fw_evtq);
328 return err;
329}
330
331/*
332 * Free our MSI-X resources.
333 */
334static void free_msix_queue_irqs(struct adapter *adapter)
335{
336 struct sge *s = &adapter->sge;
337 int rxq, msi;
338
339 free_irq(adapter->msix_info[MSIX_FW].vec, &s->fw_evtq);
340 msi = MSIX_NIQFLINT;
341 for_each_ethrxq(s, rxq)
342 free_irq(adapter->msix_info[msi++].vec,
343 &s->ethrxq[rxq].rspq);
344}
345
346/*
347 * Turn on NAPI and start up interrupts on a response queue.
348 */
349static void qenable(struct sge_rspq *rspq)
350{
351 napi_enable(&rspq->napi);
352
353 /*
354 * 0-increment the Going To Sleep register to start the timer and
355 * enable interrupts.
356 */
357 t4_write_reg(rspq->adapter, T4VF_SGE_BASE_ADDR + SGE_VF_GTS,
358 CIDXINC(0) |
359 SEINTARM(rspq->intr_params) |
360 INGRESSQID(rspq->cntxt_id));
361}
362
363/*
364 * Enable NAPI scheduling and interrupt generation for all Receive Queues.
365 */
366static void enable_rx(struct adapter *adapter)
367{
368 int rxq;
369 struct sge *s = &adapter->sge;
370
371 for_each_ethrxq(s, rxq)
372 qenable(&s->ethrxq[rxq].rspq);
373 qenable(&s->fw_evtq);
374
375 /*
376 * The interrupt queue doesn't use NAPI so we do the 0-increment of
377 * its Going To Sleep register here to get it started.
378 */
379 if (adapter->flags & USING_MSI)
380 t4_write_reg(adapter, T4VF_SGE_BASE_ADDR + SGE_VF_GTS,
381 CIDXINC(0) |
382 SEINTARM(s->intrq.intr_params) |
383 INGRESSQID(s->intrq.cntxt_id));
384
385}
386
387/*
388 * Wait until all NAPI handlers are descheduled.
389 */
390static void quiesce_rx(struct adapter *adapter)
391{
392 struct sge *s = &adapter->sge;
393 int rxq;
394
395 for_each_ethrxq(s, rxq)
396 napi_disable(&s->ethrxq[rxq].rspq.napi);
397 napi_disable(&s->fw_evtq.napi);
398}
399
400/*
401 * Response queue handler for the firmware event queue.
402 */
403static int fwevtq_handler(struct sge_rspq *rspq, const __be64 *rsp,
404 const struct pkt_gl *gl)
405{
406 /*
407 * Extract response opcode and get pointer to CPL message body.
408 */
409 struct adapter *adapter = rspq->adapter;
410 u8 opcode = ((const struct rss_header *)rsp)->opcode;
411 void *cpl = (void *)(rsp + 1);
412
413 switch (opcode) {
414 case CPL_FW6_MSG: {
415 /*
416 * We've received an asynchronous message from the firmware.
417 */
418 const struct cpl_fw6_msg *fw_msg = cpl;
419 if (fw_msg->type == FW6_TYPE_CMD_RPL)
420 t4vf_handle_fw_rpl(adapter, fw_msg->data);
421 break;
422 }
423
424 case CPL_SGE_EGR_UPDATE: {
425 /*
426 * We've received an Egress Queue status update message.
427 * We get these, as the SGE is currently configured, when
428 * the firmware passes certain points in processing our
429 * TX Ethernet Queue. We use these updates to determine
430 * when we may need to restart a TX Ethernet Queue which
431 * was stopped for lack of free slots ...
432 */
433 const struct cpl_sge_egr_update *p = (void *)cpl;
434 unsigned int qid = EGR_QID(be32_to_cpu(p->opcode_qid));
435 struct sge *s = &adapter->sge;
436 struct sge_txq *tq;
437 struct sge_eth_txq *txq;
438 unsigned int eq_idx;
439 int hw_cidx, reclaimable, in_use;
440
441 /*
442 * Perform sanity checking on the Queue ID to make sure it
443 * really refers to one of our TX Ethernet Egress Queues which
444 * is active and matches the queue's ID. None of these error
445 * conditions should ever happen so we may want to either make
446 * them fatal and/or conditionalized under DEBUG.
447 */
448 eq_idx = EQ_IDX(s, qid);
449 if (unlikely(eq_idx >= MAX_EGRQ)) {
450 dev_err(adapter->pdev_dev,
451 "Egress Update QID %d out of range\n", qid);
452 break;
453 }
454 tq = s->egr_map[eq_idx];
455 if (unlikely(tq == NULL)) {
456 dev_err(adapter->pdev_dev,
457 "Egress Update QID %d TXQ=NULL\n", qid);
458 break;
459 }
460 txq = container_of(tq, struct sge_eth_txq, q);
461 if (unlikely(tq->abs_id != qid)) {
462 dev_err(adapter->pdev_dev,
463 "Egress Update QID %d refers to TXQ %d\n",
464 qid, tq->abs_id);
465 break;
466 }
467
468 /*
469 * Skip TX Queues which aren't stopped.
470 */
471 if (likely(!netif_tx_queue_stopped(txq->txq)))
472 break;
473
474 /*
475 * Skip stopped TX Queues which have more than half of their
476 * DMA rings occupied with unacknowledged writes.
477 */
478 hw_cidx = be16_to_cpu(txq->q.stat->cidx);
479 reclaimable = hw_cidx - txq->q.cidx;
480 if (reclaimable < 0)
481 reclaimable += txq->q.size;
482 in_use = txq->q.in_use - reclaimable;
483 if (in_use >= txq->q.size/2)
484 break;
485
486 /*
487 * Restart a stopped TX Queue which has less than half of its
488 * TX ring in use ...
489 */
490 txq->q.restarts++;
491 netif_tx_wake_queue(txq->txq);
492 break;
493 }
494
495 default:
496 dev_err(adapter->pdev_dev,
497 "unexpected CPL %#x on FW event queue\n", opcode);
498 }
499
500 return 0;
501}
502
503/*
504 * Allocate SGE TX/RX response queues. Determine how many sets of SGE queues
505 * to use and initializes them. We support multiple "Queue Sets" per port if
506 * we have MSI-X, otherwise just one queue set per port.
507 */
508static int setup_sge_queues(struct adapter *adapter)
509{
510 struct sge *s = &adapter->sge;
511 int err, pidx, msix;
512
513 /*
514 * Clear "Queue Set" Free List Starving and TX Queue Mapping Error
515 * state.
516 */
517 bitmap_zero(s->starving_fl, MAX_EGRQ);
518
519 /*
520 * If we're using MSI interrupt mode we need to set up a "forwarded
521 * interrupt" queue which we'll set up with our MSI vector. The rest
522 * of the ingress queues will be set up to forward their interrupts to
523 * this queue ... This must be first since t4vf_sge_alloc_rxq() uses
524 * the intrq's queue ID as the interrupt forwarding queue for the
525 * subsequent calls ...
526 */
527 if (adapter->flags & USING_MSI) {
528 err = t4vf_sge_alloc_rxq(adapter, &s->intrq, false,
529 adapter->port[0], 0, NULL, NULL);
530 if (err)
531 goto err_free_queues;
532 }
533
534 /*
535 * Allocate our ingress queue for asynchronous firmware messages.
536 */
537 err = t4vf_sge_alloc_rxq(adapter, &s->fw_evtq, true, adapter->port[0],
538 MSIX_FW, NULL, fwevtq_handler);
539 if (err)
540 goto err_free_queues;
541
542 /*
543 * Allocate each "port"'s initial Queue Sets. These can be changed
544 * later on ... up to the point where any interface on the adapter is
545 * brought up at which point lots of things get nailed down
546 * permanently ...
547 */
548 msix = MSIX_NIQFLINT;
549 for_each_port(adapter, pidx) {
550 struct net_device *dev = adapter->port[pidx];
551 struct port_info *pi = netdev_priv(dev);
552 struct sge_eth_rxq *rxq = &s->ethrxq[pi->first_qset];
553 struct sge_eth_txq *txq = &s->ethtxq[pi->first_qset];
554 int nqsets = (adapter->flags & USING_MSIX) ? pi->nqsets : 1;
555 int qs;
556
557 for (qs = 0; qs < nqsets; qs++, rxq++, txq++) {
558 err = t4vf_sge_alloc_rxq(adapter, &rxq->rspq, false,
559 dev, msix++,
560 &rxq->fl, t4vf_ethrx_handler);
561 if (err)
562 goto err_free_queues;
563
564 err = t4vf_sge_alloc_eth_txq(adapter, txq, dev,
565 netdev_get_tx_queue(dev, qs),
566 s->fw_evtq.cntxt_id);
567 if (err)
568 goto err_free_queues;
569
570 rxq->rspq.idx = qs;
571 memset(&rxq->stats, 0, sizeof(rxq->stats));
572 }
573 }
574
575 /*
576 * Create the reverse mappings for the queues.
577 */
578 s->egr_base = s->ethtxq[0].q.abs_id - s->ethtxq[0].q.cntxt_id;
579 s->ingr_base = s->ethrxq[0].rspq.abs_id - s->ethrxq[0].rspq.cntxt_id;
580 IQ_MAP(s, s->fw_evtq.abs_id) = &s->fw_evtq;
581 for_each_port(adapter, pidx) {
582 struct net_device *dev = adapter->port[pidx];
583 struct port_info *pi = netdev_priv(dev);
584 struct sge_eth_rxq *rxq = &s->ethrxq[pi->first_qset];
585 struct sge_eth_txq *txq = &s->ethtxq[pi->first_qset];
586 int nqsets = (adapter->flags & USING_MSIX) ? pi->nqsets : 1;
587 int qs;
588
589 for (qs = 0; qs < nqsets; qs++, rxq++, txq++) {
590 IQ_MAP(s, rxq->rspq.abs_id) = &rxq->rspq;
591 EQ_MAP(s, txq->q.abs_id) = &txq->q;
592
593 /*
594 * The FW_IQ_CMD doesn't return the Absolute Queue IDs
595 * for Free Lists but since all of the Egress Queues
596 * (including Free Lists) have Relative Queue IDs
597 * which are computed as Absolute - Base Queue ID, we
598 * can synthesize the Absolute Queue IDs for the Free
599 * Lists. This is useful for debugging purposes when
600 * we want to dump Queue Contexts via the PF Driver.
601 */
602 rxq->fl.abs_id = rxq->fl.cntxt_id + s->egr_base;
603 EQ_MAP(s, rxq->fl.abs_id) = &rxq->fl;
604 }
605 }
606 return 0;
607
608err_free_queues:
609 t4vf_free_sge_resources(adapter);
610 return err;
611}
612
613/*
614 * Set up Receive Side Scaling (RSS) to distribute packets to multiple receive
615 * queues. We configure the RSS CPU lookup table to distribute to the number
616 * of HW receive queues, and the response queue lookup table to narrow that
617 * down to the response queues actually configured for each "port" (Virtual
618 * Interface). We always configure the RSS mapping for all ports since the
619 * mapping table has plenty of entries.
620 */
621static int setup_rss(struct adapter *adapter)
622{
623 int pidx;
624
625 for_each_port(adapter, pidx) {
626 struct port_info *pi = adap2pinfo(adapter, pidx);
627 struct sge_eth_rxq *rxq = &adapter->sge.ethrxq[pi->first_qset];
628 u16 rss[MAX_PORT_QSETS];
629 int qs, err;
630
631 for (qs = 0; qs < pi->nqsets; qs++)
632 rss[qs] = rxq[qs].rspq.abs_id;
633
634 err = t4vf_config_rss_range(adapter, pi->viid,
635 0, pi->rss_size, rss, pi->nqsets);
636 if (err)
637 return err;
638
639 /*
640 * Perform Global RSS Mode-specific initialization.
641 */
642 switch (adapter->params.rss.mode) {
643 case FW_RSS_GLB_CONFIG_CMD_MODE_BASICVIRTUAL:
644 /*
645 * If Tunnel All Lookup isn't specified in the global
646 * RSS Configuration, then we need to specify a
647 * default Ingress Queue for any ingress packets which
648 * aren't hashed. We'll use our first ingress queue
649 * ...
650 */
651 if (!adapter->params.rss.u.basicvirtual.tnlalllookup) {
652 union rss_vi_config config;
653 err = t4vf_read_rss_vi_config(adapter,
654 pi->viid,
655 &config);
656 if (err)
657 return err;
658 config.basicvirtual.defaultq =
659 rxq[0].rspq.abs_id;
660 err = t4vf_write_rss_vi_config(adapter,
661 pi->viid,
662 &config);
663 if (err)
664 return err;
665 }
666 break;
667 }
668 }
669
670 return 0;
671}
672
673/*
674 * Bring the adapter up. Called whenever we go from no "ports" open to having
675 * one open. This function performs the actions necessary to make an adapter
676 * operational, such as completing the initialization of HW modules, and
677 * enabling interrupts. Must be called with the rtnl lock held. (Note that
678 * this is called "cxgb_up" in the PF Driver.)
679 */
680static int adapter_up(struct adapter *adapter)
681{
682 int err;
683
684 /*
685 * If this is the first time we've been called, perform basic
686 * adapter setup. Once we've done this, many of our adapter
687 * parameters can no longer be changed ...
688 */
689 if ((adapter->flags & FULL_INIT_DONE) == 0) {
690 err = setup_sge_queues(adapter);
691 if (err)
692 return err;
693 err = setup_rss(adapter);
694 if (err) {
695 t4vf_free_sge_resources(adapter);
696 return err;
697 }
698
699 if (adapter->flags & USING_MSIX)
700 name_msix_vecs(adapter);
701 adapter->flags |= FULL_INIT_DONE;
702 }
703
704 /*
705 * Acquire our interrupt resources. We only support MSI-X and MSI.
706 */
707 BUG_ON((adapter->flags & (USING_MSIX|USING_MSI)) == 0);
708 if (adapter->flags & USING_MSIX)
709 err = request_msix_queue_irqs(adapter);
710 else
711 err = request_irq(adapter->pdev->irq,
712 t4vf_intr_handler(adapter), 0,
713 adapter->name, adapter);
714 if (err) {
715 dev_err(adapter->pdev_dev, "request_irq failed, err %d\n",
716 err);
717 return err;
718 }
719
720 /*
721 * Enable NAPI ingress processing and return success.
722 */
723 enable_rx(adapter);
724 t4vf_sge_start(adapter);
725 return 0;
726}
727
728/*
729 * Bring the adapter down. Called whenever the last "port" (Virtual
730 * Interface) closed. (Note that this routine is called "cxgb_down" in the PF
731 * Driver.)
732 */
733static void adapter_down(struct adapter *adapter)
734{
735 /*
736 * Free interrupt resources.
737 */
738 if (adapter->flags & USING_MSIX)
739 free_msix_queue_irqs(adapter);
740 else
741 free_irq(adapter->pdev->irq, adapter);
742
743 /*
744 * Wait for NAPI handlers to finish.
745 */
746 quiesce_rx(adapter);
747}
748
749/*
750 * Start up a net device.
751 */
752static int cxgb4vf_open(struct net_device *dev)
753{
754 int err;
755 struct port_info *pi = netdev_priv(dev);
756 struct adapter *adapter = pi->adapter;
757
758 /*
759 * If this is the first interface that we're opening on the "adapter",
760 * bring the "adapter" up now.
761 */
762 if (adapter->open_device_map == 0) {
763 err = adapter_up(adapter);
764 if (err)
765 return err;
766 }
767
768 /*
769 * Note that this interface is up and start everything up ...
770 */
771 dev->real_num_tx_queues = pi->nqsets;
772 set_bit(pi->port_id, &adapter->open_device_map);
773 link_start(dev);
774 netif_tx_start_all_queues(dev);
775 return 0;
776}
777
778/*
779 * Shut down a net device. This routine is called "cxgb_close" in the PF
780 * Driver ...
781 */
782static int cxgb4vf_stop(struct net_device *dev)
783{
784 int ret;
785 struct port_info *pi = netdev_priv(dev);
786 struct adapter *adapter = pi->adapter;
787
788 netif_tx_stop_all_queues(dev);
789 netif_carrier_off(dev);
790 ret = t4vf_enable_vi(adapter, pi->viid, false, false);
791 pi->link_cfg.link_ok = 0;
792
793 clear_bit(pi->port_id, &adapter->open_device_map);
794 if (adapter->open_device_map == 0)
795 adapter_down(adapter);
796 return 0;
797}
798
799/*
800 * Translate our basic statistics into the standard "ifconfig" statistics.
801 */
802static struct net_device_stats *cxgb4vf_get_stats(struct net_device *dev)
803{
804 struct t4vf_port_stats stats;
805 struct port_info *pi = netdev2pinfo(dev);
806 struct adapter *adapter = pi->adapter;
807 struct net_device_stats *ns = &dev->stats;
808 int err;
809
810 spin_lock(&adapter->stats_lock);
811 err = t4vf_get_port_stats(adapter, pi->pidx, &stats);
812 spin_unlock(&adapter->stats_lock);
813
814 memset(ns, 0, sizeof(*ns));
815 if (err)
816 return ns;
817
818 ns->tx_bytes = (stats.tx_bcast_bytes + stats.tx_mcast_bytes +
819 stats.tx_ucast_bytes + stats.tx_offload_bytes);
820 ns->tx_packets = (stats.tx_bcast_frames + stats.tx_mcast_frames +
821 stats.tx_ucast_frames + stats.tx_offload_frames);
822 ns->rx_bytes = (stats.rx_bcast_bytes + stats.rx_mcast_bytes +
823 stats.rx_ucast_bytes);
824 ns->rx_packets = (stats.rx_bcast_frames + stats.rx_mcast_frames +
825 stats.rx_ucast_frames);
826 ns->multicast = stats.rx_mcast_frames;
827 ns->tx_errors = stats.tx_drop_frames;
828 ns->rx_errors = stats.rx_err_frames;
829
830 return ns;
831}
832
833/*
834 * Collect up to maxaddrs worth of a netdevice's unicast addresses into an
835 * array of addrss pointers and return the number collected.
836 */
837static inline int collect_netdev_uc_list_addrs(const struct net_device *dev,
838 const u8 **addr,
839 unsigned int maxaddrs)
840{
841 unsigned int naddr = 0;
842 const struct netdev_hw_addr *ha;
843
844 for_each_dev_addr(dev, ha) {
845 addr[naddr++] = ha->addr;
846 if (naddr >= maxaddrs)
847 break;
848 }
849 return naddr;
850}
851
852/*
853 * Collect up to maxaddrs worth of a netdevice's multicast addresses into an
854 * array of addrss pointers and return the number collected.
855 */
856static inline int collect_netdev_mc_list_addrs(const struct net_device *dev,
857 const u8 **addr,
858 unsigned int maxaddrs)
859{
860 unsigned int naddr = 0;
861 const struct netdev_hw_addr *ha;
862
863 netdev_for_each_mc_addr(ha, dev) {
864 addr[naddr++] = ha->addr;
865 if (naddr >= maxaddrs)
866 break;
867 }
868 return naddr;
869}
870
871/*
872 * Configure the exact and hash address filters to handle a port's multicast
873 * and secondary unicast MAC addresses.
874 */
875static int set_addr_filters(const struct net_device *dev, bool sleep)
876{
877 u64 mhash = 0;
878 u64 uhash = 0;
879 bool free = true;
880 u16 filt_idx[7];
881 const u8 *addr[7];
882 int ret, naddr = 0;
883 const struct port_info *pi = netdev_priv(dev);
884
885 /* first do the secondary unicast addresses */
886 naddr = collect_netdev_uc_list_addrs(dev, addr, ARRAY_SIZE(addr));
887 if (naddr > 0) {
888 ret = t4vf_alloc_mac_filt(pi->adapter, pi->viid, free,
889 naddr, addr, filt_idx, &uhash, sleep);
890 if (ret < 0)
891 return ret;
892
893 free = false;
894 }
895
896 /* next set up the multicast addresses */
897 naddr = collect_netdev_mc_list_addrs(dev, addr, ARRAY_SIZE(addr));
898 if (naddr > 0) {
899 ret = t4vf_alloc_mac_filt(pi->adapter, pi->viid, free,
900 naddr, addr, filt_idx, &mhash, sleep);
901 if (ret < 0)
902 return ret;
903 }
904
905 return t4vf_set_addr_hash(pi->adapter, pi->viid, uhash != 0,
906 uhash | mhash, sleep);
907}
908
909/*
910 * Set RX properties of a port, such as promiscruity, address filters, and MTU.
911 * If @mtu is -1 it is left unchanged.
912 */
913static int set_rxmode(struct net_device *dev, int mtu, bool sleep_ok)
914{
915 int ret;
916 struct port_info *pi = netdev_priv(dev);
917
918 ret = set_addr_filters(dev, sleep_ok);
919 if (ret == 0)
920 ret = t4vf_set_rxmode(pi->adapter, pi->viid, -1,
921 (dev->flags & IFF_PROMISC) != 0,
922 (dev->flags & IFF_ALLMULTI) != 0,
923 1, -1, sleep_ok);
924 return ret;
925}
926
927/*
928 * Set the current receive modes on the device.
929 */
930static void cxgb4vf_set_rxmode(struct net_device *dev)
931{
932 /* unfortunately we can't return errors to the stack */
933 set_rxmode(dev, -1, false);
934}
935
936/*
937 * Find the entry in the interrupt holdoff timer value array which comes
938 * closest to the specified interrupt holdoff value.
939 */
940static int closest_timer(const struct sge *s, int us)
941{
942 int i, timer_idx = 0, min_delta = INT_MAX;
943
944 for (i = 0; i < ARRAY_SIZE(s->timer_val); i++) {
945 int delta = us - s->timer_val[i];
946 if (delta < 0)
947 delta = -delta;
948 if (delta < min_delta) {
949 min_delta = delta;
950 timer_idx = i;
951 }
952 }
953 return timer_idx;
954}
955
956static int closest_thres(const struct sge *s, int thres)
957{
958 int i, delta, pktcnt_idx = 0, min_delta = INT_MAX;
959
960 for (i = 0; i < ARRAY_SIZE(s->counter_val); i++) {
961 delta = thres - s->counter_val[i];
962 if (delta < 0)
963 delta = -delta;
964 if (delta < min_delta) {
965 min_delta = delta;
966 pktcnt_idx = i;
967 }
968 }
969 return pktcnt_idx;
970}
971
972/*
973 * Return a queue's interrupt hold-off time in us. 0 means no timer.
974 */
975static unsigned int qtimer_val(const struct adapter *adapter,
976 const struct sge_rspq *rspq)
977{
978 unsigned int timer_idx = QINTR_TIMER_IDX_GET(rspq->intr_params);
979
980 return timer_idx < SGE_NTIMERS
981 ? adapter->sge.timer_val[timer_idx]
982 : 0;
983}
984
985/**
986 * set_rxq_intr_params - set a queue's interrupt holdoff parameters
987 * @adapter: the adapter
988 * @rspq: the RX response queue
989 * @us: the hold-off time in us, or 0 to disable timer
990 * @cnt: the hold-off packet count, or 0 to disable counter
991 *
992 * Sets an RX response queue's interrupt hold-off time and packet count.
993 * At least one of the two needs to be enabled for the queue to generate
994 * interrupts.
995 */
996static int set_rxq_intr_params(struct adapter *adapter, struct sge_rspq *rspq,
997 unsigned int us, unsigned int cnt)
998{
999 unsigned int timer_idx;
1000
1001 /*
1002 * If both the interrupt holdoff timer and count are specified as
1003 * zero, default to a holdoff count of 1 ...
1004 */
1005 if ((us | cnt) == 0)
1006 cnt = 1;
1007
1008 /*
1009 * If an interrupt holdoff count has been specified, then find the
1010 * closest configured holdoff count and use that. If the response
1011 * queue has already been created, then update its queue context
1012 * parameters ...
1013 */
1014 if (cnt) {
1015 int err;
1016 u32 v, pktcnt_idx;
1017
1018 pktcnt_idx = closest_thres(&adapter->sge, cnt);
1019 if (rspq->desc && rspq->pktcnt_idx != pktcnt_idx) {
1020 v = FW_PARAMS_MNEM(FW_PARAMS_MNEM_DMAQ) |
1021 FW_PARAMS_PARAM_X(
1022 FW_PARAMS_PARAM_DMAQ_IQ_INTCNTTHRESH) |
1023 FW_PARAMS_PARAM_YZ(rspq->cntxt_id);
1024 err = t4vf_set_params(adapter, 1, &v, &pktcnt_idx);
1025 if (err)
1026 return err;
1027 }
1028 rspq->pktcnt_idx = pktcnt_idx;
1029 }
1030
1031 /*
1032 * Compute the closest holdoff timer index from the supplied holdoff
1033 * timer value.
1034 */
1035 timer_idx = (us == 0
1036 ? SGE_TIMER_RSTRT_CNTR
1037 : closest_timer(&adapter->sge, us));
1038
1039 /*
1040 * Update the response queue's interrupt coalescing parameters and
1041 * return success.
1042 */
1043 rspq->intr_params = (QINTR_TIMER_IDX(timer_idx) |
1044 (cnt > 0 ? QINTR_CNT_EN : 0));
1045 return 0;
1046}
1047
/*
 * Return a version number to identify the type of adapter.  The scheme is:
 * - bits 0..9: chip version
 * - bits 10..15: chip revision
 *
 * The value is fixed for this driver; @adapter is not consulted.
 */
static inline unsigned int mk_adap_vers(const struct adapter *adapter)
{
	/* Chip version 4, revision 0x3f (cxgb4vf). */
	const unsigned int chip_version = 4;
	const unsigned int chip_revision = 0x3f;

	return chip_version | (chip_revision << 10);
}
1060
/*
 * Execute the specified ioctl command.  No command is currently supported,
 * so every request yields -EOPNOTSUPP.
 */
static int cxgb4vf_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
{
	switch (cmd) {
	/*
	 * The VF Driver doesn't have access to any of the other common
	 * Ethernet device ioctl()'s (like reading/writing PHY registers,
	 * etc.), so there are no cases here.
	 */
	default:
		return -EOPNOTSUPP;
	}
}
1081
1082/*
1083 * Change the device's MTU.
1084 */
1085static int cxgb4vf_change_mtu(struct net_device *dev, int new_mtu)
1086{
1087 int ret;
1088 struct port_info *pi = netdev_priv(dev);
1089
1090 /* accommodate SACK */
1091 if (new_mtu < 81)
1092 return -EINVAL;
1093
1094 ret = t4vf_set_rxmode(pi->adapter, pi->viid, new_mtu,
1095 -1, -1, -1, -1, true);
1096 if (!ret)
1097 dev->mtu = new_mtu;
1098 return ret;
1099}
1100
1101/*
1102 * Change the devices MAC address.
1103 */
1104static int cxgb4vf_set_mac_addr(struct net_device *dev, void *_addr)
1105{
1106 int ret;
1107 struct sockaddr *addr = _addr;
1108 struct port_info *pi = netdev_priv(dev);
1109
1110 if (!is_valid_ether_addr(addr->sa_data))
1111 return -EINVAL;
1112
1113 ret = t4vf_change_mac(pi->adapter, pi->viid, pi->xact_addr_filt,
1114 addr->sa_data, true);
1115 if (ret < 0)
1116 return ret;
1117
1118 memcpy(dev->dev_addr, addr->sa_data, dev->addr_len);
1119 pi->xact_addr_filt = ret;
1120 return 0;
1121}
1122
1123/*
1124 * Return a TX Queue on which to send the specified skb.
1125 */
1126static u16 cxgb4vf_select_queue(struct net_device *dev, struct sk_buff *skb)
1127{
1128 /*
1129 * XXX For now just use the default hash but we probably want to
1130 * XXX look at other possibilities ...
1131 */
1132 return skb_tx_hash(dev, skb);
1133}
1134
#ifdef CONFIG_NET_POLL_CONTROLLER
/*
 * Poll all of our receive queues.  This is called outside of normal
 * interrupt context.  With MSI-X the per-queue interrupt handler is invoked
 * directly for each of the port's response queues; otherwise the adapter's
 * single interrupt handler is invoked.
 */
static void cxgb4vf_poll_controller(struct net_device *dev)
{
	struct port_info *pi = netdev_priv(dev);
	struct adapter *adapter = pi->adapter;
	struct sge_eth_rxq *rxq;
	int remaining;

	if (!(adapter->flags & USING_MSIX)) {
		t4vf_intr_handler(adapter)(0, adapter);
		return;
	}

	rxq = &adapter->sge.ethrxq[pi->first_qset];
	remaining = pi->nqsets;
	while (remaining) {
		t4vf_sge_intr_msix(0, &rxq->rspq);
		rxq++;
		remaining--;
	}
}
#endif
1158
1159/*
1160 * Ethtool operations.
1161 * ===================
1162 *
1163 * Note that we don't support any ethtool operations which change the physical
1164 * state of the port to which we're linked.
1165 */
1166
1167/*
1168 * Return current port link settings.
1169 */
1170static int cxgb4vf_get_settings(struct net_device *dev,
1171 struct ethtool_cmd *cmd)
1172{
1173 const struct port_info *pi = netdev_priv(dev);
1174
1175 cmd->supported = pi->link_cfg.supported;
1176 cmd->advertising = pi->link_cfg.advertising;
1177 cmd->speed = netif_carrier_ok(dev) ? pi->link_cfg.speed : -1;
1178 cmd->duplex = DUPLEX_FULL;
1179
1180 cmd->port = (cmd->supported & SUPPORTED_TP) ? PORT_TP : PORT_FIBRE;
1181 cmd->phy_address = pi->port_id;
1182 cmd->transceiver = XCVR_EXTERNAL;
1183 cmd->autoneg = pi->link_cfg.autoneg;
1184 cmd->maxtxpkt = 0;
1185 cmd->maxrxpkt = 0;
1186 return 0;
1187}
1188
1189/*
1190 * Return our driver information.
1191 */
1192static void cxgb4vf_get_drvinfo(struct net_device *dev,
1193 struct ethtool_drvinfo *drvinfo)
1194{
1195 struct adapter *adapter = netdev2adap(dev);
1196
1197 strcpy(drvinfo->driver, KBUILD_MODNAME);
1198 strcpy(drvinfo->version, DRV_VERSION);
1199 strcpy(drvinfo->bus_info, pci_name(to_pci_dev(dev->dev.parent)));
1200 snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version),
1201 "%u.%u.%u.%u, TP %u.%u.%u.%u",
1202 FW_HDR_FW_VER_MAJOR_GET(adapter->params.dev.fwrev),
1203 FW_HDR_FW_VER_MINOR_GET(adapter->params.dev.fwrev),
1204 FW_HDR_FW_VER_MICRO_GET(adapter->params.dev.fwrev),
1205 FW_HDR_FW_VER_BUILD_GET(adapter->params.dev.fwrev),
1206 FW_HDR_FW_VER_MAJOR_GET(adapter->params.dev.tprev),
1207 FW_HDR_FW_VER_MINOR_GET(adapter->params.dev.tprev),
1208 FW_HDR_FW_VER_MICRO_GET(adapter->params.dev.tprev),
1209 FW_HDR_FW_VER_BUILD_GET(adapter->params.dev.tprev));
1210}
1211
1212/*
1213 * Return current adapter message level.
1214 */
1215static u32 cxgb4vf_get_msglevel(struct net_device *dev)
1216{
1217 return netdev2adap(dev)->msg_enable;
1218}
1219
1220/*
1221 * Set current adapter message level.
1222 */
1223static void cxgb4vf_set_msglevel(struct net_device *dev, u32 msglevel)
1224{
1225 netdev2adap(dev)->msg_enable = msglevel;
1226}
1227
1228/*
1229 * Return the device's current Queue Set ring size parameters along with the
1230 * allowed maximum values. Since ethtool doesn't understand the concept of
1231 * multi-queue devices, we just return the current values associated with the
1232 * first Queue Set.
1233 */
1234static void cxgb4vf_get_ringparam(struct net_device *dev,
1235 struct ethtool_ringparam *rp)
1236{
1237 const struct port_info *pi = netdev_priv(dev);
1238 const struct sge *s = &pi->adapter->sge;
1239
1240 rp->rx_max_pending = MAX_RX_BUFFERS;
1241 rp->rx_mini_max_pending = MAX_RSPQ_ENTRIES;
1242 rp->rx_jumbo_max_pending = 0;
1243 rp->tx_max_pending = MAX_TXQ_ENTRIES;
1244
1245 rp->rx_pending = s->ethrxq[pi->first_qset].fl.size - MIN_FL_RESID;
1246 rp->rx_mini_pending = s->ethrxq[pi->first_qset].rspq.size;
1247 rp->rx_jumbo_pending = 0;
1248 rp->tx_pending = s->ethtxq[pi->first_qset].q.size;
1249}
1250
1251/*
1252 * Set the Queue Set ring size parameters for the device. Again, since
1253 * ethtool doesn't allow for the concept of multiple queues per device, we'll
1254 * apply these new values across all of the Queue Sets associated with the
1255 * device -- after vetting them of course!
1256 */
1257static int cxgb4vf_set_ringparam(struct net_device *dev,
1258 struct ethtool_ringparam *rp)
1259{
1260 const struct port_info *pi = netdev_priv(dev);
1261 struct adapter *adapter = pi->adapter;
1262 struct sge *s = &adapter->sge;
1263 int qs;
1264
1265 if (rp->rx_pending > MAX_RX_BUFFERS ||
1266 rp->rx_jumbo_pending ||
1267 rp->tx_pending > MAX_TXQ_ENTRIES ||
1268 rp->rx_mini_pending > MAX_RSPQ_ENTRIES ||
1269 rp->rx_mini_pending < MIN_RSPQ_ENTRIES ||
1270 rp->rx_pending < MIN_FL_ENTRIES ||
1271 rp->tx_pending < MIN_TXQ_ENTRIES)
1272 return -EINVAL;
1273
1274 if (adapter->flags & FULL_INIT_DONE)
1275 return -EBUSY;
1276
1277 for (qs = pi->first_qset; qs < pi->first_qset + pi->nqsets; qs++) {
1278 s->ethrxq[qs].fl.size = rp->rx_pending + MIN_FL_RESID;
1279 s->ethrxq[qs].rspq.size = rp->rx_mini_pending;
1280 s->ethtxq[qs].q.size = rp->tx_pending;
1281 }
1282 return 0;
1283}
1284
1285/*
1286 * Return the interrupt holdoff timer and count for the first Queue Set on the
1287 * device. Our extension ioctl() (the cxgbtool interface) allows the
1288 * interrupt holdoff timer to be read on all of the device's Queue Sets.
1289 */
1290static int cxgb4vf_get_coalesce(struct net_device *dev,
1291 struct ethtool_coalesce *coalesce)
1292{
1293 const struct port_info *pi = netdev_priv(dev);
1294 const struct adapter *adapter = pi->adapter;
1295 const struct sge_rspq *rspq = &adapter->sge.ethrxq[pi->first_qset].rspq;
1296
1297 coalesce->rx_coalesce_usecs = qtimer_val(adapter, rspq);
1298 coalesce->rx_max_coalesced_frames =
1299 ((rspq->intr_params & QINTR_CNT_EN)
1300 ? adapter->sge.counter_val[rspq->pktcnt_idx]
1301 : 0);
1302 return 0;
1303}
1304
1305/*
1306 * Set the RX interrupt holdoff timer and count for the first Queue Set on the
1307 * interface. Our extension ioctl() (the cxgbtool interface) allows us to set
1308 * the interrupt holdoff timer on any of the device's Queue Sets.
1309 */
1310static int cxgb4vf_set_coalesce(struct net_device *dev,
1311 struct ethtool_coalesce *coalesce)
1312{
1313 const struct port_info *pi = netdev_priv(dev);
1314 struct adapter *adapter = pi->adapter;
1315
1316 return set_rxq_intr_params(adapter,
1317 &adapter->sge.ethrxq[pi->first_qset].rspq,
1318 coalesce->rx_coalesce_usecs,
1319 coalesce->rx_max_coalesced_frames);
1320}
1321
1322/*
1323 * Report current port link pause parameter settings.
1324 */
1325static void cxgb4vf_get_pauseparam(struct net_device *dev,
1326 struct ethtool_pauseparam *pauseparam)
1327{
1328 struct port_info *pi = netdev_priv(dev);
1329
1330 pauseparam->autoneg = (pi->link_cfg.requested_fc & PAUSE_AUTONEG) != 0;
1331 pauseparam->rx_pause = (pi->link_cfg.fc & PAUSE_RX) != 0;
1332 pauseparam->tx_pause = (pi->link_cfg.fc & PAUSE_TX) != 0;
1333}
1334
1335/*
1336 * Return whether RX Checksum Offloading is currently enabled for the device.
1337 */
1338static u32 cxgb4vf_get_rx_csum(struct net_device *dev)
1339{
1340 struct port_info *pi = netdev_priv(dev);
1341
1342 return (pi->rx_offload & RX_CSO) != 0;
1343}
1344
1345/*
1346 * Turn RX Checksum Offloading on or off for the device.
1347 */
1348static int cxgb4vf_set_rx_csum(struct net_device *dev, u32 csum)
1349{
1350 struct port_info *pi = netdev_priv(dev);
1351
1352 if (csum)
1353 pi->rx_offload |= RX_CSO;
1354 else
1355 pi->rx_offload &= ~RX_CSO;
1356 return 0;
1357}
1358
1359/*
1360 * Identify the port by blinking the port's LED.
1361 */
1362static int cxgb4vf_phys_id(struct net_device *dev, u32 id)
1363{
1364 struct port_info *pi = netdev_priv(dev);
1365
1366 return t4vf_identify_port(pi->adapter, pi->viid, 5);
1367}
1368
1369/*
1370 * Port stats maintained per queue of the port.
1371 */
1372struct queue_port_stats {
1373 u64 tso;
1374 u64 tx_csum;
1375 u64 rx_csum;
1376 u64 vlan_ex;
1377 u64 vlan_ins;
1378};
1379
1380/*
1381 * Strings for the ETH_SS_STATS statistics set ("ethtool -S"). Note that
1382 * these need to match the order of statistics returned by
1383 * t4vf_get_port_stats().
1384 */
1385static const char stats_strings[][ETH_GSTRING_LEN] = {
1386 /*
1387 * These must match the layout of the t4vf_port_stats structure.
1388 */
1389 "TxBroadcastBytes ",
1390 "TxBroadcastFrames ",
1391 "TxMulticastBytes ",
1392 "TxMulticastFrames ",
1393 "TxUnicastBytes ",
1394 "TxUnicastFrames ",
1395 "TxDroppedFrames ",
1396 "TxOffloadBytes ",
1397 "TxOffloadFrames ",
1398 "RxBroadcastBytes ",
1399 "RxBroadcastFrames ",
1400 "RxMulticastBytes ",
1401 "RxMulticastFrames ",
1402 "RxUnicastBytes ",
1403 "RxUnicastFrames ",
1404 "RxErrorFrames ",
1405
1406 /*
1407 * These are accumulated per-queue statistics and must match the
1408 * order of the fields in the queue_port_stats structure.
1409 */
1410 "TSO ",
1411 "TxCsumOffload ",
1412 "RxCsumGood ",
1413 "VLANextractions ",
1414 "VLANinsertions ",
1415};
1416
1417/*
1418 * Return the number of statistics in the specified statistics set.
1419 */
1420static int cxgb4vf_get_sset_count(struct net_device *dev, int sset)
1421{
1422 switch (sset) {
1423 case ETH_SS_STATS:
1424 return ARRAY_SIZE(stats_strings);
1425 default:
1426 return -EOPNOTSUPP;
1427 }
1428 /*NOTREACHED*/
1429}
1430
1431/*
1432 * Return the strings for the specified statistics set.
1433 */
1434static void cxgb4vf_get_strings(struct net_device *dev,
1435 u32 sset,
1436 u8 *data)
1437{
1438 switch (sset) {
1439 case ETH_SS_STATS:
1440 memcpy(data, stats_strings, sizeof(stats_strings));
1441 break;
1442 }
1443}
1444
1445/*
1446 * Small utility routine to accumulate queue statistics across the queues of
1447 * a "port".
1448 */
1449static void collect_sge_port_stats(const struct adapter *adapter,
1450 const struct port_info *pi,
1451 struct queue_port_stats *stats)
1452{
1453 const struct sge_eth_txq *txq = &adapter->sge.ethtxq[pi->first_qset];
1454 const struct sge_eth_rxq *rxq = &adapter->sge.ethrxq[pi->first_qset];
1455 int qs;
1456
1457 memset(stats, 0, sizeof(*stats));
1458 for (qs = 0; qs < pi->nqsets; qs++, rxq++, txq++) {
1459 stats->tso += txq->tso;
1460 stats->tx_csum += txq->tx_cso;
1461 stats->rx_csum += rxq->stats.rx_cso;
1462 stats->vlan_ex += rxq->stats.vlan_ex;
1463 stats->vlan_ins += txq->vlan_ins;
1464 }
1465}
1466
1467/*
1468 * Return the ETH_SS_STATS statistics set.
1469 */
1470static void cxgb4vf_get_ethtool_stats(struct net_device *dev,
1471 struct ethtool_stats *stats,
1472 u64 *data)
1473{
1474 struct port_info *pi = netdev2pinfo(dev);
1475 struct adapter *adapter = pi->adapter;
1476 int err = t4vf_get_port_stats(adapter, pi->pidx,
1477 (struct t4vf_port_stats *)data);
1478 if (err)
1479 memset(data, 0, sizeof(struct t4vf_port_stats));
1480
1481 data += sizeof(struct t4vf_port_stats) / sizeof(u64);
1482 collect_sge_port_stats(adapter, pi, (struct queue_port_stats *)data);
1483}
1484
1485/*
1486 * Return the size of our register map.
1487 */
1488static int cxgb4vf_get_regs_len(struct net_device *dev)
1489{
1490 return T4VF_REGMAP_SIZE;
1491}
1492
1493/*
1494 * Dump a block of registers, start to end inclusive, into a buffer.
1495 */
1496static void reg_block_dump(struct adapter *adapter, void *regbuf,
1497 unsigned int start, unsigned int end)
1498{
1499 u32 *bp = regbuf + start - T4VF_REGMAP_START;
1500
1501 for ( ; start <= end; start += sizeof(u32)) {
1502 /*
1503 * Avoid reading the Mailbox Control register since that
1504 * can trigger a Mailbox Ownership Arbitration cycle and
1505 * interfere with communication with the firmware.
1506 */
1507 if (start == T4VF_CIM_BASE_ADDR + CIM_VF_EXT_MAILBOX_CTRL)
1508 *bp++ = 0xffff;
1509 else
1510 *bp++ = t4_read_reg(adapter, start);
1511 }
1512}
1513
1514/*
1515 * Copy our entire register map into the provided buffer.
1516 */
1517static void cxgb4vf_get_regs(struct net_device *dev,
1518 struct ethtool_regs *regs,
1519 void *regbuf)
1520{
1521 struct adapter *adapter = netdev2adap(dev);
1522
1523 regs->version = mk_adap_vers(adapter);
1524
1525 /*
1526 * Fill in register buffer with our register map.
1527 */
1528 memset(regbuf, 0, T4VF_REGMAP_SIZE);
1529
1530 reg_block_dump(adapter, regbuf,
1531 T4VF_SGE_BASE_ADDR + T4VF_MOD_MAP_SGE_FIRST,
1532 T4VF_SGE_BASE_ADDR + T4VF_MOD_MAP_SGE_LAST);
1533 reg_block_dump(adapter, regbuf,
1534 T4VF_MPS_BASE_ADDR + T4VF_MOD_MAP_MPS_FIRST,
1535 T4VF_MPS_BASE_ADDR + T4VF_MOD_MAP_MPS_LAST);
1536 reg_block_dump(adapter, regbuf,
1537 T4VF_PL_BASE_ADDR + T4VF_MOD_MAP_PL_FIRST,
1538 T4VF_PL_BASE_ADDR + T4VF_MOD_MAP_PL_LAST);
1539 reg_block_dump(adapter, regbuf,
1540 T4VF_CIM_BASE_ADDR + T4VF_MOD_MAP_CIM_FIRST,
1541 T4VF_CIM_BASE_ADDR + T4VF_MOD_MAP_CIM_LAST);
1542
1543 reg_block_dump(adapter, regbuf,
1544 T4VF_MBDATA_BASE_ADDR + T4VF_MBDATA_FIRST,
1545 T4VF_MBDATA_BASE_ADDR + T4VF_MBDATA_LAST);
1546}
1547
1548/*
1549 * Report current Wake On LAN settings.
1550 */
1551static void cxgb4vf_get_wol(struct net_device *dev,
1552 struct ethtool_wolinfo *wol)
1553{
1554 wol->supported = 0;
1555 wol->wolopts = 0;
1556 memset(&wol->sopass, 0, sizeof(wol->sopass));
1557}
1558
1559/*
1560 * Set TCP Segmentation Offloading feature capabilities.
1561 */
1562static int cxgb4vf_set_tso(struct net_device *dev, u32 tso)
1563{
1564 if (tso)
1565 dev->features |= NETIF_F_TSO | NETIF_F_TSO6;
1566 else
1567 dev->features &= ~(NETIF_F_TSO | NETIF_F_TSO6);
1568 return 0;
1569}
1570
1571static struct ethtool_ops cxgb4vf_ethtool_ops = {
1572 .get_settings = cxgb4vf_get_settings,
1573 .get_drvinfo = cxgb4vf_get_drvinfo,
1574 .get_msglevel = cxgb4vf_get_msglevel,
1575 .set_msglevel = cxgb4vf_set_msglevel,
1576 .get_ringparam = cxgb4vf_get_ringparam,
1577 .set_ringparam = cxgb4vf_set_ringparam,
1578 .get_coalesce = cxgb4vf_get_coalesce,
1579 .set_coalesce = cxgb4vf_set_coalesce,
1580 .get_pauseparam = cxgb4vf_get_pauseparam,
1581 .get_rx_csum = cxgb4vf_get_rx_csum,
1582 .set_rx_csum = cxgb4vf_set_rx_csum,
1583 .set_tx_csum = ethtool_op_set_tx_ipv6_csum,
1584 .set_sg = ethtool_op_set_sg,
1585 .get_link = ethtool_op_get_link,
1586 .get_strings = cxgb4vf_get_strings,
1587 .phys_id = cxgb4vf_phys_id,
1588 .get_sset_count = cxgb4vf_get_sset_count,
1589 .get_ethtool_stats = cxgb4vf_get_ethtool_stats,
1590 .get_regs_len = cxgb4vf_get_regs_len,
1591 .get_regs = cxgb4vf_get_regs,
1592 .get_wol = cxgb4vf_get_wol,
1593 .set_tso = cxgb4vf_set_tso,
1594};
1595
1596/*
1597 * /sys/kernel/debug/cxgb4vf support code and data.
1598 * ================================================
1599 */
1600
/*
 * Show SGE Queue Set information.  We display QPL Queue Sets per line.
 */
#define QPL	4

/*
 * seq_file ->show() callback for the "sge_qinfo" node.  @v is the iterator
 * cookie handed out by sge_queue_start()/sge_queue_next(), i.e. the entry
 * index plus one.  Entries [0, eth_entries) each describe up to QPL Ethernet
 * Queue Sets side by side; the entry after those describes the firmware
 * event queue and the one after that the forwarded interrupt queue.
 * Always returns 0.
 */
static int sge_qinfo_show(struct seq_file *seq, void *v)
{
	struct adapter *adapter = seq->private;
	int eth_entries = DIV_ROUND_UP(adapter->sge.ethqsets, QPL);
	int qs, r = (uintptr_t)v - 1;

	/* separate every entry after the first with a blank line */
	if (r)
		seq_putc(seq, '\n');

	/*
	 * Row helpers.  S3 prints a left-justified label followed by one
	 * column per Queue Set; it relies on "qs" (column index) and "n"
	 * (number of columns) being in scope at the point of use, and its
	 * value expression is re-evaluated for every column.  S prints string
	 * columns; T and R print unsigned fields of txq[qs] and rxq[qs].
	 */
	#define S3(fmt_spec, s, v) \
		do {\
			seq_printf(seq, "%-12s", s); \
			for (qs = 0; qs < n; ++qs) \
				seq_printf(seq, " %16" fmt_spec, v); \
			seq_putc(seq, '\n'); \
		} while (0)
	#define S(s, v)		S3("s", s, v)
	#define T(s, v)		S3("u", s, txq[qs].v)
	#define R(s, v)		S3("u", s, rxq[qs].v)

	if (r < eth_entries) {
		const struct sge_eth_rxq *rxq = &adapter->sge.ethrxq[r * QPL];
		const struct sge_eth_txq *txq = &adapter->sge.ethtxq[r * QPL];
		/* the last Ethernet entry may hold fewer than QPL Queue Sets */
		int n = min(QPL, adapter->sge.ethqsets - QPL * r);

		S("QType:", "Ethernet");
		S("Interface:",
		  (rxq[qs].rspq.netdev
		   ? rxq[qs].rspq.netdev->name
		   : "N/A"));
		S3("d", "Port:",
		   (rxq[qs].rspq.netdev
		    ? ((struct port_info *)
		       netdev_priv(rxq[qs].rspq.netdev))->port_id
		    : -1));
		T("TxQ ID:", q.abs_id);
		T("TxQ size:", q.size);
		T("TxQ inuse:", q.in_use);
		T("TxQ PIdx:", q.pidx);
		T("TxQ CIdx:", q.cidx);
		R("RspQ ID:", rspq.abs_id);
		R("RspQ size:", rspq.size);
		R("RspQE size:", rspq.iqe_len);
		S3("u", "Intr delay:", qtimer_val(adapter, &rxq[qs].rspq));
		S3("u", "Intr pktcnt:",
		   adapter->sge.counter_val[rxq[qs].rspq.pktcnt_idx]);
		R("RspQ CIdx:", rspq.cidx);
		R("RspQ Gen:", rspq.gen);
		R("FL ID:", fl.abs_id);
		R("FL size:", fl.size - MIN_FL_RESID);
		R("FL avail:", fl.avail);
		R("FL PIdx:", fl.pidx);
		R("FL CIdx:", fl.cidx);
		return 0;
	}

	/* past the Ethernet entries: r now indexes the special queues */
	r -= eth_entries;
	if (r == 0) {
		const struct sge_rspq *evtq = &adapter->sge.fw_evtq;

		seq_printf(seq, "%-12s %16s\n", "QType:", "FW event queue");
		seq_printf(seq, "%-12s %16u\n", "RspQ ID:", evtq->abs_id);
		seq_printf(seq, "%-12s %16u\n", "Intr delay:",
			   qtimer_val(adapter, evtq));
		seq_printf(seq, "%-12s %16u\n", "Intr pktcnt:",
			   adapter->sge.counter_val[evtq->pktcnt_idx]);
		seq_printf(seq, "%-12s %16u\n", "RspQ Cidx:", evtq->cidx);
		seq_printf(seq, "%-12s %16u\n", "RspQ Gen:", evtq->gen);
	} else if (r == 1) {
		/*
		 * sge_queue_entries() only counts this entry when USING_MSI
		 * is set, so it is not reached otherwise.
		 */
		const struct sge_rspq *intrq = &adapter->sge.intrq;

		seq_printf(seq, "%-12s %16s\n", "QType:", "Interrupt Queue");
		seq_printf(seq, "%-12s %16u\n", "RspQ ID:", intrq->abs_id);
		seq_printf(seq, "%-12s %16u\n", "Intr delay:",
			   qtimer_val(adapter, intrq));
		seq_printf(seq, "%-12s %16u\n", "Intr pktcnt:",
			   adapter->sge.counter_val[intrq->pktcnt_idx]);
		seq_printf(seq, "%-12s %16u\n", "RspQ Cidx:", intrq->cidx);
		seq_printf(seq, "%-12s %16u\n", "RspQ Gen:", intrq->gen);
	}

	#undef R
	#undef T
	#undef S
	#undef S3

	return 0;
}
1694
1695/*
1696 * Return the number of "entries" in our "file". We group the multi-Queue
1697 * sections with QPL Queue Sets per "entry". The sections of the output are:
1698 *
1699 * Ethernet RX/TX Queue Sets
1700 * Firmware Event Queue
1701 * Forwarded Interrupt Queue (if in MSI mode)
1702 */
1703static int sge_queue_entries(const struct adapter *adapter)
1704{
1705 return DIV_ROUND_UP(adapter->sge.ethqsets, QPL) + 1 +
1706 ((adapter->flags & USING_MSI) != 0);
1707}
1708
1709static void *sge_queue_start(struct seq_file *seq, loff_t *pos)
1710{
1711 int entries = sge_queue_entries(seq->private);
1712
1713 return *pos < entries ? (void *)((uintptr_t)*pos + 1) : NULL;
1714}
1715
/*
 * seq_file ->stop(): sge_queue_start() acquires nothing, so there is
 * nothing to release here.
 */
static void sge_queue_stop(struct seq_file *seq, void *v)
{
}
1719
1720static void *sge_queue_next(struct seq_file *seq, void *v, loff_t *pos)
1721{
1722 int entries = sge_queue_entries(seq->private);
1723
1724 ++*pos;
1725 return *pos < entries ? (void *)((uintptr_t)*pos + 1) : NULL;
1726}
1727
/*
 * seq_file iterator for the "sge_qinfo" node: the sge_queue_*() routines
 * walk the entries and sge_qinfo_show() formats one entry.
 */
static const struct seq_operations sge_qinfo_seq_ops = {
	.start = sge_queue_start,
	.next  = sge_queue_next,
	.stop  = sge_queue_stop,
	.show  = sge_qinfo_show
};
1734
1735static int sge_qinfo_open(struct inode *inode, struct file *file)
1736{
1737 int res = seq_open(file, &sge_qinfo_seq_ops);
1738
1739 if (!res) {
1740 struct seq_file *seq = file->private_data;
1741 seq->private = inode->i_private;
1742 }
1743 return res;
1744}
1745
/*
 * File operations for the "sge_qinfo" node (see debugfs_files[]): a
 * read-only seq_file opened through sge_qinfo_open().
 */
static const struct file_operations sge_qinfo_debugfs_fops = {
	.owner   = THIS_MODULE,
	.open    = sge_qinfo_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release,
};
1753
1754/*
1755 * Show SGE Queue Set statistics. We display QPL Queues Sets per line.
1756 */
1757#define QPL 4
1758
1759static int sge_qstats_show(struct seq_file *seq, void *v)
1760{
1761 struct adapter *adapter = seq->private;
1762 int eth_entries = DIV_ROUND_UP(adapter->sge.ethqsets, QPL);
1763 int qs, r = (uintptr_t)v - 1;
1764
1765 if (r)
1766 seq_putc(seq, '\n');
1767
1768 #define S3(fmt, s, v) \
1769 do { \
1770 seq_printf(seq, "%-16s", s); \
1771 for (qs = 0; qs < n; ++qs) \
1772 seq_printf(seq, " %8" fmt, v); \
1773 seq_putc(seq, '\n'); \
1774 } while (0)
1775 #define S(s, v) S3("s", s, v)
1776
1777 #define T3(fmt, s, v) S3(fmt, s, txq[qs].v)
1778 #define T(s, v) T3("lu", s, v)
1779
1780 #define R3(fmt, s, v) S3(fmt, s, rxq[qs].v)
1781 #define R(s, v) R3("lu", s, v)
1782
1783 if (r < eth_entries) {
1784 const struct sge_eth_rxq *rxq = &adapter->sge.ethrxq[r * QPL];
1785 const struct sge_eth_txq *txq = &adapter->sge.ethtxq[r * QPL];
1786 int n = min(QPL, adapter->sge.ethqsets - QPL * r);
1787
1788 S("QType:", "Ethernet");
1789 S("Interface:",
1790 (rxq[qs].rspq.netdev
1791 ? rxq[qs].rspq.netdev->name
1792 : "N/A"));
1793 R3("u", "RspQNullInts", rspq.unhandled_irqs);
1794 R("RxPackets:", stats.pkts);
1795 R("RxCSO:", stats.rx_cso);
1796 R("VLANxtract:", stats.vlan_ex);
1797 R("LROmerged:", stats.lro_merged);
1798 R("LROpackets:", stats.lro_pkts);
1799 R("RxDrops:", stats.rx_drops);
1800 T("TSO:", tso);
1801 T("TxCSO:", tx_cso);
1802 T("VLANins:", vlan_ins);
1803 T("TxQFull:", q.stops);
1804 T("TxQRestarts:", q.restarts);
1805 T("TxMapErr:", mapping_err);
1806 R("FLAllocErr:", fl.alloc_failed);
1807 R("FLLrgAlcErr:", fl.large_alloc_failed);
1808 R("FLStarving:", fl.starving);
1809 return 0;
1810 }
1811
1812 r -= eth_entries;
1813 if (r == 0) {
1814 const struct sge_rspq *evtq = &adapter->sge.fw_evtq;
1815
1816 seq_printf(seq, "%-8s %16s\n", "QType:", "FW event queue");
1817 /* no real response queue statistics available to display */
1818 seq_printf(seq, "%-16s %8u\n", "RspQ CIdx:", evtq->cidx);
1819 seq_printf(seq, "%-16s %8u\n", "RspQ Gen:", evtq->gen);
1820 } else if (r == 1) {
1821 const struct sge_rspq *intrq = &adapter->sge.intrq;
1822
1823 seq_printf(seq, "%-8s %16s\n", "QType:", "Interrupt Queue");
1824 /* no real response queue statistics available to display */
1825 seq_printf(seq, "%-16s %8u\n", "RspQ CIdx:", intrq->cidx);
1826 seq_printf(seq, "%-16s %8u\n", "RspQ Gen:", intrq->gen);
1827 }
1828
1829 #undef R
1830 #undef T
1831 #undef S
1832 #undef R3
1833 #undef T3
1834 #undef S3
1835
1836 return 0;
1837}
1838
1839/*
1840 * Return the number of "entries" in our "file". We group the multi-Queue
1841 * sections with QPL Queue Sets per "entry". The sections of the output are:
1842 *
1843 * Ethernet RX/TX Queue Sets
1844 * Firmware Event Queue
1845 * Forwarded Interrupt Queue (if in MSI mode)
1846 */
1847static int sge_qstats_entries(const struct adapter *adapter)
1848{
1849 return DIV_ROUND_UP(adapter->sge.ethqsets, QPL) + 1 +
1850 ((adapter->flags & USING_MSI) != 0);
1851}
1852
1853static void *sge_qstats_start(struct seq_file *seq, loff_t *pos)
1854{
1855 int entries = sge_qstats_entries(seq->private);
1856
1857 return *pos < entries ? (void *)((uintptr_t)*pos + 1) : NULL;
1858}
1859
/*
 * seq_file ->stop(): sge_qstats_start() acquires nothing, so there is
 * nothing to release here.
 */
static void sge_qstats_stop(struct seq_file *seq, void *v)
{
}
1863
1864static void *sge_qstats_next(struct seq_file *seq, void *v, loff_t *pos)
1865{
1866 int entries = sge_qstats_entries(seq->private);
1867
1868 (*pos)++;
1869 return *pos < entries ? (void *)((uintptr_t)*pos + 1) : NULL;
1870}
1871
/*
 * seq_file iterator for the "sge_qstats" node: the sge_qstats_*() routines
 * walk the entries and sge_qstats_show() formats one entry.
 */
static const struct seq_operations sge_qstats_seq_ops = {
	.start = sge_qstats_start,
	.next  = sge_qstats_next,
	.stop  = sge_qstats_stop,
	.show  = sge_qstats_show
};
1878
1879static int sge_qstats_open(struct inode *inode, struct file *file)
1880{
1881 int res = seq_open(file, &sge_qstats_seq_ops);
1882
1883 if (res == 0) {
1884 struct seq_file *seq = file->private_data;
1885 seq->private = inode->i_private;
1886 }
1887 return res;
1888}
1889
/*
 * File operations for the "sge_qstats" node: a read-only seq_file opened
 * through sge_qstats_open().  Despite the "proc" in the name, this table is
 * registered through debugfs_files[].
 */
static const struct file_operations sge_qstats_proc_fops = {
	.owner   = THIS_MODULE,
	.open    = sge_qstats_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release,
};
1897
1898/*
1899 * Show PCI-E SR-IOV Virtual Function Resource Limits.
1900 */
1901static int resources_show(struct seq_file *seq, void *v)
1902{
1903 struct adapter *adapter = seq->private;
1904 struct vf_resources *vfres = &adapter->params.vfres;
1905
1906 #define S(desc, fmt, var) \
1907 seq_printf(seq, "%-60s " fmt "\n", \
1908 desc " (" #var "):", vfres->var)
1909
1910 S("Virtual Interfaces", "%d", nvi);
1911 S("Egress Queues", "%d", neq);
1912 S("Ethernet Control", "%d", nethctrl);
1913 S("Ingress Queues/w Free Lists/Interrupts", "%d", niqflint);
1914 S("Ingress Queues", "%d", niq);
1915 S("Traffic Class", "%d", tc);
1916 S("Port Access Rights Mask", "%#x", pmask);
1917 S("MAC Address Filters", "%d", nexactf);
1918 S("Firmware Command Read Capabilities", "%#x", r_caps);
1919 S("Firmware Command Write/Execute Capabilities", "%#x", wx_caps);
1920
1921 #undef S
1922
1923 return 0;
1924}
1925
1926static int resources_open(struct inode *inode, struct file *file)
1927{
1928 return single_open(file, resources_show, inode->i_private);
1929}
1930
/*
 * File operations for the "resources" node.  It is opened with
 * single_open(), hence the matching single_release().  Despite the "proc"
 * in the name, this table is registered through debugfs_files[].
 */
static const struct file_operations resources_proc_fops = {
	.owner   = THIS_MODULE,
	.open    = resources_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = single_release,
};
1938
1939/*
1940 * Show Virtual Interfaces.
1941 */
1942static int interfaces_show(struct seq_file *seq, void *v)
1943{
1944 if (v == SEQ_START_TOKEN) {
1945 seq_puts(seq, "Interface Port VIID\n");
1946 } else {
1947 struct adapter *adapter = seq->private;
1948 int pidx = (uintptr_t)v - 2;
1949 struct net_device *dev = adapter->port[pidx];
1950 struct port_info *pi = netdev_priv(dev);
1951
1952 seq_printf(seq, "%9s %4d %#5x\n",
1953 dev->name, pi->port_id, pi->viid);
1954 }
1955 return 0;
1956}
1957
1958static inline void *interfaces_get_idx(struct adapter *adapter, loff_t pos)
1959{
1960 return pos <= adapter->params.nports
1961 ? (void *)(uintptr_t)(pos + 1)
1962 : NULL;
1963}
1964
1965static void *interfaces_start(struct seq_file *seq, loff_t *pos)
1966{
1967 return *pos
1968 ? interfaces_get_idx(seq->private, *pos)
1969 : SEQ_START_TOKEN;
1970}
1971
1972static void *interfaces_next(struct seq_file *seq, void *v, loff_t *pos)
1973{
1974 (*pos)++;
1975 return interfaces_get_idx(seq->private, *pos);
1976}
1977
/*
 * seq_file ->stop(): interfaces_start() acquires nothing, so there is
 * nothing to release here.
 */
static void interfaces_stop(struct seq_file *seq, void *v)
{
}
1981
/*
 * seq_file iterator for the "interfaces" node: a header line followed by
 * one line per port, formatted by interfaces_show().
 */
static const struct seq_operations interfaces_seq_ops = {
	.start = interfaces_start,
	.next  = interfaces_next,
	.stop  = interfaces_stop,
	.show  = interfaces_show
};
1988
1989static int interfaces_open(struct inode *inode, struct file *file)
1990{
1991 int res = seq_open(file, &interfaces_seq_ops);
1992
1993 if (res == 0) {
1994 struct seq_file *seq = file->private_data;
1995 seq->private = inode->i_private;
1996 }
1997 return res;
1998}
1999
/*
 * File operations for the "interfaces" node: a read-only seq_file opened
 * through interfaces_open().  Despite the "proc" in the name, this table is
 * registered through debugfs_files[].
 */
static const struct file_operations interfaces_proc_fops = {
	.owner   = THIS_MODULE,
	.open    = interfaces_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release,
};
2007
/*
 * /sys/kernel/debug/cxgb4vf/ files list.
 *
 * Each entry describes one node that setup_debugfs() creates underneath the
 * adapter's debugfs directory.
 */
struct cxgb4vf_debugfs_entry {
	const char *name;		/* name of debugfs node */
	mode_t mode;			/* file system mode */
	const struct file_operations *fops;	/* operations backing the node */
};
2016
/*
 * The debugfs nodes we export; all are world-readable (S_IRUGO) and are
 * created by setup_debugfs() in the order listed here.
 */
static struct cxgb4vf_debugfs_entry debugfs_files[] = {
	{ "sge_qinfo",  S_IRUGO, &sge_qinfo_debugfs_fops },
	{ "sge_qstats", S_IRUGO, &sge_qstats_proc_fops },
	{ "resources",  S_IRUGO, &resources_proc_fops },
	{ "interfaces", S_IRUGO, &interfaces_proc_fops },
};
2023
2024/*
2025 * Module and device initialization and cleanup code.
2026 * ==================================================
2027 */
2028
2029/*
2030 * Set up out /sys/kernel/debug/cxgb4vf sub-nodes. We assume that the
2031 * directory (debugfs_root) has already been set up.
2032 */
2033static int __devinit setup_debugfs(struct adapter *adapter)
2034{
2035 int i;
2036
2037 BUG_ON(adapter->debugfs_root == NULL);
2038
2039 /*
2040 * Debugfs support is best effort.
2041 */
2042 for (i = 0; i < ARRAY_SIZE(debugfs_files); i++)
2043 (void)debugfs_create_file(debugfs_files[i].name,
2044 debugfs_files[i].mode,
2045 adapter->debugfs_root,
2046 (void *)adapter,
2047 debugfs_files[i].fops);
2048
2049 return 0;
2050}
2051
/*
 * Tear down the /sys/kernel/debug/cxgb4vf sub-nodes created above.  We leave
 * it to our caller to tear down the directory (debugfs_root).  The only
 * thing done here is the sanity check that the directory still exists.
 */
static void __devexit cleanup_debugfs(struct adapter *adapter)
{
	BUG_ON(adapter->debugfs_root == NULL);

	/*
	 * Unlike our sister routine cleanup_proc(), we don't need to remove
	 * individual entries because a call will be made to
	 * debugfs_remove_recursive().  We just need to clean up any ancillary
	 * persistent state.
	 */
	/* nothing to do */
}
2068
2069/*
2070 * Perform early "adapter" initialization. This is where we discover what
2071 * adapter parameters we're going to be using and initialize basic adapter
2072 * hardware support.
2073 */
2074static int adap_init0(struct adapter *adapter)
2075{
2076 struct vf_resources *vfres = &adapter->params.vfres;
2077 struct sge_params *sge_params = &adapter->params.sge;
2078 struct sge *s = &adapter->sge;
2079 unsigned int ethqsets;
2080 int err;
2081
2082 /*
2083 * Wait for the device to become ready before proceeding ...
2084 */
2085 err = t4vf_wait_dev_ready(adapter);
2086 if (err) {
2087 dev_err(adapter->pdev_dev, "device didn't become ready:"
2088 " err=%d\n", err);
2089 return err;
2090 }
2091
2092 /*
2093 * Grab basic operational parameters. These will predominantly have
2094 * been set up by the Physical Function Driver or will be hard coded
2095 * into the adapter. We just have to live with them ... Note that
2096 * we _must_ get our VPD parameters before our SGE parameters because
2097 * we need to know the adapter's core clock from the VPD in order to
2098 * properly decode the SGE Timer Values.
2099 */
2100 err = t4vf_get_dev_params(adapter);
2101 if (err) {
2102 dev_err(adapter->pdev_dev, "unable to retrieve adapter"
2103 " device parameters: err=%d\n", err);
2104 return err;
2105 }
2106 err = t4vf_get_vpd_params(adapter);
2107 if (err) {
2108 dev_err(adapter->pdev_dev, "unable to retrieve adapter"
2109 " VPD parameters: err=%d\n", err);
2110 return err;
2111 }
2112 err = t4vf_get_sge_params(adapter);
2113 if (err) {
2114 dev_err(adapter->pdev_dev, "unable to retrieve adapter"
2115 " SGE parameters: err=%d\n", err);
2116 return err;
2117 }
2118 err = t4vf_get_rss_glb_config(adapter);
2119 if (err) {
2120 dev_err(adapter->pdev_dev, "unable to retrieve adapter"
2121 " RSS parameters: err=%d\n", err);
2122 return err;
2123 }
2124 if (adapter->params.rss.mode !=
2125 FW_RSS_GLB_CONFIG_CMD_MODE_BASICVIRTUAL) {
2126 dev_err(adapter->pdev_dev, "unable to operate with global RSS"
2127 " mode %d\n", adapter->params.rss.mode);
2128 return -EINVAL;
2129 }
2130 err = t4vf_sge_init(adapter);
2131 if (err) {
2132 dev_err(adapter->pdev_dev, "unable to use adapter parameters:"
2133 " err=%d\n", err);
2134 return err;
2135 }
2136
2137 /*
2138 * Retrieve our RX interrupt holdoff timer values and counter
2139 * threshold values from the SGE parameters.
2140 */
2141 s->timer_val[0] = core_ticks_to_us(adapter,
2142 TIMERVALUE0_GET(sge_params->sge_timer_value_0_and_1));
2143 s->timer_val[1] = core_ticks_to_us(adapter,
2144 TIMERVALUE1_GET(sge_params->sge_timer_value_0_and_1));
2145 s->timer_val[2] = core_ticks_to_us(adapter,
2146 TIMERVALUE0_GET(sge_params->sge_timer_value_2_and_3));
2147 s->timer_val[3] = core_ticks_to_us(adapter,
2148 TIMERVALUE1_GET(sge_params->sge_timer_value_2_and_3));
2149 s->timer_val[4] = core_ticks_to_us(adapter,
2150 TIMERVALUE0_GET(sge_params->sge_timer_value_4_and_5));
2151 s->timer_val[5] = core_ticks_to_us(adapter,
2152 TIMERVALUE1_GET(sge_params->sge_timer_value_4_and_5));
2153
2154 s->counter_val[0] =
2155 THRESHOLD_0_GET(sge_params->sge_ingress_rx_threshold);
2156 s->counter_val[1] =
2157 THRESHOLD_1_GET(sge_params->sge_ingress_rx_threshold);
2158 s->counter_val[2] =
2159 THRESHOLD_2_GET(sge_params->sge_ingress_rx_threshold);
2160 s->counter_val[3] =
2161 THRESHOLD_3_GET(sge_params->sge_ingress_rx_threshold);
2162
2163 /*
2164 * Grab our Virtual Interface resource allocation, extract the
2165 * features that we're interested in and do a bit of sanity testing on
2166 * what we discover.
2167 */
2168 err = t4vf_get_vfres(adapter);
2169 if (err) {
2170 dev_err(adapter->pdev_dev, "unable to get virtual interface"
2171 " resources: err=%d\n", err);
2172 return err;
2173 }
2174
2175 /*
2176 * The number of "ports" which we support is equal to the number of
2177 * Virtual Interfaces with which we've been provisioned.
2178 */
2179 adapter->params.nports = vfres->nvi;
2180 if (adapter->params.nports > MAX_NPORTS) {
2181 dev_warn(adapter->pdev_dev, "only using %d of %d allowed"
2182 " virtual interfaces\n", MAX_NPORTS,
2183 adapter->params.nports);
2184 adapter->params.nports = MAX_NPORTS;
2185 }
2186
2187 /*
2188 * We need to reserve a number of the ingress queues with Free List
2189 * and Interrupt capabilities for special interrupt purposes (like
2190 * asynchronous firmware messages, or forwarded interrupts if we're
2191 * using MSI). The rest of the FL/Intr-capable ingress queues will be
2192 * matched up one-for-one with Ethernet/Control egress queues in order
2193 * to form "Queue Sets" which will be aportioned between the "ports".
2194 * For each Queue Set, we'll need the ability to allocate two Egress
2195 * Contexts -- one for the Ingress Queue Free List and one for the TX
2196 * Ethernet Queue.
2197 */
2198 ethqsets = vfres->niqflint - INGQ_EXTRAS;
2199 if (vfres->nethctrl != ethqsets) {
2200 dev_warn(adapter->pdev_dev, "unequal number of [available]"
2201 " ingress/egress queues (%d/%d); using minimum for"
2202 " number of Queue Sets\n", ethqsets, vfres->nethctrl);
2203 ethqsets = min(vfres->nethctrl, ethqsets);
2204 }
2205 if (vfres->neq < ethqsets*2) {
2206 dev_warn(adapter->pdev_dev, "Not enough Egress Contexts (%d)"
2207 " to support Queue Sets (%d); reducing allowed Queue"
2208 " Sets\n", vfres->neq, ethqsets);
2209 ethqsets = vfres->neq/2;
2210 }
2211 if (ethqsets > MAX_ETH_QSETS) {
2212 dev_warn(adapter->pdev_dev, "only using %d of %d allowed Queue"
2213 " Sets\n", MAX_ETH_QSETS, adapter->sge.max_ethqsets);
2214 ethqsets = MAX_ETH_QSETS;
2215 }
2216 if (vfres->niq != 0 || vfres->neq > ethqsets*2) {
2217 dev_warn(adapter->pdev_dev, "unused resources niq/neq (%d/%d)"
2218 " ignored\n", vfres->niq, vfres->neq - ethqsets*2);
2219 }
2220 adapter->sge.max_ethqsets = ethqsets;
2221
2222 /*
2223 * Check for various parameter sanity issues. Most checks simply
2224 * result in us using fewer resources than our provissioning but we
2225 * do need at least one "port" with which to work ...
2226 */
2227 if (adapter->sge.max_ethqsets < adapter->params.nports) {
2228 dev_warn(adapter->pdev_dev, "only using %d of %d available"
2229 " virtual interfaces (too few Queue Sets)\n",
2230 adapter->sge.max_ethqsets, adapter->params.nports);
2231 adapter->params.nports = adapter->sge.max_ethqsets;
2232 }
2233 if (adapter->params.nports == 0) {
2234 dev_err(adapter->pdev_dev, "no virtual interfaces configured/"
2235 "usable!\n");
2236 return -EINVAL;
2237 }
2238 return 0;
2239}
2240
2241static inline void init_rspq(struct sge_rspq *rspq, u8 timer_idx,
2242 u8 pkt_cnt_idx, unsigned int size,
2243 unsigned int iqe_size)
2244{
2245 rspq->intr_params = (QINTR_TIMER_IDX(timer_idx) |
2246 (pkt_cnt_idx < SGE_NCOUNTERS ? QINTR_CNT_EN : 0));
2247 rspq->pktcnt_idx = (pkt_cnt_idx < SGE_NCOUNTERS
2248 ? pkt_cnt_idx
2249 : 0);
2250 rspq->iqe_len = iqe_size;
2251 rspq->size = size;
2252}
2253
/*
 * Perform default configuration of DMA queues depending on the number and
 * type of ports we found and the number of available CPUs.  Most settings can
 * be modified by the admin via ethtool and cxgbtool prior to the adapter
 * being brought up for the first time.
 *
 * On return every port has its first_qset/nqsets assigned, s->ethqsets holds
 * the total number of Queue Sets handed out, and default sizes/interrupt
 * parameters have been set for all Ethernet Queue Sets, the firmware event
 * queue and the forwarded interrupt queue.
 */
static void __devinit cfg_queues(struct adapter *adapter)
{
	struct sge *s = &adapter->sge;
	int q10g, n10g, qidx, pidx, qs;

	/*
	 * We should not be called till we know how many Queue Sets we can
	 * support.  In particular, this means that we need to know what kind
	 * of interrupts we'll be using ...
	 */
	BUG_ON((adapter->flags & (USING_MSIX|USING_MSI)) == 0);

	/*
	 * Count the number of 10GbE Virtual Interfaces that we have.
	 */
	n10g = 0;
	for_each_port(adapter, pidx)
		n10g += is_10g_port(&adap2pinfo(adapter, pidx)->link_cfg);

	/*
	 * We default to 1 queue per non-10G port and up to # of cores queues
	 * per 10G port.
	 */
	if (n10g == 0)
		q10g = 0;
	else {
		/*
		 * Set aside one Queue Set for each non-10G port and split
		 * what remains evenly between the 10G ports.
		 */
		int n1g = (adapter->params.nports - n10g);
		q10g = (adapter->sge.max_ethqsets - n1g) / n10g;
		if (q10g > num_online_cpus())
			q10g = num_online_cpus();
	}

	/*
	 * Allocate the "Queue Sets" to the various Virtual Interfaces.
	 * The layout will be established in setup_sge_queues() when the
	 * adapter is brought up for the first time.
	 */
	qidx = 0;
	for_each_port(adapter, pidx) {
		struct port_info *pi = adap2pinfo(adapter, pidx);

		pi->first_qset = qidx;
		pi->nqsets = is_10g_port(&pi->link_cfg) ? q10g : 1;
		qidx += pi->nqsets;
	}
	s->ethqsets = qidx;

	/*
	 * Set up default Queue Set parameters ...  Start off with the
	 * shortest interrupt holdoff timer.
	 */
	for (qs = 0; qs < s->max_ethqsets; qs++) {
		struct sge_eth_rxq *rxq = &s->ethrxq[qs];
		struct sge_eth_txq *txq = &s->ethtxq[qs];

		init_rspq(&rxq->rspq, 0, 0, 1024, L1_CACHE_BYTES);
		rxq->fl.size = 72;
		txq->q.size = 1024;
	}

	/*
	 * The firmware event queue is used for link state changes and
	 * notifications of TX DMA completions.
	 */
	init_rspq(&s->fw_evtq, SGE_TIMER_RSTRT_CNTR, 0, 512,
		  L1_CACHE_BYTES);

	/*
	 * The forwarded interrupt queue is used when we're in MSI interrupt
	 * mode.  In this mode all interrupts associated with RX queues will
	 * be forwarded to a single queue which we'll associate with our MSI
	 * interrupt vector.  The messages dropped in the forwarded interrupt
	 * queue will indicate which ingress queue needs servicing ...  This
	 * queue needs to be large enough to accommodate all of the ingress
	 * queues which are forwarding their interrupt (+1 to prevent the PIDX
	 * from equalling the CIDX if every ingress queue has an outstanding
	 * interrupt).  The queue doesn't need to be any larger because no
	 * ingress queue will ever have more than one outstanding interrupt at
	 * any time ...
	 */
	init_rspq(&s->intrq, SGE_TIMER_RSTRT_CNTR, 0, MSIX_ENTRIES + 1,
		  L1_CACHE_BYTES);
}
2343
/*
 * Reduce the number of Ethernet queues across all ports to at most n.
 * n provides at least one queue per port.
 *
 * Queue Sets are taken away one at a time, visiting the ports in order, and
 * never from a port which is already down to a single Queue Set.  Once the
 * total fits, each port's first_qset is recomputed so that the ports'
 * Queue Sets are contiguous again.
 */
static void __devinit reduce_ethqs(struct adapter *adapter, int n)
{
	int i;
	struct port_info *pi;

	/*
	 * While we have too many active Ether Queue Sets, iterate across the
	 * "ports" and reduce their individual Queue Set allocations.
	 */
	BUG_ON(n < adapter->params.nports);
	while (n < adapter->sge.ethqsets)
		for_each_port(adapter, i) {
			pi = adap2pinfo(adapter, i);
			if (pi->nqsets > 1) {
				pi->nqsets--;
				adapter->sge.ethqsets--;
				/*
				 * Stop this pass over the ports as soon as
				 * the target has been reached; the enclosing
				 * while condition then ends the reduction.
				 */
				if (adapter->sge.ethqsets <= n)
					break;
			}
		}

	/*
	 * Reassign the starting Queue Sets for each of the "ports" ...
	 * (n is reused here as the running first-Queue-Set index.)
	 */
	n = 0;
	for_each_port(adapter, i) {
		pi = adap2pinfo(adapter, i);
		pi->first_qset = n;
		n += pi->nqsets;
	}
}
2379
/*
 * We need to grab enough MSI-X vectors to cover our interrupt needs.  Ideally
 * we get a separate MSI-X vector for every "Queue Set" plus any extras we
 * need.  Minimally we need one for every Virtual Interface plus those needed
 * for our "extras".  Note that this process may lower the maximum number of
 * allowed Queue Sets ...
 *
 * Returns the final result of pci_enable_msix(): 0 when MSI-X was enabled
 * (the granted vectors are then recorded in adapter->msix_info[]), and a
 * non-zero value when it was not.
 */
static int __devinit enable_msix(struct adapter *adapter)
{
	int i, err, want, need;
	struct msix_entry entries[MSIX_ENTRIES];
	struct sge *s = &adapter->sge;

	for (i = 0; i < MSIX_ENTRIES; ++i)
		entries[i].entry = i;

	/*
	 * We _want_ enough MSI-X interrupts to cover all of our "Queue Sets"
	 * plus those needed for our "extras" (for example, the firmware
	 * message queue).  We _need_ at least one "Queue Set" per Virtual
	 * Interface plus those needed for our "extras".  So now we get to see
	 * if the song is right ...
	 */
	want = s->max_ethqsets + MSIX_EXTRAS;
	need = adapter->params.nports + MSIX_EXTRAS;
	/*
	 * A positive return is treated below as the number of vectors which
	 * are available; as long as that still covers our minimum, retry
	 * asking for exactly that many.
	 */
	while ((err = pci_enable_msix(adapter->pdev, entries, want)) >= need)
		want = err;

	if (err == 0) {
		/* success with "want" vectors; shrink Queue Sets to fit */
		int nqsets = want - MSIX_EXTRAS;
		if (nqsets < s->max_ethqsets) {
			dev_warn(adapter->pdev_dev, "only enough MSI-X vectors"
				 " for %d Queue Sets\n", nqsets);
			s->max_ethqsets = nqsets;
			if (nqsets < s->ethqsets)
				reduce_ethqs(adapter, nqsets);
		}
		for (i = 0; i < want; ++i)
			adapter->msix_info[i].vec = entries[i].vector;
	} else if (err > 0) {
		/* fewer vectors available than our minimum: give up on MSI-X */
		pci_disable_msix(adapter->pdev);
		dev_info(adapter->pdev_dev, "only %d MSI-X vectors left,"
			 " not using MSI-X\n", err);
	}
	return err;
}
2426
#ifdef HAVE_NET_DEVICE_OPS
/*
 * Network device operations for our ports.  This table is only built when
 * HAVE_NET_DEVICE_OPS is defined; otherwise the probe routine assigns the
 * same handlers to the individual net_device method pointers.
 */
static const struct net_device_ops cxgb4vf_netdev_ops	= {
	.ndo_open		= cxgb4vf_open,
	.ndo_stop		= cxgb4vf_stop,
	.ndo_start_xmit		= t4vf_eth_xmit,
	.ndo_get_stats		= cxgb4vf_get_stats,
	.ndo_set_rx_mode	= cxgb4vf_set_rxmode,
	.ndo_set_mac_address	= cxgb4vf_set_mac_addr,
	.ndo_select_queue	= cxgb4vf_select_queue,
	.ndo_validate_addr	= eth_validate_addr,
	.ndo_do_ioctl		= cxgb4vf_do_ioctl,
	.ndo_change_mtu		= cxgb4vf_change_mtu,
	.ndo_vlan_rx_register	= cxgb4vf_vlan_rx_register,
#ifdef CONFIG_NET_POLL_CONTROLLER
	/* polling entry point, built only with CONFIG_NET_POLL_CONTROLLER */
	.ndo_poll_controller	= cxgb4vf_poll_controller,
#endif
};
#endif
2445
2446/*
2447 * "Probe" a device: initialize a device and construct all kernel and driver
2448 * state needed to manage the device. This routine is called "init_one" in
2449 * the PF Driver ...
2450 */
2451static int __devinit cxgb4vf_pci_probe(struct pci_dev *pdev,
2452 const struct pci_device_id *ent)
2453{
2454 static int version_printed;
2455
2456 int pci_using_dac;
2457 int err, pidx;
2458 unsigned int pmask;
2459 struct adapter *adapter;
2460 struct port_info *pi;
2461 struct net_device *netdev;
2462
2463 /*
2464 * Vet our module parameters.
2465 */
2466 if (msi != MSI_MSIX && msi != MSI_MSI) {
2467 dev_err(&pdev->dev, "bad module parameter msi=%d; must be %d"
2468 " (MSI-X or MSI) or %d (MSI)\n", msi, MSI_MSIX,
2469 MSI_MSI);
2470 err = -EINVAL;
2471 goto err_out;
2472 }
2473
2474 /*
2475 * Print our driver banner the first time we're called to initialize a
2476 * device.
2477 */
2478 if (version_printed == 0) {
2479 printk(KERN_INFO "%s - version %s\n", DRV_DESC, DRV_VERSION);
2480 version_printed = 1;
2481 }
2482
2483 /*
2484 * Reserve PCI resources for the device. If we can't get them some
2485 * other driver may have already claimed the device ...
2486 */
2487 err = pci_request_regions(pdev, KBUILD_MODNAME);
2488 if (err) {
2489 dev_err(&pdev->dev, "cannot obtain PCI resources\n");
2490 return err;
2491 }
2492
2493 /*
2494 * Initialize generic PCI device state.
2495 */
2496 err = pci_enable_device(pdev);
2497 if (err) {
2498 dev_err(&pdev->dev, "cannot enable PCI device\n");
2499 goto err_release_regions;
2500 }
2501
2502 /*
2503 * Set up our DMA mask: try for 64-bit address masking first and
2504 * fall back to 32-bit if we can't get 64 bits ...
2505 */
2506 err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
2507 if (err == 0) {
2508 err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
2509 if (err) {
2510 dev_err(&pdev->dev, "unable to obtain 64-bit DMA for"
2511 " coherent allocations\n");
2512 goto err_disable_device;
2513 }
2514 pci_using_dac = 1;
2515 } else {
2516 err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
2517 if (err != 0) {
2518 dev_err(&pdev->dev, "no usable DMA configuration\n");
2519 goto err_disable_device;
2520 }
2521 pci_using_dac = 0;
2522 }
2523
2524 /*
2525 * Enable bus mastering for the device ...
2526 */
2527 pci_set_master(pdev);
2528
2529 /*
2530 * Allocate our adapter data structure and attach it to the device.
2531 */
2532 adapter = kzalloc(sizeof(*adapter), GFP_KERNEL);
2533 if (!adapter) {
2534 err = -ENOMEM;
2535 goto err_disable_device;
2536 }
2537 pci_set_drvdata(pdev, adapter);
2538 adapter->pdev = pdev;
2539 adapter->pdev_dev = &pdev->dev;
2540
2541 /*
2542 * Initialize SMP data synchronization resources.
2543 */
2544 spin_lock_init(&adapter->stats_lock);
2545
2546 /*
2547 * Map our I/O registers in BAR0.
2548 */
2549 adapter->regs = pci_ioremap_bar(pdev, 0);
2550 if (!adapter->regs) {
2551 dev_err(&pdev->dev, "cannot map device registers\n");
2552 err = -ENOMEM;
2553 goto err_free_adapter;
2554 }
2555
2556 /*
2557 * Initialize adapter level features.
2558 */
2559 adapter->name = pci_name(pdev);
2560 adapter->msg_enable = dflt_msg_enable;
2561 err = adap_init0(adapter);
2562 if (err)
2563 goto err_unmap_bar;
2564
2565 /*
2566 * Allocate our "adapter ports" and stitch everything together.
2567 */
2568 pmask = adapter->params.vfres.pmask;
2569 for_each_port(adapter, pidx) {
2570 int port_id, viid;
2571
2572 /*
2573 * We simplistically allocate our virtual interfaces
2574 * sequentially across the port numbers to which we have
2575 * access rights. This should be configurable in some manner
2576 * ...
2577 */
2578 if (pmask == 0)
2579 break;
2580 port_id = ffs(pmask) - 1;
2581 pmask &= ~(1 << port_id);
2582 viid = t4vf_alloc_vi(adapter, port_id);
2583 if (viid < 0) {
2584 dev_err(&pdev->dev, "cannot allocate VI for port %d:"
2585 " err=%d\n", port_id, viid);
2586 err = viid;
2587 goto err_free_dev;
2588 }
2589
2590 /*
2591 * Allocate our network device and stitch things together.
2592 */
2593 netdev = alloc_etherdev_mq(sizeof(struct port_info),
2594 MAX_PORT_QSETS);
2595 if (netdev == NULL) {
2596 dev_err(&pdev->dev, "cannot allocate netdev for"
2597 " port %d\n", port_id);
2598 t4vf_free_vi(adapter, viid);
2599 err = -ENOMEM;
2600 goto err_free_dev;
2601 }
2602 adapter->port[pidx] = netdev;
2603 SET_NETDEV_DEV(netdev, &pdev->dev);
2604 pi = netdev_priv(netdev);
2605 pi->adapter = adapter;
2606 pi->pidx = pidx;
2607 pi->port_id = port_id;
2608 pi->viid = viid;
2609
2610 /*
2611 * Initialize the starting state of our "port" and register
2612 * it.
2613 */
2614 pi->xact_addr_filt = -1;
2615 pi->rx_offload = RX_CSO;
2616 netif_carrier_off(netdev);
2617 netif_tx_stop_all_queues(netdev);
2618 netdev->irq = pdev->irq;
2619
2620 netdev->features = (NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
2621 NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
2622 NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX |
2623 NETIF_F_GRO);
2624 if (pci_using_dac)
2625 netdev->features |= NETIF_F_HIGHDMA;
2626 netdev->vlan_features =
2627 (netdev->features &
2628 ~(NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX));
2629
2630#ifdef HAVE_NET_DEVICE_OPS
2631 netdev->netdev_ops = &cxgb4vf_netdev_ops;
2632#else
2633 netdev->vlan_rx_register = cxgb4vf_vlan_rx_register;
2634 netdev->open = cxgb4vf_open;
2635 netdev->stop = cxgb4vf_stop;
2636 netdev->hard_start_xmit = t4vf_eth_xmit;
2637 netdev->get_stats = cxgb4vf_get_stats;
2638 netdev->set_rx_mode = cxgb4vf_set_rxmode;
2639 netdev->do_ioctl = cxgb4vf_do_ioctl;
2640 netdev->change_mtu = cxgb4vf_change_mtu;
2641 netdev->set_mac_address = cxgb4vf_set_mac_addr;
2642 netdev->select_queue = cxgb4vf_select_queue;
2643#ifdef CONFIG_NET_POLL_CONTROLLER
2644 netdev->poll_controller = cxgb4vf_poll_controller;
2645#endif
2646#endif
2647 SET_ETHTOOL_OPS(netdev, &cxgb4vf_ethtool_ops);
2648
2649 /*
2650 * Initialize the hardware/software state for the port.
2651 */
2652 err = t4vf_port_init(adapter, pidx);
2653 if (err) {
2654 dev_err(&pdev->dev, "cannot initialize port %d\n",
2655 pidx);
2656 goto err_free_dev;
2657 }
2658 }
2659
2660 /*
2661 * The "card" is now ready to go. If any errors occur during device
2662 * registration we do not fail the whole "card" but rather proceed
2663 * only with the ports we manage to register successfully. However we
2664 * must register at least one net device.
2665 */
2666 for_each_port(adapter, pidx) {
2667 netdev = adapter->port[pidx];
2668 if (netdev == NULL)
2669 continue;
2670
2671 err = register_netdev(netdev);
2672 if (err) {
2673 dev_warn(&pdev->dev, "cannot register net device %s,"
2674 " skipping\n", netdev->name);
2675 continue;
2676 }
2677
2678 set_bit(pidx, &adapter->registered_device_map);
2679 }
2680 if (adapter->registered_device_map == 0) {
2681 dev_err(&pdev->dev, "could not register any net devices\n");
2682 goto err_free_dev;
2683 }
2684
2685 /*
2686 * Set up our debugfs entries.
2687 */
2688 if (cxgb4vf_debugfs_root) {
2689 adapter->debugfs_root =
2690 debugfs_create_dir(pci_name(pdev),
2691 cxgb4vf_debugfs_root);
2692 if (adapter->debugfs_root == NULL)
2693 dev_warn(&pdev->dev, "could not create debugfs"
2694 " directory");
2695 else
2696 setup_debugfs(adapter);
2697 }
2698
2699 /*
2700 * See what interrupts we'll be using. If we've been configured to
2701 * use MSI-X interrupts, try to enable them but fall back to using
2702 * MSI interrupts if we can't enable MSI-X interrupts. If we can't
2703 * get MSI interrupts we bail with the error.
2704 */
2705 if (msi == MSI_MSIX && enable_msix(adapter) == 0)
2706 adapter->flags |= USING_MSIX;
2707 else {
2708 err = pci_enable_msi(pdev);
2709 if (err) {
2710 dev_err(&pdev->dev, "Unable to allocate %s interrupts;"
2711 " err=%d\n",
2712 msi == MSI_MSIX ? "MSI-X or MSI" : "MSI", err);
2713 goto err_free_debugfs;
2714 }
2715 adapter->flags |= USING_MSI;
2716 }
2717
2718 /*
2719 * Now that we know how many "ports" we have and what their types are,
2720 * and how many Queue Sets we can support, we can configure our queue
2721 * resources.
2722 */
2723 cfg_queues(adapter);
2724
2725 /*
2726 * Print a short notice on the existence and configuration of the new
2727 * VF network device ...
2728 */
2729 for_each_port(adapter, pidx) {
2730 dev_info(adapter->pdev_dev, "%s: Chelsio VF NIC PCIe %s\n",
2731 adapter->port[pidx]->name,
2732 (adapter->flags & USING_MSIX) ? "MSI-X" :
2733 (adapter->flags & USING_MSI) ? "MSI" : "");
2734 }
2735
2736 /*
2737 * Return success!
2738 */
2739 return 0;
2740
2741 /*
2742 * Error recovery and exit code. Unwind state that's been created
2743 * so far and return the error.
2744 */
2745
2746err_free_debugfs:
2747 if (adapter->debugfs_root) {
2748 cleanup_debugfs(adapter);
2749 debugfs_remove_recursive(adapter->debugfs_root);
2750 }
2751
2752err_free_dev:
2753 for_each_port(adapter, pidx) {
2754 netdev = adapter->port[pidx];
2755 if (netdev == NULL)
2756 continue;
2757 pi = netdev_priv(netdev);
2758 t4vf_free_vi(adapter, pi->viid);
2759 if (test_bit(pidx, &adapter->registered_device_map))
2760 unregister_netdev(netdev);
2761 free_netdev(netdev);
2762 }
2763
2764err_unmap_bar:
2765 iounmap(adapter->regs);
2766
2767err_free_adapter:
2768 kfree(adapter);
2769 pci_set_drvdata(pdev, NULL);
2770
2771err_disable_device:
2772 pci_disable_device(pdev);
2773 pci_clear_master(pdev);
2774
2775err_release_regions:
2776 pci_release_regions(pdev);
2777 pci_set_drvdata(pdev, NULL);
2778
2779err_out:
2780 return err;
2781}
2782
2783/*
2784 * "Remove" a device: tear down all kernel and driver state created in the
2785 * "probe" routine and quiesce the device (disable interrupts, etc.). (Note
2786 * that this is called "remove_one" in the PF Driver.)
2787 */
2788static void __devexit cxgb4vf_pci_remove(struct pci_dev *pdev)
2789{
2790 struct adapter *adapter = pci_get_drvdata(pdev);
2791
2792 /*
2793 * Tear down driver state associated with device.
2794 */
2795 if (adapter) {
2796 int pidx;
2797
2798 /*
2799 * Stop all of our activity. Unregister network port,
2800 * disable interrupts, etc.
2801 */
2802 for_each_port(adapter, pidx)
2803 if (test_bit(pidx, &adapter->registered_device_map))
2804 unregister_netdev(adapter->port[pidx]);
2805 t4vf_sge_stop(adapter);
2806 if (adapter->flags & USING_MSIX) {
2807 pci_disable_msix(adapter->pdev);
2808 adapter->flags &= ~USING_MSIX;
2809 } else if (adapter->flags & USING_MSI) {
2810 pci_disable_msi(adapter->pdev);
2811 adapter->flags &= ~USING_MSI;
2812 }
2813
2814 /*
2815 * Tear down our debugfs entries.
2816 */
2817 if (adapter->debugfs_root) {
2818 cleanup_debugfs(adapter);
2819 debugfs_remove_recursive(adapter->debugfs_root);
2820 }
2821
2822 /*
2823 * Free all of the various resources which we've acquired ...
2824 */
2825 t4vf_free_sge_resources(adapter);
2826 for_each_port(adapter, pidx) {
2827 struct net_device *netdev = adapter->port[pidx];
2828 struct port_info *pi;
2829
2830 if (netdev == NULL)
2831 continue;
2832
2833 pi = netdev_priv(netdev);
2834 t4vf_free_vi(adapter, pi->viid);
2835 free_netdev(netdev);
2836 }
2837 iounmap(adapter->regs);
2838 kfree(adapter);
2839 pci_set_drvdata(pdev, NULL);
2840 }
2841
2842 /*
2843 * Disable the device and release its PCI resources.
2844 */
2845 pci_disable_device(pdev);
2846 pci_clear_master(pdev);
2847 pci_release_regions(pdev);
2848}
2849
2850/*
2851 * PCI Device registration data structures.
2852 */
2853#define CH_DEVICE(devid, idx) \
2854 { PCI_VENDOR_ID_CHELSIO, devid, PCI_ANY_ID, PCI_ANY_ID, 0, 0, idx }
2855
2856static struct pci_device_id cxgb4vf_pci_tbl[] = {
2857 CH_DEVICE(0xb000, 0), /* PE10K FPGA */
2858 CH_DEVICE(0x4800, 0), /* T440-dbg */
2859 CH_DEVICE(0x4801, 0), /* T420-cr */
2860 CH_DEVICE(0x4802, 0), /* T422-cr */
2861 { 0, }
2862};
2863
2864MODULE_DESCRIPTION(DRV_DESC);
2865MODULE_AUTHOR("Chelsio Communications");
2866MODULE_LICENSE("Dual BSD/GPL");
2867MODULE_VERSION(DRV_VERSION);
2868MODULE_DEVICE_TABLE(pci, cxgb4vf_pci_tbl);
2869
2870static struct pci_driver cxgb4vf_driver = {
2871 .name = KBUILD_MODNAME,
2872 .id_table = cxgb4vf_pci_tbl,
2873 .probe = cxgb4vf_pci_probe,
2874 .remove = __devexit_p(cxgb4vf_pci_remove),
2875};
2876
2877/*
2878 * Initialize global driver state.
2879 */
2880static int __init cxgb4vf_module_init(void)
2881{
2882 int ret;
2883
2884 /* Debugfs support is optional, just warn if this fails */
2885 cxgb4vf_debugfs_root = debugfs_create_dir(KBUILD_MODNAME, NULL);
2886 if (!cxgb4vf_debugfs_root)
2887 printk(KERN_WARNING KBUILD_MODNAME ": could not create"
2888 " debugfs entry, continuing\n");
2889
2890 ret = pci_register_driver(&cxgb4vf_driver);
2891 if (ret < 0)
2892 debugfs_remove(cxgb4vf_debugfs_root);
2893 return ret;
2894}
2895
2896/*
2897 * Tear down global driver state.
2898 */
2899static void __exit cxgb4vf_module_exit(void)
2900{
2901 pci_unregister_driver(&cxgb4vf_driver);
2902 debugfs_remove(cxgb4vf_debugfs_root);
2903}
2904
/*
 * Declare the module's load and unload entry points.
 */
module_init(cxgb4vf_module_init);
module_exit(cxgb4vf_module_exit);