path: root/drivers/net/ethernet/sfc/siena_sriov.c
author	Ben Hutchings <bhutchings@solarflare.com>	2012-02-13 19:48:07 -0500
committer	Ben Hutchings <bhutchings@solarflare.com>	2012-02-15 19:25:13 -0500
commit	cd2d5b529cdb9bd274f3e4bc68d37d4d63b7f383 (patch)
tree	f21a5f98185b8e227b843a28b54d4260a0e57033 /drivers/net/ethernet/sfc/siena_sriov.c
parent	28e47c498a931200125e299e9d60d22e27b4ab0d (diff)
sfc: Add SR-IOV back-end support for SFC9000 family
On the SFC9000 family, each port has 1024 Virtual Interfaces (VIs),
each with an RX queue, a TX queue, an event queue and a mailbox
register.  These may be assigned to up to 127 SR-IOV virtual functions
per port, with up to 64 VIs per VF.

We allocate an extra channel (IRQ and event queue only) to receive
requests from VF drivers.

There is a per-port limit of 4 concurrent RX queue flushes, and queue
flushes may be initiated by the MC in response to a Function Level
Reset (FLR) of a VF.  Therefore, when SR-IOV is in use, we submit all
flush requests via the MC.

The RSS indirection table is shared with VFs, so the number of RX
queues used in the PF is limited to the number of VIs per VF.

This is almost entirely the work of Steve Hodgson, formerly
shodgson@solarflare.com.

Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
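
As an illustration (not part of the patch): each VF owns a power-of-two
block of (1 << vi_scale) VIs, so a VF-relative queue index maps onto the
port's VI space roughly as below.  The names vi_base, vf_index and
vf_queue are illustrative; the driver's real helpers for this are
abs_index() and efx_vf_size() in the file below.

	/* Sketch only; mirrors abs_index() in the patch. */
	static unsigned int example_abs_vi(unsigned int vi_base,
					   unsigned int vi_scale,
					   unsigned int vf_index,
					   unsigned int vf_queue)
	{
		unsigned int vis_per_vf = 1U << vi_scale;	/* up to 64 */

		return vi_base + vf_index * vis_per_vf + vf_queue;
	}

Since a port has only 1024 VIs in total, the number of VFs and the
number of VIs per VF trade off against each other.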
Diffstat (limited to 'drivers/net/ethernet/sfc/siena_sriov.c')
-rw-r--r--  drivers/net/ethernet/sfc/siena_sriov.c  1642
1 files changed, 1642 insertions, 0 deletions
diff --git a/drivers/net/ethernet/sfc/siena_sriov.c b/drivers/net/ethernet/sfc/siena_sriov.c
new file mode 100644
index 000000000000..5c6839ec3a83
--- /dev/null
+++ b/drivers/net/ethernet/sfc/siena_sriov.c
@@ -0,0 +1,1642 @@
/****************************************************************************
 * Driver for Solarflare Solarstorm network controllers and boards
 * Copyright 2010-2011 Solarflare Communications Inc.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published
 * by the Free Software Foundation, incorporated herein by reference.
 */
#include <linux/pci.h>
#include <linux/module.h>
#include "net_driver.h"
#include "efx.h"
#include "nic.h"
#include "io.h"
#include "mcdi.h"
#include "filter.h"
#include "mcdi_pcol.h"
#include "regs.h"
#include "vfdi.h"

/* Number of longs required to track all the VIs in a VF */
#define VI_MASK_LENGTH BITS_TO_LONGS(1 << EFX_VI_SCALE_MAX)
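/* e.g. if EFX_VI_SCALE_MAX is 6 (64 VIs per VF, matching the limit in
 * the commit message), each mask below fits in a single long on 64-bit.
 */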

/**
 * enum efx_vf_tx_filter_mode - TX MAC filtering behaviour
 * @VF_TX_FILTER_OFF: Disabled
 * @VF_TX_FILTER_AUTO: Enabled if MAC address assigned to VF and only
 *	2 TX queues allowed per VF.
 * @VF_TX_FILTER_ON: Enabled
 */
enum efx_vf_tx_filter_mode {
	VF_TX_FILTER_OFF,
	VF_TX_FILTER_AUTO,
	VF_TX_FILTER_ON,
};

/**
 * struct efx_vf - Back-end resource and protocol state for a PCI VF
 * @efx: The Efx NIC owning this VF
 * @pci_rid: The PCI requester ID for this VF
 * @pci_name: The PCI name (formatted address) of this VF
 * @index: Index of VF within its port and PF.
 * @req: VFDI incoming request work item. Incoming USR_EV events are received
 *	by the NAPI handler, but must be handled by executing MCDI requests
 *	inside a work item.
 * @req_addr: VFDI incoming request DMA address (in VF's PCI address space).
 * @req_type: Expected next incoming (from VF) %VFDI_EV_TYPE member.
 * @req_seqno: Expected next incoming (from VF) %VFDI_EV_SEQ member.
 * @msg_seqno: Next %VFDI_EV_SEQ member to reply to VF. Protected by
 *	@status_lock
 * @busy: VFDI request queued to be processed or being processed. Receiving
 *	a VFDI request when @busy is set is an error condition.
 * @buf: Incoming VFDI requests are DMAed from the VF into this buffer.
 * @buftbl_base: Buffer table entries for this VF start at this index.
 * @rx_filtering: Receive filtering has been requested by the VF driver.
 * @rx_filter_flags: The flags sent in the %VFDI_OP_INSERT_FILTER request.
 * @rx_filter_qid: VF relative qid for RX filter requested by VF.
 * @rx_filter_id: Receive MAC filter ID. Only one filter per VF is supported.
 * @tx_filter_mode: Transmit MAC filtering mode.
 * @tx_filter_id: Transmit MAC filter ID.
 * @addr: The MAC address and outer vlan tag of the VF.
 * @status_addr: VF DMA address of page for &struct vfdi_status updates.
 * @status_lock: Mutex protecting @msg_seqno, @status_addr, @addr,
 *	@peer_page_addrs and @peer_page_count from simultaneous
 *	updates by the VM and consumption by
 *	efx_sriov_update_vf_addr()
 * @peer_page_addrs: Pointer to an array of guest pages for local addresses.
 * @peer_page_count: Number of entries in @peer_page_addrs.
 * @evq0_addrs: Array of guest pages backing evq0.
 * @evq0_count: Number of entries in @evq0_addrs.
 * @flush_waitq: wait queue used by %VFDI_OP_FINI_ALL_QUEUES handler
 *	to wait for flush completions.
 * @txq_lock: Mutex for TX queue allocation.
 * @txq_mask: Mask of initialized transmit queues.
 * @txq_count: Number of initialized transmit queues.
 * @rxq_mask: Mask of initialized receive queues.
 * @rxq_count: Number of initialized receive queues.
 * @rxq_retry_mask: Mask of receive queues that need to be flushed again
 *	due to flush failure.
 * @rxq_retry_count: Number of receive queues in @rxq_retry_mask.
 * @reset_work: Work item to schedule a VF reset.
 */
struct efx_vf {
	struct efx_nic *efx;
	unsigned int pci_rid;
	char pci_name[13]; /* dddd:bb:dd.f */
	unsigned int index;
	struct work_struct req;
	u64 req_addr;
	int req_type;
	unsigned req_seqno;
	unsigned msg_seqno;
	bool busy;
	struct efx_buffer buf;
	unsigned buftbl_base;
	bool rx_filtering;
	enum efx_filter_flags rx_filter_flags;
	unsigned rx_filter_qid;
	int rx_filter_id;
	enum efx_vf_tx_filter_mode tx_filter_mode;
	int tx_filter_id;
	struct vfdi_endpoint addr;
	u64 status_addr;
	struct mutex status_lock;
	u64 *peer_page_addrs;
	unsigned peer_page_count;
	u64 evq0_addrs[EFX_MAX_VF_EVQ_SIZE * sizeof(efx_qword_t) /
		       EFX_BUF_SIZE];
	unsigned evq0_count;
	wait_queue_head_t flush_waitq;
	struct mutex txq_lock;
	unsigned long txq_mask[VI_MASK_LENGTH];
	unsigned txq_count;
	unsigned long rxq_mask[VI_MASK_LENGTH];
	unsigned rxq_count;
	unsigned long rxq_retry_mask[VI_MASK_LENGTH];
	atomic_t rxq_retry_count;
	struct work_struct reset_work;
};

struct efx_memcpy_req {
	unsigned int from_rid;
	void *from_buf;
	u64 from_addr;
	unsigned int to_rid;
	u64 to_addr;
	unsigned length;
};

/**
 * struct efx_local_addr - A MAC address on the vswitch without a VF.
 *
 * Siena does not have a switch, so VFs can't transmit data to each
 * other. Instead the VFs must be made aware of the local addresses
 * on the vswitch, so that they can arrange for an alternative
 * software datapath to be used.
 *
 * @link: List head for insertion into efx->local_addr_list.
 * @addr: Ethernet address
 */
struct efx_local_addr {
	struct list_head link;
	u8 addr[ETH_ALEN];
};

/**
 * struct efx_endpoint_page - Page of vfdi_endpoint structures
 *
 * @link: List head for insertion into efx->local_page_list.
 * @ptr: Pointer to page.
 * @addr: DMA address of page.
 */
struct efx_endpoint_page {
	struct list_head link;
	void *ptr;
	dma_addr_t addr;
};

/* Buffer table entries are reserved txq0,rxq0,evq0,txq1,rxq1,evq1 */
#define EFX_BUFTBL_TXQ_BASE(_vf, _qid)					\
	((_vf)->buftbl_base + EFX_VF_BUFTBL_PER_VI * (_qid))
#define EFX_BUFTBL_RXQ_BASE(_vf, _qid)					\
	(EFX_BUFTBL_TXQ_BASE(_vf, _qid) +				\
	 (EFX_MAX_DMAQ_SIZE * sizeof(efx_qword_t) / EFX_BUF_SIZE))
#define EFX_BUFTBL_EVQ_BASE(_vf, _qid)					\
	(EFX_BUFTBL_TXQ_BASE(_vf, _qid) +				\
	 (2 * EFX_MAX_DMAQ_SIZE * sizeof(efx_qword_t) / EFX_BUF_SIZE))

#define EFX_FIELD_MASK(_field)			\
	((1 << _field ## _WIDTH) - 1)

/* VFs can only use this many transmit channels */
static unsigned int vf_max_tx_channels = 2;
module_param(vf_max_tx_channels, uint, 0444);
MODULE_PARM_DESC(vf_max_tx_channels,
		 "Limit the number of TX channels VFs can use");

static int max_vfs = -1;
module_param(max_vfs, int, 0444);
MODULE_PARM_DESC(max_vfs,
		 "Reduce the number of VFs initialized by the driver");

/* Workqueue used by VFDI communication. We can't use the global
 * workqueue because it may be running the VF driver's probe()
 * routine, which will be blocked there waiting for a VFDI response.
 */
static struct workqueue_struct *vfdi_workqueue;

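/* Map a VF-relative VI index onto the port's absolute VI index space.
 * VF VIs are allocated in contiguous blocks of efx_vf_size() entries
 * starting at EFX_VI_BASE.
 */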
static unsigned abs_index(struct efx_vf *vf, unsigned index)
{
	return EFX_VI_BASE + vf->index * efx_vf_size(vf->efx) + index;
}

static int efx_sriov_cmd(struct efx_nic *efx, bool enable,
			 unsigned *vi_scale_out, unsigned *vf_total_out)
{
	u8 inbuf[MC_CMD_SRIOV_IN_LEN];
	u8 outbuf[MC_CMD_SRIOV_OUT_LEN];
	unsigned vi_scale, vf_total;
	size_t outlen;
	int rc;

	MCDI_SET_DWORD(inbuf, SRIOV_IN_ENABLE, enable ? 1 : 0);
	MCDI_SET_DWORD(inbuf, SRIOV_IN_VI_BASE, EFX_VI_BASE);
	MCDI_SET_DWORD(inbuf, SRIOV_IN_VF_COUNT, efx->vf_count);

	rc = efx_mcdi_rpc(efx, MC_CMD_SRIOV, inbuf, MC_CMD_SRIOV_IN_LEN,
			  outbuf, MC_CMD_SRIOV_OUT_LEN, &outlen);
	if (rc)
		return rc;
	if (outlen < MC_CMD_SRIOV_OUT_LEN)
		return -EIO;

	vf_total = MCDI_DWORD(outbuf, SRIOV_OUT_VF_TOTAL);
	vi_scale = MCDI_DWORD(outbuf, SRIOV_OUT_VI_SCALE);
	if (vi_scale > EFX_VI_SCALE_MAX)
		return -EOPNOTSUPP;

	if (vi_scale_out)
		*vi_scale_out = vi_scale;
	if (vf_total_out)
		*vf_total_out = vf_total;

	return 0;
}

static void efx_sriov_usrev(struct efx_nic *efx, bool enabled)
{
	efx_oword_t reg;

	EFX_POPULATE_OWORD_2(reg,
			     FRF_CZ_USREV_DIS, enabled ? 0 : 1,
			     FRF_CZ_DFLT_EVQ, efx->vfdi_channel->channel);
	efx_writeo(efx, &reg, FR_CZ_USR_EV_CFG);
}

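/* Issue up to "count" DMA copies as a single MC_CMD_MEMCPY request.
 * A source supplied via from_buf is embedded inline in the request;
 * otherwise data is copied directly from (from_rid, from_addr) to
 * (to_rid, to_addr).
 */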
static int efx_sriov_memcpy(struct efx_nic *efx, struct efx_memcpy_req *req,
			    unsigned int count)
{
	u8 *inbuf, *record;
	unsigned int used;
	u32 from_rid, from_hi, from_lo;
	int rc;

	mb();	/* Finish writing source/reading dest before DMA starts */

	used = MC_CMD_MEMCPY_IN_LEN(count);
	if (WARN_ON(used > MCDI_CTL_SDU_LEN_MAX))
		return -ENOBUFS;

	/* Allocate room for the largest request */
	inbuf = kzalloc(MCDI_CTL_SDU_LEN_MAX, GFP_KERNEL);
	if (inbuf == NULL)
		return -ENOMEM;

	record = inbuf;
	MCDI_SET_DWORD(record, MEMCPY_IN_RECORD, count);
	while (count-- > 0) {
		MCDI_SET_DWORD(record, MEMCPY_RECORD_TYPEDEF_TO_RID,
			       req->to_rid);
		MCDI_SET_DWORD(record, MEMCPY_RECORD_TYPEDEF_TO_ADDR_LO,
			       (u32)req->to_addr);
		MCDI_SET_DWORD(record, MEMCPY_RECORD_TYPEDEF_TO_ADDR_HI,
			       (u32)(req->to_addr >> 32));
		if (req->from_buf == NULL) {
			from_rid = req->from_rid;
			from_lo = (u32)req->from_addr;
			from_hi = (u32)(req->from_addr >> 32);
		} else {
			if (WARN_ON(used + req->length > MCDI_CTL_SDU_LEN_MAX)) {
				rc = -ENOBUFS;
				goto out;
			}

			from_rid = MC_CMD_MEMCPY_RECORD_TYPEDEF_RID_INLINE;
			from_lo = used;
			from_hi = 0;
			memcpy(inbuf + used, req->from_buf, req->length);
			used += req->length;
		}

		MCDI_SET_DWORD(record, MEMCPY_RECORD_TYPEDEF_FROM_RID, from_rid);
		MCDI_SET_DWORD(record, MEMCPY_RECORD_TYPEDEF_FROM_ADDR_LO,
			       from_lo);
		MCDI_SET_DWORD(record, MEMCPY_RECORD_TYPEDEF_FROM_ADDR_HI,
			       from_hi);
		MCDI_SET_DWORD(record, MEMCPY_RECORD_TYPEDEF_LENGTH,
			       req->length);

		++req;
		record += MC_CMD_MEMCPY_IN_RECORD_LEN;
	}

	rc = efx_mcdi_rpc(efx, MC_CMD_MEMCPY, inbuf, used, NULL, 0, NULL);
out:
	kfree(inbuf);

	mb();	/* Don't write source/read dest before DMA is complete */

	return rc;
}

/* The TX filter is entirely controlled by this driver, and is modified
 * underneath the feet of the VF
 */
static void efx_sriov_reset_tx_filter(struct efx_vf *vf)
{
	struct efx_nic *efx = vf->efx;
	struct efx_filter_spec filter;
	u16 vlan;
	int rc;

	if (vf->tx_filter_id != -1) {
		efx_filter_remove_id_safe(efx, EFX_FILTER_PRI_REQUIRED,
					  vf->tx_filter_id);
		netif_dbg(efx, hw, efx->net_dev, "Removed vf %s tx filter %d\n",
			  vf->pci_name, vf->tx_filter_id);
		vf->tx_filter_id = -1;
	}

	if (is_zero_ether_addr(vf->addr.mac_addr))
		return;

	/* Turn on TX filtering automatically if not explicitly
	 * enabled or disabled.
	 */
	if (vf->tx_filter_mode == VF_TX_FILTER_AUTO && vf_max_tx_channels <= 2)
		vf->tx_filter_mode = VF_TX_FILTER_ON;

	vlan = ntohs(vf->addr.tci) & VLAN_VID_MASK;
	efx_filter_init_tx(&filter, abs_index(vf, 0));
	rc = efx_filter_set_eth_local(&filter,
				      vlan ? vlan : EFX_FILTER_VID_UNSPEC,
				      vf->addr.mac_addr);
	BUG_ON(rc);

	rc = efx_filter_insert_filter(efx, &filter, true);
	if (rc < 0) {
		netif_warn(efx, hw, efx->net_dev,
			   "Unable to migrate tx filter for vf %s\n",
			   vf->pci_name);
	} else {
		netif_dbg(efx, hw, efx->net_dev, "Inserted vf %s tx filter %d\n",
			  vf->pci_name, rc);
		vf->tx_filter_id = rc;
	}
}

/* The RX filter is managed here on behalf of the VF driver */
static void efx_sriov_reset_rx_filter(struct efx_vf *vf)
{
	struct efx_nic *efx = vf->efx;
	struct efx_filter_spec filter;
	u16 vlan;
	int rc;

	if (vf->rx_filter_id != -1) {
		efx_filter_remove_id_safe(efx, EFX_FILTER_PRI_REQUIRED,
					  vf->rx_filter_id);
		netif_dbg(efx, hw, efx->net_dev, "Removed vf %s rx filter %d\n",
			  vf->pci_name, vf->rx_filter_id);
		vf->rx_filter_id = -1;
	}

	if (!vf->rx_filtering || is_zero_ether_addr(vf->addr.mac_addr))
		return;

	vlan = ntohs(vf->addr.tci) & VLAN_VID_MASK;
	efx_filter_init_rx(&filter, EFX_FILTER_PRI_REQUIRED,
			   vf->rx_filter_flags,
			   abs_index(vf, vf->rx_filter_qid));
	rc = efx_filter_set_eth_local(&filter,
				      vlan ? vlan : EFX_FILTER_VID_UNSPEC,
				      vf->addr.mac_addr);
	BUG_ON(rc);

	rc = efx_filter_insert_filter(efx, &filter, true);
	if (rc < 0) {
		netif_warn(efx, hw, efx->net_dev,
			   "Unable to insert rx filter for vf %s\n",
			   vf->pci_name);
	} else {
		netif_dbg(efx, hw, efx->net_dev, "Inserted vf %s rx filter %d\n",
			  vf->pci_name, rc);
		vf->rx_filter_id = rc;
	}
}

static void __efx_sriov_update_vf_addr(struct efx_vf *vf)
{
	efx_sriov_reset_tx_filter(vf);
	efx_sriov_reset_rx_filter(vf);
	queue_work(vfdi_workqueue, &vf->efx->peer_work);
}

/* Push the peer list to this VF. The caller must hold status_lock to interlock
 * with VFDI requests, and they must be serialised against manipulation of
 * local_page_list, either by acquiring local_lock or by running from
 * efx_sriov_peer_work()
 */
static void __efx_sriov_push_vf_status(struct efx_vf *vf)
{
	struct efx_nic *efx = vf->efx;
	struct vfdi_status *status = efx->vfdi_status.addr;
	struct efx_memcpy_req copy[4];
	struct efx_endpoint_page *epp;
	unsigned int pos, count;
	unsigned data_offset;
	efx_qword_t event;

	WARN_ON(!mutex_is_locked(&vf->status_lock));
	WARN_ON(!vf->status_addr);

	status->local = vf->addr;
	status->generation_end = ++status->generation_start;

	memset(copy, '\0', sizeof(copy));
	/* Write generation_start */
	copy[0].from_buf = &status->generation_start;
	copy[0].to_rid = vf->pci_rid;
	copy[0].to_addr = vf->status_addr + offsetof(struct vfdi_status,
						     generation_start);
	copy[0].length = sizeof(status->generation_start);
	/* DMA the rest of the structure (excluding the generations). This
	 * assumes that the non-generation portion of vfdi_status is in
	 * one chunk starting at the version member.
	 */
	data_offset = offsetof(struct vfdi_status, version);
	copy[1].from_rid = efx->pci_dev->devfn;
	copy[1].from_addr = efx->vfdi_status.dma_addr + data_offset;
	copy[1].to_rid = vf->pci_rid;
	copy[1].to_addr = vf->status_addr + data_offset;
	copy[1].length = status->length - data_offset;

	/* Copy the peer pages */
	pos = 2;
	count = 0;
	list_for_each_entry(epp, &efx->local_page_list, link) {
		if (count == vf->peer_page_count) {
			/* The VF driver will know it needs to provide more
			 * pages because peer_count is too large.
			 */
			break;
		}
		copy[pos].from_buf = NULL;
		copy[pos].from_rid = efx->pci_dev->devfn;
		copy[pos].from_addr = epp->addr;
		copy[pos].to_rid = vf->pci_rid;
		copy[pos].to_addr = vf->peer_page_addrs[count];
		copy[pos].length = EFX_PAGE_SIZE;

		if (++pos == ARRAY_SIZE(copy)) {
			efx_sriov_memcpy(efx, copy, ARRAY_SIZE(copy));
			pos = 0;
		}
		++count;
	}

	/* Write generation_end */
	copy[pos].from_buf = &status->generation_end;
	copy[pos].to_rid = vf->pci_rid;
	copy[pos].to_addr = vf->status_addr + offsetof(struct vfdi_status,
						       generation_end);
	copy[pos].length = sizeof(status->generation_end);
	efx_sriov_memcpy(efx, copy, pos + 1);

	/* Notify the guest */
	EFX_POPULATE_QWORD_3(event,
			     FSF_AZ_EV_CODE, FSE_CZ_EV_CODE_USER_EV,
			     VFDI_EV_SEQ, (vf->msg_seqno & 0xff),
			     VFDI_EV_TYPE, VFDI_EV_TYPE_STATUS);
	++vf->msg_seqno;
	efx_generate_event(efx, EFX_VI_BASE + vf->index * efx_vf_size(efx),
			   &event);
}

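/* Program a contiguous run of buffer table entries.  Passing
 * addr == NULL clears the entries, releasing the buffers.
 */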
static void efx_sriov_bufs(struct efx_nic *efx, unsigned offset,
			   u64 *addr, unsigned count)
{
	efx_qword_t buf;
	unsigned pos;

	for (pos = 0; pos < count; ++pos) {
		EFX_POPULATE_QWORD_3(buf,
				     FRF_AZ_BUF_ADR_REGION, 0,
				     FRF_AZ_BUF_ADR_FBUF,
				     addr ? addr[pos] >> 12 : 0,
				     FRF_AZ_BUF_OWNER_ID_FBUF, 0);
		efx_sram_writeq(efx, efx->membase + FR_BZ_BUF_FULL_TBL,
				&buf, offset + pos);
	}
}

static bool bad_vf_index(struct efx_nic *efx, unsigned index)
{
	return index >= efx_vf_size(efx);
}

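/* A queue must be backed by a power-of-two number of EFX_BUF_SIZE
 * buffers, holding no more than max_entry_count descriptors.
 */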
static bool bad_buf_count(unsigned buf_count, unsigned max_entry_count)
{
	unsigned max_buf_count = max_entry_count *
		sizeof(efx_qword_t) / EFX_BUF_SIZE;

	return ((buf_count & (buf_count - 1)) || buf_count > max_buf_count);
}

/* Check that VI specified by per-port index belongs to a VF.
 * Optionally set VF index and VI index within the VF.
 */
static bool map_vi_index(struct efx_nic *efx, unsigned abs_index,
			 struct efx_vf **vf_out, unsigned *rel_index_out)
{
	unsigned vf_i;

	if (abs_index < EFX_VI_BASE)
		return true;
	vf_i = (abs_index - EFX_VI_BASE) / efx_vf_size(efx);
	if (vf_i >= efx->vf_init_count)
		return true;

	if (vf_out)
		*vf_out = efx->vf + vf_i;
	if (rel_index_out)
		*rel_index_out = abs_index % efx_vf_size(efx);
	return false;
}

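/* VFDI_OP_INIT_EVQ handler: validate the request, then program the
 * buffer table and the timer and event queue hardware tables.  The
 * pages backing event queue 0 are remembered so that the queue can be
 * re-initialised to notify the VF after an FLR.
 */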
static int efx_vfdi_init_evq(struct efx_vf *vf)
{
	struct efx_nic *efx = vf->efx;
	struct vfdi_req *req = vf->buf.addr;
	unsigned vf_evq = req->u.init_evq.index;
	unsigned buf_count = req->u.init_evq.buf_count;
	unsigned abs_evq = abs_index(vf, vf_evq);
	unsigned buftbl = EFX_BUFTBL_EVQ_BASE(vf, vf_evq);
	efx_oword_t reg;

	if (bad_vf_index(efx, vf_evq) ||
	    bad_buf_count(buf_count, EFX_MAX_VF_EVQ_SIZE)) {
		if (net_ratelimit())
			netif_err(efx, hw, efx->net_dev,
				  "ERROR: Invalid INIT_EVQ from %s: evq %d bufs %d\n",
				  vf->pci_name, vf_evq, buf_count);
		return VFDI_RC_EINVAL;
	}

	efx_sriov_bufs(efx, buftbl, req->u.init_evq.addr, buf_count);

	EFX_POPULATE_OWORD_3(reg,
			     FRF_CZ_TIMER_Q_EN, 1,
			     FRF_CZ_HOST_NOTIFY_MODE, 0,
			     FRF_CZ_TIMER_MODE, FFE_CZ_TIMER_MODE_DIS);
	efx_writeo_table(efx, &reg, FR_BZ_TIMER_TBL, abs_evq);
	EFX_POPULATE_OWORD_3(reg,
			     FRF_AZ_EVQ_EN, 1,
			     FRF_AZ_EVQ_SIZE, __ffs(buf_count),
			     FRF_AZ_EVQ_BUF_BASE_ID, buftbl);
	efx_writeo_table(efx, &reg, FR_BZ_EVQ_PTR_TBL, abs_evq);

	if (vf_evq == 0) {
		memcpy(vf->evq0_addrs, req->u.init_evq.addr,
		       buf_count * sizeof(u64));
		vf->evq0_count = buf_count;
	}

	return VFDI_RC_SUCCESS;
}

static int efx_vfdi_init_rxq(struct efx_vf *vf)
{
	struct efx_nic *efx = vf->efx;
	struct vfdi_req *req = vf->buf.addr;
	unsigned vf_rxq = req->u.init_rxq.index;
	unsigned vf_evq = req->u.init_rxq.evq;
	unsigned buf_count = req->u.init_rxq.buf_count;
	unsigned buftbl = EFX_BUFTBL_RXQ_BASE(vf, vf_rxq);
	unsigned label;
	efx_oword_t reg;

	if (bad_vf_index(efx, vf_evq) || bad_vf_index(efx, vf_rxq) ||
	    bad_buf_count(buf_count, EFX_MAX_DMAQ_SIZE)) {
		if (net_ratelimit())
			netif_err(efx, hw, efx->net_dev,
				  "ERROR: Invalid INIT_RXQ from %s: rxq %d evq %d "
				  "buf_count %d\n", vf->pci_name, vf_rxq,
				  vf_evq, buf_count);
		return VFDI_RC_EINVAL;
	}
	if (!__test_and_set_bit(req->u.init_rxq.index, vf->rxq_mask))
		++vf->rxq_count;
	efx_sriov_bufs(efx, buftbl, req->u.init_rxq.addr, buf_count);

	label = req->u.init_rxq.label & EFX_FIELD_MASK(FRF_AZ_RX_DESCQ_LABEL);
	EFX_POPULATE_OWORD_6(reg,
			     FRF_AZ_RX_DESCQ_BUF_BASE_ID, buftbl,
			     FRF_AZ_RX_DESCQ_EVQ_ID, abs_index(vf, vf_evq),
			     FRF_AZ_RX_DESCQ_LABEL, label,
			     FRF_AZ_RX_DESCQ_SIZE, __ffs(buf_count),
			     FRF_AZ_RX_DESCQ_JUMBO,
			     !!(req->u.init_rxq.flags &
				VFDI_RXQ_FLAG_SCATTER_EN),
			     FRF_AZ_RX_DESCQ_EN, 1);
	efx_writeo_table(efx, &reg, FR_BZ_RX_DESC_PTR_TBL,
			 abs_index(vf, vf_rxq));

	return VFDI_RC_SUCCESS;
}

static int efx_vfdi_init_txq(struct efx_vf *vf)
{
	struct efx_nic *efx = vf->efx;
	struct vfdi_req *req = vf->buf.addr;
	unsigned vf_txq = req->u.init_txq.index;
	unsigned vf_evq = req->u.init_txq.evq;
	unsigned buf_count = req->u.init_txq.buf_count;
	unsigned buftbl = EFX_BUFTBL_TXQ_BASE(vf, vf_txq);
	unsigned label, eth_filt_en;
	efx_oword_t reg;

	if (bad_vf_index(efx, vf_evq) || bad_vf_index(efx, vf_txq) ||
	    vf_txq >= vf_max_tx_channels ||
	    bad_buf_count(buf_count, EFX_MAX_DMAQ_SIZE)) {
		if (net_ratelimit())
			netif_err(efx, hw, efx->net_dev,
				  "ERROR: Invalid INIT_TXQ from %s: txq %d evq %d "
				  "buf_count %d\n", vf->pci_name, vf_txq,
				  vf_evq, buf_count);
		return VFDI_RC_EINVAL;
	}

	mutex_lock(&vf->txq_lock);
	if (!__test_and_set_bit(req->u.init_txq.index, vf->txq_mask))
		++vf->txq_count;
	mutex_unlock(&vf->txq_lock);
	efx_sriov_bufs(efx, buftbl, req->u.init_txq.addr, buf_count);

	eth_filt_en = vf->tx_filter_mode == VF_TX_FILTER_ON;

	label = req->u.init_txq.label & EFX_FIELD_MASK(FRF_AZ_TX_DESCQ_LABEL);
	EFX_POPULATE_OWORD_8(reg,
			     FRF_CZ_TX_DPT_Q_MASK_WIDTH, min(efx->vi_scale, 1U),
			     FRF_CZ_TX_DPT_ETH_FILT_EN, eth_filt_en,
			     FRF_AZ_TX_DESCQ_EN, 1,
			     FRF_AZ_TX_DESCQ_BUF_BASE_ID, buftbl,
			     FRF_AZ_TX_DESCQ_EVQ_ID, abs_index(vf, vf_evq),
			     FRF_AZ_TX_DESCQ_LABEL, label,
			     FRF_AZ_TX_DESCQ_SIZE, __ffs(buf_count),
			     FRF_BZ_TX_NON_IP_DROP_DIS, 1);
	efx_writeo_table(efx, &reg, FR_BZ_TX_DESC_PTR_TBL,
			 abs_index(vf, vf_txq));

	return VFDI_RC_SUCCESS;
}

/* Returns true when efx_vfdi_fini_all_queues should wake */
static bool efx_vfdi_flush_wake(struct efx_vf *vf)
{
	/* Ensure that all updates are visible to efx_vfdi_fini_all_queues() */
	smp_mb();

	return (!vf->txq_count && !vf->rxq_count) ||
		atomic_read(&vf->rxq_retry_count);
}

static void efx_vfdi_flush_clear(struct efx_vf *vf)
{
	memset(vf->txq_mask, 0, sizeof(vf->txq_mask));
	vf->txq_count = 0;
	memset(vf->rxq_mask, 0, sizeof(vf->rxq_mask));
	vf->rxq_count = 0;
	memset(vf->rxq_retry_mask, 0, sizeof(vf->rxq_retry_mask));
	atomic_set(&vf->rxq_retry_count, 0);
}

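/* VFDI_OP_FINI_ALL_QUEUES handler: flush every initialised TX and RX
 * queue, retrying failed RX flushes until the timeout expires, then
 * zero the descriptor, event queue and timer table entries and release
 * the VF's buffer table entries.  Flow control is disabled for the
 * duration of the flushes.
 */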
static int efx_vfdi_fini_all_queues(struct efx_vf *vf)
{
	struct efx_nic *efx = vf->efx;
	efx_oword_t reg;
	unsigned count = efx_vf_size(efx);
	unsigned vf_offset = EFX_VI_BASE + vf->index * efx_vf_size(efx);
	unsigned timeout = HZ;
	unsigned index, rxqs_count;
	__le32 *rxqs;
	int rc;

	rxqs = kmalloc(count * sizeof(*rxqs), GFP_KERNEL);
	if (rxqs == NULL)
		return VFDI_RC_ENOMEM;

	rtnl_lock();
	if (efx->fc_disable++ == 0)
		efx_mcdi_set_mac(efx);
	rtnl_unlock();

	/* Flush all the initialized queues */
	rxqs_count = 0;
	for (index = 0; index < count; ++index) {
		if (test_bit(index, vf->txq_mask)) {
			EFX_POPULATE_OWORD_2(reg,
					     FRF_AZ_TX_FLUSH_DESCQ_CMD, 1,
					     FRF_AZ_TX_FLUSH_DESCQ,
					     vf_offset + index);
			efx_writeo(efx, &reg, FR_AZ_TX_FLUSH_DESCQ);
		}
		if (test_bit(index, vf->rxq_mask))
			rxqs[rxqs_count++] = cpu_to_le32(vf_offset + index);
	}

	atomic_set(&vf->rxq_retry_count, 0);
	while (timeout && (vf->rxq_count || vf->txq_count)) {
		rc = efx_mcdi_rpc(efx, MC_CMD_FLUSH_RX_QUEUES, (u8 *)rxqs,
				  rxqs_count * sizeof(*rxqs), NULL, 0, NULL);
		WARN_ON(rc < 0);

		timeout = wait_event_timeout(vf->flush_waitq,
					     efx_vfdi_flush_wake(vf),
					     timeout);
		rxqs_count = 0;
		for (index = 0; index < count; ++index) {
			if (test_and_clear_bit(index, vf->rxq_retry_mask)) {
				atomic_dec(&vf->rxq_retry_count);
				rxqs[rxqs_count++] =
					cpu_to_le32(vf_offset + index);
			}
		}
	}

	rtnl_lock();
	if (--efx->fc_disable == 0)
		efx_mcdi_set_mac(efx);
	rtnl_unlock();

	/* Irrespective of success/failure, fini the queues */
	EFX_ZERO_OWORD(reg);
	for (index = 0; index < count; ++index) {
		efx_writeo_table(efx, &reg, FR_BZ_RX_DESC_PTR_TBL,
				 vf_offset + index);
		efx_writeo_table(efx, &reg, FR_BZ_TX_DESC_PTR_TBL,
				 vf_offset + index);
		efx_writeo_table(efx, &reg, FR_BZ_EVQ_PTR_TBL,
				 vf_offset + index);
		efx_writeo_table(efx, &reg, FR_BZ_TIMER_TBL,
				 vf_offset + index);
	}
	efx_sriov_bufs(efx, vf->buftbl_base, NULL,
		       EFX_VF_BUFTBL_PER_VI * efx_vf_size(efx));
	kfree(rxqs);
	efx_vfdi_flush_clear(vf);

	vf->evq0_count = 0;

	return timeout ? 0 : VFDI_RC_ETIMEDOUT;
}

static int efx_vfdi_insert_filter(struct efx_vf *vf)
{
	struct efx_nic *efx = vf->efx;
	struct vfdi_req *req = vf->buf.addr;
	unsigned vf_rxq = req->u.mac_filter.rxq;
	unsigned flags;

	if (bad_vf_index(efx, vf_rxq) || vf->rx_filtering) {
		if (net_ratelimit())
			netif_err(efx, hw, efx->net_dev,
				  "ERROR: Invalid INSERT_FILTER from %s: rxq %d "
				  "flags 0x%x\n", vf->pci_name, vf_rxq,
				  req->u.mac_filter.flags);
		return VFDI_RC_EINVAL;
	}

	flags = 0;
	if (req->u.mac_filter.flags & VFDI_MAC_FILTER_FLAG_RSS)
		flags |= EFX_FILTER_FLAG_RX_RSS;
	if (req->u.mac_filter.flags & VFDI_MAC_FILTER_FLAG_SCATTER)
		flags |= EFX_FILTER_FLAG_RX_SCATTER;
	vf->rx_filter_flags = flags;
	vf->rx_filter_qid = vf_rxq;
	vf->rx_filtering = true;

	efx_sriov_reset_rx_filter(vf);
	queue_work(vfdi_workqueue, &efx->peer_work);

	return VFDI_RC_SUCCESS;
}

static int efx_vfdi_remove_all_filters(struct efx_vf *vf)
{
	vf->rx_filtering = false;
	efx_sriov_reset_rx_filter(vf);
	queue_work(vfdi_workqueue, &vf->efx->peer_work);

	return VFDI_RC_SUCCESS;
}

static int efx_vfdi_set_status_page(struct efx_vf *vf)
{
	struct efx_nic *efx = vf->efx;
	struct vfdi_req *req = vf->buf.addr;
	unsigned int page_count;

	page_count = req->u.set_status_page.peer_page_count;
	if (!req->u.set_status_page.dma_addr || EFX_PAGE_SIZE <
	    offsetof(struct vfdi_req,
		     u.set_status_page.peer_page_addr[page_count])) {
		if (net_ratelimit())
			netif_err(efx, hw, efx->net_dev,
				  "ERROR: Invalid SET_STATUS_PAGE from %s\n",
				  vf->pci_name);
		return VFDI_RC_EINVAL;
	}

	mutex_lock(&efx->local_lock);
	mutex_lock(&vf->status_lock);
	vf->status_addr = req->u.set_status_page.dma_addr;

	kfree(vf->peer_page_addrs);
	vf->peer_page_addrs = NULL;
	vf->peer_page_count = 0;

	if (page_count) {
		vf->peer_page_addrs = kcalloc(page_count, sizeof(u64),
					      GFP_KERNEL);
		if (vf->peer_page_addrs) {
			memcpy(vf->peer_page_addrs,
			       req->u.set_status_page.peer_page_addr,
			       page_count * sizeof(u64));
			vf->peer_page_count = page_count;
		}
	}

	__efx_sriov_push_vf_status(vf);
	mutex_unlock(&vf->status_lock);
	mutex_unlock(&efx->local_lock);

	return VFDI_RC_SUCCESS;
}

static int efx_vfdi_clear_status_page(struct efx_vf *vf)
{
	mutex_lock(&vf->status_lock);
	vf->status_addr = 0;
	mutex_unlock(&vf->status_lock);

	return VFDI_RC_SUCCESS;
}

typedef int (*efx_vfdi_op_t)(struct efx_vf *vf);

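/* Dispatch table for VFDI requests; the request opcode indexes this
 * array directly.  Unknown opcodes are rejected with
 * VFDI_RC_EOPNOTSUPP by efx_sriov_vfdi().
 */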
static const efx_vfdi_op_t vfdi_ops[VFDI_OP_LIMIT] = {
	[VFDI_OP_INIT_EVQ] = efx_vfdi_init_evq,
	[VFDI_OP_INIT_TXQ] = efx_vfdi_init_txq,
	[VFDI_OP_INIT_RXQ] = efx_vfdi_init_rxq,
	[VFDI_OP_FINI_ALL_QUEUES] = efx_vfdi_fini_all_queues,
	[VFDI_OP_INSERT_FILTER] = efx_vfdi_insert_filter,
	[VFDI_OP_REMOVE_ALL_FILTERS] = efx_vfdi_remove_all_filters,
	[VFDI_OP_SET_STATUS_PAGE] = efx_vfdi_set_status_page,
	[VFDI_OP_CLEAR_STATUS_PAGE] = efx_vfdi_clear_status_page,
};

static void efx_sriov_vfdi(struct work_struct *work)
{
	struct efx_vf *vf = container_of(work, struct efx_vf, req);
	struct efx_nic *efx = vf->efx;
	struct vfdi_req *req = vf->buf.addr;
	struct efx_memcpy_req copy[2];
	int rc;

	/* Copy this page into the local address space */
	memset(copy, '\0', sizeof(copy));
	copy[0].from_rid = vf->pci_rid;
	copy[0].from_addr = vf->req_addr;
	copy[0].to_rid = efx->pci_dev->devfn;
	copy[0].to_addr = vf->buf.dma_addr;
	copy[0].length = EFX_PAGE_SIZE;
	rc = efx_sriov_memcpy(efx, copy, 1);
	if (rc) {
		/* If we can't get the request, we can't reply to the caller */
		if (net_ratelimit())
			netif_err(efx, hw, efx->net_dev,
				  "ERROR: Unable to fetch VFDI request from %s rc %d\n",
				  vf->pci_name, -rc);
		vf->busy = false;
		return;
	}

	if (req->op < VFDI_OP_LIMIT && vfdi_ops[req->op] != NULL) {
		rc = vfdi_ops[req->op](vf);
		if (rc == 0) {
			netif_dbg(efx, hw, efx->net_dev,
				  "vfdi request %d from %s ok\n",
				  req->op, vf->pci_name);
		}
	} else {
		netif_dbg(efx, hw, efx->net_dev,
			  "ERROR: Unrecognised request %d from VF %s addr "
			  "%llx\n", req->op, vf->pci_name,
			  (unsigned long long)vf->req_addr);
		rc = VFDI_RC_EOPNOTSUPP;
	}

	/* Allow subsequent VF requests */
	vf->busy = false;
	smp_wmb();

	/* Respond to the request */
	req->rc = rc;
	req->op = VFDI_OP_RESPONSE;

	memset(copy, '\0', sizeof(copy));
	copy[0].from_buf = &req->rc;
	copy[0].to_rid = vf->pci_rid;
	copy[0].to_addr = vf->req_addr + offsetof(struct vfdi_req, rc);
	copy[0].length = sizeof(req->rc);
	copy[1].from_buf = &req->op;
	copy[1].to_rid = vf->pci_rid;
	copy[1].to_addr = vf->req_addr + offsetof(struct vfdi_req, op);
	copy[1].length = sizeof(req->op);

	(void) efx_sriov_memcpy(efx, copy, ARRAY_SIZE(copy));
}

/* After a reset the event queues inside the guests no longer exist. Fill the
 * event ring in guest memory with VFDI reset events, then re-initialise the
 * event queue to raise an interrupt. The guest driver will then recover.
 */
static void efx_sriov_reset_vf(struct efx_vf *vf, struct efx_buffer *buffer)
{
	struct efx_nic *efx = vf->efx;
	struct efx_memcpy_req copy_req[4];
	efx_qword_t event;
	unsigned int pos, count, k, buftbl, abs_evq;
	efx_oword_t reg;
	efx_dword_t ptr;
	int rc;

	BUG_ON(buffer->len != EFX_PAGE_SIZE);

	if (!vf->evq0_count)
		return;
	BUG_ON(vf->evq0_count & (vf->evq0_count - 1));

	mutex_lock(&vf->status_lock);
	EFX_POPULATE_QWORD_3(event,
			     FSF_AZ_EV_CODE, FSE_CZ_EV_CODE_USER_EV,
			     VFDI_EV_SEQ, vf->msg_seqno,
			     VFDI_EV_TYPE, VFDI_EV_TYPE_RESET);
	vf->msg_seqno++;
	for (pos = 0; pos < EFX_PAGE_SIZE; pos += sizeof(event))
		memcpy(buffer->addr + pos, &event, sizeof(event));

	for (pos = 0; pos < vf->evq0_count; pos += count) {
		count = min_t(unsigned, vf->evq0_count - pos,
			      ARRAY_SIZE(copy_req));
		for (k = 0; k < count; k++) {
			copy_req[k].from_buf = NULL;
			copy_req[k].from_rid = efx->pci_dev->devfn;
			copy_req[k].from_addr = buffer->dma_addr;
			copy_req[k].to_rid = vf->pci_rid;
			copy_req[k].to_addr = vf->evq0_addrs[pos + k];
			copy_req[k].length = EFX_PAGE_SIZE;
		}
		rc = efx_sriov_memcpy(efx, copy_req, count);
		if (rc) {
			if (net_ratelimit())
				netif_err(efx, hw, efx->net_dev,
					  "ERROR: Unable to notify %s of reset"
					  ": %d\n", vf->pci_name, -rc);
			break;
		}
	}

	/* Reinitialise, arm and trigger evq0 */
	abs_evq = abs_index(vf, 0);
	buftbl = EFX_BUFTBL_EVQ_BASE(vf, 0);
	efx_sriov_bufs(efx, buftbl, vf->evq0_addrs, vf->evq0_count);

	EFX_POPULATE_OWORD_3(reg,
			     FRF_CZ_TIMER_Q_EN, 1,
			     FRF_CZ_HOST_NOTIFY_MODE, 0,
			     FRF_CZ_TIMER_MODE, FFE_CZ_TIMER_MODE_DIS);
	efx_writeo_table(efx, &reg, FR_BZ_TIMER_TBL, abs_evq);
	EFX_POPULATE_OWORD_3(reg,
			     FRF_AZ_EVQ_EN, 1,
			     FRF_AZ_EVQ_SIZE, __ffs(vf->evq0_count),
			     FRF_AZ_EVQ_BUF_BASE_ID, buftbl);
	efx_writeo_table(efx, &reg, FR_BZ_EVQ_PTR_TBL, abs_evq);
	EFX_POPULATE_DWORD_1(ptr, FRF_AZ_EVQ_RPTR, 0);
	efx_writed_table(efx, &ptr, FR_BZ_EVQ_RPTR, abs_evq);

	mutex_unlock(&vf->status_lock);
}

static void efx_sriov_reset_vf_work(struct work_struct *work)
{
	struct efx_vf *vf = container_of(work, struct efx_vf, reset_work);
	struct efx_nic *efx = vf->efx;
	struct efx_buffer buf;

	if (!efx_nic_alloc_buffer(efx, &buf, EFX_PAGE_SIZE)) {
		efx_sriov_reset_vf(vf, &buf);
		efx_nic_free_buffer(efx, &buf);
	}
}

static void efx_sriov_handle_no_channel(struct efx_nic *efx)
{
	netif_err(efx, drv, efx->net_dev,
		  "ERROR: IOV requires MSI-X and 1 additional interrupt "
		  "vector. IOV disabled\n");
	efx->vf_count = 0;
}

static int efx_sriov_probe_channel(struct efx_channel *channel)
{
	channel->efx->vfdi_channel = channel;
	return 0;
}

static void
efx_sriov_get_channel_name(struct efx_channel *channel, char *buf, size_t len)
{
	snprintf(buf, len, "%s-iov", channel->efx->name);
}

static const struct efx_channel_type efx_sriov_channel_type = {
	.handle_no_channel	= efx_sriov_handle_no_channel,
	.pre_probe		= efx_sriov_probe_channel,
	.get_name		= efx_sriov_get_channel_name,
	/* no copy operation; channel must not be reallocated */
	.keep_eventq		= true,
};

void efx_sriov_probe(struct efx_nic *efx)
{
	unsigned count;

	if (!max_vfs)
		return;

	if (efx_sriov_cmd(efx, false, &efx->vi_scale, &count))
		return;
	if (count > 0 && count > max_vfs)
		count = max_vfs;

	/* efx_nic_dimension_resources() will reduce vf_count as appropriate */
	efx->vf_count = count;

	efx->extra_channel_type[EFX_EXTRA_CHANNEL_IOV] = &efx_sriov_channel_type;
}

/* Copy the list of individual addresses into the vfdi_status.peers
 * array and auxiliary pages, protected by %local_lock. Drop that lock
 * and then broadcast the address list to every VF.
 */
static void efx_sriov_peer_work(struct work_struct *data)
{
	struct efx_nic *efx = container_of(data, struct efx_nic, peer_work);
	struct vfdi_status *vfdi_status = efx->vfdi_status.addr;
	struct efx_vf *vf;
	struct efx_local_addr *local_addr;
	struct vfdi_endpoint *peer;
	struct efx_endpoint_page *epp;
	struct list_head pages;
	unsigned int peer_space;
	unsigned int peer_count;
	unsigned int pos;

	mutex_lock(&efx->local_lock);

	/* Move the existing peer pages off %local_page_list */
	INIT_LIST_HEAD(&pages);
	list_splice_tail_init(&efx->local_page_list, &pages);

	/* Populate the VF addresses starting from entry 1 (entry 0 is
	 * the PF address)
	 */
	peer = vfdi_status->peers + 1;
	peer_space = ARRAY_SIZE(vfdi_status->peers) - 1;
	peer_count = 1;
	for (pos = 0; pos < efx->vf_count; ++pos) {
		vf = efx->vf + pos;

		mutex_lock(&vf->status_lock);
		if (vf->rx_filtering && !is_zero_ether_addr(vf->addr.mac_addr)) {
			*peer++ = vf->addr;
			++peer_count;
			--peer_space;
			BUG_ON(peer_space == 0);
		}
		mutex_unlock(&vf->status_lock);
	}

	/* Fill the remaining addresses */
	list_for_each_entry(local_addr, &efx->local_addr_list, link) {
		memcpy(peer->mac_addr, local_addr->addr, ETH_ALEN);
		peer->tci = 0;
		++peer;
		++peer_count;
		if (--peer_space == 0) {
			if (list_empty(&pages)) {
				epp = kmalloc(sizeof(*epp), GFP_KERNEL);
				if (!epp)
					break;
				epp->ptr = dma_alloc_coherent(
					&efx->pci_dev->dev, EFX_PAGE_SIZE,
					&epp->addr, GFP_KERNEL);
				if (!epp->ptr) {
					kfree(epp);
					break;
				}
			} else {
				epp = list_first_entry(
					&pages, struct efx_endpoint_page, link);
				list_del(&epp->link);
			}

			list_add_tail(&epp->link, &efx->local_page_list);
			peer = (struct vfdi_endpoint *)epp->ptr;
			peer_space = EFX_PAGE_SIZE / sizeof(struct vfdi_endpoint);
		}
	}
	vfdi_status->peer_count = peer_count;
	mutex_unlock(&efx->local_lock);

	/* Free any now unused endpoint pages */
	while (!list_empty(&pages)) {
		epp = list_first_entry(
			&pages, struct efx_endpoint_page, link);
		list_del(&epp->link);
		dma_free_coherent(&efx->pci_dev->dev, EFX_PAGE_SIZE,
				  epp->ptr, epp->addr);
		kfree(epp);
	}

	/* Finally, push the pages */
	for (pos = 0; pos < efx->vf_count; ++pos) {
		vf = efx->vf + pos;

		mutex_lock(&vf->status_lock);
		if (vf->status_addr)
			__efx_sriov_push_vf_status(vf);
		mutex_unlock(&vf->status_lock);
	}
}

static void efx_sriov_free_local(struct efx_nic *efx)
{
	struct efx_local_addr *local_addr;
	struct efx_endpoint_page *epp;

	while (!list_empty(&efx->local_addr_list)) {
		local_addr = list_first_entry(&efx->local_addr_list,
					      struct efx_local_addr, link);
		list_del(&local_addr->link);
		kfree(local_addr);
	}

	while (!list_empty(&efx->local_page_list)) {
		epp = list_first_entry(&efx->local_page_list,
				       struct efx_endpoint_page, link);
		list_del(&epp->link);
		dma_free_coherent(&efx->pci_dev->dev, EFX_PAGE_SIZE,
				  epp->ptr, epp->addr);
		kfree(epp);
	}
}

static int efx_sriov_vf_alloc(struct efx_nic *efx)
{
	unsigned index;
	struct efx_vf *vf;

	efx->vf = kzalloc(sizeof(struct efx_vf) * efx->vf_count, GFP_KERNEL);
	if (!efx->vf)
		return -ENOMEM;

	for (index = 0; index < efx->vf_count; ++index) {
		vf = efx->vf + index;

		vf->efx = efx;
		vf->index = index;
		vf->rx_filter_id = -1;
		vf->tx_filter_mode = VF_TX_FILTER_AUTO;
		vf->tx_filter_id = -1;
		INIT_WORK(&vf->req, efx_sriov_vfdi);
		INIT_WORK(&vf->reset_work, efx_sriov_reset_vf_work);
		init_waitqueue_head(&vf->flush_waitq);
		mutex_init(&vf->status_lock);
		mutex_init(&vf->txq_lock);
	}

	return 0;
}

static void efx_sriov_vfs_fini(struct efx_nic *efx)
{
	struct efx_vf *vf;
	unsigned int pos;

	for (pos = 0; pos < efx->vf_count; ++pos) {
		vf = efx->vf + pos;

		efx_nic_free_buffer(efx, &vf->buf);
		kfree(vf->peer_page_addrs);
		vf->peer_page_addrs = NULL;
		vf->peer_page_count = 0;

		vf->evq0_count = 0;
	}
}

static int efx_sriov_vfs_init(struct efx_nic *efx)
{
	struct pci_dev *pci_dev = efx->pci_dev;
	unsigned index, devfn, sriov, buftbl_base;
	u16 offset, stride;
	struct efx_vf *vf;
	int rc;

	sriov = pci_find_ext_capability(pci_dev, PCI_EXT_CAP_ID_SRIOV);
	if (!sriov)
		return -ENOENT;

	pci_read_config_word(pci_dev, sriov + PCI_SRIOV_VF_OFFSET, &offset);
	pci_read_config_word(pci_dev, sriov + PCI_SRIOV_VF_STRIDE, &stride);

	buftbl_base = efx->vf_buftbl_base;
	devfn = pci_dev->devfn + offset;
	for (index = 0; index < efx->vf_count; ++index) {
		vf = efx->vf + index;

		/* Reserve buffer entries */
		vf->buftbl_base = buftbl_base;
		buftbl_base += EFX_VF_BUFTBL_PER_VI * efx_vf_size(efx);

		vf->pci_rid = devfn;
		snprintf(vf->pci_name, sizeof(vf->pci_name),
			 "%04x:%02x:%02x.%d",
			 pci_domain_nr(pci_dev->bus), pci_dev->bus->number,
			 PCI_SLOT(devfn), PCI_FUNC(devfn));

		rc = efx_nic_alloc_buffer(efx, &vf->buf, EFX_PAGE_SIZE);
		if (rc)
			goto fail;

		devfn += stride;
	}

	return 0;

fail:
	efx_sriov_vfs_fini(efx);
	return rc;
}

int efx_sriov_init(struct efx_nic *efx)
{
	struct net_device *net_dev = efx->net_dev;
	struct vfdi_status *vfdi_status;
	int rc;

	/* Ensure there's room for vf_channel */
	BUILD_BUG_ON(EFX_MAX_CHANNELS + 1 >= EFX_VI_BASE);
	/* Ensure that VI_BASE is aligned on VI_SCALE */
	BUILD_BUG_ON(EFX_VI_BASE & ((1 << EFX_VI_SCALE_MAX) - 1));

	if (efx->vf_count == 0)
		return 0;

	rc = efx_sriov_cmd(efx, true, NULL, NULL);
	if (rc)
		goto fail_cmd;

	rc = efx_nic_alloc_buffer(efx, &efx->vfdi_status, sizeof(*vfdi_status));
	if (rc)
		goto fail_status;
	vfdi_status = efx->vfdi_status.addr;
	memset(vfdi_status, 0, sizeof(*vfdi_status));
	vfdi_status->version = 1;
	vfdi_status->length = sizeof(*vfdi_status);
	vfdi_status->max_tx_channels = vf_max_tx_channels;
	vfdi_status->vi_scale = efx->vi_scale;
	vfdi_status->rss_rxq_count = efx->rss_spread;
	vfdi_status->peer_count = 1 + efx->vf_count;
	vfdi_status->timer_quantum_ns = efx->timer_quantum_ns;

	rc = efx_sriov_vf_alloc(efx);
	if (rc)
		goto fail_alloc;

	mutex_init(&efx->local_lock);
	INIT_WORK(&efx->peer_work, efx_sriov_peer_work);
	INIT_LIST_HEAD(&efx->local_addr_list);
	INIT_LIST_HEAD(&efx->local_page_list);

	rc = efx_sriov_vfs_init(efx);
	if (rc)
		goto fail_vfs;

	rtnl_lock();
	memcpy(vfdi_status->peers[0].mac_addr,
	       net_dev->dev_addr, ETH_ALEN);
	efx->vf_init_count = efx->vf_count;
	rtnl_unlock();

	efx_sriov_usrev(efx, true);

	/* At this point we must be ready to accept VFDI requests */

	rc = pci_enable_sriov(efx->pci_dev, efx->vf_count);
	if (rc)
		goto fail_pci;

	netif_info(efx, probe, net_dev,
		   "enabled SR-IOV for %d VFs, %d VI per VF\n",
		   efx->vf_count, efx_vf_size(efx));
	return 0;

fail_pci:
	efx_sriov_usrev(efx, false);
	rtnl_lock();
	efx->vf_init_count = 0;
	rtnl_unlock();
	efx_sriov_vfs_fini(efx);
fail_vfs:
	cancel_work_sync(&efx->peer_work);
	efx_sriov_free_local(efx);
	kfree(efx->vf);
fail_alloc:
	efx_nic_free_buffer(efx, &efx->vfdi_status);
fail_status:
	efx_sriov_cmd(efx, false, NULL, NULL);
fail_cmd:
	return rc;
}

void efx_sriov_fini(struct efx_nic *efx)
{
	struct efx_vf *vf;
	unsigned int pos;

	if (efx->vf_init_count == 0)
		return;

	/* Disable all interfaces by which reconfiguration can be requested */
	BUG_ON(efx->vfdi_channel->enabled);
	efx_sriov_usrev(efx, false);
	rtnl_lock();
	efx->vf_init_count = 0;
	rtnl_unlock();

	/* Flush all reconfiguration work */
	for (pos = 0; pos < efx->vf_count; ++pos) {
		vf = efx->vf + pos;
		cancel_work_sync(&vf->req);
		cancel_work_sync(&vf->reset_work);
	}
	cancel_work_sync(&efx->peer_work);

	pci_disable_sriov(efx->pci_dev);

	/* Tear down back-end state */
	efx_sriov_vfs_fini(efx);
	efx_sriov_free_local(efx);
	kfree(efx->vf);
	efx_nic_free_buffer(efx, &efx->vfdi_status);
	efx_sriov_cmd(efx, false, NULL, NULL);
}

void efx_sriov_event(struct efx_channel *channel, efx_qword_t *event)
{
	struct efx_nic *efx = channel->efx;
	struct efx_vf *vf;
	unsigned qid, seq, type, data;

	qid = EFX_QWORD_FIELD(*event, FSF_CZ_USER_QID);

	/* USR_EV_REG_VALUE is dword0, so access the VFDI_EV fields directly */
	BUILD_BUG_ON(FSF_CZ_USER_EV_REG_VALUE_LBN != 0);
	seq = EFX_QWORD_FIELD(*event, VFDI_EV_SEQ);
	type = EFX_QWORD_FIELD(*event, VFDI_EV_TYPE);
	data = EFX_QWORD_FIELD(*event, VFDI_EV_DATA);

	netif_vdbg(efx, hw, efx->net_dev,
		   "USR_EV event from qid %d seq 0x%x type %d data 0x%x\n",
		   qid, seq, type, data);

	if (map_vi_index(efx, qid, &vf, NULL))
		return;
	if (vf->busy)
		goto error;

	if (type == VFDI_EV_TYPE_REQ_WORD0) {
		/* Resynchronise */
		vf->req_type = VFDI_EV_TYPE_REQ_WORD0;
		vf->req_seqno = seq + 1;
		vf->req_addr = 0;
	} else if (seq != (vf->req_seqno++ & 0xff) || type != vf->req_type)
		goto error;

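	/* The request address arrives 16 bits at a time over four
	 * consecutive USR_EV events (WORD0 = bits 15:0 ... WORD3 =
	 * bits 63:48); the final word completes the address and
	 * queues the request for processing.
	 */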
	switch (vf->req_type) {
	case VFDI_EV_TYPE_REQ_WORD0:
	case VFDI_EV_TYPE_REQ_WORD1:
	case VFDI_EV_TYPE_REQ_WORD2:
		vf->req_addr |= (u64)data << (vf->req_type << 4);
		++vf->req_type;
		return;

	case VFDI_EV_TYPE_REQ_WORD3:
		vf->req_addr |= (u64)data << 48;
		vf->req_type = VFDI_EV_TYPE_REQ_WORD0;
		vf->busy = true;
		queue_work(vfdi_workqueue, &vf->req);
		return;
	}

error:
	if (net_ratelimit())
		netif_err(efx, hw, efx->net_dev,
			  "ERROR: Screaming VFDI request from %s\n",
			  vf->pci_name);
	/* Reset the request and sequence number */
	vf->req_type = VFDI_EV_TYPE_REQ_WORD0;
	vf->req_seqno = seq + 1;
}

void efx_sriov_flr(struct efx_nic *efx, unsigned vf_i)
{
	struct efx_vf *vf;

	if (vf_i > efx->vf_init_count)
		return;
	vf = efx->vf + vf_i;
	netif_info(efx, hw, efx->net_dev,
		   "FLR on VF %s\n", vf->pci_name);

	vf->status_addr = 0;
	efx_vfdi_remove_all_filters(vf);
	efx_vfdi_flush_clear(vf);

	vf->evq0_count = 0;
}

void efx_sriov_mac_address_changed(struct efx_nic *efx)
{
	struct vfdi_status *vfdi_status = efx->vfdi_status.addr;

	if (!efx->vf_init_count)
		return;
	memcpy(vfdi_status->peers[0].mac_addr,
	       efx->net_dev->dev_addr, ETH_ALEN);
	queue_work(vfdi_workqueue, &efx->peer_work);
}

void efx_sriov_tx_flush_done(struct efx_nic *efx, efx_qword_t *event)
{
	struct efx_vf *vf;
	unsigned queue, qid;

	queue = EFX_QWORD_FIELD(*event, FSF_AZ_DRIVER_EV_SUBDATA);
	if (map_vi_index(efx, queue, &vf, &qid))
		return;
	/* Ignore flush completions triggered by an FLR */
	if (!test_bit(qid, vf->txq_mask))
		return;

	__clear_bit(qid, vf->txq_mask);
	--vf->txq_count;

	if (efx_vfdi_flush_wake(vf))
		wake_up(&vf->flush_waitq);
}

void efx_sriov_rx_flush_done(struct efx_nic *efx, efx_qword_t *event)
{
	struct efx_vf *vf;
	unsigned ev_failed, queue, qid;

	queue = EFX_QWORD_FIELD(*event, FSF_AZ_DRIVER_EV_RX_DESCQ_ID);
	ev_failed = EFX_QWORD_FIELD(*event,
				    FSF_AZ_DRIVER_EV_RX_FLUSH_FAIL);
	if (map_vi_index(efx, queue, &vf, &qid))
		return;
	if (!test_bit(qid, vf->rxq_mask))
		return;

	if (ev_failed) {
		set_bit(qid, vf->rxq_retry_mask);
		atomic_inc(&vf->rxq_retry_count);
	} else {
		__clear_bit(qid, vf->rxq_mask);
		--vf->rxq_count;
	}
	if (efx_vfdi_flush_wake(vf))
		wake_up(&vf->flush_waitq);
}

/* Called from napi. Schedule the reset work item */
void efx_sriov_desc_fetch_err(struct efx_nic *efx, unsigned dmaq)
{
	struct efx_vf *vf;
	unsigned int rel;

	if (map_vi_index(efx, dmaq, &vf, &rel))
		return;

	if (net_ratelimit())
		netif_err(efx, hw, efx->net_dev,
			  "VF %d DMA Q %d reports descriptor fetch error.\n",
			  vf->index, rel);
	queue_work(vfdi_workqueue, &vf->reset_work);
}

/* Reset all VFs */
void efx_sriov_reset(struct efx_nic *efx)
{
	unsigned int vf_i;
	struct efx_buffer buf;
	struct efx_vf *vf;

	ASSERT_RTNL();

	if (efx->vf_init_count == 0)
		return;

	efx_sriov_usrev(efx, true);
	(void)efx_sriov_cmd(efx, true, NULL, NULL);

	if (efx_nic_alloc_buffer(efx, &buf, EFX_PAGE_SIZE))
		return;

	for (vf_i = 0; vf_i < efx->vf_init_count; ++vf_i) {
		vf = efx->vf + vf_i;
		efx_sriov_reset_vf(vf, &buf);
	}

	efx_nic_free_buffer(efx, &buf);
}

int efx_init_sriov(void)
{
	/* A single threaded workqueue is sufficient. efx_sriov_vfdi() and
	 * efx_sriov_peer_work() spend almost all their time sleeping for
	 * MCDI to complete anyway
	 */
	vfdi_workqueue = create_singlethread_workqueue("sfc_vfdi");
	if (!vfdi_workqueue)
		return -ENOMEM;

	return 0;
}

void efx_fini_sriov(void)
{
	destroy_workqueue(vfdi_workqueue);
}

int efx_sriov_set_vf_mac(struct net_device *net_dev, int vf_i, u8 *mac)
{
	struct efx_nic *efx = netdev_priv(net_dev);
	struct efx_vf *vf;

	if (vf_i >= efx->vf_init_count)
		return -EINVAL;
	vf = efx->vf + vf_i;

	mutex_lock(&vf->status_lock);
	memcpy(vf->addr.mac_addr, mac, ETH_ALEN);
	__efx_sriov_update_vf_addr(vf);
	mutex_unlock(&vf->status_lock);

	return 0;
}

int efx_sriov_set_vf_vlan(struct net_device *net_dev, int vf_i,
			  u16 vlan, u8 qos)
{
	struct efx_nic *efx = netdev_priv(net_dev);
	struct efx_vf *vf;
	u16 tci;

	if (vf_i >= efx->vf_init_count)
		return -EINVAL;
	vf = efx->vf + vf_i;

	mutex_lock(&vf->status_lock);
	tci = (vlan & VLAN_VID_MASK) | ((qos & 0x7) << VLAN_PRIO_SHIFT);
	vf->addr.tci = htons(tci);
	__efx_sriov_update_vf_addr(vf);
	mutex_unlock(&vf->status_lock);

	return 0;
}

int efx_sriov_set_vf_spoofchk(struct net_device *net_dev, int vf_i,
			      bool spoofchk)
{
	struct efx_nic *efx = netdev_priv(net_dev);
	struct efx_vf *vf;
	int rc;

	if (vf_i >= efx->vf_init_count)
		return -EINVAL;
	vf = efx->vf + vf_i;

	mutex_lock(&vf->txq_lock);
	if (vf->txq_count == 0) {
		vf->tx_filter_mode =
			spoofchk ? VF_TX_FILTER_ON : VF_TX_FILTER_OFF;
		rc = 0;
	} else {
		/* This cannot be changed while TX queues are running */
		rc = -EBUSY;
	}
	mutex_unlock(&vf->txq_lock);
	return rc;
}

int efx_sriov_get_vf_config(struct net_device *net_dev, int vf_i,
			    struct ifla_vf_info *ivi)
{
	struct efx_nic *efx = netdev_priv(net_dev);
	struct efx_vf *vf;
	u16 tci;

	if (vf_i >= efx->vf_init_count)
		return -EINVAL;
	vf = efx->vf + vf_i;

	ivi->vf = vf_i;
	memcpy(ivi->mac, vf->addr.mac_addr, ETH_ALEN);
	ivi->tx_rate = 0;
	tci = ntohs(vf->addr.tci);
	ivi->vlan = tci & VLAN_VID_MASK;
	ivi->qos = (tci >> VLAN_PRIO_SHIFT) & 0x7;
	ivi->spoofchk = vf->tx_filter_mode == VF_TX_FILTER_ON;

	return 0;
}