path: root/drivers
author     Ben Hutchings <bhutchings@solarflare.com>   2012-02-13 19:48:07 -0500
committer  Ben Hutchings <bhutchings@solarflare.com>   2012-02-15 19:25:13 -0500
commit     cd2d5b529cdb9bd274f3e4bc68d37d4d63b7f383 (patch)
tree       f21a5f98185b8e227b843a28b54d4260a0e57033 /drivers
parent     28e47c498a931200125e299e9d60d22e27b4ab0d (diff)
sfc: Add SR-IOV back-end support for SFC9000 family
On the SFC9000 family, each port has 1024 Virtual Interfaces (VIs),
each with an RX queue, a TX queue, an event queue and a mailbox
register.  These may be assigned to up to 127 SR-IOV virtual functions
per port, with up to 64 VIs per VF.

We allocate an extra channel (IRQ and event queue only) to receive
requests from VF drivers.

There is a per-port limit of 4 concurrent RX queue flushes, and queue
flushes may be initiated by the MC in response to a Function Level
Reset (FLR) of a VF.  Therefore, when SR-IOV is in use, we submit all
flush requests via the MC.

The RSS indirection table is shared with VFs, so the number of RX
queues used in the PF is limited to the number of VIs per VF.

This is almost entirely the work of Steve Hodgson, formerly
shodgson@solarflare.com.

Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
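A minimal sketch of the VI numbering described above (illustrative only, not
part of the patch; it mirrors the EFX_VI_BASE constant and efx_vf_size()
helper the patch adds in nic.h):

/* Illustrative sketch: VF i owns the VI range
 * [EFX_VI_BASE + i * (1 << vi_scale), EFX_VI_BASE + (i + 1) * (1 << vi_scale)).
 * With EFX_VI_BASE = 128 and vi_scale = 6 (64 VIs per VF), VF 2 owns VIs 256..319.
 */
static inline unsigned int example_vf_first_vi(unsigned int vf_index,
					       unsigned int vi_scale)
{
	return 128U + (vf_index << vi_scale);	/* 128U == EFX_VI_BASE */
}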
Diffstat (limited to 'drivers')
-rw-r--r--  drivers/net/ethernet/sfc/Kconfig       |    8
-rw-r--r--  drivers/net/ethernet/sfc/Makefile      |    1
-rw-r--r--  drivers/net/ethernet/sfc/efx.c         |   70
-rw-r--r--  drivers/net/ethernet/sfc/ethtool.c     |    3
-rw-r--r--  drivers/net/ethernet/sfc/mcdi.c        |   34
-rw-r--r--  drivers/net/ethernet/sfc/mcdi.h        |    2
-rw-r--r--  drivers/net/ethernet/sfc/mcdi_mac.c    |    2
-rw-r--r--  drivers/net/ethernet/sfc/net_driver.h  |   32
-rw-r--r--  drivers/net/ethernet/sfc/nic.c         |   79
-rw-r--r--  drivers/net/ethernet/sfc/nic.h         |   89
-rw-r--r--  drivers/net/ethernet/sfc/siena.c       |    2
-rw-r--r--  drivers/net/ethernet/sfc/siena_sriov.c | 1642
-rw-r--r--  drivers/net/ethernet/sfc/vfdi.h        |  254
13 files changed, 2192 insertions, 26 deletions
diff --git a/drivers/net/ethernet/sfc/Kconfig b/drivers/net/ethernet/sfc/Kconfig
index 8d423544a7e6..fb3cbc27063c 100644
--- a/drivers/net/ethernet/sfc/Kconfig
+++ b/drivers/net/ethernet/sfc/Kconfig
@@ -26,3 +26,11 @@ config SFC_MCDI_MON
 	----help---
 	  This exposes the on-board firmware-managed sensors as a
 	  hardware monitor device.
+config SFC_SRIOV
+	bool "Solarflare SFC9000-family SR-IOV support"
+	depends on SFC && PCI_IOV
+	default y
+	---help---
+	  This enables support for the SFC9000 I/O Virtualization
+	  features, allowing accelerated network performance in
+	  virtualized environments.
diff --git a/drivers/net/ethernet/sfc/Makefile b/drivers/net/ethernet/sfc/Makefile
index 3fa2e25ccc45..ea1f8db57318 100644
--- a/drivers/net/ethernet/sfc/Makefile
+++ b/drivers/net/ethernet/sfc/Makefile
@@ -4,5 +4,6 @@ sfc-y += efx.o nic.o falcon.o siena.o tx.o rx.o filter.o \
 	   tenxpress.o txc43128_phy.o falcon_boards.o \
 	   mcdi.o mcdi_phy.o mcdi_mon.o
 sfc-$(CONFIG_SFC_MTD)	+= mtd.o
+sfc-$(CONFIG_SFC_SRIOV)	+= siena_sriov.o
 
 obj-$(CONFIG_SFC)	+= sfc.o
diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c
index c9c306aef2d9..ac571cf14485 100644
--- a/drivers/net/ethernet/sfc/efx.c
+++ b/drivers/net/ethernet/sfc/efx.c
@@ -1175,25 +1175,40 @@ static unsigned int efx_wanted_parallelism(struct efx_nic *efx)
 	unsigned int count;
 	int cpu;
 
-	if (rss_cpus)
-		return rss_cpus;
+	if (rss_cpus) {
+		count = rss_cpus;
+	} else {
+		if (unlikely(!zalloc_cpumask_var(&thread_mask, GFP_KERNEL))) {
+			netif_warn(efx, probe, efx->net_dev,
+				   "RSS disabled due to allocation failure\n");
+			return 1;
+		}
 
-	if (unlikely(!zalloc_cpumask_var(&thread_mask, GFP_KERNEL))) {
-		netif_warn(efx, probe, efx->net_dev,
-			   "RSS disabled due to allocation failure\n");
-		return 1;
-	}
+		count = 0;
+		for_each_online_cpu(cpu) {
+			if (!cpumask_test_cpu(cpu, thread_mask)) {
+				++count;
+				cpumask_or(thread_mask, thread_mask,
+					   topology_thread_cpumask(cpu));
+			}
+		}
+
+		free_cpumask_var(thread_mask);
 	}
 
-	count = 0;
-	for_each_online_cpu(cpu) {
-		if (!cpumask_test_cpu(cpu, thread_mask)) {
-			++count;
-			cpumask_or(thread_mask, thread_mask,
-				   topology_thread_cpumask(cpu));
-		}
+	/* If RSS is requested for the PF *and* VFs then we can't write RSS
+	 * table entries that are inaccessible to VFs
+	 */
+	if (efx_sriov_wanted(efx) && efx_vf_size(efx) > 1 &&
+	    count > efx_vf_size(efx)) {
+		netif_warn(efx, probe, efx->net_dev,
+			   "Reducing number of RSS channels from %u to %u for "
+			   "VF support. Increase vf-msix-limit to use more "
+			   "channels on the PF.\n",
+			   count, efx_vf_size(efx));
+		count = efx_vf_size(efx);
 	}
 
-	free_cpumask_var(thread_mask);
 	return count;
 }
 
@@ -1327,6 +1342,10 @@ static int efx_probe_interrupts(struct efx_nic *efx)
 		}
 	}
 
+	/* RSS might be usable on VFs even if it is disabled on the PF */
+	efx->rss_spread = (efx->n_rx_channels > 1 ?
+			   efx->n_rx_channels : efx_vf_size(efx));
+
 	return 0;
 }
 
@@ -1426,7 +1445,7 @@ static int efx_probe_nic(struct efx_nic *efx)
 	get_random_bytes(&efx->rx_hash_key, sizeof(efx->rx_hash_key));
 	for (i = 0; i < ARRAY_SIZE(efx->rx_indir_table); i++)
 		efx->rx_indir_table[i] =
-			ethtool_rxfh_indir_default(i, efx->n_rx_channels);
+			ethtool_rxfh_indir_default(i, efx->rss_spread);
 
 	efx_set_channels(efx);
 	netif_set_real_num_tx_queues(efx->net_dev, efx->n_tx_channels);
@@ -1915,6 +1934,7 @@ static int efx_set_mac_address(struct net_device *net_dev, void *data)
 	}
 
 	memcpy(net_dev->dev_addr, new_addr, net_dev->addr_len);
+	efx_sriov_mac_address_changed(efx);
 
 	/* Reconfigure the MAC */
 	mutex_lock(&efx->mac_lock);
@@ -1981,6 +2001,12 @@ static const struct net_device_ops efx_netdev_ops = {
 	.ndo_set_mac_address	= efx_set_mac_address,
 	.ndo_set_rx_mode	= efx_set_rx_mode,
 	.ndo_set_features	= efx_set_features,
+#ifdef CONFIG_SFC_SRIOV
+	.ndo_set_vf_mac		= efx_sriov_set_vf_mac,
+	.ndo_set_vf_vlan	= efx_sriov_set_vf_vlan,
+	.ndo_set_vf_spoofchk	= efx_sriov_set_vf_spoofchk,
+	.ndo_get_vf_config	= efx_sriov_get_vf_config,
+#endif
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	.ndo_poll_controller	= efx_netpoll,
 #endif
@@ -2150,6 +2176,7 @@ int efx_reset_up(struct efx_nic *efx, enum reset_type method, bool ok)
 
 	efx_start_interrupts(efx, false);
 	efx_restore_filters(efx);
+	efx_sriov_reset(efx);
 
 	mutex_unlock(&efx->mac_lock);
 
@@ -2440,6 +2467,7 @@ static void efx_pci_remove(struct pci_dev *pci_dev)
 	rtnl_unlock();
 
 	efx_stop_interrupts(efx, false);
+	efx_sriov_fini(efx);
 	efx_unregister_netdev(efx);
 
 	efx_mtd_remove(efx);
@@ -2581,6 +2609,11 @@ static int __devinit efx_pci_probe(struct pci_dev *pci_dev,
 	if (rc)
 		goto fail4;
 
+	rc = efx_sriov_init(efx);
+	if (rc)
+		netif_err(efx, probe, efx->net_dev,
+			  "SR-IOV can't be enabled rc %d\n", rc);
+
 	netif_dbg(efx, probe, efx->net_dev, "initialisation successful\n");
 
 	/* Try to create MTDs, but allow this to fail */
@@ -2732,6 +2765,10 @@ static int __init efx_init_module(void)
 	if (rc)
 		goto err_notifier;
 
+	rc = efx_init_sriov();
+	if (rc)
+		goto err_sriov;
+
 	reset_workqueue = create_singlethread_workqueue("sfc_reset");
 	if (!reset_workqueue) {
 		rc = -ENOMEM;
@@ -2747,6 +2784,8 @@ static int __init efx_init_module(void)
  err_pci:
 	destroy_workqueue(reset_workqueue);
  err_reset:
+	efx_fini_sriov();
+ err_sriov:
 	unregister_netdevice_notifier(&efx_netdev_notifier);
  err_notifier:
 	return rc;
@@ -2758,6 +2797,7 @@ static void __exit efx_exit_module(void)
 
 	pci_unregister_driver(&efx_pci_driver);
 	destroy_workqueue(reset_workqueue);
+	efx_fini_sriov();
 	unregister_netdevice_notifier(&efx_netdev_notifier);
 
 }
diff --git a/drivers/net/ethernet/sfc/ethtool.c b/drivers/net/ethernet/sfc/ethtool.c
index 83191151b650..f22f45f515a8 100644
--- a/drivers/net/ethernet/sfc/ethtool.c
+++ b/drivers/net/ethernet/sfc/ethtool.c
@@ -1085,7 +1085,8 @@ static u32 efx_ethtool_get_rxfh_indir_size(struct net_device *net_dev)
 {
 	struct efx_nic *efx = netdev_priv(net_dev);
 
-	return (efx_nic_rev(efx) < EFX_REV_FALCON_B0 ?
+	return ((efx_nic_rev(efx) < EFX_REV_FALCON_B0 ||
+		 efx->n_rx_channels == 1) ?
 		0 : ARRAY_SIZE(efx->rx_indir_table));
 }
 
diff --git a/drivers/net/ethernet/sfc/mcdi.c b/drivers/net/ethernet/sfc/mcdi.c
index 619f63a66ce7..17b6463e459c 100644
--- a/drivers/net/ethernet/sfc/mcdi.c
+++ b/drivers/net/ethernet/sfc/mcdi.c
@@ -560,6 +560,9 @@ void efx_mcdi_process_event(struct efx_channel *channel,
 	case MCDI_EVENT_CODE_MAC_STATS_DMA:
 		/* MAC stats are gather lazily.  We can ignore this. */
 		break;
+	case MCDI_EVENT_CODE_FLR:
+		efx_sriov_flr(efx, MCDI_EVENT_FIELD(*event, FLR_VF));
+		break;
 
 	default:
 		netif_err(efx, hw, efx->net_dev, "Unknown MCDI event 0x%x\n",
@@ -1154,6 +1157,37 @@ fail:
 	return rc;
 }
 
+int efx_mcdi_flush_rxqs(struct efx_nic *efx)
+{
+	struct efx_channel *channel;
+	struct efx_rx_queue *rx_queue;
+	__le32 *qid;
+	int rc, count;
+
+	qid = kmalloc(EFX_MAX_CHANNELS * sizeof(*qid), GFP_KERNEL);
+	if (qid == NULL)
+		return -ENOMEM;
+
+	count = 0;
+	efx_for_each_channel(channel, efx) {
+		efx_for_each_channel_rx_queue(rx_queue, channel) {
+			if (rx_queue->flush_pending) {
+				rx_queue->flush_pending = false;
+				atomic_dec(&efx->rxq_flush_pending);
+				qid[count++] = cpu_to_le32(
+					efx_rx_queue_index(rx_queue));
+			}
+		}
+	}
+
+	rc = efx_mcdi_rpc(efx, MC_CMD_FLUSH_RX_QUEUES, (u8 *)qid,
+			  count * sizeof(*qid), NULL, 0, NULL);
+	WARN_ON(rc > 0);
+
+	kfree(qid);
+
+	return rc;
+}
 
 int efx_mcdi_wol_filter_reset(struct efx_nic *efx)
 {
diff --git a/drivers/net/ethernet/sfc/mcdi.h b/drivers/net/ethernet/sfc/mcdi.h
index fbaa6efcd744..0bdf3e331832 100644
--- a/drivers/net/ethernet/sfc/mcdi.h
+++ b/drivers/net/ethernet/sfc/mcdi.h
@@ -146,6 +146,8 @@ extern int efx_mcdi_wol_filter_set_magic(struct efx_nic *efx,
 extern int efx_mcdi_wol_filter_get_magic(struct efx_nic *efx, int *id_out);
 extern int efx_mcdi_wol_filter_remove(struct efx_nic *efx, int id);
 extern int efx_mcdi_wol_filter_reset(struct efx_nic *efx);
+extern int efx_mcdi_flush_rxqs(struct efx_nic *efx);
+extern int efx_mcdi_set_mac(struct efx_nic *efx);
 extern int efx_mcdi_mac_stats(struct efx_nic *efx, dma_addr_t dma_addr,
 			      u32 dma_len, int enable, int clear);
 extern int efx_mcdi_mac_reconfigure(struct efx_nic *efx);
diff --git a/drivers/net/ethernet/sfc/mcdi_mac.c b/drivers/net/ethernet/sfc/mcdi_mac.c
index 98afe1c1165d..1003f309cba7 100644
--- a/drivers/net/ethernet/sfc/mcdi_mac.c
+++ b/drivers/net/ethernet/sfc/mcdi_mac.c
@@ -12,7 +12,7 @@
 #include "mcdi.h"
 #include "mcdi_pcol.h"
 
-static int efx_mcdi_set_mac(struct efx_nic *efx)
+int efx_mcdi_set_mac(struct efx_nic *efx)
 {
 	u32 reject, fcntl;
 	u8 cmdbytes[MC_CMD_SET_MAC_IN_LEN];
diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h
index 7870cefcb203..3fbec458c323 100644
--- a/drivers/net/ethernet/sfc/net_driver.h
+++ b/drivers/net/ethernet/sfc/net_driver.h
@@ -24,6 +24,7 @@
 #include <linux/device.h>
 #include <linux/highmem.h>
 #include <linux/workqueue.h>
+#include <linux/mutex.h>
 #include <linux/vmalloc.h>
 #include <linux/i2c.h>
 
@@ -54,7 +55,8 @@
 
 #define EFX_MAX_CHANNELS 32U
 #define EFX_MAX_RX_QUEUES EFX_MAX_CHANNELS
-#define EFX_MAX_EXTRA_CHANNELS	0U
+#define EFX_EXTRA_CHANNEL_IOV	0
+#define EFX_MAX_EXTRA_CHANNELS	1U
 
 /* Checksum generation is a per-queue option in hardware, so each
  * queue visible to the networking core is backed by two hardware TX
@@ -629,6 +631,8 @@ union efx_multicast_hash {
 };
 
 struct efx_filter_state;
+struct efx_vf;
+struct vfdi_status;
 
 /**
  * struct efx_nic - an Efx NIC
@@ -712,6 +716,17 @@ struct efx_filter_state;
  *	completed (either success or failure). Not used when MCDI is used to
  *	flush receive queues.
  * @flush_wq: wait queue used by efx_nic_flush_queues() to wait for flush completions.
+ * @vf: Array of &struct efx_vf objects.
+ * @vf_count: Number of VFs intended to be enabled.
+ * @vf_init_count: Number of VFs that have been fully initialised.
+ * @vi_scale: log2 number of vnics per VF.
+ * @vf_buftbl_base: The zeroth buffer table index used to back VF queues.
+ * @vfdi_status: Common VFDI status page to be dmad to VF address space.
+ * @local_addr_list: List of local addresses.  Protected by %local_lock.
+ * @local_page_list: List of DMA addressable pages used to broadcast
+ *	%local_addr_list.  Protected by %local_lock.
+ * @local_lock: Mutex protecting %local_addr_list and %local_page_list.
+ * @peer_work: Work item to broadcast peer addresses to VMs.
  * @monitor_work: Hardware monitor workitem
  * @biu_lock: BIU (bus interface unit) lock
  * @last_irq_cpu: Last CPU to handle a possible test interrupt.  This
@@ -762,6 +777,7 @@ struct efx_nic {
 	unsigned next_buffer_table;
 	unsigned n_channels;
 	unsigned n_rx_channels;
+	unsigned rss_spread;
 	unsigned tx_channel_offset;
 	unsigned n_tx_channels;
 	unsigned int rx_buffer_len;
@@ -820,6 +836,20 @@ struct efx_nic {
 	atomic_t rxq_flush_outstanding;
 	wait_queue_head_t flush_wq;
 
+#ifdef CONFIG_SFC_SRIOV
+	struct efx_channel *vfdi_channel;
+	struct efx_vf *vf;
+	unsigned vf_count;
+	unsigned vf_init_count;
+	unsigned vi_scale;
+	unsigned vf_buftbl_base;
+	struct efx_buffer vfdi_status;
+	struct list_head local_addr_list;
+	struct list_head local_page_list;
+	struct mutex local_lock;
+	struct work_struct peer_work;
+#endif
+
 	/* The following fields may be written more often */
 
 	struct delayed_work monitor_work ____cacheline_aligned_in_smp;
diff --git a/drivers/net/ethernet/sfc/nic.c b/drivers/net/ethernet/sfc/nic.c
index 747cf9439164..2bf4283f05fe 100644
--- a/drivers/net/ethernet/sfc/nic.c
+++ b/drivers/net/ethernet/sfc/nic.c
@@ -264,6 +264,10 @@ static int efx_alloc_special_buffer(struct efx_nic *efx,
 	/* Select new buffer ID */
 	buffer->index = efx->next_buffer_table;
 	efx->next_buffer_table += buffer->entries;
+#ifdef CONFIG_SFC_SRIOV
+	BUG_ON(efx_sriov_enabled(efx) &&
+	       efx->vf_buftbl_base < efx->next_buffer_table);
+#endif
 
 	netif_dbg(efx, probe, efx->net_dev,
 		  "allocating special buffers %d-%d at %llx+%x "
@@ -693,6 +697,16 @@ int efx_nic_flush_queues(struct efx_nic *efx)
 	}
 
 	while (timeout && atomic_read(&efx->drain_pending) > 0) {
+		/* If SRIOV is enabled, then offload receive queue flushing to
+		 * the firmware (though we will still have to poll for
+		 * completion). If that fails, fall back to the old scheme.
+		 */
+		if (efx_sriov_enabled(efx)) {
+			rc = efx_mcdi_flush_rxqs(efx);
+			if (!rc)
+				goto wait;
+		}
+
 		/* The hardware supports four concurrent rx flushes, each of
 		 * which may need to be retried if there is an outstanding
 		 * descriptor fetch
@@ -712,6 +726,7 @@ int efx_nic_flush_queues(struct efx_nic *efx)
 			}
 		}
 
+	wait:
 		timeout = wait_event_timeout(efx->flush_wq, efx_flush_wake(efx),
 					     timeout);
 	}
@@ -1102,11 +1117,13 @@ efx_handle_driver_event(struct efx_channel *channel, efx_qword_t *event)
 		netif_vdbg(efx, hw, efx->net_dev, "channel %d TXQ %d flushed\n",
 			   channel->channel, ev_sub_data);
 		efx_handle_tx_flush_done(efx, event);
+		efx_sriov_tx_flush_done(efx, event);
 		break;
 	case FSE_AZ_RX_DESCQ_FLS_DONE_EV:
 		netif_vdbg(efx, hw, efx->net_dev, "channel %d RXQ %d flushed\n",
 			   channel->channel, ev_sub_data);
 		efx_handle_rx_flush_done(efx, event);
+		efx_sriov_rx_flush_done(efx, event);
 		break;
 	case FSE_AZ_EVQ_INIT_DONE_EV:
 		netif_dbg(efx, hw, efx->net_dev,
@@ -1138,16 +1155,24 @@ efx_handle_driver_event(struct efx_channel *channel, efx_qword_t *event)
 				  RESET_TYPE_DISABLE);
 		break;
 	case FSE_BZ_RX_DSC_ERROR_EV:
-		netif_err(efx, rx_err, efx->net_dev,
-			  "RX DMA Q %d reports descriptor fetch error."
-			  " RX Q %d is disabled.\n", ev_sub_data, ev_sub_data);
-		efx_schedule_reset(efx, RESET_TYPE_RX_DESC_FETCH);
+		if (ev_sub_data < EFX_VI_BASE) {
+			netif_err(efx, rx_err, efx->net_dev,
+				  "RX DMA Q %d reports descriptor fetch error."
+				  " RX Q %d is disabled.\n", ev_sub_data,
+				  ev_sub_data);
+			efx_schedule_reset(efx, RESET_TYPE_RX_DESC_FETCH);
+		} else
+			efx_sriov_desc_fetch_err(efx, ev_sub_data);
 		break;
 	case FSE_BZ_TX_DSC_ERROR_EV:
-		netif_err(efx, tx_err, efx->net_dev,
-			  "TX DMA Q %d reports descriptor fetch error."
-			  " TX Q %d is disabled.\n", ev_sub_data, ev_sub_data);
-		efx_schedule_reset(efx, RESET_TYPE_TX_DESC_FETCH);
+		if (ev_sub_data < EFX_VI_BASE) {
+			netif_err(efx, tx_err, efx->net_dev,
+				  "TX DMA Q %d reports descriptor fetch error."
+				  " TX Q %d is disabled.\n", ev_sub_data,
+				  ev_sub_data);
+			efx_schedule_reset(efx, RESET_TYPE_TX_DESC_FETCH);
+		} else
+			efx_sriov_desc_fetch_err(efx, ev_sub_data);
 		break;
 	default:
 		netif_vdbg(efx, hw, efx->net_dev,
@@ -1207,6 +1232,9 @@ int efx_nic_process_eventq(struct efx_channel *channel, int budget)
 		case FSE_AZ_EV_CODE_DRIVER_EV:
 			efx_handle_driver_event(channel, &event);
 			break;
+		case FSE_CZ_EV_CODE_USER_EV:
+			efx_sriov_event(channel, &event);
+			break;
 		case FSE_CZ_EV_CODE_MCDI_EV:
 			efx_mcdi_process_event(channel, &event);
 			break;
@@ -1609,6 +1637,15 @@ void efx_nic_fini_interrupt(struct efx_nic *efx)
 		free_irq(efx->legacy_irq, efx);
 }
 
+/* Looks at available SRAM resources and works out how many queues we
+ * can support, and where things like descriptor caches should live.
+ *
+ * SRAM is split up as follows:
+ *     0                          buftbl entries for channels
+ *     efx->vf_buftbl_base        buftbl entries for SR-IOV
+ *     efx->rx_dc_base            RX descriptor caches
+ *     efx->tx_dc_base            TX descriptor caches
+ */
 void efx_nic_dimension_resources(struct efx_nic *efx, unsigned sram_lim_qw)
 {
 	unsigned vi_count, buftbl_min;
@@ -1622,6 +1659,32 @@ void efx_nic_dimension_resources(struct efx_nic *efx, unsigned sram_lim_qw)
 		      * sizeof(efx_qword_t) / EFX_BUF_SIZE);
 	vi_count = max(efx->n_channels, efx->n_tx_channels * EFX_TXQ_TYPES);
 
+#ifdef CONFIG_SFC_SRIOV
+	if (efx_sriov_wanted(efx)) {
+		unsigned vi_dc_entries, buftbl_free, entries_per_vf, vf_limit;
+
+		efx->vf_buftbl_base = buftbl_min;
+
+		vi_dc_entries = RX_DC_ENTRIES + TX_DC_ENTRIES;
+		vi_count = max(vi_count, EFX_VI_BASE);
+		buftbl_free = (sram_lim_qw - buftbl_min -
+			       vi_count * vi_dc_entries);
+
+		entries_per_vf = ((vi_dc_entries + EFX_VF_BUFTBL_PER_VI) *
+				  efx_vf_size(efx));
+		vf_limit = min(buftbl_free / entries_per_vf,
+			       (1024U - EFX_VI_BASE) >> efx->vi_scale);
+
+		if (efx->vf_count > vf_limit) {
+			netif_err(efx, probe, efx->net_dev,
+				  "Reducing VF count from from %d to %d\n",
+				  efx->vf_count, vf_limit);
+			efx->vf_count = vf_limit;
+		}
+		vi_count += efx->vf_count * efx_vf_size(efx);
+	}
+#endif
+
 	efx->tx_dc_base = sram_lim_qw - vi_count * TX_DC_ENTRIES;
 	efx->rx_dc_base = efx->tx_dc_base - vi_count * RX_DC_ENTRIES;
 }
diff --git a/drivers/net/ethernet/sfc/nic.h b/drivers/net/ethernet/sfc/nic.h
index 5df7da8b8ebf..246c4140453c 100644
--- a/drivers/net/ethernet/sfc/nic.h
+++ b/drivers/net/ethernet/sfc/nic.h
@@ -169,6 +169,95 @@ static inline struct efx_mcdi_mon *efx_mcdi_mon(struct efx_nic *efx)
 }
 #endif
 
+/*
+ * On the SFC9000 family each port is associated with 1 PCI physical
+ * function (PF) handled by sfc and a configurable number of virtual
+ * functions (VFs) that may be handled by some other driver, often in
+ * a VM guest.  The queue pointer registers are mapped in both PF and
+ * VF BARs such that an 8K region provides access to a single RX, TX
+ * and event queue (collectively a Virtual Interface, VI or VNIC).
+ *
+ * The PF has access to all 1024 VIs while VFs are mapped to VIs
+ * according to VI_BASE and VI_SCALE: VF i has access to VIs numbered
+ * in range [VI_BASE + i << VI_SCALE, VI_BASE + i + 1 << VI_SCALE).
+ * The number of VIs and the VI_SCALE value are configurable but must
+ * be established at boot time by firmware.
+ */
+
+/* Maximum VI_SCALE parameter supported by Siena */
+#define EFX_VI_SCALE_MAX 6
+/* Base VI to use for SR-IOV. Must be aligned to (1 << EFX_VI_SCALE_MAX),
+ * so this is the smallest allowed value. */
+#define EFX_VI_BASE 128U
+/* Maximum number of VFs allowed */
+#define EFX_VF_COUNT_MAX 127
+/* Limit EVQs on VFs to be only 8k to reduce buffer table reservation */
+#define EFX_MAX_VF_EVQ_SIZE 8192UL
+/* The number of buffer table entries reserved for each VI on a VF */
+#define EFX_VF_BUFTBL_PER_VI					\
+	((EFX_MAX_VF_EVQ_SIZE + 2 * EFX_MAX_DMAQ_SIZE) *	\
+	 sizeof(efx_qword_t) / EFX_BUF_SIZE)
+
+#ifdef CONFIG_SFC_SRIOV
+
+static inline bool efx_sriov_wanted(struct efx_nic *efx)
+{
+	return efx->vf_count != 0;
+}
+static inline bool efx_sriov_enabled(struct efx_nic *efx)
+{
+	return efx->vf_init_count != 0;
+}
+static inline unsigned int efx_vf_size(struct efx_nic *efx)
+{
+	return 1 << efx->vi_scale;
+}
+
+extern int efx_init_sriov(void);
+extern void efx_sriov_probe(struct efx_nic *efx);
+extern int efx_sriov_init(struct efx_nic *efx);
+extern void efx_sriov_mac_address_changed(struct efx_nic *efx);
+extern void efx_sriov_tx_flush_done(struct efx_nic *efx, efx_qword_t *event);
+extern void efx_sriov_rx_flush_done(struct efx_nic *efx, efx_qword_t *event);
+extern void efx_sriov_event(struct efx_channel *channel, efx_qword_t *event);
+extern void efx_sriov_desc_fetch_err(struct efx_nic *efx, unsigned dmaq);
+extern void efx_sriov_flr(struct efx_nic *efx, unsigned flr);
+extern void efx_sriov_reset(struct efx_nic *efx);
+extern void efx_sriov_fini(struct efx_nic *efx);
+extern void efx_fini_sriov(void);
+
+#else
+
+static inline bool efx_sriov_wanted(struct efx_nic *efx) { return false; }
+static inline bool efx_sriov_enabled(struct efx_nic *efx) { return false; }
+static inline unsigned int efx_vf_size(struct efx_nic *efx) { return 0; }
+
+static inline int efx_init_sriov(void) { return 0; }
+static inline void efx_sriov_probe(struct efx_nic *efx) {}
+static inline int efx_sriov_init(struct efx_nic *efx) { return -EOPNOTSUPP; }
+static inline void efx_sriov_mac_address_changed(struct efx_nic *efx) {}
+static inline void efx_sriov_tx_flush_done(struct efx_nic *efx,
+					   efx_qword_t *event) {}
+static inline void efx_sriov_rx_flush_done(struct efx_nic *efx,
+					   efx_qword_t *event) {}
+static inline void efx_sriov_event(struct efx_channel *channel,
+				   efx_qword_t *event) {}
+static inline void efx_sriov_desc_fetch_err(struct efx_nic *efx, unsigned dmaq) {}
+static inline void efx_sriov_flr(struct efx_nic *efx, unsigned flr) {}
+static inline void efx_sriov_reset(struct efx_nic *efx) {}
+static inline void efx_sriov_fini(struct efx_nic *efx) {}
+static inline void efx_fini_sriov(void) {}
+
+#endif
+
+extern int efx_sriov_set_vf_mac(struct net_device *dev, int vf, u8 *mac);
+extern int efx_sriov_set_vf_vlan(struct net_device *dev, int vf,
+				 u16 vlan, u8 qos);
+extern int efx_sriov_get_vf_config(struct net_device *dev, int vf,
+				   struct ifla_vf_info *ivf);
+extern int efx_sriov_set_vf_spoofchk(struct net_device *net_dev, int vf,
+				     bool spoofchk);
+
 extern const struct efx_nic_type falcon_a1_nic_type;
 extern const struct efx_nic_type falcon_b0_nic_type;
 extern const struct efx_nic_type siena_a0_nic_type;
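Putting the helpers above together, the PF-side mapping from a (VF, VF-relative
queue index) pair to an absolute VI works out as below. This is only an
illustrative sketch; it mirrors the abs_index() helper that siena_sriov.c
(later in this patch) uses for the same calculation:

/* Illustrative sketch only, using EFX_VI_BASE and efx_vf_size() as declared
 * above: absolute VI number for VF-relative queue 'rel' of VF 'vf_i'.
 */
static inline unsigned int example_abs_vi(struct efx_nic *efx,
					  unsigned int vf_i, unsigned int rel)
{
	return EFX_VI_BASE + vf_i * efx_vf_size(efx) + rel;
}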
diff --git a/drivers/net/ethernet/sfc/siena.c b/drivers/net/ethernet/sfc/siena.c
index 657f3fa93bcf..7bea79017a05 100644
--- a/drivers/net/ethernet/sfc/siena.c
+++ b/drivers/net/ethernet/sfc/siena.c
@@ -313,6 +313,8 @@ static int siena_probe_nic(struct efx_nic *efx)
 	if (rc)
 		goto fail5;
 
+	efx_sriov_probe(efx);
+
 	return 0;
 
 fail5:
diff --git a/drivers/net/ethernet/sfc/siena_sriov.c b/drivers/net/ethernet/sfc/siena_sriov.c
new file mode 100644
index 000000000000..5c6839ec3a83
--- /dev/null
+++ b/drivers/net/ethernet/sfc/siena_sriov.c
@@ -0,0 +1,1642 @@
1/****************************************************************************
2 * Driver for Solarflare Solarstorm network controllers and boards
3 * Copyright 2010-2011 Solarflare Communications Inc.
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 as published
7 * by the Free Software Foundation, incorporated herein by reference.
8 */
9#include <linux/pci.h>
10#include <linux/module.h>
11#include "net_driver.h"
12#include "efx.h"
13#include "nic.h"
14#include "io.h"
15#include "mcdi.h"
16#include "filter.h"
17#include "mcdi_pcol.h"
18#include "regs.h"
19#include "vfdi.h"
20
21/* Number of longs required to track all the VIs in a VF */
22#define VI_MASK_LENGTH BITS_TO_LONGS(1 << EFX_VI_SCALE_MAX)
23
24/**
25 * enum efx_vf_tx_filter_mode - TX MAC filtering behaviour
26 * @VF_TX_FILTER_OFF: Disabled
27 * @VF_TX_FILTER_AUTO: Enabled if MAC address assigned to VF and only
28 * 2 TX queues allowed per VF.
29 * @VF_TX_FILTER_ON: Enabled
30 */
31enum efx_vf_tx_filter_mode {
32 VF_TX_FILTER_OFF,
33 VF_TX_FILTER_AUTO,
34 VF_TX_FILTER_ON,
35};
36
37/**
38 * struct efx_vf - Back-end resource and protocol state for a PCI VF
39 * @efx: The Efx NIC owning this VF
40 * @pci_rid: The PCI requester ID for this VF
41 * @pci_name: The PCI name (formatted address) of this VF
42 * @index: Index of VF within its port and PF.
43 * @req: VFDI incoming request work item. Incoming USR_EV events are received
44 * by the NAPI handler, but must be handled by executing MCDI requests
45 * inside a work item.
46 * @req_addr: VFDI incoming request DMA address (in VF's PCI address space).
47 * @req_type: Expected next incoming (from VF) %VFDI_EV_TYPE member.
48 * @req_seqno: Expected next incoming (from VF) %VFDI_EV_SEQ member.
49 * @msg_seqno: Next %VFDI_EV_SEQ member to reply to VF. Protected by
50 * @status_lock
51 * @busy: VFDI request queued to be processed or being processed. Receiving
52 * a VFDI request when @busy is set is an error condition.
53 * @buf: Incoming VFDI requests are DMA from the VF into this buffer.
54 * @buftbl_base: Buffer table entries for this VF start at this index.
55 * @rx_filtering: Receive filtering has been requested by the VF driver.
56 * @rx_filter_flags: The flags sent in the %VFDI_OP_INSERT_FILTER request.
57 * @rx_filter_qid: VF relative qid for RX filter requested by VF.
58 * @rx_filter_id: Receive MAC filter ID. Only one filter per VF is supported.
59 * @tx_filter_mode: Transmit MAC filtering mode.
60 * @tx_filter_id: Transmit MAC filter ID.
61 * @addr: The MAC address and outer vlan tag of the VF.
62 * @status_addr: VF DMA address of page for &struct vfdi_status updates.
63 * @status_lock: Mutex protecting @msg_seqno, @status_addr, @addr,
64 * @peer_page_addrs and @peer_page_count from simultaneous
65 * updates by the VM and consumption by
66 * efx_sriov_update_vf_addr()
67 * @peer_page_addrs: Pointer to an array of guest pages for local addresses.
68 * @peer_page_count: Number of entries in @peer_page_count.
69 * @evq0_addrs: Array of guest pages backing evq0.
70 * @evq0_count: Number of entries in @evq0_addrs.
71 * @flush_waitq: wait queue used by %VFDI_OP_FINI_ALL_QUEUES handler
72 * to wait for flush completions.
73 * @txq_lock: Mutex for TX queue allocation.
74 * @txq_mask: Mask of initialized transmit queues.
75 * @txq_count: Number of initialized transmit queues.
76 * @rxq_mask: Mask of initialized receive queues.
77 * @rxq_count: Number of initialized receive queues.
78 * @rxq_retry_mask: Mask or receive queues that need to be flushed again
79 * due to flush failure.
80 * @rxq_retry_count: Number of receive queues in @rxq_retry_mask.
81 * @reset_work: Work item to schedule a VF reset.
82 */
83struct efx_vf {
84 struct efx_nic *efx;
85 unsigned int pci_rid;
86 char pci_name[13]; /* dddd:bb:dd.f */
87 unsigned int index;
88 struct work_struct req;
89 u64 req_addr;
90 int req_type;
91 unsigned req_seqno;
92 unsigned msg_seqno;
93 bool busy;
94 struct efx_buffer buf;
95 unsigned buftbl_base;
96 bool rx_filtering;
97 enum efx_filter_flags rx_filter_flags;
98 unsigned rx_filter_qid;
99 int rx_filter_id;
100 enum efx_vf_tx_filter_mode tx_filter_mode;
101 int tx_filter_id;
102 struct vfdi_endpoint addr;
103 u64 status_addr;
104 struct mutex status_lock;
105 u64 *peer_page_addrs;
106 unsigned peer_page_count;
107 u64 evq0_addrs[EFX_MAX_VF_EVQ_SIZE * sizeof(efx_qword_t) /
108 EFX_BUF_SIZE];
109 unsigned evq0_count;
110 wait_queue_head_t flush_waitq;
111 struct mutex txq_lock;
112 unsigned long txq_mask[VI_MASK_LENGTH];
113 unsigned txq_count;
114 unsigned long rxq_mask[VI_MASK_LENGTH];
115 unsigned rxq_count;
116 unsigned long rxq_retry_mask[VI_MASK_LENGTH];
117 atomic_t rxq_retry_count;
118 struct work_struct reset_work;
119};
120
121struct efx_memcpy_req {
122 unsigned int from_rid;
123 void *from_buf;
124 u64 from_addr;
125 unsigned int to_rid;
126 u64 to_addr;
127 unsigned length;
128};
129
130/**
131 * struct efx_local_addr - A MAC address on the vswitch without a VF.
132 *
133 * Siena does not have a switch, so VFs can't transmit data to each
134 * other. Instead the VFs must be made aware of the local addresses
135 * on the vswitch, so that they can arrange for an alternative
136 * software datapath to be used.
137 *
138 * @link: List head for insertion into efx->local_addr_list.
139 * @addr: Ethernet address
140 */
141struct efx_local_addr {
142 struct list_head link;
143 u8 addr[ETH_ALEN];
144};
145
146/**
147 * struct efx_endpoint_page - Page of vfdi_endpoint structures
148 *
149 * @link: List head for insertion into efx->local_page_list.
150 * @ptr: Pointer to page.
151 * @addr: DMA address of page.
152 */
153struct efx_endpoint_page {
154 struct list_head link;
155 void *ptr;
156 dma_addr_t addr;
157};
158
159/* Buffer table entries are reserved txq0,rxq0,evq0,txq1,rxq1,evq1 */
160#define EFX_BUFTBL_TXQ_BASE(_vf, _qid) \
161 ((_vf)->buftbl_base + EFX_VF_BUFTBL_PER_VI * (_qid))
162#define EFX_BUFTBL_RXQ_BASE(_vf, _qid) \
163 (EFX_BUFTBL_TXQ_BASE(_vf, _qid) + \
164 (EFX_MAX_DMAQ_SIZE * sizeof(efx_qword_t) / EFX_BUF_SIZE))
165#define EFX_BUFTBL_EVQ_BASE(_vf, _qid) \
166 (EFX_BUFTBL_TXQ_BASE(_vf, _qid) + \
167 (2 * EFX_MAX_DMAQ_SIZE * sizeof(efx_qword_t) / EFX_BUF_SIZE))
168
169#define EFX_FIELD_MASK(_field) \
170 ((1 << _field ## _WIDTH) - 1)
171
172/* VFs can only use this many transmit channels */
173static unsigned int vf_max_tx_channels = 2;
174module_param(vf_max_tx_channels, uint, 0444);
175MODULE_PARM_DESC(vf_max_tx_channels,
176 "Limit the number of TX channels VFs can use");
177
178static int max_vfs = -1;
179module_param(max_vfs, int, 0444);
180MODULE_PARM_DESC(max_vfs,
181 "Reduce the number of VFs initialized by the driver");
182
183/* Workqueue used by VFDI communication. We can't use the global
184 * workqueue because it may be running the VF driver's probe()
185 * routine, which will be blocked there waiting for a VFDI response.
186 */
187static struct workqueue_struct *vfdi_workqueue;
188
189static unsigned abs_index(struct efx_vf *vf, unsigned index)
190{
191 return EFX_VI_BASE + vf->index * efx_vf_size(vf->efx) + index;
192}
193
194static int efx_sriov_cmd(struct efx_nic *efx, bool enable,
195 unsigned *vi_scale_out, unsigned *vf_total_out)
196{
197 u8 inbuf[MC_CMD_SRIOV_IN_LEN];
198 u8 outbuf[MC_CMD_SRIOV_OUT_LEN];
199 unsigned vi_scale, vf_total;
200 size_t outlen;
201 int rc;
202
203 MCDI_SET_DWORD(inbuf, SRIOV_IN_ENABLE, enable ? 1 : 0);
204 MCDI_SET_DWORD(inbuf, SRIOV_IN_VI_BASE, EFX_VI_BASE);
205 MCDI_SET_DWORD(inbuf, SRIOV_IN_VF_COUNT, efx->vf_count);
206
207 rc = efx_mcdi_rpc(efx, MC_CMD_SRIOV, inbuf, MC_CMD_SRIOV_IN_LEN,
208 outbuf, MC_CMD_SRIOV_OUT_LEN, &outlen);
209 if (rc)
210 return rc;
211 if (outlen < MC_CMD_SRIOV_OUT_LEN)
212 return -EIO;
213
214 vf_total = MCDI_DWORD(outbuf, SRIOV_OUT_VF_TOTAL);
215 vi_scale = MCDI_DWORD(outbuf, SRIOV_OUT_VI_SCALE);
216 if (vi_scale > EFX_VI_SCALE_MAX)
217 return -EOPNOTSUPP;
218
219 if (vi_scale_out)
220 *vi_scale_out = vi_scale;
221 if (vf_total_out)
222 *vf_total_out = vf_total;
223
224 return 0;
225}
226
227static void efx_sriov_usrev(struct efx_nic *efx, bool enabled)
228{
229 efx_oword_t reg;
230
231 EFX_POPULATE_OWORD_2(reg,
232 FRF_CZ_USREV_DIS, enabled ? 0 : 1,
233 FRF_CZ_DFLT_EVQ, efx->vfdi_channel->channel);
234 efx_writeo(efx, &reg, FR_CZ_USR_EV_CFG);
235}
236
237static int efx_sriov_memcpy(struct efx_nic *efx, struct efx_memcpy_req *req,
238 unsigned int count)
239{
240 u8 *inbuf, *record;
241 unsigned int used;
242 u32 from_rid, from_hi, from_lo;
243 int rc;
244
245 mb(); /* Finish writing source/reading dest before DMA starts */
246
247 used = MC_CMD_MEMCPY_IN_LEN(count);
248 if (WARN_ON(used > MCDI_CTL_SDU_LEN_MAX))
249 return -ENOBUFS;
250
251 /* Allocate room for the largest request */
252 inbuf = kzalloc(MCDI_CTL_SDU_LEN_MAX, GFP_KERNEL);
253 if (inbuf == NULL)
254 return -ENOMEM;
255
256 record = inbuf;
257 MCDI_SET_DWORD(record, MEMCPY_IN_RECORD, count);
258 while (count-- > 0) {
259 MCDI_SET_DWORD(record, MEMCPY_RECORD_TYPEDEF_TO_RID,
260 req->to_rid);
261 MCDI_SET_DWORD(record, MEMCPY_RECORD_TYPEDEF_TO_ADDR_LO,
262 (u32)req->to_addr);
263 MCDI_SET_DWORD(record, MEMCPY_RECORD_TYPEDEF_TO_ADDR_HI,
264 (u32)(req->to_addr >> 32));
265 if (req->from_buf == NULL) {
266 from_rid = req->from_rid;
267 from_lo = (u32)req->from_addr;
268 from_hi = (u32)(req->from_addr >> 32);
269 } else {
270 if (WARN_ON(used + req->length > MCDI_CTL_SDU_LEN_MAX)) {
271 rc = -ENOBUFS;
272 goto out;
273 }
274
275 from_rid = MC_CMD_MEMCPY_RECORD_TYPEDEF_RID_INLINE;
276 from_lo = used;
277 from_hi = 0;
278 memcpy(inbuf + used, req->from_buf, req->length);
279 used += req->length;
280 }
281
282 MCDI_SET_DWORD(record, MEMCPY_RECORD_TYPEDEF_FROM_RID, from_rid);
283 MCDI_SET_DWORD(record, MEMCPY_RECORD_TYPEDEF_FROM_ADDR_LO,
284 from_lo);
285 MCDI_SET_DWORD(record, MEMCPY_RECORD_TYPEDEF_FROM_ADDR_HI,
286 from_hi);
287 MCDI_SET_DWORD(record, MEMCPY_RECORD_TYPEDEF_LENGTH,
288 req->length);
289
290 ++req;
291 record += MC_CMD_MEMCPY_IN_RECORD_LEN;
292 }
293
294 rc = efx_mcdi_rpc(efx, MC_CMD_MEMCPY, inbuf, used, NULL, 0, NULL);
295out:
296 kfree(inbuf);
297
298 mb(); /* Don't write source/read dest before DMA is complete */
299
300 return rc;
301}
302
303/* The TX filter is entirely controlled by this driver, and is modified
304 * underneath the feet of the VF
305 */
306static void efx_sriov_reset_tx_filter(struct efx_vf *vf)
307{
308 struct efx_nic *efx = vf->efx;
309 struct efx_filter_spec filter;
310 u16 vlan;
311 int rc;
312
313 if (vf->tx_filter_id != -1) {
314 efx_filter_remove_id_safe(efx, EFX_FILTER_PRI_REQUIRED,
315 vf->tx_filter_id);
316 netif_dbg(efx, hw, efx->net_dev, "Removed vf %s tx filter %d\n",
317 vf->pci_name, vf->tx_filter_id);
318 vf->tx_filter_id = -1;
319 }
320
321 if (is_zero_ether_addr(vf->addr.mac_addr))
322 return;
323
324 /* Turn on TX filtering automatically if not explicitly
325 * enabled or disabled.
326 */
327 if (vf->tx_filter_mode == VF_TX_FILTER_AUTO && vf_max_tx_channels <= 2)
328 vf->tx_filter_mode = VF_TX_FILTER_ON;
329
330 vlan = ntohs(vf->addr.tci) & VLAN_VID_MASK;
331 efx_filter_init_tx(&filter, abs_index(vf, 0));
332 rc = efx_filter_set_eth_local(&filter,
333 vlan ? vlan : EFX_FILTER_VID_UNSPEC,
334 vf->addr.mac_addr);
335 BUG_ON(rc);
336
337 rc = efx_filter_insert_filter(efx, &filter, true);
338 if (rc < 0) {
339 netif_warn(efx, hw, efx->net_dev,
340 "Unable to migrate tx filter for vf %s\n",
341 vf->pci_name);
342 } else {
343 netif_dbg(efx, hw, efx->net_dev, "Inserted vf %s tx filter %d\n",
344 vf->pci_name, rc);
345 vf->tx_filter_id = rc;
346 }
347}
348
349/* The RX filter is managed here on behalf of the VF driver */
350static void efx_sriov_reset_rx_filter(struct efx_vf *vf)
351{
352 struct efx_nic *efx = vf->efx;
353 struct efx_filter_spec filter;
354 u16 vlan;
355 int rc;
356
357 if (vf->rx_filter_id != -1) {
358 efx_filter_remove_id_safe(efx, EFX_FILTER_PRI_REQUIRED,
359 vf->rx_filter_id);
360 netif_dbg(efx, hw, efx->net_dev, "Removed vf %s rx filter %d\n",
361 vf->pci_name, vf->rx_filter_id);
362 vf->rx_filter_id = -1;
363 }
364
365 if (!vf->rx_filtering || is_zero_ether_addr(vf->addr.mac_addr))
366 return;
367
368 vlan = ntohs(vf->addr.tci) & VLAN_VID_MASK;
369 efx_filter_init_rx(&filter, EFX_FILTER_PRI_REQUIRED,
370 vf->rx_filter_flags,
371 abs_index(vf, vf->rx_filter_qid));
372 rc = efx_filter_set_eth_local(&filter,
373 vlan ? vlan : EFX_FILTER_VID_UNSPEC,
374 vf->addr.mac_addr);
375 BUG_ON(rc);
376
377 rc = efx_filter_insert_filter(efx, &filter, true);
378 if (rc < 0) {
379 netif_warn(efx, hw, efx->net_dev,
380 "Unable to insert rx filter for vf %s\n",
381 vf->pci_name);
382 } else {
383 netif_dbg(efx, hw, efx->net_dev, "Inserted vf %s rx filter %d\n",
384 vf->pci_name, rc);
385 vf->rx_filter_id = rc;
386 }
387}
388
389static void __efx_sriov_update_vf_addr(struct efx_vf *vf)
390{
391 efx_sriov_reset_tx_filter(vf);
392 efx_sriov_reset_rx_filter(vf);
393 queue_work(vfdi_workqueue, &vf->efx->peer_work);
394}
395
396/* Push the peer list to this VF. The caller must hold status_lock to interlock
397 * with VFDI requests, and they must be serialised against manipulation of
398 * local_page_list, either by acquiring local_lock or by running from
399 * efx_sriov_peer_work()
400 */
401static void __efx_sriov_push_vf_status(struct efx_vf *vf)
402{
403 struct efx_nic *efx = vf->efx;
404 struct vfdi_status *status = efx->vfdi_status.addr;
405 struct efx_memcpy_req copy[4];
406 struct efx_endpoint_page *epp;
407 unsigned int pos, count;
408 unsigned data_offset;
409 efx_qword_t event;
410
411 WARN_ON(!mutex_is_locked(&vf->status_lock));
412 WARN_ON(!vf->status_addr);
413
414 status->local = vf->addr;
415 status->generation_end = ++status->generation_start;
416
417 memset(copy, '\0', sizeof(copy));
418 /* Write generation_start */
419 copy[0].from_buf = &status->generation_start;
420 copy[0].to_rid = vf->pci_rid;
421 copy[0].to_addr = vf->status_addr + offsetof(struct vfdi_status,
422 generation_start);
423 copy[0].length = sizeof(status->generation_start);
424 /* DMA the rest of the structure (excluding the generations). This
425 * assumes that the non-generation portion of vfdi_status is in
426 * one chunk starting at the version member.
427 */
428 data_offset = offsetof(struct vfdi_status, version);
429 copy[1].from_rid = efx->pci_dev->devfn;
430 copy[1].from_addr = efx->vfdi_status.dma_addr + data_offset;
431 copy[1].to_rid = vf->pci_rid;
432 copy[1].to_addr = vf->status_addr + data_offset;
433 copy[1].length = status->length - data_offset;
434
435 /* Copy the peer pages */
436 pos = 2;
437 count = 0;
438 list_for_each_entry(epp, &efx->local_page_list, link) {
439 if (count == vf->peer_page_count) {
440 /* The VF driver will know they need to provide more
441 * pages because peer_addr_count is too large.
442 */
443 break;
444 }
445 copy[pos].from_buf = NULL;
446 copy[pos].from_rid = efx->pci_dev->devfn;
447 copy[pos].from_addr = epp->addr;
448 copy[pos].to_rid = vf->pci_rid;
449 copy[pos].to_addr = vf->peer_page_addrs[count];
450 copy[pos].length = EFX_PAGE_SIZE;
451
452 if (++pos == ARRAY_SIZE(copy)) {
453 efx_sriov_memcpy(efx, copy, ARRAY_SIZE(copy));
454 pos = 0;
455 }
456 ++count;
457 }
458
459 /* Write generation_end */
460 copy[pos].from_buf = &status->generation_end;
461 copy[pos].to_rid = vf->pci_rid;
462 copy[pos].to_addr = vf->status_addr + offsetof(struct vfdi_status,
463 generation_end);
464 copy[pos].length = sizeof(status->generation_end);
465 efx_sriov_memcpy(efx, copy, pos + 1);
466
467 /* Notify the guest */
468 EFX_POPULATE_QWORD_3(event,
469 FSF_AZ_EV_CODE, FSE_CZ_EV_CODE_USER_EV,
470 VFDI_EV_SEQ, (vf->msg_seqno & 0xff),
471 VFDI_EV_TYPE, VFDI_EV_TYPE_STATUS);
472 ++vf->msg_seqno;
473 efx_generate_event(efx, EFX_VI_BASE + vf->index * efx_vf_size(efx),
474 &event);
475}
476
477static void efx_sriov_bufs(struct efx_nic *efx, unsigned offset,
478 u64 *addr, unsigned count)
479{
480 efx_qword_t buf;
481 unsigned pos;
482
483 for (pos = 0; pos < count; ++pos) {
484 EFX_POPULATE_QWORD_3(buf,
485 FRF_AZ_BUF_ADR_REGION, 0,
486 FRF_AZ_BUF_ADR_FBUF,
487 addr ? addr[pos] >> 12 : 0,
488 FRF_AZ_BUF_OWNER_ID_FBUF, 0);
489 efx_sram_writeq(efx, efx->membase + FR_BZ_BUF_FULL_TBL,
490 &buf, offset + pos);
491 }
492}
493
494static bool bad_vf_index(struct efx_nic *efx, unsigned index)
495{
496 return index >= efx_vf_size(efx);
497}
498
499static bool bad_buf_count(unsigned buf_count, unsigned max_entry_count)
500{
501 unsigned max_buf_count = max_entry_count *
502 sizeof(efx_qword_t) / EFX_BUF_SIZE;
503
504 return ((buf_count & (buf_count - 1)) || buf_count > max_buf_count);
505}
506
507/* Check that VI specified by per-port index belongs to a VF.
508 * Optionally set VF index and VI index within the VF.
509 */
510static bool map_vi_index(struct efx_nic *efx, unsigned abs_index,
511 struct efx_vf **vf_out, unsigned *rel_index_out)
512{
513 unsigned vf_i;
514
515 if (abs_index < EFX_VI_BASE)
516 return true;
517 vf_i = (abs_index - EFX_VI_BASE) * efx_vf_size(efx);
518 if (vf_i >= efx->vf_init_count)
519 return true;
520
521 if (vf_out)
522 *vf_out = efx->vf + vf_i;
523 if (rel_index_out)
524 *rel_index_out = abs_index % efx_vf_size(efx);
525 return false;
526}
527
528static int efx_vfdi_init_evq(struct efx_vf *vf)
529{
530 struct efx_nic *efx = vf->efx;
531 struct vfdi_req *req = vf->buf.addr;
532 unsigned vf_evq = req->u.init_evq.index;
533 unsigned buf_count = req->u.init_evq.buf_count;
534 unsigned abs_evq = abs_index(vf, vf_evq);
535 unsigned buftbl = EFX_BUFTBL_EVQ_BASE(vf, vf_evq);
536 efx_oword_t reg;
537
538 if (bad_vf_index(efx, vf_evq) ||
539 bad_buf_count(buf_count, EFX_MAX_VF_EVQ_SIZE)) {
540 if (net_ratelimit())
541 netif_err(efx, hw, efx->net_dev,
542 "ERROR: Invalid INIT_EVQ from %s: evq %d bufs %d\n",
543 vf->pci_name, vf_evq, buf_count);
544 return VFDI_RC_EINVAL;
545 }
546
547 efx_sriov_bufs(efx, buftbl, req->u.init_evq.addr, buf_count);
548
549 EFX_POPULATE_OWORD_3(reg,
550 FRF_CZ_TIMER_Q_EN, 1,
551 FRF_CZ_HOST_NOTIFY_MODE, 0,
552 FRF_CZ_TIMER_MODE, FFE_CZ_TIMER_MODE_DIS);
553 efx_writeo_table(efx, &reg, FR_BZ_TIMER_TBL, abs_evq);
554 EFX_POPULATE_OWORD_3(reg,
555 FRF_AZ_EVQ_EN, 1,
556 FRF_AZ_EVQ_SIZE, __ffs(buf_count),
557 FRF_AZ_EVQ_BUF_BASE_ID, buftbl);
558 efx_writeo_table(efx, &reg, FR_BZ_EVQ_PTR_TBL, abs_evq);
559
560 if (vf_evq == 0) {
561 memcpy(vf->evq0_addrs, req->u.init_evq.addr,
562 buf_count * sizeof(u64));
563 vf->evq0_count = buf_count;
564 }
565
566 return VFDI_RC_SUCCESS;
567}
568
569static int efx_vfdi_init_rxq(struct efx_vf *vf)
570{
571 struct efx_nic *efx = vf->efx;
572 struct vfdi_req *req = vf->buf.addr;
573 unsigned vf_rxq = req->u.init_rxq.index;
574 unsigned vf_evq = req->u.init_rxq.evq;
575 unsigned buf_count = req->u.init_rxq.buf_count;
576 unsigned buftbl = EFX_BUFTBL_RXQ_BASE(vf, vf_rxq);
577 unsigned label;
578 efx_oword_t reg;
579
580 if (bad_vf_index(efx, vf_evq) || bad_vf_index(efx, vf_rxq) ||
581 bad_buf_count(buf_count, EFX_MAX_DMAQ_SIZE)) {
582 if (net_ratelimit())
583 netif_err(efx, hw, efx->net_dev,
584 "ERROR: Invalid INIT_RXQ from %s: rxq %d evq %d "
585 "buf_count %d\n", vf->pci_name, vf_rxq,
586 vf_evq, buf_count);
587 return VFDI_RC_EINVAL;
588 }
589 if (__test_and_set_bit(req->u.init_rxq.index, vf->rxq_mask))
590 ++vf->rxq_count;
591 efx_sriov_bufs(efx, buftbl, req->u.init_rxq.addr, buf_count);
592
593 label = req->u.init_rxq.label & EFX_FIELD_MASK(FRF_AZ_RX_DESCQ_LABEL);
594 EFX_POPULATE_OWORD_6(reg,
595 FRF_AZ_RX_DESCQ_BUF_BASE_ID, buftbl,
596 FRF_AZ_RX_DESCQ_EVQ_ID, abs_index(vf, vf_evq),
597 FRF_AZ_RX_DESCQ_LABEL, label,
598 FRF_AZ_RX_DESCQ_SIZE, __ffs(buf_count),
599 FRF_AZ_RX_DESCQ_JUMBO,
600 !!(req->u.init_rxq.flags &
601 VFDI_RXQ_FLAG_SCATTER_EN),
602 FRF_AZ_RX_DESCQ_EN, 1);
603 efx_writeo_table(efx, &reg, FR_BZ_RX_DESC_PTR_TBL,
604 abs_index(vf, vf_rxq));
605
606 return VFDI_RC_SUCCESS;
607}
608
609static int efx_vfdi_init_txq(struct efx_vf *vf)
610{
611 struct efx_nic *efx = vf->efx;
612 struct vfdi_req *req = vf->buf.addr;
613 unsigned vf_txq = req->u.init_txq.index;
614 unsigned vf_evq = req->u.init_txq.evq;
615 unsigned buf_count = req->u.init_txq.buf_count;
616 unsigned buftbl = EFX_BUFTBL_TXQ_BASE(vf, vf_txq);
617 unsigned label, eth_filt_en;
618 efx_oword_t reg;
619
620 if (bad_vf_index(efx, vf_evq) || bad_vf_index(efx, vf_txq) ||
621 vf_txq >= vf_max_tx_channels ||
622 bad_buf_count(buf_count, EFX_MAX_DMAQ_SIZE)) {
623 if (net_ratelimit())
624 netif_err(efx, hw, efx->net_dev,
625 "ERROR: Invalid INIT_TXQ from %s: txq %d evq %d "
626 "buf_count %d\n", vf->pci_name, vf_txq,
627 vf_evq, buf_count);
628 return VFDI_RC_EINVAL;
629 }
630
631 mutex_lock(&vf->txq_lock);
632 if (__test_and_set_bit(req->u.init_txq.index, vf->txq_mask))
633 ++vf->txq_count;
634 mutex_unlock(&vf->txq_lock);
635 efx_sriov_bufs(efx, buftbl, req->u.init_txq.addr, buf_count);
636
637 eth_filt_en = vf->tx_filter_mode == VF_TX_FILTER_ON;
638
639 label = req->u.init_txq.label & EFX_FIELD_MASK(FRF_AZ_TX_DESCQ_LABEL);
640 EFX_POPULATE_OWORD_8(reg,
641 FRF_CZ_TX_DPT_Q_MASK_WIDTH, min(efx->vi_scale, 1U),
642 FRF_CZ_TX_DPT_ETH_FILT_EN, eth_filt_en,
643 FRF_AZ_TX_DESCQ_EN, 1,
644 FRF_AZ_TX_DESCQ_BUF_BASE_ID, buftbl,
645 FRF_AZ_TX_DESCQ_EVQ_ID, abs_index(vf, vf_evq),
646 FRF_AZ_TX_DESCQ_LABEL, label,
647 FRF_AZ_TX_DESCQ_SIZE, __ffs(buf_count),
648 FRF_BZ_TX_NON_IP_DROP_DIS, 1);
649 efx_writeo_table(efx, &reg, FR_BZ_TX_DESC_PTR_TBL,
650 abs_index(vf, vf_txq));
651
652 return VFDI_RC_SUCCESS;
653}
654
655/* Returns true when efx_vfdi_fini_all_queues should wake */
656static bool efx_vfdi_flush_wake(struct efx_vf *vf)
657{
658 /* Ensure that all updates are visible to efx_vfdi_fini_all_queues() */
659 smp_mb();
660
661 return (!vf->txq_count && !vf->rxq_count) ||
662 atomic_read(&vf->rxq_retry_count);
663}
664
665static void efx_vfdi_flush_clear(struct efx_vf *vf)
666{
667 memset(vf->txq_mask, 0, sizeof(vf->txq_mask));
668 vf->txq_count = 0;
669 memset(vf->rxq_mask, 0, sizeof(vf->rxq_mask));
670 vf->rxq_count = 0;
671 memset(vf->rxq_retry_mask, 0, sizeof(vf->rxq_retry_mask));
672 atomic_set(&vf->rxq_retry_count, 0);
673}
674
675static int efx_vfdi_fini_all_queues(struct efx_vf *vf)
676{
677 struct efx_nic *efx = vf->efx;
678 efx_oword_t reg;
679 unsigned count = efx_vf_size(efx);
680 unsigned vf_offset = EFX_VI_BASE + vf->index * efx_vf_size(efx);
681 unsigned timeout = HZ;
682 unsigned index, rxqs_count;
683 __le32 *rxqs;
684 int rc;
685
686 rxqs = kmalloc(count * sizeof(*rxqs), GFP_KERNEL);
687 if (rxqs == NULL)
688 return VFDI_RC_ENOMEM;
689
690 rtnl_lock();
691 if (efx->fc_disable++ == 0)
692 efx_mcdi_set_mac(efx);
693 rtnl_unlock();
694
695 /* Flush all the initialized queues */
696 rxqs_count = 0;
697 for (index = 0; index < count; ++index) {
698 if (test_bit(index, vf->txq_mask)) {
699 EFX_POPULATE_OWORD_2(reg,
700 FRF_AZ_TX_FLUSH_DESCQ_CMD, 1,
701 FRF_AZ_TX_FLUSH_DESCQ,
702 vf_offset + index);
703 efx_writeo(efx, &reg, FR_AZ_TX_FLUSH_DESCQ);
704 }
705 if (test_bit(index, vf->rxq_mask))
706 rxqs[rxqs_count++] = cpu_to_le32(vf_offset + index);
707 }
708
709 atomic_set(&vf->rxq_retry_count, 0);
710 while (timeout && (vf->rxq_count || vf->txq_count)) {
711 rc = efx_mcdi_rpc(efx, MC_CMD_FLUSH_RX_QUEUES, (u8 *)rxqs,
712 rxqs_count * sizeof(*rxqs), NULL, 0, NULL);
713 WARN_ON(rc < 0);
714
715 timeout = wait_event_timeout(vf->flush_waitq,
716 efx_vfdi_flush_wake(vf),
717 timeout);
718 rxqs_count = 0;
719 for (index = 0; index < count; ++index) {
720 if (test_and_clear_bit(index, vf->rxq_retry_mask)) {
721 atomic_dec(&vf->rxq_retry_count);
722 rxqs[rxqs_count++] =
723 cpu_to_le32(vf_offset + index);
724 }
725 }
726 }
727
728 rtnl_lock();
729 if (--efx->fc_disable == 0)
730 efx_mcdi_set_mac(efx);
731 rtnl_unlock();
732
733 /* Irrespective of success/failure, fini the queues */
734 EFX_ZERO_OWORD(reg);
735 for (index = 0; index < count; ++index) {
736 efx_writeo_table(efx, &reg, FR_BZ_RX_DESC_PTR_TBL,
737 vf_offset + index);
738 efx_writeo_table(efx, &reg, FR_BZ_TX_DESC_PTR_TBL,
739 vf_offset + index);
740 efx_writeo_table(efx, &reg, FR_BZ_EVQ_PTR_TBL,
741 vf_offset + index);
742 efx_writeo_table(efx, &reg, FR_BZ_TIMER_TBL,
743 vf_offset + index);
744 }
745 efx_sriov_bufs(efx, vf->buftbl_base, NULL,
746 EFX_VF_BUFTBL_PER_VI * efx_vf_size(efx));
747 kfree(rxqs);
748 efx_vfdi_flush_clear(vf);
749
750 vf->evq0_count = 0;
751
752 return timeout ? 0 : VFDI_RC_ETIMEDOUT;
753}
754
755static int efx_vfdi_insert_filter(struct efx_vf *vf)
756{
757 struct efx_nic *efx = vf->efx;
758 struct vfdi_req *req = vf->buf.addr;
759 unsigned vf_rxq = req->u.mac_filter.rxq;
760 unsigned flags;
761
762 if (bad_vf_index(efx, vf_rxq) || vf->rx_filtering) {
763 if (net_ratelimit())
764 netif_err(efx, hw, efx->net_dev,
765 "ERROR: Invalid INSERT_FILTER from %s: rxq %d "
766 "flags 0x%x\n", vf->pci_name, vf_rxq,
767 req->u.mac_filter.flags);
768 return VFDI_RC_EINVAL;
769 }
770
771 flags = 0;
772 if (req->u.mac_filter.flags & VFDI_MAC_FILTER_FLAG_RSS)
773 flags |= EFX_FILTER_FLAG_RX_RSS;
774 if (req->u.mac_filter.flags & VFDI_MAC_FILTER_FLAG_SCATTER)
775 flags |= EFX_FILTER_FLAG_RX_SCATTER;
776 vf->rx_filter_flags = flags;
777 vf->rx_filter_qid = vf_rxq;
778 vf->rx_filtering = true;
779
780 efx_sriov_reset_rx_filter(vf);
781 queue_work(vfdi_workqueue, &efx->peer_work);
782
783 return VFDI_RC_SUCCESS;
784}
785
786static int efx_vfdi_remove_all_filters(struct efx_vf *vf)
787{
788 vf->rx_filtering = false;
789 efx_sriov_reset_rx_filter(vf);
790 queue_work(vfdi_workqueue, &vf->efx->peer_work);
791
792 return VFDI_RC_SUCCESS;
793}
794
795static int efx_vfdi_set_status_page(struct efx_vf *vf)
796{
797 struct efx_nic *efx = vf->efx;
798 struct vfdi_req *req = vf->buf.addr;
799 unsigned int page_count;
800
801 page_count = req->u.set_status_page.peer_page_count;
802 if (!req->u.set_status_page.dma_addr || EFX_PAGE_SIZE <
803 offsetof(struct vfdi_req,
804 u.set_status_page.peer_page_addr[page_count])) {
805 if (net_ratelimit())
806 netif_err(efx, hw, efx->net_dev,
807 "ERROR: Invalid SET_STATUS_PAGE from %s\n",
808 vf->pci_name);
809 return VFDI_RC_EINVAL;
810 }
811
812 mutex_lock(&efx->local_lock);
813 mutex_lock(&vf->status_lock);
814 vf->status_addr = req->u.set_status_page.dma_addr;
815
816 kfree(vf->peer_page_addrs);
817 vf->peer_page_addrs = NULL;
818 vf->peer_page_count = 0;
819
820 if (page_count) {
821 vf->peer_page_addrs = kcalloc(page_count, sizeof(u64),
822 GFP_KERNEL);
823 if (vf->peer_page_addrs) {
824 memcpy(vf->peer_page_addrs,
825 req->u.set_status_page.peer_page_addr,
826 page_count * sizeof(u64));
827 vf->peer_page_count = page_count;
828 }
829 }
830
831 __efx_sriov_push_vf_status(vf);
832 mutex_unlock(&vf->status_lock);
833 mutex_unlock(&efx->local_lock);
834
835 return VFDI_RC_SUCCESS;
836}
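/* Worked example for the size check above (a sketch, assuming EFX_PAGE_SIZE
 * is 4096 as described in vfdi.h): the fixed part of struct vfdi_req up to
 * peer_page_addr[] occupies 32 bytes, so a single request page can carry at
 * most (4096 - 32) / 8 = 508 peer page addresses before the offsetof() test
 * rejects the SET_STATUS_PAGE request.
 */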
837
838static int efx_vfdi_clear_status_page(struct efx_vf *vf)
839{
840 mutex_lock(&vf->status_lock);
841 vf->status_addr = 0;
842 mutex_unlock(&vf->status_lock);
843
844 return VFDI_RC_SUCCESS;
845}
846
847typedef int (*efx_vfdi_op_t)(struct efx_vf *vf);
848
849static const efx_vfdi_op_t vfdi_ops[VFDI_OP_LIMIT] = {
850 [VFDI_OP_INIT_EVQ] = efx_vfdi_init_evq,
851 [VFDI_OP_INIT_TXQ] = efx_vfdi_init_txq,
852 [VFDI_OP_INIT_RXQ] = efx_vfdi_init_rxq,
853 [VFDI_OP_FINI_ALL_QUEUES] = efx_vfdi_fini_all_queues,
854 [VFDI_OP_INSERT_FILTER] = efx_vfdi_insert_filter,
855 [VFDI_OP_REMOVE_ALL_FILTERS] = efx_vfdi_remove_all_filters,
856 [VFDI_OP_SET_STATUS_PAGE] = efx_vfdi_set_status_page,
857 [VFDI_OP_CLEAR_STATUS_PAGE] = efx_vfdi_clear_status_page,
858};
859
860static void efx_sriov_vfdi(struct work_struct *work)
861{
862 struct efx_vf *vf = container_of(work, struct efx_vf, req);
863 struct efx_nic *efx = vf->efx;
864 struct vfdi_req *req = vf->buf.addr;
865 struct efx_memcpy_req copy[2];
866 int rc;
867
868 /* Copy this page into the local address space */
869 memset(copy, '\0', sizeof(copy));
870 copy[0].from_rid = vf->pci_rid;
871 copy[0].from_addr = vf->req_addr;
872 copy[0].to_rid = efx->pci_dev->devfn;
873 copy[0].to_addr = vf->buf.dma_addr;
874 copy[0].length = EFX_PAGE_SIZE;
875 rc = efx_sriov_memcpy(efx, copy, 1);
876 if (rc) {
877 /* If we can't get the request, we can't reply to the caller */
878 if (net_ratelimit())
879 netif_err(efx, hw, efx->net_dev,
880 "ERROR: Unable to fetch VFDI request from %s rc %d\n",
881 vf->pci_name, -rc);
882 vf->busy = false;
883 return;
884 }
885
886 if (req->op < VFDI_OP_LIMIT && vfdi_ops[req->op] != NULL) {
887 rc = vfdi_ops[req->op](vf);
888 if (rc == 0) {
889 netif_dbg(efx, hw, efx->net_dev,
890 "vfdi request %d from %s ok\n",
891 req->op, vf->pci_name);
892 }
893 } else {
894 netif_dbg(efx, hw, efx->net_dev,
895 "ERROR: Unrecognised request %d from VF %s addr "
896 "%llx\n", req->op, vf->pci_name,
897 (unsigned long long)vf->req_addr);
898 rc = VFDI_RC_EOPNOTSUPP;
899 }
900
901 /* Allow subsequent VF requests */
902 vf->busy = false;
903 smp_wmb();
904
905 /* Respond to the request */
906 req->rc = rc;
907 req->op = VFDI_OP_RESPONSE;
908
909 memset(copy, '\0', sizeof(copy));
910 copy[0].from_buf = &req->rc;
911 copy[0].to_rid = vf->pci_rid;
912 copy[0].to_addr = vf->req_addr + offsetof(struct vfdi_req, rc);
913 copy[0].length = sizeof(req->rc);
914 copy[1].from_buf = &req->op;
915 copy[1].to_rid = vf->pci_rid;
916 copy[1].to_addr = vf->req_addr + offsetof(struct vfdi_req, op);
917 copy[1].length = sizeof(req->op);
918
919 (void) efx_sriov_memcpy(efx, copy, ARRAY_SIZE(copy));
920}
921
922
923
924/* After a reset the event queues inside the guests no longer exist. Fill the
 925 * event ring in guest memory with VFDI reset events, then re-initialise the
926 * event queue to raise an interrupt. The guest driver will then recover.
927 */
928static void efx_sriov_reset_vf(struct efx_vf *vf, struct efx_buffer *buffer)
929{
930 struct efx_nic *efx = vf->efx;
931 struct efx_memcpy_req copy_req[4];
932 efx_qword_t event;
933 unsigned int pos, count, k, buftbl, abs_evq;
934 efx_oword_t reg;
935 efx_dword_t ptr;
936 int rc;
937
938 BUG_ON(buffer->len != EFX_PAGE_SIZE);
939
940 if (!vf->evq0_count)
941 return;
942 BUG_ON(vf->evq0_count & (vf->evq0_count - 1));
943
944 mutex_lock(&vf->status_lock);
945 EFX_POPULATE_QWORD_3(event,
946 FSF_AZ_EV_CODE, FSE_CZ_EV_CODE_USER_EV,
947 VFDI_EV_SEQ, vf->msg_seqno,
948 VFDI_EV_TYPE, VFDI_EV_TYPE_RESET);
949 vf->msg_seqno++;
950 for (pos = 0; pos < EFX_PAGE_SIZE; pos += sizeof(event))
951 memcpy(buffer->addr + pos, &event, sizeof(event));
952
953 for (pos = 0; pos < vf->evq0_count; pos += count) {
954 count = min_t(unsigned, vf->evq0_count - pos,
955 ARRAY_SIZE(copy_req));
956 for (k = 0; k < count; k++) {
957 copy_req[k].from_buf = NULL;
958 copy_req[k].from_rid = efx->pci_dev->devfn;
959 copy_req[k].from_addr = buffer->dma_addr;
960 copy_req[k].to_rid = vf->pci_rid;
961 copy_req[k].to_addr = vf->evq0_addrs[pos + k];
962 copy_req[k].length = EFX_PAGE_SIZE;
963 }
964 rc = efx_sriov_memcpy(efx, copy_req, count);
965 if (rc) {
966 if (net_ratelimit())
967 netif_err(efx, hw, efx->net_dev,
968 "ERROR: Unable to notify %s of reset"
969 ": %d\n", vf->pci_name, -rc);
970 break;
971 }
972 }
973
974 /* Reinitialise, arm and trigger evq0 */
975 abs_evq = abs_index(vf, 0);
976 buftbl = EFX_BUFTBL_EVQ_BASE(vf, 0);
977 efx_sriov_bufs(efx, buftbl, vf->evq0_addrs, vf->evq0_count);
978
979 EFX_POPULATE_OWORD_3(reg,
980 FRF_CZ_TIMER_Q_EN, 1,
981 FRF_CZ_HOST_NOTIFY_MODE, 0,
982 FRF_CZ_TIMER_MODE, FFE_CZ_TIMER_MODE_DIS);
983 efx_writeo_table(efx, &reg, FR_BZ_TIMER_TBL, abs_evq);
984 EFX_POPULATE_OWORD_3(reg,
985 FRF_AZ_EVQ_EN, 1,
986 FRF_AZ_EVQ_SIZE, __ffs(vf->evq0_count),
987 FRF_AZ_EVQ_BUF_BASE_ID, buftbl);
988 efx_writeo_table(efx, &reg, FR_BZ_EVQ_PTR_TBL, abs_evq);
989 EFX_POPULATE_DWORD_1(ptr, FRF_AZ_EVQ_RPTR, 0);
990 efx_writed_table(efx, &ptr, FR_BZ_EVQ_RPTR, abs_evq);
991
992 mutex_unlock(&vf->status_lock);
993}
994
995static void efx_sriov_reset_vf_work(struct work_struct *work)
996{
997 struct efx_vf *vf = container_of(work, struct efx_vf, req);
998 struct efx_nic *efx = vf->efx;
999 struct efx_buffer buf;
1000
1001 if (!efx_nic_alloc_buffer(efx, &buf, EFX_PAGE_SIZE)) {
1002 efx_sriov_reset_vf(vf, &buf);
1003 efx_nic_free_buffer(efx, &buf);
1004 }
1005}
1006
1007static void efx_sriov_handle_no_channel(struct efx_nic *efx)
1008{
1009 netif_err(efx, drv, efx->net_dev,
1010		  "ERROR: IOV requires MSI-X and 1 additional interrupt "
1011 "vector. IOV disabled\n");
1012 efx->vf_count = 0;
1013}
1014
1015static int efx_sriov_probe_channel(struct efx_channel *channel)
1016{
1017 channel->efx->vfdi_channel = channel;
1018 return 0;
1019}
1020
1021static void
1022efx_sriov_get_channel_name(struct efx_channel *channel, char *buf, size_t len)
1023{
1024 snprintf(buf, len, "%s-iov", channel->efx->name);
1025}
1026
1027static const struct efx_channel_type efx_sriov_channel_type = {
1028 .handle_no_channel = efx_sriov_handle_no_channel,
1029 .pre_probe = efx_sriov_probe_channel,
1030 .get_name = efx_sriov_get_channel_name,
1031 /* no copy operation; channel must not be reallocated */
1032 .keep_eventq = true,
1033};
1034
1035void efx_sriov_probe(struct efx_nic *efx)
1036{
1037 unsigned count;
1038
1039 if (!max_vfs)
1040 return;
1041
1042 if (efx_sriov_cmd(efx, false, &efx->vi_scale, &count))
1043 return;
1044 if (count > 0 && count > max_vfs)
1045 count = max_vfs;
1046
1047	/* efx_nic_dimension_resources() will reduce vf_count as appropriate */
1048 efx->vf_count = count;
1049
1050 efx->extra_channel_type[EFX_EXTRA_CHANNEL_IOV] = &efx_sriov_channel_type;
1051}
1052
1053/* Copy the list of individual addresses into the vfdi_status.peers
1054 * array and auxiliary pages, protected by %local_lock. Drop that lock
1055 * and then broadcast the address list to every VF.
1056 */
1057static void efx_sriov_peer_work(struct work_struct *data)
1058{
1059 struct efx_nic *efx = container_of(data, struct efx_nic, peer_work);
1060 struct vfdi_status *vfdi_status = efx->vfdi_status.addr;
1061 struct efx_vf *vf;
1062 struct efx_local_addr *local_addr;
1063 struct vfdi_endpoint *peer;
1064 struct efx_endpoint_page *epp;
1065 struct list_head pages;
1066 unsigned int peer_space;
1067 unsigned int peer_count;
1068 unsigned int pos;
1069
1070 mutex_lock(&efx->local_lock);
1071
1072 /* Move the existing peer pages off %local_page_list */
1073 INIT_LIST_HEAD(&pages);
1074 list_splice_tail_init(&efx->local_page_list, &pages);
1075
1076 /* Populate the VF addresses starting from entry 1 (entry 0 is
1077 * the PF address)
1078 */
1079 peer = vfdi_status->peers + 1;
1080 peer_space = ARRAY_SIZE(vfdi_status->peers) - 1;
1081 peer_count = 1;
1082 for (pos = 0; pos < efx->vf_count; ++pos) {
1083 vf = efx->vf + pos;
1084
1085 mutex_lock(&vf->status_lock);
1086 if (vf->rx_filtering && !is_zero_ether_addr(vf->addr.mac_addr)) {
1087 *peer++ = vf->addr;
1088 ++peer_count;
1089 --peer_space;
1090 BUG_ON(peer_space == 0);
1091 }
1092 mutex_unlock(&vf->status_lock);
1093 }
1094
1095 /* Fill the remaining addresses */
1096 list_for_each_entry(local_addr, &efx->local_addr_list, link) {
1097 memcpy(peer->mac_addr, local_addr->addr, ETH_ALEN);
1098 peer->tci = 0;
1099 ++peer;
1100 ++peer_count;
1101 if (--peer_space == 0) {
1102 if (list_empty(&pages)) {
1103 epp = kmalloc(sizeof(*epp), GFP_KERNEL);
1104 if (!epp)
1105 break;
1106 epp->ptr = dma_alloc_coherent(
1107 &efx->pci_dev->dev, EFX_PAGE_SIZE,
1108 &epp->addr, GFP_KERNEL);
1109 if (!epp->ptr) {
1110 kfree(epp);
1111 break;
1112 }
1113 } else {
1114 epp = list_first_entry(
1115 &pages, struct efx_endpoint_page, link);
1116 list_del(&epp->link);
1117 }
1118
1119 list_add_tail(&epp->link, &efx->local_page_list);
1120 peer = (struct vfdi_endpoint *)epp->ptr;
1121 peer_space = EFX_PAGE_SIZE / sizeof(struct vfdi_endpoint);
1122 }
1123 }
1124 vfdi_status->peer_count = peer_count;
1125 mutex_unlock(&efx->local_lock);
1126
1127 /* Free any now unused endpoint pages */
1128 while (!list_empty(&pages)) {
1129 epp = list_first_entry(
1130 &pages, struct efx_endpoint_page, link);
1131 list_del(&epp->link);
1132 dma_free_coherent(&efx->pci_dev->dev, EFX_PAGE_SIZE,
1133 epp->ptr, epp->addr);
1134 kfree(epp);
1135 }
1136
1137 /* Finally, push the pages */
1138 for (pos = 0; pos < efx->vf_count; ++pos) {
1139 vf = efx->vf + pos;
1140
1141 mutex_lock(&vf->status_lock);
1142 if (vf->status_addr)
1143 __efx_sriov_push_vf_status(vf);
1144 mutex_unlock(&vf->status_lock);
1145 }
1146}
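/* Sizing note for the loop above (a sketch, assuming EFX_PAGE_SIZE is 4096):
 * struct vfdi_endpoint is 8 bytes (a 6-byte MAC address plus a 2-byte TCI),
 * so each additional endpoint page reused or allocated here holds
 * 4096 / 8 = 512 peer entries, matching the limit described in vfdi.h.
 */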
1147
1148static void efx_sriov_free_local(struct efx_nic *efx)
1149{
1150 struct efx_local_addr *local_addr;
1151 struct efx_endpoint_page *epp;
1152
1153 while (!list_empty(&efx->local_addr_list)) {
1154 local_addr = list_first_entry(&efx->local_addr_list,
1155 struct efx_local_addr, link);
1156 list_del(&local_addr->link);
1157 kfree(local_addr);
1158 }
1159
1160 while (!list_empty(&efx->local_page_list)) {
1161 epp = list_first_entry(&efx->local_page_list,
1162 struct efx_endpoint_page, link);
1163 list_del(&epp->link);
1164 dma_free_coherent(&efx->pci_dev->dev, EFX_PAGE_SIZE,
1165 epp->ptr, epp->addr);
1166 kfree(epp);
1167 }
1168}
1169
1170static int efx_sriov_vf_alloc(struct efx_nic *efx)
1171{
1172 unsigned index;
1173 struct efx_vf *vf;
1174
1175 efx->vf = kzalloc(sizeof(struct efx_vf) * efx->vf_count, GFP_KERNEL);
1176 if (!efx->vf)
1177 return -ENOMEM;
1178
1179 for (index = 0; index < efx->vf_count; ++index) {
1180 vf = efx->vf + index;
1181
1182 vf->efx = efx;
1183 vf->index = index;
1184 vf->rx_filter_id = -1;
1185 vf->tx_filter_mode = VF_TX_FILTER_AUTO;
1186 vf->tx_filter_id = -1;
1187 INIT_WORK(&vf->req, efx_sriov_vfdi);
1188 INIT_WORK(&vf->reset_work, efx_sriov_reset_vf_work);
1189 init_waitqueue_head(&vf->flush_waitq);
1190 mutex_init(&vf->status_lock);
1191 mutex_init(&vf->txq_lock);
1192 }
1193
1194 return 0;
1195}
1196
1197static void efx_sriov_vfs_fini(struct efx_nic *efx)
1198{
1199 struct efx_vf *vf;
1200 unsigned int pos;
1201
1202 for (pos = 0; pos < efx->vf_count; ++pos) {
1203 vf = efx->vf + pos;
1204
1205 efx_nic_free_buffer(efx, &vf->buf);
1206 kfree(vf->peer_page_addrs);
1207 vf->peer_page_addrs = NULL;
1208 vf->peer_page_count = 0;
1209
1210 vf->evq0_count = 0;
1211 }
1212}
1213
1214static int efx_sriov_vfs_init(struct efx_nic *efx)
1215{
1216 struct pci_dev *pci_dev = efx->pci_dev;
1217 unsigned index, devfn, sriov, buftbl_base;
1218 u16 offset, stride;
1219 struct efx_vf *vf;
1220 int rc;
1221
1222 sriov = pci_find_ext_capability(pci_dev, PCI_EXT_CAP_ID_SRIOV);
1223 if (!sriov)
1224 return -ENOENT;
1225
1226 pci_read_config_word(pci_dev, sriov + PCI_SRIOV_VF_OFFSET, &offset);
1227 pci_read_config_word(pci_dev, sriov + PCI_SRIOV_VF_STRIDE, &stride);
1228
1229 buftbl_base = efx->vf_buftbl_base;
1230 devfn = pci_dev->devfn + offset;
1231 for (index = 0; index < efx->vf_count; ++index) {
1232 vf = efx->vf + index;
1233
1234 /* Reserve buffer entries */
1235 vf->buftbl_base = buftbl_base;
1236 buftbl_base += EFX_VF_BUFTBL_PER_VI * efx_vf_size(efx);
1237
1238 vf->pci_rid = devfn;
1239 snprintf(vf->pci_name, sizeof(vf->pci_name),
1240 "%04x:%02x:%02x.%d",
1241 pci_domain_nr(pci_dev->bus), pci_dev->bus->number,
1242 PCI_SLOT(devfn), PCI_FUNC(devfn));
1243
1244 rc = efx_nic_alloc_buffer(efx, &vf->buf, EFX_PAGE_SIZE);
1245 if (rc)
1246 goto fail;
1247
1248 devfn += stride;
1249 }
1250
1251 return 0;
1252
1253fail:
1254 efx_sriov_vfs_fini(efx);
1255 return rc;
1256}
1257
1258int efx_sriov_init(struct efx_nic *efx)
1259{
1260 struct net_device *net_dev = efx->net_dev;
1261 struct vfdi_status *vfdi_status;
1262 int rc;
1263
1264 /* Ensure there's room for vf_channel */
1265 BUILD_BUG_ON(EFX_MAX_CHANNELS + 1 >= EFX_VI_BASE);
1266 /* Ensure that VI_BASE is aligned on VI_SCALE */
1267 BUILD_BUG_ON(EFX_VI_BASE & ((1 << EFX_VI_SCALE_MAX) - 1));
1268
1269 if (efx->vf_count == 0)
1270 return 0;
1271
1272 rc = efx_sriov_cmd(efx, true, NULL, NULL);
1273 if (rc)
1274 goto fail_cmd;
1275
1276 rc = efx_nic_alloc_buffer(efx, &efx->vfdi_status, sizeof(*vfdi_status));
1277 if (rc)
1278 goto fail_status;
1279 vfdi_status = efx->vfdi_status.addr;
1280 memset(vfdi_status, 0, sizeof(*vfdi_status));
1281 vfdi_status->version = 1;
1282 vfdi_status->length = sizeof(*vfdi_status);
1283 vfdi_status->max_tx_channels = vf_max_tx_channels;
1284 vfdi_status->vi_scale = efx->vi_scale;
1285 vfdi_status->rss_rxq_count = efx->rss_spread;
1286 vfdi_status->peer_count = 1 + efx->vf_count;
1287 vfdi_status->timer_quantum_ns = efx->timer_quantum_ns;
1288
1289 rc = efx_sriov_vf_alloc(efx);
1290 if (rc)
1291 goto fail_alloc;
1292
1293 mutex_init(&efx->local_lock);
1294 INIT_WORK(&efx->peer_work, efx_sriov_peer_work);
1295 INIT_LIST_HEAD(&efx->local_addr_list);
1296 INIT_LIST_HEAD(&efx->local_page_list);
1297
1298 rc = efx_sriov_vfs_init(efx);
1299 if (rc)
1300 goto fail_vfs;
1301
1302 rtnl_lock();
1303 memcpy(vfdi_status->peers[0].mac_addr,
1304 net_dev->dev_addr, ETH_ALEN);
1305 efx->vf_init_count = efx->vf_count;
1306 rtnl_unlock();
1307
1308 efx_sriov_usrev(efx, true);
1309
1310 /* At this point we must be ready to accept VFDI requests */
1311
1312 rc = pci_enable_sriov(efx->pci_dev, efx->vf_count);
1313 if (rc)
1314 goto fail_pci;
1315
1316 netif_info(efx, probe, net_dev,
1317 "enabled SR-IOV for %d VFs, %d VI per VF\n",
1318 efx->vf_count, efx_vf_size(efx));
1319 return 0;
1320
1321fail_pci:
1322 efx_sriov_usrev(efx, false);
1323 rtnl_lock();
1324 efx->vf_init_count = 0;
1325 rtnl_unlock();
1326 efx_sriov_vfs_fini(efx);
1327fail_vfs:
1328 cancel_work_sync(&efx->peer_work);
1329 efx_sriov_free_local(efx);
1330 kfree(efx->vf);
1331fail_alloc:
1332 efx_nic_free_buffer(efx, &efx->vfdi_status);
1333fail_status:
1334 efx_sriov_cmd(efx, false, NULL, NULL);
1335fail_cmd:
1336 return rc;
1337}
1338
1339void efx_sriov_fini(struct efx_nic *efx)
1340{
1341 struct efx_vf *vf;
1342 unsigned int pos;
1343
1344 if (efx->vf_init_count == 0)
1345 return;
1346
1347	/* Disable all interfaces used for reconfiguration */
1348 BUG_ON(efx->vfdi_channel->enabled);
1349 efx_sriov_usrev(efx, false);
1350 rtnl_lock();
1351 efx->vf_init_count = 0;
1352 rtnl_unlock();
1353
1354 /* Flush all reconfiguration work */
1355 for (pos = 0; pos < efx->vf_count; ++pos) {
1356 vf = efx->vf + pos;
1357 cancel_work_sync(&vf->req);
1358 cancel_work_sync(&vf->reset_work);
1359 }
1360 cancel_work_sync(&efx->peer_work);
1361
1362 pci_disable_sriov(efx->pci_dev);
1363
1364 /* Tear down back-end state */
1365 efx_sriov_vfs_fini(efx);
1366 efx_sriov_free_local(efx);
1367 kfree(efx->vf);
1368 efx_nic_free_buffer(efx, &efx->vfdi_status);
1369 efx_sriov_cmd(efx, false, NULL, NULL);
1370}
1371
1372void efx_sriov_event(struct efx_channel *channel, efx_qword_t *event)
1373{
1374 struct efx_nic *efx = channel->efx;
1375 struct efx_vf *vf;
1376 unsigned qid, seq, type, data;
1377
1378 qid = EFX_QWORD_FIELD(*event, FSF_CZ_USER_QID);
1379
1380 /* USR_EV_REG_VALUE is dword0, so access the VFDI_EV fields directly */
1381 BUILD_BUG_ON(FSF_CZ_USER_EV_REG_VALUE_LBN != 0);
1382 seq = EFX_QWORD_FIELD(*event, VFDI_EV_SEQ);
1383 type = EFX_QWORD_FIELD(*event, VFDI_EV_TYPE);
1384 data = EFX_QWORD_FIELD(*event, VFDI_EV_DATA);
1385
1386 netif_vdbg(efx, hw, efx->net_dev,
1387 "USR_EV event from qid %d seq 0x%x type %d data 0x%x\n",
1388 qid, seq, type, data);
1389
1390 if (map_vi_index(efx, qid, &vf, NULL))
1391 return;
1392 if (vf->busy)
1393 goto error;
1394
1395 if (type == VFDI_EV_TYPE_REQ_WORD0) {
1396 /* Resynchronise */
1397 vf->req_type = VFDI_EV_TYPE_REQ_WORD0;
1398 vf->req_seqno = seq + 1;
1399 vf->req_addr = 0;
1400 } else if (seq != (vf->req_seqno++ & 0xff) || type != vf->req_type)
1401 goto error;
1402
1403 switch (vf->req_type) {
1404 case VFDI_EV_TYPE_REQ_WORD0:
1405 case VFDI_EV_TYPE_REQ_WORD1:
1406 case VFDI_EV_TYPE_REQ_WORD2:
1407 vf->req_addr |= (u64)data << (vf->req_type << 4);
1408 ++vf->req_type;
1409 return;
1410
1411 case VFDI_EV_TYPE_REQ_WORD3:
1412 vf->req_addr |= (u64)data << 48;
1413 vf->req_type = VFDI_EV_TYPE_REQ_WORD0;
1414 vf->busy = true;
1415 queue_work(vfdi_workqueue, &vf->req);
1416 return;
1417 }
1418
1419error:
1420 if (net_ratelimit())
1421 netif_err(efx, hw, efx->net_dev,
1422 "ERROR: Screaming VFDI request from %s\n",
1423 vf->pci_name);
1424 /* Reset the request and sequence number */
1425 vf->req_type = VFDI_EV_TYPE_REQ_WORD0;
1426 vf->req_seqno = seq + 1;
1427}
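/* Minimal sketch of the address reassembly above (the helper name is
 * illustrative only and is not used by the driver): REQ_WORD event N
 * carries bits 16*N..16*N+15 of the request address, so for example
 * 0x0000123456789000 arrives as DATA values 0x9000 (WORD0), 0x5678
 * (WORD1), 0x1234 (WORD2) and 0x0000 (WORD3).
 */
static inline u64 vfdi_assemble_req_addr(const u16 data[4])
{
	u64 addr = 0;
	unsigned int word;

	for (word = 0; word < 4; word++)
		addr |= (u64)data[word] << (word * 16);
	return addr;
}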
1428
1429void efx_sriov_flr(struct efx_nic *efx, unsigned vf_i)
1430{
1431 struct efx_vf *vf;
1432
1433	if (vf_i >= efx->vf_init_count)
1434 return;
1435 vf = efx->vf + vf_i;
1436 netif_info(efx, hw, efx->net_dev,
1437 "FLR on VF %s\n", vf->pci_name);
1438
1439 vf->status_addr = 0;
1440 efx_vfdi_remove_all_filters(vf);
1441 efx_vfdi_flush_clear(vf);
1442
1443 vf->evq0_count = 0;
1444}
1445
1446void efx_sriov_mac_address_changed(struct efx_nic *efx)
1447{
1448 struct vfdi_status *vfdi_status = efx->vfdi_status.addr;
1449
1450 if (!efx->vf_init_count)
1451 return;
1452 memcpy(vfdi_status->peers[0].mac_addr,
1453 efx->net_dev->dev_addr, ETH_ALEN);
1454 queue_work(vfdi_workqueue, &efx->peer_work);
1455}
1456
1457void efx_sriov_tx_flush_done(struct efx_nic *efx, efx_qword_t *event)
1458{
1459 struct efx_vf *vf;
1460 unsigned queue, qid;
1461
1462 queue = EFX_QWORD_FIELD(*event, FSF_AZ_DRIVER_EV_SUBDATA);
1463 if (map_vi_index(efx, queue, &vf, &qid))
1464 return;
1465 /* Ignore flush completions triggered by an FLR */
1466 if (!test_bit(qid, vf->txq_mask))
1467 return;
1468
1469 __clear_bit(qid, vf->txq_mask);
1470 --vf->txq_count;
1471
1472 if (efx_vfdi_flush_wake(vf))
1473 wake_up(&vf->flush_waitq);
1474}
1475
1476void efx_sriov_rx_flush_done(struct efx_nic *efx, efx_qword_t *event)
1477{
1478 struct efx_vf *vf;
1479 unsigned ev_failed, queue, qid;
1480
1481 queue = EFX_QWORD_FIELD(*event, FSF_AZ_DRIVER_EV_RX_DESCQ_ID);
1482 ev_failed = EFX_QWORD_FIELD(*event,
1483 FSF_AZ_DRIVER_EV_RX_FLUSH_FAIL);
1484 if (map_vi_index(efx, queue, &vf, &qid))
1485 return;
1486 if (!test_bit(qid, vf->rxq_mask))
1487 return;
1488
1489 if (ev_failed) {
1490 set_bit(qid, vf->rxq_retry_mask);
1491 atomic_inc(&vf->rxq_retry_count);
1492 } else {
1493 __clear_bit(qid, vf->rxq_mask);
1494 --vf->rxq_count;
1495 }
1496 if (efx_vfdi_flush_wake(vf))
1497 wake_up(&vf->flush_waitq);
1498}
1499
1500/* Called from napi. Schedule the reset work item */
1501void efx_sriov_desc_fetch_err(struct efx_nic *efx, unsigned dmaq)
1502{
1503 struct efx_vf *vf;
1504 unsigned int rel;
1505
1506 if (map_vi_index(efx, dmaq, &vf, &rel))
1507 return;
1508
1509 if (net_ratelimit())
1510 netif_err(efx, hw, efx->net_dev,
1511 "VF %d DMA Q %d reports descriptor fetch error.\n",
1512 vf->index, rel);
1513 queue_work(vfdi_workqueue, &vf->reset_work);
1514}
1515
1516/* Reset all VFs */
1517void efx_sriov_reset(struct efx_nic *efx)
1518{
1519 unsigned int vf_i;
1520 struct efx_buffer buf;
1521 struct efx_vf *vf;
1522
1523 ASSERT_RTNL();
1524
1525 if (efx->vf_init_count == 0)
1526 return;
1527
1528 efx_sriov_usrev(efx, true);
1529 (void)efx_sriov_cmd(efx, true, NULL, NULL);
1530
1531 if (efx_nic_alloc_buffer(efx, &buf, EFX_PAGE_SIZE))
1532 return;
1533
1534 for (vf_i = 0; vf_i < efx->vf_init_count; ++vf_i) {
1535 vf = efx->vf + vf_i;
1536 efx_sriov_reset_vf(vf, &buf);
1537 }
1538
1539 efx_nic_free_buffer(efx, &buf);
1540}
1541
1542int efx_init_sriov(void)
1543{
1544	/* A single-threaded workqueue is sufficient. efx_sriov_vfdi() and
1545	 * efx_sriov_peer_work() spend almost all their time sleeping for
1546	 * MCDI to complete anyway.
1547 */
1548 vfdi_workqueue = create_singlethread_workqueue("sfc_vfdi");
1549 if (!vfdi_workqueue)
1550 return -ENOMEM;
1551
1552 return 0;
1553}
1554
1555void efx_fini_sriov(void)
1556{
1557 destroy_workqueue(vfdi_workqueue);
1558}
1559
1560int efx_sriov_set_vf_mac(struct net_device *net_dev, int vf_i, u8 *mac)
1561{
1562 struct efx_nic *efx = netdev_priv(net_dev);
1563 struct efx_vf *vf;
1564
1565 if (vf_i >= efx->vf_init_count)
1566 return -EINVAL;
1567 vf = efx->vf + vf_i;
1568
1569 mutex_lock(&vf->status_lock);
1570 memcpy(vf->addr.mac_addr, mac, ETH_ALEN);
1571 __efx_sriov_update_vf_addr(vf);
1572 mutex_unlock(&vf->status_lock);
1573
1574 return 0;
1575}
1576
1577int efx_sriov_set_vf_vlan(struct net_device *net_dev, int vf_i,
1578 u16 vlan, u8 qos)
1579{
1580 struct efx_nic *efx = netdev_priv(net_dev);
1581 struct efx_vf *vf;
1582 u16 tci;
1583
1584 if (vf_i >= efx->vf_init_count)
1585 return -EINVAL;
1586 vf = efx->vf + vf_i;
1587
1588 mutex_lock(&vf->status_lock);
1589 tci = (vlan & VLAN_VID_MASK) | ((qos & 0x7) << VLAN_PRIO_SHIFT);
1590 vf->addr.tci = htons(tci);
1591 __efx_sriov_update_vf_addr(vf);
1592 mutex_unlock(&vf->status_lock);
1593
1594 return 0;
1595}
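/* Worked example for the TCI packing above (sketch only): vlan = 100 and
 * qos = 5 give tci = (100 & VLAN_VID_MASK) | (5 << VLAN_PRIO_SHIFT)
 * = 0x0064 | 0xa000 = 0xa064, i.e. VLAN ID 100 in bits 11:0 and
 * priority 5 in bits 15:13.
 */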
1596
1597int efx_sriov_set_vf_spoofchk(struct net_device *net_dev, int vf_i,
1598 bool spoofchk)
1599{
1600 struct efx_nic *efx = netdev_priv(net_dev);
1601 struct efx_vf *vf;
1602 int rc;
1603
1604 if (vf_i >= efx->vf_init_count)
1605 return -EINVAL;
1606 vf = efx->vf + vf_i;
1607
1608 mutex_lock(&vf->txq_lock);
1609 if (vf->txq_count == 0) {
1610 vf->tx_filter_mode =
1611 spoofchk ? VF_TX_FILTER_ON : VF_TX_FILTER_OFF;
1612 rc = 0;
1613 } else {
1614 /* This cannot be changed while TX queues are running */
1615 rc = -EBUSY;
1616 }
1617 mutex_unlock(&vf->txq_lock);
1618 return rc;
1619}
1620
1621int efx_sriov_get_vf_config(struct net_device *net_dev, int vf_i,
1622 struct ifla_vf_info *ivi)
1623{
1624 struct efx_nic *efx = netdev_priv(net_dev);
1625 struct efx_vf *vf;
1626 u16 tci;
1627
1628 if (vf_i >= efx->vf_init_count)
1629 return -EINVAL;
1630 vf = efx->vf + vf_i;
1631
1632 ivi->vf = vf_i;
1633 memcpy(ivi->mac, vf->addr.mac_addr, ETH_ALEN);
1634 ivi->tx_rate = 0;
1635 tci = ntohs(vf->addr.tci);
1636 ivi->vlan = tci & VLAN_VID_MASK;
1637 ivi->qos = (tci >> VLAN_PRIO_SHIFT) & 0x7;
1638 ivi->spoofchk = vf->tx_filter_mode == VF_TX_FILTER_ON;
1639
1640 return 0;
1641}
1642
diff --git a/drivers/net/ethernet/sfc/vfdi.h b/drivers/net/ethernet/sfc/vfdi.h
new file mode 100644
index 000000000000..656fa70f9993
--- /dev/null
+++ b/drivers/net/ethernet/sfc/vfdi.h
@@ -0,0 +1,254 @@
1/****************************************************************************
2 * Driver for Solarflare Solarstorm network controllers and boards
3 * Copyright 2010-2012 Solarflare Communications Inc.
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 as published
7 * by the Free Software Foundation, incorporated herein by reference.
8 */
9#ifndef _VFDI_H
10#define _VFDI_H
11
12/**
13 * DOC: Virtual Function Driver Interface
14 *
 15 * This file contains software structures used to form a two-way
16 * communication channel between the VF driver and the PF driver,
17 * named Virtual Function Driver Interface (VFDI).
18 *
19 * For the purposes of VFDI, a page is a memory region with size and
20 * alignment of 4K. All addresses are DMA addresses to be used within
21 * the domain of the relevant VF.
22 *
23 * The only hardware-defined channels for a VF driver to communicate
24 * with the PF driver are the event mailboxes (%FR_CZ_USR_EV
25 * registers). Writing to these registers generates an event with
26 * EV_CODE = EV_CODE_USR_EV, USER_QID set to the index of the mailbox
27 * and USER_EV_REG_VALUE set to the value written. The PF driver may
28 * direct or disable delivery of these events by setting
29 * %FR_CZ_USR_EV_CFG.
30 *
31 * The PF driver can send arbitrary events to arbitrary event queues.
32 * However, for consistency, VFDI events from the PF are defined to
33 * follow the same form and be sent to the first event queue assigned
34 * to the VF while that queue is enabled by the VF driver.
35 *
36 * The general form of the variable bits of VFDI events is:
37 *
38 * 0 16 24 31
39 * | DATA | TYPE | SEQ |
40 *
41 * SEQ is a sequence number which should be incremented by 1 (modulo
42 * 256) for each event. The sequence numbers used in each direction
43 * are independent.
44 *
45 * The VF submits requests of type &struct vfdi_req by sending the
46 * address of the request (ADDR) in a series of 4 events:
47 *
48 * 0 16 24 31
49 * | ADDR[0:15] | VFDI_EV_TYPE_REQ_WORD0 | SEQ |
50 * | ADDR[16:31] | VFDI_EV_TYPE_REQ_WORD1 | SEQ+1 |
51 * | ADDR[32:47] | VFDI_EV_TYPE_REQ_WORD2 | SEQ+2 |
52 * | ADDR[48:63] | VFDI_EV_TYPE_REQ_WORD3 | SEQ+3 |
53 *
54 * The address must be page-aligned. After receiving such a valid
55 * series of events, the PF driver will attempt to read the request
56 * and write a response to the same address. In case of an invalid
57 * sequence of events or a DMA error, there will be no response.
58 *
59 * The VF driver may request that the PF driver writes status
60 * information into its domain asynchronously. After writing the
61 * status, the PF driver will send an event of the form:
62 *
63 * 0 16 24 31
64 * | reserved | VFDI_EV_TYPE_STATUS | SEQ |
65 *
66 * In case the VF must be reset for any reason, the PF driver will
67 * send an event of the form:
68 *
69 * 0 16 24 31
70 * | reserved | VFDI_EV_TYPE_RESET | SEQ |
71 *
72 * It is then the responsibility of the VF driver to request
73 * reinitialisation of its queues.
74 */
75#define VFDI_EV_SEQ_LBN 24
76#define VFDI_EV_SEQ_WIDTH 8
77#define VFDI_EV_TYPE_LBN 16
78#define VFDI_EV_TYPE_WIDTH 8
79#define VFDI_EV_TYPE_REQ_WORD0 0
80#define VFDI_EV_TYPE_REQ_WORD1 1
81#define VFDI_EV_TYPE_REQ_WORD2 2
82#define VFDI_EV_TYPE_REQ_WORD3 3
83#define VFDI_EV_TYPE_STATUS 4
84#define VFDI_EV_TYPE_RESET 5
85#define VFDI_EV_DATA_LBN 0
86#define VFDI_EV_DATA_WIDTH 16
87
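/* Minimal sketch (a hypothetical helper, not defined elsewhere in this
 * header): packing one VFDI event word from the fields above.  SEQ sits
 * in bits 31:24, TYPE in bits 23:16 and DATA in bits 15:0, matching the
 * diagrams in the DOC comment.
 */
static inline u32 vfdi_pack_event(u8 seq, u8 type, u16 data)
{
	return ((u32)seq << VFDI_EV_SEQ_LBN) |
	       ((u32)type << VFDI_EV_TYPE_LBN) |
	       ((u32)data << VFDI_EV_DATA_LBN);
}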
88struct vfdi_endpoint {
89 u8 mac_addr[ETH_ALEN];
90 __be16 tci;
91};
92
93/**
94 * enum vfdi_op - VFDI operation enumeration
95 * @VFDI_OP_RESPONSE: Indicates a response to the request.
96 * @VFDI_OP_INIT_EVQ: Initialize SRAM entries and initialize an EVQ.
97 * @VFDI_OP_INIT_RXQ: Initialize SRAM entries and initialize an RXQ.
98 * @VFDI_OP_INIT_TXQ: Initialize SRAM entries and initialize a TXQ.
99 * @VFDI_OP_FINI_ALL_QUEUES: Flush all queues, finalize all queues, then
100 * finalize the SRAM entries.
101 * @VFDI_OP_INSERT_FILTER: Insert a MAC filter targeting the given RXQ.
102 * @VFDI_OP_REMOVE_ALL_FILTERS: Remove all filters.
103 * @VFDI_OP_SET_STATUS_PAGE: Set the DMA page(s) used for status updates
104 * from PF and write the initial status.
105 * @VFDI_OP_CLEAR_STATUS_PAGE: Clear the DMA page(s) used for status
106 * updates from PF.
107 */
108enum vfdi_op {
109 VFDI_OP_RESPONSE = 0,
110 VFDI_OP_INIT_EVQ = 1,
111 VFDI_OP_INIT_RXQ = 2,
112 VFDI_OP_INIT_TXQ = 3,
113 VFDI_OP_FINI_ALL_QUEUES = 4,
114 VFDI_OP_INSERT_FILTER = 5,
115 VFDI_OP_REMOVE_ALL_FILTERS = 6,
116 VFDI_OP_SET_STATUS_PAGE = 7,
117 VFDI_OP_CLEAR_STATUS_PAGE = 8,
118 VFDI_OP_LIMIT,
119};
120
121/* Response codes for VFDI operations. Other values may be used in future. */
122#define VFDI_RC_SUCCESS 0
123#define VFDI_RC_ENOMEM (-12)
124#define VFDI_RC_EINVAL (-22)
125#define VFDI_RC_EOPNOTSUPP (-95)
126#define VFDI_RC_ETIMEDOUT (-110)
127
128/**
129 * struct vfdi_req - Request from VF driver to PF driver
130 * @op: Operation code or response indicator, taken from &enum vfdi_op.
131 * @rc: Response code. Set to 0 on success or a negative error code on failure.
132 * @u.init_evq.index: Index of event queue to create.
133 * @u.init_evq.buf_count: Number of 4k buffers backing event queue.
134 * @u.init_evq.addr: Array of length %u.init_evq.buf_count containing DMA
135 * address of each page backing the event queue.
136 * @u.init_rxq.index: Index of receive queue to create.
137 * @u.init_rxq.buf_count: Number of 4k buffers backing receive queue.
138 * @u.init_rxq.evq: Instance of event queue to target receive events at.
139 * @u.init_rxq.label: Label used in receive events.
140 * @u.init_rxq.flags: Unused.
141 * @u.init_rxq.addr: Array of length %u.init_rxq.buf_count containing DMA
142 * address of each page backing the receive queue.
143 * @u.init_txq.index: Index of transmit queue to create.
144 * @u.init_txq.buf_count: Number of 4k buffers backing transmit queue.
145 * @u.init_txq.evq: Instance of event queue to target transmit completion
146 * events at.
147 * @u.init_txq.label: Label used in transmit completion events.
148 * @u.init_txq.flags: Checksum offload flags.
149 * @u.init_txq.addr: Array of length %u.init_txq.buf_count containing DMA
150 * address of each page backing the transmit queue.
151 * @u.mac_filter.rxq: Insert MAC filter at VF local address/VLAN targeting
152 * all traffic at this receive queue.
153 * @u.mac_filter.flags: MAC filter flags.
154 * @u.set_status_page.dma_addr: Base address for the &struct vfdi_status.
155 * This address must be such that the structure fits within a page.
156 * @u.set_status_page.peer_page_count: Number of additional pages the VF
157 * has provided into which peer addresses may be DMAd.
158 * @u.set_status_page.peer_page_addr: Array of DMA addresses of pages.
159 * If the number of peers exceeds 256, then the VF must provide
160 * additional pages in this array. The PF will then DMA up to
161 * 512 vfdi_endpoint structures into each page. These addresses
162 * must be page-aligned.
163 */
164struct vfdi_req {
165 u32 op;
166 u32 reserved1;
167 s32 rc;
168 u32 reserved2;
169 union {
170 struct {
171 u32 index;
172 u32 buf_count;
173 u64 addr[];
174 } init_evq;
175 struct {
176 u32 index;
177 u32 buf_count;
178 u32 evq;
179 u32 label;
180 u32 flags;
181#define VFDI_RXQ_FLAG_SCATTER_EN 1
182 u32 reserved;
183 u64 addr[];
184 } init_rxq;
185 struct {
186 u32 index;
187 u32 buf_count;
188 u32 evq;
189 u32 label;
190 u32 flags;
191#define VFDI_TXQ_FLAG_IP_CSUM_DIS 1
192#define VFDI_TXQ_FLAG_TCPUDP_CSUM_DIS 2
193 u32 reserved;
194 u64 addr[];
195 } init_txq;
196 struct {
197 u32 rxq;
198 u32 flags;
199#define VFDI_MAC_FILTER_FLAG_RSS 1
200#define VFDI_MAC_FILTER_FLAG_SCATTER 2
201 } mac_filter;
202 struct {
203 u64 dma_addr;
204 u64 peer_page_count;
205 u64 peer_page_addr[];
206 } set_status_page;
207 } u;
208};
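/* Minimal sketch (illustrative only): how a VF driver might fill in an
 * INIT_EVQ request in a DMA-coherent page before signalling its address
 * with the four REQ_WORD events.  Allocation and mapping of the request
 * page and of the event queue buffers are assumed to be done elsewhere.
 */
static inline void vfdi_fill_init_evq(struct vfdi_req *req, u32 evq_index,
				      u32 buf_count, const u64 *buf_addr)
{
	unsigned int i;

	req->op = VFDI_OP_INIT_EVQ;
	req->u.init_evq.index = evq_index;
	req->u.init_evq.buf_count = buf_count;
	for (i = 0; i < buf_count; i++)
		req->u.init_evq.addr[i] = buf_addr[i];
}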
209
210/**
211 * struct vfdi_status - Status provided by PF driver to VF driver
212 * @generation_start: A generation count DMA'd to VF *before* the
213 * rest of the structure.
214 * @generation_end: A generation count DMA'd to VF *after* the
215 * rest of the structure.
216 * @version: Version of this structure; currently set to 1. Later
217 * versions must either be layout-compatible or only be sent to VFs
218 * that specifically request them.
219 * @length: Total length of this structure, including embedded tables.
220 * @vi_scale: log2 the number of VIs available on this VF. This quantity
221 * is used by the hardware for register decoding.
222 * @max_tx_channels: The maximum number of transmit queues the VF can use.
223 * @rss_rxq_count: The number of receive queues present in the shared RSS
224 * indirection table.
225 * @peer_count: Total number of peers in the complete peer list. If larger
226 * than ARRAY_SIZE(%peers), then the VF must provide sufficient
227 * additional pages, each of which is filled with vfdi_endpoint structures.
228 * @local: The MAC address and outer VLAN tag of *this* VF
229 * @peers: Table of peer addresses. The @tci fields in these structures
230 * are currently unused and must be ignored. Additional peers are
231 * written into any additional pages provided by the VF.
232 * @timer_quantum_ns: Timer quantum (nominal period between timer ticks)
233 * for interrupt moderation timers, in nanoseconds. This member is only
234 * present if @length is sufficiently large.
235 */
236struct vfdi_status {
237 u32 generation_start;
238 u32 generation_end;
239 u32 version;
240 u32 length;
241 u8 vi_scale;
242 u8 max_tx_channels;
243 u8 rss_rxq_count;
244 u8 reserved1;
245 u16 peer_count;
246 u16 reserved2;
247 struct vfdi_endpoint local;
248 struct vfdi_endpoint peers[256];
249
250 /* Members below here extend version 1 of this structure */
251 u32 timer_quantum_ns;
252};
253
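/* Sketch (a hypothetical consumer-side helper): a VF driver reading the
 * status page would typically re-read it until the two generation counts
 * match, ensuring it did not observe a PF update in progress.
 */
static inline bool vfdi_status_is_consistent(const struct vfdi_status *status)
{
	return status->generation_start == status->generation_end;
}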
254#endif