-rw-r--r--  drivers/net/ethernet/sfc/Kconfig        |    8
-rw-r--r--  drivers/net/ethernet/sfc/Makefile       |    1
-rw-r--r--  drivers/net/ethernet/sfc/efx.c          |   70
-rw-r--r--  drivers/net/ethernet/sfc/ethtool.c      |    3
-rw-r--r--  drivers/net/ethernet/sfc/mcdi.c         |   34
-rw-r--r--  drivers/net/ethernet/sfc/mcdi.h         |    2
-rw-r--r--  drivers/net/ethernet/sfc/mcdi_mac.c     |    2
-rw-r--r--  drivers/net/ethernet/sfc/net_driver.h   |   32
-rw-r--r--  drivers/net/ethernet/sfc/nic.c          |   79
-rw-r--r--  drivers/net/ethernet/sfc/nic.h          |   89
-rw-r--r--  drivers/net/ethernet/sfc/siena.c        |    2
-rw-r--r--  drivers/net/ethernet/sfc/siena_sriov.c  | 1642
-rw-r--r--  drivers/net/ethernet/sfc/vfdi.h         |  254
13 files changed, 2192 insertions, 26 deletions

diff --git a/drivers/net/ethernet/sfc/Kconfig b/drivers/net/ethernet/sfc/Kconfig
index 8d423544a7e6..fb3cbc27063c 100644
--- a/drivers/net/ethernet/sfc/Kconfig
+++ b/drivers/net/ethernet/sfc/Kconfig
@@ -26,3 +26,11 @@ config SFC_MCDI_MON
26 ----help--- 26 ----help---
27 This exposes the on-board firmware-managed sensors as a 27 This exposes the on-board firmware-managed sensors as a
28 hardware monitor device. 28 hardware monitor device.
29config SFC_SRIOV
30 bool "Solarflare SFC9000-family SR-IOV support"
31 depends on SFC && PCI_IOV
32 default y
33 ---help---
34 This enables support for the SFC9000 I/O Virtualization
35 features, allowing accelerated network performance in
36 virtualized environments.
diff --git a/drivers/net/ethernet/sfc/Makefile b/drivers/net/ethernet/sfc/Makefile
index 3fa2e25ccc45..ea1f8db57318 100644
--- a/drivers/net/ethernet/sfc/Makefile
+++ b/drivers/net/ethernet/sfc/Makefile
@@ -4,5 +4,6 @@ sfc-y += efx.o nic.o falcon.o siena.o tx.o rx.o filter.o \
4 tenxpress.o txc43128_phy.o falcon_boards.o \ 4 tenxpress.o txc43128_phy.o falcon_boards.o \
5 mcdi.o mcdi_phy.o mcdi_mon.o 5 mcdi.o mcdi_phy.o mcdi_mon.o
6sfc-$(CONFIG_SFC_MTD) += mtd.o 6sfc-$(CONFIG_SFC_MTD) += mtd.o
7sfc-$(CONFIG_SFC_SRIOV) += siena_sriov.o
7 8
8obj-$(CONFIG_SFC) += sfc.o 9obj-$(CONFIG_SFC) += sfc.o
diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c
index c9c306aef2d9..ac571cf14485 100644
--- a/drivers/net/ethernet/sfc/efx.c
+++ b/drivers/net/ethernet/sfc/efx.c
@@ -1175,25 +1175,40 @@ static unsigned int efx_wanted_parallelism(struct efx_nic *efx)
 	unsigned int count;
 	int cpu;
 
-	if (rss_cpus)
-		return rss_cpus;
-
-	if (unlikely(!zalloc_cpumask_var(&thread_mask, GFP_KERNEL))) {
-		netif_warn(efx, probe, efx->net_dev,
-			   "RSS disabled due to allocation failure\n");
-		return 1;
-	}
-
-	count = 0;
-	for_each_online_cpu(cpu) {
-		if (!cpumask_test_cpu(cpu, thread_mask)) {
-			++count;
-			cpumask_or(thread_mask, thread_mask,
-				   topology_thread_cpumask(cpu));
-		}
-	}
-
-	free_cpumask_var(thread_mask);
+	if (rss_cpus) {
+		count = rss_cpus;
+	} else {
+		if (unlikely(!zalloc_cpumask_var(&thread_mask, GFP_KERNEL))) {
+			netif_warn(efx, probe, efx->net_dev,
+				   "RSS disabled due to allocation failure\n");
+			return 1;
+		}
+
+		count = 0;
+		for_each_online_cpu(cpu) {
+			if (!cpumask_test_cpu(cpu, thread_mask)) {
+				++count;
+				cpumask_or(thread_mask, thread_mask,
+					   topology_thread_cpumask(cpu));
+			}
+		}
+
+		free_cpumask_var(thread_mask);
+	}
+
+	/* If RSS is requested for the PF *and* VFs then we can't write RSS
+	 * table entries that are inaccessible to VFs
+	 */
+	if (efx_sriov_wanted(efx) && efx_vf_size(efx) > 1 &&
+	    count > efx_vf_size(efx)) {
+		netif_warn(efx, probe, efx->net_dev,
+			   "Reducing number of RSS channels from %u to %u for "
+			   "VF support. Increase vf-msix-limit to use more "
+			   "channels on the PF.\n",
+			   count, efx_vf_size(efx));
+		count = efx_vf_size(efx);
+	}
+
 	return count;
 }
 
@@ -1327,6 +1342,10 @@ static int efx_probe_interrupts(struct efx_nic *efx)
1327 } 1342 }
1328 } 1343 }
1329 1344
1345 /* RSS might be usable on VFs even if it is disabled on the PF */
1346 efx->rss_spread = (efx->n_rx_channels > 1 ?
1347 efx->n_rx_channels : efx_vf_size(efx));
1348
1330 return 0; 1349 return 0;
1331} 1350}
1332 1351
@@ -1426,7 +1445,7 @@ static int efx_probe_nic(struct efx_nic *efx)
 	get_random_bytes(&efx->rx_hash_key, sizeof(efx->rx_hash_key));
 	for (i = 0; i < ARRAY_SIZE(efx->rx_indir_table); i++)
 		efx->rx_indir_table[i] =
-			ethtool_rxfh_indir_default(i, efx->n_rx_channels);
+			ethtool_rxfh_indir_default(i, efx->rss_spread);
 
 	efx_set_channels(efx);
 	netif_set_real_num_tx_queues(efx->net_dev, efx->n_tx_channels);
@@ -1915,6 +1934,7 @@ static int efx_set_mac_address(struct net_device *net_dev, void *data)
1915 } 1934 }
1916 1935
1917 memcpy(net_dev->dev_addr, new_addr, net_dev->addr_len); 1936 memcpy(net_dev->dev_addr, new_addr, net_dev->addr_len);
1937 efx_sriov_mac_address_changed(efx);
1918 1938
1919 /* Reconfigure the MAC */ 1939 /* Reconfigure the MAC */
1920 mutex_lock(&efx->mac_lock); 1940 mutex_lock(&efx->mac_lock);
@@ -1981,6 +2001,12 @@ static const struct net_device_ops efx_netdev_ops = {
1981 .ndo_set_mac_address = efx_set_mac_address, 2001 .ndo_set_mac_address = efx_set_mac_address,
1982 .ndo_set_rx_mode = efx_set_rx_mode, 2002 .ndo_set_rx_mode = efx_set_rx_mode,
1983 .ndo_set_features = efx_set_features, 2003 .ndo_set_features = efx_set_features,
2004#ifdef CONFIG_SFC_SRIOV
2005 .ndo_set_vf_mac = efx_sriov_set_vf_mac,
2006 .ndo_set_vf_vlan = efx_sriov_set_vf_vlan,
2007 .ndo_set_vf_spoofchk = efx_sriov_set_vf_spoofchk,
2008 .ndo_get_vf_config = efx_sriov_get_vf_config,
2009#endif
1984#ifdef CONFIG_NET_POLL_CONTROLLER 2010#ifdef CONFIG_NET_POLL_CONTROLLER
1985 .ndo_poll_controller = efx_netpoll, 2011 .ndo_poll_controller = efx_netpoll,
1986#endif 2012#endif
@@ -2150,6 +2176,7 @@ int efx_reset_up(struct efx_nic *efx, enum reset_type method, bool ok)
2150 2176
2151 efx_start_interrupts(efx, false); 2177 efx_start_interrupts(efx, false);
2152 efx_restore_filters(efx); 2178 efx_restore_filters(efx);
2179 efx_sriov_reset(efx);
2153 2180
2154 mutex_unlock(&efx->mac_lock); 2181 mutex_unlock(&efx->mac_lock);
2155 2182
@@ -2440,6 +2467,7 @@ static void efx_pci_remove(struct pci_dev *pci_dev)
2440 rtnl_unlock(); 2467 rtnl_unlock();
2441 2468
2442 efx_stop_interrupts(efx, false); 2469 efx_stop_interrupts(efx, false);
2470 efx_sriov_fini(efx);
2443 efx_unregister_netdev(efx); 2471 efx_unregister_netdev(efx);
2444 2472
2445 efx_mtd_remove(efx); 2473 efx_mtd_remove(efx);
@@ -2581,6 +2609,11 @@ static int __devinit efx_pci_probe(struct pci_dev *pci_dev,
2581 if (rc) 2609 if (rc)
2582 goto fail4; 2610 goto fail4;
2583 2611
2612 rc = efx_sriov_init(efx);
2613 if (rc)
2614 netif_err(efx, probe, efx->net_dev,
2615 "SR-IOV can't be enabled rc %d\n", rc);
2616
2584 netif_dbg(efx, probe, efx->net_dev, "initialisation successful\n"); 2617 netif_dbg(efx, probe, efx->net_dev, "initialisation successful\n");
2585 2618
2586 /* Try to create MTDs, but allow this to fail */ 2619 /* Try to create MTDs, but allow this to fail */
@@ -2732,6 +2765,10 @@ static int __init efx_init_module(void)
2732 if (rc) 2765 if (rc)
2733 goto err_notifier; 2766 goto err_notifier;
2734 2767
2768 rc = efx_init_sriov();
2769 if (rc)
2770 goto err_sriov;
2771
2735 reset_workqueue = create_singlethread_workqueue("sfc_reset"); 2772 reset_workqueue = create_singlethread_workqueue("sfc_reset");
2736 if (!reset_workqueue) { 2773 if (!reset_workqueue) {
2737 rc = -ENOMEM; 2774 rc = -ENOMEM;
@@ -2747,6 +2784,8 @@ static int __init efx_init_module(void)
2747 err_pci: 2784 err_pci:
2748 destroy_workqueue(reset_workqueue); 2785 destroy_workqueue(reset_workqueue);
2749 err_reset: 2786 err_reset:
2787 efx_fini_sriov();
2788 err_sriov:
2750 unregister_netdevice_notifier(&efx_netdev_notifier); 2789 unregister_netdevice_notifier(&efx_netdev_notifier);
2751 err_notifier: 2790 err_notifier:
2752 return rc; 2791 return rc;
@@ -2758,6 +2797,7 @@ static void __exit efx_exit_module(void)
2758 2797
2759 pci_unregister_driver(&efx_pci_driver); 2798 pci_unregister_driver(&efx_pci_driver);
2760 destroy_workqueue(reset_workqueue); 2799 destroy_workqueue(reset_workqueue);
2800 efx_fini_sriov();
2761 unregister_netdevice_notifier(&efx_netdev_notifier); 2801 unregister_netdevice_notifier(&efx_netdev_notifier);
2762 2802
2763} 2803}
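
Reviewer note (illustrative, not part of the patch): the efx_wanted_parallelism() hunk above clamps the PF's RSS channel count to efx_vf_size() whenever SR-IOV is wanted, so that no RSS indirection table entry can point at a channel the VFs cannot reach. A minimal standalone sketch of just that clamp; the CPU-derived channel count and the vi_scale value below are assumed example numbers, not values taken from the patch:

	/* Illustrative sketch only: the RSS clamp from efx_wanted_parallelism().
	 * The channel count and vi_scale are assumed example values.
	 */
	#include <stdio.h>

	int main(void)
	{
		unsigned int count = 8;			/* channels wanted from CPU topology */
		unsigned int vi_scale = 2;		/* assumed firmware VI_SCALE */
		unsigned int vf_size = 1u << vi_scale;	/* efx_vf_size(efx) */
		int sriov_wanted = 1;			/* efx_sriov_wanted(efx) */

		if (sriov_wanted && vf_size > 1 && count > vf_size)
			count = vf_size;	/* PF keeps only vf_size RSS channels */

		printf("PF RSS channels: %u, RSS spread usable by VFs: %u\n",
		       count, vf_size);
		return 0;
	}
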
diff --git a/drivers/net/ethernet/sfc/ethtool.c b/drivers/net/ethernet/sfc/ethtool.c
index 83191151b650..f22f45f515a8 100644
--- a/drivers/net/ethernet/sfc/ethtool.c
+++ b/drivers/net/ethernet/sfc/ethtool.c
@@ -1085,7 +1085,8 @@ static u32 efx_ethtool_get_rxfh_indir_size(struct net_device *net_dev)
 {
 	struct efx_nic *efx = netdev_priv(net_dev);
 
-	return (efx_nic_rev(efx) < EFX_REV_FALCON_B0 ?
+	return ((efx_nic_rev(efx) < EFX_REV_FALCON_B0 ||
+		 efx->n_rx_channels == 1) ?
 		0 : ARRAY_SIZE(efx->rx_indir_table));
 }
 
diff --git a/drivers/net/ethernet/sfc/mcdi.c b/drivers/net/ethernet/sfc/mcdi.c
index 619f63a66ce7..17b6463e459c 100644
--- a/drivers/net/ethernet/sfc/mcdi.c
+++ b/drivers/net/ethernet/sfc/mcdi.c
@@ -560,6 +560,9 @@ void efx_mcdi_process_event(struct efx_channel *channel,
560 case MCDI_EVENT_CODE_MAC_STATS_DMA: 560 case MCDI_EVENT_CODE_MAC_STATS_DMA:
561 /* MAC stats are gather lazily. We can ignore this. */ 561 /* MAC stats are gather lazily. We can ignore this. */
562 break; 562 break;
563 case MCDI_EVENT_CODE_FLR:
564 efx_sriov_flr(efx, MCDI_EVENT_FIELD(*event, FLR_VF));
565 break;
563 566
564 default: 567 default:
565 netif_err(efx, hw, efx->net_dev, "Unknown MCDI event 0x%x\n", 568 netif_err(efx, hw, efx->net_dev, "Unknown MCDI event 0x%x\n",
@@ -1154,6 +1157,37 @@ fail:
1154 return rc; 1157 return rc;
1155} 1158}
1156 1159
1160int efx_mcdi_flush_rxqs(struct efx_nic *efx)
1161{
1162 struct efx_channel *channel;
1163 struct efx_rx_queue *rx_queue;
1164 __le32 *qid;
1165 int rc, count;
1166
1167 qid = kmalloc(EFX_MAX_CHANNELS * sizeof(*qid), GFP_KERNEL);
1168 if (qid == NULL)
1169 return -ENOMEM;
1170
1171 count = 0;
1172 efx_for_each_channel(channel, efx) {
1173 efx_for_each_channel_rx_queue(rx_queue, channel) {
1174 if (rx_queue->flush_pending) {
1175 rx_queue->flush_pending = false;
1176 atomic_dec(&efx->rxq_flush_pending);
1177 qid[count++] = cpu_to_le32(
1178 efx_rx_queue_index(rx_queue));
1179 }
1180 }
1181 }
1182
1183 rc = efx_mcdi_rpc(efx, MC_CMD_FLUSH_RX_QUEUES, (u8 *)qid,
1184 count * sizeof(*qid), NULL, 0, NULL);
1185 WARN_ON(rc > 0);
1186
1187 kfree(qid);
1188
1189 return rc;
1190}
1157 1191
1158int efx_mcdi_wol_filter_reset(struct efx_nic *efx) 1192int efx_mcdi_wol_filter_reset(struct efx_nic *efx)
1159{ 1193{
diff --git a/drivers/net/ethernet/sfc/mcdi.h b/drivers/net/ethernet/sfc/mcdi.h
index fbaa6efcd744..0bdf3e331832 100644
--- a/drivers/net/ethernet/sfc/mcdi.h
+++ b/drivers/net/ethernet/sfc/mcdi.h
@@ -146,6 +146,8 @@ extern int efx_mcdi_wol_filter_set_magic(struct efx_nic *efx,
146extern int efx_mcdi_wol_filter_get_magic(struct efx_nic *efx, int *id_out); 146extern int efx_mcdi_wol_filter_get_magic(struct efx_nic *efx, int *id_out);
147extern int efx_mcdi_wol_filter_remove(struct efx_nic *efx, int id); 147extern int efx_mcdi_wol_filter_remove(struct efx_nic *efx, int id);
148extern int efx_mcdi_wol_filter_reset(struct efx_nic *efx); 148extern int efx_mcdi_wol_filter_reset(struct efx_nic *efx);
149extern int efx_mcdi_flush_rxqs(struct efx_nic *efx);
150extern int efx_mcdi_set_mac(struct efx_nic *efx);
149extern int efx_mcdi_mac_stats(struct efx_nic *efx, dma_addr_t dma_addr, 151extern int efx_mcdi_mac_stats(struct efx_nic *efx, dma_addr_t dma_addr,
150 u32 dma_len, int enable, int clear); 152 u32 dma_len, int enable, int clear);
151extern int efx_mcdi_mac_reconfigure(struct efx_nic *efx); 153extern int efx_mcdi_mac_reconfigure(struct efx_nic *efx);
diff --git a/drivers/net/ethernet/sfc/mcdi_mac.c b/drivers/net/ethernet/sfc/mcdi_mac.c
index 98afe1c1165d..1003f309cba7 100644
--- a/drivers/net/ethernet/sfc/mcdi_mac.c
+++ b/drivers/net/ethernet/sfc/mcdi_mac.c
@@ -12,7 +12,7 @@
 #include "mcdi.h"
 #include "mcdi_pcol.h"
 
-static int efx_mcdi_set_mac(struct efx_nic *efx)
+int efx_mcdi_set_mac(struct efx_nic *efx)
 {
 	u32 reject, fcntl;
 	u8 cmdbytes[MC_CMD_SET_MAC_IN_LEN];
diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h
index 7870cefcb203..3fbec458c323 100644
--- a/drivers/net/ethernet/sfc/net_driver.h
+++ b/drivers/net/ethernet/sfc/net_driver.h
@@ -24,6 +24,7 @@
24#include <linux/device.h> 24#include <linux/device.h>
25#include <linux/highmem.h> 25#include <linux/highmem.h>
26#include <linux/workqueue.h> 26#include <linux/workqueue.h>
27#include <linux/mutex.h>
27#include <linux/vmalloc.h> 28#include <linux/vmalloc.h>
28#include <linux/i2c.h> 29#include <linux/i2c.h>
29 30
@@ -54,7 +55,8 @@
 
 #define EFX_MAX_CHANNELS 32U
 #define EFX_MAX_RX_QUEUES EFX_MAX_CHANNELS
-#define EFX_MAX_EXTRA_CHANNELS 0U
+#define EFX_EXTRA_CHANNEL_IOV 0
+#define EFX_MAX_EXTRA_CHANNELS 1U
 
 /* Checksum generation is a per-queue option in hardware, so each
  * queue visible to the networking core is backed by two hardware TX
@@ -629,6 +631,8 @@ union efx_multicast_hash {
629}; 631};
630 632
631struct efx_filter_state; 633struct efx_filter_state;
634struct efx_vf;
635struct vfdi_status;
632 636
633/** 637/**
634 * struct efx_nic - an Efx NIC 638 * struct efx_nic - an Efx NIC
@@ -712,6 +716,17 @@ struct efx_filter_state;
712 * completed (either success or failure). Not used when MCDI is used to 716 * completed (either success or failure). Not used when MCDI is used to
713 * flush receive queues. 717 * flush receive queues.
714 * @flush_wq: wait queue used by efx_nic_flush_queues() to wait for flush completions. 718 * @flush_wq: wait queue used by efx_nic_flush_queues() to wait for flush completions.
719 * @vf: Array of &struct efx_vf objects.
720 * @vf_count: Number of VFs intended to be enabled.
721 * @vf_init_count: Number of VFs that have been fully initialised.
722 * @vi_scale: log2 number of vnics per VF.
723 * @vf_buftbl_base: The zeroth buffer table index used to back VF queues.
724 * @vfdi_status: Common VFDI status page to be dmad to VF address space.
725 * @local_addr_list: List of local addresses. Protected by %local_lock.
726 * @local_page_list: List of DMA addressable pages used to broadcast
727 * %local_addr_list. Protected by %local_lock.
728 * @local_lock: Mutex protecting %local_addr_list and %local_page_list.
729 * @peer_work: Work item to broadcast peer addresses to VMs.
715 * @monitor_work: Hardware monitor workitem 730 * @monitor_work: Hardware monitor workitem
716 * @biu_lock: BIU (bus interface unit) lock 731 * @biu_lock: BIU (bus interface unit) lock
717 * @last_irq_cpu: Last CPU to handle a possible test interrupt. This 732 * @last_irq_cpu: Last CPU to handle a possible test interrupt. This
@@ -762,6 +777,7 @@ struct efx_nic {
762 unsigned next_buffer_table; 777 unsigned next_buffer_table;
763 unsigned n_channels; 778 unsigned n_channels;
764 unsigned n_rx_channels; 779 unsigned n_rx_channels;
780 unsigned rss_spread;
765 unsigned tx_channel_offset; 781 unsigned tx_channel_offset;
766 unsigned n_tx_channels; 782 unsigned n_tx_channels;
767 unsigned int rx_buffer_len; 783 unsigned int rx_buffer_len;
@@ -820,6 +836,20 @@ struct efx_nic {
820 atomic_t rxq_flush_outstanding; 836 atomic_t rxq_flush_outstanding;
821 wait_queue_head_t flush_wq; 837 wait_queue_head_t flush_wq;
822 838
839#ifdef CONFIG_SFC_SRIOV
840 struct efx_channel *vfdi_channel;
841 struct efx_vf *vf;
842 unsigned vf_count;
843 unsigned vf_init_count;
844 unsigned vi_scale;
845 unsigned vf_buftbl_base;
846 struct efx_buffer vfdi_status;
847 struct list_head local_addr_list;
848 struct list_head local_page_list;
849 struct mutex local_lock;
850 struct work_struct peer_work;
851#endif
852
823 /* The following fields may be written more often */ 853 /* The following fields may be written more often */
824 854
825 struct delayed_work monitor_work ____cacheline_aligned_in_smp; 855 struct delayed_work monitor_work ____cacheline_aligned_in_smp;
diff --git a/drivers/net/ethernet/sfc/nic.c b/drivers/net/ethernet/sfc/nic.c
index 747cf9439164..2bf4283f05fe 100644
--- a/drivers/net/ethernet/sfc/nic.c
+++ b/drivers/net/ethernet/sfc/nic.c
@@ -264,6 +264,10 @@ static int efx_alloc_special_buffer(struct efx_nic *efx,
264 /* Select new buffer ID */ 264 /* Select new buffer ID */
265 buffer->index = efx->next_buffer_table; 265 buffer->index = efx->next_buffer_table;
266 efx->next_buffer_table += buffer->entries; 266 efx->next_buffer_table += buffer->entries;
267#ifdef CONFIG_SFC_SRIOV
268 BUG_ON(efx_sriov_enabled(efx) &&
269 efx->vf_buftbl_base < efx->next_buffer_table);
270#endif
267 271
268 netif_dbg(efx, probe, efx->net_dev, 272 netif_dbg(efx, probe, efx->net_dev,
269 "allocating special buffers %d-%d at %llx+%x " 273 "allocating special buffers %d-%d at %llx+%x "
@@ -693,6 +697,16 @@ int efx_nic_flush_queues(struct efx_nic *efx)
693 } 697 }
694 698
695 while (timeout && atomic_read(&efx->drain_pending) > 0) { 699 while (timeout && atomic_read(&efx->drain_pending) > 0) {
700 /* If SRIOV is enabled, then offload receive queue flushing to
701 * the firmware (though we will still have to poll for
702 * completion). If that fails, fall back to the old scheme.
703 */
704 if (efx_sriov_enabled(efx)) {
705 rc = efx_mcdi_flush_rxqs(efx);
706 if (!rc)
707 goto wait;
708 }
709
696 /* The hardware supports four concurrent rx flushes, each of 710 /* The hardware supports four concurrent rx flushes, each of
697 * which may need to be retried if there is an outstanding 711 * which may need to be retried if there is an outstanding
698 * descriptor fetch 712 * descriptor fetch
@@ -712,6 +726,7 @@ int efx_nic_flush_queues(struct efx_nic *efx)
712 } 726 }
713 } 727 }
714 728
729 wait:
715 timeout = wait_event_timeout(efx->flush_wq, efx_flush_wake(efx), 730 timeout = wait_event_timeout(efx->flush_wq, efx_flush_wake(efx),
716 timeout); 731 timeout);
717 } 732 }
@@ -1102,11 +1117,13 @@ efx_handle_driver_event(struct efx_channel *channel, efx_qword_t *event)
1102 netif_vdbg(efx, hw, efx->net_dev, "channel %d TXQ %d flushed\n", 1117 netif_vdbg(efx, hw, efx->net_dev, "channel %d TXQ %d flushed\n",
1103 channel->channel, ev_sub_data); 1118 channel->channel, ev_sub_data);
1104 efx_handle_tx_flush_done(efx, event); 1119 efx_handle_tx_flush_done(efx, event);
1120 efx_sriov_tx_flush_done(efx, event);
1105 break; 1121 break;
1106 case FSE_AZ_RX_DESCQ_FLS_DONE_EV: 1122 case FSE_AZ_RX_DESCQ_FLS_DONE_EV:
1107 netif_vdbg(efx, hw, efx->net_dev, "channel %d RXQ %d flushed\n", 1123 netif_vdbg(efx, hw, efx->net_dev, "channel %d RXQ %d flushed\n",
1108 channel->channel, ev_sub_data); 1124 channel->channel, ev_sub_data);
1109 efx_handle_rx_flush_done(efx, event); 1125 efx_handle_rx_flush_done(efx, event);
1126 efx_sriov_rx_flush_done(efx, event);
1110 break; 1127 break;
1111 case FSE_AZ_EVQ_INIT_DONE_EV: 1128 case FSE_AZ_EVQ_INIT_DONE_EV:
1112 netif_dbg(efx, hw, efx->net_dev, 1129 netif_dbg(efx, hw, efx->net_dev,
@@ -1138,16 +1155,24 @@ efx_handle_driver_event(struct efx_channel *channel, efx_qword_t *event)
 			    RESET_TYPE_DISABLE);
 		break;
 	case FSE_BZ_RX_DSC_ERROR_EV:
-		netif_err(efx, rx_err, efx->net_dev,
-			  "RX DMA Q %d reports descriptor fetch error."
-			  " RX Q %d is disabled.\n", ev_sub_data, ev_sub_data);
-		efx_schedule_reset(efx, RESET_TYPE_RX_DESC_FETCH);
+		if (ev_sub_data < EFX_VI_BASE) {
+			netif_err(efx, rx_err, efx->net_dev,
+				  "RX DMA Q %d reports descriptor fetch error."
+				  " RX Q %d is disabled.\n", ev_sub_data,
+				  ev_sub_data);
+			efx_schedule_reset(efx, RESET_TYPE_RX_DESC_FETCH);
+		} else
+			efx_sriov_desc_fetch_err(efx, ev_sub_data);
 		break;
 	case FSE_BZ_TX_DSC_ERROR_EV:
-		netif_err(efx, tx_err, efx->net_dev,
-			  "TX DMA Q %d reports descriptor fetch error."
-			  " TX Q %d is disabled.\n", ev_sub_data, ev_sub_data);
-		efx_schedule_reset(efx, RESET_TYPE_TX_DESC_FETCH);
+		if (ev_sub_data < EFX_VI_BASE) {
+			netif_err(efx, tx_err, efx->net_dev,
+				  "TX DMA Q %d reports descriptor fetch error."
+				  " TX Q %d is disabled.\n", ev_sub_data,
+				  ev_sub_data);
+			efx_schedule_reset(efx, RESET_TYPE_TX_DESC_FETCH);
+		} else
+			efx_sriov_desc_fetch_err(efx, ev_sub_data);
 		break;
 	default:
 		netif_vdbg(efx, hw, efx->net_dev,
@@ -1207,6 +1232,9 @@ int efx_nic_process_eventq(struct efx_channel *channel, int budget)
1207 case FSE_AZ_EV_CODE_DRIVER_EV: 1232 case FSE_AZ_EV_CODE_DRIVER_EV:
1208 efx_handle_driver_event(channel, &event); 1233 efx_handle_driver_event(channel, &event);
1209 break; 1234 break;
1235 case FSE_CZ_EV_CODE_USER_EV:
1236 efx_sriov_event(channel, &event);
1237 break;
1210 case FSE_CZ_EV_CODE_MCDI_EV: 1238 case FSE_CZ_EV_CODE_MCDI_EV:
1211 efx_mcdi_process_event(channel, &event); 1239 efx_mcdi_process_event(channel, &event);
1212 break; 1240 break;
@@ -1609,6 +1637,15 @@ void efx_nic_fini_interrupt(struct efx_nic *efx)
1609 free_irq(efx->legacy_irq, efx); 1637 free_irq(efx->legacy_irq, efx);
1610} 1638}
1611 1639
1640/* Looks at available SRAM resources and works out how many queues we
1641 * can support, and where things like descriptor caches should live.
1642 *
1643 * SRAM is split up as follows:
1644 * 0 buftbl entries for channels
1645 * efx->vf_buftbl_base buftbl entries for SR-IOV
1646 * efx->rx_dc_base RX descriptor caches
1647 * efx->tx_dc_base TX descriptor caches
1648 */
1612void efx_nic_dimension_resources(struct efx_nic *efx, unsigned sram_lim_qw) 1649void efx_nic_dimension_resources(struct efx_nic *efx, unsigned sram_lim_qw)
1613{ 1650{
1614 unsigned vi_count, buftbl_min; 1651 unsigned vi_count, buftbl_min;
@@ -1622,6 +1659,32 @@ void efx_nic_dimension_resources(struct efx_nic *efx, unsigned sram_lim_qw)
1622 * sizeof(efx_qword_t) / EFX_BUF_SIZE); 1659 * sizeof(efx_qword_t) / EFX_BUF_SIZE);
1623 vi_count = max(efx->n_channels, efx->n_tx_channels * EFX_TXQ_TYPES); 1660 vi_count = max(efx->n_channels, efx->n_tx_channels * EFX_TXQ_TYPES);
1624 1661
1662#ifdef CONFIG_SFC_SRIOV
1663 if (efx_sriov_wanted(efx)) {
1664 unsigned vi_dc_entries, buftbl_free, entries_per_vf, vf_limit;
1665
1666 efx->vf_buftbl_base = buftbl_min;
1667
1668 vi_dc_entries = RX_DC_ENTRIES + TX_DC_ENTRIES;
1669 vi_count = max(vi_count, EFX_VI_BASE);
1670 buftbl_free = (sram_lim_qw - buftbl_min -
1671 vi_count * vi_dc_entries);
1672
1673 entries_per_vf = ((vi_dc_entries + EFX_VF_BUFTBL_PER_VI) *
1674 efx_vf_size(efx));
1675 vf_limit = min(buftbl_free / entries_per_vf,
1676 (1024U - EFX_VI_BASE) >> efx->vi_scale);
1677
1678 if (efx->vf_count > vf_limit) {
1679 netif_err(efx, probe, efx->net_dev,
1680 "Reducing VF count from from %d to %d\n",
1681 efx->vf_count, vf_limit);
1682 efx->vf_count = vf_limit;
1683 }
1684 vi_count += efx->vf_count * efx_vf_size(efx);
1685 }
1686#endif
1687
1625 efx->tx_dc_base = sram_lim_qw - vi_count * TX_DC_ENTRIES; 1688 efx->tx_dc_base = sram_lim_qw - vi_count * TX_DC_ENTRIES;
1626 efx->rx_dc_base = efx->tx_dc_base - vi_count * RX_DC_ENTRIES; 1689 efx->rx_dc_base = efx->tx_dc_base - vi_count * RX_DC_ENTRIES;
1627} 1690}
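
Reviewer note (illustrative, not part of the patch): the SR-IOV branch added to efx_nic_dimension_resources() above divides whatever buffer table is left after the PF's own reservation among the VFs and caps vf_count accordingly. A small standalone sketch of that arithmetic; sram_lim_qw, buftbl_min, the descriptor-cache sizes, the EFX_VF_BUFTBL_PER_VI value and vi_scale below are all assumptions chosen for the example, not values quoted from the driver:

	/* Illustrative sketch only: the VF-limit arithmetic from
	 * efx_nic_dimension_resources().  Every numeric value is assumed.
	 */
	#include <stdio.h>

	int main(void)
	{
		unsigned sram_lim_qw = 65536;		/* assumed SRAM limit (qwords) */
		unsigned buftbl_min = 512;		/* assumed PF buffer-table need */
		unsigned vi_dc_entries = 64 + 16;	/* assumed RX_DC + TX_DC entries */
		unsigned vf_buftbl_per_vi = 32;		/* assumed EFX_VF_BUFTBL_PER_VI */
		unsigned vi_count = 128;		/* EFX_VI_BASE */
		unsigned vi_scale = 2, vf_size = 1u << vi_scale;
		unsigned vf_count = 64;			/* VFs requested */

		unsigned buftbl_free = sram_lim_qw - buftbl_min -
				       vi_count * vi_dc_entries;
		unsigned entries_per_vf = (vi_dc_entries + vf_buftbl_per_vi) * vf_size;
		unsigned vf_limit = buftbl_free / entries_per_vf;
		unsigned vi_limit = (1024u - vi_count) >> vi_scale;

		if (vf_limit > vi_limit)
			vf_limit = vi_limit;
		if (vf_count > vf_limit)
			vf_count = vf_limit;

		printf("buftbl_free=%u entries_per_vf=%u vf_limit=%u -> vf_count=%u\n",
		       buftbl_free, entries_per_vf, vf_limit, vf_count);
		return 0;
	}
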
diff --git a/drivers/net/ethernet/sfc/nic.h b/drivers/net/ethernet/sfc/nic.h
index 5df7da8b8ebf..246c4140453c 100644
--- a/drivers/net/ethernet/sfc/nic.h
+++ b/drivers/net/ethernet/sfc/nic.h
@@ -169,6 +169,95 @@ static inline struct efx_mcdi_mon *efx_mcdi_mon(struct efx_nic *efx)
169} 169}
170#endif 170#endif
171 171
172/*
173 * On the SFC9000 family each port is associated with 1 PCI physical
174 * function (PF) handled by sfc and a configurable number of virtual
175 * functions (VFs) that may be handled by some other driver, often in
176 * a VM guest. The queue pointer registers are mapped in both PF and
177 * VF BARs such that an 8K region provides access to a single RX, TX
178 * and event queue (collectively a Virtual Interface, VI or VNIC).
179 *
180 * The PF has access to all 1024 VIs while VFs are mapped to VIs
181 * according to VI_BASE and VI_SCALE: VF i has access to VIs numbered
182 * in range [VI_BASE + i << VI_SCALE, VI_BASE + i + 1 << VI_SCALE).
183 * The number of VIs and the VI_SCALE value are configurable but must
184 * be established at boot time by firmware.
185 */
186
187/* Maximum VI_SCALE parameter supported by Siena */
188#define EFX_VI_SCALE_MAX 6
189/* Base VI to use for SR-IOV. Must be aligned to (1 << EFX_VI_SCALE_MAX),
190 * so this is the smallest allowed value. */
191#define EFX_VI_BASE 128U
192/* Maximum number of VFs allowed */
193#define EFX_VF_COUNT_MAX 127
194/* Limit EVQs on VFs to be only 8k to reduce buffer table reservation */
195#define EFX_MAX_VF_EVQ_SIZE 8192UL
196/* The number of buffer table entries reserved for each VI on a VF */
197#define EFX_VF_BUFTBL_PER_VI \
198 ((EFX_MAX_VF_EVQ_SIZE + 2 * EFX_MAX_DMAQ_SIZE) * \
199 sizeof(efx_qword_t) / EFX_BUF_SIZE)
200
201#ifdef CONFIG_SFC_SRIOV
202
203static inline bool efx_sriov_wanted(struct efx_nic *efx)
204{
205 return efx->vf_count != 0;
206}
207static inline bool efx_sriov_enabled(struct efx_nic *efx)
208{
209 return efx->vf_init_count != 0;
210}
211static inline unsigned int efx_vf_size(struct efx_nic *efx)
212{
213 return 1 << efx->vi_scale;
214}
215
216extern int efx_init_sriov(void);
217extern void efx_sriov_probe(struct efx_nic *efx);
218extern int efx_sriov_init(struct efx_nic *efx);
219extern void efx_sriov_mac_address_changed(struct efx_nic *efx);
220extern void efx_sriov_tx_flush_done(struct efx_nic *efx, efx_qword_t *event);
221extern void efx_sriov_rx_flush_done(struct efx_nic *efx, efx_qword_t *event);
222extern void efx_sriov_event(struct efx_channel *channel, efx_qword_t *event);
223extern void efx_sriov_desc_fetch_err(struct efx_nic *efx, unsigned dmaq);
224extern void efx_sriov_flr(struct efx_nic *efx, unsigned flr);
225extern void efx_sriov_reset(struct efx_nic *efx);
226extern void efx_sriov_fini(struct efx_nic *efx);
227extern void efx_fini_sriov(void);
228
229#else
230
231static inline bool efx_sriov_wanted(struct efx_nic *efx) { return false; }
232static inline bool efx_sriov_enabled(struct efx_nic *efx) { return false; }
233static inline unsigned int efx_vf_size(struct efx_nic *efx) { return 0; }
234
235static inline int efx_init_sriov(void) { return 0; }
236static inline void efx_sriov_probe(struct efx_nic *efx) {}
237static inline int efx_sriov_init(struct efx_nic *efx) { return -EOPNOTSUPP; }
238static inline void efx_sriov_mac_address_changed(struct efx_nic *efx) {}
239static inline void efx_sriov_tx_flush_done(struct efx_nic *efx,
240 efx_qword_t *event) {}
241static inline void efx_sriov_rx_flush_done(struct efx_nic *efx,
242 efx_qword_t *event) {}
243static inline void efx_sriov_event(struct efx_channel *channel,
244 efx_qword_t *event) {}
245static inline void efx_sriov_desc_fetch_err(struct efx_nic *efx, unsigned dmaq) {}
246static inline void efx_sriov_flr(struct efx_nic *efx, unsigned flr) {}
247static inline void efx_sriov_reset(struct efx_nic *efx) {}
248static inline void efx_sriov_fini(struct efx_nic *efx) {}
249static inline void efx_fini_sriov(void) {}
250
251#endif
252
253extern int efx_sriov_set_vf_mac(struct net_device *dev, int vf, u8 *mac);
254extern int efx_sriov_set_vf_vlan(struct net_device *dev, int vf,
255 u16 vlan, u8 qos);
256extern int efx_sriov_get_vf_config(struct net_device *dev, int vf,
257 struct ifla_vf_info *ivf);
258extern int efx_sriov_set_vf_spoofchk(struct net_device *net_dev, int vf,
259 bool spoofchk);
260
172extern const struct efx_nic_type falcon_a1_nic_type; 261extern const struct efx_nic_type falcon_a1_nic_type;
173extern const struct efx_nic_type falcon_b0_nic_type; 262extern const struct efx_nic_type falcon_b0_nic_type;
174extern const struct efx_nic_type siena_a0_nic_type; 263extern const struct efx_nic_type siena_a0_nic_type;
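
Reviewer note (illustrative, not part of the patch): the comment block and macros added to nic.h above fix the VF-to-VI mapping (VF i owns VIs in [EFX_VI_BASE + i * efx_vf_size(), EFX_VI_BASE + (i + 1) * efx_vf_size())) and the per-VI buffer table reservation. A short standalone sketch of both calculations; EFX_MAX_DMAQ_SIZE, EFX_BUF_SIZE, the 8-byte qword size and the vi_scale value are assumptions here, not quoted from this patch:

	/* Illustrative sketch only: VF-to-VI mapping and EFX_VF_BUFTBL_PER_VI. */
	#include <stdio.h>

	#define EFX_VI_BASE		128u
	#define EFX_MAX_VF_EVQ_SIZE	8192u
	#define EFX_MAX_DMAQ_SIZE	4096u	/* assumed */
	#define EFX_BUF_SIZE		4096u	/* assumed */

	int main(void)
	{
		unsigned vi_scale = 2;			/* assumed firmware VI_SCALE */
		unsigned vf_size = 1u << vi_scale;	/* efx_vf_size() */
		unsigned buftbl_per_vi = (EFX_MAX_VF_EVQ_SIZE + 2 * EFX_MAX_DMAQ_SIZE)
					 * 8 / EFX_BUF_SIZE;
		unsigned vf;

		for (vf = 0; vf < 3; vf++)	/* first VI of each VF, cf. abs_index() */
			printf("VF %u -> VIs %u..%u\n", vf,
			       EFX_VI_BASE + vf * vf_size,
			       EFX_VI_BASE + (vf + 1) * vf_size - 1);

		printf("buffer table entries reserved per VF VI: %u\n", buftbl_per_vi);
		return 0;
	}
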
diff --git a/drivers/net/ethernet/sfc/siena.c b/drivers/net/ethernet/sfc/siena.c
index 657f3fa93bcf..7bea79017a05 100644
--- a/drivers/net/ethernet/sfc/siena.c
+++ b/drivers/net/ethernet/sfc/siena.c
@@ -313,6 +313,8 @@ static int siena_probe_nic(struct efx_nic *efx)
313 if (rc) 313 if (rc)
314 goto fail5; 314 goto fail5;
315 315
316 efx_sriov_probe(efx);
317
316 return 0; 318 return 0;
317 319
318fail5: 320fail5:
diff --git a/drivers/net/ethernet/sfc/siena_sriov.c b/drivers/net/ethernet/sfc/siena_sriov.c
new file mode 100644
index 000000000000..5c6839ec3a83
--- /dev/null
+++ b/drivers/net/ethernet/sfc/siena_sriov.c
@@ -0,0 +1,1642 @@
1/****************************************************************************
2 * Driver for Solarflare Solarstorm network controllers and boards
3 * Copyright 2010-2011 Solarflare Communications Inc.
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 as published
7 * by the Free Software Foundation, incorporated herein by reference.
8 */
9#include <linux/pci.h>
10#include <linux/module.h>
11#include "net_driver.h"
12#include "efx.h"
13#include "nic.h"
14#include "io.h"
15#include "mcdi.h"
16#include "filter.h"
17#include "mcdi_pcol.h"
18#include "regs.h"
19#include "vfdi.h"
20
21/* Number of longs required to track all the VIs in a VF */
22#define VI_MASK_LENGTH BITS_TO_LONGS(1 << EFX_VI_SCALE_MAX)
23
24/**
25 * enum efx_vf_tx_filter_mode - TX MAC filtering behaviour
26 * @VF_TX_FILTER_OFF: Disabled
27 * @VF_TX_FILTER_AUTO: Enabled if MAC address assigned to VF and only
28 * 2 TX queues allowed per VF.
29 * @VF_TX_FILTER_ON: Enabled
30 */
31enum efx_vf_tx_filter_mode {
32 VF_TX_FILTER_OFF,
33 VF_TX_FILTER_AUTO,
34 VF_TX_FILTER_ON,
35};
36
37/**
38 * struct efx_vf - Back-end resource and protocol state for a PCI VF
39 * @efx: The Efx NIC owning this VF
40 * @pci_rid: The PCI requester ID for this VF
41 * @pci_name: The PCI name (formatted address) of this VF
42 * @index: Index of VF within its port and PF.
43 * @req: VFDI incoming request work item. Incoming USR_EV events are received
44 * by the NAPI handler, but must be handled by executing MCDI requests
45 * inside a work item.
46 * @req_addr: VFDI incoming request DMA address (in VF's PCI address space).
47 * @req_type: Expected next incoming (from VF) %VFDI_EV_TYPE member.
48 * @req_seqno: Expected next incoming (from VF) %VFDI_EV_SEQ member.
49 * @msg_seqno: Next %VFDI_EV_SEQ member to reply to VF. Protected by
50 * @status_lock
51 * @busy: VFDI request queued to be processed or being processed. Receiving
52 * a VFDI request when @busy is set is an error condition.
53 * @buf: Incoming VFDI requests are DMA from the VF into this buffer.
54 * @buftbl_base: Buffer table entries for this VF start at this index.
55 * @rx_filtering: Receive filtering has been requested by the VF driver.
56 * @rx_filter_flags: The flags sent in the %VFDI_OP_INSERT_FILTER request.
57 * @rx_filter_qid: VF relative qid for RX filter requested by VF.
58 * @rx_filter_id: Receive MAC filter ID. Only one filter per VF is supported.
59 * @tx_filter_mode: Transmit MAC filtering mode.
60 * @tx_filter_id: Transmit MAC filter ID.
61 * @addr: The MAC address and outer vlan tag of the VF.
62 * @status_addr: VF DMA address of page for &struct vfdi_status updates.
63 * @status_lock: Mutex protecting @msg_seqno, @status_addr, @addr,
64 * @peer_page_addrs and @peer_page_count from simultaneous
65 * updates by the VM and consumption by
66 * efx_sriov_update_vf_addr()
67 * @peer_page_addrs: Pointer to an array of guest pages for local addresses.
68 * @peer_page_count: Number of entries in @peer_page_count.
69 * @evq0_addrs: Array of guest pages backing evq0.
70 * @evq0_count: Number of entries in @evq0_addrs.
71 * @flush_waitq: wait queue used by %VFDI_OP_FINI_ALL_QUEUES handler
72 * to wait for flush completions.
73 * @txq_lock: Mutex for TX queue allocation.
74 * @txq_mask: Mask of initialized transmit queues.
75 * @txq_count: Number of initialized transmit queues.
76 * @rxq_mask: Mask of initialized receive queues.
77 * @rxq_count: Number of initialized receive queues.
78 * @rxq_retry_mask: Mask or receive queues that need to be flushed again
79 * due to flush failure.
80 * @rxq_retry_count: Number of receive queues in @rxq_retry_mask.
81 * @reset_work: Work item to schedule a VF reset.
82 */
83struct efx_vf {
84 struct efx_nic *efx;
85 unsigned int pci_rid;
86 char pci_name[13]; /* dddd:bb:dd.f */
87 unsigned int index;
88 struct work_struct req;
89 u64 req_addr;
90 int req_type;
91 unsigned req_seqno;
92 unsigned msg_seqno;
93 bool busy;
94 struct efx_buffer buf;
95 unsigned buftbl_base;
96 bool rx_filtering;
97 enum efx_filter_flags rx_filter_flags;
98 unsigned rx_filter_qid;
99 int rx_filter_id;
100 enum efx_vf_tx_filter_mode tx_filter_mode;
101 int tx_filter_id;
102 struct vfdi_endpoint addr;
103 u64 status_addr;
104 struct mutex status_lock;
105 u64 *peer_page_addrs;
106 unsigned peer_page_count;
107 u64 evq0_addrs[EFX_MAX_VF_EVQ_SIZE * sizeof(efx_qword_t) /
108 EFX_BUF_SIZE];
109 unsigned evq0_count;
110 wait_queue_head_t flush_waitq;
111 struct mutex txq_lock;
112 unsigned long txq_mask[VI_MASK_LENGTH];
113 unsigned txq_count;
114 unsigned long rxq_mask[VI_MASK_LENGTH];
115 unsigned rxq_count;
116 unsigned long rxq_retry_mask[VI_MASK_LENGTH];
117 atomic_t rxq_retry_count;
118 struct work_struct reset_work;
119};
120
121struct efx_memcpy_req {
122 unsigned int from_rid;
123 void *from_buf;
124 u64 from_addr;
125 unsigned int to_rid;
126 u64 to_addr;
127 unsigned length;
128};
129
130/**
131 * struct efx_local_addr - A MAC address on the vswitch without a VF.
132 *
133 * Siena does not have a switch, so VFs can't transmit data to each
134 * other. Instead the VFs must be made aware of the local addresses
135 * on the vswitch, so that they can arrange for an alternative
136 * software datapath to be used.
137 *
138 * @link: List head for insertion into efx->local_addr_list.
139 * @addr: Ethernet address
140 */
141struct efx_local_addr {
142 struct list_head link;
143 u8 addr[ETH_ALEN];
144};
145
146/**
147 * struct efx_endpoint_page - Page of vfdi_endpoint structures
148 *
149 * @link: List head for insertion into efx->local_page_list.
150 * @ptr: Pointer to page.
151 * @addr: DMA address of page.
152 */
153struct efx_endpoint_page {
154 struct list_head link;
155 void *ptr;
156 dma_addr_t addr;
157};
158
159/* Buffer table entries are reserved txq0,rxq0,evq0,txq1,rxq1,evq1 */
160#define EFX_BUFTBL_TXQ_BASE(_vf, _qid) \
161 ((_vf)->buftbl_base + EFX_VF_BUFTBL_PER_VI * (_qid))
162#define EFX_BUFTBL_RXQ_BASE(_vf, _qid) \
163 (EFX_BUFTBL_TXQ_BASE(_vf, _qid) + \
164 (EFX_MAX_DMAQ_SIZE * sizeof(efx_qword_t) / EFX_BUF_SIZE))
165#define EFX_BUFTBL_EVQ_BASE(_vf, _qid) \
166 (EFX_BUFTBL_TXQ_BASE(_vf, _qid) + \
167 (2 * EFX_MAX_DMAQ_SIZE * sizeof(efx_qword_t) / EFX_BUF_SIZE))
168
169#define EFX_FIELD_MASK(_field) \
170 ((1 << _field ## _WIDTH) - 1)
171
172/* VFs can only use this many transmit channels */
173static unsigned int vf_max_tx_channels = 2;
174module_param(vf_max_tx_channels, uint, 0444);
175MODULE_PARM_DESC(vf_max_tx_channels,
176 "Limit the number of TX channels VFs can use");
177
178static int max_vfs = -1;
179module_param(max_vfs, int, 0444);
180MODULE_PARM_DESC(max_vfs,
181 "Reduce the number of VFs initialized by the driver");
182
183/* Workqueue used by VFDI communication. We can't use the global
184 * workqueue because it may be running the VF driver's probe()
185 * routine, which will be blocked there waiting for a VFDI response.
186 */
187static struct workqueue_struct *vfdi_workqueue;
188
189static unsigned abs_index(struct efx_vf *vf, unsigned index)
190{
191 return EFX_VI_BASE + vf->index * efx_vf_size(vf->efx) + index;
192}
193
194static int efx_sriov_cmd(struct efx_nic *efx, bool enable,
195 unsigned *vi_scale_out, unsigned *vf_total_out)
196{
197 u8 inbuf[MC_CMD_SRIOV_IN_LEN];
198 u8 outbuf[MC_CMD_SRIOV_OUT_LEN];
199 unsigned vi_scale, vf_total;
200 size_t outlen;
201 int rc;
202
203 MCDI_SET_DWORD(inbuf, SRIOV_IN_ENABLE, enable ? 1 : 0);
204 MCDI_SET_DWORD(inbuf, SRIOV_IN_VI_BASE, EFX_VI_BASE);
205 MCDI_SET_DWORD(inbuf, SRIOV_IN_VF_COUNT, efx->vf_count);
206
207 rc = efx_mcdi_rpc(efx, MC_CMD_SRIOV, inbuf, MC_CMD_SRIOV_IN_LEN,
208 outbuf, MC_CMD_SRIOV_OUT_LEN, &outlen);
209 if (rc)
210 return rc;
211 if (outlen < MC_CMD_SRIOV_OUT_LEN)
212 return -EIO;
213
214 vf_total = MCDI_DWORD(outbuf, SRIOV_OUT_VF_TOTAL);
215 vi_scale = MCDI_DWORD(outbuf, SRIOV_OUT_VI_SCALE);
216 if (vi_scale > EFX_VI_SCALE_MAX)
217 return -EOPNOTSUPP;
218
219 if (vi_scale_out)
220 *vi_scale_out = vi_scale;
221 if (vf_total_out)
222 *vf_total_out = vf_total;
223
224 return 0;
225}
226
227static void efx_sriov_usrev(struct efx_nic *efx, bool enabled)
228{
229 efx_oword_t reg;
230
231 EFX_POPULATE_OWORD_2(reg,
232 FRF_CZ_USREV_DIS, enabled ? 0 : 1,
233 FRF_CZ_DFLT_EVQ, efx->vfdi_channel->channel);
234 efx_writeo(efx, &reg, FR_CZ_USR_EV_CFG);
235}
236
237static int efx_sriov_memcpy(struct efx_nic *efx, struct efx_memcpy_req *req,
238 unsigned int count)
239{
240 u8 *inbuf, *record;
241 unsigned int used;
242 u32 from_rid, from_hi, from_lo;
243 int rc;
244
245 mb(); /* Finish writing source/reading dest before DMA starts */
246
247 used = MC_CMD_MEMCPY_IN_LEN(count);
248 if (WARN_ON(used > MCDI_CTL_SDU_LEN_MAX))
249 return -ENOBUFS;
250
251 /* Allocate room for the largest request */
252 inbuf = kzalloc(MCDI_CTL_SDU_LEN_MAX, GFP_KERNEL);
253 if (inbuf == NULL)
254 return -ENOMEM;
255
256 record = inbuf;
257 MCDI_SET_DWORD(record, MEMCPY_IN_RECORD, count);
258 while (count-- > 0) {
259 MCDI_SET_DWORD(record, MEMCPY_RECORD_TYPEDEF_TO_RID,
260 req->to_rid);
261 MCDI_SET_DWORD(record, MEMCPY_RECORD_TYPEDEF_TO_ADDR_LO,
262 (u32)req->to_addr);
263 MCDI_SET_DWORD(record, MEMCPY_RECORD_TYPEDEF_TO_ADDR_HI,
264 (u32)(req->to_addr >> 32));
265 if (req->from_buf == NULL) {
266 from_rid = req->from_rid;
267 from_lo = (u32)req->from_addr;
268 from_hi = (u32)(req->from_addr >> 32);
269 } else {
270 if (WARN_ON(used + req->length > MCDI_CTL_SDU_LEN_MAX)) {
271 rc = -ENOBUFS;
272 goto out;
273 }
274
275 from_rid = MC_CMD_MEMCPY_RECORD_TYPEDEF_RID_INLINE;
276 from_lo = used;
277 from_hi = 0;
278 memcpy(inbuf + used, req->from_buf, req->length);
279 used += req->length;
280 }
281
282 MCDI_SET_DWORD(record, MEMCPY_RECORD_TYPEDEF_FROM_RID, from_rid);
283 MCDI_SET_DWORD(record, MEMCPY_RECORD_TYPEDEF_FROM_ADDR_LO,
284 from_lo);
285 MCDI_SET_DWORD(record, MEMCPY_RECORD_TYPEDEF_FROM_ADDR_HI,
286 from_hi);
287 MCDI_SET_DWORD(record, MEMCPY_RECORD_TYPEDEF_LENGTH,
288 req->length);
289
290 ++req;
291 record += MC_CMD_MEMCPY_IN_RECORD_LEN;
292 }
293
294 rc = efx_mcdi_rpc(efx, MC_CMD_MEMCPY, inbuf, used, NULL, 0, NULL);
295out:
296 kfree(inbuf);
297
298 mb(); /* Don't write source/read dest before DMA is complete */
299
300 return rc;
301}
302
303/* The TX filter is entirely controlled by this driver, and is modified
304 * underneath the feet of the VF
305 */
306static void efx_sriov_reset_tx_filter(struct efx_vf *vf)
307{
308 struct efx_nic *efx = vf->efx;
309 struct efx_filter_spec filter;
310 u16 vlan;
311 int rc;
312
313 if (vf->tx_filter_id != -1) {
314 efx_filter_remove_id_safe(efx, EFX_FILTER_PRI_REQUIRED,
315 vf->tx_filter_id);
316 netif_dbg(efx, hw, efx->net_dev, "Removed vf %s tx filter %d\n",
317 vf->pci_name, vf->tx_filter_id);
318 vf->tx_filter_id = -1;
319 }
320
321 if (is_zero_ether_addr(vf->addr.mac_addr))
322 return;
323
324 /* Turn on TX filtering automatically if not explicitly
325 * enabled or disabled.
326 */
327 if (vf->tx_filter_mode == VF_TX_FILTER_AUTO && vf_max_tx_channels <= 2)
328 vf->tx_filter_mode = VF_TX_FILTER_ON;
329
330 vlan = ntohs(vf->addr.tci) & VLAN_VID_MASK;
331 efx_filter_init_tx(&filter, abs_index(vf, 0));
332 rc = efx_filter_set_eth_local(&filter,
333 vlan ? vlan : EFX_FILTER_VID_UNSPEC,
334 vf->addr.mac_addr);
335 BUG_ON(rc);
336
337 rc = efx_filter_insert_filter(efx, &filter, true);
338 if (rc < 0) {
339 netif_warn(efx, hw, efx->net_dev,
340 "Unable to migrate tx filter for vf %s\n",
341 vf->pci_name);
342 } else {
343 netif_dbg(efx, hw, efx->net_dev, "Inserted vf %s tx filter %d\n",
344 vf->pci_name, rc);
345 vf->tx_filter_id = rc;
346 }
347}
348
349/* The RX filter is managed here on behalf of the VF driver */
350static void efx_sriov_reset_rx_filter(struct efx_vf *vf)
351{
352 struct efx_nic *efx = vf->efx;
353 struct efx_filter_spec filter;
354 u16 vlan;
355 int rc;
356
357 if (vf->rx_filter_id != -1) {
358 efx_filter_remove_id_safe(efx, EFX_FILTER_PRI_REQUIRED,
359 vf->rx_filter_id);
360 netif_dbg(efx, hw, efx->net_dev, "Removed vf %s rx filter %d\n",
361 vf->pci_name, vf->rx_filter_id);
362 vf->rx_filter_id = -1;
363 }
364
365 if (!vf->rx_filtering || is_zero_ether_addr(vf->addr.mac_addr))
366 return;
367
368 vlan = ntohs(vf->addr.tci) & VLAN_VID_MASK;
369 efx_filter_init_rx(&filter, EFX_FILTER_PRI_REQUIRED,
370 vf->rx_filter_flags,
371 abs_index(vf, vf->rx_filter_qid));
372 rc = efx_filter_set_eth_local(&filter,
373 vlan ? vlan : EFX_FILTER_VID_UNSPEC,
374 vf->addr.mac_addr);
375 BUG_ON(rc);
376
377 rc = efx_filter_insert_filter(efx, &filter, true);
378 if (rc < 0) {
379 netif_warn(efx, hw, efx->net_dev,
380 "Unable to insert rx filter for vf %s\n",
381 vf->pci_name);
382 } else {
383 netif_dbg(efx, hw, efx->net_dev, "Inserted vf %s rx filter %d\n",
384 vf->pci_name, rc);
385 vf->rx_filter_id = rc;
386 }
387}
388
389static void __efx_sriov_update_vf_addr(struct efx_vf *vf)
390{
391 efx_sriov_reset_tx_filter(vf);
392 efx_sriov_reset_rx_filter(vf);
393 queue_work(vfdi_workqueue, &vf->efx->peer_work);
394}
395
396/* Push the peer list to this VF. The caller must hold status_lock to interlock
397 * with VFDI requests, and they must be serialised against manipulation of
398 * local_page_list, either by acquiring local_lock or by running from
399 * efx_sriov_peer_work()
400 */
401static void __efx_sriov_push_vf_status(struct efx_vf *vf)
402{
403 struct efx_nic *efx = vf->efx;
404 struct vfdi_status *status = efx->vfdi_status.addr;
405 struct efx_memcpy_req copy[4];
406 struct efx_endpoint_page *epp;
407 unsigned int pos, count;
408 unsigned data_offset;
409 efx_qword_t event;
410
411 WARN_ON(!mutex_is_locked(&vf->status_lock));
412 WARN_ON(!vf->status_addr);
413
414 status->local = vf->addr;
415 status->generation_end = ++status->generation_start;
416
417 memset(copy, '\0', sizeof(copy));
418 /* Write generation_start */
419 copy[0].from_buf = &status->generation_start;
420 copy[0].to_rid = vf->pci_rid;
421 copy[0].to_addr = vf->status_addr + offsetof(struct vfdi_status,
422 generation_start);
423 copy[0].length = sizeof(status->generation_start);
424 /* DMA the rest of the structure (excluding the generations). This
425 * assumes that the non-generation portion of vfdi_status is in
426 * one chunk starting at the version member.
427 */
428 data_offset = offsetof(struct vfdi_status, version);
429 copy[1].from_rid = efx->pci_dev->devfn;
430 copy[1].from_addr = efx->vfdi_status.dma_addr + data_offset;
431 copy[1].to_rid = vf->pci_rid;
432 copy[1].to_addr = vf->status_addr + data_offset;
433 copy[1].length = status->length - data_offset;
434
435 /* Copy the peer pages */
436 pos = 2;
437 count = 0;
438 list_for_each_entry(epp, &efx->local_page_list, link) {
439 if (count == vf->peer_page_count) {
440 /* The VF driver will know they need to provide more
441 * pages because peer_addr_count is too large.
442 */
443 break;
444 }
445 copy[pos].from_buf = NULL;
446 copy[pos].from_rid = efx->pci_dev->devfn;
447 copy[pos].from_addr = epp->addr;
448 copy[pos].to_rid = vf->pci_rid;
449 copy[pos].to_addr = vf->peer_page_addrs[count];
450 copy[pos].length = EFX_PAGE_SIZE;
451
452 if (++pos == ARRAY_SIZE(copy)) {
453 efx_sriov_memcpy(efx, copy, ARRAY_SIZE(copy));
454 pos = 0;
455 }
456 ++count;
457 }
458
459 /* Write generation_end */
460 copy[pos].from_buf = &status->generation_end;
461 copy[pos].to_rid = vf->pci_rid;
462 copy[pos].to_addr = vf->status_addr + offsetof(struct vfdi_status,
463 generation_end);
464 copy[pos].length = sizeof(status->generation_end);
465 efx_sriov_memcpy(efx, copy, pos + 1);
466
467 /* Notify the guest */
468 EFX_POPULATE_QWORD_3(event,
469 FSF_AZ_EV_CODE, FSE_CZ_EV_CODE_USER_EV,
470 VFDI_EV_SEQ, (vf->msg_seqno & 0xff),
471 VFDI_EV_TYPE, VFDI_EV_TYPE_STATUS);
472 ++vf->msg_seqno;
473 efx_generate_event(efx, EFX_VI_BASE + vf->index * efx_vf_size(efx),
474 &event);
475}
476
477static void efx_sriov_bufs(struct efx_nic *efx, unsigned offset,
478 u64 *addr, unsigned count)
479{
480 efx_qword_t buf;
481 unsigned pos;
482
483 for (pos = 0; pos < count; ++pos) {
484 EFX_POPULATE_QWORD_3(buf,
485 FRF_AZ_BUF_ADR_REGION, 0,
486 FRF_AZ_BUF_ADR_FBUF,
487 addr ? addr[pos] >> 12 : 0,
488 FRF_AZ_BUF_OWNER_ID_FBUF, 0);
489 efx_sram_writeq(efx, efx->membase + FR_BZ_BUF_FULL_TBL,
490 &buf, offset + pos);
491 }
492}
493
494static bool bad_vf_index(struct efx_nic *efx, unsigned index)
495{
496 return index >= efx_vf_size(efx);
497}
498
499static bool bad_buf_count(unsigned buf_count, unsigned max_entry_count)
500{
501 unsigned max_buf_count = max_entry_count *
502 sizeof(efx_qword_t) / EFX_BUF_SIZE;
503
504 return ((buf_count & (buf_count - 1)) || buf_count > max_buf_count);
505}
506
507/* Check that VI specified by per-port index belongs to a VF.
508 * Optionally set VF index and VI index within the VF.
509 */
510static bool map_vi_index(struct efx_nic *efx, unsigned abs_index,
511 struct efx_vf **vf_out, unsigned *rel_index_out)
512{
513 unsigned vf_i;
514
515 if (abs_index < EFX_VI_BASE)
516 return true;
517 vf_i = (abs_index - EFX_VI_BASE) * efx_vf_size(efx);
518 if (vf_i >= efx->vf_init_count)
519 return true;
520
521 if (vf_out)
522 *vf_out = efx->vf + vf_i;
523 if (rel_index_out)
524 *rel_index_out = abs_index % efx_vf_size(efx);
525 return false;
526}
527
528static int efx_vfdi_init_evq(struct efx_vf *vf)
529{
530 struct efx_nic *efx = vf->efx;
531 struct vfdi_req *req = vf->buf.addr;
532 unsigned vf_evq = req->u.init_evq.index;
533 unsigned buf_count = req->u.init_evq.buf_count;
534 unsigned abs_evq = abs_index(vf, vf_evq);
535 unsigned buftbl = EFX_BUFTBL_EVQ_BASE(vf, vf_evq);
536 efx_oword_t reg;
537
538 if (bad_vf_index(efx, vf_evq) ||
539 bad_buf_count(buf_count, EFX_MAX_VF_EVQ_SIZE)) {
540 if (net_ratelimit())
541 netif_err(efx, hw, efx->net_dev,
542 "ERROR: Invalid INIT_EVQ from %s: evq %d bufs %d\n",
543 vf->pci_name, vf_evq, buf_count);
544 return VFDI_RC_EINVAL;
545 }
546
547 efx_sriov_bufs(efx, buftbl, req->u.init_evq.addr, buf_count);
548
549 EFX_POPULATE_OWORD_3(reg,
550 FRF_CZ_TIMER_Q_EN, 1,
551 FRF_CZ_HOST_NOTIFY_MODE, 0,
552 FRF_CZ_TIMER_MODE, FFE_CZ_TIMER_MODE_DIS);
553 efx_writeo_table(efx, &reg, FR_BZ_TIMER_TBL, abs_evq);
554 EFX_POPULATE_OWORD_3(reg,
555 FRF_AZ_EVQ_EN, 1,
556 FRF_AZ_EVQ_SIZE, __ffs(buf_count),
557 FRF_AZ_EVQ_BUF_BASE_ID, buftbl);
558 efx_writeo_table(efx, &reg, FR_BZ_EVQ_PTR_TBL, abs_evq);
559
560 if (vf_evq == 0) {
561 memcpy(vf->evq0_addrs, req->u.init_evq.addr,
562 buf_count * sizeof(u64));
563 vf->evq0_count = buf_count;
564 }
565
566 return VFDI_RC_SUCCESS;
567}
568
569static int efx_vfdi_init_rxq(struct efx_vf *vf)
570{
571 struct efx_nic *efx = vf->efx;
572 struct vfdi_req *req = vf->buf.addr;
573 unsigned vf_rxq = req->u.init_rxq.index;
574 unsigned vf_evq = req->u.init_rxq.evq;
575 unsigned buf_count = req->u.init_rxq.buf_count;
576 unsigned buftbl = EFX_BUFTBL_RXQ_BASE(vf, vf_rxq);
577 unsigned label;
578 efx_oword_t reg;
579
580 if (bad_vf_index(efx, vf_evq) || bad_vf_index(efx, vf_rxq) ||
581 bad_buf_count(buf_count, EFX_MAX_DMAQ_SIZE)) {
582 if (net_ratelimit())
583 netif_err(efx, hw, efx->net_dev,
584 "ERROR: Invalid INIT_RXQ from %s: rxq %d evq %d "
585 "buf_count %d\n", vf->pci_name, vf_rxq,
586 vf_evq, buf_count);
587 return VFDI_RC_EINVAL;
588 }
589 if (__test_and_set_bit(req->u.init_rxq.index, vf->rxq_mask))
590 ++vf->rxq_count;
591 efx_sriov_bufs(efx, buftbl, req->u.init_rxq.addr, buf_count);
592
593 label = req->u.init_rxq.label & EFX_FIELD_MASK(FRF_AZ_RX_DESCQ_LABEL);
594 EFX_POPULATE_OWORD_6(reg,
595 FRF_AZ_RX_DESCQ_BUF_BASE_ID, buftbl,
596 FRF_AZ_RX_DESCQ_EVQ_ID, abs_index(vf, vf_evq),
597 FRF_AZ_RX_DESCQ_LABEL, label,
598 FRF_AZ_RX_DESCQ_SIZE, __ffs(buf_count),
599 FRF_AZ_RX_DESCQ_JUMBO,
600 !!(req->u.init_rxq.flags &
601 VFDI_RXQ_FLAG_SCATTER_EN),
602 FRF_AZ_RX_DESCQ_EN, 1);
603 efx_writeo_table(efx, &reg, FR_BZ_RX_DESC_PTR_TBL,
604 abs_index(vf, vf_rxq));
605
606 return VFDI_RC_SUCCESS;
607}
608
609static int efx_vfdi_init_txq(struct efx_vf *vf)
610{
611 struct efx_nic *efx = vf->efx;
612 struct vfdi_req *req = vf->buf.addr;
613 unsigned vf_txq = req->u.init_txq.index;
614 unsigned vf_evq = req->u.init_txq.evq;
615 unsigned buf_count = req->u.init_txq.buf_count;
616 unsigned buftbl = EFX_BUFTBL_TXQ_BASE(vf, vf_txq);
617 unsigned label, eth_filt_en;
618 efx_oword_t reg;
619
620 if (bad_vf_index(efx, vf_evq) || bad_vf_index(efx, vf_txq) ||
621 vf_txq >= vf_max_tx_channels ||
622 bad_buf_count(buf_count, EFX_MAX_DMAQ_SIZE)) {
623 if (net_ratelimit())
624 netif_err(efx, hw, efx->net_dev,
625 "ERROR: Invalid INIT_TXQ from %s: txq %d evq %d "
626 "buf_count %d\n", vf->pci_name, vf_txq,
627 vf_evq, buf_count);
628 return VFDI_RC_EINVAL;
629 }
630
631 mutex_lock(&vf->txq_lock);
632 if (__test_and_set_bit(req->u.init_txq.index, vf->txq_mask))
633 ++vf->txq_count;
634 mutex_unlock(&vf->txq_lock);
635 efx_sriov_bufs(efx, buftbl, req->u.init_txq.addr, buf_count);
636
637 eth_filt_en = vf->tx_filter_mode == VF_TX_FILTER_ON;
638
639 label = req->u.init_txq.label & EFX_FIELD_MASK(FRF_AZ_TX_DESCQ_LABEL);
640 EFX_POPULATE_OWORD_8(reg,
641 FRF_CZ_TX_DPT_Q_MASK_WIDTH, min(efx->vi_scale, 1U),
642 FRF_CZ_TX_DPT_ETH_FILT_EN, eth_filt_en,
643 FRF_AZ_TX_DESCQ_EN, 1,
644 FRF_AZ_TX_DESCQ_BUF_BASE_ID, buftbl,
645 FRF_AZ_TX_DESCQ_EVQ_ID, abs_index(vf, vf_evq),
646 FRF_AZ_TX_DESCQ_LABEL, label,
647 FRF_AZ_TX_DESCQ_SIZE, __ffs(buf_count),
648 FRF_BZ_TX_NON_IP_DROP_DIS, 1);
649 efx_writeo_table(efx, &reg, FR_BZ_TX_DESC_PTR_TBL,
650 abs_index(vf, vf_txq));
651
652 return VFDI_RC_SUCCESS;
653}
654
655/* Returns true when efx_vfdi_fini_all_queues should wake */
656static bool efx_vfdi_flush_wake(struct efx_vf *vf)
657{
658 /* Ensure that all updates are visible to efx_vfdi_fini_all_queues() */
659 smp_mb();
660
661 return (!vf->txq_count && !vf->rxq_count) ||
662 atomic_read(&vf->rxq_retry_count);
663}
664
665static void efx_vfdi_flush_clear(struct efx_vf *vf)
666{
667 memset(vf->txq_mask, 0, sizeof(vf->txq_mask));
668 vf->txq_count = 0;
669 memset(vf->rxq_mask, 0, sizeof(vf->rxq_mask));
670 vf->rxq_count = 0;
671 memset(vf->rxq_retry_mask, 0, sizeof(vf->rxq_retry_mask));
672 atomic_set(&vf->rxq_retry_count, 0);
673}
674
675static int efx_vfdi_fini_all_queues(struct efx_vf *vf)
676{
677 struct efx_nic *efx = vf->efx;
678 efx_oword_t reg;
679 unsigned count = efx_vf_size(efx);
680 unsigned vf_offset = EFX_VI_BASE + vf->index * efx_vf_size(efx);
681 unsigned timeout = HZ;
682 unsigned index, rxqs_count;
683 __le32 *rxqs;
684 int rc;
685
686 rxqs = kmalloc(count * sizeof(*rxqs), GFP_KERNEL);
687 if (rxqs == NULL)
688 return VFDI_RC_ENOMEM;
689
690 rtnl_lock();
691 if (efx->fc_disable++ == 0)
692 efx_mcdi_set_mac(efx);
693 rtnl_unlock();
694
695 /* Flush all the initialized queues */
696 rxqs_count = 0;
697 for (index = 0; index < count; ++index) {
698 if (test_bit(index, vf->txq_mask)) {
699 EFX_POPULATE_OWORD_2(reg,
700 FRF_AZ_TX_FLUSH_DESCQ_CMD, 1,
701 FRF_AZ_TX_FLUSH_DESCQ,
702 vf_offset + index);
703 efx_writeo(efx, &reg, FR_AZ_TX_FLUSH_DESCQ);
704 }
705 if (test_bit(index, vf->rxq_mask))
706 rxqs[rxqs_count++] = cpu_to_le32(vf_offset + index);
707 }
708
709 atomic_set(&vf->rxq_retry_count, 0);
710 while (timeout && (vf->rxq_count || vf->txq_count)) {
711 rc = efx_mcdi_rpc(efx, MC_CMD_FLUSH_RX_QUEUES, (u8 *)rxqs,
712 rxqs_count * sizeof(*rxqs), NULL, 0, NULL);
713 WARN_ON(rc < 0);
714
715 timeout = wait_event_timeout(vf->flush_waitq,
716 efx_vfdi_flush_wake(vf),
717 timeout);
718 rxqs_count = 0;
719 for (index = 0; index < count; ++index) {
720 if (test_and_clear_bit(index, vf->rxq_retry_mask)) {
721 atomic_dec(&vf->rxq_retry_count);
722 rxqs[rxqs_count++] =
723 cpu_to_le32(vf_offset + index);
724 }
725 }
726 }
727
728 rtnl_lock();
729 if (--efx->fc_disable == 0)
730 efx_mcdi_set_mac(efx);
731 rtnl_unlock();
732
733 /* Irrespective of success/failure, fini the queues */
734 EFX_ZERO_OWORD(reg);
735 for (index = 0; index < count; ++index) {
736 efx_writeo_table(efx, &reg, FR_BZ_RX_DESC_PTR_TBL,
737 vf_offset + index);
738 efx_writeo_table(efx, &reg, FR_BZ_TX_DESC_PTR_TBL,
739 vf_offset + index);
740 efx_writeo_table(efx, &reg, FR_BZ_EVQ_PTR_TBL,
741 vf_offset + index);
742 efx_writeo_table(efx, &reg, FR_BZ_TIMER_TBL,
743 vf_offset + index);
744 }
745 efx_sriov_bufs(efx, vf->buftbl_base, NULL,
746 EFX_VF_BUFTBL_PER_VI * efx_vf_size(efx));
747 kfree(rxqs);
748 efx_vfdi_flush_clear(vf);
749
750 vf->evq0_count = 0;
751
752 return timeout ? 0 : VFDI_RC_ETIMEDOUT;
753}
754
755static int efx_vfdi_insert_filter(struct efx_vf *vf)
756{
757 struct efx_nic *efx = vf->efx;
758 struct vfdi_req *req = vf->buf.addr;
759 unsigned vf_rxq = req->u.mac_filter.rxq;
760 unsigned flags;
761
762 if (bad_vf_index(efx, vf_rxq) || vf->rx_filtering) {
763 if (net_ratelimit())
764 netif_err(efx, hw, efx->net_dev,
765 "ERROR: Invalid INSERT_FILTER from %s: rxq %d "
766 "flags 0x%x\n", vf->pci_name, vf_rxq,
767 req->u.mac_filter.flags);
768 return VFDI_RC_EINVAL;
769 }
770
771 flags = 0;
772 if (req->u.mac_filter.flags & VFDI_MAC_FILTER_FLAG_RSS)
773 flags |= EFX_FILTER_FLAG_RX_RSS;
774 if (req->u.mac_filter.flags & VFDI_MAC_FILTER_FLAG_SCATTER)
775 flags |= EFX_FILTER_FLAG_RX_SCATTER;
776 vf->rx_filter_flags = flags;
777 vf->rx_filter_qid = vf_rxq;
778 vf->rx_filtering = true;
779
780 efx_sriov_reset_rx_filter(vf);
781 queue_work(vfdi_workqueue, &efx->peer_work);
782
783 return VFDI_RC_SUCCESS;
784}
785
786static int efx_vfdi_remove_all_filters(struct efx_vf *vf)
787{
788 vf->rx_filtering = false;
789 efx_sriov_reset_rx_filter(vf);
790 queue_work(vfdi_workqueue, &vf->efx->peer_work);
791
792 return VFDI_RC_SUCCESS;
793}
794
795static int efx_vfdi_set_status_page(struct efx_vf *vf)
796{
797 struct efx_nic *efx = vf->efx;
798 struct vfdi_req *req = vf->buf.addr;
799 unsigned int page_count;
800
801 page_count = req->u.set_status_page.peer_page_count;
802 if (!req->u.set_status_page.dma_addr || EFX_PAGE_SIZE <
803 offsetof(struct vfdi_req,
804 u.set_status_page.peer_page_addr[page_count])) {
805 if (net_ratelimit())
806 netif_err(efx, hw, efx->net_dev,
807 "ERROR: Invalid SET_STATUS_PAGE from %s\n",
808 vf->pci_name);
809 return VFDI_RC_EINVAL;
810 }
811
812 mutex_lock(&efx->local_lock);
813 mutex_lock(&vf->status_lock);
814 vf->status_addr = req->u.set_status_page.dma_addr;
815
816 kfree(vf->peer_page_addrs);
817 vf->peer_page_addrs = NULL;
818 vf->peer_page_count = 0;
819
820 if (page_count) {
821 vf->peer_page_addrs = kcalloc(page_count, sizeof(u64),
822 GFP_KERNEL);
823 if (vf->peer_page_addrs) {
824 memcpy(vf->peer_page_addrs,
825 req->u.set_status_page.peer_page_addr,
826 page_count * sizeof(u64));
827 vf->peer_page_count = page_count;
828 }
829 }
830
831 __efx_sriov_push_vf_status(vf);
832 mutex_unlock(&vf->status_lock);
833 mutex_unlock(&efx->local_lock);
834
835 return VFDI_RC_SUCCESS;
836}
837
838static int efx_vfdi_clear_status_page(struct efx_vf *vf)
839{
840 mutex_lock(&vf->status_lock);
841 vf->status_addr = 0;
842 mutex_unlock(&vf->status_lock);
843
844 return VFDI_RC_SUCCESS;
845}
846
847typedef int (*efx_vfdi_op_t)(struct efx_vf *vf);
848
849static const efx_vfdi_op_t vfdi_ops[VFDI_OP_LIMIT] = {
850 [VFDI_OP_INIT_EVQ] = efx_vfdi_init_evq,
851 [VFDI_OP_INIT_TXQ] = efx_vfdi_init_txq,
852 [VFDI_OP_INIT_RXQ] = efx_vfdi_init_rxq,
853 [VFDI_OP_FINI_ALL_QUEUES] = efx_vfdi_fini_all_queues,
854 [VFDI_OP_INSERT_FILTER] = efx_vfdi_insert_filter,
855 [VFDI_OP_REMOVE_ALL_FILTERS] = efx_vfdi_remove_all_filters,
856 [VFDI_OP_SET_STATUS_PAGE] = efx_vfdi_set_status_page,
857 [VFDI_OP_CLEAR_STATUS_PAGE] = efx_vfdi_clear_status_page,
858};
859
860static void efx_sriov_vfdi(struct work_struct *work)
861{
862 struct efx_vf *vf = container_of(work, struct efx_vf, req);
863 struct efx_nic *efx = vf->efx;
864 struct vfdi_req *req = vf->buf.addr;
865 struct efx_memcpy_req copy[2];
866 int rc;
867
868 /* Copy this page into the local address space */
869 memset(copy, '\0', sizeof(copy));
870 copy[0].from_rid = vf->pci_rid;
871 copy[0].from_addr = vf->req_addr;
872 copy[0].to_rid = efx->pci_dev->devfn;
873 copy[0].to_addr = vf->buf.dma_addr;
874 copy[0].length = EFX_PAGE_SIZE;
875 rc = efx_sriov_memcpy(efx, copy, 1);
876 if (rc) {
877 /* If we can't get the request, we can't reply to the caller */
878 if (net_ratelimit())
879 netif_err(efx, hw, efx->net_dev,
880 "ERROR: Unable to fetch VFDI request from %s rc %d\n",
881 vf->pci_name, -rc);
882 vf->busy = false;
883 return;
884 }
885
886 if (req->op < VFDI_OP_LIMIT && vfdi_ops[req->op] != NULL) {
887 rc = vfdi_ops[req->op](vf);
888 if (rc == 0) {
889 netif_dbg(efx, hw, efx->net_dev,
890 "vfdi request %d from %s ok\n",
891 req->op, vf->pci_name);
892 }
893 } else {
894 netif_dbg(efx, hw, efx->net_dev,
895 "ERROR: Unrecognised request %d from VF %s addr "
896 "%llx\n", req->op, vf->pci_name,
897 (unsigned long long)vf->req_addr);
898 rc = VFDI_RC_EOPNOTSUPP;
899 }
900
901 /* Allow subsequent VF requests */
902 vf->busy = false;
903 smp_wmb();
904
905 /* Respond to the request */
906 req->rc = rc;
907 req->op = VFDI_OP_RESPONSE;
908
909 memset(copy, '\0', sizeof(copy));
910 copy[0].from_buf = &req->rc;
911 copy[0].to_rid = vf->pci_rid;
912 copy[0].to_addr = vf->req_addr + offsetof(struct vfdi_req, rc);
913 copy[0].length = sizeof(req->rc);
914 copy[1].from_buf = &req->op;
915 copy[1].to_rid = vf->pci_rid;
916 copy[1].to_addr = vf->req_addr + offsetof(struct vfdi_req, op);
917 copy[1].length = sizeof(req->op);
918
919 (void) efx_sriov_memcpy(efx, copy, ARRAY_SIZE(copy));
920}
921
922
923
924/* After a reset the event queues inside the guests no longer exist. Fill the
925 * event ring in guest memory with VFDI reset events, then re-initialise the
926 * event queue to raise an interrupt. The guest driver will then recover.
927 */
928static void efx_sriov_reset_vf(struct efx_vf *vf, struct efx_buffer *buffer)
929{
930 struct efx_nic *efx = vf->efx;
931 struct efx_memcpy_req copy_req[4];
932 efx_qword_t event;
933 unsigned int pos, count, k, buftbl, abs_evq;
934 efx_oword_t reg;
935 efx_dword_t ptr;
936 int rc;
937
938 BUG_ON(buffer->len != EFX_PAGE_SIZE);
939
940 if (!vf->evq0_count)
941 return;
942 BUG_ON(vf->evq0_count & (vf->evq0_count - 1));
943
944 mutex_lock(&vf->status_lock);
945 EFX_POPULATE_QWORD_3(event,
946 FSF_AZ_EV_CODE, FSE_CZ_EV_CODE_USER_EV,
947 VFDI_EV_SEQ, vf->msg_seqno,
948 VFDI_EV_TYPE, VFDI_EV_TYPE_RESET);
949 vf->msg_seqno++;
950 for (pos = 0; pos < EFX_PAGE_SIZE; pos += sizeof(event))
951 memcpy(buffer->addr + pos, &event, sizeof(event));
952
953 for (pos = 0; pos < vf->evq0_count; pos += count) {
954 count = min_t(unsigned, vf->evq0_count - pos,
955 ARRAY_SIZE(copy_req));
956 for (k = 0; k < count; k++) {
957 copy_req[k].from_buf = NULL;
958 copy_req[k].from_rid = efx->pci_dev->devfn;
959 copy_req[k].from_addr = buffer->dma_addr;
960 copy_req[k].to_rid = vf->pci_rid;
961 copy_req[k].to_addr = vf->evq0_addrs[pos + k];
962 copy_req[k].length = EFX_PAGE_SIZE;
963 }
964 rc = efx_sriov_memcpy(efx, copy_req, count);
965 if (rc) {
966 if (net_ratelimit())
967 netif_err(efx, hw, efx->net_dev,
968 "ERROR: Unable to notify %s of reset"
969 ": %d\n", vf->pci_name, -rc);
970 break;
971 }
972 }
973
974 /* Reinitialise, arm and trigger evq0 */
975 abs_evq = abs_index(vf, 0);
976 buftbl = EFX_BUFTBL_EVQ_BASE(vf, 0);
977 efx_sriov_bufs(efx, buftbl, vf->evq0_addrs, vf->evq0_count);
978
979 EFX_POPULATE_OWORD_3(reg,
980 FRF_CZ_TIMER_Q_EN, 1,
981 FRF_CZ_HOST_NOTIFY_MODE, 0,
982 FRF_CZ_TIMER_MODE, FFE_CZ_TIMER_MODE_DIS);
983 efx_writeo_table(efx, &reg, FR_BZ_TIMER_TBL, abs_evq);
984 EFX_POPULATE_OWORD_3(reg,
985 FRF_AZ_EVQ_EN, 1,
986 FRF_AZ_EVQ_SIZE, __ffs(vf->evq0_count),
987 FRF_AZ_EVQ_BUF_BASE_ID, buftbl);
988 efx_writeo_table(efx, &reg, FR_BZ_EVQ_PTR_TBL, abs_evq);
989 EFX_POPULATE_DWORD_1(ptr, FRF_AZ_EVQ_RPTR, 0);
990 efx_writed_table(efx, &ptr, FR_BZ_EVQ_RPTR, abs_evq);
991
992 mutex_unlock(&vf->status_lock);
993}
994
995static void efx_sriov_reset_vf_work(struct work_struct *work)
996{
997 struct efx_vf *vf = container_of(work, struct efx_vf, reset_work);
998 struct efx_nic *efx = vf->efx;
999 struct efx_buffer buf;
1000
1001 if (!efx_nic_alloc_buffer(efx, &buf, EFX_PAGE_SIZE)) {
1002 efx_sriov_reset_vf(vf, &buf);
1003 efx_nic_free_buffer(efx, &buf);
1004 }
1005}
1006
1007static void efx_sriov_handle_no_channel(struct efx_nic *efx)
1008{
1009 netif_err(efx, drv, efx->net_dev,
1010 "ERROR: IOV requires MSI-X and 1 additional interrupt"
1011 "vector. IOV disabled\n");
1012 efx->vf_count = 0;
1013}
1014
1015static int efx_sriov_probe_channel(struct efx_channel *channel)
1016{
1017 channel->efx->vfdi_channel = channel;
1018 return 0;
1019}
1020
1021static void
1022efx_sriov_get_channel_name(struct efx_channel *channel, char *buf, size_t len)
1023{
1024 snprintf(buf, len, "%s-iov", channel->efx->name);
1025}
1026
1027static const struct efx_channel_type efx_sriov_channel_type = {
1028 .handle_no_channel = efx_sriov_handle_no_channel,
1029 .pre_probe = efx_sriov_probe_channel,
1030 .get_name = efx_sriov_get_channel_name,
1031 /* no copy operation; channel must not be reallocated */
1032 .keep_eventq = true,
1033};
1034
1035void efx_sriov_probe(struct efx_nic *efx)
1036{
1037 unsigned count;
1038
1039 if (!max_vfs)
1040 return;
1041
1042 if (efx_sriov_cmd(efx, false, &efx->vi_scale, &count))
1043 return;
1044 if (count > 0 && count > max_vfs)
1045 count = max_vfs;
1046
1047 /* efx_nic_dimension_resources() will reduce vf_count as appropriate */
1048 efx->vf_count = count;
1049
1050 efx->extra_channel_type[EFX_EXTRA_CHANNEL_IOV] = &efx_sriov_channel_type;
1051}
1052
1053/* Copy the list of individual addresses into the vfdi_status.peers
1054 * array and auxiliary pages, protected by %local_lock. Drop that lock
1055 * and then broadcast the address list to every VF.
1056 */
1057static void efx_sriov_peer_work(struct work_struct *data)
1058{
1059 struct efx_nic *efx = container_of(data, struct efx_nic, peer_work);
1060 struct vfdi_status *vfdi_status = efx->vfdi_status.addr;
1061 struct efx_vf *vf;
1062 struct efx_local_addr *local_addr;
1063 struct vfdi_endpoint *peer;
1064 struct efx_endpoint_page *epp;
1065 struct list_head pages;
1066 unsigned int peer_space;
1067 unsigned int peer_count;
1068 unsigned int pos;
1069
1070 mutex_lock(&efx->local_lock);
1071
1072 /* Move the existing peer pages off %local_page_list */
1073 INIT_LIST_HEAD(&pages);
1074 list_splice_tail_init(&efx->local_page_list, &pages);
1075
1076 /* Populate the VF addresses starting from entry 1 (entry 0 is
1077 * the PF address)
1078 */
1079 peer = vfdi_status->peers + 1;
1080 peer_space = ARRAY_SIZE(vfdi_status->peers) - 1;
1081 peer_count = 1;
1082 for (pos = 0; pos < efx->vf_count; ++pos) {
1083 vf = efx->vf + pos;
1084
1085 mutex_lock(&vf->status_lock);
1086 if (vf->rx_filtering && !is_zero_ether_addr(vf->addr.mac_addr)) {
1087 *peer++ = vf->addr;
1088 ++peer_count;
1089 --peer_space;
1090 BUG_ON(peer_space == 0);
1091 }
1092 mutex_unlock(&vf->status_lock);
1093 }
1094
1095 /* Fill the remaining addresses */
1096 list_for_each_entry(local_addr, &efx->local_addr_list, link) {
1097 memcpy(peer->mac_addr, local_addr->addr, ETH_ALEN);
1098 peer->tci = 0;
1099 ++peer;
1100 ++peer_count;
1101 if (--peer_space == 0) {
1102 if (list_empty(&pages)) {
1103 epp = kmalloc(sizeof(*epp), GFP_KERNEL);
1104 if (!epp)
1105 break;
1106 epp->ptr = dma_alloc_coherent(
1107 &efx->pci_dev->dev, EFX_PAGE_SIZE,
1108 &epp->addr, GFP_KERNEL);
1109 if (!epp->ptr) {
1110 kfree(epp);
1111 break;
1112 }
1113 } else {
1114 epp = list_first_entry(
1115 &pages, struct efx_endpoint_page, link);
1116 list_del(&epp->link);
1117 }
1118
1119 list_add_tail(&epp->link, &efx->local_page_list);
1120 peer = (struct vfdi_endpoint *)epp->ptr;
1121 peer_space = EFX_PAGE_SIZE / sizeof(struct vfdi_endpoint);
1122 }
1123 }
1124 vfdi_status->peer_count = peer_count;
1125 mutex_unlock(&efx->local_lock);
1126
1127 /* Free any now unused endpoint pages */
1128 while (!list_empty(&pages)) {
1129 epp = list_first_entry(
1130 &pages, struct efx_endpoint_page, link);
1131 list_del(&epp->link);
1132 dma_free_coherent(&efx->pci_dev->dev, EFX_PAGE_SIZE,
1133 epp->ptr, epp->addr);
1134 kfree(epp);
1135 }
1136
1137 /* Finally, push the pages */
1138 for (pos = 0; pos < efx->vf_count; ++pos) {
1139 vf = efx->vf + pos;
1140
1141 mutex_lock(&vf->status_lock);
1142 if (vf->status_addr)
1143 __efx_sriov_push_vf_status(vf);
1144 mutex_unlock(&vf->status_lock);
1145 }
1146}
1147
1148static void efx_sriov_free_local(struct efx_nic *efx)
1149{
1150 struct efx_local_addr *local_addr;
1151 struct efx_endpoint_page *epp;
1152
1153 while (!list_empty(&efx->local_addr_list)) {
1154 local_addr = list_first_entry(&efx->local_addr_list,
1155 struct efx_local_addr, link);
1156 list_del(&local_addr->link);
1157 kfree(local_addr);
1158 }
1159
1160 while (!list_empty(&efx->local_page_list)) {
1161 epp = list_first_entry(&efx->local_page_list,
1162 struct efx_endpoint_page, link);
1163 list_del(&epp->link);
1164 dma_free_coherent(&efx->pci_dev->dev, EFX_PAGE_SIZE,
1165 epp->ptr, epp->addr);
1166 kfree(epp);
1167 }
1168}
1169
1170static int efx_sriov_vf_alloc(struct efx_nic *efx)
1171{
1172 unsigned index;
1173 struct efx_vf *vf;
1174
1175 efx->vf = kzalloc(sizeof(struct efx_vf) * efx->vf_count, GFP_KERNEL);
1176 if (!efx->vf)
1177 return -ENOMEM;
1178
1179 for (index = 0; index < efx->vf_count; ++index) {
1180 vf = efx->vf + index;
1181
1182 vf->efx = efx;
1183 vf->index = index;
1184 vf->rx_filter_id = -1;
1185 vf->tx_filter_mode = VF_TX_FILTER_AUTO;
1186 vf->tx_filter_id = -1;
1187 INIT_WORK(&vf->req, efx_sriov_vfdi);
1188 INIT_WORK(&vf->reset_work, efx_sriov_reset_vf_work);
1189 init_waitqueue_head(&vf->flush_waitq);
1190 mutex_init(&vf->status_lock);
1191 mutex_init(&vf->txq_lock);
1192 }
1193
1194 return 0;
1195}
1196
1197static void efx_sriov_vfs_fini(struct efx_nic *efx)
1198{
1199 struct efx_vf *vf;
1200 unsigned int pos;
1201
1202 for (pos = 0; pos < efx->vf_count; ++pos) {
1203 vf = efx->vf + pos;
1204
1205 efx_nic_free_buffer(efx, &vf->buf);
1206 kfree(vf->peer_page_addrs);
1207 vf->peer_page_addrs = NULL;
1208 vf->peer_page_count = 0;
1209
1210 vf->evq0_count = 0;
1211 }
1212}
1213
1214static int efx_sriov_vfs_init(struct efx_nic *efx)
1215{
1216 struct pci_dev *pci_dev = efx->pci_dev;
1217 unsigned index, devfn, sriov, buftbl_base;
1218 u16 offset, stride;
1219 struct efx_vf *vf;
1220 int rc;
1221
1222 sriov = pci_find_ext_capability(pci_dev, PCI_EXT_CAP_ID_SRIOV);
1223 if (!sriov)
1224 return -ENOENT;
1225
1226 pci_read_config_word(pci_dev, sriov + PCI_SRIOV_VF_OFFSET, &offset);
1227 pci_read_config_word(pci_dev, sriov + PCI_SRIOV_VF_STRIDE, &stride);
1228
1229 buftbl_base = efx->vf_buftbl_base;
1230 devfn = pci_dev->devfn + offset;
1231 for (index = 0; index < efx->vf_count; ++index) {
1232 vf = efx->vf + index;
1233
1234 /* Reserve buffer entries */
1235 vf->buftbl_base = buftbl_base;
1236 buftbl_base += EFX_VF_BUFTBL_PER_VI * efx_vf_size(efx);
1237
1238 vf->pci_rid = devfn;
1239 snprintf(vf->pci_name, sizeof(vf->pci_name),
1240 "%04x:%02x:%02x.%d",
1241 pci_domain_nr(pci_dev->bus), pci_dev->bus->number,
1242 PCI_SLOT(devfn), PCI_FUNC(devfn));
1243
1244 rc = efx_nic_alloc_buffer(efx, &vf->buf, EFX_PAGE_SIZE);
1245 if (rc)
1246 goto fail;
1247
1248 devfn += stride;
1249 }
1250
1251 return 0;
1252
1253fail:
1254 efx_sriov_vfs_fini(efx);
1255 return rc;
1256}
1257
1258int efx_sriov_init(struct efx_nic *efx)
1259{
1260 struct net_device *net_dev = efx->net_dev;
1261 struct vfdi_status *vfdi_status;
1262 int rc;
1263
1264 /* Ensure there's room for vf_channel */
1265 BUILD_BUG_ON(EFX_MAX_CHANNELS + 1 >= EFX_VI_BASE);
1266 /* Ensure that VI_BASE is aligned on VI_SCALE */
1267 BUILD_BUG_ON(EFX_VI_BASE & ((1 << EFX_VI_SCALE_MAX) - 1));
1268
1269 if (efx->vf_count == 0)
1270 return 0;
1271
1272 rc = efx_sriov_cmd(efx, true, NULL, NULL);
1273 if (rc)
1274 goto fail_cmd;
1275
1276 rc = efx_nic_alloc_buffer(efx, &efx->vfdi_status, sizeof(*vfdi_status));
1277 if (rc)
1278 goto fail_status;
1279 vfdi_status = efx->vfdi_status.addr;
1280 memset(vfdi_status, 0, sizeof(*vfdi_status));
1281 vfdi_status->version = 1;
1282 vfdi_status->length = sizeof(*vfdi_status);
1283 vfdi_status->max_tx_channels = vf_max_tx_channels;
1284 vfdi_status->vi_scale = efx->vi_scale;
1285 vfdi_status->rss_rxq_count = efx->rss_spread;
1286 vfdi_status->peer_count = 1 + efx->vf_count;
1287 vfdi_status->timer_quantum_ns = efx->timer_quantum_ns;
1288
1289 rc = efx_sriov_vf_alloc(efx);
1290 if (rc)
1291 goto fail_alloc;
1292
1293 mutex_init(&efx->local_lock);
1294 INIT_WORK(&efx->peer_work, efx_sriov_peer_work);
1295 INIT_LIST_HEAD(&efx->local_addr_list);
1296 INIT_LIST_HEAD(&efx->local_page_list);
1297
1298 rc = efx_sriov_vfs_init(efx);
1299 if (rc)
1300 goto fail_vfs;
1301
1302 rtnl_lock();
1303 memcpy(vfdi_status->peers[0].mac_addr,
1304 net_dev->dev_addr, ETH_ALEN);
1305 efx->vf_init_count = efx->vf_count;
1306 rtnl_unlock();
1307
1308 efx_sriov_usrev(efx, true);
1309
1310 /* At this point we must be ready to accept VFDI requests */
1311
1312 rc = pci_enable_sriov(efx->pci_dev, efx->vf_count);
1313 if (rc)
1314 goto fail_pci;
1315
1316 netif_info(efx, probe, net_dev,
1317 "enabled SR-IOV for %d VFs, %d VI per VF\n",
1318 efx->vf_count, efx_vf_size(efx));
1319 return 0;
1320
1321fail_pci:
1322 efx_sriov_usrev(efx, false);
1323 rtnl_lock();
1324 efx->vf_init_count = 0;
1325 rtnl_unlock();
1326 efx_sriov_vfs_fini(efx);
1327fail_vfs:
1328 cancel_work_sync(&efx->peer_work);
1329 efx_sriov_free_local(efx);
1330 kfree(efx->vf);
1331fail_alloc:
1332 efx_nic_free_buffer(efx, &efx->vfdi_status);
1333fail_status:
1334 efx_sriov_cmd(efx, false, NULL, NULL);
1335fail_cmd:
1336 return rc;
1337}
1338
1339void efx_sriov_fini(struct efx_nic *efx)
1340{
1341 struct efx_vf *vf;
1342 unsigned int pos;
1343
1344 if (efx->vf_init_count == 0)
1345 return;
1346
1347 /* Disable all interfaces through which reconfiguration can be requested */
1348 BUG_ON(efx->vfdi_channel->enabled);
1349 efx_sriov_usrev(efx, false);
1350 rtnl_lock();
1351 efx->vf_init_count = 0;
1352 rtnl_unlock();
1353
1354 /* Flush all reconfiguration work */
1355 for (pos = 0; pos < efx->vf_count; ++pos) {
1356 vf = efx->vf + pos;
1357 cancel_work_sync(&vf->req);
1358 cancel_work_sync(&vf->reset_work);
1359 }
1360 cancel_work_sync(&efx->peer_work);
1361
1362 pci_disable_sriov(efx->pci_dev);
1363
1364 /* Tear down back-end state */
1365 efx_sriov_vfs_fini(efx);
1366 efx_sriov_free_local(efx);
1367 kfree(efx->vf);
1368 efx_nic_free_buffer(efx, &efx->vfdi_status);
1369 efx_sriov_cmd(efx, false, NULL, NULL);
1370}
1371
1372void efx_sriov_event(struct efx_channel *channel, efx_qword_t *event)
1373{
1374 struct efx_nic *efx = channel->efx;
1375 struct efx_vf *vf;
1376 unsigned qid, seq, type, data;
1377
1378 qid = EFX_QWORD_FIELD(*event, FSF_CZ_USER_QID);
1379
1380 /* USR_EV_REG_VALUE is dword0, so access the VFDI_EV fields directly */
1381 BUILD_BUG_ON(FSF_CZ_USER_EV_REG_VALUE_LBN != 0);
1382 seq = EFX_QWORD_FIELD(*event, VFDI_EV_SEQ);
1383 type = EFX_QWORD_FIELD(*event, VFDI_EV_TYPE);
1384 data = EFX_QWORD_FIELD(*event, VFDI_EV_DATA);
1385
1386 netif_vdbg(efx, hw, efx->net_dev,
1387 "USR_EV event from qid %d seq 0x%x type %d data 0x%x\n",
1388 qid, seq, type, data);
1389
1390 if (map_vi_index(efx, qid, &vf, NULL))
1391 return;
1392 if (vf->busy)
1393 goto error;
1394
1395 if (type == VFDI_EV_TYPE_REQ_WORD0) {
1396 /* Resynchronise */
1397 vf->req_type = VFDI_EV_TYPE_REQ_WORD0;
1398 vf->req_seqno = seq + 1;
1399 vf->req_addr = 0;
1400 } else if (seq != (vf->req_seqno++ & 0xff) || type != vf->req_type)
1401 goto error;
1402
1403 switch (vf->req_type) {
1404 case VFDI_EV_TYPE_REQ_WORD0:
1405 case VFDI_EV_TYPE_REQ_WORD1:
1406 case VFDI_EV_TYPE_REQ_WORD2:
1407 vf->req_addr |= (u64)data << (vf->req_type << 4);
1408 ++vf->req_type;
1409 return;
1410
1411 case VFDI_EV_TYPE_REQ_WORD3:
1412 vf->req_addr |= (u64)data << 48;
1413 vf->req_type = VFDI_EV_TYPE_REQ_WORD0;
1414 vf->busy = true;
1415 queue_work(vfdi_workqueue, &vf->req);
1416 return;
1417 }
1418
1419error:
1420 if (net_ratelimit())
1421 netif_err(efx, hw, efx->net_dev,
1422 "ERROR: Screaming VFDI request from %s\n",
1423 vf->pci_name);
1424 /* Reset the request and sequence number */
1425 vf->req_type = VFDI_EV_TYPE_REQ_WORD0;
1426 vf->req_seqno = seq + 1;
1427}
1428
1429void efx_sriov_flr(struct efx_nic *efx, unsigned vf_i)
1430{
1431 struct efx_vf *vf;
1432
1433 if (vf_i >= efx->vf_init_count)
1434 return;
1435 vf = efx->vf + vf_i;
1436 netif_info(efx, hw, efx->net_dev,
1437 "FLR on VF %s\n", vf->pci_name);
1438
1439 vf->status_addr = 0;
1440 efx_vfdi_remove_all_filters(vf);
1441 efx_vfdi_flush_clear(vf);
1442
1443 vf->evq0_count = 0;
1444}
1445
1446void efx_sriov_mac_address_changed(struct efx_nic *efx)
1447{
1448 struct vfdi_status *vfdi_status = efx->vfdi_status.addr;
1449
1450 if (!efx->vf_init_count)
1451 return;
1452 memcpy(vfdi_status->peers[0].mac_addr,
1453 efx->net_dev->dev_addr, ETH_ALEN);
1454 queue_work(vfdi_workqueue, &efx->peer_work);
1455}
1456
1457void efx_sriov_tx_flush_done(struct efx_nic *efx, efx_qword_t *event)
1458{
1459 struct efx_vf *vf;
1460 unsigned queue, qid;
1461
1462 queue = EFX_QWORD_FIELD(*event, FSF_AZ_DRIVER_EV_SUBDATA);
1463 if (map_vi_index(efx, queue, &vf, &qid))
1464 return;
1465 /* Ignore flush completions triggered by an FLR */
1466 if (!test_bit(qid, vf->txq_mask))
1467 return;
1468
1469 __clear_bit(qid, vf->txq_mask);
1470 --vf->txq_count;
1471
1472 if (efx_vfdi_flush_wake(vf))
1473 wake_up(&vf->flush_waitq);
1474}
1475
1476void efx_sriov_rx_flush_done(struct efx_nic *efx, efx_qword_t *event)
1477{
1478 struct efx_vf *vf;
1479 unsigned ev_failed, queue, qid;
1480
1481 queue = EFX_QWORD_FIELD(*event, FSF_AZ_DRIVER_EV_RX_DESCQ_ID);
1482 ev_failed = EFX_QWORD_FIELD(*event,
1483 FSF_AZ_DRIVER_EV_RX_FLUSH_FAIL);
1484 if (map_vi_index(efx, queue, &vf, &qid))
1485 return;
1486 if (!test_bit(qid, vf->rxq_mask))
1487 return;
1488
1489 if (ev_failed) {
1490 set_bit(qid, vf->rxq_retry_mask);
1491 atomic_inc(&vf->rxq_retry_count);
1492 } else {
1493 __clear_bit(qid, vf->rxq_mask);
1494 --vf->rxq_count;
1495 }
1496 if (efx_vfdi_flush_wake(vf))
1497 wake_up(&vf->flush_waitq);
1498}
1499
1500/* Called from napi. Schedule the reset work item */
1501void efx_sriov_desc_fetch_err(struct efx_nic *efx, unsigned dmaq)
1502{
1503 struct efx_vf *vf;
1504 unsigned int rel;
1505
1506 if (map_vi_index(efx, dmaq, &vf, &rel))
1507 return;
1508
1509 if (net_ratelimit())
1510 netif_err(efx, hw, efx->net_dev,
1511 "VF %d DMA Q %d reports descriptor fetch error.\n",
1512 vf->index, rel);
1513 queue_work(vfdi_workqueue, &vf->reset_work);
1514}
1515
1516/* Reset all VFs */
1517void efx_sriov_reset(struct efx_nic *efx)
1518{
1519 unsigned int vf_i;
1520 struct efx_buffer buf;
1521 struct efx_vf *vf;
1522
1523 ASSERT_RTNL();
1524
1525 if (efx->vf_init_count == 0)
1526 return;
1527
1528 efx_sriov_usrev(efx, true);
1529 (void)efx_sriov_cmd(efx, true, NULL, NULL);
1530
1531 if (efx_nic_alloc_buffer(efx, &buf, EFX_PAGE_SIZE))
1532 return;
1533
1534 for (vf_i = 0; vf_i < efx->vf_init_count; ++vf_i) {
1535 vf = efx->vf + vf_i;
1536 efx_sriov_reset_vf(vf, &buf);
1537 }
1538
1539 efx_nic_free_buffer(efx, &buf);
1540}
1541
1542int efx_init_sriov(void)
1543{
1544 /* A single-threaded workqueue is sufficient. efx_sriov_vfdi() and
1545 * efx_sriov_peer_work() spend almost all their time sleeping for
1546 * MCDI to complete anyway.
1547 */
1548 vfdi_workqueue = create_singlethread_workqueue("sfc_vfdi");
1549 if (!vfdi_workqueue)
1550 return -ENOMEM;
1551
1552 return 0;
1553}
1554
1555void efx_fini_sriov(void)
1556{
1557 destroy_workqueue(vfdi_workqueue);
1558}
1559
1560int efx_sriov_set_vf_mac(struct net_device *net_dev, int vf_i, u8 *mac)
1561{
1562 struct efx_nic *efx = netdev_priv(net_dev);
1563 struct efx_vf *vf;
1564
1565 if (vf_i >= efx->vf_init_count)
1566 return -EINVAL;
1567 vf = efx->vf + vf_i;
1568
1569 mutex_lock(&vf->status_lock);
1570 memcpy(vf->addr.mac_addr, mac, ETH_ALEN);
1571 __efx_sriov_update_vf_addr(vf);
1572 mutex_unlock(&vf->status_lock);
1573
1574 return 0;
1575}
1576
1577int efx_sriov_set_vf_vlan(struct net_device *net_dev, int vf_i,
1578 u16 vlan, u8 qos)
1579{
1580 struct efx_nic *efx = netdev_priv(net_dev);
1581 struct efx_vf *vf;
1582 u16 tci;
1583
1584 if (vf_i >= efx->vf_init_count)
1585 return -EINVAL;
1586 vf = efx->vf + vf_i;
1587
1588 mutex_lock(&vf->status_lock);
1589 tci = (vlan & VLAN_VID_MASK) | ((qos & 0x7) << VLAN_PRIO_SHIFT);
1590 vf->addr.tci = htons(tci);
1591 __efx_sriov_update_vf_addr(vf);
1592 mutex_unlock(&vf->status_lock);
1593
1594 return 0;
1595}
1596
1597int efx_sriov_set_vf_spoofchk(struct net_device *net_dev, int vf_i,
1598 bool spoofchk)
1599{
1600 struct efx_nic *efx = netdev_priv(net_dev);
1601 struct efx_vf *vf;
1602 int rc;
1603
1604 if (vf_i >= efx->vf_init_count)
1605 return -EINVAL;
1606 vf = efx->vf + vf_i;
1607
1608 mutex_lock(&vf->txq_lock);
1609 if (vf->txq_count == 0) {
1610 vf->tx_filter_mode =
1611 spoofchk ? VF_TX_FILTER_ON : VF_TX_FILTER_OFF;
1612 rc = 0;
1613 } else {
1614 /* This cannot be changed while TX queues are running */
1615 rc = -EBUSY;
1616 }
1617 mutex_unlock(&vf->txq_lock);
1618 return rc;
1619}
1620
1621int efx_sriov_get_vf_config(struct net_device *net_dev, int vf_i,
1622 struct ifla_vf_info *ivi)
1623{
1624 struct efx_nic *efx = netdev_priv(net_dev);
1625 struct efx_vf *vf;
1626 u16 tci;
1627
1628 if (vf_i >= efx->vf_init_count)
1629 return -EINVAL;
1630 vf = efx->vf + vf_i;
1631
1632 ivi->vf = vf_i;
1633 memcpy(ivi->mac, vf->addr.mac_addr, ETH_ALEN);
1634 ivi->tx_rate = 0;
1635 tci = ntohs(vf->addr.tci);
1636 ivi->vlan = tci & VLAN_VID_MASK;
1637 ivi->qos = (tci >> VLAN_PRIO_SHIFT) & 0x7;
1638 ivi->spoofchk = vf->tx_filter_mode == VF_TX_FILTER_ON;
1639
1640 return 0;
1641}
1642
diff --git a/drivers/net/ethernet/sfc/vfdi.h b/drivers/net/ethernet/sfc/vfdi.h
new file mode 100644
index 000000000000..656fa70f9993
--- /dev/null
+++ b/drivers/net/ethernet/sfc/vfdi.h
@@ -0,0 +1,254 @@
1/****************************************************************************
2 * Driver for Solarflare Solarstorm network controllers and boards
3 * Copyright 2010-2012 Solarflare Communications Inc.
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 as published
7 * by the Free Software Foundation, incorporated herein by reference.
8 */
9#ifndef _VFDI_H
10#define _VFDI_H
11
12/**
13 * DOC: Virtual Function Driver Interface
14 *
15 * This file contains software structures used to form a two way
16 * communication channel between the VF driver and the PF driver,
17 * named Virtual Function Driver Interface (VFDI).
18 *
19 * For the purposes of VFDI, a page is a memory region with size and
20 * alignment of 4K. All addresses are DMA addresses to be used within
21 * the domain of the relevant VF.
22 *
23 * The only hardware-defined channels for a VF driver to communicate
24 * with the PF driver are the event mailboxes (%FR_CZ_USR_EV
25 * registers). Writing to these registers generates an event with
26 * EV_CODE = EV_CODE_USR_EV, USER_QID set to the index of the mailbox
27 * and USER_EV_REG_VALUE set to the value written. The PF driver may
28 * direct or disable delivery of these events by setting
29 * %FR_CZ_USR_EV_CFG.
30 *
31 * The PF driver can send arbitrary events to arbitrary event queues.
32 * However, for consistency, VFDI events from the PF are defined to
33 * follow the same form and be sent to the first event queue assigned
34 * to the VF while that queue is enabled by the VF driver.
35 *
36 * The general form of the variable bits of VFDI events is:
37 *
38 * 0 16 24 31
39 * | DATA | TYPE | SEQ |
40 *
41 * SEQ is a sequence number which should be incremented by 1 (modulo
42 * 256) for each event. The sequence numbers used in each direction
43 * are independent.
44 *
45 * The VF submits requests of type &struct vfdi_req by sending the
46 * address of the request (ADDR) in a series of 4 events:
47 *
48 * 0 16 24 31
49 * | ADDR[0:15] | VFDI_EV_TYPE_REQ_WORD0 | SEQ |
50 * | ADDR[16:31] | VFDI_EV_TYPE_REQ_WORD1 | SEQ+1 |
51 * | ADDR[32:47] | VFDI_EV_TYPE_REQ_WORD2 | SEQ+2 |
52 * | ADDR[48:63] | VFDI_EV_TYPE_REQ_WORD3 | SEQ+3 |
53 *
54 * The address must be page-aligned. After receiving such a valid
55 * series of events, the PF driver will attempt to read the request
56 * and write a response to the same address. In case of an invalid
57 * sequence of events or a DMA error, there will be no response.
58 *
59 * The VF driver may request that the PF driver writes status
60 * information into its domain asynchronously. After writing the
61 * status, the PF driver will send an event of the form:
62 *
63 * 0 16 24 31
64 * | reserved | VFDI_EV_TYPE_STATUS | SEQ |
65 *
66 * In case the VF must be reset for any reason, the PF driver will
67 * send an event of the form:
68 *
69 * 0 16 24 31
70 * | reserved | VFDI_EV_TYPE_RESET | SEQ |
71 *
72 * It is then the responsibility of the VF driver to request
73 * reinitialisation of its queues.
74 */
75#define VFDI_EV_SEQ_LBN 24
76#define VFDI_EV_SEQ_WIDTH 8
77#define VFDI_EV_TYPE_LBN 16
78#define VFDI_EV_TYPE_WIDTH 8
79#define VFDI_EV_TYPE_REQ_WORD0 0
80#define VFDI_EV_TYPE_REQ_WORD1 1
81#define VFDI_EV_TYPE_REQ_WORD2 2
82#define VFDI_EV_TYPE_REQ_WORD3 3
83#define VFDI_EV_TYPE_STATUS 4
84#define VFDI_EV_TYPE_RESET 5
85#define VFDI_EV_DATA_LBN 0
86#define VFDI_EV_DATA_WIDTH 16
87
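
As an illustration of the four-event request handshake described in the DOC comment above, the sketch below shows how a VF driver might encode a page-aligned request address using these field definitions. The struct my_vf type, its msg_seqno member and the vf_post_user_ev() mailbox helper are assumptions made for this example; only the VFDI_EV_* layout comes from this header.

/* Sketch only: split the 64-bit request address into four USER_EV writes
 * (ADDR[0:15] first), incrementing the sequence number for each event.
 * vf_post_user_ev() stands in for whatever mechanism the VF driver uses
 * to write a 32-bit value to its FR_CZ_USR_EV mailbox.
 */
static void vf_send_vfdi_request(struct my_vf *vf, u64 req_dma_addr)
{
	unsigned int word;
	u32 ev;

	for (word = 0; word < 4; word++) {
		ev = (u32)((req_dma_addr >> (word * 16)) & 0xffff)
			<< VFDI_EV_DATA_LBN;
		ev |= (u32)(VFDI_EV_TYPE_REQ_WORD0 + word) << VFDI_EV_TYPE_LBN;
		ev |= (u32)(vf->msg_seqno++ & 0xff) << VFDI_EV_SEQ_LBN;
		vf_post_user_ev(vf, ev);
	}
}

The PF-side reassembly of these events, including resynchronisation on REQ_WORD0, is in efx_sriov_event() in siena_sriov.c above.
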
88struct vfdi_endpoint {
89 u8 mac_addr[ETH_ALEN];
90 __be16 tci;
91};
92
93/**
94 * enum vfdi_op - VFDI operation enumeration
95 * @VFDI_OP_RESPONSE: Indicates a response to the request.
96 * @VFDI_OP_INIT_EVQ: Initialize SRAM entries and initialize an EVQ.
97 * @VFDI_OP_INIT_RXQ: Initialize SRAM entries and initialize an RXQ.
98 * @VFDI_OP_INIT_TXQ: Initialize SRAM entries and initialize a TXQ.
99 * @VFDI_OP_FINI_ALL_QUEUES: Flush all queues, finalize all queues, then
100 * finalize the SRAM entries.
101 * @VFDI_OP_INSERT_FILTER: Insert a MAC filter targeting the given RXQ.
102 * @VFDI_OP_REMOVE_ALL_FILTERS: Remove all filters.
103 * @VFDI_OP_SET_STATUS_PAGE: Set the DMA page(s) used for status updates
104 * from PF and write the initial status.
105 * @VFDI_OP_CLEAR_STATUS_PAGE: Clear the DMA page(s) used for status
106 * updates from PF.
107 */
108enum vfdi_op {
109 VFDI_OP_RESPONSE = 0,
110 VFDI_OP_INIT_EVQ = 1,
111 VFDI_OP_INIT_RXQ = 2,
112 VFDI_OP_INIT_TXQ = 3,
113 VFDI_OP_FINI_ALL_QUEUES = 4,
114 VFDI_OP_INSERT_FILTER = 5,
115 VFDI_OP_REMOVE_ALL_FILTERS = 6,
116 VFDI_OP_SET_STATUS_PAGE = 7,
117 VFDI_OP_CLEAR_STATUS_PAGE = 8,
118 VFDI_OP_LIMIT,
119};
120
121/* Response codes for VFDI operations. Other values may be used in future. */
122#define VFDI_RC_SUCCESS 0
123#define VFDI_RC_ENOMEM (-12)
124#define VFDI_RC_EINVAL (-22)
125#define VFDI_RC_EOPNOTSUPP (-95)
126#define VFDI_RC_ETIMEDOUT (-110)
127
128/**
129 * struct vfdi_req - Request from VF driver to PF driver
130 * @op: Operation code or response indicator, taken from &enum vfdi_op.
131 * @rc: Response code. Set to 0 on success or a negative error code on failure.
132 * @u.init_evq.index: Index of event queue to create.
133 * @u.init_evq.buf_count: Number of 4k buffers backing event queue.
134 * @u.init_evq.addr: Array of length %u.init_evq.buf_count containing DMA
135 * address of each page backing the event queue.
136 * @u.init_rxq.index: Index of receive queue to create.
137 * @u.init_rxq.buf_count: Number of 4k buffers backing receive queue.
138 * @u.init_rxq.evq: Instance of event queue to target receive events at.
139 * @u.init_rxq.label: Label used in receive events.
140 * @u.init_rxq.flags: Receive queue flags (%VFDI_RXQ_FLAG_SCATTER_EN).
141 * @u.init_rxq.addr: Array of length %u.init_rxq.buf_count containing DMA
142 * address of each page backing the receive queue.
143 * @u.init_txq.index: Index of transmit queue to create.
144 * @u.init_txq.buf_count: Number of 4k buffers backing transmit queue.
145 * @u.init_txq.evq: Instance of event queue to target transmit completion
146 * events at.
147 * @u.init_txq.label: Label used in transmit completion events.
148 * @u.init_txq.flags: Checksum offload flags.
149 * @u.init_txq.addr: Array of length %u.init_txq.buf_count containing DMA
150 * address of each page backing the transmit queue.
151 * @u.mac_filter.rxq: Insert MAC filter at VF local address/VLAN targeting
152 * all traffic at this receive queue.
153 * @u.mac_filter.flags: MAC filter flags.
154 * @u.set_status_page.dma_addr: Base address for the &struct vfdi_status.
155 * This address must be such that the structure fits within a page.
156 * @u.set_status_page.peer_page_count: Number of additional pages the VF
157 * has provided into which peer addresses may be DMAd.
158 * @u.set_status_page.peer_page_addr: Array of DMA addresses of pages.
159 * If the number of peers exceeds 256, then the VF must provide
160 * additional pages in this array. The PF will then DMA up to
161 * 512 vfdi_endpoint structures into each page. These addresses
162 * must be page-aligned.
163 */
164struct vfdi_req {
165 u32 op;
166 u32 reserved1;
167 s32 rc;
168 u32 reserved2;
169 union {
170 struct {
171 u32 index;
172 u32 buf_count;
173 u64 addr[];
174 } init_evq;
175 struct {
176 u32 index;
177 u32 buf_count;
178 u32 evq;
179 u32 label;
180 u32 flags;
181#define VFDI_RXQ_FLAG_SCATTER_EN 1
182 u32 reserved;
183 u64 addr[];
184 } init_rxq;
185 struct {
186 u32 index;
187 u32 buf_count;
188 u32 evq;
189 u32 label;
190 u32 flags;
191#define VFDI_TXQ_FLAG_IP_CSUM_DIS 1
192#define VFDI_TXQ_FLAG_TCPUDP_CSUM_DIS 2
193 u32 reserved;
194 u64 addr[];
195 } init_txq;
196 struct {
197 u32 rxq;
198 u32 flags;
199#define VFDI_MAC_FILTER_FLAG_RSS 1
200#define VFDI_MAC_FILTER_FLAG_SCATTER 2
201 } mac_filter;
202 struct {
203 u64 dma_addr;
204 u64 peer_page_count;
205 u64 peer_page_addr[];
206 } set_status_page;
207 } u;
208};
209
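
To make the request layout concrete, here is a minimal sketch of a VF filling in a SET_STATUS_PAGE request in its shared 4K request page (the page whose DMA address is later sent via the REQ_WORD events). The helper name and its arguments are illustrative only; EFX_PAGE_SIZE is the 4K page size used throughout the driver.

/* Sketch: the caller must keep peer_page_count small enough that the
 * variable-length peer_page_addr[] array still fits within the request
 * page, mirroring the check the PF makes on receipt.
 */
static void vf_prepare_set_status_page(struct vfdi_req *req, u64 status_dma,
				       const u64 *peer_pages,
				       unsigned int peer_page_count)
{
	unsigned int i;

	memset(req, 0, EFX_PAGE_SIZE);
	req->op = VFDI_OP_SET_STATUS_PAGE;
	req->u.set_status_page.dma_addr = status_dma;
	req->u.set_status_page.peer_page_count = peer_page_count;
	for (i = 0; i < peer_page_count; i++)
		req->u.set_status_page.peer_page_addr[i] = peer_pages[i];
}

On receipt the PF validates the size, records the addresses and pushes the current peer list, as efx_vfdi_set_status_page() above shows.
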
210/**
211 * struct vfdi_status - Status provided by PF driver to VF driver
212 * @generation_start: A generation count DMA'd to VF *before* the
213 * rest of the structure.
214 * @generation_end: A generation count DMA'd to VF *after* the
215 * rest of the structure.
216 * @version: Version of this structure; currently set to 1. Later
217 * versions must either be layout-compatible or only be sent to VFs
218 * that specifically request them.
219 * @length: Total length of this structure including embedded tables
220 * @vi_scale: log2 the number of VIs available on this VF. This quantity
221 * is used by the hardware for register decoding.
222 * @max_tx_channels: The maximum number of transmit queues the VF can use.
223 * @rss_rxq_count: The number of receive queues present in the shared RSS
224 * indirection table.
225 * @peer_count: Total number of peers in the complete peer list. If larger
226 * than ARRAY_SIZE(%peers), then the VF must provide sufficient
227 * additional pages each of which is filled with vfdi_endpoint structures.
228 * @local: The MAC address and outer VLAN tag of *this* VF
229 * @peers: Table of peer addresses. The @tci fields in these structures
230 * are currently unused and must be ignored. Additional peers are
231 * written into any additional pages provided by the VF.
232 * @timer_quantum_ns: Timer quantum (nominal period between timer ticks)
233 * for interrupt moderation timers, in nanoseconds. This member is only
234 * present if @length is sufficiently large.
235 */
236struct vfdi_status {
237 u32 generation_start;
238 u32 generation_end;
239 u32 version;
240 u32 length;
241 u8 vi_scale;
242 u8 max_tx_channels;
243 u8 rss_rxq_count;
244 u8 reserved1;
245 u16 peer_count;
246 u16 reserved2;
247 struct vfdi_endpoint local;
248 struct vfdi_endpoint peers[256];
249
250 /* Members below here extend version 1 of this structure */
251 u32 timer_quantum_ns;
252};
253
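The generation_start/generation_end pair lets the VF detect a torn update while the PF is still DMAing a new status snapshot. Below is a minimal consumer-side sketch; the vf_status mapping and the retry bound are assumptions for the example, not part of this interface.

/* Sketch: generation_start is DMA'd before the body and generation_end
 * after it, so reading end first and start last and comparing the two
 * detects any overlap with an in-flight update.
 */
static int vf_read_status(const struct vfdi_status *vf_status,
			  struct vfdi_status *snapshot)
{
	unsigned int retries = 100;	/* arbitrary bound for the sketch */
	u32 gen;

	do {
		gen = vf_status->generation_end;
		rmb();		/* read generation_end before the body */
		memcpy(snapshot, vf_status, sizeof(*snapshot));
		rmb();		/* read the body before generation_start */
		if (vf_status->generation_start == gen)
			return 0;
	} while (--retries);

	return -EAGAIN;
}

In practice the PF also sends a VFDI_EV_TYPE_STATUS event after the final DMA, so the loop should rarely need to retry.
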
254#endif