 drivers/net/ethernet/sfc/Kconfig       |    8 +
 drivers/net/ethernet/sfc/Makefile      |    1 +
 drivers/net/ethernet/sfc/efx.c         |   70 +-
 drivers/net/ethernet/sfc/ethtool.c     |    3 +-
 drivers/net/ethernet/sfc/mcdi.c        |   34 +
 drivers/net/ethernet/sfc/mcdi.h        |    2 +
 drivers/net/ethernet/sfc/mcdi_mac.c    |    2 +-
 drivers/net/ethernet/sfc/net_driver.h  |   32 +-
 drivers/net/ethernet/sfc/nic.c         |   79 +-
 drivers/net/ethernet/sfc/nic.h         |   89 ++
 drivers/net/ethernet/sfc/siena.c       |    2 +
 drivers/net/ethernet/sfc/siena_sriov.c | 1642 ++++++++++++++++++++++++++++++
 drivers/net/ethernet/sfc/vfdi.h        |  254 +++++
 13 files changed, 2192 insertions(+), 26 deletions(-)
diff --git a/drivers/net/ethernet/sfc/Kconfig b/drivers/net/ethernet/sfc/Kconfig
index 8d423544a7e6..fb3cbc27063c 100644
--- a/drivers/net/ethernet/sfc/Kconfig
+++ b/drivers/net/ethernet/sfc/Kconfig
@@ -26,3 +26,11 @@ config SFC_MCDI_MON
 	---help---
 	  This exposes the on-board firmware-managed sensors as a
 	  hardware monitor device.
+config SFC_SRIOV
+	bool "Solarflare SFC9000-family SR-IOV support"
+	depends on SFC && PCI_IOV
+	default y
+	---help---
+	  This enables support for the SFC9000 I/O Virtualization
+	  features, allowing accelerated network performance in
+	  virtualized environments.
diff --git a/drivers/net/ethernet/sfc/Makefile b/drivers/net/ethernet/sfc/Makefile
index 3fa2e25ccc45..ea1f8db57318 100644
--- a/drivers/net/ethernet/sfc/Makefile
+++ b/drivers/net/ethernet/sfc/Makefile
@@ -4,5 +4,6 @@ sfc-y += efx.o nic.o falcon.o siena.o tx.o rx.o filter.o \
 	   tenxpress.o txc43128_phy.o falcon_boards.o \
 	   mcdi.o mcdi_phy.o mcdi_mon.o
 sfc-$(CONFIG_SFC_MTD)	+= mtd.o
+sfc-$(CONFIG_SFC_SRIOV)	+= siena_sriov.o
 
 obj-$(CONFIG_SFC)	+= sfc.o
diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c
index c9c306aef2d9..ac571cf14485 100644
--- a/drivers/net/ethernet/sfc/efx.c
+++ b/drivers/net/ethernet/sfc/efx.c
@@ -1175,25 +1175,40 @@ static unsigned int efx_wanted_parallelism(struct efx_nic *efx)
 	unsigned int count;
 	int cpu;
 
-	if (rss_cpus)
-		return rss_cpus;
+	if (rss_cpus) {
+		count = rss_cpus;
+	} else {
+		if (unlikely(!zalloc_cpumask_var(&thread_mask, GFP_KERNEL))) {
+			netif_warn(efx, probe, efx->net_dev,
+				   "RSS disabled due to allocation failure\n");
+			return 1;
+		}
 
-	if (unlikely(!zalloc_cpumask_var(&thread_mask, GFP_KERNEL))) {
-		netif_warn(efx, probe, efx->net_dev,
-			   "RSS disabled due to allocation failure\n");
-		return 1;
+		count = 0;
+		for_each_online_cpu(cpu) {
+			if (!cpumask_test_cpu(cpu, thread_mask)) {
+				++count;
+				cpumask_or(thread_mask, thread_mask,
+					   topology_thread_cpumask(cpu));
+			}
+		}
+
+		free_cpumask_var(thread_mask);
 	}
 
-	count = 0;
-	for_each_online_cpu(cpu) {
-		if (!cpumask_test_cpu(cpu, thread_mask)) {
-			++count;
-			cpumask_or(thread_mask, thread_mask,
-				   topology_thread_cpumask(cpu));
-		}
+	/* If RSS is requested for the PF *and* VFs then we can't write RSS
+	 * table entries that are inaccessible to VFs
+	 */
+	if (efx_sriov_wanted(efx) && efx_vf_size(efx) > 1 &&
+	    count > efx_vf_size(efx)) {
+		netif_warn(efx, probe, efx->net_dev,
+			   "Reducing number of RSS channels from %u to %u for "
+			   "VF support. Increase vf-msix-limit to use more "
+			   "channels on the PF.\n",
+			   count, efx_vf_size(efx));
+		count = efx_vf_size(efx);
 	}
 
-	free_cpumask_var(thread_mask);
 	return count;
 }
 
@@ -1327,6 +1342,10 @@ static int efx_probe_interrupts(struct efx_nic *efx)
 		}
 	}
 
+	/* RSS might be usable on VFs even if it is disabled on the PF */
+	efx->rss_spread = (efx->n_rx_channels > 1 ?
+			   efx->n_rx_channels : efx_vf_size(efx));
+
 	return 0;
 }
 
@@ -1426,7 +1445,7 @@ static int efx_probe_nic(struct efx_nic *efx)
 	get_random_bytes(&efx->rx_hash_key, sizeof(efx->rx_hash_key));
 	for (i = 0; i < ARRAY_SIZE(efx->rx_indir_table); i++)
 		efx->rx_indir_table[i] =
-			ethtool_rxfh_indir_default(i, efx->n_rx_channels);
+			ethtool_rxfh_indir_default(i, efx->rss_spread);
 
 	efx_set_channels(efx);
 	netif_set_real_num_tx_queues(efx->net_dev, efx->n_tx_channels);
@@ -1915,6 +1934,7 @@ static int efx_set_mac_address(struct net_device *net_dev, void *data)
 	}
 
 	memcpy(net_dev->dev_addr, new_addr, net_dev->addr_len);
+	efx_sriov_mac_address_changed(efx);
 
 	/* Reconfigure the MAC */
 	mutex_lock(&efx->mac_lock);
@@ -1981,6 +2001,12 @@ static const struct net_device_ops efx_netdev_ops = {
 	.ndo_set_mac_address = efx_set_mac_address,
 	.ndo_set_rx_mode = efx_set_rx_mode,
 	.ndo_set_features = efx_set_features,
+#ifdef CONFIG_SFC_SRIOV
+	.ndo_set_vf_mac = efx_sriov_set_vf_mac,
+	.ndo_set_vf_vlan = efx_sriov_set_vf_vlan,
+	.ndo_set_vf_spoofchk = efx_sriov_set_vf_spoofchk,
+	.ndo_get_vf_config = efx_sriov_get_vf_config,
+#endif
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	.ndo_poll_controller = efx_netpoll,
 #endif
@@ -2150,6 +2176,7 @@ int efx_reset_up(struct efx_nic *efx, enum reset_type method, bool ok)
 
 	efx_start_interrupts(efx, false);
 	efx_restore_filters(efx);
+	efx_sriov_reset(efx);
 
 	mutex_unlock(&efx->mac_lock);
 
@@ -2440,6 +2467,7 @@ static void efx_pci_remove(struct pci_dev *pci_dev)
 	rtnl_unlock();
 
 	efx_stop_interrupts(efx, false);
+	efx_sriov_fini(efx);
 	efx_unregister_netdev(efx);
 
 	efx_mtd_remove(efx);
@@ -2581,6 +2609,11 @@ static int __devinit efx_pci_probe(struct pci_dev *pci_dev,
 	if (rc)
 		goto fail4;
 
+	rc = efx_sriov_init(efx);
+	if (rc)
+		netif_err(efx, probe, efx->net_dev,
+			  "SR-IOV can't be enabled rc %d\n", rc);
+
 	netif_dbg(efx, probe, efx->net_dev, "initialisation successful\n");
 
 	/* Try to create MTDs, but allow this to fail */
@@ -2732,6 +2765,10 @@ static int __init efx_init_module(void)
 	if (rc)
 		goto err_notifier;
 
+	rc = efx_init_sriov();
+	if (rc)
+		goto err_sriov;
+
 	reset_workqueue = create_singlethread_workqueue("sfc_reset");
 	if (!reset_workqueue) {
 		rc = -ENOMEM;
@@ -2747,6 +2784,8 @@ static int __init efx_init_module(void)
  err_pci:
 	destroy_workqueue(reset_workqueue);
  err_reset:
+	efx_fini_sriov();
+ err_sriov:
 	unregister_netdevice_notifier(&efx_netdev_notifier);
  err_notifier:
 	return rc;
@@ -2758,6 +2797,7 @@ static void __exit efx_exit_module(void)
 
 	pci_unregister_driver(&efx_pci_driver);
 	destroy_workqueue(reset_workqueue);
+	efx_fini_sriov();
 	unregister_netdevice_notifier(&efx_netdev_notifier);
 
 }
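
Illustration (not part of the patch): the clamp in efx_wanted_parallelism() above exists because RSS indirection-table entries past efx_vf_size() would address VIs a VF cannot reach, so the PF gives up the extra channels rather than break RSS on the VFs. A minimal standalone sketch with made-up numbers:

/* Illustration only -- mirrors the clamp in efx_wanted_parallelism(). */
#include <stdio.h>

static unsigned int clamp_rss_channels(unsigned int count,
				       unsigned int vf_size,
				       int sriov_wanted)
{
	if (sriov_wanted && vf_size > 1 && count > vf_size)
		count = vf_size;
	return count;
}

int main(void)
{
	/* 16 usable cores, vi_scale = 2 so efx_vf_size() == 4 */
	printf("%u RSS channels\n", clamp_rss_channels(16, 1u << 2, 1));
	return 0;	/* prints "4 RSS channels" */
}

As the warning text says, raising the firmware's vf-msix-limit (and hence the VF size) is the way to keep more RSS channels on the PF.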
diff --git a/drivers/net/ethernet/sfc/ethtool.c b/drivers/net/ethernet/sfc/ethtool.c
index 83191151b650..f22f45f515a8 100644
--- a/drivers/net/ethernet/sfc/ethtool.c
+++ b/drivers/net/ethernet/sfc/ethtool.c
@@ -1085,7 +1085,8 @@ static u32 efx_ethtool_get_rxfh_indir_size(struct net_device *net_dev)
 {
 	struct efx_nic *efx = netdev_priv(net_dev);
 
-	return (efx_nic_rev(efx) < EFX_REV_FALCON_B0 ?
+	return ((efx_nic_rev(efx) < EFX_REV_FALCON_B0 ||
+		 efx->n_rx_channels == 1) ?
 		0 : ARRAY_SIZE(efx->rx_indir_table));
 }
 
diff --git a/drivers/net/ethernet/sfc/mcdi.c b/drivers/net/ethernet/sfc/mcdi.c
index 619f63a66ce7..17b6463e459c 100644
--- a/drivers/net/ethernet/sfc/mcdi.c
+++ b/drivers/net/ethernet/sfc/mcdi.c
@@ -560,6 +560,9 @@ void efx_mcdi_process_event(struct efx_channel *channel,
 	case MCDI_EVENT_CODE_MAC_STATS_DMA:
 		/* MAC stats are gather lazily. We can ignore this. */
 		break;
+	case MCDI_EVENT_CODE_FLR:
+		efx_sriov_flr(efx, MCDI_EVENT_FIELD(*event, FLR_VF));
+		break;
 
 	default:
 		netif_err(efx, hw, efx->net_dev, "Unknown MCDI event 0x%x\n",
@@ -1154,6 +1157,37 @@ fail:
 	return rc;
 }
 
+int efx_mcdi_flush_rxqs(struct efx_nic *efx)
+{
+	struct efx_channel *channel;
+	struct efx_rx_queue *rx_queue;
+	__le32 *qid;
+	int rc, count;
+
+	qid = kmalloc(EFX_MAX_CHANNELS * sizeof(*qid), GFP_KERNEL);
+	if (qid == NULL)
+		return -ENOMEM;
+
+	count = 0;
+	efx_for_each_channel(channel, efx) {
+		efx_for_each_channel_rx_queue(rx_queue, channel) {
+			if (rx_queue->flush_pending) {
+				rx_queue->flush_pending = false;
+				atomic_dec(&efx->rxq_flush_pending);
+				qid[count++] = cpu_to_le32(
+					efx_rx_queue_index(rx_queue));
+			}
+		}
+	}
+
+	rc = efx_mcdi_rpc(efx, MC_CMD_FLUSH_RX_QUEUES, (u8 *)qid,
+			  count * sizeof(*qid), NULL, 0, NULL);
+	WARN_ON(rc > 0);
+
+	kfree(qid);
+
+	return rc;
+}
+
 
 int efx_mcdi_wol_filter_reset(struct efx_nic *efx)
 {
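
Illustration (not part of the patch): the request body that efx_mcdi_flush_rxqs() above hands to efx_mcdi_rpc() is nothing more than a packed array of little-endian 32-bit RX queue indices. A user-space sketch of the same framing, under the assumption that the firmware-defined MC_CMD_FLUSH_RX_QUEUES input really is just that array:

/* Illustration only -- frames a FLUSH_RX_QUEUES-style payload. */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static size_t build_flush_payload(uint8_t *buf, const uint32_t *qid, size_t n)
{
	size_t i;

	for (i = 0; i < n; i++) {
		uint32_t le = qid[i];	/* stand-in for cpu_to_le32() */
		memcpy(buf + i * sizeof(le), &le, sizeof(le));
	}
	return n * sizeof(*qid);	/* inbuf length passed to the RPC */
}

int main(void)
{
	uint32_t qid[] = { 0, 2, 5 };	/* RX queues with a pending flush */
	uint8_t buf[sizeof(qid)];

	printf("%zu byte payload\n", build_flush_payload(buf, qid, 3));
	return 0;	/* prints "12 byte payload" */
}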
diff --git a/drivers/net/ethernet/sfc/mcdi.h b/drivers/net/ethernet/sfc/mcdi.h
index fbaa6efcd744..0bdf3e331832 100644
--- a/drivers/net/ethernet/sfc/mcdi.h
+++ b/drivers/net/ethernet/sfc/mcdi.h
@@ -146,6 +146,8 @@ extern int efx_mcdi_wol_filter_set_magic(struct efx_nic *efx,
 extern int efx_mcdi_wol_filter_get_magic(struct efx_nic *efx, int *id_out);
 extern int efx_mcdi_wol_filter_remove(struct efx_nic *efx, int id);
 extern int efx_mcdi_wol_filter_reset(struct efx_nic *efx);
+extern int efx_mcdi_flush_rxqs(struct efx_nic *efx);
+extern int efx_mcdi_set_mac(struct efx_nic *efx);
 extern int efx_mcdi_mac_stats(struct efx_nic *efx, dma_addr_t dma_addr,
 			      u32 dma_len, int enable, int clear);
 extern int efx_mcdi_mac_reconfigure(struct efx_nic *efx);
diff --git a/drivers/net/ethernet/sfc/mcdi_mac.c b/drivers/net/ethernet/sfc/mcdi_mac.c
index 98afe1c1165d..1003f309cba7 100644
--- a/drivers/net/ethernet/sfc/mcdi_mac.c
+++ b/drivers/net/ethernet/sfc/mcdi_mac.c
@@ -12,7 +12,7 @@
 #include "mcdi.h"
 #include "mcdi_pcol.h"
 
-static int efx_mcdi_set_mac(struct efx_nic *efx)
+int efx_mcdi_set_mac(struct efx_nic *efx)
 {
 	u32 reject, fcntl;
 	u8 cmdbytes[MC_CMD_SET_MAC_IN_LEN];
diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h
index 7870cefcb203..3fbec458c323 100644
--- a/drivers/net/ethernet/sfc/net_driver.h
+++ b/drivers/net/ethernet/sfc/net_driver.h
@@ -24,6 +24,7 @@
 #include <linux/device.h>
 #include <linux/highmem.h>
 #include <linux/workqueue.h>
+#include <linux/mutex.h>
 #include <linux/vmalloc.h>
 #include <linux/i2c.h>
 
@@ -54,7 +55,8 @@
 
 #define EFX_MAX_CHANNELS 32U
 #define EFX_MAX_RX_QUEUES EFX_MAX_CHANNELS
-#define EFX_MAX_EXTRA_CHANNELS 0U
+#define EFX_EXTRA_CHANNEL_IOV 0
+#define EFX_MAX_EXTRA_CHANNELS 1U
 
 /* Checksum generation is a per-queue option in hardware, so each
  * queue visible to the networking core is backed by two hardware TX
@@ -629,6 +631,8 @@ union efx_multicast_hash {
 };
 
 struct efx_filter_state;
+struct efx_vf;
+struct vfdi_status;
 
 /**
  * struct efx_nic - an Efx NIC
@@ -712,6 +716,17 @@ struct efx_filter_state;
 *	completed (either success or failure). Not used when MCDI is used to
 *	flush receive queues.
 * @flush_wq: wait queue used by efx_nic_flush_queues() to wait for flush completions.
+ * @vf: Array of &struct efx_vf objects.
+ * @vf_count: Number of VFs intended to be enabled.
+ * @vf_init_count: Number of VFs that have been fully initialised.
+ * @vi_scale: log2 number of vnics per VF.
+ * @vf_buftbl_base: The zeroth buffer table index used to back VF queues.
+ * @vfdi_status: Common VFDI status page to be dmad to VF address space.
+ * @local_addr_list: List of local addresses. Protected by %local_lock.
+ * @local_page_list: List of DMA addressable pages used to broadcast
+ *	%local_addr_list. Protected by %local_lock.
+ * @local_lock: Mutex protecting %local_addr_list and %local_page_list.
+ * @peer_work: Work item to broadcast peer addresses to VMs.
 * @monitor_work: Hardware monitor workitem
 * @biu_lock: BIU (bus interface unit) lock
 * @last_irq_cpu: Last CPU to handle a possible test interrupt. This
@@ -762,6 +777,7 @@ struct efx_nic {
 	unsigned next_buffer_table;
 	unsigned n_channels;
 	unsigned n_rx_channels;
+	unsigned rss_spread;
 	unsigned tx_channel_offset;
 	unsigned n_tx_channels;
 	unsigned int rx_buffer_len;
@@ -820,6 +836,20 @@ struct efx_nic {
 	atomic_t rxq_flush_outstanding;
 	wait_queue_head_t flush_wq;
 
+#ifdef CONFIG_SFC_SRIOV
+	struct efx_channel *vfdi_channel;
+	struct efx_vf *vf;
+	unsigned vf_count;
+	unsigned vf_init_count;
+	unsigned vi_scale;
+	unsigned vf_buftbl_base;
+	struct efx_buffer vfdi_status;
+	struct list_head local_addr_list;
+	struct list_head local_page_list;
+	struct mutex local_lock;
+	struct work_struct peer_work;
+#endif
+
 	/* The following fields may be written more often */
 
 	struct delayed_work monitor_work ____cacheline_aligned_in_smp;
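
Illustration (not part of the patch): @vfdi_status is pushed to each VF with a seqlock-style generation protocol — __efx_sriov_push_vf_status() in siena_sriov.c below DMAs generation_start first, the body next and generation_end last. A sketch of the consumer side a VF driver would need; the field names and layout here are assumptions based on the kernel-doc, the real struct vfdi_status lives in vfdi.h (not shown in this excerpt):

/* Illustration only -- reader side of the generation-count protocol. */
#include <stdint.h>

struct vfdi_status_stub {
	uint32_t generation_start;
	/* ... version, length, local/peer endpoint addresses ... */
	uint32_t generation_end;
};

/* Returns nonzero if the copied-out body was a consistent snapshot.
 * A real VF driver needs read barriers (rmb()) where marked, since the
 * body is only valid if both generations match around the copy.
 */
static int vfdi_snapshot_ok(const volatile struct vfdi_status_stub *s)
{
	uint32_t gen = s->generation_start;

	/* rmb(); copy out the body here; rmb(); */
	return s->generation_end == gen;
}

The PF never needs to retry: it bumps generation_start before the copy and writes generation_end last, so a torn snapshot can only be observed (and retried) on the VF side.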
diff --git a/drivers/net/ethernet/sfc/nic.c b/drivers/net/ethernet/sfc/nic.c
index 747cf9439164..2bf4283f05fe 100644
--- a/drivers/net/ethernet/sfc/nic.c
+++ b/drivers/net/ethernet/sfc/nic.c
@@ -264,6 +264,10 @@ static int efx_alloc_special_buffer(struct efx_nic *efx,
 	/* Select new buffer ID */
 	buffer->index = efx->next_buffer_table;
 	efx->next_buffer_table += buffer->entries;
+#ifdef CONFIG_SFC_SRIOV
+	BUG_ON(efx_sriov_enabled(efx) &&
+	       efx->vf_buftbl_base < efx->next_buffer_table);
+#endif
 
 	netif_dbg(efx, probe, efx->net_dev,
 		  "allocating special buffers %d-%d at %llx+%x "
@@ -693,6 +697,16 @@ int efx_nic_flush_queues(struct efx_nic *efx)
 	}
 
 	while (timeout && atomic_read(&efx->drain_pending) > 0) {
+		/* If SRIOV is enabled, then offload receive queue flushing to
+		 * the firmware (though we will still have to poll for
+		 * completion). If that fails, fall back to the old scheme.
+		 */
+		if (efx_sriov_enabled(efx)) {
+			rc = efx_mcdi_flush_rxqs(efx);
+			if (!rc)
+				goto wait;
+		}
+
 		/* The hardware supports four concurrent rx flushes, each of
 		 * which may need to be retried if there is an outstanding
 		 * descriptor fetch
@@ -712,6 +726,7 @@ int efx_nic_flush_queues(struct efx_nic *efx)
 			}
 		}
 
+	wait:
 		timeout = wait_event_timeout(efx->flush_wq, efx_flush_wake(efx),
 					     timeout);
 	}
@@ -1102,11 +1117,13 @@ efx_handle_driver_event(struct efx_channel *channel, efx_qword_t *event)
 		netif_vdbg(efx, hw, efx->net_dev, "channel %d TXQ %d flushed\n",
 			   channel->channel, ev_sub_data);
 		efx_handle_tx_flush_done(efx, event);
+		efx_sriov_tx_flush_done(efx, event);
 		break;
 	case FSE_AZ_RX_DESCQ_FLS_DONE_EV:
 		netif_vdbg(efx, hw, efx->net_dev, "channel %d RXQ %d flushed\n",
 			   channel->channel, ev_sub_data);
 		efx_handle_rx_flush_done(efx, event);
+		efx_sriov_rx_flush_done(efx, event);
 		break;
 	case FSE_AZ_EVQ_INIT_DONE_EV:
 		netif_dbg(efx, hw, efx->net_dev,
@@ -1138,16 +1155,24 @@ efx_handle_driver_event(struct efx_channel *channel, efx_qword_t *event)
 				    RESET_TYPE_DISABLE);
 		break;
 	case FSE_BZ_RX_DSC_ERROR_EV:
-		netif_err(efx, rx_err, efx->net_dev,
-			  "RX DMA Q %d reports descriptor fetch error."
-			  " RX Q %d is disabled.\n", ev_sub_data, ev_sub_data);
-		efx_schedule_reset(efx, RESET_TYPE_RX_DESC_FETCH);
+		if (ev_sub_data < EFX_VI_BASE) {
+			netif_err(efx, rx_err, efx->net_dev,
+				  "RX DMA Q %d reports descriptor fetch error."
+				  " RX Q %d is disabled.\n", ev_sub_data,
+				  ev_sub_data);
+			efx_schedule_reset(efx, RESET_TYPE_RX_DESC_FETCH);
+		} else
+			efx_sriov_desc_fetch_err(efx, ev_sub_data);
 		break;
 	case FSE_BZ_TX_DSC_ERROR_EV:
-		netif_err(efx, tx_err, efx->net_dev,
-			  "TX DMA Q %d reports descriptor fetch error."
-			  " TX Q %d is disabled.\n", ev_sub_data, ev_sub_data);
-		efx_schedule_reset(efx, RESET_TYPE_TX_DESC_FETCH);
+		if (ev_sub_data < EFX_VI_BASE) {
+			netif_err(efx, tx_err, efx->net_dev,
+				  "TX DMA Q %d reports descriptor fetch error."
+				  " TX Q %d is disabled.\n", ev_sub_data,
+				  ev_sub_data);
+			efx_schedule_reset(efx, RESET_TYPE_TX_DESC_FETCH);
+		} else
+			efx_sriov_desc_fetch_err(efx, ev_sub_data);
 		break;
 	default:
 		netif_vdbg(efx, hw, efx->net_dev,
@@ -1207,6 +1232,9 @@ int efx_nic_process_eventq(struct efx_channel *channel, int budget)
 		case FSE_AZ_EV_CODE_DRIVER_EV:
 			efx_handle_driver_event(channel, &event);
 			break;
+		case FSE_CZ_EV_CODE_USER_EV:
+			efx_sriov_event(channel, &event);
+			break;
 		case FSE_CZ_EV_CODE_MCDI_EV:
 			efx_mcdi_process_event(channel, &event);
 			break;
@@ -1609,6 +1637,15 @@ void efx_nic_fini_interrupt(struct efx_nic *efx)
 		free_irq(efx->legacy_irq, efx);
 }
 
+/* Looks at available SRAM resources and works out how many queues we
+ * can support, and where things like descriptor caches should live.
+ *
+ * SRAM is split up as follows:
+ *     0                          buftbl entries for channels
+ *     efx->vf_buftbl_base        buftbl entries for SR-IOV
+ *     efx->rx_dc_base            RX descriptor caches
+ *     efx->tx_dc_base            TX descriptor caches
+ */
 void efx_nic_dimension_resources(struct efx_nic *efx, unsigned sram_lim_qw)
 {
 	unsigned vi_count, buftbl_min;
@@ -1622,6 +1659,32 @@ void efx_nic_dimension_resources(struct efx_nic *efx, unsigned sram_lim_qw)
 		       * sizeof(efx_qword_t) / EFX_BUF_SIZE);
 	vi_count = max(efx->n_channels, efx->n_tx_channels * EFX_TXQ_TYPES);
 
+#ifdef CONFIG_SFC_SRIOV
+	if (efx_sriov_wanted(efx)) {
+		unsigned vi_dc_entries, buftbl_free, entries_per_vf, vf_limit;
+
+		efx->vf_buftbl_base = buftbl_min;
+
+		vi_dc_entries = RX_DC_ENTRIES + TX_DC_ENTRIES;
+		vi_count = max(vi_count, EFX_VI_BASE);
+		buftbl_free = (sram_lim_qw - buftbl_min -
+			       vi_count * vi_dc_entries);
+
+		entries_per_vf = ((vi_dc_entries + EFX_VF_BUFTBL_PER_VI) *
+				  efx_vf_size(efx));
+		vf_limit = min(buftbl_free / entries_per_vf,
+			       (1024U - EFX_VI_BASE) >> efx->vi_scale);
+
+		if (efx->vf_count > vf_limit) {
+			netif_err(efx, probe, efx->net_dev,
+				  "Reducing VF count from %d to %d\n",
+				  efx->vf_count, vf_limit);
+			efx->vf_count = vf_limit;
+		}
+		vi_count += efx->vf_count * efx_vf_size(efx);
+	}
+#endif
+
 	efx->tx_dc_base = sram_lim_qw - vi_count * TX_DC_ENTRIES;
 	efx->rx_dc_base = efx->tx_dc_base - vi_count * RX_DC_ENTRIES;
 }
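
Illustration (not part of the patch): the vf_limit arithmetic in efx_nic_dimension_resources() above is easier to follow with numbers plugged in. A standalone sketch; every constant below is an assumed example value, not necessarily Siena's real configuration:

/* Illustration only -- the VF-limit arithmetic with example inputs. */
#include <stdio.h>

int main(void)
{
	unsigned sram_lim_qw = 131072;	/* assumed SRAM limit, in qwords */
	unsigned buftbl_min = 4096;	/* assumed PF buffer-table need */
	unsigned vi_dc_entries = 64 + 16; /* RX_DC + TX_DC entries (assumed) */
	unsigned vi_count = 128;	/* max(..., EFX_VI_BASE) */
	unsigned vf_buftbl_per_vi = 32;	/* EFX_VF_BUFTBL_PER_VI, assuming
					 * 4096-entry DMA queues */
	unsigned vi_scale = 2;		/* efx_vf_size() == 4 */
	unsigned vf_count = 127;	/* EFX_VF_COUNT_MAX */

	unsigned buftbl_free = sram_lim_qw - buftbl_min -
			       vi_count * vi_dc_entries;
	unsigned entries_per_vf = (vi_dc_entries + vf_buftbl_per_vi)
				  << vi_scale;
	unsigned vf_limit_buftbl = buftbl_free / entries_per_vf;
	unsigned vf_limit_vis = (1024u - 128u) >> vi_scale;
	unsigned vf_limit = vf_limit_buftbl < vf_limit_vis ?
			    vf_limit_buftbl : vf_limit_vis;

	/* prints "vf_limit = min(260, 224) = 224; vf_count stays 127" */
	printf("vf_limit = min(%u, %u) = %u; vf_count stays %u\n",
	       vf_limit_buftbl, vf_limit_vis, vf_limit,
	       vf_count > vf_limit ? vf_limit : vf_count);
	return 0;
}

Note the two independent ceilings: free buffer-table space, and the number of VI ranges that fit between EFX_VI_BASE and the 1024-VI hardware limit.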
diff --git a/drivers/net/ethernet/sfc/nic.h b/drivers/net/ethernet/sfc/nic.h
index 5df7da8b8ebf..246c4140453c 100644
--- a/drivers/net/ethernet/sfc/nic.h
+++ b/drivers/net/ethernet/sfc/nic.h
@@ -169,6 +169,95 @@ static inline struct efx_mcdi_mon *efx_mcdi_mon(struct efx_nic *efx)
 }
 #endif
 
+/*
+ * On the SFC9000 family each port is associated with 1 PCI physical
+ * function (PF) handled by sfc and a configurable number of virtual
+ * functions (VFs) that may be handled by some other driver, often in
+ * a VM guest.  The queue pointer registers are mapped in both PF and
+ * VF BARs such that an 8K region provides access to a single RX, TX
+ * and event queue (collectively a Virtual Interface, VI or VNIC).
+ *
+ * The PF has access to all 1024 VIs while VFs are mapped to VIs
+ * according to VI_BASE and VI_SCALE: VF i has access to VIs numbered
+ * in range [VI_BASE + (i << VI_SCALE), VI_BASE + ((i + 1) << VI_SCALE)).
+ * The number of VIs and the VI_SCALE value are configurable but must
+ * be established at boot time by firmware.
+ */
+
+/* Maximum VI_SCALE parameter supported by Siena */
+#define EFX_VI_SCALE_MAX 6
+/* Base VI to use for SR-IOV. Must be aligned to (1 << EFX_VI_SCALE_MAX),
+ * so this is the smallest allowed value. */
+#define EFX_VI_BASE 128U
+/* Maximum number of VFs allowed */
+#define EFX_VF_COUNT_MAX 127
+/* Limit EVQs on VFs to be only 8k to reduce buffer table reservation */
+#define EFX_MAX_VF_EVQ_SIZE 8192UL
+/* The number of buffer table entries reserved for each VI on a VF */
+#define EFX_VF_BUFTBL_PER_VI					\
+	((EFX_MAX_VF_EVQ_SIZE + 2 * EFX_MAX_DMAQ_SIZE) *	\
+	 sizeof(efx_qword_t) / EFX_BUF_SIZE)
+
+#ifdef CONFIG_SFC_SRIOV
+
+static inline bool efx_sriov_wanted(struct efx_nic *efx)
+{
+	return efx->vf_count != 0;
+}
+static inline bool efx_sriov_enabled(struct efx_nic *efx)
+{
+	return efx->vf_init_count != 0;
+}
+static inline unsigned int efx_vf_size(struct efx_nic *efx)
+{
+	return 1 << efx->vi_scale;
+}
+
+extern int efx_init_sriov(void);
+extern void efx_sriov_probe(struct efx_nic *efx);
+extern int efx_sriov_init(struct efx_nic *efx);
+extern void efx_sriov_mac_address_changed(struct efx_nic *efx);
+extern void efx_sriov_tx_flush_done(struct efx_nic *efx, efx_qword_t *event);
+extern void efx_sriov_rx_flush_done(struct efx_nic *efx, efx_qword_t *event);
+extern void efx_sriov_event(struct efx_channel *channel, efx_qword_t *event);
+extern void efx_sriov_desc_fetch_err(struct efx_nic *efx, unsigned dmaq);
+extern void efx_sriov_flr(struct efx_nic *efx, unsigned flr);
+extern void efx_sriov_reset(struct efx_nic *efx);
+extern void efx_sriov_fini(struct efx_nic *efx);
+extern void efx_fini_sriov(void);
+
+#else
+
+static inline bool efx_sriov_wanted(struct efx_nic *efx) { return false; }
+static inline bool efx_sriov_enabled(struct efx_nic *efx) { return false; }
+static inline unsigned int efx_vf_size(struct efx_nic *efx) { return 0; }
+
+static inline int efx_init_sriov(void) { return 0; }
+static inline void efx_sriov_probe(struct efx_nic *efx) {}
+static inline int efx_sriov_init(struct efx_nic *efx) { return -EOPNOTSUPP; }
+static inline void efx_sriov_mac_address_changed(struct efx_nic *efx) {}
+static inline void efx_sriov_tx_flush_done(struct efx_nic *efx,
+					   efx_qword_t *event) {}
+static inline void efx_sriov_rx_flush_done(struct efx_nic *efx,
+					   efx_qword_t *event) {}
+static inline void efx_sriov_event(struct efx_channel *channel,
+				   efx_qword_t *event) {}
+static inline void efx_sriov_desc_fetch_err(struct efx_nic *efx, unsigned dmaq) {}
+static inline void efx_sriov_flr(struct efx_nic *efx, unsigned flr) {}
+static inline void efx_sriov_reset(struct efx_nic *efx) {}
+static inline void efx_sriov_fini(struct efx_nic *efx) {}
+static inline void efx_fini_sriov(void) {}
+
+#endif
+
+extern int efx_sriov_set_vf_mac(struct net_device *dev, int vf, u8 *mac);
+extern int efx_sriov_set_vf_vlan(struct net_device *dev, int vf,
+				 u16 vlan, u8 qos);
+extern int efx_sriov_get_vf_config(struct net_device *dev, int vf,
+				   struct ifla_vf_info *ivf);
+extern int efx_sriov_set_vf_spoofchk(struct net_device *net_dev, int vf,
+				     bool spoofchk);
+
 extern const struct efx_nic_type falcon_a1_nic_type;
 extern const struct efx_nic_type falcon_b0_nic_type;
 extern const struct efx_nic_type siena_a0_nic_type;
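
Illustration (not part of the patch): the VI_BASE/VI_SCALE mapping described in the comment block above, as a standalone sketch mirroring abs_index() from siena_sriov.c below:

/* Illustration only -- VF i owns the absolute VIs
 * [VI_BASE + (i << VI_SCALE), VI_BASE + ((i + 1) << VI_SCALE)).
 */
#include <stdio.h>

#define EFX_VI_BASE 128u

static unsigned abs_vi(unsigned vf, unsigned vi_scale, unsigned rel)
{
	return EFX_VI_BASE + (vf << vi_scale) + rel;
}

int main(void)
{
	unsigned vi_scale = 2;	/* efx_vf_size() == 4 VIs per VF */

	/* prints "VF 3 owns VIs 140..143" */
	printf("VF 3 owns VIs %u..%u\n",
	       abs_vi(3, vi_scale, 0),
	       abs_vi(3, vi_scale, (1u << vi_scale) - 1));
	return 0;
}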
diff --git a/drivers/net/ethernet/sfc/siena.c b/drivers/net/ethernet/sfc/siena.c
index 657f3fa93bcf..7bea79017a05 100644
--- a/drivers/net/ethernet/sfc/siena.c
+++ b/drivers/net/ethernet/sfc/siena.c
@@ -313,6 +313,8 @@ static int siena_probe_nic(struct efx_nic *efx)
 	if (rc)
 		goto fail5;
 
+	efx_sriov_probe(efx);
+
 	return 0;
 
 fail5:
diff --git a/drivers/net/ethernet/sfc/siena_sriov.c b/drivers/net/ethernet/sfc/siena_sriov.c
new file mode 100644
index 000000000000..5c6839ec3a83
--- /dev/null
+++ b/drivers/net/ethernet/sfc/siena_sriov.c
@@ -0,0 +1,1642 @@
+/****************************************************************************
+ * Driver for Solarflare Solarstorm network controllers and boards
+ * Copyright 2010-2011 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+#include <linux/pci.h>
+#include <linux/module.h>
+#include "net_driver.h"
+#include "efx.h"
+#include "nic.h"
+#include "io.h"
+#include "mcdi.h"
+#include "filter.h"
+#include "mcdi_pcol.h"
+#include "regs.h"
+#include "vfdi.h"
+
+/* Number of longs required to track all the VIs in a VF */
+#define VI_MASK_LENGTH BITS_TO_LONGS(1 << EFX_VI_SCALE_MAX)
+
+/**
+ * enum efx_vf_tx_filter_mode - TX MAC filtering behaviour
+ * @VF_TX_FILTER_OFF: Disabled
+ * @VF_TX_FILTER_AUTO: Enabled if MAC address assigned to VF and only
+ *	2 TX queues allowed per VF.
+ * @VF_TX_FILTER_ON: Enabled
+ */
+enum efx_vf_tx_filter_mode {
+	VF_TX_FILTER_OFF,
+	VF_TX_FILTER_AUTO,
+	VF_TX_FILTER_ON,
+};
+
+/**
+ * struct efx_vf - Back-end resource and protocol state for a PCI VF
+ * @efx: The Efx NIC owning this VF
+ * @pci_rid: The PCI requester ID for this VF
+ * @pci_name: The PCI name (formatted address) of this VF
+ * @index: Index of VF within its port and PF.
+ * @req: VFDI incoming request work item. Incoming USR_EV events are received
+ *	by the NAPI handler, but must be handled by executing MCDI requests
+ *	inside a work item.
+ * @req_addr: VFDI incoming request DMA address (in VF's PCI address space).
+ * @req_type: Expected next incoming (from VF) %VFDI_EV_TYPE member.
+ * @req_seqno: Expected next incoming (from VF) %VFDI_EV_SEQ member.
+ * @msg_seqno: Next %VFDI_EV_SEQ member to reply to VF. Protected by
+ *	@status_lock
+ * @busy: VFDI request queued to be processed or being processed. Receiving
+ *	a VFDI request when @busy is set is an error condition.
+ * @buf: Incoming VFDI requests are DMAed from the VF into this buffer.
+ * @buftbl_base: Buffer table entries for this VF start at this index.
+ * @rx_filtering: Receive filtering has been requested by the VF driver.
+ * @rx_filter_flags: The flags sent in the %VFDI_OP_INSERT_FILTER request.
+ * @rx_filter_qid: VF relative qid for RX filter requested by VF.
+ * @rx_filter_id: Receive MAC filter ID. Only one filter per VF is supported.
+ * @tx_filter_mode: Transmit MAC filtering mode.
+ * @tx_filter_id: Transmit MAC filter ID.
+ * @addr: The MAC address and outer vlan tag of the VF.
+ * @status_addr: VF DMA address of page for &struct vfdi_status updates.
+ * @status_lock: Mutex protecting @msg_seqno, @status_addr, @addr,
+ *	@peer_page_addrs and @peer_page_count from simultaneous
+ *	updates by the VM and consumption by
+ *	efx_sriov_update_vf_addr()
+ * @peer_page_addrs: Pointer to an array of guest pages for local addresses.
+ * @peer_page_count: Number of entries in @peer_page_addrs.
+ * @evq0_addrs: Array of guest pages backing evq0.
+ * @evq0_count: Number of entries in @evq0_addrs.
+ * @flush_waitq: wait queue used by %VFDI_OP_FINI_ALL_QUEUES handler
+ *	to wait for flush completions.
+ * @txq_lock: Mutex for TX queue allocation.
+ * @txq_mask: Mask of initialized transmit queues.
+ * @txq_count: Number of initialized transmit queues.
+ * @rxq_mask: Mask of initialized receive queues.
+ * @rxq_count: Number of initialized receive queues.
+ * @rxq_retry_mask: Mask of receive queues that need to be flushed again
+ *	due to flush failure.
+ * @rxq_retry_count: Number of receive queues in @rxq_retry_mask.
+ * @reset_work: Work item to schedule a VF reset.
+ */
+struct efx_vf {
+	struct efx_nic *efx;
+	unsigned int pci_rid;
+	char pci_name[13]; /* dddd:bb:dd.f */
+	unsigned int index;
+	struct work_struct req;
+	u64 req_addr;
+	int req_type;
+	unsigned req_seqno;
+	unsigned msg_seqno;
+	bool busy;
+	struct efx_buffer buf;
+	unsigned buftbl_base;
+	bool rx_filtering;
+	enum efx_filter_flags rx_filter_flags;
+	unsigned rx_filter_qid;
+	int rx_filter_id;
+	enum efx_vf_tx_filter_mode tx_filter_mode;
+	int tx_filter_id;
+	struct vfdi_endpoint addr;
+	u64 status_addr;
+	struct mutex status_lock;
+	u64 *peer_page_addrs;
+	unsigned peer_page_count;
+	u64 evq0_addrs[EFX_MAX_VF_EVQ_SIZE * sizeof(efx_qword_t) /
+		       EFX_BUF_SIZE];
+	unsigned evq0_count;
+	wait_queue_head_t flush_waitq;
+	struct mutex txq_lock;
+	unsigned long txq_mask[VI_MASK_LENGTH];
+	unsigned txq_count;
+	unsigned long rxq_mask[VI_MASK_LENGTH];
+	unsigned rxq_count;
+	unsigned long rxq_retry_mask[VI_MASK_LENGTH];
+	atomic_t rxq_retry_count;
+	struct work_struct reset_work;
+};
+
+struct efx_memcpy_req {
+	unsigned int from_rid;
+	void *from_buf;
+	u64 from_addr;
+	unsigned int to_rid;
+	u64 to_addr;
+	unsigned length;
+};
+
+/**
+ * struct efx_local_addr - A MAC address on the vswitch without a VF.
+ *
+ * Siena does not have a switch, so VFs can't transmit data to each
+ * other. Instead the VFs must be made aware of the local addresses
+ * on the vswitch, so that they can arrange for an alternative
+ * software datapath to be used.
+ *
+ * @link: List head for insertion into efx->local_addr_list.
+ * @addr: Ethernet address
+ */
+struct efx_local_addr {
+	struct list_head link;
+	u8 addr[ETH_ALEN];
+};
+
+/**
+ * struct efx_endpoint_page - Page of vfdi_endpoint structures
+ *
+ * @link: List head for insertion into efx->local_page_list.
+ * @ptr: Pointer to page.
+ * @addr: DMA address of page.
+ */
+struct efx_endpoint_page {
+	struct list_head link;
+	void *ptr;
+	dma_addr_t addr;
+};
+
+/* Buffer table entries are reserved txq0,rxq0,evq0,txq1,rxq1,evq1 */
+#define EFX_BUFTBL_TXQ_BASE(_vf, _qid)					\
+	((_vf)->buftbl_base + EFX_VF_BUFTBL_PER_VI * (_qid))
+#define EFX_BUFTBL_RXQ_BASE(_vf, _qid)					\
+	(EFX_BUFTBL_TXQ_BASE(_vf, _qid) +				\
+	 (EFX_MAX_DMAQ_SIZE * sizeof(efx_qword_t) / EFX_BUF_SIZE))
+#define EFX_BUFTBL_EVQ_BASE(_vf, _qid)					\
+	(EFX_BUFTBL_TXQ_BASE(_vf, _qid) +				\
+	 (2 * EFX_MAX_DMAQ_SIZE * sizeof(efx_qword_t) / EFX_BUF_SIZE))
+
+#define EFX_FIELD_MASK(_field)			\
+	((1 << _field ## _WIDTH) - 1)
+
+/* VFs can only use this many transmit channels */
+static unsigned int vf_max_tx_channels = 2;
+module_param(vf_max_tx_channels, uint, 0444);
+MODULE_PARM_DESC(vf_max_tx_channels,
+		 "Limit the number of TX channels VFs can use");
+
+static int max_vfs = -1;
+module_param(max_vfs, int, 0444);
+MODULE_PARM_DESC(max_vfs,
+		 "Reduce the number of VFs initialized by the driver");
+
+/* Workqueue used by VFDI communication. We can't use the global
+ * workqueue because it may be running the VF driver's probe()
+ * routine, which will be blocked there waiting for a VFDI response.
+ */
+static struct workqueue_struct *vfdi_workqueue;
+
+static unsigned abs_index(struct efx_vf *vf, unsigned index)
+{
+	return EFX_VI_BASE + vf->index * efx_vf_size(vf->efx) + index;
+}
+
+static int efx_sriov_cmd(struct efx_nic *efx, bool enable,
+			 unsigned *vi_scale_out, unsigned *vf_total_out)
+{
+	u8 inbuf[MC_CMD_SRIOV_IN_LEN];
+	u8 outbuf[MC_CMD_SRIOV_OUT_LEN];
+	unsigned vi_scale, vf_total;
+	size_t outlen;
+	int rc;
+
+	MCDI_SET_DWORD(inbuf, SRIOV_IN_ENABLE, enable ? 1 : 0);
+	MCDI_SET_DWORD(inbuf, SRIOV_IN_VI_BASE, EFX_VI_BASE);
+	MCDI_SET_DWORD(inbuf, SRIOV_IN_VF_COUNT, efx->vf_count);
+
+	rc = efx_mcdi_rpc(efx, MC_CMD_SRIOV, inbuf, MC_CMD_SRIOV_IN_LEN,
+			  outbuf, MC_CMD_SRIOV_OUT_LEN, &outlen);
+	if (rc)
+		return rc;
+	if (outlen < MC_CMD_SRIOV_OUT_LEN)
+		return -EIO;
+
+	vf_total = MCDI_DWORD(outbuf, SRIOV_OUT_VF_TOTAL);
+	vi_scale = MCDI_DWORD(outbuf, SRIOV_OUT_VI_SCALE);
+	if (vi_scale > EFX_VI_SCALE_MAX)
+		return -EOPNOTSUPP;
+
+	if (vi_scale_out)
+		*vi_scale_out = vi_scale;
+	if (vf_total_out)
+		*vf_total_out = vf_total;
+
+	return 0;
+}
+
+static void efx_sriov_usrev(struct efx_nic *efx, bool enabled)
+{
+	efx_oword_t reg;
+
+	EFX_POPULATE_OWORD_2(reg,
+			     FRF_CZ_USREV_DIS, enabled ? 0 : 1,
+			     FRF_CZ_DFLT_EVQ, efx->vfdi_channel->channel);
+	efx_writeo(efx, &reg, FR_CZ_USR_EV_CFG);
+}
+
+static int efx_sriov_memcpy(struct efx_nic *efx, struct efx_memcpy_req *req,
+			    unsigned int count)
+{
+	u8 *inbuf, *record;
+	unsigned int used;
+	u32 from_rid, from_hi, from_lo;
+	int rc;
+
+	mb();	/* Finish writing source/reading dest before DMA starts */
+
+	used = MC_CMD_MEMCPY_IN_LEN(count);
+	if (WARN_ON(used > MCDI_CTL_SDU_LEN_MAX))
+		return -ENOBUFS;
+
+	/* Allocate room for the largest request */
+	inbuf = kzalloc(MCDI_CTL_SDU_LEN_MAX, GFP_KERNEL);
+	if (inbuf == NULL)
+		return -ENOMEM;
+
+	record = inbuf;
+	MCDI_SET_DWORD(record, MEMCPY_IN_RECORD, count);
+	while (count-- > 0) {
+		MCDI_SET_DWORD(record, MEMCPY_RECORD_TYPEDEF_TO_RID,
+			       req->to_rid);
+		MCDI_SET_DWORD(record, MEMCPY_RECORD_TYPEDEF_TO_ADDR_LO,
+			       (u32)req->to_addr);
+		MCDI_SET_DWORD(record, MEMCPY_RECORD_TYPEDEF_TO_ADDR_HI,
+			       (u32)(req->to_addr >> 32));
+		if (req->from_buf == NULL) {
+			from_rid = req->from_rid;
+			from_lo = (u32)req->from_addr;
+			from_hi = (u32)(req->from_addr >> 32);
+		} else {
+			if (WARN_ON(used + req->length > MCDI_CTL_SDU_LEN_MAX)) {
+				rc = -ENOBUFS;
+				goto out;
+			}
+
+			from_rid = MC_CMD_MEMCPY_RECORD_TYPEDEF_RID_INLINE;
+			from_lo = used;
+			from_hi = 0;
+			memcpy(inbuf + used, req->from_buf, req->length);
+			used += req->length;
+		}
+
+		MCDI_SET_DWORD(record, MEMCPY_RECORD_TYPEDEF_FROM_RID, from_rid);
+		MCDI_SET_DWORD(record, MEMCPY_RECORD_TYPEDEF_FROM_ADDR_LO,
+			       from_lo);
+		MCDI_SET_DWORD(record, MEMCPY_RECORD_TYPEDEF_FROM_ADDR_HI,
+			       from_hi);
+		MCDI_SET_DWORD(record, MEMCPY_RECORD_TYPEDEF_LENGTH,
+			       req->length);
+
+		++req;
+		record += MC_CMD_MEMCPY_IN_RECORD_LEN;
+	}
+
+	rc = efx_mcdi_rpc(efx, MC_CMD_MEMCPY, inbuf, used, NULL, 0, NULL);
+out:
+	kfree(inbuf);
+
+	mb();	/* Don't write source/read dest before DMA is complete */
+
+	return rc;
+}
+
+/* The TX filter is entirely controlled by this driver, and is modified
+ * underneath the feet of the VF
+ */
+static void efx_sriov_reset_tx_filter(struct efx_vf *vf)
+{
+	struct efx_nic *efx = vf->efx;
+	struct efx_filter_spec filter;
+	u16 vlan;
+	int rc;
+
+	if (vf->tx_filter_id != -1) {
+		efx_filter_remove_id_safe(efx, EFX_FILTER_PRI_REQUIRED,
+					  vf->tx_filter_id);
+		netif_dbg(efx, hw, efx->net_dev, "Removed vf %s tx filter %d\n",
+			  vf->pci_name, vf->tx_filter_id);
+		vf->tx_filter_id = -1;
+	}
+
+	if (is_zero_ether_addr(vf->addr.mac_addr))
+		return;
+
+	/* Turn on TX filtering automatically if not explicitly
+	 * enabled or disabled.
+	 */
+	if (vf->tx_filter_mode == VF_TX_FILTER_AUTO && vf_max_tx_channels <= 2)
+		vf->tx_filter_mode = VF_TX_FILTER_ON;
+
+	vlan = ntohs(vf->addr.tci) & VLAN_VID_MASK;
+	efx_filter_init_tx(&filter, abs_index(vf, 0));
+	rc = efx_filter_set_eth_local(&filter,
+				      vlan ? vlan : EFX_FILTER_VID_UNSPEC,
+				      vf->addr.mac_addr);
+	BUG_ON(rc);
+
+	rc = efx_filter_insert_filter(efx, &filter, true);
+	if (rc < 0) {
+		netif_warn(efx, hw, efx->net_dev,
+			   "Unable to migrate tx filter for vf %s\n",
+			   vf->pci_name);
+	} else {
+		netif_dbg(efx, hw, efx->net_dev, "Inserted vf %s tx filter %d\n",
+			  vf->pci_name, rc);
+		vf->tx_filter_id = rc;
+	}
+}
+
+/* The RX filter is managed here on behalf of the VF driver */
+static void efx_sriov_reset_rx_filter(struct efx_vf *vf)
+{
+	struct efx_nic *efx = vf->efx;
+	struct efx_filter_spec filter;
+	u16 vlan;
+	int rc;
+
+	if (vf->rx_filter_id != -1) {
+		efx_filter_remove_id_safe(efx, EFX_FILTER_PRI_REQUIRED,
+					  vf->rx_filter_id);
+		netif_dbg(efx, hw, efx->net_dev, "Removed vf %s rx filter %d\n",
+			  vf->pci_name, vf->rx_filter_id);
+		vf->rx_filter_id = -1;
+	}
+
+	if (!vf->rx_filtering || is_zero_ether_addr(vf->addr.mac_addr))
+		return;
+
+	vlan = ntohs(vf->addr.tci) & VLAN_VID_MASK;
+	efx_filter_init_rx(&filter, EFX_FILTER_PRI_REQUIRED,
+			   vf->rx_filter_flags,
+			   abs_index(vf, vf->rx_filter_qid));
+	rc = efx_filter_set_eth_local(&filter,
+				      vlan ? vlan : EFX_FILTER_VID_UNSPEC,
+				      vf->addr.mac_addr);
+	BUG_ON(rc);
+
+	rc = efx_filter_insert_filter(efx, &filter, true);
+	if (rc < 0) {
+		netif_warn(efx, hw, efx->net_dev,
+			   "Unable to insert rx filter for vf %s\n",
+			   vf->pci_name);
+	} else {
+		netif_dbg(efx, hw, efx->net_dev, "Inserted vf %s rx filter %d\n",
+			  vf->pci_name, rc);
+		vf->rx_filter_id = rc;
+	}
+}
+
+static void __efx_sriov_update_vf_addr(struct efx_vf *vf)
+{
+	efx_sriov_reset_tx_filter(vf);
+	efx_sriov_reset_rx_filter(vf);
+	queue_work(vfdi_workqueue, &vf->efx->peer_work);
+}
+
+/* Push the peer list to this VF. The caller must hold status_lock to interlock
+ * with VFDI requests, and they must be serialised against manipulation of
+ * local_page_list, either by acquiring local_lock or by running from
+ * efx_sriov_peer_work()
+ */
+static void __efx_sriov_push_vf_status(struct efx_vf *vf)
+{
+	struct efx_nic *efx = vf->efx;
+	struct vfdi_status *status = efx->vfdi_status.addr;
+	struct efx_memcpy_req copy[4];
+	struct efx_endpoint_page *epp;
+	unsigned int pos, count;
+	unsigned data_offset;
+	efx_qword_t event;
+
+	WARN_ON(!mutex_is_locked(&vf->status_lock));
+	WARN_ON(!vf->status_addr);
+
+	status->local = vf->addr;
+	status->generation_end = ++status->generation_start;
+
+	memset(copy, '\0', sizeof(copy));
+	/* Write generation_start */
+	copy[0].from_buf = &status->generation_start;
+	copy[0].to_rid = vf->pci_rid;
+	copy[0].to_addr = vf->status_addr + offsetof(struct vfdi_status,
+						     generation_start);
+	copy[0].length = sizeof(status->generation_start);
+	/* DMA the rest of the structure (excluding the generations). This
+	 * assumes that the non-generation portion of vfdi_status is in
+	 * one chunk starting at the version member.
+	 */
+	data_offset = offsetof(struct vfdi_status, version);
+	copy[1].from_rid = efx->pci_dev->devfn;
+	copy[1].from_addr = efx->vfdi_status.dma_addr + data_offset;
+	copy[1].to_rid = vf->pci_rid;
+	copy[1].to_addr = vf->status_addr + data_offset;
+	copy[1].length = status->length - data_offset;
+
+	/* Copy the peer pages */
+	pos = 2;
+	count = 0;
+	list_for_each_entry(epp, &efx->local_page_list, link) {
+		if (count == vf->peer_page_count) {
+			/* The VF driver will know they need to provide more
+			 * pages because peer_addr_count is too large.
+			 */
+			break;
+		}
+		copy[pos].from_buf = NULL;
+		copy[pos].from_rid = efx->pci_dev->devfn;
+		copy[pos].from_addr = epp->addr;
+		copy[pos].to_rid = vf->pci_rid;
+		copy[pos].to_addr = vf->peer_page_addrs[count];
+		copy[pos].length = EFX_PAGE_SIZE;
+
+		if (++pos == ARRAY_SIZE(copy)) {
+			efx_sriov_memcpy(efx, copy, ARRAY_SIZE(copy));
+			pos = 0;
+		}
+		++count;
+	}
+
+	/* Write generation_end */
+	copy[pos].from_buf = &status->generation_end;
+	copy[pos].to_rid = vf->pci_rid;
+	copy[pos].to_addr = vf->status_addr + offsetof(struct vfdi_status,
+						       generation_end);
+	copy[pos].length = sizeof(status->generation_end);
+	efx_sriov_memcpy(efx, copy, pos + 1);
+
+	/* Notify the guest */
+	EFX_POPULATE_QWORD_3(event,
+			     FSF_AZ_EV_CODE, FSE_CZ_EV_CODE_USER_EV,
+			     VFDI_EV_SEQ, (vf->msg_seqno & 0xff),
+			     VFDI_EV_TYPE, VFDI_EV_TYPE_STATUS);
+	++vf->msg_seqno;
+	efx_generate_event(efx, EFX_VI_BASE + vf->index * efx_vf_size(efx),
+			   &event);
+}
+
+static void efx_sriov_bufs(struct efx_nic *efx, unsigned offset,
+			   u64 *addr, unsigned count)
+{
+	efx_qword_t buf;
+	unsigned pos;
+
+	for (pos = 0; pos < count; ++pos) {
+		EFX_POPULATE_QWORD_3(buf,
+				     FRF_AZ_BUF_ADR_REGION, 0,
+				     FRF_AZ_BUF_ADR_FBUF,
+				     addr ? addr[pos] >> 12 : 0,
+				     FRF_AZ_BUF_OWNER_ID_FBUF, 0);
+		efx_sram_writeq(efx, efx->membase + FR_BZ_BUF_FULL_TBL,
+				&buf, offset + pos);
+	}
+}
+
+static bool bad_vf_index(struct efx_nic *efx, unsigned index)
+{
+	return index >= efx_vf_size(efx);
+}
+
+static bool bad_buf_count(unsigned buf_count, unsigned max_entry_count)
+{
+	unsigned max_buf_count = max_entry_count *
+		sizeof(efx_qword_t) / EFX_BUF_SIZE;
+
+	return ((buf_count & (buf_count - 1)) || buf_count > max_buf_count);
+}
+
+/* Check that VI specified by per-port index belongs to a VF.
+ * Optionally set VF index and VI index within the VF.
+ */
+static bool map_vi_index(struct efx_nic *efx, unsigned abs_index,
+			 struct efx_vf **vf_out, unsigned *rel_index_out)
+{
+	unsigned vf_i;
+
+	if (abs_index < EFX_VI_BASE)
+		return true;
+	vf_i = (abs_index - EFX_VI_BASE) / efx_vf_size(efx);
+	if (vf_i >= efx->vf_init_count)
+		return true;
+
+	if (vf_out)
+		*vf_out = efx->vf + vf_i;
+	if (rel_index_out)
+		*rel_index_out = abs_index % efx_vf_size(efx);
+	return false;
+}
+
528 | static int efx_vfdi_init_evq(struct efx_vf *vf) | ||
529 | { | ||
530 | struct efx_nic *efx = vf->efx; | ||
531 | struct vfdi_req *req = vf->buf.addr; | ||
532 | unsigned vf_evq = req->u.init_evq.index; | ||
533 | unsigned buf_count = req->u.init_evq.buf_count; | ||
534 | unsigned abs_evq = abs_index(vf, vf_evq); | ||
535 | unsigned buftbl = EFX_BUFTBL_EVQ_BASE(vf, vf_evq); | ||
536 | efx_oword_t reg; | ||
537 | |||
538 | if (bad_vf_index(efx, vf_evq) || | ||
539 | bad_buf_count(buf_count, EFX_MAX_VF_EVQ_SIZE)) { | ||
540 | if (net_ratelimit()) | ||
541 | netif_err(efx, hw, efx->net_dev, | ||
542 | "ERROR: Invalid INIT_EVQ from %s: evq %d bufs %d\n", | ||
543 | vf->pci_name, vf_evq, buf_count); | ||
544 | return VFDI_RC_EINVAL; | ||
545 | } | ||
546 | |||
547 | efx_sriov_bufs(efx, buftbl, req->u.init_evq.addr, buf_count); | ||
548 | |||
549 | EFX_POPULATE_OWORD_3(reg, | ||
550 | FRF_CZ_TIMER_Q_EN, 1, | ||
551 | FRF_CZ_HOST_NOTIFY_MODE, 0, | ||
552 | FRF_CZ_TIMER_MODE, FFE_CZ_TIMER_MODE_DIS); | ||
553 | efx_writeo_table(efx, ®, FR_BZ_TIMER_TBL, abs_evq); | ||
554 | EFX_POPULATE_OWORD_3(reg, | ||
555 | FRF_AZ_EVQ_EN, 1, | ||
556 | FRF_AZ_EVQ_SIZE, __ffs(buf_count), | ||
557 | FRF_AZ_EVQ_BUF_BASE_ID, buftbl); | ||
558 | efx_writeo_table(efx, ®, FR_BZ_EVQ_PTR_TBL, abs_evq); | ||
559 | |||
560 | if (vf_evq == 0) { | ||
561 | memcpy(vf->evq0_addrs, req->u.init_evq.addr, | ||
562 | buf_count * sizeof(u64)); | ||
563 | vf->evq0_count = buf_count; | ||
564 | } | ||
565 | |||
566 | return VFDI_RC_SUCCESS; | ||
567 | } | ||
568 | |||
569 | static int efx_vfdi_init_rxq(struct efx_vf *vf) | ||
570 | { | ||
571 | struct efx_nic *efx = vf->efx; | ||
572 | struct vfdi_req *req = vf->buf.addr; | ||
573 | unsigned vf_rxq = req->u.init_rxq.index; | ||
574 | unsigned vf_evq = req->u.init_rxq.evq; | ||
575 | unsigned buf_count = req->u.init_rxq.buf_count; | ||
576 | unsigned buftbl = EFX_BUFTBL_RXQ_BASE(vf, vf_rxq); | ||
577 | unsigned label; | ||
578 | efx_oword_t reg; | ||
579 | |||
580 | if (bad_vf_index(efx, vf_evq) || bad_vf_index(efx, vf_rxq) || | ||
581 | bad_buf_count(buf_count, EFX_MAX_DMAQ_SIZE)) { | ||
582 | if (net_ratelimit()) | ||
583 | netif_err(efx, hw, efx->net_dev, | ||
584 | "ERROR: Invalid INIT_RXQ from %s: rxq %d evq %d " | ||
585 | "buf_count %d\n", vf->pci_name, vf_rxq, | ||
586 | vf_evq, buf_count); | ||
587 | return VFDI_RC_EINVAL; | ||
588 | } | ||
589 | if (!__test_and_set_bit(req->u.init_rxq.index, vf->rxq_mask)) | ||
590 | ++vf->rxq_count; | ||
591 | efx_sriov_bufs(efx, buftbl, req->u.init_rxq.addr, buf_count); | ||
592 | |||
593 | label = req->u.init_rxq.label & EFX_FIELD_MASK(FRF_AZ_RX_DESCQ_LABEL); | ||
594 | EFX_POPULATE_OWORD_6(reg, | ||
595 | FRF_AZ_RX_DESCQ_BUF_BASE_ID, buftbl, | ||
596 | FRF_AZ_RX_DESCQ_EVQ_ID, abs_index(vf, vf_evq), | ||
597 | FRF_AZ_RX_DESCQ_LABEL, label, | ||
598 | FRF_AZ_RX_DESCQ_SIZE, __ffs(buf_count), | ||
599 | FRF_AZ_RX_DESCQ_JUMBO, | ||
600 | !!(req->u.init_rxq.flags & | ||
601 | VFDI_RXQ_FLAG_SCATTER_EN), | ||
602 | FRF_AZ_RX_DESCQ_EN, 1); | ||
603 | efx_writeo_table(efx, ®, FR_BZ_RX_DESC_PTR_TBL, | ||
604 | abs_index(vf, vf_rxq)); | ||
605 | |||
606 | return VFDI_RC_SUCCESS; | ||
607 | } | ||
608 | |||
609 | static int efx_vfdi_init_txq(struct efx_vf *vf) | ||
610 | { | ||
611 | struct efx_nic *efx = vf->efx; | ||
612 | struct vfdi_req *req = vf->buf.addr; | ||
613 | unsigned vf_txq = req->u.init_txq.index; | ||
614 | unsigned vf_evq = req->u.init_txq.evq; | ||
615 | unsigned buf_count = req->u.init_txq.buf_count; | ||
616 | unsigned buftbl = EFX_BUFTBL_TXQ_BASE(vf, vf_txq); | ||
617 | unsigned label, eth_filt_en; | ||
618 | efx_oword_t reg; | ||
619 | |||
620 | if (bad_vf_index(efx, vf_evq) || bad_vf_index(efx, vf_txq) || | ||
621 | vf_txq >= vf_max_tx_channels || | ||
622 | bad_buf_count(buf_count, EFX_MAX_DMAQ_SIZE)) { | ||
623 | if (net_ratelimit()) | ||
624 | netif_err(efx, hw, efx->net_dev, | ||
625 | "ERROR: Invalid INIT_TXQ from %s: txq %d evq %d " | ||
626 | "buf_count %d\n", vf->pci_name, vf_txq, | ||
627 | vf_evq, buf_count); | ||
628 | return VFDI_RC_EINVAL; | ||
629 | } | ||
630 | |||
631 | mutex_lock(&vf->txq_lock); | ||
632 | if (!__test_and_set_bit(req->u.init_txq.index, vf->txq_mask)) | ||
633 | ++vf->txq_count; | ||
634 | mutex_unlock(&vf->txq_lock); | ||
635 | efx_sriov_bufs(efx, buftbl, req->u.init_txq.addr, buf_count); | ||
636 | |||
637 | eth_filt_en = vf->tx_filter_mode == VF_TX_FILTER_ON; | ||
638 | |||
639 | label = req->u.init_txq.label & EFX_FIELD_MASK(FRF_AZ_TX_DESCQ_LABEL); | ||
640 | EFX_POPULATE_OWORD_8(reg, | ||
641 | FRF_CZ_TX_DPT_Q_MASK_WIDTH, min(efx->vi_scale, 1U), | ||
642 | FRF_CZ_TX_DPT_ETH_FILT_EN, eth_filt_en, | ||
643 | FRF_AZ_TX_DESCQ_EN, 1, | ||
644 | FRF_AZ_TX_DESCQ_BUF_BASE_ID, buftbl, | ||
645 | FRF_AZ_TX_DESCQ_EVQ_ID, abs_index(vf, vf_evq), | ||
646 | FRF_AZ_TX_DESCQ_LABEL, label, | ||
647 | FRF_AZ_TX_DESCQ_SIZE, __ffs(buf_count), | ||
648 | FRF_BZ_TX_NON_IP_DROP_DIS, 1); | ||
649 | efx_writeo_table(efx, ®, FR_BZ_TX_DESC_PTR_TBL, | ||
650 | abs_index(vf, vf_txq)); | ||
651 | |||
652 | return VFDI_RC_SUCCESS; | ||
653 | } | ||
654 | |||
655 | /* Returns true when efx_vfdi_fini_all_queues should wake */ | ||
656 | static bool efx_vfdi_flush_wake(struct efx_vf *vf) | ||
657 | { | ||
658 | /* Ensure that all updates are visible to efx_vfdi_fini_all_queues() */ | ||
659 | smp_mb(); | ||
660 | |||
661 | return (!vf->txq_count && !vf->rxq_count) || | ||
662 | atomic_read(&vf->rxq_retry_count); | ||
663 | } | ||
664 | |||
665 | static void efx_vfdi_flush_clear(struct efx_vf *vf) | ||
666 | { | ||
667 | memset(vf->txq_mask, 0, sizeof(vf->txq_mask)); | ||
668 | vf->txq_count = 0; | ||
669 | memset(vf->rxq_mask, 0, sizeof(vf->rxq_mask)); | ||
670 | vf->rxq_count = 0; | ||
671 | memset(vf->rxq_retry_mask, 0, sizeof(vf->rxq_retry_mask)); | ||
672 | atomic_set(&vf->rxq_retry_count, 0); | ||
673 | } | ||
674 | |||
675 | static int efx_vfdi_fini_all_queues(struct efx_vf *vf) | ||
676 | { | ||
677 | struct efx_nic *efx = vf->efx; | ||
678 | efx_oword_t reg; | ||
679 | unsigned count = efx_vf_size(efx); | ||
680 | unsigned vf_offset = EFX_VI_BASE + vf->index * efx_vf_size(efx); | ||
681 | unsigned timeout = HZ; | ||
682 | unsigned index, rxqs_count; | ||
683 | __le32 *rxqs; | ||
684 | int rc; | ||
685 | |||
686 | rxqs = kmalloc(count * sizeof(*rxqs), GFP_KERNEL); | ||
687 | if (rxqs == NULL) | ||
688 | return VFDI_RC_ENOMEM; | ||
689 | |||
690 | rtnl_lock(); | ||
691 | if (efx->fc_disable++ == 0) | ||
692 | efx_mcdi_set_mac(efx); | ||
693 | rtnl_unlock(); | ||
694 | |||
695 | /* Flush all the initialized queues */ | ||
696 | rxqs_count = 0; | ||
697 | for (index = 0; index < count; ++index) { | ||
698 | if (test_bit(index, vf->txq_mask)) { | ||
699 | EFX_POPULATE_OWORD_2(reg, | ||
700 | FRF_AZ_TX_FLUSH_DESCQ_CMD, 1, | ||
701 | FRF_AZ_TX_FLUSH_DESCQ, | ||
702 | vf_offset + index); | ||
703 | efx_writeo(efx, ®, FR_AZ_TX_FLUSH_DESCQ); | ||
704 | } | ||
705 | if (test_bit(index, vf->rxq_mask)) | ||
706 | rxqs[rxqs_count++] = cpu_to_le32(vf_offset + index); | ||
707 | } | ||
708 | |||
709 | atomic_set(&vf->rxq_retry_count, 0); | ||
710 | while (timeout && (vf->rxq_count || vf->txq_count)) { | ||
711 | rc = efx_mcdi_rpc(efx, MC_CMD_FLUSH_RX_QUEUES, (u8 *)rxqs, | ||
712 | rxqs_count * sizeof(*rxqs), NULL, 0, NULL); | ||
713 | WARN_ON(rc < 0); | ||
714 | |||
715 | timeout = wait_event_timeout(vf->flush_waitq, | ||
716 | efx_vfdi_flush_wake(vf), | ||
717 | timeout); | ||
718 | rxqs_count = 0; | ||
719 | for (index = 0; index < count; ++index) { | ||
720 | if (test_and_clear_bit(index, vf->rxq_retry_mask)) { | ||
721 | atomic_dec(&vf->rxq_retry_count); | ||
722 | rxqs[rxqs_count++] = | ||
723 | cpu_to_le32(vf_offset + index); | ||
724 | } | ||
725 | } | ||
726 | } | ||
727 | |||
728 | rtnl_lock(); | ||
729 | if (--efx->fc_disable == 0) | ||
730 | efx_mcdi_set_mac(efx); | ||
731 | rtnl_unlock(); | ||
732 | |||
733 | /* Irrespective of success/failure, fini the queues */ | ||
734 | EFX_ZERO_OWORD(reg); | ||
735 | for (index = 0; index < count; ++index) { | ||
736 | efx_writeo_table(efx, ®, FR_BZ_RX_DESC_PTR_TBL, | ||
737 | vf_offset + index); | ||
738 | efx_writeo_table(efx, ®, FR_BZ_TX_DESC_PTR_TBL, | ||
739 | vf_offset + index); | ||
740 | efx_writeo_table(efx, ®, FR_BZ_EVQ_PTR_TBL, | ||
741 | vf_offset + index); | ||
742 | efx_writeo_table(efx, ®, FR_BZ_TIMER_TBL, | ||
743 | vf_offset + index); | ||
744 | } | ||
745 | efx_sriov_bufs(efx, vf->buftbl_base, NULL, | ||
746 | EFX_VF_BUFTBL_PER_VI * efx_vf_size(efx)); | ||
747 | kfree(rxqs); | ||
748 | efx_vfdi_flush_clear(vf); | ||
749 | |||
750 | vf->evq0_count = 0; | ||
751 | |||
752 | return timeout ? 0 : VFDI_RC_ETIMEDOUT; | ||
753 | } | ||
754 | |||
755 | static int efx_vfdi_insert_filter(struct efx_vf *vf) | ||
756 | { | ||
757 | struct efx_nic *efx = vf->efx; | ||
758 | struct vfdi_req *req = vf->buf.addr; | ||
759 | unsigned vf_rxq = req->u.mac_filter.rxq; | ||
760 | unsigned flags; | ||
761 | |||
762 | if (bad_vf_index(efx, vf_rxq) || vf->rx_filtering) { | ||
763 | if (net_ratelimit()) | ||
764 | netif_err(efx, hw, efx->net_dev, | ||
765 | "ERROR: Invalid INSERT_FILTER from %s: rxq %d " | ||
766 | "flags 0x%x\n", vf->pci_name, vf_rxq, | ||
767 | req->u.mac_filter.flags); | ||
768 | return VFDI_RC_EINVAL; | ||
769 | } | ||
770 | |||
771 | flags = 0; | ||
772 | if (req->u.mac_filter.flags & VFDI_MAC_FILTER_FLAG_RSS) | ||
773 | flags |= EFX_FILTER_FLAG_RX_RSS; | ||
774 | if (req->u.mac_filter.flags & VFDI_MAC_FILTER_FLAG_SCATTER) | ||
775 | flags |= EFX_FILTER_FLAG_RX_SCATTER; | ||
776 | vf->rx_filter_flags = flags; | ||
777 | vf->rx_filter_qid = vf_rxq; | ||
778 | vf->rx_filtering = true; | ||
779 | |||
780 | efx_sriov_reset_rx_filter(vf); | ||
781 | queue_work(vfdi_workqueue, &efx->peer_work); | ||
782 | |||
783 | return VFDI_RC_SUCCESS; | ||
784 | } | ||
785 | |||
786 | static int efx_vfdi_remove_all_filters(struct efx_vf *vf) | ||
787 | { | ||
788 | vf->rx_filtering = false; | ||
789 | efx_sriov_reset_rx_filter(vf); | ||
790 | queue_work(vfdi_workqueue, &vf->efx->peer_work); | ||
791 | |||
792 | return VFDI_RC_SUCCESS; | ||
793 | } | ||
794 | |||
795 | static int efx_vfdi_set_status_page(struct efx_vf *vf) | ||
796 | { | ||
797 | struct efx_nic *efx = vf->efx; | ||
798 | struct vfdi_req *req = vf->buf.addr; | ||
799 | unsigned int page_count; | ||
800 | |||
801 | page_count = req->u.set_status_page.peer_page_count; | ||
802 | if (!req->u.set_status_page.dma_addr || EFX_PAGE_SIZE < | ||
803 | offsetof(struct vfdi_req, | ||
804 | u.set_status_page.peer_page_addr[page_count])) { | ||
805 | if (net_ratelimit()) | ||
806 | netif_err(efx, hw, efx->net_dev, | ||
807 | "ERROR: Invalid SET_STATUS_PAGE from %s\n", | ||
808 | vf->pci_name); | ||
809 | return VFDI_RC_EINVAL; | ||
810 | } | ||
811 | |||
812 | mutex_lock(&efx->local_lock); | ||
813 | mutex_lock(&vf->status_lock); | ||
814 | vf->status_addr = req->u.set_status_page.dma_addr; | ||
815 | |||
816 | kfree(vf->peer_page_addrs); | ||
817 | vf->peer_page_addrs = NULL; | ||
818 | vf->peer_page_count = 0; | ||
819 | |||
820 | if (page_count) { | ||
821 | vf->peer_page_addrs = kcalloc(page_count, sizeof(u64), | ||
822 | GFP_KERNEL); | ||
823 | if (vf->peer_page_addrs) { | ||
824 | memcpy(vf->peer_page_addrs, | ||
825 | req->u.set_status_page.peer_page_addr, | ||
826 | page_count * sizeof(u64)); | ||
827 | vf->peer_page_count = page_count; | ||
828 | } | ||
829 | } | ||
830 | |||
831 | __efx_sriov_push_vf_status(vf); | ||
832 | mutex_unlock(&vf->status_lock); | ||
833 | mutex_unlock(&efx->local_lock); | ||
834 | |||
835 | return VFDI_RC_SUCCESS; | ||
836 | } | ||
837 | |||
838 | static int efx_vfdi_clear_status_page(struct efx_vf *vf) | ||
839 | { | ||
840 | mutex_lock(&vf->status_lock); | ||
841 | vf->status_addr = 0; | ||
842 | mutex_unlock(&vf->status_lock); | ||
843 | |||
844 | return VFDI_RC_SUCCESS; | ||
845 | } | ||
846 | |||
847 | typedef int (*efx_vfdi_op_t)(struct efx_vf *vf); | ||
848 | |||
849 | static const efx_vfdi_op_t vfdi_ops[VFDI_OP_LIMIT] = { | ||
850 | [VFDI_OP_INIT_EVQ] = efx_vfdi_init_evq, | ||
851 | [VFDI_OP_INIT_TXQ] = efx_vfdi_init_txq, | ||
852 | [VFDI_OP_INIT_RXQ] = efx_vfdi_init_rxq, | ||
853 | [VFDI_OP_FINI_ALL_QUEUES] = efx_vfdi_fini_all_queues, | ||
854 | [VFDI_OP_INSERT_FILTER] = efx_vfdi_insert_filter, | ||
855 | [VFDI_OP_REMOVE_ALL_FILTERS] = efx_vfdi_remove_all_filters, | ||
856 | [VFDI_OP_SET_STATUS_PAGE] = efx_vfdi_set_status_page, | ||
857 | [VFDI_OP_CLEAR_STATUS_PAGE] = efx_vfdi_clear_status_page, | ||
858 | }; | ||
859 | |||
860 | static void efx_sriov_vfdi(struct work_struct *work) | ||
861 | { | ||
862 | struct efx_vf *vf = container_of(work, struct efx_vf, req); | ||
863 | struct efx_nic *efx = vf->efx; | ||
864 | struct vfdi_req *req = vf->buf.addr; | ||
865 | struct efx_memcpy_req copy[2]; | ||
866 | int rc; | ||
867 | |||
868 | /* Copy this page into the local address space */ | ||
869 | memset(copy, '\0', sizeof(copy)); | ||
870 | copy[0].from_rid = vf->pci_rid; | ||
871 | copy[0].from_addr = vf->req_addr; | ||
872 | copy[0].to_rid = efx->pci_dev->devfn; | ||
873 | copy[0].to_addr = vf->buf.dma_addr; | ||
874 | copy[0].length = EFX_PAGE_SIZE; | ||
875 | rc = efx_sriov_memcpy(efx, copy, 1); | ||
876 | if (rc) { | ||
877 | /* If we can't get the request, we can't reply to the caller */ | ||
878 | if (net_ratelimit()) | ||
879 | netif_err(efx, hw, efx->net_dev, | ||
880 | "ERROR: Unable to fetch VFDI request from %s rc %d\n", | ||
881 | vf->pci_name, -rc); | ||
882 | vf->busy = false; | ||
883 | return; | ||
884 | } | ||
885 | |||
886 | if (req->op < VFDI_OP_LIMIT && vfdi_ops[req->op] != NULL) { | ||
887 | rc = vfdi_ops[req->op](vf); | ||
888 | if (rc == 0) { | ||
889 | netif_dbg(efx, hw, efx->net_dev, | ||
890 | "vfdi request %d from %s ok\n", | ||
891 | req->op, vf->pci_name); | ||
892 | } | ||
893 | } else { | ||
894 | netif_dbg(efx, hw, efx->net_dev, | ||
895 | "ERROR: Unrecognised request %d from VF %s addr " | ||
896 | "%llx\n", req->op, vf->pci_name, | ||
897 | (unsigned long long)vf->req_addr); | ||
898 | rc = VFDI_RC_EOPNOTSUPP; | ||
899 | } | ||
900 | |||
901 | /* Allow subsequent VF requests */ | ||
902 | vf->busy = false; | ||
903 | smp_wmb(); | ||
904 | |||
905 | /* Respond to the request */ | ||
906 | req->rc = rc; | ||
907 | req->op = VFDI_OP_RESPONSE; | ||
908 | |||
909 | memset(copy, '\0', sizeof(copy)); | ||
910 | copy[0].from_buf = &req->rc; | ||
911 | copy[0].to_rid = vf->pci_rid; | ||
912 | copy[0].to_addr = vf->req_addr + offsetof(struct vfdi_req, rc); | ||
913 | copy[0].length = sizeof(req->rc); | ||
914 | copy[1].from_buf = &req->op; | ||
915 | copy[1].to_rid = vf->pci_rid; | ||
916 | copy[1].to_addr = vf->req_addr + offsetof(struct vfdi_req, op); | ||
917 | copy[1].length = sizeof(req->op); | ||
918 | |||
919 | (void) efx_sriov_memcpy(efx, copy, ARRAY_SIZE(copy)); | ||
920 | } | ||
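The reply above is pushed back with two copy descriptors, rc first and op second, because the VF treats the request as complete as soon as op reads VFDI_OP_RESPONSE and must not then observe a stale rc. A sketch of the matching VF-side wait, assuming the copy descriptors complete in list order (vfdi_wait_response() is hypothetical, not part of this driver):

    /* Sketch only: VF-side poll for the PF's reply. */
    static s32 vfdi_wait_response(struct vfdi_req *req)
    {
            while (ACCESS_ONCE(req->op) != VFDI_OP_RESPONSE)
                    cpu_relax();
            rmb();          /* read rc only after op flags completion */
            return req->rc;
    }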
921 | |||
922 | |||
923 | |||
924 | /* After a reset the event queues inside the guests no longer exist. Fill the | ||
925 | * event ring in guest memory with VFDI reset events, then re-initialise the | ||
926 | * event queue to raise an interrupt. The guest driver will then recover. | ||
927 | */ | ||
928 | static void efx_sriov_reset_vf(struct efx_vf *vf, struct efx_buffer *buffer) | ||
929 | { | ||
930 | struct efx_nic *efx = vf->efx; | ||
931 | struct efx_memcpy_req copy_req[4]; | ||
932 | efx_qword_t event; | ||
933 | unsigned int pos, count, k, buftbl, abs_evq; | ||
934 | efx_oword_t reg; | ||
935 | efx_dword_t ptr; | ||
936 | int rc; | ||
937 | |||
938 | BUG_ON(buffer->len != EFX_PAGE_SIZE); | ||
939 | |||
940 | if (!vf->evq0_count) | ||
941 | return; | ||
942 | BUG_ON(vf->evq0_count & (vf->evq0_count - 1)); | ||
943 | |||
944 | mutex_lock(&vf->status_lock); | ||
945 | EFX_POPULATE_QWORD_3(event, | ||
946 | FSF_AZ_EV_CODE, FSE_CZ_EV_CODE_USER_EV, | ||
947 | VFDI_EV_SEQ, vf->msg_seqno, | ||
948 | VFDI_EV_TYPE, VFDI_EV_TYPE_RESET); | ||
949 | vf->msg_seqno++; | ||
950 | for (pos = 0; pos < EFX_PAGE_SIZE; pos += sizeof(event)) | ||
951 | memcpy(buffer->addr + pos, &event, sizeof(event)); | ||
952 | |||
953 | for (pos = 0; pos < vf->evq0_count; pos += count) { | ||
954 | count = min_t(unsigned, vf->evq0_count - pos, | ||
955 | ARRAY_SIZE(copy_req)); | ||
956 | for (k = 0; k < count; k++) { | ||
957 | copy_req[k].from_buf = NULL; | ||
958 | copy_req[k].from_rid = efx->pci_dev->devfn; | ||
959 | copy_req[k].from_addr = buffer->dma_addr; | ||
960 | copy_req[k].to_rid = vf->pci_rid; | ||
961 | copy_req[k].to_addr = vf->evq0_addrs[pos + k]; | ||
962 | copy_req[k].length = EFX_PAGE_SIZE; | ||
963 | } | ||
964 | rc = efx_sriov_memcpy(efx, copy_req, count); | ||
965 | if (rc) { | ||
966 | if (net_ratelimit()) | ||
967 | netif_err(efx, hw, efx->net_dev, | ||
968 | "ERROR: Unable to notify %s of reset" | ||
969 | ": %d\n", vf->pci_name, -rc); | ||
970 | break; | ||
971 | } | ||
972 | } | ||
973 | |||
974 | /* Reinitialise, arm and trigger evq0 */ | ||
975 | abs_evq = abs_index(vf, 0); | ||
976 | buftbl = EFX_BUFTBL_EVQ_BASE(vf, 0); | ||
977 | efx_sriov_bufs(efx, buftbl, vf->evq0_addrs, vf->evq0_count); | ||
978 | |||
979 | EFX_POPULATE_OWORD_3(reg, | ||
980 | FRF_CZ_TIMER_Q_EN, 1, | ||
981 | FRF_CZ_HOST_NOTIFY_MODE, 0, | ||
982 | FRF_CZ_TIMER_MODE, FFE_CZ_TIMER_MODE_DIS); | ||
983 | efx_writeo_table(efx, ®, FR_BZ_TIMER_TBL, abs_evq); | ||
984 | EFX_POPULATE_OWORD_3(reg, | ||
985 | FRF_AZ_EVQ_EN, 1, | ||
986 | FRF_AZ_EVQ_SIZE, __ffs(vf->evq0_count), | ||
987 | FRF_AZ_EVQ_BUF_BASE_ID, buftbl); | ||
988 | efx_writeo_table(efx, ®, FR_BZ_EVQ_PTR_TBL, abs_evq); | ||
989 | EFX_POPULATE_DWORD_1(ptr, FRF_AZ_EVQ_RPTR, 0); | ||
990 | efx_writed_table(efx, &ptr, FR_BZ_EVQ_RPTR, abs_evq); | ||
991 | |||
992 | mutex_unlock(&vf->status_lock); | ||
993 | } | ||
994 | |||
995 | static void efx_sriov_reset_vf_work(struct work_struct *work) | ||
996 | { | ||
997 | struct efx_vf *vf = container_of(work, struct efx_vf, reset_work); | ||
998 | struct efx_nic *efx = vf->efx; | ||
999 | struct efx_buffer buf; | ||
1000 | |||
1001 | if (!efx_nic_alloc_buffer(efx, &buf, EFX_PAGE_SIZE)) { | ||
1002 | efx_sriov_reset_vf(vf, &buf); | ||
1003 | efx_nic_free_buffer(efx, &buf); | ||
1004 | } | ||
1005 | } | ||
1006 | |||
1007 | static void efx_sriov_handle_no_channel(struct efx_nic *efx) | ||
1008 | { | ||
1009 | netif_err(efx, drv, efx->net_dev, | ||
1010 | "ERROR: IOV requires MSI-X and 1 additional interrupt" | ||
1011 | "vector. IOV disabled\n"); | ||
1012 | efx->vf_count = 0; | ||
1013 | } | ||
1014 | |||
1015 | static int efx_sriov_probe_channel(struct efx_channel *channel) | ||
1016 | { | ||
1017 | channel->efx->vfdi_channel = channel; | ||
1018 | return 0; | ||
1019 | } | ||
1020 | |||
1021 | static void | ||
1022 | efx_sriov_get_channel_name(struct efx_channel *channel, char *buf, size_t len) | ||
1023 | { | ||
1024 | snprintf(buf, len, "%s-iov", channel->efx->name); | ||
1025 | } | ||
1026 | |||
1027 | static const struct efx_channel_type efx_sriov_channel_type = { | ||
1028 | .handle_no_channel = efx_sriov_handle_no_channel, | ||
1029 | .pre_probe = efx_sriov_probe_channel, | ||
1030 | .get_name = efx_sriov_get_channel_name, | ||
1031 | /* no copy operation; channel must not be reallocated */ | ||
1032 | .keep_eventq = true, | ||
1033 | }; | ||
1034 | |||
1035 | void efx_sriov_probe(struct efx_nic *efx) | ||
1036 | { | ||
1037 | unsigned count; | ||
1038 | |||
1039 | if (!max_vfs) | ||
1040 | return; | ||
1041 | |||
1042 | if (efx_sriov_cmd(efx, false, &efx->vi_scale, &count)) | ||
1043 | return; | ||
1044 | if (count > 0 && count > max_vfs) | ||
1045 | count = max_vfs; | ||
1046 | |||
1047 | /* efx_nic_dimension_resources() will reduce vf_count as appropriate */ | ||
1048 | efx->vf_count = count; | ||
1049 | |||
1050 | efx->extra_channel_type[EFX_EXTRA_CHANNEL_IOV] = &efx_sriov_channel_type; | ||
1051 | } | ||
1052 | |||
1053 | /* Copy the list of individual addresses into the vfdi_status.peers | ||
1054 | * array and auxiliary pages, protected by %local_lock. Drop that lock | ||
1055 | * and then broadcast the address list to every VF. | ||
1056 | */ | ||
1057 | static void efx_sriov_peer_work(struct work_struct *data) | ||
1058 | { | ||
1059 | struct efx_nic *efx = container_of(data, struct efx_nic, peer_work); | ||
1060 | struct vfdi_status *vfdi_status = efx->vfdi_status.addr; | ||
1061 | struct efx_vf *vf; | ||
1062 | struct efx_local_addr *local_addr; | ||
1063 | struct vfdi_endpoint *peer; | ||
1064 | struct efx_endpoint_page *epp; | ||
1065 | struct list_head pages; | ||
1066 | unsigned int peer_space; | ||
1067 | unsigned int peer_count; | ||
1068 | unsigned int pos; | ||
1069 | |||
1070 | mutex_lock(&efx->local_lock); | ||
1071 | |||
1072 | /* Move the existing peer pages off %local_page_list */ | ||
1073 | INIT_LIST_HEAD(&pages); | ||
1074 | list_splice_tail_init(&efx->local_page_list, &pages); | ||
1075 | |||
1076 | /* Populate the VF addresses starting from entry 1 (entry 0 is | ||
1077 | * the PF address) | ||
1078 | */ | ||
1079 | peer = vfdi_status->peers + 1; | ||
1080 | peer_space = ARRAY_SIZE(vfdi_status->peers) - 1; | ||
1081 | peer_count = 1; | ||
1082 | for (pos = 0; pos < efx->vf_count; ++pos) { | ||
1083 | vf = efx->vf + pos; | ||
1084 | |||
1085 | mutex_lock(&vf->status_lock); | ||
1086 | if (vf->rx_filtering && !is_zero_ether_addr(vf->addr.mac_addr)) { | ||
1087 | *peer++ = vf->addr; | ||
1088 | ++peer_count; | ||
1089 | --peer_space; | ||
1090 | BUG_ON(peer_space == 0); | ||
1091 | } | ||
1092 | mutex_unlock(&vf->status_lock); | ||
1093 | } | ||
1094 | |||
1095 | /* Fill the remaining addresses */ | ||
1096 | list_for_each_entry(local_addr, &efx->local_addr_list, link) { | ||
1097 | memcpy(peer->mac_addr, local_addr->addr, ETH_ALEN); | ||
1098 | peer->tci = 0; | ||
1099 | ++peer; | ||
1100 | ++peer_count; | ||
1101 | if (--peer_space == 0) { | ||
1102 | if (list_empty(&pages)) { | ||
1103 | epp = kmalloc(sizeof(*epp), GFP_KERNEL); | ||
1104 | if (!epp) | ||
1105 | break; | ||
1106 | epp->ptr = dma_alloc_coherent( | ||
1107 | &efx->pci_dev->dev, EFX_PAGE_SIZE, | ||
1108 | &epp->addr, GFP_KERNEL); | ||
1109 | if (!epp->ptr) { | ||
1110 | kfree(epp); | ||
1111 | break; | ||
1112 | } | ||
1113 | } else { | ||
1114 | epp = list_first_entry( | ||
1115 | &pages, struct efx_endpoint_page, link); | ||
1116 | list_del(&epp->link); | ||
1117 | } | ||
1118 | |||
1119 | list_add_tail(&epp->link, &efx->local_page_list); | ||
1120 | peer = (struct vfdi_endpoint *)epp->ptr; | ||
1121 | peer_space = EFX_PAGE_SIZE / sizeof(struct vfdi_endpoint); | ||
1122 | } | ||
1123 | } | ||
1124 | vfdi_status->peer_count = peer_count; | ||
1125 | mutex_unlock(&efx->local_lock); | ||
1126 | |||
1127 | /* Free any now unused endpoint pages */ | ||
1128 | while (!list_empty(&pages)) { | ||
1129 | epp = list_first_entry( | ||
1130 | &pages, struct efx_endpoint_page, link); | ||
1131 | list_del(&epp->link); | ||
1132 | dma_free_coherent(&efx->pci_dev->dev, EFX_PAGE_SIZE, | ||
1133 | epp->ptr, epp->addr); | ||
1134 | kfree(epp); | ||
1135 | } | ||
1136 | |||
1137 | /* Finally, push the pages */ | ||
1138 | for (pos = 0; pos < efx->vf_count; ++pos) { | ||
1139 | vf = efx->vf + pos; | ||
1140 | |||
1141 | mutex_lock(&vf->status_lock); | ||
1142 | if (vf->status_addr) | ||
1143 | __efx_sriov_push_vf_status(vf); | ||
1144 | mutex_unlock(&vf->status_lock); | ||
1145 | } | ||
1146 | } | ||
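A capacity cross-check for the spill logic above: struct vfdi_endpoint (see vfdi.h below) is a 6-byte MAC address plus a __be16 tag, 8 bytes with no padding, so each 4K overflow page holds 4096 / 8 = 512 peers, matching the figure quoted in the interface header. A minimal check of that arithmetic (endpoint_mirror is a hypothetical stand-in for the real structure):

    #include <assert.h>

    struct endpoint_mirror {
            unsigned char mac_addr[6];
            unsigned short tci;
    };

    int main(void)
    {
            assert(sizeof(struct endpoint_mirror) == 8);
            assert(4096 / sizeof(struct endpoint_mirror) == 512);
            return 0;
    }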
1147 | |||
1148 | static void efx_sriov_free_local(struct efx_nic *efx) | ||
1149 | { | ||
1150 | struct efx_local_addr *local_addr; | ||
1151 | struct efx_endpoint_page *epp; | ||
1152 | |||
1153 | while (!list_empty(&efx->local_addr_list)) { | ||
1154 | local_addr = list_first_entry(&efx->local_addr_list, | ||
1155 | struct efx_local_addr, link); | ||
1156 | list_del(&local_addr->link); | ||
1157 | kfree(local_addr); | ||
1158 | } | ||
1159 | |||
1160 | while (!list_empty(&efx->local_page_list)) { | ||
1161 | epp = list_first_entry(&efx->local_page_list, | ||
1162 | struct efx_endpoint_page, link); | ||
1163 | list_del(&epp->link); | ||
1164 | dma_free_coherent(&efx->pci_dev->dev, EFX_PAGE_SIZE, | ||
1165 | epp->ptr, epp->addr); | ||
1166 | kfree(epp); | ||
1167 | } | ||
1168 | } | ||
1169 | |||
1170 | static int efx_sriov_vf_alloc(struct efx_nic *efx) | ||
1171 | { | ||
1172 | unsigned index; | ||
1173 | struct efx_vf *vf; | ||
1174 | |||
1175 | efx->vf = kzalloc(sizeof(struct efx_vf) * efx->vf_count, GFP_KERNEL); | ||
1176 | if (!efx->vf) | ||
1177 | return -ENOMEM; | ||
1178 | |||
1179 | for (index = 0; index < efx->vf_count; ++index) { | ||
1180 | vf = efx->vf + index; | ||
1181 | |||
1182 | vf->efx = efx; | ||
1183 | vf->index = index; | ||
1184 | vf->rx_filter_id = -1; | ||
1185 | vf->tx_filter_mode = VF_TX_FILTER_AUTO; | ||
1186 | vf->tx_filter_id = -1; | ||
1187 | INIT_WORK(&vf->req, efx_sriov_vfdi); | ||
1188 | INIT_WORK(&vf->reset_work, efx_sriov_reset_vf_work); | ||
1189 | init_waitqueue_head(&vf->flush_waitq); | ||
1190 | mutex_init(&vf->status_lock); | ||
1191 | mutex_init(&vf->txq_lock); | ||
1192 | } | ||
1193 | |||
1194 | return 0; | ||
1195 | } | ||
1196 | |||
1197 | static void efx_sriov_vfs_fini(struct efx_nic *efx) | ||
1198 | { | ||
1199 | struct efx_vf *vf; | ||
1200 | unsigned int pos; | ||
1201 | |||
1202 | for (pos = 0; pos < efx->vf_count; ++pos) { | ||
1203 | vf = efx->vf + pos; | ||
1204 | |||
1205 | efx_nic_free_buffer(efx, &vf->buf); | ||
1206 | kfree(vf->peer_page_addrs); | ||
1207 | vf->peer_page_addrs = NULL; | ||
1208 | vf->peer_page_count = 0; | ||
1209 | |||
1210 | vf->evq0_count = 0; | ||
1211 | } | ||
1212 | } | ||
1213 | |||
1214 | static int efx_sriov_vfs_init(struct efx_nic *efx) | ||
1215 | { | ||
1216 | struct pci_dev *pci_dev = efx->pci_dev; | ||
1217 | unsigned index, devfn, sriov, buftbl_base; | ||
1218 | u16 offset, stride; | ||
1219 | struct efx_vf *vf; | ||
1220 | int rc; | ||
1221 | |||
1222 | sriov = pci_find_ext_capability(pci_dev, PCI_EXT_CAP_ID_SRIOV); | ||
1223 | if (!sriov) | ||
1224 | return -ENOENT; | ||
1225 | |||
1226 | pci_read_config_word(pci_dev, sriov + PCI_SRIOV_VF_OFFSET, &offset); | ||
1227 | pci_read_config_word(pci_dev, sriov + PCI_SRIOV_VF_STRIDE, &stride); | ||
1228 | |||
1229 | buftbl_base = efx->vf_buftbl_base; | ||
1230 | devfn = pci_dev->devfn + offset; | ||
1231 | for (index = 0; index < efx->vf_count; ++index) { | ||
1232 | vf = efx->vf + index; | ||
1233 | |||
1234 | /* Reserve buffer entries */ | ||
1235 | vf->buftbl_base = buftbl_base; | ||
1236 | buftbl_base += EFX_VF_BUFTBL_PER_VI * efx_vf_size(efx); | ||
1237 | |||
1238 | vf->pci_rid = devfn; | ||
1239 | snprintf(vf->pci_name, sizeof(vf->pci_name), | ||
1240 | "%04x:%02x:%02x.%d", | ||
1241 | pci_domain_nr(pci_dev->bus), pci_dev->bus->number, | ||
1242 | PCI_SLOT(devfn), PCI_FUNC(devfn)); | ||
1243 | |||
1244 | rc = efx_nic_alloc_buffer(efx, &vf->buf, EFX_PAGE_SIZE); | ||
1245 | if (rc) | ||
1246 | goto fail; | ||
1247 | |||
1248 | devfn += stride; | ||
1249 | } | ||
1250 | |||
1251 | return 0; | ||
1252 | |||
1253 | fail: | ||
1254 | efx_sriov_vfs_fini(efx); | ||
1255 | return rc; | ||
1256 | } | ||
1257 | |||
1258 | int efx_sriov_init(struct efx_nic *efx) | ||
1259 | { | ||
1260 | struct net_device *net_dev = efx->net_dev; | ||
1261 | struct vfdi_status *vfdi_status; | ||
1262 | int rc; | ||
1263 | |||
1264 | /* Ensure there's room for vf_channel */ | ||
1265 | BUILD_BUG_ON(EFX_MAX_CHANNELS + 1 >= EFX_VI_BASE); | ||
1266 | /* Ensure that VI_BASE is aligned on VI_SCALE */ | ||
1267 | BUILD_BUG_ON(EFX_VI_BASE & ((1 << EFX_VI_SCALE_MAX) - 1)); | ||
1268 | |||
1269 | if (efx->vf_count == 0) | ||
1270 | return 0; | ||
1271 | |||
1272 | rc = efx_sriov_cmd(efx, true, NULL, NULL); | ||
1273 | if (rc) | ||
1274 | goto fail_cmd; | ||
1275 | |||
1276 | rc = efx_nic_alloc_buffer(efx, &efx->vfdi_status, sizeof(*vfdi_status)); | ||
1277 | if (rc) | ||
1278 | goto fail_status; | ||
1279 | vfdi_status = efx->vfdi_status.addr; | ||
1280 | memset(vfdi_status, 0, sizeof(*vfdi_status)); | ||
1281 | vfdi_status->version = 1; | ||
1282 | vfdi_status->length = sizeof(*vfdi_status); | ||
1283 | vfdi_status->max_tx_channels = vf_max_tx_channels; | ||
1284 | vfdi_status->vi_scale = efx->vi_scale; | ||
1285 | vfdi_status->rss_rxq_count = efx->rss_spread; | ||
1286 | vfdi_status->peer_count = 1 + efx->vf_count; | ||
1287 | vfdi_status->timer_quantum_ns = efx->timer_quantum_ns; | ||
1288 | |||
1289 | rc = efx_sriov_vf_alloc(efx); | ||
1290 | if (rc) | ||
1291 | goto fail_alloc; | ||
1292 | |||
1293 | mutex_init(&efx->local_lock); | ||
1294 | INIT_WORK(&efx->peer_work, efx_sriov_peer_work); | ||
1295 | INIT_LIST_HEAD(&efx->local_addr_list); | ||
1296 | INIT_LIST_HEAD(&efx->local_page_list); | ||
1297 | |||
1298 | rc = efx_sriov_vfs_init(efx); | ||
1299 | if (rc) | ||
1300 | goto fail_vfs; | ||
1301 | |||
1302 | rtnl_lock(); | ||
1303 | memcpy(vfdi_status->peers[0].mac_addr, | ||
1304 | net_dev->dev_addr, ETH_ALEN); | ||
1305 | efx->vf_init_count = efx->vf_count; | ||
1306 | rtnl_unlock(); | ||
1307 | |||
1308 | efx_sriov_usrev(efx, true); | ||
1309 | |||
1310 | /* At this point we must be ready to accept VFDI requests */ | ||
1311 | |||
1312 | rc = pci_enable_sriov(efx->pci_dev, efx->vf_count); | ||
1313 | if (rc) | ||
1314 | goto fail_pci; | ||
1315 | |||
1316 | netif_info(efx, probe, net_dev, | ||
1317 | "enabled SR-IOV for %d VFs, %d VI per VF\n", | ||
1318 | efx->vf_count, efx_vf_size(efx)); | ||
1319 | return 0; | ||
1320 | |||
1321 | fail_pci: | ||
1322 | efx_sriov_usrev(efx, false); | ||
1323 | rtnl_lock(); | ||
1324 | efx->vf_init_count = 0; | ||
1325 | rtnl_unlock(); | ||
1326 | efx_sriov_vfs_fini(efx); | ||
1327 | fail_vfs: | ||
1328 | cancel_work_sync(&efx->peer_work); | ||
1329 | efx_sriov_free_local(efx); | ||
1330 | kfree(efx->vf); | ||
1331 | fail_alloc: | ||
1332 | efx_nic_free_buffer(efx, &efx->vfdi_status); | ||
1333 | fail_status: | ||
1334 | efx_sriov_cmd(efx, false, NULL, NULL); | ||
1335 | fail_cmd: | ||
1336 | return rc; | ||
1337 | } | ||
1338 | |||
1339 | void efx_sriov_fini(struct efx_nic *efx) | ||
1340 | { | ||
1341 | struct efx_vf *vf; | ||
1342 | unsigned int pos; | ||
1343 | |||
1344 | if (efx->vf_init_count == 0) | ||
1345 | return; | ||
1346 | |||
1347 | /* Disable all interfaces used to request reconfiguration */ | ||
1348 | BUG_ON(efx->vfdi_channel->enabled); | ||
1349 | efx_sriov_usrev(efx, false); | ||
1350 | rtnl_lock(); | ||
1351 | efx->vf_init_count = 0; | ||
1352 | rtnl_unlock(); | ||
1353 | |||
1354 | /* Flush all reconfiguration work */ | ||
1355 | for (pos = 0; pos < efx->vf_count; ++pos) { | ||
1356 | vf = efx->vf + pos; | ||
1357 | cancel_work_sync(&vf->req); | ||
1358 | cancel_work_sync(&vf->reset_work); | ||
1359 | } | ||
1360 | cancel_work_sync(&efx->peer_work); | ||
1361 | |||
1362 | pci_disable_sriov(efx->pci_dev); | ||
1363 | |||
1364 | /* Tear down back-end state */ | ||
1365 | efx_sriov_vfs_fini(efx); | ||
1366 | efx_sriov_free_local(efx); | ||
1367 | kfree(efx->vf); | ||
1368 | efx_nic_free_buffer(efx, &efx->vfdi_status); | ||
1369 | efx_sriov_cmd(efx, false, NULL, NULL); | ||
1370 | } | ||
1371 | |||
1372 | void efx_sriov_event(struct efx_channel *channel, efx_qword_t *event) | ||
1373 | { | ||
1374 | struct efx_nic *efx = channel->efx; | ||
1375 | struct efx_vf *vf; | ||
1376 | unsigned qid, seq, type, data; | ||
1377 | |||
1378 | qid = EFX_QWORD_FIELD(*event, FSF_CZ_USER_QID); | ||
1379 | |||
1380 | /* USR_EV_REG_VALUE is dword0, so access the VFDI_EV fields directly */ | ||
1381 | BUILD_BUG_ON(FSF_CZ_USER_EV_REG_VALUE_LBN != 0); | ||
1382 | seq = EFX_QWORD_FIELD(*event, VFDI_EV_SEQ); | ||
1383 | type = EFX_QWORD_FIELD(*event, VFDI_EV_TYPE); | ||
1384 | data = EFX_QWORD_FIELD(*event, VFDI_EV_DATA); | ||
1385 | |||
1386 | netif_vdbg(efx, hw, efx->net_dev, | ||
1387 | "USR_EV event from qid %d seq 0x%x type %d data 0x%x\n", | ||
1388 | qid, seq, type, data); | ||
1389 | |||
1390 | if (map_vi_index(efx, qid, &vf, NULL)) | ||
1391 | return; | ||
1392 | if (vf->busy) | ||
1393 | goto error; | ||
1394 | |||
1395 | if (type == VFDI_EV_TYPE_REQ_WORD0) { | ||
1396 | /* Resynchronise */ | ||
1397 | vf->req_type = VFDI_EV_TYPE_REQ_WORD0; | ||
1398 | vf->req_seqno = seq + 1; | ||
1399 | vf->req_addr = 0; | ||
1400 | } else if (seq != (vf->req_seqno++ & 0xff) || type != vf->req_type) | ||
1401 | goto error; | ||
1402 | |||
1403 | switch (vf->req_type) { | ||
1404 | case VFDI_EV_TYPE_REQ_WORD0: | ||
1405 | case VFDI_EV_TYPE_REQ_WORD1: | ||
1406 | case VFDI_EV_TYPE_REQ_WORD2: | ||
1407 | vf->req_addr |= (u64)data << (vf->req_type << 4); | ||
1408 | ++vf->req_type; | ||
1409 | return; | ||
1410 | |||
1411 | case VFDI_EV_TYPE_REQ_WORD3: | ||
1412 | vf->req_addr |= (u64)data << 48; | ||
1413 | vf->req_type = VFDI_EV_TYPE_REQ_WORD0; | ||
1414 | vf->busy = true; | ||
1415 | queue_work(vfdi_workqueue, &vf->req); | ||
1416 | return; | ||
1417 | } | ||
1418 | |||
1419 | error: | ||
1420 | if (net_ratelimit()) | ||
1421 | netif_err(efx, hw, efx->net_dev, | ||
1422 | "ERROR: Screaming VFDI request from %s\n", | ||
1423 | vf->pci_name); | ||
1424 | /* Reset the request and sequence number */ | ||
1425 | vf->req_type = VFDI_EV_TYPE_REQ_WORD0; | ||
1426 | vf->req_seqno = seq + 1; | ||
1427 | } | ||
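The shift in the WORD0..WORD2 cases above leans on the type codes being defined as 0, 1 and 2 (see vfdi.h below), so vf->req_type << 4 equals 16 * word_index and each 16-bit DATA chunk lands at its offset within the 64-bit address; WORD3 is split out because it also marks the request complete. A standalone illustration of the reassembly (the word values are hypothetical):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            uint16_t word[4] = { 0x3000, 0x2222, 0x0001, 0x0000 };
            uint64_t addr = 0;
            int t;

            for (t = 0; t < 4; t++)  /* t plays the role of vf->req_type */
                    addr |= (uint64_t)word[t] << (t << 4);
            printf("%#llx\n", (unsigned long long)addr); /* 0x122223000 */
            return 0;
    }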
1428 | |||
1429 | void efx_sriov_flr(struct efx_nic *efx, unsigned vf_i) | ||
1430 | { | ||
1431 | struct efx_vf *vf; | ||
1432 | |||
1433 | if (vf_i >= efx->vf_init_count) | ||
1434 | return; | ||
1435 | vf = efx->vf + vf_i; | ||
1436 | netif_info(efx, hw, efx->net_dev, | ||
1437 | "FLR on VF %s\n", vf->pci_name); | ||
1438 | |||
1439 | vf->status_addr = 0; | ||
1440 | efx_vfdi_remove_all_filters(vf); | ||
1441 | efx_vfdi_flush_clear(vf); | ||
1442 | |||
1443 | vf->evq0_count = 0; | ||
1444 | } | ||
1445 | |||
1446 | void efx_sriov_mac_address_changed(struct efx_nic *efx) | ||
1447 | { | ||
1448 | struct vfdi_status *vfdi_status = efx->vfdi_status.addr; | ||
1449 | |||
1450 | if (!efx->vf_init_count) | ||
1451 | return; | ||
1452 | memcpy(vfdi_status->peers[0].mac_addr, | ||
1453 | efx->net_dev->dev_addr, ETH_ALEN); | ||
1454 | queue_work(vfdi_workqueue, &efx->peer_work); | ||
1455 | } | ||
1456 | |||
1457 | void efx_sriov_tx_flush_done(struct efx_nic *efx, efx_qword_t *event) | ||
1458 | { | ||
1459 | struct efx_vf *vf; | ||
1460 | unsigned queue, qid; | ||
1461 | |||
1462 | queue = EFX_QWORD_FIELD(*event, FSF_AZ_DRIVER_EV_SUBDATA); | ||
1463 | if (map_vi_index(efx, queue, &vf, &qid)) | ||
1464 | return; | ||
1465 | /* Ignore flush completions triggered by an FLR */ | ||
1466 | if (!test_bit(qid, vf->txq_mask)) | ||
1467 | return; | ||
1468 | |||
1469 | __clear_bit(qid, vf->txq_mask); | ||
1470 | --vf->txq_count; | ||
1471 | |||
1472 | if (efx_vfdi_flush_wake(vf)) | ||
1473 | wake_up(&vf->flush_waitq); | ||
1474 | } | ||
1475 | |||
1476 | void efx_sriov_rx_flush_done(struct efx_nic *efx, efx_qword_t *event) | ||
1477 | { | ||
1478 | struct efx_vf *vf; | ||
1479 | unsigned ev_failed, queue, qid; | ||
1480 | |||
1481 | queue = EFX_QWORD_FIELD(*event, FSF_AZ_DRIVER_EV_RX_DESCQ_ID); | ||
1482 | ev_failed = EFX_QWORD_FIELD(*event, | ||
1483 | FSF_AZ_DRIVER_EV_RX_FLUSH_FAIL); | ||
1484 | if (map_vi_index(efx, queue, &vf, &qid)) | ||
1485 | return; | ||
1486 | if (!test_bit(qid, vf->rxq_mask)) | ||
1487 | return; | ||
1488 | |||
1489 | if (ev_failed) { | ||
1490 | set_bit(qid, vf->rxq_retry_mask); | ||
1491 | atomic_inc(&vf->rxq_retry_count); | ||
1492 | } else { | ||
1493 | __clear_bit(qid, vf->rxq_mask); | ||
1494 | --vf->rxq_count; | ||
1495 | } | ||
1496 | if (efx_vfdi_flush_wake(vf)) | ||
1497 | wake_up(&vf->flush_waitq); | ||
1498 | } | ||
1499 | |||
1500 | /* Called from napi. Schedule the reset work item */ | ||
1501 | void efx_sriov_desc_fetch_err(struct efx_nic *efx, unsigned dmaq) | ||
1502 | { | ||
1503 | struct efx_vf *vf; | ||
1504 | unsigned int rel; | ||
1505 | |||
1506 | if (map_vi_index(efx, dmaq, &vf, &rel)) | ||
1507 | return; | ||
1508 | |||
1509 | if (net_ratelimit()) | ||
1510 | netif_err(efx, hw, efx->net_dev, | ||
1511 | "VF %d DMA Q %d reports descriptor fetch error.\n", | ||
1512 | vf->index, rel); | ||
1513 | queue_work(vfdi_workqueue, &vf->reset_work); | ||
1514 | } | ||
1515 | |||
1516 | /* Reset all VFs */ | ||
1517 | void efx_sriov_reset(struct efx_nic *efx) | ||
1518 | { | ||
1519 | unsigned int vf_i; | ||
1520 | struct efx_buffer buf; | ||
1521 | struct efx_vf *vf; | ||
1522 | |||
1523 | ASSERT_RTNL(); | ||
1524 | |||
1525 | if (efx->vf_init_count == 0) | ||
1526 | return; | ||
1527 | |||
1528 | efx_sriov_usrev(efx, true); | ||
1529 | (void)efx_sriov_cmd(efx, true, NULL, NULL); | ||
1530 | |||
1531 | if (efx_nic_alloc_buffer(efx, &buf, EFX_PAGE_SIZE)) | ||
1532 | return; | ||
1533 | |||
1534 | for (vf_i = 0; vf_i < efx->vf_init_count; ++vf_i) { | ||
1535 | vf = efx->vf + vf_i; | ||
1536 | efx_sriov_reset_vf(vf, &buf); | ||
1537 | } | ||
1538 | |||
1539 | efx_nic_free_buffer(efx, &buf); | ||
1540 | } | ||
1541 | |||
1542 | int efx_init_sriov(void) | ||
1543 | { | ||
1544 | /* A single-threaded workqueue is sufficient. efx_sriov_vfdi() and | ||
1545 | * efx_sriov_peer_work() spend almost all their time sleeping for | ||
1546 | * MCDI to complete anyway. | ||
1547 | */ | ||
1548 | vfdi_workqueue = create_singlethread_workqueue("sfc_vfdi"); | ||
1549 | if (!vfdi_workqueue) | ||
1550 | return -ENOMEM; | ||
1551 | |||
1552 | return 0; | ||
1553 | } | ||
1554 | |||
1555 | void efx_fini_sriov(void) | ||
1556 | { | ||
1557 | destroy_workqueue(vfdi_workqueue); | ||
1558 | } | ||
1559 | |||
1560 | int efx_sriov_set_vf_mac(struct net_device *net_dev, int vf_i, u8 *mac) | ||
1561 | { | ||
1562 | struct efx_nic *efx = netdev_priv(net_dev); | ||
1563 | struct efx_vf *vf; | ||
1564 | |||
1565 | if (vf_i >= efx->vf_init_count) | ||
1566 | return -EINVAL; | ||
1567 | vf = efx->vf + vf_i; | ||
1568 | |||
1569 | mutex_lock(&vf->status_lock); | ||
1570 | memcpy(vf->addr.mac_addr, mac, ETH_ALEN); | ||
1571 | __efx_sriov_update_vf_addr(vf); | ||
1572 | mutex_unlock(&vf->status_lock); | ||
1573 | |||
1574 | return 0; | ||
1575 | } | ||
1576 | |||
1577 | int efx_sriov_set_vf_vlan(struct net_device *net_dev, int vf_i, | ||
1578 | u16 vlan, u8 qos) | ||
1579 | { | ||
1580 | struct efx_nic *efx = netdev_priv(net_dev); | ||
1581 | struct efx_vf *vf; | ||
1582 | u16 tci; | ||
1583 | |||
1584 | if (vf_i >= efx->vf_init_count) | ||
1585 | return -EINVAL; | ||
1586 | vf = efx->vf + vf_i; | ||
1587 | |||
1588 | mutex_lock(&vf->status_lock); | ||
1589 | tci = (vlan & VLAN_VID_MASK) | ((qos & 0x7) << VLAN_PRIO_SHIFT); | ||
1590 | vf->addr.tci = htons(tci); | ||
1591 | __efx_sriov_update_vf_addr(vf); | ||
1592 | mutex_unlock(&vf->status_lock); | ||
1593 | |||
1594 | return 0; | ||
1595 | } | ||
1596 | |||
1597 | int efx_sriov_set_vf_spoofchk(struct net_device *net_dev, int vf_i, | ||
1598 | bool spoofchk) | ||
1599 | { | ||
1600 | struct efx_nic *efx = netdev_priv(net_dev); | ||
1601 | struct efx_vf *vf; | ||
1602 | int rc; | ||
1603 | |||
1604 | if (vf_i >= efx->vf_init_count) | ||
1605 | return -EINVAL; | ||
1606 | vf = efx->vf + vf_i; | ||
1607 | |||
1608 | mutex_lock(&vf->txq_lock); | ||
1609 | if (vf->txq_count == 0) { | ||
1610 | vf->tx_filter_mode = | ||
1611 | spoofchk ? VF_TX_FILTER_ON : VF_TX_FILTER_OFF; | ||
1612 | rc = 0; | ||
1613 | } else { | ||
1614 | /* This cannot be changed while TX queues are running */ | ||
1615 | rc = -EBUSY; | ||
1616 | } | ||
1617 | mutex_unlock(&vf->txq_lock); | ||
1618 | return rc; | ||
1619 | } | ||
1620 | |||
1621 | int efx_sriov_get_vf_config(struct net_device *net_dev, int vf_i, | ||
1622 | struct ifla_vf_info *ivi) | ||
1623 | { | ||
1624 | struct efx_nic *efx = netdev_priv(net_dev); | ||
1625 | struct efx_vf *vf; | ||
1626 | u16 tci; | ||
1627 | |||
1628 | if (vf_i >= efx->vf_init_count) | ||
1629 | return -EINVAL; | ||
1630 | vf = efx->vf + vf_i; | ||
1631 | |||
1632 | ivi->vf = vf_i; | ||
1633 | memcpy(ivi->mac, vf->addr.mac_addr, ETH_ALEN); | ||
1634 | ivi->tx_rate = 0; | ||
1635 | tci = ntohs(vf->addr.tci); | ||
1636 | ivi->vlan = tci & VLAN_VID_MASK; | ||
1637 | ivi->qos = (tci >> VLAN_PRIO_SHIFT) & 0x7; | ||
1638 | ivi->spoofchk = vf->tx_filter_mode == VF_TX_FILTER_ON; | ||
1639 | |||
1640 | return 0; | ||
1641 | } | ||
1642 | |||
diff --git a/drivers/net/ethernet/sfc/vfdi.h b/drivers/net/ethernet/sfc/vfdi.h new file mode 100644 index 000000000000..656fa70f9993 --- /dev/null +++ b/drivers/net/ethernet/sfc/vfdi.h | |||
@@ -0,0 +1,254 @@ | |||
1 | /**************************************************************************** | ||
2 | * Driver for Solarflare Solarstorm network controllers and boards | ||
3 | * Copyright 2010-2012 Solarflare Communications Inc. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify it | ||
6 | * under the terms of the GNU General Public License version 2 as published | ||
7 | * by the Free Software Foundation, incorporated herein by reference. | ||
8 | */ | ||
9 | #ifndef _VFDI_H | ||
10 | #define _VFDI_H | ||
11 | |||
12 | /** | ||
13 | * DOC: Virtual Function Driver Interface | ||
14 | * | ||
15 | * This file contains software structures used to form a two way | ||
16 | * communication channel between the VF driver and the PF driver, | ||
17 | * named Virtual Function Driver Interface (VFDI). | ||
18 | * | ||
19 | * For the purposes of VFDI, a page is a memory region with size and | ||
20 | * alignment of 4K. All addresses are DMA addresses to be used within | ||
21 | * the domain of the relevant VF. | ||
22 | * | ||
23 | * The only hardware-defined channels for a VF driver to communicate | ||
24 | * with the PF driver are the event mailboxes (%FR_CZ_USR_EV | ||
25 | * registers). Writing to these registers generates an event with | ||
26 | * EV_CODE = EV_CODE_USR_EV, USER_QID set to the index of the mailbox | ||
27 | * and USER_EV_REG_VALUE set to the value written. The PF driver may | ||
28 | * direct or disable delivery of these events by setting | ||
29 | * %FR_CZ_USR_EV_CFG. | ||
30 | * | ||
31 | * The PF driver can send arbitrary events to arbitrary event queues. | ||
32 | * However, for consistency, VFDI events from the PF are defined to | ||
33 | * follow the same form and be sent to the first event queue assigned | ||
34 | * to the VF while that queue is enabled by the VF driver. | ||
35 | * | ||
36 | * The general form of the variable bits of VFDI events is: | ||
37 | * | ||
38 | * 0 16 24 31 | ||
39 | * | DATA | TYPE | SEQ | | ||
40 | * | ||
41 | * SEQ is a sequence number which should be incremented by 1 (modulo | ||
42 | * 256) for each event. The sequence numbers used in each direction | ||
43 | * are independent. | ||
44 | * | ||
45 | * The VF submits requests of type &struct vfdi_req by sending the | ||
46 | * address of the request (ADDR) in a series of 4 events: | ||
47 | * | ||
48 | * 0 16 24 31 | ||
49 | * | ADDR[0:15] | VFDI_EV_TYPE_REQ_WORD0 | SEQ | | ||
50 | * | ADDR[16:31] | VFDI_EV_TYPE_REQ_WORD1 | SEQ+1 | | ||
51 | * | ADDR[32:47] | VFDI_EV_TYPE_REQ_WORD2 | SEQ+2 | | ||
52 | * | ADDR[48:63] | VFDI_EV_TYPE_REQ_WORD3 | SEQ+3 | | ||
53 | * | ||
54 | * The address must be page-aligned. After receiving such a valid | ||
55 | * series of events, the PF driver will attempt to read the request | ||
56 | * and write a response to the same address. In case of an invalid | ||
57 | * sequence of events or a DMA error, there will be no response. | ||
58 | * | ||
59 | * The VF driver may request that the PF driver writes status | ||
60 | * information into its domain asynchronously. After writing the | ||
61 | * status, the PF driver will send an event of the form: | ||
62 | * | ||
63 | * 0 16 24 31 | ||
64 | * | reserved | VFDI_EV_TYPE_STATUS | SEQ | | ||
65 | * | ||
66 | * In case the VF must be reset for any reason, the PF driver will | ||
67 | * send an event of the form: | ||
68 | * | ||
69 | * 0 16 24 31 | ||
70 | * | reserved | VFDI_EV_TYPE_RESET | SEQ | | ||
71 | * | ||
72 | * It is then the responsibility of the VF driver to request | ||
73 | * reinitialisation of its queues. | ||
74 | */ | ||
75 | #define VFDI_EV_SEQ_LBN 24 | ||
76 | #define VFDI_EV_SEQ_WIDTH 8 | ||
77 | #define VFDI_EV_TYPE_LBN 16 | ||
78 | #define VFDI_EV_TYPE_WIDTH 8 | ||
79 | #define VFDI_EV_TYPE_REQ_WORD0 0 | ||
80 | #define VFDI_EV_TYPE_REQ_WORD1 1 | ||
81 | #define VFDI_EV_TYPE_REQ_WORD2 2 | ||
82 | #define VFDI_EV_TYPE_REQ_WORD3 3 | ||
83 | #define VFDI_EV_TYPE_STATUS 4 | ||
84 | #define VFDI_EV_TYPE_RESET 5 | ||
85 | #define VFDI_EV_DATA_LBN 0 | ||
86 | #define VFDI_EV_DATA_WIDTH 16 | ||
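Read together with the request encoding described above, a VF driver might post the four words of a page-aligned request address as follows (a sketch only; usr_ev_write() is a hypothetical accessor for the VF's FR_CZ_USR_EV mailbox and is not part of this interface):

    /* Sketch only: submit the DMA address of a struct vfdi_req. */
    static void vfdi_submit_req(u64 addr, u8 *seqno)
    {
            unsigned int w;
            u32 ev;

            for (w = 0; w < 4; w++) {
                    ev = (u32)((addr >> (16 * w)) & 0xffff) << VFDI_EV_DATA_LBN;
                    ev |= (VFDI_EV_TYPE_REQ_WORD0 + w) << VFDI_EV_TYPE_LBN;
                    ev |= (u32)((*seqno)++) << VFDI_EV_SEQ_LBN; /* wraps mod 256 */
                    usr_ev_write(ev);
            }
    }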
87 | |||
88 | struct vfdi_endpoint { | ||
89 | u8 mac_addr[ETH_ALEN]; | ||
90 | __be16 tci; | ||
91 | }; | ||
92 | |||
93 | /** | ||
94 | * enum vfdi_op - VFDI operation enumeration | ||
95 | * @VFDI_OP_RESPONSE: Indicates a response to the request. | ||
96 | * @VFDI_OP_INIT_EVQ: Initialize SRAM entries and initialize an EVQ. | ||
97 | * @VFDI_OP_INIT_RXQ: Initialize SRAM entries and initialize an RXQ. | ||
98 | * @VFDI_OP_INIT_TXQ: Initialize SRAM entries and initialize a TXQ. | ||
99 | * @VFDI_OP_FINI_ALL_QUEUES: Flush all queues, finalize all queues, then | ||
100 | * finalize the SRAM entries. | ||
101 | * @VFDI_OP_INSERT_FILTER: Insert a MAC filter targeting the given RXQ. | ||
102 | * @VFDI_OP_REMOVE_ALL_FILTERS: Remove all filters. | ||
103 | * @VFDI_OP_SET_STATUS_PAGE: Set the DMA page(s) used for status updates | ||
104 | * from PF and write the initial status. | ||
105 | * @VFDI_OP_CLEAR_STATUS_PAGE: Clear the DMA page(s) used for status | ||
106 | * updates from PF. | ||
107 | */ | ||
108 | enum vfdi_op { | ||
109 | VFDI_OP_RESPONSE = 0, | ||
110 | VFDI_OP_INIT_EVQ = 1, | ||
111 | VFDI_OP_INIT_RXQ = 2, | ||
112 | VFDI_OP_INIT_TXQ = 3, | ||
113 | VFDI_OP_FINI_ALL_QUEUES = 4, | ||
114 | VFDI_OP_INSERT_FILTER = 5, | ||
115 | VFDI_OP_REMOVE_ALL_FILTERS = 6, | ||
116 | VFDI_OP_SET_STATUS_PAGE = 7, | ||
117 | VFDI_OP_CLEAR_STATUS_PAGE = 8, | ||
118 | VFDI_OP_LIMIT, | ||
119 | }; | ||
120 | |||
121 | /* Response codes for VFDI operations. Other values may be used in future. */ | ||
122 | #define VFDI_RC_SUCCESS 0 | ||
123 | #define VFDI_RC_ENOMEM (-12) | ||
124 | #define VFDI_RC_EINVAL (-22) | ||
125 | #define VFDI_RC_EOPNOTSUPP (-95) | ||
126 | #define VFDI_RC_ETIMEDOUT (-110) | ||
127 | |||
128 | /** | ||
129 | * struct vfdi_req - Request from VF driver to PF driver | ||
130 | * @op: Operation code or response indicator, taken from &enum vfdi_op. | ||
131 | * @rc: Response code. Set to 0 on success or a negative error code on failure. | ||
132 | * @u.init_evq.index: Index of event queue to create. | ||
133 | * @u.init_evq.buf_count: Number of 4k buffers backing event queue. | ||
134 | * @u.init_evq.addr: Array of length %u.init_evq.buf_count containing DMA | ||
135 | * address of each page backing the event queue. | ||
136 | * @u.init_rxq.index: Index of receive queue to create. | ||
137 | * @u.init_rxq.buf_count: Number of 4k buffers backing receive queue. | ||
138 | * @u.init_rxq.evq: Instance of event queue to target receive events at. | ||
139 | * @u.init_rxq.label: Label used in receive events. | ||
140 | * @u.init_rxq.flags: Flags (%VFDI_RXQ_FLAG_SCATTER_EN enables RX scatter). | ||
141 | * @u.init_rxq.addr: Array of length %u.init_rxq.buf_count containing DMA | ||
142 | * address of each page backing the receive queue. | ||
143 | * @u.init_txq.index: Index of transmit queue to create. | ||
144 | * @u.init_txq.buf_count: Number of 4k buffers backing transmit queue. | ||
145 | * @u.init_txq.evq: Instance of event queue to target transmit completion | ||
146 | * events at. | ||
147 | * @u.init_txq.label: Label used in transmit completion events. | ||
148 | * @u.init_txq.flags: Checksum offload flags. | ||
149 | * @u.init_txq.addr: Array of length %u.init_txq.buf_count containing DMA | ||
150 | * address of each page backing the transmit queue. | ||
151 | * @u.mac_filter.rxq: Insert MAC filter at VF local address/VLAN targeting | ||
152 | * all traffic at this receive queue. | ||
153 | * @u.mac_filter.flags: MAC filter flags. | ||
154 | * @u.set_status_page.dma_addr: Base address for the &struct vfdi_status. | ||
155 | * This address must be such that the structure fits within a page. | ||
156 | * @u.set_status_page.peer_page_count: Number of additional pages the VF | ||
157 | * has provided into which peer addresses may be DMAd. | ||
158 | * @u.set_status_page.peer_page_addr: Array of DMA addresses of pages. | ||
159 | * If the number of peers exceeds 256, then the VF must provide | ||
160 | * additional pages in this array. The PF will then DMA up to | ||
161 | * 512 vfdi_endpoint structures into each page. These addresses | ||
162 | * must be page-aligned. | ||
163 | */ | ||
164 | struct vfdi_req { | ||
165 | u32 op; | ||
166 | u32 reserved1; | ||
167 | s32 rc; | ||
168 | u32 reserved2; | ||
169 | union { | ||
170 | struct { | ||
171 | u32 index; | ||
172 | u32 buf_count; | ||
173 | u64 addr[]; | ||
174 | } init_evq; | ||
175 | struct { | ||
176 | u32 index; | ||
177 | u32 buf_count; | ||
178 | u32 evq; | ||
179 | u32 label; | ||
180 | u32 flags; | ||
181 | #define VFDI_RXQ_FLAG_SCATTER_EN 1 | ||
182 | u32 reserved; | ||
183 | u64 addr[]; | ||
184 | } init_rxq; | ||
185 | struct { | ||
186 | u32 index; | ||
187 | u32 buf_count; | ||
188 | u32 evq; | ||
189 | u32 label; | ||
190 | u32 flags; | ||
191 | #define VFDI_TXQ_FLAG_IP_CSUM_DIS 1 | ||
192 | #define VFDI_TXQ_FLAG_TCPUDP_CSUM_DIS 2 | ||
193 | u32 reserved; | ||
194 | u64 addr[]; | ||
195 | } init_txq; | ||
196 | struct { | ||
197 | u32 rxq; | ||
198 | u32 flags; | ||
199 | #define VFDI_MAC_FILTER_FLAG_RSS 1 | ||
200 | #define VFDI_MAC_FILTER_FLAG_SCATTER 2 | ||
201 | } mac_filter; | ||
202 | struct { | ||
203 | u64 dma_addr; | ||
204 | u64 peer_page_count; | ||
205 | u64 peer_page_addr[]; | ||
206 | } set_status_page; | ||
207 | } u; | ||
208 | }; | ||
209 | |||
210 | /** | ||
211 | * struct vfdi_status - Status provided by PF driver to VF driver | ||
212 | * @generation_start: A generation count DMA'd to VF *before* the | ||
213 | * rest of the structure. | ||
214 | * @generation_end: A generation count DMA'd to VF *after* the | ||
215 | * rest of the structure. | ||
216 | * @version: Version of this structure; currently set to 1. Later | ||
217 | * versions must either be layout-compatible or only be sent to VFs | ||
218 | * that specifically request them. | ||
219 | * @length: Total length of this structure including embedded tables. | ||
220 | * @vi_scale: log2 of the number of VIs available on this VF. This quantity | ||
221 | * is used by the hardware for register decoding. | ||
222 | * @max_tx_channels: The maximum number of transmit queues the VF can use. | ||
223 | * @rss_rxq_count: The number of receive queues present in the shared RSS | ||
224 | * indirection table. | ||
225 | * @peer_count: Total number of peers in the complete peer list. If larger | ||
226 | * than ARRAY_SIZE(%peers), then the VF must provide sufficient | ||
227 | * additional pages, each of which is filled with vfdi_endpoint structures. | ||
228 | * @local: The MAC address and outer VLAN tag of *this* VF | ||
229 | * @peers: Table of peer addresses. The @tci fields in these structures | ||
230 | * are currently unused and must be ignored. Additional peers are | ||
231 | * written into any additional pages provided by the VF. | ||
232 | * @timer_quantum_ns: Timer quantum (nominal period between timer ticks) | ||
233 | * for interrupt moderation timers, in nanoseconds. This member is only | ||
234 | * present if @length is sufficiently large. | ||
235 | */ | ||
236 | struct vfdi_status { | ||
237 | u32 generation_start; | ||
238 | u32 generation_end; | ||
239 | u32 version; | ||
240 | u32 length; | ||
241 | u8 vi_scale; | ||
242 | u8 max_tx_channels; | ||
243 | u8 rss_rxq_count; | ||
244 | u8 reserved1; | ||
245 | u16 peer_count; | ||
246 | u16 reserved2; | ||
247 | struct vfdi_endpoint local; | ||
248 | struct vfdi_endpoint peers[256]; | ||
249 | |||
250 | /* Members below here extend version 1 of this structure */ | ||
251 | u32 timer_quantum_ns; | ||
252 | }; | ||
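The paired generation counters make a torn status DMA detectable in the style of a seqlock: the PF bumps @generation_start before rewriting the body and @generation_end after it, so a snapshot is consistent exactly when the two match. A VF-side sketch (vfdi_status_snapshot() and the retry bound are arbitrary choices, not part of this interface; a real reader would also need read barriers around the compare):

    /* Sketch only: take a consistent snapshot of the status page. */
    static int vfdi_status_snapshot(const struct vfdi_status *live,
                                    struct vfdi_status *snap)
    {
            unsigned int tries;

            for (tries = 0; tries < 100; tries++) {
                    memcpy(snap, live, sizeof(*snap));
                    if (snap->generation_start == snap->generation_end)
                            return 0;
            }
            return -EAGAIN;  /* PF kept rewriting; try again later */
    }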
253 | |||
254 | #endif | ||