-rw-r--r--  drivers/net/ethernet/sfc/efx.c        |  268
-rw-r--r--  drivers/net/ethernet/sfc/efx.h        |   18
-rw-r--r--  drivers/net/ethernet/sfc/enum.h       |   12
-rw-r--r--  drivers/net/ethernet/sfc/ethtool.c    |    4
-rw-r--r--  drivers/net/ethernet/sfc/falcon.c     |   17
-rw-r--r--  drivers/net/ethernet/sfc/filter.c     |  249
-rw-r--r--  drivers/net/ethernet/sfc/mcdi_pcol.h  |    1
-rw-r--r--  drivers/net/ethernet/sfc/net_driver.h |   97
-rw-r--r--  drivers/net/ethernet/sfc/nic.c        |   90
-rw-r--r--  drivers/net/ethernet/sfc/ptp.c        |  116
-rw-r--r--  drivers/net/ethernet/sfc/rx.c         |  793
-rw-r--r--  drivers/net/ethernet/sfc/siena.c      |   25
12 files changed, 1072 insertions(+), 618 deletions(-)
diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c
index 0bc00991d310..f050248e9fba 100644
--- a/drivers/net/ethernet/sfc/efx.c
+++ b/drivers/net/ethernet/sfc/efx.c
@@ -21,7 +21,9 @@ | |||
21 | #include <linux/ethtool.h> | 21 | #include <linux/ethtool.h> |
22 | #include <linux/topology.h> | 22 | #include <linux/topology.h> |
23 | #include <linux/gfp.h> | 23 | #include <linux/gfp.h> |
24 | #include <linux/pci.h> | ||
24 | #include <linux/cpu_rmap.h> | 25 | #include <linux/cpu_rmap.h> |
26 | #include <linux/aer.h> | ||
25 | #include "net_driver.h" | 27 | #include "net_driver.h" |
26 | #include "efx.h" | 28 | #include "efx.h" |
27 | #include "nic.h" | 29 | #include "nic.h" |
@@ -71,21 +73,21 @@ const char *const efx_loopback_mode_names[] = { | |||
71 | 73 | ||
72 | const unsigned int efx_reset_type_max = RESET_TYPE_MAX; | 74 | const unsigned int efx_reset_type_max = RESET_TYPE_MAX; |
73 | const char *const efx_reset_type_names[] = { | 75 | const char *const efx_reset_type_names[] = { |
74 | [RESET_TYPE_INVISIBLE] = "INVISIBLE", | 76 | [RESET_TYPE_INVISIBLE] = "INVISIBLE", |
75 | [RESET_TYPE_ALL] = "ALL", | 77 | [RESET_TYPE_ALL] = "ALL", |
76 | [RESET_TYPE_WORLD] = "WORLD", | 78 | [RESET_TYPE_RECOVER_OR_ALL] = "RECOVER_OR_ALL", |
77 | [RESET_TYPE_DISABLE] = "DISABLE", | 79 | [RESET_TYPE_WORLD] = "WORLD", |
78 | [RESET_TYPE_TX_WATCHDOG] = "TX_WATCHDOG", | 80 | [RESET_TYPE_RECOVER_OR_DISABLE] = "RECOVER_OR_DISABLE", |
79 | [RESET_TYPE_INT_ERROR] = "INT_ERROR", | 81 | [RESET_TYPE_DISABLE] = "DISABLE", |
80 | [RESET_TYPE_RX_RECOVERY] = "RX_RECOVERY", | 82 | [RESET_TYPE_TX_WATCHDOG] = "TX_WATCHDOG", |
81 | [RESET_TYPE_RX_DESC_FETCH] = "RX_DESC_FETCH", | 83 | [RESET_TYPE_INT_ERROR] = "INT_ERROR", |
82 | [RESET_TYPE_TX_DESC_FETCH] = "TX_DESC_FETCH", | 84 | [RESET_TYPE_RX_RECOVERY] = "RX_RECOVERY", |
83 | [RESET_TYPE_TX_SKIP] = "TX_SKIP", | 85 | [RESET_TYPE_RX_DESC_FETCH] = "RX_DESC_FETCH", |
84 | [RESET_TYPE_MC_FAILURE] = "MC_FAILURE", | 86 | [RESET_TYPE_TX_DESC_FETCH] = "TX_DESC_FETCH", |
87 | [RESET_TYPE_TX_SKIP] = "TX_SKIP", | ||
88 | [RESET_TYPE_MC_FAILURE] = "MC_FAILURE", | ||
85 | }; | 89 | }; |
86 | 90 | ||
87 | #define EFX_MAX_MTU (9 * 1024) | ||
88 | |||
89 | /* Reset workqueue. If any NIC has a hardware failure then a reset will be | 91 | /* Reset workqueue. If any NIC has a hardware failure then a reset will be |
90 | * queued onto this work queue. This is not a per-nic work queue, because | 92 | * queued onto this work queue. This is not a per-nic work queue, because |
91 | * efx_reset_work() acquires the rtnl lock, so resets are naturally serialised. | 93 | * efx_reset_work() acquires the rtnl lock, so resets are naturally serialised. |
@@ -117,9 +119,12 @@ MODULE_PARM_DESC(separate_tx_channels, | |||
117 | static int napi_weight = 64; | 119 | static int napi_weight = 64; |
118 | 120 | ||
119 | /* This is the time (in jiffies) between invocations of the hardware | 121 | /* This is the time (in jiffies) between invocations of the hardware |
120 | * monitor. On Falcon-based NICs, this will: | 122 | * monitor. |
123 | * On Falcon-based NICs, this will: | ||
121 | * - Check the on-board hardware monitor; | 124 | * - Check the on-board hardware monitor; |
122 | * - Poll the link state and reconfigure the hardware as necessary. | 125 | * - Poll the link state and reconfigure the hardware as necessary. |
126 | * On Siena-based NICs for power systems with EEH support, this will give EEH a | ||
127 | * chance to start. | ||
123 | */ | 128 | */ |
124 | static unsigned int efx_monitor_interval = 1 * HZ; | 129 | static unsigned int efx_monitor_interval = 1 * HZ; |
125 | 130 | ||
@@ -203,13 +208,14 @@ static void efx_stop_all(struct efx_nic *efx); | |||
203 | #define EFX_ASSERT_RESET_SERIALISED(efx) \ | 208 | #define EFX_ASSERT_RESET_SERIALISED(efx) \ |
204 | do { \ | 209 | do { \ |
205 | if ((efx->state == STATE_READY) || \ | 210 | if ((efx->state == STATE_READY) || \ |
211 | (efx->state == STATE_RECOVERY) || \ | ||
206 | (efx->state == STATE_DISABLED)) \ | 212 | (efx->state == STATE_DISABLED)) \ |
207 | ASSERT_RTNL(); \ | 213 | ASSERT_RTNL(); \ |
208 | } while (0) | 214 | } while (0) |
209 | 215 | ||
210 | static int efx_check_disabled(struct efx_nic *efx) | 216 | static int efx_check_disabled(struct efx_nic *efx) |
211 | { | 217 | { |
212 | if (efx->state == STATE_DISABLED) { | 218 | if (efx->state == STATE_DISABLED || efx->state == STATE_RECOVERY) { |
213 | netif_err(efx, drv, efx->net_dev, | 219 | netif_err(efx, drv, efx->net_dev, |
214 | "device is disabled due to earlier errors\n"); | 220 | "device is disabled due to earlier errors\n"); |
215 | return -EIO; | 221 | return -EIO; |
@@ -242,15 +248,9 @@ static int efx_process_channel(struct efx_channel *channel, int budget) | |||
242 | struct efx_rx_queue *rx_queue = | 248 | struct efx_rx_queue *rx_queue = |
243 | efx_channel_get_rx_queue(channel); | 249 | efx_channel_get_rx_queue(channel); |
244 | 250 | ||
245 | /* Deliver last RX packet. */ | 251 | efx_rx_flush_packet(channel); |
246 | if (channel->rx_pkt) { | 252 | if (rx_queue->enabled) |
247 | __efx_rx_packet(channel, channel->rx_pkt); | ||
248 | channel->rx_pkt = NULL; | ||
249 | } | ||
250 | if (rx_queue->enabled) { | ||
251 | efx_rx_strategy(channel); | ||
252 | efx_fast_push_rx_descriptors(rx_queue); | 253 | efx_fast_push_rx_descriptors(rx_queue); |
253 | } | ||
254 | } | 254 | } |
255 | 255 | ||
256 | return spent; | 256 | return spent; |
@@ -625,20 +625,51 @@ fail: | |||
625 | */ | 625 | */ |
626 | static void efx_start_datapath(struct efx_nic *efx) | 626 | static void efx_start_datapath(struct efx_nic *efx) |
627 | { | 627 | { |
628 | bool old_rx_scatter = efx->rx_scatter; | ||
628 | struct efx_tx_queue *tx_queue; | 629 | struct efx_tx_queue *tx_queue; |
629 | struct efx_rx_queue *rx_queue; | 630 | struct efx_rx_queue *rx_queue; |
630 | struct efx_channel *channel; | 631 | struct efx_channel *channel; |
632 | size_t rx_buf_len; | ||
631 | 633 | ||
632 | /* Calculate the rx buffer allocation parameters required to | 634 | /* Calculate the rx buffer allocation parameters required to |
633 | * support the current MTU, including padding for header | 635 | * support the current MTU, including padding for header |
634 | * alignment and overruns. | 636 | * alignment and overruns. |
635 | */ | 637 | */ |
636 | efx->rx_buffer_len = (max(EFX_PAGE_IP_ALIGN, NET_IP_ALIGN) + | 638 | efx->rx_dma_len = (efx->type->rx_buffer_hash_size + |
637 | EFX_MAX_FRAME_LEN(efx->net_dev->mtu) + | 639 | EFX_MAX_FRAME_LEN(efx->net_dev->mtu) + |
638 | efx->type->rx_buffer_hash_size + | 640 | efx->type->rx_buffer_padding); |
639 | efx->type->rx_buffer_padding); | 641 | rx_buf_len = (sizeof(struct efx_rx_page_state) + |
640 | efx->rx_buffer_order = get_order(efx->rx_buffer_len + | 642 | EFX_PAGE_IP_ALIGN + efx->rx_dma_len); |
641 | sizeof(struct efx_rx_page_state)); | 643 | if (rx_buf_len <= PAGE_SIZE) { |
644 | efx->rx_scatter = false; | ||
645 | efx->rx_buffer_order = 0; | ||
646 | } else if (efx->type->can_rx_scatter) { | ||
647 | BUILD_BUG_ON(sizeof(struct efx_rx_page_state) + | ||
648 | EFX_PAGE_IP_ALIGN + EFX_RX_USR_BUF_SIZE > | ||
649 | PAGE_SIZE / 2); | ||
650 | efx->rx_scatter = true; | ||
651 | efx->rx_dma_len = EFX_RX_USR_BUF_SIZE; | ||
652 | efx->rx_buffer_order = 0; | ||
653 | } else { | ||
654 | efx->rx_scatter = false; | ||
655 | efx->rx_buffer_order = get_order(rx_buf_len); | ||
656 | } | ||
657 | |||
658 | efx_rx_config_page_split(efx); | ||
659 | if (efx->rx_buffer_order) | ||
660 | netif_dbg(efx, drv, efx->net_dev, | ||
661 | "RX buf len=%u; page order=%u batch=%u\n", | ||
662 | efx->rx_dma_len, efx->rx_buffer_order, | ||
663 | efx->rx_pages_per_batch); | ||
664 | else | ||
665 | netif_dbg(efx, drv, efx->net_dev, | ||
666 | "RX buf len=%u step=%u bpp=%u; page batch=%u\n", | ||
667 | efx->rx_dma_len, efx->rx_page_buf_step, | ||
668 | efx->rx_bufs_per_page, efx->rx_pages_per_batch); | ||
669 | |||
670 | /* RX filters also have scatter-enabled flags */ | ||
671 | if (efx->rx_scatter != old_rx_scatter) | ||
672 | efx_filter_update_rx_scatter(efx); | ||
642 | 673 | ||
643 | /* We must keep at least one descriptor in a TX ring empty. | 674 | /* We must keep at least one descriptor in a TX ring empty. |
644 | * We could avoid this when the queue size does not exactly | 675 | * We could avoid this when the queue size does not exactly |
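A worked illustration of the sizing decision above (assuming 4 KiB pages; the exact padding added by EFX_MAX_FRAME_LEN() is not recomputed here): with a standard 1500-byte MTU, the frame plus hash prefix, IP alignment and struct efx_rx_page_state fits comfortably in one page, so rx_scatter stays false and whole order-0 pages are used. With a 9000-byte jumbo MTU the required buffer clearly exceeds PAGE_SIZE, so a NIC with can_rx_scatter set (Falcon B0, Siena) switches to scatter mode with 1824-byte (EFX_RX_USR_BUF_SIZE) order-0 buffers, roughly five per maximum-size frame, while Falcon A1 falls back to a contiguous allocation of order get_order(rx_buf_len), which is order 2 for a buffer of about 9 KiB.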
@@ -655,16 +686,12 @@ static void efx_start_datapath(struct efx_nic *efx) | |||
655 | efx_for_each_channel_tx_queue(tx_queue, channel) | 686 | efx_for_each_channel_tx_queue(tx_queue, channel) |
656 | efx_init_tx_queue(tx_queue); | 687 | efx_init_tx_queue(tx_queue); |
657 | 688 | ||
658 | /* The rx buffer allocation strategy is MTU dependent */ | ||
659 | efx_rx_strategy(channel); | ||
660 | |||
661 | efx_for_each_channel_rx_queue(rx_queue, channel) { | 689 | efx_for_each_channel_rx_queue(rx_queue, channel) { |
662 | efx_init_rx_queue(rx_queue); | 690 | efx_init_rx_queue(rx_queue); |
663 | efx_nic_generate_fill_event(rx_queue); | 691 | efx_nic_generate_fill_event(rx_queue); |
664 | } | 692 | } |
665 | 693 | ||
666 | WARN_ON(channel->rx_pkt != NULL); | 694 | WARN_ON(channel->rx_pkt_n_frags); |
667 | efx_rx_strategy(channel); | ||
668 | } | 695 | } |
669 | 696 | ||
670 | if (netif_device_present(efx->net_dev)) | 697 | if (netif_device_present(efx->net_dev)) |
@@ -683,7 +710,7 @@ static void efx_stop_datapath(struct efx_nic *efx) | |||
683 | BUG_ON(efx->port_enabled); | 710 | BUG_ON(efx->port_enabled); |
684 | 711 | ||
685 | /* Only perform flush if dma is enabled */ | 712 | /* Only perform flush if dma is enabled */ |
686 | if (dev->is_busmaster) { | 713 | if (dev->is_busmaster && efx->state != STATE_RECOVERY) { |
687 | rc = efx_nic_flush_queues(efx); | 714 | rc = efx_nic_flush_queues(efx); |
688 | 715 | ||
689 | if (rc && EFX_WORKAROUND_7803(efx)) { | 716 | if (rc && EFX_WORKAROUND_7803(efx)) { |
@@ -1596,13 +1623,15 @@ static void efx_start_all(struct efx_nic *efx) | |||
1596 | efx_start_port(efx); | 1623 | efx_start_port(efx); |
1597 | efx_start_datapath(efx); | 1624 | efx_start_datapath(efx); |
1598 | 1625 | ||
1599 | /* Start the hardware monitor if there is one. Otherwise (we're link | 1626 | /* Start the hardware monitor if there is one */ |
1600 | * event driven), we have to poll the PHY because after an event queue | 1627 | if (efx->type->monitor != NULL) |
1601 | * flush, we could have a missed a link state change */ | ||
1602 | if (efx->type->monitor != NULL) { | ||
1603 | queue_delayed_work(efx->workqueue, &efx->monitor_work, | 1628 | queue_delayed_work(efx->workqueue, &efx->monitor_work, |
1604 | efx_monitor_interval); | 1629 | efx_monitor_interval); |
1605 | } else { | 1630 | |
1631 | /* If link state detection is normally event-driven, we have | ||
1632 | * to poll now because we could have missed a change | ||
1633 | */ | ||
1634 | if (efx_nic_rev(efx) >= EFX_REV_SIENA_A0) { | ||
1606 | mutex_lock(&efx->mac_lock); | 1635 | mutex_lock(&efx->mac_lock); |
1607 | if (efx->phy_op->poll(efx)) | 1636 | if (efx->phy_op->poll(efx)) |
1608 | efx_link_status_changed(efx); | 1637 | efx_link_status_changed(efx); |
@@ -2309,7 +2338,9 @@ int efx_reset(struct efx_nic *efx, enum reset_type method) | |||
2309 | 2338 | ||
2310 | out: | 2339 | out: |
2311 | /* Leave device stopped if necessary */ | 2340 | /* Leave device stopped if necessary */ |
2312 | disabled = rc || method == RESET_TYPE_DISABLE; | 2341 | disabled = rc || |
2342 | method == RESET_TYPE_DISABLE || | ||
2343 | method == RESET_TYPE_RECOVER_OR_DISABLE; | ||
2313 | rc2 = efx_reset_up(efx, method, !disabled); | 2344 | rc2 = efx_reset_up(efx, method, !disabled); |
2314 | if (rc2) { | 2345 | if (rc2) { |
2315 | disabled = true; | 2346 | disabled = true; |
@@ -2328,13 +2359,48 @@ out: | |||
2328 | return rc; | 2359 | return rc; |
2329 | } | 2360 | } |
2330 | 2361 | ||
2362 | /* Try recovery mechanisms. | ||
2363 | * For now only EEH is supported. | ||
2364 | * Returns 0 if the recovery mechanisms are unsuccessful. | ||
2365 | * Returns a non-zero value otherwise. | ||
2366 | */ | ||
2367 | static int efx_try_recovery(struct efx_nic *efx) | ||
2368 | { | ||
2369 | #ifdef CONFIG_EEH | ||
2370 | /* A PCI error can occur and not be seen by EEH because nothing | ||
2371 | * happens on the PCI bus. In this case the driver may fail and | ||
2372 | * schedule a 'recover or reset', leading to this recovery handler. | ||
2373 | * Manually call the eeh failure check function. | ||
2374 | */ | ||
2375 | struct eeh_dev *eehdev = | ||
2376 | of_node_to_eeh_dev(pci_device_to_OF_node(efx->pci_dev)); | ||
2377 | |||
2378 | if (eeh_dev_check_failure(eehdev)) { | ||
2379 | /* The EEH mechanisms will handle the error and reset the | ||
2380 | * device if necessary. | ||
2381 | */ | ||
2382 | return 1; | ||
2383 | } | ||
2384 | #endif | ||
2385 | return 0; | ||
2386 | } | ||
2387 | |||
2331 | /* The worker thread exists so that code that cannot sleep can | 2388 | /* The worker thread exists so that code that cannot sleep can |
2332 | * schedule a reset for later. | 2389 | * schedule a reset for later. |
2333 | */ | 2390 | */ |
2334 | static void efx_reset_work(struct work_struct *data) | 2391 | static void efx_reset_work(struct work_struct *data) |
2335 | { | 2392 | { |
2336 | struct efx_nic *efx = container_of(data, struct efx_nic, reset_work); | 2393 | struct efx_nic *efx = container_of(data, struct efx_nic, reset_work); |
2337 | unsigned long pending = ACCESS_ONCE(efx->reset_pending); | 2394 | unsigned long pending; |
2395 | enum reset_type method; | ||
2396 | |||
2397 | pending = ACCESS_ONCE(efx->reset_pending); | ||
2398 | method = fls(pending) - 1; | ||
2399 | |||
2400 | if ((method == RESET_TYPE_RECOVER_OR_DISABLE || | ||
2401 | method == RESET_TYPE_RECOVER_OR_ALL) && | ||
2402 | efx_try_recovery(efx)) | ||
2403 | return; | ||
2338 | 2404 | ||
2339 | if (!pending) | 2405 | if (!pending) |
2340 | return; | 2406 | return; |
@@ -2346,7 +2412,7 @@ static void efx_reset_work(struct work_struct *data) | |||
2346 | * it cannot change again. | 2412 | * it cannot change again. |
2347 | */ | 2413 | */ |
2348 | if (efx->state == STATE_READY) | 2414 | if (efx->state == STATE_READY) |
2349 | (void)efx_reset(efx, fls(pending) - 1); | 2415 | (void)efx_reset(efx, method); |
2350 | 2416 | ||
2351 | rtnl_unlock(); | 2417 | rtnl_unlock(); |
2352 | } | 2418 | } |
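Because the reset types in enum.h (further below) are numbered in order of increasing scope, taking the highest set bit of the pending mask always selects the most drastic reset that has been requested. A minimal sketch of that selection, using fls() from <linux/bitops.h> exactly as efx_reset_work() does:

static enum reset_type pick_pending_reset(unsigned long pending)
{
	/* Example: pending = BIT(RESET_TYPE_INVISIBLE) | BIT(RESET_TYPE_ALL)
	 *                  = 0x5.  fls(0x5) = 3, so this returns 2, which is
	 * RESET_TYPE_ALL with the renumbered enum in this patch.
	 */
	return fls(pending) - 1;
}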
@@ -2355,11 +2421,20 @@ void efx_schedule_reset(struct efx_nic *efx, enum reset_type type) | |||
2355 | { | 2421 | { |
2356 | enum reset_type method; | 2422 | enum reset_type method; |
2357 | 2423 | ||
2424 | if (efx->state == STATE_RECOVERY) { | ||
2425 | netif_dbg(efx, drv, efx->net_dev, | ||
2426 | "recovering: skip scheduling %s reset\n", | ||
2427 | RESET_TYPE(type)); | ||
2428 | return; | ||
2429 | } | ||
2430 | |||
2358 | switch (type) { | 2431 | switch (type) { |
2359 | case RESET_TYPE_INVISIBLE: | 2432 | case RESET_TYPE_INVISIBLE: |
2360 | case RESET_TYPE_ALL: | 2433 | case RESET_TYPE_ALL: |
2434 | case RESET_TYPE_RECOVER_OR_ALL: | ||
2361 | case RESET_TYPE_WORLD: | 2435 | case RESET_TYPE_WORLD: |
2362 | case RESET_TYPE_DISABLE: | 2436 | case RESET_TYPE_DISABLE: |
2437 | case RESET_TYPE_RECOVER_OR_DISABLE: | ||
2363 | method = type; | 2438 | method = type; |
2364 | netif_dbg(efx, drv, efx->net_dev, "scheduling %s reset\n", | 2439 | netif_dbg(efx, drv, efx->net_dev, "scheduling %s reset\n", |
2365 | RESET_TYPE(method)); | 2440 | RESET_TYPE(method)); |
@@ -2569,6 +2644,8 @@ static void efx_pci_remove(struct pci_dev *pci_dev) | |||
2569 | efx_fini_struct(efx); | 2644 | efx_fini_struct(efx); |
2570 | pci_set_drvdata(pci_dev, NULL); | 2645 | pci_set_drvdata(pci_dev, NULL); |
2571 | free_netdev(efx->net_dev); | 2646 | free_netdev(efx->net_dev); |
2647 | |||
2648 | pci_disable_pcie_error_reporting(pci_dev); | ||
2572 | }; | 2649 | }; |
2573 | 2650 | ||
2574 | /* NIC VPD information | 2651 | /* NIC VPD information |
@@ -2741,6 +2818,11 @@ static int efx_pci_probe(struct pci_dev *pci_dev, | |||
2741 | netif_warn(efx, probe, efx->net_dev, | 2818 | netif_warn(efx, probe, efx->net_dev, |
2742 | "failed to create MTDs (%d)\n", rc); | 2819 | "failed to create MTDs (%d)\n", rc); |
2743 | 2820 | ||
2821 | rc = pci_enable_pcie_error_reporting(pci_dev); | ||
2822 | if (rc && rc != -EINVAL) | ||
2823 | netif_warn(efx, probe, efx->net_dev, | ||
2824 | "pci_enable_pcie_error_reporting failed (%d)\n", rc); | ||
2825 | |||
2744 | return 0; | 2826 | return 0; |
2745 | 2827 | ||
2746 | fail4: | 2828 | fail4: |
@@ -2865,12 +2947,112 @@ static const struct dev_pm_ops efx_pm_ops = { | |||
2865 | .restore = efx_pm_resume, | 2947 | .restore = efx_pm_resume, |
2866 | }; | 2948 | }; |
2867 | 2949 | ||
2950 | /* A PCI error affecting this device was detected. | ||
2951 | * At this point MMIO and DMA may be disabled. | ||
2952 | * Stop the software path and request a slot reset. | ||
2953 | */ | ||
2954 | pci_ers_result_t efx_io_error_detected(struct pci_dev *pdev, | ||
2955 | enum pci_channel_state state) | ||
2956 | { | ||
2957 | pci_ers_result_t status = PCI_ERS_RESULT_RECOVERED; | ||
2958 | struct efx_nic *efx = pci_get_drvdata(pdev); | ||
2959 | |||
2960 | if (state == pci_channel_io_perm_failure) | ||
2961 | return PCI_ERS_RESULT_DISCONNECT; | ||
2962 | |||
2963 | rtnl_lock(); | ||
2964 | |||
2965 | if (efx->state != STATE_DISABLED) { | ||
2966 | efx->state = STATE_RECOVERY; | ||
2967 | efx->reset_pending = 0; | ||
2968 | |||
2969 | efx_device_detach_sync(efx); | ||
2970 | |||
2971 | efx_stop_all(efx); | ||
2972 | efx_stop_interrupts(efx, false); | ||
2973 | |||
2974 | status = PCI_ERS_RESULT_NEED_RESET; | ||
2975 | } else { | ||
2976 | /* If the interface is disabled we don't want to do anything | ||
2977 | * with it. | ||
2978 | */ | ||
2979 | status = PCI_ERS_RESULT_RECOVERED; | ||
2980 | } | ||
2981 | |||
2982 | rtnl_unlock(); | ||
2983 | |||
2984 | pci_disable_device(pdev); | ||
2985 | |||
2986 | return status; | ||
2987 | } | ||
2988 | |||
2989 | /* Fake a successful reset, which will be performed later in efx_io_resume. */ | ||
2990 | pci_ers_result_t efx_io_slot_reset(struct pci_dev *pdev) | ||
2991 | { | ||
2992 | struct efx_nic *efx = pci_get_drvdata(pdev); | ||
2993 | pci_ers_result_t status = PCI_ERS_RESULT_RECOVERED; | ||
2994 | int rc; | ||
2995 | |||
2996 | if (pci_enable_device(pdev)) { | ||
2997 | netif_err(efx, hw, efx->net_dev, | ||
2998 | "Cannot re-enable PCI device after reset.\n"); | ||
2999 | status = PCI_ERS_RESULT_DISCONNECT; | ||
3000 | } | ||
3001 | |||
3002 | rc = pci_cleanup_aer_uncorrect_error_status(pdev); | ||
3003 | if (rc) { | ||
3004 | netif_err(efx, hw, efx->net_dev, | ||
3005 | "pci_cleanup_aer_uncorrect_error_status failed (%d)\n", rc); | ||
3006 | /* Non-fatal error. Continue. */ | ||
3007 | } | ||
3008 | |||
3009 | return status; | ||
3010 | } | ||
3011 | |||
3012 | /* Perform the actual reset and resume I/O operations. */ | ||
3013 | static void efx_io_resume(struct pci_dev *pdev) | ||
3014 | { | ||
3015 | struct efx_nic *efx = pci_get_drvdata(pdev); | ||
3016 | int rc; | ||
3017 | |||
3018 | rtnl_lock(); | ||
3019 | |||
3020 | if (efx->state == STATE_DISABLED) | ||
3021 | goto out; | ||
3022 | |||
3023 | rc = efx_reset(efx, RESET_TYPE_ALL); | ||
3024 | if (rc) { | ||
3025 | netif_err(efx, hw, efx->net_dev, | ||
3026 | "efx_reset failed after PCI error (%d)\n", rc); | ||
3027 | } else { | ||
3028 | efx->state = STATE_READY; | ||
3029 | netif_dbg(efx, hw, efx->net_dev, | ||
3030 | "Done resetting and resuming IO after PCI error.\n"); | ||
3031 | } | ||
3032 | |||
3033 | out: | ||
3034 | rtnl_unlock(); | ||
3035 | } | ||
3036 | |||
3037 | /* For simplicity and reliability, we always require a slot reset and try to | ||
3038 | * reset the hardware when a pci error affecting the device is detected. | ||
3039 | * We leave both the link_reset and mmio_enabled callback unimplemented: | ||
3040 | * with our request for slot reset the mmio_enabled callback will never be | ||
3041 | * called, and the link_reset callback is not used by AER or EEH mechanisms. | ||
3042 | */ | ||
3043 | static struct pci_error_handlers efx_err_handlers = { | ||
3044 | .error_detected = efx_io_error_detected, | ||
3045 | .slot_reset = efx_io_slot_reset, | ||
3046 | .resume = efx_io_resume, | ||
3047 | }; | ||
3048 | |||
2868 | static struct pci_driver efx_pci_driver = { | 3049 | static struct pci_driver efx_pci_driver = { |
2869 | .name = KBUILD_MODNAME, | 3050 | .name = KBUILD_MODNAME, |
2870 | .id_table = efx_pci_table, | 3051 | .id_table = efx_pci_table, |
2871 | .probe = efx_pci_probe, | 3052 | .probe = efx_pci_probe, |
2872 | .remove = efx_pci_remove, | 3053 | .remove = efx_pci_remove, |
2873 | .driver.pm = &efx_pm_ops, | 3054 | .driver.pm = &efx_pm_ops, |
3055 | .err_handler = &efx_err_handlers, | ||
2874 | }; | 3056 | }; |
2875 | 3057 | ||
2876 | /************************************************************************** | 3058 | /************************************************************************** |
diff --git a/drivers/net/ethernet/sfc/efx.h b/drivers/net/ethernet/sfc/efx.h
index 50247dfe8f57..8372da239b43 100644
--- a/drivers/net/ethernet/sfc/efx.h
+++ b/drivers/net/ethernet/sfc/efx.h
@@ -33,17 +33,22 @@ extern int efx_setup_tc(struct net_device *net_dev, u8 num_tc); | |||
33 | extern unsigned int efx_tx_max_skb_descs(struct efx_nic *efx); | 33 | extern unsigned int efx_tx_max_skb_descs(struct efx_nic *efx); |
34 | 34 | ||
35 | /* RX */ | 35 | /* RX */ |
36 | extern void efx_rx_config_page_split(struct efx_nic *efx); | ||
36 | extern int efx_probe_rx_queue(struct efx_rx_queue *rx_queue); | 37 | extern int efx_probe_rx_queue(struct efx_rx_queue *rx_queue); |
37 | extern void efx_remove_rx_queue(struct efx_rx_queue *rx_queue); | 38 | extern void efx_remove_rx_queue(struct efx_rx_queue *rx_queue); |
38 | extern void efx_init_rx_queue(struct efx_rx_queue *rx_queue); | 39 | extern void efx_init_rx_queue(struct efx_rx_queue *rx_queue); |
39 | extern void efx_fini_rx_queue(struct efx_rx_queue *rx_queue); | 40 | extern void efx_fini_rx_queue(struct efx_rx_queue *rx_queue); |
40 | extern void efx_rx_strategy(struct efx_channel *channel); | ||
41 | extern void efx_fast_push_rx_descriptors(struct efx_rx_queue *rx_queue); | 41 | extern void efx_fast_push_rx_descriptors(struct efx_rx_queue *rx_queue); |
42 | extern void efx_rx_slow_fill(unsigned long context); | 42 | extern void efx_rx_slow_fill(unsigned long context); |
43 | extern void __efx_rx_packet(struct efx_channel *channel, | 43 | extern void __efx_rx_packet(struct efx_channel *channel); |
44 | struct efx_rx_buffer *rx_buf); | 44 | extern void efx_rx_packet(struct efx_rx_queue *rx_queue, |
45 | extern void efx_rx_packet(struct efx_rx_queue *rx_queue, unsigned int index, | 45 | unsigned int index, unsigned int n_frags, |
46 | unsigned int len, u16 flags); | 46 | unsigned int len, u16 flags); |
47 | static inline void efx_rx_flush_packet(struct efx_channel *channel) | ||
48 | { | ||
49 | if (channel->rx_pkt_n_frags) | ||
50 | __efx_rx_packet(channel); | ||
51 | } | ||
47 | extern void efx_schedule_slow_fill(struct efx_rx_queue *rx_queue); | 52 | extern void efx_schedule_slow_fill(struct efx_rx_queue *rx_queue); |
48 | 53 | ||
49 | #define EFX_MAX_DMAQ_SIZE 4096UL | 54 | #define EFX_MAX_DMAQ_SIZE 4096UL |
@@ -67,6 +72,7 @@ extern void efx_schedule_slow_fill(struct efx_rx_queue *rx_queue); | |||
67 | extern int efx_probe_filters(struct efx_nic *efx); | 72 | extern int efx_probe_filters(struct efx_nic *efx); |
68 | extern void efx_restore_filters(struct efx_nic *efx); | 73 | extern void efx_restore_filters(struct efx_nic *efx); |
69 | extern void efx_remove_filters(struct efx_nic *efx); | 74 | extern void efx_remove_filters(struct efx_nic *efx); |
75 | extern void efx_filter_update_rx_scatter(struct efx_nic *efx); | ||
70 | extern s32 efx_filter_insert_filter(struct efx_nic *efx, | 76 | extern s32 efx_filter_insert_filter(struct efx_nic *efx, |
71 | struct efx_filter_spec *spec, | 77 | struct efx_filter_spec *spec, |
72 | bool replace); | 78 | bool replace); |
@@ -171,9 +177,9 @@ static inline void efx_device_detach_sync(struct efx_nic *efx) | |||
171 | * TX scheduler is stopped when we're done and before | 177 | * TX scheduler is stopped when we're done and before |
172 | * netif_device_present() becomes false. | 178 | * netif_device_present() becomes false. |
173 | */ | 179 | */ |
174 | netif_tx_lock(dev); | 180 | netif_tx_lock_bh(dev); |
175 | netif_device_detach(dev); | 181 | netif_device_detach(dev); |
176 | netif_tx_unlock(dev); | 182 | netif_tx_unlock_bh(dev); |
177 | } | 183 | } |
178 | 184 | ||
179 | #endif /* EFX_EFX_H */ | 185 | #endif /* EFX_EFX_H */ |
diff --git a/drivers/net/ethernet/sfc/enum.h b/drivers/net/ethernet/sfc/enum.h
index 182dbe2cc6e4..ab8fb5889e55 100644
--- a/drivers/net/ethernet/sfc/enum.h
+++ b/drivers/net/ethernet/sfc/enum.h
@@ -137,8 +137,12 @@ enum efx_loopback_mode { | |||
137 | * Reset methods are numbered in order of increasing scope. | 137 | * Reset methods are numbered in order of increasing scope. |
138 | * | 138 | * |
139 | * @RESET_TYPE_INVISIBLE: Reset datapath and MAC (Falcon only) | 139 | * @RESET_TYPE_INVISIBLE: Reset datapath and MAC (Falcon only) |
140 | * @RESET_TYPE_RECOVER_OR_ALL: Try to recover. Apply RESET_TYPE_ALL | ||
141 | * if unsuccessful. | ||
140 | * @RESET_TYPE_ALL: Reset datapath, MAC and PHY | 142 | * @RESET_TYPE_ALL: Reset datapath, MAC and PHY |
141 | * @RESET_TYPE_WORLD: Reset as much as possible | 143 | * @RESET_TYPE_WORLD: Reset as much as possible |
144 | * @RESET_TYPE_RECOVER_OR_DISABLE: Try to recover. Apply RESET_TYPE_DISABLE if | ||
145 | * unsuccessful. | ||
142 | * @RESET_TYPE_DISABLE: Reset datapath, MAC and PHY; leave NIC disabled | 146 | * @RESET_TYPE_DISABLE: Reset datapath, MAC and PHY; leave NIC disabled |
143 | * @RESET_TYPE_TX_WATCHDOG: reset due to TX watchdog | 147 | * @RESET_TYPE_TX_WATCHDOG: reset due to TX watchdog |
144 | * @RESET_TYPE_INT_ERROR: reset due to internal error | 148 | * @RESET_TYPE_INT_ERROR: reset due to internal error |
@@ -150,9 +154,11 @@ enum efx_loopback_mode { | |||
150 | */ | 154 | */ |
151 | enum reset_type { | 155 | enum reset_type { |
152 | RESET_TYPE_INVISIBLE = 0, | 156 | RESET_TYPE_INVISIBLE = 0, |
153 | RESET_TYPE_ALL = 1, | 157 | RESET_TYPE_RECOVER_OR_ALL = 1, |
154 | RESET_TYPE_WORLD = 2, | 158 | RESET_TYPE_ALL = 2, |
155 | RESET_TYPE_DISABLE = 3, | 159 | RESET_TYPE_WORLD = 3, |
160 | RESET_TYPE_RECOVER_OR_DISABLE = 4, | ||
161 | RESET_TYPE_DISABLE = 5, | ||
156 | RESET_TYPE_MAX_METHOD, | 162 | RESET_TYPE_MAX_METHOD, |
157 | RESET_TYPE_TX_WATCHDOG, | 163 | RESET_TYPE_TX_WATCHDOG, |
158 | RESET_TYPE_INT_ERROR, | 164 | RESET_TYPE_INT_ERROR, |
diff --git a/drivers/net/ethernet/sfc/ethtool.c b/drivers/net/ethernet/sfc/ethtool.c
index 8e61cd06f66a..6e768175e7e0 100644
--- a/drivers/net/ethernet/sfc/ethtool.c
+++ b/drivers/net/ethernet/sfc/ethtool.c
@@ -154,6 +154,7 @@ static const struct efx_ethtool_stat efx_ethtool_stats[] = { | |||
154 | EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_tcp_udp_chksum_err), | 154 | EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_tcp_udp_chksum_err), |
155 | EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_mcast_mismatch), | 155 | EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_mcast_mismatch), |
156 | EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_frm_trunc), | 156 | EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_frm_trunc), |
157 | EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_nodesc_trunc), | ||
157 | }; | 158 | }; |
158 | 159 | ||
159 | /* Number of ethtool statistics */ | 160 | /* Number of ethtool statistics */ |
@@ -978,7 +979,8 @@ static int efx_ethtool_set_class_rule(struct efx_nic *efx, | |||
978 | rule->m_ext.data[1])) | 979 | rule->m_ext.data[1])) |
979 | return -EINVAL; | 980 | return -EINVAL; |
980 | 981 | ||
981 | efx_filter_init_rx(&spec, EFX_FILTER_PRI_MANUAL, 0, | 982 | efx_filter_init_rx(&spec, EFX_FILTER_PRI_MANUAL, |
983 | efx->rx_scatter ? EFX_FILTER_FLAG_RX_SCATTER : 0, | ||
982 | (rule->ring_cookie == RX_CLS_FLOW_DISC) ? | 984 | (rule->ring_cookie == RX_CLS_FLOW_DISC) ? |
983 | 0xfff : rule->ring_cookie); | 985 | 0xfff : rule->ring_cookie); |
984 | 986 | ||
diff --git a/drivers/net/ethernet/sfc/falcon.c b/drivers/net/ethernet/sfc/falcon.c
index 49bcd196e10d..4486102fa9b3 100644
--- a/drivers/net/ethernet/sfc/falcon.c
+++ b/drivers/net/ethernet/sfc/falcon.c
@@ -1546,10 +1546,6 @@ static int falcon_probe_nic(struct efx_nic *efx) | |||
1546 | 1546 | ||
1547 | static void falcon_init_rx_cfg(struct efx_nic *efx) | 1547 | static void falcon_init_rx_cfg(struct efx_nic *efx) |
1548 | { | 1548 | { |
1549 | /* Prior to Siena the RX DMA engine will split each frame at | ||
1550 | * intervals of RX_USR_BUF_SIZE (32-byte units). We set it to | ||
1551 | * be so large that that never happens. */ | ||
1552 | const unsigned huge_buf_size = (3 * 4096) >> 5; | ||
1553 | /* RX control FIFO thresholds (32 entries) */ | 1549 | /* RX control FIFO thresholds (32 entries) */ |
1554 | const unsigned ctrl_xon_thr = 20; | 1550 | const unsigned ctrl_xon_thr = 20; |
1555 | const unsigned ctrl_xoff_thr = 25; | 1551 | const unsigned ctrl_xoff_thr = 25; |
@@ -1557,10 +1553,15 @@ static void falcon_init_rx_cfg(struct efx_nic *efx) | |||
1557 | 1553 | ||
1558 | efx_reado(efx, ®, FR_AZ_RX_CFG); | 1554 | efx_reado(efx, ®, FR_AZ_RX_CFG); |
1559 | if (efx_nic_rev(efx) <= EFX_REV_FALCON_A1) { | 1555 | if (efx_nic_rev(efx) <= EFX_REV_FALCON_A1) { |
1560 | /* Data FIFO size is 5.5K */ | 1556 | /* Data FIFO size is 5.5K. The RX DMA engine only |
1557 | * supports scattering for user-mode queues, but will | ||
1558 | * split DMA writes at intervals of RX_USR_BUF_SIZE | ||
1559 | * (32-byte units) even for kernel-mode queues. We | ||
1560 | * set it to be so large that that never happens. | ||
1561 | */ | ||
1561 | EFX_SET_OWORD_FIELD(reg, FRF_AA_RX_DESC_PUSH_EN, 0); | 1562 | EFX_SET_OWORD_FIELD(reg, FRF_AA_RX_DESC_PUSH_EN, 0); |
1562 | EFX_SET_OWORD_FIELD(reg, FRF_AA_RX_USR_BUF_SIZE, | 1563 | EFX_SET_OWORD_FIELD(reg, FRF_AA_RX_USR_BUF_SIZE, |
1563 | huge_buf_size); | 1564 | (3 * 4096) >> 5); |
1564 | EFX_SET_OWORD_FIELD(reg, FRF_AA_RX_XON_MAC_TH, 512 >> 8); | 1565 | EFX_SET_OWORD_FIELD(reg, FRF_AA_RX_XON_MAC_TH, 512 >> 8); |
1565 | EFX_SET_OWORD_FIELD(reg, FRF_AA_RX_XOFF_MAC_TH, 2048 >> 8); | 1566 | EFX_SET_OWORD_FIELD(reg, FRF_AA_RX_XOFF_MAC_TH, 2048 >> 8); |
1566 | EFX_SET_OWORD_FIELD(reg, FRF_AA_RX_XON_TX_TH, ctrl_xon_thr); | 1567 | EFX_SET_OWORD_FIELD(reg, FRF_AA_RX_XON_TX_TH, ctrl_xon_thr); |
@@ -1569,7 +1570,7 @@ static void falcon_init_rx_cfg(struct efx_nic *efx) | |||
1569 | /* Data FIFO size is 80K; register fields moved */ | 1570 | /* Data FIFO size is 80K; register fields moved */ |
1570 | EFX_SET_OWORD_FIELD(reg, FRF_BZ_RX_DESC_PUSH_EN, 0); | 1571 | EFX_SET_OWORD_FIELD(reg, FRF_BZ_RX_DESC_PUSH_EN, 0); |
1571 | EFX_SET_OWORD_FIELD(reg, FRF_BZ_RX_USR_BUF_SIZE, | 1572 | EFX_SET_OWORD_FIELD(reg, FRF_BZ_RX_USR_BUF_SIZE, |
1572 | huge_buf_size); | 1573 | EFX_RX_USR_BUF_SIZE >> 5); |
1573 | /* Send XON and XOFF at ~3 * max MTU away from empty/full */ | 1574 | /* Send XON and XOFF at ~3 * max MTU away from empty/full */ |
1574 | EFX_SET_OWORD_FIELD(reg, FRF_BZ_RX_XON_MAC_TH, 27648 >> 8); | 1575 | EFX_SET_OWORD_FIELD(reg, FRF_BZ_RX_XON_MAC_TH, 27648 >> 8); |
1575 | EFX_SET_OWORD_FIELD(reg, FRF_BZ_RX_XOFF_MAC_TH, 54272 >> 8); | 1576 | EFX_SET_OWORD_FIELD(reg, FRF_BZ_RX_XOFF_MAC_TH, 54272 >> 8); |
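For reference, the FRF_*_RX_USR_BUF_SIZE field is programmed in 32-byte units, hence the ">> 5": the old value of (3 * 4096) = 12288 bytes wrote 384 units, deliberately larger than any frame so that splitting never triggered, while the new EFX_RX_USR_BUF_SIZE of 1824 bytes (defined in net_driver.h in this patch) writes 1824 >> 5 = 57 units, which is what makes per-buffer scattering on B0 and later parts actually take effect.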
@@ -1815,6 +1816,7 @@ const struct efx_nic_type falcon_a1_nic_type = { | |||
1815 | .evq_rptr_tbl_base = FR_AA_EVQ_RPTR_KER, | 1816 | .evq_rptr_tbl_base = FR_AA_EVQ_RPTR_KER, |
1816 | .max_dma_mask = DMA_BIT_MASK(FSF_AZ_TX_KER_BUF_ADDR_WIDTH), | 1817 | .max_dma_mask = DMA_BIT_MASK(FSF_AZ_TX_KER_BUF_ADDR_WIDTH), |
1817 | .rx_buffer_padding = 0x24, | 1818 | .rx_buffer_padding = 0x24, |
1819 | .can_rx_scatter = false, | ||
1818 | .max_interrupt_mode = EFX_INT_MODE_MSI, | 1820 | .max_interrupt_mode = EFX_INT_MODE_MSI, |
1819 | .phys_addr_channels = 4, | 1821 | .phys_addr_channels = 4, |
1820 | .timer_period_max = 1 << FRF_AB_TC_TIMER_VAL_WIDTH, | 1822 | .timer_period_max = 1 << FRF_AB_TC_TIMER_VAL_WIDTH, |
@@ -1865,6 +1867,7 @@ const struct efx_nic_type falcon_b0_nic_type = { | |||
1865 | .max_dma_mask = DMA_BIT_MASK(FSF_AZ_TX_KER_BUF_ADDR_WIDTH), | 1867 | .max_dma_mask = DMA_BIT_MASK(FSF_AZ_TX_KER_BUF_ADDR_WIDTH), |
1866 | .rx_buffer_hash_size = 0x10, | 1868 | .rx_buffer_hash_size = 0x10, |
1867 | .rx_buffer_padding = 0, | 1869 | .rx_buffer_padding = 0, |
1870 | .can_rx_scatter = true, | ||
1868 | .max_interrupt_mode = EFX_INT_MODE_MSIX, | 1871 | .max_interrupt_mode = EFX_INT_MODE_MSIX, |
1869 | .phys_addr_channels = 32, /* Hardware limit is 64, but the legacy | 1872 | .phys_addr_channels = 32, /* Hardware limit is 64, but the legacy |
1870 | * interrupt handler only supports 32 | 1873 | * interrupt handler only supports 32 |
diff --git a/drivers/net/ethernet/sfc/filter.c b/drivers/net/ethernet/sfc/filter.c
index 8af42cd1feda..2397f0e8d3eb 100644
--- a/drivers/net/ethernet/sfc/filter.c
+++ b/drivers/net/ethernet/sfc/filter.c
@@ -66,6 +66,10 @@ struct efx_filter_state { | |||
66 | #endif | 66 | #endif |
67 | }; | 67 | }; |
68 | 68 | ||
69 | static void efx_filter_table_clear_entry(struct efx_nic *efx, | ||
70 | struct efx_filter_table *table, | ||
71 | unsigned int filter_idx); | ||
72 | |||
69 | /* The filter hash function is LFSR polynomial x^16 + x^3 + 1 of a 32-bit | 73 | /* The filter hash function is LFSR polynomial x^16 + x^3 + 1 of a 32-bit |
70 | * key derived from the n-tuple. The initial LFSR state is 0xffff. */ | 74 | * key derived from the n-tuple. The initial LFSR state is 0xffff. */ |
71 | static u16 efx_filter_hash(u32 key) | 75 | static u16 efx_filter_hash(u32 key) |
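To make the hash comment above concrete, here is an unoptimised bit-serial sketch of an LFSR hash with polynomial x^16 + x^3 + 1 and initial state 0xffff, clocked over the 32 key bits. It shows the structure only; the driver's efx_filter_hash() uses an unrolled shift/XOR form, and the hardware's exact bit ordering is not reproduced here.

static u16 lfsr_hash_sketch(u32 key)
{
	u16 state = 0xffff;	/* initial LFSR state */
	int i;

	/* Clock the 32 key bits through the LFSR, MSB first */
	for (i = 31; i >= 0; i--) {
		unsigned int feedback = ((key >> i) ^ (state >> 15)) & 1;

		state <<= 1;
		if (feedback)
			state ^= 0x0009;	/* taps for x^3 + 1 */
	}
	return state;
}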
@@ -168,6 +172,25 @@ static void efx_filter_push_rx_config(struct efx_nic *efx) | |||
168 | filter_ctl, FRF_CZ_MULTICAST_NOMATCH_RSS_ENABLED, | 172 | filter_ctl, FRF_CZ_MULTICAST_NOMATCH_RSS_ENABLED, |
169 | !!(table->spec[EFX_FILTER_INDEX_MC_DEF].flags & | 173 | !!(table->spec[EFX_FILTER_INDEX_MC_DEF].flags & |
170 | EFX_FILTER_FLAG_RX_RSS)); | 174 | EFX_FILTER_FLAG_RX_RSS)); |
175 | |||
176 | /* There is a single bit to enable RX scatter for all | ||
177 | * unmatched packets. Only set it if scatter is | ||
178 | * enabled in both filter specs. | ||
179 | */ | ||
180 | EFX_SET_OWORD_FIELD( | ||
181 | filter_ctl, FRF_BZ_SCATTER_ENBL_NO_MATCH_Q, | ||
182 | !!(table->spec[EFX_FILTER_INDEX_UC_DEF].flags & | ||
183 | table->spec[EFX_FILTER_INDEX_MC_DEF].flags & | ||
184 | EFX_FILTER_FLAG_RX_SCATTER)); | ||
185 | } else if (efx_nic_rev(efx) >= EFX_REV_FALCON_B0) { | ||
186 | /* We don't expose 'default' filters because unmatched | ||
187 | * packets always go to the queue number found in the | ||
188 | * RSS table. But we still need to set the RX scatter | ||
189 | * bit here. | ||
190 | */ | ||
191 | EFX_SET_OWORD_FIELD( | ||
192 | filter_ctl, FRF_BZ_SCATTER_ENBL_NO_MATCH_Q, | ||
193 | efx->rx_scatter); | ||
171 | } | 194 | } |
172 | 195 | ||
173 | efx_writeo(efx, &filter_ctl, FR_BZ_RX_FILTER_CTL); | 196 | efx_writeo(efx, &filter_ctl, FR_BZ_RX_FILTER_CTL); |
@@ -409,9 +432,18 @@ static void efx_filter_reset_rx_def(struct efx_nic *efx, unsigned filter_idx) | |||
409 | struct efx_filter_state *state = efx->filter_state; | 432 | struct efx_filter_state *state = efx->filter_state; |
410 | struct efx_filter_table *table = &state->table[EFX_FILTER_TABLE_RX_DEF]; | 433 | struct efx_filter_table *table = &state->table[EFX_FILTER_TABLE_RX_DEF]; |
411 | struct efx_filter_spec *spec = &table->spec[filter_idx]; | 434 | struct efx_filter_spec *spec = &table->spec[filter_idx]; |
435 | enum efx_filter_flags flags = 0; | ||
436 | |||
437 | /* If there's only one channel then disable RSS for non VF | ||
438 | * traffic, thereby allowing VFs to use RSS when the PF can't. | ||
439 | */ | ||
440 | if (efx->n_rx_channels > 1) | ||
441 | flags |= EFX_FILTER_FLAG_RX_RSS; | ||
412 | 442 | ||
413 | efx_filter_init_rx(spec, EFX_FILTER_PRI_MANUAL, | 443 | if (efx->rx_scatter) |
414 | EFX_FILTER_FLAG_RX_RSS, 0); | 444 | flags |= EFX_FILTER_FLAG_RX_SCATTER; |
445 | |||
446 | efx_filter_init_rx(spec, EFX_FILTER_PRI_MANUAL, flags, 0); | ||
415 | spec->type = EFX_FILTER_UC_DEF + filter_idx; | 447 | spec->type = EFX_FILTER_UC_DEF + filter_idx; |
416 | table->used_bitmap[0] |= 1 << filter_idx; | 448 | table->used_bitmap[0] |= 1 << filter_idx; |
417 | } | 449 | } |
@@ -463,13 +495,6 @@ static u32 efx_filter_build(efx_oword_t *filter, struct efx_filter_spec *spec) | |||
463 | break; | 495 | break; |
464 | } | 496 | } |
465 | 497 | ||
466 | case EFX_FILTER_TABLE_RX_DEF: | ||
467 | /* One filter spec per type */ | ||
468 | BUILD_BUG_ON(EFX_FILTER_INDEX_UC_DEF != 0); | ||
469 | BUILD_BUG_ON(EFX_FILTER_INDEX_MC_DEF != | ||
470 | EFX_FILTER_MC_DEF - EFX_FILTER_UC_DEF); | ||
471 | return spec->type - EFX_FILTER_UC_DEF; | ||
472 | |||
473 | case EFX_FILTER_TABLE_RX_MAC: { | 498 | case EFX_FILTER_TABLE_RX_MAC: { |
474 | bool is_wild = spec->type == EFX_FILTER_MAC_WILD; | 499 | bool is_wild = spec->type == EFX_FILTER_MAC_WILD; |
475 | EFX_POPULATE_OWORD_7( | 500 | EFX_POPULATE_OWORD_7( |
@@ -520,42 +545,6 @@ static bool efx_filter_equal(const struct efx_filter_spec *left, | |||
520 | return true; | 545 | return true; |
521 | } | 546 | } |
522 | 547 | ||
523 | static int efx_filter_search(struct efx_filter_table *table, | ||
524 | struct efx_filter_spec *spec, u32 key, | ||
525 | bool for_insert, unsigned int *depth_required) | ||
526 | { | ||
527 | unsigned hash, incr, filter_idx, depth, depth_max; | ||
528 | |||
529 | hash = efx_filter_hash(key); | ||
530 | incr = efx_filter_increment(key); | ||
531 | |||
532 | filter_idx = hash & (table->size - 1); | ||
533 | depth = 1; | ||
534 | depth_max = (for_insert ? | ||
535 | (spec->priority <= EFX_FILTER_PRI_HINT ? | ||
536 | FILTER_CTL_SRCH_HINT_MAX : FILTER_CTL_SRCH_MAX) : | ||
537 | table->search_depth[spec->type]); | ||
538 | |||
539 | for (;;) { | ||
540 | /* Return success if entry is used and matches this spec | ||
541 | * or entry is unused and we are trying to insert. | ||
542 | */ | ||
543 | if (test_bit(filter_idx, table->used_bitmap) ? | ||
544 | efx_filter_equal(spec, &table->spec[filter_idx]) : | ||
545 | for_insert) { | ||
546 | *depth_required = depth; | ||
547 | return filter_idx; | ||
548 | } | ||
549 | |||
550 | /* Return failure if we reached the maximum search depth */ | ||
551 | if (depth == depth_max) | ||
552 | return for_insert ? -EBUSY : -ENOENT; | ||
553 | |||
554 | filter_idx = (filter_idx + incr) & (table->size - 1); | ||
555 | ++depth; | ||
556 | } | ||
557 | } | ||
558 | |||
559 | /* | 548 | /* |
560 | * Construct/deconstruct external filter IDs. At least the RX filter | 549 | * Construct/deconstruct external filter IDs. At least the RX filter |
561 | * IDs must be ordered by matching priority, for RX NFC semantics. | 550 | * IDs must be ordered by matching priority, for RX NFC semantics. |
@@ -650,44 +639,111 @@ u32 efx_filter_get_rx_id_limit(struct efx_nic *efx) | |||
650 | * efx_filter_insert_filter - add or replace a filter | 639 | * efx_filter_insert_filter - add or replace a filter |
651 | * @efx: NIC in which to insert the filter | 640 | * @efx: NIC in which to insert the filter |
652 | * @spec: Specification for the filter | 641 | * @spec: Specification for the filter |
653 | * @replace: Flag for whether the specified filter may replace a filter | 642 | * @replace_equal: Flag for whether the specified filter may replace an |
654 | * with an identical match expression and equal or lower priority | 643 | * existing filter with equal priority |
655 | * | 644 | * |
656 | * On success, return the filter ID. | 645 | * On success, return the filter ID. |
657 | * On failure, return a negative error code. | 646 | * On failure, return a negative error code. |
647 | * | ||
648 | * If an existing filter has equal match values to the new filter | ||
649 | * spec, then the new filter might replace it, depending on the | ||
650 | * relative priorities. If the existing filter has lower priority, or | ||
651 | * if @replace_equal is set and it has equal priority, then it is | ||
652 | * replaced. Otherwise the function fails, returning -%EPERM if | ||
653 | * the existing filter has higher priority or -%EEXIST if it has | ||
654 | * equal priority. | ||
658 | */ | 655 | */ |
659 | s32 efx_filter_insert_filter(struct efx_nic *efx, struct efx_filter_spec *spec, | 656 | s32 efx_filter_insert_filter(struct efx_nic *efx, struct efx_filter_spec *spec, |
660 | bool replace) | 657 | bool replace_equal) |
661 | { | 658 | { |
662 | struct efx_filter_state *state = efx->filter_state; | 659 | struct efx_filter_state *state = efx->filter_state; |
663 | struct efx_filter_table *table = efx_filter_spec_table(state, spec); | 660 | struct efx_filter_table *table = efx_filter_spec_table(state, spec); |
664 | struct efx_filter_spec *saved_spec; | ||
665 | efx_oword_t filter; | 661 | efx_oword_t filter; |
666 | unsigned int filter_idx, depth = 0; | 662 | int rep_index, ins_index; |
667 | u32 key; | 663 | unsigned int depth = 0; |
668 | int rc; | 664 | int rc; |
669 | 665 | ||
670 | if (!table || table->size == 0) | 666 | if (!table || table->size == 0) |
671 | return -EINVAL; | 667 | return -EINVAL; |
672 | 668 | ||
673 | key = efx_filter_build(&filter, spec); | ||
674 | |||
675 | netif_vdbg(efx, hw, efx->net_dev, | 669 | netif_vdbg(efx, hw, efx->net_dev, |
676 | "%s: type %d search_depth=%d", __func__, spec->type, | 670 | "%s: type %d search_depth=%d", __func__, spec->type, |
677 | table->search_depth[spec->type]); | 671 | table->search_depth[spec->type]); |
678 | 672 | ||
679 | spin_lock_bh(&state->lock); | 673 | if (table->id == EFX_FILTER_TABLE_RX_DEF) { |
674 | /* One filter spec per type */ | ||
675 | BUILD_BUG_ON(EFX_FILTER_INDEX_UC_DEF != 0); | ||
676 | BUILD_BUG_ON(EFX_FILTER_INDEX_MC_DEF != | ||
677 | EFX_FILTER_MC_DEF - EFX_FILTER_UC_DEF); | ||
678 | rep_index = spec->type - EFX_FILTER_INDEX_UC_DEF; | ||
679 | ins_index = rep_index; | ||
680 | 680 | ||
681 | rc = efx_filter_search(table, spec, key, true, &depth); | 681 | spin_lock_bh(&state->lock); |
682 | if (rc < 0) | 682 | } else { |
683 | goto out; | 683 | /* Search concurrently for |
684 | filter_idx = rc; | 684 | * (1) a filter to be replaced (rep_index): any filter |
685 | BUG_ON(filter_idx >= table->size); | 685 | * with the same match values, up to the current |
686 | saved_spec = &table->spec[filter_idx]; | 686 | * search depth for this type, and |
687 | 687 | * (2) the insertion point (ins_index): (1) or any | |
688 | if (test_bit(filter_idx, table->used_bitmap)) { | 688 | * free slot before it or up to the maximum search |
689 | /* Should we replace the existing filter? */ | 689 | * depth for this priority |
690 | if (!replace) { | 690 | * We fail if we cannot find (2). |
691 | * | ||
692 | * We can stop once either | ||
693 | * (a) we find (1), in which case we have definitely | ||
694 | * found (2) as well; or | ||
695 | * (b) we have searched exhaustively for (1), and have | ||
696 | * either found (2) or searched exhaustively for it | ||
697 | */ | ||
698 | u32 key = efx_filter_build(&filter, spec); | ||
699 | unsigned int hash = efx_filter_hash(key); | ||
700 | unsigned int incr = efx_filter_increment(key); | ||
701 | unsigned int max_rep_depth = table->search_depth[spec->type]; | ||
702 | unsigned int max_ins_depth = | ||
703 | spec->priority <= EFX_FILTER_PRI_HINT ? | ||
704 | FILTER_CTL_SRCH_HINT_MAX : FILTER_CTL_SRCH_MAX; | ||
705 | unsigned int i = hash & (table->size - 1); | ||
706 | |||
707 | ins_index = -1; | ||
708 | depth = 1; | ||
709 | |||
710 | spin_lock_bh(&state->lock); | ||
711 | |||
712 | for (;;) { | ||
713 | if (!test_bit(i, table->used_bitmap)) { | ||
714 | if (ins_index < 0) | ||
715 | ins_index = i; | ||
716 | } else if (efx_filter_equal(spec, &table->spec[i])) { | ||
717 | /* Case (a) */ | ||
718 | if (ins_index < 0) | ||
719 | ins_index = i; | ||
720 | rep_index = i; | ||
721 | break; | ||
722 | } | ||
723 | |||
724 | if (depth >= max_rep_depth && | ||
725 | (ins_index >= 0 || depth >= max_ins_depth)) { | ||
726 | /* Case (b) */ | ||
727 | if (ins_index < 0) { | ||
728 | rc = -EBUSY; | ||
729 | goto out; | ||
730 | } | ||
731 | rep_index = -1; | ||
732 | break; | ||
733 | } | ||
734 | |||
735 | i = (i + incr) & (table->size - 1); | ||
736 | ++depth; | ||
737 | } | ||
738 | } | ||
739 | |||
740 | /* If we found a filter to be replaced, check whether we | ||
741 | * should do so | ||
742 | */ | ||
743 | if (rep_index >= 0) { | ||
744 | struct efx_filter_spec *saved_spec = &table->spec[rep_index]; | ||
745 | |||
746 | if (spec->priority == saved_spec->priority && !replace_equal) { | ||
691 | rc = -EEXIST; | 747 | rc = -EEXIST; |
692 | goto out; | 748 | goto out; |
693 | } | 749 | } |
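A hedged caller-side sketch of the insertion semantics documented above: steer TCP traffic for a local IPv4 address and port to RX queue 0, without replacing an existing equal-priority filter. efx_filter_init_rx() and efx_filter_insert_filter() appear in this patch; efx_filter_set_ipv4_local() is assumed here to take (spec, proto, host, port) and the helper as a whole is illustrative, not driver code.

static int example_insert_port_filter(struct efx_nic *efx, __be32 host)
{
	struct efx_filter_spec spec;
	s32 rc;

	efx_filter_init_rx(&spec, EFX_FILTER_PRI_MANUAL,
			   efx->rx_scatter ? EFX_FILTER_FLAG_RX_SCATTER : 0,
			   0 /* RX queue index */);
	rc = efx_filter_set_ipv4_local(&spec, IPPROTO_TCP, host, htons(8080));
	if (rc)
		return rc;

	rc = efx_filter_insert_filter(efx, &spec, false);
	if (rc == -EEXIST || rc == -EPERM)
		return rc;	/* an equal- or higher-priority match exists */
	return rc < 0 ? rc : 0;	/* rc >= 0 is the assigned filter ID */
}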
@@ -695,11 +751,14 @@ s32 efx_filter_insert_filter(struct efx_nic *efx, struct efx_filter_spec *spec, | |||
695 | rc = -EPERM; | 751 | rc = -EPERM; |
696 | goto out; | 752 | goto out; |
697 | } | 753 | } |
698 | } else { | 754 | } |
699 | __set_bit(filter_idx, table->used_bitmap); | 755 | |
756 | /* Insert the filter */ | ||
757 | if (ins_index != rep_index) { | ||
758 | __set_bit(ins_index, table->used_bitmap); | ||
700 | ++table->used; | 759 | ++table->used; |
701 | } | 760 | } |
702 | *saved_spec = *spec; | 761 | table->spec[ins_index] = *spec; |
703 | 762 | ||
704 | if (table->id == EFX_FILTER_TABLE_RX_DEF) { | 763 | if (table->id == EFX_FILTER_TABLE_RX_DEF) { |
705 | efx_filter_push_rx_config(efx); | 764 | efx_filter_push_rx_config(efx); |
@@ -713,13 +772,19 @@ s32 efx_filter_insert_filter(struct efx_nic *efx, struct efx_filter_spec *spec, | |||
713 | } | 772 | } |
714 | 773 | ||
715 | efx_writeo(efx, &filter, | 774 | efx_writeo(efx, &filter, |
716 | table->offset + table->step * filter_idx); | 775 | table->offset + table->step * ins_index); |
776 | |||
777 | /* If we were able to replace a filter by inserting | ||
778 | * at a lower depth, clear the replaced filter | ||
779 | */ | ||
780 | if (ins_index != rep_index && rep_index >= 0) | ||
781 | efx_filter_table_clear_entry(efx, table, rep_index); | ||
717 | } | 782 | } |
718 | 783 | ||
719 | netif_vdbg(efx, hw, efx->net_dev, | 784 | netif_vdbg(efx, hw, efx->net_dev, |
720 | "%s: filter type %d index %d rxq %u set", | 785 | "%s: filter type %d index %d rxq %u set", |
721 | __func__, spec->type, filter_idx, spec->dmaq_id); | 786 | __func__, spec->type, ins_index, spec->dmaq_id); |
722 | rc = efx_filter_make_id(spec, filter_idx); | 787 | rc = efx_filter_make_id(spec, ins_index); |
723 | 788 | ||
724 | out: | 789 | out: |
725 | spin_unlock_bh(&state->lock); | 790 | spin_unlock_bh(&state->lock); |
@@ -1060,6 +1125,50 @@ void efx_remove_filters(struct efx_nic *efx) | |||
1060 | kfree(state); | 1125 | kfree(state); |
1061 | } | 1126 | } |
1062 | 1127 | ||
1128 | /* Update scatter enable flags for filters pointing to our own RX queues */ | ||
1129 | void efx_filter_update_rx_scatter(struct efx_nic *efx) | ||
1130 | { | ||
1131 | struct efx_filter_state *state = efx->filter_state; | ||
1132 | enum efx_filter_table_id table_id; | ||
1133 | struct efx_filter_table *table; | ||
1134 | efx_oword_t filter; | ||
1135 | unsigned int filter_idx; | ||
1136 | |||
1137 | spin_lock_bh(&state->lock); | ||
1138 | |||
1139 | for (table_id = EFX_FILTER_TABLE_RX_IP; | ||
1140 | table_id <= EFX_FILTER_TABLE_RX_DEF; | ||
1141 | table_id++) { | ||
1142 | table = &state->table[table_id]; | ||
1143 | |||
1144 | for (filter_idx = 0; filter_idx < table->size; filter_idx++) { | ||
1145 | if (!test_bit(filter_idx, table->used_bitmap) || | ||
1146 | table->spec[filter_idx].dmaq_id >= | ||
1147 | efx->n_rx_channels) | ||
1148 | continue; | ||
1149 | |||
1150 | if (efx->rx_scatter) | ||
1151 | table->spec[filter_idx].flags |= | ||
1152 | EFX_FILTER_FLAG_RX_SCATTER; | ||
1153 | else | ||
1154 | table->spec[filter_idx].flags &= | ||
1155 | ~EFX_FILTER_FLAG_RX_SCATTER; | ||
1156 | |||
1157 | if (table_id == EFX_FILTER_TABLE_RX_DEF) | ||
1158 | /* Pushed by efx_filter_push_rx_config() */ | ||
1159 | continue; | ||
1160 | |||
1161 | efx_filter_build(&filter, &table->spec[filter_idx]); | ||
1162 | efx_writeo(efx, &filter, | ||
1163 | table->offset + table->step * filter_idx); | ||
1164 | } | ||
1165 | } | ||
1166 | |||
1167 | efx_filter_push_rx_config(efx); | ||
1168 | |||
1169 | spin_unlock_bh(&state->lock); | ||
1170 | } | ||
1171 | |||
1063 | #ifdef CONFIG_RFS_ACCEL | 1172 | #ifdef CONFIG_RFS_ACCEL |
1064 | 1173 | ||
1065 | int efx_filter_rfs(struct net_device *net_dev, const struct sk_buff *skb, | 1174 | int efx_filter_rfs(struct net_device *net_dev, const struct sk_buff *skb, |
diff --git a/drivers/net/ethernet/sfc/mcdi_pcol.h b/drivers/net/ethernet/sfc/mcdi_pcol.h
index 9d426d0457bd..c5c9747861ba 100644
--- a/drivers/net/ethernet/sfc/mcdi_pcol.h
+++ b/drivers/net/ethernet/sfc/mcdi_pcol.h
@@ -553,6 +553,7 @@ | |||
553 | #define MC_CMD_PTP_MODE_V1_VLAN 0x1 /* enum */ | 553 | #define MC_CMD_PTP_MODE_V1_VLAN 0x1 /* enum */ |
554 | #define MC_CMD_PTP_MODE_V2 0x2 /* enum */ | 554 | #define MC_CMD_PTP_MODE_V2 0x2 /* enum */ |
555 | #define MC_CMD_PTP_MODE_V2_VLAN 0x3 /* enum */ | 555 | #define MC_CMD_PTP_MODE_V2_VLAN 0x3 /* enum */ |
556 | #define MC_CMD_PTP_MODE_V2_ENHANCED 0x4 /* enum */ | ||
556 | 557 | ||
557 | /* MC_CMD_PTP_IN_DISABLE msgrequest */ | 558 | /* MC_CMD_PTP_IN_DISABLE msgrequest */ |
558 | #define MC_CMD_PTP_IN_DISABLE_LEN 8 | 559 | #define MC_CMD_PTP_IN_DISABLE_LEN 8 |
diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h
index 0a90abd2421b..9bd433a095c5 100644
--- a/drivers/net/ethernet/sfc/net_driver.h
+++ b/drivers/net/ethernet/sfc/net_driver.h
@@ -69,6 +69,12 @@ | |||
69 | #define EFX_TXQ_TYPES 4 | 69 | #define EFX_TXQ_TYPES 4 |
70 | #define EFX_MAX_TX_QUEUES (EFX_TXQ_TYPES * EFX_MAX_CHANNELS) | 70 | #define EFX_MAX_TX_QUEUES (EFX_TXQ_TYPES * EFX_MAX_CHANNELS) |
71 | 71 | ||
72 | /* Maximum possible MTU the driver supports */ | ||
73 | #define EFX_MAX_MTU (9 * 1024) | ||
74 | |||
75 | /* Size of an RX scatter buffer. Small enough to pack 2 into a 4K page. */ | ||
76 | #define EFX_RX_USR_BUF_SIZE 1824 | ||
77 | |||
72 | /* Forward declare Precision Time Protocol (PTP) support structure. */ | 78 | /* Forward declare Precision Time Protocol (PTP) support structure. */ |
73 | struct efx_ptp_data; | 79 | struct efx_ptp_data; |
74 | 80 | ||
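The 1824-byte figure is constrained by the BUILD_BUG_ON added in efx_start_datapath() above: sizeof(struct efx_rx_page_state) + EFX_PAGE_IP_ALIGN + EFX_RX_USR_BUF_SIZE must not exceed PAGE_SIZE / 2, so on a 4096-byte page each half-page buffer of 1824 bytes leaves at most 2048 - 1824 = 224 bytes for the page-state bookkeeping and IP header alignment, and exactly two such buffers fit per page.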
@@ -206,25 +212,23 @@ struct efx_tx_queue { | |||
206 | /** | 212 | /** |
207 | * struct efx_rx_buffer - An Efx RX data buffer | 213 | * struct efx_rx_buffer - An Efx RX data buffer |
208 | * @dma_addr: DMA base address of the buffer | 214 | * @dma_addr: DMA base address of the buffer |
209 | * @skb: The associated socket buffer. Valid iff !(@flags & %EFX_RX_BUF_PAGE). | 215 | * @page: The associated page buffer. |
210 | * Will be %NULL if the buffer slot is currently free. | ||
211 | * @page: The associated page buffer. Valif iff @flags & %EFX_RX_BUF_PAGE. | ||
212 | * Will be %NULL if the buffer slot is currently free. | 216 | * Will be %NULL if the buffer slot is currently free. |
213 | * @page_offset: Offset within page. Valid iff @flags & %EFX_RX_BUF_PAGE. | 217 | * @page_offset: If pending: offset in @page of DMA base address. |
214 | * @len: Buffer length, in bytes. | 218 | * If completed: offset in @page of Ethernet header. |
215 | * @flags: Flags for buffer and packet state. | 219 | * @len: If pending: length for DMA descriptor. |
220 | * If completed: received length, excluding hash prefix. | ||
221 | * @flags: Flags for buffer and packet state. These are only set on the | ||
222 | * first buffer of a scattered packet. | ||
216 | */ | 223 | */ |
217 | struct efx_rx_buffer { | 224 | struct efx_rx_buffer { |
218 | dma_addr_t dma_addr; | 225 | dma_addr_t dma_addr; |
219 | union { | 226 | struct page *page; |
220 | struct sk_buff *skb; | ||
221 | struct page *page; | ||
222 | } u; | ||
223 | u16 page_offset; | 227 | u16 page_offset; |
224 | u16 len; | 228 | u16 len; |
225 | u16 flags; | 229 | u16 flags; |
226 | }; | 230 | }; |
227 | #define EFX_RX_BUF_PAGE 0x0001 | 231 | #define EFX_RX_BUF_LAST_IN_PAGE 0x0001 |
228 | #define EFX_RX_PKT_CSUMMED 0x0002 | 232 | #define EFX_RX_PKT_CSUMMED 0x0002 |
229 | #define EFX_RX_PKT_DISCARD 0x0004 | 233 | #define EFX_RX_PKT_DISCARD 0x0004 |
230 | 234 | ||
@@ -260,14 +264,23 @@ struct efx_rx_page_state { | |||
260 | * @added_count: Number of buffers added to the receive queue. | 264 | * @added_count: Number of buffers added to the receive queue. |
261 | * @notified_count: Number of buffers given to NIC (<= @added_count). | 265 | * @notified_count: Number of buffers given to NIC (<= @added_count). |
262 | * @removed_count: Number of buffers removed from the receive queue. | 266 | * @removed_count: Number of buffers removed from the receive queue. |
267 | * @scatter_n: Number of buffers used by current packet | ||
268 | * @page_ring: The ring to store DMA mapped pages for reuse. | ||
269 | * @page_add: Counter to calculate the write pointer for the recycle ring. | ||
270 | * @page_remove: Counter to calculate the read pointer for the recycle ring. | ||
271 | * @page_recycle_count: The number of pages that have been recycled. | ||
272 | * @page_recycle_failed: The number of pages that couldn't be recycled because | ||
273 | * the kernel still held a reference to them. | ||
274 | * @page_recycle_full: The number of pages that were released because the | ||
275 | * recycle ring was full. | ||
276 | * @page_ptr_mask: The number of pages in the RX recycle ring minus 1. | ||
263 | * @max_fill: RX descriptor maximum fill level (<= ring size) | 277 | * @max_fill: RX descriptor maximum fill level (<= ring size) |
264 | * @fast_fill_trigger: RX descriptor fill level that will trigger a fast fill | 278 | * @fast_fill_trigger: RX descriptor fill level that will trigger a fast fill |
265 | * (<= @max_fill) | 279 | * (<= @max_fill) |
266 | * @min_fill: RX descriptor minimum non-zero fill level. | 280 | * @min_fill: RX descriptor minimum non-zero fill level. |
267 | * This records the minimum fill level observed when a ring | 281 | * This records the minimum fill level observed when a ring |
268 | * refill was triggered. | 282 | * refill was triggered. |
269 | * @alloc_page_count: RX allocation strategy counter. | 283 | * @recycle_count: RX buffer recycle counter. |
270 | * @alloc_skb_count: RX allocation strategy counter. | ||
271 | * @slow_fill: Timer used to defer efx_nic_generate_fill_event(). | 284 | * @slow_fill: Timer used to defer efx_nic_generate_fill_event(). |
272 | */ | 285 | */ |
273 | struct efx_rx_queue { | 286 | struct efx_rx_queue { |
@@ -279,15 +292,22 @@ struct efx_rx_queue { | |||
279 | bool enabled; | 292 | bool enabled; |
280 | bool flush_pending; | 293 | bool flush_pending; |
281 | 294 | ||
282 | int added_count; | 295 | unsigned int added_count; |
283 | int notified_count; | 296 | unsigned int notified_count; |
284 | int removed_count; | 297 | unsigned int removed_count; |
298 | unsigned int scatter_n; | ||
299 | struct page **page_ring; | ||
300 | unsigned int page_add; | ||
301 | unsigned int page_remove; | ||
302 | unsigned int page_recycle_count; | ||
303 | unsigned int page_recycle_failed; | ||
304 | unsigned int page_recycle_full; | ||
305 | unsigned int page_ptr_mask; | ||
285 | unsigned int max_fill; | 306 | unsigned int max_fill; |
286 | unsigned int fast_fill_trigger; | 307 | unsigned int fast_fill_trigger; |
287 | unsigned int min_fill; | 308 | unsigned int min_fill; |
288 | unsigned int min_overfill; | 309 | unsigned int min_overfill; |
289 | unsigned int alloc_page_count; | 310 | unsigned int recycle_count; |
290 | unsigned int alloc_skb_count; | ||
291 | struct timer_list slow_fill; | 311 | struct timer_list slow_fill; |
292 | unsigned int slow_fill_count; | 312 | unsigned int slow_fill_count; |
293 | }; | 313 | }; |
@@ -336,10 +356,6 @@ enum efx_rx_alloc_method { | |||
336 | * @event_test_cpu: Last CPU to handle interrupt or test event for this channel | 356 | * @event_test_cpu: Last CPU to handle interrupt or test event for this channel |
337 | * @irq_count: Number of IRQs since last adaptive moderation decision | 357 | * @irq_count: Number of IRQs since last adaptive moderation decision |
338 | * @irq_mod_score: IRQ moderation score | 358 | * @irq_mod_score: IRQ moderation score |
339 | * @rx_alloc_level: Watermark based heuristic counter for pushing descriptors | ||
340 | * and diagnostic counters | ||
341 | * @rx_alloc_push_pages: RX allocation method currently in use for pushing | ||
342 | * descriptors | ||
343 | * @n_rx_tobe_disc: Count of RX_TOBE_DISC errors | 359 | * @n_rx_tobe_disc: Count of RX_TOBE_DISC errors |
344 | * @n_rx_ip_hdr_chksum_err: Count of RX IP header checksum errors | 360 | * @n_rx_ip_hdr_chksum_err: Count of RX IP header checksum errors |
345 | * @n_rx_tcp_udp_chksum_err: Count of RX TCP and UDP checksum errors | 361 | * @n_rx_tcp_udp_chksum_err: Count of RX TCP and UDP checksum errors |
@@ -347,6 +363,12 @@ enum efx_rx_alloc_method { | |||
347 | * @n_rx_frm_trunc: Count of RX_FRM_TRUNC errors | 363 | * @n_rx_frm_trunc: Count of RX_FRM_TRUNC errors |
348 | * @n_rx_overlength: Count of RX_OVERLENGTH errors | 364 | * @n_rx_overlength: Count of RX_OVERLENGTH errors |
349 | * @n_skbuff_leaks: Count of skbuffs leaked due to RX overrun | 365 | * @n_skbuff_leaks: Count of skbuffs leaked due to RX overrun |
366 | * @n_rx_nodesc_trunc: Number of RX packets truncated and then dropped due to | ||
367 | * lack of descriptors | ||
368 | * @rx_pkt_n_frags: Number of fragments in next packet to be delivered by | ||
369 | * __efx_rx_packet(), or zero if there is none | ||
370 | * @rx_pkt_index: Ring index of first buffer for next packet to be delivered | ||
371 | * by __efx_rx_packet(), if @rx_pkt_n_frags != 0 | ||
350 | * @rx_queue: RX queue for this channel | 372 | * @rx_queue: RX queue for this channel |
351 | * @tx_queue: TX queues for this channel | 373 | * @tx_queue: TX queues for this channel |
352 | */ | 374 | */ |
@@ -371,9 +393,6 @@ struct efx_channel { | |||
371 | unsigned int rfs_filters_added; | 393 | unsigned int rfs_filters_added; |
372 | #endif | 394 | #endif |
373 | 395 | ||
374 | int rx_alloc_level; | ||
375 | int rx_alloc_push_pages; | ||
376 | |||
377 | unsigned n_rx_tobe_disc; | 396 | unsigned n_rx_tobe_disc; |
378 | unsigned n_rx_ip_hdr_chksum_err; | 397 | unsigned n_rx_ip_hdr_chksum_err; |
379 | unsigned n_rx_tcp_udp_chksum_err; | 398 | unsigned n_rx_tcp_udp_chksum_err; |
@@ -381,11 +400,10 @@ struct efx_channel { | |||
381 | unsigned n_rx_frm_trunc; | 400 | unsigned n_rx_frm_trunc; |
382 | unsigned n_rx_overlength; | 401 | unsigned n_rx_overlength; |
383 | unsigned n_skbuff_leaks; | 402 | unsigned n_skbuff_leaks; |
403 | unsigned int n_rx_nodesc_trunc; | ||
384 | 404 | ||
385 | /* Used to pipeline received packets in order to optimise memory | 405 | unsigned int rx_pkt_n_frags; |
386 | * access with prefetches. | 406 | unsigned int rx_pkt_index; |
387 | */ | ||
388 | struct efx_rx_buffer *rx_pkt; | ||
389 | 407 | ||
390 | struct efx_rx_queue rx_queue; | 408 | struct efx_rx_queue rx_queue; |
391 | struct efx_tx_queue tx_queue[EFX_TXQ_TYPES]; | 409 | struct efx_tx_queue tx_queue[EFX_TXQ_TYPES]; |
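rx_pkt_n_frags and rx_pkt_index replace the old rx_pkt pointer: the channel now parks at most one received packet and hands it to __efx_rx_packet() just before parking the next, so memory accesses can still be optimised with prefetches. A rough, self-contained sketch of that hand-off (everything except the two field names is illustrative):

#include <stdio.h>

struct rx_pipeline {
        unsigned int rx_pkt_n_frags;    /* 0 => nothing pending */
        unsigned int rx_pkt_index;
};

static void flush_packet(struct rx_pipeline *ch)
{
        if (ch->rx_pkt_n_frags) {
                /* stands in for __efx_rx_packet(channel) */
                printf("deliver packet at index %u (%u frags)\n",
                       ch->rx_pkt_index, ch->rx_pkt_n_frags);
                ch->rx_pkt_n_frags = 0;
        }
}

static void park_packet(struct rx_pipeline *ch,
                        unsigned int index, unsigned int n_frags)
{
        flush_packet(ch);               /* deliver the previously parked packet */
        ch->rx_pkt_index = index;       /* then remember this one */
        ch->rx_pkt_n_frags = n_frags;
}

int main(void)
{
        struct rx_pipeline ch = { 0, 0 };

        park_packet(&ch, 10, 1);        /* nothing delivered yet */
        park_packet(&ch, 11, 3);        /* delivers the packet at index 10 */
        flush_packet(&ch);              /* end of NAPI poll: delivers index 11 */
        return 0;
}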
@@ -410,7 +428,7 @@ struct efx_channel_type { | |||
410 | void (*post_remove)(struct efx_channel *); | 428 | void (*post_remove)(struct efx_channel *); |
411 | void (*get_name)(struct efx_channel *, char *buf, size_t len); | 429 | void (*get_name)(struct efx_channel *, char *buf, size_t len); |
412 | struct efx_channel *(*copy)(const struct efx_channel *); | 430 | struct efx_channel *(*copy)(const struct efx_channel *); |
413 | void (*receive_skb)(struct efx_channel *, struct sk_buff *); | 431 | bool (*receive_skb)(struct efx_channel *, struct sk_buff *); |
414 | bool keep_eventq; | 432 | bool keep_eventq; |
415 | }; | 433 | }; |
416 | 434 | ||
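receive_skb() changing from void to bool lets a special channel handler decline a packet. One plausible caller pattern, assuming a true return means the skb was consumed (which matches efx_ptp_rx() below returning false for packets it does not want):

#include <linux/netdevice.h>
#include "net_driver.h"

/* Sketch only: deliver an skb via the channel's special handler if there is
 * one and it accepts the packet, otherwise fall back to the normal path.
 */
static void efx_deliver_skb(struct efx_channel *channel, struct sk_buff *skb)
{
        if (channel->type->receive_skb &&
            channel->type->receive_skb(channel, skb))
                return;                 /* handler consumed the skb */

        netif_receive_skb(skb);         /* ordinary delivery */
}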
@@ -446,6 +464,7 @@ enum nic_state { | |||
446 | STATE_UNINIT = 0, /* device being probed/removed or is frozen */ | 464 | STATE_UNINIT = 0, /* device being probed/removed or is frozen */ |
447 | STATE_READY = 1, /* hardware ready and netdev registered */ | 465 | STATE_READY = 1, /* hardware ready and netdev registered */ |
448 | STATE_DISABLED = 2, /* device disabled due to hardware errors */ | 466 | STATE_DISABLED = 2, /* device disabled due to hardware errors */ |
467 | STATE_RECOVERY = 3, /* device recovering from PCI error */ | ||
449 | }; | 468 | }; |
450 | 469 | ||
451 | /* | 470 | /* |
@@ -684,10 +703,13 @@ struct vfdi_status; | |||
684 | * @n_channels: Number of channels in use | 703 | * @n_channels: Number of channels in use |
685 | * @n_rx_channels: Number of channels used for RX (= number of RX queues) | 704 | * @n_rx_channels: Number of channels used for RX (= number of RX queues) |
686 | * @n_tx_channels: Number of channels used for TX | 705 | * @n_tx_channels: Number of channels used for TX |
687 | * @rx_buffer_len: RX buffer length | 706 | * @rx_dma_len: Current maximum RX DMA length |
688 | * @rx_buffer_order: Order (log2) of number of pages for each RX buffer | 707 | * @rx_buffer_order: Order (log2) of number of pages for each RX buffer |
708 | * @rx_buffer_truesize: Amortised allocation size of an RX buffer, | ||
709 | * for use in sk_buff::truesize | ||
689 | * @rx_hash_key: Toeplitz hash key for RSS | 710 | * @rx_hash_key: Toeplitz hash key for RSS |
690 | * @rx_indir_table: Indirection table for RSS | 711 | * @rx_indir_table: Indirection table for RSS |
712 | * @rx_scatter: Scatter mode enabled for receives | ||
691 | * @int_error_count: Number of internal errors seen recently | 713 | * @int_error_count: Number of internal errors seen recently |
692 | * @int_error_expire: Time at which error count will be expired | 714 | * @int_error_expire: Time at which error count will be expired |
693 | * @irq_status: Interrupt status buffer | 715 | * @irq_status: Interrupt status buffer |
@@ -800,10 +822,15 @@ struct efx_nic { | |||
800 | unsigned rss_spread; | 822 | unsigned rss_spread; |
801 | unsigned tx_channel_offset; | 823 | unsigned tx_channel_offset; |
802 | unsigned n_tx_channels; | 824 | unsigned n_tx_channels; |
803 | unsigned int rx_buffer_len; | 825 | unsigned int rx_dma_len; |
804 | unsigned int rx_buffer_order; | 826 | unsigned int rx_buffer_order; |
827 | unsigned int rx_buffer_truesize; | ||
828 | unsigned int rx_page_buf_step; | ||
829 | unsigned int rx_bufs_per_page; | ||
830 | unsigned int rx_pages_per_batch; | ||
805 | u8 rx_hash_key[40]; | 831 | u8 rx_hash_key[40]; |
806 | u32 rx_indir_table[128]; | 832 | u32 rx_indir_table[128]; |
833 | bool rx_scatter; | ||
807 | 834 | ||
808 | unsigned int_error_count; | 835 | unsigned int_error_count; |
809 | unsigned long int_error_expire; | 836 | unsigned long int_error_expire; |
@@ -934,8 +961,9 @@ static inline unsigned int efx_port_num(struct efx_nic *efx) | |||
934 | * @evq_ptr_tbl_base: Event queue pointer table base address | 961 | * @evq_ptr_tbl_base: Event queue pointer table base address |
935 | * @evq_rptr_tbl_base: Event queue read-pointer table base address | 962 | * @evq_rptr_tbl_base: Event queue read-pointer table base address |
936 | * @max_dma_mask: Maximum possible DMA mask | 963 | * @max_dma_mask: Maximum possible DMA mask |
937 | * @rx_buffer_hash_size: Size of hash at start of RX buffer | 964 | * @rx_buffer_hash_size: Size of hash at start of RX packet |
938 | * @rx_buffer_padding: Size of padding at end of RX buffer | 965 | * @rx_buffer_padding: Size of padding at end of RX packet |
966 | * @can_rx_scatter: NIC is able to scatter packets to multiple buffers | ||
939 | * @max_interrupt_mode: Highest capability interrupt mode supported | 967 | * @max_interrupt_mode: Highest capability interrupt mode supported |
940 | * from &enum efx_init_mode. | 968 | * from &enum efx_init_mode. |
941 | * @phys_addr_channels: Number of channels with physically addressed | 969 | * @phys_addr_channels: Number of channels with physically addressed |
@@ -983,6 +1011,7 @@ struct efx_nic_type { | |||
983 | u64 max_dma_mask; | 1011 | u64 max_dma_mask; |
984 | unsigned int rx_buffer_hash_size; | 1012 | unsigned int rx_buffer_hash_size; |
985 | unsigned int rx_buffer_padding; | 1013 | unsigned int rx_buffer_padding; |
1014 | bool can_rx_scatter; | ||
986 | unsigned int max_interrupt_mode; | 1015 | unsigned int max_interrupt_mode; |
987 | unsigned int phys_addr_channels; | 1016 | unsigned int phys_addr_channels; |
988 | unsigned int timer_period_max; | 1017 | unsigned int timer_period_max; |
diff --git a/drivers/net/ethernet/sfc/nic.c b/drivers/net/ethernet/sfc/nic.c index 0ad790cc473c..f9f5df8b51fe 100644 --- a/drivers/net/ethernet/sfc/nic.c +++ b/drivers/net/ethernet/sfc/nic.c | |||
@@ -591,12 +591,22 @@ void efx_nic_init_rx(struct efx_rx_queue *rx_queue) | |||
591 | struct efx_nic *efx = rx_queue->efx; | 591 | struct efx_nic *efx = rx_queue->efx; |
592 | bool is_b0 = efx_nic_rev(efx) >= EFX_REV_FALCON_B0; | 592 | bool is_b0 = efx_nic_rev(efx) >= EFX_REV_FALCON_B0; |
593 | bool iscsi_digest_en = is_b0; | 593 | bool iscsi_digest_en = is_b0; |
594 | bool jumbo_en; | ||
595 | |||
596 | /* For kernel-mode queues in Falcon A1, the JUMBO flag enables | ||
597 | * DMA to continue after a PCIe page boundary (and scattering | ||
598 | * is not possible). In Falcon B0 and Siena, it enables | ||
599 | * scatter. | ||
600 | */ | ||
601 | jumbo_en = !is_b0 || efx->rx_scatter; | ||
594 | 602 | ||
595 | netif_dbg(efx, hw, efx->net_dev, | 603 | netif_dbg(efx, hw, efx->net_dev, |
596 | "RX queue %d ring in special buffers %d-%d\n", | 604 | "RX queue %d ring in special buffers %d-%d\n", |
597 | efx_rx_queue_index(rx_queue), rx_queue->rxd.index, | 605 | efx_rx_queue_index(rx_queue), rx_queue->rxd.index, |
598 | rx_queue->rxd.index + rx_queue->rxd.entries - 1); | 606 | rx_queue->rxd.index + rx_queue->rxd.entries - 1); |
599 | 607 | ||
608 | rx_queue->scatter_n = 0; | ||
609 | |||
600 | /* Pin RX descriptor ring */ | 610 | /* Pin RX descriptor ring */ |
601 | efx_init_special_buffer(efx, &rx_queue->rxd); | 611 | efx_init_special_buffer(efx, &rx_queue->rxd); |
602 | 612 | ||
@@ -613,8 +623,7 @@ void efx_nic_init_rx(struct efx_rx_queue *rx_queue) | |||
613 | FRF_AZ_RX_DESCQ_SIZE, | 623 | FRF_AZ_RX_DESCQ_SIZE, |
614 | __ffs(rx_queue->rxd.entries), | 624 | __ffs(rx_queue->rxd.entries), |
615 | FRF_AZ_RX_DESCQ_TYPE, 0 /* kernel queue */ , | 625 | FRF_AZ_RX_DESCQ_TYPE, 0 /* kernel queue */ , |
616 | /* For >=B0 this is scatter so disable */ | 626 | FRF_AZ_RX_DESCQ_JUMBO, jumbo_en, |
617 | FRF_AZ_RX_DESCQ_JUMBO, !is_b0, | ||
618 | FRF_AZ_RX_DESCQ_EN, 1); | 627 | FRF_AZ_RX_DESCQ_EN, 1); |
619 | efx_writeo_table(efx, &rx_desc_ptr, efx->type->rxd_ptr_tbl_base, | 628 | efx_writeo_table(efx, &rx_desc_ptr, efx->type->rxd_ptr_tbl_base, |
620 | efx_rx_queue_index(rx_queue)); | 629 | efx_rx_queue_index(rx_queue)); |
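The jumbo_en expression above collapses two different hardware meanings into one register bit; restated as a standalone helper with the decision spelled out (illustrative only):

#include <stdbool.h>

/* Falcon A1: the JUMBO flag only allows DMA to continue across a PCIe page
 * boundary, so it is always set for kernel-mode queues. Falcon B0 and Siena:
 * the same flag enables RX scatter, so it mirrors efx->rx_scatter.
 */
static bool rx_descq_jumbo_bit(bool is_b0, bool rx_scatter)
{
        return !is_b0 || rx_scatter;
}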
@@ -968,13 +977,24 @@ static u16 efx_handle_rx_not_ok(struct efx_rx_queue *rx_queue, | |||
968 | EFX_RX_PKT_DISCARD : 0; | 977 | EFX_RX_PKT_DISCARD : 0; |
969 | } | 978 | } |
970 | 979 | ||
971 | /* Handle receive events that are not in-order. */ | 980 | /* Handle receive events that are not in-order. Return true if this |
972 | static void | 981 | * can be handled as a partial packet discard, false if it's more |
982 | * serious. | ||
983 | */ | ||
984 | static bool | ||
973 | efx_handle_rx_bad_index(struct efx_rx_queue *rx_queue, unsigned index) | 985 | efx_handle_rx_bad_index(struct efx_rx_queue *rx_queue, unsigned index) |
974 | { | 986 | { |
987 | struct efx_channel *channel = efx_rx_queue_channel(rx_queue); | ||
975 | struct efx_nic *efx = rx_queue->efx; | 988 | struct efx_nic *efx = rx_queue->efx; |
976 | unsigned expected, dropped; | 989 | unsigned expected, dropped; |
977 | 990 | ||
991 | if (rx_queue->scatter_n && | ||
992 | index == ((rx_queue->removed_count + rx_queue->scatter_n - 1) & | ||
993 | rx_queue->ptr_mask)) { | ||
994 | ++channel->n_rx_nodesc_trunc; | ||
995 | return true; | ||
996 | } | ||
997 | |||
978 | expected = rx_queue->removed_count & rx_queue->ptr_mask; | 998 | expected = rx_queue->removed_count & rx_queue->ptr_mask; |
979 | dropped = (index - expected) & rx_queue->ptr_mask; | 999 | dropped = (index - expected) & rx_queue->ptr_mask; |
980 | netif_info(efx, rx_err, efx->net_dev, | 1000 | netif_info(efx, rx_err, efx->net_dev, |
@@ -983,6 +1003,7 @@ efx_handle_rx_bad_index(struct efx_rx_queue *rx_queue, unsigned index) | |||
983 | 1003 | ||
984 | efx_schedule_reset(efx, EFX_WORKAROUND_5676(efx) ? | 1004 | efx_schedule_reset(efx, EFX_WORKAROUND_5676(efx) ? |
985 | RESET_TYPE_RX_RECOVERY : RESET_TYPE_DISABLE); | 1005 | RESET_TYPE_RX_RECOVERY : RESET_TYPE_DISABLE); |
1006 | return false; | ||
986 | } | 1007 | } |
987 | 1008 | ||
988 | /* Handle a packet received event | 1009 | /* Handle a packet received event |
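The new early-out in efx_handle_rx_bad_index() treats a descriptor pointer that lands exactly on the last buffer of the in-progress scatter sequence as a truncated packet rather than a fatal ordering error. A small worked example with assumed values (a 512-entry ring, removed_count of 1000, three buffers already scattered):

#include <assert.h>
#include <stdbool.h>

static bool is_partial_discard(unsigned int index, unsigned int removed_count,
                               unsigned int scatter_n, unsigned int ptr_mask)
{
        return scatter_n &&
               index == ((removed_count + scatter_n - 1) & ptr_mask);
}

int main(void)
{
        /* The event points at the last buffer of the in-progress scatter
         * sequence: (1000 + 3 - 1) & 511 == 490, so this is treated as a
         * truncated packet (n_rx_nodesc_trunc) rather than a serious error.
         */
        assert(is_partial_discard(490, 1000, 3, 511));
        /* Any other index is a genuinely bad index and schedules a reset. */
        assert(!is_partial_discard(200, 1000, 3, 511));
        return 0;
}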
@@ -998,7 +1019,7 @@ efx_handle_rx_event(struct efx_channel *channel, const efx_qword_t *event) | |||
998 | unsigned int rx_ev_desc_ptr, rx_ev_byte_cnt; | 1019 | unsigned int rx_ev_desc_ptr, rx_ev_byte_cnt; |
999 | unsigned int rx_ev_hdr_type, rx_ev_mcast_pkt; | 1020 | unsigned int rx_ev_hdr_type, rx_ev_mcast_pkt; |
1000 | unsigned expected_ptr; | 1021 | unsigned expected_ptr; |
1001 | bool rx_ev_pkt_ok; | 1022 | bool rx_ev_pkt_ok, rx_ev_sop, rx_ev_cont; |
1002 | u16 flags; | 1023 | u16 flags; |
1003 | struct efx_rx_queue *rx_queue; | 1024 | struct efx_rx_queue *rx_queue; |
1004 | struct efx_nic *efx = channel->efx; | 1025 | struct efx_nic *efx = channel->efx; |
@@ -1006,21 +1027,56 @@ efx_handle_rx_event(struct efx_channel *channel, const efx_qword_t *event) | |||
1006 | if (unlikely(ACCESS_ONCE(efx->reset_pending))) | 1027 | if (unlikely(ACCESS_ONCE(efx->reset_pending))) |
1007 | return; | 1028 | return; |
1008 | 1029 | ||
1009 | /* Basic packet information */ | 1030 | rx_ev_cont = EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_JUMBO_CONT); |
1010 | rx_ev_byte_cnt = EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_BYTE_CNT); | 1031 | rx_ev_sop = EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_SOP); |
1011 | rx_ev_pkt_ok = EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_PKT_OK); | ||
1012 | rx_ev_hdr_type = EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_HDR_TYPE); | ||
1013 | WARN_ON(EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_JUMBO_CONT)); | ||
1014 | WARN_ON(EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_SOP) != 1); | ||
1015 | WARN_ON(EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_Q_LABEL) != | 1032 | WARN_ON(EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_Q_LABEL) != |
1016 | channel->channel); | 1033 | channel->channel); |
1017 | 1034 | ||
1018 | rx_queue = efx_channel_get_rx_queue(channel); | 1035 | rx_queue = efx_channel_get_rx_queue(channel); |
1019 | 1036 | ||
1020 | rx_ev_desc_ptr = EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_DESC_PTR); | 1037 | rx_ev_desc_ptr = EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_DESC_PTR); |
1021 | expected_ptr = rx_queue->removed_count & rx_queue->ptr_mask; | 1038 | expected_ptr = ((rx_queue->removed_count + rx_queue->scatter_n) & |
1022 | if (unlikely(rx_ev_desc_ptr != expected_ptr)) | 1039 | rx_queue->ptr_mask); |
1023 | efx_handle_rx_bad_index(rx_queue, rx_ev_desc_ptr); | 1040 | |
1041 | /* Check for partial drops and other errors */ | ||
1042 | if (unlikely(rx_ev_desc_ptr != expected_ptr) || | ||
1043 | unlikely(rx_ev_sop != (rx_queue->scatter_n == 0))) { | ||
1044 | if (rx_ev_desc_ptr != expected_ptr && | ||
1045 | !efx_handle_rx_bad_index(rx_queue, rx_ev_desc_ptr)) | ||
1046 | return; | ||
1047 | |||
1048 | /* Discard all pending fragments */ | ||
1049 | if (rx_queue->scatter_n) { | ||
1050 | efx_rx_packet( | ||
1051 | rx_queue, | ||
1052 | rx_queue->removed_count & rx_queue->ptr_mask, | ||
1053 | rx_queue->scatter_n, 0, EFX_RX_PKT_DISCARD); | ||
1054 | rx_queue->removed_count += rx_queue->scatter_n; | ||
1055 | rx_queue->scatter_n = 0; | ||
1056 | } | ||
1057 | |||
1058 | /* Return if there is no new fragment */ | ||
1059 | if (rx_ev_desc_ptr != expected_ptr) | ||
1060 | return; | ||
1061 | |||
1062 | /* Discard new fragment if not SOP */ | ||
1063 | if (!rx_ev_sop) { | ||
1064 | efx_rx_packet( | ||
1065 | rx_queue, | ||
1066 | rx_queue->removed_count & rx_queue->ptr_mask, | ||
1067 | 1, 0, EFX_RX_PKT_DISCARD); | ||
1068 | ++rx_queue->removed_count; | ||
1069 | return; | ||
1070 | } | ||
1071 | } | ||
1072 | |||
1073 | ++rx_queue->scatter_n; | ||
1074 | if (rx_ev_cont) | ||
1075 | return; | ||
1076 | |||
1077 | rx_ev_byte_cnt = EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_BYTE_CNT); | ||
1078 | rx_ev_pkt_ok = EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_PKT_OK); | ||
1079 | rx_ev_hdr_type = EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_HDR_TYPE); | ||
1024 | 1080 | ||
1025 | if (likely(rx_ev_pkt_ok)) { | 1081 | if (likely(rx_ev_pkt_ok)) { |
1026 | /* If packet is marked as OK and packet type is TCP/IP or | 1082 | /* If packet is marked as OK and packet type is TCP/IP or |
@@ -1048,7 +1104,11 @@ efx_handle_rx_event(struct efx_channel *channel, const efx_qword_t *event) | |||
1048 | channel->irq_mod_score += 2; | 1104 | channel->irq_mod_score += 2; |
1049 | 1105 | ||
1050 | /* Handle received packet */ | 1106 | /* Handle received packet */ |
1051 | efx_rx_packet(rx_queue, rx_ev_desc_ptr, rx_ev_byte_cnt, flags); | 1107 | efx_rx_packet(rx_queue, |
1108 | rx_queue->removed_count & rx_queue->ptr_mask, | ||
1109 | rx_queue->scatter_n, rx_ev_byte_cnt, flags); | ||
1110 | rx_queue->removed_count += rx_queue->scatter_n; | ||
1111 | rx_queue->scatter_n = 0; | ||
1052 | } | 1112 | } |
1053 | 1113 | ||
1054 | /* If this flush done event corresponds to a &struct efx_tx_queue, then | 1114 | /* If this flush done event corresponds to a &struct efx_tx_queue, then |
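Taken together, the SOP/CONT handling above amounts to a small per-queue state machine around scatter_n. A condensed restatement (error and discard paths omitted; this is a sketch, not the driver code):

#include <stdbool.h>

struct scatter_state {
        unsigned int removed_count;
        unsigned int scatter_n;
        unsigned int ptr_mask;
};

/* Returns true (and fills *index/*n_frags) when a complete packet should be
 * handed to efx_rx_packet(); returns false while fragments are still pending.
 */
static bool rx_event(struct scatter_state *s, bool sop, bool cont,
                     unsigned int *index, unsigned int *n_frags)
{
        /* A well-formed stream starts a packet (SOP) only when scatter_n is
         * zero; the full driver discards pending fragments otherwise.
         */
        if (sop != (s->scatter_n == 0))
                return false;

        ++s->scatter_n;
        if (cont)                       /* more fragments will follow */
                return false;

        *index = s->removed_count & s->ptr_mask;
        *n_frags = s->scatter_n;
        s->removed_count += s->scatter_n;
        s->scatter_n = 0;
        return true;
}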
diff --git a/drivers/net/ethernet/sfc/ptp.c b/drivers/net/ethernet/sfc/ptp.c index 3f93624fc273..07f6baa15c0c 100644 --- a/drivers/net/ethernet/sfc/ptp.c +++ b/drivers/net/ethernet/sfc/ptp.c | |||
@@ -99,6 +99,9 @@ | |||
99 | #define PTP_V2_VERSION_LENGTH 1 | 99 | #define PTP_V2_VERSION_LENGTH 1 |
100 | #define PTP_V2_VERSION_OFFSET 29 | 100 | #define PTP_V2_VERSION_OFFSET 29 |
101 | 101 | ||
102 | #define PTP_V2_UUID_LENGTH 8 | ||
103 | #define PTP_V2_UUID_OFFSET 48 | ||
104 | |||
102 | /* Although PTP V2 UUIDs comprise a ClockIdentity (8) and PortNumber (2), | 105 |
103 | * the MC only captures the last six bytes of the clock identity. These values | 106 | * the MC only captures the last six bytes of the clock identity. These values |
104 | * reflect those, not the ones used in the standard. The standard permits | 107 | * reflect those, not the ones used in the standard. The standard permits |
@@ -429,13 +432,10 @@ static int efx_ptp_process_times(struct efx_nic *efx, u8 *synch_buf, | |||
429 | unsigned number_readings = (response_length / | 432 | unsigned number_readings = (response_length / |
430 | MC_CMD_PTP_OUT_SYNCHRONIZE_TIMESET_LEN); | 433 | MC_CMD_PTP_OUT_SYNCHRONIZE_TIMESET_LEN); |
431 | unsigned i; | 434 | unsigned i; |
432 | unsigned min; | ||
433 | unsigned min_set = 0; | ||
434 | unsigned total; | 435 | unsigned total; |
435 | unsigned ngood = 0; | 436 | unsigned ngood = 0; |
436 | unsigned last_good = 0; | 437 | unsigned last_good = 0; |
437 | struct efx_ptp_data *ptp = efx->ptp_data; | 438 | struct efx_ptp_data *ptp = efx->ptp_data; |
438 | bool min_valid = false; | ||
439 | u32 last_sec; | 439 | u32 last_sec; |
440 | u32 start_sec; | 440 | u32 start_sec; |
441 | struct timespec delta; | 441 | struct timespec delta; |
@@ -443,35 +443,17 @@ static int efx_ptp_process_times(struct efx_nic *efx, u8 *synch_buf, | |||
443 | if (number_readings == 0) | 443 | if (number_readings == 0) |
444 | return -EAGAIN; | 444 | return -EAGAIN; |
445 | 445 | ||
446 | /* Find minimum value in this set of results, discarding clearly | 446 | /* Read the set of results and increment stats for any results that |
447 | * erroneous results. | 447 | * appear to be erroneous. |
448 | */ | 448 | */ |
449 | for (i = 0; i < number_readings; i++) { | 449 | for (i = 0; i < number_readings; i++) { |
450 | efx_ptp_read_timeset(synch_buf, &ptp->timeset[i]); | 450 | efx_ptp_read_timeset(synch_buf, &ptp->timeset[i]); |
451 | synch_buf += MC_CMD_PTP_OUT_SYNCHRONIZE_TIMESET_LEN; | 451 | synch_buf += MC_CMD_PTP_OUT_SYNCHRONIZE_TIMESET_LEN; |
452 | if (ptp->timeset[i].window > SYNCHRONISATION_GRANULARITY_NS) { | ||
453 | if (min_valid) { | ||
454 | if (ptp->timeset[i].window < min_set) | ||
455 | min_set = ptp->timeset[i].window; | ||
456 | } else { | ||
457 | min_valid = true; | ||
458 | min_set = ptp->timeset[i].window; | ||
459 | } | ||
460 | } | ||
461 | } | ||
462 | |||
463 | if (min_valid) { | ||
464 | if (ptp->base_sync_valid && (min_set > ptp->base_sync_ns)) | ||
465 | min = ptp->base_sync_ns; | ||
466 | else | ||
467 | min = min_set; | ||
468 | } else { | ||
469 | min = SYNCHRONISATION_GRANULARITY_NS; | ||
470 | } | 452 | } |
471 | 453 | ||
472 | /* Discard excessively long synchronise durations. The MC times | 454 | /* Find the last good host-MC synchronization result. The MC times |
473 | * when it finishes reading the host time so the corrected window | 455 | * when it finishes reading the host time so the corrected window time |
474 | * time should be fairly constant for a given platform. | 456 | * should be fairly constant for a given platform. |
475 | */ | 457 | */ |
476 | total = 0; | 458 | total = 0; |
477 | for (i = 0; i < number_readings; i++) | 459 | for (i = 0; i < number_readings; i++) |
@@ -489,8 +471,8 @@ static int efx_ptp_process_times(struct efx_nic *efx, u8 *synch_buf, | |||
489 | 471 | ||
490 | if (ngood == 0) { | 472 | if (ngood == 0) { |
491 | netif_warn(efx, drv, efx->net_dev, | 473 | netif_warn(efx, drv, efx->net_dev, |
492 | "PTP no suitable synchronisations %dns %dns\n", | 474 | "PTP no suitable synchronisations %dns\n", |
493 | ptp->base_sync_ns, min_set); | 475 | ptp->base_sync_ns); |
494 | return -EAGAIN; | 476 | return -EAGAIN; |
495 | } | 477 | } |
496 | 478 | ||
@@ -1006,43 +988,53 @@ bool efx_ptp_is_ptp_tx(struct efx_nic *efx, struct sk_buff *skb) | |||
1006 | * the receive timestamp from the MC - this will probably occur after the | 988 | * the receive timestamp from the MC - this will probably occur after the |
1007 | * packet arrival because of the processing in the MC. | 989 | * packet arrival because of the processing in the MC. |
1008 | */ | 990 | */ |
1009 | static void efx_ptp_rx(struct efx_channel *channel, struct sk_buff *skb) | 991 | static bool efx_ptp_rx(struct efx_channel *channel, struct sk_buff *skb) |
1010 | { | 992 | { |
1011 | struct efx_nic *efx = channel->efx; | 993 | struct efx_nic *efx = channel->efx; |
1012 | struct efx_ptp_data *ptp = efx->ptp_data; | 994 | struct efx_ptp_data *ptp = efx->ptp_data; |
1013 | struct efx_ptp_match *match = (struct efx_ptp_match *)skb->cb; | 995 | struct efx_ptp_match *match = (struct efx_ptp_match *)skb->cb; |
1014 | u8 *data; | 996 | u8 *match_data_012, *match_data_345; |
1015 | unsigned int version; | 997 | unsigned int version; |
1016 | 998 | ||
1017 | match->expiry = jiffies + msecs_to_jiffies(PKT_EVENT_LIFETIME_MS); | 999 | match->expiry = jiffies + msecs_to_jiffies(PKT_EVENT_LIFETIME_MS); |
1018 | 1000 | ||
1019 | /* Correct version? */ | 1001 | /* Correct version? */ |
1020 | if (ptp->mode == MC_CMD_PTP_MODE_V1) { | 1002 | if (ptp->mode == MC_CMD_PTP_MODE_V1) { |
1021 | if (skb->len < PTP_V1_MIN_LENGTH) { | 1003 | if (!pskb_may_pull(skb, PTP_V1_MIN_LENGTH)) { |
1022 | netif_receive_skb(skb); | 1004 | return false; |
1023 | return; | ||
1024 | } | 1005 | } |
1025 | version = ntohs(*(__be16 *)&skb->data[PTP_V1_VERSION_OFFSET]); | 1006 | version = ntohs(*(__be16 *)&skb->data[PTP_V1_VERSION_OFFSET]); |
1026 | if (version != PTP_VERSION_V1) { | 1007 | if (version != PTP_VERSION_V1) { |
1027 | netif_receive_skb(skb); | 1008 | return false; |
1028 | return; | ||
1029 | } | 1009 | } |
1010 | |||
1011 | /* PTP V1 uses all six bytes of the UUID to match the packet | ||
1012 | * to the timestamp | ||
1013 | */ | ||
1014 | match_data_012 = skb->data + PTP_V1_UUID_OFFSET; | ||
1015 | match_data_345 = skb->data + PTP_V1_UUID_OFFSET + 3; | ||
1030 | } else { | 1016 | } else { |
1031 | if (skb->len < PTP_V2_MIN_LENGTH) { | 1017 | if (!pskb_may_pull(skb, PTP_V2_MIN_LENGTH)) { |
1032 | netif_receive_skb(skb); | 1018 | return false; |
1033 | return; | ||
1034 | } | 1019 | } |
1035 | version = skb->data[PTP_V2_VERSION_OFFSET]; | 1020 | version = skb->data[PTP_V2_VERSION_OFFSET]; |
1036 | |||
1037 | BUG_ON(ptp->mode != MC_CMD_PTP_MODE_V2); | ||
1038 | BUILD_BUG_ON(PTP_V1_UUID_OFFSET != PTP_V2_MC_UUID_OFFSET); | ||
1039 | BUILD_BUG_ON(PTP_V1_UUID_LENGTH != PTP_V2_MC_UUID_LENGTH); | ||
1040 | BUILD_BUG_ON(PTP_V1_SEQUENCE_OFFSET != PTP_V2_SEQUENCE_OFFSET); | ||
1041 | BUILD_BUG_ON(PTP_V1_SEQUENCE_LENGTH != PTP_V2_SEQUENCE_LENGTH); | ||
1042 | |||
1043 | if ((version & PTP_VERSION_V2_MASK) != PTP_VERSION_V2) { | 1021 | if ((version & PTP_VERSION_V2_MASK) != PTP_VERSION_V2) { |
1044 | netif_receive_skb(skb); | 1022 | return false; |
1045 | return; | 1023 | } |
1024 | |||
1025 | /* The original V2 implementation uses bytes 2-7 of | ||
1026 | * the UUID to match the packet to the timestamp. This | ||
1027 | * discards two of the bytes of the MAC address used | ||
1028 | * to create the UUID (SF bug 33070). The PTP V2 | ||
1029 | * enhanced mode fixes this issue and uses bytes 0-2 | ||
1030 | * and bytes 5-7 of the UUID. | ||
1031 | */ | ||
1032 | match_data_345 = skb->data + PTP_V2_UUID_OFFSET + 5; | ||
1033 | if (ptp->mode == MC_CMD_PTP_MODE_V2) { | ||
1034 | match_data_012 = skb->data + PTP_V2_UUID_OFFSET + 2; | ||
1035 | } else { | ||
1036 | match_data_012 = skb->data + PTP_V2_UUID_OFFSET + 0; | ||
1037 | BUG_ON(ptp->mode != MC_CMD_PTP_MODE_V2_ENHANCED); | ||
1046 | } | 1038 | } |
1047 | } | 1039 | } |
1048 | 1040 | ||
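The three matching variants above differ only in which bytes of the captured UUID feed match_data_012 and match_data_345. A compact restatement of those offsets (relative to the start of the UUID field; the enum and helper here are illustrative):

enum ptp_match_mode { MATCH_V1, MATCH_V2, MATCH_V2_ENHANCED };

static void uuid_match_offsets(enum ptp_match_mode mode,
                               unsigned int *off_012, unsigned int *off_345)
{
        switch (mode) {
        case MATCH_V1:          /* all six captured bytes: 0-2 and 3-5 */
                *off_012 = 0;
                *off_345 = 3;
                break;
        case MATCH_V2:          /* original V2 mode: bytes 2-4 and 5-7 */
                *off_012 = 2;
                *off_345 = 5;
                break;
        case MATCH_V2_ENHANCED: /* SF bug 33070 fix: bytes 0-2 and 5-7 */
                *off_012 = 0;
                *off_345 = 5;
                break;
        }
}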
@@ -1056,14 +1048,19 @@ static void efx_ptp_rx(struct efx_channel *channel, struct sk_buff *skb) | |||
1056 | timestamps = skb_hwtstamps(skb); | 1048 | timestamps = skb_hwtstamps(skb); |
1057 | memset(timestamps, 0, sizeof(*timestamps)); | 1049 | memset(timestamps, 0, sizeof(*timestamps)); |
1058 | 1050 | ||
1051 | /* We expect the sequence number to be in the same position in | ||
1052 | * the packet for PTP V1 and V2 | ||
1053 | */ | ||
1054 | BUILD_BUG_ON(PTP_V1_SEQUENCE_OFFSET != PTP_V2_SEQUENCE_OFFSET); | ||
1055 | BUILD_BUG_ON(PTP_V1_SEQUENCE_LENGTH != PTP_V2_SEQUENCE_LENGTH); | ||
1056 | |||
1059 | /* Extract UUID/Sequence information */ | 1057 | /* Extract UUID/Sequence information */ |
1060 | data = skb->data + PTP_V1_UUID_OFFSET; | 1058 | match->words[0] = (match_data_012[0] | |
1061 | match->words[0] = (data[0] | | 1059 | (match_data_012[1] << 8) | |
1062 | (data[1] << 8) | | 1060 | (match_data_012[2] << 16) | |
1063 | (data[2] << 16) | | 1061 | (match_data_345[0] << 24)); |
1064 | (data[3] << 24)); | 1062 | match->words[1] = (match_data_345[1] | |
1065 | match->words[1] = (data[4] | | 1063 | (match_data_345[2] << 8) | |
1066 | (data[5] << 8) | | ||
1067 | (skb->data[PTP_V1_SEQUENCE_OFFSET + | 1064 | (skb->data[PTP_V1_SEQUENCE_OFFSET + |
1068 | PTP_V1_SEQUENCE_LENGTH - 1] << | 1065 | PTP_V1_SEQUENCE_LENGTH - 1] << |
1069 | 16)); | 1066 | 16)); |
@@ -1073,6 +1070,8 @@ static void efx_ptp_rx(struct efx_channel *channel, struct sk_buff *skb) | |||
1073 | 1070 | ||
1074 | skb_queue_tail(&ptp->rxq, skb); | 1071 | skb_queue_tail(&ptp->rxq, skb); |
1075 | queue_work(ptp->workwq, &ptp->work); | 1072 | queue_work(ptp->workwq, &ptp->work); |
1073 | |||
1074 | return true; | ||
1076 | } | 1075 | } |
1077 | 1076 | ||
1078 | /* Transmit a PTP packet. This has to be transmitted by the MC | 1077 | /* Transmit a PTP packet. This has to be transmitted by the MC |
@@ -1167,7 +1166,7 @@ static int efx_ptp_ts_init(struct efx_nic *efx, struct hwtstamp_config *init) | |||
1167 | * timestamped | 1166 | * timestamped |
1168 | */ | 1167 | */ |
1169 | init->rx_filter = HWTSTAMP_FILTER_PTP_V2_L4_EVENT; | 1168 | init->rx_filter = HWTSTAMP_FILTER_PTP_V2_L4_EVENT; |
1170 | new_mode = MC_CMD_PTP_MODE_V2; | 1169 | new_mode = MC_CMD_PTP_MODE_V2_ENHANCED; |
1171 | enable_wanted = true; | 1170 | enable_wanted = true; |
1172 | break; | 1171 | break; |
1173 | case HWTSTAMP_FILTER_PTP_V2_EVENT: | 1172 | case HWTSTAMP_FILTER_PTP_V2_EVENT: |
@@ -1186,7 +1185,14 @@ static int efx_ptp_ts_init(struct efx_nic *efx, struct hwtstamp_config *init) | |||
1186 | if (init->tx_type != HWTSTAMP_TX_OFF) | 1185 | if (init->tx_type != HWTSTAMP_TX_OFF) |
1187 | enable_wanted = true; | 1186 | enable_wanted = true; |
1188 | 1187 | ||
1188 | /* Old versions of the firmware do not support the improved | ||
1189 | * UUID filtering option (SF bug 33070). If the firmware does | ||
1190 | * not accept the enhanced mode, fall back to the standard PTP | ||
1191 | * v2 UUID filtering. | ||
1192 | */ | ||
1189 | rc = efx_ptp_change_mode(efx, enable_wanted, new_mode); | 1193 | rc = efx_ptp_change_mode(efx, enable_wanted, new_mode); |
1194 | if ((rc != 0) && (new_mode == MC_CMD_PTP_MODE_V2_ENHANCED)) | ||
1195 | rc = efx_ptp_change_mode(efx, enable_wanted, MC_CMD_PTP_MODE_V2); | ||
1190 | if (rc != 0) | 1196 | if (rc != 0) |
1191 | return rc; | 1197 | return rc; |
1192 | 1198 | ||
diff --git a/drivers/net/ethernet/sfc/rx.c b/drivers/net/ethernet/sfc/rx.c index 879ff5849bbd..a948b36c1910 100644 --- a/drivers/net/ethernet/sfc/rx.c +++ b/drivers/net/ethernet/sfc/rx.c | |||
@@ -16,6 +16,7 @@ | |||
16 | #include <linux/udp.h> | 16 | #include <linux/udp.h> |
17 | #include <linux/prefetch.h> | 17 | #include <linux/prefetch.h> |
18 | #include <linux/moduleparam.h> | 18 | #include <linux/moduleparam.h> |
19 | #include <linux/iommu.h> | ||
19 | #include <net/ip.h> | 20 | #include <net/ip.h> |
20 | #include <net/checksum.h> | 21 | #include <net/checksum.h> |
21 | #include "net_driver.h" | 22 | #include "net_driver.h" |
@@ -24,85 +25,39 @@ | |||
24 | #include "selftest.h" | 25 | #include "selftest.h" |
25 | #include "workarounds.h" | 26 | #include "workarounds.h" |
26 | 27 | ||
27 | /* Number of RX descriptors pushed at once. */ | 28 | /* Preferred number of descriptors to fill at once */ |
28 | #define EFX_RX_BATCH 8 | 29 | #define EFX_RX_PREFERRED_BATCH 8U |
29 | 30 | ||
30 | /* Maximum size of a buffer sharing a page */ | 31 | /* Number of RX buffers to recycle pages for. When creating the RX page recycle |
31 | #define EFX_RX_HALF_PAGE ((PAGE_SIZE >> 1) - sizeof(struct efx_rx_page_state)) | 32 | * ring, this number is divided by the number of buffers per page to calculate |
33 | * the number of pages to store in the RX page recycle ring. | ||
34 | */ | ||
35 | #define EFX_RECYCLE_RING_SIZE_IOMMU 4096 | ||
36 | #define EFX_RECYCLE_RING_SIZE_NOIOMMU (2 * EFX_RX_PREFERRED_BATCH) | ||
32 | 37 | ||
33 | /* Size of buffer allocated for skb header area. */ | 38 | /* Size of buffer allocated for skb header area. */ |
34 | #define EFX_SKB_HEADERS 64u | 39 | #define EFX_SKB_HEADERS 64u |
35 | 40 | ||
36 | /* | ||
37 | * rx_alloc_method - RX buffer allocation method | ||
38 | * | ||
39 | * This driver supports two methods for allocating and using RX buffers: | ||
40 | * each RX buffer may be backed by an skb or by an order-n page. | ||
41 | * | ||
42 | * When GRO is in use then the second method has a lower overhead, | ||
43 | * since we don't have to allocate then free skbs on reassembled frames. | ||
44 | * | ||
45 | * Values: | ||
46 | * - RX_ALLOC_METHOD_AUTO = 0 | ||
47 | * - RX_ALLOC_METHOD_SKB = 1 | ||
48 | * - RX_ALLOC_METHOD_PAGE = 2 | ||
49 | * | ||
50 | * The heuristic for %RX_ALLOC_METHOD_AUTO is a simple hysteresis count | ||
51 | * controlled by the parameters below. | ||
52 | * | ||
53 | * - Since pushing and popping descriptors are separated by the rx_queue | ||
54 | * size, so the watermarks should be ~rxd_size. | ||
55 | * - The performance win by using page-based allocation for GRO is less | ||
56 | * than the performance hit of using page-based allocation of non-GRO, | ||
57 | * so the watermarks should reflect this. | ||
58 | * | ||
59 | * Per channel we maintain a single variable, updated by each channel: | ||
60 | * | ||
61 | * rx_alloc_level += (gro_performed ? RX_ALLOC_FACTOR_GRO : | ||
62 | * RX_ALLOC_FACTOR_SKB) | ||
63 | * Per NAPI poll interval, we constrain rx_alloc_level to 0..MAX (which | ||
64 | * limits the hysteresis), and update the allocation strategy: | ||
65 | * | ||
66 | * rx_alloc_method = (rx_alloc_level > RX_ALLOC_LEVEL_GRO ? | ||
67 | * RX_ALLOC_METHOD_PAGE : RX_ALLOC_METHOD_SKB) | ||
68 | */ | ||
69 | static int rx_alloc_method = RX_ALLOC_METHOD_AUTO; | ||
70 | |||
71 | #define RX_ALLOC_LEVEL_GRO 0x2000 | ||
72 | #define RX_ALLOC_LEVEL_MAX 0x3000 | ||
73 | #define RX_ALLOC_FACTOR_GRO 1 | ||
74 | #define RX_ALLOC_FACTOR_SKB (-2) | ||
75 | |||
76 | /* This is the percentage fill level below which new RX descriptors | 41 | /* This is the percentage fill level below which new RX descriptors |
77 | * will be added to the RX descriptor ring. | 42 | * will be added to the RX descriptor ring. |
78 | */ | 43 | */ |
79 | static unsigned int rx_refill_threshold; | 44 | static unsigned int rx_refill_threshold; |
80 | 45 | ||
46 | /* Each packet can consume up to ceil(max_frame_len / buffer_size) buffers */ | ||
47 | #define EFX_RX_MAX_FRAGS DIV_ROUND_UP(EFX_MAX_FRAME_LEN(EFX_MAX_MTU), \ | ||
48 | EFX_RX_USR_BUF_SIZE) | ||
49 | |||
81 | /* | 50 | /* |
82 | * RX maximum head room required. | 51 | * RX maximum head room required. |
83 | * | 52 | * |
84 | * This must be at least 1 to prevent overflow and at least 2 to allow | 53 | * This must be at least 1 to prevent overflow, plus one packet-worth |
85 | * pipelined receives. | 54 | * to allow pipelined receives. |
86 | */ | 55 | */ |
87 | #define EFX_RXD_HEAD_ROOM 2 | 56 | #define EFX_RXD_HEAD_ROOM (1 + EFX_RX_MAX_FRAGS) |
88 | 57 | ||
89 | /* Offset of ethernet header within page */ | 58 | static inline u8 *efx_rx_buf_va(struct efx_rx_buffer *buf) |
90 | static inline unsigned int efx_rx_buf_offset(struct efx_nic *efx, | ||
91 | struct efx_rx_buffer *buf) | ||
92 | { | 59 | { |
93 | return buf->page_offset + efx->type->rx_buffer_hash_size; | 60 | return page_address(buf->page) + buf->page_offset; |
94 | } | ||
95 | static inline unsigned int efx_rx_buf_size(struct efx_nic *efx) | ||
96 | { | ||
97 | return PAGE_SIZE << efx->rx_buffer_order; | ||
98 | } | ||
99 | |||
100 | static u8 *efx_rx_buf_eh(struct efx_nic *efx, struct efx_rx_buffer *buf) | ||
101 | { | ||
102 | if (buf->flags & EFX_RX_BUF_PAGE) | ||
103 | return page_address(buf->u.page) + efx_rx_buf_offset(efx, buf); | ||
104 | else | ||
105 | return (u8 *)buf->u.skb->data + efx->type->rx_buffer_hash_size; | ||
106 | } | 61 | } |
107 | 62 | ||
108 | static inline u32 efx_rx_buf_hash(const u8 *eh) | 63 | static inline u32 efx_rx_buf_hash(const u8 *eh) |
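EFX_RX_MAX_FRAGS and the new EFX_RXD_HEAD_ROOM above are pure arithmetic on the maximum frame length and the per-buffer DMA size. A worked example with assumed values, since EFX_MAX_FRAME_LEN() and EFX_RX_USR_BUF_SIZE are defined outside this hunk (a roughly 9 KB jumbo frame and roughly 1.8 KB user buffers):

#include <assert.h>

#define DIV_ROUND_UP(n, d)      (((n) + (d) - 1) / (d))

int main(void)
{
        unsigned int assumed_max_frame_len = 9234;      /* assumption */
        unsigned int assumed_usr_buf_size  = 1824;      /* assumption */

        unsigned int max_frags = DIV_ROUND_UP(assumed_max_frame_len,
                                              assumed_usr_buf_size);
        unsigned int head_room = 1 + max_frags;

        /* 9234 / 1824 rounds up to 6 buffers per worst-case frame, so the
         * refill path must leave 7 descriptors of head room instead of the
         * old fixed 2.
         */
        assert(max_frags == 6);
        assert(head_room == 7);
        return 0;
}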
@@ -119,66 +74,81 @@ static inline u32 efx_rx_buf_hash(const u8 *eh) | |||
119 | #endif | 74 | #endif |
120 | } | 75 | } |
121 | 76 | ||
122 | /** | 77 | static inline struct efx_rx_buffer * |
123 | * efx_init_rx_buffers_skb - create EFX_RX_BATCH skb-based RX buffers | 78 | efx_rx_buf_next(struct efx_rx_queue *rx_queue, struct efx_rx_buffer *rx_buf) |
124 | * | 79 | { |
125 | * @rx_queue: Efx RX queue | 80 | if (unlikely(rx_buf == efx_rx_buffer(rx_queue, rx_queue->ptr_mask))) |
126 | * | 81 | return efx_rx_buffer(rx_queue, 0); |
127 | * This allocates EFX_RX_BATCH skbs, maps them for DMA, and populates a | 82 | else |
128 | * struct efx_rx_buffer for each one. Return a negative error code or 0 | 83 | return rx_buf + 1; |
129 | * on success. May fail having only inserted fewer than EFX_RX_BATCH | 84 | } |
130 | * buffers. | 85 | |
131 | */ | 86 | static inline void efx_sync_rx_buffer(struct efx_nic *efx, |
132 | static int efx_init_rx_buffers_skb(struct efx_rx_queue *rx_queue) | 87 | struct efx_rx_buffer *rx_buf, |
88 | unsigned int len) | ||
89 | { | ||
90 | dma_sync_single_for_cpu(&efx->pci_dev->dev, rx_buf->dma_addr, len, | ||
91 | DMA_FROM_DEVICE); | ||
92 | } | ||
93 | |||
94 | void efx_rx_config_page_split(struct efx_nic *efx) | ||
95 | { | ||
96 | efx->rx_page_buf_step = ALIGN(efx->rx_dma_len + EFX_PAGE_IP_ALIGN, | ||
97 | L1_CACHE_BYTES); | ||
98 | efx->rx_bufs_per_page = efx->rx_buffer_order ? 1 : | ||
99 | ((PAGE_SIZE - sizeof(struct efx_rx_page_state)) / | ||
100 | efx->rx_page_buf_step); | ||
101 | efx->rx_buffer_truesize = (PAGE_SIZE << efx->rx_buffer_order) / | ||
102 | efx->rx_bufs_per_page; | ||
103 | efx->rx_pages_per_batch = DIV_ROUND_UP(EFX_RX_PREFERRED_BATCH, | ||
104 | efx->rx_bufs_per_page); | ||
105 | } | ||
106 | |||
107 | /* Check the RX page recycle ring for a page that can be reused. */ | ||
108 | static struct page *efx_reuse_page(struct efx_rx_queue *rx_queue) | ||
133 | { | 109 | { |
134 | struct efx_nic *efx = rx_queue->efx; | 110 | struct efx_nic *efx = rx_queue->efx; |
135 | struct net_device *net_dev = efx->net_dev; | 111 | struct page *page; |
136 | struct efx_rx_buffer *rx_buf; | 112 | struct efx_rx_page_state *state; |
137 | struct sk_buff *skb; | 113 | unsigned index; |
138 | int skb_len = efx->rx_buffer_len; | ||
139 | unsigned index, count; | ||
140 | 114 | ||
141 | for (count = 0; count < EFX_RX_BATCH; ++count) { | 115 | index = rx_queue->page_remove & rx_queue->page_ptr_mask; |
142 | index = rx_queue->added_count & rx_queue->ptr_mask; | 116 | page = rx_queue->page_ring[index]; |
143 | rx_buf = efx_rx_buffer(rx_queue, index); | 117 | if (page == NULL) |
144 | 118 | return NULL; | |
145 | rx_buf->u.skb = skb = netdev_alloc_skb(net_dev, skb_len); | 119 | |
146 | if (unlikely(!skb)) | 120 | rx_queue->page_ring[index] = NULL; |
147 | return -ENOMEM; | 121 | /* page_remove cannot exceed page_add. */ |
148 | 122 | if (rx_queue->page_remove != rx_queue->page_add) | |
149 | /* Adjust the SKB for padding */ | 123 | ++rx_queue->page_remove; |
150 | skb_reserve(skb, NET_IP_ALIGN); | ||
151 | rx_buf->len = skb_len - NET_IP_ALIGN; | ||
152 | rx_buf->flags = 0; | ||
153 | |||
154 | rx_buf->dma_addr = dma_map_single(&efx->pci_dev->dev, | ||
155 | skb->data, rx_buf->len, | ||
156 | DMA_FROM_DEVICE); | ||
157 | if (unlikely(dma_mapping_error(&efx->pci_dev->dev, | ||
158 | rx_buf->dma_addr))) { | ||
159 | dev_kfree_skb_any(skb); | ||
160 | rx_buf->u.skb = NULL; | ||
161 | return -EIO; | ||
162 | } | ||
163 | 124 | ||
164 | ++rx_queue->added_count; | 125 | /* If page_count is 1 then we hold the only reference to this page. */ |
165 | ++rx_queue->alloc_skb_count; | 126 | if (page_count(page) == 1) { |
127 | ++rx_queue->page_recycle_count; | ||
128 | return page; | ||
129 | } else { | ||
130 | state = page_address(page); | ||
131 | dma_unmap_page(&efx->pci_dev->dev, state->dma_addr, | ||
132 | PAGE_SIZE << efx->rx_buffer_order, | ||
133 | DMA_FROM_DEVICE); | ||
134 | put_page(page); | ||
135 | ++rx_queue->page_recycle_failed; | ||
166 | } | 136 | } |
167 | 137 | ||
168 | return 0; | 138 | return NULL; |
169 | } | 139 | } |
170 | 140 | ||
171 | /** | 141 | /** |
172 | * efx_init_rx_buffers_page - create EFX_RX_BATCH page-based RX buffers | 142 | * efx_init_rx_buffers - create a batch of page-based RX buffers |
173 | * | 143 | * |
174 | * @rx_queue: Efx RX queue | 144 | * @rx_queue: Efx RX queue |
175 | * | 145 | * |
176 | * This allocates memory for EFX_RX_BATCH receive buffers, maps them for DMA, | 146 | * This allocates a batch of pages, maps them for DMA, and populates |
177 | * and populates struct efx_rx_buffers for each one. Return a negative error | 147 | * struct efx_rx_buffers for each one. Return a negative error code or |
178 | * code or 0 on success. If a single page can be split between two buffers, | 148 | * 0 on success. If a single page can be used for multiple buffers, |
179 | * then the page will either be inserted fully, or not at at all. | 149 | * then the page will either be inserted fully, or not at all. |
180 | */ | 150 | */ |
181 | static int efx_init_rx_buffers_page(struct efx_rx_queue *rx_queue) | 151 | static int efx_init_rx_buffers(struct efx_rx_queue *rx_queue) |
182 | { | 152 | { |
183 | struct efx_nic *efx = rx_queue->efx; | 153 | struct efx_nic *efx = rx_queue->efx; |
184 | struct efx_rx_buffer *rx_buf; | 154 | struct efx_rx_buffer *rx_buf; |
@@ -188,150 +158,140 @@ static int efx_init_rx_buffers_page(struct efx_rx_queue *rx_queue) | |||
188 | dma_addr_t dma_addr; | 158 | dma_addr_t dma_addr; |
189 | unsigned index, count; | 159 | unsigned index, count; |
190 | 160 | ||
191 | /* We can split a page between two buffers */ | 161 | count = 0; |
192 | BUILD_BUG_ON(EFX_RX_BATCH & 1); | 162 | do { |
193 | 163 | page = efx_reuse_page(rx_queue); | |
194 | for (count = 0; count < EFX_RX_BATCH; ++count) { | 164 | if (page == NULL) { |
195 | page = alloc_pages(__GFP_COLD | __GFP_COMP | GFP_ATOMIC, | 165 | page = alloc_pages(__GFP_COLD | __GFP_COMP | GFP_ATOMIC, |
196 | efx->rx_buffer_order); | 166 | efx->rx_buffer_order); |
197 | if (unlikely(page == NULL)) | 167 | if (unlikely(page == NULL)) |
198 | return -ENOMEM; | 168 | return -ENOMEM; |
199 | dma_addr = dma_map_page(&efx->pci_dev->dev, page, 0, | 169 | dma_addr = |
200 | efx_rx_buf_size(efx), | 170 | dma_map_page(&efx->pci_dev->dev, page, 0, |
201 | DMA_FROM_DEVICE); | 171 | PAGE_SIZE << efx->rx_buffer_order, |
202 | if (unlikely(dma_mapping_error(&efx->pci_dev->dev, dma_addr))) { | 172 | DMA_FROM_DEVICE); |
203 | __free_pages(page, efx->rx_buffer_order); | 173 | if (unlikely(dma_mapping_error(&efx->pci_dev->dev, |
204 | return -EIO; | 174 | dma_addr))) { |
175 | __free_pages(page, efx->rx_buffer_order); | ||
176 | return -EIO; | ||
177 | } | ||
178 | state = page_address(page); | ||
179 | state->dma_addr = dma_addr; | ||
180 | } else { | ||
181 | state = page_address(page); | ||
182 | dma_addr = state->dma_addr; | ||
205 | } | 183 | } |
206 | state = page_address(page); | ||
207 | state->refcnt = 0; | ||
208 | state->dma_addr = dma_addr; | ||
209 | 184 | ||
210 | dma_addr += sizeof(struct efx_rx_page_state); | 185 | dma_addr += sizeof(struct efx_rx_page_state); |
211 | page_offset = sizeof(struct efx_rx_page_state); | 186 | page_offset = sizeof(struct efx_rx_page_state); |
212 | 187 | ||
213 | split: | 188 | do { |
214 | index = rx_queue->added_count & rx_queue->ptr_mask; | 189 | index = rx_queue->added_count & rx_queue->ptr_mask; |
215 | rx_buf = efx_rx_buffer(rx_queue, index); | 190 | rx_buf = efx_rx_buffer(rx_queue, index); |
216 | rx_buf->dma_addr = dma_addr + EFX_PAGE_IP_ALIGN; | 191 | rx_buf->dma_addr = dma_addr + EFX_PAGE_IP_ALIGN; |
217 | rx_buf->u.page = page; | 192 | rx_buf->page = page; |
218 | rx_buf->page_offset = page_offset; | 193 | rx_buf->page_offset = page_offset + EFX_PAGE_IP_ALIGN; |
219 | rx_buf->len = efx->rx_buffer_len - EFX_PAGE_IP_ALIGN; | 194 | rx_buf->len = efx->rx_dma_len; |
220 | rx_buf->flags = EFX_RX_BUF_PAGE; | 195 | rx_buf->flags = 0; |
221 | ++rx_queue->added_count; | 196 | ++rx_queue->added_count; |
222 | ++rx_queue->alloc_page_count; | ||
223 | ++state->refcnt; | ||
224 | |||
225 | if ((~count & 1) && (efx->rx_buffer_len <= EFX_RX_HALF_PAGE)) { | ||
226 | /* Use the second half of the page */ | ||
227 | get_page(page); | 197 | get_page(page); |
228 | dma_addr += (PAGE_SIZE >> 1); | 198 | dma_addr += efx->rx_page_buf_step; |
229 | page_offset += (PAGE_SIZE >> 1); | 199 | page_offset += efx->rx_page_buf_step; |
230 | ++count; | 200 | } while (page_offset + efx->rx_page_buf_step <= PAGE_SIZE); |
231 | goto split; | 201 | |
232 | } | 202 | rx_buf->flags = EFX_RX_BUF_LAST_IN_PAGE; |
233 | } | 203 | } while (++count < efx->rx_pages_per_batch); |
234 | 204 | ||
235 | return 0; | 205 | return 0; |
236 | } | 206 | } |
237 | 207 | ||
208 | /* Unmap a DMA-mapped page. This function is only called for the final RX | ||
209 | * buffer in a page. | ||
210 | */ | ||
238 | static void efx_unmap_rx_buffer(struct efx_nic *efx, | 211 | static void efx_unmap_rx_buffer(struct efx_nic *efx, |
239 | struct efx_rx_buffer *rx_buf, | 212 | struct efx_rx_buffer *rx_buf) |
240 | unsigned int used_len) | ||
241 | { | 213 | { |
242 | if ((rx_buf->flags & EFX_RX_BUF_PAGE) && rx_buf->u.page) { | 214 | struct page *page = rx_buf->page; |
243 | struct efx_rx_page_state *state; | 215 | |
244 | 216 | if (page) { | |
245 | state = page_address(rx_buf->u.page); | 217 | struct efx_rx_page_state *state = page_address(page); |
246 | if (--state->refcnt == 0) { | 218 | dma_unmap_page(&efx->pci_dev->dev, |
247 | dma_unmap_page(&efx->pci_dev->dev, | 219 | state->dma_addr, |
248 | state->dma_addr, | 220 | PAGE_SIZE << efx->rx_buffer_order, |
249 | efx_rx_buf_size(efx), | 221 | DMA_FROM_DEVICE); |
250 | DMA_FROM_DEVICE); | ||
251 | } else if (used_len) { | ||
252 | dma_sync_single_for_cpu(&efx->pci_dev->dev, | ||
253 | rx_buf->dma_addr, used_len, | ||
254 | DMA_FROM_DEVICE); | ||
255 | } | ||
256 | } else if (!(rx_buf->flags & EFX_RX_BUF_PAGE) && rx_buf->u.skb) { | ||
257 | dma_unmap_single(&efx->pci_dev->dev, rx_buf->dma_addr, | ||
258 | rx_buf->len, DMA_FROM_DEVICE); | ||
259 | } | 222 | } |
260 | } | 223 | } |
261 | 224 | ||
262 | static void efx_free_rx_buffer(struct efx_nic *efx, | 225 | static void efx_free_rx_buffer(struct efx_rx_buffer *rx_buf) |
263 | struct efx_rx_buffer *rx_buf) | ||
264 | { | 226 | { |
265 | if ((rx_buf->flags & EFX_RX_BUF_PAGE) && rx_buf->u.page) { | 227 | if (rx_buf->page) { |
266 | __free_pages(rx_buf->u.page, efx->rx_buffer_order); | 228 | put_page(rx_buf->page); |
267 | rx_buf->u.page = NULL; | 229 | rx_buf->page = NULL; |
268 | } else if (!(rx_buf->flags & EFX_RX_BUF_PAGE) && rx_buf->u.skb) { | ||
269 | dev_kfree_skb_any(rx_buf->u.skb); | ||
270 | rx_buf->u.skb = NULL; | ||
271 | } | 230 | } |
272 | } | 231 | } |
273 | 232 | ||
274 | static void efx_fini_rx_buffer(struct efx_rx_queue *rx_queue, | 233 | /* Attempt to recycle the page if there is an RX recycle ring; the page can |
275 | struct efx_rx_buffer *rx_buf) | 234 | * only be added if this is the final RX buffer, to prevent pages being used in |
235 | * the descriptor ring and appearing in the recycle ring simultaneously. | ||
236 | */ | ||
237 | static void efx_recycle_rx_page(struct efx_channel *channel, | ||
238 | struct efx_rx_buffer *rx_buf) | ||
276 | { | 239 | { |
277 | efx_unmap_rx_buffer(rx_queue->efx, rx_buf, 0); | 240 | struct page *page = rx_buf->page; |
278 | efx_free_rx_buffer(rx_queue->efx, rx_buf); | 241 | struct efx_rx_queue *rx_queue = efx_channel_get_rx_queue(channel); |
279 | } | 242 | struct efx_nic *efx = rx_queue->efx; |
243 | unsigned index; | ||
280 | 244 | ||
281 | /* Attempt to resurrect the other receive buffer that used to share this page, | 245 | /* Only recycle the page after processing the final buffer. */ |
282 | * which had previously been passed up to the kernel and freed. */ | 246 | if (!(rx_buf->flags & EFX_RX_BUF_LAST_IN_PAGE)) |
283 | static void efx_resurrect_rx_buffer(struct efx_rx_queue *rx_queue, | ||
284 | struct efx_rx_buffer *rx_buf) | ||
285 | { | ||
286 | struct efx_rx_page_state *state = page_address(rx_buf->u.page); | ||
287 | struct efx_rx_buffer *new_buf; | ||
288 | unsigned fill_level, index; | ||
289 | |||
290 | /* +1 because efx_rx_packet() incremented removed_count. +1 because | ||
291 | * we'd like to insert an additional descriptor whilst leaving | ||
292 | * EFX_RXD_HEAD_ROOM for the non-recycle path */ | ||
293 | fill_level = (rx_queue->added_count - rx_queue->removed_count + 2); | ||
294 | if (unlikely(fill_level > rx_queue->max_fill)) { | ||
295 | /* We could place "state" on a list, and drain the list in | ||
296 | * efx_fast_push_rx_descriptors(). For now, this will do. */ | ||
297 | return; | 247 | return; |
298 | } | ||
299 | 248 | ||
300 | ++state->refcnt; | 249 | index = rx_queue->page_add & rx_queue->page_ptr_mask; |
301 | get_page(rx_buf->u.page); | 250 | if (rx_queue->page_ring[index] == NULL) { |
251 | unsigned read_index = rx_queue->page_remove & | ||
252 | rx_queue->page_ptr_mask; | ||
302 | 253 | ||
303 | index = rx_queue->added_count & rx_queue->ptr_mask; | 254 | /* The next slot in the recycle ring is available, but |
304 | new_buf = efx_rx_buffer(rx_queue, index); | 255 | * increment page_remove if the read pointer currently |
305 | new_buf->dma_addr = rx_buf->dma_addr ^ (PAGE_SIZE >> 1); | 256 | * points here. |
306 | new_buf->u.page = rx_buf->u.page; | 257 | */ |
307 | new_buf->len = rx_buf->len; | 258 | if (read_index == index) |
308 | new_buf->flags = EFX_RX_BUF_PAGE; | 259 | ++rx_queue->page_remove; |
309 | ++rx_queue->added_count; | 260 | rx_queue->page_ring[index] = page; |
261 | ++rx_queue->page_add; | ||
262 | return; | ||
263 | } | ||
264 | ++rx_queue->page_recycle_full; | ||
265 | efx_unmap_rx_buffer(efx, rx_buf); | ||
266 | put_page(rx_buf->page); | ||
310 | } | 267 | } |
311 | 268 | ||
312 | /* Recycle the given rx buffer directly back into the rx_queue. There is | 269 | static void efx_fini_rx_buffer(struct efx_rx_queue *rx_queue, |
313 | * always room to add this buffer, because we've just popped a buffer. */ | 270 | struct efx_rx_buffer *rx_buf) |
314 | static void efx_recycle_rx_buffer(struct efx_channel *channel, | ||
315 | struct efx_rx_buffer *rx_buf) | ||
316 | { | 271 | { |
317 | struct efx_nic *efx = channel->efx; | 272 | /* Release the page reference we hold for the buffer. */ |
318 | struct efx_rx_queue *rx_queue = efx_channel_get_rx_queue(channel); | 273 | if (rx_buf->page) |
319 | struct efx_rx_buffer *new_buf; | 274 | put_page(rx_buf->page); |
320 | unsigned index; | 275 | |
321 | 276 | /* If this is the last buffer in a page, unmap and free it. */ | |
322 | rx_buf->flags &= EFX_RX_BUF_PAGE; | 277 | if (rx_buf->flags & EFX_RX_BUF_LAST_IN_PAGE) { |
323 | 278 | efx_unmap_rx_buffer(rx_queue->efx, rx_buf); | |
324 | if ((rx_buf->flags & EFX_RX_BUF_PAGE) && | 279 | efx_free_rx_buffer(rx_buf); |
325 | efx->rx_buffer_len <= EFX_RX_HALF_PAGE && | 280 | } |
326 | page_count(rx_buf->u.page) == 1) | 281 | rx_buf->page = NULL; |
327 | efx_resurrect_rx_buffer(rx_queue, rx_buf); | 282 | } |
328 | 283 | ||
329 | index = rx_queue->added_count & rx_queue->ptr_mask; | 284 | /* Recycle the pages that are used by buffers that have just been received. */ |
330 | new_buf = efx_rx_buffer(rx_queue, index); | 285 | static void efx_recycle_rx_buffers(struct efx_channel *channel, |
286 | struct efx_rx_buffer *rx_buf, | ||
287 | unsigned int n_frags) | ||
288 | { | ||
289 | struct efx_rx_queue *rx_queue = efx_channel_get_rx_queue(channel); | ||
331 | 290 | ||
332 | memcpy(new_buf, rx_buf, sizeof(*new_buf)); | 291 | do { |
333 | rx_buf->u.page = NULL; | 292 | efx_recycle_rx_page(channel, rx_buf); |
334 | ++rx_queue->added_count; | 293 | rx_buf = efx_rx_buf_next(rx_queue, rx_buf); |
294 | } while (--n_frags); | ||
335 | } | 295 | } |
336 | 296 | ||
337 | /** | 297 | /** |
@@ -348,8 +308,8 @@ static void efx_recycle_rx_buffer(struct efx_channel *channel, | |||
348 | */ | 308 | */ |
349 | void efx_fast_push_rx_descriptors(struct efx_rx_queue *rx_queue) | 309 | void efx_fast_push_rx_descriptors(struct efx_rx_queue *rx_queue) |
350 | { | 310 | { |
351 | struct efx_channel *channel = efx_rx_queue_channel(rx_queue); | 311 | struct efx_nic *efx = rx_queue->efx; |
352 | unsigned fill_level; | 312 | unsigned int fill_level, batch_size; |
353 | int space, rc = 0; | 313 | int space, rc = 0; |
354 | 314 | ||
355 | /* Calculate current fill level, and exit if we don't need to fill */ | 315 | /* Calculate current fill level, and exit if we don't need to fill */ |
@@ -364,28 +324,26 @@ void efx_fast_push_rx_descriptors(struct efx_rx_queue *rx_queue) | |||
364 | rx_queue->min_fill = fill_level; | 324 | rx_queue->min_fill = fill_level; |
365 | } | 325 | } |
366 | 326 | ||
327 | batch_size = efx->rx_pages_per_batch * efx->rx_bufs_per_page; | ||
367 | space = rx_queue->max_fill - fill_level; | 328 | space = rx_queue->max_fill - fill_level; |
368 | EFX_BUG_ON_PARANOID(space < EFX_RX_BATCH); | 329 | EFX_BUG_ON_PARANOID(space < batch_size); |
369 | 330 | ||
370 | netif_vdbg(rx_queue->efx, rx_status, rx_queue->efx->net_dev, | 331 | netif_vdbg(rx_queue->efx, rx_status, rx_queue->efx->net_dev, |
371 | "RX queue %d fast-filling descriptor ring from" | 332 | "RX queue %d fast-filling descriptor ring from" |
372 | " level %d to level %d using %s allocation\n", | 333 | " level %d to level %d\n", |
373 | efx_rx_queue_index(rx_queue), fill_level, | 334 | efx_rx_queue_index(rx_queue), fill_level, |
374 | rx_queue->max_fill, | 335 | rx_queue->max_fill); |
375 | channel->rx_alloc_push_pages ? "page" : "skb"); | 336 | |
376 | 337 | ||
377 | do { | 338 | do { |
378 | if (channel->rx_alloc_push_pages) | 339 | rc = efx_init_rx_buffers(rx_queue); |
379 | rc = efx_init_rx_buffers_page(rx_queue); | ||
380 | else | ||
381 | rc = efx_init_rx_buffers_skb(rx_queue); | ||
382 | if (unlikely(rc)) { | 340 | if (unlikely(rc)) { |
383 | /* Ensure that we don't leave the rx queue empty */ | 341 | /* Ensure that we don't leave the rx queue empty */ |
384 | if (rx_queue->added_count == rx_queue->removed_count) | 342 | if (rx_queue->added_count == rx_queue->removed_count) |
385 | efx_schedule_slow_fill(rx_queue); | 343 | efx_schedule_slow_fill(rx_queue); |
386 | goto out; | 344 | goto out; |
387 | } | 345 | } |
388 | } while ((space -= EFX_RX_BATCH) >= EFX_RX_BATCH); | 346 | } while ((space -= batch_size) >= batch_size); |
389 | 347 | ||
390 | netif_vdbg(rx_queue->efx, rx_status, rx_queue->efx->net_dev, | 348 | netif_vdbg(rx_queue->efx, rx_status, rx_queue->efx->net_dev, |
391 | "RX queue %d fast-filled descriptor ring " | 349 | "RX queue %d fast-filled descriptor ring " |
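The refill loop above now works in whole batches of efx->rx_pages_per_batch * efx->rx_bufs_per_page buffers. A worked example of how far one call tops up the ring, reusing the assumed page-split numbers from the earlier sketch (2 buffers per page, 4 pages per batch):

#include <assert.h>

int main(void)
{
        int bufs_per_page = 2, pages_per_batch = 4;     /* assumptions */
        int batch_size = pages_per_batch * bufs_per_page;       /* 8 */
        int max_fill = 100, fill_level = 57;
        int space = max_fill - fill_level;              /* 43 */
        int batches = 0;

        /* Mirrors: do { ... } while ((space -= batch_size) >= batch_size); */
        do {
                ++batches;              /* one whole batch of buffers pushed */
        } while ((space -= batch_size) >= batch_size);

        assert(batches == 5);           /* 40 buffers added, 3 slots still empty */
        return 0;
}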
@@ -408,7 +366,7 @@ void efx_rx_slow_fill(unsigned long context) | |||
408 | 366 | ||
409 | static void efx_rx_packet__check_len(struct efx_rx_queue *rx_queue, | 367 | static void efx_rx_packet__check_len(struct efx_rx_queue *rx_queue, |
410 | struct efx_rx_buffer *rx_buf, | 368 | struct efx_rx_buffer *rx_buf, |
411 | int len, bool *leak_packet) | 369 | int len) |
412 | { | 370 | { |
413 | struct efx_nic *efx = rx_queue->efx; | 371 | struct efx_nic *efx = rx_queue->efx; |
414 | unsigned max_len = rx_buf->len - efx->type->rx_buffer_padding; | 372 | unsigned max_len = rx_buf->len - efx->type->rx_buffer_padding; |
@@ -428,11 +386,6 @@ static void efx_rx_packet__check_len(struct efx_rx_queue *rx_queue, | |||
428 | "RX event (0x%x > 0x%x+0x%x). Leaking\n", | 386 | "RX event (0x%x > 0x%x+0x%x). Leaking\n", |
429 | efx_rx_queue_index(rx_queue), len, max_len, | 387 | efx_rx_queue_index(rx_queue), len, max_len, |
430 | efx->type->rx_buffer_padding); | 388 | efx->type->rx_buffer_padding); |
431 | /* If this buffer was skb-allocated, then the meta | ||
432 | * data at the end of the skb will be trashed. So | ||
433 | * we have no choice but to leak the fragment. | ||
434 | */ | ||
435 | *leak_packet = !(rx_buf->flags & EFX_RX_BUF_PAGE); | ||
436 | efx_schedule_reset(efx, RESET_TYPE_RX_RECOVERY); | 389 | efx_schedule_reset(efx, RESET_TYPE_RX_RECOVERY); |
437 | } else { | 390 | } else { |
438 | if (net_ratelimit()) | 391 | if (net_ratelimit()) |
@@ -448,212 +401,238 @@ static void efx_rx_packet__check_len(struct efx_rx_queue *rx_queue, | |||
448 | /* Pass a received packet up through GRO. GRO can handle pages | 401 | /* Pass a received packet up through GRO. GRO can handle pages |
449 | * regardless of checksum state and skbs with a good checksum. | 402 | * regardless of checksum state and skbs with a good checksum. |
450 | */ | 403 | */ |
451 | static void efx_rx_packet_gro(struct efx_channel *channel, | 404 | static void |
452 | struct efx_rx_buffer *rx_buf, | 405 | efx_rx_packet_gro(struct efx_channel *channel, struct efx_rx_buffer *rx_buf, |
453 | const u8 *eh) | 406 | unsigned int n_frags, u8 *eh) |
454 | { | 407 | { |
455 | struct napi_struct *napi = &channel->napi_str; | 408 | struct napi_struct *napi = &channel->napi_str; |
456 | gro_result_t gro_result; | 409 | gro_result_t gro_result; |
410 | struct efx_nic *efx = channel->efx; | ||
411 | struct sk_buff *skb; | ||
457 | 412 | ||
458 | if (rx_buf->flags & EFX_RX_BUF_PAGE) { | 413 | skb = napi_get_frags(napi); |
459 | struct efx_nic *efx = channel->efx; | 414 | if (unlikely(!skb)) { |
460 | struct page *page = rx_buf->u.page; | 415 | while (n_frags--) { |
461 | struct sk_buff *skb; | 416 | put_page(rx_buf->page); |
417 | rx_buf->page = NULL; | ||
418 | rx_buf = efx_rx_buf_next(&channel->rx_queue, rx_buf); | ||
419 | } | ||
420 | return; | ||
421 | } | ||
462 | 422 | ||
463 | rx_buf->u.page = NULL; | 423 | if (efx->net_dev->features & NETIF_F_RXHASH) |
424 | skb->rxhash = efx_rx_buf_hash(eh); | ||
425 | skb->ip_summed = ((rx_buf->flags & EFX_RX_PKT_CSUMMED) ? | ||
426 | CHECKSUM_UNNECESSARY : CHECKSUM_NONE); | ||
427 | |||
428 | for (;;) { | ||
429 | skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags, | ||
430 | rx_buf->page, rx_buf->page_offset, | ||
431 | rx_buf->len); | ||
432 | rx_buf->page = NULL; | ||
433 | skb->len += rx_buf->len; | ||
434 | if (skb_shinfo(skb)->nr_frags == n_frags) | ||
435 | break; | ||
436 | |||
437 | rx_buf = efx_rx_buf_next(&channel->rx_queue, rx_buf); | ||
438 | } | ||
464 | 439 | ||
465 | skb = napi_get_frags(napi); | 440 | skb->data_len = skb->len; |
466 | if (!skb) { | 441 | skb->truesize += n_frags * efx->rx_buffer_truesize; |
467 | put_page(page); | 442 | |
468 | return; | 443 | skb_record_rx_queue(skb, channel->rx_queue.core_index); |
469 | } | 444 | |
445 | gro_result = napi_gro_frags(napi); | ||
446 | if (gro_result != GRO_DROP) | ||
447 | channel->irq_mod_score += 2; | ||
448 | } | ||
470 | 449 | ||
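The new GRO path attaches all n_frags ring entries to one skb obtained from napi_get_frags(), wrapping the ring index the same way efx_rx_buf_next() does and adjusting truesize once at the end. A rough userspace model of that walk follows; the per-buffer lengths and the truesize-per-buffer figure are placeholders.

	/* model of the fragment walk in the GRO path: accumulate skb->len
	 * across n_frags ring entries, masking the free-running index onto
	 * the ring, then account truesize in one step
	 */
	#include <stdio.h>

	struct buf { unsigned int len; };

	int main(void)
	{
		struct buf ring[8] = { {1792}, {1792}, {600} }; /* 3-frag packet */
		unsigned int ptr_mask = 7, index = 0, n_frags = 3;
		unsigned int skb_len = 0, rx_buffer_truesize = 2048;

		for (unsigned int i = 0; i < n_frags; i++) {
			struct buf *b = &ring[(index + i) & ptr_mask];
			skb_len += b->len;	/* skb->len grows per fragment */
		}

		printf("len=%u data_len=%u truesize+=%u\n",
		       skb_len, skb_len, n_frags * rx_buffer_truesize);
		return 0;
	}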
471 | if (efx->net_dev->features & NETIF_F_RXHASH) | 450 | /* Allocate and construct an SKB around page fragments */ |
472 | skb->rxhash = efx_rx_buf_hash(eh); | 451 | static struct sk_buff *efx_rx_mk_skb(struct efx_channel *channel, |
452 | struct efx_rx_buffer *rx_buf, | ||
453 | unsigned int n_frags, | ||
454 | u8 *eh, int hdr_len) | ||
455 | { | ||
456 | struct efx_nic *efx = channel->efx; | ||
457 | struct sk_buff *skb; | ||
473 | 458 | ||
474 | skb_fill_page_desc(skb, 0, page, | 459 | /* Allocate an SKB to store the headers */ |
475 | efx_rx_buf_offset(efx, rx_buf), rx_buf->len); | 460 | skb = netdev_alloc_skb(efx->net_dev, hdr_len + EFX_PAGE_SKB_ALIGN); |
461 | if (unlikely(skb == NULL)) | ||
462 | return NULL; | ||
476 | 463 | ||
477 | skb->len = rx_buf->len; | 464 | EFX_BUG_ON_PARANOID(rx_buf->len < hdr_len); |
478 | skb->data_len = rx_buf->len; | ||
479 | skb->truesize += rx_buf->len; | ||
480 | skb->ip_summed = ((rx_buf->flags & EFX_RX_PKT_CSUMMED) ? | ||
481 | CHECKSUM_UNNECESSARY : CHECKSUM_NONE); | ||
482 | 465 | ||
483 | skb_record_rx_queue(skb, channel->rx_queue.core_index); | 466 | skb_reserve(skb, EFX_PAGE_SKB_ALIGN); |
467 | memcpy(__skb_put(skb, hdr_len), eh, hdr_len); | ||
484 | 468 | ||
485 | gro_result = napi_gro_frags(napi); | 469 | /* Append the remaining page(s) onto the frag list */ |
486 | } else { | 470 | if (rx_buf->len > hdr_len) { |
487 | struct sk_buff *skb = rx_buf->u.skb; | 471 | rx_buf->page_offset += hdr_len; |
472 | rx_buf->len -= hdr_len; | ||
488 | 473 | ||
489 | EFX_BUG_ON_PARANOID(!(rx_buf->flags & EFX_RX_PKT_CSUMMED)); | 474 | for (;;) { |
490 | rx_buf->u.skb = NULL; | 475 | skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags, |
491 | skb->ip_summed = CHECKSUM_UNNECESSARY; | 476 | rx_buf->page, rx_buf->page_offset, |
477 | rx_buf->len); | ||
478 | rx_buf->page = NULL; | ||
479 | skb->len += rx_buf->len; | ||
480 | skb->data_len += rx_buf->len; | ||
481 | if (skb_shinfo(skb)->nr_frags == n_frags) | ||
482 | break; | ||
492 | 483 | ||
493 | gro_result = napi_gro_receive(napi, skb); | 484 | rx_buf = efx_rx_buf_next(&channel->rx_queue, rx_buf); |
485 | } | ||
486 | } else { | ||
487 | __free_pages(rx_buf->page, efx->rx_buffer_order); | ||
488 | rx_buf->page = NULL; | ||
489 | n_frags = 0; | ||
494 | } | 490 | } |
495 | 491 | ||
496 | if (gro_result == GRO_NORMAL) { | 492 | skb->truesize += n_frags * efx->rx_buffer_truesize; |
497 | channel->rx_alloc_level += RX_ALLOC_FACTOR_SKB; | 493 | |
498 | } else if (gro_result != GRO_DROP) { | 494 | /* Move past the ethernet header */ |
499 | channel->rx_alloc_level += RX_ALLOC_FACTOR_GRO; | 495 | skb->protocol = eth_type_trans(skb, efx->net_dev); |
500 | channel->irq_mod_score += 2; | 496 | |
501 | } | 497 | return skb; |
502 | } | 498 | } |
503 | 499 | ||
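efx_rx_mk_skb() splits each packet at hdr_len = min(len, EFX_SKB_HEADERS): the header bytes are copied into the skb's linear area and whatever remains of the first buffer, plus any further buffers, stays attached as page fragments. A sketch of that split, using an assumed header budget rather than the driver's EFX_SKB_HEADERS value:

	/* model of the linear/paged split done when building an skb around
	 * page fragments: headers are copied, payload stays in the pages
	 */
	#include <stdio.h>

	#define SKB_HEADER_BUDGET 128u	/* stand-in for EFX_SKB_HEADERS */

	int main(void)
	{
		unsigned int frag_len[3] = { 1792, 1792, 600 };
		unsigned int n_frags = 3;
		unsigned int hdr_len = frag_len[0] < SKB_HEADER_BUDGET ?
				       frag_len[0] : SKB_HEADER_BUDGET;
		unsigned int paged = frag_len[0] - hdr_len; /* tail of frag 0 */

		for (unsigned int i = 1; i < n_frags; i++)
			paged += frag_len[i];		/* whole later frags */

		printf("linear=%u paged=%u total=%u\n",
		       hdr_len, paged, hdr_len + paged);
		return 0;
	}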
504 | void efx_rx_packet(struct efx_rx_queue *rx_queue, unsigned int index, | 500 | void efx_rx_packet(struct efx_rx_queue *rx_queue, unsigned int index, |
505 | unsigned int len, u16 flags) | 501 | unsigned int n_frags, unsigned int len, u16 flags) |
506 | { | 502 | { |
507 | struct efx_nic *efx = rx_queue->efx; | 503 | struct efx_nic *efx = rx_queue->efx; |
508 | struct efx_channel *channel = efx_rx_queue_channel(rx_queue); | 504 | struct efx_channel *channel = efx_rx_queue_channel(rx_queue); |
509 | struct efx_rx_buffer *rx_buf; | 505 | struct efx_rx_buffer *rx_buf; |
510 | bool leak_packet = false; | ||
511 | 506 | ||
512 | rx_buf = efx_rx_buffer(rx_queue, index); | 507 | rx_buf = efx_rx_buffer(rx_queue, index); |
513 | rx_buf->flags |= flags; | 508 | rx_buf->flags |= flags; |
514 | 509 | ||
515 | /* This allows the refill path to post another buffer. | 510 | /* Validate the number of fragments and completed length */ |
516 | * EFX_RXD_HEAD_ROOM ensures that the slot we are using | 511 | if (n_frags == 1) { |
517 | * isn't overwritten yet. | 512 | efx_rx_packet__check_len(rx_queue, rx_buf, len); |
518 | */ | 513 | } else if (unlikely(n_frags > EFX_RX_MAX_FRAGS) || |
519 | rx_queue->removed_count++; | 514 | unlikely(len <= (n_frags - 1) * EFX_RX_USR_BUF_SIZE) || |
520 | 515 | unlikely(len > n_frags * EFX_RX_USR_BUF_SIZE) || | |
521 | /* Validate the length encoded in the event vs the descriptor pushed */ | 516 | unlikely(!efx->rx_scatter)) { |
522 | efx_rx_packet__check_len(rx_queue, rx_buf, len, &leak_packet); | 517 | /* If this isn't an explicit discard request, either |
518 | * the hardware or the driver is broken. | ||
519 | */ | ||
520 | WARN_ON(!(len == 0 && rx_buf->flags & EFX_RX_PKT_DISCARD)); | ||
521 | rx_buf->flags |= EFX_RX_PKT_DISCARD; | ||
522 | } | ||
523 | 523 | ||
524 | netif_vdbg(efx, rx_status, efx->net_dev, | 524 | netif_vdbg(efx, rx_status, efx->net_dev, |
525 | "RX queue %d received id %x at %llx+%x %s%s\n", | 525 | "RX queue %d received ids %x-%x len %d %s%s\n", |
526 | efx_rx_queue_index(rx_queue), index, | 526 | efx_rx_queue_index(rx_queue), index, |
527 | (unsigned long long)rx_buf->dma_addr, len, | 527 | (index + n_frags - 1) & rx_queue->ptr_mask, len, |
528 | (rx_buf->flags & EFX_RX_PKT_CSUMMED) ? " [SUMMED]" : "", | 528 | (rx_buf->flags & EFX_RX_PKT_CSUMMED) ? " [SUMMED]" : "", |
529 | (rx_buf->flags & EFX_RX_PKT_DISCARD) ? " [DISCARD]" : ""); | 529 | (rx_buf->flags & EFX_RX_PKT_DISCARD) ? " [DISCARD]" : ""); |
530 | 530 | ||
531 | /* Discard packet, if instructed to do so */ | 531 | /* Discard packet, if instructed to do so. Process the |
532 | * previous receive first. | ||
533 | */ | ||
532 | if (unlikely(rx_buf->flags & EFX_RX_PKT_DISCARD)) { | 534 | if (unlikely(rx_buf->flags & EFX_RX_PKT_DISCARD)) { |
533 | if (unlikely(leak_packet)) | 535 | efx_rx_flush_packet(channel); |
534 | channel->n_skbuff_leaks++; | 536 | put_page(rx_buf->page); |
535 | else | 537 | efx_recycle_rx_buffers(channel, rx_buf, n_frags); |
536 | efx_recycle_rx_buffer(channel, rx_buf); | 538 | return; |
537 | |||
538 | /* Don't hold off the previous receive */ | ||
539 | rx_buf = NULL; | ||
540 | goto out; | ||
541 | } | 539 | } |
542 | 540 | ||
543 | /* Release and/or sync DMA mapping - assumes all RX buffers | 541 | if (n_frags == 1) |
544 | * consumed in-order per RX queue | 542 | rx_buf->len = len; |
543 | |||
544 | /* Release and/or sync the DMA mapping - assumes all RX buffers | ||
545 | * consumed in-order per RX queue. | ||
545 | */ | 546 | */ |
546 | efx_unmap_rx_buffer(efx, rx_buf, len); | 547 | efx_sync_rx_buffer(efx, rx_buf, rx_buf->len); |
547 | 548 | ||
548 | /* Prefetch nice and early so data will (hopefully) be in cache by | 549 | /* Prefetch nice and early so data will (hopefully) be in cache by |
549 | * the time we look at it. | 550 | * the time we look at it. |
550 | */ | 551 | */ |
551 | prefetch(efx_rx_buf_eh(efx, rx_buf)); | 552 | prefetch(efx_rx_buf_va(rx_buf)); |
553 | |||
554 | rx_buf->page_offset += efx->type->rx_buffer_hash_size; | ||
555 | rx_buf->len -= efx->type->rx_buffer_hash_size; | ||
556 | |||
557 | if (n_frags > 1) { | ||
558 | /* Release/sync DMA mapping for additional fragments. | ||
559 | * Fix length for last fragment. | ||
560 | */ | ||
561 | unsigned int tail_frags = n_frags - 1; | ||
562 | |||
563 | for (;;) { | ||
564 | rx_buf = efx_rx_buf_next(rx_queue, rx_buf); | ||
565 | if (--tail_frags == 0) | ||
566 | break; | ||
567 | efx_sync_rx_buffer(efx, rx_buf, EFX_RX_USR_BUF_SIZE); | ||
568 | } | ||
569 | rx_buf->len = len - (n_frags - 1) * EFX_RX_USR_BUF_SIZE; | ||
570 | efx_sync_rx_buffer(efx, rx_buf, rx_buf->len); | ||
571 | } | ||
572 | |||
573 | /* All fragments have been DMA-synced, so recycle buffers and pages. */ | ||
574 | rx_buf = efx_rx_buffer(rx_queue, index); | ||
575 | efx_recycle_rx_buffers(channel, rx_buf, n_frags); | ||
552 | 576 | ||
553 | /* Pipeline receives so that we give time for packet headers to be | 577 | /* Pipeline receives so that we give time for packet headers to be |
554 | * prefetched into cache. | 578 | * prefetched into cache. |
555 | */ | 579 | */ |
556 | rx_buf->len = len - efx->type->rx_buffer_hash_size; | 580 | efx_rx_flush_packet(channel); |
557 | out: | 581 | channel->rx_pkt_n_frags = n_frags; |
558 | if (channel->rx_pkt) | 582 | channel->rx_pkt_index = index; |
559 | __efx_rx_packet(channel, channel->rx_pkt); | ||
560 | channel->rx_pkt = rx_buf; | ||
561 | } | 583 | } |
562 | 584 | ||
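For a multi-fragment completion (n_frags > 1), efx_rx_packet() now insists that the completed length actually lands in the last buffer, i.e. (n_frags - 1) * EFX_RX_USR_BUF_SIZE < len <= n_frags * EFX_RX_USR_BUF_SIZE, and the last fragment's length is whatever is left over. The sketch below models those bounds; 1792 and 4 are stand-ins for EFX_RX_USR_BUF_SIZE and EFX_RX_MAX_FRAGS, not the driver's definitions.

	/* model of the scatter-length validation and the last-fragment
	 * length fix-up
	 */
	#include <assert.h>
	#include <stdbool.h>
	#include <stdio.h>

	#define USR_BUF_SIZE 1792u
	#define MAX_FRAGS    4u

	static bool scatter_len_ok(unsigned int n_frags, unsigned int len)
	{
		return n_frags <= MAX_FRAGS &&
		       len > (n_frags - 1) * USR_BUF_SIZE &&
		       len <= n_frags * USR_BUF_SIZE;
	}

	int main(void)
	{
		assert(scatter_len_ok(3, 4000));
		assert(!scatter_len_ok(3, 3000)); /* fits in 2 frags: bogus event */

		/* final fragment of a valid 3-frag, 4000-byte packet */
		unsigned int last = 4000 - (3 - 1) * USR_BUF_SIZE;
		printf("last fragment carries %u bytes\n", last);
		assert(last == 416);
		return 0;
	}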
563 | static void efx_rx_deliver(struct efx_channel *channel, | 585 | static void efx_rx_deliver(struct efx_channel *channel, u8 *eh, |
564 | struct efx_rx_buffer *rx_buf) | 586 | struct efx_rx_buffer *rx_buf, |
587 | unsigned int n_frags) | ||
565 | { | 588 | { |
566 | struct sk_buff *skb; | 589 | struct sk_buff *skb; |
590 | u16 hdr_len = min_t(u16, rx_buf->len, EFX_SKB_HEADERS); | ||
567 | 591 | ||
568 | /* We now own the SKB */ | 592 | skb = efx_rx_mk_skb(channel, rx_buf, n_frags, eh, hdr_len); |
569 | skb = rx_buf->u.skb; | 593 | if (unlikely(skb == NULL)) { |
570 | rx_buf->u.skb = NULL; | 594 | efx_free_rx_buffer(rx_buf); |
595 | return; | ||
596 | } | ||
597 | skb_record_rx_queue(skb, channel->rx_queue.core_index); | ||
571 | 598 | ||
572 | /* Set the SKB flags */ | 599 | /* Set the SKB flags */ |
573 | skb_checksum_none_assert(skb); | 600 | skb_checksum_none_assert(skb); |
574 | 601 | ||
575 | /* Record the rx_queue */ | ||
576 | skb_record_rx_queue(skb, channel->rx_queue.core_index); | ||
577 | |||
578 | /* Pass the packet up */ | ||
579 | if (channel->type->receive_skb) | 602 | if (channel->type->receive_skb) |
580 | channel->type->receive_skb(channel, skb); | 603 | if (channel->type->receive_skb(channel, skb)) |
581 | else | 604 | return; |
582 | netif_receive_skb(skb); | ||
583 | 605 | ||
584 | /* Update allocation strategy method */ | 606 | /* Pass the packet up */ |
585 | channel->rx_alloc_level += RX_ALLOC_FACTOR_SKB; | 607 | netif_receive_skb(skb); |
586 | } | 608 | } |
587 | 609 | ||
588 | /* Handle a received packet. Second half: Touches packet payload. */ | 610 | /* Handle a received packet. Second half: Touches packet payload. */ |
589 | void __efx_rx_packet(struct efx_channel *channel, struct efx_rx_buffer *rx_buf) | 611 | void __efx_rx_packet(struct efx_channel *channel) |
590 | { | 612 | { |
591 | struct efx_nic *efx = channel->efx; | 613 | struct efx_nic *efx = channel->efx; |
592 | u8 *eh = efx_rx_buf_eh(efx, rx_buf); | 614 | struct efx_rx_buffer *rx_buf = |
615 | efx_rx_buffer(&channel->rx_queue, channel->rx_pkt_index); | ||
616 | u8 *eh = efx_rx_buf_va(rx_buf); | ||
593 | 617 | ||
594 | /* If we're in loopback test, then pass the packet directly to the | 618 | /* If we're in loopback test, then pass the packet directly to the |
595 | * loopback layer, and free the rx_buf here | 619 | * loopback layer, and free the rx_buf here |
596 | */ | 620 | */ |
597 | if (unlikely(efx->loopback_selftest)) { | 621 | if (unlikely(efx->loopback_selftest)) { |
598 | efx_loopback_rx_packet(efx, eh, rx_buf->len); | 622 | efx_loopback_rx_packet(efx, eh, rx_buf->len); |
599 | efx_free_rx_buffer(efx, rx_buf); | 623 | efx_free_rx_buffer(rx_buf); |
600 | return; | 624 | goto out; |
601 | } | ||
602 | |||
603 | if (!(rx_buf->flags & EFX_RX_BUF_PAGE)) { | ||
604 | struct sk_buff *skb = rx_buf->u.skb; | ||
605 | |||
606 | prefetch(skb_shinfo(skb)); | ||
607 | |||
608 | skb_reserve(skb, efx->type->rx_buffer_hash_size); | ||
609 | skb_put(skb, rx_buf->len); | ||
610 | |||
611 | if (efx->net_dev->features & NETIF_F_RXHASH) | ||
612 | skb->rxhash = efx_rx_buf_hash(eh); | ||
613 | |||
614 | /* Move past the ethernet header. rx_buf->data still points | ||
615 | * at the ethernet header */ | ||
616 | skb->protocol = eth_type_trans(skb, efx->net_dev); | ||
617 | |||
618 | skb_record_rx_queue(skb, channel->rx_queue.core_index); | ||
619 | } | 625 | } |
620 | 626 | ||
621 | if (unlikely(!(efx->net_dev->features & NETIF_F_RXCSUM))) | 627 | if (unlikely(!(efx->net_dev->features & NETIF_F_RXCSUM))) |
622 | rx_buf->flags &= ~EFX_RX_PKT_CSUMMED; | 628 | rx_buf->flags &= ~EFX_RX_PKT_CSUMMED; |
623 | 629 | ||
624 | if (likely(rx_buf->flags & (EFX_RX_BUF_PAGE | EFX_RX_PKT_CSUMMED)) && | 630 | if (!channel->type->receive_skb) |
625 | !channel->type->receive_skb) | 631 | efx_rx_packet_gro(channel, rx_buf, channel->rx_pkt_n_frags, eh); |
626 | efx_rx_packet_gro(channel, rx_buf, eh); | ||
627 | else | 632 | else |
628 | efx_rx_deliver(channel, rx_buf); | 633 | efx_rx_deliver(channel, eh, rx_buf, channel->rx_pkt_n_frags); |
629 | } | 634 | out: |
630 | 635 | channel->rx_pkt_n_frags = 0; | |
631 | void efx_rx_strategy(struct efx_channel *channel) | ||
632 | { | ||
633 | enum efx_rx_alloc_method method = rx_alloc_method; | ||
634 | |||
635 | if (channel->type->receive_skb) { | ||
636 | channel->rx_alloc_push_pages = false; | ||
637 | return; | ||
638 | } | ||
639 | |||
640 | /* Only makes sense to use page based allocation if GRO is enabled */ | ||
641 | if (!(channel->efx->net_dev->features & NETIF_F_GRO)) { | ||
642 | method = RX_ALLOC_METHOD_SKB; | ||
643 | } else if (method == RX_ALLOC_METHOD_AUTO) { | ||
644 | /* Constrain the rx_alloc_level */ | ||
645 | if (channel->rx_alloc_level < 0) | ||
646 | channel->rx_alloc_level = 0; | ||
647 | else if (channel->rx_alloc_level > RX_ALLOC_LEVEL_MAX) | ||
648 | channel->rx_alloc_level = RX_ALLOC_LEVEL_MAX; | ||
649 | |||
650 | /* Decide on the allocation method */ | ||
651 | method = ((channel->rx_alloc_level > RX_ALLOC_LEVEL_GRO) ? | ||
652 | RX_ALLOC_METHOD_PAGE : RX_ALLOC_METHOD_SKB); | ||
653 | } | ||
654 | |||
655 | /* Push the option */ | ||
656 | channel->rx_alloc_push_pages = (method == RX_ALLOC_METHOD_PAGE); | ||
657 | } | 636 | } |
658 | 637 | ||
659 | int efx_probe_rx_queue(struct efx_rx_queue *rx_queue) | 638 | int efx_probe_rx_queue(struct efx_rx_queue *rx_queue) |
@@ -683,9 +662,32 @@ int efx_probe_rx_queue(struct efx_rx_queue *rx_queue) | |||
683 | kfree(rx_queue->buffer); | 662 | kfree(rx_queue->buffer); |
684 | rx_queue->buffer = NULL; | 663 | rx_queue->buffer = NULL; |
685 | } | 664 | } |
665 | |||
686 | return rc; | 666 | return rc; |
687 | } | 667 | } |
688 | 668 | ||
669 | void efx_init_rx_recycle_ring(struct efx_nic *efx, | ||
670 | struct efx_rx_queue *rx_queue) | ||
671 | { | ||
672 | unsigned int bufs_in_recycle_ring, page_ring_size; | ||
673 | |||
674 | /* Set the RX recycle ring size */ | ||
675 | #ifdef CONFIG_PPC64 | ||
676 | bufs_in_recycle_ring = EFX_RECYCLE_RING_SIZE_IOMMU; | ||
677 | #else | ||
678 | if (efx->pci_dev->dev.iommu_group) | ||
679 | bufs_in_recycle_ring = EFX_RECYCLE_RING_SIZE_IOMMU; | ||
680 | else | ||
681 | bufs_in_recycle_ring = EFX_RECYCLE_RING_SIZE_NOIOMMU; | ||
682 | #endif /* CONFIG_PPC64 */ | ||
683 | |||
684 | page_ring_size = roundup_pow_of_two(bufs_in_recycle_ring / | ||
685 | efx->rx_bufs_per_page); | ||
686 | rx_queue->page_ring = kcalloc(page_ring_size, | ||
687 | sizeof(*rx_queue->page_ring), GFP_KERNEL); | ||
688 | rx_queue->page_ptr_mask = page_ring_size - 1; | ||
689 | } | ||
690 | |||
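efx_init_rx_recycle_ring() picks a target buffer count (larger when an IOMMU makes fresh DMA mappings expensive), converts it to pages and rounds up to a power of two so a simple AND-mask can index the ring. A sketch of that sizing, with illustrative counts rather than the EFX_RECYCLE_RING_SIZE_* constants:

	/* model of the recycle-ring sizing: buffers -> pages -> power of two */
	#include <stdio.h>

	static unsigned int round_up_pow2(unsigned int n)	/* mirrors roundup_pow_of_two() */
	{
		unsigned int p = 1;

		while (p < n)
			p <<= 1;
		return p;
	}

	int main(void)
	{
		unsigned int bufs_in_ring = 64;		/* pretend IOMMU case */
		unsigned int rx_bufs_per_page = 2;	/* two buffers per page */
		unsigned int ring_size =
			round_up_pow2(bufs_in_ring / rx_bufs_per_page);
		unsigned int ptr_mask = ring_size - 1;

		printf("page_ring_size=%u page_ptr_mask=%#x\n", ring_size, ptr_mask);
		return 0;
	}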
689 | void efx_init_rx_queue(struct efx_rx_queue *rx_queue) | 691 | void efx_init_rx_queue(struct efx_rx_queue *rx_queue) |
690 | { | 692 | { |
691 | struct efx_nic *efx = rx_queue->efx; | 693 | struct efx_nic *efx = rx_queue->efx; |
@@ -699,10 +701,18 @@ void efx_init_rx_queue(struct efx_rx_queue *rx_queue) | |||
699 | rx_queue->notified_count = 0; | 701 | rx_queue->notified_count = 0; |
700 | rx_queue->removed_count = 0; | 702 | rx_queue->removed_count = 0; |
701 | rx_queue->min_fill = -1U; | 703 | rx_queue->min_fill = -1U; |
704 | efx_init_rx_recycle_ring(efx, rx_queue); | ||
705 | |||
706 | rx_queue->page_remove = 0; | ||
707 | rx_queue->page_add = rx_queue->page_ptr_mask + 1; | ||
708 | rx_queue->page_recycle_count = 0; | ||
709 | rx_queue->page_recycle_failed = 0; | ||
710 | rx_queue->page_recycle_full = 0; | ||
702 | 711 | ||
703 | /* Initialise limit fields */ | 712 | /* Initialise limit fields */ |
704 | max_fill = efx->rxq_entries - EFX_RXD_HEAD_ROOM; | 713 | max_fill = efx->rxq_entries - EFX_RXD_HEAD_ROOM; |
705 | max_trigger = max_fill - EFX_RX_BATCH; | 714 | max_trigger = |
715 | max_fill - efx->rx_pages_per_batch * efx->rx_bufs_per_page; | ||
706 | if (rx_refill_threshold != 0) { | 716 | if (rx_refill_threshold != 0) { |
707 | trigger = max_fill * min(rx_refill_threshold, 100U) / 100U; | 717 | trigger = max_fill * min(rx_refill_threshold, 100U) / 100U; |
708 | if (trigger > max_trigger) | 718 | if (trigger > max_trigger) |
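The fast-fill trigger is now expressed in whole page batches: the queue may fill to rxq_entries minus the descriptor head room, and refill is triggered no later than one page batch below that, or at rx_refill_threshold percent if that is lower. The sketch below reproduces that arithmetic with illustrative ring and batch sizes.

	/* model of the max_fill / fast-fill trigger calculation */
	#include <stdio.h>

	int main(void)
	{
		unsigned int rxq_entries = 512, head_room = 2;	/* placeholders */
		unsigned int rx_pages_per_batch = 4, rx_bufs_per_page = 2;
		unsigned int rx_refill_threshold = 90;		/* percent, 0 = default */

		unsigned int max_fill = rxq_entries - head_room;
		unsigned int max_trigger =
			max_fill - rx_pages_per_batch * rx_bufs_per_page;
		unsigned int trigger = max_trigger;

		if (rx_refill_threshold != 0) {
			unsigned int t = rx_refill_threshold < 100 ?
					 rx_refill_threshold : 100;
			trigger = max_fill * t / 100;
			if (trigger > max_trigger)
				trigger = max_trigger;
		}

		printf("max_fill=%u fast_fill_trigger=%u\n", max_fill, trigger);
		return 0;
	}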
@@ -722,6 +732,7 @@ void efx_init_rx_queue(struct efx_rx_queue *rx_queue) | |||
722 | void efx_fini_rx_queue(struct efx_rx_queue *rx_queue) | 732 | void efx_fini_rx_queue(struct efx_rx_queue *rx_queue) |
723 | { | 733 | { |
724 | int i; | 734 | int i; |
735 | struct efx_nic *efx = rx_queue->efx; | ||
725 | struct efx_rx_buffer *rx_buf; | 736 | struct efx_rx_buffer *rx_buf; |
726 | 737 | ||
727 | netif_dbg(rx_queue->efx, drv, rx_queue->efx->net_dev, | 738 | netif_dbg(rx_queue->efx, drv, rx_queue->efx->net_dev, |
@@ -733,13 +744,32 @@ void efx_fini_rx_queue(struct efx_rx_queue *rx_queue) | |||
733 | del_timer_sync(&rx_queue->slow_fill); | 744 | del_timer_sync(&rx_queue->slow_fill); |
734 | efx_nic_fini_rx(rx_queue); | 745 | efx_nic_fini_rx(rx_queue); |
735 | 746 | ||
736 | /* Release RX buffers NB start at index 0 not current HW ptr */ | 747 | /* Release RX buffers from the current read ptr to the write ptr */ |
737 | if (rx_queue->buffer) { | 748 | if (rx_queue->buffer) { |
738 | for (i = 0; i <= rx_queue->ptr_mask; i++) { | 749 | for (i = rx_queue->removed_count; i < rx_queue->added_count; |
739 | rx_buf = efx_rx_buffer(rx_queue, i); | 750 | i++) { |
751 | unsigned index = i & rx_queue->ptr_mask; | ||
752 | rx_buf = efx_rx_buffer(rx_queue, index); | ||
740 | efx_fini_rx_buffer(rx_queue, rx_buf); | 753 | efx_fini_rx_buffer(rx_queue, rx_buf); |
741 | } | 754 | } |
742 | } | 755 | } |
756 | |||
757 | /* Unmap and release the pages in the recycle ring. Remove the ring. */ | ||
758 | for (i = 0; i <= rx_queue->page_ptr_mask; i++) { | ||
759 | struct page *page = rx_queue->page_ring[i]; | ||
760 | struct efx_rx_page_state *state; | ||
761 | |||
762 | if (page == NULL) | ||
763 | continue; | ||
764 | |||
765 | state = page_address(page); | ||
766 | dma_unmap_page(&efx->pci_dev->dev, state->dma_addr, | ||
767 | PAGE_SIZE << efx->rx_buffer_order, | ||
768 | DMA_FROM_DEVICE); | ||
769 | put_page(page); | ||
770 | } | ||
771 | kfree(rx_queue->page_ring); | ||
772 | rx_queue->page_ring = NULL; | ||
743 | } | 773 | } |
744 | 774 | ||
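Teardown no longer sweeps the whole descriptor ring: only the entries between the read pointer (removed_count) and the write pointer (added_count) are still owned by the queue, and because those counters are free running each one is masked onto the ring. A sketch of that wrap-around walk, with an arbitrary ring size:

	/* model of releasing only the in-flight range, masked onto the ring */
	#include <stdio.h>

	int main(void)
	{
		unsigned int ptr_mask = 511;		/* 512-entry ring */
		unsigned int removed_count = 1020;	/* free-running counters */
		unsigned int added_count = 1030;

		for (unsigned int i = removed_count; i < added_count; i++) {
			unsigned int index = i & ptr_mask;
			printf("release rx buffer at ring index %u\n", index);
		}
		return 0;
	}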
745 | void efx_remove_rx_queue(struct efx_rx_queue *rx_queue) | 775 | void efx_remove_rx_queue(struct efx_rx_queue *rx_queue) |
@@ -754,9 +784,6 @@ void efx_remove_rx_queue(struct efx_rx_queue *rx_queue) | |||
754 | } | 784 | } |
755 | 785 | ||
756 | 786 | ||
757 | module_param(rx_alloc_method, int, 0644); | ||
758 | MODULE_PARM_DESC(rx_alloc_method, "Allocation method used for RX buffers"); | ||
759 | |||
760 | module_param(rx_refill_threshold, uint, 0444); | 787 | module_param(rx_refill_threshold, uint, 0444); |
761 | MODULE_PARM_DESC(rx_refill_threshold, | 788 | MODULE_PARM_DESC(rx_refill_threshold, |
762 | "RX descriptor ring refill threshold (%)"); | 789 | "RX descriptor ring refill threshold (%)"); |
diff --git a/drivers/net/ethernet/sfc/siena.c b/drivers/net/ethernet/sfc/siena.c index ba40f67e4f05..51669244d154 100644 --- a/drivers/net/ethernet/sfc/siena.c +++ b/drivers/net/ethernet/sfc/siena.c | |||
@@ -202,7 +202,7 @@ out: | |||
202 | 202 | ||
203 | static enum reset_type siena_map_reset_reason(enum reset_type reason) | 203 | static enum reset_type siena_map_reset_reason(enum reset_type reason) |
204 | { | 204 | { |
205 | return RESET_TYPE_ALL; | 205 | return RESET_TYPE_RECOVER_OR_ALL; |
206 | } | 206 | } |
207 | 207 | ||
208 | static int siena_map_reset_flags(u32 *flags) | 208 | static int siena_map_reset_flags(u32 *flags) |
@@ -245,6 +245,22 @@ static int siena_reset_hw(struct efx_nic *efx, enum reset_type method) | |||
245 | return efx_mcdi_reset_port(efx); | 245 | return efx_mcdi_reset_port(efx); |
246 | } | 246 | } |
247 | 247 | ||
248 | #ifdef CONFIG_EEH | ||
249 | /* When a PCI device is isolated from the bus, a subsequent MMIO read is | ||
250 | * required for the kernel EEH mechanisms to notice. As the Solarflare driver | ||
251 | * was written to minimise MMIO read (for latency) then a periodic call to check | ||
252 | * the EEH status of the device is required so that device recovery can happen | ||
253 | * in a timely fashion. | ||
254 | */ | ||
255 | static void siena_monitor(struct efx_nic *efx) | ||
256 | { | ||
257 | struct eeh_dev *eehdev = | ||
258 | of_node_to_eeh_dev(pci_device_to_OF_node(efx->pci_dev)); | ||
259 | |||
260 | eeh_dev_check_failure(eehdev); | ||
261 | } | ||
262 | #endif | ||
263 | |||
248 | static int siena_probe_nvconfig(struct efx_nic *efx) | 264 | static int siena_probe_nvconfig(struct efx_nic *efx) |
249 | { | 265 | { |
250 | u32 caps = 0; | 266 | u32 caps = 0; |
@@ -398,6 +414,8 @@ static int siena_init_nic(struct efx_nic *efx) | |||
398 | EFX_SET_OWORD_FIELD(temp, FRF_BZ_RX_HASH_INSRT_HDR, 1); | 414 | EFX_SET_OWORD_FIELD(temp, FRF_BZ_RX_HASH_INSRT_HDR, 1); |
399 | EFX_SET_OWORD_FIELD(temp, FRF_BZ_RX_HASH_ALG, 1); | 415 | EFX_SET_OWORD_FIELD(temp, FRF_BZ_RX_HASH_ALG, 1); |
400 | EFX_SET_OWORD_FIELD(temp, FRF_BZ_RX_IP_HASH, 1); | 416 | EFX_SET_OWORD_FIELD(temp, FRF_BZ_RX_IP_HASH, 1); |
417 | EFX_SET_OWORD_FIELD(temp, FRF_BZ_RX_USR_BUF_SIZE, | ||
418 | EFX_RX_USR_BUF_SIZE >> 5); | ||
401 | efx_writeo(efx, &temp, FR_AZ_RX_CFG); | 419 | efx_writeo(efx, &temp, FR_AZ_RX_CFG); |
402 | 420 | ||
403 | /* Set hash key for IPv4 */ | 421 | /* Set hash key for IPv4 */ |
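The FRF_BZ_RX_USR_BUF_SIZE field written above holds the scatter buffer size in 32-byte units, which is why the value is shifted right by five and why EFX_RX_USR_BUF_SIZE must be a multiple of 32. A small sketch of that encoding, using 1792 as a placeholder consistent with the constraint:

	/* model of the 32-byte-unit register encoding */
	#include <assert.h>
	#include <stdio.h>

	int main(void)
	{
		unsigned int usr_buf_size = 1792;	/* bytes, placeholder */

		assert(usr_buf_size % 32 == 0);		/* field granularity */
		unsigned int field = usr_buf_size >> 5;	/* value written to hw */

		printf("RX_USR_BUF_SIZE field = %u (x32 bytes = %u)\n",
		       field, field << 5);
		return 0;
	}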
@@ -665,7 +683,11 @@ const struct efx_nic_type siena_a0_nic_type = { | |||
665 | .init = siena_init_nic, | 683 | .init = siena_init_nic, |
666 | .dimension_resources = siena_dimension_resources, | 684 | .dimension_resources = siena_dimension_resources, |
667 | .fini = efx_port_dummy_op_void, | 685 | .fini = efx_port_dummy_op_void, |
686 | #ifdef CONFIG_EEH | ||
687 | .monitor = siena_monitor, | ||
688 | #else | ||
668 | .monitor = NULL, | 689 | .monitor = NULL, |
690 | #endif | ||
669 | .map_reset_reason = siena_map_reset_reason, | 691 | .map_reset_reason = siena_map_reset_reason, |
670 | .map_reset_flags = siena_map_reset_flags, | 692 | .map_reset_flags = siena_map_reset_flags, |
671 | .reset = siena_reset_hw, | 693 | .reset = siena_reset_hw, |
@@ -698,6 +720,7 @@ const struct efx_nic_type siena_a0_nic_type = { | |||
698 | .max_dma_mask = DMA_BIT_MASK(FSF_AZ_TX_KER_BUF_ADDR_WIDTH), | 720 | .max_dma_mask = DMA_BIT_MASK(FSF_AZ_TX_KER_BUF_ADDR_WIDTH), |
699 | .rx_buffer_hash_size = 0x10, | 721 | .rx_buffer_hash_size = 0x10, |
700 | .rx_buffer_padding = 0, | 722 | .rx_buffer_padding = 0, |
723 | .can_rx_scatter = true, | ||
701 | .max_interrupt_mode = EFX_INT_MODE_MSIX, | 724 | .max_interrupt_mode = EFX_INT_MODE_MSIX, |
702 | .phys_addr_channels = 32, /* Hardware limit is 64, but the legacy | 725 | .phys_addr_channels = 32, /* Hardware limit is 64, but the legacy |
703 | * interrupt handler only supports 32 | 726 | * interrupt handler only supports 32 |