diff options
author | Edward Cree <ecree@solarflare.com> | 2014-04-16 14:27:48 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2014-04-16 14:33:57 -0400 |
commit | e283546c0465dd3026bc94f7b1a9de7f6b8969ec (patch) | |
tree | 3828d4faeed3986b0f01b93416b910b11cd33280 /drivers/net | |
parent | 10ec34fcb100412ab186c141a9c3557d1270effd (diff) |
sfc: On MCDI timeout, issue an FLR (and mark MCDI to fail-fast)
When an MCDI command times out (whether or not we find it
completed when we poll), call efx_mcdi_abandon(), which tells
all subsequent MCDI calls to fail-fast, and queues up an FLR.
Because an FLR doesn't lead to receiving any reboot event from
the MC (unlike most other types of reset), we have to call
efx_ef10_reset_mc_allocations.
In efx_start_all(), if a reset (of any kind) is pending, we
bail out.
Without this, attempts to reconfigure (e.g. change mtu) can
cause driver/mc state inconsistency if the first MCDI call
triggers an FLR.
For similar reasons, on EF10, in
efx_reset_down(method=RESET_TYPE_MCDI_TIMEOUT), set the number
of active queues to zero before calling efx_stop_all().
And, on farch, in efx_reset_up(method=RESET_TYPE_MCDI_TIMEOUT),
set active_queues and the pending and outstanding flush counts to zero.
efx_mcdi_mode_{poll,event}() should not take us out of fail-fast
mode. Instead, this is done by efx_mcdi_reset() after the FLR
completes.
The new FLR reset_type RESET_TYPE_MCDI_TIMEOUT doesn't really
fit into the hierarchy of reset 'scopes' whereby efx_reset()
decides some resets subsume others. Thus, it uses separate logic.
Also, fixed up some inconsistency around RESET_TYPE_MC_BIST,
which was in the wrong place in that hierarchy.
Signed-off-by: Shradha Shah <sshah@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'drivers/net')
-rw-r--r-- | drivers/net/ethernet/sfc/ef10.c | 12 | ||||
-rw-r--r-- | drivers/net/ethernet/sfc/efx.c | 19 | ||||
-rw-r--r-- | drivers/net/ethernet/sfc/enum.h | 23 | ||||
-rw-r--r-- | drivers/net/ethernet/sfc/falcon.c | 4 | ||||
-rw-r--r-- | drivers/net/ethernet/sfc/farch.c | 22 | ||||
-rw-r--r-- | drivers/net/ethernet/sfc/mcdi.c | 55 | ||||
-rw-r--r-- | drivers/net/ethernet/sfc/mcdi.h | 13 | ||||
-rw-r--r-- | drivers/net/ethernet/sfc/net_driver.h | 4 | ||||
-rw-r--r-- | drivers/net/ethernet/sfc/nic.h | 1 | ||||
-rw-r--r-- | drivers/net/ethernet/sfc/siena.c | 2 |
10 files changed, 133 insertions, 22 deletions
diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index 21c20ea0dad0..b5ed30a39144 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c | |||
@@ -738,8 +738,11 @@ static int efx_ef10_reset(struct efx_nic *efx, enum reset_type reset_type) | |||
738 | /* If it was a port reset, trigger reallocation of MC resources. | 738 | /* If it was a port reset, trigger reallocation of MC resources. |
739 | * Note that on an MC reset nothing needs to be done now because we'll | 739 | * Note that on an MC reset nothing needs to be done now because we'll |
740 | * detect the MC reset later and handle it then. | 740 | * detect the MC reset later and handle it then. |
741 | * For an FLR, we never get an MC reset event, but the MC has reset all | ||
742 | * resources assigned to us, so we have to trigger reallocation now. | ||
741 | */ | 743 | */ |
742 | if (reset_type == RESET_TYPE_ALL && !rc) | 744 | if ((reset_type == RESET_TYPE_ALL || |
745 | reset_type == RESET_TYPE_MCDI_TIMEOUT) && !rc) | ||
743 | efx_ef10_reset_mc_allocations(efx); | 746 | efx_ef10_reset_mc_allocations(efx); |
744 | return rc; | 747 | return rc; |
745 | } | 748 | } |
@@ -2141,6 +2144,11 @@ static int efx_ef10_fini_dmaq(struct efx_nic *efx) | |||
2141 | return 0; | 2144 | return 0; |
2142 | } | 2145 | } |
2143 | 2146 | ||
2147 | static void efx_ef10_prepare_flr(struct efx_nic *efx) | ||
2148 | { | ||
2149 | atomic_set(&efx->active_queues, 0); | ||
2150 | } | ||
2151 | |||
2144 | static bool efx_ef10_filter_equal(const struct efx_filter_spec *left, | 2152 | static bool efx_ef10_filter_equal(const struct efx_filter_spec *left, |
2145 | const struct efx_filter_spec *right) | 2153 | const struct efx_filter_spec *right) |
2146 | { | 2154 | { |
@@ -3603,6 +3611,8 @@ const struct efx_nic_type efx_hunt_a0_nic_type = { | |||
3603 | .probe_port = efx_mcdi_port_probe, | 3611 | .probe_port = efx_mcdi_port_probe, |
3604 | .remove_port = efx_mcdi_port_remove, | 3612 | .remove_port = efx_mcdi_port_remove, |
3605 | .fini_dmaq = efx_ef10_fini_dmaq, | 3613 | .fini_dmaq = efx_ef10_fini_dmaq, |
3614 | .prepare_flr = efx_ef10_prepare_flr, | ||
3615 | .finish_flr = efx_port_dummy_op_void, | ||
3606 | .describe_stats = efx_ef10_describe_stats, | 3616 | .describe_stats = efx_ef10_describe_stats, |
3607 | .update_stats = efx_ef10_update_stats, | 3617 | .update_stats = efx_ef10_update_stats, |
3608 | .start_stats = efx_mcdi_mac_start_stats, | 3618 | .start_stats = efx_mcdi_mac_start_stats, |
diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c index 57b971e5e6b2..63d595fd3cc5 100644 --- a/drivers/net/ethernet/sfc/efx.c +++ b/drivers/net/ethernet/sfc/efx.c | |||
@@ -76,6 +76,7 @@ const char *const efx_reset_type_names[] = { | |||
76 | [RESET_TYPE_RECOVER_OR_ALL] = "RECOVER_OR_ALL", | 76 | [RESET_TYPE_RECOVER_OR_ALL] = "RECOVER_OR_ALL", |
77 | [RESET_TYPE_WORLD] = "WORLD", | 77 | [RESET_TYPE_WORLD] = "WORLD", |
78 | [RESET_TYPE_RECOVER_OR_DISABLE] = "RECOVER_OR_DISABLE", | 78 | [RESET_TYPE_RECOVER_OR_DISABLE] = "RECOVER_OR_DISABLE", |
79 | [RESET_TYPE_MC_BIST] = "MC_BIST", | ||
79 | [RESET_TYPE_DISABLE] = "DISABLE", | 80 | [RESET_TYPE_DISABLE] = "DISABLE", |
80 | [RESET_TYPE_TX_WATCHDOG] = "TX_WATCHDOG", | 81 | [RESET_TYPE_TX_WATCHDOG] = "TX_WATCHDOG", |
81 | [RESET_TYPE_INT_ERROR] = "INT_ERROR", | 82 | [RESET_TYPE_INT_ERROR] = "INT_ERROR", |
@@ -83,7 +84,7 @@ const char *const efx_reset_type_names[] = { | |||
83 | [RESET_TYPE_DMA_ERROR] = "DMA_ERROR", | 84 | [RESET_TYPE_DMA_ERROR] = "DMA_ERROR", |
84 | [RESET_TYPE_TX_SKIP] = "TX_SKIP", | 85 | [RESET_TYPE_TX_SKIP] = "TX_SKIP", |
85 | [RESET_TYPE_MC_FAILURE] = "MC_FAILURE", | 86 | [RESET_TYPE_MC_FAILURE] = "MC_FAILURE", |
86 | [RESET_TYPE_MC_BIST] = "MC_BIST", | 87 | [RESET_TYPE_MCDI_TIMEOUT] = "MCDI_TIMEOUT (FLR)", |
87 | }; | 88 | }; |
88 | 89 | ||
89 | /* Reset workqueue. If any NIC has a hardware failure then a reset will be | 90 | /* Reset workqueue. If any NIC has a hardware failure then a reset will be |
@@ -1739,7 +1740,8 @@ static void efx_start_all(struct efx_nic *efx) | |||
1739 | 1740 | ||
1740 | /* Check that it is appropriate to restart the interface. All | 1741 | /* Check that it is appropriate to restart the interface. All |
1741 | * of these flags are safe to read under just the rtnl lock */ | 1742 | * of these flags are safe to read under just the rtnl lock */ |
1742 | if (efx->port_enabled || !netif_running(efx->net_dev)) | 1743 | if (efx->port_enabled || !netif_running(efx->net_dev) || |
1744 | efx->reset_pending) | ||
1743 | return; | 1745 | return; |
1744 | 1746 | ||
1745 | efx_start_port(efx); | 1747 | efx_start_port(efx); |
@@ -2334,6 +2336,9 @@ void efx_reset_down(struct efx_nic *efx, enum reset_type method) | |||
2334 | { | 2336 | { |
2335 | EFX_ASSERT_RESET_SERIALISED(efx); | 2337 | EFX_ASSERT_RESET_SERIALISED(efx); |
2336 | 2338 | ||
2339 | if (method == RESET_TYPE_MCDI_TIMEOUT) | ||
2340 | efx->type->prepare_flr(efx); | ||
2341 | |||
2337 | efx_stop_all(efx); | 2342 | efx_stop_all(efx); |
2338 | efx_disable_interrupts(efx); | 2343 | efx_disable_interrupts(efx); |
2339 | 2344 | ||
@@ -2354,6 +2359,10 @@ int efx_reset_up(struct efx_nic *efx, enum reset_type method, bool ok) | |||
2354 | 2359 | ||
2355 | EFX_ASSERT_RESET_SERIALISED(efx); | 2360 | EFX_ASSERT_RESET_SERIALISED(efx); |
2356 | 2361 | ||
2362 | if (method == RESET_TYPE_MCDI_TIMEOUT) | ||
2363 | efx->type->finish_flr(efx); | ||
2364 | |||
2365 | /* Ensure that SRAM is initialised even if we're disabling the device */ | ||
2357 | rc = efx->type->init(efx); | 2366 | rc = efx->type->init(efx); |
2358 | if (rc) { | 2367 | if (rc) { |
2359 | netif_err(efx, drv, efx->net_dev, "failed to initialise NIC\n"); | 2368 | netif_err(efx, drv, efx->net_dev, "failed to initialise NIC\n"); |
@@ -2417,7 +2426,10 @@ int efx_reset(struct efx_nic *efx, enum reset_type method) | |||
2417 | /* Clear flags for the scopes we covered. We assume the NIC and | 2426 | /* Clear flags for the scopes we covered. We assume the NIC and |
2418 | * driver are now quiescent so that there is no race here. | 2427 | * driver are now quiescent so that there is no race here. |
2419 | */ | 2428 | */ |
2420 | efx->reset_pending &= -(1 << (method + 1)); | 2429 | if (method < RESET_TYPE_MAX_METHOD) |
2430 | efx->reset_pending &= -(1 << (method + 1)); | ||
2431 | else /* it doesn't fit into the well-ordered scope hierarchy */ | ||
2432 | __clear_bit(method, &efx->reset_pending); | ||
2421 | 2433 | ||
2422 | /* Reinitialise bus-mastering, which may have been turned off before | 2434 | /* Reinitialise bus-mastering, which may have been turned off before |
2423 | * the reset was scheduled. This is still appropriate, even in the | 2435 | * the reset was scheduled. This is still appropriate, even in the |
@@ -2546,6 +2558,7 @@ void efx_schedule_reset(struct efx_nic *efx, enum reset_type type) | |||
2546 | case RESET_TYPE_DISABLE: | 2558 | case RESET_TYPE_DISABLE: |
2547 | case RESET_TYPE_RECOVER_OR_DISABLE: | 2559 | case RESET_TYPE_RECOVER_OR_DISABLE: |
2548 | case RESET_TYPE_MC_BIST: | 2560 | case RESET_TYPE_MC_BIST: |
2561 | case RESET_TYPE_MCDI_TIMEOUT: | ||
2549 | method = type; | 2562 | method = type; |
2550 | netif_dbg(efx, drv, efx->net_dev, "scheduling %s reset\n", | 2563 | netif_dbg(efx, drv, efx->net_dev, "scheduling %s reset\n", |
2551 | RESET_TYPE(method)); | 2564 | RESET_TYPE(method)); |
diff --git a/drivers/net/ethernet/sfc/enum.h b/drivers/net/ethernet/sfc/enum.h index 75ef7ef6450b..d1dbb5fb31bb 100644 --- a/drivers/net/ethernet/sfc/enum.h +++ b/drivers/net/ethernet/sfc/enum.h | |||
@@ -143,6 +143,7 @@ enum efx_loopback_mode { | |||
143 | * @RESET_TYPE_WORLD: Reset as much as possible | 143 | * @RESET_TYPE_WORLD: Reset as much as possible |
144 | * @RESET_TYPE_RECOVER_OR_DISABLE: Try to recover. Apply RESET_TYPE_DISABLE if | 144 | * @RESET_TYPE_RECOVER_OR_DISABLE: Try to recover. Apply RESET_TYPE_DISABLE if |
145 | * unsuccessful. | 145 | * unsuccessful. |
146 | * @RESET_TYPE_MC_BIST: MC entering BIST mode. | ||
146 | * @RESET_TYPE_DISABLE: Reset datapath, MAC and PHY; leave NIC disabled | 147 | * @RESET_TYPE_DISABLE: Reset datapath, MAC and PHY; leave NIC disabled |
147 | * @RESET_TYPE_TX_WATCHDOG: reset due to TX watchdog | 148 | * @RESET_TYPE_TX_WATCHDOG: reset due to TX watchdog |
148 | * @RESET_TYPE_INT_ERROR: reset due to internal error | 149 | * @RESET_TYPE_INT_ERROR: reset due to internal error |
@@ -150,14 +151,16 @@ enum efx_loopback_mode { | |||
150 | * @RESET_TYPE_DMA_ERROR: DMA error | 151 | * @RESET_TYPE_DMA_ERROR: DMA error |
151 | * @RESET_TYPE_TX_SKIP: hardware completed empty tx descriptors | 152 | * @RESET_TYPE_TX_SKIP: hardware completed empty tx descriptors |
152 | * @RESET_TYPE_MC_FAILURE: MC reboot/assertion | 153 | * @RESET_TYPE_MC_FAILURE: MC reboot/assertion |
154 | * @RESET_TYPE_MCDI_TIMEOUT: MCDI timeout. | ||
153 | */ | 155 | */ |
154 | enum reset_type { | 156 | enum reset_type { |
155 | RESET_TYPE_INVISIBLE = 0, | 157 | RESET_TYPE_INVISIBLE, |
156 | RESET_TYPE_RECOVER_OR_ALL = 1, | 158 | RESET_TYPE_RECOVER_OR_ALL, |
157 | RESET_TYPE_ALL = 2, | 159 | RESET_TYPE_ALL, |
158 | RESET_TYPE_WORLD = 3, | 160 | RESET_TYPE_WORLD, |
159 | RESET_TYPE_RECOVER_OR_DISABLE = 4, | 161 | RESET_TYPE_RECOVER_OR_DISABLE, |
160 | RESET_TYPE_DISABLE = 5, | 162 | RESET_TYPE_MC_BIST, |
163 | RESET_TYPE_DISABLE, | ||
161 | RESET_TYPE_MAX_METHOD, | 164 | RESET_TYPE_MAX_METHOD, |
162 | RESET_TYPE_TX_WATCHDOG, | 165 | RESET_TYPE_TX_WATCHDOG, |
163 | RESET_TYPE_INT_ERROR, | 166 | RESET_TYPE_INT_ERROR, |
@@ -165,7 +168,13 @@ enum reset_type { | |||
165 | RESET_TYPE_DMA_ERROR, | 168 | RESET_TYPE_DMA_ERROR, |
166 | RESET_TYPE_TX_SKIP, | 169 | RESET_TYPE_TX_SKIP, |
167 | RESET_TYPE_MC_FAILURE, | 170 | RESET_TYPE_MC_FAILURE, |
168 | RESET_TYPE_MC_BIST, | 171 | /* RESET_TYPE_MCDI_TIMEOUT is actually a method, not just a reason, but |
172 | * it doesn't fit the scope hierarchy (not well-ordered by inclusion). | ||
173 | * We encode this by having its enum value be greater than | ||
174 | * RESET_TYPE_MAX_METHOD. This also prevents issuing it with | ||
175 | * efx_ioctl_reset. | ||
176 | */ | ||
177 | RESET_TYPE_MCDI_TIMEOUT, | ||
169 | RESET_TYPE_MAX, | 178 | RESET_TYPE_MAX, |
170 | }; | 179 | }; |
171 | 180 | ||
diff --git a/drivers/net/ethernet/sfc/falcon.c b/drivers/net/ethernet/sfc/falcon.c index 8ec20b713cc6..fae25a418647 100644 --- a/drivers/net/ethernet/sfc/falcon.c +++ b/drivers/net/ethernet/sfc/falcon.c | |||
@@ -2696,6 +2696,8 @@ const struct efx_nic_type falcon_a1_nic_type = { | |||
2696 | .fini_dmaq = efx_farch_fini_dmaq, | 2696 | .fini_dmaq = efx_farch_fini_dmaq, |
2697 | .prepare_flush = falcon_prepare_flush, | 2697 | .prepare_flush = falcon_prepare_flush, |
2698 | .finish_flush = efx_port_dummy_op_void, | 2698 | .finish_flush = efx_port_dummy_op_void, |
2699 | .prepare_flr = efx_port_dummy_op_void, | ||
2700 | .finish_flr = efx_farch_finish_flr, | ||
2699 | .describe_stats = falcon_describe_nic_stats, | 2701 | .describe_stats = falcon_describe_nic_stats, |
2700 | .update_stats = falcon_update_nic_stats, | 2702 | .update_stats = falcon_update_nic_stats, |
2701 | .start_stats = falcon_start_nic_stats, | 2703 | .start_stats = falcon_start_nic_stats, |
@@ -2790,6 +2792,8 @@ const struct efx_nic_type falcon_b0_nic_type = { | |||
2790 | .fini_dmaq = efx_farch_fini_dmaq, | 2792 | .fini_dmaq = efx_farch_fini_dmaq, |
2791 | .prepare_flush = falcon_prepare_flush, | 2793 | .prepare_flush = falcon_prepare_flush, |
2792 | .finish_flush = efx_port_dummy_op_void, | 2794 | .finish_flush = efx_port_dummy_op_void, |
2795 | .prepare_flr = efx_port_dummy_op_void, | ||
2796 | .finish_flr = efx_farch_finish_flr, | ||
2793 | .describe_stats = falcon_describe_nic_stats, | 2797 | .describe_stats = falcon_describe_nic_stats, |
2794 | .update_stats = falcon_update_nic_stats, | 2798 | .update_stats = falcon_update_nic_stats, |
2795 | .start_stats = falcon_start_nic_stats, | 2799 | .start_stats = falcon_start_nic_stats, |
diff --git a/drivers/net/ethernet/sfc/farch.c b/drivers/net/ethernet/sfc/farch.c index a08761360cdf..0537381cd2f6 100644 --- a/drivers/net/ethernet/sfc/farch.c +++ b/drivers/net/ethernet/sfc/farch.c | |||
@@ -741,6 +741,28 @@ int efx_farch_fini_dmaq(struct efx_nic *efx) | |||
741 | return rc; | 741 | return rc; |
742 | } | 742 | } |
743 | 743 | ||
744 | /* Reset queue and flush accounting after FLR | ||
745 | * | ||
746 | * One possible cause of FLR recovery is that DMA may be failing (eg. if bus | ||
747 | * mastering was disabled), in which case we don't receive (RXQ) flush | ||
748 | * completion events. This means that efx->rxq_flush_outstanding remained at 4 | ||
749 | * after the FLR; also, efx->active_queues was non-zero (as no flush completion | ||
750 | * events were received, and we didn't go through efx_check_tx_flush_complete()) | ||
751 | * If we don't fix this up, on the next call to efx_realloc_channels() we won't | ||
752 | * flush any RX queues because efx->rxq_flush_outstanding is at the limit of 4 | ||
753 | * for batched flush requests; and the efx->active_queues gets messed up because | ||
754 | * we keep incrementing for the newly initialised queues, but it never went to | ||
755 | * zero previously. Then we get a timeout every time we try to restart the | ||
756 | * queues, as it doesn't go back to zero when we should be flushing the queues. | ||
757 | */ | ||
758 | void efx_farch_finish_flr(struct efx_nic *efx) | ||
759 | { | ||
760 | atomic_set(&efx->rxq_flush_pending, 0); | ||
761 | atomic_set(&efx->rxq_flush_outstanding, 0); | ||
762 | atomic_set(&efx->active_queues, 0); | ||
763 | } | ||
764 | |||
765 | |||
744 | /************************************************************************** | 766 | /************************************************************************** |
745 | * | 767 | * |
746 | * Event queue processing | 768 | * Event queue processing |
diff --git a/drivers/net/ethernet/sfc/mcdi.c b/drivers/net/ethernet/sfc/mcdi.c index 7bd4b14bf3b3..5239cf9bdc56 100644 --- a/drivers/net/ethernet/sfc/mcdi.c +++ b/drivers/net/ethernet/sfc/mcdi.c | |||
@@ -52,12 +52,7 @@ static void efx_mcdi_timeout_async(unsigned long context); | |||
52 | static int efx_mcdi_drv_attach(struct efx_nic *efx, bool driver_operating, | 52 | static int efx_mcdi_drv_attach(struct efx_nic *efx, bool driver_operating, |
53 | bool *was_attached_out); | 53 | bool *was_attached_out); |
54 | static bool efx_mcdi_poll_once(struct efx_nic *efx); | 54 | static bool efx_mcdi_poll_once(struct efx_nic *efx); |
55 | 55 | static void efx_mcdi_abandon(struct efx_nic *efx); | |
56 | static inline struct efx_mcdi_iface *efx_mcdi(struct efx_nic *efx) | ||
57 | { | ||
58 | EFX_BUG_ON_PARANOID(!efx->mcdi); | ||
59 | return &efx->mcdi->iface; | ||
60 | } | ||
61 | 56 | ||
62 | int efx_mcdi_init(struct efx_nic *efx) | 57 | int efx_mcdi_init(struct efx_nic *efx) |
63 | { | 58 | { |
@@ -558,6 +553,8 @@ static int _efx_mcdi_rpc_finish(struct efx_nic *efx, unsigned cmd, size_t inlen, | |||
558 | rc = 0; | 553 | rc = 0; |
559 | } | 554 | } |
560 | 555 | ||
556 | efx_mcdi_abandon(efx); | ||
557 | |||
561 | /* Close the race with efx_mcdi_ev_cpl() executing just too late | 558 | /* Close the race with efx_mcdi_ev_cpl() executing just too late |
562 | * and completing a request we've just cancelled, by ensuring | 559 | * and completing a request we've just cancelled, by ensuring |
563 | * that the seqno check therein fails. | 560 | * that the seqno check therein fails. |
@@ -672,6 +669,9 @@ int efx_mcdi_rpc_start(struct efx_nic *efx, unsigned cmd, | |||
672 | if (efx->mc_bist_for_other_fn) | 669 | if (efx->mc_bist_for_other_fn) |
673 | return -ENETDOWN; | 670 | return -ENETDOWN; |
674 | 671 | ||
672 | if (mcdi->mode == MCDI_MODE_FAIL) | ||
673 | return -ENETDOWN; | ||
674 | |||
675 | efx_mcdi_acquire_sync(mcdi); | 675 | efx_mcdi_acquire_sync(mcdi); |
676 | efx_mcdi_send_request(efx, cmd, inbuf, inlen); | 676 | efx_mcdi_send_request(efx, cmd, inbuf, inlen); |
677 | return 0; | 677 | return 0; |
@@ -812,7 +812,11 @@ void efx_mcdi_mode_poll(struct efx_nic *efx) | |||
812 | return; | 812 | return; |
813 | 813 | ||
814 | mcdi = efx_mcdi(efx); | 814 | mcdi = efx_mcdi(efx); |
815 | if (mcdi->mode == MCDI_MODE_POLL) | 815 | /* If already in polling mode, nothing to do. |
816 | * If in fail-fast state, don't switch to polled completion. | ||
817 | * FLR recovery will do that later. | ||
818 | */ | ||
819 | if (mcdi->mode == MCDI_MODE_POLL || mcdi->mode == MCDI_MODE_FAIL) | ||
816 | return; | 820 | return; |
817 | 821 | ||
818 | /* We can switch from event completion to polled completion, because | 822 | /* We can switch from event completion to polled completion, because |
@@ -841,8 +845,8 @@ void efx_mcdi_flush_async(struct efx_nic *efx) | |||
841 | 845 | ||
842 | mcdi = efx_mcdi(efx); | 846 | mcdi = efx_mcdi(efx); |
843 | 847 | ||
844 | /* We must be in polling mode so no more requests can be queued */ | 848 | /* We must be in poll or fail mode so no more requests can be queued */ |
845 | BUG_ON(mcdi->mode != MCDI_MODE_POLL); | 849 | BUG_ON(mcdi->mode == MCDI_MODE_EVENTS); |
846 | 850 | ||
847 | del_timer_sync(&mcdi->async_timer); | 851 | del_timer_sync(&mcdi->async_timer); |
848 | 852 | ||
@@ -875,8 +879,11 @@ void efx_mcdi_mode_event(struct efx_nic *efx) | |||
875 | return; | 879 | return; |
876 | 880 | ||
877 | mcdi = efx_mcdi(efx); | 881 | mcdi = efx_mcdi(efx); |
878 | 882 | /* If already in event completion mode, nothing to do. | |
879 | if (mcdi->mode == MCDI_MODE_EVENTS) | 883 | * If in fail-fast state, don't switch to event completion. FLR |
884 | * recovery will do that later. | ||
885 | */ | ||
886 | if (mcdi->mode == MCDI_MODE_EVENTS || mcdi->mode == MCDI_MODE_FAIL) | ||
880 | return; | 887 | return; |
881 | 888 | ||
882 | /* We can't switch from polled to event completion in the middle of a | 889 | /* We can't switch from polled to event completion in the middle of a |
@@ -966,6 +973,19 @@ static void efx_mcdi_ev_bist(struct efx_nic *efx) | |||
966 | spin_unlock(&mcdi->iface_lock); | 973 | spin_unlock(&mcdi->iface_lock); |
967 | } | 974 | } |
968 | 975 | ||
976 | /* MCDI timeouts seen, so make all MCDI calls fail-fast and issue an FLR to try | ||
977 | * to recover. | ||
978 | */ | ||
979 | static void efx_mcdi_abandon(struct efx_nic *efx) | ||
980 | { | ||
981 | struct efx_mcdi_iface *mcdi = efx_mcdi(efx); | ||
982 | |||
983 | if (xchg(&mcdi->mode, MCDI_MODE_FAIL) == MCDI_MODE_FAIL) | ||
984 | return; /* it had already been done */ | ||
985 | netif_dbg(efx, hw, efx->net_dev, "MCDI is timing out; trying to recover\n"); | ||
986 | efx_schedule_reset(efx, RESET_TYPE_MCDI_TIMEOUT); | ||
987 | } | ||
988 | |||
969 | /* Called from falcon_process_eventq for MCDI events */ | 989 | /* Called from falcon_process_eventq for MCDI events */ |
970 | void efx_mcdi_process_event(struct efx_channel *channel, | 990 | void efx_mcdi_process_event(struct efx_channel *channel, |
971 | efx_qword_t *event) | 991 | efx_qword_t *event) |
@@ -1512,6 +1532,19 @@ int efx_mcdi_reset(struct efx_nic *efx, enum reset_type method) | |||
1512 | { | 1532 | { |
1513 | int rc; | 1533 | int rc; |
1514 | 1534 | ||
1535 | /* If MCDI is down, we can't handle_assertion */ | ||
1536 | if (method == RESET_TYPE_MCDI_TIMEOUT) { | ||
1537 | rc = pci_reset_function(efx->pci_dev); | ||
1538 | if (rc) | ||
1539 | return rc; | ||
1540 | /* Re-enable polled MCDI completion */ | ||
1541 | if (efx->mcdi) { | ||
1542 | struct efx_mcdi_iface *mcdi = efx_mcdi(efx); | ||
1543 | mcdi->mode = MCDI_MODE_POLL; | ||
1544 | } | ||
1545 | return 0; | ||
1546 | } | ||
1547 | |||
1515 | /* Recover from a failed assertion pre-reset */ | 1548 | /* Recover from a failed assertion pre-reset */ |
1516 | rc = efx_mcdi_handle_assertion(efx); | 1549 | rc = efx_mcdi_handle_assertion(efx); |
1517 | if (rc) | 1550 | if (rc) |
diff --git a/drivers/net/ethernet/sfc/mcdi.h b/drivers/net/ethernet/sfc/mcdi.h index 52931aebf3c3..56465f7465a2 100644 --- a/drivers/net/ethernet/sfc/mcdi.h +++ b/drivers/net/ethernet/sfc/mcdi.h | |||
@@ -28,9 +28,16 @@ enum efx_mcdi_state { | |||
28 | MCDI_STATE_COMPLETED, | 28 | MCDI_STATE_COMPLETED, |
29 | }; | 29 | }; |
30 | 30 | ||
31 | /** | ||
32 | * enum efx_mcdi_mode - MCDI transaction mode | ||
33 | * @MCDI_MODE_POLL: poll for MCDI completion, until timeout | ||
34 | * @MCDI_MODE_EVENTS: wait for an mcdi_event. On timeout, poll once | ||
35 | * @MCDI_MODE_FAIL: we think MCDI is dead, so fail-fast all calls | ||
36 | */ | ||
31 | enum efx_mcdi_mode { | 37 | enum efx_mcdi_mode { |
32 | MCDI_MODE_POLL, | 38 | MCDI_MODE_POLL, |
33 | MCDI_MODE_EVENTS, | 39 | MCDI_MODE_EVENTS, |
40 | MCDI_MODE_FAIL, | ||
34 | }; | 41 | }; |
35 | 42 | ||
36 | /** | 43 | /** |
@@ -104,6 +111,12 @@ struct efx_mcdi_data { | |||
104 | u32 fn_flags; | 111 | u32 fn_flags; |
105 | }; | 112 | }; |
106 | 113 | ||
114 | static inline struct efx_mcdi_iface *efx_mcdi(struct efx_nic *efx) | ||
115 | { | ||
116 | EFX_BUG_ON_PARANOID(!efx->mcdi); | ||
117 | return &efx->mcdi->iface; | ||
118 | } | ||
119 | |||
107 | #ifdef CONFIG_SFC_MCDI_MON | 120 | #ifdef CONFIG_SFC_MCDI_MON |
108 | static inline struct efx_mcdi_mon *efx_mcdi_mon(struct efx_nic *efx) | 121 | static inline struct efx_mcdi_mon *efx_mcdi_mon(struct efx_nic *efx) |
109 | { | 122 | { |
diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h index 8a400a0595eb..5bdae8ed7c57 100644 --- a/drivers/net/ethernet/sfc/net_driver.h +++ b/drivers/net/ethernet/sfc/net_driver.h | |||
@@ -972,6 +972,8 @@ struct efx_mtd_partition { | |||
972 | * (for Falcon architecture) | 972 | * (for Falcon architecture) |
973 | * @finish_flush: Clean up after flushing the DMA queues (for Falcon | 973 | * @finish_flush: Clean up after flushing the DMA queues (for Falcon |
974 | * architecture) | 974 | * architecture) |
975 | * @prepare_flr: Prepare for an FLR | ||
976 | * @finish_flr: Clean up after an FLR | ||
975 | * @describe_stats: Describe statistics for ethtool | 977 | * @describe_stats: Describe statistics for ethtool |
976 | * @update_stats: Update statistics not provided by event handling. | 978 | * @update_stats: Update statistics not provided by event handling. |
977 | * Either argument may be %NULL. | 979 | * Either argument may be %NULL. |
@@ -1100,6 +1102,8 @@ struct efx_nic_type { | |||
1100 | int (*fini_dmaq)(struct efx_nic *efx); | 1102 | int (*fini_dmaq)(struct efx_nic *efx); |
1101 | void (*prepare_flush)(struct efx_nic *efx); | 1103 | void (*prepare_flush)(struct efx_nic *efx); |
1102 | void (*finish_flush)(struct efx_nic *efx); | 1104 | void (*finish_flush)(struct efx_nic *efx); |
1105 | void (*prepare_flr)(struct efx_nic *efx); | ||
1106 | void (*finish_flr)(struct efx_nic *efx); | ||
1103 | size_t (*describe_stats)(struct efx_nic *efx, u8 *names); | 1107 | size_t (*describe_stats)(struct efx_nic *efx, u8 *names); |
1104 | size_t (*update_stats)(struct efx_nic *efx, u64 *full_stats, | 1108 | size_t (*update_stats)(struct efx_nic *efx, u64 *full_stats, |
1105 | struct rtnl_link_stats64 *core_stats); | 1109 | struct rtnl_link_stats64 *core_stats); |
diff --git a/drivers/net/ethernet/sfc/nic.h b/drivers/net/ethernet/sfc/nic.h index a001fae1a8d7..d3ad8ed8d901 100644 --- a/drivers/net/ethernet/sfc/nic.h +++ b/drivers/net/ethernet/sfc/nic.h | |||
@@ -757,6 +757,7 @@ static inline int efx_nic_irq_test_irq_cpu(struct efx_nic *efx) | |||
757 | int efx_nic_flush_queues(struct efx_nic *efx); | 757 | int efx_nic_flush_queues(struct efx_nic *efx); |
758 | void siena_prepare_flush(struct efx_nic *efx); | 758 | void siena_prepare_flush(struct efx_nic *efx); |
759 | int efx_farch_fini_dmaq(struct efx_nic *efx); | 759 | int efx_farch_fini_dmaq(struct efx_nic *efx); |
760 | void efx_farch_finish_flr(struct efx_nic *efx); | ||
760 | void siena_finish_flush(struct efx_nic *efx); | 761 | void siena_finish_flush(struct efx_nic *efx); |
761 | void falcon_start_nic_stats(struct efx_nic *efx); | 762 | void falcon_start_nic_stats(struct efx_nic *efx); |
762 | void falcon_stop_nic_stats(struct efx_nic *efx); | 763 | void falcon_stop_nic_stats(struct efx_nic *efx); |
diff --git a/drivers/net/ethernet/sfc/siena.c b/drivers/net/ethernet/sfc/siena.c index 23f3a6f7737a..50ffefed492c 100644 --- a/drivers/net/ethernet/sfc/siena.c +++ b/drivers/net/ethernet/sfc/siena.c | |||
@@ -921,6 +921,8 @@ const struct efx_nic_type siena_a0_nic_type = { | |||
921 | .fini_dmaq = efx_farch_fini_dmaq, | 921 | .fini_dmaq = efx_farch_fini_dmaq, |
922 | .prepare_flush = siena_prepare_flush, | 922 | .prepare_flush = siena_prepare_flush, |
923 | .finish_flush = siena_finish_flush, | 923 | .finish_flush = siena_finish_flush, |
924 | .prepare_flr = efx_port_dummy_op_void, | ||
925 | .finish_flr = efx_farch_finish_flr, | ||
924 | .describe_stats = siena_describe_nic_stats, | 926 | .describe_stats = siena_describe_nic_stats, |
925 | .update_stats = siena_update_nic_stats, | 927 | .update_stats = siena_update_nic_stats, |
926 | .start_stats = efx_mcdi_mac_start_stats, | 928 | .start_stats = efx_mcdi_mac_start_stats, |