diff options
author | Edward Cree <ecree@solarflare.com> | 2014-04-16 14:27:48 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2014-04-16 14:33:57 -0400 |
commit | e283546c0465dd3026bc94f7b1a9de7f6b8969ec (patch) | |
tree | 3828d4faeed3986b0f01b93416b910b11cd33280 /drivers/net/ethernet/sfc/efx.c | |
parent | 10ec34fcb100412ab186c141a9c3557d1270effd (diff) |
sfc: On MCDI timeout, issue an FLR (and mark MCDI to fail-fast)
When an MCDI command times out (whether or not we find it
completed when we poll), call efx_mcdi_abandon(), which tells
all subsequent MCDI calls to fail-fast, and queues up an FLR.
Because an FLR doesn't lead to receiving any reboot event from
the MC (unlike most other types of reset), we have to call
efx_ef10_reset_mc_allocations.
In efx_start_all(), if a reset (of any kind) is pending, we
bail out.
Without this, attempts to reconfigure (e.g. change mtu) can
cause driver/mc state inconsistency if the first MCDI call
triggers an FLR.
For similar reasons, on EF10, in
efx_reset_down(method=RESET_TYPE_MCDI_TIMEOUT), set the number
of active queues to zero before calling efx_stop_all().
And, on farch, in efx_reset_up(method=RESET_TYPE_MCDI_TIMEOUT),
set active_queues and flushes pending & outstanding to zero.
efx_mcdi_mode_{poll,event}() should not take us out of fail-fast
mode. Instead, this is done by efx_mcdi_reset() after the FLR
completes.
The new FLR reset_type RESET_TYPE_MCDI_TIMEOUT doesn't really
fit into the hierarchy of reset 'scopes' whereby efx_reset()
decides some resets subsume others. Thus, it uses separate logic.
Also, fixed up some inconsistency around RESET_TYPE_MC_BIST,
which was in the wrong place in that hierarchy.
Signed-off-by: Shradha Shah <sshah@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'drivers/net/ethernet/sfc/efx.c')
-rw-r--r-- | drivers/net/ethernet/sfc/efx.c | 19 |
1 files changed, 16 insertions, 3 deletions
diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c index 57b971e5e6b2..63d595fd3cc5 100644 --- a/drivers/net/ethernet/sfc/efx.c +++ b/drivers/net/ethernet/sfc/efx.c | |||
@@ -76,6 +76,7 @@ const char *const efx_reset_type_names[] = { | |||
76 | [RESET_TYPE_RECOVER_OR_ALL] = "RECOVER_OR_ALL", | 76 | [RESET_TYPE_RECOVER_OR_ALL] = "RECOVER_OR_ALL", |
77 | [RESET_TYPE_WORLD] = "WORLD", | 77 | [RESET_TYPE_WORLD] = "WORLD", |
78 | [RESET_TYPE_RECOVER_OR_DISABLE] = "RECOVER_OR_DISABLE", | 78 | [RESET_TYPE_RECOVER_OR_DISABLE] = "RECOVER_OR_DISABLE", |
79 | [RESET_TYPE_MC_BIST] = "MC_BIST", | ||
79 | [RESET_TYPE_DISABLE] = "DISABLE", | 80 | [RESET_TYPE_DISABLE] = "DISABLE", |
80 | [RESET_TYPE_TX_WATCHDOG] = "TX_WATCHDOG", | 81 | [RESET_TYPE_TX_WATCHDOG] = "TX_WATCHDOG", |
81 | [RESET_TYPE_INT_ERROR] = "INT_ERROR", | 82 | [RESET_TYPE_INT_ERROR] = "INT_ERROR", |
@@ -83,7 +84,7 @@ const char *const efx_reset_type_names[] = { | |||
83 | [RESET_TYPE_DMA_ERROR] = "DMA_ERROR", | 84 | [RESET_TYPE_DMA_ERROR] = "DMA_ERROR", |
84 | [RESET_TYPE_TX_SKIP] = "TX_SKIP", | 85 | [RESET_TYPE_TX_SKIP] = "TX_SKIP", |
85 | [RESET_TYPE_MC_FAILURE] = "MC_FAILURE", | 86 | [RESET_TYPE_MC_FAILURE] = "MC_FAILURE", |
86 | [RESET_TYPE_MC_BIST] = "MC_BIST", | 87 | [RESET_TYPE_MCDI_TIMEOUT] = "MCDI_TIMEOUT (FLR)", |
87 | }; | 88 | }; |
88 | 89 | ||
89 | /* Reset workqueue. If any NIC has a hardware failure then a reset will be | 90 | /* Reset workqueue. If any NIC has a hardware failure then a reset will be |
@@ -1739,7 +1740,8 @@ static void efx_start_all(struct efx_nic *efx) | |||
1739 | 1740 | ||
1740 | /* Check that it is appropriate to restart the interface. All | 1741 | /* Check that it is appropriate to restart the interface. All |
1741 | * of these flags are safe to read under just the rtnl lock */ | 1742 | * of these flags are safe to read under just the rtnl lock */ |
1742 | if (efx->port_enabled || !netif_running(efx->net_dev)) | 1743 | if (efx->port_enabled || !netif_running(efx->net_dev) || |
1744 | efx->reset_pending) | ||
1743 | return; | 1745 | return; |
1744 | 1746 | ||
1745 | efx_start_port(efx); | 1747 | efx_start_port(efx); |
@@ -2334,6 +2336,9 @@ void efx_reset_down(struct efx_nic *efx, enum reset_type method) | |||
2334 | { | 2336 | { |
2335 | EFX_ASSERT_RESET_SERIALISED(efx); | 2337 | EFX_ASSERT_RESET_SERIALISED(efx); |
2336 | 2338 | ||
2339 | if (method == RESET_TYPE_MCDI_TIMEOUT) | ||
2340 | efx->type->prepare_flr(efx); | ||
2341 | |||
2337 | efx_stop_all(efx); | 2342 | efx_stop_all(efx); |
2338 | efx_disable_interrupts(efx); | 2343 | efx_disable_interrupts(efx); |
2339 | 2344 | ||
@@ -2354,6 +2359,10 @@ int efx_reset_up(struct efx_nic *efx, enum reset_type method, bool ok) | |||
2354 | 2359 | ||
2355 | EFX_ASSERT_RESET_SERIALISED(efx); | 2360 | EFX_ASSERT_RESET_SERIALISED(efx); |
2356 | 2361 | ||
2362 | if (method == RESET_TYPE_MCDI_TIMEOUT) | ||
2363 | efx->type->finish_flr(efx); | ||
2364 | |||
2365 | /* Ensure that SRAM is initialised even if we're disabling the device */ | ||
2357 | rc = efx->type->init(efx); | 2366 | rc = efx->type->init(efx); |
2358 | if (rc) { | 2367 | if (rc) { |
2359 | netif_err(efx, drv, efx->net_dev, "failed to initialise NIC\n"); | 2368 | netif_err(efx, drv, efx->net_dev, "failed to initialise NIC\n"); |
@@ -2417,7 +2426,10 @@ int efx_reset(struct efx_nic *efx, enum reset_type method) | |||
2417 | /* Clear flags for the scopes we covered. We assume the NIC and | 2426 | /* Clear flags for the scopes we covered. We assume the NIC and |
2418 | * driver are now quiescent so that there is no race here. | 2427 | * driver are now quiescent so that there is no race here. |
2419 | */ | 2428 | */ |
2420 | efx->reset_pending &= -(1 << (method + 1)); | 2429 | if (method < RESET_TYPE_MAX_METHOD) |
2430 | efx->reset_pending &= -(1 << (method + 1)); | ||
2431 | else /* it doesn't fit into the well-ordered scope hierarchy */ | ||
2432 | __clear_bit(method, &efx->reset_pending); | ||
2421 | 2433 | ||
2422 | /* Reinitialise bus-mastering, which may have been turned off before | 2434 | /* Reinitialise bus-mastering, which may have been turned off before |
2423 | * the reset was scheduled. This is still appropriate, even in the | 2435 | * the reset was scheduled. This is still appropriate, even in the |
@@ -2546,6 +2558,7 @@ void efx_schedule_reset(struct efx_nic *efx, enum reset_type type) | |||
2546 | case RESET_TYPE_DISABLE: | 2558 | case RESET_TYPE_DISABLE: |
2547 | case RESET_TYPE_RECOVER_OR_DISABLE: | 2559 | case RESET_TYPE_RECOVER_OR_DISABLE: |
2548 | case RESET_TYPE_MC_BIST: | 2560 | case RESET_TYPE_MC_BIST: |
2561 | case RESET_TYPE_MCDI_TIMEOUT: | ||
2549 | method = type; | 2562 | method = type; |
2550 | netif_dbg(efx, drv, efx->net_dev, "scheduling %s reset\n", | 2563 | netif_dbg(efx, drv, efx->net_dev, "scheduling %s reset\n", |
2551 | RESET_TYPE(method)); | 2564 | RESET_TYPE(method)); |