aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/net/ethernet/sfc/efx.c
diff options
context:
space:
mode:
authorEdward Cree <ecree@solarflare.com>2014-04-16 14:27:48 -0400
committerDavid S. Miller <davem@davemloft.net>2014-04-16 14:33:57 -0400
commite283546c0465dd3026bc94f7b1a9de7f6b8969ec (patch)
tree3828d4faeed3986b0f01b93416b910b11cd33280 /drivers/net/ethernet/sfc/efx.c
parent10ec34fcb100412ab186c141a9c3557d1270effd (diff)
sfc:On MCDI timeout, issue an FLR (and mark MCDI to fail-fast)
When an MCDI command times out (whether or not we find it completed when we poll), call efx_mcdi_abandon(), which tells all subsequent MCDI calls to fail-fast, and queues up an FLR. Because an FLR doesn't lead to receiving any reboot even from the MC (unlike most other types of reset), we have to call efx_ef10_reset_mc_allocations. In efx_start_all(), if a reset (of any kind) is pending, we bail out. Without this, attempts to reconfigure (e.g. change mtu) can cause driver/mc state inconsistency if the first MCDI call triggers an FLR. For similar reasons, on EF10, in efx_reset_down(method=RESET_TYPE_MCDI_TIMEOUT), set the number of active queues to zero before calling efx_stop_all(). And, on farch, in efx_reset_up(method=RESET_TYPE_MCDI_TIMEOUT), set active_queues and flushes pending & outstanding to zero. efx_mcdi_mode_{poll,event}() should not take us out of fail-fast mode. Instead, this is done by efx_mcdi_reset() after the FLR completes. The new FLR reset_type RESET_TYPE_MCDI_TIMEOUT doesn't really fit into the hierarchy of reset 'scopes' whereby efx_reset() decides some resets subsume others. Thus, it uses separate logic. Also, fixed up some inconsistency around RESET_TYPE_MC_BIST, which was in the wrong place in that hierarchy. Signed-off-by: Shradha Shah <sshah@solarflare.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'drivers/net/ethernet/sfc/efx.c')
-rw-r--r--drivers/net/ethernet/sfc/efx.c19
1 files changed, 16 insertions, 3 deletions
diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c
index 57b971e5e6b2..63d595fd3cc5 100644
--- a/drivers/net/ethernet/sfc/efx.c
+++ b/drivers/net/ethernet/sfc/efx.c
@@ -76,6 +76,7 @@ const char *const efx_reset_type_names[] = {
76 [RESET_TYPE_RECOVER_OR_ALL] = "RECOVER_OR_ALL", 76 [RESET_TYPE_RECOVER_OR_ALL] = "RECOVER_OR_ALL",
77 [RESET_TYPE_WORLD] = "WORLD", 77 [RESET_TYPE_WORLD] = "WORLD",
78 [RESET_TYPE_RECOVER_OR_DISABLE] = "RECOVER_OR_DISABLE", 78 [RESET_TYPE_RECOVER_OR_DISABLE] = "RECOVER_OR_DISABLE",
79 [RESET_TYPE_MC_BIST] = "MC_BIST",
79 [RESET_TYPE_DISABLE] = "DISABLE", 80 [RESET_TYPE_DISABLE] = "DISABLE",
80 [RESET_TYPE_TX_WATCHDOG] = "TX_WATCHDOG", 81 [RESET_TYPE_TX_WATCHDOG] = "TX_WATCHDOG",
81 [RESET_TYPE_INT_ERROR] = "INT_ERROR", 82 [RESET_TYPE_INT_ERROR] = "INT_ERROR",
@@ -83,7 +84,7 @@ const char *const efx_reset_type_names[] = {
83 [RESET_TYPE_DMA_ERROR] = "DMA_ERROR", 84 [RESET_TYPE_DMA_ERROR] = "DMA_ERROR",
84 [RESET_TYPE_TX_SKIP] = "TX_SKIP", 85 [RESET_TYPE_TX_SKIP] = "TX_SKIP",
85 [RESET_TYPE_MC_FAILURE] = "MC_FAILURE", 86 [RESET_TYPE_MC_FAILURE] = "MC_FAILURE",
86 [RESET_TYPE_MC_BIST] = "MC_BIST", 87 [RESET_TYPE_MCDI_TIMEOUT] = "MCDI_TIMEOUT (FLR)",
87}; 88};
88 89
89/* Reset workqueue. If any NIC has a hardware failure then a reset will be 90/* Reset workqueue. If any NIC has a hardware failure then a reset will be
@@ -1739,7 +1740,8 @@ static void efx_start_all(struct efx_nic *efx)
1739 1740
1740 /* Check that it is appropriate to restart the interface. All 1741 /* Check that it is appropriate to restart the interface. All
1741 * of these flags are safe to read under just the rtnl lock */ 1742 * of these flags are safe to read under just the rtnl lock */
1742 if (efx->port_enabled || !netif_running(efx->net_dev)) 1743 if (efx->port_enabled || !netif_running(efx->net_dev) ||
1744 efx->reset_pending)
1743 return; 1745 return;
1744 1746
1745 efx_start_port(efx); 1747 efx_start_port(efx);
@@ -2334,6 +2336,9 @@ void efx_reset_down(struct efx_nic *efx, enum reset_type method)
2334{ 2336{
2335 EFX_ASSERT_RESET_SERIALISED(efx); 2337 EFX_ASSERT_RESET_SERIALISED(efx);
2336 2338
2339 if (method == RESET_TYPE_MCDI_TIMEOUT)
2340 efx->type->prepare_flr(efx);
2341
2337 efx_stop_all(efx); 2342 efx_stop_all(efx);
2338 efx_disable_interrupts(efx); 2343 efx_disable_interrupts(efx);
2339 2344
@@ -2354,6 +2359,10 @@ int efx_reset_up(struct efx_nic *efx, enum reset_type method, bool ok)
2354 2359
2355 EFX_ASSERT_RESET_SERIALISED(efx); 2360 EFX_ASSERT_RESET_SERIALISED(efx);
2356 2361
2362 if (method == RESET_TYPE_MCDI_TIMEOUT)
2363 efx->type->finish_flr(efx);
2364
2365 /* Ensure that SRAM is initialised even if we're disabling the device */
2357 rc = efx->type->init(efx); 2366 rc = efx->type->init(efx);
2358 if (rc) { 2367 if (rc) {
2359 netif_err(efx, drv, efx->net_dev, "failed to initialise NIC\n"); 2368 netif_err(efx, drv, efx->net_dev, "failed to initialise NIC\n");
@@ -2417,7 +2426,10 @@ int efx_reset(struct efx_nic *efx, enum reset_type method)
2417 /* Clear flags for the scopes we covered. We assume the NIC and 2426 /* Clear flags for the scopes we covered. We assume the NIC and
2418 * driver are now quiescent so that there is no race here. 2427 * driver are now quiescent so that there is no race here.
2419 */ 2428 */
2420 efx->reset_pending &= -(1 << (method + 1)); 2429 if (method < RESET_TYPE_MAX_METHOD)
2430 efx->reset_pending &= -(1 << (method + 1));
2431 else /* it doesn't fit into the well-ordered scope hierarchy */
2432 __clear_bit(method, &efx->reset_pending);
2421 2433
2422 /* Reinitialise bus-mastering, which may have been turned off before 2434 /* Reinitialise bus-mastering, which may have been turned off before
2423 * the reset was scheduled. This is still appropriate, even in the 2435 * the reset was scheduled. This is still appropriate, even in the
@@ -2546,6 +2558,7 @@ void efx_schedule_reset(struct efx_nic *efx, enum reset_type type)
2546 case RESET_TYPE_DISABLE: 2558 case RESET_TYPE_DISABLE:
2547 case RESET_TYPE_RECOVER_OR_DISABLE: 2559 case RESET_TYPE_RECOVER_OR_DISABLE:
2548 case RESET_TYPE_MC_BIST: 2560 case RESET_TYPE_MC_BIST:
2561 case RESET_TYPE_MCDI_TIMEOUT:
2549 method = type; 2562 method = type;
2550 netif_dbg(efx, drv, efx->net_dev, "scheduling %s reset\n", 2563 netif_dbg(efx, drv, efx->net_dev, "scheduling %s reset\n",
2551 RESET_TYPE(method)); 2564 RESET_TYPE(method));