aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/net
diff options
context:
space:
mode:
author Edward Cree <ecree@solarflare.com> 2014-04-16 14:27:48 -0400
committer David S. Miller <davem@davemloft.net> 2014-04-16 14:33:57 -0400
commit e283546c0465dd3026bc94f7b1a9de7f6b8969ec (patch)
tree 3828d4faeed3986b0f01b93416b910b11cd33280 /drivers/net
parent 10ec34fcb100412ab186c141a9c3557d1270effd (diff)
sfc: On MCDI timeout, issue an FLR (and mark MCDI to fail-fast)
When an MCDI command times out (whether or not we find it completed when we poll), call efx_mcdi_abandon(), which tells all subsequent MCDI calls to fail-fast, and queues up an FLR. Because an FLR doesn't lead to receiving any reboot event from the MC (unlike most other types of reset), we have to call efx_ef10_reset_mc_allocations. In efx_start_all(), if a reset (of any kind) is pending, we bail out. Without this, attempts to reconfigure (e.g. change mtu) can cause driver/mc state inconsistency if the first MCDI call triggers an FLR. For similar reasons, on EF10, in efx_reset_down(method=RESET_TYPE_MCDI_TIMEOUT), set the number of active queues to zero before calling efx_stop_all(). And, on farch, in efx_reset_up(method=RESET_TYPE_MCDI_TIMEOUT), set active_queues and flushes pending & outstanding to zero. efx_mcdi_mode_{poll,event}() should not take us out of fail-fast mode. Instead, this is done by efx_mcdi_reset() after the FLR completes. The new FLR reset_type RESET_TYPE_MCDI_TIMEOUT doesn't really fit into the hierarchy of reset 'scopes' whereby efx_reset() decides some resets subsume others. Thus, it uses separate logic. Also, fixed up some inconsistency around RESET_TYPE_MC_BIST, which was in the wrong place in that hierarchy.
Signed-off-by: Shradha Shah <sshah@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'drivers/net')
-rw-r--r-- drivers/net/ethernet/sfc/ef10.c 12
-rw-r--r-- drivers/net/ethernet/sfc/efx.c 19
-rw-r--r-- drivers/net/ethernet/sfc/enum.h 23
-rw-r--r-- drivers/net/ethernet/sfc/falcon.c 4
-rw-r--r-- drivers/net/ethernet/sfc/farch.c 22
-rw-r--r-- drivers/net/ethernet/sfc/mcdi.c 55
-rw-r--r-- drivers/net/ethernet/sfc/mcdi.h 13
-rw-r--r-- drivers/net/ethernet/sfc/net_driver.h 4
-rw-r--r-- drivers/net/ethernet/sfc/nic.h 1
-rw-r--r-- drivers/net/ethernet/sfc/siena.c 2
10 files changed, 133 insertions, 22 deletions
diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c
index 21c20ea0dad0..b5ed30a39144 100644
--- a/drivers/net/ethernet/sfc/ef10.c
+++ b/drivers/net/ethernet/sfc/ef10.c
@@ -738,8 +738,11 @@ static int efx_ef10_reset(struct efx_nic *efx, enum reset_type reset_type)
738 /* If it was a port reset, trigger reallocation of MC resources. 738 /* If it was a port reset, trigger reallocation of MC resources.
739 * Note that on an MC reset nothing needs to be done now because we'll 739 * Note that on an MC reset nothing needs to be done now because we'll
740 * detect the MC reset later and handle it then. 740 * detect the MC reset later and handle it then.
741 * For an FLR, we never get an MC reset event, but the MC has reset all
742 * resources assigned to us, so we have to trigger reallocation now.
741 */ 743 */
742 if (reset_type == RESET_TYPE_ALL && !rc) 744 if ((reset_type == RESET_TYPE_ALL ||
745 reset_type == RESET_TYPE_MCDI_TIMEOUT) && !rc)
743 efx_ef10_reset_mc_allocations(efx); 746 efx_ef10_reset_mc_allocations(efx);
744 return rc; 747 return rc;
745} 748}
@@ -2141,6 +2144,11 @@ static int efx_ef10_fini_dmaq(struct efx_nic *efx)
2141 return 0; 2144 return 0;
2142} 2145}
2143 2146
2147static void efx_ef10_prepare_flr(struct efx_nic *efx)
2148{
2149 atomic_set(&efx->active_queues, 0);
2150}
2151
2144static bool efx_ef10_filter_equal(const struct efx_filter_spec *left, 2152static bool efx_ef10_filter_equal(const struct efx_filter_spec *left,
2145 const struct efx_filter_spec *right) 2153 const struct efx_filter_spec *right)
2146{ 2154{
@@ -3603,6 +3611,8 @@ const struct efx_nic_type efx_hunt_a0_nic_type = {
3603 .probe_port = efx_mcdi_port_probe, 3611 .probe_port = efx_mcdi_port_probe,
3604 .remove_port = efx_mcdi_port_remove, 3612 .remove_port = efx_mcdi_port_remove,
3605 .fini_dmaq = efx_ef10_fini_dmaq, 3613 .fini_dmaq = efx_ef10_fini_dmaq,
3614 .prepare_flr = efx_ef10_prepare_flr,
3615 .finish_flr = efx_port_dummy_op_void,
3606 .describe_stats = efx_ef10_describe_stats, 3616 .describe_stats = efx_ef10_describe_stats,
3607 .update_stats = efx_ef10_update_stats, 3617 .update_stats = efx_ef10_update_stats,
3608 .start_stats = efx_mcdi_mac_start_stats, 3618 .start_stats = efx_mcdi_mac_start_stats,
diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c
index 57b971e5e6b2..63d595fd3cc5 100644
--- a/drivers/net/ethernet/sfc/efx.c
+++ b/drivers/net/ethernet/sfc/efx.c
@@ -76,6 +76,7 @@ const char *const efx_reset_type_names[] = {
76 [RESET_TYPE_RECOVER_OR_ALL] = "RECOVER_OR_ALL", 76 [RESET_TYPE_RECOVER_OR_ALL] = "RECOVER_OR_ALL",
77 [RESET_TYPE_WORLD] = "WORLD", 77 [RESET_TYPE_WORLD] = "WORLD",
78 [RESET_TYPE_RECOVER_OR_DISABLE] = "RECOVER_OR_DISABLE", 78 [RESET_TYPE_RECOVER_OR_DISABLE] = "RECOVER_OR_DISABLE",
79 [RESET_TYPE_MC_BIST] = "MC_BIST",
79 [RESET_TYPE_DISABLE] = "DISABLE", 80 [RESET_TYPE_DISABLE] = "DISABLE",
80 [RESET_TYPE_TX_WATCHDOG] = "TX_WATCHDOG", 81 [RESET_TYPE_TX_WATCHDOG] = "TX_WATCHDOG",
81 [RESET_TYPE_INT_ERROR] = "INT_ERROR", 82 [RESET_TYPE_INT_ERROR] = "INT_ERROR",
@@ -83,7 +84,7 @@ const char *const efx_reset_type_names[] = {
83 [RESET_TYPE_DMA_ERROR] = "DMA_ERROR", 84 [RESET_TYPE_DMA_ERROR] = "DMA_ERROR",
84 [RESET_TYPE_TX_SKIP] = "TX_SKIP", 85 [RESET_TYPE_TX_SKIP] = "TX_SKIP",
85 [RESET_TYPE_MC_FAILURE] = "MC_FAILURE", 86 [RESET_TYPE_MC_FAILURE] = "MC_FAILURE",
86 [RESET_TYPE_MC_BIST] = "MC_BIST", 87 [RESET_TYPE_MCDI_TIMEOUT] = "MCDI_TIMEOUT (FLR)",
87}; 88};
88 89
89/* Reset workqueue. If any NIC has a hardware failure then a reset will be 90/* Reset workqueue. If any NIC has a hardware failure then a reset will be
@@ -1739,7 +1740,8 @@ static void efx_start_all(struct efx_nic *efx)
1739 1740
1740 /* Check that it is appropriate to restart the interface. All 1741 /* Check that it is appropriate to restart the interface. All
1741 * of these flags are safe to read under just the rtnl lock */ 1742 * of these flags are safe to read under just the rtnl lock */
1742 if (efx->port_enabled || !netif_running(efx->net_dev)) 1743 if (efx->port_enabled || !netif_running(efx->net_dev) ||
1744 efx->reset_pending)
1743 return; 1745 return;
1744 1746
1745 efx_start_port(efx); 1747 efx_start_port(efx);
@@ -2334,6 +2336,9 @@ void efx_reset_down(struct efx_nic *efx, enum reset_type method)
2334{ 2336{
2335 EFX_ASSERT_RESET_SERIALISED(efx); 2337 EFX_ASSERT_RESET_SERIALISED(efx);
2336 2338
2339 if (method == RESET_TYPE_MCDI_TIMEOUT)
2340 efx->type->prepare_flr(efx);
2341
2337 efx_stop_all(efx); 2342 efx_stop_all(efx);
2338 efx_disable_interrupts(efx); 2343 efx_disable_interrupts(efx);
2339 2344
@@ -2354,6 +2359,10 @@ int efx_reset_up(struct efx_nic *efx, enum reset_type method, bool ok)
2354 2359
2355 EFX_ASSERT_RESET_SERIALISED(efx); 2360 EFX_ASSERT_RESET_SERIALISED(efx);
2356 2361
2362 if (method == RESET_TYPE_MCDI_TIMEOUT)
2363 efx->type->finish_flr(efx);
2364
2365 /* Ensure that SRAM is initialised even if we're disabling the device */
2357 rc = efx->type->init(efx); 2366 rc = efx->type->init(efx);
2358 if (rc) { 2367 if (rc) {
2359 netif_err(efx, drv, efx->net_dev, "failed to initialise NIC\n"); 2368 netif_err(efx, drv, efx->net_dev, "failed to initialise NIC\n");
@@ -2417,7 +2426,10 @@ int efx_reset(struct efx_nic *efx, enum reset_type method)
2417 /* Clear flags for the scopes we covered. We assume the NIC and 2426 /* Clear flags for the scopes we covered. We assume the NIC and
2418 * driver are now quiescent so that there is no race here. 2427 * driver are now quiescent so that there is no race here.
2419 */ 2428 */
2420 efx->reset_pending &= -(1 << (method + 1)); 2429 if (method < RESET_TYPE_MAX_METHOD)
2430 efx->reset_pending &= -(1 << (method + 1));
2431 else /* it doesn't fit into the well-ordered scope hierarchy */
2432 __clear_bit(method, &efx->reset_pending);
2421 2433
2422 /* Reinitialise bus-mastering, which may have been turned off before 2434 /* Reinitialise bus-mastering, which may have been turned off before
2423 * the reset was scheduled. This is still appropriate, even in the 2435 * the reset was scheduled. This is still appropriate, even in the
@@ -2546,6 +2558,7 @@ void efx_schedule_reset(struct efx_nic *efx, enum reset_type type)
2546 case RESET_TYPE_DISABLE: 2558 case RESET_TYPE_DISABLE:
2547 case RESET_TYPE_RECOVER_OR_DISABLE: 2559 case RESET_TYPE_RECOVER_OR_DISABLE:
2548 case RESET_TYPE_MC_BIST: 2560 case RESET_TYPE_MC_BIST:
2561 case RESET_TYPE_MCDI_TIMEOUT:
2549 method = type; 2562 method = type;
2550 netif_dbg(efx, drv, efx->net_dev, "scheduling %s reset\n", 2563 netif_dbg(efx, drv, efx->net_dev, "scheduling %s reset\n",
2551 RESET_TYPE(method)); 2564 RESET_TYPE(method));
diff --git a/drivers/net/ethernet/sfc/enum.h b/drivers/net/ethernet/sfc/enum.h
index 75ef7ef6450b..d1dbb5fb31bb 100644
--- a/drivers/net/ethernet/sfc/enum.h
+++ b/drivers/net/ethernet/sfc/enum.h
@@ -143,6 +143,7 @@ enum efx_loopback_mode {
143 * @RESET_TYPE_WORLD: Reset as much as possible 143 * @RESET_TYPE_WORLD: Reset as much as possible
144 * @RESET_TYPE_RECOVER_OR_DISABLE: Try to recover. Apply RESET_TYPE_DISABLE if 144 * @RESET_TYPE_RECOVER_OR_DISABLE: Try to recover. Apply RESET_TYPE_DISABLE if
145 * unsuccessful. 145 * unsuccessful.
146 * @RESET_TYPE_MC_BIST: MC entering BIST mode.
146 * @RESET_TYPE_DISABLE: Reset datapath, MAC and PHY; leave NIC disabled 147 * @RESET_TYPE_DISABLE: Reset datapath, MAC and PHY; leave NIC disabled
147 * @RESET_TYPE_TX_WATCHDOG: reset due to TX watchdog 148 * @RESET_TYPE_TX_WATCHDOG: reset due to TX watchdog
148 * @RESET_TYPE_INT_ERROR: reset due to internal error 149 * @RESET_TYPE_INT_ERROR: reset due to internal error
@@ -150,14 +151,16 @@ enum efx_loopback_mode {
150 * @RESET_TYPE_DMA_ERROR: DMA error 151 * @RESET_TYPE_DMA_ERROR: DMA error
151 * @RESET_TYPE_TX_SKIP: hardware completed empty tx descriptors 152 * @RESET_TYPE_TX_SKIP: hardware completed empty tx descriptors
152 * @RESET_TYPE_MC_FAILURE: MC reboot/assertion 153 * @RESET_TYPE_MC_FAILURE: MC reboot/assertion
154 * @RESET_TYPE_MCDI_TIMEOUT: MCDI timeout.
153 */ 155 */
154enum reset_type { 156enum reset_type {
155 RESET_TYPE_INVISIBLE = 0, 157 RESET_TYPE_INVISIBLE,
156 RESET_TYPE_RECOVER_OR_ALL = 1, 158 RESET_TYPE_RECOVER_OR_ALL,
157 RESET_TYPE_ALL = 2, 159 RESET_TYPE_ALL,
158 RESET_TYPE_WORLD = 3, 160 RESET_TYPE_WORLD,
159 RESET_TYPE_RECOVER_OR_DISABLE = 4, 161 RESET_TYPE_RECOVER_OR_DISABLE,
160 RESET_TYPE_DISABLE = 5, 162 RESET_TYPE_MC_BIST,
163 RESET_TYPE_DISABLE,
161 RESET_TYPE_MAX_METHOD, 164 RESET_TYPE_MAX_METHOD,
162 RESET_TYPE_TX_WATCHDOG, 165 RESET_TYPE_TX_WATCHDOG,
163 RESET_TYPE_INT_ERROR, 166 RESET_TYPE_INT_ERROR,
@@ -165,7 +168,13 @@ enum reset_type {
165 RESET_TYPE_DMA_ERROR, 168 RESET_TYPE_DMA_ERROR,
166 RESET_TYPE_TX_SKIP, 169 RESET_TYPE_TX_SKIP,
167 RESET_TYPE_MC_FAILURE, 170 RESET_TYPE_MC_FAILURE,
168 RESET_TYPE_MC_BIST, 171 /* RESET_TYPE_MCDI_TIMEOUT is actually a method, not just a reason, but
172 * it doesn't fit the scope hierarchy (not well-ordered by inclusion).
173 * We encode this by having its enum value be greater than
174 * RESET_TYPE_MAX_METHOD. This also prevents issuing it with
175 * efx_ioctl_reset.
176 */
177 RESET_TYPE_MCDI_TIMEOUT,
169 RESET_TYPE_MAX, 178 RESET_TYPE_MAX,
170}; 179};
171 180
diff --git a/drivers/net/ethernet/sfc/falcon.c b/drivers/net/ethernet/sfc/falcon.c
index 8ec20b713cc6..fae25a418647 100644
--- a/drivers/net/ethernet/sfc/falcon.c
+++ b/drivers/net/ethernet/sfc/falcon.c
@@ -2696,6 +2696,8 @@ const struct efx_nic_type falcon_a1_nic_type = {
2696 .fini_dmaq = efx_farch_fini_dmaq, 2696 .fini_dmaq = efx_farch_fini_dmaq,
2697 .prepare_flush = falcon_prepare_flush, 2697 .prepare_flush = falcon_prepare_flush,
2698 .finish_flush = efx_port_dummy_op_void, 2698 .finish_flush = efx_port_dummy_op_void,
2699 .prepare_flr = efx_port_dummy_op_void,
2700 .finish_flr = efx_farch_finish_flr,
2699 .describe_stats = falcon_describe_nic_stats, 2701 .describe_stats = falcon_describe_nic_stats,
2700 .update_stats = falcon_update_nic_stats, 2702 .update_stats = falcon_update_nic_stats,
2701 .start_stats = falcon_start_nic_stats, 2703 .start_stats = falcon_start_nic_stats,
@@ -2790,6 +2792,8 @@ const struct efx_nic_type falcon_b0_nic_type = {
2790 .fini_dmaq = efx_farch_fini_dmaq, 2792 .fini_dmaq = efx_farch_fini_dmaq,
2791 .prepare_flush = falcon_prepare_flush, 2793 .prepare_flush = falcon_prepare_flush,
2792 .finish_flush = efx_port_dummy_op_void, 2794 .finish_flush = efx_port_dummy_op_void,
2795 .prepare_flr = efx_port_dummy_op_void,
2796 .finish_flr = efx_farch_finish_flr,
2793 .describe_stats = falcon_describe_nic_stats, 2797 .describe_stats = falcon_describe_nic_stats,
2794 .update_stats = falcon_update_nic_stats, 2798 .update_stats = falcon_update_nic_stats,
2795 .start_stats = falcon_start_nic_stats, 2799 .start_stats = falcon_start_nic_stats,
diff --git a/drivers/net/ethernet/sfc/farch.c b/drivers/net/ethernet/sfc/farch.c
index a08761360cdf..0537381cd2f6 100644
--- a/drivers/net/ethernet/sfc/farch.c
+++ b/drivers/net/ethernet/sfc/farch.c
@@ -741,6 +741,28 @@ int efx_farch_fini_dmaq(struct efx_nic *efx)
741 return rc; 741 return rc;
742} 742}
743 743
744/* Reset queue and flush accounting after FLR
745 *
746 * One possible cause of FLR recovery is that DMA may be failing (eg. if bus
747 * mastering was disabled), in which case we don't receive (RXQ) flush
748 * completion events. This means that efx->rxq_flush_outstanding remained at 4
749 * after the FLR; also, efx->active_queues was non-zero (as no flush completion
750 * events were received, and we didn't go through efx_check_tx_flush_complete())
751 * If we don't fix this up, on the next call to efx_realloc_channels() we won't
752 * flush any RX queues because efx->rxq_flush_outstanding is at the limit of 4
753 * for batched flush requests; and the efx->active_queues gets messed up because
754 * we keep incrementing for the newly initialised queues, but it never went to
755 * zero previously. Then we get a timeout every time we try to restart the
756 * queues, as it doesn't go back to zero when we should be flushing the queues.
757 */
758void efx_farch_finish_flr(struct efx_nic *efx)
759{
760 atomic_set(&efx->rxq_flush_pending, 0);
761 atomic_set(&efx->rxq_flush_outstanding, 0);
762 atomic_set(&efx->active_queues, 0);
763}
764
765
744/************************************************************************** 766/**************************************************************************
745 * 767 *
746 * Event queue processing 768 * Event queue processing
diff --git a/drivers/net/ethernet/sfc/mcdi.c b/drivers/net/ethernet/sfc/mcdi.c
index 7bd4b14bf3b3..5239cf9bdc56 100644
--- a/drivers/net/ethernet/sfc/mcdi.c
+++ b/drivers/net/ethernet/sfc/mcdi.c
@@ -52,12 +52,7 @@ static void efx_mcdi_timeout_async(unsigned long context);
52static int efx_mcdi_drv_attach(struct efx_nic *efx, bool driver_operating, 52static int efx_mcdi_drv_attach(struct efx_nic *efx, bool driver_operating,
53 bool *was_attached_out); 53 bool *was_attached_out);
54static bool efx_mcdi_poll_once(struct efx_nic *efx); 54static bool efx_mcdi_poll_once(struct efx_nic *efx);
55 55static void efx_mcdi_abandon(struct efx_nic *efx);
56static inline struct efx_mcdi_iface *efx_mcdi(struct efx_nic *efx)
57{
58 EFX_BUG_ON_PARANOID(!efx->mcdi);
59 return &efx->mcdi->iface;
60}
61 56
62int efx_mcdi_init(struct efx_nic *efx) 57int efx_mcdi_init(struct efx_nic *efx)
63{ 58{
@@ -558,6 +553,8 @@ static int _efx_mcdi_rpc_finish(struct efx_nic *efx, unsigned cmd, size_t inlen,
558 rc = 0; 553 rc = 0;
559 } 554 }
560 555
556 efx_mcdi_abandon(efx);
557
561 /* Close the race with efx_mcdi_ev_cpl() executing just too late 558 /* Close the race with efx_mcdi_ev_cpl() executing just too late
562 * and completing a request we've just cancelled, by ensuring 559 * and completing a request we've just cancelled, by ensuring
563 * that the seqno check therein fails. 560 * that the seqno check therein fails.
@@ -672,6 +669,9 @@ int efx_mcdi_rpc_start(struct efx_nic *efx, unsigned cmd,
672 if (efx->mc_bist_for_other_fn) 669 if (efx->mc_bist_for_other_fn)
673 return -ENETDOWN; 670 return -ENETDOWN;
674 671
672 if (mcdi->mode == MCDI_MODE_FAIL)
673 return -ENETDOWN;
674
675 efx_mcdi_acquire_sync(mcdi); 675 efx_mcdi_acquire_sync(mcdi);
676 efx_mcdi_send_request(efx, cmd, inbuf, inlen); 676 efx_mcdi_send_request(efx, cmd, inbuf, inlen);
677 return 0; 677 return 0;
@@ -812,7 +812,11 @@ void efx_mcdi_mode_poll(struct efx_nic *efx)
812 return; 812 return;
813 813
814 mcdi = efx_mcdi(efx); 814 mcdi = efx_mcdi(efx);
815 if (mcdi->mode == MCDI_MODE_POLL) 815 /* If already in polling mode, nothing to do.
816 * If in fail-fast state, don't switch to polled completion.
817 * FLR recovery will do that later.
818 */
819 if (mcdi->mode == MCDI_MODE_POLL || mcdi->mode == MCDI_MODE_FAIL)
816 return; 820 return;
817 821
818 /* We can switch from event completion to polled completion, because 822 /* We can switch from event completion to polled completion, because
@@ -841,8 +845,8 @@ void efx_mcdi_flush_async(struct efx_nic *efx)
841 845
842 mcdi = efx_mcdi(efx); 846 mcdi = efx_mcdi(efx);
843 847
844 /* We must be in polling mode so no more requests can be queued */ 848 /* We must be in poll or fail mode so no more requests can be queued */
845 BUG_ON(mcdi->mode != MCDI_MODE_POLL); 849 BUG_ON(mcdi->mode == MCDI_MODE_EVENTS);
846 850
847 del_timer_sync(&mcdi->async_timer); 851 del_timer_sync(&mcdi->async_timer);
848 852
@@ -875,8 +879,11 @@ void efx_mcdi_mode_event(struct efx_nic *efx)
875 return; 879 return;
876 880
877 mcdi = efx_mcdi(efx); 881 mcdi = efx_mcdi(efx);
878 882 /* If already in event completion mode, nothing to do.
879 if (mcdi->mode == MCDI_MODE_EVENTS) 883 * If in fail-fast state, don't switch to event completion. FLR
884 * recovery will do that later.
885 */
886 if (mcdi->mode == MCDI_MODE_EVENTS || mcdi->mode == MCDI_MODE_FAIL)
880 return; 887 return;
881 888
882 /* We can't switch from polled to event completion in the middle of a 889 /* We can't switch from polled to event completion in the middle of a
@@ -966,6 +973,19 @@ static void efx_mcdi_ev_bist(struct efx_nic *efx)
966 spin_unlock(&mcdi->iface_lock); 973 spin_unlock(&mcdi->iface_lock);
967} 974}
968 975
976/* MCDI timeouts seen, so make all MCDI calls fail-fast and issue an FLR to try
977 * to recover.
978 */
979static void efx_mcdi_abandon(struct efx_nic *efx)
980{
981 struct efx_mcdi_iface *mcdi = efx_mcdi(efx);
982
983 if (xchg(&mcdi->mode, MCDI_MODE_FAIL) == MCDI_MODE_FAIL)
984 return; /* it had already been done */
985 netif_dbg(efx, hw, efx->net_dev, "MCDI is timing out; trying to recover\n");
986 efx_schedule_reset(efx, RESET_TYPE_MCDI_TIMEOUT);
987}
988
969/* Called from falcon_process_eventq for MCDI events */ 989/* Called from falcon_process_eventq for MCDI events */
970void efx_mcdi_process_event(struct efx_channel *channel, 990void efx_mcdi_process_event(struct efx_channel *channel,
971 efx_qword_t *event) 991 efx_qword_t *event)
@@ -1512,6 +1532,19 @@ int efx_mcdi_reset(struct efx_nic *efx, enum reset_type method)
1512{ 1532{
1513 int rc; 1533 int rc;
1514 1534
1535 /* If MCDI is down, we can't handle_assertion */
1536 if (method == RESET_TYPE_MCDI_TIMEOUT) {
1537 rc = pci_reset_function(efx->pci_dev);
1538 if (rc)
1539 return rc;
1540 /* Re-enable polled MCDI completion */
1541 if (efx->mcdi) {
1542 struct efx_mcdi_iface *mcdi = efx_mcdi(efx);
1543 mcdi->mode = MCDI_MODE_POLL;
1544 }
1545 return 0;
1546 }
1547
1515 /* Recover from a failed assertion pre-reset */ 1548 /* Recover from a failed assertion pre-reset */
1516 rc = efx_mcdi_handle_assertion(efx); 1549 rc = efx_mcdi_handle_assertion(efx);
1517 if (rc) 1550 if (rc)
diff --git a/drivers/net/ethernet/sfc/mcdi.h b/drivers/net/ethernet/sfc/mcdi.h
index 52931aebf3c3..56465f7465a2 100644
--- a/drivers/net/ethernet/sfc/mcdi.h
+++ b/drivers/net/ethernet/sfc/mcdi.h
@@ -28,9 +28,16 @@ enum efx_mcdi_state {
28 MCDI_STATE_COMPLETED, 28 MCDI_STATE_COMPLETED,
29}; 29};
30 30
31/**
32 * enum efx_mcdi_mode - MCDI transaction mode
33 * @MCDI_MODE_POLL: poll for MCDI completion, until timeout
34 * @MCDI_MODE_EVENTS: wait for an mcdi_event. On timeout, poll once
35 * @MCDI_MODE_FAIL: we think MCDI is dead, so fail-fast all calls
36 */
31enum efx_mcdi_mode { 37enum efx_mcdi_mode {
32 MCDI_MODE_POLL, 38 MCDI_MODE_POLL,
33 MCDI_MODE_EVENTS, 39 MCDI_MODE_EVENTS,
40 MCDI_MODE_FAIL,
34}; 41};
35 42
36/** 43/**
@@ -104,6 +111,12 @@ struct efx_mcdi_data {
104 u32 fn_flags; 111 u32 fn_flags;
105}; 112};
106 113
114static inline struct efx_mcdi_iface *efx_mcdi(struct efx_nic *efx)
115{
116 EFX_BUG_ON_PARANOID(!efx->mcdi);
117 return &efx->mcdi->iface;
118}
119
107#ifdef CONFIG_SFC_MCDI_MON 120#ifdef CONFIG_SFC_MCDI_MON
108static inline struct efx_mcdi_mon *efx_mcdi_mon(struct efx_nic *efx) 121static inline struct efx_mcdi_mon *efx_mcdi_mon(struct efx_nic *efx)
109{ 122{
diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h
index 8a400a0595eb..5bdae8ed7c57 100644
--- a/drivers/net/ethernet/sfc/net_driver.h
+++ b/drivers/net/ethernet/sfc/net_driver.h
@@ -972,6 +972,8 @@ struct efx_mtd_partition {
972 * (for Falcon architecture) 972 * (for Falcon architecture)
973 * @finish_flush: Clean up after flushing the DMA queues (for Falcon 973 * @finish_flush: Clean up after flushing the DMA queues (for Falcon
974 * architecture) 974 * architecture)
975 * @prepare_flr: Prepare for an FLR
976 * @finish_flr: Clean up after an FLR
975 * @describe_stats: Describe statistics for ethtool 977 * @describe_stats: Describe statistics for ethtool
976 * @update_stats: Update statistics not provided by event handling. 978 * @update_stats: Update statistics not provided by event handling.
977 * Either argument may be %NULL. 979 * Either argument may be %NULL.
@@ -1100,6 +1102,8 @@ struct efx_nic_type {
1100 int (*fini_dmaq)(struct efx_nic *efx); 1102 int (*fini_dmaq)(struct efx_nic *efx);
1101 void (*prepare_flush)(struct efx_nic *efx); 1103 void (*prepare_flush)(struct efx_nic *efx);
1102 void (*finish_flush)(struct efx_nic *efx); 1104 void (*finish_flush)(struct efx_nic *efx);
1105 void (*prepare_flr)(struct efx_nic *efx);
1106 void (*finish_flr)(struct efx_nic *efx);
1103 size_t (*describe_stats)(struct efx_nic *efx, u8 *names); 1107 size_t (*describe_stats)(struct efx_nic *efx, u8 *names);
1104 size_t (*update_stats)(struct efx_nic *efx, u64 *full_stats, 1108 size_t (*update_stats)(struct efx_nic *efx, u64 *full_stats,
1105 struct rtnl_link_stats64 *core_stats); 1109 struct rtnl_link_stats64 *core_stats);
diff --git a/drivers/net/ethernet/sfc/nic.h b/drivers/net/ethernet/sfc/nic.h
index a001fae1a8d7..d3ad8ed8d901 100644
--- a/drivers/net/ethernet/sfc/nic.h
+++ b/drivers/net/ethernet/sfc/nic.h
@@ -757,6 +757,7 @@ static inline int efx_nic_irq_test_irq_cpu(struct efx_nic *efx)
757int efx_nic_flush_queues(struct efx_nic *efx); 757int efx_nic_flush_queues(struct efx_nic *efx);
758void siena_prepare_flush(struct efx_nic *efx); 758void siena_prepare_flush(struct efx_nic *efx);
759int efx_farch_fini_dmaq(struct efx_nic *efx); 759int efx_farch_fini_dmaq(struct efx_nic *efx);
760void efx_farch_finish_flr(struct efx_nic *efx);
760void siena_finish_flush(struct efx_nic *efx); 761void siena_finish_flush(struct efx_nic *efx);
761void falcon_start_nic_stats(struct efx_nic *efx); 762void falcon_start_nic_stats(struct efx_nic *efx);
762void falcon_stop_nic_stats(struct efx_nic *efx); 763void falcon_stop_nic_stats(struct efx_nic *efx);
diff --git a/drivers/net/ethernet/sfc/siena.c b/drivers/net/ethernet/sfc/siena.c
index 23f3a6f7737a..50ffefed492c 100644
--- a/drivers/net/ethernet/sfc/siena.c
+++ b/drivers/net/ethernet/sfc/siena.c
@@ -921,6 +921,8 @@ const struct efx_nic_type siena_a0_nic_type = {
921 .fini_dmaq = efx_farch_fini_dmaq, 921 .fini_dmaq = efx_farch_fini_dmaq,
922 .prepare_flush = siena_prepare_flush, 922 .prepare_flush = siena_prepare_flush,
923 .finish_flush = siena_finish_flush, 923 .finish_flush = siena_finish_flush,
924 .prepare_flr = efx_port_dummy_op_void,
925 .finish_flr = efx_farch_finish_flr,
924 .describe_stats = siena_describe_nic_stats, 926 .describe_stats = siena_describe_nic_stats,
925 .update_stats = siena_update_nic_stats, 927 .update_stats = siena_update_nic_stats,
926 .start_stats = efx_mcdi_mac_start_stats, 928 .start_stats = efx_mcdi_mac_start_stats,