[PATCH] iseries_veth: Try to avoid pathological reset behaviour

The iseries_veth driver contains a state machine which is used to manage how connections are set up and negotiated between LPARs. If one side of a connection resets for some reason, the two LPARs can get stuck in a race to set the connection up again. This can lead to the connection being declared dead by one or both ends. In practice the connection is declared dead by one or both ends approximately 8/10 times a connection is reset, although it is rare for connections to be reset. (An example is here: http://michael.ellerman.id.au/files/misc/veth-trace.html) The core of the problem is that the end that resets the connection doesn't wait for the other end to become aware of the reset. So the resetting end starts setting the connection back up, and then receives a reset from the other end (which is the response to the initial reset). And so on. We're severely limited in what we can do to fix this. The protocol between LPARs is essentially fixed, as we have to interoperate with both OS/400 and old Linux drivers. This also means we need a fix that only changes the code on one end. The only fix I've found, given that, is to just blindly sleep for a bit when resetting the connection, in the hope that the other end will get itself sorted. Needless to say I'd love it if someone has a better idea. This does work; I've so far been unable to get it to break, whereas without the fix a reset of one end will lead to a dead connection ~8/10 times. Signed-off-by: Michael Ellerman <michael@ellerman.id.au> Signed-off-by: Jeff Garzik <jgarzik@pobox.com>
author: Michael Ellerman <michael@ellerman.id.au> 2005-08-31 21:29:00 -0400
committer: Jeff Garzik <jgarzik@pobox.com> 2005-08-31 22:37:56 -0400
commit: 58c5900bdaffbf76afd7ad5e053410cb95eb3169 (patch)
tree: 1d6e6eba8392f496f9ac2f632fdda8aa48b0b732 /drivers/net
parent: abfda4719c61550be4efaf277d4a904a7930d410 (diff)
1 files changed, 23 insertions, 2 deletions
diff --git a/drivers/net/iseries_veth.c b/drivers/net/iseries_veth.c
index c19b32e0a5ad..db83b0d31327 100644
--- a/drivers/net/iseries_veth.c
+++ b/drivers/net/iseries_veth.c
@@ -324,8 +324,14 @@ static void veth_take_monitor_ack(struct veth_lpar_connection *cnx,
        spin_lock_irqsave(&cnx->lock, flags);
        veth_debug("cnx %d: lost connection.\n", cnx->remote_lp);
-        cnx->state |= VETH_STATE_RESET;
-        veth_kick_statemachine(cnx);
+        /* Avoid kicking the statemachine once we're shutdown.
+         * It's unnecessary and it could break veth_stop_connection(). */
+        if (! (cnx->state & VETH_STATE_SHUTDOWN)) {
+                cnx->state |= VETH_STATE_RESET;
+                veth_kick_statemachine(cnx);
+        }
        spin_unlock_irqrestore(&cnx->lock, flags);
 }
@@ -483,6 +489,12 @@ static void veth_statemachine(void *p)
                if (cnx->state & VETH_STATE_RESET)
                        goto restart;
+                /* Hack, wait for the other end to reset itself. */
+                if (! (cnx->state & VETH_STATE_SHUTDOWN)) {
+                        schedule_delayed_work(&cnx->statemachine_wq, 5 * HZ);
+                        goto out;
+                }
        }
        if (cnx->state & VETH_STATE_SHUTDOWN)
@@ -667,6 +679,15 @@ static void veth_stop_connection(u8 rlp)
        veth_kick_statemachine(cnx);
        spin_unlock_irq(&cnx->lock);
+        /* There's a slim chance the reset code has just queued the
+         * statemachine to run in five seconds. If so we need to cancel
+         * that and requeue the work to run now. */
+        if (cancel_delayed_work(&cnx->statemachine_wq)) {
+                spin_lock_irq(&cnx->lock);
+                veth_kick_statemachine(cnx);
+                spin_unlock_irq(&cnx->lock);
+        }
        /* Wait for the state machine to run. */
        flush_scheduled_work();
author	Michael Ellerman <michael@ellerman.id.au>	2005-08-31 21:29:00 -0400
committer	Jeff Garzik <jgarzik@pobox.com>	2005-08-31 22:37:56 -0400
commit	58c5900bdaffbf76afd7ad5e053410cb95eb3169 (patch)
tree	1d6e6eba8392f496f9ac2f632fdda8aa48b0b732 /drivers/net
parent	abfda4719c61550be4efaf277d4a904a7930d410 (diff)

diff --git a/drivers/net/iseries_veth.c b/drivers/net/iseries_veth.c index c19b32e0a5ad..db83b0d31327 100644 --- a/drivers/net/iseries_veth.c +++ b/drivers/net/iseries_veth.c
@@ -324,8 +324,14 @@ static void veth_take_monitor_ack(struct veth_lpar_connection *cnx,
324		324
325	spin_lock_irqsave(&cnx->lock, flags);	325	spin_lock_irqsave(&cnx->lock, flags);
326	veth_debug("cnx %d: lost connection.\n", cnx->remote_lp);	326	veth_debug("cnx %d: lost connection.\n", cnx->remote_lp);
327	cnx->state \|= VETH_STATE_RESET;	327
328	veth_kick_statemachine(cnx);	328	/* Avoid kicking the statemachine once we're shutdown.
		329	* It's unnecessary and it could break veth_stop_connection(). */
		330
		331	if (! (cnx->state & VETH_STATE_SHUTDOWN)) {
		332	cnx->state \|= VETH_STATE_RESET;
		333	veth_kick_statemachine(cnx);
		334	}
329	spin_unlock_irqrestore(&cnx->lock, flags);	335	spin_unlock_irqrestore(&cnx->lock, flags);
330	}	336	}
331		337
@@ -483,6 +489,12 @@ static void veth_statemachine(void *p)
483		489
484	if (cnx->state & VETH_STATE_RESET)	490	if (cnx->state & VETH_STATE_RESET)
485	goto restart;	491	goto restart;
		492
		493	/* Hack, wait for the other end to reset itself. */
		494	if (! (cnx->state & VETH_STATE_SHUTDOWN)) {
		495	schedule_delayed_work(&cnx->statemachine_wq, 5 * HZ);
		496	goto out;
		497	}
486	}	498	}
487		499
488	if (cnx->state & VETH_STATE_SHUTDOWN)	500	if (cnx->state & VETH_STATE_SHUTDOWN)
@@ -667,6 +679,15 @@ static void veth_stop_connection(u8 rlp)
667	veth_kick_statemachine(cnx);	679	veth_kick_statemachine(cnx);
668	spin_unlock_irq(&cnx->lock);	680	spin_unlock_irq(&cnx->lock);
669		681
		682	/* There's a slim chance the reset code has just queued the
		683	* statemachine to run in five seconds. If so we need to cancel
		684	* that and requeue the work to run now. */
		685	if (cancel_delayed_work(&cnx->statemachine_wq)) {
		686	spin_lock_irq(&cnx->lock);
		687	veth_kick_statemachine(cnx);
		688	spin_unlock_irq(&cnx->lock);
		689	}
		690
670	/* Wait for the state machine to run. */	691	/* Wait for the state machine to run. */
671	flush_scheduled_work();	692	flush_scheduled_work();
672		693