summaryrefslogtreecommitdiffstats
path: root/net/tipc
diff options
context:
space:
mode:
author Tuong Lien <tuong.t.lien@dektech.com.au> 2019-06-17 00:56:12 -0400
committer David S. Miller <davem@davemloft.net> 2019-06-18 13:03:37 -0400
commit d0f84d0856c11fbafadae3d580f6a9c98d818ccd (patch)
tree 87f3ab42b8084f0eda9991f31d90f5d1364141b8 /net/tipc
parent cf18cecca911c0db96b868072665347efe6df46f (diff)
tipc: fix issues with early FAILOVER_MSG from peer
It appears that a FAILOVER_MSG can come from the peer even when the failure
link is resetting (i.e. just after the 'node_write_unlock()'...). This
means the failover procedure on the node has not been started yet.
The situation is as follows:

         node1                                node2
  linkb          linka                  linka        linkb
    |              |                      |            |
    |              |                      x failure    |
    |              |                  RESETTING        |
    |              |                      |            |
    |              x failure            RESET          |
    |          RESETTING             FAILINGOVER       |
    |              |   (FAILOVER_MSG)     |            |
    |<-------------------------------------------------|
    | *FAILINGOVER |                      |            |
    |              | (dummy FAILOVER_MSG) |            |
    |------------------------------------------------->|
    |            RESET                    |            | FAILOVER_END
    |         FAILINGOVER               RESET          |
    .              .                      .            .
    .              .                      .            .
    .              .                      .            .

Once this happens, the link failover procedure will be triggered wrongly
on the receiving node, since the node is not in FAILINGOVER state yet, and
then another link failover will be carried out.
The consequences are:

1) A peer might get stuck in FAILINGOVER state because the 'sync_point'
was set, reset and set incorrectly; the criteria to end the failover would
not be met, and it could keep waiting for a message that has already been
received.

2) The early FAILOVER_MSG(s) could be queued in the link failover deferdq
but would be purged or not pulled out because the 'drop_point' was not set
correctly.

3) The early FAILOVER_MSG(s) could be dropped too.

4) The dummy FAILOVER_MSG could make the peer leave FAILINGOVER state
shortly, but later on it would be restarted.

The same situation can also happen when the link is in PEER_RESET state
and a FAILOVER_MSG arrives.

The commit resolves the issues by forcing the link down immediately, so
the failover procedure will be started normally (which is the same as when
receiving a FAILOVER_MSG while the link is in the up state).

Also, the function "tipc_node_link_failover()" is toughened to prevent such
a situation from happening.

Acked-by: Jon Maloy <jon.maloy@ericsson.se>
Signed-off-by: Tuong Lien <tuong.t.lien@dektech.com.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/tipc')
-rw-r--r-- net/tipc/link.c | 1
-rw-r--r-- net/tipc/node.c | 10
2 files changed, 7 insertions, 4 deletions
diff --git a/net/tipc/link.c b/net/tipc/link.c
index f5cd986e1e50..2050fd386642 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -1728,7 +1728,6 @@ void tipc_link_failover_prepare(struct tipc_link *l, struct tipc_link *tnl,
1728 * node has entered SELF_DOWN_PEER_LEAVING and both peer nodes 1728 * node has entered SELF_DOWN_PEER_LEAVING and both peer nodes
1729 * would have to start over from scratch instead. 1729 * would have to start over from scratch instead.
1730 */ 1730 */
1731 WARN_ON(l && tipc_link_is_up(l));
1732 tnl->drop_point = 1; 1731 tnl->drop_point = 1;
1733 tnl->failover_reasm_skb = NULL; 1732 tnl->failover_reasm_skb = NULL;
1734 1733
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 9e106d3ed187..550581d47d51 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -766,9 +766,9 @@ static void tipc_node_link_up(struct tipc_node *n, int bearer_id,
766 * disturbance, wrong session, etc.) 766 * disturbance, wrong session, etc.)
767 * 3. Link <1B-2B> up 767 * 3. Link <1B-2B> up
768 * 4. Link endpoint 2A down (e.g. due to link tolerance timeout) 768 * 4. Link endpoint 2A down (e.g. due to link tolerance timeout)
769 * 5. Node B starts failover onto link <1B-2B> 769 * 5. Node 2 starts failover onto link <1B-2B>
770 * 770 *
771 * ==> Node A does never start link/node failover! 771 * ==> Node 1 does never start link/node failover!
772 * 772 *
773 * @n: tipc node structure 773 * @n: tipc node structure
774 * @l: link peer endpoint failingover (- can be NULL) 774 * @l: link peer endpoint failingover (- can be NULL)
@@ -783,6 +783,10 @@ static void tipc_node_link_failover(struct tipc_node *n, struct tipc_link *l,
783 if (!tipc_link_is_up(tnl)) 783 if (!tipc_link_is_up(tnl))
784 return; 784 return;
785 785
786 /* Don't rush, failure link may be in the process of resetting */
787 if (l && !tipc_link_is_reset(l))
788 return;
789
786 tipc_link_fsm_evt(tnl, LINK_SYNCH_END_EVT); 790 tipc_link_fsm_evt(tnl, LINK_SYNCH_END_EVT);
787 tipc_node_fsm_evt(n, NODE_SYNCH_END_EVT); 791 tipc_node_fsm_evt(n, NODE_SYNCH_END_EVT);
788 792
@@ -1706,7 +1710,7 @@ static bool tipc_node_check_state(struct tipc_node *n, struct sk_buff *skb,
1706 /* Initiate or update failover mode if applicable */ 1710 /* Initiate or update failover mode if applicable */
1707 if ((usr == TUNNEL_PROTOCOL) && (mtyp == FAILOVER_MSG)) { 1711 if ((usr == TUNNEL_PROTOCOL) && (mtyp == FAILOVER_MSG)) {
1708 syncpt = oseqno + exp_pkts - 1; 1712 syncpt = oseqno + exp_pkts - 1;
1709 if (pl && tipc_link_is_up(pl)) { 1713 if (pl && !tipc_link_is_reset(pl)) {
1710 __tipc_node_link_down(n, &pb_id, xmitq, &maddr); 1714 __tipc_node_link_down(n, &pb_id, xmitq, &maddr);
1711 trace_tipc_node_link_down(n, true, 1715 trace_tipc_node_link_down(n, true,
1712 "node link down <- failover!"); 1716 "node link down <- failover!");