aboutsummaryrefslogtreecommitdiffstats
path: root/net/tipc/node.c
diff options
context:
space:
mode:
authorLUU Duc Canh <canh.d.luu@dektech.com.au>2018-09-26 15:00:54 -0400
committerDavid S. Miller <davem@davemloft.net>2018-09-29 14:45:14 -0400
commitc140eb166d681f66bd7e99fb121357db1a503e7f (patch)
tree9eda87117d15507035fd046a9f9c3d610f0013fc /net/tipc/node.c
parent418b9a353a821f5d1787fd310d2af31232e9ff32 (diff)
tipc: fix failover problem
We see the following scenario: 1) Link endpoint B on node 1 discovers that its peer endpoint is gone. Since there is a second working link, the failover procedure is started. 2) Link endpoint A on node 1 sends a FAILOVER message to peer endpoint A on node 2. The node item 1->2 goes to state FAILINGOVER. 3) Link endpoint A/2 receives the failover, and is supposed to take down its parallel link endpoint B/2, while producing a FAILOVER message to send back to A/1. 4) However, B/2 has already been deleted, so no FAILOVER message can be created. 5) Node 1->2 remains in state FAILINGOVER forever, refusing to receive any messages that can bring B/1 up again. We are left with a non-redundant link between node 1 and 2. We fix this by letting endpoint A/2 build a dummy FAILOVER message to send back to A/1, so that the situation can be resolved. Signed-off-by: LUU Duc Canh <canh.d.luu@dektech.com.au> Signed-off-by: Jon Maloy <jon.maloy@ericsson.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/tipc/node.c')
-rw-r--r--net/tipc/node.c11
1 files changed, 11 insertions, 0 deletions
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 68014f1b6976..b0ee25f1f2e6 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -111,6 +111,7 @@ struct tipc_node {
111 int action_flags; 111 int action_flags;
112 struct list_head list; 112 struct list_head list;
113 int state; 113 int state;
114 bool failover_sent;
114 u16 sync_point; 115 u16 sync_point;
115 int link_cnt; 116 int link_cnt;
116 u16 working_links; 117 u16 working_links;
@@ -680,6 +681,7 @@ static void __tipc_node_link_up(struct tipc_node *n, int bearer_id,
680 *slot0 = bearer_id; 681 *slot0 = bearer_id;
681 *slot1 = bearer_id; 682 *slot1 = bearer_id;
682 tipc_node_fsm_evt(n, SELF_ESTABL_CONTACT_EVT); 683 tipc_node_fsm_evt(n, SELF_ESTABL_CONTACT_EVT);
684 n->failover_sent = false;
683 n->action_flags |= TIPC_NOTIFY_NODE_UP; 685 n->action_flags |= TIPC_NOTIFY_NODE_UP;
684 tipc_link_set_active(nl, true); 686 tipc_link_set_active(nl, true);
685 tipc_bcast_add_peer(n->net, nl, xmitq); 687 tipc_bcast_add_peer(n->net, nl, xmitq);
@@ -1615,6 +1617,15 @@ static bool tipc_node_check_state(struct tipc_node *n, struct sk_buff *skb,
1615 tipc_skb_queue_splice_tail_init(tipc_link_inputq(pl), 1617 tipc_skb_queue_splice_tail_init(tipc_link_inputq(pl),
1616 tipc_link_inputq(l)); 1618 tipc_link_inputq(l));
1617 } 1619 }
1620 /* If parallel link was already down, and this happened before
1621 * the tunnel link came up, FAILOVER was never sent. Ensure that
1622 * FAILOVER is sent to get peer out of NODE_FAILINGOVER state.
1623 */
1624 if (n->state != NODE_FAILINGOVER && !n->failover_sent) {
1625 tipc_link_create_dummy_tnl_msg(l, xmitq);
1626 n->failover_sent = true;
1627 }
1628
1618 /* If pkts arrive out of order, use lowest calculated syncpt */ 1629 /* If pkts arrive out of order, use lowest calculated syncpt */
1619 if (less(syncpt, n->sync_point)) 1630 if (less(syncpt, n->sync_point))
1620 n->sync_point = syncpt; 1631 n->sync_point = syncpt;