summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTuong Lien <tuong.t.lien@dektech.com.au>2019-05-02 06:23:23 -0400
committerDavid S. Miller <davem@davemloft.net>2019-05-04 00:59:51 -0400
commitc0b14a0854fab0a0164aabfe49a76aae9216fe97 (patch)
tree67ed783089967ded1d9578e615076e4d54255ba4
parentf24098f80748ea95d53603a7bb7954a41bb3ca1b (diff)
tipc: fix missing Name entries due to half-failover
A TIPC link can temporarily fall into a "half-established" state in which only one of the link endpoints is ESTABLISHED and starts to send traffic and PROTOCOL messages, whereas the other link endpoint is not up (e.g. the network interface goes down immediately after the endpoint receives ACTIVATE_MSG...). This is a normal situation and will be settled because the link endpoint will eventually be brought down after the link tolerance time. However, the situation becomes worse when the second link is established before the first link endpoint goes down. For example: 1. Both links <1A-2A>, <1B-2B> down 2. Link endpoint 2A up, but 1A still down (e.g. due to network disturbance, wrong session, etc.) 3. Link <1B-2B> up 4. Link endpoint 2A down (e.g. due to link tolerance timeout) 5. Node B starts failover onto link <1B-2B> ==> Node A never starts link failover. When the "half-failover" situation happens, two consequences have been observed: a) Peer link/node gets stuck in FAILINGOVER state; b) Traffic or user messages that the peer node is trying to fail over onto the second link can be partially or completely dropped by this node. Consequence a) was actually solved by commit c140eb166d68 ("tipc: fix failover problem"), but that commit didn't cover consequence b). This is because the tunnel link endpoint has never been prepared for a failover, so the 'l->drop_point' (and the other data...) is not set correctly. When a TUNNEL_MSG from the peer node arrives on the link, depending on the inner message's seqno and the current 'l->drop_point' value, the message can be dropped (i.e. treated as a duplicate message) or processed. At this early stage, the traffic messages from the peer are likely to be NAME_DISTRIBUTORs, which means some name table entries will be missing on the node forever! This commit resolves the issue by starting the FAILOVER process on this node as well. Another benefit of this solution is that it ensures the link will not be re-established until the failover ends. 
Acked-by: Jon Maloy <jon.maloy@ericsson.com> Signed-off-by: Tuong Lien <tuong.t.lien@dektech.com.au> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--net/tipc/link.c35
-rw-r--r--net/tipc/link.h2
-rw-r--r--net/tipc/node.c54
3 files changed, 84 insertions, 7 deletions
diff --git a/net/tipc/link.c b/net/tipc/link.c
index 1c514b64a0a9..f5cd986e1e50 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -1705,6 +1705,41 @@ tnl:
1705 } 1705 }
1706} 1706}
1707 1707
1708/**
1709 * tipc_link_failover_prepare() - prepare tnl for link failover
1710 *
1711 * This is a special version of the precursor - tipc_link_tnl_prepare(),
1712 * see tipc_node_link_failover() for details
1713 *
1714 * @l: failover link (NULL-checked before use here)
1715 * @tnl: tunnel link
1716 * @xmitq: queue for messages to be transmitted
1717 */
1718void tipc_link_failover_prepare(struct tipc_link *l, struct tipc_link *tnl,
1719 struct sk_buff_head *xmitq)
1720{
1721 struct sk_buff_head *fdefq = &tnl->failover_deferdq;
1722
1723 tipc_link_create_dummy_tnl_msg(tnl, xmitq);
1724
1725 /* This failover link endpoint was never established before,
1726 * so it has not received anything from peer.
1727 * Otherwise, it must be a normal failover situation or the
1728 * node has entered SELF_DOWN_PEER_LEAVING and both peer nodes
1729 * would have to start over from scratch instead.
1730 */
1731 WARN_ON(l && tipc_link_is_up(l));
1732 tnl->drop_point = 1;
1733 tnl->failover_reasm_skb = NULL;
1734
1735 /* Start with an empty failover deferdq: warn about and purge leftovers */
1736 if (unlikely(!skb_queue_empty(fdefq))) {
1737 pr_warn("Link failover deferdq not empty: %d!\n",
1738 skb_queue_len(fdefq));
1739 __skb_queue_purge(fdefq);
1740 }
1741}
1742
1708/* tipc_link_validate_msg(): validate message against current link state 1743/* tipc_link_validate_msg(): validate message against current link state
1709 * Returns true if message should be accepted, otherwise false 1744 * Returns true if message should be accepted, otherwise false
1710 */ 1745 */
diff --git a/net/tipc/link.h b/net/tipc/link.h
index 8439e0ee53a8..adcad65e761c 100644
--- a/net/tipc/link.h
+++ b/net/tipc/link.h
@@ -90,6 +90,8 @@ void tipc_link_tnl_prepare(struct tipc_link *l, struct tipc_link *tnl,
90 int mtyp, struct sk_buff_head *xmitq); 90 int mtyp, struct sk_buff_head *xmitq);
91void tipc_link_create_dummy_tnl_msg(struct tipc_link *tnl, 91void tipc_link_create_dummy_tnl_msg(struct tipc_link *tnl,
92 struct sk_buff_head *xmitq); 92 struct sk_buff_head *xmitq);
93void tipc_link_failover_prepare(struct tipc_link *l, struct tipc_link *tnl,
94 struct sk_buff_head *xmitq);
93void tipc_link_build_reset_msg(struct tipc_link *l, struct sk_buff_head *xmitq); 95void tipc_link_build_reset_msg(struct tipc_link *l, struct sk_buff_head *xmitq);
94int tipc_link_fsm_evt(struct tipc_link *l, int evt); 96int tipc_link_fsm_evt(struct tipc_link *l, int evt);
95bool tipc_link_is_up(struct tipc_link *l); 97bool tipc_link_is_up(struct tipc_link *l);
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 0eb1bf850219..9e106d3ed187 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -714,7 +714,6 @@ static void __tipc_node_link_up(struct tipc_node *n, int bearer_id,
714 *slot0 = bearer_id; 714 *slot0 = bearer_id;
715 *slot1 = bearer_id; 715 *slot1 = bearer_id;
716 tipc_node_fsm_evt(n, SELF_ESTABL_CONTACT_EVT); 716 tipc_node_fsm_evt(n, SELF_ESTABL_CONTACT_EVT);
717 n->failover_sent = false;
718 n->action_flags |= TIPC_NOTIFY_NODE_UP; 717 n->action_flags |= TIPC_NOTIFY_NODE_UP;
719 tipc_link_set_active(nl, true); 718 tipc_link_set_active(nl, true);
720 tipc_bcast_add_peer(n->net, nl, xmitq); 719 tipc_bcast_add_peer(n->net, nl, xmitq);
@@ -757,6 +756,45 @@ static void tipc_node_link_up(struct tipc_node *n, int bearer_id,
757} 756}
758 757
759/** 758/**
759 * tipc_node_link_failover() - start failover in case of "half-failover"
760 *
761 * This function is only called in a very special situation where link
762 * failover may already have been started on peer node but not on this node.
763 * This can happen when e.g.
764 * 1. Both links <1A-2A>, <1B-2B> down
765 * 2. Link endpoint 2A up, but 1A still down (e.g. due to network
766 * disturbance, wrong session, etc.)
767 * 3. Link <1B-2B> up
768 * 4. Link endpoint 2A down (e.g. due to link tolerance timeout)
769 * 5. Node B starts failover onto link <1B-2B>
770 *
771 * ==> Node A never starts link/node failover!
772 *
773 * @n: tipc node structure
774 * @l: failing-over link endpoint (may be NULL)
775 * @tnl: tunnel link
776 * @xmitq: queue for messages to be transmitted on tnl link later
777 */
778static void tipc_node_link_failover(struct tipc_node *n, struct tipc_link *l,
779 struct tipc_link *tnl,
780 struct sk_buff_head *xmitq)
781{
782 /* Avoid a "self-failover" that can never end: tnl must be up */
783 if (!tipc_link_is_up(tnl))
784 return;
785
786 tipc_link_fsm_evt(tnl, LINK_SYNCH_END_EVT);
787 tipc_node_fsm_evt(n, NODE_SYNCH_END_EVT);
788
789 n->sync_point = tipc_link_rcv_nxt(tnl) + (U16_MAX / 2 - 1);
790 tipc_link_failover_prepare(l, tnl, xmitq);
791
792 if (l)
793 tipc_link_fsm_evt(l, LINK_FAILOVER_BEGIN_EVT);
794 tipc_node_fsm_evt(n, NODE_FAILOVER_BEGIN_EVT);
795}
796
797/**
760 * __tipc_node_link_down - handle loss of link 798 * __tipc_node_link_down - handle loss of link
761 */ 799 */
762static void __tipc_node_link_down(struct tipc_node *n, int *bearer_id, 800static void __tipc_node_link_down(struct tipc_node *n, int *bearer_id,
@@ -1675,14 +1713,16 @@ static bool tipc_node_check_state(struct tipc_node *n, struct sk_buff *skb,
1675 tipc_skb_queue_splice_tail_init(tipc_link_inputq(pl), 1713 tipc_skb_queue_splice_tail_init(tipc_link_inputq(pl),
1676 tipc_link_inputq(l)); 1714 tipc_link_inputq(l));
1677 } 1715 }
1716
1678 /* If parallel link was already down, and this happened before 1717 /* If parallel link was already down, and this happened before
1679 * the tunnel link came up, FAILOVER was never sent. Ensure that 1718 * the tunnel link came up, node failover was never started.
1680 * FAILOVER is sent to get peer out of NODE_FAILINGOVER state. 1719 * Ensure that a FAILOVER_MSG is sent to get peer out of
1720 * NODE_FAILINGOVER state, also this node must accept
1721 * TUNNEL_MSGs from peer.
1681 */ 1722 */
1682 if (n->state != NODE_FAILINGOVER && !n->failover_sent) { 1723 if (n->state != NODE_FAILINGOVER)
1683 tipc_link_create_dummy_tnl_msg(l, xmitq); 1724 tipc_node_link_failover(n, pl, l, xmitq);
1684 n->failover_sent = true; 1725
1685 }
1686 /* If pkts arrive out of order, use lowest calculated syncpt */ 1726 /* If pkts arrive out of order, use lowest calculated syncpt */
1687 if (less(syncpt, n->sync_point)) 1727 if (less(syncpt, n->sync_point))
1688 n->sync_point = syncpt; 1728 n->sync_point = syncpt;