aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTuong Lien <tuong.t.lien@dektech.com.au>2019-02-11 01:29:43 -0500
committerDavid S. Miller <davem@davemloft.net>2019-02-12 00:26:20 -0500
commit91986ee166cf0816ae92668476ea7872d51b0c6e (patch)
tree62fc8c2a5726a7dfe3f0498cc4f613207c285553
parente75913c93f7cd5f338ab373c34c93a655bd309cb (diff)
tipc: fix link session and re-establish issues
When a link endpoint is re-created (e.g. after a node reboot or interface reset), the link session number is varied by random, the peer endpoint will be synced with this new session number before the link is re-established. However, there is a shortcoming in this mechanism that can lead to the link never re-established or faced with a failure then. It happens when the peer endpoint is ready in ESTABLISHING state, the 'peer_session' as well as the 'in_session' flag have been set, but suddenly this link endpoint leaves. When it comes back with a random session number, there are two situations possible: 1/ If the random session number is larger than (or equal to) the previous one, the peer endpoint will be updated with this new session upon receipt of a RESET_MSG from this endpoint, and the link can be re- established as normal. Otherwise, all the RESET_MSGs from this endpoint will be rejected by the peer. In turn, when this link endpoint receives one ACTIVATE_MSG from the peer, it will move to ESTABLISHED and start to send STATE_MSGs, but again these messages will be dropped by the peer due to wrong session. The peer link endpoint can still become ESTABLISHED after receiving a traffic message from this endpoint (e.g. a BCAST_PROTOCOL or NAME_DISTRIBUTOR), but since all the STATE_MSGs are invalid, the link will be forced down sooner or later! Even in case the random session number is larger than the previous one, it can be that the ACTIVATE_MSG from the peer arrives first, and this link endpoint moves quickly to ESTABLISHED without sending out any RESET_MSG yet. Consequently, the peer link will not be updated with the new session number, and the same link failure scenario as above will happen. 2/ Another situation can be that, the peer link endpoint was reset due to any reasons in the meantime, its link state was set to RESET from ESTABLISHING but still in session, i.e. the 'in_session' flag is not reset... Now, if the random session number from this endpoint is less than the previous one, all the RESET_MSGs from this endpoint will be rejected by the peer. In the other direction, when this link endpoint receives a RESET_MSG from the peer, it moves to ESTABLISHING and starts to send ACTIVATE_MSGs, but all these messages will be rejected by the peer too. As a result, the link cannot be re-established but gets stuck with this link endpoint in state ESTABLISHING and the peer in RESET! Solution: =========== This link endpoint should not go directly to ESTABLISHED when getting ACTIVATE_MSG from the peer which may belong to the old session if the link was re-created. To ensure the session to be correct before the link is re-established, the peer endpoint in ESTABLISHING state will send back the last session number in ACTIVATE_MSG for a verification at this endpoint. Then, if needed, a new and more appropriate session number will be regenerated to force a re-synch first. In addition, when a link in ESTABLISHING state is reset, its state will move to RESET according to the link FSM, along with resetting the 'in_session' flag (and the other data) as a normal link reset, it will also be deleted if requested. The solution is backward compatible. Acked-by: Jon Maloy <jon.maloy@ericsson.com> Acked-by: Ying Xue <ying.xue@windriver.com> Signed-off-by: Tuong Lien <tuong.t.lien@dektech.com.au> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--net/tipc/link.c15
-rw-r--r--net/tipc/msg.h22
-rw-r--r--net/tipc/node.c11
3 files changed, 43 insertions, 5 deletions
diff --git a/net/tipc/link.c b/net/tipc/link.c
index 7c70034b1073..85ad5c0678d0 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -1425,6 +1425,10 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe,
1425 l->rcv_unacked = 0; 1425 l->rcv_unacked = 0;
1426 } else { 1426 } else {
1427 /* RESET_MSG or ACTIVATE_MSG */ 1427 /* RESET_MSG or ACTIVATE_MSG */
1428 if (mtyp == ACTIVATE_MSG) {
1429 msg_set_dest_session_valid(hdr, 1);
1430 msg_set_dest_session(hdr, l->peer_session);
1431 }
1428 msg_set_max_pkt(hdr, l->advertised_mtu); 1432 msg_set_max_pkt(hdr, l->advertised_mtu);
1429 strcpy(data, l->if_name); 1433 strcpy(data, l->if_name);
1430 msg_set_size(hdr, INT_H_SIZE + TIPC_MAX_IF_NAME); 1434 msg_set_size(hdr, INT_H_SIZE + TIPC_MAX_IF_NAME);
@@ -1642,6 +1646,17 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
1642 rc = tipc_link_fsm_evt(l, LINK_FAILURE_EVT); 1646 rc = tipc_link_fsm_evt(l, LINK_FAILURE_EVT);
1643 break; 1647 break;
1644 } 1648 }
1649
1650 /* If this endpoint was re-created while peer was ESTABLISHING
1651 * it doesn't know current session number. Force re-synch.
1652 */
1653 if (mtyp == ACTIVATE_MSG && msg_dest_session_valid(hdr) &&
1654 l->session != msg_dest_session(hdr)) {
1655 if (less(l->session, msg_dest_session(hdr)))
1656 l->session = msg_dest_session(hdr) + 1;
1657 break;
1658 }
1659
1645 /* ACTIVATE_MSG serves as PEER_RESET if link is already down */ 1660 /* ACTIVATE_MSG serves as PEER_RESET if link is already down */
1646 if (mtyp == RESET_MSG || !link_is_up(l)) 1661 if (mtyp == RESET_MSG || !link_is_up(l))
1647 rc = tipc_link_fsm_evt(l, LINK_PEER_RESET_EVT); 1662 rc = tipc_link_fsm_evt(l, LINK_PEER_RESET_EVT);
diff --git a/net/tipc/msg.h b/net/tipc/msg.h
index a0924956bb61..d7e4b8b93f9d 100644
--- a/net/tipc/msg.h
+++ b/net/tipc/msg.h
@@ -360,6 +360,28 @@ static inline void msg_set_bcast_ack(struct tipc_msg *m, u16 n)
360 msg_set_bits(m, 1, 0, 0xffff, n); 360 msg_set_bits(m, 1, 0, 0xffff, n);
361} 361}
362 362
363/* Note: reusing bits in word 1 for ACTIVATE_MSG only, to re-synch
364 * link peer session number
365 */
366static inline bool msg_dest_session_valid(struct tipc_msg *m)
367{
368 return msg_bits(m, 1, 16, 0x1);
369}
370
371static inline void msg_set_dest_session_valid(struct tipc_msg *m, bool valid)
372{
373 msg_set_bits(m, 1, 16, 0x1, valid);
374}
375
376static inline u16 msg_dest_session(struct tipc_msg *m)
377{
378 return msg_bits(m, 1, 0, 0xffff);
379}
380
381static inline void msg_set_dest_session(struct tipc_msg *m, u16 n)
382{
383 msg_set_bits(m, 1, 0, 0xffff, n);
384}
363 385
364/* 386/*
365 * Word 2 387 * Word 2
diff --git a/net/tipc/node.c b/net/tipc/node.c
index db2a6c3e0be9..2dc4919ab23c 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -830,15 +830,16 @@ static void tipc_node_link_down(struct tipc_node *n, int bearer_id, bool delete)
830 tipc_node_write_lock(n); 830 tipc_node_write_lock(n);
831 if (!tipc_link_is_establishing(l)) { 831 if (!tipc_link_is_establishing(l)) {
832 __tipc_node_link_down(n, &bearer_id, &xmitq, &maddr); 832 __tipc_node_link_down(n, &bearer_id, &xmitq, &maddr);
833 if (delete) {
834 kfree(l);
835 le->link = NULL;
836 n->link_cnt--;
837 }
838 } else { 833 } else {
839 /* Defuse pending tipc_node_link_up() */ 834 /* Defuse pending tipc_node_link_up() */
835 tipc_link_reset(l);
840 tipc_link_fsm_evt(l, LINK_RESET_EVT); 836 tipc_link_fsm_evt(l, LINK_RESET_EVT);
841 } 837 }
838 if (delete) {
839 kfree(l);
840 le->link = NULL;
841 n->link_cnt--;
842 }
842 trace_tipc_node_link_down(n, true, "node link down or deleted!"); 843 trace_tipc_node_link_down(n, true, "node link down or deleted!");
843 tipc_node_write_unlock(n); 844 tipc_node_write_unlock(n);
844 if (delete) 845 if (delete)