aboutsummaryrefslogtreecommitdiffstats
path: root/net/tipc
diff options
context:
space:
mode:
author: Jon Paul Maloy <jon.maloy@ericsson.com> 2015-07-16 16:54:30 -0400
committer: David S. Miller <davem@davemloft.net> 2015-07-20 23:41:16 -0400
commit: 1a20cc254e60e79929ef7edb5cf784df86b46e42 (patch)
tree: 20ace0bcaf31803fe6ffbd048df89be9bb26b8d6 /net/tipc
parent: 8a1577c96f122308ac9b5f195f9f9a7dd74ac541 (diff)
tipc: introduce node contact FSM
The logic for determining when a node is permitted to establish and maintain contact with its peer node becomes non-trivial in the presence of multiple parallel links that may come and go independently.

A known failure scenario is that one endpoint registers both its links to the peer as lost, cleans up its binding table, and prepares for a table update once contact is re-established, while the other endpoint may see its links reset and re-established one by one, hence seeing no need to re-synchronize the binding table. To avoid this, a node must not allow re-establishing contact until it has confirmation that the peer, too, has lost both links.

Currently, the mechanism for handling this consists of setting and resetting two state flags from different locations in the code. This solution is hard to understand and maintain. A closer analysis even reveals that it is not completely safe.

In this commit we instead introduce an FSM that keeps track of the conditions for when the node can establish and maintain links. It has six states and four events, and is strictly based on explicit knowledge about the own node's and the peer node's contact states. Only events leading to a state change are shown as edges in the figure below.

                             +--------------+
                             | SELF_UP/     |
           +---------------->| PEER_COMING  |-----------------+
    SELF_  |                 +--------------+                 |PEER_
    ESTBL_ |                        |                         |ESTBL_
    CONTACT|      SELF_LOST_CONTACT |                         |CONTACT
           |                        v                         |
           |                 +--------------+                 |
           |      PEER_      | SELF_DOWN/   |     SELF_       |
           |      LOST_   +--| PEER_LEAVING |<--+ LOST_       v
+-------------+   CONTACT |  +--------------+   | CONTACT  +-----------+
| SELF_DOWN/  |<----------+                     +----------| SELF_UP/  |
| PEER_DOWN   |<----------+                     +----------| PEER_UP   |
+-------------+   SELF_   |  +--------------+   | PEER_    +-----------+
           |      LOST_   +--| SELF_LEAVING/|<--+ LOST_       A
           |      CONTACT    | PEER_DOWN    |     CONTACT     |
           |                 +--------------+                 |
           |                        A                         |
    PEER_  |      PEER_LOST_CONTACT |                         |SELF_
    ESTBL_ |                        |                         |ESTBL_
    CONTACT|                 +--------------+                 |CONTACT
           +---------------->| PEER_UP/     |-----------------+
                             | SELF_COMING  |
                             +--------------+

Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/tipc')
-rw-r--r-- net/tipc/link.c 74
-rw-r--r-- net/tipc/msg.h 7
-rw-r--r-- net/tipc/node.c 130
-rw-r--r-- net/tipc/node.h 28
4 files changed, 185 insertions, 54 deletions
diff --git a/net/tipc/link.c b/net/tipc/link.c
index 5b4609bd0ddc..eaccf4552d15 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -911,9 +911,13 @@ static void link_retransmit_failure(struct tipc_link *l_ptr,
911 911
912 if (l_ptr->addr) { 912 if (l_ptr->addr) {
913 /* Handle failure on standard link */ 913 /* Handle failure on standard link */
914 link_print(l_ptr, "Resetting link\n"); 914 link_print(l_ptr, "Resetting link ");
915 pr_info("Failed msg: usr %u, typ %u, len %u, err %u\n",
916 msg_user(msg), msg_type(msg), msg_size(msg),
917 msg_errcode(msg));
918 pr_info("sqno %u, prev: %x, src: %x\n",
919 msg_seqno(msg), msg_prevnode(msg), msg_orignode(msg));
915 tipc_link_reset(l_ptr); 920 tipc_link_reset(l_ptr);
916
917 } else { 921 } else {
918 /* Handle failure on broadcast link */ 922 /* Handle failure on broadcast link */
919 struct tipc_node *n_ptr; 923 struct tipc_node *n_ptr;
@@ -1067,15 +1071,8 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b_ptr)
1067 if (unlikely(!l_ptr)) 1071 if (unlikely(!l_ptr))
1068 goto unlock; 1072 goto unlock;
1069 1073
1070 /* Verify that communication with node is currently allowed */ 1074 /* Is reception of this pkt permitted at the moment ? */
1071 if ((n_ptr->action_flags & TIPC_WAIT_PEER_LINKS_DOWN) && 1075 if (!tipc_node_filter_skb(n_ptr, msg))
1072 msg_user(msg) == LINK_PROTOCOL &&
1073 (msg_type(msg) == RESET_MSG ||
1074 msg_type(msg) == ACTIVATE_MSG) &&
1075 !msg_redundant_link(msg))
1076 n_ptr->action_flags &= ~TIPC_WAIT_PEER_LINKS_DOWN;
1077
1078 if (tipc_node_blocked(n_ptr))
1079 goto unlock; 1076 goto unlock;
1080 1077
1081 /* Validate message sequence number info */ 1078 /* Validate message sequence number info */
@@ -1371,15 +1368,6 @@ static void tipc_link_proto_rcv(struct tipc_link *l_ptr,
1371 if (less_eq(msg_session(msg), l_ptr->peer_session)) 1368 if (less_eq(msg_session(msg), l_ptr->peer_session))
1372 break; /* duplicate or old reset: ignore */ 1369 break; /* duplicate or old reset: ignore */
1373 } 1370 }
1374
1375 if (!msg_redundant_link(msg) && (link_working(l_ptr) ||
1376 link_probing(l_ptr))) {
1377 /* peer has lost contact -- don't allow peer's links
1378 * to reactivate before we recognize loss & clean up
1379 */
1380 l_ptr->owner->action_flags |= TIPC_WAIT_OWN_LINKS_DOWN;
1381 }
1382
1383 link_state_event(l_ptr, RESET_MSG); 1371 link_state_event(l_ptr, RESET_MSG);
1384 1372
1385 /* fall thru' */ 1373 /* fall thru' */
@@ -1408,6 +1396,8 @@ static void tipc_link_proto_rcv(struct tipc_link *l_ptr,
1408 l_ptr->peer_session = msg_session(msg); 1396 l_ptr->peer_session = msg_session(msg);
1409 l_ptr->peer_bearer_id = msg_bearer_id(msg); 1397 l_ptr->peer_bearer_id = msg_bearer_id(msg);
1410 1398
1399 if (!msg_peer_is_up(msg))
1400 tipc_node_fsm_evt(l_ptr->owner, PEER_LOST_CONTACT_EVT);
1411 if (msg_type(msg) == ACTIVATE_MSG) 1401 if (msg_type(msg) == ACTIVATE_MSG)
1412 link_state_event(l_ptr, ACTIVATE_MSG); 1402 link_state_event(l_ptr, ACTIVATE_MSG);
1413 break; 1403 break;
@@ -1419,11 +1409,11 @@ static void tipc_link_proto_rcv(struct tipc_link *l_ptr,
1419 1409
1420 if (msg_linkprio(msg) && 1410 if (msg_linkprio(msg) &&
1421 (msg_linkprio(msg) != l_ptr->priority)) { 1411 (msg_linkprio(msg) != l_ptr->priority)) {
1422 pr_debug("%s<%s>, priority change %u->%u\n", 1412 pr_info("%s<%s>, priority change %u->%u\n",
1423 link_rst_msg, l_ptr->name, 1413 link_rst_msg, l_ptr->name,
1424 l_ptr->priority, msg_linkprio(msg)); 1414 l_ptr->priority, msg_linkprio(msg));
1425 l_ptr->priority = msg_linkprio(msg); 1415 l_ptr->priority = msg_linkprio(msg);
1426 tipc_link_reset(l_ptr); /* Enforce change to take effect */ 1416 tipc_link_reset(l_ptr);
1427 break; 1417 break;
1428 } 1418 }
1429 1419
@@ -1446,15 +1436,18 @@ static void tipc_link_proto_rcv(struct tipc_link *l_ptr,
1446 tipc_bclink_update_link_state(l_ptr->owner, 1436 tipc_bclink_update_link_state(l_ptr->owner,
1447 msg_last_bcast(msg)); 1437 msg_last_bcast(msg));
1448 1438
1449 if (rec_gap || (msg_probe(msg))) { 1439 if (rec_gap || (msg_probe(msg)))
1450 tipc_link_proto_xmit(l_ptr, STATE_MSG, 0, 1440 tipc_link_proto_xmit(l_ptr, STATE_MSG, 0,
1451 rec_gap, 0, 0); 1441 rec_gap, 0, 0);
1452 } 1442
1453 if (msg_seq_gap(msg)) { 1443 if (msg_seq_gap(msg)) {
1454 l_ptr->stats.recv_nacks++; 1444 l_ptr->stats.recv_nacks++;
1455 tipc_link_retransmit(l_ptr, skb_peek(&l_ptr->transmq), 1445 tipc_link_retransmit(l_ptr, skb_peek(&l_ptr->transmq),
1456 msg_seq_gap(msg)); 1446 msg_seq_gap(msg));
1457 } 1447 }
1448 if (tipc_link_is_up(l_ptr))
1449 tipc_node_fsm_evt(l_ptr->owner,
1450 PEER_ESTABL_CONTACT_EVT);
1458 break; 1451 break;
1459 } 1452 }
1460exit: 1453exit:
@@ -1478,10 +1471,6 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe,
1478 if (l->exec_mode == TIPC_LINK_BLOCKED) 1471 if (l->exec_mode == TIPC_LINK_BLOCKED)
1479 return; 1472 return;
1480 1473
1481 /* Abort non-RESET send if communication with node is prohibited */
1482 if ((tipc_node_blocked(l->owner)) && (mtyp != RESET_MSG))
1483 return;
1484
1485 msg_set_type(hdr, mtyp); 1474 msg_set_type(hdr, mtyp);
1486 msg_set_net_plane(hdr, l->net_plane); 1475 msg_set_net_plane(hdr, l->net_plane);
1487 msg_set_bcast_ack(hdr, l->owner->bclink.last_in); 1476 msg_set_bcast_ack(hdr, l->owner->bclink.last_in);
@@ -1799,27 +1788,28 @@ static void link_reset_statistics(struct tipc_link *l_ptr)
1799 l_ptr->stats.recv_info = l_ptr->rcv_nxt; 1788 l_ptr->stats.recv_info = l_ptr->rcv_nxt;
1800} 1789}
1801 1790
1802static void link_print(struct tipc_link *l_ptr, const char *str) 1791static void link_print(struct tipc_link *l, const char *str)
1803{ 1792{
1804 struct tipc_net *tn = net_generic(l_ptr->owner->net, tipc_net_id); 1793 struct sk_buff *hskb = skb_peek(&l->transmq);
1805 struct tipc_bearer *b_ptr; 1794 u16 head = hskb ? msg_seqno(buf_msg(hskb)) : l->snd_nxt;
1795 u16 tail = l->snd_nxt - 1;
1806 1796
1807 rcu_read_lock(); 1797 pr_info("%s Link <%s>:", str, l->name);
1808 b_ptr = rcu_dereference_rtnl(tn->bearer_list[l_ptr->bearer_id]);
1809 if (b_ptr)
1810 pr_info("%s Link %x<%s>:", str, l_ptr->addr, b_ptr->name);
1811 rcu_read_unlock();
1812 1798
1813 if (link_probing(l_ptr)) 1799 if (link_probing(l))
1814 pr_cont(":P\n"); 1800 pr_cont(":P\n");
1815 else if (link_establishing(l_ptr)) 1801 else if (link_establishing(l))
1816 pr_cont(":E\n"); 1802 pr_cont(":E\n");
1817 else if (link_resetting(l_ptr)) 1803 else if (link_resetting(l))
1818 pr_cont(":R\n"); 1804 pr_cont(":R\n");
1819 else if (link_working(l_ptr)) 1805 else if (link_working(l))
1820 pr_cont(":W\n"); 1806 pr_cont(":W\n");
1821 else 1807 else
1822 pr_cont("\n"); 1808 pr_cont("\n");
1809
1810 pr_info("XMTQ: %u [%u-%u], BKLGQ: %u, SNDNX: %u, RCVNX: %u\n",
1811 skb_queue_len(&l->transmq), head, tail,
1812 skb_queue_len(&l->backlogq), l->snd_nxt, l->rcv_nxt);
1823} 1813}
1824 1814
1825/* Parse and validate nested (link) properties valid for media, bearer and link 1815/* Parse and validate nested (link) properties valid for media, bearer and link
diff --git a/net/tipc/msg.h b/net/tipc/msg.h
index 19c45fb66238..4dc66d9f69cc 100644
--- a/net/tipc/msg.h
+++ b/net/tipc/msg.h
@@ -766,6 +766,13 @@ static inline void msg_set_link_tolerance(struct tipc_msg *m, u32 n)
766 msg_set_bits(m, 9, 0, 0xffff, n); 766 msg_set_bits(m, 9, 0, 0xffff, n);
767} 767}
768 768
769static inline bool msg_peer_is_up(struct tipc_msg *m)
770{
771 if (likely(msg_user(m) != LINK_PROTOCOL) || (msg_type(m) == STATE_MSG))
772 return true;
773 return msg_redundant_link(m);
774}
775
769struct sk_buff *tipc_buf_acquire(u32 size); 776struct sk_buff *tipc_buf_acquire(u32 size);
770bool tipc_msg_validate(struct sk_buff *skb); 777bool tipc_msg_validate(struct sk_buff *skb);
771bool tipc_msg_reverse(u32 own_addr, struct sk_buff *buf, u32 *dnode, 778bool tipc_msg_reverse(u32 own_addr, struct sk_buff *buf, u32 *dnode,
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 77effb233725..9dbbb5de287b 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -141,7 +141,7 @@ struct tipc_node *tipc_node_create(struct net *net, u32 addr)
141 break; 141 break;
142 } 142 }
143 list_add_tail_rcu(&n_ptr->list, &temp_node->list); 143 list_add_tail_rcu(&n_ptr->list, &temp_node->list);
144 n_ptr->action_flags = TIPC_WAIT_PEER_LINKS_DOWN; 144 n_ptr->state = SELF_DOWN_PEER_DOWN;
145 n_ptr->signature = INVALID_NODE_SIG; 145 n_ptr->signature = INVALID_NODE_SIG;
146 n_ptr->active_links[0] = INVALID_BEARER_ID; 146 n_ptr->active_links[0] = INVALID_BEARER_ID;
147 n_ptr->active_links[1] = INVALID_BEARER_ID; 147 n_ptr->active_links[1] = INVALID_BEARER_ID;
@@ -421,8 +421,131 @@ void tipc_node_detach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr)
421 } 421 }
422} 422}
423 423
424/* tipc_node_fsm_evt - node finite state machine
425 * Determines when contact is allowed with peer node
426 */
427void tipc_node_fsm_evt(struct tipc_node *n, int evt)
428{
429 int state = n->state;
430
431 switch (state) {
432 case SELF_DOWN_PEER_DOWN:
433 switch (evt) {
434 case SELF_ESTABL_CONTACT_EVT:
435 state = SELF_UP_PEER_COMING;
436 break;
437 case PEER_ESTABL_CONTACT_EVT:
438 state = SELF_COMING_PEER_UP;
439 break;
440 case SELF_LOST_CONTACT_EVT:
441 case PEER_LOST_CONTACT_EVT:
442 break;
443 default:
444 pr_err("Unknown node fsm evt %x/%x\n", state, evt);
445 }
446 break;
447 case SELF_UP_PEER_UP:
448 switch (evt) {
449 case SELF_LOST_CONTACT_EVT:
450 state = SELF_DOWN_PEER_LEAVING;
451 break;
452 case PEER_LOST_CONTACT_EVT:
453 state = SELF_LEAVING_PEER_DOWN;
454 break;
455 case SELF_ESTABL_CONTACT_EVT:
456 case PEER_ESTABL_CONTACT_EVT:
457 break;
458 default:
459 pr_err("Unknown node fsm evt %x/%x\n", state, evt);
460 }
461 break;
462 case SELF_DOWN_PEER_LEAVING:
463 switch (evt) {
464 case PEER_LOST_CONTACT_EVT:
465 state = SELF_DOWN_PEER_DOWN;
466 break;
467 case SELF_ESTABL_CONTACT_EVT:
468 case PEER_ESTABL_CONTACT_EVT:
469 case SELF_LOST_CONTACT_EVT:
470 break;
471 default:
472 pr_err("Unknown node fsm evt %x/%x\n", state, evt);
473 }
474 break;
475 case SELF_UP_PEER_COMING:
476 switch (evt) {
477 case PEER_ESTABL_CONTACT_EVT:
478 state = SELF_UP_PEER_UP;
479 break;
480 case SELF_LOST_CONTACT_EVT:
481 state = SELF_DOWN_PEER_LEAVING;
482 break;
483 case SELF_ESTABL_CONTACT_EVT:
484 case PEER_LOST_CONTACT_EVT:
485 break;
486 default:
487 pr_err("Unknown node fsm evt %x/%x\n", state, evt);
488 }
489 break;
490 case SELF_COMING_PEER_UP:
491 switch (evt) {
492 case SELF_ESTABL_CONTACT_EVT:
493 state = SELF_UP_PEER_UP;
494 break;
495 case PEER_LOST_CONTACT_EVT:
496 state = SELF_LEAVING_PEER_DOWN;
497 break;
498 case SELF_LOST_CONTACT_EVT:
499 case PEER_ESTABL_CONTACT_EVT:
500 break;
501 default:
502 pr_err("Unknown node fsm evt %x/%x\n", state, evt);
503 }
504 break;
505 case SELF_LEAVING_PEER_DOWN:
506 switch (evt) {
507 case SELF_LOST_CONTACT_EVT:
508 state = SELF_DOWN_PEER_DOWN;
509 break;
510 case SELF_ESTABL_CONTACT_EVT:
511 case PEER_ESTABL_CONTACT_EVT:
512 case PEER_LOST_CONTACT_EVT:
513 break;
514 default:
515 pr_err("Unknown node fsm evt %x/%x\n", state, evt);
516 }
517 break;
518 default:
519 pr_err("Unknown node fsm state %x\n", state);
520 break;
521 }
522
523 n->state = state;
524}
525
526bool tipc_node_filter_skb(struct tipc_node *n, struct tipc_msg *hdr)
527{
528 int state = n->state;
529
530 if (likely(state == SELF_UP_PEER_UP))
531 return true;
532 if (state == SELF_DOWN_PEER_DOWN)
533 return true;
534 if (state == SELF_UP_PEER_COMING)
535 return true;
536 if (state == SELF_COMING_PEER_UP)
537 return true;
538 if (state == SELF_LEAVING_PEER_DOWN)
539 return false;
540 if (state == SELF_DOWN_PEER_LEAVING)
541 if (!msg_peer_is_up(hdr))
542 return true;
543 return false;
544}
545
424static void node_established_contact(struct tipc_node *n_ptr) 546static void node_established_contact(struct tipc_node *n_ptr)
425{ 547{
548 tipc_node_fsm_evt(n_ptr, SELF_ESTABL_CONTACT_EVT);
426 n_ptr->action_flags |= TIPC_NOTIFY_NODE_UP; 549 n_ptr->action_flags |= TIPC_NOTIFY_NODE_UP;
427 n_ptr->bclink.oos_state = 0; 550 n_ptr->bclink.oos_state = 0;
428 n_ptr->bclink.acked = tipc_bclink_get_last_sent(n_ptr->net); 551 n_ptr->bclink.acked = tipc_bclink_get_last_sent(n_ptr->net);
@@ -468,11 +591,8 @@ static void node_lost_contact(struct tipc_node *n_ptr)
468 l_ptr->failover_skb = NULL; 591 l_ptr->failover_skb = NULL;
469 tipc_link_reset_fragments(l_ptr); 592 tipc_link_reset_fragments(l_ptr);
470 } 593 }
471
472 n_ptr->action_flags &= ~TIPC_WAIT_OWN_LINKS_DOWN;
473
474 /* Prevent re-contact with node until cleanup is done */ 594 /* Prevent re-contact with node until cleanup is done */
475 n_ptr->action_flags |= TIPC_WAIT_PEER_LINKS_DOWN; 595 tipc_node_fsm_evt(n_ptr, SELF_LOST_CONTACT_EVT);
476 596
477 /* Notify publications from this node */ 597 /* Notify publications from this node */
478 n_ptr->action_flags |= TIPC_NOTIFY_NODE_DOWN; 598 n_ptr->action_flags |= TIPC_NOTIFY_NODE_DOWN;
diff --git a/net/tipc/node.h b/net/tipc/node.h
index 2d56344962e7..270256e09ee5 100644
--- a/net/tipc/node.h
+++ b/net/tipc/node.h
@@ -47,6 +47,24 @@
47 47
48#define INVALID_BEARER_ID -1 48#define INVALID_BEARER_ID -1
49 49
50/* Node FSM states and events:
51 */
52enum {
53 SELF_DOWN_PEER_DOWN = 0xdd,
54 SELF_UP_PEER_UP = 0xaa,
55 SELF_DOWN_PEER_LEAVING = 0xd1,
56 SELF_UP_PEER_COMING = 0xac,
57 SELF_COMING_PEER_UP = 0xca,
58 SELF_LEAVING_PEER_DOWN = 0x1d,
59};
60
61enum {
62 SELF_ESTABL_CONTACT_EVT = 0xec,
63 SELF_LOST_CONTACT_EVT = 0x1c,
64 PEER_ESTABL_CONTACT_EVT = 0xfec,
65 PEER_LOST_CONTACT_EVT = 0xf1c
66};
67
50/* Flags used to take different actions according to flag type 68/* Flags used to take different actions according to flag type
51 * TIPC_WAIT_PEER_LINKS_DOWN: wait to see that peer's links are down 69 * TIPC_WAIT_PEER_LINKS_DOWN: wait to see that peer's links are down
52 * TIPC_WAIT_OWN_LINKS_DOWN: wait until peer node is declared down 70 * TIPC_WAIT_OWN_LINKS_DOWN: wait until peer node is declared down
@@ -56,8 +74,6 @@
56 */ 74 */
57enum { 75enum {
58 TIPC_MSG_EVT = 1, 76 TIPC_MSG_EVT = 1,
59 TIPC_WAIT_PEER_LINKS_DOWN = (1 << 1),
60 TIPC_WAIT_OWN_LINKS_DOWN = (1 << 2),
61 TIPC_NOTIFY_NODE_DOWN = (1 << 3), 77 TIPC_NOTIFY_NODE_DOWN = (1 << 3),
62 TIPC_NOTIFY_NODE_UP = (1 << 4), 78 TIPC_NOTIFY_NODE_UP = (1 << 4),
63 TIPC_WAKEUP_BCAST_USERS = (1 << 5), 79 TIPC_WAKEUP_BCAST_USERS = (1 << 5),
@@ -133,6 +149,7 @@ struct tipc_node {
133 int action_flags; 149 int action_flags;
134 struct tipc_node_bclink bclink; 150 struct tipc_node_bclink bclink;
135 struct list_head list; 151 struct list_head list;
152 int state;
136 int link_cnt; 153 int link_cnt;
137 u16 working_links; 154 u16 working_links;
138 u16 capabilities; 155 u16 capabilities;
@@ -176,11 +193,8 @@ static inline void tipc_node_lock(struct tipc_node *node)
176 spin_lock_bh(&node->lock); 193 spin_lock_bh(&node->lock);
177} 194}
178 195
179static inline bool tipc_node_blocked(struct tipc_node *node) 196void tipc_node_fsm_evt(struct tipc_node *n, int evt);
180{ 197bool tipc_node_filter_skb(struct tipc_node *n, struct tipc_msg *hdr);
181 return (node->action_flags & (TIPC_WAIT_PEER_LINKS_DOWN |
182 TIPC_NOTIFY_NODE_DOWN | TIPC_WAIT_OWN_LINKS_DOWN));
183}
184 198
185static inline struct tipc_link *node_active_link(struct tipc_node *n, int sel) 199static inline struct tipc_link *node_active_link(struct tipc_node *n, int sel)
186{ 200{