about | summary | refs | log | tree | commit | diff | stats
path: root/net/tipc
diff options
context:
space:
mode:
author: Allan Stephens <allan.stephens@windriver.com> 2011-05-27 11:00:51 -0400
committer: Paul Gortmaker <paul.gortmaker@windriver.com> 2011-09-17 22:55:03 -0400
commit: b4b5610223f17790419b03eaa962b0e3ecf930d7 (patch)
tree: a8cdae892e3e2eac0ea1e5493cded8394f8a5d6d /net/tipc
parent: 4b3743ef2ca67e1f8ef7e9d4c551d6ba6ee85584 (diff)
tipc: Ensure both nodes recognize loss of contact between them
Enhances TIPC to ensure that a node that loses contact with a neighboring node does not allow contact to be re-established until it sees that its peer has also recognized the loss of contact.

Previously, nodes that were connected by two or more links could encounter a situation in which node A would lose contact with node B on all of its links, purge its name table of names published by B, and then fail to repopulate those names once contact with B was restored. This would happen because B was able to re-establish one or more links so quickly that it never reached a point where it had no links to A -- meaning that B never saw a loss of contact with A, and consequently didn't re-publish its names to A.

This problem is now prevented by enhancing the cleanup done by TIPC following a loss of contact with a neighboring node to ensure that node A ignores all messages sent by B until it receives a LINK_PROTOCOL message that indicates B has lost contact with A, thereby preventing the (re)establishment of links between the nodes. The loss of contact is recognized when a RESET or ACTIVATE message is received that has a "redundant link exists" field of 0, indicating that B's sending link endpoint is in a reset state and that B has no other working links.

Additionally, TIPC now suppresses the sending of (most) link protocol messages to a neighboring node while it is cleaning up after an earlier loss of contact with that node. This stops the peer node from prematurely activating its link endpoint, which would prevent TIPC from later activating its own end. TIPC still allows outgoing RESET messages to occur during cleanup, to avoid problems if its own node recognizes the loss of contact first and tries to notify the peer of the situation.
Finally, TIPC now recognizes an impending loss of contact with a peer node as soon as it receives a RESET message on a working link that is the peer's only link to the node, and ensures that the link protocol suppression mentioned above goes into effect right away -- that is, even before its own link endpoints have failed. This is necessary to ensure correct operation when there are redundant links between the nodes, since otherwise TIPC would send an ACTIVATE message upon receiving a RESET on its first link and only begin suppressing when a RESET on its second link was received, instead of initiating suppression with the first RESET message as it needs to.

Note: The reworked cleanup code also eliminates a check that prevented a link endpoint's discovery object from responding to incoming messages while stale name table entries are being purged. This check is now unnecessary and would have slowed down re-establishment of communication between the nodes in some situations.

Signed-off-by: Allan Stephens <allan.stephens@windriver.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Diffstat (limited to 'net/tipc')
-rw-r--r-- net/tipc/discover.c | 6
-rw-r--r-- net/tipc/link.c | 33
-rw-r--r-- net/tipc/node.c | 11
-rw-r--r-- net/tipc/node.h | 10
4 files changed, 42 insertions, 18 deletions
diff --git a/net/tipc/discover.c b/net/tipc/discover.c
index 0987933155b..f2fb96e86ee 100644
--- a/net/tipc/discover.c
+++ b/net/tipc/discover.c
@@ -159,12 +159,6 @@ void tipc_disc_recv_msg(struct sk_buff *buf, struct tipc_bearer *b_ptr)
159 } 159 }
160 tipc_node_lock(n_ptr); 160 tipc_node_lock(n_ptr);
161 161
162 /* Don't talk to neighbor during cleanup after last session */
163 if (n_ptr->cleanup_required) {
164 tipc_node_unlock(n_ptr);
165 return;
166 }
167
168 link = n_ptr->links[b_ptr->identity]; 162 link = n_ptr->links[b_ptr->identity];
169 163
170 /* Create a link endpoint for this bearer, if necessary */ 164 /* Create a link endpoint for this bearer, if necessary */
diff --git a/net/tipc/link.c b/net/tipc/link.c
index bc655f45649..74126db4597 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -1669,17 +1669,24 @@ void tipc_recv_msg(struct sk_buff *head, struct tipc_bearer *b_ptr)
1669 goto cont; 1669 goto cont;
1670 tipc_node_lock(n_ptr); 1670 tipc_node_lock(n_ptr);
1671 1671
1672 /* Don't talk to neighbor during cleanup after last session */ 1672 /* Locate unicast link endpoint that should handle message */
1673 1673
1674 if (n_ptr->cleanup_required) { 1674 l_ptr = n_ptr->links[b_ptr->identity];
1675 if (unlikely(!l_ptr)) {
1675 tipc_node_unlock(n_ptr); 1676 tipc_node_unlock(n_ptr);
1676 goto cont; 1677 goto cont;
1677 } 1678 }
1678 1679
1679 /* Locate unicast link endpoint that should handle message */ 1680 /* Verify that communication with node is currently allowed */
1680 1681
1681 l_ptr = n_ptr->links[b_ptr->identity]; 1682 if ((n_ptr->block_setup & WAIT_PEER_DOWN) &&
1682 if (unlikely(!l_ptr)) { 1683 msg_user(msg) == LINK_PROTOCOL &&
1684 (msg_type(msg) == RESET_MSG ||
1685 msg_type(msg) == ACTIVATE_MSG) &&
1686 !msg_redundant_link(msg))
1687 n_ptr->block_setup &= ~WAIT_PEER_DOWN;
1688
1689 if (n_ptr->block_setup) {
1683 tipc_node_unlock(n_ptr); 1690 tipc_node_unlock(n_ptr);
1684 goto cont; 1691 goto cont;
1685 } 1692 }
@@ -1914,6 +1921,12 @@ void tipc_link_send_proto_msg(struct link *l_ptr, u32 msg_typ, int probe_msg,
1914 1921
1915 if (link_blocked(l_ptr)) 1922 if (link_blocked(l_ptr))
1916 return; 1923 return;
1924
1925 /* Abort non-RESET send if communication with node is prohibited */
1926
1927 if ((l_ptr->owner->block_setup) && (msg_typ != RESET_MSG))
1928 return;
1929
1917 msg_set_type(msg, msg_typ); 1930 msg_set_type(msg, msg_typ);
1918 msg_set_net_plane(msg, l_ptr->b_ptr->net_plane); 1931 msg_set_net_plane(msg, l_ptr->b_ptr->net_plane);
1919 msg_set_bcast_ack(msg, mod(l_ptr->owner->bclink.last_in)); 1932 msg_set_bcast_ack(msg, mod(l_ptr->owner->bclink.last_in));
@@ -2045,6 +2058,16 @@ static void link_recv_proto_msg(struct link *l_ptr, struct sk_buff *buf)
2045 if (less_eq(msg_session(msg), l_ptr->peer_session)) 2058 if (less_eq(msg_session(msg), l_ptr->peer_session))
2046 break; /* duplicate or old reset: ignore */ 2059 break; /* duplicate or old reset: ignore */
2047 } 2060 }
2061
2062 if (!msg_redundant_link(msg) && (link_working_working(l_ptr) ||
2063 link_working_unknown(l_ptr))) {
2064 /*
2065 * peer has lost contact -- don't allow peer's links
2066 * to reactivate before we recognize loss & clean up
2067 */
2068 l_ptr->owner->block_setup = WAIT_NODE_DOWN;
2069 }
2070
2048 /* fall thru' */ 2071 /* fall thru' */
2049 case ACTIVATE_MSG: 2072 case ACTIVATE_MSG:
2050 /* Update link settings according other endpoint's values */ 2073 /* Update link settings according other endpoint's values */
diff --git a/net/tipc/node.c b/net/tipc/node.c
index d75432f5e72..27b4bb0cca6 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -112,6 +112,7 @@ struct tipc_node *tipc_node_create(u32 addr)
112 break; 112 break;
113 } 113 }
114 list_add_tail(&n_ptr->list, &temp_node->list); 114 list_add_tail(&n_ptr->list, &temp_node->list);
115 n_ptr->block_setup = WAIT_PEER_DOWN;
115 116
116 tipc_num_nodes++; 117 tipc_num_nodes++;
117 118
@@ -312,7 +313,7 @@ static void node_established_contact(struct tipc_node *n_ptr)
312 } 313 }
313} 314}
314 315
315static void node_cleanup_finished(unsigned long node_addr) 316static void node_name_purge_complete(unsigned long node_addr)
316{ 317{
317 struct tipc_node *n_ptr; 318 struct tipc_node *n_ptr;
318 319
@@ -320,7 +321,7 @@ static void node_cleanup_finished(unsigned long node_addr)
320 n_ptr = tipc_node_find(node_addr); 321 n_ptr = tipc_node_find(node_addr);
321 if (n_ptr) { 322 if (n_ptr) {
322 tipc_node_lock(n_ptr); 323 tipc_node_lock(n_ptr);
323 n_ptr->cleanup_required = 0; 324 n_ptr->block_setup &= ~WAIT_NAMES_GONE;
324 tipc_node_unlock(n_ptr); 325 tipc_node_unlock(n_ptr);
325 } 326 }
326 read_unlock_bh(&tipc_net_lock); 327 read_unlock_bh(&tipc_net_lock);
@@ -371,10 +372,10 @@ static void node_lost_contact(struct tipc_node *n_ptr)
371 /* Notify subscribers */ 372 /* Notify subscribers */
372 tipc_nodesub_notify(n_ptr); 373 tipc_nodesub_notify(n_ptr);
373 374
374 /* Prevent re-contact with node until all cleanup is done */ 375 /* Prevent re-contact with node until cleanup is done */
375 376
376 n_ptr->cleanup_required = 1; 377 n_ptr->block_setup = WAIT_PEER_DOWN | WAIT_NAMES_GONE;
377 tipc_k_signal((Handler)node_cleanup_finished, n_ptr->addr); 378 tipc_k_signal((Handler)node_name_purge_complete, n_ptr->addr);
378} 379}
379 380
380struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space) 381struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space)
diff --git a/net/tipc/node.h b/net/tipc/node.h
index 5c61afc7a0b..4f15cb40aaa 100644
--- a/net/tipc/node.h
+++ b/net/tipc/node.h
@@ -42,6 +42,12 @@
42#include "net.h" 42#include "net.h"
43#include "bearer.h" 43#include "bearer.h"
44 44
45/* Flags used to block (re)establishment of contact with a neighboring node */
46
47#define WAIT_PEER_DOWN 0x0001 /* wait to see that peer's links are down */
48#define WAIT_NAMES_GONE 0x0002 /* wait for peer's publications to be purged */
49#define WAIT_NODE_DOWN 0x0004 /* wait until peer node is declared down */
50
45/** 51/**
46 * struct tipc_node - TIPC node structure 52 * struct tipc_node - TIPC node structure
47 * @addr: network address of node 53 * @addr: network address of node
@@ -52,7 +58,7 @@
52 * @active_links: pointers to active links to node 58 * @active_links: pointers to active links to node
53 * @links: pointers to all links to node 59 * @links: pointers to all links to node
54 * @working_links: number of working links to node (both active and standby) 60 * @working_links: number of working links to node (both active and standby)
55 * @cleanup_required: non-zero if cleaning up after a prior loss of contact 61 * @block_setup: bit mask of conditions preventing link establishment to node
56 * @link_cnt: number of links to node 62 * @link_cnt: number of links to node
57 * @permit_changeover: non-zero if node has redundant links to this system 63 * @permit_changeover: non-zero if node has redundant links to this system
58 * @bclink: broadcast-related info 64 * @bclink: broadcast-related info
@@ -77,7 +83,7 @@ struct tipc_node {
77 struct link *links[MAX_BEARERS]; 83 struct link *links[MAX_BEARERS];
78 int link_cnt; 84 int link_cnt;
79 int working_links; 85 int working_links;
80 int cleanup_required; 86 int block_setup;
81 int permit_changeover; 87 int permit_changeover;
82 struct { 88 struct {
83 int supported; 89 int supported;