author		Jon Paul Maloy <jon.maloy@ericsson.com>	2016-06-13 20:46:22 -0400
committer	David S. Miller <davem@davemloft.net>	2016-06-15 17:06:28 -0400
commit		35c55c9877f8de0ab129fa1a309271d0ecc868b9 (patch)
tree		5c8011a871be5083f1c36bdf4ca6c1e4168390c3
parent		7889681f4a6c2148e1245604bac751a1cae8f882 (diff)
tipc: add neighbor monitoring framework
TIPC-based clusters are by default set up with full-mesh link
connectivity between all nodes. Those links are expected to provide
a short failure detection time, by default set to 1500 ms. Because of
this, the background load for neighbor monitoring in an N-node cluster
increases by a factor of N on each node, while the overall monitoring
traffic through the network infrastructure increases at a
~(N * (N - 1)) rate. Experience has shown that such clusters don't
scale well beyond ~100 nodes unless we significantly increase failure
discovery tolerance.

This commit introduces a framework and an algorithm that drastically
reduces this background load, while basically maintaining the original
failure detection times across the whole cluster. Using this algorithm,
background load will now grow at a rate of ~(2 * sqrt(N)) per node, and
at ~(2 * N * sqrt(N)) in traffic overhead. As an example, each node in
a 400-node cluster will now have to actively monitor 38 neighbors,
instead of 399 as before; the arithmetic behind these figures is
sketched after the list below.

This "Overlapping Ring Supervision Algorithm" is completely distributed
and employs no centralized or coordinated state. It goes as follows:

- Each node makes up a linearly ascending, circular list of all its N
  known neighbors, based on their TIPC node identity. This algorithm
  must be the same on all nodes (see the second sketch below).

- The node then selects the next M = sqrt(N) - 1 nodes downstream from
  itself in the list, and chooses to actively monitor those. This is
  called its "local monitoring domain".

- It creates a domain record describing the monitoring domain, and
  piggy-backs this in the data area of all neighbor monitoring messages
  (LINK_PROTOCOL/STATE) leaving that node. This means that all nodes in
  the cluster eventually (default within 400 ms) will learn about its
  monitoring domain.

- Whenever a node discovers a change in its local domain, e.g., a node
  has been added or has gone down, it creates and sends out a new
  version of its node record to inform all neighbors about the change.

- A node receiving a domain record from anybody outside its local
  domain matches this against its own list (which may not look the
  same), and chooses to not actively monitor those members of the
  received domain record that are also present in its own list.
  Instead, it relies on indications from the direct monitoring nodes if
  an indirectly monitored node has gone up or down. If a node is
  indicated lost, the receiving node temporarily activates its own
  direct monitoring towards that node in order to confirm, or not, that
  it is actually gone.

- Since each node is actively monitoring sqrt(N) downstream neighbors,
  each node is also actively monitored by the same number of upstream
  neighbors. This means that all non-direct monitoring nodes normally
  will receive sqrt(N) indications that a node is gone.

- A major drawback with ring monitoring is how it handles failures that
  cause massive network partitionings. If both a lost node and all its
  direct monitoring neighbors are inside the lost partition, the nodes
  in the remaining partition will never receive indications about the
  loss. To overcome this, each node also chooses to actively monitor
  some nodes outside its local domain. Those nodes are called remote
  domain "heads", and are selected in such a way that no node in the
  cluster will be more than two direct monitoring hops away. Because of
  this, each node, apart from monitoring the members of its local
  domain, will also typically monitor sqrt(N) remote head nodes.
- As an optimization, local list status, domain status and domain
  records are marked with a generation number. This saves senders from
  unnecessarily conveying unaltered domain records, and receivers from
  performing unneeded re-adaptations of their node monitoring list,
  such as re-assigning domain heads.

- As a measure of caution we have added the possibility to disable the
  new algorithm through configuration. We do this by keeping a
  threshold value for the cluster size; a cluster that grows beyond
  this value will switch from full-mesh to ring monitoring, and vice
  versa when it shrinks below the value. This means that if the
  threshold is set to a value larger than any anticipated cluster size
  (default size is 32) the new algorithm is effectively disabled. A
  patch set for altering the threshold value and for listing the table
  contents will follow shortly.

- This change is fully backwards compatible.

Acked-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
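The scaling claim above can be sanity-checked outside the kernel. The
following standalone user-space sketch (illustrative only, not part of
the patch) mirrors the dom_size() helper added in monitor.c; the
assumption that the number of remote domain heads roughly equals the
local domain size is a simplification:

  #include <stdio.h>

  /* Same arithmetic as dom_size() in net/tipc/monitor.c:
   * smallest i such that i * i >= peers, i.e. ceil(sqrt(peers)).
   */
  static int dom_size(int peers)
  {
      int i = 0;

      while ((i * i) < peers)
          i++;
      return i;
  }

  int main(void)
  {
      int n;

      for (n = 100; n <= 400; n += 100) {
          int local = dom_size(n) - 1;  /* downstream domain members */
          int heads = dom_size(n) - 1;  /* ~remote domain heads (assumed) */

          printf("N = %3d: full mesh %3d, ring ~%2d monitored peers\n",
                 n, n - 1, local + heads);
      }
      return 0;
  }

For N = 400 this yields ~38 actively monitored neighbors against 399
with full mesh, matching the figures quoted above.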
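Likewise, the circular ordering used when sorting peers into the list
(see tipc_mon_add_peer() in monitor.c) can be expressed in isolation.
This is a minimal sketch with illustrative names, assuming 32-bit node
addresses:

  #include <stdbool.h>
  #include <stdint.h>

  /* True if addr sorts between consecutive ring entries prev and cur,
   * where the list ascends from self and wraps around from the highest
   * address back to the lowest (prev > cur only at the wrap point).
   * Same test as in tipc_mon_add_peer().
   */
  static bool addr_fits_between(uint32_t prev, uint32_t cur, uint32_t addr)
  {
      if (addr > prev && addr < cur)  /* ordinary ascending segment */
          return true;
      return (addr < cur || addr > prev) && prev > cur;
  }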
-rw-r--r--  net/tipc/Makefile     2
-rw-r--r--  net/tipc/addr.h       1
-rw-r--r--  net/tipc/bearer.c     8
-rw-r--r--  net/tipc/bearer.h     2
-rw-r--r--  net/tipc/core.c       1
-rw-r--r--  net/tipc/core.h      15
-rw-r--r--  net/tipc/link.c      49
-rw-r--r--  net/tipc/monitor.c  651
-rw-r--r--  net/tipc/monitor.h   73
-rw-r--r--  net/tipc/node.c      26
10 files changed, 797 insertions, 31 deletions
diff --git a/net/tipc/Makefile b/net/tipc/Makefile
index 57e460be4692..31b9f9c52974 100644
--- a/net/tipc/Makefile
+++ b/net/tipc/Makefile
@@ -6,7 +6,7 @@ obj-$(CONFIG_TIPC) := tipc.o
 
 tipc-y += addr.o bcast.o bearer.o \
 	   core.o link.o discover.o msg.o \
-	   name_distr.o subscr.o name_table.o net.o \
+	   name_distr.o subscr.o monitor.o name_table.o net.o \
 	   netlink.o netlink_compat.o node.o socket.o eth_media.o \
 	   server.o socket.o
 
diff --git a/net/tipc/addr.h b/net/tipc/addr.h
index 93f7c983be33..64f4004a6fac 100644
--- a/net/tipc/addr.h
+++ b/net/tipc/addr.h
@@ -73,4 +73,5 @@ int tipc_addr_node_valid(u32 addr);
 int tipc_in_scope(u32 domain, u32 addr);
 int tipc_addr_scope(u32 domain);
 char *tipc_addr_string_fill(char *string, u32 addr);
+
 #endif
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index 6f11c62bc8f9..9a70e1d744d2 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -1,7 +1,7 @@
 /*
  * net/tipc/bearer.c: TIPC bearer code
  *
- * Copyright (c) 1996-2006, 2013-2014, Ericsson AB
+ * Copyright (c) 1996-2006, 2013-2016, Ericsson AB
  * Copyright (c) 2004-2006, 2010-2013, Wind River Systems
  * All rights reserved.
  *
@@ -39,6 +39,7 @@
 #include "bearer.h"
 #include "link.h"
 #include "discover.h"
+#include "monitor.h"
 #include "bcast.h"
 #include "netlink.h"
 
@@ -313,6 +314,10 @@ restart:
 	rcu_assign_pointer(tn->bearer_list[bearer_id], b);
 	if (skb)
 		tipc_bearer_xmit_skb(net, bearer_id, skb, &b->bcast_addr);
+
+	if (tipc_mon_create(net, bearer_id))
+		return -ENOMEM;
+
 	pr_info("Enabled bearer <%s>, discovery domain %s, priority %u\n",
 		name,
 		tipc_addr_string_fill(addr_string, disc_domain), priority);
@@ -348,6 +353,7 @@ static void bearer_disable(struct net *net, struct tipc_bearer *b)
 	tipc_disc_delete(b->link_req);
 	RCU_INIT_POINTER(tn->bearer_list[bearer_id], NULL);
 	kfree_rcu(b, rcu);
+	tipc_mon_delete(net, bearer_id);
 }
 
 int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b,
diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h
index f686e41b5abb..0d337c7b6fad 100644
--- a/net/tipc/bearer.h
+++ b/net/tipc/bearer.h
@@ -1,7 +1,7 @@
 /*
  * net/tipc/bearer.h: Include file for TIPC bearer code
  *
- * Copyright (c) 1996-2006, 2013-2014, Ericsson AB
+ * Copyright (c) 1996-2006, 2013-2016, Ericsson AB
  * Copyright (c) 2005, 2010-2011, Wind River Systems
  * All rights reserved.
  *
diff --git a/net/tipc/core.c b/net/tipc/core.c
index fe1b062c4f18..236b043a4156 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -57,6 +57,7 @@ static int __net_init tipc_init_net(struct net *net)
 
 	tn->net_id = 4711;
 	tn->own_addr = 0;
+	tn->mon_threshold = TIPC_DEF_MON_THRESHOLD;
 	get_random_bytes(&tn->random, sizeof(int));
 	INIT_LIST_HEAD(&tn->node_list);
 	spin_lock_init(&tn->node_list_lock);
diff --git a/net/tipc/core.h b/net/tipc/core.h
index eff58dc53aa1..a1845fb27d80 100644
--- a/net/tipc/core.h
+++ b/net/tipc/core.h
@@ -66,11 +66,13 @@ struct tipc_bc_base;
 struct tipc_link;
 struct tipc_name_table;
 struct tipc_server;
+struct tipc_monitor;
 
 #define TIPC_MOD_VER "2.0.0"
 
 #define NODE_HTABLE_SIZE 512
 #define MAX_BEARERS 3
+#define TIPC_DEF_MON_THRESHOLD 32
 
 extern int tipc_net_id __read_mostly;
 extern int sysctl_tipc_rmem[3] __read_mostly;
@@ -88,6 +90,10 @@ struct tipc_net {
 	u32 num_nodes;
 	u32 num_links;
 
+	/* Neighbor monitoring list */
+	struct tipc_monitor *monitors[MAX_BEARERS];
+	int mon_threshold;
+
 	/* Bearer list */
 	struct tipc_bearer __rcu *bearer_list[MAX_BEARERS + 1];
 
@@ -126,6 +132,11 @@ static inline struct list_head *tipc_nodes(struct net *net)
 	return &tipc_net(net)->node_list;
 }
 
+static inline unsigned int tipc_hashfn(u32 addr)
+{
+	return addr & (NODE_HTABLE_SIZE - 1);
+}
+
 static inline u16 mod(u16 x)
 {
 	return x & 0xffffu;
diff --git a/net/tipc/link.c b/net/tipc/link.c
index a904ccd5a93a..03f8bdf70d8f 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -42,6 +42,7 @@
42#include "name_distr.h" 42#include "name_distr.h"
43#include "discover.h" 43#include "discover.h"
44#include "netlink.h" 44#include "netlink.h"
45#include "monitor.h"
45 46
46#include <linux/pkt_sched.h> 47#include <linux/pkt_sched.h>
47 48
@@ -95,6 +96,7 @@ struct tipc_stats {
95 * @pmsg: convenience pointer to "proto_msg" field 96 * @pmsg: convenience pointer to "proto_msg" field
96 * @priority: current link priority 97 * @priority: current link priority
97 * @net_plane: current link network plane ('A' through 'H') 98 * @net_plane: current link network plane ('A' through 'H')
99 * @mon_state: cookie with information needed by link monitor
98 * @backlog_limit: backlog queue congestion thresholds (indexed by importance) 100 * @backlog_limit: backlog queue congestion thresholds (indexed by importance)
99 * @exp_msg_count: # of tunnelled messages expected during link changeover 101 * @exp_msg_count: # of tunnelled messages expected during link changeover
100 * @reset_rcv_checkpt: seq # of last acknowledged message at time of link reset 102 * @reset_rcv_checkpt: seq # of last acknowledged message at time of link reset
@@ -138,6 +140,7 @@ struct tipc_link {
138 char if_name[TIPC_MAX_IF_NAME]; 140 char if_name[TIPC_MAX_IF_NAME];
139 u32 priority; 141 u32 priority;
140 char net_plane; 142 char net_plane;
143 struct tipc_mon_state mon_state;
141 u16 rst_cnt; 144 u16 rst_cnt;
142 145
143 /* Failover/synch */ 146 /* Failover/synch */
@@ -708,18 +711,25 @@ int tipc_link_timeout(struct tipc_link *l, struct sk_buff_head *xmitq)
708 bool setup = false; 711 bool setup = false;
709 u16 bc_snt = l->bc_sndlink->snd_nxt - 1; 712 u16 bc_snt = l->bc_sndlink->snd_nxt - 1;
710 u16 bc_acked = l->bc_rcvlink->acked; 713 u16 bc_acked = l->bc_rcvlink->acked;
711 714 struct tipc_mon_state *mstate = &l->mon_state;
712 link_profile_stats(l);
713 715
714 switch (l->state) { 716 switch (l->state) {
715 case LINK_ESTABLISHED: 717 case LINK_ESTABLISHED:
716 case LINK_SYNCHING: 718 case LINK_SYNCHING:
717 if (l->silent_intv_cnt > l->abort_limit)
718 return tipc_link_fsm_evt(l, LINK_FAILURE_EVT);
719 mtyp = STATE_MSG; 719 mtyp = STATE_MSG;
720 link_profile_stats(l);
721 tipc_mon_get_state(l->net, l->addr, mstate, l->bearer_id);
722 if (mstate->reset || (l->silent_intv_cnt > l->abort_limit))
723 return tipc_link_fsm_evt(l, LINK_FAILURE_EVT);
720 state = bc_acked != bc_snt; 724 state = bc_acked != bc_snt;
721 probe = l->silent_intv_cnt; 725 state |= l->bc_rcvlink->rcv_unacked;
722 l->silent_intv_cnt++; 726 state |= l->rcv_unacked;
727 state |= !skb_queue_empty(&l->transmq);
728 state |= !skb_queue_empty(&l->deferdq);
729 probe = mstate->probing;
730 probe |= l->silent_intv_cnt;
731 if (probe || mstate->monitoring)
732 l->silent_intv_cnt++;
723 break; 733 break;
724 case LINK_RESET: 734 case LINK_RESET:
725 setup = l->rst_cnt++ <= 4; 735 setup = l->rst_cnt++ <= 4;
@@ -830,6 +840,7 @@ void tipc_link_reset(struct tipc_link *l)
830 l->stats.recv_info = 0; 840 l->stats.recv_info = 0;
831 l->stale_count = 0; 841 l->stale_count = 0;
832 l->bc_peer_is_up = false; 842 l->bc_peer_is_up = false;
843 memset(&l->mon_state, 0, sizeof(l->mon_state));
833 tipc_link_reset_stats(l); 844 tipc_link_reset_stats(l);
834} 845}
835 846
@@ -1238,6 +1249,9 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe,
1238 struct tipc_msg *hdr; 1249 struct tipc_msg *hdr;
1239 struct sk_buff_head *dfq = &l->deferdq; 1250 struct sk_buff_head *dfq = &l->deferdq;
1240 bool node_up = link_is_up(l->bc_rcvlink); 1251 bool node_up = link_is_up(l->bc_rcvlink);
1252 struct tipc_mon_state *mstate = &l->mon_state;
1253 int dlen = 0;
1254 void *data;
1241 1255
1242 /* Don't send protocol message during reset or link failover */ 1256 /* Don't send protocol message during reset or link failover */
1243 if (tipc_link_is_blocked(l)) 1257 if (tipc_link_is_blocked(l))
@@ -1250,12 +1264,13 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe,
1250 rcvgap = buf_seqno(skb_peek(dfq)) - l->rcv_nxt; 1264 rcvgap = buf_seqno(skb_peek(dfq)) - l->rcv_nxt;
1251 1265
1252 skb = tipc_msg_create(LINK_PROTOCOL, mtyp, INT_H_SIZE, 1266 skb = tipc_msg_create(LINK_PROTOCOL, mtyp, INT_H_SIZE,
1253 TIPC_MAX_IF_NAME, l->addr, 1267 tipc_max_domain_size, l->addr,
1254 tipc_own_addr(l->net), 0, 0, 0); 1268 tipc_own_addr(l->net), 0, 0, 0);
1255 if (!skb) 1269 if (!skb)
1256 return; 1270 return;
1257 1271
1258 hdr = buf_msg(skb); 1272 hdr = buf_msg(skb);
1273 data = msg_data(hdr);
1259 msg_set_session(hdr, l->session); 1274 msg_set_session(hdr, l->session);
1260 msg_set_bearer_id(hdr, l->bearer_id); 1275 msg_set_bearer_id(hdr, l->bearer_id);
1261 msg_set_net_plane(hdr, l->net_plane); 1276 msg_set_net_plane(hdr, l->net_plane);
@@ -1271,14 +1286,18 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe,
1271 1286
1272 if (mtyp == STATE_MSG) { 1287 if (mtyp == STATE_MSG) {
1273 msg_set_seq_gap(hdr, rcvgap); 1288 msg_set_seq_gap(hdr, rcvgap);
1274 msg_set_size(hdr, INT_H_SIZE);
1275 msg_set_probe(hdr, probe); 1289 msg_set_probe(hdr, probe);
1290 tipc_mon_prep(l->net, data, &dlen, mstate, l->bearer_id);
1291 msg_set_size(hdr, INT_H_SIZE + dlen);
1292 skb_trim(skb, INT_H_SIZE + dlen);
1276 l->stats.sent_states++; 1293 l->stats.sent_states++;
1277 l->rcv_unacked = 0; 1294 l->rcv_unacked = 0;
1278 } else { 1295 } else {
1279 /* RESET_MSG or ACTIVATE_MSG */ 1296 /* RESET_MSG or ACTIVATE_MSG */
1280 msg_set_max_pkt(hdr, l->advertised_mtu); 1297 msg_set_max_pkt(hdr, l->advertised_mtu);
1281 strcpy(msg_data(hdr), l->if_name); 1298 strcpy(data, l->if_name);
1299 msg_set_size(hdr, INT_H_SIZE + TIPC_MAX_IF_NAME);
1300 skb_trim(skb, INT_H_SIZE + TIPC_MAX_IF_NAME);
1282 } 1301 }
1283 if (probe) 1302 if (probe)
1284 l->stats.sent_probes++; 1303 l->stats.sent_probes++;
@@ -1371,7 +1390,9 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
1371 u16 peers_tol = msg_link_tolerance(hdr); 1390 u16 peers_tol = msg_link_tolerance(hdr);
1372 u16 peers_prio = msg_linkprio(hdr); 1391 u16 peers_prio = msg_linkprio(hdr);
1373 u16 rcv_nxt = l->rcv_nxt; 1392 u16 rcv_nxt = l->rcv_nxt;
1393 u16 dlen = msg_data_sz(hdr);
1374 int mtyp = msg_type(hdr); 1394 int mtyp = msg_type(hdr);
1395 void *data;
1375 char *if_name; 1396 char *if_name;
1376 int rc = 0; 1397 int rc = 0;
1377 1398
@@ -1381,6 +1402,10 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
1381 if (tipc_own_addr(l->net) > msg_prevnode(hdr)) 1402 if (tipc_own_addr(l->net) > msg_prevnode(hdr))
1382 l->net_plane = msg_net_plane(hdr); 1403 l->net_plane = msg_net_plane(hdr);
1383 1404
1405 skb_linearize(skb);
1406 hdr = buf_msg(skb);
1407 data = msg_data(hdr);
1408
1384 switch (mtyp) { 1409 switch (mtyp) {
1385 case RESET_MSG: 1410 case RESET_MSG:
1386 1411
@@ -1391,8 +1416,6 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
1391 /* fall thru' */ 1416 /* fall thru' */
1392 1417
1393 case ACTIVATE_MSG: 1418 case ACTIVATE_MSG:
1394 skb_linearize(skb);
1395 hdr = buf_msg(skb);
1396 1419
1397 /* Complete own link name with peer's interface name */ 1420 /* Complete own link name with peer's interface name */
1398 if_name = strrchr(l->name, ':') + 1; 1421 if_name = strrchr(l->name, ':') + 1;
@@ -1400,7 +1423,7 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
1400 break; 1423 break;
1401 if (msg_data_sz(hdr) < TIPC_MAX_IF_NAME) 1424 if (msg_data_sz(hdr) < TIPC_MAX_IF_NAME)
1402 break; 1425 break;
1403 strncpy(if_name, msg_data(hdr), TIPC_MAX_IF_NAME); 1426 strncpy(if_name, data, TIPC_MAX_IF_NAME);
1404 1427
1405 /* Update own tolerance if peer indicates a non-zero value */ 1428 /* Update own tolerance if peer indicates a non-zero value */
1406 if (in_range(peers_tol, TIPC_MIN_LINK_TOL, TIPC_MAX_LINK_TOL)) 1429 if (in_range(peers_tol, TIPC_MIN_LINK_TOL, TIPC_MAX_LINK_TOL))
@@ -1448,6 +1471,8 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
1448 rc = TIPC_LINK_UP_EVT; 1471 rc = TIPC_LINK_UP_EVT;
1449 break; 1472 break;
1450 } 1473 }
1474 tipc_mon_rcv(l->net, data, dlen, l->addr,
1475 &l->mon_state, l->bearer_id);
1451 1476
1452 /* Send NACK if peer has sent pkts we haven't received yet */ 1477 /* Send NACK if peer has sent pkts we haven't received yet */
1453 if (more(peers_snd_nxt, rcv_nxt) && !tipc_link_is_synching(l)) 1478 if (more(peers_snd_nxt, rcv_nxt) && !tipc_link_is_synching(l))
diff --git a/net/tipc/monitor.c b/net/tipc/monitor.c
new file mode 100644
index 000000000000..87d4efedd09f
--- /dev/null
+++ b/net/tipc/monitor.c
@@ -0,0 +1,651 @@
+/*
+ * net/tipc/monitor.c
+ *
+ * Copyright (c) 2016, Ericsson AB
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "core.h"
+#include "addr.h"
+#include "monitor.h"
+
+#define MAX_MON_DOMAIN 64
+#define MON_TIMEOUT 120000
+#define MAX_PEER_DOWN_EVENTS 4
+
+/* struct tipc_mon_domain: domain record to be transferred between peers
+ * @len: actual size of domain record
+ * @gen: current generation of sender's domain
+ * @ack_gen: most recent generation of self's domain acked by peer
+ * @member_cnt: number of domain member nodes described in this record
+ * @up_map: bit map indicating which of the members the sender considers up
+ * @members: identity of the domain members
+ */
+struct tipc_mon_domain {
+	u16 len;
+	u16 gen;
+	u16 ack_gen;
+	u16 member_cnt;
+	u64 up_map;
+	u32 members[MAX_MON_DOMAIN];
+};
+
+/* struct tipc_peer: state of a peer node and its domain
+ * @addr: tipc node identity of peer
+ * @head_map: shows which other nodes currently consider peer 'up'
+ * @domain: most recent domain record from peer
+ * @hash: position in hashed lookup list
+ * @list: position in linked list, in circular ascending order by 'addr'
+ * @applied: number of reported domain members applied on this monitor list
+ * @is_up: peer is up as seen from this node
+ * @is_head: peer is assigned domain head as seen from this node
+ * @is_local: peer is in local domain and should be continuously monitored
+ * @down_cnt: number of other peers which have reported this peer lost
+ */
+struct tipc_peer {
+	u32 addr;
+	struct tipc_mon_domain *domain;
+	struct hlist_node hash;
+	struct list_head list;
+	u8 applied;
+	u8 down_cnt;
+	bool is_up;
+	bool is_head;
+	bool is_local;
+};
+
+struct tipc_monitor {
+	struct hlist_head peers[NODE_HTABLE_SIZE];
+	int peer_cnt;
+	struct tipc_peer *self;
+	rwlock_t lock;
+	struct tipc_mon_domain cache;
+	u16 list_gen;
+	u16 dom_gen;
+	struct net *net;
+	struct timer_list timer;
+	unsigned long timer_intv;
+};
+
+static struct tipc_monitor *tipc_monitor(struct net *net, int bearer_id)
+{
+	return tipc_net(net)->monitors[bearer_id];
+}
+
+const int tipc_max_domain_size = sizeof(struct tipc_mon_domain);
+
+/* dom_rec_len(): actual length of domain record for transport
+ */
+static int dom_rec_len(struct tipc_mon_domain *dom, u16 mcnt)
+{
+	return ((void *)&dom->members - (void *)dom) + (mcnt * sizeof(u32));
+}
+
+/* dom_size() : calculate size of own domain based on number of peers
+ */
+static int dom_size(int peers)
+{
+	int i = 0;
+
+	while ((i * i) < peers)
+		i++;
+	return i < MAX_MON_DOMAIN ? i : MAX_MON_DOMAIN;
+}
+
+static void map_set(u64 *up_map, int i, unsigned int v)
+{
+	*up_map &= ~(1ULL << i);
+	*up_map |= ((u64)v << i);
+}
+
+static int map_get(u64 up_map, int i)
+{
+	return (up_map & (1ULL << i)) >> i;
+}
+
+static struct tipc_peer *peer_prev(struct tipc_peer *peer)
+{
+	return list_last_entry(&peer->list, struct tipc_peer, list);
+}
+
+static struct tipc_peer *peer_nxt(struct tipc_peer *peer)
+{
+	return list_first_entry(&peer->list, struct tipc_peer, list);
+}
+
+static struct tipc_peer *peer_head(struct tipc_peer *peer)
+{
+	while (!peer->is_head)
+		peer = peer_prev(peer);
+	return peer;
+}
+
+static struct tipc_peer *get_peer(struct tipc_monitor *mon, u32 addr)
+{
+	struct tipc_peer *peer;
+	unsigned int thash = tipc_hashfn(addr);
+
+	hlist_for_each_entry(peer, &mon->peers[thash], hash) {
+		if (peer->addr == addr)
+			return peer;
+	}
+	return NULL;
+}
+
+static struct tipc_peer *get_self(struct net *net, int bearer_id)
+{
+	struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
+
+	return mon->self;
+}
+
+static inline bool tipc_mon_is_active(struct net *net, struct tipc_monitor *mon)
+{
+	struct tipc_net *tn = tipc_net(net);
+
+	return mon->peer_cnt > tn->mon_threshold;
+}
+
+/* mon_identify_lost_members() : identify and mark potentially lost members
+ */
+static void mon_identify_lost_members(struct tipc_peer *peer,
+				      struct tipc_mon_domain *dom_bef,
+				      int applied_bef)
+{
+	struct tipc_peer *member = peer;
+	struct tipc_mon_domain *dom_aft = peer->domain;
+	int applied_aft = peer->applied;
+	int i;
+
+	for (i = 0; i < applied_bef; i++) {
+		member = peer_nxt(member);
+
+		/* Do nothing if self or peer already see member as down */
+		if (!member->is_up || !map_get(dom_bef->up_map, i))
+			continue;
+
+		/* Loss of local node must be detected by active probing */
+		if (member->is_local)
+			continue;
+
+		/* Start probing if member was removed from applied domain */
+		if (!applied_aft || (applied_aft < i)) {
+			member->down_cnt = 1;
+			continue;
+		}
+
+		/* Member loss is confirmed if it is still in applied domain */
+		if (!map_get(dom_aft->up_map, i))
+			member->down_cnt++;
+	}
+}
+
+/* mon_apply_domain() : match a peer's domain record against monitor list
+ */
+static void mon_apply_domain(struct tipc_monitor *mon,
+			     struct tipc_peer *peer)
+{
+	struct tipc_mon_domain *dom = peer->domain;
+	struct tipc_peer *member;
+	u32 addr;
+	int i;
+
+	if (!dom || !peer->is_up)
+		return;
+
+	/* Scan across domain members and match against monitor list */
+	peer->applied = 0;
+	member = peer_nxt(peer);
+	for (i = 0; i < dom->member_cnt; i++) {
+		addr = dom->members[i];
+		if (addr != member->addr)
+			return;
+		peer->applied++;
+		member = peer_nxt(member);
+	}
+}
+
+/* mon_update_local_domain() : update after peer addition/removal/up/down
+ */
+static void mon_update_local_domain(struct tipc_monitor *mon)
+{
+	struct tipc_peer *self = mon->self;
+	struct tipc_mon_domain *cache = &mon->cache;
+	struct tipc_mon_domain *dom = self->domain;
+	struct tipc_peer *peer = self;
+	u64 prev_up_map = dom->up_map;
+	u16 member_cnt, i;
+	bool diff;
+
+	/* Update local domain size based on current size of cluster */
+	member_cnt = dom_size(mon->peer_cnt) - 1;
+	self->applied = member_cnt;
+
+	/* Update native and cached outgoing local domain records */
+	dom->len = dom_rec_len(dom, member_cnt);
+	diff = dom->member_cnt != member_cnt;
+	dom->member_cnt = member_cnt;
+	for (i = 0; i < member_cnt; i++) {
+		peer = peer_nxt(peer);
+		diff |= dom->members[i] != peer->addr;
+		dom->members[i] = peer->addr;
+		map_set(&dom->up_map, i, peer->is_up);
+		cache->members[i] = htonl(peer->addr);
+	}
+	diff |= dom->up_map != prev_up_map;
+	if (!diff)
+		return;
+	dom->gen = ++mon->dom_gen;
+	cache->len = htons(dom->len);
+	cache->gen = htons(dom->gen);
+	cache->member_cnt = htons(member_cnt);
+	cache->up_map = cpu_to_be64(dom->up_map);
+	mon_apply_domain(mon, self);
+}
+
+/* mon_update_neighbors() : update preceding neighbors of added/removed peer
+ */
+static void mon_update_neighbors(struct tipc_monitor *mon,
+				 struct tipc_peer *peer)
+{
+	int dz, i;
+
+	dz = dom_size(mon->peer_cnt);
+	for (i = 0; i < dz; i++) {
+		mon_apply_domain(mon, peer);
+		peer = peer_prev(peer);
+	}
+}
+
+/* mon_assign_roles() : reassign peer roles after a network change
+ * The monitor list is consistent at this stage; i.e., each peer is monitoring
+ * a set of domain members as matched between domain record and the monitor list
+ */
+static void mon_assign_roles(struct tipc_monitor *mon, struct tipc_peer *head)
+{
+	struct tipc_peer *peer = peer_nxt(head);
+	struct tipc_peer *self = mon->self;
+	int i = 0;
+
+	for (; peer != self; peer = peer_nxt(peer)) {
+		peer->is_local = false;
+
+		/* Update domain member */
+		if (i++ < head->applied) {
+			peer->is_head = false;
+			if (head == self)
+				peer->is_local = true;
+			continue;
+		}
+		/* Assign next domain head */
+		if (!peer->is_up)
+			continue;
+		if (peer->is_head)
+			break;
+		head = peer;
+		head->is_head = true;
+		i = 0;
+	}
+	mon->list_gen++;
+}
+
+void tipc_mon_remove_peer(struct net *net, u32 addr, int bearer_id)
+{
+	struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
+	struct tipc_peer *self = get_self(net, bearer_id);
+	struct tipc_peer *peer, *prev, *head;
+
+	write_lock_bh(&mon->lock);
+	peer = get_peer(mon, addr);
+	if (!peer)
+		goto exit;
+	prev = peer_prev(peer);
+	list_del(&peer->list);
+	hlist_del(&peer->hash);
+	kfree(peer->domain);
+	kfree(peer);
+	mon->peer_cnt--;
+	head = peer_head(prev);
+	if (head == self)
+		mon_update_local_domain(mon);
+	mon_update_neighbors(mon, prev);
+
+	/* Revert to full-mesh monitoring if we reach threshold */
+	if (!tipc_mon_is_active(net, mon)) {
+		list_for_each_entry(peer, &self->list, list) {
+			kfree(peer->domain);
+			peer->domain = NULL;
+			peer->applied = 0;
+		}
+	}
+	mon_assign_roles(mon, head);
+exit:
+	write_unlock_bh(&mon->lock);
+}
+
+static bool tipc_mon_add_peer(struct tipc_monitor *mon, u32 addr,
+			      struct tipc_peer **peer)
+{
+	struct tipc_peer *self = mon->self;
+	struct tipc_peer *cur, *prev, *p;
+
+	p = kzalloc(sizeof(*p), GFP_ATOMIC);
+	*peer = p;
+	if (!p)
+		return false;
+	p->addr = addr;
+
+	/* Add new peer to lookup list */
+	INIT_LIST_HEAD(&p->list);
+	hlist_add_head(&p->hash, &mon->peers[tipc_hashfn(addr)]);
+
+	/* Sort new peer into iterator list, in ascending circular order */
+	prev = self;
+	list_for_each_entry(cur, &self->list, list) {
+		if ((addr > prev->addr) && (addr < cur->addr))
+			break;
+		if (((addr < cur->addr) || (addr > prev->addr)) &&
+		    (prev->addr > cur->addr))
+			break;
+		prev = cur;
+	}
+	list_add_tail(&p->list, &cur->list);
+	mon->peer_cnt++;
+	mon_update_neighbors(mon, p);
+	return true;
+}
+
+void tipc_mon_peer_up(struct net *net, u32 addr, int bearer_id)
+{
+	struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
+	struct tipc_peer *self = get_self(net, bearer_id);
+	struct tipc_peer *peer, *head;
+
+	write_lock_bh(&mon->lock);
+	peer = get_peer(mon, addr);
+	if (!peer && !tipc_mon_add_peer(mon, addr, &peer))
+		goto exit;
+	peer->is_up = true;
+	head = peer_head(peer);
+	if (head == self)
+		mon_update_local_domain(mon);
+	mon_assign_roles(mon, head);
+exit:
+	write_unlock_bh(&mon->lock);
+}
+
+void tipc_mon_peer_down(struct net *net, u32 addr, int bearer_id)
+{
+	struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
+	struct tipc_peer *self = get_self(net, bearer_id);
+	struct tipc_peer *peer, *head;
+	struct tipc_mon_domain *dom;
+	int applied;
+
+	write_lock_bh(&mon->lock);
+	peer = get_peer(mon, addr);
+	if (!peer) {
+		pr_warn("Mon: unknown link %x/%u DOWN\n", addr, bearer_id);
+		goto exit;
+	}
+	applied = peer->applied;
+	peer->applied = 0;
+	dom = peer->domain;
+	peer->domain = NULL;
+	if (peer->is_head)
+		mon_identify_lost_members(peer, dom, applied);
+	kfree(dom);
+	peer->is_up = false;
+	peer->is_head = false;
+	peer->is_local = false;
+	peer->down_cnt = 0;
+	head = peer_head(peer);
+	if (head == self)
+		mon_update_local_domain(mon);
+	mon_assign_roles(mon, head);
+exit:
+	write_unlock_bh(&mon->lock);
+}
+
+/* tipc_mon_rcv - process monitor domain event message
+ */
+void tipc_mon_rcv(struct net *net, void *data, u16 dlen, u32 addr,
+		  struct tipc_mon_state *state, int bearer_id)
+{
+	struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
+	struct tipc_mon_domain *arrv_dom = data;
+	struct tipc_mon_domain dom_bef;
+	struct tipc_mon_domain *dom;
+	struct tipc_peer *peer;
+	u16 new_member_cnt = ntohs(arrv_dom->member_cnt);
+	int new_dlen = dom_rec_len(arrv_dom, new_member_cnt);
+	u16 new_gen = ntohs(arrv_dom->gen);
+	u16 acked_gen = ntohs(arrv_dom->ack_gen);
+	bool probing = state->probing;
+	int i, applied_bef;
+
+	state->probing = false;
+	if (!dlen)
+		return;
+
+	/* Sanity check received domain record */
+	if ((dlen < new_dlen) || ntohs(arrv_dom->len) != new_dlen) {
+		pr_warn_ratelimited("Received illegal domain record\n");
+		return;
+	}
+
+	/* Synch generation numbers with peer if link just came up */
+	if (!state->synched) {
+		state->peer_gen = new_gen - 1;
+		state->acked_gen = acked_gen;
+		state->synched = true;
+	}
+
+	if (more(acked_gen, state->acked_gen))
+		state->acked_gen = acked_gen;
+
+	/* Drop duplicate unless we are waiting for a probe response */
+	if (!more(new_gen, state->peer_gen) && !probing)
+		return;
+
+	write_lock_bh(&mon->lock);
+	peer = get_peer(mon, addr);
+	if (!peer || !peer->is_up)
+		goto exit;
+
+	/* Peer is confirmed, stop any ongoing probing */
+	peer->down_cnt = 0;
+
+	/* Task is done for duplicate record */
+	if (!more(new_gen, state->peer_gen))
+		goto exit;
+
+	state->peer_gen = new_gen;
+
+	/* Cache current domain record for later use */
+	dom_bef.member_cnt = 0;
+	dom = peer->domain;
+	if (dom)
+		memcpy(&dom_bef, dom, dom->len);
+
+	/* Transform and store received domain record */
+	if (!dom || (dom->len < new_dlen)) {
+		kfree(dom);
+		dom = kmalloc(new_dlen, GFP_ATOMIC);
+		peer->domain = dom;
+		if (!dom)
+			goto exit;
+	}
+	dom->len = new_dlen;
+	dom->gen = new_gen;
+	dom->member_cnt = new_member_cnt;
+	dom->up_map = be64_to_cpu(arrv_dom->up_map);
+	for (i = 0; i < new_member_cnt; i++)
+		dom->members[i] = ntohl(arrv_dom->members[i]);
+
+	/* Update peers affected by this domain record */
+	applied_bef = peer->applied;
+	mon_apply_domain(mon, peer);
+	mon_identify_lost_members(peer, &dom_bef, applied_bef);
+	mon_assign_roles(mon, peer_head(peer));
+exit:
+	write_unlock_bh(&mon->lock);
+}
+
+void tipc_mon_prep(struct net *net, void *data, int *dlen,
+		   struct tipc_mon_state *state, int bearer_id)
+{
+	struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
+	struct tipc_mon_domain *dom = data;
+	u16 gen = mon->dom_gen;
+	u16 len;
+
+	if (!tipc_mon_is_active(net, mon))
+		return;
+
+	/* Send only a dummy record with ack if peer has acked our last sent */
+	if (likely(state->acked_gen == gen)) {
+		len = dom_rec_len(dom, 0);
+		*dlen = len;
+		dom->len = htons(len);
+		dom->gen = htons(gen);
+		dom->ack_gen = htons(state->peer_gen);
+		dom->member_cnt = 0;
+		return;
+	}
+	/* Send the full record */
+	read_lock_bh(&mon->lock);
+	len = ntohs(mon->cache.len);
+	*dlen = len;
+	memcpy(data, &mon->cache, len);
+	read_unlock_bh(&mon->lock);
+	dom->ack_gen = htons(state->peer_gen);
+}
+
+void tipc_mon_get_state(struct net *net, u32 addr,
+			struct tipc_mon_state *state,
+			int bearer_id)
+{
+	struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
+	struct tipc_peer *peer;
+
+	/* Use cached state if table has not changed */
+	if (!state->probing &&
+	    (state->list_gen == mon->list_gen) &&
+	    (state->acked_gen == mon->dom_gen))
+		return;
+
+	read_lock_bh(&mon->lock);
+	peer = get_peer(mon, addr);
+	if (peer) {
+		state->probing = state->acked_gen != mon->dom_gen;
+		state->probing |= peer->down_cnt;
+		state->reset |= peer->down_cnt >= MAX_PEER_DOWN_EVENTS;
+		state->monitoring = peer->is_local;
+		state->monitoring |= peer->is_head;
+		state->list_gen = mon->list_gen;
+	}
+	read_unlock_bh(&mon->lock);
+}
+
+static void mon_timeout(unsigned long m)
+{
+	struct tipc_monitor *mon = (void *)m;
+	struct tipc_peer *self;
+	int best_member_cnt = dom_size(mon->peer_cnt) - 1;
+
+	write_lock_bh(&mon->lock);
+	self = mon->self;
+	if (self && (best_member_cnt != self->applied)) {
+		mon_update_local_domain(mon);
+		mon_assign_roles(mon, self);
+	}
+	write_unlock_bh(&mon->lock);
+	mod_timer(&mon->timer, jiffies + mon->timer_intv);
+}
+
+int tipc_mon_create(struct net *net, int bearer_id)
+{
+	struct tipc_net *tn = tipc_net(net);
+	struct tipc_monitor *mon;
+	struct tipc_peer *self;
+	struct tipc_mon_domain *dom;
+
+	if (tn->monitors[bearer_id])
+		return 0;
+
+	mon = kzalloc(sizeof(*mon), GFP_ATOMIC);
+	self = kzalloc(sizeof(*self), GFP_ATOMIC);
+	dom = kzalloc(sizeof(*dom), GFP_ATOMIC);
+	if (!mon || !self || !dom) {
+		kfree(mon);
+		kfree(self);
+		kfree(dom);
+		return -ENOMEM;
+	}
+	tn->monitors[bearer_id] = mon;
+	rwlock_init(&mon->lock);
+	mon->net = net;
+	mon->peer_cnt = 1;
+	mon->self = self;
+	self->domain = dom;
+	self->addr = tipc_own_addr(net);
+	self->is_up = true;
+	self->is_head = true;
+	INIT_LIST_HEAD(&self->list);
+	setup_timer(&mon->timer, mon_timeout, (unsigned long)mon);
+	mon->timer_intv = msecs_to_jiffies(MON_TIMEOUT + (tn->random & 0xffff));
+	mod_timer(&mon->timer, jiffies + mon->timer_intv);
+	return 0;
+}
+
+void tipc_mon_delete(struct net *net, int bearer_id)
+{
+	struct tipc_net *tn = tipc_net(net);
+	struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
+	struct tipc_peer *self = get_self(net, bearer_id);
+	struct tipc_peer *peer, *tmp;
+
+	write_lock_bh(&mon->lock);
+	tn->monitors[bearer_id] = NULL;
+	list_for_each_entry_safe(peer, tmp, &self->list, list) {
+		list_del(&peer->list);
+		hlist_del(&peer->hash);
+		kfree(peer->domain);
+		kfree(peer);
+	}
+	mon->self = NULL;
+	write_unlock_bh(&mon->lock);
+	del_timer_sync(&mon->timer);
+	kfree(self->domain);
+	kfree(self);
+	kfree(mon);
+}
diff --git a/net/tipc/monitor.h b/net/tipc/monitor.h
new file mode 100644
index 000000000000..598459cbed5d
--- /dev/null
+++ b/net/tipc/monitor.h
@@ -0,0 +1,73 @@
+/*
+ * net/tipc/monitor.h
+ *
+ * Copyright (c) 2015, Ericsson AB
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _TIPC_MONITOR_H
+#define _TIPC_MONITOR_H
+
+/* struct tipc_mon_state: link instance's cache of monitor list and domain state
+ * @list_gen: current generation of this node's monitor list
+ * @gen: current generation of this node's local domain
+ * @peer_gen: most recent domain generation received from peer
+ * @acked_gen: most recent generation of self's domain acked by peer
+ * @monitoring: this peer endpoint should be continuously monitored
+ * @probing: peer endpoint should be temporarily probed for potential loss
+ * @synched: domain record's generation has been synched with peer after reset
+ */
+struct tipc_mon_state {
+	u16 list_gen;
+	u16 peer_gen;
+	u16 acked_gen;
+	bool monitoring :1;
+	bool probing    :1;
+	bool reset      :1;
+	bool synched    :1;
+};
+
+int tipc_mon_create(struct net *net, int bearer_id);
+void tipc_mon_delete(struct net *net, int bearer_id);
+
+void tipc_mon_peer_up(struct net *net, u32 addr, int bearer_id);
+void tipc_mon_peer_down(struct net *net, u32 addr, int bearer_id);
+void tipc_mon_prep(struct net *net, void *data, int *dlen,
+		   struct tipc_mon_state *state, int bearer_id);
+void tipc_mon_rcv(struct net *net, void *data, u16 dlen, u32 addr,
+		  struct tipc_mon_state *state, int bearer_id);
+void tipc_mon_get_state(struct net *net, u32 addr,
+			struct tipc_mon_state *state,
+			int bearer_id);
+void tipc_mon_remove_peer(struct net *net, u32 addr, int bearer_id);
+
+extern const int tipc_max_domain_size;
+#endif
diff --git a/net/tipc/node.c b/net/tipc/node.c
index d6a490f991a4..a3fc0a3f4077 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -40,6 +40,7 @@
 #include "name_distr.h"
 #include "socket.h"
 #include "bcast.h"
+#include "monitor.h"
 #include "discover.h"
 #include "netlink.h"
 
@@ -205,17 +206,6 @@ u16 tipc_node_get_capabilities(struct net *net, u32 addr)
 	return caps;
 }
 
-/*
- * A trivial power-of-two bitmask technique is used for speed, since this
- * operation is done for every incoming TIPC packet. The number of hash table
- * entries has been chosen so that no hash chain exceeds 8 nodes and will
- * usually be much smaller (typically only a single node).
- */
-static unsigned int tipc_hashfn(u32 addr)
-{
-	return addr & (NODE_HTABLE_SIZE - 1);
-}
-
 static void tipc_node_kref_release(struct kref *kref)
 {
 	struct tipc_node *n = container_of(kref, struct tipc_node, kref);
@@ -279,6 +269,7 @@ static void tipc_node_write_unlock(struct tipc_node *n)
 	u32 addr = 0;
 	u32 flags = n->action_flags;
 	u32 link_id = 0;
+	u32 bearer_id;
 	struct list_head *publ_list;
 
 	if (likely(!flags)) {
@@ -288,6 +279,7 @@ static void tipc_node_write_unlock(struct tipc_node *n)
 
 	addr = n->addr;
 	link_id = n->link_id;
+	bearer_id = link_id & 0xffff;
 	publ_list = &n->publ_list;
 
 	n->action_flags &= ~(TIPC_NOTIFY_NODE_DOWN | TIPC_NOTIFY_NODE_UP |
@@ -301,13 +293,16 @@ static void tipc_node_write_unlock(struct tipc_node *n)
 	if (flags & TIPC_NOTIFY_NODE_UP)
 		tipc_named_node_up(net, addr);
 
-	if (flags & TIPC_NOTIFY_LINK_UP)
+	if (flags & TIPC_NOTIFY_LINK_UP) {
+		tipc_mon_peer_up(net, addr, bearer_id);
 		tipc_nametbl_publish(net, TIPC_LINK_STATE, addr, addr,
 				     TIPC_NODE_SCOPE, link_id, addr);
-
-	if (flags & TIPC_NOTIFY_LINK_DOWN)
+	}
+	if (flags & TIPC_NOTIFY_LINK_DOWN) {
+		tipc_mon_peer_down(net, addr, bearer_id);
 		tipc_nametbl_withdraw(net, TIPC_LINK_STATE, addr,
 				      link_id, addr);
+	}
 }
 
 struct tipc_node *tipc_node_create(struct net *net, u32 addr, u16 capabilities)
@@ -691,6 +686,7 @@ static void tipc_node_link_down(struct tipc_node *n, int bearer_id, bool delete)
 	struct tipc_link *l = le->link;
 	struct tipc_media_addr *maddr;
 	struct sk_buff_head xmitq;
+	int old_bearer_id = bearer_id;
 
 	if (!l)
 		return;
@@ -710,6 +706,8 @@ static void tipc_node_link_down(struct tipc_node *n, int bearer_id, bool delete)
 		tipc_link_fsm_evt(l, LINK_RESET_EVT);
 	}
 	tipc_node_write_unlock(n);
+	if (delete)
+		tipc_mon_remove_peer(n->net, n->addr, old_bearer_id);
 	tipc_bearer_xmit(n->net, bearer_id, &xmitq, maddr);
 	tipc_sk_rcv(n->net, &le->inputq);
 }