aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
authorErik Hugne <erik.hugne@ericsson.com>2014-08-28 03:08:47 -0400
committerDavid S. Miller <davem@davemloft.net>2014-09-01 20:51:48 -0400
commita5325ae5b8bff051933a754db7727fc9823e6414 (patch)
treecd9c0a8f9609387c286d5c4462b890c1c082394e /net
parentf4ad8a4b8b9f490a15c3239e0d6ac99e7e438d34 (diff)
tipc: add name distributor resiliency queue
TIPC name table updates are distributed asynchronously in a cluster, entailing a risk of certain race conditions. E.g., if two nodes simultaneously issue conflicting (overlapping) publications, this may not be detected until both publications have reached a third node, in which case one of the publications will be silently dropped on that node. Hence, we end up with an inconsistent name table. In most cases this conflict is just a temporary race, e.g., one node is issuing a publication under the assumption that a previous, conflicting publication has already been withdrawn by the other node. However, because of the (RTT-related) distributed update delay, this may not yet hold true on all nodes. The symptom of this failure is a syslog message: "tipc: Cannot publish {%u,%u,%u}, overlap error". In this commit we add a resiliency queue at the receiving end of the name table distributor. When insertion of an arriving publication fails, we retain it in this queue for a short amount of time, assuming that another update will arrive very soon and clear the conflict. If that happens, we insert the publication; otherwise we drop it. The (configurable) retention value defaults to 2000 ms. Knowing from experience that the situation described above is extremely rare, there is no risk that the queue will accumulate any large number of items. Signed-off-by: Erik Hugne <erik.hugne@ericsson.com> Signed-off-by: Jon Maloy <jon.maloy@ericsson.com> Acked-by: Ying Xue <ying.xue@windriver.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r--net/tipc/core.h1
-rw-r--r--net/tipc/name_distr.c69
-rw-r--r--net/tipc/name_distr.h1
-rw-r--r--net/tipc/name_table.c8
-rw-r--r--net/tipc/sysctl.c7
5 files changed, 79 insertions, 7 deletions
diff --git a/net/tipc/core.h b/net/tipc/core.h
index d2607a8e2b80..f773b148722f 100644
--- a/net/tipc/core.h
+++ b/net/tipc/core.h
@@ -81,6 +81,7 @@ extern u32 tipc_own_addr __read_mostly;
81extern int tipc_max_ports __read_mostly; 81extern int tipc_max_ports __read_mostly;
82extern int tipc_net_id __read_mostly; 82extern int tipc_net_id __read_mostly;
83extern int sysctl_tipc_rmem[3] __read_mostly; 83extern int sysctl_tipc_rmem[3] __read_mostly;
84extern int sysctl_tipc_named_timeout __read_mostly;
84 85
85/* 86/*
86 * Other global variables 87 * Other global variables
diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c
index 0591f33b8384..780ef710a849 100644
--- a/net/tipc/name_distr.c
+++ b/net/tipc/name_distr.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * net/tipc/name_distr.c: TIPC name distribution code 2 * net/tipc/name_distr.c: TIPC name distribution code
3 * 3 *
4 * Copyright (c) 2000-2006, Ericsson AB 4 * Copyright (c) 2000-2006, 2014, Ericsson AB
5 * Copyright (c) 2005, 2010-2011, Wind River Systems 5 * Copyright (c) 2005, 2010-2011, Wind River Systems
6 * All rights reserved. 6 * All rights reserved.
7 * 7 *
@@ -71,6 +71,21 @@ static struct publ_list *publ_lists[] = {
71}; 71};
72 72
73 73
74int sysctl_tipc_named_timeout __read_mostly = 2000;
75
76/**
77 * tipc_dist_queue - list head of the queue holding deferred name table updates
78 */
79static struct list_head tipc_dist_queue = LIST_HEAD_INIT(tipc_dist_queue);
80
/**
 * struct distr_queue_item - one deferred name table update
 * @i: copy of the distribution item that could not be applied
 * @dtype: name distributor message type the item was received with
 * @node: address of the node that originated the update
 * @expires: time, in jiffies, after which the item is dropped instead of retried
 * @next: linkage into tipc_dist_queue
 */
81struct distr_queue_item {
82 struct distr_item i;
83 u32 dtype;
84 u32 node;
85 unsigned long expires;
86 struct list_head next;
87};
88
74/** 89/**
75 * publ_to_item - add publication info to a publication message 90 * publ_to_item - add publication info to a publication message
76 */ 91 */
@@ -299,6 +314,52 @@ struct publication *tipc_update_nametbl(struct distr_item *i, u32 node,
299} 314}
300 315
301/** 316/**
317 * tipc_named_add_backlog - add a failed name table update to the backlog
318 *
319 */
320static void tipc_named_add_backlog(struct distr_item *i, u32 type, u32 node)
321{
322 struct distr_queue_item *e;
323 unsigned long now = get_jiffies_64();
324
325 e = kzalloc(sizeof(*e), GFP_ATOMIC);
326 if (!e)
327 return;
328 e->dtype = type;
329 e->node = node;
330 e->expires = now + msecs_to_jiffies(sysctl_tipc_named_timeout);
331 memcpy(e, i, sizeof(*i));
332 list_add_tail(&e->next, &tipc_dist_queue);
333}
334
335/**
336 * tipc_named_process_backlog - try to process any pending name table updates
337 * from the network.
338 */
339void tipc_named_process_backlog(void)
340{
341 struct distr_queue_item *e, *tmp;
342 char addr[16];
343 unsigned long now = get_jiffies_64();
344
345 list_for_each_entry_safe(e, tmp, &tipc_dist_queue, next) {
346 if (time_after(e->expires, now)) {
347 if (!tipc_update_nametbl(&e->i, e->node, e->dtype))
348 continue;
349 } else {
350 tipc_addr_string_fill(addr, e->node);
351 pr_warn_ratelimited("Dropping name table update (%d) of {%u, %u, %u} from %s key=%u\n",
352 e->dtype, ntohl(e->i.type),
353 ntohl(e->i.lower),
354 ntohl(e->i.upper),
355 addr, ntohl(e->i.key));
356 }
357 list_del(&e->next);
358 kfree(e);
359 }
360}
361
362/**
302 * tipc_named_rcv - process name table update message sent by another node 363 * tipc_named_rcv - process name table update message sent by another node
303 */ 364 */
304void tipc_named_rcv(struct sk_buff *buf) 365void tipc_named_rcv(struct sk_buff *buf)
@@ -306,13 +367,15 @@ void tipc_named_rcv(struct sk_buff *buf)
306 struct tipc_msg *msg = buf_msg(buf); 367 struct tipc_msg *msg = buf_msg(buf);
307 struct distr_item *item = (struct distr_item *)msg_data(msg); 368 struct distr_item *item = (struct distr_item *)msg_data(msg);
308 u32 count = msg_data_sz(msg) / ITEM_SIZE; 369 u32 count = msg_data_sz(msg) / ITEM_SIZE;
370 u32 node = msg_orignode(msg);
309 371
310 write_lock_bh(&tipc_nametbl_lock); 372 write_lock_bh(&tipc_nametbl_lock);
311 while (count--) { 373 while (count--) {
312 tipc_update_nametbl(item, msg_orignode(msg), 374 if (!tipc_update_nametbl(item, node, msg_type(msg)))
313 msg_type(msg)); 375 tipc_named_add_backlog(item, msg_type(msg), node);
314 item++; 376 item++;
315 } 377 }
378 tipc_named_process_backlog();
316 write_unlock_bh(&tipc_nametbl_lock); 379 write_unlock_bh(&tipc_nametbl_lock);
317 kfree_skb(buf); 380 kfree_skb(buf);
318} 381}
diff --git a/net/tipc/name_distr.h b/net/tipc/name_distr.h
index 8afe32b7fc9a..b9e75feb3434 100644
--- a/net/tipc/name_distr.h
+++ b/net/tipc/name_distr.h
@@ -73,5 +73,6 @@ void named_cluster_distribute(struct sk_buff *buf);
73void tipc_named_node_up(u32 dnode); 73void tipc_named_node_up(u32 dnode);
74void tipc_named_rcv(struct sk_buff *buf); 74void tipc_named_rcv(struct sk_buff *buf);
75void tipc_named_reinit(void); 75void tipc_named_reinit(void);
76void tipc_named_process_backlog(void);
76 77
77#endif 78#endif
diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index c058e30f84aa..3a6a0a7c0759 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -261,8 +261,6 @@ static struct publication *tipc_nameseq_insert_publ(struct name_seq *nseq,
261 261
262 /* Lower end overlaps existing entry => need an exact match */ 262 /* Lower end overlaps existing entry => need an exact match */
263 if ((sseq->lower != lower) || (sseq->upper != upper)) { 263 if ((sseq->lower != lower) || (sseq->upper != upper)) {
264 pr_warn("Cannot publish {%u,%u,%u}, overlap error\n",
265 type, lower, upper);
266 return NULL; 264 return NULL;
267 } 265 }
268 266
@@ -284,8 +282,6 @@ static struct publication *tipc_nameseq_insert_publ(struct name_seq *nseq,
284 /* Fail if upper end overlaps into an existing entry */ 282 /* Fail if upper end overlaps into an existing entry */
285 if ((inspos < nseq->first_free) && 283 if ((inspos < nseq->first_free) &&
286 (upper >= nseq->sseqs[inspos].lower)) { 284 (upper >= nseq->sseqs[inspos].lower)) {
287 pr_warn("Cannot publish {%u,%u,%u}, overlap error\n",
288 type, lower, upper);
289 return NULL; 285 return NULL;
290 } 286 }
291 287
@@ -677,6 +673,8 @@ struct publication *tipc_nametbl_publish(u32 type, u32 lower, u32 upper,
677 if (likely(publ)) { 673 if (likely(publ)) {
678 table.local_publ_count++; 674 table.local_publ_count++;
679 buf = tipc_named_publish(publ); 675 buf = tipc_named_publish(publ);
676 /* Any pending external events? */
677 tipc_named_process_backlog();
680 } 678 }
681 write_unlock_bh(&tipc_nametbl_lock); 679 write_unlock_bh(&tipc_nametbl_lock);
682 680
@@ -698,6 +696,8 @@ int tipc_nametbl_withdraw(u32 type, u32 lower, u32 ref, u32 key)
698 if (likely(publ)) { 696 if (likely(publ)) {
699 table.local_publ_count--; 697 table.local_publ_count--;
700 buf = tipc_named_withdraw(publ); 698 buf = tipc_named_withdraw(publ);
699 /* Any pending external events? */
700 tipc_named_process_backlog();
701 write_unlock_bh(&tipc_nametbl_lock); 701 write_unlock_bh(&tipc_nametbl_lock);
702 list_del_init(&publ->pport_list); 702 list_del_init(&publ->pport_list);
703 kfree(publ); 703 kfree(publ);
diff --git a/net/tipc/sysctl.c b/net/tipc/sysctl.c
index f3fef93325a8..1a779b1e8510 100644
--- a/net/tipc/sysctl.c
+++ b/net/tipc/sysctl.c
@@ -47,6 +47,13 @@ static struct ctl_table tipc_table[] = {
47 .mode = 0644, 47 .mode = 0644,
48 .proc_handler = proc_dointvec, 48 .proc_handler = proc_dointvec,
49 }, 49 },
50 {
51 .procname = "named_timeout",
52 .data = &sysctl_tipc_named_timeout,
53 .maxlen = sizeof(sysctl_tipc_named_timeout),
54 .mode = 0644,
55 .proc_handler = proc_dointvec,
56 },
50 {} 57 {}
51}; 58};
52 59