author    Jon Paul Maloy <jon.maloy@ericsson.com>    2015-05-14 10:46:17 -0400
committer David S. Miller <davem@davemloft.net>      2015-05-14 12:24:46 -0400
commit    f21e897eccb5a236f4191ecc1b4391eda895d6ed (patch)
tree      49f838a29cd13f79e31f32a02859691c362af3cc /net/tipc
parent    cd4eee3c2e3e01590df5cada0d56b396dd726d05 (diff)
tipc: improve link congestion algorithm
The link congestion algorithm used until now has two problems.

- It is too generous towards lower-level messages in situations of high load by giving "absolute" bandwidth guarantees to the different priority levels. LOW traffic is guaranteed 10%, MEDIUM is guaranteed 20%, HIGH is guaranteed 30%, and CRITICAL is guaranteed 40% of the available bandwidth. But, in the absence of higher-level traffic, the ratio between two distinct levels becomes unreasonable. E.g., if there is only LOW and MEDIUM traffic on a system, the former is guaranteed 1/3 of the bandwidth and the latter 2/3. This in turn means that if there is, e.g., one LOW user and 10 MEDIUM users, the former will have 33.3% of the bandwidth, and the others will have to compete for the remainder, i.e. each will end up with 6.7% of the capacity.

- Packets of type MSG_BUNDLER are created at SYSTEM importance level, but only after the packets bundled into them have passed the congestion test for their own respective levels. Since bundled packets don't result in incrementing the level counter for their own importance, only occasionally for the SYSTEM level counter, they in practice obtain SYSTEM level importance. Hence, the current implementation leaves a gap in the congestion algorithm that in the worst case may lead to a link reset.

We now refine the congestion algorithm as follows:

- A message is accepted to the link backlog only if its own level counter, and all superior level counters, permit it.

- The importance of a created bundle packet is set according to its contents. A bundle packet created from messages at levels LOW to CRITICAL is given importance level CRITICAL, while a bundle created from a SYSTEM level message is given importance SYSTEM. In the latter case only subsequent SYSTEM level messages are allowed to be bundled into it.

This solves the first problem described above by making the bandwidth guarantee relative to the total number of users at all levels; only the upper limit for each level remains absolute. In the example described above, the single LOW user would use 1/11th of the bandwidth, the same as each of the ten MEDIUM users, but he still has the same guarantee against starvation as the latter.

The fix also solves the second problem: if the CRITICAL level is filled up by bundle packets of that level, no lower-level packets will be accepted any more.

Suggested-by: Gergely Kiss <gergely.kiss@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
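To make the new admission rule concrete, here is a minimal userspace sketch of the check now performed in __tipc_link_xmit(): a message at importance level imp is admitted to the backlog only if its own level counter and every superior level counter are below their limits. The type and function names (backlog_level, backlog_admits) are illustrative only, and the importance values assume the usual TIPC encoding (LOW=0 .. CRITICAL=3, SYSTEM=4).

    #include <stdbool.h>

    enum {                              /* assumed TIPC importance values */
            TIPC_LOW_IMPORTANCE      = 0,
            TIPC_MEDIUM_IMPORTANCE   = 1,
            TIPC_HIGH_IMPORTANCE     = 2,
            TIPC_CRITICAL_IMPORTANCE = 3,
            TIPC_SYSTEM_IMPORTANCE   = 4,
    };

    struct backlog_level {
            unsigned int len;           /* messages currently queued at this level */
            unsigned int limit;         /* upper limit for this level */
    };

    /* Admit a message of importance 'imp' only if its own level and all
     * superior levels still have room; otherwise the real code calls
     * link_schedule_user() and lets the sender sleep.
     */
    static bool backlog_admits(const struct backlog_level backlog[TIPC_SYSTEM_IMPORTANCE + 1],
                               unsigned int imp)
    {
            unsigned int i;

            for (i = imp; i <= TIPC_SYSTEM_IMPORTANCE; i++)
                    if (backlog[i].len >= backlog[i].limit)
                            return false;
            return true;
    }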
Diffstat (limited to 'net/tipc')
-rw-r--r--  net/tipc/link.c | 11
-rw-r--r--  net/tipc/msg.c  |  7
-rw-r--r--  net/tipc/msg.h  | 14
3 files changed, 22 insertions, 10 deletions
diff --git a/net/tipc/link.c b/net/tipc/link.c
index a5ea19e9690f..c1aba697776f 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -645,7 +645,7 @@ int __tipc_link_xmit(struct net *net, struct tipc_link *link,
 {
 	struct tipc_msg *msg = buf_msg(skb_peek(list));
 	unsigned int maxwin = link->window;
-	unsigned int imp = msg_importance(msg);
+	unsigned int i, imp = msg_importance(msg);
 	uint mtu = link->mtu;
 	u16 ack = mod(link->rcv_nxt - 1);
 	u16 seqno = link->snd_nxt;
@@ -655,10 +655,11 @@ int __tipc_link_xmit(struct net *net, struct tipc_link *link,
 	struct sk_buff_head *backlogq = &link->backlogq;
 	struct sk_buff *skb, *tmp;
 
-	/* Match backlog limit against msg importance: */
-	if (unlikely(link->backlog[imp].len >= link->backlog[imp].limit))
-		return link_schedule_user(link, list);
-
+	/* Match msg importance against this and all higher backlog limits: */
+	for (i = imp; i <= TIPC_SYSTEM_IMPORTANCE; i++) {
+		if (unlikely(link->backlog[i].len >= link->backlog[i].limit))
+			return link_schedule_user(link, list);
+	}
 	if (unlikely(msg_size(msg) > mtu)) {
 		__skb_queue_purge(list);
 		return -EMSGSIZE;
diff --git a/net/tipc/msg.c b/net/tipc/msg.c
index c3e96e815418..ff7362d40cb3 100644
--- a/net/tipc/msg.c
+++ b/net/tipc/msg.c
@@ -365,6 +365,9 @@ bool tipc_msg_bundle(struct sk_buff *bskb, struct sk_buff *skb, u32 mtu)
 		return false;
 	if (unlikely(max < (start + msz)))
 		return false;
+	if ((msg_importance(msg) < TIPC_SYSTEM_IMPORTANCE) &&
+	    (msg_importance(bmsg) == TIPC_SYSTEM_IMPORTANCE))
+		return false;
 
 	skb_put(bskb, pad + msz);
 	skb_copy_to_linear_data_offset(bskb, start, skb->data, msz);
@@ -448,6 +451,10 @@ bool tipc_msg_make_bundle(struct sk_buff **skb, u32 mtu, u32 dnode)
 	bmsg = buf_msg(bskb);
 	tipc_msg_init(msg_prevnode(msg), bmsg, MSG_BUNDLER, 0,
 		      INT_H_SIZE, dnode);
+	if (msg_isdata(msg))
+		msg_set_importance(bmsg, TIPC_CRITICAL_IMPORTANCE);
+	else
+		msg_set_importance(bmsg, TIPC_SYSTEM_IMPORTANCE);
 	msg_set_seqno(bmsg, msg_seqno(msg));
 	msg_set_ack(bmsg, msg_ack(msg));
 	msg_set_bcast_ack(bmsg, msg_bcast_ack(msg));
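As a rough standalone illustration of the two msg.c changes above (the helper names and the enum values are assumptions for this sketch, not kernel APIs): a bundle built from data messages is labelled CRITICAL, a bundle built from a SYSTEM message is labelled SYSTEM, and a non-SYSTEM message may no longer be added to a SYSTEM-level bundle, so bundling cannot be used to sidestep a level's own backlog limit.

    #include <stdbool.h>

    enum {                              /* assumed TIPC importance values */
            TIPC_CRITICAL_IMPORTANCE = 3,
            TIPC_SYSTEM_IMPORTANCE   = 4,
    };

    /* Importance of a newly created bundle, mirroring tipc_msg_make_bundle():
     * data messages (LOW..CRITICAL) yield a CRITICAL bundle, anything else SYSTEM.
     */
    static unsigned int bundle_importance(unsigned int first_msg_imp)
    {
            return first_msg_imp <= TIPC_CRITICAL_IMPORTANCE ?
                   TIPC_CRITICAL_IMPORTANCE : TIPC_SYSTEM_IMPORTANCE;
    }

    /* Guard mirroring the new test in tipc_msg_bundle(): a non-SYSTEM message
     * must not ride in a SYSTEM-level bundle, where it would escape the
     * congestion check for its own level.
     */
    static bool may_bundle(unsigned int msg_imp, unsigned int bundle_imp)
    {
            return !(msg_imp < TIPC_SYSTEM_IMPORTANCE &&
                     bundle_imp == TIPC_SYSTEM_IMPORTANCE);
    }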
diff --git a/net/tipc/msg.h b/net/tipc/msg.h
index 6ca2366f3a53..6caf16c475e0 100644
--- a/net/tipc/msg.h
+++ b/net/tipc/msg.h
@@ -352,18 +352,22 @@ static inline void msg_set_seqno(struct tipc_msg *m, u16 n)
  */
 static inline u32 msg_importance(struct tipc_msg *m)
 {
-	if (unlikely(msg_user(m) == MSG_FRAGMENTER))
+	int usr = msg_user(m);
+
+	if (likely((usr <= TIPC_CRITICAL_IMPORTANCE) && !msg_errcode(m)))
+		return usr;
+	if ((usr == MSG_FRAGMENTER) || (usr == MSG_BUNDLER))
 		return msg_bits(m, 5, 13, 0x7);
-	if (likely(msg_isdata(m) && !msg_errcode(m)))
-		return msg_user(m);
 	return TIPC_SYSTEM_IMPORTANCE;
 }
 
 static inline void msg_set_importance(struct tipc_msg *m, u32 i)
 {
-	if (unlikely(msg_user(m) == MSG_FRAGMENTER))
+	int usr = msg_user(m);
+
+	if (likely((usr == MSG_FRAGMENTER) || (usr == MSG_BUNDLER)))
 		msg_set_bits(m, 5, 13, 0x7, i);
-	else if (likely(i < TIPC_SYSTEM_IMPORTANCE))
+	else if (i < TIPC_SYSTEM_IMPORTANCE)
 		msg_set_user(m, i);
 	else
 		pr_warn("Trying to set illegal importance in message\n");
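Finally, a simplified standalone model of the reworked accessors above: for the four data users the importance now is the message user field itself, while MSG_BUNDLER and MSG_FRAGMENTER packets carry the importance of their payload in header word 5, bits 13..15 (what msg_bits(m, 5, 13, 0x7) reads). The toy_msg struct, the function names and the enum values are assumptions for this sketch, and the error-code check of the real msg_importance() is omitted.

    enum {                              /* assumed TIPC values, for illustration */
            TIPC_CRITICAL_IMPORTANCE = 3,
            TIPC_SYSTEM_IMPORTANCE   = 4,
            MSG_BUNDLER              = 6,
            MSG_FRAGMENTER           = 12,
    };

    struct toy_msg {
            unsigned int user;          /* message user; doubles as importance for data */
            unsigned int w5;            /* header word 5 */
    };

    static unsigned int toy_msg_importance(const struct toy_msg *m)
    {
            if (m->user <= TIPC_CRITICAL_IMPORTANCE)
                    return m->user;                 /* data: the user field is the level */
            if (m->user == MSG_BUNDLER || m->user == MSG_FRAGMENTER)
                    return (m->w5 >> 13) & 0x7;     /* importance of the carried payload */
            return TIPC_SYSTEM_IMPORTANCE;          /* all other internal users */
    }

    static void toy_msg_set_importance(struct toy_msg *m, unsigned int i)
    {
            if (m->user == MSG_BUNDLER || m->user == MSG_FRAGMENTER)
                    m->w5 = (m->w5 & ~(0x7u << 13)) | ((i & 0x7u) << 13);
            else if (i < TIPC_SYSTEM_IMPORTANCE)
                    m->user = i;                    /* data: importance becomes the user */
    }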