aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTuong Lien <tuong.t.lien@dektech.com.au>2019-08-14 23:24:08 -0400
committerDavid S. Miller <davem@davemloft.net>2019-08-16 19:27:13 -0400
commit712042313b23b5df7451faf4b279beb3025e990c (patch)
tree907be3ce9063e05a8375baa21bd9e07c951cfab1
parentb9cbf8a64865b50fd0f4a3915fa00ac7365cdf8f (diff)
tipc: fix false detection of retransmit failures
This commit eliminates the use of the link 'stale_limit' & 'prev_from' (besides the already removed - 'stale_cnt') variables in the detection of repeated retransmit failures as there is no proper way to initialize them to avoid a false detection, i.e. it is not really a retransmission failure but due to a garbage values in the variables. Instead, a jiffies variable will be added to individual skbs (like the way we restrict the skb retransmissions) in order to mark the first skb retransmit time. Later on, at the next retransmissions, the timestamp will be checked to see if the skb in the link transmq is "too stale", that is, the link tolerance time has passed, so that a link reset will be ordered. Note, just checking on the first skb in the queue is fine enough since it must be the oldest one. A counter is also added to keep track the actual skb retransmissions' number for later checking when the failure happens. The downside of this approach is that the skb->cb[] buffer is about to be exhausted, however it is always able to allocate another memory area and keep a reference to it when needed. Fixes: 77cf8edbc0e7 ("tipc: simplify stale link failure criteria") Reported-by: Hoang Le <hoang.h.le@dektech.com.au> Acked-by: Ying Xue <ying.xue@windriver.com> Acked-by: Jon Maloy <jon.maloy@ericsson.com> Signed-off-by: Tuong Lien <tuong.t.lien@dektech.com.au> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--net/tipc/link.c92
-rw-r--r--net/tipc/msg.h8
2 files changed, 57 insertions, 43 deletions
diff --git a/net/tipc/link.c b/net/tipc/link.c
index 66d3a07bc571..c2c5c53cad22 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -106,8 +106,6 @@ struct tipc_stats {
106 * @transmitq: queue for sent, non-acked messages 106 * @transmitq: queue for sent, non-acked messages
107 * @backlogq: queue for messages waiting to be sent 107 * @backlogq: queue for messages waiting to be sent
108 * @snt_nxt: next sequence number to use for outbound messages 108 * @snt_nxt: next sequence number to use for outbound messages
109 * @prev_from: sequence number of most previous retransmission request
110 * @stale_limit: time when repeated identical retransmits must force link reset
111 * @ackers: # of peers that needs to ack each packet before it can be released 109 * @ackers: # of peers that needs to ack each packet before it can be released
112 * @acked: # last packet acked by a certain peer. Used for broadcast. 110 * @acked: # last packet acked by a certain peer. Used for broadcast.
113 * @rcv_nxt: next sequence number to expect for inbound messages 111 * @rcv_nxt: next sequence number to expect for inbound messages
@@ -164,9 +162,7 @@ struct tipc_link {
164 u16 limit; 162 u16 limit;
165 } backlog[5]; 163 } backlog[5];
166 u16 snd_nxt; 164 u16 snd_nxt;
167 u16 prev_from;
168 u16 window; 165 u16 window;
169 unsigned long stale_limit;
170 166
171 /* Reception */ 167 /* Reception */
172 u16 rcv_nxt; 168 u16 rcv_nxt;
@@ -1044,47 +1040,53 @@ static void tipc_link_advance_backlog(struct tipc_link *l,
1044 * link_retransmit_failure() - Detect repeated retransmit failures 1040 * link_retransmit_failure() - Detect repeated retransmit failures
1045 * @l: tipc link sender 1041 * @l: tipc link sender
1046 * @r: tipc link receiver (= l in case of unicast) 1042 * @r: tipc link receiver (= l in case of unicast)
1047 * @from: seqno of the 1st packet in retransmit request
1048 * @rc: returned code 1043 * @rc: returned code
1049 * 1044 *
1050 * Return: true if the repeated retransmit failures happens, otherwise 1045 * Return: true if the repeated retransmit failures happens, otherwise
1051 * false 1046 * false
1052 */ 1047 */
1053static bool link_retransmit_failure(struct tipc_link *l, struct tipc_link *r, 1048static bool link_retransmit_failure(struct tipc_link *l, struct tipc_link *r,
1054 u16 from, int *rc) 1049 int *rc)
1055{ 1050{
1056 struct sk_buff *skb = skb_peek(&l->transmq); 1051 struct sk_buff *skb = skb_peek(&l->transmq);
1057 struct tipc_msg *hdr; 1052 struct tipc_msg *hdr;
1058 1053
1059 if (!skb) 1054 if (!skb)
1060 return false; 1055 return false;
1061 hdr = buf_msg(skb);
1062 1056
1063 /* Detect repeated retransmit failures on same packet */ 1057 if (!TIPC_SKB_CB(skb)->retr_cnt)
1064 if (r->prev_from != from) { 1058 return false;
1065 r->prev_from = from;
1066 r->stale_limit = jiffies + msecs_to_jiffies(r->tolerance);
1067 } else if (time_after(jiffies, r->stale_limit)) {
1068 pr_warn("Retransmission failure on link <%s>\n", l->name);
1069 link_print(l, "State of link ");
1070 pr_info("Failed msg: usr %u, typ %u, len %u, err %u\n",
1071 msg_user(hdr), msg_type(hdr), msg_size(hdr),
1072 msg_errcode(hdr));
1073 pr_info("sqno %u, prev: %x, src: %x\n",
1074 msg_seqno(hdr), msg_prevnode(hdr), msg_orignode(hdr));
1075
1076 trace_tipc_list_dump(&l->transmq, true, "retrans failure!");
1077 trace_tipc_link_dump(l, TIPC_DUMP_NONE, "retrans failure!");
1078 trace_tipc_link_dump(r, TIPC_DUMP_NONE, "retrans failure!");
1079 1059
1080 if (link_is_bc_sndlink(l)) 1060 if (!time_after(jiffies, TIPC_SKB_CB(skb)->retr_stamp +
1081 *rc = TIPC_LINK_DOWN_EVT; 1061 msecs_to_jiffies(r->tolerance)))
1062 return false;
1063
1064 hdr = buf_msg(skb);
1065 if (link_is_bc_sndlink(l) && !less(r->acked, msg_seqno(hdr)))
1066 return false;
1082 1067
1068 pr_warn("Retransmission failure on link <%s>\n", l->name);
1069 link_print(l, "State of link ");
1070 pr_info("Failed msg: usr %u, typ %u, len %u, err %u\n",
1071 msg_user(hdr), msg_type(hdr), msg_size(hdr), msg_errcode(hdr));
1072 pr_info("sqno %u, prev: %x, dest: %x\n",
1073 msg_seqno(hdr), msg_prevnode(hdr), msg_destnode(hdr));
1074 pr_info("retr_stamp %d, retr_cnt %d\n",
1075 jiffies_to_msecs(TIPC_SKB_CB(skb)->retr_stamp),
1076 TIPC_SKB_CB(skb)->retr_cnt);
1077
1078 trace_tipc_list_dump(&l->transmq, true, "retrans failure!");
1079 trace_tipc_link_dump(l, TIPC_DUMP_NONE, "retrans failure!");
1080 trace_tipc_link_dump(r, TIPC_DUMP_NONE, "retrans failure!");
1081
1082 if (link_is_bc_sndlink(l)) {
1083 r->state = LINK_RESET;
1084 *rc = TIPC_LINK_DOWN_EVT;
1085 } else {
1083 *rc = tipc_link_fsm_evt(l, LINK_FAILURE_EVT); 1086 *rc = tipc_link_fsm_evt(l, LINK_FAILURE_EVT);
1084 return true;
1085 } 1087 }
1086 1088
1087 return false; 1089 return true;
1088} 1090}
1089 1091
1090/* tipc_link_bc_retrans() - retransmit zero or more packets 1092/* tipc_link_bc_retrans() - retransmit zero or more packets
@@ -1110,7 +1112,7 @@ static int tipc_link_bc_retrans(struct tipc_link *l, struct tipc_link *r,
1110 1112
1111 trace_tipc_link_retrans(r, from, to, &l->transmq); 1113 trace_tipc_link_retrans(r, from, to, &l->transmq);
1112 1114
1113 if (link_retransmit_failure(l, r, from, &rc)) 1115 if (link_retransmit_failure(l, r, &rc))
1114 return rc; 1116 return rc;
1115 1117
1116 skb_queue_walk(&l->transmq, skb) { 1118 skb_queue_walk(&l->transmq, skb) {
@@ -1119,11 +1121,10 @@ static int tipc_link_bc_retrans(struct tipc_link *l, struct tipc_link *r,
1119 continue; 1121 continue;
1120 if (more(msg_seqno(hdr), to)) 1122 if (more(msg_seqno(hdr), to))
1121 break; 1123 break;
1122 if (link_is_bc_sndlink(l)) { 1124
1123 if (time_before(jiffies, TIPC_SKB_CB(skb)->nxt_retr)) 1125 if (time_before(jiffies, TIPC_SKB_CB(skb)->nxt_retr))
1124 continue; 1126 continue;
1125 TIPC_SKB_CB(skb)->nxt_retr = TIPC_BC_RETR_LIM; 1127 TIPC_SKB_CB(skb)->nxt_retr = TIPC_BC_RETR_LIM;
1126 }
1127 _skb = __pskb_copy(skb, LL_MAX_HEADER + MIN_H_SIZE, GFP_ATOMIC); 1128 _skb = __pskb_copy(skb, LL_MAX_HEADER + MIN_H_SIZE, GFP_ATOMIC);
1128 if (!_skb) 1129 if (!_skb)
1129 return 0; 1130 return 0;
@@ -1133,6 +1134,10 @@ static int tipc_link_bc_retrans(struct tipc_link *l, struct tipc_link *r,
1133 _skb->priority = TC_PRIO_CONTROL; 1134 _skb->priority = TC_PRIO_CONTROL;
1134 __skb_queue_tail(xmitq, _skb); 1135 __skb_queue_tail(xmitq, _skb);
1135 l->stats.retransmitted++; 1136 l->stats.retransmitted++;
1137
1138 /* Increase actual retrans counter & mark first time */
1139 if (!TIPC_SKB_CB(skb)->retr_cnt++)
1140 TIPC_SKB_CB(skb)->retr_stamp = jiffies;
1136 } 1141 }
1137 return 0; 1142 return 0;
1138} 1143}
@@ -1357,12 +1362,10 @@ static int tipc_link_advance_transmq(struct tipc_link *l, u16 acked, u16 gap,
1357 struct tipc_msg *hdr; 1362 struct tipc_msg *hdr;
1358 u16 bc_ack = l->bc_rcvlink->rcv_nxt - 1; 1363 u16 bc_ack = l->bc_rcvlink->rcv_nxt - 1;
1359 u16 ack = l->rcv_nxt - 1; 1364 u16 ack = l->rcv_nxt - 1;
1365 bool passed = false;
1360 u16 seqno, n = 0; 1366 u16 seqno, n = 0;
1361 int rc = 0; 1367 int rc = 0;
1362 1368
1363 if (gap && link_retransmit_failure(l, l, acked + 1, &rc))
1364 return rc;
1365
1366 skb_queue_walk_safe(&l->transmq, skb, tmp) { 1369 skb_queue_walk_safe(&l->transmq, skb, tmp) {
1367 seqno = buf_seqno(skb); 1370 seqno = buf_seqno(skb);
1368 1371
@@ -1372,12 +1375,17 @@ next_gap_ack:
1372 __skb_unlink(skb, &l->transmq); 1375 __skb_unlink(skb, &l->transmq);
1373 kfree_skb(skb); 1376 kfree_skb(skb);
1374 } else if (less_eq(seqno, acked + gap)) { 1377 } else if (less_eq(seqno, acked + gap)) {
1375 /* retransmit skb */ 1378 /* First, check if repeated retrans failures occurs? */
1379 if (!passed && link_retransmit_failure(l, l, &rc))
1380 return rc;
1381 passed = true;
1382
1383 /* retransmit skb if unrestricted*/
1376 if (time_before(jiffies, TIPC_SKB_CB(skb)->nxt_retr)) 1384 if (time_before(jiffies, TIPC_SKB_CB(skb)->nxt_retr))
1377 continue; 1385 continue;
1378 TIPC_SKB_CB(skb)->nxt_retr = TIPC_UC_RETR_TIME; 1386 TIPC_SKB_CB(skb)->nxt_retr = TIPC_UC_RETR_TIME;
1379 1387 _skb = __pskb_copy(skb, LL_MAX_HEADER + MIN_H_SIZE,
1380 _skb = __pskb_copy(skb, MIN_H_SIZE, GFP_ATOMIC); 1388 GFP_ATOMIC);
1381 if (!_skb) 1389 if (!_skb)
1382 continue; 1390 continue;
1383 hdr = buf_msg(_skb); 1391 hdr = buf_msg(_skb);
@@ -1386,6 +1394,10 @@ next_gap_ack:
1386 _skb->priority = TC_PRIO_CONTROL; 1394 _skb->priority = TC_PRIO_CONTROL;
1387 __skb_queue_tail(xmitq, _skb); 1395 __skb_queue_tail(xmitq, _skb);
1388 l->stats.retransmitted++; 1396 l->stats.retransmitted++;
1397
1398 /* Increase actual retrans counter & mark first time */
1399 if (!TIPC_SKB_CB(skb)->retr_cnt++)
1400 TIPC_SKB_CB(skb)->retr_stamp = jiffies;
1389 } else { 1401 } else {
1390 /* retry with Gap ACK blocks if any */ 1402 /* retry with Gap ACK blocks if any */
1391 if (!ga || n >= ga->gack_cnt) 1403 if (!ga || n >= ga->gack_cnt)
@@ -2577,7 +2589,7 @@ int tipc_link_dump(struct tipc_link *l, u16 dqueues, char *buf)
2577 i += scnprintf(buf + i, sz - i, " %x", l->peer_caps); 2589 i += scnprintf(buf + i, sz - i, " %x", l->peer_caps);
2578 i += scnprintf(buf + i, sz - i, " %u", l->silent_intv_cnt); 2590 i += scnprintf(buf + i, sz - i, " %u", l->silent_intv_cnt);
2579 i += scnprintf(buf + i, sz - i, " %u", l->rst_cnt); 2591 i += scnprintf(buf + i, sz - i, " %u", l->rst_cnt);
2580 i += scnprintf(buf + i, sz - i, " %u", l->prev_from); 2592 i += scnprintf(buf + i, sz - i, " %u", 0);
2581 i += scnprintf(buf + i, sz - i, " %u", 0); 2593 i += scnprintf(buf + i, sz - i, " %u", 0);
2582 i += scnprintf(buf + i, sz - i, " %u", l->acked); 2594 i += scnprintf(buf + i, sz - i, " %u", l->acked);
2583 2595
diff --git a/net/tipc/msg.h b/net/tipc/msg.h
index da509f0eb9ca..d7ebc9e955f6 100644
--- a/net/tipc/msg.h
+++ b/net/tipc/msg.h
@@ -102,13 +102,15 @@ struct plist;
102#define TIPC_MEDIA_INFO_OFFSET 5 102#define TIPC_MEDIA_INFO_OFFSET 5
103 103
104struct tipc_skb_cb { 104struct tipc_skb_cb {
105 u32 bytes_read;
106 u32 orig_member;
107 struct sk_buff *tail; 105 struct sk_buff *tail;
108 unsigned long nxt_retr; 106 unsigned long nxt_retr;
109 bool validated; 107 unsigned long retr_stamp;
108 u32 bytes_read;
109 u32 orig_member;
110 u16 chain_imp; 110 u16 chain_imp;
111 u16 ackers; 111 u16 ackers;
112 u16 retr_cnt;
113 bool validated;
112}; 114};
113 115
114#define TIPC_SKB_CB(__skb) ((struct tipc_skb_cb *)&((__skb)->cb[0])) 116#define TIPC_SKB_CB(__skb) ((struct tipc_skb_cb *)&((__skb)->cb[0]))