summaryrefslogtreecommitdiffstats
path: root/net/tipc/link.c
diff options
context:
space:
mode:
authorJon Paul Maloy <jon.maloy@ericsson.com>2016-09-01 13:52:49 -0400
committerDavid S. Miller <davem@davemloft.net>2016-09-02 20:10:24 -0400
commit02d11ca20091fcef904f05defda80c53e5b4e793 (patch)
tree2d1f26e4facff8b4e784c60d5a3c6163ba40ed30 /net/tipc/link.c
parent2c896fb02e7f65299646f295a007bda043e0f382 (diff)
tipc: transfer broadcast nacks in link state messages
When we send broadcasts in clusters of more than 70-80 nodes, we sometimes see the broadcast link resetting because of an excessive number of retransmissions. This is caused by a combination of two factors: 1) A "NACK crunch", where loss of broadcast packets is discovered and NACK'ed by several nodes simultaneously, leading to multiple redundant broadcast retransmissions. 2) The fact that the NACKS as such also are sent as broadcast, leading to excessive load and packet loss on the transmitting switch/bridge. This commit deals with the latter problem, by moving sending of broadcast nacks from the dedicated BCAST_PROTOCOL/NACK message type to regular unicast LINK_PROTOCOL/STATE messages. We allocate 10 unused bits in word 8 of the said message for this purpose, and introduce a new capability bit, TIPC_BCAST_STATE_NACK in order to keep the change backwards compatible. Reviewed-by: Ying Xue <ying.xue@windriver.com> Signed-off-by: Jon Maloy <jon.maloy@ericsson.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/tipc/link.c')
-rw-r--r--net/tipc/link.c64
1 files changed, 51 insertions, 13 deletions
diff --git a/net/tipc/link.c b/net/tipc/link.c
index 2c6e1b9e024b..136316fb37ec 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -367,6 +367,18 @@ int tipc_link_bc_peers(struct tipc_link *l)
367 return l->ackers; 367 return l->ackers;
368} 368}
369 369
370u16 link_bc_rcv_gap(struct tipc_link *l)
371{
372 struct sk_buff *skb = skb_peek(&l->deferdq);
373 u16 gap = 0;
374
375 if (more(l->snd_nxt, l->rcv_nxt))
376 gap = l->snd_nxt - l->rcv_nxt;
377 if (skb)
378 gap = buf_seqno(skb) - l->rcv_nxt;
379 return gap;
380}
381
370void tipc_link_set_mtu(struct tipc_link *l, int mtu) 382void tipc_link_set_mtu(struct tipc_link *l, int mtu)
371{ 383{
372 l->mtu = mtu; 384 l->mtu = mtu;
@@ -1135,7 +1147,10 @@ int tipc_link_build_state_msg(struct tipc_link *l, struct sk_buff_head *xmitq)
1135 if (((l->rcv_nxt ^ tipc_own_addr(l->net)) & 0xf) != 0xf) 1147 if (((l->rcv_nxt ^ tipc_own_addr(l->net)) & 0xf) != 0xf)
1136 return 0; 1148 return 0;
1137 l->rcv_unacked = 0; 1149 l->rcv_unacked = 0;
1138 return TIPC_LINK_SND_BC_ACK; 1150
1151 /* Use snd_nxt to store peer's snd_nxt in broadcast rcv link */
1152 l->snd_nxt = l->rcv_nxt;
1153 return TIPC_LINK_SND_STATE;
1139 } 1154 }
1140 1155
1141 /* Unicast ACK */ 1156 /* Unicast ACK */
@@ -1236,7 +1251,7 @@ int tipc_link_rcv(struct tipc_link *l, struct sk_buff *skb,
1236 rc |= tipc_link_input(l, skb, l->inputq); 1251 rc |= tipc_link_input(l, skb, l->inputq);
1237 if (unlikely(++l->rcv_unacked >= TIPC_MIN_LINK_WIN)) 1252 if (unlikely(++l->rcv_unacked >= TIPC_MIN_LINK_WIN))
1238 rc |= tipc_link_build_state_msg(l, xmitq); 1253 rc |= tipc_link_build_state_msg(l, xmitq);
1239 if (unlikely(rc & ~TIPC_LINK_SND_BC_ACK)) 1254 if (unlikely(rc & ~TIPC_LINK_SND_STATE))
1240 break; 1255 break;
1241 } while ((skb = __skb_dequeue(defq))); 1256 } while ((skb = __skb_dequeue(defq)));
1242 1257
@@ -1250,10 +1265,11 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe,
1250 u16 rcvgap, int tolerance, int priority, 1265 u16 rcvgap, int tolerance, int priority,
1251 struct sk_buff_head *xmitq) 1266 struct sk_buff_head *xmitq)
1252{ 1267{
1268 struct tipc_link *bcl = l->bc_rcvlink;
1253 struct sk_buff *skb; 1269 struct sk_buff *skb;
1254 struct tipc_msg *hdr; 1270 struct tipc_msg *hdr;
1255 struct sk_buff_head *dfq = &l->deferdq; 1271 struct sk_buff_head *dfq = &l->deferdq;
1256 bool node_up = link_is_up(l->bc_rcvlink); 1272 bool node_up = link_is_up(bcl);
1257 struct tipc_mon_state *mstate = &l->mon_state; 1273 struct tipc_mon_state *mstate = &l->mon_state;
1258 int dlen = 0; 1274 int dlen = 0;
1259 void *data; 1275 void *data;
@@ -1281,7 +1297,7 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe,
1281 msg_set_net_plane(hdr, l->net_plane); 1297 msg_set_net_plane(hdr, l->net_plane);
1282 msg_set_next_sent(hdr, l->snd_nxt); 1298 msg_set_next_sent(hdr, l->snd_nxt);
1283 msg_set_ack(hdr, l->rcv_nxt - 1); 1299 msg_set_ack(hdr, l->rcv_nxt - 1);
1284 msg_set_bcast_ack(hdr, l->bc_rcvlink->rcv_nxt - 1); 1300 msg_set_bcast_ack(hdr, bcl->rcv_nxt - 1);
1285 msg_set_last_bcast(hdr, l->bc_sndlink->snd_nxt - 1); 1301 msg_set_last_bcast(hdr, l->bc_sndlink->snd_nxt - 1);
1286 msg_set_link_tolerance(hdr, tolerance); 1302 msg_set_link_tolerance(hdr, tolerance);
1287 msg_set_linkprio(hdr, priority); 1303 msg_set_linkprio(hdr, priority);
@@ -1291,6 +1307,7 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe,
1291 1307
1292 if (mtyp == STATE_MSG) { 1308 if (mtyp == STATE_MSG) {
1293 msg_set_seq_gap(hdr, rcvgap); 1309 msg_set_seq_gap(hdr, rcvgap);
1310 msg_set_bc_gap(hdr, link_bc_rcv_gap(bcl));
1294 msg_set_probe(hdr, probe); 1311 msg_set_probe(hdr, probe);
1295 tipc_mon_prep(l->net, data, &dlen, mstate, l->bearer_id); 1312 tipc_mon_prep(l->net, data, &dlen, mstate, l->bearer_id);
1296 msg_set_size(hdr, INT_H_SIZE + dlen); 1313 msg_set_size(hdr, INT_H_SIZE + dlen);
@@ -1575,49 +1592,68 @@ void tipc_link_bc_init_rcv(struct tipc_link *l, struct tipc_msg *hdr)
1575 1592
1576/* tipc_link_bc_sync_rcv - update rcv link according to peer's send state 1593/* tipc_link_bc_sync_rcv - update rcv link according to peer's send state
1577 */ 1594 */
1578void tipc_link_bc_sync_rcv(struct tipc_link *l, struct tipc_msg *hdr, 1595int tipc_link_bc_sync_rcv(struct tipc_link *l, struct tipc_msg *hdr,
1579 struct sk_buff_head *xmitq) 1596 struct sk_buff_head *xmitq)
1580{ 1597{
1581 u16 peers_snd_nxt = msg_bc_snd_nxt(hdr); 1598 u16 peers_snd_nxt = msg_bc_snd_nxt(hdr);
1599 u16 from = msg_bcast_ack(hdr) + 1;
1600 u16 to = from + msg_bc_gap(hdr) - 1;
1601 int rc = 0;
1582 1602
1583 if (!link_is_up(l)) 1603 if (!link_is_up(l))
1584 return; 1604 return rc;
1585 1605
1586 if (!msg_peer_node_is_up(hdr)) 1606 if (!msg_peer_node_is_up(hdr))
1587 return; 1607 return rc;
1588 1608
1589 /* Open when peer ackowledges our bcast init msg (pkt #1) */ 1609 /* Open when peer ackowledges our bcast init msg (pkt #1) */
1590 if (msg_ack(hdr)) 1610 if (msg_ack(hdr))
1591 l->bc_peer_is_up = true; 1611 l->bc_peer_is_up = true;
1592 1612
1593 if (!l->bc_peer_is_up) 1613 if (!l->bc_peer_is_up)
1594 return; 1614 return rc;
1595 1615
1596 /* Ignore if peers_snd_nxt goes beyond receive window */ 1616 /* Ignore if peers_snd_nxt goes beyond receive window */
1597 if (more(peers_snd_nxt, l->rcv_nxt + l->window)) 1617 if (more(peers_snd_nxt, l->rcv_nxt + l->window))
1598 return; 1618 return rc;
1619
1620 if (!less(to, from)) {
1621 rc = tipc_link_retrans(l->bc_sndlink, from, to, xmitq);
1622 l->stats.recv_nacks++;
1623 }
1624
1625 l->snd_nxt = peers_snd_nxt;
1626 if (link_bc_rcv_gap(l))
1627 rc |= TIPC_LINK_SND_STATE;
1628
1629 /* Return now if sender supports nack via STATE messages */
1630 if (l->peer_caps & TIPC_BCAST_STATE_NACK)
1631 return rc;
1632
1633 /* Otherwise, be backwards compatible */
1599 1634
1600 if (!more(peers_snd_nxt, l->rcv_nxt)) { 1635 if (!more(peers_snd_nxt, l->rcv_nxt)) {
1601 l->nack_state = BC_NACK_SND_CONDITIONAL; 1636 l->nack_state = BC_NACK_SND_CONDITIONAL;
1602 return; 1637 return 0;
1603 } 1638 }
1604 1639
1605 /* Don't NACK if one was recently sent or peeked */ 1640 /* Don't NACK if one was recently sent or peeked */
1606 if (l->nack_state == BC_NACK_SND_SUPPRESS) { 1641 if (l->nack_state == BC_NACK_SND_SUPPRESS) {
1607 l->nack_state = BC_NACK_SND_UNCONDITIONAL; 1642 l->nack_state = BC_NACK_SND_UNCONDITIONAL;
1608 return; 1643 return 0;
1609 } 1644 }
1610 1645
1611 /* Conditionally delay NACK sending until next synch rcv */ 1646 /* Conditionally delay NACK sending until next synch rcv */
1612 if (l->nack_state == BC_NACK_SND_CONDITIONAL) { 1647 if (l->nack_state == BC_NACK_SND_CONDITIONAL) {
1613 l->nack_state = BC_NACK_SND_UNCONDITIONAL; 1648 l->nack_state = BC_NACK_SND_UNCONDITIONAL;
1614 if ((peers_snd_nxt - l->rcv_nxt) < TIPC_MIN_LINK_WIN) 1649 if ((peers_snd_nxt - l->rcv_nxt) < TIPC_MIN_LINK_WIN)
1615 return; 1650 return 0;
1616 } 1651 }
1617 1652
1618 /* Send NACK now but suppress next one */ 1653 /* Send NACK now but suppress next one */
1619 tipc_link_build_bc_proto_msg(l, true, peers_snd_nxt, xmitq); 1654 tipc_link_build_bc_proto_msg(l, true, peers_snd_nxt, xmitq);
1620 l->nack_state = BC_NACK_SND_SUPPRESS; 1655 l->nack_state = BC_NACK_SND_SUPPRESS;
1656 return 0;
1621} 1657}
1622 1658
1623void tipc_link_bc_ack_rcv(struct tipc_link *l, u16 acked, 1659void tipc_link_bc_ack_rcv(struct tipc_link *l, u16 acked,
@@ -1654,6 +1690,8 @@ void tipc_link_bc_ack_rcv(struct tipc_link *l, u16 acked,
1654} 1690}
1655 1691
1656/* tipc_link_bc_nack_rcv(): receive broadcast nack message 1692/* tipc_link_bc_nack_rcv(): receive broadcast nack message
1693 * This function is here for backwards compatibility, since
1694 * no BCAST_PROTOCOL/STATE messages occur from TIPC v2.5.
1657 */ 1695 */
1658int tipc_link_bc_nack_rcv(struct tipc_link *l, struct sk_buff *skb, 1696int tipc_link_bc_nack_rcv(struct tipc_link *l, struct sk_buff *skb,
1659 struct sk_buff_head *xmitq) 1697 struct sk_buff_head *xmitq)