aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
authorStephen Hemminger <shemminger@osdl.org>2005-11-10 20:14:59 -0500
committerDavid S. Miller <davem@davemloft.net>2005-11-10 20:14:59 -0500
commit6a438bbe68c7013a42d9c5aee5a40d7dafdbe6ec (patch)
tree09775f0479168cd53494155a5789e78df218b497 /net
parentcaa20d9abe810be2ede9612b6c9db6ce7d6edf80 (diff)
[TCP]: speed up SACK processing
Use "hints" to speed up the SACK processing. Various forms of this have been used by TCP developers (Web100, STCP, BIC) to avoid the 2x linear search of outstanding segments. Signed-off-by: Stephen Hemminger <shemminger@osdl.org> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r--net/ipv4/tcp_input.c144
-rw-r--r--net/ipv4/tcp_output.c54
2 files changed, 172 insertions, 26 deletions
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 34cfa58eab76..40a26b7157b4 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -897,18 +897,32 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
897 int prior_fackets; 897 int prior_fackets;
898 u32 lost_retrans = 0; 898 u32 lost_retrans = 0;
899 int flag = 0; 899 int flag = 0;
900 int dup_sack = 0;
900 int i; 901 int i;
901 902
902 if (!tp->sacked_out) 903 if (!tp->sacked_out)
903 tp->fackets_out = 0; 904 tp->fackets_out = 0;
904 prior_fackets = tp->fackets_out; 905 prior_fackets = tp->fackets_out;
905 906
906 for (i=0; i<num_sacks; i++, sp++) { 907 /* SACK fastpath:
907 struct sk_buff *skb; 908 * if the only SACK change is the increase of the end_seq of
908 __u32 start_seq = ntohl(sp->start_seq); 909 * the first block then only apply that SACK block
909 __u32 end_seq = ntohl(sp->end_seq); 910 * and use retrans queue hinting otherwise slowpath */
910 int fack_count = 0; 911 flag = 1;
911 int dup_sack = 0; 912 for (i = 0; i< num_sacks; i++) {
913 __u32 start_seq = ntohl(sp[i].start_seq);
914 __u32 end_seq = ntohl(sp[i].end_seq);
915
916 if (i == 0){
917 if (tp->recv_sack_cache[i].start_seq != start_seq)
918 flag = 0;
919 } else {
920 if ((tp->recv_sack_cache[i].start_seq != start_seq) ||
921 (tp->recv_sack_cache[i].end_seq != end_seq))
922 flag = 0;
923 }
924 tp->recv_sack_cache[i].start_seq = start_seq;
925 tp->recv_sack_cache[i].end_seq = end_seq;
912 926
913 /* Check for D-SACK. */ 927 /* Check for D-SACK. */
914 if (i == 0) { 928 if (i == 0) {
@@ -940,15 +954,58 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
940 if (before(ack, prior_snd_una - tp->max_window)) 954 if (before(ack, prior_snd_una - tp->max_window))
941 return 0; 955 return 0;
942 } 956 }
957 }
958
959 if (flag)
960 num_sacks = 1;
961 else {
962 int j;
963 tp->fastpath_skb_hint = NULL;
964
965 /* order SACK blocks to allow in order walk of the retrans queue */
966 for (i = num_sacks-1; i > 0; i--) {
967 for (j = 0; j < i; j++){
968 if (after(ntohl(sp[j].start_seq),
969 ntohl(sp[j+1].start_seq))){
970 sp[j].start_seq = htonl(tp->recv_sack_cache[j+1].start_seq);
971 sp[j].end_seq = htonl(tp->recv_sack_cache[j+1].end_seq);
972 sp[j+1].start_seq = htonl(tp->recv_sack_cache[j].start_seq);
973 sp[j+1].end_seq = htonl(tp->recv_sack_cache[j].end_seq);
974 }
975
976 }
977 }
978 }
979
980 /* clear flag as used for different purpose in following code */
981 flag = 0;
982
983 for (i=0; i<num_sacks; i++, sp++) {
984 struct sk_buff *skb;
985 __u32 start_seq = ntohl(sp->start_seq);
986 __u32 end_seq = ntohl(sp->end_seq);
987 int fack_count;
988
989 /* Use SACK fastpath hint if valid */
990 if (tp->fastpath_skb_hint) {
991 skb = tp->fastpath_skb_hint;
992 fack_count = tp->fastpath_cnt_hint;
993 } else {
994 skb = sk->sk_write_queue.next;
995 fack_count = 0;
996 }
943 997
944 /* Event "B" in the comment above. */ 998 /* Event "B" in the comment above. */
945 if (after(end_seq, tp->high_seq)) 999 if (after(end_seq, tp->high_seq))
946 flag |= FLAG_DATA_LOST; 1000 flag |= FLAG_DATA_LOST;
947 1001
948 sk_stream_for_retrans_queue(skb, sk) { 1002 sk_stream_for_retrans_queue_from(skb, sk) {
949 int in_sack, pcount; 1003 int in_sack, pcount;
950 u8 sacked; 1004 u8 sacked;
951 1005
1006 tp->fastpath_skb_hint = skb;
1007 tp->fastpath_cnt_hint = fack_count;
1008
952 /* The retransmission queue is always in order, so 1009 /* The retransmission queue is always in order, so
953 * we can short-circuit the walk early. 1010 * we can short-circuit the walk early.
954 */ 1011 */
@@ -1023,6 +1080,9 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
1023 TCP_SKB_CB(skb)->sacked &= ~(TCPCB_LOST|TCPCB_SACKED_RETRANS); 1080 TCP_SKB_CB(skb)->sacked &= ~(TCPCB_LOST|TCPCB_SACKED_RETRANS);
1024 tp->lost_out -= tcp_skb_pcount(skb); 1081 tp->lost_out -= tcp_skb_pcount(skb);
1025 tp->retrans_out -= tcp_skb_pcount(skb); 1082 tp->retrans_out -= tcp_skb_pcount(skb);
1083
1084 /* clear lost hint */
1085 tp->retransmit_skb_hint = NULL;
1026 } 1086 }
1027 } else { 1087 } else {
1028 /* New sack for not retransmitted frame, 1088 /* New sack for not retransmitted frame,
@@ -1035,6 +1095,9 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
1035 if (sacked & TCPCB_LOST) { 1095 if (sacked & TCPCB_LOST) {
1036 TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST; 1096 TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
1037 tp->lost_out -= tcp_skb_pcount(skb); 1097 tp->lost_out -= tcp_skb_pcount(skb);
1098
1099 /* clear lost hint */
1100 tp->retransmit_skb_hint = NULL;
1038 } 1101 }
1039 } 1102 }
1040 1103
@@ -1058,6 +1121,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
1058 (TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_RETRANS)) { 1121 (TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_RETRANS)) {
1059 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; 1122 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
1060 tp->retrans_out -= tcp_skb_pcount(skb); 1123 tp->retrans_out -= tcp_skb_pcount(skb);
1124 tp->retransmit_skb_hint = NULL;
1061 } 1125 }
1062 } 1126 }
1063 } 1127 }
@@ -1085,6 +1149,9 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
1085 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; 1149 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
1086 tp->retrans_out -= tcp_skb_pcount(skb); 1150 tp->retrans_out -= tcp_skb_pcount(skb);
1087 1151
1152 /* clear lost hint */
1153 tp->retransmit_skb_hint = NULL;
1154
1088 if (!(TCP_SKB_CB(skb)->sacked&(TCPCB_LOST|TCPCB_SACKED_ACKED))) { 1155 if (!(TCP_SKB_CB(skb)->sacked&(TCPCB_LOST|TCPCB_SACKED_ACKED))) {
1089 tp->lost_out += tcp_skb_pcount(skb); 1156 tp->lost_out += tcp_skb_pcount(skb);
1090 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; 1157 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
@@ -1192,6 +1259,8 @@ static void tcp_enter_frto_loss(struct sock *sk)
1192 tcp_set_ca_state(sk, TCP_CA_Loss); 1259 tcp_set_ca_state(sk, TCP_CA_Loss);
1193 tp->high_seq = tp->frto_highmark; 1260 tp->high_seq = tp->frto_highmark;
1194 TCP_ECN_queue_cwr(tp); 1261 TCP_ECN_queue_cwr(tp);
1262
1263 clear_all_retrans_hints(tp);
1195} 1264}
1196 1265
1197void tcp_clear_retrans(struct tcp_sock *tp) 1266void tcp_clear_retrans(struct tcp_sock *tp)
@@ -1258,6 +1327,8 @@ void tcp_enter_loss(struct sock *sk, int how)
1258 tcp_set_ca_state(sk, TCP_CA_Loss); 1327 tcp_set_ca_state(sk, TCP_CA_Loss);
1259 tp->high_seq = tp->snd_nxt; 1328 tp->high_seq = tp->snd_nxt;
1260 TCP_ECN_queue_cwr(tp); 1329 TCP_ECN_queue_cwr(tp);
1330
1331 clear_all_retrans_hints(tp);
1261} 1332}
1262 1333
1263static int tcp_check_sack_reneging(struct sock *sk) 1334static int tcp_check_sack_reneging(struct sock *sk)
@@ -1482,17 +1553,37 @@ static void tcp_mark_head_lost(struct sock *sk, struct tcp_sock *tp,
1482 int packets, u32 high_seq) 1553 int packets, u32 high_seq)
1483{ 1554{
1484 struct sk_buff *skb; 1555 struct sk_buff *skb;
1485 int cnt = packets; 1556 int cnt;
1486 1557
1487 BUG_TRAP(cnt <= tp->packets_out); 1558 BUG_TRAP(packets <= tp->packets_out);
1559 if (tp->lost_skb_hint) {
1560 skb = tp->lost_skb_hint;
1561 cnt = tp->lost_cnt_hint;
1562 } else {
1563 skb = sk->sk_write_queue.next;
1564 cnt = 0;
1565 }
1488 1566
1489 sk_stream_for_retrans_queue(skb, sk) { 1567 sk_stream_for_retrans_queue_from(skb, sk) {
1490 cnt -= tcp_skb_pcount(skb); 1568 /* TODO: do this better */
1491 if (cnt < 0 || after(TCP_SKB_CB(skb)->end_seq, high_seq)) 1569 /* this is not the most efficient way to do this... */
1570 tp->lost_skb_hint = skb;
1571 tp->lost_cnt_hint = cnt;
1572 cnt += tcp_skb_pcount(skb);
1573 if (cnt > packets || after(TCP_SKB_CB(skb)->end_seq, high_seq))
1492 break; 1574 break;
1493 if (!(TCP_SKB_CB(skb)->sacked&TCPCB_TAGBITS)) { 1575 if (!(TCP_SKB_CB(skb)->sacked&TCPCB_TAGBITS)) {
1494 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; 1576 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
1495 tp->lost_out += tcp_skb_pcount(skb); 1577 tp->lost_out += tcp_skb_pcount(skb);
1578
1579 /* clear xmit_retransmit_queue hints
1580 * if this is beyond hint */
1581 if(tp->retransmit_skb_hint != NULL &&
1582 before(TCP_SKB_CB(skb)->seq,
1583 TCP_SKB_CB(tp->retransmit_skb_hint)->seq)) {
1584
1585 tp->retransmit_skb_hint = NULL;
1586 }
1496 } 1587 }
1497 } 1588 }
1498 tcp_sync_left_out(tp); 1589 tcp_sync_left_out(tp);
@@ -1519,13 +1610,28 @@ static void tcp_update_scoreboard(struct sock *sk, struct tcp_sock *tp)
1519 if (tcp_head_timedout(sk, tp)) { 1610 if (tcp_head_timedout(sk, tp)) {
1520 struct sk_buff *skb; 1611 struct sk_buff *skb;
1521 1612
1522 sk_stream_for_retrans_queue(skb, sk) { 1613 skb = tp->scoreboard_skb_hint ? tp->scoreboard_skb_hint
1523 if (tcp_skb_timedout(sk, skb) && 1614 : sk->sk_write_queue.next;
1524 !(TCP_SKB_CB(skb)->sacked&TCPCB_TAGBITS)) { 1615
1616 sk_stream_for_retrans_queue_from(skb, sk) {
1617 if (!tcp_skb_timedout(sk, skb))
1618 break;
1619
1620 if (!(TCP_SKB_CB(skb)->sacked&TCPCB_TAGBITS)) {
1525 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; 1621 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
1526 tp->lost_out += tcp_skb_pcount(skb); 1622 tp->lost_out += tcp_skb_pcount(skb);
1623
1624 /* clear xmit_retrans hint */
1625 if (tp->retransmit_skb_hint &&
1626 before(TCP_SKB_CB(skb)->seq,
1627 TCP_SKB_CB(tp->retransmit_skb_hint)->seq))
1628
1629 tp->retransmit_skb_hint = NULL;
1527 } 1630 }
1528 } 1631 }
1632
1633 tp->scoreboard_skb_hint = skb;
1634
1529 tcp_sync_left_out(tp); 1635 tcp_sync_left_out(tp);
1530 } 1636 }
1531} 1637}
@@ -1605,6 +1711,10 @@ static void tcp_undo_cwr(struct sock *sk, const int undo)
1605 } 1711 }
1606 tcp_moderate_cwnd(tp); 1712 tcp_moderate_cwnd(tp);
1607 tp->snd_cwnd_stamp = tcp_time_stamp; 1713 tp->snd_cwnd_stamp = tcp_time_stamp;
1714
1715 /* There is something screwy going on with the retrans hints after
1716 an undo */
1717 clear_all_retrans_hints(tp);
1608} 1718}
1609 1719
1610static inline int tcp_may_undo(struct tcp_sock *tp) 1720static inline int tcp_may_undo(struct tcp_sock *tp)
@@ -1688,6 +1798,9 @@ static int tcp_try_undo_loss(struct sock *sk, struct tcp_sock *tp)
1688 sk_stream_for_retrans_queue(skb, sk) { 1798 sk_stream_for_retrans_queue(skb, sk) {
1689 TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST; 1799 TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
1690 } 1800 }
1801
1802 clear_all_retrans_hints(tp);
1803
1691 DBGUNDO(sk, tp, "partial loss"); 1804 DBGUNDO(sk, tp, "partial loss");
1692 tp->lost_out = 0; 1805 tp->lost_out = 0;
1693 tp->left_out = tp->sacked_out; 1806 tp->left_out = tp->sacked_out;
@@ -2117,6 +2230,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p)
2117 tcp_packets_out_dec(tp, skb); 2230 tcp_packets_out_dec(tp, skb);
2118 __skb_unlink(skb, &sk->sk_write_queue); 2231 __skb_unlink(skb, &sk->sk_write_queue);
2119 sk_stream_free_skb(sk, skb); 2232 sk_stream_free_skb(sk, skb);
2233 clear_all_retrans_hints(tp);
2120 } 2234 }
2121 2235
2122 if (acked&FLAG_ACKED) { 2236 if (acked&FLAG_ACKED) {
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 602e7057e438..029c70dfb585 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -436,6 +436,8 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss
436 u16 flags; 436 u16 flags;
437 437
438 BUG_ON(len > skb->len); 438 BUG_ON(len > skb->len);
439
440 clear_all_retrans_hints(tp);
439 nsize = skb_headlen(skb) - len; 441 nsize = skb_headlen(skb) - len;
440 if (nsize < 0) 442 if (nsize < 0)
441 nsize = 0; 443 nsize = 0;
@@ -1260,7 +1262,10 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int m
1260 BUG_ON(tcp_skb_pcount(skb) != 1 || 1262 BUG_ON(tcp_skb_pcount(skb) != 1 ||
1261 tcp_skb_pcount(next_skb) != 1); 1263 tcp_skb_pcount(next_skb) != 1);
1262 1264
1263 /* Ok. We will be able to collapse the packet. */ 1265 /* changing transmit queue under us so clear hints */
1266 clear_all_retrans_hints(tp);
1267
1268 /* Ok. We will be able to collapse the packet. */
1264 __skb_unlink(next_skb, &sk->sk_write_queue); 1269 __skb_unlink(next_skb, &sk->sk_write_queue);
1265 1270
1266 memcpy(skb_put(skb, next_skb_size), next_skb->data, next_skb_size); 1271 memcpy(skb_put(skb, next_skb_size), next_skb->data, next_skb_size);
@@ -1330,6 +1335,8 @@ void tcp_simple_retransmit(struct sock *sk)
1330 } 1335 }
1331 } 1336 }
1332 1337
1338 clear_all_retrans_hints(tp);
1339
1333 if (!lost) 1340 if (!lost)
1334 return; 1341 return;
1335 1342
@@ -1468,13 +1475,25 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
1468 const struct inet_connection_sock *icsk = inet_csk(sk); 1475 const struct inet_connection_sock *icsk = inet_csk(sk);
1469 struct tcp_sock *tp = tcp_sk(sk); 1476 struct tcp_sock *tp = tcp_sk(sk);
1470 struct sk_buff *skb; 1477 struct sk_buff *skb;
1471 int packet_cnt = tp->lost_out; 1478 int packet_cnt;
1479
1480 if (tp->retransmit_skb_hint) {
1481 skb = tp->retransmit_skb_hint;
1482 packet_cnt = tp->retransmit_cnt_hint;
1483 }else{
1484 skb = sk->sk_write_queue.next;
1485 packet_cnt = 0;
1486 }
1472 1487
1473 /* First pass: retransmit lost packets. */ 1488 /* First pass: retransmit lost packets. */
1474 if (packet_cnt) { 1489 if (tp->lost_out) {
1475 sk_stream_for_retrans_queue(skb, sk) { 1490 sk_stream_for_retrans_queue_from(skb, sk) {
1476 __u8 sacked = TCP_SKB_CB(skb)->sacked; 1491 __u8 sacked = TCP_SKB_CB(skb)->sacked;
1477 1492
1493 /* we could do better than to assign each time */
1494 tp->retransmit_skb_hint = skb;
1495 tp->retransmit_cnt_hint = packet_cnt;
1496
1478 /* Assume this retransmit will generate 1497 /* Assume this retransmit will generate
1479 * only one packet for congestion window 1498 * only one packet for congestion window
1480 * calculation purposes. This works because 1499 * calculation purposes. This works because
@@ -1485,10 +1504,12 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
1485 if (tcp_packets_in_flight(tp) >= tp->snd_cwnd) 1504 if (tcp_packets_in_flight(tp) >= tp->snd_cwnd)
1486 return; 1505 return;
1487 1506
1488 if (sacked&TCPCB_LOST) { 1507 if (sacked & TCPCB_LOST) {
1489 if (!(sacked&(TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS))) { 1508 if (!(sacked&(TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS))) {
1490 if (tcp_retransmit_skb(sk, skb)) 1509 if (tcp_retransmit_skb(sk, skb)) {
1510 tp->retransmit_skb_hint = NULL;
1491 return; 1511 return;
1512 }
1492 if (icsk->icsk_ca_state != TCP_CA_Loss) 1513 if (icsk->icsk_ca_state != TCP_CA_Loss)
1493 NET_INC_STATS_BH(LINUX_MIB_TCPFASTRETRANS); 1514 NET_INC_STATS_BH(LINUX_MIB_TCPFASTRETRANS);
1494 else 1515 else
@@ -1501,8 +1522,8 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
1501 TCP_RTO_MAX); 1522 TCP_RTO_MAX);
1502 } 1523 }
1503 1524
1504 packet_cnt -= tcp_skb_pcount(skb); 1525 packet_cnt += tcp_skb_pcount(skb);
1505 if (packet_cnt <= 0) 1526 if (packet_cnt >= tp->lost_out)
1506 break; 1527 break;
1507 } 1528 }
1508 } 1529 }
@@ -1528,9 +1549,18 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
1528 if (tcp_may_send_now(sk, tp)) 1549 if (tcp_may_send_now(sk, tp))
1529 return; 1550 return;
1530 1551
1531 packet_cnt = 0; 1552 if (tp->forward_skb_hint) {
1553 skb = tp->forward_skb_hint;
1554 packet_cnt = tp->forward_cnt_hint;
1555 } else{
1556 skb = sk->sk_write_queue.next;
1557 packet_cnt = 0;
1558 }
1559
1560 sk_stream_for_retrans_queue_from(skb, sk) {
1561 tp->forward_cnt_hint = packet_cnt;
1562 tp->forward_skb_hint = skb;
1532 1563
1533 sk_stream_for_retrans_queue(skb, sk) {
1534 /* Similar to the retransmit loop above we 1564 /* Similar to the retransmit loop above we
1535 * can pretend that the retransmitted SKB 1565 * can pretend that the retransmitted SKB
1536 * we send out here will be composed of one 1566 * we send out here will be composed of one
@@ -1547,8 +1577,10 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
1547 continue; 1577 continue;
1548 1578
1549 /* Ok, retransmit it. */ 1579 /* Ok, retransmit it. */
1550 if (tcp_retransmit_skb(sk, skb)) 1580 if (tcp_retransmit_skb(sk, skb)) {
1581 tp->forward_skb_hint = NULL;
1551 break; 1582 break;
1583 }
1552 1584
1553 if (skb == skb_peek(&sk->sk_write_queue)) 1585 if (skb == skb_peek(&sk->sk_write_queue))
1554 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, 1586 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,