aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4/tcp_input.c
diff options
context:
space:
mode:
authorStephen Hemminger <shemminger@osdl.org>2005-11-10 20:14:59 -0500
committerDavid S. Miller <davem@davemloft.net>2005-11-10 20:14:59 -0500
commit6a438bbe68c7013a42d9c5aee5a40d7dafdbe6ec (patch)
tree09775f0479168cd53494155a5789e78df218b497 /net/ipv4/tcp_input.c
parentcaa20d9abe810be2ede9612b6c9db6ce7d6edf80 (diff)
[TCP]: speed up SACK processing
Use "hints" to speed up the SACK processing. Various forms of this have been used by TCP developers (Web100, STCP, BIC) to avoid the 2x linear search of outstanding segments. Signed-off-by: Stephen Hemminger <shemminger@osdl.org> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4/tcp_input.c')
-rw-r--r--net/ipv4/tcp_input.c144
1 files changed, 129 insertions, 15 deletions
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 34cfa58eab76..40a26b7157b4 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -897,18 +897,32 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
897 int prior_fackets; 897 int prior_fackets;
898 u32 lost_retrans = 0; 898 u32 lost_retrans = 0;
899 int flag = 0; 899 int flag = 0;
900 int dup_sack = 0;
900 int i; 901 int i;
901 902
902 if (!tp->sacked_out) 903 if (!tp->sacked_out)
903 tp->fackets_out = 0; 904 tp->fackets_out = 0;
904 prior_fackets = tp->fackets_out; 905 prior_fackets = tp->fackets_out;
905 906
906 for (i=0; i<num_sacks; i++, sp++) { 907 /* SACK fastpath:
907 struct sk_buff *skb; 908 * if the only SACK change is the increase of the end_seq of
908 __u32 start_seq = ntohl(sp->start_seq); 909 * the first block then only apply that SACK block
909 __u32 end_seq = ntohl(sp->end_seq); 910 * and use retrans queue hinting otherwise slowpath */
910 int fack_count = 0; 911 flag = 1;
911 int dup_sack = 0; 912 for (i = 0; i< num_sacks; i++) {
913 __u32 start_seq = ntohl(sp[i].start_seq);
914 __u32 end_seq = ntohl(sp[i].end_seq);
915
916 if (i == 0){
917 if (tp->recv_sack_cache[i].start_seq != start_seq)
918 flag = 0;
919 } else {
920 if ((tp->recv_sack_cache[i].start_seq != start_seq) ||
921 (tp->recv_sack_cache[i].end_seq != end_seq))
922 flag = 0;
923 }
924 tp->recv_sack_cache[i].start_seq = start_seq;
925 tp->recv_sack_cache[i].end_seq = end_seq;
912 926
913 /* Check for D-SACK. */ 927 /* Check for D-SACK. */
914 if (i == 0) { 928 if (i == 0) {
@@ -940,15 +954,58 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
940 if (before(ack, prior_snd_una - tp->max_window)) 954 if (before(ack, prior_snd_una - tp->max_window))
941 return 0; 955 return 0;
942 } 956 }
957 }
958
959 if (flag)
960 num_sacks = 1;
961 else {
962 int j;
963 tp->fastpath_skb_hint = NULL;
964
965 /* order SACK blocks to allow in order walk of the retrans queue */
966 for (i = num_sacks-1; i > 0; i--) {
967 for (j = 0; j < i; j++){
968 if (after(ntohl(sp[j].start_seq),
969 ntohl(sp[j+1].start_seq))){
970 sp[j].start_seq = htonl(tp->recv_sack_cache[j+1].start_seq);
971 sp[j].end_seq = htonl(tp->recv_sack_cache[j+1].end_seq);
972 sp[j+1].start_seq = htonl(tp->recv_sack_cache[j].start_seq);
973 sp[j+1].end_seq = htonl(tp->recv_sack_cache[j].end_seq);
974 }
975
976 }
977 }
978 }
979
980 /* clear flag as used for different purpose in following code */
981 flag = 0;
982
983 for (i=0; i<num_sacks; i++, sp++) {
984 struct sk_buff *skb;
985 __u32 start_seq = ntohl(sp->start_seq);
986 __u32 end_seq = ntohl(sp->end_seq);
987 int fack_count;
988
989 /* Use SACK fastpath hint if valid */
990 if (tp->fastpath_skb_hint) {
991 skb = tp->fastpath_skb_hint;
992 fack_count = tp->fastpath_cnt_hint;
993 } else {
994 skb = sk->sk_write_queue.next;
995 fack_count = 0;
996 }
943 997
944 /* Event "B" in the comment above. */ 998 /* Event "B" in the comment above. */
945 if (after(end_seq, tp->high_seq)) 999 if (after(end_seq, tp->high_seq))
946 flag |= FLAG_DATA_LOST; 1000 flag |= FLAG_DATA_LOST;
947 1001
948 sk_stream_for_retrans_queue(skb, sk) { 1002 sk_stream_for_retrans_queue_from(skb, sk) {
949 int in_sack, pcount; 1003 int in_sack, pcount;
950 u8 sacked; 1004 u8 sacked;
951 1005
1006 tp->fastpath_skb_hint = skb;
1007 tp->fastpath_cnt_hint = fack_count;
1008
952 /* The retransmission queue is always in order, so 1009 /* The retransmission queue is always in order, so
953 * we can short-circuit the walk early. 1010 * we can short-circuit the walk early.
954 */ 1011 */
@@ -1023,6 +1080,9 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
1023 TCP_SKB_CB(skb)->sacked &= ~(TCPCB_LOST|TCPCB_SACKED_RETRANS); 1080 TCP_SKB_CB(skb)->sacked &= ~(TCPCB_LOST|TCPCB_SACKED_RETRANS);
1024 tp->lost_out -= tcp_skb_pcount(skb); 1081 tp->lost_out -= tcp_skb_pcount(skb);
1025 tp->retrans_out -= tcp_skb_pcount(skb); 1082 tp->retrans_out -= tcp_skb_pcount(skb);
1083
1084 /* clear lost hint */
1085 tp->retransmit_skb_hint = NULL;
1026 } 1086 }
1027 } else { 1087 } else {
1028 /* New sack for not retransmitted frame, 1088 /* New sack for not retransmitted frame,
@@ -1035,6 +1095,9 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
1035 if (sacked & TCPCB_LOST) { 1095 if (sacked & TCPCB_LOST) {
1036 TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST; 1096 TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
1037 tp->lost_out -= tcp_skb_pcount(skb); 1097 tp->lost_out -= tcp_skb_pcount(skb);
1098
1099 /* clear lost hint */
1100 tp->retransmit_skb_hint = NULL;
1038 } 1101 }
1039 } 1102 }
1040 1103
@@ -1058,6 +1121,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
1058 (TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_RETRANS)) { 1121 (TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_RETRANS)) {
1059 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; 1122 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
1060 tp->retrans_out -= tcp_skb_pcount(skb); 1123 tp->retrans_out -= tcp_skb_pcount(skb);
1124 tp->retransmit_skb_hint = NULL;
1061 } 1125 }
1062 } 1126 }
1063 } 1127 }
@@ -1085,6 +1149,9 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
1085 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; 1149 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
1086 tp->retrans_out -= tcp_skb_pcount(skb); 1150 tp->retrans_out -= tcp_skb_pcount(skb);
1087 1151
1152 /* clear lost hint */
1153 tp->retransmit_skb_hint = NULL;
1154
1088 if (!(TCP_SKB_CB(skb)->sacked&(TCPCB_LOST|TCPCB_SACKED_ACKED))) { 1155 if (!(TCP_SKB_CB(skb)->sacked&(TCPCB_LOST|TCPCB_SACKED_ACKED))) {
1089 tp->lost_out += tcp_skb_pcount(skb); 1156 tp->lost_out += tcp_skb_pcount(skb);
1090 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; 1157 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
@@ -1192,6 +1259,8 @@ static void tcp_enter_frto_loss(struct sock *sk)
1192 tcp_set_ca_state(sk, TCP_CA_Loss); 1259 tcp_set_ca_state(sk, TCP_CA_Loss);
1193 tp->high_seq = tp->frto_highmark; 1260 tp->high_seq = tp->frto_highmark;
1194 TCP_ECN_queue_cwr(tp); 1261 TCP_ECN_queue_cwr(tp);
1262
1263 clear_all_retrans_hints(tp);
1195} 1264}
1196 1265
1197void tcp_clear_retrans(struct tcp_sock *tp) 1266void tcp_clear_retrans(struct tcp_sock *tp)
@@ -1258,6 +1327,8 @@ void tcp_enter_loss(struct sock *sk, int how)
1258 tcp_set_ca_state(sk, TCP_CA_Loss); 1327 tcp_set_ca_state(sk, TCP_CA_Loss);
1259 tp->high_seq = tp->snd_nxt; 1328 tp->high_seq = tp->snd_nxt;
1260 TCP_ECN_queue_cwr(tp); 1329 TCP_ECN_queue_cwr(tp);
1330
1331 clear_all_retrans_hints(tp);
1261} 1332}
1262 1333
1263static int tcp_check_sack_reneging(struct sock *sk) 1334static int tcp_check_sack_reneging(struct sock *sk)
@@ -1482,17 +1553,37 @@ static void tcp_mark_head_lost(struct sock *sk, struct tcp_sock *tp,
1482 int packets, u32 high_seq) 1553 int packets, u32 high_seq)
1483{ 1554{
1484 struct sk_buff *skb; 1555 struct sk_buff *skb;
1485 int cnt = packets; 1556 int cnt;
1486 1557
1487 BUG_TRAP(cnt <= tp->packets_out); 1558 BUG_TRAP(packets <= tp->packets_out);
1559 if (tp->lost_skb_hint) {
1560 skb = tp->lost_skb_hint;
1561 cnt = tp->lost_cnt_hint;
1562 } else {
1563 skb = sk->sk_write_queue.next;
1564 cnt = 0;
1565 }
1488 1566
1489 sk_stream_for_retrans_queue(skb, sk) { 1567 sk_stream_for_retrans_queue_from(skb, sk) {
1490 cnt -= tcp_skb_pcount(skb); 1568 /* TODO: do this better */
1491 if (cnt < 0 || after(TCP_SKB_CB(skb)->end_seq, high_seq)) 1569 /* this is not the most efficient way to do this... */
1570 tp->lost_skb_hint = skb;
1571 tp->lost_cnt_hint = cnt;
1572 cnt += tcp_skb_pcount(skb);
1573 if (cnt > packets || after(TCP_SKB_CB(skb)->end_seq, high_seq))
1492 break; 1574 break;
1493 if (!(TCP_SKB_CB(skb)->sacked&TCPCB_TAGBITS)) { 1575 if (!(TCP_SKB_CB(skb)->sacked&TCPCB_TAGBITS)) {
1494 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; 1576 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
1495 tp->lost_out += tcp_skb_pcount(skb); 1577 tp->lost_out += tcp_skb_pcount(skb);
1578
1579 /* clear xmit_retransmit_queue hints
1580 * if this is beyond hint */
1581 if(tp->retransmit_skb_hint != NULL &&
1582 before(TCP_SKB_CB(skb)->seq,
1583 TCP_SKB_CB(tp->retransmit_skb_hint)->seq)) {
1584
1585 tp->retransmit_skb_hint = NULL;
1586 }
1496 } 1587 }
1497 } 1588 }
1498 tcp_sync_left_out(tp); 1589 tcp_sync_left_out(tp);
@@ -1519,13 +1610,28 @@ static void tcp_update_scoreboard(struct sock *sk, struct tcp_sock *tp)
1519 if (tcp_head_timedout(sk, tp)) { 1610 if (tcp_head_timedout(sk, tp)) {
1520 struct sk_buff *skb; 1611 struct sk_buff *skb;
1521 1612
1522 sk_stream_for_retrans_queue(skb, sk) { 1613 skb = tp->scoreboard_skb_hint ? tp->scoreboard_skb_hint
1523 if (tcp_skb_timedout(sk, skb) && 1614 : sk->sk_write_queue.next;
1524 !(TCP_SKB_CB(skb)->sacked&TCPCB_TAGBITS)) { 1615
1616 sk_stream_for_retrans_queue_from(skb, sk) {
1617 if (!tcp_skb_timedout(sk, skb))
1618 break;
1619
1620 if (!(TCP_SKB_CB(skb)->sacked&TCPCB_TAGBITS)) {
1525 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; 1621 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
1526 tp->lost_out += tcp_skb_pcount(skb); 1622 tp->lost_out += tcp_skb_pcount(skb);
1623
1624 /* clear xmit_retrans hint */
1625 if (tp->retransmit_skb_hint &&
1626 before(TCP_SKB_CB(skb)->seq,
1627 TCP_SKB_CB(tp->retransmit_skb_hint)->seq))
1628
1629 tp->retransmit_skb_hint = NULL;
1527 } 1630 }
1528 } 1631 }
1632
1633 tp->scoreboard_skb_hint = skb;
1634
1529 tcp_sync_left_out(tp); 1635 tcp_sync_left_out(tp);
1530 } 1636 }
1531} 1637}
@@ -1605,6 +1711,10 @@ static void tcp_undo_cwr(struct sock *sk, const int undo)
1605 } 1711 }
1606 tcp_moderate_cwnd(tp); 1712 tcp_moderate_cwnd(tp);
1607 tp->snd_cwnd_stamp = tcp_time_stamp; 1713 tp->snd_cwnd_stamp = tcp_time_stamp;
1714
1715 /* There is something screwy going on with the retrans hints after
1716 an undo */
1717 clear_all_retrans_hints(tp);
1608} 1718}
1609 1719
1610static inline int tcp_may_undo(struct tcp_sock *tp) 1720static inline int tcp_may_undo(struct tcp_sock *tp)
@@ -1688,6 +1798,9 @@ static int tcp_try_undo_loss(struct sock *sk, struct tcp_sock *tp)
1688 sk_stream_for_retrans_queue(skb, sk) { 1798 sk_stream_for_retrans_queue(skb, sk) {
1689 TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST; 1799 TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
1690 } 1800 }
1801
1802 clear_all_retrans_hints(tp);
1803
1691 DBGUNDO(sk, tp, "partial loss"); 1804 DBGUNDO(sk, tp, "partial loss");
1692 tp->lost_out = 0; 1805 tp->lost_out = 0;
1693 tp->left_out = tp->sacked_out; 1806 tp->left_out = tp->sacked_out;
@@ -2117,6 +2230,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p)
2117 tcp_packets_out_dec(tp, skb); 2230 tcp_packets_out_dec(tp, skb);
2118 __skb_unlink(skb, &sk->sk_write_queue); 2231 __skb_unlink(skb, &sk->sk_write_queue);
2119 sk_stream_free_skb(sk, skb); 2232 sk_stream_free_skb(sk, skb);
2233 clear_all_retrans_hints(tp);
2120 } 2234 }
2121 2235
2122 if (acked&FLAG_ACKED) { 2236 if (acked&FLAG_ACKED) {