diff options
author | Stephen Hemminger <shemminger@osdl.org> | 2005-11-10 20:14:59 -0500 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2005-11-10 20:14:59 -0500 |
commit | 6a438bbe68c7013a42d9c5aee5a40d7dafdbe6ec (patch) | |
tree | 09775f0479168cd53494155a5789e78df218b497 /net/ipv4/tcp_input.c | |
parent | caa20d9abe810be2ede9612b6c9db6ce7d6edf80 (diff) |
[TCP]: speed up SACK processing
Use "hints" to speed up the SACK processing. Various forms
of this have been used by TCP developers (Web100, STCP, BIC)
to avoid the 2x linear search of outstanding segments.
Signed-off-by: Stephen Hemminger <shemminger@osdl.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4/tcp_input.c')
-rw-r--r-- | net/ipv4/tcp_input.c | 144 |
1 files changed, 129 insertions, 15 deletions
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 34cfa58eab76..40a26b7157b4 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c | |||
@@ -897,18 +897,32 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ | |||
897 | int prior_fackets; | 897 | int prior_fackets; |
898 | u32 lost_retrans = 0; | 898 | u32 lost_retrans = 0; |
899 | int flag = 0; | 899 | int flag = 0; |
900 | int dup_sack = 0; | ||
900 | int i; | 901 | int i; |
901 | 902 | ||
902 | if (!tp->sacked_out) | 903 | if (!tp->sacked_out) |
903 | tp->fackets_out = 0; | 904 | tp->fackets_out = 0; |
904 | prior_fackets = tp->fackets_out; | 905 | prior_fackets = tp->fackets_out; |
905 | 906 | ||
906 | for (i=0; i<num_sacks; i++, sp++) { | 907 | /* SACK fastpath: |
907 | struct sk_buff *skb; | 908 | * if the only SACK change is the increase of the end_seq of |
908 | __u32 start_seq = ntohl(sp->start_seq); | 909 | * the first block then only apply that SACK block |
909 | __u32 end_seq = ntohl(sp->end_seq); | 910 | * and use retrans queue hinting otherwise slowpath */ |
910 | int fack_count = 0; | 911 | flag = 1; |
911 | int dup_sack = 0; | 912 | for (i = 0; i< num_sacks; i++) { |
913 | __u32 start_seq = ntohl(sp[i].start_seq); | ||
914 | __u32 end_seq = ntohl(sp[i].end_seq); | ||
915 | |||
916 | if (i == 0){ | ||
917 | if (tp->recv_sack_cache[i].start_seq != start_seq) | ||
918 | flag = 0; | ||
919 | } else { | ||
920 | if ((tp->recv_sack_cache[i].start_seq != start_seq) || | ||
921 | (tp->recv_sack_cache[i].end_seq != end_seq)) | ||
922 | flag = 0; | ||
923 | } | ||
924 | tp->recv_sack_cache[i].start_seq = start_seq; | ||
925 | tp->recv_sack_cache[i].end_seq = end_seq; | ||
912 | 926 | ||
913 | /* Check for D-SACK. */ | 927 | /* Check for D-SACK. */ |
914 | if (i == 0) { | 928 | if (i == 0) { |
@@ -940,15 +954,58 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ | |||
940 | if (before(ack, prior_snd_una - tp->max_window)) | 954 | if (before(ack, prior_snd_una - tp->max_window)) |
941 | return 0; | 955 | return 0; |
942 | } | 956 | } |
957 | } | ||
958 | |||
959 | if (flag) | ||
960 | num_sacks = 1; | ||
961 | else { | ||
962 | int j; | ||
963 | tp->fastpath_skb_hint = NULL; | ||
964 | |||
965 | /* order SACK blocks to allow in order walk of the retrans queue */ | ||
966 | for (i = num_sacks-1; i > 0; i--) { | ||
967 | for (j = 0; j < i; j++){ | ||
968 | if (after(ntohl(sp[j].start_seq), | ||
969 | ntohl(sp[j+1].start_seq))){ | ||
970 | sp[j].start_seq = htonl(tp->recv_sack_cache[j+1].start_seq); | ||
971 | sp[j].end_seq = htonl(tp->recv_sack_cache[j+1].end_seq); | ||
972 | sp[j+1].start_seq = htonl(tp->recv_sack_cache[j].start_seq); | ||
973 | sp[j+1].end_seq = htonl(tp->recv_sack_cache[j].end_seq); | ||
974 | } | ||
975 | |||
976 | } | ||
977 | } | ||
978 | } | ||
979 | |||
980 | /* clear flag as used for different purpose in following code */ | ||
981 | flag = 0; | ||
982 | |||
983 | for (i=0; i<num_sacks; i++, sp++) { | ||
984 | struct sk_buff *skb; | ||
985 | __u32 start_seq = ntohl(sp->start_seq); | ||
986 | __u32 end_seq = ntohl(sp->end_seq); | ||
987 | int fack_count; | ||
988 | |||
989 | /* Use SACK fastpath hint if valid */ | ||
990 | if (tp->fastpath_skb_hint) { | ||
991 | skb = tp->fastpath_skb_hint; | ||
992 | fack_count = tp->fastpath_cnt_hint; | ||
993 | } else { | ||
994 | skb = sk->sk_write_queue.next; | ||
995 | fack_count = 0; | ||
996 | } | ||
943 | 997 | ||
944 | /* Event "B" in the comment above. */ | 998 | /* Event "B" in the comment above. */ |
945 | if (after(end_seq, tp->high_seq)) | 999 | if (after(end_seq, tp->high_seq)) |
946 | flag |= FLAG_DATA_LOST; | 1000 | flag |= FLAG_DATA_LOST; |
947 | 1001 | ||
948 | sk_stream_for_retrans_queue(skb, sk) { | 1002 | sk_stream_for_retrans_queue_from(skb, sk) { |
949 | int in_sack, pcount; | 1003 | int in_sack, pcount; |
950 | u8 sacked; | 1004 | u8 sacked; |
951 | 1005 | ||
1006 | tp->fastpath_skb_hint = skb; | ||
1007 | tp->fastpath_cnt_hint = fack_count; | ||
1008 | |||
952 | /* The retransmission queue is always in order, so | 1009 | /* The retransmission queue is always in order, so |
953 | * we can short-circuit the walk early. | 1010 | * we can short-circuit the walk early. |
954 | */ | 1011 | */ |
@@ -1023,6 +1080,9 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ | |||
1023 | TCP_SKB_CB(skb)->sacked &= ~(TCPCB_LOST|TCPCB_SACKED_RETRANS); | 1080 | TCP_SKB_CB(skb)->sacked &= ~(TCPCB_LOST|TCPCB_SACKED_RETRANS); |
1024 | tp->lost_out -= tcp_skb_pcount(skb); | 1081 | tp->lost_out -= tcp_skb_pcount(skb); |
1025 | tp->retrans_out -= tcp_skb_pcount(skb); | 1082 | tp->retrans_out -= tcp_skb_pcount(skb); |
1083 | |||
1084 | /* clear lost hint */ | ||
1085 | tp->retransmit_skb_hint = NULL; | ||
1026 | } | 1086 | } |
1027 | } else { | 1087 | } else { |
1028 | /* New sack for not retransmitted frame, | 1088 | /* New sack for not retransmitted frame, |
@@ -1035,6 +1095,9 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ | |||
1035 | if (sacked & TCPCB_LOST) { | 1095 | if (sacked & TCPCB_LOST) { |
1036 | TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST; | 1096 | TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST; |
1037 | tp->lost_out -= tcp_skb_pcount(skb); | 1097 | tp->lost_out -= tcp_skb_pcount(skb); |
1098 | |||
1099 | /* clear lost hint */ | ||
1100 | tp->retransmit_skb_hint = NULL; | ||
1038 | } | 1101 | } |
1039 | } | 1102 | } |
1040 | 1103 | ||
@@ -1058,6 +1121,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ | |||
1058 | (TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_RETRANS)) { | 1121 | (TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_RETRANS)) { |
1059 | TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; | 1122 | TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; |
1060 | tp->retrans_out -= tcp_skb_pcount(skb); | 1123 | tp->retrans_out -= tcp_skb_pcount(skb); |
1124 | tp->retransmit_skb_hint = NULL; | ||
1061 | } | 1125 | } |
1062 | } | 1126 | } |
1063 | } | 1127 | } |
@@ -1085,6 +1149,9 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ | |||
1085 | TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; | 1149 | TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; |
1086 | tp->retrans_out -= tcp_skb_pcount(skb); | 1150 | tp->retrans_out -= tcp_skb_pcount(skb); |
1087 | 1151 | ||
1152 | /* clear lost hint */ | ||
1153 | tp->retransmit_skb_hint = NULL; | ||
1154 | |||
1088 | if (!(TCP_SKB_CB(skb)->sacked&(TCPCB_LOST|TCPCB_SACKED_ACKED))) { | 1155 | if (!(TCP_SKB_CB(skb)->sacked&(TCPCB_LOST|TCPCB_SACKED_ACKED))) { |
1089 | tp->lost_out += tcp_skb_pcount(skb); | 1156 | tp->lost_out += tcp_skb_pcount(skb); |
1090 | TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; | 1157 | TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; |
@@ -1192,6 +1259,8 @@ static void tcp_enter_frto_loss(struct sock *sk) | |||
1192 | tcp_set_ca_state(sk, TCP_CA_Loss); | 1259 | tcp_set_ca_state(sk, TCP_CA_Loss); |
1193 | tp->high_seq = tp->frto_highmark; | 1260 | tp->high_seq = tp->frto_highmark; |
1194 | TCP_ECN_queue_cwr(tp); | 1261 | TCP_ECN_queue_cwr(tp); |
1262 | |||
1263 | clear_all_retrans_hints(tp); | ||
1195 | } | 1264 | } |
1196 | 1265 | ||
1197 | void tcp_clear_retrans(struct tcp_sock *tp) | 1266 | void tcp_clear_retrans(struct tcp_sock *tp) |
@@ -1258,6 +1327,8 @@ void tcp_enter_loss(struct sock *sk, int how) | |||
1258 | tcp_set_ca_state(sk, TCP_CA_Loss); | 1327 | tcp_set_ca_state(sk, TCP_CA_Loss); |
1259 | tp->high_seq = tp->snd_nxt; | 1328 | tp->high_seq = tp->snd_nxt; |
1260 | TCP_ECN_queue_cwr(tp); | 1329 | TCP_ECN_queue_cwr(tp); |
1330 | |||
1331 | clear_all_retrans_hints(tp); | ||
1261 | } | 1332 | } |
1262 | 1333 | ||
1263 | static int tcp_check_sack_reneging(struct sock *sk) | 1334 | static int tcp_check_sack_reneging(struct sock *sk) |
@@ -1482,17 +1553,37 @@ static void tcp_mark_head_lost(struct sock *sk, struct tcp_sock *tp, | |||
1482 | int packets, u32 high_seq) | 1553 | int packets, u32 high_seq) |
1483 | { | 1554 | { |
1484 | struct sk_buff *skb; | 1555 | struct sk_buff *skb; |
1485 | int cnt = packets; | 1556 | int cnt; |
1486 | 1557 | ||
1487 | BUG_TRAP(cnt <= tp->packets_out); | 1558 | BUG_TRAP(packets <= tp->packets_out); |
1559 | if (tp->lost_skb_hint) { | ||
1560 | skb = tp->lost_skb_hint; | ||
1561 | cnt = tp->lost_cnt_hint; | ||
1562 | } else { | ||
1563 | skb = sk->sk_write_queue.next; | ||
1564 | cnt = 0; | ||
1565 | } | ||
1488 | 1566 | ||
1489 | sk_stream_for_retrans_queue(skb, sk) { | 1567 | sk_stream_for_retrans_queue_from(skb, sk) { |
1490 | cnt -= tcp_skb_pcount(skb); | 1568 | /* TODO: do this better */ |
1491 | if (cnt < 0 || after(TCP_SKB_CB(skb)->end_seq, high_seq)) | 1569 | /* this is not the most efficient way to do this... */ |
1570 | tp->lost_skb_hint = skb; | ||
1571 | tp->lost_cnt_hint = cnt; | ||
1572 | cnt += tcp_skb_pcount(skb); | ||
1573 | if (cnt > packets || after(TCP_SKB_CB(skb)->end_seq, high_seq)) | ||
1492 | break; | 1574 | break; |
1493 | if (!(TCP_SKB_CB(skb)->sacked&TCPCB_TAGBITS)) { | 1575 | if (!(TCP_SKB_CB(skb)->sacked&TCPCB_TAGBITS)) { |
1494 | TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; | 1576 | TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; |
1495 | tp->lost_out += tcp_skb_pcount(skb); | 1577 | tp->lost_out += tcp_skb_pcount(skb); |
1578 | |||
1579 | /* clear xmit_retransmit_queue hints | ||
1580 | * if this is beyond hint */ | ||
1581 | if(tp->retransmit_skb_hint != NULL && | ||
1582 | before(TCP_SKB_CB(skb)->seq, | ||
1583 | TCP_SKB_CB(tp->retransmit_skb_hint)->seq)) { | ||
1584 | |||
1585 | tp->retransmit_skb_hint = NULL; | ||
1586 | } | ||
1496 | } | 1587 | } |
1497 | } | 1588 | } |
1498 | tcp_sync_left_out(tp); | 1589 | tcp_sync_left_out(tp); |
@@ -1519,13 +1610,28 @@ static void tcp_update_scoreboard(struct sock *sk, struct tcp_sock *tp) | |||
1519 | if (tcp_head_timedout(sk, tp)) { | 1610 | if (tcp_head_timedout(sk, tp)) { |
1520 | struct sk_buff *skb; | 1611 | struct sk_buff *skb; |
1521 | 1612 | ||
1522 | sk_stream_for_retrans_queue(skb, sk) { | 1613 | skb = tp->scoreboard_skb_hint ? tp->scoreboard_skb_hint |
1523 | if (tcp_skb_timedout(sk, skb) && | 1614 | : sk->sk_write_queue.next; |
1524 | !(TCP_SKB_CB(skb)->sacked&TCPCB_TAGBITS)) { | 1615 | |
1616 | sk_stream_for_retrans_queue_from(skb, sk) { | ||
1617 | if (!tcp_skb_timedout(sk, skb)) | ||
1618 | break; | ||
1619 | |||
1620 | if (!(TCP_SKB_CB(skb)->sacked&TCPCB_TAGBITS)) { | ||
1525 | TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; | 1621 | TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; |
1526 | tp->lost_out += tcp_skb_pcount(skb); | 1622 | tp->lost_out += tcp_skb_pcount(skb); |
1623 | |||
1624 | /* clear xmit_retrans hint */ | ||
1625 | if (tp->retransmit_skb_hint && | ||
1626 | before(TCP_SKB_CB(skb)->seq, | ||
1627 | TCP_SKB_CB(tp->retransmit_skb_hint)->seq)) | ||
1628 | |||
1629 | tp->retransmit_skb_hint = NULL; | ||
1527 | } | 1630 | } |
1528 | } | 1631 | } |
1632 | |||
1633 | tp->scoreboard_skb_hint = skb; | ||
1634 | |||
1529 | tcp_sync_left_out(tp); | 1635 | tcp_sync_left_out(tp); |
1530 | } | 1636 | } |
1531 | } | 1637 | } |
@@ -1605,6 +1711,10 @@ static void tcp_undo_cwr(struct sock *sk, const int undo) | |||
1605 | } | 1711 | } |
1606 | tcp_moderate_cwnd(tp); | 1712 | tcp_moderate_cwnd(tp); |
1607 | tp->snd_cwnd_stamp = tcp_time_stamp; | 1713 | tp->snd_cwnd_stamp = tcp_time_stamp; |
1714 | |||
1715 | /* There is something screwy going on with the retrans hints after | ||
1716 | an undo */ | ||
1717 | clear_all_retrans_hints(tp); | ||
1608 | } | 1718 | } |
1609 | 1719 | ||
1610 | static inline int tcp_may_undo(struct tcp_sock *tp) | 1720 | static inline int tcp_may_undo(struct tcp_sock *tp) |
@@ -1688,6 +1798,9 @@ static int tcp_try_undo_loss(struct sock *sk, struct tcp_sock *tp) | |||
1688 | sk_stream_for_retrans_queue(skb, sk) { | 1798 | sk_stream_for_retrans_queue(skb, sk) { |
1689 | TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST; | 1799 | TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST; |
1690 | } | 1800 | } |
1801 | |||
1802 | clear_all_retrans_hints(tp); | ||
1803 | |||
1691 | DBGUNDO(sk, tp, "partial loss"); | 1804 | DBGUNDO(sk, tp, "partial loss"); |
1692 | tp->lost_out = 0; | 1805 | tp->lost_out = 0; |
1693 | tp->left_out = tp->sacked_out; | 1806 | tp->left_out = tp->sacked_out; |
@@ -2117,6 +2230,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p) | |||
2117 | tcp_packets_out_dec(tp, skb); | 2230 | tcp_packets_out_dec(tp, skb); |
2118 | __skb_unlink(skb, &sk->sk_write_queue); | 2231 | __skb_unlink(skb, &sk->sk_write_queue); |
2119 | sk_stream_free_skb(sk, skb); | 2232 | sk_stream_free_skb(sk, skb); |
2233 | clear_all_retrans_hints(tp); | ||
2120 | } | 2234 | } |
2121 | 2235 | ||
2122 | if (acked&FLAG_ACKED) { | 2236 | if (acked&FLAG_ACKED) { |