diff options
Diffstat (limited to 'net/ipv4/tcp_input.c')
-rw-r--r-- | net/ipv4/tcp_input.c | 144 |
1 files changed, 129 insertions, 15 deletions
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 34cfa58eab76..40a26b7157b4 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c | |||
@@ -897,18 +897,32 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ | |||
897 | int prior_fackets; | 897 | int prior_fackets; |
898 | u32 lost_retrans = 0; | 898 | u32 lost_retrans = 0; |
899 | int flag = 0; | 899 | int flag = 0; |
900 | int dup_sack = 0; | ||
900 | int i; | 901 | int i; |
901 | 902 | ||
902 | if (!tp->sacked_out) | 903 | if (!tp->sacked_out) |
903 | tp->fackets_out = 0; | 904 | tp->fackets_out = 0; |
904 | prior_fackets = tp->fackets_out; | 905 | prior_fackets = tp->fackets_out; |
905 | 906 | ||
906 | for (i=0; i<num_sacks; i++, sp++) { | 907 | /* SACK fastpath: |
907 | struct sk_buff *skb; | 908 | * if the only SACK change is the increase of the end_seq of |
908 | __u32 start_seq = ntohl(sp->start_seq); | 909 | * the first block then only apply that SACK block |
909 | __u32 end_seq = ntohl(sp->end_seq); | 910 | * and use retrans queue hinting otherwise slowpath */ |
910 | int fack_count = 0; | 911 | flag = 1; |
911 | int dup_sack = 0; | 912 | for (i = 0; i< num_sacks; i++) { |
913 | __u32 start_seq = ntohl(sp[i].start_seq); | ||
914 | __u32 end_seq = ntohl(sp[i].end_seq); | ||
915 | |||
916 | if (i == 0){ | ||
917 | if (tp->recv_sack_cache[i].start_seq != start_seq) | ||
918 | flag = 0; | ||
919 | } else { | ||
920 | if ((tp->recv_sack_cache[i].start_seq != start_seq) || | ||
921 | (tp->recv_sack_cache[i].end_seq != end_seq)) | ||
922 | flag = 0; | ||
923 | } | ||
924 | tp->recv_sack_cache[i].start_seq = start_seq; | ||
925 | tp->recv_sack_cache[i].end_seq = end_seq; | ||
912 | 926 | ||
913 | /* Check for D-SACK. */ | 927 | /* Check for D-SACK. */ |
914 | if (i == 0) { | 928 | if (i == 0) { |
@@ -940,15 +954,58 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ | |||
940 | if (before(ack, prior_snd_una - tp->max_window)) | 954 | if (before(ack, prior_snd_una - tp->max_window)) |
941 | return 0; | 955 | return 0; |
942 | } | 956 | } |
957 | } | ||
958 | |||
959 | if (flag) | ||
960 | num_sacks = 1; | ||
961 | else { | ||
962 | int j; | ||
963 | tp->fastpath_skb_hint = NULL; | ||
964 | |||
965 | /* order SACK blocks to allow in order walk of the retrans queue */ | ||
966 | for (i = num_sacks-1; i > 0; i--) { | ||
967 | for (j = 0; j < i; j++){ | ||
968 | if (after(ntohl(sp[j].start_seq), | ||
969 | ntohl(sp[j+1].start_seq))){ | ||
970 | sp[j].start_seq = htonl(tp->recv_sack_cache[j+1].start_seq); | ||
971 | sp[j].end_seq = htonl(tp->recv_sack_cache[j+1].end_seq); | ||
972 | sp[j+1].start_seq = htonl(tp->recv_sack_cache[j].start_seq); | ||
973 | sp[j+1].end_seq = htonl(tp->recv_sack_cache[j].end_seq); | ||
974 | } | ||
975 | |||
976 | } | ||
977 | } | ||
978 | } | ||
979 | |||
980 | /* clear flag as used for different purpose in following code */ | ||
981 | flag = 0; | ||
982 | |||
983 | for (i=0; i<num_sacks; i++, sp++) { | ||
984 | struct sk_buff *skb; | ||
985 | __u32 start_seq = ntohl(sp->start_seq); | ||
986 | __u32 end_seq = ntohl(sp->end_seq); | ||
987 | int fack_count; | ||
988 | |||
989 | /* Use SACK fastpath hint if valid */ | ||
990 | if (tp->fastpath_skb_hint) { | ||
991 | skb = tp->fastpath_skb_hint; | ||
992 | fack_count = tp->fastpath_cnt_hint; | ||
993 | } else { | ||
994 | skb = sk->sk_write_queue.next; | ||
995 | fack_count = 0; | ||
996 | } | ||
943 | 997 | ||
944 | /* Event "B" in the comment above. */ | 998 | /* Event "B" in the comment above. */ |
945 | if (after(end_seq, tp->high_seq)) | 999 | if (after(end_seq, tp->high_seq)) |
946 | flag |= FLAG_DATA_LOST; | 1000 | flag |= FLAG_DATA_LOST; |
947 | 1001 | ||
948 | sk_stream_for_retrans_queue(skb, sk) { | 1002 | sk_stream_for_retrans_queue_from(skb, sk) { |
949 | int in_sack, pcount; | 1003 | int in_sack, pcount; |
950 | u8 sacked; | 1004 | u8 sacked; |
951 | 1005 | ||
1006 | tp->fastpath_skb_hint = skb; | ||
1007 | tp->fastpath_cnt_hint = fack_count; | ||
1008 | |||
952 | /* The retransmission queue is always in order, so | 1009 | /* The retransmission queue is always in order, so |
953 | * we can short-circuit the walk early. | 1010 | * we can short-circuit the walk early. |
954 | */ | 1011 | */ |
@@ -1023,6 +1080,9 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ | |||
1023 | TCP_SKB_CB(skb)->sacked &= ~(TCPCB_LOST|TCPCB_SACKED_RETRANS); | 1080 | TCP_SKB_CB(skb)->sacked &= ~(TCPCB_LOST|TCPCB_SACKED_RETRANS); |
1024 | tp->lost_out -= tcp_skb_pcount(skb); | 1081 | tp->lost_out -= tcp_skb_pcount(skb); |
1025 | tp->retrans_out -= tcp_skb_pcount(skb); | 1082 | tp->retrans_out -= tcp_skb_pcount(skb); |
1083 | |||
1084 | /* clear lost hint */ | ||
1085 | tp->retransmit_skb_hint = NULL; | ||
1026 | } | 1086 | } |
1027 | } else { | 1087 | } else { |
1028 | /* New sack for not retransmitted frame, | 1088 | /* New sack for not retransmitted frame, |
@@ -1035,6 +1095,9 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ | |||
1035 | if (sacked & TCPCB_LOST) { | 1095 | if (sacked & TCPCB_LOST) { |
1036 | TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST; | 1096 | TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST; |
1037 | tp->lost_out -= tcp_skb_pcount(skb); | 1097 | tp->lost_out -= tcp_skb_pcount(skb); |
1098 | |||
1099 | /* clear lost hint */ | ||
1100 | tp->retransmit_skb_hint = NULL; | ||
1038 | } | 1101 | } |
1039 | } | 1102 | } |
1040 | 1103 | ||
@@ -1058,6 +1121,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ | |||
1058 | (TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_RETRANS)) { | 1121 | (TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_RETRANS)) { |
1059 | TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; | 1122 | TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; |
1060 | tp->retrans_out -= tcp_skb_pcount(skb); | 1123 | tp->retrans_out -= tcp_skb_pcount(skb); |
1124 | tp->retransmit_skb_hint = NULL; | ||
1061 | } | 1125 | } |
1062 | } | 1126 | } |
1063 | } | 1127 | } |
@@ -1085,6 +1149,9 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ | |||
1085 | TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; | 1149 | TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; |
1086 | tp->retrans_out -= tcp_skb_pcount(skb); | 1150 | tp->retrans_out -= tcp_skb_pcount(skb); |
1087 | 1151 | ||
1152 | /* clear lost hint */ | ||
1153 | tp->retransmit_skb_hint = NULL; | ||
1154 | |||
1088 | if (!(TCP_SKB_CB(skb)->sacked&(TCPCB_LOST|TCPCB_SACKED_ACKED))) { | 1155 | if (!(TCP_SKB_CB(skb)->sacked&(TCPCB_LOST|TCPCB_SACKED_ACKED))) { |
1089 | tp->lost_out += tcp_skb_pcount(skb); | 1156 | tp->lost_out += tcp_skb_pcount(skb); |
1090 | TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; | 1157 | TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; |
@@ -1192,6 +1259,8 @@ static void tcp_enter_frto_loss(struct sock *sk) | |||
1192 | tcp_set_ca_state(sk, TCP_CA_Loss); | 1259 | tcp_set_ca_state(sk, TCP_CA_Loss); |
1193 | tp->high_seq = tp->frto_highmark; | 1260 | tp->high_seq = tp->frto_highmark; |
1194 | TCP_ECN_queue_cwr(tp); | 1261 | TCP_ECN_queue_cwr(tp); |
1262 | |||
1263 | clear_all_retrans_hints(tp); | ||
1195 | } | 1264 | } |
1196 | 1265 | ||
1197 | void tcp_clear_retrans(struct tcp_sock *tp) | 1266 | void tcp_clear_retrans(struct tcp_sock *tp) |
@@ -1258,6 +1327,8 @@ void tcp_enter_loss(struct sock *sk, int how) | |||
1258 | tcp_set_ca_state(sk, TCP_CA_Loss); | 1327 | tcp_set_ca_state(sk, TCP_CA_Loss); |
1259 | tp->high_seq = tp->snd_nxt; | 1328 | tp->high_seq = tp->snd_nxt; |
1260 | TCP_ECN_queue_cwr(tp); | 1329 | TCP_ECN_queue_cwr(tp); |
1330 | |||
1331 | clear_all_retrans_hints(tp); | ||
1261 | } | 1332 | } |
1262 | 1333 | ||
1263 | static int tcp_check_sack_reneging(struct sock *sk) | 1334 | static int tcp_check_sack_reneging(struct sock *sk) |
@@ -1482,17 +1553,37 @@ static void tcp_mark_head_lost(struct sock *sk, struct tcp_sock *tp, | |||
1482 | int packets, u32 high_seq) | 1553 | int packets, u32 high_seq) |
1483 | { | 1554 | { |
1484 | struct sk_buff *skb; | 1555 | struct sk_buff *skb; |
1485 | int cnt = packets; | 1556 | int cnt; |
1486 | 1557 | ||
1487 | BUG_TRAP(cnt <= tp->packets_out); | 1558 | BUG_TRAP(packets <= tp->packets_out); |
1559 | if (tp->lost_skb_hint) { | ||
1560 | skb = tp->lost_skb_hint; | ||
1561 | cnt = tp->lost_cnt_hint; | ||
1562 | } else { | ||
1563 | skb = sk->sk_write_queue.next; | ||
1564 | cnt = 0; | ||
1565 | } | ||
1488 | 1566 | ||
1489 | sk_stream_for_retrans_queue(skb, sk) { | 1567 | sk_stream_for_retrans_queue_from(skb, sk) { |
1490 | cnt -= tcp_skb_pcount(skb); | 1568 | /* TODO: do this better */ |
1491 | if (cnt < 0 || after(TCP_SKB_CB(skb)->end_seq, high_seq)) | 1569 | /* this is not the most efficient way to do this... */ |
1570 | tp->lost_skb_hint = skb; | ||
1571 | tp->lost_cnt_hint = cnt; | ||
1572 | cnt += tcp_skb_pcount(skb); | ||
1573 | if (cnt > packets || after(TCP_SKB_CB(skb)->end_seq, high_seq)) | ||
1492 | break; | 1574 | break; |
1493 | if (!(TCP_SKB_CB(skb)->sacked&TCPCB_TAGBITS)) { | 1575 | if (!(TCP_SKB_CB(skb)->sacked&TCPCB_TAGBITS)) { |
1494 | TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; | 1576 | TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; |
1495 | tp->lost_out += tcp_skb_pcount(skb); | 1577 | tp->lost_out += tcp_skb_pcount(skb); |
1578 | |||
1579 | /* clear xmit_retransmit_queue hints | ||
1580 | * if this is beyond hint */ | ||
1581 | if(tp->retransmit_skb_hint != NULL && | ||
1582 | before(TCP_SKB_CB(skb)->seq, | ||
1583 | TCP_SKB_CB(tp->retransmit_skb_hint)->seq)) { | ||
1584 | |||
1585 | tp->retransmit_skb_hint = NULL; | ||
1586 | } | ||
1496 | } | 1587 | } |
1497 | } | 1588 | } |
1498 | tcp_sync_left_out(tp); | 1589 | tcp_sync_left_out(tp); |
@@ -1519,13 +1610,28 @@ static void tcp_update_scoreboard(struct sock *sk, struct tcp_sock *tp) | |||
1519 | if (tcp_head_timedout(sk, tp)) { | 1610 | if (tcp_head_timedout(sk, tp)) { |
1520 | struct sk_buff *skb; | 1611 | struct sk_buff *skb; |
1521 | 1612 | ||
1522 | sk_stream_for_retrans_queue(skb, sk) { | 1613 | skb = tp->scoreboard_skb_hint ? tp->scoreboard_skb_hint |
1523 | if (tcp_skb_timedout(sk, skb) && | 1614 | : sk->sk_write_queue.next; |
1524 | !(TCP_SKB_CB(skb)->sacked&TCPCB_TAGBITS)) { | 1615 | |
1616 | sk_stream_for_retrans_queue_from(skb, sk) { | ||
1617 | if (!tcp_skb_timedout(sk, skb)) | ||
1618 | break; | ||
1619 | |||
1620 | if (!(TCP_SKB_CB(skb)->sacked&TCPCB_TAGBITS)) { | ||
1525 | TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; | 1621 | TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; |
1526 | tp->lost_out += tcp_skb_pcount(skb); | 1622 | tp->lost_out += tcp_skb_pcount(skb); |
1623 | |||
1624 | /* clear xmit_retrans hint */ | ||
1625 | if (tp->retransmit_skb_hint && | ||
1626 | before(TCP_SKB_CB(skb)->seq, | ||
1627 | TCP_SKB_CB(tp->retransmit_skb_hint)->seq)) | ||
1628 | |||
1629 | tp->retransmit_skb_hint = NULL; | ||
1527 | } | 1630 | } |
1528 | } | 1631 | } |
1632 | |||
1633 | tp->scoreboard_skb_hint = skb; | ||
1634 | |||
1529 | tcp_sync_left_out(tp); | 1635 | tcp_sync_left_out(tp); |
1530 | } | 1636 | } |
1531 | } | 1637 | } |
@@ -1605,6 +1711,10 @@ static void tcp_undo_cwr(struct sock *sk, const int undo) | |||
1605 | } | 1711 | } |
1606 | tcp_moderate_cwnd(tp); | 1712 | tcp_moderate_cwnd(tp); |
1607 | tp->snd_cwnd_stamp = tcp_time_stamp; | 1713 | tp->snd_cwnd_stamp = tcp_time_stamp; |
1714 | |||
1715 | /* There is something screwy going on with the retrans hints after | ||
1716 | an undo */ | ||
1717 | clear_all_retrans_hints(tp); | ||
1608 | } | 1718 | } |
1609 | 1719 | ||
1610 | static inline int tcp_may_undo(struct tcp_sock *tp) | 1720 | static inline int tcp_may_undo(struct tcp_sock *tp) |
@@ -1688,6 +1798,9 @@ static int tcp_try_undo_loss(struct sock *sk, struct tcp_sock *tp) | |||
1688 | sk_stream_for_retrans_queue(skb, sk) { | 1798 | sk_stream_for_retrans_queue(skb, sk) { |
1689 | TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST; | 1799 | TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST; |
1690 | } | 1800 | } |
1801 | |||
1802 | clear_all_retrans_hints(tp); | ||
1803 | |||
1691 | DBGUNDO(sk, tp, "partial loss"); | 1804 | DBGUNDO(sk, tp, "partial loss"); |
1692 | tp->lost_out = 0; | 1805 | tp->lost_out = 0; |
1693 | tp->left_out = tp->sacked_out; | 1806 | tp->left_out = tp->sacked_out; |
@@ -2117,6 +2230,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p) | |||
2117 | tcp_packets_out_dec(tp, skb); | 2230 | tcp_packets_out_dec(tp, skb); |
2118 | __skb_unlink(skb, &sk->sk_write_queue); | 2231 | __skb_unlink(skb, &sk->sk_write_queue); |
2119 | sk_stream_free_skb(sk, skb); | 2232 | sk_stream_free_skb(sk, skb); |
2233 | clear_all_retrans_hints(tp); | ||
2120 | } | 2234 | } |
2121 | 2235 | ||
2122 | if (acked&FLAG_ACKED) { | 2236 | if (acked&FLAG_ACKED) { |