diff options
Diffstat (limited to 'net/ipv4/tcp_input.c')
-rw-r--r-- | net/ipv4/tcp_input.c | 332 |
1 files changed, 170 insertions, 162 deletions
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 67ccce2a96bd..d77c0d29e239 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c | |||
@@ -979,6 +979,39 @@ static void tcp_update_reordering(struct sock *sk, const int metric, | |||
979 | } | 979 | } |
980 | } | 980 | } |
981 | 981 | ||
982 | /* This must be called before lost_out is incremented */ | ||
983 | static void tcp_verify_retransmit_hint(struct tcp_sock *tp, struct sk_buff *skb) | ||
984 | { | ||
985 | if ((tp->retransmit_skb_hint == NULL) || | ||
986 | before(TCP_SKB_CB(skb)->seq, | ||
987 | TCP_SKB_CB(tp->retransmit_skb_hint)->seq)) | ||
988 | tp->retransmit_skb_hint = skb; | ||
989 | |||
990 | if (!tp->lost_out || | ||
991 | after(TCP_SKB_CB(skb)->end_seq, tp->retransmit_high)) | ||
992 | tp->retransmit_high = TCP_SKB_CB(skb)->end_seq; | ||
993 | } | ||
994 | |||
995 | static void tcp_skb_mark_lost(struct tcp_sock *tp, struct sk_buff *skb) | ||
996 | { | ||
997 | if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_ACKED))) { | ||
998 | tcp_verify_retransmit_hint(tp, skb); | ||
999 | |||
1000 | tp->lost_out += tcp_skb_pcount(skb); | ||
1001 | TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; | ||
1002 | } | ||
1003 | } | ||
1004 | |||
1005 | void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp, struct sk_buff *skb) | ||
1006 | { | ||
1007 | tcp_verify_retransmit_hint(tp, skb); | ||
1008 | |||
1009 | if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_ACKED))) { | ||
1010 | tp->lost_out += tcp_skb_pcount(skb); | ||
1011 | TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; | ||
1012 | } | ||
1013 | } | ||
1014 | |||
982 | /* This procedure tags the retransmission queue when SACKs arrive. | 1015 | /* This procedure tags the retransmission queue when SACKs arrive. |
983 | * | 1016 | * |
984 | * We have three tag bits: SACKED(S), RETRANS(R) and LOST(L). | 1017 | * We have three tag bits: SACKED(S), RETRANS(R) and LOST(L). |
@@ -1155,13 +1188,7 @@ static void tcp_mark_lost_retrans(struct sock *sk) | |||
1155 | TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; | 1188 | TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; |
1156 | tp->retrans_out -= tcp_skb_pcount(skb); | 1189 | tp->retrans_out -= tcp_skb_pcount(skb); |
1157 | 1190 | ||
1158 | /* clear lost hint */ | 1191 | tcp_skb_mark_lost_uncond_verify(tp, skb); |
1159 | tp->retransmit_skb_hint = NULL; | ||
1160 | |||
1161 | if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_ACKED))) { | ||
1162 | tp->lost_out += tcp_skb_pcount(skb); | ||
1163 | TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; | ||
1164 | } | ||
1165 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSTRETRANSMIT); | 1192 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSTRETRANSMIT); |
1166 | } else { | 1193 | } else { |
1167 | if (before(ack_seq, new_low_seq)) | 1194 | if (before(ack_seq, new_low_seq)) |
@@ -1271,9 +1298,6 @@ static int tcp_sacktag_one(struct sk_buff *skb, struct sock *sk, | |||
1271 | ~(TCPCB_LOST|TCPCB_SACKED_RETRANS); | 1298 | ~(TCPCB_LOST|TCPCB_SACKED_RETRANS); |
1272 | tp->lost_out -= tcp_skb_pcount(skb); | 1299 | tp->lost_out -= tcp_skb_pcount(skb); |
1273 | tp->retrans_out -= tcp_skb_pcount(skb); | 1300 | tp->retrans_out -= tcp_skb_pcount(skb); |
1274 | |||
1275 | /* clear lost hint */ | ||
1276 | tp->retransmit_skb_hint = NULL; | ||
1277 | } | 1301 | } |
1278 | } else { | 1302 | } else { |
1279 | if (!(sacked & TCPCB_RETRANS)) { | 1303 | if (!(sacked & TCPCB_RETRANS)) { |
@@ -1292,9 +1316,6 @@ static int tcp_sacktag_one(struct sk_buff *skb, struct sock *sk, | |||
1292 | if (sacked & TCPCB_LOST) { | 1316 | if (sacked & TCPCB_LOST) { |
1293 | TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST; | 1317 | TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST; |
1294 | tp->lost_out -= tcp_skb_pcount(skb); | 1318 | tp->lost_out -= tcp_skb_pcount(skb); |
1295 | |||
1296 | /* clear lost hint */ | ||
1297 | tp->retransmit_skb_hint = NULL; | ||
1298 | } | 1319 | } |
1299 | } | 1320 | } |
1300 | 1321 | ||
@@ -1324,7 +1345,6 @@ static int tcp_sacktag_one(struct sk_buff *skb, struct sock *sk, | |||
1324 | if (dup_sack && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS)) { | 1345 | if (dup_sack && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS)) { |
1325 | TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; | 1346 | TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; |
1326 | tp->retrans_out -= tcp_skb_pcount(skb); | 1347 | tp->retrans_out -= tcp_skb_pcount(skb); |
1327 | tp->retransmit_skb_hint = NULL; | ||
1328 | } | 1348 | } |
1329 | 1349 | ||
1330 | return flag; | 1350 | return flag; |
@@ -1726,6 +1746,8 @@ int tcp_use_frto(struct sock *sk) | |||
1726 | return 0; | 1746 | return 0; |
1727 | 1747 | ||
1728 | skb = tcp_write_queue_head(sk); | 1748 | skb = tcp_write_queue_head(sk); |
1749 | if (tcp_skb_is_last(sk, skb)) | ||
1750 | return 1; | ||
1729 | skb = tcp_write_queue_next(sk, skb); /* Skips head */ | 1751 | skb = tcp_write_queue_next(sk, skb); /* Skips head */ |
1730 | tcp_for_write_queue_from(skb, sk) { | 1752 | tcp_for_write_queue_from(skb, sk) { |
1731 | if (skb == tcp_send_head(sk)) | 1753 | if (skb == tcp_send_head(sk)) |
@@ -1867,6 +1889,7 @@ static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments, int flag) | |||
1867 | if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) { | 1889 | if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) { |
1868 | TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; | 1890 | TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; |
1869 | tp->lost_out += tcp_skb_pcount(skb); | 1891 | tp->lost_out += tcp_skb_pcount(skb); |
1892 | tp->retransmit_high = TCP_SKB_CB(skb)->end_seq; | ||
1870 | } | 1893 | } |
1871 | } | 1894 | } |
1872 | tcp_verify_left_out(tp); | 1895 | tcp_verify_left_out(tp); |
@@ -1883,7 +1906,7 @@ static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments, int flag) | |||
1883 | tp->high_seq = tp->snd_nxt; | 1906 | tp->high_seq = tp->snd_nxt; |
1884 | TCP_ECN_queue_cwr(tp); | 1907 | TCP_ECN_queue_cwr(tp); |
1885 | 1908 | ||
1886 | tcp_clear_retrans_hints_partial(tp); | 1909 | tcp_clear_all_retrans_hints(tp); |
1887 | } | 1910 | } |
1888 | 1911 | ||
1889 | static void tcp_clear_retrans_partial(struct tcp_sock *tp) | 1912 | static void tcp_clear_retrans_partial(struct tcp_sock *tp) |
@@ -1934,12 +1957,11 @@ void tcp_enter_loss(struct sock *sk, int how) | |||
1934 | /* Push undo marker, if it was plain RTO and nothing | 1957 | /* Push undo marker, if it was plain RTO and nothing |
1935 | * was retransmitted. */ | 1958 | * was retransmitted. */ |
1936 | tp->undo_marker = tp->snd_una; | 1959 | tp->undo_marker = tp->snd_una; |
1937 | tcp_clear_retrans_hints_partial(tp); | ||
1938 | } else { | 1960 | } else { |
1939 | tp->sacked_out = 0; | 1961 | tp->sacked_out = 0; |
1940 | tp->fackets_out = 0; | 1962 | tp->fackets_out = 0; |
1941 | tcp_clear_all_retrans_hints(tp); | ||
1942 | } | 1963 | } |
1964 | tcp_clear_all_retrans_hints(tp); | ||
1943 | 1965 | ||
1944 | tcp_for_write_queue(skb, sk) { | 1966 | tcp_for_write_queue(skb, sk) { |
1945 | if (skb == tcp_send_head(sk)) | 1967 | if (skb == tcp_send_head(sk)) |
@@ -1952,6 +1974,7 @@ void tcp_enter_loss(struct sock *sk, int how) | |||
1952 | TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_ACKED; | 1974 | TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_ACKED; |
1953 | TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; | 1975 | TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; |
1954 | tp->lost_out += tcp_skb_pcount(skb); | 1976 | tp->lost_out += tcp_skb_pcount(skb); |
1977 | tp->retransmit_high = TCP_SKB_CB(skb)->end_seq; | ||
1955 | } | 1978 | } |
1956 | } | 1979 | } |
1957 | tcp_verify_left_out(tp); | 1980 | tcp_verify_left_out(tp); |
@@ -2157,19 +2180,6 @@ static int tcp_time_to_recover(struct sock *sk) | |||
2157 | return 0; | 2180 | return 0; |
2158 | } | 2181 | } |
2159 | 2182 | ||
2160 | /* RFC: This is from the original, I doubt that this is necessary at all: | ||
2161 | * clear xmit_retrans hint if seq of this skb is beyond hint. How could we | ||
2162 | * retransmitted past LOST markings in the first place? I'm not fully sure | ||
2163 | * about undo and end of connection cases, which can cause R without L? | ||
2164 | */ | ||
2165 | static void tcp_verify_retransmit_hint(struct tcp_sock *tp, struct sk_buff *skb) | ||
2166 | { | ||
2167 | if ((tp->retransmit_skb_hint != NULL) && | ||
2168 | before(TCP_SKB_CB(skb)->seq, | ||
2169 | TCP_SKB_CB(tp->retransmit_skb_hint)->seq)) | ||
2170 | tp->retransmit_skb_hint = NULL; | ||
2171 | } | ||
2172 | |||
2173 | /* Mark head of queue up as lost. With RFC3517 SACK, the packets is | 2183 | /* Mark head of queue up as lost. With RFC3517 SACK, the packets is |
2174 | * is against sacked "cnt", otherwise it's against facked "cnt" | 2184 | * is against sacked "cnt", otherwise it's against facked "cnt" |
2175 | */ | 2185 | */ |
@@ -2217,11 +2227,7 @@ static void tcp_mark_head_lost(struct sock *sk, int packets) | |||
2217 | cnt = packets; | 2227 | cnt = packets; |
2218 | } | 2228 | } |
2219 | 2229 | ||
2220 | if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_SACKED_ACKED|TCPCB_LOST))) { | 2230 | tcp_skb_mark_lost(tp, skb); |
2221 | TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; | ||
2222 | tp->lost_out += tcp_skb_pcount(skb); | ||
2223 | tcp_verify_retransmit_hint(tp, skb); | ||
2224 | } | ||
2225 | } | 2231 | } |
2226 | tcp_verify_left_out(tp); | 2232 | tcp_verify_left_out(tp); |
2227 | } | 2233 | } |
@@ -2263,11 +2269,7 @@ static void tcp_update_scoreboard(struct sock *sk, int fast_rexmit) | |||
2263 | if (!tcp_skb_timedout(sk, skb)) | 2269 | if (!tcp_skb_timedout(sk, skb)) |
2264 | break; | 2270 | break; |
2265 | 2271 | ||
2266 | if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_SACKED_ACKED|TCPCB_LOST))) { | 2272 | tcp_skb_mark_lost(tp, skb); |
2267 | TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; | ||
2268 | tp->lost_out += tcp_skb_pcount(skb); | ||
2269 | tcp_verify_retransmit_hint(tp, skb); | ||
2270 | } | ||
2271 | } | 2273 | } |
2272 | 2274 | ||
2273 | tp->scoreboard_skb_hint = skb; | 2275 | tp->scoreboard_skb_hint = skb; |
@@ -2378,10 +2380,6 @@ static void tcp_undo_cwr(struct sock *sk, const int undo) | |||
2378 | } | 2380 | } |
2379 | tcp_moderate_cwnd(tp); | 2381 | tcp_moderate_cwnd(tp); |
2380 | tp->snd_cwnd_stamp = tcp_time_stamp; | 2382 | tp->snd_cwnd_stamp = tcp_time_stamp; |
2381 | |||
2382 | /* There is something screwy going on with the retrans hints after | ||
2383 | an undo */ | ||
2384 | tcp_clear_all_retrans_hints(tp); | ||
2385 | } | 2383 | } |
2386 | 2384 | ||
2387 | static inline int tcp_may_undo(struct tcp_sock *tp) | 2385 | static inline int tcp_may_undo(struct tcp_sock *tp) |
@@ -2838,7 +2836,8 @@ static u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb) | |||
2838 | * is before the ack sequence we can discard it as it's confirmed to have | 2836 | * is before the ack sequence we can discard it as it's confirmed to have |
2839 | * arrived at the other end. | 2837 | * arrived at the other end. |
2840 | */ | 2838 | */ |
2841 | static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets) | 2839 | static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, |
2840 | u32 prior_snd_una) | ||
2842 | { | 2841 | { |
2843 | struct tcp_sock *tp = tcp_sk(sk); | 2842 | struct tcp_sock *tp = tcp_sk(sk); |
2844 | const struct inet_connection_sock *icsk = inet_csk(sk); | 2843 | const struct inet_connection_sock *icsk = inet_csk(sk); |
@@ -2848,6 +2847,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets) | |||
2848 | int flag = 0; | 2847 | int flag = 0; |
2849 | u32 pkts_acked = 0; | 2848 | u32 pkts_acked = 0; |
2850 | u32 reord = tp->packets_out; | 2849 | u32 reord = tp->packets_out; |
2850 | u32 prior_sacked = tp->sacked_out; | ||
2851 | s32 seq_rtt = -1; | 2851 | s32 seq_rtt = -1; |
2852 | s32 ca_seq_rtt = -1; | 2852 | s32 ca_seq_rtt = -1; |
2853 | ktime_t last_ackt = net_invalid_timestamp(); | 2853 | ktime_t last_ackt = net_invalid_timestamp(); |
@@ -2904,9 +2904,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets) | |||
2904 | if (sacked & TCPCB_LOST) | 2904 | if (sacked & TCPCB_LOST) |
2905 | tp->lost_out -= acked_pcount; | 2905 | tp->lost_out -= acked_pcount; |
2906 | 2906 | ||
2907 | if (unlikely(tp->urg_mode && !before(end_seq, tp->snd_up))) | ||
2908 | tp->urg_mode = 0; | ||
2909 | |||
2910 | tp->packets_out -= acked_pcount; | 2907 | tp->packets_out -= acked_pcount; |
2911 | pkts_acked += acked_pcount; | 2908 | pkts_acked += acked_pcount; |
2912 | 2909 | ||
@@ -2929,9 +2926,16 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets) | |||
2929 | 2926 | ||
2930 | tcp_unlink_write_queue(skb, sk); | 2927 | tcp_unlink_write_queue(skb, sk); |
2931 | sk_wmem_free_skb(sk, skb); | 2928 | sk_wmem_free_skb(sk, skb); |
2932 | tcp_clear_all_retrans_hints(tp); | 2929 | tp->scoreboard_skb_hint = NULL; |
2930 | if (skb == tp->retransmit_skb_hint) | ||
2931 | tp->retransmit_skb_hint = NULL; | ||
2932 | if (skb == tp->lost_skb_hint) | ||
2933 | tp->lost_skb_hint = NULL; | ||
2933 | } | 2934 | } |
2934 | 2935 | ||
2936 | if (likely(between(tp->snd_up, prior_snd_una, tp->snd_una))) | ||
2937 | tp->snd_up = tp->snd_una; | ||
2938 | |||
2935 | if (skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) | 2939 | if (skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) |
2936 | flag |= FLAG_SACK_RENEGING; | 2940 | flag |= FLAG_SACK_RENEGING; |
2937 | 2941 | ||
@@ -2948,6 +2952,15 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets) | |||
2948 | /* Non-retransmitted hole got filled? That's reordering */ | 2952 | /* Non-retransmitted hole got filled? That's reordering */ |
2949 | if (reord < prior_fackets) | 2953 | if (reord < prior_fackets) |
2950 | tcp_update_reordering(sk, tp->fackets_out - reord, 0); | 2954 | tcp_update_reordering(sk, tp->fackets_out - reord, 0); |
2955 | |||
2956 | /* No need to care for underflows here because | ||
2957 | * the lost_skb_hint gets NULLed if we're past it | ||
2958 | * (or something non-trivial happened) | ||
2959 | */ | ||
2960 | if (tcp_is_fack(tp)) | ||
2961 | tp->lost_cnt_hint -= pkts_acked; | ||
2962 | else | ||
2963 | tp->lost_cnt_hint -= prior_sacked - tp->sacked_out; | ||
2951 | } | 2964 | } |
2952 | 2965 | ||
2953 | tp->fackets_out -= min(pkts_acked, tp->fackets_out); | 2966 | tp->fackets_out -= min(pkts_acked, tp->fackets_out); |
@@ -3299,7 +3312,7 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) | |||
3299 | goto no_queue; | 3312 | goto no_queue; |
3300 | 3313 | ||
3301 | /* See if we can take anything off of the retransmit queue. */ | 3314 | /* See if we can take anything off of the retransmit queue. */ |
3302 | flag |= tcp_clean_rtx_queue(sk, prior_fackets); | 3315 | flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una); |
3303 | 3316 | ||
3304 | if (tp->frto_counter) | 3317 | if (tp->frto_counter) |
3305 | frto_cwnd = tcp_process_frto(sk, flag); | 3318 | frto_cwnd = tcp_process_frto(sk, flag); |
@@ -3442,6 +3455,22 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, | |||
3442 | } | 3455 | } |
3443 | } | 3456 | } |
3444 | 3457 | ||
3458 | static int tcp_parse_aligned_timestamp(struct tcp_sock *tp, struct tcphdr *th) | ||
3459 | { | ||
3460 | __be32 *ptr = (__be32 *)(th + 1); | ||
3461 | |||
3462 | if (*ptr == htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | ||
3463 | | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)) { | ||
3464 | tp->rx_opt.saw_tstamp = 1; | ||
3465 | ++ptr; | ||
3466 | tp->rx_opt.rcv_tsval = ntohl(*ptr); | ||
3467 | ++ptr; | ||
3468 | tp->rx_opt.rcv_tsecr = ntohl(*ptr); | ||
3469 | return 1; | ||
3470 | } | ||
3471 | return 0; | ||
3472 | } | ||
3473 | |||
3445 | /* Fast parse options. This hopes to only see timestamps. | 3474 | /* Fast parse options. This hopes to only see timestamps. |
3446 | * If it is wrong it falls back on tcp_parse_options(). | 3475 | * If it is wrong it falls back on tcp_parse_options(). |
3447 | */ | 3476 | */ |
@@ -3453,16 +3482,8 @@ static int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th, | |||
3453 | return 0; | 3482 | return 0; |
3454 | } else if (tp->rx_opt.tstamp_ok && | 3483 | } else if (tp->rx_opt.tstamp_ok && |
3455 | th->doff == (sizeof(struct tcphdr)>>2)+(TCPOLEN_TSTAMP_ALIGNED>>2)) { | 3484 | th->doff == (sizeof(struct tcphdr)>>2)+(TCPOLEN_TSTAMP_ALIGNED>>2)) { |
3456 | __be32 *ptr = (__be32 *)(th + 1); | 3485 | if (tcp_parse_aligned_timestamp(tp, th)) |
3457 | if (*ptr == htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | ||
3458 | | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)) { | ||
3459 | tp->rx_opt.saw_tstamp = 1; | ||
3460 | ++ptr; | ||
3461 | tp->rx_opt.rcv_tsval = ntohl(*ptr); | ||
3462 | ++ptr; | ||
3463 | tp->rx_opt.rcv_tsecr = ntohl(*ptr); | ||
3464 | return 1; | 3486 | return 1; |
3465 | } | ||
3466 | } | 3487 | } |
3467 | tcp_parse_options(skb, &tp->rx_opt, 1); | 3488 | tcp_parse_options(skb, &tp->rx_opt, 1); |
3468 | return 1; | 3489 | return 1; |
@@ -4138,7 +4159,7 @@ drop: | |||
4138 | skb1 = skb1->prev; | 4159 | skb1 = skb1->prev; |
4139 | } | 4160 | } |
4140 | } | 4161 | } |
4141 | __skb_insert(skb, skb1, skb1->next, &tp->out_of_order_queue); | 4162 | __skb_queue_after(&tp->out_of_order_queue, skb1, skb); |
4142 | 4163 | ||
4143 | /* And clean segments covered by new one as whole. */ | 4164 | /* And clean segments covered by new one as whole. */ |
4144 | while ((skb1 = skb->next) != | 4165 | while ((skb1 = skb->next) != |
@@ -4161,6 +4182,18 @@ add_sack: | |||
4161 | } | 4182 | } |
4162 | } | 4183 | } |
4163 | 4184 | ||
4185 | static struct sk_buff *tcp_collapse_one(struct sock *sk, struct sk_buff *skb, | ||
4186 | struct sk_buff_head *list) | ||
4187 | { | ||
4188 | struct sk_buff *next = skb->next; | ||
4189 | |||
4190 | __skb_unlink(skb, list); | ||
4191 | __kfree_skb(skb); | ||
4192 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRCVCOLLAPSED); | ||
4193 | |||
4194 | return next; | ||
4195 | } | ||
4196 | |||
4164 | /* Collapse contiguous sequence of skbs head..tail with | 4197 | /* Collapse contiguous sequence of skbs head..tail with |
4165 | * sequence numbers start..end. | 4198 | * sequence numbers start..end. |
4166 | * Segments with FIN/SYN are not collapsed (only because this | 4199 | * Segments with FIN/SYN are not collapsed (only because this |
@@ -4178,11 +4211,7 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list, | |||
4178 | for (skb = head; skb != tail;) { | 4211 | for (skb = head; skb != tail;) { |
4179 | /* No new bits? It is possible on ofo queue. */ | 4212 | /* No new bits? It is possible on ofo queue. */ |
4180 | if (!before(start, TCP_SKB_CB(skb)->end_seq)) { | 4213 | if (!before(start, TCP_SKB_CB(skb)->end_seq)) { |
4181 | struct sk_buff *next = skb->next; | 4214 | skb = tcp_collapse_one(sk, skb, list); |
4182 | __skb_unlink(skb, list); | ||
4183 | __kfree_skb(skb); | ||
4184 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRCVCOLLAPSED); | ||
4185 | skb = next; | ||
4186 | continue; | 4215 | continue; |
4187 | } | 4216 | } |
4188 | 4217 | ||
@@ -4228,7 +4257,7 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list, | |||
4228 | memcpy(nskb->head, skb->head, header); | 4257 | memcpy(nskb->head, skb->head, header); |
4229 | memcpy(nskb->cb, skb->cb, sizeof(skb->cb)); | 4258 | memcpy(nskb->cb, skb->cb, sizeof(skb->cb)); |
4230 | TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(nskb)->end_seq = start; | 4259 | TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(nskb)->end_seq = start; |
4231 | __skb_insert(nskb, skb->prev, skb, list); | 4260 | __skb_queue_before(list, skb, nskb); |
4232 | skb_set_owner_r(nskb, sk); | 4261 | skb_set_owner_r(nskb, sk); |
4233 | 4262 | ||
4234 | /* Copy data, releasing collapsed skbs. */ | 4263 | /* Copy data, releasing collapsed skbs. */ |
@@ -4246,11 +4275,7 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list, | |||
4246 | start += size; | 4275 | start += size; |
4247 | } | 4276 | } |
4248 | if (!before(start, TCP_SKB_CB(skb)->end_seq)) { | 4277 | if (!before(start, TCP_SKB_CB(skb)->end_seq)) { |
4249 | struct sk_buff *next = skb->next; | 4278 | skb = tcp_collapse_one(sk, skb, list); |
4250 | __skb_unlink(skb, list); | ||
4251 | __kfree_skb(skb); | ||
4252 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRCVCOLLAPSED); | ||
4253 | skb = next; | ||
4254 | if (skb == tail || | 4279 | if (skb == tail || |
4255 | tcp_hdr(skb)->syn || | 4280 | tcp_hdr(skb)->syn || |
4256 | tcp_hdr(skb)->fin) | 4281 | tcp_hdr(skb)->fin) |
@@ -4436,8 +4461,8 @@ static void tcp_new_space(struct sock *sk) | |||
4436 | 4461 | ||
4437 | if (tcp_should_expand_sndbuf(sk)) { | 4462 | if (tcp_should_expand_sndbuf(sk)) { |
4438 | int sndmem = max_t(u32, tp->rx_opt.mss_clamp, tp->mss_cache) + | 4463 | int sndmem = max_t(u32, tp->rx_opt.mss_clamp, tp->mss_cache) + |
4439 | MAX_TCP_HEADER + 16 + sizeof(struct sk_buff), | 4464 | MAX_TCP_HEADER + 16 + sizeof(struct sk_buff); |
4440 | demanded = max_t(unsigned int, tp->snd_cwnd, | 4465 | int demanded = max_t(unsigned int, tp->snd_cwnd, |
4441 | tp->reordering + 1); | 4466 | tp->reordering + 1); |
4442 | sndmem *= 2 * demanded; | 4467 | sndmem *= 2 * demanded; |
4443 | if (sndmem > sk->sk_sndbuf) | 4468 | if (sndmem > sk->sk_sndbuf) |
@@ -4691,6 +4716,67 @@ out: | |||
4691 | } | 4716 | } |
4692 | #endif /* CONFIG_NET_DMA */ | 4717 | #endif /* CONFIG_NET_DMA */ |
4693 | 4718 | ||
4719 | /* Does PAWS and seqno based validation of an incoming segment, flags will | ||
4720 | * play significant role here. | ||
4721 | */ | ||
4722 | static int tcp_validate_incoming(struct sock *sk, struct sk_buff *skb, | ||
4723 | struct tcphdr *th, int syn_inerr) | ||
4724 | { | ||
4725 | struct tcp_sock *tp = tcp_sk(sk); | ||
4726 | |||
4727 | /* RFC1323: H1. Apply PAWS check first. */ | ||
4728 | if (tcp_fast_parse_options(skb, th, tp) && tp->rx_opt.saw_tstamp && | ||
4729 | tcp_paws_discard(sk, skb)) { | ||
4730 | if (!th->rst) { | ||
4731 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED); | ||
4732 | tcp_send_dupack(sk, skb); | ||
4733 | goto discard; | ||
4734 | } | ||
4735 | /* Reset is accepted even if it did not pass PAWS. */ | ||
4736 | } | ||
4737 | |||
4738 | /* Step 1: check sequence number */ | ||
4739 | if (!tcp_sequence(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq)) { | ||
4740 | /* RFC793, page 37: "In all states except SYN-SENT, all reset | ||
4741 | * (RST) segments are validated by checking their SEQ-fields." | ||
4742 | * And page 69: "If an incoming segment is not acceptable, | ||
4743 | * an acknowledgment should be sent in reply (unless the RST | ||
4744 | * bit is set, if so drop the segment and return)". | ||
4745 | */ | ||
4746 | if (!th->rst) | ||
4747 | tcp_send_dupack(sk, skb); | ||
4748 | goto discard; | ||
4749 | } | ||
4750 | |||
4751 | /* Step 2: check RST bit */ | ||
4752 | if (th->rst) { | ||
4753 | tcp_reset(sk); | ||
4754 | goto discard; | ||
4755 | } | ||
4756 | |||
4757 | /* ts_recent update must be made after we are sure that the packet | ||
4758 | * is in window. | ||
4759 | */ | ||
4760 | tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq); | ||
4761 | |||
4762 | /* step 3: check security and precedence [ignored] */ | ||
4763 | |||
4764 | /* step 4: Check for a SYN in window. */ | ||
4765 | if (th->syn && !before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) { | ||
4766 | if (syn_inerr) | ||
4767 | TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS); | ||
4768 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONSYN); | ||
4769 | tcp_reset(sk); | ||
4770 | return -1; | ||
4771 | } | ||
4772 | |||
4773 | return 1; | ||
4774 | |||
4775 | discard: | ||
4776 | __kfree_skb(skb); | ||
4777 | return 0; | ||
4778 | } | ||
4779 | |||
4694 | /* | 4780 | /* |
4695 | * TCP receive function for the ESTABLISHED state. | 4781 | * TCP receive function for the ESTABLISHED state. |
4696 | * | 4782 | * |
@@ -4718,6 +4804,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, | |||
4718 | struct tcphdr *th, unsigned len) | 4804 | struct tcphdr *th, unsigned len) |
4719 | { | 4805 | { |
4720 | struct tcp_sock *tp = tcp_sk(sk); | 4806 | struct tcp_sock *tp = tcp_sk(sk); |
4807 | int res; | ||
4721 | 4808 | ||
4722 | /* | 4809 | /* |
4723 | * Header prediction. | 4810 | * Header prediction. |
@@ -4756,19 +4843,10 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, | |||
4756 | 4843 | ||
4757 | /* Check timestamp */ | 4844 | /* Check timestamp */ |
4758 | if (tcp_header_len == sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) { | 4845 | if (tcp_header_len == sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) { |
4759 | __be32 *ptr = (__be32 *)(th + 1); | ||
4760 | |||
4761 | /* No? Slow path! */ | 4846 | /* No? Slow path! */ |
4762 | if (*ptr != htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | 4847 | if (!tcp_parse_aligned_timestamp(tp, th)) |
4763 | | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)) | ||
4764 | goto slow_path; | 4848 | goto slow_path; |
4765 | 4849 | ||
4766 | tp->rx_opt.saw_tstamp = 1; | ||
4767 | ++ptr; | ||
4768 | tp->rx_opt.rcv_tsval = ntohl(*ptr); | ||
4769 | ++ptr; | ||
4770 | tp->rx_opt.rcv_tsecr = ntohl(*ptr); | ||
4771 | |||
4772 | /* If PAWS failed, check it more carefully in slow path */ | 4850 | /* If PAWS failed, check it more carefully in slow path */ |
4773 | if ((s32)(tp->rx_opt.rcv_tsval - tp->rx_opt.ts_recent) < 0) | 4851 | if ((s32)(tp->rx_opt.rcv_tsval - tp->rx_opt.ts_recent) < 0) |
4774 | goto slow_path; | 4852 | goto slow_path; |
@@ -4879,7 +4957,8 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, | |||
4879 | goto no_ack; | 4957 | goto no_ack; |
4880 | } | 4958 | } |
4881 | 4959 | ||
4882 | __tcp_ack_snd_check(sk, 0); | 4960 | if (!copied_early || tp->rcv_nxt != tp->rcv_wup) |
4961 | __tcp_ack_snd_check(sk, 0); | ||
4883 | no_ack: | 4962 | no_ack: |
4884 | #ifdef CONFIG_NET_DMA | 4963 | #ifdef CONFIG_NET_DMA |
4885 | if (copied_early) | 4964 | if (copied_early) |
@@ -4899,51 +4978,12 @@ slow_path: | |||
4899 | goto csum_error; | 4978 | goto csum_error; |
4900 | 4979 | ||
4901 | /* | 4980 | /* |
4902 | * RFC1323: H1. Apply PAWS check first. | ||
4903 | */ | ||
4904 | if (tcp_fast_parse_options(skb, th, tp) && tp->rx_opt.saw_tstamp && | ||
4905 | tcp_paws_discard(sk, skb)) { | ||
4906 | if (!th->rst) { | ||
4907 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED); | ||
4908 | tcp_send_dupack(sk, skb); | ||
4909 | goto discard; | ||
4910 | } | ||
4911 | /* Resets are accepted even if PAWS failed. | ||
4912 | |||
4913 | ts_recent update must be made after we are sure | ||
4914 | that the packet is in window. | ||
4915 | */ | ||
4916 | } | ||
4917 | |||
4918 | /* | ||
4919 | * Standard slow path. | 4981 | * Standard slow path. |
4920 | */ | 4982 | */ |
4921 | 4983 | ||
4922 | if (!tcp_sequence(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq)) { | 4984 | res = tcp_validate_incoming(sk, skb, th, 1); |
4923 | /* RFC793, page 37: "In all states except SYN-SENT, all reset | 4985 | if (res <= 0) |
4924 | * (RST) segments are validated by checking their SEQ-fields." | 4986 | return -res; |
4925 | * And page 69: "If an incoming segment is not acceptable, | ||
4926 | * an acknowledgment should be sent in reply (unless the RST bit | ||
4927 | * is set, if so drop the segment and return)". | ||
4928 | */ | ||
4929 | if (!th->rst) | ||
4930 | tcp_send_dupack(sk, skb); | ||
4931 | goto discard; | ||
4932 | } | ||
4933 | |||
4934 | if (th->rst) { | ||
4935 | tcp_reset(sk); | ||
4936 | goto discard; | ||
4937 | } | ||
4938 | |||
4939 | tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq); | ||
4940 | |||
4941 | if (th->syn && !before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) { | ||
4942 | TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS); | ||
4943 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONSYN); | ||
4944 | tcp_reset(sk); | ||
4945 | return 1; | ||
4946 | } | ||
4947 | 4987 | ||
4948 | step5: | 4988 | step5: |
4949 | if (th->ack) | 4989 | if (th->ack) |
@@ -5225,6 +5265,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, | |||
5225 | struct tcp_sock *tp = tcp_sk(sk); | 5265 | struct tcp_sock *tp = tcp_sk(sk); |
5226 | struct inet_connection_sock *icsk = inet_csk(sk); | 5266 | struct inet_connection_sock *icsk = inet_csk(sk); |
5227 | int queued = 0; | 5267 | int queued = 0; |
5268 | int res; | ||
5228 | 5269 | ||
5229 | tp->rx_opt.saw_tstamp = 0; | 5270 | tp->rx_opt.saw_tstamp = 0; |
5230 | 5271 | ||
@@ -5277,42 +5318,9 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, | |||
5277 | return 0; | 5318 | return 0; |
5278 | } | 5319 | } |
5279 | 5320 | ||
5280 | if (tcp_fast_parse_options(skb, th, tp) && tp->rx_opt.saw_tstamp && | 5321 | res = tcp_validate_incoming(sk, skb, th, 0); |
5281 | tcp_paws_discard(sk, skb)) { | 5322 | if (res <= 0) |
5282 | if (!th->rst) { | 5323 | return -res; |
5283 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED); | ||
5284 | tcp_send_dupack(sk, skb); | ||
5285 | goto discard; | ||
5286 | } | ||
5287 | /* Reset is accepted even if it did not pass PAWS. */ | ||
5288 | } | ||
5289 | |||
5290 | /* step 1: check sequence number */ | ||
5291 | if (!tcp_sequence(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq)) { | ||
5292 | if (!th->rst) | ||
5293 | tcp_send_dupack(sk, skb); | ||
5294 | goto discard; | ||
5295 | } | ||
5296 | |||
5297 | /* step 2: check RST bit */ | ||
5298 | if (th->rst) { | ||
5299 | tcp_reset(sk); | ||
5300 | goto discard; | ||
5301 | } | ||
5302 | |||
5303 | tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq); | ||
5304 | |||
5305 | /* step 3: check security and precedence [ignored] */ | ||
5306 | |||
5307 | /* step 4: | ||
5308 | * | ||
5309 | * Check for a SYN in window. | ||
5310 | */ | ||
5311 | if (th->syn && !before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) { | ||
5312 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONSYN); | ||
5313 | tcp_reset(sk); | ||
5314 | return 1; | ||
5315 | } | ||
5316 | 5324 | ||
5317 | /* step 5: check the ACK field */ | 5325 | /* step 5: check the ACK field */ |
5318 | if (th->ack) { | 5326 | if (th->ack) { |