Diffstat (limited to 'net/ipv4/tcp_input.c')
 net/ipv4/tcp_input.c | 332 ++++++++++++-----------
 1 file changed, 170 insertions(+), 162 deletions(-)
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 67ccce2a96bd..d77c0d29e239 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -979,6 +979,39 @@ static void tcp_update_reordering(struct sock *sk, const int metric,
 	}
 }
 
+/* This must be called before lost_out is incremented */
+static void tcp_verify_retransmit_hint(struct tcp_sock *tp, struct sk_buff *skb)
+{
+	if ((tp->retransmit_skb_hint == NULL) ||
+	    before(TCP_SKB_CB(skb)->seq,
+		   TCP_SKB_CB(tp->retransmit_skb_hint)->seq))
+		tp->retransmit_skb_hint = skb;
+
+	if (!tp->lost_out ||
+	    after(TCP_SKB_CB(skb)->end_seq, tp->retransmit_high))
+		tp->retransmit_high = TCP_SKB_CB(skb)->end_seq;
+}
+
+static void tcp_skb_mark_lost(struct tcp_sock *tp, struct sk_buff *skb)
+{
+	if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_ACKED))) {
+		tcp_verify_retransmit_hint(tp, skb);
+
+		tp->lost_out += tcp_skb_pcount(skb);
+		TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
+	}
+}
+
+void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp, struct sk_buff *skb)
+{
+	tcp_verify_retransmit_hint(tp, skb);
+
+	if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_ACKED))) {
+		tp->lost_out += tcp_skb_pcount(skb);
+		TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
+	}
+}
+
 /* This procedure tags the retransmission queue when SACKs arrive.
  *
  * We have three tag bits: SACKED(S), RETRANS(R) and LOST(L).
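
The three helpers above centralize hint bookkeeping that used to be open-coded at every LOST-marking site: tcp_verify_retransmit_hint() keeps retransmit_skb_hint pointing at the lowest-sequence skb that may still need retransmitting and raises retransmit_high to the highest lost end_seq, instead of throwing the hint away and forcing a full write-queue rescan. A minimal userspace sketch of that invariant (illustration only, not kernel code; before()/after() are re-derived from include/net/tcp.h, and struct fake_tp is a made-up stand-in for the relevant tcp_sock fields):

	#include <stdint.h>
	#include <stdio.h>

	/* Wrap-safe sequence compares, as in include/net/tcp.h. */
	static int before(uint32_t s1, uint32_t s2) { return (int32_t)(s1 - s2) < 0; }
	static int after(uint32_t s1, uint32_t s2) { return before(s2, s1); }

	struct fake_tp {
		uint32_t hint_seq;	/* stands in for retransmit_skb_hint->seq */
		int have_hint;		/* stands in for retransmit_skb_hint != NULL */
		uint32_t retransmit_high;
		uint32_t lost_out;
	};

	/* Mirrors tcp_verify_retransmit_hint(): hint = lowest lost seq,
	 * retransmit_high = highest lost end_seq. */
	static void verify_hint(struct fake_tp *tp, uint32_t seq, uint32_t end_seq)
	{
		if (!tp->have_hint || before(seq, tp->hint_seq)) {
			tp->hint_seq = seq;
			tp->have_hint = 1;
		}
		if (!tp->lost_out || after(end_seq, tp->retransmit_high))
			tp->retransmit_high = end_seq;
	}

	int main(void)
	{
		struct fake_tp tp = { 0, 0, 0, 0 };

		verify_hint(&tp, 3000, 4000);	/* first loss */
		tp.lost_out++;
		verify_hint(&tp, 1000, 2000);	/* earlier loss pulls the hint down */
		tp.lost_out++;
		printf("hint=%u high=%u\n", tp.hint_seq, tp.retransmit_high);
		/* prints: hint=1000 high=4000 */
		return 0;
	}
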
@@ -1155,13 +1188,7 @@ static void tcp_mark_lost_retrans(struct sock *sk)
 			TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
 			tp->retrans_out -= tcp_skb_pcount(skb);
 
-			/* clear lost hint */
-			tp->retransmit_skb_hint = NULL;
-
-			if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_ACKED))) {
-				tp->lost_out += tcp_skb_pcount(skb);
-				TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
-			}
+			tcp_skb_mark_lost_uncond_verify(tp, skb);
 			NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSTRETRANSMIT);
 		} else {
 			if (before(ack_seq, new_low_seq))
@@ -1271,9 +1298,6 @@ static int tcp_sacktag_one(struct sk_buff *skb, struct sock *sk,
 					~(TCPCB_LOST|TCPCB_SACKED_RETRANS);
 				tp->lost_out -= tcp_skb_pcount(skb);
 				tp->retrans_out -= tcp_skb_pcount(skb);
-
-				/* clear lost hint */
-				tp->retransmit_skb_hint = NULL;
 			}
 		} else {
 			if (!(sacked & TCPCB_RETRANS)) {
@@ -1292,9 +1316,6 @@ static int tcp_sacktag_one(struct sk_buff *skb, struct sock *sk,
 		if (sacked & TCPCB_LOST) {
 			TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
 			tp->lost_out -= tcp_skb_pcount(skb);
-
-			/* clear lost hint */
-			tp->retransmit_skb_hint = NULL;
 		}
 	}
 
@@ -1324,7 +1345,6 @@ static int tcp_sacktag_one(struct sk_buff *skb, struct sock *sk,
 	if (dup_sack && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS)) {
 		TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
 		tp->retrans_out -= tcp_skb_pcount(skb);
-		tp->retransmit_skb_hint = NULL;
 	}
 
 	return flag;
@@ -1726,6 +1746,8 @@ int tcp_use_frto(struct sock *sk)
 		return 0;
 
 	skb = tcp_write_queue_head(sk);
+	if (tcp_skb_is_last(sk, skb))
+		return 1;
 	skb = tcp_write_queue_next(sk, skb);	/* Skips head */
 	tcp_for_write_queue_from(skb, sk) {
 		if (skb == tcp_send_head(sk))
@@ -1867,6 +1889,7 @@ static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments, int flag)
 		if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) {
 			TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
 			tp->lost_out += tcp_skb_pcount(skb);
+			tp->retransmit_high = TCP_SKB_CB(skb)->end_seq;
 		}
 	}
 	tcp_verify_left_out(tp);
@@ -1883,7 +1906,7 @@ static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments, int flag)
 	tp->high_seq = tp->snd_nxt;
 	TCP_ECN_queue_cwr(tp);
 
-	tcp_clear_retrans_hints_partial(tp);
+	tcp_clear_all_retrans_hints(tp);
 }
 
 static void tcp_clear_retrans_partial(struct tcp_sock *tp)
@@ -1934,12 +1957,11 @@ void tcp_enter_loss(struct sock *sk, int how)
 		/* Push undo marker, if it was plain RTO and nothing
 		 * was retransmitted. */
 		tp->undo_marker = tp->snd_una;
-		tcp_clear_retrans_hints_partial(tp);
 	} else {
 		tp->sacked_out = 0;
 		tp->fackets_out = 0;
-		tcp_clear_all_retrans_hints(tp);
 	}
+	tcp_clear_all_retrans_hints(tp);
 
 	tcp_for_write_queue(skb, sk) {
 		if (skb == tcp_send_head(sk))
@@ -1952,6 +1974,7 @@ void tcp_enter_loss(struct sock *sk, int how)
 			TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_ACKED;
 			TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
 			tp->lost_out += tcp_skb_pcount(skb);
+			tp->retransmit_high = TCP_SKB_CB(skb)->end_seq;
 		}
 	}
 	tcp_verify_left_out(tp);
@@ -2157,19 +2180,6 @@ static int tcp_time_to_recover(struct sock *sk)
 	return 0;
 }
 
-/* RFC: This is from the original, I doubt that this is necessary at all:
- * clear xmit_retrans hint if seq of this skb is beyond hint. How could we
- * retransmitted past LOST markings in the first place? I'm not fully sure
- * about undo and end of connection cases, which can cause R without L?
- */
-static void tcp_verify_retransmit_hint(struct tcp_sock *tp, struct sk_buff *skb)
-{
-	if ((tp->retransmit_skb_hint != NULL) &&
-	    before(TCP_SKB_CB(skb)->seq,
-		   TCP_SKB_CB(tp->retransmit_skb_hint)->seq))
-		tp->retransmit_skb_hint = NULL;
-}
-
 /* Mark head of queue up as lost. With RFC3517 SACK, the packets is
  * is against sacked "cnt", otherwise it's against facked "cnt"
  */
@@ -2217,11 +2227,7 @@ static void tcp_mark_head_lost(struct sock *sk, int packets)
 			cnt = packets;
 		}
 
-		if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_SACKED_ACKED|TCPCB_LOST))) {
-			TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
-			tp->lost_out += tcp_skb_pcount(skb);
-			tcp_verify_retransmit_hint(tp, skb);
-		}
+		tcp_skb_mark_lost(tp, skb);
 	}
 	tcp_verify_left_out(tp);
 }
@@ -2263,11 +2269,7 @@ static void tcp_update_scoreboard(struct sock *sk, int fast_rexmit)
 		if (!tcp_skb_timedout(sk, skb))
 			break;
 
-		if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_SACKED_ACKED|TCPCB_LOST))) {
-			TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
-			tp->lost_out += tcp_skb_pcount(skb);
-			tcp_verify_retransmit_hint(tp, skb);
-		}
+		tcp_skb_mark_lost(tp, skb);
 	}
 
 	tp->scoreboard_skb_hint = skb;
@@ -2378,10 +2380,6 @@ static void tcp_undo_cwr(struct sock *sk, const int undo)
 	}
 	tcp_moderate_cwnd(tp);
 	tp->snd_cwnd_stamp = tcp_time_stamp;
-
-	/* There is something screwy going on with the retrans hints after
-	   an undo */
-	tcp_clear_all_retrans_hints(tp);
 }
 
 static inline int tcp_may_undo(struct tcp_sock *tp)
@@ -2838,7 +2836,8 @@ static u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb)
  * is before the ack sequence we can discard it as it's confirmed to have
  * arrived at the other end.
  */
-static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets)
+static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
+			       u32 prior_snd_una)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	const struct inet_connection_sock *icsk = inet_csk(sk);
@@ -2848,6 +2847,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets)
 	int flag = 0;
 	u32 pkts_acked = 0;
 	u32 reord = tp->packets_out;
+	u32 prior_sacked = tp->sacked_out;
 	s32 seq_rtt = -1;
 	s32 ca_seq_rtt = -1;
 	ktime_t last_ackt = net_invalid_timestamp();
@@ -2904,9 +2904,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets)
 		if (sacked & TCPCB_LOST)
 			tp->lost_out -= acked_pcount;
 
-		if (unlikely(tp->urg_mode && !before(end_seq, tp->snd_up)))
-			tp->urg_mode = 0;
-
 		tp->packets_out -= acked_pcount;
 		pkts_acked += acked_pcount;
 
@@ -2929,9 +2926,16 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets)
 
 		tcp_unlink_write_queue(skb, sk);
 		sk_wmem_free_skb(sk, skb);
-		tcp_clear_all_retrans_hints(tp);
+		tp->scoreboard_skb_hint = NULL;
+		if (skb == tp->retransmit_skb_hint)
+			tp->retransmit_skb_hint = NULL;
+		if (skb == tp->lost_skb_hint)
+			tp->lost_skb_hint = NULL;
 	}
 
+	if (likely(between(tp->snd_up, prior_snd_una, tp->snd_una)))
+		tp->snd_up = tp->snd_una;
+
 	if (skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
 		flag |= FLAG_SACK_RENEGING;
 
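
Instead of clearing urg_mode per acked skb (the two lines removed in the previous hunk), snd_up is now pulled up to snd_una once, after the loop, whenever the old urgent pointer lands inside the newly acked range. between(seq1, seq2, seq3) is the kernel's wrap-safe "seq2 <= seq1 <= seq3". A small userspace illustration (between() re-derived from its definition in include/net/tcp.h):

	#include <stdint.h>
	#include <stdio.h>

	/* Wrap-safe "seq2 <= seq1 <= seq3", as in include/net/tcp.h. */
	static int between(uint32_t seq1, uint32_t seq2, uint32_t seq3)
	{
		return seq3 - seq2 >= seq1 - seq2;
	}

	int main(void)
	{
		uint32_t prior_snd_una = 1000, snd_una = 5000;

		/* urgent pointer inside the newly acked range: advance it */
		printf("%d\n", between(3000, prior_snd_una, snd_una));	/* 1 */
		/* urgent pointer beyond snd_una: leave it alone */
		printf("%d\n", between(9000, prior_snd_una, snd_una));	/* 0 */
		return 0;
	}
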
@@ -2948,6 +2952,15 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets)
 		/* Non-retransmitted hole got filled? That's reordering */
 		if (reord < prior_fackets)
 			tcp_update_reordering(sk, tp->fackets_out - reord, 0);
+
+		/* No need to care for underflows here because
+		 * the lost_skb_hint gets NULLed if we're past it
+		 * (or something non-trivial happened)
+		 */
+		if (tcp_is_fack(tp))
+			tp->lost_cnt_hint -= pkts_acked;
+		else
+			tp->lost_cnt_hint -= prior_sacked - tp->sacked_out;
 	}
 
 	tp->fackets_out -= min(pkts_acked, tp->fackets_out);
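
lost_cnt_hint caches how many packets below lost_skb_hint the scoreboard walk has already classified. Once cumulatively ACKed packets leave the queue, that count must shrink: under FACK every acked packet was counted, so it drops by pkts_acked; without FACK only SACKed packets were counted, so it drops by however many SACKed packets the cumulative ACK just absorbed. A toy calculation with made-up numbers:

	#include <stdio.h>

	int main(void)
	{
		int lost_cnt_hint = 10;
		int pkts_acked = 4;			/* cumulatively ACKed now */
		int prior_sacked = 6, sacked_out = 3;	/* 3 SACKs absorbed */

		int fack = lost_cnt_hint - pkts_acked;			   /* 6 */
		int rfc3517 = lost_cnt_hint - (prior_sacked - sacked_out); /* 7 */

		printf("fack=%d rfc3517=%d\n", fack, rfc3517);
		return 0;
	}
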
@@ -3299,7 +3312,7 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
 		goto no_queue;
 
 	/* See if we can take anything off of the retransmit queue. */
-	flag |= tcp_clean_rtx_queue(sk, prior_fackets);
+	flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una);
 
 	if (tp->frto_counter)
 		frto_cwnd = tcp_process_frto(sk, flag);
@@ -3442,6 +3455,22 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
 	}
 }
 
+static int tcp_parse_aligned_timestamp(struct tcp_sock *tp, struct tcphdr *th)
+{
+	__be32 *ptr = (__be32 *)(th + 1);
+
+	if (*ptr == htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16)
+			  | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)) {
+		tp->rx_opt.saw_tstamp = 1;
+		++ptr;
+		tp->rx_opt.rcv_tsval = ntohl(*ptr);
+		++ptr;
+		tp->rx_opt.rcv_tsecr = ntohl(*ptr);
+		return 1;
+	}
+	return 0;
+}
+
 /* Fast parse options. This hopes to only see timestamps.
  * If it is wrong it falls back on tcp_parse_options().
  */
@@ -3453,16 +3482,8 @@ static int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th,
 		return 0;
 	} else if (tp->rx_opt.tstamp_ok &&
 		   th->doff == (sizeof(struct tcphdr)>>2)+(TCPOLEN_TSTAMP_ALIGNED>>2)) {
-		__be32 *ptr = (__be32 *)(th + 1);
-		if (*ptr == htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16)
-				  | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)) {
-			tp->rx_opt.saw_tstamp = 1;
-			++ptr;
-			tp->rx_opt.rcv_tsval = ntohl(*ptr);
-			++ptr;
-			tp->rx_opt.rcv_tsecr = ntohl(*ptr);
+		if (tcp_parse_aligned_timestamp(tp, th))
 			return 1;
-		}
 	}
 	tcp_parse_options(skb, &tp->rx_opt, 1);
 	return 1;
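
Both the fast option parser above and the header-prediction path later in this patch now share tcp_parse_aligned_timestamp(), which recognizes only the canonical 12-byte encoding of the timestamp option: two NOP pads, kind TIMESTAMP (8), length 10, then the two 32-bit stamps. A standalone snippet that prints the predicted first word the parser compares against (option constants copied from include/net/tcp.h):

	#include <stdint.h>
	#include <stdio.h>
	#include <arpa/inet.h>

	#define TCPOPT_NOP		1
	#define TCPOPT_TIMESTAMP	8
	#define TCPOLEN_TIMESTAMP	10

	int main(void)
	{
		/* NOP, NOP, TIMESTAMP, len packed into one aligned word */
		uint32_t predicted = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16)
					   | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);

		printf("0x%08x\n", ntohl(predicted));	/* 0x0101080a */
		return 0;
	}
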
@@ -4138,7 +4159,7 @@ drop:
 			skb1 = skb1->prev;
 		}
 	}
-	__skb_insert(skb, skb1, skb1->next, &tp->out_of_order_queue);
+	__skb_queue_after(&tp->out_of_order_queue, skb1, skb);
 
 	/* And clean segments covered by new one as whole. */
 	while ((skb1 = skb->next) !=
@@ -4161,6 +4182,18 @@ add_sack:
 	}
 }
 
+static struct sk_buff *tcp_collapse_one(struct sock *sk, struct sk_buff *skb,
+					struct sk_buff_head *list)
+{
+	struct sk_buff *next = skb->next;
+
+	__skb_unlink(skb, list);
+	__kfree_skb(skb);
+	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRCVCOLLAPSED);
+
+	return next;
+}
+
 /* Collapse contiguous sequence of skbs head..tail with
  * sequence numbers start..end.
  * Segments with FIN/SYN are not collapsed (only because this
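
tcp_collapse_one() deduplicates the unlink-free-advance sequence that appeared twice inside tcp_collapse(). The one subtlety worth calling out: skb->next must be captured before __skb_unlink()/__kfree_skb(), because touching skb afterwards is a use-after-free. The same pattern in plain, self-contained C (a generic list, not sk_buff code):

	#include <stdlib.h>

	struct node {
		struct node *next;
	};

	/* Same shape as tcp_collapse_one(): grab the successor *before*
	 * freeing the node, then return it so the caller's loop continues. */
	static struct node *drop_one(struct node *n)
	{
		struct node *next = n->next;

		free(n);	/* n->next is dead from here on */
		return next;
	}

	int main(void)
	{
		struct node *head = NULL, *n;
		int i;

		for (i = 0; i < 3; i++) {
			n = malloc(sizeof(*n));
			n->next = head;
			head = n;
		}
		for (n = head; n != NULL; )
			n = drop_one(n);	/* safe iterate-while-freeing */
		return 0;
	}
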
@@ -4178,11 +4211,7 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list,
 	for (skb = head; skb != tail;) {
 		/* No new bits? It is possible on ofo queue. */
 		if (!before(start, TCP_SKB_CB(skb)->end_seq)) {
-			struct sk_buff *next = skb->next;
-			__skb_unlink(skb, list);
-			__kfree_skb(skb);
-			NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRCVCOLLAPSED);
-			skb = next;
+			skb = tcp_collapse_one(sk, skb, list);
 			continue;
 		}
 
@@ -4228,7 +4257,7 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list,
 		memcpy(nskb->head, skb->head, header);
 		memcpy(nskb->cb, skb->cb, sizeof(skb->cb));
 		TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(nskb)->end_seq = start;
-		__skb_insert(nskb, skb->prev, skb, list);
+		__skb_queue_before(list, skb, nskb);
 		skb_set_owner_r(nskb, sk);
 
 		/* Copy data, releasing collapsed skbs. */
@@ -4246,11 +4275,7 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list,
 			start += size;
 		}
 		if (!before(start, TCP_SKB_CB(skb)->end_seq)) {
-			struct sk_buff *next = skb->next;
-			__skb_unlink(skb, list);
-			__kfree_skb(skb);
-			NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRCVCOLLAPSED);
-			skb = next;
+			skb = tcp_collapse_one(sk, skb, list);
 			if (skb == tail ||
 			    tcp_hdr(skb)->syn ||
 			    tcp_hdr(skb)->fin)
@@ -4436,8 +4461,8 @@ static void tcp_new_space(struct sock *sk)
 
 	if (tcp_should_expand_sndbuf(sk)) {
 		int sndmem = max_t(u32, tp->rx_opt.mss_clamp, tp->mss_cache) +
-			MAX_TCP_HEADER + 16 + sizeof(struct sk_buff),
-			demanded = max_t(unsigned int, tp->snd_cwnd,
+			MAX_TCP_HEADER + 16 + sizeof(struct sk_buff);
+		int demanded = max_t(unsigned int, tp->snd_cwnd,
 					 tp->reordering + 1);
 		sndmem *= 2 * demanded;
 		if (sndmem > sk->sk_sndbuf)
@@ -4691,6 +4716,67 @@ out:
 }
 #endif /* CONFIG_NET_DMA */
 
+/* Does PAWS and seqno based validation of an incoming segment, flags will
+ * play significant role here.
+ */
+static int tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
+				 struct tcphdr *th, int syn_inerr)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+
+	/* RFC1323: H1. Apply PAWS check first. */
+	if (tcp_fast_parse_options(skb, th, tp) && tp->rx_opt.saw_tstamp &&
+	    tcp_paws_discard(sk, skb)) {
+		if (!th->rst) {
+			NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED);
+			tcp_send_dupack(sk, skb);
+			goto discard;
+		}
+		/* Reset is accepted even if it did not pass PAWS. */
+	}
+
+	/* Step 1: check sequence number */
+	if (!tcp_sequence(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq)) {
+		/* RFC793, page 37: "In all states except SYN-SENT, all reset
+		 * (RST) segments are validated by checking their SEQ-fields."
+		 * And page 69: "If an incoming segment is not acceptable,
+		 * an acknowledgment should be sent in reply (unless the RST
+		 * bit is set, if so drop the segment and return)".
+		 */
+		if (!th->rst)
+			tcp_send_dupack(sk, skb);
+		goto discard;
+	}
+
+	/* Step 2: check RST bit */
+	if (th->rst) {
+		tcp_reset(sk);
+		goto discard;
+	}
+
+	/* ts_recent update must be made after we are sure that the packet
+	 * is in window.
+	 */
+	tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq);
+
+	/* step 3: check security and precedence [ignored] */
+
+	/* step 4: Check for a SYN in window. */
+	if (th->syn && !before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
+		if (syn_inerr)
+			TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
+		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONSYN);
+		tcp_reset(sk);
+		return -1;
+	}
+
+	return 1;
+
+discard:
+	__kfree_skb(skb);
+	return 0;
+}
+
 /*
  *	TCP receive function for the ESTABLISHED state.
  *
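
tcp_validate_incoming() folds the PAWS, sequence, RST and in-window-SYN checks that were duplicated in tcp_rcv_established() and tcp_rcv_state_process() into one routine with a three-way result. Reading the two call sites below (the contract is inferred from them, not separately documented): 1 means the segment is acceptable and processing continues, 0 means it was consumed and freed on the discard path, -1 means a SYN in window forced a reset. The callers' "if (res <= 0) return -res;" then maps 0 onto their 0 (done) and -1 onto their 1 (reset). A trivial demonstration of that mapping:

	#include <stdio.h>

	/* Stand-in for tcp_validate_incoming(); real logic is in the hunk above. */
	static int validate(int verdict) { return verdict; }

	static int caller(int verdict)
	{
		int res = validate(verdict);

		if (res <= 0)
			return -res;	/* 0 -> 0 (consumed), -1 -> 1 (reset) */
		return 2;		/* placeholder: kept processing */
	}

	int main(void)
	{
		printf("%d %d %d\n", caller(1), caller(0), caller(-1));
		/* prints: 2 0 1 */
		return 0;
	}
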
@@ -4718,6 +4804,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
 				struct tcphdr *th, unsigned len)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
+	int res;
 
 	/*
 	 *	Header prediction.
@@ -4756,19 +4843,10 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
 
 		/* Check timestamp */
 		if (tcp_header_len == sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) {
-			__be32 *ptr = (__be32 *)(th + 1);
-
 			/* No? Slow path! */
-			if (*ptr != htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16)
-					  | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP))
+			if (!tcp_parse_aligned_timestamp(tp, th))
 				goto slow_path;
 
-			tp->rx_opt.saw_tstamp = 1;
-			++ptr;
-			tp->rx_opt.rcv_tsval = ntohl(*ptr);
-			++ptr;
-			tp->rx_opt.rcv_tsecr = ntohl(*ptr);
-
 			/* If PAWS failed, check it more carefully in slow path */
 			if ((s32)(tp->rx_opt.rcv_tsval - tp->rx_opt.ts_recent) < 0)
 				goto slow_path;
@@ -4879,7 +4957,8 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
 					goto no_ack;
 			}
 
-			__tcp_ack_snd_check(sk, 0);
+			if (!copied_early || tp->rcv_nxt != tp->rcv_wup)
+				__tcp_ack_snd_check(sk, 0);
 no_ack:
 #ifdef CONFIG_NET_DMA
 			if (copied_early)
@@ -4899,51 +4978,12 @@ slow_path:
 		goto csum_error;
 
 	/*
-	 * RFC1323: H1. Apply PAWS check first.
-	 */
-	if (tcp_fast_parse_options(skb, th, tp) && tp->rx_opt.saw_tstamp &&
-	    tcp_paws_discard(sk, skb)) {
-		if (!th->rst) {
-			NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED);
-			tcp_send_dupack(sk, skb);
-			goto discard;
-		}
-		/* Resets are accepted even if PAWS failed.
-
-		   ts_recent update must be made after we are sure
-		   that the packet is in window.
-		 */
-	}
-
-	/*
 	 * Standard slow path.
 	 */
 
-	if (!tcp_sequence(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq)) {
-		/* RFC793, page 37: "In all states except SYN-SENT, all reset
-		 * (RST) segments are validated by checking their SEQ-fields."
-		 * And page 69: "If an incoming segment is not acceptable,
-		 * an acknowledgment should be sent in reply (unless the RST bit
-		 * is set, if so drop the segment and return)".
-		 */
-		if (!th->rst)
-			tcp_send_dupack(sk, skb);
-		goto discard;
-	}
-
-	if (th->rst) {
-		tcp_reset(sk);
-		goto discard;
-	}
-
-	tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq);
-
-	if (th->syn && !before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
-		TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
-		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONSYN);
-		tcp_reset(sk);
-		return 1;
-	}
+	res = tcp_validate_incoming(sk, skb, th, 1);
+	if (res <= 0)
+		return -res;
 
 step5:
 	if (th->ack)
@@ -5225,6 +5265,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	int queued = 0;
+	int res;
 
 	tp->rx_opt.saw_tstamp = 0;
 
@@ -5277,42 +5318,9 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 		return 0;
 	}
 
-	if (tcp_fast_parse_options(skb, th, tp) && tp->rx_opt.saw_tstamp &&
-	    tcp_paws_discard(sk, skb)) {
-		if (!th->rst) {
-			NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED);
-			tcp_send_dupack(sk, skb);
-			goto discard;
-		}
-		/* Reset is accepted even if it did not pass PAWS. */
-	}
-
-	/* step 1: check sequence number */
-	if (!tcp_sequence(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq)) {
-		if (!th->rst)
-			tcp_send_dupack(sk, skb);
-		goto discard;
-	}
-
-	/* step 2: check RST bit */
-	if (th->rst) {
-		tcp_reset(sk);
-		goto discard;
-	}
-
-	tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq);
-
-	/* step 3: check security and precedence [ignored] */
-
-	/* step 4:
-	 *
-	 *   Check for a SYN in window.
-	 */
-	if (th->syn && !before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
-		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONSYN);
-		tcp_reset(sk);
-		return 1;
-	}
+	res = tcp_validate_incoming(sk, skb, th, 0);
+	if (res <= 0)
+		return -res;
 
 	/* step 5: check the ACK field */
 	if (th->ack) {