path: root/net/ipv4/tcp_input.c
author     Jonathan Herman <hermanjl@cs.unc.edu>   2013-01-17 16:15:55 -0500
committer  Jonathan Herman <hermanjl@cs.unc.edu>   2013-01-17 16:15:55 -0500
commit     8dea78da5cee153b8af9c07a2745f6c55057fe12 (patch)
tree       a8f4d49d63b1ecc92f2fddceba0655b2472c5bd9 /net/ipv4/tcp_input.c
parent     406089d01562f1e2bf9f089fd7637009ebaad589 (diff)
Patched in Tegra support.
Diffstat (limited to 'net/ipv4/tcp_input.c')
-rw-r--r--  net/ipv4/tcp_input.c  1805
1 file changed, 775 insertions, 1030 deletions
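
For reference, the diff shown below can be reproduced locally with standard git commands, assuming a clone of a kernel tree that contains this commit (the ~/linux path is only a placeholder):

    cd ~/linux
    # Show this commit's changes to tcp_input.c only
    git show 8dea78da5cee153b8af9c07a2745f6c55057fe12 -- net/ipv4/tcp_input.c
    # Equivalently, diff the parent listed above against this commit
    git diff 406089d01562f1e2bf9f089fd7637009ebaad589 8dea78da5cee153b8af9c07a2745f6c55057fe12 -- net/ipv4/tcp_input.c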
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 18f97ca76b0..d73aab3fbfc 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -61,8 +61,6 @@
61 * Pasi Sarolahti: F-RTO for dealing with spurious RTOs 61 * Pasi Sarolahti: F-RTO for dealing with spurious RTOs
62 */ 62 */
63 63
64#define pr_fmt(fmt) "TCP: " fmt
65
66#include <linux/mm.h> 64#include <linux/mm.h>
67#include <linux/slab.h> 65#include <linux/slab.h>
68#include <linux/module.h> 66#include <linux/module.h>
@@ -85,23 +83,20 @@ int sysctl_tcp_ecn __read_mostly = 2;
85EXPORT_SYMBOL(sysctl_tcp_ecn); 83EXPORT_SYMBOL(sysctl_tcp_ecn);
86int sysctl_tcp_dsack __read_mostly = 1; 84int sysctl_tcp_dsack __read_mostly = 1;
87int sysctl_tcp_app_win __read_mostly = 31; 85int sysctl_tcp_app_win __read_mostly = 31;
88int sysctl_tcp_adv_win_scale __read_mostly = 1; 86int sysctl_tcp_adv_win_scale __read_mostly = 2;
89EXPORT_SYMBOL(sysctl_tcp_adv_win_scale); 87EXPORT_SYMBOL(sysctl_tcp_adv_win_scale);
90 88
91/* rfc5961 challenge ack rate limiting */
92int sysctl_tcp_challenge_ack_limit = 100;
93
94int sysctl_tcp_stdurg __read_mostly; 89int sysctl_tcp_stdurg __read_mostly;
95int sysctl_tcp_rfc1337 __read_mostly; 90int sysctl_tcp_rfc1337 __read_mostly;
96int sysctl_tcp_max_orphans __read_mostly = NR_FILE; 91int sysctl_tcp_max_orphans __read_mostly = NR_FILE;
97int sysctl_tcp_frto __read_mostly = 2; 92int sysctl_tcp_frto __read_mostly = 2;
98int sysctl_tcp_frto_response __read_mostly; 93int sysctl_tcp_frto_response __read_mostly;
94int sysctl_tcp_nometrics_save __read_mostly;
99 95
100int sysctl_tcp_thin_dupack __read_mostly; 96int sysctl_tcp_thin_dupack __read_mostly;
101 97
102int sysctl_tcp_moderate_rcvbuf __read_mostly = 1; 98int sysctl_tcp_moderate_rcvbuf __read_mostly = 1;
103int sysctl_tcp_abc __read_mostly; 99int sysctl_tcp_abc __read_mostly;
104int sysctl_tcp_early_retrans __read_mostly = 2;
105 100
106#define FLAG_DATA 0x01 /* Incoming frame contained data. */ 101#define FLAG_DATA 0x01 /* Incoming frame contained data. */
107#define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */ 102#define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */
@@ -110,6 +105,7 @@ int sysctl_tcp_early_retrans __read_mostly = 2;
110#define FLAG_SYN_ACKED 0x10 /* This ACK acknowledged SYN. */ 105#define FLAG_SYN_ACKED 0x10 /* This ACK acknowledged SYN. */
111#define FLAG_DATA_SACKED 0x20 /* New SACK. */ 106#define FLAG_DATA_SACKED 0x20 /* New SACK. */
112#define FLAG_ECE 0x40 /* ECE in this ACK */ 107#define FLAG_ECE 0x40 /* ECE in this ACK */
108#define FLAG_DATA_LOST 0x80 /* SACK detected data lossage. */
113#define FLAG_SLOWPATH 0x100 /* Do not skip RFC checks for window update.*/ 109#define FLAG_SLOWPATH 0x100 /* Do not skip RFC checks for window update.*/
114#define FLAG_ONLY_ORIG_SACKED 0x200 /* SACKs only non-rexmit sent before RTO */ 110#define FLAG_ONLY_ORIG_SACKED 0x200 /* SACKs only non-rexmit sent before RTO */
115#define FLAG_SND_UNA_ADVANCED 0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */ 111#define FLAG_SND_UNA_ADVANCED 0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */
@@ -178,7 +174,7 @@ static void tcp_measure_rcv_mss(struct sock *sk, const struct sk_buff *skb)
178static void tcp_incr_quickack(struct sock *sk) 174static void tcp_incr_quickack(struct sock *sk)
179{ 175{
180 struct inet_connection_sock *icsk = inet_csk(sk); 176 struct inet_connection_sock *icsk = inet_csk(sk);
181 unsigned int quickacks = tcp_sk(sk)->rcv_wnd / (2 * icsk->icsk_ack.rcv_mss); 177 unsigned quickacks = tcp_sk(sk)->rcv_wnd / (2 * icsk->icsk_ack.rcv_mss);
182 178
183 if (quickacks == 0) 179 if (quickacks == 0)
184 quickacks = 2; 180 quickacks = 2;
@@ -198,10 +194,9 @@ static void tcp_enter_quickack_mode(struct sock *sk)
198 * and the session is not interactive. 194 * and the session is not interactive.
199 */ 195 */
200 196
201static inline bool tcp_in_quickack_mode(const struct sock *sk) 197static inline int tcp_in_quickack_mode(const struct sock *sk)
202{ 198{
203 const struct inet_connection_sock *icsk = inet_csk(sk); 199 const struct inet_connection_sock *icsk = inet_csk(sk);
204
205 return icsk->icsk_ack.quick && !icsk->icsk_ack.pingpong; 200 return icsk->icsk_ack.quick && !icsk->icsk_ack.pingpong;
206} 201}
207 202
@@ -211,7 +206,7 @@ static inline void TCP_ECN_queue_cwr(struct tcp_sock *tp)
211 tp->ecn_flags |= TCP_ECN_QUEUE_CWR; 206 tp->ecn_flags |= TCP_ECN_QUEUE_CWR;
212} 207}
213 208
214static inline void TCP_ECN_accept_cwr(struct tcp_sock *tp, const struct sk_buff *skb) 209static inline void TCP_ECN_accept_cwr(struct tcp_sock *tp, struct sk_buff *skb)
215{ 210{
216 if (tcp_hdr(skb)->cwr) 211 if (tcp_hdr(skb)->cwr)
217 tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR; 212 tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
@@ -222,49 +217,36 @@ static inline void TCP_ECN_withdraw_cwr(struct tcp_sock *tp)
222 tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR; 217 tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
223} 218}
224 219
225static inline void TCP_ECN_check_ce(struct tcp_sock *tp, const struct sk_buff *skb) 220static inline void TCP_ECN_check_ce(struct tcp_sock *tp, struct sk_buff *skb)
226{ 221{
227 if (!(tp->ecn_flags & TCP_ECN_OK)) 222 if (tp->ecn_flags & TCP_ECN_OK) {
228 return; 223 if (INET_ECN_is_ce(TCP_SKB_CB(skb)->flags))
229 224 tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
230 switch (TCP_SKB_CB(skb)->ip_dsfield & INET_ECN_MASK) {
231 case INET_ECN_NOT_ECT:
232 /* Funny extension: if ECT is not set on a segment, 225 /* Funny extension: if ECT is not set on a segment,
233 * and we already seen ECT on a previous segment, 226 * it is surely retransmit. It is not in ECN RFC,
234 * it is probably a retransmit. 227 * but Linux follows this rule. */
235 */ 228 else if (INET_ECN_is_not_ect((TCP_SKB_CB(skb)->flags)))
236 if (tp->ecn_flags & TCP_ECN_SEEN)
237 tcp_enter_quickack_mode((struct sock *)tp);
238 break;
239 case INET_ECN_CE:
240 if (!(tp->ecn_flags & TCP_ECN_DEMAND_CWR)) {
241 /* Better not delay acks, sender can have a very low cwnd */
242 tcp_enter_quickack_mode((struct sock *)tp); 229 tcp_enter_quickack_mode((struct sock *)tp);
243 tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
244 }
245 /* fallinto */
246 default:
247 tp->ecn_flags |= TCP_ECN_SEEN;
248 } 230 }
249} 231}
250 232
251static inline void TCP_ECN_rcv_synack(struct tcp_sock *tp, const struct tcphdr *th) 233static inline void TCP_ECN_rcv_synack(struct tcp_sock *tp, struct tcphdr *th)
252{ 234{
253 if ((tp->ecn_flags & TCP_ECN_OK) && (!th->ece || th->cwr)) 235 if ((tp->ecn_flags & TCP_ECN_OK) && (!th->ece || th->cwr))
254 tp->ecn_flags &= ~TCP_ECN_OK; 236 tp->ecn_flags &= ~TCP_ECN_OK;
255} 237}
256 238
257static inline void TCP_ECN_rcv_syn(struct tcp_sock *tp, const struct tcphdr *th) 239static inline void TCP_ECN_rcv_syn(struct tcp_sock *tp, struct tcphdr *th)
258{ 240{
259 if ((tp->ecn_flags & TCP_ECN_OK) && (!th->ece || !th->cwr)) 241 if ((tp->ecn_flags & TCP_ECN_OK) && (!th->ece || !th->cwr))
260 tp->ecn_flags &= ~TCP_ECN_OK; 242 tp->ecn_flags &= ~TCP_ECN_OK;
261} 243}
262 244
263static bool TCP_ECN_rcv_ecn_echo(const struct tcp_sock *tp, const struct tcphdr *th) 245static inline int TCP_ECN_rcv_ecn_echo(struct tcp_sock *tp, struct tcphdr *th)
264{ 246{
265 if (th->ece && !th->syn && (tp->ecn_flags & TCP_ECN_OK)) 247 if (th->ece && !th->syn && (tp->ecn_flags & TCP_ECN_OK))
266 return true; 248 return 1;
267 return false; 249 return 0;
268} 250}
269 251
270/* Buffer size and advertised window tuning. 252/* Buffer size and advertised window tuning.
@@ -274,11 +256,14 @@ static bool TCP_ECN_rcv_ecn_echo(const struct tcp_sock *tp, const struct tcphdr
274 256
275static void tcp_fixup_sndbuf(struct sock *sk) 257static void tcp_fixup_sndbuf(struct sock *sk)
276{ 258{
277 int sndmem = SKB_TRUESIZE(tcp_sk(sk)->rx_opt.mss_clamp + MAX_TCP_HEADER); 259 int sndmem = tcp_sk(sk)->rx_opt.mss_clamp + MAX_TCP_HEADER + 16 +
260 sizeof(struct sk_buff);
278 261
279 sndmem *= TCP_INIT_CWND; 262 if (sk->sk_sndbuf < 3 * sndmem) {
280 if (sk->sk_sndbuf < sndmem) 263 sk->sk_sndbuf = 3 * sndmem;
281 sk->sk_sndbuf = min(sndmem, sysctl_tcp_wmem[2]); 264 if (sk->sk_sndbuf > sysctl_tcp_wmem[2])
265 sk->sk_sndbuf = sysctl_tcp_wmem[2];
266 }
282} 267}
283 268
284/* 2. Tuning advertised window (window_clamp, rcv_ssthresh) 269/* 2. Tuning advertised window (window_clamp, rcv_ssthresh)
@@ -324,14 +309,14 @@ static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb)
324 return 0; 309 return 0;
325} 310}
326 311
327static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb) 312static void tcp_grow_window(struct sock *sk, struct sk_buff *skb)
328{ 313{
329 struct tcp_sock *tp = tcp_sk(sk); 314 struct tcp_sock *tp = tcp_sk(sk);
330 315
331 /* Check #1 */ 316 /* Check #1 */
332 if (tp->rcv_ssthresh < tp->window_clamp && 317 if (tp->rcv_ssthresh < tp->window_clamp &&
333 (int)tp->rcv_ssthresh < tcp_space(sk) && 318 (int)tp->rcv_ssthresh < tcp_space(sk) &&
334 !sk_under_memory_pressure(sk)) { 319 !tcp_memory_pressure) {
335 int incr; 320 int incr;
336 321
337 /* Check #2. Increase window, if skb with such overhead 322 /* Check #2. Increase window, if skb with such overhead
@@ -343,7 +328,6 @@ static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb)
343 incr = __tcp_grow_window(sk, skb); 328 incr = __tcp_grow_window(sk, skb);
344 329
345 if (incr) { 330 if (incr) {
346 incr = max_t(int, incr, 2 * skb->len);
347 tp->rcv_ssthresh = min(tp->rcv_ssthresh + incr, 331 tp->rcv_ssthresh = min(tp->rcv_ssthresh + incr,
348 tp->window_clamp); 332 tp->window_clamp);
349 inet_csk(sk)->icsk_ack.quick |= 1; 333 inet_csk(sk)->icsk_ack.quick |= 1;
@@ -355,30 +339,23 @@ static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb)
355 339
356static void tcp_fixup_rcvbuf(struct sock *sk) 340static void tcp_fixup_rcvbuf(struct sock *sk)
357{ 341{
358 u32 mss = tcp_sk(sk)->advmss; 342 struct tcp_sock *tp = tcp_sk(sk);
359 u32 icwnd = TCP_DEFAULT_INIT_RCVWND; 343 int rcvmem = tp->advmss + MAX_TCP_HEADER + 16 + sizeof(struct sk_buff);
360 int rcvmem;
361 344
362 /* Limit to 10 segments if mss <= 1460, 345 /* Try to select rcvbuf so that 4 mss-sized segments
363 * or 14600/mss segments, with a minimum of two segments. 346 * will fit to window and corresponding skbs will fit to our rcvbuf.
347 * (was 3; 4 is minimum to allow fast retransmit to work.)
364 */ 348 */
365 if (mss > 1460) 349 while (tcp_win_from_space(rcvmem) < tp->advmss)
366 icwnd = max_t(u32, (1460 * TCP_DEFAULT_INIT_RCVWND) / mss, 2);
367
368 rcvmem = SKB_TRUESIZE(mss + MAX_TCP_HEADER);
369 while (tcp_win_from_space(rcvmem) < mss)
370 rcvmem += 128; 350 rcvmem += 128;
371 351 if (sk->sk_rcvbuf < 4 * rcvmem)
372 rcvmem *= icwnd; 352 sk->sk_rcvbuf = min(4 * rcvmem, sysctl_tcp_rmem[2]);
373
374 if (sk->sk_rcvbuf < rcvmem)
375 sk->sk_rcvbuf = min(rcvmem, sysctl_tcp_rmem[2]);
376} 353}
377 354
378/* 4. Try to fixup all. It is made immediately after connection enters 355/* 4. Try to fixup all. It is made immediately after connection enters
379 * established state. 356 * established state.
380 */ 357 */
381void tcp_init_buffer_space(struct sock *sk) 358static void tcp_init_buffer_space(struct sock *sk)
382{ 359{
383 struct tcp_sock *tp = tcp_sk(sk); 360 struct tcp_sock *tp = tcp_sk(sk);
384 int maxwin; 361 int maxwin;
@@ -421,8 +398,8 @@ static void tcp_clamp_window(struct sock *sk)
421 398
422 if (sk->sk_rcvbuf < sysctl_tcp_rmem[2] && 399 if (sk->sk_rcvbuf < sysctl_tcp_rmem[2] &&
423 !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) && 400 !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) &&
424 !sk_under_memory_pressure(sk) && 401 !tcp_memory_pressure &&
425 sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)) { 402 atomic_long_read(&tcp_memory_allocated) < sysctl_tcp_mem[0]) {
426 sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc), 403 sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc),
427 sysctl_tcp_rmem[2]); 404 sysctl_tcp_rmem[2]);
428 } 405 }
@@ -439,7 +416,7 @@ static void tcp_clamp_window(struct sock *sk)
439 */ 416 */
440void tcp_initialize_rcv_mss(struct sock *sk) 417void tcp_initialize_rcv_mss(struct sock *sk)
441{ 418{
442 const struct tcp_sock *tp = tcp_sk(sk); 419 struct tcp_sock *tp = tcp_sk(sk);
443 unsigned int hint = min_t(unsigned int, tp->advmss, tp->mss_cache); 420 unsigned int hint = min_t(unsigned int, tp->advmss, tp->mss_cache);
444 421
445 hint = min(hint, tp->rcv_wnd / 2); 422 hint = min(hint, tp->rcv_wnd / 2);
@@ -483,11 +460,8 @@ static void tcp_rcv_rtt_update(struct tcp_sock *tp, u32 sample, int win_dep)
483 if (!win_dep) { 460 if (!win_dep) {
484 m -= (new_sample >> 3); 461 m -= (new_sample >> 3);
485 new_sample += m; 462 new_sample += m;
486 } else { 463 } else if (m < new_sample)
487 m <<= 3; 464 new_sample = m << 3;
488 if (m < new_sample)
489 new_sample = m;
490 }
491 } else { 465 } else {
492 /* No previous measure. */ 466 /* No previous measure. */
493 new_sample = m << 3; 467 new_sample = m << 3;
@@ -503,7 +477,7 @@ static inline void tcp_rcv_rtt_measure(struct tcp_sock *tp)
503 goto new_measure; 477 goto new_measure;
504 if (before(tp->rcv_nxt, tp->rcv_rtt_est.seq)) 478 if (before(tp->rcv_nxt, tp->rcv_rtt_est.seq))
505 return; 479 return;
506 tcp_rcv_rtt_update(tp, tcp_time_stamp - tp->rcv_rtt_est.time, 1); 480 tcp_rcv_rtt_update(tp, jiffies - tp->rcv_rtt_est.time, 1);
507 481
508new_measure: 482new_measure:
509 tp->rcv_rtt_est.seq = tp->rcv_nxt + tp->rcv_wnd; 483 tp->rcv_rtt_est.seq = tp->rcv_nxt + tp->rcv_wnd;
@@ -557,7 +531,8 @@ void tcp_rcv_space_adjust(struct sock *sk)
557 space /= tp->advmss; 531 space /= tp->advmss;
558 if (!space) 532 if (!space)
559 space = 1; 533 space = 1;
560 rcvmem = SKB_TRUESIZE(tp->advmss + MAX_TCP_HEADER); 534 rcvmem = (tp->advmss + MAX_TCP_HEADER +
535 16 + sizeof(struct sk_buff));
561 while (tcp_win_from_space(rcvmem) < tp->advmss) 536 while (tcp_win_from_space(rcvmem) < tp->advmss)
562 rcvmem += 128; 537 rcvmem += 128;
563 space *= rcvmem; 538 space *= rcvmem;
@@ -707,7 +682,7 @@ static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt)
707/* Calculate rto without backoff. This is the second half of Van Jacobson's 682/* Calculate rto without backoff. This is the second half of Van Jacobson's
708 * routine referred to above. 683 * routine referred to above.
709 */ 684 */
710void tcp_set_rto(struct sock *sk) 685static inline void tcp_set_rto(struct sock *sk)
711{ 686{
712 const struct tcp_sock *tp = tcp_sk(sk); 687 const struct tcp_sock *tp = tcp_sk(sk);
713 /* Old crap is replaced with new one. 8) 688 /* Old crap is replaced with new one. 8)
@@ -734,7 +709,110 @@ void tcp_set_rto(struct sock *sk)
734 tcp_bound_rto(sk); 709 tcp_bound_rto(sk);
735} 710}
736 711
737__u32 tcp_init_cwnd(const struct tcp_sock *tp, const struct dst_entry *dst) 712/* Save metrics learned by this TCP session.
713 This function is called only, when TCP finishes successfully
714 i.e. when it enters TIME-WAIT or goes from LAST-ACK to CLOSE.
715 */
716void tcp_update_metrics(struct sock *sk)
717{
718 struct tcp_sock *tp = tcp_sk(sk);
719 struct dst_entry *dst = __sk_dst_get(sk);
720
721 if (sysctl_tcp_nometrics_save)
722 return;
723
724 dst_confirm(dst);
725
726 if (dst && (dst->flags & DST_HOST)) {
727 const struct inet_connection_sock *icsk = inet_csk(sk);
728 int m;
729 unsigned long rtt;
730
731 if (icsk->icsk_backoff || !tp->srtt) {
732 /* This session failed to estimate rtt. Why?
733 * Probably, no packets returned in time.
734 * Reset our results.
735 */
736 if (!(dst_metric_locked(dst, RTAX_RTT)))
737 dst_metric_set(dst, RTAX_RTT, 0);
738 return;
739 }
740
741 rtt = dst_metric_rtt(dst, RTAX_RTT);
742 m = rtt - tp->srtt;
743
744 /* If newly calculated rtt larger than stored one,
745 * store new one. Otherwise, use EWMA. Remember,
746 * rtt overestimation is always better than underestimation.
747 */
748 if (!(dst_metric_locked(dst, RTAX_RTT))) {
749 if (m <= 0)
750 set_dst_metric_rtt(dst, RTAX_RTT, tp->srtt);
751 else
752 set_dst_metric_rtt(dst, RTAX_RTT, rtt - (m >> 3));
753 }
754
755 if (!(dst_metric_locked(dst, RTAX_RTTVAR))) {
756 unsigned long var;
757 if (m < 0)
758 m = -m;
759
760 /* Scale deviation to rttvar fixed point */
761 m >>= 1;
762 if (m < tp->mdev)
763 m = tp->mdev;
764
765 var = dst_metric_rtt(dst, RTAX_RTTVAR);
766 if (m >= var)
767 var = m;
768 else
769 var -= (var - m) >> 2;
770
771 set_dst_metric_rtt(dst, RTAX_RTTVAR, var);
772 }
773
774 if (tcp_in_initial_slowstart(tp)) {
775 /* Slow start still did not finish. */
776 if (dst_metric(dst, RTAX_SSTHRESH) &&
777 !dst_metric_locked(dst, RTAX_SSTHRESH) &&
778 (tp->snd_cwnd >> 1) > dst_metric(dst, RTAX_SSTHRESH))
779 dst_metric_set(dst, RTAX_SSTHRESH, tp->snd_cwnd >> 1);
780 if (!dst_metric_locked(dst, RTAX_CWND) &&
781 tp->snd_cwnd > dst_metric(dst, RTAX_CWND))
782 dst_metric_set(dst, RTAX_CWND, tp->snd_cwnd);
783 } else if (tp->snd_cwnd > tp->snd_ssthresh &&
784 icsk->icsk_ca_state == TCP_CA_Open) {
785 /* Cong. avoidance phase, cwnd is reliable. */
786 if (!dst_metric_locked(dst, RTAX_SSTHRESH))
787 dst_metric_set(dst, RTAX_SSTHRESH,
788 max(tp->snd_cwnd >> 1, tp->snd_ssthresh));
789 if (!dst_metric_locked(dst, RTAX_CWND))
790 dst_metric_set(dst, RTAX_CWND,
791 (dst_metric(dst, RTAX_CWND) +
792 tp->snd_cwnd) >> 1);
793 } else {
794 /* Else slow start did not finish, cwnd is non-sense,
795 ssthresh may be also invalid.
796 */
797 if (!dst_metric_locked(dst, RTAX_CWND))
798 dst_metric_set(dst, RTAX_CWND,
799 (dst_metric(dst, RTAX_CWND) +
800 tp->snd_ssthresh) >> 1);
801 if (dst_metric(dst, RTAX_SSTHRESH) &&
802 !dst_metric_locked(dst, RTAX_SSTHRESH) &&
803 tp->snd_ssthresh > dst_metric(dst, RTAX_SSTHRESH))
804 dst_metric_set(dst, RTAX_SSTHRESH, tp->snd_ssthresh);
805 }
806
807 if (!dst_metric_locked(dst, RTAX_REORDERING)) {
808 if (dst_metric(dst, RTAX_REORDERING) < tp->reordering &&
809 tp->reordering != sysctl_tcp_reordering)
810 dst_metric_set(dst, RTAX_REORDERING, tp->reordering);
811 }
812 }
813}
814
815__u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst)
738{ 816{
739 __u32 cwnd = (dst ? dst_metric(dst, RTAX_INITCWND) : 0); 817 __u32 cwnd = (dst ? dst_metric(dst, RTAX_INITCWND) : 0);
740 818
@@ -743,22 +821,124 @@ __u32 tcp_init_cwnd(const struct tcp_sock *tp, const struct dst_entry *dst)
743 return min_t(__u32, cwnd, tp->snd_cwnd_clamp); 821 return min_t(__u32, cwnd, tp->snd_cwnd_clamp);
744} 822}
745 823
824/* Set slow start threshold and cwnd not falling to slow start */
825void tcp_enter_cwr(struct sock *sk, const int set_ssthresh)
826{
827 struct tcp_sock *tp = tcp_sk(sk);
828 const struct inet_connection_sock *icsk = inet_csk(sk);
829
830 tp->prior_ssthresh = 0;
831 tp->bytes_acked = 0;
832 if (icsk->icsk_ca_state < TCP_CA_CWR) {
833 tp->undo_marker = 0;
834 if (set_ssthresh)
835 tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
836 tp->snd_cwnd = min(tp->snd_cwnd,
837 tcp_packets_in_flight(tp) + 1U);
838 tp->snd_cwnd_cnt = 0;
839 tp->high_seq = tp->snd_nxt;
840 tp->snd_cwnd_stamp = tcp_time_stamp;
841 TCP_ECN_queue_cwr(tp);
842
843 tcp_set_ca_state(sk, TCP_CA_CWR);
844 }
845}
846
746/* 847/*
747 * Packet counting of FACK is based on in-order assumptions, therefore TCP 848 * Packet counting of FACK is based on in-order assumptions, therefore TCP
748 * disables it when reordering is detected 849 * disables it when reordering is detected
749 */ 850 */
750void tcp_disable_fack(struct tcp_sock *tp) 851static void tcp_disable_fack(struct tcp_sock *tp)
751{ 852{
752 /* RFC3517 uses different metric in lost marker => reset on change */ 853 /* RFC3517 uses different metric in lost marker => reset on change */
753 if (tcp_is_fack(tp)) 854 if (tcp_is_fack(tp))
754 tp->lost_skb_hint = NULL; 855 tp->lost_skb_hint = NULL;
755 tp->rx_opt.sack_ok &= ~TCP_FACK_ENABLED; 856 tp->rx_opt.sack_ok &= ~2;
756} 857}
757 858
758/* Take a notice that peer is sending D-SACKs */ 859/* Take a notice that peer is sending D-SACKs */
759static void tcp_dsack_seen(struct tcp_sock *tp) 860static void tcp_dsack_seen(struct tcp_sock *tp)
760{ 861{
761 tp->rx_opt.sack_ok |= TCP_DSACK_SEEN; 862 tp->rx_opt.sack_ok |= 4;
863}
864
865/* Initialize metrics on socket. */
866
867static void tcp_init_metrics(struct sock *sk)
868{
869 struct tcp_sock *tp = tcp_sk(sk);
870 struct dst_entry *dst = __sk_dst_get(sk);
871
872 if (dst == NULL)
873 goto reset;
874
875 dst_confirm(dst);
876
877 if (dst_metric_locked(dst, RTAX_CWND))
878 tp->snd_cwnd_clamp = dst_metric(dst, RTAX_CWND);
879 if (dst_metric(dst, RTAX_SSTHRESH)) {
880 tp->snd_ssthresh = dst_metric(dst, RTAX_SSTHRESH);
881 if (tp->snd_ssthresh > tp->snd_cwnd_clamp)
882 tp->snd_ssthresh = tp->snd_cwnd_clamp;
883 } else {
884 /* ssthresh may have been reduced unnecessarily during.
885 * 3WHS. Restore it back to its initial default.
886 */
887 tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
888 }
889 if (dst_metric(dst, RTAX_REORDERING) &&
890 tp->reordering != dst_metric(dst, RTAX_REORDERING)) {
891 tcp_disable_fack(tp);
892 tp->reordering = dst_metric(dst, RTAX_REORDERING);
893 }
894
895 if (dst_metric(dst, RTAX_RTT) == 0 || tp->srtt == 0)
896 goto reset;
897
898 /* Initial rtt is determined from SYN,SYN-ACK.
899 * The segment is small and rtt may appear much
900 * less than real one. Use per-dst memory
901 * to make it more realistic.
902 *
903 * A bit of theory. RTT is time passed after "normal" sized packet
904 * is sent until it is ACKed. In normal circumstances sending small
905 * packets force peer to delay ACKs and calculation is correct too.
906 * The algorithm is adaptive and, provided we follow specs, it
907 * NEVER underestimate RTT. BUT! If peer tries to make some clever
908 * tricks sort of "quick acks" for time long enough to decrease RTT
909 * to low value, and then abruptly stops to do it and starts to delay
910 * ACKs, wait for troubles.
911 */
912 if (dst_metric_rtt(dst, RTAX_RTT) > tp->srtt) {
913 tp->srtt = dst_metric_rtt(dst, RTAX_RTT);
914 tp->rtt_seq = tp->snd_nxt;
915 }
916 if (dst_metric_rtt(dst, RTAX_RTTVAR) > tp->mdev) {
917 tp->mdev = dst_metric_rtt(dst, RTAX_RTTVAR);
918 tp->mdev_max = tp->rttvar = max(tp->mdev, tcp_rto_min(sk));
919 }
920 tcp_set_rto(sk);
921reset:
922 if (tp->srtt == 0) {
923 /* RFC2988bis: We've failed to get a valid RTT sample from
924 * 3WHS. This is most likely due to retransmission,
925 * including spurious one. Reset the RTO back to 3secs
926 * from the more aggressive 1sec to avoid more spurious
927 * retransmission.
928 */
929 tp->mdev = tp->mdev_max = tp->rttvar = TCP_TIMEOUT_FALLBACK;
930 inet_csk(sk)->icsk_rto = TCP_TIMEOUT_FALLBACK;
931 }
932 /* Cut cwnd down to 1 per RFC5681 if SYN or SYN-ACK has been
933 * retransmitted. In light of RFC2988bis' more aggressive 1sec
934 * initRTO, we only reset cwnd when more than 1 SYN/SYN-ACK
935 * retransmission has occurred.
936 */
937 if (tp->total_retrans > 1)
938 tp->snd_cwnd = 1;
939 else
940 tp->snd_cwnd = tcp_init_cwnd(tp, dst);
941 tp->snd_cwnd_stamp = tcp_time_stamp;
762} 942}
763 943
764static void tcp_update_reordering(struct sock *sk, const int metric, 944static void tcp_update_reordering(struct sock *sk, const int metric,
@@ -782,18 +962,15 @@ static void tcp_update_reordering(struct sock *sk, const int metric,
782 962
783 NET_INC_STATS_BH(sock_net(sk), mib_idx); 963 NET_INC_STATS_BH(sock_net(sk), mib_idx);
784#if FASTRETRANS_DEBUG > 1 964#if FASTRETRANS_DEBUG > 1
785 pr_debug("Disorder%d %d %u f%u s%u rr%d\n", 965 printk(KERN_DEBUG "Disorder%d %d %u f%u s%u rr%d\n",
786 tp->rx_opt.sack_ok, inet_csk(sk)->icsk_ca_state, 966 tp->rx_opt.sack_ok, inet_csk(sk)->icsk_ca_state,
787 tp->reordering, 967 tp->reordering,
788 tp->fackets_out, 968 tp->fackets_out,
789 tp->sacked_out, 969 tp->sacked_out,
790 tp->undo_marker ? tp->undo_retrans : 0); 970 tp->undo_marker ? tp->undo_retrans : 0);
791#endif 971#endif
792 tcp_disable_fack(tp); 972 tcp_disable_fack(tp);
793 } 973 }
794
795 if (metric > 0)
796 tcp_disable_early_retrans(tp);
797} 974}
798 975
799/* This must be called before lost_out is incremented */ 976/* This must be called before lost_out is incremented */
@@ -851,11 +1028,13 @@ static void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp,
851 * These 6 states form finite state machine, controlled by the following events: 1028 * These 6 states form finite state machine, controlled by the following events:
852 * 1. New ACK (+SACK) arrives. (tcp_sacktag_write_queue()) 1029 * 1. New ACK (+SACK) arrives. (tcp_sacktag_write_queue())
853 * 2. Retransmission. (tcp_retransmit_skb(), tcp_xmit_retransmit_queue()) 1030 * 2. Retransmission. (tcp_retransmit_skb(), tcp_xmit_retransmit_queue())
854 * 3. Loss detection event of two flavors: 1031 * 3. Loss detection event of one of three flavors:
855 * A. Scoreboard estimator decided the packet is lost. 1032 * A. Scoreboard estimator decided the packet is lost.
856 * A'. Reno "three dupacks" marks head of queue lost. 1033 * A'. Reno "three dupacks" marks head of queue lost.
857 * A''. Its FACK modification, head until snd.fack is lost. 1034 * A''. Its FACK modfication, head until snd.fack is lost.
858 * B. SACK arrives sacking SND.NXT at the moment, when the 1035 * B. SACK arrives sacking data transmitted after never retransmitted
1036 * hole was sent out.
1037 * C. SACK arrives sacking SND.NXT at the moment, when the
859 * segment was retransmitted. 1038 * segment was retransmitted.
860 * 4. D-SACK added new rule: D-SACK changes any tag to S. 1039 * 4. D-SACK added new rule: D-SACK changes any tag to S.
861 * 1040 *
@@ -924,36 +1103,36 @@ static void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp,
924 * the exact amount is rather hard to quantify. However, tp->max_window can 1103 * the exact amount is rather hard to quantify. However, tp->max_window can
925 * be used as an exaggerated estimate. 1104 * be used as an exaggerated estimate.
926 */ 1105 */
927static bool tcp_is_sackblock_valid(struct tcp_sock *tp, bool is_dsack, 1106static int tcp_is_sackblock_valid(struct tcp_sock *tp, int is_dsack,
928 u32 start_seq, u32 end_seq) 1107 u32 start_seq, u32 end_seq)
929{ 1108{
930 /* Too far in future, or reversed (interpretation is ambiguous) */ 1109 /* Too far in future, or reversed (interpretation is ambiguous) */
931 if (after(end_seq, tp->snd_nxt) || !before(start_seq, end_seq)) 1110 if (after(end_seq, tp->snd_nxt) || !before(start_seq, end_seq))
932 return false; 1111 return 0;
933 1112
934 /* Nasty start_seq wrap-around check (see comments above) */ 1113 /* Nasty start_seq wrap-around check (see comments above) */
935 if (!before(start_seq, tp->snd_nxt)) 1114 if (!before(start_seq, tp->snd_nxt))
936 return false; 1115 return 0;
937 1116
938 /* In outstanding window? ...This is valid exit for D-SACKs too. 1117 /* In outstanding window? ...This is valid exit for D-SACKs too.
939 * start_seq == snd_una is non-sensical (see comments above) 1118 * start_seq == snd_una is non-sensical (see comments above)
940 */ 1119 */
941 if (after(start_seq, tp->snd_una)) 1120 if (after(start_seq, tp->snd_una))
942 return true; 1121 return 1;
943 1122
944 if (!is_dsack || !tp->undo_marker) 1123 if (!is_dsack || !tp->undo_marker)
945 return false; 1124 return 0;
946 1125
947 /* ...Then it's D-SACK, and must reside below snd_una completely */ 1126 /* ...Then it's D-SACK, and must reside below snd_una completely */
948 if (after(end_seq, tp->snd_una)) 1127 if (after(end_seq, tp->snd_una))
949 return false; 1128 return 0;
950 1129
951 if (!before(start_seq, tp->undo_marker)) 1130 if (!before(start_seq, tp->undo_marker))
952 return true; 1131 return 1;
953 1132
954 /* Too old */ 1133 /* Too old */
955 if (!after(end_seq, tp->undo_marker)) 1134 if (!after(end_seq, tp->undo_marker))
956 return false; 1135 return 0;
957 1136
958 /* Undo_marker boundary crossing (overestimates a lot). Known already: 1137 /* Undo_marker boundary crossing (overestimates a lot). Known already:
959 * start_seq < undo_marker and end_seq >= undo_marker. 1138 * start_seq < undo_marker and end_seq >= undo_marker.
@@ -962,7 +1141,7 @@ static bool tcp_is_sackblock_valid(struct tcp_sock *tp, bool is_dsack,
962} 1141}
963 1142
964/* Check for lost retransmit. This superb idea is borrowed from "ratehalving". 1143/* Check for lost retransmit. This superb idea is borrowed from "ratehalving".
965 * Event "B". Later note: FACK people cheated me again 8), we have to account 1144 * Event "C". Later note: FACK people cheated me again 8), we have to account
966 * for reordering! Ugly, but should help. 1145 * for reordering! Ugly, but should help.
967 * 1146 *
968 * Search retransmitted skbs from write_queue that were sent when snd_nxt was 1147 * Search retransmitted skbs from write_queue that were sent when snd_nxt was
@@ -1025,17 +1204,17 @@ static void tcp_mark_lost_retrans(struct sock *sk)
1025 tp->lost_retrans_low = new_low_seq; 1204 tp->lost_retrans_low = new_low_seq;
1026} 1205}
1027 1206
1028static bool tcp_check_dsack(struct sock *sk, const struct sk_buff *ack_skb, 1207static int tcp_check_dsack(struct sock *sk, struct sk_buff *ack_skb,
1029 struct tcp_sack_block_wire *sp, int num_sacks, 1208 struct tcp_sack_block_wire *sp, int num_sacks,
1030 u32 prior_snd_una) 1209 u32 prior_snd_una)
1031{ 1210{
1032 struct tcp_sock *tp = tcp_sk(sk); 1211 struct tcp_sock *tp = tcp_sk(sk);
1033 u32 start_seq_0 = get_unaligned_be32(&sp[0].start_seq); 1212 u32 start_seq_0 = get_unaligned_be32(&sp[0].start_seq);
1034 u32 end_seq_0 = get_unaligned_be32(&sp[0].end_seq); 1213 u32 end_seq_0 = get_unaligned_be32(&sp[0].end_seq);
1035 bool dup_sack = false; 1214 int dup_sack = 0;
1036 1215
1037 if (before(start_seq_0, TCP_SKB_CB(ack_skb)->ack_seq)) { 1216 if (before(start_seq_0, TCP_SKB_CB(ack_skb)->ack_seq)) {
1038 dup_sack = true; 1217 dup_sack = 1;
1039 tcp_dsack_seen(tp); 1218 tcp_dsack_seen(tp);
1040 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDSACKRECV); 1219 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDSACKRECV);
1041 } else if (num_sacks > 1) { 1220 } else if (num_sacks > 1) {
@@ -1044,7 +1223,7 @@ static bool tcp_check_dsack(struct sock *sk, const struct sk_buff *ack_skb,
1044 1223
1045 if (!after(end_seq_0, end_seq_1) && 1224 if (!after(end_seq_0, end_seq_1) &&
1046 !before(start_seq_0, start_seq_1)) { 1225 !before(start_seq_0, start_seq_1)) {
1047 dup_sack = true; 1226 dup_sack = 1;
1048 tcp_dsack_seen(tp); 1227 tcp_dsack_seen(tp);
1049 NET_INC_STATS_BH(sock_net(sk), 1228 NET_INC_STATS_BH(sock_net(sk),
1050 LINUX_MIB_TCPDSACKOFORECV); 1229 LINUX_MIB_TCPDSACKOFORECV);
@@ -1075,10 +1254,9 @@ struct tcp_sacktag_state {
1075 * FIXME: this could be merged to shift decision code 1254 * FIXME: this could be merged to shift decision code
1076 */ 1255 */
1077static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb, 1256static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb,
1078 u32 start_seq, u32 end_seq) 1257 u32 start_seq, u32 end_seq)
1079{ 1258{
1080 int err; 1259 int in_sack, err;
1081 bool in_sack;
1082 unsigned int pkt_len; 1260 unsigned int pkt_len;
1083 unsigned int mss; 1261 unsigned int mss;
1084 1262
@@ -1120,26 +1298,25 @@ static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb,
1120 return in_sack; 1298 return in_sack;
1121} 1299}
1122 1300
1123/* Mark the given newly-SACKed range as such, adjusting counters and hints. */ 1301static u8 tcp_sacktag_one(struct sk_buff *skb, struct sock *sk,
1124static u8 tcp_sacktag_one(struct sock *sk, 1302 struct tcp_sacktag_state *state,
1125 struct tcp_sacktag_state *state, u8 sacked, 1303 int dup_sack, int pcount)
1126 u32 start_seq, u32 end_seq,
1127 bool dup_sack, int pcount)
1128{ 1304{
1129 struct tcp_sock *tp = tcp_sk(sk); 1305 struct tcp_sock *tp = tcp_sk(sk);
1306 u8 sacked = TCP_SKB_CB(skb)->sacked;
1130 int fack_count = state->fack_count; 1307 int fack_count = state->fack_count;
1131 1308
1132 /* Account D-SACK for retransmitted packet. */ 1309 /* Account D-SACK for retransmitted packet. */
1133 if (dup_sack && (sacked & TCPCB_RETRANS)) { 1310 if (dup_sack && (sacked & TCPCB_RETRANS)) {
1134 if (tp->undo_marker && tp->undo_retrans && 1311 if (tp->undo_marker && tp->undo_retrans &&
1135 after(end_seq, tp->undo_marker)) 1312 after(TCP_SKB_CB(skb)->end_seq, tp->undo_marker))
1136 tp->undo_retrans--; 1313 tp->undo_retrans--;
1137 if (sacked & TCPCB_SACKED_ACKED) 1314 if (sacked & TCPCB_SACKED_ACKED)
1138 state->reord = min(fack_count, state->reord); 1315 state->reord = min(fack_count, state->reord);
1139 } 1316 }
1140 1317
1141 /* Nothing to do; acked frame is about to be dropped (was ACKed). */ 1318 /* Nothing to do; acked frame is about to be dropped (was ACKed). */
1142 if (!after(end_seq, tp->snd_una)) 1319 if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
1143 return sacked; 1320 return sacked;
1144 1321
1145 if (!(sacked & TCPCB_SACKED_ACKED)) { 1322 if (!(sacked & TCPCB_SACKED_ACKED)) {
@@ -1158,13 +1335,13 @@ static u8 tcp_sacktag_one(struct sock *sk,
1158 /* New sack for not retransmitted frame, 1335 /* New sack for not retransmitted frame,
1159 * which was in hole. It is reordering. 1336 * which was in hole. It is reordering.
1160 */ 1337 */
1161 if (before(start_seq, 1338 if (before(TCP_SKB_CB(skb)->seq,
1162 tcp_highest_sack_seq(tp))) 1339 tcp_highest_sack_seq(tp)))
1163 state->reord = min(fack_count, 1340 state->reord = min(fack_count,
1164 state->reord); 1341 state->reord);
1165 1342
1166 /* SACK enhanced F-RTO (RFC4138; Appendix B) */ 1343 /* SACK enhanced F-RTO (RFC4138; Appendix B) */
1167 if (!after(end_seq, tp->frto_highmark)) 1344 if (!after(TCP_SKB_CB(skb)->end_seq, tp->frto_highmark))
1168 state->flag |= FLAG_ONLY_ORIG_SACKED; 1345 state->flag |= FLAG_ONLY_ORIG_SACKED;
1169 } 1346 }
1170 1347
@@ -1182,7 +1359,8 @@ static u8 tcp_sacktag_one(struct sock *sk,
1182 1359
1183 /* Lost marker hint past SACKed? Tweak RFC3517 cnt */ 1360 /* Lost marker hint past SACKed? Tweak RFC3517 cnt */
1184 if (!tcp_is_fack(tp) && (tp->lost_skb_hint != NULL) && 1361 if (!tcp_is_fack(tp) && (tp->lost_skb_hint != NULL) &&
1185 before(start_seq, TCP_SKB_CB(tp->lost_skb_hint)->seq)) 1362 before(TCP_SKB_CB(skb)->seq,
1363 TCP_SKB_CB(tp->lost_skb_hint)->seq))
1186 tp->lost_cnt_hint += pcount; 1364 tp->lost_cnt_hint += pcount;
1187 1365
1188 if (fack_count > tp->fackets_out) 1366 if (fack_count > tp->fackets_out)
@@ -1201,30 +1379,16 @@ static u8 tcp_sacktag_one(struct sock *sk,
1201 return sacked; 1379 return sacked;
1202} 1380}
1203 1381
1204/* Shift newly-SACKed bytes from this skb to the immediately previous 1382static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
1205 * already-SACKed sk_buff. Mark the newly-SACKed bytes as such. 1383 struct tcp_sacktag_state *state,
1206 */ 1384 unsigned int pcount, int shifted, int mss,
1207static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb, 1385 int dup_sack)
1208 struct tcp_sacktag_state *state,
1209 unsigned int pcount, int shifted, int mss,
1210 bool dup_sack)
1211{ 1386{
1212 struct tcp_sock *tp = tcp_sk(sk); 1387 struct tcp_sock *tp = tcp_sk(sk);
1213 struct sk_buff *prev = tcp_write_queue_prev(sk, skb); 1388 struct sk_buff *prev = tcp_write_queue_prev(sk, skb);
1214 u32 start_seq = TCP_SKB_CB(skb)->seq; /* start of newly-SACKed */
1215 u32 end_seq = start_seq + shifted; /* end of newly-SACKed */
1216 1389
1217 BUG_ON(!pcount); 1390 BUG_ON(!pcount);
1218 1391
1219 /* Adjust counters and hints for the newly sacked sequence
1220 * range but discard the return value since prev is already
1221 * marked. We must tag the range first because the seq
1222 * advancement below implicitly advances
1223 * tcp_highest_sack_seq() when skb is highest_sack.
1224 */
1225 tcp_sacktag_one(sk, state, TCP_SKB_CB(skb)->sacked,
1226 start_seq, end_seq, dup_sack, pcount);
1227
1228 if (skb == tp->lost_skb_hint) 1392 if (skb == tp->lost_skb_hint)
1229 tp->lost_cnt_hint += pcount; 1393 tp->lost_cnt_hint += pcount;
1230 1394
@@ -1251,13 +1415,16 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
1251 skb_shinfo(skb)->gso_type = 0; 1415 skb_shinfo(skb)->gso_type = 0;
1252 } 1416 }
1253 1417
1418 /* We discard results */
1419 tcp_sacktag_one(skb, sk, state, dup_sack, pcount);
1420
1254 /* Difference in this won't matter, both ACKed by the same cumul. ACK */ 1421 /* Difference in this won't matter, both ACKed by the same cumul. ACK */
1255 TCP_SKB_CB(prev)->sacked |= (TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS); 1422 TCP_SKB_CB(prev)->sacked |= (TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS);
1256 1423
1257 if (skb->len > 0) { 1424 if (skb->len > 0) {
1258 BUG_ON(!tcp_skb_pcount(skb)); 1425 BUG_ON(!tcp_skb_pcount(skb));
1259 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SACKSHIFTED); 1426 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SACKSHIFTED);
1260 return false; 1427 return 0;
1261 } 1428 }
1262 1429
1263 /* Whole SKB was eaten :-) */ 1430 /* Whole SKB was eaten :-) */
@@ -1271,7 +1438,7 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
1271 tp->lost_cnt_hint -= tcp_skb_pcount(prev); 1438 tp->lost_cnt_hint -= tcp_skb_pcount(prev);
1272 } 1439 }
1273 1440
1274 TCP_SKB_CB(skb)->tcp_flags |= TCP_SKB_CB(prev)->tcp_flags; 1441 TCP_SKB_CB(skb)->flags |= TCP_SKB_CB(prev)->flags;
1275 if (skb == tcp_highest_sack(sk)) 1442 if (skb == tcp_highest_sack(sk))
1276 tcp_advance_highest_sack(sk, skb); 1443 tcp_advance_highest_sack(sk, skb);
1277 1444
@@ -1280,19 +1447,19 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
1280 1447
1281 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SACKMERGED); 1448 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SACKMERGED);
1282 1449
1283 return true; 1450 return 1;
1284} 1451}
1285 1452
1286/* I wish gso_size would have a bit more sane initialization than 1453/* I wish gso_size would have a bit more sane initialization than
1287 * something-or-zero which complicates things 1454 * something-or-zero which complicates things
1288 */ 1455 */
1289static int tcp_skb_seglen(const struct sk_buff *skb) 1456static int tcp_skb_seglen(struct sk_buff *skb)
1290{ 1457{
1291 return tcp_skb_pcount(skb) == 1 ? skb->len : tcp_skb_mss(skb); 1458 return tcp_skb_pcount(skb) == 1 ? skb->len : tcp_skb_mss(skb);
1292} 1459}
1293 1460
1294/* Shifting pages past head area doesn't work */ 1461/* Shifting pages past head area doesn't work */
1295static int skb_can_shift(const struct sk_buff *skb) 1462static int skb_can_shift(struct sk_buff *skb)
1296{ 1463{
1297 return !skb_headlen(skb) && skb_is_nonlinear(skb); 1464 return !skb_headlen(skb) && skb_is_nonlinear(skb);
1298} 1465}
@@ -1303,7 +1470,7 @@ static int skb_can_shift(const struct sk_buff *skb)
1303static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb, 1470static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb,
1304 struct tcp_sacktag_state *state, 1471 struct tcp_sacktag_state *state,
1305 u32 start_seq, u32 end_seq, 1472 u32 start_seq, u32 end_seq,
1306 bool dup_sack) 1473 int dup_sack)
1307{ 1474{
1308 struct tcp_sock *tp = tcp_sk(sk); 1475 struct tcp_sock *tp = tcp_sk(sk);
1309 struct sk_buff *prev; 1476 struct sk_buff *prev;
@@ -1398,10 +1565,6 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb,
1398 } 1565 }
1399 } 1566 }
1400 1567
1401 /* tcp_sacktag_one() won't SACK-tag ranges below snd_una */
1402 if (!after(TCP_SKB_CB(skb)->seq + len, tp->snd_una))
1403 goto fallback;
1404
1405 if (!skb_shift(prev, skb, len)) 1568 if (!skb_shift(prev, skb, len))
1406 goto fallback; 1569 goto fallback;
1407 if (!tcp_shifted_skb(sk, skb, state, pcount, len, mss, dup_sack)) 1570 if (!tcp_shifted_skb(sk, skb, state, pcount, len, mss, dup_sack))
@@ -1442,14 +1605,14 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk,
1442 struct tcp_sack_block *next_dup, 1605 struct tcp_sack_block *next_dup,
1443 struct tcp_sacktag_state *state, 1606 struct tcp_sacktag_state *state,
1444 u32 start_seq, u32 end_seq, 1607 u32 start_seq, u32 end_seq,
1445 bool dup_sack_in) 1608 int dup_sack_in)
1446{ 1609{
1447 struct tcp_sock *tp = tcp_sk(sk); 1610 struct tcp_sock *tp = tcp_sk(sk);
1448 struct sk_buff *tmp; 1611 struct sk_buff *tmp;
1449 1612
1450 tcp_for_write_queue_from(skb, sk) { 1613 tcp_for_write_queue_from(skb, sk) {
1451 int in_sack = 0; 1614 int in_sack = 0;
1452 bool dup_sack = dup_sack_in; 1615 int dup_sack = dup_sack_in;
1453 1616
1454 if (skb == tcp_send_head(sk)) 1617 if (skb == tcp_send_head(sk))
1455 break; 1618 break;
@@ -1464,7 +1627,7 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk,
1464 next_dup->start_seq, 1627 next_dup->start_seq,
1465 next_dup->end_seq); 1628 next_dup->end_seq);
1466 if (in_sack > 0) 1629 if (in_sack > 0)
1467 dup_sack = true; 1630 dup_sack = 1;
1468 } 1631 }
1469 1632
1470 /* skb reference here is a bit tricky to get right, since 1633 /* skb reference here is a bit tricky to get right, since
@@ -1492,14 +1655,10 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk,
1492 break; 1655 break;
1493 1656
1494 if (in_sack) { 1657 if (in_sack) {
1495 TCP_SKB_CB(skb)->sacked = 1658 TCP_SKB_CB(skb)->sacked = tcp_sacktag_one(skb, sk,
1496 tcp_sacktag_one(sk, 1659 state,
1497 state, 1660 dup_sack,
1498 TCP_SKB_CB(skb)->sacked, 1661 tcp_skb_pcount(skb));
1499 TCP_SKB_CB(skb)->seq,
1500 TCP_SKB_CB(skb)->end_seq,
1501 dup_sack,
1502 tcp_skb_pcount(skb));
1503 1662
1504 if (!before(TCP_SKB_CB(skb)->seq, 1663 if (!before(TCP_SKB_CB(skb)->seq,
1505 tcp_highest_sack_seq(tp))) 1664 tcp_highest_sack_seq(tp)))
@@ -1549,19 +1708,19 @@ static struct sk_buff *tcp_maybe_skipping_dsack(struct sk_buff *skb,
1549 return skb; 1708 return skb;
1550} 1709}
1551 1710
1552static int tcp_sack_cache_ok(const struct tcp_sock *tp, const struct tcp_sack_block *cache) 1711static int tcp_sack_cache_ok(struct tcp_sock *tp, struct tcp_sack_block *cache)
1553{ 1712{
1554 return cache < tp->recv_sack_cache + ARRAY_SIZE(tp->recv_sack_cache); 1713 return cache < tp->recv_sack_cache + ARRAY_SIZE(tp->recv_sack_cache);
1555} 1714}
1556 1715
1557static int 1716static int
1558tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb, 1717tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb,
1559 u32 prior_snd_una) 1718 u32 prior_snd_una)
1560{ 1719{
1561 const struct inet_connection_sock *icsk = inet_csk(sk); 1720 const struct inet_connection_sock *icsk = inet_csk(sk);
1562 struct tcp_sock *tp = tcp_sk(sk); 1721 struct tcp_sock *tp = tcp_sk(sk);
1563 const unsigned char *ptr = (skb_transport_header(ack_skb) + 1722 unsigned char *ptr = (skb_transport_header(ack_skb) +
1564 TCP_SKB_CB(ack_skb)->sacked); 1723 TCP_SKB_CB(ack_skb)->sacked);
1565 struct tcp_sack_block_wire *sp_wire = (struct tcp_sack_block_wire *)(ptr+2); 1724 struct tcp_sack_block_wire *sp_wire = (struct tcp_sack_block_wire *)(ptr+2);
1566 struct tcp_sack_block sp[TCP_NUM_SACKS]; 1725 struct tcp_sack_block sp[TCP_NUM_SACKS];
1567 struct tcp_sack_block *cache; 1726 struct tcp_sack_block *cache;
@@ -1569,7 +1728,7 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
1569 struct sk_buff *skb; 1728 struct sk_buff *skb;
1570 int num_sacks = min(TCP_NUM_SACKS, (ptr[1] - TCPOLEN_SACK_BASE) >> 3); 1729 int num_sacks = min(TCP_NUM_SACKS, (ptr[1] - TCPOLEN_SACK_BASE) >> 3);
1571 int used_sacks; 1730 int used_sacks;
1572 bool found_dup_sack = false; 1731 int found_dup_sack = 0;
1573 int i, j; 1732 int i, j;
1574 int first_sack_index; 1733 int first_sack_index;
1575 1734
@@ -1600,7 +1759,7 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
1600 used_sacks = 0; 1759 used_sacks = 0;
1601 first_sack_index = 0; 1760 first_sack_index = 0;
1602 for (i = 0; i < num_sacks; i++) { 1761 for (i = 0; i < num_sacks; i++) {
1603 bool dup_sack = !i && found_dup_sack; 1762 int dup_sack = !i && found_dup_sack;
1604 1763
1605 sp[used_sacks].start_seq = get_unaligned_be32(&sp_wire[i].start_seq); 1764 sp[used_sacks].start_seq = get_unaligned_be32(&sp_wire[i].start_seq);
1606 sp[used_sacks].end_seq = get_unaligned_be32(&sp_wire[i].end_seq); 1765 sp[used_sacks].end_seq = get_unaligned_be32(&sp_wire[i].end_seq);
@@ -1667,12 +1826,16 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
1667 while (i < used_sacks) { 1826 while (i < used_sacks) {
1668 u32 start_seq = sp[i].start_seq; 1827 u32 start_seq = sp[i].start_seq;
1669 u32 end_seq = sp[i].end_seq; 1828 u32 end_seq = sp[i].end_seq;
1670 bool dup_sack = (found_dup_sack && (i == first_sack_index)); 1829 int dup_sack = (found_dup_sack && (i == first_sack_index));
1671 struct tcp_sack_block *next_dup = NULL; 1830 struct tcp_sack_block *next_dup = NULL;
1672 1831
1673 if (found_dup_sack && ((i + 1) == first_sack_index)) 1832 if (found_dup_sack && ((i + 1) == first_sack_index))
1674 next_dup = &sp[i + 1]; 1833 next_dup = &sp[i + 1];
1675 1834
1835 /* Event "B" in the comment above. */
1836 if (after(end_seq, tp->high_seq))
1837 state.flag |= FLAG_DATA_LOST;
1838
1676 /* Skip too early cached blocks */ 1839 /* Skip too early cached blocks */
1677 while (tcp_sack_cache_ok(tp, cache) && 1840 while (tcp_sack_cache_ok(tp, cache) &&
1678 !before(start_seq, cache->end_seq)) 1841 !before(start_seq, cache->end_seq))
@@ -1769,9 +1932,9 @@ out:
1769} 1932}
1770 1933
1771/* Limits sacked_out so that sum with lost_out isn't ever larger than 1934/* Limits sacked_out so that sum with lost_out isn't ever larger than
1772 * packets_out. Returns false if sacked_out adjustement wasn't necessary. 1935 * packets_out. Returns zero if sacked_out adjustement wasn't necessary.
1773 */ 1936 */
1774static bool tcp_limit_reno_sacked(struct tcp_sock *tp) 1937static int tcp_limit_reno_sacked(struct tcp_sock *tp)
1775{ 1938{
1776 u32 holes; 1939 u32 holes;
1777 1940
@@ -1780,9 +1943,9 @@ static bool tcp_limit_reno_sacked(struct tcp_sock *tp)
1780 1943
1781 if ((tp->sacked_out + holes) > tp->packets_out) { 1944 if ((tp->sacked_out + holes) > tp->packets_out) {
1782 tp->sacked_out = tp->packets_out - holes; 1945 tp->sacked_out = tp->packets_out - holes;
1783 return true; 1946 return 1;
1784 } 1947 }
1785 return false; 1948 return 0;
1786} 1949}
1787 1950
1788/* If we receive more dupacks than we expected counting segments 1951/* If we receive more dupacks than we expected counting segments
@@ -1836,40 +1999,40 @@ static int tcp_is_sackfrto(const struct tcp_sock *tp)
1836/* F-RTO can only be used if TCP has never retransmitted anything other than 1999/* F-RTO can only be used if TCP has never retransmitted anything other than
1837 * head (SACK enhanced variant from Appendix B of RFC4138 is more robust here) 2000 * head (SACK enhanced variant from Appendix B of RFC4138 is more robust here)
1838 */ 2001 */
1839bool tcp_use_frto(struct sock *sk) 2002int tcp_use_frto(struct sock *sk)
1840{ 2003{
1841 const struct tcp_sock *tp = tcp_sk(sk); 2004 const struct tcp_sock *tp = tcp_sk(sk);
1842 const struct inet_connection_sock *icsk = inet_csk(sk); 2005 const struct inet_connection_sock *icsk = inet_csk(sk);
1843 struct sk_buff *skb; 2006 struct sk_buff *skb;
1844 2007
1845 if (!sysctl_tcp_frto) 2008 if (!sysctl_tcp_frto)
1846 return false; 2009 return 0;
1847 2010
1848 /* MTU probe and F-RTO won't really play nicely along currently */ 2011 /* MTU probe and F-RTO won't really play nicely along currently */
1849 if (icsk->icsk_mtup.probe_size) 2012 if (icsk->icsk_mtup.probe_size)
1850 return false; 2013 return 0;
1851 2014
1852 if (tcp_is_sackfrto(tp)) 2015 if (tcp_is_sackfrto(tp))
1853 return true; 2016 return 1;
1854 2017
1855 /* Avoid expensive walking of rexmit queue if possible */ 2018 /* Avoid expensive walking of rexmit queue if possible */
1856 if (tp->retrans_out > 1) 2019 if (tp->retrans_out > 1)
1857 return false; 2020 return 0;
1858 2021
1859 skb = tcp_write_queue_head(sk); 2022 skb = tcp_write_queue_head(sk);
1860 if (tcp_skb_is_last(sk, skb)) 2023 if (tcp_skb_is_last(sk, skb))
1861 return true; 2024 return 1;
1862 skb = tcp_write_queue_next(sk, skb); /* Skips head */ 2025 skb = tcp_write_queue_next(sk, skb); /* Skips head */
1863 tcp_for_write_queue_from(skb, sk) { 2026 tcp_for_write_queue_from(skb, sk) {
1864 if (skb == tcp_send_head(sk)) 2027 if (skb == tcp_send_head(sk))
1865 break; 2028 break;
1866 if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS) 2029 if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS)
1867 return false; 2030 return 0;
1868 /* Short-circuit when first non-SACKed skb has been checked */ 2031 /* Short-circuit when first non-SACKed skb has been checked */
1869 if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) 2032 if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
1870 break; 2033 break;
1871 } 2034 }
1872 return true; 2035 return 1;
1873} 2036}
1874 2037
1875/* RTO occurred, but do not yet enter Loss state. Instead, defer RTO 2038/* RTO occurred, but do not yet enter Loss state. Instead, defer RTO
@@ -2105,7 +2268,7 @@ void tcp_enter_loss(struct sock *sk, int how)
2105 * 2268 *
2106 * Do processing similar to RTO timeout. 2269 * Do processing similar to RTO timeout.
2107 */ 2270 */
2108static bool tcp_check_sack_reneging(struct sock *sk, int flag) 2271static int tcp_check_sack_reneging(struct sock *sk, int flag)
2109{ 2272{
2110 if (flag & FLAG_SACK_RENEGING) { 2273 if (flag & FLAG_SACK_RENEGING) {
2111 struct inet_connection_sock *icsk = inet_csk(sk); 2274 struct inet_connection_sock *icsk = inet_csk(sk);
@@ -2116,12 +2279,12 @@ static bool tcp_check_sack_reneging(struct sock *sk, int flag)
2116 tcp_retransmit_skb(sk, tcp_write_queue_head(sk)); 2279 tcp_retransmit_skb(sk, tcp_write_queue_head(sk));
2117 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, 2280 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
2118 icsk->icsk_rto, TCP_RTO_MAX); 2281 icsk->icsk_rto, TCP_RTO_MAX);
2119 return true; 2282 return 1;
2120 } 2283 }
2121 return false; 2284 return 0;
2122} 2285}
2123 2286
2124static inline int tcp_fackets_out(const struct tcp_sock *tp) 2287static inline int tcp_fackets_out(struct tcp_sock *tp)
2125{ 2288{
2126 return tcp_is_reno(tp) ? tp->sacked_out + 1 : tp->fackets_out; 2289 return tcp_is_reno(tp) ? tp->sacked_out + 1 : tp->fackets_out;
2127} 2290}
@@ -2141,41 +2304,19 @@ static inline int tcp_fackets_out(const struct tcp_sock *tp)
2141 * they differ. Since neither occurs due to loss, TCP should really 2304 * they differ. Since neither occurs due to loss, TCP should really
2142 * ignore them. 2305 * ignore them.
2143 */ 2306 */
2144static inline int tcp_dupack_heuristics(const struct tcp_sock *tp) 2307static inline int tcp_dupack_heuristics(struct tcp_sock *tp)
2145{ 2308{
2146 return tcp_is_fack(tp) ? tp->fackets_out : tp->sacked_out + 1; 2309 return tcp_is_fack(tp) ? tp->fackets_out : tp->sacked_out + 1;
2147} 2310}
2148 2311
2149static bool tcp_pause_early_retransmit(struct sock *sk, int flag) 2312static inline int tcp_skb_timedout(struct sock *sk, struct sk_buff *skb)
2150{
2151 struct tcp_sock *tp = tcp_sk(sk);
2152 unsigned long delay;
2153
2154 /* Delay early retransmit and entering fast recovery for
2155 * max(RTT/4, 2msec) unless ack has ECE mark, no RTT samples
2156 * available, or RTO is scheduled to fire first.
2157 */
2158 if (sysctl_tcp_early_retrans < 2 || (flag & FLAG_ECE) || !tp->srtt)
2159 return false;
2160
2161 delay = max_t(unsigned long, (tp->srtt >> 5), msecs_to_jiffies(2));
2162 if (!time_after(inet_csk(sk)->icsk_timeout, (jiffies + delay)))
2163 return false;
2164
2165 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, delay, TCP_RTO_MAX);
2166 tp->early_retrans_delayed = 1;
2167 return true;
2168}
2169
2170static inline int tcp_skb_timedout(const struct sock *sk,
2171 const struct sk_buff *skb)
2172{ 2313{
2173 return tcp_time_stamp - TCP_SKB_CB(skb)->when > inet_csk(sk)->icsk_rto; 2314 return tcp_time_stamp - TCP_SKB_CB(skb)->when > inet_csk(sk)->icsk_rto;
2174} 2315}
2175 2316
2176static inline int tcp_head_timedout(const struct sock *sk) 2317static inline int tcp_head_timedout(struct sock *sk)
2177{ 2318{
2178 const struct tcp_sock *tp = tcp_sk(sk); 2319 struct tcp_sock *tp = tcp_sk(sk);
2179 2320
2180 return tp->packets_out && 2321 return tp->packets_out &&
2181 tcp_skb_timedout(sk, tcp_write_queue_head(sk)); 2322 tcp_skb_timedout(sk, tcp_write_queue_head(sk));
@@ -2274,28 +2415,28 @@ static inline int tcp_head_timedout(const struct sock *sk)
2274 * Main question: may we further continue forward transmission 2415 * Main question: may we further continue forward transmission
2275 * with the same cwnd? 2416 * with the same cwnd?
2276 */ 2417 */
2277static bool tcp_time_to_recover(struct sock *sk, int flag) 2418static int tcp_time_to_recover(struct sock *sk)
2278{ 2419{
2279 struct tcp_sock *tp = tcp_sk(sk); 2420 struct tcp_sock *tp = tcp_sk(sk);
2280 __u32 packets_out; 2421 __u32 packets_out;
2281 2422
2282 /* Do not perform any recovery during F-RTO algorithm */ 2423 /* Do not perform any recovery during F-RTO algorithm */
2283 if (tp->frto_counter) 2424 if (tp->frto_counter)
2284 return false; 2425 return 0;
2285 2426
2286 /* Trick#1: The loss is proven. */ 2427 /* Trick#1: The loss is proven. */
2287 if (tp->lost_out) 2428 if (tp->lost_out)
2288 return true; 2429 return 1;
2289 2430
2290 /* Not-A-Trick#2 : Classic rule... */ 2431 /* Not-A-Trick#2 : Classic rule... */
2291 if (tcp_dupack_heuristics(tp) > tp->reordering) 2432 if (tcp_dupack_heuristics(tp) > tp->reordering)
2292 return true; 2433 return 1;
2293 2434
2294 /* Trick#3 : when we use RFC2988 timer restart, fast 2435 /* Trick#3 : when we use RFC2988 timer restart, fast
2295 * retransmit can be triggered by timeout of queue head. 2436 * retransmit can be triggered by timeout of queue head.
2296 */ 2437 */
2297 if (tcp_is_fack(tp) && tcp_head_timedout(sk)) 2438 if (tcp_is_fack(tp) && tcp_head_timedout(sk))
2298 return true; 2439 return 1;
2299 2440
2300 /* Trick#4: It is still not OK... But will it be useful to delay 2441 /* Trick#4: It is still not OK... But will it be useful to delay
2301 * recovery more? 2442 * recovery more?
@@ -2307,7 +2448,7 @@ static bool tcp_time_to_recover(struct sock *sk, int flag)
2307 /* We have nothing to send. This connection is limited 2448 /* We have nothing to send. This connection is limited
2308 * either by receiver window or by application. 2449 * either by receiver window or by application.
2309 */ 2450 */
2310 return true; 2451 return 1;
2311 } 2452 }
2312 2453
2313 /* If a thin stream is detected, retransmit after first 2454 /* If a thin stream is detected, retransmit after first
@@ -2318,19 +2459,9 @@ static bool tcp_time_to_recover(struct sock *sk, int flag)
2318 if ((tp->thin_dupack || sysctl_tcp_thin_dupack) && 2459 if ((tp->thin_dupack || sysctl_tcp_thin_dupack) &&
2319 tcp_stream_is_thin(tp) && tcp_dupack_heuristics(tp) > 1 && 2460 tcp_stream_is_thin(tp) && tcp_dupack_heuristics(tp) > 1 &&
2320 tcp_is_sack(tp) && !tcp_send_head(sk)) 2461 tcp_is_sack(tp) && !tcp_send_head(sk))
2321 return true; 2462 return 1;
2322 2463
2323 /* Trick#6: TCP early retransmit, per RFC5827. To avoid spurious 2464 return 0;
2324 * retransmissions due to small network reorderings, we implement
2325 * Mitigation A.3 in the RFC and delay the retransmission for a short
2326 * interval if appropriate.
2327 */
2328 if (tp->do_early_retrans && !tp->retrans_out && tp->sacked_out &&
2329 (tp->packets_out == (tp->sacked_out + 1) && tp->packets_out < 4) &&
2330 !tcp_may_send_now(sk))
2331 return !tcp_pause_early_retransmit(sk, flag);
2332
2333 return false;
2334} 2465}
2335 2466
2336/* New heuristics: it is possible only after we switched to restart timer 2467/* New heuristics: it is possible only after we switched to restart timer
@@ -2371,11 +2502,8 @@ static void tcp_timeout_skbs(struct sock *sk)
2371 tcp_verify_left_out(tp); 2502 tcp_verify_left_out(tp);
2372} 2503}
2373 2504
2374/* Detect loss in event "A" above by marking head of queue up as lost. 2505/* Mark head of queue up as lost. With RFC3517 SACK, the packets is
2375 * For FACK or non-SACK(Reno) senders, the first "packets" number of segments 2506 * is against sacked "cnt", otherwise it's against facked "cnt"
2376 * are considered lost. For RFC3517 SACK, a segment is considered lost if it
2377 * has at least tp->reordering SACKed seqments above it; "packets" refers to
2378 * the maximum SACKed segments to pass before reaching this limit.
2379 */ 2507 */
2380static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head) 2508static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)
2381{ 2509{
@@ -2384,8 +2512,6 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)
2384 int cnt, oldcnt; 2512 int cnt, oldcnt;
2385 int err; 2513 int err;
2386 unsigned int mss; 2514 unsigned int mss;
2387 /* Use SACK to deduce losses of new sequences sent during recovery */
2388 const u32 loss_high = tcp_is_sack(tp) ? tp->snd_nxt : tp->high_seq;
2389 2515
2390 WARN_ON(packets > tp->packets_out); 2516 WARN_ON(packets > tp->packets_out);
2391 if (tp->lost_skb_hint) { 2517 if (tp->lost_skb_hint) {
@@ -2407,7 +2533,7 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)
2407 tp->lost_skb_hint = skb; 2533 tp->lost_skb_hint = skb;
2408 tp->lost_cnt_hint = cnt; 2534 tp->lost_cnt_hint = cnt;
2409 2535
2410 if (after(TCP_SKB_CB(skb)->end_seq, loss_high)) 2536 if (after(TCP_SKB_CB(skb)->end_seq, tp->high_seq))
2411 break; 2537 break;
2412 2538
2413 oldcnt = cnt; 2539 oldcnt = cnt;
@@ -2417,7 +2543,6 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)
2417 2543
2418 if (cnt > packets) { 2544 if (cnt > packets) {
2419 if ((tcp_is_sack(tp) && !tcp_is_fack(tp)) || 2545 if ((tcp_is_sack(tp) && !tcp_is_fack(tp)) ||
2420 (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) ||
2421 (oldcnt >= packets)) 2546 (oldcnt >= packets))
2422 break; 2547 break;
2423 2548
@@ -2470,10 +2595,39 @@ static inline void tcp_moderate_cwnd(struct tcp_sock *tp)
2470 tp->snd_cwnd_stamp = tcp_time_stamp; 2595 tp->snd_cwnd_stamp = tcp_time_stamp;
2471} 2596}
2472 2597
2598/* Lower bound on congestion window is slow start threshold
2599 * unless congestion avoidance choice decides to override it.
2600 */
2601static inline u32 tcp_cwnd_min(const struct sock *sk)
2602{
2603 const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops;
2604
2605 return ca_ops->min_cwnd ? ca_ops->min_cwnd(sk) : tcp_sk(sk)->snd_ssthresh;
2606}
2607
2608/* Decrease cwnd each second ack. */
2609static void tcp_cwnd_down(struct sock *sk, int flag)
2610{
2611 struct tcp_sock *tp = tcp_sk(sk);
2612 int decr = tp->snd_cwnd_cnt + 1;
2613
2614 if ((flag & (FLAG_ANY_PROGRESS | FLAG_DSACKING_ACK)) ||
2615 (tcp_is_reno(tp) && !(flag & FLAG_NOT_DUP))) {
2616 tp->snd_cwnd_cnt = decr & 1;
2617 decr >>= 1;
2618
2619 if (decr && tp->snd_cwnd > tcp_cwnd_min(sk))
2620 tp->snd_cwnd -= decr;
2621
2622 tp->snd_cwnd = min(tp->snd_cwnd, tcp_packets_in_flight(tp) + 1);
2623 tp->snd_cwnd_stamp = tcp_time_stamp;
2624 }
2625}
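For reference, tcp_cwnd_down() above is classic rate halving: snd_cwnd_cnt acts as a one-bit counter, so cwnd shrinks by one for every two ACKs that qualify, never below the tcp_cwnd_min() floor and never above packets in flight plus one. A hedged standalone sketch of just that arithmetic, with identifiers invented for the example:

static void cwnd_halving_step(unsigned int *cwnd, unsigned int *cnt,
			      unsigned int cwnd_min, unsigned int in_flight)
{
	unsigned int decr = *cnt + 1;

	*cnt = decr & 1;	/* carry an odd ACK over to the next call */
	decr >>= 1;		/* 0 on the first ACK, 1 on the second */

	if (decr && *cwnd > cwnd_min)
		*cwnd -= decr;

	if (*cwnd > in_flight + 1)
		*cwnd = in_flight + 1;	/* clamp to what is actually in flight */
}

Called once per incoming ACK with *cnt carried between calls, this drops cwnd by one for every two qualifying ACKs, which is where the "decrease cwnd each second ack" comment comes from.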
2626
2473/* Nothing was retransmitted or returned timestamp is less 2627/* Nothing was retransmitted or returned timestamp is less
2474 * than timestamp of the first retransmission. 2628 * than timestamp of the first retransmission.
2475 */ 2629 */
2476static inline bool tcp_packet_delayed(const struct tcp_sock *tp) 2630static inline int tcp_packet_delayed(struct tcp_sock *tp)
2477{ 2631{
2478 return !tp->retrans_stamp || 2632 return !tp->retrans_stamp ||
2479 (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr && 2633 (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
@@ -2489,22 +2643,22 @@ static void DBGUNDO(struct sock *sk, const char *msg)
2489 struct inet_sock *inet = inet_sk(sk); 2643 struct inet_sock *inet = inet_sk(sk);
2490 2644
2491 if (sk->sk_family == AF_INET) { 2645 if (sk->sk_family == AF_INET) {
2492 pr_debug("Undo %s %pI4/%u c%u l%u ss%u/%u p%u\n", 2646 printk(KERN_DEBUG "Undo %s %pI4/%u c%u l%u ss%u/%u p%u\n",
2493 msg, 2647 msg,
2494 &inet->inet_daddr, ntohs(inet->inet_dport), 2648 &inet->inet_daddr, ntohs(inet->inet_dport),
2495 tp->snd_cwnd, tcp_left_out(tp), 2649 tp->snd_cwnd, tcp_left_out(tp),
2496 tp->snd_ssthresh, tp->prior_ssthresh, 2650 tp->snd_ssthresh, tp->prior_ssthresh,
2497 tp->packets_out); 2651 tp->packets_out);
2498 } 2652 }
2499#if IS_ENABLED(CONFIG_IPV6) 2653#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
2500 else if (sk->sk_family == AF_INET6) { 2654 else if (sk->sk_family == AF_INET6) {
2501 struct ipv6_pinfo *np = inet6_sk(sk); 2655 struct ipv6_pinfo *np = inet6_sk(sk);
2502 pr_debug("Undo %s %pI6/%u c%u l%u ss%u/%u p%u\n", 2656 printk(KERN_DEBUG "Undo %s %pI6/%u c%u l%u ss%u/%u p%u\n",
2503 msg, 2657 msg,
2504 &np->daddr, ntohs(inet->inet_dport), 2658 &np->daddr, ntohs(inet->inet_dport),
2505 tp->snd_cwnd, tcp_left_out(tp), 2659 tp->snd_cwnd, tcp_left_out(tp),
2506 tp->snd_ssthresh, tp->prior_ssthresh, 2660 tp->snd_ssthresh, tp->prior_ssthresh,
2507 tp->packets_out); 2661 tp->packets_out);
2508 } 2662 }
2509#endif 2663#endif
2510} 2664}
@@ -2534,13 +2688,13 @@ static void tcp_undo_cwr(struct sock *sk, const bool undo_ssthresh)
2534 tp->snd_cwnd_stamp = tcp_time_stamp; 2688 tp->snd_cwnd_stamp = tcp_time_stamp;
2535} 2689}
2536 2690
2537static inline bool tcp_may_undo(const struct tcp_sock *tp) 2691static inline int tcp_may_undo(struct tcp_sock *tp)
2538{ 2692{
2539 return tp->undo_marker && (!tp->undo_retrans || tcp_packet_delayed(tp)); 2693 return tp->undo_marker && (!tp->undo_retrans || tcp_packet_delayed(tp));
2540} 2694}
2541 2695
2542/* People celebrate: "We love our President!" */ 2696/* People celebrate: "We love our President!" */
2543static bool tcp_try_undo_recovery(struct sock *sk) 2697static int tcp_try_undo_recovery(struct sock *sk)
2544{ 2698{
2545 struct tcp_sock *tp = tcp_sk(sk); 2699 struct tcp_sock *tp = tcp_sk(sk);
2546 2700
@@ -2565,10 +2719,10 @@ static bool tcp_try_undo_recovery(struct sock *sk)
2565 * is ACKed. For Reno it is a MUST to prevent false 2719 * is ACKed. For Reno it is a MUST to prevent false
2566 * fast retransmits (RFC2582). SACK TCP is safe. */ 2720 * fast retransmits (RFC2582). SACK TCP is safe. */
2567 tcp_moderate_cwnd(tp); 2721 tcp_moderate_cwnd(tp);
2568 return true; 2722 return 1;
2569 } 2723 }
2570 tcp_set_ca_state(sk, TCP_CA_Open); 2724 tcp_set_ca_state(sk, TCP_CA_Open);
2571 return false; 2725 return 0;
2572} 2726}
2573 2727
2574/* Try to undo cwnd reduction, because D-SACKs acked all retransmitted data */ 2728/* Try to undo cwnd reduction, because D-SACKs acked all retransmitted data */
@@ -2598,19 +2752,19 @@ static void tcp_try_undo_dsack(struct sock *sk)
2598 * that successive retransmissions of a segment must not advance 2752 * that successive retransmissions of a segment must not advance
2599 * retrans_stamp under any conditions. 2753 * retrans_stamp under any conditions.
2600 */ 2754 */
2601static bool tcp_any_retrans_done(const struct sock *sk) 2755static int tcp_any_retrans_done(struct sock *sk)
2602{ 2756{
2603 const struct tcp_sock *tp = tcp_sk(sk); 2757 struct tcp_sock *tp = tcp_sk(sk);
2604 struct sk_buff *skb; 2758 struct sk_buff *skb;
2605 2759
2606 if (tp->retrans_out) 2760 if (tp->retrans_out)
2607 return true; 2761 return 1;
2608 2762
2609 skb = tcp_write_queue_head(sk); 2763 skb = tcp_write_queue_head(sk);
2610 if (unlikely(skb && TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS)) 2764 if (unlikely(skb && TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS))
2611 return true; 2765 return 1;
2612 2766
2613 return false; 2767 return 0;
2614} 2768}
2615 2769
2616/* Undo during fast recovery after partial ACK. */ 2770/* Undo during fast recovery after partial ACK. */
@@ -2644,7 +2798,7 @@ static int tcp_try_undo_partial(struct sock *sk, int acked)
2644} 2798}
2645 2799
2646/* Undo during loss recovery after partial ACK. */ 2800/* Undo during loss recovery after partial ACK. */
2647static bool tcp_try_undo_loss(struct sock *sk) 2801static int tcp_try_undo_loss(struct sock *sk)
2648{ 2802{
2649 struct tcp_sock *tp = tcp_sk(sk); 2803 struct tcp_sock *tp = tcp_sk(sk);
2650 2804
@@ -2666,91 +2820,28 @@ static bool tcp_try_undo_loss(struct sock *sk)
2666 tp->undo_marker = 0; 2820 tp->undo_marker = 0;
2667 if (tcp_is_sack(tp)) 2821 if (tcp_is_sack(tp))
2668 tcp_set_ca_state(sk, TCP_CA_Open); 2822 tcp_set_ca_state(sk, TCP_CA_Open);
2669 return true; 2823 return 1;
2670 }
2671 return false;
2672}
2673
2674/* The cwnd reduction in CWR and Recovery use the PRR algorithm
2675 * https://datatracker.ietf.org/doc/draft-ietf-tcpm-proportional-rate-reduction/
2676 * It computes the number of packets to send (sndcnt) based on packets newly
2677 * delivered:
2678 * 1) If the packets in flight is larger than ssthresh, PRR spreads the
2679 * cwnd reductions across a full RTT.
2680 * 2) If packets in flight is lower than ssthresh (such as due to excess
2681 * losses and/or application stalls), do not perform any further cwnd
2682 * reductions, but instead slow start up to ssthresh.
2683 */
2684static void tcp_init_cwnd_reduction(struct sock *sk, const bool set_ssthresh)
2685{
2686 struct tcp_sock *tp = tcp_sk(sk);
2687
2688 tp->high_seq = tp->snd_nxt;
2689 tp->bytes_acked = 0;
2690 tp->snd_cwnd_cnt = 0;
2691 tp->prior_cwnd = tp->snd_cwnd;
2692 tp->prr_delivered = 0;
2693 tp->prr_out = 0;
2694 if (set_ssthresh)
2695 tp->snd_ssthresh = inet_csk(sk)->icsk_ca_ops->ssthresh(sk);
2696 TCP_ECN_queue_cwr(tp);
2697}
2698
2699static void tcp_cwnd_reduction(struct sock *sk, int newly_acked_sacked,
2700 int fast_rexmit)
2701{
2702 struct tcp_sock *tp = tcp_sk(sk);
2703 int sndcnt = 0;
2704 int delta = tp->snd_ssthresh - tcp_packets_in_flight(tp);
2705
2706 tp->prr_delivered += newly_acked_sacked;
2707 if (tcp_packets_in_flight(tp) > tp->snd_ssthresh) {
2708 u64 dividend = (u64)tp->snd_ssthresh * tp->prr_delivered +
2709 tp->prior_cwnd - 1;
2710 sndcnt = div_u64(dividend, tp->prior_cwnd) - tp->prr_out;
2711 } else {
2712 sndcnt = min_t(int, delta,
2713 max_t(int, tp->prr_delivered - tp->prr_out,
2714 newly_acked_sacked) + 1);
2715 } 2824 }
2716 2825 return 0;
2717 sndcnt = max(sndcnt, (fast_rexmit ? 1 : 0));
2718 tp->snd_cwnd = tcp_packets_in_flight(tp) + sndcnt;
2719} 2826}
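As a worked aside on the PRR block being removed here: while packets in flight exceed ssthresh, the sender is allowed roughly ssthresh/prior_cwnd of what has been delivered; below ssthresh it slow-starts back up, bounded by the remaining deficit. A standalone sketch of the send-count rule, with plain 64-bit division standing in for div_u64() and all parameter names invented for the example:

#include <stdint.h>

static int prr_sndcnt(uint32_t ssthresh, uint32_t prior_cwnd,
		      uint32_t prr_delivered, uint32_t prr_out,
		      uint32_t in_flight, int newly_acked_sacked,
		      int fast_rexmit)
{
	int sndcnt;
	int delta = (int)ssthresh - (int)in_flight;

	if (in_flight > ssthresh) {
		/* ceil(ssthresh * delivered / prior_cwnd) minus what was
		 * already sent during this reduction.
		 */
		uint64_t dividend = (uint64_t)ssthresh * prr_delivered +
				    prior_cwnd - 1;
		sndcnt = (int)(dividend / prior_cwnd) - (int)prr_out;
	} else {
		/* slow-start back toward ssthresh, capped by the deficit */
		int catch_up = (int)(prr_delivered - prr_out);

		if (catch_up < newly_acked_sacked)
			catch_up = newly_acked_sacked;
		sndcnt = catch_up + 1;
		if (sndcnt > delta)
			sndcnt = delta;
	}
	if (sndcnt < (fast_rexmit ? 1 : 0))
		sndcnt = fast_rexmit ? 1 : 0;
	return sndcnt;		/* new cwnd = in_flight + sndcnt */
}

With prior_cwnd = 20 and ssthresh = 14, for instance, the proportional branch releases about 7 new segments for every 10 delivered, so cwnd glides down toward ssthresh over roughly one round trip instead of being cut at once.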
2720 2827
2721static inline void tcp_end_cwnd_reduction(struct sock *sk) 2828static inline void tcp_complete_cwr(struct sock *sk)
2722{ 2829{
2723 struct tcp_sock *tp = tcp_sk(sk); 2830 struct tcp_sock *tp = tcp_sk(sk);
2724 2831 /* Do not moderate cwnd if it's already undone in cwr or recovery */
2725 /* Reset cwnd to ssthresh in CWR or Recovery (unless it's undone) */ 2832 if (tp->undo_marker && tp->snd_cwnd > tp->snd_ssthresh) {
2726 if (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR ||
2727 (tp->undo_marker && tp->snd_ssthresh < TCP_INFINITE_SSTHRESH)) {
2728 tp->snd_cwnd = tp->snd_ssthresh; 2833 tp->snd_cwnd = tp->snd_ssthresh;
2729 tp->snd_cwnd_stamp = tcp_time_stamp; 2834 tp->snd_cwnd_stamp = tcp_time_stamp;
2730 } 2835 }
2731 tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR); 2836 tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR);
2732} 2837}
2733 2838
2734/* Enter CWR state. Disable cwnd undo since congestion is proven with ECN */
2735void tcp_enter_cwr(struct sock *sk, const int set_ssthresh)
2736{
2737 struct tcp_sock *tp = tcp_sk(sk);
2738
2739 tp->prior_ssthresh = 0;
2740 tp->bytes_acked = 0;
2741 if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) {
2742 tp->undo_marker = 0;
2743 tcp_init_cwnd_reduction(sk, set_ssthresh);
2744 tcp_set_ca_state(sk, TCP_CA_CWR);
2745 }
2746}
2747
2748static void tcp_try_keep_open(struct sock *sk) 2839static void tcp_try_keep_open(struct sock *sk)
2749{ 2840{
2750 struct tcp_sock *tp = tcp_sk(sk); 2841 struct tcp_sock *tp = tcp_sk(sk);
2751 int state = TCP_CA_Open; 2842 int state = TCP_CA_Open;
2752 2843
2753 if (tcp_left_out(tp) || tcp_any_retrans_done(sk)) 2844 if (tcp_left_out(tp) || tcp_any_retrans_done(sk) || tp->undo_marker)
2754 state = TCP_CA_Disorder; 2845 state = TCP_CA_Disorder;
2755 2846
2756 if (inet_csk(sk)->icsk_ca_state != state) { 2847 if (inet_csk(sk)->icsk_ca_state != state) {
@@ -2759,7 +2850,7 @@ static void tcp_try_keep_open(struct sock *sk)
2759 } 2850 }
2760} 2851}
2761 2852
2762static void tcp_try_to_open(struct sock *sk, int flag, int newly_acked_sacked) 2853static void tcp_try_to_open(struct sock *sk, int flag)
2763{ 2854{
2764 struct tcp_sock *tp = tcp_sk(sk); 2855 struct tcp_sock *tp = tcp_sk(sk);
2765 2856
@@ -2773,10 +2864,9 @@ static void tcp_try_to_open(struct sock *sk, int flag, int newly_acked_sacked)
2773 2864
2774 if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) { 2865 if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) {
2775 tcp_try_keep_open(sk); 2866 tcp_try_keep_open(sk);
2776 if (inet_csk(sk)->icsk_ca_state != TCP_CA_Open) 2867 tcp_moderate_cwnd(tp);
2777 tcp_moderate_cwnd(tp);
2778 } else { 2868 } else {
2779 tcp_cwnd_reduction(sk, newly_acked_sacked, 0); 2869 tcp_cwnd_down(sk, flag);
2780 } 2870 }
2781} 2871}
2782 2872
@@ -2858,30 +2948,6 @@ void tcp_simple_retransmit(struct sock *sk)
2858} 2948}
2859EXPORT_SYMBOL(tcp_simple_retransmit); 2949EXPORT_SYMBOL(tcp_simple_retransmit);
2860 2950
2861static void tcp_enter_recovery(struct sock *sk, bool ece_ack)
2862{
2863 struct tcp_sock *tp = tcp_sk(sk);
2864 int mib_idx;
2865
2866 if (tcp_is_reno(tp))
2867 mib_idx = LINUX_MIB_TCPRENORECOVERY;
2868 else
2869 mib_idx = LINUX_MIB_TCPSACKRECOVERY;
2870
2871 NET_INC_STATS_BH(sock_net(sk), mib_idx);
2872
2873 tp->prior_ssthresh = 0;
2874 tp->undo_marker = tp->snd_una;
2875 tp->undo_retrans = tp->retrans_out;
2876
2877 if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) {
2878 if (!ece_ack)
2879 tp->prior_ssthresh = tcp_current_ssthresh(sk);
2880 tcp_init_cwnd_reduction(sk, true);
2881 }
2882 tcp_set_ca_state(sk, TCP_CA_Recovery);
2883}
2884
2885/* Process an event, which can update packets-in-flight not trivially. 2951/* Process an event, which can update packets-in-flight not trivially.
2886 * Main goal of this function is to calculate new estimate for left_out, 2952 * Main goal of this function is to calculate new estimate for left_out,
2887 * taking into account both packets sitting in receiver's buffer and 2953 * taking into account both packets sitting in receiver's buffer and
@@ -2893,16 +2959,14 @@ static void tcp_enter_recovery(struct sock *sk, bool ece_ack)
2893 * It does _not_ decide what to send, it is made in function 2959 * It does _not_ decide what to send, it is made in function
2894 * tcp_xmit_retransmit_queue(). 2960 * tcp_xmit_retransmit_queue().
2895 */ 2961 */
2896static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, 2962static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag)
2897 int prior_sacked, bool is_dupack,
2898 int flag)
2899{ 2963{
2900 struct inet_connection_sock *icsk = inet_csk(sk); 2964 struct inet_connection_sock *icsk = inet_csk(sk);
2901 struct tcp_sock *tp = tcp_sk(sk); 2965 struct tcp_sock *tp = tcp_sk(sk);
2966 int is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP));
2902 int do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) && 2967 int do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) &&
2903 (tcp_fackets_out(tp) > tp->reordering)); 2968 (tcp_fackets_out(tp) > tp->reordering));
2904 int newly_acked_sacked = 0; 2969 int fast_rexmit = 0, mib_idx;
2905 int fast_rexmit = 0;
2906 2970
2907 if (WARN_ON(!tp->packets_out && tp->sacked_out)) 2971 if (WARN_ON(!tp->packets_out && tp->sacked_out))
2908 tp->sacked_out = 0; 2972 tp->sacked_out = 0;
@@ -2918,10 +2982,19 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
2918 if (tcp_check_sack_reneging(sk, flag)) 2982 if (tcp_check_sack_reneging(sk, flag))
2919 return; 2983 return;
2920 2984
2921 /* C. Check consistency of the current state. */ 2985 /* C. Process data loss notification, provided it is valid. */
2986 if (tcp_is_fack(tp) && (flag & FLAG_DATA_LOST) &&
2987 before(tp->snd_una, tp->high_seq) &&
2988 icsk->icsk_ca_state != TCP_CA_Open &&
2989 tp->fackets_out > tp->reordering) {
2990 tcp_mark_head_lost(sk, tp->fackets_out - tp->reordering, 0);
2991 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSS);
2992 }
2993
2994 /* D. Check consistency of the current state. */
2922 tcp_verify_left_out(tp); 2995 tcp_verify_left_out(tp);
2923 2996
2924 /* D. Check state exit conditions. State can be terminated 2997 /* E. Check state exit conditions. State can be terminated
2925 * when high_seq is ACKed. */ 2998 * when high_seq is ACKed. */
2926 if (icsk->icsk_ca_state == TCP_CA_Open) { 2999 if (icsk->icsk_ca_state == TCP_CA_Open) {
2927 WARN_ON(tp->retrans_out != 0); 3000 WARN_ON(tp->retrans_out != 0);
@@ -2938,7 +3011,18 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
2938 /* CWR is to be held something *above* high_seq 3011 /* CWR is to be held something *above* high_seq
2939 * is ACKed for CWR bit to reach receiver. */ 3012 * is ACKed for CWR bit to reach receiver. */
2940 if (tp->snd_una != tp->high_seq) { 3013 if (tp->snd_una != tp->high_seq) {
2941 tcp_end_cwnd_reduction(sk); 3014 tcp_complete_cwr(sk);
3015 tcp_set_ca_state(sk, TCP_CA_Open);
3016 }
3017 break;
3018
3019 case TCP_CA_Disorder:
3020 tcp_try_undo_dsack(sk);
3021 if (!tp->undo_marker ||
3022 /* For SACK case do not Open to allow to undo
3023 * catching for all duplicate ACKs. */
3024 tcp_is_reno(tp) || tp->snd_una != tp->high_seq) {
3025 tp->undo_marker = 0;
2942 tcp_set_ca_state(sk, TCP_CA_Open); 3026 tcp_set_ca_state(sk, TCP_CA_Open);
2943 } 3027 }
2944 break; 3028 break;
@@ -2948,12 +3032,12 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
2948 tcp_reset_reno_sack(tp); 3032 tcp_reset_reno_sack(tp);
2949 if (tcp_try_undo_recovery(sk)) 3033 if (tcp_try_undo_recovery(sk))
2950 return; 3034 return;
2951 tcp_end_cwnd_reduction(sk); 3035 tcp_complete_cwr(sk);
2952 break; 3036 break;
2953 } 3037 }
2954 } 3038 }
2955 3039
2956 /* E. Process state. */ 3040 /* F. Process state. */
2957 switch (icsk->icsk_ca_state) { 3041 switch (icsk->icsk_ca_state) {
2958 case TCP_CA_Recovery: 3042 case TCP_CA_Recovery:
2959 if (!(flag & FLAG_SND_UNA_ADVANCED)) { 3043 if (!(flag & FLAG_SND_UNA_ADVANCED)) {
@@ -2961,7 +3045,6 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
2961 tcp_add_reno_sack(sk); 3045 tcp_add_reno_sack(sk);
2962 } else 3046 } else
2963 do_lost = tcp_try_undo_partial(sk, pkts_acked); 3047 do_lost = tcp_try_undo_partial(sk, pkts_acked);
2964 newly_acked_sacked = pkts_acked + tp->sacked_out - prior_sacked;
2965 break; 3048 break;
2966 case TCP_CA_Loss: 3049 case TCP_CA_Loss:
2967 if (flag & FLAG_DATA_ACKED) 3050 if (flag & FLAG_DATA_ACKED)
@@ -2983,13 +3066,12 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
2983 if (is_dupack) 3066 if (is_dupack)
2984 tcp_add_reno_sack(sk); 3067 tcp_add_reno_sack(sk);
2985 } 3068 }
2986 newly_acked_sacked = pkts_acked + tp->sacked_out - prior_sacked;
2987 3069
2988 if (icsk->icsk_ca_state <= TCP_CA_Disorder) 3070 if (icsk->icsk_ca_state == TCP_CA_Disorder)
2989 tcp_try_undo_dsack(sk); 3071 tcp_try_undo_dsack(sk);
2990 3072
2991 if (!tcp_time_to_recover(sk, flag)) { 3073 if (!tcp_time_to_recover(sk)) {
2992 tcp_try_to_open(sk, flag, newly_acked_sacked); 3074 tcp_try_to_open(sk, flag);
2993 return; 3075 return;
2994 } 3076 }
2995 3077
@@ -3005,13 +3087,35 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
3005 } 3087 }
3006 3088
3007 /* Otherwise enter Recovery state */ 3089 /* Otherwise enter Recovery state */
3008 tcp_enter_recovery(sk, (flag & FLAG_ECE)); 3090
3091 if (tcp_is_reno(tp))
3092 mib_idx = LINUX_MIB_TCPRENORECOVERY;
3093 else
3094 mib_idx = LINUX_MIB_TCPSACKRECOVERY;
3095
3096 NET_INC_STATS_BH(sock_net(sk), mib_idx);
3097
3098 tp->high_seq = tp->snd_nxt;
3099 tp->prior_ssthresh = 0;
3100 tp->undo_marker = tp->snd_una;
3101 tp->undo_retrans = tp->retrans_out;
3102
3103 if (icsk->icsk_ca_state < TCP_CA_CWR) {
3104 if (!(flag & FLAG_ECE))
3105 tp->prior_ssthresh = tcp_current_ssthresh(sk);
3106 tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
3107 TCP_ECN_queue_cwr(tp);
3108 }
3109
3110 tp->bytes_acked = 0;
3111 tp->snd_cwnd_cnt = 0;
3112 tcp_set_ca_state(sk, TCP_CA_Recovery);
3009 fast_rexmit = 1; 3113 fast_rexmit = 1;
3010 } 3114 }
3011 3115
3012 if (do_lost || (tcp_is_fack(tp) && tcp_head_timedout(sk))) 3116 if (do_lost || (tcp_is_fack(tp) && tcp_head_timedout(sk)))
3013 tcp_update_scoreboard(sk, fast_rexmit); 3117 tcp_update_scoreboard(sk, fast_rexmit);
3014 tcp_cwnd_reduction(sk, newly_acked_sacked, fast_rexmit); 3118 tcp_cwnd_down(sk, flag);
3015 tcp_xmit_retransmit_queue(sk); 3119 tcp_xmit_retransmit_queue(sk);
3016} 3120}
3017 3121
@@ -3086,53 +3190,16 @@ static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
3086/* Restart timer after forward progress on connection. 3190/* Restart timer after forward progress on connection.
3087 * RFC2988 recommends to restart timer to now+rto. 3191 * RFC2988 recommends to restart timer to now+rto.
3088 */ 3192 */
3089void tcp_rearm_rto(struct sock *sk) 3193static void tcp_rearm_rto(struct sock *sk)
3090{ 3194{
3091 struct tcp_sock *tp = tcp_sk(sk); 3195 struct tcp_sock *tp = tcp_sk(sk);
3092 3196
3093 /* If the retrans timer is currently being used by Fast Open
3094 * for SYN-ACK retrans purpose, stay put.
3095 */
3096 if (tp->fastopen_rsk)
3097 return;
3098
3099 if (!tp->packets_out) { 3197 if (!tp->packets_out) {
3100 inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS); 3198 inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS);
3101 } else { 3199 } else {
3102 u32 rto = inet_csk(sk)->icsk_rto; 3200 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
3103 /* Offset the time elapsed after installing regular RTO */ 3201 inet_csk(sk)->icsk_rto, TCP_RTO_MAX);
3104 if (tp->early_retrans_delayed) {
3105 struct sk_buff *skb = tcp_write_queue_head(sk);
3106 const u32 rto_time_stamp = TCP_SKB_CB(skb)->when + rto;
3107 s32 delta = (s32)(rto_time_stamp - tcp_time_stamp);
3108 /* delta may not be positive if the socket is locked
3109 * when the delayed ER timer fires and is rescheduled.
3110 */
3111 if (delta > 0)
3112 rto = delta;
3113 }
3114 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, rto,
3115 TCP_RTO_MAX);
3116 } 3202 }
3117 tp->early_retrans_delayed = 0;
3118}
3119
3120/* This function is called when the delayed ER timer fires. TCP enters
3121 * fast recovery and performs fast-retransmit.
3122 */
3123void tcp_resume_early_retransmit(struct sock *sk)
3124{
3125 struct tcp_sock *tp = tcp_sk(sk);
3126
3127 tcp_rearm_rto(sk);
3128
3129 /* Stop if ER is disabled after the delayed ER timer is scheduled */
3130 if (!tp->do_early_retrans)
3131 return;
3132
3133 tcp_enter_recovery(sk, false);
3134 tcp_update_scoreboard(sk, 1);
3135 tcp_xmit_retransmit_queue(sk);
3136} 3203}
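The removed tcp_rearm_rto() above offsets the retransmit timer by the time already elapsed since the head of the write queue was sent (TCP_SKB_CB(skb)->when versus tcp_time_stamp, both in jiffies), so a reschedule after the delayed-ER timer does not push the RTO a full period into the future. A minimal sketch of just that adjustment, using wraparound-safe jiffies arithmetic and invented names; the no-packets-outstanding case, where the kernel simply clears the timer, is omitted:

#include <stdint.h>

static uint32_t adjusted_rto(uint32_t rto, uint32_t head_sent_time,
			     uint32_t now)
{
	int32_t delta = (int32_t)(head_sent_time + rto - now);

	/* delta can be non-positive when the timer is rescheduled late;
	 * fall back to the full RTO in that case, as the code above does.
	 */
	return delta > 0 ? (uint32_t)delta : rto;
}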
3137 3204
3138/* If we get here, the whole TSO packet has not been acked. */ 3205/* If we get here, the whole TSO packet has not been acked. */
@@ -3167,7 +3234,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
3167 const struct inet_connection_sock *icsk = inet_csk(sk); 3234 const struct inet_connection_sock *icsk = inet_csk(sk);
3168 struct sk_buff *skb; 3235 struct sk_buff *skb;
3169 u32 now = tcp_time_stamp; 3236 u32 now = tcp_time_stamp;
3170 int fully_acked = true; 3237 int fully_acked = 1;
3171 int flag = 0; 3238 int flag = 0;
3172 u32 pkts_acked = 0; 3239 u32 pkts_acked = 0;
3173 u32 reord = tp->packets_out; 3240 u32 reord = tp->packets_out;
@@ -3191,7 +3258,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
3191 if (!acked_pcount) 3258 if (!acked_pcount)
3192 break; 3259 break;
3193 3260
3194 fully_acked = false; 3261 fully_acked = 0;
3195 } else { 3262 } else {
3196 acked_pcount = tcp_skb_pcount(skb); 3263 acked_pcount = tcp_skb_pcount(skb);
3197 } 3264 }
@@ -3229,7 +3296,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
3229 * connection startup slow start one packet too 3296 * connection startup slow start one packet too
3230 * quickly. This is severely frowned upon behavior. 3297 * quickly. This is severely frowned upon behavior.
3231 */ 3298 */
3232 if (!(scb->tcp_flags & TCPHDR_SYN)) { 3299 if (!(scb->flags & TCPHDR_SYN)) {
3233 flag |= FLAG_DATA_ACKED; 3300 flag |= FLAG_DATA_ACKED;
3234 } else { 3301 } else {
3235 flag |= FLAG_SYN_ACKED; 3302 flag |= FLAG_SYN_ACKED;
@@ -3308,18 +3375,18 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
3308 if (!tp->packets_out && tcp_is_sack(tp)) { 3375 if (!tp->packets_out && tcp_is_sack(tp)) {
3309 icsk = inet_csk(sk); 3376 icsk = inet_csk(sk);
3310 if (tp->lost_out) { 3377 if (tp->lost_out) {
3311 pr_debug("Leak l=%u %d\n", 3378 printk(KERN_DEBUG "Leak l=%u %d\n",
3312 tp->lost_out, icsk->icsk_ca_state); 3379 tp->lost_out, icsk->icsk_ca_state);
3313 tp->lost_out = 0; 3380 tp->lost_out = 0;
3314 } 3381 }
3315 if (tp->sacked_out) { 3382 if (tp->sacked_out) {
3316 pr_debug("Leak s=%u %d\n", 3383 printk(KERN_DEBUG "Leak s=%u %d\n",
3317 tp->sacked_out, icsk->icsk_ca_state); 3384 tp->sacked_out, icsk->icsk_ca_state);
3318 tp->sacked_out = 0; 3385 tp->sacked_out = 0;
3319 } 3386 }
3320 if (tp->retrans_out) { 3387 if (tp->retrans_out) {
3321 pr_debug("Leak r=%u %d\n", 3388 printk(KERN_DEBUG "Leak r=%u %d\n",
3322 tp->retrans_out, icsk->icsk_ca_state); 3389 tp->retrans_out, icsk->icsk_ca_state);
3323 tp->retrans_out = 0; 3390 tp->retrans_out = 0;
3324 } 3391 }
3325 } 3392 }
@@ -3347,23 +3414,23 @@ static void tcp_ack_probe(struct sock *sk)
3347 } 3414 }
3348} 3415}
3349 3416
3350static inline bool tcp_ack_is_dubious(const struct sock *sk, const int flag) 3417static inline int tcp_ack_is_dubious(const struct sock *sk, const int flag)
3351{ 3418{
3352 return !(flag & FLAG_NOT_DUP) || (flag & FLAG_CA_ALERT) || 3419 return !(flag & FLAG_NOT_DUP) || (flag & FLAG_CA_ALERT) ||
3353 inet_csk(sk)->icsk_ca_state != TCP_CA_Open; 3420 inet_csk(sk)->icsk_ca_state != TCP_CA_Open;
3354} 3421}
3355 3422
3356static inline bool tcp_may_raise_cwnd(const struct sock *sk, const int flag) 3423static inline int tcp_may_raise_cwnd(const struct sock *sk, const int flag)
3357{ 3424{
3358 const struct tcp_sock *tp = tcp_sk(sk); 3425 const struct tcp_sock *tp = tcp_sk(sk);
3359 return (!(flag & FLAG_ECE) || tp->snd_cwnd < tp->snd_ssthresh) && 3426 return (!(flag & FLAG_ECE) || tp->snd_cwnd < tp->snd_ssthresh) &&
3360 !tcp_in_cwnd_reduction(sk); 3427 !((1 << inet_csk(sk)->icsk_ca_state) & (TCPF_CA_Recovery | TCPF_CA_CWR));
3361} 3428}
3362 3429
3363/* Check that window update is acceptable. 3430/* Check that window update is acceptable.
3364 * The function assumes that snd_una<=ack<=snd_next. 3431 * The function assumes that snd_una<=ack<=snd_next.
3365 */ 3432 */
3366static inline bool tcp_may_update_window(const struct tcp_sock *tp, 3433static inline int tcp_may_update_window(const struct tcp_sock *tp,
3367 const u32 ack, const u32 ack_seq, 3434 const u32 ack, const u32 ack_seq,
3368 const u32 nwin) 3435 const u32 nwin)
3369{ 3436{
@@ -3377,7 +3444,7 @@ static inline bool tcp_may_update_window(const struct tcp_sock *tp,
3377 * Window update algorithm, described in RFC793/RFC1122 (used in linux-2.2 3444 * Window update algorithm, described in RFC793/RFC1122 (used in linux-2.2
3378 * and in FreeBSD. NetBSD's one is even worse.) is wrong. 3445 * and in FreeBSD. NetBSD's one is even worse.) is wrong.
3379 */ 3446 */
3380static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32 ack, 3447static int tcp_ack_update_window(struct sock *sk, struct sk_buff *skb, u32 ack,
3381 u32 ack_seq) 3448 u32 ack_seq)
3382{ 3449{
3383 struct tcp_sock *tp = tcp_sk(sk); 3450 struct tcp_sock *tp = tcp_sk(sk);
@@ -3425,9 +3492,9 @@ static void tcp_conservative_spur_to_response(struct tcp_sock *tp)
3425} 3492}
3426 3493
3427/* A conservative spurious RTO response algorithm: reduce cwnd using 3494/* A conservative spurious RTO response algorithm: reduce cwnd using
3428 * PRR and continue in congestion avoidance. 3495 * rate halving and continue in congestion avoidance.
3429 */ 3496 */
3430static void tcp_cwr_spur_to_response(struct sock *sk) 3497static void tcp_ratehalving_spur_to_response(struct sock *sk)
3431{ 3498{
3432 tcp_enter_cwr(sk, 0); 3499 tcp_enter_cwr(sk, 0);
3433} 3500}
@@ -3435,7 +3502,7 @@ static void tcp_cwr_spur_to_response(struct sock *sk)
3435static void tcp_undo_spur_to_response(struct sock *sk, int flag) 3502static void tcp_undo_spur_to_response(struct sock *sk, int flag)
3436{ 3503{
3437 if (flag & FLAG_ECE) 3504 if (flag & FLAG_ECE)
3438 tcp_cwr_spur_to_response(sk); 3505 tcp_ratehalving_spur_to_response(sk);
3439 else 3506 else
3440 tcp_undo_cwr(sk, true); 3507 tcp_undo_cwr(sk, true);
3441} 3508}
@@ -3470,7 +3537,7 @@ static void tcp_undo_spur_to_response(struct sock *sk, int flag)
3470 * to prove that the RTO is indeed spurious. It transfers the control 3537 * to prove that the RTO is indeed spurious. It transfers the control
3471 * from F-RTO to the conventional RTO recovery 3538 * from F-RTO to the conventional RTO recovery
3472 */ 3539 */
3473static bool tcp_process_frto(struct sock *sk, int flag) 3540static int tcp_process_frto(struct sock *sk, int flag)
3474{ 3541{
3475 struct tcp_sock *tp = tcp_sk(sk); 3542 struct tcp_sock *tp = tcp_sk(sk);
3476 3543
@@ -3486,7 +3553,7 @@ static bool tcp_process_frto(struct sock *sk, int flag)
3486 3553
3487 if (!before(tp->snd_una, tp->frto_highmark)) { 3554 if (!before(tp->snd_una, tp->frto_highmark)) {
3488 tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 2 : 3), flag); 3555 tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 2 : 3), flag);
3489 return true; 3556 return 1;
3490 } 3557 }
3491 3558
3492 if (!tcp_is_sackfrto(tp)) { 3559 if (!tcp_is_sackfrto(tp)) {
@@ -3495,19 +3562,19 @@ static bool tcp_process_frto(struct sock *sk, int flag)
3495 * data, winupdate 3562 * data, winupdate
3496 */ 3563 */
3497 if (!(flag & FLAG_ANY_PROGRESS) && (flag & FLAG_NOT_DUP)) 3564 if (!(flag & FLAG_ANY_PROGRESS) && (flag & FLAG_NOT_DUP))
3498 return true; 3565 return 1;
3499 3566
3500 if (!(flag & FLAG_DATA_ACKED)) { 3567 if (!(flag & FLAG_DATA_ACKED)) {
3501 tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 0 : 3), 3568 tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 0 : 3),
3502 flag); 3569 flag);
3503 return true; 3570 return 1;
3504 } 3571 }
3505 } else { 3572 } else {
3506 if (!(flag & FLAG_DATA_ACKED) && (tp->frto_counter == 1)) { 3573 if (!(flag & FLAG_DATA_ACKED) && (tp->frto_counter == 1)) {
3507 /* Prevent sending of new data. */ 3574 /* Prevent sending of new data. */
3508 tp->snd_cwnd = min(tp->snd_cwnd, 3575 tp->snd_cwnd = min(tp->snd_cwnd,
3509 tcp_packets_in_flight(tp)); 3576 tcp_packets_in_flight(tp));
3510 return true; 3577 return 1;
3511 } 3578 }
3512 3579
3513 if ((tp->frto_counter >= 2) && 3580 if ((tp->frto_counter >= 2) &&
@@ -3517,10 +3584,10 @@ static bool tcp_process_frto(struct sock *sk, int flag)
3517 /* RFC4138 shortcoming (see comment above) */ 3584 /* RFC4138 shortcoming (see comment above) */
3518 if (!(flag & FLAG_FORWARD_PROGRESS) && 3585 if (!(flag & FLAG_FORWARD_PROGRESS) &&
3519 (flag & FLAG_NOT_DUP)) 3586 (flag & FLAG_NOT_DUP))
3520 return true; 3587 return 1;
3521 3588
3522 tcp_enter_frto_loss(sk, 3, flag); 3589 tcp_enter_frto_loss(sk, 3, flag);
3523 return true; 3590 return 1;
3524 } 3591 }
3525 } 3592 }
3526 3593
@@ -3532,7 +3599,7 @@ static bool tcp_process_frto(struct sock *sk, int flag)
3532 if (!tcp_may_send_now(sk)) 3599 if (!tcp_may_send_now(sk))
3533 tcp_enter_frto_loss(sk, 2, flag); 3600 tcp_enter_frto_loss(sk, 2, flag);
3534 3601
3535 return true; 3602 return 1;
3536 } else { 3603 } else {
3537 switch (sysctl_tcp_frto_response) { 3604 switch (sysctl_tcp_frto_response) {
3538 case 2: 3605 case 2:
@@ -3542,61 +3609,34 @@ static bool tcp_process_frto(struct sock *sk, int flag)
3542 tcp_conservative_spur_to_response(tp); 3609 tcp_conservative_spur_to_response(tp);
3543 break; 3610 break;
3544 default: 3611 default:
3545 tcp_cwr_spur_to_response(sk); 3612 tcp_ratehalving_spur_to_response(sk);
3546 break; 3613 break;
3547 } 3614 }
3548 tp->frto_counter = 0; 3615 tp->frto_counter = 0;
3549 tp->undo_marker = 0; 3616 tp->undo_marker = 0;
3550 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSPURIOUSRTOS); 3617 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSPURIOUSRTOS);
3551 } 3618 }
3552 return false; 3619 return 0;
3553}
3554
3555/* RFC 5961 7 [ACK Throttling] */
3556static void tcp_send_challenge_ack(struct sock *sk)
3557{
3558 /* unprotected vars, we don't care about overwrites */
3559 static u32 challenge_timestamp;
3560 static unsigned int challenge_count;
3561 u32 now = jiffies / HZ;
3562
3563 if (now != challenge_timestamp) {
3564 challenge_timestamp = now;
3565 challenge_count = 0;
3566 }
3567 if (++challenge_count <= sysctl_tcp_challenge_ack_limit) {
3568 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPCHALLENGEACK);
3569 tcp_send_ack(sk);
3570 }
3571} 3620}
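The challenge-ACK limiter being removed above is a per-second budget shared by all sockets: the counter resets whenever the coarse seconds clock ticks, and a challenge ACK goes out only while the counter is under sysctl_tcp_challenge_ack_limit; the statics are deliberately left unlocked, as the comment notes. A standalone, single-threaded sketch of the same rate limit, with names invented for the example:

#include <stdbool.h>
#include <stdint.h>

static bool challenge_ack_allowed(uint32_t now_seconds, uint32_t limit)
{
	static uint32_t window_start;	/* second the current budget opened */
	static uint32_t count;		/* ACKs spent in that second */

	if (now_seconds != window_start) {
		window_start = now_seconds;
		count = 0;		/* new second, new budget */
	}
	return ++count <= limit;
}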
3572 3621
3573/* This routine deals with incoming acks, but not outgoing ones. */ 3622/* This routine deals with incoming acks, but not outgoing ones. */
3574static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) 3623static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
3575{ 3624{
3576 struct inet_connection_sock *icsk = inet_csk(sk); 3625 struct inet_connection_sock *icsk = inet_csk(sk);
3577 struct tcp_sock *tp = tcp_sk(sk); 3626 struct tcp_sock *tp = tcp_sk(sk);
3578 u32 prior_snd_una = tp->snd_una; 3627 u32 prior_snd_una = tp->snd_una;
3579 u32 ack_seq = TCP_SKB_CB(skb)->seq; 3628 u32 ack_seq = TCP_SKB_CB(skb)->seq;
3580 u32 ack = TCP_SKB_CB(skb)->ack_seq; 3629 u32 ack = TCP_SKB_CB(skb)->ack_seq;
3581 bool is_dupack = false;
3582 u32 prior_in_flight; 3630 u32 prior_in_flight;
3583 u32 prior_fackets; 3631 u32 prior_fackets;
3584 int prior_packets; 3632 int prior_packets;
3585 int prior_sacked = tp->sacked_out; 3633 int frto_cwnd = 0;
3586 int pkts_acked = 0;
3587 bool frto_cwnd = false;
3588 3634
3589 /* If the ack is older than previous acks 3635 /* If the ack is older than previous acks
3590 * then we can probably ignore it. 3636 * then we can probably ignore it.
3591 */ 3637 */
3592 if (before(ack, prior_snd_una)) { 3638 if (before(ack, prior_snd_una))
3593 /* RFC 5961 5.2 [Blind Data Injection Attack].[Mitigation] */
3594 if (before(ack, prior_snd_una - tp->max_window)) {
3595 tcp_send_challenge_ack(sk);
3596 return -1;
3597 }
3598 goto old_ack; 3639 goto old_ack;
3599 }
3600 3640
3601 /* If the ack includes data we haven't sent yet, discard 3641 /* If the ack includes data we haven't sent yet, discard
3602 * this segment (RFC793 Section 3.9). 3642 * this segment (RFC793 Section 3.9).
@@ -3604,9 +3644,6 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
3604 if (after(ack, tp->snd_nxt)) 3644 if (after(ack, tp->snd_nxt))
3605 goto invalid_ack; 3645 goto invalid_ack;
3606 3646
3607 if (tp->early_retrans_delayed)
3608 tcp_rearm_rto(sk);
3609
3610 if (after(ack, prior_snd_una)) 3647 if (after(ack, prior_snd_una))
3611 flag |= FLAG_SND_UNA_ADVANCED; 3648 flag |= FLAG_SND_UNA_ADVANCED;
3612 3649
@@ -3664,8 +3701,6 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
3664 /* See if we can take anything off of the retransmit queue. */ 3701 /* See if we can take anything off of the retransmit queue. */
3665 flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una); 3702 flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una);
3666 3703
3667 pkts_acked = prior_packets - tp->packets_out;
3668
3669 if (tp->frto_counter) 3704 if (tp->frto_counter)
3670 frto_cwnd = tcp_process_frto(sk, flag); 3705 frto_cwnd = tcp_process_frto(sk, flag);
3671 /* Guarantee sacktag reordering detection against wrap-arounds */ 3706 /* Guarantee sacktag reordering detection against wrap-arounds */
@@ -3677,26 +3712,19 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
3677 if ((flag & FLAG_DATA_ACKED) && !frto_cwnd && 3712 if ((flag & FLAG_DATA_ACKED) && !frto_cwnd &&
3678 tcp_may_raise_cwnd(sk, flag)) 3713 tcp_may_raise_cwnd(sk, flag))
3679 tcp_cong_avoid(sk, ack, prior_in_flight); 3714 tcp_cong_avoid(sk, ack, prior_in_flight);
3680 is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP)); 3715 tcp_fastretrans_alert(sk, prior_packets - tp->packets_out,
3681 tcp_fastretrans_alert(sk, pkts_acked, prior_sacked, 3716 flag);
3682 is_dupack, flag);
3683 } else { 3717 } else {
3684 if ((flag & FLAG_DATA_ACKED) && !frto_cwnd) 3718 if ((flag & FLAG_DATA_ACKED) && !frto_cwnd)
3685 tcp_cong_avoid(sk, ack, prior_in_flight); 3719 tcp_cong_avoid(sk, ack, prior_in_flight);
3686 } 3720 }
3687 3721
3688 if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP)) { 3722 if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP))
3689 struct dst_entry *dst = __sk_dst_get(sk); 3723 dst_confirm(__sk_dst_get(sk));
3690 if (dst) 3724
3691 dst_confirm(dst);
3692 }
3693 return 1; 3725 return 1;
3694 3726
3695no_queue: 3727no_queue:
3696 /* If data was DSACKed, see if we can undo a cwnd reduction. */
3697 if (flag & FLAG_DSACKING_ACK)
3698 tcp_fastretrans_alert(sk, pkts_acked, prior_sacked,
3699 is_dupack, flag);
3700 /* If this ack opens up a zero window, clear backoff. It was 3728 /* If this ack opens up a zero window, clear backoff. It was
3701 * being used to time the probes, and is probably far higher than 3729 * being used to time the probes, and is probably far higher than
3702 * it needs to be for normal retransmission. 3730 * it needs to be for normal retransmission.
@@ -3710,13 +3738,10 @@ invalid_ack:
3710 return -1; 3738 return -1;
3711 3739
3712old_ack: 3740old_ack:
3713 /* If data was SACKed, tag it and see if we should send more data.
3714 * If data was DSACKed, see if we can undo a cwnd reduction.
3715 */
3716 if (TCP_SKB_CB(skb)->sacked) { 3741 if (TCP_SKB_CB(skb)->sacked) {
3717 flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una); 3742 tcp_sacktag_write_queue(sk, skb, prior_snd_una);
3718 tcp_fastretrans_alert(sk, pkts_acked, prior_sacked, 3743 if (icsk->icsk_ca_state == TCP_CA_Open)
3719 is_dupack, flag); 3744 tcp_try_keep_open(sk);
3720 } 3745 }
3721 3746
3722 SOCK_DEBUG(sk, "Ack %u before %u:%u\n", ack, tp->snd_una, tp->snd_nxt); 3747 SOCK_DEBUG(sk, "Ack %u before %u:%u\n", ack, tp->snd_una, tp->snd_nxt);
@@ -3727,15 +3752,14 @@ old_ack:
3727 * But, this can also be called on packets in the established flow when 3752 * But, this can also be called on packets in the established flow when
3728 * the fast version below fails. 3753 * the fast version below fails.
3729 */ 3754 */
3730void tcp_parse_options(const struct sk_buff *skb, struct tcp_options_received *opt_rx, 3755void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
3731 const u8 **hvpp, int estab, 3756 u8 **hvpp, int estab)
3732 struct tcp_fastopen_cookie *foc)
3733{ 3757{
3734 const unsigned char *ptr; 3758 unsigned char *ptr;
3735 const struct tcphdr *th = tcp_hdr(skb); 3759 struct tcphdr *th = tcp_hdr(skb);
3736 int length = (th->doff * 4) - sizeof(struct tcphdr); 3760 int length = (th->doff * 4) - sizeof(struct tcphdr);
3737 3761
3738 ptr = (const unsigned char *)(th + 1); 3762 ptr = (unsigned char *)(th + 1);
3739 opt_rx->saw_tstamp = 0; 3763 opt_rx->saw_tstamp = 0;
3740 3764
3741 while (length > 0) { 3765 while (length > 0) {
@@ -3772,9 +3796,10 @@ void tcp_parse_options(const struct sk_buff *skb, struct tcp_options_received *o
3772 __u8 snd_wscale = *(__u8 *)ptr; 3796 __u8 snd_wscale = *(__u8 *)ptr;
3773 opt_rx->wscale_ok = 1; 3797 opt_rx->wscale_ok = 1;
3774 if (snd_wscale > 14) { 3798 if (snd_wscale > 14) {
3775 net_info_ratelimited("%s: Illegal window scaling value %d >14 received\n", 3799 if (net_ratelimit())
3776 __func__, 3800 printk(KERN_INFO "tcp_parse_options: Illegal window "
3777 snd_wscale); 3801 "scaling value %d >14 received.\n",
3802 snd_wscale);
3778 snd_wscale = 14; 3803 snd_wscale = 14;
3779 } 3804 }
3780 opt_rx->snd_wscale = snd_wscale; 3805 opt_rx->snd_wscale = snd_wscale;
@@ -3792,7 +3817,7 @@ void tcp_parse_options(const struct sk_buff *skb, struct tcp_options_received *o
3792 case TCPOPT_SACK_PERM: 3817 case TCPOPT_SACK_PERM:
3793 if (opsize == TCPOLEN_SACK_PERM && th->syn && 3818 if (opsize == TCPOLEN_SACK_PERM && th->syn &&
3794 !estab && sysctl_tcp_sack) { 3819 !estab && sysctl_tcp_sack) {
3795 opt_rx->sack_ok = TCP_SACK_SEEN; 3820 opt_rx->sack_ok = 1;
3796 tcp_sack_reset(opt_rx); 3821 tcp_sack_reset(opt_rx);
3797 } 3822 }
3798 break; 3823 break;
@@ -3836,25 +3861,8 @@ void tcp_parse_options(const struct sk_buff *skb, struct tcp_options_received *o
3836 break; 3861 break;
3837 } 3862 }
3838 break; 3863 break;
3839
3840 case TCPOPT_EXP:
3841 /* Fast Open option shares code 254 using a
3842 * 16 bits magic number. It's valid only in
3843 * SYN or SYN-ACK with an even size.
3844 */
3845 if (opsize < TCPOLEN_EXP_FASTOPEN_BASE ||
3846 get_unaligned_be16(ptr) != TCPOPT_FASTOPEN_MAGIC ||
3847 foc == NULL || !th->syn || (opsize & 1))
3848 break;
3849 foc->len = opsize - TCPOLEN_EXP_FASTOPEN_BASE;
3850 if (foc->len >= TCP_FASTOPEN_COOKIE_MIN &&
3851 foc->len <= TCP_FASTOPEN_COOKIE_MAX)
3852 memcpy(foc->val, ptr + 2, foc->len);
3853 else if (foc->len != 0)
3854 foc->len = -1;
3855 break;
3856
3857 } 3864 }
3865
3858 ptr += opsize-2; 3866 ptr += opsize-2;
3859 length -= opsize; 3867 length -= opsize;
3860 } 3868 }
@@ -3862,9 +3870,9 @@ void tcp_parse_options(const struct sk_buff *skb, struct tcp_options_received *o
3862} 3870}
3863EXPORT_SYMBOL(tcp_parse_options); 3871EXPORT_SYMBOL(tcp_parse_options);
3864 3872
3865static bool tcp_parse_aligned_timestamp(struct tcp_sock *tp, const struct tcphdr *th) 3873static int tcp_parse_aligned_timestamp(struct tcp_sock *tp, struct tcphdr *th)
3866{ 3874{
3867 const __be32 *ptr = (const __be32 *)(th + 1); 3875 __be32 *ptr = (__be32 *)(th + 1);
3868 3876
3869 if (*ptr == htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) 3877 if (*ptr == htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16)
3870 | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)) { 3878 | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)) {
@@ -3873,41 +3881,40 @@ static bool tcp_parse_aligned_timestamp(struct tcp_sock *tp, const struct tcphdr
3873 tp->rx_opt.rcv_tsval = ntohl(*ptr); 3881 tp->rx_opt.rcv_tsval = ntohl(*ptr);
3874 ++ptr; 3882 ++ptr;
3875 tp->rx_opt.rcv_tsecr = ntohl(*ptr); 3883 tp->rx_opt.rcv_tsecr = ntohl(*ptr);
3876 return true; 3884 return 1;
3877 } 3885 }
3878 return false; 3886 return 0;
3879} 3887}
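The fast path above works because the common timestamp-only option layout is a single 32-bit constant: NOP, NOP, TIMESTAMP kind, TIMESTAMP length. A small self-contained check of that constant (the kernel compares the raw word after the header against htonl() of this value):

#include <assert.h>
#include <stdint.h>

enum { TCPOPT_NOP = 1, TCPOPT_TIMESTAMP = 8, TCPOLEN_TIMESTAMP = 10 };

int main(void)
{
	uint32_t pattern = ((uint32_t)TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
			   (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP;

	/* 0x01 0x01 0x08 0x0a as the bytes appear on the wire */
	assert(pattern == 0x0101080aU);
	return 0;
}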
3880 3888
3881/* Fast parse options. This hopes to only see timestamps. 3889/* Fast parse options. This hopes to only see timestamps.
3882 * If it is wrong it falls back on tcp_parse_options(). 3890 * If it is wrong it falls back on tcp_parse_options().
3883 */ 3891 */
3884static bool tcp_fast_parse_options(const struct sk_buff *skb, 3892static int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th,
3885 const struct tcphdr *th, 3893 struct tcp_sock *tp, u8 **hvpp)
3886 struct tcp_sock *tp, const u8 **hvpp)
3887{ 3894{
3888 /* In the spirit of fast parsing, compare doff directly to constant 3895 /* In the spirit of fast parsing, compare doff directly to constant
3889 * values. Because equality is used, short doff can be ignored here. 3896 * values. Because equality is used, short doff can be ignored here.
3890 */ 3897 */
3891 if (th->doff == (sizeof(*th) / 4)) { 3898 if (th->doff == (sizeof(*th) / 4)) {
3892 tp->rx_opt.saw_tstamp = 0; 3899 tp->rx_opt.saw_tstamp = 0;
3893 return false; 3900 return 0;
3894 } else if (tp->rx_opt.tstamp_ok && 3901 } else if (tp->rx_opt.tstamp_ok &&
3895 th->doff == ((sizeof(*th) + TCPOLEN_TSTAMP_ALIGNED) / 4)) { 3902 th->doff == ((sizeof(*th) + TCPOLEN_TSTAMP_ALIGNED) / 4)) {
3896 if (tcp_parse_aligned_timestamp(tp, th)) 3903 if (tcp_parse_aligned_timestamp(tp, th))
3897 return true; 3904 return 1;
3898 } 3905 }
3899 tcp_parse_options(skb, &tp->rx_opt, hvpp, 1, NULL); 3906 tcp_parse_options(skb, &tp->rx_opt, hvpp, 1);
3900 return true; 3907 return 1;
3901} 3908}
3902 3909
3903#ifdef CONFIG_TCP_MD5SIG 3910#ifdef CONFIG_TCP_MD5SIG
3904/* 3911/*
3905 * Parse MD5 Signature option 3912 * Parse MD5 Signature option
3906 */ 3913 */
3907const u8 *tcp_parse_md5sig_option(const struct tcphdr *th) 3914u8 *tcp_parse_md5sig_option(struct tcphdr *th)
3908{ 3915{
3909 int length = (th->doff << 2) - sizeof(*th); 3916 int length = (th->doff << 2) - sizeof (*th);
3910 const u8 *ptr = (const u8 *)(th + 1); 3917 u8 *ptr = (u8*)(th + 1);
3911 3918
3912 /* If the TCP option is too short, we can short cut */ 3919 /* If the TCP option is too short, we can short cut */
3913 if (length < TCPOLEN_MD5SIG) 3920 if (length < TCPOLEN_MD5SIG)
@@ -3984,8 +3991,8 @@ static inline void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq)
3984 3991
3985static int tcp_disordered_ack(const struct sock *sk, const struct sk_buff *skb) 3992static int tcp_disordered_ack(const struct sock *sk, const struct sk_buff *skb)
3986{ 3993{
3987 const struct tcp_sock *tp = tcp_sk(sk); 3994 struct tcp_sock *tp = tcp_sk(sk);
3988 const struct tcphdr *th = tcp_hdr(skb); 3995 struct tcphdr *th = tcp_hdr(skb);
3989 u32 seq = TCP_SKB_CB(skb)->seq; 3996 u32 seq = TCP_SKB_CB(skb)->seq;
3990 u32 ack = TCP_SKB_CB(skb)->ack_seq; 3997 u32 ack = TCP_SKB_CB(skb)->ack_seq;
3991 3998
@@ -4002,7 +4009,7 @@ static int tcp_disordered_ack(const struct sock *sk, const struct sk_buff *skb)
4002 (s32)(tp->rx_opt.ts_recent - tp->rx_opt.rcv_tsval) <= (inet_csk(sk)->icsk_rto * 1024) / HZ); 4009 (s32)(tp->rx_opt.ts_recent - tp->rx_opt.rcv_tsval) <= (inet_csk(sk)->icsk_rto * 1024) / HZ);
4003} 4010}
4004 4011
4005static inline bool tcp_paws_discard(const struct sock *sk, 4012static inline int tcp_paws_discard(const struct sock *sk,
4006 const struct sk_buff *skb) 4013 const struct sk_buff *skb)
4007{ 4014{
4008 const struct tcp_sock *tp = tcp_sk(sk); 4015 const struct tcp_sock *tp = tcp_sk(sk);
@@ -4024,14 +4031,14 @@ static inline bool tcp_paws_discard(const struct sock *sk,
4024 * (borrowed from freebsd) 4031 * (borrowed from freebsd)
4025 */ 4032 */
4026 4033
4027static inline bool tcp_sequence(const struct tcp_sock *tp, u32 seq, u32 end_seq) 4034static inline int tcp_sequence(struct tcp_sock *tp, u32 seq, u32 end_seq)
4028{ 4035{
4029 return !before(end_seq, tp->rcv_wup) && 4036 return !before(end_seq, tp->rcv_wup) &&
4030 !after(seq, tp->rcv_nxt + tcp_receive_window(tp)); 4037 !after(seq, tp->rcv_nxt + tcp_receive_window(tp));
4031} 4038}
4032 4039
4033/* When we get a reset we do this. */ 4040/* When we get a reset we do this. */
4034void tcp_reset(struct sock *sk) 4041static void tcp_reset(struct sock *sk)
4035{ 4042{
4036 /* We want the right error as BSD sees it (and indeed as we do). */ 4043 /* We want the right error as BSD sees it (and indeed as we do). */
4037 switch (sk->sk_state) { 4044 switch (sk->sk_state) {
@@ -4069,7 +4076,7 @@ void tcp_reset(struct sock *sk)
4069 * 4076 *
4070 * If we are in FINWAIT-2, a received FIN moves us to TIME-WAIT. 4077 * If we are in FINWAIT-2, a received FIN moves us to TIME-WAIT.
4071 */ 4078 */
4072static void tcp_fin(struct sock *sk) 4079static void tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th)
4073{ 4080{
4074 struct tcp_sock *tp = tcp_sk(sk); 4081 struct tcp_sock *tp = tcp_sk(sk);
4075 4082
@@ -4113,7 +4120,7 @@ static void tcp_fin(struct sock *sk)
4113 /* Only TCP_LISTEN and TCP_CLOSE are left, in these 4120 /* Only TCP_LISTEN and TCP_CLOSE are left, in these
4114 * cases we should never reach this piece of code. 4121 * cases we should never reach this piece of code.
4115 */ 4122 */
4116 pr_err("%s: Impossible, sk->sk_state=%d\n", 4123 printk(KERN_ERR "%s: Impossible, sk->sk_state=%d\n",
4117 __func__, sk->sk_state); 4124 __func__, sk->sk_state);
4118 break; 4125 break;
4119 } 4126 }
@@ -4138,7 +4145,7 @@ static void tcp_fin(struct sock *sk)
4138 } 4145 }
4139} 4146}
4140 4147
4141static inline bool tcp_sack_extend(struct tcp_sack_block *sp, u32 seq, 4148static inline int tcp_sack_extend(struct tcp_sack_block *sp, u32 seq,
4142 u32 end_seq) 4149 u32 end_seq)
4143{ 4150{
4144 if (!after(seq, sp->end_seq) && !after(sp->start_seq, end_seq)) { 4151 if (!after(seq, sp->end_seq) && !after(sp->start_seq, end_seq)) {
@@ -4146,9 +4153,9 @@ static inline bool tcp_sack_extend(struct tcp_sack_block *sp, u32 seq,
4146 sp->start_seq = seq; 4153 sp->start_seq = seq;
4147 if (after(end_seq, sp->end_seq)) 4154 if (after(end_seq, sp->end_seq))
4148 sp->end_seq = end_seq; 4155 sp->end_seq = end_seq;
4149 return true; 4156 return 1;
4150 } 4157 }
4151 return false; 4158 return 0;
4152} 4159}
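tcp_sack_extend() and the DSACK code around it lean on the before()/after() sequence-number helpers, which are just signed 32-bit differences and therefore wraparound-safe. An illustrative standalone version of those helpers and of the touch-or-overlap test above, with names invented for the example:

#include <stdbool.h>
#include <stdint.h>

static bool seq_before(uint32_t a, uint32_t b)	/* a < b modulo 2^32 */
{
	return (int32_t)(a - b) < 0;
}

static bool seq_after(uint32_t a, uint32_t b)	/* a > b modulo 2^32 */
{
	return seq_before(b, a);
}

static bool sack_blocks_touch(uint32_t sp_start, uint32_t sp_end,
			      uint32_t seq, uint32_t end_seq)
{
	/* mirrors !after(seq, sp->end_seq) && !after(sp->start_seq, end_seq) */
	return !seq_after(seq, sp_end) && !seq_after(sp_start, end_seq);
}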
4153 4160
4154static void tcp_dsack_set(struct sock *sk, u32 seq, u32 end_seq) 4161static void tcp_dsack_set(struct sock *sk, u32 seq, u32 end_seq)
@@ -4181,7 +4188,7 @@ static void tcp_dsack_extend(struct sock *sk, u32 seq, u32 end_seq)
4181 tcp_sack_extend(tp->duplicate_sack, seq, end_seq); 4188 tcp_sack_extend(tp->duplicate_sack, seq, end_seq);
4182} 4189}
4183 4190
4184static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb) 4191static void tcp_send_dupack(struct sock *sk, struct sk_buff *skb)
4185{ 4192{
4186 struct tcp_sock *tp = tcp_sk(sk); 4193 struct tcp_sock *tp = tcp_sk(sk);
4187 4194
@@ -4340,258 +4347,37 @@ static void tcp_ofo_queue(struct sock *sk)
4340 __skb_queue_tail(&sk->sk_receive_queue, skb); 4347 __skb_queue_tail(&sk->sk_receive_queue, skb);
4341 tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; 4348 tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
4342 if (tcp_hdr(skb)->fin) 4349 if (tcp_hdr(skb)->fin)
4343 tcp_fin(sk); 4350 tcp_fin(skb, sk, tcp_hdr(skb));
4344 } 4351 }
4345} 4352}
4346 4353
4347static bool tcp_prune_ofo_queue(struct sock *sk); 4354static int tcp_prune_ofo_queue(struct sock *sk);
4348static int tcp_prune_queue(struct sock *sk); 4355static int tcp_prune_queue(struct sock *sk);
4349 4356
4350static int tcp_try_rmem_schedule(struct sock *sk, struct sk_buff *skb, 4357static inline int tcp_try_rmem_schedule(struct sock *sk, unsigned int size)
4351 unsigned int size)
4352{ 4358{
4353 if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf || 4359 if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
4354 !sk_rmem_schedule(sk, skb, size)) { 4360 !sk_rmem_schedule(sk, size)) {
4355 4361
4356 if (tcp_prune_queue(sk) < 0) 4362 if (tcp_prune_queue(sk) < 0)
4357 return -1; 4363 return -1;
4358 4364
4359 if (!sk_rmem_schedule(sk, skb, size)) { 4365 if (!sk_rmem_schedule(sk, size)) {
4360 if (!tcp_prune_ofo_queue(sk)) 4366 if (!tcp_prune_ofo_queue(sk))
4361 return -1; 4367 return -1;
4362 4368
4363 if (!sk_rmem_schedule(sk, skb, size)) 4369 if (!sk_rmem_schedule(sk, size))
4364 return -1; 4370 return -1;
4365 } 4371 }
4366 } 4372 }
4367 return 0; 4373 return 0;
4368} 4374}
4369 4375
4370/**
4371 * tcp_try_coalesce - try to merge skb to prior one
4372 * @sk: socket
4373 * @to: prior buffer
4374 * @from: buffer to add in queue
4375 * @fragstolen: pointer to boolean
4376 *
4377 * Before queueing skb @from after @to, try to merge them
4378 * to reduce overall memory use and queue lengths, if cost is small.
4379 * Packets in ofo or receive queues can stay a long time.
4380 * Better try to coalesce them right now to avoid future collapses.
4381 * Returns true if caller should free @from instead of queueing it
4382 */
4383static bool tcp_try_coalesce(struct sock *sk,
4384 struct sk_buff *to,
4385 struct sk_buff *from,
4386 bool *fragstolen)
4387{
4388 int delta;
4389
4390 *fragstolen = false;
4391
4392 if (tcp_hdr(from)->fin)
4393 return false;
4394
4395 /* It's possible this segment overlaps with prior segment in queue */
4396 if (TCP_SKB_CB(from)->seq != TCP_SKB_CB(to)->end_seq)
4397 return false;
4398
4399 if (!skb_try_coalesce(to, from, fragstolen, &delta))
4400 return false;
4401
4402 atomic_add(delta, &sk->sk_rmem_alloc);
4403 sk_mem_charge(sk, delta);
4404 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRCVCOALESCE);
4405 TCP_SKB_CB(to)->end_seq = TCP_SKB_CB(from)->end_seq;
4406 TCP_SKB_CB(to)->ack_seq = TCP_SKB_CB(from)->ack_seq;
4407 return true;
4408}
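The only sequence-space precondition tcp_try_coalesce() (removed above) places on a merge is that the new segment start exactly where the previous one ends and that it not carry a FIN; whether the merge is actually cheap is then decided by skb_try_coalesce(), which the sketch below omits. Illustrative only, with an invented segment struct:

#include <stdbool.h>
#include <stdint.h>

struct seg {
	uint32_t seq;
	uint32_t end_seq;
	bool fin;
};

static bool can_coalesce(const struct seg *to, const struct seg *from)
{
	if (from->fin)				/* never fold a FIN away */
		return false;
	return from->seq == to->end_seq;	/* strictly back-to-back */
}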
4409
4410static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
4411{
4412 struct tcp_sock *tp = tcp_sk(sk);
4413 struct sk_buff *skb1;
4414 u32 seq, end_seq;
4415
4416 TCP_ECN_check_ce(tp, skb);
4417
4418 if (unlikely(tcp_try_rmem_schedule(sk, skb, skb->truesize))) {
4419 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPOFODROP);
4420 __kfree_skb(skb);
4421 return;
4422 }
4423
4424 /* Disable header prediction. */
4425 tp->pred_flags = 0;
4426 inet_csk_schedule_ack(sk);
4427
4428 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPOFOQUEUE);
4429 SOCK_DEBUG(sk, "out of order segment: rcv_next %X seq %X - %X\n",
4430 tp->rcv_nxt, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);
4431
4432 skb1 = skb_peek_tail(&tp->out_of_order_queue);
4433 if (!skb1) {
4434 /* Initial out of order segment, build 1 SACK. */
4435 if (tcp_is_sack(tp)) {
4436 tp->rx_opt.num_sacks = 1;
4437 tp->selective_acks[0].start_seq = TCP_SKB_CB(skb)->seq;
4438 tp->selective_acks[0].end_seq =
4439 TCP_SKB_CB(skb)->end_seq;
4440 }
4441 __skb_queue_head(&tp->out_of_order_queue, skb);
4442 goto end;
4443 }
4444
4445 seq = TCP_SKB_CB(skb)->seq;
4446 end_seq = TCP_SKB_CB(skb)->end_seq;
4447
4448 if (seq == TCP_SKB_CB(skb1)->end_seq) {
4449 bool fragstolen;
4450
4451 if (!tcp_try_coalesce(sk, skb1, skb, &fragstolen)) {
4452 __skb_queue_after(&tp->out_of_order_queue, skb1, skb);
4453 } else {
4454 kfree_skb_partial(skb, fragstolen);
4455 skb = NULL;
4456 }
4457
4458 if (!tp->rx_opt.num_sacks ||
4459 tp->selective_acks[0].end_seq != seq)
4460 goto add_sack;
4461
4462 /* Common case: data arrive in order after hole. */
4463 tp->selective_acks[0].end_seq = end_seq;
4464 goto end;
4465 }
4466
4467 /* Find place to insert this segment. */
4468 while (1) {
4469 if (!after(TCP_SKB_CB(skb1)->seq, seq))
4470 break;
4471 if (skb_queue_is_first(&tp->out_of_order_queue, skb1)) {
4472 skb1 = NULL;
4473 break;
4474 }
4475 skb1 = skb_queue_prev(&tp->out_of_order_queue, skb1);
4476 }
4477
4478 /* Do skb overlap to previous one? */
4479 if (skb1 && before(seq, TCP_SKB_CB(skb1)->end_seq)) {
4480 if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
4481 /* All the bits are present. Drop. */
4482 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPOFOMERGE);
4483 __kfree_skb(skb);
4484 skb = NULL;
4485 tcp_dsack_set(sk, seq, end_seq);
4486 goto add_sack;
4487 }
4488 if (after(seq, TCP_SKB_CB(skb1)->seq)) {
4489 /* Partial overlap. */
4490 tcp_dsack_set(sk, seq,
4491 TCP_SKB_CB(skb1)->end_seq);
4492 } else {
4493 if (skb_queue_is_first(&tp->out_of_order_queue,
4494 skb1))
4495 skb1 = NULL;
4496 else
4497 skb1 = skb_queue_prev(
4498 &tp->out_of_order_queue,
4499 skb1);
4500 }
4501 }
4502 if (!skb1)
4503 __skb_queue_head(&tp->out_of_order_queue, skb);
4504 else
4505 __skb_queue_after(&tp->out_of_order_queue, skb1, skb);
4506
4507 /* And clean segments covered by new one as whole. */
4508 while (!skb_queue_is_last(&tp->out_of_order_queue, skb)) {
4509 skb1 = skb_queue_next(&tp->out_of_order_queue, skb);
4510
4511 if (!after(end_seq, TCP_SKB_CB(skb1)->seq))
4512 break;
4513 if (before(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
4514 tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq,
4515 end_seq);
4516 break;
4517 }
4518 __skb_unlink(skb1, &tp->out_of_order_queue);
4519 tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq,
4520 TCP_SKB_CB(skb1)->end_seq);
4521 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPOFOMERGE);
4522 __kfree_skb(skb1);
4523 }
4524
4525add_sack:
4526 if (tcp_is_sack(tp))
4527 tcp_sack_new_ofo_skb(sk, seq, end_seq);
4528end:
4529 if (skb)
4530 skb_set_owner_r(skb, sk);
4531}
4532
4533static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int hdrlen,
4534 bool *fragstolen)
4535{
4536 int eaten;
4537 struct sk_buff *tail = skb_peek_tail(&sk->sk_receive_queue);
4538
4539 __skb_pull(skb, hdrlen);
4540 eaten = (tail &&
4541 tcp_try_coalesce(sk, tail, skb, fragstolen)) ? 1 : 0;
4542 tcp_sk(sk)->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
4543 if (!eaten) {
4544 __skb_queue_tail(&sk->sk_receive_queue, skb);
4545 skb_set_owner_r(skb, sk);
4546 }
4547 return eaten;
4548}
4549
4550int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size)
4551{
4552 struct sk_buff *skb = NULL;
4553 struct tcphdr *th;
4554 bool fragstolen;
4555
4556 if (size == 0)
4557 return 0;
4558
4559 skb = alloc_skb(size + sizeof(*th), sk->sk_allocation);
4560 if (!skb)
4561 goto err;
4562
4563 if (tcp_try_rmem_schedule(sk, skb, size + sizeof(*th)))
4564 goto err_free;
4565
4566 th = (struct tcphdr *)skb_put(skb, sizeof(*th));
4567 skb_reset_transport_header(skb);
4568 memset(th, 0, sizeof(*th));
4569
4570 if (memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size))
4571 goto err_free;
4572
4573 TCP_SKB_CB(skb)->seq = tcp_sk(sk)->rcv_nxt;
4574 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + size;
4575 TCP_SKB_CB(skb)->ack_seq = tcp_sk(sk)->snd_una - 1;
4576
4577 if (tcp_queue_rcv(sk, skb, sizeof(*th), &fragstolen)) {
4578 WARN_ON_ONCE(fragstolen); /* should not happen */
4579 __kfree_skb(skb);
4580 }
4581 return size;
4582
4583err_free:
4584 kfree_skb(skb);
4585err:
4586 return -ENOMEM;
4587}
4588
4589static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) 4376static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
4590{ 4377{
4591 const struct tcphdr *th = tcp_hdr(skb); 4378 struct tcphdr *th = tcp_hdr(skb);
4592 struct tcp_sock *tp = tcp_sk(sk); 4379 struct tcp_sock *tp = tcp_sk(sk);
4593 int eaten = -1; 4380 int eaten = -1;
4594 bool fragstolen = false;
4595 4381
4596 if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq) 4382 if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq)
4597 goto drop; 4383 goto drop;
@@ -4633,16 +4419,17 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
4633 if (eaten <= 0) { 4419 if (eaten <= 0) {
4634queue_and_out: 4420queue_and_out:
4635 if (eaten < 0 && 4421 if (eaten < 0 &&
4636 tcp_try_rmem_schedule(sk, skb, skb->truesize)) 4422 tcp_try_rmem_schedule(sk, skb->truesize))
4637 goto drop; 4423 goto drop;
4638 4424
4639 eaten = tcp_queue_rcv(sk, skb, 0, &fragstolen); 4425 skb_set_owner_r(skb, sk);
4426 __skb_queue_tail(&sk->sk_receive_queue, skb);
4640 } 4427 }
4641 tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; 4428 tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
4642 if (skb->len) 4429 if (skb->len)
4643 tcp_event_data_recv(sk, skb); 4430 tcp_event_data_recv(sk, skb);
4644 if (th->fin) 4431 if (th->fin)
4645 tcp_fin(sk); 4432 tcp_fin(skb, sk, th);
4646 4433
4647 if (!skb_queue_empty(&tp->out_of_order_queue)) { 4434 if (!skb_queue_empty(&tp->out_of_order_queue)) {
4648 tcp_ofo_queue(sk); 4435 tcp_ofo_queue(sk);
@@ -4660,8 +4447,8 @@ queue_and_out:
4660 tcp_fast_path_check(sk); 4447 tcp_fast_path_check(sk);
4661 4448
4662 if (eaten > 0) 4449 if (eaten > 0)
4663 kfree_skb_partial(skb, fragstolen); 4450 __kfree_skb(skb);
4664 if (!sock_flag(sk, SOCK_DEAD)) 4451 else if (!sock_flag(sk, SOCK_DEAD))
4665 sk->sk_data_ready(sk, 0); 4452 sk->sk_data_ready(sk, 0);
4666 return; 4453 return;
4667 } 4454 }
@@ -4701,7 +4488,105 @@ drop:
4701 goto queue_and_out; 4488 goto queue_and_out;
4702 } 4489 }
4703 4490
4704 tcp_data_queue_ofo(sk, skb); 4491 TCP_ECN_check_ce(tp, skb);
4492
4493 if (tcp_try_rmem_schedule(sk, skb->truesize))
4494 goto drop;
4495
4496 /* Disable header prediction. */
4497 tp->pred_flags = 0;
4498 inet_csk_schedule_ack(sk);
4499
4500 SOCK_DEBUG(sk, "out of order segment: rcv_next %X seq %X - %X\n",
4501 tp->rcv_nxt, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);
4502
4503 skb_set_owner_r(skb, sk);
4504
4505 if (!skb_peek(&tp->out_of_order_queue)) {
4506 /* Initial out of order segment, build 1 SACK. */
4507 if (tcp_is_sack(tp)) {
4508 tp->rx_opt.num_sacks = 1;
4509 tp->selective_acks[0].start_seq = TCP_SKB_CB(skb)->seq;
4510 tp->selective_acks[0].end_seq =
4511 TCP_SKB_CB(skb)->end_seq;
4512 }
4513 __skb_queue_head(&tp->out_of_order_queue, skb);
4514 } else {
4515 struct sk_buff *skb1 = skb_peek_tail(&tp->out_of_order_queue);
4516 u32 seq = TCP_SKB_CB(skb)->seq;
4517 u32 end_seq = TCP_SKB_CB(skb)->end_seq;
4518
4519 if (seq == TCP_SKB_CB(skb1)->end_seq) {
4520 __skb_queue_after(&tp->out_of_order_queue, skb1, skb);
4521
4522 if (!tp->rx_opt.num_sacks ||
4523 tp->selective_acks[0].end_seq != seq)
4524 goto add_sack;
4525
4526 /* Common case: data arrive in order after hole. */
4527 tp->selective_acks[0].end_seq = end_seq;
4528 return;
4529 }
4530
4531 /* Find place to insert this segment. */
4532 while (1) {
4533 if (!after(TCP_SKB_CB(skb1)->seq, seq))
4534 break;
4535 if (skb_queue_is_first(&tp->out_of_order_queue, skb1)) {
4536 skb1 = NULL;
4537 break;
4538 }
4539 skb1 = skb_queue_prev(&tp->out_of_order_queue, skb1);
4540 }
4541
4542 /* Does skb overlap the previous one? */
4543 if (skb1 && before(seq, TCP_SKB_CB(skb1)->end_seq)) {
4544 if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
4545 /* All the bits are present. Drop. */
4546 __kfree_skb(skb);
4547 tcp_dsack_set(sk, seq, end_seq);
4548 goto add_sack;
4549 }
4550 if (after(seq, TCP_SKB_CB(skb1)->seq)) {
4551 /* Partial overlap. */
4552 tcp_dsack_set(sk, seq,
4553 TCP_SKB_CB(skb1)->end_seq);
4554 } else {
4555 if (skb_queue_is_first(&tp->out_of_order_queue,
4556 skb1))
4557 skb1 = NULL;
4558 else
4559 skb1 = skb_queue_prev(
4560 &tp->out_of_order_queue,
4561 skb1);
4562 }
4563 }
4564 if (!skb1)
4565 __skb_queue_head(&tp->out_of_order_queue, skb);
4566 else
4567 __skb_queue_after(&tp->out_of_order_queue, skb1, skb);
4568
4569 /* And clean segments covered by new one as whole. */
4570 while (!skb_queue_is_last(&tp->out_of_order_queue, skb)) {
4571 skb1 = skb_queue_next(&tp->out_of_order_queue, skb);
4572
4573 if (!after(end_seq, TCP_SKB_CB(skb1)->seq))
4574 break;
4575 if (before(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
4576 tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq,
4577 end_seq);
4578 break;
4579 }
4580 __skb_unlink(skb1, &tp->out_of_order_queue);
4581 tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq,
4582 TCP_SKB_CB(skb1)->end_seq);
4583 __kfree_skb(skb1);
4584 }
4585
4586add_sack:
4587 if (tcp_is_sack(tp))
4588 tcp_sack_new_ofo_skb(sk, seq, end_seq);
4589 }
4705} 4590}
4706 4591
4707static struct sk_buff *tcp_collapse_one(struct sock *sk, struct sk_buff *skb, 4592static struct sk_buff *tcp_collapse_one(struct sock *sk, struct sk_buff *skb,
@@ -4880,10 +4765,10 @@ static void tcp_collapse_ofo_queue(struct sock *sk)
4880 * Purge the out-of-order queue. 4765 * Purge the out-of-order queue.
4881 * Return true if queue was pruned. 4766 * Return true if queue was pruned.
4882 */ 4767 */
4883static bool tcp_prune_ofo_queue(struct sock *sk) 4768static int tcp_prune_ofo_queue(struct sock *sk)
4884{ 4769{
4885 struct tcp_sock *tp = tcp_sk(sk); 4770 struct tcp_sock *tp = tcp_sk(sk);
4886 bool res = false; 4771 int res = 0;
4887 4772
4888 if (!skb_queue_empty(&tp->out_of_order_queue)) { 4773 if (!skb_queue_empty(&tp->out_of_order_queue)) {
4889 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_OFOPRUNED); 4774 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_OFOPRUNED);
@@ -4897,7 +4782,7 @@ static bool tcp_prune_ofo_queue(struct sock *sk)
4897 if (tp->rx_opt.sack_ok) 4782 if (tp->rx_opt.sack_ok)
4898 tcp_sack_reset(&tp->rx_opt); 4783 tcp_sack_reset(&tp->rx_opt);
4899 sk_mem_reclaim(sk); 4784 sk_mem_reclaim(sk);
4900 res = true; 4785 res = 1;
4901 } 4786 }
4902 return res; 4787 return res;
4903} 4788}
@@ -4919,7 +4804,7 @@ static int tcp_prune_queue(struct sock *sk)
4919 4804
4920 if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) 4805 if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
4921 tcp_clamp_window(sk); 4806 tcp_clamp_window(sk);
4922 else if (sk_under_memory_pressure(sk)) 4807 else if (tcp_memory_pressure)
4923 tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss); 4808 tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss);
4924 4809
4925 tcp_collapse_ofo_queue(sk); 4810 tcp_collapse_ofo_queue(sk);
@@ -4974,29 +4859,29 @@ void tcp_cwnd_application_limited(struct sock *sk)
4974 tp->snd_cwnd_stamp = tcp_time_stamp; 4859 tp->snd_cwnd_stamp = tcp_time_stamp;
4975} 4860}
4976 4861
4977static bool tcp_should_expand_sndbuf(const struct sock *sk) 4862static int tcp_should_expand_sndbuf(struct sock *sk)
4978{ 4863{
4979 const struct tcp_sock *tp = tcp_sk(sk); 4864 struct tcp_sock *tp = tcp_sk(sk);
4980 4865
4981 /* If the user specified a specific send buffer setting, do 4866 /* If the user specified a specific send buffer setting, do
4982 * not modify it. 4867 * not modify it.
4983 */ 4868 */
4984 if (sk->sk_userlocks & SOCK_SNDBUF_LOCK) 4869 if (sk->sk_userlocks & SOCK_SNDBUF_LOCK)
4985 return false; 4870 return 0;
4986 4871
4987 /* If we are under global TCP memory pressure, do not expand. */ 4872 /* If we are under global TCP memory pressure, do not expand. */
4988 if (sk_under_memory_pressure(sk)) 4873 if (tcp_memory_pressure)
4989 return false; 4874 return 0;
4990 4875
4991 /* If we are under soft global TCP memory pressure, do not expand. */ 4876 /* If we are under soft global TCP memory pressure, do not expand. */
4992 if (sk_memory_allocated(sk) >= sk_prot_mem_limits(sk, 0)) 4877 if (atomic_long_read(&tcp_memory_allocated) >= sysctl_tcp_mem[0])
4993 return false; 4878 return 0;
4994 4879
4995 /* If we filled the congestion window, do not expand. */ 4880 /* If we filled the congestion window, do not expand. */
4996 if (tp->packets_out >= tp->snd_cwnd) 4881 if (tp->packets_out >= tp->snd_cwnd)
4997 return false; 4882 return 0;
4998 4883
4999 return true; 4884 return 1;
5000} 4885}
5001 4886
5002/* When incoming ACK allowed to free some skb from write_queue, 4887/* When incoming ACK allowed to free some skb from write_queue,
@@ -5010,10 +4895,8 @@ static void tcp_new_space(struct sock *sk)
5010 struct tcp_sock *tp = tcp_sk(sk); 4895 struct tcp_sock *tp = tcp_sk(sk);
5011 4896
5012 if (tcp_should_expand_sndbuf(sk)) { 4897 if (tcp_should_expand_sndbuf(sk)) {
5013 int sndmem = SKB_TRUESIZE(max_t(u32, 4898 int sndmem = max_t(u32, tp->rx_opt.mss_clamp, tp->mss_cache) +
5014 tp->rx_opt.mss_clamp, 4899 MAX_TCP_HEADER + 16 + sizeof(struct sk_buff);
5015 tp->mss_cache) +
5016 MAX_TCP_HEADER);
5017 int demanded = max_t(unsigned int, tp->snd_cwnd, 4900 int demanded = max_t(unsigned int, tp->snd_cwnd,
5018 tp->reordering + 1); 4901 tp->reordering + 1);
5019 sndmem *= 2 * demanded; 4902 sndmem *= 2 * demanded;
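
The send-buffer sizing above reserves room for two full congestion windows of maximum-sized segments. Here is a hedged, stand-alone illustration of the arithmetic; the per-segment cost below is a made-up round number, since the real value is derived from the MSS plus TCP/skb header overhead (SKB_TRUESIZE()/MAX_TCP_HEADER) and depends on the kernel configuration:

#include <stdio.h>

int main(void)
{
        unsigned long per_skb = 2048;   /* hypothetical truesize per segment */
        unsigned int snd_cwnd = 40, reordering = 3;

        unsigned int demanded = snd_cwnd > reordering + 1 ?
                                snd_cwnd : reordering + 1;

        /* Mirrors "sndmem *= 2 * demanded": space for two windows in flight. */
        unsigned long sndmem = per_skb * 2 * demanded;

        printf("suggested send buffer: %lu bytes\n", sndmem);
        return 0;
}
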
@@ -5085,7 +4968,7 @@ static inline void tcp_ack_snd_check(struct sock *sk)
5085 * either form (or just set the sysctl tcp_stdurg). 4968 * either form (or just set the sysctl tcp_stdurg).
5086 */ 4969 */
5087 4970
5088static void tcp_check_urg(struct sock *sk, const struct tcphdr *th) 4971static void tcp_check_urg(struct sock *sk, struct tcphdr *th)
5089{ 4972{
5090 struct tcp_sock *tp = tcp_sk(sk); 4973 struct tcp_sock *tp = tcp_sk(sk);
5091 u32 ptr = ntohs(th->urg_ptr); 4974 u32 ptr = ntohs(th->urg_ptr);
@@ -5151,7 +5034,7 @@ static void tcp_check_urg(struct sock *sk, const struct tcphdr *th)
5151} 5034}
5152 5035
5153/* This is the 'fast' part of urgent handling. */ 5036/* This is the 'fast' part of urgent handling. */
5154static void tcp_urg(struct sock *sk, struct sk_buff *skb, const struct tcphdr *th) 5037static void tcp_urg(struct sock *sk, struct sk_buff *skb, struct tcphdr *th)
5155{ 5038{
5156 struct tcp_sock *tp = tcp_sk(sk); 5039 struct tcp_sock *tp = tcp_sk(sk);
5157 5040
@@ -5214,7 +5097,7 @@ static __sum16 __tcp_checksum_complete_user(struct sock *sk,
5214 return result; 5097 return result;
5215} 5098}
5216 5099
5217static inline bool tcp_checksum_complete_user(struct sock *sk, 5100static inline int tcp_checksum_complete_user(struct sock *sk,
5218 struct sk_buff *skb) 5101 struct sk_buff *skb)
5219{ 5102{
5220 return !skb_csum_unnecessary(skb) && 5103 return !skb_csum_unnecessary(skb) &&
@@ -5222,19 +5105,19 @@ static inline bool tcp_checksum_complete_user(struct sock *sk,
5222} 5105}
5223 5106
5224#ifdef CONFIG_NET_DMA 5107#ifdef CONFIG_NET_DMA
5225static bool tcp_dma_try_early_copy(struct sock *sk, struct sk_buff *skb, 5108static int tcp_dma_try_early_copy(struct sock *sk, struct sk_buff *skb,
5226 int hlen) 5109 int hlen)
5227{ 5110{
5228 struct tcp_sock *tp = tcp_sk(sk); 5111 struct tcp_sock *tp = tcp_sk(sk);
5229 int chunk = skb->len - hlen; 5112 int chunk = skb->len - hlen;
5230 int dma_cookie; 5113 int dma_cookie;
5231 bool copied_early = false; 5114 int copied_early = 0;
5232 5115
5233 if (tp->ucopy.wakeup) 5116 if (tp->ucopy.wakeup)
5234 return false; 5117 return 0;
5235 5118
5236 if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list) 5119 if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
5237 tp->ucopy.dma_chan = net_dma_find_channel(); 5120 tp->ucopy.dma_chan = dma_find_channel(DMA_MEMCPY);
5238 5121
5239 if (tp->ucopy.dma_chan && skb_csum_unnecessary(skb)) { 5122 if (tp->ucopy.dma_chan && skb_csum_unnecessary(skb)) {
5240 5123
@@ -5247,7 +5130,7 @@ static bool tcp_dma_try_early_copy(struct sock *sk, struct sk_buff *skb,
5247 goto out; 5130 goto out;
5248 5131
5249 tp->ucopy.dma_cookie = dma_cookie; 5132 tp->ucopy.dma_cookie = dma_cookie;
5250 copied_early = true; 5133 copied_early = 1;
5251 5134
5252 tp->ucopy.len -= chunk; 5135 tp->ucopy.len -= chunk;
5253 tp->copied_seq += chunk; 5136 tp->copied_seq += chunk;
@@ -5271,10 +5154,10 @@ out:
5271/* Does PAWS and seqno based validation of an incoming segment, flags will 5154/* Does PAWS and seqno based validation of an incoming segment, flags will
5272 * play significant role here. 5155 * play significant role here.
5273 */ 5156 */
5274static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb, 5157static int tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
5275 const struct tcphdr *th, int syn_inerr) 5158 struct tcphdr *th, int syn_inerr)
5276{ 5159{
5277 const u8 *hash_location; 5160 u8 *hash_location;
5278 struct tcp_sock *tp = tcp_sk(sk); 5161 struct tcp_sock *tp = tcp_sk(sk);
5279 5162
5280 /* RFC1323: H1. Apply PAWS check first. */ 5163 /* RFC1323: H1. Apply PAWS check first. */
@@ -5297,48 +5180,38 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
5297 * an acknowledgment should be sent in reply (unless the RST 5180 * an acknowledgment should be sent in reply (unless the RST
5298 * bit is set, if so drop the segment and return)". 5181 * bit is set, if so drop the segment and return)".
5299 */ 5182 */
5300 if (!th->rst) { 5183 if (!th->rst)
5301 if (th->syn)
5302 goto syn_challenge;
5303 tcp_send_dupack(sk, skb); 5184 tcp_send_dupack(sk, skb);
5304 }
5305 goto discard; 5185 goto discard;
5306 } 5186 }
5307 5187
5308 /* Step 2: check RST bit */ 5188 /* Step 2: check RST bit */
5309 if (th->rst) { 5189 if (th->rst) {
5310 /* RFC 5961 3.2 : 5190 tcp_reset(sk);
5311 * If sequence number exactly matches RCV.NXT, then
5312 * RESET the connection
5313 * else
5314 * Send a challenge ACK
5315 */
5316 if (TCP_SKB_CB(skb)->seq == tp->rcv_nxt)
5317 tcp_reset(sk);
5318 else
5319 tcp_send_challenge_ack(sk);
5320 goto discard; 5191 goto discard;
5321 } 5192 }
5322 5193
5194 /* ts_recent update must be made after we are sure that the packet
5195 * is in window.
5196 */
5197 tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq);
5198
5323 /* step 3: check security and precedence [ignored] */ 5199 /* step 3: check security and precedence [ignored] */
5324 5200
5325 /* step 4: Check for a SYN 5201 /* step 4: Check for a SYN in window. */
5326 * RFC 5961 4.2: Send a challenge ack 5202 if (th->syn && !before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
5327 */
5328 if (th->syn) {
5329syn_challenge:
5330 if (syn_inerr) 5203 if (syn_inerr)
5331 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS); 5204 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
5332 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNCHALLENGE); 5205 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONSYN);
5333 tcp_send_challenge_ack(sk); 5206 tcp_reset(sk);
5334 goto discard; 5207 return -1;
5335 } 5208 }
5336 5209
5337 return true; 5210 return 1;
5338 5211
5339discard: 5212discard:
5340 __kfree_skb(skb); 5213 __kfree_skb(skb);
5341 return false; 5214 return 0;
5342} 5215}
5343 5216
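
As a reading aid, the RFC 5961 variant of tcp_validate_incoming() being removed here collapses into a small decision table. The enum and helper below are invented for illustration; only the branch structure mirrors the code, and the PAWS check that precedes it is omitted:

#include <stdbool.h>
#include <stdio.h>

enum verdict { ACCEPT, SEND_DUPACK, SEND_CHALLENGE_ACK, RESET_CONN, DROP };

/* seq_in_window: segment passed the receive-window check (step 1);
 * seq_is_rcv_nxt: segment starts exactly at rcv_nxt. */
static enum verdict validate(bool seq_in_window, bool seq_is_rcv_nxt,
                             bool rst, bool syn)
{
        if (!seq_in_window) {
                if (rst)
                        return DROP;               /* out-of-window RST */
                if (syn)
                        return SEND_CHALLENGE_ACK; /* syn_challenge path */
                return SEND_DUPACK;
        }
        if (rst)                                   /* RFC 5961 3.2 */
                return seq_is_rcv_nxt ? RESET_CONN : SEND_CHALLENGE_ACK;
        if (syn)                                   /* RFC 5961 4.2 */
                return SEND_CHALLENGE_ACK;
        return ACCEPT;
}

int main(void)
{
        /* In-window RST that does not sit at rcv_nxt: challenge ACK, no reset. */
        printf("%d\n", validate(true, false, true, false) == SEND_CHALLENGE_ACK);
        /* In-window RST exactly at rcv_nxt: reset the connection. */
        printf("%d\n", validate(true, true, true, false) == RESET_CONN);
        return 0;
}
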
5344/* 5217/*
@@ -5365,12 +5238,11 @@ discard:
5365 * tcp_data_queue when everything is OK. 5238 * tcp_data_queue when everything is OK.
5366 */ 5239 */
5367int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, 5240int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
5368 const struct tcphdr *th, unsigned int len) 5241 struct tcphdr *th, unsigned len)
5369{ 5242{
5370 struct tcp_sock *tp = tcp_sk(sk); 5243 struct tcp_sock *tp = tcp_sk(sk);
5244 int res;
5371 5245
5372 if (unlikely(sk->sk_rx_dst == NULL))
5373 inet_csk(sk)->icsk_af_ops->sk_rx_dst_set(sk, skb);
5374 /* 5246 /*
5375 * Header prediction. 5247 * Header prediction.
5376 * The code loosely follows the one in the famous 5248 * The code loosely follows the one in the famous
@@ -5450,14 +5322,11 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
5450 } else { 5322 } else {
5451 int eaten = 0; 5323 int eaten = 0;
5452 int copied_early = 0; 5324 int copied_early = 0;
5453 bool fragstolen = false;
5454 5325
5455 if (tp->copied_seq == tp->rcv_nxt && 5326 if (tp->copied_seq == tp->rcv_nxt &&
5456 len - tcp_header_len <= tp->ucopy.len) { 5327 len - tcp_header_len <= tp->ucopy.len) {
5457#ifdef CONFIG_NET_DMA 5328#ifdef CONFIG_NET_DMA
5458 if (tp->ucopy.task == current && 5329 if (tcp_dma_try_early_copy(sk, skb, tcp_header_len)) {
5459 sock_owned_by_user(sk) &&
5460 tcp_dma_try_early_copy(sk, skb, tcp_header_len)) {
5461 copied_early = 1; 5330 copied_early = 1;
5462 eaten = 1; 5331 eaten = 1;
5463 } 5332 }
@@ -5510,8 +5379,10 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
5510 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPHPHITS); 5379 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPHPHITS);
5511 5380
5512 /* Bulk data transfer: receiver */ 5381 /* Bulk data transfer: receiver */
5513 eaten = tcp_queue_rcv(sk, skb, tcp_header_len, 5382 __skb_pull(skb, tcp_header_len);
5514 &fragstolen); 5383 __skb_queue_tail(&sk->sk_receive_queue, skb);
5384 skb_set_owner_r(skb, sk);
5385 tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
5515 } 5386 }
5516 5387
5517 tcp_event_data_recv(sk, skb); 5388 tcp_event_data_recv(sk, skb);
@@ -5533,8 +5404,9 @@ no_ack:
5533 else 5404 else
5534#endif 5405#endif
5535 if (eaten) 5406 if (eaten)
5536 kfree_skb_partial(skb, fragstolen); 5407 __kfree_skb(skb);
5537 sk->sk_data_ready(sk, 0); 5408 else
5409 sk->sk_data_ready(sk, 0);
5538 return 0; 5410 return 0;
5539 } 5411 }
5540 } 5412 }
@@ -5543,25 +5415,18 @@ slow_path:
5543 if (len < (th->doff << 2) || tcp_checksum_complete_user(sk, skb)) 5415 if (len < (th->doff << 2) || tcp_checksum_complete_user(sk, skb))
5544 goto csum_error; 5416 goto csum_error;
5545 5417
5546 if (!th->ack && !th->rst)
5547 goto discard;
5548
5549 /* 5418 /*
5550 * Standard slow path. 5419 * Standard slow path.
5551 */ 5420 */
5552 5421
5553 if (!tcp_validate_incoming(sk, skb, th, 1)) 5422 res = tcp_validate_incoming(sk, skb, th, 1);
5554 return 0; 5423 if (res <= 0)
5424 return -res;
5555 5425
5556step5: 5426step5:
5557 if (tcp_ack(sk, skb, FLAG_SLOWPATH) < 0) 5427 if (th->ack && tcp_ack(sk, skb, FLAG_SLOWPATH) < 0)
5558 goto discard; 5428 goto discard;
5559 5429
5560 /* ts_recent update must be made after we are sure that the packet
5561 * is in window.
5562 */
5563 tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq);
5564
5565 tcp_rcv_rtt_measure_ts(sk, skb); 5430 tcp_rcv_rtt_measure_ts(sk, skb);
5566 5431
5567 /* Process urgent data. */ 5432 /* Process urgent data. */
@@ -5583,101 +5448,16 @@ discard:
5583} 5448}
5584EXPORT_SYMBOL(tcp_rcv_established); 5449EXPORT_SYMBOL(tcp_rcv_established);
5585 5450
5586void tcp_finish_connect(struct sock *sk, struct sk_buff *skb)
5587{
5588 struct tcp_sock *tp = tcp_sk(sk);
5589 struct inet_connection_sock *icsk = inet_csk(sk);
5590
5591 tcp_set_state(sk, TCP_ESTABLISHED);
5592
5593 if (skb != NULL) {
5594 icsk->icsk_af_ops->sk_rx_dst_set(sk, skb);
5595 security_inet_conn_established(sk, skb);
5596 }
5597
5598 /* Make sure socket is routed, for correct metrics. */
5599 icsk->icsk_af_ops->rebuild_header(sk);
5600
5601 tcp_init_metrics(sk);
5602
5603 tcp_init_congestion_control(sk);
5604
5605 /* Prevent spurious tcp_cwnd_restart() on first data
5606 * packet.
5607 */
5608 tp->lsndtime = tcp_time_stamp;
5609
5610 tcp_init_buffer_space(sk);
5611
5612 if (sock_flag(sk, SOCK_KEEPOPEN))
5613 inet_csk_reset_keepalive_timer(sk, keepalive_time_when(tp));
5614
5615 if (!tp->rx_opt.snd_wscale)
5616 __tcp_fast_path_on(tp, tp->snd_wnd);
5617 else
5618 tp->pred_flags = 0;
5619
5620 if (!sock_flag(sk, SOCK_DEAD)) {
5621 sk->sk_state_change(sk);
5622 sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT);
5623 }
5624}
5625
5626static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
5627 struct tcp_fastopen_cookie *cookie)
5628{
5629 struct tcp_sock *tp = tcp_sk(sk);
5630 struct sk_buff *data = tp->syn_data ? tcp_write_queue_head(sk) : NULL;
5631 u16 mss = tp->rx_opt.mss_clamp;
5632 bool syn_drop;
5633
5634 if (mss == tp->rx_opt.user_mss) {
5635 struct tcp_options_received opt;
5636 const u8 *hash_location;
5637
5638 /* Get original SYNACK MSS value if user MSS sets mss_clamp */
5639 tcp_clear_options(&opt);
5640 opt.user_mss = opt.mss_clamp = 0;
5641 tcp_parse_options(synack, &opt, &hash_location, 0, NULL);
5642 mss = opt.mss_clamp;
5643 }
5644
5645 if (!tp->syn_fastopen) /* Ignore an unsolicited cookie */
5646 cookie->len = -1;
5647
5648 /* The SYN-ACK has neither a cookie nor an acknowledgment of the data. Presumably
5649 * the remote receives only the retransmitted (regular) SYNs: either
5650 * the original SYN-data or the corresponding SYN-ACK is lost.
5651 */
5652 syn_drop = (cookie->len <= 0 && data &&
5653 inet_csk(sk)->icsk_retransmits);
5654
5655 tcp_fastopen_cache_set(sk, mss, cookie, syn_drop);
5656
5657 if (data) { /* Retransmit unacked data in SYN */
5658 tcp_for_write_queue_from(data, sk) {
5659 if (data == tcp_send_head(sk) ||
5660 __tcp_retransmit_skb(sk, data))
5661 break;
5662 }
5663 tcp_rearm_rto(sk);
5664 return true;
5665 }
5666 tp->syn_data_acked = tp->syn_data;
5667 return false;
5668}
5669
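
The Fast Open fallback heuristic above boils down to a single predicate: if data was sent with the SYN, the SYN-ACK carried no cookie, and the SYN had to be retransmitted, assume the SYN-data never reached the peer and record that in the metrics cache. A sketch with invented parameter names:

#include <stdbool.h>
#include <stdio.h>

/* Mirrors: syn_drop = (cookie->len <= 0 && data && icsk->icsk_retransmits); */
static bool fastopen_syn_dropped(int cookie_len, bool sent_syn_data,
                                 int syn_retransmits)
{
        return cookie_len <= 0 && sent_syn_data && syn_retransmits > 0;
}

int main(void)
{
        printf("%d\n", fastopen_syn_dropped(-1, true, 1)); /* 1: assume SYN-data was dropped */
        printf("%d\n", fastopen_syn_dropped(8, true, 0));  /* 0: cookie received, no retransmit */
        return 0;
}
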
5670static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, 5451static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
5671 const struct tcphdr *th, unsigned int len) 5452 struct tcphdr *th, unsigned len)
5672{ 5453{
5673 const u8 *hash_location; 5454 u8 *hash_location;
5674 struct inet_connection_sock *icsk = inet_csk(sk); 5455 struct inet_connection_sock *icsk = inet_csk(sk);
5675 struct tcp_sock *tp = tcp_sk(sk); 5456 struct tcp_sock *tp = tcp_sk(sk);
5676 struct tcp_cookie_values *cvp = tp->cookie_values; 5457 struct tcp_cookie_values *cvp = tp->cookie_values;
5677 struct tcp_fastopen_cookie foc = { .len = -1 };
5678 int saved_clamp = tp->rx_opt.mss_clamp; 5458 int saved_clamp = tp->rx_opt.mss_clamp;
5679 5459
5680 tcp_parse_options(skb, &tp->rx_opt, &hash_location, 0, &foc); 5460 tcp_parse_options(skb, &tp->rx_opt, &hash_location, 0);
5681 5461
5682 if (th->ack) { 5462 if (th->ack) {
5683 /* rfc793: 5463 /* rfc793:
@@ -5687,9 +5467,11 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
5687 * If SEG.ACK =< ISS, or SEG.ACK > SND.NXT, send 5467 * If SEG.ACK =< ISS, or SEG.ACK > SND.NXT, send
5688 * a reset (unless the RST bit is set, if so drop 5468 * a reset (unless the RST bit is set, if so drop
5689 * the segment and return)" 5469 * the segment and return)"
5470 *
5471 * We do not send data with SYN, so that RFC-correct
5472 * test reduces to:
5690 */ 5473 */
5691 if (!after(TCP_SKB_CB(skb)->ack_seq, tp->snd_una) || 5474 if (TCP_SKB_CB(skb)->ack_seq != tp->snd_nxt)
5692 after(TCP_SKB_CB(skb)->ack_seq, tp->snd_nxt))
5693 goto reset_and_undo; 5475 goto reset_and_undo;
5694 5476
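
The two ACK tests above implement the same RFC 793 acceptability rule for SYN-SENT: ISS < SEG.ACK <= SND.NXT. When no data is sent with the SYN (as the comment added above notes), SND.NXT is ISS + 1 and the equality test is equivalent; the range form also covers SYN-data. A stand-alone sketch of the range form (snd_una still holds the ISS at this point; names are illustrative):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool seq_after(uint32_t a, uint32_t b) { return (int32_t)(b - a) < 0; }

/* Acceptable if ISS < SEG.ACK <= SND.NXT, i.e. the ACK covers our SYN. */
static bool ack_acceptable(uint32_t ack_seq, uint32_t snd_una, uint32_t snd_nxt)
{
        return seq_after(ack_seq, snd_una) && !seq_after(ack_seq, snd_nxt);
}

int main(void)
{
        uint32_t iss = 1000, snd_nxt = 1001;    /* the SYN consumed one sequence number */

        printf("%d\n", ack_acceptable(1001, iss, snd_nxt)); /* 1: acknowledges our SYN */
        printf("%d\n", ack_acceptable(1000, iss, snd_nxt)); /* 0: stale, takes reset_and_undo */
        return 0;
}
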
5695 if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr && 5477 if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
@@ -5731,7 +5513,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
5731 5513
5732 TCP_ECN_rcv_synack(tp, th); 5514 TCP_ECN_rcv_synack(tp, th);
5733 5515
5734 tcp_init_wl(tp, TCP_SKB_CB(skb)->seq); 5516 tp->snd_wl1 = TCP_SKB_CB(skb)->seq;
5735 tcp_ack(sk, skb, FLAG_SLOWPATH); 5517 tcp_ack(sk, skb, FLAG_SLOWPATH);
5736 5518
5737 /* Ok.. it's good. Set up sequence numbers and 5519 /* Ok.. it's good. Set up sequence numbers and
@@ -5744,6 +5526,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
5744 * never scaled. 5526 * never scaled.
5745 */ 5527 */
5746 tp->snd_wnd = ntohs(th->window); 5528 tp->snd_wnd = ntohs(th->window);
5529 tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);
5747 5530
5748 if (!tp->rx_opt.wscale_ok) { 5531 if (!tp->rx_opt.wscale_ok) {
5749 tp->rx_opt.snd_wscale = tp->rx_opt.rcv_wscale = 0; 5532 tp->rx_opt.snd_wscale = tp->rx_opt.rcv_wscale = 0;
@@ -5797,12 +5580,36 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
5797 } 5580 }
5798 5581
5799 smp_mb(); 5582 smp_mb();
5583 tcp_set_state(sk, TCP_ESTABLISHED);
5800 5584
5801 tcp_finish_connect(sk, skb); 5585 security_inet_conn_established(sk, skb);
5802 5586
5803 if ((tp->syn_fastopen || tp->syn_data) && 5587 /* Make sure socket is routed, for correct metrics. */
5804 tcp_rcv_fastopen_synack(sk, skb, &foc)) 5588 icsk->icsk_af_ops->rebuild_header(sk);
5805 return -1; 5589
5590 tcp_init_metrics(sk);
5591
5592 tcp_init_congestion_control(sk);
5593
5594 /* Prevent spurious tcp_cwnd_restart() on first data
5595 * packet.
5596 */
5597 tp->lsndtime = tcp_time_stamp;
5598
5599 tcp_init_buffer_space(sk);
5600
5601 if (sock_flag(sk, SOCK_KEEPOPEN))
5602 inet_csk_reset_keepalive_timer(sk, keepalive_time_when(tp));
5603
5604 if (!tp->rx_opt.snd_wscale)
5605 __tcp_fast_path_on(tp, tp->snd_wnd);
5606 else
5607 tp->pred_flags = 0;
5608
5609 if (!sock_flag(sk, SOCK_DEAD)) {
5610 sk->sk_state_change(sk);
5611 sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT);
5612 }
5806 5613
5807 if (sk->sk_write_pending || 5614 if (sk->sk_write_pending ||
5808 icsk->icsk_accept_queue.rskq_defer_accept || 5615 icsk->icsk_accept_queue.rskq_defer_accept ||
@@ -5816,6 +5623,8 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
5816 */ 5623 */
5817 inet_csk_schedule_ack(sk); 5624 inet_csk_schedule_ack(sk);
5818 icsk->icsk_ack.lrcvtime = tcp_time_stamp; 5625 icsk->icsk_ack.lrcvtime = tcp_time_stamp;
5626 icsk->icsk_ack.ato = TCP_ATO_MIN;
5627 tcp_incr_quickack(sk);
5819 tcp_enter_quickack_mode(sk); 5628 tcp_enter_quickack_mode(sk);
5820 inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, 5629 inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
5821 TCP_DELACK_MAX, TCP_RTO_MAX); 5630 TCP_DELACK_MAX, TCP_RTO_MAX);
@@ -5881,9 +5690,7 @@ discard:
5881 tcp_send_synack(sk); 5690 tcp_send_synack(sk);
5882#if 0 5691#if 0
5883 /* Note, we could accept data and URG from this segment. 5692 /* Note, we could accept data and URG from this segment.
5884 * There are no obstacles to make this (except that we must 5693 * There are no obstacles to make this.
5885 * either change tcp_recvmsg() to prevent it from returning data
5886 * before 3WHS completes per RFC793, or employ TCP Fast Open).
5887 * 5694 *
5888 * However, if we ignore data in ACKless segments sometimes, 5695 * However, if we ignore data in ACKless segments sometimes,
5889 * we have no reasons to accept it sometimes. 5696 * we have no reasons to accept it sometimes.
@@ -5919,12 +5726,12 @@ reset_and_undo:
5919 */ 5726 */
5920 5727
5921int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, 5728int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
5922 const struct tcphdr *th, unsigned int len) 5729 struct tcphdr *th, unsigned len)
5923{ 5730{
5924 struct tcp_sock *tp = tcp_sk(sk); 5731 struct tcp_sock *tp = tcp_sk(sk);
5925 struct inet_connection_sock *icsk = inet_csk(sk); 5732 struct inet_connection_sock *icsk = inet_csk(sk);
5926 struct request_sock *req;
5927 int queued = 0; 5733 int queued = 0;
5734 int res;
5928 5735
5929 tp->rx_opt.saw_tstamp = 0; 5736 tp->rx_opt.saw_tstamp = 0;
5930 5737
@@ -5940,8 +5747,6 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
5940 goto discard; 5747 goto discard;
5941 5748
5942 if (th->syn) { 5749 if (th->syn) {
5943 if (th->fin)
5944 goto discard;
5945 if (icsk->icsk_af_ops->conn_request(sk, skb) < 0) 5750 if (icsk->icsk_af_ops->conn_request(sk, skb) < 0)
5946 return 1; 5751 return 1;
5947 5752
@@ -5979,47 +5784,18 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
5979 return 0; 5784 return 0;
5980 } 5785 }
5981 5786
5982 req = tp->fastopen_rsk; 5787 res = tcp_validate_incoming(sk, skb, th, 0);
5983 if (req != NULL) { 5788 if (res <= 0)
5984 WARN_ON_ONCE(sk->sk_state != TCP_SYN_RECV && 5789 return -res;
5985 sk->sk_state != TCP_FIN_WAIT1);
5986
5987 if (tcp_check_req(sk, skb, req, NULL, true) == NULL)
5988 goto discard;
5989 }
5990
5991 if (!th->ack && !th->rst)
5992 goto discard;
5993
5994 if (!tcp_validate_incoming(sk, skb, th, 0))
5995 return 0;
5996 5790
5997 /* step 5: check the ACK field */ 5791 /* step 5: check the ACK field */
5998 if (true) { 5792 if (th->ack) {
5999 int acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH) > 0; 5793 int acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH) > 0;
6000 5794
6001 switch (sk->sk_state) { 5795 switch (sk->sk_state) {
6002 case TCP_SYN_RECV: 5796 case TCP_SYN_RECV:
6003 if (acceptable) { 5797 if (acceptable) {
6004 /* Once we leave TCP_SYN_RECV, we no longer 5798 tp->copied_seq = tp->rcv_nxt;
6005 * need req so release it.
6006 */
6007 if (req) {
6008 tcp_synack_rtt_meas(sk, req);
6009 tp->total_retrans = req->num_retrans;
6010
6011 reqsk_fastopen_remove(sk, req, false);
6012 } else {
6013 /* Make sure socket is routed, for
6014 * correct metrics.
6015 */
6016 icsk->icsk_af_ops->rebuild_header(sk);
6017 tcp_init_congestion_control(sk);
6018
6019 tcp_mtup_init(sk);
6020 tcp_init_buffer_space(sk);
6021 tp->copied_seq = tp->rcv_nxt;
6022 }
6023 smp_mb(); 5799 smp_mb();
6024 tcp_set_state(sk, TCP_ESTABLISHED); 5800 tcp_set_state(sk, TCP_ESTABLISHED);
6025 sk->sk_state_change(sk); 5801 sk->sk_state_change(sk);
@@ -6041,27 +5817,23 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
6041 if (tp->rx_opt.tstamp_ok) 5817 if (tp->rx_opt.tstamp_ok)
6042 tp->advmss -= TCPOLEN_TSTAMP_ALIGNED; 5818 tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
6043 5819
6044 if (req) { 5820 /* Make sure socket is routed, for
6045 /* Re-arm the timer because data may 5821 * correct metrics.
6046 * have been sent out. This is similar 5822 */
6047 * to the regular data transmission case 5823 icsk->icsk_af_ops->rebuild_header(sk);
6048 * when new data has just been ack'ed. 5824
6049 * 5825 tcp_init_metrics(sk);
6050 * (TFO) - we could try to be more 5826
6051 * aggressive and retransmit any data 5827 tcp_init_congestion_control(sk);
6052 * sooner based on when they were sent
6053 * out.
6054 */
6055 tcp_rearm_rto(sk);
6056 } else
6057 tcp_init_metrics(sk);
6058 5828
6059 /* Prevent spurious tcp_cwnd_restart() on 5829 /* Prevent spurious tcp_cwnd_restart() on
6060 * first data packet. 5830 * first data packet.
6061 */ 5831 */
6062 tp->lsndtime = tcp_time_stamp; 5832 tp->lsndtime = tcp_time_stamp;
6063 5833
5834 tcp_mtup_init(sk);
6064 tcp_initialize_rcv_mss(sk); 5835 tcp_initialize_rcv_mss(sk);
5836 tcp_init_buffer_space(sk);
6065 tcp_fast_path_on(tp); 5837 tcp_fast_path_on(tp);
6066 } else { 5838 } else {
6067 return 1; 5839 return 1;
@@ -6069,33 +5841,10 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
6069 break; 5841 break;
6070 5842
6071 case TCP_FIN_WAIT1: 5843 case TCP_FIN_WAIT1:
6072 /* If we enter the TCP_FIN_WAIT1 state and we are a
6073 * Fast Open socket and this is the first acceptable
6074 * ACK we have received, this would have acknowledged
6075 * our SYNACK so stop the SYNACK timer.
6076 */
6077 if (req != NULL) {
6078 /* Return RST if ack_seq is invalid.
6079 * Note that RFC793 only says to generate a
6080 * DUPACK for it but for TCP Fast Open it seems
6081 * better to treat this case like TCP_SYN_RECV
6082 * above.
6083 */
6084 if (!acceptable)
6085 return 1;
6086 /* We no longer need the request sock. */
6087 reqsk_fastopen_remove(sk, req, false);
6088 tcp_rearm_rto(sk);
6089 }
6090 if (tp->snd_una == tp->write_seq) { 5844 if (tp->snd_una == tp->write_seq) {
6091 struct dst_entry *dst;
6092
6093 tcp_set_state(sk, TCP_FIN_WAIT2); 5845 tcp_set_state(sk, TCP_FIN_WAIT2);
6094 sk->sk_shutdown |= SEND_SHUTDOWN; 5846 sk->sk_shutdown |= SEND_SHUTDOWN;
6095 5847 dst_confirm(__sk_dst_get(sk));
6096 dst = __sk_dst_get(sk);
6097 if (dst)
6098 dst_confirm(dst);
6099 5848
6100 if (!sock_flag(sk, SOCK_DEAD)) 5849 if (!sock_flag(sk, SOCK_DEAD))
6101 /* Wake up lingering close() */ 5850 /* Wake up lingering close() */
@@ -6145,12 +5894,8 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
6145 } 5894 }
6146 break; 5895 break;
6147 } 5896 }
6148 } 5897 } else
6149 5898 goto discard;
6150 /* ts_recent update must be made after we are sure that the packet
6151 * is in window.
6152 */
6153 tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq);
6154 5899
6155 /* step 6: check the URG bit */ 5900 /* step 6: check the URG bit */
6156 tcp_urg(sk, skb, th); 5901 tcp_urg(sk, skb, th);