Diffstat (limited to 'net/ipv4/tcp_input.c')
-rw-r--r--  net/ipv4/tcp_input.c  204
1 file changed, 133 insertions, 71 deletions
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 25a89eaa669d..227cba79fa6b 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -267,11 +267,31 @@ static bool TCP_ECN_rcv_ecn_echo(const struct tcp_sock *tp, const struct tcphdr
  * 1. Tuning sk->sk_sndbuf, when connection enters established state.
  */
 
-static void tcp_fixup_sndbuf(struct sock *sk)
+static void tcp_sndbuf_expand(struct sock *sk)
 {
-	int sndmem = SKB_TRUESIZE(tcp_sk(sk)->rx_opt.mss_clamp + MAX_TCP_HEADER);
+	const struct tcp_sock *tp = tcp_sk(sk);
+	int sndmem, per_mss;
+	u32 nr_segs;
+
+	/* Worst case is non GSO/TSO : each frame consumes one skb
+	 * and skb->head is kmalloced using power of two area of memory
+	 */
+	per_mss = max_t(u32, tp->rx_opt.mss_clamp, tp->mss_cache) +
+		  MAX_TCP_HEADER +
+		  SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+
+	per_mss = roundup_pow_of_two(per_mss) +
+		  SKB_DATA_ALIGN(sizeof(struct sk_buff));
+
+	nr_segs = max_t(u32, TCP_INIT_CWND, tp->snd_cwnd);
+	nr_segs = max_t(u32, nr_segs, tp->reordering + 1);
+
+	/* Fast Recovery (RFC 5681 3.2) :
+	 * Cubic needs 1.7 factor, rounded to 2 to include
+	 * extra cushion (application might react slowly to POLLOUT)
+	 */
+	sndmem = 2 * nr_segs * per_mss;
 
-	sndmem *= TCP_INIT_CWND;
 	if (sk->sk_sndbuf < sndmem)
 		sk->sk_sndbuf = min(sndmem, sysctl_tcp_wmem[2]);
 }
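As a back-of-the-envelope illustration of the new sizing, the sketch below redoes the tcp_sndbuf_expand() arithmetic in userspace. The stand-in constants for MAX_TCP_HEADER, SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) and SKB_DATA_ALIGN(sizeof(struct sk_buff)) are rough, config-dependent assumptions, not values taken from the patch; only the shape of the computation is the point.

/* Illustrative userspace model of the tcp_sndbuf_expand() arithmetic above. */
#include <stdio.h>
#include <stdint.h>

static uint32_t roundup_pow_of_two(uint32_t x)
{
	uint32_t r = 1;

	while (r < x)
		r <<= 1;
	return r;
}

int main(void)
{
	uint32_t mss = 1460;            /* typical Ethernet MSS (assumed) */
	uint32_t max_tcp_header = 160;  /* ~MAX_TCP_HEADER, config dependent (assumed) */
	uint32_t shinfo = 320;          /* ~SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) (assumed) */
	uint32_t skbuff = 256;          /* ~SKB_DATA_ALIGN(sizeof(struct sk_buff)) (assumed) */
	uint32_t nr_segs = 10;          /* TCP_INIT_CWND, no reordering */
	uint32_t per_mss, sndmem;

	/* worst case truesize of one non-GSO frame, rounded up like kmalloc does */
	per_mss = roundup_pow_of_two(mss + max_tcp_header + shinfo) + skbuff;
	/* 2x cushion for fast recovery and slow application wakeup */
	sndmem = 2 * nr_segs * per_mss;
	printf("per_mss ~ %u bytes, sndmem ~ %u bytes (~%u KB)\n",
	       per_mss, sndmem, sndmem / 1024);
	return 0;
}

Compared with the old code, which only multiplied one mss_clamp truesize by TCP_INIT_CWND, this accounts for per-skb overhead, the current cwnd and reordering, plus the 2x fast-recovery cushion.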
@@ -355,6 +375,12 @@ static void tcp_fixup_rcvbuf(struct sock *sk)
 	rcvmem = 2 * SKB_TRUESIZE(mss + MAX_TCP_HEADER) *
 		 tcp_default_init_rwnd(mss);
 
+	/* Dynamic Right Sizing (DRS) has 2 to 3 RTT latency
+	 * Allow enough cushion so that sender is not limited by our window
+	 */
+	if (sysctl_tcp_moderate_rcvbuf)
+		rcvmem <<= 2;
+
 	if (sk->sk_rcvbuf < rcvmem)
 		sk->sk_rcvbuf = min(rcvmem, sysctl_tcp_rmem[2]);
 }
@@ -370,9 +396,11 @@ void tcp_init_buffer_space(struct sock *sk)
 	if (!(sk->sk_userlocks & SOCK_RCVBUF_LOCK))
 		tcp_fixup_rcvbuf(sk);
 	if (!(sk->sk_userlocks & SOCK_SNDBUF_LOCK))
-		tcp_fixup_sndbuf(sk);
+		tcp_sndbuf_expand(sk);
 
 	tp->rcvq_space.space = tp->rcv_wnd;
+	tp->rcvq_space.time = tcp_time_stamp;
+	tp->rcvq_space.seq = tp->copied_seq;
 
 	maxwin = tcp_full_space(sk);
 
@@ -512,48 +540,62 @@ void tcp_rcv_space_adjust(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	int time;
-	int space;
-
-	if (tp->rcvq_space.time == 0)
-		goto new_measure;
+	int copied;
 
 	time = tcp_time_stamp - tp->rcvq_space.time;
 	if (time < (tp->rcv_rtt_est.rtt >> 3) || tp->rcv_rtt_est.rtt == 0)
 		return;
 
-	space = 2 * (tp->copied_seq - tp->rcvq_space.seq);
+	/* Number of bytes copied to user in last RTT */
+	copied = tp->copied_seq - tp->rcvq_space.seq;
+	if (copied <= tp->rcvq_space.space)
+		goto new_measure;
 
-	space = max(tp->rcvq_space.space, space);
+	/* A bit of theory :
+	 * copied = bytes received in previous RTT, our base window
+	 * To cope with packet losses, we need a 2x factor
+	 * To cope with slow start, and sender growing its cwin by 100 %
+	 * every RTT, we need a 4x factor, because the ACK we are sending
+	 * now is for the next RTT, not the current one :
+	 * <prev RTT . ><current RTT .. ><next RTT .... >
+	 */
 
-	if (tp->rcvq_space.space != space) {
-		int rcvmem;
+	if (sysctl_tcp_moderate_rcvbuf &&
+	    !(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) {
+		int rcvwin, rcvmem, rcvbuf;
 
-		tp->rcvq_space.space = space;
+		/* minimal window to cope with packet losses, assuming
+		 * steady state. Add some cushion because of small variations.
+		 */
+		rcvwin = (copied << 1) + 16 * tp->advmss;
 
-		if (sysctl_tcp_moderate_rcvbuf &&
-		    !(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) {
-			int new_clamp = space;
+		/* If rate increased by 25%,
+		 *	assume slow start, rcvwin = 3 * copied
+		 * If rate increased by 50%,
+		 *	assume sender can use 2x growth, rcvwin = 4 * copied
+		 */
+		if (copied >=
+		    tp->rcvq_space.space + (tp->rcvq_space.space >> 2)) {
+			if (copied >=
+			    tp->rcvq_space.space + (tp->rcvq_space.space >> 1))
+				rcvwin <<= 1;
+			else
+				rcvwin += (rcvwin >> 1);
+		}
 
-			/* Receive space grows, normalize in order to
-			 * take into account packet headers and sk_buff
-			 * structure overhead.
-			 */
-			space /= tp->advmss;
-			if (!space)
-				space = 1;
-			rcvmem = SKB_TRUESIZE(tp->advmss + MAX_TCP_HEADER);
-			while (tcp_win_from_space(rcvmem) < tp->advmss)
-				rcvmem += 128;
-			space *= rcvmem;
-			space = min(space, sysctl_tcp_rmem[2]);
-			if (space > sk->sk_rcvbuf) {
-				sk->sk_rcvbuf = space;
-
-				/* Make the window clamp follow along. */
-				tp->window_clamp = new_clamp;
-			}
+		rcvmem = SKB_TRUESIZE(tp->advmss + MAX_TCP_HEADER);
+		while (tcp_win_from_space(rcvmem) < tp->advmss)
+			rcvmem += 128;
+
+		rcvbuf = min(rcvwin / tp->advmss * rcvmem, sysctl_tcp_rmem[2]);
+		if (rcvbuf > sk->sk_rcvbuf) {
+			sk->sk_rcvbuf = rcvbuf;
+
+			/* Make the window clamp follow along. */
+			tp->window_clamp = rcvwin;
 		}
 	}
+	tp->rcvq_space.space = copied;
 
 new_measure:
 	tp->rcvq_space.seq = tp->copied_seq;
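To see how the rcvwin heuristic above scales the advertised window, here is a small userspace model of just that computation; the byte counts and advmss are made-up sample values, not measurements from the patch.

#include <stdio.h>

/* Mirror of the rcvwin logic in tcp_rcv_space_adjust() above. */
static int drs_rcvwin(int copied, int prev_space, int advmss)
{
	int rcvwin = (copied << 1) + 16 * advmss;		/* 2x + cushion */

	if (copied >= prev_space + (prev_space >> 2)) {		/* rate up >= 25% */
		if (copied >= prev_space + (prev_space >> 1))	/* rate up >= 50% */
			rcvwin <<= 1;				/* ~4 * copied */
		else
			rcvwin += rcvwin >> 1;			/* ~3 * copied */
	}
	return rcvwin;
}

int main(void)
{
	int prev = 100000, advmss = 1448;	/* made-up sample values */

	printf("copied=110k -> rcvwin=%d\n", drs_rcvwin(110000, prev, advmss));
	printf("copied=130k -> rcvwin=%d\n", drs_rcvwin(130000, prev, advmss));
	printf("copied=200k -> rcvwin=%d\n", drs_rcvwin(200000, prev, advmss));
	return 0;
}

The kernel then converts rcvwin from payload bytes into an sk_rcvbuf value via rcvmem (the truesize of one advmss-sized segment) and caps the result at sysctl_tcp_rmem[2], as shown in the hunk.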
@@ -629,6 +671,7 @@ static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	long m = mrtt; /* RTT */
+	u32 srtt = tp->srtt;
 
 	/* The following amusing code comes from Jacobson's
 	 * article in SIGCOMM '88. Note that rtt and mdev
@@ -646,11 +689,9 @@ static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt)
 	 * does not matter how to _calculate_ it. Seems, it was trap
 	 * that VJ failed to avoid. 8)
 	 */
-	if (m == 0)
-		m = 1;
-	if (tp->srtt != 0) {
-		m -= (tp->srtt >> 3);	/* m is now error in rtt est */
-		tp->srtt += m;		/* rtt = 7/8 rtt + 1/8 new */
+	if (srtt != 0) {
+		m -= (srtt >> 3);	/* m is now error in rtt est */
+		srtt += m;		/* rtt = 7/8 rtt + 1/8 new */
 		if (m < 0) {
 			m = -m;		/* m is now abs(error) */
 			m -= (tp->mdev >> 2);	/* similar update on mdev */
@@ -681,11 +722,12 @@ static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt)
 		}
 	} else {
 		/* no previous measure. */
-		tp->srtt = m << 3;	/* take the measured time to be rtt */
+		srtt = m << 3;		/* take the measured time to be rtt */
 		tp->mdev = m << 1;	/* make sure rto = 3*rtt */
 		tp->mdev_max = tp->rttvar = max(tp->mdev, tcp_rto_min(sk));
 		tp->rtt_seq = tp->snd_nxt;
 	}
+	tp->srtt = max(1U, srtt);
 }
 
 /* Set the sk_pacing_rate to allow proper sizing of TSO packets.
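The srtt handling above is easier to follow with the scaling written out: tp->srtt holds 8 times the smoothed RTT, so adding the error term m - (srtt >> 3) to it is exactly Jacobson's update srtt = 7/8 * srtt + 1/8 * m, and the new max(1U, srtt) floor keeps a valid measurement from collapsing to 0, which elsewhere means "no sample yet". A small userspace check with made-up samples:

#include <stdio.h>

int main(void)
{
	unsigned int srtt = 0;		/* scaled by 8; 0 means no sample yet */
	unsigned int samples[] = { 100, 120, 80, 90, 300 };	/* made-up RTTs in jiffies */
	unsigned int i;

	for (i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
		long m = samples[i];

		if (srtt != 0) {
			m -= srtt >> 3;	/* error vs. current estimate */
			srtt += m;	/* srtt8 = 7/8 * srtt8 + m */
		} else {
			srtt = m << 3;	/* first sample seeds the estimate */
		}
		printf("sample %3u -> srtt %3u\n", samples[i], srtt >> 3);
	}
	return 0;
}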
@@ -704,8 +746,10 @@ static void tcp_update_pacing_rate(struct sock *sk)
 
 	rate *= max(tp->snd_cwnd, tp->packets_out);
 
-	/* Correction for small srtt : minimum srtt being 8 (1 jiffy << 3),
-	 * be conservative and assume srtt = 1 (125 us instead of 1.25 ms)
+	/* Correction for small srtt and scheduling constraints.
+	 * For small rtt, consider noise is too high, and use
+	 * the minimal value (srtt = 1 -> 125 us for HZ=1000)
+	 *
 	 * We probably need usec resolution in the future.
 	 * Note: This also takes care of possible srtt=0 case,
 	 * when tcp_rtt_estimator() was not yet called.
@@ -713,13 +757,18 @@ static void tcp_update_pacing_rate(struct sock *sk)
 	if (tp->srtt > 8 + 2)
 		do_div(rate, tp->srtt);
 
-	sk->sk_pacing_rate = min_t(u64, rate, ~0U);
+	/* ACCESS_ONCE() is needed because sch_fq fetches sk_pacing_rate
+	 * without any lock. We want to make sure compiler wont store
+	 * intermediate values in this location.
+	 */
+	ACCESS_ONCE(sk->sk_pacing_rate) = min_t(u64, rate,
+						sk->sk_max_pacing_rate);
 }
 
 /* Calculate rto without backoff. This is the second half of Van Jacobson's
  * routine referred to above.
  */
-void tcp_set_rto(struct sock *sk)
+static void tcp_set_rto(struct sock *sk)
 {
 	const struct tcp_sock *tp = tcp_sk(sk);
 	/* Old crap is replaced with new one. 8)
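For reference, ACCESS_ONCE() boils down to a volatile access: as the new comment says, sch_fq reads sk_pacing_rate without a lock, so the writer must not let the compiler tear the store or expose an intermediate value. A simplified userspace rendition of the idea (the real macro lives in the kernel's compiler headers; the variable and function names here are hypothetical):

/* Simplified stand-in for the kernel's ACCESS_ONCE(): the volatile cast forces
 * exactly one store, which the optimizer may not split or re-issue, so a
 * lockless reader observes either the old or the new value, never a mix.
 */
#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x))

static unsigned long pacing_rate;	/* hypothetical field read without a lock */

static void set_pacing_rate(unsigned long rate)
{
	ACCESS_ONCE(pacing_rate) = rate;
}

int main(void)
{
	set_pacing_rate(1000000UL);	/* ~1 MB/s, arbitrary example value */
	return (int)(pacing_rate / 1000000UL);
}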
@@ -1284,7 +1333,10 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
 		tp->lost_cnt_hint -= tcp_skb_pcount(prev);
 	}
 
-	TCP_SKB_CB(skb)->tcp_flags |= TCP_SKB_CB(prev)->tcp_flags;
+	TCP_SKB_CB(prev)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags;
+	if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
+		TCP_SKB_CB(prev)->end_seq++;
+
 	if (skb == tcp_highest_sack(sk))
 		tcp_advance_highest_sack(sk, skb);
 
@@ -2853,7 +2905,8 @@ static inline bool tcp_ack_update_rtt(struct sock *sk, const int flag,
 	 * left edge of the send window.
 	 * See draft-ietf-tcplw-high-performance-00, section 3.3.
 	 */
-	if (seq_rtt < 0 && tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr)
+	if (seq_rtt < 0 && tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
+	    flag & FLAG_ACKED)
 		seq_rtt = tcp_time_stamp - tp->rx_opt.rcv_tsecr;
 
 	if (seq_rtt < 0)
@@ -2868,20 +2921,25 @@ static inline bool tcp_ack_update_rtt(struct sock *sk, const int flag,
 }
 
 /* Compute time elapsed between (last) SYNACK and the ACK completing 3WHS. */
-static void tcp_synack_rtt_meas(struct sock *sk, struct request_sock *req)
+static void tcp_synack_rtt_meas(struct sock *sk, const u32 synack_stamp)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	s32 seq_rtt = -1;
 
-	if (tp->lsndtime && !tp->total_retrans)
-		seq_rtt = tcp_time_stamp - tp->lsndtime;
-	tcp_ack_update_rtt(sk, FLAG_SYN_ACKED, seq_rtt, -1);
+	if (synack_stamp && !tp->total_retrans)
+		seq_rtt = tcp_time_stamp - synack_stamp;
+
+	/* If the ACK acks both the SYNACK and the (Fast Open'd) data packets
+	 * sent in SYN_RECV, SYNACK RTT is the smooth RTT computed in tcp_ack()
+	 */
+	if (!tp->srtt)
+		tcp_ack_update_rtt(sk, FLAG_SYN_ACKED, seq_rtt, -1);
 }
 
-static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
+static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 acked, u32 in_flight)
 {
 	const struct inet_connection_sock *icsk = inet_csk(sk);
-	icsk->icsk_ca_ops->cong_avoid(sk, ack, in_flight);
+	icsk->icsk_ca_ops->cong_avoid(sk, ack, acked, in_flight);
 	tcp_sk(sk)->snd_cwnd_stamp = tcp_time_stamp;
 }
 
@@ -2970,7 +3028,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 	const struct inet_connection_sock *icsk = inet_csk(sk);
 	struct sk_buff *skb;
 	u32 now = tcp_time_stamp;
-	int fully_acked = true;
+	bool fully_acked = true;
 	int flag = 0;
 	u32 pkts_acked = 0;
 	u32 reord = tp->packets_out;
@@ -2978,6 +3036,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 	s32 seq_rtt = -1;
 	s32 ca_seq_rtt = -1;
 	ktime_t last_ackt = net_invalid_timestamp();
+	bool rtt_update;
 
 	while ((skb = tcp_write_queue_head(sk)) && skb != tcp_send_head(sk)) {
 		struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
@@ -3054,14 +3113,13 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 	if (skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
 		flag |= FLAG_SACK_RENEGING;
 
-	if (tcp_ack_update_rtt(sk, flag, seq_rtt, sack_rtt) ||
-	    (flag & FLAG_ACKED))
-		tcp_rearm_rto(sk);
+	rtt_update = tcp_ack_update_rtt(sk, flag, seq_rtt, sack_rtt);
 
 	if (flag & FLAG_ACKED) {
 		const struct tcp_congestion_ops *ca_ops
 			= inet_csk(sk)->icsk_ca_ops;
 
+		tcp_rearm_rto(sk);
 		if (unlikely(icsk->icsk_mtup.probe_size &&
 			     !after(tp->mtu_probe.probe_seq_end, tp->snd_una))) {
 			tcp_mtup_probe_success(sk);
@@ -3100,6 +3158,13 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 
 			ca_ops->pkts_acked(sk, pkts_acked, rtt_us);
 		}
+	} else if (skb && rtt_update && sack_rtt >= 0 &&
+		   sack_rtt > (s32)(now - TCP_SKB_CB(skb)->when)) {
+		/* Do not re-arm RTO if the sack RTT is measured from data sent
+		 * after when the head was last (re)transmitted. Otherwise the
+		 * timeout may continue to extend in loss recovery.
+		 */
+		tcp_rearm_rto(sk);
 	}
 
 #if FASTRETRANS_DEBUG > 0
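The guard in the hunk above decides whether a SACK-derived RTT sample may restart the retransmit timer when nothing was cumulatively ACKed. A small userspace rendition of that comparison, with made-up jiffies values:

#include <stdio.h>
#include <stdint.h>

/* head_when is when the head of the write queue was last (re)transmitted. */
static void decide(uint32_t now, uint32_t head_when, int32_t sack_rtt)
{
	if (sack_rtt > (int32_t)(now - head_when))
		printf("sack_rtt=%d: re-arm RTO (head outstanding for less than one sampled RTT)\n",
		       sack_rtt);
	else
		printf("sack_rtt=%d: keep RTO (sample is from data sent after the head; re-arming would keep extending the timer)\n",
		       sack_rtt);
}

int main(void)
{
	decide(1000, 950, 80);	/* SACKed data predates the head's last retransmit */
	decide(1000, 950, 30);	/* SACKed data was sent after the head */
	return 0;
}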
@@ -3288,7 +3353,7 @@ static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag)
 		tcp_init_cwnd_reduction(sk, true);
 		tcp_set_ca_state(sk, TCP_CA_CWR);
 		tcp_end_cwnd_reduction(sk);
-		tcp_set_ca_state(sk, TCP_CA_Open);
+		tcp_try_keep_open(sk);
 		NET_INC_STATS_BH(sock_net(sk),
 				 LINUX_MIB_TCPLOSSPROBERECOVERY);
 	}
@@ -3391,7 +3456,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 
 	/* Advance cwnd if state allows */
 	if (tcp_may_raise_cwnd(sk, flag))
-		tcp_cong_avoid(sk, ack, prior_in_flight);
+		tcp_cong_avoid(sk, ack, acked, prior_in_flight);
 
 	if (tcp_ack_is_dubious(sk, flag)) {
 		is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP));
@@ -3623,7 +3688,7 @@ const u8 *tcp_parse_md5sig_option(const struct tcphdr *th)
 		int opcode = *ptr++;
 		int opsize;
 
-		switch(opcode) {
+		switch (opcode) {
 		case TCPOPT_EOL:
 			return NULL;
 		case TCPOPT_NOP:
@@ -3983,7 +4048,7 @@ static void tcp_sack_remove(struct tcp_sock *tp)
 			WARN_ON(before(tp->rcv_nxt, sp->end_seq));
 
 			/* Zap this SACK, by moving forward any other SACKS. */
-			for (i=this_sack+1; i < num_sacks; i++)
+			for (i = this_sack+1; i < num_sacks; i++)
 				tp->selective_acks[i-1] = tp->selective_acks[i];
 			num_sacks--;
 			continue;
@@ -4701,15 +4766,7 @@ static void tcp_new_space(struct sock *sk)
 	struct tcp_sock *tp = tcp_sk(sk);
 
 	if (tcp_should_expand_sndbuf(sk)) {
-		int sndmem = SKB_TRUESIZE(max_t(u32,
-						tp->rx_opt.mss_clamp,
-						tp->mss_cache) +
-					  MAX_TCP_HEADER);
-		int demanded = max_t(unsigned int, tp->snd_cwnd,
-				     tp->reordering + 1);
-		sndmem *= 2 * demanded;
-		if (sndmem > sk->sk_sndbuf)
-			sk->sk_sndbuf = min(sndmem, sysctl_tcp_wmem[2]);
+		tcp_sndbuf_expand(sk);
 		tp->snd_cwnd_stamp = tcp_time_stamp;
 	}
 
@@ -5584,6 +5641,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 	struct request_sock *req;
 	int queued = 0;
 	bool acceptable;
+	u32 synack_stamp;
 
 	tp->rx_opt.saw_tstamp = 0;
 
@@ -5666,16 +5724,18 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 		 * so release it.
 		 */
 		if (req) {
+			synack_stamp = tcp_rsk(req)->snt_synack;
 			tp->total_retrans = req->num_retrans;
 			reqsk_fastopen_remove(sk, req, false);
 		} else {
+			synack_stamp = tp->lsndtime;
 			/* Make sure socket is routed, for correct metrics. */
 			icsk->icsk_af_ops->rebuild_header(sk);
 			tcp_init_congestion_control(sk);
 
 			tcp_mtup_init(sk);
-			tcp_init_buffer_space(sk);
 			tp->copied_seq = tp->rcv_nxt;
+			tcp_init_buffer_space(sk);
 		}
 		smp_mb();
 		tcp_set_state(sk, TCP_ESTABLISHED);
@@ -5691,7 +5751,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 		tp->snd_una = TCP_SKB_CB(skb)->ack_seq;
 		tp->snd_wnd = ntohs(th->window) << tp->rx_opt.snd_wscale;
 		tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);
-		tcp_synack_rtt_meas(sk, req);
+		tcp_synack_rtt_meas(sk, synack_stamp);
 
 		if (tp->rx_opt.tstamp_ok)
 			tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
@@ -5709,6 +5769,8 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 		} else
 			tcp_init_metrics(sk);
 
+		tcp_update_pacing_rate(sk);
+
 		/* Prevent spurious tcp_cwnd_restart() on first data packet */
 		tp->lsndtime = tcp_time_stamp;
 