Diffstat (limited to 'net/ipv4/tcp_input.c')
-rw-r--r--	net/ipv4/tcp_input.c	| 204
1 file changed, 133 insertions(+), 71 deletions(-)
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 25a89eaa669d..227cba79fa6b 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -267,11 +267,31 @@ static bool TCP_ECN_rcv_ecn_echo(const struct tcp_sock *tp, const struct tcphdr
  * 1. Tuning sk->sk_sndbuf, when connection enters established state.
  */
 
-static void tcp_fixup_sndbuf(struct sock *sk)
+static void tcp_sndbuf_expand(struct sock *sk)
 {
-	int sndmem = SKB_TRUESIZE(tcp_sk(sk)->rx_opt.mss_clamp + MAX_TCP_HEADER);
+	const struct tcp_sock *tp = tcp_sk(sk);
+	int sndmem, per_mss;
+	u32 nr_segs;
+
+	/* Worst case is non GSO/TSO : each frame consumes one skb
+	 * and skb->head is kmalloced using power of two area of memory
+	 */
+	per_mss = max_t(u32, tp->rx_opt.mss_clamp, tp->mss_cache) +
+		  MAX_TCP_HEADER +
+		  SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+
+	per_mss = roundup_pow_of_two(per_mss) +
+		  SKB_DATA_ALIGN(sizeof(struct sk_buff));
+
+	nr_segs = max_t(u32, TCP_INIT_CWND, tp->snd_cwnd);
+	nr_segs = max_t(u32, nr_segs, tp->reordering + 1);
+
+	/* Fast Recovery (RFC 5681 3.2) :
+	 * Cubic needs 1.7 factor, rounded to 2 to include
+	 * extra cushion (application might react slowly to POLLOUT)
+	 */
+	sndmem = 2 * nr_segs * per_mss;
 
-	sndmem *= TCP_INIT_CWND;
 	if (sk->sk_sndbuf < sndmem)
 		sk->sk_sndbuf = min(sndmem, sysctl_tcp_wmem[2]);
 }
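
For reference, a minimal user-space sketch (not part of the patch) of the new sndbuf sizing arithmetic; the MSS, MAX_TCP_HEADER and struct sizes below are assumed typical values, not taken from any particular build:

/* Illustrative sketch only.  Assumed: mss = 1460, MAX_TCP_HEADER ~ 160,
 * skb_shared_info ~ 320 bytes, sk_buff ~ 232 bytes on 64-bit.
 */
#include <stdio.h>

static unsigned int roundup_pow_of_two(unsigned int x)
{
	unsigned int p = 1;

	while (p < x)
		p <<= 1;
	return p;
}

int main(void)
{
	unsigned int mss = 1460, max_tcp_header = 160;
	unsigned int shinfo = 320, skbuff = 232;	/* assumed struct sizes */
	unsigned int nr_segs = 10;			/* TCP_INIT_CWND */
	unsigned int per_mss, sndmem;

	/* worst case truesize of one non-GSO/TSO frame */
	per_mss = roundup_pow_of_two(mss + max_tcp_header + shinfo) + skbuff;

	/* 2x cushion per RFC 5681 fast recovery, as in the patch */
	sndmem = 2 * nr_segs * per_mss;

	printf("per_mss=%u sndmem=%u\n", per_mss, sndmem);	/* ~2280, ~45600 */
	return 0;
}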
@@ -355,6 +375,12 @@ static void tcp_fixup_rcvbuf(struct sock *sk)
 	rcvmem = 2 * SKB_TRUESIZE(mss + MAX_TCP_HEADER) *
 		 tcp_default_init_rwnd(mss);
 
+	/* Dynamic Right Sizing (DRS) has 2 to 3 RTT latency
+	 * Allow enough cushion so that sender is not limited by our window
+	 */
+	if (sysctl_tcp_moderate_rcvbuf)
+		rcvmem <<= 2;
+
 	if (sk->sk_rcvbuf < rcvmem)
 		sk->sk_rcvbuf = min(rcvmem, sysctl_tcp_rmem[2]);
 }
@@ -370,9 +396,11 @@ void tcp_init_buffer_space(struct sock *sk)
 	if (!(sk->sk_userlocks & SOCK_RCVBUF_LOCK))
 		tcp_fixup_rcvbuf(sk);
 	if (!(sk->sk_userlocks & SOCK_SNDBUF_LOCK))
-		tcp_fixup_sndbuf(sk);
+		tcp_sndbuf_expand(sk);
 
 	tp->rcvq_space.space = tp->rcv_wnd;
+	tp->rcvq_space.time = tcp_time_stamp;
+	tp->rcvq_space.seq = tp->copied_seq;
 
 	maxwin = tcp_full_space(sk);
 
@@ -512,48 +540,62 @@ void tcp_rcv_space_adjust(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	int time;
-	int space;
-
-	if (tp->rcvq_space.time == 0)
-		goto new_measure;
+	int copied;
 
 	time = tcp_time_stamp - tp->rcvq_space.time;
 	if (time < (tp->rcv_rtt_est.rtt >> 3) || tp->rcv_rtt_est.rtt == 0)
 		return;
 
-	space = 2 * (tp->copied_seq - tp->rcvq_space.seq);
+	/* Number of bytes copied to user in last RTT */
+	copied = tp->copied_seq - tp->rcvq_space.seq;
+	if (copied <= tp->rcvq_space.space)
+		goto new_measure;
 
-	space = max(tp->rcvq_space.space, space);
+	/* A bit of theory :
+	 * copied = bytes received in previous RTT, our base window
+	 * To cope with packet losses, we need a 2x factor
+	 * To cope with slow start, and sender growing its cwin by 100 %
+	 * every RTT, we need a 4x factor, because the ACK we are sending
+	 * now is for the next RTT, not the current one :
+	 * <prev RTT . ><current RTT .. ><next RTT .... >
+	 */
 
-	if (tp->rcvq_space.space != space) {
-		int rcvmem;
+	if (sysctl_tcp_moderate_rcvbuf &&
+	    !(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) {
+		int rcvwin, rcvmem, rcvbuf;
 
-		tp->rcvq_space.space = space;
+		/* minimal window to cope with packet losses, assuming
+		 * steady state. Add some cushion because of small variations.
+		 */
+		rcvwin = (copied << 1) + 16 * tp->advmss;
 
-		if (sysctl_tcp_moderate_rcvbuf &&
-		    !(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) {
-			int new_clamp = space;
+		/* If rate increased by 25%,
+		 *	assume slow start, rcvwin = 3 * copied
+		 * If rate increased by 50%,
+		 *	assume sender can use 2x growth, rcvwin = 4 * copied
+		 */
+		if (copied >=
+		    tp->rcvq_space.space + (tp->rcvq_space.space >> 2)) {
+			if (copied >=
+			    tp->rcvq_space.space + (tp->rcvq_space.space >> 1))
+				rcvwin <<= 1;
+			else
+				rcvwin += (rcvwin >> 1);
+		}
 
-			/* Receive space grows, normalize in order to
-			 * take into account packet headers and sk_buff
-			 * structure overhead.
-			 */
-			space /= tp->advmss;
-			if (!space)
-				space = 1;
-			rcvmem = SKB_TRUESIZE(tp->advmss + MAX_TCP_HEADER);
-			while (tcp_win_from_space(rcvmem) < tp->advmss)
-				rcvmem += 128;
-			space *= rcvmem;
-			space = min(space, sysctl_tcp_rmem[2]);
-			if (space > sk->sk_rcvbuf) {
-				sk->sk_rcvbuf = space;
-
-				/* Make the window clamp follow along. */
-				tp->window_clamp = new_clamp;
-			}
-		}
+		rcvmem = SKB_TRUESIZE(tp->advmss + MAX_TCP_HEADER);
+		while (tcp_win_from_space(rcvmem) < tp->advmss)
+			rcvmem += 128;
+
+		rcvbuf = min(rcvwin / tp->advmss * rcvmem, sysctl_tcp_rmem[2]);
+		if (rcvbuf > sk->sk_rcvbuf) {
+			sk->sk_rcvbuf = rcvbuf;
+
+			/* Make the window clamp follow along. */
+			tp->window_clamp = rcvwin;
+		}
 	}
+	tp->rcvq_space.space = copied;
 
 new_measure:
 	tp->rcvq_space.seq = tp->copied_seq;
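
As a rough illustration of the new receive-window heuristic above, a standalone sketch (not kernel code; truesize scaling and tcp_win_from_space() are omitted, and the advmss value in main() is an assumption):

#include <stdio.h>

/* Sketch of the DRS window target, mirroring the patch's logic.
 * copied     = bytes delivered to the application in the last RTT
 * prev_space = previous rcvq_space.space measurement
 */
static unsigned int drs_rcvwin(unsigned int copied, unsigned int prev_space,
			       unsigned int advmss)
{
	unsigned int rcvwin = (copied << 1) + 16 * advmss;	/* 2x + cushion */

	if (copied >= prev_space + (prev_space >> 2)) {		/* rate grew >= 25% */
		if (copied >= prev_space + (prev_space >> 1))	/* rate grew >= 50% */
			rcvwin <<= 1;				/* ~4x copied */
		else
			rcvwin += rcvwin >> 1;			/* ~3x copied */
	}
	return rcvwin;
}

int main(void)
{
	/* steady state vs. slow start, advmss = 1460 (assumed) */
	printf("%u %u\n", drs_rcvwin(100000, 100000, 1460),
			  drs_rcvwin(200000, 100000, 1460));
	return 0;
}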
@@ -629,6 +671,7 @@ static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	long m = mrtt; /* RTT */
+	u32 srtt = tp->srtt;
 
 	/* The following amusing code comes from Jacobson's
 	 * article in SIGCOMM '88. Note that rtt and mdev
@@ -646,11 +689,9 @@ static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt)
 	 * does not matter how to _calculate_ it. Seems, it was trap
 	 * that VJ failed to avoid. 8)
 	 */
-	if (m == 0)
-		m = 1;
-	if (tp->srtt != 0) {
-		m -= (tp->srtt >> 3);	/* m is now error in rtt est */
-		tp->srtt += m;		/* rtt = 7/8 rtt + 1/8 new */
+	if (srtt != 0) {
+		m -= (srtt >> 3);	/* m is now error in rtt est */
+		srtt += m;		/* rtt = 7/8 rtt + 1/8 new */
 		if (m < 0) {
 			m = -m;	/* m is now abs(error) */
 			m -= (tp->mdev >> 2);   /* similar update on mdev */
@@ -681,11 +722,12 @@ static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt)
 		}
 	} else {
 		/* no previous measure. */
-		tp->srtt = m << 3;	/* take the measured time to be rtt */
+		srtt = m << 3;		/* take the measured time to be rtt */
 		tp->mdev = m << 1;	/* make sure rto = 3*rtt */
 		tp->mdev_max = tp->rttvar = max(tp->mdev, tcp_rto_min(sk));
 		tp->rtt_seq = tp->snd_nxt;
 	}
+	tp->srtt = max(1U, srtt);
 }
 
 /* Set the sk_pacing_rate to allow proper sizing of TSO packets.
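
The smoothing itself is the unchanged Jacobson/Karels EWMA; a compact sketch (not part of the patch) of what is now computed on a local srtt copy, scaled by 8, before the final max(1U, srtt) write-back:

#include <stdio.h>

/* Sketch of the srtt update only (mdev/rttvar handling omitted).
 * srtt is kept scaled by 8; a value of 0 means "no measurement yet".
 */
static unsigned int srtt_update(unsigned int srtt, long m /* RTT sample, jiffies */)
{
	if (srtt != 0) {
		m -= srtt >> 3;		/* error against current estimate */
		srtt += m;		/* srtt = 7/8 srtt + 1/8 sample */
	} else {
		srtt = m << 3;		/* first sample seeds the estimate */
	}
	return srtt ? srtt : 1;		/* never store 0, as in the patch */
}

int main(void)
{
	unsigned int srtt = srtt_update(0, 100);	/* -> 800 (100 << 3) */

	srtt = srtt_update(srtt, 60);			/* -> 760, i.e. 7/8*100 + 1/8*60 */
	printf("srtt/8 = %u\n", srtt >> 3);		/* prints 95 */
	return 0;
}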
@@ -704,8 +746,10 @@ static void tcp_update_pacing_rate(struct sock *sk)
 
 	rate *= max(tp->snd_cwnd, tp->packets_out);
 
-	/* Correction for small srtt : minimum srtt being 8 (1 jiffy << 3),
-	 * be conservative and assume srtt = 1 (125 us instead of 1.25 ms)
+	/* Correction for small srtt and scheduling constraints.
+	 * For small rtt, consider noise is too high, and use
+	 * the minimal value (srtt = 1 -> 125 us for HZ=1000)
+	 *
 	 * We probably need usec resolution in the future.
 	 * Note: This also takes care of possible srtt=0 case,
 	 * when tcp_rtt_estimator() was not yet called.
@@ -713,13 +757,18 @@ static void tcp_update_pacing_rate(struct sock *sk)
 	if (tp->srtt > 8 + 2)
 		do_div(rate, tp->srtt);
 
-	sk->sk_pacing_rate = min_t(u64, rate, ~0U);
+	/* ACCESS_ONCE() is needed because sch_fq fetches sk_pacing_rate
+	 * without any lock. We want to make sure compiler wont store
+	 * intermediate values in this location.
+	 */
+	ACCESS_ONCE(sk->sk_pacing_rate) = min_t(u64, rate,
+						sk->sk_max_pacing_rate);
 }
 
 /* Calculate rto without backoff. This is the second half of Van Jacobson's
  * routine referred to above.
  */
-void tcp_set_rto(struct sock *sk)
+static void tcp_set_rto(struct sock *sk)
 {
 	const struct tcp_sock *tp = tcp_sk(sk);
 	/* Old crap is replaced with new one. 8)
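
For intuition, a sketch of the pacing-rate computation the ACCESS_ONCE() store publishes; it is approximate (the headroom factor applied earlier in the function is not shown in this hunk), and the mss/cwnd/srtt values in main() are assumptions:

#include <stdio.h>

/* Rough model: rate ~= cwnd * mss / srtt, in bytes per second.
 * srtt is in jiffies scaled by 8, hence the hz * 8 factor; very small
 * srtt values (<= 10, i.e. ~1.25 ms at HZ=1000) are treated as noise
 * and the division is skipped, as in the code above.
 */
static unsigned long long approx_pacing_rate(unsigned int mss, unsigned int cwnd,
					     unsigned int srtt /* jiffies << 3 */,
					     unsigned int hz)
{
	unsigned long long rate = (unsigned long long)mss * cwnd * hz * 8;

	if (srtt > 8 + 2)
		rate /= srtt;
	return rate;
}

int main(void)
{
	/* assumed: mss 1460, cwnd 10, srtt 100 ms at HZ=1000 (100 jiffies << 3) */
	printf("%llu bytes/sec\n", approx_pacing_rate(1460, 10, 100 << 3, 1000));
	return 0;
}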
@@ -1284,7 +1333,10 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
 		tp->lost_cnt_hint -= tcp_skb_pcount(prev);
 	}
 
-	TCP_SKB_CB(skb)->tcp_flags |= TCP_SKB_CB(prev)->tcp_flags;
+	TCP_SKB_CB(prev)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags;
+	if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
+		TCP_SKB_CB(prev)->end_seq++;
+
 	if (skb == tcp_highest_sack(sk))
 		tcp_advance_highest_sack(sk, skb);
 
@@ -2853,7 +2905,8 @@ static inline bool tcp_ack_update_rtt(struct sock *sk, const int flag,
 	 * left edge of the send window.
 	 * See draft-ietf-tcplw-high-performance-00, section 3.3.
 	 */
-	if (seq_rtt < 0 && tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr)
+	if (seq_rtt < 0 && tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
+	    flag & FLAG_ACKED)
 		seq_rtt = tcp_time_stamp - tp->rx_opt.rcv_tsecr;
 
 	if (seq_rtt < 0)
@@ -2868,20 +2921,25 @@ static inline bool tcp_ack_update_rtt(struct sock *sk, const int flag,
 }
 
 /* Compute time elapsed between (last) SYNACK and the ACK completing 3WHS. */
-static void tcp_synack_rtt_meas(struct sock *sk, struct request_sock *req)
+static void tcp_synack_rtt_meas(struct sock *sk, const u32 synack_stamp)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	s32 seq_rtt = -1;
 
-	if (tp->lsndtime && !tp->total_retrans)
-		seq_rtt = tcp_time_stamp - tp->lsndtime;
-	tcp_ack_update_rtt(sk, FLAG_SYN_ACKED, seq_rtt, -1);
+	if (synack_stamp && !tp->total_retrans)
+		seq_rtt = tcp_time_stamp - synack_stamp;
+
+	/* If the ACK acks both the SYNACK and the (Fast Open'd) data packets
+	 * sent in SYN_RECV, SYNACK RTT is the smooth RTT computed in tcp_ack()
+	 */
+	if (!tp->srtt)
+		tcp_ack_update_rtt(sk, FLAG_SYN_ACKED, seq_rtt, -1);
 }
 
-static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
+static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 acked, u32 in_flight)
 {
 	const struct inet_connection_sock *icsk = inet_csk(sk);
-	icsk->icsk_ca_ops->cong_avoid(sk, ack, in_flight);
+	icsk->icsk_ca_ops->cong_avoid(sk, ack, acked, in_flight);
 	tcp_sk(sk)->snd_cwnd_stamp = tcp_time_stamp;
 }
 
@@ -2970,7 +3028,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 	const struct inet_connection_sock *icsk = inet_csk(sk);
 	struct sk_buff *skb;
 	u32 now = tcp_time_stamp;
-	int fully_acked = true;
+	bool fully_acked = true;
 	int flag = 0;
 	u32 pkts_acked = 0;
 	u32 reord = tp->packets_out;
@@ -2978,6 +3036,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 	s32 seq_rtt = -1;
 	s32 ca_seq_rtt = -1;
 	ktime_t last_ackt = net_invalid_timestamp();
+	bool rtt_update;
 
 	while ((skb = tcp_write_queue_head(sk)) && skb != tcp_send_head(sk)) {
 		struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
@@ -3054,14 +3113,13 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 	if (skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
 		flag |= FLAG_SACK_RENEGING;
 
-	if (tcp_ack_update_rtt(sk, flag, seq_rtt, sack_rtt) ||
-	    (flag & FLAG_ACKED))
-		tcp_rearm_rto(sk);
+	rtt_update = tcp_ack_update_rtt(sk, flag, seq_rtt, sack_rtt);
 
 	if (flag & FLAG_ACKED) {
 		const struct tcp_congestion_ops *ca_ops
 			= inet_csk(sk)->icsk_ca_ops;
 
+		tcp_rearm_rto(sk);
 		if (unlikely(icsk->icsk_mtup.probe_size &&
 			     !after(tp->mtu_probe.probe_seq_end, tp->snd_una))) {
 			tcp_mtup_probe_success(sk);
@@ -3100,6 +3158,13 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 
 			ca_ops->pkts_acked(sk, pkts_acked, rtt_us);
 		}
+	} else if (skb && rtt_update && sack_rtt >= 0 &&
+		   sack_rtt > (s32)(now - TCP_SKB_CB(skb)->when)) {
+		/* Do not re-arm RTO if the sack RTT is measured from data sent
+		 * after when the head was last (re)transmitted. Otherwise the
+		 * timeout may continue to extend in loss recovery.
+		 */
+		tcp_rearm_rto(sk);
 	}
 
 #if FASTRETRANS_DEBUG > 0
@@ -3288,7 +3353,7 @@ static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag)
 		tcp_init_cwnd_reduction(sk, true);
 		tcp_set_ca_state(sk, TCP_CA_CWR);
 		tcp_end_cwnd_reduction(sk);
-		tcp_set_ca_state(sk, TCP_CA_Open);
+		tcp_try_keep_open(sk);
 		NET_INC_STATS_BH(sock_net(sk),
 				 LINUX_MIB_TCPLOSSPROBERECOVERY);
 	}
@@ -3391,7 +3456,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 
 	/* Advance cwnd if state allows */
 	if (tcp_may_raise_cwnd(sk, flag))
-		tcp_cong_avoid(sk, ack, prior_in_flight);
+		tcp_cong_avoid(sk, ack, acked, prior_in_flight);
 
 	if (tcp_ack_is_dubious(sk, flag)) {
 		is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP));
@@ -3623,7 +3688,7 @@ const u8 *tcp_parse_md5sig_option(const struct tcphdr *th)
 		int opcode = *ptr++;
 		int opsize;
 
-		switch(opcode) {
+		switch (opcode) {
 		case TCPOPT_EOL:
 			return NULL;
 		case TCPOPT_NOP:
@@ -3983,7 +4048,7 @@ static void tcp_sack_remove(struct tcp_sock *tp)
 			WARN_ON(before(tp->rcv_nxt, sp->end_seq));
 
 			/* Zap this SACK, by moving forward any other SACKS. */
-			for (i=this_sack+1; i < num_sacks; i++)
+			for (i = this_sack+1; i < num_sacks; i++)
 				tp->selective_acks[i-1] = tp->selective_acks[i];
 			num_sacks--;
 			continue;
@@ -4701,15 +4766,7 @@ static void tcp_new_space(struct sock *sk)
 	struct tcp_sock *tp = tcp_sk(sk);
 
 	if (tcp_should_expand_sndbuf(sk)) {
-		int sndmem = SKB_TRUESIZE(max_t(u32,
-						tp->rx_opt.mss_clamp,
-						tp->mss_cache) +
-					  MAX_TCP_HEADER);
-		int demanded = max_t(unsigned int, tp->snd_cwnd,
-				     tp->reordering + 1);
-		sndmem *= 2 * demanded;
-		if (sndmem > sk->sk_sndbuf)
-			sk->sk_sndbuf = min(sndmem, sysctl_tcp_wmem[2]);
+		tcp_sndbuf_expand(sk);
 		tp->snd_cwnd_stamp = tcp_time_stamp;
 	}
 
@@ -5584,6 +5641,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 	struct request_sock *req;
 	int queued = 0;
 	bool acceptable;
+	u32 synack_stamp;
 
 	tp->rx_opt.saw_tstamp = 0;
 
@@ -5666,16 +5724,18 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 		 * so release it.
 		 */
 		if (req) {
+			synack_stamp = tcp_rsk(req)->snt_synack;
 			tp->total_retrans = req->num_retrans;
 			reqsk_fastopen_remove(sk, req, false);
 		} else {
+			synack_stamp = tp->lsndtime;
 			/* Make sure socket is routed, for correct metrics. */
 			icsk->icsk_af_ops->rebuild_header(sk);
 			tcp_init_congestion_control(sk);
 
 			tcp_mtup_init(sk);
-			tcp_init_buffer_space(sk);
 			tp->copied_seq = tp->rcv_nxt;
+			tcp_init_buffer_space(sk);
 		}
 		smp_mb();
 		tcp_set_state(sk, TCP_ESTABLISHED);
@@ -5691,7 +5751,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 		tp->snd_una = TCP_SKB_CB(skb)->ack_seq;
 		tp->snd_wnd = ntohs(th->window) << tp->rx_opt.snd_wscale;
 		tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);
-		tcp_synack_rtt_meas(sk, req);
+		tcp_synack_rtt_meas(sk, synack_stamp);
 
 		if (tp->rx_opt.tstamp_ok)
 			tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
@@ -5709,6 +5769,8 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 		} else
 			tcp_init_metrics(sk);
 
+		tcp_update_pacing_rate(sk);
+
 		/* Prevent spurious tcp_cwnd_restart() on first data packet */
 		tp->lsndtime = tcp_time_stamp;
 