Diffstat (limited to 'net/ipv4/tcp_input.c')
-rw-r--r--  net/ipv4/tcp_input.c   281
1 file changed, 148 insertions(+), 133 deletions(-)
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index d377f4854cb8..432c36649db3 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -237,7 +237,11 @@ static inline void TCP_ECN_check_ce(struct tcp_sock *tp, const struct sk_buff *s
 		tcp_enter_quickack_mode((struct sock *)tp);
 		break;
 	case INET_ECN_CE:
-		tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
+		if (!(tp->ecn_flags & TCP_ECN_DEMAND_CWR)) {
+			/* Better not delay acks, sender can have a very low cwnd */
+			tcp_enter_quickack_mode((struct sock *)tp);
+			tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
+		}
 		/* fallinto */
 	default:
 		tp->ecn_flags |= TCP_ECN_SEEN;
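[Review note] The new branch addresses a stall: if the receiver delays the ACK that carries the ECN echo, a sender whose cwnd has collapsed to a segment or two has nothing else in flight and must sit out the delayed-ACK timer on every marked round trip. A toy userspace illustration of the worst case (the RTT and the 200 ms delayed-ACK ceiling are assumed round numbers, not values taken from this patch):

#include <stdio.h>

int main(void)
{
        int rtt_ms = 20;        /* assumed path round-trip time */
        int delack_ms = 200;    /* assumed delayed-ACK ceiling */

        /* Sender at cwnd == 1 whose only in-flight segment is CE-marked: */
        printf("without quickack: next send after ~%d ms\n", rtt_ms + delack_ms);
        printf("with quickack:    next send after ~%d ms\n", rtt_ms);
        return 0;
}

Entering quickack mode only when TCP_ECN_DEMAND_CWR is not yet set means a fresh CE episode forces an immediate ACK, while subsequent marked segments in the same episode do not.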
@@ -374,7 +378,7 @@ static void tcp_fixup_rcvbuf(struct sock *sk)
 /* 4. Try to fixup all. It is made immediately after connection enters
  * established state.
  */
-static void tcp_init_buffer_space(struct sock *sk)
+void tcp_init_buffer_space(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	int maxwin;
@@ -739,29 +743,6 @@ __u32 tcp_init_cwnd(const struct tcp_sock *tp, const struct dst_entry *dst)
 	return min_t(__u32, cwnd, tp->snd_cwnd_clamp);
 }
 
-/* Set slow start threshold and cwnd not falling to slow start */
-void tcp_enter_cwr(struct sock *sk, const int set_ssthresh)
-{
-	struct tcp_sock *tp = tcp_sk(sk);
-	const struct inet_connection_sock *icsk = inet_csk(sk);
-
-	tp->prior_ssthresh = 0;
-	tp->bytes_acked = 0;
-	if (icsk->icsk_ca_state < TCP_CA_CWR) {
-		tp->undo_marker = 0;
-		if (set_ssthresh)
-			tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
-		tp->snd_cwnd = min(tp->snd_cwnd,
-				   tcp_packets_in_flight(tp) + 1U);
-		tp->snd_cwnd_cnt = 0;
-		tp->high_seq = tp->snd_nxt;
-		tp->snd_cwnd_stamp = tcp_time_stamp;
-		TCP_ECN_queue_cwr(tp);
-
-		tcp_set_ca_state(sk, TCP_CA_CWR);
-	}
-}
-
 /*
  * Packet counting of FACK is based on in-order assumptions, therefore TCP
  * disables it when reordering is detected
@@ -2489,35 +2470,6 @@ static inline void tcp_moderate_cwnd(struct tcp_sock *tp)
 	tp->snd_cwnd_stamp = tcp_time_stamp;
 }
 
-/* Lower bound on congestion window is slow start threshold
- * unless congestion avoidance choice decides to overide it.
- */
-static inline u32 tcp_cwnd_min(const struct sock *sk)
-{
-	const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops;
-
-	return ca_ops->min_cwnd ? ca_ops->min_cwnd(sk) : tcp_sk(sk)->snd_ssthresh;
-}
-
-/* Decrease cwnd each second ack. */
-static void tcp_cwnd_down(struct sock *sk, int flag)
-{
-	struct tcp_sock *tp = tcp_sk(sk);
-	int decr = tp->snd_cwnd_cnt + 1;
-
-	if ((flag & (FLAG_ANY_PROGRESS | FLAG_DSACKING_ACK)) ||
-	    (tcp_is_reno(tp) && !(flag & FLAG_NOT_DUP))) {
-		tp->snd_cwnd_cnt = decr & 1;
-		decr >>= 1;
-
-		if (decr && tp->snd_cwnd > tcp_cwnd_min(sk))
-			tp->snd_cwnd -= decr;
-
-		tp->snd_cwnd = min(tp->snd_cwnd, tcp_packets_in_flight(tp) + 1);
-		tp->snd_cwnd_stamp = tcp_time_stamp;
-	}
-}
-
 /* Nothing was retransmitted or returned timestamp is less
  * than timestamp of the first retransmission.
  */
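[Review note] For contrast with the PRR code added further down, the deleted tcp_cwnd_down() implemented classic rate halving: cwnd is decremented once per two ACKs until it reaches the min_cwnd floor. A minimal userspace paraphrase (simplified: the flag gating and in-flight clamp of the kernel version are dropped):

#include <stdio.h>

/* Paraphrase of the removed logic: drop cwnd by one every second ACK. */
static unsigned int cwnd_down(unsigned int cwnd, unsigned int *cnt,
                              unsigned int min_cwnd)
{
        unsigned int decr = *cnt + 1;

        *cnt = decr & 1;        /* carry the odd half-step to the next ACK */
        decr >>= 1;
        if (decr && cwnd > min_cwnd)
                cwnd -= decr;
        return cwnd;
}

int main(void)
{
        unsigned int cwnd = 10, cnt = 0;

        for (int ack = 1; ack <= 10; ack++)
                printf("ack %2d: cwnd %u\n", ack,
                       cwnd = cwnd_down(cwnd, &cnt, 5));
        return 0;       /* walks 10 -> 5 across roughly one RTT of ACKs */
}

This halves the rate on schedule but pays no attention to how much data was actually delivered, which is the gap PRR closes.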
@@ -2719,24 +2671,80 @@ static bool tcp_try_undo_loss(struct sock *sk)
 	return false;
 }
 
-static inline void tcp_complete_cwr(struct sock *sk)
+/* The cwnd reduction in CWR and Recovery use the PRR algorithm
+ * https://datatracker.ietf.org/doc/draft-ietf-tcpm-proportional-rate-reduction/
+ * It computes the number of packets to send (sndcnt) based on packets newly
+ * delivered:
+ *   1) If the packets in flight is larger than ssthresh, PRR spreads the
+ *	cwnd reductions across a full RTT.
+ *   2) If packets in flight is lower than ssthresh (such as due to excess
+ *	losses and/or application stalls), do not perform any further cwnd
+ *	reductions, but instead slow start up to ssthresh.
+ */
+static void tcp_init_cwnd_reduction(struct sock *sk, const bool set_ssthresh)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
-	/* Do not moderate cwnd if it's already undone in cwr or recovery. */
-	if (tp->undo_marker) {
-		if (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR) {
-			tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh);
-			tp->snd_cwnd_stamp = tcp_time_stamp;
-		} else if (tp->snd_ssthresh < TCP_INFINITE_SSTHRESH) {
-			/* PRR algorithm. */
-			tp->snd_cwnd = tp->snd_ssthresh;
-			tp->snd_cwnd_stamp = tcp_time_stamp;
-		}
+	tp->high_seq = tp->snd_nxt;
+	tp->bytes_acked = 0;
+	tp->snd_cwnd_cnt = 0;
+	tp->prior_cwnd = tp->snd_cwnd;
+	tp->prr_delivered = 0;
+	tp->prr_out = 0;
+	if (set_ssthresh)
+		tp->snd_ssthresh = inet_csk(sk)->icsk_ca_ops->ssthresh(sk);
+	TCP_ECN_queue_cwr(tp);
+}
+
+static void tcp_cwnd_reduction(struct sock *sk, int newly_acked_sacked,
+			       int fast_rexmit)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	int sndcnt = 0;
+	int delta = tp->snd_ssthresh - tcp_packets_in_flight(tp);
+
+	tp->prr_delivered += newly_acked_sacked;
+	if (tcp_packets_in_flight(tp) > tp->snd_ssthresh) {
+		u64 dividend = (u64)tp->snd_ssthresh * tp->prr_delivered +
+			       tp->prior_cwnd - 1;
+		sndcnt = div_u64(dividend, tp->prior_cwnd) - tp->prr_out;
+	} else {
+		sndcnt = min_t(int, delta,
+			       max_t(int, tp->prr_delivered - tp->prr_out,
+				     newly_acked_sacked) + 1);
+	}
+
+	sndcnt = max(sndcnt, (fast_rexmit ? 1 : 0));
+	tp->snd_cwnd = tcp_packets_in_flight(tp) + sndcnt;
+}
+
+static inline void tcp_end_cwnd_reduction(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+
+	/* Reset cwnd to ssthresh in CWR or Recovery (unless it's undone) */
+	if (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR ||
+	    (tp->undo_marker && tp->snd_ssthresh < TCP_INFINITE_SSTHRESH)) {
+		tp->snd_cwnd = tp->snd_ssthresh;
+		tp->snd_cwnd_stamp = tcp_time_stamp;
 	}
 	tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR);
 }
 
+/* Enter CWR state. Disable cwnd undo since congestion is proven with ECN */
+void tcp_enter_cwr(struct sock *sk, const int set_ssthresh)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+
+	tp->prior_ssthresh = 0;
+	tp->bytes_acked = 0;
+	if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) {
+		tp->undo_marker = 0;
+		tcp_init_cwnd_reduction(sk, set_ssthresh);
+		tcp_set_ca_state(sk, TCP_CA_CWR);
+	}
+}
+
 static void tcp_try_keep_open(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
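[Review note] The proportional branch computes sndcnt = ceil(ssthresh * prr_delivered / prior_cwnd) - prr_out, so cumulative transmissions track cumulative deliveries at the ratio ssthresh/prior_cwnd and cwnd glides from prior_cwnd down to ssthresh over about one RTT; the other branch (the slow-start reduction bound) takes over once flight size has fallen to or below ssthresh. A self-contained userspace sketch with an invented worked run (prior_cwnd 10, ssthresh 5, one packet delivered per ACK):

#include <stdio.h>
#include <stdint.h>

#define MIN(a, b) ((a) < (b) ? (a) : (b))
#define MAX(a, b) ((a) > (b) ? (a) : (b))

struct prr {
        unsigned int ssthresh, prior_cwnd;
        unsigned int prr_delivered, prr_out;
};

/* Paraphrase of tcp_cwnd_reduction(): packets we may send for this ACK. */
static int prr_sndcnt(struct prr *s, unsigned int in_flight,
                      unsigned int newly_acked_sacked, int fast_rexmit)
{
        int delta = (int)s->ssthresh - (int)in_flight;
        int sndcnt;

        s->prr_delivered += newly_acked_sacked;
        if (in_flight > s->ssthresh) {
                /* ceil(ssthresh * delivered / prior_cwnd) - already sent */
                uint64_t dividend = (uint64_t)s->ssthresh * s->prr_delivered +
                                    s->prior_cwnd - 1;
                sndcnt = (int)(dividend / s->prior_cwnd) - (int)s->prr_out;
        } else {
                /* Slow-start reduction bound: build back up to ssthresh. */
                sndcnt = MIN(delta,
                             MAX((int)(s->prr_delivered - s->prr_out),
                                 (int)newly_acked_sacked) + 1);
        }
        return MAX(sndcnt, fast_rexmit ? 1 : 0);
}

int main(void)
{
        struct prr s = { .ssthresh = 5, .prior_cwnd = 10 };
        unsigned int in_flight = 10;

        for (int ack = 1; ack <= 10; ack++) {   /* one RTT of ACKs */
                int snd;

                in_flight--;                    /* one packet delivered */
                snd = prr_sndcnt(&s, in_flight, 1, 0);
                s.prr_out += snd;
                in_flight += snd;
                printf("ack %2d: sndcnt %d, cwnd %u\n", ack, snd, in_flight);
        }
        return 0;       /* cwnd steps 10, 9, 9, 8, 8, ... down to 5 */
}

Note how losing a packet's worth of deliveries merely slows this schedule instead of freezing cwnd, and how branch 2) stops further reduction once flight size has already undershot ssthresh.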
@@ -2751,7 +2759,7 @@ static void tcp_try_keep_open(struct sock *sk)
 	}
 }
 
-static void tcp_try_to_open(struct sock *sk, int flag)
+static void tcp_try_to_open(struct sock *sk, int flag, int newly_acked_sacked)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
@@ -2768,7 +2776,7 @@ static void tcp_try_to_open(struct sock *sk, int flag)
 		if (inet_csk(sk)->icsk_ca_state != TCP_CA_Open)
 			tcp_moderate_cwnd(tp);
 	} else {
-		tcp_cwnd_down(sk, flag);
+		tcp_cwnd_reduction(sk, newly_acked_sacked, 0);
 	}
 }
 
@@ -2850,38 +2858,6 @@ void tcp_simple_retransmit(struct sock *sk)
 }
 EXPORT_SYMBOL(tcp_simple_retransmit);
 
-/* This function implements the PRR algorithm, specifcally the PRR-SSRB
- * (proportional rate reduction with slow start reduction bound) as described in
- * http://www.ietf.org/id/draft-mathis-tcpm-proportional-rate-reduction-01.txt.
- * It computes the number of packets to send (sndcnt) based on packets newly
- * delivered:
- *   1) If the packets in flight is larger than ssthresh, PRR spreads the
- *	cwnd reductions across a full RTT.
- *   2) If packets in flight is lower than ssthresh (such as due to excess
- *	losses and/or application stalls), do not perform any further cwnd
- *	reductions, but instead slow start up to ssthresh.
- */
-static void tcp_update_cwnd_in_recovery(struct sock *sk, int newly_acked_sacked,
-					int fast_rexmit, int flag)
-{
-	struct tcp_sock *tp = tcp_sk(sk);
-	int sndcnt = 0;
-	int delta = tp->snd_ssthresh - tcp_packets_in_flight(tp);
-
-	if (tcp_packets_in_flight(tp) > tp->snd_ssthresh) {
-		u64 dividend = (u64)tp->snd_ssthresh * tp->prr_delivered +
-			       tp->prior_cwnd - 1;
-		sndcnt = div_u64(dividend, tp->prior_cwnd) - tp->prr_out;
-	} else {
-		sndcnt = min_t(int, delta,
-			       max_t(int, tp->prr_delivered - tp->prr_out,
-				     newly_acked_sacked) + 1);
-	}
-
-	sndcnt = max(sndcnt, (fast_rexmit ? 1 : 0));
-	tp->snd_cwnd = tcp_packets_in_flight(tp) + sndcnt;
-}
-
 static void tcp_enter_recovery(struct sock *sk, bool ece_ack)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
@@ -2894,7 +2870,6 @@ static void tcp_enter_recovery(struct sock *sk, bool ece_ack)
 
 	NET_INC_STATS_BH(sock_net(sk), mib_idx);
 
-	tp->high_seq = tp->snd_nxt;
 	tp->prior_ssthresh = 0;
 	tp->undo_marker = tp->snd_una;
 	tp->undo_retrans = tp->retrans_out;
@@ -2902,15 +2877,8 @@ static void tcp_enter_recovery(struct sock *sk, bool ece_ack)
 	if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) {
 		if (!ece_ack)
 			tp->prior_ssthresh = tcp_current_ssthresh(sk);
-		tp->snd_ssthresh = inet_csk(sk)->icsk_ca_ops->ssthresh(sk);
-		TCP_ECN_queue_cwr(tp);
+		tcp_init_cwnd_reduction(sk, true);
 	}
-
-	tp->bytes_acked = 0;
-	tp->snd_cwnd_cnt = 0;
-	tp->prior_cwnd = tp->snd_cwnd;
-	tp->prr_delivered = 0;
-	tp->prr_out = 0;
 	tcp_set_ca_state(sk, TCP_CA_Recovery);
 }
 
@@ -2970,7 +2938,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
 			/* CWR is to be held something *above* high_seq
 			 * is ACKed for CWR bit to reach receiver. */
 			if (tp->snd_una != tp->high_seq) {
-				tcp_complete_cwr(sk);
+				tcp_end_cwnd_reduction(sk);
 				tcp_set_ca_state(sk, TCP_CA_Open);
 			}
 			break;
@@ -2980,7 +2948,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
 				tcp_reset_reno_sack(tp);
 			if (tcp_try_undo_recovery(sk))
 				return;
-			tcp_complete_cwr(sk);
+			tcp_end_cwnd_reduction(sk);
 			break;
 		}
 	}
@@ -3021,7 +2989,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
 		tcp_try_undo_dsack(sk);
 
 	if (!tcp_time_to_recover(sk, flag)) {
-		tcp_try_to_open(sk, flag);
+		tcp_try_to_open(sk, flag, newly_acked_sacked);
 		return;
 	}
 
@@ -3043,8 +3011,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
 
 	if (do_lost || (tcp_is_fack(tp) && tcp_head_timedout(sk)))
 		tcp_update_scoreboard(sk, fast_rexmit);
-	tp->prr_delivered += newly_acked_sacked;
-	tcp_update_cwnd_in_recovery(sk, newly_acked_sacked, fast_rexmit, flag);
+	tcp_cwnd_reduction(sk, newly_acked_sacked, fast_rexmit);
 	tcp_xmit_retransmit_queue(sk);
 }
 
@@ -3123,6 +3090,12 @@ void tcp_rearm_rto(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
+	/* If the retrans timer is currently being used by Fast Open
+	 * for SYN-ACK retrans purpose, stay put.
+	 */
+	if (tp->fastopen_rsk)
+		return;
+
 	if (!tp->packets_out) {
 		inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS);
 	} else {
@@ -3384,7 +3357,7 @@ static inline bool tcp_may_raise_cwnd(const struct sock *sk, const int flag)
 {
 	const struct tcp_sock *tp = tcp_sk(sk);
 	return (!(flag & FLAG_ECE) || tp->snd_cwnd < tp->snd_ssthresh) &&
-		!((1 << inet_csk(sk)->icsk_ca_state) & (TCPF_CA_Recovery | TCPF_CA_CWR));
+		!tcp_in_cwnd_reduction(sk);
 }
 
 /* Check that window update is acceptable.
@@ -3452,9 +3425,9 @@ static void tcp_conservative_spur_to_response(struct tcp_sock *tp)
 }
 
 /* A conservative spurious RTO response algorithm: reduce cwnd using
- * rate halving and continue in congestion avoidance.
+ * PRR and continue in congestion avoidance.
  */
-static void tcp_ratehalving_spur_to_response(struct sock *sk)
+static void tcp_cwr_spur_to_response(struct sock *sk)
 {
 	tcp_enter_cwr(sk, 0);
 }
@@ -3462,7 +3435,7 @@ static void tcp_ratehalving_spur_to_response(struct sock *sk)
 static void tcp_undo_spur_to_response(struct sock *sk, int flag)
 {
 	if (flag & FLAG_ECE)
-		tcp_ratehalving_spur_to_response(sk);
+		tcp_cwr_spur_to_response(sk);
 	else
 		tcp_undo_cwr(sk, true);
 }
@@ -3569,7 +3542,7 @@ static bool tcp_process_frto(struct sock *sk, int flag)
 			tcp_conservative_spur_to_response(tp);
 			break;
 		default:
-			tcp_ratehalving_spur_to_response(sk);
+			tcp_cwr_spur_to_response(sk);
 			break;
 		}
 		tp->frto_counter = 0;
@@ -4034,7 +4007,7 @@ static inline bool tcp_sequence(const struct tcp_sock *tp, u32 seq, u32 end_seq)
 }
 
 /* When we get a reset we do this. */
-static void tcp_reset(struct sock *sk)
+void tcp_reset(struct sock *sk)
 {
 	/* We want the right error as BSD sees it (and indeed as we do). */
 	switch (sk->sk_state) {
@@ -5740,7 +5713,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
 
 		TCP_ECN_rcv_synack(tp, th);
 
-		tp->snd_wl1 = TCP_SKB_CB(skb)->seq;
+		tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);
 		tcp_ack(sk, skb, FLAG_SLOWPATH);
 
 		/* Ok.. it's good. Set up sequence numbers and
@@ -5753,7 +5726,6 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
 		 * never scaled.
 		 */
 		tp->snd_wnd = ntohs(th->window);
-		tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);
 
 		if (!tp->rx_opt.wscale_ok) {
 			tp->rx_opt.snd_wscale = tp->rx_opt.rcv_wscale = 0;
@@ -5891,7 +5863,9 @@ discard:
 		tcp_send_synack(sk);
 #if 0
 		/* Note, we could accept data and URG from this segment.
-		 * There are no obstacles to make this.
+		 * There are no obstacles to make this (except that we must
+		 * either change tcp_recvmsg() to prevent it from returning data
+		 * before 3WHS completes per RFC793, or employ TCP Fast Open).
 		 *
 		 * However, if we ignore data in ACKless segments sometimes,
 		 * we have no reasons to accept it sometimes.
@@ -5931,6 +5905,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct inet_connection_sock *icsk = inet_csk(sk);
+	struct request_sock *req;
 	int queued = 0;
 
 	tp->rx_opt.saw_tstamp = 0;
@@ -5986,6 +5961,14 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 		return 0;
 	}
 
+	req = tp->fastopen_rsk;
+	if (req != NULL) {
+		BUG_ON(sk->sk_state != TCP_SYN_RECV &&
+		    sk->sk_state != TCP_FIN_WAIT1);
+
+		if (tcp_check_req(sk, skb, req, NULL, true) == NULL)
+			goto discard;
+	}
 	if (!tcp_validate_incoming(sk, skb, th, 0))
 		return 0;
 
@@ -5996,7 +5979,25 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 	switch (sk->sk_state) {
 	case TCP_SYN_RECV:
 		if (acceptable) {
-			tp->copied_seq = tp->rcv_nxt;
+			/* Once we leave TCP_SYN_RECV, we no longer
+			 * need req so release it.
+			 */
+			if (req) {
+				tcp_synack_rtt_meas(sk, req);
+				tp->total_retrans = req->retrans;
+
+				reqsk_fastopen_remove(sk, req, false);
+			} else {
+				/* Make sure socket is routed, for
+				 * correct metrics.
+				 */
+				icsk->icsk_af_ops->rebuild_header(sk);
+				tcp_init_congestion_control(sk);
+
+				tcp_mtup_init(sk);
+				tcp_init_buffer_space(sk);
+				tp->copied_seq = tp->rcv_nxt;
+			}
 			smp_mb();
 			tcp_set_state(sk, TCP_ESTABLISHED);
 			sk->sk_state_change(sk);
@@ -6018,23 +6019,27 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 			if (tp->rx_opt.tstamp_ok)
 				tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
 
-			/* Make sure socket is routed, for
-			 * correct metrics.
-			 */
-			icsk->icsk_af_ops->rebuild_header(sk);
-
-			tcp_init_metrics(sk);
-
-			tcp_init_congestion_control(sk);
+			if (req) {
+				/* Re-arm the timer because data may
+				 * have been sent out. This is similar
+				 * to the regular data transmission case
+				 * when new data has just been ack'ed.
+				 *
+				 * (TFO) - we could try to be more
+				 * aggressive and retranmitting any data
+				 * sooner based on when they were sent
+				 * out.
+				 */
+				tcp_rearm_rto(sk);
+			} else
+				tcp_init_metrics(sk);
 
 			/* Prevent spurious tcp_cwnd_restart() on
 			 * first data packet.
 			 */
 			tp->lsndtime = tcp_time_stamp;
 
-			tcp_mtup_init(sk);
 			tcp_initialize_rcv_mss(sk);
-			tcp_init_buffer_space(sk);
 			tcp_fast_path_on(tp);
 		} else {
 			return 1;
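[Review note] When a Fast Open server reaches ESTABLISHED it may already have data from the handshake outstanding, so the hunk above re-arms the retransmit timer for a fresh RTO from now instead of doing the empty-pipe initialization. The comment's "more aggressive" alternative would anchor the timer to when the oldest outstanding data was actually transmitted. A toy of the difference (all times invented for illustration):

#include <stdio.h>

int main(void)
{
        int rto_ms = 200;       /* assumed retransmission timeout */
        int sent_ms = 10;       /* TFO data went out during the handshake */
        int ack_ms = 40;        /* first ACK completes the handshake */

        /* This patch: restart the full RTO when the ACK arrives. */
        printf("re-armed timer fires at t=%d ms\n", ack_ms + rto_ms);
        /* Alternative floated in the comment: count from transmit time. */
        printf("aggressive variant would fire at t=%d ms\n", sent_ms + rto_ms);
        return 0;
}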
@@ -6042,6 +6047,16 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 		break;
 
 	case TCP_FIN_WAIT1:
+		/* If we enter the TCP_FIN_WAIT1 state and we are a
+		 * Fast Open socket and this is the first acceptable
+		 * ACK we have received, this would have acknowledged
+		 * our SYNACK so stop the SYNACK timer.
+		 */
+		if (acceptable && req != NULL) {
+			/* We no longer need the request sock. */
+			reqsk_fastopen_remove(sk, req, false);
+			tcp_rearm_rto(sk);
+		}
 		if (tp->snd_una == tp->write_seq) {
 			struct dst_entry *dst;
 