Diffstat (limited to 'net/ipv4/tcp_input.c')
 -rw-r--r--  net/ipv4/tcp_input.c | 281
 1 file changed, 148 insertions(+), 133 deletions(-)
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index d377f4854cb8..432c36649db3 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -237,7 +237,11 @@ static inline void TCP_ECN_check_ce(struct tcp_sock *tp, const struct sk_buff *s
 			tcp_enter_quickack_mode((struct sock *)tp);
 		break;
 	case INET_ECN_CE:
-		tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
+		if (!(tp->ecn_flags & TCP_ECN_DEMAND_CWR)) {
+			/* Better not delay acks, sender can have a very low cwnd */
+			tcp_enter_quickack_mode((struct sock *)tp);
+			tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
+		}
 		/* fallinto */
 	default:
 		tp->ecn_flags |= TCP_ECN_SEEN;
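The hunk above makes the receiver force an immediate ACK only on the first CE mark of each demand-CWR episode, so a sender sitting at a very small cwnd is not held up by a delayed ACK. A minimal user-space replay of that branch, with invented flag bits and an invented input sequence (none of it is kernel API; see include/net/tcp.h for the real TCP_ECN_* flags):

#include <stdio.h>

#define DEMAND_CWR 0x4	/* stand-in for TCP_ECN_DEMAND_CWR, value invented here */
#define ECN_SEEN   0x8	/* stand-in for TCP_ECN_SEEN, value invented here */

int main(void)
{
	unsigned int ecn_flags = 0;
	const int ce[] = { 1, 1, 1 };	/* three CE-marked segments in a row */

	for (unsigned int i = 0; i < sizeof(ce) / sizeof(ce[0]); i++) {
		if (ce[i] && !(ecn_flags & DEMAND_CWR)) {
			/* first CE of this episode: ACK right away */
			printf("segment %u: CE, force quick ACK\n", i);
			ecn_flags |= DEMAND_CWR;
		} else if (ce[i]) {
			printf("segment %u: CE, CWR already demanded\n", i);
		}
		ecn_flags |= ECN_SEEN;
	}
	return 0;
}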
@@ -374,7 +378,7 @@ static void tcp_fixup_rcvbuf(struct sock *sk)
 /* 4. Try to fixup all. It is made immediately after connection enters
  * established state.
  */
-static void tcp_init_buffer_space(struct sock *sk)
+void tcp_init_buffer_space(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	int maxwin;
@@ -739,29 +743,6 @@ __u32 tcp_init_cwnd(const struct tcp_sock *tp, const struct dst_entry *dst)
 	return min_t(__u32, cwnd, tp->snd_cwnd_clamp);
 }
 
-/* Set slow start threshold and cwnd not falling to slow start */
-void tcp_enter_cwr(struct sock *sk, const int set_ssthresh)
-{
-	struct tcp_sock *tp = tcp_sk(sk);
-	const struct inet_connection_sock *icsk = inet_csk(sk);
-
-	tp->prior_ssthresh = 0;
-	tp->bytes_acked = 0;
-	if (icsk->icsk_ca_state < TCP_CA_CWR) {
-		tp->undo_marker = 0;
-		if (set_ssthresh)
-			tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
-		tp->snd_cwnd = min(tp->snd_cwnd,
-				   tcp_packets_in_flight(tp) + 1U);
-		tp->snd_cwnd_cnt = 0;
-		tp->high_seq = tp->snd_nxt;
-		tp->snd_cwnd_stamp = tcp_time_stamp;
-		TCP_ECN_queue_cwr(tp);
-
-		tcp_set_ca_state(sk, TCP_CA_CWR);
-	}
-}
-
 /*
  * Packet counting of FACK is based on in-order assumptions, therefore TCP
  * disables it when reordering is detected
@@ -2489,35 +2470,6 @@ static inline void tcp_moderate_cwnd(struct tcp_sock *tp)
 	tp->snd_cwnd_stamp = tcp_time_stamp;
 }
 
-/* Lower bound on congestion window is slow start threshold
- * unless congestion avoidance choice decides to overide it.
- */
-static inline u32 tcp_cwnd_min(const struct sock *sk)
-{
-	const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops;
-
-	return ca_ops->min_cwnd ? ca_ops->min_cwnd(sk) : tcp_sk(sk)->snd_ssthresh;
-}
-
-/* Decrease cwnd each second ack. */
-static void tcp_cwnd_down(struct sock *sk, int flag)
-{
-	struct tcp_sock *tp = tcp_sk(sk);
-	int decr = tp->snd_cwnd_cnt + 1;
-
-	if ((flag & (FLAG_ANY_PROGRESS | FLAG_DSACKING_ACK)) ||
-	    (tcp_is_reno(tp) && !(flag & FLAG_NOT_DUP))) {
-		tp->snd_cwnd_cnt = decr & 1;
-		decr >>= 1;
-
-		if (decr && tp->snd_cwnd > tcp_cwnd_min(sk))
-			tp->snd_cwnd -= decr;
-
-		tp->snd_cwnd = min(tp->snd_cwnd, tcp_packets_in_flight(tp) + 1);
-		tp->snd_cwnd_stamp = tcp_time_stamp;
-	}
-}
-
 /* Nothing was retransmitted or returned timestamp is less
  * than timestamp of the first retransmission.
  */
@@ -2719,24 +2671,80 @@ static bool tcp_try_undo_loss(struct sock *sk)
 	return false;
 }
 
-static inline void tcp_complete_cwr(struct sock *sk)
+/* The cwnd reduction in CWR and Recovery use the PRR algorithm
+ * https://datatracker.ietf.org/doc/draft-ietf-tcpm-proportional-rate-reduction/
+ * It computes the number of packets to send (sndcnt) based on packets newly
+ * delivered:
+ *   1) If the packets in flight is larger than ssthresh, PRR spreads the
+ *      cwnd reductions across a full RTT.
+ *   2) If packets in flight is lower than ssthresh (such as due to excess
+ *      losses and/or application stalls), do not perform any further cwnd
+ *      reductions, but instead slow start up to ssthresh.
+ */
+static void tcp_init_cwnd_reduction(struct sock *sk, const bool set_ssthresh)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
-	/* Do not moderate cwnd if it's already undone in cwr or recovery. */
-	if (tp->undo_marker) {
-		if (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR) {
-			tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh);
-			tp->snd_cwnd_stamp = tcp_time_stamp;
-		} else if (tp->snd_ssthresh < TCP_INFINITE_SSTHRESH) {
-			/* PRR algorithm. */
-			tp->snd_cwnd = tp->snd_ssthresh;
-			tp->snd_cwnd_stamp = tcp_time_stamp;
-		}
+	tp->high_seq = tp->snd_nxt;
+	tp->bytes_acked = 0;
+	tp->snd_cwnd_cnt = 0;
+	tp->prior_cwnd = tp->snd_cwnd;
+	tp->prr_delivered = 0;
+	tp->prr_out = 0;
+	if (set_ssthresh)
+		tp->snd_ssthresh = inet_csk(sk)->icsk_ca_ops->ssthresh(sk);
+	TCP_ECN_queue_cwr(tp);
+}
+
+static void tcp_cwnd_reduction(struct sock *sk, int newly_acked_sacked,
+			       int fast_rexmit)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	int sndcnt = 0;
+	int delta = tp->snd_ssthresh - tcp_packets_in_flight(tp);
+
+	tp->prr_delivered += newly_acked_sacked;
+	if (tcp_packets_in_flight(tp) > tp->snd_ssthresh) {
+		u64 dividend = (u64)tp->snd_ssthresh * tp->prr_delivered +
+			       tp->prior_cwnd - 1;
+		sndcnt = div_u64(dividend, tp->prior_cwnd) - tp->prr_out;
+	} else {
+		sndcnt = min_t(int, delta,
+			       max_t(int, tp->prr_delivered - tp->prr_out,
+				     newly_acked_sacked) + 1);
+	}
+
+	sndcnt = max(sndcnt, (fast_rexmit ? 1 : 0));
+	tp->snd_cwnd = tcp_packets_in_flight(tp) + sndcnt;
+}
+
+static inline void tcp_end_cwnd_reduction(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+
+	/* Reset cwnd to ssthresh in CWR or Recovery (unless it's undone) */
+	if (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR ||
+	    (tp->undo_marker && tp->snd_ssthresh < TCP_INFINITE_SSTHRESH)) {
+		tp->snd_cwnd = tp->snd_ssthresh;
+		tp->snd_cwnd_stamp = tcp_time_stamp;
 	}
 	tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR);
 }
 
+/* Enter CWR state. Disable cwnd undo since congestion is proven with ECN */
+void tcp_enter_cwr(struct sock *sk, const int set_ssthresh)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+
+	tp->prior_ssthresh = 0;
+	tp->bytes_acked = 0;
+	if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) {
+		tp->undo_marker = 0;
+		tcp_init_cwnd_reduction(sk, set_ssthresh);
+		tcp_set_ca_state(sk, TCP_CA_CWR);
+	}
+}
+
 static void tcp_try_keep_open(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
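To make the arithmetic in tcp_cwnd_reduction() above concrete, the sketch below replays one recovery episode in user space with invented numbers (prior_cwnd = 10, ssthresh = 5, one segment newly delivered per ACK, fast_rexmit ignored). It only mirrors the formula; nothing in it is kernel code:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t prior_cwnd = 10, ssthresh = 5;	/* invented episode parameters */
	uint32_t prr_delivered = 0, prr_out = 0, in_flight = 10;

	for (int ack = 1; ack <= 6; ack++) {
		uint32_t newly_acked_sacked = 1;	/* one segment per ACK */
		int sndcnt;

		in_flight--;			/* the ACKed segment left the network */
		prr_delivered += newly_acked_sacked;

		if (in_flight > ssthresh) {
			/* Proportional phase: pace the reduction over roughly one RTT */
			uint64_t dividend = (uint64_t)ssthresh * prr_delivered +
					    prior_cwnd - 1;
			sndcnt = (int)(dividend / prior_cwnd) - (int)prr_out;
		} else {
			/* Slow-start reduction bound: grow back toward ssthresh */
			int delta = (int)ssthresh - (int)in_flight;
			int grow = (int)(prr_delivered - prr_out);
			if (grow < (int)newly_acked_sacked)
				grow = (int)newly_acked_sacked;
			sndcnt = delta < grow + 1 ? delta : grow + 1;
		}
		if (sndcnt < 0)		/* max(sndcnt, 0) since fast_rexmit is ignored */
			sndcnt = 0;

		printf("ack %d: in_flight=%u sndcnt=%d -> cwnd=%u\n",
		       ack, in_flight, sndcnt, in_flight + (uint32_t)sndcnt);

		prr_out += (uint32_t)sndcnt;
		in_flight += (uint32_t)sndcnt;	/* assume everything allowed is sent */
	}
	return 0;
}

With these inputs the printed cwnd steps down from 10 toward ssthresh at roughly half the delivery rate, which is the "spread the reduction across a full RTT" behaviour the comment block describes.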
@@ -2751,7 +2759,7 @@ static void tcp_try_keep_open(struct sock *sk)
 	}
 }
 
-static void tcp_try_to_open(struct sock *sk, int flag)
+static void tcp_try_to_open(struct sock *sk, int flag, int newly_acked_sacked)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
@@ -2768,7 +2776,7 @@ static void tcp_try_to_open(struct sock *sk, int flag)
 		if (inet_csk(sk)->icsk_ca_state != TCP_CA_Open)
 			tcp_moderate_cwnd(tp);
 	} else {
-		tcp_cwnd_down(sk, flag);
+		tcp_cwnd_reduction(sk, newly_acked_sacked, 0);
 	}
 }
 
@@ -2850,38 +2858,6 @@ void tcp_simple_retransmit(struct sock *sk)
 }
 EXPORT_SYMBOL(tcp_simple_retransmit);
 
-/* This function implements the PRR algorithm, specifcally the PRR-SSRB
- * (proportional rate reduction with slow start reduction bound) as described in
- * http://www.ietf.org/id/draft-mathis-tcpm-proportional-rate-reduction-01.txt.
- * It computes the number of packets to send (sndcnt) based on packets newly
- * delivered:
- *   1) If the packets in flight is larger than ssthresh, PRR spreads the
- *      cwnd reductions across a full RTT.
- *   2) If packets in flight is lower than ssthresh (such as due to excess
- *      losses and/or application stalls), do not perform any further cwnd
- *      reductions, but instead slow start up to ssthresh.
- */
-static void tcp_update_cwnd_in_recovery(struct sock *sk, int newly_acked_sacked,
-					int fast_rexmit, int flag)
-{
-	struct tcp_sock *tp = tcp_sk(sk);
-	int sndcnt = 0;
-	int delta = tp->snd_ssthresh - tcp_packets_in_flight(tp);
-
-	if (tcp_packets_in_flight(tp) > tp->snd_ssthresh) {
-		u64 dividend = (u64)tp->snd_ssthresh * tp->prr_delivered +
-			       tp->prior_cwnd - 1;
-		sndcnt = div_u64(dividend, tp->prior_cwnd) - tp->prr_out;
-	} else {
-		sndcnt = min_t(int, delta,
-			       max_t(int, tp->prr_delivered - tp->prr_out,
-				     newly_acked_sacked) + 1);
-	}
-
-	sndcnt = max(sndcnt, (fast_rexmit ? 1 : 0));
-	tp->snd_cwnd = tcp_packets_in_flight(tp) + sndcnt;
-}
-
 static void tcp_enter_recovery(struct sock *sk, bool ece_ack)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
@@ -2894,7 +2870,6 @@ static void tcp_enter_recovery(struct sock *sk, bool ece_ack)
 
 	NET_INC_STATS_BH(sock_net(sk), mib_idx);
 
-	tp->high_seq = tp->snd_nxt;
 	tp->prior_ssthresh = 0;
 	tp->undo_marker = tp->snd_una;
 	tp->undo_retrans = tp->retrans_out;
@@ -2902,15 +2877,8 @@ static void tcp_enter_recovery(struct sock *sk, bool ece_ack)
 	if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) {
 		if (!ece_ack)
 			tp->prior_ssthresh = tcp_current_ssthresh(sk);
-		tp->snd_ssthresh = inet_csk(sk)->icsk_ca_ops->ssthresh(sk);
-		TCP_ECN_queue_cwr(tp);
+		tcp_init_cwnd_reduction(sk, true);
 	}
-
-	tp->bytes_acked = 0;
-	tp->snd_cwnd_cnt = 0;
-	tp->prior_cwnd = tp->snd_cwnd;
-	tp->prr_delivered = 0;
-	tp->prr_out = 0;
 	tcp_set_ca_state(sk, TCP_CA_Recovery);
 }
 
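Taken together, the hunks above make the ECN-triggered CWR path and loss recovery share one initializer for the PRR reduction state instead of duplicating it. A toy user-space model of that call structure, with an invented cwnd and a simplified state enum (none of this is kernel API):

#include <stdio.h>
#include <stdbool.h>

enum ca_state { CA_OPEN, CA_CWR, CA_RECOVERY };	/* simplified ordering */

struct model {
	enum ca_state state;
	unsigned int ssthresh, prior_cwnd, prr_delivered, prr_out;
};

/* Shared setup, standing in for tcp_init_cwnd_reduction() */
static void init_cwnd_reduction(struct model *m, bool set_ssthresh)
{
	m->prior_cwnd = 10;			/* pretend snd_cwnd was 10 */
	m->prr_delivered = m->prr_out = 0;
	if (set_ssthresh)
		m->ssthresh = m->prior_cwnd / 2;	/* Reno-style halving for the sketch */
}

static void enter_cwr(struct model *m)		/* ECN echo seen */
{
	if (m->state < CA_CWR) {
		init_cwnd_reduction(m, true);
		m->state = CA_CWR;
	}
}

static void enter_recovery(struct model *m)	/* loss detected */
{
	if (m->state < CA_CWR)
		init_cwnd_reduction(m, true);
	m->state = CA_RECOVERY;
}

int main(void)
{
	struct model m = { .state = CA_OPEN };

	enter_cwr(&m);
	printf("after ECN:  state=%d ssthresh=%u\n", m.state, m.ssthresh);

	enter_recovery(&m);	/* init skipped: a reduction is already in progress */
	printf("after loss: state=%d ssthresh=%u\n", m.state, m.ssthresh);
	return 0;
}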
@@ -2970,7 +2938,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
 			/* CWR is to be held something *above* high_seq
 			 * is ACKed for CWR bit to reach receiver. */
 			if (tp->snd_una != tp->high_seq) {
-				tcp_complete_cwr(sk);
+				tcp_end_cwnd_reduction(sk);
 				tcp_set_ca_state(sk, TCP_CA_Open);
 			}
 			break;
@@ -2980,7 +2948,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
 				tcp_reset_reno_sack(tp);
 			if (tcp_try_undo_recovery(sk))
 				return;
-			tcp_complete_cwr(sk);
+			tcp_end_cwnd_reduction(sk);
 			break;
 		}
 	}
@@ -3021,7 +2989,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
 		tcp_try_undo_dsack(sk);
 
 	if (!tcp_time_to_recover(sk, flag)) {
-		tcp_try_to_open(sk, flag);
+		tcp_try_to_open(sk, flag, newly_acked_sacked);
 		return;
 	}
 
@@ -3043,8 +3011,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
 
 	if (do_lost || (tcp_is_fack(tp) && tcp_head_timedout(sk)))
 		tcp_update_scoreboard(sk, fast_rexmit);
-	tp->prr_delivered += newly_acked_sacked;
-	tcp_update_cwnd_in_recovery(sk, newly_acked_sacked, fast_rexmit, flag);
+	tcp_cwnd_reduction(sk, newly_acked_sacked, fast_rexmit);
 	tcp_xmit_retransmit_queue(sk);
 }
 
@@ -3123,6 +3090,12 @@ void tcp_rearm_rto(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
+	/* If the retrans timer is currently being used by Fast Open
+	 * for SYN-ACK retrans purpose, stay put.
+	 */
+	if (tp->fastopen_rsk)
+		return;
+
 	if (!tp->packets_out) {
 		inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS);
 	} else {
@@ -3384,7 +3357,7 @@ static inline bool tcp_may_raise_cwnd(const struct sock *sk, const int flag)
 {
 	const struct tcp_sock *tp = tcp_sk(sk);
 	return (!(flag & FLAG_ECE) || tp->snd_cwnd < tp->snd_ssthresh) &&
-		!((1 << inet_csk(sk)->icsk_ca_state) & (TCPF_CA_Recovery | TCPF_CA_CWR));
+		!tcp_in_cwnd_reduction(sk);
 }
 
 /* Check that window update is acceptable.
@@ -3452,9 +3425,9 @@ static void tcp_conservative_spur_to_response(struct tcp_sock *tp)
 }
 
 /* A conservative spurious RTO response algorithm: reduce cwnd using
- * rate halving and continue in congestion avoidance.
+ * PRR and continue in congestion avoidance.
  */
-static void tcp_ratehalving_spur_to_response(struct sock *sk)
+static void tcp_cwr_spur_to_response(struct sock *sk)
 {
 	tcp_enter_cwr(sk, 0);
 }
@@ -3462,7 +3435,7 @@ static void tcp_ratehalving_spur_to_response(struct sock *sk)
 static void tcp_undo_spur_to_response(struct sock *sk, int flag)
 {
 	if (flag & FLAG_ECE)
-		tcp_ratehalving_spur_to_response(sk);
+		tcp_cwr_spur_to_response(sk);
 	else
 		tcp_undo_cwr(sk, true);
 }
@@ -3569,7 +3542,7 @@ static bool tcp_process_frto(struct sock *sk, int flag)
 			tcp_conservative_spur_to_response(tp);
 			break;
 		default:
-			tcp_ratehalving_spur_to_response(sk);
+			tcp_cwr_spur_to_response(sk);
 			break;
 		}
 		tp->frto_counter = 0;
@@ -4034,7 +4007,7 @@ static inline bool tcp_sequence(const struct tcp_sock *tp, u32 seq, u32 end_seq)
 }
 
 /* When we get a reset we do this. */
-static void tcp_reset(struct sock *sk)
+void tcp_reset(struct sock *sk)
 {
 	/* We want the right error as BSD sees it (and indeed as we do). */
 	switch (sk->sk_state) {
@@ -5740,7 +5713,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
 
 		TCP_ECN_rcv_synack(tp, th);
 
-		tp->snd_wl1 = TCP_SKB_CB(skb)->seq;
+		tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);
 		tcp_ack(sk, skb, FLAG_SLOWPATH);
 
 		/* Ok.. it's good. Set up sequence numbers and
@@ -5753,7 +5726,6 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
 		 * never scaled.
 		 */
 		tp->snd_wnd = ntohs(th->window);
-		tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);
 
 		if (!tp->rx_opt.wscale_ok) {
 			tp->rx_opt.snd_wscale = tp->rx_opt.rcv_wscale = 0;
@@ -5891,7 +5863,9 @@ discard:
 		tcp_send_synack(sk);
 #if 0
 		/* Note, we could accept data and URG from this segment.
-		 * There are no obstacles to make this.
+		 * There are no obstacles to make this (except that we must
+		 * either change tcp_recvmsg() to prevent it from returning data
+		 * before 3WHS completes per RFC793, or employ TCP Fast Open).
 		 *
 		 * However, if we ignore data in ACKless segments sometimes,
 		 * we have no reasons to accept it sometimes.
@@ -5931,6 +5905,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct inet_connection_sock *icsk = inet_csk(sk);
+	struct request_sock *req;
 	int queued = 0;
 
 	tp->rx_opt.saw_tstamp = 0;
@@ -5986,6 +5961,14 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 		return 0;
 	}
 
+	req = tp->fastopen_rsk;
+	if (req != NULL) {
+		BUG_ON(sk->sk_state != TCP_SYN_RECV &&
+		    sk->sk_state != TCP_FIN_WAIT1);
+
+		if (tcp_check_req(sk, skb, req, NULL, true) == NULL)
+			goto discard;
+	}
 	if (!tcp_validate_incoming(sk, skb, th, 0))
 		return 0;
 
@@ -5996,7 +5979,25 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 	switch (sk->sk_state) {
 	case TCP_SYN_RECV:
 		if (acceptable) {
-			tp->copied_seq = tp->rcv_nxt;
+			/* Once we leave TCP_SYN_RECV, we no longer
+			 * need req so release it.
+			 */
+			if (req) {
+				tcp_synack_rtt_meas(sk, req);
+				tp->total_retrans = req->retrans;
+
+				reqsk_fastopen_remove(sk, req, false);
+			} else {
+				/* Make sure socket is routed, for
+				 * correct metrics.
+				 */
+				icsk->icsk_af_ops->rebuild_header(sk);
+				tcp_init_congestion_control(sk);
+
+				tcp_mtup_init(sk);
+				tcp_init_buffer_space(sk);
+				tp->copied_seq = tp->rcv_nxt;
+			}
 			smp_mb();
 			tcp_set_state(sk, TCP_ESTABLISHED);
 			sk->sk_state_change(sk);
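The point of the TCP_SYN_RECV hunk above is that a Fast Open child socket was already fully set up when the data-carrying SYN arrived, so the final ACK only has to release the request sock (and take a SYN-ACK RTT sample), whereas a regular passive open still performs the late initialization. A compressed user-space sketch of that branch; the strings merely name the kernel steps, nothing here calls kernel APIs:

#include <stdio.h>
#include <stdbool.h>

/* Model of the two ways the passive open completes in tcp_rcv_state_process(). */
static void complete_passive_open(bool fastopen_req_pending)
{
	if (fastopen_req_pending) {
		puts("  take SYN-ACK RTT sample from the request sock");
		puts("  release the request sock");
	} else {
		puts("  rebuild header, init congestion control");
		puts("  init MTU probing and receive buffer space");
		puts("  copied_seq = rcv_nxt");
	}
	puts("  -> TCP_ESTABLISHED");
}

int main(void)
{
	puts("regular 3WHS:");
	complete_passive_open(false);
	puts("TCP Fast Open:");
	complete_passive_open(true);
	return 0;
}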
@@ -6018,23 +6019,27 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 			if (tp->rx_opt.tstamp_ok)
 				tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
 
-			/* Make sure socket is routed, for
-			 * correct metrics.
-			 */
-			icsk->icsk_af_ops->rebuild_header(sk);
-
-			tcp_init_metrics(sk);
-
-			tcp_init_congestion_control(sk);
+			if (req) {
+				/* Re-arm the timer because data may
+				 * have been sent out. This is similar
+				 * to the regular data transmission case
+				 * when new data has just been ack'ed.
+				 *
+				 * (TFO) - we could try to be more
+				 * aggressive and retranmitting any data
+				 * sooner based on when they were sent
+				 * out.
+				 */
+				tcp_rearm_rto(sk);
+			} else
+				tcp_init_metrics(sk);
 
 			/* Prevent spurious tcp_cwnd_restart() on
 			 * first data packet.
 			 */
 			tp->lsndtime = tcp_time_stamp;
 
-			tcp_mtup_init(sk);
 			tcp_initialize_rcv_mss(sk);
-			tcp_init_buffer_space(sk);
 			tcp_fast_path_on(tp);
 		} else {
 			return 1;
@@ -6042,6 +6047,16 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 		break;
 
 	case TCP_FIN_WAIT1:
+		/* If we enter the TCP_FIN_WAIT1 state and we are a
+		 * Fast Open socket and this is the first acceptable
+		 * ACK we have received, this would have acknowledged
+		 * our SYNACK so stop the SYNACK timer.
+		 */
+		if (acceptable && req != NULL) {
+			/* We no longer need the request sock. */
+			reqsk_fastopen_remove(sk, req, false);
+			tcp_rearm_rto(sk);
+		}
 		if (tp->snd_una == tp->write_seq) {
 			struct dst_entry *dst;
 