-rw-r--r--   include/net/tcp.h      |  10
-rw-r--r--   net/ipv4/tcp_input.c   | 119
-rw-r--r--   net/ipv4/tcp_output.c  |   6
3 files changed, 52 insertions, 83 deletions
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 1421b02a7905..a8cb00c0c6d9 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -913,15 +913,21 @@ static inline bool tcp_in_initial_slowstart(const struct tcp_sock *tp)
         return tp->snd_ssthresh >= TCP_INFINITE_SSTHRESH;
 }

+static inline bool tcp_in_cwnd_reduction(const struct sock *sk)
+{
+        return (TCPF_CA_CWR | TCPF_CA_Recovery) &
+               (1 << inet_csk(sk)->icsk_ca_state);
+}
+
 /* If cwnd > ssthresh, we may raise ssthresh to be half-way to cwnd.
- * The exception is rate halving phase, when cwnd is decreasing towards
+ * The exception is cwnd reduction phase, when cwnd is decreasing towards
  * ssthresh.
  */
 static inline __u32 tcp_current_ssthresh(const struct sock *sk)
 {
         const struct tcp_sock *tp = tcp_sk(sk);

-        if ((1 << inet_csk(sk)->icsk_ca_state) & (TCPF_CA_CWR | TCPF_CA_Recovery))
+        if (tcp_in_cwnd_reduction(sk))
                 return tp->snd_ssthresh;
         else
                 return max(tp->snd_ssthresh,
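Note: the new tcp_in_cwnd_reduction() helper folds the "is the connection in CWR or Recovery?" test into one mask operation. Each TCPF_CA_* flag is 1 << TCP_CA_*, so shifting 1 by the current ca_state and ANDing it with the two flags answers the question without comparing states one by one. The standalone sketch below (not kernel code) illustrates the idiom; the enum values and CAF_* macros are local stand-ins that mirror the kernel's convention.

#include <stdbool.h>
#include <stdio.h>

/* Local stand-ins mirroring the kernel's tcp_ca_state numbering. */
enum ca_state { CA_Open = 0, CA_Disorder = 1, CA_CWR = 2, CA_Recovery = 3, CA_Loss = 4 };

#define CAF_CWR      (1 << CA_CWR)
#define CAF_Recovery (1 << CA_Recovery)

static bool in_cwnd_reduction(enum ca_state state)
{
        /* One shift plus one AND covers both cwnd-reduction states. */
        return (CAF_CWR | CAF_Recovery) & (1 << state);
}

int main(void)
{
        for (int s = CA_Open; s <= CA_Loss; s++)
                printf("state %d: %s\n", s, in_cwnd_reduction(s) ? "reducing" : "not reducing");
        return 0;       /* only CA_CWR and CA_Recovery report "reducing" */
}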
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 38589e464e63..e2bec815ff23 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2470,35 +2470,6 @@ static inline void tcp_moderate_cwnd(struct tcp_sock *tp)
         tp->snd_cwnd_stamp = tcp_time_stamp;
 }

-/* Lower bound on congestion window is slow start threshold
- * unless congestion avoidance choice decides to overide it.
- */
-static inline u32 tcp_cwnd_min(const struct sock *sk)
-{
-        const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops;
-
-        return ca_ops->min_cwnd ? ca_ops->min_cwnd(sk) : tcp_sk(sk)->snd_ssthresh;
-}
-
-/* Decrease cwnd each second ack. */
-static void tcp_cwnd_down(struct sock *sk, int flag)
-{
-        struct tcp_sock *tp = tcp_sk(sk);
-        int decr = tp->snd_cwnd_cnt + 1;
-
-        if ((flag & (FLAG_ANY_PROGRESS | FLAG_DSACKING_ACK)) ||
-            (tcp_is_reno(tp) && !(flag & FLAG_NOT_DUP))) {
-                tp->snd_cwnd_cnt = decr & 1;
-                decr >>= 1;
-
-                if (decr && tp->snd_cwnd > tcp_cwnd_min(sk))
-                        tp->snd_cwnd -= decr;
-
-                tp->snd_cwnd = min(tp->snd_cwnd, tcp_packets_in_flight(tp) + 1);
-                tp->snd_cwnd_stamp = tcp_time_stamp;
-        }
-}
-
 /* Nothing was retransmitted or returned timestamp is less
  * than timestamp of the first retransmission.
  */
@@ -2700,9 +2671,8 @@ static bool tcp_try_undo_loss(struct sock *sk)
         return false;
 }

-/* This function implements the PRR algorithm, specifcally the PRR-SSRB
- * (proportional rate reduction with slow start reduction bound) as described in
- * http://www.ietf.org/id/draft-mathis-tcpm-proportional-rate-reduction-01.txt.
+/* The cwnd reduction in CWR and Recovery use the PRR algorithm
+ * https://datatracker.ietf.org/doc/draft-ietf-tcpm-proportional-rate-reduction/
  * It computes the number of packets to send (sndcnt) based on packets newly
  * delivered:
  * 1) If the packets in flight is larger than ssthresh, PRR spreads the
@@ -2711,13 +2681,29 @@ static bool tcp_try_undo_loss(struct sock *sk)
  * losses and/or application stalls), do not perform any further cwnd
  * reductions, but instead slow start up to ssthresh.
  */
-static void tcp_update_cwnd_in_recovery(struct sock *sk, int newly_acked_sacked,
-                                        int fast_rexmit, int flag)
+static void tcp_init_cwnd_reduction(struct sock *sk, const bool set_ssthresh)
+{
+        struct tcp_sock *tp = tcp_sk(sk);
+
+        tp->high_seq = tp->snd_nxt;
+        tp->bytes_acked = 0;
+        tp->snd_cwnd_cnt = 0;
+        tp->prior_cwnd = tp->snd_cwnd;
+        tp->prr_delivered = 0;
+        tp->prr_out = 0;
+        if (set_ssthresh)
+                tp->snd_ssthresh = inet_csk(sk)->icsk_ca_ops->ssthresh(sk);
+        TCP_ECN_queue_cwr(tp);
+}
+
+static void tcp_cwnd_reduction(struct sock *sk, int newly_acked_sacked,
+                               int fast_rexmit)
 {
         struct tcp_sock *tp = tcp_sk(sk);
         int sndcnt = 0;
         int delta = tp->snd_ssthresh - tcp_packets_in_flight(tp);

+        tp->prr_delivered += newly_acked_sacked;
         if (tcp_packets_in_flight(tp) > tp->snd_ssthresh) {
                 u64 dividend = (u64)tp->snd_ssthresh * tp->prr_delivered +
                                tp->prior_cwnd - 1;
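Note: the arithmetic visible at the top of tcp_cwnd_reduction() is PRR's proportional step. While packets in flight still exceed ssthresh, the sender should have transmitted about ssthresh * prr_delivered / prior_cwnd packets since the reduction began, rounded up by adding prior_cwnd - 1 before dividing. The userspace sketch below works that out for concrete numbers; it assumes the remainder of the function (outside this hunk) subtracts prr_out as described in the referenced draft, and every name here is a local illustration rather than kernel state.

#include <stdint.h>
#include <stdio.h>

static uint32_t prr_sndcnt(uint32_t ssthresh, uint32_t prior_cwnd,
                           uint32_t prr_delivered, uint32_t prr_out)
{
        /* Same rounding trick as the kernel hunk: adding prior_cwnd - 1
         * before the division rounds the quotient up instead of down. */
        uint64_t dividend = (uint64_t)ssthresh * prr_delivered + prior_cwnd - 1;
        uint64_t allowed = dividend / prior_cwnd;

        return allowed > prr_out ? (uint32_t)(allowed - prr_out) : 0;
}

int main(void)
{
        /* Example: cwnd was 20 at loss detection and ssthresh became 10, so
         * PRR releases roughly one new packet for every two delivered. */
        printf("sndcnt = %u\n", prr_sndcnt(10, 20, 4, 1));      /* prints 1 */
        return 0;
}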
@@ -2732,43 +2718,29 @@ static void tcp_update_cwnd_in_recovery(struct sock *sk, int newly_acked_sacked,
         tp->snd_cwnd = tcp_packets_in_flight(tp) + sndcnt;
 }

-static inline void tcp_complete_cwr(struct sock *sk)
+static inline void tcp_end_cwnd_reduction(struct sock *sk)
 {
         struct tcp_sock *tp = tcp_sk(sk);

-        /* Do not moderate cwnd if it's already undone in cwr or recovery. */
-        if (tp->undo_marker) {
-                if (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR) {
-                        tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh);
-                        tp->snd_cwnd_stamp = tcp_time_stamp;
-                } else if (tp->snd_ssthresh < TCP_INFINITE_SSTHRESH) {
-                        /* PRR algorithm. */
-                        tp->snd_cwnd = tp->snd_ssthresh;
-                        tp->snd_cwnd_stamp = tcp_time_stamp;
-                }
+        /* Reset cwnd to ssthresh in CWR or Recovery (unless it's undone) */
+        if (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR ||
+            (tp->undo_marker && tp->snd_ssthresh < TCP_INFINITE_SSTHRESH)) {
+                tp->snd_cwnd = tp->snd_ssthresh;
+                tp->snd_cwnd_stamp = tcp_time_stamp;
         }
         tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR);
 }

-/* Set slow start threshold and cwnd not falling to slow start */
+/* Enter CWR state. Disable cwnd undo since congestion is proven with ECN */
 void tcp_enter_cwr(struct sock *sk, const int set_ssthresh)
 {
         struct tcp_sock *tp = tcp_sk(sk);
-        const struct inet_connection_sock *icsk = inet_csk(sk);

         tp->prior_ssthresh = 0;
         tp->bytes_acked = 0;
-        if (icsk->icsk_ca_state < TCP_CA_CWR) {
+        if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) {
                 tp->undo_marker = 0;
-                if (set_ssthresh)
-                        tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
-                tp->snd_cwnd = min(tp->snd_cwnd,
-                                   tcp_packets_in_flight(tp) + 1U);
-                tp->snd_cwnd_cnt = 0;
-                tp->high_seq = tp->snd_nxt;
-                tp->snd_cwnd_stamp = tcp_time_stamp;
-                TCP_ECN_queue_cwr(tp);
-
+                tcp_init_cwnd_reduction(sk, set_ssthresh);
                 tcp_set_ca_state(sk, TCP_CA_CWR);
         }
 }
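Note: tcp_end_cwnd_reduction() also consolidates the exit behaviour. cwnd snaps to ssthresh when leaving CWR unconditionally (congestion was signalled by ECN), and when leaving Recovery only while an undo is still armed (undo_marker set) and ssthresh holds a real value. The standalone sketch below (not kernel code) exercises that predicate with purely illustrative numbers; the struct and the sentinel are local stand-ins.

#include <stdint.h>
#include <stdio.h>

#define INFINITE_SSTHRESH 0x7fffffffu   /* stand-in for TCP_INFINITE_SSTHRESH */

enum ca_state { CA_CWR, CA_Recovery };

struct cc {
        enum ca_state state;
        uint32_t cwnd, ssthresh;
        uint32_t undo_marker;   /* nonzero while the reduction can still be undone */
};

static void end_cwnd_reduction(struct cc *c)
{
        /* Reset cwnd to ssthresh in CWR or Recovery (unless it's undone). */
        if (c->state == CA_CWR ||
            (c->undo_marker && c->ssthresh < INFINITE_SSTHRESH))
                c->cwnd = c->ssthresh;
}

int main(void)
{
        struct cc ecn    = { CA_CWR,      14, 10, 0 };  /* ECN-triggered CWR        */
        struct cc undone = { CA_Recovery, 20, 10, 0 };  /* spurious, already undone */
        struct cc loss   = { CA_Recovery, 12, 10, 1 };  /* genuine loss recovery    */

        end_cwnd_reduction(&ecn);
        end_cwnd_reduction(&undone);
        end_cwnd_reduction(&loss);
        printf("%u %u %u\n", ecn.cwnd, undone.cwnd, loss.cwnd);   /* 10 20 10 */
        return 0;
}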
@@ -2787,7 +2759,7 @@ static void tcp_try_keep_open(struct sock *sk)
         }
 }

-static void tcp_try_to_open(struct sock *sk, int flag)
+static void tcp_try_to_open(struct sock *sk, int flag, int newly_acked_sacked)
 {
         struct tcp_sock *tp = tcp_sk(sk);

@@ -2804,7 +2776,7 @@ static void tcp_try_to_open(struct sock *sk, int flag)
                 if (inet_csk(sk)->icsk_ca_state != TCP_CA_Open)
                         tcp_moderate_cwnd(tp);
         } else {
-                tcp_cwnd_down(sk, flag);
+                tcp_cwnd_reduction(sk, newly_acked_sacked, 0);
         }
 }

@@ -2898,7 +2870,6 @@ static void tcp_enter_recovery(struct sock *sk, bool ece_ack)

         NET_INC_STATS_BH(sock_net(sk), mib_idx);

-        tp->high_seq = tp->snd_nxt;
         tp->prior_ssthresh = 0;
         tp->undo_marker = tp->snd_una;
         tp->undo_retrans = tp->retrans_out;
@@ -2906,15 +2877,8 @@ static void tcp_enter_recovery(struct sock *sk, bool ece_ack)
         if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) {
                 if (!ece_ack)
                         tp->prior_ssthresh = tcp_current_ssthresh(sk);
-                tp->snd_ssthresh = inet_csk(sk)->icsk_ca_ops->ssthresh(sk);
-                TCP_ECN_queue_cwr(tp);
+                tcp_init_cwnd_reduction(sk, true);
         }
-
-        tp->bytes_acked = 0;
-        tp->snd_cwnd_cnt = 0;
-        tp->prior_cwnd = tp->snd_cwnd;
-        tp->prr_delivered = 0;
-        tp->prr_out = 0;
         tcp_set_ca_state(sk, TCP_CA_Recovery);
 }

@@ -2974,7 +2938,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
                         /* CWR is to be held something *above* high_seq
                          * is ACKed for CWR bit to reach receiver. */
                         if (tp->snd_una != tp->high_seq) {
-                                tcp_complete_cwr(sk);
+                                tcp_end_cwnd_reduction(sk);
                                 tcp_set_ca_state(sk, TCP_CA_Open);
                         }
                         break;
@@ -2984,7 +2948,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
                                 tcp_reset_reno_sack(tp);
                         if (tcp_try_undo_recovery(sk))
                                 return;
-                        tcp_complete_cwr(sk);
+                        tcp_end_cwnd_reduction(sk);
                         break;
                 }
         }
@@ -3025,7 +2989,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
                 tcp_try_undo_dsack(sk);

         if (!tcp_time_to_recover(sk, flag)) {
-                tcp_try_to_open(sk, flag);
+                tcp_try_to_open(sk, flag, newly_acked_sacked);
                 return;
         }

@@ -3047,8 +3011,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,

         if (do_lost || (tcp_is_fack(tp) && tcp_head_timedout(sk)))
                 tcp_update_scoreboard(sk, fast_rexmit);
-        tp->prr_delivered += newly_acked_sacked;
-        tcp_update_cwnd_in_recovery(sk, newly_acked_sacked, fast_rexmit, flag);
+        tcp_cwnd_reduction(sk, newly_acked_sacked, fast_rexmit);
         tcp_xmit_retransmit_queue(sk);
 }

@@ -3394,7 +3357,7 @@ static inline bool tcp_may_raise_cwnd(const struct sock *sk, const int flag)
 {
         const struct tcp_sock *tp = tcp_sk(sk);
         return (!(flag & FLAG_ECE) || tp->snd_cwnd < tp->snd_ssthresh) &&
-                !((1 << inet_csk(sk)->icsk_ca_state) & (TCPF_CA_Recovery | TCPF_CA_CWR));
+                !tcp_in_cwnd_reduction(sk);
 }

 /* Check that window update is acceptable.
@@ -3462,9 +3425,9 @@ static void tcp_conservative_spur_to_response(struct tcp_sock *tp)
 }

 /* A conservative spurious RTO response algorithm: reduce cwnd using
- * rate halving and continue in congestion avoidance.
+ * PRR and continue in congestion avoidance.
  */
-static void tcp_ratehalving_spur_to_response(struct sock *sk)
+static void tcp_cwr_spur_to_response(struct sock *sk)
 {
         tcp_enter_cwr(sk, 0);
 }
@@ -3472,7 +3435,7 @@ static void tcp_ratehalving_spur_to_response(struct sock *sk)
 static void tcp_undo_spur_to_response(struct sock *sk, int flag)
 {
         if (flag & FLAG_ECE)
-                tcp_ratehalving_spur_to_response(sk);
+                tcp_cwr_spur_to_response(sk);
         else
                 tcp_undo_cwr(sk, true);
 }
@@ -3579,7 +3542,7 @@ static bool tcp_process_frto(struct sock *sk, int flag)
                         tcp_conservative_spur_to_response(tp);
                         break;
                 default:
-                        tcp_ratehalving_spur_to_response(sk);
+                        tcp_cwr_spur_to_response(sk);
                         break;
                 }
                 tp->frto_counter = 0;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 9383b51f3efc..cfe6ffe1c177 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2037,10 +2037,10 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
                 if (push_one)
                         break;
         }
-        if (inet_csk(sk)->icsk_ca_state == TCP_CA_Recovery)
-                tp->prr_out += sent_pkts;

         if (likely(sent_pkts)) {
+                if (tcp_in_cwnd_reduction(sk))
+                        tp->prr_out += sent_pkts;
                 tcp_cwnd_validate(sk);
                 return false;
         }
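Note: moving the prr_out update under the if (likely(sent_pkts)) branch and switching the guard to tcp_in_cwnd_reduction() means packets sent during CWR are now charged to PRR as well, not only those sent in Recovery. That accounting closes PRR's feedback loop: prr_out (sent) is balanced against prr_delivered (ACKed), so cwnd glides from prior_cwnd down to ssthresh over the episode. The standalone simulation below (not kernel code, illustrative figures only) shows that glide; it assumes the sndcnt formula from the draft cited in the tcp_input.c comment.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint32_t prior_cwnd = 20, ssthresh = 10;
        uint32_t in_flight = 20, prr_delivered = 0, prr_out = 0, cwnd = prior_cwnd;

        /* One ACK per iteration, each newly delivering two packets. */
        for (int ack = 1; ack <= 10 && in_flight > ssthresh; ack++) {
                uint32_t delivered = 2;
                uint64_t dividend;
                uint32_t sndcnt;

                in_flight -= delivered;
                prr_delivered += delivered;

                dividend = (uint64_t)ssthresh * prr_delivered + prior_cwnd - 1;
                sndcnt = (uint32_t)(dividend / prior_cwnd) - prr_out;

                cwnd = in_flight + sndcnt;
                prr_out += sndcnt;      /* the bookkeeping this hunk widens to CWR */
                in_flight += sndcnt;    /* the allowed packets go onto the wire    */

                printf("ack %2d: cwnd=%2u prr_out=%2u\n", ack, cwnd, prr_out);
        }
        return 0;       /* cwnd steps 19, 18, ... down to ssthresh (10) */
}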
@@ -2542,7 +2542,7 @@ begin_fwd:
                 }
                 NET_INC_STATS_BH(sock_net(sk), mib_idx);

-                if (inet_csk(sk)->icsk_ca_state == TCP_CA_Recovery)
+                if (tcp_in_cwnd_reduction(sk))
                         tp->prr_out += tcp_skb_pcount(skb);

                 if (skb == tcp_write_queue_head(sk))