-rw-r--r--  include/net/tcp.h      |  10
-rw-r--r--  net/ipv4/tcp_input.c   | 119
-rw-r--r--  net/ipv4/tcp_output.c  |   6
3 files changed, 52 insertions, 83 deletions
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 1421b02a7905..a8cb00c0c6d9 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -913,15 +913,21 @@ static inline bool tcp_in_initial_slowstart(const struct tcp_sock *tp)
         return tp->snd_ssthresh >= TCP_INFINITE_SSTHRESH;
 }
 
+static inline bool tcp_in_cwnd_reduction(const struct sock *sk)
+{
+        return (TCPF_CA_CWR | TCPF_CA_Recovery) &
+               (1 << inet_csk(sk)->icsk_ca_state);
+}
+
 /* If cwnd > ssthresh, we may raise ssthresh to be half-way to cwnd.
- * The exception is rate halving phase, when cwnd is decreasing towards
+ * The exception is cwnd reduction phase, when cwnd is decreasing towards
  * ssthresh.
  */
 static inline __u32 tcp_current_ssthresh(const struct sock *sk)
 {
         const struct tcp_sock *tp = tcp_sk(sk);
 
-        if ((1 << inet_csk(sk)->icsk_ca_state) & (TCPF_CA_CWR | TCPF_CA_Recovery))
+        if (tcp_in_cwnd_reduction(sk))
                 return tp->snd_ssthresh;
         else
                 return max(tp->snd_ssthresh,
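
For quick reference while reading the rest of the patch: the new helper is a one-bit membership test on the congestion-avoidance state. The following standalone sketch (plain userspace C, not kernel code; the enum values only mirror the kernel's tcp_ca_state ordering and the names are local to this example) illustrates the test:

/*
 * Standalone sketch (userspace C, not kernel code) of the bitmask test
 * that the new tcp_in_cwnd_reduction() helper performs: shift 1 by the
 * current congestion-avoidance state and mask it against the CWR and
 * Recovery bits.
 */
#include <stdbool.h>
#include <stdio.h>

enum ca_state { CA_Open, CA_Disorder, CA_CWR, CA_Recovery, CA_Loss };

#define CAF_CWR      (1 << CA_CWR)
#define CAF_Recovery (1 << CA_Recovery)

static bool in_cwnd_reduction(enum ca_state state)
{
        return (CAF_CWR | CAF_Recovery) & (1 << state);
}

int main(void)
{
        printf("Open=%d CWR=%d Recovery=%d\n",
               in_cwnd_reduction(CA_Open),      /* 0 */
               in_cwnd_reduction(CA_CWR),       /* 1 */
               in_cwnd_reduction(CA_Recovery)); /* 1 */
        return 0;
}
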
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 38589e464e63..e2bec815ff23 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2470,35 +2470,6 @@ static inline void tcp_moderate_cwnd(struct tcp_sock *tp)
         tp->snd_cwnd_stamp = tcp_time_stamp;
 }
 
-/* Lower bound on congestion window is slow start threshold
- * unless congestion avoidance choice decides to overide it.
- */
-static inline u32 tcp_cwnd_min(const struct sock *sk)
-{
-        const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops;
-
-        return ca_ops->min_cwnd ? ca_ops->min_cwnd(sk) : tcp_sk(sk)->snd_ssthresh;
-}
-
-/* Decrease cwnd each second ack. */
-static void tcp_cwnd_down(struct sock *sk, int flag)
-{
-        struct tcp_sock *tp = tcp_sk(sk);
-        int decr = tp->snd_cwnd_cnt + 1;
-
-        if ((flag & (FLAG_ANY_PROGRESS | FLAG_DSACKING_ACK)) ||
-            (tcp_is_reno(tp) && !(flag & FLAG_NOT_DUP))) {
-                tp->snd_cwnd_cnt = decr & 1;
-                decr >>= 1;
-
-                if (decr && tp->snd_cwnd > tcp_cwnd_min(sk))
-                        tp->snd_cwnd -= decr;
-
-                tp->snd_cwnd = min(tp->snd_cwnd, tcp_packets_in_flight(tp) + 1);
-                tp->snd_cwnd_stamp = tcp_time_stamp;
-        }
-}
-
 /* Nothing was retransmitted or returned timestamp is less
  * than timestamp of the first retransmission.
  */
@@ -2700,9 +2671,8 @@ static bool tcp_try_undo_loss(struct sock *sk)
         return false;
 }
 
-/* This function implements the PRR algorithm, specifcally the PRR-SSRB
- * (proportional rate reduction with slow start reduction bound) as described in
- * http://www.ietf.org/id/draft-mathis-tcpm-proportional-rate-reduction-01.txt.
+/* The cwnd reduction in CWR and Recovery use the PRR algorithm
+ * https://datatracker.ietf.org/doc/draft-ietf-tcpm-proportional-rate-reduction/
  * It computes the number of packets to send (sndcnt) based on packets newly
  * delivered:
  * 1) If the packets in flight is larger than ssthresh, PRR spreads the
@@ -2711,13 +2681,29 @@ static bool tcp_try_undo_loss(struct sock *sk)
  * losses and/or application stalls), do not perform any further cwnd
  * reductions, but instead slow start up to ssthresh.
  */
-static void tcp_update_cwnd_in_recovery(struct sock *sk, int newly_acked_sacked,
-                                        int fast_rexmit, int flag)
+static void tcp_init_cwnd_reduction(struct sock *sk, const bool set_ssthresh)
+{
+        struct tcp_sock *tp = tcp_sk(sk);
+
+        tp->high_seq = tp->snd_nxt;
+        tp->bytes_acked = 0;
+        tp->snd_cwnd_cnt = 0;
+        tp->prior_cwnd = tp->snd_cwnd;
+        tp->prr_delivered = 0;
+        tp->prr_out = 0;
+        if (set_ssthresh)
+                tp->snd_ssthresh = inet_csk(sk)->icsk_ca_ops->ssthresh(sk);
+        TCP_ECN_queue_cwr(tp);
+}
+
+static void tcp_cwnd_reduction(struct sock *sk, int newly_acked_sacked,
+                               int fast_rexmit)
 {
         struct tcp_sock *tp = tcp_sk(sk);
         int sndcnt = 0;
         int delta = tp->snd_ssthresh - tcp_packets_in_flight(tp);
 
+        tp->prr_delivered += newly_acked_sacked;
         if (tcp_packets_in_flight(tp) > tp->snd_ssthresh) {
                 u64 dividend = (u64)tp->snd_ssthresh * tp->prr_delivered +
                                tp->prior_cwnd - 1;
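
To make the proportional branch above concrete, here is a worked sketch with illustrative numbers (standalone userspace C, not kernel code). With prior_cwnd = 20 and ssthresh = 10 it permits roughly one new packet for every two newly delivered, so cwnd glides down toward ssthresh instead of being cut in a single step:

/*
 * Worked sketch (illustrative numbers only) of the proportional PRR
 * branch shown above: sndcnt = ceil(ssthresh * prr_delivered /
 * prior_cwnd) - prr_out, computed per incoming ACK.
 */
#include <stdio.h>

int main(void)
{
        unsigned int prior_cwnd = 20, ssthresh = 10;
        unsigned int prr_delivered = 0, prr_out = 0;
        int ack;

        for (ack = 1; ack <= 10; ack++) {
                unsigned long long dividend;
                int sndcnt;

                prr_delivered += 1;     /* one packet newly delivered per ACK */
                dividend = (unsigned long long)ssthresh * prr_delivered +
                           prior_cwnd - 1;
                sndcnt = (int)(dividend / prior_cwnd) - (int)prr_out;
                if (sndcnt < 0)
                        sndcnt = 0;
                prr_out += sndcnt;
                printf("ack %2d: may send %d packet(s), prr_out=%u\n",
                       ack, sndcnt, prr_out);
        }
        /* Over 10 delivered packets, about 5 are sent: a gradual halving. */
        return 0;
}
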
@@ -2732,43 +2718,29 @@ static void tcp_update_cwnd_in_recovery(struct sock *sk, int newly_acked_sacked,
         tp->snd_cwnd = tcp_packets_in_flight(tp) + sndcnt;
 }
 
-static inline void tcp_complete_cwr(struct sock *sk)
+static inline void tcp_end_cwnd_reduction(struct sock *sk)
 {
         struct tcp_sock *tp = tcp_sk(sk);
 
-        /* Do not moderate cwnd if it's already undone in cwr or recovery. */
-        if (tp->undo_marker) {
-                if (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR) {
-                        tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh);
-                        tp->snd_cwnd_stamp = tcp_time_stamp;
-                } else if (tp->snd_ssthresh < TCP_INFINITE_SSTHRESH) {
-                        /* PRR algorithm. */
-                        tp->snd_cwnd = tp->snd_ssthresh;
-                        tp->snd_cwnd_stamp = tcp_time_stamp;
-                }
+        /* Reset cwnd to ssthresh in CWR or Recovery (unless it's undone) */
+        if (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR ||
+            (tp->undo_marker && tp->snd_ssthresh < TCP_INFINITE_SSTHRESH)) {
+                tp->snd_cwnd = tp->snd_ssthresh;
+                tp->snd_cwnd_stamp = tcp_time_stamp;
         }
         tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR);
 }
 
-/* Set slow start threshold and cwnd not falling to slow start */
+/* Enter CWR state. Disable cwnd undo since congestion is proven with ECN */
 void tcp_enter_cwr(struct sock *sk, const int set_ssthresh)
 {
         struct tcp_sock *tp = tcp_sk(sk);
-        const struct inet_connection_sock *icsk = inet_csk(sk);
 
         tp->prior_ssthresh = 0;
         tp->bytes_acked = 0;
-        if (icsk->icsk_ca_state < TCP_CA_CWR) {
+        if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) {
                 tp->undo_marker = 0;
-                if (set_ssthresh)
-                        tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
-                tp->snd_cwnd = min(tp->snd_cwnd,
-                                   tcp_packets_in_flight(tp) + 1U);
-                tp->snd_cwnd_cnt = 0;
-                tp->high_seq = tp->snd_nxt;
-                tp->snd_cwnd_stamp = tcp_time_stamp;
-                TCP_ECN_queue_cwr(tp);
-
+                tcp_init_cwnd_reduction(sk, set_ssthresh);
                 tcp_set_ca_state(sk, TCP_CA_CWR);
         }
 }
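
The merged test in tcp_end_cwnd_reduction() can be read as a single predicate. A minimal sketch of just that condition (local names and a stand-in constant, not kernel code):

/*
 * Minimal sketch of the merged condition above: cwnd is snapped to
 * ssthresh when the connection is in CWR, or in Recovery with the undo
 * marker still set and a real (finite) ssthresh recorded.
 */
#include <stdbool.h>
#include <stdio.h>

#define SSTHRESH_INFINITE 0x7fffffffu   /* stand-in for TCP_INFINITE_SSTHRESH */

static bool should_reset_cwnd_to_ssthresh(bool in_cwr, bool undo_marker,
                                          unsigned int ssthresh)
{
        return in_cwr || (undo_marker && ssthresh < SSTHRESH_INFINITE);
}

int main(void)
{
        /* CWR always resets; Recovery resets only while undo is still possible. */
        printf("%d %d %d\n",
               should_reset_cwnd_to_ssthresh(true, false, SSTHRESH_INFINITE), /* 1 */
               should_reset_cwnd_to_ssthresh(false, true, 10),                /* 1 */
               should_reset_cwnd_to_ssthresh(false, false, 10));              /* 0 */
        return 0;
}
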
@@ -2787,7 +2759,7 @@ static void tcp_try_keep_open(struct sock *sk)
         }
 }
 
-static void tcp_try_to_open(struct sock *sk, int flag)
+static void tcp_try_to_open(struct sock *sk, int flag, int newly_acked_sacked)
 {
         struct tcp_sock *tp = tcp_sk(sk);
 
@@ -2804,7 +2776,7 @@ static void tcp_try_to_open(struct sock *sk, int flag)
                 if (inet_csk(sk)->icsk_ca_state != TCP_CA_Open)
                         tcp_moderate_cwnd(tp);
         } else {
-                tcp_cwnd_down(sk, flag);
+                tcp_cwnd_reduction(sk, newly_acked_sacked, 0);
         }
 }
 
@@ -2898,7 +2870,6 @@ static void tcp_enter_recovery(struct sock *sk, bool ece_ack)
 
         NET_INC_STATS_BH(sock_net(sk), mib_idx);
 
-        tp->high_seq = tp->snd_nxt;
         tp->prior_ssthresh = 0;
         tp->undo_marker = tp->snd_una;
         tp->undo_retrans = tp->retrans_out;
@@ -2906,15 +2877,8 @@ static void tcp_enter_recovery(struct sock *sk, bool ece_ack)
         if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) {
                 if (!ece_ack)
                         tp->prior_ssthresh = tcp_current_ssthresh(sk);
-                tp->snd_ssthresh = inet_csk(sk)->icsk_ca_ops->ssthresh(sk);
-                TCP_ECN_queue_cwr(tp);
+                tcp_init_cwnd_reduction(sk, true);
         }
-
-        tp->bytes_acked = 0;
-        tp->snd_cwnd_cnt = 0;
-        tp->prior_cwnd = tp->snd_cwnd;
-        tp->prr_delivered = 0;
-        tp->prr_out = 0;
         tcp_set_ca_state(sk, TCP_CA_Recovery);
 }
 
@@ -2974,7 +2938,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
                 /* CWR is to be held something *above* high_seq
                  * is ACKed for CWR bit to reach receiver. */
                 if (tp->snd_una != tp->high_seq) {
-                        tcp_complete_cwr(sk);
+                        tcp_end_cwnd_reduction(sk);
                         tcp_set_ca_state(sk, TCP_CA_Open);
                 }
                 break;
@@ -2984,7 +2948,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
                         tcp_reset_reno_sack(tp);
                 if (tcp_try_undo_recovery(sk))
                         return;
-                tcp_complete_cwr(sk);
+                tcp_end_cwnd_reduction(sk);
                 break;
                 }
         }
@@ -3025,7 +2989,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
                 tcp_try_undo_dsack(sk);
 
         if (!tcp_time_to_recover(sk, flag)) {
-                tcp_try_to_open(sk, flag);
+                tcp_try_to_open(sk, flag, newly_acked_sacked);
                 return;
         }
 
@@ -3047,8 +3011,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
 
         if (do_lost || (tcp_is_fack(tp) && tcp_head_timedout(sk)))
                 tcp_update_scoreboard(sk, fast_rexmit);
-        tp->prr_delivered += newly_acked_sacked;
-        tcp_update_cwnd_in_recovery(sk, newly_acked_sacked, fast_rexmit, flag);
+        tcp_cwnd_reduction(sk, newly_acked_sacked, fast_rexmit);
         tcp_xmit_retransmit_queue(sk);
 }
 
@@ -3394,7 +3357,7 @@ static inline bool tcp_may_raise_cwnd(const struct sock *sk, const int flag)
 {
         const struct tcp_sock *tp = tcp_sk(sk);
         return (!(flag & FLAG_ECE) || tp->snd_cwnd < tp->snd_ssthresh) &&
-                !((1 << inet_csk(sk)->icsk_ca_state) & (TCPF_CA_Recovery | TCPF_CA_CWR));
+                !tcp_in_cwnd_reduction(sk);
 }
 
 /* Check that window update is acceptable.
@@ -3462,9 +3425,9 @@ static void tcp_conservative_spur_to_response(struct tcp_sock *tp)
 }
 
 /* A conservative spurious RTO response algorithm: reduce cwnd using
- * rate halving and continue in congestion avoidance.
+ * PRR and continue in congestion avoidance.
  */
-static void tcp_ratehalving_spur_to_response(struct sock *sk)
+static void tcp_cwr_spur_to_response(struct sock *sk)
 {
         tcp_enter_cwr(sk, 0);
 }
@@ -3472,7 +3435,7 @@ static void tcp_ratehalving_spur_to_response(struct sock *sk)
 static void tcp_undo_spur_to_response(struct sock *sk, int flag)
 {
         if (flag & FLAG_ECE)
-                tcp_ratehalving_spur_to_response(sk);
+                tcp_cwr_spur_to_response(sk);
         else
                 tcp_undo_cwr(sk, true);
 }
@@ -3579,7 +3542,7 @@ static bool tcp_process_frto(struct sock *sk, int flag)
                 tcp_conservative_spur_to_response(tp);
                 break;
         default:
-                tcp_ratehalving_spur_to_response(sk);
+                tcp_cwr_spur_to_response(sk);
                 break;
         }
         tp->frto_counter = 0;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 9383b51f3efc..cfe6ffe1c177 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2037,10 +2037,10 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
                 if (push_one)
                         break;
         }
-        if (inet_csk(sk)->icsk_ca_state == TCP_CA_Recovery)
-                tp->prr_out += sent_pkts;
 
         if (likely(sent_pkts)) {
+                if (tcp_in_cwnd_reduction(sk))
+                        tp->prr_out += sent_pkts;
                 tcp_cwnd_validate(sk);
                 return false;
         }
@@ -2542,7 +2542,7 @@ begin_fwd:
                 }
                 NET_INC_STATS_BH(sock_net(sk), mib_idx);
 
-                if (inet_csk(sk)->icsk_ca_state == TCP_CA_Recovery)
+                if (tcp_in_cwnd_reduction(sk))
                         tp->prr_out += tcp_skb_pcount(skb);
 
                 if (skb == tcp_write_queue_head(sk))