author	Yuchung Cheng <ycheng@google.com>	2012-09-02 13:38:04 -0400
committer	David S. Miller <davem@davemloft.net>	2012-09-03 14:34:02 -0400
commit	684bad1107571d35610a674c61b3544efb5a5b13 (patch)
tree	aba9dc4a825ac3c454d9058f0bec0829f2b6df69 /net/ipv4
parent	fb4d3d1df31907eadd2e2a745e840921888b346a (diff)
tcp: use PRR to reduce cwnd in CWR state
Use the proportional rate reduction (PRR) algorithm to reduce cwnd in CWR state, in addition to Recovery state. Retire the current rate-halving in CWR.

When losses are detected via ACKs in CWR state, the sender enters Recovery state but the cwnd reduction continues and does not restart.

Rename and refactor the cwnd reduction functions since both CWR and Recovery use the same algorithm:
1) tcp_init_cwnd_reduction() is new and initializes the reduction state variables.
2) tcp_cwnd_reduction() was previously tcp_update_cwnd_in_recovery().
3) tcp_end_cwnd_reduction() was previously tcp_complete_cwr().

The rate-halving functions and logic such as tcp_cwnd_down(), tcp_cwnd_min(), and the cwnd moderation inside tcp_enter_cwr() are removed. The unused parameter, flag, in tcp_cwnd_reduction() is also removed.

Signed-off-by: Yuchung Cheng <ycheng@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
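For background, the PRR arithmetic that this patch consolidates into tcp_cwnd_reduction() can be sketched as a stand-alone function. This is only an illustration: prr_sndcnt() and its parameters are invented names that merely mirror the tcp_sock fields touched in the diff, and the slow-start branch follows the PRR-SSRB draft rather than the exact in-kernel bookkeeping.

#include <stdint.h>

/* Illustrative only: how many segments PRR allows to be sent for this ACK.
 * pipe          - packets currently in flight
 * ssthresh      - slow start threshold chosen when the reduction started
 * prior_cwnd    - cwnd when the reduction started
 * prr_delivered - packets delivered (acked or sacked) since the reduction started
 * prr_out       - packets sent since the reduction started
 */
static uint32_t prr_sndcnt(uint32_t pipe, uint32_t ssthresh, uint32_t prior_cwnd,
			   uint32_t prr_delivered, uint32_t prr_out, int fast_rexmit)
{
	int64_t sndcnt;

	if (pipe > ssthresh) {
		/* Proportional phase: pace out ssthresh/prior_cwnd of the
		 * delivered packets (rounded up), minus what was already sent. */
		uint64_t dividend = (uint64_t)ssthresh * prr_delivered + prior_cwnd - 1;

		sndcnt = (int64_t)(dividend / prior_cwnd) - prr_out;
	} else {
		/* Slow-start reduction bound: grow back toward ssthresh, but no
		 * faster than the packets delivered so far allow. */
		int64_t delta = (int64_t)ssthresh - pipe;
		int64_t delivered = (int64_t)prr_delivered - prr_out;

		sndcnt = delta < delivered ? delta : delivered;
		if (fast_rexmit && sndcnt < 1)
			sndcnt = 1;	/* always let the fast retransmit out */
	}
	if (sndcnt < 0)
		sndcnt = 0;

	/* The caller then sets cwnd = pipe + sndcnt, as tcp_cwnd_reduction() does. */
	return (uint32_t)sndcnt;
}

Because prr_delivered and prr_out are both accumulated from the start of the reduction, sending pipe + sndcnt makes the data in flight converge on ssthresh over roughly one round trip, which is why CWR and Recovery can share one code path.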
Diffstat (limited to 'net/ipv4')
-rw-r--r--	net/ipv4/tcp_input.c	119
-rw-r--r--	net/ipv4/tcp_output.c	6
2 files changed, 44 insertions, 81 deletions
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 38589e464e63..e2bec815ff23 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2470,35 +2470,6 @@ static inline void tcp_moderate_cwnd(struct tcp_sock *tp)
 	tp->snd_cwnd_stamp = tcp_time_stamp;
 }
 
-/* Lower bound on congestion window is slow start threshold
- * unless congestion avoidance choice decides to overide it.
- */
-static inline u32 tcp_cwnd_min(const struct sock *sk)
-{
-	const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops;
-
-	return ca_ops->min_cwnd ? ca_ops->min_cwnd(sk) : tcp_sk(sk)->snd_ssthresh;
-}
-
-/* Decrease cwnd each second ack. */
-static void tcp_cwnd_down(struct sock *sk, int flag)
-{
-	struct tcp_sock *tp = tcp_sk(sk);
-	int decr = tp->snd_cwnd_cnt + 1;
-
-	if ((flag & (FLAG_ANY_PROGRESS | FLAG_DSACKING_ACK)) ||
-	    (tcp_is_reno(tp) && !(flag & FLAG_NOT_DUP))) {
-		tp->snd_cwnd_cnt = decr & 1;
-		decr >>= 1;
-
-		if (decr && tp->snd_cwnd > tcp_cwnd_min(sk))
-			tp->snd_cwnd -= decr;
-
-		tp->snd_cwnd = min(tp->snd_cwnd, tcp_packets_in_flight(tp) + 1);
-		tp->snd_cwnd_stamp = tcp_time_stamp;
-	}
-}
-
 /* Nothing was retransmitted or returned timestamp is less
  * than timestamp of the first retransmission.
  */
@@ -2700,9 +2671,8 @@ static bool tcp_try_undo_loss(struct sock *sk)
 	return false;
 }
 
-/* This function implements the PRR algorithm, specifcally the PRR-SSRB
- * (proportional rate reduction with slow start reduction bound) as described in
- * http://www.ietf.org/id/draft-mathis-tcpm-proportional-rate-reduction-01.txt.
+/* The cwnd reduction in CWR and Recovery use the PRR algorithm
+ * https://datatracker.ietf.org/doc/draft-ietf-tcpm-proportional-rate-reduction/
  * It computes the number of packets to send (sndcnt) based on packets newly
  * delivered:
  *   1) If the packets in flight is larger than ssthresh, PRR spreads the
@@ -2711,13 +2681,29 @@ static bool tcp_try_undo_loss(struct sock *sk)
  *      losses and/or application stalls), do not perform any further cwnd
  *      reductions, but instead slow start up to ssthresh.
  */
-static void tcp_update_cwnd_in_recovery(struct sock *sk, int newly_acked_sacked,
-					int fast_rexmit, int flag)
+static void tcp_init_cwnd_reduction(struct sock *sk, const bool set_ssthresh)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+
+	tp->high_seq = tp->snd_nxt;
+	tp->bytes_acked = 0;
+	tp->snd_cwnd_cnt = 0;
+	tp->prior_cwnd = tp->snd_cwnd;
+	tp->prr_delivered = 0;
+	tp->prr_out = 0;
+	if (set_ssthresh)
+		tp->snd_ssthresh = inet_csk(sk)->icsk_ca_ops->ssthresh(sk);
+	TCP_ECN_queue_cwr(tp);
+}
+
+static void tcp_cwnd_reduction(struct sock *sk, int newly_acked_sacked,
+			       int fast_rexmit)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	int sndcnt = 0;
 	int delta = tp->snd_ssthresh - tcp_packets_in_flight(tp);
 
+	tp->prr_delivered += newly_acked_sacked;
 	if (tcp_packets_in_flight(tp) > tp->snd_ssthresh) {
 		u64 dividend = (u64)tp->snd_ssthresh * tp->prr_delivered +
 			       tp->prior_cwnd - 1;
@@ -2732,43 +2718,29 @@ static void tcp_update_cwnd_in_recovery(struct sock *sk, int newly_acked_sacked,
 	tp->snd_cwnd = tcp_packets_in_flight(tp) + sndcnt;
 }
 
-static inline void tcp_complete_cwr(struct sock *sk)
+static inline void tcp_end_cwnd_reduction(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
-	/* Do not moderate cwnd if it's already undone in cwr or recovery. */
-	if (tp->undo_marker) {
-		if (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR) {
-			tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh);
-			tp->snd_cwnd_stamp = tcp_time_stamp;
-		} else if (tp->snd_ssthresh < TCP_INFINITE_SSTHRESH) {
-			/* PRR algorithm. */
-			tp->snd_cwnd = tp->snd_ssthresh;
-			tp->snd_cwnd_stamp = tcp_time_stamp;
-		}
+	/* Reset cwnd to ssthresh in CWR or Recovery (unless it's undone) */
+	if (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR ||
+	    (tp->undo_marker && tp->snd_ssthresh < TCP_INFINITE_SSTHRESH)) {
+		tp->snd_cwnd = tp->snd_ssthresh;
+		tp->snd_cwnd_stamp = tcp_time_stamp;
 	}
 	tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR);
 }
 
-/* Set slow start threshold and cwnd not falling to slow start */
+/* Enter CWR state. Disable cwnd undo since congestion is proven with ECN */
 void tcp_enter_cwr(struct sock *sk, const int set_ssthresh)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	const struct inet_connection_sock *icsk = inet_csk(sk);
 
 	tp->prior_ssthresh = 0;
 	tp->bytes_acked = 0;
-	if (icsk->icsk_ca_state < TCP_CA_CWR) {
+	if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) {
 		tp->undo_marker = 0;
-		if (set_ssthresh)
-			tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
-		tp->snd_cwnd = min(tp->snd_cwnd,
-				   tcp_packets_in_flight(tp) + 1U);
-		tp->snd_cwnd_cnt = 0;
-		tp->high_seq = tp->snd_nxt;
-		tp->snd_cwnd_stamp = tcp_time_stamp;
-		TCP_ECN_queue_cwr(tp);
-
+		tcp_init_cwnd_reduction(sk, set_ssthresh);
 		tcp_set_ca_state(sk, TCP_CA_CWR);
 	}
 }
@@ -2787,7 +2759,7 @@ static void tcp_try_keep_open(struct sock *sk)
 	}
 }
 
-static void tcp_try_to_open(struct sock *sk, int flag)
+static void tcp_try_to_open(struct sock *sk, int flag, int newly_acked_sacked)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
@@ -2804,7 +2776,7 @@ static void tcp_try_to_open(struct sock *sk, int flag)
 		if (inet_csk(sk)->icsk_ca_state != TCP_CA_Open)
 			tcp_moderate_cwnd(tp);
 	} else {
-		tcp_cwnd_down(sk, flag);
+		tcp_cwnd_reduction(sk, newly_acked_sacked, 0);
 	}
 }
 
@@ -2898,7 +2870,6 @@ static void tcp_enter_recovery(struct sock *sk, bool ece_ack)
 
 	NET_INC_STATS_BH(sock_net(sk), mib_idx);
 
-	tp->high_seq = tp->snd_nxt;
 	tp->prior_ssthresh = 0;
 	tp->undo_marker = tp->snd_una;
 	tp->undo_retrans = tp->retrans_out;
@@ -2906,15 +2877,8 @@ static void tcp_enter_recovery(struct sock *sk, bool ece_ack)
 	if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) {
 		if (!ece_ack)
 			tp->prior_ssthresh = tcp_current_ssthresh(sk);
-		tp->snd_ssthresh = inet_csk(sk)->icsk_ca_ops->ssthresh(sk);
-		TCP_ECN_queue_cwr(tp);
+		tcp_init_cwnd_reduction(sk, true);
 	}
-
-	tp->bytes_acked = 0;
-	tp->snd_cwnd_cnt = 0;
-	tp->prior_cwnd = tp->snd_cwnd;
-	tp->prr_delivered = 0;
-	tp->prr_out = 0;
 	tcp_set_ca_state(sk, TCP_CA_Recovery);
 }
 
@@ -2974,7 +2938,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
 			/* CWR is to be held something *above* high_seq
 			 * is ACKed for CWR bit to reach receiver. */
 			if (tp->snd_una != tp->high_seq) {
-				tcp_complete_cwr(sk);
+				tcp_end_cwnd_reduction(sk);
 				tcp_set_ca_state(sk, TCP_CA_Open);
 			}
 			break;
@@ -2984,7 +2948,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
 				tcp_reset_reno_sack(tp);
 			if (tcp_try_undo_recovery(sk))
 				return;
-			tcp_complete_cwr(sk);
+			tcp_end_cwnd_reduction(sk);
 			break;
 		}
 	}
@@ -3025,7 +2989,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
 			tcp_try_undo_dsack(sk);
 
 		if (!tcp_time_to_recover(sk, flag)) {
-			tcp_try_to_open(sk, flag);
+			tcp_try_to_open(sk, flag, newly_acked_sacked);
 			return;
 		}
 
@@ -3047,8 +3011,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
 
 	if (do_lost || (tcp_is_fack(tp) && tcp_head_timedout(sk)))
 		tcp_update_scoreboard(sk, fast_rexmit);
-	tp->prr_delivered += newly_acked_sacked;
-	tcp_update_cwnd_in_recovery(sk, newly_acked_sacked, fast_rexmit, flag);
+	tcp_cwnd_reduction(sk, newly_acked_sacked, fast_rexmit);
 	tcp_xmit_retransmit_queue(sk);
 }
 
@@ -3394,7 +3357,7 @@ static inline bool tcp_may_raise_cwnd(const struct sock *sk, const int flag)
 {
 	const struct tcp_sock *tp = tcp_sk(sk);
 	return (!(flag & FLAG_ECE) || tp->snd_cwnd < tp->snd_ssthresh) &&
-		!((1 << inet_csk(sk)->icsk_ca_state) & (TCPF_CA_Recovery | TCPF_CA_CWR));
+		!tcp_in_cwnd_reduction(sk);
 }
 
 /* Check that window update is acceptable.
@@ -3462,9 +3425,9 @@ static void tcp_conservative_spur_to_response(struct tcp_sock *tp)
 }
 
 /* A conservative spurious RTO response algorithm: reduce cwnd using
- * rate halving and continue in congestion avoidance.
+ * PRR and continue in congestion avoidance.
  */
-static void tcp_ratehalving_spur_to_response(struct sock *sk)
+static void tcp_cwr_spur_to_response(struct sock *sk)
 {
 	tcp_enter_cwr(sk, 0);
 }
@@ -3472,7 +3435,7 @@ static void tcp_ratehalving_spur_to_response(struct sock *sk)
 static void tcp_undo_spur_to_response(struct sock *sk, int flag)
 {
 	if (flag & FLAG_ECE)
-		tcp_ratehalving_spur_to_response(sk);
+		tcp_cwr_spur_to_response(sk);
 	else
 		tcp_undo_cwr(sk, true);
 }
@@ -3579,7 +3542,7 @@ static bool tcp_process_frto(struct sock *sk, int flag)
 			tcp_conservative_spur_to_response(tp);
 			break;
 		default:
-			tcp_ratehalving_spur_to_response(sk);
+			tcp_cwr_spur_to_response(sk);
 			break;
 		}
 		tp->frto_counter = 0;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 9383b51f3efc..cfe6ffe1c177 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2037,10 +2037,10 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 		if (push_one)
 			break;
 	}
-	if (inet_csk(sk)->icsk_ca_state == TCP_CA_Recovery)
-		tp->prr_out += sent_pkts;
 
 	if (likely(sent_pkts)) {
+		if (tcp_in_cwnd_reduction(sk))
+			tp->prr_out += sent_pkts;
 		tcp_cwnd_validate(sk);
 		return false;
 	}
@@ -2542,7 +2542,7 @@ begin_fwd:
 		}
 		NET_INC_STATS_BH(sock_net(sk), mib_idx);
 
-		if (inet_csk(sk)->icsk_ca_state == TCP_CA_Recovery)
+		if (tcp_in_cwnd_reduction(sk))
 			tp->prr_out += tcp_skb_pcount(skb);
 
 		if (skb == tcp_write_queue_head(sk))
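One note on the helper both files now call: tcp_in_cwnd_reduction() is not defined anywhere in this net/ipv4 diffstat, so it is presumably introduced elsewhere in the same patch (likely include/net/tcp.h). Judging from the TCPF_CA_Recovery / TCPF_CA_CWR checks it replaces above, it would amount to a state test along these lines (a sketch under that assumption, not taken from this diff):

static inline bool tcp_in_cwnd_reduction(const struct sock *sk)
{
	/* cwnd is being reduced by PRR in either CWR or Recovery state. */
	return (TCPF_CA_CWR | TCPF_CA_Recovery) &
	       (1 << inet_csk(sk)->icsk_ca_state);
}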