author		Yuchung Cheng <ycheng@google.com>	2012-09-02 13:38:04 -0400
committer	David S. Miller <davem@davemloft.net>	2012-09-03 14:34:02 -0400
commit		684bad1107571d35610a674c61b3544efb5a5b13 (patch)
tree		aba9dc4a825ac3c454d9058f0bec0829f2b6df69 /net/ipv4
parent		fb4d3d1df31907eadd2e2a745e840921888b346a (diff)
tcp: use PRR to reduce cwin in CWR state
Use the proportional rate reduction (PRR) algorithm to reduce cwnd in the CWR
state, in addition to the Recovery state, and retire the current rate-halving
in CWR. When losses are detected via ACKs while in CWR, the sender enters the
Recovery state, but the cwnd reduction continues and does not restart.
Rename and refactor cwnd reduction functions since both CWR and Recovery
use the same algorithm:
tcp_init_cwnd_reduction() is new and initializes the reduction state variables.
tcp_cwnd_reduction() was previously tcp_update_cwnd_in_recovery().
tcp_end_cwnd_reduction() was previously tcp_complete_cwr().
The rate-halving functions and logic, such as tcp_cwnd_down(), tcp_cwnd_min(),
and the cwnd moderation inside tcp_enter_cwr(), are removed. The unused flag
parameter of tcp_cwnd_reduction() is also removed.
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4')
-rw-r--r--	net/ipv4/tcp_input.c	119
-rw-r--r--	net/ipv4/tcp_output.c	6
2 files changed, 44 insertions, 81 deletions
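For context, the PRR-SSRB arithmetic that tcp_cwnd_reduction() performs below can be sketched as a small stand-alone C program. This is an illustrative model, not kernel code: struct prr and prr_cwnd() are invented names, and the slow-start branch and final clamp are paraphrased from the PRR draft, since those lines are unchanged by this patch and only partially visible in the hunks.

#include <stdint.h>
#include <stdio.h>

/*
 * Stand-alone model of the PRR-SSRB computation used by tcp_cwnd_reduction().
 * Field names mirror the tcp_sock members for readability only.
 */
struct prr {
	uint32_t snd_ssthresh;   /* reduction target, set when the reduction starts */
	uint32_t prior_cwnd;     /* cwnd when the reduction started */
	uint32_t prr_delivered;  /* packets delivered since the reduction started */
	uint32_t prr_out;        /* packets sent out since the reduction started */
};

static uint32_t prr_cwnd(struct prr *p, uint32_t newly_acked_sacked,
			 uint32_t in_flight, int fast_rexmit)
{
	int sndcnt;
	int delta = (int)p->snd_ssthresh - (int)in_flight;

	p->prr_delivered += newly_acked_sacked;
	if (in_flight > p->snd_ssthresh) {
		/* Proportional phase:
		 * sndcnt = ceil(prr_delivered * ssthresh / prior_cwnd) - prr_out */
		uint64_t dividend = (uint64_t)p->snd_ssthresh * p->prr_delivered +
				    p->prior_cwnd - 1;
		sndcnt = (int)(dividend / p->prior_cwnd) - (int)p->prr_out;
	} else {
		/* Slow-start reduction bound: grow back toward ssthresh, but send
		 * at most one packet beyond what was just delivered. */
		int budget = (int)(p->prr_delivered - p->prr_out);
		if (budget < (int)newly_acked_sacked)
			budget = (int)newly_acked_sacked;
		sndcnt = budget + 1;
		if (sndcnt > delta)
			sndcnt = delta;
	}
	/* Always allow the fast retransmit itself to go out. */
	if (sndcnt < (fast_rexmit ? 1 : 0))
		sndcnt = fast_rexmit ? 1 : 0;

	return in_flight + sndcnt;   /* the new cwnd */
}

int main(void)
{
	/* cwnd 20 entering a reduction with ssthresh 10. */
	struct prr p = { .snd_ssthresh = 10, .prior_cwnd = 20 };
	uint32_t in_flight = 20, cwnd = 20;

	for (int ack = 0; ack < 10; ack++) {
		in_flight--;                    /* one packet delivered per ACK */
		cwnd = prr_cwnd(&p, 1, in_flight, 0);
		p.prr_out += cwnd - in_flight;  /* assume the window is filled */
		in_flight = cwnd;
		printf("ack %2d: cwnd %u\n", ack + 1, (unsigned)cwnd);
	}
	return 0;
}

Run against a cwnd of 20 halved to an ssthresh of 10, the model releases roughly one packet for every two delivered, so cwnd glides from 20 toward 10 over about one RTT instead of being clamped immediately; that is the behaviour the patch now applies in CWR as well as Recovery.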
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 38589e464e63..e2bec815ff23 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2470,35 +2470,6 @@ static inline void tcp_moderate_cwnd(struct tcp_sock *tp)
 	tp->snd_cwnd_stamp = tcp_time_stamp;
 }
 
-/* Lower bound on congestion window is slow start threshold
- * unless congestion avoidance choice decides to overide it.
- */
-static inline u32 tcp_cwnd_min(const struct sock *sk)
-{
-	const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops;
-
-	return ca_ops->min_cwnd ? ca_ops->min_cwnd(sk) : tcp_sk(sk)->snd_ssthresh;
-}
-
-/* Decrease cwnd each second ack. */
-static void tcp_cwnd_down(struct sock *sk, int flag)
-{
-	struct tcp_sock *tp = tcp_sk(sk);
-	int decr = tp->snd_cwnd_cnt + 1;
-
-	if ((flag & (FLAG_ANY_PROGRESS | FLAG_DSACKING_ACK)) ||
-	    (tcp_is_reno(tp) && !(flag & FLAG_NOT_DUP))) {
-		tp->snd_cwnd_cnt = decr & 1;
-		decr >>= 1;
-
-		if (decr && tp->snd_cwnd > tcp_cwnd_min(sk))
-			tp->snd_cwnd -= decr;
-
-		tp->snd_cwnd = min(tp->snd_cwnd, tcp_packets_in_flight(tp) + 1);
-		tp->snd_cwnd_stamp = tcp_time_stamp;
-	}
-}
-
 /* Nothing was retransmitted or returned timestamp is less
  * than timestamp of the first retransmission.
  */
@@ -2700,9 +2671,8 @@ static bool tcp_try_undo_loss(struct sock *sk)
 	return false;
 }
 
-/* This function implements the PRR algorithm, specifcally the PRR-SSRB
- * (proportional rate reduction with slow start reduction bound) as described in
- * http://www.ietf.org/id/draft-mathis-tcpm-proportional-rate-reduction-01.txt.
+/* The cwnd reduction in CWR and Recovery use the PRR algorithm
+ * https://datatracker.ietf.org/doc/draft-ietf-tcpm-proportional-rate-reduction/
  * It computes the number of packets to send (sndcnt) based on packets newly
  * delivered:
  *   1) If the packets in flight is larger than ssthresh, PRR spreads the
@@ -2711,13 +2681,29 @@ static bool tcp_try_undo_loss(struct sock *sk)
  *      losses and/or application stalls), do not perform any further cwnd
  *      reductions, but instead slow start up to ssthresh.
  */
-static void tcp_update_cwnd_in_recovery(struct sock *sk, int newly_acked_sacked,
-					int fast_rexmit, int flag)
+static void tcp_init_cwnd_reduction(struct sock *sk, const bool set_ssthresh)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+
+	tp->high_seq = tp->snd_nxt;
+	tp->bytes_acked = 0;
+	tp->snd_cwnd_cnt = 0;
+	tp->prior_cwnd = tp->snd_cwnd;
+	tp->prr_delivered = 0;
+	tp->prr_out = 0;
+	if (set_ssthresh)
+		tp->snd_ssthresh = inet_csk(sk)->icsk_ca_ops->ssthresh(sk);
+	TCP_ECN_queue_cwr(tp);
+}
+
+static void tcp_cwnd_reduction(struct sock *sk, int newly_acked_sacked,
+			       int fast_rexmit)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	int sndcnt = 0;
 	int delta = tp->snd_ssthresh - tcp_packets_in_flight(tp);
 
+	tp->prr_delivered += newly_acked_sacked;
 	if (tcp_packets_in_flight(tp) > tp->snd_ssthresh) {
 		u64 dividend = (u64)tp->snd_ssthresh * tp->prr_delivered +
 			       tp->prior_cwnd - 1;
@@ -2732,43 +2718,29 @@ static void tcp_update_cwnd_in_recovery(struct sock *sk, int newly_acked_sacked,
 	tp->snd_cwnd = tcp_packets_in_flight(tp) + sndcnt;
 }
 
-static inline void tcp_complete_cwr(struct sock *sk)
+static inline void tcp_end_cwnd_reduction(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
-	/* Do not moderate cwnd if it's already undone in cwr or recovery. */
-	if (tp->undo_marker) {
-		if (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR) {
-			tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh);
-			tp->snd_cwnd_stamp = tcp_time_stamp;
-		} else if (tp->snd_ssthresh < TCP_INFINITE_SSTHRESH) {
-			/* PRR algorithm. */
-			tp->snd_cwnd = tp->snd_ssthresh;
-			tp->snd_cwnd_stamp = tcp_time_stamp;
-		}
+	/* Reset cwnd to ssthresh in CWR or Recovery (unless it's undone) */
+	if (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR ||
+	    (tp->undo_marker && tp->snd_ssthresh < TCP_INFINITE_SSTHRESH)) {
+		tp->snd_cwnd = tp->snd_ssthresh;
+		tp->snd_cwnd_stamp = tcp_time_stamp;
 	}
 	tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR);
 }
 
-/* Set slow start threshold and cwnd not falling to slow start */
+/* Enter CWR state. Disable cwnd undo since congestion is proven with ECN */
 void tcp_enter_cwr(struct sock *sk, const int set_ssthresh)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	const struct inet_connection_sock *icsk = inet_csk(sk);
 
 	tp->prior_ssthresh = 0;
 	tp->bytes_acked = 0;
-	if (icsk->icsk_ca_state < TCP_CA_CWR) {
+	if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) {
 		tp->undo_marker = 0;
-		if (set_ssthresh)
-			tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
-		tp->snd_cwnd = min(tp->snd_cwnd,
-				   tcp_packets_in_flight(tp) + 1U);
-		tp->snd_cwnd_cnt = 0;
-		tp->high_seq = tp->snd_nxt;
-		tp->snd_cwnd_stamp = tcp_time_stamp;
-		TCP_ECN_queue_cwr(tp);
-
+		tcp_init_cwnd_reduction(sk, set_ssthresh);
 		tcp_set_ca_state(sk, TCP_CA_CWR);
 	}
 }
@@ -2787,7 +2759,7 @@ static void tcp_try_keep_open(struct sock *sk)
 	}
 }
 
-static void tcp_try_to_open(struct sock *sk, int flag)
+static void tcp_try_to_open(struct sock *sk, int flag, int newly_acked_sacked)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
@@ -2804,7 +2776,7 @@ static void tcp_try_to_open(struct sock *sk, int flag)
 		if (inet_csk(sk)->icsk_ca_state != TCP_CA_Open)
 			tcp_moderate_cwnd(tp);
 	} else {
-		tcp_cwnd_down(sk, flag);
+		tcp_cwnd_reduction(sk, newly_acked_sacked, 0);
 	}
 }
 
@@ -2898,7 +2870,6 @@ static void tcp_enter_recovery(struct sock *sk, bool ece_ack)
 
 	NET_INC_STATS_BH(sock_net(sk), mib_idx);
 
-	tp->high_seq = tp->snd_nxt;
 	tp->prior_ssthresh = 0;
 	tp->undo_marker = tp->snd_una;
 	tp->undo_retrans = tp->retrans_out;
@@ -2906,15 +2877,8 @@ static void tcp_enter_recovery(struct sock *sk, bool ece_ack)
 	if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) {
 		if (!ece_ack)
 			tp->prior_ssthresh = tcp_current_ssthresh(sk);
-		tp->snd_ssthresh = inet_csk(sk)->icsk_ca_ops->ssthresh(sk);
-		TCP_ECN_queue_cwr(tp);
+		tcp_init_cwnd_reduction(sk, true);
 	}
-
-	tp->bytes_acked = 0;
-	tp->snd_cwnd_cnt = 0;
-	tp->prior_cwnd = tp->snd_cwnd;
-	tp->prr_delivered = 0;
-	tp->prr_out = 0;
 	tcp_set_ca_state(sk, TCP_CA_Recovery);
 }
 
@@ -2974,7 +2938,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
 			/* CWR is to be held something *above* high_seq
 			 * is ACKed for CWR bit to reach receiver. */
 			if (tp->snd_una != tp->high_seq) {
-				tcp_complete_cwr(sk);
+				tcp_end_cwnd_reduction(sk);
 				tcp_set_ca_state(sk, TCP_CA_Open);
 			}
 			break;
@@ -2984,7 +2948,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
 				tcp_reset_reno_sack(tp);
 			if (tcp_try_undo_recovery(sk))
 				return;
-			tcp_complete_cwr(sk);
+			tcp_end_cwnd_reduction(sk);
 			break;
 		}
 	}
@@ -3025,7 +2989,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
 		tcp_try_undo_dsack(sk);
 
 	if (!tcp_time_to_recover(sk, flag)) {
-		tcp_try_to_open(sk, flag);
+		tcp_try_to_open(sk, flag, newly_acked_sacked);
 		return;
 	}
 
@@ -3047,8 +3011,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
 
 	if (do_lost || (tcp_is_fack(tp) && tcp_head_timedout(sk)))
 		tcp_update_scoreboard(sk, fast_rexmit);
-	tp->prr_delivered += newly_acked_sacked;
-	tcp_update_cwnd_in_recovery(sk, newly_acked_sacked, fast_rexmit, flag);
+	tcp_cwnd_reduction(sk, newly_acked_sacked, fast_rexmit);
 	tcp_xmit_retransmit_queue(sk);
 }
 
@@ -3394,7 +3357,7 @@ static inline bool tcp_may_raise_cwnd(const struct sock *sk, const int flag)
 {
 	const struct tcp_sock *tp = tcp_sk(sk);
 	return (!(flag & FLAG_ECE) || tp->snd_cwnd < tp->snd_ssthresh) &&
-		!((1 << inet_csk(sk)->icsk_ca_state) & (TCPF_CA_Recovery | TCPF_CA_CWR));
+		!tcp_in_cwnd_reduction(sk);
 }
 
 /* Check that window update is acceptable.
@@ -3462,9 +3425,9 @@ static void tcp_conservative_spur_to_response(struct tcp_sock *tp)
 }
 
 /* A conservative spurious RTO response algorithm: reduce cwnd using
- * rate halving and continue in congestion avoidance.
+ * PRR and continue in congestion avoidance.
  */
-static void tcp_ratehalving_spur_to_response(struct sock *sk)
+static void tcp_cwr_spur_to_response(struct sock *sk)
 {
 	tcp_enter_cwr(sk, 0);
 }
@@ -3472,7 +3435,7 @@ static void tcp_ratehalving_spur_to_response(struct sock *sk)
 static void tcp_undo_spur_to_response(struct sock *sk, int flag)
 {
 	if (flag & FLAG_ECE)
-		tcp_ratehalving_spur_to_response(sk);
+		tcp_cwr_spur_to_response(sk);
 	else
 		tcp_undo_cwr(sk, true);
 }
@@ -3579,7 +3542,7 @@ static bool tcp_process_frto(struct sock *sk, int flag)
 			tcp_conservative_spur_to_response(tp);
 			break;
 		default:
-			tcp_ratehalving_spur_to_response(sk);
+			tcp_cwr_spur_to_response(sk);
 			break;
 		}
 		tp->frto_counter = 0;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 9383b51f3efc..cfe6ffe1c177 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2037,10 +2037,10 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 		if (push_one)
 			break;
 	}
-	if (inet_csk(sk)->icsk_ca_state == TCP_CA_Recovery)
-		tp->prr_out += sent_pkts;
 
 	if (likely(sent_pkts)) {
+		if (tcp_in_cwnd_reduction(sk))
+			tp->prr_out += sent_pkts;
 		tcp_cwnd_validate(sk);
 		return false;
 	}
@@ -2542,7 +2542,7 @@ begin_fwd:
 		}
 		NET_INC_STATS_BH(sock_net(sk), mib_idx);
 
-		if (inet_csk(sk)->icsk_ca_state == TCP_CA_Recovery)
+		if (tcp_in_cwnd_reduction(sk))
 			tp->prr_out += tcp_skb_pcount(skb);
 
 		if (skb == tcp_write_queue_head(sk))
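A note on tcp_in_cwnd_reduction(), which replaces the explicit TCP_CA_Recovery / TCPF_CA_CWR checks above: its definition lives in include/net/tcp.h and therefore falls outside the net/ipv4 diffstat shown here, but judging from how it is used it amounts to a state test along these lines (a sketch, not the verbatim hunk):

/* Sketch of the helper this patch introduces in include/net/tcp.h
 * (not shown above because the diffstat is limited to net/ipv4):
 * true while the connection is in either cwnd-reduction state,
 * i.e. CWR (ECN or local congestion) or Recovery (loss). */
static inline bool tcp_in_cwnd_reduction(const struct sock *sk)
{
	return (TCPF_CA_CWR | TCPF_CA_Recovery) &
	       (1 << inet_csk(sk)->icsk_ca_state);
}

With prr_out accumulated in both states, the PRR bookkeeping in tcp_write_xmit() and tcp_xmit_retransmit_queue() stays consistent whether the reduction was triggered by ECN (CWR) or by loss (Recovery).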