 include/linux/tcp.h   |  4 ++++
 net/ipv4/tcp_input.c  | 58 ++++++++++++++++++++++++++++++++++++++++++++++------
 net/ipv4/tcp_output.c |  7 ++++++-
 3 files changed, 62 insertions(+), 7 deletions(-)
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 531ede8006d9..6b63b310af36 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -379,6 +379,10 @@ struct tcp_sock {
 	u32	snd_cwnd_clamp; /* Do not allow snd_cwnd to grow above this */
 	u32	snd_cwnd_used;
 	u32	snd_cwnd_stamp;
+	u32	prior_cwnd;	/* Congestion window at start of Recovery. */
+	u32	prr_delivered;	/* Number of newly delivered packets to
+				 * receiver in Recovery. */
+	u32	prr_out;	/* Total number of pkts sent during Recovery. */
 
 	u32	rcv_wnd;	/* Current receiver window */
 	u32	write_seq;	/* Tail(+1) of data held in tcp send buffer */
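
The three fields above are the only per-connection state PRR needs: prior_cwnd
fixes the reduction ratio, and the prr_delivered/prr_out pair tracks how far
the reduction has progressed. A userspace mock of that state, purely for
illustration (only the field names and their comments come from the patch):

/* Illustrative mirror of the PRR state added to struct tcp_sock.
 * PRR arranges for prr_out to track
 *	prr_delivered * snd_ssthresh / prior_cwnd
 * so cwnd glides from prior_cwnd down to ssthresh over about one RTT. */
struct prr_state {
	unsigned int prior_cwnd;	/* snd_cwnd when Recovery began */
	unsigned int prr_delivered;	/* pkts delivered since Recovery began */
	unsigned int prr_out;		/* pkts sent since Recovery began */
};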
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index ea0d2183df4b..385c470195eb 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2830,9 +2830,13 @@ static int tcp_try_undo_loss(struct sock *sk)
 static inline void tcp_complete_cwr(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	/* Do not moderate cwnd if it's already undone in cwr or recovery */
-	if (tp->undo_marker && tp->snd_cwnd > tp->snd_ssthresh) {
-		tp->snd_cwnd = tp->snd_ssthresh;
+
+	/* Do not moderate cwnd if it's already undone in cwr or recovery. */
+	if (tp->undo_marker) {
+		if (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR)
+			tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh);
+		else /* PRR */
+			tp->snd_cwnd = tp->snd_ssthresh;
 		tp->snd_cwnd_stamp = tcp_time_stamp;
 	}
 	tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR);
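
The practical effect of the split above: PRR can finish recovery with cwnd
below ssthresh, and the Recovery branch now restores cwnd to ssthresh, while
the CWR branch keeps the old never-raise behavior. A minimal standalone sketch
of just that decision (the enum and helper below are stand-ins, not kernel
code):

#include <stdio.h>

enum ca_state { CA_CWR, CA_RECOVERY };	/* illustrative subset of CA states */

static unsigned int complete_cwr_cwnd(enum ca_state st, unsigned int cwnd,
				      unsigned int ssthresh)
{
	/* Mirrors the patched tcp_complete_cwr(): in CWR never raise cwnd;
	 * in Recovery (PRR) converge exactly to ssthresh, which may raise
	 * a cwnd that PRR pulled below ssthresh during recovery. */
	if (st == CA_CWR)
		return cwnd < ssthresh ? cwnd : ssthresh;
	return ssthresh;
}

int main(void)
{
	/* cwnd 5, ssthresh 8: CWR keeps 5, Recovery restores 8. */
	printf("CWR:      %u\n", complete_cwr_cwnd(CA_CWR, 5, 8));
	printf("Recovery: %u\n", complete_cwr_cwnd(CA_RECOVERY, 5, 8));
	return 0;
}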
@@ -2950,6 +2954,38 @@ void tcp_simple_retransmit(struct sock *sk)
 }
 EXPORT_SYMBOL(tcp_simple_retransmit);
 
+/* This function implements the PRR algorithm, specifically the PRR-SSRB
+ * (proportional rate reduction with slow start reduction bound) as described in
+ * http://www.ietf.org/id/draft-mathis-tcpm-proportional-rate-reduction-01.txt.
+ * It computes the number of packets to send (sndcnt) based on packets newly
+ * delivered:
+ *   1) If the number of packets in flight is larger than ssthresh, PRR
+ *      spreads the cwnd reductions across a full RTT.
+ *   2) If the number of packets in flight is lower than ssthresh (such as
+ *      due to excess losses and/or application stalls), do not perform any
+ *      further cwnd reductions, but instead slow start up to ssthresh.
+ */
+static void tcp_update_cwnd_in_recovery(struct sock *sk, int newly_acked_sacked,
+					int fast_rexmit, int flag)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	int sndcnt = 0;
+	int delta = tp->snd_ssthresh - tcp_packets_in_flight(tp);
+
+	if (tcp_packets_in_flight(tp) > tp->snd_ssthresh) {
+		u64 dividend = (u64)tp->snd_ssthresh * tp->prr_delivered +
+			       tp->prior_cwnd - 1;
+		sndcnt = div_u64(dividend, tp->prior_cwnd) - tp->prr_out;
+	} else {
+		sndcnt = min_t(int, delta,
+			       max_t(int, tp->prr_delivered - tp->prr_out,
+				     newly_acked_sacked) + 1);
+	}
+
+	sndcnt = max(sndcnt, (fast_rexmit ? 1 : 0));
+	tp->snd_cwnd = tcp_packets_in_flight(tp) + sndcnt;
+}
+
 /* Process an event, which can update packets-in-flight not trivially.
  * Main goal of this function is to calculate new estimate for left_out,
  * taking into account both packets sitting in receiver's buffer and
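
To make the two branches concrete, here is a userspace transcription of
tcp_update_cwnd_in_recovery() plus one worked data point; div_u64() becomes
plain 64-bit division, the unused flag argument is dropped, and all numbers
are invented for illustration:

#include <stdio.h>
#include <stdint.h>

static int prr_sndcnt(unsigned int in_flight, unsigned int ssthresh,
		      unsigned int prior_cwnd, unsigned int prr_delivered,
		      unsigned int prr_out, int newly_acked_sacked,
		      int fast_rexmit)
{
	int sndcnt;
	int delta = (int)ssthresh - (int)in_flight;

	if (in_flight > ssthresh) {
		/* Proportional case: send roughly ssthresh/prior_cwnd
		 * packets per packet delivered, so prr_out tracks
		 * prr_delivered * ssthresh / prior_cwnd (rounded up). */
		uint64_t dividend = (uint64_t)ssthresh * prr_delivered +
				    prior_cwnd - 1;
		sndcnt = (int)(dividend / prior_cwnd) - (int)prr_out;
	} else {
		/* Slow-start reduction bound: the pipe drained below
		 * ssthresh, so slow start back up, capped at delta so
		 * cwnd never overshoots ssthresh. */
		int grow = (int)(prr_delivered - prr_out);
		if (grow < newly_acked_sacked)
			grow = newly_acked_sacked;
		sndcnt = grow + 1;
		if (sndcnt > delta)
			sndcnt = delta;
	}
	/* A fast retransmit must always be allowed out. */
	if (sndcnt < (fast_rexmit ? 1 : 0))
		sndcnt = fast_rexmit ? 1 : 0;
	return sndcnt;
}

int main(void)
{
	/* Recovery starts with prior_cwnd = 20, ssthresh = 10. The first
	 * ACK reports 2 packets delivered; 18 are still in flight and
	 * nothing has been sent in recovery yet. */
	int sndcnt = prr_sndcnt(18, 10, 20, 2, 0, 2, 1);
	printf("sndcnt = %d, cwnd = %d\n", sndcnt, 18 + sndcnt);
	/* Prints sndcnt = 1, cwnd = 19: one packet out per two delivered,
	 * halving cwnd smoothly over one RTT instead of all at once. */
	return 0;
}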
@@ -2961,7 +2997,8 @@ EXPORT_SYMBOL(tcp_simple_retransmit);
  * It does _not_ decide what to send, it is made in function
  * tcp_xmit_retransmit_queue().
  */
-static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag)
+static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
+				  int newly_acked_sacked, int flag)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
@@ -3111,13 +3148,17 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag)
 
 		tp->bytes_acked = 0;
 		tp->snd_cwnd_cnt = 0;
+		tp->prior_cwnd = tp->snd_cwnd;
+		tp->prr_delivered = 0;
+		tp->prr_out = 0;
 		tcp_set_ca_state(sk, TCP_CA_Recovery);
 		fast_rexmit = 1;
 	}
 
 	if (do_lost || (tcp_is_fack(tp) && tcp_head_timedout(sk)))
 		tcp_update_scoreboard(sk, fast_rexmit);
-	tcp_cwnd_down(sk, flag);
+	tp->prr_delivered += newly_acked_sacked;
+	tcp_update_cwnd_in_recovery(sk, newly_acked_sacked, fast_rexmit, flag);
 	tcp_xmit_retransmit_queue(sk);
 }
 
@@ -3632,6 +3673,8 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
 	u32 prior_in_flight;
 	u32 prior_fackets;
 	int prior_packets;
+	int prior_sacked = tp->sacked_out;
+	int newly_acked_sacked = 0;
 	int frto_cwnd = 0;
 
 	/* If the ack is older than previous acks
@@ -3703,6 +3746,9 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
 	/* See if we can take anything off of the retransmit queue. */
 	flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una);
 
+	newly_acked_sacked = (prior_packets - prior_sacked) -
+			     (tp->packets_out - tp->sacked_out);
+
 	if (tp->frto_counter)
 		frto_cwnd = tcp_process_frto(sk, flag);
 	/* Guarantee sacktag reordering detection against wrap-arounds */
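
The newly_acked_sacked computation above folds cumulative ACKs and new SACKs
into a single count of packets the receiver just took delivery of, which is
exactly the "delivered" signal PRR paces against. A toy check of the
arithmetic, with invented numbers:

#include <stdio.h>

int main(void)
{
	/* Snapshot before this ACK was processed ... */
	int prior_packets = 10, prior_sacked = 2;
	/* ... and the state after tcp_clean_rtx_queue()/sacktag ran. */
	int packets_out = 8, sacked_out = 3;

	int newly_acked_sacked = (prior_packets - prior_sacked) -
				 (packets_out - sacked_out);
	/* (10 - 2) - (8 - 3) = 3: two packets cumulatively ACKed plus
	 * one newly SACKed, all newly delivered from PRR's viewpoint. */
	printf("newly_acked_sacked = %d\n", newly_acked_sacked);
	return 0;
}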
@@ -3715,7 +3761,7 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
 		    tcp_may_raise_cwnd(sk, flag))
 			tcp_cong_avoid(sk, ack, prior_in_flight);
 		tcp_fastretrans_alert(sk, prior_packets - tp->packets_out,
-				      flag);
+				      newly_acked_sacked, flag);
 	} else {
 		if ((flag & FLAG_DATA_ACKED) && !frto_cwnd)
 			tcp_cong_avoid(sk, ack, prior_in_flight);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 0377c061f22f..081dcd6fd0c4 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1796,11 +1796,13 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 		tcp_event_new_data_sent(sk, skb);
 
 		tcp_minshall_update(tp, mss_now, skb);
-		sent_pkts++;
+		sent_pkts += tcp_skb_pcount(skb);
 
 		if (push_one)
 			break;
 	}
+	if (inet_csk(sk)->icsk_ca_state == TCP_CA_Recovery)
+		tp->prr_out += sent_pkts;
 
 	if (likely(sent_pkts)) {
 		tcp_cwnd_validate(sk);
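
Two changes land in this hunk: sent_pkts now counts packets rather than skbs
(a TSO skb can carry several MSS-sized segments), and anything transmitted
while in Recovery is charged to prr_out, so sends and deliveries reach the
PRR arithmetic in the same units. A toy illustration of the skb/packet
distinction, with invented pcount values:

#include <stdio.h>

int main(void)
{
	/* Three skbs leave the write queue; with TSO the middle ones
	 * carry several packets each (values assumed for illustration). */
	int pcount[] = { 1, 4, 2 };
	int i, skbs = 0, sent_pkts = 0;

	for (i = 0; i < 3; i++) {
		skbs++;
		sent_pkts += pcount[i];	/* what the patched loop counts */
	}
	/* Prints "skbs=3 packets=7": counting skbs would understate
	 * prr_out by 4 packets and skew the sndcnt bookkeeping. */
	printf("skbs=%d packets=%d\n", skbs, sent_pkts);
	return 0;
}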
@@ -2294,6 +2296,9 @@ begin_fwd:
 			return;
 		NET_INC_STATS_BH(sock_net(sk), mib_idx);
 
+		if (inet_csk(sk)->icsk_ca_state == TCP_CA_Recovery)
+			tp->prr_out += tcp_skb_pcount(skb);
+
 		if (skb == tcp_write_queue_head(sk))
 			inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
 						  inet_csk(sk)->icsk_rto,