 include/linux/tcp.h   |  4 ++++
 net/ipv4/tcp_input.c  | 58 ++++++++++++++++++++++++++++++++++++++++++++++----
 net/ipv4/tcp_output.c |  7 ++++++-
 3 files changed, 62 insertions(+), 7 deletions(-)
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 531ede8006d9..6b63b310af36 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -379,6 +379,10 @@ struct tcp_sock {
 	u32	snd_cwnd_clamp; /* Do not allow snd_cwnd to grow above this */
 	u32	snd_cwnd_used;
 	u32	snd_cwnd_stamp;
+	u32	prior_cwnd;	/* Congestion window at start of Recovery. */
+	u32	prr_delivered;	/* Number of newly delivered packets to
+				 * receiver in Recovery. */
+	u32	prr_out;	/* Total number of pkts sent during Recovery. */
 
 	u32	rcv_wnd;	/* Current receiver window */
 	u32	write_seq;	/* Tail(+1) of data held in tcp send buffer */
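
These three fields are the whole of PRR's per-connection state: prior_cwnd snapshots the congestion window when recovery starts, while prr_delivered and prr_out count packets newly delivered to the receiver and packets sent since recovery began. A minimal userspace sketch of that bookkeeping (the struct and function names are invented for illustration, not kernel code):

    typedef unsigned int u32;

    struct prr_state {
            u32 prior_cwnd;         /* cwnd at start of recovery */
            u32 prr_delivered;      /* packets newly delivered in recovery */
            u32 prr_out;            /* packets sent in recovery */
    };

    /* Reset on entry to recovery, mirroring the initialisation this
     * patch adds to tcp_fastretrans_alert() in net/ipv4/tcp_input.c. */
    static void prr_enter_recovery(struct prr_state *prr, u32 snd_cwnd)
    {
            prr->prior_cwnd = snd_cwnd;
            prr->prr_delivered = 0;
            prr->prr_out = 0;
    }
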
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index ea0d2183df4b..385c470195eb 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2830,9 +2830,13 @@ static int tcp_try_undo_loss(struct sock *sk)
 static inline void tcp_complete_cwr(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	/* Do not moderate cwnd if it's already undone in cwr or recovery */
-	if (tp->undo_marker && tp->snd_cwnd > tp->snd_ssthresh) {
-		tp->snd_cwnd = tp->snd_ssthresh;
+
+	/* Do not moderate cwnd if it's already undone in cwr or recovery. */
+	if (tp->undo_marker) {
+		if (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR)
+			tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh);
+		else /* PRR */
+			tp->snd_cwnd = tp->snd_ssthresh;
 		tp->snd_cwnd_stamp = tcp_time_stamp;
 	}
 	tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR);
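
The CWR/Recovery split matters because PRR can legitimately reach the end of recovery with cwnd below ssthresh (it slow-starts back up to ssthresh rather than overshooting), and the draft's completion step is cwnd = ssthresh; exiting CWR keeps the min() so that completing CWR can only ever lower cwnd. A hedged standalone restatement of the two exit rules (cwr_exit_cwnd() is an invented name, not kernel code):

    static unsigned int cwr_exit_cwnd(int state_is_cwr, unsigned int cwnd,
                                      unsigned int ssthresh)
    {
            if (state_is_cwr)               /* TCP_CA_CWR: never raise cwnd */
                    return cwnd < ssthresh ? cwnd : ssthresh;
            return ssthresh;                /* Recovery: PRR completion step */
    }
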
@@ -2950,6 +2954,38 @@ void tcp_simple_retransmit(struct sock *sk)
 }
 EXPORT_SYMBOL(tcp_simple_retransmit);
 
+/* This function implements the PRR algorithm, specifically the PRR-SSRB
+ * (proportional rate reduction with slow start reduction bound) as described in
+ * http://www.ietf.org/id/draft-mathis-tcpm-proportional-rate-reduction-01.txt.
+ * It computes the number of packets to send (sndcnt) based on packets newly
+ * delivered:
+ *   1) If the packets in flight is larger than ssthresh, PRR spreads the
+ *      cwnd reductions across a full RTT.
+ *   2) If packets in flight is lower than ssthresh (such as due to excess
+ *      losses and/or application stalls), do not perform any further cwnd
+ *      reductions, but instead slow start up to ssthresh.
+ */
+static void tcp_update_cwnd_in_recovery(struct sock *sk, int newly_acked_sacked,
+					int fast_rexmit, int flag)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	int sndcnt = 0;
+	int delta = tp->snd_ssthresh - tcp_packets_in_flight(tp);
+
+	if (tcp_packets_in_flight(tp) > tp->snd_ssthresh) {
+		u64 dividend = (u64)tp->snd_ssthresh * tp->prr_delivered +
+			       tp->prior_cwnd - 1;
+		sndcnt = div_u64(dividend, tp->prior_cwnd) - tp->prr_out;
+	} else {
+		sndcnt = min_t(int, delta,
+			       max_t(int, tp->prr_delivered - tp->prr_out,
+				     newly_acked_sacked) + 1);
+	}
+
+	sndcnt = max(sndcnt, (fast_rexmit ? 1 : 0));
+	tp->snd_cwnd = tcp_packets_in_flight(tp) + sndcnt;
+}
+
 /* Process an event, which can update packets-in-flight not trivially.
  * Main goal of this function is to calculate new estimate for left_out,
  * taking into account both packets sitting in receiver's buffer and
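
To make the arithmetic concrete, here is a hedged userspace model of tcp_update_cwnd_in_recovery() (prr_sndcnt() and the sample numbers are invented for illustration; the kernel operates on struct tcp_sock fields). With prior_cwnd = 10 halved to ssthresh = 5: while nine packets are in flight, an ACK delivering two packets allows ceil(5 * 2 / 10) - 0 = 1 new packet, pacing the reduction over a full RTT; after heavy loss with only three in flight, the slow-start bound takes over.

    #include <stdio.h>

    /* Userspace model of tcp_update_cwnd_in_recovery(); illustrative only. */
    static int prr_sndcnt(int ssthresh, int prior_cwnd, int in_flight,
                          int prr_delivered, int prr_out,
                          int newly_acked_sacked, int fast_rexmit)
    {
            int sndcnt;

            if (in_flight > ssthresh) {
                    /* Proportional: ceil(ssthresh * delivered / prior_cwnd) - out */
                    long long dividend = (long long)ssthresh * prr_delivered +
                                         prior_cwnd - 1;
                    sndcnt = (int)(dividend / prior_cwnd) - prr_out;
            } else {
                    /* Slow-start bound: grow back up to ssthresh, no further */
                    int delta = ssthresh - in_flight;
                    int catchup = prr_delivered - prr_out;

                    if (catchup < newly_acked_sacked)
                            catchup = newly_acked_sacked;
                    sndcnt = catchup + 1 < delta ? catchup + 1 : delta;
            }
            if (sndcnt < (fast_rexmit ? 1 : 0))
                    sndcnt = fast_rexmit ? 1 : 0;
            return sndcnt;
    }

    int main(void)
    {
            /* Case 1: cwnd was 10, ssthresh 5, 9 in flight, ACK delivers 2. */
            printf("sndcnt = %d\n", prr_sndcnt(5, 10, 9, 2, 0, 2, 1)); /* 1 */
            /* Case 2: only 3 in flight after losses; slow start to ssthresh. */
            printf("sndcnt = %d\n", prr_sndcnt(5, 10, 3, 4, 3, 1, 0)); /* 2 */
            return 0;
    }

The caller then sets cwnd = in_flight + sndcnt, so case 2 ends at cwnd = 3 + 2 = 5 = ssthresh rather than collapsing further.
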
@@ -2961,7 +2997,8 @@ EXPORT_SYMBOL(tcp_simple_retransmit);
  * It does _not_ decide what to send, it is made in function
  * tcp_xmit_retransmit_queue().
  */
-static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag)
+static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
+				  int newly_acked_sacked, int flag)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
@@ -3111,13 +3148,17 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag)
 
 		tp->bytes_acked = 0;
 		tp->snd_cwnd_cnt = 0;
+		tp->prior_cwnd = tp->snd_cwnd;
+		tp->prr_delivered = 0;
+		tp->prr_out = 0;
 		tcp_set_ca_state(sk, TCP_CA_Recovery);
 		fast_rexmit = 1;
 	}
 
 	if (do_lost || (tcp_is_fack(tp) && tcp_head_timedout(sk)))
 		tcp_update_scoreboard(sk, fast_rexmit);
-	tcp_cwnd_down(sk, flag);
+	tp->prr_delivered += newly_acked_sacked;
+	tcp_update_cwnd_in_recovery(sk, newly_acked_sacked, fast_rexmit, flag);
 	tcp_xmit_retransmit_queue(sk);
 }
 
@@ -3632,6 +3673,8 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
 	u32 prior_in_flight;
 	u32 prior_fackets;
 	int prior_packets;
+	int prior_sacked = tp->sacked_out;
+	int newly_acked_sacked = 0;
 	int frto_cwnd = 0;
 
 	/* If the ack is older than previous acks
@@ -3703,6 +3746,9 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
 	/* See if we can take anything off of the retransmit queue. */
 	flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una);
 
+	newly_acked_sacked = (prior_packets - prior_sacked) -
+			     (tp->packets_out - tp->sacked_out);
+
 	if (tp->frto_counter)
 		frto_cwnd = tcp_process_frto(sk, flag);
 	/* Guarantee sacktag reordering detection against wrap-arounds */
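
A worked instance of this bookkeeping (numbers invented): suppose prior_packets = 10 and prior_sacked = 2 when the ACK arrives, and tcp_clean_rtx_queue() leaves packets_out = 7 with sacked_out = 3 because the ACK cumulatively acked three packets and SACKed one more. Then newly_acked_sacked = (10 - 2) - (7 - 3) = 4, i.e. four packets newly known to have reached the receiver: this is the "delivered" signal that tcp_fastretrans_alert() accumulates into prr_delivered.
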
@@ -3715,7 +3761,7 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
 		    tcp_may_raise_cwnd(sk, flag))
 			tcp_cong_avoid(sk, ack, prior_in_flight);
 		tcp_fastretrans_alert(sk, prior_packets - tp->packets_out,
-				      flag);
+				      newly_acked_sacked, flag);
 	} else {
 		if ((flag & FLAG_DATA_ACKED) && !frto_cwnd)
 			tcp_cong_avoid(sk, ack, prior_in_flight);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 0377c061f22f..081dcd6fd0c4 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1796,11 +1796,13 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 		tcp_event_new_data_sent(sk, skb);
 
 		tcp_minshall_update(tp, mss_now, skb);
-		sent_pkts++;
+		sent_pkts += tcp_skb_pcount(skb);
 
 		if (push_one)
 			break;
 	}
+	if (inet_csk(sk)->icsk_ca_state == TCP_CA_Recovery)
+		tp->prr_out += sent_pkts;
 
 	if (likely(sent_pkts)) {
 		tcp_cwnd_validate(sk);
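
Two details here: sent_pkts now advances by tcp_skb_pcount(skb), since a single TSO skb can carry many MSS-sized segments, and anything transmitted while the connection is in TCP_CA_Recovery is folded into prr_out, the "sent during recovery" term that tcp_update_cwnd_in_recovery() subtracts. A hedged sketch of the accounting (names invented, not kernel code); the retransmit path in the next hunk does the same per skb:

    /* Illustrative only: count a burst the way tcp_write_xmit() now does.
     * pcount[i] stands in for tcp_skb_pcount() of the i-th skb sent. */
    static unsigned int account_sent(const unsigned int *pcount, int n,
                                     int in_recovery, unsigned int *prr_out)
    {
            unsigned int sent_pkts = 0;
            int i;

            for (i = 0; i < n; i++)
                    sent_pkts += pcount[i];     /* was: sent_pkts++ per skb */
            if (in_recovery)
                    *prr_out += sent_pkts;
            return sent_pkts;
    }

With a two-segment TSO skb followed by a single segment, sent_pkts is 3 rather than 2, so prr_out matches what actually left the host.
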
@@ -2294,6 +2296,9 @@ begin_fwd:
 			return;
 		NET_INC_STATS_BH(sock_net(sk), mib_idx);
 
+		if (inet_csk(sk)->icsk_ca_state == TCP_CA_Recovery)
+			tp->prr_out += tcp_skb_pcount(skb);
+
 		if (skb == tcp_write_queue_head(sk))
 			inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
 						  inet_csk(sk)->icsk_rto,