path: root/net/ipv4/tcp_input.c
author     Yuchung Cheng <ycheng@google.com>      2013-08-21 20:29:23 -0400
committer  David S. Miller <davem@davemloft.net>  2013-08-22 17:39:46 -0400
commit     0f7cc9a3c2bd89b15720dbf358e9b9e62af27126
tree       984571f61086e60bc7bad6d6fb62222299cebd95
parent     be23ab5131621c0dadacdd72de7c94ab168627ee
tcp: increase throughput when reordering is high
The stack currently detects reordering and avoids spurious retransmission very well. However, throughput is sub-optimal under high reordering because cwnd is increased only when data is delivered in order, i.e., the FLAG_DATA_ACKED check in tcp_ack(). The more packets are reordered, the worse the throughput is.

Therefore, when reordering is proven high, cwnd should advance whenever data is delivered, regardless of its ordering. If reordering is low, conservatively advance cwnd only on in-order deliveries in Open state, and retain cwnd in Disordered state (RFC5681).

Using netperf on a qdisc setup with 20 Mbps bottleneck bandwidth and RTT randomized between 45 ms and 55 ms (to induce reordering), this change increases TCP throughput by 20-25%, to near the bottleneck bandwidth.

A special case is a stretched ACK carrying a new SACK and/or an ECE mark. For example, a receiver may receive an out-of-order or ECN-marked packet while unacked data is buffered because of LRO or delayed ACK. The principle on such an ACK is to advance cwnd on the cumulatively acked part first, then reduce cwnd in tcp_fastretrans_alert().

Signed-off-by: Yuchung Cheng <ycheng@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
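For reference, the distinction the new logic keys on is between FLAG_DATA_ACKED and FLAG_FORWARD_PROGRESS. The relevant flag definitions sit near the top of net/ipv4/tcp_input.c; in this era of the tree they read roughly as follows (quoted for context only, exact values not re-verified here):

#define FLAG_DATA_ACKED        0x04  /* This ACK acknowledged new data. */
#define FLAG_SYN_ACKED         0x10  /* This ACK acknowledged SYN.      */
#define FLAG_DATA_SACKED       0x20  /* New SACK.                       */
#define FLAG_ECE               0x40  /* ECE in this ACK                 */

#define FLAG_ACKED             (FLAG_DATA_ACKED | FLAG_SYN_ACKED)
#define FLAG_FORWARD_PROGRESS  (FLAG_ACKED | FLAG_DATA_SACKED)

So FLAG_DATA_ACKED means the cumulative ACK point moved, while FLAG_FORWARD_PROGRESS also counts newly SACKed data; the rewritten tcp_may_raise_cwnd() below switches between these two tests based on the measured reordering.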
Diffstat (limited to 'net/ipv4/tcp_input.c')
 net/ipv4/tcp_input.c | 32 ++++++++++++++++++++------------
 1 file changed, 20 insertions(+), 12 deletions(-)
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index e965cc7b87ff..ec492eae0cd7 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2485,8 +2485,6 @@ static void tcp_try_to_open(struct sock *sk, int flag, const int prior_unsacked)
 
         if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) {
                 tcp_try_keep_open(sk);
-                if (inet_csk(sk)->icsk_ca_state != TCP_CA_Open)
-                        tcp_moderate_cwnd(tp);
         } else {
                 tcp_cwnd_reduction(sk, prior_unsacked, 0);
         }
@@ -3128,11 +3126,24 @@ static inline bool tcp_ack_is_dubious(const struct sock *sk, const int flag)
                 inet_csk(sk)->icsk_ca_state != TCP_CA_Open;
 }
 
+/* Decide wheather to run the increase function of congestion control. */
 static inline bool tcp_may_raise_cwnd(const struct sock *sk, const int flag)
 {
-        const struct tcp_sock *tp = tcp_sk(sk);
-        return (!(flag & FLAG_ECE) || tp->snd_cwnd < tp->snd_ssthresh) &&
-                !tcp_in_cwnd_reduction(sk);
+        if (tcp_in_cwnd_reduction(sk))
+                return false;
+
+        /* If reordering is high then always grow cwnd whenever data is
+         * delivered regardless of its ordering. Otherwise stay conservative
+         * and only grow cwnd on in-order delivery in Open state, and retain
+         * cwnd in Disordered state (RFC5681). A stretched ACK with
+         * new SACK or ECE mark may first advance cwnd here and later reduce
+         * cwnd in tcp_fastretrans_alert() based on more states.
+         */
+        if (tcp_sk(sk)->reordering > sysctl_tcp_reordering)
+                return flag & FLAG_FORWARD_PROGRESS;
+
+        return inet_csk(sk)->icsk_ca_state == TCP_CA_Open &&
+               flag & FLAG_DATA_ACKED;
 }
 
 /* Check that window update is acceptable.
@@ -3352,18 +3363,15 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
         flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una, sack_rtt);
         acked -= tp->packets_out;
 
+        /* Advance cwnd if state allows */
+        if (tcp_may_raise_cwnd(sk, flag))
+                tcp_cong_avoid(sk, ack, prior_in_flight);
+
         if (tcp_ack_is_dubious(sk, flag)) {
-                /* Advance CWND, if state allows this. */
-                if ((flag & FLAG_DATA_ACKED) && tcp_may_raise_cwnd(sk, flag))
-                        tcp_cong_avoid(sk, ack, prior_in_flight);
                 is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP));
                 tcp_fastretrans_alert(sk, acked, prior_unsacked,
                                       is_dupack, flag);
-        } else {
-                if (flag & FLAG_DATA_ACKED)
-                        tcp_cong_avoid(sk, ack, prior_in_flight);
         }
-
         if (tp->tlp_high_seq)
                 tcp_process_tlp_ack(sk, ack, flag);
 
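To make the behavioral change concrete, here is a minimal, self-contained user-space sketch (hypothetical, not kernel code; struct conn, may_raise_cwnd and reordering_thresh are made-up names for this illustration) that models the new decision: a SACK-only ACK grows cwnd once measured reordering exceeds the threshold, whereas with low reordering, or under the old FLAG_DATA_ACKED-only rule, it would not.

#include <stdbool.h>
#include <stdio.h>

/* Simplified stand-ins for the kernel's ACK flag bits. */
#define FLAG_DATA_ACKED        0x04  /* cumulative ACK point advanced */
#define FLAG_DATA_SACKED       0x20  /* new data was SACKed           */
#define FLAG_FORWARD_PROGRESS  (FLAG_DATA_ACKED | FLAG_DATA_SACKED)

/* Hypothetical per-connection state used by this model. */
struct conn {
        bool in_cwnd_reduction;  /* in CWR/Recovery: never grow cwnd   */
        bool ca_open;            /* congestion state is TCP_CA_Open    */
        int  reordering;         /* estimated reordering degree        */
        int  reordering_thresh;  /* cf. sysctl_tcp_reordering          */
};

/* Models the new tcp_may_raise_cwnd() policy: with high measured
 * reordering, any forward progress (cumulative ACK or new SACK) may
 * grow cwnd; otherwise only in-order delivery in Open state does.
 */
static bool may_raise_cwnd(const struct conn *c, int flag)
{
        if (c->in_cwnd_reduction)
                return false;
        if (c->reordering > c->reordering_thresh)
                return flag & FLAG_FORWARD_PROGRESS;
        return c->ca_open && (flag & FLAG_DATA_ACKED);
}

int main(void)
{
        /* A SACK-only ACK: out-of-order delivery, nothing cumulatively acked. */
        int flag = FLAG_DATA_SACKED;

        struct conn high = { false, false, 10, 3 };  /* reordering proven high */
        struct conn low  = { false, false,  2, 3 };  /* reordering still low   */

        printf("high reordering, SACK-only ACK -> grow cwnd: %d\n",
               may_raise_cwnd(&high, flag));
        printf("low reordering,  SACK-only ACK -> grow cwnd: %d\n",
               may_raise_cwnd(&low, flag));
        return 0;
}

Running the model prints 1 for the high-reordering connection and 0 for the low-reordering one, which is exactly the asymmetry the patch introduces.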