aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorNeal Cardwell <ncardwell@google.com>2015-01-28 20:01:35 -0500
committerDavid S. Miller <davem@davemloft.net>2015-01-29 01:18:37 -0500
commite73ebb0881ea5534ce606c1d71b4ac44db5c6930 (patch)
tree2a20ba5a81c6e85982c4fe3b8ec33ca2cd4b2a96
parent59343cd7c4809cf7598789e1cd14563780ae4239 (diff)
tcp: stretch ACK fixes prep
LRO, GRO, delayed ACKs, and middleboxes can cause "stretch ACKs" that cover more than the RFC-specified maximum of 2 packets. These stretch ACKs can cause serious performance shortfalls in common congestion control algorithms that were designed and tuned years ago with receiver hosts that were not using LRO or GRO, and were instead politely ACKing every other packet. This patch series fixes Reno and CUBIC to handle stretch ACKs. This patch prepares for the upcoming stretch ACK bug fix patches. It adds an "acked" parameter to tcp_cong_avoid_ai() to allow for future fixes to tcp_cong_avoid_ai() to correctly handle stretch ACKs, and changes all congestion control algorithms to pass in 1 for the ACKed count. It also changes tcp_slow_start() to return the number of packet ACK "credits" that were not processed in slow start mode, and can be processed by the congestion control module in additive increase mode. In future patches we will fix tcp_cong_avoid_ai() to handle stretch ACKs, and fix Reno and CUBIC handling of stretch ACKs in slow start and additive increase mode. Reported-by: Eyal Perry <eyalpe@mellanox.com> Signed-off-by: Neal Cardwell <ncardwell@google.com> Signed-off-by: Yuchung Cheng <ycheng@google.com> Signed-off-by: Eric Dumazet <edumazet@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--include/net/tcp.h4
-rw-r--r--net/ipv4/tcp_bic.c2
-rw-r--r--net/ipv4/tcp_cong.c11
-rw-r--r--net/ipv4/tcp_cubic.c2
-rw-r--r--net/ipv4/tcp_scalable.c3
-rw-r--r--net/ipv4/tcp_veno.c2
-rw-r--r--net/ipv4/tcp_yeah.c2
7 files changed, 15 insertions, 11 deletions
diff --git a/include/net/tcp.h b/include/net/tcp.h
index f50f29faf76f..9d9111ef43ae 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -834,8 +834,8 @@ void tcp_get_available_congestion_control(char *buf, size_t len);
834void tcp_get_allowed_congestion_control(char *buf, size_t len); 834void tcp_get_allowed_congestion_control(char *buf, size_t len);
835int tcp_set_allowed_congestion_control(char *allowed); 835int tcp_set_allowed_congestion_control(char *allowed);
836int tcp_set_congestion_control(struct sock *sk, const char *name); 836int tcp_set_congestion_control(struct sock *sk, const char *name);
837void tcp_slow_start(struct tcp_sock *tp, u32 acked); 837u32 tcp_slow_start(struct tcp_sock *tp, u32 acked);
838void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w); 838void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w, u32 acked);
839 839
840u32 tcp_reno_ssthresh(struct sock *sk); 840u32 tcp_reno_ssthresh(struct sock *sk);
841void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked); 841void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked);
diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c
index bb395d46a389..c037644eafb7 100644
--- a/net/ipv4/tcp_bic.c
+++ b/net/ipv4/tcp_bic.c
@@ -150,7 +150,7 @@ static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
150 tcp_slow_start(tp, acked); 150 tcp_slow_start(tp, acked);
151 else { 151 else {
152 bictcp_update(ca, tp->snd_cwnd); 152 bictcp_update(ca, tp->snd_cwnd);
153 tcp_cong_avoid_ai(tp, ca->cnt); 153 tcp_cong_avoid_ai(tp, ca->cnt, 1);
154 } 154 }
155} 155}
156 156
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index 27ead0dd16bc..6826017c12d1 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -291,25 +291,28 @@ int tcp_set_congestion_control(struct sock *sk, const char *name)
291 * ABC caps N to 2. Slow start exits when cwnd grows over ssthresh and 291 * ABC caps N to 2. Slow start exits when cwnd grows over ssthresh and
292 * returns the leftover acks to adjust cwnd in congestion avoidance mode. 292 * returns the leftover acks to adjust cwnd in congestion avoidance mode.
293 */ 293 */
294void tcp_slow_start(struct tcp_sock *tp, u32 acked) 294u32 tcp_slow_start(struct tcp_sock *tp, u32 acked)
295{ 295{
296 u32 cwnd = tp->snd_cwnd + acked; 296 u32 cwnd = tp->snd_cwnd + acked;
297 297
298 if (cwnd > tp->snd_ssthresh) 298 if (cwnd > tp->snd_ssthresh)
299 cwnd = tp->snd_ssthresh + 1; 299 cwnd = tp->snd_ssthresh + 1;
300 acked -= cwnd - tp->snd_cwnd;
300 tp->snd_cwnd = min(cwnd, tp->snd_cwnd_clamp); 301 tp->snd_cwnd = min(cwnd, tp->snd_cwnd_clamp);
302
303 return acked;
301} 304}
302EXPORT_SYMBOL_GPL(tcp_slow_start); 305EXPORT_SYMBOL_GPL(tcp_slow_start);
303 306
304/* In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd (or alternative w) */ 307/* In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd (or alternative w) */
305void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w) 308void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w, u32 acked)
306{ 309{
307 if (tp->snd_cwnd_cnt >= w) { 310 if (tp->snd_cwnd_cnt >= w) {
308 if (tp->snd_cwnd < tp->snd_cwnd_clamp) 311 if (tp->snd_cwnd < tp->snd_cwnd_clamp)
309 tp->snd_cwnd++; 312 tp->snd_cwnd++;
310 tp->snd_cwnd_cnt = 0; 313 tp->snd_cwnd_cnt = 0;
311 } else { 314 } else {
312 tp->snd_cwnd_cnt++; 315 tp->snd_cwnd_cnt += acked;
313 } 316 }
314} 317}
315EXPORT_SYMBOL_GPL(tcp_cong_avoid_ai); 318EXPORT_SYMBOL_GPL(tcp_cong_avoid_ai);
@@ -333,7 +336,7 @@ void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked)
333 tcp_slow_start(tp, acked); 336 tcp_slow_start(tp, acked);
334 /* In dangerous area, increase slowly. */ 337 /* In dangerous area, increase slowly. */
335 else 338 else
336 tcp_cong_avoid_ai(tp, tp->snd_cwnd); 339 tcp_cong_avoid_ai(tp, tp->snd_cwnd, 1);
337} 340}
338EXPORT_SYMBOL_GPL(tcp_reno_cong_avoid); 341EXPORT_SYMBOL_GPL(tcp_reno_cong_avoid);
339 342
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index 6b6002416a73..df4bc4d87e58 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -320,7 +320,7 @@ static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
320 tcp_slow_start(tp, acked); 320 tcp_slow_start(tp, acked);
321 } else { 321 } else {
322 bictcp_update(ca, tp->snd_cwnd); 322 bictcp_update(ca, tp->snd_cwnd);
323 tcp_cong_avoid_ai(tp, ca->cnt); 323 tcp_cong_avoid_ai(tp, ca->cnt, 1);
324 } 324 }
325} 325}
326 326
diff --git a/net/ipv4/tcp_scalable.c b/net/ipv4/tcp_scalable.c
index 6824afb65d93..333bcb2415ff 100644
--- a/net/ipv4/tcp_scalable.c
+++ b/net/ipv4/tcp_scalable.c
@@ -25,7 +25,8 @@ static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, u32 acked)
25 if (tp->snd_cwnd <= tp->snd_ssthresh) 25 if (tp->snd_cwnd <= tp->snd_ssthresh)
26 tcp_slow_start(tp, acked); 26 tcp_slow_start(tp, acked);
27 else 27 else
28 tcp_cong_avoid_ai(tp, min(tp->snd_cwnd, TCP_SCALABLE_AI_CNT)); 28 tcp_cong_avoid_ai(tp, min(tp->snd_cwnd, TCP_SCALABLE_AI_CNT),
29 1);
29} 30}
30 31
31static u32 tcp_scalable_ssthresh(struct sock *sk) 32static u32 tcp_scalable_ssthresh(struct sock *sk)
diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c
index a4d2d2d88dca..112151eeee45 100644
--- a/net/ipv4/tcp_veno.c
+++ b/net/ipv4/tcp_veno.c
@@ -159,7 +159,7 @@ static void tcp_veno_cong_avoid(struct sock *sk, u32 ack, u32 acked)
159 /* In the "non-congestive state", increase cwnd 159 /* In the "non-congestive state", increase cwnd
160 * every rtt. 160 * every rtt.
161 */ 161 */
162 tcp_cong_avoid_ai(tp, tp->snd_cwnd); 162 tcp_cong_avoid_ai(tp, tp->snd_cwnd, 1);
163 } else { 163 } else {
164 /* In the "congestive state", increase cwnd 164 /* In the "congestive state", increase cwnd
165 * every other rtt. 165 * every other rtt.
diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c
index cd7273218598..17d35662930d 100644
--- a/net/ipv4/tcp_yeah.c
+++ b/net/ipv4/tcp_yeah.c
@@ -92,7 +92,7 @@ static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack, u32 acked)
92 92
93 } else { 93 } else {
94 /* Reno */ 94 /* Reno */
95 tcp_cong_avoid_ai(tp, tp->snd_cwnd); 95 tcp_cong_avoid_ai(tp, tp->snd_cwnd, 1);
96 } 96 }
97 97
98 /* The key players are v_vegas.beg_snd_una and v_beg_snd_nxt. 98 /* The key players are v_vegas.beg_snd_una and v_beg_snd_nxt.