aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Yuchung Cheng <ycheng@google.com> 2013-06-11 18:35:32 -0400
committer David S. Miller <davem@davemloft.net> 2013-06-13 05:46:29 -0400
commit 85f16525a2eb66e6092cbd8dcf42371df8334ed0 (patch)
tree adaf85853246f46fc00f9ed4235059650acf7517
parent 8e994402ad5e6ae3d391c0935f9f1dc2eeb92a5e (diff)
tcp: properly send new data in fast recovery in first RTT
Linux sends new unsent data during disorder and recovery states if all (suspected) lost packets have been retransmitted (RFC5681, section 3.2 step 1 & 2, RFC3517 section 4, NextSeg() Rule 2). One requirement is to keep the receive window about twice the estimated sender's congestion window (tcp_rcv_space_adjust()), assuming the fast retransmits repair the losses in the next round trip. But currently that is not the case on the first round trip in either normal or Fast Open connections, because the initial receive window is identical to the (expected) sender's initial congestion window. The fix is to double it. Signed-off-by: Yuchung Cheng <ycheng@google.com> Acked-by: Neal Cardwell <ncardwell@google.com> Acked-by: Eric Dumazet <edumazet@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- include/net/tcp.h 5
-rw-r--r-- net/ipv4/tcp_input.c 13
-rw-r--r-- net/ipv4/tcp_output.c 33
3 files changed, 22 insertions, 29 deletions
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 0d637e9403a5..6fa80831dc40 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -61,9 +61,6 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo);
61 */ 61 */
62#define MAX_TCP_WINDOW 32767U 62#define MAX_TCP_WINDOW 32767U
63 63
64/* Offer an initial receive window of 10 mss. */
65#define TCP_DEFAULT_INIT_RCVWND 10
66
67/* Minimal accepted MSS. It is (60+60+8) - (20+20). */ 64/* Minimal accepted MSS. It is (60+60+8) - (20+20). */
68#define TCP_MIN_MSS 88U 65#define TCP_MIN_MSS 88U
69 66
@@ -1047,6 +1044,8 @@ static inline void tcp_sack_reset(struct tcp_options_received *rx_opt)
1047 rx_opt->num_sacks = 0; 1044 rx_opt->num_sacks = 0;
1048} 1045}
1049 1046
1047extern u32 tcp_default_init_rwnd(u32 mss);
1048
1050/* Determine a window scaling and initial window to offer. */ 1049/* Determine a window scaling and initial window to offer. */
1051extern void tcp_select_initial_window(int __space, __u32 mss, 1050extern void tcp_select_initial_window(int __space, __u32 mss,
1052 __u32 *rcv_wnd, __u32 *window_clamp, 1051 __u32 *rcv_wnd, __u32 *window_clamp,
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 907311c9a012..46271cdcf088 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -347,22 +347,13 @@ static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb)
347} 347}
348 348
349/* 3. Tuning rcvbuf, when connection enters established state. */ 349/* 3. Tuning rcvbuf, when connection enters established state. */
350
351static void tcp_fixup_rcvbuf(struct sock *sk) 350static void tcp_fixup_rcvbuf(struct sock *sk)
352{ 351{
353 u32 mss = tcp_sk(sk)->advmss; 352 u32 mss = tcp_sk(sk)->advmss;
354 u32 icwnd = TCP_DEFAULT_INIT_RCVWND;
355 int rcvmem; 353 int rcvmem;
356 354
357 /* Limit to 10 segments if mss <= 1460, 355 rcvmem = 2 * SKB_TRUESIZE(mss + MAX_TCP_HEADER) *
358 * or 14600/mss segments, with a minimum of two segments. 356 tcp_default_init_rwnd(mss);
359 */
360 if (mss > 1460)
361 icwnd = max_t(u32, (1460 * TCP_DEFAULT_INIT_RCVWND) / mss, 2);
362
363 rcvmem = 2 * SKB_TRUESIZE(mss + MAX_TCP_HEADER);
364
365 rcvmem *= icwnd;
366 357
367 if (sk->sk_rcvbuf < rcvmem) 358 if (sk->sk_rcvbuf < rcvmem)
368 sk->sk_rcvbuf = min(rcvmem, sysctl_tcp_rmem[2]); 359 sk->sk_rcvbuf = min(rcvmem, sysctl_tcp_rmem[2]);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index ec335fabd5cc..3dd46eab3b05 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -181,6 +181,21 @@ static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts)
181 inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK); 181 inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
182} 182}
183 183
184
185u32 tcp_default_init_rwnd(u32 mss)
186{
187 /* Initial receive window should be twice of TCP_INIT_CWND to
188 * enable proper sending of new unsent data during fast recovery
189 * (RFC 3517, Section 4, NextSeg() rule (2)). Further place a
190 * limit when mss is larger than 1460.
191 */
192 u32 init_rwnd = TCP_INIT_CWND * 2;
193
194 if (mss > 1460)
195 init_rwnd = max((1460 * init_rwnd) / mss, 2U);
196 return init_rwnd;
197}
198
184/* Determine a window scaling and initial window to offer. 199/* Determine a window scaling and initial window to offer.
185 * Based on the assumption that the given amount of space 200 * Based on the assumption that the given amount of space
186 * will be offered. Store the results in the tp structure. 201 * will be offered. Store the results in the tp structure.
@@ -230,22 +245,10 @@ void tcp_select_initial_window(int __space, __u32 mss,
230 } 245 }
231 } 246 }
232 247
233 /* Set initial window to a value enough for senders starting with
234 * initial congestion window of TCP_DEFAULT_INIT_RCVWND. Place
235 * a limit on the initial window when mss is larger than 1460.
236 */
237 if (mss > (1 << *rcv_wscale)) { 248 if (mss > (1 << *rcv_wscale)) {
238 int init_cwnd = TCP_DEFAULT_INIT_RCVWND; 249 if (!init_rcv_wnd) /* Use default unless specified otherwise */
239 if (mss > 1460) 250 init_rcv_wnd = tcp_default_init_rwnd(mss);
240 init_cwnd = 251 *rcv_wnd = min(*rcv_wnd, init_rcv_wnd * mss);
241 max_t(u32, (1460 * TCP_DEFAULT_INIT_RCVWND) / mss, 2);
242 /* when initializing use the value from init_rcv_wnd
243 * rather than the default from above
244 */
245 if (init_rcv_wnd)
246 *rcv_wnd = min(*rcv_wnd, init_rcv_wnd * mss);
247 else
248 *rcv_wnd = min(*rcv_wnd, init_cwnd * mss);
249 } 252 }
250 253
251 /* Set the clamp no higher than max representable value */ 254 /* Set the clamp no higher than max representable value */