about summary refs log tree commit diff stats
path: root/net/ipv4
diff options
context:
space:
mode:
author John Heffner <jheffner@psc.edu> 2005-11-10 20:11:48 -0500
committer David S. Miller <davem@davemloft.net> 2005-11-10 20:11:48 -0500
commit 326f36e9e7de362e09745ce6f84b65e7ccac33ba (patch)
tree bf8e56d28a8de0fbbeb4e79bea565f2e31abe07e /net/ipv4
parent 9772efb970780aeed488c19d8b4afd46c3b484af (diff)
[TCP]: receive buffer growth limiting with mixed MTU
This is a patch for discussion addressing some receive buffer growing issues. This is partially related to the thread "Possible BUG in IPv4 TCP window handling..." last week. Specifically it addresses the problem of an interaction between rcvbuf moderation (receiver autotuning) and rcv_ssthresh. The problem occurs when sending small packets to a receiver with a larger MTU. (A very common case I have is a host with a 1500 byte MTU sending to a host with a 9k MTU.) In such a case, the rcv_ssthresh code is targeting a window size corresponding to filling up the current rcvbuf, not taking into account that the new rcvbuf moderation may increase the rcvbuf size. One hunk makes rcv_ssthresh use tcp_rmem[2] as the size target rather than rcvbuf. The other changes the behavior when it overflows its memory bounds with in-order data so that it tries to grow rcvbuf (the same as with out-of-order data). These changes should help my problem of mixed MTUs, and should also help the case from last week's thread, I think. (In both cases though you still need tcp_rmem[2] to be set much larger than the TCP window.) One question is whether this is too aggressive at trying to increase rcvbuf if it's under memory stress. Originally-from: John Heffner <jheffner@psc.edu> Signed-off-by: Stephen Hemminger <shemminger@osdl.org> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4')
-rw-r--r-- net/ipv4/tcp_input.c 35
1 files changed, 8 insertions, 27 deletions
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 4cb5e6f408dc..827cd4b9e867 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -234,7 +234,7 @@ static int __tcp_grow_window(const struct sock *sk, struct tcp_sock *tp,
234{ 234{
235 /* Optimize this! */ 235 /* Optimize this! */
236 int truesize = tcp_win_from_space(skb->truesize)/2; 236 int truesize = tcp_win_from_space(skb->truesize)/2;
237 int window = tcp_full_space(sk)/2; 237 int window = tcp_win_from_space(sysctl_tcp_rmem[2])/2;
238 238
239 while (tp->rcv_ssthresh <= window) { 239 while (tp->rcv_ssthresh <= window) {
240 if (truesize <= skb->len) 240 if (truesize <= skb->len)
@@ -327,37 +327,18 @@ static void tcp_init_buffer_space(struct sock *sk)
327static void tcp_clamp_window(struct sock *sk, struct tcp_sock *tp) 327static void tcp_clamp_window(struct sock *sk, struct tcp_sock *tp)
328{ 328{
329 struct inet_connection_sock *icsk = inet_csk(sk); 329 struct inet_connection_sock *icsk = inet_csk(sk);
330 struct sk_buff *skb;
331 unsigned int app_win = tp->rcv_nxt - tp->copied_seq;
332 int ofo_win = 0;
333 330
334 icsk->icsk_ack.quick = 0; 331 icsk->icsk_ack.quick = 0;
335 332
336 skb_queue_walk(&tp->out_of_order_queue, skb) { 333 if (sk->sk_rcvbuf < sysctl_tcp_rmem[2] &&
337 ofo_win += skb->len; 334 !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) &&
338 } 335 !tcp_memory_pressure &&
339 336 atomic_read(&tcp_memory_allocated) < sysctl_tcp_mem[0]) {
340 /* If overcommit is due to out of order segments, 337 sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc),
341 * do not clamp window. Try to expand rcvbuf instead. 338 sysctl_tcp_rmem[2]);
342 */
343 if (ofo_win) {
344 if (sk->sk_rcvbuf < sysctl_tcp_rmem[2] &&
345 !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) &&
346 !tcp_memory_pressure &&
347 atomic_read(&tcp_memory_allocated) < sysctl_tcp_mem[0])
348 sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc),
349 sysctl_tcp_rmem[2]);
350 } 339 }
351 if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf) { 340 if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf)
352 app_win += ofo_win;
353 if (atomic_read(&sk->sk_rmem_alloc) >= 2 * sk->sk_rcvbuf)
354 app_win >>= 1;
355 if (app_win > icsk->icsk_ack.rcv_mss)
356 app_win -= icsk->icsk_ack.rcv_mss;
357 app_win = max(app_win, 2U*tp->advmss);
358
359 tp->rcv_ssthresh = min(tp->window_clamp, 2U*tp->advmss); 341 tp->rcv_ssthresh = min(tp->window_clamp, 2U*tp->advmss);
360 }
361} 342}
362 343
363/* Receiver "autotuning" code. 344/* Receiver "autotuning" code.