diff options
| author | John Heffner <jheffner@psc.edu> | 2005-11-10 20:11:48 -0500 | 
|---|---|---|
| committer | David S. Miller <davem@davemloft.net> | 2005-11-10 20:11:48 -0500 | 
| commit | 326f36e9e7de362e09745ce6f84b65e7ccac33ba (patch) | |
| tree | bf8e56d28a8de0fbbeb4e79bea565f2e31abe07e | |
| parent | 9772efb970780aeed488c19d8b4afd46c3b484af (diff) | |
[TCP]: receive buffer growth limiting with mixed MTU
This is a patch for discussion addressing some receive buffer growing issues.
This is partially related to the thread "Possible BUG in IPv4 TCP window
handling..." last week.
Specifically it addresses the problem of an interaction between rcvbuf
moderation (receiver autotuning) and rcv_ssthresh.  The problem occurs when
sending small packets to a receiver with a larger MTU.  (A very common case I
have is a host with a 1500 byte MTU sending to a host with a 9k MTU.)  In
such a case, the rcv_ssthresh code is targeting a window size corresponding
to filling up the current rcvbuf, not taking into account that the new rcvbuf
moderation may increase the rcvbuf size.
One hunk makes rcv_ssthresh use tcp_rmem[2] as the size target rather than
rcvbuf.  The other changes the behavior when the socket overflows its memory
bounds with in-order data, so that it tries to grow rcvbuf (the same as it
already does with out-of-order data).
These changes should help my problem of mixed MTUs, and I think they should
also help the case from last week's thread.  (In both cases, though, you still
need tcp_rmem[2] to be set much larger than the TCP window.)  One open question
is whether this is too aggressive in trying to increase rcvbuf when it is under
memory stress.
Originally-from: John Heffner <jheffner@psc.edu>
Signed-off-by: Stephen Hemminger <shemminger@osdl.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
| -rw-r--r-- | net/ipv4/tcp_input.c | 35 | 
1 files changed, 8 insertions, 27 deletions
| diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 4cb5e6f408dc..827cd4b9e867 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c | |||
| @@ -234,7 +234,7 @@ static int __tcp_grow_window(const struct sock *sk, struct tcp_sock *tp, | |||
| 234 | { | 234 | { | 
| 235 | /* Optimize this! */ | 235 | /* Optimize this! */ | 
| 236 | int truesize = tcp_win_from_space(skb->truesize)/2; | 236 | int truesize = tcp_win_from_space(skb->truesize)/2; | 
| 237 | int window = tcp_full_space(sk)/2; | 237 | int window = tcp_win_from_space(sysctl_tcp_rmem[2])/2; | 
| 238 | 238 | ||
| 239 | while (tp->rcv_ssthresh <= window) { | 239 | while (tp->rcv_ssthresh <= window) { | 
| 240 | if (truesize <= skb->len) | 240 | if (truesize <= skb->len) | 
| @@ -327,37 +327,18 @@ static void tcp_init_buffer_space(struct sock *sk) | |||
| 327 | static void tcp_clamp_window(struct sock *sk, struct tcp_sock *tp) | 327 | static void tcp_clamp_window(struct sock *sk, struct tcp_sock *tp) | 
| 328 | { | 328 | { | 
| 329 | struct inet_connection_sock *icsk = inet_csk(sk); | 329 | struct inet_connection_sock *icsk = inet_csk(sk); | 
| 330 | struct sk_buff *skb; | ||
| 331 | unsigned int app_win = tp->rcv_nxt - tp->copied_seq; | ||
| 332 | int ofo_win = 0; | ||
| 333 | 330 | ||
| 334 | icsk->icsk_ack.quick = 0; | 331 | icsk->icsk_ack.quick = 0; | 
| 335 | 332 | ||
| 336 | skb_queue_walk(&tp->out_of_order_queue, skb) { | 333 | if (sk->sk_rcvbuf < sysctl_tcp_rmem[2] && | 
| 337 | ofo_win += skb->len; | 334 | !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) && | 
| 338 | } | 335 | !tcp_memory_pressure && | 
| 339 | 336 | atomic_read(&tcp_memory_allocated) < sysctl_tcp_mem[0]) { | |
| 340 | /* If overcommit is due to out of order segments, | 337 | sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc), | 
| 341 | * do not clamp window. Try to expand rcvbuf instead. | 338 | sysctl_tcp_rmem[2]); | 
| 342 | */ | ||
| 343 | if (ofo_win) { | ||
| 344 | if (sk->sk_rcvbuf < sysctl_tcp_rmem[2] && | ||
| 345 | !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) && | ||
| 346 | !tcp_memory_pressure && | ||
| 347 | atomic_read(&tcp_memory_allocated) < sysctl_tcp_mem[0]) | ||
| 348 | sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc), | ||
| 349 | sysctl_tcp_rmem[2]); | ||
| 350 | } | 339 | } | 
| 351 | if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf) { | 340 | if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf) | 
| 352 | app_win += ofo_win; | ||
| 353 | if (atomic_read(&sk->sk_rmem_alloc) >= 2 * sk->sk_rcvbuf) | ||
| 354 | app_win >>= 1; | ||
| 355 | if (app_win > icsk->icsk_ack.rcv_mss) | ||
| 356 | app_win -= icsk->icsk_ack.rcv_mss; | ||
| 357 | app_win = max(app_win, 2U*tp->advmss); | ||
| 358 | |||
| 359 | tp->rcv_ssthresh = min(tp->window_clamp, 2U*tp->advmss); | 341 | tp->rcv_ssthresh = min(tp->window_clamp, 2U*tp->advmss); | 
| 360 | } | ||
| 361 | } | 342 | } | 
| 362 | 343 | ||
| 363 | /* Receiver "autotuning" code. | 344 | /* Receiver "autotuning" code. | 
