aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
author: Eric Dumazet <edumazet@google.com> 2013-09-06 13:35:58 -0400
committer: David S. Miller <davem@davemloft.net> 2013-09-06 14:43:49 -0400
commit: 4e4f1fc226816905c937f9b29dabe351075dfe0f (patch)
tree: 6d440146005a9d075008e9cfbc5aa319dcc3767f /net
parent: 0042d0c840c616186a5b09207a0e77fab7581db3 (diff)
tcp: properly increase rcv_ssthresh for ofo packets
TCP receive window handling is multi-staged.

A socket has a memory budget, static or dynamic, in sk_rcvbuf.

Because we do not really know how this memory budget translates to a TCP window (payload), TCP announces a small initial window (about 20 MSS).

When a packet is received, we increase TCP rcv_win depending on the payload/truesize ratio of this packet. Good citizen packets give a hint that it's reasonable to have rcv_win = sk_rcvbuf/2.

This heuristic takes place in tcp_grow_window().

The problem is: we currently call tcp_grow_window() only for in-order packets.

This means that reorders or packet losses stop the proper growth of rcv_win, and senders are unable to benefit from fast recovery or proper reordering level detection.

Really, a packet being stored in the OFO (out-of-order) queue is not a bad citizen. It should be part of the game, just like in-order packets.

In our traces, we very often see that the sender is limited by Linux's small receive windows, even if Linux hosts use autotuning (DRS) and should allow rcv_win to grow to ~3MB.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r-- net/ipv4/tcp_input.c 5
1 file changed, 4 insertions, 1 deletion
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 894bc174f472..25a89eaa669d 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4139,6 +4139,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
4139 if (!tcp_try_coalesce(sk, skb1, skb, &fragstolen)) { 4139 if (!tcp_try_coalesce(sk, skb1, skb, &fragstolen)) {
4140 __skb_queue_after(&tp->out_of_order_queue, skb1, skb); 4140 __skb_queue_after(&tp->out_of_order_queue, skb1, skb);
4141 } else { 4141 } else {
4142 tcp_grow_window(sk, skb);
4142 kfree_skb_partial(skb, fragstolen); 4143 kfree_skb_partial(skb, fragstolen);
4143 skb = NULL; 4144 skb = NULL;
4144 } 4145 }
@@ -4214,8 +4215,10 @@ add_sack:
4214 if (tcp_is_sack(tp)) 4215 if (tcp_is_sack(tp))
4215 tcp_sack_new_ofo_skb(sk, seq, end_seq); 4216 tcp_sack_new_ofo_skb(sk, seq, end_seq);
4216end: 4217end:
4217 if (skb) 4218 if (skb) {
4219 tcp_grow_window(sk, skb);
4218 skb_set_owner_r(skb, sk); 4220 skb_set_owner_r(skb, sk);
4221 }
4219} 4222}
4220 4223
4221static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int hdrlen, 4224static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int hdrlen,