aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4/tcp.c
diff options
context:
space:
mode:
author: Eric Dumazet <edumazet@google.com> 2012-05-01 22:28:41 -0400
committer: Greg Kroah-Hartman <gregkh@linuxfoundation.org> 2012-05-21 12:40:00 -0400
commit: f1aadd585872545e03701a91b1f2e9d66a35d5d3 (patch)
tree: 96ff6b102706774622ae64f62afdb2645c420d97 /net/ipv4/tcp.c
parent: 0acfc3c427fadb1b714940c56c194f97e72fda7a (diff)
tcp: change tcp_adv_win_scale and tcp_rmem[2]
[ Upstream commit b49960a05e32121d29316cfdf653894b88ac9190 ] The tcp_adv_win_scale default value is 2, meaning we expect a good citizen skb to have a skb->len / skb->truesize ratio of 75% (3/4). In 2.6 kernels we (mis)accounted for a typical MSS=1460 frame : 1536 + 64 + 256 = 1856 'estimated truesize', and 1856 * 3/4 = 1392. So these skbs were considered as not bloated. With recent truesize fixes, a typical MSS=1460 frame truesize is now the more precise : 2048 + 256 = 2304. But 2304 * 3/4 = 1728. So these skbs are not good citizens anymore, because 1460 < 1728. (GRO can escape this problem because it builds skbs with a too low truesize.) This also means tcp advertises a too optimistic window for a given allocated rcvspace : When receiving frames, sk_rmem_alloc can hit the sk_rcvbuf limit and we call tcp_prune_queue()/tcp_collapse() too often, especially when the application is slow to drain its receive queue or in case of losses (netperf is fast, scp is slow). This is a major latency source. We should adjust the len/truesize ratio to 50% instead of 75%. This patch : 1) changes the tcp_adv_win_scale default to 1 instead of 2; 2) increases the tcp_rmem[2] limit from 4MB to 6MB to take into account better truesize tracking and to allow autotuning of the tcp receive window to reach the same value as before. Note that the same amount of kernel memory is consumed compared to 2.6 kernels. Signed-off-by: Eric Dumazet <edumazet@google.com> Cc: Neal Cardwell <ncardwell@google.com> Cc: Tom Herbert <therbert@google.com> Cc: Yuchung Cheng <ycheng@google.com> Acked-by: Neal Cardwell <ncardwell@google.com> Signed-off-by: David S. Miller <davem@davemloft.net> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Diffstat (limited to 'net/ipv4/tcp.c')
-rw-r--r-- net/ipv4/tcp.c 9
1 files changed, 5 insertions, 4 deletions
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 80b988f0bca..74f84a483ea 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -3221,7 +3221,7 @@ void __init tcp_init(void)
3221{ 3221{
3222 struct sk_buff *skb = NULL; 3222 struct sk_buff *skb = NULL;
3223 unsigned long limit; 3223 unsigned long limit;
3224 int i, max_share, cnt; 3224 int i, max_rshare, max_wshare, cnt;
3225 unsigned long jiffy = jiffies; 3225 unsigned long jiffy = jiffies;
3226 3226
3227 BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb)); 3227 BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb));
@@ -3285,15 +3285,16 @@ void __init tcp_init(void)
3285 3285
3286 /* Set per-socket limits to no more than 1/128 the pressure threshold */ 3286 /* Set per-socket limits to no more than 1/128 the pressure threshold */
3287 limit = ((unsigned long)sysctl_tcp_mem[1]) << (PAGE_SHIFT - 7); 3287 limit = ((unsigned long)sysctl_tcp_mem[1]) << (PAGE_SHIFT - 7);
3288 max_share = min(4UL*1024*1024, limit); 3288 max_wshare = min(4UL*1024*1024, limit);
3289 max_rshare = min(6UL*1024*1024, limit);
3289 3290
3290 sysctl_tcp_wmem[0] = SK_MEM_QUANTUM; 3291 sysctl_tcp_wmem[0] = SK_MEM_QUANTUM;
3291 sysctl_tcp_wmem[1] = 16*1024; 3292 sysctl_tcp_wmem[1] = 16*1024;
3292 sysctl_tcp_wmem[2] = max(64*1024, max_share); 3293 sysctl_tcp_wmem[2] = max(64*1024, max_wshare);
3293 3294
3294 sysctl_tcp_rmem[0] = SK_MEM_QUANTUM; 3295 sysctl_tcp_rmem[0] = SK_MEM_QUANTUM;
3295 sysctl_tcp_rmem[1] = 87380; 3296 sysctl_tcp_rmem[1] = 87380;
3296 sysctl_tcp_rmem[2] = max(87380, max_share); 3297 sysctl_tcp_rmem[2] = max(87380, max_rshare);
3297 3298
3298 printk(KERN_INFO "TCP: Hash tables configured " 3299 printk(KERN_INFO "TCP: Hash tables configured "
3299 "(established %u bind %u)\n", 3300 "(established %u bind %u)\n",