aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
authorEric Dumazet <edumazet@google.com>2012-05-01 22:28:41 -0400
committerDavid S. Miller <davem@davemloft.net>2012-05-02 21:08:58 -0400
commitb49960a05e32121d29316cfdf653894b88ac9190 (patch)
tree101bb83073486809d5cc84505fecf772f4a77599 /net
parent84768edbb2721637620b2d84501bb0d5aed603f1 (diff)
tcp: change tcp_adv_win_scale and tcp_rmem[2]
tcp_adv_win_scale default value is 2, meaning we expect a good citizen skb to have skb->len / skb->truesize ratio of 75% (3/4) In 2.6 kernels we (mis)accounted for typical MSS=1460 frame : 1536 + 64 + 256 = 1856 'estimated truesize', and 1856 * 3/4 = 1392. So these skbs were considered as not bloated. With recent truesize fixes, a typical MSS=1460 frame truesize is now the more precise : 2048 + 256 = 2304. But 2304 * 3/4 = 1728. So these skb are not good citizen anymore, because 1460 < 1728 (GRO can escape this problem because it build skbs with a too low truesize.) This also means tcp advertises a too optimistic window for a given allocated rcvspace : When receiving frames, sk_rmem_alloc can hit sk_rcvbuf limit and we call tcp_prune_queue()/tcp_collapse() too often, especially when application is slow to drain its receive queue or in case of losses (netperf is fast, scp is slow). This is a major latency source. We should adjust the len/truesize ratio to 50% instead of 75% This patch : 1) changes tcp_adv_win_scale default to 1 instead of 2 2) increase tcp_rmem[2] limit from 4MB to 6MB to take into account better truesize tracking and to allow autotuning tcp receive window to reach same value than before. Note that same amount of kernel memory is consumed compared to 2.6 kernels. Signed-off-by: Eric Dumazet <edumazet@google.com> Cc: Neal Cardwell <ncardwell@google.com> Cc: Tom Herbert <therbert@google.com> Cc: Yuchung Cheng <ycheng@google.com> Acked-by: Neal Cardwell <ncardwell@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r--net/ipv4/tcp.c9
-rw-r--r--net/ipv4/tcp_input.c2
2 files changed, 6 insertions, 5 deletions
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 8bb6adeb62c0..1272a88c2a63 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -3243,7 +3243,7 @@ void __init tcp_init(void)
3243{ 3243{
3244 struct sk_buff *skb = NULL; 3244 struct sk_buff *skb = NULL;
3245 unsigned long limit; 3245 unsigned long limit;
3246 int max_share, cnt; 3246 int max_rshare, max_wshare, cnt;
3247 unsigned int i; 3247 unsigned int i;
3248 unsigned long jiffy = jiffies; 3248 unsigned long jiffy = jiffies;
3249 3249
@@ -3303,15 +3303,16 @@ void __init tcp_init(void)
3303 tcp_init_mem(&init_net); 3303 tcp_init_mem(&init_net);
3304 /* Set per-socket limits to no more than 1/128 the pressure threshold */ 3304 /* Set per-socket limits to no more than 1/128 the pressure threshold */
3305 limit = nr_free_buffer_pages() << (PAGE_SHIFT - 7); 3305 limit = nr_free_buffer_pages() << (PAGE_SHIFT - 7);
3306 max_share = min(4UL*1024*1024, limit); 3306 max_wshare = min(4UL*1024*1024, limit);
3307 max_rshare = min(6UL*1024*1024, limit);
3307 3308
3308 sysctl_tcp_wmem[0] = SK_MEM_QUANTUM; 3309 sysctl_tcp_wmem[0] = SK_MEM_QUANTUM;
3309 sysctl_tcp_wmem[1] = 16*1024; 3310 sysctl_tcp_wmem[1] = 16*1024;
3310 sysctl_tcp_wmem[2] = max(64*1024, max_share); 3311 sysctl_tcp_wmem[2] = max(64*1024, max_wshare);
3311 3312
3312 sysctl_tcp_rmem[0] = SK_MEM_QUANTUM; 3313 sysctl_tcp_rmem[0] = SK_MEM_QUANTUM;
3313 sysctl_tcp_rmem[1] = 87380; 3314 sysctl_tcp_rmem[1] = 87380;
3314 sysctl_tcp_rmem[2] = max(87380, max_share); 3315 sysctl_tcp_rmem[2] = max(87380, max_rshare);
3315 3316
3316 pr_info("Hash tables configured (established %u bind %u)\n", 3317 pr_info("Hash tables configured (established %u bind %u)\n",
3317 tcp_hashinfo.ehash_mask + 1, tcp_hashinfo.bhash_size); 3318 tcp_hashinfo.ehash_mask + 1, tcp_hashinfo.bhash_size);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index d99efd7dfb6c..257b61789eeb 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -85,7 +85,7 @@ int sysctl_tcp_ecn __read_mostly = 2;
85EXPORT_SYMBOL(sysctl_tcp_ecn); 85EXPORT_SYMBOL(sysctl_tcp_ecn);
86int sysctl_tcp_dsack __read_mostly = 1; 86int sysctl_tcp_dsack __read_mostly = 1;
87int sysctl_tcp_app_win __read_mostly = 31; 87int sysctl_tcp_app_win __read_mostly = 31;
88int sysctl_tcp_adv_win_scale __read_mostly = 2; 88int sysctl_tcp_adv_win_scale __read_mostly = 1;
89EXPORT_SYMBOL(sysctl_tcp_adv_win_scale); 89EXPORT_SYMBOL(sysctl_tcp_adv_win_scale);
90 90
91int sysctl_tcp_stdurg __read_mostly; 91int sysctl_tcp_stdurg __read_mostly;