diff options
author | Haishuang Yan <yanhaishuang@cmss.chinamobile.com> | 2016-12-28 04:52:32 -0500 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2016-12-29 11:38:31 -0500 |
commit | 1946e672c173559155a3e210fe95dbf8b7b8ddf7 (patch) | |
tree | 0d794dc28150aac130c2e6dd0024cb3a4a5ec594 /net/ipv4 | |
parent | 801822d1beea4f11a38df991b420ca917f6a917b (diff) |
ipv4: Namespaceify tcp_tw_recycle and tcp_max_tw_buckets knobs
Applications in different network namespaces might require fast recycling
of TIME-WAIT sockets independently of the host.
Signed-off-by: Haishuang Yan <yanhaishuang@cmss.chinamobile.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4')
-rw-r--r-- | net/ipv4/af_inet.c | 2 | ||||
-rw-r--r-- | net/ipv4/inet_timewait_sock.c | 3 | ||||
-rw-r--r-- | net/ipv4/proc.c | 2 | ||||
-rw-r--r-- | net/ipv4/sysctl_net_ipv4.c | 28 | ||||
-rw-r--r-- | net/ipv4/tcp.c | 3 | ||||
-rw-r--r-- | net/ipv4/tcp_input.c | 2 | ||||
-rw-r--r-- | net/ipv4/tcp_ipv4.c | 12 | ||||
-rw-r--r-- | net/ipv4/tcp_minisocks.c | 14 |
8 files changed, 32 insertions, 34 deletions
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index f75069883f2b..aae410bb655a 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c | |||
@@ -1831,8 +1831,6 @@ static int __init inet_init(void) | |||
1831 | 1831 | ||
1832 | ip_init(); | 1832 | ip_init(); |
1833 | 1833 | ||
1834 | tcp_v4_init(); | ||
1835 | |||
1836 | /* Setup TCP slab cache for open requests. */ | 1834 | /* Setup TCP slab cache for open requests. */ |
1837 | tcp_init(); | 1835 | tcp_init(); |
1838 | 1836 | ||
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index ddcd56c08d14..f8aff2c71cde 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c | |||
@@ -257,8 +257,7 @@ void __inet_twsk_schedule(struct inet_timewait_sock *tw, int timeo, bool rearm) | |||
257 | } | 257 | } |
258 | EXPORT_SYMBOL_GPL(__inet_twsk_schedule); | 258 | EXPORT_SYMBOL_GPL(__inet_twsk_schedule); |
259 | 259 | ||
260 | void inet_twsk_purge(struct inet_hashinfo *hashinfo, | 260 | void inet_twsk_purge(struct inet_hashinfo *hashinfo, int family) |
261 | struct inet_timewait_death_row *twdr, int family) | ||
262 | { | 261 | { |
263 | struct inet_timewait_sock *tw; | 262 | struct inet_timewait_sock *tw; |
264 | struct sock *sk; | 263 | struct sock *sk; |
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 7143ca1a6af9..0247ca032232 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c | |||
@@ -65,7 +65,7 @@ static int sockstat_seq_show(struct seq_file *seq, void *v) | |||
65 | socket_seq_show(seq); | 65 | socket_seq_show(seq); |
66 | seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %ld\n", | 66 | seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %ld\n", |
67 | sock_prot_inuse_get(net, &tcp_prot), orphans, | 67 | sock_prot_inuse_get(net, &tcp_prot), orphans, |
68 | atomic_read(&tcp_death_row.tw_count), sockets, | 68 | atomic_read(&net->ipv4.tcp_death_row.tw_count), sockets, |
69 | proto_memory_allocated(&tcp_prot)); | 69 | proto_memory_allocated(&tcp_prot)); |
70 | seq_printf(seq, "UDP: inuse %d mem %ld\n", | 70 | seq_printf(seq, "UDP: inuse %d mem %ld\n", |
71 | sock_prot_inuse_get(net, &udp_prot), | 71 | sock_prot_inuse_get(net, &udp_prot), |
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 22cbd61079b5..66f8f1b1dc78 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c | |||
@@ -290,13 +290,6 @@ static struct ctl_table ipv4_table[] = { | |||
290 | .proc_handler = proc_dointvec | 290 | .proc_handler = proc_dointvec |
291 | }, | 291 | }, |
292 | { | 292 | { |
293 | .procname = "tcp_max_tw_buckets", | ||
294 | .data = &tcp_death_row.sysctl_max_tw_buckets, | ||
295 | .maxlen = sizeof(int), | ||
296 | .mode = 0644, | ||
297 | .proc_handler = proc_dointvec | ||
298 | }, | ||
299 | { | ||
300 | .procname = "tcp_fastopen", | 293 | .procname = "tcp_fastopen", |
301 | .data = &sysctl_tcp_fastopen, | 294 | .data = &sysctl_tcp_fastopen, |
302 | .maxlen = sizeof(int), | 295 | .maxlen = sizeof(int), |
@@ -310,13 +303,6 @@ static struct ctl_table ipv4_table[] = { | |||
310 | .proc_handler = proc_tcp_fastopen_key, | 303 | .proc_handler = proc_tcp_fastopen_key, |
311 | }, | 304 | }, |
312 | { | 305 | { |
313 | .procname = "tcp_tw_recycle", | ||
314 | .data = &tcp_death_row.sysctl_tw_recycle, | ||
315 | .maxlen = sizeof(int), | ||
316 | .mode = 0644, | ||
317 | .proc_handler = proc_dointvec | ||
318 | }, | ||
319 | { | ||
320 | .procname = "tcp_abort_on_overflow", | 306 | .procname = "tcp_abort_on_overflow", |
321 | .data = &sysctl_tcp_abort_on_overflow, | 307 | .data = &sysctl_tcp_abort_on_overflow, |
322 | .maxlen = sizeof(int), | 308 | .maxlen = sizeof(int), |
@@ -960,6 +946,20 @@ static struct ctl_table ipv4_net_table[] = { | |||
960 | .mode = 0644, | 946 | .mode = 0644, |
961 | .proc_handler = proc_dointvec | 947 | .proc_handler = proc_dointvec |
962 | }, | 948 | }, |
949 | { | ||
950 | .procname = "tcp_max_tw_buckets", | ||
951 | .data = &init_net.ipv4.tcp_death_row.sysctl_max_tw_buckets, | ||
952 | .maxlen = sizeof(int), | ||
953 | .mode = 0644, | ||
954 | .proc_handler = proc_dointvec | ||
955 | }, | ||
956 | { | ||
957 | .procname = "tcp_tw_recycle", | ||
958 | .data = &init_net.ipv4.tcp_death_row.sysctl_tw_recycle, | ||
959 | .maxlen = sizeof(int), | ||
960 | .mode = 0644, | ||
961 | .proc_handler = proc_dointvec | ||
962 | }, | ||
963 | #ifdef CONFIG_IP_ROUTE_MULTIPATH | 963 | #ifdef CONFIG_IP_ROUTE_MULTIPATH |
964 | { | 964 | { |
965 | .procname = "fib_multipath_use_neigh", | 965 | .procname = "fib_multipath_use_neigh", |
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 4a044964da66..7f0d81c090ce 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c | |||
@@ -3334,6 +3334,7 @@ void __init tcp_init(void) | |||
3334 | 3334 | ||
3335 | percpu_counter_init(&tcp_sockets_allocated, 0, GFP_KERNEL); | 3335 | percpu_counter_init(&tcp_sockets_allocated, 0, GFP_KERNEL); |
3336 | percpu_counter_init(&tcp_orphan_count, 0, GFP_KERNEL); | 3336 | percpu_counter_init(&tcp_orphan_count, 0, GFP_KERNEL); |
3337 | inet_hashinfo_init(&tcp_hashinfo); | ||
3337 | tcp_hashinfo.bind_bucket_cachep = | 3338 | tcp_hashinfo.bind_bucket_cachep = |
3338 | kmem_cache_create("tcp_bind_bucket", | 3339 | kmem_cache_create("tcp_bind_bucket", |
3339 | sizeof(struct inet_bind_bucket), 0, | 3340 | sizeof(struct inet_bind_bucket), 0, |
@@ -3378,7 +3379,6 @@ void __init tcp_init(void) | |||
3378 | 3379 | ||
3379 | cnt = tcp_hashinfo.ehash_mask + 1; | 3380 | cnt = tcp_hashinfo.ehash_mask + 1; |
3380 | 3381 | ||
3381 | tcp_death_row.sysctl_max_tw_buckets = cnt / 2; | ||
3382 | sysctl_tcp_max_orphans = cnt / 2; | 3382 | sysctl_tcp_max_orphans = cnt / 2; |
3383 | sysctl_max_syn_backlog = max(128, cnt / 256); | 3383 | sysctl_max_syn_backlog = max(128, cnt / 256); |
3384 | 3384 | ||
@@ -3399,6 +3399,7 @@ void __init tcp_init(void) | |||
3399 | pr_info("Hash tables configured (established %u bind %u)\n", | 3399 | pr_info("Hash tables configured (established %u bind %u)\n", |
3400 | tcp_hashinfo.ehash_mask + 1, tcp_hashinfo.bhash_size); | 3400 | tcp_hashinfo.ehash_mask + 1, tcp_hashinfo.bhash_size); |
3401 | 3401 | ||
3402 | tcp_v4_init(); | ||
3402 | tcp_metrics_init(); | 3403 | tcp_metrics_init(); |
3403 | BUG_ON(tcp_register_congestion_control(&tcp_reno) != 0); | 3404 | BUG_ON(tcp_register_congestion_control(&tcp_reno) != 0); |
3404 | tcp_tasklet_init(); | 3405 | tcp_tasklet_init(); |
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 6c790754ae3e..c61480249835 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c | |||
@@ -6363,7 +6363,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, | |||
6363 | * timewait bucket, so that all the necessary checks | 6363 | * timewait bucket, so that all the necessary checks |
6364 | * are made in the function processing timewait state. | 6364 | * are made in the function processing timewait state. |
6365 | */ | 6365 | */ |
6366 | if (tcp_death_row.sysctl_tw_recycle) { | 6366 | if (net->ipv4.tcp_death_row.sysctl_tw_recycle) { |
6367 | bool strict; | 6367 | bool strict; |
6368 | 6368 | ||
6369 | dst = af_ops->route_req(sk, &fl, req, &strict); | 6369 | dst = af_ops->route_req(sk, &fl, req, &strict); |
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index fe9da4fb96bf..56b5f49e3f97 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c | |||
@@ -146,6 +146,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) | |||
146 | struct rtable *rt; | 146 | struct rtable *rt; |
147 | int err; | 147 | int err; |
148 | struct ip_options_rcu *inet_opt; | 148 | struct ip_options_rcu *inet_opt; |
149 | struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row; | ||
149 | 150 | ||
150 | if (addr_len < sizeof(struct sockaddr_in)) | 151 | if (addr_len < sizeof(struct sockaddr_in)) |
151 | return -EINVAL; | 152 | return -EINVAL; |
@@ -196,7 +197,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) | |||
196 | tp->write_seq = 0; | 197 | tp->write_seq = 0; |
197 | } | 198 | } |
198 | 199 | ||
199 | if (tcp_death_row.sysctl_tw_recycle && | 200 | if (tcp_death_row->sysctl_tw_recycle && |
200 | !tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr) | 201 | !tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr) |
201 | tcp_fetch_timewait_stamp(sk, &rt->dst); | 202 | tcp_fetch_timewait_stamp(sk, &rt->dst); |
202 | 203 | ||
@@ -215,7 +216,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) | |||
215 | * complete initialization after this. | 216 | * complete initialization after this. |
216 | */ | 217 | */ |
217 | tcp_set_state(sk, TCP_SYN_SENT); | 218 | tcp_set_state(sk, TCP_SYN_SENT); |
218 | err = inet_hash_connect(&tcp_death_row, sk); | 219 | err = inet_hash_connect(tcp_death_row, sk); |
219 | if (err) | 220 | if (err) |
220 | goto failure; | 221 | goto failure; |
221 | 222 | ||
@@ -2457,6 +2458,10 @@ static int __net_init tcp_sk_init(struct net *net) | |||
2457 | net->ipv4.sysctl_tcp_notsent_lowat = UINT_MAX; | 2458 | net->ipv4.sysctl_tcp_notsent_lowat = UINT_MAX; |
2458 | net->ipv4.sysctl_tcp_tw_reuse = 0; | 2459 | net->ipv4.sysctl_tcp_tw_reuse = 0; |
2459 | 2460 | ||
2461 | net->ipv4.tcp_death_row.sysctl_tw_recycle = 0; | ||
2462 | net->ipv4.tcp_death_row.sysctl_max_tw_buckets = (tcp_hashinfo.ehash_mask + 1) / 2; | ||
2463 | net->ipv4.tcp_death_row.hashinfo = &tcp_hashinfo; | ||
2464 | |||
2460 | return 0; | 2465 | return 0; |
2461 | fail: | 2466 | fail: |
2462 | tcp_sk_exit(net); | 2467 | tcp_sk_exit(net); |
@@ -2466,7 +2471,7 @@ fail: | |||
2466 | 2471 | ||
2467 | static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list) | 2472 | static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list) |
2468 | { | 2473 | { |
2469 | inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET); | 2474 | inet_twsk_purge(&tcp_hashinfo, AF_INET); |
2470 | } | 2475 | } |
2471 | 2476 | ||
2472 | static struct pernet_operations __net_initdata tcp_sk_ops = { | 2477 | static struct pernet_operations __net_initdata tcp_sk_ops = { |
@@ -2477,7 +2482,6 @@ static struct pernet_operations __net_initdata tcp_sk_ops = { | |||
2477 | 2482 | ||
2478 | void __init tcp_v4_init(void) | 2483 | void __init tcp_v4_init(void) |
2479 | { | 2484 | { |
2480 | inet_hashinfo_init(&tcp_hashinfo); | ||
2481 | if (register_pernet_subsys(&tcp_sk_ops)) | 2485 | if (register_pernet_subsys(&tcp_sk_ops)) |
2482 | panic("Failed to create the TCP control socket.\n"); | 2486 | panic("Failed to create the TCP control socket.\n"); |
2483 | } | 2487 | } |
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 28ce5ee831f5..06fde26a82b7 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c | |||
@@ -29,12 +29,6 @@ | |||
29 | 29 | ||
30 | int sysctl_tcp_abort_on_overflow __read_mostly; | 30 | int sysctl_tcp_abort_on_overflow __read_mostly; |
31 | 31 | ||
32 | struct inet_timewait_death_row tcp_death_row = { | ||
33 | .sysctl_max_tw_buckets = NR_FILE * 2, | ||
34 | .hashinfo = &tcp_hashinfo, | ||
35 | }; | ||
36 | EXPORT_SYMBOL_GPL(tcp_death_row); | ||
37 | |||
38 | static bool tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win) | 32 | static bool tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win) |
39 | { | 33 | { |
40 | if (seq == s_win) | 34 | if (seq == s_win) |
@@ -100,6 +94,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, | |||
100 | struct tcp_options_received tmp_opt; | 94 | struct tcp_options_received tmp_opt; |
101 | struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); | 95 | struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); |
102 | bool paws_reject = false; | 96 | bool paws_reject = false; |
97 | struct inet_timewait_death_row *tcp_death_row = &sock_net((struct sock*)tw)->ipv4.tcp_death_row; | ||
103 | 98 | ||
104 | tmp_opt.saw_tstamp = 0; | 99 | tmp_opt.saw_tstamp = 0; |
105 | if (th->doff > (sizeof(*th) >> 2) && tcptw->tw_ts_recent_stamp) { | 100 | if (th->doff > (sizeof(*th) >> 2) && tcptw->tw_ts_recent_stamp) { |
@@ -153,7 +148,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, | |||
153 | tcptw->tw_ts_recent = tmp_opt.rcv_tsval; | 148 | tcptw->tw_ts_recent = tmp_opt.rcv_tsval; |
154 | } | 149 | } |
155 | 150 | ||
156 | if (tcp_death_row.sysctl_tw_recycle && | 151 | if (tcp_death_row->sysctl_tw_recycle && |
157 | tcptw->tw_ts_recent_stamp && | 152 | tcptw->tw_ts_recent_stamp && |
158 | tcp_tw_remember_stamp(tw)) | 153 | tcp_tw_remember_stamp(tw)) |
159 | inet_twsk_reschedule(tw, tw->tw_timeout); | 154 | inet_twsk_reschedule(tw, tw->tw_timeout); |
@@ -264,11 +259,12 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) | |||
264 | const struct tcp_sock *tp = tcp_sk(sk); | 259 | const struct tcp_sock *tp = tcp_sk(sk); |
265 | struct inet_timewait_sock *tw; | 260 | struct inet_timewait_sock *tw; |
266 | bool recycle_ok = false; | 261 | bool recycle_ok = false; |
262 | struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row; | ||
267 | 263 | ||
268 | if (tcp_death_row.sysctl_tw_recycle && tp->rx_opt.ts_recent_stamp) | 264 | if (tcp_death_row->sysctl_tw_recycle && tp->rx_opt.ts_recent_stamp) |
269 | recycle_ok = tcp_remember_stamp(sk); | 265 | recycle_ok = tcp_remember_stamp(sk); |
270 | 266 | ||
271 | tw = inet_twsk_alloc(sk, &tcp_death_row, state); | 267 | tw = inet_twsk_alloc(sk, tcp_death_row, state); |
272 | 268 | ||
273 | if (tw) { | 269 | if (tw) { |
274 | struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); | 270 | struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); |