aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4
diff options
context:
space:
mode:
author: Haishuang Yan <yanhaishuang@cmss.chinamobile.com> 2016-12-28 04:52:32 -0500
committer: David S. Miller <davem@davemloft.net> 2016-12-29 11:38:31 -0500
commit: 1946e672c173559155a3e210fe95dbf8b7b8ddf7 (patch)
tree: 0d794dc28150aac130c2e6dd0024cb3a4a5ec594 /net/ipv4
parent: 801822d1beea4f11a38df991b420ca917f6a917b (diff)
ipv4: Namespaceify tcp_tw_recycle and tcp_max_tw_buckets knob
Applications running in different network namespaces might require fast recycling of TIME-WAIT sockets independently of the host. Signed-off-by: Haishuang Yan <yanhaishuang@cmss.chinamobile.com>; Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4')
-rw-r--r-- net/ipv4/af_inet.c | 2
-rw-r--r-- net/ipv4/inet_timewait_sock.c | 3
-rw-r--r-- net/ipv4/proc.c | 2
-rw-r--r-- net/ipv4/sysctl_net_ipv4.c | 28
-rw-r--r-- net/ipv4/tcp.c | 3
-rw-r--r-- net/ipv4/tcp_input.c | 2
-rw-r--r-- net/ipv4/tcp_ipv4.c | 12
-rw-r--r-- net/ipv4/tcp_minisocks.c | 14
8 files changed, 32 insertions, 34 deletions
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index f75069883f2b..aae410bb655a 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1831,8 +1831,6 @@ static int __init inet_init(void)
1831 1831
1832 ip_init(); 1832 ip_init();
1833 1833
1834 tcp_v4_init();
1835
1836 /* Setup TCP slab cache for open requests. */ 1834 /* Setup TCP slab cache for open requests. */
1837 tcp_init(); 1835 tcp_init();
1838 1836
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index ddcd56c08d14..f8aff2c71cde 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -257,8 +257,7 @@ void __inet_twsk_schedule(struct inet_timewait_sock *tw, int timeo, bool rearm)
257} 257}
258EXPORT_SYMBOL_GPL(__inet_twsk_schedule); 258EXPORT_SYMBOL_GPL(__inet_twsk_schedule);
259 259
260void inet_twsk_purge(struct inet_hashinfo *hashinfo, 260void inet_twsk_purge(struct inet_hashinfo *hashinfo, int family)
261 struct inet_timewait_death_row *twdr, int family)
262{ 261{
263 struct inet_timewait_sock *tw; 262 struct inet_timewait_sock *tw;
264 struct sock *sk; 263 struct sock *sk;
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 7143ca1a6af9..0247ca032232 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -65,7 +65,7 @@ static int sockstat_seq_show(struct seq_file *seq, void *v)
65 socket_seq_show(seq); 65 socket_seq_show(seq);
66 seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %ld\n", 66 seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %ld\n",
67 sock_prot_inuse_get(net, &tcp_prot), orphans, 67 sock_prot_inuse_get(net, &tcp_prot), orphans,
68 atomic_read(&tcp_death_row.tw_count), sockets, 68 atomic_read(&net->ipv4.tcp_death_row.tw_count), sockets,
69 proto_memory_allocated(&tcp_prot)); 69 proto_memory_allocated(&tcp_prot));
70 seq_printf(seq, "UDP: inuse %d mem %ld\n", 70 seq_printf(seq, "UDP: inuse %d mem %ld\n",
71 sock_prot_inuse_get(net, &udp_prot), 71 sock_prot_inuse_get(net, &udp_prot),
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 22cbd61079b5..66f8f1b1dc78 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -290,13 +290,6 @@ static struct ctl_table ipv4_table[] = {
290 .proc_handler = proc_dointvec 290 .proc_handler = proc_dointvec
291 }, 291 },
292 { 292 {
293 .procname = "tcp_max_tw_buckets",
294 .data = &tcp_death_row.sysctl_max_tw_buckets,
295 .maxlen = sizeof(int),
296 .mode = 0644,
297 .proc_handler = proc_dointvec
298 },
299 {
300 .procname = "tcp_fastopen", 293 .procname = "tcp_fastopen",
301 .data = &sysctl_tcp_fastopen, 294 .data = &sysctl_tcp_fastopen,
302 .maxlen = sizeof(int), 295 .maxlen = sizeof(int),
@@ -310,13 +303,6 @@ static struct ctl_table ipv4_table[] = {
310 .proc_handler = proc_tcp_fastopen_key, 303 .proc_handler = proc_tcp_fastopen_key,
311 }, 304 },
312 { 305 {
313 .procname = "tcp_tw_recycle",
314 .data = &tcp_death_row.sysctl_tw_recycle,
315 .maxlen = sizeof(int),
316 .mode = 0644,
317 .proc_handler = proc_dointvec
318 },
319 {
320 .procname = "tcp_abort_on_overflow", 306 .procname = "tcp_abort_on_overflow",
321 .data = &sysctl_tcp_abort_on_overflow, 307 .data = &sysctl_tcp_abort_on_overflow,
322 .maxlen = sizeof(int), 308 .maxlen = sizeof(int),
@@ -960,6 +946,20 @@ static struct ctl_table ipv4_net_table[] = {
960 .mode = 0644, 946 .mode = 0644,
961 .proc_handler = proc_dointvec 947 .proc_handler = proc_dointvec
962 }, 948 },
949 {
950 .procname = "tcp_max_tw_buckets",
951 .data = &init_net.ipv4.tcp_death_row.sysctl_max_tw_buckets,
952 .maxlen = sizeof(int),
953 .mode = 0644,
954 .proc_handler = proc_dointvec
955 },
956 {
957 .procname = "tcp_tw_recycle",
958 .data = &init_net.ipv4.tcp_death_row.sysctl_tw_recycle,
959 .maxlen = sizeof(int),
960 .mode = 0644,
961 .proc_handler = proc_dointvec
962 },
963#ifdef CONFIG_IP_ROUTE_MULTIPATH 963#ifdef CONFIG_IP_ROUTE_MULTIPATH
964 { 964 {
965 .procname = "fib_multipath_use_neigh", 965 .procname = "fib_multipath_use_neigh",
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 4a044964da66..7f0d81c090ce 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -3334,6 +3334,7 @@ void __init tcp_init(void)
3334 3334
3335 percpu_counter_init(&tcp_sockets_allocated, 0, GFP_KERNEL); 3335 percpu_counter_init(&tcp_sockets_allocated, 0, GFP_KERNEL);
3336 percpu_counter_init(&tcp_orphan_count, 0, GFP_KERNEL); 3336 percpu_counter_init(&tcp_orphan_count, 0, GFP_KERNEL);
3337 inet_hashinfo_init(&tcp_hashinfo);
3337 tcp_hashinfo.bind_bucket_cachep = 3338 tcp_hashinfo.bind_bucket_cachep =
3338 kmem_cache_create("tcp_bind_bucket", 3339 kmem_cache_create("tcp_bind_bucket",
3339 sizeof(struct inet_bind_bucket), 0, 3340 sizeof(struct inet_bind_bucket), 0,
@@ -3378,7 +3379,6 @@ void __init tcp_init(void)
3378 3379
3379 cnt = tcp_hashinfo.ehash_mask + 1; 3380 cnt = tcp_hashinfo.ehash_mask + 1;
3380 3381
3381 tcp_death_row.sysctl_max_tw_buckets = cnt / 2;
3382 sysctl_tcp_max_orphans = cnt / 2; 3382 sysctl_tcp_max_orphans = cnt / 2;
3383 sysctl_max_syn_backlog = max(128, cnt / 256); 3383 sysctl_max_syn_backlog = max(128, cnt / 256);
3384 3384
@@ -3399,6 +3399,7 @@ void __init tcp_init(void)
3399 pr_info("Hash tables configured (established %u bind %u)\n", 3399 pr_info("Hash tables configured (established %u bind %u)\n",
3400 tcp_hashinfo.ehash_mask + 1, tcp_hashinfo.bhash_size); 3400 tcp_hashinfo.ehash_mask + 1, tcp_hashinfo.bhash_size);
3401 3401
3402 tcp_v4_init();
3402 tcp_metrics_init(); 3403 tcp_metrics_init();
3403 BUG_ON(tcp_register_congestion_control(&tcp_reno) != 0); 3404 BUG_ON(tcp_register_congestion_control(&tcp_reno) != 0);
3404 tcp_tasklet_init(); 3405 tcp_tasklet_init();
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 6c790754ae3e..c61480249835 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -6363,7 +6363,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
6363 * timewait bucket, so that all the necessary checks 6363 * timewait bucket, so that all the necessary checks
6364 * are made in the function processing timewait state. 6364 * are made in the function processing timewait state.
6365 */ 6365 */
6366 if (tcp_death_row.sysctl_tw_recycle) { 6366 if (net->ipv4.tcp_death_row.sysctl_tw_recycle) {
6367 bool strict; 6367 bool strict;
6368 6368
6369 dst = af_ops->route_req(sk, &fl, req, &strict); 6369 dst = af_ops->route_req(sk, &fl, req, &strict);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index fe9da4fb96bf..56b5f49e3f97 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -146,6 +146,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
146 struct rtable *rt; 146 struct rtable *rt;
147 int err; 147 int err;
148 struct ip_options_rcu *inet_opt; 148 struct ip_options_rcu *inet_opt;
149 struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
149 150
150 if (addr_len < sizeof(struct sockaddr_in)) 151 if (addr_len < sizeof(struct sockaddr_in))
151 return -EINVAL; 152 return -EINVAL;
@@ -196,7 +197,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
196 tp->write_seq = 0; 197 tp->write_seq = 0;
197 } 198 }
198 199
199 if (tcp_death_row.sysctl_tw_recycle && 200 if (tcp_death_row->sysctl_tw_recycle &&
200 !tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr) 201 !tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr)
201 tcp_fetch_timewait_stamp(sk, &rt->dst); 202 tcp_fetch_timewait_stamp(sk, &rt->dst);
202 203
@@ -215,7 +216,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
215 * complete initialization after this. 216 * complete initialization after this.
216 */ 217 */
217 tcp_set_state(sk, TCP_SYN_SENT); 218 tcp_set_state(sk, TCP_SYN_SENT);
218 err = inet_hash_connect(&tcp_death_row, sk); 219 err = inet_hash_connect(tcp_death_row, sk);
219 if (err) 220 if (err)
220 goto failure; 221 goto failure;
221 222
@@ -2457,6 +2458,10 @@ static int __net_init tcp_sk_init(struct net *net)
2457 net->ipv4.sysctl_tcp_notsent_lowat = UINT_MAX; 2458 net->ipv4.sysctl_tcp_notsent_lowat = UINT_MAX;
2458 net->ipv4.sysctl_tcp_tw_reuse = 0; 2459 net->ipv4.sysctl_tcp_tw_reuse = 0;
2459 2460
2461 net->ipv4.tcp_death_row.sysctl_tw_recycle = 0;
2462 net->ipv4.tcp_death_row.sysctl_max_tw_buckets = (tcp_hashinfo.ehash_mask + 1) / 2;
2463 net->ipv4.tcp_death_row.hashinfo = &tcp_hashinfo;
2464
2460 return 0; 2465 return 0;
2461fail: 2466fail:
2462 tcp_sk_exit(net); 2467 tcp_sk_exit(net);
@@ -2466,7 +2471,7 @@ fail:
2466 2471
2467static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list) 2472static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
2468{ 2473{
2469 inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET); 2474 inet_twsk_purge(&tcp_hashinfo, AF_INET);
2470} 2475}
2471 2476
2472static struct pernet_operations __net_initdata tcp_sk_ops = { 2477static struct pernet_operations __net_initdata tcp_sk_ops = {
@@ -2477,7 +2482,6 @@ static struct pernet_operations __net_initdata tcp_sk_ops = {
2477 2482
2478void __init tcp_v4_init(void) 2483void __init tcp_v4_init(void)
2479{ 2484{
2480 inet_hashinfo_init(&tcp_hashinfo);
2481 if (register_pernet_subsys(&tcp_sk_ops)) 2485 if (register_pernet_subsys(&tcp_sk_ops))
2482 panic("Failed to create the TCP control socket.\n"); 2486 panic("Failed to create the TCP control socket.\n");
2483} 2487}
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 28ce5ee831f5..06fde26a82b7 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -29,12 +29,6 @@
29 29
30int sysctl_tcp_abort_on_overflow __read_mostly; 30int sysctl_tcp_abort_on_overflow __read_mostly;
31 31
32struct inet_timewait_death_row tcp_death_row = {
33 .sysctl_max_tw_buckets = NR_FILE * 2,
34 .hashinfo = &tcp_hashinfo,
35};
36EXPORT_SYMBOL_GPL(tcp_death_row);
37
38static bool tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win) 32static bool tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win)
39{ 33{
40 if (seq == s_win) 34 if (seq == s_win)
@@ -100,6 +94,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
100 struct tcp_options_received tmp_opt; 94 struct tcp_options_received tmp_opt;
101 struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); 95 struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
102 bool paws_reject = false; 96 bool paws_reject = false;
97 struct inet_timewait_death_row *tcp_death_row = &sock_net((struct sock*)tw)->ipv4.tcp_death_row;
103 98
104 tmp_opt.saw_tstamp = 0; 99 tmp_opt.saw_tstamp = 0;
105 if (th->doff > (sizeof(*th) >> 2) && tcptw->tw_ts_recent_stamp) { 100 if (th->doff > (sizeof(*th) >> 2) && tcptw->tw_ts_recent_stamp) {
@@ -153,7 +148,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
153 tcptw->tw_ts_recent = tmp_opt.rcv_tsval; 148 tcptw->tw_ts_recent = tmp_opt.rcv_tsval;
154 } 149 }
155 150
156 if (tcp_death_row.sysctl_tw_recycle && 151 if (tcp_death_row->sysctl_tw_recycle &&
157 tcptw->tw_ts_recent_stamp && 152 tcptw->tw_ts_recent_stamp &&
158 tcp_tw_remember_stamp(tw)) 153 tcp_tw_remember_stamp(tw))
159 inet_twsk_reschedule(tw, tw->tw_timeout); 154 inet_twsk_reschedule(tw, tw->tw_timeout);
@@ -264,11 +259,12 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
264 const struct tcp_sock *tp = tcp_sk(sk); 259 const struct tcp_sock *tp = tcp_sk(sk);
265 struct inet_timewait_sock *tw; 260 struct inet_timewait_sock *tw;
266 bool recycle_ok = false; 261 bool recycle_ok = false;
262 struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
267 263
268 if (tcp_death_row.sysctl_tw_recycle && tp->rx_opt.ts_recent_stamp) 264 if (tcp_death_row->sysctl_tw_recycle && tp->rx_opt.ts_recent_stamp)
269 recycle_ok = tcp_remember_stamp(sk); 265 recycle_ok = tcp_remember_stamp(sk);
270 266
271 tw = inet_twsk_alloc(sk, &tcp_death_row, state); 267 tw = inet_twsk_alloc(sk, tcp_death_row, state);
272 268
273 if (tw) { 269 if (tw) {
274 struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); 270 struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);