author	Tonghao Zhang <xiangxia.m.yue@gmail.com>	2017-12-14 08:51:58 -0500
committer	David S. Miller <davem@davemloft.net>	2017-12-19 09:58:14 -0500
commit	648845ab7e200993dccd3948c719c858368c91e7 (patch)
tree	ca35bba9338cb8bca9cadfa1badd2e755277beda
parent	08fc7f8140730d2f8499c91b5abad44581b74635 (diff)
sock: Move the socket inuse to namespace.
In some cases we want to know how many sockets are in use in
different net namespaces. It's a key resource metric.

This patch adds a member to struct netns_core: a counter of the
sockets in use in the net namespace. The counter is updated in
sk_alloc, sk_clone_lock and __sk_free.

Sockets created in the kernel are not counted: it is not very
useful for userspace to know how many kernel sockets were created.

The main reasons for doing it this way:

1. When Linux calls 'do_exit' to terminate a process, the functions
'exit_task_namespaces' and 'exit_task_work' are called sequentially.
'exit_task_namespaces' may already have destroyed the net namespace,
but 'sock_release', called from 'exit_task_work', would still use
that net namespace if we updated the counter in sock_release.

2. socket and sock come in pairs and, more importantly, sock holds
a reference on the net namespace. Counting sockets in use on the
sock side avoids taking another net namespace reference in socket,
which makes the code easier to maintain.

Signed-off-by: Martin Zhang <zhangjunweimartin@didichuxing.com>
Signed-off-by: Tonghao Zhang <zhangtonghao@didichuxing.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--	include/net/netns/core.h	3
-rw-r--r--	include/net/sock.h	1
-rw-r--r--	net/core/sock.c	47
-rw-r--r--	net/socket.c	21
4 files changed, 51 insertions(+), 21 deletions(-)
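socket_seq_show() backs the "sockets: used" line of /proc/net/sockstat, so with this patch that figure becomes per net namespace rather than global. As a minimal sketch (illustrative only, not part of the patch), userspace can read it back like this:

/* sockstat_used.c - print the "sockets: used" count of the current
 * net namespace. Illustrative sketch, not part of this patch.
 */
#include <stdio.h>

int main(void)
{
	char line[256];
	int used = -1;
	FILE *f = fopen("/proc/net/sockstat", "r");

	if (!f) {
		perror("fopen");
		return 1;
	}
	while (fgets(line, sizeof(line), f)) {
		/* the first line looks like: "sockets: used 123" */
		if (sscanf(line, "sockets: used %d", &used) == 1)
			break;
	}
	fclose(f);
	printf("sockets in use in this netns: %d\n", used);
	return used < 0;
}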
diff --git a/include/net/netns/core.h b/include/net/netns/core.h
index 45cfb5dc76c7..a5e8a66c57b4 100644
--- a/include/net/netns/core.h
+++ b/include/net/netns/core.h
@@ -11,6 +11,9 @@ struct netns_core {
 
 	int	sysctl_somaxconn;
 
+#ifdef CONFIG_PROC_FS
+	int __percpu *sock_inuse;
+#endif
 	struct prot_inuse __percpu *prot_inuse;
 };
 
diff --git a/include/net/sock.h b/include/net/sock.h
index 9a9047268d37..0a32f3ce381c 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1262,6 +1262,7 @@ proto_memory_pressure(struct proto *prot)
 /* Called with local bh disabled */
 void sock_prot_inuse_add(struct net *net, struct proto *prot, int inc);
 int sock_prot_inuse_get(struct net *net, struct proto *proto);
+int sock_inuse_get(struct net *net);
 #else
 static inline void sock_prot_inuse_add(struct net *net, struct proto *prot,
 		int inc)
diff --git a/net/core/sock.c b/net/core/sock.c
index c2dd2d339db7..72d14b221784 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -145,6 +145,8 @@
 static DEFINE_MUTEX(proto_list_mutex);
 static LIST_HEAD(proto_list);
 
+static void sock_inuse_add(struct net *net, int val);
+
 /**
  * sk_ns_capable - General socket capability test
  * @sk: Socket to use a capability on or through
@@ -1531,8 +1533,11 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
 		sk->sk_kern_sock = kern;
 		sock_lock_init(sk);
 		sk->sk_net_refcnt = kern ? 0 : 1;
-		if (likely(sk->sk_net_refcnt))
+		if (likely(sk->sk_net_refcnt)) {
 			get_net(net);
+			sock_inuse_add(net, 1);
+		}
+
 		sock_net_set(sk, net);
 		refcount_set(&sk->sk_wmem_alloc, 1);
 
@@ -1595,6 +1600,9 @@ void sk_destruct(struct sock *sk)
 
 static void __sk_free(struct sock *sk)
 {
+	if (likely(sk->sk_net_refcnt))
+		sock_inuse_add(sock_net(sk), -1);
+
 	if (unlikely(sock_diag_has_destroy_listeners(sk) && sk->sk_net_refcnt))
 		sock_diag_broadcast_destroy(sk);
 	else
@@ -1716,6 +1724,8 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
 		newsk->sk_priority = 0;
 		newsk->sk_incoming_cpu = raw_smp_processor_id();
 		atomic64_set(&newsk->sk_cookie, 0);
+		if (likely(newsk->sk_net_refcnt))
+			sock_inuse_add(sock_net(newsk), 1);
 
 		/*
 		 * Before updating sk_refcnt, we must commit prior changes to memory
@@ -3061,15 +3071,44 @@ int sock_prot_inuse_get(struct net *net, struct proto *prot)
 }
 EXPORT_SYMBOL_GPL(sock_prot_inuse_get);
 
+static void sock_inuse_add(struct net *net, int val)
+{
+	this_cpu_add(*net->core.sock_inuse, val);
+}
+
+int sock_inuse_get(struct net *net)
+{
+	int cpu, res = 0;
+
+	for_each_possible_cpu(cpu)
+		res += *per_cpu_ptr(net->core.sock_inuse, cpu);
+
+	return res;
+}
+
+EXPORT_SYMBOL_GPL(sock_inuse_get);
+
 static int __net_init sock_inuse_init_net(struct net *net)
 {
 	net->core.prot_inuse = alloc_percpu(struct prot_inuse);
-	return net->core.prot_inuse ? 0 : -ENOMEM;
+	if (net->core.prot_inuse == NULL)
+		return -ENOMEM;
+
+	net->core.sock_inuse = alloc_percpu(int);
+	if (net->core.sock_inuse == NULL)
+		goto out;
+
+	return 0;
+
+out:
+	free_percpu(net->core.prot_inuse);
+	return -ENOMEM;
 }
 
 static void __net_exit sock_inuse_exit_net(struct net *net)
 {
 	free_percpu(net->core.prot_inuse);
+	free_percpu(net->core.sock_inuse);
 }
 
 static struct pernet_operations net_inuse_ops = {
@@ -3112,6 +3151,10 @@ static inline void assign_proto_idx(struct proto *prot)
 static inline void release_proto_idx(struct proto *prot)
 {
 }
+
+static void sock_inuse_add(struct net *net, int val)
+{
+}
 #endif
 
 static void req_prot_cleanup(struct request_sock_ops *rsk_prot)
diff --git a/net/socket.c b/net/socket.c
index 05f361faec45..bbd2e9ceb692 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -163,12 +163,6 @@ static DEFINE_SPINLOCK(net_family_lock);
 static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly;
 
 /*
- * Statistics counters of the socket lists
- */
-
-static DEFINE_PER_CPU(int, sockets_in_use);
-
-/*
  * Support routines.
  *	Move socket addresses back and forth across the kernel/user
  *	divide and look after the messy bits.
@@ -578,7 +572,6 @@ struct socket *sock_alloc(void)
 	inode->i_gid = current_fsgid();
 	inode->i_op = &sockfs_inode_ops;
 
-	this_cpu_add(sockets_in_use, 1);
 	return sock;
 }
 EXPORT_SYMBOL(sock_alloc);
@@ -605,7 +598,6 @@ void sock_release(struct socket *sock)
 	if (rcu_dereference_protected(sock->wq, 1)->fasync_list)
 		pr_err("%s: fasync list not empty!\n", __func__);
 
-	this_cpu_sub(sockets_in_use, 1);
 	if (!sock->file) {
 		iput(SOCK_INODE(sock));
 		return;
@@ -2622,17 +2614,8 @@ core_initcall(sock_init);	/* early initcall */
 #ifdef CONFIG_PROC_FS
 void socket_seq_show(struct seq_file *seq)
 {
-	int cpu;
-	int counter = 0;
-
-	for_each_possible_cpu(cpu)
-		counter += per_cpu(sockets_in_use, cpu);
-
-	/* It can be negative, by the way. 8) */
-	if (counter < 0)
-		counter = 0;
-
-	seq_printf(seq, "sockets: used %d\n", counter);
+	seq_printf(seq, "sockets: used %d\n",
+		   sock_inuse_get(seq->private));
 }
 #endif /* CONFIG_PROC_FS */
 
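Since seq->private in socket_seq_show() is the net namespace of the /proc instance being read, each namespace now reports only its own count. A rough demonstration of the per-namespace behaviour, assuming CAP_SYS_ADMIN for unshare(CLONE_NEWNET) (illustrative sketch, not part of the patch):

/* netns_sockstat_demo.c - a fresh net namespace starts with its own
 * near-zero "sockets: used" count. Illustrative sketch, not part of
 * this patch.
 */
#define _GNU_SOURCE		/* for unshare() */
#include <sched.h>
#include <stdio.h>
#include <sys/socket.h>

/* Print the first line of /proc/net/sockstat ("sockets: used N"). */
static void dump_sockstat(const char *when)
{
	char line[256];
	FILE *f = fopen("/proc/net/sockstat", "r");

	if (!f)
		return;
	if (fgets(line, sizeof(line), f))
		printf("%-26s %s", when, line);
	fclose(f);
}

int main(void)
{
	dump_sockstat("initial netns:");

	if (unshare(CLONE_NEWNET)) {	/* needs CAP_SYS_ADMIN */
		perror("unshare");
		return 1;
	}
	dump_sockstat("fresh netns, no sockets:");

	/* Create one socket; the fd is deliberately left open so it
	 * still shows up in the count below.
	 */
	if (socket(AF_INET, SOCK_DGRAM, 0) < 0)
		perror("socket");
	dump_sockstat("fresh netns, one socket:");
	return 0;
}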