Diffstat (limited to 'net/netlink')
-rw-r--r--  net/netlink/af_netlink.c | 105
-rw-r--r--  net/netlink/af_netlink.h |   2
-rw-r--r--  net/netlink/diag.c       |  15
-rw-r--r--  net/netlink/genetlink.c  |   6
4 files changed, 66 insertions(+), 62 deletions(-)
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 02fdde28dada..2197af00673a 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -98,12 +98,12 @@ static int netlink_dump(struct sock *sk);
 static void netlink_skb_destructor(struct sk_buff *skb);
 
 /* nl_table locking explained:
- * Lookup and traversal are protected with nl_sk_hash_lock or nl_table_lock
- * combined with an RCU read-side lock. Insertion and removal are protected
- * with nl_sk_hash_lock while using RCU list modification primitives and may
- * run in parallel to nl_table_lock protected lookups. Destruction of the
- * Netlink socket may only occur *after* nl_table_lock has been acquired
- * either during or after the socket has been removed from the list.
+ * Lookup and traversal are protected with an RCU read-side lock. Insertion
+ * and removal are protected with per bucket lock while using RCU list
+ * modification primitives and may run in parallel to RCU protected lookups.
+ * Destruction of the Netlink socket may only occur *after* nl_table_lock has
+ * been acquired either during or after the socket has been removed from
+ * the list and after an RCU grace period.
  */
 DEFINE_RWLOCK(nl_table_lock);
 EXPORT_SYMBOL_GPL(nl_table_lock);
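
The rewritten comment pins down the lifetime rule the rest of this series
depends on: sockets are now looked up locklessly, so their memory may only be
freed once an RCU grace period has elapsed after removal from the table. A
minimal sketch of that writer-side sequence (the object and helpers are
hypothetical, not from this patch):

        /* Writer side: unlink first, then wait out all lockless readers. */
        static void obj_destroy(struct obj *o)
        {
                table_remove(o);   /* hypothetical unlink, under the bucket lock */
                synchronize_rcu(); /* no rcu_read_lock() section can still see o */
                kfree(o);
        }

netlink itself avoids blocking in the release path by using call_rcu() instead
of synchronize_rcu(); see the deferred_put_nlk_sk() hunk further down.
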
@@ -111,19 +111,6 @@ static atomic_t nl_table_users = ATOMIC_INIT(0);
 
 #define nl_deref_protected(X) rcu_dereference_protected(X, lockdep_is_held(&nl_table_lock));
 
-/* Protects netlink socket hash table mutations */
-DEFINE_MUTEX(nl_sk_hash_lock);
-EXPORT_SYMBOL_GPL(nl_sk_hash_lock);
-
-#ifdef CONFIG_PROVE_LOCKING
-static int lockdep_nl_sk_hash_is_held(void *parent)
-{
-	if (debug_locks)
-		return lockdep_is_held(&nl_sk_hash_lock) || lockdep_is_held(&nl_table_lock);
-	return 1;
-}
-#endif
-
 static ATOMIC_NOTIFIER_HEAD(netlink_chain);
 
 static DEFINE_SPINLOCK(netlink_tap_lock);
@@ -1003,26 +990,33 @@ static struct sock *__netlink_lookup(struct netlink_table *table, u32 portid,
 		.net = net,
 		.portid = portid,
 	};
-	u32 hash;
 
-	hash = rhashtable_hashfn(&table->hash, &portid, sizeof(portid));
-
-	return rhashtable_lookup_compare(&table->hash, hash,
+	return rhashtable_lookup_compare(&table->hash, &portid,
 					 &netlink_compare, &arg);
 }
 
+static bool __netlink_insert(struct netlink_table *table, struct sock *sk)
+{
+	struct netlink_compare_arg arg = {
+		.net = sock_net(sk),
+		.portid = nlk_sk(sk)->portid,
+	};
+
+	return rhashtable_lookup_compare_insert(&table->hash,
+						&nlk_sk(sk)->node,
+						&netlink_compare, &arg);
+}
+
 static struct sock *netlink_lookup(struct net *net, int protocol, u32 portid)
 {
 	struct netlink_table *table = &nl_table[protocol];
 	struct sock *sk;
 
-	read_lock(&nl_table_lock);
 	rcu_read_lock();
 	sk = __netlink_lookup(table, portid, net);
 	if (sk)
 		sock_hold(sk);
 	rcu_read_unlock();
-	read_unlock(&nl_table_lock);
 
 	return sk;
 }
@@ -1053,29 +1047,33 @@ netlink_update_listeners(struct sock *sk)
 	 * makes sure updates are visible before bind or setsockopt return. */
 }
 
-static int netlink_insert(struct sock *sk, struct net *net, u32 portid)
+static int netlink_insert(struct sock *sk, u32 portid)
 {
 	struct netlink_table *table = &nl_table[sk->sk_protocol];
-	int err = -EADDRINUSE;
+	int err;
 
-	mutex_lock(&nl_sk_hash_lock);
-	if (__netlink_lookup(table, portid, net))
-		goto err;
+	lock_sock(sk);
 
 	err = -EBUSY;
 	if (nlk_sk(sk)->portid)
 		goto err;
 
 	err = -ENOMEM;
-	if (BITS_PER_LONG > 32 && unlikely(table->hash.nelems >= UINT_MAX))
+	if (BITS_PER_LONG > 32 &&
+	    unlikely(atomic_read(&table->hash.nelems) >= UINT_MAX))
 		goto err;
 
 	nlk_sk(sk)->portid = portid;
 	sock_hold(sk);
-	rhashtable_insert(&table->hash, &nlk_sk(sk)->node);
+
 	err = 0;
+	if (!__netlink_insert(table, sk)) {
+		err = -EADDRINUSE;
+		sock_put(sk);
+	}
+
 err:
-	mutex_unlock(&nl_sk_hash_lock);
+	release_sock(sk);
 	return err;
 }
 
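
netlink_insert() no longer takes a global mutex to check for a colliding portid
before inserting. The lookup and the insertion are now a single atomic step via
rhashtable_lookup_compare_insert(), and lock_sock() only serializes the socket
against its own bind path. The race the atomic primitive closes is the classic
check-then-act window, sketched here with hypothetical helpers (not from this
patch):

        /* Racy without atomicity: two CPUs can both pass the check and
         * both insert the same key. */
        static bool broken_insert(struct table *t, u32 key, struct node *n)
        {
                if (table_lookup(t, key))  /* check ... */
                        return false;
                table_insert(t, key, n);   /* ... then act: the window is here */
                return true;
        }

With the combined primitive, the loser of two racing callers simply gets a
failed insert back, which netlink_insert() maps to -EADDRINUSE.
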
@@ -1083,13 +1081,11 @@ static void netlink_remove(struct sock *sk)
 {
 	struct netlink_table *table;
 
-	mutex_lock(&nl_sk_hash_lock);
 	table = &nl_table[sk->sk_protocol];
 	if (rhashtable_remove(&table->hash, &nlk_sk(sk)->node)) {
 		WARN_ON(atomic_read(&sk->sk_refcnt) == 1);
 		__sock_put(sk);
 	}
-	mutex_unlock(&nl_sk_hash_lock);
 
 	netlink_table_grab();
 	if (nlk_sk(sk)->subscriptions) {
@@ -1197,6 +1193,13 @@ out_module:
 	goto out;
 }
 
+static void deferred_put_nlk_sk(struct rcu_head *head)
+{
+	struct netlink_sock *nlk = container_of(head, struct netlink_sock, rcu);
+
+	sock_put(&nlk->sk);
+}
+
 static int netlink_release(struct socket *sock)
 {
 	struct sock *sk = sock->sk;
@@ -1269,7 +1272,7 @@ static int netlink_release(struct socket *sock)
 	local_bh_disable();
 	sock_prot_inuse_add(sock_net(sk), &netlink_proto, -1);
 	local_bh_enable();
-	sock_put(sk);
+	call_rcu(&nlk->rcu, deferred_put_nlk_sk);
 	return 0;
 }
 
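
A plain sock_put() here could free the socket immediately, while a lockless
reader in __netlink_lookup() might still be dereferencing it. Routing the final
reference drop through call_rcu() (using the rcu_head added to struct
netlink_sock below) defers it past the grace period. The general pattern, with
a hypothetical object standing in for netlink_sock:

        struct example {
                struct rcu_head rcu;
                /* ... payload ... */
        };

        static void example_free_rcu(struct rcu_head *head)
        {
                struct example *e = container_of(head, struct example, rcu);

                kfree(e);  /* runs only after current readers have finished */
        }

        /* in the release path, instead of freeing directly: */
        call_rcu(&e->rcu, example_free_rcu);
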
@@ -1284,7 +1287,6 @@ static int netlink_autobind(struct socket *sock)
 
 retry:
 	cond_resched();
-	netlink_table_grab();
 	rcu_read_lock();
 	if (__netlink_lookup(table, portid, net)) {
 		/* Bind collision, search negative portid values. */
@@ -1292,13 +1294,11 @@ retry:
 		if (rover > -4097)
 			rover = -4097;
 		rcu_read_unlock();
-		netlink_table_ungrab();
 		goto retry;
 	}
 	rcu_read_unlock();
-	netlink_table_ungrab();
 
-	err = netlink_insert(sk, net, portid);
+	err = netlink_insert(sk, portid);
 	if (err == -EADDRINUSE)
 		goto retry;
 
@@ -1486,7 +1486,7 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr,
 
 	if (!nlk->portid) {
 		err = nladdr->nl_pid ?
-			netlink_insert(sk, net, nladdr->nl_pid) :
+			netlink_insert(sk, nladdr->nl_pid) :
 			netlink_autobind(sock);
 		if (err) {
 			netlink_undo_bind(nlk->ngroups, groups, sk);
@@ -2492,7 +2492,7 @@ __netlink_kernel_create(struct net *net, int unit, struct module *module,
 	if (cfg && cfg->input)
 		nlk_sk(sk)->netlink_rcv = cfg->input;
 
-	if (netlink_insert(sk, net, 0))
+	if (netlink_insert(sk, 0))
 		goto out_sock_release;
 
 	nlk = nlk_sk(sk);
@@ -2911,7 +2911,9 @@ static struct sock *netlink_seq_socket_idx(struct seq_file *seq, loff_t pos)
 	const struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht);
 
 	for (j = 0; j < tbl->size; j++) {
-		rht_for_each_entry_rcu(nlk, tbl->buckets[j], node) {
+		struct rhash_head *node;
+
+		rht_for_each_entry_rcu(nlk, node, tbl, j, node) {
 			s = (struct sock *)nlk;
 
 			if (sock_net(s) != seq_file_net(seq))
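
This and the two later iterator hunks track an rhashtable API change: the
bucket walkers no longer take a bucket head pointer but the bucket table plus
an index, together with an explicit struct rhash_head * cursor. Every converted
call site in this diff follows the same shape, roughly (the entry type and
visit() are hypothetical):

        struct rhash_head *pos;
        unsigned int i;

        for (i = 0; i < tbl->size; i++)
                rht_for_each_entry_rcu(entry, pos, tbl, i, node)
                        visit(entry);  /* 'node' names the rhash_head member */
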
@@ -2929,9 +2931,8 @@ static struct sock *netlink_seq_socket_idx(struct seq_file *seq, loff_t pos)
 }
 
 static void *netlink_seq_start(struct seq_file *seq, loff_t *pos)
-	__acquires(nl_table_lock) __acquires(RCU)
+	__acquires(RCU)
 {
-	read_lock(&nl_table_lock);
 	rcu_read_lock();
 	return *pos ? netlink_seq_socket_idx(seq, *pos - 1) : SEQ_START_TOKEN;
 }
@@ -2939,6 +2940,8 @@ static void *netlink_seq_start(struct seq_file *seq, loff_t *pos)
 static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
 	struct rhashtable *ht;
+	const struct bucket_table *tbl;
+	struct rhash_head *node;
 	struct netlink_sock *nlk;
 	struct nl_seq_iter *iter;
 	struct net *net;
@@ -2955,17 +2958,17 @@ static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 
 	i = iter->link;
 	ht = &nl_table[i].hash;
-	rht_for_each_entry(nlk, nlk->node.next, ht, node)
+	tbl = rht_dereference_rcu(ht->tbl, ht);
+	rht_for_each_entry_rcu_continue(nlk, node, nlk->node.next, tbl, iter->hash_idx, node)
 		if (net_eq(sock_net((struct sock *)nlk), net))
 			return nlk;
 
 	j = iter->hash_idx + 1;
 
 	do {
-		const struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht);
 
 		for (; j < tbl->size; j++) {
-			rht_for_each_entry(nlk, tbl->buckets[j], ht, node) {
+			rht_for_each_entry_rcu(nlk, node, tbl, j, node) {
 				if (net_eq(sock_net((struct sock *)nlk), net)) {
 					iter->link = i;
 					iter->hash_idx = j;
@@ -2981,10 +2984,9 @@ static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 }
 
 static void netlink_seq_stop(struct seq_file *seq, void *v)
-	__releases(RCU) __releases(nl_table_lock)
+	__releases(RCU)
 {
 	rcu_read_unlock();
-	read_unlock(&nl_table_lock);
 }
 
 
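
With lookups RCU-safe, the /proc/net/netlink walker drops nl_table_lock
entirely: start/stop reduce to rcu_read_lock()/rcu_read_unlock(), and the
sparse annotations shrink to match. The contract such a walker must keep is
that the annotations mirror the real lock flow and that nothing between start
and stop sleeps; a minimal matched pair (names hypothetical) looks like:

        static void *foo_seq_start(struct seq_file *seq, loff_t *pos)
                __acquires(RCU)
        {
                rcu_read_lock();
                return foo_first(*pos);  /* hypothetical cursor lookup */
        }

        static void foo_seq_stop(struct seq_file *seq, void *v)
                __releases(RCU)
        {
                rcu_read_unlock();
        }
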
@@ -3131,9 +3133,6 @@ static int __init netlink_proto_init(void)
 		.max_shift = 16, /* 64K */
 		.grow_decision = rht_grow_above_75,
 		.shrink_decision = rht_shrink_below_30,
-#ifdef CONFIG_PROVE_LOCKING
-		.mutex_is_held = lockdep_nl_sk_hash_is_held,
-#endif
 	};
 
 	if (err != 0)
diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h
index f1c31b39aa3e..89008405d6b4 100644
--- a/net/netlink/af_netlink.h
+++ b/net/netlink/af_netlink.h
@@ -51,6 +51,7 @@ struct netlink_sock {
 #endif /* CONFIG_NETLINK_MMAP */
 
 	struct rhash_head	node;
+	struct rcu_head		rcu;
 };
 
 static inline struct netlink_sock *nlk_sk(struct sock *sk)
@@ -74,6 +75,5 @@ struct netlink_table {
 
 extern struct netlink_table *nl_table;
 extern rwlock_t nl_table_lock;
-extern struct mutex nl_sk_hash_lock;
 
 #endif
diff --git a/net/netlink/diag.c b/net/netlink/diag.c
index de8c74a3c061..3ee63a3cff30 100644
--- a/net/netlink/diag.c
+++ b/net/netlink/diag.c
@@ -91,7 +91,8 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb,
 	    sk_diag_put_rings_cfg(sk, skb))
 		goto out_nlmsg_trim;
 
-	return nlmsg_end(skb, nlh);
+	nlmsg_end(skb, nlh);
+	return 0;
 
 out_nlmsg_trim:
 	nlmsg_cancel(skb, nlh);
@@ -103,7 +104,7 @@ static int __netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
 {
 	struct netlink_table *tbl = &nl_table[protocol];
 	struct rhashtable *ht = &tbl->hash;
-	const struct bucket_table *htbl = rht_dereference(ht->tbl, ht);
+	const struct bucket_table *htbl = rht_dereference_rcu(ht->tbl, ht);
 	struct net *net = sock_net(skb->sk);
 	struct netlink_diag_req *req;
 	struct netlink_sock *nlsk;
@@ -113,7 +114,9 @@ static int __netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
 	req = nlmsg_data(cb->nlh);
 
 	for (i = 0; i < htbl->size; i++) {
-		rht_for_each_entry(nlsk, htbl->buckets[i], ht, node) {
+		struct rhash_head *pos;
+
+		rht_for_each_entry_rcu(nlsk, pos, htbl, i, node) {
 			sk = (struct sock *)nlsk;
 
 			if (!net_eq(sock_net(sk), net))
@@ -170,7 +173,7 @@ static int netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
 
 	req = nlmsg_data(cb->nlh);
 
-	mutex_lock(&nl_sk_hash_lock);
+	rcu_read_lock();
 	read_lock(&nl_table_lock);
 
 	if (req->sdiag_protocol == NDIAG_PROTO_ALL) {
@@ -184,7 +187,7 @@ static int netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
 	} else {
 		if (req->sdiag_protocol >= MAX_LINKS) {
 			read_unlock(&nl_table_lock);
-			mutex_unlock(&nl_sk_hash_lock);
+			rcu_read_unlock();
 			return -ENOENT;
 		}
 
@@ -192,7 +195,7 @@ static int netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
 	}
 
 	read_unlock(&nl_table_lock);
-	mutex_unlock(&nl_sk_hash_lock);
+	rcu_read_unlock();
 
 	return skb->len;
 }
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index ee57459fc258..2ed5f964772e 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -762,7 +762,8 @@ static int ctrl_fill_info(struct genl_family *family, u32 portid, u32 seq,
 		nla_nest_end(skb, nla_grps);
 	}
 
-	return genlmsg_end(skb, hdr);
+	genlmsg_end(skb, hdr);
+	return 0;
 
 nla_put_failure:
 	genlmsg_cancel(skb, hdr);
@@ -802,7 +803,8 @@ static int ctrl_fill_mcgrp_info(struct genl_family *family,
 	nla_nest_end(skb, nest);
 	nla_nest_end(skb, nla_grps);
 
-	return genlmsg_end(skb, hdr);
+	genlmsg_end(skb, hdr);
+	return 0;
 
 nla_put_failure:
 	genlmsg_cancel(skb, hdr);
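
Both genetlink hunks, like the diag.c hunk above, are fallout from nlmsg_end()
and genlmsg_end() no longer returning the message length: fill functions must
now return 0 on success themselves. The resulting canonical shape of a fill
function, with placeholder message type and attribute numbers:

        static int example_fill(struct sk_buff *skb, u32 portid, u32 seq)
        {
                struct nlmsghdr *nlh;

                nlh = nlmsg_put(skb, portid, seq, NLMSG_MIN_TYPE, 0, 0);
                if (!nlh)
                        return -EMSGSIZE;

                if (nla_put_u32(skb, 1 /* placeholder attr */, 0))
                        goto nla_put_failure;

                nlmsg_end(skb, nlh);  /* finalizes nlmsg_len */
                return 0;

        nla_put_failure:
                nlmsg_cancel(skb, nlh);
                return -EMSGSIZE;
        }
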