diff options
author | Thomas Graf <tgraf@suug.ch> | 2015-01-02 17:00:22 -0500 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2015-01-03 14:32:57 -0500 |
commit | 21e4902aea80ef35afc00ee8d2abdea4f519b7f7 (patch) | |
tree | 8c1d2761c3959356151eed7bb677df633d64c0dd /net/netlink | |
parent | f89bd6f87a53ce5a7d60662429591ebac2745c10 (diff) |
netlink: Lockless lookup with RCU grace period in socket release
Defers the release of the socket reference using call_rcu() to
allow using an RCU read-side protected call to rhashtable_lookup().
This restores behaviour and performance gains as previously
introduced by e341694 ("netlink: Convert netlink_lookup() to use
RCU protected hash table") without the side effect of severely
delayed socket destruction.
Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/netlink')
-rw-r--r-- | net/netlink/af_netlink.c | 32 | ||||
-rw-r--r-- | net/netlink/af_netlink.h | 1 |
2 files changed, 17 insertions, 16 deletions
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 738c3bfaa564..298e1df7132a 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c | |||
@@ -97,12 +97,12 @@ static int netlink_dump(struct sock *sk); | |||
97 | static void netlink_skb_destructor(struct sk_buff *skb); | 97 | static void netlink_skb_destructor(struct sk_buff *skb); |
98 | 98 | ||
99 | /* nl_table locking explained: | 99 | /* nl_table locking explained: |
100 | * Lookup and traversal are protected with nl_sk_hash_lock or nl_table_lock | 100 | * Lookup and traversal are protected with an RCU read-side lock. Insertion |
101 | * combined with an RCU read-side lock. Insertion and removal are protected | 101 | * and removal are protected with nl_sk_hash_lock while using RCU list |
102 | * with nl_sk_hash_lock while using RCU list modification primitives and may | 102 | * modification primitives and may run in parallel to RCU protected lookups. |
103 | * run in parallel to nl_table_lock protected lookups. Destruction of the | 103 | * Destruction of the Netlink socket may only occur *after* nl_table_lock has |
104 | * Netlink socket may only occur *after* nl_table_lock has been acquired | 104 | * been acquired either during or after the socket has been removed from |
105 | * either during or after the socket has been removed from the list. | 105 | * the list and after an RCU grace period. |
106 | */ | 106 | */ |
107 | DEFINE_RWLOCK(nl_table_lock); | 107 | DEFINE_RWLOCK(nl_table_lock); |
108 | EXPORT_SYMBOL_GPL(nl_table_lock); | 108 | EXPORT_SYMBOL_GPL(nl_table_lock); |
@@ -1003,13 +1003,11 @@ static struct sock *netlink_lookup(struct net *net, int protocol, u32 portid) | |||
1003 | struct netlink_table *table = &nl_table[protocol]; | 1003 | struct netlink_table *table = &nl_table[protocol]; |
1004 | struct sock *sk; | 1004 | struct sock *sk; |
1005 | 1005 | ||
1006 | read_lock(&nl_table_lock); | ||
1007 | rcu_read_lock(); | 1006 | rcu_read_lock(); |
1008 | sk = __netlink_lookup(table, portid, net); | 1007 | sk = __netlink_lookup(table, portid, net); |
1009 | if (sk) | 1008 | if (sk) |
1010 | sock_hold(sk); | 1009 | sock_hold(sk); |
1011 | rcu_read_unlock(); | 1010 | rcu_read_unlock(); |
1012 | read_unlock(&nl_table_lock); | ||
1013 | 1011 | ||
1014 | return sk; | 1012 | return sk; |
1015 | } | 1013 | } |
@@ -1183,6 +1181,13 @@ out_module: | |||
1183 | goto out; | 1181 | goto out; |
1184 | } | 1182 | } |
1185 | 1183 | ||
1184 | static void deferred_put_nlk_sk(struct rcu_head *head) | ||
1185 | { | ||
1186 | struct netlink_sock *nlk = container_of(head, struct netlink_sock, rcu); | ||
1187 | |||
1188 | sock_put(&nlk->sk); | ||
1189 | } | ||
1190 | |||
1186 | static int netlink_release(struct socket *sock) | 1191 | static int netlink_release(struct socket *sock) |
1187 | { | 1192 | { |
1188 | struct sock *sk = sock->sk; | 1193 | struct sock *sk = sock->sk; |
@@ -1248,7 +1253,7 @@ static int netlink_release(struct socket *sock) | |||
1248 | local_bh_disable(); | 1253 | local_bh_disable(); |
1249 | sock_prot_inuse_add(sock_net(sk), &netlink_proto, -1); | 1254 | sock_prot_inuse_add(sock_net(sk), &netlink_proto, -1); |
1250 | local_bh_enable(); | 1255 | local_bh_enable(); |
1251 | sock_put(sk); | 1256 | call_rcu(&nlk->rcu, deferred_put_nlk_sk); |
1252 | return 0; | 1257 | return 0; |
1253 | } | 1258 | } |
1254 | 1259 | ||
@@ -1263,7 +1268,6 @@ static int netlink_autobind(struct socket *sock) | |||
1263 | 1268 | ||
1264 | retry: | 1269 | retry: |
1265 | cond_resched(); | 1270 | cond_resched(); |
1266 | netlink_table_grab(); | ||
1267 | rcu_read_lock(); | 1271 | rcu_read_lock(); |
1268 | if (__netlink_lookup(table, portid, net)) { | 1272 | if (__netlink_lookup(table, portid, net)) { |
1269 | /* Bind collision, search negative portid values. */ | 1273 | /* Bind collision, search negative portid values. */ |
@@ -1271,11 +1275,9 @@ retry: | |||
1271 | if (rover > -4097) | 1275 | if (rover > -4097) |
1272 | rover = -4097; | 1276 | rover = -4097; |
1273 | rcu_read_unlock(); | 1277 | rcu_read_unlock(); |
1274 | netlink_table_ungrab(); | ||
1275 | goto retry; | 1278 | goto retry; |
1276 | } | 1279 | } |
1277 | rcu_read_unlock(); | 1280 | rcu_read_unlock(); |
1278 | netlink_table_ungrab(); | ||
1279 | 1281 | ||
1280 | err = netlink_insert(sk, net, portid); | 1282 | err = netlink_insert(sk, net, portid); |
1281 | if (err == -EADDRINUSE) | 1283 | if (err == -EADDRINUSE) |
@@ -2910,9 +2912,8 @@ static struct sock *netlink_seq_socket_idx(struct seq_file *seq, loff_t pos) | |||
2910 | } | 2912 | } |
2911 | 2913 | ||
2912 | static void *netlink_seq_start(struct seq_file *seq, loff_t *pos) | 2914 | static void *netlink_seq_start(struct seq_file *seq, loff_t *pos) |
2913 | __acquires(nl_table_lock) __acquires(RCU) | 2915 | __acquires(RCU) |
2914 | { | 2916 | { |
2915 | read_lock(&nl_table_lock); | ||
2916 | rcu_read_lock(); | 2917 | rcu_read_lock(); |
2917 | return *pos ? netlink_seq_socket_idx(seq, *pos - 1) : SEQ_START_TOKEN; | 2918 | return *pos ? netlink_seq_socket_idx(seq, *pos - 1) : SEQ_START_TOKEN; |
2918 | } | 2919 | } |
@@ -2964,10 +2965,9 @@ static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos) | |||
2964 | } | 2965 | } |
2965 | 2966 | ||
2966 | static void netlink_seq_stop(struct seq_file *seq, void *v) | 2967 | static void netlink_seq_stop(struct seq_file *seq, void *v) |
2967 | __releases(RCU) __releases(nl_table_lock) | 2968 | __releases(RCU) |
2968 | { | 2969 | { |
2969 | rcu_read_unlock(); | 2970 | rcu_read_unlock(); |
2970 | read_unlock(&nl_table_lock); | ||
2971 | } | 2971 | } |
2972 | 2972 | ||
2973 | 2973 | ||
diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h index f123a88496f8..fd96fa76202a 100644 --- a/net/netlink/af_netlink.h +++ b/net/netlink/af_netlink.h | |||
@@ -50,6 +50,7 @@ struct netlink_sock { | |||
50 | #endif /* CONFIG_NETLINK_MMAP */ | 50 | #endif /* CONFIG_NETLINK_MMAP */ |
51 | 51 | ||
52 | struct rhash_head node; | 52 | struct rhash_head node; |
53 | struct rcu_head rcu; | ||
53 | }; | 54 | }; |
54 | 55 | ||
55 | static inline struct netlink_sock *nlk_sk(struct sock *sk) | 56 | static inline struct netlink_sock *nlk_sk(struct sock *sk) |