diff options
author | David S. Miller <davem@davemloft.net> | 2015-01-13 14:01:06 -0500 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2015-01-13 14:01:06 -0500 |
commit | 52e3ad9f011fe72620b2f7050227cd48fd295ad5 (patch) | |
tree | 97def40b133732cc55d2f15bfa8e6f652528dc4e | |
parent | d2c60b1350c9a3eb7ed407c18f50306762365646 (diff) | |
parent | 6f73d3b13dc5e16ae06025cd1b12a36b2857caa2 (diff) |
Merge branch 'rhashtable-next'
Ying Xue says:
====================
remove nl_sk_hash_lock from netlink socket
After the tipc socket code successfully avoided the need for an extra lock
by using rhashtable_lookup_insert(), it is now possible for the netlink
socket code to remove its hash socket lock as well. But as netlink socket needs a compare
function to look for an object, we first introduce a new function
called rhashtable_lookup_compare_insert() in commit #1 which is
implemented based on original rhashtable_lookup_insert(). We
subsequently remove nl_sk_hash_lock from netlink socket with the new
introduced function in commit #2. Lastly, as Thomas requested, we add
commit #3 to document that implementations of the grow and shrink
decision functions must enforce the min/max shift limits.
v2:
As Thomas pointed out, there was a race between checking portid and
then setting it in commit #2. Now use socket lock to make the process
of both checking and setting portid atomic, and then eliminate the
race.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | include/linux/rhashtable.h | 9 | ||||
-rw-r--r-- | lib/rhashtable.c | 42 | ||||
-rw-r--r-- | net/netlink/af_netlink.c | 33 | ||||
-rw-r--r-- | net/netlink/af_netlink.h | 1 | ||||
-rw-r--r-- | net/netlink/diag.c | 10 |
5 files changed, 74 insertions, 21 deletions
diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 326acd8c2e9f..9570832ab07c 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h | |||
@@ -79,6 +79,10 @@ struct rhashtable; | |||
79 | * @obj_hashfn: Function to hash object | 79 | * @obj_hashfn: Function to hash object |
80 | * @grow_decision: If defined, may return true if table should expand | 80 | * @grow_decision: If defined, may return true if table should expand |
81 | * @shrink_decision: If defined, may return true if table should shrink | 81 | * @shrink_decision: If defined, may return true if table should shrink |
82 | * | ||
83 | * Note: when implementing the grow and shrink decision function, min/max | ||
84 | * shift must be enforced, otherwise, resizing watermarks they set may be | ||
85 | * useless. | ||
82 | */ | 86 | */ |
83 | struct rhashtable_params { | 87 | struct rhashtable_params { |
84 | size_t nelem_hint; | 88 | size_t nelem_hint; |
@@ -168,7 +172,12 @@ int rhashtable_shrink(struct rhashtable *ht); | |||
168 | void *rhashtable_lookup(struct rhashtable *ht, const void *key); | 172 | void *rhashtable_lookup(struct rhashtable *ht, const void *key); |
169 | void *rhashtable_lookup_compare(struct rhashtable *ht, const void *key, | 173 | void *rhashtable_lookup_compare(struct rhashtable *ht, const void *key, |
170 | bool (*compare)(void *, void *), void *arg); | 174 | bool (*compare)(void *, void *), void *arg); |
175 | |||
171 | bool rhashtable_lookup_insert(struct rhashtable *ht, struct rhash_head *obj); | 176 | bool rhashtable_lookup_insert(struct rhashtable *ht, struct rhash_head *obj); |
177 | bool rhashtable_lookup_compare_insert(struct rhashtable *ht, | ||
178 | struct rhash_head *obj, | ||
179 | bool (*compare)(void *, void *), | ||
180 | void *arg); | ||
172 | 181 | ||
173 | void rhashtable_destroy(struct rhashtable *ht); | 182 | void rhashtable_destroy(struct rhashtable *ht); |
174 | 183 | ||
diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 8023b554905c..ed6ae1ad304c 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c | |||
@@ -727,6 +727,43 @@ EXPORT_SYMBOL_GPL(rhashtable_lookup_compare); | |||
727 | */ | 727 | */ |
728 | bool rhashtable_lookup_insert(struct rhashtable *ht, struct rhash_head *obj) | 728 | bool rhashtable_lookup_insert(struct rhashtable *ht, struct rhash_head *obj) |
729 | { | 729 | { |
730 | struct rhashtable_compare_arg arg = { | ||
731 | .ht = ht, | ||
732 | .key = rht_obj(ht, obj) + ht->p.key_offset, | ||
733 | }; | ||
734 | |||
735 | BUG_ON(!ht->p.key_len); | ||
736 | |||
737 | return rhashtable_lookup_compare_insert(ht, obj, &rhashtable_compare, | ||
738 | &arg); | ||
739 | } | ||
740 | EXPORT_SYMBOL_GPL(rhashtable_lookup_insert); | ||
741 | |||
742 | /** | ||
743 | * rhashtable_lookup_compare_insert - search and insert object to hash table | ||
744 | * with compare function | ||
745 | * @ht: hash table | ||
746 | * @obj: pointer to hash head inside object | ||
747 | * @compare: compare function, must return true on match | ||
748 | * @arg: argument passed on to compare function | ||
749 | * | ||
750 | * Locks down the bucket chain in both the old and new table if a resize | ||
751 | * is in progress to ensure that writers can't remove from the old table | ||
752 | * and can't insert to the new table during the atomic operation of search | ||
753 | * and insertion. Searches for duplicates in both the old and new table if | ||
754 | * a resize is in progress. | ||
755 | * | ||
756 | * Lookups may occur in parallel with hashtable mutations and resizing. | ||
757 | * | ||
758 | * Will trigger an automatic deferred table resizing if the size grows | ||
759 | * beyond the watermark indicated by grow_decision() which can be passed | ||
760 | * to rhashtable_init(). | ||
761 | */ | ||
762 | bool rhashtable_lookup_compare_insert(struct rhashtable *ht, | ||
763 | struct rhash_head *obj, | ||
764 | bool (*compare)(void *, void *), | ||
765 | void *arg) | ||
766 | { | ||
730 | struct bucket_table *new_tbl, *old_tbl; | 767 | struct bucket_table *new_tbl, *old_tbl; |
731 | spinlock_t *new_bucket_lock, *old_bucket_lock; | 768 | spinlock_t *new_bucket_lock, *old_bucket_lock; |
732 | u32 new_hash, old_hash; | 769 | u32 new_hash, old_hash; |
@@ -747,7 +784,8 @@ bool rhashtable_lookup_insert(struct rhashtable *ht, struct rhash_head *obj) | |||
747 | if (unlikely(old_tbl != new_tbl)) | 784 | if (unlikely(old_tbl != new_tbl)) |
748 | spin_lock_bh_nested(new_bucket_lock, RHT_LOCK_NESTED); | 785 | spin_lock_bh_nested(new_bucket_lock, RHT_LOCK_NESTED); |
749 | 786 | ||
750 | if (rhashtable_lookup(ht, rht_obj(ht, obj) + ht->p.key_offset)) { | 787 | if (rhashtable_lookup_compare(ht, rht_obj(ht, obj) + ht->p.key_offset, |
788 | compare, arg)) { | ||
751 | success = false; | 789 | success = false; |
752 | goto exit; | 790 | goto exit; |
753 | } | 791 | } |
@@ -763,7 +801,7 @@ exit: | |||
763 | 801 | ||
764 | return success; | 802 | return success; |
765 | } | 803 | } |
766 | EXPORT_SYMBOL_GPL(rhashtable_lookup_insert); | 804 | EXPORT_SYMBOL_GPL(rhashtable_lookup_compare_insert); |
767 | 805 | ||
768 | static size_t rounded_hashtable_size(struct rhashtable_params *params) | 806 | static size_t rounded_hashtable_size(struct rhashtable_params *params) |
769 | { | 807 | { |
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 298e1df7132a..01b702d63457 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c | |||
@@ -98,7 +98,7 @@ static void netlink_skb_destructor(struct sk_buff *skb); | |||
98 | 98 | ||
99 | /* nl_table locking explained: | 99 | /* nl_table locking explained: |
100 | * Lookup and traversal are protected with an RCU read-side lock. Insertion | 100 | * Lookup and traversal are protected with an RCU read-side lock. Insertion |
101 | * and removal are protected with nl_sk_hash_lock while using RCU list | 101 | * and removal are protected with per bucket lock while using RCU list |
102 | * modification primitives and may run in parallel to RCU protected lookups. | 102 | * modification primitives and may run in parallel to RCU protected lookups. |
103 | * Destruction of the Netlink socket may only occur *after* nl_table_lock has | 103 | * Destruction of the Netlink socket may only occur *after* nl_table_lock has |
104 | * been acquired * either during or after the socket has been removed from | 104 | * been acquired * either during or after the socket has been removed from |
@@ -110,10 +110,6 @@ static atomic_t nl_table_users = ATOMIC_INIT(0); | |||
110 | 110 | ||
111 | #define nl_deref_protected(X) rcu_dereference_protected(X, lockdep_is_held(&nl_table_lock)); | 111 | #define nl_deref_protected(X) rcu_dereference_protected(X, lockdep_is_held(&nl_table_lock)); |
112 | 112 | ||
113 | /* Protects netlink socket hash table mutations */ | ||
114 | DEFINE_MUTEX(nl_sk_hash_lock); | ||
115 | EXPORT_SYMBOL_GPL(nl_sk_hash_lock); | ||
116 | |||
117 | static ATOMIC_NOTIFIER_HEAD(netlink_chain); | 113 | static ATOMIC_NOTIFIER_HEAD(netlink_chain); |
118 | 114 | ||
119 | static DEFINE_SPINLOCK(netlink_tap_lock); | 115 | static DEFINE_SPINLOCK(netlink_tap_lock); |
@@ -998,6 +994,19 @@ static struct sock *__netlink_lookup(struct netlink_table *table, u32 portid, | |||
998 | &netlink_compare, &arg); | 994 | &netlink_compare, &arg); |
999 | } | 995 | } |
1000 | 996 | ||
997 | static bool __netlink_insert(struct netlink_table *table, struct sock *sk, | ||
998 | struct net *net) | ||
999 | { | ||
1000 | struct netlink_compare_arg arg = { | ||
1001 | .net = net, | ||
1002 | .portid = nlk_sk(sk)->portid, | ||
1003 | }; | ||
1004 | |||
1005 | return rhashtable_lookup_compare_insert(&table->hash, | ||
1006 | &nlk_sk(sk)->node, | ||
1007 | &netlink_compare, &arg); | ||
1008 | } | ||
1009 | |||
1001 | static struct sock *netlink_lookup(struct net *net, int protocol, u32 portid) | 1010 | static struct sock *netlink_lookup(struct net *net, int protocol, u32 portid) |
1002 | { | 1011 | { |
1003 | struct netlink_table *table = &nl_table[protocol]; | 1012 | struct netlink_table *table = &nl_table[protocol]; |
@@ -1043,9 +1052,7 @@ static int netlink_insert(struct sock *sk, struct net *net, u32 portid) | |||
1043 | struct netlink_table *table = &nl_table[sk->sk_protocol]; | 1052 | struct netlink_table *table = &nl_table[sk->sk_protocol]; |
1044 | int err = -EADDRINUSE; | 1053 | int err = -EADDRINUSE; |
1045 | 1054 | ||
1046 | mutex_lock(&nl_sk_hash_lock); | 1055 | lock_sock(sk); |
1047 | if (__netlink_lookup(table, portid, net)) | ||
1048 | goto err; | ||
1049 | 1056 | ||
1050 | err = -EBUSY; | 1057 | err = -EBUSY; |
1051 | if (nlk_sk(sk)->portid) | 1058 | if (nlk_sk(sk)->portid) |
@@ -1058,10 +1065,12 @@ static int netlink_insert(struct sock *sk, struct net *net, u32 portid) | |||
1058 | 1065 | ||
1059 | nlk_sk(sk)->portid = portid; | 1066 | nlk_sk(sk)->portid = portid; |
1060 | sock_hold(sk); | 1067 | sock_hold(sk); |
1061 | rhashtable_insert(&table->hash, &nlk_sk(sk)->node); | 1068 | if (__netlink_insert(table, sk, net)) |
1062 | err = 0; | 1069 | err = 0; |
1070 | else | ||
1071 | sock_put(sk); | ||
1063 | err: | 1072 | err: |
1064 | mutex_unlock(&nl_sk_hash_lock); | 1073 | release_sock(sk); |
1065 | return err; | 1074 | return err; |
1066 | } | 1075 | } |
1067 | 1076 | ||
@@ -1069,13 +1078,11 @@ static void netlink_remove(struct sock *sk) | |||
1069 | { | 1078 | { |
1070 | struct netlink_table *table; | 1079 | struct netlink_table *table; |
1071 | 1080 | ||
1072 | mutex_lock(&nl_sk_hash_lock); | ||
1073 | table = &nl_table[sk->sk_protocol]; | 1081 | table = &nl_table[sk->sk_protocol]; |
1074 | if (rhashtable_remove(&table->hash, &nlk_sk(sk)->node)) { | 1082 | if (rhashtable_remove(&table->hash, &nlk_sk(sk)->node)) { |
1075 | WARN_ON(atomic_read(&sk->sk_refcnt) == 1); | 1083 | WARN_ON(atomic_read(&sk->sk_refcnt) == 1); |
1076 | __sock_put(sk); | 1084 | __sock_put(sk); |
1077 | } | 1085 | } |
1078 | mutex_unlock(&nl_sk_hash_lock); | ||
1079 | 1086 | ||
1080 | netlink_table_grab(); | 1087 | netlink_table_grab(); |
1081 | if (nlk_sk(sk)->subscriptions) { | 1088 | if (nlk_sk(sk)->subscriptions) { |
diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h index fd96fa76202a..7518375782f5 100644 --- a/net/netlink/af_netlink.h +++ b/net/netlink/af_netlink.h | |||
@@ -74,6 +74,5 @@ struct netlink_table { | |||
74 | 74 | ||
75 | extern struct netlink_table *nl_table; | 75 | extern struct netlink_table *nl_table; |
76 | extern rwlock_t nl_table_lock; | 76 | extern rwlock_t nl_table_lock; |
77 | extern struct mutex nl_sk_hash_lock; | ||
78 | 77 | ||
79 | #endif | 78 | #endif |
diff --git a/net/netlink/diag.c b/net/netlink/diag.c index fcca36d81a62..bb59a7ed0859 100644 --- a/net/netlink/diag.c +++ b/net/netlink/diag.c | |||
@@ -103,7 +103,7 @@ static int __netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, | |||
103 | { | 103 | { |
104 | struct netlink_table *tbl = &nl_table[protocol]; | 104 | struct netlink_table *tbl = &nl_table[protocol]; |
105 | struct rhashtable *ht = &tbl->hash; | 105 | struct rhashtable *ht = &tbl->hash; |
106 | const struct bucket_table *htbl = rht_dereference(ht->tbl, ht); | 106 | const struct bucket_table *htbl = rht_dereference_rcu(ht->tbl, ht); |
107 | struct net *net = sock_net(skb->sk); | 107 | struct net *net = sock_net(skb->sk); |
108 | struct netlink_diag_req *req; | 108 | struct netlink_diag_req *req; |
109 | struct netlink_sock *nlsk; | 109 | struct netlink_sock *nlsk; |
@@ -115,7 +115,7 @@ static int __netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, | |||
115 | for (i = 0; i < htbl->size; i++) { | 115 | for (i = 0; i < htbl->size; i++) { |
116 | struct rhash_head *pos; | 116 | struct rhash_head *pos; |
117 | 117 | ||
118 | rht_for_each_entry(nlsk, pos, htbl, i, node) { | 118 | rht_for_each_entry_rcu(nlsk, pos, htbl, i, node) { |
119 | sk = (struct sock *)nlsk; | 119 | sk = (struct sock *)nlsk; |
120 | 120 | ||
121 | if (!net_eq(sock_net(sk), net)) | 121 | if (!net_eq(sock_net(sk), net)) |
@@ -172,7 +172,7 @@ static int netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) | |||
172 | 172 | ||
173 | req = nlmsg_data(cb->nlh); | 173 | req = nlmsg_data(cb->nlh); |
174 | 174 | ||
175 | mutex_lock(&nl_sk_hash_lock); | 175 | rcu_read_lock(); |
176 | read_lock(&nl_table_lock); | 176 | read_lock(&nl_table_lock); |
177 | 177 | ||
178 | if (req->sdiag_protocol == NDIAG_PROTO_ALL) { | 178 | if (req->sdiag_protocol == NDIAG_PROTO_ALL) { |
@@ -186,7 +186,7 @@ static int netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) | |||
186 | } else { | 186 | } else { |
187 | if (req->sdiag_protocol >= MAX_LINKS) { | 187 | if (req->sdiag_protocol >= MAX_LINKS) { |
188 | read_unlock(&nl_table_lock); | 188 | read_unlock(&nl_table_lock); |
189 | mutex_unlock(&nl_sk_hash_lock); | 189 | rcu_read_unlock(); |
190 | return -ENOENT; | 190 | return -ENOENT; |
191 | } | 191 | } |
192 | 192 | ||
@@ -194,7 +194,7 @@ static int netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) | |||
194 | } | 194 | } |
195 | 195 | ||
196 | read_unlock(&nl_table_lock); | 196 | read_unlock(&nl_table_lock); |
197 | mutex_unlock(&nl_sk_hash_lock); | 197 | rcu_read_unlock(); |
198 | 198 | ||
199 | return skb->len; | 199 | return skb->len; |
200 | } | 200 | } |