aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2015-01-13 14:01:06 -0500
committerDavid S. Miller <davem@davemloft.net>2015-01-13 14:01:06 -0500
commit52e3ad9f011fe72620b2f7050227cd48fd295ad5 (patch)
tree97def40b133732cc55d2f15bfa8e6f652528dc4e
parentd2c60b1350c9a3eb7ed407c18f50306762365646 (diff)
parent6f73d3b13dc5e16ae06025cd1b12a36b2857caa2 (diff)
Merge branch 'rhashtable-next'
Ying Xue says: ==================== remove nl_sk_hash_lock from netlink socket After tipc socket successfully avoids the involvement of an extra lock with rhashtable_lookup_insert(), it's possible for netlink socket to remove its hash socket lock now. But as netlink socket needs a compare function to look for an object, we first introduce a new function called rhashtable_lookup_compare_insert() in commit #1, which is implemented based on the original rhashtable_lookup_insert(). We subsequently remove nl_sk_hash_lock from netlink socket with the newly introduced function in commit #2. Lastly, as Thomas requested, we add commit #3 to indicate that implementations of the grow and shrink decision functions must enforce min/max shift. v2: As Thomas pointed out, there was a race between checking portid and then setting it in commit #2. Now use the socket lock to make the process of both checking and setting portid atomic, and thereby eliminate the race. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--include/linux/rhashtable.h9
-rw-r--r--lib/rhashtable.c42
-rw-r--r--net/netlink/af_netlink.c33
-rw-r--r--net/netlink/af_netlink.h1
-rw-r--r--net/netlink/diag.c10
5 files changed, 74 insertions, 21 deletions
diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
index 326acd8c2e9f..9570832ab07c 100644
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -79,6 +79,10 @@ struct rhashtable;
79 * @obj_hashfn: Function to hash object 79 * @obj_hashfn: Function to hash object
80 * @grow_decision: If defined, may return true if table should expand 80 * @grow_decision: If defined, may return true if table should expand
81 * @shrink_decision: If defined, may return true if table should shrink 81 * @shrink_decision: If defined, may return true if table should shrink
82 *
83 * Note: when implementing the grow and shrink decision function, min/max
84 * shift must be enforced, otherwise, resizing watermarks they set may be
85 * useless.
82 */ 86 */
83struct rhashtable_params { 87struct rhashtable_params {
84 size_t nelem_hint; 88 size_t nelem_hint;
@@ -168,7 +172,12 @@ int rhashtable_shrink(struct rhashtable *ht);
168void *rhashtable_lookup(struct rhashtable *ht, const void *key); 172void *rhashtable_lookup(struct rhashtable *ht, const void *key);
169void *rhashtable_lookup_compare(struct rhashtable *ht, const void *key, 173void *rhashtable_lookup_compare(struct rhashtable *ht, const void *key,
170 bool (*compare)(void *, void *), void *arg); 174 bool (*compare)(void *, void *), void *arg);
175
171bool rhashtable_lookup_insert(struct rhashtable *ht, struct rhash_head *obj); 176bool rhashtable_lookup_insert(struct rhashtable *ht, struct rhash_head *obj);
177bool rhashtable_lookup_compare_insert(struct rhashtable *ht,
178 struct rhash_head *obj,
179 bool (*compare)(void *, void *),
180 void *arg);
172 181
173void rhashtable_destroy(struct rhashtable *ht); 182void rhashtable_destroy(struct rhashtable *ht);
174 183
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index 8023b554905c..ed6ae1ad304c 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -727,6 +727,43 @@ EXPORT_SYMBOL_GPL(rhashtable_lookup_compare);
727 */ 727 */
728bool rhashtable_lookup_insert(struct rhashtable *ht, struct rhash_head *obj) 728bool rhashtable_lookup_insert(struct rhashtable *ht, struct rhash_head *obj)
729{ 729{
730 struct rhashtable_compare_arg arg = {
731 .ht = ht,
732 .key = rht_obj(ht, obj) + ht->p.key_offset,
733 };
734
735 BUG_ON(!ht->p.key_len);
736
737 return rhashtable_lookup_compare_insert(ht, obj, &rhashtable_compare,
738 &arg);
739}
740EXPORT_SYMBOL_GPL(rhashtable_lookup_insert);
741
742/**
743 * rhashtable_lookup_compare_insert - search and insert object to hash table
744 * with compare function
745 * @ht: hash table
746 * @obj: pointer to hash head inside object
747 * @compare: compare function, must return true on match
748 * @arg: argument passed on to compare function
749 *
750 * Locks down the bucket chain in both the old and new table if a resize
751 * is in progress to ensure that writers can't remove from the old table
752 * and can't insert to the new table during the atomic operation of search
753 * and insertion. Searches for duplicates in both the old and new table if
754 * a resize is in progress.
755 *
756 * Lookups may occur in parallel with hashtable mutations and resizing.
757 *
758 * Will trigger an automatic deferred table resizing if the size grows
759 * beyond the watermark indicated by grow_decision() which can be passed
760 * to rhashtable_init().
761 */
762bool rhashtable_lookup_compare_insert(struct rhashtable *ht,
763 struct rhash_head *obj,
764 bool (*compare)(void *, void *),
765 void *arg)
766{
730 struct bucket_table *new_tbl, *old_tbl; 767 struct bucket_table *new_tbl, *old_tbl;
731 spinlock_t *new_bucket_lock, *old_bucket_lock; 768 spinlock_t *new_bucket_lock, *old_bucket_lock;
732 u32 new_hash, old_hash; 769 u32 new_hash, old_hash;
@@ -747,7 +784,8 @@ bool rhashtable_lookup_insert(struct rhashtable *ht, struct rhash_head *obj)
747 if (unlikely(old_tbl != new_tbl)) 784 if (unlikely(old_tbl != new_tbl))
748 spin_lock_bh_nested(new_bucket_lock, RHT_LOCK_NESTED); 785 spin_lock_bh_nested(new_bucket_lock, RHT_LOCK_NESTED);
749 786
750 if (rhashtable_lookup(ht, rht_obj(ht, obj) + ht->p.key_offset)) { 787 if (rhashtable_lookup_compare(ht, rht_obj(ht, obj) + ht->p.key_offset,
788 compare, arg)) {
751 success = false; 789 success = false;
752 goto exit; 790 goto exit;
753 } 791 }
@@ -763,7 +801,7 @@ exit:
763 801
764 return success; 802 return success;
765} 803}
766EXPORT_SYMBOL_GPL(rhashtable_lookup_insert); 804EXPORT_SYMBOL_GPL(rhashtable_lookup_compare_insert);
767 805
768static size_t rounded_hashtable_size(struct rhashtable_params *params) 806static size_t rounded_hashtable_size(struct rhashtable_params *params)
769{ 807{
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 298e1df7132a..01b702d63457 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -98,7 +98,7 @@ static void netlink_skb_destructor(struct sk_buff *skb);
98 98
99/* nl_table locking explained: 99/* nl_table locking explained:
100 * Lookup and traversal are protected with an RCU read-side lock. Insertion 100 * Lookup and traversal are protected with an RCU read-side lock. Insertion
101 * and removal are protected with nl_sk_hash_lock while using RCU list 101 * and removal are protected with per bucket lock while using RCU list
102 * modification primitives and may run in parallel to RCU protected lookups. 102 * modification primitives and may run in parallel to RCU protected lookups.
103 * Destruction of the Netlink socket may only occur *after* nl_table_lock has 103 * Destruction of the Netlink socket may only occur *after* nl_table_lock has
104 * been acquired * either during or after the socket has been removed from 104 * been acquired * either during or after the socket has been removed from
@@ -110,10 +110,6 @@ static atomic_t nl_table_users = ATOMIC_INIT(0);
110 110
111#define nl_deref_protected(X) rcu_dereference_protected(X, lockdep_is_held(&nl_table_lock)); 111#define nl_deref_protected(X) rcu_dereference_protected(X, lockdep_is_held(&nl_table_lock));
112 112
113/* Protects netlink socket hash table mutations */
114DEFINE_MUTEX(nl_sk_hash_lock);
115EXPORT_SYMBOL_GPL(nl_sk_hash_lock);
116
117static ATOMIC_NOTIFIER_HEAD(netlink_chain); 113static ATOMIC_NOTIFIER_HEAD(netlink_chain);
118 114
119static DEFINE_SPINLOCK(netlink_tap_lock); 115static DEFINE_SPINLOCK(netlink_tap_lock);
@@ -998,6 +994,19 @@ static struct sock *__netlink_lookup(struct netlink_table *table, u32 portid,
998 &netlink_compare, &arg); 994 &netlink_compare, &arg);
999} 995}
1000 996
997static bool __netlink_insert(struct netlink_table *table, struct sock *sk,
998 struct net *net)
999{
1000 struct netlink_compare_arg arg = {
1001 .net = net,
1002 .portid = nlk_sk(sk)->portid,
1003 };
1004
1005 return rhashtable_lookup_compare_insert(&table->hash,
1006 &nlk_sk(sk)->node,
1007 &netlink_compare, &arg);
1008}
1009
1001static struct sock *netlink_lookup(struct net *net, int protocol, u32 portid) 1010static struct sock *netlink_lookup(struct net *net, int protocol, u32 portid)
1002{ 1011{
1003 struct netlink_table *table = &nl_table[protocol]; 1012 struct netlink_table *table = &nl_table[protocol];
@@ -1043,9 +1052,7 @@ static int netlink_insert(struct sock *sk, struct net *net, u32 portid)
1043 struct netlink_table *table = &nl_table[sk->sk_protocol]; 1052 struct netlink_table *table = &nl_table[sk->sk_protocol];
1044 int err = -EADDRINUSE; 1053 int err = -EADDRINUSE;
1045 1054
1046 mutex_lock(&nl_sk_hash_lock); 1055 lock_sock(sk);
1047 if (__netlink_lookup(table, portid, net))
1048 goto err;
1049 1056
1050 err = -EBUSY; 1057 err = -EBUSY;
1051 if (nlk_sk(sk)->portid) 1058 if (nlk_sk(sk)->portid)
@@ -1058,10 +1065,12 @@ static int netlink_insert(struct sock *sk, struct net *net, u32 portid)
1058 1065
1059 nlk_sk(sk)->portid = portid; 1066 nlk_sk(sk)->portid = portid;
1060 sock_hold(sk); 1067 sock_hold(sk);
1061 rhashtable_insert(&table->hash, &nlk_sk(sk)->node); 1068 if (__netlink_insert(table, sk, net))
1062 err = 0; 1069 err = 0;
1070 else
1071 sock_put(sk);
1063err: 1072err:
1064 mutex_unlock(&nl_sk_hash_lock); 1073 release_sock(sk);
1065 return err; 1074 return err;
1066} 1075}
1067 1076
@@ -1069,13 +1078,11 @@ static void netlink_remove(struct sock *sk)
1069{ 1078{
1070 struct netlink_table *table; 1079 struct netlink_table *table;
1071 1080
1072 mutex_lock(&nl_sk_hash_lock);
1073 table = &nl_table[sk->sk_protocol]; 1081 table = &nl_table[sk->sk_protocol];
1074 if (rhashtable_remove(&table->hash, &nlk_sk(sk)->node)) { 1082 if (rhashtable_remove(&table->hash, &nlk_sk(sk)->node)) {
1075 WARN_ON(atomic_read(&sk->sk_refcnt) == 1); 1083 WARN_ON(atomic_read(&sk->sk_refcnt) == 1);
1076 __sock_put(sk); 1084 __sock_put(sk);
1077 } 1085 }
1078 mutex_unlock(&nl_sk_hash_lock);
1079 1086
1080 netlink_table_grab(); 1087 netlink_table_grab();
1081 if (nlk_sk(sk)->subscriptions) { 1088 if (nlk_sk(sk)->subscriptions) {
diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h
index fd96fa76202a..7518375782f5 100644
--- a/net/netlink/af_netlink.h
+++ b/net/netlink/af_netlink.h
@@ -74,6 +74,5 @@ struct netlink_table {
74 74
75extern struct netlink_table *nl_table; 75extern struct netlink_table *nl_table;
76extern rwlock_t nl_table_lock; 76extern rwlock_t nl_table_lock;
77extern struct mutex nl_sk_hash_lock;
78 77
79#endif 78#endif
diff --git a/net/netlink/diag.c b/net/netlink/diag.c
index fcca36d81a62..bb59a7ed0859 100644
--- a/net/netlink/diag.c
+++ b/net/netlink/diag.c
@@ -103,7 +103,7 @@ static int __netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
103{ 103{
104 struct netlink_table *tbl = &nl_table[protocol]; 104 struct netlink_table *tbl = &nl_table[protocol];
105 struct rhashtable *ht = &tbl->hash; 105 struct rhashtable *ht = &tbl->hash;
106 const struct bucket_table *htbl = rht_dereference(ht->tbl, ht); 106 const struct bucket_table *htbl = rht_dereference_rcu(ht->tbl, ht);
107 struct net *net = sock_net(skb->sk); 107 struct net *net = sock_net(skb->sk);
108 struct netlink_diag_req *req; 108 struct netlink_diag_req *req;
109 struct netlink_sock *nlsk; 109 struct netlink_sock *nlsk;
@@ -115,7 +115,7 @@ static int __netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
115 for (i = 0; i < htbl->size; i++) { 115 for (i = 0; i < htbl->size; i++) {
116 struct rhash_head *pos; 116 struct rhash_head *pos;
117 117
118 rht_for_each_entry(nlsk, pos, htbl, i, node) { 118 rht_for_each_entry_rcu(nlsk, pos, htbl, i, node) {
119 sk = (struct sock *)nlsk; 119 sk = (struct sock *)nlsk;
120 120
121 if (!net_eq(sock_net(sk), net)) 121 if (!net_eq(sock_net(sk), net))
@@ -172,7 +172,7 @@ static int netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
172 172
173 req = nlmsg_data(cb->nlh); 173 req = nlmsg_data(cb->nlh);
174 174
175 mutex_lock(&nl_sk_hash_lock); 175 rcu_read_lock();
176 read_lock(&nl_table_lock); 176 read_lock(&nl_table_lock);
177 177
178 if (req->sdiag_protocol == NDIAG_PROTO_ALL) { 178 if (req->sdiag_protocol == NDIAG_PROTO_ALL) {
@@ -186,7 +186,7 @@ static int netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
186 } else { 186 } else {
187 if (req->sdiag_protocol >= MAX_LINKS) { 187 if (req->sdiag_protocol >= MAX_LINKS) {
188 read_unlock(&nl_table_lock); 188 read_unlock(&nl_table_lock);
189 mutex_unlock(&nl_sk_hash_lock); 189 rcu_read_unlock();
190 return -ENOENT; 190 return -ENOENT;
191 } 191 }
192 192
@@ -194,7 +194,7 @@ static int netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
194 } 194 }
195 195
196 read_unlock(&nl_table_lock); 196 read_unlock(&nl_table_lock);
197 mutex_unlock(&nl_sk_hash_lock); 197 rcu_read_unlock();
198 198
199 return skb->len; 199 return skb->len;
200} 200}