diff options
author | Eric Dumazet <dada1@cosmosbay.com> | 2008-11-23 20:22:55 -0500 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2008-11-23 20:22:55 -0500 |
commit | c25eb3bfb97294d0543a81230fbc237046b4b84c (patch) | |
tree | 6c9deabfb12f4d31f280cfcfe7e7580a2089931c /net/ipv6 | |
parent | 8c862c23e2563e6aedfc6c4aa6827cadb83f2414 (diff) |
net: Convert TCP/DCCP listening hash tables to use RCU
This is the last step needed to perform full RCU lookups
in __inet_lookup(): after the established/timewait tables, we
add RCU lookups to the listening hash table.
The only trick here is that a socket of a given type (TCP ipv4,
TCP ipv6, ...) can now move between two different tables
(established and listening) during an RCU grace period, so we
must use different 'nulls' end-of-chain values for the two tables.
We define a large value:
#define LISTENING_NULLS_BASE (1U << 29)
so that slots in the listening table are guaranteed to have different
end-of-chain values from slots in the established table. A reader can
thus still detect whether it finished its lookup in the right chain.
Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv6')
-rw-r--r-- | net/ipv6/inet6_hashtables.c | 94 |
1 files changed, 59 insertions, 35 deletions
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index e0fd68187f83..8fe267feb81e 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c | |||
@@ -33,7 +33,7 @@ void __inet6_hash(struct sock *sk) | |||
33 | 33 | ||
34 | ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)]; | 34 | ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)]; |
35 | spin_lock(&ilb->lock); | 35 | spin_lock(&ilb->lock); |
36 | __sk_add_node(sk, &ilb->head); | 36 | __sk_nulls_add_node_rcu(sk, &ilb->head); |
37 | spin_unlock(&ilb->lock); | 37 | spin_unlock(&ilb->lock); |
38 | } else { | 38 | } else { |
39 | unsigned int hash; | 39 | unsigned int hash; |
@@ -118,47 +118,71 @@ out: | |||
118 | } | 118 | } |
119 | EXPORT_SYMBOL(__inet6_lookup_established); | 119 | EXPORT_SYMBOL(__inet6_lookup_established); |
120 | 120 | ||
121 | static int inline compute_score(struct sock *sk, struct net *net, | ||
122 | const unsigned short hnum, | ||
123 | const struct in6_addr *daddr, | ||
124 | const int dif) | ||
125 | { | ||
126 | int score = -1; | ||
127 | |||
128 | if (net_eq(sock_net(sk), net) && inet_sk(sk)->num == hnum && | ||
129 | sk->sk_family == PF_INET6) { | ||
130 | const struct ipv6_pinfo *np = inet6_sk(sk); | ||
131 | |||
132 | score = 1; | ||
133 | if (!ipv6_addr_any(&np->rcv_saddr)) { | ||
134 | if (!ipv6_addr_equal(&np->rcv_saddr, daddr)) | ||
135 | return -1; | ||
136 | score++; | ||
137 | } | ||
138 | if (sk->sk_bound_dev_if) { | ||
139 | if (sk->sk_bound_dev_if != dif) | ||
140 | return -1; | ||
141 | score++; | ||
142 | } | ||
143 | } | ||
144 | return score; | ||
145 | } | ||
146 | |||
121 | struct sock *inet6_lookup_listener(struct net *net, | 147 | struct sock *inet6_lookup_listener(struct net *net, |
122 | struct inet_hashinfo *hashinfo, const struct in6_addr *daddr, | 148 | struct inet_hashinfo *hashinfo, const struct in6_addr *daddr, |
123 | const unsigned short hnum, const int dif) | 149 | const unsigned short hnum, const int dif) |
124 | { | 150 | { |
125 | struct sock *sk; | 151 | struct sock *sk; |
126 | const struct hlist_node *node; | 152 | const struct hlist_nulls_node *node; |
127 | struct sock *result = NULL; | 153 | struct sock *result; |
128 | int score, hiscore = 0; | 154 | int score, hiscore; |
129 | struct inet_listen_hashbucket *ilb; | 155 | unsigned int hash = inet_lhashfn(net, hnum); |
130 | 156 | struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash]; | |
131 | ilb = &hashinfo->listening_hash[inet_lhashfn(net, hnum)]; | 157 | |
132 | spin_lock(&ilb->lock); | 158 | rcu_read_lock(); |
133 | sk_for_each(sk, node, &ilb->head) { | 159 | begin: |
134 | if (net_eq(sock_net(sk), net) && inet_sk(sk)->num == hnum && | 160 | result = NULL; |
135 | sk->sk_family == PF_INET6) { | 161 | hiscore = -1; |
136 | const struct ipv6_pinfo *np = inet6_sk(sk); | 162 | sk_nulls_for_each(sk, node, &ilb->head) { |
137 | 163 | score = compute_score(sk, net, hnum, daddr, dif); | |
138 | score = 1; | 164 | if (score > hiscore) { |
139 | if (!ipv6_addr_any(&np->rcv_saddr)) { | 165 | hiscore = score; |
140 | if (!ipv6_addr_equal(&np->rcv_saddr, daddr)) | 166 | result = sk; |
141 | continue; | ||
142 | score++; | ||
143 | } | ||
144 | if (sk->sk_bound_dev_if) { | ||
145 | if (sk->sk_bound_dev_if != dif) | ||
146 | continue; | ||
147 | score++; | ||
148 | } | ||
149 | if (score == 3) { | ||
150 | result = sk; | ||
151 | break; | ||
152 | } | ||
153 | if (score > hiscore) { | ||
154 | hiscore = score; | ||
155 | result = sk; | ||
156 | } | ||
157 | } | 167 | } |
158 | } | 168 | } |
159 | if (result) | 169 | /* |
160 | sock_hold(result); | 170 | * if the nulls value we got at the end of this lookup is |
161 | spin_unlock(&ilb->lock); | 171 | * not the expected one, we must restart lookup. |
172 | * We probably met an item that was moved to another chain. | ||
173 | */ | ||
174 | if (get_nulls_value(node) != hash + LISTENING_NULLS_BASE) | ||
175 | goto begin; | ||
176 | if (result) { | ||
177 | if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt))) | ||
178 | result = NULL; | ||
179 | else if (unlikely(compute_score(result, net, hnum, daddr, | ||
180 | dif) < hiscore)) { | ||
181 | sock_put(result); | ||
182 | goto begin; | ||
183 | } | ||
184 | } | ||
185 | rcu_read_unlock(); | ||
162 | return result; | 186 | return result; |
163 | } | 187 | } |
164 | 188 | ||