author     David S. Miller <davem@sunset.davemloft.net>  2007-06-05 18:18:43 -0400
committer  David S. Miller <davem@sunset.davemloft.net>  2007-06-07 16:40:50 -0400
commit     df2bc459a3ad71f8b44c358bf7169acf9caf4acd (patch)
tree       143206b16d59c723be3c2cf2375a33b2a9306280 /net/ipv4/udp.c
parent     3c0d2f3780fc94746c4842e965bd2570e2119bb6 (diff)
[UDP]: Revert 2-pass hashing changes.
This reverts changesets:
6aaf47fa48d3c44280810b1b470261d340e4ed87
b7b5f487ab39bc10ed0694af35651a03d9cb97ff
de34ed91c4ffa4727964a832c46e624dd1495cf5
fc038410b4b1643766f8033f4940bcdb1dace633
There are still some recently discovered correctness
issues which do not have a known fix short of doing a
full hash table scan on port bind.
So revert for now.
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4/udp.c')
-rw-r--r--  net/ipv4/udp.c  241
1 file changed, 68 insertions, 173 deletions
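
Why a correct fix requires a full hash table scan: the reverted scheme
hashed wildcard sockets by port alone but address-bound sockets by port
and address, so the two kinds of socket generally sit in different
chains. The standalone sketch below is ours, not part of the commit; it
lifts the two hash functions from the removed code (plain C integer
types standing in for the kernel's __u16/__be32) to show the mismatch.

#include <stdint.h>
#include <stdio.h>

#define UDP_HTABLE_SIZE 128     /* table size in kernels of this era */

/* Port-only hash used for wildcard sockets (the removed udp_hash_port). */
static unsigned int hash_port(uint16_t port)
{
        return port;
}

/* Port+address hash used for address-bound sockets
 * (the removed ipv4_hash_port_and_addr). */
static unsigned int hash_port_and_addr(uint16_t port, uint32_t addr)
{
        addr ^= addr >> 16;
        addr ^= addr >> 8;
        return port ^ addr;
}

int main(void)
{
        uint16_t port = 53;
        uint32_t addr = 0x7f000001;     /* 127.0.0.1 */

        /* Prints slots 53 and 75: a wildcard socket and an
         * address-bound socket on the same port live in different
         * chains, so proving a port free at bind time means walking
         * every chain in the table. */
        printf("wildcard slot:   %u\n",
               hash_port(port) & (UDP_HTABLE_SIZE - 1));
        printf("addr-bound slot: %u\n",
               hash_port_and_addr(port, addr) & (UDP_HTABLE_SIZE - 1));
        return 0;
}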
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 5da703e699da..facb7e29304e 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -114,36 +114,14 @@ DEFINE_RWLOCK(udp_hash_lock);
 
 static int udp_port_rover;
 
-/*
- * Note about this hash function :
- * Typical use is probably daddr = 0, only dport is going to vary hash
- */
-static inline unsigned int udp_hash_port(__u16 port)
-{
-        return port;
-}
-
-static inline int __udp_lib_port_inuse(unsigned int hash, int port,
-                                       const struct sock *this_sk,
-                                       struct hlist_head udptable[],
-                                       const struct udp_get_port_ops *ops)
+static inline int __udp_lib_lport_inuse(__u16 num, struct hlist_head udptable[])
 {
         struct sock *sk;
         struct hlist_node *node;
-        struct inet_sock *inet;
 
-        sk_for_each(sk, node, &udptable[hash & (UDP_HTABLE_SIZE - 1)]) {
-                if (sk->sk_hash != hash)
-                        continue;
-                inet = inet_sk(sk);
-                if (inet->num != port)
-                        continue;
-                if (this_sk) {
-                        if (ops->saddr_cmp(sk, this_sk))
-                                return 1;
-                } else if (ops->saddr_any(sk))
+        sk_for_each(sk, node, &udptable[num & (UDP_HTABLE_SIZE - 1)])
+                if (sk->sk_hash == num)
                         return 1;
-        }
         return 0;
 }
 
@@ -154,16 +132,16 @@ static inline int __udp_lib_port_inuse(unsigned int hash, int port,
  * @snum: port number to look up
  * @udptable: hash list table, must be of UDP_HTABLE_SIZE
  * @port_rover: pointer to record of last unallocated port
- * @ops: AF-dependent address operations
+ * @saddr_comp: AF-dependent comparison of bound local IP addresses
  */
 int __udp_lib_get_port(struct sock *sk, unsigned short snum,
                        struct hlist_head udptable[], int *port_rover,
-                       const struct udp_get_port_ops *ops)
+                       int (*saddr_comp)(const struct sock *sk1,
+                                         const struct sock *sk2 ) )
 {
         struct hlist_node *node;
         struct hlist_head *head;
         struct sock *sk2;
-        unsigned int hash;
         int error = 1;
 
         write_lock_bh(&udp_hash_lock);
@@ -178,8 +156,7 @@ int __udp_lib_get_port(struct sock *sk, unsigned short snum,
                 for (i = 0; i < UDP_HTABLE_SIZE; i++, result++) {
                         int size;
 
-                        hash = ops->hash_port_and_rcv_saddr(result, sk);
-                        head = &udptable[hash & (UDP_HTABLE_SIZE - 1)];
+                        head = &udptable[result & (UDP_HTABLE_SIZE - 1)];
                         if (hlist_empty(head)) {
                                 if (result > sysctl_local_port_range[1])
                                         result = sysctl_local_port_range[0] +
@@ -204,16 +181,7 @@ int __udp_lib_get_port(struct sock *sk, unsigned short snum,
                                 result = sysctl_local_port_range[0]
                                         + ((result - sysctl_local_port_range[0]) &
                                            (UDP_HTABLE_SIZE - 1));
-                                hash = udp_hash_port(result);
-                                if (__udp_lib_port_inuse(hash, result,
-                                                         NULL, udptable, ops))
-                                        continue;
-                                if (ops->saddr_any(sk))
-                                        break;
-
-                                hash = ops->hash_port_and_rcv_saddr(result, sk);
-                                if (! __udp_lib_port_inuse(hash, result,
-                                                           sk, udptable, ops))
+                                if (! __udp_lib_lport_inuse(result, udptable))
                                         break;
                         }
                         if (i >= (1 << 16) / UDP_HTABLE_SIZE)
@@ -221,40 +189,21 @@ int __udp_lib_get_port(struct sock *sk, unsigned short snum,
 gotit:
                 *port_rover = snum = result;
         } else {
-                hash = udp_hash_port(snum);
-                head = &udptable[hash & (UDP_HTABLE_SIZE - 1)];
+                head = &udptable[snum & (UDP_HTABLE_SIZE - 1)];
 
                 sk_for_each(sk2, node, head)
-                        if (sk2->sk_hash == hash &&
+                        if (sk2->sk_hash == snum &&
                             sk2 != sk &&
-                            inet_sk(sk2)->num == snum &&
-                            (!sk2->sk_reuse || !sk->sk_reuse) &&
-                            (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if ||
-                             sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
-                            ops->saddr_cmp(sk, sk2))
+                            (!sk2->sk_reuse || !sk->sk_reuse) &&
+                            (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if
+                             || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
+                            (*saddr_comp)(sk, sk2) )
                                 goto fail;
-
-                if (!ops->saddr_any(sk)) {
-                        hash = ops->hash_port_and_rcv_saddr(snum, sk);
-                        head = &udptable[hash & (UDP_HTABLE_SIZE - 1)];
-
-                        sk_for_each(sk2, node, head)
-                                if (sk2->sk_hash == hash &&
-                                    sk2 != sk &&
-                                    inet_sk(sk2)->num == snum &&
-                                    (!sk2->sk_reuse || !sk->sk_reuse) &&
-                                    (!sk2->sk_bound_dev_if ||
-                                     !sk->sk_bound_dev_if ||
-                                     sk2->sk_bound_dev_if ==
-                                     sk->sk_bound_dev_if) &&
-                                    ops->saddr_cmp(sk, sk2))
-                                        goto fail;
-                }
         }
         inet_sk(sk)->num = snum;
-        sk->sk_hash = hash;
+        sk->sk_hash = snum;
         if (sk_unhashed(sk)) {
-                head = &udptable[hash & (UDP_HTABLE_SIZE - 1)];
+                head = &udptable[snum & (UDP_HTABLE_SIZE - 1)];
                 sk_add_node(sk, head);
                 sock_prot_inc_use(sk->sk_prot);
         }
@@ -265,12 +214,12 @@ fail:
 }
 
 int udp_get_port(struct sock *sk, unsigned short snum,
-                 const struct udp_get_port_ops *ops)
+                 int (*scmp)(const struct sock *, const struct sock *))
 {
-        return __udp_lib_get_port(sk, snum, udp_hash, &udp_port_rover, ops);
+        return __udp_lib_get_port(sk, snum, udp_hash, &udp_port_rover, scmp);
 }
 
-static int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2)
+int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2)
 {
         struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2);
 
@@ -279,33 +228,9 @@ static int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2)
                    inet1->rcv_saddr == inet2->rcv_saddr ));
 }
 
-static int ipv4_rcv_saddr_any(const struct sock *sk)
-{
-        return !inet_sk(sk)->rcv_saddr;
-}
-
-static inline unsigned int ipv4_hash_port_and_addr(__u16 port, __be32 addr)
-{
-        addr ^= addr >> 16;
-        addr ^= addr >> 8;
-        return port ^ addr;
-}
-
-static unsigned int ipv4_hash_port_and_rcv_saddr(__u16 port,
-                                                 const struct sock *sk)
-{
-        return ipv4_hash_port_and_addr(port, inet_sk(sk)->rcv_saddr);
-}
-
-const struct udp_get_port_ops udp_ipv4_ops = {
-        .saddr_cmp = ipv4_rcv_saddr_equal,
-        .saddr_any = ipv4_rcv_saddr_any,
-        .hash_port_and_rcv_saddr = ipv4_hash_port_and_rcv_saddr,
-};
-
 static inline int udp_v4_get_port(struct sock *sk, unsigned short snum)
 {
-        return udp_get_port(sk, snum, &udp_ipv4_ops);
+        return udp_get_port(sk, snum, ipv4_rcv_saddr_equal);
 }
 
 /* UDP is nearly always wildcards out the wazoo, it makes no sense to try
@@ -317,77 +242,63 @@ static struct sock *__udp4_lib_lookup(__be32 saddr, __be16 sport,
 {
         struct sock *sk, *result = NULL;
         struct hlist_node *node;
-        unsigned int hash, hashwild;
-        int score, best = -1, hport = ntohs(dport);
-
-        hash = ipv4_hash_port_and_addr(hport, daddr);
-        hashwild = udp_hash_port(hport);
+        unsigned short hnum = ntohs(dport);
+        int badness = -1;
 
         read_lock(&udp_hash_lock);
-
-lookup:
-
-        sk_for_each(sk, node, &udptable[hash & (UDP_HTABLE_SIZE - 1)]) {
+        sk_for_each(sk, node, &udptable[hnum & (UDP_HTABLE_SIZE - 1)]) {
                 struct inet_sock *inet = inet_sk(sk);
 
-                if (sk->sk_hash != hash || ipv6_only_sock(sk) ||
-                    inet->num != hport)
-                        continue;
-
-                score = (sk->sk_family == PF_INET ? 1 : 0);
-                if (inet->rcv_saddr) {
-                        if (inet->rcv_saddr != daddr)
-                                continue;
-                        score+=2;
-                }
-                if (inet->daddr) {
-                        if (inet->daddr != saddr)
-                                continue;
-                        score+=2;
-                }
-                if (inet->dport) {
-                        if (inet->dport != sport)
-                                continue;
-                        score+=2;
-                }
-                if (sk->sk_bound_dev_if) {
-                        if (sk->sk_bound_dev_if != dif)
-                                continue;
-                        score+=2;
-                }
-                if (score == 9) {
-                        result = sk;
-                        goto found;
-                } else if (score > best) {
-                        result = sk;
-                        best = score;
+                if (sk->sk_hash == hnum && !ipv6_only_sock(sk)) {
+                        int score = (sk->sk_family == PF_INET ? 1 : 0);
+                        if (inet->rcv_saddr) {
+                                if (inet->rcv_saddr != daddr)
+                                        continue;
+                                score+=2;
+                        }
+                        if (inet->daddr) {
+                                if (inet->daddr != saddr)
+                                        continue;
+                                score+=2;
+                        }
+                        if (inet->dport) {
+                                if (inet->dport != sport)
+                                        continue;
+                                score+=2;
+                        }
+                        if (sk->sk_bound_dev_if) {
+                                if (sk->sk_bound_dev_if != dif)
+                                        continue;
+                                score+=2;
+                        }
+                        if (score == 9) {
+                                result = sk;
+                                break;
+                        } else if (score > badness) {
+                                result = sk;
+                                badness = score;
+                        }
                 }
         }
-
-        if (hash != hashwild) {
-                hash = hashwild;
-                goto lookup;
-        }
-found:
         if (result)
                 sock_hold(result);
         read_unlock(&udp_hash_lock);
         return result;
 }
 
-static inline struct sock *udp_v4_mcast_next(struct sock *sk, unsigned int hnum,
-                                             int hport, __be32 loc_addr,
+static inline struct sock *udp_v4_mcast_next(struct sock *sk,
+                                             __be16 loc_port, __be32 loc_addr,
                                              __be16 rmt_port, __be32 rmt_addr,
                                              int dif)
 {
         struct hlist_node *node;
         struct sock *s = sk;
+        unsigned short hnum = ntohs(loc_port);
 
         sk_for_each_from(s, node) {
                 struct inet_sock *inet = inet_sk(s);
 
                 if (s->sk_hash != hnum ||
-                    inet->num != hport ||
                     (inet->daddr && inet->daddr != rmt_addr) ||
                     (inet->dport != rmt_port && inet->dport) ||
                     (inet->rcv_saddr && inet->rcv_saddr != loc_addr) ||
@@ -1221,45 +1132,29 @@ static int __udp4_lib_mcast_deliver(struct sk_buff *skb,
                                  __be32 saddr, __be32 daddr,
                                  struct hlist_head udptable[])
 {
-        struct sock *sk, *skw, *sknext;
+        struct sock *sk;
         int dif;
-        int hport = ntohs(uh->dest);
-        unsigned int hash = ipv4_hash_port_and_addr(hport, daddr);
-        unsigned int hashwild = udp_hash_port(hport);
-
-        dif = skb->dev->ifindex;
 
         read_lock(&udp_hash_lock);
-
-        sk = sk_head(&udptable[hash & (UDP_HTABLE_SIZE - 1)]);
-        skw = sk_head(&udptable[hashwild & (UDP_HTABLE_SIZE - 1)]);
-
-        sk = udp_v4_mcast_next(sk, hash, hport, daddr, uh->source, saddr, dif);
-        if (!sk) {
-                hash = hashwild;
-                sk = udp_v4_mcast_next(skw, hash, hport, daddr, uh->source,
-                                       saddr, dif);
-        }
+        sk = sk_head(&udptable[ntohs(uh->dest) & (UDP_HTABLE_SIZE - 1)]);
+        dif = skb->dev->ifindex;
+        sk = udp_v4_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif);
         if (sk) {
+                struct sock *sknext = NULL;
+
                 do {
                         struct sk_buff *skb1 = skb;
-                        sknext = udp_v4_mcast_next(sk_next(sk), hash, hport,
-                                                   daddr, uh->source, saddr, dif);
-                        if (!sknext && hash != hashwild) {
-                                hash = hashwild;
-                                sknext = udp_v4_mcast_next(skw, hash, hport,
-                                                           daddr, uh->source, saddr, dif);
-                        }
+
+                        sknext = udp_v4_mcast_next(sk_next(sk), uh->dest, daddr,
+                                                   uh->source, saddr, dif);
                         if (sknext)
                                 skb1 = skb_clone(skb, GFP_ATOMIC);
 
                         if (skb1) {
                                 int ret = udp_queue_rcv_skb(sk, skb1);
                                 if (ret > 0)
-                                        /*
-                                         * we should probably re-process
-                                         * instead of dropping packets here.
-                                         */
+                                        /* we should probably re-process instead
+                                         * of dropping packets here. */
                                         kfree_skb(skb1);
                         }
                         sk = sknext;
@@ -1346,7 +1241,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[],
                 return __udp4_lib_mcast_deliver(skb, uh, saddr, daddr, udptable);
 
         sk = __udp4_lib_lookup(saddr, uh->source, daddr, uh->dest,
-                               skb->dev->ifindex, udptable);
+                               skb->dev->ifindex, udptable        );
 
         if (sk != NULL) {
                 int ret = udp_queue_rcv_skb(sk, skb);
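
A note on the restored lookup: the "score == 9" early exit in
__udp4_lib_lookup above marks a fully specified match, 1 point for
PF_INET plus 2 each for local address, remote address, remote port,
and bound device. The userspace re-statement below is our sketch, with
a hypothetical flattened key struct standing in for the inet_sock and
sock fields the kernel actually checks.

#include <stdint.h>

/* Hypothetical stand-in for the socket fields __udp4_lib_lookup
 * inspects; zero means "wildcard, matches anything". */
struct udp_key {
        int      is_inet;       /* sk->sk_family == PF_INET */
        uint32_t rcv_saddr;     /* bound local address */
        uint32_t daddr;         /* connected remote address */
        uint16_t dport;         /* connected remote port */
        int      bound_dev_if;  /* bound device index */
};

/* Mirrors the restored scoring: negative means "no match", 9 means an
 * exact match (1 + 2 + 2 + 2 + 2) that ends the search early, and
 * anything in between is a wildcard match that a later, more specific
 * socket may beat. */
static int udp_lookup_score(const struct udp_key *k, uint32_t saddr,
                            uint16_t sport, uint32_t daddr, int dif)
{
        int score = k->is_inet ? 1 : 0;

        if (k->rcv_saddr) {
                if (k->rcv_saddr != daddr)
                        return -1;
                score += 2;
        }
        if (k->daddr) {
                if (k->daddr != saddr)
                        return -1;
                score += 2;
        }
        if (k->dport) {
                if (k->dport != sport)
                        return -1;
                score += 2;
        }
        if (k->bound_dev_if) {
                if (k->bound_dev_if != dif)
                        return -1;
                score += 2;
        }
        return score;
}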