author     Andrea Bastoni <bastoni@cs.unc.edu>    2010-05-30 19:16:45 -0400
committer  Andrea Bastoni <bastoni@cs.unc.edu>    2010-05-30 19:16:45 -0400
commit     ada47b5fe13d89735805b566185f4885f5a3f750 (patch)
tree       644b88f8a71896307d71438e9b3af49126ffb22b /net/ipv4/udp.c
parent     43e98717ad40a4ae64545b5ba047c7b86aa44f4f (diff)
parent     3280f21d43ee541f97f8cda5792150d2dbec20d5 (diff)

Merge branch 'wip-2.6.34' into old-private-master (archived-private-master)

Diffstat (limited to 'net/ipv4/udp.c')

 -rw-r--r--  net/ipv4/udp.c | 515
 1 file changed, 397 insertions(+), 118 deletions(-)
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 0fa9f70e4b19..c36522a0f113 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -95,6 +95,7 @@
 #include <linux/mm.h>
 #include <linux/inet.h>
 #include <linux/netdevice.h>
+#include <linux/slab.h>
 #include <net/tcp_states.h>
 #include <linux/skbuff.h>
 #include <linux/proc_fs.h>
@@ -106,7 +107,7 @@
 #include <net/xfrm.h>
 #include "udp_impl.h"

-struct udp_table udp_table;
+struct udp_table udp_table __read_mostly;
 EXPORT_SYMBOL(udp_table);

 int sysctl_udp_mem[3] __read_mostly;
@@ -121,28 +122,30 @@ EXPORT_SYMBOL(sysctl_udp_wmem_min);
 atomic_t udp_memory_allocated;
 EXPORT_SYMBOL(udp_memory_allocated);

-#define PORTS_PER_CHAIN (65536 / UDP_HTABLE_SIZE)
+#define MAX_UDP_PORTS 65536
+#define PORTS_PER_CHAIN (MAX_UDP_PORTS / UDP_HTABLE_SIZE_MIN)

 static int udp_lib_lport_inuse(struct net *net, __u16 num,
                                const struct udp_hslot *hslot,
                                unsigned long *bitmap,
                                struct sock *sk,
                                int (*saddr_comp)(const struct sock *sk1,
-                                                 const struct sock *sk2))
+                                                 const struct sock *sk2),
+                               unsigned int log)
 {
         struct sock *sk2;
         struct hlist_nulls_node *node;

         sk_nulls_for_each(sk2, node, &hslot->head)
                 if (net_eq(sock_net(sk2), net) &&
                     sk2 != sk &&
-                    (bitmap || sk2->sk_hash == num) &&
+                    (bitmap || udp_sk(sk2)->udp_port_hash == num) &&
                     (!sk2->sk_reuse || !sk->sk_reuse) &&
-                    (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if
-                        || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
+                    (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if ||
+                     sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
                     (*saddr_comp)(sk, sk2)) {
                         if (bitmap)
-                                __set_bit(sk2->sk_hash / UDP_HTABLE_SIZE,
+                                __set_bit(udp_sk(sk2)->udp_port_hash >> log,
                                           bitmap);
                         else
                                 return 1;
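
A note on the bitmap in the hunk above: with a power-of-two table, every port that lands in one chain shares its low `log` bits, so `udp_port_hash >> log` is a dense index and a single chain walk marks every colliding port at once. A minimal standalone sketch of that indexing (HASH_SIZE and HASH_LOG are made-up values, not the kernel's):

/* bitmap_demo.c - standalone; illustrative sizes only */
#include <stdio.h>
#include <string.h>

#define MAX_UDP_PORTS   65536
#define HASH_SIZE       256     /* pretend udptable->mask + 1 == 256 */
#define HASH_LOG        8       /* pretend udptable->log == 8 */
#define PORTS_PER_CHAIN (MAX_UDP_PORTS / HASH_SIZE)

int main(void)
{
        unsigned char bitmap[PORTS_PER_CHAIN / 8];
        unsigned int port = 32777, idx = port >> HASH_LOG;

        memset(bitmap, 0, sizeof(bitmap));
        /* what __set_bit(udp_port_hash >> log, bitmap) does for one socket */
        bitmap[idx / 8] |= 1u << (idx % 8);

        /* every port p with (p & (HASH_SIZE - 1)) == (port & (HASH_SIZE - 1))
         * hashes into this same chain, and p >> HASH_LOG is its unique bit */
        printf("chain %u, bit %u\n", port & (HASH_SIZE - 1), idx);
        return 0;
}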
@@ -150,18 +153,51 @@ static int udp_lib_lport_inuse(struct net *net, __u16 num,
         return 0;
 }

+/*
+ * Note: we still hold spinlock of primary hash chain, so no other writer
+ * can insert/delete a socket with local_port == num
+ */
+static int udp_lib_lport_inuse2(struct net *net, __u16 num,
+                               struct udp_hslot *hslot2,
+                               struct sock *sk,
+                               int (*saddr_comp)(const struct sock *sk1,
+                                                 const struct sock *sk2))
+{
+        struct sock *sk2;
+        struct hlist_nulls_node *node;
+        int res = 0;
+
+        spin_lock(&hslot2->lock);
+        udp_portaddr_for_each_entry(sk2, node, &hslot2->head)
+                if (net_eq(sock_net(sk2), net) &&
+                    sk2 != sk &&
+                    (udp_sk(sk2)->udp_port_hash == num) &&
+                    (!sk2->sk_reuse || !sk->sk_reuse) &&
+                    (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if ||
+                     sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
+                    (*saddr_comp)(sk, sk2)) {
+                        res = 1;
+                        break;
+                }
+        spin_unlock(&hslot2->lock);
+        return res;
+}
+
 /**
  *  udp_lib_get_port  -  UDP/-Lite port lookup for IPv4 and IPv6
  *
  *  @sk:          socket struct in question
  *  @snum:        port number to look up
  *  @saddr_comp:  AF-dependent comparison of bound local IP addresses
+ *  @hash2_nulladdr: AF-dependent hash value in secondary hash chains,
+ *                   with NULL address
  */
 int udp_lib_get_port(struct sock *sk, unsigned short snum,
                      int (*saddr_comp)(const struct sock *sk1,
-                                       const struct sock *sk2))
+                                       const struct sock *sk2),
+                     unsigned int hash2_nulladdr)
 {
-        struct udp_hslot *hslot;
+        struct udp_hslot *hslot, *hslot2;
         struct udp_table *udptable = sk->sk_prot->h.udp_table;
         int error = 1;
         struct net *net = sock_net(sk);
@@ -180,13 +216,14 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum,
                 /*
                  * force rand to be an odd multiple of UDP_HTABLE_SIZE
                  */
-                rand = (rand | 1) * UDP_HTABLE_SIZE;
-                for (last = first + UDP_HTABLE_SIZE; first != last; first++) {
-                        hslot = &udptable->hash[udp_hashfn(net, first)];
+                rand = (rand | 1) * (udptable->mask + 1);
+                last = first + udptable->mask + 1;
+                do {
+                        hslot = udp_hashslot(udptable, net, first);
                         bitmap_zero(bitmap, PORTS_PER_CHAIN);
                         spin_lock_bh(&hslot->lock);
                         udp_lib_lport_inuse(net, snum, hslot, bitmap, sk,
-                                            saddr_comp);
+                                            saddr_comp, udptable->log);

                         snum = first;
                         /*
@@ -196,25 +233,59 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum,
                          */
                         do {
                                 if (low <= snum && snum <= high &&
-                                    !test_bit(snum / UDP_HTABLE_SIZE, bitmap))
+                                    !test_bit(snum >> udptable->log, bitmap))
                                         goto found;
                                 snum += rand;
                         } while (snum != first);
                         spin_unlock_bh(&hslot->lock);
-                }
+                } while (++first != last);
                 goto fail;
         } else {
-                hslot = &udptable->hash[udp_hashfn(net, snum)];
+                hslot = udp_hashslot(udptable, net, snum);
                 spin_lock_bh(&hslot->lock);
-                if (udp_lib_lport_inuse(net, snum, hslot, NULL, sk, saddr_comp))
+                if (hslot->count > 10) {
+                        int exist;
+                        unsigned int slot2 = udp_sk(sk)->udp_portaddr_hash ^ snum;
+
+                        slot2          &= udptable->mask;
+                        hash2_nulladdr &= udptable->mask;
+
+                        hslot2 = udp_hashslot2(udptable, slot2);
+                        if (hslot->count < hslot2->count)
+                                goto scan_primary_hash;
+
+                        exist = udp_lib_lport_inuse2(net, snum, hslot2,
+                                                     sk, saddr_comp);
+                        if (!exist && (hash2_nulladdr != slot2)) {
+                                hslot2 = udp_hashslot2(udptable, hash2_nulladdr);
+                                exist = udp_lib_lport_inuse2(net, snum, hslot2,
+                                                             sk, saddr_comp);
+                        }
+                        if (exist)
+                                goto fail_unlock;
+                        else
+                                goto found;
+                }
+scan_primary_hash:
+                if (udp_lib_lport_inuse(net, snum, hslot, NULL, sk,
+                                        saddr_comp, 0))
                         goto fail_unlock;
         }
 found:
-        inet_sk(sk)->num = snum;
-        sk->sk_hash = snum;
+        inet_sk(sk)->inet_num = snum;
+        udp_sk(sk)->udp_port_hash = snum;
+        udp_sk(sk)->udp_portaddr_hash ^= snum;
         if (sk_unhashed(sk)) {
                 sk_nulls_add_node_rcu(sk, &hslot->head);
+                hslot->count++;
                 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
+
+                hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash);
+                spin_lock(&hslot2->lock);
+                hlist_nulls_add_head_rcu(&udp_sk(sk)->udp_portaddr_node,
+                                         &hslot2->head);
+                hslot2->count++;
+                spin_unlock(&hslot2->lock);
         }
         error = 0;
 fail_unlock:
@@ -229,13 +300,26 @@ static int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2)
         struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2);

         return (!ipv6_only_sock(sk2) &&
-                (!inet1->rcv_saddr || !inet2->rcv_saddr ||
-                 inet1->rcv_saddr == inet2->rcv_saddr));
+                (!inet1->inet_rcv_saddr || !inet2->inet_rcv_saddr ||
+                 inet1->inet_rcv_saddr == inet2->inet_rcv_saddr));
+}
+
+static unsigned int udp4_portaddr_hash(struct net *net, __be32 saddr,
+                                       unsigned int port)
+{
+        return jhash_1word(saddr, net_hash_mix(net)) ^ port;
 }

 int udp_v4_get_port(struct sock *sk, unsigned short snum)
 {
-        return udp_lib_get_port(sk, snum, ipv4_rcv_saddr_equal);
+        unsigned int hash2_nulladdr =
+                udp4_portaddr_hash(sock_net(sk), INADDR_ANY, snum);
+        unsigned int hash2_partial =
+                udp4_portaddr_hash(sock_net(sk), inet_sk(sk)->inet_rcv_saddr, 0);
+
+        /* precompute partial secondary hash */
+        udp_sk(sk)->udp_portaddr_hash = hash2_partial;
+        return udp_lib_get_port(sk, snum, ipv4_rcv_saddr_equal, hash2_nulladdr);
 }

 static inline int compute_score(struct sock *sk, struct net *net, __be32 saddr,
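
The point of `udp4_portaddr_hash()` folding the port in with XOR is that the address part can be hashed once, before the port is known: `udp_v4_get_port()` stores the partial hash (port 0), and `udp_lib_get_port()` later XORs the chosen port in. A small self-contained demo of that identity (`mix()` is an illustrative stand-in mixer, not jhash_1word()):

/* portaddr_hash_demo.c */
#include <stdio.h>

static unsigned int mix(unsigned int a, unsigned int seed)
{
        a ^= seed;
        a *= 0x9e3779b1u;
        return a ^ (a >> 16);
}

static unsigned int portaddr_hash(unsigned int saddr, unsigned int port,
                                  unsigned int net_seed)
{
        return mix(saddr, net_seed) ^ port;   /* same shape as the patch's hash */
}

int main(void)
{
        unsigned int seed = 0x12345678u, saddr = 0xc0a80001u; /* 192.168.0.1 */
        unsigned int partial = portaddr_hash(saddr, 0, seed); /* port unknown yet */
        unsigned int full = partial ^ 4242;                   /* fold port in later */

        /* identical to hashing with the port known up front */
        printf("%08x == %08x\n", full, portaddr_hash(saddr, 4242, seed));
        return 0;
}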
@@ -244,23 +328,23 @@ static inline int compute_score(struct sock *sk, struct net *net, __be32 saddr,
 {
         int score = -1;

-        if (net_eq(sock_net(sk), net) && sk->sk_hash == hnum &&
+        if (net_eq(sock_net(sk), net) && udp_sk(sk)->udp_port_hash == hnum &&
                         !ipv6_only_sock(sk)) {
                 struct inet_sock *inet = inet_sk(sk);

                 score = (sk->sk_family == PF_INET ? 1 : 0);
-                if (inet->rcv_saddr) {
-                        if (inet->rcv_saddr != daddr)
+                if (inet->inet_rcv_saddr) {
+                        if (inet->inet_rcv_saddr != daddr)
                                 return -1;
                         score += 2;
                 }
-                if (inet->daddr) {
-                        if (inet->daddr != saddr)
+                if (inet->inet_daddr) {
+                        if (inet->inet_daddr != saddr)
                                 return -1;
                         score += 2;
                 }
-                if (inet->dport) {
-                        if (inet->dport != sport)
+                if (inet->inet_dport) {
+                        if (inet->inet_dport != sport)
                                 return -1;
                         score += 2;
                 }
@@ -273,6 +357,89 @@ static inline int compute_score(struct sock *sk, struct net *net, __be32 saddr,
         return score;
 }

+/*
+ * In this second variant, we check (daddr, dport) matches (inet_rcv_saddr, inet_num)
+ */
+#define SCORE2_MAX (1 + 2 + 2 + 2)
+static inline int compute_score2(struct sock *sk, struct net *net,
+                                 __be32 saddr, __be16 sport,
+                                 __be32 daddr, unsigned int hnum, int dif)
+{
+        int score = -1;
+
+        if (net_eq(sock_net(sk), net) && !ipv6_only_sock(sk)) {
+                struct inet_sock *inet = inet_sk(sk);
+
+                if (inet->inet_rcv_saddr != daddr)
+                        return -1;
+                if (inet->inet_num != hnum)
+                        return -1;
+
+                score = (sk->sk_family == PF_INET ? 1 : 0);
+                if (inet->inet_daddr) {
+                        if (inet->inet_daddr != saddr)
+                                return -1;
+                        score += 2;
+                }
+                if (inet->inet_dport) {
+                        if (inet->inet_dport != sport)
+                                return -1;
+                        score += 2;
+                }
+                if (sk->sk_bound_dev_if) {
+                        if (sk->sk_bound_dev_if != dif)
+                                return -1;
+                        score += 2;
+                }
+        }
+        return score;
+}
+
+
+/* called with rcu_read_lock() */
+static struct sock *udp4_lib_lookup2(struct net *net,
+                __be32 saddr, __be16 sport,
+                __be32 daddr, unsigned int hnum, int dif,
+                struct udp_hslot *hslot2, unsigned int slot2)
+{
+        struct sock *sk, *result;
+        struct hlist_nulls_node *node;
+        int score, badness;
+
+begin:
+        result = NULL;
+        badness = -1;
+        udp_portaddr_for_each_entry_rcu(sk, node, &hslot2->head) {
+                score = compute_score2(sk, net, saddr, sport,
+                                       daddr, hnum, dif);
+                if (score > badness) {
+                        result = sk;
+                        badness = score;
+                        if (score == SCORE2_MAX)
+                                goto exact_match;
+                }
+        }
+        /*
+         * if the nulls value we got at the end of this lookup is
+         * not the expected one, we must restart lookup.
+         * We probably met an item that was moved to another chain.
+         */
+        if (get_nulls_value(node) != slot2)
+                goto begin;
+
+        if (result) {
+exact_match:
+                if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt)))
+                        result = NULL;
+                else if (unlikely(compute_score2(result, net, saddr, sport,
+                                  daddr, hnum, dif) < badness)) {
+                        sock_put(result);
+                        goto begin;
+                }
+        }
+        return result;
+}
+
 /* UDP is nearly always wildcards out the wazoo, it makes no sense to try
  * harder than this. -DaveM
  */
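
`udp4_lib_lookup2()` relies on the hlist_nulls convention: the chain terminator is not NULL but an odd value encoding the slot number, so a lockless RCU walker can detect that it drifted into a different chain and must restart. A sketch of the encoding (simplified, in the spirit of include/linux/list_nulls.h):

/* nulls_demo.c - the terminator of chain N is the odd value (N << 1) | 1 */
#include <stdio.h>
#include <stdint.h>

static uintptr_t make_nulls(unsigned int slot) { return ((uintptr_t)slot << 1) | 1; }
static int is_nulls(uintptr_t p)               { return (int)(p & 1); }
static unsigned int nulls_value(uintptr_t p)   { return (unsigned int)(p >> 1); }

int main(void)
{
        unsigned int slot2 = 42;
        uintptr_t end = make_nulls(7);  /* pretend the walk ended in chain 7 */

        /* after a lockless walk: if the terminator we reached names a
         * different slot than the one we started in, an entry was re-hashed
         * mid-walk and the lookup restarts ("goto begin" above) */
        if (is_nulls(end) && nulls_value(end) != slot2)
                puts("restart lookup");
        else
                puts("walk stayed in its chain");
        return 0;
}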
@@ -283,11 +450,35 @@ static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
         struct sock *sk, *result;
         struct hlist_nulls_node *node;
         unsigned short hnum = ntohs(dport);
-        unsigned int hash = udp_hashfn(net, hnum);
-        struct udp_hslot *hslot = &udptable->hash[hash];
+        unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask);
+        struct udp_hslot *hslot2, *hslot = &udptable->hash[slot];
         int score, badness;

         rcu_read_lock();
+        if (hslot->count > 10) {
+                hash2 = udp4_portaddr_hash(net, daddr, hnum);
+                slot2 = hash2 & udptable->mask;
+                hslot2 = &udptable->hash2[slot2];
+                if (hslot->count < hslot2->count)
+                        goto begin;
+
+                result = udp4_lib_lookup2(net, saddr, sport,
+                                          daddr, hnum, dif,
+                                          hslot2, slot2);
+                if (!result) {
+                        hash2 = udp4_portaddr_hash(net, INADDR_ANY, hnum);
+                        slot2 = hash2 & udptable->mask;
+                        hslot2 = &udptable->hash2[slot2];
+                        if (hslot->count < hslot2->count)
+                                goto begin;
+
+                        result = udp4_lib_lookup2(net, saddr, sport,
+                                                  INADDR_ANY, hnum, dif,
+                                                  hslot2, slot2);
+                }
+                rcu_read_unlock();
+                return result;
+        }
 begin:
         result = NULL;
         badness = -1;
@@ -304,7 +495,7 @@ begin:
          * not the expected one, we must restart lookup.
          * We probably met an item that was moved to another chain.
          */
-        if (get_nulls_value(node) != hash)
+        if (get_nulls_value(node) != slot)
                 goto begin;

         if (result) {
@@ -354,12 +545,13 @@ static inline struct sock *udp_v4_mcast_next(struct net *net, struct sock *sk,
         sk_nulls_for_each_from(s, node) {
                 struct inet_sock *inet = inet_sk(s);

                 if (!net_eq(sock_net(s), net) ||
-                    s->sk_hash != hnum ||
-                    (inet->daddr && inet->daddr != rmt_addr) ||
-                    (inet->dport != rmt_port && inet->dport) ||
-                    (inet->rcv_saddr && inet->rcv_saddr != loc_addr) ||
-                    ipv6_only_sock(s) ||
+                    udp_sk(s)->udp_port_hash != hnum ||
+                    (inet->inet_daddr && inet->inet_daddr != rmt_addr) ||
+                    (inet->inet_dport != rmt_port && inet->inet_dport) ||
+                    (inet->inet_rcv_saddr &&
+                     inet->inet_rcv_saddr != loc_addr) ||
+                    ipv6_only_sock(s) ||
                     (s->sk_bound_dev_if && s->sk_bound_dev_if != dif))
                         continue;
                 if (!ip_mc_sf_allow(s, loc_addr, rmt_addr, dif))
@@ -642,14 +834,14 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
         } else {
                 if (sk->sk_state != TCP_ESTABLISHED)
                         return -EDESTADDRREQ;
-                daddr = inet->daddr;
-                dport = inet->dport;
+                daddr = inet->inet_daddr;
+                dport = inet->inet_dport;
                 /* Open fast path for connected socket.
                    Route will not be used, if at least one option is set.
                  */
                 connected = 1;
         }
-        ipc.addr = inet->saddr;
+        ipc.addr = inet->inet_saddr;

         ipc.oif = sk->sk_bound_dev_if;
         err = sock_tx_timestamp(msg, sk, &ipc.shtx);
@@ -704,7 +896,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
                                     .proto = sk->sk_protocol,
                                     .flags = inet_sk_flowi_flags(sk),
                                     .uli_u = { .ports =
-                                               { .sport = inet->sport,
+                                               { .sport = inet->inet_sport,
                                                  .dport = dport } } };
                 struct net *net = sock_net(sk);

@@ -748,7 +940,7 @@ back_from_confirm:
                 inet->cork.fl.fl4_dst = daddr;
                 inet->cork.fl.fl_ip_dport = dport;
                 inet->cork.fl.fl4_src = saddr;
-                inet->cork.fl.fl_ip_sport = inet->sport;
+                inet->cork.fl.fl_ip_sport = inet->inet_sport;
                 up->pending = AF_INET;

 do_append_data:
@@ -862,6 +1054,7 @@ static unsigned int first_packet_length(struct sock *sk)
                     udp_lib_checksum_complete(skb)) {
                         UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS,
                                          IS_UDPLITE(sk));
+                        atomic_inc(&sk->sk_drops);
                         __skb_unlink(skb, rcvq);
                         __skb_queue_tail(&list_kill, skb);
                 }
@@ -925,7 +1118,7 @@ int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
         struct inet_sock *inet = inet_sk(sk);
         struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name;
         struct sk_buff *skb;
-        unsigned int ulen, copied;
+        unsigned int ulen;
         int peeked;
         int err;
         int is_udplite = IS_UDPLITE(sk);
@@ -946,10 +1139,9 @@ try_again:
                 goto out;

         ulen = skb->len - sizeof(struct udphdr);
-        copied = len;
-        if (copied > ulen)
-                copied = ulen;
-        else if (copied < ulen)
+        if (len > ulen)
+                len = ulen;
+        else if (len < ulen)
                 msg->msg_flags |= MSG_TRUNC;

         /*
@@ -958,14 +1150,14 @@ try_again:
          * coverage checksum (UDP-Lite), do it before the copy.
          */

-        if (copied < ulen || UDP_SKB_CB(skb)->partial_cov) {
+        if (len < ulen || UDP_SKB_CB(skb)->partial_cov) {
                 if (udp_lib_checksum_complete(skb))
                         goto csum_copy_err;
         }

         if (skb_csum_unnecessary(skb))
                 err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr),
-                                              msg->msg_iov, copied);
+                                              msg->msg_iov, len);
         else {
                 err = skb_copy_and_csum_datagram_iovec(skb,
                                                        sizeof(struct udphdr),
@@ -982,7 +1174,7 @@ try_again:
                 UDP_INC_STATS_USER(sock_net(sk),
                                 UDP_MIB_INDATAGRAMS, is_udplite);

-        sock_recv_timestamp(msg, sk, skb);
+        sock_recv_ts_and_drops(msg, sk, skb);

         /* Copy the address. */
         if (sin) {
@@ -994,7 +1186,7 @@ try_again:
         if (inet->cmsg_flags)
                 ip_cmsg_recv(msg, skb);

-        err = copied;
+        err = len;
         if (flags & MSG_TRUNC)
                 err = ulen;

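
The recvmsg() rework above drops the redundant `copied` variable but keeps the userspace contract intact: the return value is the number of bytes actually copied (the clamped `len`), while MSG_TRUNC still lets a caller learn the full datagram size. A hypothetical userspace probe of that behavior (Linux-specific use of MSG_TRUNC on datagram sockets):

/* trunc_demo.c - illustrative receiver fragment */
#include <stdio.h>
#include <sys/types.h>
#include <sys/socket.h>

int read_one(int fd)
{
        char buf[16];
        ssize_t n = recv(fd, buf, sizeof(buf), MSG_TRUNC);

        if (n > (ssize_t)sizeof(buf))
                fprintf(stderr, "datagram was %zd bytes, kept %zu\n",
                        n, sizeof(buf));
        return (int)n;  /* without MSG_TRUNC in flags, recv() returns the
                         * copied (clamped) length -- the "err = len" case */
}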
@@ -1023,15 +1215,15 @@ int udp_disconnect(struct sock *sk, int flags)
          */

         sk->sk_state = TCP_CLOSE;
-        inet->daddr = 0;
-        inet->dport = 0;
+        inet->inet_daddr = 0;
+        inet->inet_dport = 0;
         sk->sk_bound_dev_if = 0;
         if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
                 inet_reset_saddr(sk);

         if (!(sk->sk_userlocks & SOCK_BINDPORT_LOCK)) {
                 sk->sk_prot->unhash(sk);
-                inet->sport = 0;
+                inet->inet_sport = 0;
         }
         sk_dst_reset(sk);
         return 0;
@@ -1042,13 +1234,22 @@ void udp_lib_unhash(struct sock *sk)
 {
         if (sk_hashed(sk)) {
                 struct udp_table *udptable = sk->sk_prot->h.udp_table;
-                unsigned int hash = udp_hashfn(sock_net(sk), sk->sk_hash);
-                struct udp_hslot *hslot = &udptable->hash[hash];
+                struct udp_hslot *hslot, *hslot2;
+
+                hslot = udp_hashslot(udptable, sock_net(sk),
+                                     udp_sk(sk)->udp_port_hash);
+                hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash);

                 spin_lock_bh(&hslot->lock);
                 if (sk_nulls_del_node_init_rcu(sk)) {
-                        inet_sk(sk)->num = 0;
+                        hslot->count--;
+                        inet_sk(sk)->inet_num = 0;
                         sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
+
+                        spin_lock(&hslot2->lock);
+                        hlist_nulls_del_init_rcu(&udp_sk(sk)->udp_portaddr_node);
+                        hslot2->count--;
+                        spin_unlock(&hslot2->lock);
                 }
                 spin_unlock_bh(&hslot->lock);
         }
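
Note the lock nesting here: both the bind path (udp_lib_get_port, above) and this unhash path take the primary slot lock first and the secondary slot lock inside it, never the other way around, which is what keeps the nesting deadlock-free. A minimal pthread sketch of the same discipline (illustrative names, not kernel APIs):

/* lock_order_demo.c - always primary, then secondary; never the reverse */
#include <pthread.h>

static pthread_mutex_t primary   = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t secondary = PTHREAD_MUTEX_INITIALIZER;

static void unlink_from_both(void (*unlink_primary)(void),
                             void (*unlink_secondary)(void))
{
        pthread_mutex_lock(&primary);
        unlink_primary();
        pthread_mutex_lock(&secondary);   /* nested, same order as insert path */
        unlink_secondary();
        pthread_mutex_unlock(&secondary);
        pthread_mutex_unlock(&primary);
}

static void p(void) { }
static void s(void) { }

int main(void)
{
        unlink_from_both(p, s);
        return 0;
}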
@@ -1057,25 +1258,22 @@ EXPORT_SYMBOL(udp_lib_unhash);

 static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 {
-        int is_udplite = IS_UDPLITE(sk);
-        int rc;
+        int rc = sock_queue_rcv_skb(sk, skb);
+
+        if (rc < 0) {
+                int is_udplite = IS_UDPLITE(sk);

-        if ((rc = sock_queue_rcv_skb(sk, skb)) < 0) {
                 /* Note that an ENOMEM error is charged twice */
-                if (rc == -ENOMEM) {
+                if (rc == -ENOMEM)
                         UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_RCVBUFERRORS,
                                          is_udplite);
-                        atomic_inc(&sk->sk_drops);
-                }
-                goto drop;
+                UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
+                kfree_skb(skb);
+                return -1;
         }

         return 0;

-drop:
-        UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
-        kfree_skb(skb);
-        return -1;
 }

 /* returns:
@@ -1174,61 +1372,98 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
         bh_lock_sock(sk);
         if (!sock_owned_by_user(sk))
                 rc = __udp_queue_rcv_skb(sk, skb);
-        else
-                sk_add_backlog(sk, skb);
+        else if (sk_add_backlog(sk, skb)) {
+                bh_unlock_sock(sk);
+                goto drop;
+        }
         bh_unlock_sock(sk);

         return rc;

 drop:
         UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
+        atomic_inc(&sk->sk_drops);
         kfree_skb(skb);
         return -1;
 }

+
+static void flush_stack(struct sock **stack, unsigned int count,
+                        struct sk_buff *skb, unsigned int final)
+{
+        unsigned int i;
+        struct sk_buff *skb1 = NULL;
+        struct sock *sk;
+
+        for (i = 0; i < count; i++) {
+                sk = stack[i];
+                if (likely(skb1 == NULL))
+                        skb1 = (i == final) ? skb : skb_clone(skb, GFP_ATOMIC);
+
+                if (!skb1) {
+                        atomic_inc(&sk->sk_drops);
+                        UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_RCVBUFERRORS,
+                                         IS_UDPLITE(sk));
+                        UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS,
+                                         IS_UDPLITE(sk));
+                }
+
+                if (skb1 && udp_queue_rcv_skb(sk, skb1) <= 0)
+                        skb1 = NULL;
+        }
+        if (unlikely(skb1))
+                kfree_skb(skb1);
+}
+
 /*
  *      Multicasts and broadcasts go to each listener.
  *
- *      Note: called only from the BH handler context,
- *      so we don't need to lock the hashes.
+ *      Note: called only from the BH handler context.
  */
 static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
                                     struct udphdr *uh,
                                     __be32 saddr, __be32 daddr,
                                     struct udp_table *udptable)
 {
-        struct sock *sk;
-        struct udp_hslot *hslot = &udptable->hash[udp_hashfn(net, ntohs(uh->dest))];
+        struct sock *sk, *stack[256 / sizeof(struct sock *)];
+        struct udp_hslot *hslot = udp_hashslot(udptable, net, ntohs(uh->dest));
         int dif;
+        unsigned int i, count = 0;

         spin_lock(&hslot->lock);
         sk = sk_nulls_head(&hslot->head);
         dif = skb->dev->ifindex;
         sk = udp_v4_mcast_next(net, sk, uh->dest, daddr, uh->source, saddr, dif);
-        if (sk) {
-                struct sock *sknext = NULL;
+        while (sk) {
+                stack[count++] = sk;
+                sk = udp_v4_mcast_next(net, sk_nulls_next(sk), uh->dest,
+                                       daddr, uh->source, saddr, dif);
+                if (unlikely(count == ARRAY_SIZE(stack))) {
+                        if (!sk)
+                                break;
+                        flush_stack(stack, count, skb, ~0);
+                        count = 0;
+                }
+        }
+        /*
+         * before releasing chain lock, we must take a reference on sockets
+         */
+        for (i = 0; i < count; i++)
+                sock_hold(stack[i]);

-                do {
-                        struct sk_buff *skb1 = skb;
-
-                        sknext = udp_v4_mcast_next(net, sk_nulls_next(sk), uh->dest,
-                                                   daddr, uh->source, saddr,
-                                                   dif);
-                        if (sknext)
-                                skb1 = skb_clone(skb, GFP_ATOMIC);
-
-                        if (skb1) {
-                                int ret = udp_queue_rcv_skb(sk, skb1);
-                                if (ret > 0)
-                                        /* we should probably re-process instead
-                                         * of dropping packets here. */
-                                        kfree_skb(skb1);
-                        }
-                        sk = sknext;
-                } while (sknext);
-        } else
-                consume_skb(skb);
         spin_unlock(&hslot->lock);
+
+        /*
+         * do the slow work with no lock held
+         */
+        if (count) {
+                flush_stack(stack, count, skb, count - 1);
+
+                for (i = 0; i < count; i++)
+                        sock_put(stack[i]);
+        } else {
+                kfree_skb(skb);
+        }
         return 0;
 }

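
The rewritten multicast delivery is an instance of a general pattern: collect the targets under the chain lock, pin each with a reference, then do the expensive cloning and queueing with the lock already dropped, so the chain lock is held only briefly. A compilable toy version of the pattern (types and names are illustrative, not kernel code):

/* fanout_demo.c */
#include <pthread.h>
#include <stdio.h>
#include <stddef.h>

struct sub { struct sub *next; int refs; };

static void hold(struct sub *s) { s->refs++; }  /* stand-in for sock_hold() */
static void put(struct sub *s)  { s->refs--; }  /* stand-in for sock_put() */
static void deliver(struct sub *s) { (void)s; /* clone + queue work here */ }

static void fanout(pthread_mutex_t *lock, struct sub *head)
{
        struct sub *stack[32];
        size_t i, count = 0;
        struct sub *s;

        pthread_mutex_lock(lock);
        for (s = head; s && count < 32; s = s->next) {
                hold(s);                /* must outlive the locked section */
                stack[count++] = s;
        }
        pthread_mutex_unlock(lock);

        for (i = 0; i < count; i++) {   /* slow path, no lock held */
                deliver(stack[i]);
                put(stack[i]);
        }
}

int main(void)
{
        pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
        struct sub b = { NULL, 0 }, a = { &b, 0 };

        fanout(&lock, &a);
        printf("refs: %d %d\n", a.refs, b.refs);
        return 0;
}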
@@ -1292,6 +1527,9 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,

         uh   = udp_hdr(skb);
         ulen = ntohs(uh->len);
+        saddr = ip_hdr(skb)->saddr;
+        daddr = ip_hdr(skb)->daddr;
+
         if (ulen > skb->len)
                 goto short_packet;

@@ -1305,9 +1543,6 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
         if (udp4_csum_init(skb, uh, proto))
                 goto csum_error;

-        saddr = ip_hdr(skb)->saddr;
-        daddr = ip_hdr(skb)->daddr;
-
         if (rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST))
                 return __udp4_lib_mcast_deliver(net, skb, uh,
                                 saddr, daddr, udptable);
@@ -1620,9 +1855,14 @@ static struct sock *udp_get_first(struct seq_file *seq, int start)
         struct udp_iter_state *state = seq->private;
         struct net *net = seq_file_net(seq);

-        for (state->bucket = start; state->bucket < UDP_HTABLE_SIZE; ++state->bucket) {
+        for (state->bucket = start; state->bucket <= state->udp_table->mask;
+             ++state->bucket) {
                 struct hlist_nulls_node *node;
                 struct udp_hslot *hslot = &state->udp_table->hash[state->bucket];
+
+                if (hlist_nulls_empty(&hslot->head))
+                        continue;
+
                 spin_lock_bh(&hslot->lock);
                 sk_nulls_for_each(sk, node, &hslot->head) {
                         if (!net_eq(sock_net(sk), net))
@@ -1647,7 +1887,7 @@ static struct sock *udp_get_next(struct seq_file *seq, struct sock *sk)
         } while (sk && (!net_eq(sock_net(sk), net) || sk->sk_family != state->family));

         if (!sk) {
-                if (state->bucket < UDP_HTABLE_SIZE)
+                if (state->bucket <= state->udp_table->mask)
                         spin_unlock_bh(&state->udp_table->hash[state->bucket].lock);
                 return udp_get_first(seq, state->bucket + 1);
         }
@@ -1667,7 +1907,7 @@ static struct sock *udp_get_idx(struct seq_file *seq, loff_t pos)
 static void *udp_seq_start(struct seq_file *seq, loff_t *pos)
 {
         struct udp_iter_state *state = seq->private;
-        state->bucket = UDP_HTABLE_SIZE;
+        state->bucket = MAX_UDP_PORTS;

         return *pos ? udp_get_idx(seq, *pos-1) : SEQ_START_TOKEN;
 }
@@ -1689,7 +1929,7 @@ static void udp_seq_stop(struct seq_file *seq, void *v)
 {
         struct udp_iter_state *state = seq->private;

-        if (state->bucket < UDP_HTABLE_SIZE)
+        if (state->bucket <= state->udp_table->mask)
                 spin_unlock_bh(&state->udp_table->hash[state->bucket].lock);
 }

@@ -1744,12 +1984,12 @@ static void udp4_format_sock(struct sock *sp, struct seq_file *f,
                 int bucket, int *len)
 {
         struct inet_sock *inet = inet_sk(sp);
-        __be32 dest = inet->daddr;
-        __be32 src  = inet->rcv_saddr;
-        __u16 destp = ntohs(inet->dport);
-        __u16 srcp  = ntohs(inet->sport);
+        __be32 dest = inet->inet_daddr;
+        __be32 src  = inet->inet_rcv_saddr;
+        __u16 destp = ntohs(inet->inet_dport);
+        __u16 srcp  = ntohs(inet->inet_sport);

-        seq_printf(f, "%4d: %08X:%04X %08X:%04X"
+        seq_printf(f, "%5d: %08X:%04X %08X:%04X"
                 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d%n",
                 bucket, src, srcp, dest, destp, sp->sk_state,
                 sk_wmem_alloc_get(sp),
@@ -1789,12 +2029,12 @@ static struct udp_seq_afinfo udp4_seq_afinfo = {
         },
 };

-static int udp4_proc_init_net(struct net *net)
+static int __net_init udp4_proc_init_net(struct net *net)
 {
         return udp_proc_register(net, &udp4_seq_afinfo);
 }

-static void udp4_proc_exit_net(struct net *net)
+static void __net_exit udp4_proc_exit_net(struct net *net)
 {
         udp_proc_unregister(net, &udp4_seq_afinfo);
 }
@@ -1815,21 +2055,60 @@ void udp4_proc_exit(void)
 }
 #endif /* CONFIG_PROC_FS */

-void __init udp_table_init(struct udp_table *table)
-{
-        int i;
-
-        for (i = 0; i < UDP_HTABLE_SIZE; i++) {
+static __initdata unsigned long uhash_entries;
+static int __init set_uhash_entries(char *str)
+{
+        if (!str)
+                return 0;
+        uhash_entries = simple_strtoul(str, &str, 0);
+        if (uhash_entries && uhash_entries < UDP_HTABLE_SIZE_MIN)
+                uhash_entries = UDP_HTABLE_SIZE_MIN;
+        return 1;
+}
+__setup("uhash_entries=", set_uhash_entries);
+
+void __init udp_table_init(struct udp_table *table, const char *name)
+{
+        unsigned int i;
+
+        if (!CONFIG_BASE_SMALL)
+                table->hash = alloc_large_system_hash(name,
+                                                      2 * sizeof(struct udp_hslot),
+                                                      uhash_entries,
+                                                      21, /* one slot per 2 MB */
+                                                      0,
+                                                      &table->log,
+                                                      &table->mask,
+                                                      64 * 1024);
+        /*
+         * Make sure hash table has the minimum size
+         */
+        if (CONFIG_BASE_SMALL || table->mask < UDP_HTABLE_SIZE_MIN - 1) {
+                table->hash = kmalloc(UDP_HTABLE_SIZE_MIN *
+                                      2 * sizeof(struct udp_hslot), GFP_KERNEL);
+                if (!table->hash)
+                        panic(name);
+                table->log = ilog2(UDP_HTABLE_SIZE_MIN);
+                table->mask = UDP_HTABLE_SIZE_MIN - 1;
+        }
+        table->hash2 = table->hash + (table->mask + 1);
+        for (i = 0; i <= table->mask; i++) {
                 INIT_HLIST_NULLS_HEAD(&table->hash[i].head, i);
+                table->hash[i].count = 0;
                 spin_lock_init(&table->hash[i].lock);
         }
+        for (i = 0; i <= table->mask; i++) {
+                INIT_HLIST_NULLS_HEAD(&table->hash2[i].head, i);
+                table->hash2[i].count = 0;
+                spin_lock_init(&table->hash2[i].lock);
+        }
 }

 void __init udp_init(void)
 {
         unsigned long nr_pages, limit;

-        udp_table_init(&udp_table);
+        udp_table_init(&udp_table, "UDP");
         /* Set the pressure threshold up by the same strategy of TCP. It is a
          * fraction of global memory that is up to 1/2 at 256 MB, decreasing
          * toward zero with the amount of memory, with a floor of 128 pages.
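
Since the table is now sized at boot (with an optional `uhash_entries=` command-line override, e.g. `uhash_entries=1024`), `table->log` and `table->mask` become two views of the same power-of-two size, and slot selection turns into a mask instead of a modulo. A tiny check of that arithmetic:

/* mask_log_demo.c */
#include <assert.h>
#include <stdio.h>

int main(void)
{
        unsigned int size = 1024;      /* table slots (power of two) */
        unsigned int mask = size - 1;  /* table->mask */
        unsigned int log  = 10;        /* table->log == ilog2(size) */
        unsigned int hash = 0xdeadbeefu;

        assert((1u << log) == size);
        assert((hash & mask) == (hash % size));   /* mask replaces modulo */
        printf("slot %u of %u\n", hash & mask, size);
        return 0;
}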