diff options
author | Su, Xuemin <suxm@chinanetcenter.com> | 2016-06-12 23:02:50 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2016-06-14 17:23:09 -0400 |
commit | d1e37288c9146dccff830e3253e403af8705b51f (patch) | |
tree | 573ea16d2781cf3754df56de05a306f8fff17c88 | |
parent | 6c0d54f1897d229748d4f41ef919078db6db2123 (diff) |
udp reuseport: fix packet of same flow hashed to different socket
There is a corner case in which udp packets belonging to a same
flow are hashed to different socket when hslot->count changes from 10
to 11:
1) When hslot->count <= 10, __udp_lib_lookup() searches udp_table->hash,
and always passes 'daddr' to udp_ehashfn().
2) When hslot->count > 10, __udp_lib_lookup() searches udp_table->hash2,
but may pass 'INADDR_ANY' to udp_ehashfn() if the sockets are bound to
INADDR_ANY instead of some specific addr.
That means when hslot->count changes from 10 to 11, the hash calculated by
udp_ehashfn() is also changed, and the udp packets belonging to a same
flow will be hashed to different socket.
This is easily reproduced:
1) Create 10 udp sockets and bind all of them to 0.0.0.0:40000.
2) From the same host send udp packets to 127.0.0.1:40000, record the
socket index which receives the packets.
3) Create 1 more udp socket and bind it to 0.0.0.0:44096. The number 44096
is 40000 + UDP_HASH_SIZE(4096), this makes the new socket put into the
same hslot as the aformentioned 10 sockets, and makes the hslot->count
change from 10 to 11.
4) From the same host send udp packets to 127.0.0.1:40000, and the socket
index which receives the packets will be different from the one received
in step 2.
This should not happen as the socket bound to 0.0.0.0:44096 should not
change the behavior of the sockets bound to 0.0.0.0:40000.
It's the same case for IPv6, and this patch also fixes that.
Signed-off-by: Su, Xuemin <suxm@chinanetcenter.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | net/ipv4/udp.c | 73 | ||||
-rw-r--r-- | net/ipv6/udp.c | 71 |
2 files changed, 32 insertions, 112 deletions
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index ba0d8b8b7690..ca5e8ea29538 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c | |||
@@ -391,9 +391,9 @@ int udp_v4_get_port(struct sock *sk, unsigned short snum) | |||
391 | return udp_lib_get_port(sk, snum, ipv4_rcv_saddr_equal, hash2_nulladdr); | 391 | return udp_lib_get_port(sk, snum, ipv4_rcv_saddr_equal, hash2_nulladdr); |
392 | } | 392 | } |
393 | 393 | ||
394 | static inline int compute_score(struct sock *sk, struct net *net, | 394 | static int compute_score(struct sock *sk, struct net *net, |
395 | __be32 saddr, unsigned short hnum, __be16 sport, | 395 | __be32 saddr, __be16 sport, |
396 | __be32 daddr, __be16 dport, int dif) | 396 | __be32 daddr, unsigned short hnum, int dif) |
397 | { | 397 | { |
398 | int score; | 398 | int score; |
399 | struct inet_sock *inet; | 399 | struct inet_sock *inet; |
@@ -434,52 +434,6 @@ static inline int compute_score(struct sock *sk, struct net *net, | |||
434 | return score; | 434 | return score; |
435 | } | 435 | } |
436 | 436 | ||
437 | /* | ||
438 | * In this second variant, we check (daddr, dport) matches (inet_rcv_sadd, inet_num) | ||
439 | */ | ||
440 | static inline int compute_score2(struct sock *sk, struct net *net, | ||
441 | __be32 saddr, __be16 sport, | ||
442 | __be32 daddr, unsigned int hnum, int dif) | ||
443 | { | ||
444 | int score; | ||
445 | struct inet_sock *inet; | ||
446 | |||
447 | if (!net_eq(sock_net(sk), net) || | ||
448 | ipv6_only_sock(sk)) | ||
449 | return -1; | ||
450 | |||
451 | inet = inet_sk(sk); | ||
452 | |||
453 | if (inet->inet_rcv_saddr != daddr || | ||
454 | inet->inet_num != hnum) | ||
455 | return -1; | ||
456 | |||
457 | score = (sk->sk_family == PF_INET) ? 2 : 1; | ||
458 | |||
459 | if (inet->inet_daddr) { | ||
460 | if (inet->inet_daddr != saddr) | ||
461 | return -1; | ||
462 | score += 4; | ||
463 | } | ||
464 | |||
465 | if (inet->inet_dport) { | ||
466 | if (inet->inet_dport != sport) | ||
467 | return -1; | ||
468 | score += 4; | ||
469 | } | ||
470 | |||
471 | if (sk->sk_bound_dev_if) { | ||
472 | if (sk->sk_bound_dev_if != dif) | ||
473 | return -1; | ||
474 | score += 4; | ||
475 | } | ||
476 | |||
477 | if (sk->sk_incoming_cpu == raw_smp_processor_id()) | ||
478 | score++; | ||
479 | |||
480 | return score; | ||
481 | } | ||
482 | |||
483 | static u32 udp_ehashfn(const struct net *net, const __be32 laddr, | 437 | static u32 udp_ehashfn(const struct net *net, const __be32 laddr, |
484 | const __u16 lport, const __be32 faddr, | 438 | const __u16 lport, const __be32 faddr, |
485 | const __be16 fport) | 439 | const __be16 fport) |
@@ -492,11 +446,11 @@ static u32 udp_ehashfn(const struct net *net, const __be32 laddr, | |||
492 | udp_ehash_secret + net_hash_mix(net)); | 446 | udp_ehash_secret + net_hash_mix(net)); |
493 | } | 447 | } |
494 | 448 | ||
495 | /* called with read_rcu_lock() */ | 449 | /* called with rcu_read_lock() */ |
496 | static struct sock *udp4_lib_lookup2(struct net *net, | 450 | static struct sock *udp4_lib_lookup2(struct net *net, |
497 | __be32 saddr, __be16 sport, | 451 | __be32 saddr, __be16 sport, |
498 | __be32 daddr, unsigned int hnum, int dif, | 452 | __be32 daddr, unsigned int hnum, int dif, |
499 | struct udp_hslot *hslot2, unsigned int slot2, | 453 | struct udp_hslot *hslot2, |
500 | struct sk_buff *skb) | 454 | struct sk_buff *skb) |
501 | { | 455 | { |
502 | struct sock *sk, *result; | 456 | struct sock *sk, *result; |
@@ -506,7 +460,7 @@ static struct sock *udp4_lib_lookup2(struct net *net, | |||
506 | result = NULL; | 460 | result = NULL; |
507 | badness = 0; | 461 | badness = 0; |
508 | udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) { | 462 | udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) { |
509 | score = compute_score2(sk, net, saddr, sport, | 463 | score = compute_score(sk, net, saddr, sport, |
510 | daddr, hnum, dif); | 464 | daddr, hnum, dif); |
511 | if (score > badness) { | 465 | if (score > badness) { |
512 | reuseport = sk->sk_reuseport; | 466 | reuseport = sk->sk_reuseport; |
@@ -554,17 +508,22 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, | |||
554 | 508 | ||
555 | result = udp4_lib_lookup2(net, saddr, sport, | 509 | result = udp4_lib_lookup2(net, saddr, sport, |
556 | daddr, hnum, dif, | 510 | daddr, hnum, dif, |
557 | hslot2, slot2, skb); | 511 | hslot2, skb); |
558 | if (!result) { | 512 | if (!result) { |
513 | unsigned int old_slot2 = slot2; | ||
559 | hash2 = udp4_portaddr_hash(net, htonl(INADDR_ANY), hnum); | 514 | hash2 = udp4_portaddr_hash(net, htonl(INADDR_ANY), hnum); |
560 | slot2 = hash2 & udptable->mask; | 515 | slot2 = hash2 & udptable->mask; |
516 | /* avoid searching the same slot again. */ | ||
517 | if (unlikely(slot2 == old_slot2)) | ||
518 | return result; | ||
519 | |||
561 | hslot2 = &udptable->hash2[slot2]; | 520 | hslot2 = &udptable->hash2[slot2]; |
562 | if (hslot->count < hslot2->count) | 521 | if (hslot->count < hslot2->count) |
563 | goto begin; | 522 | goto begin; |
564 | 523 | ||
565 | result = udp4_lib_lookup2(net, saddr, sport, | 524 | result = udp4_lib_lookup2(net, saddr, sport, |
566 | htonl(INADDR_ANY), hnum, dif, | 525 | daddr, hnum, dif, |
567 | hslot2, slot2, skb); | 526 | hslot2, skb); |
568 | } | 527 | } |
569 | return result; | 528 | return result; |
570 | } | 529 | } |
@@ -572,8 +531,8 @@ begin: | |||
572 | result = NULL; | 531 | result = NULL; |
573 | badness = 0; | 532 | badness = 0; |
574 | sk_for_each_rcu(sk, &hslot->head) { | 533 | sk_for_each_rcu(sk, &hslot->head) { |
575 | score = compute_score(sk, net, saddr, hnum, sport, | 534 | score = compute_score(sk, net, saddr, sport, |
576 | daddr, dport, dif); | 535 | daddr, hnum, dif); |
577 | if (score > badness) { | 536 | if (score > badness) { |
578 | reuseport = sk->sk_reuseport; | 537 | reuseport = sk->sk_reuseport; |
579 | if (reuseport) { | 538 | if (reuseport) { |
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index f421c9f23c5b..005dc82c2138 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c | |||
@@ -115,11 +115,10 @@ static void udp_v6_rehash(struct sock *sk) | |||
115 | udp_lib_rehash(sk, new_hash); | 115 | udp_lib_rehash(sk, new_hash); |
116 | } | 116 | } |
117 | 117 | ||
118 | static inline int compute_score(struct sock *sk, struct net *net, | 118 | static int compute_score(struct sock *sk, struct net *net, |
119 | unsigned short hnum, | 119 | const struct in6_addr *saddr, __be16 sport, |
120 | const struct in6_addr *saddr, __be16 sport, | 120 | const struct in6_addr *daddr, unsigned short hnum, |
121 | const struct in6_addr *daddr, __be16 dport, | 121 | int dif) |
122 | int dif) | ||
123 | { | 122 | { |
124 | int score; | 123 | int score; |
125 | struct inet_sock *inet; | 124 | struct inet_sock *inet; |
@@ -162,54 +161,11 @@ static inline int compute_score(struct sock *sk, struct net *net, | |||
162 | return score; | 161 | return score; |
163 | } | 162 | } |
164 | 163 | ||
165 | static inline int compute_score2(struct sock *sk, struct net *net, | 164 | /* called with rcu_read_lock() */ |
166 | const struct in6_addr *saddr, __be16 sport, | ||
167 | const struct in6_addr *daddr, | ||
168 | unsigned short hnum, int dif) | ||
169 | { | ||
170 | int score; | ||
171 | struct inet_sock *inet; | ||
172 | |||
173 | if (!net_eq(sock_net(sk), net) || | ||
174 | udp_sk(sk)->udp_port_hash != hnum || | ||
175 | sk->sk_family != PF_INET6) | ||
176 | return -1; | ||
177 | |||
178 | if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr)) | ||
179 | return -1; | ||
180 | |||
181 | score = 0; | ||
182 | inet = inet_sk(sk); | ||
183 | |||
184 | if (inet->inet_dport) { | ||
185 | if (inet->inet_dport != sport) | ||
186 | return -1; | ||
187 | score++; | ||
188 | } | ||
189 | |||
190 | if (!ipv6_addr_any(&sk->sk_v6_daddr)) { | ||
191 | if (!ipv6_addr_equal(&sk->sk_v6_daddr, saddr)) | ||
192 | return -1; | ||
193 | score++; | ||
194 | } | ||
195 | |||
196 | if (sk->sk_bound_dev_if) { | ||
197 | if (sk->sk_bound_dev_if != dif) | ||
198 | return -1; | ||
199 | score++; | ||
200 | } | ||
201 | |||
202 | if (sk->sk_incoming_cpu == raw_smp_processor_id()) | ||
203 | score++; | ||
204 | |||
205 | return score; | ||
206 | } | ||
207 | |||
208 | /* called with read_rcu_lock() */ | ||
209 | static struct sock *udp6_lib_lookup2(struct net *net, | 165 | static struct sock *udp6_lib_lookup2(struct net *net, |
210 | const struct in6_addr *saddr, __be16 sport, | 166 | const struct in6_addr *saddr, __be16 sport, |
211 | const struct in6_addr *daddr, unsigned int hnum, int dif, | 167 | const struct in6_addr *daddr, unsigned int hnum, int dif, |
212 | struct udp_hslot *hslot2, unsigned int slot2, | 168 | struct udp_hslot *hslot2, |
213 | struct sk_buff *skb) | 169 | struct sk_buff *skb) |
214 | { | 170 | { |
215 | struct sock *sk, *result; | 171 | struct sock *sk, *result; |
@@ -219,7 +175,7 @@ static struct sock *udp6_lib_lookup2(struct net *net, | |||
219 | result = NULL; | 175 | result = NULL; |
220 | badness = -1; | 176 | badness = -1; |
221 | udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) { | 177 | udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) { |
222 | score = compute_score2(sk, net, saddr, sport, | 178 | score = compute_score(sk, net, saddr, sport, |
223 | daddr, hnum, dif); | 179 | daddr, hnum, dif); |
224 | if (score > badness) { | 180 | if (score > badness) { |
225 | reuseport = sk->sk_reuseport; | 181 | reuseport = sk->sk_reuseport; |
@@ -268,17 +224,22 @@ struct sock *__udp6_lib_lookup(struct net *net, | |||
268 | 224 | ||
269 | result = udp6_lib_lookup2(net, saddr, sport, | 225 | result = udp6_lib_lookup2(net, saddr, sport, |
270 | daddr, hnum, dif, | 226 | daddr, hnum, dif, |
271 | hslot2, slot2, skb); | 227 | hslot2, skb); |
272 | if (!result) { | 228 | if (!result) { |
229 | unsigned int old_slot2 = slot2; | ||
273 | hash2 = udp6_portaddr_hash(net, &in6addr_any, hnum); | 230 | hash2 = udp6_portaddr_hash(net, &in6addr_any, hnum); |
274 | slot2 = hash2 & udptable->mask; | 231 | slot2 = hash2 & udptable->mask; |
232 | /* avoid searching the same slot again. */ | ||
233 | if (unlikely(slot2 == old_slot2)) | ||
234 | return result; | ||
235 | |||
275 | hslot2 = &udptable->hash2[slot2]; | 236 | hslot2 = &udptable->hash2[slot2]; |
276 | if (hslot->count < hslot2->count) | 237 | if (hslot->count < hslot2->count) |
277 | goto begin; | 238 | goto begin; |
278 | 239 | ||
279 | result = udp6_lib_lookup2(net, saddr, sport, | 240 | result = udp6_lib_lookup2(net, saddr, sport, |
280 | &in6addr_any, hnum, dif, | 241 | daddr, hnum, dif, |
281 | hslot2, slot2, skb); | 242 | hslot2, skb); |
282 | } | 243 | } |
283 | return result; | 244 | return result; |
284 | } | 245 | } |
@@ -286,7 +247,7 @@ begin: | |||
286 | result = NULL; | 247 | result = NULL; |
287 | badness = -1; | 248 | badness = -1; |
288 | sk_for_each_rcu(sk, &hslot->head) { | 249 | sk_for_each_rcu(sk, &hslot->head) { |
289 | score = compute_score(sk, net, hnum, saddr, sport, daddr, dport, dif); | 250 | score = compute_score(sk, net, saddr, sport, daddr, hnum, dif); |
290 | if (score > badness) { | 251 | if (score > badness) { |
291 | reuseport = sk->sk_reuseport; | 252 | reuseport = sk->sk_reuseport; |
292 | if (reuseport) { | 253 | if (reuseport) { |