aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSu, Xuemin <suxm@chinanetcenter.com>2016-06-12 23:02:50 -0400
committerDavid S. Miller <davem@davemloft.net>2016-06-14 17:23:09 -0400
commitd1e37288c9146dccff830e3253e403af8705b51f (patch)
tree573ea16d2781cf3754df56de05a306f8fff17c88
parent6c0d54f1897d229748d4f41ef919078db6db2123 (diff)
udp reuseport: fix packet of same flow hashed to different socket
There is a corner case in which udp packets belonging to the same flow are hashed to different sockets when hslot->count changes from 10 to 11: 1) When hslot->count <= 10, __udp_lib_lookup() searches udp_table->hash, and always passes 'daddr' to udp_ehashfn(). 2) When hslot->count > 10, __udp_lib_lookup() searches udp_table->hash2, but may pass 'INADDR_ANY' to udp_ehashfn() if the sockets are bound to INADDR_ANY instead of a specific address. That means when hslot->count changes from 10 to 11, the hash calculated by udp_ehashfn() also changes, and udp packets belonging to the same flow will be hashed to a different socket. This is easily reproduced: 1) Create 10 udp sockets and bind all of them to 0.0.0.0:40000. 2) From the same host send udp packets to 127.0.0.1:40000, and record the index of the socket which receives the packets. 3) Create 1 more udp socket and bind it to 0.0.0.0:44096. The number 44096 is 40000 + UDP_HASH_SIZE(4096); this places the new socket in the same hslot as the aforementioned 10 sockets, and makes hslot->count change from 10 to 11. 4) From the same host send udp packets to 127.0.0.1:40000, and the index of the socket which receives the packets will be different from the one recorded in step 2. This should not happen, as the socket bound to 0.0.0.0:44096 should not change the behavior of the sockets bound to 0.0.0.0:40000. It's the same case for IPv6, and this patch also fixes that. Signed-off-by: Su, Xuemin <suxm@chinanetcenter.com> Signed-off-by: Eric Dumazet <edumazet@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--net/ipv4/udp.c73
-rw-r--r--net/ipv6/udp.c71
2 files changed, 32 insertions, 112 deletions
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index ba0d8b8b7690..ca5e8ea29538 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -391,9 +391,9 @@ int udp_v4_get_port(struct sock *sk, unsigned short snum)
391 return udp_lib_get_port(sk, snum, ipv4_rcv_saddr_equal, hash2_nulladdr); 391 return udp_lib_get_port(sk, snum, ipv4_rcv_saddr_equal, hash2_nulladdr);
392} 392}
393 393
394static inline int compute_score(struct sock *sk, struct net *net, 394static int compute_score(struct sock *sk, struct net *net,
395 __be32 saddr, unsigned short hnum, __be16 sport, 395 __be32 saddr, __be16 sport,
396 __be32 daddr, __be16 dport, int dif) 396 __be32 daddr, unsigned short hnum, int dif)
397{ 397{
398 int score; 398 int score;
399 struct inet_sock *inet; 399 struct inet_sock *inet;
@@ -434,52 +434,6 @@ static inline int compute_score(struct sock *sk, struct net *net,
434 return score; 434 return score;
435} 435}
436 436
437/*
438 * In this second variant, we check (daddr, dport) matches (inet_rcv_sadd, inet_num)
439 */
440static inline int compute_score2(struct sock *sk, struct net *net,
441 __be32 saddr, __be16 sport,
442 __be32 daddr, unsigned int hnum, int dif)
443{
444 int score;
445 struct inet_sock *inet;
446
447 if (!net_eq(sock_net(sk), net) ||
448 ipv6_only_sock(sk))
449 return -1;
450
451 inet = inet_sk(sk);
452
453 if (inet->inet_rcv_saddr != daddr ||
454 inet->inet_num != hnum)
455 return -1;
456
457 score = (sk->sk_family == PF_INET) ? 2 : 1;
458
459 if (inet->inet_daddr) {
460 if (inet->inet_daddr != saddr)
461 return -1;
462 score += 4;
463 }
464
465 if (inet->inet_dport) {
466 if (inet->inet_dport != sport)
467 return -1;
468 score += 4;
469 }
470
471 if (sk->sk_bound_dev_if) {
472 if (sk->sk_bound_dev_if != dif)
473 return -1;
474 score += 4;
475 }
476
477 if (sk->sk_incoming_cpu == raw_smp_processor_id())
478 score++;
479
480 return score;
481}
482
483static u32 udp_ehashfn(const struct net *net, const __be32 laddr, 437static u32 udp_ehashfn(const struct net *net, const __be32 laddr,
484 const __u16 lport, const __be32 faddr, 438 const __u16 lport, const __be32 faddr,
485 const __be16 fport) 439 const __be16 fport)
@@ -492,11 +446,11 @@ static u32 udp_ehashfn(const struct net *net, const __be32 laddr,
492 udp_ehash_secret + net_hash_mix(net)); 446 udp_ehash_secret + net_hash_mix(net));
493} 447}
494 448
495/* called with read_rcu_lock() */ 449/* called with rcu_read_lock() */
496static struct sock *udp4_lib_lookup2(struct net *net, 450static struct sock *udp4_lib_lookup2(struct net *net,
497 __be32 saddr, __be16 sport, 451 __be32 saddr, __be16 sport,
498 __be32 daddr, unsigned int hnum, int dif, 452 __be32 daddr, unsigned int hnum, int dif,
499 struct udp_hslot *hslot2, unsigned int slot2, 453 struct udp_hslot *hslot2,
500 struct sk_buff *skb) 454 struct sk_buff *skb)
501{ 455{
502 struct sock *sk, *result; 456 struct sock *sk, *result;
@@ -506,7 +460,7 @@ static struct sock *udp4_lib_lookup2(struct net *net,
506 result = NULL; 460 result = NULL;
507 badness = 0; 461 badness = 0;
508 udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) { 462 udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
509 score = compute_score2(sk, net, saddr, sport, 463 score = compute_score(sk, net, saddr, sport,
510 daddr, hnum, dif); 464 daddr, hnum, dif);
511 if (score > badness) { 465 if (score > badness) {
512 reuseport = sk->sk_reuseport; 466 reuseport = sk->sk_reuseport;
@@ -554,17 +508,22 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
554 508
555 result = udp4_lib_lookup2(net, saddr, sport, 509 result = udp4_lib_lookup2(net, saddr, sport,
556 daddr, hnum, dif, 510 daddr, hnum, dif,
557 hslot2, slot2, skb); 511 hslot2, skb);
558 if (!result) { 512 if (!result) {
513 unsigned int old_slot2 = slot2;
559 hash2 = udp4_portaddr_hash(net, htonl(INADDR_ANY), hnum); 514 hash2 = udp4_portaddr_hash(net, htonl(INADDR_ANY), hnum);
560 slot2 = hash2 & udptable->mask; 515 slot2 = hash2 & udptable->mask;
516 /* avoid searching the same slot again. */
517 if (unlikely(slot2 == old_slot2))
518 return result;
519
561 hslot2 = &udptable->hash2[slot2]; 520 hslot2 = &udptable->hash2[slot2];
562 if (hslot->count < hslot2->count) 521 if (hslot->count < hslot2->count)
563 goto begin; 522 goto begin;
564 523
565 result = udp4_lib_lookup2(net, saddr, sport, 524 result = udp4_lib_lookup2(net, saddr, sport,
566 htonl(INADDR_ANY), hnum, dif, 525 daddr, hnum, dif,
567 hslot2, slot2, skb); 526 hslot2, skb);
568 } 527 }
569 return result; 528 return result;
570 } 529 }
@@ -572,8 +531,8 @@ begin:
572 result = NULL; 531 result = NULL;
573 badness = 0; 532 badness = 0;
574 sk_for_each_rcu(sk, &hslot->head) { 533 sk_for_each_rcu(sk, &hslot->head) {
575 score = compute_score(sk, net, saddr, hnum, sport, 534 score = compute_score(sk, net, saddr, sport,
576 daddr, dport, dif); 535 daddr, hnum, dif);
577 if (score > badness) { 536 if (score > badness) {
578 reuseport = sk->sk_reuseport; 537 reuseport = sk->sk_reuseport;
579 if (reuseport) { 538 if (reuseport) {
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index f421c9f23c5b..005dc82c2138 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -115,11 +115,10 @@ static void udp_v6_rehash(struct sock *sk)
115 udp_lib_rehash(sk, new_hash); 115 udp_lib_rehash(sk, new_hash);
116} 116}
117 117
118static inline int compute_score(struct sock *sk, struct net *net, 118static int compute_score(struct sock *sk, struct net *net,
119 unsigned short hnum, 119 const struct in6_addr *saddr, __be16 sport,
120 const struct in6_addr *saddr, __be16 sport, 120 const struct in6_addr *daddr, unsigned short hnum,
121 const struct in6_addr *daddr, __be16 dport, 121 int dif)
122 int dif)
123{ 122{
124 int score; 123 int score;
125 struct inet_sock *inet; 124 struct inet_sock *inet;
@@ -162,54 +161,11 @@ static inline int compute_score(struct sock *sk, struct net *net,
162 return score; 161 return score;
163} 162}
164 163
165static inline int compute_score2(struct sock *sk, struct net *net, 164/* called with rcu_read_lock() */
166 const struct in6_addr *saddr, __be16 sport,
167 const struct in6_addr *daddr,
168 unsigned short hnum, int dif)
169{
170 int score;
171 struct inet_sock *inet;
172
173 if (!net_eq(sock_net(sk), net) ||
174 udp_sk(sk)->udp_port_hash != hnum ||
175 sk->sk_family != PF_INET6)
176 return -1;
177
178 if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr))
179 return -1;
180
181 score = 0;
182 inet = inet_sk(sk);
183
184 if (inet->inet_dport) {
185 if (inet->inet_dport != sport)
186 return -1;
187 score++;
188 }
189
190 if (!ipv6_addr_any(&sk->sk_v6_daddr)) {
191 if (!ipv6_addr_equal(&sk->sk_v6_daddr, saddr))
192 return -1;
193 score++;
194 }
195
196 if (sk->sk_bound_dev_if) {
197 if (sk->sk_bound_dev_if != dif)
198 return -1;
199 score++;
200 }
201
202 if (sk->sk_incoming_cpu == raw_smp_processor_id())
203 score++;
204
205 return score;
206}
207
208/* called with read_rcu_lock() */
209static struct sock *udp6_lib_lookup2(struct net *net, 165static struct sock *udp6_lib_lookup2(struct net *net,
210 const struct in6_addr *saddr, __be16 sport, 166 const struct in6_addr *saddr, __be16 sport,
211 const struct in6_addr *daddr, unsigned int hnum, int dif, 167 const struct in6_addr *daddr, unsigned int hnum, int dif,
212 struct udp_hslot *hslot2, unsigned int slot2, 168 struct udp_hslot *hslot2,
213 struct sk_buff *skb) 169 struct sk_buff *skb)
214{ 170{
215 struct sock *sk, *result; 171 struct sock *sk, *result;
@@ -219,7 +175,7 @@ static struct sock *udp6_lib_lookup2(struct net *net,
219 result = NULL; 175 result = NULL;
220 badness = -1; 176 badness = -1;
221 udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) { 177 udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
222 score = compute_score2(sk, net, saddr, sport, 178 score = compute_score(sk, net, saddr, sport,
223 daddr, hnum, dif); 179 daddr, hnum, dif);
224 if (score > badness) { 180 if (score > badness) {
225 reuseport = sk->sk_reuseport; 181 reuseport = sk->sk_reuseport;
@@ -268,17 +224,22 @@ struct sock *__udp6_lib_lookup(struct net *net,
268 224
269 result = udp6_lib_lookup2(net, saddr, sport, 225 result = udp6_lib_lookup2(net, saddr, sport,
270 daddr, hnum, dif, 226 daddr, hnum, dif,
271 hslot2, slot2, skb); 227 hslot2, skb);
272 if (!result) { 228 if (!result) {
229 unsigned int old_slot2 = slot2;
273 hash2 = udp6_portaddr_hash(net, &in6addr_any, hnum); 230 hash2 = udp6_portaddr_hash(net, &in6addr_any, hnum);
274 slot2 = hash2 & udptable->mask; 231 slot2 = hash2 & udptable->mask;
232 /* avoid searching the same slot again. */
233 if (unlikely(slot2 == old_slot2))
234 return result;
235
275 hslot2 = &udptable->hash2[slot2]; 236 hslot2 = &udptable->hash2[slot2];
276 if (hslot->count < hslot2->count) 237 if (hslot->count < hslot2->count)
277 goto begin; 238 goto begin;
278 239
279 result = udp6_lib_lookup2(net, saddr, sport, 240 result = udp6_lib_lookup2(net, saddr, sport,
280 &in6addr_any, hnum, dif, 241 daddr, hnum, dif,
281 hslot2, slot2, skb); 242 hslot2, skb);
282 } 243 }
283 return result; 244 return result;
284 } 245 }
@@ -286,7 +247,7 @@ begin:
286 result = NULL; 247 result = NULL;
287 badness = -1; 248 badness = -1;
288 sk_for_each_rcu(sk, &hslot->head) { 249 sk_for_each_rcu(sk, &hslot->head) {
289 score = compute_score(sk, net, hnum, saddr, sport, daddr, dport, dif); 250 score = compute_score(sk, net, saddr, sport, daddr, hnum, dif);
290 if (score > badness) { 251 if (score > badness) {
291 reuseport = sk->sk_reuseport; 252 reuseport = sk->sk_reuseport;
292 if (reuseport) { 253 if (reuseport) {