aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv6/udp.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv6/udp.c')
-rw-r--r--net/ipv6/udp.c265
1 files changed, 212 insertions, 53 deletions
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index cf538ed5ef6a..69ebdbe78c47 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -53,7 +53,7 @@ int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2)
53{ 53{
54 const struct in6_addr *sk_rcv_saddr6 = &inet6_sk(sk)->rcv_saddr; 54 const struct in6_addr *sk_rcv_saddr6 = &inet6_sk(sk)->rcv_saddr;
55 const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2); 55 const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2);
56 __be32 sk_rcv_saddr = inet_sk(sk)->rcv_saddr; 56 __be32 sk1_rcv_saddr = inet_sk(sk)->inet_rcv_saddr;
57 __be32 sk2_rcv_saddr = inet_rcv_saddr(sk2); 57 __be32 sk2_rcv_saddr = inet_rcv_saddr(sk2);
58 int sk_ipv6only = ipv6_only_sock(sk); 58 int sk_ipv6only = ipv6_only_sock(sk);
59 int sk2_ipv6only = inet_v6_ipv6only(sk2); 59 int sk2_ipv6only = inet_v6_ipv6only(sk2);
@@ -63,8 +63,8 @@ int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2)
63 /* if both are mapped, treat as IPv4 */ 63 /* if both are mapped, treat as IPv4 */
64 if (addr_type == IPV6_ADDR_MAPPED && addr_type2 == IPV6_ADDR_MAPPED) 64 if (addr_type == IPV6_ADDR_MAPPED && addr_type2 == IPV6_ADDR_MAPPED)
65 return (!sk2_ipv6only && 65 return (!sk2_ipv6only &&
66 (!sk_rcv_saddr || !sk2_rcv_saddr || 66 (!sk1_rcv_saddr || !sk2_rcv_saddr ||
67 sk_rcv_saddr == sk2_rcv_saddr)); 67 sk1_rcv_saddr == sk2_rcv_saddr));
68 68
69 if (addr_type2 == IPV6_ADDR_ANY && 69 if (addr_type2 == IPV6_ADDR_ANY &&
70 !(sk2_ipv6only && addr_type == IPV6_ADDR_MAPPED)) 70 !(sk2_ipv6only && addr_type == IPV6_ADDR_MAPPED))
@@ -81,9 +81,33 @@ int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2)
81 return 0; 81 return 0;
82} 82}
83 83
84static unsigned int udp6_portaddr_hash(struct net *net,
85 const struct in6_addr *addr6,
86 unsigned int port)
87{
88 unsigned int hash, mix = net_hash_mix(net);
89
90 if (ipv6_addr_any(addr6))
91 hash = jhash_1word(0, mix);
92 else if (ipv6_addr_v4mapped(addr6))
93 hash = jhash_1word(addr6->s6_addr32[3], mix);
94 else
95 hash = jhash2(addr6->s6_addr32, 4, mix);
96
97 return hash ^ port;
98}
99
100
84int udp_v6_get_port(struct sock *sk, unsigned short snum) 101int udp_v6_get_port(struct sock *sk, unsigned short snum)
85{ 102{
86 return udp_lib_get_port(sk, snum, ipv6_rcv_saddr_equal); 103 unsigned int hash2_nulladdr =
104 udp6_portaddr_hash(sock_net(sk), &in6addr_any, snum);
105 unsigned int hash2_partial =
106 udp6_portaddr_hash(sock_net(sk), &inet6_sk(sk)->rcv_saddr, 0);
107
108 /* precompute partial secondary hash */
109 udp_sk(sk)->udp_portaddr_hash = hash2_partial;
110 return udp_lib_get_port(sk, snum, ipv6_rcv_saddr_equal, hash2_nulladdr);
87} 111}
88 112
89static inline int compute_score(struct sock *sk, struct net *net, 113static inline int compute_score(struct sock *sk, struct net *net,
@@ -94,14 +118,14 @@ static inline int compute_score(struct sock *sk, struct net *net,
94{ 118{
95 int score = -1; 119 int score = -1;
96 120
97 if (net_eq(sock_net(sk), net) && sk->sk_hash == hnum && 121 if (net_eq(sock_net(sk), net) && udp_sk(sk)->udp_port_hash == hnum &&
98 sk->sk_family == PF_INET6) { 122 sk->sk_family == PF_INET6) {
99 struct ipv6_pinfo *np = inet6_sk(sk); 123 struct ipv6_pinfo *np = inet6_sk(sk);
100 struct inet_sock *inet = inet_sk(sk); 124 struct inet_sock *inet = inet_sk(sk);
101 125
102 score = 0; 126 score = 0;
103 if (inet->dport) { 127 if (inet->inet_dport) {
104 if (inet->dport != sport) 128 if (inet->inet_dport != sport)
105 return -1; 129 return -1;
106 score++; 130 score++;
107 } 131 }
@@ -124,6 +148,86 @@ static inline int compute_score(struct sock *sk, struct net *net,
124 return score; 148 return score;
125} 149}
126 150
151#define SCORE2_MAX (1 + 1 + 1)
152static inline int compute_score2(struct sock *sk, struct net *net,
153 const struct in6_addr *saddr, __be16 sport,
154 const struct in6_addr *daddr, unsigned short hnum,
155 int dif)
156{
157 int score = -1;
158
159 if (net_eq(sock_net(sk), net) && udp_sk(sk)->udp_port_hash == hnum &&
160 sk->sk_family == PF_INET6) {
161 struct ipv6_pinfo *np = inet6_sk(sk);
162 struct inet_sock *inet = inet_sk(sk);
163
164 if (!ipv6_addr_equal(&np->rcv_saddr, daddr))
165 return -1;
166 score = 0;
167 if (inet->inet_dport) {
168 if (inet->inet_dport != sport)
169 return -1;
170 score++;
171 }
172 if (!ipv6_addr_any(&np->daddr)) {
173 if (!ipv6_addr_equal(&np->daddr, saddr))
174 return -1;
175 score++;
176 }
177 if (sk->sk_bound_dev_if) {
178 if (sk->sk_bound_dev_if != dif)
179 return -1;
180 score++;
181 }
182 }
183 return score;
184}
185
186
187/* called with read_rcu_lock() */
188static struct sock *udp6_lib_lookup2(struct net *net,
189 const struct in6_addr *saddr, __be16 sport,
190 const struct in6_addr *daddr, unsigned int hnum, int dif,
191 struct udp_hslot *hslot2, unsigned int slot2)
192{
193 struct sock *sk, *result;
194 struct hlist_nulls_node *node;
195 int score, badness;
196
197begin:
198 result = NULL;
199 badness = -1;
200 udp_portaddr_for_each_entry_rcu(sk, node, &hslot2->head) {
201 score = compute_score2(sk, net, saddr, sport,
202 daddr, hnum, dif);
203 if (score > badness) {
204 result = sk;
205 badness = score;
206 if (score == SCORE2_MAX)
207 goto exact_match;
208 }
209 }
210 /*
211 * if the nulls value we got at the end of this lookup is
212 * not the expected one, we must restart lookup.
213 * We probably met an item that was moved to another chain.
214 */
215 if (get_nulls_value(node) != slot2)
216 goto begin;
217
218 if (result) {
219exact_match:
220 if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt)))
221 result = NULL;
222 else if (unlikely(compute_score2(result, net, saddr, sport,
223 daddr, hnum, dif) < badness)) {
224 sock_put(result);
225 goto begin;
226 }
227 }
228 return result;
229}
230
127static struct sock *__udp6_lib_lookup(struct net *net, 231static struct sock *__udp6_lib_lookup(struct net *net,
128 struct in6_addr *saddr, __be16 sport, 232 struct in6_addr *saddr, __be16 sport,
129 struct in6_addr *daddr, __be16 dport, 233 struct in6_addr *daddr, __be16 dport,
@@ -132,11 +236,35 @@ static struct sock *__udp6_lib_lookup(struct net *net,
132 struct sock *sk, *result; 236 struct sock *sk, *result;
133 struct hlist_nulls_node *node; 237 struct hlist_nulls_node *node;
134 unsigned short hnum = ntohs(dport); 238 unsigned short hnum = ntohs(dport);
135 unsigned int hash = udp_hashfn(net, hnum); 239 unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask);
136 struct udp_hslot *hslot = &udptable->hash[hash]; 240 struct udp_hslot *hslot2, *hslot = &udptable->hash[slot];
137 int score, badness; 241 int score, badness;
138 242
139 rcu_read_lock(); 243 rcu_read_lock();
244 if (hslot->count > 10) {
245 hash2 = udp6_portaddr_hash(net, daddr, hnum);
246 slot2 = hash2 & udptable->mask;
247 hslot2 = &udptable->hash2[slot2];
248 if (hslot->count < hslot2->count)
249 goto begin;
250
251 result = udp6_lib_lookup2(net, saddr, sport,
252 daddr, hnum, dif,
253 hslot2, slot2);
254 if (!result) {
255 hash2 = udp6_portaddr_hash(net, &in6addr_any, hnum);
256 slot2 = hash2 & udptable->mask;
257 hslot2 = &udptable->hash2[slot2];
258 if (hslot->count < hslot2->count)
259 goto begin;
260
261 result = udp6_lib_lookup2(net, &in6addr_any, sport,
262 daddr, hnum, dif,
263 hslot2, slot2);
264 }
265 rcu_read_unlock();
266 return result;
267 }
140begin: 268begin:
141 result = NULL; 269 result = NULL;
142 badness = -1; 270 badness = -1;
@@ -152,7 +280,7 @@ begin:
152 * not the expected one, we must restart lookup. 280 * not the expected one, we must restart lookup.
153 * We probably met an item that was moved to another chain. 281 * We probably met an item that was moved to another chain.
154 */ 282 */
155 if (get_nulls_value(node) != hash) 283 if (get_nulls_value(node) != slot)
156 goto begin; 284 goto begin;
157 285
158 if (result) { 286 if (result) {
@@ -252,7 +380,7 @@ try_again:
252 UDP_MIB_INDATAGRAMS, is_udplite); 380 UDP_MIB_INDATAGRAMS, is_udplite);
253 } 381 }
254 382
255 sock_recv_timestamp(msg, sk, skb); 383 sock_recv_ts_and_drops(msg, sk, skb);
256 384
257 /* Copy the address. */ 385 /* Copy the address. */
258 if (msg->msg_name) { 386 if (msg->msg_name) {
@@ -265,8 +393,8 @@ try_again:
265 sin6->sin6_scope_id = 0; 393 sin6->sin6_scope_id = 0;
266 394
267 if (is_udp4) 395 if (is_udp4)
268 ipv6_addr_set(&sin6->sin6_addr, 0, 0, 396 ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr,
269 htonl(0xffff), ip_hdr(skb)->saddr); 397 &sin6->sin6_addr);
270 else { 398 else {
271 ipv6_addr_copy(&sin6->sin6_addr, 399 ipv6_addr_copy(&sin6->sin6_addr,
272 &ipv6_hdr(skb)->saddr); 400 &ipv6_hdr(skb)->saddr);
@@ -383,18 +511,18 @@ int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
383 goto drop; 511 goto drop;
384 } 512 }
385 513
386 if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) { 514 if ((rc = sock_queue_rcv_skb(sk, skb)) < 0) {
387 /* Note that an ENOMEM error is charged twice */ 515 /* Note that an ENOMEM error is charged twice */
388 if (rc == -ENOMEM) { 516 if (rc == -ENOMEM)
389 UDP6_INC_STATS_BH(sock_net(sk), 517 UDP6_INC_STATS_BH(sock_net(sk),
390 UDP_MIB_RCVBUFERRORS, is_udplite); 518 UDP_MIB_RCVBUFERRORS, is_udplite);
391 atomic_inc(&sk->sk_drops); 519 goto drop_no_sk_drops_inc;
392 }
393 goto drop;
394 } 520 }
395 521
396 return 0; 522 return 0;
397drop: 523drop:
524 atomic_inc(&sk->sk_drops);
525drop_no_sk_drops_inc:
398 UDP6_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite); 526 UDP6_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
399 kfree_skb(skb); 527 kfree_skb(skb);
400 return -1; 528 return -1;
@@ -415,10 +543,11 @@ static struct sock *udp_v6_mcast_next(struct net *net, struct sock *sk,
415 if (!net_eq(sock_net(s), net)) 543 if (!net_eq(sock_net(s), net))
416 continue; 544 continue;
417 545
418 if (s->sk_hash == num && s->sk_family == PF_INET6) { 546 if (udp_sk(s)->udp_port_hash == num &&
547 s->sk_family == PF_INET6) {
419 struct ipv6_pinfo *np = inet6_sk(s); 548 struct ipv6_pinfo *np = inet6_sk(s);
420 if (inet->dport) { 549 if (inet->inet_dport) {
421 if (inet->dport != rmt_port) 550 if (inet->inet_dport != rmt_port)
422 continue; 551 continue;
423 } 552 }
424 if (!ipv6_addr_any(&np->daddr) && 553 if (!ipv6_addr_any(&np->daddr) &&
@@ -440,6 +569,33 @@ static struct sock *udp_v6_mcast_next(struct net *net, struct sock *sk,
440 return NULL; 569 return NULL;
441} 570}
442 571
572static void flush_stack(struct sock **stack, unsigned int count,
573 struct sk_buff *skb, unsigned int final)
574{
575 unsigned int i;
576 struct sock *sk;
577 struct sk_buff *skb1;
578
579 for (i = 0; i < count; i++) {
580 skb1 = (i == final) ? skb : skb_clone(skb, GFP_ATOMIC);
581
582 sk = stack[i];
583 if (skb1) {
584 bh_lock_sock(sk);
585 if (!sock_owned_by_user(sk))
586 udpv6_queue_rcv_skb(sk, skb1);
587 else
588 sk_add_backlog(sk, skb1);
589 bh_unlock_sock(sk);
590 } else {
591 atomic_inc(&sk->sk_drops);
592 UDP6_INC_STATS_BH(sock_net(sk),
593 UDP_MIB_RCVBUFERRORS, IS_UDPLITE(sk));
594 UDP6_INC_STATS_BH(sock_net(sk),
595 UDP_MIB_INERRORS, IS_UDPLITE(sk));
596 }
597 }
598}
443/* 599/*
444 * Note: called only from the BH handler context, 600 * Note: called only from the BH handler context,
445 * so we don't need to lock the hashes. 601 * so we don't need to lock the hashes.
@@ -448,41 +604,43 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
448 struct in6_addr *saddr, struct in6_addr *daddr, 604 struct in6_addr *saddr, struct in6_addr *daddr,
449 struct udp_table *udptable) 605 struct udp_table *udptable)
450{ 606{
451 struct sock *sk, *sk2; 607 struct sock *sk, *stack[256 / sizeof(struct sock *)];
452 const struct udphdr *uh = udp_hdr(skb); 608 const struct udphdr *uh = udp_hdr(skb);
453 struct udp_hslot *hslot = &udptable->hash[udp_hashfn(net, ntohs(uh->dest))]; 609 struct udp_hslot *hslot = udp_hashslot(udptable, net, ntohs(uh->dest));
454 int dif; 610 int dif;
611 unsigned int i, count = 0;
455 612
456 spin_lock(&hslot->lock); 613 spin_lock(&hslot->lock);
457 sk = sk_nulls_head(&hslot->head); 614 sk = sk_nulls_head(&hslot->head);
458 dif = inet6_iif(skb); 615 dif = inet6_iif(skb);
459 sk = udp_v6_mcast_next(net, sk, uh->dest, daddr, uh->source, saddr, dif); 616 sk = udp_v6_mcast_next(net, sk, uh->dest, daddr, uh->source, saddr, dif);
460 if (!sk) { 617 while (sk) {
461 kfree_skb(skb); 618 stack[count++] = sk;
462 goto out; 619 sk = udp_v6_mcast_next(net, sk_nulls_next(sk), uh->dest, daddr,
463 } 620 uh->source, saddr, dif);
464 621 if (unlikely(count == ARRAY_SIZE(stack))) {
465 sk2 = sk; 622 if (!sk)
466 while ((sk2 = udp_v6_mcast_next(net, sk_nulls_next(sk2), uh->dest, daddr, 623 break;
467 uh->source, saddr, dif))) { 624 flush_stack(stack, count, skb, ~0);
468 struct sk_buff *buff = skb_clone(skb, GFP_ATOMIC); 625 count = 0;
469 if (buff) {
470 bh_lock_sock(sk2);
471 if (!sock_owned_by_user(sk2))
472 udpv6_queue_rcv_skb(sk2, buff);
473 else
474 sk_add_backlog(sk2, buff);
475 bh_unlock_sock(sk2);
476 } 626 }
477 } 627 }
478 bh_lock_sock(sk); 628 /*
479 if (!sock_owned_by_user(sk)) 629 * before releasing the lock, we must take reference on sockets
480 udpv6_queue_rcv_skb(sk, skb); 630 */
481 else 631 for (i = 0; i < count; i++)
482 sk_add_backlog(sk, skb); 632 sock_hold(stack[i]);
483 bh_unlock_sock(sk); 633
484out:
485 spin_unlock(&hslot->lock); 634 spin_unlock(&hslot->lock);
635
636 if (count) {
637 flush_stack(stack, count, skb, count - 1);
638
639 for (i = 0; i < count; i++)
640 sock_put(stack[i]);
641 } else {
642 kfree_skb(skb);
643 }
486 return 0; 644 return 0;
487} 645}
488 646
@@ -792,7 +950,7 @@ int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk,
792 if (ipv6_addr_v4mapped(daddr)) { 950 if (ipv6_addr_v4mapped(daddr)) {
793 struct sockaddr_in sin; 951 struct sockaddr_in sin;
794 sin.sin_family = AF_INET; 952 sin.sin_family = AF_INET;
795 sin.sin_port = sin6 ? sin6->sin6_port : inet->dport; 953 sin.sin_port = sin6 ? sin6->sin6_port : inet->inet_dport;
796 sin.sin_addr.s_addr = daddr->s6_addr32[3]; 954 sin.sin_addr.s_addr = daddr->s6_addr32[3];
797 msg->msg_name = &sin; 955 msg->msg_name = &sin;
798 msg->msg_namelen = sizeof(sin); 956 msg->msg_namelen = sizeof(sin);
@@ -865,7 +1023,7 @@ do_udp_sendmsg:
865 if (sk->sk_state != TCP_ESTABLISHED) 1023 if (sk->sk_state != TCP_ESTABLISHED)
866 return -EDESTADDRREQ; 1024 return -EDESTADDRREQ;
867 1025
868 fl.fl_ip_dport = inet->dport; 1026 fl.fl_ip_dport = inet->inet_dport;
869 daddr = &np->daddr; 1027 daddr = &np->daddr;
870 fl.fl6_flowlabel = np->flow_label; 1028 fl.fl6_flowlabel = np->flow_label;
871 connected = 1; 1029 connected = 1;
@@ -877,6 +1035,8 @@ do_udp_sendmsg:
877 if (!fl.oif) 1035 if (!fl.oif)
878 fl.oif = np->sticky_pktinfo.ipi6_ifindex; 1036 fl.oif = np->sticky_pktinfo.ipi6_ifindex;
879 1037
1038 fl.mark = sk->sk_mark;
1039
880 if (msg->msg_controllen) { 1040 if (msg->msg_controllen) {
881 opt = &opt_space; 1041 opt = &opt_space;
882 memset(opt, 0, sizeof(struct ipv6_txoptions)); 1042 memset(opt, 0, sizeof(struct ipv6_txoptions));
@@ -909,7 +1069,7 @@ do_udp_sendmsg:
909 fl.fl6_dst.s6_addr[15] = 0x1; /* :: means loopback (BSD'ism) */ 1069 fl.fl6_dst.s6_addr[15] = 0x1; /* :: means loopback (BSD'ism) */
910 if (ipv6_addr_any(&fl.fl6_src) && !ipv6_addr_any(&np->saddr)) 1070 if (ipv6_addr_any(&fl.fl6_src) && !ipv6_addr_any(&np->saddr))
911 ipv6_addr_copy(&fl.fl6_src, &np->saddr); 1071 ipv6_addr_copy(&fl.fl6_src, &np->saddr);
912 fl.fl_ip_sport = inet->sport; 1072 fl.fl_ip_sport = inet->inet_sport;
913 1073
914 /* merge ip6_build_xmit from ip6_output */ 1074 /* merge ip6_build_xmit from ip6_output */
915 if (opt && opt->srcrt) { 1075 if (opt && opt->srcrt) {
@@ -1190,10 +1350,10 @@ static void udp6_sock_seq_show(struct seq_file *seq, struct sock *sp, int bucket
1190 1350
1191 dest = &np->daddr; 1351 dest = &np->daddr;
1192 src = &np->rcv_saddr; 1352 src = &np->rcv_saddr;
1193 destp = ntohs(inet->dport); 1353 destp = ntohs(inet->inet_dport);
1194 srcp = ntohs(inet->sport); 1354 srcp = ntohs(inet->inet_sport);
1195 seq_printf(seq, 1355 seq_printf(seq,
1196 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " 1356 "%5d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1197 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d\n", 1357 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d\n",
1198 bucket, 1358 bucket,
1199 src->s6_addr32[0], src->s6_addr32[1], 1359 src->s6_addr32[0], src->s6_addr32[1],
@@ -1282,7 +1442,6 @@ static struct inet_protosw udpv6_protosw = {
1282 .protocol = IPPROTO_UDP, 1442 .protocol = IPPROTO_UDP,
1283 .prot = &udpv6_prot, 1443 .prot = &udpv6_prot,
1284 .ops = &inet6_dgram_ops, 1444 .ops = &inet6_dgram_ops,
1285 .capability =-1,
1286 .no_check = UDP_CSUM_DEFAULT, 1445 .no_check = UDP_CSUM_DEFAULT,
1287 .flags = INET_PROTOSW_PERMANENT, 1446 .flags = INET_PROTOSW_PERMANENT,
1288}; 1447};