aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv6
diff options
context:
space:
mode:
authorJody McIntyre <scjody@modernduck.com>2006-01-05 08:03:40 -0500
committerJody McIntyre <scjody@modernduck.com>2006-01-05 08:03:40 -0500
commit0a75c23a009ff65f651532cecc16675d05f4de37 (patch)
treebdcd6158758fe1810f0ddddb80d2816779518688 /net/ipv6
parent34b8c399dc04c8e51f014b73458e654570698597 (diff)
parentdb9edfd7e339ca4113153d887e782dd05be5a9eb (diff)
Merge with http://kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6.git
Diffstat (limited to 'net/ipv6')
-rw-r--r--net/ipv6/Makefile3
-rw-r--r--net/ipv6/addrconf.c2
-rw-r--r--net/ipv6/af_inet6.c90
-rw-r--r--net/ipv6/ah6.c1
-rw-r--r--net/ipv6/esp6.c1
-rw-r--r--net/ipv6/exthdrs.c4
-rw-r--r--net/ipv6/inet6_connection_sock.c199
-rw-r--r--net/ipv6/inet6_hashtables.c183
-rw-r--r--net/ipv6/ip6_flowlabel.c2
-rw-r--r--net/ipv6/ip6_output.c2
-rw-r--r--net/ipv6/ipcomp6.c1
-rw-r--r--net/ipv6/ipv6_sockglue.c24
-rw-r--r--net/ipv6/mcast.c2
-rw-r--r--net/ipv6/netfilter/ip6_tables.c191
-rw-r--r--net/ipv6/netfilter/ip6t_LOG.c1
-rw-r--r--net/ipv6/netfilter/ip6t_ah.c1
-rw-r--r--net/ipv6/netfilter/ip6t_esp.c1
-rw-r--r--net/ipv6/netfilter/nf_conntrack_reasm.c4
-rw-r--r--net/ipv6/raw.c16
-rw-r--r--net/ipv6/tcp_ipv6.c639
-rw-r--r--net/ipv6/udp.c16
21 files changed, 689 insertions, 694 deletions
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile
index 6460eec834..9601fd7f9d 100644
--- a/net/ipv6/Makefile
+++ b/net/ipv6/Makefile
@@ -8,7 +8,8 @@ ipv6-objs := af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o sit.o \
8 route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o raw.o \ 8 route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o raw.o \
9 protocol.o icmp.o mcast.o reassembly.o tcp_ipv6.o \ 9 protocol.o icmp.o mcast.o reassembly.o tcp_ipv6.o \
10 exthdrs.o sysctl_net_ipv6.o datagram.o proc.o \ 10 exthdrs.o sysctl_net_ipv6.o datagram.o proc.o \
11 ip6_flowlabel.o ipv6_syms.o netfilter.o 11 ip6_flowlabel.o ipv6_syms.o netfilter.o \
12 inet6_connection_sock.o
12 13
13ipv6-$(CONFIG_XFRM) += xfrm6_policy.o xfrm6_state.o xfrm6_input.o \ 14ipv6-$(CONFIG_XFRM) += xfrm6_policy.o xfrm6_state.o xfrm6_input.o \
14 xfrm6_output.o 15 xfrm6_output.o
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index a60585fd85..704fb73e6c 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1195,7 +1195,7 @@ struct inet6_ifaddr * ipv6_get_ifaddr(struct in6_addr *addr, struct net_device *
1195int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2) 1195int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2)
1196{ 1196{
1197 const struct in6_addr *sk_rcv_saddr6 = &inet6_sk(sk)->rcv_saddr; 1197 const struct in6_addr *sk_rcv_saddr6 = &inet6_sk(sk)->rcv_saddr;
1198 const struct in6_addr *sk2_rcv_saddr6 = tcp_v6_rcv_saddr(sk2); 1198 const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2);
1199 u32 sk_rcv_saddr = inet_sk(sk)->rcv_saddr; 1199 u32 sk_rcv_saddr = inet_sk(sk)->rcv_saddr;
1200 u32 sk2_rcv_saddr = inet_rcv_saddr(sk2); 1200 u32 sk2_rcv_saddr = inet_rcv_saddr(sk2);
1201 int sk_ipv6only = ipv6_only_sock(sk); 1201 int sk_ipv6only = ipv6_only_sock(sk);
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index d9546380fa..68afc53be6 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -167,6 +167,7 @@ lookup_protocol:
167 sk->sk_reuse = 1; 167 sk->sk_reuse = 1;
168 168
169 inet = inet_sk(sk); 169 inet = inet_sk(sk);
170 inet->is_icsk = INET_PROTOSW_ICSK & answer_flags;
170 171
171 if (SOCK_RAW == sock->type) { 172 if (SOCK_RAW == sock->type) {
172 inet->num = protocol; 173 inet->num = protocol;
@@ -389,6 +390,8 @@ int inet6_destroy_sock(struct sock *sk)
389 return 0; 390 return 0;
390} 391}
391 392
393EXPORT_SYMBOL_GPL(inet6_destroy_sock);
394
392/* 395/*
393 * This does both peername and sockname. 396 * This does both peername and sockname.
394 */ 397 */
@@ -431,7 +434,6 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
431int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) 434int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
432{ 435{
433 struct sock *sk = sock->sk; 436 struct sock *sk = sock->sk;
434 int err = -EINVAL;
435 437
436 switch(cmd) 438 switch(cmd)
437 { 439 {
@@ -450,16 +452,15 @@ int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
450 case SIOCSIFDSTADDR: 452 case SIOCSIFDSTADDR:
451 return addrconf_set_dstaddr((void __user *) arg); 453 return addrconf_set_dstaddr((void __user *) arg);
452 default: 454 default:
453 if (!sk->sk_prot->ioctl || 455 if (!sk->sk_prot->ioctl)
454 (err = sk->sk_prot->ioctl(sk, cmd, arg)) == -ENOIOCTLCMD) 456 return -ENOIOCTLCMD;
455 return(dev_ioctl(cmd,(void __user *) arg)); 457 return sk->sk_prot->ioctl(sk, cmd, arg);
456 return err;
457 } 458 }
458 /*NOTREACHED*/ 459 /*NOTREACHED*/
459 return(0); 460 return(0);
460} 461}
461 462
462struct proto_ops inet6_stream_ops = { 463const struct proto_ops inet6_stream_ops = {
463 .family = PF_INET6, 464 .family = PF_INET6,
464 .owner = THIS_MODULE, 465 .owner = THIS_MODULE,
465 .release = inet6_release, 466 .release = inet6_release,
@@ -480,7 +481,7 @@ struct proto_ops inet6_stream_ops = {
480 .sendpage = tcp_sendpage 481 .sendpage = tcp_sendpage
481}; 482};
482 483
483struct proto_ops inet6_dgram_ops = { 484const struct proto_ops inet6_dgram_ops = {
484 .family = PF_INET6, 485 .family = PF_INET6,
485 .owner = THIS_MODULE, 486 .owner = THIS_MODULE,
486 .release = inet6_release, 487 .release = inet6_release,
@@ -508,7 +509,7 @@ static struct net_proto_family inet6_family_ops = {
508}; 509};
509 510
510/* Same as inet6_dgram_ops, sans udp_poll. */ 511/* Same as inet6_dgram_ops, sans udp_poll. */
511static struct proto_ops inet6_sockraw_ops = { 512static const struct proto_ops inet6_sockraw_ops = {
512 .family = PF_INET6, 513 .family = PF_INET6,
513 .owner = THIS_MODULE, 514 .owner = THIS_MODULE,
514 .release = inet6_release, 515 .release = inet6_release,
@@ -609,6 +610,79 @@ inet6_unregister_protosw(struct inet_protosw *p)
609 } 610 }
610} 611}
611 612
613int inet6_sk_rebuild_header(struct sock *sk)
614{
615 int err;
616 struct dst_entry *dst;
617 struct ipv6_pinfo *np = inet6_sk(sk);
618
619 dst = __sk_dst_check(sk, np->dst_cookie);
620
621 if (dst == NULL) {
622 struct inet_sock *inet = inet_sk(sk);
623 struct in6_addr *final_p = NULL, final;
624 struct flowi fl;
625
626 memset(&fl, 0, sizeof(fl));
627 fl.proto = sk->sk_protocol;
628 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
629 ipv6_addr_copy(&fl.fl6_src, &np->saddr);
630 fl.fl6_flowlabel = np->flow_label;
631 fl.oif = sk->sk_bound_dev_if;
632 fl.fl_ip_dport = inet->dport;
633 fl.fl_ip_sport = inet->sport;
634
635 if (np->opt && np->opt->srcrt) {
636 struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
637 ipv6_addr_copy(&final, &fl.fl6_dst);
638 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
639 final_p = &final;
640 }
641
642 err = ip6_dst_lookup(sk, &dst, &fl);
643 if (err) {
644 sk->sk_route_caps = 0;
645 return err;
646 }
647 if (final_p)
648 ipv6_addr_copy(&fl.fl6_dst, final_p);
649
650 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
651 sk->sk_err_soft = -err;
652 return err;
653 }
654
655 ip6_dst_store(sk, dst, NULL);
656 sk->sk_route_caps = dst->dev->features &
657 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
658 }
659
660 return 0;
661}
662
663EXPORT_SYMBOL_GPL(inet6_sk_rebuild_header);
664
665int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb)
666{
667 struct ipv6_pinfo *np = inet6_sk(sk);
668 struct inet6_skb_parm *opt = IP6CB(skb);
669
670 if (np->rxopt.all) {
671 if ((opt->hop && (np->rxopt.bits.hopopts ||
672 np->rxopt.bits.ohopopts)) ||
673 ((IPV6_FLOWINFO_MASK & *(u32*)skb->nh.raw) &&
674 np->rxopt.bits.rxflow) ||
675 (opt->srcrt && (np->rxopt.bits.srcrt ||
676 np->rxopt.bits.osrcrt)) ||
677 ((opt->dst1 || opt->dst0) &&
678 (np->rxopt.bits.dstopts || np->rxopt.bits.odstopts)))
679 return 1;
680 }
681 return 0;
682}
683
684EXPORT_SYMBOL_GPL(ipv6_opt_accepted);
685
612int 686int
613snmp6_mib_init(void *ptr[2], size_t mibsize, size_t mibalign) 687snmp6_mib_init(void *ptr[2], size_t mibsize, size_t mibalign)
614{ 688{
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index f3629730eb..13cc7f8955 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -33,6 +33,7 @@
33#include <linux/string.h> 33#include <linux/string.h>
34#include <net/icmp.h> 34#include <net/icmp.h>
35#include <net/ipv6.h> 35#include <net/ipv6.h>
36#include <net/protocol.h>
36#include <net/xfrm.h> 37#include <net/xfrm.h>
37#include <asm/scatterlist.h> 38#include <asm/scatterlist.h>
38 39
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 8bfbe99707..6de8ee1a5a 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -36,6 +36,7 @@
36#include <linux/random.h> 36#include <linux/random.h>
37#include <net/icmp.h> 37#include <net/icmp.h>
38#include <net/ipv6.h> 38#include <net/ipv6.h>
39#include <net/protocol.h>
39#include <linux/icmpv6.h> 40#include <linux/icmpv6.h>
40 41
41static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) 42static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index be6faf3113..113374dc34 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -413,6 +413,8 @@ ipv6_invert_rthdr(struct sock *sk, struct ipv6_rt_hdr *hdr)
413 return opt; 413 return opt;
414} 414}
415 415
416EXPORT_SYMBOL_GPL(ipv6_invert_rthdr);
417
416/********************************** 418/**********************************
417 Hop-by-hop options. 419 Hop-by-hop options.
418 **********************************/ 420 **********************************/
@@ -579,6 +581,8 @@ ipv6_dup_options(struct sock *sk, struct ipv6_txoptions *opt)
579 return opt2; 581 return opt2;
580} 582}
581 583
584EXPORT_SYMBOL_GPL(ipv6_dup_options);
585
582static int ipv6_renew_option(void *ohdr, 586static int ipv6_renew_option(void *ohdr,
583 struct ipv6_opt_hdr __user *newopt, int newoptlen, 587 struct ipv6_opt_hdr __user *newopt, int newoptlen,
584 int inherit, 588 int inherit,
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
new file mode 100644
index 0000000000..792f90f0f9
--- /dev/null
+++ b/net/ipv6/inet6_connection_sock.c
@@ -0,0 +1,199 @@
1/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * Support for INET6 connection oriented protocols.
7 *
8 * Authors: See the TCPv6 sources
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or(at your option) any later version.
14 */
15
16#include <linux/config.h>
17#include <linux/module.h>
18#include <linux/in6.h>
19#include <linux/ipv6.h>
20#include <linux/jhash.h>
21
22#include <net/addrconf.h>
23#include <net/inet_connection_sock.h>
24#include <net/inet_ecn.h>
25#include <net/inet_hashtables.h>
26#include <net/ip6_route.h>
27#include <net/sock.h>
28
29int inet6_csk_bind_conflict(const struct sock *sk,
30 const struct inet_bind_bucket *tb)
31{
32 const struct sock *sk2;
33 const struct hlist_node *node;
34
35 /* We must walk the whole port owner list in this case. -DaveM */
36 sk_for_each_bound(sk2, node, &tb->owners) {
37 if (sk != sk2 &&
38 (!sk->sk_bound_dev_if ||
39 !sk2->sk_bound_dev_if ||
40 sk->sk_bound_dev_if == sk2->sk_bound_dev_if) &&
41 (!sk->sk_reuse || !sk2->sk_reuse ||
42 sk2->sk_state == TCP_LISTEN) &&
43 ipv6_rcv_saddr_equal(sk, sk2))
44 break;
45 }
46
47 return node != NULL;
48}
49
50EXPORT_SYMBOL_GPL(inet6_csk_bind_conflict);
51
52/*
53 * request_sock (formerly open request) hash tables.
54 */
55static u32 inet6_synq_hash(const struct in6_addr *raddr, const u16 rport,
56 const u32 rnd, const u16 synq_hsize)
57{
58 u32 a = raddr->s6_addr32[0];
59 u32 b = raddr->s6_addr32[1];
60 u32 c = raddr->s6_addr32[2];
61
62 a += JHASH_GOLDEN_RATIO;
63 b += JHASH_GOLDEN_RATIO;
64 c += rnd;
65 __jhash_mix(a, b, c);
66
67 a += raddr->s6_addr32[3];
68 b += (u32)rport;
69 __jhash_mix(a, b, c);
70
71 return c & (synq_hsize - 1);
72}
73
74struct request_sock *inet6_csk_search_req(const struct sock *sk,
75 struct request_sock ***prevp,
76 const __u16 rport,
77 const struct in6_addr *raddr,
78 const struct in6_addr *laddr,
79 const int iif)
80{
81 const struct inet_connection_sock *icsk = inet_csk(sk);
82 struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt;
83 struct request_sock *req, **prev;
84
85 for (prev = &lopt->syn_table[inet6_synq_hash(raddr, rport,
86 lopt->hash_rnd,
87 lopt->nr_table_entries)];
88 (req = *prev) != NULL;
89 prev = &req->dl_next) {
90 const struct inet6_request_sock *treq = inet6_rsk(req);
91
92 if (inet_rsk(req)->rmt_port == rport &&
93 req->rsk_ops->family == AF_INET6 &&
94 ipv6_addr_equal(&treq->rmt_addr, raddr) &&
95 ipv6_addr_equal(&treq->loc_addr, laddr) &&
96 (!treq->iif || treq->iif == iif)) {
97 BUG_TRAP(req->sk == NULL);
98 *prevp = prev;
99 return req;
100 }
101 }
102
103 return NULL;
104}
105
106EXPORT_SYMBOL_GPL(inet6_csk_search_req);
107
108void inet6_csk_reqsk_queue_hash_add(struct sock *sk,
109 struct request_sock *req,
110 const unsigned long timeout)
111{
112 struct inet_connection_sock *icsk = inet_csk(sk);
113 struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt;
114 const u32 h = inet6_synq_hash(&inet6_rsk(req)->rmt_addr,
115 inet_rsk(req)->rmt_port,
116 lopt->hash_rnd, lopt->nr_table_entries);
117
118 reqsk_queue_hash_req(&icsk->icsk_accept_queue, h, req, timeout);
119 inet_csk_reqsk_queue_added(sk, timeout);
120}
121
122EXPORT_SYMBOL_GPL(inet6_csk_reqsk_queue_hash_add);
123
124void inet6_csk_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr)
125{
126 struct ipv6_pinfo *np = inet6_sk(sk);
127 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) uaddr;
128
129 sin6->sin6_family = AF_INET6;
130 ipv6_addr_copy(&sin6->sin6_addr, &np->daddr);
131 sin6->sin6_port = inet_sk(sk)->dport;
132 /* We do not store received flowlabel for TCP */
133 sin6->sin6_flowinfo = 0;
134 sin6->sin6_scope_id = 0;
135 if (sk->sk_bound_dev_if &&
136 ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
137 sin6->sin6_scope_id = sk->sk_bound_dev_if;
138}
139
140EXPORT_SYMBOL_GPL(inet6_csk_addr2sockaddr);
141
142int inet6_csk_xmit(struct sk_buff *skb, int ipfragok)
143{
144 struct sock *sk = skb->sk;
145 struct inet_sock *inet = inet_sk(sk);
146 struct ipv6_pinfo *np = inet6_sk(sk);
147 struct flowi fl;
148 struct dst_entry *dst;
149 struct in6_addr *final_p = NULL, final;
150
151 memset(&fl, 0, sizeof(fl));
152 fl.proto = sk->sk_protocol;
153 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
154 ipv6_addr_copy(&fl.fl6_src, &np->saddr);
155 fl.fl6_flowlabel = np->flow_label;
156 IP6_ECN_flow_xmit(sk, fl.fl6_flowlabel);
157 fl.oif = sk->sk_bound_dev_if;
158 fl.fl_ip_sport = inet->sport;
159 fl.fl_ip_dport = inet->dport;
160
161 if (np->opt && np->opt->srcrt) {
162 struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
163 ipv6_addr_copy(&final, &fl.fl6_dst);
164 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
165 final_p = &final;
166 }
167
168 dst = __sk_dst_check(sk, np->dst_cookie);
169
170 if (dst == NULL) {
171 int err = ip6_dst_lookup(sk, &dst, &fl);
172
173 if (err) {
174 sk->sk_err_soft = -err;
175 return err;
176 }
177
178 if (final_p)
179 ipv6_addr_copy(&fl.fl6_dst, final_p);
180
181 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
182 sk->sk_route_caps = 0;
183 return err;
184 }
185
186 ip6_dst_store(sk, dst, NULL);
187 sk->sk_route_caps = dst->dev->features &
188 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
189 }
190
191 skb->dst = dst_clone(dst);
192
193 /* Restore final destination back after routing done */
194 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
195
196 return ip6_xmit(sk, skb, &fl, np->opt, 0);
197}
198
199EXPORT_SYMBOL_GPL(inet6_csk_xmit);
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 01d5f46d4e..4154f3a8b6 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -5,7 +5,8 @@
5 * 5 *
6 * Generic INET6 transport hashtables 6 * Generic INET6 transport hashtables
7 * 7 *
8 * Authors: Lotsa people, from code originally in tcp 8 * Authors: Lotsa people, from code originally in tcp, generalised here
9 * by Arnaldo Carvalho de Melo <acme@mandriva.com>
9 * 10 *
10 * This program is free software; you can redistribute it and/or 11 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License 12 * modify it under the terms of the GNU General Public License
@@ -14,12 +15,13 @@
14 */ 15 */
15 16
16#include <linux/config.h> 17#include <linux/config.h>
17
18#include <linux/module.h> 18#include <linux/module.h>
19#include <linux/random.h>
19 20
20#include <net/inet_connection_sock.h> 21#include <net/inet_connection_sock.h>
21#include <net/inet_hashtables.h> 22#include <net/inet_hashtables.h>
22#include <net/inet6_hashtables.h> 23#include <net/inet6_hashtables.h>
24#include <net/ip.h>
23 25
24struct sock *inet6_lookup_listener(struct inet_hashinfo *hashinfo, 26struct sock *inet6_lookup_listener(struct inet_hashinfo *hashinfo,
25 const struct in6_addr *daddr, 27 const struct in6_addr *daddr,
@@ -79,3 +81,180 @@ struct sock *inet6_lookup(struct inet_hashinfo *hashinfo,
79} 81}
80 82
81EXPORT_SYMBOL_GPL(inet6_lookup); 83EXPORT_SYMBOL_GPL(inet6_lookup);
84
85static int __inet6_check_established(struct inet_timewait_death_row *death_row,
86 struct sock *sk, const __u16 lport,
87 struct inet_timewait_sock **twp)
88{
89 struct inet_hashinfo *hinfo = death_row->hashinfo;
90 const struct inet_sock *inet = inet_sk(sk);
91 const struct ipv6_pinfo *np = inet6_sk(sk);
92 const struct in6_addr *daddr = &np->rcv_saddr;
93 const struct in6_addr *saddr = &np->daddr;
94 const int dif = sk->sk_bound_dev_if;
95 const u32 ports = INET_COMBINED_PORTS(inet->dport, lport);
96 const unsigned int hash = inet6_ehashfn(daddr, inet->num, saddr,
97 inet->dport);
98 struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash);
99 struct sock *sk2;
100 const struct hlist_node *node;
101 struct inet_timewait_sock *tw;
102
103 prefetch(head->chain.first);
104 write_lock(&head->lock);
105
106 /* Check TIME-WAIT sockets first. */
107 sk_for_each(sk2, node, &(head + hinfo->ehash_size)->chain) {
108 const struct inet6_timewait_sock *tw6 = inet6_twsk(sk2);
109
110 tw = inet_twsk(sk2);
111
112 if(*((__u32 *)&(tw->tw_dport)) == ports &&
113 sk2->sk_family == PF_INET6 &&
114 ipv6_addr_equal(&tw6->tw_v6_daddr, saddr) &&
115 ipv6_addr_equal(&tw6->tw_v6_rcv_saddr, daddr) &&
116 sk2->sk_bound_dev_if == sk->sk_bound_dev_if) {
117 if (twsk_unique(sk, sk2, twp))
118 goto unique;
119 else
120 goto not_unique;
121 }
122 }
123 tw = NULL;
124
125 /* And established part... */
126 sk_for_each(sk2, node, &head->chain) {
127 if (INET6_MATCH(sk2, hash, saddr, daddr, ports, dif))
128 goto not_unique;
129 }
130
131unique:
132 BUG_TRAP(sk_unhashed(sk));
133 __sk_add_node(sk, &head->chain);
134 sk->sk_hash = hash;
135 sock_prot_inc_use(sk->sk_prot);
136 write_unlock(&head->lock);
137
138 if (twp != NULL) {
139 *twp = tw;
140 NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
141 } else if (tw != NULL) {
142 /* Silly. Should hash-dance instead... */
143 inet_twsk_deschedule(tw, death_row);
144 NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
145
146 inet_twsk_put(tw);
147 }
148 return 0;
149
150not_unique:
151 write_unlock(&head->lock);
152 return -EADDRNOTAVAIL;
153}
154
155static inline u32 inet6_sk_port_offset(const struct sock *sk)
156{
157 const struct inet_sock *inet = inet_sk(sk);
158 const struct ipv6_pinfo *np = inet6_sk(sk);
159 return secure_ipv6_port_ephemeral(np->rcv_saddr.s6_addr32,
160 np->daddr.s6_addr32,
161 inet->dport);
162}
163
164int inet6_hash_connect(struct inet_timewait_death_row *death_row,
165 struct sock *sk)
166{
167 struct inet_hashinfo *hinfo = death_row->hashinfo;
168 const unsigned short snum = inet_sk(sk)->num;
169 struct inet_bind_hashbucket *head;
170 struct inet_bind_bucket *tb;
171 int ret;
172
173 if (snum == 0) {
174 const int low = sysctl_local_port_range[0];
175 const int high = sysctl_local_port_range[1];
176 const int range = high - low;
177 int i, port;
178 static u32 hint;
179 const u32 offset = hint + inet6_sk_port_offset(sk);
180 struct hlist_node *node;
181 struct inet_timewait_sock *tw = NULL;
182
183 local_bh_disable();
184 for (i = 1; i <= range; i++) {
185 port = low + (i + offset) % range;
186 head = &hinfo->bhash[inet_bhashfn(port, hinfo->bhash_size)];
187 spin_lock(&head->lock);
188
189 /* Does not bother with rcv_saddr checks,
190 * because the established check is already
191 * unique enough.
192 */
193 inet_bind_bucket_for_each(tb, node, &head->chain) {
194 if (tb->port == port) {
195 BUG_TRAP(!hlist_empty(&tb->owners));
196 if (tb->fastreuse >= 0)
197 goto next_port;
198 if (!__inet6_check_established(death_row,
199 sk, port,
200 &tw))
201 goto ok;
202 goto next_port;
203 }
204 }
205
206 tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep,
207 head, port);
208 if (!tb) {
209 spin_unlock(&head->lock);
210 break;
211 }
212 tb->fastreuse = -1;
213 goto ok;
214
215 next_port:
216 spin_unlock(&head->lock);
217 }
218 local_bh_enable();
219
220 return -EADDRNOTAVAIL;
221
222ok:
223 hint += i;
224
225 /* Head lock still held and bh's disabled */
226 inet_bind_hash(sk, tb, port);
227 if (sk_unhashed(sk)) {
228 inet_sk(sk)->sport = htons(port);
229 __inet6_hash(hinfo, sk);
230 }
231 spin_unlock(&head->lock);
232
233 if (tw) {
234 inet_twsk_deschedule(tw, death_row);
235 inet_twsk_put(tw);
236 }
237
238 ret = 0;
239 goto out;
240 }
241
242 head = &hinfo->bhash[inet_bhashfn(snum, hinfo->bhash_size)];
243 tb = inet_csk(sk)->icsk_bind_hash;
244 spin_lock_bh(&head->lock);
245
246 if (sk_head(&tb->owners) == sk && sk->sk_bind_node.next == NULL) {
247 __inet6_hash(hinfo, sk);
248 spin_unlock_bh(&head->lock);
249 return 0;
250 } else {
251 spin_unlock(&head->lock);
252 /* No definite answer... Walk to established hash table */
253 ret = __inet6_check_established(death_row, sk, snum, NULL);
254out:
255 local_bh_enable();
256 return ret;
257 }
258}
259
260EXPORT_SYMBOL_GPL(inet6_hash_connect);
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index 1cf02765fb..89d12b4817 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -200,6 +200,8 @@ struct ip6_flowlabel * fl6_sock_lookup(struct sock *sk, u32 label)
200 return NULL; 200 return NULL;
201} 201}
202 202
203EXPORT_SYMBOL_GPL(fl6_sock_lookup);
204
203void fl6_free_socklist(struct sock *sk) 205void fl6_free_socklist(struct sock *sk)
204{ 206{
205 struct ipv6_pinfo *np = inet6_sk(sk); 207 struct ipv6_pinfo *np = inet6_sk(sk);
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 8523c76ebf..b4c4beba0e 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -775,6 +775,8 @@ out_err_release:
775 return err; 775 return err;
776} 776}
777 777
778EXPORT_SYMBOL_GPL(ip6_dst_lookup);
779
778static inline int ip6_ufo_append_data(struct sock *sk, 780static inline int ip6_ufo_append_data(struct sock *sk,
779 int getfrag(void *from, char *to, int offset, int len, 781 int getfrag(void *from, char *to, int offset, int len,
780 int odd, struct sk_buff *skb), 782 int odd, struct sk_buff *skb),
diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c
index 55917fb170..626dd39685 100644
--- a/net/ipv6/ipcomp6.c
+++ b/net/ipv6/ipcomp6.c
@@ -47,6 +47,7 @@
47#include <linux/rtnetlink.h> 47#include <linux/rtnetlink.h>
48#include <net/icmp.h> 48#include <net/icmp.h>
49#include <net/ipv6.h> 49#include <net/ipv6.h>
50#include <net/protocol.h>
50#include <linux/ipv6.h> 51#include <linux/ipv6.h>
51#include <linux/icmpv6.h> 52#include <linux/icmpv6.h>
52 53
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 3620718def..c63868dd2c 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -163,17 +163,17 @@ int ipv6_setsockopt(struct sock *sk, int level, int optname,
163 sk_refcnt_debug_dec(sk); 163 sk_refcnt_debug_dec(sk);
164 164
165 if (sk->sk_protocol == IPPROTO_TCP) { 165 if (sk->sk_protocol == IPPROTO_TCP) {
166 struct tcp_sock *tp = tcp_sk(sk); 166 struct inet_connection_sock *icsk = inet_csk(sk);
167 167
168 local_bh_disable(); 168 local_bh_disable();
169 sock_prot_dec_use(sk->sk_prot); 169 sock_prot_dec_use(sk->sk_prot);
170 sock_prot_inc_use(&tcp_prot); 170 sock_prot_inc_use(&tcp_prot);
171 local_bh_enable(); 171 local_bh_enable();
172 sk->sk_prot = &tcp_prot; 172 sk->sk_prot = &tcp_prot;
173 tp->af_specific = &ipv4_specific; 173 icsk->icsk_af_ops = &ipv4_specific;
174 sk->sk_socket->ops = &inet_stream_ops; 174 sk->sk_socket->ops = &inet_stream_ops;
175 sk->sk_family = PF_INET; 175 sk->sk_family = PF_INET;
176 tcp_sync_mss(sk, tp->pmtu_cookie); 176 tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
177 } else { 177 } else {
178 local_bh_disable(); 178 local_bh_disable();
179 sock_prot_dec_use(sk->sk_prot); 179 sock_prot_dec_use(sk->sk_prot);
@@ -317,14 +317,15 @@ int ipv6_setsockopt(struct sock *sk, int level, int optname,
317 } 317 }
318 318
319 retv = 0; 319 retv = 0;
320 if (sk->sk_type == SOCK_STREAM) { 320 if (inet_sk(sk)->is_icsk) {
321 if (opt) { 321 if (opt) {
322 struct tcp_sock *tp = tcp_sk(sk); 322 struct inet_connection_sock *icsk = inet_csk(sk);
323 if (!((1 << sk->sk_state) & 323 if (!((1 << sk->sk_state) &
324 (TCPF_LISTEN | TCPF_CLOSE)) 324 (TCPF_LISTEN | TCPF_CLOSE))
325 && inet_sk(sk)->daddr != LOOPBACK4_IPV6) { 325 && inet_sk(sk)->daddr != LOOPBACK4_IPV6) {
326 tp->ext_header_len = opt->opt_flen + opt->opt_nflen; 326 icsk->icsk_ext_hdr_len =
327 tcp_sync_mss(sk, tp->pmtu_cookie); 327 opt->opt_flen + opt->opt_nflen;
328 icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie);
328 } 329 }
329 } 330 }
330 opt = xchg(&np->opt, opt); 331 opt = xchg(&np->opt, opt);
@@ -380,14 +381,15 @@ sticky_done:
380 goto done; 381 goto done;
381update: 382update:
382 retv = 0; 383 retv = 0;
383 if (sk->sk_type == SOCK_STREAM) { 384 if (inet_sk(sk)->is_icsk) {
384 if (opt) { 385 if (opt) {
385 struct tcp_sock *tp = tcp_sk(sk); 386 struct inet_connection_sock *icsk = inet_csk(sk);
386 if (!((1 << sk->sk_state) & 387 if (!((1 << sk->sk_state) &
387 (TCPF_LISTEN | TCPF_CLOSE)) 388 (TCPF_LISTEN | TCPF_CLOSE))
388 && inet_sk(sk)->daddr != LOOPBACK4_IPV6) { 389 && inet_sk(sk)->daddr != LOOPBACK4_IPV6) {
389 tp->ext_header_len = opt->opt_flen + opt->opt_nflen; 390 icsk->icsk_ext_hdr_len =
390 tcp_sync_mss(sk, tp->pmtu_cookie); 391 opt->opt_flen + opt->opt_nflen;
392 icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie);
391 } 393 }
392 } 394 }
393 opt = xchg(&np->opt, opt); 395 opt = xchg(&np->opt, opt);
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index f829a4ad3c..1cf305a9f8 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -224,7 +224,7 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, struct in6_addr *addr)
224 224
225 mc_lst->ifindex = dev->ifindex; 225 mc_lst->ifindex = dev->ifindex;
226 mc_lst->sfmode = MCAST_EXCLUDE; 226 mc_lst->sfmode = MCAST_EXCLUDE;
227 mc_lst->sflock = RW_LOCK_UNLOCKED; 227 rwlock_init(&mc_lst->sflock);
228 mc_lst->sflist = NULL; 228 mc_lst->sflist = NULL;
229 229
230 /* 230 /*
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 95d469271c..ea43ef1d94 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -15,6 +15,7 @@
15 * - new extension header parser code 15 * - new extension header parser code
16 */ 16 */
17#include <linux/config.h> 17#include <linux/config.h>
18#include <linux/in.h>
18#include <linux/skbuff.h> 19#include <linux/skbuff.h>
19#include <linux/kmod.h> 20#include <linux/kmod.h>
20#include <linux/vmalloc.h> 21#include <linux/vmalloc.h>
@@ -86,11 +87,6 @@ static DECLARE_MUTEX(ip6t_mutex);
86 context stops packets coming through and allows user context to read 87 context stops packets coming through and allows user context to read
87 the counters or update the rules. 88 the counters or update the rules.
88 89
89 To be cache friendly on SMP, we arrange them like so:
90 [ n-entries ]
91 ... cache-align padding ...
92 [ n-entries ]
93
94 Hence the start of any table is given by get_table() below. */ 90 Hence the start of any table is given by get_table() below. */
95 91
96/* The table itself */ 92/* The table itself */
@@ -108,20 +104,15 @@ struct ip6t_table_info
108 unsigned int underflow[NF_IP6_NUMHOOKS]; 104 unsigned int underflow[NF_IP6_NUMHOOKS];
109 105
110 /* ip6t_entry tables: one per CPU */ 106 /* ip6t_entry tables: one per CPU */
111 char entries[0] ____cacheline_aligned; 107 void *entries[NR_CPUS];
112}; 108};
113 109
114static LIST_HEAD(ip6t_target); 110static LIST_HEAD(ip6t_target);
115static LIST_HEAD(ip6t_match); 111static LIST_HEAD(ip6t_match);
116static LIST_HEAD(ip6t_tables); 112static LIST_HEAD(ip6t_tables);
113#define SET_COUNTER(c,b,p) do { (c).bcnt = (b); (c).pcnt = (p); } while(0)
117#define ADD_COUNTER(c,b,p) do { (c).bcnt += (b); (c).pcnt += (p); } while(0) 114#define ADD_COUNTER(c,b,p) do { (c).bcnt += (b); (c).pcnt += (p); } while(0)
118 115
119#ifdef CONFIG_SMP
120#define TABLE_OFFSET(t,p) (SMP_ALIGN((t)->size)*(p))
121#else
122#define TABLE_OFFSET(t,p) 0
123#endif
124
125#if 0 116#if 0
126#define down(x) do { printk("DOWN:%u:" #x "\n", __LINE__); down(x); } while(0) 117#define down(x) do { printk("DOWN:%u:" #x "\n", __LINE__); down(x); } while(0)
127#define down_interruptible(x) ({ int __r; printk("DOWNi:%u:" #x "\n", __LINE__); __r = down_interruptible(x); if (__r != 0) printk("ABORT-DOWNi:%u\n", __LINE__); __r; }) 118#define down_interruptible(x) ({ int __r; printk("DOWNi:%u:" #x "\n", __LINE__); __r = down_interruptible(x); if (__r != 0) printk("ABORT-DOWNi:%u\n", __LINE__); __r; })
@@ -376,8 +367,7 @@ ip6t_do_table(struct sk_buff **pskb,
376 367
377 read_lock_bh(&table->lock); 368 read_lock_bh(&table->lock);
378 IP_NF_ASSERT(table->valid_hooks & (1 << hook)); 369 IP_NF_ASSERT(table->valid_hooks & (1 << hook));
379 table_base = (void *)table->private->entries 370 table_base = (void *)table->private->entries[smp_processor_id()];
380 + TABLE_OFFSET(table->private, smp_processor_id());
381 e = get_entry(table_base, table->private->hook_entry[hook]); 371 e = get_entry(table_base, table->private->hook_entry[hook]);
382 372
383#ifdef CONFIG_NETFILTER_DEBUG 373#ifdef CONFIG_NETFILTER_DEBUG
@@ -649,7 +639,8 @@ unconditional(const struct ip6t_ip6 *ipv6)
649/* Figures out from what hook each rule can be called: returns 0 if 639/* Figures out from what hook each rule can be called: returns 0 if
650 there are loops. Puts hook bitmask in comefrom. */ 640 there are loops. Puts hook bitmask in comefrom. */
651static int 641static int
652mark_source_chains(struct ip6t_table_info *newinfo, unsigned int valid_hooks) 642mark_source_chains(struct ip6t_table_info *newinfo,
643 unsigned int valid_hooks, void *entry0)
653{ 644{
654 unsigned int hook; 645 unsigned int hook;
655 646
@@ -658,7 +649,7 @@ mark_source_chains(struct ip6t_table_info *newinfo, unsigned int valid_hooks)
658 for (hook = 0; hook < NF_IP6_NUMHOOKS; hook++) { 649 for (hook = 0; hook < NF_IP6_NUMHOOKS; hook++) {
659 unsigned int pos = newinfo->hook_entry[hook]; 650 unsigned int pos = newinfo->hook_entry[hook];
660 struct ip6t_entry *e 651 struct ip6t_entry *e
661 = (struct ip6t_entry *)(newinfo->entries + pos); 652 = (struct ip6t_entry *)(entry0 + pos);
662 653
663 if (!(valid_hooks & (1 << hook))) 654 if (!(valid_hooks & (1 << hook)))
664 continue; 655 continue;
@@ -708,13 +699,13 @@ mark_source_chains(struct ip6t_table_info *newinfo, unsigned int valid_hooks)
708 goto next; 699 goto next;
709 700
710 e = (struct ip6t_entry *) 701 e = (struct ip6t_entry *)
711 (newinfo->entries + pos); 702 (entry0 + pos);
712 } while (oldpos == pos + e->next_offset); 703 } while (oldpos == pos + e->next_offset);
713 704
714 /* Move along one */ 705 /* Move along one */
715 size = e->next_offset; 706 size = e->next_offset;
716 e = (struct ip6t_entry *) 707 e = (struct ip6t_entry *)
717 (newinfo->entries + pos + size); 708 (entry0 + pos + size);
718 e->counters.pcnt = pos; 709 e->counters.pcnt = pos;
719 pos += size; 710 pos += size;
720 } else { 711 } else {
@@ -731,7 +722,7 @@ mark_source_chains(struct ip6t_table_info *newinfo, unsigned int valid_hooks)
731 newpos = pos + e->next_offset; 722 newpos = pos + e->next_offset;
732 } 723 }
733 e = (struct ip6t_entry *) 724 e = (struct ip6t_entry *)
734 (newinfo->entries + newpos); 725 (entry0 + newpos);
735 e->counters.pcnt = pos; 726 e->counters.pcnt = pos;
736 pos = newpos; 727 pos = newpos;
737 } 728 }
@@ -941,6 +932,7 @@ static int
941translate_table(const char *name, 932translate_table(const char *name,
942 unsigned int valid_hooks, 933 unsigned int valid_hooks,
943 struct ip6t_table_info *newinfo, 934 struct ip6t_table_info *newinfo,
935 void *entry0,
944 unsigned int size, 936 unsigned int size,
945 unsigned int number, 937 unsigned int number,
946 const unsigned int *hook_entries, 938 const unsigned int *hook_entries,
@@ -961,11 +953,11 @@ translate_table(const char *name,
961 duprintf("translate_table: size %u\n", newinfo->size); 953 duprintf("translate_table: size %u\n", newinfo->size);
962 i = 0; 954 i = 0;
963 /* Walk through entries, checking offsets. */ 955 /* Walk through entries, checking offsets. */
964 ret = IP6T_ENTRY_ITERATE(newinfo->entries, newinfo->size, 956 ret = IP6T_ENTRY_ITERATE(entry0, newinfo->size,
965 check_entry_size_and_hooks, 957 check_entry_size_and_hooks,
966 newinfo, 958 newinfo,
967 newinfo->entries, 959 entry0,
968 newinfo->entries + size, 960 entry0 + size,
969 hook_entries, underflows, &i); 961 hook_entries, underflows, &i);
970 if (ret != 0) 962 if (ret != 0)
971 return ret; 963 return ret;
@@ -993,27 +985,24 @@ translate_table(const char *name,
993 } 985 }
994 } 986 }
995 987
996 if (!mark_source_chains(newinfo, valid_hooks)) 988 if (!mark_source_chains(newinfo, valid_hooks, entry0))
997 return -ELOOP; 989 return -ELOOP;
998 990
999 /* Finally, each sanity check must pass */ 991 /* Finally, each sanity check must pass */
1000 i = 0; 992 i = 0;
1001 ret = IP6T_ENTRY_ITERATE(newinfo->entries, newinfo->size, 993 ret = IP6T_ENTRY_ITERATE(entry0, newinfo->size,
1002 check_entry, name, size, &i); 994 check_entry, name, size, &i);
1003 995
1004 if (ret != 0) { 996 if (ret != 0) {
1005 IP6T_ENTRY_ITERATE(newinfo->entries, newinfo->size, 997 IP6T_ENTRY_ITERATE(entry0, newinfo->size,
1006 cleanup_entry, &i); 998 cleanup_entry, &i);
1007 return ret; 999 return ret;
1008 } 1000 }
1009 1001
1010 /* And one copy for every other CPU */ 1002 /* And one copy for every other CPU */
1011 for_each_cpu(i) { 1003 for_each_cpu(i) {
1012 if (i == 0) 1004 if (newinfo->entries[i] && newinfo->entries[i] != entry0)
1013 continue; 1005 memcpy(newinfo->entries[i], entry0, newinfo->size);
1014 memcpy(newinfo->entries + SMP_ALIGN(newinfo->size) * i,
1015 newinfo->entries,
1016 SMP_ALIGN(newinfo->size));
1017 } 1006 }
1018 1007
1019 return ret; 1008 return ret;
@@ -1029,15 +1018,12 @@ replace_table(struct ip6t_table *table,
1029 1018
1030#ifdef CONFIG_NETFILTER_DEBUG 1019#ifdef CONFIG_NETFILTER_DEBUG
1031 { 1020 {
1032 struct ip6t_entry *table_base; 1021 int cpu;
1033 unsigned int i;
1034 1022
1035 for_each_cpu(i) { 1023 for_each_cpu(cpu) {
1036 table_base = 1024 struct ip6t_entry *table_base = newinfo->entries[cpu];
1037 (void *)newinfo->entries 1025 if (table_base)
1038 + TABLE_OFFSET(newinfo, i); 1026 table_base->comefrom = 0xdead57ac;
1039
1040 table_base->comefrom = 0xdead57ac;
1041 } 1027 }
1042 } 1028 }
1043#endif 1029#endif
@@ -1072,16 +1058,44 @@ add_entry_to_counter(const struct ip6t_entry *e,
1072 return 0; 1058 return 0;
1073} 1059}
1074 1060
1061static inline int
1062set_entry_to_counter(const struct ip6t_entry *e,
1063 struct ip6t_counters total[],
1064 unsigned int *i)
1065{
1066 SET_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
1067
1068 (*i)++;
1069 return 0;
1070}
1071
1075static void 1072static void
1076get_counters(const struct ip6t_table_info *t, 1073get_counters(const struct ip6t_table_info *t,
1077 struct ip6t_counters counters[]) 1074 struct ip6t_counters counters[])
1078{ 1075{
1079 unsigned int cpu; 1076 unsigned int cpu;
1080 unsigned int i; 1077 unsigned int i;
1078 unsigned int curcpu;
1079
1080 /* Instead of clearing (by a previous call to memset())
1081 * the counters and using adds, we set the counters
1082 * with data used by 'current' CPU
1083 * We don't care about preemption here.
1084 */
1085 curcpu = raw_smp_processor_id();
1086
1087 i = 0;
1088 IP6T_ENTRY_ITERATE(t->entries[curcpu],
1089 t->size,
1090 set_entry_to_counter,
1091 counters,
1092 &i);
1081 1093
1082 for_each_cpu(cpu) { 1094 for_each_cpu(cpu) {
1095 if (cpu == curcpu)
1096 continue;
1083 i = 0; 1097 i = 0;
1084 IP6T_ENTRY_ITERATE(t->entries + TABLE_OFFSET(t, cpu), 1098 IP6T_ENTRY_ITERATE(t->entries[cpu],
1085 t->size, 1099 t->size,
1086 add_entry_to_counter, 1100 add_entry_to_counter,
1087 counters, 1101 counters,
@@ -1098,6 +1112,7 @@ copy_entries_to_user(unsigned int total_size,
1098 struct ip6t_entry *e; 1112 struct ip6t_entry *e;
1099 struct ip6t_counters *counters; 1113 struct ip6t_counters *counters;
1100 int ret = 0; 1114 int ret = 0;
1115 void *loc_cpu_entry;
1101 1116
1102 /* We need atomic snapshot of counters: rest doesn't change 1117 /* We need atomic snapshot of counters: rest doesn't change
1103 (other than comefrom, which userspace doesn't care 1118 (other than comefrom, which userspace doesn't care
@@ -1109,13 +1124,13 @@ copy_entries_to_user(unsigned int total_size,
1109 return -ENOMEM; 1124 return -ENOMEM;
1110 1125
1111 /* First, sum counters... */ 1126 /* First, sum counters... */
1112 memset(counters, 0, countersize);
1113 write_lock_bh(&table->lock); 1127 write_lock_bh(&table->lock);
1114 get_counters(table->private, counters); 1128 get_counters(table->private, counters);
1115 write_unlock_bh(&table->lock); 1129 write_unlock_bh(&table->lock);
1116 1130
1117 /* ... then copy entire thing from CPU 0... */ 1131 /* choose the copy that is on our node/cpu */
1118 if (copy_to_user(userptr, table->private->entries, total_size) != 0) { 1132 loc_cpu_entry = table->private->entries[raw_smp_processor_id()];
1133 if (copy_to_user(userptr, loc_cpu_entry, total_size) != 0) {
1119 ret = -EFAULT; 1134 ret = -EFAULT;
1120 goto free_counters; 1135 goto free_counters;
1121 } 1136 }
@@ -1127,7 +1142,7 @@ copy_entries_to_user(unsigned int total_size,
1127 struct ip6t_entry_match *m; 1142 struct ip6t_entry_match *m;
1128 struct ip6t_entry_target *t; 1143 struct ip6t_entry_target *t;
1129 1144
1130 e = (struct ip6t_entry *)(table->private->entries + off); 1145 e = (struct ip6t_entry *)(loc_cpu_entry + off);
1131 if (copy_to_user(userptr + off 1146 if (copy_to_user(userptr + off
1132 + offsetof(struct ip6t_entry, counters), 1147 + offsetof(struct ip6t_entry, counters),
1133 &counters[num], 1148 &counters[num],
@@ -1196,6 +1211,46 @@ get_entries(const struct ip6t_get_entries *entries,
1196 return ret; 1211 return ret;
1197} 1212}
1198 1213
1214static void free_table_info(struct ip6t_table_info *info)
1215{
1216 int cpu;
1217 for_each_cpu(cpu) {
1218 if (info->size <= PAGE_SIZE)
1219 kfree(info->entries[cpu]);
1220 else
1221 vfree(info->entries[cpu]);
1222 }
1223 kfree(info);
1224}
1225
1226static struct ip6t_table_info *alloc_table_info(unsigned int size)
1227{
1228 struct ip6t_table_info *newinfo;
1229 int cpu;
1230
1231 newinfo = kzalloc(sizeof(struct ip6t_table_info), GFP_KERNEL);
1232 if (!newinfo)
1233 return NULL;
1234
1235 newinfo->size = size;
1236
1237 for_each_cpu(cpu) {
1238 if (size <= PAGE_SIZE)
1239 newinfo->entries[cpu] = kmalloc_node(size,
1240 GFP_KERNEL,
1241 cpu_to_node(cpu));
1242 else
1243 newinfo->entries[cpu] = vmalloc_node(size,
1244 cpu_to_node(cpu));
1245 if (newinfo->entries[cpu] == NULL) {
1246 free_table_info(newinfo);
1247 return NULL;
1248 }
1249 }
1250
1251 return newinfo;
1252}
1253
1199static int 1254static int
1200do_replace(void __user *user, unsigned int len) 1255do_replace(void __user *user, unsigned int len)
1201{ 1256{
@@ -1204,6 +1259,7 @@ do_replace(void __user *user, unsigned int len)
1204 struct ip6t_table *t; 1259 struct ip6t_table *t;
1205 struct ip6t_table_info *newinfo, *oldinfo; 1260 struct ip6t_table_info *newinfo, *oldinfo;
1206 struct ip6t_counters *counters; 1261 struct ip6t_counters *counters;
1262 void *loc_cpu_entry, *loc_cpu_old_entry;
1207 1263
1208 if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) 1264 if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
1209 return -EFAULT; 1265 return -EFAULT;
@@ -1212,13 +1268,13 @@ do_replace(void __user *user, unsigned int len)
1212 if ((SMP_ALIGN(tmp.size) >> PAGE_SHIFT) + 2 > num_physpages) 1268 if ((SMP_ALIGN(tmp.size) >> PAGE_SHIFT) + 2 > num_physpages)
1213 return -ENOMEM; 1269 return -ENOMEM;
1214 1270
1215 newinfo = vmalloc(sizeof(struct ip6t_table_info) 1271 newinfo = alloc_table_info(tmp.size);
1216 + SMP_ALIGN(tmp.size) *
1217 (highest_possible_processor_id()+1));
1218 if (!newinfo) 1272 if (!newinfo)
1219 return -ENOMEM; 1273 return -ENOMEM;
1220 1274
1221 if (copy_from_user(newinfo->entries, user + sizeof(tmp), 1275 /* choose the copy that is on our node/cpu */
1276 loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
1277 if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
1222 tmp.size) != 0) { 1278 tmp.size) != 0) {
1223 ret = -EFAULT; 1279 ret = -EFAULT;
1224 goto free_newinfo; 1280 goto free_newinfo;
@@ -1229,10 +1285,9 @@ do_replace(void __user *user, unsigned int len)
1229 ret = -ENOMEM; 1285 ret = -ENOMEM;
1230 goto free_newinfo; 1286 goto free_newinfo;
1231 } 1287 }
1232 memset(counters, 0, tmp.num_counters * sizeof(struct ip6t_counters));
1233 1288
1234 ret = translate_table(tmp.name, tmp.valid_hooks, 1289 ret = translate_table(tmp.name, tmp.valid_hooks,
1235 newinfo, tmp.size, tmp.num_entries, 1290 newinfo, loc_cpu_entry, tmp.size, tmp.num_entries,
1236 tmp.hook_entry, tmp.underflow); 1291 tmp.hook_entry, tmp.underflow);
1237 if (ret != 0) 1292 if (ret != 0)
1238 goto free_newinfo_counters; 1293 goto free_newinfo_counters;
@@ -1271,8 +1326,9 @@ do_replace(void __user *user, unsigned int len)
1271 /* Get the old counters. */ 1326 /* Get the old counters. */
1272 get_counters(oldinfo, counters); 1327 get_counters(oldinfo, counters);
1273 /* Decrease module usage counts and free resource */ 1328 /* Decrease module usage counts and free resource */
1274 IP6T_ENTRY_ITERATE(oldinfo->entries, oldinfo->size, cleanup_entry,NULL); 1329 loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()];
1275 vfree(oldinfo); 1330 IP6T_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry,NULL);
1331 free_table_info(oldinfo);
1276 if (copy_to_user(tmp.counters, counters, 1332 if (copy_to_user(tmp.counters, counters,
1277 sizeof(struct ip6t_counters) * tmp.num_counters) != 0) 1333 sizeof(struct ip6t_counters) * tmp.num_counters) != 0)
1278 ret = -EFAULT; 1334 ret = -EFAULT;
@@ -1284,11 +1340,11 @@ do_replace(void __user *user, unsigned int len)
1284 module_put(t->me); 1340 module_put(t->me);
1285 up(&ip6t_mutex); 1341 up(&ip6t_mutex);
1286 free_newinfo_counters_untrans: 1342 free_newinfo_counters_untrans:
1287 IP6T_ENTRY_ITERATE(newinfo->entries, newinfo->size, cleanup_entry,NULL); 1343 IP6T_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry,NULL);
1288 free_newinfo_counters: 1344 free_newinfo_counters:
1289 vfree(counters); 1345 vfree(counters);
1290 free_newinfo: 1346 free_newinfo:
1291 vfree(newinfo); 1347 free_table_info(newinfo);
1292 return ret; 1348 return ret;
1293} 1349}
1294 1350
@@ -1321,6 +1377,7 @@ do_add_counters(void __user *user, unsigned int len)
1321 struct ip6t_counters_info tmp, *paddc; 1377 struct ip6t_counters_info tmp, *paddc;
1322 struct ip6t_table *t; 1378 struct ip6t_table *t;
1323 int ret = 0; 1379 int ret = 0;
1380 void *loc_cpu_entry;
1324 1381
1325 if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) 1382 if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
1326 return -EFAULT; 1383 return -EFAULT;
@@ -1350,7 +1407,9 @@ do_add_counters(void __user *user, unsigned int len)
1350 } 1407 }
1351 1408
1352 i = 0; 1409 i = 0;
1353 IP6T_ENTRY_ITERATE(t->private->entries, 1410 /* Choose the copy that is on our node */
1411 loc_cpu_entry = t->private->entries[smp_processor_id()];
1412 IP6T_ENTRY_ITERATE(loc_cpu_entry,
1354 t->private->size, 1413 t->private->size,
1355 add_counter_to_entry, 1414 add_counter_to_entry,
1356 paddc->counters, 1415 paddc->counters,
@@ -1543,28 +1602,29 @@ int ip6t_register_table(struct ip6t_table *table,
1543 struct ip6t_table_info *newinfo; 1602 struct ip6t_table_info *newinfo;
1544 static struct ip6t_table_info bootstrap 1603 static struct ip6t_table_info bootstrap
1545 = { 0, 0, 0, { 0 }, { 0 }, { } }; 1604 = { 0, 0, 0, { 0 }, { 0 }, { } };
1605 void *loc_cpu_entry;
1546 1606
1547 newinfo = vmalloc(sizeof(struct ip6t_table_info) 1607 newinfo = alloc_table_info(repl->size);
1548 + SMP_ALIGN(repl->size) *
1549 (highest_possible_processor_id()+1));
1550 if (!newinfo) 1608 if (!newinfo)
1551 return -ENOMEM; 1609 return -ENOMEM;
1552 1610
1553 memcpy(newinfo->entries, repl->entries, repl->size); 1611 /* choose the copy on our node/cpu */
1612 loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
1613 memcpy(loc_cpu_entry, repl->entries, repl->size);
1554 1614
1555 ret = translate_table(table->name, table->valid_hooks, 1615 ret = translate_table(table->name, table->valid_hooks,
1556 newinfo, repl->size, 1616 newinfo, loc_cpu_entry, repl->size,
1557 repl->num_entries, 1617 repl->num_entries,
1558 repl->hook_entry, 1618 repl->hook_entry,
1559 repl->underflow); 1619 repl->underflow);
1560 if (ret != 0) { 1620 if (ret != 0) {
1561 vfree(newinfo); 1621 free_table_info(newinfo);
1562 return ret; 1622 return ret;
1563 } 1623 }
1564 1624
1565 ret = down_interruptible(&ip6t_mutex); 1625 ret = down_interruptible(&ip6t_mutex);
1566 if (ret != 0) { 1626 if (ret != 0) {
1567 vfree(newinfo); 1627 free_table_info(newinfo);
1568 return ret; 1628 return ret;
1569 } 1629 }
1570 1630
@@ -1593,20 +1653,23 @@ int ip6t_register_table(struct ip6t_table *table,
1593 return ret; 1653 return ret;
1594 1654
1595 free_unlock: 1655 free_unlock:
1596 vfree(newinfo); 1656 free_table_info(newinfo);
1597 goto unlock; 1657 goto unlock;
1598} 1658}
1599 1659
1600void ip6t_unregister_table(struct ip6t_table *table) 1660void ip6t_unregister_table(struct ip6t_table *table)
1601{ 1661{
1662 void *loc_cpu_entry;
1663
1602 down(&ip6t_mutex); 1664 down(&ip6t_mutex);
1603 LIST_DELETE(&ip6t_tables, table); 1665 LIST_DELETE(&ip6t_tables, table);
1604 up(&ip6t_mutex); 1666 up(&ip6t_mutex);
1605 1667
1606 /* Decrease module usage counts and free resources */ 1668 /* Decrease module usage counts and free resources */
1607 IP6T_ENTRY_ITERATE(table->private->entries, table->private->size, 1669 loc_cpu_entry = table->private->entries[raw_smp_processor_id()];
1670 IP6T_ENTRY_ITERATE(loc_cpu_entry, table->private->size,
1608 cleanup_entry, NULL); 1671 cleanup_entry, NULL);
1609 vfree(table->private); 1672 free_table_info(table->private);
1610} 1673}
1611 1674
1612/* Returns 1 if the port is matched by the range, 0 otherwise */ 1675/* Returns 1 if the port is matched by the range, 0 otherwise */
diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c
index 0cd1d1bd90..ae4653bfd6 100644
--- a/net/ipv6/netfilter/ip6t_LOG.c
+++ b/net/ipv6/netfilter/ip6t_LOG.c
@@ -13,6 +13,7 @@
13#include <linux/module.h> 13#include <linux/module.h>
14#include <linux/moduleparam.h> 14#include <linux/moduleparam.h>
15#include <linux/skbuff.h> 15#include <linux/skbuff.h>
16#include <linux/if_arp.h>
16#include <linux/ip.h> 17#include <linux/ip.h>
17#include <linux/spinlock.h> 18#include <linux/spinlock.h>
18#include <linux/icmpv6.h> 19#include <linux/icmpv6.h>
diff --git a/net/ipv6/netfilter/ip6t_ah.c b/net/ipv6/netfilter/ip6t_ah.c
index dde37793d2..268918d5de 100644
--- a/net/ipv6/netfilter/ip6t_ah.c
+++ b/net/ipv6/netfilter/ip6t_ah.c
@@ -9,6 +9,7 @@
9 9
10#include <linux/module.h> 10#include <linux/module.h>
11#include <linux/skbuff.h> 11#include <linux/skbuff.h>
12#include <linux/ip.h>
12#include <linux/ipv6.h> 13#include <linux/ipv6.h>
13#include <linux/types.h> 14#include <linux/types.h>
14#include <net/checksum.h> 15#include <net/checksum.h>
diff --git a/net/ipv6/netfilter/ip6t_esp.c b/net/ipv6/netfilter/ip6t_esp.c
index 24bc0cde43..65937de1b5 100644
--- a/net/ipv6/netfilter/ip6t_esp.c
+++ b/net/ipv6/netfilter/ip6t_esp.c
@@ -9,6 +9,7 @@
9 9
10#include <linux/module.h> 10#include <linux/module.h>
11#include <linux/skbuff.h> 11#include <linux/skbuff.h>
12#include <linux/ip.h>
12#include <linux/ipv6.h> 13#include <linux/ipv6.h>
13#include <linux/types.h> 14#include <linux/types.h>
14#include <net/checksum.h> 15#include <net/checksum.h>
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index c2c52af9e5..f3e5ffbd59 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -98,7 +98,7 @@ struct nf_ct_frag6_queue
98#define FRAG6Q_HASHSZ 64 98#define FRAG6Q_HASHSZ 64
99 99
100static struct nf_ct_frag6_queue *nf_ct_frag6_hash[FRAG6Q_HASHSZ]; 100static struct nf_ct_frag6_queue *nf_ct_frag6_hash[FRAG6Q_HASHSZ];
101static rwlock_t nf_ct_frag6_lock = RW_LOCK_UNLOCKED; 101static DEFINE_RWLOCK(nf_ct_frag6_lock);
102static u32 nf_ct_frag6_hash_rnd; 102static u32 nf_ct_frag6_hash_rnd;
103static LIST_HEAD(nf_ct_frag6_lru_list); 103static LIST_HEAD(nf_ct_frag6_lru_list);
104int nf_ct_frag6_nqueues = 0; 104int nf_ct_frag6_nqueues = 0;
@@ -371,7 +371,7 @@ nf_ct_frag6_create(unsigned int hash, u32 id, struct in6_addr *src, struct
371 init_timer(&fq->timer); 371 init_timer(&fq->timer);
372 fq->timer.function = nf_ct_frag6_expire; 372 fq->timer.function = nf_ct_frag6_expire;
373 fq->timer.data = (long) fq; 373 fq->timer.data = (long) fq;
374 fq->lock = SPIN_LOCK_UNLOCKED; 374 spin_lock_init(&fq->lock);
375 atomic_set(&fq->refcnt, 1); 375 atomic_set(&fq->refcnt, 1);
376 376
377 return nf_ct_frag6_intern(hash, fq); 377 return nf_ct_frag6_intern(hash, fq);
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index a66900cda2..66f1d12ea5 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -32,6 +32,7 @@
32#include <linux/icmpv6.h> 32#include <linux/icmpv6.h>
33#include <linux/netfilter.h> 33#include <linux/netfilter.h>
34#include <linux/netfilter_ipv6.h> 34#include <linux/netfilter_ipv6.h>
35#include <linux/skbuff.h>
35#include <asm/uaccess.h> 36#include <asm/uaccess.h>
36#include <asm/ioctls.h> 37#include <asm/ioctls.h>
37#include <asm/bug.h> 38#include <asm/bug.h>
@@ -433,25 +434,14 @@ out:
433 return err; 434 return err;
434 435
435csum_copy_err: 436csum_copy_err:
436 /* Clear queue. */ 437 skb_kill_datagram(sk, skb, flags);
437 if (flags&MSG_PEEK) {
438 int clear = 0;
439 spin_lock_bh(&sk->sk_receive_queue.lock);
440 if (skb == skb_peek(&sk->sk_receive_queue)) {
441 __skb_unlink(skb, &sk->sk_receive_queue);
442 clear = 1;
443 }
444 spin_unlock_bh(&sk->sk_receive_queue.lock);
445 if (clear)
446 kfree_skb(skb);
447 }
448 438
449 /* Error for blocking case is chosen to masquerade 439 /* Error for blocking case is chosen to masquerade
450 as some normal condition. 440 as some normal condition.
451 */ 441 */
452 err = (flags&MSG_DONTWAIT) ? -EAGAIN : -EHOSTUNREACH; 442 err = (flags&MSG_DONTWAIT) ? -EAGAIN : -EHOSTUNREACH;
453 /* FIXME: increment a raw6 drops counter here */ 443 /* FIXME: increment a raw6 drops counter here */
454 goto out_free; 444 goto out;
455} 445}
456 446
457static int rawv6_push_pending_frames(struct sock *sk, struct flowi *fl, 447static int rawv6_push_pending_frames(struct sock *sk, struct flowi *fl,
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 8827389aba..2947bc56d8 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -48,6 +48,7 @@
48#include <net/tcp.h> 48#include <net/tcp.h>
49#include <net/ndisc.h> 49#include <net/ndisc.h>
50#include <net/inet6_hashtables.h> 50#include <net/inet6_hashtables.h>
51#include <net/inet6_connection_sock.h>
51#include <net/ipv6.h> 52#include <net/ipv6.h>
52#include <net/transp_v6.h> 53#include <net/transp_v6.h>
53#include <net/addrconf.h> 54#include <net/addrconf.h>
@@ -59,6 +60,7 @@
59#include <net/addrconf.h> 60#include <net/addrconf.h>
60#include <net/snmp.h> 61#include <net/snmp.h>
61#include <net/dsfield.h> 62#include <net/dsfield.h>
63#include <net/timewait_sock.h>
62 64
63#include <asm/uaccess.h> 65#include <asm/uaccess.h>
64 66
@@ -67,224 +69,33 @@
67 69
68static void tcp_v6_send_reset(struct sk_buff *skb); 70static void tcp_v6_send_reset(struct sk_buff *skb);
69static void tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req); 71static void tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req);
70static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len, 72static void tcp_v6_send_check(struct sock *sk, int len,
71 struct sk_buff *skb); 73 struct sk_buff *skb);
72 74
73static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb); 75static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
74static int tcp_v6_xmit(struct sk_buff *skb, int ipfragok);
75 76
76static struct tcp_func ipv6_mapped; 77static struct inet_connection_sock_af_ops ipv6_mapped;
77static struct tcp_func ipv6_specific; 78static struct inet_connection_sock_af_ops ipv6_specific;
78 79
79static inline int tcp_v6_bind_conflict(const struct sock *sk,
80 const struct inet_bind_bucket *tb)
81{
82 const struct sock *sk2;
83 const struct hlist_node *node;
84
85 /* We must walk the whole port owner list in this case. -DaveM */
86 sk_for_each_bound(sk2, node, &tb->owners) {
87 if (sk != sk2 &&
88 (!sk->sk_bound_dev_if ||
89 !sk2->sk_bound_dev_if ||
90 sk->sk_bound_dev_if == sk2->sk_bound_dev_if) &&
91 (!sk->sk_reuse || !sk2->sk_reuse ||
92 sk2->sk_state == TCP_LISTEN) &&
93 ipv6_rcv_saddr_equal(sk, sk2))
94 break;
95 }
96
97 return node != NULL;
98}
99
100/* Grrr, addr_type already calculated by caller, but I don't want
101 * to add some silly "cookie" argument to this method just for that.
102 * But it doesn't matter, the recalculation is in the rarest path
103 * this function ever takes.
104 */
105static int tcp_v6_get_port(struct sock *sk, unsigned short snum) 80static int tcp_v6_get_port(struct sock *sk, unsigned short snum)
106{ 81{
107 struct inet_bind_hashbucket *head; 82 return inet_csk_get_port(&tcp_hashinfo, sk, snum,
108 struct inet_bind_bucket *tb; 83 inet6_csk_bind_conflict);
109 struct hlist_node *node;
110 int ret;
111
112 local_bh_disable();
113 if (snum == 0) {
114 int low = sysctl_local_port_range[0];
115 int high = sysctl_local_port_range[1];
116 int remaining = (high - low) + 1;
117 int rover = net_random() % (high - low) + low;
118
119 do {
120 head = &tcp_hashinfo.bhash[inet_bhashfn(rover, tcp_hashinfo.bhash_size)];
121 spin_lock(&head->lock);
122 inet_bind_bucket_for_each(tb, node, &head->chain)
123 if (tb->port == rover)
124 goto next;
125 break;
126 next:
127 spin_unlock(&head->lock);
128 if (++rover > high)
129 rover = low;
130 } while (--remaining > 0);
131
132 /* Exhausted local port range during search? It is not
133 * possible for us to be holding one of the bind hash
134 * locks if this test triggers, because if 'remaining'
135 * drops to zero, we broke out of the do/while loop at
136 * the top level, not from the 'break;' statement.
137 */
138 ret = 1;
139 if (unlikely(remaining <= 0))
140 goto fail;
141
142 /* OK, here is the one we will use. */
143 snum = rover;
144 } else {
145 head = &tcp_hashinfo.bhash[inet_bhashfn(snum, tcp_hashinfo.bhash_size)];
146 spin_lock(&head->lock);
147 inet_bind_bucket_for_each(tb, node, &head->chain)
148 if (tb->port == snum)
149 goto tb_found;
150 }
151 tb = NULL;
152 goto tb_not_found;
153tb_found:
154 if (tb && !hlist_empty(&tb->owners)) {
155 if (tb->fastreuse > 0 && sk->sk_reuse &&
156 sk->sk_state != TCP_LISTEN) {
157 goto success;
158 } else {
159 ret = 1;
160 if (tcp_v6_bind_conflict(sk, tb))
161 goto fail_unlock;
162 }
163 }
164tb_not_found:
165 ret = 1;
166 if (tb == NULL) {
167 tb = inet_bind_bucket_create(tcp_hashinfo.bind_bucket_cachep, head, snum);
168 if (tb == NULL)
169 goto fail_unlock;
170 }
171 if (hlist_empty(&tb->owners)) {
172 if (sk->sk_reuse && sk->sk_state != TCP_LISTEN)
173 tb->fastreuse = 1;
174 else
175 tb->fastreuse = 0;
176 } else if (tb->fastreuse &&
177 (!sk->sk_reuse || sk->sk_state == TCP_LISTEN))
178 tb->fastreuse = 0;
179
180success:
181 if (!inet_csk(sk)->icsk_bind_hash)
182 inet_bind_hash(sk, tb, snum);
183 BUG_TRAP(inet_csk(sk)->icsk_bind_hash == tb);
184 ret = 0;
185
186fail_unlock:
187 spin_unlock(&head->lock);
188fail:
189 local_bh_enable();
190 return ret;
191}
192
193static __inline__ void __tcp_v6_hash(struct sock *sk)
194{
195 struct hlist_head *list;
196 rwlock_t *lock;
197
198 BUG_TRAP(sk_unhashed(sk));
199
200 if (sk->sk_state == TCP_LISTEN) {
201 list = &tcp_hashinfo.listening_hash[inet_sk_listen_hashfn(sk)];
202 lock = &tcp_hashinfo.lhash_lock;
203 inet_listen_wlock(&tcp_hashinfo);
204 } else {
205 unsigned int hash;
206 sk->sk_hash = hash = inet6_sk_ehashfn(sk);
207 hash &= (tcp_hashinfo.ehash_size - 1);
208 list = &tcp_hashinfo.ehash[hash].chain;
209 lock = &tcp_hashinfo.ehash[hash].lock;
210 write_lock(lock);
211 }
212
213 __sk_add_node(sk, list);
214 sock_prot_inc_use(sk->sk_prot);
215 write_unlock(lock);
216} 84}
217 85
218
219static void tcp_v6_hash(struct sock *sk) 86static void tcp_v6_hash(struct sock *sk)
220{ 87{
221 if (sk->sk_state != TCP_CLOSE) { 88 if (sk->sk_state != TCP_CLOSE) {
222 struct tcp_sock *tp = tcp_sk(sk); 89 if (inet_csk(sk)->icsk_af_ops == &ipv6_mapped) {
223
224 if (tp->af_specific == &ipv6_mapped) {
225 tcp_prot.hash(sk); 90 tcp_prot.hash(sk);
226 return; 91 return;
227 } 92 }
228 local_bh_disable(); 93 local_bh_disable();
229 __tcp_v6_hash(sk); 94 __inet6_hash(&tcp_hashinfo, sk);
230 local_bh_enable(); 95 local_bh_enable();
231 } 96 }
232} 97}
233 98
234/*
235 * Open request hash tables.
236 */
237
238static u32 tcp_v6_synq_hash(const struct in6_addr *raddr, const u16 rport, const u32 rnd)
239{
240 u32 a, b, c;
241
242 a = raddr->s6_addr32[0];
243 b = raddr->s6_addr32[1];
244 c = raddr->s6_addr32[2];
245
246 a += JHASH_GOLDEN_RATIO;
247 b += JHASH_GOLDEN_RATIO;
248 c += rnd;
249 __jhash_mix(a, b, c);
250
251 a += raddr->s6_addr32[3];
252 b += (u32) rport;
253 __jhash_mix(a, b, c);
254
255 return c & (TCP_SYNQ_HSIZE - 1);
256}
257
258static struct request_sock *tcp_v6_search_req(const struct sock *sk,
259 struct request_sock ***prevp,
260 __u16 rport,
261 struct in6_addr *raddr,
262 struct in6_addr *laddr,
263 int iif)
264{
265 const struct inet_connection_sock *icsk = inet_csk(sk);
266 struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt;
267 struct request_sock *req, **prev;
268
269 for (prev = &lopt->syn_table[tcp_v6_synq_hash(raddr, rport, lopt->hash_rnd)];
270 (req = *prev) != NULL;
271 prev = &req->dl_next) {
272 const struct tcp6_request_sock *treq = tcp6_rsk(req);
273
274 if (inet_rsk(req)->rmt_port == rport &&
275 req->rsk_ops->family == AF_INET6 &&
276 ipv6_addr_equal(&treq->rmt_addr, raddr) &&
277 ipv6_addr_equal(&treq->loc_addr, laddr) &&
278 (!treq->iif || treq->iif == iif)) {
279 BUG_TRAP(req->sk == NULL);
280 *prevp = prev;
281 return req;
282 }
283 }
284
285 return NULL;
286}
287
288static __inline__ u16 tcp_v6_check(struct tcphdr *th, int len, 99static __inline__ u16 tcp_v6_check(struct tcphdr *th, int len,
289 struct in6_addr *saddr, 100 struct in6_addr *saddr,
290 struct in6_addr *daddr, 101 struct in6_addr *daddr,
@@ -308,195 +119,12 @@ static __u32 tcp_v6_init_sequence(struct sock *sk, struct sk_buff *skb)
308 } 119 }
309} 120}
310 121
311static int __tcp_v6_check_established(struct sock *sk, const __u16 lport,
312 struct inet_timewait_sock **twp)
313{
314 struct inet_sock *inet = inet_sk(sk);
315 const struct ipv6_pinfo *np = inet6_sk(sk);
316 const struct in6_addr *daddr = &np->rcv_saddr;
317 const struct in6_addr *saddr = &np->daddr;
318 const int dif = sk->sk_bound_dev_if;
319 const u32 ports = INET_COMBINED_PORTS(inet->dport, lport);
320 unsigned int hash = inet6_ehashfn(daddr, inet->num, saddr, inet->dport);
321 struct inet_ehash_bucket *head = inet_ehash_bucket(&tcp_hashinfo, hash);
322 struct sock *sk2;
323 const struct hlist_node *node;
324 struct inet_timewait_sock *tw;
325
326 prefetch(head->chain.first);
327 write_lock(&head->lock);
328
329 /* Check TIME-WAIT sockets first. */
330 sk_for_each(sk2, node, &(head + tcp_hashinfo.ehash_size)->chain) {
331 const struct tcp6_timewait_sock *tcp6tw = tcp6_twsk(sk2);
332
333 tw = inet_twsk(sk2);
334
335 if(*((__u32 *)&(tw->tw_dport)) == ports &&
336 sk2->sk_family == PF_INET6 &&
337 ipv6_addr_equal(&tcp6tw->tw_v6_daddr, saddr) &&
338 ipv6_addr_equal(&tcp6tw->tw_v6_rcv_saddr, daddr) &&
339 sk2->sk_bound_dev_if == sk->sk_bound_dev_if) {
340 const struct tcp_timewait_sock *tcptw = tcp_twsk(sk2);
341 struct tcp_sock *tp = tcp_sk(sk);
342
343 if (tcptw->tw_ts_recent_stamp &&
344 (!twp ||
345 (sysctl_tcp_tw_reuse &&
346 xtime.tv_sec - tcptw->tw_ts_recent_stamp > 1))) {
347 /* See comment in tcp_ipv4.c */
348 tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
349 if (!tp->write_seq)
350 tp->write_seq = 1;
351 tp->rx_opt.ts_recent = tcptw->tw_ts_recent;
352 tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
353 sock_hold(sk2);
354 goto unique;
355 } else
356 goto not_unique;
357 }
358 }
359 tw = NULL;
360
361 /* And established part... */
362 sk_for_each(sk2, node, &head->chain) {
363 if (INET6_MATCH(sk2, hash, saddr, daddr, ports, dif))
364 goto not_unique;
365 }
366
367unique:
368 BUG_TRAP(sk_unhashed(sk));
369 __sk_add_node(sk, &head->chain);
370 sk->sk_hash = hash;
371 sock_prot_inc_use(sk->sk_prot);
372 write_unlock(&head->lock);
373
374 if (twp) {
375 *twp = tw;
376 NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
377 } else if (tw) {
378 /* Silly. Should hash-dance instead... */
379 inet_twsk_deschedule(tw, &tcp_death_row);
380 NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
381
382 inet_twsk_put(tw);
383 }
384 return 0;
385
386not_unique:
387 write_unlock(&head->lock);
388 return -EADDRNOTAVAIL;
389}
390
391static inline u32 tcpv6_port_offset(const struct sock *sk)
392{
393 const struct inet_sock *inet = inet_sk(sk);
394 const struct ipv6_pinfo *np = inet6_sk(sk);
395
396 return secure_tcpv6_port_ephemeral(np->rcv_saddr.s6_addr32,
397 np->daddr.s6_addr32,
398 inet->dport);
399}
400
401static int tcp_v6_hash_connect(struct sock *sk)
402{
403 unsigned short snum = inet_sk(sk)->num;
404 struct inet_bind_hashbucket *head;
405 struct inet_bind_bucket *tb;
406 int ret;
407
408 if (!snum) {
409 int low = sysctl_local_port_range[0];
410 int high = sysctl_local_port_range[1];
411 int range = high - low;
412 int i;
413 int port;
414 static u32 hint;
415 u32 offset = hint + tcpv6_port_offset(sk);
416 struct hlist_node *node;
417 struct inet_timewait_sock *tw = NULL;
418
419 local_bh_disable();
420 for (i = 1; i <= range; i++) {
421 port = low + (i + offset) % range;
422 head = &tcp_hashinfo.bhash[inet_bhashfn(port, tcp_hashinfo.bhash_size)];
423 spin_lock(&head->lock);
424
425 /* Does not bother with rcv_saddr checks,
426 * because the established check is already
427 * unique enough.
428 */
429 inet_bind_bucket_for_each(tb, node, &head->chain) {
430 if (tb->port == port) {
431 BUG_TRAP(!hlist_empty(&tb->owners));
432 if (tb->fastreuse >= 0)
433 goto next_port;
434 if (!__tcp_v6_check_established(sk,
435 port,
436 &tw))
437 goto ok;
438 goto next_port;
439 }
440 }
441
442 tb = inet_bind_bucket_create(tcp_hashinfo.bind_bucket_cachep, head, port);
443 if (!tb) {
444 spin_unlock(&head->lock);
445 break;
446 }
447 tb->fastreuse = -1;
448 goto ok;
449
450 next_port:
451 spin_unlock(&head->lock);
452 }
453 local_bh_enable();
454
455 return -EADDRNOTAVAIL;
456
457ok:
458 hint += i;
459
460 /* Head lock still held and bh's disabled */
461 inet_bind_hash(sk, tb, port);
462 if (sk_unhashed(sk)) {
463 inet_sk(sk)->sport = htons(port);
464 __tcp_v6_hash(sk);
465 }
466 spin_unlock(&head->lock);
467
468 if (tw) {
469 inet_twsk_deschedule(tw, &tcp_death_row);
470 inet_twsk_put(tw);
471 }
472
473 ret = 0;
474 goto out;
475 }
476
477 head = &tcp_hashinfo.bhash[inet_bhashfn(snum, tcp_hashinfo.bhash_size)];
478 tb = inet_csk(sk)->icsk_bind_hash;
479 spin_lock_bh(&head->lock);
480
481 if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
482 __tcp_v6_hash(sk);
483 spin_unlock_bh(&head->lock);
484 return 0;
485 } else {
486 spin_unlock(&head->lock);
487 /* No definite answer... Walk to established hash table */
488 ret = __tcp_v6_check_established(sk, snum, NULL);
489out:
490 local_bh_enable();
491 return ret;
492 }
493}
494
495static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, 122static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
496 int addr_len) 123 int addr_len)
497{ 124{
498 struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr; 125 struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
499 struct inet_sock *inet = inet_sk(sk); 126 struct inet_sock *inet = inet_sk(sk);
127 struct inet_connection_sock *icsk = inet_csk(sk);
500 struct ipv6_pinfo *np = inet6_sk(sk); 128 struct ipv6_pinfo *np = inet6_sk(sk);
501 struct tcp_sock *tp = tcp_sk(sk); 129 struct tcp_sock *tp = tcp_sk(sk);
502 struct in6_addr *saddr = NULL, *final_p = NULL, final; 130 struct in6_addr *saddr = NULL, *final_p = NULL, final;
@@ -571,7 +199,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
571 */ 199 */
572 200
573 if (addr_type == IPV6_ADDR_MAPPED) { 201 if (addr_type == IPV6_ADDR_MAPPED) {
574 u32 exthdrlen = tp->ext_header_len; 202 u32 exthdrlen = icsk->icsk_ext_hdr_len;
575 struct sockaddr_in sin; 203 struct sockaddr_in sin;
576 204
577 SOCK_DEBUG(sk, "connect: ipv4 mapped\n"); 205 SOCK_DEBUG(sk, "connect: ipv4 mapped\n");
@@ -583,14 +211,14 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
583 sin.sin_port = usin->sin6_port; 211 sin.sin_port = usin->sin6_port;
584 sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3]; 212 sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
585 213
586 tp->af_specific = &ipv6_mapped; 214 icsk->icsk_af_ops = &ipv6_mapped;
587 sk->sk_backlog_rcv = tcp_v4_do_rcv; 215 sk->sk_backlog_rcv = tcp_v4_do_rcv;
588 216
589 err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin)); 217 err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
590 218
591 if (err) { 219 if (err) {
592 tp->ext_header_len = exthdrlen; 220 icsk->icsk_ext_hdr_len = exthdrlen;
593 tp->af_specific = &ipv6_specific; 221 icsk->icsk_af_ops = &ipv6_specific;
594 sk->sk_backlog_rcv = tcp_v6_do_rcv; 222 sk->sk_backlog_rcv = tcp_v6_do_rcv;
595 goto failure; 223 goto failure;
596 } else { 224 } else {
@@ -643,16 +271,17 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
643 sk->sk_route_caps = dst->dev->features & 271 sk->sk_route_caps = dst->dev->features &
644 ~(NETIF_F_IP_CSUM | NETIF_F_TSO); 272 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
645 273
646 tp->ext_header_len = 0; 274 icsk->icsk_ext_hdr_len = 0;
647 if (np->opt) 275 if (np->opt)
648 tp->ext_header_len = np->opt->opt_flen + np->opt->opt_nflen; 276 icsk->icsk_ext_hdr_len = (np->opt->opt_flen +
277 np->opt->opt_nflen);
649 278
650 tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr); 279 tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
651 280
652 inet->dport = usin->sin6_port; 281 inet->dport = usin->sin6_port;
653 282
654 tcp_set_state(sk, TCP_SYN_SENT); 283 tcp_set_state(sk, TCP_SYN_SENT);
655 err = tcp_v6_hash_connect(sk); 284 err = inet6_hash_connect(&tcp_death_row, sk);
656 if (err) 285 if (err)
657 goto late_failure; 286 goto late_failure;
658 287
@@ -758,7 +387,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
758 } else 387 } else
759 dst_hold(dst); 388 dst_hold(dst);
760 389
761 if (tp->pmtu_cookie > dst_mtu(dst)) { 390 if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
762 tcp_sync_mss(sk, dst_mtu(dst)); 391 tcp_sync_mss(sk, dst_mtu(dst));
763 tcp_simple_retransmit(sk); 392 tcp_simple_retransmit(sk);
764 } /* else let the usual retransmit timer handle it */ 393 } /* else let the usual retransmit timer handle it */
@@ -775,8 +404,8 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
775 if (sock_owned_by_user(sk)) 404 if (sock_owned_by_user(sk))
776 goto out; 405 goto out;
777 406
778 req = tcp_v6_search_req(sk, &prev, th->dest, &hdr->daddr, 407 req = inet6_csk_search_req(sk, &prev, th->dest, &hdr->daddr,
779 &hdr->saddr, inet6_iif(skb)); 408 &hdr->saddr, inet6_iif(skb));
780 if (!req) 409 if (!req)
781 goto out; 410 goto out;
782 411
@@ -822,7 +451,7 @@ out:
822static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req, 451static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req,
823 struct dst_entry *dst) 452 struct dst_entry *dst)
824{ 453{
825 struct tcp6_request_sock *treq = tcp6_rsk(req); 454 struct inet6_request_sock *treq = inet6_rsk(req);
826 struct ipv6_pinfo *np = inet6_sk(sk); 455 struct ipv6_pinfo *np = inet6_sk(sk);
827 struct sk_buff * skb; 456 struct sk_buff * skb;
828 struct ipv6_txoptions *opt = NULL; 457 struct ipv6_txoptions *opt = NULL;
@@ -888,8 +517,8 @@ done:
888 517
889static void tcp_v6_reqsk_destructor(struct request_sock *req) 518static void tcp_v6_reqsk_destructor(struct request_sock *req)
890{ 519{
891 if (tcp6_rsk(req)->pktopts) 520 if (inet6_rsk(req)->pktopts)
892 kfree_skb(tcp6_rsk(req)->pktopts); 521 kfree_skb(inet6_rsk(req)->pktopts);
893} 522}
894 523
895static struct request_sock_ops tcp6_request_sock_ops = { 524static struct request_sock_ops tcp6_request_sock_ops = {
@@ -901,26 +530,15 @@ static struct request_sock_ops tcp6_request_sock_ops = {
901 .send_reset = tcp_v6_send_reset 530 .send_reset = tcp_v6_send_reset
902}; 531};
903 532
904static int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb) 533static struct timewait_sock_ops tcp6_timewait_sock_ops = {
905{ 534 .twsk_obj_size = sizeof(struct tcp6_timewait_sock),
906 struct ipv6_pinfo *np = inet6_sk(sk); 535 .twsk_unique = tcp_twsk_unique,
907 struct inet6_skb_parm *opt = IP6CB(skb); 536};
908
909 if (np->rxopt.all) {
910 if ((opt->hop && (np->rxopt.bits.hopopts || np->rxopt.bits.ohopopts)) ||
911 ((IPV6_FLOWINFO_MASK & *(u32*)skb->nh.raw) && np->rxopt.bits.rxflow) ||
912 (opt->srcrt && (np->rxopt.bits.srcrt || np->rxopt.bits.osrcrt)) ||
913 ((opt->dst1 || opt->dst0) && (np->rxopt.bits.dstopts || np->rxopt.bits.odstopts)))
914 return 1;
915 }
916 return 0;
917}
918
919 537
920static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len, 538static void tcp_v6_send_check(struct sock *sk, int len, struct sk_buff *skb)
921 struct sk_buff *skb)
922{ 539{
923 struct ipv6_pinfo *np = inet6_sk(sk); 540 struct ipv6_pinfo *np = inet6_sk(sk);
541 struct tcphdr *th = skb->h.th;
924 542
925 if (skb->ip_summed == CHECKSUM_HW) { 543 if (skb->ip_summed == CHECKSUM_HW) {
926 th->check = ~csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP, 0); 544 th->check = ~csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP, 0);
@@ -1091,8 +709,9 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
1091 struct sock *nsk; 709 struct sock *nsk;
1092 710
1093 /* Find possible connection requests. */ 711 /* Find possible connection requests. */
1094 req = tcp_v6_search_req(sk, &prev, th->source, &skb->nh.ipv6h->saddr, 712 req = inet6_csk_search_req(sk, &prev, th->source,
1095 &skb->nh.ipv6h->daddr, inet6_iif(skb)); 713 &skb->nh.ipv6h->saddr,
714 &skb->nh.ipv6h->daddr, inet6_iif(skb));
1096 if (req) 715 if (req)
1097 return tcp_check_req(sk, skb, req, prev); 716 return tcp_check_req(sk, skb, req, prev);
1098 717
@@ -1116,23 +735,12 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
1116 return sk; 735 return sk;
1117} 736}
1118 737
1119static void tcp_v6_synq_add(struct sock *sk, struct request_sock *req)
1120{
1121 struct inet_connection_sock *icsk = inet_csk(sk);
1122 struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt;
1123 const u32 h = tcp_v6_synq_hash(&tcp6_rsk(req)->rmt_addr, inet_rsk(req)->rmt_port, lopt->hash_rnd);
1124
1125 reqsk_queue_hash_req(&icsk->icsk_accept_queue, h, req, TCP_TIMEOUT_INIT);
1126 inet_csk_reqsk_queue_added(sk, TCP_TIMEOUT_INIT);
1127}
1128
1129
1130/* FIXME: this is substantially similar to the ipv4 code. 738/* FIXME: this is substantially similar to the ipv4 code.
1131 * Can some kind of merge be done? -- erics 739 * Can some kind of merge be done? -- erics
1132 */ 740 */
1133static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) 741static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1134{ 742{
1135 struct tcp6_request_sock *treq; 743 struct inet6_request_sock *treq;
1136 struct ipv6_pinfo *np = inet6_sk(sk); 744 struct ipv6_pinfo *np = inet6_sk(sk);
1137 struct tcp_options_received tmp_opt; 745 struct tcp_options_received tmp_opt;
1138 struct tcp_sock *tp = tcp_sk(sk); 746 struct tcp_sock *tp = tcp_sk(sk);
@@ -1157,7 +765,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1157 if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) 765 if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
1158 goto drop; 766 goto drop;
1159 767
1160 req = reqsk_alloc(&tcp6_request_sock_ops); 768 req = inet6_reqsk_alloc(&tcp6_request_sock_ops);
1161 if (req == NULL) 769 if (req == NULL)
1162 goto drop; 770 goto drop;
1163 771
@@ -1170,7 +778,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1170 tmp_opt.tstamp_ok = tmp_opt.saw_tstamp; 778 tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
1171 tcp_openreq_init(req, &tmp_opt, skb); 779 tcp_openreq_init(req, &tmp_opt, skb);
1172 780
1173 treq = tcp6_rsk(req); 781 treq = inet6_rsk(req);
1174 ipv6_addr_copy(&treq->rmt_addr, &skb->nh.ipv6h->saddr); 782 ipv6_addr_copy(&treq->rmt_addr, &skb->nh.ipv6h->saddr);
1175 ipv6_addr_copy(&treq->loc_addr, &skb->nh.ipv6h->daddr); 783 ipv6_addr_copy(&treq->loc_addr, &skb->nh.ipv6h->daddr);
1176 TCP_ECN_create_request(req, skb->h.th); 784 TCP_ECN_create_request(req, skb->h.th);
@@ -1196,8 +804,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1196 if (tcp_v6_send_synack(sk, req, NULL)) 804 if (tcp_v6_send_synack(sk, req, NULL))
1197 goto drop; 805 goto drop;
1198 806
1199 tcp_v6_synq_add(sk, req); 807 inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
1200
1201 return 0; 808 return 0;
1202 809
1203drop: 810drop:
@@ -1212,7 +819,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1212 struct request_sock *req, 819 struct request_sock *req,
1213 struct dst_entry *dst) 820 struct dst_entry *dst)
1214{ 821{
1215 struct tcp6_request_sock *treq = tcp6_rsk(req); 822 struct inet6_request_sock *treq = inet6_rsk(req);
1216 struct ipv6_pinfo *newnp, *np = inet6_sk(sk); 823 struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
1217 struct tcp6_sock *newtcp6sk; 824 struct tcp6_sock *newtcp6sk;
1218 struct inet_sock *newinet; 825 struct inet_sock *newinet;
@@ -1247,7 +854,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1247 854
1248 ipv6_addr_copy(&newnp->rcv_saddr, &newnp->saddr); 855 ipv6_addr_copy(&newnp->rcv_saddr, &newnp->saddr);
1249 856
1250 newtp->af_specific = &ipv6_mapped; 857 inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
1251 newsk->sk_backlog_rcv = tcp_v4_do_rcv; 858 newsk->sk_backlog_rcv = tcp_v4_do_rcv;
1252 newnp->pktoptions = NULL; 859 newnp->pktoptions = NULL;
1253 newnp->opt = NULL; 860 newnp->opt = NULL;
@@ -1261,10 +868,10 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1261 */ 868 */
1262 869
1263 /* It is tricky place. Until this moment IPv4 tcp 870 /* It is tricky place. Until this moment IPv4 tcp
1264 worked with IPv6 af_tcp.af_specific. 871 worked with IPv6 icsk.icsk_af_ops.
1265 Sync it now. 872 Sync it now.
1266 */ 873 */
1267 tcp_sync_mss(newsk, newtp->pmtu_cookie); 874 tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);
1268 875
1269 return newsk; 876 return newsk;
1270 } 877 }
@@ -1371,10 +978,10 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1371 sock_kfree_s(sk, opt, opt->tot_len); 978 sock_kfree_s(sk, opt, opt->tot_len);
1372 } 979 }
1373 980
1374 newtp->ext_header_len = 0; 981 inet_csk(newsk)->icsk_ext_hdr_len = 0;
1375 if (newnp->opt) 982 if (newnp->opt)
1376 newtp->ext_header_len = newnp->opt->opt_nflen + 983 inet_csk(newsk)->icsk_ext_hdr_len = (newnp->opt->opt_nflen +
1377 newnp->opt->opt_flen; 984 newnp->opt->opt_flen);
1378 985
1379 tcp_sync_mss(newsk, dst_mtu(dst)); 986 tcp_sync_mss(newsk, dst_mtu(dst));
1380 newtp->advmss = dst_metric(dst, RTAX_ADVMSS); 987 newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
@@ -1382,7 +989,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1382 989
1383 newinet->daddr = newinet->saddr = newinet->rcv_saddr = LOOPBACK4_IPV6; 990 newinet->daddr = newinet->saddr = newinet->rcv_saddr = LOOPBACK4_IPV6;
1384 991
1385 __tcp_v6_hash(newsk); 992 __inet6_hash(&tcp_hashinfo, newsk);
1386 inet_inherit_port(&tcp_hashinfo, sk, newsk); 993 inet_inherit_port(&tcp_hashinfo, sk, newsk);
1387 994
1388 return newsk; 995 return newsk;
@@ -1679,139 +1286,16 @@ do_time_wait:
1679 goto discard_it; 1286 goto discard_it;
1680} 1287}
1681 1288
1682static int tcp_v6_rebuild_header(struct sock *sk)
1683{
1684 int err;
1685 struct dst_entry *dst;
1686 struct ipv6_pinfo *np = inet6_sk(sk);
1687
1688 dst = __sk_dst_check(sk, np->dst_cookie);
1689
1690 if (dst == NULL) {
1691 struct inet_sock *inet = inet_sk(sk);
1692 struct in6_addr *final_p = NULL, final;
1693 struct flowi fl;
1694
1695 memset(&fl, 0, sizeof(fl));
1696 fl.proto = IPPROTO_TCP;
1697 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1698 ipv6_addr_copy(&fl.fl6_src, &np->saddr);
1699 fl.fl6_flowlabel = np->flow_label;
1700 fl.oif = sk->sk_bound_dev_if;
1701 fl.fl_ip_dport = inet->dport;
1702 fl.fl_ip_sport = inet->sport;
1703
1704 if (np->opt && np->opt->srcrt) {
1705 struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
1706 ipv6_addr_copy(&final, &fl.fl6_dst);
1707 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1708 final_p = &final;
1709 }
1710
1711 err = ip6_dst_lookup(sk, &dst, &fl);
1712 if (err) {
1713 sk->sk_route_caps = 0;
1714 return err;
1715 }
1716 if (final_p)
1717 ipv6_addr_copy(&fl.fl6_dst, final_p);
1718
1719 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
1720 sk->sk_err_soft = -err;
1721 return err;
1722 }
1723
1724 ip6_dst_store(sk, dst, NULL);
1725 sk->sk_route_caps = dst->dev->features &
1726 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
1727 }
1728
1729 return 0;
1730}
1731
1732static int tcp_v6_xmit(struct sk_buff *skb, int ipfragok)
1733{
1734 struct sock *sk = skb->sk;
1735 struct inet_sock *inet = inet_sk(sk);
1736 struct ipv6_pinfo *np = inet6_sk(sk);
1737 struct flowi fl;
1738 struct dst_entry *dst;
1739 struct in6_addr *final_p = NULL, final;
1740
1741 memset(&fl, 0, sizeof(fl));
1742 fl.proto = IPPROTO_TCP;
1743 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1744 ipv6_addr_copy(&fl.fl6_src, &np->saddr);
1745 fl.fl6_flowlabel = np->flow_label;
1746 IP6_ECN_flow_xmit(sk, fl.fl6_flowlabel);
1747 fl.oif = sk->sk_bound_dev_if;
1748 fl.fl_ip_sport = inet->sport;
1749 fl.fl_ip_dport = inet->dport;
1750
1751 if (np->opt && np->opt->srcrt) {
1752 struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
1753 ipv6_addr_copy(&final, &fl.fl6_dst);
1754 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1755 final_p = &final;
1756 }
1757
1758 dst = __sk_dst_check(sk, np->dst_cookie);
1759
1760 if (dst == NULL) {
1761 int err = ip6_dst_lookup(sk, &dst, &fl);
1762
1763 if (err) {
1764 sk->sk_err_soft = -err;
1765 return err;
1766 }
1767
1768 if (final_p)
1769 ipv6_addr_copy(&fl.fl6_dst, final_p);
1770
1771 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
1772 sk->sk_route_caps = 0;
1773 return err;
1774 }
1775
1776 ip6_dst_store(sk, dst, NULL);
1777 sk->sk_route_caps = dst->dev->features &
1778 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
1779 }
1780
1781 skb->dst = dst_clone(dst);
1782
1783 /* Restore final destination back after routing done */
1784 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1785
1786 return ip6_xmit(sk, skb, &fl, np->opt, 0);
1787}
1788
1789static void v6_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr)
1790{
1791 struct ipv6_pinfo *np = inet6_sk(sk);
1792 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) uaddr;
1793
1794 sin6->sin6_family = AF_INET6;
1795 ipv6_addr_copy(&sin6->sin6_addr, &np->daddr);
1796 sin6->sin6_port = inet_sk(sk)->dport;
1797 /* We do not store received flowlabel for TCP */
1798 sin6->sin6_flowinfo = 0;
1799 sin6->sin6_scope_id = 0;
1800 if (sk->sk_bound_dev_if &&
1801 ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
1802 sin6->sin6_scope_id = sk->sk_bound_dev_if;
1803}
1804
1805static int tcp_v6_remember_stamp(struct sock *sk) 1289static int tcp_v6_remember_stamp(struct sock *sk)
1806{ 1290{
1807 /* Alas, not yet... */ 1291 /* Alas, not yet... */
1808 return 0; 1292 return 0;
1809} 1293}
1810 1294
1811static struct tcp_func ipv6_specific = { 1295static struct inet_connection_sock_af_ops ipv6_specific = {
1812 .queue_xmit = tcp_v6_xmit, 1296 .queue_xmit = inet6_csk_xmit,
1813 .send_check = tcp_v6_send_check, 1297 .send_check = tcp_v6_send_check,
1814 .rebuild_header = tcp_v6_rebuild_header, 1298 .rebuild_header = inet6_sk_rebuild_header,
1815 .conn_request = tcp_v6_conn_request, 1299 .conn_request = tcp_v6_conn_request,
1816 .syn_recv_sock = tcp_v6_syn_recv_sock, 1300 .syn_recv_sock = tcp_v6_syn_recv_sock,
1817 .remember_stamp = tcp_v6_remember_stamp, 1301 .remember_stamp = tcp_v6_remember_stamp,
@@ -1819,7 +1303,7 @@ static struct tcp_func ipv6_specific = {
1819 1303
1820 .setsockopt = ipv6_setsockopt, 1304 .setsockopt = ipv6_setsockopt,
1821 .getsockopt = ipv6_getsockopt, 1305 .getsockopt = ipv6_getsockopt,
1822 .addr2sockaddr = v6_addr2sockaddr, 1306 .addr2sockaddr = inet6_csk_addr2sockaddr,
1823 .sockaddr_len = sizeof(struct sockaddr_in6) 1307 .sockaddr_len = sizeof(struct sockaddr_in6)
1824}; 1308};
1825 1309
@@ -1827,7 +1311,7 @@ static struct tcp_func ipv6_specific = {
1827 * TCP over IPv4 via INET6 API 1311 * TCP over IPv4 via INET6 API
1828 */ 1312 */
1829 1313
1830static struct tcp_func ipv6_mapped = { 1314static struct inet_connection_sock_af_ops ipv6_mapped = {
1831 .queue_xmit = ip_queue_xmit, 1315 .queue_xmit = ip_queue_xmit,
1832 .send_check = tcp_v4_send_check, 1316 .send_check = tcp_v4_send_check,
1833 .rebuild_header = inet_sk_rebuild_header, 1317 .rebuild_header = inet_sk_rebuild_header,
@@ -1838,7 +1322,7 @@ static struct tcp_func ipv6_mapped = {
1838 1322
1839 .setsockopt = ipv6_setsockopt, 1323 .setsockopt = ipv6_setsockopt,
1840 .getsockopt = ipv6_getsockopt, 1324 .getsockopt = ipv6_getsockopt,
1841 .addr2sockaddr = v6_addr2sockaddr, 1325 .addr2sockaddr = inet6_csk_addr2sockaddr,
1842 .sockaddr_len = sizeof(struct sockaddr_in6) 1326 .sockaddr_len = sizeof(struct sockaddr_in6)
1843}; 1327};
1844 1328
@@ -1877,8 +1361,9 @@ static int tcp_v6_init_sock(struct sock *sk)
1877 1361
1878 sk->sk_state = TCP_CLOSE; 1362 sk->sk_state = TCP_CLOSE;
1879 1363
1880 tp->af_specific = &ipv6_specific; 1364 icsk->icsk_af_ops = &ipv6_specific;
1881 icsk->icsk_ca_ops = &tcp_init_congestion_ops; 1365 icsk->icsk_ca_ops = &tcp_init_congestion_ops;
1366 icsk->icsk_sync_mss = tcp_sync_mss;
1882 sk->sk_write_space = sk_stream_write_space; 1367 sk->sk_write_space = sk_stream_write_space;
1883 sock_set_flag(sk, SOCK_USE_WRITE_QUEUE); 1368 sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
1884 1369
@@ -1900,14 +1385,13 @@ static int tcp_v6_destroy_sock(struct sock *sk)
1900static void get_openreq6(struct seq_file *seq, 1385static void get_openreq6(struct seq_file *seq,
1901 struct sock *sk, struct request_sock *req, int i, int uid) 1386 struct sock *sk, struct request_sock *req, int i, int uid)
1902{ 1387{
1903 struct in6_addr *dest, *src;
1904 int ttd = req->expires - jiffies; 1388 int ttd = req->expires - jiffies;
1389 struct in6_addr *src = &inet6_rsk(req)->loc_addr;
1390 struct in6_addr *dest = &inet6_rsk(req)->rmt_addr;
1905 1391
1906 if (ttd < 0) 1392 if (ttd < 0)
1907 ttd = 0; 1393 ttd = 0;
1908 1394
1909 src = &tcp6_rsk(req)->loc_addr;
1910 dest = &tcp6_rsk(req)->rmt_addr;
1911 seq_printf(seq, 1395 seq_printf(seq,
1912 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " 1396 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1913 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n", 1397 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
@@ -1988,14 +1472,14 @@ static void get_timewait6_sock(struct seq_file *seq,
1988{ 1472{
1989 struct in6_addr *dest, *src; 1473 struct in6_addr *dest, *src;
1990 __u16 destp, srcp; 1474 __u16 destp, srcp;
1991 struct tcp6_timewait_sock *tcp6tw = tcp6_twsk((struct sock *)tw); 1475 struct inet6_timewait_sock *tw6 = inet6_twsk((struct sock *)tw);
1992 int ttd = tw->tw_ttd - jiffies; 1476 int ttd = tw->tw_ttd - jiffies;
1993 1477
1994 if (ttd < 0) 1478 if (ttd < 0)
1995 ttd = 0; 1479 ttd = 0;
1996 1480
1997 dest = &tcp6tw->tw_v6_daddr; 1481 dest = &tw6->tw_v6_daddr;
1998 src = &tcp6tw->tw_v6_rcv_saddr; 1482 src = &tw6->tw_v6_rcv_saddr;
1999 destp = ntohs(tw->tw_dport); 1483 destp = ntohs(tw->tw_dport);
2000 srcp = ntohs(tw->tw_sport); 1484 srcp = ntohs(tw->tw_sport);
2001 1485
@@ -2093,7 +1577,7 @@ struct proto tcpv6_prot = {
2093 .sysctl_rmem = sysctl_tcp_rmem, 1577 .sysctl_rmem = sysctl_tcp_rmem,
2094 .max_header = MAX_TCP_HEADER, 1578 .max_header = MAX_TCP_HEADER,
2095 .obj_size = sizeof(struct tcp6_sock), 1579 .obj_size = sizeof(struct tcp6_sock),
2096 .twsk_obj_size = sizeof(struct tcp6_timewait_sock), 1580 .twsk_prot = &tcp6_timewait_sock_ops,
2097 .rsk_prot = &tcp6_request_sock_ops, 1581 .rsk_prot = &tcp6_request_sock_ops,
2098}; 1582};
2099 1583
@@ -2110,7 +1594,8 @@ static struct inet_protosw tcpv6_protosw = {
2110 .ops = &inet6_stream_ops, 1594 .ops = &inet6_stream_ops,
2111 .capability = -1, 1595 .capability = -1,
2112 .no_check = 0, 1596 .no_check = 0,
2113 .flags = INET_PROTOSW_PERMANENT, 1597 .flags = INET_PROTOSW_PERMANENT |
1598 INET_PROTOSW_ICSK,
2114}; 1599};
2115 1600
2116void __init tcpv6_init(void) 1601void __init tcpv6_init(void)
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 5cc8731eb5..d8538dcea8 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -36,6 +36,7 @@
36#include <linux/ipv6.h> 36#include <linux/ipv6.h>
37#include <linux/icmpv6.h> 37#include <linux/icmpv6.h>
38#include <linux/init.h> 38#include <linux/init.h>
39#include <linux/skbuff.h>
39#include <asm/uaccess.h> 40#include <asm/uaccess.h>
40 41
41#include <net/sock.h> 42#include <net/sock.h>
@@ -300,20 +301,7 @@ out:
300 return err; 301 return err;
301 302
302csum_copy_err: 303csum_copy_err:
303 /* Clear queue. */ 304 skb_kill_datagram(sk, skb, flags);
304 if (flags&MSG_PEEK) {
305 int clear = 0;
306 spin_lock_bh(&sk->sk_receive_queue.lock);
307 if (skb == skb_peek(&sk->sk_receive_queue)) {
308 __skb_unlink(skb, &sk->sk_receive_queue);
309 clear = 1;
310 }
311 spin_unlock_bh(&sk->sk_receive_queue.lock);
312 if (clear)
313 kfree_skb(skb);
314 }
315
316 skb_free_datagram(sk, skb);
317 305
318 if (flags & MSG_DONTWAIT) { 306 if (flags & MSG_DONTWAIT) {
319 UDP6_INC_STATS_USER(UDP_MIB_INERRORS); 307 UDP6_INC_STATS_USER(UDP_MIB_INERRORS);