author     Russell King <rmk@dyn-67.arm.linux.org.uk>      2006-01-07 09:40:05 -0500
committer  Russell King <rmk+kernel@arm.linux.org.uk>      2006-01-07 09:40:05 -0500
commit     123656d4cc8c946f578ebd18c2050f5251720428 (patch)
tree       3d5432eff034a3b9cfdc98b37e245abe5695342d /net/ipv6
parent     a62c80e559809e6c7851ec04d30575e85ad6f6ed (diff)
parent     0aec63e67c69545ca757a73a66f5dcf05fa484bf (diff)

Merge with Linus' kernel.
Diffstat (limited to 'net/ipv6')

-rw-r--r--  net/ipv6/Makefile                               |   3
-rw-r--r--  net/ipv6/addrconf.c                             |   2
-rw-r--r--  net/ipv6/af_inet6.c                             |  90
-rw-r--r--  net/ipv6/ah6.c                                  |   1
-rw-r--r--  net/ipv6/esp6.c                                 |   1
-rw-r--r--  net/ipv6/exthdrs.c                              |   4
-rw-r--r--  net/ipv6/inet6_connection_sock.c                | 199
-rw-r--r--  net/ipv6/inet6_hashtables.c                     | 183
-rw-r--r--  net/ipv6/ip6_flowlabel.c                        |   2
-rw-r--r--  net/ipv6/ip6_output.c                           |   2
-rw-r--r--  net/ipv6/ipcomp6.c                              |   1
-rw-r--r--  net/ipv6/ipv6_sockglue.c                        |  24
-rw-r--r--  net/ipv6/mcast.c                                |   2
-rw-r--r--  net/ipv6/netfilter/ip6_tables.c                 | 315
-rw-r--r--  net/ipv6/netfilter/ip6t_LOG.c                   |   1
-rw-r--r--  net/ipv6/netfilter/ip6t_ah.c                    |   3
-rw-r--r--  net/ipv6/netfilter/ip6t_dst.c                   |   4
-rw-r--r--  net/ipv6/netfilter/ip6t_esp.c                   |   3
-rw-r--r--  net/ipv6/netfilter/ip6t_frag.c                  |   2
-rw-r--r--  net/ipv6/netfilter/ip6t_hbh.c                   |   4
-rw-r--r--  net/ipv6/netfilter/ip6t_rt.c                    |   2
-rw-r--r--  net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c  |  47
-rw-r--r--  net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c  |  77
-rw-r--r--  net/ipv6/netfilter/nf_conntrack_reasm.c         |   4
-rw-r--r--  net/ipv6/raw.c                                  |  16
-rw-r--r--  net/ipv6/sit.c                                  |   3
-rw-r--r--  net/ipv6/tcp_ipv6.c                             | 639
-rw-r--r--  net/ipv6/udp.c                                  |  16

28 files changed, 864 insertions, 786 deletions
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile
index 6460eec834b7..9601fd7f9d66 100644
--- a/net/ipv6/Makefile
+++ b/net/ipv6/Makefile
@@ -8,7 +8,8 @@ ipv6-objs := af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o sit.o \
 	route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o raw.o \
 	protocol.o icmp.o mcast.o reassembly.o tcp_ipv6.o \
 	exthdrs.o sysctl_net_ipv6.o datagram.o proc.o \
-	ip6_flowlabel.o ipv6_syms.o netfilter.o
+	ip6_flowlabel.o ipv6_syms.o netfilter.o \
+	inet6_connection_sock.o
 
 ipv6-$(CONFIG_XFRM) += xfrm6_policy.o xfrm6_state.o xfrm6_input.o \
 	xfrm6_output.o
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index a60585fd85ad..704fb73e6c5f 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1195,7 +1195,7 @@ struct inet6_ifaddr * ipv6_get_ifaddr(struct in6_addr *addr, struct net_device *
 int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2)
 {
 	const struct in6_addr *sk_rcv_saddr6 = &inet6_sk(sk)->rcv_saddr;
-	const struct in6_addr *sk2_rcv_saddr6 = tcp_v6_rcv_saddr(sk2);
+	const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2);
 	u32 sk_rcv_saddr = inet_sk(sk)->rcv_saddr;
 	u32 sk2_rcv_saddr = inet_rcv_saddr(sk2);
 	int sk_ipv6only = ipv6_only_sock(sk);
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index d9546380fa04..68afc53be662 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -167,6 +167,7 @@ lookup_protocol:
 	sk->sk_reuse = 1;
 
 	inet = inet_sk(sk);
+	inet->is_icsk = INET_PROTOSW_ICSK & answer_flags;
 
 	if (SOCK_RAW == sock->type) {
 		inet->num = protocol;
@@ -389,6 +390,8 @@ int inet6_destroy_sock(struct sock *sk)
 	return 0;
 }
 
+EXPORT_SYMBOL_GPL(inet6_destroy_sock);
+
 /*
  *	This does both peername and sockname.
  */
@@ -431,7 +434,6 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
 int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 {
 	struct sock *sk = sock->sk;
-	int err = -EINVAL;
 
 	switch(cmd)
 	{
@@ -450,16 +452,15 @@ int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 	case SIOCSIFDSTADDR:
 		return addrconf_set_dstaddr((void __user *) arg);
 	default:
-		if (!sk->sk_prot->ioctl ||
-		    (err = sk->sk_prot->ioctl(sk, cmd, arg)) == -ENOIOCTLCMD)
-			return(dev_ioctl(cmd,(void __user *) arg));
-		return err;
+		if (!sk->sk_prot->ioctl)
+			return -ENOIOCTLCMD;
+		return sk->sk_prot->ioctl(sk, cmd, arg);
 	}
 	/*NOTREACHED*/
 	return(0);
 }
 
-struct proto_ops inet6_stream_ops = {
+const struct proto_ops inet6_stream_ops = {
 	.family = PF_INET6,
 	.owner = THIS_MODULE,
 	.release = inet6_release,
@@ -480,7 +481,7 @@ struct proto_ops inet6_stream_ops = {
 	.sendpage = tcp_sendpage
 };
 
-struct proto_ops inet6_dgram_ops = {
+const struct proto_ops inet6_dgram_ops = {
 	.family = PF_INET6,
 	.owner = THIS_MODULE,
 	.release = inet6_release,
@@ -508,7 +509,7 @@ static struct net_proto_family inet6_family_ops = {
 };
 
 /* Same as inet6_dgram_ops, sans udp_poll. */
-static struct proto_ops inet6_sockraw_ops = {
+static const struct proto_ops inet6_sockraw_ops = {
 	.family = PF_INET6,
 	.owner = THIS_MODULE,
 	.release = inet6_release,
@@ -609,6 +610,79 @@ inet6_unregister_protosw(struct inet_protosw *p)
 	}
 }
 
+int inet6_sk_rebuild_header(struct sock *sk)
+{
+	int err;
+	struct dst_entry *dst;
+	struct ipv6_pinfo *np = inet6_sk(sk);
+
+	dst = __sk_dst_check(sk, np->dst_cookie);
+
+	if (dst == NULL) {
+		struct inet_sock *inet = inet_sk(sk);
+		struct in6_addr *final_p = NULL, final;
+		struct flowi fl;
+
+		memset(&fl, 0, sizeof(fl));
+		fl.proto = sk->sk_protocol;
+		ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
+		ipv6_addr_copy(&fl.fl6_src, &np->saddr);
+		fl.fl6_flowlabel = np->flow_label;
+		fl.oif = sk->sk_bound_dev_if;
+		fl.fl_ip_dport = inet->dport;
+		fl.fl_ip_sport = inet->sport;
+
+		if (np->opt && np->opt->srcrt) {
+			struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
+			ipv6_addr_copy(&final, &fl.fl6_dst);
+			ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
+			final_p = &final;
+		}
+
+		err = ip6_dst_lookup(sk, &dst, &fl);
+		if (err) {
+			sk->sk_route_caps = 0;
+			return err;
+		}
+		if (final_p)
+			ipv6_addr_copy(&fl.fl6_dst, final_p);
+
+		if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
+			sk->sk_err_soft = -err;
+			return err;
+		}
+
+		ip6_dst_store(sk, dst, NULL);
+		sk->sk_route_caps = dst->dev->features &
+			~(NETIF_F_IP_CSUM | NETIF_F_TSO);
+	}
+
+	return 0;
+}
+
+EXPORT_SYMBOL_GPL(inet6_sk_rebuild_header);
+
+int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb)
+{
+	struct ipv6_pinfo *np = inet6_sk(sk);
+	struct inet6_skb_parm *opt = IP6CB(skb);
+
+	if (np->rxopt.all) {
+		if ((opt->hop && (np->rxopt.bits.hopopts ||
+				  np->rxopt.bits.ohopopts)) ||
+		    ((IPV6_FLOWINFO_MASK & *(u32*)skb->nh.raw) &&
+		     np->rxopt.bits.rxflow) ||
+		    (opt->srcrt && (np->rxopt.bits.srcrt ||
+				    np->rxopt.bits.osrcrt)) ||
+		    ((opt->dst1 || opt->dst0) &&
+		     (np->rxopt.bits.dstopts || np->rxopt.bits.odstopts)))
+			return 1;
+	}
+	return 0;
+}
+
+EXPORT_SYMBOL_GPL(ipv6_opt_accepted);
+
 int
 snmp6_mib_init(void *ptr[2], size_t mibsize, size_t mibalign)
 {
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index f3629730eb15..13cc7f895583 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -33,6 +33,7 @@
 #include <linux/string.h>
 #include <net/icmp.h>
 #include <net/ipv6.h>
+#include <net/protocol.h>
 #include <net/xfrm.h>
 #include <asm/scatterlist.h>
 
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 8bfbe9970793..6de8ee1a5ad9 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -36,6 +36,7 @@
 #include <linux/random.h>
 #include <net/icmp.h>
 #include <net/ipv6.h>
+#include <net/protocol.h>
 #include <linux/icmpv6.h>
 
 static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index be6faf311387..113374dc342c 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -413,6 +413,8 @@ ipv6_invert_rthdr(struct sock *sk, struct ipv6_rt_hdr *hdr)
 	return opt;
 }
 
+EXPORT_SYMBOL_GPL(ipv6_invert_rthdr);
+
 /**********************************
   Hop-by-hop options.
  **********************************/
@@ -579,6 +581,8 @@ ipv6_dup_options(struct sock *sk, struct ipv6_txoptions *opt)
 	return opt2;
 }
 
+EXPORT_SYMBOL_GPL(ipv6_dup_options);
+
 static int ipv6_renew_option(void *ohdr,
 			     struct ipv6_opt_hdr __user *newopt, int newoptlen,
 			     int inherit,
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
new file mode 100644
index 000000000000..792f90f0f9ec
--- /dev/null
+++ b/net/ipv6/inet6_connection_sock.c
@@ -0,0 +1,199 @@
+/*
+ *	INET	An implementation of the TCP/IP protocol suite for the LINUX
+ *		operating system.  INET is implemented using the BSD Socket
+ *		interface as the means of communication with the user level.
+ *
+ *		Support for INET6 connection oriented protocols.
+ *
+ * Authors:	See the TCPv6 sources
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License
+ *	as published by the Free Software Foundation; either version
+ *	2 of the License, or(at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/in6.h>
+#include <linux/ipv6.h>
+#include <linux/jhash.h>
+
+#include <net/addrconf.h>
+#include <net/inet_connection_sock.h>
+#include <net/inet_ecn.h>
+#include <net/inet_hashtables.h>
+#include <net/ip6_route.h>
+#include <net/sock.h>
+
+int inet6_csk_bind_conflict(const struct sock *sk,
+			    const struct inet_bind_bucket *tb)
+{
+	const struct sock *sk2;
+	const struct hlist_node *node;
+
+	/* We must walk the whole port owner list in this case. -DaveM */
+	sk_for_each_bound(sk2, node, &tb->owners) {
+		if (sk != sk2 &&
+		    (!sk->sk_bound_dev_if ||
+		     !sk2->sk_bound_dev_if ||
+		     sk->sk_bound_dev_if == sk2->sk_bound_dev_if) &&
+		    (!sk->sk_reuse || !sk2->sk_reuse ||
+		     sk2->sk_state == TCP_LISTEN) &&
+		     ipv6_rcv_saddr_equal(sk, sk2))
+			break;
+	}
+
+	return node != NULL;
+}
+
+EXPORT_SYMBOL_GPL(inet6_csk_bind_conflict);
+
+/*
+ * request_sock (formerly open request) hash tables.
+ */
+static u32 inet6_synq_hash(const struct in6_addr *raddr, const u16 rport,
+			   const u32 rnd, const u16 synq_hsize)
+{
+	u32 a = raddr->s6_addr32[0];
+	u32 b = raddr->s6_addr32[1];
+	u32 c = raddr->s6_addr32[2];
+
+	a += JHASH_GOLDEN_RATIO;
+	b += JHASH_GOLDEN_RATIO;
+	c += rnd;
+	__jhash_mix(a, b, c);
+
+	a += raddr->s6_addr32[3];
+	b += (u32)rport;
+	__jhash_mix(a, b, c);
+
+	return c & (synq_hsize - 1);
+}
+
+struct request_sock *inet6_csk_search_req(const struct sock *sk,
+					  struct request_sock ***prevp,
+					  const __u16 rport,
+					  const struct in6_addr *raddr,
+					  const struct in6_addr *laddr,
+					  const int iif)
+{
+	const struct inet_connection_sock *icsk = inet_csk(sk);
+	struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt;
+	struct request_sock *req, **prev;
+
+	for (prev = &lopt->syn_table[inet6_synq_hash(raddr, rport,
+						     lopt->hash_rnd,
+						     lopt->nr_table_entries)];
+	     (req = *prev) != NULL;
+	     prev = &req->dl_next) {
+		const struct inet6_request_sock *treq = inet6_rsk(req);
+
+		if (inet_rsk(req)->rmt_port == rport &&
+		    req->rsk_ops->family == AF_INET6 &&
+		    ipv6_addr_equal(&treq->rmt_addr, raddr) &&
+		    ipv6_addr_equal(&treq->loc_addr, laddr) &&
+		    (!treq->iif || treq->iif == iif)) {
+			BUG_TRAP(req->sk == NULL);
+			*prevp = prev;
+			return req;
+		}
+	}
+
+	return NULL;
+}
+
+EXPORT_SYMBOL_GPL(inet6_csk_search_req);
+
+void inet6_csk_reqsk_queue_hash_add(struct sock *sk,
+				    struct request_sock *req,
+				    const unsigned long timeout)
+{
+	struct inet_connection_sock *icsk = inet_csk(sk);
+	struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt;
+	const u32 h = inet6_synq_hash(&inet6_rsk(req)->rmt_addr,
+				      inet_rsk(req)->rmt_port,
+				      lopt->hash_rnd, lopt->nr_table_entries);
+
+	reqsk_queue_hash_req(&icsk->icsk_accept_queue, h, req, timeout);
+	inet_csk_reqsk_queue_added(sk, timeout);
+}
+
+EXPORT_SYMBOL_GPL(inet6_csk_reqsk_queue_hash_add);
+
+void inet6_csk_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr)
+{
+	struct ipv6_pinfo *np = inet6_sk(sk);
+	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) uaddr;
+
+	sin6->sin6_family = AF_INET6;
+	ipv6_addr_copy(&sin6->sin6_addr, &np->daddr);
+	sin6->sin6_port	= inet_sk(sk)->dport;
+	/* We do not store received flowlabel for TCP */
+	sin6->sin6_flowinfo = 0;
+	sin6->sin6_scope_id = 0;
+	if (sk->sk_bound_dev_if &&
+	    ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
+		sin6->sin6_scope_id = sk->sk_bound_dev_if;
+}
+
+EXPORT_SYMBOL_GPL(inet6_csk_addr2sockaddr);
+
+int inet6_csk_xmit(struct sk_buff *skb, int ipfragok)
+{
+	struct sock *sk = skb->sk;
+	struct inet_sock *inet = inet_sk(sk);
+	struct ipv6_pinfo *np = inet6_sk(sk);
+	struct flowi fl;
+	struct dst_entry *dst;
+	struct in6_addr *final_p = NULL, final;
+
+	memset(&fl, 0, sizeof(fl));
+	fl.proto = sk->sk_protocol;
+	ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
+	ipv6_addr_copy(&fl.fl6_src, &np->saddr);
+	fl.fl6_flowlabel = np->flow_label;
+	IP6_ECN_flow_xmit(sk, fl.fl6_flowlabel);
+	fl.oif = sk->sk_bound_dev_if;
+	fl.fl_ip_sport = inet->sport;
+	fl.fl_ip_dport = inet->dport;
+
+	if (np->opt && np->opt->srcrt) {
+		struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
+		ipv6_addr_copy(&final, &fl.fl6_dst);
+		ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
+		final_p = &final;
+	}
+
+	dst = __sk_dst_check(sk, np->dst_cookie);
+
+	if (dst == NULL) {
+		int err = ip6_dst_lookup(sk, &dst, &fl);
+
+		if (err) {
+			sk->sk_err_soft = -err;
+			return err;
+		}
+
+		if (final_p)
+			ipv6_addr_copy(&fl.fl6_dst, final_p);
+
+		if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
+			sk->sk_route_caps = 0;
+			return err;
+		}
+
+		ip6_dst_store(sk, dst, NULL);
+		sk->sk_route_caps = dst->dev->features &
+			~(NETIF_F_IP_CSUM | NETIF_F_TSO);
+	}
+
+	skb->dst = dst_clone(dst);
+
+	/* Restore final destination back after routing done */
+	ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
+
+	return ip6_xmit(sk, skb, &fl, np->opt, 0);
+}
+
+EXPORT_SYMBOL_GPL(inet6_csk_xmit);
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 01d5f46d4e40..4154f3a8b6cf 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -5,7 +5,8 @@
  *
  *		Generic INET6 transport hashtables
  *
- * Authors:	Lotsa people, from code originally in tcp
+ * Authors:	Lotsa people, from code originally in tcp, generalised here
+ * 		by Arnaldo Carvalho de Melo <acme@mandriva.com>
  *
  *	This program is free software; you can redistribute it and/or
  *	modify it under the terms of the GNU General Public License
@@ -14,12 +15,13 @@
  */
 
 #include <linux/config.h>
-
 #include <linux/module.h>
+#include <linux/random.h>
 
 #include <net/inet_connection_sock.h>
 #include <net/inet_hashtables.h>
 #include <net/inet6_hashtables.h>
+#include <net/ip.h>
 
 struct sock *inet6_lookup_listener(struct inet_hashinfo *hashinfo,
 				   const struct in6_addr *daddr,
@@ -79,3 +81,180 @@ struct sock *inet6_lookup(struct inet_hashinfo *hashinfo,
 }
 
 EXPORT_SYMBOL_GPL(inet6_lookup);
+
+static int __inet6_check_established(struct inet_timewait_death_row *death_row,
+				     struct sock *sk, const __u16 lport,
+				     struct inet_timewait_sock **twp)
+{
+	struct inet_hashinfo *hinfo = death_row->hashinfo;
+	const struct inet_sock *inet = inet_sk(sk);
+	const struct ipv6_pinfo *np = inet6_sk(sk);
+	const struct in6_addr *daddr = &np->rcv_saddr;
+	const struct in6_addr *saddr = &np->daddr;
+	const int dif = sk->sk_bound_dev_if;
+	const u32 ports = INET_COMBINED_PORTS(inet->dport, lport);
+	const unsigned int hash = inet6_ehashfn(daddr, inet->num, saddr,
+						inet->dport);
+	struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash);
+	struct sock *sk2;
+	const struct hlist_node *node;
+	struct inet_timewait_sock *tw;
+
+	prefetch(head->chain.first);
+	write_lock(&head->lock);
+
+	/* Check TIME-WAIT sockets first. */
+	sk_for_each(sk2, node, &(head + hinfo->ehash_size)->chain) {
+		const struct inet6_timewait_sock *tw6 = inet6_twsk(sk2);
+
+		tw = inet_twsk(sk2);
+
+		if(*((__u32 *)&(tw->tw_dport)) == ports &&
+		   sk2->sk_family == PF_INET6 &&
+		   ipv6_addr_equal(&tw6->tw_v6_daddr, saddr) &&
+		   ipv6_addr_equal(&tw6->tw_v6_rcv_saddr, daddr) &&
+		   sk2->sk_bound_dev_if == sk->sk_bound_dev_if) {
+			if (twsk_unique(sk, sk2, twp))
+				goto unique;
+			else
+				goto not_unique;
+		}
+	}
+	tw = NULL;
+
+	/* And established part... */
+	sk_for_each(sk2, node, &head->chain) {
+		if (INET6_MATCH(sk2, hash, saddr, daddr, ports, dif))
+			goto not_unique;
+	}
+
+unique:
+	BUG_TRAP(sk_unhashed(sk));
+	__sk_add_node(sk, &head->chain);
+	sk->sk_hash = hash;
+	sock_prot_inc_use(sk->sk_prot);
+	write_unlock(&head->lock);
+
+	if (twp != NULL) {
+		*twp = tw;
+		NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
+	} else if (tw != NULL) {
+		/* Silly. Should hash-dance instead... */
+		inet_twsk_deschedule(tw, death_row);
+		NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
+
+		inet_twsk_put(tw);
+	}
+	return 0;
+
+not_unique:
+	write_unlock(&head->lock);
+	return -EADDRNOTAVAIL;
+}
+
+static inline u32 inet6_sk_port_offset(const struct sock *sk)
+{
+	const struct inet_sock *inet = inet_sk(sk);
+	const struct ipv6_pinfo *np = inet6_sk(sk);
+	return secure_ipv6_port_ephemeral(np->rcv_saddr.s6_addr32,
+					  np->daddr.s6_addr32,
+					  inet->dport);
+}
+
+int inet6_hash_connect(struct inet_timewait_death_row *death_row,
+		       struct sock *sk)
+{
+	struct inet_hashinfo *hinfo = death_row->hashinfo;
+	const unsigned short snum = inet_sk(sk)->num;
+	struct inet_bind_hashbucket *head;
+	struct inet_bind_bucket *tb;
+	int ret;
+
+	if (snum == 0) {
+		const int low = sysctl_local_port_range[0];
+		const int high = sysctl_local_port_range[1];
+		const int range = high - low;
+		int i, port;
+		static u32 hint;
+		const u32 offset = hint + inet6_sk_port_offset(sk);
+		struct hlist_node *node;
+		struct inet_timewait_sock *tw = NULL;
+
+		local_bh_disable();
+		for (i = 1; i <= range; i++) {
+			port = low + (i + offset) % range;
+			head = &hinfo->bhash[inet_bhashfn(port, hinfo->bhash_size)];
+			spin_lock(&head->lock);
+
+			/* Does not bother with rcv_saddr checks,
+			 * because the established check is already
+			 * unique enough.
+			 */
+			inet_bind_bucket_for_each(tb, node, &head->chain) {
+				if (tb->port == port) {
+					BUG_TRAP(!hlist_empty(&tb->owners));
+					if (tb->fastreuse >= 0)
+						goto next_port;
+					if (!__inet6_check_established(death_row,
+								       sk, port,
+								       &tw))
+						goto ok;
+					goto next_port;
+				}
+			}
+
+			tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep,
+						     head, port);
+			if (!tb) {
+				spin_unlock(&head->lock);
+				break;
+			}
+			tb->fastreuse = -1;
+			goto ok;
+
+		next_port:
+			spin_unlock(&head->lock);
+		}
+		local_bh_enable();
+
+		return -EADDRNOTAVAIL;
+
+ok:
+		hint += i;
+
+		/* Head lock still held and bh's disabled */
+		inet_bind_hash(sk, tb, port);
+		if (sk_unhashed(sk)) {
+			inet_sk(sk)->sport = htons(port);
+			__inet6_hash(hinfo, sk);
+		}
+		spin_unlock(&head->lock);
+
+		if (tw) {
+			inet_twsk_deschedule(tw, death_row);
+			inet_twsk_put(tw);
+		}
+
+		ret = 0;
+		goto out;
+	}
+
+	head = &hinfo->bhash[inet_bhashfn(snum, hinfo->bhash_size)];
+	tb = inet_csk(sk)->icsk_bind_hash;
+	spin_lock_bh(&head->lock);
+
+	if (sk_head(&tb->owners) == sk && sk->sk_bind_node.next == NULL) {
+		__inet6_hash(hinfo, sk);
+		spin_unlock_bh(&head->lock);
+		return 0;
+	} else {
+		spin_unlock(&head->lock);
+		/* No definite answer... Walk to established hash table */
+		ret = __inet6_check_established(death_row, sk, snum, NULL);
+out:
+		local_bh_enable();
+		return ret;
+	}
+}
+
+EXPORT_SYMBOL_GPL(inet6_hash_connect);
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index 1cf02765fb5c..89d12b4817a9 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -200,6 +200,8 @@ struct ip6_flowlabel * fl6_sock_lookup(struct sock *sk, u32 label)
 	return NULL;
 }
 
+EXPORT_SYMBOL_GPL(fl6_sock_lookup);
+
 void fl6_free_socklist(struct sock *sk)
 {
 	struct ipv6_pinfo *np = inet6_sk(sk);
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 8523c76ebf76..b4c4beba0ede 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -775,6 +775,8 @@ out_err_release:
 	return err;
 }
 
+EXPORT_SYMBOL_GPL(ip6_dst_lookup);
+
 static inline int ip6_ufo_append_data(struct sock *sk,
 			int getfrag(void *from, char *to, int offset, int len,
 			int odd, struct sk_buff *skb),
diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c
index 55917fb17094..626dd39685f2 100644
--- a/net/ipv6/ipcomp6.c
+++ b/net/ipv6/ipcomp6.c
@@ -47,6 +47,7 @@
 #include <linux/rtnetlink.h>
 #include <net/icmp.h>
 #include <net/ipv6.h>
+#include <net/protocol.h>
 #include <linux/ipv6.h>
 #include <linux/icmpv6.h>
 
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 3620718defe6..c63868dd2ca2 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -163,17 +163,17 @@ int ipv6_setsockopt(struct sock *sk, int level, int optname,
 			sk_refcnt_debug_dec(sk);
 
 			if (sk->sk_protocol == IPPROTO_TCP) {
-				struct tcp_sock *tp = tcp_sk(sk);
+				struct inet_connection_sock *icsk = inet_csk(sk);
 
 				local_bh_disable();
 				sock_prot_dec_use(sk->sk_prot);
 				sock_prot_inc_use(&tcp_prot);
 				local_bh_enable();
 				sk->sk_prot = &tcp_prot;
-				tp->af_specific = &ipv4_specific;
+				icsk->icsk_af_ops = &ipv4_specific;
 				sk->sk_socket->ops = &inet_stream_ops;
 				sk->sk_family = PF_INET;
-				tcp_sync_mss(sk, tp->pmtu_cookie);
+				tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
 			} else {
 				local_bh_disable();
 				sock_prot_dec_use(sk->sk_prot);
@@ -317,14 +317,15 @@ int ipv6_setsockopt(struct sock *sk, int level, int optname,
 		}
 
 		retv = 0;
-		if (sk->sk_type == SOCK_STREAM) {
+		if (inet_sk(sk)->is_icsk) {
 			if (opt) {
-				struct tcp_sock *tp = tcp_sk(sk);
+				struct inet_connection_sock *icsk = inet_csk(sk);
 				if (!((1 << sk->sk_state) &
 				      (TCPF_LISTEN | TCPF_CLOSE))
 				    && inet_sk(sk)->daddr != LOOPBACK4_IPV6) {
-					tp->ext_header_len = opt->opt_flen + opt->opt_nflen;
-					tcp_sync_mss(sk, tp->pmtu_cookie);
+					icsk->icsk_ext_hdr_len =
+						opt->opt_flen + opt->opt_nflen;
+					icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie);
 				}
 			}
 			opt = xchg(&np->opt, opt);
@@ -380,14 +381,15 @@ sticky_done:
 			goto done;
 update:
 		retv = 0;
-		if (sk->sk_type == SOCK_STREAM) {
+		if (inet_sk(sk)->is_icsk) {
 			if (opt) {
-				struct tcp_sock *tp = tcp_sk(sk);
+				struct inet_connection_sock *icsk = inet_csk(sk);
 				if (!((1 << sk->sk_state) &
 				      (TCPF_LISTEN | TCPF_CLOSE))
 				    && inet_sk(sk)->daddr != LOOPBACK4_IPV6) {
-					tp->ext_header_len = opt->opt_flen + opt->opt_nflen;
-					tcp_sync_mss(sk, tp->pmtu_cookie);
+					icsk->icsk_ext_hdr_len =
+						opt->opt_flen + opt->opt_nflen;
+					icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie);
 				}
 			}
 			opt = xchg(&np->opt, opt);
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index f829a4ad3ccc..1cf305a9f8dd 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -224,7 +224,7 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, struct in6_addr *addr)
 
 	mc_lst->ifindex = dev->ifindex;
 	mc_lst->sfmode = MCAST_EXCLUDE;
-	mc_lst->sflock = RW_LOCK_UNLOCKED;
+	rwlock_init(&mc_lst->sflock);
 	mc_lst->sflist = NULL;
 
 	/*
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 95d469271c4d..925b42d48347 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -15,6 +15,7 @@
  * - new extension header parser code
  */
 #include <linux/config.h>
+#include <linux/in.h>
 #include <linux/skbuff.h>
 #include <linux/kmod.h>
 #include <linux/vmalloc.h>
@@ -86,11 +87,6 @@ static DECLARE_MUTEX(ip6t_mutex);
    context stops packets coming through and allows user context to read
    the counters or update the rules.
 
-   To be cache friendly on SMP, we arrange them like so:
-   [ n-entries ]
-   ... cache-align padding ...
-   [ n-entries ]
-
    Hence the start of any table is given by get_table() below. */
 
 /* The table itself */
@@ -108,33 +104,29 @@ struct ip6t_table_info
 	unsigned int underflow[NF_IP6_NUMHOOKS];
 
 	/* ip6t_entry tables: one per CPU */
-	char entries[0] ____cacheline_aligned;
+	void *entries[NR_CPUS];
 };
 
 static LIST_HEAD(ip6t_target);
 static LIST_HEAD(ip6t_match);
 static LIST_HEAD(ip6t_tables);
+#define SET_COUNTER(c,b,p) do { (c).bcnt = (b); (c).pcnt = (p); } while(0)
 #define ADD_COUNTER(c,b,p) do { (c).bcnt += (b); (c).pcnt += (p); } while(0)
 
-#ifdef CONFIG_SMP
-#define TABLE_OFFSET(t,p) (SMP_ALIGN((t)->size)*(p))
-#else
-#define TABLE_OFFSET(t,p) 0
-#endif
-
 #if 0
 #define down(x) do { printk("DOWN:%u:" #x "\n", __LINE__); down(x); } while(0)
 #define down_interruptible(x) ({ int __r; printk("DOWNi:%u:" #x "\n", __LINE__); __r = down_interruptible(x); if (__r != 0) printk("ABORT-DOWNi:%u\n", __LINE__); __r; })
 #define up(x) do { printk("UP:%u:" #x "\n", __LINE__); up(x); } while(0)
 #endif
 
-static int ip6_masked_addrcmp(struct in6_addr addr1, struct in6_addr mask,
-			      struct in6_addr addr2)
+int
+ip6_masked_addrcmp(const struct in6_addr *addr1, const struct in6_addr *mask,
+		   const struct in6_addr *addr2)
 {
 	int i;
 	for( i = 0; i < 16; i++){
-		if((addr1.s6_addr[i] & mask.s6_addr[i]) !=
-		   (addr2.s6_addr[i] & mask.s6_addr[i]))
+		if((addr1->s6_addr[i] & mask->s6_addr[i]) !=
+		   (addr2->s6_addr[i] & mask->s6_addr[i]))
 			return 1;
 	}
 	return 0;
@@ -168,10 +160,10 @@ ip6_packet_match(const struct sk_buff *skb,
 
 #define FWINV(bool,invflg) ((bool) ^ !!(ip6info->invflags & invflg))
 
-	if (FWINV(ip6_masked_addrcmp(ipv6->saddr,ip6info->smsk,ip6info->src),
-		  IP6T_INV_SRCIP)
-	    || FWINV(ip6_masked_addrcmp(ipv6->daddr,ip6info->dmsk,ip6info->dst),
-		     IP6T_INV_DSTIP)) {
+	if (FWINV(ip6_masked_addrcmp(&ipv6->saddr, &ip6info->smsk,
+				     &ip6info->src), IP6T_INV_SRCIP)
+	    || FWINV(ip6_masked_addrcmp(&ipv6->daddr, &ip6info->dmsk,
+					&ip6info->dst), IP6T_INV_DSTIP)) {
 		dprintf("Source or dest mismatch.\n");
 /*
 		dprintf("SRC: %u. Mask: %u. Target: %u.%s\n", ip->saddr,
@@ -214,69 +206,21 @@ ip6_packet_match(const struct sk_buff *skb,
 
 	/* look for the desired protocol header */
 	if((ip6info->flags & IP6T_F_PROTO)) {
-		u_int8_t currenthdr = ipv6->nexthdr;
-		struct ipv6_opt_hdr _hdr, *hp;
-		u_int16_t ptr;		/* Header offset in skb */
-		u_int16_t hdrlen;	/* Header */
-		u_int16_t _fragoff = 0, *fp = NULL;
+		int protohdr;
+		unsigned short _frag_off;
 
-		ptr = IPV6_HDR_LEN;
-
-		while (ip6t_ext_hdr(currenthdr)) {
-			/* Is there enough space for the next ext header? */
-			if (skb->len - ptr < IPV6_OPTHDR_LEN)
-				return 0;
-
-			/* NONE or ESP: there isn't protocol part */
-			/* If we want to count these packets in '-p all',
-			 * we will change the return 0 to 1*/
-			if ((currenthdr == IPPROTO_NONE) ||
-				(currenthdr == IPPROTO_ESP))
-				break;
+		protohdr = ipv6_find_hdr(skb, protoff, -1, &_frag_off);
+		if (protohdr < 0)
+			return 0;
 
-			hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr);
-			BUG_ON(hp == NULL);
-
-			/* Size calculation */
-			if (currenthdr == IPPROTO_FRAGMENT) {
-				fp = skb_header_pointer(skb,
-						   ptr+offsetof(struct frag_hdr,
-								frag_off),
-						   sizeof(_fragoff),
-						   &_fragoff);
-				if (fp == NULL)
-					return 0;
-
-				_fragoff = ntohs(*fp) & ~0x7;
-				hdrlen = 8;
-			} else if (currenthdr == IPPROTO_AH)
-				hdrlen = (hp->hdrlen+2)<<2;
-			else
-				hdrlen = ipv6_optlen(hp);
-
-			currenthdr = hp->nexthdr;
-			ptr += hdrlen;
-			/* ptr is too large */
-			if ( ptr > skb->len )
-				return 0;
-			if (_fragoff) {
-				if (ip6t_ext_hdr(currenthdr))
-					return 0;
-				break;
-			}
-		}
-
-		*protoff = ptr;
-		*fragoff = _fragoff;
+		*fragoff = _frag_off;
 
-		/* currenthdr contains the protocol header */
-
 		dprintf("Packet protocol %hi ?= %s%hi.\n",
-				currenthdr,
+				protohdr,
 				ip6info->invflags & IP6T_INV_PROTO ? "!":"",
 				ip6info->proto);
 
-		if (ip6info->proto == currenthdr) {
+		if (ip6info->proto == protohdr) {
 			if(ip6info->invflags & IP6T_INV_PROTO) {
 				return 0;
 			}
@@ -376,8 +320,7 @@ ip6t_do_table(struct sk_buff **pskb,
 
 	read_lock_bh(&table->lock);
 	IP_NF_ASSERT(table->valid_hooks & (1 << hook));
-	table_base = (void *)table->private->entries
-		+ TABLE_OFFSET(table->private, smp_processor_id());
+	table_base = (void *)table->private->entries[smp_processor_id()];
 	e = get_entry(table_base, table->private->hook_entry[hook]);
 
 #ifdef CONFIG_NETFILTER_DEBUG
@@ -649,7 +592,8 @@ unconditional(const struct ip6t_ip6 *ipv6)
 /* Figures out from what hook each rule can be called: returns 0 if
    there are loops.  Puts hook bitmask in comefrom. */
 static int
-mark_source_chains(struct ip6t_table_info *newinfo, unsigned int valid_hooks)
+mark_source_chains(struct ip6t_table_info *newinfo,
+		   unsigned int valid_hooks, void *entry0)
 {
 	unsigned int hook;
 
@@ -658,7 +602,7 @@ mark_source_chains(struct ip6t_table_info *newinfo, unsigned int valid_hooks)
 	for (hook = 0; hook < NF_IP6_NUMHOOKS; hook++) {
 		unsigned int pos = newinfo->hook_entry[hook];
 		struct ip6t_entry *e
-			= (struct ip6t_entry *)(newinfo->entries + pos);
+			= (struct ip6t_entry *)(entry0 + pos);
 
 		if (!(valid_hooks & (1 << hook)))
 			continue;
@@ -708,13 +652,13 @@ mark_source_chains(struct ip6t_table_info *newinfo, unsigned int valid_hooks)
 				goto next;
 
 				e = (struct ip6t_entry *)
-					(newinfo->entries + pos);
+					(entry0 + pos);
 			} while (oldpos == pos + e->next_offset);
 
 			/* Move along one */
 			size = e->next_offset;
 			e = (struct ip6t_entry *)
-				(newinfo->entries + pos + size);
+				(entry0 + pos + size);
 			e->counters.pcnt = pos;
 			pos += size;
 		} else {
@@ -731,7 +675,7 @@ mark_source_chains(struct ip6t_table_info *newinfo, unsigned int valid_hooks)
 				newpos = pos + e->next_offset;
 			}
 			e = (struct ip6t_entry *)
-				(newinfo->entries + newpos);
+				(entry0 + newpos);
 			e->counters.pcnt = pos;
 			pos = newpos;
 		}
@@ -941,6 +885,7 @@ static int
 translate_table(const char *name,
 		unsigned int valid_hooks,
 		struct ip6t_table_info *newinfo,
+		void *entry0,
 		unsigned int size,
 		unsigned int number,
 		const unsigned int *hook_entries,
@@ -961,11 +906,11 @@ translate_table(const char *name,
 	duprintf("translate_table: size %u\n", newinfo->size);
 	i = 0;
 	/* Walk through entries, checking offsets. */
-	ret = IP6T_ENTRY_ITERATE(newinfo->entries, newinfo->size,
+	ret = IP6T_ENTRY_ITERATE(entry0, newinfo->size,
 				check_entry_size_and_hooks,
 				newinfo,
-				newinfo->entries,
-				newinfo->entries + size,
+				entry0,
+				entry0 + size,
 				hook_entries, underflows, &i);
 	if (ret != 0)
 		return ret;
@@ -993,27 +938,24 @@ translate_table(const char *name,
 		}
 	}
 
-	if (!mark_source_chains(newinfo, valid_hooks))
+	if (!mark_source_chains(newinfo, valid_hooks, entry0))
 		return -ELOOP;
 
 	/* Finally, each sanity check must pass */
 	i = 0;
-	ret = IP6T_ENTRY_ITERATE(newinfo->entries, newinfo->size,
+	ret = IP6T_ENTRY_ITERATE(entry0, newinfo->size,
 				check_entry, name, size, &i);
 
 	if (ret != 0) {
-		IP6T_ENTRY_ITERATE(newinfo->entries, newinfo->size,
+		IP6T_ENTRY_ITERATE(entry0, newinfo->size,
 				  cleanup_entry, &i);
 		return ret;
 	}
 
 	/* And one copy for every other CPU */
 	for_each_cpu(i) {
-		if (i == 0)
-			continue;
-		memcpy(newinfo->entries + SMP_ALIGN(newinfo->size) * i,
-		       newinfo->entries,
-		       SMP_ALIGN(newinfo->size));
+		if (newinfo->entries[i] && newinfo->entries[i] != entry0)
+			memcpy(newinfo->entries[i], entry0, newinfo->size);
 	}
 
 	return ret;
@@ -1029,15 +971,12 @@ replace_table(struct ip6t_table *table,
 
 #ifdef CONFIG_NETFILTER_DEBUG
 	{
-		struct ip6t_entry *table_base;
-		unsigned int i;
+		int cpu;
 
-		for_each_cpu(i) {
-			table_base =
-				(void *)newinfo->entries
-				+ TABLE_OFFSET(newinfo, i);
-
-			table_base->comefrom = 0xdead57ac;
+		for_each_cpu(cpu) {
+			struct ip6t_entry *table_base = newinfo->entries[cpu];
+			if (table_base)
+				table_base->comefrom = 0xdead57ac;
 		}
 	}
 #endif
@@ -1072,16 +1011,44 @@ add_entry_to_counter(const struct ip6t_entry *e,
 	return 0;
 }
 
+static inline int
+set_entry_to_counter(const struct ip6t_entry *e,
+		     struct ip6t_counters total[],
+		     unsigned int *i)
+{
+	SET_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
+
+	(*i)++;
+	return 0;
+}
+
 static void
 get_counters(const struct ip6t_table_info *t,
 	     struct ip6t_counters counters[])
 {
 	unsigned int cpu;
 	unsigned int i;
+	unsigned int curcpu;
+
+	/* Instead of clearing (by a previous call to memset())
+	 * the counters and using adds, we set the counters
+	 * with data used by 'current' CPU
+	 * We dont care about preemption here.
+	 */
+	curcpu = raw_smp_processor_id();
+
+	i = 0;
+	IP6T_ENTRY_ITERATE(t->entries[curcpu],
+			   t->size,
+			   set_entry_to_counter,
+			   counters,
+			   &i);
 
 	for_each_cpu(cpu) {
+		if (cpu == curcpu)
+			continue;
 		i = 0;
-		IP6T_ENTRY_ITERATE(t->entries + TABLE_OFFSET(t, cpu),
+		IP6T_ENTRY_ITERATE(t->entries[cpu],
 				  t->size,
 				  add_entry_to_counter,
 				  counters,
@@ -1098,6 +1065,7 @@ copy_entries_to_user(unsigned int total_size,
 	struct ip6t_entry *e;
 	struct ip6t_counters *counters;
 	int ret = 0;
+	void *loc_cpu_entry;
 
 	/* We need atomic snapshot of counters: rest doesn't change
 	   (other than comefrom, which userspace doesn't care
@@ -1109,13 +1077,13 @@ copy_entries_to_user(unsigned int total_size,
 		return -ENOMEM;
 
 	/* First, sum counters... */
-	memset(counters, 0, countersize);
 	write_lock_bh(&table->lock);
 	get_counters(table->private, counters);
 	write_unlock_bh(&table->lock);
 
-	/* ... then copy entire thing from CPU 0... */
-	if (copy_to_user(userptr, table->private->entries, total_size) != 0) {
+	/* choose the copy that is on ourc node/cpu */
+	loc_cpu_entry = table->private->entries[raw_smp_processor_id()];
+	if (copy_to_user(userptr, loc_cpu_entry, total_size) != 0) {
 		ret = -EFAULT;
 		goto free_counters;
 	}
@@ -1127,7 +1095,7 @@ copy_entries_to_user(unsigned int total_size,
 		struct ip6t_entry_match *m;
 		struct ip6t_entry_target *t;
 
-		e = (struct ip6t_entry *)(table->private->entries + off);
+		e = (struct ip6t_entry *)(loc_cpu_entry + off);
 		if (copy_to_user(userptr + off
 				 + offsetof(struct ip6t_entry, counters),
 				 &counters[num],
@@ -1196,6 +1164,46 @@ get_entries(const struct ip6t_get_entries *entries,
 	return ret;
 }
 
+static void free_table_info(struct ip6t_table_info *info)
+{
+	int cpu;
+	for_each_cpu(cpu) {
+		if (info->size <= PAGE_SIZE)
+			kfree(info->entries[cpu]);
+		else
+			vfree(info->entries[cpu]);
+	}
+	kfree(info);
+}
+
+static struct ip6t_table_info *alloc_table_info(unsigned int size)
+{
+	struct ip6t_table_info *newinfo;
+	int cpu;
+
+	newinfo = kzalloc(sizeof(struct ip6t_table_info), GFP_KERNEL);
+	if (!newinfo)
+		return NULL;
+
+	newinfo->size = size;
+
+	for_each_cpu(cpu) {
+		if (size <= PAGE_SIZE)
+			newinfo->entries[cpu] = kmalloc_node(size,
+							     GFP_KERNEL,
+							     cpu_to_node(cpu));
+		else
+			newinfo->entries[cpu] = vmalloc_node(size,
+							     cpu_to_node(cpu));
+		if (newinfo->entries[cpu] == NULL) {
+			free_table_info(newinfo);
+			return NULL;
+		}
+	}
+
+	return newinfo;
+}
+
 static int
 do_replace(void __user *user, unsigned int len)
 {
@@ -1204,6 +1212,7 @@ do_replace(void __user *user, unsigned int len)
 	struct ip6t_table *t;
 	struct ip6t_table_info *newinfo, *oldinfo;
 	struct ip6t_counters *counters;
+	void *loc_cpu_entry, *loc_cpu_old_entry;
 
 	if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
 		return -EFAULT;
@@ -1212,13 +1221,13 @@ do_replace(void __user *user, unsigned int len)
 	if ((SMP_ALIGN(tmp.size) >> PAGE_SHIFT) + 2 > num_physpages)
 		return -ENOMEM;
 
-	newinfo = vmalloc(sizeof(struct ip6t_table_info)
-			  + SMP_ALIGN(tmp.size) *
-			  	(highest_possible_processor_id()+1));
+	newinfo = alloc_table_info(tmp.size);
 	if (!newinfo)
 		return -ENOMEM;
 
-	if (copy_from_user(newinfo->entries, user + sizeof(tmp),
+	/* choose the copy that is on our node/cpu */
+	loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
+	if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
 			   tmp.size) != 0) {
 		ret = -EFAULT;
 		goto free_newinfo;
@@ -1229,10 +1238,9 @@ do_replace(void __user *user, unsigned int len)
 		ret = -ENOMEM;
 		goto free_newinfo;
 	}
-	memset(counters, 0, tmp.num_counters * sizeof(struct ip6t_counters));
 
 	ret = translate_table(tmp.name, tmp.valid_hooks,
-			      newinfo, tmp.size, tmp.num_entries,
+			      newinfo, loc_cpu_entry, tmp.size, tmp.num_entries,
 			      tmp.hook_entry, tmp.underflow);
 	if (ret != 0)
 		goto free_newinfo_counters;
@@ -1271,8 +1279,9 @@ do_replace(void __user *user, unsigned int len)
 	/* Get the old counters. */
 	get_counters(oldinfo, counters);
 	/* Decrease module usage counts and free resource */
-	IP6T_ENTRY_ITERATE(oldinfo->entries, oldinfo->size, cleanup_entry,NULL);
-	vfree(oldinfo);
+	loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()];
+	IP6T_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry,NULL);
+	free_table_info(oldinfo);
 	if (copy_to_user(tmp.counters, counters,
 			 sizeof(struct ip6t_counters) * tmp.num_counters) != 0)
 		ret = -EFAULT;
@@ -1284,11 +1293,11 @@ do_replace(void __user *user, unsigned int len)
 	module_put(t->me);
 	up(&ip6t_mutex);
  free_newinfo_counters_untrans:
-	IP6T_ENTRY_ITERATE(newinfo->entries, newinfo->size, cleanup_entry,NULL);
+	IP6T_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry,NULL);
  free_newinfo_counters:
 	vfree(counters);
  free_newinfo:
-	vfree(newinfo);
+	free_table_info(newinfo);
 	return ret;
 }
 
@@ -1321,6 +1330,7 @@ do_add_counters(void __user *user, unsigned int len)
 	struct ip6t_counters_info tmp, *paddc;
 	struct ip6t_table *t;
 	int ret = 0;
+	void *loc_cpu_entry;
 
 	if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
 		return -EFAULT;
@@ -1350,7 +1360,9 @@ do_add_counters(void __user *user, unsigned int len)
 	}
 
 	i = 0;
-	IP6T_ENTRY_ITERATE(t->private->entries,
+	/* Choose the copy that is on our node */
+	loc_cpu_entry = t->private->entries[smp_processor_id()];
+	IP6T_ENTRY_ITERATE(loc_cpu_entry,
 			  t->private->size,
 			  add_counter_to_entry,
 			  paddc->counters,
@@ -1543,28 +1555,29 @@ int ip6t_register_table(struct ip6t_table *table,
 	struct ip6t_table_info *newinfo;
 	static struct ip6t_table_info bootstrap
 		= { 0, 0, 0, { 0 }, { 0 }, { } };
+	void *loc_cpu_entry;
 
-	newinfo = vmalloc(sizeof(struct ip6t_table_info)
-			  + SMP_ALIGN(repl->size) *
-			  	(highest_possible_processor_id()+1));
+	newinfo = alloc_table_info(repl->size);
 	if (!newinfo)
 		return -ENOMEM;
 
-	memcpy(newinfo->entries, repl->entries, repl->size);
+	/* choose the copy on our node/cpu */
+	loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
+	memcpy(loc_cpu_entry, repl->entries, repl->size);
 
 	ret = translate_table(table->name, table->valid_hooks,
-			      newinfo, repl->size,
+			      newinfo, loc_cpu_entry, repl->size,
 			      repl->num_entries,
 			      repl->hook_entry,
 			      repl->underflow);
 	if (ret != 0) {
-		vfree(newinfo);
+		free_table_info(newinfo);
 		return ret;
 	}
 
 	ret = down_interruptible(&ip6t_mutex);
 	if (ret != 0) {
-		vfree(newinfo);
+		free_table_info(newinfo);
 		return ret;
 	}
 
@@ -1593,20 +1606,23 @@ int ip6t_register_table(struct ip6t_table *table,
 	return ret;
 
  free_unlock:
-	vfree(newinfo);
+	free_table_info(newinfo);
 	goto unlock;
 }
 
 void ip6t_unregister_table(struct ip6t_table *table)
 {
+	void *loc_cpu_entry;
+
 	down(&ip6t_mutex);
 	LIST_DELETE(&ip6t_tables, table);
 	up(&ip6t_mutex);
 
 	/* Decrease module usage counts and free resources */
-	IP6T_ENTRY_ITERATE(table->private->entries, table->private->size,
+	loc_cpu_entry = table->private->entries[raw_smp_processor_id()];
+	IP6T_ENTRY_ITERATE(loc_cpu_entry, table->private->size,
 			  cleanup_entry, NULL);
-	vfree(table->private);
+	free_table_info(table->private);
 }
 
 /* Returns 1 if the port is matched by the range, 0 otherwise */
@@ -2035,26 +2051,39 @@ static void __exit fini(void)
 }
 
 /*
- * find specified header up to transport protocol header.
- * If found target header, the offset to the header is set to *offset
- * and return 0. otherwise, return -1.
+ * find the offset to specified header or the protocol number of last header
+ * if target < 0. "last header" is transport protocol header, ESP, or
+ * "No next header".
+ *
+ * If target header is found, its offset is set in *offset and return protocol
+ * number. Otherwise, return -1.
+ *
+ * Note that non-1st fragment is special case that "the protocol number
+ * of last header" is "next header" field in Fragment header. In this case,
+ * *offset is meaningless and fragment offset is stored in *fragoff if fragoff
+ * isn't NULL.
  *
- * Notes: - non-1st Fragment Header isn't skipped.
- *	  - ESP header isn't skipped.
- *	  - The target header may be trancated.
  */
-int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, u8 target)
+int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset,
+		  int target, unsigned short *fragoff)
 {
 	unsigned int start = (u8*)(skb->nh.ipv6h + 1) - skb->data;
 	u8 nexthdr = skb->nh.ipv6h->nexthdr;
 	unsigned int len = skb->len - start;
 
+	if (fragoff)
+		*fragoff = 0;
+
 	while (nexthdr != target) {
 		struct ipv6_opt_hdr _hdr, *hp;
 		unsigned int hdrlen;
 
-		if ((!ipv6_ext_hdr(nexthdr)) || nexthdr == NEXTHDR_NONE)
+		if ((!ipv6_ext_hdr(nexthdr)) || nexthdr == NEXTHDR_NONE) {
+			if (target < 0)
+				break;
 			return -1;
+		}
+
 		hp = skb_header_pointer(skb, start, sizeof(_hdr), &_hdr);
 		if (hp == NULL)
 			return -1;
@@ -2068,8 +2097,17 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, u8 target)
 			if (fp == NULL)
 				return -1;
 
-			if (ntohs(*fp) & ~0x7)
+			_frag_off = ntohs(*fp) & ~0x7;
+			if (_frag_off) {
+				if (target < 0 &&
+				    ((!ipv6_ext_hdr(hp->nexthdr)) ||
+				     nexthdr == NEXTHDR_NONE)) {
+					if (fragoff)
+						*fragoff = _frag_off;
+					return hp->nexthdr;
+				}
 				return -1;
+			}
 			hdrlen = 8;
 		} else if (nexthdr == NEXTHDR_AUTH)
 			hdrlen = (hp->hdrlen + 2) << 2;
@@ -2082,7 +2120,7 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, u8 target)
 	}
 
 	*offset = start;
-	return 0;
+	return nexthdr;
 }
 
 EXPORT_SYMBOL(ip6t_register_table);
@@ -2094,6 +2132,7 @@ EXPORT_SYMBOL(ip6t_register_target);
 EXPORT_SYMBOL(ip6t_unregister_target);
 EXPORT_SYMBOL(ip6t_ext_hdr);
 EXPORT_SYMBOL(ipv6_find_hdr);
+EXPORT_SYMBOL(ip6_masked_addrcmp);
 
 module_init(init);
 module_exit(fini);
diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c
index 0cd1d1bd9033..ae4653bfd654 100644
--- a/net/ipv6/netfilter/ip6t_LOG.c
+++ b/net/ipv6/netfilter/ip6t_LOG.c
@@ -13,6 +13,7 @@
 #include <linux/module.h>
 #include <linux/moduleparam.h>
 #include <linux/skbuff.h>
+#include <linux/if_arp.h>
 #include <linux/ip.h>
 #include <linux/spinlock.h>
 #include <linux/icmpv6.h>
diff --git a/net/ipv6/netfilter/ip6t_ah.c b/net/ipv6/netfilter/ip6t_ah.c
index dde37793d20b..f5c1a7ff4a1f 100644
--- a/net/ipv6/netfilter/ip6t_ah.c
+++ b/net/ipv6/netfilter/ip6t_ah.c
@@ -9,6 +9,7 @@
 
 #include <linux/module.h>
 #include <linux/skbuff.h>
+#include <linux/ip.h>
 #include <linux/ipv6.h>
 #include <linux/types.h>
 #include <net/checksum.h>
@@ -53,7 +54,7 @@ match(const struct sk_buff *skb,
 	unsigned int ptr;
 	unsigned int hdrlen = 0;
 
-	if (ipv6_find_hdr(skb, &ptr, NEXTHDR_AUTH) < 0)
+	if (ipv6_find_hdr(skb, &ptr, NEXTHDR_AUTH, NULL) < 0)
 		return 0;
 
 	ah = skb_header_pointer(skb, ptr, sizeof(_ah), &_ah);
diff --git a/net/ipv6/netfilter/ip6t_dst.c b/net/ipv6/netfilter/ip6t_dst.c
index c450a635e54b..48cf5f9efc95 100644
--- a/net/ipv6/netfilter/ip6t_dst.c
+++ b/net/ipv6/netfilter/ip6t_dst.c
@@ -71,9 +71,9 @@ match(const struct sk_buff *skb,
71 unsigned int optlen; 71 unsigned int optlen;
72 72
73#if HOPBYHOP 73#if HOPBYHOP
74 if (ipv6_find_hdr(skb, &ptr, NEXTHDR_HOP) < 0) 74 if (ipv6_find_hdr(skb, &ptr, NEXTHDR_HOP, NULL) < 0)
75#else 75#else
76 if (ipv6_find_hdr(skb, &ptr, NEXTHDR_DEST) < 0) 76 if (ipv6_find_hdr(skb, &ptr, NEXTHDR_DEST, NULL) < 0)
77#endif 77#endif
78 return 0; 78 return 0;
79 79
diff --git a/net/ipv6/netfilter/ip6t_esp.c b/net/ipv6/netfilter/ip6t_esp.c
index 24bc0cde43a1..e1828f6d0a40 100644
--- a/net/ipv6/netfilter/ip6t_esp.c
+++ b/net/ipv6/netfilter/ip6t_esp.c
@@ -9,6 +9,7 @@
9 9
10#include <linux/module.h> 10#include <linux/module.h>
11#include <linux/skbuff.h> 11#include <linux/skbuff.h>
12#include <linux/ip.h>
12#include <linux/ipv6.h> 13#include <linux/ipv6.h>
13#include <linux/types.h> 14#include <linux/types.h>
14#include <net/checksum.h> 15#include <net/checksum.h>
@@ -55,7 +56,7 @@ match(const struct sk_buff *skb,
55 /* Make sure this isn't an evil packet */ 56 /* Make sure this isn't an evil packet */
56 /*DEBUGP("ipv6_esp entered \n");*/ 57 /*DEBUGP("ipv6_esp entered \n");*/
57 58
58 if (ipv6_find_hdr(skb, &ptr, NEXTHDR_ESP) < 0) 59 if (ipv6_find_hdr(skb, &ptr, NEXTHDR_ESP, NULL) < 0)
59 return 0; 60 return 0;
60 61
61 eh = skb_header_pointer(skb, ptr, sizeof(_esp), &_esp); 62 eh = skb_header_pointer(skb, ptr, sizeof(_esp), &_esp);
diff --git a/net/ipv6/netfilter/ip6t_frag.c b/net/ipv6/netfilter/ip6t_frag.c
index 085d5f8eea29..d1549b268669 100644
--- a/net/ipv6/netfilter/ip6t_frag.c
+++ b/net/ipv6/netfilter/ip6t_frag.c
@@ -52,7 +52,7 @@ match(const struct sk_buff *skb,
52 const struct ip6t_frag *fraginfo = matchinfo; 52 const struct ip6t_frag *fraginfo = matchinfo;
53 unsigned int ptr; 53 unsigned int ptr;
54 54
55 if (ipv6_find_hdr(skb, &ptr, NEXTHDR_FRAGMENT) < 0) 55 if (ipv6_find_hdr(skb, &ptr, NEXTHDR_FRAGMENT, NULL) < 0)
56 return 0; 56 return 0;
57 57
58 fh = skb_header_pointer(skb, ptr, sizeof(_frag), &_frag); 58 fh = skb_header_pointer(skb, ptr, sizeof(_frag), &_frag);
diff --git a/net/ipv6/netfilter/ip6t_hbh.c b/net/ipv6/netfilter/ip6t_hbh.c
index 1d09485111d0..e3bc8e2700e7 100644
--- a/net/ipv6/netfilter/ip6t_hbh.c
+++ b/net/ipv6/netfilter/ip6t_hbh.c
@@ -71,9 +71,9 @@ match(const struct sk_buff *skb,
71 unsigned int optlen; 71 unsigned int optlen;
72 72
73#if HOPBYHOP 73#if HOPBYHOP
74 if (ipv6_find_hdr(skb, &ptr, NEXTHDR_HOP) < 0) 74 if (ipv6_find_hdr(skb, &ptr, NEXTHDR_HOP, NULL) < 0)
75#else 75#else
76 if (ipv6_find_hdr(skb, &ptr, NEXTHDR_DEST) < 0) 76 if (ipv6_find_hdr(skb, &ptr, NEXTHDR_DEST, NULL) < 0)
77#endif 77#endif
78 return 0; 78 return 0;
79 79
diff --git a/net/ipv6/netfilter/ip6t_rt.c b/net/ipv6/netfilter/ip6t_rt.c
index beb2fd5cebbb..c1e770e45543 100644
--- a/net/ipv6/netfilter/ip6t_rt.c
+++ b/net/ipv6/netfilter/ip6t_rt.c
@@ -58,7 +58,7 @@ match(const struct sk_buff *skb,
58 unsigned int ret = 0; 58 unsigned int ret = 0;
59 struct in6_addr *ap, _addr; 59 struct in6_addr *ap, _addr;
60 60
61 if (ipv6_find_hdr(skb, &ptr, NEXTHDR_ROUTING) < 0) 61 if (ipv6_find_hdr(skb, &ptr, NEXTHDR_ROUTING, NULL) < 0)
62 return 0; 62 return 0;
63 63
64 rh = skb_header_pointer(skb, ptr, sizeof(_route), &_route); 64 rh = skb_header_pointer(skb, ptr, sizeof(_route), &_route);
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index 753a3ae8502b..704fbbe74874 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -401,6 +401,48 @@ static ctl_table nf_ct_net_table[] = {
401}; 401};
402#endif 402#endif
403 403
404#if defined(CONFIG_NF_CT_NETLINK) || \
405 defined(CONFIG_NF_CT_NETLINK_MODULE)
406
407#include <linux/netfilter/nfnetlink.h>
408#include <linux/netfilter/nfnetlink_conntrack.h>
409
410static int ipv6_tuple_to_nfattr(struct sk_buff *skb,
411 const struct nf_conntrack_tuple *tuple)
412{
413 NFA_PUT(skb, CTA_IP_V6_SRC, sizeof(u_int32_t) * 4,
414 &tuple->src.u3.ip6);
415 NFA_PUT(skb, CTA_IP_V6_DST, sizeof(u_int32_t) * 4,
416 &tuple->dst.u3.ip6);
417 return 0;
418
419nfattr_failure:
420 return -1;
421}
422
423static const size_t cta_min_ip[CTA_IP_MAX] = {
424 [CTA_IP_V6_SRC-1] = sizeof(u_int32_t)*4,
425 [CTA_IP_V6_DST-1] = sizeof(u_int32_t)*4,
426};
427
428static int ipv6_nfattr_to_tuple(struct nfattr *tb[],
429 struct nf_conntrack_tuple *t)
430{
431 if (!tb[CTA_IP_V6_SRC-1] || !tb[CTA_IP_V6_DST-1])
432 return -EINVAL;
433
434 if (nfattr_bad_size(tb, CTA_IP_MAX, cta_min_ip))
435 return -EINVAL;
436
437 memcpy(&t->src.u3.ip6, NFA_DATA(tb[CTA_IP_V6_SRC-1]),
438 sizeof(u_int32_t) * 4);
439 memcpy(&t->dst.u3.ip6, NFA_DATA(tb[CTA_IP_V6_DST-1]),
440 sizeof(u_int32_t) * 4);
441
442 return 0;
443}
444#endif
445
404struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 = { 446struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 = {
405 .l3proto = PF_INET6, 447 .l3proto = PF_INET6,
406 .name = "ipv6", 448 .name = "ipv6",
@@ -409,6 +451,11 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 = {
409 .print_tuple = ipv6_print_tuple, 451 .print_tuple = ipv6_print_tuple,
410 .print_conntrack = ipv6_print_conntrack, 452 .print_conntrack = ipv6_print_conntrack,
411 .prepare = ipv6_prepare, 453 .prepare = ipv6_prepare,
454#if defined(CONFIG_NF_CT_NETLINK) || \
455 defined(CONFIG_NF_CT_NETLINK_MODULE)
456 .tuple_to_nfattr = ipv6_tuple_to_nfattr,
457 .nfattr_to_tuple = ipv6_nfattr_to_tuple,
458#endif
412 .get_features = ipv6_get_features, 459 .get_features = ipv6_get_features,
413 .me = THIS_MODULE, 460 .me = THIS_MODULE,
414}; 461};
diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
index a7e03cfacd06..09945c333055 100644
--- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
@@ -57,17 +57,17 @@ static int icmpv6_pkt_to_tuple(const struct sk_buff *skb,
57 return 1; 57 return 1;
58} 58}
59 59
60/* Add 1; spaces filled with 0. */
61static u_int8_t invmap[] = {
62 [ICMPV6_ECHO_REQUEST - 128] = ICMPV6_ECHO_REPLY + 1,
63 [ICMPV6_ECHO_REPLY - 128] = ICMPV6_ECHO_REQUEST + 1,
64 [ICMPV6_NI_QUERY - 128] = ICMPV6_NI_QUERY + 1,
65 [ICMPV6_NI_REPLY - 128] = ICMPV6_NI_REPLY +1
66};
67
60static int icmpv6_invert_tuple(struct nf_conntrack_tuple *tuple, 68static int icmpv6_invert_tuple(struct nf_conntrack_tuple *tuple,
61 const struct nf_conntrack_tuple *orig) 69 const struct nf_conntrack_tuple *orig)
62{ 70{
63 /* Add 1; spaces filled with 0. */
64 static u_int8_t invmap[] = {
65 [ICMPV6_ECHO_REQUEST - 128] = ICMPV6_ECHO_REPLY + 1,
66 [ICMPV6_ECHO_REPLY - 128] = ICMPV6_ECHO_REQUEST + 1,
67 [ICMPV6_NI_QUERY - 128] = ICMPV6_NI_QUERY + 1,
68 [ICMPV6_NI_REPLY - 128] = ICMPV6_NI_REPLY +1
69 };
70
71 int type = orig->dst.u.icmp.type - 128; 71 int type = orig->dst.u.icmp.type - 128;
72 if (type < 0 || type >= sizeof(invmap) || !invmap[type]) 72 if (type < 0 || type >= sizeof(invmap) || !invmap[type])
73 return 0; 73 return 0;
@@ -185,7 +185,7 @@ icmpv6_error_message(struct sk_buff *skb,
185 return -NF_ACCEPT; 185 return -NF_ACCEPT;
186 } 186 }
187 187
188 inproto = nf_ct_find_proto(PF_INET6, inprotonum); 188 inproto = __nf_ct_proto_find(PF_INET6, inprotonum);
189 189
190 /* Are they talking about one of our connections? */ 190 /* Are they talking about one of our connections? */
191 if (!nf_ct_get_tuple(skb, inip6off, inprotoff, PF_INET6, inprotonum, 191 if (!nf_ct_get_tuple(skb, inip6off, inprotoff, PF_INET6, inprotonum,
@@ -255,6 +255,60 @@ skipped:
255 return icmpv6_error_message(skb, dataoff, ctinfo, hooknum); 255 return icmpv6_error_message(skb, dataoff, ctinfo, hooknum);
256} 256}
257 257
258#if defined(CONFIG_NF_CT_NETLINK) || \
259 defined(CONFIG_NF_CT_NETLINK_MODULE)
260
261#include <linux/netfilter/nfnetlink.h>
262#include <linux/netfilter/nfnetlink_conntrack.h>
263static int icmpv6_tuple_to_nfattr(struct sk_buff *skb,
264 const struct nf_conntrack_tuple *t)
265{
266 NFA_PUT(skb, CTA_PROTO_ICMPV6_ID, sizeof(u_int16_t),
267 &t->src.u.icmp.id);
268 NFA_PUT(skb, CTA_PROTO_ICMPV6_TYPE, sizeof(u_int8_t),
269 &t->dst.u.icmp.type);
270 NFA_PUT(skb, CTA_PROTO_ICMPV6_CODE, sizeof(u_int8_t),
271 &t->dst.u.icmp.code);
272
273 return 0;
274
275nfattr_failure:
276 return -1;
277}
278
279static const size_t cta_min_proto[CTA_PROTO_MAX] = {
280 [CTA_PROTO_ICMPV6_TYPE-1] = sizeof(u_int8_t),
281 [CTA_PROTO_ICMPV6_CODE-1] = sizeof(u_int8_t),
282 [CTA_PROTO_ICMPV6_ID-1] = sizeof(u_int16_t)
283};
284
285static int icmpv6_nfattr_to_tuple(struct nfattr *tb[],
286 struct nf_conntrack_tuple *tuple)
287{
288 if (!tb[CTA_PROTO_ICMPV6_TYPE-1]
289 || !tb[CTA_PROTO_ICMPV6_CODE-1]
290 || !tb[CTA_PROTO_ICMPV6_ID-1])
291 return -EINVAL;
292
293 if (nfattr_bad_size(tb, CTA_PROTO_MAX, cta_min_proto))
294 return -EINVAL;
295
296 tuple->dst.u.icmp.type =
297 *(u_int8_t *)NFA_DATA(tb[CTA_PROTO_ICMPV6_TYPE-1]);
298 tuple->dst.u.icmp.code =
299 *(u_int8_t *)NFA_DATA(tb[CTA_PROTO_ICMPV6_CODE-1]);
300 tuple->src.u.icmp.id =
301 *(u_int16_t *)NFA_DATA(tb[CTA_PROTO_ICMPV6_ID-1]);
302
303 if (tuple->dst.u.icmp.type < 128
304 || tuple->dst.u.icmp.type - 128 >= sizeof(invmap)
305 || !invmap[tuple->dst.u.icmp.type - 128])
306 return -EINVAL;
307
308 return 0;
309}
310#endif
311
258struct nf_conntrack_protocol nf_conntrack_protocol_icmpv6 = 312struct nf_conntrack_protocol nf_conntrack_protocol_icmpv6 =
259{ 313{
260 .l3proto = PF_INET6, 314 .l3proto = PF_INET6,
@@ -267,6 +321,11 @@ struct nf_conntrack_protocol nf_conntrack_protocol_icmpv6 =
267 .packet = icmpv6_packet, 321 .packet = icmpv6_packet,
268 .new = icmpv6_new, 322 .new = icmpv6_new,
269 .error = icmpv6_error, 323 .error = icmpv6_error,
324#if defined(CONFIG_NF_CT_NETLINK) || \
325 defined(CONFIG_NF_CT_NETLINK_MODULE)
326 .tuple_to_nfattr = icmpv6_tuple_to_nfattr,
327 .nfattr_to_tuple = icmpv6_nfattr_to_tuple,
328#endif
270}; 329};
271 330
272EXPORT_SYMBOL(nf_conntrack_protocol_icmpv6); 331EXPORT_SYMBOL(nf_conntrack_protocol_icmpv6);
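The icmpv6 conntrack hunks move invmap to file scope so the new icmpv6_nfattr_to_tuple() can validate incoming ICMPv6 types against it, and add the netlink attribute converters. Those converters rely on the NFA_PUT() idiom from <linux/netfilter/nfnetlink.h>, which jumps to a local nfattr_failure label when the skb runs out of room; a minimal sketch with a hypothetical helper name, using the same attribute as above:

/*
 * Illustrative sketch only: the NFA_PUT()/nfattr_failure pattern used
 * by the converters added above. Requires <linux/netfilter/nfnetlink.h>
 * and <linux/netfilter/nfnetlink_conntrack.h>.
 */
static int example_put_icmpv6_id(struct sk_buff *skb, u_int16_t id)
{
	NFA_PUT(skb, CTA_PROTO_ICMPV6_ID, sizeof(u_int16_t), &id);
	return 0;

nfattr_failure:		/* reached only when the skb has no room left */
	return -1;
}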
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index c2c52af9e560..f3e5ffbd592f 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -98,7 +98,7 @@ struct nf_ct_frag6_queue
98#define FRAG6Q_HASHSZ 64 98#define FRAG6Q_HASHSZ 64
99 99
100static struct nf_ct_frag6_queue *nf_ct_frag6_hash[FRAG6Q_HASHSZ]; 100static struct nf_ct_frag6_queue *nf_ct_frag6_hash[FRAG6Q_HASHSZ];
101static rwlock_t nf_ct_frag6_lock = RW_LOCK_UNLOCKED; 101static DEFINE_RWLOCK(nf_ct_frag6_lock);
102static u32 nf_ct_frag6_hash_rnd; 102static u32 nf_ct_frag6_hash_rnd;
103static LIST_HEAD(nf_ct_frag6_lru_list); 103static LIST_HEAD(nf_ct_frag6_lru_list);
104int nf_ct_frag6_nqueues = 0; 104int nf_ct_frag6_nqueues = 0;
@@ -371,7 +371,7 @@ nf_ct_frag6_create(unsigned int hash, u32 id, struct in6_addr *src, struct
371 init_timer(&fq->timer); 371 init_timer(&fq->timer);
372 fq->timer.function = nf_ct_frag6_expire; 372 fq->timer.function = nf_ct_frag6_expire;
373 fq->timer.data = (long) fq; 373 fq->timer.data = (long) fq;
374 fq->lock = SPIN_LOCK_UNLOCKED; 374 spin_lock_init(&fq->lock);
375 atomic_set(&fq->refcnt, 1); 375 atomic_set(&fq->refcnt, 1);
376 376
377 return nf_ct_frag6_intern(hash, fq); 377 return nf_ct_frag6_intern(hash, fq);
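The reassembly hunks swap the deprecated static lock initializers for their current counterparts: DEFINE_RWLOCK() replaces "= RW_LOCK_UNLOCKED" for the file-scope hash lock, and spin_lock_init() replaces "= SPIN_LOCK_UNLOCKED" for the per-queue lock. A minimal sketch of the two idioms (struct and function names are illustrative, not from the patch):

/* Illustrative sketch of the lock initialization idioms used above. */
#include <linux/spinlock.h>

static DEFINE_RWLOCK(example_hash_lock);	/* file-scope rwlock */

struct example_queue {
	spinlock_t lock;
};

static void example_queue_init(struct example_queue *q)
{
	spin_lock_init(&q->lock);	/* per-object spinlock */
}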
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index a66900cda2af..66f1d12ea578 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -32,6 +32,7 @@
32#include <linux/icmpv6.h> 32#include <linux/icmpv6.h>
33#include <linux/netfilter.h> 33#include <linux/netfilter.h>
34#include <linux/netfilter_ipv6.h> 34#include <linux/netfilter_ipv6.h>
35#include <linux/skbuff.h>
35#include <asm/uaccess.h> 36#include <asm/uaccess.h>
36#include <asm/ioctls.h> 37#include <asm/ioctls.h>
37#include <asm/bug.h> 38#include <asm/bug.h>
@@ -433,25 +434,14 @@ out:
433 return err; 434 return err;
434 435
435csum_copy_err: 436csum_copy_err:
436 /* Clear queue. */ 437 skb_kill_datagram(sk, skb, flags);
437 if (flags&MSG_PEEK) {
438 int clear = 0;
439 spin_lock_bh(&sk->sk_receive_queue.lock);
440 if (skb == skb_peek(&sk->sk_receive_queue)) {
441 __skb_unlink(skb, &sk->sk_receive_queue);
442 clear = 1;
443 }
444 spin_unlock_bh(&sk->sk_receive_queue.lock);
445 if (clear)
446 kfree_skb(skb);
447 }
448 438
449 /* Error for blocking case is chosen to masquerade 439 /* Error for blocking case is chosen to masquerade
450 as some normal condition. 440 as some normal condition.
451 */ 441 */
452 err = (flags&MSG_DONTWAIT) ? -EAGAIN : -EHOSTUNREACH; 442 err = (flags&MSG_DONTWAIT) ? -EAGAIN : -EHOSTUNREACH;
453 /* FIXME: increment a raw6 drops counter here */ 443 /* FIXME: increment a raw6 drops counter here */
454 goto out_free; 444 goto out;
455} 445}
456 446
457static int rawv6_push_pending_frames(struct sock *sk, struct flowi *fl, 447static int rawv6_push_pending_frames(struct sock *sk, struct flowi *fl,
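The raw.c csum_copy_err path above replaces the open-coded MSG_PEEK unlink-and-free with skb_kill_datagram(), which performs the dequeue and kfree_skb() internally (the same conversion appears in udp.c further down). A rough sketch of the resulting error path, with a hypothetical function name; sk, skb and flags follow the surrounding code:

/*
 * Illustrative sketch only: the shape of a recvmsg() checksum-error
 * path once skb_kill_datagram() is used. Needs <linux/skbuff.h> and
 * <net/sock.h>.
 */
static int example_recvmsg_csum_error(struct sock *sk, struct sk_buff *skb,
				      unsigned int flags)
{
	/* Unlinks the datagram from the receive queue (honouring
	 * MSG_PEEK) and frees it, replacing the removed hand-rolled
	 * spin_lock_bh()/__skb_unlink()/kfree_skb() block. */
	skb_kill_datagram(sk, skb, flags);

	/* Blocking callers see a soft, retryable error. */
	return (flags & MSG_DONTWAIT) ? -EAGAIN : -EHOSTUNREACH;
}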
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index c3123c9e1a8e..577d49732b0f 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -33,6 +33,7 @@
33#include <asm/uaccess.h> 33#include <asm/uaccess.h>
34#include <linux/init.h> 34#include <linux/init.h>
35#include <linux/netfilter_ipv4.h> 35#include <linux/netfilter_ipv4.h>
36#include <linux/if_ether.h>
36 37
37#include <net/sock.h> 38#include <net/sock.h>
38#include <net/snmp.h> 39#include <net/snmp.h>
@@ -720,7 +721,7 @@ static void ipip6_tunnel_setup(struct net_device *dev)
720 721
721 dev->type = ARPHRD_SIT; 722 dev->type = ARPHRD_SIT;
722 dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr); 723 dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr);
723 dev->mtu = 1500 - sizeof(struct iphdr); 724 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr);
724 dev->flags = IFF_NOARP; 725 dev->flags = IFF_NOARP;
725 dev->iflink = 0; 726 dev->iflink = 0;
726 dev->addr_len = 4; 727 dev->addr_len = 4;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 8827389abaf7..2947bc56d8a0 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -48,6 +48,7 @@
48#include <net/tcp.h> 48#include <net/tcp.h>
49#include <net/ndisc.h> 49#include <net/ndisc.h>
50#include <net/inet6_hashtables.h> 50#include <net/inet6_hashtables.h>
51#include <net/inet6_connection_sock.h>
51#include <net/ipv6.h> 52#include <net/ipv6.h>
52#include <net/transp_v6.h> 53#include <net/transp_v6.h>
53#include <net/addrconf.h> 54#include <net/addrconf.h>
@@ -59,6 +60,7 @@
59#include <net/addrconf.h> 60#include <net/addrconf.h>
60#include <net/snmp.h> 61#include <net/snmp.h>
61#include <net/dsfield.h> 62#include <net/dsfield.h>
63#include <net/timewait_sock.h>
62 64
63#include <asm/uaccess.h> 65#include <asm/uaccess.h>
64 66
@@ -67,224 +69,33 @@
67 69
68static void tcp_v6_send_reset(struct sk_buff *skb); 70static void tcp_v6_send_reset(struct sk_buff *skb);
69static void tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req); 71static void tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req);
70static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len, 72static void tcp_v6_send_check(struct sock *sk, int len,
71 struct sk_buff *skb); 73 struct sk_buff *skb);
72 74
73static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb); 75static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
74static int tcp_v6_xmit(struct sk_buff *skb, int ipfragok);
75 76
76static struct tcp_func ipv6_mapped; 77static struct inet_connection_sock_af_ops ipv6_mapped;
77static struct tcp_func ipv6_specific; 78static struct inet_connection_sock_af_ops ipv6_specific;
78 79
79static inline int tcp_v6_bind_conflict(const struct sock *sk,
80 const struct inet_bind_bucket *tb)
81{
82 const struct sock *sk2;
83 const struct hlist_node *node;
84
85 /* We must walk the whole port owner list in this case. -DaveM */
86 sk_for_each_bound(sk2, node, &tb->owners) {
87 if (sk != sk2 &&
88 (!sk->sk_bound_dev_if ||
89 !sk2->sk_bound_dev_if ||
90 sk->sk_bound_dev_if == sk2->sk_bound_dev_if) &&
91 (!sk->sk_reuse || !sk2->sk_reuse ||
92 sk2->sk_state == TCP_LISTEN) &&
93 ipv6_rcv_saddr_equal(sk, sk2))
94 break;
95 }
96
97 return node != NULL;
98}
99
100/* Grrr, addr_type already calculated by caller, but I don't want
101 * to add some silly "cookie" argument to this method just for that.
102 * But it doesn't matter, the recalculation is in the rarest path
103 * this function ever takes.
104 */
105static int tcp_v6_get_port(struct sock *sk, unsigned short snum) 80static int tcp_v6_get_port(struct sock *sk, unsigned short snum)
106{ 81{
107 struct inet_bind_hashbucket *head; 82 return inet_csk_get_port(&tcp_hashinfo, sk, snum,
108 struct inet_bind_bucket *tb; 83 inet6_csk_bind_conflict);
109 struct hlist_node *node;
110 int ret;
111
112 local_bh_disable();
113 if (snum == 0) {
114 int low = sysctl_local_port_range[0];
115 int high = sysctl_local_port_range[1];
116 int remaining = (high - low) + 1;
117 int rover = net_random() % (high - low) + low;
118
119 do {
120 head = &tcp_hashinfo.bhash[inet_bhashfn(rover, tcp_hashinfo.bhash_size)];
121 spin_lock(&head->lock);
122 inet_bind_bucket_for_each(tb, node, &head->chain)
123 if (tb->port == rover)
124 goto next;
125 break;
126 next:
127 spin_unlock(&head->lock);
128 if (++rover > high)
129 rover = low;
130 } while (--remaining > 0);
131
132 /* Exhausted local port range during search? It is not
133 * possible for us to be holding one of the bind hash
134 * locks if this test triggers, because if 'remaining'
135 * drops to zero, we broke out of the do/while loop at
136 * the top level, not from the 'break;' statement.
137 */
138 ret = 1;
139 if (unlikely(remaining <= 0))
140 goto fail;
141
142 /* OK, here is the one we will use. */
143 snum = rover;
144 } else {
145 head = &tcp_hashinfo.bhash[inet_bhashfn(snum, tcp_hashinfo.bhash_size)];
146 spin_lock(&head->lock);
147 inet_bind_bucket_for_each(tb, node, &head->chain)
148 if (tb->port == snum)
149 goto tb_found;
150 }
151 tb = NULL;
152 goto tb_not_found;
153tb_found:
154 if (tb && !hlist_empty(&tb->owners)) {
155 if (tb->fastreuse > 0 && sk->sk_reuse &&
156 sk->sk_state != TCP_LISTEN) {
157 goto success;
158 } else {
159 ret = 1;
160 if (tcp_v6_bind_conflict(sk, tb))
161 goto fail_unlock;
162 }
163 }
164tb_not_found:
165 ret = 1;
166 if (tb == NULL) {
167 tb = inet_bind_bucket_create(tcp_hashinfo.bind_bucket_cachep, head, snum);
168 if (tb == NULL)
169 goto fail_unlock;
170 }
171 if (hlist_empty(&tb->owners)) {
172 if (sk->sk_reuse && sk->sk_state != TCP_LISTEN)
173 tb->fastreuse = 1;
174 else
175 tb->fastreuse = 0;
176 } else if (tb->fastreuse &&
177 (!sk->sk_reuse || sk->sk_state == TCP_LISTEN))
178 tb->fastreuse = 0;
179
180success:
181 if (!inet_csk(sk)->icsk_bind_hash)
182 inet_bind_hash(sk, tb, snum);
183 BUG_TRAP(inet_csk(sk)->icsk_bind_hash == tb);
184 ret = 0;
185
186fail_unlock:
187 spin_unlock(&head->lock);
188fail:
189 local_bh_enable();
190 return ret;
191}
192
193static __inline__ void __tcp_v6_hash(struct sock *sk)
194{
195 struct hlist_head *list;
196 rwlock_t *lock;
197
198 BUG_TRAP(sk_unhashed(sk));
199
200 if (sk->sk_state == TCP_LISTEN) {
201 list = &tcp_hashinfo.listening_hash[inet_sk_listen_hashfn(sk)];
202 lock = &tcp_hashinfo.lhash_lock;
203 inet_listen_wlock(&tcp_hashinfo);
204 } else {
205 unsigned int hash;
206 sk->sk_hash = hash = inet6_sk_ehashfn(sk);
207 hash &= (tcp_hashinfo.ehash_size - 1);
208 list = &tcp_hashinfo.ehash[hash].chain;
209 lock = &tcp_hashinfo.ehash[hash].lock;
210 write_lock(lock);
211 }
212
213 __sk_add_node(sk, list);
214 sock_prot_inc_use(sk->sk_prot);
215 write_unlock(lock);
216} 84}
217 85
218
219static void tcp_v6_hash(struct sock *sk) 86static void tcp_v6_hash(struct sock *sk)
220{ 87{
221 if (sk->sk_state != TCP_CLOSE) { 88 if (sk->sk_state != TCP_CLOSE) {
222 struct tcp_sock *tp = tcp_sk(sk); 89 if (inet_csk(sk)->icsk_af_ops == &ipv6_mapped) {
223
224 if (tp->af_specific == &ipv6_mapped) {
225 tcp_prot.hash(sk); 90 tcp_prot.hash(sk);
226 return; 91 return;
227 } 92 }
228 local_bh_disable(); 93 local_bh_disable();
229 __tcp_v6_hash(sk); 94 __inet6_hash(&tcp_hashinfo, sk);
230 local_bh_enable(); 95 local_bh_enable();
231 } 96 }
232} 97}
233 98
234/*
235 * Open request hash tables.
236 */
237
238static u32 tcp_v6_synq_hash(const struct in6_addr *raddr, const u16 rport, const u32 rnd)
239{
240 u32 a, b, c;
241
242 a = raddr->s6_addr32[0];
243 b = raddr->s6_addr32[1];
244 c = raddr->s6_addr32[2];
245
246 a += JHASH_GOLDEN_RATIO;
247 b += JHASH_GOLDEN_RATIO;
248 c += rnd;
249 __jhash_mix(a, b, c);
250
251 a += raddr->s6_addr32[3];
252 b += (u32) rport;
253 __jhash_mix(a, b, c);
254
255 return c & (TCP_SYNQ_HSIZE - 1);
256}
257
258static struct request_sock *tcp_v6_search_req(const struct sock *sk,
259 struct request_sock ***prevp,
260 __u16 rport,
261 struct in6_addr *raddr,
262 struct in6_addr *laddr,
263 int iif)
264{
265 const struct inet_connection_sock *icsk = inet_csk(sk);
266 struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt;
267 struct request_sock *req, **prev;
268
269 for (prev = &lopt->syn_table[tcp_v6_synq_hash(raddr, rport, lopt->hash_rnd)];
270 (req = *prev) != NULL;
271 prev = &req->dl_next) {
272 const struct tcp6_request_sock *treq = tcp6_rsk(req);
273
274 if (inet_rsk(req)->rmt_port == rport &&
275 req->rsk_ops->family == AF_INET6 &&
276 ipv6_addr_equal(&treq->rmt_addr, raddr) &&
277 ipv6_addr_equal(&treq->loc_addr, laddr) &&
278 (!treq->iif || treq->iif == iif)) {
279 BUG_TRAP(req->sk == NULL);
280 *prevp = prev;
281 return req;
282 }
283 }
284
285 return NULL;
286}
287
288static __inline__ u16 tcp_v6_check(struct tcphdr *th, int len, 99static __inline__ u16 tcp_v6_check(struct tcphdr *th, int len,
289 struct in6_addr *saddr, 100 struct in6_addr *saddr,
290 struct in6_addr *daddr, 101 struct in6_addr *daddr,
@@ -308,195 +119,12 @@ static __u32 tcp_v6_init_sequence(struct sock *sk, struct sk_buff *skb)
308 } 119 }
309} 120}
310 121
311static int __tcp_v6_check_established(struct sock *sk, const __u16 lport,
312 struct inet_timewait_sock **twp)
313{
314 struct inet_sock *inet = inet_sk(sk);
315 const struct ipv6_pinfo *np = inet6_sk(sk);
316 const struct in6_addr *daddr = &np->rcv_saddr;
317 const struct in6_addr *saddr = &np->daddr;
318 const int dif = sk->sk_bound_dev_if;
319 const u32 ports = INET_COMBINED_PORTS(inet->dport, lport);
320 unsigned int hash = inet6_ehashfn(daddr, inet->num, saddr, inet->dport);
321 struct inet_ehash_bucket *head = inet_ehash_bucket(&tcp_hashinfo, hash);
322 struct sock *sk2;
323 const struct hlist_node *node;
324 struct inet_timewait_sock *tw;
325
326 prefetch(head->chain.first);
327 write_lock(&head->lock);
328
329 /* Check TIME-WAIT sockets first. */
330 sk_for_each(sk2, node, &(head + tcp_hashinfo.ehash_size)->chain) {
331 const struct tcp6_timewait_sock *tcp6tw = tcp6_twsk(sk2);
332
333 tw = inet_twsk(sk2);
334
335 if(*((__u32 *)&(tw->tw_dport)) == ports &&
336 sk2->sk_family == PF_INET6 &&
337 ipv6_addr_equal(&tcp6tw->tw_v6_daddr, saddr) &&
338 ipv6_addr_equal(&tcp6tw->tw_v6_rcv_saddr, daddr) &&
339 sk2->sk_bound_dev_if == sk->sk_bound_dev_if) {
340 const struct tcp_timewait_sock *tcptw = tcp_twsk(sk2);
341 struct tcp_sock *tp = tcp_sk(sk);
342
343 if (tcptw->tw_ts_recent_stamp &&
344 (!twp ||
345 (sysctl_tcp_tw_reuse &&
346 xtime.tv_sec - tcptw->tw_ts_recent_stamp > 1))) {
347 /* See comment in tcp_ipv4.c */
348 tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
349 if (!tp->write_seq)
350 tp->write_seq = 1;
351 tp->rx_opt.ts_recent = tcptw->tw_ts_recent;
352 tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
353 sock_hold(sk2);
354 goto unique;
355 } else
356 goto not_unique;
357 }
358 }
359 tw = NULL;
360
361 /* And established part... */
362 sk_for_each(sk2, node, &head->chain) {
363 if (INET6_MATCH(sk2, hash, saddr, daddr, ports, dif))
364 goto not_unique;
365 }
366
367unique:
368 BUG_TRAP(sk_unhashed(sk));
369 __sk_add_node(sk, &head->chain);
370 sk->sk_hash = hash;
371 sock_prot_inc_use(sk->sk_prot);
372 write_unlock(&head->lock);
373
374 if (twp) {
375 *twp = tw;
376 NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
377 } else if (tw) {
378 /* Silly. Should hash-dance instead... */
379 inet_twsk_deschedule(tw, &tcp_death_row);
380 NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
381
382 inet_twsk_put(tw);
383 }
384 return 0;
385
386not_unique:
387 write_unlock(&head->lock);
388 return -EADDRNOTAVAIL;
389}
390
391static inline u32 tcpv6_port_offset(const struct sock *sk)
392{
393 const struct inet_sock *inet = inet_sk(sk);
394 const struct ipv6_pinfo *np = inet6_sk(sk);
395
396 return secure_tcpv6_port_ephemeral(np->rcv_saddr.s6_addr32,
397 np->daddr.s6_addr32,
398 inet->dport);
399}
400
401static int tcp_v6_hash_connect(struct sock *sk)
402{
403 unsigned short snum = inet_sk(sk)->num;
404 struct inet_bind_hashbucket *head;
405 struct inet_bind_bucket *tb;
406 int ret;
407
408 if (!snum) {
409 int low = sysctl_local_port_range[0];
410 int high = sysctl_local_port_range[1];
411 int range = high - low;
412 int i;
413 int port;
414 static u32 hint;
415 u32 offset = hint + tcpv6_port_offset(sk);
416 struct hlist_node *node;
417 struct inet_timewait_sock *tw = NULL;
418
419 local_bh_disable();
420 for (i = 1; i <= range; i++) {
421 port = low + (i + offset) % range;
422 head = &tcp_hashinfo.bhash[inet_bhashfn(port, tcp_hashinfo.bhash_size)];
423 spin_lock(&head->lock);
424
425 /* Does not bother with rcv_saddr checks,
426 * because the established check is already
427 * unique enough.
428 */
429 inet_bind_bucket_for_each(tb, node, &head->chain) {
430 if (tb->port == port) {
431 BUG_TRAP(!hlist_empty(&tb->owners));
432 if (tb->fastreuse >= 0)
433 goto next_port;
434 if (!__tcp_v6_check_established(sk,
435 port,
436 &tw))
437 goto ok;
438 goto next_port;
439 }
440 }
441
442 tb = inet_bind_bucket_create(tcp_hashinfo.bind_bucket_cachep, head, port);
443 if (!tb) {
444 spin_unlock(&head->lock);
445 break;
446 }
447 tb->fastreuse = -1;
448 goto ok;
449
450 next_port:
451 spin_unlock(&head->lock);
452 }
453 local_bh_enable();
454
455 return -EADDRNOTAVAIL;
456
457ok:
458 hint += i;
459
460 /* Head lock still held and bh's disabled */
461 inet_bind_hash(sk, tb, port);
462 if (sk_unhashed(sk)) {
463 inet_sk(sk)->sport = htons(port);
464 __tcp_v6_hash(sk);
465 }
466 spin_unlock(&head->lock);
467
468 if (tw) {
469 inet_twsk_deschedule(tw, &tcp_death_row);
470 inet_twsk_put(tw);
471 }
472
473 ret = 0;
474 goto out;
475 }
476
477 head = &tcp_hashinfo.bhash[inet_bhashfn(snum, tcp_hashinfo.bhash_size)];
478 tb = inet_csk(sk)->icsk_bind_hash;
479 spin_lock_bh(&head->lock);
480
481 if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
482 __tcp_v6_hash(sk);
483 spin_unlock_bh(&head->lock);
484 return 0;
485 } else {
486 spin_unlock(&head->lock);
487 /* No definite answer... Walk to established hash table */
488 ret = __tcp_v6_check_established(sk, snum, NULL);
489out:
490 local_bh_enable();
491 return ret;
492 }
493}
494
495static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, 122static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
496 int addr_len) 123 int addr_len)
497{ 124{
498 struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr; 125 struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
499 struct inet_sock *inet = inet_sk(sk); 126 struct inet_sock *inet = inet_sk(sk);
127 struct inet_connection_sock *icsk = inet_csk(sk);
500 struct ipv6_pinfo *np = inet6_sk(sk); 128 struct ipv6_pinfo *np = inet6_sk(sk);
501 struct tcp_sock *tp = tcp_sk(sk); 129 struct tcp_sock *tp = tcp_sk(sk);
502 struct in6_addr *saddr = NULL, *final_p = NULL, final; 130 struct in6_addr *saddr = NULL, *final_p = NULL, final;
@@ -571,7 +199,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
571 */ 199 */
572 200
573 if (addr_type == IPV6_ADDR_MAPPED) { 201 if (addr_type == IPV6_ADDR_MAPPED) {
574 u32 exthdrlen = tp->ext_header_len; 202 u32 exthdrlen = icsk->icsk_ext_hdr_len;
575 struct sockaddr_in sin; 203 struct sockaddr_in sin;
576 204
577 SOCK_DEBUG(sk, "connect: ipv4 mapped\n"); 205 SOCK_DEBUG(sk, "connect: ipv4 mapped\n");
@@ -583,14 +211,14 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
583 sin.sin_port = usin->sin6_port; 211 sin.sin_port = usin->sin6_port;
584 sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3]; 212 sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
585 213
586 tp->af_specific = &ipv6_mapped; 214 icsk->icsk_af_ops = &ipv6_mapped;
587 sk->sk_backlog_rcv = tcp_v4_do_rcv; 215 sk->sk_backlog_rcv = tcp_v4_do_rcv;
588 216
589 err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin)); 217 err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
590 218
591 if (err) { 219 if (err) {
592 tp->ext_header_len = exthdrlen; 220 icsk->icsk_ext_hdr_len = exthdrlen;
593 tp->af_specific = &ipv6_specific; 221 icsk->icsk_af_ops = &ipv6_specific;
594 sk->sk_backlog_rcv = tcp_v6_do_rcv; 222 sk->sk_backlog_rcv = tcp_v6_do_rcv;
595 goto failure; 223 goto failure;
596 } else { 224 } else {
@@ -643,16 +271,17 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
643 sk->sk_route_caps = dst->dev->features & 271 sk->sk_route_caps = dst->dev->features &
644 ~(NETIF_F_IP_CSUM | NETIF_F_TSO); 272 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
645 273
646 tp->ext_header_len = 0; 274 icsk->icsk_ext_hdr_len = 0;
647 if (np->opt) 275 if (np->opt)
648 tp->ext_header_len = np->opt->opt_flen + np->opt->opt_nflen; 276 icsk->icsk_ext_hdr_len = (np->opt->opt_flen +
277 np->opt->opt_nflen);
649 278
650 tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr); 279 tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
651 280
652 inet->dport = usin->sin6_port; 281 inet->dport = usin->sin6_port;
653 282
654 tcp_set_state(sk, TCP_SYN_SENT); 283 tcp_set_state(sk, TCP_SYN_SENT);
655 err = tcp_v6_hash_connect(sk); 284 err = inet6_hash_connect(&tcp_death_row, sk);
656 if (err) 285 if (err)
657 goto late_failure; 286 goto late_failure;
658 287
@@ -758,7 +387,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
758 } else 387 } else
759 dst_hold(dst); 388 dst_hold(dst);
760 389
761 if (tp->pmtu_cookie > dst_mtu(dst)) { 390 if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
762 tcp_sync_mss(sk, dst_mtu(dst)); 391 tcp_sync_mss(sk, dst_mtu(dst));
763 tcp_simple_retransmit(sk); 392 tcp_simple_retransmit(sk);
764 } /* else let the usual retransmit timer handle it */ 393 } /* else let the usual retransmit timer handle it */
@@ -775,8 +404,8 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
775 if (sock_owned_by_user(sk)) 404 if (sock_owned_by_user(sk))
776 goto out; 405 goto out;
777 406
778 req = tcp_v6_search_req(sk, &prev, th->dest, &hdr->daddr, 407 req = inet6_csk_search_req(sk, &prev, th->dest, &hdr->daddr,
779 &hdr->saddr, inet6_iif(skb)); 408 &hdr->saddr, inet6_iif(skb));
780 if (!req) 409 if (!req)
781 goto out; 410 goto out;
782 411
@@ -822,7 +451,7 @@ out:
822static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req, 451static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req,
823 struct dst_entry *dst) 452 struct dst_entry *dst)
824{ 453{
825 struct tcp6_request_sock *treq = tcp6_rsk(req); 454 struct inet6_request_sock *treq = inet6_rsk(req);
826 struct ipv6_pinfo *np = inet6_sk(sk); 455 struct ipv6_pinfo *np = inet6_sk(sk);
827 struct sk_buff * skb; 456 struct sk_buff * skb;
828 struct ipv6_txoptions *opt = NULL; 457 struct ipv6_txoptions *opt = NULL;
@@ -888,8 +517,8 @@ done:
888 517
889static void tcp_v6_reqsk_destructor(struct request_sock *req) 518static void tcp_v6_reqsk_destructor(struct request_sock *req)
890{ 519{
891 if (tcp6_rsk(req)->pktopts) 520 if (inet6_rsk(req)->pktopts)
892 kfree_skb(tcp6_rsk(req)->pktopts); 521 kfree_skb(inet6_rsk(req)->pktopts);
893} 522}
894 523
895static struct request_sock_ops tcp6_request_sock_ops = { 524static struct request_sock_ops tcp6_request_sock_ops = {
@@ -901,26 +530,15 @@ static struct request_sock_ops tcp6_request_sock_ops = {
901 .send_reset = tcp_v6_send_reset 530 .send_reset = tcp_v6_send_reset
902}; 531};
903 532
904static int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb) 533static struct timewait_sock_ops tcp6_timewait_sock_ops = {
905{ 534 .twsk_obj_size = sizeof(struct tcp6_timewait_sock),
906 struct ipv6_pinfo *np = inet6_sk(sk); 535 .twsk_unique = tcp_twsk_unique,
907 struct inet6_skb_parm *opt = IP6CB(skb); 536};
908
909 if (np->rxopt.all) {
910 if ((opt->hop && (np->rxopt.bits.hopopts || np->rxopt.bits.ohopopts)) ||
911 ((IPV6_FLOWINFO_MASK & *(u32*)skb->nh.raw) && np->rxopt.bits.rxflow) ||
912 (opt->srcrt && (np->rxopt.bits.srcrt || np->rxopt.bits.osrcrt)) ||
913 ((opt->dst1 || opt->dst0) && (np->rxopt.bits.dstopts || np->rxopt.bits.odstopts)))
914 return 1;
915 }
916 return 0;
917}
918
919 537
920static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len, 538static void tcp_v6_send_check(struct sock *sk, int len, struct sk_buff *skb)
921 struct sk_buff *skb)
922{ 539{
923 struct ipv6_pinfo *np = inet6_sk(sk); 540 struct ipv6_pinfo *np = inet6_sk(sk);
541 struct tcphdr *th = skb->h.th;
924 542
925 if (skb->ip_summed == CHECKSUM_HW) { 543 if (skb->ip_summed == CHECKSUM_HW) {
926 th->check = ~csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP, 0); 544 th->check = ~csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP, 0);
@@ -1091,8 +709,9 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
1091 struct sock *nsk; 709 struct sock *nsk;
1092 710
1093 /* Find possible connection requests. */ 711 /* Find possible connection requests. */
1094 req = tcp_v6_search_req(sk, &prev, th->source, &skb->nh.ipv6h->saddr, 712 req = inet6_csk_search_req(sk, &prev, th->source,
1095 &skb->nh.ipv6h->daddr, inet6_iif(skb)); 713 &skb->nh.ipv6h->saddr,
714 &skb->nh.ipv6h->daddr, inet6_iif(skb));
1096 if (req) 715 if (req)
1097 return tcp_check_req(sk, skb, req, prev); 716 return tcp_check_req(sk, skb, req, prev);
1098 717
@@ -1116,23 +735,12 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
1116 return sk; 735 return sk;
1117} 736}
1118 737
1119static void tcp_v6_synq_add(struct sock *sk, struct request_sock *req)
1120{
1121 struct inet_connection_sock *icsk = inet_csk(sk);
1122 struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt;
1123 const u32 h = tcp_v6_synq_hash(&tcp6_rsk(req)->rmt_addr, inet_rsk(req)->rmt_port, lopt->hash_rnd);
1124
1125 reqsk_queue_hash_req(&icsk->icsk_accept_queue, h, req, TCP_TIMEOUT_INIT);
1126 inet_csk_reqsk_queue_added(sk, TCP_TIMEOUT_INIT);
1127}
1128
1129
1130/* FIXME: this is substantially similar to the ipv4 code. 738/* FIXME: this is substantially similar to the ipv4 code.
1131 * Can some kind of merge be done? -- erics 739 * Can some kind of merge be done? -- erics
1132 */ 740 */
1133static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) 741static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1134{ 742{
1135 struct tcp6_request_sock *treq; 743 struct inet6_request_sock *treq;
1136 struct ipv6_pinfo *np = inet6_sk(sk); 744 struct ipv6_pinfo *np = inet6_sk(sk);
1137 struct tcp_options_received tmp_opt; 745 struct tcp_options_received tmp_opt;
1138 struct tcp_sock *tp = tcp_sk(sk); 746 struct tcp_sock *tp = tcp_sk(sk);
@@ -1157,7 +765,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1157 if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) 765 if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
1158 goto drop; 766 goto drop;
1159 767
1160 req = reqsk_alloc(&tcp6_request_sock_ops); 768 req = inet6_reqsk_alloc(&tcp6_request_sock_ops);
1161 if (req == NULL) 769 if (req == NULL)
1162 goto drop; 770 goto drop;
1163 771
@@ -1170,7 +778,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1170 tmp_opt.tstamp_ok = tmp_opt.saw_tstamp; 778 tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
1171 tcp_openreq_init(req, &tmp_opt, skb); 779 tcp_openreq_init(req, &tmp_opt, skb);
1172 780
1173 treq = tcp6_rsk(req); 781 treq = inet6_rsk(req);
1174 ipv6_addr_copy(&treq->rmt_addr, &skb->nh.ipv6h->saddr); 782 ipv6_addr_copy(&treq->rmt_addr, &skb->nh.ipv6h->saddr);
1175 ipv6_addr_copy(&treq->loc_addr, &skb->nh.ipv6h->daddr); 783 ipv6_addr_copy(&treq->loc_addr, &skb->nh.ipv6h->daddr);
1176 TCP_ECN_create_request(req, skb->h.th); 784 TCP_ECN_create_request(req, skb->h.th);
@@ -1196,8 +804,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1196 if (tcp_v6_send_synack(sk, req, NULL)) 804 if (tcp_v6_send_synack(sk, req, NULL))
1197 goto drop; 805 goto drop;
1198 806
1199 tcp_v6_synq_add(sk, req); 807 inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
1200
1201 return 0; 808 return 0;
1202 809
1203drop: 810drop:
@@ -1212,7 +819,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1212 struct request_sock *req, 819 struct request_sock *req,
1213 struct dst_entry *dst) 820 struct dst_entry *dst)
1214{ 821{
1215 struct tcp6_request_sock *treq = tcp6_rsk(req); 822 struct inet6_request_sock *treq = inet6_rsk(req);
1216 struct ipv6_pinfo *newnp, *np = inet6_sk(sk); 823 struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
1217 struct tcp6_sock *newtcp6sk; 824 struct tcp6_sock *newtcp6sk;
1218 struct inet_sock *newinet; 825 struct inet_sock *newinet;
@@ -1247,7 +854,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1247 854
1248 ipv6_addr_copy(&newnp->rcv_saddr, &newnp->saddr); 855 ipv6_addr_copy(&newnp->rcv_saddr, &newnp->saddr);
1249 856
1250 newtp->af_specific = &ipv6_mapped; 857 inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
1251 newsk->sk_backlog_rcv = tcp_v4_do_rcv; 858 newsk->sk_backlog_rcv = tcp_v4_do_rcv;
1252 newnp->pktoptions = NULL; 859 newnp->pktoptions = NULL;
1253 newnp->opt = NULL; 860 newnp->opt = NULL;
@@ -1261,10 +868,10 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1261 */ 868 */
1262 869
1263 /* It is tricky place. Until this moment IPv4 tcp 870 /* It is tricky place. Until this moment IPv4 tcp
1264 worked with IPv6 af_tcp.af_specific. 871 worked with IPv6 icsk.icsk_af_ops.
1265 Sync it now. 872 Sync it now.
1266 */ 873 */
1267 tcp_sync_mss(newsk, newtp->pmtu_cookie); 874 tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);
1268 875
1269 return newsk; 876 return newsk;
1270 } 877 }
@@ -1371,10 +978,10 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1371 sock_kfree_s(sk, opt, opt->tot_len); 978 sock_kfree_s(sk, opt, opt->tot_len);
1372 } 979 }
1373 980
1374 newtp->ext_header_len = 0; 981 inet_csk(newsk)->icsk_ext_hdr_len = 0;
1375 if (newnp->opt) 982 if (newnp->opt)
1376 newtp->ext_header_len = newnp->opt->opt_nflen + 983 inet_csk(newsk)->icsk_ext_hdr_len = (newnp->opt->opt_nflen +
1377 newnp->opt->opt_flen; 984 newnp->opt->opt_flen);
1378 985
1379 tcp_sync_mss(newsk, dst_mtu(dst)); 986 tcp_sync_mss(newsk, dst_mtu(dst));
1380 newtp->advmss = dst_metric(dst, RTAX_ADVMSS); 987 newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
@@ -1382,7 +989,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1382 989
1383 newinet->daddr = newinet->saddr = newinet->rcv_saddr = LOOPBACK4_IPV6; 990 newinet->daddr = newinet->saddr = newinet->rcv_saddr = LOOPBACK4_IPV6;
1384 991
1385 __tcp_v6_hash(newsk); 992 __inet6_hash(&tcp_hashinfo, newsk);
1386 inet_inherit_port(&tcp_hashinfo, sk, newsk); 993 inet_inherit_port(&tcp_hashinfo, sk, newsk);
1387 994
1388 return newsk; 995 return newsk;
@@ -1679,139 +1286,16 @@ do_time_wait:
1679 goto discard_it; 1286 goto discard_it;
1680} 1287}
1681 1288
1682static int tcp_v6_rebuild_header(struct sock *sk)
1683{
1684 int err;
1685 struct dst_entry *dst;
1686 struct ipv6_pinfo *np = inet6_sk(sk);
1687
1688 dst = __sk_dst_check(sk, np->dst_cookie);
1689
1690 if (dst == NULL) {
1691 struct inet_sock *inet = inet_sk(sk);
1692 struct in6_addr *final_p = NULL, final;
1693 struct flowi fl;
1694
1695 memset(&fl, 0, sizeof(fl));
1696 fl.proto = IPPROTO_TCP;
1697 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1698 ipv6_addr_copy(&fl.fl6_src, &np->saddr);
1699 fl.fl6_flowlabel = np->flow_label;
1700 fl.oif = sk->sk_bound_dev_if;
1701 fl.fl_ip_dport = inet->dport;
1702 fl.fl_ip_sport = inet->sport;
1703
1704 if (np->opt && np->opt->srcrt) {
1705 struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
1706 ipv6_addr_copy(&final, &fl.fl6_dst);
1707 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1708 final_p = &final;
1709 }
1710
1711 err = ip6_dst_lookup(sk, &dst, &fl);
1712 if (err) {
1713 sk->sk_route_caps = 0;
1714 return err;
1715 }
1716 if (final_p)
1717 ipv6_addr_copy(&fl.fl6_dst, final_p);
1718
1719 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
1720 sk->sk_err_soft = -err;
1721 return err;
1722 }
1723
1724 ip6_dst_store(sk, dst, NULL);
1725 sk->sk_route_caps = dst->dev->features &
1726 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
1727 }
1728
1729 return 0;
1730}
1731
1732static int tcp_v6_xmit(struct sk_buff *skb, int ipfragok)
1733{
1734 struct sock *sk = skb->sk;
1735 struct inet_sock *inet = inet_sk(sk);
1736 struct ipv6_pinfo *np = inet6_sk(sk);
1737 struct flowi fl;
1738 struct dst_entry *dst;
1739 struct in6_addr *final_p = NULL, final;
1740
1741 memset(&fl, 0, sizeof(fl));
1742 fl.proto = IPPROTO_TCP;
1743 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1744 ipv6_addr_copy(&fl.fl6_src, &np->saddr);
1745 fl.fl6_flowlabel = np->flow_label;
1746 IP6_ECN_flow_xmit(sk, fl.fl6_flowlabel);
1747 fl.oif = sk->sk_bound_dev_if;
1748 fl.fl_ip_sport = inet->sport;
1749 fl.fl_ip_dport = inet->dport;
1750
1751 if (np->opt && np->opt->srcrt) {
1752 struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
1753 ipv6_addr_copy(&final, &fl.fl6_dst);
1754 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1755 final_p = &final;
1756 }
1757
1758 dst = __sk_dst_check(sk, np->dst_cookie);
1759
1760 if (dst == NULL) {
1761 int err = ip6_dst_lookup(sk, &dst, &fl);
1762
1763 if (err) {
1764 sk->sk_err_soft = -err;
1765 return err;
1766 }
1767
1768 if (final_p)
1769 ipv6_addr_copy(&fl.fl6_dst, final_p);
1770
1771 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
1772 sk->sk_route_caps = 0;
1773 return err;
1774 }
1775
1776 ip6_dst_store(sk, dst, NULL);
1777 sk->sk_route_caps = dst->dev->features &
1778 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
1779 }
1780
1781 skb->dst = dst_clone(dst);
1782
1783 /* Restore final destination back after routing done */
1784 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1785
1786 return ip6_xmit(sk, skb, &fl, np->opt, 0);
1787}
1788
1789static void v6_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr)
1790{
1791 struct ipv6_pinfo *np = inet6_sk(sk);
1792 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) uaddr;
1793
1794 sin6->sin6_family = AF_INET6;
1795 ipv6_addr_copy(&sin6->sin6_addr, &np->daddr);
1796 sin6->sin6_port = inet_sk(sk)->dport;
1797 /* We do not store received flowlabel for TCP */
1798 sin6->sin6_flowinfo = 0;
1799 sin6->sin6_scope_id = 0;
1800 if (sk->sk_bound_dev_if &&
1801 ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
1802 sin6->sin6_scope_id = sk->sk_bound_dev_if;
1803}
1804
1805static int tcp_v6_remember_stamp(struct sock *sk) 1289static int tcp_v6_remember_stamp(struct sock *sk)
1806{ 1290{
1807 /* Alas, not yet... */ 1291 /* Alas, not yet... */
1808 return 0; 1292 return 0;
1809} 1293}
1810 1294
1811static struct tcp_func ipv6_specific = { 1295static struct inet_connection_sock_af_ops ipv6_specific = {
1812 .queue_xmit = tcp_v6_xmit, 1296 .queue_xmit = inet6_csk_xmit,
1813 .send_check = tcp_v6_send_check, 1297 .send_check = tcp_v6_send_check,
1814 .rebuild_header = tcp_v6_rebuild_header, 1298 .rebuild_header = inet6_sk_rebuild_header,
1815 .conn_request = tcp_v6_conn_request, 1299 .conn_request = tcp_v6_conn_request,
1816 .syn_recv_sock = tcp_v6_syn_recv_sock, 1300 .syn_recv_sock = tcp_v6_syn_recv_sock,
1817 .remember_stamp = tcp_v6_remember_stamp, 1301 .remember_stamp = tcp_v6_remember_stamp,
@@ -1819,7 +1303,7 @@ static struct tcp_func ipv6_specific = {
1819 1303
1820 .setsockopt = ipv6_setsockopt, 1304 .setsockopt = ipv6_setsockopt,
1821 .getsockopt = ipv6_getsockopt, 1305 .getsockopt = ipv6_getsockopt,
1822 .addr2sockaddr = v6_addr2sockaddr, 1306 .addr2sockaddr = inet6_csk_addr2sockaddr,
1823 .sockaddr_len = sizeof(struct sockaddr_in6) 1307 .sockaddr_len = sizeof(struct sockaddr_in6)
1824}; 1308};
1825 1309
@@ -1827,7 +1311,7 @@ static struct tcp_func ipv6_specific = {
1827 * TCP over IPv4 via INET6 API 1311 * TCP over IPv4 via INET6 API
1828 */ 1312 */
1829 1313
1830static struct tcp_func ipv6_mapped = { 1314static struct inet_connection_sock_af_ops ipv6_mapped = {
1831 .queue_xmit = ip_queue_xmit, 1315 .queue_xmit = ip_queue_xmit,
1832 .send_check = tcp_v4_send_check, 1316 .send_check = tcp_v4_send_check,
1833 .rebuild_header = inet_sk_rebuild_header, 1317 .rebuild_header = inet_sk_rebuild_header,
@@ -1838,7 +1322,7 @@ static struct tcp_func ipv6_mapped = {
1838 1322
1839 .setsockopt = ipv6_setsockopt, 1323 .setsockopt = ipv6_setsockopt,
1840 .getsockopt = ipv6_getsockopt, 1324 .getsockopt = ipv6_getsockopt,
1841 .addr2sockaddr = v6_addr2sockaddr, 1325 .addr2sockaddr = inet6_csk_addr2sockaddr,
1842 .sockaddr_len = sizeof(struct sockaddr_in6) 1326 .sockaddr_len = sizeof(struct sockaddr_in6)
1843}; 1327};
1844 1328
@@ -1877,8 +1361,9 @@ static int tcp_v6_init_sock(struct sock *sk)
1877 1361
1878 sk->sk_state = TCP_CLOSE; 1362 sk->sk_state = TCP_CLOSE;
1879 1363
1880 tp->af_specific = &ipv6_specific; 1364 icsk->icsk_af_ops = &ipv6_specific;
1881 icsk->icsk_ca_ops = &tcp_init_congestion_ops; 1365 icsk->icsk_ca_ops = &tcp_init_congestion_ops;
1366 icsk->icsk_sync_mss = tcp_sync_mss;
1882 sk->sk_write_space = sk_stream_write_space; 1367 sk->sk_write_space = sk_stream_write_space;
1883 sock_set_flag(sk, SOCK_USE_WRITE_QUEUE); 1368 sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
1884 1369
@@ -1900,14 +1385,13 @@ static int tcp_v6_destroy_sock(struct sock *sk)
1900static void get_openreq6(struct seq_file *seq, 1385static void get_openreq6(struct seq_file *seq,
1901 struct sock *sk, struct request_sock *req, int i, int uid) 1386 struct sock *sk, struct request_sock *req, int i, int uid)
1902{ 1387{
1903 struct in6_addr *dest, *src;
1904 int ttd = req->expires - jiffies; 1388 int ttd = req->expires - jiffies;
1389 struct in6_addr *src = &inet6_rsk(req)->loc_addr;
1390 struct in6_addr *dest = &inet6_rsk(req)->rmt_addr;
1905 1391
1906 if (ttd < 0) 1392 if (ttd < 0)
1907 ttd = 0; 1393 ttd = 0;
1908 1394
1909 src = &tcp6_rsk(req)->loc_addr;
1910 dest = &tcp6_rsk(req)->rmt_addr;
1911 seq_printf(seq, 1395 seq_printf(seq,
1912 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " 1396 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1913 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n", 1397 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
@@ -1988,14 +1472,14 @@ static void get_timewait6_sock(struct seq_file *seq,
1988{ 1472{
1989 struct in6_addr *dest, *src; 1473 struct in6_addr *dest, *src;
1990 __u16 destp, srcp; 1474 __u16 destp, srcp;
1991 struct tcp6_timewait_sock *tcp6tw = tcp6_twsk((struct sock *)tw); 1475 struct inet6_timewait_sock *tw6 = inet6_twsk((struct sock *)tw);
1992 int ttd = tw->tw_ttd - jiffies; 1476 int ttd = tw->tw_ttd - jiffies;
1993 1477
1994 if (ttd < 0) 1478 if (ttd < 0)
1995 ttd = 0; 1479 ttd = 0;
1996 1480
1997 dest = &tcp6tw->tw_v6_daddr; 1481 dest = &tw6->tw_v6_daddr;
1998 src = &tcp6tw->tw_v6_rcv_saddr; 1482 src = &tw6->tw_v6_rcv_saddr;
1999 destp = ntohs(tw->tw_dport); 1483 destp = ntohs(tw->tw_dport);
2000 srcp = ntohs(tw->tw_sport); 1484 srcp = ntohs(tw->tw_sport);
2001 1485
@@ -2093,7 +1577,7 @@ struct proto tcpv6_prot = {
2093 .sysctl_rmem = sysctl_tcp_rmem, 1577 .sysctl_rmem = sysctl_tcp_rmem,
2094 .max_header = MAX_TCP_HEADER, 1578 .max_header = MAX_TCP_HEADER,
2095 .obj_size = sizeof(struct tcp6_sock), 1579 .obj_size = sizeof(struct tcp6_sock),
2096 .twsk_obj_size = sizeof(struct tcp6_timewait_sock), 1580 .twsk_prot = &tcp6_timewait_sock_ops,
2097 .rsk_prot = &tcp6_request_sock_ops, 1581 .rsk_prot = &tcp6_request_sock_ops,
2098}; 1582};
2099 1583
@@ -2110,7 +1594,8 @@ static struct inet_protosw tcpv6_protosw = {
2110 .ops = &inet6_stream_ops, 1594 .ops = &inet6_stream_ops,
2111 .capability = -1, 1595 .capability = -1,
2112 .no_check = 0, 1596 .no_check = 0,
2113 .flags = INET_PROTOSW_PERMANENT, 1597 .flags = INET_PROTOSW_PERMANENT |
1598 INET_PROTOSW_ICSK,
2114}; 1599};
2115 1600
2116void __init tcpv6_init(void) 1601void __init tcpv6_init(void)
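The tcp_ipv6.c conversion above retires struct tcp_func and the private tp->af_specific pointer in favour of the generic struct inet_connection_sock_af_ops hung off inet_csk(sk)->icsk_af_ops, letting shared connection-sock helpers (inet_csk_get_port, inet6_csk_search_req, inet6_csk_xmit, inet6_hash_connect) replace the removed TCP-specific copies. A minimal sketch of the resulting dispatch, with a hypothetical wrapper name:

/*
 * Illustrative sketch only: per-socket address-family dispatch after
 * the conversion above. For an AF_INET6 TCP socket icsk_af_ops points
 * at ipv6_specific (inet6_csk_xmit); for a v4-mapped peer it is flipped
 * to ipv6_mapped (ip_queue_xmit) in tcp_v6_connect().
 */
#include <net/inet_connection_sock.h>

static int example_queue_segment(struct sock *sk, struct sk_buff *skb)
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	return icsk->icsk_af_ops->queue_xmit(skb, 0);	/* ipfragok == 0 */
}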
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 5cc8731eb55b..d8538dcea813 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -36,6 +36,7 @@
36#include <linux/ipv6.h> 36#include <linux/ipv6.h>
37#include <linux/icmpv6.h> 37#include <linux/icmpv6.h>
38#include <linux/init.h> 38#include <linux/init.h>
39#include <linux/skbuff.h>
39#include <asm/uaccess.h> 40#include <asm/uaccess.h>
40 41
41#include <net/sock.h> 42#include <net/sock.h>
@@ -300,20 +301,7 @@ out:
300 return err; 301 return err;
301 302
302csum_copy_err: 303csum_copy_err:
303 /* Clear queue. */ 304 skb_kill_datagram(sk, skb, flags);
304 if (flags&MSG_PEEK) {
305 int clear = 0;
306 spin_lock_bh(&sk->sk_receive_queue.lock);
307 if (skb == skb_peek(&sk->sk_receive_queue)) {
308 __skb_unlink(skb, &sk->sk_receive_queue);
309 clear = 1;
310 }
311 spin_unlock_bh(&sk->sk_receive_queue.lock);
312 if (clear)
313 kfree_skb(skb);
314 }
315
316 skb_free_datagram(sk, skb);
317 305
318 if (flags & MSG_DONTWAIT) { 306 if (flags & MSG_DONTWAIT) {
319 UDP6_INC_STATS_USER(UDP_MIB_INERRORS); 307 UDP6_INC_STATS_USER(UDP_MIB_INERRORS);