aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv6
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv6')
-rw-r--r--net/ipv6/Kconfig79
-rw-r--r--net/ipv6/Makefile25
-rw-r--r--net/ipv6/addrconf.c3615
-rw-r--r--net/ipv6/af_inet6.c867
-rw-r--r--net/ipv6/ah6.c478
-rw-r--r--net/ipv6/anycast.c594
-rw-r--r--net/ipv6/datagram.c600
-rw-r--r--net/ipv6/esp6.c424
-rw-r--r--net/ipv6/exthdrs.c575
-rw-r--r--net/ipv6/exthdrs_core.c109
-rw-r--r--net/ipv6/icmp.c822
-rw-r--r--net/ipv6/ip6_fib.c1225
-rw-r--r--net/ipv6/ip6_flowlabel.c706
-rw-r--r--net/ipv6/ip6_input.c269
-rw-r--r--net/ipv6/ip6_output.c1197
-rw-r--r--net/ipv6/ip6_tunnel.c1163
-rw-r--r--net/ipv6/ipcomp6.c524
-rw-r--r--net/ipv6/ipv6_sockglue.c704
-rw-r--r--net/ipv6/ipv6_syms.c41
-rw-r--r--net/ipv6/mcast.c2499
-rw-r--r--net/ipv6/ndisc.c1690
-rw-r--r--net/ipv6/netfilter/Kconfig242
-rw-r--r--net/ipv6/netfilter/Makefile26
-rw-r--r--net/ipv6/netfilter/ip6_queue.c741
-rw-r--r--net/ipv6/netfilter/ip6_tables.c1970
-rw-r--r--net/ipv6/netfilter/ip6t_LOG.c509
-rw-r--r--net/ipv6/netfilter/ip6t_MARK.c78
-rw-r--r--net/ipv6/netfilter/ip6t_ah.c208
-rw-r--r--net/ipv6/netfilter/ip6t_dst.c298
-rw-r--r--net/ipv6/netfilter/ip6t_esp.c181
-rw-r--r--net/ipv6/netfilter/ip6t_eui64.c101
-rw-r--r--net/ipv6/netfilter/ip6t_frag.c229
-rw-r--r--net/ipv6/netfilter/ip6t_hbh.c298
-rw-r--r--net/ipv6/netfilter/ip6t_hl.c80
-rw-r--r--net/ipv6/netfilter/ip6t_ipv6header.c167
-rw-r--r--net/ipv6/netfilter/ip6t_length.c66
-rw-r--r--net/ipv6/netfilter/ip6t_limit.c147
-rw-r--r--net/ipv6/netfilter/ip6t_mac.c80
-rw-r--r--net/ipv6/netfilter/ip6t_mark.c66
-rw-r--r--net/ipv6/netfilter/ip6t_multiport.c125
-rw-r--r--net/ipv6/netfilter/ip6t_owner.c174
-rw-r--r--net/ipv6/netfilter/ip6t_physdev.c135
-rw-r--r--net/ipv6/netfilter/ip6t_rt.c301
-rw-r--r--net/ipv6/netfilter/ip6table_filter.c214
-rw-r--r--net/ipv6/netfilter/ip6table_mangle.c287
-rw-r--r--net/ipv6/netfilter/ip6table_raw.c182
-rw-r--r--net/ipv6/proc.c303
-rw-r--r--net/ipv6/protocol.c86
-rw-r--r--net/ipv6/raw.c1157
-rw-r--r--net/ipv6/reassembly.c771
-rw-r--r--net/ipv6/route.c2131
-rw-r--r--net/ipv6/sit.c833
-rw-r--r--net/ipv6/sysctl_net_ipv6.c125
-rw-r--r--net/ipv6/tcp_ipv6.c2265
-rw-r--r--net/ipv6/udp.c1075
-rw-r--r--net/ipv6/xfrm6_input.c150
-rw-r--r--net/ipv6/xfrm6_output.c143
-rw-r--r--net/ipv6/xfrm6_policy.c295
-rw-r--r--net/ipv6/xfrm6_state.c136
-rw-r--r--net/ipv6/xfrm6_tunnel.c543
60 files changed, 35124 insertions, 0 deletions
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
new file mode 100644
index 000000000000..e66ca9381cfd
--- /dev/null
+++ b/net/ipv6/Kconfig
@@ -0,0 +1,79 @@
1#
2# IPv6 configuration
3#
4config IPV6_PRIVACY
5 bool "IPv6: Privacy Extensions (RFC 3041) support"
6 depends on IPV6
7 ---help---
8 Privacy Extensions for Stateless Address Autoconfiguration in IPv6
9 support. With this option, additional periodically-alter
10 pseudo-random global-scope unicast address(es) will assigned to
11 your interface(s).
12
13 By default, kernel do not generate temporary addresses.
14 To use temporary addresses, do
15
16 echo 2 >/proc/sys/net/ipv6/conf/all/use_tempaddr
17
18 See <file:Documentation/networking/ip-sysctl.txt> for details.
19
20config INET6_AH
21 tristate "IPv6: AH transformation"
22 depends on IPV6
23 select XFRM
24 select CRYPTO
25 select CRYPTO_HMAC
26 select CRYPTO_MD5
27 select CRYPTO_SHA1
28 ---help---
29 Support for IPsec AH.
30
31 If unsure, say Y.
32
33config INET6_ESP
34 tristate "IPv6: ESP transformation"
35 depends on IPV6
36 select XFRM
37 select CRYPTO
38 select CRYPTO_HMAC
39 select CRYPTO_MD5
40 select CRYPTO_SHA1
41 select CRYPTO_DES
42 ---help---
43 Support for IPsec ESP.
44
45 If unsure, say Y.
46
47config INET6_IPCOMP
48 tristate "IPv6: IPComp transformation"
49 depends on IPV6
50 select XFRM
51 select INET6_TUNNEL
52 select CRYPTO
53 select CRYPTO_DEFLATE
54 ---help---
55 Support for IP Payload Compression Protocol (IPComp) (RFC3173),
56 typically needed for IPsec.
57
58 If unsure, say Y.
59
60config INET6_TUNNEL
61 tristate "IPv6: tunnel transformation"
62 depends on IPV6
63 select XFRM
64 ---help---
65 Support for generic IPv6-in-IPv6 tunnel transformation, which is
66 required by the IPv6-in-IPv6 tunneling module as well as tunnel mode
67 IPComp.
68
69 If unsure, say Y.
70
71config IPV6_TUNNEL
72 tristate "IPv6: IPv6-in-IPv6 tunnel"
73 depends on IPV6
74 select INET6_TUNNEL
75 ---help---
76 Support for IPv6-in-IPv6 tunnels described in RFC 2473.
77
78 If unsure, say N.
79
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile
new file mode 100644
index 000000000000..b39e04940590
--- /dev/null
+++ b/net/ipv6/Makefile
@@ -0,0 +1,25 @@
1#
2# Makefile for the Linux TCP/IP (INET6) layer.
3#
4
5obj-$(CONFIG_IPV6) += ipv6.o
6
7ipv6-objs := af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o sit.o \
8 route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o raw.o \
9 protocol.o icmp.o mcast.o reassembly.o tcp_ipv6.o \
10 exthdrs.o sysctl_net_ipv6.o datagram.o proc.o \
11 ip6_flowlabel.o ipv6_syms.o
12
13ipv6-$(CONFIG_XFRM) += xfrm6_policy.o xfrm6_state.o xfrm6_input.o \
14 xfrm6_output.o
15ipv6-objs += $(ipv6-y)
16
17obj-$(CONFIG_INET6_AH) += ah6.o
18obj-$(CONFIG_INET6_ESP) += esp6.o
19obj-$(CONFIG_INET6_IPCOMP) += ipcomp6.o
20obj-$(CONFIG_INET6_TUNNEL) += xfrm6_tunnel.o
21obj-$(CONFIG_NETFILTER) += netfilter/
22
23obj-$(CONFIG_IPV6_TUNNEL) += ip6_tunnel.o
24
25obj-y += exthdrs_core.o
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
new file mode 100644
index 000000000000..5ffde14ddc09
--- /dev/null
+++ b/net/ipv6/addrconf.c
@@ -0,0 +1,3615 @@
1/*
2 * IPv6 Address [auto]configuration
3 * Linux INET6 implementation
4 *
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
7 * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
8 *
9 * $Id: addrconf.c,v 1.69 2001/10/31 21:55:54 davem Exp $
10 *
11 * This program is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU General Public License
13 * as published by the Free Software Foundation; either version
14 * 2 of the License, or (at your option) any later version.
15 */
16
17/*
18 * Changes:
19 *
20 * Janos Farkas : delete timer on ifdown
21 * <chexum@bankinf.banki.hu>
22 * Andi Kleen : kill double kfree on module
23 * unload.
24 * Maciej W. Rozycki : FDDI support
25 * sekiya@USAGI : Don't send too many RS
26 * packets.
27 * yoshfuji@USAGI : Fixed interval between DAD
28 * packets.
29 * YOSHIFUJI Hideaki @USAGI : improved accuracy of
30 * address validation timer.
31 * YOSHIFUJI Hideaki @USAGI : Privacy Extensions (RFC3041)
32 * support.
33 * Yuji SEKIYA @USAGI : Don't assign a same IPv6
34 * address on a same interface.
35 * YOSHIFUJI Hideaki @USAGI : ARCnet support
36 * YOSHIFUJI Hideaki @USAGI : convert /proc/net/if_inet6 to
37 * seq_file.
38 */
39
40#include <linux/config.h>
41#include <linux/errno.h>
42#include <linux/types.h>
43#include <linux/socket.h>
44#include <linux/sockios.h>
45#include <linux/sched.h>
46#include <linux/net.h>
47#include <linux/in6.h>
48#include <linux/netdevice.h>
49#include <linux/if_arp.h>
50#include <linux/if_arcnet.h>
51#include <linux/if_infiniband.h>
52#include <linux/route.h>
53#include <linux/inetdevice.h>
54#include <linux/init.h>
55#ifdef CONFIG_SYSCTL
56#include <linux/sysctl.h>
57#endif
58#include <linux/delay.h>
59#include <linux/notifier.h>
60
61#include <net/sock.h>
62#include <net/snmp.h>
63
64#include <net/ipv6.h>
65#include <net/protocol.h>
66#include <net/ndisc.h>
67#include <net/ip6_route.h>
68#include <net/addrconf.h>
69#include <net/tcp.h>
70#include <net/ip.h>
71#include <linux/if_tunnel.h>
72#include <linux/rtnetlink.h>
73
74#ifdef CONFIG_IPV6_PRIVACY
75#include <linux/random.h>
76#include <linux/crypto.h>
77#include <asm/scatterlist.h>
78#endif
79
80#include <asm/uaccess.h>
81
82#include <linux/proc_fs.h>
83#include <linux/seq_file.h>
84
85/* Set to 3 to get tracing... */
86#define ACONF_DEBUG 2
87
88#if ACONF_DEBUG >= 3
89#define ADBG(x) printk x
90#else
91#define ADBG(x)
92#endif
93
94#define INFINITY_LIFE_TIME 0xFFFFFFFF
95#define TIME_DELTA(a,b) ((unsigned long)((long)(a) - (long)(b)))
96
97#ifdef CONFIG_SYSCTL
98static void addrconf_sysctl_register(struct inet6_dev *idev, struct ipv6_devconf *p);
99static void addrconf_sysctl_unregister(struct ipv6_devconf *p);
100#endif
101
102#ifdef CONFIG_IPV6_PRIVACY
103static int __ipv6_regen_rndid(struct inet6_dev *idev);
104static int __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr);
105static void ipv6_regen_rndid(unsigned long data);
106
107static int desync_factor = MAX_DESYNC_FACTOR * HZ;
108static struct crypto_tfm *md5_tfm;
109static DEFINE_SPINLOCK(md5_tfm_lock);
110#endif
111
112static int ipv6_count_addresses(struct inet6_dev *idev);
113
114/*
115 * Configured unicast address hash table
116 */
117static struct inet6_ifaddr *inet6_addr_lst[IN6_ADDR_HSIZE];
118static DEFINE_RWLOCK(addrconf_hash_lock);
119
120/* Protects inet6 devices */
121DEFINE_RWLOCK(addrconf_lock);
122
123static void addrconf_verify(unsigned long);
124
125static struct timer_list addr_chk_timer =
126 TIMER_INITIALIZER(addrconf_verify, 0, 0);
127static DEFINE_SPINLOCK(addrconf_verify_lock);
128
129static void addrconf_join_anycast(struct inet6_ifaddr *ifp);
130static void addrconf_leave_anycast(struct inet6_ifaddr *ifp);
131
132static int addrconf_ifdown(struct net_device *dev, int how);
133
134static void addrconf_dad_start(struct inet6_ifaddr *ifp, int flags);
135static void addrconf_dad_timer(unsigned long data);
136static void addrconf_dad_completed(struct inet6_ifaddr *ifp);
137static void addrconf_rs_timer(unsigned long data);
138static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifa);
139static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifa);
140
141static void inet6_prefix_notify(int event, struct inet6_dev *idev,
142 struct prefix_info *pinfo);
143static int ipv6_chk_same_addr(const struct in6_addr *addr, struct net_device *dev);
144
145static struct notifier_block *inet6addr_chain;
146
147struct ipv6_devconf ipv6_devconf = {
148 .forwarding = 0,
149 .hop_limit = IPV6_DEFAULT_HOPLIMIT,
150 .mtu6 = IPV6_MIN_MTU,
151 .accept_ra = 1,
152 .accept_redirects = 1,
153 .autoconf = 1,
154 .force_mld_version = 0,
155 .dad_transmits = 1,
156 .rtr_solicits = MAX_RTR_SOLICITATIONS,
157 .rtr_solicit_interval = RTR_SOLICITATION_INTERVAL,
158 .rtr_solicit_delay = MAX_RTR_SOLICITATION_DELAY,
159#ifdef CONFIG_IPV6_PRIVACY
160 .use_tempaddr = 0,
161 .temp_valid_lft = TEMP_VALID_LIFETIME,
162 .temp_prefered_lft = TEMP_PREFERRED_LIFETIME,
163 .regen_max_retry = REGEN_MAX_RETRY,
164 .max_desync_factor = MAX_DESYNC_FACTOR,
165#endif
166 .max_addresses = IPV6_MAX_ADDRESSES,
167};
168
169static struct ipv6_devconf ipv6_devconf_dflt = {
170 .forwarding = 0,
171 .hop_limit = IPV6_DEFAULT_HOPLIMIT,
172 .mtu6 = IPV6_MIN_MTU,
173 .accept_ra = 1,
174 .accept_redirects = 1,
175 .autoconf = 1,
176 .dad_transmits = 1,
177 .rtr_solicits = MAX_RTR_SOLICITATIONS,
178 .rtr_solicit_interval = RTR_SOLICITATION_INTERVAL,
179 .rtr_solicit_delay = MAX_RTR_SOLICITATION_DELAY,
180#ifdef CONFIG_IPV6_PRIVACY
181 .use_tempaddr = 0,
182 .temp_valid_lft = TEMP_VALID_LIFETIME,
183 .temp_prefered_lft = TEMP_PREFERRED_LIFETIME,
184 .regen_max_retry = REGEN_MAX_RETRY,
185 .max_desync_factor = MAX_DESYNC_FACTOR,
186#endif
187 .max_addresses = IPV6_MAX_ADDRESSES,
188};
189
190/* IPv6 Wildcard Address and Loopback Address defined by RFC2553 */
191#if 0
192const struct in6_addr in6addr_any = IN6ADDR_ANY_INIT;
193#endif
194const struct in6_addr in6addr_loopback = IN6ADDR_LOOPBACK_INIT;
195
196int ipv6_addr_type(const struct in6_addr *addr)
197{
198 int type;
199 u32 st;
200
201 st = addr->s6_addr32[0];
202
203 if ((st & htonl(0xFF000000)) == htonl(0xFF000000)) {
204 type = IPV6_ADDR_MULTICAST;
205
206 switch((st & htonl(0x00FF0000))) {
207 case __constant_htonl(0x00010000):
208 type |= IPV6_ADDR_LOOPBACK;
209 break;
210
211 case __constant_htonl(0x00020000):
212 type |= IPV6_ADDR_LINKLOCAL;
213 break;
214
215 case __constant_htonl(0x00050000):
216 type |= IPV6_ADDR_SITELOCAL;
217 break;
218 };
219 return type;
220 }
221
222 type = IPV6_ADDR_UNICAST;
223
224 /* Consider all addresses with the first three bits different of
225 000 and 111 as finished.
226 */
227 if ((st & htonl(0xE0000000)) != htonl(0x00000000) &&
228 (st & htonl(0xE0000000)) != htonl(0xE0000000))
229 return type;
230
231 if ((st & htonl(0xFFC00000)) == htonl(0xFE800000))
232 return (IPV6_ADDR_LINKLOCAL | type);
233
234 if ((st & htonl(0xFFC00000)) == htonl(0xFEC00000))
235 return (IPV6_ADDR_SITELOCAL | type);
236
237 if ((addr->s6_addr32[0] | addr->s6_addr32[1]) == 0) {
238 if (addr->s6_addr32[2] == 0) {
239 if (addr->s6_addr32[3] == 0)
240 return IPV6_ADDR_ANY;
241
242 if (addr->s6_addr32[3] == htonl(0x00000001))
243 return (IPV6_ADDR_LOOPBACK | type);
244
245 return (IPV6_ADDR_COMPATv4 | type);
246 }
247
248 if (addr->s6_addr32[2] == htonl(0x0000ffff))
249 return IPV6_ADDR_MAPPED;
250 }
251
252 st &= htonl(0xFF000000);
253 if (st == 0)
254 return IPV6_ADDR_RESERVED;
255 st &= htonl(0xFE000000);
256 if (st == htonl(0x02000000))
257 return IPV6_ADDR_RESERVED; /* for NSAP */
258 if (st == htonl(0x04000000))
259 return IPV6_ADDR_RESERVED; /* for IPX */
260 return type;
261}
262
263static void addrconf_del_timer(struct inet6_ifaddr *ifp)
264{
265 if (del_timer(&ifp->timer))
266 __in6_ifa_put(ifp);
267}
268
269enum addrconf_timer_t
270{
271 AC_NONE,
272 AC_DAD,
273 AC_RS,
274};
275
276static void addrconf_mod_timer(struct inet6_ifaddr *ifp,
277 enum addrconf_timer_t what,
278 unsigned long when)
279{
280 if (!del_timer(&ifp->timer))
281 in6_ifa_hold(ifp);
282
283 switch (what) {
284 case AC_DAD:
285 ifp->timer.function = addrconf_dad_timer;
286 break;
287 case AC_RS:
288 ifp->timer.function = addrconf_rs_timer;
289 break;
290 default:;
291 }
292 ifp->timer.expires = jiffies + when;
293 add_timer(&ifp->timer);
294}
295
296/* Nobody refers to this device, we may destroy it. */
297
298void in6_dev_finish_destroy(struct inet6_dev *idev)
299{
300 struct net_device *dev = idev->dev;
301 BUG_TRAP(idev->addr_list==NULL);
302 BUG_TRAP(idev->mc_list==NULL);
303#ifdef NET_REFCNT_DEBUG
304 printk(KERN_DEBUG "in6_dev_finish_destroy: %s\n", dev ? dev->name : "NIL");
305#endif
306 dev_put(dev);
307 if (!idev->dead) {
308 printk("Freeing alive inet6 device %p\n", idev);
309 return;
310 }
311 snmp6_free_dev(idev);
312 kfree(idev);
313}
314
315static struct inet6_dev * ipv6_add_dev(struct net_device *dev)
316{
317 struct inet6_dev *ndev;
318
319 ASSERT_RTNL();
320
321 if (dev->mtu < IPV6_MIN_MTU)
322 return NULL;
323
324 ndev = kmalloc(sizeof(struct inet6_dev), GFP_KERNEL);
325
326 if (ndev) {
327 memset(ndev, 0, sizeof(struct inet6_dev));
328
329 rwlock_init(&ndev->lock);
330 ndev->dev = dev;
331 memcpy(&ndev->cnf, &ipv6_devconf_dflt, sizeof(ndev->cnf));
332 ndev->cnf.mtu6 = dev->mtu;
333 ndev->cnf.sysctl = NULL;
334 ndev->nd_parms = neigh_parms_alloc(dev, &nd_tbl);
335 if (ndev->nd_parms == NULL) {
336 kfree(ndev);
337 return NULL;
338 }
339 /* We refer to the device */
340 dev_hold(dev);
341
342 if (snmp6_alloc_dev(ndev) < 0) {
343 ADBG((KERN_WARNING
344 "%s(): cannot allocate memory for statistics; dev=%s.\n",
345 __FUNCTION__, dev->name));
346 neigh_parms_release(&nd_tbl, ndev->nd_parms);
347 ndev->dead = 1;
348 in6_dev_finish_destroy(ndev);
349 return NULL;
350 }
351
352 if (snmp6_register_dev(ndev) < 0) {
353 ADBG((KERN_WARNING
354 "%s(): cannot create /proc/net/dev_snmp6/%s\n",
355 __FUNCTION__, dev->name));
356 neigh_parms_release(&nd_tbl, ndev->nd_parms);
357 ndev->dead = 1;
358 in6_dev_finish_destroy(ndev);
359 return NULL;
360 }
361
362 /* One reference from device. We must do this before
363 * we invoke __ipv6_regen_rndid().
364 */
365 in6_dev_hold(ndev);
366
367#ifdef CONFIG_IPV6_PRIVACY
368 get_random_bytes(ndev->rndid, sizeof(ndev->rndid));
369 get_random_bytes(ndev->entropy, sizeof(ndev->entropy));
370 init_timer(&ndev->regen_timer);
371 ndev->regen_timer.function = ipv6_regen_rndid;
372 ndev->regen_timer.data = (unsigned long) ndev;
373 if ((dev->flags&IFF_LOOPBACK) ||
374 dev->type == ARPHRD_TUNNEL ||
375 dev->type == ARPHRD_SIT) {
376 printk(KERN_INFO
377 "Disabled Privacy Extensions on device %p(%s)\n",
378 dev, dev->name);
379 ndev->cnf.use_tempaddr = -1;
380 } else {
381 in6_dev_hold(ndev);
382 ipv6_regen_rndid((unsigned long) ndev);
383 }
384#endif
385
386 write_lock_bh(&addrconf_lock);
387 dev->ip6_ptr = ndev;
388 write_unlock_bh(&addrconf_lock);
389
390 ipv6_mc_init_dev(ndev);
391 ndev->tstamp = jiffies;
392#ifdef CONFIG_SYSCTL
393 neigh_sysctl_register(dev, ndev->nd_parms, NET_IPV6,
394 NET_IPV6_NEIGH, "ipv6",
395 &ndisc_ifinfo_sysctl_change,
396 NULL);
397 addrconf_sysctl_register(ndev, &ndev->cnf);
398#endif
399 }
400 return ndev;
401}
402
403static struct inet6_dev * ipv6_find_idev(struct net_device *dev)
404{
405 struct inet6_dev *idev;
406
407 ASSERT_RTNL();
408
409 if ((idev = __in6_dev_get(dev)) == NULL) {
410 if ((idev = ipv6_add_dev(dev)) == NULL)
411 return NULL;
412 }
413 if (dev->flags&IFF_UP)
414 ipv6_mc_up(idev);
415 return idev;
416}
417
418#ifdef CONFIG_SYSCTL
419static void dev_forward_change(struct inet6_dev *idev)
420{
421 struct net_device *dev;
422 struct inet6_ifaddr *ifa;
423 struct in6_addr addr;
424
425 if (!idev)
426 return;
427 dev = idev->dev;
428 if (dev && (dev->flags & IFF_MULTICAST)) {
429 ipv6_addr_all_routers(&addr);
430
431 if (idev->cnf.forwarding)
432 ipv6_dev_mc_inc(dev, &addr);
433 else
434 ipv6_dev_mc_dec(dev, &addr);
435 }
436 for (ifa=idev->addr_list; ifa; ifa=ifa->if_next) {
437 if (idev->cnf.forwarding)
438 addrconf_join_anycast(ifa);
439 else
440 addrconf_leave_anycast(ifa);
441 }
442}
443
444
445static void addrconf_forward_change(void)
446{
447 struct net_device *dev;
448 struct inet6_dev *idev;
449
450 read_lock(&dev_base_lock);
451 for (dev=dev_base; dev; dev=dev->next) {
452 read_lock(&addrconf_lock);
453 idev = __in6_dev_get(dev);
454 if (idev) {
455 int changed = (!idev->cnf.forwarding) ^ (!ipv6_devconf.forwarding);
456 idev->cnf.forwarding = ipv6_devconf.forwarding;
457 if (changed)
458 dev_forward_change(idev);
459 }
460 read_unlock(&addrconf_lock);
461 }
462 read_unlock(&dev_base_lock);
463}
464#endif
465
466/* Nobody refers to this ifaddr, destroy it */
467
468void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp)
469{
470 BUG_TRAP(ifp->if_next==NULL);
471 BUG_TRAP(ifp->lst_next==NULL);
472#ifdef NET_REFCNT_DEBUG
473 printk(KERN_DEBUG "inet6_ifa_finish_destroy\n");
474#endif
475
476 in6_dev_put(ifp->idev);
477
478 if (del_timer(&ifp->timer))
479 printk("Timer is still running, when freeing ifa=%p\n", ifp);
480
481 if (!ifp->dead) {
482 printk("Freeing alive inet6 address %p\n", ifp);
483 return;
484 }
485 dst_release(&ifp->rt->u.dst);
486
487 kfree(ifp);
488}
489
490/* On success it returns ifp with increased reference count */
491
492static struct inet6_ifaddr *
493ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen,
494 int scope, unsigned flags)
495{
496 struct inet6_ifaddr *ifa = NULL;
497 struct rt6_info *rt;
498 int hash;
499 int err = 0;
500
501 read_lock_bh(&addrconf_lock);
502 if (idev->dead) {
503 err = -ENODEV; /*XXX*/
504 goto out2;
505 }
506
507 write_lock(&addrconf_hash_lock);
508
509 /* Ignore adding duplicate addresses on an interface */
510 if (ipv6_chk_same_addr(addr, idev->dev)) {
511 ADBG(("ipv6_add_addr: already assigned\n"));
512 err = -EEXIST;
513 goto out;
514 }
515
516 ifa = kmalloc(sizeof(struct inet6_ifaddr), GFP_ATOMIC);
517
518 if (ifa == NULL) {
519 ADBG(("ipv6_add_addr: malloc failed\n"));
520 err = -ENOBUFS;
521 goto out;
522 }
523
524 rt = addrconf_dst_alloc(idev, addr, 0);
525 if (IS_ERR(rt)) {
526 err = PTR_ERR(rt);
527 goto out;
528 }
529
530 memset(ifa, 0, sizeof(struct inet6_ifaddr));
531 ipv6_addr_copy(&ifa->addr, addr);
532
533 spin_lock_init(&ifa->lock);
534 init_timer(&ifa->timer);
535 ifa->timer.data = (unsigned long) ifa;
536 ifa->scope = scope;
537 ifa->prefix_len = pfxlen;
538 ifa->flags = flags | IFA_F_TENTATIVE;
539 ifa->cstamp = ifa->tstamp = jiffies;
540
541 ifa->idev = idev;
542 in6_dev_hold(idev);
543 /* For caller */
544 in6_ifa_hold(ifa);
545
546 /* Add to big hash table */
547 hash = ipv6_addr_hash(addr);
548
549 ifa->lst_next = inet6_addr_lst[hash];
550 inet6_addr_lst[hash] = ifa;
551 in6_ifa_hold(ifa);
552 write_unlock(&addrconf_hash_lock);
553
554 write_lock(&idev->lock);
555 /* Add to inet6_dev unicast addr list. */
556 ifa->if_next = idev->addr_list;
557 idev->addr_list = ifa;
558
559#ifdef CONFIG_IPV6_PRIVACY
560 if (ifa->flags&IFA_F_TEMPORARY) {
561 ifa->tmp_next = idev->tempaddr_list;
562 idev->tempaddr_list = ifa;
563 in6_ifa_hold(ifa);
564 }
565#endif
566
567 ifa->rt = rt;
568
569 in6_ifa_hold(ifa);
570 write_unlock(&idev->lock);
571out2:
572 read_unlock_bh(&addrconf_lock);
573
574 if (unlikely(err == 0))
575 notifier_call_chain(&inet6addr_chain, NETDEV_UP, ifa);
576 else {
577 kfree(ifa);
578 ifa = ERR_PTR(err);
579 }
580
581 return ifa;
582out:
583 write_unlock(&addrconf_hash_lock);
584 goto out2;
585}
586
587/* This function wants to get referenced ifp and releases it before return */
588
589static void ipv6_del_addr(struct inet6_ifaddr *ifp)
590{
591 struct inet6_ifaddr *ifa, **ifap;
592 struct inet6_dev *idev = ifp->idev;
593 int hash;
594 int deleted = 0, onlink = 0;
595 unsigned long expires = jiffies;
596
597 hash = ipv6_addr_hash(&ifp->addr);
598
599 ifp->dead = 1;
600
601 write_lock_bh(&addrconf_hash_lock);
602 for (ifap = &inet6_addr_lst[hash]; (ifa=*ifap) != NULL;
603 ifap = &ifa->lst_next) {
604 if (ifa == ifp) {
605 *ifap = ifa->lst_next;
606 __in6_ifa_put(ifp);
607 ifa->lst_next = NULL;
608 break;
609 }
610 }
611 write_unlock_bh(&addrconf_hash_lock);
612
613 write_lock_bh(&idev->lock);
614#ifdef CONFIG_IPV6_PRIVACY
615 if (ifp->flags&IFA_F_TEMPORARY) {
616 for (ifap = &idev->tempaddr_list; (ifa=*ifap) != NULL;
617 ifap = &ifa->tmp_next) {
618 if (ifa == ifp) {
619 *ifap = ifa->tmp_next;
620 if (ifp->ifpub) {
621 in6_ifa_put(ifp->ifpub);
622 ifp->ifpub = NULL;
623 }
624 __in6_ifa_put(ifp);
625 ifa->tmp_next = NULL;
626 break;
627 }
628 }
629 }
630#endif
631
632 for (ifap = &idev->addr_list; (ifa=*ifap) != NULL;
633 ifap = &ifa->if_next) {
634 if (ifa == ifp) {
635 *ifap = ifa->if_next;
636 __in6_ifa_put(ifp);
637 ifa->if_next = NULL;
638 if (!(ifp->flags & IFA_F_PERMANENT) || onlink > 0)
639 break;
640 deleted = 1;
641 } else if (ifp->flags & IFA_F_PERMANENT) {
642 if (ipv6_prefix_equal(&ifa->addr, &ifp->addr,
643 ifp->prefix_len)) {
644 if (ifa->flags & IFA_F_PERMANENT) {
645 onlink = 1;
646 if (deleted)
647 break;
648 } else {
649 unsigned long lifetime;
650
651 if (!onlink)
652 onlink = -1;
653
654 spin_lock(&ifa->lock);
655 lifetime = min_t(unsigned long,
656 ifa->valid_lft, 0x7fffffffUL/HZ);
657 if (time_before(expires,
658 ifa->tstamp + lifetime * HZ))
659 expires = ifa->tstamp + lifetime * HZ;
660 spin_unlock(&ifa->lock);
661 }
662 }
663 }
664 }
665 write_unlock_bh(&idev->lock);
666
667 ipv6_ifa_notify(RTM_DELADDR, ifp);
668
669 notifier_call_chain(&inet6addr_chain,NETDEV_DOWN,ifp);
670
671 addrconf_del_timer(ifp);
672
673 /*
674 * Purge or update corresponding prefix
675 *
676 * 1) we don't purge prefix here if address was not permanent.
677 * prefix is managed by its own lifetime.
678 * 2) if there're no addresses, delete prefix.
679 * 3) if there're still other permanent address(es),
680 * corresponding prefix is still permanent.
681 * 4) otherwise, update prefix lifetime to the
682 * longest valid lifetime among the corresponding
683 * addresses on the device.
684 * Note: subsequent RA will update lifetime.
685 *
686 * --yoshfuji
687 */
688 if ((ifp->flags & IFA_F_PERMANENT) && onlink < 1) {
689 struct in6_addr prefix;
690 struct rt6_info *rt;
691
692 ipv6_addr_prefix(&prefix, &ifp->addr, ifp->prefix_len);
693 rt = rt6_lookup(&prefix, NULL, ifp->idev->dev->ifindex, 1);
694
695 if (rt && ((rt->rt6i_flags & (RTF_GATEWAY | RTF_DEFAULT)) == 0)) {
696 if (onlink == 0) {
697 ip6_del_rt(rt, NULL, NULL);
698 rt = NULL;
699 } else if (!(rt->rt6i_flags & RTF_EXPIRES)) {
700 rt->rt6i_expires = expires;
701 rt->rt6i_flags |= RTF_EXPIRES;
702 }
703 }
704 dst_release(&rt->u.dst);
705 }
706
707 in6_ifa_put(ifp);
708}
709
710#ifdef CONFIG_IPV6_PRIVACY
711static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp, struct inet6_ifaddr *ift)
712{
713 struct inet6_dev *idev = ifp->idev;
714 struct in6_addr addr, *tmpaddr;
715 unsigned long tmp_prefered_lft, tmp_valid_lft, tmp_cstamp, tmp_tstamp;
716 int tmp_plen;
717 int ret = 0;
718 int max_addresses;
719
720 write_lock(&idev->lock);
721 if (ift) {
722 spin_lock_bh(&ift->lock);
723 memcpy(&addr.s6_addr[8], &ift->addr.s6_addr[8], 8);
724 spin_unlock_bh(&ift->lock);
725 tmpaddr = &addr;
726 } else {
727 tmpaddr = NULL;
728 }
729retry:
730 in6_dev_hold(idev);
731 if (idev->cnf.use_tempaddr <= 0) {
732 write_unlock(&idev->lock);
733 printk(KERN_INFO
734 "ipv6_create_tempaddr(): use_tempaddr is disabled.\n");
735 in6_dev_put(idev);
736 ret = -1;
737 goto out;
738 }
739 spin_lock_bh(&ifp->lock);
740 if (ifp->regen_count++ >= idev->cnf.regen_max_retry) {
741 idev->cnf.use_tempaddr = -1; /*XXX*/
742 spin_unlock_bh(&ifp->lock);
743 write_unlock(&idev->lock);
744 printk(KERN_WARNING
745 "ipv6_create_tempaddr(): regeneration time exceeded. disabled temporary address support.\n");
746 in6_dev_put(idev);
747 ret = -1;
748 goto out;
749 }
750 in6_ifa_hold(ifp);
751 memcpy(addr.s6_addr, ifp->addr.s6_addr, 8);
752 if (__ipv6_try_regen_rndid(idev, tmpaddr) < 0) {
753 spin_unlock_bh(&ifp->lock);
754 write_unlock(&idev->lock);
755 printk(KERN_WARNING
756 "ipv6_create_tempaddr(): regeneration of randomized interface id failed.\n");
757 in6_ifa_put(ifp);
758 in6_dev_put(idev);
759 ret = -1;
760 goto out;
761 }
762 memcpy(&addr.s6_addr[8], idev->rndid, 8);
763 tmp_valid_lft = min_t(__u32,
764 ifp->valid_lft,
765 idev->cnf.temp_valid_lft);
766 tmp_prefered_lft = min_t(__u32,
767 ifp->prefered_lft,
768 idev->cnf.temp_prefered_lft - desync_factor / HZ);
769 tmp_plen = ifp->prefix_len;
770 max_addresses = idev->cnf.max_addresses;
771 tmp_cstamp = ifp->cstamp;
772 tmp_tstamp = ifp->tstamp;
773 spin_unlock_bh(&ifp->lock);
774
775 write_unlock(&idev->lock);
776 ift = !max_addresses ||
777 ipv6_count_addresses(idev) < max_addresses ?
778 ipv6_add_addr(idev, &addr, tmp_plen,
779 ipv6_addr_type(&addr)&IPV6_ADDR_SCOPE_MASK, IFA_F_TEMPORARY) : NULL;
780 if (!ift || IS_ERR(ift)) {
781 in6_ifa_put(ifp);
782 in6_dev_put(idev);
783 printk(KERN_INFO
784 "ipv6_create_tempaddr(): retry temporary address regeneration.\n");
785 tmpaddr = &addr;
786 write_lock(&idev->lock);
787 goto retry;
788 }
789
790 spin_lock_bh(&ift->lock);
791 ift->ifpub = ifp;
792 ift->valid_lft = tmp_valid_lft;
793 ift->prefered_lft = tmp_prefered_lft;
794 ift->cstamp = tmp_cstamp;
795 ift->tstamp = tmp_tstamp;
796 spin_unlock_bh(&ift->lock);
797
798 addrconf_dad_start(ift, 0);
799 in6_ifa_put(ift);
800 in6_dev_put(idev);
801out:
802 return ret;
803}
804#endif
805
806/*
807 * Choose an appropriate source address
808 * should do:
809 * i) get an address with an appropriate scope
810 * ii) see if there is a specific route for the destination and use
811 * an address of the attached interface
812 * iii) don't use deprecated addresses
813 */
814static int inline ipv6_saddr_pref(const struct inet6_ifaddr *ifp, u8 invpref)
815{
816 int pref;
817 pref = ifp->flags&IFA_F_DEPRECATED ? 0 : 2;
818#ifdef CONFIG_IPV6_PRIVACY
819 pref |= (ifp->flags^invpref)&IFA_F_TEMPORARY ? 0 : 1;
820#endif
821 return pref;
822}
823
824#ifdef CONFIG_IPV6_PRIVACY
825#define IPV6_GET_SADDR_MAXSCORE(score) ((score) == 3)
826#else
827#define IPV6_GET_SADDR_MAXSCORE(score) (score)
828#endif
829
830int ipv6_dev_get_saddr(struct net_device *dev,
831 struct in6_addr *daddr, struct in6_addr *saddr)
832{
833 struct inet6_ifaddr *ifp = NULL;
834 struct inet6_ifaddr *match = NULL;
835 struct inet6_dev *idev;
836 int scope;
837 int err;
838 int hiscore = -1, score;
839
840 scope = ipv6_addr_scope(daddr);
841
842 /*
843 * known dev
844 * search dev and walk through dev addresses
845 */
846
847 if (dev) {
848 if (dev->flags & IFF_LOOPBACK)
849 scope = IFA_HOST;
850
851 read_lock(&addrconf_lock);
852 idev = __in6_dev_get(dev);
853 if (idev) {
854 read_lock_bh(&idev->lock);
855 for (ifp=idev->addr_list; ifp; ifp=ifp->if_next) {
856 if (ifp->scope == scope) {
857 if (ifp->flags&IFA_F_TENTATIVE)
858 continue;
859#ifdef CONFIG_IPV6_PRIVACY
860 score = ipv6_saddr_pref(ifp, idev->cnf.use_tempaddr > 1 ? IFA_F_TEMPORARY : 0);
861#else
862 score = ipv6_saddr_pref(ifp, 0);
863#endif
864 if (score <= hiscore)
865 continue;
866
867 if (match)
868 in6_ifa_put(match);
869 match = ifp;
870 hiscore = score;
871 in6_ifa_hold(ifp);
872
873 if (IPV6_GET_SADDR_MAXSCORE(score)) {
874 read_unlock_bh(&idev->lock);
875 read_unlock(&addrconf_lock);
876 goto out;
877 }
878 }
879 }
880 read_unlock_bh(&idev->lock);
881 }
882 read_unlock(&addrconf_lock);
883 }
884
885 if (scope == IFA_LINK)
886 goto out;
887
888 /*
889 * dev == NULL or search failed for specified dev
890 */
891
892 read_lock(&dev_base_lock);
893 read_lock(&addrconf_lock);
894 for (dev = dev_base; dev; dev=dev->next) {
895 idev = __in6_dev_get(dev);
896 if (idev) {
897 read_lock_bh(&idev->lock);
898 for (ifp=idev->addr_list; ifp; ifp=ifp->if_next) {
899 if (ifp->scope == scope) {
900 if (ifp->flags&IFA_F_TENTATIVE)
901 continue;
902#ifdef CONFIG_IPV6_PRIVACY
903 score = ipv6_saddr_pref(ifp, idev->cnf.use_tempaddr > 1 ? IFA_F_TEMPORARY : 0);
904#else
905 score = ipv6_saddr_pref(ifp, 0);
906#endif
907 if (score <= hiscore)
908 continue;
909
910 if (match)
911 in6_ifa_put(match);
912 match = ifp;
913 hiscore = score;
914 in6_ifa_hold(ifp);
915
916 if (IPV6_GET_SADDR_MAXSCORE(score)) {
917 read_unlock_bh(&idev->lock);
918 goto out_unlock_base;
919 }
920 }
921 }
922 read_unlock_bh(&idev->lock);
923 }
924 }
925
926out_unlock_base:
927 read_unlock(&addrconf_lock);
928 read_unlock(&dev_base_lock);
929
930out:
931 err = -EADDRNOTAVAIL;
932 if (match) {
933 ipv6_addr_copy(saddr, &match->addr);
934 err = 0;
935 in6_ifa_put(match);
936 }
937
938 return err;
939}
940
941
942int ipv6_get_saddr(struct dst_entry *dst,
943 struct in6_addr *daddr, struct in6_addr *saddr)
944{
945 return ipv6_dev_get_saddr(dst ? ((struct rt6_info *)dst)->rt6i_idev->dev : NULL, daddr, saddr);
946}
947
948
949int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr)
950{
951 struct inet6_dev *idev;
952 int err = -EADDRNOTAVAIL;
953
954 read_lock(&addrconf_lock);
955 if ((idev = __in6_dev_get(dev)) != NULL) {
956 struct inet6_ifaddr *ifp;
957
958 read_lock_bh(&idev->lock);
959 for (ifp=idev->addr_list; ifp; ifp=ifp->if_next) {
960 if (ifp->scope == IFA_LINK && !(ifp->flags&IFA_F_TENTATIVE)) {
961 ipv6_addr_copy(addr, &ifp->addr);
962 err = 0;
963 break;
964 }
965 }
966 read_unlock_bh(&idev->lock);
967 }
968 read_unlock(&addrconf_lock);
969 return err;
970}
971
972static int ipv6_count_addresses(struct inet6_dev *idev)
973{
974 int cnt = 0;
975 struct inet6_ifaddr *ifp;
976
977 read_lock_bh(&idev->lock);
978 for (ifp=idev->addr_list; ifp; ifp=ifp->if_next)
979 cnt++;
980 read_unlock_bh(&idev->lock);
981 return cnt;
982}
983
984int ipv6_chk_addr(struct in6_addr *addr, struct net_device *dev, int strict)
985{
986 struct inet6_ifaddr * ifp;
987 u8 hash = ipv6_addr_hash(addr);
988
989 read_lock_bh(&addrconf_hash_lock);
990 for(ifp = inet6_addr_lst[hash]; ifp; ifp=ifp->lst_next) {
991 if (ipv6_addr_equal(&ifp->addr, addr) &&
992 !(ifp->flags&IFA_F_TENTATIVE)) {
993 if (dev == NULL || ifp->idev->dev == dev ||
994 !(ifp->scope&(IFA_LINK|IFA_HOST) || strict))
995 break;
996 }
997 }
998 read_unlock_bh(&addrconf_hash_lock);
999 return ifp != NULL;
1000}
1001
1002static
1003int ipv6_chk_same_addr(const struct in6_addr *addr, struct net_device *dev)
1004{
1005 struct inet6_ifaddr * ifp;
1006 u8 hash = ipv6_addr_hash(addr);
1007
1008 for(ifp = inet6_addr_lst[hash]; ifp; ifp=ifp->lst_next) {
1009 if (ipv6_addr_equal(&ifp->addr, addr)) {
1010 if (dev == NULL || ifp->idev->dev == dev)
1011 break;
1012 }
1013 }
1014 return ifp != NULL;
1015}
1016
1017struct inet6_ifaddr * ipv6_get_ifaddr(struct in6_addr *addr, struct net_device *dev, int strict)
1018{
1019 struct inet6_ifaddr * ifp;
1020 u8 hash = ipv6_addr_hash(addr);
1021
1022 read_lock_bh(&addrconf_hash_lock);
1023 for(ifp = inet6_addr_lst[hash]; ifp; ifp=ifp->lst_next) {
1024 if (ipv6_addr_equal(&ifp->addr, addr)) {
1025 if (dev == NULL || ifp->idev->dev == dev ||
1026 !(ifp->scope&(IFA_LINK|IFA_HOST) || strict)) {
1027 in6_ifa_hold(ifp);
1028 break;
1029 }
1030 }
1031 }
1032 read_unlock_bh(&addrconf_hash_lock);
1033
1034 return ifp;
1035}
1036
1037int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2)
1038{
1039 const struct in6_addr *sk_rcv_saddr6 = &inet6_sk(sk)->rcv_saddr;
1040 const struct in6_addr *sk2_rcv_saddr6 = tcp_v6_rcv_saddr(sk2);
1041 u32 sk_rcv_saddr = inet_sk(sk)->rcv_saddr;
1042 u32 sk2_rcv_saddr = tcp_v4_rcv_saddr(sk2);
1043 int sk_ipv6only = ipv6_only_sock(sk);
1044 int sk2_ipv6only = tcp_v6_ipv6only(sk2);
1045 int addr_type = ipv6_addr_type(sk_rcv_saddr6);
1046 int addr_type2 = sk2_rcv_saddr6 ? ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED;
1047
1048 if (!sk2_rcv_saddr && !sk_ipv6only)
1049 return 1;
1050
1051 if (addr_type2 == IPV6_ADDR_ANY &&
1052 !(sk2_ipv6only && addr_type == IPV6_ADDR_MAPPED))
1053 return 1;
1054
1055 if (addr_type == IPV6_ADDR_ANY &&
1056 !(sk_ipv6only && addr_type2 == IPV6_ADDR_MAPPED))
1057 return 1;
1058
1059 if (sk2_rcv_saddr6 &&
1060 ipv6_addr_equal(sk_rcv_saddr6, sk2_rcv_saddr6))
1061 return 1;
1062
1063 if (addr_type == IPV6_ADDR_MAPPED &&
1064 !sk2_ipv6only &&
1065 (!sk2_rcv_saddr || !sk_rcv_saddr || sk_rcv_saddr == sk2_rcv_saddr))
1066 return 1;
1067
1068 return 0;
1069}
1070
1071/* Gets referenced address, destroys ifaddr */
1072
1073void addrconf_dad_failure(struct inet6_ifaddr *ifp)
1074{
1075 if (net_ratelimit())
1076 printk(KERN_INFO "%s: duplicate address detected!\n", ifp->idev->dev->name);
1077 if (ifp->flags&IFA_F_PERMANENT) {
1078 spin_lock_bh(&ifp->lock);
1079 addrconf_del_timer(ifp);
1080 ifp->flags |= IFA_F_TENTATIVE;
1081 spin_unlock_bh(&ifp->lock);
1082 in6_ifa_put(ifp);
1083#ifdef CONFIG_IPV6_PRIVACY
1084 } else if (ifp->flags&IFA_F_TEMPORARY) {
1085 struct inet6_ifaddr *ifpub;
1086 spin_lock_bh(&ifp->lock);
1087 ifpub = ifp->ifpub;
1088 if (ifpub) {
1089 in6_ifa_hold(ifpub);
1090 spin_unlock_bh(&ifp->lock);
1091 ipv6_create_tempaddr(ifpub, ifp);
1092 in6_ifa_put(ifpub);
1093 } else {
1094 spin_unlock_bh(&ifp->lock);
1095 }
1096 ipv6_del_addr(ifp);
1097#endif
1098 } else
1099 ipv6_del_addr(ifp);
1100}
1101
1102
1103/* Join to solicited addr multicast group. */
1104
1105void addrconf_join_solict(struct net_device *dev, struct in6_addr *addr)
1106{
1107 struct in6_addr maddr;
1108
1109 if (dev->flags&(IFF_LOOPBACK|IFF_NOARP))
1110 return;
1111
1112 addrconf_addr_solict_mult(addr, &maddr);
1113 ipv6_dev_mc_inc(dev, &maddr);
1114}
1115
1116void addrconf_leave_solict(struct inet6_dev *idev, struct in6_addr *addr)
1117{
1118 struct in6_addr maddr;
1119
1120 if (idev->dev->flags&(IFF_LOOPBACK|IFF_NOARP))
1121 return;
1122
1123 addrconf_addr_solict_mult(addr, &maddr);
1124 __ipv6_dev_mc_dec(idev, &maddr);
1125}
1126
1127void addrconf_join_anycast(struct inet6_ifaddr *ifp)
1128{
1129 struct in6_addr addr;
1130 ipv6_addr_prefix(&addr, &ifp->addr, ifp->prefix_len);
1131 if (ipv6_addr_any(&addr))
1132 return;
1133 ipv6_dev_ac_inc(ifp->idev->dev, &addr);
1134}
1135
1136void addrconf_leave_anycast(struct inet6_ifaddr *ifp)
1137{
1138 struct in6_addr addr;
1139 ipv6_addr_prefix(&addr, &ifp->addr, ifp->prefix_len);
1140 if (ipv6_addr_any(&addr))
1141 return;
1142 __ipv6_dev_ac_dec(ifp->idev, &addr);
1143}
1144
1145static int ipv6_generate_eui64(u8 *eui, struct net_device *dev)
1146{
1147 switch (dev->type) {
1148 case ARPHRD_ETHER:
1149 case ARPHRD_FDDI:
1150 case ARPHRD_IEEE802_TR:
1151 if (dev->addr_len != ETH_ALEN)
1152 return -1;
1153 memcpy(eui, dev->dev_addr, 3);
1154 memcpy(eui + 5, dev->dev_addr + 3, 3);
1155
1156 /*
1157 * The zSeries OSA network cards can be shared among various
1158 * OS instances, but the OSA cards have only one MAC address.
1159 * This leads to duplicate address conflicts in conjunction
1160 * with IPv6 if more than one instance uses the same card.
1161 *
1162 * The driver for these cards can deliver a unique 16-bit
1163 * identifier for each instance sharing the same card. It is
1164 * placed instead of 0xFFFE in the interface identifier. The
1165 * "u" bit of the interface identifier is not inverted in this
1166 * case. Hence the resulting interface identifier has local
1167 * scope according to RFC2373.
1168 */
1169 if (dev->dev_id) {
1170 eui[3] = (dev->dev_id >> 8) & 0xFF;
1171 eui[4] = dev->dev_id & 0xFF;
1172 } else {
1173 eui[3] = 0xFF;
1174 eui[4] = 0xFE;
1175 eui[0] ^= 2;
1176 }
1177 return 0;
1178 case ARPHRD_ARCNET:
1179 /* XXX: inherit EUI-64 from other interface -- yoshfuji */
1180 if (dev->addr_len != ARCNET_ALEN)
1181 return -1;
1182 memset(eui, 0, 7);
1183 eui[7] = *(u8*)dev->dev_addr;
1184 return 0;
1185 case ARPHRD_INFINIBAND:
1186 if (dev->addr_len != INFINIBAND_ALEN)
1187 return -1;
1188 memcpy(eui, dev->dev_addr + 12, 8);
1189 eui[0] |= 2;
1190 return 0;
1191 }
1192 return -1;
1193}
1194
1195static int ipv6_inherit_eui64(u8 *eui, struct inet6_dev *idev)
1196{
1197 int err = -1;
1198 struct inet6_ifaddr *ifp;
1199
1200 read_lock_bh(&idev->lock);
1201 for (ifp=idev->addr_list; ifp; ifp=ifp->if_next) {
1202 if (ifp->scope == IFA_LINK && !(ifp->flags&IFA_F_TENTATIVE)) {
1203 memcpy(eui, ifp->addr.s6_addr+8, 8);
1204 err = 0;
1205 break;
1206 }
1207 }
1208 read_unlock_bh(&idev->lock);
1209 return err;
1210}
1211
1212#ifdef CONFIG_IPV6_PRIVACY
1213/* (re)generation of randomized interface identifier (RFC 3041 3.2, 3.5) */
1214static int __ipv6_regen_rndid(struct inet6_dev *idev)
1215{
1216 struct net_device *dev;
1217 struct scatterlist sg[2];
1218
1219 sg[0].page = virt_to_page(idev->entropy);
1220 sg[0].offset = offset_in_page(idev->entropy);
1221 sg[0].length = 8;
1222 sg[1].page = virt_to_page(idev->work_eui64);
1223 sg[1].offset = offset_in_page(idev->work_eui64);
1224 sg[1].length = 8;
1225
1226 dev = idev->dev;
1227
1228 if (ipv6_generate_eui64(idev->work_eui64, dev)) {
1229 printk(KERN_INFO
1230 "__ipv6_regen_rndid(idev=%p): cannot get EUI64 identifier; use random bytes.\n",
1231 idev);
1232 get_random_bytes(idev->work_eui64, sizeof(idev->work_eui64));
1233 }
1234regen:
1235 spin_lock(&md5_tfm_lock);
1236 if (unlikely(md5_tfm == NULL)) {
1237 spin_unlock(&md5_tfm_lock);
1238 return -1;
1239 }
1240 crypto_digest_init(md5_tfm);
1241 crypto_digest_update(md5_tfm, sg, 2);
1242 crypto_digest_final(md5_tfm, idev->work_digest);
1243 spin_unlock(&md5_tfm_lock);
1244
1245 memcpy(idev->rndid, &idev->work_digest[0], 8);
1246 idev->rndid[0] &= ~0x02;
1247 memcpy(idev->entropy, &idev->work_digest[8], 8);
1248
1249 /*
1250 * <draft-ietf-ipngwg-temp-addresses-v2-00.txt>:
1251 * check if generated address is not inappropriate
1252 *
1253 * - Reserved subnet anycast (RFC 2526)
1254 * 11111101 11....11 1xxxxxxx
1255 * - ISATAP (draft-ietf-ngtrans-isatap-13.txt) 5.1
1256 * 00-00-5E-FE-xx-xx-xx-xx
1257 * - value 0
1258 * - XXX: already assigned to an address on the device
1259 */
1260 if (idev->rndid[0] == 0xfd &&
1261 (idev->rndid[1]&idev->rndid[2]&idev->rndid[3]&idev->rndid[4]&idev->rndid[5]&idev->rndid[6]) == 0xff &&
1262 (idev->rndid[7]&0x80))
1263 goto regen;
1264 if ((idev->rndid[0]|idev->rndid[1]) == 0) {
1265 if (idev->rndid[2] == 0x5e && idev->rndid[3] == 0xfe)
1266 goto regen;
1267 if ((idev->rndid[2]|idev->rndid[3]|idev->rndid[4]|idev->rndid[5]|idev->rndid[6]|idev->rndid[7]) == 0x00)
1268 goto regen;
1269 }
1270
1271 return 0;
1272}
1273
1274static void ipv6_regen_rndid(unsigned long data)
1275{
1276 struct inet6_dev *idev = (struct inet6_dev *) data;
1277 unsigned long expires;
1278
1279 read_lock_bh(&addrconf_lock);
1280 write_lock_bh(&idev->lock);
1281
1282 if (idev->dead)
1283 goto out;
1284
1285 if (__ipv6_regen_rndid(idev) < 0)
1286 goto out;
1287
1288 expires = jiffies +
1289 idev->cnf.temp_prefered_lft * HZ -
1290 idev->cnf.regen_max_retry * idev->cnf.dad_transmits * idev->nd_parms->retrans_time - desync_factor;
1291 if (time_before(expires, jiffies)) {
1292 printk(KERN_WARNING
1293 "ipv6_regen_rndid(): too short regeneration interval; timer disabled for %s.\n",
1294 idev->dev->name);
1295 goto out;
1296 }
1297
1298 if (!mod_timer(&idev->regen_timer, expires))
1299 in6_dev_hold(idev);
1300
1301out:
1302 write_unlock_bh(&idev->lock);
1303 read_unlock_bh(&addrconf_lock);
1304 in6_dev_put(idev);
1305}
1306
1307static int __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr) {
1308 int ret = 0;
1309
1310 if (tmpaddr && memcmp(idev->rndid, &tmpaddr->s6_addr[8], 8) == 0)
1311 ret = __ipv6_regen_rndid(idev);
1312 return ret;
1313}
1314#endif
1315
1316/*
1317 * Add prefix route.
1318 */
1319
1320static void
1321addrconf_prefix_route(struct in6_addr *pfx, int plen, struct net_device *dev,
1322 unsigned long expires, unsigned flags)
1323{
1324 struct in6_rtmsg rtmsg;
1325
1326 memset(&rtmsg, 0, sizeof(rtmsg));
1327 ipv6_addr_copy(&rtmsg.rtmsg_dst, pfx);
1328 rtmsg.rtmsg_dst_len = plen;
1329 rtmsg.rtmsg_metric = IP6_RT_PRIO_ADDRCONF;
1330 rtmsg.rtmsg_ifindex = dev->ifindex;
1331 rtmsg.rtmsg_info = expires;
1332 rtmsg.rtmsg_flags = RTF_UP|flags;
1333 rtmsg.rtmsg_type = RTMSG_NEWROUTE;
1334
1335 /* Prevent useless cloning on PtP SIT.
1336 This thing is done here expecting that the whole
1337 class of non-broadcast devices need not cloning.
1338 */
1339 if (dev->type == ARPHRD_SIT && (dev->flags&IFF_POINTOPOINT))
1340 rtmsg.rtmsg_flags |= RTF_NONEXTHOP;
1341
1342 ip6_route_add(&rtmsg, NULL, NULL);
1343}
1344
1345/* Create "default" multicast route to the interface */
1346
1347static void addrconf_add_mroute(struct net_device *dev)
1348{
1349 struct in6_rtmsg rtmsg;
1350
1351 memset(&rtmsg, 0, sizeof(rtmsg));
1352 ipv6_addr_set(&rtmsg.rtmsg_dst,
1353 htonl(0xFF000000), 0, 0, 0);
1354 rtmsg.rtmsg_dst_len = 8;
1355 rtmsg.rtmsg_metric = IP6_RT_PRIO_ADDRCONF;
1356 rtmsg.rtmsg_ifindex = dev->ifindex;
1357 rtmsg.rtmsg_flags = RTF_UP;
1358 rtmsg.rtmsg_type = RTMSG_NEWROUTE;
1359 ip6_route_add(&rtmsg, NULL, NULL);
1360}
1361
1362static void sit_route_add(struct net_device *dev)
1363{
1364 struct in6_rtmsg rtmsg;
1365
1366 memset(&rtmsg, 0, sizeof(rtmsg));
1367
1368 rtmsg.rtmsg_type = RTMSG_NEWROUTE;
1369 rtmsg.rtmsg_metric = IP6_RT_PRIO_ADDRCONF;
1370
1371 /* prefix length - 96 bits "::d.d.d.d" */
1372 rtmsg.rtmsg_dst_len = 96;
1373 rtmsg.rtmsg_flags = RTF_UP|RTF_NONEXTHOP;
1374 rtmsg.rtmsg_ifindex = dev->ifindex;
1375
1376 ip6_route_add(&rtmsg, NULL, NULL);
1377}
1378
1379static void addrconf_add_lroute(struct net_device *dev)
1380{
1381 struct in6_addr addr;
1382
1383 ipv6_addr_set(&addr, htonl(0xFE800000), 0, 0, 0);
1384 addrconf_prefix_route(&addr, 64, dev, 0, 0);
1385}
1386
1387static struct inet6_dev *addrconf_add_dev(struct net_device *dev)
1388{
1389 struct inet6_dev *idev;
1390
1391 ASSERT_RTNL();
1392
1393 if ((idev = ipv6_find_idev(dev)) == NULL)
1394 return NULL;
1395
1396 /* Add default multicast route */
1397 addrconf_add_mroute(dev);
1398
1399 /* Add link local route */
1400 addrconf_add_lroute(dev);
1401 return idev;
1402}
1403
1404void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len)
1405{
1406 struct prefix_info *pinfo;
1407 __u32 valid_lft;
1408 __u32 prefered_lft;
1409 int addr_type;
1410 unsigned long rt_expires;
1411 struct inet6_dev *in6_dev;
1412
1413 pinfo = (struct prefix_info *) opt;
1414
1415 if (len < sizeof(struct prefix_info)) {
1416 ADBG(("addrconf: prefix option too short\n"));
1417 return;
1418 }
1419
1420 /*
1421 * Validation checks ([ADDRCONF], page 19)
1422 */
1423
1424 addr_type = ipv6_addr_type(&pinfo->prefix);
1425
1426 if (addr_type & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL))
1427 return;
1428
1429 valid_lft = ntohl(pinfo->valid);
1430 prefered_lft = ntohl(pinfo->prefered);
1431
1432 if (prefered_lft > valid_lft) {
1433 if (net_ratelimit())
1434 printk(KERN_WARNING "addrconf: prefix option has invalid lifetime\n");
1435 return;
1436 }
1437
1438 in6_dev = in6_dev_get(dev);
1439
1440 if (in6_dev == NULL) {
1441 if (net_ratelimit())
1442 printk(KERN_DEBUG "addrconf: device %s not configured\n", dev->name);
1443 return;
1444 }
1445
1446 /*
1447 * Two things going on here:
1448 * 1) Add routes for on-link prefixes
1449 * 2) Configure prefixes with the auto flag set
1450 */
1451
1452 /* Avoid arithmetic overflow. Really, we could
1453 save rt_expires in seconds, likely valid_lft,
1454 but it would require division in fib gc, that it
1455 not good.
1456 */
1457 if (valid_lft >= 0x7FFFFFFF/HZ)
1458 rt_expires = 0;
1459 else
1460 rt_expires = jiffies + valid_lft * HZ;
1461
1462 if (pinfo->onlink) {
1463 struct rt6_info *rt;
1464 rt = rt6_lookup(&pinfo->prefix, NULL, dev->ifindex, 1);
1465
1466 if (rt && ((rt->rt6i_flags & (RTF_GATEWAY | RTF_DEFAULT)) == 0)) {
1467 if (rt->rt6i_flags&RTF_EXPIRES) {
1468 if (valid_lft == 0) {
1469 ip6_del_rt(rt, NULL, NULL);
1470 rt = NULL;
1471 } else {
1472 rt->rt6i_expires = rt_expires;
1473 }
1474 }
1475 } else if (valid_lft) {
1476 addrconf_prefix_route(&pinfo->prefix, pinfo->prefix_len,
1477 dev, rt_expires, RTF_ADDRCONF|RTF_EXPIRES|RTF_PREFIX_RT);
1478 }
1479 if (rt)
1480 dst_release(&rt->u.dst);
1481 }
1482
1483 /* Try to figure out our local address for this prefix */
1484
1485 if (pinfo->autoconf && in6_dev->cnf.autoconf) {
1486 struct inet6_ifaddr * ifp;
1487 struct in6_addr addr;
1488 int create = 0, update_lft = 0;
1489
1490 if (pinfo->prefix_len == 64) {
1491 memcpy(&addr, &pinfo->prefix, 8);
1492 if (ipv6_generate_eui64(addr.s6_addr + 8, dev) &&
1493 ipv6_inherit_eui64(addr.s6_addr + 8, in6_dev)) {
1494 in6_dev_put(in6_dev);
1495 return;
1496 }
1497 goto ok;
1498 }
1499 if (net_ratelimit())
1500 printk(KERN_DEBUG "IPv6 addrconf: prefix with wrong length %d\n",
1501 pinfo->prefix_len);
1502 in6_dev_put(in6_dev);
1503 return;
1504
1505ok:
1506
1507 ifp = ipv6_get_ifaddr(&addr, dev, 1);
1508
1509 if (ifp == NULL && valid_lft) {
1510 int max_addresses = in6_dev->cnf.max_addresses;
1511
1512 /* Do not allow to create too much of autoconfigured
1513 * addresses; this would be too easy way to crash kernel.
1514 */
1515 if (!max_addresses ||
1516 ipv6_count_addresses(in6_dev) < max_addresses)
1517 ifp = ipv6_add_addr(in6_dev, &addr, pinfo->prefix_len,
1518 addr_type&IPV6_ADDR_SCOPE_MASK, 0);
1519
1520 if (!ifp || IS_ERR(ifp)) {
1521 in6_dev_put(in6_dev);
1522 return;
1523 }
1524
1525 update_lft = create = 1;
1526 ifp->cstamp = jiffies;
1527 addrconf_dad_start(ifp, RTF_ADDRCONF|RTF_PREFIX_RT);
1528 }
1529
1530 if (ifp) {
1531 int flags;
1532 unsigned long now;
1533#ifdef CONFIG_IPV6_PRIVACY
1534 struct inet6_ifaddr *ift;
1535#endif
1536 u32 stored_lft;
1537
1538 /* update lifetime (RFC2462 5.5.3 e) */
1539 spin_lock(&ifp->lock);
1540 now = jiffies;
1541 if (ifp->valid_lft > (now - ifp->tstamp) / HZ)
1542 stored_lft = ifp->valid_lft - (now - ifp->tstamp) / HZ;
1543 else
1544 stored_lft = 0;
1545 if (!update_lft && stored_lft) {
1546 if (valid_lft > MIN_VALID_LIFETIME ||
1547 valid_lft > stored_lft)
1548 update_lft = 1;
1549 else if (stored_lft <= MIN_VALID_LIFETIME) {
1550 /* valid_lft <= stored_lft is always true */
1551 /* XXX: IPsec */
1552 update_lft = 0;
1553 } else {
1554 valid_lft = MIN_VALID_LIFETIME;
1555 if (valid_lft < prefered_lft)
1556 prefered_lft = valid_lft;
1557 update_lft = 1;
1558 }
1559 }
1560
1561 if (update_lft) {
1562 ifp->valid_lft = valid_lft;
1563 ifp->prefered_lft = prefered_lft;
1564 ifp->tstamp = now;
1565 flags = ifp->flags;
1566 ifp->flags &= ~IFA_F_DEPRECATED;
1567 spin_unlock(&ifp->lock);
1568
1569 if (!(flags&IFA_F_TENTATIVE))
1570 ipv6_ifa_notify(0, ifp);
1571 } else
1572 spin_unlock(&ifp->lock);
1573
1574#ifdef CONFIG_IPV6_PRIVACY
1575 read_lock_bh(&in6_dev->lock);
1576 /* update all temporary addresses in the list */
1577 for (ift=in6_dev->tempaddr_list; ift; ift=ift->tmp_next) {
1578 /*
1579 * When adjusting the lifetimes of an existing
1580 * temporary address, only lower the lifetimes.
1581 * Implementations must not increase the
1582 * lifetimes of an existing temporary address
1583 * when processing a Prefix Information Option.
1584 */
1585 spin_lock(&ift->lock);
1586 flags = ift->flags;
1587 if (ift->valid_lft > valid_lft &&
1588 ift->valid_lft - valid_lft > (jiffies - ift->tstamp) / HZ)
1589 ift->valid_lft = valid_lft + (jiffies - ift->tstamp) / HZ;
1590 if (ift->prefered_lft > prefered_lft &&
1591 ift->prefered_lft - prefered_lft > (jiffies - ift->tstamp) / HZ)
1592 ift->prefered_lft = prefered_lft + (jiffies - ift->tstamp) / HZ;
1593 spin_unlock(&ift->lock);
1594 if (!(flags&IFA_F_TENTATIVE))
1595 ipv6_ifa_notify(0, ift);
1596 }
1597
1598 if (create && in6_dev->cnf.use_tempaddr > 0) {
1599 /*
1600 * When a new public address is created as described in [ADDRCONF],
1601 * also create a new temporary address.
1602 */
1603 read_unlock_bh(&in6_dev->lock);
1604 ipv6_create_tempaddr(ifp, NULL);
1605 } else {
1606 read_unlock_bh(&in6_dev->lock);
1607 }
1608#endif
1609 in6_ifa_put(ifp);
1610 addrconf_verify(0);
1611 }
1612 }
1613 inet6_prefix_notify(RTM_NEWPREFIX, in6_dev, pinfo);
1614 in6_dev_put(in6_dev);
1615}
1616
1617/*
1618 * Set destination address.
1619 * Special case for SIT interfaces where we create a new "virtual"
1620 * device.
1621 */
1622int addrconf_set_dstaddr(void __user *arg)
1623{
1624 struct in6_ifreq ireq;
1625 struct net_device *dev;
1626 int err = -EINVAL;
1627
1628 rtnl_lock();
1629
1630 err = -EFAULT;
1631 if (copy_from_user(&ireq, arg, sizeof(struct in6_ifreq)))
1632 goto err_exit;
1633
1634 dev = __dev_get_by_index(ireq.ifr6_ifindex);
1635
1636 err = -ENODEV;
1637 if (dev == NULL)
1638 goto err_exit;
1639
1640 if (dev->type == ARPHRD_SIT) {
1641 struct ifreq ifr;
1642 mm_segment_t oldfs;
1643 struct ip_tunnel_parm p;
1644
1645 err = -EADDRNOTAVAIL;
1646 if (!(ipv6_addr_type(&ireq.ifr6_addr) & IPV6_ADDR_COMPATv4))
1647 goto err_exit;
1648
1649 memset(&p, 0, sizeof(p));
1650 p.iph.daddr = ireq.ifr6_addr.s6_addr32[3];
1651 p.iph.saddr = 0;
1652 p.iph.version = 4;
1653 p.iph.ihl = 5;
1654 p.iph.protocol = IPPROTO_IPV6;
1655 p.iph.ttl = 64;
1656 ifr.ifr_ifru.ifru_data = (void __user *)&p;
1657
1658 oldfs = get_fs(); set_fs(KERNEL_DS);
1659 err = dev->do_ioctl(dev, &ifr, SIOCADDTUNNEL);
1660 set_fs(oldfs);
1661
1662 if (err == 0) {
1663 err = -ENOBUFS;
1664 if ((dev = __dev_get_by_name(p.name)) == NULL)
1665 goto err_exit;
1666 err = dev_open(dev);
1667 }
1668 }
1669
1670err_exit:
1671 rtnl_unlock();
1672 return err;
1673}
1674
1675/*
1676 * Manual configuration of address on an interface
1677 */
1678static int inet6_addr_add(int ifindex, struct in6_addr *pfx, int plen)
1679{
1680 struct inet6_ifaddr *ifp;
1681 struct inet6_dev *idev;
1682 struct net_device *dev;
1683 int scope;
1684
1685 ASSERT_RTNL();
1686
1687 if ((dev = __dev_get_by_index(ifindex)) == NULL)
1688 return -ENODEV;
1689
1690 if (!(dev->flags&IFF_UP))
1691 return -ENETDOWN;
1692
1693 if ((idev = addrconf_add_dev(dev)) == NULL)
1694 return -ENOBUFS;
1695
1696 scope = ipv6_addr_scope(pfx);
1697
1698 ifp = ipv6_add_addr(idev, pfx, plen, scope, IFA_F_PERMANENT);
1699 if (!IS_ERR(ifp)) {
1700 addrconf_dad_start(ifp, 0);
1701 in6_ifa_put(ifp);
1702 return 0;
1703 }
1704
1705 return PTR_ERR(ifp);
1706}
1707
1708static int inet6_addr_del(int ifindex, struct in6_addr *pfx, int plen)
1709{
1710 struct inet6_ifaddr *ifp;
1711 struct inet6_dev *idev;
1712 struct net_device *dev;
1713
1714 if ((dev = __dev_get_by_index(ifindex)) == NULL)
1715 return -ENODEV;
1716
1717 if ((idev = __in6_dev_get(dev)) == NULL)
1718 return -ENXIO;
1719
1720 read_lock_bh(&idev->lock);
1721 for (ifp = idev->addr_list; ifp; ifp=ifp->if_next) {
1722 if (ifp->prefix_len == plen &&
1723 ipv6_addr_equal(pfx, &ifp->addr)) {
1724 in6_ifa_hold(ifp);
1725 read_unlock_bh(&idev->lock);
1726
1727 ipv6_del_addr(ifp);
1728
1729 /* If the last address is deleted administratively,
1730 disable IPv6 on this interface.
1731 */
1732 if (idev->addr_list == NULL)
1733 addrconf_ifdown(idev->dev, 1);
1734 return 0;
1735 }
1736 }
1737 read_unlock_bh(&idev->lock);
1738 return -EADDRNOTAVAIL;
1739}
1740
1741
1742int addrconf_add_ifaddr(void __user *arg)
1743{
1744 struct in6_ifreq ireq;
1745 int err;
1746
1747 if (!capable(CAP_NET_ADMIN))
1748 return -EPERM;
1749
1750 if (copy_from_user(&ireq, arg, sizeof(struct in6_ifreq)))
1751 return -EFAULT;
1752
1753 rtnl_lock();
1754 err = inet6_addr_add(ireq.ifr6_ifindex, &ireq.ifr6_addr, ireq.ifr6_prefixlen);
1755 rtnl_unlock();
1756 return err;
1757}
1758
1759int addrconf_del_ifaddr(void __user *arg)
1760{
1761 struct in6_ifreq ireq;
1762 int err;
1763
1764 if (!capable(CAP_NET_ADMIN))
1765 return -EPERM;
1766
1767 if (copy_from_user(&ireq, arg, sizeof(struct in6_ifreq)))
1768 return -EFAULT;
1769
1770 rtnl_lock();
1771 err = inet6_addr_del(ireq.ifr6_ifindex, &ireq.ifr6_addr, ireq.ifr6_prefixlen);
1772 rtnl_unlock();
1773 return err;
1774}
1775
1776static void sit_add_v4_addrs(struct inet6_dev *idev)
1777{
1778 struct inet6_ifaddr * ifp;
1779 struct in6_addr addr;
1780 struct net_device *dev;
1781 int scope;
1782
1783 ASSERT_RTNL();
1784
1785 memset(&addr, 0, sizeof(struct in6_addr));
1786 memcpy(&addr.s6_addr32[3], idev->dev->dev_addr, 4);
1787
1788 if (idev->dev->flags&IFF_POINTOPOINT) {
1789 addr.s6_addr32[0] = htonl(0xfe800000);
1790 scope = IFA_LINK;
1791 } else {
1792 scope = IPV6_ADDR_COMPATv4;
1793 }
1794
1795 if (addr.s6_addr32[3]) {
1796 ifp = ipv6_add_addr(idev, &addr, 128, scope, IFA_F_PERMANENT);
1797 if (!IS_ERR(ifp)) {
1798 spin_lock_bh(&ifp->lock);
1799 ifp->flags &= ~IFA_F_TENTATIVE;
1800 spin_unlock_bh(&ifp->lock);
1801 ipv6_ifa_notify(RTM_NEWADDR, ifp);
1802 in6_ifa_put(ifp);
1803 }
1804 return;
1805 }
1806
1807 for (dev = dev_base; dev != NULL; dev = dev->next) {
1808 struct in_device * in_dev = __in_dev_get(dev);
1809 if (in_dev && (dev->flags & IFF_UP)) {
1810 struct in_ifaddr * ifa;
1811
1812 int flag = scope;
1813
1814 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1815 int plen;
1816
1817 addr.s6_addr32[3] = ifa->ifa_local;
1818
1819 if (ifa->ifa_scope == RT_SCOPE_LINK)
1820 continue;
1821 if (ifa->ifa_scope >= RT_SCOPE_HOST) {
1822 if (idev->dev->flags&IFF_POINTOPOINT)
1823 continue;
1824 flag |= IFA_HOST;
1825 }
1826 if (idev->dev->flags&IFF_POINTOPOINT)
1827 plen = 64;
1828 else
1829 plen = 96;
1830
1831 ifp = ipv6_add_addr(idev, &addr, plen, flag,
1832 IFA_F_PERMANENT);
1833 if (!IS_ERR(ifp)) {
1834 spin_lock_bh(&ifp->lock);
1835 ifp->flags &= ~IFA_F_TENTATIVE;
1836 spin_unlock_bh(&ifp->lock);
1837 ipv6_ifa_notify(RTM_NEWADDR, ifp);
1838 in6_ifa_put(ifp);
1839 }
1840 }
1841 }
1842 }
1843}
1844
1845static void init_loopback(struct net_device *dev)
1846{
1847 struct inet6_dev *idev;
1848 struct inet6_ifaddr * ifp;
1849
1850 /* ::1 */
1851
1852 ASSERT_RTNL();
1853
1854 if ((idev = ipv6_find_idev(dev)) == NULL) {
1855 printk(KERN_DEBUG "init loopback: add_dev failed\n");
1856 return;
1857 }
1858
1859 ifp = ipv6_add_addr(idev, &in6addr_loopback, 128, IFA_HOST, IFA_F_PERMANENT);
1860 if (!IS_ERR(ifp)) {
1861 spin_lock_bh(&ifp->lock);
1862 ifp->flags &= ~IFA_F_TENTATIVE;
1863 spin_unlock_bh(&ifp->lock);
1864 ipv6_ifa_notify(RTM_NEWADDR, ifp);
1865 in6_ifa_put(ifp);
1866 }
1867}
1868
1869static void addrconf_add_linklocal(struct inet6_dev *idev, struct in6_addr *addr)
1870{
1871 struct inet6_ifaddr * ifp;
1872
1873 ifp = ipv6_add_addr(idev, addr, 64, IFA_LINK, IFA_F_PERMANENT);
1874 if (!IS_ERR(ifp)) {
1875 addrconf_dad_start(ifp, 0);
1876 in6_ifa_put(ifp);
1877 }
1878}
1879
1880static void addrconf_dev_config(struct net_device *dev)
1881{
1882 struct in6_addr addr;
1883 struct inet6_dev * idev;
1884
1885 ASSERT_RTNL();
1886
1887 if ((dev->type != ARPHRD_ETHER) &&
1888 (dev->type != ARPHRD_FDDI) &&
1889 (dev->type != ARPHRD_IEEE802_TR) &&
1890 (dev->type != ARPHRD_ARCNET) &&
1891 (dev->type != ARPHRD_INFINIBAND)) {
1892 /* Alas, we support only Ethernet autoconfiguration. */
1893 return;
1894 }
1895
1896 idev = addrconf_add_dev(dev);
1897 if (idev == NULL)
1898 return;
1899
1900 memset(&addr, 0, sizeof(struct in6_addr));
1901 addr.s6_addr32[0] = htonl(0xFE800000);
1902
1903 if (ipv6_generate_eui64(addr.s6_addr + 8, dev) == 0)
1904 addrconf_add_linklocal(idev, &addr);
1905}
1906
1907static void addrconf_sit_config(struct net_device *dev)
1908{
1909 struct inet6_dev *idev;
1910
1911 ASSERT_RTNL();
1912
1913 /*
1914 * Configure the tunnel with one of our IPv4
1915 * addresses... we should configure all of
1916 * our v4 addrs in the tunnel
1917 */
1918
1919 if ((idev = ipv6_find_idev(dev)) == NULL) {
1920 printk(KERN_DEBUG "init sit: add_dev failed\n");
1921 return;
1922 }
1923
1924 sit_add_v4_addrs(idev);
1925
1926 if (dev->flags&IFF_POINTOPOINT) {
1927 addrconf_add_mroute(dev);
1928 addrconf_add_lroute(dev);
1929 } else
1930 sit_route_add(dev);
1931}
1932
1933static inline int
1934ipv6_inherit_linklocal(struct inet6_dev *idev, struct net_device *link_dev)
1935{
1936 struct in6_addr lladdr;
1937
1938 if (!ipv6_get_lladdr(link_dev, &lladdr)) {
1939 addrconf_add_linklocal(idev, &lladdr);
1940 return 0;
1941 }
1942 return -1;
1943}
1944
1945static void ip6_tnl_add_linklocal(struct inet6_dev *idev)
1946{
1947 struct net_device *link_dev;
1948
1949 /* first try to inherit the link-local address from the link device */
1950 if (idev->dev->iflink &&
1951 (link_dev = __dev_get_by_index(idev->dev->iflink))) {
1952 if (!ipv6_inherit_linklocal(idev, link_dev))
1953 return;
1954 }
1955 /* then try to inherit it from any device */
1956 for (link_dev = dev_base; link_dev; link_dev = link_dev->next) {
1957 if (!ipv6_inherit_linklocal(idev, link_dev))
1958 return;
1959 }
1960 printk(KERN_DEBUG "init ip6-ip6: add_linklocal failed\n");
1961}
1962
1963/*
1964 * Autoconfigure tunnel with a link-local address so routing protocols,
1965 * DHCPv6, MLD etc. can be run over the virtual link
1966 */
1967
1968static void addrconf_ip6_tnl_config(struct net_device *dev)
1969{
1970 struct inet6_dev *idev;
1971
1972 ASSERT_RTNL();
1973
1974 if ((idev = addrconf_add_dev(dev)) == NULL) {
1975 printk(KERN_DEBUG "init ip6-ip6: add_dev failed\n");
1976 return;
1977 }
1978 ip6_tnl_add_linklocal(idev);
1979 addrconf_add_mroute(dev);
1980}
1981
1982static int addrconf_notify(struct notifier_block *this, unsigned long event,
1983 void * data)
1984{
1985 struct net_device *dev = (struct net_device *) data;
1986 struct inet6_dev *idev = __in6_dev_get(dev);
1987
1988 switch(event) {
1989 case NETDEV_UP:
1990 switch(dev->type) {
1991 case ARPHRD_SIT:
1992 addrconf_sit_config(dev);
1993 break;
1994 case ARPHRD_TUNNEL6:
1995 addrconf_ip6_tnl_config(dev);
1996 break;
1997 case ARPHRD_LOOPBACK:
1998 init_loopback(dev);
1999 break;
2000
2001 default:
2002 addrconf_dev_config(dev);
2003 break;
2004 };
2005 if (idev) {
2006 /* If the MTU changed during the interface down, when the
2007 interface up, the changed MTU must be reflected in the
2008 idev as well as routers.
2009 */
2010 if (idev->cnf.mtu6 != dev->mtu && dev->mtu >= IPV6_MIN_MTU) {
2011 rt6_mtu_change(dev, dev->mtu);
2012 idev->cnf.mtu6 = dev->mtu;
2013 }
2014 idev->tstamp = jiffies;
2015 inet6_ifinfo_notify(RTM_NEWLINK, idev);
2016 /* If the changed mtu during down is lower than IPV6_MIN_MTU
2017 stop IPv6 on this interface.
2018 */
2019 if (dev->mtu < IPV6_MIN_MTU)
2020 addrconf_ifdown(dev, event != NETDEV_DOWN);
2021 }
2022 break;
2023
2024 case NETDEV_CHANGEMTU:
2025 if ( idev && dev->mtu >= IPV6_MIN_MTU) {
2026 rt6_mtu_change(dev, dev->mtu);
2027 idev->cnf.mtu6 = dev->mtu;
2028 break;
2029 }
2030
2031 /* MTU falled under IPV6_MIN_MTU. Stop IPv6 on this interface. */
2032
2033 case NETDEV_DOWN:
2034 case NETDEV_UNREGISTER:
2035 /*
2036 * Remove all addresses from this interface.
2037 */
2038 addrconf_ifdown(dev, event != NETDEV_DOWN);
2039 break;
2040 case NETDEV_CHANGE:
2041 break;
2042 case NETDEV_CHANGENAME:
2043#ifdef CONFIG_SYSCTL
2044 if (idev) {
2045 addrconf_sysctl_unregister(&idev->cnf);
2046 neigh_sysctl_unregister(idev->nd_parms);
2047 neigh_sysctl_register(dev, idev->nd_parms,
2048 NET_IPV6, NET_IPV6_NEIGH, "ipv6",
2049 &ndisc_ifinfo_sysctl_change,
2050 NULL);
2051 addrconf_sysctl_register(idev, &idev->cnf);
2052 }
2053#endif
2054 break;
2055 };
2056
2057 return NOTIFY_OK;
2058}
2059
2060/*
2061 * addrconf module should be notified of a device going up
2062 */
2063static struct notifier_block ipv6_dev_notf = {
2064 .notifier_call = addrconf_notify,
2065 .priority = 0
2066};
2067
2068static int addrconf_ifdown(struct net_device *dev, int how)
2069{
2070 struct inet6_dev *idev;
2071 struct inet6_ifaddr *ifa, **bifa;
2072 int i;
2073
2074 ASSERT_RTNL();
2075
2076 if (dev == &loopback_dev && how == 1)
2077 how = 0;
2078
2079 rt6_ifdown(dev);
2080 neigh_ifdown(&nd_tbl, dev);
2081
2082 idev = __in6_dev_get(dev);
2083 if (idev == NULL)
2084 return -ENODEV;
2085
2086 /* Step 1: remove reference to ipv6 device from parent device.
2087 Do not dev_put!
2088 */
2089 if (how == 1) {
2090 write_lock_bh(&addrconf_lock);
2091 dev->ip6_ptr = NULL;
2092 idev->dead = 1;
2093 write_unlock_bh(&addrconf_lock);
2094
2095 /* Step 1.5: remove snmp6 entry */
2096 snmp6_unregister_dev(idev);
2097
2098 }
2099
2100 /* Step 2: clear hash table */
2101 for (i=0; i<IN6_ADDR_HSIZE; i++) {
2102 bifa = &inet6_addr_lst[i];
2103
2104 write_lock_bh(&addrconf_hash_lock);
2105 while ((ifa = *bifa) != NULL) {
2106 if (ifa->idev == idev) {
2107 *bifa = ifa->lst_next;
2108 ifa->lst_next = NULL;
2109 addrconf_del_timer(ifa);
2110 in6_ifa_put(ifa);
2111 continue;
2112 }
2113 bifa = &ifa->lst_next;
2114 }
2115 write_unlock_bh(&addrconf_hash_lock);
2116 }
2117
2118 write_lock_bh(&idev->lock);
2119
2120 /* Step 3: clear flags for stateless addrconf */
2121 if (how != 1)
2122 idev->if_flags &= ~(IF_RS_SENT|IF_RA_RCVD);
2123
2124 /* Step 4: clear address list */
2125#ifdef CONFIG_IPV6_PRIVACY
2126 if (how == 1 && del_timer(&idev->regen_timer))
2127 in6_dev_put(idev);
2128
2129 /* clear tempaddr list */
2130 while ((ifa = idev->tempaddr_list) != NULL) {
2131 idev->tempaddr_list = ifa->tmp_next;
2132 ifa->tmp_next = NULL;
2133 ifa->dead = 1;
2134 write_unlock_bh(&idev->lock);
2135 spin_lock_bh(&ifa->lock);
2136
2137 if (ifa->ifpub) {
2138 in6_ifa_put(ifa->ifpub);
2139 ifa->ifpub = NULL;
2140 }
2141 spin_unlock_bh(&ifa->lock);
2142 in6_ifa_put(ifa);
2143 write_lock_bh(&idev->lock);
2144 }
2145#endif
2146 while ((ifa = idev->addr_list) != NULL) {
2147 idev->addr_list = ifa->if_next;
2148 ifa->if_next = NULL;
2149 ifa->dead = 1;
2150 addrconf_del_timer(ifa);
2151 write_unlock_bh(&idev->lock);
2152
2153 __ipv6_ifa_notify(RTM_DELADDR, ifa);
2154 in6_ifa_put(ifa);
2155
2156 write_lock_bh(&idev->lock);
2157 }
2158 write_unlock_bh(&idev->lock);
2159
2160 /* Step 5: Discard multicast list */
2161
2162 if (how == 1)
2163 ipv6_mc_destroy_dev(idev);
2164 else
2165 ipv6_mc_down(idev);
2166
2167 /* Step 5: netlink notification of this interface */
2168 idev->tstamp = jiffies;
2169 inet6_ifinfo_notify(RTM_NEWLINK, idev);
2170
2171 /* Shot the device (if unregistered) */
2172
2173 if (how == 1) {
2174#ifdef CONFIG_SYSCTL
2175 addrconf_sysctl_unregister(&idev->cnf);
2176 neigh_sysctl_unregister(idev->nd_parms);
2177#endif
2178 neigh_parms_release(&nd_tbl, idev->nd_parms);
2179 neigh_ifdown(&nd_tbl, dev);
2180 in6_dev_put(idev);
2181 }
2182 return 0;
2183}
2184
2185static void addrconf_rs_timer(unsigned long data)
2186{
2187 struct inet6_ifaddr *ifp = (struct inet6_ifaddr *) data;
2188
2189 if (ifp->idev->cnf.forwarding)
2190 goto out;
2191
2192 if (ifp->idev->if_flags & IF_RA_RCVD) {
2193 /*
2194 * Announcement received after solicitation
2195 * was sent
2196 */
2197 goto out;
2198 }
2199
2200 spin_lock(&ifp->lock);
2201 if (ifp->probes++ < ifp->idev->cnf.rtr_solicits) {
2202 struct in6_addr all_routers;
2203
2204 /* The wait after the last probe can be shorter */
2205 addrconf_mod_timer(ifp, AC_RS,
2206 (ifp->probes == ifp->idev->cnf.rtr_solicits) ?
2207 ifp->idev->cnf.rtr_solicit_delay :
2208 ifp->idev->cnf.rtr_solicit_interval);
2209 spin_unlock(&ifp->lock);
2210
2211 ipv6_addr_all_routers(&all_routers);
2212
2213 ndisc_send_rs(ifp->idev->dev, &ifp->addr, &all_routers);
2214 } else {
2215 spin_unlock(&ifp->lock);
2216 /*
2217 * Note: we do not support deprecated "all on-link"
2218 * assumption any longer.
2219 */
2220 printk(KERN_DEBUG "%s: no IPv6 routers present\n",
2221 ifp->idev->dev->name);
2222 }
2223
2224out:
2225 in6_ifa_put(ifp);
2226}
2227
2228/*
2229 * Duplicate Address Detection
2230 */
2231static void addrconf_dad_start(struct inet6_ifaddr *ifp, int flags)
2232{
2233 struct inet6_dev *idev = ifp->idev;
2234 struct net_device *dev = idev->dev;
2235 unsigned long rand_num;
2236
2237 addrconf_join_solict(dev, &ifp->addr);
2238
2239 if (ifp->prefix_len != 128 && (ifp->flags&IFA_F_PERMANENT))
2240 addrconf_prefix_route(&ifp->addr, ifp->prefix_len, dev, 0,
2241 flags);
2242
2243 net_srandom(ifp->addr.s6_addr32[3]);
2244 rand_num = net_random() % (idev->cnf.rtr_solicit_delay ? : 1);
2245
2246 read_lock_bh(&idev->lock);
2247 if (ifp->dead)
2248 goto out;
2249 spin_lock_bh(&ifp->lock);
2250
2251 if (dev->flags&(IFF_NOARP|IFF_LOOPBACK) ||
2252 !(ifp->flags&IFA_F_TENTATIVE)) {
2253 ifp->flags &= ~IFA_F_TENTATIVE;
2254 spin_unlock_bh(&ifp->lock);
2255 read_unlock_bh(&idev->lock);
2256
2257 addrconf_dad_completed(ifp);
2258 return;
2259 }
2260
2261 ifp->probes = idev->cnf.dad_transmits;
2262 addrconf_mod_timer(ifp, AC_DAD, rand_num);
2263
2264 spin_unlock_bh(&ifp->lock);
2265out:
2266 read_unlock_bh(&idev->lock);
2267}
2268
2269static void addrconf_dad_timer(unsigned long data)
2270{
2271 struct inet6_ifaddr *ifp = (struct inet6_ifaddr *) data;
2272 struct inet6_dev *idev = ifp->idev;
2273 struct in6_addr unspec;
2274 struct in6_addr mcaddr;
2275
2276 read_lock_bh(&idev->lock);
2277 if (idev->dead) {
2278 read_unlock_bh(&idev->lock);
2279 goto out;
2280 }
2281 spin_lock_bh(&ifp->lock);
2282 if (ifp->probes == 0) {
2283 /*
2284 * DAD was successful
2285 */
2286
2287 ifp->flags &= ~IFA_F_TENTATIVE;
2288 spin_unlock_bh(&ifp->lock);
2289 read_unlock_bh(&idev->lock);
2290
2291 addrconf_dad_completed(ifp);
2292
2293 goto out;
2294 }
2295
2296 ifp->probes--;
2297 addrconf_mod_timer(ifp, AC_DAD, ifp->idev->nd_parms->retrans_time);
2298 spin_unlock_bh(&ifp->lock);
2299 read_unlock_bh(&idev->lock);
2300
2301 /* send a neighbour solicitation for our addr */
2302 memset(&unspec, 0, sizeof(unspec));
2303 addrconf_addr_solict_mult(&ifp->addr, &mcaddr);
2304 ndisc_send_ns(ifp->idev->dev, NULL, &ifp->addr, &mcaddr, &unspec);
2305out:
2306 in6_ifa_put(ifp);
2307}
2308
2309static void addrconf_dad_completed(struct inet6_ifaddr *ifp)
2310{
2311 struct net_device * dev = ifp->idev->dev;
2312
2313 /*
2314 * Configure the address for reception. Now it is valid.
2315 */
2316
2317 ipv6_ifa_notify(RTM_NEWADDR, ifp);
2318
2319 /* If added prefix is link local and forwarding is off,
2320 start sending router solicitations.
2321 */
2322
2323 if (ifp->idev->cnf.forwarding == 0 &&
2324 ifp->idev->cnf.rtr_solicits > 0 &&
2325 (dev->flags&IFF_LOOPBACK) == 0 &&
2326 (ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL)) {
2327 struct in6_addr all_routers;
2328
2329 ipv6_addr_all_routers(&all_routers);
2330
2331 /*
2332 * If a host as already performed a random delay
2333 * [...] as part of DAD [...] there is no need
2334 * to delay again before sending the first RS
2335 */
2336 ndisc_send_rs(ifp->idev->dev, &ifp->addr, &all_routers);
2337
2338 spin_lock_bh(&ifp->lock);
2339 ifp->probes = 1;
2340 ifp->idev->if_flags |= IF_RS_SENT;
2341 addrconf_mod_timer(ifp, AC_RS, ifp->idev->cnf.rtr_solicit_interval);
2342 spin_unlock_bh(&ifp->lock);
2343 }
2344}
2345
2346#ifdef CONFIG_PROC_FS
2347struct if6_iter_state {
2348 int bucket;
2349};
2350
2351static struct inet6_ifaddr *if6_get_first(struct seq_file *seq)
2352{
2353 struct inet6_ifaddr *ifa = NULL;
2354 struct if6_iter_state *state = seq->private;
2355
2356 for (state->bucket = 0; state->bucket < IN6_ADDR_HSIZE; ++state->bucket) {
2357 ifa = inet6_addr_lst[state->bucket];
2358 if (ifa)
2359 break;
2360 }
2361 return ifa;
2362}
2363
2364static struct inet6_ifaddr *if6_get_next(struct seq_file *seq, struct inet6_ifaddr *ifa)
2365{
2366 struct if6_iter_state *state = seq->private;
2367
2368 ifa = ifa->lst_next;
2369try_again:
2370 if (!ifa && ++state->bucket < IN6_ADDR_HSIZE) {
2371 ifa = inet6_addr_lst[state->bucket];
2372 goto try_again;
2373 }
2374 return ifa;
2375}
2376
2377static struct inet6_ifaddr *if6_get_idx(struct seq_file *seq, loff_t pos)
2378{
2379 struct inet6_ifaddr *ifa = if6_get_first(seq);
2380
2381 if (ifa)
2382 while(pos && (ifa = if6_get_next(seq, ifa)) != NULL)
2383 --pos;
2384 return pos ? NULL : ifa;
2385}
2386
2387static void *if6_seq_start(struct seq_file *seq, loff_t *pos)
2388{
2389 read_lock_bh(&addrconf_hash_lock);
2390 return if6_get_idx(seq, *pos);
2391}
2392
2393static void *if6_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2394{
2395 struct inet6_ifaddr *ifa;
2396
2397 ifa = if6_get_next(seq, v);
2398 ++*pos;
2399 return ifa;
2400}
2401
2402static void if6_seq_stop(struct seq_file *seq, void *v)
2403{
2404 read_unlock_bh(&addrconf_hash_lock);
2405}
2406
2407static int if6_seq_show(struct seq_file *seq, void *v)
2408{
2409 struct inet6_ifaddr *ifp = (struct inet6_ifaddr *)v;
2410 seq_printf(seq,
2411 "%04x%04x%04x%04x%04x%04x%04x%04x %02x %02x %02x %02x %8s\n",
2412 NIP6(ifp->addr),
2413 ifp->idev->dev->ifindex,
2414 ifp->prefix_len,
2415 ifp->scope,
2416 ifp->flags,
2417 ifp->idev->dev->name);
2418 return 0;
2419}
2420
2421static struct seq_operations if6_seq_ops = {
2422 .start = if6_seq_start,
2423 .next = if6_seq_next,
2424 .show = if6_seq_show,
2425 .stop = if6_seq_stop,
2426};
2427
2428static int if6_seq_open(struct inode *inode, struct file *file)
2429{
2430 struct seq_file *seq;
2431 int rc = -ENOMEM;
2432 struct if6_iter_state *s = kmalloc(sizeof(*s), GFP_KERNEL);
2433
2434 if (!s)
2435 goto out;
2436 memset(s, 0, sizeof(*s));
2437
2438 rc = seq_open(file, &if6_seq_ops);
2439 if (rc)
2440 goto out_kfree;
2441
2442 seq = file->private_data;
2443 seq->private = s;
2444out:
2445 return rc;
2446out_kfree:
2447 kfree(s);
2448 goto out;
2449}
2450
2451static struct file_operations if6_fops = {
2452 .owner = THIS_MODULE,
2453 .open = if6_seq_open,
2454 .read = seq_read,
2455 .llseek = seq_lseek,
2456 .release = seq_release_private,
2457};
2458
2459int __init if6_proc_init(void)
2460{
2461 if (!proc_net_fops_create("if_inet6", S_IRUGO, &if6_fops))
2462 return -ENOMEM;
2463 return 0;
2464}
2465
/* Remove the "if_inet6" proc net entry created by if6_proc_init(). */
void if6_proc_exit(void)
{
	proc_net_remove("if_inet6");
}
2470#endif /* CONFIG_PROC_FS */
2471
2472/*
2473 * Periodic address status verification
2474 */
2475
2476static void addrconf_verify(unsigned long foo)
2477{
2478 struct inet6_ifaddr *ifp;
2479 unsigned long now, next;
2480 int i;
2481
2482 spin_lock_bh(&addrconf_verify_lock);
2483 now = jiffies;
2484 next = now + ADDR_CHECK_FREQUENCY;
2485
2486 del_timer(&addr_chk_timer);
2487
2488 for (i=0; i < IN6_ADDR_HSIZE; i++) {
2489
2490restart:
2491 write_lock(&addrconf_hash_lock);
2492 for (ifp=inet6_addr_lst[i]; ifp; ifp=ifp->lst_next) {
2493 unsigned long age;
2494#ifdef CONFIG_IPV6_PRIVACY
2495 unsigned long regen_advance;
2496#endif
2497
2498 if (ifp->flags & IFA_F_PERMANENT)
2499 continue;
2500
2501 spin_lock(&ifp->lock);
2502 age = (now - ifp->tstamp) / HZ;
2503
2504#ifdef CONFIG_IPV6_PRIVACY
2505 regen_advance = ifp->idev->cnf.regen_max_retry *
2506 ifp->idev->cnf.dad_transmits *
2507 ifp->idev->nd_parms->retrans_time / HZ;
2508#endif
2509
2510 if (age >= ifp->valid_lft) {
2511 spin_unlock(&ifp->lock);
2512 in6_ifa_hold(ifp);
2513 write_unlock(&addrconf_hash_lock);
2514 ipv6_del_addr(ifp);
2515 goto restart;
2516 } else if (age >= ifp->prefered_lft) {
2517 /* jiffies - ifp->tsamp > age >= ifp->prefered_lft */
2518 int deprecate = 0;
2519
2520 if (!(ifp->flags&IFA_F_DEPRECATED)) {
2521 deprecate = 1;
2522 ifp->flags |= IFA_F_DEPRECATED;
2523 }
2524
2525 if (time_before(ifp->tstamp + ifp->valid_lft * HZ, next))
2526 next = ifp->tstamp + ifp->valid_lft * HZ;
2527
2528 spin_unlock(&ifp->lock);
2529
2530 if (deprecate) {
2531 in6_ifa_hold(ifp);
2532 write_unlock(&addrconf_hash_lock);
2533
2534 ipv6_ifa_notify(0, ifp);
2535 in6_ifa_put(ifp);
2536 goto restart;
2537 }
2538#ifdef CONFIG_IPV6_PRIVACY
2539 } else if ((ifp->flags&IFA_F_TEMPORARY) &&
2540 !(ifp->flags&IFA_F_TENTATIVE)) {
2541 if (age >= ifp->prefered_lft - regen_advance) {
2542 struct inet6_ifaddr *ifpub = ifp->ifpub;
2543 if (time_before(ifp->tstamp + ifp->prefered_lft * HZ, next))
2544 next = ifp->tstamp + ifp->prefered_lft * HZ;
2545 if (!ifp->regen_count && ifpub) {
2546 ifp->regen_count++;
2547 in6_ifa_hold(ifp);
2548 in6_ifa_hold(ifpub);
2549 spin_unlock(&ifp->lock);
2550 write_unlock(&addrconf_hash_lock);
2551 ipv6_create_tempaddr(ifpub, ifp);
2552 in6_ifa_put(ifpub);
2553 in6_ifa_put(ifp);
2554 goto restart;
2555 }
2556 } else if (time_before(ifp->tstamp + ifp->prefered_lft * HZ - regen_advance * HZ, next))
2557 next = ifp->tstamp + ifp->prefered_lft * HZ - regen_advance * HZ;
2558 spin_unlock(&ifp->lock);
2559#endif
2560 } else {
2561 /* ifp->prefered_lft <= ifp->valid_lft */
2562 if (time_before(ifp->tstamp + ifp->prefered_lft * HZ, next))
2563 next = ifp->tstamp + ifp->prefered_lft * HZ;
2564 spin_unlock(&ifp->lock);
2565 }
2566 }
2567 write_unlock(&addrconf_hash_lock);
2568 }
2569
2570 addr_chk_timer.expires = time_before(next, jiffies + HZ) ? jiffies + HZ : next;
2571 add_timer(&addr_chk_timer);
2572 spin_unlock_bh(&addrconf_verify_lock);
2573}
2574
2575static int
2576inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
2577{
2578 struct rtattr **rta = arg;
2579 struct ifaddrmsg *ifm = NLMSG_DATA(nlh);
2580 struct in6_addr *pfx;
2581
2582 pfx = NULL;
2583 if (rta[IFA_ADDRESS-1]) {
2584 if (RTA_PAYLOAD(rta[IFA_ADDRESS-1]) < sizeof(*pfx))
2585 return -EINVAL;
2586 pfx = RTA_DATA(rta[IFA_ADDRESS-1]);
2587 }
2588 if (rta[IFA_LOCAL-1]) {
2589 if (pfx && memcmp(pfx, RTA_DATA(rta[IFA_LOCAL-1]), sizeof(*pfx)))
2590 return -EINVAL;
2591 pfx = RTA_DATA(rta[IFA_LOCAL-1]);
2592 }
2593 if (pfx == NULL)
2594 return -EINVAL;
2595
2596 return inet6_addr_del(ifm->ifa_index, pfx, ifm->ifa_prefixlen);
2597}
2598
2599static int
2600inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
2601{
2602 struct rtattr **rta = arg;
2603 struct ifaddrmsg *ifm = NLMSG_DATA(nlh);
2604 struct in6_addr *pfx;
2605
2606 pfx = NULL;
2607 if (rta[IFA_ADDRESS-1]) {
2608 if (RTA_PAYLOAD(rta[IFA_ADDRESS-1]) < sizeof(*pfx))
2609 return -EINVAL;
2610 pfx = RTA_DATA(rta[IFA_ADDRESS-1]);
2611 }
2612 if (rta[IFA_LOCAL-1]) {
2613 if (pfx && memcmp(pfx, RTA_DATA(rta[IFA_LOCAL-1]), sizeof(*pfx)))
2614 return -EINVAL;
2615 pfx = RTA_DATA(rta[IFA_LOCAL-1]);
2616 }
2617 if (pfx == NULL)
2618 return -EINVAL;
2619
2620 return inet6_addr_add(ifm->ifa_index, pfx, ifm->ifa_prefixlen);
2621}
2622
2623static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa,
2624 u32 pid, u32 seq, int event)
2625{
2626 struct ifaddrmsg *ifm;
2627 struct nlmsghdr *nlh;
2628 struct ifa_cacheinfo ci;
2629 unsigned char *b = skb->tail;
2630
2631 nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*ifm));
2632 if (pid) nlh->nlmsg_flags |= NLM_F_MULTI;
2633 ifm = NLMSG_DATA(nlh);
2634 ifm->ifa_family = AF_INET6;
2635 ifm->ifa_prefixlen = ifa->prefix_len;
2636 ifm->ifa_flags = ifa->flags;
2637 ifm->ifa_scope = RT_SCOPE_UNIVERSE;
2638 if (ifa->scope&IFA_HOST)
2639 ifm->ifa_scope = RT_SCOPE_HOST;
2640 else if (ifa->scope&IFA_LINK)
2641 ifm->ifa_scope = RT_SCOPE_LINK;
2642 else if (ifa->scope&IFA_SITE)
2643 ifm->ifa_scope = RT_SCOPE_SITE;
2644 ifm->ifa_index = ifa->idev->dev->ifindex;
2645 RTA_PUT(skb, IFA_ADDRESS, 16, &ifa->addr);
2646 if (!(ifa->flags&IFA_F_PERMANENT)) {
2647 ci.ifa_prefered = ifa->prefered_lft;
2648 ci.ifa_valid = ifa->valid_lft;
2649 if (ci.ifa_prefered != INFINITY_LIFE_TIME) {
2650 long tval = (jiffies - ifa->tstamp)/HZ;
2651 ci.ifa_prefered -= tval;
2652 if (ci.ifa_valid != INFINITY_LIFE_TIME)
2653 ci.ifa_valid -= tval;
2654 }
2655 } else {
2656 ci.ifa_prefered = INFINITY_LIFE_TIME;
2657 ci.ifa_valid = INFINITY_LIFE_TIME;
2658 }
2659 ci.cstamp = (__u32)(TIME_DELTA(ifa->cstamp, INITIAL_JIFFIES) / HZ * 100
2660 + TIME_DELTA(ifa->cstamp, INITIAL_JIFFIES) % HZ * 100 / HZ);
2661 ci.tstamp = (__u32)(TIME_DELTA(ifa->tstamp, INITIAL_JIFFIES) / HZ * 100
2662 + TIME_DELTA(ifa->tstamp, INITIAL_JIFFIES) % HZ * 100 / HZ);
2663 RTA_PUT(skb, IFA_CACHEINFO, sizeof(ci), &ci);
2664 nlh->nlmsg_len = skb->tail - b;
2665 return skb->len;
2666
2667nlmsg_failure:
2668rtattr_failure:
2669 skb_trim(skb, b - skb->data);
2670 return -1;
2671}
2672
2673static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca,
2674 u32 pid, u32 seq, int event)
2675{
2676 struct ifaddrmsg *ifm;
2677 struct nlmsghdr *nlh;
2678 struct ifa_cacheinfo ci;
2679 unsigned char *b = skb->tail;
2680
2681 nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*ifm));
2682 if (pid) nlh->nlmsg_flags |= NLM_F_MULTI;
2683 ifm = NLMSG_DATA(nlh);
2684 ifm->ifa_family = AF_INET6;
2685 ifm->ifa_prefixlen = 128;
2686 ifm->ifa_flags = IFA_F_PERMANENT;
2687 ifm->ifa_scope = RT_SCOPE_UNIVERSE;
2688 if (ipv6_addr_scope(&ifmca->mca_addr)&IFA_SITE)
2689 ifm->ifa_scope = RT_SCOPE_SITE;
2690 ifm->ifa_index = ifmca->idev->dev->ifindex;
2691 RTA_PUT(skb, IFA_MULTICAST, 16, &ifmca->mca_addr);
2692 ci.cstamp = (__u32)(TIME_DELTA(ifmca->mca_cstamp, INITIAL_JIFFIES) / HZ
2693 * 100 + TIME_DELTA(ifmca->mca_cstamp, INITIAL_JIFFIES) % HZ
2694 * 100 / HZ);
2695 ci.tstamp = (__u32)(TIME_DELTA(ifmca->mca_tstamp, INITIAL_JIFFIES) / HZ
2696 * 100 + TIME_DELTA(ifmca->mca_tstamp, INITIAL_JIFFIES) % HZ
2697 * 100 / HZ);
2698 ci.ifa_prefered = INFINITY_LIFE_TIME;
2699 ci.ifa_valid = INFINITY_LIFE_TIME;
2700 RTA_PUT(skb, IFA_CACHEINFO, sizeof(ci), &ci);
2701 nlh->nlmsg_len = skb->tail - b;
2702 return skb->len;
2703
2704nlmsg_failure:
2705rtattr_failure:
2706 skb_trim(skb, b - skb->data);
2707 return -1;
2708}
2709
2710static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca,
2711 u32 pid, u32 seq, int event)
2712{
2713 struct ifaddrmsg *ifm;
2714 struct nlmsghdr *nlh;
2715 struct ifa_cacheinfo ci;
2716 unsigned char *b = skb->tail;
2717
2718 nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*ifm));
2719 if (pid) nlh->nlmsg_flags |= NLM_F_MULTI;
2720 ifm = NLMSG_DATA(nlh);
2721 ifm->ifa_family = AF_INET6;
2722 ifm->ifa_prefixlen = 128;
2723 ifm->ifa_flags = IFA_F_PERMANENT;
2724 ifm->ifa_scope = RT_SCOPE_UNIVERSE;
2725 if (ipv6_addr_scope(&ifaca->aca_addr)&IFA_SITE)
2726 ifm->ifa_scope = RT_SCOPE_SITE;
2727 ifm->ifa_index = ifaca->aca_idev->dev->ifindex;
2728 RTA_PUT(skb, IFA_ANYCAST, 16, &ifaca->aca_addr);
2729 ci.cstamp = (__u32)(TIME_DELTA(ifaca->aca_cstamp, INITIAL_JIFFIES) / HZ
2730 * 100 + TIME_DELTA(ifaca->aca_cstamp, INITIAL_JIFFIES) % HZ
2731 * 100 / HZ);
2732 ci.tstamp = (__u32)(TIME_DELTA(ifaca->aca_tstamp, INITIAL_JIFFIES) / HZ
2733 * 100 + TIME_DELTA(ifaca->aca_tstamp, INITIAL_JIFFIES) % HZ
2734 * 100 / HZ);
2735 ci.ifa_prefered = INFINITY_LIFE_TIME;
2736 ci.ifa_valid = INFINITY_LIFE_TIME;
2737 RTA_PUT(skb, IFA_CACHEINFO, sizeof(ci), &ci);
2738 nlh->nlmsg_len = skb->tail - b;
2739 return skb->len;
2740
2741nlmsg_failure:
2742rtattr_failure:
2743 skb_trim(skb, b - skb->data);
2744 return -1;
2745}
2746
/* Selects which per-device address list inet6_dump_addr() walks. */
enum addr_type_t {
	UNICAST_ADDR	= 0,	/* idev->addr_list (and tempaddr_list) */
	MULTICAST_ADDR	= 1,	/* idev->mc_list */
	ANYCAST_ADDR	= 2,	/* idev->ac_list */
};
2753
2754static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb,
2755 enum addr_type_t type)
2756{
2757 int idx, ip_idx;
2758 int s_idx, s_ip_idx;
2759 int err = 1;
2760 struct net_device *dev;
2761 struct inet6_dev *idev = NULL;
2762 struct inet6_ifaddr *ifa;
2763 struct ifmcaddr6 *ifmca;
2764 struct ifacaddr6 *ifaca;
2765
2766 s_idx = cb->args[0];
2767 s_ip_idx = ip_idx = cb->args[1];
2768 read_lock(&dev_base_lock);
2769
2770 for (dev = dev_base, idx = 0; dev; dev = dev->next, idx++) {
2771 if (idx < s_idx)
2772 continue;
2773 if (idx > s_idx)
2774 s_ip_idx = 0;
2775 ip_idx = 0;
2776 if ((idev = in6_dev_get(dev)) == NULL)
2777 continue;
2778 read_lock_bh(&idev->lock);
2779 switch (type) {
2780 case UNICAST_ADDR:
2781 /* unicast address */
2782 for (ifa = idev->addr_list; ifa;
2783 ifa = ifa->if_next, ip_idx++) {
2784 if (ip_idx < s_ip_idx)
2785 continue;
2786 if ((err = inet6_fill_ifaddr(skb, ifa,
2787 NETLINK_CB(cb->skb).pid,
2788 cb->nlh->nlmsg_seq, RTM_NEWADDR)) <= 0)
2789 goto done;
2790 }
2791 /* temp addr */
2792#ifdef CONFIG_IPV6_PRIVACY
2793 for (ifa = idev->tempaddr_list; ifa;
2794 ifa = ifa->tmp_next, ip_idx++) {
2795 if (ip_idx < s_ip_idx)
2796 continue;
2797 if ((err = inet6_fill_ifaddr(skb, ifa,
2798 NETLINK_CB(cb->skb).pid,
2799 cb->nlh->nlmsg_seq, RTM_NEWADDR)) <= 0)
2800 goto done;
2801 }
2802#endif
2803 break;
2804 case MULTICAST_ADDR:
2805 /* multicast address */
2806 for (ifmca = idev->mc_list; ifmca;
2807 ifmca = ifmca->next, ip_idx++) {
2808 if (ip_idx < s_ip_idx)
2809 continue;
2810 if ((err = inet6_fill_ifmcaddr(skb, ifmca,
2811 NETLINK_CB(cb->skb).pid,
2812 cb->nlh->nlmsg_seq, RTM_GETMULTICAST)) <= 0)
2813 goto done;
2814 }
2815 break;
2816 case ANYCAST_ADDR:
2817 /* anycast address */
2818 for (ifaca = idev->ac_list; ifaca;
2819 ifaca = ifaca->aca_next, ip_idx++) {
2820 if (ip_idx < s_ip_idx)
2821 continue;
2822 if ((err = inet6_fill_ifacaddr(skb, ifaca,
2823 NETLINK_CB(cb->skb).pid,
2824 cb->nlh->nlmsg_seq, RTM_GETANYCAST)) <= 0)
2825 goto done;
2826 }
2827 break;
2828 default:
2829 break;
2830 }
2831 read_unlock_bh(&idev->lock);
2832 in6_dev_put(idev);
2833 }
2834done:
2835 if (err <= 0) {
2836 read_unlock_bh(&idev->lock);
2837 in6_dev_put(idev);
2838 }
2839 read_unlock(&dev_base_lock);
2840 cb->args[0] = idx;
2841 cb->args[1] = ip_idx;
2842 return skb->len;
2843}
2844
2845static int inet6_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
2846{
2847 enum addr_type_t type = UNICAST_ADDR;
2848 return inet6_dump_addr(skb, cb, type);
2849}
2850
2851static int inet6_dump_ifmcaddr(struct sk_buff *skb, struct netlink_callback *cb)
2852{
2853 enum addr_type_t type = MULTICAST_ADDR;
2854 return inet6_dump_addr(skb, cb, type);
2855}
2856
2857
2858static int inet6_dump_ifacaddr(struct sk_buff *skb, struct netlink_callback *cb)
2859{
2860 enum addr_type_t type = ANYCAST_ADDR;
2861 return inet6_dump_addr(skb, cb, type);
2862}
2863
2864static void inet6_ifa_notify(int event, struct inet6_ifaddr *ifa)
2865{
2866 struct sk_buff *skb;
2867 int size = NLMSG_SPACE(sizeof(struct ifaddrmsg)+128);
2868
2869 skb = alloc_skb(size, GFP_ATOMIC);
2870 if (!skb) {
2871 netlink_set_err(rtnl, 0, RTMGRP_IPV6_IFADDR, ENOBUFS);
2872 return;
2873 }
2874 if (inet6_fill_ifaddr(skb, ifa, 0, 0, event) < 0) {
2875 kfree_skb(skb);
2876 netlink_set_err(rtnl, 0, RTMGRP_IPV6_IFADDR, EINVAL);
2877 return;
2878 }
2879 NETLINK_CB(skb).dst_groups = RTMGRP_IPV6_IFADDR;
2880 netlink_broadcast(rtnl, skb, 0, RTMGRP_IPV6_IFADDR, GFP_ATOMIC);
2881}
2882
2883static void inline ipv6_store_devconf(struct ipv6_devconf *cnf,
2884 __s32 *array, int bytes)
2885{
2886 memset(array, 0, bytes);
2887 array[DEVCONF_FORWARDING] = cnf->forwarding;
2888 array[DEVCONF_HOPLIMIT] = cnf->hop_limit;
2889 array[DEVCONF_MTU6] = cnf->mtu6;
2890 array[DEVCONF_ACCEPT_RA] = cnf->accept_ra;
2891 array[DEVCONF_ACCEPT_REDIRECTS] = cnf->accept_redirects;
2892 array[DEVCONF_AUTOCONF] = cnf->autoconf;
2893 array[DEVCONF_DAD_TRANSMITS] = cnf->dad_transmits;
2894 array[DEVCONF_RTR_SOLICITS] = cnf->rtr_solicits;
2895 array[DEVCONF_RTR_SOLICIT_INTERVAL] = cnf->rtr_solicit_interval;
2896 array[DEVCONF_RTR_SOLICIT_DELAY] = cnf->rtr_solicit_delay;
2897 array[DEVCONF_FORCE_MLD_VERSION] = cnf->force_mld_version;
2898#ifdef CONFIG_IPV6_PRIVACY
2899 array[DEVCONF_USE_TEMPADDR] = cnf->use_tempaddr;
2900 array[DEVCONF_TEMP_VALID_LFT] = cnf->temp_valid_lft;
2901 array[DEVCONF_TEMP_PREFERED_LFT] = cnf->temp_prefered_lft;
2902 array[DEVCONF_REGEN_MAX_RETRY] = cnf->regen_max_retry;
2903 array[DEVCONF_MAX_DESYNC_FACTOR] = cnf->max_desync_factor;
2904#endif
2905 array[DEVCONF_MAX_ADDRESSES] = cnf->max_addresses;
2906}
2907
2908static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev,
2909 u32 pid, u32 seq, int event)
2910{
2911 struct net_device *dev = idev->dev;
2912 __s32 *array = NULL;
2913 struct ifinfomsg *r;
2914 struct nlmsghdr *nlh;
2915 unsigned char *b = skb->tail;
2916 struct rtattr *subattr;
2917 __u32 mtu = dev->mtu;
2918 struct ifla_cacheinfo ci;
2919
2920 nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*r));
2921 if (pid) nlh->nlmsg_flags |= NLM_F_MULTI;
2922 r = NLMSG_DATA(nlh);
2923 r->ifi_family = AF_INET6;
2924 r->ifi_type = dev->type;
2925 r->ifi_index = dev->ifindex;
2926 r->ifi_flags = dev_get_flags(dev);
2927 r->ifi_change = 0;
2928
2929 RTA_PUT(skb, IFLA_IFNAME, strlen(dev->name)+1, dev->name);
2930
2931 if (dev->addr_len)
2932 RTA_PUT(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr);
2933
2934 RTA_PUT(skb, IFLA_MTU, sizeof(mtu), &mtu);
2935 if (dev->ifindex != dev->iflink)
2936 RTA_PUT(skb, IFLA_LINK, sizeof(int), &dev->iflink);
2937
2938 subattr = (struct rtattr*)skb->tail;
2939
2940 RTA_PUT(skb, IFLA_PROTINFO, 0, NULL);
2941
2942 /* return the device flags */
2943 RTA_PUT(skb, IFLA_INET6_FLAGS, sizeof(__u32), &idev->if_flags);
2944
2945 /* return interface cacheinfo */
2946 ci.max_reasm_len = IPV6_MAXPLEN;
2947 ci.tstamp = (__u32)(TIME_DELTA(idev->tstamp, INITIAL_JIFFIES) / HZ * 100
2948 + TIME_DELTA(idev->tstamp, INITIAL_JIFFIES) % HZ * 100 / HZ);
2949 ci.reachable_time = idev->nd_parms->reachable_time;
2950 ci.retrans_time = idev->nd_parms->retrans_time;
2951 RTA_PUT(skb, IFLA_INET6_CACHEINFO, sizeof(ci), &ci);
2952
2953 /* return the device sysctl params */
2954 if ((array = kmalloc(DEVCONF_MAX * sizeof(*array), GFP_ATOMIC)) == NULL)
2955 goto rtattr_failure;
2956 ipv6_store_devconf(&idev->cnf, array, DEVCONF_MAX * sizeof(*array));
2957 RTA_PUT(skb, IFLA_INET6_CONF, DEVCONF_MAX * sizeof(*array), array);
2958
2959 /* XXX - Statistics/MC not implemented */
2960 subattr->rta_len = skb->tail - (u8*)subattr;
2961
2962 nlh->nlmsg_len = skb->tail - b;
2963 kfree(array);
2964 return skb->len;
2965
2966nlmsg_failure:
2967rtattr_failure:
2968 if (array)
2969 kfree(array);
2970 skb_trim(skb, b - skb->data);
2971 return -1;
2972}
2973
2974static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
2975{
2976 int idx, err;
2977 int s_idx = cb->args[0];
2978 struct net_device *dev;
2979 struct inet6_dev *idev;
2980
2981 read_lock(&dev_base_lock);
2982 for (dev=dev_base, idx=0; dev; dev = dev->next, idx++) {
2983 if (idx < s_idx)
2984 continue;
2985 if ((idev = in6_dev_get(dev)) == NULL)
2986 continue;
2987 err = inet6_fill_ifinfo(skb, idev, NETLINK_CB(cb->skb).pid,
2988 cb->nlh->nlmsg_seq, RTM_NEWLINK);
2989 in6_dev_put(idev);
2990 if (err <= 0)
2991 break;
2992 }
2993 read_unlock(&dev_base_lock);
2994 cb->args[0] = idx;
2995
2996 return skb->len;
2997}
2998
2999void inet6_ifinfo_notify(int event, struct inet6_dev *idev)
3000{
3001 struct sk_buff *skb;
3002 /* 128 bytes ?? */
3003 int size = NLMSG_SPACE(sizeof(struct ifinfomsg)+128);
3004
3005 skb = alloc_skb(size, GFP_ATOMIC);
3006 if (!skb) {
3007 netlink_set_err(rtnl, 0, RTMGRP_IPV6_IFINFO, ENOBUFS);
3008 return;
3009 }
3010 if (inet6_fill_ifinfo(skb, idev, 0, 0, event) < 0) {
3011 kfree_skb(skb);
3012 netlink_set_err(rtnl, 0, RTMGRP_IPV6_IFINFO, EINVAL);
3013 return;
3014 }
3015 NETLINK_CB(skb).dst_groups = RTMGRP_IPV6_IFINFO;
3016 netlink_broadcast(rtnl, skb, 0, RTMGRP_IPV6_IFINFO, GFP_ATOMIC);
3017}
3018
3019static int inet6_fill_prefix(struct sk_buff *skb, struct inet6_dev *idev,
3020 struct prefix_info *pinfo, u32 pid, u32 seq, int event)
3021{
3022 struct prefixmsg *pmsg;
3023 struct nlmsghdr *nlh;
3024 unsigned char *b = skb->tail;
3025 struct prefix_cacheinfo ci;
3026
3027 nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*pmsg));
3028
3029 if (pid)
3030 nlh->nlmsg_flags |= NLM_F_MULTI;
3031
3032 pmsg = NLMSG_DATA(nlh);
3033 pmsg->prefix_family = AF_INET6;
3034 pmsg->prefix_ifindex = idev->dev->ifindex;
3035 pmsg->prefix_len = pinfo->prefix_len;
3036 pmsg->prefix_type = pinfo->type;
3037
3038 pmsg->prefix_flags = 0;
3039 if (pinfo->onlink)
3040 pmsg->prefix_flags |= IF_PREFIX_ONLINK;
3041 if (pinfo->autoconf)
3042 pmsg->prefix_flags |= IF_PREFIX_AUTOCONF;
3043
3044 RTA_PUT(skb, PREFIX_ADDRESS, sizeof(pinfo->prefix), &pinfo->prefix);
3045
3046 ci.preferred_time = ntohl(pinfo->prefered);
3047 ci.valid_time = ntohl(pinfo->valid);
3048 RTA_PUT(skb, PREFIX_CACHEINFO, sizeof(ci), &ci);
3049
3050 nlh->nlmsg_len = skb->tail - b;
3051 return skb->len;
3052
3053nlmsg_failure:
3054rtattr_failure:
3055 skb_trim(skb, b - skb->data);
3056 return -1;
3057}
3058
3059static void inet6_prefix_notify(int event, struct inet6_dev *idev,
3060 struct prefix_info *pinfo)
3061{
3062 struct sk_buff *skb;
3063 int size = NLMSG_SPACE(sizeof(struct prefixmsg)+128);
3064
3065 skb = alloc_skb(size, GFP_ATOMIC);
3066 if (!skb) {
3067 netlink_set_err(rtnl, 0, RTMGRP_IPV6_PREFIX, ENOBUFS);
3068 return;
3069 }
3070 if (inet6_fill_prefix(skb, idev, pinfo, 0, 0, event) < 0) {
3071 kfree_skb(skb);
3072 netlink_set_err(rtnl, 0, RTMGRP_IPV6_PREFIX, EINVAL);
3073 return;
3074 }
3075 NETLINK_CB(skb).dst_groups = RTMGRP_IPV6_PREFIX;
3076 netlink_broadcast(rtnl, skb, 0, RTMGRP_IPV6_PREFIX, GFP_ATOMIC);
3077}
3078
3079static struct rtnetlink_link inet6_rtnetlink_table[RTM_MAX - RTM_BASE + 1] = {
3080 [RTM_GETLINK - RTM_BASE] = { .dumpit = inet6_dump_ifinfo, },
3081 [RTM_NEWADDR - RTM_BASE] = { .doit = inet6_rtm_newaddr, },
3082 [RTM_DELADDR - RTM_BASE] = { .doit = inet6_rtm_deladdr, },
3083 [RTM_GETADDR - RTM_BASE] = { .dumpit = inet6_dump_ifaddr, },
3084 [RTM_GETMULTICAST - RTM_BASE] = { .dumpit = inet6_dump_ifmcaddr, },
3085 [RTM_GETANYCAST - RTM_BASE] = { .dumpit = inet6_dump_ifacaddr, },
3086 [RTM_NEWROUTE - RTM_BASE] = { .doit = inet6_rtm_newroute, },
3087 [RTM_DELROUTE - RTM_BASE] = { .doit = inet6_rtm_delroute, },
3088 [RTM_GETROUTE - RTM_BASE] = { .doit = inet6_rtm_getroute,
3089 .dumpit = inet6_dump_fib, },
3090};
3091
3092static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
3093{
3094 inet6_ifa_notify(event ? : RTM_NEWADDR, ifp);
3095
3096 switch (event) {
3097 case RTM_NEWADDR:
3098 dst_hold(&ifp->rt->u.dst);
3099 if (ip6_ins_rt(ifp->rt, NULL, NULL))
3100 dst_release(&ifp->rt->u.dst);
3101 if (ifp->idev->cnf.forwarding)
3102 addrconf_join_anycast(ifp);
3103 break;
3104 case RTM_DELADDR:
3105 if (ifp->idev->cnf.forwarding)
3106 addrconf_leave_anycast(ifp);
3107 addrconf_leave_solict(ifp->idev, &ifp->addr);
3108 dst_hold(&ifp->rt->u.dst);
3109 if (ip6_del_rt(ifp->rt, NULL, NULL))
3110 dst_free(&ifp->rt->u.dst);
3111 else
3112 dst_release(&ifp->rt->u.dst);
3113 break;
3114 }
3115}
3116
3117static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
3118{
3119 read_lock_bh(&addrconf_lock);
3120 if (likely(ifp->idev->dead == 0))
3121 __ipv6_ifa_notify(event, ifp);
3122 read_unlock_bh(&addrconf_lock);
3123}
3124
3125#ifdef CONFIG_SYSCTL
3126
3127static
3128int addrconf_sysctl_forward(ctl_table *ctl, int write, struct file * filp,
3129 void __user *buffer, size_t *lenp, loff_t *ppos)
3130{
3131 int *valp = ctl->data;
3132 int val = *valp;
3133 int ret;
3134
3135 ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
3136
3137 if (write && valp != &ipv6_devconf_dflt.forwarding) {
3138 if (valp != &ipv6_devconf.forwarding) {
3139 if ((!*valp) ^ (!val)) {
3140 struct inet6_dev *idev = (struct inet6_dev *)ctl->extra1;
3141 if (idev == NULL)
3142 return ret;
3143 dev_forward_change(idev);
3144 }
3145 } else {
3146 ipv6_devconf_dflt.forwarding = ipv6_devconf.forwarding;
3147 addrconf_forward_change();
3148 }
3149 if (*valp)
3150 rt6_purge_dflt_routers();
3151 }
3152
3153 return ret;
3154}
3155
3156static int addrconf_sysctl_forward_strategy(ctl_table *table,
3157 int __user *name, int nlen,
3158 void __user *oldval,
3159 size_t __user *oldlenp,
3160 void __user *newval, size_t newlen,
3161 void **context)
3162{
3163 int *valp = table->data;
3164 int new;
3165
3166 if (!newval || !newlen)
3167 return 0;
3168 if (newlen != sizeof(int))
3169 return -EINVAL;
3170 if (get_user(new, (int __user *)newval))
3171 return -EFAULT;
3172 if (new == *valp)
3173 return 0;
3174 if (oldval && oldlenp) {
3175 size_t len;
3176 if (get_user(len, oldlenp))
3177 return -EFAULT;
3178 if (len) {
3179 if (len > table->maxlen)
3180 len = table->maxlen;
3181 if (copy_to_user(oldval, valp, len))
3182 return -EFAULT;
3183 if (put_user(len, oldlenp))
3184 return -EFAULT;
3185 }
3186 }
3187
3188 if (valp != &ipv6_devconf_dflt.forwarding) {
3189 if (valp != &ipv6_devconf.forwarding) {
3190 struct inet6_dev *idev = (struct inet6_dev *)table->extra1;
3191 int changed;
3192 if (unlikely(idev == NULL))
3193 return -ENODEV;
3194 changed = (!*valp) ^ (!new);
3195 *valp = new;
3196 if (changed)
3197 dev_forward_change(idev);
3198 } else {
3199 *valp = new;
3200 addrconf_forward_change();
3201 }
3202
3203 if (*valp)
3204 rt6_purge_dflt_routers();
3205 } else
3206 *valp = new;
3207
3208 return 1;
3209}
3210
3211static struct addrconf_sysctl_table
3212{
3213 struct ctl_table_header *sysctl_header;
3214 ctl_table addrconf_vars[__NET_IPV6_MAX];
3215 ctl_table addrconf_dev[2];
3216 ctl_table addrconf_conf_dir[2];
3217 ctl_table addrconf_proto_dir[2];
3218 ctl_table addrconf_root_dir[2];
3219} addrconf_sysctl = {
3220 .sysctl_header = NULL,
3221 .addrconf_vars = {
3222 {
3223 .ctl_name = NET_IPV6_FORWARDING,
3224 .procname = "forwarding",
3225 .data = &ipv6_devconf.forwarding,
3226 .maxlen = sizeof(int),
3227 .mode = 0644,
3228 .proc_handler = &addrconf_sysctl_forward,
3229 .strategy = &addrconf_sysctl_forward_strategy,
3230 },
3231 {
3232 .ctl_name = NET_IPV6_HOP_LIMIT,
3233 .procname = "hop_limit",
3234 .data = &ipv6_devconf.hop_limit,
3235 .maxlen = sizeof(int),
3236 .mode = 0644,
3237 .proc_handler = proc_dointvec,
3238 },
3239 {
3240 .ctl_name = NET_IPV6_MTU,
3241 .procname = "mtu",
3242 .data = &ipv6_devconf.mtu6,
3243 .maxlen = sizeof(int),
3244 .mode = 0644,
3245 .proc_handler = &proc_dointvec,
3246 },
3247 {
3248 .ctl_name = NET_IPV6_ACCEPT_RA,
3249 .procname = "accept_ra",
3250 .data = &ipv6_devconf.accept_ra,
3251 .maxlen = sizeof(int),
3252 .mode = 0644,
3253 .proc_handler = &proc_dointvec,
3254 },
3255 {
3256 .ctl_name = NET_IPV6_ACCEPT_REDIRECTS,
3257 .procname = "accept_redirects",
3258 .data = &ipv6_devconf.accept_redirects,
3259 .maxlen = sizeof(int),
3260 .mode = 0644,
3261 .proc_handler = &proc_dointvec,
3262 },
3263 {
3264 .ctl_name = NET_IPV6_AUTOCONF,
3265 .procname = "autoconf",
3266 .data = &ipv6_devconf.autoconf,
3267 .maxlen = sizeof(int),
3268 .mode = 0644,
3269 .proc_handler = &proc_dointvec,
3270 },
3271 {
3272 .ctl_name = NET_IPV6_DAD_TRANSMITS,
3273 .procname = "dad_transmits",
3274 .data = &ipv6_devconf.dad_transmits,
3275 .maxlen = sizeof(int),
3276 .mode = 0644,
3277 .proc_handler = &proc_dointvec,
3278 },
3279 {
3280 .ctl_name = NET_IPV6_RTR_SOLICITS,
3281 .procname = "router_solicitations",
3282 .data = &ipv6_devconf.rtr_solicits,
3283 .maxlen = sizeof(int),
3284 .mode = 0644,
3285 .proc_handler = &proc_dointvec,
3286 },
3287 {
3288 .ctl_name = NET_IPV6_RTR_SOLICIT_INTERVAL,
3289 .procname = "router_solicitation_interval",
3290 .data = &ipv6_devconf.rtr_solicit_interval,
3291 .maxlen = sizeof(int),
3292 .mode = 0644,
3293 .proc_handler = &proc_dointvec_jiffies,
3294 .strategy = &sysctl_jiffies,
3295 },
3296 {
3297 .ctl_name = NET_IPV6_RTR_SOLICIT_DELAY,
3298 .procname = "router_solicitation_delay",
3299 .data = &ipv6_devconf.rtr_solicit_delay,
3300 .maxlen = sizeof(int),
3301 .mode = 0644,
3302 .proc_handler = &proc_dointvec_jiffies,
3303 .strategy = &sysctl_jiffies,
3304 },
3305 {
3306 .ctl_name = NET_IPV6_FORCE_MLD_VERSION,
3307 .procname = "force_mld_version",
3308 .data = &ipv6_devconf.force_mld_version,
3309 .maxlen = sizeof(int),
3310 .mode = 0644,
3311 .proc_handler = &proc_dointvec,
3312 },
3313#ifdef CONFIG_IPV6_PRIVACY
3314 {
3315 .ctl_name = NET_IPV6_USE_TEMPADDR,
3316 .procname = "use_tempaddr",
3317 .data = &ipv6_devconf.use_tempaddr,
3318 .maxlen = sizeof(int),
3319 .mode = 0644,
3320 .proc_handler = &proc_dointvec,
3321 },
3322 {
3323 .ctl_name = NET_IPV6_TEMP_VALID_LFT,
3324 .procname = "temp_valid_lft",
3325 .data = &ipv6_devconf.temp_valid_lft,
3326 .maxlen = sizeof(int),
3327 .mode = 0644,
3328 .proc_handler = &proc_dointvec,
3329 },
3330 {
3331 .ctl_name = NET_IPV6_TEMP_PREFERED_LFT,
3332 .procname = "temp_prefered_lft",
3333 .data = &ipv6_devconf.temp_prefered_lft,
3334 .maxlen = sizeof(int),
3335 .mode = 0644,
3336 .proc_handler = &proc_dointvec,
3337 },
3338 {
3339 .ctl_name = NET_IPV6_REGEN_MAX_RETRY,
3340 .procname = "regen_max_retry",
3341 .data = &ipv6_devconf.regen_max_retry,
3342 .maxlen = sizeof(int),
3343 .mode = 0644,
3344 .proc_handler = &proc_dointvec,
3345 },
3346 {
3347 .ctl_name = NET_IPV6_MAX_DESYNC_FACTOR,
3348 .procname = "max_desync_factor",
3349 .data = &ipv6_devconf.max_desync_factor,
3350 .maxlen = sizeof(int),
3351 .mode = 0644,
3352 .proc_handler = &proc_dointvec,
3353 },
3354#endif
3355 {
3356 .ctl_name = NET_IPV6_MAX_ADDRESSES,
3357 .procname = "max_addresses",
3358 .data = &ipv6_devconf.max_addresses,
3359 .maxlen = sizeof(int),
3360 .mode = 0644,
3361 .proc_handler = &proc_dointvec,
3362 },
3363 {
3364 .ctl_name = 0, /* sentinel */
3365 }
3366 },
3367 .addrconf_dev = {
3368 {
3369 .ctl_name = NET_PROTO_CONF_ALL,
3370 .procname = "all",
3371 .mode = 0555,
3372 .child = addrconf_sysctl.addrconf_vars,
3373 },
3374 {
3375 .ctl_name = 0, /* sentinel */
3376 }
3377 },
3378 .addrconf_conf_dir = {
3379 {
3380 .ctl_name = NET_IPV6_CONF,
3381 .procname = "conf",
3382 .mode = 0555,
3383 .child = addrconf_sysctl.addrconf_dev,
3384 },
3385 {
3386 .ctl_name = 0, /* sentinel */
3387 }
3388 },
3389 .addrconf_proto_dir = {
3390 {
3391 .ctl_name = NET_IPV6,
3392 .procname = "ipv6",
3393 .mode = 0555,
3394 .child = addrconf_sysctl.addrconf_conf_dir,
3395 },
3396 {
3397 .ctl_name = 0, /* sentinel */
3398 }
3399 },
3400 .addrconf_root_dir = {
3401 {
3402 .ctl_name = CTL_NET,
3403 .procname = "net",
3404 .mode = 0555,
3405 .child = addrconf_sysctl.addrconf_proto_dir,
3406 },
3407 {
3408 .ctl_name = 0, /* sentinel */
3409 }
3410 },
3411};
3412
3413static void addrconf_sysctl_register(struct inet6_dev *idev, struct ipv6_devconf *p)
3414{
3415 int i;
3416 struct net_device *dev = idev ? idev->dev : NULL;
3417 struct addrconf_sysctl_table *t;
3418 char *dev_name = NULL;
3419
3420 t = kmalloc(sizeof(*t), GFP_KERNEL);
3421 if (t == NULL)
3422 return;
3423 memcpy(t, &addrconf_sysctl, sizeof(*t));
3424 for (i=0; t->addrconf_vars[i].data; i++) {
3425 t->addrconf_vars[i].data += (char*)p - (char*)&ipv6_devconf;
3426 t->addrconf_vars[i].de = NULL;
3427 t->addrconf_vars[i].extra1 = idev; /* embedded; no ref */
3428 }
3429 if (dev) {
3430 dev_name = dev->name;
3431 t->addrconf_dev[0].ctl_name = dev->ifindex;
3432 } else {
3433 dev_name = "default";
3434 t->addrconf_dev[0].ctl_name = NET_PROTO_CONF_DEFAULT;
3435 }
3436
3437 /*
3438 * Make a copy of dev_name, because '.procname' is regarded as const
3439 * by sysctl and we wouldn't want anyone to change it under our feet
3440 * (see SIOCSIFNAME).
3441 */
3442 dev_name = net_sysctl_strdup(dev_name);
3443 if (!dev_name)
3444 goto free;
3445
3446 t->addrconf_dev[0].procname = dev_name;
3447
3448 t->addrconf_dev[0].child = t->addrconf_vars;
3449 t->addrconf_dev[0].de = NULL;
3450 t->addrconf_conf_dir[0].child = t->addrconf_dev;
3451 t->addrconf_conf_dir[0].de = NULL;
3452 t->addrconf_proto_dir[0].child = t->addrconf_conf_dir;
3453 t->addrconf_proto_dir[0].de = NULL;
3454 t->addrconf_root_dir[0].child = t->addrconf_proto_dir;
3455 t->addrconf_root_dir[0].de = NULL;
3456
3457 t->sysctl_header = register_sysctl_table(t->addrconf_root_dir, 0);
3458 if (t->sysctl_header == NULL)
3459 goto free_procname;
3460 else
3461 p->sysctl = t;
3462 return;
3463
3464 /* error path */
3465 free_procname:
3466 kfree(dev_name);
3467 free:
3468 kfree(t);
3469
3470 return;
3471}
3472
3473static void addrconf_sysctl_unregister(struct ipv6_devconf *p)
3474{
3475 if (p->sysctl) {
3476 struct addrconf_sysctl_table *t = p->sysctl;
3477 p->sysctl = NULL;
3478 unregister_sysctl_table(t->sysctl_header);
3479 kfree(t->addrconf_dev[0].procname);
3480 kfree(t);
3481 }
3482}
3483
3484
3485#endif
3486
3487/*
3488 * Device notifier
3489 */
3490
3491int register_inet6addr_notifier(struct notifier_block *nb)
3492{
3493 return notifier_chain_register(&inet6addr_chain, nb);
3494}
3495
3496int unregister_inet6addr_notifier(struct notifier_block *nb)
3497{
3498 return notifier_chain_unregister(&inet6addr_chain,nb);
3499}
3500
3501/*
3502 * Init / cleanup code
3503 */
3504
3505int __init addrconf_init(void)
3506{
3507 int err = 0;
3508
3509 /* The addrconf netdev notifier requires that loopback_dev
3510 * has it's ipv6 private information allocated and setup
3511 * before it can bring up and give link-local addresses
3512 * to other devices which are up.
3513 *
3514 * Unfortunately, loopback_dev is not necessarily the first
3515 * entry in the global dev_base list of net devices. In fact,
3516 * it is likely to be the very last entry on that list.
3517 * So this causes the notifier registry below to try and
3518 * give link-local addresses to all devices besides loopback_dev
3519 * first, then loopback_dev, which cases all the non-loopback_dev
3520 * devices to fail to get a link-local address.
3521 *
3522 * So, as a temporary fix, allocate the ipv6 structure for
3523 * loopback_dev first by hand.
3524 * Longer term, all of the dependencies ipv6 has upon the loopback
3525 * device and it being up should be removed.
3526 */
3527 rtnl_lock();
3528 if (!ipv6_add_dev(&loopback_dev))
3529 err = -ENOMEM;
3530 rtnl_unlock();
3531 if (err)
3532 return err;
3533
3534 register_netdevice_notifier(&ipv6_dev_notf);
3535
3536#ifdef CONFIG_IPV6_PRIVACY
3537 md5_tfm = crypto_alloc_tfm("md5", 0);
3538 if (unlikely(md5_tfm == NULL))
3539 printk(KERN_WARNING
3540 "failed to load transform for md5\n");
3541#endif
3542
3543 addrconf_verify(0);
3544 rtnetlink_links[PF_INET6] = inet6_rtnetlink_table;
3545#ifdef CONFIG_SYSCTL
3546 addrconf_sysctl.sysctl_header =
3547 register_sysctl_table(addrconf_sysctl.addrconf_root_dir, 0);
3548 addrconf_sysctl_register(NULL, &ipv6_devconf_dflt);
3549#endif
3550
3551 return 0;
3552}
3553
3554void __exit addrconf_cleanup(void)
3555{
3556 struct net_device *dev;
3557 struct inet6_dev *idev;
3558 struct inet6_ifaddr *ifa;
3559 int i;
3560
3561 unregister_netdevice_notifier(&ipv6_dev_notf);
3562
3563 rtnetlink_links[PF_INET6] = NULL;
3564#ifdef CONFIG_SYSCTL
3565 addrconf_sysctl_unregister(&ipv6_devconf_dflt);
3566 addrconf_sysctl_unregister(&ipv6_devconf);
3567#endif
3568
3569 rtnl_lock();
3570
3571 /*
3572 * clean dev list.
3573 */
3574
3575 for (dev=dev_base; dev; dev=dev->next) {
3576 if ((idev = __in6_dev_get(dev)) == NULL)
3577 continue;
3578 addrconf_ifdown(dev, 1);
3579 }
3580 addrconf_ifdown(&loopback_dev, 2);
3581
3582 /*
3583 * Check hash table.
3584 */
3585
3586 write_lock_bh(&addrconf_hash_lock);
3587 for (i=0; i < IN6_ADDR_HSIZE; i++) {
3588 for (ifa=inet6_addr_lst[i]; ifa; ) {
3589 struct inet6_ifaddr *bifa;
3590
3591 bifa = ifa;
3592 ifa = ifa->lst_next;
3593 printk(KERN_DEBUG "bug: IPv6 address leakage detected: ifa=%p\n", bifa);
3594 /* Do not free it; something is wrong.
3595 Now we can investigate it with debugger.
3596 */
3597 }
3598 }
3599 write_unlock_bh(&addrconf_hash_lock);
3600
3601 del_timer(&addr_chk_timer);
3602
3603 rtnl_unlock();
3604
3605#ifdef CONFIG_IPV6_PRIVACY
3606 if (likely(md5_tfm != NULL)) {
3607 crypto_free_tfm(md5_tfm);
3608 md5_tfm = NULL;
3609 }
3610#endif
3611
3612#ifdef CONFIG_PROC_FS
3613 proc_net_remove("if_inet6");
3614#endif
3615}
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
new file mode 100644
index 000000000000..768b11703daf
--- /dev/null
+++ b/net/ipv6/af_inet6.c
@@ -0,0 +1,867 @@
1/*
2 * PF_INET6 socket protocol family
3 * Linux INET6 implementation
4 *
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
7 *
8 * Adapted from linux/net/ipv4/af_inet.c
9 *
10 * $Id: af_inet6.c,v 1.66 2002/02/01 22:01:04 davem Exp $
11 *
12 * Fixes:
13 * piggy, Karl Knutson : Socket protocol table
14 * Hideaki YOSHIFUJI : sin6_scope_id support
15 * Arnaldo Melo : check proc_net_create return, cleanups
16 *
17 * This program is free software; you can redistribute it and/or
18 * modify it under the terms of the GNU General Public License
19 * as published by the Free Software Foundation; either version
20 * 2 of the License, or (at your option) any later version.
21 */
22
23
24#include <linux/module.h>
25#include <linux/config.h>
26#include <linux/errno.h>
27#include <linux/types.h>
28#include <linux/socket.h>
29#include <linux/in.h>
30#include <linux/kernel.h>
31#include <linux/major.h>
32#include <linux/sched.h>
33#include <linux/timer.h>
34#include <linux/string.h>
35#include <linux/sockios.h>
36#include <linux/net.h>
37#include <linux/fcntl.h>
38#include <linux/mm.h>
39#include <linux/interrupt.h>
40#include <linux/proc_fs.h>
41#include <linux/stat.h>
42#include <linux/init.h>
43
44#include <linux/inet.h>
45#include <linux/netdevice.h>
46#include <linux/icmpv6.h>
47#include <linux/smp_lock.h>
48
49#include <net/ip.h>
50#include <net/ipv6.h>
51#include <net/udp.h>
52#include <net/tcp.h>
53#include <net/ipip.h>
54#include <net/protocol.h>
55#include <net/inet_common.h>
56#include <net/transp_v6.h>
57#include <net/ip6_route.h>
58#include <net/addrconf.h>
59#ifdef CONFIG_IPV6_TUNNEL
60#include <net/ip6_tunnel.h>
61#endif
62
63#include <asm/uaccess.h>
64#include <asm/system.h>
65
66MODULE_AUTHOR("Cast of dozens");
67MODULE_DESCRIPTION("IPv6 protocol stack for Linux");
68MODULE_LICENSE("GPL");
69
70/* IPv6 procfs goodies... */
71
72#ifdef CONFIG_PROC_FS
73extern int raw6_proc_init(void);
74extern void raw6_proc_exit(void);
75extern int tcp6_proc_init(void);
76extern void tcp6_proc_exit(void);
77extern int udp6_proc_init(void);
78extern void udp6_proc_exit(void);
79extern int ipv6_misc_proc_init(void);
80extern void ipv6_misc_proc_exit(void);
81extern int ac6_proc_init(void);
82extern void ac6_proc_exit(void);
83extern int if6_proc_init(void);
84extern void if6_proc_exit(void);
85#endif
86
87int sysctl_ipv6_bindv6only;
88
89#ifdef INET_REFCNT_DEBUG
90atomic_t inet6_sock_nr;
91#endif
92
93/* The inetsw table contains everything that inet_create needs to
94 * build a new socket.
95 */
96static struct list_head inetsw6[SOCK_MAX];
97static DEFINE_SPINLOCK(inetsw6_lock);
98
/*
 * sk_destruct hook installed by inet6_create(): runs the generic inet
 * destructor and, with INET_REFCNT_DEBUG, drops the inet6 socket count.
 */
static void inet6_sock_destruct(struct sock *sk)
{
	inet_sock_destruct(sk);
#ifdef INET_REFCNT_DEBUG
	atomic_dec(&inet6_sock_nr);
#endif
}
107
108static __inline__ struct ipv6_pinfo *inet6_sk_generic(struct sock *sk)
109{
110 const int offset = sk->sk_prot->obj_size - sizeof(struct ipv6_pinfo);
111
112 return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
113}
114
115static int inet6_create(struct socket *sock, int protocol)
116{
117 struct inet_sock *inet;
118 struct ipv6_pinfo *np;
119 struct sock *sk;
120 struct list_head *p;
121 struct inet_protosw *answer;
122 struct proto *answer_prot;
123 unsigned char answer_flags;
124 char answer_no_check;
125 int rc;
126
127 /* Look for the requested type/protocol pair. */
128 answer = NULL;
129 rcu_read_lock();
130 list_for_each_rcu(p, &inetsw6[sock->type]) {
131 answer = list_entry(p, struct inet_protosw, list);
132
133 /* Check the non-wild match. */
134 if (protocol == answer->protocol) {
135 if (protocol != IPPROTO_IP)
136 break;
137 } else {
138 /* Check for the two wild cases. */
139 if (IPPROTO_IP == protocol) {
140 protocol = answer->protocol;
141 break;
142 }
143 if (IPPROTO_IP == answer->protocol)
144 break;
145 }
146 answer = NULL;
147 }
148
149 rc = -ESOCKTNOSUPPORT;
150 if (!answer)
151 goto out_rcu_unlock;
152 rc = -EPERM;
153 if (answer->capability > 0 && !capable(answer->capability))
154 goto out_rcu_unlock;
155 rc = -EPROTONOSUPPORT;
156 if (!protocol)
157 goto out_rcu_unlock;
158
159 sock->ops = answer->ops;
160
161 answer_prot = answer->prot;
162 answer_no_check = answer->no_check;
163 answer_flags = answer->flags;
164 rcu_read_unlock();
165
166 BUG_TRAP(answer_prot->slab != NULL);
167
168 rc = -ENOBUFS;
169 sk = sk_alloc(PF_INET6, GFP_KERNEL, answer_prot, 1);
170 if (sk == NULL)
171 goto out;
172
173 sock_init_data(sock, sk);
174
175 rc = 0;
176 sk->sk_no_check = answer_no_check;
177 if (INET_PROTOSW_REUSE & answer_flags)
178 sk->sk_reuse = 1;
179
180 inet = inet_sk(sk);
181
182 if (SOCK_RAW == sock->type) {
183 inet->num = protocol;
184 if (IPPROTO_RAW == protocol)
185 inet->hdrincl = 1;
186 }
187
188 sk->sk_destruct = inet6_sock_destruct;
189 sk->sk_family = PF_INET6;
190 sk->sk_protocol = protocol;
191
192 sk->sk_backlog_rcv = answer->prot->backlog_rcv;
193
194 inet_sk(sk)->pinet6 = np = inet6_sk_generic(sk);
195 np->hop_limit = -1;
196 np->mcast_hops = -1;
197 np->mc_loop = 1;
198 np->pmtudisc = IPV6_PMTUDISC_WANT;
199 np->ipv6only = sysctl_ipv6_bindv6only;
200
201 /* Init the ipv4 part of the socket since we can have sockets
202 * using v6 API for ipv4.
203 */
204 inet->uc_ttl = -1;
205
206 inet->mc_loop = 1;
207 inet->mc_ttl = 1;
208 inet->mc_index = 0;
209 inet->mc_list = NULL;
210
211 if (ipv4_config.no_pmtu_disc)
212 inet->pmtudisc = IP_PMTUDISC_DONT;
213 else
214 inet->pmtudisc = IP_PMTUDISC_WANT;
215
216
217#ifdef INET_REFCNT_DEBUG
218 atomic_inc(&inet6_sock_nr);
219 atomic_inc(&inet_sock_nr);
220#endif
221 if (inet->num) {
222 /* It assumes that any protocol which allows
223 * the user to assign a number at socket
224 * creation time automatically shares.
225 */
226 inet->sport = ntohs(inet->num);
227 sk->sk_prot->hash(sk);
228 }
229 if (sk->sk_prot->init) {
230 rc = sk->sk_prot->init(sk);
231 if (rc) {
232 sk_common_release(sk);
233 goto out;
234 }
235 }
236out:
237 return rc;
238out_rcu_unlock:
239 rcu_read_unlock();
240 goto out;
241}
242
243
244/* bind for INET6 API */
245int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
246{
247 struct sockaddr_in6 *addr=(struct sockaddr_in6 *)uaddr;
248 struct sock *sk = sock->sk;
249 struct inet_sock *inet = inet_sk(sk);
250 struct ipv6_pinfo *np = inet6_sk(sk);
251 __u32 v4addr = 0;
252 unsigned short snum;
253 int addr_type = 0;
254 int err = 0;
255
256 /* If the socket has its own bind function then use it. */
257 if (sk->sk_prot->bind)
258 return sk->sk_prot->bind(sk, uaddr, addr_len);
259
260 if (addr_len < SIN6_LEN_RFC2133)
261 return -EINVAL;
262 addr_type = ipv6_addr_type(&addr->sin6_addr);
263 if ((addr_type & IPV6_ADDR_MULTICAST) && sock->type == SOCK_STREAM)
264 return -EINVAL;
265
266 snum = ntohs(addr->sin6_port);
267 if (snum && snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE))
268 return -EACCES;
269
270 lock_sock(sk);
271
272 /* Check these errors (active socket, double bind). */
273 if (sk->sk_state != TCP_CLOSE || inet->num) {
274 err = -EINVAL;
275 goto out;
276 }
277
278 /* Check if the address belongs to the host. */
279 if (addr_type == IPV6_ADDR_MAPPED) {
280 v4addr = addr->sin6_addr.s6_addr32[3];
281 if (inet_addr_type(v4addr) != RTN_LOCAL) {
282 err = -EADDRNOTAVAIL;
283 goto out;
284 }
285 } else {
286 if (addr_type != IPV6_ADDR_ANY) {
287 struct net_device *dev = NULL;
288
289 if (addr_type & IPV6_ADDR_LINKLOCAL) {
290 if (addr_len >= sizeof(struct sockaddr_in6) &&
291 addr->sin6_scope_id) {
292 /* Override any existing binding, if another one
293 * is supplied by user.
294 */
295 sk->sk_bound_dev_if = addr->sin6_scope_id;
296 }
297
298 /* Binding to link-local address requires an interface */
299 if (!sk->sk_bound_dev_if) {
300 err = -EINVAL;
301 goto out;
302 }
303 dev = dev_get_by_index(sk->sk_bound_dev_if);
304 if (!dev) {
305 err = -ENODEV;
306 goto out;
307 }
308 }
309
310 /* ipv4 addr of the socket is invalid. Only the
311 * unspecified and mapped address have a v4 equivalent.
312 */
313 v4addr = LOOPBACK4_IPV6;
314 if (!(addr_type & IPV6_ADDR_MULTICAST)) {
315 if (!ipv6_chk_addr(&addr->sin6_addr, dev, 0)) {
316 if (dev)
317 dev_put(dev);
318 err = -EADDRNOTAVAIL;
319 goto out;
320 }
321 }
322 if (dev)
323 dev_put(dev);
324 }
325 }
326
327 inet->rcv_saddr = v4addr;
328 inet->saddr = v4addr;
329
330 ipv6_addr_copy(&np->rcv_saddr, &addr->sin6_addr);
331
332 if (!(addr_type & IPV6_ADDR_MULTICAST))
333 ipv6_addr_copy(&np->saddr, &addr->sin6_addr);
334
335 /* Make sure we are allowed to bind here. */
336 if (sk->sk_prot->get_port(sk, snum)) {
337 inet_reset_saddr(sk);
338 err = -EADDRINUSE;
339 goto out;
340 }
341
342 if (addr_type != IPV6_ADDR_ANY)
343 sk->sk_userlocks |= SOCK_BINDADDR_LOCK;
344 if (snum)
345 sk->sk_userlocks |= SOCK_BINDPORT_LOCK;
346 inet->sport = ntohs(inet->num);
347 inet->dport = 0;
348 inet->daddr = 0;
349out:
350 release_sock(sk);
351 return err;
352}
353
354int inet6_release(struct socket *sock)
355{
356 struct sock *sk = sock->sk;
357
358 if (sk == NULL)
359 return -EINVAL;
360
361 /* Free mc lists */
362 ipv6_sock_mc_close(sk);
363
364 /* Free ac lists */
365 ipv6_sock_ac_close(sk);
366
367 return inet_release(sock);
368}
369
370int inet6_destroy_sock(struct sock *sk)
371{
372 struct ipv6_pinfo *np = inet6_sk(sk);
373 struct sk_buff *skb;
374 struct ipv6_txoptions *opt;
375
376 /*
377 * Release destination entry
378 */
379
380 sk_dst_reset(sk);
381
382 /* Release rx options */
383
384 if ((skb = xchg(&np->pktoptions, NULL)) != NULL)
385 kfree_skb(skb);
386
387 /* Free flowlabels */
388 fl6_free_socklist(sk);
389
390 /* Free tx options */
391
392 if ((opt = xchg(&np->opt, NULL)) != NULL)
393 sock_kfree_s(sk, opt, opt->tot_len);
394
395 return 0;
396}
397
398/*
399 * This does both peername and sockname.
400 */
401
402int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
403 int *uaddr_len, int peer)
404{
405 struct sockaddr_in6 *sin=(struct sockaddr_in6 *)uaddr;
406 struct sock *sk = sock->sk;
407 struct inet_sock *inet = inet_sk(sk);
408 struct ipv6_pinfo *np = inet6_sk(sk);
409
410 sin->sin6_family = AF_INET6;
411 sin->sin6_flowinfo = 0;
412 sin->sin6_scope_id = 0;
413 if (peer) {
414 if (!inet->dport)
415 return -ENOTCONN;
416 if (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_SYN_SENT)) &&
417 peer == 1)
418 return -ENOTCONN;
419 sin->sin6_port = inet->dport;
420 ipv6_addr_copy(&sin->sin6_addr, &np->daddr);
421 if (np->sndflow)
422 sin->sin6_flowinfo = np->flow_label;
423 } else {
424 if (ipv6_addr_any(&np->rcv_saddr))
425 ipv6_addr_copy(&sin->sin6_addr, &np->saddr);
426 else
427 ipv6_addr_copy(&sin->sin6_addr, &np->rcv_saddr);
428
429 sin->sin6_port = inet->sport;
430 }
431 if (ipv6_addr_type(&sin->sin6_addr) & IPV6_ADDR_LINKLOCAL)
432 sin->sin6_scope_id = sk->sk_bound_dev_if;
433 *uaddr_len = sizeof(*sin);
434 return(0);
435}
436
437int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
438{
439 struct sock *sk = sock->sk;
440 int err = -EINVAL;
441
442 switch(cmd)
443 {
444 case SIOCGSTAMP:
445 return sock_get_timestamp(sk, (struct timeval __user *)arg);
446
447 case SIOCADDRT:
448 case SIOCDELRT:
449
450 return(ipv6_route_ioctl(cmd,(void __user *)arg));
451
452 case SIOCSIFADDR:
453 return addrconf_add_ifaddr((void __user *) arg);
454 case SIOCDIFADDR:
455 return addrconf_del_ifaddr((void __user *) arg);
456 case SIOCSIFDSTADDR:
457 return addrconf_set_dstaddr((void __user *) arg);
458 default:
459 if (!sk->sk_prot->ioctl ||
460 (err = sk->sk_prot->ioctl(sk, cmd, arg)) == -ENOIOCTLCMD)
461 return(dev_ioctl(cmd,(void __user *) arg));
462 return err;
463 }
464 /*NOTREACHED*/
465 return(0);
466}
467
468struct proto_ops inet6_stream_ops = {
469 .family = PF_INET6,
470 .owner = THIS_MODULE,
471 .release = inet6_release,
472 .bind = inet6_bind,
473 .connect = inet_stream_connect, /* ok */
474 .socketpair = sock_no_socketpair, /* a do nothing */
475 .accept = inet_accept, /* ok */
476 .getname = inet6_getname,
477 .poll = tcp_poll, /* ok */
478 .ioctl = inet6_ioctl, /* must change */
479 .listen = inet_listen, /* ok */
480 .shutdown = inet_shutdown, /* ok */
481 .setsockopt = sock_common_setsockopt, /* ok */
482 .getsockopt = sock_common_getsockopt, /* ok */
483 .sendmsg = inet_sendmsg, /* ok */
484 .recvmsg = sock_common_recvmsg, /* ok */
485 .mmap = sock_no_mmap,
486 .sendpage = tcp_sendpage
487};
488
489struct proto_ops inet6_dgram_ops = {
490 .family = PF_INET6,
491 .owner = THIS_MODULE,
492 .release = inet6_release,
493 .bind = inet6_bind,
494 .connect = inet_dgram_connect, /* ok */
495 .socketpair = sock_no_socketpair, /* a do nothing */
496 .accept = sock_no_accept, /* a do nothing */
497 .getname = inet6_getname,
498 .poll = udp_poll, /* ok */
499 .ioctl = inet6_ioctl, /* must change */
500 .listen = sock_no_listen, /* ok */
501 .shutdown = inet_shutdown, /* ok */
502 .setsockopt = sock_common_setsockopt, /* ok */
503 .getsockopt = sock_common_getsockopt, /* ok */
504 .sendmsg = inet_sendmsg, /* ok */
505 .recvmsg = sock_common_recvmsg, /* ok */
506 .mmap = sock_no_mmap,
507 .sendpage = sock_no_sendpage,
508};
509
510static struct net_proto_family inet6_family_ops = {
511 .family = PF_INET6,
512 .create = inet6_create,
513 .owner = THIS_MODULE,
514};
515
516#ifdef CONFIG_SYSCTL
517extern void ipv6_sysctl_register(void);
518extern void ipv6_sysctl_unregister(void);
519#endif
520
521/* Same as inet6_dgram_ops, sans udp_poll. */
522static struct proto_ops inet6_sockraw_ops = {
523 .family = PF_INET6,
524 .owner = THIS_MODULE,
525 .release = inet6_release,
526 .bind = inet6_bind,
527 .connect = inet_dgram_connect, /* ok */
528 .socketpair = sock_no_socketpair, /* a do nothing */
529 .accept = sock_no_accept, /* a do nothing */
530 .getname = inet6_getname,
531 .poll = datagram_poll, /* ok */
532 .ioctl = inet6_ioctl, /* must change */
533 .listen = sock_no_listen, /* ok */
534 .shutdown = inet_shutdown, /* ok */
535 .setsockopt = sock_common_setsockopt, /* ok */
536 .getsockopt = sock_common_getsockopt, /* ok */
537 .sendmsg = inet_sendmsg, /* ok */
538 .recvmsg = sock_common_recvmsg, /* ok */
539 .mmap = sock_no_mmap,
540 .sendpage = sock_no_sendpage,
541};
542
543static struct inet_protosw rawv6_protosw = {
544 .type = SOCK_RAW,
545 .protocol = IPPROTO_IP, /* wild card */
546 .prot = &rawv6_prot,
547 .ops = &inet6_sockraw_ops,
548 .capability = CAP_NET_RAW,
549 .no_check = UDP_CSUM_DEFAULT,
550 .flags = INET_PROTOSW_REUSE,
551};
552
553void
554inet6_register_protosw(struct inet_protosw *p)
555{
556 struct list_head *lh;
557 struct inet_protosw *answer;
558 int protocol = p->protocol;
559 struct list_head *last_perm;
560
561 spin_lock_bh(&inetsw6_lock);
562
563 if (p->type >= SOCK_MAX)
564 goto out_illegal;
565
566 /* If we are trying to override a permanent protocol, bail. */
567 answer = NULL;
568 last_perm = &inetsw6[p->type];
569 list_for_each(lh, &inetsw6[p->type]) {
570 answer = list_entry(lh, struct inet_protosw, list);
571
572 /* Check only the non-wild match. */
573 if (INET_PROTOSW_PERMANENT & answer->flags) {
574 if (protocol == answer->protocol)
575 break;
576 last_perm = lh;
577 }
578
579 answer = NULL;
580 }
581 if (answer)
582 goto out_permanent;
583
584 /* Add the new entry after the last permanent entry if any, so that
585 * the new entry does not override a permanent entry when matched with
586 * a wild-card protocol. But it is allowed to override any existing
587 * non-permanent entry. This means that when we remove this entry, the
588 * system automatically returns to the old behavior.
589 */
590 list_add_rcu(&p->list, last_perm);
591out:
592 spin_unlock_bh(&inetsw6_lock);
593 return;
594
595out_permanent:
596 printk(KERN_ERR "Attempt to override permanent protocol %d.\n",
597 protocol);
598 goto out;
599
600out_illegal:
601 printk(KERN_ERR
602 "Ignoring attempt to register invalid socket type %d.\n",
603 p->type);
604 goto out;
605}
606
607void
608inet6_unregister_protosw(struct inet_protosw *p)
609{
610 if (INET_PROTOSW_PERMANENT & p->flags) {
611 printk(KERN_ERR
612 "Attempt to unregister permanent protocol %d.\n",
613 p->protocol);
614 } else {
615 spin_lock_bh(&inetsw6_lock);
616 list_del_rcu(&p->list);
617 spin_unlock_bh(&inetsw6_lock);
618
619 synchronize_net();
620 }
621}
622
623int
624snmp6_mib_init(void *ptr[2], size_t mibsize, size_t mibalign)
625{
626 if (ptr == NULL)
627 return -EINVAL;
628
629 ptr[0] = __alloc_percpu(mibsize, mibalign);
630 if (!ptr[0])
631 goto err0;
632
633 ptr[1] = __alloc_percpu(mibsize, mibalign);
634 if (!ptr[1])
635 goto err1;
636
637 return 0;
638
639err1:
640 free_percpu(ptr[0]);
641 ptr[0] = NULL;
642err0:
643 return -ENOMEM;
644}
645
646void
647snmp6_mib_free(void *ptr[2])
648{
649 if (ptr == NULL)
650 return;
651 if (ptr[0])
652 free_percpu(ptr[0]);
653 if (ptr[1])
654 free_percpu(ptr[1]);
655 ptr[0] = ptr[1] = NULL;
656}
657
658static int __init init_ipv6_mibs(void)
659{
660 if (snmp6_mib_init((void **)ipv6_statistics, sizeof (struct ipstats_mib),
661 __alignof__(struct ipstats_mib)) < 0)
662 goto err_ip_mib;
663 if (snmp6_mib_init((void **)icmpv6_statistics, sizeof (struct icmpv6_mib),
664 __alignof__(struct icmpv6_mib)) < 0)
665 goto err_icmp_mib;
666 if (snmp6_mib_init((void **)udp_stats_in6, sizeof (struct udp_mib),
667 __alignof__(struct udp_mib)) < 0)
668 goto err_udp_mib;
669 return 0;
670
671err_udp_mib:
672 snmp6_mib_free((void **)icmpv6_statistics);
673err_icmp_mib:
674 snmp6_mib_free((void **)ipv6_statistics);
675err_ip_mib:
676 return -ENOMEM;
677
678}
679
680static void cleanup_ipv6_mibs(void)
681{
682 snmp6_mib_free((void **)ipv6_statistics);
683 snmp6_mib_free((void **)icmpv6_statistics);
684 snmp6_mib_free((void **)udp_stats_in6);
685}
686
687extern int ipv6_misc_proc_init(void);
688
689static int __init inet6_init(void)
690{
691 struct sk_buff *dummy_skb;
692 struct list_head *r;
693 int err;
694
695#ifdef MODULE
696#if 0 /* FIXME --RR */
697 if (!mod_member_present(&__this_module, can_unload))
698 return -EINVAL;
699
700 __this_module.can_unload = &ipv6_unload;
701#endif
702#endif
703
704 if (sizeof(struct inet6_skb_parm) > sizeof(dummy_skb->cb)) {
705 printk(KERN_CRIT "inet6_proto_init: size fault\n");
706 return -EINVAL;
707 }
708
709 err = proto_register(&tcpv6_prot, 1);
710 if (err)
711 goto out;
712
713 err = proto_register(&udpv6_prot, 1);
714 if (err)
715 goto out_unregister_tcp_proto;
716
717 err = proto_register(&rawv6_prot, 1);
718 if (err)
719 goto out_unregister_udp_proto;
720
721
722 /* Register the socket-side information for inet6_create. */
723 for(r = &inetsw6[0]; r < &inetsw6[SOCK_MAX]; ++r)
724 INIT_LIST_HEAD(r);
725
726 /* We MUST register RAW sockets before we create the ICMP6,
727 * IGMP6, or NDISC control sockets.
728 */
729 inet6_register_protosw(&rawv6_protosw);
730
731 /* Register the family here so that the init calls below will
732 * be able to create sockets. (?? is this dangerous ??)
733 */
734 (void) sock_register(&inet6_family_ops);
735
736 /* Initialise ipv6 mibs */
737 err = init_ipv6_mibs();
738 if (err)
739 goto out_unregister_raw_proto;
740
741 /*
742 * ipngwg API draft makes clear that the correct semantics
743 * for TCP and UDP is to consider one TCP and UDP instance
744 * in a host availiable by both INET and INET6 APIs and
745 * able to communicate via both network protocols.
746 */
747
748#ifdef CONFIG_SYSCTL
749 ipv6_sysctl_register();
750#endif
751 err = icmpv6_init(&inet6_family_ops);
752 if (err)
753 goto icmp_fail;
754 err = ndisc_init(&inet6_family_ops);
755 if (err)
756 goto ndisc_fail;
757 err = igmp6_init(&inet6_family_ops);
758 if (err)
759 goto igmp_fail;
760 /* Create /proc/foo6 entries. */
761#ifdef CONFIG_PROC_FS
762 err = -ENOMEM;
763 if (raw6_proc_init())
764 goto proc_raw6_fail;
765 if (tcp6_proc_init())
766 goto proc_tcp6_fail;
767 if (udp6_proc_init())
768 goto proc_udp6_fail;
769 if (ipv6_misc_proc_init())
770 goto proc_misc6_fail;
771
772 if (ac6_proc_init())
773 goto proc_anycast6_fail;
774 if (if6_proc_init())
775 goto proc_if6_fail;
776#endif
777 ipv6_packet_init();
778 ip6_route_init();
779 ip6_flowlabel_init();
780 err = addrconf_init();
781 if (err)
782 goto addrconf_fail;
783 sit_init();
784
785 /* Init v6 extension headers. */
786 ipv6_rthdr_init();
787 ipv6_frag_init();
788 ipv6_nodata_init();
789 ipv6_destopt_init();
790
791 /* Init v6 transport protocols. */
792 udpv6_init();
793 tcpv6_init();
794 err = 0;
795out:
796 return err;
797
798addrconf_fail:
799 ip6_flowlabel_cleanup();
800 ip6_route_cleanup();
801 ipv6_packet_cleanup();
802#ifdef CONFIG_PROC_FS
803 if6_proc_exit();
804proc_if6_fail:
805 ac6_proc_exit();
806proc_anycast6_fail:
807 ipv6_misc_proc_exit();
808proc_misc6_fail:
809 udp6_proc_exit();
810proc_udp6_fail:
811 tcp6_proc_exit();
812proc_tcp6_fail:
813 raw6_proc_exit();
814proc_raw6_fail:
815#endif
816 igmp6_cleanup();
817igmp_fail:
818 ndisc_cleanup();
819ndisc_fail:
820 icmpv6_cleanup();
821icmp_fail:
822#ifdef CONFIG_SYSCTL
823 ipv6_sysctl_unregister();
824#endif
825 cleanup_ipv6_mibs();
826out_unregister_raw_proto:
827 proto_unregister(&rawv6_prot);
828out_unregister_udp_proto:
829 proto_unregister(&udpv6_prot);
830out_unregister_tcp_proto:
831 proto_unregister(&tcpv6_prot);
832 goto out;
833}
834module_init(inet6_init);
835
836static void __exit inet6_exit(void)
837{
838 /* First of all disallow new sockets creation. */
839 sock_unregister(PF_INET6);
840#ifdef CONFIG_PROC_FS
841 if6_proc_exit();
842 ac6_proc_exit();
843 ipv6_misc_proc_exit();
844 udp6_proc_exit();
845 tcp6_proc_exit();
846 raw6_proc_exit();
847#endif
848 /* Cleanup code parts. */
849 sit_cleanup();
850 ip6_flowlabel_cleanup();
851 addrconf_cleanup();
852 ip6_route_cleanup();
853 ipv6_packet_cleanup();
854 igmp6_cleanup();
855 ndisc_cleanup();
856 icmpv6_cleanup();
857#ifdef CONFIG_SYSCTL
858 ipv6_sysctl_unregister();
859#endif
860 cleanup_ipv6_mibs();
861 proto_unregister(&rawv6_prot);
862 proto_unregister(&udpv6_prot);
863 proto_unregister(&tcpv6_prot);
864}
865module_exit(inet6_exit);
866
867MODULE_ALIAS_NETPROTO(PF_INET6);
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
new file mode 100644
index 000000000000..e3ecf626cbf7
--- /dev/null
+++ b/net/ipv6/ah6.c
@@ -0,0 +1,478 @@
1/*
2 * Copyright (C)2002 USAGI/WIDE Project
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 *
18 * Authors
19 *
20 * Mitsuru KANDA @USAGI : IPv6 Support
21 * Kazunori MIYAZAWA @USAGI :
22 * Kunihiro Ishiguro <kunihiro@ipinfusion.com>
23 *
24 * This file is derived from net/ipv4/ah.c.
25 */
26
27#include <linux/config.h>
28#include <linux/module.h>
29#include <net/ip.h>
30#include <net/ah.h>
31#include <linux/crypto.h>
32#include <linux/pfkeyv2.h>
33#include <linux/string.h>
34#include <net/icmp.h>
35#include <net/ipv6.h>
36#include <net/xfrm.h>
37#include <asm/scatterlist.h>
38
39static int zero_out_mutable_opts(struct ipv6_opt_hdr *opthdr)
40{
41 u8 *opt = (u8 *)opthdr;
42 int len = ipv6_optlen(opthdr);
43 int off = 0;
44 int optlen = 0;
45
46 off += 2;
47 len -= 2;
48
49 while (len > 0) {
50
51 switch (opt[off]) {
52
53 case IPV6_TLV_PAD0:
54 optlen = 1;
55 break;
56 default:
57 if (len < 2)
58 goto bad;
59 optlen = opt[off+1]+2;
60 if (len < optlen)
61 goto bad;
62 if (opt[off] & 0x20)
63 memset(&opt[off+2], 0, opt[off+1]);
64 break;
65 }
66
67 off += optlen;
68 len -= optlen;
69 }
70 if (len == 0)
71 return 1;
72
73bad:
74 return 0;
75}
76
77/**
78 * ipv6_rearrange_rthdr - rearrange IPv6 routing header
79 * @iph: IPv6 header
80 * @rthdr: routing header
81 *
82 * Rearrange the destination address in @iph and the addresses in @rthdr
83 * so that they appear in the order they will at the final destination.
84 * See Appendix A2 of RFC 2402 for details.
85 */
86static void ipv6_rearrange_rthdr(struct ipv6hdr *iph, struct ipv6_rt_hdr *rthdr)
87{
88 int segments, segments_left;
89 struct in6_addr *addrs;
90 struct in6_addr final_addr;
91
92 segments_left = rthdr->segments_left;
93 if (segments_left == 0)
94 return;
95 rthdr->segments_left = 0;
96
97 /* The value of rthdr->hdrlen has been verified either by the system
98 * call if it is locally generated, or by ipv6_rthdr_rcv() for incoming
99 * packets. So we can assume that it is even and that segments is
100 * greater than or equal to segments_left.
101 *
102 * For the same reason we can assume that this option is of type 0.
103 */
104 segments = rthdr->hdrlen >> 1;
105
106 addrs = ((struct rt0_hdr *)rthdr)->addr;
107 ipv6_addr_copy(&final_addr, addrs + segments - 1);
108
109 addrs += segments - segments_left;
110 memmove(addrs + 1, addrs, (segments_left - 1) * sizeof(*addrs));
111
112 ipv6_addr_copy(addrs, &iph->daddr);
113 ipv6_addr_copy(&iph->daddr, &final_addr);
114}
115
116static int ipv6_clear_mutable_options(struct ipv6hdr *iph, int len)
117{
118 union {
119 struct ipv6hdr *iph;
120 struct ipv6_opt_hdr *opth;
121 struct ipv6_rt_hdr *rth;
122 char *raw;
123 } exthdr = { .iph = iph };
124 char *end = exthdr.raw + len;
125 int nexthdr = iph->nexthdr;
126
127 exthdr.iph++;
128
129 while (exthdr.raw < end) {
130 switch (nexthdr) {
131 case NEXTHDR_HOP:
132 case NEXTHDR_DEST:
133 if (!zero_out_mutable_opts(exthdr.opth)) {
134 LIMIT_NETDEBUG(printk(
135 KERN_WARNING "overrun %sopts\n",
136 nexthdr == NEXTHDR_HOP ?
137 "hop" : "dest"));
138 return -EINVAL;
139 }
140 break;
141
142 case NEXTHDR_ROUTING:
143 ipv6_rearrange_rthdr(iph, exthdr.rth);
144 break;
145
146 default :
147 return 0;
148 }
149
150 nexthdr = exthdr.opth->nexthdr;
151 exthdr.raw += ipv6_optlen(exthdr.opth);
152 }
153
154 return 0;
155}
156
157static int ah6_output(struct xfrm_state *x, struct sk_buff *skb)
158{
159 int err;
160 int extlen;
161 struct ipv6hdr *top_iph;
162 struct ip_auth_hdr *ah;
163 struct ah_data *ahp;
164 u8 nexthdr;
165 char tmp_base[8];
166 struct {
167 struct in6_addr daddr;
168 char hdrs[0];
169 } *tmp_ext;
170
171 top_iph = (struct ipv6hdr *)skb->data;
172 top_iph->payload_len = htons(skb->len - sizeof(*top_iph));
173
174 nexthdr = *skb->nh.raw;
175 *skb->nh.raw = IPPROTO_AH;
176
177 /* When there are no extension headers, we only need to save the first
178 * 8 bytes of the base IP header.
179 */
180 memcpy(tmp_base, top_iph, sizeof(tmp_base));
181
182 tmp_ext = NULL;
183 extlen = skb->h.raw - (unsigned char *)(top_iph + 1);
184 if (extlen) {
185 extlen += sizeof(*tmp_ext);
186 tmp_ext = kmalloc(extlen, GFP_ATOMIC);
187 if (!tmp_ext) {
188 err = -ENOMEM;
189 goto error;
190 }
191 memcpy(tmp_ext, &top_iph->daddr, extlen);
192 err = ipv6_clear_mutable_options(top_iph,
193 extlen - sizeof(*tmp_ext) +
194 sizeof(*top_iph));
195 if (err)
196 goto error_free_iph;
197 }
198
199 ah = (struct ip_auth_hdr *)skb->h.raw;
200 ah->nexthdr = nexthdr;
201
202 top_iph->priority = 0;
203 top_iph->flow_lbl[0] = 0;
204 top_iph->flow_lbl[1] = 0;
205 top_iph->flow_lbl[2] = 0;
206 top_iph->hop_limit = 0;
207
208 ahp = x->data;
209 ah->hdrlen = (XFRM_ALIGN8(sizeof(struct ipv6_auth_hdr) +
210 ahp->icv_trunc_len) >> 2) - 2;
211
212 ah->reserved = 0;
213 ah->spi = x->id.spi;
214 ah->seq_no = htonl(++x->replay.oseq);
215 ahp->icv(ahp, skb, ah->auth_data);
216
217 err = 0;
218
219 memcpy(top_iph, tmp_base, sizeof(tmp_base));
220 if (tmp_ext) {
221 memcpy(&top_iph->daddr, tmp_ext, extlen);
222error_free_iph:
223 kfree(tmp_ext);
224 }
225
226error:
227 return err;
228}
229
230static int ah6_input(struct xfrm_state *x, struct xfrm_decap_state *decap, struct sk_buff *skb)
231{
232 /*
233 * Before process AH
234 * [IPv6][Ext1][Ext2][AH][Dest][Payload]
235 * |<-------------->| hdr_len
236 *
237 * To erase AH:
238 * Keeping copy of cleared headers. After AH processing,
239 * Moving the pointer of skb->nh.raw by using skb_pull as long as AH
240 * header length. Then copy back the copy as long as hdr_len
241 * If destination header following AH exists, copy it into after [Ext2].
242 *
243 * |<>|[IPv6][Ext1][Ext2][Dest][Payload]
244 * There is offset of AH before IPv6 header after the process.
245 */
246
247 struct ipv6_auth_hdr *ah;
248 struct ah_data *ahp;
249 unsigned char *tmp_hdr = NULL;
250 u16 hdr_len;
251 u16 ah_hlen;
252 int nexthdr;
253
254 if (!pskb_may_pull(skb, sizeof(struct ip_auth_hdr)))
255 goto out;
256
257 /* We are going to _remove_ AH header to keep sockets happy,
258 * so... Later this can change. */
259 if (skb_cloned(skb) &&
260 pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
261 goto out;
262
263 hdr_len = skb->data - skb->nh.raw;
264 ah = (struct ipv6_auth_hdr*)skb->data;
265 ahp = x->data;
266 nexthdr = ah->nexthdr;
267 ah_hlen = (ah->hdrlen + 2) << 2;
268
269 if (ah_hlen != XFRM_ALIGN8(sizeof(struct ipv6_auth_hdr) + ahp->icv_full_len) &&
270 ah_hlen != XFRM_ALIGN8(sizeof(struct ipv6_auth_hdr) + ahp->icv_trunc_len))
271 goto out;
272
273 if (!pskb_may_pull(skb, ah_hlen))
274 goto out;
275
276 tmp_hdr = kmalloc(hdr_len, GFP_ATOMIC);
277 if (!tmp_hdr)
278 goto out;
279 memcpy(tmp_hdr, skb->nh.raw, hdr_len);
280 if (ipv6_clear_mutable_options(skb->nh.ipv6h, hdr_len))
281 goto out;
282 skb->nh.ipv6h->priority = 0;
283 skb->nh.ipv6h->flow_lbl[0] = 0;
284 skb->nh.ipv6h->flow_lbl[1] = 0;
285 skb->nh.ipv6h->flow_lbl[2] = 0;
286 skb->nh.ipv6h->hop_limit = 0;
287
288 {
289 u8 auth_data[MAX_AH_AUTH_LEN];
290
291 memcpy(auth_data, ah->auth_data, ahp->icv_trunc_len);
292 memset(ah->auth_data, 0, ahp->icv_trunc_len);
293 skb_push(skb, skb->data - skb->nh.raw);
294 ahp->icv(ahp, skb, ah->auth_data);
295 if (memcmp(ah->auth_data, auth_data, ahp->icv_trunc_len)) {
296 LIMIT_NETDEBUG(
297 printk(KERN_WARNING "ipsec ah authentication error\n"));
298 x->stats.integrity_failed++;
299 goto free_out;
300 }
301 }
302
303 skb->nh.raw = skb_pull(skb, ah_hlen);
304 memcpy(skb->nh.raw, tmp_hdr, hdr_len);
305 skb->nh.ipv6h->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
306 skb_pull(skb, hdr_len);
307 skb->h.raw = skb->data;
308
309
310 kfree(tmp_hdr);
311
312 return nexthdr;
313
314free_out:
315 kfree(tmp_hdr);
316out:
317 return -EINVAL;
318}
319
320static void ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
321 int type, int code, int offset, __u32 info)
322{
323 struct ipv6hdr *iph = (struct ipv6hdr*)skb->data;
324 struct ip_auth_hdr *ah = (struct ip_auth_hdr*)(skb->data+offset);
325 struct xfrm_state *x;
326
327 if (type != ICMPV6_DEST_UNREACH &&
328 type != ICMPV6_PKT_TOOBIG)
329 return;
330
331 x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, ah->spi, IPPROTO_AH, AF_INET6);
332 if (!x)
333 return;
334
335 NETDEBUG(printk(KERN_DEBUG "pmtu discovery on SA AH/%08x/"
336 "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n",
337 ntohl(ah->spi), NIP6(iph->daddr)));
338
339 xfrm_state_put(x);
340}
341
342static int ah6_init_state(struct xfrm_state *x, void *args)
343{
344 struct ah_data *ahp = NULL;
345 struct xfrm_algo_desc *aalg_desc;
346
347 if (!x->aalg)
348 goto error;
349
350 /* null auth can use a zero length key */
351 if (x->aalg->alg_key_len > 512)
352 goto error;
353
354 if (x->encap)
355 goto error;
356
357 ahp = kmalloc(sizeof(*ahp), GFP_KERNEL);
358 if (ahp == NULL)
359 return -ENOMEM;
360
361 memset(ahp, 0, sizeof(*ahp));
362
363 ahp->key = x->aalg->alg_key;
364 ahp->key_len = (x->aalg->alg_key_len+7)/8;
365 ahp->tfm = crypto_alloc_tfm(x->aalg->alg_name, 0);
366 if (!ahp->tfm)
367 goto error;
368 ahp->icv = ah_hmac_digest;
369
370 /*
371 * Lookup the algorithm description maintained by xfrm_algo,
372 * verify crypto transform properties, and store information
373 * we need for AH processing. This lookup cannot fail here
374 * after a successful crypto_alloc_tfm().
375 */
376 aalg_desc = xfrm_aalg_get_byname(x->aalg->alg_name, 0);
377 BUG_ON(!aalg_desc);
378
379 if (aalg_desc->uinfo.auth.icv_fullbits/8 !=
380 crypto_tfm_alg_digestsize(ahp->tfm)) {
381 printk(KERN_INFO "AH: %s digestsize %u != %hu\n",
382 x->aalg->alg_name, crypto_tfm_alg_digestsize(ahp->tfm),
383 aalg_desc->uinfo.auth.icv_fullbits/8);
384 goto error;
385 }
386
387 ahp->icv_full_len = aalg_desc->uinfo.auth.icv_fullbits/8;
388 ahp->icv_trunc_len = aalg_desc->uinfo.auth.icv_truncbits/8;
389
390 BUG_ON(ahp->icv_trunc_len > MAX_AH_AUTH_LEN);
391
392 ahp->work_icv = kmalloc(ahp->icv_full_len, GFP_KERNEL);
393 if (!ahp->work_icv)
394 goto error;
395
396 x->props.header_len = XFRM_ALIGN8(sizeof(struct ipv6_auth_hdr) + ahp->icv_trunc_len);
397 if (x->props.mode)
398 x->props.header_len += sizeof(struct ipv6hdr);
399 x->data = ahp;
400
401 return 0;
402
403error:
404 if (ahp) {
405 if (ahp->work_icv)
406 kfree(ahp->work_icv);
407 if (ahp->tfm)
408 crypto_free_tfm(ahp->tfm);
409 kfree(ahp);
410 }
411 return -EINVAL;
412}
413
414static void ah6_destroy(struct xfrm_state *x)
415{
416 struct ah_data *ahp = x->data;
417
418 if (!ahp)
419 return;
420
421 if (ahp->work_icv) {
422 kfree(ahp->work_icv);
423 ahp->work_icv = NULL;
424 }
425 if (ahp->tfm) {
426 crypto_free_tfm(ahp->tfm);
427 ahp->tfm = NULL;
428 }
429 kfree(ahp);
430}
431
432static struct xfrm_type ah6_type =
433{
434 .description = "AH6",
435 .owner = THIS_MODULE,
436 .proto = IPPROTO_AH,
437 .init_state = ah6_init_state,
438 .destructor = ah6_destroy,
439 .input = ah6_input,
440 .output = ah6_output
441};
442
443static struct inet6_protocol ah6_protocol = {
444 .handler = xfrm6_rcv,
445 .err_handler = ah6_err,
446 .flags = INET6_PROTO_NOPOLICY,
447};
448
449static int __init ah6_init(void)
450{
451 if (xfrm_register_type(&ah6_type, AF_INET6) < 0) {
452 printk(KERN_INFO "ipv6 ah init: can't add xfrm type\n");
453 return -EAGAIN;
454 }
455
456 if (inet6_add_protocol(&ah6_protocol, IPPROTO_AH) < 0) {
457 printk(KERN_INFO "ipv6 ah init: can't add protocol\n");
458 xfrm_unregister_type(&ah6_type, AF_INET6);
459 return -EAGAIN;
460 }
461
462 return 0;
463}
464
465static void __exit ah6_fini(void)
466{
467 if (inet6_del_protocol(&ah6_protocol, IPPROTO_AH) < 0)
468 printk(KERN_INFO "ipv6 ah close: can't remove protocol\n");
469
470 if (xfrm_unregister_type(&ah6_type, AF_INET6) < 0)
471 printk(KERN_INFO "ipv6 ah close: can't remove xfrm type\n");
472
473}
474
475module_init(ah6_init);
476module_exit(ah6_fini);
477
478MODULE_LICENSE("GPL");
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
new file mode 100644
index 000000000000..5d22ca3cca2e
--- /dev/null
+++ b/net/ipv6/anycast.c
@@ -0,0 +1,594 @@
1/*
2 * Anycast support for IPv6
3 * Linux INET6 implementation
4 *
5 * Authors:
6 * David L Stevens (dlstevens@us.ibm.com)
7 *
8 * based heavily on net/ipv6/mcast.c
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 */
15
16#include <linux/config.h>
17#include <linux/module.h>
18#include <linux/errno.h>
19#include <linux/types.h>
20#include <linux/random.h>
21#include <linux/string.h>
22#include <linux/socket.h>
23#include <linux/sockios.h>
24#include <linux/sched.h>
25#include <linux/net.h>
26#include <linux/in6.h>
27#include <linux/netdevice.h>
28#include <linux/if_arp.h>
29#include <linux/route.h>
30#include <linux/init.h>
31#include <linux/proc_fs.h>
32#include <linux/seq_file.h>
33
34#include <net/sock.h>
35#include <net/snmp.h>
36
37#include <net/ipv6.h>
38#include <net/protocol.h>
39#include <net/if_inet6.h>
40#include <net/ndisc.h>
41#include <net/addrconf.h>
42#include <net/ip6_route.h>
43
44#include <net/checksum.h>
45
46static int ipv6_dev_ac_dec(struct net_device *dev, struct in6_addr *addr);
47
48/* Big ac list lock for all the sockets */
49static DEFINE_RWLOCK(ipv6_sk_ac_lock);
50
51static int
52ip6_onlink(struct in6_addr *addr, struct net_device *dev)
53{
54 struct inet6_dev *idev;
55 struct inet6_ifaddr *ifa;
56 int onlink;
57
58 onlink = 0;
59 read_lock(&addrconf_lock);
60 idev = __in6_dev_get(dev);
61 if (idev) {
62 read_lock_bh(&idev->lock);
63 for (ifa=idev->addr_list; ifa; ifa=ifa->if_next) {
64 onlink = ipv6_prefix_equal(addr, &ifa->addr,
65 ifa->prefix_len);
66 if (onlink)
67 break;
68 }
69 read_unlock_bh(&idev->lock);
70 }
71 read_unlock(&addrconf_lock);
72 return onlink;
73}
74
75/*
76 * socket join an anycast group
77 */
78
79int ipv6_sock_ac_join(struct sock *sk, int ifindex, struct in6_addr *addr)
80{
81 struct ipv6_pinfo *np = inet6_sk(sk);
82 struct net_device *dev = NULL;
83 struct inet6_dev *idev;
84 struct ipv6_ac_socklist *pac;
85 int ishost = !ipv6_devconf.forwarding;
86 int err = 0;
87
88 if (!capable(CAP_NET_ADMIN))
89 return -EPERM;
90 if (ipv6_addr_is_multicast(addr))
91 return -EINVAL;
92 if (ipv6_chk_addr(addr, NULL, 0))
93 return -EINVAL;
94
95 pac = sock_kmalloc(sk, sizeof(struct ipv6_ac_socklist), GFP_KERNEL);
96 if (pac == NULL)
97 return -ENOMEM;
98 pac->acl_next = NULL;
99 ipv6_addr_copy(&pac->acl_addr, addr);
100
101 if (ifindex == 0) {
102 struct rt6_info *rt;
103
104 rt = rt6_lookup(addr, NULL, 0, 0);
105 if (rt) {
106 dev = rt->rt6i_dev;
107 dev_hold(dev);
108 dst_release(&rt->u.dst);
109 } else if (ishost) {
110 err = -EADDRNOTAVAIL;
111 goto out_free_pac;
112 } else {
113 /* router, no matching interface: just pick one */
114
115 dev = dev_get_by_flags(IFF_UP, IFF_UP|IFF_LOOPBACK);
116 }
117 } else
118 dev = dev_get_by_index(ifindex);
119
120 if (dev == NULL) {
121 err = -ENODEV;
122 goto out_free_pac;
123 }
124
125 idev = in6_dev_get(dev);
126 if (!idev) {
127 if (ifindex)
128 err = -ENODEV;
129 else
130 err = -EADDRNOTAVAIL;
131 goto out_dev_put;
132 }
133 /* reset ishost, now that we have a specific device */
134 ishost = !idev->cnf.forwarding;
135 in6_dev_put(idev);
136
137 pac->acl_ifindex = dev->ifindex;
138
139 /* XXX
140 * For hosts, allow link-local or matching prefix anycasts.
141 * This obviates the need for propagating anycast routes while
142 * still allowing some non-router anycast participation.
143 */
144 if (!ip6_onlink(addr, dev)) {
145 if (ishost)
146 err = -EADDRNOTAVAIL;
147 if (err)
148 goto out_dev_put;
149 }
150
151 err = ipv6_dev_ac_inc(dev, addr);
152 if (err)
153 goto out_dev_put;
154
155 write_lock_bh(&ipv6_sk_ac_lock);
156 pac->acl_next = np->ipv6_ac_list;
157 np->ipv6_ac_list = pac;
158 write_unlock_bh(&ipv6_sk_ac_lock);
159
160 dev_put(dev);
161
162 return 0;
163
164out_dev_put:
165 dev_put(dev);
166out_free_pac:
167 sock_kfree_s(sk, pac, sizeof(*pac));
168 return err;
169}
170
171/*
172 * socket leave an anycast group
173 */
174int ipv6_sock_ac_drop(struct sock *sk, int ifindex, struct in6_addr *addr)
175{
176 struct ipv6_pinfo *np = inet6_sk(sk);
177 struct net_device *dev;
178 struct ipv6_ac_socklist *pac, *prev_pac;
179
180 write_lock_bh(&ipv6_sk_ac_lock);
181 prev_pac = NULL;
182 for (pac = np->ipv6_ac_list; pac; pac = pac->acl_next) {
183 if ((ifindex == 0 || pac->acl_ifindex == ifindex) &&
184 ipv6_addr_equal(&pac->acl_addr, addr))
185 break;
186 prev_pac = pac;
187 }
188 if (!pac) {
189 write_unlock_bh(&ipv6_sk_ac_lock);
190 return -ENOENT;
191 }
192 if (prev_pac)
193 prev_pac->acl_next = pac->acl_next;
194 else
195 np->ipv6_ac_list = pac->acl_next;
196
197 write_unlock_bh(&ipv6_sk_ac_lock);
198
199 dev = dev_get_by_index(pac->acl_ifindex);
200 if (dev) {
201 ipv6_dev_ac_dec(dev, &pac->acl_addr);
202 dev_put(dev);
203 }
204 sock_kfree_s(sk, pac, sizeof(*pac));
205 return 0;
206}
207
208void ipv6_sock_ac_close(struct sock *sk)
209{
210 struct ipv6_pinfo *np = inet6_sk(sk);
211 struct net_device *dev = NULL;
212 struct ipv6_ac_socklist *pac;
213 int prev_index;
214
215 write_lock_bh(&ipv6_sk_ac_lock);
216 pac = np->ipv6_ac_list;
217 np->ipv6_ac_list = NULL;
218 write_unlock_bh(&ipv6_sk_ac_lock);
219
220 prev_index = 0;
221 while (pac) {
222 struct ipv6_ac_socklist *next = pac->acl_next;
223
224 if (pac->acl_ifindex != prev_index) {
225 if (dev)
226 dev_put(dev);
227 dev = dev_get_by_index(pac->acl_ifindex);
228 prev_index = pac->acl_ifindex;
229 }
230 if (dev)
231 ipv6_dev_ac_dec(dev, &pac->acl_addr);
232 sock_kfree_s(sk, pac, sizeof(*pac));
233 pac = next;
234 }
235 if (dev)
236 dev_put(dev);
237}
238
#if 0
/* This function is unused.  It was apparently meant to filter datagrams
 * inside udp/raw, but that was never wired up.
 *
 * That is fine: anycast delivery is no different from unicast delivery.
 */

int inet6_ac_check(struct sock *sk, struct in6_addr *addr, int ifindex)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct ipv6_ac_socklist *pac;
	int found = 0;

	read_lock(&ipv6_sk_ac_lock);
	for (pac = np->ipv6_ac_list; pac && !found; pac = pac->acl_next) {
		/* A non-zero ifindex restricts the match to that device. */
		if (ifindex && pac->acl_ifindex != ifindex)
			continue;
		found = ipv6_addr_equal(&pac->acl_addr, addr);
	}
	read_unlock(&ipv6_sk_ac_lock);

	return found;
}

#endif
267
268static void aca_put(struct ifacaddr6 *ac)
269{
270 if (atomic_dec_and_test(&ac->aca_refcnt)) {
271 in6_dev_put(ac->aca_idev);
272 dst_release(&ac->aca_rt->u.dst);
273 kfree(ac);
274 }
275}
276
277/*
278 * device anycast group inc (add if not found)
279 */
280int ipv6_dev_ac_inc(struct net_device *dev, struct in6_addr *addr)
281{
282 struct ifacaddr6 *aca;
283 struct inet6_dev *idev;
284 struct rt6_info *rt;
285 int err;
286
287 idev = in6_dev_get(dev);
288
289 if (idev == NULL)
290 return -EINVAL;
291
292 write_lock_bh(&idev->lock);
293 if (idev->dead) {
294 err = -ENODEV;
295 goto out;
296 }
297
298 for (aca = idev->ac_list; aca; aca = aca->aca_next) {
299 if (ipv6_addr_equal(&aca->aca_addr, addr)) {
300 aca->aca_users++;
301 err = 0;
302 goto out;
303 }
304 }
305
306 /*
307 * not found: create a new one.
308 */
309
310 aca = kmalloc(sizeof(struct ifacaddr6), GFP_ATOMIC);
311
312 if (aca == NULL) {
313 err = -ENOMEM;
314 goto out;
315 }
316
317 rt = addrconf_dst_alloc(idev, addr, 1);
318 if (IS_ERR(rt)) {
319 kfree(aca);
320 err = PTR_ERR(rt);
321 goto out;
322 }
323
324 memset(aca, 0, sizeof(struct ifacaddr6));
325
326 ipv6_addr_copy(&aca->aca_addr, addr);
327 aca->aca_idev = idev;
328 aca->aca_rt = rt;
329 aca->aca_users = 1;
330 /* aca_tstamp should be updated upon changes */
331 aca->aca_cstamp = aca->aca_tstamp = jiffies;
332 atomic_set(&aca->aca_refcnt, 2);
333 spin_lock_init(&aca->aca_lock);
334
335 aca->aca_next = idev->ac_list;
336 idev->ac_list = aca;
337 write_unlock_bh(&idev->lock);
338
339 dst_hold(&rt->u.dst);
340 if (ip6_ins_rt(rt, NULL, NULL))
341 dst_release(&rt->u.dst);
342
343 addrconf_join_solict(dev, &aca->aca_addr);
344
345 aca_put(aca);
346 return 0;
347out:
348 write_unlock_bh(&idev->lock);
349 in6_dev_put(idev);
350 return err;
351}
352
353/*
354 * device anycast group decrement
355 */
356int __ipv6_dev_ac_dec(struct inet6_dev *idev, struct in6_addr *addr)
357{
358 struct ifacaddr6 *aca, *prev_aca;
359
360 write_lock_bh(&idev->lock);
361 prev_aca = NULL;
362 for (aca = idev->ac_list; aca; aca = aca->aca_next) {
363 if (ipv6_addr_equal(&aca->aca_addr, addr))
364 break;
365 prev_aca = aca;
366 }
367 if (!aca) {
368 write_unlock_bh(&idev->lock);
369 return -ENOENT;
370 }
371 if (--aca->aca_users > 0) {
372 write_unlock_bh(&idev->lock);
373 return 0;
374 }
375 if (prev_aca)
376 prev_aca->aca_next = aca->aca_next;
377 else
378 idev->ac_list = aca->aca_next;
379 write_unlock_bh(&idev->lock);
380 addrconf_leave_solict(idev, &aca->aca_addr);
381
382 dst_hold(&aca->aca_rt->u.dst);
383 if (ip6_del_rt(aca->aca_rt, NULL, NULL))
384 dst_free(&aca->aca_rt->u.dst);
385 else
386 dst_release(&aca->aca_rt->u.dst);
387
388 aca_put(aca);
389 return 0;
390}
391
392static int ipv6_dev_ac_dec(struct net_device *dev, struct in6_addr *addr)
393{
394 int ret;
395 struct inet6_dev *idev = in6_dev_get(dev);
396 if (idev == NULL)
397 return -ENODEV;
398 ret = __ipv6_dev_ac_dec(idev, addr);
399 in6_dev_put(idev);
400 return ret;
401}
402
403/*
404 * check if the interface has this anycast address
405 */
406static int ipv6_chk_acast_dev(struct net_device *dev, struct in6_addr *addr)
407{
408 struct inet6_dev *idev;
409 struct ifacaddr6 *aca;
410
411 idev = in6_dev_get(dev);
412 if (idev) {
413 read_lock_bh(&idev->lock);
414 for (aca = idev->ac_list; aca; aca = aca->aca_next)
415 if (ipv6_addr_equal(&aca->aca_addr, addr))
416 break;
417 read_unlock_bh(&idev->lock);
418 in6_dev_put(idev);
419 return aca != 0;
420 }
421 return 0;
422}
423
424/*
425 * check if given interface (or any, if dev==0) has this anycast address
426 */
427int ipv6_chk_acast_addr(struct net_device *dev, struct in6_addr *addr)
428{
429 if (dev)
430 return ipv6_chk_acast_dev(dev, addr);
431 read_lock(&dev_base_lock);
432 for (dev=dev_base; dev; dev=dev->next)
433 if (ipv6_chk_acast_dev(dev, addr))
434 break;
435 read_unlock(&dev_base_lock);
436 return dev != 0;
437}
438
439
440#ifdef CONFIG_PROC_FS
/* Cursor kept in seq_file->private while the anycast6 proc file is walked. */
struct ac6_iter_state {
	struct net_device *dev;		/* device currently being walked */
	struct inet6_dev *idev;		/* its inet6_dev, read-locked and
					 * referenced while non-NULL */
};

/* Fetch the iterator state stored in a seq_file. */
#define ac6_seq_private(seq)	((struct ac6_iter_state *)(seq)->private)
447
448static inline struct ifacaddr6 *ac6_get_first(struct seq_file *seq)
449{
450 struct ifacaddr6 *im = NULL;
451 struct ac6_iter_state *state = ac6_seq_private(seq);
452
453 for (state->dev = dev_base, state->idev = NULL;
454 state->dev;
455 state->dev = state->dev->next) {
456 struct inet6_dev *idev;
457 idev = in6_dev_get(state->dev);
458 if (!idev)
459 continue;
460 read_lock_bh(&idev->lock);
461 im = idev->ac_list;
462 if (im) {
463 state->idev = idev;
464 break;
465 }
466 read_unlock_bh(&idev->lock);
467 }
468 return im;
469}
470
471static struct ifacaddr6 *ac6_get_next(struct seq_file *seq, struct ifacaddr6 *im)
472{
473 struct ac6_iter_state *state = ac6_seq_private(seq);
474
475 im = im->aca_next;
476 while (!im) {
477 if (likely(state->idev != NULL)) {
478 read_unlock_bh(&state->idev->lock);
479 in6_dev_put(state->idev);
480 }
481 state->dev = state->dev->next;
482 if (!state->dev) {
483 state->idev = NULL;
484 break;
485 }
486 state->idev = in6_dev_get(state->dev);
487 if (!state->idev)
488 continue;
489 read_lock_bh(&state->idev->lock);
490 im = state->idev->ac_list;
491 }
492 return im;
493}
494
495static struct ifacaddr6 *ac6_get_idx(struct seq_file *seq, loff_t pos)
496{
497 struct ifacaddr6 *im = ac6_get_first(seq);
498 if (im)
499 while (pos && (im = ac6_get_next(seq, im)) != NULL)
500 --pos;
501 return pos ? NULL : im;
502}
503
504static void *ac6_seq_start(struct seq_file *seq, loff_t *pos)
505{
506 read_lock(&dev_base_lock);
507 return ac6_get_idx(seq, *pos);
508}
509
510static void *ac6_seq_next(struct seq_file *seq, void *v, loff_t *pos)
511{
512 struct ifacaddr6 *im;
513 im = ac6_get_next(seq, v);
514 ++*pos;
515 return im;
516}
517
518static void ac6_seq_stop(struct seq_file *seq, void *v)
519{
520 struct ac6_iter_state *state = ac6_seq_private(seq);
521 if (likely(state->idev != NULL)) {
522 read_unlock_bh(&state->idev->lock);
523 in6_dev_put(state->idev);
524 }
525 read_unlock(&dev_base_lock);
526}
527
528static int ac6_seq_show(struct seq_file *seq, void *v)
529{
530 struct ifacaddr6 *im = (struct ifacaddr6 *)v;
531 struct ac6_iter_state *state = ac6_seq_private(seq);
532
533 seq_printf(seq,
534 "%-4d %-15s "
535 "%04x%04x%04x%04x%04x%04x%04x%04x "
536 "%5d\n",
537 state->dev->ifindex, state->dev->name,
538 NIP6(im->aca_addr),
539 im->aca_users);
540 return 0;
541}
542
543static struct seq_operations ac6_seq_ops = {
544 .start = ac6_seq_start,
545 .next = ac6_seq_next,
546 .stop = ac6_seq_stop,
547 .show = ac6_seq_show,
548};
549
550static int ac6_seq_open(struct inode *inode, struct file *file)
551{
552 struct seq_file *seq;
553 int rc = -ENOMEM;
554 struct ac6_iter_state *s = kmalloc(sizeof(*s), GFP_KERNEL);
555
556 if (!s)
557 goto out;
558
559 rc = seq_open(file, &ac6_seq_ops);
560 if (rc)
561 goto out_kfree;
562
563 seq = file->private_data;
564 seq->private = s;
565 memset(s, 0, sizeof(*s));
566out:
567 return rc;
568out_kfree:
569 kfree(s);
570 goto out;
571}
572
573static struct file_operations ac6_seq_fops = {
574 .owner = THIS_MODULE,
575 .open = ac6_seq_open,
576 .read = seq_read,
577 .llseek = seq_lseek,
578 .release = seq_release_private,
579};
580
581int __init ac6_proc_init(void)
582{
583 if (!proc_net_fops_create("anycast6", S_IRUGO, &ac6_seq_fops))
584 return -ENOMEM;
585
586 return 0;
587}
588
/* Remove the "anycast6" proc entry created by ac6_proc_init(). */
void ac6_proc_exit(void)
{
	proc_net_remove("anycast6");
}
593#endif
594
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
new file mode 100644
index 000000000000..65b9375df57d
--- /dev/null
+++ b/net/ipv6/datagram.c
@@ -0,0 +1,600 @@
1/*
2 * common UDP/RAW code
3 * Linux INET6 implementation
4 *
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
7 *
8 * $Id: datagram.c,v 1.24 2002/02/01 22:01:04 davem Exp $
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 */
15
16#include <linux/errno.h>
17#include <linux/types.h>
18#include <linux/kernel.h>
19#include <linux/sched.h>
20#include <linux/interrupt.h>
21#include <linux/socket.h>
22#include <linux/sockios.h>
23#include <linux/in6.h>
24#include <linux/ipv6.h>
25#include <linux/route.h>
26
27#include <net/ipv6.h>
28#include <net/ndisc.h>
29#include <net/addrconf.h>
30#include <net/transp_v6.h>
31#include <net/ip6_route.h>
32
33#include <linux/errqueue.h>
34#include <asm/uaccess.h>
35
36int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
37{
38 struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
39 struct inet_sock *inet = inet_sk(sk);
40 struct ipv6_pinfo *np = inet6_sk(sk);
41 struct in6_addr *daddr, *final_p = NULL, final;
42 struct dst_entry *dst;
43 struct flowi fl;
44 struct ip6_flowlabel *flowlabel = NULL;
45 int addr_type;
46 int err;
47
48 if (usin->sin6_family == AF_INET) {
49 if (__ipv6_only_sock(sk))
50 return -EAFNOSUPPORT;
51 err = ip4_datagram_connect(sk, uaddr, addr_len);
52 goto ipv4_connected;
53 }
54
55 if (addr_len < SIN6_LEN_RFC2133)
56 return -EINVAL;
57
58 if (usin->sin6_family != AF_INET6)
59 return -EAFNOSUPPORT;
60
61 memset(&fl, 0, sizeof(fl));
62 if (np->sndflow) {
63 fl.fl6_flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
64 if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) {
65 flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
66 if (flowlabel == NULL)
67 return -EINVAL;
68 ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst);
69 }
70 }
71
72 addr_type = ipv6_addr_type(&usin->sin6_addr);
73
74 if (addr_type == IPV6_ADDR_ANY) {
75 /*
76 * connect to self
77 */
78 usin->sin6_addr.s6_addr[15] = 0x01;
79 }
80
81 daddr = &usin->sin6_addr;
82
83 if (addr_type == IPV6_ADDR_MAPPED) {
84 struct sockaddr_in sin;
85
86 if (__ipv6_only_sock(sk)) {
87 err = -ENETUNREACH;
88 goto out;
89 }
90 sin.sin_family = AF_INET;
91 sin.sin_addr.s_addr = daddr->s6_addr32[3];
92 sin.sin_port = usin->sin6_port;
93
94 err = ip4_datagram_connect(sk,
95 (struct sockaddr*) &sin,
96 sizeof(sin));
97
98ipv4_connected:
99 if (err)
100 goto out;
101
102 ipv6_addr_set(&np->daddr, 0, 0, htonl(0x0000ffff), inet->daddr);
103
104 if (ipv6_addr_any(&np->saddr)) {
105 ipv6_addr_set(&np->saddr, 0, 0, htonl(0x0000ffff),
106 inet->saddr);
107 }
108
109 if (ipv6_addr_any(&np->rcv_saddr)) {
110 ipv6_addr_set(&np->rcv_saddr, 0, 0, htonl(0x0000ffff),
111 inet->rcv_saddr);
112 }
113 goto out;
114 }
115
116 if (addr_type&IPV6_ADDR_LINKLOCAL) {
117 if (addr_len >= sizeof(struct sockaddr_in6) &&
118 usin->sin6_scope_id) {
119 if (sk->sk_bound_dev_if &&
120 sk->sk_bound_dev_if != usin->sin6_scope_id) {
121 err = -EINVAL;
122 goto out;
123 }
124 sk->sk_bound_dev_if = usin->sin6_scope_id;
125 if (!sk->sk_bound_dev_if &&
126 (addr_type & IPV6_ADDR_MULTICAST))
127 fl.oif = np->mcast_oif;
128 }
129
130 /* Connect to link-local address requires an interface */
131 if (!sk->sk_bound_dev_if) {
132 err = -EINVAL;
133 goto out;
134 }
135 }
136
137 ipv6_addr_copy(&np->daddr, daddr);
138 np->flow_label = fl.fl6_flowlabel;
139
140 inet->dport = usin->sin6_port;
141
142 /*
143 * Check for a route to destination an obtain the
144 * destination cache for it.
145 */
146
147 fl.proto = sk->sk_protocol;
148 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
149 ipv6_addr_copy(&fl.fl6_src, &np->saddr);
150 fl.oif = sk->sk_bound_dev_if;
151 fl.fl_ip_dport = inet->dport;
152 fl.fl_ip_sport = inet->sport;
153
154 if (!fl.oif && (addr_type&IPV6_ADDR_MULTICAST))
155 fl.oif = np->mcast_oif;
156
157 if (flowlabel) {
158 if (flowlabel->opt && flowlabel->opt->srcrt) {
159 struct rt0_hdr *rt0 = (struct rt0_hdr *) flowlabel->opt->srcrt;
160 ipv6_addr_copy(&final, &fl.fl6_dst);
161 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
162 final_p = &final;
163 }
164 } else if (np->opt && np->opt->srcrt) {
165 struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
166 ipv6_addr_copy(&final, &fl.fl6_dst);
167 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
168 final_p = &final;
169 }
170
171 err = ip6_dst_lookup(sk, &dst, &fl);
172 if (err)
173 goto out;
174 if (final_p)
175 ipv6_addr_copy(&fl.fl6_dst, final_p);
176
177 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
178 dst_release(dst);
179 goto out;
180 }
181
182 /* source address lookup done in ip6_dst_lookup */
183
184 if (ipv6_addr_any(&np->saddr))
185 ipv6_addr_copy(&np->saddr, &fl.fl6_src);
186
187 if (ipv6_addr_any(&np->rcv_saddr)) {
188 ipv6_addr_copy(&np->rcv_saddr, &fl.fl6_src);
189 inet->rcv_saddr = LOOPBACK4_IPV6;
190 }
191
192 ip6_dst_store(sk, dst,
193 ipv6_addr_equal(&fl.fl6_dst, &np->daddr) ?
194 &np->daddr : NULL);
195
196 sk->sk_state = TCP_ESTABLISHED;
197out:
198 fl6_sock_release(flowlabel);
199 return err;
200}
201
202void ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err,
203 u16 port, u32 info, u8 *payload)
204{
205 struct ipv6_pinfo *np = inet6_sk(sk);
206 struct icmp6hdr *icmph = (struct icmp6hdr *)skb->h.raw;
207 struct sock_exterr_skb *serr;
208
209 if (!np->recverr)
210 return;
211
212 skb = skb_clone(skb, GFP_ATOMIC);
213 if (!skb)
214 return;
215
216 serr = SKB_EXT_ERR(skb);
217 serr->ee.ee_errno = err;
218 serr->ee.ee_origin = SO_EE_ORIGIN_ICMP6;
219 serr->ee.ee_type = icmph->icmp6_type;
220 serr->ee.ee_code = icmph->icmp6_code;
221 serr->ee.ee_pad = 0;
222 serr->ee.ee_info = info;
223 serr->ee.ee_data = 0;
224 serr->addr_offset = (u8*)&(((struct ipv6hdr*)(icmph+1))->daddr) - skb->nh.raw;
225 serr->port = port;
226
227 skb->h.raw = payload;
228 __skb_pull(skb, payload - skb->data);
229
230 if (sock_queue_err_skb(sk, skb))
231 kfree_skb(skb);
232}
233
234void ipv6_local_error(struct sock *sk, int err, struct flowi *fl, u32 info)
235{
236 struct ipv6_pinfo *np = inet6_sk(sk);
237 struct sock_exterr_skb *serr;
238 struct ipv6hdr *iph;
239 struct sk_buff *skb;
240
241 if (!np->recverr)
242 return;
243
244 skb = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
245 if (!skb)
246 return;
247
248 iph = (struct ipv6hdr*)skb_put(skb, sizeof(struct ipv6hdr));
249 skb->nh.ipv6h = iph;
250 ipv6_addr_copy(&iph->daddr, &fl->fl6_dst);
251
252 serr = SKB_EXT_ERR(skb);
253 serr->ee.ee_errno = err;
254 serr->ee.ee_origin = SO_EE_ORIGIN_LOCAL;
255 serr->ee.ee_type = 0;
256 serr->ee.ee_code = 0;
257 serr->ee.ee_pad = 0;
258 serr->ee.ee_info = info;
259 serr->ee.ee_data = 0;
260 serr->addr_offset = (u8*)&iph->daddr - skb->nh.raw;
261 serr->port = fl->fl_ip_dport;
262
263 skb->h.raw = skb->tail;
264 __skb_pull(skb, skb->tail - skb->data);
265
266 if (sock_queue_err_skb(sk, skb))
267 kfree_skb(skb);
268}
269
270/*
271 * Handle MSG_ERRQUEUE
272 */
273int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len)
274{
275 struct ipv6_pinfo *np = inet6_sk(sk);
276 struct sock_exterr_skb *serr;
277 struct sk_buff *skb, *skb2;
278 struct sockaddr_in6 *sin;
279 struct {
280 struct sock_extended_err ee;
281 struct sockaddr_in6 offender;
282 } errhdr;
283 int err;
284 int copied;
285
286 err = -EAGAIN;
287 skb = skb_dequeue(&sk->sk_error_queue);
288 if (skb == NULL)
289 goto out;
290
291 copied = skb->len;
292 if (copied > len) {
293 msg->msg_flags |= MSG_TRUNC;
294 copied = len;
295 }
296 err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
297 if (err)
298 goto out_free_skb;
299
300 sock_recv_timestamp(msg, sk, skb);
301
302 serr = SKB_EXT_ERR(skb);
303
304 sin = (struct sockaddr_in6 *)msg->msg_name;
305 if (sin) {
306 sin->sin6_family = AF_INET6;
307 sin->sin6_flowinfo = 0;
308 sin->sin6_port = serr->port;
309 sin->sin6_scope_id = 0;
310 if (serr->ee.ee_origin == SO_EE_ORIGIN_ICMP6) {
311 ipv6_addr_copy(&sin->sin6_addr,
312 (struct in6_addr *)(skb->nh.raw + serr->addr_offset));
313 if (np->sndflow)
314 sin->sin6_flowinfo = *(u32*)(skb->nh.raw + serr->addr_offset - 24) & IPV6_FLOWINFO_MASK;
315 if (ipv6_addr_type(&sin->sin6_addr) & IPV6_ADDR_LINKLOCAL)
316 sin->sin6_scope_id = IP6CB(skb)->iif;
317 } else {
318 ipv6_addr_set(&sin->sin6_addr, 0, 0,
319 htonl(0xffff),
320 *(u32*)(skb->nh.raw + serr->addr_offset));
321 }
322 }
323
324 memcpy(&errhdr.ee, &serr->ee, sizeof(struct sock_extended_err));
325 sin = &errhdr.offender;
326 sin->sin6_family = AF_UNSPEC;
327 if (serr->ee.ee_origin != SO_EE_ORIGIN_LOCAL) {
328 sin->sin6_family = AF_INET6;
329 sin->sin6_flowinfo = 0;
330 sin->sin6_scope_id = 0;
331 if (serr->ee.ee_origin == SO_EE_ORIGIN_ICMP6) {
332 ipv6_addr_copy(&sin->sin6_addr, &skb->nh.ipv6h->saddr);
333 if (np->rxopt.all)
334 datagram_recv_ctl(sk, msg, skb);
335 if (ipv6_addr_type(&sin->sin6_addr) & IPV6_ADDR_LINKLOCAL)
336 sin->sin6_scope_id = IP6CB(skb)->iif;
337 } else {
338 struct inet_sock *inet = inet_sk(sk);
339
340 ipv6_addr_set(&sin->sin6_addr, 0, 0,
341 htonl(0xffff),
342 skb->nh.iph->saddr);
343 if (inet->cmsg_flags)
344 ip_cmsg_recv(msg, skb);
345 }
346 }
347
348 put_cmsg(msg, SOL_IPV6, IPV6_RECVERR, sizeof(errhdr), &errhdr);
349
350 /* Now we could try to dump offended packet options */
351
352 msg->msg_flags |= MSG_ERRQUEUE;
353 err = copied;
354
355 /* Reset and regenerate socket error */
356 spin_lock_irq(&sk->sk_error_queue.lock);
357 sk->sk_err = 0;
358 if ((skb2 = skb_peek(&sk->sk_error_queue)) != NULL) {
359 sk->sk_err = SKB_EXT_ERR(skb2)->ee.ee_errno;
360 spin_unlock_irq(&sk->sk_error_queue.lock);
361 sk->sk_error_report(sk);
362 } else {
363 spin_unlock_irq(&sk->sk_error_queue.lock);
364 }
365
366out_free_skb:
367 kfree_skb(skb);
368out:
369 return err;
370}
371
372
373
374int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb)
375{
376 struct ipv6_pinfo *np = inet6_sk(sk);
377 struct inet6_skb_parm *opt = IP6CB(skb);
378
379 if (np->rxopt.bits.rxinfo) {
380 struct in6_pktinfo src_info;
381
382 src_info.ipi6_ifindex = opt->iif;
383 ipv6_addr_copy(&src_info.ipi6_addr, &skb->nh.ipv6h->daddr);
384 put_cmsg(msg, SOL_IPV6, IPV6_PKTINFO, sizeof(src_info), &src_info);
385 }
386
387 if (np->rxopt.bits.rxhlim) {
388 int hlim = skb->nh.ipv6h->hop_limit;
389 put_cmsg(msg, SOL_IPV6, IPV6_HOPLIMIT, sizeof(hlim), &hlim);
390 }
391
392 if (np->rxopt.bits.rxflow && (*(u32*)skb->nh.raw & IPV6_FLOWINFO_MASK)) {
393 u32 flowinfo = *(u32*)skb->nh.raw & IPV6_FLOWINFO_MASK;
394 put_cmsg(msg, SOL_IPV6, IPV6_FLOWINFO, sizeof(flowinfo), &flowinfo);
395 }
396 if (np->rxopt.bits.hopopts && opt->hop) {
397 u8 *ptr = skb->nh.raw + opt->hop;
398 put_cmsg(msg, SOL_IPV6, IPV6_HOPOPTS, (ptr[1]+1)<<3, ptr);
399 }
400 if (np->rxopt.bits.dstopts && opt->dst0) {
401 u8 *ptr = skb->nh.raw + opt->dst0;
402 put_cmsg(msg, SOL_IPV6, IPV6_DSTOPTS, (ptr[1]+1)<<3, ptr);
403 }
404 if (np->rxopt.bits.srcrt && opt->srcrt) {
405 struct ipv6_rt_hdr *rthdr = (struct ipv6_rt_hdr *)(skb->nh.raw + opt->srcrt);
406 put_cmsg(msg, SOL_IPV6, IPV6_RTHDR, (rthdr->hdrlen+1) << 3, rthdr);
407 }
408 if (np->rxopt.bits.dstopts && opt->dst1) {
409 u8 *ptr = skb->nh.raw + opt->dst1;
410 put_cmsg(msg, SOL_IPV6, IPV6_DSTOPTS, (ptr[1]+1)<<3, ptr);
411 }
412 return 0;
413}
414
415int datagram_send_ctl(struct msghdr *msg, struct flowi *fl,
416 struct ipv6_txoptions *opt,
417 int *hlimit)
418{
419 struct in6_pktinfo *src_info;
420 struct cmsghdr *cmsg;
421 struct ipv6_rt_hdr *rthdr;
422 struct ipv6_opt_hdr *hdr;
423 int len;
424 int err = 0;
425
426 for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg)) {
427 int addr_type;
428 struct net_device *dev = NULL;
429
430 if (!CMSG_OK(msg, cmsg)) {
431 err = -EINVAL;
432 goto exit_f;
433 }
434
435 if (cmsg->cmsg_level != SOL_IPV6)
436 continue;
437
438 switch (cmsg->cmsg_type) {
439 case IPV6_PKTINFO:
440 if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct in6_pktinfo))) {
441 err = -EINVAL;
442 goto exit_f;
443 }
444
445 src_info = (struct in6_pktinfo *)CMSG_DATA(cmsg);
446
447 if (src_info->ipi6_ifindex) {
448 if (fl->oif && src_info->ipi6_ifindex != fl->oif)
449 return -EINVAL;
450 fl->oif = src_info->ipi6_ifindex;
451 }
452
453 addr_type = ipv6_addr_type(&src_info->ipi6_addr);
454
455 if (addr_type == IPV6_ADDR_ANY)
456 break;
457
458 if (addr_type & IPV6_ADDR_LINKLOCAL) {
459 if (!src_info->ipi6_ifindex)
460 return -EINVAL;
461 else {
462 dev = dev_get_by_index(src_info->ipi6_ifindex);
463 if (!dev)
464 return -ENODEV;
465 }
466 }
467 if (!ipv6_chk_addr(&src_info->ipi6_addr, dev, 0)) {
468 if (dev)
469 dev_put(dev);
470 err = -EINVAL;
471 goto exit_f;
472 }
473 if (dev)
474 dev_put(dev);
475
476 ipv6_addr_copy(&fl->fl6_src, &src_info->ipi6_addr);
477 break;
478
479 case IPV6_FLOWINFO:
480 if (cmsg->cmsg_len < CMSG_LEN(4)) {
481 err = -EINVAL;
482 goto exit_f;
483 }
484
485 if (fl->fl6_flowlabel&IPV6_FLOWINFO_MASK) {
486 if ((fl->fl6_flowlabel^*(u32 *)CMSG_DATA(cmsg))&~IPV6_FLOWINFO_MASK) {
487 err = -EINVAL;
488 goto exit_f;
489 }
490 }
491 fl->fl6_flowlabel = IPV6_FLOWINFO_MASK & *(u32 *)CMSG_DATA(cmsg);
492 break;
493
494 case IPV6_HOPOPTS:
495 if (opt->hopopt || cmsg->cmsg_len < CMSG_LEN(sizeof(struct ipv6_opt_hdr))) {
496 err = -EINVAL;
497 goto exit_f;
498 }
499
500 hdr = (struct ipv6_opt_hdr *)CMSG_DATA(cmsg);
501 len = ((hdr->hdrlen + 1) << 3);
502 if (cmsg->cmsg_len < CMSG_LEN(len)) {
503 err = -EINVAL;
504 goto exit_f;
505 }
506 if (!capable(CAP_NET_RAW)) {
507 err = -EPERM;
508 goto exit_f;
509 }
510 opt->opt_nflen += len;
511 opt->hopopt = hdr;
512 break;
513
514 case IPV6_DSTOPTS:
515 if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct ipv6_opt_hdr))) {
516 err = -EINVAL;
517 goto exit_f;
518 }
519
520 hdr = (struct ipv6_opt_hdr *)CMSG_DATA(cmsg);
521 len = ((hdr->hdrlen + 1) << 3);
522 if (cmsg->cmsg_len < CMSG_LEN(len)) {
523 err = -EINVAL;
524 goto exit_f;
525 }
526 if (!capable(CAP_NET_RAW)) {
527 err = -EPERM;
528 goto exit_f;
529 }
530 if (opt->dst1opt) {
531 err = -EINVAL;
532 goto exit_f;
533 }
534 opt->opt_flen += len;
535 opt->dst1opt = hdr;
536 break;
537
538 case IPV6_RTHDR:
539 if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct ipv6_rt_hdr))) {
540 err = -EINVAL;
541 goto exit_f;
542 }
543
544 rthdr = (struct ipv6_rt_hdr *)CMSG_DATA(cmsg);
545
546 /*
547 * TYPE 0
548 */
549 if (rthdr->type) {
550 err = -EINVAL;
551 goto exit_f;
552 }
553
554 len = ((rthdr->hdrlen + 1) << 3);
555
556 if (cmsg->cmsg_len < CMSG_LEN(len)) {
557 err = -EINVAL;
558 goto exit_f;
559 }
560
561 /* segments left must also match */
562 if ((rthdr->hdrlen >> 1) != rthdr->segments_left) {
563 err = -EINVAL;
564 goto exit_f;
565 }
566
567 opt->opt_nflen += len;
568 opt->srcrt = rthdr;
569
570 if (opt->dst1opt) {
571 int dsthdrlen = ((opt->dst1opt->hdrlen+1)<<3);
572
573 opt->opt_nflen += dsthdrlen;
574 opt->dst0opt = opt->dst1opt;
575 opt->dst1opt = NULL;
576 opt->opt_flen -= dsthdrlen;
577 }
578
579 break;
580
581 case IPV6_HOPLIMIT:
582 if (cmsg->cmsg_len != CMSG_LEN(sizeof(int))) {
583 err = -EINVAL;
584 goto exit_f;
585 }
586
587 *hlimit = *(int *)CMSG_DATA(cmsg);
588 break;
589
590 default:
591 LIMIT_NETDEBUG(
592 printk(KERN_DEBUG "invalid cmsg type: %d\n", cmsg->cmsg_type));
593 err = -EINVAL;
594 break;
595 };
596 }
597
598exit_f:
599 return err;
600}
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
new file mode 100644
index 000000000000..be7095d6babe
--- /dev/null
+++ b/net/ipv6/esp6.c
@@ -0,0 +1,424 @@
1/*
2 * Copyright (C)2002 USAGI/WIDE Project
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 *
18 * Authors
19 *
20 * Mitsuru KANDA @USAGI : IPv6 Support
21 * Kazunori MIYAZAWA @USAGI :
22 * Kunihiro Ishiguro <kunihiro@ipinfusion.com>
23 *
24 * This file is derived from net/ipv4/esp.c
25 */
26
27#include <linux/config.h>
28#include <linux/module.h>
29#include <net/ip.h>
30#include <net/xfrm.h>
31#include <net/esp.h>
32#include <asm/scatterlist.h>
33#include <linux/crypto.h>
34#include <linux/pfkeyv2.h>
35#include <linux/random.h>
36#include <net/icmp.h>
37#include <net/ipv6.h>
38#include <linux/icmpv6.h>
39
40static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
41{
42 int err;
43 int hdr_len;
44 struct ipv6hdr *top_iph;
45 struct ipv6_esp_hdr *esph;
46 struct crypto_tfm *tfm;
47 struct esp_data *esp;
48 struct sk_buff *trailer;
49 int blksize;
50 int clen;
51 int alen;
52 int nfrags;
53
54 esp = x->data;
55 hdr_len = skb->h.raw - skb->data +
56 sizeof(*esph) + esp->conf.ivlen;
57
58 /* Strip IP+ESP header. */
59 __skb_pull(skb, hdr_len);
60
61 /* Now skb is pure payload to encrypt */
62 err = -ENOMEM;
63
64 /* Round to block size */
65 clen = skb->len;
66
67 alen = esp->auth.icv_trunc_len;
68 tfm = esp->conf.tfm;
69 blksize = (crypto_tfm_alg_blocksize(tfm) + 3) & ~3;
70 clen = (clen + 2 + blksize-1)&~(blksize-1);
71 if (esp->conf.padlen)
72 clen = (clen + esp->conf.padlen-1)&~(esp->conf.padlen-1);
73
74 if ((nfrags = skb_cow_data(skb, clen-skb->len+alen, &trailer)) < 0) {
75 goto error;
76 }
77
78 /* Fill padding... */
79 do {
80 int i;
81 for (i=0; i<clen-skb->len - 2; i++)
82 *(u8*)(trailer->tail + i) = i+1;
83 } while (0);
84 *(u8*)(trailer->tail + clen-skb->len - 2) = (clen - skb->len)-2;
85 pskb_put(skb, trailer, clen - skb->len);
86
87 top_iph = (struct ipv6hdr *)__skb_push(skb, hdr_len);
88 esph = (struct ipv6_esp_hdr *)skb->h.raw;
89 top_iph->payload_len = htons(skb->len + alen - sizeof(*top_iph));
90 *(u8*)(trailer->tail - 1) = *skb->nh.raw;
91 *skb->nh.raw = IPPROTO_ESP;
92
93 esph->spi = x->id.spi;
94 esph->seq_no = htonl(++x->replay.oseq);
95
96 if (esp->conf.ivlen)
97 crypto_cipher_set_iv(tfm, esp->conf.ivec, crypto_tfm_alg_ivsize(tfm));
98
99 do {
100 struct scatterlist *sg = &esp->sgbuf[0];
101
102 if (unlikely(nfrags > ESP_NUM_FAST_SG)) {
103 sg = kmalloc(sizeof(struct scatterlist)*nfrags, GFP_ATOMIC);
104 if (!sg)
105 goto error;
106 }
107 skb_to_sgvec(skb, sg, esph->enc_data+esp->conf.ivlen-skb->data, clen);
108 crypto_cipher_encrypt(tfm, sg, sg, clen);
109 if (unlikely(sg != &esp->sgbuf[0]))
110 kfree(sg);
111 } while (0);
112
113 if (esp->conf.ivlen) {
114 memcpy(esph->enc_data, esp->conf.ivec, crypto_tfm_alg_ivsize(tfm));
115 crypto_cipher_get_iv(tfm, esp->conf.ivec, crypto_tfm_alg_ivsize(tfm));
116 }
117
118 if (esp->auth.icv_full_len) {
119 esp->auth.icv(esp, skb, (u8*)esph-skb->data,
120 sizeof(struct ipv6_esp_hdr) + esp->conf.ivlen+clen, trailer->tail);
121 pskb_put(skb, trailer, alen);
122 }
123
124 err = 0;
125
126error:
127 return err;
128}
129
130static int esp6_input(struct xfrm_state *x, struct xfrm_decap_state *decap, struct sk_buff *skb)
131{
132 struct ipv6hdr *iph;
133 struct ipv6_esp_hdr *esph;
134 struct esp_data *esp = x->data;
135 struct sk_buff *trailer;
136 int blksize = crypto_tfm_alg_blocksize(esp->conf.tfm);
137 int alen = esp->auth.icv_trunc_len;
138 int elen = skb->len - sizeof(struct ipv6_esp_hdr) - esp->conf.ivlen - alen;
139
140 int hdr_len = skb->h.raw - skb->nh.raw;
141 int nfrags;
142 unsigned char *tmp_hdr = NULL;
143 int ret = 0;
144
145 if (!pskb_may_pull(skb, sizeof(struct ipv6_esp_hdr))) {
146 ret = -EINVAL;
147 goto out_nofree;
148 }
149
150 if (elen <= 0 || (elen & (blksize-1))) {
151 ret = -EINVAL;
152 goto out_nofree;
153 }
154
155 tmp_hdr = kmalloc(hdr_len, GFP_ATOMIC);
156 if (!tmp_hdr) {
157 ret = -ENOMEM;
158 goto out_nofree;
159 }
160 memcpy(tmp_hdr, skb->nh.raw, hdr_len);
161
162 /* If integrity check is required, do this. */
163 if (esp->auth.icv_full_len) {
164 u8 sum[esp->auth.icv_full_len];
165 u8 sum1[alen];
166
167 esp->auth.icv(esp, skb, 0, skb->len-alen, sum);
168
169 if (skb_copy_bits(skb, skb->len-alen, sum1, alen))
170 BUG();
171
172 if (unlikely(memcmp(sum, sum1, alen))) {
173 x->stats.integrity_failed++;
174 ret = -EINVAL;
175 goto out;
176 }
177 }
178
179 if ((nfrags = skb_cow_data(skb, 0, &trailer)) < 0) {
180 ret = -EINVAL;
181 goto out;
182 }
183
184 skb->ip_summed = CHECKSUM_NONE;
185
186 esph = (struct ipv6_esp_hdr*)skb->data;
187 iph = skb->nh.ipv6h;
188
189 /* Get ivec. This can be wrong, check against another impls. */
190 if (esp->conf.ivlen)
191 crypto_cipher_set_iv(esp->conf.tfm, esph->enc_data, crypto_tfm_alg_ivsize(esp->conf.tfm));
192
193 {
194 u8 nexthdr[2];
195 struct scatterlist *sg = &esp->sgbuf[0];
196 u8 padlen;
197
198 if (unlikely(nfrags > ESP_NUM_FAST_SG)) {
199 sg = kmalloc(sizeof(struct scatterlist)*nfrags, GFP_ATOMIC);
200 if (!sg) {
201 ret = -ENOMEM;
202 goto out;
203 }
204 }
205 skb_to_sgvec(skb, sg, sizeof(struct ipv6_esp_hdr) + esp->conf.ivlen, elen);
206 crypto_cipher_decrypt(esp->conf.tfm, sg, sg, elen);
207 if (unlikely(sg != &esp->sgbuf[0]))
208 kfree(sg);
209
210 if (skb_copy_bits(skb, skb->len-alen-2, nexthdr, 2))
211 BUG();
212
213 padlen = nexthdr[0];
214 if (padlen+2 >= elen) {
215 LIMIT_NETDEBUG(
216 printk(KERN_WARNING "ipsec esp packet is garbage padlen=%d, elen=%d\n", padlen+2, elen));
217 ret = -EINVAL;
218 goto out;
219 }
220 /* ... check padding bits here. Silly. :-) */
221
222 pskb_trim(skb, skb->len - alen - padlen - 2);
223 skb->h.raw = skb_pull(skb, sizeof(struct ipv6_esp_hdr) + esp->conf.ivlen);
224 skb->nh.raw += sizeof(struct ipv6_esp_hdr) + esp->conf.ivlen;
225 memcpy(skb->nh.raw, tmp_hdr, hdr_len);
226 skb->nh.ipv6h->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
227 ret = nexthdr[1];
228 }
229
230out:
231 kfree(tmp_hdr);
232out_nofree:
233 return ret;
234}
235
236static u32 esp6_get_max_size(struct xfrm_state *x, int mtu)
237{
238 struct esp_data *esp = x->data;
239 u32 blksize = crypto_tfm_alg_blocksize(esp->conf.tfm);
240
241 if (x->props.mode) {
242 mtu = (mtu + 2 + blksize-1)&~(blksize-1);
243 } else {
244 /* The worst case. */
245 mtu += 2 + blksize;
246 }
247 if (esp->conf.padlen)
248 mtu = (mtu + esp->conf.padlen-1)&~(esp->conf.padlen-1);
249
250 return mtu + x->props.header_len + esp->auth.icv_full_len;
251}
252
253static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
254 int type, int code, int offset, __u32 info)
255{
256 struct ipv6hdr *iph = (struct ipv6hdr*)skb->data;
257 struct ipv6_esp_hdr *esph = (struct ipv6_esp_hdr*)(skb->data+offset);
258 struct xfrm_state *x;
259
260 if (type != ICMPV6_DEST_UNREACH &&
261 type != ICMPV6_PKT_TOOBIG)
262 return;
263
264 x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, esph->spi, IPPROTO_ESP, AF_INET6);
265 if (!x)
266 return;
267 printk(KERN_DEBUG "pmtu discovery on SA ESP/%08x/"
268 "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n",
269 ntohl(esph->spi), NIP6(iph->daddr));
270 xfrm_state_put(x);
271}
272
273static void esp6_destroy(struct xfrm_state *x)
274{
275 struct esp_data *esp = x->data;
276
277 if (!esp)
278 return;
279
280 if (esp->conf.tfm) {
281 crypto_free_tfm(esp->conf.tfm);
282 esp->conf.tfm = NULL;
283 }
284 if (esp->conf.ivec) {
285 kfree(esp->conf.ivec);
286 esp->conf.ivec = NULL;
287 }
288 if (esp->auth.tfm) {
289 crypto_free_tfm(esp->auth.tfm);
290 esp->auth.tfm = NULL;
291 }
292 if (esp->auth.work_icv) {
293 kfree(esp->auth.work_icv);
294 esp->auth.work_icv = NULL;
295 }
296 kfree(esp);
297}
298
299static int esp6_init_state(struct xfrm_state *x, void *args)
300{
301 struct esp_data *esp = NULL;
302
303 /* null auth and encryption can have zero length keys */
304 if (x->aalg) {
305 if (x->aalg->alg_key_len > 512)
306 goto error;
307 }
308 if (x->ealg == NULL)
309 goto error;
310
311 if (x->encap)
312 goto error;
313
314 esp = kmalloc(sizeof(*esp), GFP_KERNEL);
315 if (esp == NULL)
316 return -ENOMEM;
317
318 memset(esp, 0, sizeof(*esp));
319
320 if (x->aalg) {
321 struct xfrm_algo_desc *aalg_desc;
322
323 esp->auth.key = x->aalg->alg_key;
324 esp->auth.key_len = (x->aalg->alg_key_len+7)/8;
325 esp->auth.tfm = crypto_alloc_tfm(x->aalg->alg_name, 0);
326 if (esp->auth.tfm == NULL)
327 goto error;
328 esp->auth.icv = esp_hmac_digest;
329
330 aalg_desc = xfrm_aalg_get_byname(x->aalg->alg_name, 0);
331 BUG_ON(!aalg_desc);
332
333 if (aalg_desc->uinfo.auth.icv_fullbits/8 !=
334 crypto_tfm_alg_digestsize(esp->auth.tfm)) {
335 printk(KERN_INFO "ESP: %s digestsize %u != %hu\n",
336 x->aalg->alg_name,
337 crypto_tfm_alg_digestsize(esp->auth.tfm),
338 aalg_desc->uinfo.auth.icv_fullbits/8);
339 goto error;
340 }
341
342 esp->auth.icv_full_len = aalg_desc->uinfo.auth.icv_fullbits/8;
343 esp->auth.icv_trunc_len = aalg_desc->uinfo.auth.icv_truncbits/8;
344
345 esp->auth.work_icv = kmalloc(esp->auth.icv_full_len, GFP_KERNEL);
346 if (!esp->auth.work_icv)
347 goto error;
348 }
349 esp->conf.key = x->ealg->alg_key;
350 esp->conf.key_len = (x->ealg->alg_key_len+7)/8;
351 if (x->props.ealgo == SADB_EALG_NULL)
352 esp->conf.tfm = crypto_alloc_tfm(x->ealg->alg_name, CRYPTO_TFM_MODE_ECB);
353 else
354 esp->conf.tfm = crypto_alloc_tfm(x->ealg->alg_name, CRYPTO_TFM_MODE_CBC);
355 if (esp->conf.tfm == NULL)
356 goto error;
357 esp->conf.ivlen = crypto_tfm_alg_ivsize(esp->conf.tfm);
358 esp->conf.padlen = 0;
359 if (esp->conf.ivlen) {
360 esp->conf.ivec = kmalloc(esp->conf.ivlen, GFP_KERNEL);
361 if (unlikely(esp->conf.ivec == NULL))
362 goto error;
363 get_random_bytes(esp->conf.ivec, esp->conf.ivlen);
364 }
365 if (crypto_cipher_setkey(esp->conf.tfm, esp->conf.key, esp->conf.key_len))
366 goto error;
367 x->props.header_len = sizeof(struct ipv6_esp_hdr) + esp->conf.ivlen;
368 if (x->props.mode)
369 x->props.header_len += sizeof(struct ipv6hdr);
370 x->data = esp;
371 return 0;
372
373error:
374 x->data = esp;
375 esp6_destroy(x);
376 x->data = NULL;
377 return -EINVAL;
378}
379
380static struct xfrm_type esp6_type =
381{
382 .description = "ESP6",
383 .owner = THIS_MODULE,
384 .proto = IPPROTO_ESP,
385 .init_state = esp6_init_state,
386 .destructor = esp6_destroy,
387 .get_max_size = esp6_get_max_size,
388 .input = esp6_input,
389 .output = esp6_output
390};
391
392static struct inet6_protocol esp6_protocol = {
393 .handler = xfrm6_rcv,
394 .err_handler = esp6_err,
395 .flags = INET6_PROTO_NOPOLICY,
396};
397
398static int __init esp6_init(void)
399{
400 if (xfrm_register_type(&esp6_type, AF_INET6) < 0) {
401 printk(KERN_INFO "ipv6 esp init: can't add xfrm type\n");
402 return -EAGAIN;
403 }
404 if (inet6_add_protocol(&esp6_protocol, IPPROTO_ESP) < 0) {
405 printk(KERN_INFO "ipv6 esp init: can't add protocol\n");
406 xfrm_unregister_type(&esp6_type, AF_INET6);
407 return -EAGAIN;
408 }
409
410 return 0;
411}
412
413static void __exit esp6_fini(void)
414{
415 if (inet6_del_protocol(&esp6_protocol, IPPROTO_ESP) < 0)
416 printk(KERN_INFO "ipv6 esp close: can't remove protocol\n");
417 if (xfrm_unregister_type(&esp6_type, AF_INET6) < 0)
418 printk(KERN_INFO "ipv6 esp close: can't remove xfrm type\n");
419}
420
421module_init(esp6_init);
422module_exit(esp6_fini);
423
424MODULE_LICENSE("GPL");
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
new file mode 100644
index 000000000000..e0839eafc3a9
--- /dev/null
+++ b/net/ipv6/exthdrs.c
@@ -0,0 +1,575 @@
1/*
2 * Extension Header handling for IPv6
3 * Linux INET6 implementation
4 *
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
7 * Andi Kleen <ak@muc.de>
8 * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
9 *
10 * $Id: exthdrs.c,v 1.13 2001/06/19 15:58:56 davem Exp $
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
16 */
17
18/* Changes:
19 * yoshfuji : ensure not to overrun while parsing
20 * tlv options.
21 * Mitsuru KANDA @USAGI and: Remove ipv6_parse_exthdrs().
22 * YOSHIFUJI Hideaki @USAGI Register inbound extension header
23 * handlers as inet6_protocol{}.
24 */
25
26#include <linux/errno.h>
27#include <linux/types.h>
28#include <linux/socket.h>
29#include <linux/sockios.h>
30#include <linux/sched.h>
31#include <linux/net.h>
32#include <linux/netdevice.h>
33#include <linux/in6.h>
34#include <linux/icmpv6.h>
35
36#include <net/sock.h>
37#include <net/snmp.h>
38
39#include <net/ipv6.h>
40#include <net/protocol.h>
41#include <net/transp_v6.h>
42#include <net/rawv6.h>
43#include <net/ndisc.h>
44#include <net/ip6_route.h>
45#include <net/addrconf.h>
46
47#include <asm/uaccess.h>
48
49/*
50 * Parsing tlv encoded headers.
51 *
52 * Parsing function "func" returns 1, if parsing succeed
53 * and 0, if it failed.
54 * It MUST NOT touch skb->h.
55 */
56
/*
 * One entry of a TLV option dispatch table.  Tables are terminated by an
 * entry with a negative type (see ip6_parse_tlv()).
 */
struct tlvtype_proc {
	/* option type this entry handles; negative marks the end of a table */
	int type;
	/* handler: called with the skb and the option's offset from skb->nh.raw */
	int (*func)(struct sk_buff *skb, int offset);
};
61
62/*********************
63 Generic functions
64 *********************/
65
66/* An unknown option is detected, decide what to do */
67
68static int ip6_tlvopt_unknown(struct sk_buff *skb, int optoff)
69{
70 switch ((skb->nh.raw[optoff] & 0xC0) >> 6) {
71 case 0: /* ignore */
72 return 1;
73
74 case 1: /* drop packet */
75 break;
76
77 case 3: /* Send ICMP if not a multicast address and drop packet */
78 /* Actually, it is redundant check. icmp_send
79 will recheck in any case.
80 */
81 if (ipv6_addr_is_multicast(&skb->nh.ipv6h->daddr))
82 break;
83 case 2: /* send ICMP PARM PROB regardless and drop packet */
84 icmpv6_param_prob(skb, ICMPV6_UNK_OPTION, optoff);
85 return 0;
86 };
87
88 kfree_skb(skb);
89 return 0;
90}
91
92/* Parse tlv encoded option header (hop-by-hop or destination) */
93
94static int ip6_parse_tlv(struct tlvtype_proc *procs, struct sk_buff *skb)
95{
96 struct tlvtype_proc *curr;
97 int off = skb->h.raw - skb->nh.raw;
98 int len = ((skb->h.raw[1]+1)<<3);
99
100 if ((skb->h.raw + len) - skb->data > skb_headlen(skb))
101 goto bad;
102
103 off += 2;
104 len -= 2;
105
106 while (len > 0) {
107 int optlen = skb->nh.raw[off+1]+2;
108
109 switch (skb->nh.raw[off]) {
110 case IPV6_TLV_PAD0:
111 optlen = 1;
112 break;
113
114 case IPV6_TLV_PADN:
115 break;
116
117 default: /* Other TLV code so scan list */
118 if (optlen > len)
119 goto bad;
120 for (curr=procs; curr->type >= 0; curr++) {
121 if (curr->type == skb->nh.raw[off]) {
122 /* type specific length/alignment
123 checks will be performed in the
124 func(). */
125 if (curr->func(skb, off) == 0)
126 return 0;
127 break;
128 }
129 }
130 if (curr->type < 0) {
131 if (ip6_tlvopt_unknown(skb, off) == 0)
132 return 0;
133 }
134 break;
135 }
136 off += optlen;
137 len -= optlen;
138 }
139 if (len == 0)
140 return 1;
141bad:
142 kfree_skb(skb);
143 return 0;
144}
145
146/*****************************
147 Destination options header.
148 *****************************/
149
150static struct tlvtype_proc tlvprocdestopt_lst[] = {
151 /* No destination options are defined now */
152 {-1, NULL}
153};
154
155static int ipv6_destopt_rcv(struct sk_buff **skbp, unsigned int *nhoffp)
156{
157 struct sk_buff *skb = *skbp;
158 struct inet6_skb_parm *opt = IP6CB(skb);
159
160 if (!pskb_may_pull(skb, (skb->h.raw-skb->data)+8) ||
161 !pskb_may_pull(skb, (skb->h.raw-skb->data)+((skb->h.raw[1]+1)<<3))) {
162 IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
163 kfree_skb(skb);
164 return -1;
165 }
166
167 opt->dst1 = skb->h.raw - skb->nh.raw;
168
169 if (ip6_parse_tlv(tlvprocdestopt_lst, skb)) {
170 skb->h.raw += ((skb->h.raw[1]+1)<<3);
171 *nhoffp = opt->dst1;
172 return 1;
173 }
174
175 IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
176 return -1;
177}
178
179static struct inet6_protocol destopt_protocol = {
180 .handler = ipv6_destopt_rcv,
181 .flags = INET6_PROTO_NOPOLICY,
182};
183
184void __init ipv6_destopt_init(void)
185{
186 if (inet6_add_protocol(&destopt_protocol, IPPROTO_DSTOPTS) < 0)
187 printk(KERN_ERR "ipv6_destopt_init: Could not register protocol\n");
188}
189
190/********************************
191 NONE header. No data in packet.
192 ********************************/
193
/* Handler for IPPROTO_NONE: there is no payload, so free the skb; returns 0. */
static int ipv6_nodata_rcv(struct sk_buff **skbp, unsigned int *nhoffp)
{
	kfree_skb(*skbp);
	return 0;
}
201
202static struct inet6_protocol nodata_protocol = {
203 .handler = ipv6_nodata_rcv,
204 .flags = INET6_PROTO_NOPOLICY,
205};
206
207void __init ipv6_nodata_init(void)
208{
209 if (inet6_add_protocol(&nodata_protocol, IPPROTO_NONE) < 0)
210 printk(KERN_ERR "ipv6_nodata_init: Could not register protocol\n");
211}
212
213/********************************
214 Routing header.
215 ********************************/
216
/*
 * Inbound handler for the IPv6 Routing extension header.
 *
 * Returns 1 when no segments are left: skb->h.raw is advanced past the
 * header and *nhoffp is set to the offset of its nexthdr field.
 * Returns -1 in every other case; by then the skb has been freed, handed
 * to icmpv6_param_prob(), or passed on through dst_input().
 * *skbp is replaced when a cloned skb has to be copied before its header
 * is rewritten.
 */
static int ipv6_rthdr_rcv(struct sk_buff **skbp, unsigned int *nhoffp)
{
	struct sk_buff *skb = *skbp;
	struct inet6_skb_parm *opt = IP6CB(skb);
	struct in6_addr *addr;
	struct in6_addr daddr;
	int n, i;

	struct ipv6_rt_hdr *hdr;
	struct rt0_hdr *rthdr;

	/* Pull the first 8 bytes, then the full length they announce. */
	if (!pskb_may_pull(skb, (skb->h.raw-skb->data)+8) ||
	    !pskb_may_pull(skb, (skb->h.raw-skb->data)+((skb->h.raw[1]+1)<<3))) {
		IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
		kfree_skb(skb);
		return -1;
	}

	hdr = (struct ipv6_rt_hdr *) skb->h.raw;

	/* Only packets addressed to this host as unicast are processed. */
	if (ipv6_addr_is_multicast(&skb->nh.ipv6h->daddr) ||
	    skb->pkt_type != PACKET_HOST) {
		IP6_INC_STATS_BH(IPSTATS_MIB_INADDRERRORS);
		kfree_skb(skb);
		return -1;
	}

looped_back:
	/* Nothing left to route: record the offsets and step over the header. */
	if (hdr->segments_left == 0) {
		opt->srcrt = skb->h.raw - skb->nh.raw;
		skb->h.raw += (hdr->hdrlen + 1) << 3;
		opt->dst0 = opt->dst1;
		opt->dst1 = 0;
		*nhoffp = (&hdr->nexthdr) - skb->nh.raw;
		return 1;
	}

	/* Only type 0 headers are handled; others get a parameter problem. */
	if (hdr->type != IPV6_SRCRT_TYPE_0) {
		IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
		icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, (&hdr->type) - skb->nh.raw);
		return -1;
	}

	/* hdrlen counts 8-byte units, so 16-byte addresses need an even value. */
	if (hdr->hdrlen & 0x01) {
		IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
		icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, (&hdr->hdrlen) - skb->nh.raw);
		return -1;
	}

	/*
	 *	This is the routing header forwarding algorithm from
	 *	RFC 2460, page 16.
	 */

	/* n = number of addresses carried in the header */
	n = hdr->hdrlen >> 1;

	if (hdr->segments_left > n) {
		IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
		icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, (&hdr->segments_left) - skb->nh.raw);
		return -1;
	}

	/* We are about to mangle packet header. Be careful!
	   Do not damage packets queued somewhere.
	 */
	if (skb_cloned(skb)) {
		struct sk_buff *skb2 = skb_copy(skb, GFP_ATOMIC);
		kfree_skb(skb);
		/* the copy is a forwarded packet */
		if (skb2 == NULL) {
			IP6_INC_STATS_BH(IPSTATS_MIB_OUTDISCARDS);
			return -1;
		}
		/* switch every cached pointer over to the private copy */
		*skbp = skb = skb2;
		opt = IP6CB(skb2);
		hdr = (struct ipv6_rt_hdr *) skb2->h.raw;
	}

	if (skb->ip_summed == CHECKSUM_HW)
		skb->ip_summed = CHECKSUM_NONE;

	/* i is the 1-based index of the next address to visit */
	i = n - --hdr->segments_left;

	rthdr = (struct rt0_hdr *) hdr;
	addr = rthdr->addr;
	addr += i - 1;

	if (ipv6_addr_is_multicast(addr)) {
		IP6_INC_STATS_BH(IPSTATS_MIB_INADDRERRORS);
		kfree_skb(skb);
		return -1;
	}

	/* Swap the selected address with the packet's destination address. */
	ipv6_addr_copy(&daddr, addr);
	ipv6_addr_copy(addr, &skb->nh.ipv6h->daddr);
	ipv6_addr_copy(&skb->nh.ipv6h->daddr, &daddr);

	/* Drop the old route and look one up for the new destination. */
	dst_release(xchg(&skb->dst, NULL));
	ip6_route_input(skb);
	if (skb->dst->error) {
		/* restore skb->data to the network header before handing off */
		skb_push(skb, skb->data - skb->nh.raw);
		dst_input(skb);
		return -1;
	}

	/* Route is on a loopback device: process the next segment right here. */
	if (skb->dst->dev->flags&IFF_LOOPBACK) {
		if (skb->nh.ipv6h->hop_limit <= 1) {
			IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
			icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
				    0, skb->dev);
			kfree_skb(skb);
			return -1;
		}
		skb->nh.ipv6h->hop_limit--;
		goto looped_back;
	}

	skb_push(skb, skb->data - skb->nh.raw);
	dst_input(skb);
	return -1;
}
338
339static struct inet6_protocol rthdr_protocol = {
340 .handler = ipv6_rthdr_rcv,
341 .flags = INET6_PROTO_NOPOLICY,
342};
343
344void __init ipv6_rthdr_init(void)
345{
346 if (inet6_add_protocol(&rthdr_protocol, IPPROTO_ROUTING) < 0)
347 printk(KERN_ERR "ipv6_rthdr_init: Could not register protocol\n");
348};
349
350/*
351 This function inverts received rthdr.
352 NOTE: specs allow to make it automatically only if
353 packet authenticated.
354
355 I will not discuss it here (though, I am really pissed off at
356 this stupid requirement making rthdr idea useless)
357
358 Actually, it creates severe problems for us.
359 Embryonic requests has no associated sockets,
360 so that user have no control over it and
361 cannot not only to set reply options, but
362 even to know, that someone wants to connect
363 without success. :-(
364
365 For now we need to test the engine, so that I created
366 temporary (or permanent) backdoor.
367 If listening socket set IPV6_RTHDR to 2, then we invert header.
368 --ANK (980729)
369 */
370
371struct ipv6_txoptions *
372ipv6_invert_rthdr(struct sock *sk, struct ipv6_rt_hdr *hdr)
373{
374 /* Received rthdr:
375
376 [ H1 -> H2 -> ... H_prev ] daddr=ME
377
378 Inverted result:
379 [ H_prev -> ... -> H1 ] daddr =sender
380
381 Note, that IP output engine will rewrite this rthdr
382 by rotating it left by one addr.
383 */
384
385 int n, i;
386 struct rt0_hdr *rthdr = (struct rt0_hdr*)hdr;
387 struct rt0_hdr *irthdr;
388 struct ipv6_txoptions *opt;
389 int hdrlen = ipv6_optlen(hdr);
390
391 if (hdr->segments_left ||
392 hdr->type != IPV6_SRCRT_TYPE_0 ||
393 hdr->hdrlen & 0x01)
394 return NULL;
395
396 n = hdr->hdrlen >> 1;
397 opt = sock_kmalloc(sk, sizeof(*opt) + hdrlen, GFP_ATOMIC);
398 if (opt == NULL)
399 return NULL;
400 memset(opt, 0, sizeof(*opt));
401 opt->tot_len = sizeof(*opt) + hdrlen;
402 opt->srcrt = (void*)(opt+1);
403 opt->opt_nflen = hdrlen;
404
405 memcpy(opt->srcrt, hdr, sizeof(*hdr));
406 irthdr = (struct rt0_hdr*)opt->srcrt;
407 /* Obsolete field, MBZ, when originated by us */
408 irthdr->bitmap = 0;
409 opt->srcrt->segments_left = n;
410 for (i=0; i<n; i++)
411 memcpy(irthdr->addr+i, rthdr->addr+(n-1-i), 16);
412 return opt;
413}
414
415/**********************************
416 Hop-by-hop options.
417 **********************************/
418
419/* Router Alert as of RFC 2711 */
420
421static int ipv6_hop_ra(struct sk_buff *skb, int optoff)
422{
423 if (skb->nh.raw[optoff+1] == 2) {
424 IP6CB(skb)->ra = optoff;
425 return 1;
426 }
427 LIMIT_NETDEBUG(
428 printk(KERN_DEBUG "ipv6_hop_ra: wrong RA length %d\n", skb->nh.raw[optoff+1]));
429 kfree_skb(skb);
430 return 0;
431}
432
433/* Jumbo payload */
434
435static int ipv6_hop_jumbo(struct sk_buff *skb, int optoff)
436{
437 u32 pkt_len;
438
439 if (skb->nh.raw[optoff+1] != 4 || (optoff&3) != 2) {
440 LIMIT_NETDEBUG(
441 printk(KERN_DEBUG "ipv6_hop_jumbo: wrong jumbo opt length/alignment %d\n", skb->nh.raw[optoff+1]));
442 IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
443 goto drop;
444 }
445
446 pkt_len = ntohl(*(u32*)(skb->nh.raw+optoff+2));
447 if (pkt_len <= IPV6_MAXPLEN) {
448 IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
449 icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, optoff+2);
450 return 0;
451 }
452 if (skb->nh.ipv6h->payload_len) {
453 IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
454 icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, optoff);
455 return 0;
456 }
457
458 if (pkt_len > skb->len - sizeof(struct ipv6hdr)) {
459 IP6_INC_STATS_BH(IPSTATS_MIB_INTRUNCATEDPKTS);
460 goto drop;
461 }
462 if (pkt_len + sizeof(struct ipv6hdr) < skb->len) {
463 __pskb_trim(skb, pkt_len + sizeof(struct ipv6hdr));
464 if (skb->ip_summed == CHECKSUM_HW)
465 skb->ip_summed = CHECKSUM_NONE;
466 }
467 return 1;
468
469drop:
470 kfree_skb(skb);
471 return 0;
472}
473
474static struct tlvtype_proc tlvprochopopt_lst[] = {
475 {
476 .type = IPV6_TLV_ROUTERALERT,
477 .func = ipv6_hop_ra,
478 },
479 {
480 .type = IPV6_TLV_JUMBO,
481 .func = ipv6_hop_jumbo,
482 },
483 { -1, }
484};
485
486int ipv6_parse_hopopts(struct sk_buff *skb, int nhoff)
487{
488 IP6CB(skb)->hop = sizeof(struct ipv6hdr);
489 if (ip6_parse_tlv(tlvprochopopt_lst, skb))
490 return sizeof(struct ipv6hdr);
491 return -1;
492}
493
494/*
495 * Creating outbound headers.
496 *
497 * "build" functions work when skb is filled from head to tail (datagram)
498 * "push" functions work when headers are added from tail to head (tcp)
499 *
500 * In both cases we assume, that caller reserved enough room
501 * for headers.
502 */
503
504static void ipv6_push_rthdr(struct sk_buff *skb, u8 *proto,
505 struct ipv6_rt_hdr *opt,
506 struct in6_addr **addr_p)
507{
508 struct rt0_hdr *phdr, *ihdr;
509 int hops;
510
511 ihdr = (struct rt0_hdr *) opt;
512
513 phdr = (struct rt0_hdr *) skb_push(skb, (ihdr->rt_hdr.hdrlen + 1) << 3);
514 memcpy(phdr, ihdr, sizeof(struct rt0_hdr));
515
516 hops = ihdr->rt_hdr.hdrlen >> 1;
517
518 if (hops > 1)
519 memcpy(phdr->addr, ihdr->addr + 1,
520 (hops - 1) * sizeof(struct in6_addr));
521
522 ipv6_addr_copy(phdr->addr + (hops - 1), *addr_p);
523 *addr_p = ihdr->addr;
524
525 phdr->rt_hdr.nexthdr = *proto;
526 *proto = NEXTHDR_ROUTING;
527}
528
529static void ipv6_push_exthdr(struct sk_buff *skb, u8 *proto, u8 type, struct ipv6_opt_hdr *opt)
530{
531 struct ipv6_opt_hdr *h = (struct ipv6_opt_hdr *)skb_push(skb, ipv6_optlen(opt));
532
533 memcpy(h, opt, ipv6_optlen(opt));
534 h->nexthdr = *proto;
535 *proto = type;
536}
537
538void ipv6_push_nfrag_opts(struct sk_buff *skb, struct ipv6_txoptions *opt,
539 u8 *proto,
540 struct in6_addr **daddr)
541{
542 if (opt->srcrt)
543 ipv6_push_rthdr(skb, proto, opt->srcrt, daddr);
544 if (opt->dst0opt)
545 ipv6_push_exthdr(skb, proto, NEXTHDR_DEST, opt->dst0opt);
546 if (opt->hopopt)
547 ipv6_push_exthdr(skb, proto, NEXTHDR_HOP, opt->hopopt);
548}
549
550void ipv6_push_frag_opts(struct sk_buff *skb, struct ipv6_txoptions *opt, u8 *proto)
551{
552 if (opt->dst1opt)
553 ipv6_push_exthdr(skb, proto, NEXTHDR_DEST, opt->dst1opt);
554}
555
556struct ipv6_txoptions *
557ipv6_dup_options(struct sock *sk, struct ipv6_txoptions *opt)
558{
559 struct ipv6_txoptions *opt2;
560
561 opt2 = sock_kmalloc(sk, opt->tot_len, GFP_ATOMIC);
562 if (opt2) {
563 long dif = (char*)opt2 - (char*)opt;
564 memcpy(opt2, opt, opt->tot_len);
565 if (opt2->hopopt)
566 *((char**)&opt2->hopopt) += dif;
567 if (opt2->dst0opt)
568 *((char**)&opt2->dst0opt) += dif;
569 if (opt2->dst1opt)
570 *((char**)&opt2->dst1opt) += dif;
571 if (opt2->srcrt)
572 *((char**)&opt2->srcrt) += dif;
573 }
574 return opt2;
575}
diff --git a/net/ipv6/exthdrs_core.c b/net/ipv6/exthdrs_core.c
new file mode 100644
index 000000000000..6dda815c013f
--- /dev/null
+++ b/net/ipv6/exthdrs_core.c
@@ -0,0 +1,109 @@
1/*
2 * IPv6 library code, needed by static components when full IPv6 support is
3 * not configured or static.
4 */
5#include <net/ipv6.h>
6
7/*
8 * find out if nexthdr is a well-known extension header or a protocol
9 */
10
11int ipv6_ext_hdr(u8 nexthdr)
12{
13 /*
14 * find out if nexthdr is an extension header or a protocol
15 */
16 return ( (nexthdr == NEXTHDR_HOP) ||
17 (nexthdr == NEXTHDR_ROUTING) ||
18 (nexthdr == NEXTHDR_FRAGMENT) ||
19 (nexthdr == NEXTHDR_AUTH) ||
20 (nexthdr == NEXTHDR_NONE) ||
21 (nexthdr == NEXTHDR_DEST) );
22}
23
24/*
25 * Skip any extension headers. This is used by the ICMP module.
26 *
27 * Note that strictly speaking this conflicts with RFC 2460 4.0:
28 * ...The contents and semantics of each extension header determine whether
29 * or not to proceed to the next header. Therefore, extension headers must
30 * be processed strictly in the order they appear in the packet; a
31 * receiver must not, for example, scan through a packet looking for a
32 * particular kind of extension header and process that header prior to
33 * processing all preceding ones.
34 *
35 * We do exactly this. This is a protocol bug. We can't decide after a
36 * seeing an unknown discard-with-error flavour TLV option if it's a
37 * ICMP error message or not (errors should never be send in reply to
38 * ICMP error messages).
39 *
40 * But I see no other way to do this. This might need to be reexamined
41 * when Linux implements ESP (and maybe AUTH) headers.
42 * --AK
43 *
44 * This function parses (probably truncated) exthdr set "hdr"
45 * of length "len". "nexthdrp" initially points to some place,
46 * where type of the first header can be found.
47 *
48 * It skips all well-known exthdrs, and returns the offset of the start
49 * of the unparsable area, i.e. the first header with unknown type.
50 * If the result is not negative, *nexthdrp is updated with the type/protocol of this header.
51 *
52 * NOTES: - if packet terminated with NEXTHDR_NONE it returns -1.
53 * - it may return an offset pointing beyond end of packet,
54 * if the last recognized header is truncated in the middle.
55 * - if packet is truncated, so that all parsed headers are skipped,
56 * it returns -1.
57 * - First fragment header is skipped, not-first ones
58 * are considered as unparsable.
59 * - ESP is unparsable for now and considered like
60 * normal payload protocol.
61 * - Note also special handling of AUTH header. Thanks to IPsec wizards.
62 *
63 * --ANK (980726)
64 */
65
66int ipv6_skip_exthdr(const struct sk_buff *skb, int start, u8 *nexthdrp, int len)
67{
68 u8 nexthdr = *nexthdrp;
69
70 while (ipv6_ext_hdr(nexthdr)) {
71 struct ipv6_opt_hdr _hdr, *hp;
72 int hdrlen;
73
74 if (len < (int)sizeof(struct ipv6_opt_hdr))
75 return -1;
76 if (nexthdr == NEXTHDR_NONE)
77 return -1;
78 hp = skb_header_pointer(skb, start, sizeof(_hdr), &_hdr);
79 if (hp == NULL)
80 BUG();
81 if (nexthdr == NEXTHDR_FRAGMENT) {
82 unsigned short _frag_off, *fp;
83 fp = skb_header_pointer(skb,
84 start+offsetof(struct frag_hdr,
85 frag_off),
86 sizeof(_frag_off),
87 &_frag_off);
88 if (fp == NULL)
89 return -1;
90
91 if (ntohs(*fp) & ~0x7)
92 break;
93 hdrlen = 8;
94 } else if (nexthdr == NEXTHDR_AUTH)
95 hdrlen = (hp->hdrlen+2)<<2;
96 else
97 hdrlen = ipv6_optlen(hp);
98
99 nexthdr = hp->nexthdr;
100 len -= hdrlen;
101 start += hdrlen;
102 }
103
104 *nexthdrp = nexthdr;
105 return start;
106}
107
/* Export both helpers through EXPORT_SYMBOL. */
EXPORT_SYMBOL(ipv6_ext_hdr);
EXPORT_SYMBOL(ipv6_skip_exthdr);
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
new file mode 100644
index 000000000000..87b9082ceab2
--- /dev/null
+++ b/net/ipv6/icmp.c
@@ -0,0 +1,822 @@
1/*
2 * Internet Control Message Protocol (ICMPv6)
3 * Linux INET6 implementation
4 *
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
7 *
8 * $Id: icmp.c,v 1.38 2002/02/08 03:57:19 davem Exp $
9 *
10 * Based on net/ipv4/icmp.c
11 *
12 * RFC 1885
13 *
14 * This program is free software; you can redistribute it and/or
15 * modify it under the terms of the GNU General Public License
16 * as published by the Free Software Foundation; either version
17 * 2 of the License, or (at your option) any later version.
18 */
19
20/*
21 * Changes:
22 *
23 * Andi Kleen : exception handling
24 * Andi Kleen add rate limits. never reply to a icmp.
25 * add more length checks and other fixes.
26 * yoshfuji : ensure to sent parameter problem for
27 * fragments.
28 * YOSHIFUJI Hideaki @USAGI: added sysctl for icmp rate limit.
29 * Randy Dunlap and
30 * YOSHIFUJI Hideaki @USAGI: Per-interface statistics support
31 * Kazunori MIYAZAWA @USAGI: change output process to use ip6_append_data
32 */
33
34#include <linux/module.h>
35#include <linux/errno.h>
36#include <linux/types.h>
37#include <linux/socket.h>
38#include <linux/in.h>
39#include <linux/kernel.h>
40#include <linux/sched.h>
41#include <linux/sockios.h>
42#include <linux/net.h>
43#include <linux/skbuff.h>
44#include <linux/init.h>
45
46#ifdef CONFIG_SYSCTL
47#include <linux/sysctl.h>
48#endif
49
50#include <linux/inet.h>
51#include <linux/netdevice.h>
52#include <linux/icmpv6.h>
53
54#include <net/ip.h>
55#include <net/sock.h>
56
57#include <net/ipv6.h>
58#include <net/ip6_checksum.h>
59#include <net/protocol.h>
60#include <net/raw.h>
61#include <net/rawv6.h>
62#include <net/transp_v6.h>
63#include <net/ip6_route.h>
64#include <net/addrconf.h>
65#include <net/icmp.h>
66
67#include <asm/uaccess.h>
68#include <asm/system.h>
69
/* ICMPv6 MIB counter storage, defined through DEFINE_SNMP_STAT. */
DEFINE_SNMP_STAT(struct icmpv6_mib, icmpv6_statistics);
71
72/*
73 * The ICMP socket(s). This is the most convenient way to flow control
74 * our ICMP output as well as maintain a clean interface throughout
75 * all layers. All Socketless IP sends will soon be gone.
76 *
77 * On SMP we have one ICMP socket per-cpu.
78 */
/*
 * Per-CPU socket pointer, initially NULL (where it gets assigned is not
 * visible in this part of the file).  icmpv6_socket refers to the
 * instance of the CPU the code is currently running on.
 */
static DEFINE_PER_CPU(struct socket *, __icmpv6_socket) = NULL;
#define icmpv6_socket	__get_cpu_var(__icmpv6_socket)
81
82static int icmpv6_rcv(struct sk_buff **pskb, unsigned int *nhoffp);
83
84static struct inet6_protocol icmpv6_protocol = {
85 .handler = icmpv6_rcv,
86 .flags = INET6_PROTO_FINAL,
87};
88
89static __inline__ int icmpv6_xmit_lock(void)
90{
91 local_bh_disable();
92
93 if (unlikely(!spin_trylock(&icmpv6_socket->sk->sk_lock.slock))) {
94 /* This can happen if the output path (f.e. SIT or
95 * ip6ip6 tunnel) signals dst_link_failure() for an
96 * outgoing ICMP6 packet.
97 */
98 local_bh_enable();
99 return 1;
100 }
101 return 0;
102}
103
104static __inline__ void icmpv6_xmit_unlock(void)
105{
106 spin_unlock_bh(&icmpv6_socket->sk->sk_lock.slock);
107}
108
109/*
110 * Slightly more convenient version of icmpv6_send.
111 */
/*
 * Send an ICMPV6_PARAMPROB error with the given code about skb, passing
 * pos as the info value, and then free skb.  The caller must not touch
 * skb afterwards.
 */
void icmpv6_param_prob(struct sk_buff *skb, int code, int pos)
{
	icmpv6_send(skb, ICMPV6_PARAMPROB, code, pos, skb->dev);
	/* the skb is always consumed here */
	kfree_skb(skb);
}
117
118/*
119 * Figure out, may we reply to this packet with icmp error.
120 *
121 * We do not reply, if:
122 * - it was icmp error message.
123 * - it is truncated, so that it is known, that protocol is ICMPV6
124 * (i.e. in the middle of some exthdr)
125 *
126 * --ANK (980726)
127 */
128
129static int is_ineligible(struct sk_buff *skb)
130{
131 int ptr = (u8*)(skb->nh.ipv6h+1) - skb->data;
132 int len = skb->len - ptr;
133 __u8 nexthdr = skb->nh.ipv6h->nexthdr;
134
135 if (len < 0)
136 return 1;
137
138 ptr = ipv6_skip_exthdr(skb, ptr, &nexthdr, len);
139 if (ptr < 0)
140 return 0;
141 if (nexthdr == IPPROTO_ICMPV6) {
142 u8 _type, *tp;
143 tp = skb_header_pointer(skb,
144 ptr+offsetof(struct icmp6hdr, icmp6_type),
145 sizeof(_type), &_type);
146 if (tp == NULL ||
147 !(*tp & ICMPV6_INFOMSG_MASK))
148 return 1;
149 }
150 return 0;
151}
152
/*
 * Base timeout for the ICMPv6 error rate limit, in jiffies (one second
 * by default); consumed by icmpv6_xrlim_allow().
 */
static int sysctl_icmpv6_time = 1*HZ;
154
155/*
156 * Check the ICMP output rate limit
157 */
158static inline int icmpv6_xrlim_allow(struct sock *sk, int type,
159 struct flowi *fl)
160{
161 struct dst_entry *dst;
162 int res = 0;
163
164 /* Informational messages are not limited. */
165 if (type & ICMPV6_INFOMSG_MASK)
166 return 1;
167
168 /* Do not limit pmtu discovery, it would break it. */
169 if (type == ICMPV6_PKT_TOOBIG)
170 return 1;
171
172 /*
173 * Look up the output route.
174 * XXX: perhaps the expire for routing entries cloned by
175 * this lookup should be more aggressive (not longer than timeout).
176 */
177 dst = ip6_route_output(sk, fl);
178 if (dst->error) {
179 IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
180 } else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) {
181 res = 1;
182 } else {
183 struct rt6_info *rt = (struct rt6_info *)dst;
184 int tmo = sysctl_icmpv6_time;
185
186 /* Give more bandwidth to wider prefixes. */
187 if (rt->rt6i_dst.plen < 128)
188 tmo >>= ((128 - rt->rt6i_dst.plen)>>5);
189
190 res = xrlim_allow(dst, tmo);
191 }
192 dst_release(dst);
193 return res;
194}
195
196/*
197 * an inline helper for the "simple" if statement below
198 * checks if parameter problem report is caused by an
199 * unrecognized IPv6 option that has the Option Type
200 * highest-order two bits set to 10
201 */
202
203static __inline__ int opt_unrec(struct sk_buff *skb, __u32 offset)
204{
205 u8 _optval, *op;
206
207 offset += skb->nh.raw - skb->data;
208 op = skb_header_pointer(skb, offset, sizeof(_optval), &_optval);
209 if (op == NULL)
210 return 1;
211 return (*op & 0xC0) == 0x80;
212}
213
214static int icmpv6_push_pending_frames(struct sock *sk, struct flowi *fl, struct icmp6hdr *thdr, int len)
215{
216 struct sk_buff *skb;
217 struct icmp6hdr *icmp6h;
218 int err = 0;
219
220 if ((skb = skb_peek(&sk->sk_write_queue)) == NULL)
221 goto out;
222
223 icmp6h = (struct icmp6hdr*) skb->h.raw;
224 memcpy(icmp6h, thdr, sizeof(struct icmp6hdr));
225 icmp6h->icmp6_cksum = 0;
226
227 if (skb_queue_len(&sk->sk_write_queue) == 1) {
228 skb->csum = csum_partial((char *)icmp6h,
229 sizeof(struct icmp6hdr), skb->csum);
230 icmp6h->icmp6_cksum = csum_ipv6_magic(&fl->fl6_src,
231 &fl->fl6_dst,
232 len, fl->proto,
233 skb->csum);
234 } else {
235 u32 tmp_csum = 0;
236
237 skb_queue_walk(&sk->sk_write_queue, skb) {
238 tmp_csum = csum_add(tmp_csum, skb->csum);
239 }
240
241 tmp_csum = csum_partial((char *)icmp6h,
242 sizeof(struct icmp6hdr), tmp_csum);
243 tmp_csum = csum_ipv6_magic(&fl->fl6_src,
244 &fl->fl6_dst,
245 len, fl->proto, tmp_csum);
246 icmp6h->icmp6_cksum = tmp_csum;
247 }
248 if (icmp6h->icmp6_cksum == 0)
249 icmp6h->icmp6_cksum = -1;
250 ip6_push_pending_frames(sk);
251out:
252 return err;
253}
254
/* Source description handed to icmpv6_getfrag() through ip6_append_data(). */
struct icmpv6_msg {
	/* skb the payload is copied from */
	struct sk_buff *skb;
	/* offset into skb at which the copied data starts */
	int offset;
};
259
260static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
261{
262 struct icmpv6_msg *msg = (struct icmpv6_msg *) from;
263 struct sk_buff *org_skb = msg->skb;
264 __u32 csum = 0;
265
266 csum = skb_copy_and_csum_bits(org_skb, msg->offset + offset,
267 to, len, csum);
268 skb->csum = csum_block_add(skb->csum, csum, odd);
269 return 0;
270}
271
272/*
273 * Send an ICMP message in response to a packet in error
274 */
/*
 * Send an ICMPv6 message of the given type and code about the packet
 * skb, addressed to that packet's source.
 *
 * info is stored in the icmp6_pointer field of the new header in network
 * byte order.  skb is only read, never freed here.  The dev argument is
 * not referenced in this body; skb->dev is used instead.
 *
 * Nothing is sent if the offending packet fails the destination or source
 * address checks, is itself ineligible (see is_ineligible()), if the
 * per-CPU ICMPv6 socket is busy, or if the rate limit is exceeded.
 */
void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info,
		 struct net_device *dev)
{
	struct inet6_dev *idev = NULL;
	struct ipv6hdr *hdr = skb->nh.ipv6h;
	struct sock *sk = icmpv6_socket->sk;
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct in6_addr *saddr = NULL;
	struct dst_entry *dst;
	struct icmp6hdr tmp_hdr;
	struct flowi fl;
	struct icmpv6_msg msg;
	int iif = 0;
	int addr_type = 0;
	int len;
	int hlimit;
	int err = 0;

	/* The complete IPv6 header must lie between skb->head and skb->tail. */
	if ((u8*)hdr < skb->head || (u8*)(hdr+1) > skb->tail)
		return;

	/*
	 *	Make sure we respect the rules
	 *	i.e. RFC 1885 2.4(e)
	 *	Rule (e.1) is enforced by not using icmpv6_send
	 *	in any code that processes icmp errors.
	 */
	addr_type = ipv6_addr_type(&hdr->daddr);

	/* Reply from the address the packet was sent to, if it is ours. */
	if (ipv6_chk_addr(&hdr->daddr, skb->dev, 0))
		saddr = &hdr->daddr;

	/*
	 *	Dest addr check
	 */

	if ((addr_type & IPV6_ADDR_MULTICAST || skb->pkt_type != PACKET_HOST)) {
		/*
		 * Only Packet Too Big, and parameter problems about an
		 * unrecognized option of the "10" kind, are answered here.
		 */
		if (type != ICMPV6_PKT_TOOBIG &&
		    !(type == ICMPV6_PARAMPROB &&
		      code == ICMPV6_UNK_OPTION &&
		      (opt_unrec(skb, info))))
			return;

		saddr = NULL;
	}

	addr_type = ipv6_addr_type(&hdr->saddr);

	/*
	 *	Source addr check
	 */

	/* A link-local source pins the reply to the receiving interface. */
	if (addr_type & IPV6_ADDR_LINKLOCAL)
		iif = skb->dev->ifindex;

	/*
	 *	Must not send if we know that source is Anycast also.
	 *	for now we don't know that.
	 */
	if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) {
		LIMIT_NETDEBUG(
			printk(KERN_DEBUG "icmpv6_send: addr_any/mcast source\n"));
		return;
	}

	/*
	 *	Never answer to a ICMP packet.
	 */
	if (is_ineligible(skb)) {
		LIMIT_NETDEBUG(
			printk(KERN_DEBUG "icmpv6_send: no reply to icmp error\n"));
		return;
	}

	/* Describe the reply flow: back to the offender's source address. */
	memset(&fl, 0, sizeof(fl));
	fl.proto = IPPROTO_ICMPV6;
	ipv6_addr_copy(&fl.fl6_dst, &hdr->saddr);
	if (saddr)
		ipv6_addr_copy(&fl.fl6_src, saddr);
	fl.oif = iif;
	fl.fl_icmp_type = type;
	fl.fl_icmp_code = code;

	/* From here on every exit must go through the "out" label to unlock. */
	if (icmpv6_xmit_lock())
		return;

	if (!icmpv6_xrlim_allow(sk, type, &fl))
		goto out;

	tmp_hdr.icmp6_type = type;
	tmp_hdr.icmp6_code = code;
	tmp_hdr.icmp6_cksum = 0;
	tmp_hdr.icmp6_pointer = htonl(info);

	if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst))
		fl.oif = np->mcast_oif;

	err = ip6_dst_lookup(sk, &dst, &fl);
	if (err)
		goto out;
	if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
		goto out_dst_release;

	/* Hop limit: socket setting, else the route metric, else the device. */
	if (ipv6_addr_is_multicast(&fl.fl6_dst))
		hlimit = np->mcast_hops;
	else
		hlimit = np->hop_limit;
	if (hlimit < 0)
		hlimit = dst_metric(dst, RTAX_HOPLIMIT);
	if (hlimit < 0)
		hlimit = ipv6_get_hoplimit(dst->dev);

	/* The payload is the offending packet, starting at its IPv6 header. */
	msg.skb = skb;
	msg.offset = skb->nh.raw - skb->data;

	/* Cap the payload so that the whole message fits into IPV6_MIN_MTU. */
	len = skb->len - msg.offset;
	len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) -sizeof(struct icmp6hdr));
	/*
	 * NOTE(review): min_t() above compares as unsigned int, so a negative
	 * len would already have been replaced by the cap; this test looks
	 * like it can never fire.  Confirm and consider testing before the
	 * clamp.
	 */
	if (len < 0) {
		LIMIT_NETDEBUG(
			printk(KERN_DEBUG "icmp: len problem\n"));
		goto out_dst_release;
	}

	/* Reference on the receiving device's inet6_dev, used for statistics. */
	idev = in6_dev_get(skb->dev);

	err = ip6_append_data(sk, icmpv6_getfrag, &msg,
			      len + sizeof(struct icmp6hdr),
			      sizeof(struct icmp6hdr),
			      hlimit, NULL, &fl, (struct rt6_info*)dst,
			      MSG_DONTWAIT);
	if (err) {
		ip6_flush_pending_frames(sk);
		goto out_put;
	}
	err = icmpv6_push_pending_frames(sk, &fl, &tmp_hdr, len + sizeof(struct icmp6hdr));

	/* Per-type counter, indexed relative to ICMPV6_DEST_UNREACH. */
	if (type >= ICMPV6_DEST_UNREACH && type <= ICMPV6_PARAMPROB)
		ICMP6_INC_STATS_OFFSET_BH(idev, ICMP6_MIB_OUTDESTUNREACHS, type - ICMPV6_DEST_UNREACH);
	ICMP6_INC_STATS_BH(idev, ICMP6_MIB_OUTMSGS);

out_put:
	if (likely(idev != NULL))
		in6_dev_put(idev);
out_dst_release:
	dst_release(dst);
out:
	icmpv6_xmit_unlock();
}
423
/*
 * Answer the ICMPv6 message in skb with an ICMPV6_ECHO_REPLY.
 *
 * The reply header is a copy of the header at skb->h.raw with only the
 * type changed; the reply data is taken from skb starting at offset 0.
 * The reply goes to the request's source address.  skb is not freed here.
 */
static void icmpv6_echo_reply(struct sk_buff *skb)
{
	struct sock *sk = icmpv6_socket->sk;
	struct inet6_dev *idev;
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct in6_addr *saddr = NULL;
	struct icmp6hdr *icmph = (struct icmp6hdr *) skb->h.raw;
	struct icmp6hdr tmp_hdr;
	struct flowi fl;
	struct icmpv6_msg msg;
	struct dst_entry *dst;
	int err = 0;
	int hlimit;

	/* Reply from the address the request was sent to, if it was unicast. */
	saddr = &skb->nh.ipv6h->daddr;

	if (!ipv6_unicast_destination(skb))
		saddr = NULL;

	memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr));
	tmp_hdr.icmp6_type = ICMPV6_ECHO_REPLY;

	memset(&fl, 0, sizeof(fl));
	fl.proto = IPPROTO_ICMPV6;
	ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
	if (saddr)
		ipv6_addr_copy(&fl.fl6_src, saddr);
	fl.oif = skb->dev->ifindex;
	fl.fl_icmp_type = ICMPV6_ECHO_REPLY;

	/* From here on every exit must go through the "out" label to unlock. */
	if (icmpv6_xmit_lock())
		return;

	if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst))
		fl.oif = np->mcast_oif;

	err = ip6_dst_lookup(sk, &dst, &fl);
	if (err)
		goto out;
	if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
		goto out_dst_release;

	/* Hop limit: socket setting, else the route metric, else the device. */
	if (ipv6_addr_is_multicast(&fl.fl6_dst))
		hlimit = np->mcast_hops;
	else
		hlimit = np->hop_limit;
	if (hlimit < 0)
		hlimit = dst_metric(dst, RTAX_HOPLIMIT);
	if (hlimit < 0)
		hlimit = ipv6_get_hoplimit(dst->dev);

	/* idev is assigned before any jump to out_put, which reads it. */
	idev = in6_dev_get(skb->dev);

	msg.skb = skb;
	msg.offset = 0;

	err = ip6_append_data(sk, icmpv6_getfrag, &msg, skb->len + sizeof(struct icmp6hdr),
				sizeof(struct icmp6hdr), hlimit, NULL, &fl,
				(struct rt6_info*)dst, MSG_DONTWAIT);

	if (err) {
		ip6_flush_pending_frames(sk);
		goto out_put;
	}
	err = icmpv6_push_pending_frames(sk, &fl, &tmp_hdr, skb->len + sizeof(struct icmp6hdr));

        ICMP6_INC_STATS_BH(idev, ICMP6_MIB_OUTECHOREPLIES);
        ICMP6_INC_STATS_BH(idev, ICMP6_MIB_OUTMSGS);

out_put:
	if (likely(idev != NULL))
		in6_dev_put(idev);
out_dst_release:
	dst_release(dst);
out:
	icmpv6_xmit_unlock();
}
501
502static void icmpv6_notify(struct sk_buff *skb, int type, int code, u32 info)
503{
504 struct in6_addr *saddr, *daddr;
505 struct inet6_protocol *ipprot;
506 struct sock *sk;
507 int inner_offset;
508 int hash;
509 u8 nexthdr;
510
511 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
512 return;
513
514 nexthdr = ((struct ipv6hdr *)skb->data)->nexthdr;
515 if (ipv6_ext_hdr(nexthdr)) {
516 /* now skip over extension headers */
517 inner_offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr, skb->len - sizeof(struct ipv6hdr));
518 if (inner_offset<0)
519 return;
520 } else {
521 inner_offset = sizeof(struct ipv6hdr);
522 }
523
524 /* Checkin header including 8 bytes of inner protocol header. */
525 if (!pskb_may_pull(skb, inner_offset+8))
526 return;
527
528 saddr = &skb->nh.ipv6h->saddr;
529 daddr = &skb->nh.ipv6h->daddr;
530
531 /* BUGGG_FUTURE: we should try to parse exthdrs in this packet.
532 Without this we will not able f.e. to make source routed
533 pmtu discovery.
534 Corresponding argument (opt) to notifiers is already added.
535 --ANK (980726)
536 */
537
538 hash = nexthdr & (MAX_INET_PROTOS - 1);
539
540 rcu_read_lock();
541 ipprot = rcu_dereference(inet6_protos[hash]);
542 if (ipprot && ipprot->err_handler)
543 ipprot->err_handler(skb, NULL, type, code, inner_offset, info);
544 rcu_read_unlock();
545
546 read_lock(&raw_v6_lock);
547 if ((sk = sk_head(&raw_v6_htable[hash])) != NULL) {
548 while((sk = __raw_v6_lookup(sk, nexthdr, daddr, saddr))) {
549 rawv6_err(sk, skb, NULL, type, code, inner_offset, info);
550 sk = sk_next(sk);
551 }
552 }
553 read_unlock(&raw_v6_lock);
554}
555
556/*
557 * Handle icmp messages
558 */
559
/*
 * Receive handler for ICMPv6 packets.
 *
 * Verifies the checksum, updates the per-device ICMPv6 counters and
 * dispatches on the message type.  The skb is always freed here and the
 * function always returns 0; malformed packets additionally bump
 * ICMP6_MIB_INERRORS.  nhoffp is not used.
 */
560static int icmpv6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
561{
562 struct sk_buff *skb = *pskb;
563 struct net_device *dev = skb->dev;
564 struct inet6_dev *idev = __in6_dev_get(dev);
565 struct in6_addr *saddr, *daddr;
566 struct ipv6hdr *orig_hdr;
567 struct icmp6hdr *hdr;
568 int type;
569
570 ICMP6_INC_STATS_BH(idev, ICMP6_MIB_INMSGS);
571
572 saddr = &skb->nh.ipv6h->saddr;
573 daddr = &skb->nh.ipv6h->daddr;
574
575 /* Perform checksum. */
/* Checksum supplied by hardware: accept it if it verifies, otherwise
 * fall back to CHECKSUM_NONE so the software check below runs. */
576 if (skb->ip_summed == CHECKSUM_HW) {
577 skb->ip_summed = CHECKSUM_UNNECESSARY;
578 if (csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_ICMPV6,
579 skb->csum)) {
580 LIMIT_NETDEBUG(
581 printk(KERN_DEBUG "ICMPv6 hw checksum failed\n"));
582 skb->ip_summed = CHECKSUM_NONE;
583 }
584 }
585 if (skb->ip_summed == CHECKSUM_NONE) {
586 if (csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_ICMPV6,
587 skb_checksum(skb, 0, skb->len, 0))) {
588 LIMIT_NETDEBUG(
589 printk(KERN_DEBUG "ICMPv6 checksum failed [%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x > %04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x]\n",
590 NIP6(*saddr), NIP6(*daddr)));
591 goto discard_it;
592 }
593 }
594
/* Pull the ICMPv6 header off skb->data; hdr below is still taken
 * from skb->h.raw. */
595 if (!pskb_pull(skb, sizeof(struct icmp6hdr)))
596 goto discard_it;
597
598 hdr = (struct icmp6hdr *) skb->h.raw;
599
600 type = hdr->icmp6_type;
601
/* Per-type input counters are addressed as an offset from the first
 * counter of the error range and of the informational range. */
602 if (type >= ICMPV6_DEST_UNREACH && type <= ICMPV6_PARAMPROB)
603 ICMP6_INC_STATS_OFFSET_BH(idev, ICMP6_MIB_INDESTUNREACHS, type - ICMPV6_DEST_UNREACH);
604 else if (type >= ICMPV6_ECHO_REQUEST && type <= NDISC_REDIRECT)
605 ICMP6_INC_STATS_OFFSET_BH(idev, ICMP6_MIB_INECHOS, type - ICMPV6_ECHO_REQUEST);
606
607 switch (type) {
608 case ICMPV6_ECHO_REQUEST:
609 icmpv6_echo_reply(skb);
610 break;
611
612 case ICMPV6_ECHO_REPLY:
613 /* we couldn't care less */
614 break;
615
616 case ICMPV6_PKT_TOOBIG:
617 /* BUGGG_FUTURE: if packet contains rthdr, we cannot update
618 standard destination cache. Seems, only "advanced"
619 destination cache will allow to solve this problem
620 --ANK (980726)
621 */
622 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
623 goto discard_it;
/* hdr is re-read after pskb_may_pull(); the offending packet's IPv6
 * header follows the ICMPv6 header directly. */
624 hdr = (struct icmp6hdr *) skb->h.raw;
625 orig_hdr = (struct ipv6hdr *) (hdr + 1);
626 rt6_pmtu_discovery(&orig_hdr->daddr, &orig_hdr->saddr, dev,
627 ntohl(hdr->icmp6_mtu));
628
629 /*
630 * Drop through to notify
631 */
632
633 case ICMPV6_DEST_UNREACH:
634 case ICMPV6_TIME_EXCEED:
635 case ICMPV6_PARAMPROB:
636 icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
637 break;
638
639 case NDISC_ROUTER_SOLICITATION:
640 case NDISC_ROUTER_ADVERTISEMENT:
641 case NDISC_NEIGHBOUR_SOLICITATION:
642 case NDISC_NEIGHBOUR_ADVERTISEMENT:
643 case NDISC_REDIRECT:
644 ndisc_rcv(skb);
645 break;
646
647 case ICMPV6_MGM_QUERY:
648 igmp6_event_query(skb);
649 break;
650
651 case ICMPV6_MGM_REPORT:
652 igmp6_event_report(skb);
653 break;
654
/* Known types that are accepted and ignored here. */
655 case ICMPV6_MGM_REDUCTION:
656 case ICMPV6_NI_QUERY:
657 case ICMPV6_NI_REPLY:
658 case ICMPV6_MLD2_REPORT:
659 case ICMPV6_DHAAD_REQUEST:
660 case ICMPV6_DHAAD_REPLY:
661 case ICMPV6_MOBILE_PREFIX_SOL:
662 case ICMPV6_MOBILE_PREFIX_ADV:
663 break;
664
665 default:
666 LIMIT_NETDEBUG(
667 printk(KERN_DEBUG "icmpv6: msg of unknown type\n"));
668
669 /* informational */
670 if (type & ICMPV6_INFOMSG_MASK)
671 break;
672
673 /*
674 * error of unknown type.
675 * must pass to upper level
676 */
677
678 icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
679 };
680 kfree_skb(skb);
681 return 0;
682
683discard_it:
684 ICMP6_INC_STATS_BH(idev, ICMP6_MIB_INERRORS);
685 kfree_skb(skb);
686 return 0;
687}
688
689int __init icmpv6_init(struct net_proto_family *ops)
690{
691 struct sock *sk;
692 int err, i, j;
693
694 for (i = 0; i < NR_CPUS; i++) {
695 if (!cpu_possible(i))
696 continue;
697
698 err = sock_create_kern(PF_INET6, SOCK_RAW, IPPROTO_ICMPV6,
699 &per_cpu(__icmpv6_socket, i));
700 if (err < 0) {
701 printk(KERN_ERR
702 "Failed to initialize the ICMP6 control socket "
703 "(err %d).\n",
704 err);
705 goto fail;
706 }
707
708 sk = per_cpu(__icmpv6_socket, i)->sk;
709 sk->sk_allocation = GFP_ATOMIC;
710
711 /* Enough space for 2 64K ICMP packets, including
712 * sk_buff struct overhead.
713 */
714 sk->sk_sndbuf =
715 (2 * ((64 * 1024) + sizeof(struct sk_buff)));
716
717 sk->sk_prot->unhash(sk);
718 }
719
720
721 if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0) {
722 printk(KERN_ERR "Failed to register ICMP6 protocol\n");
723 err = -EAGAIN;
724 goto fail;
725 }
726
727 return 0;
728
729 fail:
730 for (j = 0; j < i; j++) {
731 if (!cpu_possible(j))
732 continue;
733 sock_release(per_cpu(__icmpv6_socket, j));
734 }
735
736 return err;
737}
738
739void icmpv6_cleanup(void)
740{
741 int i;
742
743 for (i = 0; i < NR_CPUS; i++) {
744 if (!cpu_possible(i))
745 continue;
746 sock_release(per_cpu(__icmpv6_socket, i));
747 }
748 inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
749}
750
/*
 *	Errno and "fatal" flag for each ICMPV6_DEST_UNREACH code.  The table
 *	is indexed by the code value (see icmpv6_err_convert()).
 */
static struct icmp6_err {
	int err;
	int fatal;
} tab_unreach[] = {
	/* NOROUTE */
	{ .err = ENETUNREACH,	.fatal = 0 },
	/* ADM_PROHIBITED */
	{ .err = EACCES,	.fatal = 1 },
	/* Was NOT_NEIGHBOUR, now reserved */
	{ .err = EHOSTUNREACH,	.fatal = 0 },
	/* ADDR_UNREACH */
	{ .err = EHOSTUNREACH,	.fatal = 0 },
	/* PORT_UNREACH */
	{ .err = ECONNREFUSED,	.fatal = 1 },
};
776
777int icmpv6_err_convert(int type, int code, int *err)
778{
779 int fatal = 0;
780
781 *err = EPROTO;
782
783 switch (type) {
784 case ICMPV6_DEST_UNREACH:
785 fatal = 1;
786 if (code <= ICMPV6_PORT_UNREACH) {
787 *err = tab_unreach[code].err;
788 fatal = tab_unreach[code].fatal;
789 }
790 break;
791
792 case ICMPV6_PKT_TOOBIG:
793 *err = EMSGSIZE;
794 break;
795
796 case ICMPV6_PARAMPROB:
797 *err = EPROTO;
798 fatal = 1;
799 break;
800
801 case ICMPV6_TIME_EXCEED:
802 *err = EHOSTUNREACH;
803 break;
804 };
805
806 return fatal;
807}
808
809#ifdef CONFIG_SYSCTL
810ctl_table ipv6_icmp_table[] = {
811 {
812 .ctl_name = NET_IPV6_ICMP_RATELIMIT,
813 .procname = "ratelimit",
814 .data = &sysctl_icmpv6_time,
815 .maxlen = sizeof(int),
816 .mode = 0644,
817 .proc_handler = &proc_dointvec
818 },
819 { .ctl_name = 0 },
820};
821#endif
822
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
new file mode 100644
index 000000000000..405740b75abb
--- /dev/null
+++ b/net/ipv6/ip6_fib.c
@@ -0,0 +1,1225 @@
1/*
2 * Linux INET6 implementation
3 * Forwarding Information Database
4 *
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
7 *
8 * $Id: ip6_fib.c,v 1.25 2001/10/31 21:55:55 davem Exp $
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 */
15
16/*
17 * Changes:
18 * Yuji SEKIYA @USAGI: Support default route on router node;
19 * remove ip6_null_entry from the top of
20 * routing table.
21 */
22#include <linux/config.h>
23#include <linux/errno.h>
24#include <linux/types.h>
25#include <linux/net.h>
26#include <linux/route.h>
27#include <linux/netdevice.h>
28#include <linux/in6.h>
29#include <linux/init.h>
30
31#ifdef CONFIG_PROC_FS
32#include <linux/proc_fs.h>
33#endif
34
35#include <net/ipv6.h>
36#include <net/ndisc.h>
37#include <net/addrconf.h>
38
39#include <net/ip6_fib.h>
40#include <net/ip6_route.h>
41
42#define RT6_DEBUG 2
43
44#if RT6_DEBUG >= 3
45#define RT6_TRACE(x...) printk(KERN_DEBUG x)
46#else
47#define RT6_TRACE(x...) do { ; } while (0)
48#endif
49
50struct rt6_statistics rt6_stats;
51
52static kmem_cache_t * fib6_node_kmem;
53
/*
 *	Position of a tree walker inside the node it is currently visiting
 *	(see fib6_walk_continue()).  The order matters: the walker compares
 *	states with < and >=.
 */
enum fib_walk_state_t {
#ifdef CONFIG_IPV6_SUBTREES
	FWS_S,		/* enter the node's subtree, if any */
#endif
	FWS_L,		/* descend into the left child */
	FWS_R,		/* descend into the right child */
	FWS_C,		/* run the callback on the node's routes */
	FWS_U		/* go back up to the parent */
};
64
65struct fib6_cleaner_t
66{
67 struct fib6_walker_t w;
68 int (*func)(struct rt6_info *, void *arg);
69 void *arg;
70};
71
72DEFINE_RWLOCK(fib6_walker_lock);
73
74
75#ifdef CONFIG_IPV6_SUBTREES
76#define FWS_INIT FWS_S
77#define SUBTREE(fn) ((fn)->subtree)
78#else
79#define FWS_INIT FWS_L
80#define SUBTREE(fn) NULL
81#endif
82
83static void fib6_prune_clones(struct fib6_node *fn, struct rt6_info *rt);
84static struct fib6_node * fib6_repair_tree(struct fib6_node *fn);
85
86/*
87 * A routing update causes an increase of the serial number on the
88 * affected subtree. This allows for cached routes to be asynchronously
89 * tested when modifications are made to the destination cache as a
90 * result of redirects, path MTU changes, etc.
91 */
92
93static __u32 rt_sernum;
94
95static struct timer_list ip6_fib_timer = TIMER_INITIALIZER(fib6_run_gc, 0, 0);
96
97struct fib6_walker_t fib6_walker_list = {
98 .prev = &fib6_walker_list,
99 .next = &fib6_walker_list,
100};
101
102#define FOR_WALKERS(w) for ((w)=fib6_walker_list.next; (w) != &fib6_walker_list; (w)=(w)->next)
103
104static __inline__ u32 fib6_new_sernum(void)
105{
106 u32 n = ++rt_sernum;
107 if ((__s32)n <= 0)
108 rt_sernum = n = 1;
109 return n;
110}
111
112/*
113 * Auxiliary address test functions for the radix tree.
114 *
115 * These assume a 32bit processor (although it will work on
116 * 64bit processors)
117 */
118
119/*
120 * test bit
121 */
122
/*
 *	Test bit @fn_bit of the address at @token, where bit 0 is the most
 *	significant bit of the first byte (network order).
 *
 *	Returns non-zero if the bit is set, 0 otherwise.  The non-zero value
 *	is the raw masked word, not necessarily 1.
 */
static __inline__ int addr_bit_set(void *token, int fn_bit)
{
	__u32 *addr = token;

	/* 1U, not 1: shifting a signed 1 into bit 31 is undefined. */
	return htonl(1U << ((~fn_bit) & 0x1F)) & addr[fn_bit >> 5];
}
129
130/*
131 * find the first different bit between two addresses
132 * length of address must be a multiple of 32bits
133 */
134
/*
 *	Return the index of the first bit (0 = most significant bit of the
 *	first byte) in which the two addresses differ.
 *
 *	@token1, @token2: addresses, compared as arrays of 32-bit words
 *	@addrlen:	  address length in bytes, a multiple of 4
 *
 *	If the addresses are equal, addrlen * 8 is returned.
 */
static __inline__ int addr_diff(void *token1, void *token2, int addrlen)
{
	__u32 *a1 = token1;
	__u32 *a2 = token2;
	int i;

	addrlen >>= 2;

	for (i = 0; i < addrlen; i++) {
		__u32 xb;

		xb = a1[i] ^ a2[i];

		if (xb) {
			int j = 31;

			xb = ntohl(xb);

			/* 1U: a signed 1 << 31 would be undefined. */
			while ((xb & (1U << j)) == 0)
				j--;

			return (i * 32 + 31 - j);
		}
	}

	/*
	 *	we should *never* get to this point since that
	 *	would mean the addrs are equal
	 *
	 *	However, we do get to it 8) And exactly, when
	 *	addresses are equal 8)
	 *
	 *	ip route add 1111::/128 via ...
	 *	ip route add 1111::/64 via ...
	 *	and we are here.
	 *
	 *	Ideally, this function should stop comparison
	 *	at prefix length. It does not, but it is still OK,
	 *	if returned value is greater than prefix length.
	 *					--ANK (980803)
	 */

	return addrlen<<5;
}
179
180static __inline__ struct fib6_node * node_alloc(void)
181{
182 struct fib6_node *fn;
183
184 if ((fn = kmem_cache_alloc(fib6_node_kmem, SLAB_ATOMIC)) != NULL)
185 memset(fn, 0, sizeof(struct fib6_node));
186
187 return fn;
188}
189
190static __inline__ void node_free(struct fib6_node * fn)
191{
192 kmem_cache_free(fib6_node_kmem, fn);
193}
194
195static __inline__ void rt6_release(struct rt6_info *rt)
196{
197 if (atomic_dec_and_test(&rt->rt6i_ref))
198 dst_free(&rt->u.dst);
199}
200
201
202/*
203 * Routing Table
204 *
205 * return the appropriate node for a routing tree "add" operation
206 * by either creating and inserting or by returning an existing
207 * node.
208 */
209
/*
 * Find or create the tree node for prefix addr/plen below root.
 *
 * root:    tree (or subtree) root to start from
 * addr:    prefix address
 * addrlen: address length in bytes (passed on to addr_diff())
 * plen:    prefix length in bits
 * offset:  byte offset of the struct rt6key inside struct rt6_info that
 *          this tree is keyed on
 *
 * Every node visited on the way down gets a fresh serial number.
 * Returns the node, or NULL if a node allocation failed.
 */
210static struct fib6_node * fib6_add_1(struct fib6_node *root, void *addr,
211 int addrlen, int plen,
212 int offset)
213{
214 struct fib6_node *fn, *in, *ln;
215 struct fib6_node *pn = NULL;
216 struct rt6key *key;
217 int bit;
218 int dir = 0;
219 __u32 sernum = fib6_new_sernum();
220
221 RT6_TRACE("fib6_add_1\n");
222
223 /* insert node in tree */
224
225 fn = root;
226
227 do {
/* The key of this node is read from its leaf route. */
228 key = (struct rt6key *)((u8 *)fn->leaf + offset);
229
230 /*
231 * Prefix match
232 */
233 if (plen < fn->fn_bit ||
234 !ipv6_prefix_equal(&key->addr, addr, fn->fn_bit))
235 goto insert_above;
236
237 /*
238 * Exact match ?
239 */
240
241 if (plen == fn->fn_bit) {
242 /* clean up an intermediate node */
/* A node without RTN_RTINFO only borrows its leaf; drop that
 * reference so the caller can install a real route. */
243 if ((fn->fn_flags & RTN_RTINFO) == 0) {
244 rt6_release(fn->leaf);
245 fn->leaf = NULL;
246 }
247
248 fn->fn_sernum = sernum;
249
250 return fn;
251 }
252
253 /*
254 * We have more bits to go
255 */
256
257 /* Try to walk down on tree. */
258 fn->fn_sernum = sernum;
259 dir = addr_bit_set(addr, fn->fn_bit);
260 pn = fn;
261 fn = dir ? fn->right: fn->left;
262 } while (fn);
263
264 /*
265 * We walked to the bottom of tree.
266 * Create new leaf node without children.
267 */
268
269 ln = node_alloc();
270
271 if (ln == NULL)
272 return NULL;
273 ln->fn_bit = plen;
274
275 ln->parent = pn;
276 ln->fn_sernum = sernum;
277
278 if (dir)
279 pn->right = ln;
280 else
281 pn->left = ln;
282
283 return ln;
284
285
286insert_above:
287 /*
288 * split since we don't have a common prefix anymore or
289 * we have a less significant route.
290 * we've to insert an intermediate node on the list
291 * this new node will point to the one we need to create
292 * and the current
293 */
294
/* dir still holds the branch taken from pn to reach fn. */
295 pn = fn->parent;
296
297 /* find 1st bit in difference between the 2 addrs.
298
299 See comment in addr_diff: bit may be an invalid value,
300 but if it is >= plen, the value is ignored in any case.
301 */
302
303 bit = addr_diff(addr, &key->addr, addrlen);
304
305 /*
306 * (intermediate)[in]
307 * / \
308 * (new leaf node)[ln] (old node)[fn]
309 */
310 if (plen > bit) {
311 in = node_alloc();
312 ln = node_alloc();
313
314 if (in == NULL || ln == NULL) {
315 if (in)
316 node_free(in);
317 if (ln)
318 node_free(ln);
319 return NULL;
320 }
321
322 /*
323 * new intermediate node.
324 * RTN_RTINFO will
325 * be off since that an address that chooses one of
326 * the branches would not match less specific routes
327 * in the other branch
328 */
329
330 in->fn_bit = bit;
331
332 in->parent = pn;
/* The intermediate node borrows fn's leaf as its key holder and
 * takes its own reference on it. */
333 in->leaf = fn->leaf;
334 atomic_inc(&in->leaf->rt6i_ref);
335
336 in->fn_sernum = sernum;
337
338 /* update parent pointer */
339 if (dir)
340 pn->right = in;
341 else
342 pn->left = in;
343
344 ln->fn_bit = plen;
345
346 ln->parent = in;
347 fn->parent = in;
348
349 ln->fn_sernum = sernum;
350
351 if (addr_bit_set(addr, bit)) {
352 in->right = ln;
353 in->left = fn;
354 } else {
355 in->left = ln;
356 in->right = fn;
357 }
358 } else { /* plen <= bit */
359
360 /*
361 * (new leaf node)[ln]
362 * / \
363 * (old node)[fn] NULL
364 */
365
366 ln = node_alloc();
367
368 if (ln == NULL)
369 return NULL;
370
371 ln->fn_bit = plen;
372
373 ln->parent = pn;
374
375 ln->fn_sernum = sernum;
376
377 if (dir)
378 pn->right = ln;
379 else
380 pn->left = ln;
381
/* The old node hangs below the new one on the side selected by bit
 * plen of the old node's key. */
382 if (addr_bit_set(&key->addr, plen))
383 ln->right = fn;
384 else
385 ln->left = fn;
386
387 fn->parent = ln;
388 }
389 return ln;
390}
391
392/*
393 * Insert routing information in a node.
394 */
395
396static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
397 struct nlmsghdr *nlh)
398{
399 struct rt6_info *iter = NULL;
400 struct rt6_info **ins;
401
402 ins = &fn->leaf;
403
404 if (fn->fn_flags&RTN_TL_ROOT &&
405 fn->leaf == &ip6_null_entry &&
406 !(rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) ){
407 fn->leaf = rt;
408 rt->u.next = NULL;
409 goto out;
410 }
411
412 for (iter = fn->leaf; iter; iter=iter->u.next) {
413 /*
414 * Search for duplicates
415 */
416
417 if (iter->rt6i_metric == rt->rt6i_metric) {
418 /*
419 * Same priority level
420 */
421
422 if (iter->rt6i_dev == rt->rt6i_dev &&
423 iter->rt6i_idev == rt->rt6i_idev &&
424 ipv6_addr_equal(&iter->rt6i_gateway,
425 &rt->rt6i_gateway)) {
426 if (!(iter->rt6i_flags&RTF_EXPIRES))
427 return -EEXIST;
428 iter->rt6i_expires = rt->rt6i_expires;
429 if (!(rt->rt6i_flags&RTF_EXPIRES)) {
430 iter->rt6i_flags &= ~RTF_EXPIRES;
431 iter->rt6i_expires = 0;
432 }
433 return -EEXIST;
434 }
435 }
436
437 if (iter->rt6i_metric > rt->rt6i_metric)
438 break;
439
440 ins = &iter->u.next;
441 }
442
443 /*
444 * insert node
445 */
446
447out:
448 rt->u.next = iter;
449 *ins = rt;
450 rt->rt6i_node = fn;
451 atomic_inc(&rt->rt6i_ref);
452 inet6_rt_notify(RTM_NEWROUTE, rt, nlh);
453 rt6_stats.fib_rt_entries++;
454
455 if ((fn->fn_flags & RTN_RTINFO) == 0) {
456 rt6_stats.fib_route_nodes++;
457 fn->fn_flags |= RTN_RTINFO;
458 }
459
460 return 0;
461}
462
463static __inline__ void fib6_start_gc(struct rt6_info *rt)
464{
465 if (ip6_fib_timer.expires == 0 &&
466 (rt->rt6i_flags & (RTF_EXPIRES|RTF_CACHE)))
467 mod_timer(&ip6_fib_timer, jiffies + ip6_rt_gc_interval);
468}
469
470void fib6_force_start_gc(void)
471{
472 if (ip6_fib_timer.expires == 0)
473 mod_timer(&ip6_fib_timer, jiffies + ip6_rt_gc_interval);
474}
475
476/*
477 * Add routing information to the routing tree.
478 * <destination addr>/<source addr>
479 * with source addr info in sub-trees
480 */
481
/*
 * Insert route rt into the tree rooted at root.
 *
 * The destination prefix selects (or creates) a node in the main tree.
 * With CONFIG_IPV6_SUBTREES and a non-zero source prefix length, the
 * route is placed in that node's source subtree instead, which is
 * created on demand.
 *
 * Returns 0 on success.  On any failure rt is passed to dst_free() and
 * a negative errno is returned (-ENOMEM for allocation failures,
 * otherwise the result of fib6_add_rt2node()).  _rtattr is not used.
 */
482int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr)
483{
484 struct fib6_node *fn;
485 int err = -ENOMEM;
486
487 fn = fib6_add_1(root, &rt->rt6i_dst.addr, sizeof(struct in6_addr),
488 rt->rt6i_dst.plen, offsetof(struct rt6_info, rt6i_dst));
489
490 if (fn == NULL)
491 goto out;
492
493#ifdef CONFIG_IPV6_SUBTREES
494 if (rt->rt6i_src.plen) {
495 struct fib6_node *sn;
496
497 if (fn->subtree == NULL) {
498 struct fib6_node *sfn;
499
500 /*
501 * Create subtree.
502 *
503 * fn[main tree]
504 * |
505 * sfn[subtree root]
506 * \
507 * sn[new leaf node]
508 */
509
510 /* Create subtree root node */
511 sfn = node_alloc();
512 if (sfn == NULL)
513 goto st_failure;
514
515 sfn->leaf = &ip6_null_entry;
516 atomic_inc(&ip6_null_entry.rt6i_ref);
517 sfn->fn_flags = RTN_ROOT;
518 sfn->fn_sernum = fib6_new_sernum();
519
520 /* Now add the first leaf node to new subtree */
521
522 sn = fib6_add_1(sfn, &rt->rt6i_src.addr,
523 sizeof(struct in6_addr), rt->rt6i_src.plen,
524 offsetof(struct rt6_info, rt6i_src));
525
526 if (sn == NULL) {
527 /* If it is failed, discard just allocated
528 root, and then (in st_failure) stale node
529 in main tree.
530 */
531 node_free(sfn);
532 goto st_failure;
533 }
534
535 /* Now link new subtree to main tree */
536 sfn->parent = fn;
537 fn->subtree = sfn;
/* A main-tree node with no leaf of its own borrows rt as its key
 * holder and takes a reference for that. */
538 if (fn->leaf == NULL) {
539 fn->leaf = rt;
540 atomic_inc(&rt->rt6i_ref);
541 }
542 } else {
543 sn = fib6_add_1(fn->subtree, &rt->rt6i_src.addr,
544 sizeof(struct in6_addr), rt->rt6i_src.plen,
545 offsetof(struct rt6_info, rt6i_src));
546
547 if (sn == NULL)
548 goto st_failure;
549 }
550
/* From here on the route is attached to the subtree node. */
551 fn = sn;
552 }
553#endif
554
555 err = fib6_add_rt2node(fn, rt, nlh);
556
/* A newly added non-cache route triggers pruning of cached clones
 * around fn (see fib6_prune_clones()). */
557 if (err == 0) {
558 fib6_start_gc(rt);
559 if (!(rt->rt6i_flags&RTF_CACHE))
560 fib6_prune_clones(fn, rt);
561 }
562
563out:
564 if (err)
565 dst_free(&rt->u.dst);
566 return err;
567
568#ifdef CONFIG_IPV6_SUBTREES
569 /* Subtree creation failed, probably main tree node
570 is orphan. If it is, shoot it.
571 */
572st_failure:
573 if (fn && !(fn->fn_flags & (RTN_RTINFO|RTN_ROOT)))
574 fib6_repair_tree(fn);
575 dst_free(&rt->u.dst);
576 return err;
577#endif
578}
579
580/*
581 * Routing tree lookup
582 *
583 */
584
/*
 *	One lookup key.  fib6_lookup() passes an array of two: element 0 is
 *	the destination key and, with subtrees, element 1 the source key.
 */
struct lookup_args {
	/* byte offset of the struct rt6key inside struct rt6_info */
	int offset;
	/* address being looked up */
	struct in6_addr *addr;
};
589
590static struct fib6_node * fib6_lookup_1(struct fib6_node *root,
591 struct lookup_args *args)
592{
593 struct fib6_node *fn;
594 int dir;
595
596 /*
597 * Descend on a tree
598 */
599
600 fn = root;
601
602 for (;;) {
603 struct fib6_node *next;
604
605 dir = addr_bit_set(args->addr, fn->fn_bit);
606
607 next = dir ? fn->right : fn->left;
608
609 if (next) {
610 fn = next;
611 continue;
612 }
613
614 break;
615 }
616
617 while ((fn->fn_flags & RTN_ROOT) == 0) {
618#ifdef CONFIG_IPV6_SUBTREES
619 if (fn->subtree) {
620 struct fib6_node *st;
621 struct lookup_args *narg;
622
623 narg = args + 1;
624
625 if (narg->addr) {
626 st = fib6_lookup_1(fn->subtree, narg);
627
628 if (st && !(st->fn_flags & RTN_ROOT))
629 return st;
630 }
631 }
632#endif
633
634 if (fn->fn_flags & RTN_RTINFO) {
635 struct rt6key *key;
636
637 key = (struct rt6key *) ((u8 *) fn->leaf +
638 args->offset);
639
640 if (ipv6_prefix_equal(&key->addr, args->addr, key->plen))
641 return fn;
642 }
643
644 fn = fn->parent;
645 }
646
647 return NULL;
648}
649
650struct fib6_node * fib6_lookup(struct fib6_node *root, struct in6_addr *daddr,
651 struct in6_addr *saddr)
652{
653 struct lookup_args args[2];
654 struct fib6_node *fn;
655
656 args[0].offset = offsetof(struct rt6_info, rt6i_dst);
657 args[0].addr = daddr;
658
659#ifdef CONFIG_IPV6_SUBTREES
660 args[1].offset = offsetof(struct rt6_info, rt6i_src);
661 args[1].addr = saddr;
662#endif
663
664 fn = fib6_lookup_1(root, args);
665
666 if (fn == NULL || fn->fn_flags & RTN_TL_ROOT)
667 fn = root;
668
669 return fn;
670}
671
672/*
673 * Get node with specified destination prefix (and source prefix,
674 * if subtrees are used)
675 */
676
677
678static struct fib6_node * fib6_locate_1(struct fib6_node *root,
679 struct in6_addr *addr,
680 int plen, int offset)
681{
682 struct fib6_node *fn;
683
684 for (fn = root; fn ; ) {
685 struct rt6key *key = (struct rt6key *)((u8 *)fn->leaf + offset);
686
687 /*
688 * Prefix match
689 */
690 if (plen < fn->fn_bit ||
691 !ipv6_prefix_equal(&key->addr, addr, fn->fn_bit))
692 return NULL;
693
694 if (plen == fn->fn_bit)
695 return fn;
696
697 /*
698 * We have more bits to go
699 */
700 if (addr_bit_set(addr, fn->fn_bit))
701 fn = fn->right;
702 else
703 fn = fn->left;
704 }
705 return NULL;
706}
707
708struct fib6_node * fib6_locate(struct fib6_node *root,
709 struct in6_addr *daddr, int dst_len,
710 struct in6_addr *saddr, int src_len)
711{
712 struct fib6_node *fn;
713
714 fn = fib6_locate_1(root, daddr, dst_len,
715 offsetof(struct rt6_info, rt6i_dst));
716
717#ifdef CONFIG_IPV6_SUBTREES
718 if (src_len) {
719 BUG_TRAP(saddr!=NULL);
720 if (fn == NULL)
721 fn = fn->subtree;
722 if (fn)
723 fn = fib6_locate_1(fn, saddr, src_len,
724 offsetof(struct rt6_info, rt6i_src));
725 }
726#endif
727
728 if (fn && fn->fn_flags&RTN_RTINFO)
729 return fn;
730
731 return NULL;
732}
733
734
735/*
736 * Deletion
737 *
738 */
739
740static struct rt6_info * fib6_find_prefix(struct fib6_node *fn)
741{
742 if (fn->fn_flags&RTN_ROOT)
743 return &ip6_null_entry;
744
745 while(fn) {
746 if(fn->left)
747 return fn->left->leaf;
748
749 if(fn->right)
750 return fn->right->leaf;
751
752 fn = SUBTREE(fn);
753 }
754 return NULL;
755}
756
757/*
758 * Called to trim the tree of intermediate nodes when possible. "fn"
759 * is the node we want to try and remove.
760 */
761
/*
 * Remove the route-less node fn if the tree shape allows it, and keep
 * removing parents that become removable as a result.
 *
 * Active walkers that point at a removed node are redirected.
 * Returns the parent of the first node that had to be kept, or the
 * first ancestor that still carries routes or a subtree.
 */
762static struct fib6_node * fib6_repair_tree(struct fib6_node *fn)
763{
764 int children;
765 int nstate;
766 struct fib6_node *child, *pn;
767 struct fib6_walker_t *w;
768 int iter = 0;
769
770 for (;;) {
771 RT6_TRACE("fixing tree: plen=%d iter=%d\n", fn->fn_bit, iter);
772 iter++;
773
774 BUG_TRAP(!(fn->fn_flags&RTN_RTINFO));
775 BUG_TRAP(!(fn->fn_flags&RTN_TL_ROOT));
776 BUG_TRAP(fn->leaf==NULL);
777
/* children is a bitmask: 1 = right child present, 2 = left child
 * present.  child ends up as the left child when both exist. */
778 children = 0;
779 child = NULL;
780 if (fn->right) child = fn->right, children |= 1;
781 if (fn->left) child = fn->left, children |= 2;
782
/* The node must stay as a split node: give it a borrowed leaf again
 * and stop. */
783 if (children == 3 || SUBTREE(fn)
784#ifdef CONFIG_IPV6_SUBTREES
785 /* Subtree root (i.e. fn) may have one child */
786 || (children && fn->fn_flags&RTN_ROOT)
787#endif
788 ) {
789 fn->leaf = fib6_find_prefix(fn);
790#if RT6_DEBUG >= 2
791 if (fn->leaf==NULL) {
792 BUG_TRAP(fn->leaf);
793 fn->leaf = &ip6_null_entry;
794 }
795#endif
796 atomic_inc(&fn->leaf->rt6i_ref);
797 return fn->parent;
798 }
799
/* Unlink fn: either detach it as its parent's subtree root, or
 * replace it in its parent by its only child (possibly NULL).
 * nstate is the state a walker parked on fn resumes in at pn. */
800 pn = fn->parent;
801#ifdef CONFIG_IPV6_SUBTREES
802 if (SUBTREE(pn) == fn) {
803 BUG_TRAP(fn->fn_flags&RTN_ROOT);
804 SUBTREE(pn) = NULL;
805 nstate = FWS_L;
806 } else {
807 BUG_TRAP(!(fn->fn_flags&RTN_ROOT));
808#endif
809 if (pn->right == fn) pn->right = child;
810 else if (pn->left == fn) pn->left = child;
811#if RT6_DEBUG >= 2
812 else BUG_TRAP(0);
813#endif
814 if (child)
815 child->parent = pn;
816 nstate = FWS_R;
817#ifdef CONFIG_IPV6_SUBTREES
818 }
819#endif
820
/* Fix up every registered walker that references the node being
 * freed. */
821 read_lock(&fib6_walker_lock);
822 FOR_WALKERS(w) {
823 if (child == NULL) {
824 if (w->root == fn) {
825 w->root = w->node = NULL;
826 RT6_TRACE("W %p adjusted by delroot 1\n", w);
827 } else if (w->node == fn) {
828 RT6_TRACE("W %p adjusted by delnode 1, s=%d/%d\n", w, w->state, nstate);
829 w->node = pn;
830 w->state = nstate;
831 }
832 } else {
833 if (w->root == fn) {
834 w->root = child;
835 RT6_TRACE("W %p adjusted by delroot 2\n", w);
836 }
837 if (w->node == fn) {
838 w->node = child;
839 if (children&2) {
840 RT6_TRACE("W %p adjusted by delnode 2, s=%d\n", w, w->state);
841 w->state = w->state>=FWS_R ? FWS_U : FWS_INIT;
842 } else {
843 RT6_TRACE("W %p adjusted by delnode 2, s=%d\n", w, w->state);
844 w->state = w->state>=FWS_C ? FWS_U : FWS_INIT;
845 }
846 }
847 }
848 }
849 read_unlock(&fib6_walker_lock);
850
851 node_free(fn);
852 if (pn->fn_flags&RTN_RTINFO || SUBTREE(pn))
853 return pn;
854
/* The parent is a pure split node: drop its borrowed leaf and try to
 * remove it as well on the next iteration. */
855 rt6_release(pn->leaf);
856 pn->leaf = NULL;
857 fn = pn;
858 }
859}
860
/*
 * Unlink the route *rtp from node fn and release the tree's reference
 * on it.
 *
 * rtp points at the list link that currently holds the route.  Walkers
 * parked on the route are advanced, the node is removed from the tree
 * if this was its last route, and ancestors that borrowed the route as
 * a key holder are given another one.  Sends RTM_DELROUTE.  _rtattr is
 * not used.
 */
861static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp,
862 struct nlmsghdr *nlh, void *_rtattr)
863{
864 struct fib6_walker_t *w;
865 struct rt6_info *rt = *rtp;
866
867 RT6_TRACE("fib6_del_route\n");
868
869 /* Unlink it */
870 *rtp = rt->u.next;
871 rt->rt6i_node = NULL;
872 rt6_stats.fib_rt_entries--;
873 rt6_stats.fib_discarded_routes++;
874
875 /* Adjust walkers */
/* A walker that is about to process this route moves on to the next
 * one, or goes up if there is none. */
876 read_lock(&fib6_walker_lock);
877 FOR_WALKERS(w) {
878 if (w->state == FWS_C && w->leaf == rt) {
879 RT6_TRACE("walker %p adjusted by delroute\n", w);
880 w->leaf = rt->u.next;
881 if (w->leaf == NULL)
882 w->state = FWS_U;
883 }
884 }
885 read_unlock(&fib6_walker_lock);
886
887 rt->u.next = NULL;
888
/* The top-level root never has an empty leaf. */
889 if (fn->leaf == NULL && fn->fn_flags&RTN_TL_ROOT)
890 fn->leaf = &ip6_null_entry;
891
892 /* If it was last route, expunge its radix tree node */
893 if (fn->leaf == NULL) {
894 fn->fn_flags &= ~RTN_RTINFO;
895 rt6_stats.fib_route_nodes--;
896 fn = fib6_repair_tree(fn);
897 }
898
899 if (atomic_read(&rt->rt6i_ref) != 1) {
900 /* This route is used as dummy address holder in some split
901 * nodes. It is not leaked, but it still holds other resources,
902 * which must be released in time. So, scan ascendant nodes
903 * and replace dummy references to this route with references
904 * to still alive ones.
905 */
906 while (fn) {
907 if (!(fn->fn_flags&RTN_RTINFO) && fn->leaf == rt) {
908 fn->leaf = fib6_find_prefix(fn);
909 atomic_inc(&fn->leaf->rt6i_ref);
910 rt6_release(rt);
911 }
912 fn = fn->parent;
913 }
914 /* No more references are possible at this point. */
915 if (atomic_read(&rt->rt6i_ref) != 1) BUG();
916 }
917
918 inet6_rt_notify(RTM_DELROUTE, rt, nlh);
919 rt6_release(rt);
920}
921
922int fib6_del(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr)
923{
924 struct fib6_node *fn = rt->rt6i_node;
925 struct rt6_info **rtp;
926
927#if RT6_DEBUG >= 2
928 if (rt->u.dst.obsolete>0) {
929 BUG_TRAP(fn==NULL);
930 return -ENOENT;
931 }
932#endif
933 if (fn == NULL || rt == &ip6_null_entry)
934 return -ENOENT;
935
936 BUG_TRAP(fn->fn_flags&RTN_RTINFO);
937
938 if (!(rt->rt6i_flags&RTF_CACHE))
939 fib6_prune_clones(fn, rt);
940
941 /*
942 * Walk the leaf entries looking for ourself
943 */
944
945 for (rtp = &fn->leaf; *rtp; rtp = &(*rtp)->u.next) {
946 if (*rtp == rt) {
947 fib6_del_route(fn, rtp, nlh, _rtattr);
948 return 0;
949 }
950 }
951 return -ENOENT;
952}
953
954/*
955 * Tree traversal function.
956 *
957 * Certainly, it is not interrupt safe.
958 * However, it is internally reenterable wrt itself and fib6_add/fib6_del.
959 * It means, that we can modify tree during walking
960 * and use this function for garbage collection, clone pruning,
961 * cleaning tree when a device goes down etc. etc.
962 *
963 * It guarantees that every node will be traversed,
964 * and that it will be traversed only once.
965 *
966 * Callback function w->func may return:
967 * 0 -> continue walking.
968 * positive value -> walking is suspended (used by tree dumps,
969 * and probably by gc, if it will be split to several slices)
970 * negative value -> terminate walking.
971 *
972 * The function itself returns:
973 * 0 -> walk is complete.
974 * >0 -> walk is incomplete (i.e. suspended)
975 * <0 -> walk is terminated by an error.
976 */
977
/*
 * Run (or resume) the walker w from its current node and state.
 * The switch below relies on fallthrough from each state to the next
 * when the current state has nothing to do.
 */
978int fib6_walk_continue(struct fib6_walker_t *w)
979{
980 struct fib6_node *fn, *pn;
981
982 for (;;) {
983 fn = w->node;
984 if (fn == NULL)
985 return 0;
986
/* Prune mode: a node other than the walk root that carries routes is
 * not descended into; go straight to processing its routes. */
987 if (w->prune && fn != w->root &&
988 fn->fn_flags&RTN_RTINFO && w->state < FWS_C) {
989 w->state = FWS_C;
990 w->leaf = fn->leaf;
991 }
992 switch (w->state) {
993#ifdef CONFIG_IPV6_SUBTREES
994 case FWS_S:
995 if (SUBTREE(fn)) {
996 w->node = SUBTREE(fn);
997 continue;
998 }
999 w->state = FWS_L;
1000#endif
1001 case FWS_L:
1002 if (fn->left) {
1003 w->node = fn->left;
1004 w->state = FWS_INIT;
1005 continue;
1006 }
1007 w->state = FWS_R;
1008 case FWS_R:
1009 if (fn->right) {
1010 w->node = fn->right;
1011 w->state = FWS_INIT;
1012 continue;
1013 }
1014 w->state = FWS_C;
1015 w->leaf = fn->leaf;
1016 case FWS_C:
/* The callback is invoked again for the same node as long as it
 * returns 0 and leaves w->leaf non-NULL. */
1017 if (w->leaf && fn->fn_flags&RTN_RTINFO) {
1018 int err = w->func(w);
1019 if (err)
1020 return err;
1021 continue;
1022 }
1023 w->state = FWS_U;
1024 case FWS_U:
1025 if (fn == w->root)
1026 return 0;
/* Climb to the parent; the state to resume in depends on which link
 * of the parent leads to fn. */
1027 pn = fn->parent;
1028 w->node = pn;
1029#ifdef CONFIG_IPV6_SUBTREES
1030 if (SUBTREE(pn) == fn) {
1031 BUG_TRAP(fn->fn_flags&RTN_ROOT);
1032 w->state = FWS_L;
1033 continue;
1034 }
1035#endif
1036 if (pn->left == fn) {
1037 w->state = FWS_R;
1038 continue;
1039 }
1040 if (pn->right == fn) {
1041 w->state = FWS_C;
1042 w->leaf = w->node->leaf;
1043 continue;
1044 }
1045#if RT6_DEBUG >= 2
1046 BUG_TRAP(0);
1047#endif
1048 }
1049 }
1050}
1051
1052int fib6_walk(struct fib6_walker_t *w)
1053{
1054 int res;
1055
1056 w->state = FWS_INIT;
1057 w->node = w->root;
1058
1059 fib6_walker_link(w);
1060 res = fib6_walk_continue(w);
1061 if (res <= 0)
1062 fib6_walker_unlink(w);
1063 return res;
1064}
1065
1066static int fib6_clean_node(struct fib6_walker_t *w)
1067{
1068 int res;
1069 struct rt6_info *rt;
1070 struct fib6_cleaner_t *c = (struct fib6_cleaner_t*)w;
1071
1072 for (rt = w->leaf; rt; rt = rt->u.next) {
1073 res = c->func(rt, c->arg);
1074 if (res < 0) {
1075 w->leaf = rt;
1076 res = fib6_del(rt, NULL, NULL);
1077 if (res) {
1078#if RT6_DEBUG >= 2
1079 printk(KERN_DEBUG "fib6_clean_node: del failed: rt=%p@%p err=%d\n", rt, rt->rt6i_node, res);
1080#endif
1081 continue;
1082 }
1083 return 0;
1084 }
1085 BUG_TRAP(res==0);
1086 }
1087 w->leaf = rt;
1088 return 0;
1089}
1090
1091/*
1092 * Convenient frontend to tree walker.
1093 *
1094 * func is called on each route.
1095 * It may return -1 -> delete this route.
1096 * 0 -> continue walking
1097 *
1098 * prune==1 -> only immediate children of node (certainly,
1099 * ignoring pure split nodes) will be scanned.
1100 */
1101
1102void fib6_clean_tree(struct fib6_node *root,
1103 int (*func)(struct rt6_info *, void *arg),
1104 int prune, void *arg)
1105{
1106 struct fib6_cleaner_t c;
1107
1108 c.w.root = root;
1109 c.w.func = fib6_clean_node;
1110 c.w.prune = prune;
1111 c.func = func;
1112 c.arg = arg;
1113
1114 fib6_walk(&c.w);
1115}
1116
1117static int fib6_prune_clone(struct rt6_info *rt, void *arg)
1118{
1119 if (rt->rt6i_flags & RTF_CACHE) {
1120 RT6_TRACE("pruning clone %p\n", rt);
1121 return -1;
1122 }
1123
1124 return 0;
1125}
1126
1127static void fib6_prune_clones(struct fib6_node *fn, struct rt6_info *rt)
1128{
1129 fib6_clean_tree(fn, fib6_prune_clone, 1, rt);
1130}
1131
1132/*
1133 * Garbage collection
1134 */
1135
/* State shared between fib6_run_gc() and its fib6_age() callback. */
static struct fib6_gc_args {
	/* idle time, in jiffies, after which an unused clone is dropped */
	int timeout;
	/* count of routes seen that will need another GC pass */
	int more;
} gc_args;
1141
1142static int fib6_age(struct rt6_info *rt, void *arg)
1143{
1144 unsigned long now = jiffies;
1145
1146 /*
1147 * check addrconf expiration here.
1148 * Routes are expired even if they are in use.
1149 *
1150 * Also age clones. Note, that clones are aged out
1151 * only if they are not in use now.
1152 */
1153
1154 if (rt->rt6i_flags&RTF_EXPIRES && rt->rt6i_expires) {
1155 if (time_after(now, rt->rt6i_expires)) {
1156 RT6_TRACE("expiring %p\n", rt);
1157 rt6_reset_dflt_pointer(rt);
1158 return -1;
1159 }
1160 gc_args.more++;
1161 } else if (rt->rt6i_flags & RTF_CACHE) {
1162 if (atomic_read(&rt->u.dst.__refcnt) == 0 &&
1163 time_after_eq(now, rt->u.dst.lastuse + gc_args.timeout)) {
1164 RT6_TRACE("aging clone %p\n", rt);
1165 return -1;
1166 } else if ((rt->rt6i_flags & RTF_GATEWAY) &&
1167 (!(rt->rt6i_nexthop->flags & NTF_ROUTER))) {
1168 RT6_TRACE("purging route %p via non-router but gateway\n",
1169 rt);
1170 return -1;
1171 }
1172 gc_args.more++;
1173 }
1174
1175 return 0;
1176}
1177
1178static DEFINE_SPINLOCK(fib6_gc_lock);
1179
1180void fib6_run_gc(unsigned long dummy)
1181{
1182 if (dummy != ~0UL) {
1183 spin_lock_bh(&fib6_gc_lock);
1184 gc_args.timeout = dummy ? (int)dummy : ip6_rt_gc_interval;
1185 } else {
1186 local_bh_disable();
1187 if (!spin_trylock(&fib6_gc_lock)) {
1188 mod_timer(&ip6_fib_timer, jiffies + HZ);
1189 local_bh_enable();
1190 return;
1191 }
1192 gc_args.timeout = ip6_rt_gc_interval;
1193 }
1194 gc_args.more = 0;
1195
1196
1197 write_lock_bh(&rt6_lock);
1198 ndisc_dst_gc(&gc_args.more);
1199 fib6_clean_tree(&ip6_routing_table, fib6_age, 0, NULL);
1200 write_unlock_bh(&rt6_lock);
1201
1202 if (gc_args.more)
1203 mod_timer(&ip6_fib_timer, jiffies + ip6_rt_gc_interval);
1204 else {
1205 del_timer(&ip6_fib_timer);
1206 ip6_fib_timer.expires = 0;
1207 }
1208 spin_unlock_bh(&fib6_gc_lock);
1209}
1210
1211void __init fib6_init(void)
1212{
1213 fib6_node_kmem = kmem_cache_create("fib6_nodes",
1214 sizeof(struct fib6_node),
1215 0, SLAB_HWCACHE_ALIGN,
1216 NULL, NULL);
1217 if (!fib6_node_kmem)
1218 panic("cannot create fib6_nodes cache");
1219}
1220
1221void fib6_gc_cleanup(void)
1222{
1223 del_timer(&ip6_fib_timer);
1224 kmem_cache_destroy(fib6_node_kmem);
1225}
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
new file mode 100644
index 000000000000..a93f6dc51979
--- /dev/null
+++ b/net/ipv6/ip6_flowlabel.c
@@ -0,0 +1,706 @@
1/*
2 * ip6_flowlabel.c IPv6 flowlabel manager.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 *
9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10 */
11
12#include <linux/config.h>
13#include <linux/errno.h>
14#include <linux/types.h>
15#include <linux/socket.h>
16#include <linux/net.h>
17#include <linux/netdevice.h>
18#include <linux/if_arp.h>
19#include <linux/in6.h>
20#include <linux/route.h>
21#include <linux/proc_fs.h>
22#include <linux/seq_file.h>
23
24#include <net/sock.h>
25
26#include <net/ipv6.h>
27#include <net/ndisc.h>
28#include <net/protocol.h>
29#include <net/ip6_route.h>
30#include <net/addrconf.h>
31#include <net/rawv6.h>
32#include <net/icmp.h>
33#include <net/transp_v6.h>
34
35#include <asm/uaccess.h>
36
/*
 * Linger bounds in seconds.  check_linger() compares user-supplied values
 * against them before scaling the result by HZ.
 */
37#define FL_MIN_LINGER 6 /* Minimal linger. It is set to the 6 sec specified
38 in the old IPv6 RFC. Well, it was a reasonable value.
39 */
40#define FL_MAX_LINGER 60 /* Maximal linger timeout */
41
42/* FL hash table */
43
44#define FL_MAX_PER_SOCK 32 /* per-socket label count checked by mem_check() */
45#define FL_MAX_SIZE 4096 /* global label count checked by mem_check() */
46#define FL_HASH_MASK 255 /* the table has FL_HASH_MASK+1 buckets */
47#define FL_HASH(l) (ntohl(l)&FL_HASH_MASK)
48
/* Number of labels currently linked into fl_ht. */
49static atomic_t fl_size = ATOMIC_INIT(0);
/* Hash buckets; entries in a bucket are chained through ->next. */
50static struct ip6_flowlabel *fl_ht[FL_HASH_MASK+1];
51
52static void ip6_fl_gc(unsigned long dummy);
/* Timer whose callback is ip6_fl_gc(); fl_release() and ip6_fl_gc() arm it. */
53static struct timer_list ip6_fl_gc_timer = TIMER_INITIALIZER(ip6_fl_gc, 0, 0);
54
55/* FL hash table lock: it only protects against GC */
56
57static DEFINE_RWLOCK(ip6_fl_lock);
58
59/* Big socket lock: taken around accesses to the per-socket label lists */
60
61static DEFINE_RWLOCK(ip6_sk_fl_lock);
62
63
64static __inline__ struct ip6_flowlabel * __fl_lookup(u32 label)
65{
66 struct ip6_flowlabel *fl;
67
68 for (fl=fl_ht[FL_HASH(label)]; fl; fl = fl->next) {
69 if (fl->label == label)
70 return fl;
71 }
72 return NULL;
73}
74
75static struct ip6_flowlabel * fl_lookup(u32 label)
76{
77 struct ip6_flowlabel *fl;
78
79 read_lock_bh(&ip6_fl_lock);
80 fl = __fl_lookup(label);
81 if (fl)
82 atomic_inc(&fl->users);
83 read_unlock_bh(&ip6_fl_lock);
84 return fl;
85}
86
87
88static void fl_free(struct ip6_flowlabel *fl)
89{
90 if (fl)
91 kfree(fl->opt);
92 kfree(fl);
93}
94
95static void fl_release(struct ip6_flowlabel *fl)
96{
97 write_lock_bh(&ip6_fl_lock);
98
99 fl->lastuse = jiffies;
100 if (atomic_dec_and_test(&fl->users)) {
101 unsigned long ttd = fl->lastuse + fl->linger;
102 if (time_after(ttd, fl->expires))
103 fl->expires = ttd;
104 ttd = fl->expires;
105 if (fl->opt && fl->share == IPV6_FL_S_EXCL) {
106 struct ipv6_txoptions *opt = fl->opt;
107 fl->opt = NULL;
108 kfree(opt);
109 }
110 if (!timer_pending(&ip6_fl_gc_timer) ||
111 time_after(ip6_fl_gc_timer.expires, ttd))
112 mod_timer(&ip6_fl_gc_timer, ttd);
113 }
114
115 write_unlock_bh(&ip6_fl_lock);
116}
117
118static void ip6_fl_gc(unsigned long dummy)
119{
120 int i;
121 unsigned long now = jiffies;
122 unsigned long sched = 0;
123
124 write_lock(&ip6_fl_lock);
125
126 for (i=0; i<=FL_HASH_MASK; i++) {
127 struct ip6_flowlabel *fl, **flp;
128 flp = &fl_ht[i];
129 while ((fl=*flp) != NULL) {
130 if (atomic_read(&fl->users) == 0) {
131 unsigned long ttd = fl->lastuse + fl->linger;
132 if (time_after(ttd, fl->expires))
133 fl->expires = ttd;
134 ttd = fl->expires;
135 if (time_after_eq(now, ttd)) {
136 *flp = fl->next;
137 fl_free(fl);
138 atomic_dec(&fl_size);
139 continue;
140 }
141 if (!sched || time_before(ttd, sched))
142 sched = ttd;
143 }
144 flp = &fl->next;
145 }
146 }
147 if (!sched && atomic_read(&fl_size))
148 sched = now + FL_MAX_LINGER;
149 if (sched) {
150 ip6_fl_gc_timer.expires = sched;
151 add_timer(&ip6_fl_gc_timer);
152 }
153 write_unlock(&ip6_fl_lock);
154}
155
156static int fl_intern(struct ip6_flowlabel *fl, __u32 label)
157{
158 fl->label = label & IPV6_FLOWLABEL_MASK;
159
160 write_lock_bh(&ip6_fl_lock);
161 if (label == 0) {
162 for (;;) {
163 fl->label = htonl(net_random())&IPV6_FLOWLABEL_MASK;
164 if (fl->label) {
165 struct ip6_flowlabel *lfl;
166 lfl = __fl_lookup(fl->label);
167 if (lfl == NULL)
168 break;
169 }
170 }
171 }
172
173 fl->lastuse = jiffies;
174 fl->next = fl_ht[FL_HASH(fl->label)];
175 fl_ht[FL_HASH(fl->label)] = fl;
176 atomic_inc(&fl_size);
177 write_unlock_bh(&ip6_fl_lock);
178 return 0;
179}
180
181
182
183/* Socket flowlabel lists */
184
185struct ip6_flowlabel * fl6_sock_lookup(struct sock *sk, u32 label)
186{
187 struct ipv6_fl_socklist *sfl;
188 struct ipv6_pinfo *np = inet6_sk(sk);
189
190 label &= IPV6_FLOWLABEL_MASK;
191
192 for (sfl=np->ipv6_fl_list; sfl; sfl = sfl->next) {
193 struct ip6_flowlabel *fl = sfl->fl;
194 if (fl->label == label) {
195 fl->lastuse = jiffies;
196 atomic_inc(&fl->users);
197 return fl;
198 }
199 }
200 return NULL;
201}
202
203void fl6_free_socklist(struct sock *sk)
204{
205 struct ipv6_pinfo *np = inet6_sk(sk);
206 struct ipv6_fl_socklist *sfl;
207
208 while ((sfl = np->ipv6_fl_list) != NULL) {
209 np->ipv6_fl_list = sfl->next;
210 fl_release(sfl->fl);
211 kfree(sfl);
212 }
213}
214
215/* Service routines */
216
217
218/*
219 It is the only difficult place. flowlabel enforces equal headers
220 before and including routing header, however user may supply options
221 following rthdr.
222 */
223
224struct ipv6_txoptions *fl6_merge_options(struct ipv6_txoptions * opt_space,
225 struct ip6_flowlabel * fl,
226 struct ipv6_txoptions * fopt)
227{
228 struct ipv6_txoptions * fl_opt = fl->opt;
229
230 if (fopt == NULL || fopt->opt_flen == 0)
231 return fl_opt;
232
233 if (fl_opt != NULL) {
234 opt_space->hopopt = fl_opt->hopopt;
235 opt_space->dst0opt = fl_opt->dst0opt;
236 opt_space->srcrt = fl_opt->srcrt;
237 opt_space->opt_nflen = fl_opt->opt_nflen;
238 } else {
239 if (fopt->opt_nflen == 0)
240 return fopt;
241 opt_space->hopopt = NULL;
242 opt_space->dst0opt = NULL;
243 opt_space->srcrt = NULL;
244 opt_space->opt_nflen = 0;
245 }
246 opt_space->dst1opt = fopt->dst1opt;
247 opt_space->auth = fopt->auth;
248 opt_space->opt_flen = fopt->opt_flen;
249 return opt_space;
250}
251
252static unsigned long check_linger(unsigned long ttl)
253{
254 if (ttl < FL_MIN_LINGER)
255 return FL_MIN_LINGER*HZ;
256 if (ttl > FL_MAX_LINGER && !capable(CAP_NET_ADMIN))
257 return 0;
258 return ttl*HZ;
259}
260
261static int fl6_renew(struct ip6_flowlabel *fl, unsigned long linger, unsigned long expires)
262{
263 linger = check_linger(linger);
264 if (!linger)
265 return -EPERM;
266 expires = check_linger(expires);
267 if (!expires)
268 return -EPERM;
269 fl->lastuse = jiffies;
270 if (time_before(fl->linger, linger))
271 fl->linger = linger;
272 if (time_before(expires, fl->linger))
273 expires = fl->linger;
274 if (time_before(fl->expires, fl->lastuse + expires))
275 fl->expires = fl->lastuse + expires;
276 return 0;
277}
278
279static struct ip6_flowlabel *
280fl_create(struct in6_flowlabel_req *freq, char __user *optval, int optlen, int *err_p)
281{
282 struct ip6_flowlabel *fl;
283 int olen;
284 int addr_type;
285 int err;
286
287 err = -ENOMEM;
288 fl = kmalloc(sizeof(*fl), GFP_KERNEL);
289 if (fl == NULL)
290 goto done;
291 memset(fl, 0, sizeof(*fl));
292
293 olen = optlen - CMSG_ALIGN(sizeof(*freq));
294 if (olen > 0) {
295 struct msghdr msg;
296 struct flowi flowi;
297 int junk;
298
299 err = -ENOMEM;
300 fl->opt = kmalloc(sizeof(*fl->opt) + olen, GFP_KERNEL);
301 if (fl->opt == NULL)
302 goto done;
303
304 memset(fl->opt, 0, sizeof(*fl->opt));
305 fl->opt->tot_len = sizeof(*fl->opt) + olen;
306 err = -EFAULT;
307 if (copy_from_user(fl->opt+1, optval+CMSG_ALIGN(sizeof(*freq)), olen))
308 goto done;
309
310 msg.msg_controllen = olen;
311 msg.msg_control = (void*)(fl->opt+1);
312 flowi.oif = 0;
313
314 err = datagram_send_ctl(&msg, &flowi, fl->opt, &junk);
315 if (err)
316 goto done;
317 err = -EINVAL;
318 if (fl->opt->opt_flen)
319 goto done;
320 if (fl->opt->opt_nflen == 0) {
321 kfree(fl->opt);
322 fl->opt = NULL;
323 }
324 }
325
326 fl->expires = jiffies;
327 err = fl6_renew(fl, freq->flr_linger, freq->flr_expires);
328 if (err)
329 goto done;
330 fl->share = freq->flr_share;
331 addr_type = ipv6_addr_type(&freq->flr_dst);
332 if ((addr_type&IPV6_ADDR_MAPPED)
333 || addr_type == IPV6_ADDR_ANY)
334 goto done;
335 ipv6_addr_copy(&fl->dst, &freq->flr_dst);
336 atomic_set(&fl->users, 1);
337 switch (fl->share) {
338 case IPV6_FL_S_EXCL:
339 case IPV6_FL_S_ANY:
340 break;
341 case IPV6_FL_S_PROCESS:
342 fl->owner = current->pid;
343 break;
344 case IPV6_FL_S_USER:
345 fl->owner = current->euid;
346 break;
347 default:
348 err = -EINVAL;
349 goto done;
350 }
351 return fl;
352
353done:
354 fl_free(fl);
355 *err_p = err;
356 return NULL;
357}
358
359static int mem_check(struct sock *sk)
360{
361 struct ipv6_pinfo *np = inet6_sk(sk);
362 struct ipv6_fl_socklist *sfl;
363 int room = FL_MAX_SIZE - atomic_read(&fl_size);
364 int count = 0;
365
366 if (room > FL_MAX_SIZE - FL_MAX_PER_SOCK)
367 return 0;
368
369 for (sfl = np->ipv6_fl_list; sfl; sfl = sfl->next)
370 count++;
371
372 if (room <= 0 ||
373 ((count >= FL_MAX_PER_SOCK ||
374 (count > 0 && room < FL_MAX_SIZE/2) || room < FL_MAX_SIZE/4)
375 && !capable(CAP_NET_ADMIN)))
376 return -ENOBUFS;
377
378 return 0;
379}
380
381static int ipv6_hdr_cmp(struct ipv6_opt_hdr *h1, struct ipv6_opt_hdr *h2)
382{
383 if (h1 == h2)
384 return 0;
385 if (h1 == NULL || h2 == NULL)
386 return 1;
387 if (h1->hdrlen != h2->hdrlen)
388 return 1;
389 return memcmp(h1+1, h2+1, ((h1->hdrlen+1)<<3) - sizeof(*h1));
390}
391
392static int ipv6_opt_cmp(struct ipv6_txoptions *o1, struct ipv6_txoptions *o2)
393{
394 if (o1 == o2)
395 return 0;
396 if (o1 == NULL || o2 == NULL)
397 return 1;
398 if (o1->opt_nflen != o2->opt_nflen)
399 return 1;
400 if (ipv6_hdr_cmp(o1->hopopt, o2->hopopt))
401 return 1;
402 if (ipv6_hdr_cmp(o1->dst0opt, o2->dst0opt))
403 return 1;
404 if (ipv6_hdr_cmp((struct ipv6_opt_hdr *)o1->srcrt, (struct ipv6_opt_hdr *)o2->srcrt))
405 return 1;
406 return 0;
407}
408
409int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen)
410{
411 int err;
412 struct ipv6_pinfo *np = inet6_sk(sk);
413 struct in6_flowlabel_req freq;
414 struct ipv6_fl_socklist *sfl1=NULL;
415 struct ipv6_fl_socklist *sfl, **sflp;
416 struct ip6_flowlabel *fl;
417
418 if (optlen < sizeof(freq))
419 return -EINVAL;
420
421 if (copy_from_user(&freq, optval, sizeof(freq)))
422 return -EFAULT;
423
424 switch (freq.flr_action) {
425 case IPV6_FL_A_PUT:
426 write_lock_bh(&ip6_sk_fl_lock);
427 for (sflp = &np->ipv6_fl_list; (sfl=*sflp)!=NULL; sflp = &sfl->next) {
428 if (sfl->fl->label == freq.flr_label) {
429 if (freq.flr_label == (np->flow_label&IPV6_FLOWLABEL_MASK))
430 np->flow_label &= ~IPV6_FLOWLABEL_MASK;
431 *sflp = sfl->next;
432 write_unlock_bh(&ip6_sk_fl_lock);
433 fl_release(sfl->fl);
434 kfree(sfl);
435 return 0;
436 }
437 }
438 write_unlock_bh(&ip6_sk_fl_lock);
439 return -ESRCH;
440
441 case IPV6_FL_A_RENEW:
442 read_lock_bh(&ip6_sk_fl_lock);
443 for (sfl = np->ipv6_fl_list; sfl; sfl = sfl->next) {
444 if (sfl->fl->label == freq.flr_label) {
445 err = fl6_renew(sfl->fl, freq.flr_linger, freq.flr_expires);
446 read_unlock_bh(&ip6_sk_fl_lock);
447 return err;
448 }
449 }
450 read_unlock_bh(&ip6_sk_fl_lock);
451
452 if (freq.flr_share == IPV6_FL_S_NONE && capable(CAP_NET_ADMIN)) {
453 fl = fl_lookup(freq.flr_label);
454 if (fl) {
455 err = fl6_renew(fl, freq.flr_linger, freq.flr_expires);
456 fl_release(fl);
457 return err;
458 }
459 }
460 return -ESRCH;
461
462 case IPV6_FL_A_GET:
463 if (freq.flr_label & ~IPV6_FLOWLABEL_MASK)
464 return -EINVAL;
465
466 fl = fl_create(&freq, optval, optlen, &err);
467 if (fl == NULL)
468 return err;
469 sfl1 = kmalloc(sizeof(*sfl1), GFP_KERNEL);
470
471 if (freq.flr_label) {
472 struct ip6_flowlabel *fl1 = NULL;
473
474 err = -EEXIST;
475 read_lock_bh(&ip6_sk_fl_lock);
476 for (sfl = np->ipv6_fl_list; sfl; sfl = sfl->next) {
477 if (sfl->fl->label == freq.flr_label) {
478 if (freq.flr_flags&IPV6_FL_F_EXCL) {
479 read_unlock_bh(&ip6_sk_fl_lock);
480 goto done;
481 }
482 fl1 = sfl->fl;
483 atomic_inc(&fl->users);
484 break;
485 }
486 }
487 read_unlock_bh(&ip6_sk_fl_lock);
488
489 if (fl1 == NULL)
490 fl1 = fl_lookup(freq.flr_label);
491 if (fl1) {
492 err = -EEXIST;
493 if (freq.flr_flags&IPV6_FL_F_EXCL)
494 goto release;
495 err = -EPERM;
496 if (fl1->share == IPV6_FL_S_EXCL ||
497 fl1->share != fl->share ||
498 fl1->owner != fl->owner)
499 goto release;
500
501 err = -EINVAL;
502 if (!ipv6_addr_equal(&fl1->dst, &fl->dst) ||
503 ipv6_opt_cmp(fl1->opt, fl->opt))
504 goto release;
505
506 err = -ENOMEM;
507 if (sfl1 == NULL)
508 goto release;
509 if (fl->linger > fl1->linger)
510 fl1->linger = fl->linger;
511 if ((long)(fl->expires - fl1->expires) > 0)
512 fl1->expires = fl->expires;
513 write_lock_bh(&ip6_sk_fl_lock);
514 sfl1->fl = fl1;
515 sfl1->next = np->ipv6_fl_list;
516 np->ipv6_fl_list = sfl1;
517 write_unlock_bh(&ip6_sk_fl_lock);
518 fl_free(fl);
519 return 0;
520
521release:
522 fl_release(fl1);
523 goto done;
524 }
525 }
526 err = -ENOENT;
527 if (!(freq.flr_flags&IPV6_FL_F_CREATE))
528 goto done;
529
530 err = -ENOMEM;
531 if (sfl1 == NULL || (err = mem_check(sk)) != 0)
532 goto done;
533
534 err = fl_intern(fl, freq.flr_label);
535 if (err)
536 goto done;
537
538 /* Do not check for fault */
539 if (!freq.flr_label)
540 copy_to_user(&((struct in6_flowlabel_req __user *) optval)->flr_label,
541 &fl->label, sizeof(fl->label));
542
543 sfl1->fl = fl;
544 sfl1->next = np->ipv6_fl_list;
545 np->ipv6_fl_list = sfl1;
546 return 0;
547
548 default:
549 return -EINVAL;
550 }
551
552done:
553 fl_free(fl);
554 kfree(sfl1);
555 return err;
556}
557
558#ifdef CONFIG_PROC_FS
559
/* Per-open iteration state of the flow label seq_file. */
560struct ip6fl_iter_state {
561 int bucket; /* index into fl_ht of the bucket currently being walked */
562};
563
/* Fetch the iteration state stored in seq->private by ip6fl_seq_open(). */
564#define ip6fl_seq_private(seq) ((struct ip6fl_iter_state *)(seq)->private)
565
566static struct ip6_flowlabel *ip6fl_get_first(struct seq_file *seq)
567{
568 struct ip6_flowlabel *fl = NULL;
569 struct ip6fl_iter_state *state = ip6fl_seq_private(seq);
570
571 for (state->bucket = 0; state->bucket <= FL_HASH_MASK; ++state->bucket) {
572 if (fl_ht[state->bucket]) {
573 fl = fl_ht[state->bucket];
574 break;
575 }
576 }
577 return fl;
578}
579
580static struct ip6_flowlabel *ip6fl_get_next(struct seq_file *seq, struct ip6_flowlabel *fl)
581{
582 struct ip6fl_iter_state *state = ip6fl_seq_private(seq);
583
584 fl = fl->next;
585 while (!fl) {
586 if (++state->bucket <= FL_HASH_MASK)
587 fl = fl_ht[state->bucket];
588 }
589 return fl;
590}
591
592static struct ip6_flowlabel *ip6fl_get_idx(struct seq_file *seq, loff_t pos)
593{
594 struct ip6_flowlabel *fl = ip6fl_get_first(seq);
595 if (fl)
596 while (pos && (fl = ip6fl_get_next(seq, fl)) != NULL)
597 --pos;
598 return pos ? NULL : fl;
599}
600
601static void *ip6fl_seq_start(struct seq_file *seq, loff_t *pos)
602{
603 read_lock_bh(&ip6_fl_lock);
604 return *pos ? ip6fl_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
605}
606
607static void *ip6fl_seq_next(struct seq_file *seq, void *v, loff_t *pos)
608{
609 struct ip6_flowlabel *fl;
610
611 if (v == SEQ_START_TOKEN)
612 fl = ip6fl_get_first(seq);
613 else
614 fl = ip6fl_get_next(seq, v);
615 ++*pos;
616 return fl;
617}
618
619static void ip6fl_seq_stop(struct seq_file *seq, void *v)
620{
621 read_unlock_bh(&ip6_fl_lock);
622}
623
624static void ip6fl_fl_seq_show(struct seq_file *seq, struct ip6_flowlabel *fl)
625{
626 while(fl) {
627 seq_printf(seq,
628 "%05X %-1d %-6d %-6d %-6ld %-8ld "
629 "%02x%02x%02x%02x%02x%02x%02x%02x "
630 "%-4d\n",
631 (unsigned)ntohl(fl->label),
632 fl->share,
633 (unsigned)fl->owner,
634 atomic_read(&fl->users),
635 fl->linger/HZ,
636 (long)(fl->expires - jiffies)/HZ,
637 NIP6(fl->dst),
638 fl->opt ? fl->opt->opt_nflen : 0);
639 fl = fl->next;
640 }
641}
642
643static int ip6fl_seq_show(struct seq_file *seq, void *v)
644{
645 if (v == SEQ_START_TOKEN)
646 seq_puts(seq, "Label S Owner Users Linger Expires "
647 "Dst Opt\n");
648 else
649 ip6fl_fl_seq_show(seq, v);
650 return 0;
651}
652
/*
 * Iterator callbacks for the flow label listing; ip6fl_seq_open() passes
 * this table to seq_open().
 */
653static struct seq_operations ip6fl_seq_ops = {
654 .start = ip6fl_seq_start, /* takes ip6_fl_lock for reading */
655 .next = ip6fl_seq_next,
656 .stop = ip6fl_seq_stop, /* releases ip6_fl_lock */
657 .show = ip6fl_seq_show,
658};
659
660static int ip6fl_seq_open(struct inode *inode, struct file *file)
661{
662 struct seq_file *seq;
663 int rc = -ENOMEM;
664 struct ip6fl_iter_state *s = kmalloc(sizeof(*s), GFP_KERNEL);
665
666 if (!s)
667 goto out;
668
669 rc = seq_open(file, &ip6fl_seq_ops);
670 if (rc)
671 goto out_kfree;
672
673 seq = file->private_data;
674 seq->private = s;
675 memset(s, 0, sizeof(*s));
676out:
677 return rc;
678out_kfree:
679 kfree(s);
680 goto out;
681}
682
/*
 * File operations of the flow label listing; ip6_flowlabel_init()
 * registers them under the name "ip6_flowlabel".
 */
683static struct file_operations ip6fl_seq_fops = {
684 .owner = THIS_MODULE,
685 .open = ip6fl_seq_open,
686 .read = seq_read,
687 .llseek = seq_lseek,
/* NOTE(review): presumably also frees the seq->private set up in open - confirm */
688 .release = seq_release_private,
689};
690#endif
691
692
/*
 * ip6_flowlabel_init - register the read-only "ip6_flowlabel" listing when
 * procfs support is configured; does nothing otherwise.
 */
void ip6_flowlabel_init(void)
{
#ifdef CONFIG_PROC_FS
	proc_net_fops_create("ip6_flowlabel", S_IRUGO, &ip6fl_seq_fops);
#endif
}
699
700void ip6_flowlabel_cleanup(void)
701{
702 del_timer(&ip6_fl_gc_timer);
703#ifdef CONFIG_PROC_FS
704 proc_net_remove("ip6_flowlabel");
705#endif
706}
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
new file mode 100644
index 000000000000..866f10726c58
--- /dev/null
+++ b/net/ipv6/ip6_input.c
@@ -0,0 +1,269 @@
1/*
2 * IPv6 input
3 * Linux INET6 implementation
4 *
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
7 * Ian P. Morris <I.P.Morris@soton.ac.uk>
8 *
9 * $Id: ip6_input.c,v 1.19 2000/12/13 18:31:50 davem Exp $
10 *
11 * Based on linux/net/ipv4/ip_input.c
12 *
13 * This program is free software; you can redistribute it and/or
14 * modify it under the terms of the GNU General Public License
15 * as published by the Free Software Foundation; either version
16 * 2 of the License, or (at your option) any later version.
17 */
18/* Changes
19 *
20 * Mitsuru KANDA @USAGI and
21 * YOSHIFUJI Hideaki @USAGI: Remove ipv6_parse_exthdrs().
22 */
23
24#include <linux/errno.h>
25#include <linux/types.h>
26#include <linux/socket.h>
27#include <linux/sockios.h>
28#include <linux/sched.h>
29#include <linux/net.h>
30#include <linux/netdevice.h>
31#include <linux/in6.h>
32#include <linux/icmpv6.h>
33
34#include <linux/netfilter.h>
35#include <linux/netfilter_ipv6.h>
36
37#include <net/sock.h>
38#include <net/snmp.h>
39
40#include <net/ipv6.h>
41#include <net/protocol.h>
42#include <net/transp_v6.h>
43#include <net/rawv6.h>
44#include <net/ndisc.h>
45#include <net/ip6_route.h>
46#include <net/addrconf.h>
47#include <net/xfrm.h>
48
49
50
51static inline int ip6_rcv_finish( struct sk_buff *skb)
52{
53 if (skb->dst == NULL)
54 ip6_route_input(skb);
55
56 return dst_input(skb);
57}
58
59int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt)
60{
61 struct ipv6hdr *hdr;
62 u32 pkt_len;
63
64 if (skb->pkt_type == PACKET_OTHERHOST)
65 goto drop;
66
67 IP6_INC_STATS_BH(IPSTATS_MIB_INRECEIVES);
68
69 if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) {
70 IP6_INC_STATS_BH(IPSTATS_MIB_INDISCARDS);
71 goto out;
72 }
73
74 /*
75 * Store incoming device index. When the packet will
76 * be queued, we cannot refer to skb->dev anymore.
77 *
78 * BTW, when we send a packet for our own local address on a
79 * non-loopback interface (e.g. ethX), it is being delivered
80 * via the loopback interface (lo) here; skb->dev = &loopback_dev.
81 * It, however, should be considered as if it is being
82 * arrived via the sending interface (ethX), because of the
83 * nature of scoping architecture. --yoshfuji
84 */
85 IP6CB(skb)->iif = skb->dst ? ((struct rt6_info *)skb->dst)->rt6i_idev->dev->ifindex : dev->ifindex;
86
87 if (skb->len < sizeof(struct ipv6hdr))
88 goto err;
89
90 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) {
91 IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
92 goto drop;
93 }
94
95 hdr = skb->nh.ipv6h;
96
97 if (hdr->version != 6)
98 goto err;
99
100 pkt_len = ntohs(hdr->payload_len);
101
102 /* pkt_len may be zero if Jumbo payload option is present */
103 if (pkt_len || hdr->nexthdr != NEXTHDR_HOP) {
104 if (pkt_len + sizeof(struct ipv6hdr) > skb->len)
105 goto truncated;
106 if (pskb_trim_rcsum(skb, pkt_len + sizeof(struct ipv6hdr))) {
107 IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
108 goto drop;
109 }
110 hdr = skb->nh.ipv6h;
111 }
112
113 if (hdr->nexthdr == NEXTHDR_HOP) {
114 skb->h.raw = (u8*)(hdr+1);
115 if (ipv6_parse_hopopts(skb, offsetof(struct ipv6hdr, nexthdr)) < 0) {
116 IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
117 return 0;
118 }
119 hdr = skb->nh.ipv6h;
120 }
121
122 return NF_HOOK(PF_INET6,NF_IP6_PRE_ROUTING, skb, dev, NULL, ip6_rcv_finish);
123truncated:
124 IP6_INC_STATS_BH(IPSTATS_MIB_INTRUNCATEDPKTS);
125err:
126 IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
127drop:
128 kfree_skb(skb);
129out:
130 return 0;
131}
132
133/*
134 * Deliver the packet to the host
135 */
136
137
138static inline int ip6_input_finish(struct sk_buff *skb)
139{
140 struct inet6_protocol *ipprot;
141 struct sock *raw_sk;
142 unsigned int nhoff;
143 int nexthdr;
144 u8 hash;
145
146 skb->h.raw = skb->nh.raw + sizeof(struct ipv6hdr);
147
148 /*
149 * Parse extension headers
150 */
151
152 nexthdr = skb->nh.ipv6h->nexthdr;
153 nhoff = offsetof(struct ipv6hdr, nexthdr);
154
155 /* Skip hop-by-hop options, they are already parsed. */
156 if (nexthdr == NEXTHDR_HOP) {
157 nhoff = sizeof(struct ipv6hdr);
158 nexthdr = skb->h.raw[0];
159 skb->h.raw += (skb->h.raw[1]+1)<<3;
160 }
161
162 rcu_read_lock();
163resubmit:
164 if (!pskb_pull(skb, skb->h.raw - skb->data))
165 goto discard;
166 nexthdr = skb->nh.raw[nhoff];
167
168 raw_sk = sk_head(&raw_v6_htable[nexthdr & (MAX_INET_PROTOS - 1)]);
169 if (raw_sk)
170 ipv6_raw_deliver(skb, nexthdr);
171
172 hash = nexthdr & (MAX_INET_PROTOS - 1);
173 if ((ipprot = rcu_dereference(inet6_protos[hash])) != NULL) {
174 int ret;
175
176 if (ipprot->flags & INET6_PROTO_FINAL) {
177 struct ipv6hdr *hdr;
178
179 skb_postpull_rcsum(skb, skb->nh.raw,
180 skb->h.raw - skb->nh.raw);
181 hdr = skb->nh.ipv6h;
182 if (ipv6_addr_is_multicast(&hdr->daddr) &&
183 !ipv6_chk_mcast_addr(skb->dev, &hdr->daddr,
184 &hdr->saddr) &&
185 !ipv6_is_mld(skb, nexthdr))
186 goto discard;
187 }
188 if (!(ipprot->flags & INET6_PROTO_NOPOLICY) &&
189 !xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
190 goto discard;
191
192 ret = ipprot->handler(&skb, &nhoff);
193 if (ret > 0)
194 goto resubmit;
195 else if (ret == 0)
196 IP6_INC_STATS_BH(IPSTATS_MIB_INDELIVERS);
197 } else {
198 if (!raw_sk) {
199 if (xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
200 IP6_INC_STATS_BH(IPSTATS_MIB_INUNKNOWNPROTOS);
201 icmpv6_param_prob(skb, ICMPV6_UNK_NEXTHDR, nhoff);
202 }
203 } else {
204 IP6_INC_STATS_BH(IPSTATS_MIB_INDELIVERS);
205 kfree_skb(skb);
206 }
207 }
208 rcu_read_unlock();
209 return 0;
210
211discard:
212 IP6_INC_STATS_BH(IPSTATS_MIB_INDISCARDS);
213 rcu_read_unlock();
214 kfree_skb(skb);
215 return 0;
216}
217
218
219int ip6_input(struct sk_buff *skb)
220{
221 return NF_HOOK(PF_INET6,NF_IP6_LOCAL_IN, skb, skb->dev, NULL, ip6_input_finish);
222}
223
224int ip6_mc_input(struct sk_buff *skb)
225{
226 struct ipv6hdr *hdr;
227 int deliver;
228
229 IP6_INC_STATS_BH(IPSTATS_MIB_INMCASTPKTS);
230
231 hdr = skb->nh.ipv6h;
232 deliver = likely(!(skb->dev->flags & (IFF_PROMISC|IFF_ALLMULTI))) ||
233 ipv6_chk_mcast_addr(skb->dev, &hdr->daddr, NULL);
234
235 /*
236 * IPv6 multicast router mode isnt currently supported.
237 */
238#if 0
239 if (ipv6_config.multicast_route) {
240 int addr_type;
241
242 addr_type = ipv6_addr_type(&hdr->daddr);
243
244 if (!(addr_type & (IPV6_ADDR_LOOPBACK | IPV6_ADDR_LINKLOCAL))) {
245 struct sk_buff *skb2;
246 struct dst_entry *dst;
247
248 dst = skb->dst;
249
250 if (deliver) {
251 skb2 = skb_clone(skb, GFP_ATOMIC);
252 dst_output(skb2);
253 } else {
254 dst_output(skb);
255 return 0;
256 }
257 }
258 }
259#endif
260
261 if (likely(deliver)) {
262 ip6_input(skb);
263 return 0;
264 }
265 /* discard */
266 kfree_skb(skb);
267
268 return 0;
269}
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
new file mode 100644
index 000000000000..49208ba75094
--- /dev/null
+++ b/net/ipv6/ip6_output.c
@@ -0,0 +1,1197 @@
1/*
2 * IPv6 output functions
3 * Linux INET6 implementation
4 *
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
7 *
8 * $Id: ip6_output.c,v 1.34 2002/02/01 22:01:04 davem Exp $
9 *
10 * Based on linux/net/ipv4/ip_output.c
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
16 *
17 * Changes:
18 * A.N.Kuznetsov : arithmetics in fragmentation.
19 * extension headers are implemented.
20 * route changes now work.
21 * ip6_forward does not confuse sniffers.
22 * etc.
23 *
24 * H. von Brand : Added missing #include <linux/string.h>
25 * Imran Patel : frag id should be in NBO
26 * Kazunori MIYAZAWA @USAGI
27 * : add ip6_append_data and related functions
28 * for datagram xmit
29 */
30
31#include <linux/config.h>
32#include <linux/errno.h>
33#include <linux/types.h>
34#include <linux/string.h>
35#include <linux/socket.h>
36#include <linux/net.h>
37#include <linux/netdevice.h>
38#include <linux/if_arp.h>
39#include <linux/in6.h>
40#include <linux/tcp.h>
41#include <linux/route.h>
42
43#include <linux/netfilter.h>
44#include <linux/netfilter_ipv6.h>
45
46#include <net/sock.h>
47#include <net/snmp.h>
48
49#include <net/ipv6.h>
50#include <net/ndisc.h>
51#include <net/protocol.h>
52#include <net/ip6_route.h>
53#include <net/addrconf.h>
54#include <net/rawv6.h>
55#include <net/icmp.h>
56#include <net/xfrm.h>
57#include <net/checksum.h>
58
/*
 * Forward declaration; ip6_output() below hands over-sized packets to it
 * together with the function that should transmit each resulting packet.
 */
59static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));
60
61static __inline__ void ipv6_select_ident(struct sk_buff *skb, struct frag_hdr *fhdr)
62{
63 static u32 ipv6_fragmentation_id = 1;
64 static DEFINE_SPINLOCK(ip6_id_lock);
65
66 spin_lock_bh(&ip6_id_lock);
67 fhdr->identification = htonl(ipv6_fragmentation_id);
68 if (++ipv6_fragmentation_id == 0)
69 ipv6_fragmentation_id = 1;
70 spin_unlock_bh(&ip6_id_lock);
71}
72
73static inline int ip6_output_finish(struct sk_buff *skb)
74{
75
76 struct dst_entry *dst = skb->dst;
77 struct hh_cache *hh = dst->hh;
78
79 if (hh) {
80 int hh_alen;
81
82 read_lock_bh(&hh->hh_lock);
83 hh_alen = HH_DATA_ALIGN(hh->hh_len);
84 memcpy(skb->data - hh_alen, hh->hh_data, hh_alen);
85 read_unlock_bh(&hh->hh_lock);
86 skb_push(skb, hh->hh_len);
87 return hh->hh_output(skb);
88 } else if (dst->neighbour)
89 return dst->neighbour->output(skb);
90
91 IP6_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
92 kfree_skb(skb);
93 return -EINVAL;
94
95}
96
97/* dev_loopback_xmit for use with netfilter. */
98static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
99{
100 newskb->mac.raw = newskb->data;
101 __skb_pull(newskb, newskb->nh.raw - newskb->data);
102 newskb->pkt_type = PACKET_LOOPBACK;
103 newskb->ip_summed = CHECKSUM_UNNECESSARY;
104 BUG_TRAP(newskb->dst);
105
106 netif_rx(newskb);
107 return 0;
108}
109
110
111static int ip6_output2(struct sk_buff *skb)
112{
113 struct dst_entry *dst = skb->dst;
114 struct net_device *dev = dst->dev;
115
116 skb->protocol = htons(ETH_P_IPV6);
117 skb->dev = dev;
118
119 if (ipv6_addr_is_multicast(&skb->nh.ipv6h->daddr)) {
120 struct ipv6_pinfo* np = skb->sk ? inet6_sk(skb->sk) : NULL;
121
122 if (!(dev->flags & IFF_LOOPBACK) && (!np || np->mc_loop) &&
123 ipv6_chk_mcast_addr(dev, &skb->nh.ipv6h->daddr,
124 &skb->nh.ipv6h->saddr)) {
125 struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
126
127 /* Do not check for IFF_ALLMULTI; multicast routing
128 is not supported in any case.
129 */
130 if (newskb)
131 NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, newskb, NULL,
132 newskb->dev,
133 ip6_dev_loopback_xmit);
134
135 if (skb->nh.ipv6h->hop_limit == 0) {
136 IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
137 kfree_skb(skb);
138 return 0;
139 }
140 }
141
142 IP6_INC_STATS(IPSTATS_MIB_OUTMCASTPKTS);
143 }
144
145 return NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, skb,NULL, skb->dev,ip6_output_finish);
146}
147
148int ip6_output(struct sk_buff *skb)
149{
150 if (skb->len > dst_mtu(skb->dst) || dst_allfrag(skb->dst))
151 return ip6_fragment(skb, ip6_output2);
152 else
153 return ip6_output2(skb);
154}
155
156#ifdef CONFIG_NETFILTER
157int ip6_route_me_harder(struct sk_buff *skb)
158{
159 struct ipv6hdr *iph = skb->nh.ipv6h;
160 struct dst_entry *dst;
161 struct flowi fl = {
162 .oif = skb->sk ? skb->sk->sk_bound_dev_if : 0,
163 .nl_u =
164 { .ip6_u =
165 { .daddr = iph->daddr,
166 .saddr = iph->saddr, } },
167 .proto = iph->nexthdr,
168 };
169
170 dst = ip6_route_output(skb->sk, &fl);
171
172 if (dst->error) {
173 IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
174 LIMIT_NETDEBUG(
175 printk(KERN_DEBUG "ip6_route_me_harder: No more route.\n"));
176 dst_release(dst);
177 return -EINVAL;
178 }
179
180 /* Drop old route. */
181 dst_release(skb->dst);
182
183 skb->dst = dst;
184 return 0;
185}
186#endif
187
/*
 * ip6_maybe_reroute - continuation after the LOCAL_OUT hook.
 *
 * With netfilter configured, a packet flagged NFC_ALTERED is routed again;
 * if that fails the packet is freed and -EINVAL returned.  In all other
 * cases the packet is passed to dst_output().
 */
static inline int ip6_maybe_reroute(struct sk_buff *skb)
{
#ifdef CONFIG_NETFILTER
	if ((skb->nfcache & NFC_ALTERED) && ip6_route_me_harder(skb) != 0) {
		kfree_skb(skb);
		return -EINVAL;
	}
#endif /* CONFIG_NETFILTER */
	return dst_output(skb);
}
200
201/*
202 * xmit an sk_buff (used by TCP)
203 */
204
205int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
206 struct ipv6_txoptions *opt, int ipfragok)
207{
208 struct ipv6_pinfo *np = sk ? inet6_sk(sk) : NULL;
209 struct in6_addr *first_hop = &fl->fl6_dst;
210 struct dst_entry *dst = skb->dst;
211 struct ipv6hdr *hdr;
212 u8 proto = fl->proto;
213 int seg_len = skb->len;
214 int hlimit;
215 u32 mtu;
216
217 if (opt) {
218 int head_room;
219
220 /* First: exthdrs may take lots of space (~8K for now)
221 MAX_HEADER is not enough.
222 */
223 head_room = opt->opt_nflen + opt->opt_flen;
224 seg_len += head_room;
225 head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
226
227 if (skb_headroom(skb) < head_room) {
228 struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
229 kfree_skb(skb);
230 skb = skb2;
231 if (skb == NULL) {
232 IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
233 return -ENOBUFS;
234 }
235 if (sk)
236 skb_set_owner_w(skb, sk);
237 }
238 if (opt->opt_flen)
239 ipv6_push_frag_opts(skb, opt, &proto);
240 if (opt->opt_nflen)
241 ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
242 }
243
244 hdr = skb->nh.ipv6h = (struct ipv6hdr*)skb_push(skb, sizeof(struct ipv6hdr));
245
246 /*
247 * Fill in the IPv6 header
248 */
249
250 *(u32*)hdr = htonl(0x60000000) | fl->fl6_flowlabel;
251 hlimit = -1;
252 if (np)
253 hlimit = np->hop_limit;
254 if (hlimit < 0)
255 hlimit = dst_metric(dst, RTAX_HOPLIMIT);
256 if (hlimit < 0)
257 hlimit = ipv6_get_hoplimit(dst->dev);
258
259 hdr->payload_len = htons(seg_len);
260 hdr->nexthdr = proto;
261 hdr->hop_limit = hlimit;
262
263 ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
264 ipv6_addr_copy(&hdr->daddr, first_hop);
265
266 mtu = dst_mtu(dst);
267 if ((skb->len <= mtu) || ipfragok) {
268 IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
269 return NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev, ip6_maybe_reroute);
270 }
271
272 if (net_ratelimit())
273 printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");
274 skb->dev = dst->dev;
275 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
276 IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS);
277 kfree_skb(skb);
278 return -EMSGSIZE;
279}
280
281/*
282 * To avoid extra problems ND packets are send through this
283 * routine. It's code duplication but I really want to avoid
284 * extra checks since ipv6_build_header is used by TCP (which
285 * is for us performance critical)
286 */
287
288int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
289 struct in6_addr *saddr, struct in6_addr *daddr,
290 int proto, int len)
291{
292 struct ipv6_pinfo *np = inet6_sk(sk);
293 struct ipv6hdr *hdr;
294 int totlen;
295
296 skb->protocol = htons(ETH_P_IPV6);
297 skb->dev = dev;
298
299 totlen = len + sizeof(struct ipv6hdr);
300
301 hdr = (struct ipv6hdr *) skb_put(skb, sizeof(struct ipv6hdr));
302 skb->nh.ipv6h = hdr;
303
304 *(u32*)hdr = htonl(0x60000000);
305
306 hdr->payload_len = htons(len);
307 hdr->nexthdr = proto;
308 hdr->hop_limit = np->hop_limit;
309
310 ipv6_addr_copy(&hdr->saddr, saddr);
311 ipv6_addr_copy(&hdr->daddr, daddr);
312
313 return 0;
314}
315
316static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
317{
318 struct ip6_ra_chain *ra;
319 struct sock *last = NULL;
320
321 read_lock(&ip6_ra_lock);
322 for (ra = ip6_ra_chain; ra; ra = ra->next) {
323 struct sock *sk = ra->sk;
324 if (sk && ra->sel == sel) {
325 if (last) {
326 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
327 if (skb2)
328 rawv6_rcv(last, skb2);
329 }
330 last = sk;
331 }
332 }
333
334 if (last) {
335 rawv6_rcv(last, skb);
336 read_unlock(&ip6_ra_lock);
337 return 1;
338 }
339 read_unlock(&ip6_ra_lock);
340 return 0;
341}
342
/* Continuation used by ip6_forward(): passes the packet to dst_output(). */
static inline int ip6_forward_finish(struct sk_buff *skb)
{
	int rc = dst_output(skb);

	return rc;
}
347
348int ip6_forward(struct sk_buff *skb)
349{
350 struct dst_entry *dst = skb->dst;
351 struct ipv6hdr *hdr = skb->nh.ipv6h;
352 struct inet6_skb_parm *opt = IP6CB(skb);
353
354 if (ipv6_devconf.forwarding == 0)
355 goto error;
356
357 if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
358 IP6_INC_STATS(IPSTATS_MIB_INDISCARDS);
359 goto drop;
360 }
361
362 skb->ip_summed = CHECKSUM_NONE;
363
364 /*
365 * We DO NOT make any processing on
366 * RA packets, pushing them to user level AS IS
367 * without ane WARRANTY that application will be able
368 * to interpret them. The reason is that we
369 * cannot make anything clever here.
370 *
371 * We are not end-node, so that if packet contains
372 * AH/ESP, we cannot make anything.
373 * Defragmentation also would be mistake, RA packets
374 * cannot be fragmented, because there is no warranty
375 * that different fragments will go along one path. --ANK
376 */
377 if (opt->ra) {
378 u8 *ptr = skb->nh.raw + opt->ra;
379 if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
380 return 0;
381 }
382
383 /*
384 * check and decrement ttl
385 */
386 if (hdr->hop_limit <= 1) {
387 /* Force OUTPUT device used as source address */
388 skb->dev = dst->dev;
389 icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
390 0, skb->dev);
391
392 kfree_skb(skb);
393 return -ETIMEDOUT;
394 }
395
396 if (!xfrm6_route_forward(skb)) {
397 IP6_INC_STATS(IPSTATS_MIB_INDISCARDS);
398 goto drop;
399 }
400 dst = skb->dst;
401
402 /* IPv6 specs say nothing about it, but it is clear that we cannot
403 send redirects to source routed frames.
404 */
405 if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0) {
406 struct in6_addr *target = NULL;
407 struct rt6_info *rt;
408 struct neighbour *n = dst->neighbour;
409
410 /*
411 * incoming and outgoing devices are the same
412 * send a redirect.
413 */
414
415 rt = (struct rt6_info *) dst;
416 if ((rt->rt6i_flags & RTF_GATEWAY))
417 target = (struct in6_addr*)&n->primary_key;
418 else
419 target = &hdr->daddr;
420
421 /* Limit redirects both by destination (here)
422 and by source (inside ndisc_send_redirect)
423 */
424 if (xrlim_allow(dst, 1*HZ))
425 ndisc_send_redirect(skb, n, target);
426 } else if (ipv6_addr_type(&hdr->saddr)&(IPV6_ADDR_MULTICAST|IPV6_ADDR_LOOPBACK
427 |IPV6_ADDR_LINKLOCAL)) {
428 /* This check is security critical. */
429 goto error;
430 }
431
432 if (skb->len > dst_mtu(dst)) {
433 /* Again, force OUTPUT device used as source address */
434 skb->dev = dst->dev;
435 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, dst_mtu(dst), skb->dev);
436 IP6_INC_STATS_BH(IPSTATS_MIB_INTOOBIGERRORS);
437 IP6_INC_STATS_BH(IPSTATS_MIB_FRAGFAILS);
438 kfree_skb(skb);
439 return -EMSGSIZE;
440 }
441
442 if (skb_cow(skb, dst->dev->hard_header_len)) {
443 IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
444 goto drop;
445 }
446
447 hdr = skb->nh.ipv6h;
448
449 /* Mangling hops number delayed to point after skb COW */
450
451 hdr->hop_limit--;
452
453 IP6_INC_STATS_BH(IPSTATS_MIB_OUTFORWDATAGRAMS);
454 return NF_HOOK(PF_INET6,NF_IP6_FORWARD, skb, skb->dev, dst->dev, ip6_forward_finish);
455
456error:
457 IP6_INC_STATS_BH(IPSTATS_MIB_INADDRERRORS);
458drop:
459 kfree_skb(skb);
460 return -EINVAL;
461}
462
463static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
464{
465 to->pkt_type = from->pkt_type;
466 to->priority = from->priority;
467 to->protocol = from->protocol;
468 to->security = from->security;
469 dst_release(to->dst);
470 to->dst = dst_clone(from->dst);
471 to->dev = from->dev;
472
473#ifdef CONFIG_NET_SCHED
474 to->tc_index = from->tc_index;
475#endif
476#ifdef CONFIG_NETFILTER
477 to->nfmark = from->nfmark;
478 /* Connection association is same as pre-frag packet */
479 to->nfct = from->nfct;
480 nf_conntrack_get(to->nfct);
481 to->nfctinfo = from->nfctinfo;
482#ifdef CONFIG_BRIDGE_NETFILTER
483 nf_bridge_put(to->nf_bridge);
484 to->nf_bridge = from->nf_bridge;
485 nf_bridge_get(to->nf_bridge);
486#endif
487#ifdef CONFIG_NETFILTER_DEBUG
488 to->nf_debug = from->nf_debug;
489#endif
490#endif
491}
492
493int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
494{
495 u16 offset = sizeof(struct ipv6hdr);
496 struct ipv6_opt_hdr *exthdr = (struct ipv6_opt_hdr*)(skb->nh.ipv6h + 1);
497 unsigned int packet_len = skb->tail - skb->nh.raw;
498 int found_rhdr = 0;
499 *nexthdr = &skb->nh.ipv6h->nexthdr;
500
501 while (offset + 1 <= packet_len) {
502
503 switch (**nexthdr) {
504
505 case NEXTHDR_HOP:
506 case NEXTHDR_ROUTING:
507 case NEXTHDR_DEST:
508 if (**nexthdr == NEXTHDR_ROUTING) found_rhdr = 1;
509 if (**nexthdr == NEXTHDR_DEST && found_rhdr) return offset;
510 offset += ipv6_optlen(exthdr);
511 *nexthdr = &exthdr->nexthdr;
512 exthdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset);
513 break;
514 default :
515 return offset;
516 }
517 }
518
519 return offset;
520}
521
522static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
523{
524 struct net_device *dev;
525 struct sk_buff *frag;
526 struct rt6_info *rt = (struct rt6_info*)skb->dst;
527 struct ipv6hdr *tmp_hdr;
528 struct frag_hdr *fh;
529 unsigned int mtu, hlen, left, len;
530 u32 frag_id = 0;
531 int ptr, offset = 0, err=0;
532 u8 *prevhdr, nexthdr = 0;
533
534 dev = rt->u.dst.dev;
535 hlen = ip6_find_1stfragopt(skb, &prevhdr);
536 nexthdr = *prevhdr;
537
538 mtu = dst_mtu(&rt->u.dst) - hlen - sizeof(struct frag_hdr);
539
540 if (skb_shinfo(skb)->frag_list) {
541 int first_len = skb_pagelen(skb);
542
543 if (first_len - hlen > mtu ||
544 ((first_len - hlen) & 7) ||
545 skb_cloned(skb))
546 goto slow_path;
547
548 for (frag = skb_shinfo(skb)->frag_list; frag; frag = frag->next) {
549 /* Correct geometry. */
550 if (frag->len > mtu ||
551 ((frag->len & 7) && frag->next) ||
552 skb_headroom(frag) < hlen)
553 goto slow_path;
554
555 /* Correct socket ownership. */
556 if (frag->sk == NULL)
557 goto slow_path;
558
559 /* Partially cloned skb? */
560 if (skb_shared(frag))
561 goto slow_path;
562 }
563
564 err = 0;
565 offset = 0;
566 frag = skb_shinfo(skb)->frag_list;
567 skb_shinfo(skb)->frag_list = NULL;
568 /* BUILD HEADER */
569
570 tmp_hdr = kmalloc(hlen, GFP_ATOMIC);
571 if (!tmp_hdr) {
572 IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS);
573 return -ENOMEM;
574 }
575
576 *prevhdr = NEXTHDR_FRAGMENT;
577 memcpy(tmp_hdr, skb->nh.raw, hlen);
578 __skb_pull(skb, hlen);
579 fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr));
580 skb->nh.raw = __skb_push(skb, hlen);
581 memcpy(skb->nh.raw, tmp_hdr, hlen);
582
583 ipv6_select_ident(skb, fh);
584 fh->nexthdr = nexthdr;
585 fh->reserved = 0;
586 fh->frag_off = htons(IP6_MF);
587 frag_id = fh->identification;
588
589 first_len = skb_pagelen(skb);
590 skb->data_len = first_len - skb_headlen(skb);
591 skb->len = first_len;
592 skb->nh.ipv6h->payload_len = htons(first_len - sizeof(struct ipv6hdr));
593
594
595 for (;;) {
596 /* Prepare header of the next frame,
597 * before previous one went down. */
598 if (frag) {
599 frag->ip_summed = CHECKSUM_NONE;
600 frag->h.raw = frag->data;
601 fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr));
602 frag->nh.raw = __skb_push(frag, hlen);
603 memcpy(frag->nh.raw, tmp_hdr, hlen);
604 offset += skb->len - hlen - sizeof(struct frag_hdr);
605 fh->nexthdr = nexthdr;
606 fh->reserved = 0;
607 fh->frag_off = htons(offset);
608 if (frag->next != NULL)
609 fh->frag_off |= htons(IP6_MF);
610 fh->identification = frag_id;
611 frag->nh.ipv6h->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
612 ip6_copy_metadata(frag, skb);
613 }
614
615 err = output(skb);
616 if (err || !frag)
617 break;
618
619 skb = frag;
620 frag = skb->next;
621 skb->next = NULL;
622 }
623
624 if (tmp_hdr)
625 kfree(tmp_hdr);
626
627 if (err == 0) {
628 IP6_INC_STATS(IPSTATS_MIB_FRAGOKS);
629 return 0;
630 }
631
632 while (frag) {
633 skb = frag->next;
634 kfree_skb(frag);
635 frag = skb;
636 }
637
638 IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS);
639 return err;
640 }
641
642slow_path:
643 left = skb->len - hlen; /* Space per frame */
644 ptr = hlen; /* Where to start from */
645
646 /*
647 * Fragment the datagram.
648 */
649
650 *prevhdr = NEXTHDR_FRAGMENT;
651
652 /*
653 * Keep copying data until we run out.
654 */
655 while(left > 0) {
656 len = left;
657 /* IF: it doesn't fit, use 'mtu' - the data space left */
658 if (len > mtu)
659 len = mtu;
660 /* IF: we are not sending upto and including the packet end
661 then align the next start on an eight byte boundary */
662 if (len < left) {
663 len &= ~7;
664 }
665 /*
666 * Allocate buffer.
667 */
668
669 if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_RESERVED_SPACE(rt->u.dst.dev), GFP_ATOMIC)) == NULL) {
670 NETDEBUG(printk(KERN_INFO "IPv6: frag: no memory for new fragment!\n"));
671 IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS);
672 err = -ENOMEM;
673 goto fail;
674 }
675
676 /*
677 * Set up data on packet
678 */
679
680 ip6_copy_metadata(frag, skb);
681 skb_reserve(frag, LL_RESERVED_SPACE(rt->u.dst.dev));
682 skb_put(frag, len + hlen + sizeof(struct frag_hdr));
683 frag->nh.raw = frag->data;
684 fh = (struct frag_hdr*)(frag->data + hlen);
685 frag->h.raw = frag->data + hlen + sizeof(struct frag_hdr);
686
687 /*
688 * Charge the memory for the fragment to any owner
689 * it might possess
690 */
691 if (skb->sk)
692 skb_set_owner_w(frag, skb->sk);
693
694 /*
695 * Copy the packet header into the new buffer.
696 */
697 memcpy(frag->nh.raw, skb->data, hlen);
698
699 /*
700 * Build fragment header.
701 */
702 fh->nexthdr = nexthdr;
703 fh->reserved = 0;
704 if (frag_id) {
705 ipv6_select_ident(skb, fh);
706 frag_id = fh->identification;
707 } else
708 fh->identification = frag_id;
709
710 /*
711 * Copy a block of the IP datagram.
712 */
713 if (skb_copy_bits(skb, ptr, frag->h.raw, len))
714 BUG();
715 left -= len;
716
717 fh->frag_off = htons(offset);
718 if (left > 0)
719 fh->frag_off |= htons(IP6_MF);
720 frag->nh.ipv6h->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
721
722 ptr += len;
723 offset += len;
724
725 /*
726 * Put this fragment into the sending queue.
727 */
728
729 IP6_INC_STATS(IPSTATS_MIB_FRAGCREATES);
730
731 err = output(frag);
732 if (err)
733 goto fail;
734 }
735 kfree_skb(skb);
736 IP6_INC_STATS(IPSTATS_MIB_FRAGOKS);
737 return err;
738
739fail:
740 kfree_skb(skb);
741 IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS);
742 return err;
743}
744
745int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
746{
747 int err = 0;
748
749 *dst = NULL;
750 if (sk) {
751 struct ipv6_pinfo *np = inet6_sk(sk);
752
753 *dst = sk_dst_check(sk, np->dst_cookie);
754 if (*dst) {
755 struct rt6_info *rt = (struct rt6_info*)*dst;
756
757 /* Yes, checking route validity in not connected
758 case is not very simple. Take into account,
759 that we do not support routing by source, TOS,
760 and MSG_DONTROUTE --ANK (980726)
761
762 1. If route was host route, check that
763 cached destination is current.
764 If it is network route, we still may
765 check its validity using saved pointer
766 to the last used address: daddr_cache.
767 We do not want to save whole address now,
768 (because main consumer of this service
769 is tcp, which has not this problem),
770 so that the last trick works only on connected
771 sockets.
772 2. oif also should be the same.
773 */
774
775 if (((rt->rt6i_dst.plen != 128 ||
776 !ipv6_addr_equal(&fl->fl6_dst, &rt->rt6i_dst.addr))
777 && (np->daddr_cache == NULL ||
778 !ipv6_addr_equal(&fl->fl6_dst, np->daddr_cache)))
779 || (fl->oif && fl->oif != (*dst)->dev->ifindex)) {
780 dst_release(*dst);
781 *dst = NULL;
782 }
783 }
784 }
785
786 if (*dst == NULL)
787 *dst = ip6_route_output(sk, fl);
788
789 if ((err = (*dst)->error))
790 goto out_err_release;
791
792 if (ipv6_addr_any(&fl->fl6_src)) {
793 err = ipv6_get_saddr(*dst, &fl->fl6_dst, &fl->fl6_src);
794
795 if (err) {
796#if IP6_DEBUG >= 2
797 printk(KERN_DEBUG "ip6_dst_lookup: "
798 "no available source address\n");
799#endif
800 goto out_err_release;
801 }
802 }
803
804 return 0;
805
806out_err_release:
807 dst_release(*dst);
808 *dst = NULL;
809 return err;
810}
811
812int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb),
813 void *from, int length, int transhdrlen,
814 int hlimit, struct ipv6_txoptions *opt, struct flowi *fl, struct rt6_info *rt,
815 unsigned int flags)
816{
817 struct inet_sock *inet = inet_sk(sk);
818 struct ipv6_pinfo *np = inet6_sk(sk);
819 struct sk_buff *skb;
820 unsigned int maxfraglen, fragheaderlen;
821 int exthdrlen;
822 int hh_len;
823 int mtu;
824 int copy;
825 int err;
826 int offset = 0;
827 int csummode = CHECKSUM_NONE;
828
829 if (flags&MSG_PROBE)
830 return 0;
831 if (skb_queue_empty(&sk->sk_write_queue)) {
832 /*
833 * setup for corking
834 */
835 if (opt) {
836 if (np->cork.opt == NULL) {
837 np->cork.opt = kmalloc(opt->tot_len,
838 sk->sk_allocation);
839 if (unlikely(np->cork.opt == NULL))
840 return -ENOBUFS;
841 } else if (np->cork.opt->tot_len < opt->tot_len) {
842 printk(KERN_DEBUG "ip6_append_data: invalid option length\n");
843 return -EINVAL;
844 }
845 memcpy(np->cork.opt, opt, opt->tot_len);
846 inet->cork.flags |= IPCORK_OPT;
847 /* need source address above miyazawa*/
848 }
849 dst_hold(&rt->u.dst);
850 np->cork.rt = rt;
851 inet->cork.fl = *fl;
852 np->cork.hop_limit = hlimit;
853 inet->cork.fragsize = mtu = dst_mtu(rt->u.dst.path);
854 if (dst_allfrag(rt->u.dst.path))
855 inet->cork.flags |= IPCORK_ALLFRAG;
856 inet->cork.length = 0;
857 sk->sk_sndmsg_page = NULL;
858 sk->sk_sndmsg_off = 0;
859 exthdrlen = rt->u.dst.header_len + (opt ? opt->opt_flen : 0);
860 length += exthdrlen;
861 transhdrlen += exthdrlen;
862 } else {
863 rt = np->cork.rt;
864 fl = &inet->cork.fl;
865 if (inet->cork.flags & IPCORK_OPT)
866 opt = np->cork.opt;
867 transhdrlen = 0;
868 exthdrlen = 0;
869 mtu = inet->cork.fragsize;
870 }
871
872 hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
873
874 fragheaderlen = sizeof(struct ipv6hdr) + (opt ? opt->opt_nflen : 0);
875 maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);
876
877 if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
878 if (inet->cork.length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
879 ipv6_local_error(sk, EMSGSIZE, fl, mtu-exthdrlen);
880 return -EMSGSIZE;
881 }
882 }
883
884 /*
885 * Let's try using as much space as possible.
886 * Use MTU if total length of the message fits into the MTU.
887 * Otherwise, we need to reserve fragment header and
888 * fragment alignment (= 8-15 octects, in total).
889 *
890 * Note that we may need to "move" the data from the tail of
891 * of the buffer to the new fragment when we split
892 * the message.
893 *
894 * FIXME: It may be fragmented into multiple chunks
895 * at once if non-fragmentable extension headers
896 * are too large.
897 * --yoshfuji
898 */
899
900 inet->cork.length += length;
901
902 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
903 goto alloc_new_skb;
904
905 while (length > 0) {
906 /* Check if the remaining data fits into current packet. */
907 copy = (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
908 if (copy < length)
909 copy = maxfraglen - skb->len;
910
911 if (copy <= 0) {
912 char *data;
913 unsigned int datalen;
914 unsigned int fraglen;
915 unsigned int fraggap;
916 unsigned int alloclen;
917 struct sk_buff *skb_prev;
918alloc_new_skb:
919 skb_prev = skb;
920
921 /* There's no room in the current skb */
922 if (skb_prev)
923 fraggap = skb_prev->len - maxfraglen;
924 else
925 fraggap = 0;
926
927 /*
928 * If remaining data exceeds the mtu,
929 * we know we need more fragment(s).
930 */
931 datalen = length + fraggap;
932 if (datalen > (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
933 datalen = maxfraglen - fragheaderlen;
934
935 fraglen = datalen + fragheaderlen;
936 if ((flags & MSG_MORE) &&
937 !(rt->u.dst.dev->features&NETIF_F_SG))
938 alloclen = mtu;
939 else
940 alloclen = datalen + fragheaderlen;
941
942 /*
943 * The last fragment gets additional space at tail.
944 * Note: we overallocate on fragments with MSG_MODE
945 * because we have no idea if we're the last one.
946 */
947 if (datalen == length + fraggap)
948 alloclen += rt->u.dst.trailer_len;
949
950 /*
951 * We just reserve space for fragment header.
952 * Note: this may be overallocation if the message
953 * (without MSG_MORE) fits into the MTU.
954 */
955 alloclen += sizeof(struct frag_hdr);
956
957 if (transhdrlen) {
958 skb = sock_alloc_send_skb(sk,
959 alloclen + hh_len,
960 (flags & MSG_DONTWAIT), &err);
961 } else {
962 skb = NULL;
963 if (atomic_read(&sk->sk_wmem_alloc) <=
964 2 * sk->sk_sndbuf)
965 skb = sock_wmalloc(sk,
966 alloclen + hh_len, 1,
967 sk->sk_allocation);
968 if (unlikely(skb == NULL))
969 err = -ENOBUFS;
970 }
971 if (skb == NULL)
972 goto error;
973 /*
974 * Fill in the control structures
975 */
976 skb->ip_summed = csummode;
977 skb->csum = 0;
978 /* reserve for fragmentation */
979 skb_reserve(skb, hh_len+sizeof(struct frag_hdr));
980
981 /*
982 * Find where to start putting bytes
983 */
984 data = skb_put(skb, fraglen);
985 skb->nh.raw = data + exthdrlen;
986 data += fragheaderlen;
987 skb->h.raw = data + exthdrlen;
988
989 if (fraggap) {
990 skb->csum = skb_copy_and_csum_bits(
991 skb_prev, maxfraglen,
992 data + transhdrlen, fraggap, 0);
993 skb_prev->csum = csum_sub(skb_prev->csum,
994 skb->csum);
995 data += fraggap;
996 skb_trim(skb_prev, maxfraglen);
997 }
998 copy = datalen - transhdrlen - fraggap;
999 if (copy < 0) {
1000 err = -EINVAL;
1001 kfree_skb(skb);
1002 goto error;
1003 } else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
1004 err = -EFAULT;
1005 kfree_skb(skb);
1006 goto error;
1007 }
1008
1009 offset += copy;
1010 length -= datalen - fraggap;
1011 transhdrlen = 0;
1012 exthdrlen = 0;
1013 csummode = CHECKSUM_NONE;
1014
1015 /*
1016 * Put the packet on the pending queue
1017 */
1018 __skb_queue_tail(&sk->sk_write_queue, skb);
1019 continue;
1020 }
1021
1022 if (copy > length)
1023 copy = length;
1024
1025 if (!(rt->u.dst.dev->features&NETIF_F_SG)) {
1026 unsigned int off;
1027
1028 off = skb->len;
1029 if (getfrag(from, skb_put(skb, copy),
1030 offset, copy, off, skb) < 0) {
1031 __skb_trim(skb, off);
1032 err = -EFAULT;
1033 goto error;
1034 }
1035 } else {
1036 int i = skb_shinfo(skb)->nr_frags;
1037 skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
1038 struct page *page = sk->sk_sndmsg_page;
1039 int off = sk->sk_sndmsg_off;
1040 unsigned int left;
1041
1042 if (page && (left = PAGE_SIZE - off) > 0) {
1043 if (copy >= left)
1044 copy = left;
1045 if (page != frag->page) {
1046 if (i == MAX_SKB_FRAGS) {
1047 err = -EMSGSIZE;
1048 goto error;
1049 }
1050 get_page(page);
1051 skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
1052 frag = &skb_shinfo(skb)->frags[i];
1053 }
1054 } else if(i < MAX_SKB_FRAGS) {
1055 if (copy > PAGE_SIZE)
1056 copy = PAGE_SIZE;
1057 page = alloc_pages(sk->sk_allocation, 0);
1058 if (page == NULL) {
1059 err = -ENOMEM;
1060 goto error;
1061 }
1062 sk->sk_sndmsg_page = page;
1063 sk->sk_sndmsg_off = 0;
1064
1065 skb_fill_page_desc(skb, i, page, 0, 0);
1066 frag = &skb_shinfo(skb)->frags[i];
1067 skb->truesize += PAGE_SIZE;
1068 atomic_add(PAGE_SIZE, &sk->sk_wmem_alloc);
1069 } else {
1070 err = -EMSGSIZE;
1071 goto error;
1072 }
1073 if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) {
1074 err = -EFAULT;
1075 goto error;
1076 }
1077 sk->sk_sndmsg_off += copy;
1078 frag->size += copy;
1079 skb->len += copy;
1080 skb->data_len += copy;
1081 }
1082 offset += copy;
1083 length -= copy;
1084 }
1085 return 0;
1086error:
1087 inet->cork.length -= length;
1088 IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
1089 return err;
1090}
1091
1092int ip6_push_pending_frames(struct sock *sk)
1093{
1094 struct sk_buff *skb, *tmp_skb;
1095 struct sk_buff **tail_skb;
1096 struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
1097 struct inet_sock *inet = inet_sk(sk);
1098 struct ipv6_pinfo *np = inet6_sk(sk);
1099 struct ipv6hdr *hdr;
1100 struct ipv6_txoptions *opt = np->cork.opt;
1101 struct rt6_info *rt = np->cork.rt;
1102 struct flowi *fl = &inet->cork.fl;
1103 unsigned char proto = fl->proto;
1104 int err = 0;
1105
1106 if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
1107 goto out;
1108 tail_skb = &(skb_shinfo(skb)->frag_list);
1109
1110 /* move skb->data to ip header from ext header */
1111 if (skb->data < skb->nh.raw)
1112 __skb_pull(skb, skb->nh.raw - skb->data);
1113 while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
1114 __skb_pull(tmp_skb, skb->h.raw - skb->nh.raw);
1115 *tail_skb = tmp_skb;
1116 tail_skb = &(tmp_skb->next);
1117 skb->len += tmp_skb->len;
1118 skb->data_len += tmp_skb->len;
1119#if 0 /* Logically correct, but useless work, ip_fragment() will have to undo */
1120 skb->truesize += tmp_skb->truesize;
1121 __sock_put(tmp_skb->sk);
1122 tmp_skb->destructor = NULL;
1123 tmp_skb->sk = NULL;
1124#endif
1125 }
1126
1127 ipv6_addr_copy(final_dst, &fl->fl6_dst);
1128 __skb_pull(skb, skb->h.raw - skb->nh.raw);
1129 if (opt && opt->opt_flen)
1130 ipv6_push_frag_opts(skb, opt, &proto);
1131 if (opt && opt->opt_nflen)
1132 ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);
1133
1134 skb->nh.ipv6h = hdr = (struct ipv6hdr*) skb_push(skb, sizeof(struct ipv6hdr));
1135
1136 *(u32*)hdr = fl->fl6_flowlabel | htonl(0x60000000);
1137
1138 if (skb->len <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN)
1139 hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
1140 else
1141 hdr->payload_len = 0;
1142 hdr->hop_limit = np->cork.hop_limit;
1143 hdr->nexthdr = proto;
1144 ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
1145 ipv6_addr_copy(&hdr->daddr, final_dst);
1146
1147 skb->dst = dst_clone(&rt->u.dst);
1148 IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
1149 err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, skb->dst->dev, dst_output);
1150 if (err) {
1151 if (err > 0)
1152 err = inet->recverr ? net_xmit_errno(err) : 0;
1153 if (err)
1154 goto error;
1155 }
1156
1157out:
1158 inet->cork.flags &= ~IPCORK_OPT;
1159 if (np->cork.opt) {
1160 kfree(np->cork.opt);
1161 np->cork.opt = NULL;
1162 }
1163 if (np->cork.rt) {
1164 dst_release(&np->cork.rt->u.dst);
1165 np->cork.rt = NULL;
1166 inet->cork.flags &= ~IPCORK_ALLFRAG;
1167 }
1168 memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
1169 return err;
1170error:
1171 goto out;
1172}
1173
1174void ip6_flush_pending_frames(struct sock *sk)
1175{
1176 struct inet_sock *inet = inet_sk(sk);
1177 struct ipv6_pinfo *np = inet6_sk(sk);
1178 struct sk_buff *skb;
1179
1180 while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
1181 IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
1182 kfree_skb(skb);
1183 }
1184
1185 inet->cork.flags &= ~IPCORK_OPT;
1186
1187 if (np->cork.opt) {
1188 kfree(np->cork.opt);
1189 np->cork.opt = NULL;
1190 }
1191 if (np->cork.rt) {
1192 dst_release(&np->cork.rt->u.dst);
1193 np->cork.rt = NULL;
1194 inet->cork.flags &= ~IPCORK_ALLFRAG;
1195 }
1196 memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
1197}
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
new file mode 100644
index 000000000000..3b1c9fa184ae
--- /dev/null
+++ b/net/ipv6/ip6_tunnel.c
@@ -0,0 +1,1163 @@
1/*
2 * IPv6 over IPv6 tunnel device
3 * Linux INET6 implementation
4 *
5 * Authors:
6 * Ville Nuorvala <vnuorval@tcs.hut.fi>
7 *
8 * $Id$
9 *
10 * Based on:
11 * linux/net/ipv6/sit.c
12 *
13 * RFC 2473
14 *
15 * This program is free software; you can redistribute it and/or
16 * modify it under the terms of the GNU General Public License
17 * as published by the Free Software Foundation; either version
18 * 2 of the License, or (at your option) any later version.
19 *
20 */
21
22#include <linux/config.h>
23#include <linux/module.h>
24#include <linux/errno.h>
25#include <linux/types.h>
26#include <linux/sockios.h>
27#include <linux/if.h>
28#include <linux/in.h>
29#include <linux/ip.h>
30#include <linux/if_tunnel.h>
31#include <linux/net.h>
32#include <linux/in6.h>
33#include <linux/netdevice.h>
34#include <linux/if_arp.h>
35#include <linux/icmpv6.h>
36#include <linux/init.h>
37#include <linux/route.h>
38#include <linux/rtnetlink.h>
39#include <linux/netfilter_ipv6.h>
40
41#include <asm/uaccess.h>
42#include <asm/atomic.h>
43
44#include <net/ip.h>
45#include <net/ipv6.h>
46#include <net/protocol.h>
47#include <net/ip6_route.h>
48#include <net/addrconf.h>
49#include <net/ip6_tunnel.h>
50#include <net/xfrm.h>
51#include <net/dsfield.h>
52#include <net/inet_ecn.h>
53
54MODULE_AUTHOR("Ville Nuorvala");
55MODULE_DESCRIPTION("IPv6-in-IPv6 tunnel");
56MODULE_LICENSE("GPL");
57
58#define IPV6_TLV_TEL_DST_SIZE 8
59
60#ifdef IP6_TNL_DEBUG
61#define IP6_TNL_TRACE(x...) printk(KERN_DEBUG "%s:" x "\n", __FUNCTION__)
62#else
63#define IP6_TNL_TRACE(x...) do {;} while(0)
64#endif
65
66#define IPV6_TCLASS_MASK (IPV6_FLOWINFO_MASK & ~IPV6_FLOWLABEL_MASK)
67
68#define HASH_SIZE 32
69
70#define HASH(addr) (((addr)->s6_addr32[0] ^ (addr)->s6_addr32[1] ^ \
71 (addr)->s6_addr32[2] ^ (addr)->s6_addr32[3]) & \
72 (HASH_SIZE - 1))
73
74static int ip6ip6_fb_tnl_dev_init(struct net_device *dev);
75static int ip6ip6_tnl_dev_init(struct net_device *dev);
76static void ip6ip6_tnl_dev_setup(struct net_device *dev);
77
78/* the IPv6 tunnel fallback device */
79static struct net_device *ip6ip6_fb_tnl_dev;
80
81
82/* lists for storing tunnels in use */
83static struct ip6_tnl *tnls_r_l[HASH_SIZE];
84static struct ip6_tnl *tnls_wc[1];
85static struct ip6_tnl **tnls[2] = { tnls_wc, tnls_r_l };
86
87/* lock for the tunnel lists */
88static DEFINE_RWLOCK(ip6ip6_lock);
89
90static inline struct dst_entry *ip6_tnl_dst_check(struct ip6_tnl *t)
91{
92 struct dst_entry *dst = t->dst_cache;
93
94 if (dst && dst->obsolete &&
95 dst->ops->check(dst, t->dst_cookie) == NULL) {
96 t->dst_cache = NULL;
97 dst_release(dst);
98 return NULL;
99 }
100
101 return dst;
102}
103
104static inline void ip6_tnl_dst_reset(struct ip6_tnl *t)
105{
106 dst_release(t->dst_cache);
107 t->dst_cache = NULL;
108}
109
110static inline void ip6_tnl_dst_store(struct ip6_tnl *t, struct dst_entry *dst)
111{
112 struct rt6_info *rt = (struct rt6_info *) dst;
113 t->dst_cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0;
114 dst_release(t->dst_cache);
115 t->dst_cache = dst;
116}
117
118/**
119 * ip6ip6_tnl_lookup - fetch tunnel matching the end-point addresses
120 * @remote: the address of the tunnel exit-point
121 * @local: the address of the tunnel entry-point
122 *
123 * Return:
124 * tunnel matching given end-points if found,
125 * else fallback tunnel if its device is up,
126 * else %NULL
127 **/
128
129static struct ip6_tnl *
130ip6ip6_tnl_lookup(struct in6_addr *remote, struct in6_addr *local)
131{
132 unsigned h0 = HASH(remote);
133 unsigned h1 = HASH(local);
134 struct ip6_tnl *t;
135
136 for (t = tnls_r_l[h0 ^ h1]; t; t = t->next) {
137 if (ipv6_addr_equal(local, &t->parms.laddr) &&
138 ipv6_addr_equal(remote, &t->parms.raddr) &&
139 (t->dev->flags & IFF_UP))
140 return t;
141 }
142 if ((t = tnls_wc[0]) != NULL && (t->dev->flags & IFF_UP))
143 return t;
144
145 return NULL;
146}
147
148/**
149 * ip6ip6_bucket - get head of list matching given tunnel parameters
150 * @p: parameters containing tunnel end-points
151 *
152 * Description:
153 * ip6ip6_bucket() returns the head of the list matching the
154 * &struct in6_addr entries laddr and raddr in @p.
155 *
156 * Return: head of IPv6 tunnel list
157 **/
158
159static struct ip6_tnl **
160ip6ip6_bucket(struct ip6_tnl_parm *p)
161{
162 struct in6_addr *remote = &p->raddr;
163 struct in6_addr *local = &p->laddr;
164 unsigned h = 0;
165 int prio = 0;
166
167 if (!ipv6_addr_any(remote) || !ipv6_addr_any(local)) {
168 prio = 1;
169 h = HASH(remote) ^ HASH(local);
170 }
171 return &tnls[prio][h];
172}
173
174/**
175 * ip6ip6_tnl_link - add tunnel to hash table
176 * @t: tunnel to be added
177 **/
178
179static void
180ip6ip6_tnl_link(struct ip6_tnl *t)
181{
182 struct ip6_tnl **tp = ip6ip6_bucket(&t->parms);
183
184 t->next = *tp;
185 write_lock_bh(&ip6ip6_lock);
186 *tp = t;
187 write_unlock_bh(&ip6ip6_lock);
188}
189
190/**
191 * ip6ip6_tnl_unlink - remove tunnel from hash table
192 * @t: tunnel to be removed
193 **/
194
195static void
196ip6ip6_tnl_unlink(struct ip6_tnl *t)
197{
198 struct ip6_tnl **tp;
199
200 for (tp = ip6ip6_bucket(&t->parms); *tp; tp = &(*tp)->next) {
201 if (t == *tp) {
202 write_lock_bh(&ip6ip6_lock);
203 *tp = t->next;
204 write_unlock_bh(&ip6ip6_lock);
205 break;
206 }
207 }
208}
209
210/**
211 * ip6_tnl_create() - create a new tunnel
212 * @p: tunnel parameters
213 * @pt: pointer to new tunnel
214 *
215 * Description:
216 * Create tunnel matching given parameters.
217 *
218 * Return:
219 * 0 on success
220 **/
221
222static int
223ip6_tnl_create(struct ip6_tnl_parm *p, struct ip6_tnl **pt)
224{
225 struct net_device *dev;
226 struct ip6_tnl *t;
227 char name[IFNAMSIZ];
228 int err;
229
230 if (p->name[0]) {
231 strlcpy(name, p->name, IFNAMSIZ);
232 } else {
233 int i;
234 for (i = 1; i < IP6_TNL_MAX; i++) {
235 sprintf(name, "ip6tnl%d", i);
236 if (__dev_get_by_name(name) == NULL)
237 break;
238 }
239 if (i == IP6_TNL_MAX)
240 return -ENOBUFS;
241 }
242 dev = alloc_netdev(sizeof (*t), name, ip6ip6_tnl_dev_setup);
243 if (dev == NULL)
244 return -ENOMEM;
245
246 t = dev->priv;
247 dev->init = ip6ip6_tnl_dev_init;
248 t->parms = *p;
249
250 if ((err = register_netdevice(dev)) < 0) {
251 free_netdev(dev);
252 return err;
253 }
254 dev_hold(dev);
255
256 ip6ip6_tnl_link(t);
257 *pt = t;
258 return 0;
259}
260
261/**
262 * ip6ip6_tnl_locate - find or create tunnel matching given parameters
263 * @p: tunnel parameters
264 * @create: != 0 if allowed to create new tunnel if no match found
265 *
266 * Description:
267 * ip6ip6_tnl_locate() first tries to locate an existing tunnel
268 * based on @parms. If this is unsuccessful, but @create is set a new
269 * tunnel device is created and registered for use.
270 *
271 * Return:
272 * 0 if tunnel located or created,
273 * -EINVAL if parameters incorrect,
274 * -ENODEV if no matching tunnel available
275 **/
276
277static int
278ip6ip6_tnl_locate(struct ip6_tnl_parm *p, struct ip6_tnl **pt, int create)
279{
280 struct in6_addr *remote = &p->raddr;
281 struct in6_addr *local = &p->laddr;
282 struct ip6_tnl *t;
283
284 if (p->proto != IPPROTO_IPV6)
285 return -EINVAL;
286
287 for (t = *ip6ip6_bucket(p); t; t = t->next) {
288 if (ipv6_addr_equal(local, &t->parms.laddr) &&
289 ipv6_addr_equal(remote, &t->parms.raddr)) {
290 *pt = t;
291 return (create ? -EEXIST : 0);
292 }
293 }
294 if (!create)
295 return -ENODEV;
296
297 return ip6_tnl_create(p, pt);
298}
299
300/**
301 * ip6ip6_tnl_dev_uninit - tunnel device uninitializer
302 * @dev: the device to be destroyed
303 *
304 * Description:
305 * ip6ip6_tnl_dev_uninit() removes tunnel from its list
306 **/
307
308static void
309ip6ip6_tnl_dev_uninit(struct net_device *dev)
310{
311 struct ip6_tnl *t = dev->priv;
312
313 if (dev == ip6ip6_fb_tnl_dev) {
314 write_lock_bh(&ip6ip6_lock);
315 tnls_wc[0] = NULL;
316 write_unlock_bh(&ip6ip6_lock);
317 } else {
318 ip6ip6_tnl_unlink(t);
319 }
320 ip6_tnl_dst_reset(t);
321 dev_put(dev);
322}
323
/**
 * parse_tlv_tnl_enc_lim - handle encapsulation limit option
 *   @skb: received socket buffer
 *   @raw: pointer to the IPv6 header (inside @skb) to start parsing from
 *
 * Description:
 *   Walks the chain of extension headers following the IPv6 header at @raw.
 *   Inside every destination options header the TLVs are scanned for a
 *   tunnel encapsulation limit option of length 1.
 *
 * Return:
 *   0 if none was found,
 *   else index to encapsulation limit (byte offset relative to @raw)
 **/

static __u16
parse_tlv_tnl_enc_lim(struct sk_buff *skb, __u8 * raw)
{
	struct ipv6hdr *ipv6h = (struct ipv6hdr *) raw;
	__u8 nexthdr = ipv6h->nexthdr;
	/* offset of the header currently examined, relative to raw */
	__u16 off = sizeof (*ipv6h);

	while (ipv6_ext_hdr(nexthdr) && nexthdr != NEXTHDR_NONE) {
		__u16 optlen = 0;
		struct ipv6_opt_hdr *hdr;
		/* make sure the fixed part of the next header is accessible */
		/* NOTE(review): raw is not re-derived after pskb_may_pull();
		   presumably it stays valid here - confirm */
		if (raw + off + sizeof (*hdr) > skb->data &&
		    !pskb_may_pull(skb, raw - skb->data + off + sizeof (*hdr)))
			break;

		hdr = (struct ipv6_opt_hdr *) (raw + off);
		/* work out the total length of this extension header */
		if (nexthdr == NEXTHDR_FRAGMENT) {
			struct frag_hdr *frag_hdr = (struct frag_hdr *) hdr;
			/* stop parsing when frag_off is non-zero */
			if (frag_hdr->frag_off)
				break;
			optlen = 8;
		} else if (nexthdr == NEXTHDR_AUTH) {
			optlen = (hdr->hdrlen + 2) << 2;
		} else {
			optlen = ipv6_optlen(hdr);
		}
		if (nexthdr == NEXTHDR_DEST) {
			/* skip the two leading bytes of the options header */
			__u16 i = off + 2;
			while (1) {
				struct ipv6_tlv_tnl_enc_lim *tel;

				/* No more room for encapsulation limit */
				if (i + sizeof (*tel) > off + optlen)
					break;

				tel = (struct ipv6_tlv_tnl_enc_lim *) &raw[i];
				/* return index of option if found and valid */
				if (tel->type == IPV6_TLV_TNL_ENCAP_LIMIT &&
				    tel->length == 1)
					return i;
				/* else jump to next option */
				/* type 0 is stepped over as a single byte,
				   anything else as type + length + data */
				if (tel->type)
					i += tel->length + 2;
				else
					i++;
			}
		}
		nexthdr = hdr->nexthdr;
		off += optlen;
	}
	return 0;
}
384
385/**
386 * ip6ip6_err - tunnel error handler
387 *
388 * Description:
389 * ip6ip6_err() should handle errors in the tunnel according
390 * to the specifications in RFC 2473.
391 **/
392
393static void
394ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
395 int type, int code, int offset, __u32 info)
396{
397 struct ipv6hdr *ipv6h = (struct ipv6hdr *) skb->data;
398 struct ip6_tnl *t;
399 int rel_msg = 0;
400 int rel_type = ICMPV6_DEST_UNREACH;
401 int rel_code = ICMPV6_ADDR_UNREACH;
402 __u32 rel_info = 0;
403 __u16 len;
404
405 /* If the packet doesn't contain the original IPv6 header we are
406 in trouble since we might need the source address for further
407 processing of the error. */
408
409 read_lock(&ip6ip6_lock);
410 if ((t = ip6ip6_tnl_lookup(&ipv6h->daddr, &ipv6h->saddr)) == NULL)
411 goto out;
412
413 switch (type) {
414 __u32 teli;
415 struct ipv6_tlv_tnl_enc_lim *tel;
416 __u32 mtu;
417 case ICMPV6_DEST_UNREACH:
418 if (net_ratelimit())
419 printk(KERN_WARNING
420 "%s: Path to destination invalid "
421 "or inactive!\n", t->parms.name);
422 rel_msg = 1;
423 break;
424 case ICMPV6_TIME_EXCEED:
425 if (code == ICMPV6_EXC_HOPLIMIT) {
426 if (net_ratelimit())
427 printk(KERN_WARNING
428 "%s: Too small hop limit or "
429 "routing loop in tunnel!\n",
430 t->parms.name);
431 rel_msg = 1;
432 }
433 break;
434 case ICMPV6_PARAMPROB:
435 /* ignore if parameter problem not caused by a tunnel
436 encapsulation limit sub-option */
437 if (code != ICMPV6_HDR_FIELD) {
438 break;
439 }
440 teli = parse_tlv_tnl_enc_lim(skb, skb->data);
441
442 if (teli && teli == ntohl(info) - 2) {
443 tel = (struct ipv6_tlv_tnl_enc_lim *) &skb->data[teli];
444 if (tel->encap_limit == 0) {
445 if (net_ratelimit())
446 printk(KERN_WARNING
447 "%s: Too small encapsulation "
448 "limit or routing loop in "
449 "tunnel!\n", t->parms.name);
450 rel_msg = 1;
451 }
452 }
453 break;
454 case ICMPV6_PKT_TOOBIG:
455 mtu = ntohl(info) - offset;
456 if (mtu < IPV6_MIN_MTU)
457 mtu = IPV6_MIN_MTU;
458 t->dev->mtu = mtu;
459
460 if ((len = sizeof (*ipv6h) + ipv6h->payload_len) > mtu) {
461 rel_type = ICMPV6_PKT_TOOBIG;
462 rel_code = 0;
463 rel_info = mtu;
464 rel_msg = 1;
465 }
466 break;
467 }
468 if (rel_msg && pskb_may_pull(skb, offset + sizeof (*ipv6h))) {
469 struct rt6_info *rt;
470 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
471 if (!skb2)
472 goto out;
473
474 dst_release(skb2->dst);
475 skb2->dst = NULL;
476 skb_pull(skb2, offset);
477 skb2->nh.raw = skb2->data;
478
479 /* Try to guess incoming interface */
480 rt = rt6_lookup(&skb2->nh.ipv6h->saddr, NULL, 0, 0);
481
482 if (rt && rt->rt6i_dev)
483 skb2->dev = rt->rt6i_dev;
484
485 icmpv6_send(skb2, rel_type, rel_code, rel_info, skb2->dev);
486
487 if (rt)
488 dst_release(&rt->u.dst);
489
490 kfree_skb(skb2);
491 }
492out:
493 read_unlock(&ip6ip6_lock);
494}
495
496static inline void ip6ip6_ecn_decapsulate(struct ipv6hdr *outer_iph,
497 struct sk_buff *skb)
498{
499 struct ipv6hdr *inner_iph = skb->nh.ipv6h;
500
501 if (INET_ECN_is_ce(ipv6_get_dsfield(outer_iph)))
502 IP6_ECN_set_ce(inner_iph);
503}
504
505/**
506 * ip6ip6_rcv - decapsulate IPv6 packet and retransmit it locally
507 * @skb: received socket buffer
508 *
509 * Return: 0
510 **/
511
512static int
513ip6ip6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
514{
515 struct sk_buff *skb = *pskb;
516 struct ipv6hdr *ipv6h;
517 struct ip6_tnl *t;
518
519 if (!pskb_may_pull(skb, sizeof (*ipv6h)))
520 goto discard;
521
522 ipv6h = skb->nh.ipv6h;
523
524 read_lock(&ip6ip6_lock);
525
526 if ((t = ip6ip6_tnl_lookup(&ipv6h->saddr, &ipv6h->daddr)) != NULL) {
527 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
528 kfree_skb(skb);
529 return 0;
530 }
531
532 if (!(t->parms.flags & IP6_TNL_F_CAP_RCV)) {
533 t->stat.rx_dropped++;
534 read_unlock(&ip6ip6_lock);
535 goto discard;
536 }
537 secpath_reset(skb);
538 skb->mac.raw = skb->nh.raw;
539 skb->nh.raw = skb->data;
540 skb->protocol = htons(ETH_P_IPV6);
541 skb->pkt_type = PACKET_HOST;
542 memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
543 skb->dev = t->dev;
544 dst_release(skb->dst);
545 skb->dst = NULL;
546 if (t->parms.flags & IP6_TNL_F_RCV_DSCP_COPY)
547 ipv6_copy_dscp(ipv6h, skb->nh.ipv6h);
548 ip6ip6_ecn_decapsulate(ipv6h, skb);
549 t->stat.rx_packets++;
550 t->stat.rx_bytes += skb->len;
551 netif_rx(skb);
552 read_unlock(&ip6ip6_lock);
553 return 0;
554 }
555 read_unlock(&ip6ip6_lock);
556 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
557discard:
558 return 1;
559}
560
561static inline struct ipv6_txoptions *create_tel(__u8 encap_limit)
562{
563 struct ipv6_tlv_tnl_enc_lim *tel;
564 struct ipv6_txoptions *opt;
565 __u8 *raw;
566
567 int opt_len = sizeof(*opt) + 8;
568
569 if (!(opt = kmalloc(opt_len, GFP_ATOMIC))) {
570 return NULL;
571 }
572 memset(opt, 0, opt_len);
573 opt->tot_len = opt_len;
574 opt->dst0opt = (struct ipv6_opt_hdr *) (opt + 1);
575 opt->opt_nflen = 8;
576
577 tel = (struct ipv6_tlv_tnl_enc_lim *) (opt->dst0opt + 1);
578 tel->type = IPV6_TLV_TNL_ENCAP_LIMIT;
579 tel->length = 1;
580 tel->encap_limit = encap_limit;
581
582 raw = (__u8 *) opt->dst0opt;
583 raw[5] = IPV6_TLV_PADN;
584 raw[6] = 1;
585
586 return opt;
587}
588
589/**
590 * ip6ip6_tnl_addr_conflict - compare packet addresses to tunnel's own
591 * @t: the outgoing tunnel device
592 * @hdr: IPv6 header from the incoming packet
593 *
594 * Description:
595 * Avoid trivial tunneling loop by checking that tunnel exit-point
596 * doesn't match source of incoming packet.
597 *
598 * Return:
599 * 1 if conflict,
600 * 0 else
601 **/
602
603static inline int
604ip6ip6_tnl_addr_conflict(struct ip6_tnl *t, struct ipv6hdr *hdr)
605{
606 return ipv6_addr_equal(&t->parms.raddr, &hdr->saddr);
607}
608
609/**
610 * ip6ip6_tnl_xmit - encapsulate packet and send
611 * @skb: the outgoing socket buffer
612 * @dev: the outgoing tunnel device
613 *
614 * Description:
615 * Build new header and do some sanity checks on the packet before sending
616 * it.
617 *
618 * Return:
619 * 0
620 **/
621
static int
ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct ip6_tnl *t = (struct ip6_tnl *) dev->priv;
	struct net_device_stats *stats = &t->stat;
	struct ipv6hdr *ipv6h = skb->nh.ipv6h;
	struct ipv6_txoptions *opt = NULL;
	/* -1 means "do not add an encapsulation limit option" */
	int encap_limit = -1;
	__u16 offset;
	struct flowi fl;
	struct dst_entry *dst;
	struct net_device *tdev;
	int mtu;
	int max_headroom = sizeof(struct ipv6hdr);
	u8 proto;
	int err;
	int pkt_len;
	int dsfield;

	/* re-entry while a transmit on this tunnel is in progress is
	   counted as a collision and the packet is dropped */
	if (t->recursion++) {
		stats->collisions++;
		goto tx_err;
	}
	/* drop anything that is not IPv6, that this tunnel may not send,
	   or whose source equals the tunnel's remote end */
	if (skb->protocol != htons(ETH_P_IPV6) ||
	    !(t->parms.flags & IP6_TNL_F_CAP_XMIT) ||
	    ip6ip6_tnl_addr_conflict(t, ipv6h)) {
		goto tx_err;
	}
	/* an encapsulation limit already present in the inner packet is
	   decremented; an exhausted one is reported back with ICMPv6 */
	if ((offset = parse_tlv_tnl_enc_lim(skb, skb->nh.raw)) > 0) {
		struct ipv6_tlv_tnl_enc_lim *tel;
		tel = (struct ipv6_tlv_tnl_enc_lim *) &skb->nh.raw[offset];
		if (tel->encap_limit == 0) {
			icmpv6_send(skb, ICMPV6_PARAMPROB,
				    ICMPV6_HDR_FIELD, offset + 2, skb->dev);
			goto tx_err;
		}
		encap_limit = tel->encap_limit - 1;
	} else if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) {
		encap_limit = t->parms.encap_limit;
	}
	/* start from the tunnel's flow template */
	memcpy(&fl, &t->fl, sizeof (fl));
	proto = fl.proto;

	dsfield = ipv6_get_dsfield(ipv6h);
	/* optionally inherit traffic class / flow label from the inner header */
	if ((t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS))
		fl.fl6_flowlabel |= (*(__u32 *) ipv6h & IPV6_TCLASS_MASK);
	if ((t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL))
		fl.fl6_flowlabel |= (*(__u32 *) ipv6h & IPV6_FLOWLABEL_MASK);

	if (encap_limit >= 0 && (opt = create_tel(encap_limit)) == NULL)
		goto tx_err;

	/* prefer the route cached on the tunnel, else do a fresh lookup */
	if ((dst = ip6_tnl_dst_check(t)) != NULL)
		dst_hold(dst);
	else
		dst = ip6_route_output(NULL, &fl);

	if (dst->error || xfrm_lookup(&dst, &fl, NULL, 0) < 0)
		goto tx_err_link_failure;

	tdev = dst->dev;

	/* the route points back at the tunnel device itself */
	if (tdev == dev) {
		stats->collisions++;
		if (net_ratelimit())
			printk(KERN_WARNING
			       "%s: Local routing loop detected!\n",
			       t->parms.name);
		goto tx_err_dst_release;
	}
	/* room left for the inner packet once the outer header (and the
	   8-byte options header, if any) are added */
	mtu = dst_mtu(dst) - sizeof (*ipv6h);
	if (opt) {
		max_headroom += 8;
		mtu -= 8;
	}
	if (mtu < IPV6_MIN_MTU)
		mtu = IPV6_MIN_MTU;
	/* lower the MTU metric of the route the inner packet arrived with */
	if (skb->dst && mtu < dst_mtu(skb->dst)) {
		struct rt6_info *rt = (struct rt6_info *) skb->dst;
		rt->rt6i_flags |= RTF_MODIFIED;
		rt->u.dst.metrics[RTAX_MTU-1] = mtu;
	}
	if (skb->len > mtu) {
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev);
		goto tx_err_dst_release;
	}

	/*
	 * Okay, now see if we can stuff it in the buffer as-is.
	 */
	max_headroom += LL_RESERVED_SPACE(tdev);

	if (skb_headroom(skb) < max_headroom ||
	    skb_cloned(skb) || skb_shared(skb)) {
		struct sk_buff *new_skb;

		if (!(new_skb = skb_realloc_headroom(skb, max_headroom)))
			goto tx_err_dst_release;

		/* keep the replacement buffer charged to the same socket */
		if (skb->sk)
			skb_set_owner_w(new_skb, skb->sk);
		kfree_skb(skb);
		skb = new_skb;
	}
	dst_release(skb->dst);
	skb->dst = dst_clone(dst);

	/* the inner header now counts as the transport header */
	skb->h.raw = skb->nh.raw;

	if (opt)
		ipv6_push_nfrag_opts(skb, opt, &proto, NULL);

	/* build the outer IPv6 header */
	skb->nh.raw = skb_push(skb, sizeof(struct ipv6hdr));
	ipv6h = skb->nh.ipv6h;
	*(u32*)ipv6h = fl.fl6_flowlabel | htonl(0x60000000);
	dsfield = INET_ECN_encapsulate(0, dsfield);
	ipv6_change_dsfield(ipv6h, ~INET_ECN_MASK, dsfield);
	ipv6h->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
	ipv6h->hop_limit = t->parms.hop_limit;
	ipv6h->nexthdr = proto;
	ipv6_addr_copy(&ipv6h->saddr, &fl.fl6_src);
	ipv6_addr_copy(&ipv6h->daddr, &fl.fl6_dst);
	nf_reset(skb);
	/* remember the length: skb must not be touched after NF_HOOK() */
	pkt_len = skb->len;
	err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL,
		      skb->dst->dev, dst_output);

	if (err == NET_XMIT_SUCCESS || err == NET_XMIT_CN) {
		stats->tx_bytes += pkt_len;
		stats->tx_packets++;
	} else {
		stats->tx_errors++;
		stats->tx_aborted_errors++;
	}
	/* hand our dst reference over to the tunnel */
	ip6_tnl_dst_store(t, dst);

	if (opt)
		kfree(opt);

	t->recursion--;
	return 0;
	/* error exits, ordered so that each label releases what the
	   earlier stages acquired and then falls through */
tx_err_link_failure:
	stats->tx_carrier_errors++;
	dst_link_failure(skb);
tx_err_dst_release:
	dst_release(dst);
	if (opt)
		kfree(opt);
tx_err:
	stats->tx_errors++;
	stats->tx_dropped++;
	kfree_skb(skb);
	t->recursion--;
	return 0;
}
777
778static void ip6_tnl_set_cap(struct ip6_tnl *t)
779{
780 struct ip6_tnl_parm *p = &t->parms;
781 struct in6_addr *laddr = &p->laddr;
782 struct in6_addr *raddr = &p->raddr;
783 int ltype = ipv6_addr_type(laddr);
784 int rtype = ipv6_addr_type(raddr);
785
786 p->flags &= ~(IP6_TNL_F_CAP_XMIT|IP6_TNL_F_CAP_RCV);
787
788 if (ltype != IPV6_ADDR_ANY && rtype != IPV6_ADDR_ANY &&
789 ((ltype|rtype) &
790 (IPV6_ADDR_UNICAST|
791 IPV6_ADDR_LOOPBACK|IPV6_ADDR_LINKLOCAL|
792 IPV6_ADDR_MAPPED|IPV6_ADDR_RESERVED)) == IPV6_ADDR_UNICAST) {
793 struct net_device *ldev = NULL;
794 int l_ok = 1;
795 int r_ok = 1;
796
797 if (p->link)
798 ldev = dev_get_by_index(p->link);
799
800 if (ltype&IPV6_ADDR_UNICAST && !ipv6_chk_addr(laddr, ldev, 0))
801 l_ok = 0;
802
803 if (rtype&IPV6_ADDR_UNICAST && ipv6_chk_addr(raddr, NULL, 0))
804 r_ok = 0;
805
806 if (l_ok && r_ok) {
807 if (ltype&IPV6_ADDR_UNICAST)
808 p->flags |= IP6_TNL_F_CAP_XMIT;
809 if (rtype&IPV6_ADDR_UNICAST)
810 p->flags |= IP6_TNL_F_CAP_RCV;
811 }
812 if (ldev)
813 dev_put(ldev);
814 }
815}
816
817static void ip6ip6_tnl_link_config(struct ip6_tnl *t)
818{
819 struct net_device *dev = t->dev;
820 struct ip6_tnl_parm *p = &t->parms;
821 struct flowi *fl = &t->fl;
822
823 memcpy(&dev->dev_addr, &p->laddr, sizeof(struct in6_addr));
824 memcpy(&dev->broadcast, &p->raddr, sizeof(struct in6_addr));
825
826 /* Set up flowi template */
827 ipv6_addr_copy(&fl->fl6_src, &p->laddr);
828 ipv6_addr_copy(&fl->fl6_dst, &p->raddr);
829 fl->oif = p->link;
830 fl->fl6_flowlabel = 0;
831
832 if (!(p->flags&IP6_TNL_F_USE_ORIG_TCLASS))
833 fl->fl6_flowlabel |= IPV6_TCLASS_MASK & p->flowinfo;
834 if (!(p->flags&IP6_TNL_F_USE_ORIG_FLOWLABEL))
835 fl->fl6_flowlabel |= IPV6_FLOWLABEL_MASK & p->flowinfo;
836
837 ip6_tnl_set_cap(t);
838
839 if (p->flags&IP6_TNL_F_CAP_XMIT && p->flags&IP6_TNL_F_CAP_RCV)
840 dev->flags |= IFF_POINTOPOINT;
841 else
842 dev->flags &= ~IFF_POINTOPOINT;
843
844 dev->iflink = p->link;
845
846 if (p->flags & IP6_TNL_F_CAP_XMIT) {
847 struct rt6_info *rt = rt6_lookup(&p->raddr, &p->laddr,
848 p->link, 0);
849
850 if (rt == NULL)
851 return;
852
853 if (rt->rt6i_dev) {
854 dev->hard_header_len = rt->rt6i_dev->hard_header_len +
855 sizeof (struct ipv6hdr);
856
857 dev->mtu = rt->rt6i_dev->mtu - sizeof (struct ipv6hdr);
858
859 if (dev->mtu < IPV6_MIN_MTU)
860 dev->mtu = IPV6_MIN_MTU;
861 }
862 dst_release(&rt->u.dst);
863 }
864}
865
866/**
867 * ip6ip6_tnl_change - update the tunnel parameters
868 * @t: tunnel to be changed
869 * @p: tunnel configuration parameters
870 * @active: != 0 if tunnel is ready for use
871 *
872 * Description:
873 * ip6ip6_tnl_change() updates the tunnel parameters
874 **/
875
876static int
877ip6ip6_tnl_change(struct ip6_tnl *t, struct ip6_tnl_parm *p)
878{
879 ipv6_addr_copy(&t->parms.laddr, &p->laddr);
880 ipv6_addr_copy(&t->parms.raddr, &p->raddr);
881 t->parms.flags = p->flags;
882 t->parms.hop_limit = p->hop_limit;
883 t->parms.encap_limit = p->encap_limit;
884 t->parms.flowinfo = p->flowinfo;
885 ip6ip6_tnl_link_config(t);
886 return 0;
887}
888
889/**
890 * ip6ip6_tnl_ioctl - configure ipv6 tunnels from userspace
891 * @dev: virtual device associated with tunnel
892 * @ifr: parameters passed from userspace
893 * @cmd: command to be performed
894 *
895 * Description:
896 * ip6ip6_tnl_ioctl() is used for managing IPv6 tunnels
897 * from userspace.
898 *
899 * The possible commands are the following:
900 * %SIOCGETTUNNEL: get tunnel parameters for device
901 * %SIOCADDTUNNEL: add tunnel matching given tunnel parameters
902 * %SIOCCHGTUNNEL: change tunnel parameters to those given
903 * %SIOCDELTUNNEL: delete tunnel
904 *
905 * The fallback device "ip6tnl0", created during module
906 * initialization, can be used for creating other tunnel devices.
907 *
908 * Return:
909 * 0 on success,
910 * %-EFAULT if unable to copy data to or from userspace,
911 * %-EPERM if current process hasn't %CAP_NET_ADMIN set
912 * %-EINVAL if passed tunnel parameters are invalid,
913 * %-EEXIST if changing a tunnel's parameters would cause a conflict
914 * %-ENODEV if attempting to change or delete a nonexisting device
915 **/
916
917static int
918ip6ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
919{
920 int err = 0;
921 int create;
922 struct ip6_tnl_parm p;
923 struct ip6_tnl *t = NULL;
924
925 switch (cmd) {
926 case SIOCGETTUNNEL:
927 if (dev == ip6ip6_fb_tnl_dev) {
928 if (copy_from_user(&p,
929 ifr->ifr_ifru.ifru_data,
930 sizeof (p))) {
931 err = -EFAULT;
932 break;
933 }
934 if ((err = ip6ip6_tnl_locate(&p, &t, 0)) == -ENODEV)
935 t = (struct ip6_tnl *) dev->priv;
936 else if (err)
937 break;
938 } else
939 t = (struct ip6_tnl *) dev->priv;
940
941 memcpy(&p, &t->parms, sizeof (p));
942 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof (p))) {
943 err = -EFAULT;
944 }
945 break;
946 case SIOCADDTUNNEL:
947 case SIOCCHGTUNNEL:
948 err = -EPERM;
949 create = (cmd == SIOCADDTUNNEL);
950 if (!capable(CAP_NET_ADMIN))
951 break;
952 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof (p))) {
953 err = -EFAULT;
954 break;
955 }
956 if (!create && dev != ip6ip6_fb_tnl_dev) {
957 t = (struct ip6_tnl *) dev->priv;
958 }
959 if (!t && (err = ip6ip6_tnl_locate(&p, &t, create))) {
960 break;
961 }
962 if (cmd == SIOCCHGTUNNEL) {
963 if (t->dev != dev) {
964 err = -EEXIST;
965 break;
966 }
967 ip6ip6_tnl_unlink(t);
968 err = ip6ip6_tnl_change(t, &p);
969 ip6ip6_tnl_link(t);
970 netdev_state_change(dev);
971 }
972 if (copy_to_user(ifr->ifr_ifru.ifru_data,
973 &t->parms, sizeof (p))) {
974 err = -EFAULT;
975 } else {
976 err = 0;
977 }
978 break;
979 case SIOCDELTUNNEL:
980 err = -EPERM;
981 if (!capable(CAP_NET_ADMIN))
982 break;
983
984 if (dev == ip6ip6_fb_tnl_dev) {
985 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data,
986 sizeof (p))) {
987 err = -EFAULT;
988 break;
989 }
990 err = ip6ip6_tnl_locate(&p, &t, 0);
991 if (err)
992 break;
993 if (t == ip6ip6_fb_tnl_dev->priv) {
994 err = -EPERM;
995 break;
996 }
997 } else {
998 t = (struct ip6_tnl *) dev->priv;
999 }
1000 err = unregister_netdevice(t->dev);
1001 break;
1002 default:
1003 err = -EINVAL;
1004 }
1005 return err;
1006}
1007
1008/**
1009 * ip6ip6_tnl_get_stats - return the stats for tunnel device
1010 * @dev: virtual device associated with tunnel
1011 *
1012 * Return: stats for device
1013 **/
1014
1015static struct net_device_stats *
1016ip6ip6_tnl_get_stats(struct net_device *dev)
1017{
1018 return &(((struct ip6_tnl *) dev->priv)->stat);
1019}
1020
1021/**
1022 * ip6ip6_tnl_change_mtu - change mtu manually for tunnel device
1023 * @dev: virtual device associated with tunnel
1024 * @new_mtu: the new mtu
1025 *
1026 * Return:
1027 * 0 on success,
1028 * %-EINVAL if mtu too small
1029 **/
1030
1031static int
1032ip6ip6_tnl_change_mtu(struct net_device *dev, int new_mtu)
1033{
1034 if (new_mtu < IPV6_MIN_MTU) {
1035 return -EINVAL;
1036 }
1037 dev->mtu = new_mtu;
1038 return 0;
1039}
1040
1041/**
1042 * ip6ip6_tnl_dev_setup - setup virtual tunnel device
1043 * @dev: virtual device associated with tunnel
1044 *
1045 * Description:
1046 * Initialize function pointers and device parameters
1047 **/
1048
1049static void ip6ip6_tnl_dev_setup(struct net_device *dev)
1050{
1051 SET_MODULE_OWNER(dev);
1052 dev->uninit = ip6ip6_tnl_dev_uninit;
1053 dev->destructor = free_netdev;
1054 dev->hard_start_xmit = ip6ip6_tnl_xmit;
1055 dev->get_stats = ip6ip6_tnl_get_stats;
1056 dev->do_ioctl = ip6ip6_tnl_ioctl;
1057 dev->change_mtu = ip6ip6_tnl_change_mtu;
1058
1059 dev->type = ARPHRD_TUNNEL6;
1060 dev->hard_header_len = LL_MAX_HEADER + sizeof (struct ipv6hdr);
1061 dev->mtu = ETH_DATA_LEN - sizeof (struct ipv6hdr);
1062 dev->flags |= IFF_NOARP;
1063 dev->addr_len = sizeof(struct in6_addr);
1064}
1065
1066
1067/**
1068 * ip6ip6_tnl_dev_init_gen - general initializer for all tunnel devices
1069 * @dev: virtual device associated with tunnel
1070 **/
1071
1072static inline void
1073ip6ip6_tnl_dev_init_gen(struct net_device *dev)
1074{
1075 struct ip6_tnl *t = (struct ip6_tnl *) dev->priv;
1076 t->fl.proto = IPPROTO_IPV6;
1077 t->dev = dev;
1078 strcpy(t->parms.name, dev->name);
1079}
1080
1081/**
1082 * ip6ip6_tnl_dev_init - initializer for all non fallback tunnel devices
1083 * @dev: virtual device associated with tunnel
1084 **/
1085
1086static int
1087ip6ip6_tnl_dev_init(struct net_device *dev)
1088{
1089 struct ip6_tnl *t = (struct ip6_tnl *) dev->priv;
1090 ip6ip6_tnl_dev_init_gen(dev);
1091 ip6ip6_tnl_link_config(t);
1092 return 0;
1093}
1094
1095/**
1096 * ip6ip6_fb_tnl_dev_init - initializer for fallback tunnel device
1097 * @dev: fallback device
1098 *
1099 * Return: 0
1100 **/
1101
1102static int
1103ip6ip6_fb_tnl_dev_init(struct net_device *dev)
1104{
1105 struct ip6_tnl *t = dev->priv;
1106 ip6ip6_tnl_dev_init_gen(dev);
1107 dev_hold(dev);
1108 tnls_wc[0] = t;
1109 return 0;
1110}
1111
/* Receive and ICMPv6-error callbacks of this tunnel driver; registered with
 * xfrm6_tunnel_register() in ip6_tunnel_init(). */
static struct xfrm6_tunnel ip6ip6_handler = {
	.handler = ip6ip6_rcv,
	.err_handler = ip6ip6_err,
};
1116
1117/**
1118 * ip6_tunnel_init - register protocol and reserve needed resources
1119 *
1120 * Return: 0 on success
1121 **/
1122
1123static int __init ip6_tunnel_init(void)
1124{
1125 int err;
1126
1127 if (xfrm6_tunnel_register(&ip6ip6_handler) < 0) {
1128 printk(KERN_ERR "ip6ip6 init: can't register tunnel\n");
1129 return -EAGAIN;
1130 }
1131 ip6ip6_fb_tnl_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6tnl0",
1132 ip6ip6_tnl_dev_setup);
1133
1134 if (!ip6ip6_fb_tnl_dev) {
1135 err = -ENOMEM;
1136 goto fail;
1137 }
1138 ip6ip6_fb_tnl_dev->init = ip6ip6_fb_tnl_dev_init;
1139
1140 if ((err = register_netdev(ip6ip6_fb_tnl_dev))) {
1141 free_netdev(ip6ip6_fb_tnl_dev);
1142 goto fail;
1143 }
1144 return 0;
1145fail:
1146 xfrm6_tunnel_deregister(&ip6ip6_handler);
1147 return err;
1148}
1149
1150/**
1151 * ip6_tunnel_cleanup - free resources and unregister protocol
1152 **/
1153
1154static void __exit ip6_tunnel_cleanup(void)
1155{
1156 if (xfrm6_tunnel_deregister(&ip6ip6_handler) < 0)
1157 printk(KERN_INFO "ip6ip6 close: can't deregister tunnel\n");
1158
1159 unregister_netdev(ip6ip6_fb_tnl_dev);
1160}
1161
/* Module entry and exit points. */
module_init(ip6_tunnel_init);
module_exit(ip6_tunnel_cleanup);
diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c
new file mode 100644
index 000000000000..6cde5310cd76
--- /dev/null
+++ b/net/ipv6/ipcomp6.c
@@ -0,0 +1,524 @@
1/*
2 * IP Payload Compression Protocol (IPComp) for IPv6 - RFC3173
3 *
4 * Copyright (C)2003 USAGI/WIDE Project
5 *
6 * Author Mitsuru KANDA <mk@linux-ipv6.org>
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21 */
22/*
23 * [Memo]
24 *
25 * Outbound:
26 * The compression of IP datagram MUST be done before AH/ESP processing,
27 * fragmentation, and the addition of Hop-by-Hop/Routing header.
28 *
29 * Inbound:
30 * The decompression of IP datagram MUST be done after the reassembly,
31 * AH/ESP processing.
32 */
33#include <linux/config.h>
34#include <linux/module.h>
35#include <net/ip.h>
36#include <net/xfrm.h>
37#include <net/ipcomp.h>
38#include <asm/scatterlist.h>
39#include <asm/semaphore.h>
40#include <linux/crypto.h>
41#include <linux/pfkeyv2.h>
42#include <linux/random.h>
43#include <linux/percpu.h>
44#include <linux/smp.h>
45#include <linux/list.h>
46#include <linux/vmalloc.h>
47#include <linux/rtnetlink.h>
48#include <net/icmp.h>
49#include <net/ipv6.h>
50#include <linux/ipv6.h>
51#include <linux/icmpv6.h>
52
/* One shared set of per-CPU compression transforms, kept on
 * ipcomp6_tfms_list and reused by every state asking for the same
 * algorithm name. */
struct ipcomp6_tfms {
	struct list_head list;		/* linkage in ipcomp6_tfms_list */
	struct crypto_tfm **tfms;	/* per-CPU transform pointers */
	int users;			/* number of states sharing this set */
};
58
/* Held around allocation and release of the shared scratch buffers and
 * transform sets (see ipcomp6_init_state() and ipcomp6_destroy()). */
static DECLARE_MUTEX(ipcomp6_resource_sem);
/* Per-CPU pointers to scratch buffers of IPCOMP_SCRATCH_SIZE bytes. */
static void **ipcomp6_scratches;
/* Number of users of ipcomp6_scratches. */
static int ipcomp6_scratch_users;
/* All currently allocated struct ipcomp6_tfms. */
static LIST_HEAD(ipcomp6_tfms_list);
63
64static int ipcomp6_input(struct xfrm_state *x, struct xfrm_decap_state *decap, struct sk_buff *skb)
65{
66 int err = 0;
67 u8 nexthdr = 0;
68 int hdr_len = skb->h.raw - skb->nh.raw;
69 unsigned char *tmp_hdr = NULL;
70 struct ipv6hdr *iph;
71 int plen, dlen;
72 struct ipcomp_data *ipcd = x->data;
73 u8 *start, *scratch;
74 struct crypto_tfm *tfm;
75 int cpu;
76
77 if ((skb_is_nonlinear(skb) || skb_cloned(skb)) &&
78 skb_linearize(skb, GFP_ATOMIC) != 0) {
79 err = -ENOMEM;
80 goto out;
81 }
82
83 skb->ip_summed = CHECKSUM_NONE;
84
85 /* Remove ipcomp header and decompress original payload */
86 iph = skb->nh.ipv6h;
87 tmp_hdr = kmalloc(hdr_len, GFP_ATOMIC);
88 if (!tmp_hdr)
89 goto out;
90 memcpy(tmp_hdr, iph, hdr_len);
91 nexthdr = *(u8 *)skb->data;
92 skb_pull(skb, sizeof(struct ipv6_comp_hdr));
93 skb->nh.raw += sizeof(struct ipv6_comp_hdr);
94 memcpy(skb->nh.raw, tmp_hdr, hdr_len);
95 iph = skb->nh.ipv6h;
96 iph->payload_len = htons(ntohs(iph->payload_len) - sizeof(struct ipv6_comp_hdr));
97 skb->h.raw = skb->data;
98
99 /* decompression */
100 plen = skb->len;
101 dlen = IPCOMP_SCRATCH_SIZE;
102 start = skb->data;
103
104 cpu = get_cpu();
105 scratch = *per_cpu_ptr(ipcomp6_scratches, cpu);
106 tfm = *per_cpu_ptr(ipcd->tfms, cpu);
107
108 err = crypto_comp_decompress(tfm, start, plen, scratch, &dlen);
109 if (err) {
110 err = -EINVAL;
111 goto out_put_cpu;
112 }
113
114 if (dlen < (plen + sizeof(struct ipv6_comp_hdr))) {
115 err = -EINVAL;
116 goto out_put_cpu;
117 }
118
119 err = pskb_expand_head(skb, 0, dlen - plen, GFP_ATOMIC);
120 if (err) {
121 goto out_put_cpu;
122 }
123
124 skb_put(skb, dlen - plen);
125 memcpy(skb->data, scratch, dlen);
126
127 iph = skb->nh.ipv6h;
128 iph->payload_len = htons(skb->len);
129
130out_put_cpu:
131 put_cpu();
132out:
133 if (tmp_hdr)
134 kfree(tmp_hdr);
135 if (err)
136 goto error_out;
137 return nexthdr;
138error_out:
139 return err;
140}
141
142static int ipcomp6_output(struct xfrm_state *x, struct sk_buff *skb)
143{
144 int err;
145 struct ipv6hdr *top_iph;
146 int hdr_len;
147 struct ipv6_comp_hdr *ipch;
148 struct ipcomp_data *ipcd = x->data;
149 int plen, dlen;
150 u8 *start, *scratch;
151 struct crypto_tfm *tfm;
152 int cpu;
153
154 hdr_len = skb->h.raw - skb->data;
155
156 /* check whether datagram len is larger than threshold */
157 if ((skb->len - hdr_len) < ipcd->threshold) {
158 goto out_ok;
159 }
160
161 if ((skb_is_nonlinear(skb) || skb_cloned(skb)) &&
162 skb_linearize(skb, GFP_ATOMIC) != 0) {
163 goto out_ok;
164 }
165
166 /* compression */
167 plen = skb->len - hdr_len;
168 dlen = IPCOMP_SCRATCH_SIZE;
169 start = skb->h.raw;
170
171 cpu = get_cpu();
172 scratch = *per_cpu_ptr(ipcomp6_scratches, cpu);
173 tfm = *per_cpu_ptr(ipcd->tfms, cpu);
174
175 err = crypto_comp_compress(tfm, start, plen, scratch, &dlen);
176 if (err || (dlen + sizeof(struct ipv6_comp_hdr)) >= plen) {
177 put_cpu();
178 goto out_ok;
179 }
180 memcpy(start + sizeof(struct ip_comp_hdr), scratch, dlen);
181 put_cpu();
182 pskb_trim(skb, hdr_len + dlen + sizeof(struct ip_comp_hdr));
183
184 /* insert ipcomp header and replace datagram */
185 top_iph = (struct ipv6hdr *)skb->data;
186
187 top_iph->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
188
189 ipch = (struct ipv6_comp_hdr *)start;
190 ipch->nexthdr = *skb->nh.raw;
191 ipch->flags = 0;
192 ipch->cpi = htons((u16 )ntohl(x->id.spi));
193 *skb->nh.raw = IPPROTO_COMP;
194
195out_ok:
196 return 0;
197}
198
199static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
200 int type, int code, int offset, __u32 info)
201{
202 u32 spi;
203 struct ipv6hdr *iph = (struct ipv6hdr*)skb->data;
204 struct ipv6_comp_hdr *ipcomph = (struct ipv6_comp_hdr*)(skb->data+offset);
205 struct xfrm_state *x;
206
207 if (type != ICMPV6_DEST_UNREACH && type != ICMPV6_PKT_TOOBIG)
208 return;
209
210 spi = ntohl(ntohs(ipcomph->cpi));
211 x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, spi, IPPROTO_COMP, AF_INET6);
212 if (!x)
213 return;
214
215 printk(KERN_DEBUG "pmtu discovery on SA IPCOMP/%08x/"
216 "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n",
217 spi, NIP6(iph->daddr));
218 xfrm_state_put(x);
219}
220
221static struct xfrm_state *ipcomp6_tunnel_create(struct xfrm_state *x)
222{
223 struct xfrm_state *t = NULL;
224
225 t = xfrm_state_alloc();
226 if (!t)
227 goto out;
228
229 t->id.proto = IPPROTO_IPV6;
230 t->id.spi = xfrm6_tunnel_alloc_spi((xfrm_address_t *)&x->props.saddr);
231 memcpy(t->id.daddr.a6, x->id.daddr.a6, sizeof(struct in6_addr));
232 memcpy(&t->sel, &x->sel, sizeof(t->sel));
233 t->props.family = AF_INET6;
234 t->props.mode = 1;
235 memcpy(t->props.saddr.a6, x->props.saddr.a6, sizeof(struct in6_addr));
236
237 t->type = xfrm_get_type(IPPROTO_IPV6, t->props.family);
238 if (t->type == NULL)
239 goto error;
240
241 if (t->type->init_state(t, NULL))
242 goto error;
243
244 t->km.state = XFRM_STATE_VALID;
245 atomic_set(&t->tunnel_users, 1);
246
247out:
248 return t;
249
250error:
251 xfrm_state_put(t);
252 goto out;
253}
254
255static int ipcomp6_tunnel_attach(struct xfrm_state *x)
256{
257 int err = 0;
258 struct xfrm_state *t = NULL;
259 u32 spi;
260
261 spi = xfrm6_tunnel_spi_lookup((xfrm_address_t *)&x->props.saddr);
262 if (spi)
263 t = xfrm_state_lookup((xfrm_address_t *)&x->id.daddr,
264 spi, IPPROTO_IPV6, AF_INET6);
265 if (!t) {
266 t = ipcomp6_tunnel_create(x);
267 if (!t) {
268 err = -EINVAL;
269 goto out;
270 }
271 xfrm_state_insert(t);
272 xfrm_state_hold(t);
273 }
274 x->tunnel = t;
275 atomic_inc(&t->tunnel_users);
276
277out:
278 return err;
279}
280
281static void ipcomp6_free_scratches(void)
282{
283 int i;
284 void **scratches;
285
286 if (--ipcomp6_scratch_users)
287 return;
288
289 scratches = ipcomp6_scratches;
290 if (!scratches)
291 return;
292
293 for_each_cpu(i) {
294 void *scratch = *per_cpu_ptr(scratches, i);
295 if (scratch)
296 vfree(scratch);
297 }
298
299 free_percpu(scratches);
300}
301
302static void **ipcomp6_alloc_scratches(void)
303{
304 int i;
305 void **scratches;
306
307 if (ipcomp6_scratch_users++)
308 return ipcomp6_scratches;
309
310 scratches = alloc_percpu(void *);
311 if (!scratches)
312 return NULL;
313
314 ipcomp6_scratches = scratches;
315
316 for_each_cpu(i) {
317 void *scratch = vmalloc(IPCOMP_SCRATCH_SIZE);
318 if (!scratch)
319 return NULL;
320 *per_cpu_ptr(scratches, i) = scratch;
321 }
322
323 return scratches;
324}
325
326static void ipcomp6_free_tfms(struct crypto_tfm **tfms)
327{
328 struct ipcomp6_tfms *pos;
329 int cpu;
330
331 list_for_each_entry(pos, &ipcomp6_tfms_list, list) {
332 if (pos->tfms == tfms)
333 break;
334 }
335
336 BUG_TRAP(pos);
337
338 if (--pos->users)
339 return;
340
341 list_del(&pos->list);
342 kfree(pos);
343
344 if (!tfms)
345 return;
346
347 for_each_cpu(cpu) {
348 struct crypto_tfm *tfm = *per_cpu_ptr(tfms, cpu);
349 if (tfm)
350 crypto_free_tfm(tfm);
351 }
352 free_percpu(tfms);
353}
354
355static struct crypto_tfm **ipcomp6_alloc_tfms(const char *alg_name)
356{
357 struct ipcomp6_tfms *pos;
358 struct crypto_tfm **tfms;
359 int cpu;
360
361 /* This can be any valid CPU ID so we don't need locking. */
362 cpu = smp_processor_id();
363
364 list_for_each_entry(pos, &ipcomp6_tfms_list, list) {
365 struct crypto_tfm *tfm;
366
367 tfms = pos->tfms;
368 tfm = *per_cpu_ptr(tfms, cpu);
369
370 if (!strcmp(crypto_tfm_alg_name(tfm), alg_name)) {
371 pos->users++;
372 return tfms;
373 }
374 }
375
376 pos = kmalloc(sizeof(*pos), GFP_KERNEL);
377 if (!pos)
378 return NULL;
379
380 pos->users = 1;
381 INIT_LIST_HEAD(&pos->list);
382 list_add(&pos->list, &ipcomp6_tfms_list);
383
384 pos->tfms = tfms = alloc_percpu(struct crypto_tfm *);
385 if (!tfms)
386 goto error;
387
388 for_each_cpu(cpu) {
389 struct crypto_tfm *tfm = crypto_alloc_tfm(alg_name, 0);
390 if (!tfm)
391 goto error;
392 *per_cpu_ptr(tfms, cpu) = tfm;
393 }
394
395 return tfms;
396
397error:
398 ipcomp6_free_tfms(tfms);
399 return NULL;
400}
401
402static void ipcomp6_free_data(struct ipcomp_data *ipcd)
403{
404 if (ipcd->tfms)
405 ipcomp6_free_tfms(ipcd->tfms);
406 ipcomp6_free_scratches();
407}
408
409static void ipcomp6_destroy(struct xfrm_state *x)
410{
411 struct ipcomp_data *ipcd = x->data;
412 if (!ipcd)
413 return;
414 xfrm_state_delete_tunnel(x);
415 down(&ipcomp6_resource_sem);
416 ipcomp6_free_data(ipcd);
417 up(&ipcomp6_resource_sem);
418 kfree(ipcd);
419
420 xfrm6_tunnel_free_spi((xfrm_address_t *)&x->props.saddr);
421}
422
/*
 * ipcomp6_init_state - set up the IPComp part of state @x
 *   @x: state being initialised; needs a compression algorithm (x->calg)
 *       and must not use encapsulation (x->encap)
 *   @args: unused
 *
 * Returns 0 on success, -EINVAL for unusable parameters, -ENOMEM if an
 * allocation fails, or the error from ipcomp6_tunnel_attach().
 */
static int ipcomp6_init_state(struct xfrm_state *x, void *args)
{
	int err;
	struct ipcomp_data *ipcd;
	struct xfrm_algo_desc *calg_desc;

	err = -EINVAL;
	if (!x->calg)
		goto out;

	if (x->encap)
		goto out;

	err = -ENOMEM;
	ipcd = kmalloc(sizeof(*ipcd), GFP_KERNEL);
	if (!ipcd)
		goto out;

	memset(ipcd, 0, sizeof(*ipcd));
	/* only a non-zero mode adds an IPv6 header of its own */
	x->props.header_len = 0;
	if (x->props.mode)
		x->props.header_len += sizeof(struct ipv6hdr);

	/* the shared scratch buffers and transforms are managed under
	   the semaphore */
	down(&ipcomp6_resource_sem);
	if (!ipcomp6_alloc_scratches())
		goto error;

	ipcd->tfms = ipcomp6_alloc_tfms(x->calg->alg_name);
	if (!ipcd->tfms)
		goto error;
	up(&ipcomp6_resource_sem);

	if (x->props.mode) {
		err = ipcomp6_tunnel_attach(x);
		if (err)
			goto error_tunnel;
	}

	calg_desc = xfrm_calg_get_byname(x->calg->alg_name, 0);
	BUG_ON(!calg_desc);
	ipcd->threshold = calg_desc->uinfo.comp.threshold;
	x->data = ipcd;
	err = 0;
out:
	return err;
	/* error_tunnel is reached with the semaphore released, error with
	   it held; both end up releasing it exactly once below */
error_tunnel:
	down(&ipcomp6_resource_sem);
error:
	ipcomp6_free_data(ipcd);
	up(&ipcomp6_resource_sem);
	kfree(ipcd);

	goto out;
}
477
/* xfrm type for IPPROTO_COMP; registered for AF_INET6 in ipcomp6_init(). */
static struct xfrm_type ipcomp6_type =
{
	.description = "IPCOMP6",
	.owner = THIS_MODULE,
	.proto = IPPROTO_COMP,
	.init_state = ipcomp6_init_state,
	.destructor = ipcomp6_destroy,
	.input = ipcomp6_input,
	.output = ipcomp6_output,
};
488
/* IPv6 protocol hooks for IPPROTO_COMP: packets go to xfrm6_rcv(),
 * ICMPv6 errors to ipcomp6_err().  Added in ipcomp6_init(). */
static struct inet6_protocol ipcomp6_protocol =
{
	.handler = xfrm6_rcv,
	.err_handler = ipcomp6_err,
	.flags = INET6_PROTO_NOPOLICY,
};
495
496static int __init ipcomp6_init(void)
497{
498 if (xfrm_register_type(&ipcomp6_type, AF_INET6) < 0) {
499 printk(KERN_INFO "ipcomp6 init: can't add xfrm type\n");
500 return -EAGAIN;
501 }
502 if (inet6_add_protocol(&ipcomp6_protocol, IPPROTO_COMP) < 0) {
503 printk(KERN_INFO "ipcomp6 init: can't add protocol\n");
504 xfrm_unregister_type(&ipcomp6_type, AF_INET6);
505 return -EAGAIN;
506 }
507 return 0;
508}
509
510static void __exit ipcomp6_fini(void)
511{
512 if (inet6_del_protocol(&ipcomp6_protocol, IPPROTO_COMP) < 0)
513 printk(KERN_INFO "ipv6 ipcomp close: can't remove protocol\n");
514 if (xfrm_unregister_type(&ipcomp6_type, AF_INET6) < 0)
515 printk(KERN_INFO "ipv6 ipcomp close: can't remove xfrm type\n");
516}
517
/* Module entry/exit points and metadata. */
module_init(ipcomp6_init);
module_exit(ipcomp6_fini);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("IP Payload Compression Protocol (IPComp) for IPv6 - RFC3173");
MODULE_AUTHOR("Mitsuru KANDA <mk@linux-ipv6.org>");
523
524
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
new file mode 100644
index 000000000000..279ab86be662
--- /dev/null
+++ b/net/ipv6/ipv6_sockglue.c
@@ -0,0 +1,704 @@
1/*
2 * IPv6 BSD socket options interface
3 * Linux INET6 implementation
4 *
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
7 *
8 * Based on linux/net/ipv4/ip_sockglue.c
9 *
10 * $Id: ipv6_sockglue.c,v 1.41 2002/02/01 22:01:04 davem Exp $
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
16 *
17 * FIXME: Make the setsockopt code POSIX compliant: That is
18 *
19 * o Return -EINVAL for setsockopt of short lengths
20 * o Truncate getsockopt returns
21 * o Return an optlen of the truncated length if need be
22 *
23 * Changes:
24 * David L Stevens <dlstevens@us.ibm.com>:
25 * - added multicast source filtering API for MLDv2
26 */
27
28#include <linux/module.h>
29#include <linux/config.h>
30#include <linux/errno.h>
31#include <linux/types.h>
32#include <linux/socket.h>
33#include <linux/sockios.h>
34#include <linux/sched.h>
35#include <linux/net.h>
36#include <linux/in6.h>
37#include <linux/netdevice.h>
38#include <linux/if_arp.h>
39#include <linux/init.h>
40#include <linux/sysctl.h>
41#include <linux/netfilter.h>
42
43#include <net/sock.h>
44#include <net/snmp.h>
45#include <net/ipv6.h>
46#include <net/ndisc.h>
47#include <net/protocol.h>
48#include <net/transp_v6.h>
49#include <net/ip6_route.h>
50#include <net/addrconf.h>
51#include <net/inet_common.h>
52#include <net/tcp.h>
53#include <net/udp.h>
54#include <net/xfrm.h>
55
56#include <asm/uaccess.h>
57
58DEFINE_SNMP_STAT(struct ipstats_mib, ipv6_statistics);
59
60static struct packet_type ipv6_packet_type = {
61 .type = __constant_htons(ETH_P_IPV6),
62 .func = ipv6_rcv,
63};
64
65struct ip6_ra_chain *ip6_ra_chain;
66DEFINE_RWLOCK(ip6_ra_lock);
67
68int ip6_ra_control(struct sock *sk, int sel, void (*destructor)(struct sock *))
69{
70 struct ip6_ra_chain *ra, *new_ra, **rap;
71
72 /* RA packet may be delivered ONLY to IPPROTO_RAW socket */
73 if (sk->sk_type != SOCK_RAW || inet_sk(sk)->num != IPPROTO_RAW)
74 return -EINVAL;
75
76 new_ra = (sel>=0) ? kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL;
77
78 write_lock_bh(&ip6_ra_lock);
79 for (rap = &ip6_ra_chain; (ra=*rap) != NULL; rap = &ra->next) {
80 if (ra->sk == sk) {
81 if (sel>=0) {
82 write_unlock_bh(&ip6_ra_lock);
83 if (new_ra)
84 kfree(new_ra);
85 return -EADDRINUSE;
86 }
87
88 *rap = ra->next;
89 write_unlock_bh(&ip6_ra_lock);
90
91 if (ra->destructor)
92 ra->destructor(sk);
93 sock_put(sk);
94 kfree(ra);
95 return 0;
96 }
97 }
98 if (new_ra == NULL) {
99 write_unlock_bh(&ip6_ra_lock);
100 return -ENOBUFS;
101 }
102 new_ra->sk = sk;
103 new_ra->sel = sel;
104 new_ra->destructor = destructor;
105 new_ra->next = ra;
106 *rap = new_ra;
107 sock_hold(sk);
108 write_unlock_bh(&ip6_ra_lock);
109 return 0;
110}
111
112extern int ip6_mc_source(int add, int omode, struct sock *sk,
113 struct group_source_req *pgsr);
114extern int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf);
115extern int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf,
116 struct group_filter __user *optval, int __user *optlen);
117
118
119int ipv6_setsockopt(struct sock *sk, int level, int optname,
120 char __user *optval, int optlen)
121{
122 struct ipv6_pinfo *np = inet6_sk(sk);
123 int val, valbool;
124 int retv = -ENOPROTOOPT;
125
126 if (level == SOL_IP && sk->sk_type != SOCK_RAW)
127 return udp_prot.setsockopt(sk, level, optname, optval, optlen);
128
129 if(level!=SOL_IPV6)
130 goto out;
131
132 if (optval == NULL)
133 val=0;
134 else if (get_user(val, (int __user *) optval))
135 return -EFAULT;
136
137 valbool = (val!=0);
138
139 lock_sock(sk);
140
141 switch (optname) {
142
143 case IPV6_ADDRFORM:
144 if (val == PF_INET) {
145 struct ipv6_txoptions *opt;
146 struct sk_buff *pktopt;
147
148 if (sk->sk_protocol != IPPROTO_UDP &&
149 sk->sk_protocol != IPPROTO_TCP)
150 break;
151
152 if (sk->sk_state != TCP_ESTABLISHED) {
153 retv = -ENOTCONN;
154 break;
155 }
156
157 if (ipv6_only_sock(sk) ||
158 !(ipv6_addr_type(&np->daddr) & IPV6_ADDR_MAPPED)) {
159 retv = -EADDRNOTAVAIL;
160 break;
161 }
162
163 fl6_free_socklist(sk);
164 ipv6_sock_mc_close(sk);
165
166 if (sk->sk_protocol == IPPROTO_TCP) {
167 struct tcp_sock *tp = tcp_sk(sk);
168
169 local_bh_disable();
170 sock_prot_dec_use(sk->sk_prot);
171 sock_prot_inc_use(&tcp_prot);
172 local_bh_enable();
173 sk->sk_prot = &tcp_prot;
174 tp->af_specific = &ipv4_specific;
175 sk->sk_socket->ops = &inet_stream_ops;
176 sk->sk_family = PF_INET;
177 tcp_sync_mss(sk, tp->pmtu_cookie);
178 } else {
179 local_bh_disable();
180 sock_prot_dec_use(sk->sk_prot);
181 sock_prot_inc_use(&udp_prot);
182 local_bh_enable();
183 sk->sk_prot = &udp_prot;
184 sk->sk_socket->ops = &inet_dgram_ops;
185 sk->sk_family = PF_INET;
186 }
187 opt = xchg(&np->opt, NULL);
188 if (opt)
189 sock_kfree_s(sk, opt, opt->tot_len);
190 pktopt = xchg(&np->pktoptions, NULL);
191 if (pktopt)
192 kfree_skb(pktopt);
193
194 sk->sk_destruct = inet_sock_destruct;
195#ifdef INET_REFCNT_DEBUG
196 atomic_dec(&inet6_sock_nr);
197#endif
198 module_put(THIS_MODULE);
199 retv = 0;
200 break;
201 }
202 goto e_inval;
203
204 case IPV6_V6ONLY:
205 if (inet_sk(sk)->num)
206 goto e_inval;
207 np->ipv6only = valbool;
208 retv = 0;
209 break;
210
211 case IPV6_PKTINFO:
212 np->rxopt.bits.rxinfo = valbool;
213 retv = 0;
214 break;
215
216 case IPV6_HOPLIMIT:
217 np->rxopt.bits.rxhlim = valbool;
218 retv = 0;
219 break;
220
221 case IPV6_RTHDR:
222 if (val < 0 || val > 2)
223 goto e_inval;
224 np->rxopt.bits.srcrt = val;
225 retv = 0;
226 break;
227
228 case IPV6_HOPOPTS:
229 np->rxopt.bits.hopopts = valbool;
230 retv = 0;
231 break;
232
233 case IPV6_DSTOPTS:
234 np->rxopt.bits.dstopts = valbool;
235 retv = 0;
236 break;
237
238 case IPV6_FLOWINFO:
239 np->rxopt.bits.rxflow = valbool;
240 retv = 0;
241 break;
242
243 case IPV6_PKTOPTIONS:
244 {
245 struct ipv6_txoptions *opt = NULL;
246 struct msghdr msg;
247 struct flowi fl;
248 int junk;
249
250 fl.fl6_flowlabel = 0;
251 fl.oif = sk->sk_bound_dev_if;
252
253 if (optlen == 0)
254 goto update;
255
256 /* 1K is probably excessive
257 * 1K is surely not enough, 2K per standard header is 16K.
258 */
259 retv = -EINVAL;
260 if (optlen > 64*1024)
261 break;
262
263 opt = sock_kmalloc(sk, sizeof(*opt) + optlen, GFP_KERNEL);
264 retv = -ENOBUFS;
265 if (opt == NULL)
266 break;
267
268 memset(opt, 0, sizeof(*opt));
269 opt->tot_len = sizeof(*opt) + optlen;
270 retv = -EFAULT;
271 if (copy_from_user(opt+1, optval, optlen))
272 goto done;
273
274 msg.msg_controllen = optlen;
275 msg.msg_control = (void*)(opt+1);
276
277 retv = datagram_send_ctl(&msg, &fl, opt, &junk);
278 if (retv)
279 goto done;
280update:
281 retv = 0;
282 if (sk->sk_type == SOCK_STREAM) {
283 if (opt) {
284 struct tcp_sock *tp = tcp_sk(sk);
285 if (!((1 << sk->sk_state) &
286 (TCPF_LISTEN | TCPF_CLOSE))
287 && inet_sk(sk)->daddr != LOOPBACK4_IPV6) {
288 tp->ext_header_len = opt->opt_flen + opt->opt_nflen;
289 tcp_sync_mss(sk, tp->pmtu_cookie);
290 }
291 }
292 opt = xchg(&np->opt, opt);
293 sk_dst_reset(sk);
294 } else {
295 write_lock(&sk->sk_dst_lock);
296 opt = xchg(&np->opt, opt);
297 write_unlock(&sk->sk_dst_lock);
298 sk_dst_reset(sk);
299 }
300
301done:
302 if (opt)
303 sock_kfree_s(sk, opt, opt->tot_len);
304 break;
305 }
306 case IPV6_UNICAST_HOPS:
307 if (val > 255 || val < -1)
308 goto e_inval;
309 np->hop_limit = val;
310 retv = 0;
311 break;
312
313 case IPV6_MULTICAST_HOPS:
314 if (sk->sk_type == SOCK_STREAM)
315 goto e_inval;
316 if (val > 255 || val < -1)
317 goto e_inval;
318 np->mcast_hops = val;
319 retv = 0;
320 break;
321
322 case IPV6_MULTICAST_LOOP:
323 np->mc_loop = valbool;
324 retv = 0;
325 break;
326
327 case IPV6_MULTICAST_IF:
328 if (sk->sk_type == SOCK_STREAM)
329 goto e_inval;
330 if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != val)
331 goto e_inval;
332
333 if (__dev_get_by_index(val) == NULL) {
334 retv = -ENODEV;
335 break;
336 }
337 np->mcast_oif = val;
338 retv = 0;
339 break;
340 case IPV6_ADD_MEMBERSHIP:
341 case IPV6_DROP_MEMBERSHIP:
342 {
343 struct ipv6_mreq mreq;
344
345 retv = -EFAULT;
346 if (copy_from_user(&mreq, optval, sizeof(struct ipv6_mreq)))
347 break;
348
349 if (optname == IPV6_ADD_MEMBERSHIP)
350 retv = ipv6_sock_mc_join(sk, mreq.ipv6mr_ifindex, &mreq.ipv6mr_multiaddr);
351 else
352 retv = ipv6_sock_mc_drop(sk, mreq.ipv6mr_ifindex, &mreq.ipv6mr_multiaddr);
353 break;
354 }
355 case IPV6_JOIN_ANYCAST:
356 case IPV6_LEAVE_ANYCAST:
357 {
358 struct ipv6_mreq mreq;
359
360 if (optlen != sizeof(struct ipv6_mreq))
361 goto e_inval;
362
363 retv = -EFAULT;
364 if (copy_from_user(&mreq, optval, sizeof(struct ipv6_mreq)))
365 break;
366
367 if (optname == IPV6_JOIN_ANYCAST)
368 retv = ipv6_sock_ac_join(sk, mreq.ipv6mr_ifindex, &mreq.ipv6mr_acaddr);
369 else
370 retv = ipv6_sock_ac_drop(sk, mreq.ipv6mr_ifindex, &mreq.ipv6mr_acaddr);
371 break;
372 }
373 case MCAST_JOIN_GROUP:
374 case MCAST_LEAVE_GROUP:
375 {
376 struct group_req greq;
377 struct sockaddr_in6 *psin6;
378
379 retv = -EFAULT;
380 if (copy_from_user(&greq, optval, sizeof(struct group_req)))
381 break;
382 if (greq.gr_group.ss_family != AF_INET6) {
383 retv = -EADDRNOTAVAIL;
384 break;
385 }
386 psin6 = (struct sockaddr_in6 *)&greq.gr_group;
387 if (optname == MCAST_JOIN_GROUP)
388 retv = ipv6_sock_mc_join(sk, greq.gr_interface,
389 &psin6->sin6_addr);
390 else
391 retv = ipv6_sock_mc_drop(sk, greq.gr_interface,
392 &psin6->sin6_addr);
393 break;
394 }
395 case MCAST_JOIN_SOURCE_GROUP:
396 case MCAST_LEAVE_SOURCE_GROUP:
397 case MCAST_BLOCK_SOURCE:
398 case MCAST_UNBLOCK_SOURCE:
399 {
400 struct group_source_req greqs;
401 int omode, add;
402
403 if (optlen != sizeof(struct group_source_req))
404 goto e_inval;
405 if (copy_from_user(&greqs, optval, sizeof(greqs))) {
406 retv = -EFAULT;
407 break;
408 }
409 if (greqs.gsr_group.ss_family != AF_INET6 ||
410 greqs.gsr_source.ss_family != AF_INET6) {
411 retv = -EADDRNOTAVAIL;
412 break;
413 }
414 if (optname == MCAST_BLOCK_SOURCE) {
415 omode = MCAST_EXCLUDE;
416 add = 1;
417 } else if (optname == MCAST_UNBLOCK_SOURCE) {
418 omode = MCAST_EXCLUDE;
419 add = 0;
420 } else if (optname == MCAST_JOIN_SOURCE_GROUP) {
421 struct sockaddr_in6 *psin6;
422
423 psin6 = (struct sockaddr_in6 *)&greqs.gsr_group;
424 retv = ipv6_sock_mc_join(sk, greqs.gsr_interface,
425 &psin6->sin6_addr);
426 if (retv)
427 break;
428 omode = MCAST_INCLUDE;
429 add = 1;
430 } else /*IP_DROP_SOURCE_MEMBERSHIP */ {
431 omode = MCAST_INCLUDE;
432 add = 0;
433 }
434 retv = ip6_mc_source(add, omode, sk, &greqs);
435 break;
436 }
437 case MCAST_MSFILTER:
438 {
439 extern int sysctl_optmem_max;
440 extern int sysctl_mld_max_msf;
441 struct group_filter *gsf;
442
443 if (optlen < GROUP_FILTER_SIZE(0))
444 goto e_inval;
445 if (optlen > sysctl_optmem_max) {
446 retv = -ENOBUFS;
447 break;
448 }
449 gsf = (struct group_filter *)kmalloc(optlen,GFP_KERNEL);
450 if (gsf == 0) {
451 retv = -ENOBUFS;
452 break;
453 }
454 retv = -EFAULT;
455 if (copy_from_user(gsf, optval, optlen)) {
456 kfree(gsf);
457 break;
458 }
459 /* numsrc >= (4G-140)/128 overflow in 32 bits */
460 if (gsf->gf_numsrc >= 0x1ffffffU ||
461 gsf->gf_numsrc > sysctl_mld_max_msf) {
462 kfree(gsf);
463 retv = -ENOBUFS;
464 break;
465 }
466 if (GROUP_FILTER_SIZE(gsf->gf_numsrc) > optlen) {
467 kfree(gsf);
468 retv = -EINVAL;
469 break;
470 }
471 retv = ip6_mc_msfilter(sk, gsf);
472 kfree(gsf);
473
474 break;
475 }
476 case IPV6_ROUTER_ALERT:
477 retv = ip6_ra_control(sk, val, NULL);
478 break;
479 case IPV6_MTU_DISCOVER:
480 if (val<0 || val>2)
481 goto e_inval;
482 np->pmtudisc = val;
483 retv = 0;
484 break;
485 case IPV6_MTU:
486 if (val && val < IPV6_MIN_MTU)
487 goto e_inval;
488 np->frag_size = val;
489 retv = 0;
490 break;
491 case IPV6_RECVERR:
492 np->recverr = valbool;
493 if (!val)
494 skb_queue_purge(&sk->sk_error_queue);
495 retv = 0;
496 break;
497 case IPV6_FLOWINFO_SEND:
498 np->sndflow = valbool;
499 retv = 0;
500 break;
501 case IPV6_FLOWLABEL_MGR:
502 retv = ipv6_flowlabel_opt(sk, optval, optlen);
503 break;
504 case IPV6_IPSEC_POLICY:
505 case IPV6_XFRM_POLICY:
506 retv = xfrm_user_policy(sk, optname, optval, optlen);
507 break;
508
509#ifdef CONFIG_NETFILTER
510 default:
511 retv = nf_setsockopt(sk, PF_INET6, optname, optval,
512 optlen);
513 break;
514#endif
515
516 }
517 release_sock(sk);
518
519out:
520 return retv;
521
522e_inval:
523 release_sock(sk);
524 return -EINVAL;
525}
526
527int ipv6_getsockopt(struct sock *sk, int level, int optname,
528 char __user *optval, int __user *optlen)
529{
530 struct ipv6_pinfo *np = inet6_sk(sk);
531 int len;
532 int val;
533
534 if (level == SOL_IP && sk->sk_type != SOCK_RAW)
535 return udp_prot.getsockopt(sk, level, optname, optval, optlen);
536 if(level!=SOL_IPV6)
537 return -ENOPROTOOPT;
538 if (get_user(len, optlen))
539 return -EFAULT;
540 switch (optname) {
541 case IPV6_ADDRFORM:
542 if (sk->sk_protocol != IPPROTO_UDP &&
543 sk->sk_protocol != IPPROTO_TCP)
544 return -EINVAL;
545 if (sk->sk_state != TCP_ESTABLISHED)
546 return -ENOTCONN;
547 val = sk->sk_family;
548 break;
549 case MCAST_MSFILTER:
550 {
551 struct group_filter gsf;
552 int err;
553
554 if (len < GROUP_FILTER_SIZE(0))
555 return -EINVAL;
556 if (copy_from_user(&gsf, optval, GROUP_FILTER_SIZE(0)))
557 return -EFAULT;
558 lock_sock(sk);
559 err = ip6_mc_msfget(sk, &gsf,
560 (struct group_filter __user *)optval, optlen);
561 release_sock(sk);
562 return err;
563 }
564
565 case IPV6_PKTOPTIONS:
566 {
567 struct msghdr msg;
568 struct sk_buff *skb;
569
570 if (sk->sk_type != SOCK_STREAM)
571 return -ENOPROTOOPT;
572
573 msg.msg_control = optval;
574 msg.msg_controllen = len;
575 msg.msg_flags = 0;
576
577 lock_sock(sk);
578 skb = np->pktoptions;
579 if (skb)
580 atomic_inc(&skb->users);
581 release_sock(sk);
582
583 if (skb) {
584 int err = datagram_recv_ctl(sk, &msg, skb);
585 kfree_skb(skb);
586 if (err)
587 return err;
588 } else {
589 if (np->rxopt.bits.rxinfo) {
590 struct in6_pktinfo src_info;
591 src_info.ipi6_ifindex = np->mcast_oif;
592 ipv6_addr_copy(&src_info.ipi6_addr, &np->daddr);
593 put_cmsg(&msg, SOL_IPV6, IPV6_PKTINFO, sizeof(src_info), &src_info);
594 }
595 if (np->rxopt.bits.rxhlim) {
596 int hlim = np->mcast_hops;
597 put_cmsg(&msg, SOL_IPV6, IPV6_HOPLIMIT, sizeof(hlim), &hlim);
598 }
599 }
600 len -= msg.msg_controllen;
601 return put_user(len, optlen);
602 }
603 case IPV6_MTU:
604 {
605 struct dst_entry *dst;
606 val = 0;
607 lock_sock(sk);
608 dst = sk_dst_get(sk);
609 if (dst) {
610 val = dst_mtu(dst);
611 dst_release(dst);
612 }
613 release_sock(sk);
614 if (!val)
615 return -ENOTCONN;
616 break;
617 }
618
619 case IPV6_V6ONLY:
620 val = np->ipv6only;
621 break;
622
623 case IPV6_PKTINFO:
624 val = np->rxopt.bits.rxinfo;
625 break;
626
627 case IPV6_HOPLIMIT:
628 val = np->rxopt.bits.rxhlim;
629 break;
630
631 case IPV6_RTHDR:
632 val = np->rxopt.bits.srcrt;
633 break;
634
635 case IPV6_HOPOPTS:
636 val = np->rxopt.bits.hopopts;
637 break;
638
639 case IPV6_DSTOPTS:
640 val = np->rxopt.bits.dstopts;
641 break;
642
643 case IPV6_FLOWINFO:
644 val = np->rxopt.bits.rxflow;
645 break;
646
647 case IPV6_UNICAST_HOPS:
648 val = np->hop_limit;
649 break;
650
651 case IPV6_MULTICAST_HOPS:
652 val = np->mcast_hops;
653 break;
654
655 case IPV6_MULTICAST_LOOP:
656 val = np->mc_loop;
657 break;
658
659 case IPV6_MULTICAST_IF:
660 val = np->mcast_oif;
661 break;
662
663 case IPV6_MTU_DISCOVER:
664 val = np->pmtudisc;
665 break;
666
667 case IPV6_RECVERR:
668 val = np->recverr;
669 break;
670
671 case IPV6_FLOWINFO_SEND:
672 val = np->sndflow;
673 break;
674
675 default:
676#ifdef CONFIG_NETFILTER
677 lock_sock(sk);
678 val = nf_getsockopt(sk, PF_INET6, optname, optval,
679 &len);
680 release_sock(sk);
681 if (val >= 0)
682 val = put_user(len, optlen);
683 return val;
684#else
685 return -EINVAL;
686#endif
687 }
688 len = min_t(unsigned int, sizeof(int), len);
689 if(put_user(len, optlen))
690 return -EFAULT;
691 if(copy_to_user(optval,&val,len))
692 return -EFAULT;
693 return 0;
694}
695
696void __init ipv6_packet_init(void)
697{
698 dev_add_pack(&ipv6_packet_type);
699}
700
701void ipv6_packet_cleanup(void)
702{
703 dev_remove_pack(&ipv6_packet_type);
704}
diff --git a/net/ipv6/ipv6_syms.c b/net/ipv6/ipv6_syms.c
new file mode 100644
index 000000000000..2f4c91ddc9a3
--- /dev/null
+++ b/net/ipv6/ipv6_syms.c
@@ -0,0 +1,41 @@
1
2#include <linux/config.h>
3#include <linux/module.h>
4#include <net/protocol.h>
5#include <net/ipv6.h>
6#include <net/addrconf.h>
7#include <net/ip6_route.h>
8#include <net/xfrm.h>
9
/* ICMPv6 and neighbour discovery */
EXPORT_SYMBOL(icmpv6_send);
EXPORT_SYMBOL(icmpv6_statistics);
EXPORT_SYMBOL(icmpv6_err_convert);
EXPORT_SYMBOL(ndisc_mc_map);

/* addresses and address notifiers */
EXPORT_SYMBOL(ipv6_addr_type);
EXPORT_SYMBOL(register_inet6addr_notifier);
EXPORT_SYMBOL(unregister_inet6addr_notifier);
EXPORT_SYMBOL(addrconf_lock);
EXPORT_SYMBOL(ipv6_get_saddr);
EXPORT_SYMBOL(ipv6_chk_addr);
EXPORT_SYMBOL(in6_dev_finish_destroy);

/* routing and output */
EXPORT_SYMBOL(ip6_route_output);
#ifdef CONFIG_NETFILTER
EXPORT_SYMBOL(ip6_route_me_harder);
#endif
EXPORT_SYMBOL(rt6_lookup);
EXPORT_SYMBOL(ip6_xmit);
EXPORT_SYMBOL(ipv6_push_nfrag_opts);

/* socket options and flow labels */
EXPORT_SYMBOL(ipv6_setsockopt);
EXPORT_SYMBOL(ipv6_getsockopt);
EXPORT_SYMBOL(fl6_sock_lookup);

/* protocol registration and inet6 socket operations */
EXPORT_SYMBOL(inet6_register_protosw);
EXPORT_SYMBOL(inet6_unregister_protosw);
EXPORT_SYMBOL(inet6_add_protocol);
EXPORT_SYMBOL(inet6_del_protocol);
EXPORT_SYMBOL(inet6_release);
EXPORT_SYMBOL(inet6_bind);
EXPORT_SYMBOL(inet6_getname);
EXPORT_SYMBOL(inet6_ioctl);

#ifdef CONFIG_XFRM
EXPORT_SYMBOL(xfrm6_rcv);
#endif
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
new file mode 100644
index 000000000000..393b6e6f50a9
--- /dev/null
+++ b/net/ipv6/mcast.c
@@ -0,0 +1,2499 @@
1/*
2 * Multicast support for IPv6
3 * Linux INET6 implementation
4 *
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
7 *
8 * $Id: mcast.c,v 1.40 2002/02/08 03:57:19 davem Exp $
9 *
10 * Based on linux/ipv4/igmp.c and linux/ipv4/ip_sockglue.c
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
16 */
17
18/* Changes:
19 *
20 * yoshfuji : fix format of router-alert option
21 * YOSHIFUJI Hideaki @USAGI:
22 * Fixed source address for MLD message based on
23 * <draft-ietf-magma-mld-source-05.txt>.
24 * YOSHIFUJI Hideaki @USAGI:
25 * - Ignore Queries for invalid addresses.
26 * - MLD for link-local addresses.
27 * David L Stevens <dlstevens@us.ibm.com>:
28 * - MLDv2 support
29 */
30
31#include <linux/config.h>
32#include <linux/module.h>
33#include <linux/errno.h>
34#include <linux/types.h>
35#include <linux/string.h>
36#include <linux/socket.h>
37#include <linux/sockios.h>
38#include <linux/jiffies.h>
39#include <linux/times.h>
40#include <linux/net.h>
41#include <linux/in.h>
42#include <linux/in6.h>
43#include <linux/netdevice.h>
44#include <linux/if_arp.h>
45#include <linux/route.h>
46#include <linux/init.h>
47#include <linux/proc_fs.h>
48#include <linux/seq_file.h>
49
50#include <linux/netfilter.h>
51#include <linux/netfilter_ipv6.h>
52
53#include <net/sock.h>
54#include <net/snmp.h>
55
56#include <net/ipv6.h>
57#include <net/protocol.h>
58#include <net/if_inet6.h>
59#include <net/ndisc.h>
60#include <net/addrconf.h>
61#include <net/ip6_route.h>
62
63#include <net/ip6_checksum.h>
64
65/* Set to 3 to get tracing... */
66#define MCAST_DEBUG 2
67
68#if MCAST_DEBUG >= 3
69#define MDBG(x) printk x
70#else
71#define MDBG(x)
72#endif
73
74/*
75 * These header formats should be in a separate include file, but icmpv6.h
76 * doesn't have in6_addr defined in all cases, there is no __u128, and no
77 * other files reference these.
78 *
79 * +-DLS 4/14/03
80 */
81
82/* Multicast Listener Discovery version 2 headers */
83
84struct mld2_grec {
85 __u8 grec_type;
86 __u8 grec_auxwords;
87 __u16 grec_nsrcs;
88 struct in6_addr grec_mca;
89 struct in6_addr grec_src[0];
90};
91
92struct mld2_report {
93 __u8 type;
94 __u8 resv1;
95 __u16 csum;
96 __u16 resv2;
97 __u16 ngrec;
98 struct mld2_grec grec[0];
99};
100
101struct mld2_query {
102 __u8 type;
103 __u8 code;
104 __u16 csum;
105 __u16 mrc;
106 __u16 resv1;
107 struct in6_addr mca;
108#if defined(__LITTLE_ENDIAN_BITFIELD)
109 __u8 qrv:3,
110 suppress:1,
111 resv2:4;
112#elif defined(__BIG_ENDIAN_BITFIELD)
113 __u8 resv2:4,
114 suppress:1,
115 qrv:3;
116#else
117#error "Please fix <asm/byteorder.h>"
118#endif
119 __u8 qqic;
120 __u16 nsrcs;
121 struct in6_addr srcs[0];
122};
123
124static struct in6_addr mld2_all_mcr = MLD2_ALL_MCR_INIT;
125
126/* Big mc list lock for all the sockets */
127static DEFINE_RWLOCK(ipv6_sk_mc_lock);
128
129static struct socket *igmp6_socket;
130
131int __ipv6_dev_mc_dec(struct inet6_dev *idev, struct in6_addr *addr);
132
133static void igmp6_join_group(struct ifmcaddr6 *ma);
134static void igmp6_leave_group(struct ifmcaddr6 *ma);
135static void igmp6_timer_handler(unsigned long data);
136
137static void mld_gq_timer_expire(unsigned long data);
138static void mld_ifc_timer_expire(unsigned long data);
139static void mld_ifc_event(struct inet6_dev *idev);
140static void mld_add_delrec(struct inet6_dev *idev, struct ifmcaddr6 *pmc);
141static void mld_del_delrec(struct inet6_dev *idev, struct in6_addr *addr);
142static void mld_clear_delrec(struct inet6_dev *idev);
143static int sf_setstate(struct ifmcaddr6 *pmc);
144static void sf_markstate(struct ifmcaddr6 *pmc);
145static void ip6_mc_clear_src(struct ifmcaddr6 *pmc);
146static int ip6_mc_del_src(struct inet6_dev *idev, struct in6_addr *pmca,
147 int sfmode, int sfcount, struct in6_addr *psfsrc,
148 int delta);
149static int ip6_mc_add_src(struct inet6_dev *idev, struct in6_addr *pmca,
150 int sfmode, int sfcount, struct in6_addr *psfsrc,
151 int delta);
152static int ip6_mc_leave_src(struct sock *sk, struct ipv6_mc_socklist *iml,
153 struct inet6_dev *idev);
154
155
156#define IGMP6_UNSOLICITED_IVAL (10*HZ)
157#define MLD_QRV_DEFAULT 2
158
159#define MLD_V1_SEEN(idev) (ipv6_devconf.force_mld_version == 1 || \
160 (idev)->cnf.force_mld_version == 1 || \
161 ((idev)->mc_v1_seen && \
162 time_before(jiffies, (idev)->mc_v1_seen)))
163
/*
 * Decoding of the MLDv2 "floating point" fields (RFC 3810, Maximum
 * Response Code and QQIC).
 *
 * MLDV2_MASK(value, nb) keeps the low 'nb' bits of 'value'.
 *
 * MLDV2_EXP(thresh, nbmant, nbexp, value) returns 'value' unchanged when
 * it is below 'thresh'.  Otherwise 'value' is split into an 'nbmant'-bit
 * mantissa and an 'nbexp'-bit exponent and decoded as
 *
 *	(mant | (1 << nbmant)) << (exp + nbexp)
 *
 * The implicit leading bit sits directly above the mantissa, i.e. at bit
 * position 'nbmant' (0x1000 for the MRC, 0x10 for the QQIC).
 */
#define MLDV2_MASK(value, nb) ((nb)>=32 ? (value) : (((1<<(nb))-1) & (value)))
#define MLDV2_EXP(thresh, nbmant, nbexp, value) \
	((value) < (thresh) ? (value) : \
	((MLDV2_MASK(value, nbmant) | (1<<(nbmant))) << \
	(MLDV2_MASK((value) >> (nbmant), nbexp) + (nbexp))))

/* 8-bit QQIC: 4-bit mantissa, 3-bit exponent, linear below 0x80 */
#define MLDV2_QQIC(value) MLDV2_EXP(0x80, 4, 3, value)
/* 16-bit MRC: 12-bit mantissa, 3-bit exponent, linear below 0x8000 */
#define MLDV2_MRC(value) MLDV2_EXP(0x8000, 12, 3, value)
172
173#define IPV6_MLD_MAX_MSF 10
174
175int sysctl_mld_max_msf = IPV6_MLD_MAX_MSF;
176
177/*
178 * socket join on multicast group
179 */
180
181int ipv6_sock_mc_join(struct sock *sk, int ifindex, struct in6_addr *addr)
182{
183 struct net_device *dev = NULL;
184 struct ipv6_mc_socklist *mc_lst;
185 struct ipv6_pinfo *np = inet6_sk(sk);
186 int err;
187
188 if (!ipv6_addr_is_multicast(addr))
189 return -EINVAL;
190
191 mc_lst = sock_kmalloc(sk, sizeof(struct ipv6_mc_socklist), GFP_KERNEL);
192
193 if (mc_lst == NULL)
194 return -ENOMEM;
195
196 mc_lst->next = NULL;
197 ipv6_addr_copy(&mc_lst->addr, addr);
198
199 if (ifindex == 0) {
200 struct rt6_info *rt;
201 rt = rt6_lookup(addr, NULL, 0, 0);
202 if (rt) {
203 dev = rt->rt6i_dev;
204 dev_hold(dev);
205 dst_release(&rt->u.dst);
206 }
207 } else
208 dev = dev_get_by_index(ifindex);
209
210 if (dev == NULL) {
211 sock_kfree_s(sk, mc_lst, sizeof(*mc_lst));
212 return -ENODEV;
213 }
214
215 mc_lst->ifindex = dev->ifindex;
216 mc_lst->sfmode = MCAST_EXCLUDE;
217 mc_lst->sflist = NULL;
218
219 /*
220 * now add/increase the group membership on the device
221 */
222
223 err = ipv6_dev_mc_inc(dev, addr);
224
225 if (err) {
226 sock_kfree_s(sk, mc_lst, sizeof(*mc_lst));
227 dev_put(dev);
228 return err;
229 }
230
231 write_lock_bh(&ipv6_sk_mc_lock);
232 mc_lst->next = np->ipv6_mc_list;
233 np->ipv6_mc_list = mc_lst;
234 write_unlock_bh(&ipv6_sk_mc_lock);
235
236 dev_put(dev);
237
238 return 0;
239}
240
241/*
242 * socket leave on multicast group
243 */
244int ipv6_sock_mc_drop(struct sock *sk, int ifindex, struct in6_addr *addr)
245{
246 struct ipv6_pinfo *np = inet6_sk(sk);
247 struct ipv6_mc_socklist *mc_lst, **lnk;
248
249 write_lock_bh(&ipv6_sk_mc_lock);
250 for (lnk = &np->ipv6_mc_list; (mc_lst = *lnk) !=NULL ; lnk = &mc_lst->next) {
251 if ((ifindex == 0 || mc_lst->ifindex == ifindex) &&
252 ipv6_addr_equal(&mc_lst->addr, addr)) {
253 struct net_device *dev;
254
255 *lnk = mc_lst->next;
256 write_unlock_bh(&ipv6_sk_mc_lock);
257
258 if ((dev = dev_get_by_index(mc_lst->ifindex)) != NULL) {
259 struct inet6_dev *idev = in6_dev_get(dev);
260
261 if (idev) {
262 (void) ip6_mc_leave_src(sk,mc_lst,idev);
263 __ipv6_dev_mc_dec(idev, &mc_lst->addr);
264 in6_dev_put(idev);
265 }
266 dev_put(dev);
267 }
268 sock_kfree_s(sk, mc_lst, sizeof(*mc_lst));
269 return 0;
270 }
271 }
272 write_unlock_bh(&ipv6_sk_mc_lock);
273
274 return -ENOENT;
275}
276
277static struct inet6_dev *ip6_mc_find_dev(struct in6_addr *group, int ifindex)
278{
279 struct net_device *dev = NULL;
280 struct inet6_dev *idev = NULL;
281
282 if (ifindex == 0) {
283 struct rt6_info *rt;
284
285 rt = rt6_lookup(group, NULL, 0, 0);
286 if (rt) {
287 dev = rt->rt6i_dev;
288 dev_hold(dev);
289 dst_release(&rt->u.dst);
290 }
291 } else
292 dev = dev_get_by_index(ifindex);
293
294 if (!dev)
295 return NULL;
296 idev = in6_dev_get(dev);
297 if (!idev) {
298 dev_put(dev);
299 return NULL;
300 }
301 read_lock_bh(&idev->lock);
302 if (idev->dead) {
303 read_unlock_bh(&idev->lock);
304 in6_dev_put(idev);
305 dev_put(dev);
306 return NULL;
307 }
308 return idev;
309}
310
311void ipv6_sock_mc_close(struct sock *sk)
312{
313 struct ipv6_pinfo *np = inet6_sk(sk);
314 struct ipv6_mc_socklist *mc_lst;
315
316 write_lock_bh(&ipv6_sk_mc_lock);
317 while ((mc_lst = np->ipv6_mc_list) != NULL) {
318 struct net_device *dev;
319
320 np->ipv6_mc_list = mc_lst->next;
321 write_unlock_bh(&ipv6_sk_mc_lock);
322
323 dev = dev_get_by_index(mc_lst->ifindex);
324 if (dev) {
325 struct inet6_dev *idev = in6_dev_get(dev);
326
327 if (idev) {
328 (void) ip6_mc_leave_src(sk, mc_lst, idev);
329 __ipv6_dev_mc_dec(idev, &mc_lst->addr);
330 in6_dev_put(idev);
331 }
332 dev_put(dev);
333 }
334
335 sock_kfree_s(sk, mc_lst, sizeof(*mc_lst));
336
337 write_lock_bh(&ipv6_sk_mc_lock);
338 }
339 write_unlock_bh(&ipv6_sk_mc_lock);
340}
341
342int ip6_mc_source(int add, int omode, struct sock *sk,
343 struct group_source_req *pgsr)
344{
345 struct in6_addr *source, *group;
346 struct ipv6_mc_socklist *pmc;
347 struct net_device *dev;
348 struct inet6_dev *idev;
349 struct ipv6_pinfo *inet6 = inet6_sk(sk);
350 struct ip6_sf_socklist *psl;
351 int i, j, rv;
352 int err;
353
354 if (pgsr->gsr_group.ss_family != AF_INET6 ||
355 pgsr->gsr_source.ss_family != AF_INET6)
356 return -EINVAL;
357
358 source = &((struct sockaddr_in6 *)&pgsr->gsr_source)->sin6_addr;
359 group = &((struct sockaddr_in6 *)&pgsr->gsr_group)->sin6_addr;
360
361 if (!ipv6_addr_is_multicast(group))
362 return -EINVAL;
363
364 idev = ip6_mc_find_dev(group, pgsr->gsr_interface);
365 if (!idev)
366 return -ENODEV;
367 dev = idev->dev;
368
369 err = -EADDRNOTAVAIL;
370
371 for (pmc=inet6->ipv6_mc_list; pmc; pmc=pmc->next) {
372 if (pgsr->gsr_interface && pmc->ifindex != pgsr->gsr_interface)
373 continue;
374 if (ipv6_addr_equal(&pmc->addr, group))
375 break;
376 }
377 if (!pmc) /* must have a prior join */
378 goto done;
379 /* if a source filter was set, must be the same mode as before */
380 if (pmc->sflist) {
381 if (pmc->sfmode != omode)
382 goto done;
383 } else if (pmc->sfmode != omode) {
384 /* allow mode switches for empty-set filters */
385 ip6_mc_add_src(idev, group, omode, 0, NULL, 0);
386 ip6_mc_del_src(idev, group, pmc->sfmode, 0, NULL, 0);
387 pmc->sfmode = omode;
388 }
389
390 psl = pmc->sflist;
391 if (!add) {
392 if (!psl)
393 goto done;
394 rv = !0;
395 for (i=0; i<psl->sl_count; i++) {
396 rv = memcmp(&psl->sl_addr[i], source,
397 sizeof(struct in6_addr));
398 if (rv == 0)
399 break;
400 }
401 if (rv) /* source not found */
402 goto done;
403
404 /* update the interface filter */
405 ip6_mc_del_src(idev, group, omode, 1, source, 1);
406
407 for (j=i+1; j<psl->sl_count; j++)
408 psl->sl_addr[j-1] = psl->sl_addr[j];
409 psl->sl_count--;
410 err = 0;
411 goto done;
412 }
413 /* else, add a new source to the filter */
414
415 if (psl && psl->sl_count >= sysctl_mld_max_msf) {
416 err = -ENOBUFS;
417 goto done;
418 }
419 if (!psl || psl->sl_count == psl->sl_max) {
420 struct ip6_sf_socklist *newpsl;
421 int count = IP6_SFBLOCK;
422
423 if (psl)
424 count += psl->sl_max;
425 newpsl = (struct ip6_sf_socklist *)sock_kmalloc(sk,
426 IP6_SFLSIZE(count), GFP_ATOMIC);
427 if (!newpsl) {
428 err = -ENOBUFS;
429 goto done;
430 }
431 newpsl->sl_max = count;
432 newpsl->sl_count = count - IP6_SFBLOCK;
433 if (psl) {
434 for (i=0; i<psl->sl_count; i++)
435 newpsl->sl_addr[i] = psl->sl_addr[i];
436 sock_kfree_s(sk, psl, IP6_SFLSIZE(psl->sl_max));
437 }
438 pmc->sflist = psl = newpsl;
439 }
440 rv = 1; /* > 0 for insert logic below if sl_count is 0 */
441 for (i=0; i<psl->sl_count; i++) {
442 rv = memcmp(&psl->sl_addr[i], source, sizeof(struct in6_addr));
443 if (rv == 0)
444 break;
445 }
446 if (rv == 0) /* address already there is an error */
447 goto done;
448 for (j=psl->sl_count-1; j>=i; j--)
449 psl->sl_addr[j+1] = psl->sl_addr[j];
450 psl->sl_addr[i] = *source;
451 psl->sl_count++;
452 err = 0;
453 /* update the interface list */
454 ip6_mc_add_src(idev, group, omode, 1, source, 1);
455done:
456 read_unlock_bh(&idev->lock);
457 in6_dev_put(idev);
458 dev_put(dev);
459 return err;
460}
461
462int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf)
463{
464 struct in6_addr *group;
465 struct ipv6_mc_socklist *pmc;
466 struct net_device *dev;
467 struct inet6_dev *idev;
468 struct ipv6_pinfo *inet6 = inet6_sk(sk);
469 struct ip6_sf_socklist *newpsl, *psl;
470 int i, err;
471
472 group = &((struct sockaddr_in6 *)&gsf->gf_group)->sin6_addr;
473
474 if (!ipv6_addr_is_multicast(group))
475 return -EINVAL;
476 if (gsf->gf_fmode != MCAST_INCLUDE &&
477 gsf->gf_fmode != MCAST_EXCLUDE)
478 return -EINVAL;
479
480 idev = ip6_mc_find_dev(group, gsf->gf_interface);
481
482 if (!idev)
483 return -ENODEV;
484 dev = idev->dev;
485 err = -EADDRNOTAVAIL;
486
487 for (pmc=inet6->ipv6_mc_list; pmc; pmc=pmc->next) {
488 if (pmc->ifindex != gsf->gf_interface)
489 continue;
490 if (ipv6_addr_equal(&pmc->addr, group))
491 break;
492 }
493 if (!pmc) /* must have a prior join */
494 goto done;
495 if (gsf->gf_numsrc) {
496 newpsl = (struct ip6_sf_socklist *)sock_kmalloc(sk,
497 IP6_SFLSIZE(gsf->gf_numsrc), GFP_ATOMIC);
498 if (!newpsl) {
499 err = -ENOBUFS;
500 goto done;
501 }
502 newpsl->sl_max = newpsl->sl_count = gsf->gf_numsrc;
503 for (i=0; i<newpsl->sl_count; ++i) {
504 struct sockaddr_in6 *psin6;
505
506 psin6 = (struct sockaddr_in6 *)&gsf->gf_slist[i];
507 newpsl->sl_addr[i] = psin6->sin6_addr;
508 }
509 err = ip6_mc_add_src(idev, group, gsf->gf_fmode,
510 newpsl->sl_count, newpsl->sl_addr, 0);
511 if (err) {
512 sock_kfree_s(sk, newpsl, IP6_SFLSIZE(newpsl->sl_max));
513 goto done;
514 }
515 } else
516 newpsl = NULL;
517 psl = pmc->sflist;
518 if (psl) {
519 (void) ip6_mc_del_src(idev, group, pmc->sfmode,
520 psl->sl_count, psl->sl_addr, 0);
521 sock_kfree_s(sk, psl, IP6_SFLSIZE(psl->sl_max));
522 } else
523 (void) ip6_mc_del_src(idev, group, pmc->sfmode, 0, NULL, 0);
524 pmc->sflist = newpsl;
525 pmc->sfmode = gsf->gf_fmode;
526done:
527 read_unlock_bh(&idev->lock);
528 in6_dev_put(idev);
529 dev_put(dev);
530 return err;
531}
532
533int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf,
534 struct group_filter __user *optval, int __user *optlen)
535{
536 int err, i, count, copycount;
537 struct in6_addr *group;
538 struct ipv6_mc_socklist *pmc;
539 struct inet6_dev *idev;
540 struct net_device *dev;
541 struct ipv6_pinfo *inet6 = inet6_sk(sk);
542 struct ip6_sf_socklist *psl;
543
544 group = &((struct sockaddr_in6 *)&gsf->gf_group)->sin6_addr;
545
546 if (!ipv6_addr_is_multicast(group))
547 return -EINVAL;
548
549 idev = ip6_mc_find_dev(group, gsf->gf_interface);
550
551 if (!idev)
552 return -ENODEV;
553
554 dev = idev->dev;
555
556 err = -EADDRNOTAVAIL;
557
558 for (pmc=inet6->ipv6_mc_list; pmc; pmc=pmc->next) {
559 if (pmc->ifindex != gsf->gf_interface)
560 continue;
561 if (ipv6_addr_equal(group, &pmc->addr))
562 break;
563 }
564 if (!pmc) /* must have a prior join */
565 goto done;
566 gsf->gf_fmode = pmc->sfmode;
567 psl = pmc->sflist;
568 count = psl ? psl->sl_count : 0;
569 read_unlock_bh(&idev->lock);
570 in6_dev_put(idev);
571 dev_put(dev);
572
573 copycount = count < gsf->gf_numsrc ? count : gsf->gf_numsrc;
574 gsf->gf_numsrc = count;
575 if (put_user(GROUP_FILTER_SIZE(copycount), optlen) ||
576 copy_to_user(optval, gsf, GROUP_FILTER_SIZE(0))) {
577 return -EFAULT;
578 }
579 for (i=0; i<copycount; i++) {
580 struct sockaddr_in6 *psin6;
581 struct sockaddr_storage ss;
582
583 psin6 = (struct sockaddr_in6 *)&ss;
584 memset(&ss, 0, sizeof(ss));
585 psin6->sin6_family = AF_INET6;
586 psin6->sin6_addr = psl->sl_addr[i];
587 if (copy_to_user(&optval->gf_slist[i], &ss, sizeof(ss)))
588 return -EFAULT;
589 }
590 return 0;
591done:
592 read_unlock_bh(&idev->lock);
593 in6_dev_put(idev);
594 dev_put(dev);
595 return err;
596}
597
598int inet6_mc_check(struct sock *sk, struct in6_addr *mc_addr,
599 struct in6_addr *src_addr)
600{
601 struct ipv6_pinfo *np = inet6_sk(sk);
602 struct ipv6_mc_socklist *mc;
603 struct ip6_sf_socklist *psl;
604 int rv = 1;
605
606 read_lock(&ipv6_sk_mc_lock);
607 for (mc = np->ipv6_mc_list; mc; mc = mc->next) {
608 if (ipv6_addr_equal(&mc->addr, mc_addr))
609 break;
610 }
611 if (!mc) {
612 read_unlock(&ipv6_sk_mc_lock);
613 return 1;
614 }
615 psl = mc->sflist;
616 if (!psl) {
617 rv = mc->sfmode == MCAST_EXCLUDE;
618 } else {
619 int i;
620
621 for (i=0; i<psl->sl_count; i++) {
622 if (ipv6_addr_equal(&psl->sl_addr[i], src_addr))
623 break;
624 }
625 if (mc->sfmode == MCAST_INCLUDE && i >= psl->sl_count)
626 rv = 0;
627 if (mc->sfmode == MCAST_EXCLUDE && i < psl->sl_count)
628 rv = 0;
629 }
630 read_unlock(&ipv6_sk_mc_lock);
631
632 return rv;
633}
634
635static void ma_put(struct ifmcaddr6 *mc)
636{
637 if (atomic_dec_and_test(&mc->mca_refcnt)) {
638 in6_dev_put(mc->idev);
639 kfree(mc);
640 }
641}
642
643static void igmp6_group_added(struct ifmcaddr6 *mc)
644{
645 struct net_device *dev = mc->idev->dev;
646 char buf[MAX_ADDR_LEN];
647
648 spin_lock_bh(&mc->mca_lock);
649 if (!(mc->mca_flags&MAF_LOADED)) {
650 mc->mca_flags |= MAF_LOADED;
651 if (ndisc_mc_map(&mc->mca_addr, buf, dev, 0) == 0)
652 dev_mc_add(dev, buf, dev->addr_len, 0);
653 }
654 spin_unlock_bh(&mc->mca_lock);
655
656 if (!(dev->flags & IFF_UP) || (mc->mca_flags & MAF_NOREPORT))
657 return;
658
659 if (MLD_V1_SEEN(mc->idev)) {
660 igmp6_join_group(mc);
661 return;
662 }
663 /* else v2 */
664
665 mc->mca_crcount = mc->idev->mc_qrv;
666 mld_ifc_event(mc->idev);
667}
668
669static void igmp6_group_dropped(struct ifmcaddr6 *mc)
670{
671 struct net_device *dev = mc->idev->dev;
672 char buf[MAX_ADDR_LEN];
673
674 spin_lock_bh(&mc->mca_lock);
675 if (mc->mca_flags&MAF_LOADED) {
676 mc->mca_flags &= ~MAF_LOADED;
677 if (ndisc_mc_map(&mc->mca_addr, buf, dev, 0) == 0)
678 dev_mc_delete(dev, buf, dev->addr_len, 0);
679 }
680
681 if (mc->mca_flags & MAF_NOREPORT)
682 goto done;
683 spin_unlock_bh(&mc->mca_lock);
684
685 if (!mc->idev->dead)
686 igmp6_leave_group(mc);
687
688 spin_lock_bh(&mc->mca_lock);
689 if (del_timer(&mc->mca_timer))
690 atomic_dec(&mc->mca_refcnt);
691done:
692 ip6_mc_clear_src(mc);
693 spin_unlock_bh(&mc->mca_lock);
694}
695
696/*
697 * deleted ifmcaddr6 manipulation
698 */
699static void mld_add_delrec(struct inet6_dev *idev, struct ifmcaddr6 *im)
700{
701 struct ifmcaddr6 *pmc;
702
703 /* this is an "ifmcaddr6" for convenience; only the fields below
704 * are actually used. In particular, the refcnt and users are not
705 * used for management of the delete list. Using the same structure
706 * for deleted items allows change reports to use common code with
707 * non-deleted or query-response MCA's.
708 */
709 pmc = (struct ifmcaddr6 *)kmalloc(sizeof(*pmc), GFP_ATOMIC);
710 if (!pmc)
711 return;
712 memset(pmc, 0, sizeof(*pmc));
713 spin_lock_bh(&im->mca_lock);
714 spin_lock_init(&pmc->mca_lock);
715 pmc->idev = im->idev;
716 in6_dev_hold(idev);
717 pmc->mca_addr = im->mca_addr;
718 pmc->mca_crcount = idev->mc_qrv;
719 pmc->mca_sfmode = im->mca_sfmode;
720 if (pmc->mca_sfmode == MCAST_INCLUDE) {
721 struct ip6_sf_list *psf;
722
723 pmc->mca_tomb = im->mca_tomb;
724 pmc->mca_sources = im->mca_sources;
725 im->mca_tomb = im->mca_sources = NULL;
726 for (psf=pmc->mca_sources; psf; psf=psf->sf_next)
727 psf->sf_crcount = pmc->mca_crcount;
728 }
729 spin_unlock_bh(&im->mca_lock);
730
731 write_lock_bh(&idev->mc_lock);
732 pmc->next = idev->mc_tomb;
733 idev->mc_tomb = pmc;
734 write_unlock_bh(&idev->mc_lock);
735}
736
737static void mld_del_delrec(struct inet6_dev *idev, struct in6_addr *pmca)
738{
739 struct ifmcaddr6 *pmc, *pmc_prev;
740 struct ip6_sf_list *psf, *psf_next;
741
742 write_lock_bh(&idev->mc_lock);
743 pmc_prev = NULL;
744 for (pmc=idev->mc_tomb; pmc; pmc=pmc->next) {
745 if (ipv6_addr_equal(&pmc->mca_addr, pmca))
746 break;
747 pmc_prev = pmc;
748 }
749 if (pmc) {
750 if (pmc_prev)
751 pmc_prev->next = pmc->next;
752 else
753 idev->mc_tomb = pmc->next;
754 }
755 write_unlock_bh(&idev->mc_lock);
756 if (pmc) {
757 for (psf=pmc->mca_tomb; psf; psf=psf_next) {
758 psf_next = psf->sf_next;
759 kfree(psf);
760 }
761 in6_dev_put(pmc->idev);
762 kfree(pmc);
763 }
764}
765
766static void mld_clear_delrec(struct inet6_dev *idev)
767{
768 struct ifmcaddr6 *pmc, *nextpmc;
769
770 write_lock_bh(&idev->mc_lock);
771 pmc = idev->mc_tomb;
772 idev->mc_tomb = NULL;
773 write_unlock_bh(&idev->mc_lock);
774
775 for (; pmc; pmc = nextpmc) {
776 nextpmc = pmc->next;
777 ip6_mc_clear_src(pmc);
778 in6_dev_put(pmc->idev);
779 kfree(pmc);
780 }
781
782 /* clear dead sources, too */
783 read_lock_bh(&idev->lock);
784 for (pmc=idev->mc_list; pmc; pmc=pmc->next) {
785 struct ip6_sf_list *psf, *psf_next;
786
787 spin_lock_bh(&pmc->mca_lock);
788 psf = pmc->mca_tomb;
789 pmc->mca_tomb = NULL;
790 spin_unlock_bh(&pmc->mca_lock);
791 for (; psf; psf=psf_next) {
792 psf_next = psf->sf_next;
793 kfree(psf);
794 }
795 }
796 read_unlock_bh(&idev->lock);
797}
798
799
800/*
801 * device multicast group inc (add if not found)
802 */
803int ipv6_dev_mc_inc(struct net_device *dev, struct in6_addr *addr)
804{
805 struct ifmcaddr6 *mc;
806 struct inet6_dev *idev;
807
808 idev = in6_dev_get(dev);
809
810 if (idev == NULL)
811 return -EINVAL;
812
813 write_lock_bh(&idev->lock);
814 if (idev->dead) {
815 write_unlock_bh(&idev->lock);
816 in6_dev_put(idev);
817 return -ENODEV;
818 }
819
820 for (mc = idev->mc_list; mc; mc = mc->next) {
821 if (ipv6_addr_equal(&mc->mca_addr, addr)) {
822 mc->mca_users++;
823 write_unlock_bh(&idev->lock);
824 ip6_mc_add_src(idev, &mc->mca_addr, MCAST_EXCLUDE, 0,
825 NULL, 0);
826 in6_dev_put(idev);
827 return 0;
828 }
829 }
830
831 /*
832 * not found: create a new one.
833 */
834
835 mc = kmalloc(sizeof(struct ifmcaddr6), GFP_ATOMIC);
836
837 if (mc == NULL) {
838 write_unlock_bh(&idev->lock);
839 in6_dev_put(idev);
840 return -ENOMEM;
841 }
842
843 memset(mc, 0, sizeof(struct ifmcaddr6));
844 init_timer(&mc->mca_timer);
845 mc->mca_timer.function = igmp6_timer_handler;
846 mc->mca_timer.data = (unsigned long) mc;
847
848 ipv6_addr_copy(&mc->mca_addr, addr);
849 mc->idev = idev;
850 mc->mca_users = 1;
851 /* mca_stamp should be updated upon changes */
852 mc->mca_cstamp = mc->mca_tstamp = jiffies;
853 atomic_set(&mc->mca_refcnt, 2);
854 spin_lock_init(&mc->mca_lock);
855
856 /* initial mode is (EX, empty) */
857 mc->mca_sfmode = MCAST_EXCLUDE;
858 mc->mca_sfcount[MCAST_EXCLUDE] = 1;
859
860 if (ipv6_addr_is_ll_all_nodes(&mc->mca_addr) ||
861 IPV6_ADDR_MC_SCOPE(&mc->mca_addr) < IPV6_ADDR_SCOPE_LINKLOCAL)
862 mc->mca_flags |= MAF_NOREPORT;
863
864 mc->next = idev->mc_list;
865 idev->mc_list = mc;
866 write_unlock_bh(&idev->lock);
867
868 mld_del_delrec(idev, &mc->mca_addr);
869 igmp6_group_added(mc);
870 ma_put(mc);
871 return 0;
872}
873
874/*
875 * device multicast group del
876 */
877int __ipv6_dev_mc_dec(struct inet6_dev *idev, struct in6_addr *addr)
878{
879 struct ifmcaddr6 *ma, **map;
880
881 write_lock_bh(&idev->lock);
882 for (map = &idev->mc_list; (ma=*map) != NULL; map = &ma->next) {
883 if (ipv6_addr_equal(&ma->mca_addr, addr)) {
884 if (--ma->mca_users == 0) {
885 *map = ma->next;
886 write_unlock_bh(&idev->lock);
887
888 igmp6_group_dropped(ma);
889
890 ma_put(ma);
891 return 0;
892 }
893 write_unlock_bh(&idev->lock);
894 return 0;
895 }
896 }
897 write_unlock_bh(&idev->lock);
898
899 return -ENOENT;
900}
901
902int ipv6_dev_mc_dec(struct net_device *dev, struct in6_addr *addr)
903{
904 struct inet6_dev *idev = in6_dev_get(dev);
905 int err;
906
907 if (!idev)
908 return -ENODEV;
909
910 err = __ipv6_dev_mc_dec(idev, addr);
911
912 in6_dev_put(idev);
913
914 return err;
915}
916
917/*
918 * identify MLD packets for MLD filter exceptions
919 */
920int ipv6_is_mld(struct sk_buff *skb, int nexthdr)
921{
922 struct icmp6hdr *pic;
923
924 if (nexthdr != IPPROTO_ICMPV6)
925 return 0;
926
927 if (!pskb_may_pull(skb, sizeof(struct icmp6hdr)))
928 return 0;
929
930 pic = (struct icmp6hdr *)skb->h.raw;
931
932 switch (pic->icmp6_type) {
933 case ICMPV6_MGM_QUERY:
934 case ICMPV6_MGM_REPORT:
935 case ICMPV6_MGM_REDUCTION:
936 case ICMPV6_MLD2_REPORT:
937 return 1;
938 default:
939 break;
940 }
941 return 0;
942}
943
944/*
945 * check if the interface/address pair is valid
946 */
947int ipv6_chk_mcast_addr(struct net_device *dev, struct in6_addr *group,
948 struct in6_addr *src_addr)
949{
950 struct inet6_dev *idev;
951 struct ifmcaddr6 *mc;
952 int rv = 0;
953
954 idev = in6_dev_get(dev);
955 if (idev) {
956 read_lock_bh(&idev->lock);
957 for (mc = idev->mc_list; mc; mc=mc->next) {
958 if (ipv6_addr_equal(&mc->mca_addr, group))
959 break;
960 }
961 if (mc) {
962 if (src_addr && !ipv6_addr_any(src_addr)) {
963 struct ip6_sf_list *psf;
964
965 spin_lock_bh(&mc->mca_lock);
966 for (psf=mc->mca_sources;psf;psf=psf->sf_next) {
967 if (ipv6_addr_equal(&psf->sf_addr, src_addr))
968 break;
969 }
970 if (psf)
971 rv = psf->sf_count[MCAST_INCLUDE] ||
972 psf->sf_count[MCAST_EXCLUDE] !=
973 mc->mca_sfcount[MCAST_EXCLUDE];
974 else
975 rv = mc->mca_sfcount[MCAST_EXCLUDE] !=0;
976 spin_unlock_bh(&mc->mca_lock);
977 } else
978 rv = 1; /* don't filter unspecified source */
979 }
980 read_unlock_bh(&idev->lock);
981 in6_dev_put(idev);
982 }
983 return rv;
984}
985
986static void mld_gq_start_timer(struct inet6_dev *idev)
987{
988 int tv = net_random() % idev->mc_maxdelay;
989
990 idev->mc_gq_running = 1;
991 if (!mod_timer(&idev->mc_gq_timer, jiffies+tv+2))
992 in6_dev_hold(idev);
993}
994
995static void mld_ifc_start_timer(struct inet6_dev *idev, int delay)
996{
997 int tv = net_random() % delay;
998
999 if (!mod_timer(&idev->mc_ifc_timer, jiffies+tv+2))
1000 in6_dev_hold(idev);
1001}
1002
1003/*
1004 * IGMP handling (alias multicast ICMPv6 messages)
1005 */
1006
1007static void igmp6_group_queried(struct ifmcaddr6 *ma, unsigned long resptime)
1008{
1009 unsigned long delay = resptime;
1010
1011 /* Do not start timer for these addresses */
1012 if (ipv6_addr_is_ll_all_nodes(&ma->mca_addr) ||
1013 IPV6_ADDR_MC_SCOPE(&ma->mca_addr) < IPV6_ADDR_SCOPE_LINKLOCAL)
1014 return;
1015
1016 if (del_timer(&ma->mca_timer)) {
1017 atomic_dec(&ma->mca_refcnt);
1018 delay = ma->mca_timer.expires - jiffies;
1019 }
1020
1021 if (delay >= resptime) {
1022 if (resptime)
1023 delay = net_random() % resptime;
1024 else
1025 delay = 1;
1026 }
1027 ma->mca_timer.expires = jiffies + delay;
1028 if (!mod_timer(&ma->mca_timer, jiffies + delay))
1029 atomic_inc(&ma->mca_refcnt);
1030 ma->mca_flags |= MAF_TIMER_RUNNING;
1031}
1032
1033static void mld_marksources(struct ifmcaddr6 *pmc, int nsrcs,
1034 struct in6_addr *srcs)
1035{
1036 struct ip6_sf_list *psf;
1037 int i, scount;
1038
1039 scount = 0;
1040 for (psf=pmc->mca_sources; psf; psf=psf->sf_next) {
1041 if (scount == nsrcs)
1042 break;
1043 for (i=0; i<nsrcs; i++)
1044 if (ipv6_addr_equal(&srcs[i], &psf->sf_addr)) {
1045 psf->sf_gsresp = 1;
1046 scount++;
1047 break;
1048 }
1049 }
1050}
1051
1052int igmp6_event_query(struct sk_buff *skb)
1053{
1054 struct mld2_query *mlh2 = (struct mld2_query *) skb->h.raw;
1055 struct ifmcaddr6 *ma;
1056 struct in6_addr *group;
1057 unsigned long max_delay;
1058 struct inet6_dev *idev;
1059 struct icmp6hdr *hdr;
1060 int group_type;
1061 int mark = 0;
1062 int len;
1063
1064 if (!pskb_may_pull(skb, sizeof(struct in6_addr)))
1065 return -EINVAL;
1066
1067 /* compute payload length excluding extension headers */
1068 len = ntohs(skb->nh.ipv6h->payload_len) + sizeof(struct ipv6hdr);
1069 len -= (char *)skb->h.raw - (char *)skb->nh.ipv6h;
1070
1071 /* Drop queries with not link local source */
1072 if (!(ipv6_addr_type(&skb->nh.ipv6h->saddr)&IPV6_ADDR_LINKLOCAL))
1073 return -EINVAL;
1074
1075 idev = in6_dev_get(skb->dev);
1076
1077 if (idev == NULL)
1078 return 0;
1079
1080 hdr = (struct icmp6hdr *) skb->h.raw;
1081 group = (struct in6_addr *) (hdr + 1);
1082 group_type = ipv6_addr_type(group);
1083
1084 if (group_type != IPV6_ADDR_ANY &&
1085 !(group_type&IPV6_ADDR_MULTICAST)) {
1086 in6_dev_put(idev);
1087 return -EINVAL;
1088 }
1089
1090 if (len == 24) {
1091 int switchback;
1092 /* MLDv1 router present */
1093
1094 /* Translate milliseconds to jiffies */
1095 max_delay = (ntohs(hdr->icmp6_maxdelay)*HZ)/1000;
1096
1097 switchback = (idev->mc_qrv + 1) * max_delay;
1098 idev->mc_v1_seen = jiffies + switchback;
1099
1100 /* cancel the interface change timer */
1101 idev->mc_ifc_count = 0;
1102 if (del_timer(&idev->mc_ifc_timer))
1103 __in6_dev_put(idev);
1104 /* clear deleted report items */
1105 mld_clear_delrec(idev);
1106 } else if (len >= 28) {
1107 max_delay = (MLDV2_MRC(ntohs(mlh2->mrc))*HZ)/1000;
1108 if (!max_delay)
1109 max_delay = 1;
1110 idev->mc_maxdelay = max_delay;
1111 if (mlh2->qrv)
1112 idev->mc_qrv = mlh2->qrv;
1113 if (group_type == IPV6_ADDR_ANY) { /* general query */
1114 if (mlh2->nsrcs) {
1115 in6_dev_put(idev);
1116 return -EINVAL; /* no sources allowed */
1117 }
1118 mld_gq_start_timer(idev);
1119 in6_dev_put(idev);
1120 return 0;
1121 }
1122 /* mark sources to include, if group & source-specific */
1123 mark = mlh2->nsrcs != 0;
1124 } else {
1125 in6_dev_put(idev);
1126 return -EINVAL;
1127 }
1128
1129 read_lock_bh(&idev->lock);
1130 if (group_type == IPV6_ADDR_ANY) {
1131 for (ma = idev->mc_list; ma; ma=ma->next) {
1132 spin_lock_bh(&ma->mca_lock);
1133 igmp6_group_queried(ma, max_delay);
1134 spin_unlock_bh(&ma->mca_lock);
1135 }
1136 } else {
1137 for (ma = idev->mc_list; ma; ma=ma->next) {
1138 if (group_type != IPV6_ADDR_ANY &&
1139 !ipv6_addr_equal(group, &ma->mca_addr))
1140 continue;
1141 spin_lock_bh(&ma->mca_lock);
1142 if (ma->mca_flags & MAF_TIMER_RUNNING) {
1143 /* gsquery <- gsquery && mark */
1144 if (!mark)
1145 ma->mca_flags &= ~MAF_GSQUERY;
1146 } else {
1147 /* gsquery <- mark */
1148 if (mark)
1149 ma->mca_flags |= MAF_GSQUERY;
1150 else
1151 ma->mca_flags &= ~MAF_GSQUERY;
1152 }
1153 if (ma->mca_flags & MAF_GSQUERY)
1154 mld_marksources(ma, ntohs(mlh2->nsrcs),
1155 mlh2->srcs);
1156 igmp6_group_queried(ma, max_delay);
1157 spin_unlock_bh(&ma->mca_lock);
1158 if (group_type != IPV6_ADDR_ANY)
1159 break;
1160 }
1161 }
1162 read_unlock_bh(&idev->lock);
1163 in6_dev_put(idev);
1164
1165 return 0;
1166}
1167
1168
1169int igmp6_event_report(struct sk_buff *skb)
1170{
1171 struct ifmcaddr6 *ma;
1172 struct in6_addr *addrp;
1173 struct inet6_dev *idev;
1174 struct icmp6hdr *hdr;
1175 int addr_type;
1176
1177 /* Our own report looped back. Ignore it. */
1178 if (skb->pkt_type == PACKET_LOOPBACK)
1179 return 0;
1180
1181 if (!pskb_may_pull(skb, sizeof(struct in6_addr)))
1182 return -EINVAL;
1183
1184 hdr = (struct icmp6hdr*) skb->h.raw;
1185
1186 /* Drop reports with not link local source */
1187 addr_type = ipv6_addr_type(&skb->nh.ipv6h->saddr);
1188 if (addr_type != IPV6_ADDR_ANY &&
1189 !(addr_type&IPV6_ADDR_LINKLOCAL))
1190 return -EINVAL;
1191
1192 addrp = (struct in6_addr *) (hdr + 1);
1193
1194 idev = in6_dev_get(skb->dev);
1195 if (idev == NULL)
1196 return -ENODEV;
1197
1198 /*
1199 * Cancel the timer for this group
1200 */
1201
1202 read_lock_bh(&idev->lock);
1203 for (ma = idev->mc_list; ma; ma=ma->next) {
1204 if (ipv6_addr_equal(&ma->mca_addr, addrp)) {
1205 spin_lock(&ma->mca_lock);
1206 if (del_timer(&ma->mca_timer))
1207 atomic_dec(&ma->mca_refcnt);
1208 ma->mca_flags &= ~(MAF_LAST_REPORTER|MAF_TIMER_RUNNING);
1209 spin_unlock(&ma->mca_lock);
1210 break;
1211 }
1212 }
1213 read_unlock_bh(&idev->lock);
1214 in6_dev_put(idev);
1215 return 0;
1216}
1217
1218static int is_in(struct ifmcaddr6 *pmc, struct ip6_sf_list *psf, int type,
1219 int gdeleted, int sdeleted)
1220{
1221 switch (type) {
1222 case MLD2_MODE_IS_INCLUDE:
1223 case MLD2_MODE_IS_EXCLUDE:
1224 if (gdeleted || sdeleted)
1225 return 0;
1226 return !((pmc->mca_flags & MAF_GSQUERY) && !psf->sf_gsresp);
1227 case MLD2_CHANGE_TO_INCLUDE:
1228 if (gdeleted || sdeleted)
1229 return 0;
1230 return psf->sf_count[MCAST_INCLUDE] != 0;
1231 case MLD2_CHANGE_TO_EXCLUDE:
1232 if (gdeleted || sdeleted)
1233 return 0;
1234 if (pmc->mca_sfcount[MCAST_EXCLUDE] == 0 ||
1235 psf->sf_count[MCAST_INCLUDE])
1236 return 0;
1237 return pmc->mca_sfcount[MCAST_EXCLUDE] ==
1238 psf->sf_count[MCAST_EXCLUDE];
1239 case MLD2_ALLOW_NEW_SOURCES:
1240 if (gdeleted || !psf->sf_crcount)
1241 return 0;
1242 return (pmc->mca_sfmode == MCAST_INCLUDE) ^ sdeleted;
1243 case MLD2_BLOCK_OLD_SOURCES:
1244 if (pmc->mca_sfmode == MCAST_INCLUDE)
1245 return gdeleted || (psf->sf_crcount && sdeleted);
1246 return psf->sf_crcount && !gdeleted && !sdeleted;
1247 }
1248 return 0;
1249}
1250
1251static int
1252mld_scount(struct ifmcaddr6 *pmc, int type, int gdeleted, int sdeleted)
1253{
1254 struct ip6_sf_list *psf;
1255 int scount = 0;
1256
1257 for (psf=pmc->mca_sources; psf; psf=psf->sf_next) {
1258 if (!is_in(pmc, psf, type, gdeleted, sdeleted))
1259 continue;
1260 scount++;
1261 }
1262 return scount;
1263}
1264
1265static struct sk_buff *mld_newpack(struct net_device *dev, int size)
1266{
1267 struct sock *sk = igmp6_socket->sk;
1268 struct sk_buff *skb;
1269 struct mld2_report *pmr;
1270 struct in6_addr addr_buf;
1271 int err;
1272 u8 ra[8] = { IPPROTO_ICMPV6, 0,
1273 IPV6_TLV_ROUTERALERT, 2, 0, 0,
1274 IPV6_TLV_PADN, 0 };
1275
1276 /* we assume size > sizeof(ra) here */
1277 skb = sock_alloc_send_skb(sk, size + LL_RESERVED_SPACE(dev), 1, &err);
1278
1279 if (skb == 0)
1280 return NULL;
1281
1282 skb_reserve(skb, LL_RESERVED_SPACE(dev));
1283 if (dev->hard_header) {
1284 unsigned char ha[MAX_ADDR_LEN];
1285
1286 ndisc_mc_map(&mld2_all_mcr, ha, dev, 1);
1287 if (dev->hard_header(skb, dev, ETH_P_IPV6,ha,NULL,size) < 0) {
1288 kfree_skb(skb);
1289 return NULL;
1290 }
1291 }
1292
1293 if (ipv6_get_lladdr(dev, &addr_buf)) {
1294 /* <draft-ietf-magma-mld-source-05.txt>:
1295 * use unspecified address as the source address
1296 * when a valid link-local address is not available.
1297 */
1298 memset(&addr_buf, 0, sizeof(addr_buf));
1299 }
1300
1301 ip6_nd_hdr(sk, skb, dev, &addr_buf, &mld2_all_mcr, NEXTHDR_HOP, 0);
1302
1303 memcpy(skb_put(skb, sizeof(ra)), ra, sizeof(ra));
1304
1305 pmr =(struct mld2_report *)skb_put(skb, sizeof(*pmr));
1306 skb->h.raw = (unsigned char *)pmr;
1307 pmr->type = ICMPV6_MLD2_REPORT;
1308 pmr->resv1 = 0;
1309 pmr->csum = 0;
1310 pmr->resv2 = 0;
1311 pmr->ngrec = 0;
1312 return skb;
1313}
1314
1315static void mld_sendpack(struct sk_buff *skb)
1316{
1317 struct ipv6hdr *pip6 = skb->nh.ipv6h;
1318 struct mld2_report *pmr = (struct mld2_report *)skb->h.raw;
1319 int payload_len, mldlen;
1320 struct inet6_dev *idev = in6_dev_get(skb->dev);
1321 int err;
1322
1323 IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
1324 payload_len = skb->tail - (unsigned char *)skb->nh.ipv6h -
1325 sizeof(struct ipv6hdr);
1326 mldlen = skb->tail - skb->h.raw;
1327 pip6->payload_len = htons(payload_len);
1328
1329 pmr->csum = csum_ipv6_magic(&pip6->saddr, &pip6->daddr, mldlen,
1330 IPPROTO_ICMPV6, csum_partial(skb->h.raw, mldlen, 0));
1331 err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, skb->dev,
1332 dev_queue_xmit);
1333 if (!err) {
1334 ICMP6_INC_STATS(idev,ICMP6_MIB_OUTMSGS);
1335 IP6_INC_STATS(IPSTATS_MIB_OUTMCASTPKTS);
1336 } else
1337 IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
1338
1339 if (likely(idev != NULL))
1340 in6_dev_put(idev);
1341}
1342
1343static int grec_size(struct ifmcaddr6 *pmc, int type, int gdel, int sdel)
1344{
1345 return sizeof(struct mld2_grec) + 4*mld_scount(pmc,type,gdel,sdel);
1346}
1347
1348static struct sk_buff *add_grhead(struct sk_buff *skb, struct ifmcaddr6 *pmc,
1349 int type, struct mld2_grec **ppgr)
1350{
1351 struct net_device *dev = pmc->idev->dev;
1352 struct mld2_report *pmr;
1353 struct mld2_grec *pgr;
1354
1355 if (!skb)
1356 skb = mld_newpack(dev, dev->mtu);
1357 if (!skb)
1358 return NULL;
1359 pgr = (struct mld2_grec *)skb_put(skb, sizeof(struct mld2_grec));
1360 pgr->grec_type = type;
1361 pgr->grec_auxwords = 0;
1362 pgr->grec_nsrcs = 0;
1363 pgr->grec_mca = pmc->mca_addr; /* structure copy */
1364 pmr = (struct mld2_report *)skb->h.raw;
1365 pmr->ngrec = htons(ntohs(pmr->ngrec)+1);
1366 *ppgr = pgr;
1367 return skb;
1368}
1369
/*
 * Room left in a report packet: 0 for a NULL skb, the distance from the
 * current length to the device MTU when the skb has a device, otherwise
 * the skb's tailroom.  Note that @skb is evaluated more than once.
 */
#define AVAILABLE(skb) \
	(!(skb) ? 0 : \
	 ((skb)->dev ? (skb)->dev->mtu - (skb)->len : skb_tailroom(skb)))
1372
1373static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc,
1374 int type, int gdeleted, int sdeleted)
1375{
1376 struct net_device *dev = pmc->idev->dev;
1377 struct mld2_report *pmr;
1378 struct mld2_grec *pgr = NULL;
1379 struct ip6_sf_list *psf, *psf_next, *psf_prev, **psf_list;
1380 int scount, first, isquery, truncate;
1381
1382 if (pmc->mca_flags & MAF_NOREPORT)
1383 return skb;
1384
1385 isquery = type == MLD2_MODE_IS_INCLUDE ||
1386 type == MLD2_MODE_IS_EXCLUDE;
1387 truncate = type == MLD2_MODE_IS_EXCLUDE ||
1388 type == MLD2_CHANGE_TO_EXCLUDE;
1389
1390 psf_list = sdeleted ? &pmc->mca_tomb : &pmc->mca_sources;
1391
1392 if (!*psf_list) {
1393 if (type == MLD2_ALLOW_NEW_SOURCES ||
1394 type == MLD2_BLOCK_OLD_SOURCES)
1395 return skb;
1396 if (pmc->mca_crcount || isquery) {
1397 /* make sure we have room for group header and at
1398 * least one source.
1399 */
1400 if (skb && AVAILABLE(skb) < sizeof(struct mld2_grec)+
1401 sizeof(struct in6_addr)) {
1402 mld_sendpack(skb);
1403 skb = NULL; /* add_grhead will get a new one */
1404 }
1405 skb = add_grhead(skb, pmc, type, &pgr);
1406 }
1407 return skb;
1408 }
1409 pmr = skb ? (struct mld2_report *)skb->h.raw : NULL;
1410
1411 /* EX and TO_EX get a fresh packet, if needed */
1412 if (truncate) {
1413 if (pmr && pmr->ngrec &&
1414 AVAILABLE(skb) < grec_size(pmc, type, gdeleted, sdeleted)) {
1415 if (skb)
1416 mld_sendpack(skb);
1417 skb = mld_newpack(dev, dev->mtu);
1418 }
1419 }
1420 first = 1;
1421 scount = 0;
1422 psf_prev = NULL;
1423 for (psf=*psf_list; psf; psf=psf_next) {
1424 struct in6_addr *psrc;
1425
1426 psf_next = psf->sf_next;
1427
1428 if (!is_in(pmc, psf, type, gdeleted, sdeleted)) {
1429 psf_prev = psf;
1430 continue;
1431 }
1432
1433 /* clear marks on query responses */
1434 if (isquery)
1435 psf->sf_gsresp = 0;
1436
1437 if (AVAILABLE(skb) < sizeof(*psrc) +
1438 first*sizeof(struct mld2_grec)) {
1439 if (truncate && !first)
1440 break; /* truncate these */
1441 if (pgr)
1442 pgr->grec_nsrcs = htons(scount);
1443 if (skb)
1444 mld_sendpack(skb);
1445 skb = mld_newpack(dev, dev->mtu);
1446 first = 1;
1447 scount = 0;
1448 }
1449 if (first) {
1450 skb = add_grhead(skb, pmc, type, &pgr);
1451 first = 0;
1452 }
1453 psrc = (struct in6_addr *)skb_put(skb, sizeof(*psrc));
1454 *psrc = psf->sf_addr;
1455 scount++;
1456 if ((type == MLD2_ALLOW_NEW_SOURCES ||
1457 type == MLD2_BLOCK_OLD_SOURCES) && psf->sf_crcount) {
1458 psf->sf_crcount--;
1459 if ((sdeleted || gdeleted) && psf->sf_crcount == 0) {
1460 if (psf_prev)
1461 psf_prev->sf_next = psf->sf_next;
1462 else
1463 *psf_list = psf->sf_next;
1464 kfree(psf);
1465 continue;
1466 }
1467 }
1468 psf_prev = psf;
1469 }
1470 if (pgr)
1471 pgr->grec_nsrcs = htons(scount);
1472
1473 if (isquery)
1474 pmc->mca_flags &= ~MAF_GSQUERY; /* clear query state */
1475 return skb;
1476}
1477
1478static void mld_send_report(struct inet6_dev *idev, struct ifmcaddr6 *pmc)
1479{
1480 struct sk_buff *skb = NULL;
1481 int type;
1482
1483 if (!pmc) {
1484 read_lock_bh(&idev->lock);
1485 for (pmc=idev->mc_list; pmc; pmc=pmc->next) {
1486 if (pmc->mca_flags & MAF_NOREPORT)
1487 continue;
1488 spin_lock_bh(&pmc->mca_lock);
1489 if (pmc->mca_sfcount[MCAST_EXCLUDE])
1490 type = MLD2_MODE_IS_EXCLUDE;
1491 else
1492 type = MLD2_MODE_IS_INCLUDE;
1493 skb = add_grec(skb, pmc, type, 0, 0);
1494 spin_unlock_bh(&pmc->mca_lock);
1495 }
1496 read_unlock_bh(&idev->lock);
1497 } else {
1498 spin_lock_bh(&pmc->mca_lock);
1499 if (pmc->mca_sfcount[MCAST_EXCLUDE])
1500 type = MLD2_MODE_IS_EXCLUDE;
1501 else
1502 type = MLD2_MODE_IS_INCLUDE;
1503 skb = add_grec(skb, pmc, type, 0, 0);
1504 spin_unlock_bh(&pmc->mca_lock);
1505 }
1506 if (skb)
1507 mld_sendpack(skb);
1508}
1509
1510/*
1511 * remove zero-count source records from a source filter list
1512 */
1513static void mld_clear_zeros(struct ip6_sf_list **ppsf)
1514{
1515 struct ip6_sf_list *psf_prev, *psf_next, *psf;
1516
1517 psf_prev = NULL;
1518 for (psf=*ppsf; psf; psf = psf_next) {
1519 psf_next = psf->sf_next;
1520 if (psf->sf_crcount == 0) {
1521 if (psf_prev)
1522 psf_prev->sf_next = psf->sf_next;
1523 else
1524 *ppsf = psf->sf_next;
1525 kfree(psf);
1526 } else
1527 psf_prev = psf;
1528 }
1529}
1530
1531static void mld_send_cr(struct inet6_dev *idev)
1532{
1533 struct ifmcaddr6 *pmc, *pmc_prev, *pmc_next;
1534 struct sk_buff *skb = NULL;
1535 int type, dtype;
1536
1537 read_lock_bh(&idev->lock);
1538 write_lock_bh(&idev->mc_lock);
1539
1540 /* deleted MCA's */
1541 pmc_prev = NULL;
1542 for (pmc=idev->mc_tomb; pmc; pmc=pmc_next) {
1543 pmc_next = pmc->next;
1544 if (pmc->mca_sfmode == MCAST_INCLUDE) {
1545 type = MLD2_BLOCK_OLD_SOURCES;
1546 dtype = MLD2_BLOCK_OLD_SOURCES;
1547 skb = add_grec(skb, pmc, type, 1, 0);
1548 skb = add_grec(skb, pmc, dtype, 1, 1);
1549 }
1550 if (pmc->mca_crcount) {
1551 pmc->mca_crcount--;
1552 if (pmc->mca_sfmode == MCAST_EXCLUDE) {
1553 type = MLD2_CHANGE_TO_INCLUDE;
1554 skb = add_grec(skb, pmc, type, 1, 0);
1555 }
1556 if (pmc->mca_crcount == 0) {
1557 mld_clear_zeros(&pmc->mca_tomb);
1558 mld_clear_zeros(&pmc->mca_sources);
1559 }
1560 }
1561 if (pmc->mca_crcount == 0 && !pmc->mca_tomb &&
1562 !pmc->mca_sources) {
1563 if (pmc_prev)
1564 pmc_prev->next = pmc_next;
1565 else
1566 idev->mc_tomb = pmc_next;
1567 in6_dev_put(pmc->idev);
1568 kfree(pmc);
1569 } else
1570 pmc_prev = pmc;
1571 }
1572 write_unlock_bh(&idev->mc_lock);
1573
1574 /* change recs */
1575 for (pmc=idev->mc_list; pmc; pmc=pmc->next) {
1576 spin_lock_bh(&pmc->mca_lock);
1577 if (pmc->mca_sfcount[MCAST_EXCLUDE]) {
1578 type = MLD2_BLOCK_OLD_SOURCES;
1579 dtype = MLD2_ALLOW_NEW_SOURCES;
1580 } else {
1581 type = MLD2_ALLOW_NEW_SOURCES;
1582 dtype = MLD2_BLOCK_OLD_SOURCES;
1583 }
1584 skb = add_grec(skb, pmc, type, 0, 0);
1585 skb = add_grec(skb, pmc, dtype, 0, 1); /* deleted sources */
1586
1587 /* filter mode changes */
1588 if (pmc->mca_crcount) {
1589 pmc->mca_crcount--;
1590 if (pmc->mca_sfmode == MCAST_EXCLUDE)
1591 type = MLD2_CHANGE_TO_EXCLUDE;
1592 else
1593 type = MLD2_CHANGE_TO_INCLUDE;
1594 skb = add_grec(skb, pmc, type, 0, 0);
1595 }
1596 spin_unlock_bh(&pmc->mca_lock);
1597 }
1598 read_unlock_bh(&idev->lock);
1599 if (!skb)
1600 return;
1601 (void) mld_sendpack(skb);
1602}
1603
1604static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type)
1605{
1606 struct sock *sk = igmp6_socket->sk;
1607 struct inet6_dev *idev;
1608 struct sk_buff *skb;
1609 struct icmp6hdr *hdr;
1610 struct in6_addr *snd_addr;
1611 struct in6_addr *addrp;
1612 struct in6_addr addr_buf;
1613 struct in6_addr all_routers;
1614 int err, len, payload_len, full_len;
1615 u8 ra[8] = { IPPROTO_ICMPV6, 0,
1616 IPV6_TLV_ROUTERALERT, 2, 0, 0,
1617 IPV6_TLV_PADN, 0 };
1618
1619 IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
1620 snd_addr = addr;
1621 if (type == ICMPV6_MGM_REDUCTION) {
1622 snd_addr = &all_routers;
1623 ipv6_addr_all_routers(&all_routers);
1624 }
1625
1626 len = sizeof(struct icmp6hdr) + sizeof(struct in6_addr);
1627 payload_len = len + sizeof(ra);
1628 full_len = sizeof(struct ipv6hdr) + payload_len;
1629
1630 skb = sock_alloc_send_skb(sk, LL_RESERVED_SPACE(dev) + full_len, 1, &err);
1631
1632 if (skb == NULL) {
1633 IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
1634 return;
1635 }
1636
1637 skb_reserve(skb, LL_RESERVED_SPACE(dev));
1638 if (dev->hard_header) {
1639 unsigned char ha[MAX_ADDR_LEN];
1640 ndisc_mc_map(snd_addr, ha, dev, 1);
1641 if (dev->hard_header(skb, dev, ETH_P_IPV6, ha, NULL, full_len) < 0)
1642 goto out;
1643 }
1644
1645 if (ipv6_get_lladdr(dev, &addr_buf)) {
1646 /* <draft-ietf-magma-mld-source-05.txt>:
1647 * use unspecified address as the source address
1648 * when a valid link-local address is not available.
1649 */
1650 memset(&addr_buf, 0, sizeof(addr_buf));
1651 }
1652
1653 ip6_nd_hdr(sk, skb, dev, &addr_buf, snd_addr, NEXTHDR_HOP, payload_len);
1654
1655 memcpy(skb_put(skb, sizeof(ra)), ra, sizeof(ra));
1656
1657 hdr = (struct icmp6hdr *) skb_put(skb, sizeof(struct icmp6hdr));
1658 memset(hdr, 0, sizeof(struct icmp6hdr));
1659 hdr->icmp6_type = type;
1660
1661 addrp = (struct in6_addr *) skb_put(skb, sizeof(struct in6_addr));
1662 ipv6_addr_copy(addrp, addr);
1663
1664 hdr->icmp6_cksum = csum_ipv6_magic(&addr_buf, snd_addr, len,
1665 IPPROTO_ICMPV6,
1666 csum_partial((__u8 *) hdr, len, 0));
1667
1668 idev = in6_dev_get(skb->dev);
1669
1670 err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, skb->dev,
1671 dev_queue_xmit);
1672 if (!err) {
1673 if (type == ICMPV6_MGM_REDUCTION)
1674 ICMP6_INC_STATS(idev, ICMP6_MIB_OUTGROUPMEMBREDUCTIONS);
1675 else
1676 ICMP6_INC_STATS(idev, ICMP6_MIB_OUTGROUPMEMBRESPONSES);
1677 ICMP6_INC_STATS(idev, ICMP6_MIB_OUTMSGS);
1678 IP6_INC_STATS(IPSTATS_MIB_OUTMCASTPKTS);
1679 } else
1680 IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
1681
1682 if (likely(idev != NULL))
1683 in6_dev_put(idev);
1684 return;
1685
1686out:
1687 IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
1688 kfree_skb(skb);
1689}
1690
1691static int ip6_mc_del1_src(struct ifmcaddr6 *pmc, int sfmode,
1692 struct in6_addr *psfsrc)
1693{
1694 struct ip6_sf_list *psf, *psf_prev;
1695 int rv = 0;
1696
1697 psf_prev = NULL;
1698 for (psf=pmc->mca_sources; psf; psf=psf->sf_next) {
1699 if (ipv6_addr_equal(&psf->sf_addr, psfsrc))
1700 break;
1701 psf_prev = psf;
1702 }
1703 if (!psf || psf->sf_count[sfmode] == 0) {
1704 /* source filter not found, or count wrong => bug */
1705 return -ESRCH;
1706 }
1707 psf->sf_count[sfmode]--;
1708 if (!psf->sf_count[MCAST_INCLUDE] && !psf->sf_count[MCAST_EXCLUDE]) {
1709 struct inet6_dev *idev = pmc->idev;
1710
1711 /* no more filters for this source */
1712 if (psf_prev)
1713 psf_prev->sf_next = psf->sf_next;
1714 else
1715 pmc->mca_sources = psf->sf_next;
1716 if (psf->sf_oldin && !(pmc->mca_flags & MAF_NOREPORT) &&
1717 !MLD_V1_SEEN(idev)) {
1718 psf->sf_crcount = idev->mc_qrv;
1719 psf->sf_next = pmc->mca_tomb;
1720 pmc->mca_tomb = psf;
1721 rv = 1;
1722 } else
1723 kfree(psf);
1724 }
1725 return rv;
1726}
1727
/*
 * Remove the @sfcount source filters in @psfsrc[] (mode @sfmode) from the
 * entry for multicast address @pmca on @idev.
 *
 * When @delta is zero the per-mode membership count mca_sfcount[sfmode]
 * is decremented as well.
 *
 * Returns 0 on success, -ENODEV if @idev is NULL, -ESRCH if @pmca is not
 * on idev->mc_list, -EINVAL if @delta is zero and mca_sfcount[sfmode] is
 * already zero, or the first negative value returned by
 * ip6_mc_del1_src().
 */
static int ip6_mc_del_src(struct inet6_dev *idev, struct in6_addr *pmca,
			  int sfmode, int sfcount, struct in6_addr *psfsrc,
			  int delta)
{
	struct ifmcaddr6 *pmc;
	int changerec = 0;
	int i, err;

	if (!idev)
		return -ENODEV;
	read_lock_bh(&idev->lock);
	for (pmc=idev->mc_list; pmc; pmc=pmc->next) {
		if (ipv6_addr_equal(pmca, &pmc->mca_addr))
			break;
	}
	if (!pmc) {
		/* MCA not found?? bug */
		read_unlock_bh(&idev->lock);
		return -ESRCH;
	}
	spin_lock_bh(&pmc->mca_lock);
	/* record each source's current state in sf_oldin before changing counts */
	sf_markstate(pmc);
	if (!delta) {
		if (!pmc->mca_sfcount[sfmode]) {
			spin_unlock_bh(&pmc->mca_lock);
			read_unlock_bh(&idev->lock);
			return -EINVAL;
		}
		pmc->mca_sfcount[sfmode]--;
	}
	err = 0;
	/* every source is processed even after a failure; the first error is kept */
	for (i=0; i<sfcount; i++) {
		int rv = ip6_mc_del1_src(pmc, sfmode, &psfsrc[i]);

		changerec |= rv > 0;
		if (!err && rv < 0)
			err = rv;
	}
	if (pmc->mca_sfmode == MCAST_EXCLUDE &&
	    pmc->mca_sfcount[MCAST_EXCLUDE] == 0 &&
	    pmc->mca_sfcount[MCAST_INCLUDE]) {
		struct ip6_sf_list *psf;

		/* filter mode change */
		pmc->mca_sfmode = MCAST_INCLUDE;
		pmc->mca_crcount = idev->mc_qrv;
		idev->mc_ifc_count = pmc->mca_crcount;
		/* per-source retransmit counts are cleared on a mode change */
		for (psf=pmc->mca_sources; psf; psf = psf->sf_next)
			psf->sf_crcount = 0;
		mld_ifc_event(pmc->idev);
	} else if (sf_setstate(pmc) || changerec)
		mld_ifc_event(pmc->idev);
	spin_unlock_bh(&pmc->mca_lock);
	read_unlock_bh(&idev->lock);
	return err;
}
1784
1785/*
1786 * Add multicast single-source filter to the interface list
1787 */
1788static int ip6_mc_add1_src(struct ifmcaddr6 *pmc, int sfmode,
1789 struct in6_addr *psfsrc, int delta)
1790{
1791 struct ip6_sf_list *psf, *psf_prev;
1792
1793 psf_prev = NULL;
1794 for (psf=pmc->mca_sources; psf; psf=psf->sf_next) {
1795 if (ipv6_addr_equal(&psf->sf_addr, psfsrc))
1796 break;
1797 psf_prev = psf;
1798 }
1799 if (!psf) {
1800 psf = (struct ip6_sf_list *)kmalloc(sizeof(*psf), GFP_ATOMIC);
1801 if (!psf)
1802 return -ENOBUFS;
1803 memset(psf, 0, sizeof(*psf));
1804 psf->sf_addr = *psfsrc;
1805 if (psf_prev) {
1806 psf_prev->sf_next = psf;
1807 } else
1808 pmc->mca_sources = psf;
1809 }
1810 psf->sf_count[sfmode]++;
1811 return 0;
1812}
1813
1814static void sf_markstate(struct ifmcaddr6 *pmc)
1815{
1816 struct ip6_sf_list *psf;
1817 int mca_xcount = pmc->mca_sfcount[MCAST_EXCLUDE];
1818
1819 for (psf=pmc->mca_sources; psf; psf=psf->sf_next)
1820 if (pmc->mca_sfcount[MCAST_EXCLUDE]) {
1821 psf->sf_oldin = mca_xcount ==
1822 psf->sf_count[MCAST_EXCLUDE] &&
1823 !psf->sf_count[MCAST_INCLUDE];
1824 } else
1825 psf->sf_oldin = psf->sf_count[MCAST_INCLUDE] != 0;
1826}
1827
1828static int sf_setstate(struct ifmcaddr6 *pmc)
1829{
1830 struct ip6_sf_list *psf;
1831 int mca_xcount = pmc->mca_sfcount[MCAST_EXCLUDE];
1832 int qrv = pmc->idev->mc_qrv;
1833 int new_in, rv;
1834
1835 rv = 0;
1836 for (psf=pmc->mca_sources; psf; psf=psf->sf_next) {
1837 if (pmc->mca_sfcount[MCAST_EXCLUDE]) {
1838 new_in = mca_xcount == psf->sf_count[MCAST_EXCLUDE] &&
1839 !psf->sf_count[MCAST_INCLUDE];
1840 } else
1841 new_in = psf->sf_count[MCAST_INCLUDE] != 0;
1842 if (new_in != psf->sf_oldin) {
1843 psf->sf_crcount = qrv;
1844 rv++;
1845 }
1846 }
1847 return rv;
1848}
1849
1850/*
1851 * Add multicast source filter list to the interface list
1852 */
1853static int ip6_mc_add_src(struct inet6_dev *idev, struct in6_addr *pmca,
1854 int sfmode, int sfcount, struct in6_addr *psfsrc,
1855 int delta)
1856{
1857 struct ifmcaddr6 *pmc;
1858 int isexclude;
1859 int i, err;
1860
1861 if (!idev)
1862 return -ENODEV;
1863 read_lock_bh(&idev->lock);
1864 for (pmc=idev->mc_list; pmc; pmc=pmc->next) {
1865 if (ipv6_addr_equal(pmca, &pmc->mca_addr))
1866 break;
1867 }
1868 if (!pmc) {
1869 /* MCA not found?? bug */
1870 read_unlock_bh(&idev->lock);
1871 return -ESRCH;
1872 }
1873 spin_lock_bh(&pmc->mca_lock);
1874
1875 sf_markstate(pmc);
1876 isexclude = pmc->mca_sfmode == MCAST_EXCLUDE;
1877 if (!delta)
1878 pmc->mca_sfcount[sfmode]++;
1879 err = 0;
1880 for (i=0; i<sfcount; i++) {
1881 err = ip6_mc_add1_src(pmc, sfmode, &psfsrc[i], delta);
1882 if (err)
1883 break;
1884 }
1885 if (err) {
1886 int j;
1887
1888 if (!delta)
1889 pmc->mca_sfcount[sfmode]--;
1890 for (j=0; j<i; j++)
1891 (void) ip6_mc_del1_src(pmc, sfmode, &psfsrc[i]);
1892 } else if (isexclude != (pmc->mca_sfcount[MCAST_EXCLUDE] != 0)) {
1893 struct inet6_dev *idev = pmc->idev;
1894 struct ip6_sf_list *psf;
1895
1896 /* filter mode change */
1897 if (pmc->mca_sfcount[MCAST_EXCLUDE])
1898 pmc->mca_sfmode = MCAST_EXCLUDE;
1899 else if (pmc->mca_sfcount[MCAST_INCLUDE])
1900 pmc->mca_sfmode = MCAST_INCLUDE;
1901 /* else no filters; keep old mode for reports */
1902
1903 pmc->mca_crcount = idev->mc_qrv;
1904 idev->mc_ifc_count = pmc->mca_crcount;
1905 for (psf=pmc->mca_sources; psf; psf = psf->sf_next)
1906 psf->sf_crcount = 0;
1907 mld_ifc_event(idev);
1908 } else if (sf_setstate(pmc))
1909 mld_ifc_event(idev);
1910 spin_unlock_bh(&pmc->mca_lock);
1911 read_unlock_bh(&idev->lock);
1912 return err;
1913}
1914
1915static void ip6_mc_clear_src(struct ifmcaddr6 *pmc)
1916{
1917 struct ip6_sf_list *psf, *nextpsf;
1918
1919 for (psf=pmc->mca_tomb; psf; psf=nextpsf) {
1920 nextpsf = psf->sf_next;
1921 kfree(psf);
1922 }
1923 pmc->mca_tomb = NULL;
1924 for (psf=pmc->mca_sources; psf; psf=nextpsf) {
1925 nextpsf = psf->sf_next;
1926 kfree(psf);
1927 }
1928 pmc->mca_sources = NULL;
1929 pmc->mca_sfmode = MCAST_EXCLUDE;
1930 pmc->mca_sfcount[MCAST_EXCLUDE] = 0;
1931 pmc->mca_sfcount[MCAST_EXCLUDE] = 1;
1932}
1933
1934
/*
 * Announce membership of group @ma: send an ICMPV6_MGM_REPORT now and
 * (re)arm ma->mca_timer.  Nothing is done for addresses flagged
 * MAF_NOREPORT.
 */
static void igmp6_join_group(struct ifmcaddr6 *ma)
{
	unsigned long delay;

	if (ma->mca_flags & MAF_NOREPORT)
		return;

	igmp6_send(&ma->mca_addr, ma->idev->dev, ICMPV6_MGM_REPORT);

	/* random delay below IGMP6_UNSOLICITED_IVAL */
	delay = net_random() % IGMP6_UNSOLICITED_IVAL;

	spin_lock_bh(&ma->mca_lock);
	if (del_timer(&ma->mca_timer)) {
		/*
		 * A timer was pending: drop the refcount taken for it and
		 * reuse the time it had left instead of the random delay.
		 */
		atomic_dec(&ma->mca_refcnt);
		delay = ma->mca_timer.expires - jiffies;
	}

	/* NOTE(review): a zero return presumably means the timer was not pending, hence the new reference - confirm */
	if (!mod_timer(&ma->mca_timer, jiffies + delay))
		atomic_inc(&ma->mca_refcnt);
	ma->mca_flags |= MAF_TIMER_RUNNING | MAF_LAST_REPORTER;
	spin_unlock_bh(&ma->mca_lock);
}
1957
1958static int ip6_mc_leave_src(struct sock *sk, struct ipv6_mc_socklist *iml,
1959 struct inet6_dev *idev)
1960{
1961 int err;
1962
1963 if (iml->sflist == 0) {
1964 /* any-source empty exclude case */
1965 return ip6_mc_del_src(idev, &iml->addr, iml->sfmode, 0, NULL, 0);
1966 }
1967 err = ip6_mc_del_src(idev, &iml->addr, iml->sfmode,
1968 iml->sflist->sl_count, iml->sflist->sl_addr, 0);
1969 sock_kfree_s(sk, iml->sflist, IP6_SFLSIZE(iml->sflist->sl_max));
1970 iml->sflist = NULL;
1971 return err;
1972}
1973
1974static void igmp6_leave_group(struct ifmcaddr6 *ma)
1975{
1976 if (MLD_V1_SEEN(ma->idev)) {
1977 if (ma->mca_flags & MAF_LAST_REPORTER)
1978 igmp6_send(&ma->mca_addr, ma->idev->dev,
1979 ICMPV6_MGM_REDUCTION);
1980 } else {
1981 mld_add_delrec(ma->idev, ma);
1982 mld_ifc_event(ma->idev);
1983 }
1984}
1985
/*
 * Timer callback for idev->mc_gq_timer; @data is the struct inet6_dev
 * pointer stored in the timer by ipv6_mc_init_dev().  Clears
 * mc_gq_running and sends a report with a NULL group argument.
 */
static void mld_gq_timer_expire(unsigned long data)
{
	struct inet6_dev *idev = (struct inet6_dev *)data;

	idev->mc_gq_running = 0;
	mld_send_report(idev, NULL);
	/* presumably drops the reference taken when the timer was armed - confirm at the arming site */
	__in6_dev_put(idev);
}
1994
1995static void mld_ifc_timer_expire(unsigned long data)
1996{
1997 struct inet6_dev *idev = (struct inet6_dev *)data;
1998
1999 mld_send_cr(idev);
2000 if (idev->mc_ifc_count) {
2001 idev->mc_ifc_count--;
2002 if (idev->mc_ifc_count)
2003 mld_ifc_start_timer(idev, idev->mc_maxdelay);
2004 }
2005 __in6_dev_put(idev);
2006}
2007
2008static void mld_ifc_event(struct inet6_dev *idev)
2009{
2010 if (MLD_V1_SEEN(idev))
2011 return;
2012 idev->mc_ifc_count = idev->mc_qrv;
2013 mld_ifc_start_timer(idev, 1);
2014}
2015
2016
/*
 * Per-group timer callback; @data is the struct ifmcaddr6 pointer.
 * Sends a report for the group (ICMPV6_MGM_REPORT via igmp6_send() when
 * MLD_V1_SEEN() is true, mld_send_report() otherwise), then marks this
 * host as last reporter and clears MAF_TIMER_RUNNING.
 */
static void igmp6_timer_handler(unsigned long data)
{
	struct ifmcaddr6 *ma = (struct ifmcaddr6 *) data;

	if (MLD_V1_SEEN(ma->idev))
		igmp6_send(&ma->mca_addr, ma->idev->dev, ICMPV6_MGM_REPORT);
	else
		mld_send_report(ma->idev, ma);

	spin_lock(&ma->mca_lock);
	ma->mca_flags |= MAF_LAST_REPORTER;
	ma->mca_flags &= ~MAF_TIMER_RUNNING;
	spin_unlock(&ma->mca_lock);
	/* presumably releases the reference held for the armed timer (see igmp6_join_group) */
	ma_put(ma);
}
2032
/* Device going down */

/*
 * Stop multicast activity on @idev: zero the interface-change count,
 * cancel the interface-change and general-query timers (dropping a device
 * reference for each timer that was actually pending), call
 * igmp6_group_dropped() for every address on mc_list, and finally clear
 * the delete records.
 */
void ipv6_mc_down(struct inet6_dev *idev)
{
	struct ifmcaddr6 *i;

	/* Withdraw multicast list */

	read_lock_bh(&idev->lock);
	idev->mc_ifc_count = 0;
	if (del_timer(&idev->mc_ifc_timer))
		__in6_dev_put(idev);
	idev->mc_gq_running = 0;
	if (del_timer(&idev->mc_gq_timer))
		__in6_dev_put(idev);

	for (i = idev->mc_list; i; i=i->next)
		igmp6_group_dropped(i);
	read_unlock_bh(&idev->lock);

	/* done after idev->lock has been released */
	mld_clear_delrec(idev);
}
2055
2056
2057/* Device going up */
2058
2059void ipv6_mc_up(struct inet6_dev *idev)
2060{
2061 struct ifmcaddr6 *i;
2062
2063 /* Install multicast list, except for all-nodes (already installed) */
2064
2065 read_lock_bh(&idev->lock);
2066 for (i = idev->mc_list; i; i=i->next)
2067 igmp6_group_added(i);
2068 read_unlock_bh(&idev->lock);
2069}
2070
/* IPv6 device initialization. */

/*
 * Initialise the multicast state of @idev: the mc_lock, the general-query
 * and interface-change timers (both carry @idev as their data argument),
 * an empty tomb list, and default values for mc_qrv, mc_maxdelay and
 * mc_v1_seen.  Afterwards the device joins the all-nodes group.
 */
void ipv6_mc_init_dev(struct inet6_dev *idev)
{
	struct in6_addr maddr;

	write_lock_bh(&idev->lock);
	rwlock_init(&idev->mc_lock);
	idev->mc_gq_running = 0;
	init_timer(&idev->mc_gq_timer);
	idev->mc_gq_timer.data = (unsigned long) idev;
	idev->mc_gq_timer.function = &mld_gq_timer_expire;
	idev->mc_tomb = NULL;
	idev->mc_ifc_count = 0;
	init_timer(&idev->mc_ifc_timer);
	idev->mc_ifc_timer.data = (unsigned long) idev;
	idev->mc_ifc_timer.function = &mld_ifc_timer_expire;
	idev->mc_qrv = MLD_QRV_DEFAULT;
	idev->mc_maxdelay = IGMP6_UNSOLICITED_IVAL;
	idev->mc_v1_seen = 0;
	write_unlock_bh(&idev->lock);

	/* Add all-nodes address. */
	ipv6_addr_all_nodes(&maddr);
	/* return value of the join is not checked */
	ipv6_dev_mc_inc(idev->dev, &maddr);
}
2097
/*
 *	Device is about to be destroyed: clean up.
 *
 *	Stops the timers, leaves the all-nodes group (and the all-routers
 *	group when forwarding is enabled), then drops every address that
 *	is still on idev->mc_list.
 */

void ipv6_mc_destroy_dev(struct inet6_dev *idev)
{
	struct ifmcaddr6 *i;
	struct in6_addr maddr;

	/* Deactivate timers */
	ipv6_mc_down(idev);

	/* Delete all-nodes address. */
	ipv6_addr_all_nodes(&maddr);

	/* We cannot call ipv6_dev_mc_dec() directly, our caller in
	 * addrconf.c has NULL'd out dev->ip6_ptr so in6_dev_get() will
	 * fail.
	 */
	__ipv6_dev_mc_dec(idev, &maddr);

	if (idev->cnf.forwarding) {
		ipv6_addr_all_routers(&maddr);
		__ipv6_dev_mc_dec(idev, &maddr);
	}

	write_lock_bh(&idev->lock);
	while ((i = idev->mc_list) != NULL) {
		/* unlink the head under the lock ... */
		idev->mc_list = i->next;
		write_unlock_bh(&idev->lock);

		/* ... and drop it with idev->lock released */
		igmp6_group_dropped(i);
		ma_put(i);

		write_lock_bh(&idev->lock);
	}
	write_unlock_bh(&idev->lock);
}
2136
2137#ifdef CONFIG_PROC_FS
/*
 * Iterator state for the "igmp6" proc file, stored in seq->private.
 */
struct igmp6_mc_iter_state {
	struct net_device *dev;		/* device currently being walked */
	struct inet6_dev *idev;		/* its inet6_dev; non-NULL while its lock and a reference are held */
};

/* Fetch the iterator state from a seq_file. */
#define igmp6_mc_seq_private(seq)	((struct igmp6_mc_iter_state *)(seq)->private)
2144
/*
 * Position the iterator on the first multicast address of the first
 * device (starting at dev_base) that has a non-empty mc_list.
 *
 * On success the address is returned with idev->lock read-held and a
 * reference on the inet6_dev kept in state->idev; both are released by
 * igmp6_mc_get_next() or igmp6_mc_seq_stop().  Returns NULL, with
 * nothing held, when no device has a multicast address.
 */
static inline struct ifmcaddr6 *igmp6_mc_get_first(struct seq_file *seq)
{
	struct ifmcaddr6 *im = NULL;
	struct igmp6_mc_iter_state *state = igmp6_mc_seq_private(seq);

	for (state->dev = dev_base, state->idev = NULL;
	     state->dev;
	     state->dev = state->dev->next) {
		struct inet6_dev *idev;
		idev = in6_dev_get(state->dev);
		if (!idev)
			continue;
		read_lock_bh(&idev->lock);
		im = idev->mc_list;
		if (im) {
			/* leave with the lock and the reference held */
			state->idev = idev;
			break;
		}
		read_unlock_bh(&idev->lock);
		in6_dev_put(idev);
	}
	return im;
}
2168
/*
 * Advance from @im to the next multicast address, moving on to following
 * devices when the current list is exhausted.  The lock and reference of
 * a finished device are released before the next device is taken.
 * Returns NULL (state->idev set to NULL) at the end of the device list.
 */
static struct ifmcaddr6 *igmp6_mc_get_next(struct seq_file *seq, struct ifmcaddr6 *im)
{
	struct igmp6_mc_iter_state *state = igmp6_mc_seq_private(seq);

	im = im->next;
	while (!im) {
		if (likely(state->idev != NULL)) {
			read_unlock_bh(&state->idev->lock);
			in6_dev_put(state->idev);
		}
		state->dev = state->dev->next;
		if (!state->dev) {
			state->idev = NULL;
			break;
		}
		/* state->idev is NULL here for a device without IPv6 state, so the next round skips the unlock */
		state->idev = in6_dev_get(state->dev);
		if (!state->idev)
			continue;
		read_lock_bh(&state->idev->lock);
		im = state->idev->mc_list;
	}
	return im;
}
2192
2193static struct ifmcaddr6 *igmp6_mc_get_idx(struct seq_file *seq, loff_t pos)
2194{
2195 struct ifmcaddr6 *im = igmp6_mc_get_first(seq);
2196 if (im)
2197 while (pos && (im = igmp6_mc_get_next(seq, im)) != NULL)
2198 --pos;
2199 return pos ? NULL : im;
2200}
2201
/*
 * seq_file ->start: take dev_base_lock for reading (released in
 * igmp6_mc_seq_stop()) and seek to entry *@pos.
 */
static void *igmp6_mc_seq_start(struct seq_file *seq, loff_t *pos)
{
	read_lock(&dev_base_lock);
	return igmp6_mc_get_idx(seq, *pos);
}
2207
/* seq_file ->next: step to the entry after @v and advance *@pos. */
static void *igmp6_mc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct ifmcaddr6 *im;
	im = igmp6_mc_get_next(seq, v);
	++*pos;
	return im;
}
2215
/*
 * seq_file ->stop: release whatever the iterator still holds (the
 * current device's lock and reference, if any), reset the state and drop
 * dev_base_lock taken in igmp6_mc_seq_start().
 */
static void igmp6_mc_seq_stop(struct seq_file *seq, void *v)
{
	struct igmp6_mc_iter_state *state = igmp6_mc_seq_private(seq);
	if (likely(state->idev != NULL)) {
		read_unlock_bh(&state->idev->lock);
		in6_dev_put(state->idev);
		state->idev = NULL;
	}
	state->dev = NULL;
	read_unlock(&dev_base_lock);
}
2227
/*
 * seq_file ->show: print one line for multicast address @v with the
 * columns: ifindex, device name, address as 32 hex digits, mca_users,
 * mca_flags in hex, and the time left on the group timer (0 when
 * MAF_TIMER_RUNNING is not set).  Always returns 0.
 */
static int igmp6_mc_seq_show(struct seq_file *seq, void *v)
{
	struct ifmcaddr6 *im = (struct ifmcaddr6 *)v;
	struct igmp6_mc_iter_state *state = igmp6_mc_seq_private(seq);

	seq_printf(seq,
		   "%-4d %-15s %04x%04x%04x%04x%04x%04x%04x%04x %5d %08X %ld\n",
		   state->dev->ifindex, state->dev->name,
		   NIP6(im->mca_addr),
		   im->mca_users, im->mca_flags,
		   (im->mca_flags&MAF_TIMER_RUNNING) ?
		   jiffies_to_clock_t(im->mca_timer.expires-jiffies) : 0);
	return 0;
}
2242
/* seq_file iterator callbacks for the "igmp6" proc file. */
static struct seq_operations igmp6_mc_seq_ops = {
	.start = igmp6_mc_seq_start,
	.next = igmp6_mc_seq_next,
	.stop = igmp6_mc_seq_stop,
	.show = igmp6_mc_seq_show,
};
2249
2250static int igmp6_mc_seq_open(struct inode *inode, struct file *file)
2251{
2252 struct seq_file *seq;
2253 int rc = -ENOMEM;
2254 struct igmp6_mc_iter_state *s = kmalloc(sizeof(*s), GFP_KERNEL);
2255
2256 if (!s)
2257 goto out;
2258
2259 rc = seq_open(file, &igmp6_mc_seq_ops);
2260 if (rc)
2261 goto out_kfree;
2262
2263 seq = file->private_data;
2264 seq->private = s;
2265 memset(s, 0, sizeof(*s));
2266out:
2267 return rc;
2268out_kfree:
2269 kfree(s);
2270 goto out;
2271}
2272
/*
 * File operations for the "igmp6" proc file.  seq_release_private is
 * used for ->release; the state it deals with is the one allocated in
 * igmp6_mc_seq_open().
 */
static struct file_operations igmp6_mc_seq_fops = {
	.owner = THIS_MODULE,
	.open = igmp6_mc_seq_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = seq_release_private,
};
2280
/*
 * Iterator state for the "mcfilter6" proc file, stored in seq->private.
 */
struct igmp6_mcf_iter_state {
	struct net_device *dev;		/* device currently being walked */
	struct inet6_dev *idev;		/* its inet6_dev; non-NULL while its lock and a reference are held */
	struct ifmcaddr6 *im;		/* current multicast address; non-NULL while its mca_lock is held */
};

/* Fetch the iterator state from a seq_file. */
#define igmp6_mcf_seq_private(seq)	((struct igmp6_mcf_iter_state *)(seq)->private)
2288
/*
 * Position the iterator on the first source filter to report.
 *
 * For each device only the FIRST address on mc_list is examined here; a
 * device whose first address has no sources is passed over.
 *
 * On success the source is returned with idev->lock read-held, a
 * reference on the inet6_dev, and im->mca_lock held; they are recorded in
 * state->idev / state->im and released by igmp6_mcf_get_next() or
 * igmp6_mcf_seq_stop().  Returns NULL with nothing held otherwise.
 */
static inline struct ip6_sf_list *igmp6_mcf_get_first(struct seq_file *seq)
{
	struct ip6_sf_list *psf = NULL;
	struct ifmcaddr6 *im = NULL;
	struct igmp6_mcf_iter_state *state = igmp6_mcf_seq_private(seq);

	for (state->dev = dev_base, state->idev = NULL, state->im = NULL;
	     state->dev;
	     state->dev = state->dev->next) {
		struct inet6_dev *idev;
		idev = in6_dev_get(state->dev);
		if (unlikely(idev == NULL))
			continue;
		read_lock_bh(&idev->lock);
		im = idev->mc_list;
		if (likely(im != NULL)) {
			spin_lock_bh(&im->mca_lock);
			psf = im->mca_sources;
			if (likely(psf != NULL)) {
				/* leave with both locks and the reference held */
				state->im = im;
				state->idev = idev;
				break;
			}
			spin_unlock_bh(&im->mca_lock);
		}
		read_unlock_bh(&idev->lock);
		in6_dev_put(idev);
	}
	return psf;
}
2319
/*
 * Advance from @psf to the next source filter: first along the current
 * address's source list, then to the following addresses of the device,
 * then to the following devices.  Locks and the device reference of
 * finished objects are released before the next ones are taken.
 * Returns NULL at the end of the device list.
 */
static struct ip6_sf_list *igmp6_mcf_get_next(struct seq_file *seq, struct ip6_sf_list *psf)
{
	struct igmp6_mcf_iter_state *state = igmp6_mcf_seq_private(seq);

	psf = psf->sf_next;
	while (!psf) {
		/* current address exhausted: unlock it and move to the next one */
		spin_unlock_bh(&state->im->mca_lock);
		state->im = state->im->next;
		while (!state->im) {
			/* current device exhausted: release it and look for the next */
			if (likely(state->idev != NULL)) {
				read_unlock_bh(&state->idev->lock);
				in6_dev_put(state->idev);
			}
			state->dev = state->dev->next;
			if (!state->dev) {
				state->idev = NULL;
				goto out;
			}
			state->idev = in6_dev_get(state->dev);
			if (!state->idev)
				continue;
			read_lock_bh(&state->idev->lock);
			state->im = state->idev->mc_list;
		}
		if (!state->im)
			break;
		spin_lock_bh(&state->im->mca_lock);
		psf = state->im->mca_sources;
	}
out:
	return psf;
}
2352
2353static struct ip6_sf_list *igmp6_mcf_get_idx(struct seq_file *seq, loff_t pos)
2354{
2355 struct ip6_sf_list *psf = igmp6_mcf_get_first(seq);
2356 if (psf)
2357 while (pos && (psf = igmp6_mcf_get_next(seq, psf)) != NULL)
2358 --pos;
2359 return pos ? NULL : psf;
2360}
2361
2362static void *igmp6_mcf_seq_start(struct seq_file *seq, loff_t *pos)
2363{
2364 read_lock(&dev_base_lock);
2365 return *pos ? igmp6_mcf_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2366}
2367
2368static void *igmp6_mcf_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2369{
2370 struct ip6_sf_list *psf;
2371 if (v == SEQ_START_TOKEN)
2372 psf = igmp6_mcf_get_first(seq);
2373 else
2374 psf = igmp6_mcf_get_next(seq, v);
2375 ++*pos;
2376 return psf;
2377}
2378
/*
 * seq_file ->stop: release whatever the iterator still holds, innermost
 * first (the address's mca_lock, then the device's lock and reference),
 * reset the state and drop dev_base_lock taken in igmp6_mcf_seq_start().
 */
static void igmp6_mcf_seq_stop(struct seq_file *seq, void *v)
{
	struct igmp6_mcf_iter_state *state = igmp6_mcf_seq_private(seq);
	if (likely(state->im != NULL)) {
		spin_unlock_bh(&state->im->mca_lock);
		state->im = NULL;
	}
	if (likely(state->idev != NULL)) {
		read_unlock_bh(&state->idev->lock);
		in6_dev_put(state->idev);
		state->idev = NULL;
	}
	state->dev = NULL;
	read_unlock(&dev_base_lock);
}
2394
/*
 * seq_file ->show: print the column header for SEQ_START_TOKEN, otherwise
 * one line for source filter @v: ifindex, device name, multicast address
 * and source address (32 hex digits each), then the INCLUDE and EXCLUDE
 * counts.  Always returns 0.
 */
static int igmp6_mcf_seq_show(struct seq_file *seq, void *v)
{
	/* only dereferenced in the else branch, i.e. when v is a real entry */
	struct ip6_sf_list *psf = (struct ip6_sf_list *)v;
	struct igmp6_mcf_iter_state *state = igmp6_mcf_seq_private(seq);

	if (v == SEQ_START_TOKEN) {
		seq_printf(seq,
			   "%3s %6s "
			   "%32s %32s %6s %6s\n", "Idx",
			   "Device", "Multicast Address",
			   "Source Address", "INC", "EXC");
	} else {
		seq_printf(seq,
			   "%3d %6.6s "
			   "%04x%04x%04x%04x%04x%04x%04x%04x "
			   "%04x%04x%04x%04x%04x%04x%04x%04x "
			   "%6lu %6lu\n",
			   state->dev->ifindex, state->dev->name,
			   NIP6(state->im->mca_addr),
			   NIP6(psf->sf_addr),
			   psf->sf_count[MCAST_INCLUDE],
			   psf->sf_count[MCAST_EXCLUDE]);
	}
	return 0;
}
2420
/* seq_file iterator callbacks for the "mcfilter6" proc file. */
static struct seq_operations igmp6_mcf_seq_ops = {
	.start = igmp6_mcf_seq_start,
	.next = igmp6_mcf_seq_next,
	.stop = igmp6_mcf_seq_stop,
	.show = igmp6_mcf_seq_show,
};
2427
2428static int igmp6_mcf_seq_open(struct inode *inode, struct file *file)
2429{
2430 struct seq_file *seq;
2431 int rc = -ENOMEM;
2432 struct igmp6_mcf_iter_state *s = kmalloc(sizeof(*s), GFP_KERNEL);
2433
2434 if (!s)
2435 goto out;
2436
2437 rc = seq_open(file, &igmp6_mcf_seq_ops);
2438 if (rc)
2439 goto out_kfree;
2440
2441 seq = file->private_data;
2442 seq->private = s;
2443 memset(s, 0, sizeof(*s));
2444out:
2445 return rc;
2446out_kfree:
2447 kfree(s);
2448 goto out;
2449}
2450
/*
 * File operations for the "mcfilter6" proc file.  seq_release_private is
 * used for ->release; the state it deals with is the one allocated in
 * igmp6_mcf_seq_open().
 */
static struct file_operations igmp6_mcf_seq_fops = {
	.owner = THIS_MODULE,
	.open = igmp6_mcf_seq_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = seq_release_private,
};
2458#endif
2459
/*
 * Create the kernel-internal raw ICMPv6 socket stored in igmp6_socket and,
 * with CONFIG_PROC_FS, register the "igmp6" and "mcfilter6" proc files.
 * @ops is not used here.
 *
 * Returns 0 on success or the negative error from sock_create_kern().
 */
int __init igmp6_init(struct net_proto_family *ops)
{
	struct ipv6_pinfo *np;
	struct sock *sk;
	int err;

	err = sock_create_kern(PF_INET6, SOCK_RAW, IPPROTO_ICMPV6, &igmp6_socket);
	if (err < 0) {
		printk(KERN_ERR
		       "Failed to initialize the IGMP6 control socket (err %d).\n",
		       err);
		igmp6_socket = NULL; /* For safety. */
		return err;
	}

	sk = igmp6_socket->sk;
	/* allocations made on behalf of this socket must not sleep */
	sk->sk_allocation = GFP_ATOMIC;
	/* NOTE(review): presumably unhashed so the socket never receives packets - confirm */
	sk->sk_prot->unhash(sk);

	np = inet6_sk(sk);
	np->hop_limit = 1;

#ifdef CONFIG_PROC_FS
	/* results of the proc registrations are not checked */
	proc_net_fops_create("igmp6", S_IRUGO, &igmp6_mc_seq_fops);
	proc_net_fops_create("mcfilter6", S_IRUGO, &igmp6_mcf_seq_fops);
#endif

	return 0;
}
2489
/*
 * Undo igmp6_init(): release the control socket and, with
 * CONFIG_PROC_FS, remove the two proc files.
 */
void igmp6_cleanup(void)
{
	sock_release(igmp6_socket);
	igmp6_socket = NULL; /* for safety */

#ifdef CONFIG_PROC_FS
	proc_net_remove("mcfilter6");
	proc_net_remove("igmp6");
#endif
}
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
new file mode 100644
index 000000000000..7c291f4e9edc
--- /dev/null
+++ b/net/ipv6/ndisc.c
@@ -0,0 +1,1690 @@
1/*
2 * Neighbour Discovery for IPv6
3 * Linux INET6 implementation
4 *
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
7 * Mike Shaver <shaver@ingenia.com>
8 *
9 * This program is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU General Public License
11 * as published by the Free Software Foundation; either version
12 * 2 of the License, or (at your option) any later version.
13 */
14
15/*
16 * Changes:
17 *
18 * Lars Fenneberg : fixed MTU setting on receipt
19 * of an RA.
20 *
21 * Janos Farkas : kmalloc failure checks
22 * Alexey Kuznetsov : state machine reworked
23 * and moved to net/core.
24 * Pekka Savola : RFC2461 validation
25 * YOSHIFUJI Hideaki @USAGI : Verify ND options properly
26 */
27
/* Set to 3 to get tracing... */
#define ND_DEBUG 1

/* Rate-limited printk: the message is emitted only when net_ratelimit() allows it. */
#define ND_PRINTK(fmt, args...) do { if (net_ratelimit()) { printk(fmt, ## args); } } while(0)
/* Expands to nothing; used for disabled levels. */
#define ND_NOPRINTK(x...) do { ; } while(0)
/*
 * ND_PRINTK<n> prints when ND_DEBUG >= n.  Level 0 is always enabled;
 * levels 1-3 start disabled and are switched on below according to
 * ND_DEBUG.
 */
#define ND_PRINTK0 ND_PRINTK
#define ND_PRINTK1 ND_NOPRINTK
#define ND_PRINTK2 ND_NOPRINTK
#define ND_PRINTK3 ND_NOPRINTK
#if ND_DEBUG >= 1
#undef ND_PRINTK1
#define ND_PRINTK1 ND_PRINTK
#endif
#if ND_DEBUG >= 2
#undef ND_PRINTK2
#define ND_PRINTK2 ND_PRINTK
#endif
#if ND_DEBUG >= 3
#undef ND_PRINTK3
#define ND_PRINTK3 ND_PRINTK
#endif
49
50#include <linux/module.h>
51#include <linux/config.h>
52#include <linux/errno.h>
53#include <linux/types.h>
54#include <linux/socket.h>
55#include <linux/sockios.h>
56#include <linux/sched.h>
57#include <linux/net.h>
58#include <linux/in6.h>
59#include <linux/route.h>
60#include <linux/init.h>
61#include <linux/rcupdate.h>
62#ifdef CONFIG_SYSCTL
63#include <linux/sysctl.h>
64#endif
65
66#include <linux/if_arp.h>
67#include <linux/ipv6.h>
68#include <linux/icmpv6.h>
69#include <linux/jhash.h>
70
71#include <net/sock.h>
72#include <net/snmp.h>
73
74#include <net/ipv6.h>
75#include <net/protocol.h>
76#include <net/ndisc.h>
77#include <net/ip6_route.h>
78#include <net/addrconf.h>
79#include <net/icmp.h>
80
81#include <net/flow.h>
82#include <net/ip6_checksum.h>
83#include <linux/proc_fs.h>
84
85#include <linux/netfilter.h>
86#include <linux/netfilter_ipv6.h>
87
/* Socket used for sending neighbour discovery messages (see ndisc_send_na()). */
static struct socket *ndisc_socket;

/* Callbacks referenced by the ops tables and nd_tbl below; defined later in this file. */
static u32 ndisc_hash(const void *pkey, const struct net_device *dev);
static int ndisc_constructor(struct neighbour *neigh);
static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb);
static void ndisc_error_report(struct neighbour *neigh, struct sk_buff *skb);
static int pndisc_constructor(struct pneigh_entry *n);
static void pndisc_destructor(struct pneigh_entry *n);
static void pndisc_redo(struct sk_buff *skb);
97
/*
 * Neighbour ops chosen by ndisc_constructor() for devices that have a
 * hard_header method but no hard_header_cache.
 */
static struct neigh_ops ndisc_generic_ops = {
	.family = AF_INET6,
	.solicit = ndisc_solicit,
	.error_report = ndisc_error_report,
	.output = neigh_resolve_output,
	.connected_output = neigh_connected_output,
	.hh_output = dev_queue_xmit,
	.queue_xmit = dev_queue_xmit,
};
107
/*
 * Neighbour ops chosen by ndisc_constructor() for devices that provide
 * hard_header_cache.  Differs from ndisc_generic_ops only in
 * .connected_output, which is neigh_resolve_output here.
 */
static struct neigh_ops ndisc_hh_ops = {
	.family = AF_INET6,
	.solicit = ndisc_solicit,
	.error_report = ndisc_error_report,
	.output = neigh_resolve_output,
	.connected_output = neigh_resolve_output,
	.hh_output = dev_queue_xmit,
	.queue_xmit = dev_queue_xmit,
};
117
118
/*
 * Neighbour ops chosen by ndisc_constructor() for devices without a
 * hard_header method: every output path goes straight to
 * dev_queue_xmit, and no solicit/error_report callbacks are set.
 */
static struct neigh_ops ndisc_direct_ops = {
	.family = AF_INET6,
	.output = dev_queue_xmit,
	.connected_output = dev_queue_xmit,
	.hh_output = dev_queue_xmit,
	.queue_xmit = dev_queue_xmit,
};
126
/*
 * The IPv6 neighbour table.  Entries are keyed by a struct in6_addr
 * stored directly after struct neighbour (see entry_size / key_len).
 * Time values are given as multiples of HZ.
 */
struct neigh_table nd_tbl = {
	.family = AF_INET6,
	.entry_size = sizeof(struct neighbour) + sizeof(struct in6_addr),
	.key_len = sizeof(struct in6_addr),
	.hash = ndisc_hash,
	.constructor = ndisc_constructor,
	.pconstructor = pndisc_constructor,
	.pdestructor = pndisc_destructor,
	.proxy_redo = pndisc_redo,
	.id = "ndisc_cache",
	/* default per-device parameters */
	.parms = {
		.tbl = &nd_tbl,
		.base_reachable_time = 30 * HZ,
		.retrans_time = 1 * HZ,
		.gc_staletime = 60 * HZ,
		.reachable_time = 30 * HZ,
		.delay_probe_time = 5 * HZ,
		.queue_len = 3,
		.ucast_probes = 3,
		.mcast_probes = 3,
		.anycast_delay = 1 * HZ,
		.proxy_delay = (8 * HZ) / 10,
		.proxy_qlen = 64,
	},
	.gc_interval = 30 * HZ,
	.gc_thresh1 = 128,
	.gc_thresh2 = 512,
	.gc_thresh3 = 1024,
};
156
/* ND options */
/*
 * Result of ndisc_parse_options(): one pointer per option type, indexed
 * by the option type value; NULL when the option was not present.
 */
struct ndisc_options {
	struct nd_opt_hdr *nd_opt_array[__ND_OPT_MAX];
};

/* Named accessors for the individual slots of nd_opt_array. */
#define nd_opts_src_lladdr	nd_opt_array[ND_OPT_SOURCE_LL_ADDR]
#define nd_opts_tgt_lladdr	nd_opt_array[ND_OPT_TARGET_LL_ADDR]
#define nd_opts_pi		nd_opt_array[ND_OPT_PREFIX_INFO]
/* last prefix-information option seen (the slot above holds the first) */
#define nd_opts_pi_end		nd_opt_array[__ND_OPT_PREFIX_INFO_END]
#define nd_opts_rh		nd_opt_array[ND_OPT_REDIRECT_HDR]
#define nd_opts_mtu		nd_opt_array[ND_OPT_MTU]

/* Size of an option carrying @len payload bytes: 2 header bytes added, rounded up to a multiple of 8. */
#define NDISC_OPT_SPACE(len) (((len)+2+7)&~7)
170
171/*
172 * Return the padding between the option length and the start of the
173 * link addr. Currently only IP-over-InfiniBand needs this, although
174 * if RFC 3831 IPv6-over-Fibre Channel is ever implemented it may
175 * also need a pad of 2.
176 */
177static int ndisc_addr_option_pad(unsigned short type)
178{
179 switch (type) {
180 case ARPHRD_INFINIBAND: return 2;
181 default: return 0;
182 }
183}
184
/*
 * Total size in bytes of a link-layer address option for @dev: address
 * length plus the type-specific pad, plus the option header, rounded up
 * by NDISC_OPT_SPACE().
 */
static inline int ndisc_opt_addr_space(struct net_device *dev)
{
	return NDISC_OPT_SPACE(dev->addr_len + ndisc_addr_option_pad(dev->type));
}
189
190static u8 *ndisc_fill_addr_option(u8 *opt, int type, void *data, int data_len,
191 unsigned short addr_type)
192{
193 int space = NDISC_OPT_SPACE(data_len);
194 int pad = ndisc_addr_option_pad(addr_type);
195
196 opt[0] = type;
197 opt[1] = space>>3;
198
199 memset(opt + 2, 0, pad);
200 opt += pad;
201 space -= pad;
202
203 memcpy(opt+2, data, data_len);
204 data_len += 2;
205 opt += data_len;
206 if ((space -= data_len) > 0)
207 memset(opt, 0, space);
208 return opt + space;
209}
210
211static struct nd_opt_hdr *ndisc_next_option(struct nd_opt_hdr *cur,
212 struct nd_opt_hdr *end)
213{
214 int type;
215 if (!cur || !end || cur >= end)
216 return NULL;
217 type = cur->nd_opt_type;
218 do {
219 cur = ((void *)cur) + (cur->nd_opt_len << 3);
220 } while(cur < end && cur->nd_opt_type != type);
221 return (cur <= end && cur->nd_opt_type == type ? cur : NULL);
222}
223
/*
 * Walk the @opt_len bytes of options at @opt and record them in @ndopts
 * (which is zeroed first).
 *
 * For source/target link-layer address, MTU and redirected-header
 * options the first occurrence is kept and duplicates are ignored.  For
 * prefix information the first occurrence goes to nd_opts_pi and the
 * most recent one to nd_opts_pi_end.  Other option types are skipped.
 *
 * Returns @ndopts, or NULL when an argument is invalid or the options
 * are malformed (truncated header, zero length, or a length that exceeds
 * the remaining bytes).
 */
static struct ndisc_options *ndisc_parse_options(u8 *opt, int opt_len,
						 struct ndisc_options *ndopts)
{
	struct nd_opt_hdr *nd_opt = (struct nd_opt_hdr *)opt;

	if (!nd_opt || opt_len < 0 || !ndopts)
		return NULL;
	memset(ndopts, 0, sizeof(*ndopts));
	while (opt_len) {
		int l;
		if (opt_len < sizeof(struct nd_opt_hdr))
			return NULL;
		/* length field is in units of 8 bytes */
		l = nd_opt->nd_opt_len << 3;
		/* l == 0 would never advance, so it is rejected too */
		if (opt_len < l || l == 0)
			return NULL;
		switch (nd_opt->nd_opt_type) {
		case ND_OPT_SOURCE_LL_ADDR:
		case ND_OPT_TARGET_LL_ADDR:
		case ND_OPT_MTU:
		case ND_OPT_REDIRECT_HDR:
			if (ndopts->nd_opt_array[nd_opt->nd_opt_type]) {
				ND_PRINTK2(KERN_WARNING
					   "%s(): duplicated ND6 option found: type=%d\n",
					   __FUNCTION__,
					   nd_opt->nd_opt_type);
			} else {
				ndopts->nd_opt_array[nd_opt->nd_opt_type] = nd_opt;
			}
			break;
		case ND_OPT_PREFIX_INFO:
			/* always track the latest; remember the first only once */
			ndopts->nd_opts_pi_end = nd_opt;
			if (ndopts->nd_opt_array[nd_opt->nd_opt_type] == 0)
				ndopts->nd_opt_array[nd_opt->nd_opt_type] = nd_opt;
			break;
		default:
			/*
			 * Unknown options must be silently ignored,
			 * to accommodate future extension to the protocol.
			 */
			ND_PRINTK2(KERN_NOTICE
				   "%s(): ignored unsupported option; type=%d, len=%d\n",
				   __FUNCTION__,
				   nd_opt->nd_opt_type, nd_opt->nd_opt_len);
		}
		opt_len -= l;
		nd_opt = ((void *)nd_opt) + l;
	}
	return ndopts;
}
273
274static inline u8 *ndisc_opt_addr_data(struct nd_opt_hdr *p,
275 struct net_device *dev)
276{
277 u8 *lladdr = (u8 *)(p + 1);
278 int lladdrlen = p->nd_opt_len << 3;
279 int prepad = ndisc_addr_option_pad(dev->type);
280 if (lladdrlen != NDISC_OPT_SPACE(dev->addr_len + prepad))
281 return NULL;
282 return (lladdr + prepad);
283}
284
/*
 * Map IPv6 multicast address @addr to a link-layer multicast address in
 * @buf, according to the hardware type of @dev.
 *
 * For hardware types without a dedicated mapping the device's broadcast
 * address is copied instead, but only when @dir is non-zero.
 *
 * Returns 0 when @buf was filled in, -EINVAL otherwise.
 */
int ndisc_mc_map(struct in6_addr *addr, char *buf, struct net_device *dev, int dir)
{
	switch (dev->type) {
	case ARPHRD_ETHER:
	case ARPHRD_IEEE802:	/* Not sure. Check it later. --ANK */
	case ARPHRD_FDDI:
		ipv6_eth_mc_map(addr, buf);
		return 0;
	case ARPHRD_IEEE802_TR:
		ipv6_tr_mc_map(addr,buf);
		return 0;
	case ARPHRD_ARCNET:
		ipv6_arcnet_mc_map(addr, buf);
		return 0;
	case ARPHRD_INFINIBAND:
		ipv6_ib_mc_map(addr, buf);
		return 0;
	default:
		if (dir) {
			memcpy(buf, dev->broadcast, dev->addr_len);
			return 0;
		}
	}
	return -EINVAL;
}
310
311static u32 ndisc_hash(const void *pkey, const struct net_device *dev)
312{
313 const u32 *p32 = pkey;
314 u32 addr_hash, i;
315
316 addr_hash = 0;
317 for (i = 0; i < (sizeof(struct in6_addr) / sizeof(u32)); i++)
318 addr_hash ^= *p32++;
319
320 return jhash_2words(addr_hash, dev->ifindex, nd_tbl.hash_rnd);
321}
322
/*
 * Constructor callback of nd_tbl: initialise a new neighbour entry.
 *
 * Replaces neigh->parms with a clone of the device's nd_parms, sets the
 * entry type, and picks the ops table, initial NUD state, hardware
 * address and output function from the device's capabilities and flags.
 *
 * Returns 0, or -EINVAL when the device has no inet6_dev.
 */
static int ndisc_constructor(struct neighbour *neigh)
{
	struct in6_addr *addr = (struct in6_addr*)&neigh->primary_key;
	struct net_device *dev = neigh->dev;
	struct inet6_dev *in6_dev;
	struct neigh_parms *parms;
	int is_multicast = ipv6_addr_is_multicast(addr);

	rcu_read_lock();
	in6_dev = in6_dev_get(dev);
	if (in6_dev == NULL) {
		rcu_read_unlock();
		return -EINVAL;
	}

	/* swap the parms the entry came with for the device's own */
	parms = in6_dev->nd_parms;
	__neigh_parms_put(neigh->parms);
	neigh->parms = neigh_parms_clone(parms);
	rcu_read_unlock();

	neigh->type = is_multicast ? RTN_MULTICAST : RTN_UNICAST;
	if (dev->hard_header == NULL) {
		/* no link-layer header to build: send directly */
		neigh->nud_state = NUD_NOARP;
		neigh->ops = &ndisc_direct_ops;
		neigh->output = neigh->ops->queue_xmit;
	} else {
		/* in these three cases the hardware address is known without resolution */
		if (is_multicast) {
			neigh->nud_state = NUD_NOARP;
			ndisc_mc_map(addr, neigh->ha, dev, 1);
		} else if (dev->flags&(IFF_NOARP|IFF_LOOPBACK)) {
			neigh->nud_state = NUD_NOARP;
			memcpy(neigh->ha, dev->dev_addr, dev->addr_len);
			if (dev->flags&IFF_LOOPBACK)
				neigh->type = RTN_LOCAL;
		} else if (dev->flags&IFF_POINTOPOINT) {
			neigh->nud_state = NUD_NOARP;
			memcpy(neigh->ha, dev->broadcast, dev->addr_len);
		}
		if (dev->hard_header_cache)
			neigh->ops = &ndisc_hh_ops;
		else
			neigh->ops = &ndisc_generic_ops;
		if (neigh->nud_state&NUD_VALID)
			neigh->output = neigh->ops->connected_output;
		else
			neigh->output = neigh->ops->output;
	}
	in6_dev_put(in6_dev);
	return 0;
}
373
374static int pndisc_constructor(struct pneigh_entry *n)
375{
376 struct in6_addr *addr = (struct in6_addr*)&n->key;
377 struct in6_addr maddr;
378 struct net_device *dev = n->dev;
379
380 if (dev == NULL || __in6_dev_get(dev) == NULL)
381 return -EINVAL;
382 addrconf_addr_solict_mult(addr, &maddr);
383 ipv6_dev_mc_inc(dev, &maddr);
384 return 0;
385}
386
387static void pndisc_destructor(struct pneigh_entry *n)
388{
389 struct in6_addr *addr = (struct in6_addr*)&n->key;
390 struct in6_addr maddr;
391 struct net_device *dev = n->dev;
392
393 if (dev == NULL || __in6_dev_get(dev) == NULL)
394 return;
395 addrconf_addr_solict_mult(addr, &maddr);
396 ipv6_dev_mc_dec(dev, &maddr);
397}
398
399/*
400 * Send a Neighbour Advertisement
401 */
402
403static inline void ndisc_flow_init(struct flowi *fl, u8 type,
404 struct in6_addr *saddr, struct in6_addr *daddr)
405{
406 memset(fl, 0, sizeof(*fl));
407 ipv6_addr_copy(&fl->fl6_src, saddr);
408 ipv6_addr_copy(&fl->fl6_dst, daddr);
409 fl->proto = IPPROTO_ICMPV6;
410 fl->fl_icmp_type = type;
411 fl->fl_icmp_code = 0;
412}
413
414static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh,
415 struct in6_addr *daddr, struct in6_addr *solicited_addr,
416 int router, int solicited, int override, int inc_opt)
417{
418 struct in6_addr tmpaddr;
419 struct inet6_ifaddr *ifp;
420 struct inet6_dev *idev;
421 struct flowi fl;
422 struct dst_entry* dst;
423 struct sock *sk = ndisc_socket->sk;
424 struct in6_addr *src_addr;
425 struct nd_msg *msg;
426 int len;
427 struct sk_buff *skb;
428 int err;
429
430 len = sizeof(struct icmp6hdr) + sizeof(struct in6_addr);
431
432 /* for anycast or proxy, solicited_addr != src_addr */
433 ifp = ipv6_get_ifaddr(solicited_addr, dev, 1);
434 if (ifp) {
435 src_addr = solicited_addr;
436 in6_ifa_put(ifp);
437 } else {
438 if (ipv6_dev_get_saddr(dev, daddr, &tmpaddr))
439 return;
440 src_addr = &tmpaddr;
441 }
442
443 ndisc_flow_init(&fl, NDISC_NEIGHBOUR_ADVERTISEMENT, src_addr, daddr);
444
445 dst = ndisc_dst_alloc(dev, neigh, daddr, ip6_output);
446 if (!dst)
447 return;
448
449 err = xfrm_lookup(&dst, &fl, NULL, 0);
450 if (err < 0) {
451 dst_release(dst);
452 return;
453 }
454
455 if (inc_opt) {
456 if (dev->addr_len)
457 len += ndisc_opt_addr_space(dev);
458 else
459 inc_opt = 0;
460 }
461
462 skb = sock_alloc_send_skb(sk, MAX_HEADER + len + LL_RESERVED_SPACE(dev),
463 1, &err);
464
465 if (skb == NULL) {
466 ND_PRINTK0(KERN_ERR
467 "ICMPv6 NA: %s() failed to allocate an skb.\n",
468 __FUNCTION__);
469 dst_release(dst);
470 return;
471 }
472
473 skb_reserve(skb, LL_RESERVED_SPACE(dev));
474 ip6_nd_hdr(sk, skb, dev, src_addr, daddr, IPPROTO_ICMPV6, len);
475
476 msg = (struct nd_msg *)skb_put(skb, len);
477 skb->h.raw = (unsigned char*)msg;
478
479 msg->icmph.icmp6_type = NDISC_NEIGHBOUR_ADVERTISEMENT;
480 msg->icmph.icmp6_code = 0;
481 msg->icmph.icmp6_cksum = 0;
482
483 msg->icmph.icmp6_unused = 0;
484 msg->icmph.icmp6_router = router;
485 msg->icmph.icmp6_solicited = solicited;
486 msg->icmph.icmp6_override = !!override;
487
488 /* Set the target address. */
489 ipv6_addr_copy(&msg->target, solicited_addr);
490
491 if (inc_opt)
492 ndisc_fill_addr_option(msg->opt, ND_OPT_TARGET_LL_ADDR, dev->dev_addr,
493 dev->addr_len, dev->type);
494
495 /* checksum */
496 msg->icmph.icmp6_cksum = csum_ipv6_magic(src_addr, daddr, len,
497 IPPROTO_ICMPV6,
498 csum_partial((__u8 *) msg,
499 len, 0));
500
501 skb->dst = dst;
502 idev = in6_dev_get(dst->dev);
503 IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
504 err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev, dst_output);
505 if (!err) {
506 ICMP6_INC_STATS(idev, ICMP6_MIB_OUTNEIGHBORADVERTISEMENTS);
507 ICMP6_INC_STATS(idev, ICMP6_MIB_OUTMSGS);
508 }
509
510 if (likely(idev != NULL))
511 in6_dev_put(idev);
512}
513
514void ndisc_send_ns(struct net_device *dev, struct neighbour *neigh,
515 struct in6_addr *solicit,
516 struct in6_addr *daddr, struct in6_addr *saddr)
517{
518 struct flowi fl;
519 struct dst_entry* dst;
520 struct inet6_dev *idev;
521 struct sock *sk = ndisc_socket->sk;
522 struct sk_buff *skb;
523 struct nd_msg *msg;
524 struct in6_addr addr_buf;
525 int len;
526 int err;
527 int send_llinfo;
528
529 if (saddr == NULL) {
530 if (ipv6_get_lladdr(dev, &addr_buf))
531 return;
532 saddr = &addr_buf;
533 }
534
535 ndisc_flow_init(&fl, NDISC_NEIGHBOUR_SOLICITATION, saddr, daddr);
536
537 dst = ndisc_dst_alloc(dev, neigh, daddr, ip6_output);
538 if (!dst)
539 return;
540
541 err = xfrm_lookup(&dst, &fl, NULL, 0);
542 if (err < 0) {
543 dst_release(dst);
544 return;
545 }
546
547 len = sizeof(struct icmp6hdr) + sizeof(struct in6_addr);
548 send_llinfo = dev->addr_len && !ipv6_addr_any(saddr);
549 if (send_llinfo)
550 len += ndisc_opt_addr_space(dev);
551
552 skb = sock_alloc_send_skb(sk, MAX_HEADER + len + LL_RESERVED_SPACE(dev),
553 1, &err);
554 if (skb == NULL) {
555 ND_PRINTK0(KERN_ERR
556 "ICMPv6 NA: %s() failed to allocate an skb.\n",
557 __FUNCTION__);
558 dst_release(dst);
559 return;
560 }
561
562 skb_reserve(skb, LL_RESERVED_SPACE(dev));
563 ip6_nd_hdr(sk, skb, dev, saddr, daddr, IPPROTO_ICMPV6, len);
564
565 msg = (struct nd_msg *)skb_put(skb, len);
566 skb->h.raw = (unsigned char*)msg;
567 msg->icmph.icmp6_type = NDISC_NEIGHBOUR_SOLICITATION;
568 msg->icmph.icmp6_code = 0;
569 msg->icmph.icmp6_cksum = 0;
570 msg->icmph.icmp6_unused = 0;
571
572 /* Set the target address. */
573 ipv6_addr_copy(&msg->target, solicit);
574
575 if (send_llinfo)
576 ndisc_fill_addr_option(msg->opt, ND_OPT_SOURCE_LL_ADDR, dev->dev_addr,
577 dev->addr_len, dev->type);
578
579 /* checksum */
580 msg->icmph.icmp6_cksum = csum_ipv6_magic(&skb->nh.ipv6h->saddr,
581 daddr, len,
582 IPPROTO_ICMPV6,
583 csum_partial((__u8 *) msg,
584 len, 0));
585 /* send it! */
586 skb->dst = dst;
587 idev = in6_dev_get(dst->dev);
588 IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
589 err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev, dst_output);
590 if (!err) {
591 ICMP6_INC_STATS(idev, ICMP6_MIB_OUTNEIGHBORSOLICITS);
592 ICMP6_INC_STATS(idev, ICMP6_MIB_OUTMSGS);
593 }
594
595 if (likely(idev != NULL))
596 in6_dev_put(idev);
597}
598
599void ndisc_send_rs(struct net_device *dev, struct in6_addr *saddr,
600 struct in6_addr *daddr)
601{
602 struct flowi fl;
603 struct dst_entry* dst;
604 struct inet6_dev *idev;
605 struct sock *sk = ndisc_socket->sk;
606 struct sk_buff *skb;
607 struct icmp6hdr *hdr;
608 __u8 * opt;
609 int len;
610 int err;
611
612 ndisc_flow_init(&fl, NDISC_ROUTER_SOLICITATION, saddr, daddr);
613
614 dst = ndisc_dst_alloc(dev, NULL, daddr, ip6_output);
615 if (!dst)
616 return;
617
618 err = xfrm_lookup(&dst, &fl, NULL, 0);
619 if (err < 0) {
620 dst_release(dst);
621 return;
622 }
623
624 len = sizeof(struct icmp6hdr);
625 if (dev->addr_len)
626 len += ndisc_opt_addr_space(dev);
627
628 skb = sock_alloc_send_skb(sk, MAX_HEADER + len + LL_RESERVED_SPACE(dev),
629 1, &err);
630 if (skb == NULL) {
631 ND_PRINTK0(KERN_ERR
632 "ICMPv6 RS: %s() failed to allocate an skb.\n",
633 __FUNCTION__);
634 dst_release(dst);
635 return;
636 }
637
638 skb_reserve(skb, LL_RESERVED_SPACE(dev));
639 ip6_nd_hdr(sk, skb, dev, saddr, daddr, IPPROTO_ICMPV6, len);
640
641 hdr = (struct icmp6hdr *)skb_put(skb, len);
642 skb->h.raw = (unsigned char*)hdr;
643 hdr->icmp6_type = NDISC_ROUTER_SOLICITATION;
644 hdr->icmp6_code = 0;
645 hdr->icmp6_cksum = 0;
646 hdr->icmp6_unused = 0;
647
648 opt = (u8*) (hdr + 1);
649
650 if (dev->addr_len)
651 ndisc_fill_addr_option(opt, ND_OPT_SOURCE_LL_ADDR, dev->dev_addr,
652 dev->addr_len, dev->type);
653
654 /* checksum */
655 hdr->icmp6_cksum = csum_ipv6_magic(&skb->nh.ipv6h->saddr, daddr, len,
656 IPPROTO_ICMPV6,
657 csum_partial((__u8 *) hdr, len, 0));
658
659 /* send it! */
660 skb->dst = dst;
661 idev = in6_dev_get(dst->dev);
662 IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
663 err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev, dst_output);
664 if (!err) {
665 ICMP6_INC_STATS(idev, ICMP6_MIB_OUTROUTERSOLICITS);
666 ICMP6_INC_STATS(idev, ICMP6_MIB_OUTMSGS);
667 }
668
669 if (likely(idev != NULL))
670 in6_dev_put(idev);
671}
672
673
/*
 *	Error hook for a queued packet: signal link failure for it
 *	("The sender MUST return an ICMP destination unreachable"),
 *	then free the packet.
 */
static void ndisc_error_report(struct neighbour *neigh, struct sk_buff *skb)
{
	dst_link_failure(skb);

	/* the skb is consumed here */
	kfree_skb(skb);
}
683
684/* Called with locked neigh: either read or both */
685
686static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb)
687{
688 struct in6_addr *saddr = NULL;
689 struct in6_addr mcaddr;
690 struct net_device *dev = neigh->dev;
691 struct in6_addr *target = (struct in6_addr *)&neigh->primary_key;
692 int probes = atomic_read(&neigh->probes);
693
694 if (skb && ipv6_chk_addr(&skb->nh.ipv6h->saddr, dev, 1))
695 saddr = &skb->nh.ipv6h->saddr;
696
697 if ((probes -= neigh->parms->ucast_probes) < 0) {
698 if (!(neigh->nud_state & NUD_VALID)) {
699 ND_PRINTK1(KERN_DEBUG
700 "%s(): trying to ucast probe in NUD_INVALID: "
701 "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n",
702 __FUNCTION__,
703 NIP6(*target));
704 }
705 ndisc_send_ns(dev, neigh, target, target, saddr);
706 } else if ((probes -= neigh->parms->app_probes) < 0) {
707#ifdef CONFIG_ARPD
708 neigh_app_ns(neigh);
709#endif
710 } else {
711 addrconf_addr_solict_mult(target, &mcaddr);
712 ndisc_send_ns(dev, NULL, target, &mcaddr, saddr);
713 }
714}
715
716static void ndisc_recv_ns(struct sk_buff *skb)
717{
718 struct nd_msg *msg = (struct nd_msg *)skb->h.raw;
719 struct in6_addr *saddr = &skb->nh.ipv6h->saddr;
720 struct in6_addr *daddr = &skb->nh.ipv6h->daddr;
721 u8 *lladdr = NULL;
722 u32 ndoptlen = skb->tail - msg->opt;
723 struct ndisc_options ndopts;
724 struct net_device *dev = skb->dev;
725 struct inet6_ifaddr *ifp;
726 struct inet6_dev *idev = NULL;
727 struct neighbour *neigh;
728 int dad = ipv6_addr_any(saddr);
729 int inc;
730
731 if (ipv6_addr_is_multicast(&msg->target)) {
732 ND_PRINTK2(KERN_WARNING
733 "ICMPv6 NS: multicast target address");
734 return;
735 }
736
737 /*
738 * RFC2461 7.1.1:
739 * DAD has to be destined for solicited node multicast address.
740 */
741 if (dad &&
742 !(daddr->s6_addr32[0] == htonl(0xff020000) &&
743 daddr->s6_addr32[1] == htonl(0x00000000) &&
744 daddr->s6_addr32[2] == htonl(0x00000001) &&
745 daddr->s6_addr [12] == 0xff )) {
746 ND_PRINTK2(KERN_WARNING
747 "ICMPv6 NS: bad DAD packet (wrong destination)\n");
748 return;
749 }
750
751 if (!ndisc_parse_options(msg->opt, ndoptlen, &ndopts)) {
752 ND_PRINTK2(KERN_WARNING
753 "ICMPv6 NS: invalid ND options\n");
754 return;
755 }
756
757 if (ndopts.nd_opts_src_lladdr) {
758 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_src_lladdr, dev);
759 if (!lladdr) {
760 ND_PRINTK2(KERN_WARNING
761 "ICMPv6 NS: invalid link-layer address length\n");
762 return;
763 }
764
765 /* RFC2461 7.1.1:
766 * If the IP source address is the unspecified address,
767 * there MUST NOT be source link-layer address option
768 * in the message.
769 */
770 if (dad) {
771 ND_PRINTK2(KERN_WARNING
772 "ICMPv6 NS: bad DAD packet (link-layer address option)\n");
773 return;
774 }
775 }
776
777 inc = ipv6_addr_is_multicast(daddr);
778
779 if ((ifp = ipv6_get_ifaddr(&msg->target, dev, 1)) != NULL) {
780 if (ifp->flags & IFA_F_TENTATIVE) {
781 /* Address is tentative. If the source
782 is unspecified address, it is someone
783 does DAD, otherwise we ignore solicitations
784 until DAD timer expires.
785 */
786 if (!dad)
787 goto out;
788 if (dev->type == ARPHRD_IEEE802_TR) {
789 unsigned char *sadr = skb->mac.raw;
790 if (((sadr[8] ^ dev->dev_addr[0]) & 0x7f) == 0 &&
791 sadr[9] == dev->dev_addr[1] &&
792 sadr[10] == dev->dev_addr[2] &&
793 sadr[11] == dev->dev_addr[3] &&
794 sadr[12] == dev->dev_addr[4] &&
795 sadr[13] == dev->dev_addr[5]) {
796 /* looped-back to us */
797 goto out;
798 }
799 }
800 addrconf_dad_failure(ifp);
801 return;
802 }
803
804 idev = ifp->idev;
805 } else {
806 idev = in6_dev_get(dev);
807 if (!idev) {
808 /* XXX: count this drop? */
809 return;
810 }
811
812 if (ipv6_chk_acast_addr(dev, &msg->target) ||
813 (idev->cnf.forwarding &&
814 pneigh_lookup(&nd_tbl, &msg->target, dev, 0))) {
815 if (skb->stamp.tv_sec != LOCALLY_ENQUEUED &&
816 skb->pkt_type != PACKET_HOST &&
817 inc != 0 &&
818 idev->nd_parms->proxy_delay != 0) {
819 /*
820 * for anycast or proxy,
821 * sender should delay its response
822 * by a random time between 0 and
823 * MAX_ANYCAST_DELAY_TIME seconds.
824 * (RFC2461) -- yoshfuji
825 */
826 struct sk_buff *n = skb_clone(skb, GFP_ATOMIC);
827 if (n)
828 pneigh_enqueue(&nd_tbl, idev->nd_parms, n);
829 goto out;
830 }
831 } else
832 goto out;
833 }
834
835 if (dad) {
836 struct in6_addr maddr;
837
838 ipv6_addr_all_nodes(&maddr);
839 ndisc_send_na(dev, NULL, &maddr, &msg->target,
840 idev->cnf.forwarding, 0, (ifp != NULL), 1);
841 goto out;
842 }
843
844 if (inc)
845 NEIGH_CACHE_STAT_INC(&nd_tbl, rcv_probes_mcast);
846 else
847 NEIGH_CACHE_STAT_INC(&nd_tbl, rcv_probes_ucast);
848
849 /*
850 * update / create cache entry
851 * for the source address
852 */
853 neigh = __neigh_lookup(&nd_tbl, saddr, dev,
854 !inc || lladdr || !dev->addr_len);
855 if (neigh)
856 neigh_update(neigh, lladdr, NUD_STALE,
857 NEIGH_UPDATE_F_WEAK_OVERRIDE|
858 NEIGH_UPDATE_F_OVERRIDE);
859 if (neigh || !dev->hard_header) {
860 ndisc_send_na(dev, neigh, saddr, &msg->target,
861 idev->cnf.forwarding,
862 1, (ifp != NULL && inc), inc);
863 if (neigh)
864 neigh_release(neigh);
865 }
866
867out:
868 if (ifp)
869 in6_ifa_put(ifp);
870 else
871 in6_dev_put(idev);
872
873 return;
874}
875
876static void ndisc_recv_na(struct sk_buff *skb)
877{
878 struct nd_msg *msg = (struct nd_msg *)skb->h.raw;
879 struct in6_addr *saddr = &skb->nh.ipv6h->saddr;
880 struct in6_addr *daddr = &skb->nh.ipv6h->daddr;
881 u8 *lladdr = NULL;
882 u32 ndoptlen = skb->tail - msg->opt;
883 struct ndisc_options ndopts;
884 struct net_device *dev = skb->dev;
885 struct inet6_ifaddr *ifp;
886 struct neighbour *neigh;
887
888 if (skb->len < sizeof(struct nd_msg)) {
889 ND_PRINTK2(KERN_WARNING
890 "ICMPv6 NA: packet too short\n");
891 return;
892 }
893
894 if (ipv6_addr_is_multicast(&msg->target)) {
895 ND_PRINTK2(KERN_WARNING
896 "ICMPv6 NA: target address is multicast.\n");
897 return;
898 }
899
900 if (ipv6_addr_is_multicast(daddr) &&
901 msg->icmph.icmp6_solicited) {
902 ND_PRINTK2(KERN_WARNING
903 "ICMPv6 NA: solicited NA is multicasted.\n");
904 return;
905 }
906
907 if (!ndisc_parse_options(msg->opt, ndoptlen, &ndopts)) {
908 ND_PRINTK2(KERN_WARNING
909 "ICMPv6 NS: invalid ND option\n");
910 return;
911 }
912 if (ndopts.nd_opts_tgt_lladdr) {
913 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr, dev);
914 if (!lladdr) {
915 ND_PRINTK2(KERN_WARNING
916 "ICMPv6 NA: invalid link-layer address length\n");
917 return;
918 }
919 }
920 if ((ifp = ipv6_get_ifaddr(&msg->target, dev, 1))) {
921 if (ifp->flags & IFA_F_TENTATIVE) {
922 addrconf_dad_failure(ifp);
923 return;
924 }
925 /* What should we make now? The advertisement
926 is invalid, but ndisc specs say nothing
927 about it. It could be misconfiguration, or
928 an smart proxy agent tries to help us :-)
929 */
930 ND_PRINTK1(KERN_WARNING
931 "ICMPv6 NA: someone advertises our address on %s!\n",
932 ifp->idev->dev->name);
933 in6_ifa_put(ifp);
934 return;
935 }
936 neigh = neigh_lookup(&nd_tbl, &msg->target, dev);
937
938 if (neigh) {
939 u8 old_flags = neigh->flags;
940
941 if (neigh->nud_state & NUD_FAILED)
942 goto out;
943
944 neigh_update(neigh, lladdr,
945 msg->icmph.icmp6_solicited ? NUD_REACHABLE : NUD_STALE,
946 NEIGH_UPDATE_F_WEAK_OVERRIDE|
947 (msg->icmph.icmp6_override ? NEIGH_UPDATE_F_OVERRIDE : 0)|
948 NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
949 (msg->icmph.icmp6_router ? NEIGH_UPDATE_F_ISROUTER : 0));
950
951 if ((old_flags & ~neigh->flags) & NTF_ROUTER) {
952 /*
953 * Change: router to host
954 */
955 struct rt6_info *rt;
956 rt = rt6_get_dflt_router(saddr, dev);
957 if (rt)
958 ip6_del_rt(rt, NULL, NULL);
959 }
960
961out:
962 neigh_release(neigh);
963 }
964}
965
966static void ndisc_recv_rs(struct sk_buff *skb)
967{
968 struct rs_msg *rs_msg = (struct rs_msg *) skb->h.raw;
969 unsigned long ndoptlen = skb->len - sizeof(*rs_msg);
970 struct neighbour *neigh;
971 struct inet6_dev *idev;
972 struct in6_addr *saddr = &skb->nh.ipv6h->saddr;
973 struct ndisc_options ndopts;
974 u8 *lladdr = NULL;
975
976 if (skb->len < sizeof(*rs_msg))
977 return;
978
979 idev = in6_dev_get(skb->dev);
980 if (!idev) {
981 if (net_ratelimit())
982 ND_PRINTK1("ICMP6 RS: can't find in6 device\n");
983 return;
984 }
985
986 /* Don't accept RS if we're not in router mode */
987 if (!idev->cnf.forwarding)
988 goto out;
989
990 /*
991 * Don't update NCE if src = ::;
992 * this implies that the source node has no ip address assigned yet.
993 */
994 if (ipv6_addr_any(saddr))
995 goto out;
996
997 /* Parse ND options */
998 if (!ndisc_parse_options(rs_msg->opt, ndoptlen, &ndopts)) {
999 if (net_ratelimit())
1000 ND_PRINTK2("ICMP6 NS: invalid ND option, ignored\n");
1001 goto out;
1002 }
1003
1004 if (ndopts.nd_opts_src_lladdr) {
1005 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_src_lladdr,
1006 skb->dev);
1007 if (!lladdr)
1008 goto out;
1009 }
1010
1011 neigh = __neigh_lookup(&nd_tbl, saddr, skb->dev, 1);
1012 if (neigh) {
1013 neigh_update(neigh, lladdr, NUD_STALE,
1014 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1015 NEIGH_UPDATE_F_OVERRIDE|
1016 NEIGH_UPDATE_F_OVERRIDE_ISROUTER);
1017 neigh_release(neigh);
1018 }
1019out:
1020 in6_dev_put(idev);
1021}
1022
1023static void ndisc_router_discovery(struct sk_buff *skb)
1024{
1025 struct ra_msg *ra_msg = (struct ra_msg *) skb->h.raw;
1026 struct neighbour *neigh = NULL;
1027 struct inet6_dev *in6_dev;
1028 struct rt6_info *rt;
1029 int lifetime;
1030 struct ndisc_options ndopts;
1031 int optlen;
1032
1033 __u8 * opt = (__u8 *)(ra_msg + 1);
1034
1035 optlen = (skb->tail - skb->h.raw) - sizeof(struct ra_msg);
1036
1037 if (!(ipv6_addr_type(&skb->nh.ipv6h->saddr) & IPV6_ADDR_LINKLOCAL)) {
1038 ND_PRINTK2(KERN_WARNING
1039 "ICMPv6 RA: source address is not link-local.\n");
1040 return;
1041 }
1042 if (optlen < 0) {
1043 ND_PRINTK2(KERN_WARNING
1044 "ICMPv6 RA: packet too short\n");
1045 return;
1046 }
1047
1048 /*
1049 * set the RA_RECV flag in the interface
1050 */
1051
1052 in6_dev = in6_dev_get(skb->dev);
1053 if (in6_dev == NULL) {
1054 ND_PRINTK0(KERN_ERR
1055 "ICMPv6 RA: can't find inet6 device for %s.\n",
1056 skb->dev->name);
1057 return;
1058 }
1059 if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_ra) {
1060 in6_dev_put(in6_dev);
1061 return;
1062 }
1063
1064 if (!ndisc_parse_options(opt, optlen, &ndopts)) {
1065 in6_dev_put(in6_dev);
1066 ND_PRINTK2(KERN_WARNING
1067 "ICMP6 RA: invalid ND options\n");
1068 return;
1069 }
1070
1071 if (in6_dev->if_flags & IF_RS_SENT) {
1072 /*
1073 * flag that an RA was received after an RS was sent
1074 * out on this interface.
1075 */
1076 in6_dev->if_flags |= IF_RA_RCVD;
1077 }
1078
1079 /*
1080 * Remember the managed/otherconf flags from most recently
1081 * received RA message (RFC 2462) -- yoshfuji
1082 */
1083 in6_dev->if_flags = (in6_dev->if_flags & ~(IF_RA_MANAGED |
1084 IF_RA_OTHERCONF)) |
1085 (ra_msg->icmph.icmp6_addrconf_managed ?
1086 IF_RA_MANAGED : 0) |
1087 (ra_msg->icmph.icmp6_addrconf_other ?
1088 IF_RA_OTHERCONF : 0);
1089
1090 lifetime = ntohs(ra_msg->icmph.icmp6_rt_lifetime);
1091
1092 rt = rt6_get_dflt_router(&skb->nh.ipv6h->saddr, skb->dev);
1093
1094 if (rt)
1095 neigh = rt->rt6i_nexthop;
1096
1097 if (rt && lifetime == 0) {
1098 neigh_clone(neigh);
1099 ip6_del_rt(rt, NULL, NULL);
1100 rt = NULL;
1101 }
1102
1103 if (rt == NULL && lifetime) {
1104 ND_PRINTK3(KERN_DEBUG
1105 "ICMPv6 RA: adding default router.\n");
1106
1107 rt = rt6_add_dflt_router(&skb->nh.ipv6h->saddr, skb->dev);
1108 if (rt == NULL) {
1109 ND_PRINTK0(KERN_ERR
1110 "ICMPv6 RA: %s() failed to add default route.\n",
1111 __FUNCTION__);
1112 in6_dev_put(in6_dev);
1113 return;
1114 }
1115
1116 neigh = rt->rt6i_nexthop;
1117 if (neigh == NULL) {
1118 ND_PRINTK0(KERN_ERR
1119 "ICMPv6 RA: %s() got default router without neighbour.\n",
1120 __FUNCTION__);
1121 dst_release(&rt->u.dst);
1122 in6_dev_put(in6_dev);
1123 return;
1124 }
1125 neigh->flags |= NTF_ROUTER;
1126 }
1127
1128 if (rt)
1129 rt->rt6i_expires = jiffies + (HZ * lifetime);
1130
1131 if (ra_msg->icmph.icmp6_hop_limit) {
1132 in6_dev->cnf.hop_limit = ra_msg->icmph.icmp6_hop_limit;
1133 if (rt)
1134 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = ra_msg->icmph.icmp6_hop_limit;
1135 }
1136
1137 /*
1138 * Update Reachable Time and Retrans Timer
1139 */
1140
1141 if (in6_dev->nd_parms) {
1142 unsigned long rtime = ntohl(ra_msg->retrans_timer);
1143
1144 if (rtime && rtime/1000 < MAX_SCHEDULE_TIMEOUT/HZ) {
1145 rtime = (rtime*HZ)/1000;
1146 if (rtime < HZ/10)
1147 rtime = HZ/10;
1148 in6_dev->nd_parms->retrans_time = rtime;
1149 in6_dev->tstamp = jiffies;
1150 inet6_ifinfo_notify(RTM_NEWLINK, in6_dev);
1151 }
1152
1153 rtime = ntohl(ra_msg->reachable_time);
1154 if (rtime && rtime/1000 < MAX_SCHEDULE_TIMEOUT/(3*HZ)) {
1155 rtime = (rtime*HZ)/1000;
1156
1157 if (rtime < HZ/10)
1158 rtime = HZ/10;
1159
1160 if (rtime != in6_dev->nd_parms->base_reachable_time) {
1161 in6_dev->nd_parms->base_reachable_time = rtime;
1162 in6_dev->nd_parms->gc_staletime = 3 * rtime;
1163 in6_dev->nd_parms->reachable_time = neigh_rand_reach_time(rtime);
1164 in6_dev->tstamp = jiffies;
1165 inet6_ifinfo_notify(RTM_NEWLINK, in6_dev);
1166 }
1167 }
1168 }
1169
1170 /*
1171 * Process options.
1172 */
1173
1174 if (!neigh)
1175 neigh = __neigh_lookup(&nd_tbl, &skb->nh.ipv6h->saddr,
1176 skb->dev, 1);
1177 if (neigh) {
1178 u8 *lladdr = NULL;
1179 if (ndopts.nd_opts_src_lladdr) {
1180 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_src_lladdr,
1181 skb->dev);
1182 if (!lladdr) {
1183 ND_PRINTK2(KERN_WARNING
1184 "ICMPv6 RA: invalid link-layer address length\n");
1185 goto out;
1186 }
1187 }
1188 neigh_update(neigh, lladdr, NUD_STALE,
1189 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1190 NEIGH_UPDATE_F_OVERRIDE|
1191 NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1192 NEIGH_UPDATE_F_ISROUTER);
1193 }
1194
1195 if (ndopts.nd_opts_pi) {
1196 struct nd_opt_hdr *p;
1197 for (p = ndopts.nd_opts_pi;
1198 p;
1199 p = ndisc_next_option(p, ndopts.nd_opts_pi_end)) {
1200 addrconf_prefix_rcv(skb->dev, (u8*)p, (p->nd_opt_len) << 3);
1201 }
1202 }
1203
1204 if (ndopts.nd_opts_mtu) {
1205 u32 mtu;
1206
1207 memcpy(&mtu, ((u8*)(ndopts.nd_opts_mtu+1))+2, sizeof(mtu));
1208 mtu = ntohl(mtu);
1209
1210 if (mtu < IPV6_MIN_MTU || mtu > skb->dev->mtu) {
1211 ND_PRINTK2(KERN_WARNING
1212 "ICMPv6 RA: invalid mtu: %d\n",
1213 mtu);
1214 } else if (in6_dev->cnf.mtu6 != mtu) {
1215 in6_dev->cnf.mtu6 = mtu;
1216
1217 if (rt)
1218 rt->u.dst.metrics[RTAX_MTU-1] = mtu;
1219
1220 rt6_mtu_change(skb->dev, mtu);
1221 }
1222 }
1223
1224 if (ndopts.nd_opts_tgt_lladdr || ndopts.nd_opts_rh) {
1225 ND_PRINTK2(KERN_WARNING
1226 "ICMPv6 RA: invalid RA options");
1227 }
1228out:
1229 if (rt)
1230 dst_release(&rt->u.dst);
1231 else if (neigh)
1232 neigh_release(neigh);
1233 in6_dev_put(in6_dev);
1234}
1235
1236static void ndisc_redirect_rcv(struct sk_buff *skb)
1237{
1238 struct inet6_dev *in6_dev;
1239 struct icmp6hdr *icmph;
1240 struct in6_addr *dest;
1241 struct in6_addr *target; /* new first hop to destination */
1242 struct neighbour *neigh;
1243 int on_link = 0;
1244 struct ndisc_options ndopts;
1245 int optlen;
1246 u8 *lladdr = NULL;
1247
1248 if (!(ipv6_addr_type(&skb->nh.ipv6h->saddr) & IPV6_ADDR_LINKLOCAL)) {
1249 ND_PRINTK2(KERN_WARNING
1250 "ICMPv6 Redirect: source address is not link-local.\n");
1251 return;
1252 }
1253
1254 optlen = skb->tail - skb->h.raw;
1255 optlen -= sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr);
1256
1257 if (optlen < 0) {
1258 ND_PRINTK2(KERN_WARNING
1259 "ICMPv6 Redirect: packet too short\n");
1260 return;
1261 }
1262
1263 icmph = (struct icmp6hdr *) skb->h.raw;
1264 target = (struct in6_addr *) (icmph + 1);
1265 dest = target + 1;
1266
1267 if (ipv6_addr_is_multicast(dest)) {
1268 ND_PRINTK2(KERN_WARNING
1269 "ICMPv6 Redirect: destination address is multicast.\n");
1270 return;
1271 }
1272
1273 if (ipv6_addr_equal(dest, target)) {
1274 on_link = 1;
1275 } else if (!(ipv6_addr_type(target) & IPV6_ADDR_LINKLOCAL)) {
1276 ND_PRINTK2(KERN_WARNING
1277 "ICMPv6 Redirect: target address is not link-local.\n");
1278 return;
1279 }
1280
1281 in6_dev = in6_dev_get(skb->dev);
1282 if (!in6_dev)
1283 return;
1284 if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects) {
1285 in6_dev_put(in6_dev);
1286 return;
1287 }
1288
1289 /* RFC2461 8.1:
1290 * The IP source address of the Redirect MUST be the same as the current
1291 * first-hop router for the specified ICMP Destination Address.
1292 */
1293
1294 if (!ndisc_parse_options((u8*)(dest + 1), optlen, &ndopts)) {
1295 ND_PRINTK2(KERN_WARNING
1296 "ICMPv6 Redirect: invalid ND options\n");
1297 in6_dev_put(in6_dev);
1298 return;
1299 }
1300 if (ndopts.nd_opts_tgt_lladdr) {
1301 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
1302 skb->dev);
1303 if (!lladdr) {
1304 ND_PRINTK2(KERN_WARNING
1305 "ICMPv6 Redirect: invalid link-layer address length\n");
1306 in6_dev_put(in6_dev);
1307 return;
1308 }
1309 }
1310
1311 neigh = __neigh_lookup(&nd_tbl, target, skb->dev, 1);
1312 if (neigh) {
1313 rt6_redirect(dest, &skb->nh.ipv6h->saddr, neigh, lladdr,
1314 on_link);
1315 neigh_release(neigh);
1316 }
1317 in6_dev_put(in6_dev);
1318}
1319
1320void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh,
1321 struct in6_addr *target)
1322{
1323 struct sock *sk = ndisc_socket->sk;
1324 int len = sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr);
1325 struct sk_buff *buff;
1326 struct icmp6hdr *icmph;
1327 struct in6_addr saddr_buf;
1328 struct in6_addr *addrp;
1329 struct net_device *dev;
1330 struct rt6_info *rt;
1331 struct dst_entry *dst;
1332 struct inet6_dev *idev;
1333 struct flowi fl;
1334 u8 *opt;
1335 int rd_len;
1336 int err;
1337 int hlen;
1338 u8 ha_buf[MAX_ADDR_LEN], *ha = NULL;
1339
1340 dev = skb->dev;
1341
1342 if (ipv6_get_lladdr(dev, &saddr_buf)) {
1343 ND_PRINTK2(KERN_WARNING
1344 "ICMPv6 Redirect: no link-local address on %s\n",
1345 dev->name);
1346 return;
1347 }
1348
1349 ndisc_flow_init(&fl, NDISC_REDIRECT, &saddr_buf, &skb->nh.ipv6h->saddr);
1350
1351 dst = ip6_route_output(NULL, &fl);
1352 if (dst == NULL)
1353 return;
1354
1355 err = xfrm_lookup(&dst, &fl, NULL, 0);
1356 if (err) {
1357 dst_release(dst);
1358 return;
1359 }
1360
1361 rt = (struct rt6_info *) dst;
1362
1363 if (rt->rt6i_flags & RTF_GATEWAY) {
1364 ND_PRINTK2(KERN_WARNING
1365 "ICMPv6 Redirect: destination is not a neighbour.\n");
1366 dst_release(dst);
1367 return;
1368 }
1369 if (!xrlim_allow(dst, 1*HZ)) {
1370 dst_release(dst);
1371 return;
1372 }
1373
1374 if (dev->addr_len) {
1375 read_lock_bh(&neigh->lock);
1376 if (neigh->nud_state & NUD_VALID) {
1377 memcpy(ha_buf, neigh->ha, dev->addr_len);
1378 read_unlock_bh(&neigh->lock);
1379 ha = ha_buf;
1380 len += ndisc_opt_addr_space(dev);
1381 } else
1382 read_unlock_bh(&neigh->lock);
1383 }
1384
1385 rd_len = min_t(unsigned int,
1386 IPV6_MIN_MTU-sizeof(struct ipv6hdr)-len, skb->len + 8);
1387 rd_len &= ~0x7;
1388 len += rd_len;
1389
1390 buff = sock_alloc_send_skb(sk, MAX_HEADER + len + LL_RESERVED_SPACE(dev),
1391 1, &err);
1392 if (buff == NULL) {
1393 ND_PRINTK0(KERN_ERR
1394 "ICMPv6 Redirect: %s() failed to allocate an skb.\n",
1395 __FUNCTION__);
1396 dst_release(dst);
1397 return;
1398 }
1399
1400 hlen = 0;
1401
1402 skb_reserve(buff, LL_RESERVED_SPACE(dev));
1403 ip6_nd_hdr(sk, buff, dev, &saddr_buf, &skb->nh.ipv6h->saddr,
1404 IPPROTO_ICMPV6, len);
1405
1406 icmph = (struct icmp6hdr *)skb_put(buff, len);
1407 buff->h.raw = (unsigned char*)icmph;
1408
1409 memset(icmph, 0, sizeof(struct icmp6hdr));
1410 icmph->icmp6_type = NDISC_REDIRECT;
1411
1412 /*
1413 * copy target and destination addresses
1414 */
1415
1416 addrp = (struct in6_addr *)(icmph + 1);
1417 ipv6_addr_copy(addrp, target);
1418 addrp++;
1419 ipv6_addr_copy(addrp, &skb->nh.ipv6h->daddr);
1420
1421 opt = (u8*) (addrp + 1);
1422
1423 /*
1424 * include target_address option
1425 */
1426
1427 if (ha)
1428 opt = ndisc_fill_addr_option(opt, ND_OPT_TARGET_LL_ADDR, ha,
1429 dev->addr_len, dev->type);
1430
1431 /*
1432 * build redirect option and copy skb over to the new packet.
1433 */
1434
1435 memset(opt, 0, 8);
1436 *(opt++) = ND_OPT_REDIRECT_HDR;
1437 *(opt++) = (rd_len >> 3);
1438 opt += 6;
1439
1440 memcpy(opt, skb->nh.ipv6h, rd_len - 8);
1441
1442 icmph->icmp6_cksum = csum_ipv6_magic(&saddr_buf, &skb->nh.ipv6h->saddr,
1443 len, IPPROTO_ICMPV6,
1444 csum_partial((u8 *) icmph, len, 0));
1445
1446 buff->dst = dst;
1447 idev = in6_dev_get(dst->dev);
1448 IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
1449 err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, buff, NULL, dst->dev, dst_output);
1450 if (!err) {
1451 ICMP6_INC_STATS(idev, ICMP6_MIB_OUTREDIRECTS);
1452 ICMP6_INC_STATS(idev, ICMP6_MIB_OUTMSGS);
1453 }
1454
1455 if (likely(idev != NULL))
1456 in6_dev_put(idev);
1457}
1458
/*
 *	Re-run ND input processing on a packet, then free it.
 */
static void pndisc_redo(struct sk_buff *skb)
{
	ndisc_rcv(skb);

	/* ndisc_rcv() is followed by an explicit free of the packet */
	kfree_skb(skb);
}
1464
1465int ndisc_rcv(struct sk_buff *skb)
1466{
1467 struct nd_msg *msg;
1468
1469 if (!pskb_may_pull(skb, skb->len))
1470 return 0;
1471
1472 msg = (struct nd_msg *) skb->h.raw;
1473
1474 __skb_push(skb, skb->data-skb->h.raw);
1475
1476 if (skb->nh.ipv6h->hop_limit != 255) {
1477 ND_PRINTK2(KERN_WARNING
1478 "ICMPv6 NDISC: invalid hop-limit: %d\n",
1479 skb->nh.ipv6h->hop_limit);
1480 return 0;
1481 }
1482
1483 if (msg->icmph.icmp6_code != 0) {
1484 ND_PRINTK2(KERN_WARNING
1485 "ICMPv6 NDISC: invalid ICMPv6 code: %d\n",
1486 msg->icmph.icmp6_code);
1487 return 0;
1488 }
1489
1490 switch (msg->icmph.icmp6_type) {
1491 case NDISC_NEIGHBOUR_SOLICITATION:
1492 ndisc_recv_ns(skb);
1493 break;
1494
1495 case NDISC_NEIGHBOUR_ADVERTISEMENT:
1496 ndisc_recv_na(skb);
1497 break;
1498
1499 case NDISC_ROUTER_SOLICITATION:
1500 ndisc_recv_rs(skb);
1501 break;
1502
1503 case NDISC_ROUTER_ADVERTISEMENT:
1504 ndisc_router_discovery(skb);
1505 break;
1506
1507 case NDISC_REDIRECT:
1508 ndisc_redirect_rcv(skb);
1509 break;
1510 };
1511
1512 return 0;
1513}
1514
1515static int ndisc_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
1516{
1517 struct net_device *dev = ptr;
1518
1519 switch (event) {
1520 case NETDEV_CHANGEADDR:
1521 neigh_changeaddr(&nd_tbl, dev);
1522 fib6_run_gc(~0UL);
1523 break;
1524 case NETDEV_DOWN:
1525 neigh_ifdown(&nd_tbl, dev);
1526 fib6_run_gc(~0UL);
1527 break;
1528 default:
1529 break;
1530 }
1531
1532 return NOTIFY_DONE;
1533}
1534
1535static struct notifier_block ndisc_netdev_notifier = {
1536 .notifier_call = ndisc_netdev_event,
1537};
1538
1539#ifdef CONFIG_SYSCTL
1540static void ndisc_warn_deprecated_sysctl(struct ctl_table *ctl,
1541 const char *func, const char *dev_name)
1542{
1543 static char warncomm[TASK_COMM_LEN];
1544 static int warned;
1545 if (strcmp(warncomm, current->comm) && warned < 5) {
1546 strcpy(warncomm, current->comm);
1547 printk(KERN_WARNING
1548 "process `%s' is using deprecated sysctl (%s) "
1549 "net.ipv6.neigh.%s.%s; "
1550 "Use net.ipv6.neigh.%s.%s_ms "
1551 "instead.\n",
1552 warncomm, func,
1553 dev_name, ctl->procname,
1554 dev_name, ctl->procname);
1555 warned++;
1556 }
1557}
1558
1559int ndisc_ifinfo_sysctl_change(struct ctl_table *ctl, int write, struct file * filp, void __user *buffer, size_t *lenp, loff_t *ppos)
1560{
1561 struct net_device *dev = ctl->extra1;
1562 struct inet6_dev *idev;
1563 int ret;
1564
1565 if (ctl->ctl_name == NET_NEIGH_RETRANS_TIME ||
1566 ctl->ctl_name == NET_NEIGH_REACHABLE_TIME)
1567 ndisc_warn_deprecated_sysctl(ctl, "syscall", dev ? dev->name : "default");
1568
1569 switch (ctl->ctl_name) {
1570 case NET_NEIGH_RETRANS_TIME:
1571 ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1572 break;
1573 case NET_NEIGH_REACHABLE_TIME:
1574 ret = proc_dointvec_jiffies(ctl, write,
1575 filp, buffer, lenp, ppos);
1576 break;
1577 case NET_NEIGH_RETRANS_TIME_MS:
1578 case NET_NEIGH_REACHABLE_TIME_MS:
1579 ret = proc_dointvec_ms_jiffies(ctl, write,
1580 filp, buffer, lenp, ppos);
1581 break;
1582 default:
1583 ret = -1;
1584 }
1585
1586 if (write && ret == 0 && dev && (idev = in6_dev_get(dev)) != NULL) {
1587 if (ctl->ctl_name == NET_NEIGH_REACHABLE_TIME ||
1588 ctl->ctl_name == NET_NEIGH_REACHABLE_TIME_MS)
1589 idev->nd_parms->reachable_time = neigh_rand_reach_time(idev->nd_parms->base_reachable_time);
1590 idev->tstamp = jiffies;
1591 inet6_ifinfo_notify(RTM_NEWLINK, idev);
1592 in6_dev_put(idev);
1593 }
1594 return ret;
1595}
1596
1597static int ndisc_ifinfo_sysctl_strategy(ctl_table *ctl, int __user *name,
1598 int nlen, void __user *oldval,
1599 size_t __user *oldlenp,
1600 void __user *newval, size_t newlen,
1601 void **context)
1602{
1603 struct net_device *dev = ctl->extra1;
1604 struct inet6_dev *idev;
1605 int ret;
1606
1607 if (ctl->ctl_name == NET_NEIGH_RETRANS_TIME ||
1608 ctl->ctl_name == NET_NEIGH_REACHABLE_TIME)
1609 ndisc_warn_deprecated_sysctl(ctl, "procfs", dev ? dev->name : "default");
1610
1611 switch (ctl->ctl_name) {
1612 case NET_NEIGH_REACHABLE_TIME:
1613 ret = sysctl_jiffies(ctl, name, nlen,
1614 oldval, oldlenp, newval, newlen,
1615 context);
1616 break;
1617 case NET_NEIGH_RETRANS_TIME_MS:
1618 case NET_NEIGH_REACHABLE_TIME_MS:
1619 ret = sysctl_ms_jiffies(ctl, name, nlen,
1620 oldval, oldlenp, newval, newlen,
1621 context);
1622 break;
1623 default:
1624 ret = 0;
1625 }
1626
1627 if (newval && newlen && ret > 0 &&
1628 dev && (idev = in6_dev_get(dev)) != NULL) {
1629 if (ctl->ctl_name == NET_NEIGH_REACHABLE_TIME ||
1630 ctl->ctl_name == NET_NEIGH_REACHABLE_TIME_MS)
1631 idev->nd_parms->reachable_time = neigh_rand_reach_time(idev->nd_parms->base_reachable_time);
1632 idev->tstamp = jiffies;
1633 inet6_ifinfo_notify(RTM_NEWLINK, idev);
1634 in6_dev_put(idev);
1635 }
1636
1637 return ret;
1638}
1639
1640#endif
1641
1642int __init ndisc_init(struct net_proto_family *ops)
1643{
1644 struct ipv6_pinfo *np;
1645 struct sock *sk;
1646 int err;
1647
1648 err = sock_create_kern(PF_INET6, SOCK_RAW, IPPROTO_ICMPV6, &ndisc_socket);
1649 if (err < 0) {
1650 ND_PRINTK0(KERN_ERR
1651 "ICMPv6 NDISC: Failed to initialize the control socket (err %d).\n",
1652 err);
1653 ndisc_socket = NULL; /* For safety. */
1654 return err;
1655 }
1656
1657 sk = ndisc_socket->sk;
1658 np = inet6_sk(sk);
1659 sk->sk_allocation = GFP_ATOMIC;
1660 np->hop_limit = 255;
1661 /* Do not loopback ndisc messages */
1662 np->mc_loop = 0;
1663 sk->sk_prot->unhash(sk);
1664
1665 /*
1666 * Initialize the neighbour table
1667 */
1668
1669 neigh_table_init(&nd_tbl);
1670
1671#ifdef CONFIG_SYSCTL
1672 neigh_sysctl_register(NULL, &nd_tbl.parms, NET_IPV6, NET_IPV6_NEIGH,
1673 "ipv6",
1674 &ndisc_ifinfo_sysctl_change,
1675 &ndisc_ifinfo_sysctl_strategy);
1676#endif
1677
1678 register_netdevice_notifier(&ndisc_netdev_notifier);
1679 return 0;
1680}
1681
1682void ndisc_cleanup(void)
1683{
1684#ifdef CONFIG_SYSCTL
1685 neigh_sysctl_unregister(&nd_tbl.parms);
1686#endif
1687 neigh_table_clear(&nd_tbl);
1688 sock_release(ndisc_socket);
1689 ndisc_socket = NULL; /* For safety. */
1690}
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
new file mode 100644
index 000000000000..77ec704c9ee3
--- /dev/null
+++ b/net/ipv6/netfilter/Kconfig
@@ -0,0 +1,242 @@
1#
2# IP netfilter configuration
3#
4
5menu "IPv6: Netfilter Configuration (EXPERIMENTAL)"
6 depends on INET && IPV6 && NETFILTER && EXPERIMENTAL
7
8#tristate 'Connection tracking (required for masq/NAT)' CONFIG_IP6_NF_CONNTRACK
9#if [ "$CONFIG_IP6_NF_CONNTRACK" != "n" ]; then
10# dep_tristate ' FTP protocol support' CONFIG_IP6_NF_FTP $CONFIG_IP6_NF_CONNTRACK
11#fi
12config IP6_NF_QUEUE
13 tristate "Userspace queueing via NETLINK"
14 ---help---
15
16 This option adds a queue handler to the kernel for IPv6
17 packets which lets us receive the filtered packets
18 with QUEUE target using libiptc as we can do with
19 the IPv4 now.
20
21 (C) Fernando Anton 2001
22 IPv64 Project - Work based in IPv64 draft by Arturo Azcorra.
23 Universidad Carlos III de Madrid
24 Universidad Politecnica de Alcala de Henares
25 email: <fanton@it.uc3m.es>.
26
27 To compile it as a module, choose M here. If unsure, say N.
28
29config IP6_NF_IPTABLES
30 tristate "IP6 tables support (required for filtering/masq/NAT)"
31 help
32 ip6tables is a general, extensible packet identification framework.
33 Currently only the packet filtering and packet mangling subsystem
34 for IPv6 use this, but connection tracking is going to follow.
35 Say 'Y' or 'M' here if you want to use either of those.
36
37 To compile it as a module, choose M here. If unsure, say N.
38
39# The simple matches.
40config IP6_NF_MATCH_LIMIT
41 tristate "limit match support"
42 depends on IP6_NF_IPTABLES
43 help
44 limit matching allows you to control the rate at which a rule can be
45 matched: mainly useful in combination with the LOG target ("LOG
46 target support", below) and to avoid some Denial of Service attacks.
47
48 To compile it as a module, choose M here. If unsure, say N.
49
50config IP6_NF_MATCH_MAC
51 tristate "MAC address match support"
52 depends on IP6_NF_IPTABLES
53 help
54 mac matching allows you to match packets based on the source
55 Ethernet address of the packet.
56
57 To compile it as a module, choose M here. If unsure, say N.
58
59config IP6_NF_MATCH_RT
60 tristate "Routing header match support"
61 depends on IP6_NF_IPTABLES
62 help
63 rt matching allows you to match packets based on the routing
64 header of the packet.
65
66 To compile it as a module, choose M here. If unsure, say N.
67
68config IP6_NF_MATCH_OPTS
69 tristate "Hop-by-hop and Dst opts header match support"
70 depends on IP6_NF_IPTABLES
71 help
72 This allows one to match packets based on the hop-by-hop
73 and destination options headers of a packet.
74
75 To compile it as a module, choose M here. If unsure, say N.
76
77config IP6_NF_MATCH_FRAG
78 tristate "Fragmentation header match support"
79 depends on IP6_NF_IPTABLES
80 help
81 frag matching allows you to match packets based on the fragmentation
82 header of the packet.
83
84 To compile it as a module, choose M here. If unsure, say N.
85
86config IP6_NF_MATCH_HL
87 tristate "HL match support"
88 depends on IP6_NF_IPTABLES
89 help
90 HL matching allows you to match packets based on the hop
91 limit of the packet.
92
93 To compile it as a module, choose M here. If unsure, say N.
94
95config IP6_NF_MATCH_MULTIPORT
96 tristate "Multiple port match support"
97 depends on IP6_NF_IPTABLES
98 help
99 Multiport matching allows you to match TCP or UDP packets based on
100 a series of source or destination ports: normally a rule can only
101 match a single range of ports.
102
103 To compile it as a module, choose M here. If unsure, say N.
104
105config IP6_NF_MATCH_OWNER
106 tristate "Owner match support"
107 depends on IP6_NF_IPTABLES
108 help
109 Packet owner matching allows you to match locally-generated packets
110 based on who created them: the user, group, process or session.
111
112 To compile it as a module, choose M here. If unsure, say N.
113
114# dep_tristate ' MAC address match support' CONFIG_IP6_NF_MATCH_MAC $CONFIG_IP6_NF_IPTABLES
115config IP6_NF_MATCH_MARK
116 tristate "netfilter MARK match support"
117 depends on IP6_NF_IPTABLES
118 help
119 Netfilter mark matching allows you to match packets based on the
120 `nfmark' value in the packet. This can be set by the MARK target
121 (see below).
122
123 To compile it as a module, choose M here. If unsure, say N.
124
125config IP6_NF_MATCH_IPV6HEADER
126 tristate "IPv6 Extension Headers Match"
127 depends on IP6_NF_IPTABLES
128 help
129 This module allows one to match packets based upon
130 the ipv6 extension headers.
131
132 To compile it as a module, choose M here. If unsure, say N.
133
134config IP6_NF_MATCH_AHESP
135 tristate "AH/ESP match support"
136 depends on IP6_NF_IPTABLES
137 help
138 This module allows one to match AH and ESP packets.
139
140 To compile it as a module, choose M here. If unsure, say N.
141
142config IP6_NF_MATCH_LENGTH
143 tristate "Packet Length match support"
144 depends on IP6_NF_IPTABLES
145 help
146 This option allows you to match the length of a packet against a
147 specific value or range of values.
148
149 To compile it as a module, choose M here. If unsure, say N.
150
151config IP6_NF_MATCH_EUI64
152 tristate "EUI64 address check"
153 depends on IP6_NF_IPTABLES
154 help
155 This module performs checking on the IPv6 source address:
156 it compares the last 64 bits with the EUI64 (derived
157 from the MAC address) address.
158
159 To compile it as a module, choose M here. If unsure, say N.
160
161config IP6_NF_MATCH_PHYSDEV
162 tristate "Physdev match support"
163 depends on IP6_NF_IPTABLES && BRIDGE_NETFILTER
164 help
165 Physdev packet matching matches against the physical bridge ports
166 the IP packet arrived on or will leave by.
167
168 To compile it as a module, choose M here. If unsure, say N.
169
170# dep_tristate ' Multiple port match support' CONFIG_IP6_NF_MATCH_MULTIPORT $CONFIG_IP6_NF_IPTABLES
171# dep_tristate ' TOS match support' CONFIG_IP6_NF_MATCH_TOS $CONFIG_IP6_NF_IPTABLES
172# if [ "$CONFIG_IP6_NF_CONNTRACK" != "n" ]; then
173# dep_tristate ' Connection state match support' CONFIG_IP6_NF_MATCH_STATE $CONFIG_IP6_NF_CONNTRACK $CONFIG_IP6_NF_IPTABLES
174# fi
175# if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then
176# dep_tristate ' Unclean match support (EXPERIMENTAL)' CONFIG_IP6_NF_MATCH_UNCLEAN $CONFIG_IP6_NF_IPTABLES
177# dep_tristate ' Owner match support (EXPERIMENTAL)' CONFIG_IP6_NF_MATCH_OWNER $CONFIG_IP6_NF_IPTABLES
178# fi
179# The targets
180config IP6_NF_FILTER
181 tristate "Packet filtering"
182 depends on IP6_NF_IPTABLES
183 help
184 Packet filtering defines a table `filter', which has a series of
185 rules for simple packet filtering at local input, forwarding and
186 local output. See the man page for iptables(8).
187
188 To compile it as a module, choose M here. If unsure, say N.
189
190config IP6_NF_TARGET_LOG
191 tristate "LOG target support"
192 depends on IP6_NF_FILTER
193 help
194 This option adds a `LOG' target, which allows you to create rules in
195 any iptables table which records the packet header to the syslog.
196
197 To compile it as a module, choose M here. If unsure, say N.
198
199# if [ "$CONFIG_IP6_NF_FILTER" != "n" ]; then
200# dep_tristate ' REJECT target support' CONFIG_IP6_NF_TARGET_REJECT $CONFIG_IP6_NF_FILTER
201# if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then
202# dep_tristate ' MIRROR target support (EXPERIMENTAL)' CONFIG_IP6_NF_TARGET_MIRROR $CONFIG_IP6_NF_FILTER
203# fi
204# fi
205config IP6_NF_MANGLE
206 tristate "Packet mangling"
207 depends on IP6_NF_IPTABLES
208 help
209 This option adds a `mangle' table to iptables: see the man page for
210 iptables(8). This table is used for various packet alterations
211 which can affect how the packet is routed.
212
213 To compile it as a module, choose M here. If unsure, say N.
214
215# dep_tristate ' TOS target support' CONFIG_IP6_NF_TARGET_TOS $CONFIG_IP_NF_MANGLE
216config IP6_NF_TARGET_MARK
217 tristate "MARK target support"
218 depends on IP6_NF_MANGLE
219 help
220 This option adds a `MARK' target, which allows you to create rules
221 in the `mangle' table which alter the netfilter mark (nfmark) field
222 associated with the packet prior to routing. This can change
223 the routing method (see `Use netfilter MARK value as routing
224 key') and can also be used by other subsystems to change their
225 behavior.
226
227 To compile it as a module, choose M here. If unsure, say N.
228
229#dep_tristate ' LOG target support' CONFIG_IP6_NF_TARGET_LOG $CONFIG_IP6_NF_IPTABLES
230config IP6_NF_RAW
231 tristate 'raw table support (required for TRACE)'
232 depends on IP6_NF_IPTABLES
233 help
234 This option adds a `raw' table to ip6tables. This table is the very
235 first in the netfilter framework and hooks in at the PREROUTING
236 and OUTPUT chains.
237
238 If you want to compile it as a module, say M here and read
239 <file:Documentation/modules.txt>. If unsure, say `N'.
240
241endmenu
242
diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile
new file mode 100644
index 000000000000..2e51714953b6
--- /dev/null
+++ b/net/ipv6/netfilter/Makefile
@@ -0,0 +1,26 @@
1#
2# Makefile for the netfilter modules on top of IPv6.
3#
4
5# Link order matters here.
6obj-$(CONFIG_IP6_NF_IPTABLES) += ip6_tables.o
7obj-$(CONFIG_IP6_NF_MATCH_LIMIT) += ip6t_limit.o
8obj-$(CONFIG_IP6_NF_MATCH_MARK) += ip6t_mark.o
9obj-$(CONFIG_IP6_NF_MATCH_LENGTH) += ip6t_length.o
10obj-$(CONFIG_IP6_NF_MATCH_MAC) += ip6t_mac.o
11obj-$(CONFIG_IP6_NF_MATCH_RT) += ip6t_rt.o
12obj-$(CONFIG_IP6_NF_MATCH_OPTS) += ip6t_hbh.o ip6t_dst.o
13obj-$(CONFIG_IP6_NF_MATCH_IPV6HEADER) += ip6t_ipv6header.o
14obj-$(CONFIG_IP6_NF_MATCH_FRAG) += ip6t_frag.o
15obj-$(CONFIG_IP6_NF_MATCH_AHESP) += ip6t_esp.o ip6t_ah.o
16obj-$(CONFIG_IP6_NF_MATCH_EUI64) += ip6t_eui64.o
17obj-$(CONFIG_IP6_NF_MATCH_MULTIPORT) += ip6t_multiport.o
18obj-$(CONFIG_IP6_NF_MATCH_OWNER) += ip6t_owner.o
19obj-$(CONFIG_IP6_NF_MATCH_PHYSDEV) += ip6t_physdev.o
20obj-$(CONFIG_IP6_NF_FILTER) += ip6table_filter.o
21obj-$(CONFIG_IP6_NF_MANGLE) += ip6table_mangle.o
22obj-$(CONFIG_IP6_NF_TARGET_MARK) += ip6t_MARK.o
23obj-$(CONFIG_IP6_NF_QUEUE) += ip6_queue.o
24obj-$(CONFIG_IP6_NF_TARGET_LOG) += ip6t_LOG.o
25obj-$(CONFIG_IP6_NF_RAW) += ip6table_raw.o
26obj-$(CONFIG_IP6_NF_MATCH_HL) += ip6t_hl.o
diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c
new file mode 100644
index 000000000000..c54830b89593
--- /dev/null
+++ b/net/ipv6/netfilter/ip6_queue.c
@@ -0,0 +1,741 @@
1/*
2 * This is a module which is used for queueing IPv6 packets and
3 * communicating with userspace via netlink.
4 *
5 * (C) 2001 Fernando Anton, this code is GPL.
6 * IPv64 Project - Work based in IPv64 draft by Arturo Azcorra.
7 * Universidad Carlos III de Madrid - Leganes (Madrid) - Spain
8 * Universidad Politecnica de Alcala de Henares - Alcala de H. (Madrid) - Spain
9 * email: fanton@it.uc3m.es
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License version 2 as
13 * published by the Free Software Foundation.
14 *
15 * 2001-11-06: First try. Working with ip_queue.c for IPv4 and trying
16 * to adapt it to IPv6
17 * HEAVILY based in ipqueue.c by James Morris. It's just
18 * a little modified version of it, so he's nearly the
19 * real coder of this.
20 * Few changes needed, mainly the hard_routing code and
21 * the netlink socket protocol (we're NETLINK_IP6_FW).
22 * 2002-06-25: Code cleanup. [JM: ported cleanup over from ip_queue.c]
23 * 2005-02-04: Added /proc counter for dropped packets; fixed so
24 * packets aren't delivered to user space if they're going
25 * to be dropped.
26 */
27#include <linux/module.h>
28#include <linux/skbuff.h>
29#include <linux/init.h>
30#include <linux/ipv6.h>
31#include <linux/notifier.h>
32#include <linux/netdevice.h>
33#include <linux/netfilter.h>
34#include <linux/netlink.h>
35#include <linux/spinlock.h>
36#include <linux/sysctl.h>
37#include <linux/proc_fs.h>
38#include <net/sock.h>
39#include <net/ipv6.h>
40#include <net/ip6_route.h>
41#include <linux/netfilter_ipv4/ip_queue.h>
42#include <linux/netfilter_ipv4/ip_tables.h>
43#include <linux/netfilter_ipv6/ip6_tables.h>
44
45#define IPQ_QMAX_DEFAULT 1024
46#define IPQ_PROC_FS_NAME "ip6_queue"
47#define NET_IPQ_QMAX 2088
48#define NET_IPQ_QMAX_NAME "ip6_queue_maxlen"
49
/*
 * Addresses of a packet as they were when it was queued.  Filled in by
 * ipq_enqueue_packet() for the NF_IP_LOCAL_OUT hook and compared against
 * the (possibly rewritten) header in ipq_mangle_ipv6().
 */
struct ipq_rt_info {
	struct in6_addr daddr;
	struct in6_addr saddr;
};

/* One packet waiting for a verdict; linked on queue_list. */
struct ipq_queue_entry {
	struct list_head list;		/* linkage on queue_list */
	struct nf_info *info;		/* netfilter info passed to the queue handler */
	struct sk_buff *skb;		/* the queued packet */
	struct ipq_rt_info rt_info;	/* only set for the NF_IP_LOCAL_OUT hook */
};

/* Match predicate used by the find helpers: non-zero means "this entry". */
typedef int (*ipq_cmpfn)(struct ipq_queue_entry *, unsigned long);
63
64static unsigned char copy_mode = IPQ_COPY_NONE;
65static unsigned int queue_maxlen = IPQ_QMAX_DEFAULT;
66static DEFINE_RWLOCK(queue_lock);
67static int peer_pid;
68static unsigned int copy_range;
69static unsigned int queue_total;
70static unsigned int queue_dropped = 0;
71static unsigned int queue_user_dropped = 0;
72static struct sock *ipqnl;
73static LIST_HEAD(queue_list);
74static DECLARE_MUTEX(ipqnl_sem);
75
76static void
77ipq_issue_verdict(struct ipq_queue_entry *entry, int verdict)
78{
79 nf_reinject(entry->skb, entry->info, verdict);
80 kfree(entry);
81}
82
83static inline void
84__ipq_enqueue_entry(struct ipq_queue_entry *entry)
85{
86 list_add(&entry->list, &queue_list);
87 queue_total++;
88}
89
90/*
91 * Find and return a queued entry matched by cmpfn, or return the last
92 * entry if cmpfn is NULL.
93 */
94static inline struct ipq_queue_entry *
95__ipq_find_entry(ipq_cmpfn cmpfn, unsigned long data)
96{
97 struct list_head *p;
98
99 list_for_each_prev(p, &queue_list) {
100 struct ipq_queue_entry *entry = (struct ipq_queue_entry *)p;
101
102 if (!cmpfn || cmpfn(entry, data))
103 return entry;
104 }
105 return NULL;
106}
107
108static inline void
109__ipq_dequeue_entry(struct ipq_queue_entry *entry)
110{
111 list_del(&entry->list);
112 queue_total--;
113}
114
115static inline struct ipq_queue_entry *
116__ipq_find_dequeue_entry(ipq_cmpfn cmpfn, unsigned long data)
117{
118 struct ipq_queue_entry *entry;
119
120 entry = __ipq_find_entry(cmpfn, data);
121 if (entry == NULL)
122 return NULL;
123
124 __ipq_dequeue_entry(entry);
125 return entry;
126}
127
128
129static inline void
130__ipq_flush(int verdict)
131{
132 struct ipq_queue_entry *entry;
133
134 while ((entry = __ipq_find_dequeue_entry(NULL, 0)))
135 ipq_issue_verdict(entry, verdict);
136}
137
138static inline int
139__ipq_set_mode(unsigned char mode, unsigned int range)
140{
141 int status = 0;
142
143 switch(mode) {
144 case IPQ_COPY_NONE:
145 case IPQ_COPY_META:
146 copy_mode = mode;
147 copy_range = 0;
148 break;
149
150 case IPQ_COPY_PACKET:
151 copy_mode = mode;
152 copy_range = range;
153 if (copy_range > 0xFFFF)
154 copy_range = 0xFFFF;
155 break;
156
157 default:
158 status = -EINVAL;
159
160 }
161 return status;
162}
163
164static inline void
165__ipq_reset(void)
166{
167 peer_pid = 0;
168 net_disable_timestamp();
169 __ipq_set_mode(IPQ_COPY_NONE, 0);
170 __ipq_flush(NF_DROP);
171}
172
173static struct ipq_queue_entry *
174ipq_find_dequeue_entry(ipq_cmpfn cmpfn, unsigned long data)
175{
176 struct ipq_queue_entry *entry;
177
178 write_lock_bh(&queue_lock);
179 entry = __ipq_find_dequeue_entry(cmpfn, data);
180 write_unlock_bh(&queue_lock);
181 return entry;
182}
183
184static void
185ipq_flush(int verdict)
186{
187 write_lock_bh(&queue_lock);
188 __ipq_flush(verdict);
189 write_unlock_bh(&queue_lock);
190}
191
192static struct sk_buff *
193ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp)
194{
195 unsigned char *old_tail;
196 size_t size = 0;
197 size_t data_len = 0;
198 struct sk_buff *skb;
199 struct ipq_packet_msg *pmsg;
200 struct nlmsghdr *nlh;
201
202 read_lock_bh(&queue_lock);
203
204 switch (copy_mode) {
205 case IPQ_COPY_META:
206 case IPQ_COPY_NONE:
207 size = NLMSG_SPACE(sizeof(*pmsg));
208 data_len = 0;
209 break;
210
211 case IPQ_COPY_PACKET:
212 if (copy_range == 0 || copy_range > entry->skb->len)
213 data_len = entry->skb->len;
214 else
215 data_len = copy_range;
216
217 size = NLMSG_SPACE(sizeof(*pmsg) + data_len);
218 break;
219
220 default:
221 *errp = -EINVAL;
222 read_unlock_bh(&queue_lock);
223 return NULL;
224 }
225
226 read_unlock_bh(&queue_lock);
227
228 skb = alloc_skb(size, GFP_ATOMIC);
229 if (!skb)
230 goto nlmsg_failure;
231
232 old_tail= skb->tail;
233 nlh = NLMSG_PUT(skb, 0, 0, IPQM_PACKET, size - sizeof(*nlh));
234 pmsg = NLMSG_DATA(nlh);
235 memset(pmsg, 0, sizeof(*pmsg));
236
237 pmsg->packet_id = (unsigned long )entry;
238 pmsg->data_len = data_len;
239 pmsg->timestamp_sec = entry->skb->stamp.tv_sec;
240 pmsg->timestamp_usec = entry->skb->stamp.tv_usec;
241 pmsg->mark = entry->skb->nfmark;
242 pmsg->hook = entry->info->hook;
243 pmsg->hw_protocol = entry->skb->protocol;
244
245 if (entry->info->indev)
246 strcpy(pmsg->indev_name, entry->info->indev->name);
247 else
248 pmsg->indev_name[0] = '\0';
249
250 if (entry->info->outdev)
251 strcpy(pmsg->outdev_name, entry->info->outdev->name);
252 else
253 pmsg->outdev_name[0] = '\0';
254
255 if (entry->info->indev && entry->skb->dev) {
256 pmsg->hw_type = entry->skb->dev->type;
257 if (entry->skb->dev->hard_header_parse)
258 pmsg->hw_addrlen =
259 entry->skb->dev->hard_header_parse(entry->skb,
260 pmsg->hw_addr);
261 }
262
263 if (data_len)
264 if (skb_copy_bits(entry->skb, 0, pmsg->payload, data_len))
265 BUG();
266
267 nlh->nlmsg_len = skb->tail - old_tail;
268 return skb;
269
270nlmsg_failure:
271 if (skb)
272 kfree_skb(skb);
273 *errp = -EINVAL;
274 printk(KERN_ERR "ip6_queue: error creating packet message\n");
275 return NULL;
276}
277
278static int
279ipq_enqueue_packet(struct sk_buff *skb, struct nf_info *info, void *data)
280{
281 int status = -EINVAL;
282 struct sk_buff *nskb;
283 struct ipq_queue_entry *entry;
284
285 if (copy_mode == IPQ_COPY_NONE)
286 return -EAGAIN;
287
288 entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
289 if (entry == NULL) {
290 printk(KERN_ERR "ip6_queue: OOM in ipq_enqueue_packet()\n");
291 return -ENOMEM;
292 }
293
294 entry->info = info;
295 entry->skb = skb;
296
297 if (entry->info->hook == NF_IP_LOCAL_OUT) {
298 struct ipv6hdr *iph = skb->nh.ipv6h;
299
300 entry->rt_info.daddr = iph->daddr;
301 entry->rt_info.saddr = iph->saddr;
302 }
303
304 nskb = ipq_build_packet_message(entry, &status);
305 if (nskb == NULL)
306 goto err_out_free;
307
308 write_lock_bh(&queue_lock);
309
310 if (!peer_pid)
311 goto err_out_free_nskb;
312
313 if (queue_total >= queue_maxlen) {
314 queue_dropped++;
315 status = -ENOSPC;
316 if (net_ratelimit())
317 printk (KERN_WARNING "ip6_queue: fill at %d entries, "
318 "dropping packet(s). Dropped: %d\n", queue_total,
319 queue_dropped);
320 goto err_out_free_nskb;
321 }
322
323 /* netlink_unicast will either free the nskb or attach it to a socket */
324 status = netlink_unicast(ipqnl, nskb, peer_pid, MSG_DONTWAIT);
325 if (status < 0) {
326 queue_user_dropped++;
327 goto err_out_unlock;
328 }
329
330 __ipq_enqueue_entry(entry);
331
332 write_unlock_bh(&queue_lock);
333 return status;
334
335err_out_free_nskb:
336 kfree_skb(nskb);
337
338err_out_unlock:
339 write_unlock_bh(&queue_lock);
340
341err_out_free:
342 kfree(entry);
343 return status;
344}
345
346static int
347ipq_mangle_ipv6(ipq_verdict_msg_t *v, struct ipq_queue_entry *e)
348{
349 int diff;
350 struct ipv6hdr *user_iph = (struct ipv6hdr *)v->payload;
351
352 if (v->data_len < sizeof(*user_iph))
353 return 0;
354 diff = v->data_len - e->skb->len;
355 if (diff < 0)
356 skb_trim(e->skb, v->data_len);
357 else if (diff > 0) {
358 if (v->data_len > 0xFFFF)
359 return -EINVAL;
360 if (diff > skb_tailroom(e->skb)) {
361 struct sk_buff *newskb;
362
363 newskb = skb_copy_expand(e->skb,
364 skb_headroom(e->skb),
365 diff,
366 GFP_ATOMIC);
367 if (newskb == NULL) {
368 printk(KERN_WARNING "ip6_queue: OOM "
369 "in mangle, dropping packet\n");
370 return -ENOMEM;
371 }
372 if (e->skb->sk)
373 skb_set_owner_w(newskb, e->skb->sk);
374 kfree_skb(e->skb);
375 e->skb = newskb;
376 }
377 skb_put(e->skb, diff);
378 }
379 if (!skb_ip_make_writable(&e->skb, v->data_len))
380 return -ENOMEM;
381 memcpy(e->skb->data, v->payload, v->data_len);
382 e->skb->nfcache |= NFC_ALTERED;
383
384 /*
385 * Extra routing may needed on local out, as the QUEUE target never
386 * returns control to the table.
387 * Not a nice way to cmp, but works
388 */
389 if (e->info->hook == NF_IP_LOCAL_OUT) {
390 struct ipv6hdr *iph = e->skb->nh.ipv6h;
391 if (!ipv6_addr_equal(&iph->daddr, &e->rt_info.daddr) ||
392 !ipv6_addr_equal(&iph->saddr, &e->rt_info.saddr))
393 return ip6_route_me_harder(e->skb);
394 }
395 return 0;
396}
397
/* ipq_cmpfn: match the entry whose address equals the packet id. */
static inline int
id_cmp(struct ipq_queue_entry *e, unsigned long id)
{
	unsigned long entry_id = (unsigned long)e;

	return entry_id == id;
}
403
404static int
405ipq_set_verdict(struct ipq_verdict_msg *vmsg, unsigned int len)
406{
407 struct ipq_queue_entry *entry;
408
409 if (vmsg->value > NF_MAX_VERDICT)
410 return -EINVAL;
411
412 entry = ipq_find_dequeue_entry(id_cmp, vmsg->id);
413 if (entry == NULL)
414 return -ENOENT;
415 else {
416 int verdict = vmsg->value;
417
418 if (vmsg->data_len && vmsg->data_len == len)
419 if (ipq_mangle_ipv6(vmsg, entry) < 0)
420 verdict = NF_DROP;
421
422 ipq_issue_verdict(entry, verdict);
423 return 0;
424 }
425}
426
427static int
428ipq_set_mode(unsigned char mode, unsigned int range)
429{
430 int status;
431
432 write_lock_bh(&queue_lock);
433 status = __ipq_set_mode(mode, range);
434 write_unlock_bh(&queue_lock);
435 return status;
436}
437
438static int
439ipq_receive_peer(struct ipq_peer_msg *pmsg,
440 unsigned char type, unsigned int len)
441{
442 int status = 0;
443
444 if (len < sizeof(*pmsg))
445 return -EINVAL;
446
447 switch (type) {
448 case IPQM_MODE:
449 status = ipq_set_mode(pmsg->msg.mode.value,
450 pmsg->msg.mode.range);
451 break;
452
453 case IPQM_VERDICT:
454 if (pmsg->msg.verdict.value > NF_MAX_VERDICT)
455 status = -EINVAL;
456 else
457 status = ipq_set_verdict(&pmsg->msg.verdict,
458 len - sizeof(*pmsg));
459 break;
460 default:
461 status = -EINVAL;
462 }
463 return status;
464}
465
466static int
467dev_cmp(struct ipq_queue_entry *entry, unsigned long ifindex)
468{
469 if (entry->info->indev)
470 if (entry->info->indev->ifindex == ifindex)
471 return 1;
472
473 if (entry->info->outdev)
474 if (entry->info->outdev->ifindex == ifindex)
475 return 1;
476
477 return 0;
478}
479
480static void
481ipq_dev_drop(int ifindex)
482{
483 struct ipq_queue_entry *entry;
484
485 while ((entry = ipq_find_dequeue_entry(dev_cmp, ifindex)) != NULL)
486 ipq_issue_verdict(entry, NF_DROP);
487}
488
489#define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0)
490
491static inline void
492ipq_rcv_skb(struct sk_buff *skb)
493{
494 int status, type, pid, flags, nlmsglen, skblen;
495 struct nlmsghdr *nlh;
496
497 skblen = skb->len;
498 if (skblen < sizeof(*nlh))
499 return;
500
501 nlh = (struct nlmsghdr *)skb->data;
502 nlmsglen = nlh->nlmsg_len;
503 if (nlmsglen < sizeof(*nlh) || skblen < nlmsglen)
504 return;
505
506 pid = nlh->nlmsg_pid;
507 flags = nlh->nlmsg_flags;
508
509 if(pid <= 0 || !(flags & NLM_F_REQUEST) || flags & NLM_F_MULTI)
510 RCV_SKB_FAIL(-EINVAL);
511
512 if (flags & MSG_TRUNC)
513 RCV_SKB_FAIL(-ECOMM);
514
515 type = nlh->nlmsg_type;
516 if (type < NLMSG_NOOP || type >= IPQM_MAX)
517 RCV_SKB_FAIL(-EINVAL);
518
519 if (type <= IPQM_BASE)
520 return;
521
522 if (security_netlink_recv(skb))
523 RCV_SKB_FAIL(-EPERM);
524
525 write_lock_bh(&queue_lock);
526
527 if (peer_pid) {
528 if (peer_pid != pid) {
529 write_unlock_bh(&queue_lock);
530 RCV_SKB_FAIL(-EBUSY);
531 }
532 } else {
533 net_enable_timestamp();
534 peer_pid = pid;
535 }
536
537 write_unlock_bh(&queue_lock);
538
539 status = ipq_receive_peer(NLMSG_DATA(nlh), type,
540 skblen - NLMSG_LENGTH(0));
541 if (status < 0)
542 RCV_SKB_FAIL(status);
543
544 if (flags & NLM_F_ACK)
545 netlink_ack(skb, nlh, 0);
546 return;
547}
548
549static void
550ipq_rcv_sk(struct sock *sk, int len)
551{
552 do {
553 struct sk_buff *skb;
554
555 if (down_trylock(&ipqnl_sem))
556 return;
557
558 while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
559 ipq_rcv_skb(skb);
560 kfree_skb(skb);
561 }
562
563 up(&ipqnl_sem);
564
565 } while (ipqnl && ipqnl->sk_receive_queue.qlen);
566}
567
568static int
569ipq_rcv_dev_event(struct notifier_block *this,
570 unsigned long event, void *ptr)
571{
572 struct net_device *dev = ptr;
573
574 /* Drop any packets associated with the downed device */
575 if (event == NETDEV_DOWN)
576 ipq_dev_drop(dev->ifindex);
577 return NOTIFY_DONE;
578}
579
/* Netdevice notifier; registered and unregistered in init_or_cleanup(). */
static struct notifier_block ipq_dev_notifier = {
	.notifier_call = ipq_rcv_dev_event,
};
583
584static int
585ipq_rcv_nl_event(struct notifier_block *this,
586 unsigned long event, void *ptr)
587{
588 struct netlink_notify *n = ptr;
589
590 if (event == NETLINK_URELEASE &&
591 n->protocol == NETLINK_IP6_FW && n->pid) {
592 write_lock_bh(&queue_lock);
593 if (n->pid == peer_pid)
594 __ipq_reset();
595 write_unlock_bh(&queue_lock);
596 }
597 return NOTIFY_DONE;
598}
599
/* Netlink notifier; registered and unregistered in init_or_cleanup(). */
static struct notifier_block ipq_nl_notifier = {
	.notifier_call = ipq_rcv_nl_event,
};
603
/* Handle returned by register_sysctl_table(); needed to unregister again. */
static struct ctl_table_header *ipq_sysctl_header;

/* Leaf entry: exposes queue_maxlen as "ip6_queue_maxlen" (mode 0644). */
static ctl_table ipq_table[] = {
	{
		.ctl_name = NET_IPQ_QMAX,
		.procname = NET_IPQ_QMAX_NAME,
		.data = &queue_maxlen,
		.maxlen = sizeof(queue_maxlen),
		.mode = 0644,
		.proc_handler = proc_dointvec
	},
	{ .ctl_name = 0 }
};

/* "ipv6" directory holding ipq_table. */
static ctl_table ipq_dir_table[] = {
	{
		.ctl_name = NET_IPV6,
		.procname = "ipv6",
		.mode = 0555,
		.child = ipq_table
	},
	{ .ctl_name = 0 }
};

/* "net" directory holding ipq_dir_table; this is the table that gets registered. */
static ctl_table ipq_root_table[] = {
	{
		.ctl_name = CTL_NET,
		.procname = "net",
		.mode = 0555,
		.child = ipq_dir_table
	},
	{ .ctl_name = 0 }
};
637
638static int
639ipq_get_info(char *buffer, char **start, off_t offset, int length)
640{
641 int len;
642
643 read_lock_bh(&queue_lock);
644
645 len = sprintf(buffer,
646 "Peer PID : %d\n"
647 "Copy mode : %hu\n"
648 "Copy range : %u\n"
649 "Queue length : %u\n"
650 "Queue max. length : %u\n"
651 "Queue dropped : %u\n"
652 "Netfilter dropped : %u\n",
653 peer_pid,
654 copy_mode,
655 copy_range,
656 queue_total,
657 queue_maxlen,
658 queue_dropped,
659 queue_user_dropped);
660
661 read_unlock_bh(&queue_lock);
662
663 *start = buffer + offset;
664 len -= offset;
665 if (len > length)
666 len = length;
667 else if (len < 0)
668 len = 0;
669 return len;
670}
671
672static int
673init_or_cleanup(int init)
674{
675 int status = -ENOMEM;
676 struct proc_dir_entry *proc;
677
678 if (!init)
679 goto cleanup;
680
681 netlink_register_notifier(&ipq_nl_notifier);
682 ipqnl = netlink_kernel_create(NETLINK_IP6_FW, ipq_rcv_sk);
683 if (ipqnl == NULL) {
684 printk(KERN_ERR "ip6_queue: failed to create netlink socket\n");
685 goto cleanup_netlink_notifier;
686 }
687
688 proc = proc_net_create(IPQ_PROC_FS_NAME, 0, ipq_get_info);
689 if (proc)
690 proc->owner = THIS_MODULE;
691 else {
692 printk(KERN_ERR "ip6_queue: failed to create proc entry\n");
693 goto cleanup_ipqnl;
694 }
695
696 register_netdevice_notifier(&ipq_dev_notifier);
697 ipq_sysctl_header = register_sysctl_table(ipq_root_table, 0);
698
699 status = nf_register_queue_handler(PF_INET6, ipq_enqueue_packet, NULL);
700 if (status < 0) {
701 printk(KERN_ERR "ip6_queue: failed to register queue handler\n");
702 goto cleanup_sysctl;
703 }
704 return status;
705
706cleanup:
707 nf_unregister_queue_handler(PF_INET6);
708 synchronize_net();
709 ipq_flush(NF_DROP);
710
711cleanup_sysctl:
712 unregister_sysctl_table(ipq_sysctl_header);
713 unregister_netdevice_notifier(&ipq_dev_notifier);
714 proc_net_remove(IPQ_PROC_FS_NAME);
715
716cleanup_ipqnl:
717 sock_release(ipqnl->sk_socket);
718 down(&ipqnl_sem);
719 up(&ipqnl_sem);
720
721cleanup_netlink_notifier:
722 netlink_unregister_notifier(&ipq_nl_notifier);
723 return status;
724}
725
726static int __init init(void)
727{
728
729 return init_or_cleanup(1);
730}
731
732static void __exit fini(void)
733{
734 init_or_cleanup(0);
735}
736
737MODULE_DESCRIPTION("IPv6 packet queue handler");
738MODULE_LICENSE("GPL");
739
740module_init(init);
741module_exit(fini);
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
new file mode 100644
index 000000000000..c735276fdd5f
--- /dev/null
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -0,0 +1,1970 @@
1/*
2 * Packet matching code.
3 *
4 * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
5 * Copyright (C) 2000-2002 Netfilter core team <coreteam@netfilter.org>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 *
11 * 19 Jan 2002 Harald Welte <laforge@gnumonks.org>
12 * - increase module usage count as soon as we have rules inside
13 * a table
14 * 06 Jun 2002 Andras Kis-Szabo <kisza@sch.bme.hu>
15 * - new extension header parser code
16 */
17#include <linux/config.h>
18#include <linux/skbuff.h>
19#include <linux/kmod.h>
20#include <linux/vmalloc.h>
21#include <linux/netdevice.h>
22#include <linux/module.h>
23#include <linux/tcp.h>
24#include <linux/udp.h>
25#include <linux/icmpv6.h>
26#include <net/ip.h>
27#include <net/ipv6.h>
28#include <asm/uaccess.h>
29#include <asm/semaphore.h>
30#include <linux/proc_fs.h>
31
32#include <linux/netfilter_ipv6/ip6_tables.h>
33
34MODULE_LICENSE("GPL");
35MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
36MODULE_DESCRIPTION("IPv6 packet filter");
37
38#define IPV6_HDR_LEN (sizeof(struct ipv6hdr))
39#define IPV6_OPTHDR_LEN (sizeof(struct ipv6_opt_hdr))
40
41/*#define DEBUG_IP_FIREWALL*/
42/*#define DEBUG_ALLOW_ALL*/ /* Useful for remote debugging */
43/*#define DEBUG_IP_FIREWALL_USER*/
44
45#ifdef DEBUG_IP_FIREWALL
46#define dprintf(format, args...) printk(format , ## args)
47#else
48#define dprintf(format, args...)
49#endif
50
51#ifdef DEBUG_IP_FIREWALL_USER
52#define duprintf(format, args...) printk(format , ## args)
53#else
54#define duprintf(format, args...)
55#endif
56
57#ifdef CONFIG_NETFILTER_DEBUG
58#define IP_NF_ASSERT(x) \
59do { \
60 if (!(x)) \
61 printk("IP_NF_ASSERT: %s:%s:%u\n", \
62 __FUNCTION__, __FILE__, __LINE__); \
63} while(0)
64#else
65#define IP_NF_ASSERT(x)
66#endif
67#define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1))
68
69static DECLARE_MUTEX(ip6t_mutex);
70
71/* Must have mutex */
72#define ASSERT_READ_LOCK(x) IP_NF_ASSERT(down_trylock(&ip6t_mutex) != 0)
73#define ASSERT_WRITE_LOCK(x) IP_NF_ASSERT(down_trylock(&ip6t_mutex) != 0)
74#include <linux/netfilter_ipv4/lockhelp.h>
75#include <linux/netfilter_ipv4/listhelp.h>
76
77#if 0
78/* All the better to debug you with... */
79#define static
80#define inline
81#endif
82
83/* Locking is simple: we assume at worst case there will be one packet
84 in user context and one from bottom halves (or soft irq if Alexey's
85 softnet patch was applied).
86
87 We keep a set of rules for each CPU, so we can avoid write-locking
88 them; doing a readlock_bh() stops packets coming through if we're
89 in user context.
90
91 To be cache friendly on SMP, we arrange them like so:
92 [ n-entries ]
93 ... cache-align padding ...
94 [ n-entries ]
95
96 Hence the start of any table is given by get_table() below. */
97
98/* The table itself */
99struct ip6t_table_info
100{
101 /* Size per table */
102 unsigned int size;
103 /* Number of entries: FIXME. --RR */
104 unsigned int number;
105 /* Initial number of entries. Needed for module usage count */
106 unsigned int initial_entries;
107
108 /* Entry points and underflows */
109 unsigned int hook_entry[NF_IP6_NUMHOOKS];
110 unsigned int underflow[NF_IP6_NUMHOOKS];
111
112 /* ip6t_entry tables: one per CPU */
113 char entries[0] ____cacheline_aligned;
114};
115
116static LIST_HEAD(ip6t_target);
117static LIST_HEAD(ip6t_match);
118static LIST_HEAD(ip6t_tables);
119#define ADD_COUNTER(c,b,p) do { (c).bcnt += (b); (c).pcnt += (p); } while(0)
120
121#ifdef CONFIG_SMP
122#define TABLE_OFFSET(t,p) (SMP_ALIGN((t)->size)*(p))
123#else
124#define TABLE_OFFSET(t,p) 0
125#endif
126
127#if 0
128#define down(x) do { printk("DOWN:%u:" #x "\n", __LINE__); down(x); } while(0)
129#define down_interruptible(x) ({ int __r; printk("DOWNi:%u:" #x "\n", __LINE__); __r = down_interruptible(x); if (__r != 0) printk("ABORT-DOWNi:%u\n", __LINE__); __r; })
130#define up(x) do { printk("UP:%u:" #x "\n", __LINE__); up(x); } while(0)
131#endif
132
/*
 * Compare two IPv6 addresses under a mask, one byte at a time.
 * Returns 0 when every masked byte of addr1 equals the corresponding
 * masked byte of addr2, and 1 as soon as one differs.
 */
static int ip6_masked_addrcmp(struct in6_addr addr1, struct in6_addr mask,
			      struct in6_addr addr2)
{
	unsigned int byte = 0;

	while (byte < 16) {
		/* XOR exposes the differing bits; the mask keeps only the
		 * ones the caller cares about. */
		if ((addr1.s6_addr[byte] ^ addr2.s6_addr[byte])
		    & mask.s6_addr[byte])
			return 1;
		byte++;
	}
	return 0;
}
144
145/* Check for an extension */
146int
147ip6t_ext_hdr(u8 nexthdr)
148{
149 return ( (nexthdr == IPPROTO_HOPOPTS) ||
150 (nexthdr == IPPROTO_ROUTING) ||
151 (nexthdr == IPPROTO_FRAGMENT) ||
152 (nexthdr == IPPROTO_ESP) ||
153 (nexthdr == IPPROTO_AH) ||
154 (nexthdr == IPPROTO_NONE) ||
155 (nexthdr == IPPROTO_DSTOPTS) );
156}
157
158/* Returns whether matches rule or not. */
/*
 * Test the IPv6-header part of one rule (ip6info) against the packet in skb.
 *
 * Checks, in this order: masked source and destination address, input
 * interface name, output interface name and, only when IP6T_F_PROTO is set
 * in ip6info->flags, the protocol found at the end of the extension header
 * chain.  Each check can be inverted by its bit in ip6info->invflags.
 *
 * indev/outdev are read as IFNAMSIZ/sizeof(unsigned long) whole words, so
 * they must point at buffers of at least IFNAMSIZ bytes.
 *
 * *protoff and *fragoff are written only when IP6T_F_PROTO is set and the
 * header walk finishes without bailing out; otherwise they are left as the
 * caller passed them.
 *
 * Returns 1 when the rule matches, 0 when it does not.
 */
159static inline int
160ip6_packet_match(const struct sk_buff *skb,
161 const char *indev,
162 const char *outdev,
163 const struct ip6t_ip6 *ip6info,
164 unsigned int *protoff,
165 int *fragoff)
166{
167 size_t i;
168 unsigned long ret;
169 const struct ipv6hdr *ipv6 = skb->nh.ipv6h;
170
/* FWINV flips the raw test result when the given inversion flag is set. */
171#define FWINV(bool,invflg) ((bool) ^ !!(ip6info->invflags & invflg))
172
173 if (FWINV(ip6_masked_addrcmp(ipv6->saddr,ip6info->smsk,ip6info->src),
174 IP6T_INV_SRCIP)
175 || FWINV(ip6_masked_addrcmp(ipv6->daddr,ip6info->dmsk,ip6info->dst),
176 IP6T_INV_DSTIP)) {
177 dprintf("Source or dest mismatch.\n");
178/*
179 dprintf("SRC: %u. Mask: %u. Target: %u.%s\n", ip->saddr,
180 ipinfo->smsk.s_addr, ipinfo->src.s_addr,
181 ipinfo->invflags & IP6T_INV_SRCIP ? " (INV)" : "");
182 dprintf("DST: %u. Mask: %u. Target: %u.%s\n", ip->daddr,
183 ipinfo->dmsk.s_addr, ipinfo->dst.s_addr,
184 ipinfo->invflags & IP6T_INV_DSTIP ? " (INV)" : "");*/
185 return 0;
186 }
187
188 /* Look for ifname matches; this should unroll nicely. */
/* ret accumulates every masked bit in which the two names differ. */
189 for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
190 ret |= (((const unsigned long *)indev)[i]
191 ^ ((const unsigned long *)ip6info->iniface)[i])
192 & ((const unsigned long *)ip6info->iniface_mask)[i];
193 }
194
195 if (FWINV(ret != 0, IP6T_INV_VIA_IN)) {
196 dprintf("VIA in mismatch (%s vs %s).%s\n",
197 indev, ip6info->iniface,
198 ip6info->invflags&IP6T_INV_VIA_IN ?" (INV)":"");
199 return 0;
200 }
201
/* Same word-wise comparison for the output interface. */
202 for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
203 ret |= (((const unsigned long *)outdev)[i]
204 ^ ((const unsigned long *)ip6info->outiface)[i])
205 & ((const unsigned long *)ip6info->outiface_mask)[i];
206 }
207
208 if (FWINV(ret != 0, IP6T_INV_VIA_OUT)) {
209 dprintf("VIA out mismatch (%s vs %s).%s\n",
210 outdev, ip6info->outiface,
211 ip6info->invflags&IP6T_INV_VIA_OUT ?" (INV)":"");
212 return 0;
213 }
214
215/* ... might want to do something with class and flowlabel here ... */
216
217 /* look for the desired protocol header */
218 if((ip6info->flags & IP6T_F_PROTO)) {
219 u_int8_t currenthdr = ipv6->nexthdr;
220 struct ipv6_opt_hdr _hdr, *hp;
221 u_int16_t ptr; /* Header offset in skb */
222 u_int16_t hdrlen; /* Header */
223 u_int16_t _fragoff = 0, *fp = NULL;
224
225 ptr = IPV6_HDR_LEN;
226
/* Step over extension headers until a non-extension type is reached. */
227 while (ip6t_ext_hdr(currenthdr)) {
228 /* Is there enough space for the next ext header? */
229 if (skb->len - ptr < IPV6_OPTHDR_LEN)
230 return 0;
231
232 /* NONE or ESP: there isn't protocol part */
233 /* If we want to count these packets in '-p all',
234 * we will change the return 0 to 1*/
235 if ((currenthdr == IPPROTO_NONE) ||
236 (currenthdr == IPPROTO_ESP))
237 break;
238
/* NOTE(review): the length check above leaves at least sizeof(_hdr)
 * bytes after ptr, so a NULL result is presumably an internal
 * inconsistency rather than a short packet -- hence BUG_ON. */
239 hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr);
240 BUG_ON(hp == NULL);
241
242 /* Size calculation */
243 if (currenthdr == IPPROTO_FRAGMENT) {
244 fp = skb_header_pointer(skb,
245 ptr+offsetof(struct frag_hdr,
246 frag_off),
247 sizeof(_fragoff),
248 &_fragoff);
249 if (fp == NULL)
250 return 0;
251
/* Keep the host-order frag_off value with its low three bits cleared;
 * the fragment header is stepped over as a fixed 8 bytes. */
252 _fragoff = ntohs(*fp) & ~0x7;
253 hdrlen = 8;
254 } else if (currenthdr == IPPROTO_AH)
/* AH length is taken as (hdrlen field + 2) units of 4 bytes. */
255 hdrlen = (hp->hdrlen+2)<<2;
256 else
257 hdrlen = ipv6_optlen(hp);
258
259 currenthdr = hp->nexthdr;
260 ptr += hdrlen;
261 /* ptr is too large */
262 if ( ptr > skb->len )
263 return 0;
/* With a non-zero fragment offset the walk stops here: no match if yet
 * another extension header follows, otherwise currenthdr is used as the
 * protocol. */
264 if (_fragoff) {
265 if (ip6t_ext_hdr(currenthdr))
266 return 0;
267 break;
268 }
269 }
270
271 *protoff = ptr;
272 *fragoff = _fragoff;
273
274 /* currenthdr contains the protocol header */
275
276 dprintf("Packet protocol %hi ?= %s%hi.\n",
277 currenthdr,
278 ip6info->invflags & IP6T_INV_PROTO ? "!":"",
279 ip6info->proto);
280
/* Exact protocol hit: a match unless the rule asks for "not this proto". */
281 if (ip6info->proto == currenthdr) {
282 if(ip6info->invflags & IP6T_INV_PROTO) {
283 return 0;
284 }
285 return 1;
286 }
287
288 /* We need match for the '-p all', too! */
/* Different protocol: still a match when proto is 0 or the rule is
 * inverted; otherwise no match. */
289 if ((ip6info->proto != 0) &&
290 !(ip6info->invflags & IP6T_INV_PROTO))
291 return 0;
292 }
293 return 1;
294}
295
296/* should be ip6 safe */
297static inline int
298ip6_checkentry(const struct ip6t_ip6 *ipv6)
299{
300 if (ipv6->flags & ~IP6T_F_MASK) {
301 duprintf("Unknown flag bits set: %08X\n",
302 ipv6->flags & ~IP6T_F_MASK);
303 return 0;
304 }
305 if (ipv6->invflags & ~IP6T_INV_MASK) {
306 duprintf("Unknown invflag bits set: %08X\n",
307 ipv6->invflags & ~IP6T_INV_MASK);
308 return 0;
309 }
310 return 1;
311}
312
313static unsigned int
314ip6t_error(struct sk_buff **pskb,
315 const struct net_device *in,
316 const struct net_device *out,
317 unsigned int hooknum,
318 const void *targinfo,
319 void *userinfo)
320{
321 if (net_ratelimit())
322 printk("ip6_tables: error: `%s'\n", (char *)targinfo);
323
324 return NF_DROP;
325}
326
327static inline
328int do_match(struct ip6t_entry_match *m,
329 const struct sk_buff *skb,
330 const struct net_device *in,
331 const struct net_device *out,
332 int offset,
333 unsigned int protoff,
334 int *hotdrop)
335{
336 /* Stop iteration if it doesn't match */
337 if (!m->u.kernel.match->match(skb, in, out, m->data,
338 offset, protoff, hotdrop))
339 return 1;
340 else
341 return 0;
342}
343
/* Return the entry located `offset' bytes past `base'. */
static inline struct ip6t_entry *
get_entry(void *base, unsigned int offset)
{
	char *where = (char *)base + offset;

	return (struct ip6t_entry *)where;
}
349
350/* Returns one of the generic firewall policies, like NF_ACCEPT. */
/*
 * Run packet *pskb through the rules that `table' attaches to `hook' and
 * return the resulting verdict.
 *
 * This CPU's copy of the rule blob is selected with TABLE_OFFSET() and
 * walked under read_lock_bh(&table->lock).  For every entry whose IPv6
 * criteria (ip6_packet_match) and all extension matches succeed, the
 * entry's byte/packet counters are bumped and its target is evaluated:
 *   - standard target, negative verdict other than IP6T_RETURN:
 *     the walk ends with verdict (-v) - 1;
 *   - standard target, IP6T_RETURN: continue at `back' and reload `back'
 *     from the offset saved in back->comefrom;
 *   - standard target, non-negative verdict: continue at that byte offset;
 *     when that is not simply the following entry, the old `back' offset
 *     is first saved in the following entry's comefrom and `back' is
 *     pointed at that following entry;
 *   - any other target: its function is called; IP6T_CONTINUE moves on to
 *     the next entry, any other value ends the walk with that verdict.
 * A match that sets hotdrop ends the walk and NF_DROP is returned.
 * When DEBUG_ALLOW_ALL is defined the result is always NF_ACCEPT.
 */
351unsigned int
352ip6t_do_table(struct sk_buff **pskb,
353 unsigned int hook,
354 const struct net_device *in,
355 const struct net_device *out,
356 struct ip6t_table *table,
357 void *userdata)
358{
/* All-zero name used when `in' or `out' is NULL; static storage, so it is
 * zero-filled for the full IFNAMSIZ that ip6_packet_match reads. */
359 static const char nulldevname[IFNAMSIZ];
360 int offset = 0;
361 unsigned int protoff = 0;
362 int hotdrop = 0;
363 /* Initializing verdict to NF_DROP keeps gcc happy. */
364 unsigned int verdict = NF_DROP;
365 const char *indev, *outdev;
366 void *table_base;
367 struct ip6t_entry *e, *back;
368
369 /* Initialization */
370 indev = in ? in->name : nulldevname;
371 outdev = out ? out->name : nulldevname;
372
373 /* We handle fragments by dealing with the first fragment as
374 * if it was a normal packet. All other fragments are treated
375 * normally, except that they will NEVER match rules that ask
376 * things we don't know, ie. tcp syn flag or ports). If the
377 * rule is also a fragment-specific rule, non-fragments won't
378 * match it. */
379
380 read_lock_bh(&table->lock);
381 IP_NF_ASSERT(table->valid_hooks & (1 << hook));
382 table_base = (void *)table->private->entries
383 + TABLE_OFFSET(table->private, smp_processor_id());
384 e = get_entry(table_base, table->private->hook_entry[hook]);
385
386#ifdef CONFIG_NETFILTER_DEBUG
387 /* Check noone else using our table */
/* Debug builds keep a marker in the first entry's comefrom: 0xdead57ac when
 * idle, 0x57acc001 while a walk is in progress, 0xeeeeeeec while a target
 * function is being called. */
388 if (((struct ip6t_entry *)table_base)->comefrom != 0xdead57ac
389 && ((struct ip6t_entry *)table_base)->comefrom != 0xeeeeeeec) {
390 printk("ASSERT: CPU #%u, %s comefrom(%p) = %X\n",
391 smp_processor_id(),
392 table->name,
393 &((struct ip6t_entry *)table_base)->comefrom,
394 ((struct ip6t_entry *)table_base)->comefrom);
395 }
396 ((struct ip6t_entry *)table_base)->comefrom = 0x57acc001;
397#endif
398
399 /* For return from builtin chain */
400 back = get_entry(table_base, table->private->underflow[hook]);
401
402 do {
403 IP_NF_ASSERT(e);
404 IP_NF_ASSERT(back);
405 (*pskb)->nfcache |= e->nfcache;
406 if (ip6_packet_match(*pskb, indev, outdev, &e->ipv6,
407 &protoff, &offset)) {
408 struct ip6t_entry_target *t;
409
/* do_match returns non-zero for the first extension match that fails. */
410 if (IP6T_MATCH_ITERATE(e, do_match,
411 *pskb, in, out,
412 offset, protoff, &hotdrop) != 0)
413 goto no_match;
414
/* Account payload length plus the fixed IPv6 header, and one packet. */
415 ADD_COUNTER(e->counters,
416 ntohs((*pskb)->nh.ipv6h->payload_len)
417 + IPV6_HDR_LEN,
418 1);
419
420 t = ip6t_get_target(e);
421 IP_NF_ASSERT(t->u.kernel.target);
422 /* Standard target? */
423 if (!t->u.kernel.target->target) {
424 int v;
425
426 v = ((struct ip6t_standard_target *)t)->verdict;
427 if (v < 0) {
428 /* Pop from stack? */
429 if (v != IP6T_RETURN) {
/* Negative verdicts encode the netfilter verdict as -(verdict) - 1. */
430 verdict = (unsigned)(-v) - 1;
431 break;
432 }
/* IP6T_RETURN: resume at the saved return point and restore the
 * previous one from the offset stored in its comefrom. */
433 e = back;
434 back = get_entry(table_base,
435 back->comefrom);
436 continue;
437 }
/* v >= 0 is a byte offset to continue at.  Only a real jump (target is
 * not the immediately following entry) pushes a return point. */
438 if (table_base + v
439 != (void *)e + e->next_offset) {
440 /* Save old back ptr in next entry */
441 struct ip6t_entry *next
442 = (void *)e + e->next_offset;
443 next->comefrom
444 = (void *)back - table_base;
445 /* set back pointer to next entry */
446 back = next;
447 }
448
449 e = get_entry(table_base, v);
450 } else {
451 /* Targets which reenter must return
452 abs. verdicts */
453#ifdef CONFIG_NETFILTER_DEBUG
454 ((struct ip6t_entry *)table_base)->comefrom
455 = 0xeeeeeeec;
456#endif
457 verdict = t->u.kernel.target->target(pskb,
458 in, out,
459 hook,
460 t->data,
461 userdata);
462
463#ifdef CONFIG_NETFILTER_DEBUG
/* If the marker changed during the call, the table was walked again on
 * this CPU; a target that did so must not also ask to continue. */
464 if (((struct ip6t_entry *)table_base)->comefrom
465 != 0xeeeeeeec
466 && verdict == IP6T_CONTINUE) {
467 printk("Target %s reentered!\n",
468 t->u.kernel.target->name);
469 verdict = NF_DROP;
470 }
471 ((struct ip6t_entry *)table_base)->comefrom
472 = 0x57acc001;
473#endif
474 if (verdict == IP6T_CONTINUE)
475 e = (void *)e + e->next_offset;
476 else
477 /* Verdict */
478 break;
479 }
480 } else {
481
482 no_match:
/* Rule did not apply: fall through to the entry that follows it. */
483 e = (void *)e + e->next_offset;
484 }
485 } while (!hotdrop);
486
487#ifdef CONFIG_NETFILTER_DEBUG
488 ((struct ip6t_entry *)table_base)->comefrom = 0xdead57ac;
489#endif
490 read_unlock_bh(&table->lock);
491
492#ifdef DEBUG_ALLOW_ALL
493 return NF_ACCEPT;
494#else
495 if (hotdrop)
496 return NF_DROP;
497 else return verdict;
498#endif
499}
500
501/* If it succeeds, returns element and locks mutex */
502static inline void *
503find_inlist_lock_noload(struct list_head *head,
504 const char *name,
505 int *error,
506 struct semaphore *mutex)
507{
508 void *ret;
509
510#if 1
511 duprintf("find_inlist: searching for `%s' in %s.\n",
512 name, head == &ip6t_target ? "ip6t_target"
513 : head == &ip6t_match ? "ip6t_match"
514 : head == &ip6t_tables ? "ip6t_tables" : "UNKNOWN");
515#endif
516
517 *error = down_interruptible(mutex);
518 if (*error != 0)
519 return NULL;
520
521 ret = list_named_find(head, name);
522 if (!ret) {
523 *error = -ENOENT;
524 up(mutex);
525 }
526 return ret;
527}
528
529#ifndef CONFIG_KMOD
530#define find_inlist_lock(h,n,p,e,m) find_inlist_lock_noload((h),(n),(e),(m))
531#else
/*
 * Like find_inlist_lock_noload(), but when the first lookup fails it
 * asks for the module named "<prefix><name>" to be loaded and then
 * looks once more.  The result and *error are those of the last lookup
 * performed.
 */
static void *
find_inlist_lock(struct list_head *head,
		 const char *name,
		 const char *prefix,
		 int *error,
		 struct semaphore *mutex)
{
	void *found = find_inlist_lock_noload(head, name, error, mutex);

	if (found)
		return found;

	duprintf("find_inlist: loading `%s%s'.\n", prefix, name);
	request_module("%s%s", prefix, name);
	return find_inlist_lock_noload(head, name, error, mutex);
}
550#endif
551
552static inline struct ip6t_table *
553ip6t_find_table_lock(const char *name, int *error, struct semaphore *mutex)
554{
555 return find_inlist_lock(&ip6t_tables, name, "ip6table_", error, mutex);
556}
557
558static inline struct ip6t_match *
559find_match_lock(const char *name, int *error, struct semaphore *mutex)
560{
561 return find_inlist_lock(&ip6t_match, name, "ip6t_", error, mutex);
562}
563
564static struct ip6t_target *
565ip6t_find_target_lock(const char *name, int *error, struct semaphore *mutex)
566{
567 return find_inlist_lock(&ip6t_target, name, "ip6t_", error, mutex);
568}
569
570/* All zeroes == unconditional rule. */
571static inline int
572unconditional(const struct ip6t_ip6 *ipv6)
573{
574 unsigned int i;
575
576 for (i = 0; i < sizeof(*ipv6); i++)
577 if (((char *)ipv6)[i])
578 break;
579
580 return (i == sizeof(*ipv6));
581}
582
583/* Figures out from what hook each rule can be called: returns 0 if
584 there are loops. Puts hook bitmask in comefrom. */
/*
 * For every hook enabled in valid_hooks, follow the rules reachable from
 * that hook's entry point and OR (1 << hook) into each visited entry's
 * comefrom.
 *
 * The walk is iterative.  While an entry is on the path being followed,
 * its counters.pcnt temporarily holds the offset of the entry it was
 * reached from, and bit NF_IP6_NUMHOOKS of comefrom is set; reaching an
 * entry that already has that bit set is reported as a loop.  Both are
 * undone while backtracking.
 *
 * Returns 1 on success, 0 when a loop is found.
 *
 * NOTE(review): translate_table() calls this before check_entry(), so the
 * jump offsets taken from t->verdict are not range-checked by
 * standard_check() at this point.
 */
585static int
586mark_source_chains(struct ip6t_table_info *newinfo, unsigned int valid_hooks)
587{
588 unsigned int hook;
589
590 /* No recursion; use packet counter to save back ptrs (reset
591 to 0 as we leave), and comefrom to save source hook bitmask */
592 for (hook = 0; hook < NF_IP6_NUMHOOKS; hook++) {
593 unsigned int pos = newinfo->hook_entry[hook];
594 struct ip6t_entry *e
595 = (struct ip6t_entry *)(newinfo->entries + pos);
596
597 if (!(valid_hooks & (1 << hook)))
598 continue;
599
600 /* Set initial back pointer. */
/* Pointing the first entry back at itself marks the start of the walk;
 * the backtrack loop below recognises it by pos == oldpos. */
601 e->counters.pcnt = pos;
602
603 for (;;) {
604 struct ip6t_standard_target *t
605 = (void *)ip6t_get_target(e);
606
/* "On current path" bit already set: we have come round in a circle. */
607 if (e->comefrom & (1 << NF_IP6_NUMHOOKS)) {
608 printk("iptables: loop hook %u pos %u %08X.\n",
609 hook, pos, e->comefrom);
610 return 0;
611 }
612 e->comefrom
613 |= ((1 << hook) | (1 << NF_IP6_NUMHOOKS));
614
615 /* Unconditional return/END. */
/* i.e. no matches (target directly follows the entry header), standard
 * target, negative verdict, and an all-zero IPv6 part. */
616 if (e->target_offset == sizeof(struct ip6t_entry)
617 && (strcmp(t->target.u.user.name,
618 IP6T_STANDARD_TARGET) == 0)
619 && t->verdict < 0
620 && unconditional(&e->ipv6)) {
621 unsigned int oldpos, size;
622
623 /* Return: backtrack through the last
624 big jump. */
/* Follow the saved back pointers, clearing the path bit and the
 * borrowed pcnt on the way, for as long as each step back is from an
 * entry to the one immediately before it (a fallthrough, not a jump). */
625 do {
626 e->comefrom ^= (1<<NF_IP6_NUMHOOKS);
627#ifdef DEBUG_IP_FIREWALL_USER
628 if (e->comefrom
629 & (1 << NF_IP6_NUMHOOKS)) {
630 duprintf("Back unset "
631 "on hook %u "
632 "rule %u\n",
633 hook, pos);
634 }
635#endif
636 oldpos = pos;
637 pos = e->counters.pcnt;
638 e->counters.pcnt = 0;
639
640 /* We're at the start. */
641 if (pos == oldpos)
642 goto next;
643
644 e = (struct ip6t_entry *)
645 (newinfo->entries + pos);
646 } while (oldpos == pos + e->next_offset);
647
648 /* Move along one */
/* e is now the entry the last jump was taken from; carry on with the
 * entry that follows it. */
649 size = e->next_offset;
650 e = (struct ip6t_entry *)
651 (newinfo->entries + pos + size);
652 e->counters.pcnt = pos;
653 pos += size;
654 } else {
655 int newpos = t->verdict;
656
657 if (strcmp(t->target.u.user.name,
658 IP6T_STANDARD_TARGET) == 0
659 && newpos >= 0) {
660 /* This a jump; chase it. */
661 duprintf("Jump rule %u -> %u\n",
662 pos, newpos);
663 } else {
664 /* ... this is a fallthru */
665 newpos = pos + e->next_offset;
666 }
/* Step to the chosen entry, recording where we came from. */
667 e = (struct ip6t_entry *)
668 (newinfo->entries + newpos);
669 e->counters.pcnt = pos;
670 pos = newpos;
671 }
672 }
673 next:
674 duprintf("Finished chain %u\n", hook);
675 }
676 return 1;
677}
678
679static inline int
680cleanup_match(struct ip6t_entry_match *m, unsigned int *i)
681{
682 if (i && (*i)-- == 0)
683 return 1;
684
685 if (m->u.kernel.match->destroy)
686 m->u.kernel.match->destroy(m->data,
687 m->u.match_size - sizeof(*m));
688 module_put(m->u.kernel.match->me);
689 return 0;
690}
691
692static inline int
693standard_check(const struct ip6t_entry_target *t,
694 unsigned int max_offset)
695{
696 struct ip6t_standard_target *targ = (void *)t;
697
698 /* Check standard info. */
699 if (t->u.target_size
700 != IP6T_ALIGN(sizeof(struct ip6t_standard_target))) {
701 duprintf("standard_check: target size %u != %u\n",
702 t->u.target_size,
703 IP6T_ALIGN(sizeof(struct ip6t_standard_target)));
704 return 0;
705 }
706
707 if (targ->verdict >= 0
708 && targ->verdict > max_offset - sizeof(struct ip6t_entry)) {
709 duprintf("ip6t_standard_check: bad verdict (%i)\n",
710 targ->verdict);
711 return 0;
712 }
713
714 if (targ->verdict < -NF_MAX_VERDICT - 1) {
715 duprintf("ip6t_standard_check: bad negative verdict (%i)\n",
716 targ->verdict);
717 return 0;
718 }
719 return 1;
720}
721
722static inline int
723check_match(struct ip6t_entry_match *m,
724 const char *name,
725 const struct ip6t_ip6 *ipv6,
726 unsigned int hookmask,
727 unsigned int *i)
728{
729 int ret;
730 struct ip6t_match *match;
731
732 match = find_match_lock(m->u.user.name, &ret, &ip6t_mutex);
733 if (!match) {
734 // duprintf("check_match: `%s' not found\n", m->u.name);
735 return ret;
736 }
737 if (!try_module_get(match->me)) {
738 up(&ip6t_mutex);
739 return -ENOENT;
740 }
741 m->u.kernel.match = match;
742 up(&ip6t_mutex);
743
744 if (m->u.kernel.match->checkentry
745 && !m->u.kernel.match->checkentry(name, ipv6, m->data,
746 m->u.match_size - sizeof(*m),
747 hookmask)) {
748 module_put(m->u.kernel.match->me);
749 duprintf("ip_tables: check failed for `%s'.\n",
750 m->u.kernel.match->name);
751 return -EINVAL;
752 }
753
754 (*i)++;
755 return 0;
756}
757
758static struct ip6t_target ip6t_standard_target;
759
760static inline int
761check_entry(struct ip6t_entry *e, const char *name, unsigned int size,
762 unsigned int *i)
763{
764 struct ip6t_entry_target *t;
765 struct ip6t_target *target;
766 int ret;
767 unsigned int j;
768
769 if (!ip6_checkentry(&e->ipv6)) {
770 duprintf("ip_tables: ip check failed %p %s.\n", e, name);
771 return -EINVAL;
772 }
773
774 j = 0;
775 ret = IP6T_MATCH_ITERATE(e, check_match, name, &e->ipv6, e->comefrom, &j);
776 if (ret != 0)
777 goto cleanup_matches;
778
779 t = ip6t_get_target(e);
780 target = ip6t_find_target_lock(t->u.user.name, &ret, &ip6t_mutex);
781 if (!target) {
782 duprintf("check_entry: `%s' not found\n", t->u.user.name);
783 goto cleanup_matches;
784 }
785 if (!try_module_get(target->me)) {
786 up(&ip6t_mutex);
787 ret = -ENOENT;
788 goto cleanup_matches;
789 }
790 t->u.kernel.target = target;
791 up(&ip6t_mutex);
792 if (!t->u.kernel.target) {
793 ret = -EBUSY;
794 goto cleanup_matches;
795 }
796 if (t->u.kernel.target == &ip6t_standard_target) {
797 if (!standard_check(t, size)) {
798 ret = -EINVAL;
799 goto cleanup_matches;
800 }
801 } else if (t->u.kernel.target->checkentry
802 && !t->u.kernel.target->checkentry(name, e, t->data,
803 t->u.target_size
804 - sizeof(*t),
805 e->comefrom)) {
806 module_put(t->u.kernel.target->me);
807 duprintf("ip_tables: check failed for `%s'.\n",
808 t->u.kernel.target->name);
809 ret = -EINVAL;
810 goto cleanup_matches;
811 }
812
813 (*i)++;
814 return 0;
815
816 cleanup_matches:
817 IP6T_MATCH_ITERATE(e, cleanup_match, &j);
818 return ret;
819}
820
821static inline int
822check_entry_size_and_hooks(struct ip6t_entry *e,
823 struct ip6t_table_info *newinfo,
824 unsigned char *base,
825 unsigned char *limit,
826 const unsigned int *hook_entries,
827 const unsigned int *underflows,
828 unsigned int *i)
829{
830 unsigned int h;
831
832 if ((unsigned long)e % __alignof__(struct ip6t_entry) != 0
833 || (unsigned char *)e + sizeof(struct ip6t_entry) >= limit) {
834 duprintf("Bad offset %p\n", e);
835 return -EINVAL;
836 }
837
838 if (e->next_offset
839 < sizeof(struct ip6t_entry) + sizeof(struct ip6t_entry_target)) {
840 duprintf("checking: element %p size %u\n",
841 e, e->next_offset);
842 return -EINVAL;
843 }
844
845 /* Check hooks & underflows */
846 for (h = 0; h < NF_IP6_NUMHOOKS; h++) {
847 if ((unsigned char *)e - base == hook_entries[h])
848 newinfo->hook_entry[h] = hook_entries[h];
849 if ((unsigned char *)e - base == underflows[h])
850 newinfo->underflow[h] = underflows[h];
851 }
852
853 /* FIXME: underflows must be unconditional, standard verdicts
854 < 0 (not IP6T_RETURN). --RR */
855
856 /* Clear counters and comefrom */
857 e->counters = ((struct ip6t_counters) { 0, 0 });
858 e->comefrom = 0;
859
860 (*i)++;
861 return 0;
862}
863
864static inline int
865cleanup_entry(struct ip6t_entry *e, unsigned int *i)
866{
867 struct ip6t_entry_target *t;
868
869 if (i && (*i)-- == 0)
870 return 1;
871
872 /* Cleanup all matches */
873 IP6T_MATCH_ITERATE(e, cleanup_match, NULL);
874 t = ip6t_get_target(e);
875 if (t->u.kernel.target->destroy)
876 t->u.kernel.target->destroy(t->data,
877 t->u.target_size - sizeof(*t));
878 module_put(t->u.kernel.target->me);
879 return 0;
880}
881
882/* Checks and translates the user-supplied table segment (held in
883 newinfo) */
884static int
885translate_table(const char *name,
886 unsigned int valid_hooks,
887 struct ip6t_table_info *newinfo,
888 unsigned int size,
889 unsigned int number,
890 const unsigned int *hook_entries,
891 const unsigned int *underflows)
892{
893 unsigned int i;
894 int ret;
895
896 newinfo->size = size;
897 newinfo->number = number;
898
899 /* Init all hooks to impossible value. */
900 for (i = 0; i < NF_IP6_NUMHOOKS; i++) {
901 newinfo->hook_entry[i] = 0xFFFFFFFF;
902 newinfo->underflow[i] = 0xFFFFFFFF;
903 }
904
905 duprintf("translate_table: size %u\n", newinfo->size);
906 i = 0;
907 /* Walk through entries, checking offsets. */
908 ret = IP6T_ENTRY_ITERATE(newinfo->entries, newinfo->size,
909 check_entry_size_and_hooks,
910 newinfo,
911 newinfo->entries,
912 newinfo->entries + size,
913 hook_entries, underflows, &i);
914 if (ret != 0)
915 return ret;
916
917 if (i != number) {
918 duprintf("translate_table: %u not %u entries\n",
919 i, number);
920 return -EINVAL;
921 }
922
923 /* Check hooks all assigned */
924 for (i = 0; i < NF_IP6_NUMHOOKS; i++) {
925 /* Only hooks which are valid */
926 if (!(valid_hooks & (1 << i)))
927 continue;
928 if (newinfo->hook_entry[i] == 0xFFFFFFFF) {
929 duprintf("Invalid hook entry %u %u\n",
930 i, hook_entries[i]);
931 return -EINVAL;
932 }
933 if (newinfo->underflow[i] == 0xFFFFFFFF) {
934 duprintf("Invalid underflow %u %u\n",
935 i, underflows[i]);
936 return -EINVAL;
937 }
938 }
939
940 if (!mark_source_chains(newinfo, valid_hooks))
941 return -ELOOP;
942
943 /* Finally, each sanity check must pass */
944 i = 0;
945 ret = IP6T_ENTRY_ITERATE(newinfo->entries, newinfo->size,
946 check_entry, name, size, &i);
947
948 if (ret != 0) {
949 IP6T_ENTRY_ITERATE(newinfo->entries, newinfo->size,
950 cleanup_entry, &i);
951 return ret;
952 }
953
954 /* And one copy for every other CPU */
955 for (i = 1; i < num_possible_cpus(); i++) {
956 memcpy(newinfo->entries + SMP_ALIGN(newinfo->size)*i,
957 newinfo->entries,
958 SMP_ALIGN(newinfo->size));
959 }
960
961 return ret;
962}
963
964static struct ip6t_table_info *
965replace_table(struct ip6t_table *table,
966 unsigned int num_counters,
967 struct ip6t_table_info *newinfo,
968 int *error)
969{
970 struct ip6t_table_info *oldinfo;
971
972#ifdef CONFIG_NETFILTER_DEBUG
973 {
974 struct ip6t_entry *table_base;
975 unsigned int i;
976
977 for (i = 0; i < num_possible_cpus(); i++) {
978 table_base =
979 (void *)newinfo->entries
980 + TABLE_OFFSET(newinfo, i);
981
982 table_base->comefrom = 0xdead57ac;
983 }
984 }
985#endif
986
987 /* Do the substitution. */
988 write_lock_bh(&table->lock);
989 /* Check inside lock: is the old number correct? */
990 if (num_counters != table->private->number) {
991 duprintf("num_counters != table->private->number (%u/%u)\n",
992 num_counters, table->private->number);
993 write_unlock_bh(&table->lock);
994 *error = -EAGAIN;
995 return NULL;
996 }
997 oldinfo = table->private;
998 table->private = newinfo;
999 newinfo->initial_entries = oldinfo->initial_entries;
1000 write_unlock_bh(&table->lock);
1001
1002 return oldinfo;
1003}
1004
1005/* Gets counters. */
1006static inline int
1007add_entry_to_counter(const struct ip6t_entry *e,
1008 struct ip6t_counters total[],
1009 unsigned int *i)
1010{
1011 ADD_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
1012
1013 (*i)++;
1014 return 0;
1015}
1016
1017static void
1018get_counters(const struct ip6t_table_info *t,
1019 struct ip6t_counters counters[])
1020{
1021 unsigned int cpu;
1022 unsigned int i;
1023
1024 for (cpu = 0; cpu < num_possible_cpus(); cpu++) {
1025 i = 0;
1026 IP6T_ENTRY_ITERATE(t->entries + TABLE_OFFSET(t, cpu),
1027 t->size,
1028 add_entry_to_counter,
1029 counters,
1030 &i);
1031 }
1032}
1033
1034static int
1035copy_entries_to_user(unsigned int total_size,
1036 struct ip6t_table *table,
1037 void __user *userptr)
1038{
1039 unsigned int off, num, countersize;
1040 struct ip6t_entry *e;
1041 struct ip6t_counters *counters;
1042 int ret = 0;
1043
1044 /* We need atomic snapshot of counters: rest doesn't change
1045 (other than comefrom, which userspace doesn't care
1046 about). */
1047 countersize = sizeof(struct ip6t_counters) * table->private->number;
1048 counters = vmalloc(countersize);
1049
1050 if (counters == NULL)
1051 return -ENOMEM;
1052
1053 /* First, sum counters... */
1054 memset(counters, 0, countersize);
1055 write_lock_bh(&table->lock);
1056 get_counters(table->private, counters);
1057 write_unlock_bh(&table->lock);
1058
1059 /* ... then copy entire thing from CPU 0... */
1060 if (copy_to_user(userptr, table->private->entries, total_size) != 0) {
1061 ret = -EFAULT;
1062 goto free_counters;
1063 }
1064
1065 /* FIXME: use iterator macros --RR */
1066 /* ... then go back and fix counters and names */
1067 for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){
1068 unsigned int i;
1069 struct ip6t_entry_match *m;
1070 struct ip6t_entry_target *t;
1071
1072 e = (struct ip6t_entry *)(table->private->entries + off);
1073 if (copy_to_user(userptr + off
1074 + offsetof(struct ip6t_entry, counters),
1075 &counters[num],
1076 sizeof(counters[num])) != 0) {
1077 ret = -EFAULT;
1078 goto free_counters;
1079 }
1080
1081 for (i = sizeof(struct ip6t_entry);
1082 i < e->target_offset;
1083 i += m->u.match_size) {
1084 m = (void *)e + i;
1085
1086 if (copy_to_user(userptr + off + i
1087 + offsetof(struct ip6t_entry_match,
1088 u.user.name),
1089 m->u.kernel.match->name,
1090 strlen(m->u.kernel.match->name)+1)
1091 != 0) {
1092 ret = -EFAULT;
1093 goto free_counters;
1094 }
1095 }
1096
1097 t = ip6t_get_target(e);
1098 if (copy_to_user(userptr + off + e->target_offset
1099 + offsetof(struct ip6t_entry_target,
1100 u.user.name),
1101 t->u.kernel.target->name,
1102 strlen(t->u.kernel.target->name)+1) != 0) {
1103 ret = -EFAULT;
1104 goto free_counters;
1105 }
1106 }
1107
1108 free_counters:
1109 vfree(counters);
1110 return ret;
1111}
1112
1113static int
1114get_entries(const struct ip6t_get_entries *entries,
1115 struct ip6t_get_entries __user *uptr)
1116{
1117 int ret;
1118 struct ip6t_table *t;
1119
1120 t = ip6t_find_table_lock(entries->name, &ret, &ip6t_mutex);
1121 if (t) {
1122 duprintf("t->private->number = %u\n",
1123 t->private->number);
1124 if (entries->size == t->private->size)
1125 ret = copy_entries_to_user(t->private->size,
1126 t, uptr->entrytable);
1127 else {
1128 duprintf("get_entries: I've got %u not %u!\n",
1129 t->private->size,
1130 entries->size);
1131 ret = -EINVAL;
1132 }
1133 up(&ip6t_mutex);
1134 } else
1135 duprintf("get_entries: Can't find %s!\n",
1136 entries->name);
1137
1138 return ret;
1139}
1140
1141static int
1142do_replace(void __user *user, unsigned int len)
1143{
1144 int ret;
1145 struct ip6t_replace tmp;
1146 struct ip6t_table *t;
1147 struct ip6t_table_info *newinfo, *oldinfo;
1148 struct ip6t_counters *counters;
1149
1150 if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
1151 return -EFAULT;
1152
1153 /* Pedantry: prevent them from hitting BUG() in vmalloc.c --RR */
1154 if ((SMP_ALIGN(tmp.size) >> PAGE_SHIFT) + 2 > num_physpages)
1155 return -ENOMEM;
1156
1157 newinfo = vmalloc(sizeof(struct ip6t_table_info)
1158 + SMP_ALIGN(tmp.size) * num_possible_cpus());
1159 if (!newinfo)
1160 return -ENOMEM;
1161
1162 if (copy_from_user(newinfo->entries, user + sizeof(tmp),
1163 tmp.size) != 0) {
1164 ret = -EFAULT;
1165 goto free_newinfo;
1166 }
1167
1168 counters = vmalloc(tmp.num_counters * sizeof(struct ip6t_counters));
1169 if (!counters) {
1170 ret = -ENOMEM;
1171 goto free_newinfo;
1172 }
1173 memset(counters, 0, tmp.num_counters * sizeof(struct ip6t_counters));
1174
1175 ret = translate_table(tmp.name, tmp.valid_hooks,
1176 newinfo, tmp.size, tmp.num_entries,
1177 tmp.hook_entry, tmp.underflow);
1178 if (ret != 0)
1179 goto free_newinfo_counters;
1180
1181 duprintf("ip_tables: Translated table\n");
1182
1183 t = ip6t_find_table_lock(tmp.name, &ret, &ip6t_mutex);
1184 if (!t)
1185 goto free_newinfo_counters_untrans;
1186
1187 /* You lied! */
1188 if (tmp.valid_hooks != t->valid_hooks) {
1189 duprintf("Valid hook crap: %08X vs %08X\n",
1190 tmp.valid_hooks, t->valid_hooks);
1191 ret = -EINVAL;
1192 goto free_newinfo_counters_untrans_unlock;
1193 }
1194
1195 /* Get a reference in advance, we're not allowed fail later */
1196 if (!try_module_get(t->me)) {
1197 ret = -EBUSY;
1198 goto free_newinfo_counters_untrans_unlock;
1199 }
1200
1201 oldinfo = replace_table(t, tmp.num_counters, newinfo, &ret);
1202 if (!oldinfo)
1203 goto put_module;
1204
1205 /* Update module usage count based on number of rules */
1206 duprintf("do_replace: oldnum=%u, initnum=%u, newnum=%u\n",
1207 oldinfo->number, oldinfo->initial_entries, newinfo->number);
1208 if ((oldinfo->number > oldinfo->initial_entries) ||
1209 (newinfo->number <= oldinfo->initial_entries))
1210 module_put(t->me);
1211 if ((oldinfo->number > oldinfo->initial_entries) &&
1212 (newinfo->number <= oldinfo->initial_entries))
1213 module_put(t->me);
1214
1215 /* Get the old counters. */
1216 get_counters(oldinfo, counters);
1217 /* Decrease module usage counts and free resource */
1218 IP6T_ENTRY_ITERATE(oldinfo->entries, oldinfo->size, cleanup_entry,NULL);
1219 vfree(oldinfo);
1220 /* Silent error: too late now. */
1221 if (copy_to_user(tmp.counters, counters,
1222 sizeof(struct ip6t_counters) * tmp.num_counters) != 0)
1223 ret = -EFAULT;
1224 vfree(counters);
1225 up(&ip6t_mutex);
1226 return ret;
1227
1228 put_module:
1229 module_put(t->me);
1230 free_newinfo_counters_untrans_unlock:
1231 up(&ip6t_mutex);
1232 free_newinfo_counters_untrans:
1233 IP6T_ENTRY_ITERATE(newinfo->entries, newinfo->size, cleanup_entry,NULL);
1234 free_newinfo_counters:
1235 vfree(counters);
1236 free_newinfo:
1237 vfree(newinfo);
1238 return ret;
1239}
1240
1241/* We're lazy, and add to the first CPU; overflow works its fey magic
1242 * and everything is OK. */
1243static inline int
1244add_counter_to_entry(struct ip6t_entry *e,
1245 const struct ip6t_counters addme[],
1246 unsigned int *i)
1247{
1248#if 0
1249 duprintf("add_counter: Entry %u %lu/%lu + %lu/%lu\n",
1250 *i,
1251 (long unsigned int)e->counters.pcnt,
1252 (long unsigned int)e->counters.bcnt,
1253 (long unsigned int)addme[*i].pcnt,
1254 (long unsigned int)addme[*i].bcnt);
1255#endif
1256
1257 ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
1258
1259 (*i)++;
1260 return 0;
1261}
1262
1263static int
1264do_add_counters(void __user *user, unsigned int len)
1265{
1266 unsigned int i;
1267 struct ip6t_counters_info tmp, *paddc;
1268 struct ip6t_table *t;
1269 int ret;
1270
1271 if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
1272 return -EFAULT;
1273
1274 if (len != sizeof(tmp) + tmp.num_counters*sizeof(struct ip6t_counters))
1275 return -EINVAL;
1276
1277 paddc = vmalloc(len);
1278 if (!paddc)
1279 return -ENOMEM;
1280
1281 if (copy_from_user(paddc, user, len) != 0) {
1282 ret = -EFAULT;
1283 goto free;
1284 }
1285
1286 t = ip6t_find_table_lock(tmp.name, &ret, &ip6t_mutex);
1287 if (!t)
1288 goto free;
1289
1290 write_lock_bh(&t->lock);
1291 if (t->private->number != paddc->num_counters) {
1292 ret = -EINVAL;
1293 goto unlock_up_free;
1294 }
1295
1296 i = 0;
1297 IP6T_ENTRY_ITERATE(t->private->entries,
1298 t->private->size,
1299 add_counter_to_entry,
1300 paddc->counters,
1301 &i);
1302 unlock_up_free:
1303 write_unlock_bh(&t->lock);
1304 up(&ip6t_mutex);
1305 free:
1306 vfree(paddc);
1307
1308 return ret;
1309}
1310
1311static int
1312do_ip6t_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
1313{
1314 int ret;
1315
1316 if (!capable(CAP_NET_ADMIN))
1317 return -EPERM;
1318
1319 switch (cmd) {
1320 case IP6T_SO_SET_REPLACE:
1321 ret = do_replace(user, len);
1322 break;
1323
1324 case IP6T_SO_SET_ADD_COUNTERS:
1325 ret = do_add_counters(user, len);
1326 break;
1327
1328 default:
1329 duprintf("do_ip6t_set_ctl: unknown request %i\n", cmd);
1330 ret = -EINVAL;
1331 }
1332
1333 return ret;
1334}
1335
1336static int
1337do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
1338{
1339 int ret;
1340
1341 if (!capable(CAP_NET_ADMIN))
1342 return -EPERM;
1343
1344 switch (cmd) {
1345 case IP6T_SO_GET_INFO: {
1346 char name[IP6T_TABLE_MAXNAMELEN];
1347 struct ip6t_table *t;
1348
1349 if (*len != sizeof(struct ip6t_getinfo)) {
1350 duprintf("length %u != %u\n", *len,
1351 sizeof(struct ip6t_getinfo));
1352 ret = -EINVAL;
1353 break;
1354 }
1355
1356 if (copy_from_user(name, user, sizeof(name)) != 0) {
1357 ret = -EFAULT;
1358 break;
1359 }
1360 name[IP6T_TABLE_MAXNAMELEN-1] = '\0';
1361 t = ip6t_find_table_lock(name, &ret, &ip6t_mutex);
1362 if (t) {
1363 struct ip6t_getinfo info;
1364
1365 info.valid_hooks = t->valid_hooks;
1366 memcpy(info.hook_entry, t->private->hook_entry,
1367 sizeof(info.hook_entry));
1368 memcpy(info.underflow, t->private->underflow,
1369 sizeof(info.underflow));
1370 info.num_entries = t->private->number;
1371 info.size = t->private->size;
1372 memcpy(info.name, name, sizeof(info.name));
1373
1374 if (copy_to_user(user, &info, *len) != 0)
1375 ret = -EFAULT;
1376 else
1377 ret = 0;
1378
1379 up(&ip6t_mutex);
1380 }
1381 }
1382 break;
1383
1384 case IP6T_SO_GET_ENTRIES: {
1385 struct ip6t_get_entries get;
1386
1387 if (*len < sizeof(get)) {
1388 duprintf("get_entries: %u < %u\n", *len, sizeof(get));
1389 ret = -EINVAL;
1390 } else if (copy_from_user(&get, user, sizeof(get)) != 0) {
1391 ret = -EFAULT;
1392 } else if (*len != sizeof(struct ip6t_get_entries) + get.size) {
1393 duprintf("get_entries: %u != %u\n", *len,
1394 sizeof(struct ip6t_get_entries) + get.size);
1395 ret = -EINVAL;
1396 } else
1397 ret = get_entries(&get, user);
1398 break;
1399 }
1400
1401 default:
1402 duprintf("do_ip6t_get_ctl: unknown request %i\n", cmd);
1403 ret = -EINVAL;
1404 }
1405
1406 return ret;
1407}
1408
1409/* Registration hooks for targets. */
1410int
1411ip6t_register_target(struct ip6t_target *target)
1412{
1413 int ret;
1414
1415 ret = down_interruptible(&ip6t_mutex);
1416 if (ret != 0)
1417 return ret;
1418
1419 if (!list_named_insert(&ip6t_target, target)) {
1420 duprintf("ip6t_register_target: `%s' already in list!\n",
1421 target->name);
1422 ret = -EINVAL;
1423 }
1424 up(&ip6t_mutex);
1425 return ret;
1426}
1427
1428void
1429ip6t_unregister_target(struct ip6t_target *target)
1430{
1431 down(&ip6t_mutex);
1432 LIST_DELETE(&ip6t_target, target);
1433 up(&ip6t_mutex);
1434}
1435
1436int
1437ip6t_register_match(struct ip6t_match *match)
1438{
1439 int ret;
1440
1441 ret = down_interruptible(&ip6t_mutex);
1442 if (ret != 0)
1443 return ret;
1444
1445 if (!list_named_insert(&ip6t_match, match)) {
1446 duprintf("ip6t_register_match: `%s' already in list!\n",
1447 match->name);
1448 ret = -EINVAL;
1449 }
1450 up(&ip6t_mutex);
1451
1452 return ret;
1453}
1454
1455void
1456ip6t_unregister_match(struct ip6t_match *match)
1457{
1458 down(&ip6t_mutex);
1459 LIST_DELETE(&ip6t_match, match);
1460 up(&ip6t_mutex);
1461}
1462
1463int ip6t_register_table(struct ip6t_table *table,
1464 const struct ip6t_replace *repl)
1465{
1466 int ret;
1467 struct ip6t_table_info *newinfo;
1468 static struct ip6t_table_info bootstrap
1469 = { 0, 0, 0, { 0 }, { 0 }, { } };
1470
1471 newinfo = vmalloc(sizeof(struct ip6t_table_info)
1472 + SMP_ALIGN(repl->size) * num_possible_cpus());
1473 if (!newinfo)
1474 return -ENOMEM;
1475
1476 memcpy(newinfo->entries, repl->entries, repl->size);
1477
1478 ret = translate_table(table->name, table->valid_hooks,
1479 newinfo, repl->size,
1480 repl->num_entries,
1481 repl->hook_entry,
1482 repl->underflow);
1483 if (ret != 0) {
1484 vfree(newinfo);
1485 return ret;
1486 }
1487
1488 ret = down_interruptible(&ip6t_mutex);
1489 if (ret != 0) {
1490 vfree(newinfo);
1491 return ret;
1492 }
1493
1494 /* Don't autoload: we'd eat our tail... */
1495 if (list_named_find(&ip6t_tables, table->name)) {
1496 ret = -EEXIST;
1497 goto free_unlock;
1498 }
1499
1500 /* Simplifies replace_table code. */
1501 table->private = &bootstrap;
1502 if (!replace_table(table, 0, newinfo, &ret))
1503 goto free_unlock;
1504
1505 duprintf("table->private->number = %u\n",
1506 table->private->number);
1507
1508 /* save number of initial entries */
1509 table->private->initial_entries = table->private->number;
1510
1511 rwlock_init(&table->lock);
1512 list_prepend(&ip6t_tables, table);
1513
1514 unlock:
1515 up(&ip6t_mutex);
1516 return ret;
1517
1518 free_unlock:
1519 vfree(newinfo);
1520 goto unlock;
1521}
1522
1523void ip6t_unregister_table(struct ip6t_table *table)
1524{
1525 down(&ip6t_mutex);
1526 LIST_DELETE(&ip6t_tables, table);
1527 up(&ip6t_mutex);
1528
1529 /* Decrease module usage counts and free resources */
1530 IP6T_ENTRY_ITERATE(table->private->entries, table->private->size,
1531 cleanup_entry, NULL);
1532 vfree(table->private);
1533}
1534
/*
 * Returns 1 if the port is matched by the inclusive range [min, max],
 * 0 otherwise; the result is XORed with @invert.
 */
static inline int
port_match(u_int16_t min, u_int16_t max, u_int16_t port, int invert)
{
	int in_range = (min <= port) && (port <= max);

	return in_range ^ invert;
}
1544
1545static int
1546tcp_find_option(u_int8_t option,
1547 const struct sk_buff *skb,
1548 unsigned int tcpoff,
1549 unsigned int optlen,
1550 int invert,
1551 int *hotdrop)
1552{
1553 /* tcp.doff is only 4 bits, ie. max 15 * 4 bytes */
1554 u_int8_t _opt[60 - sizeof(struct tcphdr)], *op;
1555 unsigned int i;
1556
1557 duprintf("tcp_match: finding option\n");
1558 if (!optlen)
1559 return invert;
1560 /* If we don't have the whole header, drop packet. */
1561 op = skb_header_pointer(skb, tcpoff + sizeof(struct tcphdr), optlen,
1562 _opt);
1563 if (op == NULL) {
1564 *hotdrop = 1;
1565 return 0;
1566 }
1567
1568 for (i = 0; i < optlen; ) {
1569 if (op[i] == option) return !invert;
1570 if (op[i] < 2) i++;
1571 else i += op[i+1]?:1;
1572 }
1573
1574 return invert;
1575}
1576
1577static int
1578tcp_match(const struct sk_buff *skb,
1579 const struct net_device *in,
1580 const struct net_device *out,
1581 const void *matchinfo,
1582 int offset,
1583 unsigned int protoff,
1584 int *hotdrop)
1585{
1586 struct tcphdr _tcph, *th;
1587 const struct ip6t_tcp *tcpinfo = matchinfo;
1588
1589 if (offset) {
1590 /* To quote Alan:
1591
1592 Don't allow a fragment of TCP 8 bytes in. Nobody normal
1593 causes this. Its a cracker trying to break in by doing a
1594 flag overwrite to pass the direction checks.
1595 */
1596 if (offset == 1) {
1597 duprintf("Dropping evil TCP offset=1 frag.\n");
1598 *hotdrop = 1;
1599 }
1600 /* Must not be a fragment. */
1601 return 0;
1602 }
1603
1604#define FWINVTCP(bool,invflg) ((bool) ^ !!(tcpinfo->invflags & invflg))
1605
1606 th = skb_header_pointer(skb, protoff, sizeof(_tcph), &_tcph);
1607 if (th == NULL) {
1608 /* We've been asked to examine this packet, and we
1609 can't. Hence, no choice but to drop. */
1610 duprintf("Dropping evil TCP offset=0 tinygram.\n");
1611 *hotdrop = 1;
1612 return 0;
1613 }
1614
1615 if (!port_match(tcpinfo->spts[0], tcpinfo->spts[1],
1616 ntohs(th->source),
1617 !!(tcpinfo->invflags & IP6T_TCP_INV_SRCPT)))
1618 return 0;
1619 if (!port_match(tcpinfo->dpts[0], tcpinfo->dpts[1],
1620 ntohs(th->dest),
1621 !!(tcpinfo->invflags & IP6T_TCP_INV_DSTPT)))
1622 return 0;
1623 if (!FWINVTCP((((unsigned char *)th)[13] & tcpinfo->flg_mask)
1624 == tcpinfo->flg_cmp,
1625 IP6T_TCP_INV_FLAGS))
1626 return 0;
1627 if (tcpinfo->option) {
1628 if (th->doff * 4 < sizeof(_tcph)) {
1629 *hotdrop = 1;
1630 return 0;
1631 }
1632 if (!tcp_find_option(tcpinfo->option, skb, protoff,
1633 th->doff*4 - sizeof(*th),
1634 tcpinfo->invflags & IP6T_TCP_INV_OPTION,
1635 hotdrop))
1636 return 0;
1637 }
1638 return 1;
1639}
1640
1641/* Called when user tries to insert an entry of this type. */
1642static int
1643tcp_checkentry(const char *tablename,
1644 const struct ip6t_ip6 *ipv6,
1645 void *matchinfo,
1646 unsigned int matchsize,
1647 unsigned int hook_mask)
1648{
1649 const struct ip6t_tcp *tcpinfo = matchinfo;
1650
1651 /* Must specify proto == TCP, and no unknown invflags */
1652 return ipv6->proto == IPPROTO_TCP
1653 && !(ipv6->invflags & IP6T_INV_PROTO)
1654 && matchsize == IP6T_ALIGN(sizeof(struct ip6t_tcp))
1655 && !(tcpinfo->invflags & ~IP6T_TCP_INV_MASK);
1656}
1657
1658static int
1659udp_match(const struct sk_buff *skb,
1660 const struct net_device *in,
1661 const struct net_device *out,
1662 const void *matchinfo,
1663 int offset,
1664 unsigned int protoff,
1665 int *hotdrop)
1666{
1667 struct udphdr _udph, *uh;
1668 const struct ip6t_udp *udpinfo = matchinfo;
1669
1670 /* Must not be a fragment. */
1671 if (offset)
1672 return 0;
1673
1674 uh = skb_header_pointer(skb, protoff, sizeof(_udph), &_udph);
1675 if (uh == NULL) {
1676 /* We've been asked to examine this packet, and we
1677 can't. Hence, no choice but to drop. */
1678 duprintf("Dropping evil UDP tinygram.\n");
1679 *hotdrop = 1;
1680 return 0;
1681 }
1682
1683 return port_match(udpinfo->spts[0], udpinfo->spts[1],
1684 ntohs(uh->source),
1685 !!(udpinfo->invflags & IP6T_UDP_INV_SRCPT))
1686 && port_match(udpinfo->dpts[0], udpinfo->dpts[1],
1687 ntohs(uh->dest),
1688 !!(udpinfo->invflags & IP6T_UDP_INV_DSTPT));
1689}
1690
1691/* Called when user tries to insert an entry of this type. */
1692static int
1693udp_checkentry(const char *tablename,
1694 const struct ip6t_ip6 *ipv6,
1695 void *matchinfo,
1696 unsigned int matchinfosize,
1697 unsigned int hook_mask)
1698{
1699 const struct ip6t_udp *udpinfo = matchinfo;
1700
1701 /* Must specify proto == UDP, and no unknown invflags */
1702 if (ipv6->proto != IPPROTO_UDP || (ipv6->invflags & IP6T_INV_PROTO)) {
1703 duprintf("ip6t_udp: Protocol %u != %u\n", ipv6->proto,
1704 IPPROTO_UDP);
1705 return 0;
1706 }
1707 if (matchinfosize != IP6T_ALIGN(sizeof(struct ip6t_udp))) {
1708 duprintf("ip6t_udp: matchsize %u != %u\n",
1709 matchinfosize, IP6T_ALIGN(sizeof(struct ip6t_udp)));
1710 return 0;
1711 }
1712 if (udpinfo->invflags & ~IP6T_UDP_INV_MASK) {
1713 duprintf("ip6t_udp: unknown flags %X\n",
1714 udpinfo->invflags);
1715 return 0;
1716 }
1717
1718 return 1;
1719}
1720
/*
 * Returns 1 if the type equals @test_type and the code lies in the
 * inclusive range [min_code, max_code], 0 otherwise; the result is
 * XORed with @invert.
 */
static inline int
icmp6_type_code_match(u_int8_t test_type, u_int8_t min_code, u_int8_t max_code,
		      u_int8_t type, u_int8_t code,
		      int invert)
{
	int hit = 0;

	if (type == test_type)
		hit = (min_code <= code) && (code <= max_code);

	return hit ^ invert;
}
1730
1731static int
1732icmp6_match(const struct sk_buff *skb,
1733 const struct net_device *in,
1734 const struct net_device *out,
1735 const void *matchinfo,
1736 int offset,
1737 unsigned int protoff,
1738 int *hotdrop)
1739{
1740 struct icmp6hdr _icmp, *ic;
1741 const struct ip6t_icmp *icmpinfo = matchinfo;
1742
1743 /* Must not be a fragment. */
1744 if (offset)
1745 return 0;
1746
1747 ic = skb_header_pointer(skb, protoff, sizeof(_icmp), &_icmp);
1748 if (ic == NULL) {
1749 /* We've been asked to examine this packet, and we
1750 can't. Hence, no choice but to drop. */
1751 duprintf("Dropping evil ICMP tinygram.\n");
1752 *hotdrop = 1;
1753 return 0;
1754 }
1755
1756 return icmp6_type_code_match(icmpinfo->type,
1757 icmpinfo->code[0],
1758 icmpinfo->code[1],
1759 ic->icmp6_type, ic->icmp6_code,
1760 !!(icmpinfo->invflags&IP6T_ICMP_INV));
1761}
1762
1763/* Called when user tries to insert an entry of this type. */
1764static int
1765icmp6_checkentry(const char *tablename,
1766 const struct ip6t_ip6 *ipv6,
1767 void *matchinfo,
1768 unsigned int matchsize,
1769 unsigned int hook_mask)
1770{
1771 const struct ip6t_icmp *icmpinfo = matchinfo;
1772
1773 /* Must specify proto == ICMP, and no unknown invflags */
1774 return ipv6->proto == IPPROTO_ICMPV6
1775 && !(ipv6->invflags & IP6T_INV_PROTO)
1776 && matchsize == IP6T_ALIGN(sizeof(struct ip6t_icmp))
1777 && !(icmpinfo->invflags & ~IP6T_ICMP_INV);
1778}
1779
1780/* The built-in targets: standard (NULL) and error. */
1781static struct ip6t_target ip6t_standard_target = {
1782 .name = IP6T_STANDARD_TARGET,
1783};
1784
1785static struct ip6t_target ip6t_error_target = {
1786 .name = IP6T_ERROR_TARGET,
1787 .target = ip6t_error,
1788};
1789
1790static struct nf_sockopt_ops ip6t_sockopts = {
1791 .pf = PF_INET6,
1792 .set_optmin = IP6T_BASE_CTL,
1793 .set_optmax = IP6T_SO_SET_MAX+1,
1794 .set = do_ip6t_set_ctl,
1795 .get_optmin = IP6T_BASE_CTL,
1796 .get_optmax = IP6T_SO_GET_MAX+1,
1797 .get = do_ip6t_get_ctl,
1798};
1799
1800static struct ip6t_match tcp_matchstruct = {
1801 .name = "tcp",
1802 .match = &tcp_match,
1803 .checkentry = &tcp_checkentry,
1804};
1805
1806static struct ip6t_match udp_matchstruct = {
1807 .name = "udp",
1808 .match = &udp_match,
1809 .checkentry = &udp_checkentry,
1810};
1811
1812static struct ip6t_match icmp6_matchstruct = {
1813 .name = "icmp6",
1814 .match = &icmp6_match,
1815 .checkentry = &icmp6_checkentry,
1816};
1817
1818#ifdef CONFIG_PROC_FS
1819static inline int print_name(const char *i,
1820 off_t start_offset, char *buffer, int length,
1821 off_t *pos, unsigned int *count)
1822{
1823 if ((*count)++ >= start_offset) {
1824 unsigned int namelen;
1825
1826 namelen = sprintf(buffer + *pos, "%s\n",
1827 i + sizeof(struct list_head));
1828 if (*pos + namelen > length) {
1829 /* Stop iterating */
1830 return 1;
1831 }
1832 *pos += namelen;
1833 }
1834 return 0;
1835}
1836
1837static inline int print_target(const struct ip6t_target *t,
1838 off_t start_offset, char *buffer, int length,
1839 off_t *pos, unsigned int *count)
1840{
1841 if (t == &ip6t_standard_target || t == &ip6t_error_target)
1842 return 0;
1843 return print_name((char *)t, start_offset, buffer, length, pos, count);
1844}
1845
1846static int ip6t_get_tables(char *buffer, char **start, off_t offset, int length)
1847{
1848 off_t pos = 0;
1849 unsigned int count = 0;
1850
1851 if (down_interruptible(&ip6t_mutex) != 0)
1852 return 0;
1853
1854 LIST_FIND(&ip6t_tables, print_name, char *,
1855 offset, buffer, length, &pos, &count);
1856
1857 up(&ip6t_mutex);
1858
1859 /* `start' hack - see fs/proc/generic.c line ~105 */
1860 *start=(char *)((unsigned long)count-offset);
1861 return pos;
1862}
1863
1864static int ip6t_get_targets(char *buffer, char **start, off_t offset, int length)
1865{
1866 off_t pos = 0;
1867 unsigned int count = 0;
1868
1869 if (down_interruptible(&ip6t_mutex) != 0)
1870 return 0;
1871
1872 LIST_FIND(&ip6t_target, print_target, struct ip6t_target *,
1873 offset, buffer, length, &pos, &count);
1874
1875 up(&ip6t_mutex);
1876
1877 *start = (char *)((unsigned long)count - offset);
1878 return pos;
1879}
1880
1881static int ip6t_get_matches(char *buffer, char **start, off_t offset, int length)
1882{
1883 off_t pos = 0;
1884 unsigned int count = 0;
1885
1886 if (down_interruptible(&ip6t_mutex) != 0)
1887 return 0;
1888
1889 LIST_FIND(&ip6t_match, print_name, char *,
1890 offset, buffer, length, &pos, &count);
1891
1892 up(&ip6t_mutex);
1893
1894 *start = (char *)((unsigned long)count - offset);
1895 return pos;
1896}
1897
1898static struct { char *name; get_info_t *get_info; } ip6t_proc_entry[] =
1899{ { "ip6_tables_names", ip6t_get_tables },
1900 { "ip6_tables_targets", ip6t_get_targets },
1901 { "ip6_tables_matches", ip6t_get_matches },
1902 { NULL, NULL} };
1903#endif /*CONFIG_PROC_FS*/
1904
1905static int __init init(void)
1906{
1907 int ret;
1908
1909 /* Noone else will be downing sem now, so we won't sleep */
1910 down(&ip6t_mutex);
1911 list_append(&ip6t_target, &ip6t_standard_target);
1912 list_append(&ip6t_target, &ip6t_error_target);
1913 list_append(&ip6t_match, &tcp_matchstruct);
1914 list_append(&ip6t_match, &udp_matchstruct);
1915 list_append(&ip6t_match, &icmp6_matchstruct);
1916 up(&ip6t_mutex);
1917
1918 /* Register setsockopt */
1919 ret = nf_register_sockopt(&ip6t_sockopts);
1920 if (ret < 0) {
1921 duprintf("Unable to register sockopts.\n");
1922 return ret;
1923 }
1924
1925#ifdef CONFIG_PROC_FS
1926 {
1927 struct proc_dir_entry *proc;
1928 int i;
1929
1930 for (i = 0; ip6t_proc_entry[i].name; i++) {
1931 proc = proc_net_create(ip6t_proc_entry[i].name, 0,
1932 ip6t_proc_entry[i].get_info);
1933 if (!proc) {
1934 while (--i >= 0)
1935 proc_net_remove(ip6t_proc_entry[i].name);
1936 nf_unregister_sockopt(&ip6t_sockopts);
1937 return -ENOMEM;
1938 }
1939 proc->owner = THIS_MODULE;
1940 }
1941 }
1942#endif
1943
1944 printk("ip6_tables: (C) 2000-2002 Netfilter core team\n");
1945 return 0;
1946}
1947
1948static void __exit fini(void)
1949{
1950 nf_unregister_sockopt(&ip6t_sockopts);
1951#ifdef CONFIG_PROC_FS
1952 {
1953 int i;
1954 for (i = 0; ip6t_proc_entry[i].name; i++)
1955 proc_net_remove(ip6t_proc_entry[i].name);
1956 }
1957#endif
1958}
1959
/* Table registration and traversal. */
EXPORT_SYMBOL(ip6t_register_table);
EXPORT_SYMBOL(ip6t_unregister_table);
EXPORT_SYMBOL(ip6t_do_table);

/* Match extensions. */
EXPORT_SYMBOL(ip6t_register_match);
EXPORT_SYMBOL(ip6t_unregister_match);

/* Target extensions. */
EXPORT_SYMBOL(ip6t_register_target);
EXPORT_SYMBOL(ip6t_unregister_target);

/* Extension-header test used by other modules (e.g. the LOG target). */
EXPORT_SYMBOL(ip6t_ext_hdr);

module_init(init);
module_exit(fini);
diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c
new file mode 100644
index 000000000000..bfc3d0185d19
--- /dev/null
+++ b/net/ipv6/netfilter/ip6t_LOG.c
@@ -0,0 +1,509 @@
1/*
2 * This is a module which is used for logging packets.
3 */
4
5/* (C) 2001 Jan Rekorajski <baggins@pld.org.pl>
6 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 as
10 * published by the Free Software Foundation.
11 */
12
13#include <linux/module.h>
14#include <linux/moduleparam.h>
15#include <linux/skbuff.h>
16#include <linux/ip.h>
17#include <linux/spinlock.h>
18#include <linux/icmpv6.h>
19#include <net/udp.h>
20#include <net/tcp.h>
21#include <net/ipv6.h>
22#include <linux/netfilter.h>
23#include <linux/netfilter_ipv6/ip6_tables.h>
24
25MODULE_AUTHOR("Jan Rekorajski <baggins@pld.org.pl>");
26MODULE_DESCRIPTION("IP6 tables LOG target module");
27MODULE_LICENSE("GPL");
28
29static unsigned int nflog = 1;
30module_param(nflog, int, 0400);
31MODULE_PARM_DESC(nflog, "register as internal netfilter logging module");
32
33struct in_device;
34#include <net/route.h>
35#include <linux/netfilter_ipv6/ip6t_LOG.h>
36
/* Debug output is compiled out; switch the condition to 1 to enable it. */
#if 0
#define DEBUGP printk
#else
#define DEBUGP(format, args...)
#endif

/* Use lock to serialize, so printks of different packets don't overlap */
static DEFINE_SPINLOCK(log_lock);
45
46/* One level of recursion won't kill us */
47static void dump_packet(const struct ip6t_log_info *info,
48 const struct sk_buff *skb, unsigned int ip6hoff,
49 int recurse)
50{
51 u_int8_t currenthdr;
52 int fragment;
53 struct ipv6hdr _ip6h, *ih;
54 unsigned int ptr;
55 unsigned int hdrlen = 0;
56
57 ih = skb_header_pointer(skb, ip6hoff, sizeof(_ip6h), &_ip6h);
58 if (ih == NULL) {
59 printk("TRUNCATED");
60 return;
61 }
62
63 /* Max length: 88 "SRC=0000.0000.0000.0000.0000.0000.0000.0000 DST=0000.0000.0000.0000.0000.0000.0000.0000" */
64 printk("SRC=%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x ", NIP6(ih->saddr));
65 printk("DST=%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x ", NIP6(ih->daddr));
66
67 /* Max length: 44 "LEN=65535 TC=255 HOPLIMIT=255 FLOWLBL=FFFFF " */
68 printk("LEN=%Zu TC=%u HOPLIMIT=%u FLOWLBL=%u ",
69 ntohs(ih->payload_len) + sizeof(struct ipv6hdr),
70 (ntohl(*(u_int32_t *)ih) & 0x0ff00000) >> 20,
71 ih->hop_limit,
72 (ntohl(*(u_int32_t *)ih) & 0x000fffff));
73
74 fragment = 0;
75 ptr = ip6hoff + sizeof(struct ipv6hdr);
76 currenthdr = ih->nexthdr;
77 while (currenthdr != NEXTHDR_NONE && ip6t_ext_hdr(currenthdr)) {
78 struct ipv6_opt_hdr _hdr, *hp;
79
80 hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr);
81 if (hp == NULL) {
82 printk("TRUNCATED");
83 return;
84 }
85
86 /* Max length: 48 "OPT (...) " */
87 if (info->logflags & IP6T_LOG_IPOPT)
88 printk("OPT ( ");
89
90 switch (currenthdr) {
91 case IPPROTO_FRAGMENT: {
92 struct frag_hdr _fhdr, *fh;
93
94 printk("FRAG:");
95 fh = skb_header_pointer(skb, ptr, sizeof(_fhdr),
96 &_fhdr);
97 if (fh == NULL) {
98 printk("TRUNCATED ");
99 return;
100 }
101
102 /* Max length: 6 "65535 " */
103 printk("%u ", ntohs(fh->frag_off) & 0xFFF8);
104
105 /* Max length: 11 "INCOMPLETE " */
106 if (fh->frag_off & htons(0x0001))
107 printk("INCOMPLETE ");
108
109 printk("ID:%08x ", ntohl(fh->identification));
110
111 if (ntohs(fh->frag_off) & 0xFFF8)
112 fragment = 1;
113
114 hdrlen = 8;
115
116 break;
117 }
118 case IPPROTO_DSTOPTS:
119 case IPPROTO_ROUTING:
120 case IPPROTO_HOPOPTS:
121 if (fragment) {
122 if (info->logflags & IP6T_LOG_IPOPT)
123 printk(")");
124 return;
125 }
126 hdrlen = ipv6_optlen(hp);
127 break;
128 /* Max Length */
129 case IPPROTO_AH:
130 if (info->logflags & IP6T_LOG_IPOPT) {
131 struct ip_auth_hdr _ahdr, *ah;
132
133 /* Max length: 3 "AH " */
134 printk("AH ");
135
136 if (fragment) {
137 printk(")");
138 return;
139 }
140
141 ah = skb_header_pointer(skb, ptr, sizeof(_ahdr),
142 &_ahdr);
143 if (ah == NULL) {
144 /*
145 * Max length: 26 "INCOMPLETE [65535
146 * bytes] )"
147 */
148 printk("INCOMPLETE [%u bytes] )",
149 skb->len - ptr);
150 return;
151 }
152
153 /* Length: 15 "SPI=0xF1234567 */
154 printk("SPI=0x%x ", ntohl(ah->spi));
155
156 }
157
158 hdrlen = (hp->hdrlen+2)<<2;
159 break;
160 case IPPROTO_ESP:
161 if (info->logflags & IP6T_LOG_IPOPT) {
162 struct ip_esp_hdr _esph, *eh;
163
164 /* Max length: 4 "ESP " */
165 printk("ESP ");
166
167 if (fragment) {
168 printk(")");
169 return;
170 }
171
172 /*
173 * Max length: 26 "INCOMPLETE [65535 bytes] )"
174 */
175 eh = skb_header_pointer(skb, ptr, sizeof(_esph),
176 &_esph);
177 if (eh == NULL) {
178 printk("INCOMPLETE [%u bytes] )",
179 skb->len - ptr);
180 return;
181 }
182
183 /* Length: 16 "SPI=0xF1234567 )" */
184 printk("SPI=0x%x )", ntohl(eh->spi) );
185
186 }
187 return;
188 default:
189 /* Max length: 20 "Unknown Ext Hdr 255" */
190 printk("Unknown Ext Hdr %u", currenthdr);
191 return;
192 }
193 if (info->logflags & IP6T_LOG_IPOPT)
194 printk(") ");
195
196 currenthdr = hp->nexthdr;
197 ptr += hdrlen;
198 }
199
200 switch (currenthdr) {
201 case IPPROTO_TCP: {
202 struct tcphdr _tcph, *th;
203
204 /* Max length: 10 "PROTO=TCP " */
205 printk("PROTO=TCP ");
206
207 if (fragment)
208 break;
209
210 /* Max length: 25 "INCOMPLETE [65535 bytes] " */
211 th = skb_header_pointer(skb, ptr, sizeof(_tcph), &_tcph);
212 if (th == NULL) {
213 printk("INCOMPLETE [%u bytes] ", skb->len - ptr);
214 return;
215 }
216
217 /* Max length: 20 "SPT=65535 DPT=65535 " */
218 printk("SPT=%u DPT=%u ",
219 ntohs(th->source), ntohs(th->dest));
220 /* Max length: 30 "SEQ=4294967295 ACK=4294967295 " */
221 if (info->logflags & IP6T_LOG_TCPSEQ)
222 printk("SEQ=%u ACK=%u ",
223 ntohl(th->seq), ntohl(th->ack_seq));
224 /* Max length: 13 "WINDOW=65535 " */
225 printk("WINDOW=%u ", ntohs(th->window));
226 /* Max length: 9 "RES=0x3C " */
227 printk("RES=0x%02x ", (u_int8_t)(ntohl(tcp_flag_word(th) & TCP_RESERVED_BITS) >> 22));
228 /* Max length: 32 "CWR ECE URG ACK PSH RST SYN FIN " */
229 if (th->cwr)
230 printk("CWR ");
231 if (th->ece)
232 printk("ECE ");
233 if (th->urg)
234 printk("URG ");
235 if (th->ack)
236 printk("ACK ");
237 if (th->psh)
238 printk("PSH ");
239 if (th->rst)
240 printk("RST ");
241 if (th->syn)
242 printk("SYN ");
243 if (th->fin)
244 printk("FIN ");
245 /* Max length: 11 "URGP=65535 " */
246 printk("URGP=%u ", ntohs(th->urg_ptr));
247
248 if ((info->logflags & IP6T_LOG_TCPOPT)
249 && th->doff * 4 > sizeof(struct tcphdr)) {
250 u_int8_t _opt[60 - sizeof(struct tcphdr)], *op;
251 unsigned int i;
252 unsigned int optsize = th->doff * 4
253 - sizeof(struct tcphdr);
254
255 op = skb_header_pointer(skb,
256 ptr + sizeof(struct tcphdr),
257 optsize, _opt);
258 if (op == NULL) {
259 printk("OPT (TRUNCATED)");
260 return;
261 }
262
263 /* Max length: 127 "OPT (" 15*4*2chars ") " */
264 printk("OPT (");
265 for (i =0; i < optsize; i++)
266 printk("%02X", op[i]);
267 printk(") ");
268 }
269 break;
270 }
271 case IPPROTO_UDP: {
272 struct udphdr _udph, *uh;
273
274 /* Max length: 10 "PROTO=UDP " */
275 printk("PROTO=UDP ");
276
277 if (fragment)
278 break;
279
280 /* Max length: 25 "INCOMPLETE [65535 bytes] " */
281 uh = skb_header_pointer(skb, ptr, sizeof(_udph), &_udph);
282 if (uh == NULL) {
283 printk("INCOMPLETE [%u bytes] ", skb->len - ptr);
284 return;
285 }
286
287 /* Max length: 20 "SPT=65535 DPT=65535 " */
288 printk("SPT=%u DPT=%u LEN=%u ",
289 ntohs(uh->source), ntohs(uh->dest),
290 ntohs(uh->len));
291 break;
292 }
293 case IPPROTO_ICMPV6: {
294 struct icmp6hdr _icmp6h, *ic;
295
296 /* Max length: 13 "PROTO=ICMPv6 " */
297 printk("PROTO=ICMPv6 ");
298
299 if (fragment)
300 break;
301
302 /* Max length: 25 "INCOMPLETE [65535 bytes] " */
303 ic = skb_header_pointer(skb, ptr, sizeof(_icmp6h), &_icmp6h);
304 if (ic == NULL) {
305 printk("INCOMPLETE [%u bytes] ", skb->len - ptr);
306 return;
307 }
308
309 /* Max length: 18 "TYPE=255 CODE=255 " */
310 printk("TYPE=%u CODE=%u ", ic->icmp6_type, ic->icmp6_code);
311
312 switch (ic->icmp6_type) {
313 case ICMPV6_ECHO_REQUEST:
314 case ICMPV6_ECHO_REPLY:
315 /* Max length: 19 "ID=65535 SEQ=65535 " */
316 printk("ID=%u SEQ=%u ",
317 ntohs(ic->icmp6_identifier),
318 ntohs(ic->icmp6_sequence));
319 break;
320 case ICMPV6_MGM_QUERY:
321 case ICMPV6_MGM_REPORT:
322 case ICMPV6_MGM_REDUCTION:
323 break;
324
325 case ICMPV6_PARAMPROB:
326 /* Max length: 17 "POINTER=ffffffff " */
327 printk("POINTER=%08x ", ntohl(ic->icmp6_pointer));
328 /* Fall through */
329 case ICMPV6_DEST_UNREACH:
330 case ICMPV6_PKT_TOOBIG:
331 case ICMPV6_TIME_EXCEED:
332 /* Max length: 3+maxlen */
333 if (recurse) {
334 printk("[");
335 dump_packet(info, skb, ptr + sizeof(_icmp6h),
336 0);
337 printk("] ");
338 }
339
340 /* Max length: 10 "MTU=65535 " */
341 if (ic->icmp6_type == ICMPV6_PKT_TOOBIG)
342 printk("MTU=%u ", ntohl(ic->icmp6_mtu));
343 }
344 break;
345 }
346 /* Max length: 10 "PROTO=255 " */
347 default:
348 printk("PROTO=%u ", currenthdr);
349 }
350
351 /* Max length: 15 "UID=4294967295 " */
352 if ((info->logflags & IP6T_LOG_UID) && recurse && skb->sk) {
353 read_lock_bh(&skb->sk->sk_callback_lock);
354 if (skb->sk->sk_socket && skb->sk->sk_socket->file)
355 printk("UID=%u ", skb->sk->sk_socket->file->f_uid);
356 read_unlock_bh(&skb->sk->sk_callback_lock);
357 }
358}
359
360static void
361ip6t_log_packet(unsigned int hooknum,
362 const struct sk_buff *skb,
363 const struct net_device *in,
364 const struct net_device *out,
365 const struct ip6t_log_info *loginfo,
366 const char *level_string,
367 const char *prefix)
368{
369 struct ipv6hdr *ipv6h = skb->nh.ipv6h;
370
371 spin_lock_bh(&log_lock);
372 printk(level_string);
373 printk("%sIN=%s OUT=%s ",
374 prefix == NULL ? loginfo->prefix : prefix,
375 in ? in->name : "",
376 out ? out->name : "");
377 if (in && !out) {
378 /* MAC logging for input chain only. */
379 printk("MAC=");
380 if (skb->dev && skb->dev->hard_header_len && skb->mac.raw != (void*)ipv6h) {
381 if (skb->dev->type != ARPHRD_SIT){
382 int i;
383 unsigned char *p = skb->mac.raw;
384 for (i = 0; i < skb->dev->hard_header_len; i++,p++)
385 printk("%02x%c", *p,
386 i==skb->dev->hard_header_len - 1
387 ? ' ':':');
388 } else {
389 int i;
390 unsigned char *p = skb->mac.raw;
391 if ( p - (ETH_ALEN*2+2) > skb->head ){
392 p -= (ETH_ALEN+2);
393 for (i = 0; i < (ETH_ALEN); i++,p++)
394 printk("%02x%s", *p,
395 i == ETH_ALEN-1 ? "->" : ":");
396 p -= (ETH_ALEN*2);
397 for (i = 0; i < (ETH_ALEN); i++,p++)
398 printk("%02x%c", *p,
399 i == ETH_ALEN-1 ? ' ' : ':');
400 }
401
402 if ((skb->dev->addr_len == 4) &&
403 skb->dev->hard_header_len > 20){
404 printk("TUNNEL=");
405 p = skb->mac.raw + 12;
406 for (i = 0; i < 4; i++,p++)
407 printk("%3d%s", *p,
408 i == 3 ? "->" : ".");
409 for (i = 0; i < 4; i++,p++)
410 printk("%3d%c", *p,
411 i == 3 ? ' ' : '.');
412 }
413 }
414 } else
415 printk(" ");
416 }
417
418 dump_packet(loginfo, skb, (u8*)skb->nh.ipv6h - skb->data, 1);
419 printk("\n");
420 spin_unlock_bh(&log_lock);
421}
422
423static unsigned int
424ip6t_log_target(struct sk_buff **pskb,
425 const struct net_device *in,
426 const struct net_device *out,
427 unsigned int hooknum,
428 const void *targinfo,
429 void *userinfo)
430{
431 const struct ip6t_log_info *loginfo = targinfo;
432 char level_string[4] = "< >";
433
434 level_string[1] = '0' + (loginfo->level % 8);
435 ip6t_log_packet(hooknum, *pskb, in, out, loginfo, level_string, NULL);
436
437 return IP6T_CONTINUE;
438}
439
440static void
441ip6t_logfn(unsigned int hooknum,
442 const struct sk_buff *skb,
443 const struct net_device *in,
444 const struct net_device *out,
445 const char *prefix)
446{
447 struct ip6t_log_info loginfo = {
448 .level = 0,
449 .logflags = IP6T_LOG_MASK,
450 .prefix = ""
451 };
452
453 ip6t_log_packet(hooknum, skb, in, out, &loginfo, KERN_WARNING, prefix);
454}
455
456static int ip6t_log_checkentry(const char *tablename,
457 const struct ip6t_entry *e,
458 void *targinfo,
459 unsigned int targinfosize,
460 unsigned int hook_mask)
461{
462 const struct ip6t_log_info *loginfo = targinfo;
463
464 if (targinfosize != IP6T_ALIGN(sizeof(struct ip6t_log_info))) {
465 DEBUGP("LOG: targinfosize %u != %u\n",
466 targinfosize, IP6T_ALIGN(sizeof(struct ip6t_log_info)));
467 return 0;
468 }
469
470 if (loginfo->level >= 8) {
471 DEBUGP("LOG: level %u >= 8\n", loginfo->level);
472 return 0;
473 }
474
475 if (loginfo->prefix[sizeof(loginfo->prefix)-1] != '\0') {
476 DEBUGP("LOG: prefix term %i\n",
477 loginfo->prefix[sizeof(loginfo->prefix)-1]);
478 return 0;
479 }
480
481 return 1;
482}
483
484static struct ip6t_target ip6t_log_reg = {
485 .name = "LOG",
486 .target = ip6t_log_target,
487 .checkentry = ip6t_log_checkentry,
488 .me = THIS_MODULE,
489};
490
491static int __init init(void)
492{
493 if (ip6t_register_target(&ip6t_log_reg))
494 return -EINVAL;
495 if (nflog)
496 nf_log_register(PF_INET6, &ip6t_logfn);
497
498 return 0;
499}
500
501static void __exit fini(void)
502{
503 if (nflog)
504 nf_log_unregister(PF_INET6, &ip6t_logfn);
505 ip6t_unregister_target(&ip6t_log_reg);
506}
507
508module_init(init);
509module_exit(fini);
diff --git a/net/ipv6/netfilter/ip6t_MARK.c b/net/ipv6/netfilter/ip6t_MARK.c
new file mode 100644
index 000000000000..d09ceb05013a
--- /dev/null
+++ b/net/ipv6/netfilter/ip6t_MARK.c
@@ -0,0 +1,78 @@
1/* This is a module which is used for setting the NFMARK field of an skb. */
2
3/* (C) 1999-2001 Marc Boucher <marc@mbsi.ca>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation.
8 */
9
10#include <linux/module.h>
11#include <linux/skbuff.h>
12#include <linux/ip.h>
13#include <net/checksum.h>
14
15#include <linux/netfilter_ipv6/ip6_tables.h>
16#include <linux/netfilter_ipv6/ip6t_MARK.h>
17
18MODULE_LICENSE("GPL");
19MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
20
21static unsigned int
22target(struct sk_buff **pskb,
23 const struct net_device *in,
24 const struct net_device *out,
25 unsigned int hooknum,
26 const void *targinfo,
27 void *userinfo)
28{
29 const struct ip6t_mark_target_info *markinfo = targinfo;
30
31 if((*pskb)->nfmark != markinfo->mark) {
32 (*pskb)->nfmark = markinfo->mark;
33 (*pskb)->nfcache |= NFC_ALTERED;
34 }
35 return IP6T_CONTINUE;
36}
37
38static int
39checkentry(const char *tablename,
40 const struct ip6t_entry *e,
41 void *targinfo,
42 unsigned int targinfosize,
43 unsigned int hook_mask)
44{
45 if (targinfosize != IP6T_ALIGN(sizeof(struct ip6t_mark_target_info))) {
46 printk(KERN_WARNING "MARK: targinfosize %u != %Zu\n",
47 targinfosize,
48 IP6T_ALIGN(sizeof(struct ip6t_mark_target_info)));
49 return 0;
50 }
51
52 if (strcmp(tablename, "mangle") != 0) {
53 printk(KERN_WARNING "MARK: can only be called from \"mangle\" table, not \"%s\"\n", tablename);
54 return 0;
55 }
56
57 return 1;
58}
59
60static struct ip6t_target ip6t_mark_reg
61= { { NULL, NULL }, "MARK", target, checkentry, NULL, THIS_MODULE };
62
63static int __init init(void)
64{
65 printk(KERN_DEBUG "registering ipv6 mark target\n");
66 if (ip6t_register_target(&ip6t_mark_reg))
67 return -EINVAL;
68
69 return 0;
70}
71
/* Module unload: unregister the MARK target. */
static void __exit fini(void)
{
	ip6t_unregister_target(&ip6t_mark_reg);
}
76
77module_init(init);
78module_exit(fini);
diff --git a/net/ipv6/netfilter/ip6t_ah.c b/net/ipv6/netfilter/ip6t_ah.c
new file mode 100644
index 000000000000..d5b94f142bba
--- /dev/null
+++ b/net/ipv6/netfilter/ip6t_ah.c
@@ -0,0 +1,208 @@
1/* Kernel module to match AH parameters. */
2
3/* (C) 2001-2002 Andras Kis-Szabo <kisza@sch.bme.hu>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation.
8 */
9
10#include <linux/module.h>
11#include <linux/skbuff.h>
12#include <linux/ipv6.h>
13#include <linux/types.h>
14#include <net/checksum.h>
15#include <net/ipv6.h>
16
17#include <linux/netfilter_ipv6/ip6_tables.h>
18#include <linux/netfilter_ipv6/ip6t_ah.h>
19
20MODULE_LICENSE("GPL");
21MODULE_DESCRIPTION("IPv6 AH match");
22MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>");
23
24#if 0
25#define DEBUGP printk
26#else
27#define DEBUGP(format, args...)
28#endif
29
30/* Returns 1 if the spi is matched by the range, 0 otherwise */
31static inline int
32spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, int invert)
33{
34 int r=0;
35 DEBUGP("ah spi_match:%c 0x%x <= 0x%x <= 0x%x",invert? '!':' ',
36 min,spi,max);
37 r = (spi >= min && spi <= max) ^ invert;
38 DEBUGP(" result %s\n",r? "PASS\n" : "FAILED\n");
39 return r;
40}
41
42static int
43match(const struct sk_buff *skb,
44 const struct net_device *in,
45 const struct net_device *out,
46 const void *matchinfo,
47 int offset,
48 unsigned int protoff,
49 int *hotdrop)
50{
51 struct ip_auth_hdr *ah = NULL, _ah;
52 const struct ip6t_ah *ahinfo = matchinfo;
53 unsigned int temp;
54 int len;
55 u8 nexthdr;
56 unsigned int ptr;
57 unsigned int hdrlen = 0;
58
59 /*DEBUGP("IPv6 AH entered\n");*/
60 /* if (opt->auth == 0) return 0;
61 * It does not filled on output */
62
63 /* type of the 1st exthdr */
64 nexthdr = skb->nh.ipv6h->nexthdr;
65 /* pointer to the 1st exthdr */
66 ptr = sizeof(struct ipv6hdr);
67 /* available length */
68 len = skb->len - ptr;
69 temp = 0;
70
71 while (ip6t_ext_hdr(nexthdr)) {
72 struct ipv6_opt_hdr _hdr, *hp;
73
74 DEBUGP("ipv6_ah header iteration \n");
75
76 /* Is there enough space for the next ext header? */
77 if (len < sizeof(struct ipv6_opt_hdr))
78 return 0;
79 /* No more exthdr -> evaluate */
80 if (nexthdr == NEXTHDR_NONE)
81 break;
82 /* ESP -> evaluate */
83 if (nexthdr == NEXTHDR_ESP)
84 break;
85
86 hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr);
87 BUG_ON(hp == NULL);
88
89 /* Calculate the header length */
90 if (nexthdr == NEXTHDR_FRAGMENT)
91 hdrlen = 8;
92 else if (nexthdr == NEXTHDR_AUTH)
93 hdrlen = (hp->hdrlen+2)<<2;
94 else
95 hdrlen = ipv6_optlen(hp);
96
97 /* AH -> evaluate */
98 if (nexthdr == NEXTHDR_AUTH) {
99 temp |= MASK_AH;
100 break;
101 }
102
103
104 /* set the flag */
105 switch (nexthdr) {
106 case NEXTHDR_HOP:
107 case NEXTHDR_ROUTING:
108 case NEXTHDR_FRAGMENT:
109 case NEXTHDR_AUTH:
110 case NEXTHDR_DEST:
111 break;
112 default:
113 DEBUGP("ipv6_ah match: unknown nextheader %u\n",nexthdr);
114 return 0;
115 }
116
117 nexthdr = hp->nexthdr;
118 len -= hdrlen;
119 ptr += hdrlen;
120 if (ptr > skb->len) {
121 DEBUGP("ipv6_ah: new pointer too large! \n");
122 break;
123 }
124 }
125
126 /* AH header not found */
127 if (temp != MASK_AH)
128 return 0;
129
130 if (len < sizeof(struct ip_auth_hdr)){
131 *hotdrop = 1;
132 return 0;
133 }
134
135 ah = skb_header_pointer(skb, ptr, sizeof(_ah), &_ah);
136 BUG_ON(ah == NULL);
137
138 DEBUGP("IPv6 AH LEN %u %u ", hdrlen, ah->hdrlen);
139 DEBUGP("RES %04X ", ah->reserved);
140 DEBUGP("SPI %u %08X\n", ntohl(ah->spi), ntohl(ah->spi));
141
142 DEBUGP("IPv6 AH spi %02X ",
143 (spi_match(ahinfo->spis[0], ahinfo->spis[1],
144 ntohl(ah->spi),
145 !!(ahinfo->invflags & IP6T_AH_INV_SPI))));
146 DEBUGP("len %02X %04X %02X ",
147 ahinfo->hdrlen, hdrlen,
148 (!ahinfo->hdrlen ||
149 (ahinfo->hdrlen == hdrlen) ^
150 !!(ahinfo->invflags & IP6T_AH_INV_LEN)));
151 DEBUGP("res %02X %04X %02X\n",
152 ahinfo->hdrres, ah->reserved,
153 !(ahinfo->hdrres && ah->reserved));
154
155 return (ah != NULL)
156 &&
157 (spi_match(ahinfo->spis[0], ahinfo->spis[1],
158 ntohl(ah->spi),
159 !!(ahinfo->invflags & IP6T_AH_INV_SPI)))
160 &&
161 (!ahinfo->hdrlen ||
162 (ahinfo->hdrlen == hdrlen) ^
163 !!(ahinfo->invflags & IP6T_AH_INV_LEN))
164 &&
165 !(ahinfo->hdrres && ah->reserved);
166}
167
168/* Called when user tries to insert an entry of this type. */
169static int
170checkentry(const char *tablename,
171 const struct ip6t_ip6 *ip,
172 void *matchinfo,
173 unsigned int matchinfosize,
174 unsigned int hook_mask)
175{
176 const struct ip6t_ah *ahinfo = matchinfo;
177
178 if (matchinfosize != IP6T_ALIGN(sizeof(struct ip6t_ah))) {
179 DEBUGP("ip6t_ah: matchsize %u != %u\n",
180 matchinfosize, IP6T_ALIGN(sizeof(struct ip6t_ah)));
181 return 0;
182 }
183 if (ahinfo->invflags & ~IP6T_AH_INV_MASK) {
184 DEBUGP("ip6t_ah: unknown flags %X\n", ahinfo->invflags);
185 return 0;
186 }
187 return 1;
188}
189
190static struct ip6t_match ah_match = {
191 .name = "ah",
192 .match = &match,
193 .checkentry = &checkentry,
194 .me = THIS_MODULE,
195};
196
197static int __init init(void)
198{
199 return ip6t_register_match(&ah_match);
200}
201
/* Module unload: unregister the "ah" match. */
static void __exit cleanup(void)
{
	ip6t_unregister_match(&ah_match);
}
206
207module_init(init);
208module_exit(cleanup);
diff --git a/net/ipv6/netfilter/ip6t_dst.c b/net/ipv6/netfilter/ip6t_dst.c
new file mode 100644
index 000000000000..540925e4a7a8
--- /dev/null
+++ b/net/ipv6/netfilter/ip6t_dst.c
@@ -0,0 +1,298 @@
1/* Kernel module to match Hop-by-Hop and Destination parameters. */
2
3/* (C) 2001-2002 Andras Kis-Szabo <kisza@sch.bme.hu>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation.
8 */
9
10#include <linux/module.h>
11#include <linux/skbuff.h>
12#include <linux/ipv6.h>
13#include <linux/types.h>
14#include <net/checksum.h>
15#include <net/ipv6.h>
16
17#include <asm/byteorder.h>
18
19#include <linux/netfilter_ipv6/ip6_tables.h>
20#include <linux/netfilter_ipv6/ip6t_opts.h>
21
22#define HOPBYHOP 0
23
24MODULE_LICENSE("GPL");
25#if HOPBYHOP
26MODULE_DESCRIPTION("IPv6 HbH match");
27#else
28MODULE_DESCRIPTION("IPv6 DST match");
29#endif
30MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>");
31
32#if 0
33#define DEBUGP printk
34#else
35#define DEBUGP(format, args...)
36#endif
37
38/*
39 * (Type & 0xC0) >> 6
40 * 0 -> ignorable
41 * 1 -> must drop the packet
42 * 2 -> send ICMP PARM PROB regardless and drop packet
43 * 3 -> Send ICMP if not a multicast address and drop packet
44 * (Type & 0x20) >> 5
45 * 0 -> invariant
46 * 1 -> can change the routing
47 * (Type & 0x1F) Type
48 * 0 -> Pad1 (only 1 byte!)
49 * 1 -> PadN LENGTH info (total length = length + 2)
50 * C0 | 2 -> JUMBO 4 x x x x ( xxxx > 64k )
51 * 5 -> RTALERT 2 x x
52 */
53
/*
 * Match callback shared by the "hbh" and "dst" matches.
 *
 * Walks the IPv6 extension-header chain looking for the options header
 * selected at build time by HOPBYHOP (NEXTHDR_HOP when non-zero,
 * NEXTHDR_DEST otherwise), then compares it with the rule data: optionally
 * the header length and, in strict mode, the sequence of option
 * types/lengths.
 *
 * Returns non-zero on a match, 0 otherwise. *hotdrop is set when the
 * header was found but fewer than sizeof(struct ipv6_opt_hdr) bytes remain.
 */
static int
match(const struct sk_buff *skb,
      const struct net_device *in,
      const struct net_device *out,
      const void *matchinfo,
      int offset,
      unsigned int protoff,
      int *hotdrop)
{
	struct ipv6_opt_hdr _optsh, *oh;
	const struct ip6t_opts *optinfo = matchinfo;
	unsigned int temp;
	unsigned int len;
	u8 nexthdr;
	unsigned int ptr;
	unsigned int hdrlen = 0;
	unsigned int ret = 0;
	u8 _opttype, *tp = NULL;
	u8 _optlen, *lp = NULL;
	unsigned int optlen;

	/* type of the 1st exthdr */
	nexthdr = skb->nh.ipv6h->nexthdr;
	/* pointer to the 1st exthdr */
	ptr = sizeof(struct ipv6hdr);
	/* available length */
	len = skb->len - ptr;
	temp = 0;

	while (ip6t_ext_hdr(nexthdr)) {
		struct ipv6_opt_hdr _hdr, *hp;

		DEBUGP("ipv6_opts header iteration \n");

		/* Is there enough space for the next ext header? */
		if (len < (int)sizeof(struct ipv6_opt_hdr))
			return 0;
		/* No more exthdr -> evaluate */
		if (nexthdr == NEXTHDR_NONE) {
			break;
		}
		/* ESP -> evaluate */
		if (nexthdr == NEXTHDR_ESP) {
			break;
		}

		hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr);
		BUG_ON(hp == NULL);

		/* Calculate the header length */
		if (nexthdr == NEXTHDR_FRAGMENT) {
			hdrlen = 8;
		} else if (nexthdr == NEXTHDR_AUTH)
			hdrlen = (hp->hdrlen+2)<<2;
		else
			hdrlen = ipv6_optlen(hp);

		/* OPTS -> evaluate */
		/* Both preprocessor branches open the same brace, which is
		 * closed after the shared break below. */
#if HOPBYHOP
		if (nexthdr == NEXTHDR_HOP) {
			temp |= MASK_HOPOPTS;
#else
		if (nexthdr == NEXTHDR_DEST) {
			temp |= MASK_DSTOPTS;
#endif
			break;
		}


		/* Only known extension headers may be skipped over */
		switch (nexthdr){
		case NEXTHDR_HOP:
		case NEXTHDR_ROUTING:
		case NEXTHDR_FRAGMENT:
		case NEXTHDR_AUTH:
		case NEXTHDR_DEST:
			break;
		default:
			DEBUGP("ipv6_opts match: unknown nextheader %u\n",nexthdr);
			return 0;
			break;
		}

		/* Advance to the following extension header */
		nexthdr = hp->nexthdr;
		len -= hdrlen;
		ptr += hdrlen;
		if ( ptr > skb->len ) {
			DEBUGP("ipv6_opts: new pointer is too large! \n");
			break;
		}
	}

	/* OPTIONS header not found */
#if HOPBYHOP
	if ( temp != MASK_HOPOPTS ) return 0;
#else
	if ( temp != MASK_DSTOPTS ) return 0;
#endif

	if (len < (int)sizeof(struct ipv6_opt_hdr)){
		*hotdrop = 1;
		return 0;
	}

	if (len < hdrlen){
		/* Packet is smaller than its length field claims */
		return 0;
	}

	oh = skb_header_pointer(skb, ptr, sizeof(_optsh), &_optsh);
	BUG_ON(oh == NULL);

	DEBUGP("IPv6 OPTS LEN %u %u ", hdrlen, oh->hdrlen);

	DEBUGP("len %02X %04X %02X ",
	       optinfo->hdrlen, hdrlen,
	       (!(optinfo->flags & IP6T_OPTS_LEN) ||
		((optinfo->hdrlen == hdrlen) ^
		 !!(optinfo->invflags & IP6T_OPTS_INV_LEN))));

	/* Header-length test: passes when the rule does not ask for a length
	 * check, otherwise the equality result is XORed with the inversion
	 * flag. */
	ret = (oh != NULL)
		&&
		(!(optinfo->flags & IP6T_OPTS_LEN) ||
		 ((optinfo->hdrlen == hdrlen) ^
		  !!(optinfo->invflags & IP6T_OPTS_INV_LEN)));

	/* Skip the two leading bytes of the options header; from here on
	 * ptr/hdrlen describe the option area only. */
	ptr += 2;
	hdrlen -= 2;
	if ( !(optinfo->flags & IP6T_OPTS_OPTS) ){
		/* No per-option matching requested */
		return ret;
	} else if (optinfo->flags & IP6T_OPTS_NSTRICT) {
		/* Falls through to the final return 0 */
		DEBUGP("Not strict - not implemented");
	} else {
		DEBUGP("Strict ");
		DEBUGP("#%d ",optinfo->optsnr);
		/* The first optsnr options of the packet must equal the
		 * rule's list, in order. Each opts[] entry holds the expected
		 * type in its high byte and the expected length in its low
		 * byte. */
		for(temp=0; temp<optinfo->optsnr; temp++){
			/* type field exists ? */
			if (hdrlen < 1)
				break;
			tp = skb_header_pointer(skb, ptr, sizeof(_opttype),
						&_opttype);
			if (tp == NULL)
				break;

			/* Type check */
			if (*tp != (optinfo->opts[temp] & 0xFF00)>>8){
				DEBUGP("Tbad %02X %02X\n",
				       *tp,
				       (optinfo->opts[temp] & 0xFF00)>>8);
				return 0;
			} else {
				DEBUGP("Tok ");
			}
			/* Length check */
			if (*tp) {
				u16 spec_len;

				/* length field exists ? */
				if (hdrlen < 2)
					break;
				lp = skb_header_pointer(skb, ptr + 1,
							sizeof(_optlen),
							&_optlen);
				if (lp == NULL)
					break;
				spec_len = optinfo->opts[temp] & 0x00FF;

				/* A rule length of 0xFF accepts any length */
				if (spec_len != 0x00FF && spec_len != *lp) {
					DEBUGP("Lbad %02X %04X\n", *lp,
					       spec_len);
					return 0;
				}
				DEBUGP("Lok ");
				/* type byte + length byte + payload */
				optlen = *lp + 2;
			} else {
				/* Type 0 (Pad1) is a single byte with no
				 * length field */
				DEBUGP("Pad1\n");
				optlen = 1;
			}

			/* Step to the next */
			DEBUGP("len%04X \n", optlen);

			/* Running past the packet or the header only aborts
			 * when further rule options remain to be checked. */
			if ((ptr > skb->len - optlen || hdrlen < optlen) &&
			    (temp < optinfo->optsnr - 1)) {
				DEBUGP("new pointer is too large! \n");
				break;
			}
			ptr += optlen;
			hdrlen -= optlen;
		}
		/* Every listed option was checked only if the loop ran to
		 * completion. */
		if (temp == optinfo->optsnr)
			return ret;
		else return 0;
	}

	return 0;
}
251
252/* Called when user tries to insert an entry of this type. */
253static int
254checkentry(const char *tablename,
255 const struct ip6t_ip6 *ip,
256 void *matchinfo,
257 unsigned int matchinfosize,
258 unsigned int hook_mask)
259{
260 const struct ip6t_opts *optsinfo = matchinfo;
261
262 if (matchinfosize != IP6T_ALIGN(sizeof(struct ip6t_opts))) {
263 DEBUGP("ip6t_opts: matchsize %u != %u\n",
264 matchinfosize, IP6T_ALIGN(sizeof(struct ip6t_opts)));
265 return 0;
266 }
267 if (optsinfo->invflags & ~IP6T_OPTS_INV_MASK) {
268 DEBUGP("ip6t_opts: unknown flags %X\n",
269 optsinfo->invflags);
270 return 0;
271 }
272
273 return 1;
274}
275
276static struct ip6t_match opts_match = {
277#if HOPBYHOP
278 .name = "hbh",
279#else
280 .name = "dst",
281#endif
282 .match = &match,
283 .checkentry = &checkentry,
284 .me = THIS_MODULE,
285};
286
287static int __init init(void)
288{
289 return ip6t_register_match(&opts_match);
290}
291
/* Module unload: unregister the options match. */
static void __exit cleanup(void)
{
	ip6t_unregister_match(&opts_match);
}
296
297module_init(init);
298module_exit(cleanup);
diff --git a/net/ipv6/netfilter/ip6t_esp.c b/net/ipv6/netfilter/ip6t_esp.c
new file mode 100644
index 000000000000..e39dd236fd8e
--- /dev/null
+++ b/net/ipv6/netfilter/ip6t_esp.c
@@ -0,0 +1,181 @@
1/* Kernel module to match ESP parameters. */
2/* (C) 2001-2002 Andras Kis-Szabo <kisza@sch.bme.hu>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8
9
10#include <linux/module.h>
11#include <linux/skbuff.h>
12#include <linux/ipv6.h>
13#include <linux/types.h>
14#include <net/checksum.h>
15#include <net/ipv6.h>
16
17#include <linux/netfilter_ipv6/ip6_tables.h>
18#include <linux/netfilter_ipv6/ip6t_esp.h>
19
20MODULE_LICENSE("GPL");
21MODULE_DESCRIPTION("IPv6 ESP match");
22MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>");
23
24#if 0
25#define DEBUGP printk
26#else
27#define DEBUGP(format, args...)
28#endif
29
30/* Returns 1 if the spi is matched by the range, 0 otherwise */
31static inline int
32spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, int invert)
33{
34 int r=0;
35 DEBUGP("esp spi_match:%c 0x%x <= 0x%x <= 0x%x",invert? '!':' ',
36 min,spi,max);
37 r=(spi >= min && spi <= max) ^ invert;
38 DEBUGP(" result %s\n",r? "PASS\n" : "FAILED\n");
39 return r;
40}
41
42static int
43match(const struct sk_buff *skb,
44 const struct net_device *in,
45 const struct net_device *out,
46 const void *matchinfo,
47 int offset,
48 unsigned int protoff,
49 int *hotdrop)
50{
51 struct ip_esp_hdr _esp, *eh = NULL;
52 const struct ip6t_esp *espinfo = matchinfo;
53 unsigned int temp;
54 int len;
55 u8 nexthdr;
56 unsigned int ptr;
57
58 /* Make sure this isn't an evil packet */
59 /*DEBUGP("ipv6_esp entered \n");*/
60
61 /* type of the 1st exthdr */
62 nexthdr = skb->nh.ipv6h->nexthdr;
63 /* pointer to the 1st exthdr */
64 ptr = sizeof(struct ipv6hdr);
65 /* available length */
66 len = skb->len - ptr;
67 temp = 0;
68
69 while (ip6t_ext_hdr(nexthdr)) {
70 struct ipv6_opt_hdr _hdr, *hp;
71 int hdrlen;
72
73 DEBUGP("ipv6_esp header iteration \n");
74
75 /* Is there enough space for the next ext header? */
76 if (len < sizeof(struct ipv6_opt_hdr))
77 return 0;
78 /* No more exthdr -> evaluate */
79 if (nexthdr == NEXTHDR_NONE)
80 break;
81 /* ESP -> evaluate */
82 if (nexthdr == NEXTHDR_ESP) {
83 temp |= MASK_ESP;
84 break;
85 }
86
87 hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr);
88 BUG_ON(hp == NULL);
89
90 /* Calculate the header length */
91 if (nexthdr == NEXTHDR_FRAGMENT)
92 hdrlen = 8;
93 else if (nexthdr == NEXTHDR_AUTH)
94 hdrlen = (hp->hdrlen+2)<<2;
95 else
96 hdrlen = ipv6_optlen(hp);
97
98 /* set the flag */
99 switch (nexthdr) {
100 case NEXTHDR_HOP:
101 case NEXTHDR_ROUTING:
102 case NEXTHDR_FRAGMENT:
103 case NEXTHDR_AUTH:
104 case NEXTHDR_DEST:
105 break;
106 default:
107 DEBUGP("ipv6_esp match: unknown nextheader %u\n",nexthdr);
108 return 0;
109 }
110
111 nexthdr = hp->nexthdr;
112 len -= hdrlen;
113 ptr += hdrlen;
114 if (ptr > skb->len) {
115 DEBUGP("ipv6_esp: new pointer too large! \n");
116 break;
117 }
118 }
119
120 /* ESP header not found */
121 if (temp != MASK_ESP)
122 return 0;
123
124 if (len < sizeof(struct ip_esp_hdr)) {
125 *hotdrop = 1;
126 return 0;
127 }
128
129 eh = skb_header_pointer(skb, ptr, sizeof(_esp), &_esp);
130 BUG_ON(eh == NULL);
131
132 DEBUGP("IPv6 ESP SPI %u %08X\n", ntohl(eh->spi), ntohl(eh->spi));
133
134 return (eh != NULL)
135 && spi_match(espinfo->spis[0], espinfo->spis[1],
136 ntohl(eh->spi),
137 !!(espinfo->invflags & IP6T_ESP_INV_SPI));
138}
139
140/* Called when user tries to insert an entry of this type. */
141static int
142checkentry(const char *tablename,
143 const struct ip6t_ip6 *ip,
144 void *matchinfo,
145 unsigned int matchinfosize,
146 unsigned int hook_mask)
147{
148 const struct ip6t_esp *espinfo = matchinfo;
149
150 if (matchinfosize != IP6T_ALIGN(sizeof(struct ip6t_esp))) {
151 DEBUGP("ip6t_esp: matchsize %u != %u\n",
152 matchinfosize, IP6T_ALIGN(sizeof(struct ip6t_esp)));
153 return 0;
154 }
155 if (espinfo->invflags & ~IP6T_ESP_INV_MASK) {
156 DEBUGP("ip6t_esp: unknown flags %X\n",
157 espinfo->invflags);
158 return 0;
159 }
160 return 1;
161}
162
163static struct ip6t_match esp_match = {
164 .name = "esp",
165 .match = &match,
166 .checkentry = &checkentry,
167 .me = THIS_MODULE,
168};
169
170static int __init init(void)
171{
172 return ip6t_register_match(&esp_match);
173}
174
/* Module unload: unregister the "esp" match. */
static void __exit cleanup(void)
{
	ip6t_unregister_match(&esp_match);
}
179
180module_init(init);
181module_exit(cleanup);
diff --git a/net/ipv6/netfilter/ip6t_eui64.c b/net/ipv6/netfilter/ip6t_eui64.c
new file mode 100644
index 000000000000..616c2cbcd54d
--- /dev/null
+++ b/net/ipv6/netfilter/ip6t_eui64.c
@@ -0,0 +1,101 @@
1/* Kernel module to match EUI64 address parameters. */
2
3/* (C) 2001-2002 Andras Kis-Szabo <kisza@sch.bme.hu>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation.
8 */
9
10#include <linux/module.h>
11#include <linux/skbuff.h>
12#include <linux/ipv6.h>
13#include <linux/if_ether.h>
14
15#include <linux/netfilter_ipv6/ip6_tables.h>
16
17MODULE_DESCRIPTION("IPv6 EUI64 address checking match");
18MODULE_LICENSE("GPL");
19MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>");
20
21static int
22match(const struct sk_buff *skb,
23 const struct net_device *in,
24 const struct net_device *out,
25 const void *matchinfo,
26 int offset,
27 unsigned int protoff,
28 int *hotdrop)
29{
30
31 unsigned char eui64[8];
32 int i=0;
33
34 if ( !(skb->mac.raw >= skb->head
35 && (skb->mac.raw + ETH_HLEN) <= skb->data)
36 && offset != 0) {
37 *hotdrop = 1;
38 return 0;
39 }
40
41 memset(eui64, 0, sizeof(eui64));
42
43 if (eth_hdr(skb)->h_proto == ntohs(ETH_P_IPV6)) {
44 if (skb->nh.ipv6h->version == 0x6) {
45 memcpy(eui64, eth_hdr(skb)->h_source, 3);
46 memcpy(eui64 + 5, eth_hdr(skb)->h_source + 3, 3);
47 eui64[3]=0xff;
48 eui64[4]=0xfe;
49 eui64[0] |= 0x02;
50
51 i=0;
52 while ((skb->nh.ipv6h->saddr.s6_addr[8+i] ==
53 eui64[i]) && (i<8)) i++;
54
55 if ( i == 8 )
56 return 1;
57 }
58 }
59
60 return 0;
61}
62
63static int
64ip6t_eui64_checkentry(const char *tablename,
65 const struct ip6t_ip6 *ip,
66 void *matchinfo,
67 unsigned int matchsize,
68 unsigned int hook_mask)
69{
70 if (hook_mask
71 & ~((1 << NF_IP6_PRE_ROUTING) | (1 << NF_IP6_LOCAL_IN) |
72 (1 << NF_IP6_FORWARD))) {
73 printk("ip6t_eui64: only valid for PRE_ROUTING, LOCAL_IN or FORWARD.\n");
74 return 0;
75 }
76
77 if (matchsize != IP6T_ALIGN(sizeof(int)))
78 return 0;
79
80 return 1;
81}
82
83static struct ip6t_match eui64_match = {
84 .name = "eui64",
85 .match = &match,
86 .checkentry = &ip6t_eui64_checkentry,
87 .me = THIS_MODULE,
88};
89
90static int __init init(void)
91{
92 return ip6t_register_match(&eui64_match);
93}
94
/* Module unload: unregister the "eui64" match. */
static void __exit fini(void)
{
	ip6t_unregister_match(&eui64_match);
}
99
100module_init(init);
101module_exit(fini);
diff --git a/net/ipv6/netfilter/ip6t_frag.c b/net/ipv6/netfilter/ip6t_frag.c
new file mode 100644
index 000000000000..4bfa30a9bc80
--- /dev/null
+++ b/net/ipv6/netfilter/ip6t_frag.c
@@ -0,0 +1,229 @@
1/* Kernel module to match FRAG parameters. */
2
3/* (C) 2001-2002 Andras Kis-Szabo <kisza@sch.bme.hu>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation.
8 */
9
10#include <linux/module.h>
11#include <linux/skbuff.h>
12#include <linux/ipv6.h>
13#include <linux/types.h>
14#include <net/checksum.h>
15#include <net/ipv6.h>
16
17#include <linux/netfilter_ipv6/ip6_tables.h>
18#include <linux/netfilter_ipv6/ip6t_frag.h>
19
20MODULE_LICENSE("GPL");
21MODULE_DESCRIPTION("IPv6 FRAG match");
22MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>");
23
24#if 0
25#define DEBUGP printk
26#else
27#define DEBUGP(format, args...)
28#endif
29
30/* Returns 1 if the id is matched by the range, 0 otherwise */
31static inline int
32id_match(u_int32_t min, u_int32_t max, u_int32_t id, int invert)
33{
34 int r=0;
35 DEBUGP("frag id_match:%c 0x%x <= 0x%x <= 0x%x",invert? '!':' ',
36 min,id,max);
37 r=(id >= min && id <= max) ^ invert;
38 DEBUGP(" result %s\n",r? "PASS" : "FAILED");
39 return r;
40}
41
42static int
43match(const struct sk_buff *skb,
44 const struct net_device *in,
45 const struct net_device *out,
46 const void *matchinfo,
47 int offset,
48 unsigned int protoff,
49 int *hotdrop)
50{
51 struct frag_hdr _frag, *fh = NULL;
52 const struct ip6t_frag *fraginfo = matchinfo;
53 unsigned int temp;
54 int len;
55 u8 nexthdr;
56 unsigned int ptr;
57 unsigned int hdrlen = 0;
58
59 /* type of the 1st exthdr */
60 nexthdr = skb->nh.ipv6h->nexthdr;
61 /* pointer to the 1st exthdr */
62 ptr = sizeof(struct ipv6hdr);
63 /* available length */
64 len = skb->len - ptr;
65 temp = 0;
66
67 while (ip6t_ext_hdr(nexthdr)) {
68 struct ipv6_opt_hdr _hdr, *hp;
69
70 DEBUGP("ipv6_frag header iteration \n");
71
72 /* Is there enough space for the next ext header? */
73 if (len < (int)sizeof(struct ipv6_opt_hdr))
74 return 0;
75 /* No more exthdr -> evaluate */
76 if (nexthdr == NEXTHDR_NONE) {
77 break;
78 }
79 /* ESP -> evaluate */
80 if (nexthdr == NEXTHDR_ESP) {
81 break;
82 }
83
84 hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr);
85 BUG_ON(hp == NULL);
86
87 /* Calculate the header length */
88 if (nexthdr == NEXTHDR_FRAGMENT) {
89 hdrlen = 8;
90 } else if (nexthdr == NEXTHDR_AUTH)
91 hdrlen = (hp->hdrlen+2)<<2;
92 else
93 hdrlen = ipv6_optlen(hp);
94
95 /* FRAG -> evaluate */
96 if (nexthdr == NEXTHDR_FRAGMENT) {
97 temp |= MASK_FRAGMENT;
98 break;
99 }
100
101
102 /* set the flag */
103 switch (nexthdr){
104 case NEXTHDR_HOP:
105 case NEXTHDR_ROUTING:
106 case NEXTHDR_FRAGMENT:
107 case NEXTHDR_AUTH:
108 case NEXTHDR_DEST:
109 break;
110 default:
111 DEBUGP("ipv6_frag match: unknown nextheader %u\n",nexthdr);
112 return 0;
113 break;
114 }
115
116 nexthdr = hp->nexthdr;
117 len -= hdrlen;
118 ptr += hdrlen;
119 if ( ptr > skb->len ) {
120 DEBUGP("ipv6_frag: new pointer too large! \n");
121 break;
122 }
123 }
124
125 /* FRAG header not found */
126 if ( temp != MASK_FRAGMENT ) return 0;
127
128 if (len < sizeof(struct frag_hdr)){
129 *hotdrop = 1;
130 return 0;
131 }
132
133 fh = skb_header_pointer(skb, ptr, sizeof(_frag), &_frag);
134 BUG_ON(fh == NULL);
135
136 DEBUGP("INFO %04X ", fh->frag_off);
137 DEBUGP("OFFSET %04X ", ntohs(fh->frag_off) & ~0x7);
138 DEBUGP("RES %02X %04X", fh->reserved, ntohs(fh->frag_off) & 0x6);
139 DEBUGP("MF %04X ", fh->frag_off & htons(IP6_MF));
140 DEBUGP("ID %u %08X\n", ntohl(fh->identification),
141 ntohl(fh->identification));
142
143 DEBUGP("IPv6 FRAG id %02X ",
144 (id_match(fraginfo->ids[0], fraginfo->ids[1],
145 ntohl(fh->identification),
146 !!(fraginfo->invflags & IP6T_FRAG_INV_IDS))));
147 DEBUGP("res %02X %02X%04X %02X ",
148 (fraginfo->flags & IP6T_FRAG_RES), fh->reserved,
149 ntohs(fh->frag_off) & 0x6,
150 !((fraginfo->flags & IP6T_FRAG_RES)
151 && (fh->reserved || (ntohs(fh->frag_off) & 0x06))));
152 DEBUGP("first %02X %02X %02X ",
153 (fraginfo->flags & IP6T_FRAG_FST),
154 ntohs(fh->frag_off) & ~0x7,
155 !((fraginfo->flags & IP6T_FRAG_FST)
156 && (ntohs(fh->frag_off) & ~0x7)));
157 DEBUGP("mf %02X %02X %02X ",
158 (fraginfo->flags & IP6T_FRAG_MF),
159 ntohs(fh->frag_off) & IP6_MF,
160 !((fraginfo->flags & IP6T_FRAG_MF)
161 && !((ntohs(fh->frag_off) & IP6_MF))));
162 DEBUGP("last %02X %02X %02X\n",
163 (fraginfo->flags & IP6T_FRAG_NMF),
164 ntohs(fh->frag_off) & IP6_MF,
165 !((fraginfo->flags & IP6T_FRAG_NMF)
166 && (ntohs(fh->frag_off) & IP6_MF)));
167
168 return (fh != NULL)
169 &&
170 (id_match(fraginfo->ids[0], fraginfo->ids[1],
171 ntohl(fh->identification),
172 !!(fraginfo->invflags & IP6T_FRAG_INV_IDS)))
173 &&
174 !((fraginfo->flags & IP6T_FRAG_RES)
175 && (fh->reserved || (ntohs(fh->frag_off) & 0x6)))
176 &&
177 !((fraginfo->flags & IP6T_FRAG_FST)
178 && (ntohs(fh->frag_off) & ~0x7))
179 &&
180 !((fraginfo->flags & IP6T_FRAG_MF)
181 && !(ntohs(fh->frag_off) & IP6_MF))
182 &&
183 !((fraginfo->flags & IP6T_FRAG_NMF)
184 && (ntohs(fh->frag_off) & IP6_MF));
185}
186
187/* Called when user tries to insert an entry of this type. */
188static int
189checkentry(const char *tablename,
190 const struct ip6t_ip6 *ip,
191 void *matchinfo,
192 unsigned int matchinfosize,
193 unsigned int hook_mask)
194{
195 const struct ip6t_frag *fraginfo = matchinfo;
196
197 if (matchinfosize != IP6T_ALIGN(sizeof(struct ip6t_frag))) {
198 DEBUGP("ip6t_frag: matchsize %u != %u\n",
199 matchinfosize, IP6T_ALIGN(sizeof(struct ip6t_frag)));
200 return 0;
201 }
202 if (fraginfo->invflags & ~IP6T_FRAG_INV_MASK) {
203 DEBUGP("ip6t_frag: unknown flags %X\n",
204 fraginfo->invflags);
205 return 0;
206 }
207
208 return 1;
209}
210
211static struct ip6t_match frag_match = {
212 .name = "frag",
213 .match = &match,
214 .checkentry = &checkentry,
215 .me = THIS_MODULE,
216};
217
218static int __init init(void)
219{
220 return ip6t_register_match(&frag_match);
221}
222
/* Module unload: unregister the "frag" match. */
static void __exit cleanup(void)
{
	ip6t_unregister_match(&frag_match);
}
227
228module_init(init);
229module_exit(cleanup);
diff --git a/net/ipv6/netfilter/ip6t_hbh.c b/net/ipv6/netfilter/ip6t_hbh.c
new file mode 100644
index 000000000000..27f3650d127e
--- /dev/null
+++ b/net/ipv6/netfilter/ip6t_hbh.c
@@ -0,0 +1,298 @@
1/* Kernel module to match Hop-by-Hop and Destination parameters. */
2
3/* (C) 2001-2002 Andras Kis-Szabo <kisza@sch.bme.hu>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation.
8 */
9
10#include <linux/module.h>
11#include <linux/skbuff.h>
12#include <linux/ipv6.h>
13#include <linux/types.h>
14#include <net/checksum.h>
15#include <net/ipv6.h>
16
17#include <asm/byteorder.h>
18
19#include <linux/netfilter_ipv6/ip6_tables.h>
20#include <linux/netfilter_ipv6/ip6t_opts.h>
21
22#define HOPBYHOP 1
23
24MODULE_LICENSE("GPL");
25#if HOPBYHOP
26MODULE_DESCRIPTION("IPv6 HbH match");
27#else
28MODULE_DESCRIPTION("IPv6 DST match");
29#endif
30MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>");
31
32#if 0
33#define DEBUGP printk
34#else
35#define DEBUGP(format, args...)
36#endif
37
38/*
39 * (Type & 0xC0) >> 6
40 * 0 -> ignorable
41 * 1 -> must drop the packet
42 * 2 -> send ICMP PARM PROB regardless and drop packet
43 * 3 -> Send ICMP if not a multicast address and drop packet
44 * (Type & 0x20) >> 5
45 * 0 -> invariant
46 * 1 -> can change the routing
47 * (Type & 0x1F) Type
48 * 0 -> Pad1 (only 1 byte!)
49 * 1 -> PadN LENGTH info (total length = length + 2)
50 * C0 | 2 -> JUMBO 4 x x x x ( xxxx > 64k )
51 * 5 -> RTALERT 2 x x
52 */
53
/*
 * Match callback shared by the "hbh" and "dst" matches.
 *
 * Walks the IPv6 extension-header chain looking for the options header
 * selected at build time by HOPBYHOP (NEXTHDR_HOP when non-zero,
 * NEXTHDR_DEST otherwise), then compares it with the rule data: optionally
 * the header length and, in strict mode, the sequence of option
 * types/lengths.
 *
 * Returns non-zero on a match, 0 otherwise. *hotdrop is set when the
 * header was found but fewer than sizeof(struct ipv6_opt_hdr) bytes remain.
 */
static int
match(const struct sk_buff *skb,
      const struct net_device *in,
      const struct net_device *out,
      const void *matchinfo,
      int offset,
      unsigned int protoff,
      int *hotdrop)
{
	struct ipv6_opt_hdr _optsh, *oh;
	const struct ip6t_opts *optinfo = matchinfo;
	unsigned int temp;
	unsigned int len;
	u8 nexthdr;
	unsigned int ptr;
	unsigned int hdrlen = 0;
	unsigned int ret = 0;
	u8 _opttype, *tp = NULL;
	u8 _optlen, *lp = NULL;
	unsigned int optlen;

	/* type of the 1st exthdr */
	nexthdr = skb->nh.ipv6h->nexthdr;
	/* pointer to the 1st exthdr */
	ptr = sizeof(struct ipv6hdr);
	/* available length */
	len = skb->len - ptr;
	temp = 0;

	while (ip6t_ext_hdr(nexthdr)) {
		struct ipv6_opt_hdr _hdr, *hp;

		DEBUGP("ipv6_opts header iteration \n");

		/* Is there enough space for the next ext header? */
		if (len < (int)sizeof(struct ipv6_opt_hdr))
			return 0;
		/* No more exthdr -> evaluate */
		if (nexthdr == NEXTHDR_NONE) {
			break;
		}
		/* ESP -> evaluate */
		if (nexthdr == NEXTHDR_ESP) {
			break;
		}

		hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr);
		BUG_ON(hp == NULL);

		/* Calculate the header length */
		if (nexthdr == NEXTHDR_FRAGMENT) {
			hdrlen = 8;
		} else if (nexthdr == NEXTHDR_AUTH)
			hdrlen = (hp->hdrlen+2)<<2;
		else
			hdrlen = ipv6_optlen(hp);

		/* OPTS -> evaluate */
		/* Both preprocessor branches open the same brace, which is
		 * closed after the shared break below. */
#if HOPBYHOP
		if (nexthdr == NEXTHDR_HOP) {
			temp |= MASK_HOPOPTS;
#else
		if (nexthdr == NEXTHDR_DEST) {
			temp |= MASK_DSTOPTS;
#endif
			break;
		}


		/* Only known extension headers may be skipped over */
		switch (nexthdr){
		case NEXTHDR_HOP:
		case NEXTHDR_ROUTING:
		case NEXTHDR_FRAGMENT:
		case NEXTHDR_AUTH:
		case NEXTHDR_DEST:
			break;
		default:
			DEBUGP("ipv6_opts match: unknown nextheader %u\n",nexthdr);
			return 0;
			break;
		}

		/* Advance to the following extension header */
		nexthdr = hp->nexthdr;
		len -= hdrlen;
		ptr += hdrlen;
		if ( ptr > skb->len ) {
			DEBUGP("ipv6_opts: new pointer is too large! \n");
			break;
		}
	}

	/* OPTIONS header not found */
#if HOPBYHOP
	if ( temp != MASK_HOPOPTS ) return 0;
#else
	if ( temp != MASK_DSTOPTS ) return 0;
#endif

	if (len < (int)sizeof(struct ipv6_opt_hdr)){
		*hotdrop = 1;
		return 0;
	}

	if (len < hdrlen){
		/* Packet is smaller than its length field claims */
		return 0;
	}

	oh = skb_header_pointer(skb, ptr, sizeof(_optsh), &_optsh);
	BUG_ON(oh == NULL);

	DEBUGP("IPv6 OPTS LEN %u %u ", hdrlen, oh->hdrlen);

	DEBUGP("len %02X %04X %02X ",
	       optinfo->hdrlen, hdrlen,
	       (!(optinfo->flags & IP6T_OPTS_LEN) ||
		((optinfo->hdrlen == hdrlen) ^
		 !!(optinfo->invflags & IP6T_OPTS_INV_LEN))));

	/* Header-length test: passes when the rule does not ask for a length
	 * check, otherwise the equality result is XORed with the inversion
	 * flag. */
	ret = (oh != NULL)
		&&
		(!(optinfo->flags & IP6T_OPTS_LEN) ||
		 ((optinfo->hdrlen == hdrlen) ^
		  !!(optinfo->invflags & IP6T_OPTS_INV_LEN)));

	/* Skip the two leading bytes of the options header; from here on
	 * ptr/hdrlen describe the option area only. */
	ptr += 2;
	hdrlen -= 2;
	if ( !(optinfo->flags & IP6T_OPTS_OPTS) ){
		/* No per-option matching requested */
		return ret;
	} else if (optinfo->flags & IP6T_OPTS_NSTRICT) {
		/* Falls through to the final return 0 */
		DEBUGP("Not strict - not implemented");
	} else {
		DEBUGP("Strict ");
		DEBUGP("#%d ",optinfo->optsnr);
		/* The first optsnr options of the packet must equal the
		 * rule's list, in order. Each opts[] entry holds the expected
		 * type in its high byte and the expected length in its low
		 * byte. */
		for(temp=0; temp<optinfo->optsnr; temp++){
			/* type field exists ? */
			if (hdrlen < 1)
				break;
			tp = skb_header_pointer(skb, ptr, sizeof(_opttype),
						&_opttype);
			if (tp == NULL)
				break;

			/* Type check */
			if (*tp != (optinfo->opts[temp] & 0xFF00)>>8){
				DEBUGP("Tbad %02X %02X\n",
				       *tp,
				       (optinfo->opts[temp] & 0xFF00)>>8);
				return 0;
			} else {
				DEBUGP("Tok ");
			}
			/* Length check */
			if (*tp) {
				u16 spec_len;

				/* length field exists ? */
				if (hdrlen < 2)
					break;
				lp = skb_header_pointer(skb, ptr + 1,
							sizeof(_optlen),
							&_optlen);
				if (lp == NULL)
					break;
				spec_len = optinfo->opts[temp] & 0x00FF;

				/* A rule length of 0xFF accepts any length */
				if (spec_len != 0x00FF && spec_len != *lp) {
					DEBUGP("Lbad %02X %04X\n", *lp,
					       spec_len);
					return 0;
				}
				DEBUGP("Lok ");
				/* type byte + length byte + payload */
				optlen = *lp + 2;
			} else {
				/* Type 0 (Pad1) is a single byte with no
				 * length field */
				DEBUGP("Pad1\n");
				optlen = 1;
			}

			/* Step to the next */
			DEBUGP("len%04X \n", optlen);

			/* Running past the packet or the header only aborts
			 * when further rule options remain to be checked. */
			if ((ptr > skb->len - optlen || hdrlen < optlen) &&
			    (temp < optinfo->optsnr - 1)) {
				DEBUGP("new pointer is too large! \n");
				break;
			}
			ptr += optlen;
			hdrlen -= optlen;
		}
		/* Every listed option was checked only if the loop ran to
		 * completion. */
		if (temp == optinfo->optsnr)
			return ret;
		else return 0;
	}

	return 0;
}
251
252/* Called when user tries to insert an entry of this type. */
253static int
254checkentry(const char *tablename,
255 const struct ip6t_ip6 *ip,
256 void *matchinfo,
257 unsigned int matchinfosize,
258 unsigned int hook_mask)
259{
260 const struct ip6t_opts *optsinfo = matchinfo;
261
262 if (matchinfosize != IP6T_ALIGN(sizeof(struct ip6t_opts))) {
263 DEBUGP("ip6t_opts: matchsize %u != %u\n",
264 matchinfosize, IP6T_ALIGN(sizeof(struct ip6t_opts)));
265 return 0;
266 }
267 if (optsinfo->invflags & ~IP6T_OPTS_INV_MASK) {
268 DEBUGP("ip6t_opts: unknown flags %X\n",
269 optsinfo->invflags);
270 return 0;
271 }
272
273 return 1;
274}
275
276static struct ip6t_match opts_match = {
277#if HOPBYHOP
278 .name = "hbh",
279#else
280 .name = "dst",
281#endif
282 .match = &match,
283 .checkentry = &checkentry,
284 .me = THIS_MODULE,
285};
286
287static int __init init(void)
288{
289 return ip6t_register_match(&opts_match);
290}
291
/* Module unload: unregister the options match. */
static void __exit cleanup(void)
{
	ip6t_unregister_match(&opts_match);
}
296
297module_init(init);
298module_exit(cleanup);
diff --git a/net/ipv6/netfilter/ip6t_hl.c b/net/ipv6/netfilter/ip6t_hl.c
new file mode 100644
index 000000000000..0beaff5471dd
--- /dev/null
+++ b/net/ipv6/netfilter/ip6t_hl.c
@@ -0,0 +1,80 @@
1/* Hop Limit matching module */
2
3/* (C) 2001-2002 Maciej Soltysiak <solt@dns.toxicfilms.tv>
4 * Based on HW's ttl module
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10
11#include <linux/module.h>
12#include <linux/skbuff.h>
13
14#include <linux/netfilter_ipv6/ip6t_hl.h>
15#include <linux/netfilter_ipv6/ip6_tables.h>
16
17MODULE_AUTHOR("Maciej Soltysiak <solt@dns.toxicfilms.tv>");
18MODULE_DESCRIPTION("IP tables Hop Limit matching module");
19MODULE_LICENSE("GPL");
20
21static int match(const struct sk_buff *skb, const struct net_device *in,
22 const struct net_device *out, const void *matchinfo,
23 int offset, unsigned int protoff,
24 int *hotdrop)
25{
26 const struct ip6t_hl_info *info = matchinfo;
27 const struct ipv6hdr *ip6h = skb->nh.ipv6h;
28
29 switch (info->mode) {
30 case IP6T_HL_EQ:
31 return (ip6h->hop_limit == info->hop_limit);
32 break;
33 case IP6T_HL_NE:
34 return (!(ip6h->hop_limit == info->hop_limit));
35 break;
36 case IP6T_HL_LT:
37 return (ip6h->hop_limit < info->hop_limit);
38 break;
39 case IP6T_HL_GT:
40 return (ip6h->hop_limit > info->hop_limit);
41 break;
42 default:
43 printk(KERN_WARNING "ip6t_hl: unknown mode %d\n",
44 info->mode);
45 return 0;
46 }
47
48 return 0;
49}
50
51static int checkentry(const char *tablename, const struct ip6t_ip6 *ip,
52 void *matchinfo, unsigned int matchsize,
53 unsigned int hook_mask)
54{
55 if (matchsize != IP6T_ALIGN(sizeof(struct ip6t_hl_info)))
56 return 0;
57
58 return 1;
59}
60
61static struct ip6t_match hl_match = {
62 .name = "hl",
63 .match = &match,
64 .checkentry = &checkentry,
65 .me = THIS_MODULE,
66};
67
68static int __init init(void)
69{
70 return ip6t_register_match(&hl_match);
71}
72
73static void __exit fini(void)
74{
75 ip6t_unregister_match(&hl_match);
76
77}
78
79module_init(init);
80module_exit(fini);
diff --git a/net/ipv6/netfilter/ip6t_ipv6header.c b/net/ipv6/netfilter/ip6t_ipv6header.c
new file mode 100644
index 000000000000..32e67f05845b
--- /dev/null
+++ b/net/ipv6/netfilter/ip6t_ipv6header.c
@@ -0,0 +1,167 @@
1/* ipv6header match - matches IPv6 packets based
2 on whether they contain certain headers */
3
4/* Original idea: Brad Chapman
5 * Rewritten by: Andras Kis-Szabo <kisza@sch.bme.hu> */
6
7/* (C) 2001-2002 Andras Kis-Szabo <kisza@sch.bme.hu>
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License version 2 as
11 * published by the Free Software Foundation.
12 */
13
14#include <linux/module.h>
15#include <linux/skbuff.h>
16#include <linux/ipv6.h>
17#include <linux/types.h>
18#include <net/checksum.h>
19#include <net/ipv6.h>
20
21#include <linux/netfilter_ipv6/ip6_tables.h>
22#include <linux/netfilter_ipv6/ip6t_ipv6header.h>
23
24MODULE_LICENSE("GPL");
25MODULE_DESCRIPTION("IPv6 headers match");
26MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>");
27
28static int
29ipv6header_match(const struct sk_buff *skb,
30 const struct net_device *in,
31 const struct net_device *out,
32 const void *matchinfo,
33 int offset,
34 unsigned int protoff,
35 int *hotdrop)
36{
37 const struct ip6t_ipv6header_info *info = matchinfo;
38 unsigned int temp;
39 int len;
40 u8 nexthdr;
41 unsigned int ptr;
42
43 /* Make sure this isn't an evil packet */
44
45 /* type of the 1st exthdr */
46 nexthdr = skb->nh.ipv6h->nexthdr;
47 /* pointer to the 1st exthdr */
48 ptr = sizeof(struct ipv6hdr);
49 /* available length */
50 len = skb->len - ptr;
51 temp = 0;
52
53 while (ip6t_ext_hdr(nexthdr)) {
54 struct ipv6_opt_hdr _hdr, *hp;
55 int hdrlen;
56
57 /* Is there enough space for the next ext header? */
58 if (len < (int)sizeof(struct ipv6_opt_hdr))
59 return 0;
60 /* No more exthdr -> evaluate */
61 if (nexthdr == NEXTHDR_NONE) {
62 temp |= MASK_NONE;
63 break;
64 }
65 /* ESP -> evaluate */
66 if (nexthdr == NEXTHDR_ESP) {
67 temp |= MASK_ESP;
68 break;
69 }
70
71 hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr);
72 BUG_ON(hp == NULL);
73
74 /* Calculate the header length */
75 if (nexthdr == NEXTHDR_FRAGMENT) {
76 hdrlen = 8;
77 } else if (nexthdr == NEXTHDR_AUTH)
78 hdrlen = (hp->hdrlen+2)<<2;
79 else
80 hdrlen = ipv6_optlen(hp);
81
82 /* set the flag */
83 switch (nexthdr){
84 case NEXTHDR_HOP:
85 temp |= MASK_HOPOPTS;
86 break;
87 case NEXTHDR_ROUTING:
88 temp |= MASK_ROUTING;
89 break;
90 case NEXTHDR_FRAGMENT:
91 temp |= MASK_FRAGMENT;
92 break;
93 case NEXTHDR_AUTH:
94 temp |= MASK_AH;
95 break;
96 case NEXTHDR_DEST:
97 temp |= MASK_DSTOPTS;
98 break;
99 default:
100 return 0;
101 break;
102 }
103
104 nexthdr = hp->nexthdr;
105 len -= hdrlen;
106 ptr += hdrlen;
107 if (ptr > skb->len)
108 break;
109 }
110
111 if ( (nexthdr != NEXTHDR_NONE ) && (nexthdr != NEXTHDR_ESP) )
112 temp |= MASK_PROTO;
113
114 if (info->modeflag)
115 return !((temp ^ info->matchflags ^ info->invflags)
116 & info->matchflags);
117 else {
118 if (info->invflags)
119 return temp != info->matchflags;
120 else
121 return temp == info->matchflags;
122 }
123}
124
125static int
126ipv6header_checkentry(const char *tablename,
127 const struct ip6t_ip6 *ip,
128 void *matchinfo,
129 unsigned int matchsize,
130 unsigned int hook_mask)
131{
132 const struct ip6t_ipv6header_info *info = matchinfo;
133
134 /* Check for obvious errors */
135 /* This match is valid in all hooks! */
136 if (matchsize != IP6T_ALIGN(sizeof(struct ip6t_ipv6header_info)))
137 return 0;
138
139 /* invflags is 0 or 0xff in hard mode */
140 if ((!info->modeflag) && info->invflags != 0x00
141 && info->invflags != 0xFF)
142 return 0;
143
144 return 1;
145}
146
147static struct ip6t_match ip6t_ipv6header_match = {
148 .name = "ipv6header",
149 .match = &ipv6header_match,
150 .checkentry = &ipv6header_checkentry,
151 .destroy = NULL,
152 .me = THIS_MODULE,
153};
154
155static int __init ipv6header_init(void)
156{
157 return ip6t_register_match(&ip6t_ipv6header_match);
158}
159
160static void __exit ipv6header_exit(void)
161{
162 ip6t_unregister_match(&ip6t_ipv6header_match);
163}
164
165module_init(ipv6header_init);
166module_exit(ipv6header_exit);
167
diff --git a/net/ipv6/netfilter/ip6t_length.c b/net/ipv6/netfilter/ip6t_length.c
new file mode 100644
index 000000000000..e0537d3811d5
--- /dev/null
+++ b/net/ipv6/netfilter/ip6t_length.c
@@ -0,0 +1,66 @@
1/* Length Match - IPv6 Port */
2
3/* (C) 1999-2001 James Morris <jmorris@intercode.com.au>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation.
8 */
9
10
11#include <linux/module.h>
12#include <linux/skbuff.h>
13#include <linux/netfilter_ipv6/ip6t_length.h>
14#include <linux/netfilter_ipv6/ip6_tables.h>
15
16MODULE_LICENSE("GPL");
17MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>");
18MODULE_DESCRIPTION("IPv6 packet length match");
19
20static int
21match(const struct sk_buff *skb,
22 const struct net_device *in,
23 const struct net_device *out,
24 const void *matchinfo,
25 int offset,
26 unsigned int protoff,
27 int *hotdrop)
28{
29 const struct ip6t_length_info *info = matchinfo;
30 u_int16_t pktlen = ntohs(skb->nh.ipv6h->payload_len) + sizeof(struct ipv6hdr);
31
32 return (pktlen >= info->min && pktlen <= info->max) ^ info->invert;
33}
34
35static int
36checkentry(const char *tablename,
37 const struct ip6t_ip6 *ip,
38 void *matchinfo,
39 unsigned int matchsize,
40 unsigned int hook_mask)
41{
42 if (matchsize != IP6T_ALIGN(sizeof(struct ip6t_length_info)))
43 return 0;
44
45 return 1;
46}
47
48static struct ip6t_match length_match = {
49 .name = "length",
50 .match = &match,
51 .checkentry = &checkentry,
52 .me = THIS_MODULE,
53};
54
55static int __init init(void)
56{
57 return ip6t_register_match(&length_match);
58}
59
60static void __exit fini(void)
61{
62 ip6t_unregister_match(&length_match);
63}
64
65module_init(init);
66module_exit(fini);
diff --git a/net/ipv6/netfilter/ip6t_limit.c b/net/ipv6/netfilter/ip6t_limit.c
new file mode 100644
index 000000000000..fb782f610be2
--- /dev/null
+++ b/net/ipv6/netfilter/ip6t_limit.c
@@ -0,0 +1,147 @@
1/* Kernel module to control the rate
2 *
3 * 2 September 1999: Changed from the target RATE to the match
4 * `limit', removed logging. Did I mention that
5 * Alexey is a fucking genius?
6 * Rusty Russell (rusty@rustcorp.com.au). */
7
8/* (C) 1999 Jérôme de Vivie <devivie@info.enserb.u-bordeaux.fr>
9 * (C) 1999 Hervé Eychenne <eychenne@info.enserb.u-bordeaux.fr>
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License version 2 as
13 * published by the Free Software Foundation.
14 */
15
16#include <linux/module.h>
17#include <linux/skbuff.h>
18#include <linux/spinlock.h>
19#include <linux/interrupt.h>
20
21#include <linux/netfilter_ipv6/ip6_tables.h>
22#include <linux/netfilter_ipv6/ip6t_limit.h>
23
24MODULE_LICENSE("GPL");
25MODULE_AUTHOR("Herve Eychenne <rv@wallfire.org>");
26MODULE_DESCRIPTION("rate limiting within ip6tables");
27
28/* The algorithm used is the Simple Token Bucket Filter (TBF)
29 * see net/sched/sch_tbf.c in the linux source tree
30 */
31
32static DEFINE_SPINLOCK(limit_lock);
33
34/* Rusty: This is my (non-mathematically-inclined) understanding of
35 this algorithm. The `average rate' in jiffies becomes your initial
36 amount of credit `credit' and the most credit you can ever have
37 `credit_cap'. The `peak rate' becomes the cost of passing the
38 test, `cost'.
39
40 `prev' tracks the last packet hit: you gain one credit per jiffy.
41 If you get credit balance more than this, the extra credit is
42 discarded. Every time the match passes, you lose `cost' credits;
43 if you don't have that many, the test fails.
44
45 See Alexey's formal explanation in net/sched/sch_tbf.c.
46
47 To avoid underflow, we multiply by 128 (ie. you get 128 credits per
48 jiffy). Hence a cost of 2^32-1, means one pass per 32768 seconds
49 at 1024HZ (or one every 9 hours). A cost of 1 means 12800 passes
50 per second at 100HZ. */
51
52#define CREDITS_PER_JIFFY 128
53
54static int
55ip6t_limit_match(const struct sk_buff *skb,
56 const struct net_device *in,
57 const struct net_device *out,
58 const void *matchinfo,
59 int offset,
60 unsigned int protoff,
61 int *hotdrop)
62{
63 struct ip6t_rateinfo *r = ((struct ip6t_rateinfo *)matchinfo)->master;
64 unsigned long now = jiffies;
65
66 spin_lock_bh(&limit_lock);
67 r->credit += (now - xchg(&r->prev, now)) * CREDITS_PER_JIFFY;
68 if (r->credit > r->credit_cap)
69 r->credit = r->credit_cap;
70
71 if (r->credit >= r->cost) {
72 /* We're not limited. */
73 r->credit -= r->cost;
74 spin_unlock_bh(&limit_lock);
75 return 1;
76 }
77
78 spin_unlock_bh(&limit_lock);
79 return 0;
80}
81
82/* Precision saver. */
83static u_int32_t
84user2credits(u_int32_t user)
85{
86 /* If multiplying would overflow... */
87 if (user > 0xFFFFFFFF / (HZ*CREDITS_PER_JIFFY))
88 /* Divide first. */
89 return (user / IP6T_LIMIT_SCALE) * HZ * CREDITS_PER_JIFFY;
90
91 return (user * HZ * CREDITS_PER_JIFFY) / IP6T_LIMIT_SCALE;
92}
93
94static int
95ip6t_limit_checkentry(const char *tablename,
96 const struct ip6t_ip6 *ip,
97 void *matchinfo,
98 unsigned int matchsize,
99 unsigned int hook_mask)
100{
101 struct ip6t_rateinfo *r = matchinfo;
102
103 if (matchsize != IP6T_ALIGN(sizeof(struct ip6t_rateinfo)))
104 return 0;
105
106 /* Check for overflow. */
107 if (r->burst == 0
108 || user2credits(r->avg * r->burst) < user2credits(r->avg)) {
109 printk("Call rusty: overflow in ip6t_limit: %u/%u\n",
110 r->avg, r->burst);
111 return 0;
112 }
113
114 /* User avg in seconds * IP6T_LIMIT_SCALE: convert to jiffies *
115 128. */
116 r->prev = jiffies;
117 r->credit = user2credits(r->avg * r->burst); /* Credits full. */
118 r->credit_cap = user2credits(r->avg * r->burst); /* Credits full. */
119 r->cost = user2credits(r->avg);
120
121 /* For SMP, we only want to use one set of counters. */
122 r->master = r;
123
124 return 1;
125}
126
127static struct ip6t_match ip6t_limit_reg = {
128 .name = "limit",
129 .match = ip6t_limit_match,
130 .checkentry = ip6t_limit_checkentry,
131 .me = THIS_MODULE,
132};
133
134static int __init init(void)
135{
136 if (ip6t_register_match(&ip6t_limit_reg))
137 return -EINVAL;
138 return 0;
139}
140
141static void __exit fini(void)
142{
143 ip6t_unregister_match(&ip6t_limit_reg);
144}
145
146module_init(init);
147module_exit(fini);
diff --git a/net/ipv6/netfilter/ip6t_mac.c b/net/ipv6/netfilter/ip6t_mac.c
new file mode 100644
index 000000000000..526d43e37234
--- /dev/null
+++ b/net/ipv6/netfilter/ip6t_mac.c
@@ -0,0 +1,80 @@
1/* Kernel module to match MAC address parameters. */
2
3/* (C) 1999-2001 Paul `Rusty' Russell
4 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10
11#include <linux/module.h>
12#include <linux/skbuff.h>
13#include <linux/if_ether.h>
14
15#include <linux/netfilter_ipv6/ip6t_mac.h>
16#include <linux/netfilter_ipv6/ip6_tables.h>
17
18MODULE_LICENSE("GPL");
19MODULE_DESCRIPTION("MAC address matching module for IPv6");
20MODULE_AUTHOR("Netfilter Core Teaam <coreteam@netfilter.org>");
21
22static int
23match(const struct sk_buff *skb,
24 const struct net_device *in,
25 const struct net_device *out,
26 const void *matchinfo,
27 int offset,
28 unsigned int protoff,
29 int *hotdrop)
30{
31 const struct ip6t_mac_info *info = matchinfo;
32
33 /* Is mac pointer valid? */
34 return (skb->mac.raw >= skb->head
35 && (skb->mac.raw + ETH_HLEN) <= skb->data
36 /* If so, compare... */
37 && ((memcmp(eth_hdr(skb)->h_source, info->srcaddr, ETH_ALEN)
38 == 0) ^ info->invert));
39}
40
41static int
42ip6t_mac_checkentry(const char *tablename,
43 const struct ip6t_ip6 *ip,
44 void *matchinfo,
45 unsigned int matchsize,
46 unsigned int hook_mask)
47{
48 if (hook_mask
49 & ~((1 << NF_IP6_PRE_ROUTING) | (1 << NF_IP6_LOCAL_IN)
50 | (1 << NF_IP6_FORWARD))) {
51 printk("ip6t_mac: only valid for PRE_ROUTING, LOCAL_IN or"
52 " FORWARD\n");
53 return 0;
54 }
55
56 if (matchsize != IP6T_ALIGN(sizeof(struct ip6t_mac_info)))
57 return 0;
58
59 return 1;
60}
61
62static struct ip6t_match mac_match = {
63 .name = "mac",
64 .match = &match,
65 .checkentry = &ip6t_mac_checkentry,
66 .me = THIS_MODULE,
67};
68
69static int __init init(void)
70{
71 return ip6t_register_match(&mac_match);
72}
73
74static void __exit fini(void)
75{
76 ip6t_unregister_match(&mac_match);
77}
78
79module_init(init);
80module_exit(fini);
diff --git a/net/ipv6/netfilter/ip6t_mark.c b/net/ipv6/netfilter/ip6t_mark.c
new file mode 100644
index 000000000000..affc3de364fc
--- /dev/null
+++ b/net/ipv6/netfilter/ip6t_mark.c
@@ -0,0 +1,66 @@
1/* Kernel module to match NFMARK values. */
2
3/* (C) 1999-2001 Marc Boucher <marc@mbsi.ca>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation.
8 */
9
10
11#include <linux/module.h>
12#include <linux/skbuff.h>
13
14#include <linux/netfilter_ipv6/ip6t_mark.h>
15#include <linux/netfilter_ipv6/ip6_tables.h>
16
17MODULE_LICENSE("GPL");
18MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
19MODULE_DESCRIPTION("ip6tables mark match");
20
21static int
22match(const struct sk_buff *skb,
23 const struct net_device *in,
24 const struct net_device *out,
25 const void *matchinfo,
26 int offset,
27 unsigned int protoff,
28 int *hotdrop)
29{
30 const struct ip6t_mark_info *info = matchinfo;
31
32 return ((skb->nfmark & info->mask) == info->mark) ^ info->invert;
33}
34
35static int
36checkentry(const char *tablename,
37 const struct ip6t_ip6 *ip,
38 void *matchinfo,
39 unsigned int matchsize,
40 unsigned int hook_mask)
41{
42 if (matchsize != IP6T_ALIGN(sizeof(struct ip6t_mark_info)))
43 return 0;
44
45 return 1;
46}
47
48static struct ip6t_match mark_match = {
49 .name = "mark",
50 .match = &match,
51 .checkentry = &checkentry,
52 .me = THIS_MODULE,
53};
54
55static int __init init(void)
56{
57 return ip6t_register_match(&mark_match);
58}
59
60static void __exit fini(void)
61{
62 ip6t_unregister_match(&mark_match);
63}
64
65module_init(init);
66module_exit(fini);
diff --git a/net/ipv6/netfilter/ip6t_multiport.c b/net/ipv6/netfilter/ip6t_multiport.c
new file mode 100644
index 000000000000..6e3246153fa3
--- /dev/null
+++ b/net/ipv6/netfilter/ip6t_multiport.c
@@ -0,0 +1,125 @@
1/* Kernel module to match one of a list of TCP/UDP ports: ports are in
2 the same place so we can treat them as equal. */
3
4/* (C) 1999-2001 Paul `Rusty' Russell
5 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 */
11
12#include <linux/module.h>
13#include <linux/types.h>
14#include <linux/udp.h>
15#include <linux/skbuff.h>
16#include <linux/in.h>
17
18#include <linux/netfilter_ipv6/ip6t_multiport.h>
19#include <linux/netfilter_ipv6/ip6_tables.h>
20
21MODULE_LICENSE("GPL");
22MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
23MODULE_DESCRIPTION("ip6tables match for multiple ports");
24
25#if 0
26#define duprintf(format, args...) printk(format , ## args)
27#else
28#define duprintf(format, args...)
29#endif
30
31/* Returns 1 if the port is matched by the test, 0 otherwise. */
32static inline int
33ports_match(const u_int16_t *portlist, enum ip6t_multiport_flags flags,
34 u_int8_t count, u_int16_t src, u_int16_t dst)
35{
36 unsigned int i;
37 for (i=0; i<count; i++) {
38 if (flags != IP6T_MULTIPORT_DESTINATION
39 && portlist[i] == src)
40 return 1;
41
42 if (flags != IP6T_MULTIPORT_SOURCE
43 && portlist[i] == dst)
44 return 1;
45 }
46
47 return 0;
48}
49
50static int
51match(const struct sk_buff *skb,
52 const struct net_device *in,
53 const struct net_device *out,
54 const void *matchinfo,
55 int offset,
56 unsigned int protoff,
57 int *hotdrop)
58{
59 u16 _ports[2], *pptr;
60 const struct ip6t_multiport *multiinfo = matchinfo;
61
62 /* Must not be a fragment. */
63 if (offset)
64 return 0;
65
66 /* Must be big enough to read ports (both UDP and TCP have
67 them at the start). */
68 pptr = skb_header_pointer(skb, protoff, sizeof(_ports), &_ports[0]);
69 if (pptr == NULL) {
70 /* We've been asked to examine this packet, and we
71 * can't. Hence, no choice but to drop.
72 */
73 duprintf("ip6t_multiport:"
74 " Dropping evil offset=0 tinygram.\n");
75 *hotdrop = 1;
76 return 0;
77 }
78
79 return ports_match(multiinfo->ports,
80 multiinfo->flags, multiinfo->count,
81 ntohs(pptr[0]), ntohs(pptr[1]));
82}
83
84/* Called when user tries to insert an entry of this type. */
85static int
86checkentry(const char *tablename,
87 const struct ip6t_ip6 *ip,
88 void *matchinfo,
89 unsigned int matchsize,
90 unsigned int hook_mask)
91{
92 const struct ip6t_multiport *multiinfo = matchinfo;
93
94 if (matchsize != IP6T_ALIGN(sizeof(struct ip6t_multiport)))
95 return 0;
96
97 /* Must specify proto == TCP/UDP, no unknown flags or bad count */
98 return (ip->proto == IPPROTO_TCP || ip->proto == IPPROTO_UDP)
99 && !(ip->invflags & IP6T_INV_PROTO)
100 && matchsize == IP6T_ALIGN(sizeof(struct ip6t_multiport))
101 && (multiinfo->flags == IP6T_MULTIPORT_SOURCE
102 || multiinfo->flags == IP6T_MULTIPORT_DESTINATION
103 || multiinfo->flags == IP6T_MULTIPORT_EITHER)
104 && multiinfo->count <= IP6T_MULTI_PORTS;
105}
106
107static struct ip6t_match multiport_match = {
108 .name = "multiport",
109 .match = &match,
110 .checkentry = &checkentry,
111 .me = THIS_MODULE,
112};
113
114static int __init init(void)
115{
116 return ip6t_register_match(&multiport_match);
117}
118
119static void __exit fini(void)
120{
121 ip6t_unregister_match(&multiport_match);
122}
123
124module_init(init);
125module_exit(fini);
diff --git a/net/ipv6/netfilter/ip6t_owner.c b/net/ipv6/netfilter/ip6t_owner.c
new file mode 100644
index 000000000000..ab0e32d3de46
--- /dev/null
+++ b/net/ipv6/netfilter/ip6t_owner.c
@@ -0,0 +1,174 @@
1/* Kernel module to match various things tied to sockets associated with
2 locally generated outgoing packets. */
3
4/* (C) 2000-2001 Marc Boucher <marc@mbsi.ca>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10
11#include <linux/module.h>
12#include <linux/skbuff.h>
13#include <linux/file.h>
14#include <net/sock.h>
15
16#include <linux/netfilter_ipv6/ip6t_owner.h>
17#include <linux/netfilter_ipv6/ip6_tables.h>
18
19MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>");
20MODULE_DESCRIPTION("IP6 tables owner matching module");
21MODULE_LICENSE("GPL");
22
23static int
24match_pid(const struct sk_buff *skb, pid_t pid)
25{
26 struct task_struct *p;
27 struct files_struct *files;
28 int i;
29
30 read_lock(&tasklist_lock);
31 p = find_task_by_pid(pid);
32 if (!p)
33 goto out;
34 task_lock(p);
35 files = p->files;
36 if(files) {
37 spin_lock(&files->file_lock);
38 for (i=0; i < files->max_fds; i++) {
39 if (fcheck_files(files, i) == skb->sk->sk_socket->file) {
40 spin_unlock(&files->file_lock);
41 task_unlock(p);
42 read_unlock(&tasklist_lock);
43 return 1;
44 }
45 }
46 spin_unlock(&files->file_lock);
47 }
48 task_unlock(p);
49out:
50 read_unlock(&tasklist_lock);
51 return 0;
52}
53
/*
 * Check whether any thread belonging to session `sid' has the packet's
 * socket file open.  All threads are visited under tasklist_lock; for
 * each thread of the session the descriptor table is scanned under the
 * task lock and the table's file_lock.
 * Returns 1 when a matching descriptor is found, 0 otherwise.
 */
static int
match_sid(const struct sk_buff *skb, pid_t sid)
{
	struct task_struct *g, *p;
	/* The file backing the socket that owns this packet. */
	struct file *file = skb->sk->sk_socket->file;
	int i, found=0;

	read_lock(&tasklist_lock);
	do_each_thread(g, p) {
		struct files_struct *files;
		/* Skip threads that belong to a different session. */
		if (p->signal->session != sid)
			continue;

		task_lock(p);
		files = p->files;
		if (files) {
			spin_lock(&files->file_lock);
			for (i=0; i < files->max_fds; i++) {
				if (fcheck_files(files, i) == file) {
					found = 1;
					break;
				}
			}
			spin_unlock(&files->file_lock);
		}
		task_unlock(p);
		/* Leave the thread walk as soon as one hit is recorded;
		 * both per-task locks have been released at this point. */
		if (found)
			goto out;
	} while_each_thread(g, p);
out:
	read_unlock(&tasklist_lock);

	return found;
}
88
89static int
90match(const struct sk_buff *skb,
91 const struct net_device *in,
92 const struct net_device *out,
93 const void *matchinfo,
94 int offset,
95 unsigned int protoff,
96 int *hotdrop)
97{
98 const struct ip6t_owner_info *info = matchinfo;
99
100 if (!skb->sk || !skb->sk->sk_socket || !skb->sk->sk_socket->file)
101 return 0;
102
103 if(info->match & IP6T_OWNER_UID) {
104 if((skb->sk->sk_socket->file->f_uid != info->uid) ^
105 !!(info->invert & IP6T_OWNER_UID))
106 return 0;
107 }
108
109 if(info->match & IP6T_OWNER_GID) {
110 if((skb->sk->sk_socket->file->f_gid != info->gid) ^
111 !!(info->invert & IP6T_OWNER_GID))
112 return 0;
113 }
114
115 if(info->match & IP6T_OWNER_PID) {
116 if (!match_pid(skb, info->pid) ^
117 !!(info->invert & IP6T_OWNER_PID))
118 return 0;
119 }
120
121 if(info->match & IP6T_OWNER_SID) {
122 if (!match_sid(skb, info->sid) ^
123 !!(info->invert & IP6T_OWNER_SID))
124 return 0;
125 }
126
127 return 1;
128}
129
130static int
131checkentry(const char *tablename,
132 const struct ip6t_ip6 *ip,
133 void *matchinfo,
134 unsigned int matchsize,
135 unsigned int hook_mask)
136{
137 if (hook_mask
138 & ~((1 << NF_IP6_LOCAL_OUT) | (1 << NF_IP6_POST_ROUTING))) {
139 printk("ip6t_owner: only valid for LOCAL_OUT or POST_ROUTING.\n");
140 return 0;
141 }
142
143 if (matchsize != IP6T_ALIGN(sizeof(struct ip6t_owner_info)))
144 return 0;
145#ifdef CONFIG_SMP
146 /* files->file_lock can not be used in a BH */
147 if (((struct ip6t_owner_info *)matchinfo)->match
148 & (IP6T_OWNER_PID|IP6T_OWNER_SID)) {
149 printk("ip6t_owner: pid and sid matching is broken on SMP.\n");
150 return 0;
151 }
152#endif
153 return 1;
154}
155
156static struct ip6t_match owner_match = {
157 .name = "owner",
158 .match = &match,
159 .checkentry = &checkentry,
160 .me = THIS_MODULE,
161};
162
163static int __init init(void)
164{
165 return ip6t_register_match(&owner_match);
166}
167
168static void __exit fini(void)
169{
170 ip6t_unregister_match(&owner_match);
171}
172
173module_init(init);
174module_exit(fini);
diff --git a/net/ipv6/netfilter/ip6t_physdev.c b/net/ipv6/netfilter/ip6t_physdev.c
new file mode 100644
index 000000000000..71515c86ece1
--- /dev/null
+++ b/net/ipv6/netfilter/ip6t_physdev.c
@@ -0,0 +1,135 @@
1/* Kernel module to match the bridge port in and
2 * out device for IP packets coming into contact with a bridge. */
3
4/* (C) 2001-2003 Bart De Schuymer <bdschuym@pandora.be>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10
11#include <linux/module.h>
12#include <linux/skbuff.h>
13#include <linux/netfilter_ipv6/ip6t_physdev.h>
14#include <linux/netfilter_ipv6/ip6_tables.h>
15#include <linux/netfilter_bridge.h>
16#define MATCH 1
17#define NOMATCH 0
18
19MODULE_LICENSE("GPL");
20MODULE_AUTHOR("Bart De Schuymer <bdschuym@pandora.be>");
21MODULE_DESCRIPTION("iptables bridge physical device match module");
22
/*
 * Match a packet on bridge-port information attached to it
 * (skb->nf_bridge): whether it was bridged, whether a physical in/out
 * device is recorded, and the (masked) names of those devices.  Each
 * option selected in info->bitmask can be inverted via the same bit in
 * info->invert.  Returns MATCH or NOMATCH.
 */
static int
match(const struct sk_buff *skb,
      const struct net_device *in,
      const struct net_device *out,
      const void *matchinfo,
      int offset,
      unsigned int protoff,
      int *hotdrop)
{
	int i;
	/* All-zero name used when a physical device is not recorded. */
	static const char nulldevname[IFNAMSIZ];
	const struct ip6t_physdev_info *info = matchinfo;
	unsigned int ret;
	const char *indev, *outdev;
	struct nf_bridge_info *nf_bridge;

	/* Not a bridged IP packet or no info available yet:
	 * LOCAL_OUT/mangle and LOCAL_OUT/nat don't know if
	 * the destination device will be a bridge. */
	if (!(nf_bridge = skb->nf_bridge)) {
		/* Return MATCH if the invert flags of the used options are on */
		/* i.e. any selected option that is not inverted fails here. */
		if ((info->bitmask & IP6T_PHYSDEV_OP_BRIDGED) &&
		    !(info->invert & IP6T_PHYSDEV_OP_BRIDGED))
			return NOMATCH;
		if ((info->bitmask & IP6T_PHYSDEV_OP_ISIN) &&
		    !(info->invert & IP6T_PHYSDEV_OP_ISIN))
			return NOMATCH;
		if ((info->bitmask & IP6T_PHYSDEV_OP_ISOUT) &&
		    !(info->invert & IP6T_PHYSDEV_OP_ISOUT))
			return NOMATCH;
		if ((info->bitmask & IP6T_PHYSDEV_OP_IN) &&
		    !(info->invert & IP6T_PHYSDEV_OP_IN))
			return NOMATCH;
		if ((info->bitmask & IP6T_PHYSDEV_OP_OUT) &&
		    !(info->invert & IP6T_PHYSDEV_OP_OUT))
			return NOMATCH;
		return MATCH;
	}

	/* This only makes sense in the FORWARD and POSTROUTING chains */
	/* Fails when the BRNF_BRIDGED state equals the invert bit. */
	if ((info->bitmask & IP6T_PHYSDEV_OP_BRIDGED) &&
	    (!!(nf_bridge->mask & BRNF_BRIDGED) ^
	    !(info->invert & IP6T_PHYSDEV_OP_BRIDGED)))
		return NOMATCH;

	/* ISIN/ISOUT: a device must be recorded, or must be missing when
	 * the option is inverted. */
	if ((info->bitmask & IP6T_PHYSDEV_OP_ISIN &&
	    (!nf_bridge->physindev ^ !!(info->invert & IP6T_PHYSDEV_OP_ISIN))) ||
	    (info->bitmask & IP6T_PHYSDEV_OP_ISOUT &&
	    (!nf_bridge->physoutdev ^ !!(info->invert & IP6T_PHYSDEV_OP_ISOUT))))
		return NOMATCH;

	if (!(info->bitmask & IP6T_PHYSDEV_OP_IN))
		goto match_outdev;
	indev = nf_bridge->physindev ? nf_bridge->physindev->name : nulldevname;
	/* Masked compare of the names, one unsigned int at a time; ret ends
	 * up non-zero when any masked bit differs.
	 * NOTE(review): the casts assume the name buffers are suitably
	 * aligned for unsigned int access - confirm. */
	for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned int); i++) {
		ret |= (((const unsigned int *)indev)[i]
			^ ((const unsigned int *)info->physindev)[i])
			& ((const unsigned int *)info->in_mask)[i];
	}

	/* Names equal but option inverted, or names differ and option not
	 * inverted: no match. */
	if ((ret == 0) ^ !(info->invert & IP6T_PHYSDEV_OP_IN))
		return NOMATCH;

match_outdev:
	if (!(info->bitmask & IP6T_PHYSDEV_OP_OUT))
		return MATCH;
	outdev = nf_bridge->physoutdev ?
		 nf_bridge->physoutdev->name : nulldevname;
	/* Same masked word-wise compare for the outgoing device name. */
	for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned int); i++) {
		ret |= (((const unsigned int *)outdev)[i]
			^ ((const unsigned int *)info->physoutdev)[i])
			& ((const unsigned int *)info->out_mask)[i];
	}

	/* 1 when the names are equal and the option is not inverted, or
	 * when they differ and it is inverted. */
	return (ret != 0) ^ !(info->invert & IP6T_PHYSDEV_OP_OUT);
}
99
100static int
101checkentry(const char *tablename,
102 const struct ip6t_ip6 *ip,
103 void *matchinfo,
104 unsigned int matchsize,
105 unsigned int hook_mask)
106{
107 const struct ip6t_physdev_info *info = matchinfo;
108
109 if (matchsize != IP6T_ALIGN(sizeof(struct ip6t_physdev_info)))
110 return 0;
111 if (!(info->bitmask & IP6T_PHYSDEV_OP_MASK) ||
112 info->bitmask & ~IP6T_PHYSDEV_OP_MASK)
113 return 0;
114 return 1;
115}
116
117static struct ip6t_match physdev_match = {
118 .name = "physdev",
119 .match = &match,
120 .checkentry = &checkentry,
121 .me = THIS_MODULE,
122};
123
124static int __init init(void)
125{
126 return ip6t_register_match(&physdev_match);
127}
128
129static void __exit fini(void)
130{
131 ip6t_unregister_match(&physdev_match);
132}
133
134module_init(init);
135module_exit(fini);
diff --git a/net/ipv6/netfilter/ip6t_rt.c b/net/ipv6/netfilter/ip6t_rt.c
new file mode 100644
index 000000000000..a9526b773d28
--- /dev/null
+++ b/net/ipv6/netfilter/ip6t_rt.c
@@ -0,0 +1,301 @@
1/* Kernel module to match ROUTING parameters. */
2
3/* (C) 2001-2002 Andras Kis-Szabo <kisza@sch.bme.hu>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation.
8 */
9
10#include <linux/module.h>
11#include <linux/skbuff.h>
12#include <linux/ipv6.h>
13#include <linux/types.h>
14#include <net/checksum.h>
15#include <net/ipv6.h>
16
17#include <asm/byteorder.h>
18
19#include <linux/netfilter_ipv6/ip6_tables.h>
20#include <linux/netfilter_ipv6/ip6t_rt.h>
21
22MODULE_LICENSE("GPL");
23MODULE_DESCRIPTION("IPv6 RT match");
24MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>");
25
26#if 0
27#define DEBUGP printk
28#else
29#define DEBUGP(format, args...)
30#endif
31
/*
 * Range test for the segments-left field.
 *
 * Yields (min <= id <= max) XOR invert, i.e. 1 when the id lies inside the
 * inclusive range and invert is 0, or lies outside it and invert is 1.
 */
static inline int
segsleft_match(u_int32_t min, u_int32_t max, u_int32_t id, int invert)
{
	int in_range;
	int verdict;

	DEBUGP("rt segsleft_match:%c 0x%x <= 0x%x <= 0x%x",invert? '!':' ',
	       min,id,max);

	in_range = (min <= id) && (id <= max);
	verdict = in_range ^ invert;

	DEBUGP(" result %s\n",verdict? "PASS" : "FAILED");
	return verdict;
}
43
44static int
45match(const struct sk_buff *skb,
46 const struct net_device *in,
47 const struct net_device *out,
48 const void *matchinfo,
49 int offset,
50 unsigned int protoff,
51 int *hotdrop)
52{
53 struct ipv6_rt_hdr _route, *rh = NULL;
54 const struct ip6t_rt *rtinfo = matchinfo;
55 unsigned int temp;
56 unsigned int len;
57 u8 nexthdr;
58 unsigned int ptr;
59 unsigned int hdrlen = 0;
60 unsigned int ret = 0;
61 struct in6_addr *ap, _addr;
62
63 /* type of the 1st exthdr */
64 nexthdr = skb->nh.ipv6h->nexthdr;
65 /* pointer to the 1st exthdr */
66 ptr = sizeof(struct ipv6hdr);
67 /* available length */
68 len = skb->len - ptr;
69 temp = 0;
70
71 while (ip6t_ext_hdr(nexthdr)) {
72 struct ipv6_opt_hdr _hdr, *hp;
73
74 DEBUGP("ipv6_rt header iteration \n");
75
76 /* Is there enough space for the next ext header? */
77 if (len < (int)sizeof(struct ipv6_opt_hdr))
78 return 0;
79 /* No more exthdr -> evaluate */
80 if (nexthdr == NEXTHDR_NONE) {
81 break;
82 }
83 /* ESP -> evaluate */
84 if (nexthdr == NEXTHDR_ESP) {
85 break;
86 }
87
88 hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr);
89 BUG_ON(hp == NULL);
90
91 /* Calculate the header length */
92 if (nexthdr == NEXTHDR_FRAGMENT) {
93 hdrlen = 8;
94 } else if (nexthdr == NEXTHDR_AUTH)
95 hdrlen = (hp->hdrlen+2)<<2;
96 else
97 hdrlen = ipv6_optlen(hp);
98
99 /* ROUTING -> evaluate */
100 if (nexthdr == NEXTHDR_ROUTING) {
101 temp |= MASK_ROUTING;
102 break;
103 }
104
105
106 /* set the flag */
107 switch (nexthdr){
108 case NEXTHDR_HOP:
109 case NEXTHDR_ROUTING:
110 case NEXTHDR_FRAGMENT:
111 case NEXTHDR_AUTH:
112 case NEXTHDR_DEST:
113 break;
114 default:
115 DEBUGP("ipv6_rt match: unknown nextheader %u\n",nexthdr);
116 return 0;
117 break;
118 }
119
120 nexthdr = hp->nexthdr;
121 len -= hdrlen;
122 ptr += hdrlen;
123 if ( ptr > skb->len ) {
124 DEBUGP("ipv6_rt: new pointer is too large! \n");
125 break;
126 }
127 }
128
129 /* ROUTING header not found */
130 if ( temp != MASK_ROUTING ) return 0;
131
132 if (len < (int)sizeof(struct ipv6_rt_hdr)){
133 *hotdrop = 1;
134 return 0;
135 }
136
137 if (len < hdrlen){
138 /* Pcket smaller than its length field */
139 return 0;
140 }
141
142 rh = skb_header_pointer(skb, ptr, sizeof(_route), &_route);
143 BUG_ON(rh == NULL);
144
145 DEBUGP("IPv6 RT LEN %u %u ", hdrlen, rh->hdrlen);
146 DEBUGP("TYPE %04X ", rh->type);
147 DEBUGP("SGS_LEFT %u %02X\n", rh->segments_left, rh->segments_left);
148
149 DEBUGP("IPv6 RT segsleft %02X ",
150 (segsleft_match(rtinfo->segsleft[0], rtinfo->segsleft[1],
151 rh->segments_left,
152 !!(rtinfo->invflags & IP6T_RT_INV_SGS))));
153 DEBUGP("type %02X %02X %02X ",
154 rtinfo->rt_type, rh->type,
155 (!(rtinfo->flags & IP6T_RT_TYP) ||
156 ((rtinfo->rt_type == rh->type) ^
157 !!(rtinfo->invflags & IP6T_RT_INV_TYP))));
158 DEBUGP("len %02X %04X %02X ",
159 rtinfo->hdrlen, hdrlen,
160 (!(rtinfo->flags & IP6T_RT_LEN) ||
161 ((rtinfo->hdrlen == hdrlen) ^
162 !!(rtinfo->invflags & IP6T_RT_INV_LEN))));
163 DEBUGP("res %02X %02X %02X ",
164 (rtinfo->flags & IP6T_RT_RES), ((struct rt0_hdr *)rh)->bitmap,
165 !((rtinfo->flags & IP6T_RT_RES) && (((struct rt0_hdr *)rh)->bitmap)));
166
167 ret = (rh != NULL)
168 &&
169 (segsleft_match(rtinfo->segsleft[0], rtinfo->segsleft[1],
170 rh->segments_left,
171 !!(rtinfo->invflags & IP6T_RT_INV_SGS)))
172 &&
173 (!(rtinfo->flags & IP6T_RT_LEN) ||
174 ((rtinfo->hdrlen == hdrlen) ^
175 !!(rtinfo->invflags & IP6T_RT_INV_LEN)))
176 &&
177 (!(rtinfo->flags & IP6T_RT_TYP) ||
178 ((rtinfo->rt_type == rh->type) ^
179 !!(rtinfo->invflags & IP6T_RT_INV_TYP)));
180
181 if (ret && (rtinfo->flags & IP6T_RT_RES)) {
182 u_int32_t *bp, _bitmap;
183 bp = skb_header_pointer(skb,
184 ptr + offsetof(struct rt0_hdr, bitmap),
185 sizeof(_bitmap), &_bitmap);
186
187 ret = (*bp == 0);
188 }
189
190 DEBUGP("#%d ",rtinfo->addrnr);
191 if ( !(rtinfo->flags & IP6T_RT_FST) ){
192 return ret;
193 } else if (rtinfo->flags & IP6T_RT_FST_NSTRICT) {
194 DEBUGP("Not strict ");
195 if ( rtinfo->addrnr > (unsigned int)((hdrlen-8)/16) ){
196 DEBUGP("There isn't enough space\n");
197 return 0;
198 } else {
199 unsigned int i = 0;
200
201 DEBUGP("#%d ",rtinfo->addrnr);
202 for(temp=0; temp<(unsigned int)((hdrlen-8)/16); temp++){
203 ap = skb_header_pointer(skb,
204 ptr
205 + sizeof(struct rt0_hdr)
206 + temp * sizeof(_addr),
207 sizeof(_addr),
208 &_addr);
209
210 BUG_ON(ap == NULL);
211
212 if (ipv6_addr_equal(ap, &rtinfo->addrs[i])) {
213 DEBUGP("i=%d temp=%d;\n",i,temp);
214 i++;
215 }
216 if (i==rtinfo->addrnr) break;
217 }
218 DEBUGP("i=%d #%d\n", i, rtinfo->addrnr);
219 if (i == rtinfo->addrnr)
220 return ret;
221 else return 0;
222 }
223 } else {
224 DEBUGP("Strict ");
225 if ( rtinfo->addrnr > (unsigned int)((hdrlen-8)/16) ){
226 DEBUGP("There isn't enough space\n");
227 return 0;
228 } else {
229 DEBUGP("#%d ",rtinfo->addrnr);
230 for(temp=0; temp<rtinfo->addrnr; temp++){
231 ap = skb_header_pointer(skb,
232 ptr
233 + sizeof(struct rt0_hdr)
234 + temp * sizeof(_addr),
235 sizeof(_addr),
236 &_addr);
237 BUG_ON(ap == NULL);
238
239 if (!ipv6_addr_equal(ap, &rtinfo->addrs[temp]))
240 break;
241 }
242 DEBUGP("temp=%d #%d\n", temp, rtinfo->addrnr);
243 if ((temp == rtinfo->addrnr) && (temp == (unsigned int)((hdrlen-8)/16)))
244 return ret;
245 else return 0;
246 }
247 }
248
249 return 0;
250}
251
252/* Called when user tries to insert an entry of this type. */
253static int
254checkentry(const char *tablename,
255 const struct ip6t_ip6 *ip,
256 void *matchinfo,
257 unsigned int matchinfosize,
258 unsigned int hook_mask)
259{
260 const struct ip6t_rt *rtinfo = matchinfo;
261
262 if (matchinfosize != IP6T_ALIGN(sizeof(struct ip6t_rt))) {
263 DEBUGP("ip6t_rt: matchsize %u != %u\n",
264 matchinfosize, IP6T_ALIGN(sizeof(struct ip6t_rt)));
265 return 0;
266 }
267 if (rtinfo->invflags & ~IP6T_RT_INV_MASK) {
268 DEBUGP("ip6t_rt: unknown flags %X\n",
269 rtinfo->invflags);
270 return 0;
271 }
272 if ( (rtinfo->flags & (IP6T_RT_RES|IP6T_RT_FST_MASK)) &&
273 (!(rtinfo->flags & IP6T_RT_TYP) ||
274 (rtinfo->rt_type != 0) ||
275 (rtinfo->invflags & IP6T_RT_INV_TYP)) ) {
276 DEBUGP("`--rt-type 0' required before `--rt-0-*'");
277 return 0;
278 }
279
280 return 1;
281}
282
283static struct ip6t_match rt_match = {
284 .name = "rt",
285 .match = &match,
286 .checkentry = &checkentry,
287 .me = THIS_MODULE,
288};
289
290static int __init init(void)
291{
292 return ip6t_register_match(&rt_match);
293}
294
295static void __exit cleanup(void)
296{
297 ip6t_unregister_match(&rt_match);
298}
299
300module_init(init);
301module_exit(cleanup);
diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c
new file mode 100644
index 000000000000..4c0028671c20
--- /dev/null
+++ b/net/ipv6/netfilter/ip6table_filter.c
@@ -0,0 +1,214 @@
1/*
2 * This is the 1999 rewrite of IP Firewalling, aiming for kernel 2.3.x.
3 *
4 * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
5 * Copyright (C) 2000-2004 Netfilter Core Team <coreteam@netfilter.org>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 */
11
12#include <linux/module.h>
13#include <linux/moduleparam.h>
14#include <linux/netfilter_ipv6/ip6_tables.h>
15
16MODULE_LICENSE("GPL");
17MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
18MODULE_DESCRIPTION("ip6tables filter table");
19
20#define FILTER_VALID_HOOKS ((1 << NF_IP6_LOCAL_IN) | (1 << NF_IP6_FORWARD) | (1 << NF_IP6_LOCAL_OUT))
21
22/* Standard entry. */
23struct ip6t_standard
24{
25 struct ip6t_entry entry;
26 struct ip6t_standard_target target;
27};
28
29struct ip6t_error_target
30{
31 struct ip6t_entry_target target;
32 char errorname[IP6T_FUNCTION_MAXNAMELEN];
33};
34
35struct ip6t_error
36{
37 struct ip6t_entry entry;
38 struct ip6t_error_target target;
39};
40
41static struct
42{
43 struct ip6t_replace repl;
44 struct ip6t_standard entries[3];
45 struct ip6t_error term;
46} initial_table __initdata
47= { { "filter", FILTER_VALID_HOOKS, 4,
48 sizeof(struct ip6t_standard) * 3 + sizeof(struct ip6t_error),
49 { [NF_IP6_LOCAL_IN] = 0,
50 [NF_IP6_FORWARD] = sizeof(struct ip6t_standard),
51 [NF_IP6_LOCAL_OUT] = sizeof(struct ip6t_standard) * 2 },
52 { [NF_IP6_LOCAL_IN] = 0,
53 [NF_IP6_FORWARD] = sizeof(struct ip6t_standard),
54 [NF_IP6_LOCAL_OUT] = sizeof(struct ip6t_standard) * 2 },
55 0, NULL, { } },
56 {
57 /* LOCAL_IN */
58 { { { { { { 0 } } }, { { { 0 } } }, { { { 0 } } }, { { { 0 } } }, "", "", { 0 }, { 0 }, 0, 0, 0 },
59 0,
60 sizeof(struct ip6t_entry),
61 sizeof(struct ip6t_standard),
62 0, { 0, 0 }, { } },
63 { { { { IP6T_ALIGN(sizeof(struct ip6t_standard_target)), "" } }, { } },
64 -NF_ACCEPT - 1 } },
65 /* FORWARD */
66 { { { { { { 0 } } }, { { { 0 } } }, { { { 0 } } }, { { { 0 } } }, "", "", { 0 }, { 0 }, 0, 0, 0 },
67 0,
68 sizeof(struct ip6t_entry),
69 sizeof(struct ip6t_standard),
70 0, { 0, 0 }, { } },
71 { { { { IP6T_ALIGN(sizeof(struct ip6t_standard_target)), "" } }, { } },
72 -NF_ACCEPT - 1 } },
73 /* LOCAL_OUT */
74 { { { { { { 0 } } }, { { { 0 } } }, { { { 0 } } }, { { { 0 } } }, "", "", { 0 }, { 0 }, 0, 0, 0 },
75 0,
76 sizeof(struct ip6t_entry),
77 sizeof(struct ip6t_standard),
78 0, { 0, 0 }, { } },
79 { { { { IP6T_ALIGN(sizeof(struct ip6t_standard_target)), "" } }, { } },
80 -NF_ACCEPT - 1 } }
81 },
82 /* ERROR */
83 { { { { { { 0 } } }, { { { 0 } } }, { { { 0 } } }, { { { 0 } } }, "", "", { 0 }, { 0 }, 0, 0, 0 },
84 0,
85 sizeof(struct ip6t_entry),
86 sizeof(struct ip6t_error),
87 0, { 0, 0 }, { } },
88 { { { { IP6T_ALIGN(sizeof(struct ip6t_error_target)), IP6T_ERROR_TARGET } },
89 { } },
90 "ERROR"
91 }
92 }
93};
94
95static struct ip6t_table packet_filter = {
96 .name = "filter",
97 .valid_hooks = FILTER_VALID_HOOKS,
98 .lock = RW_LOCK_UNLOCKED,
99 .me = THIS_MODULE,
100};
101
102/* The work comes in here from netfilter.c. */
103static unsigned int
104ip6t_hook(unsigned int hook,
105 struct sk_buff **pskb,
106 const struct net_device *in,
107 const struct net_device *out,
108 int (*okfn)(struct sk_buff *))
109{
110 return ip6t_do_table(pskb, hook, in, out, &packet_filter, NULL);
111}
112
113static unsigned int
114ip6t_local_out_hook(unsigned int hook,
115 struct sk_buff **pskb,
116 const struct net_device *in,
117 const struct net_device *out,
118 int (*okfn)(struct sk_buff *))
119{
120#if 0
121 /* root is playing with raw sockets. */
122 if ((*pskb)->len < sizeof(struct iphdr)
123 || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr)) {
124 if (net_ratelimit())
125 printk("ip6t_hook: happy cracking.\n");
126 return NF_ACCEPT;
127 }
128#endif
129
130 return ip6t_do_table(pskb, hook, in, out, &packet_filter, NULL);
131}
132
133static struct nf_hook_ops ip6t_ops[] = {
134 {
135 .hook = ip6t_hook,
136 .owner = THIS_MODULE,
137 .pf = PF_INET6,
138 .hooknum = NF_IP6_LOCAL_IN,
139 .priority = NF_IP6_PRI_FILTER,
140 },
141 {
142 .hook = ip6t_hook,
143 .owner = THIS_MODULE,
144 .pf = PF_INET6,
145 .hooknum = NF_IP6_FORWARD,
146 .priority = NF_IP6_PRI_FILTER,
147 },
148 {
149 .hook = ip6t_local_out_hook,
150 .owner = THIS_MODULE,
151 .pf = PF_INET6,
152 .hooknum = NF_IP6_LOCAL_OUT,
153 .priority = NF_IP6_PRI_FILTER,
154 },
155};
156
157/* Default to forward because I got too much mail already. */
158static int forward = NF_ACCEPT;
159module_param(forward, bool, 0000);
160
161static int __init init(void)
162{
163 int ret;
164
165 if (forward < 0 || forward > NF_MAX_VERDICT) {
166 printk("iptables forward must be 0 or 1\n");
167 return -EINVAL;
168 }
169
170 /* Entry 1 is the FORWARD hook */
171 initial_table.entries[1].target.verdict = -forward - 1;
172
173 /* Register table */
174 ret = ip6t_register_table(&packet_filter, &initial_table.repl);
175 if (ret < 0)
176 return ret;
177
178 /* Register hooks */
179 ret = nf_register_hook(&ip6t_ops[0]);
180 if (ret < 0)
181 goto cleanup_table;
182
183 ret = nf_register_hook(&ip6t_ops[1]);
184 if (ret < 0)
185 goto cleanup_hook0;
186
187 ret = nf_register_hook(&ip6t_ops[2]);
188 if (ret < 0)
189 goto cleanup_hook1;
190
191 return ret;
192
193 cleanup_hook1:
194 nf_unregister_hook(&ip6t_ops[1]);
195 cleanup_hook0:
196 nf_unregister_hook(&ip6t_ops[0]);
197 cleanup_table:
198 ip6t_unregister_table(&packet_filter);
199
200 return ret;
201}
202
203static void __exit fini(void)
204{
205 unsigned int i;
206
207 for (i = 0; i < sizeof(ip6t_ops)/sizeof(struct nf_hook_ops); i++)
208 nf_unregister_hook(&ip6t_ops[i]);
209
210 ip6t_unregister_table(&packet_filter);
211}
212
213module_init(init);
214module_exit(fini);
diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c
new file mode 100644
index 000000000000..85c1e6eada19
--- /dev/null
+++ b/net/ipv6/netfilter/ip6table_mangle.c
@@ -0,0 +1,287 @@
1/*
2 * IPv6 packet mangling table, a port of the IPv4 mangle table to IPv6
3 *
4 * Copyright (C) 2000-2001 by Harald Welte <laforge@gnumonks.org>
5 * Copyright (C) 2000-2004 Netfilter Core Team <coreteam@netfilter.org>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 *
11 * Extended to all five netfilter hooks by Brad Chapman & Harald Welte
12 */
13#include <linux/module.h>
14#include <linux/netfilter_ipv6/ip6_tables.h>
15
16MODULE_LICENSE("GPL");
17MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
18MODULE_DESCRIPTION("ip6tables mangle table");
19
20#define MANGLE_VALID_HOOKS ((1 << NF_IP6_PRE_ROUTING) | \
21 (1 << NF_IP6_LOCAL_IN) | \
22 (1 << NF_IP6_FORWARD) | \
23 (1 << NF_IP6_LOCAL_OUT) | \
24 (1 << NF_IP6_POST_ROUTING))
25
26#if 0
27#define DEBUGP(x, args...) printk(KERN_DEBUG x, ## args)
28#else
29#define DEBUGP(x, args...)
30#endif
31
32/* Standard entry. */
33struct ip6t_standard
34{
35 struct ip6t_entry entry;
36 struct ip6t_standard_target target;
37};
38
39struct ip6t_error_target
40{
41 struct ip6t_entry_target target;
42 char errorname[IP6T_FUNCTION_MAXNAMELEN];
43};
44
45struct ip6t_error
46{
47 struct ip6t_entry entry;
48 struct ip6t_error_target target;
49};
50
51static struct
52{
53 struct ip6t_replace repl;
54 struct ip6t_standard entries[5];
55 struct ip6t_error term;
56} initial_table __initdata
57= { { "mangle", MANGLE_VALID_HOOKS, 6,
58 sizeof(struct ip6t_standard) * 5 + sizeof(struct ip6t_error),
59 { [NF_IP6_PRE_ROUTING] = 0,
60 [NF_IP6_LOCAL_IN] = sizeof(struct ip6t_standard),
61 [NF_IP6_FORWARD] = sizeof(struct ip6t_standard) * 2,
62 [NF_IP6_LOCAL_OUT] = sizeof(struct ip6t_standard) * 3,
63 [NF_IP6_POST_ROUTING] = sizeof(struct ip6t_standard) * 4},
64 { [NF_IP6_PRE_ROUTING] = 0,
65 [NF_IP6_LOCAL_IN] = sizeof(struct ip6t_standard),
66 [NF_IP6_FORWARD] = sizeof(struct ip6t_standard) * 2,
67 [NF_IP6_LOCAL_OUT] = sizeof(struct ip6t_standard) * 3,
68 [NF_IP6_POST_ROUTING] = sizeof(struct ip6t_standard) * 4},
69 0, NULL, { } },
70 {
71 /* PRE_ROUTING */
72 { { { { { { 0 } } }, { { { 0 } } }, { { { 0 } } }, { { { 0 } } }, "", "", { 0 }, { 0 }, 0, 0, 0 },
73 0,
74 sizeof(struct ip6t_entry),
75 sizeof(struct ip6t_standard),
76 0, { 0, 0 }, { } },
77 { { { { IP6T_ALIGN(sizeof(struct ip6t_standard_target)), "" } }, { } },
78 -NF_ACCEPT - 1 } },
79 /* LOCAL_IN */
80 { { { { { { 0 } } }, { { { 0 } } }, { { { 0 } } }, { { { 0 } } }, "", "", { 0 }, { 0 }, 0, 0, 0 },
81 0,
82 sizeof(struct ip6t_entry),
83 sizeof(struct ip6t_standard),
84 0, { 0, 0 }, { } },
85 { { { { IP6T_ALIGN(sizeof(struct ip6t_standard_target)), "" } }, { } },
86 -NF_ACCEPT - 1 } },
87 /* FORWARD */
88 { { { { { { 0 } } }, { { { 0 } } }, { { { 0 } } }, { { { 0 } } }, "", "", { 0 }, { 0 }, 0, 0, 0 },
89 0,
90 sizeof(struct ip6t_entry),
91 sizeof(struct ip6t_standard),
92 0, { 0, 0 }, { } },
93 { { { { IP6T_ALIGN(sizeof(struct ip6t_standard_target)), "" } }, { } },
94 -NF_ACCEPT - 1 } },
95 /* LOCAL_OUT */
96 { { { { { { 0 } } }, { { { 0 } } }, { { { 0 } } }, { { { 0 } } }, "", "", { 0 }, { 0 }, 0, 0, 0 },
97 0,
98 sizeof(struct ip6t_entry),
99 sizeof(struct ip6t_standard),
100 0, { 0, 0 }, { } },
101 { { { { IP6T_ALIGN(sizeof(struct ip6t_standard_target)), "" } }, { } },
102 -NF_ACCEPT - 1 } },
103 /* POST_ROUTING */
104 { { { { { { 0 } } }, { { { 0 } } }, { { { 0 } } }, { { { 0 } } }, "", "", { 0 }, { 0 }, 0, 0, 0 },
105 0,
106 sizeof(struct ip6t_entry),
107 sizeof(struct ip6t_standard),
108 0, { 0, 0 }, { } },
109 { { { { IP6T_ALIGN(sizeof(struct ip6t_standard_target)), "" } }, { } },
110 -NF_ACCEPT - 1 } }
111 },
112 /* ERROR */
113 { { { { { { 0 } } }, { { { 0 } } }, { { { 0 } } }, { { { 0 } } }, "", "", { 0 }, { 0 }, 0, 0, 0 },
114 0,
115 sizeof(struct ip6t_entry),
116 sizeof(struct ip6t_error),
117 0, { 0, 0 }, { } },
118 { { { { IP6T_ALIGN(sizeof(struct ip6t_error_target)), IP6T_ERROR_TARGET } },
119 { } },
120 "ERROR"
121 }
122 }
123};
124
125static struct ip6t_table packet_mangler = {
126 .name = "mangle",
127 .valid_hooks = MANGLE_VALID_HOOKS,
128 .lock = RW_LOCK_UNLOCKED,
129 .me = THIS_MODULE,
130};
131
132/* The work comes in here from netfilter.c. */
133static unsigned int
134ip6t_route_hook(unsigned int hook,
135 struct sk_buff **pskb,
136 const struct net_device *in,
137 const struct net_device *out,
138 int (*okfn)(struct sk_buff *))
139{
140 return ip6t_do_table(pskb, hook, in, out, &packet_mangler, NULL);
141}
142
143static unsigned int
144ip6t_local_hook(unsigned int hook,
145 struct sk_buff **pskb,
146 const struct net_device *in,
147 const struct net_device *out,
148 int (*okfn)(struct sk_buff *))
149{
150
151 unsigned long nfmark;
152 unsigned int ret;
153 struct in6_addr saddr, daddr;
154 u_int8_t hop_limit;
155 u_int32_t flowlabel;
156
157#if 0
158 /* root is playing with raw sockets. */
159 if ((*pskb)->len < sizeof(struct iphdr)
160 || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr)) {
161 if (net_ratelimit())
162 printk("ip6t_hook: happy cracking.\n");
163 return NF_ACCEPT;
164 }
165#endif
166
167 /* save source/dest address, nfmark, hoplimit, flowlabel, priority, */
168 memcpy(&saddr, &(*pskb)->nh.ipv6h->saddr, sizeof(saddr));
169 memcpy(&daddr, &(*pskb)->nh.ipv6h->daddr, sizeof(daddr));
170 nfmark = (*pskb)->nfmark;
171 hop_limit = (*pskb)->nh.ipv6h->hop_limit;
172
173 /* flowlabel and prio (includes version, which shouldn't change either */
174 flowlabel = *((u_int32_t *) (*pskb)->nh.ipv6h);
175
176 ret = ip6t_do_table(pskb, hook, in, out, &packet_mangler, NULL);
177
178 if (ret != NF_DROP && ret != NF_STOLEN
179 && (memcmp(&(*pskb)->nh.ipv6h->saddr, &saddr, sizeof(saddr))
180 || memcmp(&(*pskb)->nh.ipv6h->daddr, &daddr, sizeof(daddr))
181 || (*pskb)->nfmark != nfmark
182 || (*pskb)->nh.ipv6h->hop_limit != hop_limit)) {
183
184 /* something which could affect routing has changed */
185
186 DEBUGP("ip6table_mangle: we'd need to re-route a packet\n");
187 }
188
189 return ret;
190}
191
192static struct nf_hook_ops ip6t_ops[] = {
193 {
194 .hook = ip6t_route_hook,
195 .owner = THIS_MODULE,
196 .pf = PF_INET6,
197 .hooknum = NF_IP6_PRE_ROUTING,
198 .priority = NF_IP6_PRI_MANGLE,
199 },
200 {
201 .hook = ip6t_local_hook,
202 .owner = THIS_MODULE,
203 .pf = PF_INET6,
204 .hooknum = NF_IP6_LOCAL_IN,
205 .priority = NF_IP6_PRI_MANGLE,
206 },
207 {
208 .hook = ip6t_route_hook,
209 .owner = THIS_MODULE,
210 .pf = PF_INET6,
211 .hooknum = NF_IP6_FORWARD,
212 .priority = NF_IP6_PRI_MANGLE,
213 },
214 {
215 .hook = ip6t_local_hook,
216 .owner = THIS_MODULE,
217 .pf = PF_INET6,
218 .hooknum = NF_IP6_LOCAL_OUT,
219 .priority = NF_IP6_PRI_MANGLE,
220 },
221 {
222 .hook = ip6t_route_hook,
223 .owner = THIS_MODULE,
224 .pf = PF_INET6,
225 .hooknum = NF_IP6_POST_ROUTING,
226 .priority = NF_IP6_PRI_MANGLE,
227 },
228};
229
230static int __init init(void)
231{
232 int ret;
233
234 /* Register table */
235 ret = ip6t_register_table(&packet_mangler, &initial_table.repl);
236 if (ret < 0)
237 return ret;
238
239 /* Register hooks */
240 ret = nf_register_hook(&ip6t_ops[0]);
241 if (ret < 0)
242 goto cleanup_table;
243
244 ret = nf_register_hook(&ip6t_ops[1]);
245 if (ret < 0)
246 goto cleanup_hook0;
247
248 ret = nf_register_hook(&ip6t_ops[2]);
249 if (ret < 0)
250 goto cleanup_hook1;
251
252 ret = nf_register_hook(&ip6t_ops[3]);
253 if (ret < 0)
254 goto cleanup_hook2;
255
256 ret = nf_register_hook(&ip6t_ops[4]);
257 if (ret < 0)
258 goto cleanup_hook3;
259
260 return ret;
261
262 cleanup_hook3:
263 nf_unregister_hook(&ip6t_ops[3]);
264 cleanup_hook2:
265 nf_unregister_hook(&ip6t_ops[2]);
266 cleanup_hook1:
267 nf_unregister_hook(&ip6t_ops[1]);
268 cleanup_hook0:
269 nf_unregister_hook(&ip6t_ops[0]);
270 cleanup_table:
271 ip6t_unregister_table(&packet_mangler);
272
273 return ret;
274}
275
276static void __exit fini(void)
277{
278 unsigned int i;
279
280 for (i = 0; i < sizeof(ip6t_ops)/sizeof(struct nf_hook_ops); i++)
281 nf_unregister_hook(&ip6t_ops[i]);
282
283 ip6t_unregister_table(&packet_mangler);
284}
285
286module_init(init);
287module_exit(fini);
diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c
new file mode 100644
index 000000000000..71407beaf790
--- /dev/null
+++ b/net/ipv6/netfilter/ip6table_raw.c
@@ -0,0 +1,182 @@
1/*
2 * IPv6 raw table, a port of the IPv4 raw table to IPv6
3 *
4 * Copyright (C) 2003 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
5 */
6#include <linux/module.h>
7#include <linux/netfilter_ipv6/ip6_tables.h>
8
/* Bitmask of the hooks the raw table attaches to. */
#define RAW_VALID_HOOKS ((1 << NF_IP6_PRE_ROUTING) | (1 << NF_IP6_LOCAL_OUT))

/*
 * Debug tracing switch: change the condition to 1 to map DEBUGP() onto
 * printk(KERN_DEBUG ...); as shipped, DEBUGP() expands to nothing.
 */
#if 0
#define DEBUGP(x, args...)	printk(KERN_DEBUG x, ## args)
#else
#define DEBUGP(x, args...)
#endif
16
17/* Standard entry. */
18struct ip6t_standard
19{
20 struct ip6t_entry entry;
21 struct ip6t_standard_target target;
22};
23
24struct ip6t_error_target
25{
26 struct ip6t_entry_target target;
27 char errorname[IP6T_FUNCTION_MAXNAMELEN];
28};
29
30struct ip6t_error
31{
32 struct ip6t_entry entry;
33 struct ip6t_error_target target;
34};
35
36static struct
37{
38 struct ip6t_replace repl;
39 struct ip6t_standard entries[2];
40 struct ip6t_error term;
41} initial_table __initdata = {
42 .repl = {
43 .name = "raw",
44 .valid_hooks = RAW_VALID_HOOKS,
45 .num_entries = 3,
46 .size = sizeof(struct ip6t_standard) * 2 + sizeof(struct ip6t_error),
47 .hook_entry = {
48 [NF_IP6_PRE_ROUTING] = 0,
49 [NF_IP6_LOCAL_OUT] = sizeof(struct ip6t_standard)
50 },
51 .underflow = {
52 [NF_IP6_PRE_ROUTING] = 0,
53 [NF_IP6_LOCAL_OUT] = sizeof(struct ip6t_standard)
54 },
55 },
56 .entries = {
57 /* PRE_ROUTING */
58 {
59 .entry = {
60 .target_offset = sizeof(struct ip6t_entry),
61 .next_offset = sizeof(struct ip6t_standard),
62 },
63 .target = {
64 .target = {
65 .u = {
66 .target_size = IP6T_ALIGN(sizeof(struct ip6t_standard_target)),
67 },
68 },
69 .verdict = -NF_ACCEPT - 1,
70 },
71 },
72
73 /* LOCAL_OUT */
74 {
75 .entry = {
76 .target_offset = sizeof(struct ip6t_entry),
77 .next_offset = sizeof(struct ip6t_standard),
78 },
79 .target = {
80 .target = {
81 .u = {
82 .target_size = IP6T_ALIGN(sizeof(struct ip6t_standard_target)),
83 },
84 },
85 .verdict = -NF_ACCEPT - 1,
86 },
87 },
88 },
89 /* ERROR */
90 .term = {
91 .entry = {
92 .target_offset = sizeof(struct ip6t_entry),
93 .next_offset = sizeof(struct ip6t_error),
94 },
95 .target = {
96 .target = {
97 .u = {
98 .user = {
99 .target_size = IP6T_ALIGN(sizeof(struct ip6t_error_target)),
100 .name = IP6T_ERROR_TARGET,
101 },
102 },
103 },
104 .errorname = "ERROR",
105 },
106 }
107};
108
109static struct ip6t_table packet_raw = {
110 .name = "raw",
111 .valid_hooks = RAW_VALID_HOOKS,
112 .lock = RW_LOCK_UNLOCKED,
113 .me = THIS_MODULE
114};
115
116/* The work comes in here from netfilter.c. */
117static unsigned int
118ip6t_hook(unsigned int hook,
119 struct sk_buff **pskb,
120 const struct net_device *in,
121 const struct net_device *out,
122 int (*okfn)(struct sk_buff *))
123{
124 return ip6t_do_table(pskb, hook, in, out, &packet_raw, NULL);
125}
126
127static struct nf_hook_ops ip6t_ops[] = {
128 {
129 .hook = ip6t_hook,
130 .pf = PF_INET6,
131 .hooknum = NF_IP6_PRE_ROUTING,
132 .priority = NF_IP6_PRI_FIRST
133 },
134 {
135 .hook = ip6t_hook,
136 .pf = PF_INET6,
137 .hooknum = NF_IP6_LOCAL_OUT,
138 .priority = NF_IP6_PRI_FIRST
139 },
140};
141
142static int __init init(void)
143{
144 int ret;
145
146 /* Register table */
147 ret = ip6t_register_table(&packet_raw, &initial_table.repl);
148 if (ret < 0)
149 return ret;
150
151 /* Register hooks */
152 ret = nf_register_hook(&ip6t_ops[0]);
153 if (ret < 0)
154 goto cleanup_table;
155
156 ret = nf_register_hook(&ip6t_ops[1]);
157 if (ret < 0)
158 goto cleanup_hook0;
159
160 return ret;
161
162 cleanup_hook0:
163 nf_unregister_hook(&ip6t_ops[0]);
164 cleanup_table:
165 ip6t_unregister_table(&packet_raw);
166
167 return ret;
168}
169
170static void __exit fini(void)
171{
172 unsigned int i;
173
174 for (i = 0; i < sizeof(ip6t_ops)/sizeof(struct nf_hook_ops); i++)
175 nf_unregister_hook(&ip6t_ops[i]);
176
177 ip6t_unregister_table(&packet_raw);
178}
179
180module_init(init);
181module_exit(fini);
182MODULE_LICENSE("GPL");
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
new file mode 100644
index 000000000000..334a5967831e
--- /dev/null
+++ b/net/ipv6/proc.c
@@ -0,0 +1,303 @@
1/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * This file implements the various access functions for the
7 * PROC file system. This is very similar to the IPv4 version,
8 * except it reports the sockets in the INET6 address family.
9 *
10 * Version: $Id: proc.c,v 1.17 2002/02/01 22:01:04 davem Exp $
11 *
12 * Authors: David S. Miller (davem@caip.rutgers.edu)
13 * YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
14 *
15 * This program is free software; you can redistribute it and/or
16 * modify it under the terms of the GNU General Public License
17 * as published by the Free Software Foundation; either version
18 * 2 of the License, or (at your option) any later version.
19 */
20#include <linux/config.h>
21#include <linux/sched.h>
22#include <linux/socket.h>
23#include <linux/net.h>
24#include <linux/ipv6.h>
25#include <linux/proc_fs.h>
26#include <linux/seq_file.h>
27#include <linux/stddef.h>
28#include <net/sock.h>
29#include <net/tcp.h>
30#include <net/transp_v6.h>
31#include <net/ipv6.h>
32
33#ifdef CONFIG_PROC_FS
34static struct proc_dir_entry *proc_net_devsnmp6;
35
36static int fold_prot_inuse(struct proto *proto)
37{
38 int res = 0;
39 int cpu;
40
41 for (cpu=0; cpu<NR_CPUS; cpu++)
42 res += proto->stats[cpu].inuse;
43
44 return res;
45}
46
47static int sockstat6_seq_show(struct seq_file *seq, void *v)
48{
49 seq_printf(seq, "TCP6: inuse %d\n",
50 fold_prot_inuse(&tcpv6_prot));
51 seq_printf(seq, "UDP6: inuse %d\n",
52 fold_prot_inuse(&udpv6_prot));
53 seq_printf(seq, "RAW6: inuse %d\n",
54 fold_prot_inuse(&rawv6_prot));
55 seq_printf(seq, "FRAG6: inuse %d memory %d\n",
56 ip6_frag_nqueues, atomic_read(&ip6_frag_mem));
57 return 0;
58}
59
/*
 * Label/counter-index pairs for the IPv6 statistics that
 * snmp6_seq_show() prints for the global (non per-device) entry.
 * Entries are emitted in array order.  snmp6_seq_show_item() stops at
 * the first entry whose name is NULL, which SNMP_MIB_SENTINEL is
 * presumably defined to provide.
 */
60static struct snmp_mib snmp6_ipstats_list[] = {
61/* ipv6 mib according to RFC 2465 */
62 SNMP_MIB_ITEM("Ip6InReceives", IPSTATS_MIB_INRECEIVES),
63 SNMP_MIB_ITEM("Ip6InHdrErrors", IPSTATS_MIB_INHDRERRORS),
64 SNMP_MIB_ITEM("Ip6InTooBigErrors", IPSTATS_MIB_INTOOBIGERRORS),
65 SNMP_MIB_ITEM("Ip6InNoRoutes", IPSTATS_MIB_INNOROUTES),
66 SNMP_MIB_ITEM("Ip6InAddrErrors", IPSTATS_MIB_INADDRERRORS),
67 SNMP_MIB_ITEM("Ip6InUnknownProtos", IPSTATS_MIB_INUNKNOWNPROTOS),
68 SNMP_MIB_ITEM("Ip6InTruncatedPkts", IPSTATS_MIB_INTRUNCATEDPKTS),
69 SNMP_MIB_ITEM("Ip6InDiscards", IPSTATS_MIB_INDISCARDS),
70 SNMP_MIB_ITEM("Ip6InDelivers", IPSTATS_MIB_INDELIVERS),
71 SNMP_MIB_ITEM("Ip6OutForwDatagrams", IPSTATS_MIB_OUTFORWDATAGRAMS),
72 SNMP_MIB_ITEM("Ip6OutRequests", IPSTATS_MIB_OUTREQUESTS),
73 SNMP_MIB_ITEM("Ip6OutDiscards", IPSTATS_MIB_OUTDISCARDS),
74 SNMP_MIB_ITEM("Ip6OutNoRoutes", IPSTATS_MIB_OUTNOROUTES),
75 SNMP_MIB_ITEM("Ip6ReasmTimeout", IPSTATS_MIB_REASMTIMEOUT),
76 SNMP_MIB_ITEM("Ip6ReasmReqds", IPSTATS_MIB_REASMREQDS),
77 SNMP_MIB_ITEM("Ip6ReasmOKs", IPSTATS_MIB_REASMOKS),
78 SNMP_MIB_ITEM("Ip6ReasmFails", IPSTATS_MIB_REASMFAILS),
79 SNMP_MIB_ITEM("Ip6FragOKs", IPSTATS_MIB_FRAGOKS),
80 SNMP_MIB_ITEM("Ip6FragFails", IPSTATS_MIB_FRAGFAILS),
81 SNMP_MIB_ITEM("Ip6FragCreates", IPSTATS_MIB_FRAGCREATES),
82 SNMP_MIB_ITEM("Ip6InMcastPkts", IPSTATS_MIB_INMCASTPKTS),
83 SNMP_MIB_ITEM("Ip6OutMcastPkts", IPSTATS_MIB_OUTMCASTPKTS),
84 SNMP_MIB_SENTINEL
85};
86
/*
 * Label/counter-index pairs for the ICMPv6 statistics.  snmp6_seq_show()
 * prints this list both for the global entry and for each per-device
 * entry, in array order.
 */
87static struct snmp_mib snmp6_icmp6_list[] = {
88/* ICMPv6 MIB according to RFC 2466.

   Exceptions:
     - {In|Out}AdminProhibs are removed: there is no good reason to
       account for them separately from the other destination-unreachable
       counters.
     - OutErrs is identically zero, and so are OutEchos,
       OutRouterAdvertisements and OutGroupMembQueries, so none of them
       is listed.
 */
98 SNMP_MIB_ITEM("Icmp6InMsgs", ICMP6_MIB_INMSGS),
99 SNMP_MIB_ITEM("Icmp6InErrors", ICMP6_MIB_INERRORS),
100 SNMP_MIB_ITEM("Icmp6InDestUnreachs", ICMP6_MIB_INDESTUNREACHS),
101 SNMP_MIB_ITEM("Icmp6InPktTooBigs", ICMP6_MIB_INPKTTOOBIGS),
102 SNMP_MIB_ITEM("Icmp6InTimeExcds", ICMP6_MIB_INTIMEEXCDS),
103 SNMP_MIB_ITEM("Icmp6InParmProblems", ICMP6_MIB_INPARMPROBLEMS),
104 SNMP_MIB_ITEM("Icmp6InEchos", ICMP6_MIB_INECHOS),
105 SNMP_MIB_ITEM("Icmp6InEchoReplies", ICMP6_MIB_INECHOREPLIES),
106 SNMP_MIB_ITEM("Icmp6InGroupMembQueries", ICMP6_MIB_INGROUPMEMBQUERIES),
107 SNMP_MIB_ITEM("Icmp6InGroupMembResponses", ICMP6_MIB_INGROUPMEMBRESPONSES),
108 SNMP_MIB_ITEM("Icmp6InGroupMembReductions", ICMP6_MIB_INGROUPMEMBREDUCTIONS),
109 SNMP_MIB_ITEM("Icmp6InRouterSolicits", ICMP6_MIB_INROUTERSOLICITS),
110 SNMP_MIB_ITEM("Icmp6InRouterAdvertisements", ICMP6_MIB_INROUTERADVERTISEMENTS),
111 SNMP_MIB_ITEM("Icmp6InNeighborSolicits", ICMP6_MIB_INNEIGHBORSOLICITS),
112 SNMP_MIB_ITEM("Icmp6InNeighborAdvertisements", ICMP6_MIB_INNEIGHBORADVERTISEMENTS),
113 SNMP_MIB_ITEM("Icmp6InRedirects", ICMP6_MIB_INREDIRECTS),
114 SNMP_MIB_ITEM("Icmp6OutMsgs", ICMP6_MIB_OUTMSGS),
115 SNMP_MIB_ITEM("Icmp6OutDestUnreachs", ICMP6_MIB_OUTDESTUNREACHS),
116 SNMP_MIB_ITEM("Icmp6OutPktTooBigs", ICMP6_MIB_OUTPKTTOOBIGS),
117 SNMP_MIB_ITEM("Icmp6OutTimeExcds", ICMP6_MIB_OUTTIMEEXCDS),
118 SNMP_MIB_ITEM("Icmp6OutParmProblems", ICMP6_MIB_OUTPARMPROBLEMS),
119 SNMP_MIB_ITEM("Icmp6OutEchoReplies", ICMP6_MIB_OUTECHOREPLIES),
120 SNMP_MIB_ITEM("Icmp6OutRouterSolicits", ICMP6_MIB_OUTROUTERSOLICITS),
121 SNMP_MIB_ITEM("Icmp6OutNeighborSolicits", ICMP6_MIB_OUTNEIGHBORSOLICITS),
122 SNMP_MIB_ITEM("Icmp6OutNeighborAdvertisements", ICMP6_MIB_OUTNEIGHBORADVERTISEMENTS),
123 SNMP_MIB_ITEM("Icmp6OutRedirects", ICMP6_MIB_OUTREDIRECTS),
124 SNMP_MIB_ITEM("Icmp6OutGroupMembResponses", ICMP6_MIB_OUTGROUPMEMBRESPONSES),
125 SNMP_MIB_ITEM("Icmp6OutGroupMembReductions", ICMP6_MIB_OUTGROUPMEMBREDUCTIONS),
126 SNMP_MIB_SENTINEL
127};
128
/*
 * Label/counter-index pairs for the UDP-over-IPv6 statistics printed by
 * snmp6_seq_show() for the global entry, in array order.
 */
129static struct snmp_mib snmp6_udp6_list[] = {
130 SNMP_MIB_ITEM("Udp6InDatagrams", UDP_MIB_INDATAGRAMS),
131 SNMP_MIB_ITEM("Udp6NoPorts", UDP_MIB_NOPORTS),
132 SNMP_MIB_ITEM("Udp6InErrors", UDP_MIB_INERRORS),
133 SNMP_MIB_ITEM("Udp6OutDatagrams", UDP_MIB_OUTDATAGRAMS),
134 SNMP_MIB_SENTINEL
135};
136
137static unsigned long
138fold_field(void *mib[], int offt)
139{
140 unsigned long res = 0;
141 int i;
142
143 for (i = 0; i < NR_CPUS; i++) {
144 if (!cpu_possible(i))
145 continue;
146 res += *(((unsigned long *)per_cpu_ptr(mib[0], i)) + offt);
147 res += *(((unsigned long *)per_cpu_ptr(mib[1], i)) + offt);
148 }
149 return res;
150}
151
152static inline void
153snmp6_seq_show_item(struct seq_file *seq, void **mib, struct snmp_mib *itemlist)
154{
155 int i;
156 for (i=0; itemlist[i].name; i++)
157 seq_printf(seq, "%-32s\t%lu\n", itemlist[i].name,
158 fold_field(mib, itemlist[i].entry));
159}
160
161static int snmp6_seq_show(struct seq_file *seq, void *v)
162{
163 struct inet6_dev *idev = (struct inet6_dev *)seq->private;
164
165 if (idev) {
166 seq_printf(seq, "%-32s\t%u\n", "ifIndex", idev->dev->ifindex);
167 snmp6_seq_show_item(seq, (void **)idev->stats.icmpv6, snmp6_icmp6_list);
168 } else {
169 snmp6_seq_show_item(seq, (void **)ipv6_statistics, snmp6_ipstats_list);
170 snmp6_seq_show_item(seq, (void **)icmpv6_statistics, snmp6_icmp6_list);
171 snmp6_seq_show_item(seq, (void **)udp_stats_in6, snmp6_udp6_list);
172 }
173 return 0;
174}
175
176static int sockstat6_seq_open(struct inode *inode, struct file *file)
177{
178 return single_open(file, sockstat6_seq_show, NULL);
179}
180
/*
 * File operations for the "sockstat6" proc entry; everything except
 * open is delegated to the generic seq_file/single_* helpers.
 */
181static struct file_operations sockstat6_seq_fops = {
182 .owner = THIS_MODULE,
183 .open = sockstat6_seq_open,
184 .read = seq_read,
185 .llseek = seq_lseek,
186 .release = single_release,
187};
188
189static int snmp6_seq_open(struct inode *inode, struct file *file)
190{
191 return single_open(file, snmp6_seq_show, PDE(inode)->data);
192}
193
/*
 * File operations shared by the global "snmp6" entry and every
 * per-device entry created by snmp6_register_dev().
 */
194static struct file_operations snmp6_seq_fops = {
195 .owner = THIS_MODULE,
196 .open = snmp6_seq_open,
197 .read = seq_read,
198 .llseek = seq_lseek,
199 .release = single_release,
200};
201
202int snmp6_register_dev(struct inet6_dev *idev)
203{
204 struct proc_dir_entry *p;
205
206 if (!idev || !idev->dev)
207 return -EINVAL;
208
209 if (!proc_net_devsnmp6)
210 return -ENOENT;
211
212 p = create_proc_entry(idev->dev->name, S_IRUGO, proc_net_devsnmp6);
213 if (!p)
214 return -ENOMEM;
215
216 p->data = idev;
217 p->proc_fops = &snmp6_seq_fops;
218
219 idev->stats.proc_dir_entry = p;
220 return 0;
221}
222
223int snmp6_unregister_dev(struct inet6_dev *idev)
224{
225 if (!proc_net_devsnmp6)
226 return -ENOENT;
227 if (!idev || !idev->stats.proc_dir_entry)
228 return -EINVAL;
229 remove_proc_entry(idev->stats.proc_dir_entry->name,
230 proc_net_devsnmp6);
231 return 0;
232}
233
234int __init ipv6_misc_proc_init(void)
235{
236 int rc = 0;
237
238 if (!proc_net_fops_create("snmp6", S_IRUGO, &snmp6_seq_fops))
239 goto proc_snmp6_fail;
240
241 proc_net_devsnmp6 = proc_mkdir("dev_snmp6", proc_net);
242 if (!proc_net_devsnmp6)
243 goto proc_dev_snmp6_fail;
244
245 if (!proc_net_fops_create("sockstat6", S_IRUGO, &sockstat6_seq_fops))
246 goto proc_sockstat6_fail;
247out:
248 return rc;
249
250proc_sockstat6_fail:
251 proc_net_remove("dev_snmp6");
252proc_dev_snmp6_fail:
253 proc_net_remove("snmp6");
254proc_snmp6_fail:
255 rc = -ENOMEM;
256 goto out;
257}
258
/*
 * Remove the /proc/net entries created by ipv6_misc_proc_init(), in the
 * reverse of their creation order.
 */
void ipv6_misc_proc_exit(void)
{
	static const char *entries[] = { "sockstat6", "dev_snmp6", "snmp6" };
	unsigned int idx;

	for (idx = 0; idx < sizeof(entries) / sizeof(entries[0]); idx++)
		proc_net_remove(entries[idx]);
}
265
266#else /* CONFIG_PROC_FS */
267
268
/* Stub for kernels built without CONFIG_PROC_FS: nothing to create, report success. */
269int snmp6_register_dev(struct inet6_dev *idev)
270{
271 return 0;
272}
273
/* Stub for kernels built without CONFIG_PROC_FS: nothing to remove, report success. */
274int snmp6_unregister_dev(struct inet6_dev *idev)
275{
276 return 0;
277}
278#endif /* CONFIG_PROC_FS */
279
280int snmp6_alloc_dev(struct inet6_dev *idev)
281{
282 int err = -ENOMEM;
283
284 if (!idev || !idev->dev)
285 return -EINVAL;
286
287 if (snmp6_mib_init((void **)idev->stats.icmpv6, sizeof(struct icmpv6_mib),
288 __alignof__(struct icmpv6_mib)) < 0)
289 goto err_icmp;
290
291 return 0;
292
293err_icmp:
294 return err;
295}
296
/*
 * Release the per-device ICMPv6 statistics block set up by
 * snmp6_alloc_dev().  Always returns 0.
 * NOTE(review): unlike snmp6_alloc_dev(), @idev is not checked for NULL
 * here -- callers presumably guarantee a valid device; confirm.
 */
297int snmp6_free_dev(struct inet6_dev *idev)
298{
299 snmp6_mib_free((void **)idev->stats.icmpv6);
300 return 0;
301}
302
303
diff --git a/net/ipv6/protocol.c b/net/ipv6/protocol.c
new file mode 100644
index 000000000000..52c1d58b6ca6
--- /dev/null
+++ b/net/ipv6/protocol.c
@@ -0,0 +1,86 @@
1/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * PF_INET6 protocol dispatch tables.
7 *
8 * Version: $Id: protocol.c,v 1.10 2001/05/18 02:25:49 davem Exp $
9 *
10 * Authors: Pedro Roque <roque@di.fc.ul.pt>
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
16 */
17
18/*
19 * Changes:
20 *
21 * Vince Laviano (vince@cs.stanford.edu) 16 May 2001
22 * - Removed unused variable 'inet6_protocol_base'
23 * - Modified inet6_del_protocol() to correctly maintain copy bit.
24 */
25
26#include <linux/errno.h>
27#include <linux/types.h>
28#include <linux/socket.h>
29#include <linux/sockios.h>
30#include <linux/sched.h>
31#include <linux/net.h>
32#include <linux/in6.h>
33#include <linux/netdevice.h>
34#include <linux/if_arp.h>
35
36#include <net/sock.h>
37#include <net/snmp.h>
38
39#include <net/ipv6.h>
40#include <net/protocol.h>
41
42struct inet6_protocol *inet6_protos[MAX_INET_PROTOS];
43static DEFINE_SPINLOCK(inet6_proto_lock);
44
45
46int inet6_add_protocol(struct inet6_protocol *prot, unsigned char protocol)
47{
48 int ret, hash = protocol & (MAX_INET_PROTOS - 1);
49
50 spin_lock_bh(&inet6_proto_lock);
51
52 if (inet6_protos[hash]) {
53 ret = -1;
54 } else {
55 inet6_protos[hash] = prot;
56 ret = 0;
57 }
58
59 spin_unlock_bh(&inet6_proto_lock);
60
61 return ret;
62}
63
64/*
65 * Remove a protocol from the hash tables.
66 */
67
68int inet6_del_protocol(struct inet6_protocol *prot, unsigned char protocol)
69{
70 int ret, hash = protocol & (MAX_INET_PROTOS - 1);
71
72 spin_lock_bh(&inet6_proto_lock);
73
74 if (inet6_protos[hash] != prot) {
75 ret = -1;
76 } else {
77 inet6_protos[hash] = NULL;
78 ret = 0;
79 }
80
81 spin_unlock_bh(&inet6_proto_lock);
82
83 synchronize_net();
84
85 return ret;
86}
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
new file mode 100644
index 000000000000..5488ad0de4f6
--- /dev/null
+++ b/net/ipv6/raw.c
@@ -0,0 +1,1157 @@
1/*
2 * RAW sockets for IPv6
3 * Linux INET6 implementation
4 *
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
7 *
8 * Adapted from linux/net/ipv4/raw.c
9 *
10 * $Id: raw.c,v 1.51 2002/02/01 22:01:04 davem Exp $
11 *
12 * Fixes:
13 * Hideaki YOSHIFUJI : sin6_scope_id support
14 * YOSHIFUJI,H.@USAGI : raw checksum (RFC2292(bis) compliance)
15 * Kazunori MIYAZAWA @USAGI: change process style to use ip6_append_data
16 *
17 * This program is free software; you can redistribute it and/or
18 * modify it under the terms of the GNU General Public License
19 * as published by the Free Software Foundation; either version
20 * 2 of the License, or (at your option) any later version.
21 */
22
23#include <linux/errno.h>
#include <linux/kernel.h>
24#include <linux/types.h>
25#include <linux/socket.h>
26#include <linux/sockios.h>
27#include <linux/sched.h>
28#include <linux/net.h>
29#include <linux/in6.h>
30#include <linux/netdevice.h>
31#include <linux/if_arp.h>
32#include <linux/icmpv6.h>
33#include <linux/netfilter.h>
34#include <linux/netfilter_ipv6.h>
35#include <asm/uaccess.h>
36#include <asm/ioctls.h>
37
38#include <net/ip.h>
39#include <net/sock.h>
40#include <net/snmp.h>
41
42#include <net/ipv6.h>
43#include <net/ndisc.h>
44#include <net/protocol.h>
45#include <net/ip6_route.h>
46#include <net/ip6_checksum.h>
47#include <net/addrconf.h>
48#include <net/transp_v6.h>
49#include <net/udp.h>
50#include <net/inet_common.h>
51
52#include <net/rawv6.h>
53#include <net/xfrm.h>
54
55#include <linux/proc_fs.h>
56#include <linux/seq_file.h>
57
58struct hlist_head raw_v6_htable[RAWV6_HTABLE_SIZE];
59DEFINE_RWLOCK(raw_v6_lock);
60
61static void raw_v6_hash(struct sock *sk)
62{
63 struct hlist_head *list = &raw_v6_htable[inet_sk(sk)->num &
64 (RAWV6_HTABLE_SIZE - 1)];
65
66 write_lock_bh(&raw_v6_lock);
67 sk_add_node(sk, list);
68 sock_prot_inc_use(sk->sk_prot);
69 write_unlock_bh(&raw_v6_lock);
70}
71
72static void raw_v6_unhash(struct sock *sk)
73{
74 write_lock_bh(&raw_v6_lock);
75 if (sk_del_node_init(sk))
76 sock_prot_dec_use(sk->sk_prot);
77 write_unlock_bh(&raw_v6_lock);
78}
79
80
81/* Grumble... icmp and ip_input want to get at this... */
82struct sock *__raw_v6_lookup(struct sock *sk, unsigned short num,
83 struct in6_addr *loc_addr, struct in6_addr *rmt_addr)
84{
85 struct hlist_node *node;
86 int is_multicast = ipv6_addr_is_multicast(loc_addr);
87
88 sk_for_each_from(sk, node)
89 if (inet_sk(sk)->num == num) {
90 struct ipv6_pinfo *np = inet6_sk(sk);
91
92 if (!ipv6_addr_any(&np->daddr) &&
93 !ipv6_addr_equal(&np->daddr, rmt_addr))
94 continue;
95
96 if (!ipv6_addr_any(&np->rcv_saddr)) {
97 if (ipv6_addr_equal(&np->rcv_saddr, loc_addr))
98 goto found;
99 if (is_multicast &&
100 inet6_mc_check(sk, loc_addr, rmt_addr))
101 goto found;
102 continue;
103 }
104 goto found;
105 }
106 sk = NULL;
107found:
108 return sk;
109}
110
111/*
112 * 0 - deliver
113 * 1 - block
114 */
115static __inline__ int icmpv6_filter(struct sock *sk, struct sk_buff *skb)
116{
117 struct icmp6hdr *icmph;
118 struct raw6_sock *rp = raw6_sk(sk);
119
120 if (pskb_may_pull(skb, sizeof(struct icmp6hdr))) {
121 __u32 *data = &rp->filter.data[0];
122 int bit_nr;
123
124 icmph = (struct icmp6hdr *) skb->data;
125 bit_nr = icmph->icmp6_type;
126
127 return (data[bit_nr >> 5] & (1 << (bit_nr & 31))) != 0;
128 }
129 return 0;
130}
131
132/*
133 * demultiplex raw sockets.
134 * (should consider queueing the skb in the sock receive_queue
135 * without calling rawv6.c)
136 *
137 * Caller owns SKB so we must make clones.
138 */
139void ipv6_raw_deliver(struct sk_buff *skb, int nexthdr)
140{
141 struct in6_addr *saddr;
142 struct in6_addr *daddr;
143 struct sock *sk;
144 __u8 hash;
145
146 saddr = &skb->nh.ipv6h->saddr;
147 daddr = saddr + 1;
148
149 hash = nexthdr & (MAX_INET_PROTOS - 1);
150
151 read_lock(&raw_v6_lock);
152 sk = sk_head(&raw_v6_htable[hash]);
153
154 /*
155 * The first socket found will be delivered after
156 * delivery to transport protocols.
157 */
158
159 if (sk == NULL)
160 goto out;
161
162 sk = __raw_v6_lookup(sk, nexthdr, daddr, saddr);
163
164 while (sk) {
165 if (nexthdr != IPPROTO_ICMPV6 || !icmpv6_filter(sk, skb)) {
166 struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC);
167
168 /* Not releasing hash table! */
169 if (clone)
170 rawv6_rcv(sk, clone);
171 }
172 sk = __raw_v6_lookup(sk_next(sk), nexthdr, daddr, saddr);
173 }
174out:
175 read_unlock(&raw_v6_lock);
176}
177
178/* This cleans up af_inet6 a bit. -DaveM */
179static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
180{
181 struct inet_sock *inet = inet_sk(sk);
182 struct ipv6_pinfo *np = inet6_sk(sk);
183 struct sockaddr_in6 *addr = (struct sockaddr_in6 *) uaddr;
184 __u32 v4addr = 0;
185 int addr_type;
186 int err;
187
188 if (addr_len < SIN6_LEN_RFC2133)
189 return -EINVAL;
190 addr_type = ipv6_addr_type(&addr->sin6_addr);
191
192 /* Raw sockets are IPv6 only */
193 if (addr_type == IPV6_ADDR_MAPPED)
194 return(-EADDRNOTAVAIL);
195
196 lock_sock(sk);
197
198 err = -EINVAL;
199 if (sk->sk_state != TCP_CLOSE)
200 goto out;
201
202 /* Check if the address belongs to the host. */
203 if (addr_type != IPV6_ADDR_ANY) {
204 struct net_device *dev = NULL;
205
206 if (addr_type & IPV6_ADDR_LINKLOCAL) {
207 if (addr_len >= sizeof(struct sockaddr_in6) &&
208 addr->sin6_scope_id) {
209 /* Override any existing binding, if another
210 * one is supplied by user.
211 */
212 sk->sk_bound_dev_if = addr->sin6_scope_id;
213 }
214
215 /* Binding to link-local address requires an interface */
216 if (!sk->sk_bound_dev_if)
217 goto out;
218
219 dev = dev_get_by_index(sk->sk_bound_dev_if);
220 if (!dev) {
221 err = -ENODEV;
222 goto out;
223 }
224 }
225
226 /* ipv4 addr of the socket is invalid. Only the
227 * unspecified and mapped address have a v4 equivalent.
228 */
229 v4addr = LOOPBACK4_IPV6;
230 if (!(addr_type & IPV6_ADDR_MULTICAST)) {
231 err = -EADDRNOTAVAIL;
232 if (!ipv6_chk_addr(&addr->sin6_addr, dev, 0)) {
233 if (dev)
234 dev_put(dev);
235 goto out;
236 }
237 }
238 if (dev)
239 dev_put(dev);
240 }
241
242 inet->rcv_saddr = inet->saddr = v4addr;
243 ipv6_addr_copy(&np->rcv_saddr, &addr->sin6_addr);
244 if (!(addr_type & IPV6_ADDR_MULTICAST))
245 ipv6_addr_copy(&np->saddr, &addr->sin6_addr);
246 err = 0;
247out:
248 release_sock(sk);
249 return err;
250}
251
252void rawv6_err(struct sock *sk, struct sk_buff *skb,
253 struct inet6_skb_parm *opt,
254 int type, int code, int offset, u32 info)
255{
256 struct inet_sock *inet = inet_sk(sk);
257 struct ipv6_pinfo *np = inet6_sk(sk);
258 int err;
259 int harderr;
260
261 /* Report error on raw socket, if:
262 1. User requested recverr.
263 2. Socket is connected (otherwise the error indication
264 is useless without recverr and error is hard.
265 */
266 if (!np->recverr && sk->sk_state != TCP_ESTABLISHED)
267 return;
268
269 harderr = icmpv6_err_convert(type, code, &err);
270 if (type == ICMPV6_PKT_TOOBIG)
271 harderr = (np->pmtudisc == IPV6_PMTUDISC_DO);
272
273 if (np->recverr) {
274 u8 *payload = skb->data;
275 if (!inet->hdrincl)
276 payload += offset;
277 ipv6_icmp_error(sk, skb, err, 0, ntohl(info), payload);
278 }
279
280 if (np->recverr || harderr) {
281 sk->sk_err = err;
282 sk->sk_error_report(sk);
283 }
284}
285
286static inline int rawv6_rcv_skb(struct sock * sk, struct sk_buff * skb)
287{
288 if ((raw6_sk(sk)->checksum || sk->sk_filter) &&
289 skb->ip_summed != CHECKSUM_UNNECESSARY) {
290 if ((unsigned short)csum_fold(skb_checksum(skb, 0, skb->len, skb->csum))) {
291 /* FIXME: increment a raw6 drops counter here */
292 kfree_skb(skb);
293 return 0;
294 }
295 skb->ip_summed = CHECKSUM_UNNECESSARY;
296 }
297
298 /* Charge it to the socket. */
299 if (sock_queue_rcv_skb(sk,skb)<0) {
300 /* FIXME: increment a raw6 drops counter here */
301 kfree_skb(skb);
302 return 0;
303 }
304
305 return 0;
306}
307
308/*
309 * This is next to useless...
310 * if we demultiplex in network layer we don't need the extra call
311 * just to queue the skb...
312 * maybe we could have the network decide upon a hint if it
313 * should call raw_rcv for demultiplexing
314 */
315int rawv6_rcv(struct sock *sk, struct sk_buff *skb)
316{
317 struct inet_sock *inet = inet_sk(sk);
318 struct raw6_sock *rp = raw6_sk(sk);
319
320 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) {
321 kfree_skb(skb);
322 return NET_RX_DROP;
323 }
324
325 if (!rp->checksum)
326 skb->ip_summed = CHECKSUM_UNNECESSARY;
327
328 if (skb->ip_summed != CHECKSUM_UNNECESSARY) {
329 if (skb->ip_summed == CHECKSUM_HW) {
330 skb->ip_summed = CHECKSUM_UNNECESSARY;
331 if (csum_ipv6_magic(&skb->nh.ipv6h->saddr,
332 &skb->nh.ipv6h->daddr,
333 skb->len, inet->num, skb->csum)) {
334 LIMIT_NETDEBUG(
335 printk(KERN_DEBUG "raw v6 hw csum failure.\n"));
336 skb->ip_summed = CHECKSUM_NONE;
337 }
338 }
339 if (skb->ip_summed == CHECKSUM_NONE)
340 skb->csum = ~csum_ipv6_magic(&skb->nh.ipv6h->saddr,
341 &skb->nh.ipv6h->daddr,
342 skb->len, inet->num, 0);
343 }
344
345 if (inet->hdrincl) {
346 if (skb->ip_summed != CHECKSUM_UNNECESSARY &&
347 (unsigned short)csum_fold(skb_checksum(skb, 0, skb->len, skb->csum))) {
348 /* FIXME: increment a raw6 drops counter here */
349 kfree_skb(skb);
350 return 0;
351 }
352 skb->ip_summed = CHECKSUM_UNNECESSARY;
353 }
354
355 rawv6_rcv_skb(sk, skb);
356 return 0;
357}
358
359
360/*
361 * This should be easy, if there is something there
362 * we return it, otherwise we block.
363 */
364
/*
 * Receive one datagram from a raw IPv6 socket.
 *
 * MSG_OOB is rejected with -EOPNOTSUPP and MSG_ERRQUEUE is served by
 * ipv6_recv_error().  Otherwise one skb is taken from the receive queue
 * and copied into msg->msg_iov; if it is longer than @len the copy is
 * truncated and MSG_TRUNC is set in msg->msg_flags.
 *
 * Returns the number of bytes copied, or the full datagram length when
 * the caller passed MSG_TRUNC in @flags.  On failure returns the error
 * from skb_recv_datagram() or from the copy; a checksum failure is
 * reported as -EAGAIN (MSG_DONTWAIT) or -EHOSTUNREACH.
 */
365static int rawv6_recvmsg(struct kiocb *iocb, struct sock *sk,
366 struct msghdr *msg, size_t len,
367 int noblock, int flags, int *addr_len)
368{
369 struct ipv6_pinfo *np = inet6_sk(sk);
370 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)msg->msg_name;
371 struct sk_buff *skb;
372 size_t copied;
373 int err;
374
375 if (flags & MSG_OOB)
376 return -EOPNOTSUPP;
377
378 if (addr_len)
379 *addr_len=sizeof(*sin6);
380
381 if (flags & MSG_ERRQUEUE)
382 return ipv6_recv_error(sk, msg, len);
383
384 skb = skb_recv_datagram(sk, flags, noblock, &err);
385 if (!skb)
386 goto out;
387
388 copied = skb->len;
389 if (copied > len) {
390 copied = len;
391 msg->msg_flags |= MSG_TRUNC;
392 }
393
/* Three copy strategies: checksum already known good; truncated copy
 * (verify the whole skb first, then copy the part that fits); or copy
 * and checksum in a single pass. */
394 if (skb->ip_summed==CHECKSUM_UNNECESSARY) {
395 err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
396 } else if (msg->msg_flags&MSG_TRUNC) {
397 if ((unsigned short)csum_fold(skb_checksum(skb, 0, skb->len, skb->csum)))
398 goto csum_copy_err;
399 err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
400 } else {
401 err = skb_copy_and_csum_datagram_iovec(skb, 0, msg->msg_iov);
/* -EINVAL from the combined copy is treated as a checksum failure. */
402 if (err == -EINVAL)
403 goto csum_copy_err;
404 }
405 if (err)
406 goto out_free;
407
408 /* Copy the address. */
409 if (sin6) {
410 sin6->sin6_family = AF_INET6;
411 ipv6_addr_copy(&sin6->sin6_addr, &skb->nh.ipv6h->saddr);
412 sin6->sin6_flowinfo = 0;
413 sin6->sin6_scope_id = 0;
/* A link-local source is only meaningful together with the
 * interface the packet arrived on. */
414 if (ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
415 sin6->sin6_scope_id = IP6CB(skb)->iif;
416 }
417
418 sock_recv_timestamp(msg, sk, skb);
419
420 if (np->rxopt.all)
421 datagram_recv_ctl(sk, msg, skb);
422
423 err = copied;
424 if (flags & MSG_TRUNC)
425 err = skb->len;
426
427out_free:
428 skb_free_datagram(sk, skb);
429out:
430 return err;
431
432csum_copy_err:
433 /* Clear queue. */
/* With MSG_PEEK the bad skb is still on the receive queue; unlink it
 * (if it is still at the head) so it is not returned again. */
434 if (flags&MSG_PEEK) {
435 int clear = 0;
436 spin_lock_irq(&sk->sk_receive_queue.lock);
437 if (skb == skb_peek(&sk->sk_receive_queue)) {
438 __skb_unlink(skb, &sk->sk_receive_queue);
439 clear = 1;
440 }
441 spin_unlock_irq(&sk->sk_receive_queue.lock);
442 if (clear)
443 kfree_skb(skb);
444 }
445
446 /* Error for blocking case is chosen to masquerade
447 as some normal condition.
448 */
449 err = (flags&MSG_DONTWAIT) ? -EAGAIN : -EHOSTUNREACH;
450 /* FIXME: increment a raw6 drops counter here */
451 goto out_free;
452}
453
/*
 * Fill in the checksum (when the socket has checksumming enabled) and
 * transmit the frames queued on the socket's write queue.
 *
 * @fl:  flow supplying the source/destination addresses and protocol
 *       for the pseudo-header
 * @rp:  raw socket state; rp->offset is the byte offset of the 16-bit
 *       checksum field from the transport header of the first skb
 * @len: payload length used for the offset check and the pseudo-header
 *
 * Returns the result of ip6_push_pending_frames(), 0 if checksumming is
 * enabled but the write queue is empty, or -EINVAL if the checksum field
 * does not fit inside @len bytes.
 * NOTE(review): on the -EINVAL path the queued frames are neither sent
 * nor flushed here -- confirm the caller deals with them.
 */
454static int rawv6_push_pending_frames(struct sock *sk, struct flowi *fl,
455 struct raw6_sock *rp, int len)
456{
457 struct sk_buff *skb;
458 int err = 0;
459 u16 *csum;
460 u32 tmp_csum;
461
462 if (!rp->checksum)
463 goto send;
464
465 if ((skb = skb_peek(&sk->sk_write_queue)) == NULL)
466 goto out;
467
/* Both bytes of the checksum field must lie inside the payload. */
468 if (rp->offset + 1 < len)
469 csum = (u16 *)(skb->h.raw + rp->offset);
470 else {
471 err = -EINVAL;
472 goto out;
473 }
474
475 /* TODO (miyazawa): hardware checksum offload should be checked here */
476 if (skb_queue_len(&sk->sk_write_queue) == 1) {
477 /*
478 * Only one fragment on the socket.
479 */
480 tmp_csum = skb->csum;
481 } else {
482 tmp_csum = 0;
483
/* Several fragments queued: combine their partial sums. */
484 skb_queue_walk(&sk->sk_write_queue, skb) {
485 tmp_csum = csum_add(tmp_csum, skb->csum);
486 }
487 }
488
489 /* in case cksum was not initialized */
490 if (unlikely(*csum))
491 tmp_csum = csum_sub(tmp_csum, *csum);
492
493 *csum = csum_ipv6_magic(&fl->fl6_src,
494 &fl->fl6_dst,
495 len, fl->proto, tmp_csum);
496
/* A computed checksum of zero is stored as all ones instead. */
497 if (*csum == 0)
498 *csum = -1;
499send:
500 err = ip6_push_pending_frames(sk);
501out:
502 return err;
503}
504
505static int rawv6_send_hdrinc(struct sock *sk, void *from, int length,
506 struct flowi *fl, struct rt6_info *rt,
507 unsigned int flags)
508{
509 struct inet_sock *inet = inet_sk(sk);
510 struct ipv6hdr *iph;
511 struct sk_buff *skb;
512 unsigned int hh_len;
513 int err;
514
515 if (length > rt->u.dst.dev->mtu) {
516 ipv6_local_error(sk, EMSGSIZE, fl, rt->u.dst.dev->mtu);
517 return -EMSGSIZE;
518 }
519 if (flags&MSG_PROBE)
520 goto out;
521
522 hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
523
524 skb = sock_alloc_send_skb(sk, length+hh_len+15,
525 flags&MSG_DONTWAIT, &err);
526 if (skb == NULL)
527 goto error;
528 skb_reserve(skb, hh_len);
529
530 skb->priority = sk->sk_priority;
531 skb->dst = dst_clone(&rt->u.dst);
532
533 skb->nh.ipv6h = iph = (struct ipv6hdr *)skb_put(skb, length);
534
535 skb->ip_summed = CHECKSUM_NONE;
536
537 skb->h.raw = skb->nh.raw;
538 err = memcpy_fromiovecend((void *)iph, from, 0, length);
539 if (err)
540 goto error_fault;
541
542 IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
543 err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, rt->u.dst.dev,
544 dst_output);
545 if (err > 0)
546 err = inet->recverr ? net_xmit_errno(err) : 0;
547 if (err)
548 goto error;
549out:
550 return 0;
551
552error_fault:
553 err = -EFAULT;
554 kfree_skb(skb);
555error:
556 IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
557 return err;
558}
559
560static void rawv6_probe_proto_opt(struct flowi *fl, struct msghdr *msg)
561{
562 struct iovec *iov;
563 u8 __user *type = NULL;
564 u8 __user *code = NULL;
565 int probed = 0;
566 int i;
567
568 if (!msg->msg_iov)
569 return;
570
571 for (i = 0; i < msg->msg_iovlen; i++) {
572 iov = &msg->msg_iov[i];
573 if (!iov)
574 continue;
575
576 switch (fl->proto) {
577 case IPPROTO_ICMPV6:
578 /* check if one-byte field is readable or not. */
579 if (iov->iov_base && iov->iov_len < 1)
580 break;
581
582 if (!type) {
583 type = iov->iov_base;
584 /* check if code field is readable or not. */
585 if (iov->iov_len > 1)
586 code = type + 1;
587 } else if (!code)
588 code = iov->iov_base;
589
590 if (type && code) {
591 get_user(fl->fl_icmp_type, type);
592 __get_user(fl->fl_icmp_code, code);
593 probed = 1;
594 }
595 break;
596 default:
597 probed = 1;
598 break;
599 }
600 if (probed)
601 break;
602 }
603}
604
605static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
606 struct msghdr *msg, size_t len)
607{
608 struct ipv6_txoptions opt_space;
609 struct sockaddr_in6 * sin6 = (struct sockaddr_in6 *) msg->msg_name;
610 struct in6_addr *daddr, *final_p = NULL, final;
611 struct inet_sock *inet = inet_sk(sk);
612 struct ipv6_pinfo *np = inet6_sk(sk);
613 struct raw6_sock *rp = raw6_sk(sk);
614 struct ipv6_txoptions *opt = NULL;
615 struct ip6_flowlabel *flowlabel = NULL;
616 struct dst_entry *dst = NULL;
617 struct flowi fl;
618 int addr_len = msg->msg_namelen;
619 int hlimit = -1;
620 u16 proto;
621 int err;
622
623 /* Rough check on arithmetic overflow,
624 better check is made in ip6_build_xmit
625 */
626 if (len < 0)
627 return -EMSGSIZE;
628
629 /* Mirror BSD error message compatibility */
630 if (msg->msg_flags & MSG_OOB)
631 return -EOPNOTSUPP;
632
633 /*
634 * Get and verify the address.
635 */
636 memset(&fl, 0, sizeof(fl));
637
638 if (sin6) {
639 if (addr_len < SIN6_LEN_RFC2133)
640 return -EINVAL;
641
642 if (sin6->sin6_family && sin6->sin6_family != AF_INET6)
643 return(-EAFNOSUPPORT);
644
645 /* port is the proto value [0..255] carried in nexthdr */
646 proto = ntohs(sin6->sin6_port);
647
648 if (!proto)
649 proto = inet->num;
650 else if (proto != inet->num)
651 return(-EINVAL);
652
653 if (proto > 255)
654 return(-EINVAL);
655
656 daddr = &sin6->sin6_addr;
657 if (np->sndflow) {
658 fl.fl6_flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK;
659 if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) {
660 flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
661 if (flowlabel == NULL)
662 return -EINVAL;
663 daddr = &flowlabel->dst;
664 }
665 }
666
667 /*
668 * Otherwise it will be difficult to maintain
669 * sk->sk_dst_cache.
670 */
671 if (sk->sk_state == TCP_ESTABLISHED &&
672 ipv6_addr_equal(daddr, &np->daddr))
673 daddr = &np->daddr;
674
675 if (addr_len >= sizeof(struct sockaddr_in6) &&
676 sin6->sin6_scope_id &&
677 ipv6_addr_type(daddr)&IPV6_ADDR_LINKLOCAL)
678 fl.oif = sin6->sin6_scope_id;
679 } else {
680 if (sk->sk_state != TCP_ESTABLISHED)
681 return -EDESTADDRREQ;
682
683 proto = inet->num;
684 daddr = &np->daddr;
685 fl.fl6_flowlabel = np->flow_label;
686 }
687
688 if (ipv6_addr_any(daddr)) {
689 /*
690 * unspecified destination address
691 * treated as error... is this correct ?
692 */
693 fl6_sock_release(flowlabel);
694 return(-EINVAL);
695 }
696
697 if (fl.oif == 0)
698 fl.oif = sk->sk_bound_dev_if;
699
700 if (msg->msg_controllen) {
701 opt = &opt_space;
702 memset(opt, 0, sizeof(struct ipv6_txoptions));
703 opt->tot_len = sizeof(struct ipv6_txoptions);
704
705 err = datagram_send_ctl(msg, &fl, opt, &hlimit);
706 if (err < 0) {
707 fl6_sock_release(flowlabel);
708 return err;
709 }
710 if ((fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) && !flowlabel) {
711 flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
712 if (flowlabel == NULL)
713 return -EINVAL;
714 }
715 if (!(opt->opt_nflen|opt->opt_flen))
716 opt = NULL;
717 }
718 if (opt == NULL)
719 opt = np->opt;
720 if (flowlabel)
721 opt = fl6_merge_options(&opt_space, flowlabel, opt);
722
723 fl.proto = proto;
724 rawv6_probe_proto_opt(&fl, msg);
725
726 ipv6_addr_copy(&fl.fl6_dst, daddr);
727 if (ipv6_addr_any(&fl.fl6_src) && !ipv6_addr_any(&np->saddr))
728 ipv6_addr_copy(&fl.fl6_src, &np->saddr);
729
730 /* merge ip6_build_xmit from ip6_output */
731 if (opt && opt->srcrt) {
732 struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
733 ipv6_addr_copy(&final, &fl.fl6_dst);
734 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
735 final_p = &final;
736 }
737
738 if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst))
739 fl.oif = np->mcast_oif;
740
741 err = ip6_dst_lookup(sk, &dst, &fl);
742 if (err)
743 goto out;
744 if (final_p)
745 ipv6_addr_copy(&fl.fl6_dst, final_p);
746
747 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
748 dst_release(dst);
749 goto out;
750 }
751
752 if (hlimit < 0) {
753 if (ipv6_addr_is_multicast(&fl.fl6_dst))
754 hlimit = np->mcast_hops;
755 else
756 hlimit = np->hop_limit;
757 if (hlimit < 0)
758 hlimit = dst_metric(dst, RTAX_HOPLIMIT);
759 if (hlimit < 0)
760 hlimit = ipv6_get_hoplimit(dst->dev);
761 }
762
763 if (msg->msg_flags&MSG_CONFIRM)
764 goto do_confirm;
765
766back_from_confirm:
767 if (inet->hdrincl) {
768 err = rawv6_send_hdrinc(sk, msg->msg_iov, len, &fl, (struct rt6_info*)dst, msg->msg_flags);
769 } else {
770 lock_sock(sk);
771 err = ip6_append_data(sk, ip_generic_getfrag, msg->msg_iov, len, 0,
772 hlimit, opt, &fl, (struct rt6_info*)dst, msg->msg_flags);
773
774 if (err)
775 ip6_flush_pending_frames(sk);
776 else if (!(msg->msg_flags & MSG_MORE))
777 err = rawv6_push_pending_frames(sk, &fl, rp, len);
778 }
779done:
780 ip6_dst_store(sk, dst,
781 ipv6_addr_equal(&fl.fl6_dst, &np->daddr) ?
782 &np->daddr : NULL);
783 if (err > 0)
784 err = np->recverr ? net_xmit_errno(err) : 0;
785
786 release_sock(sk);
787out:
788 fl6_sock_release(flowlabel);
789 return err<0?err:len;
790do_confirm:
791 dst_confirm(dst);
792 if (!(msg->msg_flags & MSG_PROBE) || len)
793 goto back_from_confirm;
794 err = 0;
795 goto done;
796}
797
798static int rawv6_seticmpfilter(struct sock *sk, int level, int optname,
799 char __user *optval, int optlen)
800{
801 switch (optname) {
802 case ICMPV6_FILTER:
803 if (optlen > sizeof(struct icmp6_filter))
804 optlen = sizeof(struct icmp6_filter);
805 if (copy_from_user(&raw6_sk(sk)->filter, optval, optlen))
806 return -EFAULT;
807 return 0;
808 default:
809 return -ENOPROTOOPT;
810 };
811
812 return 0;
813}
814
815static int rawv6_geticmpfilter(struct sock *sk, int level, int optname,
816 char __user *optval, int __user *optlen)
817{
818 int len;
819
820 switch (optname) {
821 case ICMPV6_FILTER:
822 if (get_user(len, optlen))
823 return -EFAULT;
824 if (len < 0)
825 return -EINVAL;
826 if (len > sizeof(struct icmp6_filter))
827 len = sizeof(struct icmp6_filter);
828 if (put_user(len, optlen))
829 return -EFAULT;
830 if (copy_to_user(optval, &raw6_sk(sk)->filter, len))
831 return -EFAULT;
832 return 0;
833 default:
834 return -ENOPROTOOPT;
835 };
836
837 return 0;
838}
839
840
841static int rawv6_setsockopt(struct sock *sk, int level, int optname,
842 char __user *optval, int optlen)
843{
844 struct raw6_sock *rp = raw6_sk(sk);
845 int val;
846
847 switch(level) {
848 case SOL_RAW:
849 break;
850
851 case SOL_ICMPV6:
852 if (inet_sk(sk)->num != IPPROTO_ICMPV6)
853 return -EOPNOTSUPP;
854 return rawv6_seticmpfilter(sk, level, optname, optval,
855 optlen);
856 case SOL_IPV6:
857 if (optname == IPV6_CHECKSUM)
858 break;
859 default:
860 return ipv6_setsockopt(sk, level, optname, optval,
861 optlen);
862 };
863
864 if (get_user(val, (int __user *)optval))
865 return -EFAULT;
866
867 switch (optname) {
868 case IPV6_CHECKSUM:
869 /* You may get strange result with a positive odd offset;
870 RFC2292bis agrees with me. */
871 if (val > 0 && (val&1))
872 return(-EINVAL);
873 if (val < 0) {
874 rp->checksum = 0;
875 } else {
876 rp->checksum = 1;
877 rp->offset = val;
878 }
879
880 return 0;
881 break;
882
883 default:
884 return(-ENOPROTOOPT);
885 }
886}
887
888static int rawv6_getsockopt(struct sock *sk, int level, int optname,
889 char __user *optval, int __user *optlen)
890{
891 struct raw6_sock *rp = raw6_sk(sk);
892 int val, len;
893
894 switch(level) {
895 case SOL_RAW:
896 break;
897
898 case SOL_ICMPV6:
899 if (inet_sk(sk)->num != IPPROTO_ICMPV6)
900 return -EOPNOTSUPP;
901 return rawv6_geticmpfilter(sk, level, optname, optval,
902 optlen);
903 case SOL_IPV6:
904 if (optname == IPV6_CHECKSUM)
905 break;
906 default:
907 return ipv6_getsockopt(sk, level, optname, optval,
908 optlen);
909 };
910
911 if (get_user(len,optlen))
912 return -EFAULT;
913
914 switch (optname) {
915 case IPV6_CHECKSUM:
916 if (rp->checksum == 0)
917 val = -1;
918 else
919 val = rp->offset;
920 break;
921
922 default:
923 return -ENOPROTOOPT;
924 }
925
926 len = min_t(unsigned int, sizeof(int), len);
927
928 if (put_user(len, optlen))
929 return -EFAULT;
930 if (copy_to_user(optval,&val,len))
931 return -EFAULT;
932 return 0;
933}
934
935static int rawv6_ioctl(struct sock *sk, int cmd, unsigned long arg)
936{
937 switch(cmd) {
938 case SIOCOUTQ:
939 {
940 int amount = atomic_read(&sk->sk_wmem_alloc);
941 return put_user(amount, (int __user *)arg);
942 }
943 case SIOCINQ:
944 {
945 struct sk_buff *skb;
946 int amount = 0;
947
948 spin_lock_irq(&sk->sk_receive_queue.lock);
949 skb = skb_peek(&sk->sk_receive_queue);
950 if (skb != NULL)
951 amount = skb->tail - skb->h.raw;
952 spin_unlock_irq(&sk->sk_receive_queue.lock);
953 return put_user(amount, (int __user *)arg);
954 }
955
956 default:
957 return -ENOIOCTLCMD;
958 }
959}
960
961static void rawv6_close(struct sock *sk, long timeout)
962{
963 if (inet_sk(sk)->num == IPPROTO_RAW)
964 ip6_ra_control(sk, -1, NULL);
965
966 sk_common_release(sk);
967}
968
969static int rawv6_init_sk(struct sock *sk)
970{
971 if (inet_sk(sk)->num == IPPROTO_ICMPV6) {
972 struct raw6_sock *rp = raw6_sk(sk);
973 rp->checksum = 1;
974 rp->offset = 2;
975 }
976 return(0);
977}
978
979struct proto rawv6_prot = {
980 .name = "RAWv6",
981 .owner = THIS_MODULE,
982 .close = rawv6_close,
983 .connect = ip6_datagram_connect,
984 .disconnect = udp_disconnect,
985 .ioctl = rawv6_ioctl,
986 .init = rawv6_init_sk,
987 .destroy = inet6_destroy_sock,
988 .setsockopt = rawv6_setsockopt,
989 .getsockopt = rawv6_getsockopt,
990 .sendmsg = rawv6_sendmsg,
991 .recvmsg = rawv6_recvmsg,
992 .bind = rawv6_bind,
993 .backlog_rcv = rawv6_rcv_skb,
994 .hash = raw_v6_hash,
995 .unhash = raw_v6_unhash,
996 .obj_size = sizeof(struct raw6_sock),
997};
998
999#ifdef CONFIG_PROC_FS
/* Cursor of the /proc "raw6" walk: the hash bucket currently visited. */
struct raw6_iter_state {
	int bucket;
};

/* Fetch the iterator state stored in a seq_file's private pointer. */
#define raw6_seq_private(seq) ((struct raw6_iter_state *)(seq)->private)
1005
1006static struct sock *raw6_get_first(struct seq_file *seq)
1007{
1008 struct sock *sk;
1009 struct hlist_node *node;
1010 struct raw6_iter_state* state = raw6_seq_private(seq);
1011
1012 for (state->bucket = 0; state->bucket < RAWV6_HTABLE_SIZE; ++state->bucket)
1013 sk_for_each(sk, node, &raw_v6_htable[state->bucket])
1014 if (sk->sk_family == PF_INET6)
1015 goto out;
1016 sk = NULL;
1017out:
1018 return sk;
1019}
1020
1021static struct sock *raw6_get_next(struct seq_file *seq, struct sock *sk)
1022{
1023 struct raw6_iter_state* state = raw6_seq_private(seq);
1024
1025 do {
1026 sk = sk_next(sk);
1027try_again:
1028 ;
1029 } while (sk && sk->sk_family != PF_INET6);
1030
1031 if (!sk && ++state->bucket < RAWV6_HTABLE_SIZE) {
1032 sk = sk_head(&raw_v6_htable[state->bucket]);
1033 goto try_again;
1034 }
1035 return sk;
1036}
1037
1038static struct sock *raw6_get_idx(struct seq_file *seq, loff_t pos)
1039{
1040 struct sock *sk = raw6_get_first(seq);
1041 if (sk)
1042 while (pos && (sk = raw6_get_next(seq, sk)) != NULL)
1043 --pos;
1044 return pos ? NULL : sk;
1045}
1046
1047static void *raw6_seq_start(struct seq_file *seq, loff_t *pos)
1048{
1049 read_lock(&raw_v6_lock);
1050 return *pos ? raw6_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
1051}
1052
1053static void *raw6_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1054{
1055 struct sock *sk;
1056
1057 if (v == SEQ_START_TOKEN)
1058 sk = raw6_get_first(seq);
1059 else
1060 sk = raw6_get_next(seq, v);
1061 ++*pos;
1062 return sk;
1063}
1064
1065static void raw6_seq_stop(struct seq_file *seq, void *v)
1066{
1067 read_unlock(&raw_v6_lock);
1068}
1069
1070static void raw6_sock_seq_show(struct seq_file *seq, struct sock *sp, int i)
1071{
1072 struct ipv6_pinfo *np = inet6_sk(sp);
1073 struct in6_addr *dest, *src;
1074 __u16 destp, srcp;
1075
1076 dest = &np->daddr;
1077 src = &np->rcv_saddr;
1078 destp = 0;
1079 srcp = inet_sk(sp)->num;
1080 seq_printf(seq,
1081 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1082 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p\n",
1083 i,
1084 src->s6_addr32[0], src->s6_addr32[1],
1085 src->s6_addr32[2], src->s6_addr32[3], srcp,
1086 dest->s6_addr32[0], dest->s6_addr32[1],
1087 dest->s6_addr32[2], dest->s6_addr32[3], destp,
1088 sp->sk_state,
1089 atomic_read(&sp->sk_wmem_alloc),
1090 atomic_read(&sp->sk_rmem_alloc),
1091 0, 0L, 0,
1092 sock_i_uid(sp), 0,
1093 sock_i_ino(sp),
1094 atomic_read(&sp->sk_refcnt), sp);
1095}
1096
1097static int raw6_seq_show(struct seq_file *seq, void *v)
1098{
1099 if (v == SEQ_START_TOKEN)
1100 seq_printf(seq,
1101 " sl "
1102 "local_address "
1103 "remote_address "
1104 "st tx_queue rx_queue tr tm->when retrnsmt"
1105 " uid timeout inode\n");
1106 else
1107 raw6_sock_seq_show(seq, v, raw6_seq_private(seq)->bucket);
1108 return 0;
1109}
1110
1111static struct seq_operations raw6_seq_ops = {
1112 .start = raw6_seq_start,
1113 .next = raw6_seq_next,
1114 .stop = raw6_seq_stop,
1115 .show = raw6_seq_show,
1116};
1117
1118static int raw6_seq_open(struct inode *inode, struct file *file)
1119{
1120 struct seq_file *seq;
1121 int rc = -ENOMEM;
1122 struct raw6_iter_state *s = kmalloc(sizeof(*s), GFP_KERNEL);
1123 if (!s)
1124 goto out;
1125 rc = seq_open(file, &raw6_seq_ops);
1126 if (rc)
1127 goto out_kfree;
1128 seq = file->private_data;
1129 seq->private = s;
1130 memset(s, 0, sizeof(*s));
1131out:
1132 return rc;
1133out_kfree:
1134 kfree(s);
1135 goto out;
1136}
1137
1138static struct file_operations raw6_seq_fops = {
1139 .owner = THIS_MODULE,
1140 .open = raw6_seq_open,
1141 .read = seq_read,
1142 .llseek = seq_lseek,
1143 .release = seq_release_private,
1144};
1145
1146int __init raw6_proc_init(void)
1147{
1148 if (!proc_net_fops_create("raw6", S_IRUGO, &raw6_seq_fops))
1149 return -ENOMEM;
1150 return 0;
1151}
1152
/* Remove the "raw6" proc entry created by raw6_proc_init(). */
void raw6_proc_exit(void)
{
	proc_net_remove("raw6");
}
1157#endif /* CONFIG_PROC_FS */
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
new file mode 100644
index 000000000000..59e7c6317872
--- /dev/null
+++ b/net/ipv6/reassembly.c
@@ -0,0 +1,771 @@
1/*
2 * IPv6 fragment reassembly
3 * Linux INET6 implementation
4 *
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
7 *
8 * $Id: reassembly.c,v 1.26 2001/03/07 22:00:57 davem Exp $
9 *
10 * Based on: net/ipv4/ip_fragment.c
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
16 */
17
18/*
19 * Fixes:
20 * Andi Kleen Make it work with multiple hosts.
21 * More RFC compliance.
22 *
23 * Horst von Brand Add missing #include <linux/string.h>
24 * Alexey Kuznetsov SMP races, threading, cleanup.
25 * Patrick McHardy LRU queue of frag heads for evictor.
26 * Mitsuru KANDA @USAGI Register inet6_protocol{}.
27 * David Stevens and
28 * YOSHIFUJI,H. @USAGI Always remove fragment header to
29 * calculate ICV correctly.
30 */
31#include <linux/config.h>
32#include <linux/errno.h>
33#include <linux/types.h>
34#include <linux/string.h>
35#include <linux/socket.h>
36#include <linux/sockios.h>
37#include <linux/jiffies.h>
38#include <linux/net.h>
39#include <linux/list.h>
40#include <linux/netdevice.h>
41#include <linux/in6.h>
42#include <linux/ipv6.h>
43#include <linux/icmpv6.h>
44#include <linux/random.h>
45#include <linux/jhash.h>
46
47#include <net/sock.h>
48#include <net/snmp.h>
49
50#include <net/ipv6.h>
51#include <net/protocol.h>
52#include <net/transp_v6.h>
53#include <net/rawv6.h>
54#include <net/ndisc.h>
55#include <net/addrconf.h>
56
57int sysctl_ip6frag_high_thresh = 256*1024;
58int sysctl_ip6frag_low_thresh = 192*1024;
59
60int sysctl_ip6frag_time = IPV6_FRAG_TIMEOUT;
61
62struct ip6frag_skb_cb
63{
64 struct inet6_skb_parm h;
65 int offset;
66};
67
68#define FRAG6_CB(skb) ((struct ip6frag_skb_cb*)((skb)->cb))
69
70
71/*
72 * Equivalent of ipv4 struct ipq
73 */
74
75struct frag_queue
76{
77 struct frag_queue *next;
78 struct list_head lru_list; /* lru list member */
79
80 __u32 id; /* fragment id */
81 struct in6_addr saddr;
82 struct in6_addr daddr;
83
84 spinlock_t lock;
85 atomic_t refcnt;
86 struct timer_list timer; /* expire timer */
87 struct sk_buff *fragments;
88 int len;
89 int meat;
90 int iif;
91 struct timeval stamp;
92 unsigned int csum;
93 __u8 last_in; /* has first/last segment arrived? */
94#define COMPLETE 4
95#define FIRST_IN 2
96#define LAST_IN 1
97 __u16 nhoffset;
98 struct frag_queue **pprev;
99};
100
101/* Hash table. */
102
103#define IP6Q_HASHSZ 64
104
105static struct frag_queue *ip6_frag_hash[IP6Q_HASHSZ];
106static DEFINE_RWLOCK(ip6_frag_lock);
107static u32 ip6_frag_hash_rnd;
108static LIST_HEAD(ip6_frag_lru_list);
109int ip6_frag_nqueues = 0;
110
111static __inline__ void __fq_unlink(struct frag_queue *fq)
112{
113 if(fq->next)
114 fq->next->pprev = fq->pprev;
115 *fq->pprev = fq->next;
116 list_del(&fq->lru_list);
117 ip6_frag_nqueues--;
118}
119
120static __inline__ void fq_unlink(struct frag_queue *fq)
121{
122 write_lock(&ip6_frag_lock);
123 __fq_unlink(fq);
124 write_unlock(&ip6_frag_lock);
125}
126
127static unsigned int ip6qhashfn(u32 id, struct in6_addr *saddr,
128 struct in6_addr *daddr)
129{
130 u32 a, b, c;
131
132 a = saddr->s6_addr32[0];
133 b = saddr->s6_addr32[1];
134 c = saddr->s6_addr32[2];
135
136 a += JHASH_GOLDEN_RATIO;
137 b += JHASH_GOLDEN_RATIO;
138 c += ip6_frag_hash_rnd;
139 __jhash_mix(a, b, c);
140
141 a += saddr->s6_addr32[3];
142 b += daddr->s6_addr32[0];
143 c += daddr->s6_addr32[1];
144 __jhash_mix(a, b, c);
145
146 a += daddr->s6_addr32[2];
147 b += daddr->s6_addr32[3];
148 c += id;
149 __jhash_mix(a, b, c);
150
151 return c & (IP6Q_HASHSZ - 1);
152}
153
154static struct timer_list ip6_frag_secret_timer;
155int sysctl_ip6frag_secret_interval = 10 * 60 * HZ;
156
157static void ip6_frag_secret_rebuild(unsigned long dummy)
158{
159 unsigned long now = jiffies;
160 int i;
161
162 write_lock(&ip6_frag_lock);
163 get_random_bytes(&ip6_frag_hash_rnd, sizeof(u32));
164 for (i = 0; i < IP6Q_HASHSZ; i++) {
165 struct frag_queue *q;
166
167 q = ip6_frag_hash[i];
168 while (q) {
169 struct frag_queue *next = q->next;
170 unsigned int hval = ip6qhashfn(q->id,
171 &q->saddr,
172 &q->daddr);
173
174 if (hval != i) {
175 /* Unlink. */
176 if (q->next)
177 q->next->pprev = q->pprev;
178 *q->pprev = q->next;
179
180 /* Relink to new hash chain. */
181 if ((q->next = ip6_frag_hash[hval]) != NULL)
182 q->next->pprev = &q->next;
183 ip6_frag_hash[hval] = q;
184 q->pprev = &ip6_frag_hash[hval];
185 }
186
187 q = next;
188 }
189 }
190 write_unlock(&ip6_frag_lock);
191
192 mod_timer(&ip6_frag_secret_timer, now + sysctl_ip6frag_secret_interval);
193}
194
195atomic_t ip6_frag_mem = ATOMIC_INIT(0);
196
197/* Memory Tracking Functions. */
198static inline void frag_kfree_skb(struct sk_buff *skb, int *work)
199{
200 if (work)
201 *work -= skb->truesize;
202 atomic_sub(skb->truesize, &ip6_frag_mem);
203 kfree_skb(skb);
204}
205
206static inline void frag_free_queue(struct frag_queue *fq, int *work)
207{
208 if (work)
209 *work -= sizeof(struct frag_queue);
210 atomic_sub(sizeof(struct frag_queue), &ip6_frag_mem);
211 kfree(fq);
212}
213
214static inline struct frag_queue *frag_alloc_queue(void)
215{
216 struct frag_queue *fq = kmalloc(sizeof(struct frag_queue), GFP_ATOMIC);
217
218 if(!fq)
219 return NULL;
220 atomic_add(sizeof(struct frag_queue), &ip6_frag_mem);
221 return fq;
222}
223
224/* Destruction primitives. */
225
226/* Complete destruction of fq. */
227static void ip6_frag_destroy(struct frag_queue *fq, int *work)
228{
229 struct sk_buff *fp;
230
231 BUG_TRAP(fq->last_in&COMPLETE);
232 BUG_TRAP(del_timer(&fq->timer) == 0);
233
234 /* Release all fragment data. */
235 fp = fq->fragments;
236 while (fp) {
237 struct sk_buff *xp = fp->next;
238
239 frag_kfree_skb(fp, work);
240 fp = xp;
241 }
242
243 frag_free_queue(fq, work);
244}
245
246static __inline__ void fq_put(struct frag_queue *fq, int *work)
247{
248 if (atomic_dec_and_test(&fq->refcnt))
249 ip6_frag_destroy(fq, work);
250}
251
252/* Kill fq entry. It is not destroyed immediately,
253 * because caller (and someone more) holds reference count.
254 */
255static __inline__ void fq_kill(struct frag_queue *fq)
256{
257 if (del_timer(&fq->timer))
258 atomic_dec(&fq->refcnt);
259
260 if (!(fq->last_in & COMPLETE)) {
261 fq_unlink(fq);
262 atomic_dec(&fq->refcnt);
263 fq->last_in |= COMPLETE;
264 }
265}
266
267static void ip6_evictor(void)
268{
269 struct frag_queue *fq;
270 struct list_head *tmp;
271 int work;
272
273 work = atomic_read(&ip6_frag_mem) - sysctl_ip6frag_low_thresh;
274 if (work <= 0)
275 return;
276
277 while(work > 0) {
278 read_lock(&ip6_frag_lock);
279 if (list_empty(&ip6_frag_lru_list)) {
280 read_unlock(&ip6_frag_lock);
281 return;
282 }
283 tmp = ip6_frag_lru_list.next;
284 fq = list_entry(tmp, struct frag_queue, lru_list);
285 atomic_inc(&fq->refcnt);
286 read_unlock(&ip6_frag_lock);
287
288 spin_lock(&fq->lock);
289 if (!(fq->last_in&COMPLETE))
290 fq_kill(fq);
291 spin_unlock(&fq->lock);
292
293 fq_put(fq, &work);
294 IP6_INC_STATS_BH(IPSTATS_MIB_REASMFAILS);
295 }
296}
297
298static void ip6_frag_expire(unsigned long data)
299{
300 struct frag_queue *fq = (struct frag_queue *) data;
301
302 spin_lock(&fq->lock);
303
304 if (fq->last_in & COMPLETE)
305 goto out;
306
307 fq_kill(fq);
308
309 IP6_INC_STATS_BH(IPSTATS_MIB_REASMTIMEOUT);
310 IP6_INC_STATS_BH(IPSTATS_MIB_REASMFAILS);
311
312 /* Send error only if the first segment arrived. */
313 if (fq->last_in&FIRST_IN && fq->fragments) {
314 struct net_device *dev = dev_get_by_index(fq->iif);
315
316 /*
317 But use as source device on which LAST ARRIVED
318 segment was received. And do not use fq->dev
319 pointer directly, device might already disappeared.
320 */
321 if (dev) {
322 fq->fragments->dev = dev;
323 icmpv6_send(fq->fragments, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0,
324 dev);
325 dev_put(dev);
326 }
327 }
328out:
329 spin_unlock(&fq->lock);
330 fq_put(fq, NULL);
331}
332
333/* Creation primitives. */
334
335
336static struct frag_queue *ip6_frag_intern(unsigned int hash,
337 struct frag_queue *fq_in)
338{
339 struct frag_queue *fq;
340
341 write_lock(&ip6_frag_lock);
342#ifdef CONFIG_SMP
343 for (fq = ip6_frag_hash[hash]; fq; fq = fq->next) {
344 if (fq->id == fq_in->id &&
345 ipv6_addr_equal(&fq_in->saddr, &fq->saddr) &&
346 ipv6_addr_equal(&fq_in->daddr, &fq->daddr)) {
347 atomic_inc(&fq->refcnt);
348 write_unlock(&ip6_frag_lock);
349 fq_in->last_in |= COMPLETE;
350 fq_put(fq_in, NULL);
351 return fq;
352 }
353 }
354#endif
355 fq = fq_in;
356
357 if (!mod_timer(&fq->timer, jiffies + sysctl_ip6frag_time))
358 atomic_inc(&fq->refcnt);
359
360 atomic_inc(&fq->refcnt);
361 if((fq->next = ip6_frag_hash[hash]) != NULL)
362 fq->next->pprev = &fq->next;
363 ip6_frag_hash[hash] = fq;
364 fq->pprev = &ip6_frag_hash[hash];
365 INIT_LIST_HEAD(&fq->lru_list);
366 list_add_tail(&fq->lru_list, &ip6_frag_lru_list);
367 ip6_frag_nqueues++;
368 write_unlock(&ip6_frag_lock);
369 return fq;
370}
371
372
373static struct frag_queue *
374ip6_frag_create(unsigned int hash, u32 id, struct in6_addr *src, struct in6_addr *dst)
375{
376 struct frag_queue *fq;
377
378 if ((fq = frag_alloc_queue()) == NULL)
379 goto oom;
380
381 memset(fq, 0, sizeof(struct frag_queue));
382
383 fq->id = id;
384 ipv6_addr_copy(&fq->saddr, src);
385 ipv6_addr_copy(&fq->daddr, dst);
386
387 init_timer(&fq->timer);
388 fq->timer.function = ip6_frag_expire;
389 fq->timer.data = (long) fq;
390 spin_lock_init(&fq->lock);
391 atomic_set(&fq->refcnt, 1);
392
393 return ip6_frag_intern(hash, fq);
394
395oom:
396 IP6_INC_STATS_BH(IPSTATS_MIB_REASMFAILS);
397 return NULL;
398}
399
400static __inline__ struct frag_queue *
401fq_find(u32 id, struct in6_addr *src, struct in6_addr *dst)
402{
403 struct frag_queue *fq;
404 unsigned int hash = ip6qhashfn(id, src, dst);
405
406 read_lock(&ip6_frag_lock);
407 for(fq = ip6_frag_hash[hash]; fq; fq = fq->next) {
408 if (fq->id == id &&
409 ipv6_addr_equal(src, &fq->saddr) &&
410 ipv6_addr_equal(dst, &fq->daddr)) {
411 atomic_inc(&fq->refcnt);
412 read_unlock(&ip6_frag_lock);
413 return fq;
414 }
415 }
416 read_unlock(&ip6_frag_lock);
417
418 return ip6_frag_create(hash, id, src, dst);
419}
420
421
/*
 * Add one received fragment to a reassembly queue.
 *
 * fq    - queue the fragment belongs to; ipv6_frag_rcv() calls this with
 *         fq->lock held.
 * skb   - the fragment; on the "err" path it is freed here.
 * fhdr  - fragment header inside skb.
 * nhoff - stored as fq->nhoffset when this turns out to be the fragment
 *         with offset 0.
 *
 * The fragment is trimmed so that it does not overlap its neighbours,
 * inserted into fq->fragments in offset order, and fq->len, fq->meat,
 * fq->iif, fq->stamp and fq->last_in are updated accordingly.
 */
422static void ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
423 struct frag_hdr *fhdr, int nhoff)
424{
425 struct sk_buff *prev, *next;
426 int offset, end;
427
 /* A queue that was already killed or reassembled takes no more data. */
428 if (fq->last_in & COMPLETE)
429 goto err;
430
 /*
  * offset: frag_off with its low three bits masked away.
  * end: offset plus the payload length, less the bytes lying between
  * the end of the IPv6 header and the end of the fragment header.
  */
431 offset = ntohs(fhdr->frag_off) & ~0x7;
432 end = offset + (ntohs(skb->nh.ipv6h->payload_len) -
433 ((u8 *) (fhdr + 1) - (u8 *) (skb->nh.ipv6h + 1)));
434
 /*
  * Reassembled payload would exceed IPV6_MAXPLEN: report a parameter
  * problem pointing at the frag_off field.
  * NOTE(review): skb is not freed on this path; presumably
  * icmpv6_param_prob() consumes it - confirm.
  */
435 if ((unsigned int)end > IPV6_MAXPLEN) {
436 IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
437 icmpv6_param_prob(skb,ICMPV6_HDR_FIELD, (u8*)&fhdr->frag_off - skb->nh.raw);
438 return;
439 }
440
 /*
  * Take the checksum of everything up to the end of the fragment
  * header out of a hardware-computed checksum.
  */
441 if (skb->ip_summed == CHECKSUM_HW)
442 skb->csum = csum_sub(skb->csum,
443 csum_partial(skb->nh.raw, (u8*)(fhdr+1)-skb->nh.raw, 0));
444
445 /* Is this the final fragment? */
446 if (!(fhdr->frag_off & htons(IP6_MF))) {
447 /* If we already have some bits beyond end
448 * or have different end, the segment is corrupted.
449 */
450 if (end < fq->len ||
451 ((fq->last_in & LAST_IN) && end != fq->len))
452 goto err;
453 fq->last_in |= LAST_IN;
454 fq->len = end;
455 } else {
456 /* Check if the fragment is rounded to 8 bytes.
457 * Required by the RFC.
458 */
459 if (end & 0x7) {
460 /* RFC2460 says always send parameter problem in
461 * this case. -DaveM
462 */
463 IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
464 icmpv6_param_prob(skb, ICMPV6_HDR_FIELD,
465 offsetof(struct ipv6hdr, payload_len));
466 return;
467 }
468 if (end > fq->len) {
469 /* Some bits beyond end -> corruption. */
470 if (fq->last_in & LAST_IN)
471 goto err;
472 fq->len = end;
473 }
474 }
475
 /* A fragment carrying no data at all is rejected. */
476 if (end == offset)
477 goto err;
478
479 /* Point into the IP datagram 'data' part. */
480 if (!pskb_pull(skb, (u8 *) (fhdr + 1) - skb->data))
481 goto err;
 /*
  * Drop anything beyond the computed end; the checksum state is reset
  * to CHECKSUM_NONE unless it was CHECKSUM_UNNECESSARY.
  */
482 if (end-offset < skb->len) {
483 if (pskb_trim(skb, end - offset))
484 goto err;
485 if (skb->ip_summed != CHECKSUM_UNNECESSARY)
486 skb->ip_summed = CHECKSUM_NONE;
487 }
488
489 /* Find out which fragments are in front and at the back of us
490 * in the chain of fragments so far. We must know where to put
491 * this fragment, right?
492 */
493 prev = NULL;
494 for(next = fq->fragments; next != NULL; next = next->next) {
495 if (FRAG6_CB(next)->offset >= offset)
496 break; /* bingo! */
497 prev = next;
498 }
499
500 /* We found where to put this one. Check for overlap with
501 * preceding fragment, and, if needed, align things so that
502 * any overlaps are eliminated.
503 */
504 if (prev) {
 /* i: bytes of the new fragment already covered by prev. */
505 int i = (FRAG6_CB(prev)->offset + prev->len) - offset;
506
507 if (i > 0) {
508 offset += i;
509 if (end <= offset)
510 goto err;
511 if (!pskb_pull(skb, i))
512 goto err;
513 if (skb->ip_summed != CHECKSUM_UNNECESSARY)
514 skb->ip_summed = CHECKSUM_NONE;
515 }
516 }
517
518 /* Look for overlap with succeeding segments.
519 * If we can merge fragments, do it.
520 */
521 while (next && FRAG6_CB(next)->offset < end) {
522 int i = end - FRAG6_CB(next)->offset; /* overlap is 'i' bytes */
523
524 if (i < next->len) {
525 /* Eat head of the next overlapped fragment
526 * and leave the loop. The next ones cannot overlap.
527 */
528 if (!pskb_pull(next, i))
529 goto err;
530 FRAG6_CB(next)->offset += i; /* next fragment */
531 fq->meat -= i;
532 if (next->ip_summed != CHECKSUM_UNNECESSARY)
533 next->ip_summed = CHECKSUM_NONE;
534 break;
535 } else {
536 struct sk_buff *free_it = next;
537
538 /* Old fragment is completely overridden with
539 * new one drop it.
540 */
541 next = next->next;
542
543 if (prev)
544 prev->next = next;
545 else
546 fq->fragments = next;
547
548 fq->meat -= free_it->len;
549 frag_kfree_skb(free_it, NULL);
550 }
551 }
552
553 FRAG6_CB(skb)->offset = offset;
554
555 /* Insert this fragment in the chain of fragments. */
556 skb->next = next;
557 if (prev)
558 prev->next = skb;
559 else
560 fq->fragments = skb;
561
 /*
  * Remember the receiving interface by index only and clear the device
  * pointer of the queued skb; then account the new data and charge its
  * truesize to ip6_frag_mem.
  */
562 if (skb->dev)
563 fq->iif = skb->dev->ifindex;
564 skb->dev = NULL;
565 fq->stamp = skb->stamp;
566 fq->meat += skb->len;
567 atomic_add(skb->truesize, &ip6_frag_mem);
568
569 /* The first fragment.
570 * nhoffset is obtained from the first fragment, of course.
571 */
572 if (offset == 0) {
573 fq->nhoffset = nhoff;
574 fq->last_in |= FIRST_IN;
575 }
 /* The queue was just used: move it to the tail of the LRU list. */
576 write_lock(&ip6_frag_lock);
577 list_move_tail(&fq->lru_list, &ip6_frag_lru_list);
578 write_unlock(&ip6_frag_lock);
579 return;
580
 /* Rejected fragment: count a reassembly failure and free the skb. */
581err:
582 IP6_INC_STATS(IPSTATS_MIB_REASMFAILS);
583 kfree_skb(skb);
584}
585
586/*
587 * Glue the fragments queued on fq together into a single packet.
588 * Returns 1 on success, after storing the packet in *skb_in and the
589 * next-header offset in *nhoffp, and -1 on failure by any reason.
590 *
591 * It is called with locked fq, and caller must check that
592 * queue is eligible for reassembly i.e. it is not COMPLETE,
593 * the last and the first frames arrived and all the bits are here.
594 */
595static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in,
596 unsigned int *nhoffp,
597 struct net_device *dev)
598{
599 struct sk_buff *fp, *head = fq->fragments;
600 int payload_len;
601 unsigned int nhoff;
602
 /*
  * Stop the timer and unhash the queue first.  On the failure paths
  * below the fragments stay attached to fq, so ip6_frag_destroy()
  * frees them once the last reference is dropped.
  */
603 fq_kill(fq);
604
605 BUG_TRAP(head != NULL);
606 BUG_TRAP(FRAG6_CB(head)->offset == 0);
607
608 /* Unfragmented part is taken from the first segment. */
 /*
  * Payload length of the result: the headers of the first fragment
  * after the IPv6 header, minus the fragment header that is about to
  * be removed, plus the total fragmented length fq->len.
  */
609 payload_len = (head->data - head->nh.raw) - sizeof(struct ipv6hdr) + fq->len - sizeof(struct frag_hdr);
610 if (payload_len > IPV6_MAXPLEN)
611 goto out_oversize;
612
613 /* Head of list must not be cloned. */
614 if (skb_cloned(head) && pskb_expand_head(head, 0, 0, GFP_ATOMIC))
615 goto out_oom;
616
617 /* If the first fragment is fragmented itself, we split
618 * it to two chunks: the first with data and paged part
619 * and the second, holding only fragments. */
620 if (skb_shinfo(head)->frag_list) {
621 struct sk_buff *clone;
622 int i, plen = 0;
623
624 if ((clone = alloc_skb(0, GFP_ATOMIC)) == NULL)
625 goto out_oom;
 /* Link the empty skb right behind head and hand it head's frag_list. */
626 clone->next = head->next;
627 head->next = clone;
628 skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list;
629 skb_shinfo(head)->frag_list = NULL;
 /* plen: bytes held in head's page frags, which stay with head. */
630 for (i=0; i<skb_shinfo(head)->nr_frags; i++)
631 plen += skb_shinfo(head)->frags[i].size;
632 clone->len = clone->data_len = head->data_len - plen;
633 head->data_len -= clone->len;
634 head->len -= clone->len;
635 clone->csum = 0;
636 clone->ip_summed = head->ip_summed;
637 atomic_add(clone->truesize, &ip6_frag_mem);
638 }
639
640 /* We have to remove fragment header from datagram and to relocate
641 * header in order to calculate ICV correctly. */
 /*
  * The byte at nhoff is overwritten with the first byte at head->h.raw,
  * then everything in front of the fragment header is shifted forward
  * by sizeof(struct frag_hdr) so that it overwrites that header.
  */
642 nhoff = fq->nhoffset;
643 head->nh.raw[nhoff] = head->h.raw[0];
644 memmove(head->head + sizeof(struct frag_hdr), head->head,
645 (head->data - head->head) - sizeof(struct frag_hdr));
646 head->mac.raw += sizeof(struct frag_hdr);
647 head->nh.raw += sizeof(struct frag_hdr);
648
 /*
  * The remaining fragments become head's frag_list; head's data area is
  * extended back to the start of the network header.
  */
649 skb_shinfo(head)->frag_list = head->next;
650 head->h.raw = head->data;
651 skb_push(head, head->data - head->nh.raw);
652 atomic_sub(head->truesize, &ip6_frag_mem);
653
 /*
  * Fold length, truesize and checksum of every other fragment into
  * head and uncharge each of them from ip6_frag_mem.
  */
654 for (fp=head->next; fp; fp = fp->next) {
655 head->data_len += fp->len;
656 head->len += fp->len;
657 if (head->ip_summed != fp->ip_summed)
658 head->ip_summed = CHECKSUM_NONE;
659 else if (head->ip_summed == CHECKSUM_HW)
660 head->csum = csum_add(head->csum, fp->csum);
661 head->truesize += fp->truesize;
662 atomic_sub(fp->truesize, &ip6_frag_mem);
663 }
664
665 head->next = NULL;
666 head->dev = dev;
667 head->stamp = fq->stamp;
668 head->nh.ipv6h->payload_len = htons(payload_len);
669
670 *skb_in = head;
671
672 /* Yes, and fold redundant checksum back. 8) */
673 if (head->ip_summed == CHECKSUM_HW)
674 head->csum = csum_partial(head->nh.raw, head->h.raw-head->nh.raw, head->csum);
675
676 IP6_INC_STATS_BH(IPSTATS_MIB_REASMOKS);
 /* The skbs now belong to the caller; nothing is left for destroy. */
677 fq->fragments = NULL;
678 *nhoffp = nhoff;
679 return 1;
680
681out_oversize:
682 if (net_ratelimit())
683 printk(KERN_DEBUG "ip6_frag_reasm: payload len = %d\n", payload_len);
684 goto out_fail;
685out_oom:
686 if (net_ratelimit())
687 printk(KERN_DEBUG "ip6_frag_reasm: no memory for reassembly\n");
688out_fail:
689 IP6_INC_STATS_BH(IPSTATS_MIB_REASMFAILS);
690 return -1;
691}
692
693static int ipv6_frag_rcv(struct sk_buff **skbp, unsigned int *nhoffp)
694{
695 struct sk_buff *skb = *skbp;
696 struct net_device *dev = skb->dev;
697 struct frag_hdr *fhdr;
698 struct frag_queue *fq;
699 struct ipv6hdr *hdr;
700
701 hdr = skb->nh.ipv6h;
702
703 IP6_INC_STATS_BH(IPSTATS_MIB_REASMREQDS);
704
705 /* Jumbo payload inhibits frag. header */
706 if (hdr->payload_len==0) {
707 IP6_INC_STATS(IPSTATS_MIB_INHDRERRORS);
708 icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, skb->h.raw-skb->nh.raw);
709 return -1;
710 }
711 if (!pskb_may_pull(skb, (skb->h.raw-skb->data)+sizeof(struct frag_hdr))) {
712 IP6_INC_STATS(IPSTATS_MIB_INHDRERRORS);
713 icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, skb->h.raw-skb->nh.raw);
714 return -1;
715 }
716
717 hdr = skb->nh.ipv6h;
718 fhdr = (struct frag_hdr *)skb->h.raw;
719
720 if (!(fhdr->frag_off & htons(0xFFF9))) {
721 /* It is not a fragmented frame */
722 skb->h.raw += sizeof(struct frag_hdr);
723 IP6_INC_STATS_BH(IPSTATS_MIB_REASMOKS);
724
725 *nhoffp = (u8*)fhdr - skb->nh.raw;
726 return 1;
727 }
728
729 if (atomic_read(&ip6_frag_mem) > sysctl_ip6frag_high_thresh)
730 ip6_evictor();
731
732 if ((fq = fq_find(fhdr->identification, &hdr->saddr, &hdr->daddr)) != NULL) {
733 int ret = -1;
734
735 spin_lock(&fq->lock);
736
737 ip6_frag_queue(fq, skb, fhdr, *nhoffp);
738
739 if (fq->last_in == (FIRST_IN|LAST_IN) &&
740 fq->meat == fq->len)
741 ret = ip6_frag_reasm(fq, skbp, nhoffp, dev);
742
743 spin_unlock(&fq->lock);
744 fq_put(fq, NULL);
745 return ret;
746 }
747
748 IP6_INC_STATS_BH(IPSTATS_MIB_REASMFAILS);
749 kfree_skb(skb);
750 return -1;
751}
752
753static struct inet6_protocol frag_protocol =
754{
755 .handler = ipv6_frag_rcv,
756 .flags = INET6_PROTO_NOPOLICY,
757};
758
759void __init ipv6_frag_init(void)
760{
761 if (inet6_add_protocol(&frag_protocol, IPPROTO_FRAGMENT) < 0)
762 printk(KERN_ERR "ipv6_frag_init: Could not register protocol\n");
763
764 ip6_frag_hash_rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^
765 (jiffies ^ (jiffies >> 6)));
766
767 init_timer(&ip6_frag_secret_timer);
768 ip6_frag_secret_timer.function = ip6_frag_secret_rebuild;
769 ip6_frag_secret_timer.expires = jiffies + sysctl_ip6frag_secret_interval;
770 add_timer(&ip6_frag_secret_timer);
771}
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
new file mode 100644
index 000000000000..183802902c02
--- /dev/null
+++ b/net/ipv6/route.c
@@ -0,0 +1,2131 @@
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
7 *
8 * $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 */
15
16/* Changes:
17 *
18 * YOSHIFUJI Hideaki @USAGI
19 * reworked default router selection.
20 * - respect outgoing interface
21 * - select from (probably) reachable routers (i.e.
22 * routers in REACHABLE, STALE, DELAY or PROBE states).
23 * - always select the same router if it is (probably)
24 * reachable. otherwise, round-robin the list.
25 */
26
27#include <linux/config.h>
28#include <linux/errno.h>
29#include <linux/types.h>
30#include <linux/times.h>
31#include <linux/socket.h>
32#include <linux/sockios.h>
33#include <linux/net.h>
34#include <linux/route.h>
35#include <linux/netdevice.h>
36#include <linux/in6.h>
37#include <linux/init.h>
38#include <linux/netlink.h>
39#include <linux/if_arp.h>
40
41#ifdef CONFIG_PROC_FS
42#include <linux/proc_fs.h>
43#include <linux/seq_file.h>
44#endif
45
46#include <net/snmp.h>
47#include <net/ipv6.h>
48#include <net/ip6_fib.h>
49#include <net/ip6_route.h>
50#include <net/ndisc.h>
51#include <net/addrconf.h>
52#include <net/tcp.h>
53#include <linux/rtnetlink.h>
54#include <net/dst.h>
55#include <net/xfrm.h>
56
57#include <asm/uaccess.h>
58
59#ifdef CONFIG_SYSCTL
60#include <linux/sysctl.h>
61#endif
62
63/* Set to 3 to get tracing. */
64#define RT6_DEBUG 2
65
/*
 * RDBG(x) takes a fully parenthesised printk argument list; RT6_TRACE()
 * takes printk-style arguments and prefixes KERN_DEBUG. Both expand to
 * nothing (RT6_TRACE to an empty do/while) unless RT6_DEBUG >= 3.
 */
66#if RT6_DEBUG >= 3
67#define RDBG(x) printk x
68#define RT6_TRACE(x...) printk(KERN_DEBUG x)
69#else
70#define RDBG(x)
71#define RT6_TRACE(x...) do { ; } while (0)
72#endif
73
74
/*
 * Tunables consumed further down in this file (ip6_dst_gc(),
 * rt6_pmtu_discovery(), ipv6_advmss()). The time-valued ones are
 * expressed in jiffies, hence the HZ factors.
 * NOTE(review): the "- 20 - 40" in ip6_rt_min_advmss presumably stands for
 * the TCP and IPv6 header sizes -- confirm.
 */
75static int ip6_rt_max_size = 4096;
76static int ip6_rt_gc_min_interval = HZ / 2;
77static int ip6_rt_gc_timeout = 60*HZ;
78int ip6_rt_gc_interval = 30*HZ;
79static int ip6_rt_gc_elasticity = 9;
80static int ip6_rt_mtu_expires = 10*60*HZ;
81static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
82
/* Forward declarations for helpers and dst_ops callbacks defined below. */
83static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
84static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
85static struct dst_entry *ip6_negative_advice(struct dst_entry *);
86static void ip6_dst_destroy(struct dst_entry *);
87static void ip6_dst_ifdown(struct dst_entry *,
88 struct net_device *dev, int how);
89static int ip6_dst_gc(void);
90
91static int ip6_pkt_discard(struct sk_buff *skb);
92static int ip6_pkt_discard_out(struct sk_buff *skb);
93static void ip6_link_failure(struct sk_buff *skb);
94static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
95
/*
 * dst_ops instance for IPv6 routes. It plugs the callbacks implemented in
 * this file (gc, check, destroy, ifdown, negative_advice, link_failure,
 * update_pmtu) into the dst layer; entries are sized as struct rt6_info.
 */
96static struct dst_ops ip6_dst_ops = {
97 .family = AF_INET6,
98 .protocol = __constant_htons(ETH_P_IPV6),
99 .gc = ip6_dst_gc,
100 .gc_thresh = 1024,
101 .check = ip6_dst_check,
102 .destroy = ip6_dst_destroy,
103 .ifdown = ip6_dst_ifdown,
104 .negative_advice = ip6_negative_advice,
105 .link_failure = ip6_link_failure,
106 .update_pmtu = ip6_rt_update_pmtu,
107 .entry_size = sizeof(struct rt6_info),
108};
109
/*
 * Statically initialised reject route: flags RTF_REJECT|RTF_NONEXTHOP,
 * dst.error -ENETUNREACH, bound to loopback_dev, with the
 * ip6_pkt_discard*() handlers as input/output. Lookup code in this file
 * returns a pointer to it when no usable route is found.
 */
110struct rt6_info ip6_null_entry = {
111 .u = {
112 .dst = {
113 .__refcnt = ATOMIC_INIT(1),
114 .__use = 1,
115 .dev = &loopback_dev,
116 .obsolete = -1,
117 .error = -ENETUNREACH,
118 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
119 .input = ip6_pkt_discard,
120 .output = ip6_pkt_discard_out,
121 .ops = &ip6_dst_ops,
122 .path = (struct dst_entry*)&ip6_null_entry,
123 }
124 },
125 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
126 .rt6i_metric = ~(u32) 0,
127 .rt6i_ref = ATOMIC_INIT(1),
128};
129
/*
 * Root node of the IPv6 routing tree. Its leaf initially points at
 * ip6_null_entry; the default-router helpers in this file walk
 * ip6_routing_table.leaf directly.
 */
130struct fib6_node ip6_routing_table = {
131 .leaf = &ip6_null_entry,
132 .fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO,
133};
134
135/* Protects all the ip6 fib */
136
/* Readers in this file take it with read_lock*(), modifiers with write_lock_bh(). */
137DEFINE_RWLOCK(rt6_lock);
138
139
140/* allocate dst with ip6_dst_ops */
141static __inline__ struct rt6_info *ip6_dst_alloc(void)
142{
143 return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
144}
145
146static void ip6_dst_destroy(struct dst_entry *dst)
147{
148 struct rt6_info *rt = (struct rt6_info *)dst;
149 struct inet6_dev *idev = rt->rt6i_idev;
150
151 if (idev != NULL) {
152 rt->rt6i_idev = NULL;
153 in6_dev_put(idev);
154 }
155}
156
157static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
158 int how)
159{
160 struct rt6_info *rt = (struct rt6_info *)dst;
161 struct inet6_dev *idev = rt->rt6i_idev;
162
163 if (dev != &loopback_dev && idev != NULL && idev->dev == dev) {
164 struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev);
165 if (loopback_idev != NULL) {
166 rt->rt6i_idev = loopback_idev;
167 in6_dev_put(idev);
168 }
169 }
170}
171
172static __inline__ int rt6_check_expired(const struct rt6_info *rt)
173{
174 return (rt->rt6i_flags & RTF_EXPIRES &&
175 time_after(jiffies, rt->rt6i_expires));
176}
177
178/*
179 * Route lookup. Any rt6_lock is implied.
180 */
181
182static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
183 int oif,
184 int strict)
185{
186 struct rt6_info *local = NULL;
187 struct rt6_info *sprt;
188
189 if (oif) {
190 for (sprt = rt; sprt; sprt = sprt->u.next) {
191 struct net_device *dev = sprt->rt6i_dev;
192 if (dev->ifindex == oif)
193 return sprt;
194 if (dev->flags & IFF_LOOPBACK) {
195 if (sprt->rt6i_idev == NULL ||
196 sprt->rt6i_idev->dev->ifindex != oif) {
197 if (strict && oif)
198 continue;
199 if (local && (!oif ||
200 local->rt6i_idev->dev->ifindex == oif))
201 continue;
202 }
203 local = sprt;
204 }
205 }
206
207 if (local)
208 return local;
209
210 if (strict)
211 return &ip6_null_entry;
212 }
213 return rt;
214}
215
216/*
217 * pointer to the last default router chosen. BH is disabled locally.
218 */
/*
 * rt6_dflt_lock is held around the writes to rt6_dflt_pointer in
 * rt6_reset_dflt_pointer() and rt6_best_dflt().
 */
219static struct rt6_info *rt6_dflt_pointer;
220static DEFINE_SPINLOCK(rt6_dflt_lock);
221
222void rt6_reset_dflt_pointer(struct rt6_info *rt)
223{
224 spin_lock_bh(&rt6_dflt_lock);
225 if (rt == NULL || rt == rt6_dflt_pointer) {
226 RT6_TRACE("reset default router: %p->NULL\n", rt6_dflt_pointer);
227 rt6_dflt_pointer = NULL;
228 }
229 spin_unlock_bh(&rt6_dflt_lock);
230}
231
232/* Default Router Selection (RFC 2461 6.3.6) */
/*
 * rt6_best_dflt - choose a default router from the list starting at @rt.
 * @rt:  head of the candidate route list (walked through u.next)
 * @oif: requested output interface index, 0 for "any"
 *
 * Pass 1 scores each candidate:
 *   +8  no @oif requested, or the route's device matches @oif
 *   +4  it is the router chosen last time (rt6_dflt_pointer)
 *   +3 / +2 / +1 depending on the nexthop neighbour's NUD state
 * Expired routes, routes without a nexthop neighbour and neighbours in
 * INCOMPLETE (or any unlisted) state are skipped. The best score wins and
 * a score of 12 or more stops the scan at once.
 *
 * Pass 2 (only if pass 1 found nothing and a previous router is recorded)
 * round-robins: first the entries after rt6_dflt_pointer, then from @rt
 * up to and including rt6_dflt_pointer, taking the first entry with
 * dst.obsolete <= 0, dst.error == 0 and not expired.
 *
 * A route found by pass 1 or 2 is stored in rt6_dflt_pointer. Otherwise
 * ip6_routing_table.leaf is scanned for an unexpired RTF_DEFAULT route on
 * @oif (not recorded), and finally &ip6_null_entry is returned.
 *
 * No reference is taken on the returned route.
 */
233static struct rt6_info *rt6_best_dflt(struct rt6_info *rt, int oif)
234{
235 struct rt6_info *match = NULL;
236 struct rt6_info *sprt;
237 int mpri = 0;
238
239 for (sprt = rt; sprt; sprt = sprt->u.next) {
240 struct neighbour *neigh;
241 int m = 0;
242
243 if (!oif ||
244 (sprt->rt6i_dev &&
245 sprt->rt6i_dev->ifindex == oif))
246 m += 8;
247
248 if (rt6_check_expired(sprt))
249 continue;
250
251 if (sprt == rt6_dflt_pointer)
252 m += 4;
253
/* Neighbour state is sampled under the neighbour's own lock. */
254 if ((neigh = sprt->rt6i_nexthop) != NULL) {
255 read_lock_bh(&neigh->lock);
256 switch (neigh->nud_state) {
257 case NUD_REACHABLE:
258 m += 3;
259 break;
260
261 case NUD_STALE:
262 case NUD_DELAY:
263 case NUD_PROBE:
264 m += 2;
265 break;
266
267 case NUD_NOARP:
268 case NUD_PERMANENT:
269 m += 1;
270 break;
271
272 case NUD_INCOMPLETE:
273 default:
274 read_unlock_bh(&neigh->lock);
275 continue;
276 }
277 read_unlock_bh(&neigh->lock);
278 } else {
279 continue;
280 }
281
282 if (m > mpri || m >= 12) {
283 match = sprt;
284 mpri = m;
285 if (m >= 12) {
286 /* we choose the last default router if it
287 * is in (probably) reachable state.
288 * If route changed, we should do pmtu
289 * discovery. --yoshfuji
290 */
291 break;
292 }
293 }
294 }
295
/* Plain spin_lock(): the comment above rt6_dflt_pointer states BH is disabled here. */
296 spin_lock(&rt6_dflt_lock);
297 if (!match) {
298 /*
299 * No default routers are known to be reachable.
300 * SHOULD round robin
301 */
302 if (rt6_dflt_pointer) {
303 for (sprt = rt6_dflt_pointer->u.next;
304 sprt; sprt = sprt->u.next) {
305 if (sprt->u.dst.obsolete <= 0 &&
306 sprt->u.dst.error == 0 &&
307 !rt6_check_expired(sprt)) {
308 match = sprt;
309 break;
310 }
311 }
/* Wrap around: rescan from the head, stopping once the old router has been tested. */
312 for (sprt = rt;
313 !match && sprt;
314 sprt = sprt->u.next) {
315 if (sprt->u.dst.obsolete <= 0 &&
316 sprt->u.dst.error == 0 &&
317 !rt6_check_expired(sprt)) {
318 match = sprt;
319 break;
320 }
321 if (sprt == rt6_dflt_pointer)
322 break;
323 }
324 }
325 }
326
327 if (match) {
328 if (rt6_dflt_pointer != match)
329 RT6_TRACE("changed default router: %p->%p\n",
330 rt6_dflt_pointer, match);
331 rt6_dflt_pointer = match;
332 }
333 spin_unlock(&rt6_dflt_lock);
334
335 if (!match) {
336 /*
337 * Last Resort: if no default routers found,
338 * use addrconf default route.
339 * We don't record this route.
340 */
341 for (sprt = ip6_routing_table.leaf;
342 sprt; sprt = sprt->u.next) {
343 if (!rt6_check_expired(sprt) &&
344 (sprt->rt6i_flags & RTF_DEFAULT) &&
345 (!oif ||
346 (sprt->rt6i_dev &&
347 sprt->rt6i_dev->ifindex == oif))) {
348 match = sprt;
349 break;
350 }
351 }
352 if (!match) {
353 /* no default route. give up. */
354 match = &ip6_null_entry;
355 }
356 }
357
358 return match;
359}
360
361struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
362 int oif, int strict)
363{
364 struct fib6_node *fn;
365 struct rt6_info *rt;
366
367 read_lock_bh(&rt6_lock);
368 fn = fib6_lookup(&ip6_routing_table, daddr, saddr);
369 rt = rt6_device_match(fn->leaf, oif, strict);
370 dst_hold(&rt->u.dst);
371 rt->u.dst.__use++;
372 read_unlock_bh(&rt6_lock);
373
374 rt->u.dst.lastuse = jiffies;
375 if (rt->u.dst.error == 0)
376 return rt;
377 dst_release(&rt->u.dst);
378 return NULL;
379}
380
381/* ip6_ins_rt is called with rt6_lock NOT held; it takes the write lock itself.
382 It takes a new route entry; if the addition fails for any reason, the
383 route is freed. In any case, if the caller does not hold a reference to
384 it, it may be destroyed.
385 */
386
387int ip6_ins_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr)
388{
389 int err;
390
391 write_lock_bh(&rt6_lock);
392 err = fib6_add(&ip6_routing_table, rt, nlh, _rtattr);
393 write_unlock_bh(&rt6_lock);
394
395 return err;
396}
397
398/* No rt6_lock! If COW failed, the function returns dead route entry
399 with dst->error set to errno value.
400 */
401
402static struct rt6_info *rt6_cow(struct rt6_info *ort, struct in6_addr *daddr,
403 struct in6_addr *saddr)
404{
405 int err;
406 struct rt6_info *rt;
407
408 /*
409 * Clone the route.
410 */
411
412 rt = ip6_rt_copy(ort);
413
414 if (rt) {
415 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
416
417 if (!(rt->rt6i_flags&RTF_GATEWAY))
418 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
419
420 rt->rt6i_dst.plen = 128;
421 rt->rt6i_flags |= RTF_CACHE;
422 rt->u.dst.flags |= DST_HOST;
423
424#ifdef CONFIG_IPV6_SUBTREES
425 if (rt->rt6i_src.plen && saddr) {
426 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
427 rt->rt6i_src.plen = 128;
428 }
429#endif
430
431 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
432
433 dst_hold(&rt->u.dst);
434
435 err = ip6_ins_rt(rt, NULL, NULL);
436 if (err == 0)
437 return rt;
438
439 rt->u.dst.error = err;
440
441 return rt;
442 }
443 dst_hold(&ip6_null_entry.u.dst);
444 return &ip6_null_entry;
445}
446
/*
 * BACKTRACK() is expanded inside ip6_route_input() and ip6_route_output()
 * and relies on their locals (rt, fn, strict) and labels (out, restart).
 *
 * It only acts when a strict lookup produced ip6_null_entry. It then
 * climbs fn->parent: on reaching a node flagged RTN_ROOT it takes a
 * reference on rt and jumps to "out"; on a node flagged RTN_RTINFO it
 * jumps to "restart" so the caller retries with that node. If the parent
 * chain ends first, execution continues after the macro with fn == NULL.
 */
447#define BACKTRACK() \
448if (rt == &ip6_null_entry && strict) { \
449 while ((fn = fn->parent) != NULL) { \
450 if (fn->fn_flags & RTN_ROOT) { \
451 dst_hold(&rt->u.dst); \
452 goto out; \
453 } \
454 if (fn->fn_flags & RTN_RTINFO) \
455 goto restart; \
456 } \
457}
458
459
/*
 * ip6_route_input - attach a route to a received packet.
 * @skb: packet; its IPv6 header supplies the addresses, skb->dev the
 *       interface used for device matching.
 *
 * The lookup is "strict" when the destination is multicast or link-local.
 * Under the rt6_lock read lock:
 *   - if the leaf is an RTF_CACHE entry, it is device-matched (strict),
 *     backtracked if needed, and used directly;
 *   - otherwise it is device-matched with strict == 0; if the result has
 *     no nexthop and is not RTF_NONEXTHOP, the lock is dropped and a host
 *     clone is created with rt6_cow(). A clone failing with -EEXIST
 *     triggers a fresh lookup, bounded by "attempts".
 * The chosen route is stored in skb->dst with a reference held and its
 * lastuse/__use fields are updated.
 */
460void ip6_route_input(struct sk_buff *skb)
461{
462 struct fib6_node *fn;
463 struct rt6_info *rt;
464 int strict;
465 int attempts = 3;
466
467 strict = ipv6_addr_type(&skb->nh.ipv6h->daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
468
469relookup:
470 read_lock_bh(&rt6_lock);
471
472 fn = fib6_lookup(&ip6_routing_table, &skb->nh.ipv6h->daddr,
473 &skb->nh.ipv6h->saddr);
474
475restart:
476 rt = fn->leaf;
477
478 if ((rt->rt6i_flags & RTF_CACHE)) {
479 rt = rt6_device_match(rt, skb->dev->ifindex, strict);
480 BACKTRACK();
481 dst_hold(&rt->u.dst);
482 goto out;
483 }
484
485 rt = rt6_device_match(rt, skb->dev->ifindex, 0);
486 BACKTRACK();
487
488 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
489 struct rt6_info *nrt;
/* Pin the parent route so it survives while rt6_lock is dropped for the clone. */
490 dst_hold(&rt->u.dst);
491 read_unlock_bh(&rt6_lock);
492
493 nrt = rt6_cow(rt, &skb->nh.ipv6h->daddr,
494 &skb->nh.ipv6h->saddr);
495
496 dst_release(&rt->u.dst);
497 rt = nrt;
498
499 if (rt->u.dst.error != -EEXIST || --attempts <= 0)
500 goto out2;
501
502 /* Race condition! In the gap, when rt6_lock was
503 released someone could insert this route. Relookup.
504 */
505 dst_release(&rt->u.dst);
506 goto relookup;
507 }
508 dst_hold(&rt->u.dst);
509
510out:
511 read_unlock_bh(&rt6_lock);
/* out2 is entered with rt6_lock already released. */
512out2:
513 rt->u.dst.lastuse = jiffies;
514 rt->u.dst.__use++;
515 skb->dst = (struct dst_entry *) rt;
516}
517
/*
 * ip6_route_output - route lookup for locally generated traffic.
 * @sk: not referenced in this function
 * @fl: flow description; fl6_dst/fl6_src drive the lookup, oif the
 *      device match
 *
 * Mirrors ip6_route_input(), with one addition: when the leaf is an
 * RTF_DEFAULT route whose metric is at least IP6_RT_PRIO_ADDRCONF, the
 * router is picked by rt6_best_dflt() instead of rt6_device_match().
 * Routes lacking a nexthop (and not RTF_NONEXTHOP) are cloned through
 * rt6_cow() with rt6_lock dropped; -EEXIST causes a bounded re-lookup.
 *
 * Returns the dst of the chosen route with a reference held; lastuse and
 * __use are updated.
 */
518struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
519{
520 struct fib6_node *fn;
521 struct rt6_info *rt;
522 int strict;
523 int attempts = 3;
524
525 strict = ipv6_addr_type(&fl->fl6_dst) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
526
527relookup:
528 read_lock_bh(&rt6_lock);
529
530 fn = fib6_lookup(&ip6_routing_table, &fl->fl6_dst, &fl->fl6_src);
531
532restart:
533 rt = fn->leaf;
534
535 if ((rt->rt6i_flags & RTF_CACHE)) {
536 rt = rt6_device_match(rt, fl->oif, strict);
537 BACKTRACK();
538 dst_hold(&rt->u.dst);
539 goto out;
540 }
541 if (rt->rt6i_flags & RTF_DEFAULT) {
542 if (rt->rt6i_metric >= IP6_RT_PRIO_ADDRCONF)
543 rt = rt6_best_dflt(rt, fl->oif);
544 } else {
545 rt = rt6_device_match(rt, fl->oif, strict);
546 BACKTRACK();
547 }
548
549 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
550 struct rt6_info *nrt;
/* Pin the parent route so it survives while rt6_lock is dropped for the clone. */
551 dst_hold(&rt->u.dst);
552 read_unlock_bh(&rt6_lock);
553
554 nrt = rt6_cow(rt, &fl->fl6_dst, &fl->fl6_src);
555
556 dst_release(&rt->u.dst);
557 rt = nrt;
558
559 if (rt->u.dst.error != -EEXIST || --attempts <= 0)
560 goto out2;
561
562 /* Race condition! In the gap, when rt6_lock was
563 released someone could insert this route. Relookup.
564 */
565 dst_release(&rt->u.dst);
566 goto relookup;
567 }
568 dst_hold(&rt->u.dst);
569
570out:
571 read_unlock_bh(&rt6_lock);
/* out2 is entered with rt6_lock already released. */
572out2:
573 rt->u.dst.lastuse = jiffies;
574 rt->u.dst.__use++;
575 return &rt->u.dst;
576}
577
578
579/*
580 * Destination cache support functions
581 */
582
583static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
584{
585 struct rt6_info *rt;
586
587 rt = (struct rt6_info *) dst;
588
589 if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
590 return dst;
591
592 return NULL;
593}
594
595static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
596{
597 struct rt6_info *rt = (struct rt6_info *) dst;
598
599 if (rt) {
600 if (rt->rt6i_flags & RTF_CACHE)
601 ip6_del_rt(rt, NULL, NULL);
602 else
603 dst_release(dst);
604 }
605 return NULL;
606}
607
608static void ip6_link_failure(struct sk_buff *skb)
609{
610 struct rt6_info *rt;
611
612 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
613
614 rt = (struct rt6_info *) skb->dst;
615 if (rt) {
616 if (rt->rt6i_flags&RTF_CACHE) {
617 dst_set_expires(&rt->u.dst, 0);
618 rt->rt6i_flags |= RTF_EXPIRES;
619 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
620 rt->rt6i_node->fn_sernum = -1;
621 }
622}
623
624static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
625{
626 struct rt6_info *rt6 = (struct rt6_info*)dst;
627
628 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
629 rt6->rt6i_flags |= RTF_MODIFIED;
630 if (mtu < IPV6_MIN_MTU) {
631 mtu = IPV6_MIN_MTU;
632 dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
633 }
634 dst->metrics[RTAX_MTU-1] = mtu;
635 }
636}
637
638/* Protected by rt6_lock. */
/*
 * Head of the list (chained through dst->next) of entries created by
 * ndisc_dst_alloc(); ndisc_dst_gc() frees the unreferenced ones.
 */
639static struct dst_entry *ndisc_dst_gc_list;
/* Forward declaration; the definition appears later in this file. */
640static int ipv6_get_mtu(struct net_device *dev);
641
642static inline unsigned int ipv6_advmss(unsigned int mtu)
643{
644 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
645
646 if (mtu < ip6_rt_min_advmss)
647 mtu = ip6_rt_min_advmss;
648
649 /*
650 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
651 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
652 * IPV6_MAXPLEN is also valid and means: "any MSS,
653 * rely only on pmtu discovery"
654 */
655 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
656 mtu = IPV6_MAXPLEN;
657 return mtu;
658}
659
/*
 * ndisc_dst_alloc - build a standalone dst entry.
 * @dev:    outgoing device; a reference is taken with dev_hold()
 * @neigh:  neighbour to attach (a reference is taken), or NULL to obtain
 *          one for @addr through ndisc_get_neigh()
 * @addr:   address used for the neighbour lookup when @neigh is NULL
 * @output: output handler installed in the dst
 *
 * The entry starts with refcount 1, hop limit 255, the MTU reported by
 * ipv6_get_mtu() and the matching advertised MSS. It is pushed onto
 * ndisc_dst_gc_list under the rt6_lock write lock, after which
 * fib6_force_start_gc() is called.
 *
 * Returns NULL when @dev has no inet6_dev or when allocation fails (the
 * inet6_dev reference is released in that case).
 */
660struct dst_entry *ndisc_dst_alloc(struct net_device *dev,
661 struct neighbour *neigh,
662 struct in6_addr *addr,
663 int (*output)(struct sk_buff *))
664{
665 struct rt6_info *rt;
666 struct inet6_dev *idev = in6_dev_get(dev);
667
668 if (unlikely(idev == NULL))
669 return NULL;
670
671 rt = ip6_dst_alloc();
672 if (unlikely(rt == NULL)) {
673 in6_dev_put(idev);
674 goto out;
675 }
676
677 dev_hold(dev);
678 if (neigh)
679 neigh_hold(neigh);
680 else
681 neigh = ndisc_get_neigh(dev, addr);
682
683 rt->rt6i_dev = dev;
684 rt->rt6i_idev = idev;
685 rt->rt6i_nexthop = neigh;
686 atomic_set(&rt->u.dst.__refcnt, 1);
687 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
688 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
689 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
690 rt->u.dst.output = output;
691
692#if 0 /* there's no chance to use these for ndisc */
693 rt->u.dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
694 ? DST_HOST
695 : 0;
696 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
697 rt->rt6i_dst.plen = 128;
698#endif
699
/* Link the entry onto ndisc_dst_gc_list under the rt6_lock write lock. */
700 write_lock_bh(&rt6_lock);
701 rt->u.dst.next = ndisc_dst_gc_list;
702 ndisc_dst_gc_list = &rt->u.dst;
703 write_unlock_bh(&rt6_lock);
704
705 fib6_force_start_gc();
706
707out:
708 return (struct dst_entry *)rt;
709}
710
711int ndisc_dst_gc(int *more)
712{
713 struct dst_entry *dst, *next, **pprev;
714 int freed;
715
716 next = NULL;
717 pprev = &ndisc_dst_gc_list;
718 freed = 0;
719 while ((dst = *pprev) != NULL) {
720 if (!atomic_read(&dst->__refcnt)) {
721 *pprev = dst->next;
722 dst_free(dst);
723 freed++;
724 } else {
725 pprev = &dst->next;
726 (*more)++;
727 }
728 }
729
730 return freed;
731}
732
733static int ip6_dst_gc(void)
734{
735 static unsigned expire = 30*HZ;
736 static unsigned long last_gc;
737 unsigned long now = jiffies;
738
739 if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
740 atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
741 goto out;
742
743 expire++;
744 fib6_run_gc(expire);
745 last_gc = now;
746 if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
747 expire = ip6_rt_gc_timeout>>1;
748
749out:
750 expire -= expire>>ip6_rt_gc_elasticity;
751 return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
752}
753
754/* Clean host part of a prefix. Not necessary in radix tree,
755 but results in cleaner routing tables.
756
757 Remove it only when all the things will work!
758 */
759
760static int ipv6_get_mtu(struct net_device *dev)
761{
762 int mtu = IPV6_MIN_MTU;
763 struct inet6_dev *idev;
764
765 idev = in6_dev_get(dev);
766 if (idev) {
767 mtu = idev->cnf.mtu6;
768 in6_dev_put(idev);
769 }
770 return mtu;
771}
772
773int ipv6_get_hoplimit(struct net_device *dev)
774{
775 int hoplimit = ipv6_devconf.hop_limit;
776 struct inet6_dev *idev;
777
778 idev = in6_dev_get(dev);
779 if (idev) {
780 hoplimit = idev->cnf.hop_limit;
781 in6_dev_put(idev);
782 }
783 return hoplimit;
784}
785
786/*
787 *
788 */
789
/*
 * ip6_route_add - build a route from an in6_rtmsg request and insert it.
 * @rtmsg:   route description (prefixes, gateway, ifindex, metric, flags)
 * @nlh:     originating netlink header or NULL; when present its payload
 *           supplies rt6i_protocol, otherwise RTPROT_BOOT is used
 * @_rtattr: struct rtattr ** array or NULL; only RTA_METRICS is read here
 *
 * Outline:
 *  1. Validate prefix lengths; resolve rtmsg_ifindex to dev/idev.
 *  2. Allocate the route, fill destination (and source, with subtrees).
 *  3. RTF_REJECT requests, and non-loopback destinations routed via a
 *     loopback device, become reject routes bound to loopback_dev.
 *  4. For RTF_GATEWAY, a gateway that is not link-local unicast must be
 *     unicast and reachable through a non-gateway route on the same
 *     device; the device is inherited from that route if none was given.
 *  5. Resolve the nexthop neighbour, apply metrics, and insert through
 *     ip6_ins_rt().
 *
 * Returns 0 or a negative errno. On the error path the dev/idev
 * references and the route are released; on success they are handed to
 * the inserted route.
 *
 * NOTE(review): rt6i_expires is set to clock_t_to_jiffies(rtmsg_info)
 * without adding jiffies, whereas rt6_check_expired() compares
 * rt6i_expires against jiffies -- confirm what callers pass in rtmsg_info.
 */
790int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr)
791{
792 int err;
793 struct rtmsg *r;
794 struct rtattr **rta;
795 struct rt6_info *rt = NULL;
796 struct net_device *dev = NULL;
797 struct inet6_dev *idev = NULL;
798 int addr_type;
799
800 rta = (struct rtattr **) _rtattr;
801
802 if (rtmsg->rtmsg_dst_len > 128 || rtmsg->rtmsg_src_len > 128)
803 return -EINVAL;
804#ifndef CONFIG_IPV6_SUBTREES
805 if (rtmsg->rtmsg_src_len)
806 return -EINVAL;
807#endif
808 if (rtmsg->rtmsg_ifindex) {
809 err = -ENODEV;
810 dev = dev_get_by_index(rtmsg->rtmsg_ifindex);
811 if (!dev)
812 goto out;
813 idev = in6_dev_get(dev);
814 if (!idev)
815 goto out;
816 }
817
818 if (rtmsg->rtmsg_metric == 0)
819 rtmsg->rtmsg_metric = IP6_RT_PRIO_USER;
820
821 rt = ip6_dst_alloc();
822
823 if (rt == NULL) {
824 err = -ENOMEM;
825 goto out;
826 }
827
828 rt->u.dst.obsolete = -1;
829 rt->rt6i_expires = clock_t_to_jiffies(rtmsg->rtmsg_info);
830 if (nlh && (r = NLMSG_DATA(nlh))) {
831 rt->rt6i_protocol = r->rtm_protocol;
832 } else {
833 rt->rt6i_protocol = RTPROT_BOOT;
834 }
835
836 addr_type = ipv6_addr_type(&rtmsg->rtmsg_dst);
837
838 if (addr_type & IPV6_ADDR_MULTICAST)
839 rt->u.dst.input = ip6_mc_input;
840 else
841 rt->u.dst.input = ip6_forward;
842
843 rt->u.dst.output = ip6_output;
844
845 ipv6_addr_prefix(&rt->rt6i_dst.addr,
846 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len);
847 rt->rt6i_dst.plen = rtmsg->rtmsg_dst_len;
848 if (rt->rt6i_dst.plen == 128)
849 rt->u.dst.flags = DST_HOST;
850
851#ifdef CONFIG_IPV6_SUBTREES
852 ipv6_addr_prefix(&rt->rt6i_src.addr,
853 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
854 rt->rt6i_src.plen = rtmsg->rtmsg_src_len;
855#endif
856
857 rt->rt6i_metric = rtmsg->rtmsg_metric;
858
859 /* We cannot add true routes via loopback here,
860 they would result in kernel looping; promote them to reject routes
861 */
862 if ((rtmsg->rtmsg_flags&RTF_REJECT) ||
863 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
864 /* hold loopback dev/idev if we haven't done so. */
865 if (dev != &loopback_dev) {
866 if (dev) {
867 dev_put(dev);
868 in6_dev_put(idev);
869 }
870 dev = &loopback_dev;
871 dev_hold(dev);
872 idev = in6_dev_get(dev);
873 if (!idev) {
874 err = -ENODEV;
875 goto out;
876 }
877 }
878 rt->u.dst.output = ip6_pkt_discard_out;
879 rt->u.dst.input = ip6_pkt_discard;
880 rt->u.dst.error = -ENETUNREACH;
881 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
882 goto install_route;
883 }
884
885 if (rtmsg->rtmsg_flags & RTF_GATEWAY) {
886 struct in6_addr *gw_addr;
887 int gwa_type;
888
889 gw_addr = &rtmsg->rtmsg_gateway;
890 ipv6_addr_copy(&rt->rt6i_gateway, &rtmsg->rtmsg_gateway);
891 gwa_type = ipv6_addr_type(gw_addr);
892
893 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
894 struct rt6_info *grt;
895
896 /* IPv6 strictly inhibits using not link-local
897 addresses as nexthop address.
898 Otherwise, router will not able to send redirects.
899 It is very good, but in some (rare!) circumstances
900 (SIT, PtP, NBMA NOARP links) it is handy to allow
901 some exceptions. --ANK
902 */
903 err = -EINVAL;
904 if (!(gwa_type&IPV6_ADDR_UNICAST))
905 goto out;
906
907 grt = rt6_lookup(gw_addr, NULL, rtmsg->rtmsg_ifindex, 1);
908
909 err = -EHOSTUNREACH;
910 if (grt == NULL)
911 goto out;
912 if (dev) {
913 if (dev != grt->rt6i_dev) {
914 dst_release(&grt->u.dst);
915 goto out;
916 }
917 } else {
918 dev = grt->rt6i_dev;
919 idev = grt->rt6i_idev;
920 dev_hold(dev);
921 in6_dev_hold(grt->rt6i_idev);
922 }
/* err stays -EHOSTUNREACH if the route to the gateway is itself a gateway route. */
923 if (!(grt->rt6i_flags&RTF_GATEWAY))
924 err = 0;
925 dst_release(&grt->u.dst);
926
927 if (err)
928 goto out;
929 }
930 err = -EINVAL;
931 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
932 goto out;
933 }
934
935 err = -ENODEV;
936 if (dev == NULL)
937 goto out;
938
939 if (rtmsg->rtmsg_flags & (RTF_GATEWAY|RTF_NONEXTHOP)) {
940 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
941 if (IS_ERR(rt->rt6i_nexthop)) {
942 err = PTR_ERR(rt->rt6i_nexthop);
943 rt->rt6i_nexthop = NULL;
944 goto out;
945 }
946 }
947
948 rt->rt6i_flags = rtmsg->rtmsg_flags;
949
950install_route:
/* Copy each nested metric into dst.metrics[type - 1]; types above RTAX_MAX are rejected. */
951 if (rta && rta[RTA_METRICS-1]) {
952 int attrlen = RTA_PAYLOAD(rta[RTA_METRICS-1]);
953 struct rtattr *attr = RTA_DATA(rta[RTA_METRICS-1]);
954
955 while (RTA_OK(attr, attrlen)) {
956 unsigned flavor = attr->rta_type;
957 if (flavor) {
958 if (flavor > RTAX_MAX) {
959 err = -EINVAL;
960 goto out;
961 }
962 rt->u.dst.metrics[flavor-1] =
963 *(u32 *)RTA_DATA(attr);
964 }
965 attr = RTA_NEXT(attr, attrlen);
966 }
967 }
968
/* Defaults for metrics left at zero. */
969 if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
970 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
971 if (!rt->u.dst.metrics[RTAX_MTU-1])
972 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
973 if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
974 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
975 rt->u.dst.dev = dev;
976 rt->rt6i_idev = idev;
977 return ip6_ins_rt(rt, nlh, _rtattr);
978
979out:
980 if (dev)
981 dev_put(dev);
982 if (idev)
983 in6_dev_put(idev);
984 if (rt)
985 dst_free((struct dst_entry *) rt);
986 return err;
987}
988
989int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr)
990{
991 int err;
992
993 write_lock_bh(&rt6_lock);
994
995 rt6_reset_dflt_pointer(NULL);
996
997 err = fib6_del(rt, nlh, _rtattr);
998 dst_release(&rt->u.dst);
999
1000 write_unlock_bh(&rt6_lock);
1001
1002 return err;
1003}
1004
1005static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr)
1006{
1007 struct fib6_node *fn;
1008 struct rt6_info *rt;
1009 int err = -ESRCH;
1010
1011 read_lock_bh(&rt6_lock);
1012
1013 fn = fib6_locate(&ip6_routing_table,
1014 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len,
1015 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
1016
1017 if (fn) {
1018 for (rt = fn->leaf; rt; rt = rt->u.next) {
1019 if (rtmsg->rtmsg_ifindex &&
1020 (rt->rt6i_dev == NULL ||
1021 rt->rt6i_dev->ifindex != rtmsg->rtmsg_ifindex))
1022 continue;
1023 if (rtmsg->rtmsg_flags&RTF_GATEWAY &&
1024 !ipv6_addr_equal(&rtmsg->rtmsg_gateway, &rt->rt6i_gateway))
1025 continue;
1026 if (rtmsg->rtmsg_metric &&
1027 rtmsg->rtmsg_metric != rt->rt6i_metric)
1028 continue;
1029 dst_hold(&rt->u.dst);
1030 read_unlock_bh(&rt6_lock);
1031
1032 return ip6_del_rt(rt, nlh, _rtattr);
1033 }
1034 }
1035 read_unlock_bh(&rt6_lock);
1036
1037 return err;
1038}
1039
1040/*
1041 * Handle redirects
1042 */
/*
 * rt6_redirect - process an accepted-in-principle redirect.
 * @dest:    destination the redirect applies to
 * @saddr:   source address of the redirect; must equal the gateway of the
 *           current route (or, for default routes, of some entry hanging
 *           off ip6_routing_table.leaf)
 * @neigh:   neighbour entry of the new first hop
 * @lladdr:  link-layer address handed to neigh_update()
 * @on_link: non-zero when the target is on-link; the new route then has
 *           no RTF_GATEWAY and the ISROUTER update flags are not passed
 *
 * The redirect is ignored unless the current route to @dest uses
 * @neigh's device and is a gateway route. On acceptance the neighbour is
 * updated to NUD_STALE, the old route is confirmed, and a /128
 * RTF_DYNAMIC|RTF_CACHE clone pointing at @neigh is inserted with MTU and
 * advmss reset from the device. If the old route was itself a cache
 * entry it is deleted (which consumes the lookup reference); in all other
 * cases the reference is dropped at "out".
 */
1043void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
1044 struct neighbour *neigh, u8 *lladdr, int on_link)
1045{
1046 struct rt6_info *rt, *nrt;
1047
1048 /* Locate old route to this destination. */
1049 rt = rt6_lookup(dest, NULL, neigh->dev->ifindex, 1);
1050
1051 if (rt == NULL)
1052 return;
1053
1054 if (neigh->dev != rt->rt6i_dev)
1055 goto out;
1056
1057 /*
1058 * Current route is on-link; redirect is always invalid.
1059 *
1060 * Seems, previous statement is not true. It could
1061 * be node, which looks for us as on-link (f.e. proxy ndisc)
1062 * But then router serving it might decide, that we should
1063 * know truth 8)8) --ANK (980726).
1064 */
1065 if (!(rt->rt6i_flags&RTF_GATEWAY))
1066 goto out;
1067
1068 /*
1069 * RFC 2461 specifies that redirects should only be
1070 * accepted if they come from the nexthop to the target.
1071 * Due to the way default routers are chosen, this notion
1072 * is a bit fuzzy and one might need to check all default
1073 * routers.
1074 */
1075 if (!ipv6_addr_equal(saddr, &rt->rt6i_gateway)) {
1076 if (rt->rt6i_flags & RTF_DEFAULT) {
1077 struct rt6_info *rt1;
1078
/* NOTE(review): plain read_lock() here, unlike the _bh variant used elsewhere -- presumably BH is already off in this path; confirm. */
1079 read_lock(&rt6_lock);
1080 for (rt1 = ip6_routing_table.leaf; rt1; rt1 = rt1->u.next) {
1081 if (ipv6_addr_equal(saddr, &rt1->rt6i_gateway)) {
/* Swap our reference from the looked-up route to the matching router entry. */
1082 dst_hold(&rt1->u.dst);
1083 dst_release(&rt->u.dst);
1084 read_unlock(&rt6_lock);
1085 rt = rt1;
1086 goto source_ok;
1087 }
1088 }
1089 read_unlock(&rt6_lock);
1090 }
1091 if (net_ratelimit())
1092 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1093 "for redirect target\n");
1094 goto out;
1095 }
1096
1097source_ok:
1098
1099 /*
1100 * We have finally decided to accept it.
1101 */
1102
1103 neigh_update(neigh, lladdr, NUD_STALE,
1104 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1105 NEIGH_UPDATE_F_OVERRIDE|
1106 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1107 NEIGH_UPDATE_F_ISROUTER))
1108 );
1109
1110 /*
1111 * Redirect received -> path was valid.
1112 * Look, redirects are sent only in response to data packets,
1113 * so that this nexthop apparently is reachable. --ANK
1114 */
1115 dst_confirm(&rt->u.dst);
1116
1117 /* Duplicate redirect: silently ignore. */
1118 if (neigh == rt->u.dst.neighbour)
1119 goto out;
1120
1121 nrt = ip6_rt_copy(rt);
1122 if (nrt == NULL)
1123 goto out;
1124
1125 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1126 if (on_link)
1127 nrt->rt6i_flags &= ~RTF_GATEWAY;
1128
1129 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1130 nrt->rt6i_dst.plen = 128;
1131 nrt->u.dst.flags |= DST_HOST;
1132
1133 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1134 nrt->rt6i_nexthop = neigh_clone(neigh);
1135 /* Reset pmtu, it may be better */
1136 nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1137 nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));
1138
1139 if (ip6_ins_rt(nrt, NULL, NULL))
1140 goto out;
1141
1142 if (rt->rt6i_flags&RTF_CACHE) {
/* ip6_del_rt() drops the reference itself, so skip the release at "out". */
1143 ip6_del_rt(rt, NULL, NULL);
1144 return;
1145 }
1146
1147out:
1148 dst_release(&rt->u.dst);
1149 return;
1150}
1151
1152/*
1153 * Handle ICMP "packet too big" messages
1154 * i.e. Path MTU discovery
1155 */
1156
/*
 * rt6_pmtu_discovery - record a smaller path MTU for a destination.
 * @daddr: destination the new MTU applies to
 * @saddr: source address used for the route lookup / clone
 * @dev:   device used for the (non-strict) route lookup
 * @pmtu:  reported path MTU
 *
 * Nothing happens when there is no route or @pmtu is not smaller than the
 * route's current MTU. Values below IPV6_MIN_MTU are raised to it and the
 * ALLFRAG feature bit is requested on the route that gets updated.
 *
 *  - RTF_CACHE route: updated in place and set to expire after
 *    ip6_rt_mtu_expires.
 *  - Route without nexthop (and not RTF_NONEXTHOP): a host clone is made
 *    with rt6_cow() and, if that succeeded, given the new MTU and expiry.
 *  - Otherwise: a /128 copy sharing the nexthop neighbour is inserted
 *    with the new MTU and expiry.
 *
 * The reference obtained by the lookup is dropped before returning.
 */
1157void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1158 struct net_device *dev, u32 pmtu)
1159{
1160 struct rt6_info *rt, *nrt;
1161 int allfrag = 0;
1162
1163 rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
1164 if (rt == NULL)
1165 return;
1166
1167 if (pmtu >= dst_mtu(&rt->u.dst))
1168 goto out;
1169
1170 if (pmtu < IPV6_MIN_MTU) {
1171 /*
1172 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1173 * MTU (1280) and a fragment header should always be included
1174 * after a node receiving Too Big message reporting PMTU is
1175 * less than the IPv6 Minimum Link MTU.
1176 */
1177 pmtu = IPV6_MIN_MTU;
1178 allfrag = 1;
1179 }
1180
1181 /* New mtu received -> path was valid.
1182 They are sent only in response to data packets,
1183 so that this nexthop apparently is reachable. --ANK
1184 */
1185 dst_confirm(&rt->u.dst);
1186
1187 /* Host route. If it is static, it would be better
1188 not to override it, but add new one, so that
1189 when cache entry will expire old pmtu
1190 would return automatically.
1191 */
1192 if (rt->rt6i_flags & RTF_CACHE) {
1193 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1194 if (allfrag)
1195 rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1196 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1197 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1198 goto out;
1199 }
1200
1201 /* Network route.
1202 Two cases are possible:
1203 1. It is connected route. Action: COW
1204 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1205 */
1206 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
1207 nrt = rt6_cow(rt, daddr, saddr);
1208 if (!nrt->u.dst.error) {
1209 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1210 if (allfrag)
1211 nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1212 /* According to RFC 1981, detecting PMTU increase shouldn't be
1213 happened within 5 mins, the recommended timer is 10 mins.
1214 Here this route expiration time is set to ip6_rt_mtu_expires
1215 which is 10 mins. After 10 mins the decreased pmtu is expired
1216 and detecting PMTU increase will be automatically happened.
1217 */
1218 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1219 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1220 }
/* rt6_cow() always returns with a reference held; drop it here. */
1221 dst_release(&nrt->u.dst);
1222 } else {
1223 nrt = ip6_rt_copy(rt);
1224 if (nrt == NULL)
1225 goto out;
1226 ipv6_addr_copy(&nrt->rt6i_dst.addr, daddr);
1227 nrt->rt6i_dst.plen = 128;
1228 nrt->u.dst.flags |= DST_HOST;
1229 nrt->rt6i_nexthop = neigh_clone(rt->rt6i_nexthop);
1230 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1231 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_CACHE|RTF_EXPIRES;
1232 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1233 if (allfrag)
1234 nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
/* The return value of ip6_ins_rt() is not checked here. */
1235 ip6_ins_rt(nrt, NULL, NULL);
1236 }
1237
1238out:
1239 dst_release(&rt->u.dst);
1240}
1241
1242/*
1243 * Misc support functions
1244 */
1245
1246static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1247{
1248 struct rt6_info *rt = ip6_dst_alloc();
1249
1250 if (rt) {
1251 rt->u.dst.input = ort->u.dst.input;
1252 rt->u.dst.output = ort->u.dst.output;
1253
1254 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1255 rt->u.dst.dev = ort->u.dst.dev;
1256 if (rt->u.dst.dev)
1257 dev_hold(rt->u.dst.dev);
1258 rt->rt6i_idev = ort->rt6i_idev;
1259 if (rt->rt6i_idev)
1260 in6_dev_hold(rt->rt6i_idev);
1261 rt->u.dst.lastuse = jiffies;
1262 rt->rt6i_expires = 0;
1263
1264 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1265 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1266 rt->rt6i_metric = 0;
1267
1268 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1269#ifdef CONFIG_IPV6_SUBTREES
1270 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1271#endif
1272 }
1273 return rt;
1274}
1275
1276struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1277{
1278 struct rt6_info *rt;
1279 struct fib6_node *fn;
1280
1281 fn = &ip6_routing_table;
1282
1283 write_lock_bh(&rt6_lock);
1284 for (rt = fn->leaf; rt; rt=rt->u.next) {
1285 if (dev == rt->rt6i_dev &&
1286 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1287 break;
1288 }
1289 if (rt)
1290 dst_hold(&rt->u.dst);
1291 write_unlock_bh(&rt6_lock);
1292 return rt;
1293}
1294
1295struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1296 struct net_device *dev)
1297{
1298 struct in6_rtmsg rtmsg;
1299
1300 memset(&rtmsg, 0, sizeof(struct in6_rtmsg));
1301 rtmsg.rtmsg_type = RTMSG_NEWROUTE;
1302 ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
1303 rtmsg.rtmsg_metric = 1024;
1304 rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP | RTF_EXPIRES;
1305
1306 rtmsg.rtmsg_ifindex = dev->ifindex;
1307
1308 ip6_route_add(&rtmsg, NULL, NULL);
1309 return rt6_get_dflt_router(gwaddr, dev);
1310}
1311
1312void rt6_purge_dflt_routers(void)
1313{
1314 struct rt6_info *rt;
1315
1316restart:
1317 read_lock_bh(&rt6_lock);
1318 for (rt = ip6_routing_table.leaf; rt; rt = rt->u.next) {
1319 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1320 dst_hold(&rt->u.dst);
1321
1322 rt6_reset_dflt_pointer(NULL);
1323
1324 read_unlock_bh(&rt6_lock);
1325
1326 ip6_del_rt(rt, NULL, NULL);
1327
1328 goto restart;
1329 }
1330 }
1331 read_unlock_bh(&rt6_lock);
1332}
1333
1334int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1335{
1336 struct in6_rtmsg rtmsg;
1337 int err;
1338
1339 switch(cmd) {
1340 case SIOCADDRT: /* Add a route */
1341 case SIOCDELRT: /* Delete a route */
1342 if (!capable(CAP_NET_ADMIN))
1343 return -EPERM;
1344 err = copy_from_user(&rtmsg, arg,
1345 sizeof(struct in6_rtmsg));
1346 if (err)
1347 return -EFAULT;
1348
1349 rtnl_lock();
1350 switch (cmd) {
1351 case SIOCADDRT:
1352 err = ip6_route_add(&rtmsg, NULL, NULL);
1353 break;
1354 case SIOCDELRT:
1355 err = ip6_route_del(&rtmsg, NULL, NULL);
1356 break;
1357 default:
1358 err = -EINVAL;
1359 }
1360 rtnl_unlock();
1361
1362 return err;
1363 };
1364
1365 return -EINVAL;
1366}
1367
1368/*
1369 * Drop the packet on the floor
1370 */
1371
1372int ip6_pkt_discard(struct sk_buff *skb)
1373{
1374 IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
1375 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOROUTE, 0, skb->dev);
1376 kfree_skb(skb);
1377 return 0;
1378}
1379
1380int ip6_pkt_discard_out(struct sk_buff *skb)
1381{
1382 skb->dev = skb->dst->dev;
1383 return ip6_pkt_discard(skb);
1384}
1385
1386/*
1387 * Allocate a dst for local (unicast / anycast) address.
1388 */
1389
1390struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1391 const struct in6_addr *addr,
1392 int anycast)
1393{
1394 struct rt6_info *rt = ip6_dst_alloc();
1395
1396 if (rt == NULL)
1397 return ERR_PTR(-ENOMEM);
1398
1399 dev_hold(&loopback_dev);
1400 in6_dev_hold(idev);
1401
1402 rt->u.dst.flags = DST_HOST;
1403 rt->u.dst.input = ip6_input;
1404 rt->u.dst.output = ip6_output;
1405 rt->rt6i_dev = &loopback_dev;
1406 rt->rt6i_idev = idev;
1407 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1408 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1409 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1410 rt->u.dst.obsolete = -1;
1411
1412 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
1413 if (!anycast)
1414 rt->rt6i_flags |= RTF_LOCAL;
1415 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1416 if (rt->rt6i_nexthop == NULL) {
1417 dst_free((struct dst_entry *) rt);
1418 return ERR_PTR(-ENOMEM);
1419 }
1420
1421 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1422 rt->rt6i_dst.plen = 128;
1423
1424 atomic_set(&rt->u.dst.__refcnt, 1);
1425
1426 return rt;
1427}
1428
1429static int fib6_ifdown(struct rt6_info *rt, void *arg)
1430{
1431 if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1432 rt != &ip6_null_entry) {
1433 RT6_TRACE("deleted by ifdown %p\n", rt);
1434 return -1;
1435 }
1436 return 0;
1437}
1438
1439void rt6_ifdown(struct net_device *dev)
1440{
1441 write_lock_bh(&rt6_lock);
1442 fib6_clean_tree(&ip6_routing_table, fib6_ifdown, 0, dev);
1443 write_unlock_bh(&rt6_lock);
1444}
1445
/* Argument bundle handed to rt6_mtu_change_route() by rt6_mtu_change(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned mtu;		/* the new MTU value */
};
1451
1452static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1453{
1454 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1455 struct inet6_dev *idev;
1456
1457 /* In IPv6 pmtu discovery is not optional,
1458 so that RTAX_MTU lock cannot disable it.
1459 We still use this lock to block changes
1460 caused by addrconf/ndisc.
1461 */
1462
1463 idev = __in6_dev_get(arg->dev);
1464 if (idev == NULL)
1465 return 0;
1466
1467 /* For administrative MTU increase, there is no way to discover
1468 IPv6 PMTU increase, so PMTU increase should be updated here.
1469 Since RFC 1981 doesn't include administrative MTU increase
1470 update PMTU increase is a MUST. (i.e. jumbo frame)
1471 */
1472 /*
1473 If new MTU is less than route PMTU, this new MTU will be the
1474 lowest MTU in the path, update the route PMTU to reflect PMTU
1475 decreases; if new MTU is greater than route PMTU, and the
1476 old MTU is the lowest MTU in the path, update the route PMTU
1477 to reflect the increase. In this case if the other nodes' MTU
1478 also have the lowest MTU, TOO BIG MESSAGE will be lead to
1479 PMTU discouvery.
1480 */
1481 if (rt->rt6i_dev == arg->dev &&
1482 !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1483 (dst_mtu(&rt->u.dst) > arg->mtu ||
1484 (dst_mtu(&rt->u.dst) < arg->mtu &&
1485 dst_mtu(&rt->u.dst) == idev->cnf.mtu6)))
1486 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1487 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
1488 return 0;
1489}
1490
1491void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1492{
1493 struct rt6_mtu_change_arg arg;
1494
1495 arg.dev = dev;
1496 arg.mtu = mtu;
1497 read_lock_bh(&rt6_lock);
1498 fib6_clean_tree(&ip6_routing_table, rt6_mtu_change_route, 0, &arg);
1499 read_unlock_bh(&rt6_lock);
1500}
1501
1502static int inet6_rtm_to_rtmsg(struct rtmsg *r, struct rtattr **rta,
1503 struct in6_rtmsg *rtmsg)
1504{
1505 memset(rtmsg, 0, sizeof(*rtmsg));
1506
1507 rtmsg->rtmsg_dst_len = r->rtm_dst_len;
1508 rtmsg->rtmsg_src_len = r->rtm_src_len;
1509 rtmsg->rtmsg_flags = RTF_UP;
1510 if (r->rtm_type == RTN_UNREACHABLE)
1511 rtmsg->rtmsg_flags |= RTF_REJECT;
1512
1513 if (rta[RTA_GATEWAY-1]) {
1514 if (rta[RTA_GATEWAY-1]->rta_len != RTA_LENGTH(16))
1515 return -EINVAL;
1516 memcpy(&rtmsg->rtmsg_gateway, RTA_DATA(rta[RTA_GATEWAY-1]), 16);
1517 rtmsg->rtmsg_flags |= RTF_GATEWAY;
1518 }
1519 if (rta[RTA_DST-1]) {
1520 if (RTA_PAYLOAD(rta[RTA_DST-1]) < ((r->rtm_dst_len+7)>>3))
1521 return -EINVAL;
1522 memcpy(&rtmsg->rtmsg_dst, RTA_DATA(rta[RTA_DST-1]), ((r->rtm_dst_len+7)>>3));
1523 }
1524 if (rta[RTA_SRC-1]) {
1525 if (RTA_PAYLOAD(rta[RTA_SRC-1]) < ((r->rtm_src_len+7)>>3))
1526 return -EINVAL;
1527 memcpy(&rtmsg->rtmsg_src, RTA_DATA(rta[RTA_SRC-1]), ((r->rtm_src_len+7)>>3));
1528 }
1529 if (rta[RTA_OIF-1]) {
1530 if (rta[RTA_OIF-1]->rta_len != RTA_LENGTH(sizeof(int)))
1531 return -EINVAL;
1532 memcpy(&rtmsg->rtmsg_ifindex, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1533 }
1534 if (rta[RTA_PRIORITY-1]) {
1535 if (rta[RTA_PRIORITY-1]->rta_len != RTA_LENGTH(4))
1536 return -EINVAL;
1537 memcpy(&rtmsg->rtmsg_metric, RTA_DATA(rta[RTA_PRIORITY-1]), 4);
1538 }
1539 return 0;
1540}
1541
1542int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1543{
1544 struct rtmsg *r = NLMSG_DATA(nlh);
1545 struct in6_rtmsg rtmsg;
1546
1547 if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1548 return -EINVAL;
1549 return ip6_route_del(&rtmsg, nlh, arg);
1550}
1551
1552int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1553{
1554 struct rtmsg *r = NLMSG_DATA(nlh);
1555 struct in6_rtmsg rtmsg;
1556
1557 if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1558 return -EINVAL;
1559 return ip6_route_add(&rtmsg, nlh, arg);
1560}
1561
/* Per-call context that inet6_dump_fib() hands to the tree walker. */
struct rt6_rtnl_dump_arg {
	struct sk_buff *skb;		/* buffer the dump is written into */
	struct netlink_callback *cb;	/* netlink dump state of this request */
};
1567
/*
 * Append one rtnetlink route message describing @rt to @skb.
 *
 * @dst/@src: when non-NULL, reported as the route's destination/source
 *            with a prefix length of 128 instead of the route's own key
 *            (source handling only with CONFIG_IPV6_SUBTREES)
 * @iif:      when non-zero, emitted as RTA_IIF; otherwise, if @dst is
 *            set, a preferred source address is looked up and emitted
 * @type:     netlink message type to put in the header
 * @pid/@seq: netlink header fields; a zero @pid is replaced by the pid
 *            of @in_nlh when that is given
 * @prefix:   when non-zero, routes without RTF_PREFIX_RT are skipped
 *
 * Returns 1 when the route was skipped by the prefix filter, skb->len
 * after a successful append, or -1 after trimming @skb back to where it
 * started when the message could not be completed.
 */
1568static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
1569 struct in6_addr *dst,
1570 struct in6_addr *src,
1571 int iif,
1572 int type, u32 pid, u32 seq,
1573 struct nlmsghdr *in_nlh, int prefix)
1574{
1575 struct rtmsg *rtm;
1576 struct nlmsghdr *nlh;
/* remember the current tail so a partial message can be undone */
1577 unsigned char *b = skb->tail;
1578 struct rta_cacheinfo ci;
1579
1580 if (prefix) { /* user wants prefix routes only */
1581 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
1582 /* success since this is not a prefix route */
1583 return 1;
1584 }
1585 }
1586
1587 if (!pid && in_nlh) {
1588 pid = in_nlh->nlmsg_pid;
1589 }
1590
/* NOTE(review): NLMSG_PUT and RTA_PUT presumably jump to the
   nlmsg_failure / rtattr_failure labels below when the skb has no
   room - confirm against the macro definitions. */
1591 nlh = NLMSG_PUT(skb, pid, seq, type, sizeof(*rtm));
1592 rtm = NLMSG_DATA(nlh);
1593 rtm->rtm_family = AF_INET6;
1594 rtm->rtm_dst_len = rt->rt6i_dst.plen;
1595 rtm->rtm_src_len = rt->rt6i_src.plen;
1596 rtm->rtm_tos = 0;
1597 rtm->rtm_table = RT_TABLE_MAIN;
/* route type: reject routes first, then anything on a loopback device */
1598 if (rt->rt6i_flags&RTF_REJECT)
1599 rtm->rtm_type = RTN_UNREACHABLE;
1600 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
1601 rtm->rtm_type = RTN_LOCAL;
1602 else
1603 rtm->rtm_type = RTN_UNICAST;
1604 rtm->rtm_flags = 0;
1605 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
/* the stored protocol is overridden by the first matching flag below */
1606 rtm->rtm_protocol = rt->rt6i_protocol;
1607 if (rt->rt6i_flags&RTF_DYNAMIC)
1608 rtm->rtm_protocol = RTPROT_REDIRECT;
1609 else if (rt->rt6i_flags & RTF_ADDRCONF)
1610 rtm->rtm_protocol = RTPROT_KERNEL;
1611 else if (rt->rt6i_flags&RTF_DEFAULT)
1612 rtm->rtm_protocol = RTPROT_RA;
1613
1614 if (rt->rt6i_flags&RTF_CACHE)
1615 rtm->rtm_flags |= RTM_F_CLONED;
1616
1617 if (dst) {
1618 RTA_PUT(skb, RTA_DST, 16, dst);
1619 rtm->rtm_dst_len = 128;
1620 } else if (rtm->rtm_dst_len)
1621 RTA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1622#ifdef CONFIG_IPV6_SUBTREES
1623 if (src) {
1624 RTA_PUT(skb, RTA_SRC, 16, src);
1625 rtm->rtm_src_len = 128;
1626 } else if (rtm->rtm_src_len)
1627 RTA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1628#endif
1629 if (iif)
1630 RTA_PUT(skb, RTA_IIF, 4, &iif);
1631 else if (dst) {
1632 struct in6_addr saddr_buf;
1633 if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
1634 RTA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1635 }
1636 if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
1637 goto rtattr_failure;
1638 if (rt->u.dst.neighbour)
1639 RTA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
1640 if (rt->u.dst.dev)
1641 RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->rt6i_dev->ifindex);
1642 RTA_PUT(skb, RTA_PRIORITY, 4, &rt->rt6i_metric);
/* cache info: age and remaining lifetime are reported in clock_t units */
1643 ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
1644 if (rt->rt6i_expires)
1645 ci.rta_expires = jiffies_to_clock_t(rt->rt6i_expires - jiffies);
1646 else
1647 ci.rta_expires = 0;
1648 ci.rta_used = rt->u.dst.__use;
1649 ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
1650 ci.rta_error = rt->u.dst.error;
1651 ci.rta_id = 0;
1652 ci.rta_ts = 0;
1653 ci.rta_tsage = 0;
1654 RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
/* final message length = everything appended since entry */
1655 nlh->nlmsg_len = skb->tail - b;
1656 return skb->len;
1657
1658nlmsg_failure:
1659rtattr_failure:
/* drop whatever part of the message was already written */
1660 skb_trim(skb, b - skb->data);
1661 return -1;
1662}
1663
1664static int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1665{
1666 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
1667 int prefix;
1668
1669 if (arg->cb->nlh->nlmsg_len >= NLMSG_LENGTH(sizeof(struct rtmsg))) {
1670 struct rtmsg *rtm = NLMSG_DATA(arg->cb->nlh);
1671 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
1672 } else
1673 prefix = 0;
1674
1675 return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1676 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
1677 NULL, prefix);
1678}
1679
1680static int fib6_dump_node(struct fib6_walker_t *w)
1681{
1682 int res;
1683 struct rt6_info *rt;
1684
1685 for (rt = w->leaf; rt; rt = rt->u.next) {
1686 res = rt6_dump_route(rt, w->args);
1687 if (res < 0) {
1688 /* Frame is full, suspend walking */
1689 w->leaf = rt;
1690 return 1;
1691 }
1692 BUG_TRAP(res!=0);
1693 }
1694 w->leaf = NULL;
1695 return 0;
1696}
1697
1698static void fib6_dump_end(struct netlink_callback *cb)
1699{
1700 struct fib6_walker_t *w = (void*)cb->args[0];
1701
1702 if (w) {
1703 cb->args[0] = 0;
1704 fib6_walker_unlink(w);
1705 kfree(w);
1706 }
1707 if (cb->args[1]) {
1708 cb->done = (void*)cb->args[1];
1709 cb->args[1] = 0;
1710 }
1711}
1712
1713static int fib6_dump_done(struct netlink_callback *cb)
1714{
1715 fib6_dump_end(cb);
1716 return cb->done(cb);
1717}
1718
1719int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
1720{
1721 struct rt6_rtnl_dump_arg arg;
1722 struct fib6_walker_t *w;
1723 int res;
1724
1725 arg.skb = skb;
1726 arg.cb = cb;
1727
1728 w = (void*)cb->args[0];
1729 if (w == NULL) {
1730 /* New dump:
1731 *
1732 * 1. hook callback destructor.
1733 */
1734 cb->args[1] = (long)cb->done;
1735 cb->done = fib6_dump_done;
1736
1737 /*
1738 * 2. allocate and initialize walker.
1739 */
1740 w = kmalloc(sizeof(*w), GFP_ATOMIC);
1741 if (w == NULL)
1742 return -ENOMEM;
1743 RT6_TRACE("dump<%p", w);
1744 memset(w, 0, sizeof(*w));
1745 w->root = &ip6_routing_table;
1746 w->func = fib6_dump_node;
1747 w->args = &arg;
1748 cb->args[0] = (long)w;
1749 read_lock_bh(&rt6_lock);
1750 res = fib6_walk(w);
1751 read_unlock_bh(&rt6_lock);
1752 } else {
1753 w->args = &arg;
1754 read_lock_bh(&rt6_lock);
1755 res = fib6_walk_continue(w);
1756 read_unlock_bh(&rt6_lock);
1757 }
1758#if RT6_DEBUG >= 3
1759 if (res <= 0 && skb->len == 0)
1760 RT6_TRACE("%p>dump end\n", w);
1761#endif
1762 res = res < 0 ? res : skb->len;
1763 /* res < 0 is an error. (really, impossible)
1764 res == 0 means that dump is complete, but skb still can contain data.
1765 res > 0 dump is not complete, but frame is full.
1766 */
1767 /* Destroy walker, if dump of this table is complete. */
1768 if (res <= 0)
1769 fib6_dump_end(cb);
1770 return res;
1771}
1772
1773int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1774{
1775 struct rtattr **rta = arg;
1776 int iif = 0;
1777 int err = -ENOBUFS;
1778 struct sk_buff *skb;
1779 struct flowi fl;
1780 struct rt6_info *rt;
1781
1782 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1783 if (skb == NULL)
1784 goto out;
1785
1786 /* Reserve room for dummy headers, this skb can pass
1787 through good chunk of routing engine.
1788 */
1789 skb->mac.raw = skb->data;
1790 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1791
1792 memset(&fl, 0, sizeof(fl));
1793 if (rta[RTA_SRC-1])
1794 ipv6_addr_copy(&fl.fl6_src,
1795 (struct in6_addr*)RTA_DATA(rta[RTA_SRC-1]));
1796 if (rta[RTA_DST-1])
1797 ipv6_addr_copy(&fl.fl6_dst,
1798 (struct in6_addr*)RTA_DATA(rta[RTA_DST-1]));
1799
1800 if (rta[RTA_IIF-1])
1801 memcpy(&iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int));
1802
1803 if (iif) {
1804 struct net_device *dev;
1805 dev = __dev_get_by_index(iif);
1806 if (!dev) {
1807 err = -ENODEV;
1808 goto out_free;
1809 }
1810 }
1811
1812 fl.oif = 0;
1813 if (rta[RTA_OIF-1])
1814 memcpy(&fl.oif, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1815
1816 rt = (struct rt6_info*)ip6_route_output(NULL, &fl);
1817
1818 skb->dst = &rt->u.dst;
1819
1820 NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
1821 err = rt6_fill_node(skb, rt,
1822 &fl.fl6_dst, &fl.fl6_src,
1823 iif,
1824 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
1825 nlh->nlmsg_seq, nlh, 0);
1826 if (err < 0) {
1827 err = -EMSGSIZE;
1828 goto out_free;
1829 }
1830
1831 err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
1832 if (err > 0)
1833 err = 0;
1834out:
1835 return err;
1836out_free:
1837 kfree_skb(skb);
1838 goto out;
1839}
1840
1841void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh)
1842{
1843 struct sk_buff *skb;
1844 int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
1845
1846 skb = alloc_skb(size, gfp_any());
1847 if (!skb) {
1848 netlink_set_err(rtnl, 0, RTMGRP_IPV6_ROUTE, ENOBUFS);
1849 return;
1850 }
1851 if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, 0, 0, nlh, 0) < 0) {
1852 kfree_skb(skb);
1853 netlink_set_err(rtnl, 0, RTMGRP_IPV6_ROUTE, EINVAL);
1854 return;
1855 }
1856 NETLINK_CB(skb).dst_groups = RTMGRP_IPV6_ROUTE;
1857 netlink_broadcast(rtnl, skb, 0, RTMGRP_IPV6_ROUTE, gfp_any());
1858}
1859
1860/*
1861 * /proc
1862 */
1863
1864#ifdef CONFIG_PROC_FS
1865
1866#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)
1867
/* Cursor shared between rt6_proc_info() and rt6_info_route(). */
struct rt6_proc_arg {
	char *buffer;	/* where the text records are written */
	int offset;	/* read offset requested by the caller */
	int length;	/* once len reaches this, no more records are added */
	int skip;	/* records skipped so far to reach offset */
	int len;	/* bytes written into buffer so far */
};
1876
1877static int rt6_info_route(struct rt6_info *rt, void *p_arg)
1878{
1879 struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
1880 int i;
1881
1882 if (arg->skip < arg->offset / RT6_INFO_LEN) {
1883 arg->skip++;
1884 return 0;
1885 }
1886
1887 if (arg->len >= arg->length)
1888 return 0;
1889
1890 for (i=0; i<16; i++) {
1891 sprintf(arg->buffer + arg->len, "%02x",
1892 rt->rt6i_dst.addr.s6_addr[i]);
1893 arg->len += 2;
1894 }
1895 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1896 rt->rt6i_dst.plen);
1897
1898#ifdef CONFIG_IPV6_SUBTREES
1899 for (i=0; i<16; i++) {
1900 sprintf(arg->buffer + arg->len, "%02x",
1901 rt->rt6i_src.addr.s6_addr[i]);
1902 arg->len += 2;
1903 }
1904 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1905 rt->rt6i_src.plen);
1906#else
1907 sprintf(arg->buffer + arg->len,
1908 "00000000000000000000000000000000 00 ");
1909 arg->len += 36;
1910#endif
1911
1912 if (rt->rt6i_nexthop) {
1913 for (i=0; i<16; i++) {
1914 sprintf(arg->buffer + arg->len, "%02x",
1915 rt->rt6i_nexthop->primary_key[i]);
1916 arg->len += 2;
1917 }
1918 } else {
1919 sprintf(arg->buffer + arg->len,
1920 "00000000000000000000000000000000");
1921 arg->len += 32;
1922 }
1923 arg->len += sprintf(arg->buffer + arg->len,
1924 " %08x %08x %08x %08x %8s\n",
1925 rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
1926 rt->u.dst.__use, rt->rt6i_flags,
1927 rt->rt6i_dev ? rt->rt6i_dev->name : "");
1928 return 0;
1929}
1930
1931static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
1932{
1933 struct rt6_proc_arg arg;
1934 arg.buffer = buffer;
1935 arg.offset = offset;
1936 arg.length = length;
1937 arg.skip = 0;
1938 arg.len = 0;
1939
1940 read_lock_bh(&rt6_lock);
1941 fib6_clean_tree(&ip6_routing_table, rt6_info_route, 0, &arg);
1942 read_unlock_bh(&rt6_lock);
1943
1944 *start = buffer;
1945 if (offset)
1946 *start += offset % RT6_INFO_LEN;
1947
1948 arg.len -= offset % RT6_INFO_LEN;
1949
1950 if (arg.len > length)
1951 arg.len = length;
1952 if (arg.len < 0)
1953 arg.len = 0;
1954
1955 return arg.len;
1956}
1957
1958extern struct rt6_statistics rt6_stats;
1959
1960static int rt6_stats_seq_show(struct seq_file *seq, void *v)
1961{
1962 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
1963 rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
1964 rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
1965 rt6_stats.fib_rt_cache,
1966 atomic_read(&ip6_dst_ops.entries),
1967 rt6_stats.fib_discarded_routes);
1968
1969 return 0;
1970}
1971
1972static int rt6_stats_seq_open(struct inode *inode, struct file *file)
1973{
1974 return single_open(file, rt6_stats_seq_show, NULL);
1975}
1976
1977static struct file_operations rt6_stats_seq_fops = {
1978 .owner = THIS_MODULE,
1979 .open = rt6_stats_seq_open,
1980 .read = seq_read,
1981 .llseek = seq_lseek,
1982 .release = single_release,
1983};
1984#endif /* CONFIG_PROC_FS */
1985
1986#ifdef CONFIG_SYSCTL
1987
1988static int flush_delay;
1989
1990static
1991int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
1992 void __user *buffer, size_t *lenp, loff_t *ppos)
1993{
1994 if (write) {
1995 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1996 fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay);
1997 return 0;
1998 } else
1999 return -EINVAL;
2000}
2001
2002ctl_table ipv6_route_table[] = {
2003 {
2004 .ctl_name = NET_IPV6_ROUTE_FLUSH,
2005 .procname = "flush",
2006 .data = &flush_delay,
2007 .maxlen = sizeof(int),
2008 .mode = 0644,
2009 .proc_handler = &ipv6_sysctl_rtcache_flush
2010 },
2011 {
2012 .ctl_name = NET_IPV6_ROUTE_GC_THRESH,
2013 .procname = "gc_thresh",
2014 .data = &ip6_dst_ops.gc_thresh,
2015 .maxlen = sizeof(int),
2016 .mode = 0644,
2017 .proc_handler = &proc_dointvec,
2018 },
2019 {
2020 .ctl_name = NET_IPV6_ROUTE_MAX_SIZE,
2021 .procname = "max_size",
2022 .data = &ip6_rt_max_size,
2023 .maxlen = sizeof(int),
2024 .mode = 0644,
2025 .proc_handler = &proc_dointvec,
2026 },
2027 {
2028 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2029 .procname = "gc_min_interval",
2030 .data = &ip6_rt_gc_min_interval,
2031 .maxlen = sizeof(int),
2032 .mode = 0644,
2033 .proc_handler = &proc_dointvec_jiffies,
2034 .strategy = &sysctl_jiffies,
2035 },
2036 {
2037 .ctl_name = NET_IPV6_ROUTE_GC_TIMEOUT,
2038 .procname = "gc_timeout",
2039 .data = &ip6_rt_gc_timeout,
2040 .maxlen = sizeof(int),
2041 .mode = 0644,
2042 .proc_handler = &proc_dointvec_jiffies,
2043 .strategy = &sysctl_jiffies,
2044 },
2045 {
2046 .ctl_name = NET_IPV6_ROUTE_GC_INTERVAL,
2047 .procname = "gc_interval",
2048 .data = &ip6_rt_gc_interval,
2049 .maxlen = sizeof(int),
2050 .mode = 0644,
2051 .proc_handler = &proc_dointvec_jiffies,
2052 .strategy = &sysctl_jiffies,
2053 },
2054 {
2055 .ctl_name = NET_IPV6_ROUTE_GC_ELASTICITY,
2056 .procname = "gc_elasticity",
2057 .data = &ip6_rt_gc_elasticity,
2058 .maxlen = sizeof(int),
2059 .mode = 0644,
2060 .proc_handler = &proc_dointvec_jiffies,
2061 .strategy = &sysctl_jiffies,
2062 },
2063 {
2064 .ctl_name = NET_IPV6_ROUTE_MTU_EXPIRES,
2065 .procname = "mtu_expires",
2066 .data = &ip6_rt_mtu_expires,
2067 .maxlen = sizeof(int),
2068 .mode = 0644,
2069 .proc_handler = &proc_dointvec_jiffies,
2070 .strategy = &sysctl_jiffies,
2071 },
2072 {
2073 .ctl_name = NET_IPV6_ROUTE_MIN_ADVMSS,
2074 .procname = "min_adv_mss",
2075 .data = &ip6_rt_min_advmss,
2076 .maxlen = sizeof(int),
2077 .mode = 0644,
2078 .proc_handler = &proc_dointvec_jiffies,
2079 .strategy = &sysctl_jiffies,
2080 },
2081 {
2082 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2083 .procname = "gc_min_interval_ms",
2084 .data = &ip6_rt_gc_min_interval,
2085 .maxlen = sizeof(int),
2086 .mode = 0644,
2087 .proc_handler = &proc_dointvec_ms_jiffies,
2088 .strategy = &sysctl_ms_jiffies,
2089 },
2090 { .ctl_name = 0 }
2091};
2092
2093#endif
2094
2095void __init ip6_route_init(void)
2096{
2097 struct proc_dir_entry *p;
2098
2099 ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache",
2100 sizeof(struct rt6_info),
2101 0, SLAB_HWCACHE_ALIGN,
2102 NULL, NULL);
2103 if (!ip6_dst_ops.kmem_cachep)
2104 panic("cannot create ip6_dst_cache");
2105
2106 fib6_init();
2107#ifdef CONFIG_PROC_FS
2108 p = proc_net_create("ipv6_route", 0, rt6_proc_info);
2109 if (p)
2110 p->owner = THIS_MODULE;
2111
2112 proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2113#endif
2114#ifdef CONFIG_XFRM
2115 xfrm6_init();
2116#endif
2117}
2118
2119void ip6_route_cleanup(void)
2120{
2121#ifdef CONFIG_PROC_FS
2122 proc_net_remove("ipv6_route");
2123 proc_net_remove("rt6_stats");
2124#endif
2125#ifdef CONFIG_XFRM
2126 xfrm6_fini();
2127#endif
2128 rt6_ifdown(NULL);
2129 fib6_gc_cleanup();
2130 kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2131}
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
new file mode 100644
index 000000000000..b788f55e139b
--- /dev/null
+++ b/net/ipv6/sit.c
@@ -0,0 +1,833 @@
1/*
2 * IPv6 over IPv4 tunnel device - Simple Internet Transition (SIT)
3 * Linux INET6 implementation
4 *
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
7 * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
8 *
9 * $Id: sit.c,v 1.53 2001/09/25 05:09:53 davem Exp $
10 *
11 * This program is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU General Public License
13 * as published by the Free Software Foundation; either version
14 * 2 of the License, or (at your option) any later version.
15 *
16 * Changes:
17 * Roger Venning <r.venning@telstra.com>: 6to4 support
18 * Nate Thompson <nate@thebog.net>: 6to4 support
19 */
20
21#include <linux/config.h>
22#include <linux/module.h>
23#include <linux/errno.h>
24#include <linux/types.h>
25#include <linux/socket.h>
26#include <linux/sockios.h>
27#include <linux/sched.h>
28#include <linux/net.h>
29#include <linux/in6.h>
30#include <linux/netdevice.h>
31#include <linux/if_arp.h>
32#include <linux/icmp.h>
33#include <asm/uaccess.h>
34#include <linux/init.h>
35#include <linux/netfilter_ipv4.h>
36
37#include <net/sock.h>
38#include <net/snmp.h>
39
40#include <net/ipv6.h>
41#include <net/protocol.h>
42#include <net/transp_v6.h>
43#include <net/ip6_fib.h>
44#include <net/ip6_route.h>
45#include <net/ndisc.h>
46#include <net/addrconf.h>
47#include <net/ip.h>
48#include <net/udp.h>
49#include <net/icmp.h>
50#include <net/ipip.h>
51#include <net/inet_ecn.h>
52#include <net/xfrm.h>
53#include <net/dsfield.h>
54
55/*
56 This version of net/ipv6/sit.c is cloned from net/ipv4/ip_gre.c
57
58 For comments look at net/ipv4/ip_gre.c --ANK
59 */
60
61#define HASH_SIZE 16
62#define HASH(addr) ((addr^(addr>>4))&0xF)
63
64static int ipip6_fb_tunnel_init(struct net_device *dev);
65static int ipip6_tunnel_init(struct net_device *dev);
66static void ipip6_tunnel_setup(struct net_device *dev);
67
68static struct net_device *ipip6_fb_tunnel_dev;
69
/*
 * Tunnel lookup tables, one per combination of configured endpoints.
 * ipip6_bucket() selects a table through tunnels[prio], where prio has
 * bit 2 set when the tunnel has a remote address and bit 1 set when it
 * has a local address.
 */
/* both endpoints set; bucket index is HASH(remote) ^ HASH(local) */
70static struct ip_tunnel *tunnels_r_l[HASH_SIZE];
/* remote endpoint only; bucket index is HASH(remote) */
71static struct ip_tunnel *tunnels_r[HASH_SIZE];
/* local endpoint only; bucket index is HASH(local) */
72static struct ip_tunnel *tunnels_l[HASH_SIZE];
/* neither endpoint set; a single slot */
73static struct ip_tunnel *tunnels_wc[1];
/* tables ordered by prio: 0 = wildcard, 1 = local, 2 = remote, 3 = both */
74static struct ip_tunnel **tunnels[4] = { tunnels_wc, tunnels_l, tunnels_r, tunnels_r_l };
75
76static DEFINE_RWLOCK(ipip6_lock);
77
78static struct ip_tunnel * ipip6_tunnel_lookup(u32 remote, u32 local)
79{
80 unsigned h0 = HASH(remote);
81 unsigned h1 = HASH(local);
82 struct ip_tunnel *t;
83
84 for (t = tunnels_r_l[h0^h1]; t; t = t->next) {
85 if (local == t->parms.iph.saddr &&
86 remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
87 return t;
88 }
89 for (t = tunnels_r[h0]; t; t = t->next) {
90 if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
91 return t;
92 }
93 for (t = tunnels_l[h1]; t; t = t->next) {
94 if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
95 return t;
96 }
97 if ((t = tunnels_wc[0]) != NULL && (t->dev->flags&IFF_UP))
98 return t;
99 return NULL;
100}
101
102static struct ip_tunnel ** ipip6_bucket(struct ip_tunnel *t)
103{
104 u32 remote = t->parms.iph.daddr;
105 u32 local = t->parms.iph.saddr;
106 unsigned h = 0;
107 int prio = 0;
108
109 if (remote) {
110 prio |= 2;
111 h ^= HASH(remote);
112 }
113 if (local) {
114 prio |= 1;
115 h ^= HASH(local);
116 }
117 return &tunnels[prio][h];
118}
119
120static void ipip6_tunnel_unlink(struct ip_tunnel *t)
121{
122 struct ip_tunnel **tp;
123
124 for (tp = ipip6_bucket(t); *tp; tp = &(*tp)->next) {
125 if (t == *tp) {
126 write_lock_bh(&ipip6_lock);
127 *tp = t->next;
128 write_unlock_bh(&ipip6_lock);
129 break;
130 }
131 }
132}
133
134static void ipip6_tunnel_link(struct ip_tunnel *t)
135{
136 struct ip_tunnel **tp = ipip6_bucket(t);
137
138 t->next = *tp;
139 write_lock_bh(&ipip6_lock);
140 *tp = t;
141 write_unlock_bh(&ipip6_lock);
142}
143
144static struct ip_tunnel * ipip6_tunnel_locate(struct ip_tunnel_parm *parms, int create)
145{
146 u32 remote = parms->iph.daddr;
147 u32 local = parms->iph.saddr;
148 struct ip_tunnel *t, **tp, *nt;
149 struct net_device *dev;
150 unsigned h = 0;
151 int prio = 0;
152 char name[IFNAMSIZ];
153
154 if (remote) {
155 prio |= 2;
156 h ^= HASH(remote);
157 }
158 if (local) {
159 prio |= 1;
160 h ^= HASH(local);
161 }
162 for (tp = &tunnels[prio][h]; (t = *tp) != NULL; tp = &t->next) {
163 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
164 return t;
165 }
166 if (!create)
167 goto failed;
168
169 if (parms->name[0])
170 strlcpy(name, parms->name, IFNAMSIZ);
171 else {
172 int i;
173 for (i=1; i<100; i++) {
174 sprintf(name, "sit%d", i);
175 if (__dev_get_by_name(name) == NULL)
176 break;
177 }
178 if (i==100)
179 goto failed;
180 }
181
182 dev = alloc_netdev(sizeof(*t), name, ipip6_tunnel_setup);
183 if (dev == NULL)
184 return NULL;
185
186 nt = dev->priv;
187 dev->init = ipip6_tunnel_init;
188 nt->parms = *parms;
189
190 if (register_netdevice(dev) < 0) {
191 free_netdev(dev);
192 goto failed;
193 }
194
195 dev_hold(dev);
196
197 ipip6_tunnel_link(nt);
198 /* Do not decrement MOD_USE_COUNT here. */
199 return nt;
200
201failed:
202 return NULL;
203}
204
205static void ipip6_tunnel_uninit(struct net_device *dev)
206{
207 if (dev == ipip6_fb_tunnel_dev) {
208 write_lock_bh(&ipip6_lock);
209 tunnels_wc[0] = NULL;
210 write_unlock_bh(&ipip6_lock);
211 dev_put(dev);
212 } else {
213 ipip6_tunnel_unlink((struct ip_tunnel*)dev->priv);
214 dev_put(dev);
215 }
216}
217
218
/*
 * ICMP error handler for SIT tunnels.
 *
 * Two variants are selected at compile time.  The one built when
 * I_WISH_WORLD_WERE_PERFECT is not defined only does error accounting:
 * it filters the ICMP type/code, looks up the tunnel the offending
 * packet was sent through and updates that tunnel's err_count and
 * err_time.  The other variant translates the ICMP error into an ICMPv6
 * error and sends it with icmpv6_send().
 */
219static void ipip6_err(struct sk_buff *skb, u32 info)
220{
221#ifndef I_WISH_WORLD_WERE_PERFECT
222
223/* It is not :-( All the routers (except for Linux) return only
224 8 bytes of packet payload. It means, that precise relaying of
225 ICMP in the real Internet is absolutely infeasible.
226 */
/* skb->data is read as the IPv4 header of the packet that triggered the error */
227 struct iphdr *iph = (struct iphdr*)skb->data;
228 int type = skb->h.icmph->type;
229 int code = skb->h.icmph->code;
230 struct ip_tunnel *t;
231
/* only some unreachable codes and TTL-exceeded are accounted; the rest return early */
232 switch (type) {
233 default:
234 case ICMP_PARAMETERPROB:
235 return;
236
237 case ICMP_DEST_UNREACH:
238 switch (code) {
239 case ICMP_SR_FAILED:
240 case ICMP_PORT_UNREACH:
241 /* Impossible event. */
242 return;
243 case ICMP_FRAG_NEEDED:
244 /* Soft state for pmtu is maintained by IP core. */
245 return;
246 default:
247 /* All others are translated to HOST_UNREACH.
248 rfc2003 contains "deep thoughts" about NET_UNREACH,
249 I believe they are just ether pollution. --ANK
250 */
251 break;
252 }
253 break;
254 case ICMP_TIME_EXCEEDED:
255 if (code != ICMP_EXC_TTL)
256 return;
257 break;
258 }
259
260 read_lock(&ipip6_lock);
/* the quoted packet was sent by us, so its daddr is the tunnel's remote end */
261 t = ipip6_tunnel_lookup(iph->daddr, iph->saddr);
262 if (t == NULL || t->parms.iph.daddr == 0)
263 goto out;
264 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
265 goto out;
266
/* errors within IPTUNNEL_ERR_TIMEO of the previous one accumulate; otherwise restart at 1 */
267 if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO)
268 t->err_count++;
269 else
270 t->err_count = 1;
271 t->err_time = jiffies;
272out:
273 read_unlock(&ipip6_lock);
274 return;
275#else
/* NOTE(review): this variant uses 'dp' and 'len', which are not declared
   anywhere in this function - it does not look buildable as written. */
276 struct iphdr *iph = (struct iphdr*)dp;
277 int hlen = iph->ihl<<2;
278 struct ipv6hdr *iph6;
279 int type = skb->h.icmph->type;
280 int code = skb->h.icmph->code;
281 int rel_type = 0;
282 int rel_code = 0;
283 int rel_info = 0;
284 struct sk_buff *skb2;
285 struct rt6_info *rt6i;
286
287 if (len < hlen + sizeof(struct ipv6hdr))
288 return;
289 iph6 = (struct ipv6hdr*)(dp + hlen);
290
/* map the ICMP type/code onto the ICMPv6 type/code to relay */
291 switch (type) {
292 default:
293 return;
294 case ICMP_PARAMETERPROB:
295 if (skb->h.icmph->un.gateway < hlen)
296 return;
297
298 /* So... This guy found something strange INSIDE encapsulated
299 packet. Well, he is fool, but what can we do ?
300 */
301 rel_type = ICMPV6_PARAMPROB;
302 rel_info = skb->h.icmph->un.gateway - hlen;
303 break;
304
305 case ICMP_DEST_UNREACH:
306 switch (code) {
307 case ICMP_SR_FAILED:
308 case ICMP_PORT_UNREACH:
309 /* Impossible event. */
310 return;
311 case ICMP_FRAG_NEEDED:
312 /* Too complicated case ... */
313 return;
314 default:
315 /* All others are translated to HOST_UNREACH.
316 rfc2003 contains "deep thoughts" about NET_UNREACH,
317 I believe, it is just ether pollution. --ANK
318 */
319 rel_type = ICMPV6_DEST_UNREACH;
320 rel_code = ICMPV6_ADDR_UNREACH;
321 break;
322 }
323 break;
324 case ICMP_TIME_EXCEEDED:
325 if (code != ICMP_EXC_TTL)
326 return;
327 rel_type = ICMPV6_TIME_EXCEED;
328 rel_code = ICMPV6_EXC_HOPLIMIT;
329 break;
330 }
331
332 /* Prepare fake skb to feed it to icmpv6_send */
333 skb2 = skb_clone(skb, GFP_ATOMIC);
334 if (skb2 == NULL)
335 return;
336 dst_release(skb2->dst);
337 skb2->dst = NULL;
338 skb_pull(skb2, skb->data - (u8*)iph6);
339 skb2->nh.raw = skb2->data;
340
341 /* Try to guess incoming interface */
342 rt6i = rt6_lookup(&iph6->saddr, NULL, NULL, 0);
343 if (rt6i && rt6i->rt6i_dev) {
344 skb2->dev = rt6i->rt6i_dev;
345
346 rt6i = rt6_lookup(&iph6->daddr, &iph6->saddr, NULL, 0);
347
/* relay only when the inner destination is routed through a SIT device */
348 if (rt6i && rt6i->rt6i_dev && rt6i->rt6i_dev->type == ARPHRD_SIT) {
349 struct ip_tunnel * t = (struct ip_tunnel*)rt6i->rt6i_dev->priv;
350 if (rel_type == ICMPV6_TIME_EXCEED && t->parms.iph.ttl) {
351 rel_type = ICMPV6_DEST_UNREACH;
352 rel_code = ICMPV6_ADDR_UNREACH;
353 }
354 icmpv6_send(skb2, rel_type, rel_code, rel_info, skb2->dev);
355 }
356 }
357 kfree_skb(skb2);
358 return;
359#endif
360}
361
362static inline void ipip6_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
363{
364 if (INET_ECN_is_ce(iph->tos))
365 IP6_ECN_set_ce(skb->nh.ipv6h);
366}
367
368static int ipip6_rcv(struct sk_buff *skb)
369{
370 struct iphdr *iph;
371 struct ip_tunnel *tunnel;
372
373 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
374 goto out;
375
376 iph = skb->nh.iph;
377
378 read_lock(&ipip6_lock);
379 if ((tunnel = ipip6_tunnel_lookup(iph->saddr, iph->daddr)) != NULL) {
380 secpath_reset(skb);
381 skb->mac.raw = skb->nh.raw;
382 skb->nh.raw = skb->data;
383 memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
384 skb->protocol = htons(ETH_P_IPV6);
385 skb->pkt_type = PACKET_HOST;
386 tunnel->stat.rx_packets++;
387 tunnel->stat.rx_bytes += skb->len;
388 skb->dev = tunnel->dev;
389 dst_release(skb->dst);
390 skb->dst = NULL;
391 nf_reset(skb);
392 ipip6_ecn_decapsulate(iph, skb);
393 netif_rx(skb);
394 read_unlock(&ipip6_lock);
395 return 0;
396 }
397
398 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PROT_UNREACH, 0);
399 kfree_skb(skb);
400 read_unlock(&ipip6_lock);
401out:
402 return 0;
403}
404
405/* Returns the embedded IPv4 address if the IPv6 address
406 comes from 6to4 (RFC 3056) addr space */
407
408static inline u32 try_6to4(struct in6_addr *v6dst)
409{
410 u32 dst = 0;
411
412 if (v6dst->s6_addr16[0] == htons(0x2002)) {
413 /* 6to4 v6 addr has 16 bits prefix, 32 v4addr, 16 SLA, ... */
414 memcpy(&dst, &v6dst->s6_addr16[1], 4);
415 }
416 return dst;
417}
418
419/*
420 * This function assumes it is being called from dev_queue_xmit()
421 * and that skb is filled properly by that function.
422 */
423
/*
 * ipip6_tunnel_xmit - encapsulate an IPv6 packet in IPv4 and transmit it.
 *
 * Steps, in order:
 *  1. pick the outer IPv4 destination: the tunnel's configured daddr,
 *     else the address embedded in a 6to4 destination, else the low
 *     32 bits of an IPv4-compatible next hop / destination;
 *  2. look up an IPv4 route and refuse non-unicast routes and routes
 *     that lead back to this very device;
 *  3. work out the MTU and report ICMPV6_PKT_TOOBIG for oversized packets;
 *  4. make sure there is headroom for the outer header (reallocating
 *     the skb if needed), then build the IPv4 header and transmit.
 *
 * Always returns 0.  On the error labels the skb is freed and
 * tx_errors is incremented; tunnel->recursion is decremented on every
 * exit path to balance the increment at the top.
 */
424static int ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
425{
426 struct ip_tunnel *tunnel = (struct ip_tunnel*)dev->priv;
427 struct net_device_stats *stats = &tunnel->stat;
428 struct iphdr *tiph = &tunnel->parms.iph;
429 struct ipv6hdr *iph6 = skb->nh.ipv6h;
430 u8 tos = tunnel->parms.iph.tos;
431 struct rtable *rt; /* Route to the other host */
432 struct net_device *tdev; /* Device to other host */
433 struct iphdr *iph; /* Our new IP header */
434 int max_headroom; /* The extra header space needed */
435 u32 dst = tiph->daddr;
436 int mtu;
437 struct in6_addr *addr6;
438 int addr_type;
439
/* Re-entrancy guard: a non-zero count means this tunnel is already
 * inside xmit, so count a collision and drop the packet. */
440 if (tunnel->recursion++) {
441 tunnel->stat.collisions++;
442 goto tx_error;
443 }
444
445 if (skb->protocol != htons(ETH_P_IPV6))
446 goto tx_error;
447
/* No configured remote endpoint: try a 6to4 destination first. */
448 if (!dst)
449 dst = try_6to4(&iph6->daddr);
450
/* Still nothing: use the neighbour key (or, if that is the unspecified
 * address, the IPv6 destination).  It must be an IPv4-compatible
 * address; its last 32 bits become the IPv4 destination. */
451 if (!dst) {
452 struct neighbour *neigh = NULL;
453
454 if (skb->dst)
455 neigh = skb->dst->neighbour;
456
457 if (neigh == NULL) {
458 if (net_ratelimit())
459 printk(KERN_DEBUG "sit: nexthop == NULL\n");
460 goto tx_error;
461 }
462
463 addr6 = (struct in6_addr*)&neigh->primary_key;
464 addr_type = ipv6_addr_type(addr6);
465
466 if (addr_type == IPV6_ADDR_ANY) {
467 addr6 = &skb->nh.ipv6h->daddr;
468 addr_type = ipv6_addr_type(addr6);
469 }
470
471 if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
472 goto tx_error_icmp;
473
474 dst = addr6->s6_addr32[3];
475 }
476
/* Route the outer IPv4 packet; a lookup failure is a carrier error. */
477 {
478 struct flowi fl = { .nl_u = { .ip4_u =
479 { .daddr = dst,
480 .saddr = tiph->saddr,
481 .tos = RT_TOS(tos) } },
482 .oif = tunnel->parms.link,
483 .proto = IPPROTO_IPV6 };
484 if (ip_route_output_key(&rt, &fl)) {
485 tunnel->stat.tx_carrier_errors++;
486 goto tx_error_icmp;
487 }
488 }
/* From here on 'rt' holds a reference that every error path must put. */
489 if (rt->rt_type != RTN_UNICAST) {
490 ip_rt_put(rt);
491 tunnel->stat.tx_carrier_errors++;
492 goto tx_error_icmp;
493 }
494 tdev = rt->u.dst.dev;
495
/* The route leads back into this tunnel device: would loop. */
496 if (tdev == dev) {
497 ip_rt_put(rt);
498 tunnel->stat.collisions++;
499 goto tx_error;
500 }
501
/* With frag_off set in the tunnel template the MTU is the IPv4 route
 * MTU minus the outer header; otherwise it is the MTU of the packet's
 * own dst, or of the tunnel device when there is no dst. */
502 if (tiph->frag_off)
503 mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr);
504 else
505 mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu;
506
507 if (mtu < 68) {
508 tunnel->stat.collisions++;
509 ip_rt_put(rt);
510 goto tx_error;
511 }
/* Never advertise less than the IPv6 minimum MTU. */
512 if (mtu < IPV6_MIN_MTU)
513 mtu = IPV6_MIN_MTU;
514 if (tunnel->parms.iph.daddr && skb->dst)
515 skb->dst->ops->update_pmtu(skb->dst, mtu);
516
517 if (skb->len > mtu) {
518 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev);
519 ip_rt_put(rt);
520 goto tx_error;
521 }
522
/* While recent errors are recorded in err_count/err_time, consume one
 * per packet and signal a link failure; stale counts are simply reset. */
523 if (tunnel->err_count > 0) {
524 if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) {
525 tunnel->err_count--;
526 dst_link_failure(skb);
527 } else
528 tunnel->err_count = 0;
529 }
530
531 /*
532 * Okay, now see if we can stuff it in the buffer as-is.
533 */
534 max_headroom = LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr);
535
/* Not enough headroom, or the buffer is cloned/shared: work on a
 * private copy.  This failure path is the only one that counts
 * tx_dropped instead of tx_errors. */
536 if (skb_headroom(skb) < max_headroom || skb_cloned(skb) || skb_shared(skb)) {
537 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
538 if (!new_skb) {
539 ip_rt_put(rt);
540 stats->tx_dropped++;
541 dev_kfree_skb(skb);
542 tunnel->recursion--;
543 return 0;
544 }
545 if (skb->sk)
546 skb_set_owner_w(new_skb, skb->sk);
547 dev_kfree_skb(skb);
548 skb = new_skb;
/* The old header pointer referred to the freed skb; refresh it. */
549 iph6 = skb->nh.ipv6h;
550 }
551
/* The IPv6 header becomes the transport header; the network header
 * now points at the freshly pushed IPv4 header.  The route reference
 * is handed over to the skb via skb->dst. */
552 skb->h.raw = skb->nh.raw;
553 skb->nh.raw = skb_push(skb, sizeof(struct iphdr));
554 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
555 dst_release(skb->dst);
556 skb->dst = &rt->u.dst;
557
558 /*
559 * Push down and install the IPIP header.
560 */
561
562 iph = skb->nh.iph;
563 iph->version = 4;
564 iph->ihl = sizeof(struct iphdr)>>2;
/* DF is set only when the MTU is above the IPv6 minimum. */
565 if (mtu > IPV6_MIN_MTU)
566 iph->frag_off = htons(IP_DF);
567 else
568 iph->frag_off = 0;
569
570 iph->protocol = IPPROTO_IPV6;
571 iph->tos = INET_ECN_encapsulate(tos, ipv6_get_dsfield(iph6));
572 iph->daddr = rt->rt_dst;
573 iph->saddr = rt->rt_src;
574
/* A configured TTL of 0 means: inherit the inner hop limit. */
575 if ((iph->ttl = tiph->ttl) == 0)
576 iph->ttl = iph6->hop_limit;
577
578 nf_reset(skb);
579
/* NOTE(review): IPTUNNEL_XMIT() is a macro defined outside this chunk;
 * presumably it finalises the header and sends the packet using the
 * locals declared above - confirm against its definition. */
580 IPTUNNEL_XMIT();
581 tunnel->recursion--;
582 return 0;
583
584tx_error_icmp:
585 dst_link_failure(skb);
586tx_error:
587 stats->tx_errors++;
588 dev_kfree_skb(skb);
589 tunnel->recursion--;
590 return 0;
591}
592
/*
 * ipip6_tunnel_ioctl - handle the tunnel configuration ioctls.
 *
 * @dev: device the ioctl was issued on
 * @ifr: request; ifr_ifru.ifru_data points at a user-space
 *       struct ip_tunnel_parm
 * @cmd: SIOCGETTUNNEL, SIOCADDTUNNEL, SIOCCHGTUNNEL or SIOCDELTUNNEL
 *
 * Returns 0 on success or a negative errno: -EFAULT for failed user
 * copies, -EPERM without CAP_NET_ADMIN (or when asked to delete the
 * fallback tunnel), -EINVAL for malformed parameters or an unknown
 * command, -EEXIST / -ENOENT / -ENOBUFS from the add/change logic, or
 * whatever unregister_netdevice() returns for a delete.
 */
593static int
594ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
595{
596 int err = 0;
597 struct ip_tunnel_parm p;
598 struct ip_tunnel *t;
599
600 switch (cmd) {
601 case SIOCGETTUNNEL:
602 t = NULL;
/* On the fallback device the caller names the tunnel to query via the
 * parameters it passes in; on any other device (or when nothing
 * matches) the device's own tunnel is reported. */
603 if (dev == ipip6_fb_tunnel_dev) {
604 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
605 err = -EFAULT;
606 break;
607 }
608 t = ipip6_tunnel_locate(&p, 0);
609 }
610 if (t == NULL)
611 t = (struct ip_tunnel*)dev->priv;
612 memcpy(&p, &t->parms, sizeof(p));
613 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
614 err = -EFAULT;
615 break;
616
617 case SIOCADDTUNNEL:
618 case SIOCCHGTUNNEL:
619 err = -EPERM;
620 if (!capable(CAP_NET_ADMIN))
621 goto done;
622
623 err = -EFAULT;
624 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
625 goto done;
626
/* The template must be a plain 20-byte IPv4 header carrying IPv6,
 * with no fragment bits other than DF. */
627 err = -EINVAL;
628 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPV6 ||
629 p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
630 goto done;
/* A fixed TTL forces DF on the outer header. */
631 if (p.iph.ttl)
632 p.iph.frag_off |= htons(IP_DF);
633
/* The second argument asks for creation only on SIOCADDTUNNEL. */
634 t = ipip6_tunnel_locate(&p, cmd == SIOCADDTUNNEL);
635
636 if (dev != ipip6_fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
637 if (t != NULL) {
/* The requested endpoints already belong to another device. */
638 if (t->dev != dev) {
639 err = -EEXIST;
640 break;
641 }
642 } else {
/* A point-to-point device must keep a remote address and a
 * non-point-to-point one must not gain one. */
643 if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) ||
644 (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) {
645 err = -EINVAL;
646 break;
647 }
/* Re-key this device's tunnel: unlink, change the endpoints (and the
 * addresses mirrored into the net_device), then link it again. */
648 t = (struct ip_tunnel*)dev->priv;
649 ipip6_tunnel_unlink(t);
650 t->parms.iph.saddr = p.iph.saddr;
651 t->parms.iph.daddr = p.iph.daddr;
652 memcpy(dev->dev_addr, &p.iph.saddr, 4);
653 memcpy(dev->broadcast, &p.iph.daddr, 4);
654 ipip6_tunnel_link(t);
655 netdev_state_change(dev);
656 }
657 }
658
659 if (t) {
660 err = 0;
661 if (cmd == SIOCCHGTUNNEL) {
662 t->parms.iph.ttl = p.iph.ttl;
663 t->parms.iph.tos = p.iph.tos;
664 }
/* Report the resulting parameters back to the caller. */
665 if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
666 err = -EFAULT;
667 } else
668 err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
669 break;
670
671 case SIOCDELTUNNEL:
672 err = -EPERM;
673 if (!capable(CAP_NET_ADMIN))
674 goto done;
675
/* Through the fallback device the victim is named by the passed-in
 * parameters; the fallback tunnel itself may not be deleted. */
676 if (dev == ipip6_fb_tunnel_dev) {
677 err = -EFAULT;
678 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
679 goto done;
680 err = -ENOENT;
681 if ((t = ipip6_tunnel_locate(&p, 0)) == NULL)
682 goto done;
683 err = -EPERM;
684 if (t == ipip6_fb_tunnel_dev->priv)
685 goto done;
686 dev = t->dev;
687 }
688 err = unregister_netdevice(dev);
689 break;
690
691 default:
692 err = -EINVAL;
693 }
694
695done:
696 return err;
697}
698
699static struct net_device_stats *ipip6_tunnel_get_stats(struct net_device *dev)
700{
701 return &(((struct ip_tunnel*)dev->priv)->stat);
702}
703
704static int ipip6_tunnel_change_mtu(struct net_device *dev, int new_mtu)
705{
706 if (new_mtu < IPV6_MIN_MTU || new_mtu > 0xFFF8 - sizeof(struct iphdr))
707 return -EINVAL;
708 dev->mtu = new_mtu;
709 return 0;
710}
711
712static void ipip6_tunnel_setup(struct net_device *dev)
713{
714 SET_MODULE_OWNER(dev);
715 dev->uninit = ipip6_tunnel_uninit;
716 dev->destructor = free_netdev;
717 dev->hard_start_xmit = ipip6_tunnel_xmit;
718 dev->get_stats = ipip6_tunnel_get_stats;
719 dev->do_ioctl = ipip6_tunnel_ioctl;
720 dev->change_mtu = ipip6_tunnel_change_mtu;
721
722 dev->type = ARPHRD_SIT;
723 dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr);
724 dev->mtu = 1500 - sizeof(struct iphdr);
725 dev->flags = IFF_NOARP;
726 dev->iflink = 0;
727 dev->addr_len = 4;
728}
729
730static int ipip6_tunnel_init(struct net_device *dev)
731{
732 struct net_device *tdev = NULL;
733 struct ip_tunnel *tunnel;
734 struct iphdr *iph;
735
736 tunnel = (struct ip_tunnel*)dev->priv;
737 iph = &tunnel->parms.iph;
738
739 tunnel->dev = dev;
740 strcpy(tunnel->parms.name, dev->name);
741
742 memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
743 memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
744
745 if (iph->daddr) {
746 struct flowi fl = { .nl_u = { .ip4_u =
747 { .daddr = iph->daddr,
748 .saddr = iph->saddr,
749 .tos = RT_TOS(iph->tos) } },
750 .oif = tunnel->parms.link,
751 .proto = IPPROTO_IPV6 };
752 struct rtable *rt;
753 if (!ip_route_output_key(&rt, &fl)) {
754 tdev = rt->u.dst.dev;
755 ip_rt_put(rt);
756 }
757 dev->flags |= IFF_POINTOPOINT;
758 }
759
760 if (!tdev && tunnel->parms.link)
761 tdev = __dev_get_by_index(tunnel->parms.link);
762
763 if (tdev) {
764 dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
765 dev->mtu = tdev->mtu - sizeof(struct iphdr);
766 if (dev->mtu < IPV6_MIN_MTU)
767 dev->mtu = IPV6_MIN_MTU;
768 }
769 dev->iflink = tunnel->parms.link;
770
771 return 0;
772}
773
774int __init ipip6_fb_tunnel_init(struct net_device *dev)
775{
776 struct ip_tunnel *tunnel = dev->priv;
777 struct iphdr *iph = &tunnel->parms.iph;
778
779 tunnel->dev = dev;
780 strcpy(tunnel->parms.name, dev->name);
781
782 iph->version = 4;
783 iph->protocol = IPPROTO_IPV6;
784 iph->ihl = 5;
785 iph->ttl = 64;
786
787 dev_hold(dev);
788 tunnels_wc[0] = tunnel;
789 return 0;
790}
791
/*
 * IPv4 protocol hooks for SIT: ipip6_rcv handles received packets and
 * ipip6_err handles ICMP errors.  Registered for IPPROTO_IPV6 in
 * sit_init() and removed again in sit_cleanup().
 */
792static struct net_protocol sit_protocol = {
793 .handler = ipip6_rcv,
794 .err_handler = ipip6_err,
795};
796
797void __exit sit_cleanup(void)
798{
799 inet_del_protocol(&sit_protocol, IPPROTO_IPV6);
800 unregister_netdev(ipip6_fb_tunnel_dev);
801}
802
803int __init sit_init(void)
804{
805 int err;
806
807 printk(KERN_INFO "IPv6 over IPv4 tunneling driver\n");
808
809 if (inet_add_protocol(&sit_protocol, IPPROTO_IPV6) < 0) {
810 printk(KERN_INFO "sit init: Can't add protocol\n");
811 return -EAGAIN;
812 }
813
814 ipip6_fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "sit0",
815 ipip6_tunnel_setup);
816 if (!ipip6_fb_tunnel_dev) {
817 err = -ENOMEM;
818 goto err1;
819 }
820
821 ipip6_fb_tunnel_dev->init = ipip6_fb_tunnel_init;
822
823 if ((err = register_netdev(ipip6_fb_tunnel_dev)))
824 goto err2;
825
826 out:
827 return err;
828 err2:
829 free_netdev(ipip6_fb_tunnel_dev);
830 err1:
831 inet_del_protocol(&sit_protocol, IPPROTO_IPV6);
832 goto out;
833}
diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
new file mode 100644
index 000000000000..3a18e0e6ffed
--- /dev/null
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -0,0 +1,125 @@
1/*
2 * sysctl_net_ipv6.c: sysctl interface to net IPV6 subsystem.
3 *
4 * Changes:
5 * YOSHIFUJI Hideaki @USAGI: added icmp sysctl table.
6 */
7
8#include <linux/mm.h>
9#include <linux/sysctl.h>
10#include <linux/config.h>
11#include <linux/in6.h>
12#include <linux/ipv6.h>
13#include <net/ndisc.h>
14#include <net/ipv6.h>
15#include <net/addrconf.h>
16
17extern ctl_table ipv6_route_table[];
18extern ctl_table ipv6_icmp_table[];
19
20#ifdef CONFIG_SYSCTL
21
/*
 * Entries that make up the "ipv6" sysctl directory (ipv6_net_table
 * points here): two sub-directories whose tables are defined elsewhere
 * ("route" and "icmp", mode 0555), followed by integer tunables (mode
 * 0644).  The list ends with an entry whose ctl_name is 0.
 */
22static ctl_table ipv6_table[] = {
23 {
24 .ctl_name = NET_IPV6_ROUTE,
25 .procname = "route",
26 .maxlen = 0,
27 .mode = 0555,
28 .child = ipv6_route_table
29 },
30 {
31 .ctl_name = NET_IPV6_ICMP,
32 .procname = "icmp",
33 .maxlen = 0,
34 .mode = 0555,
35 .child = ipv6_icmp_table
36 },
/* Plain integers, handled by proc_dointvec. */
37 {
38 .ctl_name = NET_IPV6_BINDV6ONLY,
39 .procname = "bindv6only",
40 .data = &sysctl_ipv6_bindv6only,
41 .maxlen = sizeof(int),
42 .mode = 0644,
43 .proc_handler = &proc_dointvec
44 },
45 {
46 .ctl_name = NET_IPV6_IP6FRAG_HIGH_THRESH,
47 .procname = "ip6frag_high_thresh",
48 .data = &sysctl_ip6frag_high_thresh,
49 .maxlen = sizeof(int),
50 .mode = 0644,
51 .proc_handler = &proc_dointvec
52 },
53 {
54 .ctl_name = NET_IPV6_IP6FRAG_LOW_THRESH,
55 .procname = "ip6frag_low_thresh",
56 .data = &sysctl_ip6frag_low_thresh,
57 .maxlen = sizeof(int),
58 .mode = 0644,
59 .proc_handler = &proc_dointvec
60 },
/* The next two entries go through the jiffies handlers
 * (proc_dointvec_jiffies / sysctl_jiffies) instead of proc_dointvec. */
61 {
62 .ctl_name = NET_IPV6_IP6FRAG_TIME,
63 .procname = "ip6frag_time",
64 .data = &sysctl_ip6frag_time,
65 .maxlen = sizeof(int),
66 .mode = 0644,
67 .proc_handler = &proc_dointvec_jiffies,
68 .strategy = &sysctl_jiffies,
69 },
70 {
71 .ctl_name = NET_IPV6_IP6FRAG_SECRET_INTERVAL,
72 .procname = "ip6frag_secret_interval",
73 .data = &sysctl_ip6frag_secret_interval,
74 .maxlen = sizeof(int),
75 .mode = 0644,
76 .proc_handler = &proc_dointvec_jiffies,
77 .strategy = &sysctl_jiffies
78 },
79 {
80 .ctl_name = NET_IPV6_MLD_MAX_MSF,
81 .procname = "mld_max_msf",
82 .data = &sysctl_mld_max_msf,
83 .maxlen = sizeof(int),
84 .mode = 0644,
85 .proc_handler = &proc_dointvec
86 },
/* Terminator. */
87 { .ctl_name = 0 }
88};
89
90static struct ctl_table_header *ipv6_sysctl_header;
91
/* The "ipv6" directory entry; its children are the entries of ipv6_table. */
92static ctl_table ipv6_net_table[] = {
93 {
94 .ctl_name = NET_IPV6,
95 .procname = "ipv6",
96 .mode = 0555,
97 .child = ipv6_table
98 },
99 { .ctl_name = 0 }
100};
101
/*
 * Root of the hierarchy handed to register_sysctl_table(): a "net"
 * directory entry whose child is ipv6_net_table.
 */
102static ctl_table ipv6_root_table[] = {
103 {
104 .ctl_name = CTL_NET,
105 .procname = "net",
106 .mode = 0555,
107 .child = ipv6_net_table
108 },
109 { .ctl_name = 0 }
110};
111
112void ipv6_sysctl_register(void)
113{
114 ipv6_sysctl_header = register_sysctl_table(ipv6_root_table, 0);
115}
116
117void ipv6_sysctl_unregister(void)
118{
119 unregister_sysctl_table(ipv6_sysctl_header);
120}
121
122#endif /* CONFIG_SYSCTL */
123
124
125
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
new file mode 100644
index 000000000000..4760c85e19db
--- /dev/null
+++ b/net/ipv6/tcp_ipv6.c
@@ -0,0 +1,2265 @@
1/*
2 * TCP over IPv6
3 * Linux INET6 implementation
4 *
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
7 *
8 * $Id: tcp_ipv6.c,v 1.144 2002/02/01 22:01:04 davem Exp $
9 *
10 * Based on:
11 * linux/net/ipv4/tcp.c
12 * linux/net/ipv4/tcp_input.c
13 * linux/net/ipv4/tcp_output.c
14 *
15 * Fixes:
16 * Hideaki YOSHIFUJI : sin6_scope_id support
17 * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which
18 * Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind
19 * a single port at the same time.
20 * YOSHIFUJI Hideaki @USAGI: convert /proc/net/tcp6 to seq_file.
21 *
22 * This program is free software; you can redistribute it and/or
23 * modify it under the terms of the GNU General Public License
24 * as published by the Free Software Foundation; either version
25 * 2 of the License, or (at your option) any later version.
26 */
27
28#include <linux/module.h>
29#include <linux/config.h>
30#include <linux/errno.h>
31#include <linux/types.h>
32#include <linux/socket.h>
33#include <linux/sockios.h>
34#include <linux/net.h>
35#include <linux/jiffies.h>
36#include <linux/in.h>
37#include <linux/in6.h>
38#include <linux/netdevice.h>
39#include <linux/init.h>
40#include <linux/jhash.h>
41#include <linux/ipsec.h>
42#include <linux/times.h>
43
44#include <linux/ipv6.h>
45#include <linux/icmpv6.h>
46#include <linux/random.h>
47
48#include <net/tcp.h>
49#include <net/ndisc.h>
50#include <net/ipv6.h>
51#include <net/transp_v6.h>
52#include <net/addrconf.h>
53#include <net/ip6_route.h>
54#include <net/ip6_checksum.h>
55#include <net/inet_ecn.h>
56#include <net/protocol.h>
57#include <net/xfrm.h>
58#include <net/addrconf.h>
59#include <net/snmp.h>
60#include <net/dsfield.h>
61
62#include <asm/uaccess.h>
63
64#include <linux/proc_fs.h>
65#include <linux/seq_file.h>
66
67static void tcp_v6_send_reset(struct sk_buff *skb);
68static void tcp_v6_or_send_ack(struct sk_buff *skb, struct open_request *req);
69static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len,
70 struct sk_buff *skb);
71
72static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
73static int tcp_v6_xmit(struct sk_buff *skb, int ipfragok);
74
75static struct tcp_func ipv6_mapped;
76static struct tcp_func ipv6_specific;
77
78/* I have no idea if this is a good hash for v6 or not. -DaveM */
79static __inline__ int tcp_v6_hashfn(struct in6_addr *laddr, u16 lport,
80 struct in6_addr *faddr, u16 fport)
81{
82 int hashent = (lport ^ fport);
83
84 hashent ^= (laddr->s6_addr32[3] ^ faddr->s6_addr32[3]);
85 hashent ^= hashent>>16;
86 hashent ^= hashent>>8;
87 return (hashent & (tcp_ehash_size - 1));
88}
89
90static __inline__ int tcp_v6_sk_hashfn(struct sock *sk)
91{
92 struct inet_sock *inet = inet_sk(sk);
93 struct ipv6_pinfo *np = inet6_sk(sk);
94 struct in6_addr *laddr = &np->rcv_saddr;
95 struct in6_addr *faddr = &np->daddr;
96 __u16 lport = inet->num;
97 __u16 fport = inet->dport;
98 return tcp_v6_hashfn(laddr, lport, faddr, fport);
99}
100
101static inline int tcp_v6_bind_conflict(struct sock *sk,
102 struct tcp_bind_bucket *tb)
103{
104 struct sock *sk2;
105 struct hlist_node *node;
106
107 /* We must walk the whole port owner list in this case. -DaveM */
108 sk_for_each_bound(sk2, node, &tb->owners) {
109 if (sk != sk2 &&
110 (!sk->sk_bound_dev_if ||
111 !sk2->sk_bound_dev_if ||
112 sk->sk_bound_dev_if == sk2->sk_bound_dev_if) &&
113 (!sk->sk_reuse || !sk2->sk_reuse ||
114 sk2->sk_state == TCP_LISTEN) &&
115 ipv6_rcv_saddr_equal(sk, sk2))
116 break;
117 }
118
119 return node != NULL;
120}
121
122/* Grrr, addr_type already calculated by caller, but I don't want
123 * to add some silly "cookie" argument to this method just for that.
124 * But it doesn't matter, the recalculation is in the rarest path
125 * this function ever takes.
126 */
/*
 * tcp_v6_get_port - bind @sk to local port @snum, or pick a free port
 * from sysctl_local_port_range when @snum is 0.
 *
 * Returns 0 on success and 1 on failure (port range exhausted, bind
 * conflict, or bucket allocation failure).  Bottom halves are disabled
 * for the duration; the bind-hash chain lock for the chosen port is
 * held from the search until the fail_unlock label.
 */
127static int tcp_v6_get_port(struct sock *sk, unsigned short snum)
128{
129 struct tcp_bind_hashbucket *head;
130 struct tcp_bind_bucket *tb;
131 struct hlist_node *node;
132 int ret;
133
134 local_bh_disable();
135 if (snum == 0) {
136 int low = sysctl_local_port_range[0];
137 int high = sysctl_local_port_range[1];
138 int remaining = (high - low) + 1;
139 int rover;
140
/* Walk the range starting after the shared rover, wrapping to 'low',
 * until a port with no bind bucket is found.  On such a hit the loop
 * is left through 'break' with head->lock still held. */
141 spin_lock(&tcp_portalloc_lock);
142 rover = tcp_port_rover;
143 do { rover++;
144 if ((rover < low) || (rover > high))
145 rover = low;
146 head = &tcp_bhash[tcp_bhashfn(rover)];
147 spin_lock(&head->lock);
148 tb_for_each(tb, node, &head->chain)
149 if (tb->port == rover)
150 goto next;
151 break;
152 next:
153 spin_unlock(&head->lock);
154 } while (--remaining > 0);
155 tcp_port_rover = rover;
156 spin_unlock(&tcp_portalloc_lock);
157
158 /* Exhausted local port range during search? */
159 ret = 1;
160 if (remaining <= 0)
161 goto fail;
162
163 /* OK, here is the one we will use. */
164 snum = rover;
165 } else {
/* Explicit port: look for an existing bucket in its chain. */
166 head = &tcp_bhash[tcp_bhashfn(snum)];
167 spin_lock(&head->lock);
168 tb_for_each(tb, node, &head->chain)
169 if (tb->port == snum)
170 goto tb_found;
171 }
172 tb = NULL;
173 goto tb_not_found;
174tb_found:
/* The port already has owners: skip the full conflict walk when the
 * bucket is marked fastreuse and this socket allows reuse and is not
 * listening. */
175 if (tb && !hlist_empty(&tb->owners)) {
176 if (tb->fastreuse > 0 && sk->sk_reuse &&
177 sk->sk_state != TCP_LISTEN) {
178 goto success;
179 } else {
180 ret = 1;
181 if (tcp_v6_bind_conflict(sk, tb))
182 goto fail_unlock;
183 }
184 }
185tb_not_found:
186 ret = 1;
187 if (!tb && (tb = tcp_bucket_create(head, snum)) == NULL)
188 goto fail_unlock;
/* Maintain the fastreuse hint: set by the first owner according to its
 * reuse/listen state, and cleared as soon as an owner that is
 * listening or lacks sk_reuse joins. */
189 if (hlist_empty(&tb->owners)) {
190 if (sk->sk_reuse && sk->sk_state != TCP_LISTEN)
191 tb->fastreuse = 1;
192 else
193 tb->fastreuse = 0;
194 } else if (tb->fastreuse &&
195 (!sk->sk_reuse || sk->sk_state == TCP_LISTEN))
196 tb->fastreuse = 0;
197
198success:
/* Attach the socket to the bucket unless it is already bound. */
199 if (!tcp_sk(sk)->bind_hash)
200 tcp_bind_hash(sk, tb, snum);
201 BUG_TRAP(tcp_sk(sk)->bind_hash == tb);
202 ret = 0;
203
204fail_unlock:
205 spin_unlock(&head->lock);
206fail:
207 local_bh_enable();
208 return ret;
209}
210
211static __inline__ void __tcp_v6_hash(struct sock *sk)
212{
213 struct hlist_head *list;
214 rwlock_t *lock;
215
216 BUG_TRAP(sk_unhashed(sk));
217
218 if (sk->sk_state == TCP_LISTEN) {
219 list = &tcp_listening_hash[tcp_sk_listen_hashfn(sk)];
220 lock = &tcp_lhash_lock;
221 tcp_listen_wlock();
222 } else {
223 sk->sk_hashent = tcp_v6_sk_hashfn(sk);
224 list = &tcp_ehash[sk->sk_hashent].chain;
225 lock = &tcp_ehash[sk->sk_hashent].lock;
226 write_lock(lock);
227 }
228
229 __sk_add_node(sk, list);
230 sock_prot_inc_use(sk->sk_prot);
231 write_unlock(lock);
232}
233
234
235static void tcp_v6_hash(struct sock *sk)
236{
237 if (sk->sk_state != TCP_CLOSE) {
238 struct tcp_sock *tp = tcp_sk(sk);
239
240 if (tp->af_specific == &ipv6_mapped) {
241 tcp_prot.hash(sk);
242 return;
243 }
244 local_bh_disable();
245 __tcp_v6_hash(sk);
246 local_bh_enable();
247 }
248}
249
250static struct sock *tcp_v6_lookup_listener(struct in6_addr *daddr, unsigned short hnum, int dif)
251{
252 struct sock *sk;
253 struct hlist_node *node;
254 struct sock *result = NULL;
255 int score, hiscore;
256
257 hiscore=0;
258 read_lock(&tcp_lhash_lock);
259 sk_for_each(sk, node, &tcp_listening_hash[tcp_lhashfn(hnum)]) {
260 if (inet_sk(sk)->num == hnum && sk->sk_family == PF_INET6) {
261 struct ipv6_pinfo *np = inet6_sk(sk);
262
263 score = 1;
264 if (!ipv6_addr_any(&np->rcv_saddr)) {
265 if (!ipv6_addr_equal(&np->rcv_saddr, daddr))
266 continue;
267 score++;
268 }
269 if (sk->sk_bound_dev_if) {
270 if (sk->sk_bound_dev_if != dif)
271 continue;
272 score++;
273 }
274 if (score == 3) {
275 result = sk;
276 break;
277 }
278 if (score > hiscore) {
279 hiscore = score;
280 result = sk;
281 }
282 }
283 }
284 if (result)
285 sock_hold(result);
286 read_unlock(&tcp_lhash_lock);
287 return result;
288}
289
290/* Sockets in TCP_CLOSE state are _always_ taken out of the hash, so
291 * we need not check it for TCP lookups anymore, thanks Alexey. -DaveM
292 *
293 * The sockhash lock must be held as a reader here.
294 */
295
296static inline struct sock *__tcp_v6_lookup_established(struct in6_addr *saddr, u16 sport,
297 struct in6_addr *daddr, u16 hnum,
298 int dif)
299{
300 struct tcp_ehash_bucket *head;
301 struct sock *sk;
302 struct hlist_node *node;
303 __u32 ports = TCP_COMBINED_PORTS(sport, hnum);
304 int hash;
305
306 /* Optimize here for direct hit, only listening connections can
307 * have wildcards anyways.
308 */
309 hash = tcp_v6_hashfn(daddr, hnum, saddr, sport);
310 head = &tcp_ehash[hash];
311 read_lock(&head->lock);
312 sk_for_each(sk, node, &head->chain) {
313 /* For IPV6 do the cheaper port and family tests first. */
314 if(TCP_IPV6_MATCH(sk, saddr, daddr, ports, dif))
315 goto hit; /* You sunk my battleship! */
316 }
317 /* Must check for a TIME_WAIT'er before going to listener hash. */
318 sk_for_each(sk, node, &(head + tcp_ehash_size)->chain) {
319 /* FIXME: acme: check this... */
320 struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sk;
321
322 if(*((__u32 *)&(tw->tw_dport)) == ports &&
323 sk->sk_family == PF_INET6) {
324 if(ipv6_addr_equal(&tw->tw_v6_daddr, saddr) &&
325 ipv6_addr_equal(&tw->tw_v6_rcv_saddr, daddr) &&
326 (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == dif))
327 goto hit;
328 }
329 }
330 read_unlock(&head->lock);
331 return NULL;
332
333hit:
334 sock_hold(sk);
335 read_unlock(&head->lock);
336 return sk;
337}
338
339
340static inline struct sock *__tcp_v6_lookup(struct in6_addr *saddr, u16 sport,
341 struct in6_addr *daddr, u16 hnum,
342 int dif)
343{
344 struct sock *sk;
345
346 sk = __tcp_v6_lookup_established(saddr, sport, daddr, hnum, dif);
347
348 if (sk)
349 return sk;
350
351 return tcp_v6_lookup_listener(daddr, hnum, dif);
352}
353
354inline struct sock *tcp_v6_lookup(struct in6_addr *saddr, u16 sport,
355 struct in6_addr *daddr, u16 dport,
356 int dif)
357{
358 struct sock *sk;
359
360 local_bh_disable();
361 sk = __tcp_v6_lookup(saddr, sport, daddr, ntohs(dport), dif);
362 local_bh_enable();
363
364 return sk;
365}
366
367EXPORT_SYMBOL_GPL(tcp_v6_lookup);
368
369
370/*
371 * Open request hash tables.
372 */
373
374static u32 tcp_v6_synq_hash(struct in6_addr *raddr, u16 rport, u32 rnd)
375{
376 u32 a, b, c;
377
378 a = raddr->s6_addr32[0];
379 b = raddr->s6_addr32[1];
380 c = raddr->s6_addr32[2];
381
382 a += JHASH_GOLDEN_RATIO;
383 b += JHASH_GOLDEN_RATIO;
384 c += rnd;
385 __jhash_mix(a, b, c);
386
387 a += raddr->s6_addr32[3];
388 b += (u32) rport;
389 __jhash_mix(a, b, c);
390
391 return c & (TCP_SYNQ_HSIZE - 1);
392}
393
394static struct open_request *tcp_v6_search_req(struct tcp_sock *tp,
395 struct open_request ***prevp,
396 __u16 rport,
397 struct in6_addr *raddr,
398 struct in6_addr *laddr,
399 int iif)
400{
401 struct tcp_listen_opt *lopt = tp->listen_opt;
402 struct open_request *req, **prev;
403
404 for (prev = &lopt->syn_table[tcp_v6_synq_hash(raddr, rport, lopt->hash_rnd)];
405 (req = *prev) != NULL;
406 prev = &req->dl_next) {
407 if (req->rmt_port == rport &&
408 req->class->family == AF_INET6 &&
409 ipv6_addr_equal(&req->af.v6_req.rmt_addr, raddr) &&
410 ipv6_addr_equal(&req->af.v6_req.loc_addr, laddr) &&
411 (!req->af.v6_req.iif || req->af.v6_req.iif == iif)) {
412 BUG_TRAP(req->sk == NULL);
413 *prevp = prev;
414 return req;
415 }
416 }
417
418 return NULL;
419}
420
421static __inline__ u16 tcp_v6_check(struct tcphdr *th, int len,
422 struct in6_addr *saddr,
423 struct in6_addr *daddr,
424 unsigned long base)
425{
426 return csum_ipv6_magic(saddr, daddr, len, IPPROTO_TCP, base);
427}
428
429static __u32 tcp_v6_init_sequence(struct sock *sk, struct sk_buff *skb)
430{
431 if (skb->protocol == htons(ETH_P_IPV6)) {
432 return secure_tcpv6_sequence_number(skb->nh.ipv6h->daddr.s6_addr32,
433 skb->nh.ipv6h->saddr.s6_addr32,
434 skb->h.th->dest,
435 skb->h.th->source);
436 } else {
437 return secure_tcp_sequence_number(skb->nh.iph->daddr,
438 skb->nh.iph->saddr,
439 skb->h.th->dest,
440 skb->h.th->source);
441 }
442}
443
444static int __tcp_v6_check_established(struct sock *sk, __u16 lport,
445 struct tcp_tw_bucket **twp)
446{
447 struct inet_sock *inet = inet_sk(sk);
448 struct ipv6_pinfo *np = inet6_sk(sk);
449 struct in6_addr *daddr = &np->rcv_saddr;
450 struct in6_addr *saddr = &np->daddr;
451 int dif = sk->sk_bound_dev_if;
452 u32 ports = TCP_COMBINED_PORTS(inet->dport, lport);
453 int hash = tcp_v6_hashfn(daddr, inet->num, saddr, inet->dport);
454 struct tcp_ehash_bucket *head = &tcp_ehash[hash];
455 struct sock *sk2;
456 struct hlist_node *node;
457 struct tcp_tw_bucket *tw;
458
459 write_lock(&head->lock);
460
461 /* Check TIME-WAIT sockets first. */
462 sk_for_each(sk2, node, &(head + tcp_ehash_size)->chain) {
463 tw = (struct tcp_tw_bucket*)sk2;
464
465 if(*((__u32 *)&(tw->tw_dport)) == ports &&
466 sk2->sk_family == PF_INET6 &&
467 ipv6_addr_equal(&tw->tw_v6_daddr, saddr) &&
468 ipv6_addr_equal(&tw->tw_v6_rcv_saddr, daddr) &&
469 sk2->sk_bound_dev_if == sk->sk_bound_dev_if) {
470 struct tcp_sock *tp = tcp_sk(sk);
471
472 if (tw->tw_ts_recent_stamp &&
473 (!twp || (sysctl_tcp_tw_reuse &&
474 xtime.tv_sec -
475 tw->tw_ts_recent_stamp > 1))) {
476 /* See comment in tcp_ipv4.c */
477 tp->write_seq = tw->tw_snd_nxt + 65535 + 2;
478 if (!tp->write_seq)
479 tp->write_seq = 1;
480 tp->rx_opt.ts_recent = tw->tw_ts_recent;
481 tp->rx_opt.ts_recent_stamp = tw->tw_ts_recent_stamp;
482 sock_hold(sk2);
483 goto unique;
484 } else
485 goto not_unique;
486 }
487 }
488 tw = NULL;
489
490 /* And established part... */
491 sk_for_each(sk2, node, &head->chain) {
492 if(TCP_IPV6_MATCH(sk2, saddr, daddr, ports, dif))
493 goto not_unique;
494 }
495
496unique:
497 BUG_TRAP(sk_unhashed(sk));
498 __sk_add_node(sk, &head->chain);
499 sk->sk_hashent = hash;
500 sock_prot_inc_use(sk->sk_prot);
501 write_unlock(&head->lock);
502
503 if (twp) {
504 *twp = tw;
505 NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
506 } else if (tw) {
507 /* Silly. Should hash-dance instead... */
508 tcp_tw_deschedule(tw);
509 NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
510
511 tcp_tw_put(tw);
512 }
513 return 0;
514
515not_unique:
516 write_unlock(&head->lock);
517 return -EADDRNOTAVAIL;
518}
519
520static inline u32 tcpv6_port_offset(const struct sock *sk)
521{
522 const struct inet_sock *inet = inet_sk(sk);
523 const struct ipv6_pinfo *np = inet6_sk(sk);
524
525 return secure_tcpv6_port_ephemeral(np->rcv_saddr.s6_addr32,
526 np->daddr.s6_addr32,
527 inet->dport);
528}
529
/*
 * tcp_v6_hash_connect - give a connecting socket a local port (if it has
 * none) and insert it into the hash tables.
 *
 * Without a bound port, candidate ports from sysctl_local_port_range are
 * probed starting at a per-destination offset; a port is usable when it
 * has no bind bucket yet, or when its bucket is marked fastreuse < 0 and
 * __tcp_v6_check_established() accepts the resulting 4-tuple.
 * With a bound port the socket is hashed directly when it is the sole
 * owner of its bind bucket, otherwise uniqueness is checked against the
 * established table.
 *
 * Returns 0 on success or -EADDRNOTAVAIL.
 */
530static int tcp_v6_hash_connect(struct sock *sk)
531{
532 unsigned short snum = inet_sk(sk)->num;
533 struct tcp_bind_hashbucket *head;
534 struct tcp_bind_bucket *tb;
535 int ret;
536
537 if (!snum) {
538 int low = sysctl_local_port_range[0];
539 int high = sysctl_local_port_range[1];
540 int range = high - low;
541 int i;
542 int port;
/* 'hint' persists across calls and is advanced at 'ok:' by the number
 * of probes the successful search needed. */
543 static u32 hint;
544 u32 offset = hint + tcpv6_port_offset(sk);
545 struct hlist_node *node;
546 struct tcp_tw_bucket *tw = NULL;
547
548 local_bh_disable();
549 for (i = 1; i <= range; i++) {
550 port = low + (i + offset) % range;
551 head = &tcp_bhash[tcp_bhashfn(port)];
552 spin_lock(&head->lock);
553
554 /* Does not bother with rcv_saddr checks,
555 * because the established check is already
556 * unique enough.
557 */
558 tb_for_each(tb, node, &head->chain) {
559 if (tb->port == port) {
560 BUG_TRAP(!hlist_empty(&tb->owners));
/* fastreuse >= 0: this bucket was not created by the connect path
 * below (which stores -1), so the port is skipped. */
561 if (tb->fastreuse >= 0)
562 goto next_port;
563 if (!__tcp_v6_check_established(sk,
564 port,
565 &tw))
566 goto ok;
567 goto next_port;
568 }
569 }
570
/* No bucket for this port yet: create one and mark it as owned by the
 * connect path.  An allocation failure ends the search. */
571 tb = tcp_bucket_create(head, port);
572 if (!tb) {
573 spin_unlock(&head->lock);
574 break;
575 }
576 tb->fastreuse = -1;
577 goto ok;
578
579 next_port:
580 spin_unlock(&head->lock);
581 }
582 local_bh_enable();
583
584 return -EADDRNOTAVAIL;
585
586ok:
587 hint += i;
588
589 /* Head lock still held and bh's disabled */
590 tcp_bind_hash(sk, tb, port);
/* Only a socket that is not hashed yet gets its source port set and is
 * inserted here. */
591 if (sk_unhashed(sk)) {
592 inet_sk(sk)->sport = htons(port);
593 __tcp_v6_hash(sk);
594 }
595 spin_unlock(&head->lock);
596
/* Retire the TIME_WAIT bucket handed back by the established check. */
597 if (tw) {
598 tcp_tw_deschedule(tw);
599 tcp_tw_put(tw);
600 }
601
602 ret = 0;
603 goto out;
604 }
605
/* The socket already has a local port. */
606 head = &tcp_bhash[tcp_bhashfn(snum)];
607 tb = tcp_sk(sk)->bind_hash;
608 spin_lock_bh(&head->lock);
609
/* Sole owner of the bind bucket: no other socket can collide. */
610 if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
611 __tcp_v6_hash(sk);
612 spin_unlock_bh(&head->lock);
613 return 0;
614 } else {
/* Only the spinlock is dropped here; bottom halves stay disabled until
 * the 'out' label, which the ephemeral-port path above also jumps to. */
615 spin_unlock(&head->lock);
616 /* No definite answer... Walk to established hash table */
617 ret = __tcp_v6_check_established(sk, snum, NULL);
618out:
619 local_bh_enable();
620 return ret;
621 }
622}
623
624static __inline__ int tcp_v6_iif(struct sk_buff *skb)
625{
626 return IP6CB(skb)->iif;
627}
628
/*
 * Start an active open on a TCP/IPv6 socket.
 *
 * @sk:       socket to connect
 * @uaddr:    destination, interpreted as a struct sockaddr_in6; note that
 *            its sin6_addr may be rewritten here (flow-label destination,
 *            or the any-address being turned into loopback)
 * @addr_len: length of @uaddr; must be at least SIN6_LEN_RFC2133
 *
 * V4-mapped destinations are handed to tcp_v4_connect() after switching
 * the socket to the ipv6_mapped operations.  Otherwise the route is looked
 * up, the source address is fixed, the socket is moved to TCP_SYN_SENT,
 * hashed via tcp_v6_hash_connect() and tcp_connect() is called.
 *
 * Returns 0 on success or a negative errno.  On failure inet->dport and
 * sk->sk_route_caps are cleared; failures after the state change also
 * reset the state to TCP_CLOSE and drop the cached route.
 */
629static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
630 int addr_len)
631{
632 struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
633 struct inet_sock *inet = inet_sk(sk);
634 struct ipv6_pinfo *np = inet6_sk(sk);
635 struct tcp_sock *tp = tcp_sk(sk);
636 struct in6_addr *saddr = NULL, *final_p = NULL, final;
637 struct flowi fl;
638 struct dst_entry *dst;
639 int addr_type;
640 int err;
641
642 if (addr_len < SIN6_LEN_RFC2133)
643 return -EINVAL;
644
645 if (usin->sin6_family != AF_INET6)
646 return(-EAFNOSUPPORT);
647
648 memset(&fl, 0, sizeof(fl));
649
 /*
  * With np->sndflow set, take the flow label from sin6_flowinfo.  A
  * non-zero label must be known to fl6_sock_lookup(); its recorded
  * destination then replaces the address supplied by the caller.
  */
650 if (np->sndflow) {
651 fl.fl6_flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
652 IP6_ECN_flow_init(fl.fl6_flowlabel);
653 if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) {
654 struct ip6_flowlabel *flowlabel;
655 flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
656 if (flowlabel == NULL)
657 return -EINVAL;
658 ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst);
659 fl6_sock_release(flowlabel);
660 }
661 }
662
663 /*
664 * connect() to INADDR_ANY means loopback (BSD'ism).
665 */
666
667 if(ipv6_addr_any(&usin->sin6_addr))
668 usin->sin6_addr.s6_addr[15] = 0x1;
669
670 addr_type = ipv6_addr_type(&usin->sin6_addr);
671
672 if(addr_type & IPV6_ADDR_MULTICAST)
673 return -ENETUNREACH;
674
675 if (addr_type&IPV6_ADDR_LINKLOCAL) {
 /* sin6_scope_id is only read when the caller passed a full sockaddr_in6. */
676 if (addr_len >= sizeof(struct sockaddr_in6) &&
677 usin->sin6_scope_id) {
678 /* If interface is set while binding, indices
679 * must coincide.
680 */
681 if (sk->sk_bound_dev_if &&
682 sk->sk_bound_dev_if != usin->sin6_scope_id)
683 return -EINVAL;
684
685 sk->sk_bound_dev_if = usin->sin6_scope_id;
686 }
687
688 /* Connect to link-local address requires an interface */
689 if (!sk->sk_bound_dev_if)
690 return -EINVAL;
691 }
692
 /*
  * The destination differs from the one stored in np->daddr: forget the
  * remembered timestamp state and the write sequence.
  */
693 if (tp->rx_opt.ts_recent_stamp &&
694 !ipv6_addr_equal(&np->daddr, &usin->sin6_addr)) {
695 tp->rx_opt.ts_recent = 0;
696 tp->rx_opt.ts_recent_stamp = 0;
697 tp->write_seq = 0;
698 }
699
700 ipv6_addr_copy(&np->daddr, &usin->sin6_addr);
701 np->flow_label = fl.fl6_flowlabel;
702
703 /*
704 * TCP over IPv4
705 */
706
707 if (addr_type == IPV6_ADDR_MAPPED) {
 /* Saved so that ext_header_len can be restored if the IPv4 connect fails. */
708 u32 exthdrlen = tp->ext_header_len;
709 struct sockaddr_in sin;
710
711 SOCK_DEBUG(sk, "connect: ipv4 mapped\n");
712
713 if (__ipv6_only_sock(sk))
714 return -ENETUNREACH;
715
716 sin.sin_family = AF_INET;
717 sin.sin_port = usin->sin6_port;
718 sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
719
720 tp->af_specific = &ipv6_mapped;
721 sk->sk_backlog_rcv = tcp_v4_do_rcv;
722
723 err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
724
725 if (err) {
 /* Undo the switch to the mapped operations. */
726 tp->ext_header_len = exthdrlen;
727 tp->af_specific = &ipv6_specific;
728 sk->sk_backlog_rcv = tcp_v6_do_rcv;
729 goto failure;
730 } else {
 /* Mirror the IPv4 addresses chosen by tcp_v4_connect() as ::ffff:a.b.c.d. */
731 ipv6_addr_set(&np->saddr, 0, 0, htonl(0x0000FFFF),
732 inet->saddr);
733 ipv6_addr_set(&np->rcv_saddr, 0, 0, htonl(0x0000FFFF),
734 inet->rcv_saddr);
735 }
736
737 return err;
738 }
739
 /* A non-any rcv_saddr is used as the source address. */
740 if (!ipv6_addr_any(&np->rcv_saddr))
741 saddr = &np->rcv_saddr;
742
743 fl.proto = IPPROTO_TCP;
744 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
745 ipv6_addr_copy(&fl.fl6_src,
746 (saddr ? saddr : &np->saddr));
747 fl.oif = sk->sk_bound_dev_if;
748 fl.fl_ip_dport = usin->sin6_port;
749 fl.fl_ip_sport = inet->sport;
750
 /*
  * With a source route option, look the route up towards rt0->addr and
  * keep the real destination in `final` to be restored afterwards.
  */
751 if (np->opt && np->opt->srcrt) {
752 struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
753 ipv6_addr_copy(&final, &fl.fl6_dst);
754 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
755 final_p = &final;
756 }
757
758 err = ip6_dst_lookup(sk, &dst, &fl);
759 if (err)
760 goto failure;
761 if (final_p)
762 ipv6_addr_copy(&fl.fl6_dst, final_p);
763
764 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
765 dst_release(dst);
766 goto failure;
767 }
768
 /* No source address was set: adopt the one left in fl.fl6_src. */
769 if (saddr == NULL) {
770 saddr = &fl.fl6_src;
771 ipv6_addr_copy(&np->rcv_saddr, saddr);
772 }
773
774 /* set the source address */
775 ipv6_addr_copy(&np->saddr, saddr);
776 inet->rcv_saddr = LOOPBACK4_IPV6;
777
778 ip6_dst_store(sk, dst, NULL);
779 sk->sk_route_caps = dst->dev->features &
780 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
781
782 tp->ext_header_len = 0;
783 if (np->opt)
784 tp->ext_header_len = np->opt->opt_flen + np->opt->opt_nflen;
785
786 tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
787
788 inet->dport = usin->sin6_port;
789
790 tcp_set_state(sk, TCP_SYN_SENT);
791 err = tcp_v6_hash_connect(sk);
792 if (err)
793 goto late_failure;
794
 /* Pick an initial sequence number only if write_seq is still zero. */
795 if (!tp->write_seq)
796 tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32,
797 np->daddr.s6_addr32,
798 inet->sport,
799 inet->dport);
800
801 err = tcp_connect(sk);
802 if (err)
803 goto late_failure;
804
805 return 0;
806
 /* Failures after the socket entered TCP_SYN_SENT and got a route stored. */
807late_failure:
808 tcp_set_state(sk, TCP_CLOSE);
809 __sk_dst_reset(sk);
810failure:
811 inet->dport = 0;
812 sk->sk_route_caps = 0;
813 return err;
814}
815
/*
 * Handle an ICMPv6 error that concerns a TCP connection.
 *
 * @skb:    skb->data is read as an IPv6 header and skb->data + @offset as
 *          a TCP header (presumably the offending packet quoted by the
 *          ICMPv6 message -- confirm against the caller)
 * @opt:    not used in this function
 * @type:   ICMPv6 type
 * @code:   ICMPv6 code
 * @offset: offset of the TCP header within skb->data
 * @info:   not used in this function
 *
 * The socket is looked up with the quoted destination as the remote end
 * and the quoted source as the local end.  ICMPV6_PKT_TOOBIG triggers an
 * MSS resync and simple retransmit when the socket is not owned by the
 * user; other errors are converted with icmpv6_err_convert() and either
 * drop a matching open request (listening sockets), abort a connection
 * attempt (SYN_SENT/SYN_RECV), or are recorded in sk_err / sk_err_soft.
 */
816static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
817 int type, int code, int offset, __u32 info)
818{
819 struct ipv6hdr *hdr = (struct ipv6hdr*)skb->data;
820 struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
821 struct ipv6_pinfo *np;
822 struct sock *sk;
823 int err;
824 struct tcp_sock *tp;
825 __u32 seq;
826
827 sk = tcp_v6_lookup(&hdr->daddr, th->dest, &hdr->saddr, th->source, skb->dev->ifindex);
828
829 if (sk == NULL) {
830 ICMP6_INC_STATS_BH(__in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
831 return;
832 }
833
 /* The lookup returned a TIME-WAIT bucket: just drop the reference. */
834 if (sk->sk_state == TCP_TIME_WAIT) {
835 tcp_tw_put((struct tcp_tw_bucket*)sk);
836 return;
837 }
838
839 bh_lock_sock(sk);
 /* Only counted here; each branch below re-checks sock_owned_by_user(). */
840 if (sock_owned_by_user(sk))
841 NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS);
842
843 if (sk->sk_state == TCP_CLOSE)
844 goto out;
845
846 tp = tcp_sk(sk);
847 seq = ntohl(th->seq);
 /* Except for listeners, the quoted sequence must lie in [snd_una, snd_nxt]. */
848 if (sk->sk_state != TCP_LISTEN &&
849 !between(seq, tp->snd_una, tp->snd_nxt)) {
850 NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
851 goto out;
852 }
853
854 np = inet6_sk(sk);
855
856 if (type == ICMPV6_PKT_TOOBIG) {
857 struct dst_entry *dst = NULL;
858
859 if (sock_owned_by_user(sk))
860 goto out;
861 if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
862 goto out;
863
864 /* icmp should have updated the destination cache entry */
865 dst = __sk_dst_check(sk, np->dst_cookie);
866
 /* No usable cached route: look one up from the socket's addresses and ports. */
867 if (dst == NULL) {
868 struct inet_sock *inet = inet_sk(sk);
869 struct flowi fl;
870
871 /* BUGGG_FUTURE: Again, it is not clear how
872 to handle rthdr case. Ignore this complexity
873 for now.
874 */
875 memset(&fl, 0, sizeof(fl));
876 fl.proto = IPPROTO_TCP;
877 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
878 ipv6_addr_copy(&fl.fl6_src, &np->saddr);
879 fl.oif = sk->sk_bound_dev_if;
880 fl.fl_ip_dport = inet->dport;
881 fl.fl_ip_sport = inet->sport;
882
883 if ((err = ip6_dst_lookup(sk, &dst, &fl))) {
884 sk->sk_err_soft = -err;
885 goto out;
886 }
887
888 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
889 sk->sk_err_soft = -err;
890 goto out;
891 }
892
893 } else
 /* Take a reference so both branches end with the dst_release() below. */
894 dst_hold(dst);
895
896 if (tp->pmtu_cookie > dst_mtu(dst)) {
897 tcp_sync_mss(sk, dst_mtu(dst));
898 tcp_simple_retransmit(sk);
899 } /* else let the usual retransmit timer handle it */
900 dst_release(dst);
901 goto out;
902 }
903
904 icmpv6_err_convert(type, code, &err);
905
906 /* Might be for an open_request */
907 switch (sk->sk_state) {
908 struct open_request *req, **prev;
909 case TCP_LISTEN:
910 if (sock_owned_by_user(sk))
911 goto out;
912
913 req = tcp_v6_search_req(tp, &prev, th->dest, &hdr->daddr,
914 &hdr->saddr, tcp_v6_iif(skb));
915 if (!req)
916 goto out;
917
918 /* ICMPs are not backlogged, hence we cannot get
919 * an established socket here.
920 */
921 BUG_TRAP(req->sk == NULL);
922
 /* The error must quote the initial sequence number sent for this request. */
923 if (seq != req->snt_isn) {
924 NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
925 goto out;
926 }
927
928 tcp_synq_drop(sk, req, prev);
929 goto out;
930
931 case TCP_SYN_SENT:
932 case TCP_SYN_RECV: /* Cannot happen.
933 It can, if SYNs are crossed. --ANK */
934 if (!sock_owned_by_user(sk)) {
935 TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
936 sk->sk_err = err;
937 sk->sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */
938
939 tcp_done(sk);
940 } else
941 sk->sk_err_soft = err;
942 goto out;
943 }
944
 /* Other states: a hard error only with np->recverr set and the socket not owned by the user. */
945 if (!sock_owned_by_user(sk) && np->recverr) {
946 sk->sk_err = err;
947 sk->sk_error_report(sk);
948 } else
949 sk->sk_err_soft = err;
950
951out:
952 bh_unlock_sock(sk);
953 sock_put(sk);
954}
955
956
957static int tcp_v6_send_synack(struct sock *sk, struct open_request *req,
958 struct dst_entry *dst)
959{
960 struct ipv6_pinfo *np = inet6_sk(sk);
961 struct sk_buff * skb;
962 struct ipv6_txoptions *opt = NULL;
963 struct in6_addr * final_p = NULL, final;
964 struct flowi fl;
965 int err = -1;
966
967 memset(&fl, 0, sizeof(fl));
968 fl.proto = IPPROTO_TCP;
969 ipv6_addr_copy(&fl.fl6_dst, &req->af.v6_req.rmt_addr);
970 ipv6_addr_copy(&fl.fl6_src, &req->af.v6_req.loc_addr);
971 fl.fl6_flowlabel = 0;
972 fl.oif = req->af.v6_req.iif;
973 fl.fl_ip_dport = req->rmt_port;
974 fl.fl_ip_sport = inet_sk(sk)->sport;
975
976 if (dst == NULL) {
977 opt = np->opt;
978 if (opt == NULL &&
979 np->rxopt.bits.srcrt == 2 &&
980 req->af.v6_req.pktopts) {
981 struct sk_buff *pktopts = req->af.v6_req.pktopts;
982 struct inet6_skb_parm *rxopt = IP6CB(pktopts);
983 if (rxopt->srcrt)
984 opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr*)(pktopts->nh.raw + rxopt->srcrt));
985 }
986
987 if (opt && opt->srcrt) {
988 struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
989 ipv6_addr_copy(&final, &fl.fl6_dst);
990 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
991 final_p = &final;
992 }
993
994 err = ip6_dst_lookup(sk, &dst, &fl);
995 if (err)
996 goto done;
997 if (final_p)
998 ipv6_addr_copy(&fl.fl6_dst, final_p);
999 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
1000 goto done;
1001 }
1002
1003 skb = tcp_make_synack(sk, dst, req);
1004 if (skb) {
1005 struct tcphdr *th = skb->h.th;
1006
1007 th->check = tcp_v6_check(th, skb->len,
1008 &req->af.v6_req.loc_addr, &req->af.v6_req.rmt_addr,
1009 csum_partial((char *)th, skb->len, skb->csum));
1010
1011 ipv6_addr_copy(&fl.fl6_dst, &req->af.v6_req.rmt_addr);
1012 err = ip6_xmit(sk, skb, &fl, opt, 0);
1013 if (err == NET_XMIT_CN)
1014 err = 0;
1015 }
1016
1017done:
1018 dst_release(dst);
1019 if (opt && opt != np->opt)
1020 sock_kfree_s(sk, opt, opt->tot_len);
1021 return err;
1022}
1023
1024static void tcp_v6_or_free(struct open_request *req)
1025{
1026 if (req->af.v6_req.pktopts)
1027 kfree_skb(req->af.v6_req.pktopts);
1028}
1029
1030static struct or_calltable or_ipv6 = {
1031 .family = AF_INET6,
1032 .rtx_syn_ack = tcp_v6_send_synack,
1033 .send_ack = tcp_v6_or_send_ack,
1034 .destructor = tcp_v6_or_free,
1035 .send_reset = tcp_v6_send_reset
1036};
1037
1038static int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb)
1039{
1040 struct ipv6_pinfo *np = inet6_sk(sk);
1041 struct inet6_skb_parm *opt = IP6CB(skb);
1042
1043 if (np->rxopt.all) {
1044 if ((opt->hop && np->rxopt.bits.hopopts) ||
1045 ((IPV6_FLOWINFO_MASK&*(u32*)skb->nh.raw) &&
1046 np->rxopt.bits.rxflow) ||
1047 (opt->srcrt && np->rxopt.bits.srcrt) ||
1048 ((opt->dst1 || opt->dst0) && np->rxopt.bits.dstopts))
1049 return 1;
1050 }
1051 return 0;
1052}
1053
1054
1055static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len,
1056 struct sk_buff *skb)
1057{
1058 struct ipv6_pinfo *np = inet6_sk(sk);
1059
1060 if (skb->ip_summed == CHECKSUM_HW) {
1061 th->check = ~csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP, 0);
1062 skb->csum = offsetof(struct tcphdr, check);
1063 } else {
1064 th->check = csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP,
1065 csum_partial((char *)th, th->doff<<2,
1066 skb->csum));
1067 }
1068}
1069
1070
1071static void tcp_v6_send_reset(struct sk_buff *skb)
1072{
1073 struct tcphdr *th = skb->h.th, *t1;
1074 struct sk_buff *buff;
1075 struct flowi fl;
1076
1077 if (th->rst)
1078 return;
1079
1080 if (!ipv6_unicast_destination(skb))
1081 return;
1082
1083 /*
1084 * We need to grab some memory, and put together an RST,
1085 * and then put it into the queue to be sent.
1086 */
1087
1088 buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + sizeof(struct tcphdr),
1089 GFP_ATOMIC);
1090 if (buff == NULL)
1091 return;
1092
1093 skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + sizeof(struct tcphdr));
1094
1095 t1 = (struct tcphdr *) skb_push(buff,sizeof(struct tcphdr));
1096
1097 /* Swap the send and the receive. */
1098 memset(t1, 0, sizeof(*t1));
1099 t1->dest = th->source;
1100 t1->source = th->dest;
1101 t1->doff = sizeof(*t1)/4;
1102 t1->rst = 1;
1103
1104 if(th->ack) {
1105 t1->seq = th->ack_seq;
1106 } else {
1107 t1->ack = 1;
1108 t1->ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin
1109 + skb->len - (th->doff<<2));
1110 }
1111
1112 buff->csum = csum_partial((char *)t1, sizeof(*t1), 0);
1113
1114 memset(&fl, 0, sizeof(fl));
1115 ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
1116 ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);
1117
1118 t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
1119 sizeof(*t1), IPPROTO_TCP,
1120 buff->csum);
1121
1122 fl.proto = IPPROTO_TCP;
1123 fl.oif = tcp_v6_iif(skb);
1124 fl.fl_ip_dport = t1->dest;
1125 fl.fl_ip_sport = t1->source;
1126
1127 /* sk = NULL, but it is safe for now. RST socket required. */
1128 if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
1129
1130 if ((xfrm_lookup(&buff->dst, &fl, NULL, 0)) < 0) {
1131 dst_release(buff->dst);
1132 return;
1133 }
1134
1135 ip6_xmit(NULL, buff, &fl, NULL, 0);
1136 TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
1137 TCP_INC_STATS_BH(TCP_MIB_OUTRSTS);
1138 return;
1139 }
1140
1141 kfree_skb(buff);
1142}
1143
1144static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts)
1145{
1146 struct tcphdr *th = skb->h.th, *t1;
1147 struct sk_buff *buff;
1148 struct flowi fl;
1149 int tot_len = sizeof(struct tcphdr);
1150
1151 if (ts)
1152 tot_len += 3*4;
1153
1154 buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
1155 GFP_ATOMIC);
1156 if (buff == NULL)
1157 return;
1158
1159 skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);
1160
1161 t1 = (struct tcphdr *) skb_push(buff,tot_len);
1162
1163 /* Swap the send and the receive. */
1164 memset(t1, 0, sizeof(*t1));
1165 t1->dest = th->source;
1166 t1->source = th->dest;
1167 t1->doff = tot_len/4;
1168 t1->seq = htonl(seq);
1169 t1->ack_seq = htonl(ack);
1170 t1->ack = 1;
1171 t1->window = htons(win);
1172
1173 if (ts) {
1174 u32 *ptr = (u32*)(t1 + 1);
1175 *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
1176 (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
1177 *ptr++ = htonl(tcp_time_stamp);
1178 *ptr = htonl(ts);
1179 }
1180
1181 buff->csum = csum_partial((char *)t1, tot_len, 0);
1182
1183 memset(&fl, 0, sizeof(fl));
1184 ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
1185 ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);
1186
1187 t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
1188 tot_len, IPPROTO_TCP,
1189 buff->csum);
1190
1191 fl.proto = IPPROTO_TCP;
1192 fl.oif = tcp_v6_iif(skb);
1193 fl.fl_ip_dport = t1->dest;
1194 fl.fl_ip_sport = t1->source;
1195
1196 if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
1197 if ((xfrm_lookup(&buff->dst, &fl, NULL, 0)) < 0) {
1198 dst_release(buff->dst);
1199 return;
1200 }
1201 ip6_xmit(NULL, buff, &fl, NULL, 0);
1202 TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
1203 return;
1204 }
1205
1206 kfree_skb(buff);
1207}
1208
1209static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
1210{
1211 struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sk;
1212
1213 tcp_v6_send_ack(skb, tw->tw_snd_nxt, tw->tw_rcv_nxt,
1214 tw->tw_rcv_wnd >> tw->tw_rcv_wscale, tw->tw_ts_recent);
1215
1216 tcp_tw_put(tw);
1217}
1218
1219static void tcp_v6_or_send_ack(struct sk_buff *skb, struct open_request *req)
1220{
1221 tcp_v6_send_ack(skb, req->snt_isn+1, req->rcv_isn+1, req->rcv_wnd, req->ts_recent);
1222}
1223
1224
1225static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
1226{
1227 struct open_request *req, **prev;
1228 struct tcphdr *th = skb->h.th;
1229 struct tcp_sock *tp = tcp_sk(sk);
1230 struct sock *nsk;
1231
1232 /* Find possible connection requests. */
1233 req = tcp_v6_search_req(tp, &prev, th->source, &skb->nh.ipv6h->saddr,
1234 &skb->nh.ipv6h->daddr, tcp_v6_iif(skb));
1235 if (req)
1236 return tcp_check_req(sk, skb, req, prev);
1237
1238 nsk = __tcp_v6_lookup_established(&skb->nh.ipv6h->saddr,
1239 th->source,
1240 &skb->nh.ipv6h->daddr,
1241 ntohs(th->dest),
1242 tcp_v6_iif(skb));
1243
1244 if (nsk) {
1245 if (nsk->sk_state != TCP_TIME_WAIT) {
1246 bh_lock_sock(nsk);
1247 return nsk;
1248 }
1249 tcp_tw_put((struct tcp_tw_bucket*)nsk);
1250 return NULL;
1251 }
1252
1253#if 0 /*def CONFIG_SYN_COOKIES*/
1254 if (!th->rst && !th->syn && th->ack)
1255 sk = cookie_v6_check(sk, skb, &(IPCB(skb)->opt));
1256#endif
1257 return sk;
1258}
1259
1260static void tcp_v6_synq_add(struct sock *sk, struct open_request *req)
1261{
1262 struct tcp_sock *tp = tcp_sk(sk);
1263 struct tcp_listen_opt *lopt = tp->listen_opt;
1264 u32 h = tcp_v6_synq_hash(&req->af.v6_req.rmt_addr, req->rmt_port, lopt->hash_rnd);
1265
1266 req->sk = NULL;
1267 req->expires = jiffies + TCP_TIMEOUT_INIT;
1268 req->retrans = 0;
1269 req->dl_next = lopt->syn_table[h];
1270
1271 write_lock(&tp->syn_wait_lock);
1272 lopt->syn_table[h] = req;
1273 write_unlock(&tp->syn_wait_lock);
1274
1275 tcp_synq_added(sk);
1276}
1277
1278
1279/* FIXME: this is substantially similar to the ipv4 code.
1280 * Can some kind of merge be done? -- erics
1281 */
1282static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1283{
1284 struct ipv6_pinfo *np = inet6_sk(sk);
1285 struct tcp_options_received tmp_opt;
1286 struct tcp_sock *tp = tcp_sk(sk);
1287 struct open_request *req = NULL;
1288 __u32 isn = TCP_SKB_CB(skb)->when;
1289
1290 if (skb->protocol == htons(ETH_P_IP))
1291 return tcp_v4_conn_request(sk, skb);
1292
1293 if (!ipv6_unicast_destination(skb))
1294 goto drop;
1295
1296 /*
1297 * There are no SYN attacks on IPv6, yet...
1298 */
1299 if (tcp_synq_is_full(sk) && !isn) {
1300 if (net_ratelimit())
1301 printk(KERN_INFO "TCPv6: dropping request, synflood is possible\n");
1302 goto drop;
1303 }
1304
1305 if (sk_acceptq_is_full(sk) && tcp_synq_young(sk) > 1)
1306 goto drop;
1307
1308 req = tcp_openreq_alloc();
1309 if (req == NULL)
1310 goto drop;
1311
1312 tcp_clear_options(&tmp_opt);
1313 tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
1314 tmp_opt.user_mss = tp->rx_opt.user_mss;
1315
1316 tcp_parse_options(skb, &tmp_opt, 0);
1317
1318 tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
1319 tcp_openreq_init(req, &tmp_opt, skb);
1320
1321 req->class = &or_ipv6;
1322 ipv6_addr_copy(&req->af.v6_req.rmt_addr, &skb->nh.ipv6h->saddr);
1323 ipv6_addr_copy(&req->af.v6_req.loc_addr, &skb->nh.ipv6h->daddr);
1324 TCP_ECN_create_request(req, skb->h.th);
1325 req->af.v6_req.pktopts = NULL;
1326 if (ipv6_opt_accepted(sk, skb) ||
1327 np->rxopt.bits.rxinfo ||
1328 np->rxopt.bits.rxhlim) {
1329 atomic_inc(&skb->users);
1330 req->af.v6_req.pktopts = skb;
1331 }
1332 req->af.v6_req.iif = sk->sk_bound_dev_if;
1333
1334 /* So that link locals have meaning */
1335 if (!sk->sk_bound_dev_if &&
1336 ipv6_addr_type(&req->af.v6_req.rmt_addr) & IPV6_ADDR_LINKLOCAL)
1337 req->af.v6_req.iif = tcp_v6_iif(skb);
1338
1339 if (isn == 0)
1340 isn = tcp_v6_init_sequence(sk,skb);
1341
1342 req->snt_isn = isn;
1343
1344 if (tcp_v6_send_synack(sk, req, NULL))
1345 goto drop;
1346
1347 tcp_v6_synq_add(sk, req);
1348
1349 return 0;
1350
1351drop:
1352 if (req)
1353 tcp_openreq_free(req);
1354
1355 TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
1356 return 0; /* don't send reset */
1357}
1358
/*
 * Create the child socket for open request @req on listener @sk.
 *
 * @sk:  listening socket
 * @skb: segment being processed
 * @req: open request the child is created from
 * @dst: route for the child, or NULL to look one up here
 *
 * IPv4 packets are delegated to tcp_v4_syn_recv_sock(); the resulting
 * socket is then given v4-mapped IPv6 addresses and the ipv6_mapped
 * operations.  For native IPv6 the child is created with
 * tcp_create_openreq_child(), receives a copy of the listener's IPv6
 * state, the route, the saved packet options and a duplicate of the
 * transmit options, and is hashed.
 *
 * Returns the new socket, or NULL on failure.  On the native failure
 * paths transmit options built here are freed and dst_release() is called.
 */
1359static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1360 struct open_request *req,
1361 struct dst_entry *dst)
1362{
1363 struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
1364 struct tcp6_sock *newtcp6sk;
1365 struct inet_sock *newinet;
1366 struct tcp_sock *newtp;
1367 struct sock *newsk;
1368 struct ipv6_txoptions *opt;
1369
1370 if (skb->protocol == htons(ETH_P_IP)) {
1371 /*
1372 * v6 mapped
1373 */
1374
1375 newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst);
1376
1377 if (newsk == NULL)
1378 return NULL;
1379
 /* Point pinet6 at the IPv6 part embedded in the new tcp6_sock. */
1380 newtcp6sk = (struct tcp6_sock *)newsk;
1381 inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
1382
1383 newinet = inet_sk(newsk);
1384 newnp = inet6_sk(newsk);
1385 newtp = tcp_sk(newsk);
1386
 /* Start from a copy of the listener's IPv6 state, then override per-connection fields. */
1387 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1388
1389 ipv6_addr_set(&newnp->daddr, 0, 0, htonl(0x0000FFFF),
1390 newinet->daddr);
1391
1392 ipv6_addr_set(&newnp->saddr, 0, 0, htonl(0x0000FFFF),
1393 newinet->saddr);
1394
1395 ipv6_addr_copy(&newnp->rcv_saddr, &newnp->saddr);
1396
1397 newtp->af_specific = &ipv6_mapped;
1398 newsk->sk_backlog_rcv = tcp_v4_do_rcv;
1399 newnp->pktoptions = NULL;
1400 newnp->opt = NULL;
1401 newnp->mcast_oif = tcp_v6_iif(skb);
1402 newnp->mcast_hops = skb->nh.ipv6h->hop_limit;
1403
1404 /* Charge newly allocated IPv6 socket. Though it is mapped,
1405 * it is IPv6 yet.
1406 */
1407#ifdef INET_REFCNT_DEBUG
1408 atomic_inc(&inet6_sock_nr);
1409#endif
1410
1411 /* It is tricky place. Until this moment IPv4 tcp
1412 worked with IPv6 af_tcp.af_specific.
1413 Sync it now.
1414 */
1415 tcp_sync_mss(newsk, newtp->pmtu_cookie);
1416
1417 return newsk;
1418 }
1419
1420 opt = np->opt;
1421
1422 if (sk_acceptq_is_full(sk))
1423 goto out_overflow;
1424
 /*
  * The listener has no transmit options and rxopt.bits.srcrt == 2:
  * build options by inverting the routing header saved with the request.
  */
1425 if (np->rxopt.bits.srcrt == 2 &&
1426 opt == NULL && req->af.v6_req.pktopts) {
1427 struct inet6_skb_parm *rxopt = IP6CB(req->af.v6_req.pktopts);
1428 if (rxopt->srcrt)
1429 opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr*)(req->af.v6_req.pktopts->nh.raw+rxopt->srcrt));
1430 }
1431
1432 if (dst == NULL) {
1433 struct in6_addr *final_p = NULL, final;
1434 struct flowi fl;
1435
1436 memset(&fl, 0, sizeof(fl));
1437 fl.proto = IPPROTO_TCP;
1438 ipv6_addr_copy(&fl.fl6_dst, &req->af.v6_req.rmt_addr);
 /* With a source route, look up towards rt0->addr and restore the real destination afterwards. */
1439 if (opt && opt->srcrt) {
1440 struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
1441 ipv6_addr_copy(&final, &fl.fl6_dst);
1442 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1443 final_p = &final;
1444 }
1445 ipv6_addr_copy(&fl.fl6_src, &req->af.v6_req.loc_addr);
1446 fl.oif = sk->sk_bound_dev_if;
1447 fl.fl_ip_dport = req->rmt_port;
1448 fl.fl_ip_sport = inet_sk(sk)->sport;
1449
1450 if (ip6_dst_lookup(sk, &dst, &fl))
1451 goto out;
1452
1453 if (final_p)
1454 ipv6_addr_copy(&fl.fl6_dst, final_p);
1455
1456 if ((xfrm_lookup(&dst, &fl, sk, 0)) < 0)
1457 goto out;
1458 }
1459
1460 newsk = tcp_create_openreq_child(sk, req, skb);
1461 if (newsk == NULL)
1462 goto out;
1463
1464 /* Charge newly allocated IPv6 socket */
1465#ifdef INET_REFCNT_DEBUG
1466 atomic_inc(&inet6_sock_nr);
1467#endif
1468
1469 ip6_dst_store(newsk, dst, NULL);
1470 newsk->sk_route_caps = dst->dev->features &
1471 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
1472
 /* Point pinet6 at the IPv6 part embedded in the new tcp6_sock. */
1473 newtcp6sk = (struct tcp6_sock *)newsk;
1474 inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
1475
1476 newtp = tcp_sk(newsk);
1477 newinet = inet_sk(newsk);
1478 newnp = inet6_sk(newsk);
1479
 /* Start from a copy of the listener's IPv6 state, then override per-connection fields. */
1480 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1481
1482 ipv6_addr_copy(&newnp->daddr, &req->af.v6_req.rmt_addr);
1483 ipv6_addr_copy(&newnp->saddr, &req->af.v6_req.loc_addr);
1484 ipv6_addr_copy(&newnp->rcv_saddr, &req->af.v6_req.loc_addr);
1485 newsk->sk_bound_dev_if = req->af.v6_req.iif;
1486
1487 /* Now IPv6 options...
1488
1489 First: no IPv4 options.
1490 */
1491 newinet->opt = NULL;
1492
1493 /* Clone RX bits */
1494 newnp->rxopt.all = np->rxopt.all;
1495
1496 /* Clone pktoptions received with SYN */
1497 newnp->pktoptions = NULL;
1498 if (req->af.v6_req.pktopts) {
 /* The child gets a clone; the request's own reference is dropped either way. */
1499 newnp->pktoptions = skb_clone(req->af.v6_req.pktopts,
1500 GFP_ATOMIC);
1501 kfree_skb(req->af.v6_req.pktopts);
1502 req->af.v6_req.pktopts = NULL;
1503 if (newnp->pktoptions)
1504 skb_set_owner_r(newnp->pktoptions, newsk);
1505 }
1506 newnp->opt = NULL;
1507 newnp->mcast_oif = tcp_v6_iif(skb);
1508 newnp->mcast_hops = skb->nh.ipv6h->hop_limit;
1509
1510 /* Clone native IPv6 options from listening socket (if any)
1511
1512 Yes, keeping reference count would be much more clever,
1513 but we make one more one thing there: reattach optmem
1514 to newsk.
1515 */
1516 if (opt) {
1517 newnp->opt = ipv6_dup_options(newsk, opt);
 /* Options built above by ipv6_invert_rthdr() are freed once duplicated. */
1518 if (opt != np->opt)
1519 sock_kfree_s(sk, opt, opt->tot_len);
1520 }
1521
1522 newtp->ext_header_len = 0;
1523 if (newnp->opt)
1524 newtp->ext_header_len = newnp->opt->opt_nflen +
1525 newnp->opt->opt_flen;
1526
1527 tcp_sync_mss(newsk, dst_mtu(dst));
1528 newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
1529 tcp_initialize_rcv_mss(newsk);
1530
1531 newinet->daddr = newinet->saddr = newinet->rcv_saddr = LOOPBACK4_IPV6;
1532
1533 __tcp_v6_hash(newsk);
1534 tcp_inherit_port(sk, newsk);
1535
1536 return newsk;
1537
 /* out_overflow falls through into out, so an overflow is also counted as a drop. */
1538out_overflow:
1539 NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS);
1540out:
1541 NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS);
1542 if (opt && opt != np->opt)
1543 sock_kfree_s(sk, opt, opt->tot_len);
1544 dst_release(dst);
1545 return NULL;
1546}
1547
1548static int tcp_v6_checksum_init(struct sk_buff *skb)
1549{
1550 if (skb->ip_summed == CHECKSUM_HW) {
1551 skb->ip_summed = CHECKSUM_UNNECESSARY;
1552 if (!tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
1553 &skb->nh.ipv6h->daddr,skb->csum))
1554 return 0;
1555 LIMIT_NETDEBUG(printk(KERN_DEBUG "hw tcp v6 csum failed\n"));
1556 }
1557 if (skb->len <= 76) {
1558 if (tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
1559 &skb->nh.ipv6h->daddr,skb_checksum(skb, 0, skb->len, 0)))
1560 return -1;
1561 skb->ip_summed = CHECKSUM_UNNECESSARY;
1562 } else {
1563 skb->csum = ~tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
1564 &skb->nh.ipv6h->daddr,0);
1565 }
1566 return 0;
1567}
1568
1569/* The socket must have its spinlock held when we get
1570 * here.
1571 *
1572 * We have a potential double-lock case here, so even when
1573 * doing backlog processing we use the BH locking scheme.
1574 * This is because we cannot sleep with the original spinlock
1575 * held.
1576 */
/*
 * Process one segment for socket @sk.
 *
 * IPv4 packets are passed to tcp_v4_do_rcv().  Established sockets take
 * the tcp_rcv_established() fast path; listeners first resolve the target
 * socket with tcp_v6_hnd_req(); everything else goes through
 * tcp_rcv_state_process().  When the socket has any rxopt bit set, a clone
 * of the segment is kept and may be latched as np->pktoptions.
 *
 * Always returns 0 on the IPv6 path; segments that are rejected are
 * freed here, after a reset has been sent where the code jumps to `reset`.
 */
1577static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1578{
1579 struct ipv6_pinfo *np = inet6_sk(sk);
1580 struct tcp_sock *tp;
1581 struct sk_buff *opt_skb = NULL;
1582
1583 /* Imagine: socket is IPv6. IPv4 packet arrives,
1584 goes to IPv4 receive handler and backlogged.
1585 From backlog it always goes here. Kerboom...
1586 Fortunately, tcp_rcv_established and rcv_established
1587 handle them correctly, but it is not case with
1588 tcp_v6_hnd_req and tcp_v6_send_reset(). --ANK
1589 */
1590
1591 if (skb->protocol == htons(ETH_P_IP))
1592 return tcp_v4_do_rcv(sk, skb);
1593
1594 if (sk_filter(sk, skb, 0))
1595 goto discard;
1596
1597 /*
1598 * socket locking is here for SMP purposes as backlog rcv
1599 * is currently called with bh processing disabled.
1600 */
1601
1602 /* Do Stevens' IPV6_PKTOPTIONS.
1603
1604 Yes, guys, it is the only place in our code, where we
1605 may make it not affecting IPv4.
1606 The rest of code is protocol independent,
1607 and I do not like idea to uglify IPv4.
1608
1609 Actually, all the idea behind IPV6_PKTOPTIONS
1610 looks not very well thought. For now we latch
1611 options, received in the last packet, enqueued
1612 by tcp. Feel free to propose better solution.
1613 --ANK (980728)
1614 */
 /* opt_skb stays NULL if the clone fails; every later use checks for that. */
1615 if (np->rxopt.all)
1616 opt_skb = skb_clone(skb, GFP_ATOMIC);
1617
1618 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1619 TCP_CHECK_TIMER(sk);
1620 if (tcp_rcv_established(sk, skb, skb->h.th, skb->len))
1621 goto reset;
1622 TCP_CHECK_TIMER(sk);
1623 if (opt_skb)
1624 goto ipv6_pktoptions;
1625 return 0;
1626 }
1627
 /* Slow path: reject truncated headers and bad checksums first. */
1628 if (skb->len < (skb->h.th->doff<<2) || tcp_checksum_complete(skb))
1629 goto csum_err;
1630
1631 if (sk->sk_state == TCP_LISTEN) {
1632 struct sock *nsk = tcp_v6_hnd_req(sk, skb);
1633 if (!nsk)
1634 goto discard;
1635
1636 /*
1637 * Queue it on the new socket if the new socket is active,
1638 * otherwise we just shortcircuit this and continue with
1639 * the new socket..
1640 */
1641 if(nsk != sk) {
1642 if (tcp_child_process(sk, nsk, skb))
1643 goto reset;
 /* The segment went to the child socket; the clone is not latched on the listener. */
1644 if (opt_skb)
1645 __kfree_skb(opt_skb);
1646 return 0;
1647 }
1648 }
1649
1650 TCP_CHECK_TIMER(sk);
1651 if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len))
1652 goto reset;
1653 TCP_CHECK_TIMER(sk);
1654 if (opt_skb)
1655 goto ipv6_pktoptions;
1656 return 0;
1657
1658reset:
1659 tcp_v6_send_reset(skb);
1660discard:
1661 if (opt_skb)
1662 __kfree_skb(opt_skb);
1663 kfree_skb(skb);
1664 return 0;
1665csum_err:
1666 TCP_INC_STATS_BH(TCP_MIB_INERRS);
1667 goto discard;
1668
1669
1670ipv6_pktoptions:
1671 /* Do you ask, what is it?
1672
1673 1. skb was enqueued by tcp.
1674 2. skb is added to tail of read queue, rather than out of order.
1675 3. socket is not in passive state.
1676 4. Finally, it really contains options, which user wants to receive.
1677 */
1678 tp = tcp_sk(sk);
1679 if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1680 !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1681 if (np->rxopt.bits.rxinfo)
1682 np->mcast_oif = tcp_v6_iif(opt_skb);
1683 if (np->rxopt.bits.rxhlim)
1684 np->mcast_hops = opt_skb->nh.ipv6h->hop_limit;
 /*
  * Either latch the clone as the new np->pktoptions or clear the latch;
  * in both cases opt_skb ends up holding the previous value, freed below.
  */
1685 if (ipv6_opt_accepted(sk, opt_skb)) {
1686 skb_set_owner_r(opt_skb, sk);
1687 opt_skb = xchg(&np->pktoptions, opt_skb);
1688 } else {
1689 __kfree_skb(opt_skb);
1690 opt_skb = xchg(&np->pktoptions, NULL);
1691 }
1692 }
1693
1694 if (opt_skb)
1695 kfree_skb(opt_skb);
1696 return 0;
1697}
1698
/*
 * Receive entry point for TCP segments carried over IPv6.
 *
 * @pskb:   pointer to the received skb
 * @nhoffp: not used in this function
 *
 * Validates the header length and checksum state, fills in TCP_SKB_CB(),
 * looks up the owning socket and then processes the segment directly,
 * hands it to the prequeue, or adds it to the socket backlog when the
 * socket is owned by the user.  Segments without a socket get a reset
 * unless they are truncated or fail the checksum; TIME-WAIT buckets are
 * handled through tcp_timewait_state_process().
 *
 * Returns -1 if tcp_v6_do_rcv() returned non-zero, 0 in all other cases.
 */
1699static int tcp_v6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
1700{
1701 struct sk_buff *skb = *pskb;
1702 struct tcphdr *th;
1703 struct sock *sk;
1704 int ret;
1705
1706 if (skb->pkt_type != PACKET_HOST)
1707 goto discard_it;
1708
1709 /*
1710 * Count it even if it's bad.
1711 */
1712 TCP_INC_STATS_BH(TCP_MIB_INSEGS);
1713
1714 if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1715 goto discard_it;
1716
1717 th = skb->h.th;
1718
 /* A data offset smaller than the fixed TCP header is malformed. */
1719 if (th->doff < sizeof(struct tcphdr)/4)
1720 goto bad_packet;
1721 if (!pskb_may_pull(skb, th->doff*4))
1722 goto discard_it;
1723
1724 if ((skb->ip_summed != CHECKSUM_UNNECESSARY &&
1725 tcp_v6_checksum_init(skb) < 0))
1726 goto bad_packet;
1727
 /* NOTE(review): th is re-read here, presumably because pskb_may_pull() can move the header -- confirm. */
1728 th = skb->h.th;
1729 TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1730 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1731 skb->len - th->doff*4);
1732 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1733 TCP_SKB_CB(skb)->when = 0;
1734 TCP_SKB_CB(skb)->flags = ipv6_get_dsfield(skb->nh.ipv6h);
1735 TCP_SKB_CB(skb)->sacked = 0;
1736
1737 sk = __tcp_v6_lookup(&skb->nh.ipv6h->saddr, th->source,
1738 &skb->nh.ipv6h->daddr, ntohs(th->dest), tcp_v6_iif(skb));
1739
1740 if (!sk)
1741 goto no_tcp_socket;
1742
 /* Also re-entered from the TIME-WAIT path below with sk set to a listener. */
1743process:
1744 if (sk->sk_state == TCP_TIME_WAIT)
1745 goto do_time_wait;
1746
1747 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1748 goto discard_and_relse;
1749
1750 if (sk_filter(sk, skb, 0))
1751 goto discard_and_relse;
1752
1753 skb->dev = NULL;
1754
1755 bh_lock_sock(sk);
1756 ret = 0;
 /*
  * Socket not owned by the user: process now unless tcp_prequeue() took
  * the segment.  Otherwise queue it on the backlog.
  */
1757 if (!sock_owned_by_user(sk)) {
1758 if (!tcp_prequeue(sk, skb))
1759 ret = tcp_v6_do_rcv(sk, skb);
1760 } else
1761 sk_add_backlog(sk, skb);
1762 bh_unlock_sock(sk);
1763
1764 sock_put(sk);
1765 return ret ? -1 : 0;
1766
1767no_tcp_socket:
1768 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1769 goto discard_it;
1770
 /* bad_packet is a jump target inside this if-body: it only counts the error. */
1771 if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
1772bad_packet:
1773 TCP_INC_STATS_BH(TCP_MIB_INERRS);
1774 } else {
1775 tcp_v6_send_reset(skb);
1776 }
1777
1778discard_it:
1779
1780 /*
1781 * Discard frame
1782 */
1783
1784 kfree_skb(skb);
1785 return 0;
1786
1787discard_and_relse:
1788 sock_put(sk);
1789 goto discard_it;
1790
 /* From here on sk is a TIME-WAIT bucket and is released with tcp_tw_put(). */
1791do_time_wait:
1792 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1793 tcp_tw_put((struct tcp_tw_bucket *) sk);
1794 goto discard_it;
1795 }
1796
1797 if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
1798 TCP_INC_STATS_BH(TCP_MIB_INERRS);
1799 tcp_tw_put((struct tcp_tw_bucket *) sk);
1800 goto discard_it;
1801 }
1802
1803 switch(tcp_timewait_state_process((struct tcp_tw_bucket *)sk,
1804 skb, th, skb->len)) {
1805 case TCP_TW_SYN:
1806 {
1807 struct sock *sk2;
1808
 /* If a listener exists for this address and port, retire the bucket and process the segment on the listener. */
1809 sk2 = tcp_v6_lookup_listener(&skb->nh.ipv6h->daddr, ntohs(th->dest), tcp_v6_iif(skb));
1810 if (sk2 != NULL) {
1811 tcp_tw_deschedule((struct tcp_tw_bucket *)sk);
1812 tcp_tw_put((struct tcp_tw_bucket *)sk);
1813 sk = sk2;
1814 goto process;
1815 }
1816 /* Fall through to ACK */
1817 }
1818 case TCP_TW_ACK:
1819 tcp_v6_timewait_ack(sk, skb);
1820 break;
1821 case TCP_TW_RST:
1822 goto no_tcp_socket;
1823 case TCP_TW_SUCCESS:;
1824 }
1825 goto discard_it;
1826}
1827
1828static int tcp_v6_rebuild_header(struct sock *sk)
1829{
1830 int err;
1831 struct dst_entry *dst;
1832 struct ipv6_pinfo *np = inet6_sk(sk);
1833
1834 dst = __sk_dst_check(sk, np->dst_cookie);
1835
1836 if (dst == NULL) {
1837 struct inet_sock *inet = inet_sk(sk);
1838 struct in6_addr *final_p = NULL, final;
1839 struct flowi fl;
1840
1841 memset(&fl, 0, sizeof(fl));
1842 fl.proto = IPPROTO_TCP;
1843 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1844 ipv6_addr_copy(&fl.fl6_src, &np->saddr);
1845 fl.fl6_flowlabel = np->flow_label;
1846 fl.oif = sk->sk_bound_dev_if;
1847 fl.fl_ip_dport = inet->dport;
1848 fl.fl_ip_sport = inet->sport;
1849
1850 if (np->opt && np->opt->srcrt) {
1851 struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
1852 ipv6_addr_copy(&final, &fl.fl6_dst);
1853 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1854 final_p = &final;
1855 }
1856
1857 err = ip6_dst_lookup(sk, &dst, &fl);
1858 if (err) {
1859 sk->sk_route_caps = 0;
1860 return err;
1861 }
1862 if (final_p)
1863 ipv6_addr_copy(&fl.fl6_dst, final_p);
1864
1865 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
1866 sk->sk_err_soft = -err;
1867 dst_release(dst);
1868 return err;
1869 }
1870
1871 ip6_dst_store(sk, dst, NULL);
1872 sk->sk_route_caps = dst->dev->features &
1873 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
1874 }
1875
1876 return 0;
1877}
1878
1879static int tcp_v6_xmit(struct sk_buff *skb, int ipfragok)
1880{
1881 struct sock *sk = skb->sk;
1882 struct inet_sock *inet = inet_sk(sk);
1883 struct ipv6_pinfo *np = inet6_sk(sk);
1884 struct flowi fl;
1885 struct dst_entry *dst;
1886 struct in6_addr *final_p = NULL, final;
1887
1888 memset(&fl, 0, sizeof(fl));
1889 fl.proto = IPPROTO_TCP;
1890 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1891 ipv6_addr_copy(&fl.fl6_src, &np->saddr);
1892 fl.fl6_flowlabel = np->flow_label;
1893 IP6_ECN_flow_xmit(sk, fl.fl6_flowlabel);
1894 fl.oif = sk->sk_bound_dev_if;
1895 fl.fl_ip_sport = inet->sport;
1896 fl.fl_ip_dport = inet->dport;
1897
1898 if (np->opt && np->opt->srcrt) {
1899 struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
1900 ipv6_addr_copy(&final, &fl.fl6_dst);
1901 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1902 final_p = &final;
1903 }
1904
1905 dst = __sk_dst_check(sk, np->dst_cookie);
1906
1907 if (dst == NULL) {
1908 int err = ip6_dst_lookup(sk, &dst, &fl);
1909
1910 if (err) {
1911 sk->sk_err_soft = -err;
1912 return err;
1913 }
1914
1915 if (final_p)
1916 ipv6_addr_copy(&fl.fl6_dst, final_p);
1917
1918 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
1919 sk->sk_route_caps = 0;
1920 dst_release(dst);
1921 return err;
1922 }
1923
1924 ip6_dst_store(sk, dst, NULL);
1925 sk->sk_route_caps = dst->dev->features &
1926 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
1927 }
1928
1929 skb->dst = dst_clone(dst);
1930
1931 /* Restore final destination back after routing done */
1932 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1933
1934 return ip6_xmit(sk, skb, &fl, np->opt, 0);
1935}
1936
1937static void v6_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr)
1938{
1939 struct ipv6_pinfo *np = inet6_sk(sk);
1940 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) uaddr;
1941
1942 sin6->sin6_family = AF_INET6;
1943 ipv6_addr_copy(&sin6->sin6_addr, &np->daddr);
1944 sin6->sin6_port = inet_sk(sk)->dport;
1945 /* We do not store received flowlabel for TCP */
1946 sin6->sin6_flowinfo = 0;
1947 sin6->sin6_scope_id = 0;
1948 if (sk->sk_bound_dev_if &&
1949 ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
1950 sin6->sin6_scope_id = sk->sk_bound_dev_if;
1951}
1952
/*
 * .remember_stamp hook for native IPv6 sockets.
 * Not implemented: unconditionally returns 0.
 */
static int tcp_v6_remember_stamp(struct sock *sk)
{
	return 0;	/* Alas, not yet... */
}
1958
1959static struct tcp_func ipv6_specific = {
1960 .queue_xmit = tcp_v6_xmit,
1961 .send_check = tcp_v6_send_check,
1962 .rebuild_header = tcp_v6_rebuild_header,
1963 .conn_request = tcp_v6_conn_request,
1964 .syn_recv_sock = tcp_v6_syn_recv_sock,
1965 .remember_stamp = tcp_v6_remember_stamp,
1966 .net_header_len = sizeof(struct ipv6hdr),
1967
1968 .setsockopt = ipv6_setsockopt,
1969 .getsockopt = ipv6_getsockopt,
1970 .addr2sockaddr = v6_addr2sockaddr,
1971 .sockaddr_len = sizeof(struct sockaddr_in6)
1972};
1973
1974/*
1975 * TCP over IPv4 via INET6 API
1976 */
1977
1978static struct tcp_func ipv6_mapped = {
1979 .queue_xmit = ip_queue_xmit,
1980 .send_check = tcp_v4_send_check,
1981 .rebuild_header = tcp_v4_rebuild_header,
1982 .conn_request = tcp_v6_conn_request,
1983 .syn_recv_sock = tcp_v6_syn_recv_sock,
1984 .remember_stamp = tcp_v4_remember_stamp,
1985 .net_header_len = sizeof(struct iphdr),
1986
1987 .setsockopt = ipv6_setsockopt,
1988 .getsockopt = ipv6_getsockopt,
1989 .addr2sockaddr = v6_addr2sockaddr,
1990 .sockaddr_len = sizeof(struct sockaddr_in6)
1991};
1992
1993
1994
1995/* NOTE: A lot of things set to zero explicitly by call to
1996 * sk_alloc() so need not be done here.
1997 */
1998static int tcp_v6_init_sock(struct sock *sk)
1999{
2000 struct tcp_sock *tp = tcp_sk(sk);
2001
2002 skb_queue_head_init(&tp->out_of_order_queue);
2003 tcp_init_xmit_timers(sk);
2004 tcp_prequeue_init(tp);
2005
2006 tp->rto = TCP_TIMEOUT_INIT;
2007 tp->mdev = TCP_TIMEOUT_INIT;
2008
2009 /* So many TCP implementations out there (incorrectly) count the
2010 * initial SYN frame in their delayed-ACK and congestion control
2011 * algorithms that we must have the following bandaid to talk
2012 * efficiently to them. -DaveM
2013 */
2014 tp->snd_cwnd = 2;
2015
2016 /* See draft-stevens-tcpca-spec-01 for discussion of the
2017 * initialization of these values.
2018 */
2019 tp->snd_ssthresh = 0x7fffffff;
2020 tp->snd_cwnd_clamp = ~0;
2021 tp->mss_cache_std = tp->mss_cache = 536;
2022
2023 tp->reordering = sysctl_tcp_reordering;
2024
2025 sk->sk_state = TCP_CLOSE;
2026
2027 tp->af_specific = &ipv6_specific;
2028
2029 sk->sk_write_space = sk_stream_write_space;
2030 sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
2031
2032 sk->sk_sndbuf = sysctl_tcp_wmem[1];
2033 sk->sk_rcvbuf = sysctl_tcp_rmem[1];
2034
2035 atomic_inc(&tcp_sockets_allocated);
2036
2037 return 0;
2038}
2039
/*
 * .destroy hook of tcpv6_prot: run the IPv4 TCP teardown on the socket
 * first, then the IPv6 socket teardown, and return the latter's result.
 */
static int tcp_v6_destroy_sock(struct sock *sk)
{
	extern int tcp_v4_destroy_sock(struct sock *sk);
	int rc;

	tcp_v4_destroy_sock(sk);
	rc = inet6_destroy_sock(sk);

	return rc;
}
2047
2048/* Proc filesystem TCPv6 sock list dumping. */
2049static void get_openreq6(struct seq_file *seq,
2050 struct sock *sk, struct open_request *req, int i, int uid)
2051{
2052 struct in6_addr *dest, *src;
2053 int ttd = req->expires - jiffies;
2054
2055 if (ttd < 0)
2056 ttd = 0;
2057
2058 src = &req->af.v6_req.loc_addr;
2059 dest = &req->af.v6_req.rmt_addr;
2060 seq_printf(seq,
2061 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2062 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
2063 i,
2064 src->s6_addr32[0], src->s6_addr32[1],
2065 src->s6_addr32[2], src->s6_addr32[3],
2066 ntohs(inet_sk(sk)->sport),
2067 dest->s6_addr32[0], dest->s6_addr32[1],
2068 dest->s6_addr32[2], dest->s6_addr32[3],
2069 ntohs(req->rmt_port),
2070 TCP_SYN_RECV,
2071 0,0, /* could print option size, but that is af dependent. */
2072 1, /* timers active (only the expire timer) */
2073 jiffies_to_clock_t(ttd),
2074 req->retrans,
2075 uid,
2076 0, /* non standard timer */
2077 0, /* open_requests have no inode */
2078 0, req);
2079}
2080
2081static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
2082{
2083 struct in6_addr *dest, *src;
2084 __u16 destp, srcp;
2085 int timer_active;
2086 unsigned long timer_expires;
2087 struct inet_sock *inet = inet_sk(sp);
2088 struct tcp_sock *tp = tcp_sk(sp);
2089 struct ipv6_pinfo *np = inet6_sk(sp);
2090
2091 dest = &np->daddr;
2092 src = &np->rcv_saddr;
2093 destp = ntohs(inet->dport);
2094 srcp = ntohs(inet->sport);
2095 if (tp->pending == TCP_TIME_RETRANS) {
2096 timer_active = 1;
2097 timer_expires = tp->timeout;
2098 } else if (tp->pending == TCP_TIME_PROBE0) {
2099 timer_active = 4;
2100 timer_expires = tp->timeout;
2101 } else if (timer_pending(&sp->sk_timer)) {
2102 timer_active = 2;
2103 timer_expires = sp->sk_timer.expires;
2104 } else {
2105 timer_active = 0;
2106 timer_expires = jiffies;
2107 }
2108
2109 seq_printf(seq,
2110 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2111 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %u %u %u %u %d\n",
2112 i,
2113 src->s6_addr32[0], src->s6_addr32[1],
2114 src->s6_addr32[2], src->s6_addr32[3], srcp,
2115 dest->s6_addr32[0], dest->s6_addr32[1],
2116 dest->s6_addr32[2], dest->s6_addr32[3], destp,
2117 sp->sk_state,
2118 tp->write_seq-tp->snd_una, tp->rcv_nxt-tp->copied_seq,
2119 timer_active,
2120 jiffies_to_clock_t(timer_expires - jiffies),
2121 tp->retransmits,
2122 sock_i_uid(sp),
2123 tp->probes_out,
2124 sock_i_ino(sp),
2125 atomic_read(&sp->sk_refcnt), sp,
2126 tp->rto, tp->ack.ato, (tp->ack.quick<<1)|tp->ack.pingpong,
2127 tp->snd_cwnd, tp->snd_ssthresh>=0xFFFF?-1:tp->snd_ssthresh
2128 );
2129}
2130
2131static void get_timewait6_sock(struct seq_file *seq,
2132 struct tcp_tw_bucket *tw, int i)
2133{
2134 struct in6_addr *dest, *src;
2135 __u16 destp, srcp;
2136 int ttd = tw->tw_ttd - jiffies;
2137
2138 if (ttd < 0)
2139 ttd = 0;
2140
2141 dest = &tw->tw_v6_daddr;
2142 src = &tw->tw_v6_rcv_saddr;
2143 destp = ntohs(tw->tw_dport);
2144 srcp = ntohs(tw->tw_sport);
2145
2146 seq_printf(seq,
2147 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2148 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
2149 i,
2150 src->s6_addr32[0], src->s6_addr32[1],
2151 src->s6_addr32[2], src->s6_addr32[3], srcp,
2152 dest->s6_addr32[0], dest->s6_addr32[1],
2153 dest->s6_addr32[2], dest->s6_addr32[3], destp,
2154 tw->tw_substate, 0, 0,
2155 3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
2156 atomic_read(&tw->tw_refcnt), tw);
2157}
2158
2159#ifdef CONFIG_PROC_FS
2160static int tcp6_seq_show(struct seq_file *seq, void *v)
2161{
2162 struct tcp_iter_state *st;
2163
2164 if (v == SEQ_START_TOKEN) {
2165 seq_puts(seq,
2166 " sl "
2167 "local_address "
2168 "remote_address "
2169 "st tx_queue rx_queue tr tm->when retrnsmt"
2170 " uid timeout inode\n");
2171 goto out;
2172 }
2173 st = seq->private;
2174
2175 switch (st->state) {
2176 case TCP_SEQ_STATE_LISTENING:
2177 case TCP_SEQ_STATE_ESTABLISHED:
2178 get_tcp6_sock(seq, v, st->num);
2179 break;
2180 case TCP_SEQ_STATE_OPENREQ:
2181 get_openreq6(seq, st->syn_wait_sk, v, st->num, st->uid);
2182 break;
2183 case TCP_SEQ_STATE_TIME_WAIT:
2184 get_timewait6_sock(seq, v, st->num);
2185 break;
2186 }
2187out:
2188 return 0;
2189}
2190
2191static struct file_operations tcp6_seq_fops;
2192static struct tcp_seq_afinfo tcp6_seq_afinfo = {
2193 .owner = THIS_MODULE,
2194 .name = "tcp6",
2195 .family = AF_INET6,
2196 .seq_show = tcp6_seq_show,
2197 .seq_fops = &tcp6_seq_fops,
2198};
2199
2200int __init tcp6_proc_init(void)
2201{
2202 return tcp_proc_register(&tcp6_seq_afinfo);
2203}
2204
2205void tcp6_proc_exit(void)
2206{
2207 tcp_proc_unregister(&tcp6_seq_afinfo);
2208}
2209#endif
2210
2211struct proto tcpv6_prot = {
2212 .name = "TCPv6",
2213 .owner = THIS_MODULE,
2214 .close = tcp_close,
2215 .connect = tcp_v6_connect,
2216 .disconnect = tcp_disconnect,
2217 .accept = tcp_accept,
2218 .ioctl = tcp_ioctl,
2219 .init = tcp_v6_init_sock,
2220 .destroy = tcp_v6_destroy_sock,
2221 .shutdown = tcp_shutdown,
2222 .setsockopt = tcp_setsockopt,
2223 .getsockopt = tcp_getsockopt,
2224 .sendmsg = tcp_sendmsg,
2225 .recvmsg = tcp_recvmsg,
2226 .backlog_rcv = tcp_v6_do_rcv,
2227 .hash = tcp_v6_hash,
2228 .unhash = tcp_unhash,
2229 .get_port = tcp_v6_get_port,
2230 .enter_memory_pressure = tcp_enter_memory_pressure,
2231 .sockets_allocated = &tcp_sockets_allocated,
2232 .memory_allocated = &tcp_memory_allocated,
2233 .memory_pressure = &tcp_memory_pressure,
2234 .sysctl_mem = sysctl_tcp_mem,
2235 .sysctl_wmem = sysctl_tcp_wmem,
2236 .sysctl_rmem = sysctl_tcp_rmem,
2237 .max_header = MAX_TCP_HEADER,
2238 .obj_size = sizeof(struct tcp6_sock),
2239};
2240
2241static struct inet6_protocol tcpv6_protocol = {
2242 .handler = tcp_v6_rcv,
2243 .err_handler = tcp_v6_err,
2244 .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
2245};
2246
2247extern struct proto_ops inet6_stream_ops;
2248
2249static struct inet_protosw tcpv6_protosw = {
2250 .type = SOCK_STREAM,
2251 .protocol = IPPROTO_TCP,
2252 .prot = &tcpv6_prot,
2253 .ops = &inet6_stream_ops,
2254 .capability = -1,
2255 .no_check = 0,
2256 .flags = INET_PROTOSW_PERMANENT,
2257};
2258
2259void __init tcpv6_init(void)
2260{
2261 /* register inet6 protocol */
2262 if (inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP) < 0)
2263 printk(KERN_ERR "tcpv6_init: Could not register protocol\n");
2264 inet6_register_protosw(&tcpv6_protosw);
2265}
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
new file mode 100644
index 000000000000..e251d0ba4f39
--- /dev/null
+++ b/net/ipv6/udp.c
@@ -0,0 +1,1075 @@
1/*
2 * UDP over IPv6
3 * Linux INET6 implementation
4 *
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
7 *
8 * Based on linux/ipv4/udp.c
9 *
10 * $Id: udp.c,v 1.65 2002/02/01 22:01:04 davem Exp $
11 *
12 * Fixes:
13 * Hideaki YOSHIFUJI : sin6_scope_id support
14 * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which
15 * Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind
16 * a single port at the same time.
17 * Kazunori MIYAZAWA @USAGI: change process style to use ip6_append_data
18 * YOSHIFUJI Hideaki @USAGI: convert /proc/net/udp6 to seq_file.
19 *
20 * This program is free software; you can redistribute it and/or
21 * modify it under the terms of the GNU General Public License
22 * as published by the Free Software Foundation; either version
23 * 2 of the License, or (at your option) any later version.
24 */
25
26#include <linux/config.h>
27#include <linux/errno.h>
28#include <linux/types.h>
29#include <linux/socket.h>
30#include <linux/sockios.h>
31#include <linux/sched.h>
32#include <linux/net.h>
33#include <linux/in6.h>
34#include <linux/netdevice.h>
35#include <linux/if_arp.h>
36#include <linux/ipv6.h>
37#include <linux/icmpv6.h>
38#include <linux/init.h>
39#include <asm/uaccess.h>
40
41#include <net/sock.h>
42#include <net/snmp.h>
43
44#include <net/ipv6.h>
45#include <net/ndisc.h>
46#include <net/protocol.h>
47#include <net/transp_v6.h>
48#include <net/ip6_route.h>
49#include <net/addrconf.h>
50#include <net/ip.h>
51#include <net/udp.h>
52#include <net/raw.h>
53#include <net/inet_common.h>
54
55#include <net/ip6_checksum.h>
56#include <net/xfrm.h>
57
58#include <linux/proc_fs.h>
59#include <linux/seq_file.h>
60
/*
 * Defines udp_stats_in6 with element type struct udp_mib.
 * NOTE(review): presumably the storage behind the UDP6_INC_STATS_* calls
 * used throughout this file -- confirm against the macro definitions.
 */
61DEFINE_SNMP_STAT(struct udp_mib, udp_stats_in6);
62
63/* Grrr, addr_type already calculated by caller, but I don't want
64 * to add some silly "cookie" argument to this method just for that.
65 */
/*
 * Bind @sk to local UDP port @snum, or pick a port when @snum is 0.
 * The whole operation runs under write_lock_bh(&udp_hash_lock).
 *
 * Returns 0 on success (inet_sk(sk)->num is set and the socket is hashed if
 * it was not already), or 1 when an explicitly requested port conflicts
 * with another socket in the same hash chain.
 */
66static int udp_v6_get_port(struct sock *sk, unsigned short snum)
67{
68 struct sock *sk2;
69 struct hlist_node *node;
70
71 write_lock_bh(&udp_hash_lock);
72 if (snum == 0) {
73 int best_size_so_far, best, result, i;
74
   /* Pull the rover back to the low end if it has left the configured
    * local port range. */
75 if (udp_port_rover > sysctl_local_port_range[1] ||
76 udp_port_rover < sysctl_local_port_range[0])
77 udp_port_rover = sysctl_local_port_range[0];
78 best_size_so_far = 32767;
79 best = result = udp_port_rover;
   /* Look at UDP_HTABLE_SIZE consecutive candidates starting at the
    * rover.  An empty chain is taken immediately; otherwise remember the
    * candidate whose chain is shortest. */
80 for (i = 0; i < UDP_HTABLE_SIZE; i++, result++) {
81 int size;
82 struct hlist_head *list;
83
84 list = &udp_hash[result & (UDP_HTABLE_SIZE - 1)];
85 if (hlist_empty(list)) {
   /* Candidate ran past the top of the range: fold it back in while
    * keeping the same hash slot. */
86 if (result > sysctl_local_port_range[1])
87 result = sysctl_local_port_range[0] +
88 ((result - sysctl_local_port_range[0]) &
89 (UDP_HTABLE_SIZE - 1));
90 goto gotit;
91 }
92 size = 0;
   /* Count the chain, giving up as soon as it is no shorter than the
    * best chain seen so far. */
93 sk_for_each(sk2, node, list)
94 if (++size >= best_size_so_far)
95 goto next;
96 best_size_so_far = size;
97 best = result;
98 next:;
99 }
100 result = best;
   /* Step through ports that share the chosen hash slot until
    * udp_lport_inuse() reports one as free.
    * NOTE(review): this loop has no other exit condition. */
101 for(;; result += UDP_HTABLE_SIZE) {
102 if (result > sysctl_local_port_range[1])
103 result = sysctl_local_port_range[0]
104 + ((result - sysctl_local_port_range[0]) &
105 (UDP_HTABLE_SIZE - 1));
106 if (!udp_lport_inuse(result))
107 break;
108 }
109gotit:
110 udp_port_rover = snum = result;
111 } else {
   /* Explicit port: fail if another socket has the same port, an
    * overlapping device binding (either side unbound, or both bound to
    * the same device), at least one side without sk_reuse, and
    * ipv6_rcv_saddr_equal() reports matching receive addresses. */
112 sk_for_each(sk2, node,
113 &udp_hash[snum & (UDP_HTABLE_SIZE - 1)]) {
114 if (inet_sk(sk2)->num == snum &&
115 sk2 != sk &&
116 (!sk2->sk_bound_dev_if ||
117 !sk->sk_bound_dev_if ||
118 sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
119 (!sk2->sk_reuse || !sk->sk_reuse) &&
120 ipv6_rcv_saddr_equal(sk, sk2))
121 goto fail;
122 }
123 }
124
   /* Record the port; insert into the hash and bump the protocol use
    * count only if the socket is not hashed yet. */
125 inet_sk(sk)->num = snum;
126 if (sk_unhashed(sk)) {
127 sk_add_node(sk, &udp_hash[snum & (UDP_HTABLE_SIZE - 1)]);
128 sock_prot_inc_use(sk->sk_prot);
129 }
130 write_unlock_bh(&udp_hash_lock);
131 return 0;
132
133fail:
134 write_unlock_bh(&udp_hash_lock);
135 return 1;
136}
137
/*
 * Hash hook for UDPv6 sockets.  Calling it is treated as a bug: in this
 * file sockets are inserted into udp_hash by udp_v6_get_port().
 */
static void udp_v6_hash(struct sock *sk)
{
	BUG();
}
142
143static void udp_v6_unhash(struct sock *sk)
144{
145 write_lock_bh(&udp_hash_lock);
146 if (sk_del_node_init(sk)) {
147 inet_sk(sk)->num = 0;
148 sock_prot_dec_use(sk->sk_prot);
149 }
150 write_unlock_bh(&udp_hash_lock);
151}
152
153static struct sock *udp_v6_lookup(struct in6_addr *saddr, u16 sport,
154 struct in6_addr *daddr, u16 dport, int dif)
155{
156 struct sock *sk, *result = NULL;
157 struct hlist_node *node;
158 unsigned short hnum = ntohs(dport);
159 int badness = -1;
160
161 read_lock(&udp_hash_lock);
162 sk_for_each(sk, node, &udp_hash[hnum & (UDP_HTABLE_SIZE - 1)]) {
163 struct inet_sock *inet = inet_sk(sk);
164
165 if (inet->num == hnum && sk->sk_family == PF_INET6) {
166 struct ipv6_pinfo *np = inet6_sk(sk);
167 int score = 0;
168 if (inet->dport) {
169 if (inet->dport != sport)
170 continue;
171 score++;
172 }
173 if (!ipv6_addr_any(&np->rcv_saddr)) {
174 if (!ipv6_addr_equal(&np->rcv_saddr, daddr))
175 continue;
176 score++;
177 }
178 if (!ipv6_addr_any(&np->daddr)) {
179 if (!ipv6_addr_equal(&np->daddr, saddr))
180 continue;
181 score++;
182 }
183 if (sk->sk_bound_dev_if) {
184 if (sk->sk_bound_dev_if != dif)
185 continue;
186 score++;
187 }
188 if(score == 4) {
189 result = sk;
190 break;
191 } else if(score > badness) {
192 result = sk;
193 badness = score;
194 }
195 }
196 }
197 if (result)
198 sock_hold(result);
199 read_unlock(&udp_hash_lock);
200 return result;
201}
202
/*
 * Close a UDPv6 socket by handing it to sk_common_release().
 * @timeout is not used.
 */
static void udpv6_close(struct sock *sk, long timeout)
{
	sk_common_release(sk);
}
211
212/*
213 * This should be easy, if there is something there we
214 * return it, otherwise we block.
215 */
216
217static int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk,
218 struct msghdr *msg, size_t len,
219 int noblock, int flags, int *addr_len)
220{
221 struct ipv6_pinfo *np = inet6_sk(sk);
222 struct inet_sock *inet = inet_sk(sk);
223 struct sk_buff *skb;
224 size_t copied;
225 int err;
226
227 if (addr_len)
228 *addr_len=sizeof(struct sockaddr_in6);
229
230 if (flags & MSG_ERRQUEUE)
231 return ipv6_recv_error(sk, msg, len);
232
233try_again:
234 skb = skb_recv_datagram(sk, flags, noblock, &err);
235 if (!skb)
236 goto out;
237
238 copied = skb->len - sizeof(struct udphdr);
239 if (copied > len) {
240 copied = len;
241 msg->msg_flags |= MSG_TRUNC;
242 }
243
244 if (skb->ip_summed==CHECKSUM_UNNECESSARY) {
245 err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov,
246 copied);
247 } else if (msg->msg_flags&MSG_TRUNC) {
248 if ((unsigned short)csum_fold(skb_checksum(skb, 0, skb->len, skb->csum)))
249 goto csum_copy_err;
250 err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov,
251 copied);
252 } else {
253 err = skb_copy_and_csum_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov);
254 if (err == -EINVAL)
255 goto csum_copy_err;
256 }
257 if (err)
258 goto out_free;
259
260 sock_recv_timestamp(msg, sk, skb);
261
262 /* Copy the address. */
263 if (msg->msg_name) {
264 struct sockaddr_in6 *sin6;
265
266 sin6 = (struct sockaddr_in6 *) msg->msg_name;
267 sin6->sin6_family = AF_INET6;
268 sin6->sin6_port = skb->h.uh->source;
269 sin6->sin6_flowinfo = 0;
270 sin6->sin6_scope_id = 0;
271
272 if (skb->protocol == htons(ETH_P_IP))
273 ipv6_addr_set(&sin6->sin6_addr, 0, 0,
274 htonl(0xffff), skb->nh.iph->saddr);
275 else {
276 ipv6_addr_copy(&sin6->sin6_addr, &skb->nh.ipv6h->saddr);
277 if (ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
278 sin6->sin6_scope_id = IP6CB(skb)->iif;
279 }
280
281 }
282 if (skb->protocol == htons(ETH_P_IP)) {
283 if (inet->cmsg_flags)
284 ip_cmsg_recv(msg, skb);
285 } else {
286 if (np->rxopt.all)
287 datagram_recv_ctl(sk, msg, skb);
288 }
289
290 err = copied;
291 if (flags & MSG_TRUNC)
292 err = skb->len - sizeof(struct udphdr);
293
294out_free:
295 skb_free_datagram(sk, skb);
296out:
297 return err;
298
299csum_copy_err:
300 /* Clear queue. */
301 if (flags&MSG_PEEK) {
302 int clear = 0;
303 spin_lock_irq(&sk->sk_receive_queue.lock);
304 if (skb == skb_peek(&sk->sk_receive_queue)) {
305 __skb_unlink(skb, &sk->sk_receive_queue);
306 clear = 1;
307 }
308 spin_unlock_irq(&sk->sk_receive_queue.lock);
309 if (clear)
310 kfree_skb(skb);
311 }
312
313 skb_free_datagram(sk, skb);
314
315 if (flags & MSG_DONTWAIT) {
316 UDP6_INC_STATS_USER(UDP_MIB_INERRORS);
317 return -EAGAIN;
318 }
319 goto try_again;
320}
321
322static void udpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
323 int type, int code, int offset, __u32 info)
324{
325 struct ipv6_pinfo *np;
326 struct ipv6hdr *hdr = (struct ipv6hdr*)skb->data;
327 struct net_device *dev = skb->dev;
328 struct in6_addr *saddr = &hdr->saddr;
329 struct in6_addr *daddr = &hdr->daddr;
330 struct udphdr *uh = (struct udphdr*)(skb->data+offset);
331 struct sock *sk;
332 int err;
333
334 sk = udp_v6_lookup(daddr, uh->dest, saddr, uh->source, dev->ifindex);
335
336 if (sk == NULL)
337 return;
338
339 np = inet6_sk(sk);
340
341 if (!icmpv6_err_convert(type, code, &err) && !np->recverr)
342 goto out;
343
344 if (sk->sk_state != TCP_ESTABLISHED && !np->recverr)
345 goto out;
346
347 if (np->recverr)
348 ipv6_icmp_error(sk, skb, err, uh->dest, ntohl(info), (u8 *)(uh+1));
349
350 sk->sk_err = err;
351 sk->sk_error_report(sk);
352out:
353 sock_put(sk);
354}
355
356static inline int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
357{
358 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) {
359 kfree_skb(skb);
360 return -1;
361 }
362
363 if (skb->ip_summed != CHECKSUM_UNNECESSARY) {
364 if ((unsigned short)csum_fold(skb_checksum(skb, 0, skb->len, skb->csum))) {
365 UDP6_INC_STATS_BH(UDP_MIB_INERRORS);
366 kfree_skb(skb);
367 return 0;
368 }
369 skb->ip_summed = CHECKSUM_UNNECESSARY;
370 }
371
372 if (sock_queue_rcv_skb(sk,skb)<0) {
373 UDP6_INC_STATS_BH(UDP_MIB_INERRORS);
374 kfree_skb(skb);
375 return 0;
376 }
377 UDP6_INC_STATS_BH(UDP_MIB_INDATAGRAMS);
378 return 0;
379}
380
381static struct sock *udp_v6_mcast_next(struct sock *sk,
382 u16 loc_port, struct in6_addr *loc_addr,
383 u16 rmt_port, struct in6_addr *rmt_addr,
384 int dif)
385{
386 struct hlist_node *node;
387 struct sock *s = sk;
388 unsigned short num = ntohs(loc_port);
389
390 sk_for_each_from(s, node) {
391 struct inet_sock *inet = inet_sk(s);
392
393 if (inet->num == num && s->sk_family == PF_INET6) {
394 struct ipv6_pinfo *np = inet6_sk(s);
395 if (inet->dport) {
396 if (inet->dport != rmt_port)
397 continue;
398 }
399 if (!ipv6_addr_any(&np->daddr) &&
400 !ipv6_addr_equal(&np->daddr, rmt_addr))
401 continue;
402
403 if (s->sk_bound_dev_if && s->sk_bound_dev_if != dif)
404 continue;
405
406 if (!ipv6_addr_any(&np->rcv_saddr)) {
407 if (ipv6_addr_equal(&np->rcv_saddr, loc_addr))
408 return s;
409 continue;
410 }
411 if(!inet6_mc_check(s, loc_addr, rmt_addr))
412 continue;
413 return s;
414 }
415 }
416 return NULL;
417}
418
419/*
420 * Note: called only from the BH handler context,
421 * so we don't need to lock the hashes.
422 */
423static void udpv6_mcast_deliver(struct udphdr *uh,
424 struct in6_addr *saddr, struct in6_addr *daddr,
425 struct sk_buff *skb)
426{
427 struct sock *sk, *sk2;
428 int dif;
429
430 read_lock(&udp_hash_lock);
431 sk = sk_head(&udp_hash[ntohs(uh->dest) & (UDP_HTABLE_SIZE - 1)]);
432 dif = skb->dev->ifindex;
433 sk = udp_v6_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif);
434 if (!sk) {
435 kfree_skb(skb);
436 goto out;
437 }
438
439 sk2 = sk;
440 while ((sk2 = udp_v6_mcast_next(sk_next(sk2), uh->dest, daddr,
441 uh->source, saddr, dif))) {
442 struct sk_buff *buff = skb_clone(skb, GFP_ATOMIC);
443 if (buff)
444 udpv6_queue_rcv_skb(sk2, buff);
445 }
446 udpv6_queue_rcv_skb(sk, skb);
447out:
448 read_unlock(&udp_hash_lock);
449}
450
/*
 * Receive entry for a UDP datagram carried in IPv6.
 *
 * Validates the UDP length and the checksum state of *pskb, then hands the
 * skb either to udpv6_mcast_deliver() (multicast destination) or to the
 * socket found by udp_v6_lookup().  With no matching socket an ICMPv6
 * port-unreachable error is sent.  The skb is consumed on every path and
 * every path visible here returns 0.  @nhoffp is not used in this body.
 */
451static int udpv6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
452{
453 struct sk_buff *skb = *pskb;
454 struct sock *sk;
455 struct udphdr *uh;
456 struct net_device *dev = skb->dev;
457 struct in6_addr *saddr, *daddr;
458 u32 ulen = 0;
459
    /* If even the UDP header cannot be pulled, ulen is still 0 when the
     * short-packet message below is printed. */
460 if (!pskb_may_pull(skb, sizeof(struct udphdr)))
461 goto short_packet;
462
463 saddr = &skb->nh.ipv6h->saddr;
464 daddr = &skb->nh.ipv6h->daddr;
465 uh = skb->h.uh;
466
467 ulen = ntohs(uh->len);
468
469 /* Check for jumbo payload */
470 if (ulen == 0)
471 ulen = skb->len;
472
    /* The claimed length must fit in the skb and cover at least the header. */
473 if (ulen > skb->len || ulen < sizeof(*uh))
474 goto short_packet;
475
476 if (uh->check == 0) {
477 /* RFC 2460 section 8.1 says that we SHOULD log
478 this error. Well, it is reasonable.
479 */
480 LIMIT_NETDEBUG(
481 printk(KERN_INFO "IPv6: udp checksum is 0\n"));
482 goto discard;
483 }
484
    /* Drop anything beyond the UDP length, then re-read the header
     * pointers.
     * NOTE(review): presumably because trimming may change the skb's
     * buffer -- confirm against __pskb_trim(). */
485 if (ulen < skb->len) {
486 if (__pskb_trim(skb, ulen))
487 goto discard;
488 saddr = &skb->nh.ipv6h->saddr;
489 daddr = &skb->nh.ipv6h->daddr;
490 uh = skb->h.uh;
491 }
492
    /* Hardware-supplied sum: accept it if it checks out against the
     * pseudo-header, otherwise fall back to CHECKSUM_NONE so that it is
     * verified in software later. */
493 if (skb->ip_summed==CHECKSUM_HW) {
494 skb->ip_summed = CHECKSUM_UNNECESSARY;
495 if (csum_ipv6_magic(saddr, daddr, ulen, IPPROTO_UDP, skb->csum)) {
496 LIMIT_NETDEBUG(printk(KERN_DEBUG "udp v6 hw csum failure.\n"));
497 skb->ip_summed = CHECKSUM_NONE;
498 }
499 }
    /* Not verified yet: seed skb->csum with the complemented pseudo-header
     * sum; the later skb_checksum() calls start from this value. */
500 if (skb->ip_summed != CHECKSUM_UNNECESSARY)
501 skb->csum = ~csum_ipv6_magic(saddr, daddr, ulen, IPPROTO_UDP, 0);
502
503 /*
504 * Multicast receive code
505 */
506 if (ipv6_addr_is_multicast(daddr)) {
507 udpv6_mcast_deliver(uh, saddr, daddr, skb);
508 return 0;
509 }
510
511 /* Unicast */
512
513 /*
514 * check socket cache ... must talk to Alan about his plans
515 * for sock caches... i'll skip this for now.
516 */
517 sk = udp_v6_lookup(saddr, uh->source, daddr, uh->dest, dev->ifindex);
518
    /* Nobody is listening: after the policy and checksum checks pass,
     * count it as NOPORTS and answer with ICMPv6 port unreachable. */
519 if (sk == NULL) {
520 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
521 goto discard;
522
523 if (skb->ip_summed != CHECKSUM_UNNECESSARY &&
524 (unsigned short)csum_fold(skb_checksum(skb, 0, skb->len, skb->csum)))
525 goto discard;
526 UDP6_INC_STATS_BH(UDP_MIB_NOPORTS);
527
528 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0, dev);
529
530 kfree_skb(skb);
531 return(0);
532 }
533
534 /* deliver */
535
    /* The return value is ignored; then drop the reference taken by
     * udp_v6_lookup(). */
536 udpv6_queue_rcv_skb(sk, skb);
537 sock_put(sk);
538 return(0);
539
540short_packet:
541 if (net_ratelimit())
542 printk(KERN_DEBUG "UDP: short packet: %d/%u\n", ulen, skb->len);
543
    /* Shared drop path: count an input error and free the skb. */
544discard:
545 UDP6_INC_STATS_BH(UDP_MIB_INERRORS);
546 kfree_skb(skb);
547 return(0);
548}
549/*
550 * Throw away all pending data and cancel the corking. Socket is locked.
551 */
552static void udp_v6_flush_pending_frames(struct sock *sk)
553{
554 struct udp_sock *up = udp_sk(sk);
555
556 if (up->pending) {
557 up->len = 0;
558 up->pending = 0;
559 ip6_flush_pending_frames(sk);
560 }
561}
562
563/*
564 * Sending
565 */
566
567static int udp_v6_push_pending_frames(struct sock *sk, struct udp_sock *up)
568{
569 struct sk_buff *skb;
570 struct udphdr *uh;
571 struct inet_sock *inet = inet_sk(sk);
572 struct flowi *fl = &inet->cork.fl;
573 int err = 0;
574
575 /* Grab the skbuff where UDP header space exists. */
576 if ((skb = skb_peek(&sk->sk_write_queue)) == NULL)
577 goto out;
578
579 /*
580 * Create a UDP header
581 */
582 uh = skb->h.uh;
583 uh->source = fl->fl_ip_sport;
584 uh->dest = fl->fl_ip_dport;
585 uh->len = htons(up->len);
586 uh->check = 0;
587
588 if (sk->sk_no_check == UDP_CSUM_NOXMIT) {
589 skb->ip_summed = CHECKSUM_NONE;
590 goto send;
591 }
592
593 if (skb_queue_len(&sk->sk_write_queue) == 1) {
594 skb->csum = csum_partial((char *)uh,
595 sizeof(struct udphdr), skb->csum);
596 uh->check = csum_ipv6_magic(&fl->fl6_src,
597 &fl->fl6_dst,
598 up->len, fl->proto, skb->csum);
599 } else {
600 u32 tmp_csum = 0;
601
602 skb_queue_walk(&sk->sk_write_queue, skb) {
603 tmp_csum = csum_add(tmp_csum, skb->csum);
604 }
605 tmp_csum = csum_partial((char *)uh,
606 sizeof(struct udphdr), tmp_csum);
607 tmp_csum = csum_ipv6_magic(&fl->fl6_src,
608 &fl->fl6_dst,
609 up->len, fl->proto, tmp_csum);
610 uh->check = tmp_csum;
611
612 }
613 if (uh->check == 0)
614 uh->check = -1;
615
616send:
617 err = ip6_push_pending_frames(sk);
618out:
619 up->len = 0;
620 up->pending = 0;
621 return err;
622}
623
624static int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk,
625 struct msghdr *msg, size_t len)
626{
627 struct ipv6_txoptions opt_space;
628 struct udp_sock *up = udp_sk(sk);
629 struct inet_sock *inet = inet_sk(sk);
630 struct ipv6_pinfo *np = inet6_sk(sk);
631 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) msg->msg_name;
632 struct in6_addr *daddr, *final_p = NULL, final;
633 struct ipv6_txoptions *opt = NULL;
634 struct ip6_flowlabel *flowlabel = NULL;
635 struct flowi *fl = &inet->cork.fl;
636 struct dst_entry *dst;
637 int addr_len = msg->msg_namelen;
638 int ulen = len;
639 int hlimit = -1;
640 int corkreq = up->corkflag || msg->msg_flags&MSG_MORE;
641 int err;
642
643 /* destination address check */
644 if (sin6) {
645 if (addr_len < offsetof(struct sockaddr, sa_data))
646 return -EINVAL;
647
648 switch (sin6->sin6_family) {
649 case AF_INET6:
650 if (addr_len < SIN6_LEN_RFC2133)
651 return -EINVAL;
652 daddr = &sin6->sin6_addr;
653 break;
654 case AF_INET:
655 goto do_udp_sendmsg;
656 case AF_UNSPEC:
657 msg->msg_name = sin6 = NULL;
658 msg->msg_namelen = addr_len = 0;
659 daddr = NULL;
660 break;
661 default:
662 return -EINVAL;
663 }
664 } else if (!up->pending) {
665 if (sk->sk_state != TCP_ESTABLISHED)
666 return -EDESTADDRREQ;
667 daddr = &np->daddr;
668 } else
669 daddr = NULL;
670
671 if (daddr) {
672 if (ipv6_addr_type(daddr) == IPV6_ADDR_MAPPED) {
673 struct sockaddr_in sin;
674 sin.sin_family = AF_INET;
675 sin.sin_port = sin6 ? sin6->sin6_port : inet->dport;
676 sin.sin_addr.s_addr = daddr->s6_addr32[3];
677 msg->msg_name = &sin;
678 msg->msg_namelen = sizeof(sin);
679do_udp_sendmsg:
680 if (__ipv6_only_sock(sk))
681 return -ENETUNREACH;
682 return udp_sendmsg(iocb, sk, msg, len);
683 }
684 }
685
686 if (up->pending == AF_INET)
687 return udp_sendmsg(iocb, sk, msg, len);
688
689 /* Rough check on arithmetic overflow,
690 better check is made in ip6_build_xmit
691 */
692 if (len > INT_MAX - sizeof(struct udphdr))
693 return -EMSGSIZE;
694
695 if (up->pending) {
696 /*
697 * There are pending frames.
698 * The socket lock must be held while it's corked.
699 */
700 lock_sock(sk);
701 if (likely(up->pending)) {
702 if (unlikely(up->pending != AF_INET6)) {
703 release_sock(sk);
704 return -EAFNOSUPPORT;
705 }
706 dst = NULL;
707 goto do_append_data;
708 }
709 release_sock(sk);
710 }
711 ulen += sizeof(struct udphdr);
712
713 memset(fl, 0, sizeof(*fl));
714
715 if (sin6) {
716 if (sin6->sin6_port == 0)
717 return -EINVAL;
718
719 fl->fl_ip_dport = sin6->sin6_port;
720 daddr = &sin6->sin6_addr;
721
722 if (np->sndflow) {
723 fl->fl6_flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK;
724 if (fl->fl6_flowlabel&IPV6_FLOWLABEL_MASK) {
725 flowlabel = fl6_sock_lookup(sk, fl->fl6_flowlabel);
726 if (flowlabel == NULL)
727 return -EINVAL;
728 daddr = &flowlabel->dst;
729 }
730 }
731
732 /*
733 * Otherwise it will be difficult to maintain
734 * sk->sk_dst_cache.
735 */
736 if (sk->sk_state == TCP_ESTABLISHED &&
737 ipv6_addr_equal(daddr, &np->daddr))
738 daddr = &np->daddr;
739
740 if (addr_len >= sizeof(struct sockaddr_in6) &&
741 sin6->sin6_scope_id &&
742 ipv6_addr_type(daddr)&IPV6_ADDR_LINKLOCAL)
743 fl->oif = sin6->sin6_scope_id;
744 } else {
745 if (sk->sk_state != TCP_ESTABLISHED)
746 return -EDESTADDRREQ;
747
748 fl->fl_ip_dport = inet->dport;
749 daddr = &np->daddr;
750 fl->fl6_flowlabel = np->flow_label;
751 }
752
753 if (!fl->oif)
754 fl->oif = sk->sk_bound_dev_if;
755
756 if (msg->msg_controllen) {
757 opt = &opt_space;
758 memset(opt, 0, sizeof(struct ipv6_txoptions));
759 opt->tot_len = sizeof(*opt);
760
761 err = datagram_send_ctl(msg, fl, opt, &hlimit);
762 if (err < 0) {
763 fl6_sock_release(flowlabel);
764 return err;
765 }
766 if ((fl->fl6_flowlabel&IPV6_FLOWLABEL_MASK) && !flowlabel) {
767 flowlabel = fl6_sock_lookup(sk, fl->fl6_flowlabel);
768 if (flowlabel == NULL)
769 return -EINVAL;
770 }
771 if (!(opt->opt_nflen|opt->opt_flen))
772 opt = NULL;
773 }
774 if (opt == NULL)
775 opt = np->opt;
776 if (flowlabel)
777 opt = fl6_merge_options(&opt_space, flowlabel, opt);
778
779 fl->proto = IPPROTO_UDP;
780 ipv6_addr_copy(&fl->fl6_dst, daddr);
781 if (ipv6_addr_any(&fl->fl6_src) && !ipv6_addr_any(&np->saddr))
782 ipv6_addr_copy(&fl->fl6_src, &np->saddr);
783 fl->fl_ip_sport = inet->sport;
784
785 /* merge ip6_build_xmit from ip6_output */
786 if (opt && opt->srcrt) {
787 struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
788 ipv6_addr_copy(&final, &fl->fl6_dst);
789 ipv6_addr_copy(&fl->fl6_dst, rt0->addr);
790 final_p = &final;
791 }
792
793 if (!fl->oif && ipv6_addr_is_multicast(&fl->fl6_dst))
794 fl->oif = np->mcast_oif;
795
796 err = ip6_dst_lookup(sk, &dst, fl);
797 if (err)
798 goto out;
799 if (final_p)
800 ipv6_addr_copy(&fl->fl6_dst, final_p);
801
802 if ((err = xfrm_lookup(&dst, fl, sk, 0)) < 0) {
803 dst_release(dst);
804 goto out;
805 }
806
807 if (hlimit < 0) {
808 if (ipv6_addr_is_multicast(&fl->fl6_dst))
809 hlimit = np->mcast_hops;
810 else
811 hlimit = np->hop_limit;
812 if (hlimit < 0)
813 hlimit = dst_metric(dst, RTAX_HOPLIMIT);
814 if (hlimit < 0)
815 hlimit = ipv6_get_hoplimit(dst->dev);
816 }
817
818 if (msg->msg_flags&MSG_CONFIRM)
819 goto do_confirm;
820back_from_confirm:
821
822 lock_sock(sk);
823 if (unlikely(up->pending)) {
824 /* The socket is already corked while preparing it. */
825 /* ... which is an evident application bug. --ANK */
826 release_sock(sk);
827
828 LIMIT_NETDEBUG(printk(KERN_DEBUG "udp cork app bug 2\n"));
829 err = -EINVAL;
830 goto out;
831 }
832
833 up->pending = AF_INET6;
834
835do_append_data:
836 up->len += ulen;
837 err = ip6_append_data(sk, ip_generic_getfrag, msg->msg_iov, ulen, sizeof(struct udphdr),
838 hlimit, opt, fl, (struct rt6_info*)dst,
839 corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags);
840 if (err)
841 udp_v6_flush_pending_frames(sk);
842 else if (!corkreq)
843 err = udp_v6_push_pending_frames(sk, up);
844
845 if (dst)
846 ip6_dst_store(sk, dst,
847 ipv6_addr_equal(&fl->fl6_dst, &np->daddr) ?
848 &np->daddr : NULL);
849 if (err > 0)
850 err = np->recverr ? net_xmit_errno(err) : 0;
851 release_sock(sk);
852out:
853 fl6_sock_release(flowlabel);
854 if (!err) {
855 UDP6_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS);
856 return len;
857 }
858 return err;
859
860do_confirm:
861 dst_confirm(dst);
862 if (!(msg->msg_flags&MSG_PROBE) || len)
863 goto back_from_confirm;
864 err = 0;
865 goto out;
866}
867
/*
 * Socket destructor hook for UDPv6.
 *
 * Discards any frames still queued on the socket (done with the socket
 * lock held) and then runs inet6_destroy_sock().  Always returns 0.
 */
static int udpv6_destroy_sock(struct sock *sk)
{
	/* Flush whatever is still pending while holding the socket lock. */
	lock_sock(sk);
	udp_v6_flush_pending_frames(sk);
	release_sock(sk);

	/* Hand over to the generic IPv6 socket teardown. */
	inet6_destroy_sock(sk);
	return 0;
}
878
879/*
880 * Socket option code for UDP
881 */
882static int udpv6_setsockopt(struct sock *sk, int level, int optname,
883 char __user *optval, int optlen)
884{
885 struct udp_sock *up = udp_sk(sk);
886 int val;
887 int err = 0;
888
889 if (level != SOL_UDP)
890 return ipv6_setsockopt(sk, level, optname, optval, optlen);
891
892 if(optlen<sizeof(int))
893 return -EINVAL;
894
895 if (get_user(val, (int __user *)optval))
896 return -EFAULT;
897
898 switch(optname) {
899 case UDP_CORK:
900 if (val != 0) {
901 up->corkflag = 1;
902 } else {
903 up->corkflag = 0;
904 lock_sock(sk);
905 udp_v6_push_pending_frames(sk, up);
906 release_sock(sk);
907 }
908 break;
909
910 case UDP_ENCAP:
911 switch (val) {
912 case 0:
913 up->encap_type = val;
914 break;
915 default:
916 err = -ENOPROTOOPT;
917 break;
918 }
919 break;
920
921 default:
922 err = -ENOPROTOOPT;
923 break;
924 };
925
926 return err;
927}
928
929static int udpv6_getsockopt(struct sock *sk, int level, int optname,
930 char __user *optval, int __user *optlen)
931{
932 struct udp_sock *up = udp_sk(sk);
933 int val, len;
934
935 if (level != SOL_UDP)
936 return ipv6_getsockopt(sk, level, optname, optval, optlen);
937
938 if(get_user(len,optlen))
939 return -EFAULT;
940
941 len = min_t(unsigned int, len, sizeof(int));
942
943 if(len < 0)
944 return -EINVAL;
945
946 switch(optname) {
947 case UDP_CORK:
948 val = up->corkflag;
949 break;
950
951 case UDP_ENCAP:
952 val = up->encap_type;
953 break;
954
955 default:
956 return -ENOPROTOOPT;
957 };
958
959 if(put_user(len, optlen))
960 return -EFAULT;
961 if(copy_to_user(optval, &val,len))
962 return -EFAULT;
963 return 0;
964}
965
966static struct inet6_protocol udpv6_protocol = {
967 .handler = udpv6_rcv,
968 .err_handler = udpv6_err,
969 .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
970};
971
972/* ------------------------------------------------------------------------ */
973#ifdef CONFIG_PROC_FS
974
975static void udp6_sock_seq_show(struct seq_file *seq, struct sock *sp, int bucket)
976{
977 struct inet_sock *inet = inet_sk(sp);
978 struct ipv6_pinfo *np = inet6_sk(sp);
979 struct in6_addr *dest, *src;
980 __u16 destp, srcp;
981
982 dest = &np->daddr;
983 src = &np->rcv_saddr;
984 destp = ntohs(inet->dport);
985 srcp = ntohs(inet->sport);
986 seq_printf(seq,
987 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
988 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p\n",
989 bucket,
990 src->s6_addr32[0], src->s6_addr32[1],
991 src->s6_addr32[2], src->s6_addr32[3], srcp,
992 dest->s6_addr32[0], dest->s6_addr32[1],
993 dest->s6_addr32[2], dest->s6_addr32[3], destp,
994 sp->sk_state,
995 atomic_read(&sp->sk_wmem_alloc),
996 atomic_read(&sp->sk_rmem_alloc),
997 0, 0L, 0,
998 sock_i_uid(sp), 0,
999 sock_i_ino(sp),
1000 atomic_read(&sp->sk_refcnt), sp);
1001}
1002
1003static int udp6_seq_show(struct seq_file *seq, void *v)
1004{
1005 if (v == SEQ_START_TOKEN)
1006 seq_printf(seq,
1007 " sl "
1008 "local_address "
1009 "remote_address "
1010 "st tx_queue rx_queue tr tm->when retrnsmt"
1011 " uid timeout inode\n");
1012 else
1013 udp6_sock_seq_show(seq, v, ((struct udp_iter_state *)seq->private)->bucket);
1014 return 0;
1015}
1016
1017static struct file_operations udp6_seq_fops;
1018static struct udp_seq_afinfo udp6_seq_afinfo = {
1019 .owner = THIS_MODULE,
1020 .name = "udp6",
1021 .family = AF_INET6,
1022 .seq_show = udp6_seq_show,
1023 .seq_fops = &udp6_seq_fops,
1024};
1025
1026int __init udp6_proc_init(void)
1027{
1028 return udp_proc_register(&udp6_seq_afinfo);
1029}
1030
1031void udp6_proc_exit(void) {
1032 udp_proc_unregister(&udp6_seq_afinfo);
1033}
1034#endif /* CONFIG_PROC_FS */
1035
1036/* ------------------------------------------------------------------------ */
1037
1038struct proto udpv6_prot = {
1039 .name = "UDPv6",
1040 .owner = THIS_MODULE,
1041 .close = udpv6_close,
1042 .connect = ip6_datagram_connect,
1043 .disconnect = udp_disconnect,
1044 .ioctl = udp_ioctl,
1045 .destroy = udpv6_destroy_sock,
1046 .setsockopt = udpv6_setsockopt,
1047 .getsockopt = udpv6_getsockopt,
1048 .sendmsg = udpv6_sendmsg,
1049 .recvmsg = udpv6_recvmsg,
1050 .backlog_rcv = udpv6_queue_rcv_skb,
1051 .hash = udp_v6_hash,
1052 .unhash = udp_v6_unhash,
1053 .get_port = udp_v6_get_port,
1054 .obj_size = sizeof(struct udp6_sock),
1055};
1056
1057extern struct proto_ops inet6_dgram_ops;
1058
1059static struct inet_protosw udpv6_protosw = {
1060 .type = SOCK_DGRAM,
1061 .protocol = IPPROTO_UDP,
1062 .prot = &udpv6_prot,
1063 .ops = &inet6_dgram_ops,
1064 .capability =-1,
1065 .no_check = UDP_CSUM_DEFAULT,
1066 .flags = INET_PROTOSW_PERMANENT,
1067};
1068
1069
1070void __init udpv6_init(void)
1071{
1072 if (inet6_add_protocol(&udpv6_protocol, IPPROTO_UDP) < 0)
1073 printk(KERN_ERR "udpv6_init: Could not register protocol\n");
1074 inet6_register_protosw(&udpv6_protosw);
1075}
diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
new file mode 100644
index 000000000000..28c29d78338e
--- /dev/null
+++ b/net/ipv6/xfrm6_input.c
@@ -0,0 +1,150 @@
1/*
2 * xfrm6_input.c: based on net/ipv4/xfrm4_input.c
3 *
4 * Authors:
5 * Mitsuru KANDA @USAGI
6 * Kazunori MIYAZAWA @USAGI
7 * Kunihiro Ishiguro <kunihiro@ipinfusion.com>
8 * YOSHIFUJI Hideaki @USAGI
9 * IPv6 support
10 */
11
12#include <linux/module.h>
13#include <linux/string.h>
14#include <net/dsfield.h>
15#include <net/inet_ecn.h>
16#include <net/ip.h>
17#include <net/ipv6.h>
18#include <net/xfrm.h>
19
20static inline void ipip6_ecn_decapsulate(struct sk_buff *skb)
21{
22 struct ipv6hdr *outer_iph = skb->nh.ipv6h;
23 struct ipv6hdr *inner_iph = skb->h.ipv6h;
24
25 if (INET_ECN_is_ce(ipv6_get_dsfield(outer_iph)))
26 IP6_ECN_set_ce(inner_iph);
27}
28
29int xfrm6_rcv_spi(struct sk_buff **pskb, unsigned int *nhoffp, u32 spi)
30{
31 struct sk_buff *skb = *pskb;
32 int err;
33 u32 seq;
34 struct sec_decap_state xfrm_vec[XFRM_MAX_DEPTH];
35 struct xfrm_state *x;
36 int xfrm_nr = 0;
37 int decaps = 0;
38 int nexthdr;
39 unsigned int nhoff;
40
41 nhoff = *nhoffp;
42 nexthdr = skb->nh.raw[nhoff];
43
44 seq = 0;
45 if (!spi && (err = xfrm_parse_spi(skb, nexthdr, &spi, &seq)) != 0)
46 goto drop;
47
48 do {
49 struct ipv6hdr *iph = skb->nh.ipv6h;
50
51 if (xfrm_nr == XFRM_MAX_DEPTH)
52 goto drop;
53
54 x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, spi, nexthdr, AF_INET6);
55 if (x == NULL)
56 goto drop;
57 spin_lock(&x->lock);
58 if (unlikely(x->km.state != XFRM_STATE_VALID))
59 goto drop_unlock;
60
61 if (x->props.replay_window && xfrm_replay_check(x, seq))
62 goto drop_unlock;
63
64 if (xfrm_state_check_expire(x))
65 goto drop_unlock;
66
67 nexthdr = x->type->input(x, &(xfrm_vec[xfrm_nr].decap), skb);
68 if (nexthdr <= 0)
69 goto drop_unlock;
70
71 skb->nh.raw[nhoff] = nexthdr;
72
73 if (x->props.replay_window)
74 xfrm_replay_advance(x, seq);
75
76 x->curlft.bytes += skb->len;
77 x->curlft.packets++;
78
79 spin_unlock(&x->lock);
80
81 xfrm_vec[xfrm_nr++].xvec = x;
82
83 if (x->props.mode) { /* XXX */
84 if (nexthdr != IPPROTO_IPV6)
85 goto drop;
86 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
87 goto drop;
88 if (skb_cloned(skb) &&
89 pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
90 goto drop;
91 if (x->props.flags & XFRM_STATE_DECAP_DSCP)
92 ipv6_copy_dscp(skb->nh.ipv6h, skb->h.ipv6h);
93 if (!(x->props.flags & XFRM_STATE_NOECN))
94 ipip6_ecn_decapsulate(skb);
95 skb->mac.raw = memmove(skb->data - skb->mac_len,
96 skb->mac.raw, skb->mac_len);
97 skb->nh.raw = skb->data;
98 decaps = 1;
99 break;
100 }
101
102 if ((err = xfrm_parse_spi(skb, nexthdr, &spi, &seq)) < 0)
103 goto drop;
104 } while (!err);
105
106 /* Allocate new secpath or COW existing one. */
107 if (!skb->sp || atomic_read(&skb->sp->refcnt) != 1) {
108 struct sec_path *sp;
109 sp = secpath_dup(skb->sp);
110 if (!sp)
111 goto drop;
112 if (skb->sp)
113 secpath_put(skb->sp);
114 skb->sp = sp;
115 }
116
117 if (xfrm_nr + skb->sp->len > XFRM_MAX_DEPTH)
118 goto drop;
119
120 memcpy(skb->sp->x+skb->sp->len, xfrm_vec, xfrm_nr*sizeof(struct sec_decap_state));
121 skb->sp->len += xfrm_nr;
122 skb->ip_summed = CHECKSUM_NONE;
123
124 if (decaps) {
125 if (!(skb->dev->flags&IFF_LOOPBACK)) {
126 dst_release(skb->dst);
127 skb->dst = NULL;
128 }
129 netif_rx(skb);
130 return -1;
131 } else {
132 return 1;
133 }
134
135drop_unlock:
136 spin_unlock(&x->lock);
137 xfrm_state_put(x);
138drop:
139 while (--xfrm_nr >= 0)
140 xfrm_state_put(xfrm_vec[xfrm_nr].xvec);
141 kfree_skb(skb);
142 return -1;
143}
144
145EXPORT_SYMBOL(xfrm6_rcv_spi);
146
147int xfrm6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
148{
149 return xfrm6_rcv_spi(pskb, nhoffp, 0);
150}
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
new file mode 100644
index 000000000000..601a148f60f3
--- /dev/null
+++ b/net/ipv6/xfrm6_output.c
@@ -0,0 +1,143 @@
1/*
2 * xfrm6_output.c - Common IPsec encapsulation code for IPv6.
3 * Copyright (C) 2002 USAGI/WIDE Project
4 * Copyright (c) 2004 Herbert Xu <herbert@gondor.apana.org.au>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12#include <linux/skbuff.h>
13#include <linux/spinlock.h>
14#include <linux/icmpv6.h>
15#include <net/dsfield.h>
16#include <net/inet_ecn.h>
17#include <net/ipv6.h>
18#include <net/xfrm.h>
19
20/* Add encapsulation header.
21 *
22 * In transport mode, the IP header and mutable extension headers will be moved
23 * forward to make space for the encapsulation header.
24 *
25 * In tunnel mode, the top IP header will be constructed per RFC 2401.
26 * The following fields in it shall be filled in by x->type->output:
27 * payload_len
28 *
29 * On exit, skb->h will be set to the start of the encapsulation header to be
30 * filled in by x->type->output and skb->nh will be set to the nextheader field
31 * of the extension header directly preceding the encapsulation header, or in
32 * its absence, that of the top IP header. The value of skb->data will always
33 * point to the top IP header.
34 */
35static void xfrm6_encap(struct sk_buff *skb)
36{
37 struct dst_entry *dst = skb->dst;
38 struct xfrm_state *x = dst->xfrm;
39 struct ipv6hdr *iph, *top_iph;
40 int dsfield;
41
42 skb_push(skb, x->props.header_len);
43 iph = skb->nh.ipv6h;
44
45 if (!x->props.mode) {
46 u8 *prevhdr;
47 int hdr_len;
48
49 hdr_len = ip6_find_1stfragopt(skb, &prevhdr);
50 skb->nh.raw = prevhdr - x->props.header_len;
51 skb->h.raw = skb->data + hdr_len;
52 memmove(skb->data, iph, hdr_len);
53 return;
54 }
55
56 skb->nh.raw = skb->data;
57 top_iph = skb->nh.ipv6h;
58 skb->nh.raw = &top_iph->nexthdr;
59 skb->h.ipv6h = top_iph + 1;
60
61 top_iph->version = 6;
62 top_iph->priority = iph->priority;
63 top_iph->flow_lbl[0] = iph->flow_lbl[0];
64 top_iph->flow_lbl[1] = iph->flow_lbl[1];
65 top_iph->flow_lbl[2] = iph->flow_lbl[2];
66 dsfield = ipv6_get_dsfield(top_iph);
67 dsfield = INET_ECN_encapsulate(dsfield, dsfield);
68 if (x->props.flags & XFRM_STATE_NOECN)
69 dsfield &= ~INET_ECN_MASK;
70 ipv6_change_dsfield(top_iph, 0, dsfield);
71 top_iph->nexthdr = IPPROTO_IPV6;
72 top_iph->hop_limit = dst_metric(dst->child, RTAX_HOPLIMIT);
73 ipv6_addr_copy(&top_iph->saddr, (struct in6_addr *)&x->props.saddr);
74 ipv6_addr_copy(&top_iph->daddr, (struct in6_addr *)&x->id.daddr);
75}
76
77static int xfrm6_tunnel_check_size(struct sk_buff *skb)
78{
79 int mtu, ret = 0;
80 struct dst_entry *dst = skb->dst;
81
82 mtu = dst_mtu(dst);
83 if (mtu < IPV6_MIN_MTU)
84 mtu = IPV6_MIN_MTU;
85
86 if (skb->len > mtu) {
87 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
88 ret = -EMSGSIZE;
89 }
90
91 return ret;
92}
93
94int xfrm6_output(struct sk_buff *skb)
95{
96 struct dst_entry *dst = skb->dst;
97 struct xfrm_state *x = dst->xfrm;
98 int err;
99
100 if (skb->ip_summed == CHECKSUM_HW) {
101 err = skb_checksum_help(skb, 0);
102 if (err)
103 goto error_nolock;
104 }
105
106 if (x->props.mode) {
107 err = xfrm6_tunnel_check_size(skb);
108 if (err)
109 goto error_nolock;
110 }
111
112 spin_lock_bh(&x->lock);
113 err = xfrm_state_check(x, skb);
114 if (err)
115 goto error;
116
117 xfrm6_encap(skb);
118
119 err = x->type->output(x, skb);
120 if (err)
121 goto error;
122
123 x->curlft.bytes += skb->len;
124 x->curlft.packets++;
125
126 spin_unlock_bh(&x->lock);
127
128 skb->nh.raw = skb->data;
129
130 if (!(skb->dst = dst_pop(dst))) {
131 err = -EHOSTUNREACH;
132 goto error_nolock;
133 }
134 err = NET_XMIT_BYPASS;
135
136out_exit:
137 return err;
138error:
139 spin_unlock_bh(&x->lock);
140error_nolock:
141 kfree_skb(skb);
142 goto out_exit;
143}
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
new file mode 100644
index 000000000000..8a4f37de4d2d
--- /dev/null
+++ b/net/ipv6/xfrm6_policy.c
@@ -0,0 +1,295 @@
1/*
2 * xfrm6_policy.c: based on xfrm4_policy.c
3 *
4 * Authors:
5 * Mitsuru KANDA @USAGI
6 * Kazunori MIYAZAWA @USAGI
7 * Kunihiro Ishiguro <kunihiro@ipinfusion.com>
8 * IPv6 support
9 * YOSHIFUJI Hideaki
10 * Split up af-specific portion
11 *
12 */
13
14#include <linux/config.h>
15#include <net/xfrm.h>
16#include <net/ip.h>
17#include <net/ipv6.h>
18#include <net/ip6_route.h>
19
20static struct dst_ops xfrm6_dst_ops;
21static struct xfrm_policy_afinfo xfrm6_policy_afinfo;
22
23static struct xfrm_type_map xfrm6_type_map = { .lock = RW_LOCK_UNLOCKED };
24
25static int xfrm6_dst_lookup(struct xfrm_dst **dst, struct flowi *fl)
26{
27 int err = 0;
28 *dst = (struct xfrm_dst*)ip6_route_output(NULL, fl);
29 if (!*dst)
30 err = -ENETUNREACH;
31 return err;
32}
33
34static struct dst_entry *
35__xfrm6_find_bundle(struct flowi *fl, struct xfrm_policy *policy)
36{
37 struct dst_entry *dst;
38
39 /* Still not clear if we should set fl->fl6_{src,dst}... */
40 read_lock_bh(&policy->lock);
41 for (dst = policy->bundles; dst; dst = dst->next) {
42 struct xfrm_dst *xdst = (struct xfrm_dst*)dst;
43 struct in6_addr fl_dst_prefix, fl_src_prefix;
44
45 ipv6_addr_prefix(&fl_dst_prefix,
46 &fl->fl6_dst,
47 xdst->u.rt6.rt6i_dst.plen);
48 ipv6_addr_prefix(&fl_src_prefix,
49 &fl->fl6_src,
50 xdst->u.rt6.rt6i_src.plen);
51 if (ipv6_addr_equal(&xdst->u.rt6.rt6i_dst.addr, &fl_dst_prefix) &&
52 ipv6_addr_equal(&xdst->u.rt6.rt6i_src.addr, &fl_src_prefix) &&
53 xfrm_bundle_ok(xdst, fl, AF_INET6)) {
54 dst_clone(dst);
55 break;
56 }
57 }
58 read_unlock_bh(&policy->lock);
59 return dst;
60}
61
62/* Allocate chain of dst_entry's, attach known xfrm's, calculate
63 * all the metrics... Shortly, bundle a bundle.
64 */
65
66static int
67__xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int nx,
68 struct flowi *fl, struct dst_entry **dst_p)
69{
70 struct dst_entry *dst, *dst_prev;
71 struct rt6_info *rt0 = (struct rt6_info*)(*dst_p);
72 struct rt6_info *rt = rt0;
73 struct in6_addr *remote = &fl->fl6_dst;
74 struct in6_addr *local = &fl->fl6_src;
75 struct flowi fl_tunnel = {
76 .nl_u = {
77 .ip6_u = {
78 .saddr = *local,
79 .daddr = *remote
80 }
81 }
82 };
83 int i;
84 int err = 0;
85 int header_len = 0;
86 int trailer_len = 0;
87
88 dst = dst_prev = NULL;
89 dst_hold(&rt->u.dst);
90
91 for (i = 0; i < nx; i++) {
92 struct dst_entry *dst1 = dst_alloc(&xfrm6_dst_ops);
93 struct xfrm_dst *xdst;
94 int tunnel = 0;
95
96 if (unlikely(dst1 == NULL)) {
97 err = -ENOBUFS;
98 dst_release(&rt->u.dst);
99 goto error;
100 }
101
102 if (!dst)
103 dst = dst1;
104 else {
105 dst_prev->child = dst1;
106 dst1->flags |= DST_NOHASH;
107 dst_clone(dst1);
108 }
109
110 xdst = (struct xfrm_dst *)dst1;
111 xdst->route = &rt->u.dst;
112
113 dst1->next = dst_prev;
114 dst_prev = dst1;
115 if (xfrm[i]->props.mode) {
116 remote = (struct in6_addr*)&xfrm[i]->id.daddr;
117 local = (struct in6_addr*)&xfrm[i]->props.saddr;
118 tunnel = 1;
119 }
120 header_len += xfrm[i]->props.header_len;
121 trailer_len += xfrm[i]->props.trailer_len;
122
123 if (tunnel) {
124 ipv6_addr_copy(&fl_tunnel.fl6_dst, remote);
125 ipv6_addr_copy(&fl_tunnel.fl6_src, local);
126 err = xfrm_dst_lookup((struct xfrm_dst **) &rt,
127 &fl_tunnel, AF_INET6);
128 if (err)
129 goto error;
130 } else
131 dst_hold(&rt->u.dst);
132 }
133
134 dst_prev->child = &rt->u.dst;
135 dst->path = &rt->u.dst;
136
137 *dst_p = dst;
138 dst = dst_prev;
139
140 dst_prev = *dst_p;
141 i = 0;
142 for (; dst_prev != &rt->u.dst; dst_prev = dst_prev->child) {
143 struct xfrm_dst *x = (struct xfrm_dst*)dst_prev;
144
145 dst_prev->xfrm = xfrm[i++];
146 dst_prev->dev = rt->u.dst.dev;
147 if (rt->u.dst.dev)
148 dev_hold(rt->u.dst.dev);
149 dst_prev->obsolete = -1;
150 dst_prev->flags |= DST_HOST;
151 dst_prev->lastuse = jiffies;
152 dst_prev->header_len = header_len;
153 dst_prev->trailer_len = trailer_len;
154 memcpy(&dst_prev->metrics, &x->route->metrics, sizeof(dst_prev->metrics));
155
156 /* Copy neighbour for reachability confirmation */
157 dst_prev->neighbour = neigh_clone(rt->u.dst.neighbour);
158 dst_prev->input = rt->u.dst.input;
159 dst_prev->output = xfrm6_output;
160 /* Sheit... I remember I did this right. Apparently,
161 * it was magically lost, so this code needs audit */
162 x->u.rt6.rt6i_flags = rt0->rt6i_flags&(RTCF_BROADCAST|RTCF_MULTICAST|RTCF_LOCAL);
163 x->u.rt6.rt6i_metric = rt0->rt6i_metric;
164 x->u.rt6.rt6i_node = rt0->rt6i_node;
165 x->u.rt6.rt6i_gateway = rt0->rt6i_gateway;
166 memcpy(&x->u.rt6.rt6i_gateway, &rt0->rt6i_gateway, sizeof(x->u.rt6.rt6i_gateway));
167 x->u.rt6.rt6i_dst = rt0->rt6i_dst;
168 x->u.rt6.rt6i_src = rt0->rt6i_src;
169 header_len -= x->u.dst.xfrm->props.header_len;
170 trailer_len -= x->u.dst.xfrm->props.trailer_len;
171 }
172
173 xfrm_init_pmtu(dst);
174 return 0;
175
176error:
177 if (dst)
178 dst_free(dst);
179 return err;
180}
181
182static inline void
183_decode_session6(struct sk_buff *skb, struct flowi *fl)
184{
185 u16 offset = sizeof(struct ipv6hdr);
186 struct ipv6hdr *hdr = skb->nh.ipv6h;
187 struct ipv6_opt_hdr *exthdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset);
188 u8 nexthdr = skb->nh.ipv6h->nexthdr;
189
190 memset(fl, 0, sizeof(struct flowi));
191 ipv6_addr_copy(&fl->fl6_dst, &hdr->daddr);
192 ipv6_addr_copy(&fl->fl6_src, &hdr->saddr);
193
194 while (pskb_may_pull(skb, skb->nh.raw + offset + 1 - skb->data)) {
195 switch (nexthdr) {
196 case NEXTHDR_ROUTING:
197 case NEXTHDR_HOP:
198 case NEXTHDR_DEST:
199 offset += ipv6_optlen(exthdr);
200 nexthdr = exthdr->nexthdr;
201 exthdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset);
202 break;
203
204 case IPPROTO_UDP:
205 case IPPROTO_TCP:
206 case IPPROTO_SCTP:
207 if (pskb_may_pull(skb, skb->nh.raw + offset + 4 - skb->data)) {
208 u16 *ports = (u16 *)exthdr;
209
210 fl->fl_ip_sport = ports[0];
211 fl->fl_ip_dport = ports[1];
212 }
213 fl->proto = nexthdr;
214 return;
215
216 case IPPROTO_ICMPV6:
217 if (pskb_may_pull(skb, skb->nh.raw + offset + 2 - skb->data)) {
218 u8 *icmp = (u8 *)exthdr;
219
220 fl->fl_icmp_type = icmp[0];
221 fl->fl_icmp_code = icmp[1];
222 }
223 fl->proto = nexthdr;
224 return;
225
226 /* XXX Why are there these headers? */
227 case IPPROTO_AH:
228 case IPPROTO_ESP:
229 case IPPROTO_COMP:
230 default:
231 fl->fl_ipsec_spi = 0;
232 fl->proto = nexthdr;
233 return;
234 };
235 }
236}
237
238static inline int xfrm6_garbage_collect(void)
239{
240 read_lock(&xfrm6_policy_afinfo.lock);
241 xfrm6_policy_afinfo.garbage_collect();
242 read_unlock(&xfrm6_policy_afinfo.lock);
243 return (atomic_read(&xfrm6_dst_ops.entries) > xfrm6_dst_ops.gc_thresh*2);
244}
245
246static void xfrm6_update_pmtu(struct dst_entry *dst, u32 mtu)
247{
248 struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
249 struct dst_entry *path = xdst->route;
250
251 path->ops->update_pmtu(path, mtu);
252}
253
254static struct dst_ops xfrm6_dst_ops = {
255 .family = AF_INET6,
256 .protocol = __constant_htons(ETH_P_IPV6),
257 .gc = xfrm6_garbage_collect,
258 .update_pmtu = xfrm6_update_pmtu,
259 .gc_thresh = 1024,
260 .entry_size = sizeof(struct xfrm_dst),
261};
262
263static struct xfrm_policy_afinfo xfrm6_policy_afinfo = {
264 .family = AF_INET6,
265 .lock = RW_LOCK_UNLOCKED,
266 .type_map = &xfrm6_type_map,
267 .dst_ops = &xfrm6_dst_ops,
268 .dst_lookup = xfrm6_dst_lookup,
269 .find_bundle = __xfrm6_find_bundle,
270 .bundle_create = __xfrm6_bundle_create,
271 .decode_session = _decode_session6,
272};
273
274static void __init xfrm6_policy_init(void)
275{
276 xfrm_policy_register_afinfo(&xfrm6_policy_afinfo);
277}
278
279static void xfrm6_policy_fini(void)
280{
281 xfrm_policy_unregister_afinfo(&xfrm6_policy_afinfo);
282}
283
284void __init xfrm6_init(void)
285{
286 xfrm6_policy_init();
287 xfrm6_state_init();
288}
289
/*
 * Tear down the IPv6 xfrm pieces: policy first, then state.
 * A call to xfrm6_input_fini() is present in the original only as a
 * disabled line and is not made here.
 */
void xfrm6_fini(void)
{
	xfrm6_policy_fini();	/* policy afinfo */
	xfrm6_state_fini();	/* state afinfo */
}
diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c
new file mode 100644
index 000000000000..bf0d0abc3871
--- /dev/null
+++ b/net/ipv6/xfrm6_state.c
@@ -0,0 +1,136 @@
1/*
2 * xfrm6_state.c: based on xfrm4_state.c
3 *
4 * Authors:
5 * Mitsuru KANDA @USAGI
6 * Kazunori MIYAZAWA @USAGI
7 * Kunihiro Ishiguro <kunihiro@ipinfusion.com>
8 * IPv6 support
9 * YOSHIFUJI Hideaki @USAGI
10 * Split up af-specific portion
11 *
12 */
13
14#include <net/xfrm.h>
15#include <linux/pfkeyv2.h>
16#include <linux/ipsec.h>
17#include <net/ipv6.h>
18
19static struct xfrm_state_afinfo xfrm6_state_afinfo;
20
21static void
22__xfrm6_init_tempsel(struct xfrm_state *x, struct flowi *fl,
23 struct xfrm_tmpl *tmpl,
24 xfrm_address_t *daddr, xfrm_address_t *saddr)
25{
26 /* Initialize temporary selector matching only
27 * to current session. */
28 ipv6_addr_copy((struct in6_addr *)&x->sel.daddr, &fl->fl6_dst);
29 ipv6_addr_copy((struct in6_addr *)&x->sel.saddr, &fl->fl6_src);
30 x->sel.dport = xfrm_flowi_dport(fl);
31 x->sel.dport_mask = ~0;
32 x->sel.sport = xfrm_flowi_sport(fl);
33 x->sel.sport_mask = ~0;
34 x->sel.prefixlen_d = 128;
35 x->sel.prefixlen_s = 128;
36 x->sel.proto = fl->proto;
37 x->sel.ifindex = fl->oif;
38 x->id = tmpl->id;
39 if (ipv6_addr_any((struct in6_addr*)&x->id.daddr))
40 memcpy(&x->id.daddr, daddr, sizeof(x->sel.daddr));
41 memcpy(&x->props.saddr, &tmpl->saddr, sizeof(x->props.saddr));
42 if (ipv6_addr_any((struct in6_addr*)&x->props.saddr))
43 memcpy(&x->props.saddr, saddr, sizeof(x->props.saddr));
44 x->props.mode = tmpl->mode;
45 x->props.reqid = tmpl->reqid;
46 x->props.family = AF_INET6;
47}
48
49static struct xfrm_state *
50__xfrm6_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto)
51{
52 unsigned h = __xfrm6_spi_hash(daddr, spi, proto);
53 struct xfrm_state *x;
54
55 list_for_each_entry(x, xfrm6_state_afinfo.state_byspi+h, byspi) {
56 if (x->props.family == AF_INET6 &&
57 spi == x->id.spi &&
58 ipv6_addr_equal((struct in6_addr *)daddr, (struct in6_addr *)x->id.daddr.a6) &&
59 proto == x->id.proto) {
60 xfrm_state_hold(x);
61 return x;
62 }
63 }
64 return NULL;
65}
66
67static struct xfrm_state *
68__xfrm6_find_acq(u8 mode, u32 reqid, u8 proto,
69 xfrm_address_t *daddr, xfrm_address_t *saddr,
70 int create)
71{
72 struct xfrm_state *x, *x0;
73 unsigned h = __xfrm6_dst_hash(daddr);
74
75 x0 = NULL;
76
77 list_for_each_entry(x, xfrm6_state_afinfo.state_bydst+h, bydst) {
78 if (x->props.family == AF_INET6 &&
79 ipv6_addr_equal((struct in6_addr *)daddr, (struct in6_addr *)x->id.daddr.a6) &&
80 mode == x->props.mode &&
81 proto == x->id.proto &&
82 ipv6_addr_equal((struct in6_addr *)saddr, (struct in6_addr *)x->props.saddr.a6) &&
83 reqid == x->props.reqid &&
84 x->km.state == XFRM_STATE_ACQ &&
85 !x->id.spi) {
86 x0 = x;
87 break;
88 }
89 }
90 if (!x0 && create && (x0 = xfrm_state_alloc()) != NULL) {
91 ipv6_addr_copy((struct in6_addr *)x0->sel.daddr.a6,
92 (struct in6_addr *)daddr);
93 ipv6_addr_copy((struct in6_addr *)x0->sel.saddr.a6,
94 (struct in6_addr *)saddr);
95 x0->sel.prefixlen_d = 128;
96 x0->sel.prefixlen_s = 128;
97 ipv6_addr_copy((struct in6_addr *)x0->props.saddr.a6,
98 (struct in6_addr *)saddr);
99 x0->km.state = XFRM_STATE_ACQ;
100 ipv6_addr_copy((struct in6_addr *)x0->id.daddr.a6,
101 (struct in6_addr *)daddr);
102 x0->id.proto = proto;
103 x0->props.family = AF_INET6;
104 x0->props.mode = mode;
105 x0->props.reqid = reqid;
106 x0->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES;
107 xfrm_state_hold(x0);
108 x0->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ;
109 add_timer(&x0->timer);
110 xfrm_state_hold(x0);
111 list_add_tail(&x0->bydst, xfrm6_state_afinfo.state_bydst+h);
112 wake_up(&km_waitq);
113 }
114 if (x0)
115 xfrm_state_hold(x0);
116 return x0;
117}
118
119static struct xfrm_state_afinfo xfrm6_state_afinfo = {
120 .family = AF_INET6,
121 .lock = RW_LOCK_UNLOCKED,
122 .init_tempsel = __xfrm6_init_tempsel,
123 .state_lookup = __xfrm6_state_lookup,
124 .find_acq = __xfrm6_find_acq,
125};
126
127void __init xfrm6_state_init(void)
128{
129 xfrm_state_register_afinfo(&xfrm6_state_afinfo);
130}
131
132void xfrm6_state_fini(void)
133{
134 xfrm_state_unregister_afinfo(&xfrm6_state_afinfo);
135}
136
diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c
new file mode 100644
index 000000000000..ffcadd68b951
--- /dev/null
+++ b/net/ipv6/xfrm6_tunnel.c
@@ -0,0 +1,543 @@
1/*
2 * Copyright (C)2003,2004 USAGI/WIDE Project
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 *
18 * Authors Mitsuru KANDA <mk@linux-ipv6.org>
19 * YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
20 *
21 * Based on net/ipv4/xfrm4_tunnel.c
22 *
23 */
24#include <linux/config.h>
25#include <linux/module.h>
26#include <linux/xfrm.h>
27#include <linux/list.h>
28#include <net/ip.h>
29#include <net/xfrm.h>
30#include <net/ipv6.h>
31#include <net/protocol.h>
32#include <linux/ipv6.h>
33#include <linux/icmpv6.h>
34
/*
 * Message verbosity for this file: 3 when CONFIG_IPV6_XFRM6_TUNNEL_DEBUG is
 * defined, 1 otherwise.  X6TPRINTK<n>() emits output only if X6TDEBUG >= <n>.
 */
#if defined(CONFIG_IPV6_XFRM6_TUNNEL_DEBUG)
# define X6TDEBUG 3
#else
# define X6TDEBUG 1
#endif

/* printk() wrapper and its do-nothing counterpart. */
#define X6TPRINTK(fmt, args...) printk(fmt, ## args)
#define X6TNOPRINTK(fmt, args...) do { ; } while(0)

/* Level 1 messages. */
#if X6TDEBUG < 1
# define X6TPRINTK1 X6TNOPRINTK
#else
# define X6TPRINTK1 X6TPRINTK
#endif

/* Level 3 messages. */
#if X6TDEBUG < 3
# define X6TPRINTK3 X6TNOPRINTK
#else
# define X6TPRINTK3 X6TPRINTK
#endif
55
56/*
57 * xfrm_tunnel_spi things are for allocating unique id ("spi")
58 * per xfrm_address_t.
59 */
60struct xfrm6_tunnel_spi {
61 struct hlist_node list_byaddr;
62 struct hlist_node list_byspi;
63 xfrm_address_t addr;
64 u32 spi;
65 atomic_t refcnt;
66#ifdef XFRM6_TUNNEL_SPI_MAGIC
67 u32 magic;
68#endif
69};
70
71#ifdef CONFIG_IPV6_XFRM6_TUNNEL_DEBUG
72# define XFRM6_TUNNEL_SPI_MAGIC 0xdeadbeef
73#endif
74
75static DEFINE_RWLOCK(xfrm6_tunnel_spi_lock);
76
77static u32 xfrm6_tunnel_spi;
78
79#define XFRM6_TUNNEL_SPI_MIN 1
80#define XFRM6_TUNNEL_SPI_MAX 0xffffffff
81
82static kmem_cache_t *xfrm6_tunnel_spi_kmem;
83
84#define XFRM6_TUNNEL_SPI_BYADDR_HSIZE 256
85#define XFRM6_TUNNEL_SPI_BYSPI_HSIZE 256
86
87static struct hlist_head xfrm6_tunnel_spi_byaddr[XFRM6_TUNNEL_SPI_BYADDR_HSIZE];
88static struct hlist_head xfrm6_tunnel_spi_byspi[XFRM6_TUNNEL_SPI_BYSPI_HSIZE];
89
/*
 * Sanity-check the magic stamp of @x6spi.  @name is the caller's function
 * name and only appears in the diagnostic.  Returns 0 when the stamp is
 * intact and -1 otherwise; without XFRM6_TUNNEL_SPI_MAGIC the check is a
 * stub that always returns 0.
 */
#ifdef XFRM6_TUNNEL_SPI_MAGIC
static int x6spi_check_magic(const struct xfrm6_tunnel_spi *x6spi,
			     const char *name)
{
	if (likely(x6spi->magic == XFRM6_TUNNEL_SPI_MAGIC))
		return 0;

	X6TPRINTK3(KERN_DEBUG "%s(): x6spi object "
			      "at %p has corrupted magic %08x "
			      "(should be %08x)\n",
		   name, x6spi, x6spi->magic, XFRM6_TUNNEL_SPI_MAGIC);
	return -1;
}
#else
static inline int x6spi_check_magic(const struct xfrm6_tunnel_spi *x6spi,
				    const char *name)
{
	return 0;
}
#endif

/* Convenience wrapper that supplies the calling function's name. */
#define X6SPI_CHECK_MAGIC(x6spi) x6spi_check_magic((x6spi), __FUNCTION__)
112
113
114static unsigned inline xfrm6_tunnel_spi_hash_byaddr(xfrm_address_t *addr)
115{
116 unsigned h;
117
118 X6TPRINTK3(KERN_DEBUG "%s(addr=%p)\n", __FUNCTION__, addr);
119
120 h = addr->a6[0] ^ addr->a6[1] ^ addr->a6[2] ^ addr->a6[3];
121 h ^= h >> 16;
122 h ^= h >> 8;
123 h &= XFRM6_TUNNEL_SPI_BYADDR_HSIZE - 1;
124
125 X6TPRINTK3(KERN_DEBUG "%s() = %u\n", __FUNCTION__, h);
126
127 return h;
128}
129
130static unsigned inline xfrm6_tunnel_spi_hash_byspi(u32 spi)
131{
132 return spi % XFRM6_TUNNEL_SPI_BYSPI_HSIZE;
133}
134
135
136static int xfrm6_tunnel_spi_init(void)
137{
138 int i;
139
140 X6TPRINTK3(KERN_DEBUG "%s()\n", __FUNCTION__);
141
142 xfrm6_tunnel_spi = 0;
143 xfrm6_tunnel_spi_kmem = kmem_cache_create("xfrm6_tunnel_spi",
144 sizeof(struct xfrm6_tunnel_spi),
145 0, SLAB_HWCACHE_ALIGN,
146 NULL, NULL);
147 if (!xfrm6_tunnel_spi_kmem) {
148 X6TPRINTK1(KERN_ERR
149 "%s(): failed to allocate xfrm6_tunnel_spi_kmem\n",
150 __FUNCTION__);
151 return -ENOMEM;
152 }
153
154 for (i = 0; i < XFRM6_TUNNEL_SPI_BYADDR_HSIZE; i++)
155 INIT_HLIST_HEAD(&xfrm6_tunnel_spi_byaddr[i]);
156 for (i = 0; i < XFRM6_TUNNEL_SPI_BYSPI_HSIZE; i++)
157 INIT_HLIST_HEAD(&xfrm6_tunnel_spi_byspi[i]);
158 return 0;
159}
160
161static void xfrm6_tunnel_spi_fini(void)
162{
163 int i;
164
165 X6TPRINTK3(KERN_DEBUG "%s()\n", __FUNCTION__);
166
167 for (i = 0; i < XFRM6_TUNNEL_SPI_BYADDR_HSIZE; i++) {
168 if (!hlist_empty(&xfrm6_tunnel_spi_byaddr[i]))
169 goto err;
170 }
171 for (i = 0; i < XFRM6_TUNNEL_SPI_BYSPI_HSIZE; i++) {
172 if (!hlist_empty(&xfrm6_tunnel_spi_byspi[i]))
173 goto err;
174 }
175 kmem_cache_destroy(xfrm6_tunnel_spi_kmem);
176 xfrm6_tunnel_spi_kmem = NULL;
177 return;
178err:
179 X6TPRINTK1(KERN_ERR "%s(): table is not empty\n", __FUNCTION__);
180 return;
181}
182
183static struct xfrm6_tunnel_spi *__xfrm6_tunnel_spi_lookup(xfrm_address_t *saddr)
184{
185 struct xfrm6_tunnel_spi *x6spi;
186 struct hlist_node *pos;
187
188 X6TPRINTK3(KERN_DEBUG "%s(saddr=%p)\n", __FUNCTION__, saddr);
189
190 hlist_for_each_entry(x6spi, pos,
191 &xfrm6_tunnel_spi_byaddr[xfrm6_tunnel_spi_hash_byaddr(saddr)],
192 list_byaddr) {
193 if (memcmp(&x6spi->addr, saddr, sizeof(x6spi->addr)) == 0) {
194 X6SPI_CHECK_MAGIC(x6spi);
195 X6TPRINTK3(KERN_DEBUG "%s() = %p(%u)\n", __FUNCTION__, x6spi, x6spi->spi);
196 return x6spi;
197 }
198 }
199
200 X6TPRINTK3(KERN_DEBUG "%s() = NULL(0)\n", __FUNCTION__);
201 return NULL;
202}
203
204u32 xfrm6_tunnel_spi_lookup(xfrm_address_t *saddr)
205{
206 struct xfrm6_tunnel_spi *x6spi;
207 u32 spi;
208
209 X6TPRINTK3(KERN_DEBUG "%s(saddr=%p)\n", __FUNCTION__, saddr);
210
211 read_lock_bh(&xfrm6_tunnel_spi_lock);
212 x6spi = __xfrm6_tunnel_spi_lookup(saddr);
213 spi = x6spi ? x6spi->spi : 0;
214 read_unlock_bh(&xfrm6_tunnel_spi_lock);
215 return spi;
216}
217
218EXPORT_SYMBOL(xfrm6_tunnel_spi_lookup);
219
220static u32 __xfrm6_tunnel_alloc_spi(xfrm_address_t *saddr)
221{
222 u32 spi;
223 struct xfrm6_tunnel_spi *x6spi;
224 struct hlist_node *pos;
225 unsigned index;
226
227 X6TPRINTK3(KERN_DEBUG "%s(saddr=%p)\n", __FUNCTION__, saddr);
228
229 if (xfrm6_tunnel_spi < XFRM6_TUNNEL_SPI_MIN ||
230 xfrm6_tunnel_spi >= XFRM6_TUNNEL_SPI_MAX)
231 xfrm6_tunnel_spi = XFRM6_TUNNEL_SPI_MIN;
232 else
233 xfrm6_tunnel_spi++;
234
235 for (spi = xfrm6_tunnel_spi; spi <= XFRM6_TUNNEL_SPI_MAX; spi++) {
236 index = xfrm6_tunnel_spi_hash_byspi(spi);
237 hlist_for_each_entry(x6spi, pos,
238 &xfrm6_tunnel_spi_byspi[index],
239 list_byspi) {
240 if (x6spi->spi == spi)
241 goto try_next_1;
242 }
243 xfrm6_tunnel_spi = spi;
244 goto alloc_spi;
245try_next_1:;
246 }
247 for (spi = XFRM6_TUNNEL_SPI_MIN; spi < xfrm6_tunnel_spi; spi++) {
248 index = xfrm6_tunnel_spi_hash_byspi(spi);
249 hlist_for_each_entry(x6spi, pos,
250 &xfrm6_tunnel_spi_byspi[index],
251 list_byspi) {
252 if (x6spi->spi == spi)
253 goto try_next_2;
254 }
255 xfrm6_tunnel_spi = spi;
256 goto alloc_spi;
257try_next_2:;
258 }
259 spi = 0;
260 goto out;
261alloc_spi:
262 X6TPRINTK3(KERN_DEBUG "%s(): allocate new spi for "
263 "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n",
264 __FUNCTION__,
265 NIP6(*(struct in6_addr *)saddr));
266 x6spi = kmem_cache_alloc(xfrm6_tunnel_spi_kmem, SLAB_ATOMIC);
267 if (!x6spi) {
268 X6TPRINTK1(KERN_ERR "%s(): kmem_cache_alloc() failed\n",
269 __FUNCTION__);
270 goto out;
271 }
272#ifdef XFRM6_TUNNEL_SPI_MAGIC
273 x6spi->magic = XFRM6_TUNNEL_SPI_MAGIC;
274#endif
275 memcpy(&x6spi->addr, saddr, sizeof(x6spi->addr));
276 x6spi->spi = spi;
277 atomic_set(&x6spi->refcnt, 1);
278
279 hlist_add_head(&x6spi->list_byspi, &xfrm6_tunnel_spi_byspi[index]);
280
281 index = xfrm6_tunnel_spi_hash_byaddr(saddr);
282 hlist_add_head(&x6spi->list_byaddr, &xfrm6_tunnel_spi_byaddr[index]);
283 X6SPI_CHECK_MAGIC(x6spi);
284out:
285 X6TPRINTK3(KERN_DEBUG "%s() = %u\n", __FUNCTION__, spi);
286 return spi;
287}
288
289u32 xfrm6_tunnel_alloc_spi(xfrm_address_t *saddr)
290{
291 struct xfrm6_tunnel_spi *x6spi;
292 u32 spi;
293
294 X6TPRINTK3(KERN_DEBUG "%s(saddr=%p)\n", __FUNCTION__, saddr);
295
296 write_lock_bh(&xfrm6_tunnel_spi_lock);
297 x6spi = __xfrm6_tunnel_spi_lookup(saddr);
298 if (x6spi) {
299 atomic_inc(&x6spi->refcnt);
300 spi = x6spi->spi;
301 } else
302 spi = __xfrm6_tunnel_alloc_spi(saddr);
303 write_unlock_bh(&xfrm6_tunnel_spi_lock);
304
305 X6TPRINTK3(KERN_DEBUG "%s() = %u\n", __FUNCTION__, spi);
306
307 return spi;
308}
309
310EXPORT_SYMBOL(xfrm6_tunnel_alloc_spi);
311
312void xfrm6_tunnel_free_spi(xfrm_address_t *saddr)
313{
314 struct xfrm6_tunnel_spi *x6spi;
315 struct hlist_node *pos, *n;
316
317 X6TPRINTK3(KERN_DEBUG "%s(saddr=%p)\n", __FUNCTION__, saddr);
318
319 write_lock_bh(&xfrm6_tunnel_spi_lock);
320
321 hlist_for_each_entry_safe(x6spi, pos, n,
322 &xfrm6_tunnel_spi_byaddr[xfrm6_tunnel_spi_hash_byaddr(saddr)],
323 list_byaddr)
324 {
325 if (memcmp(&x6spi->addr, saddr, sizeof(x6spi->addr)) == 0) {
326 X6TPRINTK3(KERN_DEBUG "%s(): x6spi object "
327 "for %04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x "
328 "found at %p\n",
329 __FUNCTION__,
330 NIP6(*(struct in6_addr *)saddr),
331 x6spi);
332 X6SPI_CHECK_MAGIC(x6spi);
333 if (atomic_dec_and_test(&x6spi->refcnt)) {
334 hlist_del(&x6spi->list_byaddr);
335 hlist_del(&x6spi->list_byspi);
336 kmem_cache_free(xfrm6_tunnel_spi_kmem, x6spi);
337 break;
338 }
339 }
340 }
341 write_unlock_bh(&xfrm6_tunnel_spi_lock);
342}
343
344EXPORT_SYMBOL(xfrm6_tunnel_free_spi);
345
346static int xfrm6_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
347{
348 struct ipv6hdr *top_iph;
349
350 top_iph = (struct ipv6hdr *)skb->data;
351 top_iph->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
352
353 return 0;
354}
355
/* Input hook: does nothing with its arguments and reports success (0). */
static int xfrm6_tunnel_input(struct xfrm_state *x,
			      struct xfrm_decap_state *decap,
			      struct sk_buff *skb)
{
	return 0;
}
360
361static struct xfrm6_tunnel *xfrm6_tunnel_handler;
362static DECLARE_MUTEX(xfrm6_tunnel_sem);
363
364int xfrm6_tunnel_register(struct xfrm6_tunnel *handler)
365{
366 int ret;
367
368 down(&xfrm6_tunnel_sem);
369 ret = 0;
370 if (xfrm6_tunnel_handler != NULL)
371 ret = -EINVAL;
372 if (!ret)
373 xfrm6_tunnel_handler = handler;
374 up(&xfrm6_tunnel_sem);
375
376 return ret;
377}
378
379EXPORT_SYMBOL(xfrm6_tunnel_register);
380
381int xfrm6_tunnel_deregister(struct xfrm6_tunnel *handler)
382{
383 int ret;
384
385 down(&xfrm6_tunnel_sem);
386 ret = 0;
387 if (xfrm6_tunnel_handler != handler)
388 ret = -EINVAL;
389 if (!ret)
390 xfrm6_tunnel_handler = NULL;
391 up(&xfrm6_tunnel_sem);
392
393 synchronize_net();
394
395 return ret;
396}
397
398EXPORT_SYMBOL(xfrm6_tunnel_deregister);
399
400static int xfrm6_tunnel_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
401{
402 struct sk_buff *skb = *pskb;
403 struct xfrm6_tunnel *handler = xfrm6_tunnel_handler;
404 struct ipv6hdr *iph = skb->nh.ipv6h;
405 u32 spi;
406
407 /* device-like_ip6ip6_handler() */
408 if (handler && handler->handler(pskb, nhoffp) == 0)
409 return 0;
410
411 spi = xfrm6_tunnel_spi_lookup((xfrm_address_t *)&iph->saddr);
412 return xfrm6_rcv_spi(pskb, nhoffp, spi);
413}
414
415static void xfrm6_tunnel_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
416 int type, int code, int offset, __u32 info)
417{
418 struct xfrm6_tunnel *handler = xfrm6_tunnel_handler;
419
420 /* call here first for device-like ip6ip6 err handling */
421 if (handler) {
422 handler->err_handler(skb, opt, type, code, offset, info);
423 return;
424 }
425
426 /* xfrm6_tunnel native err handling */
427 switch (type) {
428 case ICMPV6_DEST_UNREACH:
429 switch (code) {
430 case ICMPV6_NOROUTE:
431 case ICMPV6_ADM_PROHIBITED:
432 case ICMPV6_NOT_NEIGHBOUR:
433 case ICMPV6_ADDR_UNREACH:
434 case ICMPV6_PORT_UNREACH:
435 default:
436 X6TPRINTK3(KERN_DEBUG
437 "xfrm6_tunnel: Destination Unreach.\n");
438 break;
439 }
440 break;
441 case ICMPV6_PKT_TOOBIG:
442 X6TPRINTK3(KERN_DEBUG
443 "xfrm6_tunnel: Packet Too Big.\n");
444 break;
445 case ICMPV6_TIME_EXCEED:
446 switch (code) {
447 case ICMPV6_EXC_HOPLIMIT:
448 X6TPRINTK3(KERN_DEBUG
449 "xfrm6_tunnel: Too small Hoplimit.\n");
450 break;
451 case ICMPV6_EXC_FRAGTIME:
452 default:
453 break;
454 }
455 break;
456 case ICMPV6_PARAMPROB:
457 switch (code) {
458 case ICMPV6_HDR_FIELD: break;
459 case ICMPV6_UNK_NEXTHDR: break;
460 case ICMPV6_UNK_OPTION: break;
461 }
462 break;
463 default:
464 break;
465 }
466 return;
467}
468
469static int xfrm6_tunnel_init_state(struct xfrm_state *x, void *args)
470{
471 if (!x->props.mode)
472 return -EINVAL;
473
474 if (x->encap)
475 return -EINVAL;
476
477 x->props.header_len = sizeof(struct ipv6hdr);
478
479 return 0;
480}
481
482static void xfrm6_tunnel_destroy(struct xfrm_state *x)
483{
484 xfrm6_tunnel_free_spi((xfrm_address_t *)&x->props.saddr);
485}
486
487static struct xfrm_type xfrm6_tunnel_type = {
488 .description = "IP6IP6",
489 .owner = THIS_MODULE,
490 .proto = IPPROTO_IPV6,
491 .init_state = xfrm6_tunnel_init_state,
492 .destructor = xfrm6_tunnel_destroy,
493 .input = xfrm6_tunnel_input,
494 .output = xfrm6_tunnel_output,
495};
496
497static struct inet6_protocol xfrm6_tunnel_protocol = {
498 .handler = xfrm6_tunnel_rcv,
499 .err_handler = xfrm6_tunnel_err,
500 .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
501};
502
503static int __init xfrm6_tunnel_init(void)
504{
505 X6TPRINTK3(KERN_DEBUG "%s()\n", __FUNCTION__);
506
507 if (xfrm_register_type(&xfrm6_tunnel_type, AF_INET6) < 0) {
508 X6TPRINTK1(KERN_ERR
509 "xfrm6_tunnel init: can't add xfrm type\n");
510 return -EAGAIN;
511 }
512 if (inet6_add_protocol(&xfrm6_tunnel_protocol, IPPROTO_IPV6) < 0) {
513 X6TPRINTK1(KERN_ERR
514 "xfrm6_tunnel init(): can't add protocol\n");
515 xfrm_unregister_type(&xfrm6_tunnel_type, AF_INET6);
516 return -EAGAIN;
517 }
518 if (xfrm6_tunnel_spi_init() < 0) {
519 X6TPRINTK1(KERN_ERR
520 "xfrm6_tunnel init: failed to initialize spi\n");
521 inet6_del_protocol(&xfrm6_tunnel_protocol, IPPROTO_IPV6);
522 xfrm_unregister_type(&xfrm6_tunnel_type, AF_INET6);
523 return -EAGAIN;
524 }
525 return 0;
526}
527
528static void __exit xfrm6_tunnel_fini(void)
529{
530 X6TPRINTK3(KERN_DEBUG "%s()\n", __FUNCTION__);
531
532 xfrm6_tunnel_spi_fini();
533 if (inet6_del_protocol(&xfrm6_tunnel_protocol, IPPROTO_IPV6) < 0)
534 X6TPRINTK1(KERN_ERR
535 "xfrm6_tunnel close: can't remove protocol\n");
536 if (xfrm_unregister_type(&xfrm6_tunnel_type, AF_INET6) < 0)
537 X6TPRINTK1(KERN_ERR
538 "xfrm6_tunnel close: can't remove xfrm type\n");
539}
540
541module_init(xfrm6_tunnel_init);
542module_exit(xfrm6_tunnel_fini);
543MODULE_LICENSE("GPL");